In [1]:
import numpy as np
import pandas as pd
from math import log2

# -----------------------------
# Entropy Function
# -----------------------------
def entropy(y):
    """Return the Shannon entropy, in bits, of the labels in ``y``.

    Args:
        y: Array-like of class labels; anything ``np.unique`` accepts.

    Returns:
        The base-2 entropy as a Python ``float``. The result is ``0.0`` when
        ``y`` is empty or when every label is the same.
    """
    _, counts = np.unique(y, return_counts=True)
    total = counts.sum()  # loop-invariant, so computed once up front

    # np.unique only reports labels that occur, so every count is >= 1 and
    # log2 never sees zero. For empty input the generator yields nothing and
    # the sum is 0, which means ``total`` is never used as a divisor.
    bits = sum(-(c / total) * log2(c / total) for c in counts)

    # Normalize to a built-in float so callers get one return type
    # regardless of whether the input was empty.
    return float(bits)


# -----------------------------
# Information Gain
# -----------------------------
def information_gain(X, y, feature):
    """Return how much splitting on ``feature`` reduces the entropy of ``y``.

    The gain is the entropy of ``y`` minus the size-weighted average of the
    entropies of ``y`` restricted to each distinct value of ``X[feature]``.

    Args:
        X: Table indexable as ``X[feature]``; the result must support
            element-wise ``==`` (presumably a pandas DataFrame -- confirm
            against callers).
        y: Labels, indexable by the boolean mask produced from ``X[feature]``.
        feature: Key of the column to evaluate.

    Returns:
        The information gain as a number.
    """
    column = X[feature]
    levels, sizes = np.unique(column, return_counts=True)
    n_rows = np.sum(sizes)

    # Accumulate each partition's entropy, weighted by its share of the rows.
    conditional_entropy = 0
    for level, size in zip(levels, sizes):
        partition_labels = y[column == level]
        conditional_entropy += (size / n_rows) * entropy(partition_labels)

    return entropy(y) - conditional_entropy


# -----------------------------
# ID3 Algorithm (Recursive)
# -----------------------------
def id3(X, y, features):
    """Recursively build an ID3 decision tree.

    Args:
        X: Feature table indexable by column key and by boolean mask
            (presumably a pandas DataFrame -- confirm against callers).
        y: Labels aligned with the rows of ``X``. It must provide
            ``value_counts()``, so a pandas Series is expected.
        features: Sequence of column keys still available for splitting.

    Returns:
        A single class label when the node is a leaf, otherwise a nested
        dict of the form ``{feature: {value: subtree_or_label, ...}}``.
    """
    classes = np.unique(y)

    # Pure node: every row carries the same label.
    if len(classes) == 1:
        return classes[0]

    # Nothing left to split on: fall back to the most frequent label.
    # An explicit length check is used because truthiness is ambiguous for
    # array-like ``features``.
    if len(features) == 0:
        return y.value_counts().idxmax()

    # Choose the feature with the highest information gain; argmax keeps the
    # first one when several tie.
    scores = [information_gain(X, y, name) for name in features]
    best_feature = features[np.argmax(scores)]
    remaining_features = [name for name in features if name != best_feature]

    # Grow one branch per distinct value observed for the chosen feature.
    branches = {}
    for value in np.unique(X[best_feature]):
        mask = X[best_feature] == value
        branch_y = y[mask]

        if len(branch_y) == 0:
            # No rows matched this value: label the branch with the
            # parent's majority class.
            branches[value] = y.value_counts().idxmax()
        else:
            branches[value] = id3(X[mask], branch_y, remaining_features)

    return {best_feature: branches}


# -----------------------------
# Example Dataset
# -----------------------------
# Eight weather observations; 'Play' is the class label to predict and the
# other four columns are categorical features.
data = {
    'Outlook': ['Sunny', 'Sunny', 'Overcast', 'Rain', 'Rain', 'Rain', 'Overcast', 'Sunny'],
    'Temperature': ['Hot', 'Hot', 'Hot', 'Mild', 'Cool', 'Cool', 'Mild', 'Mild'],
    'Humidity': ['High', 'High', 'High', 'High', 'Normal', 'Normal', 'Normal', 'High'],
    'Wind': ['Weak', 'Strong', 'Weak', 'Weak', 'Weak', 'Strong', 'Strong', 'Weak'],
    'Play': ['No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'No'],
}

df = pd.DataFrame(data)

# Separate the label column from the feature columns.
y = df['Play']
X = df.drop(columns=['Play'])
features = X.columns.tolist()

# -----------------------------
# Build Decision Tree using ID3
# -----------------------------
tree = id3(X, y, features)

# Heading and tree are printed on separate lines.
print("ID3 Decision Tree:", tree, sep="\n")


ID3 Decision Tree:
{'Outlook': {'Overcast': 'Yes', 'Rain': {'Wind': {'Strong': 'No', 'Weak': 'Yes'}}, 'Sunny': 'No'}}
