In [15]:
import numpy as np
import pandas as pd
import math
def entropy(target_col):
    elements, counts = np.unique(target_col, return_counts=True)
    entropy_val = 0
    for i in range(len(elements)):
        p = counts[i] / np.sum(counts)
        entropy_val += -p * np.log2(p)
    return entropy_val

def info_gain(data, split_attribute, target_attribute):
    total_entropy = entropy(data[target_attribute])
    vals, counts = np.unique(data[split_attribute], return_counts=True)
    weighted_entropy = 0    
    for i in range(len(vals)):
        subset = data[data[split_attribute] == vals[i]]
        weighted_entropy += (counts[i] / np.sum(counts)) * entropy(subset[target_attribute])
    
    return total_entropy - weighted_entropy
    
def id3(data, original_data, features, target_attribute, parent_class=None):
    if len(np.unique(data[target_attribute])) == 1:
        return np.unique(data[target_attribute])[0]
    if len(data) == 0:
        return np.unique(original_data[target_attribute])[
            np.argmax(np.unique(original_data[target_attribute], return_counts=True)[1])
        ]

    # If no features left â†’ return majority class
    if len(features) == 0:
        return parent_class

    # Majority class of current dataset
    parent_class = np.unique(data[target_attribute])[np.argmax(
        np.unique(data[target_attribute], return_counts=True)[1]
    )]

    # Select feature with highest information gain
    gains = [info_gain(data, feature, target_attribute) for feature in features]
    best_feature = features[np.argmax(gains)]

    # Build tree
    tree = {best_feature: {}}

    # Remove chosen feature
    features = [f for f in features if f != best_feature]

    # Branch for each value of best feature
    for value in np.unique(data[best_feature]):
        sub_data = data[data[best_feature] == value]
        subtree = id3(sub_data, original_data, features.copy(), target_attribute, parent_class)
        tree[best_feature][value] = subtree

    return tree


# ----------------------------
# Example Usage
# ----------------------------
if __name__ == "__main__":
    data = {
        'Outlook': ['Sunny', 'Sunny', 'Overcast', 'Rain', 'Rain', 'Rain', 'Overcast'],
        'Temp':    ['Hot', 'Hot', 'Hot', 'Mild', 'Cool', 'Cool', 'Mild'],
        'Play':    ['No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes']
    }

    df = pd.DataFrame(data)

    features = ['Outlook', 'Temp']
    target = 'Play'

    tree = id3(df, df, features, target)
    print(tree)


{'Outlook': {'Overcast': 'Yes', 'Rain': {'Temp': {'Cool': 'Yes', 'Mild': 'Yes'}}, 'Sunny': 'No'}}


In [10]:
import numpy as np
import pandas as pd
import math

In [11]:
def entropy(target_col):
    elements, counts = np.unique(target_col, return_counts=True)
    entropy_val = 0
    for i in range(len(elements)):
        p = counts[i] / np.sum(counts)
        entropy_val += -p * np.log2(p)
    return entropy_val

In [12]:
def info_gain(data, split_attribute, target_attribute):
    total_entropy = entropy(data[target_attribute])
    vals, counts = np.unique(data[split_attribute], return_counts=True)
    weighted_entropy = 0   
    for i in range(len(vals)):
        subset = data[data[split_attribute] == vals[i]]
        weighted_entropy += (counts[i] / np.sum(counts)) * entropy(subset[target_attribute])   
    return total_entropy - weighted_entropy

In [13]:
def id3(data, original_data, features, target_attribute, parent_class=None):
    if len(np.unique(data[target_attribute])) == 1:
        return np.unique(data[target_attribute])[0]
    if len(data) == 0:
        return np.unique(original_data[target_attribute])[
            np.argmax(np.unique(original_data[target_attribute], return_counts=True)[1])
        ]
    if len(features) == 0:
        return parent_class
    parent_class = np.unique(data[target_attribute])[np.argmax(
        np.unique(data[target_attribute], return_counts=True)[1]
    )]

In [14]:
if __name__ == "__main__":
    data = {
        'Outlook': ['Sunny', 'Sunny', 'Overcast', 'Rain', 'Rain', 'Rain', 'Overcast'],
        'Temp':    ['Hot', 'Hot', 'Hot', 'Mild', 'Cool', 'Cool', 'Mild'],
        'Play':    ['No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes']
    }
    df = pd.DataFrame(data)
    features = ['Outlook', 'Temp']
    target = 'Play'
    tree = id3(df, df, features, target)
    print(tree)

None
