In [5]:
import math

# Training table, written one customer record per row. The tuple positions
# follow _COLUMN_NAMES; the last position is the class label.
_COLUMN_NAMES = ('Age', 'Income', 'Student', 'Credit_rating', 'Buys_Computer')
_RECORDS = [
    ('youth', 'high', 'no', 'fair', 'no'),
    ('youth', 'high', 'no', 'excellent', 'no'),
    ('middle-aged', 'high', 'no', 'fair', 'yes'),
    ('senior', 'medium', 'no', 'fair', 'yes'),
    ('senior', 'low', 'yes', 'fair', 'yes'),
    ('senior', 'low', 'yes', 'excellent', 'no'),
    ('middle-aged', 'low', 'yes', 'excellent', 'yes'),
    ('youth', 'medium', 'no', 'fair', 'no'),
    ('youth', 'low', 'yes', 'fair', 'yes'),
    ('senior', 'medium', 'yes', 'fair', 'yes'),
    ('youth', 'medium', 'yes', 'excellent', 'yes'),
    ('middle-aged', 'medium', 'yes', 'excellent', 'yes'),
    ('middle-aged', 'high', 'yes', 'fair', 'yes'),
    ('senior', 'medium', 'no', 'excellent', 'no'),
]

# Column-oriented view used by the rest of the file: each column name maps
# to the list of that column's values, in record order.
data = {
    name: list(column)
    for name, column in zip(_COLUMN_NAMES, zip(*_RECORDS))
}

# Columns the tree may split on, and the column holding the class label.
features = [
    'Age',
    'Income',
    'Student',
    'Credit_rating',
]
target = 'Buys_Computer'

# === Helper Functions ===

def entropy_of_dataset_Info_D(dataset):
    """Return the binary entropy (in bits) of the target column.

    Only the labels 'yes' and 'no' are counted; any other label in the
    target column is ignored. The column is looked up through the
    module-level ``target`` name.

    Args:
        dataset: Mapping of column name to a list of values.

    Returns:
        The entropy of the yes/no distribution. When the column holds
        neither label the function returns 0 and prints nothing; otherwise
        the value is also printed with four decimals.
    """
    labels = dataset[target]
    positives = labels.count('yes')
    negatives = labels.count('no')
    counted = positives + negatives

    # Nothing to measure: avoid dividing by zero.
    if not counted:
        return 0

    # Subtract each term from zero (rather than negating a sum) so that a
    # pure column yields 0.0 and not -0.0.
    info = 0
    for tally in (positives, negatives):
        share = tally / counted
        if share > 0:
            info -= share * math.log2(share)

    print(f"→ Entropy of dataset: {info:.4f}")
    return info

def entropy_after_split(dataset, feature):
    """Return the weighted entropy of the target after splitting on a feature.

    Rows are grouped by their value in ``feature``. Within each group a
    label equal to 'yes' counts as positive and every other label counts as
    negative. Each group's entropy is weighted by its share of the rows.

    Args:
        dataset: Mapping of column name to a list of values; the label
            column is looked up through the module-level ``target`` name.
        feature: Name of the column to split on.

    Returns:
        The expected entropy in bits, or 0 when there are no rows.
    """
    labels = dataset[target]
    column = dataset[feature]
    n_rows = len(labels)
    expected = 0

    for category in set(column):
        # Labels of the rows that fall in this branch. Scanning by row
        # number bounds the walk by the length of the label column.
        branch = [labels[row] for row in range(n_rows) if column[row] == category]
        if not branch:
            continue

        branch_size = len(branch)
        n_yes = branch.count('yes')
        n_other = branch_size - n_yes

        branch_entropy = 0
        for tally in (n_yes, n_other):
            share = tally / branch_size
            if share > 0:
                branch_entropy -= share * math.log2(share)

        expected += (branch_size / n_rows) * branch_entropy

    return expected

def info_gain(dataset, base_entropy, feature):
    """Return how much splitting on ``feature`` lowers the entropy.

    Args:
        dataset: Mapping of column name to a list of values.
        base_entropy: Entropy of the target column before the split.
        feature: Name of the column to evaluate.

    Returns:
        ``base_entropy`` minus the weighted entropy after the split. The
        value is also printed with four decimals.
    """
    entropy_left = entropy_after_split(dataset, feature)
    reduction = base_entropy - entropy_left
    print(f"Information Gain ({feature}): {reduction:.4f}")
    return reduction

def get_best_feature(dataset, features, base_entropy):
    """Pick the feature whose split yields the largest information gain.

    Args:
        dataset: Mapping of column name to a list of values.
        features: Candidate column names, evaluated in the order given.
        base_entropy: Entropy of the target column before any split.

    Returns:
        The name of the winning feature; on a tie the earliest candidate
        wins. Returns None when ``features`` is empty or no candidate's
        gain exceeds -1.
    """
    # Score every candidate first; info_gain prints as it runs, so the
    # trace lists the candidates in their given order.
    scored = [(name, info_gain(dataset, base_entropy, name)) for name in features]

    winner = None
    top_gain = -1
    for name, score in scored:
        # Strict comparison keeps the first candidate among equals.
        if score > top_gain:
            winner, top_gain = name, score
    return winner

def majority_class(labels):
    """Return the label that occurs most often in ``labels``.

    When several labels share the highest count, the one that appeared
    first in ``labels`` is returned.

    Raises:
        ValueError: If ``labels`` is empty.
    """
    tally = {}
    for label in labels:
        if label not in tally:
            tally[label] = 0
        tally[label] += 1

    # max() keeps the first maximal entry, and the dict preserves the order
    # in which labels were first seen.
    winner, _ = max(tally.items(), key=lambda entry: entry[1])
    return winner

# === Tree Builder ===

def build_tree(dataset, features):
    """Recursively build an ID3 decision tree as nested dictionaries.

    Args:
        dataset: Mapping of column name to a list of values, one entry per
            row. It must contain the column named by the module-level
            ``target``.
        features: Names of the columns still available for splitting.

    Returns:
        A class label when the node is a leaf, otherwise a dictionary of
        the form ``{feature: {value: subtree, ...}}``. Branches appear in
        the order their values are first seen in the chosen column.

    Raises:
        IndexError: If the target column has no rows.
    """
    labels = dataset[target]

    # Pure node: every row carries the same label, so stop splitting.
    if labels.count(labels[0]) == len(labels):
        return labels[0]

    # Nothing left to split on: fall back to the most common label.
    if not features:
        return majority_class(labels)

    # Entropy of this node before splitting, used as the baseline for
    # every candidate's information gain.
    base_entropy = entropy_of_dataset_Info_D(dataset)
    best_feature = get_best_feature(dataset, features, base_entropy)
    print(f"\nBest feature to split: {best_feature}\n")

    # Partition the rows by their best_feature value in a single pass.
    # A dict keeps first-seen order, so the branch order (and the printed
    # trace of the recursion) is the same on every run; iterating a set of
    # strings is not, because string hashes are randomised per process.
    partitions = {}
    for row, value in enumerate(dataset[best_feature]):
        if value not in partitions:
            partitions[value] = {column: [] for column in dataset}
        for column, cells in partitions[value].items():
            cells.append(dataset[column][row])

    # The same reduced feature list applies to every branch.
    remaining_features = [f for f in features if f != best_feature]

    return {
        best_feature: {
            value: build_tree(subset, remaining_features)
            for value, subset in partitions.items()
        }
    }

# === Run ===

# Train on the full table, then show the resulting nested-dict tree under
# a heading preceded by a blank line.
decision_tree = build_tree(dataset=data, features=features)
print("\nFinal Decision Tree:", decision_tree, sep="\n")


→ Entropy of dataset: 0.9403
Information Gain (Age): 0.2467
Information Gain (Income): 0.0292
Information Gain (Student): 0.2361
Information Gain (Credit_rating): 0.0481

Best feature to split: Age

→ Entropy of dataset: 0.9710
Information Gain (Income): 0.5710
Information Gain (Student): 0.9710
Information Gain (Credit_rating): 0.0200

Best feature to split: Student

→ Entropy of dataset: 0.9710
Information Gain (Income): 0.0200
Information Gain (Student): 0.0200
Information Gain (Credit_rating): 0.9710

Best feature to split: Credit_rating


Final Decision Tree:
{'Age': {'middle-aged': 'yes', 'youth': {'Student': {'yes': 'yes', 'no': 'no'}}, 'senior': {'Credit_rating': {'excellent': 'no', 'fair': 'yes'}}}}
