In [1]:
import pandas as pd
import math

class Node:
    """One node of a decision tree.

    Attributes (all set in ``__init__``):
        feature:  name of the feature this node splits on; ``None`` by default.
        label:    value carried by a leaf; ``None`` by default.
        children: dict of child nodes, created empty for every instance.
    """

    def __init__(self, feature=None, label=None):
        # A fresh dict per instance so nodes never share their children map.
        self.feature, self.label, self.children = feature, label, {}

def entropy(data, target):
    """Return the base-2 Shannon entropy of column ``target`` in ``data``.

    Args:
        data: pandas DataFrame holding the column.
        target: name of the column whose value distribution is measured.

    Returns:
        ``-sum(p * log2(p))`` over the normalized value counts of the column.
    """
    proportions = data[target].value_counts(normalize=True)
    # Skip non-positive proportions so log2 is never evaluated at 0.
    terms = [share * math.log2(share) for share in proportions if share > 0]
    return -sum(terms)

def info_gain_ratio(data, feature, target):
    """Return the gain ratio obtained by splitting ``data`` on ``feature``.

    The information gain (entropy of ``target`` before the split minus the
    size-weighted entropy of each partition) is divided by the split
    information, i.e. the entropy of the partition sizes themselves.

    Args:
        data: pandas DataFrame containing both columns.
        feature: name of the column to split on.
        target: name of the class-label column.

    Returns:
        ``gain / split_information``, or ``0`` when the split information is
        exactly zero.
    """
    n_rows = len(data)
    base_entropy = entropy(data, target)
    conditional_entropy = 0
    split_info = 0

    for level in data[feature].unique():
        part = data[data[feature] == level]
        weight = len(part) / n_rows
        conditional_entropy += weight * entropy(part, target)
        # A zero-weight partition contributes nothing to the split information.
        if weight > 0:
            split_info -= weight * math.log2(weight)

    # Guard against division by zero.
    if split_info == 0:
        return 0
    return (base_entropy - conditional_entropy) / split_info

def c45(data, features, target):
    """Recursively build a decision tree, splitting on the best gain ratio.

    Args:
        data: pandas DataFrame with the feature columns and the target column.
        features: list of feature column names still available for splitting.
        target: name of the class-label column.

    Returns:
        A ``Node``. It is a leaf when ``data`` holds a single distinct label,
        or when ``features`` is empty (the leaf then carries the first mode of
        the target column). Otherwise it is an internal node with one child
        per distinct value of the chosen feature.
    """
    distinct_labels = data[target].unique()

    # Pure partition: nothing left to separate.
    if len(distinct_labels) == 1:
        return Node(label=distinct_labels[0])

    # No features left: fall back to the most frequent label.
    if not features:
        return Node(label=data[target].mode()[0])

    def score(name):
        return info_gain_ratio(data, name, target)

    split_on = max(features, key=score)
    node = Node(feature=split_on)
    # The chosen feature is not offered again further down this branch.
    remaining = [name for name in features if name != split_on]

    for level in data[split_on].unique():
        branch = data[data[split_on] == level]
        if branch.empty:
            # The equality filter selected no rows: use the parent's majority label.
            node.children[level] = Node(label=data[target].mode()[0])
        else:
            node.children[level] = c45(branch, remaining, target)

    return node

def print_tree(node, depth=0):
    """Print the tree rooted at ``node`` as an indented outline.

    Args:
        node: object exposing ``label``, ``feature`` and ``children``.
        depth: indentation level of this node; each level is two spaces.

    A node whose ``label`` is not ``None`` is printed as a leaf. Any other
    node prints its feature in brackets, then each child value one level
    deeper with that child's subtree two levels deeper.
    """
    pad = "  " * depth

    if node.label is None:
        print(pad + f"[{node.feature}]")
        branch_pad = "  " * (depth + 1)
        for v, child in node.children.items():
            print(branch_pad + f"{v} →")
            print_tree(child, depth + 2)
        return

    print(pad + f"Leaf: {node.label}")

# Toy weather sample: one tuple per observation, in the column order given below.
data = pd.DataFrame(
    [
        ("Sunny", "Hot", "High", "Weak", "No"),
        ("Sunny", "Hot", "High", "Strong", "No"),
        ("Overcast", "Hot", "High", "Weak", "Yes"),
        ("Rain", "Mild", "High", "Weak", "Yes"),
        ("Rain", "Cool", "Normal", "Weak", "Yes"),
        ("Rain", "Cool", "Normal", "Strong", "No"),
        ("Overcast", "Cool", "Normal", "Strong", "Yes"),
    ],
    columns=["Outlook", "Temp", "Humidity", "Wind", "Play"],
)

# Column to predict, and the columns the tree may split on.
target = "Play"
features = ["Outlook", "Temp", "Humidity", "Wind"]

# Build the gain-ratio tree and print it as an indented outline.
tree = c45(data, features, target)

print("C4.5 Decision Tree:")
print_tree(tree)


C4.5 Decision Tree:
[Outlook]
  Sunny →
    Leaf: No
  Overcast →
    Leaf: Yes
  Rain →
    [Wind]
      Weak →
        Leaf: Yes
      Strong →
        Leaf: No


In [2]:
import pandas as pd

class Node:
    """One node of a decision tree.

    Attributes (all set in ``__init__``):
        feature:  name of the feature this node splits on; ``None`` by default.
        label:    value carried by a leaf; ``None`` by default.
        children: dict of child nodes, created empty for every instance.
    """

    def __init__(self, feature=None, label=None):
        # A fresh dict per instance so nodes never share their children map.
        self.feature, self.label, self.children = feature, label, {}

def gini(data, target):
    """Return the Gini impurity of column ``target`` in ``data``.

    Args:
        data: pandas DataFrame holding the column.
        target: name of the column whose value distribution is measured.

    Returns:
        ``1 - sum(p ** 2)`` over the normalized value counts of the column.
    """
    shares = data[target].value_counts(normalize=True)
    squared = [share ** 2 for share in shares]
    return 1 - sum(squared)

def gini_gain(data, feature, target):
    """Return the drop in Gini impurity obtained by splitting on ``feature``.

    Args:
        data: pandas DataFrame containing both columns.
        feature: name of the column to split on.
        target: name of the class-label column.

    Returns:
        Gini impurity of ``target`` over all of ``data`` minus the
        size-weighted impurity of the partitions, one partition per distinct
        value of ``feature``.
    """
    n_rows = len(data)
    parent_impurity = gini(data, target)
    children_impurity = 0

    for level in data[feature].unique():
        part = data[data[feature] == level]
        # Each partition counts in proportion to its share of the rows.
        children_impurity += (len(part) / n_rows) * gini(part, target)

    return parent_impurity - children_impurity

def cart(data, features, target):
    """Recursively build a decision tree, splitting on the best Gini gain.

    Args:
        data: pandas DataFrame with the feature columns and the target column.
        features: list of feature column names still available for splitting.
        target: name of the class-label column.

    Returns:
        A ``Node``. It is a leaf when ``data`` holds a single distinct label,
        or when ``features`` is empty (the leaf then carries the first mode of
        the target column). Otherwise it is an internal node with one child
        per distinct value of the chosen feature.
    """
    distinct_labels = data[target].unique()

    # Pure partition: nothing left to separate.
    if len(distinct_labels) == 1:
        return Node(label=distinct_labels[0])

    # No features left: fall back to the most frequent label.
    if not features:
        return Node(label=data[target].mode()[0])

    def score(name):
        return gini_gain(data, name, target)

    split_on = max(features, key=score)
    node = Node(feature=split_on)
    # The chosen feature is not offered again further down this branch.
    remaining = [name for name in features if name != split_on]

    for level in data[split_on].unique():
        branch = data[data[split_on] == level]
        if branch.empty:
            # The equality filter selected no rows: use the parent's majority label.
            node.children[level] = Node(label=data[target].mode()[0])
        else:
            node.children[level] = cart(branch, remaining, target)

    return node

def print_tree(node, depth=0):
    """Print the tree rooted at ``node`` as an indented outline.

    Args:
        node: object exposing ``label``, ``feature`` and ``children``.
        depth: indentation level of this node; each level is two spaces.

    A node whose ``label`` is not ``None`` is printed as a leaf. Any other
    node prints its feature in brackets, then each child value one level
    deeper with that child's subtree two levels deeper.
    """
    pad = "  " * depth

    if node.label is None:
        print(pad + f"[{node.feature}]")
        branch_pad = "  " * (depth + 1)
        for v, child in node.children.items():
            print(branch_pad + f"{v} →")
            print_tree(child, depth + 2)
        return

    print(pad + f"Leaf: {node.label}")

# Toy weather sample: one tuple per observation, in the column order given below.
data = pd.DataFrame(
    [
        ("Sunny", "Hot", "High", "Weak", "No"),
        ("Sunny", "Hot", "High", "Strong", "No"),
        ("Overcast", "Hot", "High", "Weak", "Yes"),
        ("Rain", "Mild", "High", "Weak", "Yes"),
        ("Rain", "Cool", "Normal", "Weak", "Yes"),
        ("Rain", "Cool", "Normal", "Strong", "No"),
        ("Overcast", "Cool", "Normal", "Strong", "Yes"),
    ],
    columns=["Outlook", "Temp", "Humidity", "Wind", "Play"],
)

# Column to predict, and the columns the tree may split on.
target = "Play"
features = ["Outlook", "Temp", "Humidity", "Wind"]

# Build the Gini-gain tree and print it as an indented outline.
tree = cart(data, features, target)

print("CART Decision Tree:")
print_tree(tree)


CART Decision Tree:
[Outlook]
  Sunny →
    Leaf: No
  Overcast →
    Leaf: Yes
  Rain →
    [Wind]
      Weak →
        Leaf: Yes
      Strong →
        Leaf: No
