In [4]:
import math
from collections import Counter

import pandas as pd

In [5]:
#STEP 1: Load the Dataset
data = pd.read_csv("buy.csv")

# Drop every column whose name contains "s.no" or "unnamed" (case-insensitive)
# in a single call; presumably these are serial-number / index artifacts of
# the CSV export rather than real attributes.
data = data.drop(
    columns=[
        name
        for name in data.columns
        if "s.no" in name.lower() or "unnamed" in name.lower()
    ]
)

# One plain dict per example; the tree-building functions operate on these.
rows = data.to_dict(orient="records")

In [6]:
#STEP 2: Entropy Function
def entropy(data, target="Buy"):
    """Return the Shannon entropy (in bits) of the ``target`` labels in ``data``.

    Args:
        data: Iterable of record dicts, each containing the key ``target``.
        target: Key of the class label in each record.

    Returns:
        The entropy ``-sum(p * log2(p))`` over the distinct labels, or 0.0
        when ``data`` is empty.
    """
    # Count every label in a single pass. Counter keeps labels in first-seen
    # order, so the floating-point summation order below is the same on every
    # run (iterating a set of strings follows hash order, which is not).
    counts = Counter(row[target] for row in data)
    total = sum(counts.values())

    ent = 0.0
    for count in counts.values():
        p = count / total
        ent -= p * math.log2(p)
    return ent

In [7]:
#STEP 3: Information Gain
def info_gain(data, attribute, target="Buy"):
    """Return the information gain obtained by splitting ``data`` on ``attribute``.

    The gain is the entropy of ``target`` over all of ``data`` minus the
    size-weighted average entropy of the subsets that share one value of
    ``attribute``.

    Args:
        data: List of record dicts containing both ``attribute`` and ``target``.
        attribute: Key of the candidate split attribute.
        target: Key of the class label in each record.

    Returns:
        The information gain in bits (0 for empty ``data``).
    """
    total_entropy = entropy(data, target)
    n_rows = len(data)

    # dict.fromkeys de-duplicates while keeping first-seen order. Iterating a
    # set of strings follows hash order, which changes between interpreter
    # runs, so the float sum below could differ in its last bit from run to
    # run and flip the choice between near-tied attributes.
    values = dict.fromkeys(row[attribute] for row in data)

    weighted_entropy = 0
    for v in values:
        subset = [row for row in data if row[attribute] == v]
        weighted_entropy += (len(subset) / n_rows) * entropy(subset, target)
    return total_entropy - weighted_entropy

In [8]:
#STEP 4: ID3 Algorithm (Recursive)
def id3(data, features, target="Buy"):
    """Recursively build an ID3 decision tree from a list of record dicts.

    Args:
        data: Non-empty list of dicts, each holding the attribute values and
            the ``target`` label of one example.
        features: List of attribute names still available for splitting.
        target: Key of the class label in each record.

    Returns:
        Either a class label (leaf) or a nested dict of the form
        ``{attribute: {value: subtree_or_label, ...}}``. Branches are listed
        in the order their values first appear in ``data``.

    Raises:
        ValueError: If ``data`` is empty (there is no label to predict).
    """
    classes = [row[target] for row in data]
    if not classes:
        raise ValueError("id3() needs at least one training record")

    # Pure node: every example carries the same label.
    if classes.count(classes[0]) == len(classes):
        return classes[0]

    # Majority label of this node. Counter.most_common breaks ties by first
    # occurrence, so the result is the same on every run (max() over a set of
    # strings depends on hash order).
    majority = Counter(classes).most_common(1)[0][0]

    if not features:
        return majority

    gains = [info_gain(data, f, target) for f in features]
    best_feature = features[gains.index(max(gains))]
    remaining_features = [f for f in features if f != best_feature]

    # First-seen order instead of set order keeps the branch order of the
    # resulting tree reproducible across kernel restarts.
    values = dict.fromkeys(row[best_feature] for row in data)

    branches = {}
    for v in values:
        subset = [row for row in data if row[best_feature] == v]
        if not subset:
            # Only reachable for values that do not compare equal to
            # themselves (e.g. NaN); fall back to the node's majority label.
            branches[v] = majority
        else:
            branches[v] = id3(subset, remaining_features, target)
    return {best_feature: branches}

In [9]:
#STEP 5: Build Tree
# Every column other than the "Buy" label is a candidate split attribute.
features = [name for name in data.columns if name != "Buy"]
decision_tree = id3(rows, features)

In [10]:
#STEP 6: Pretty Print Tree (Graph Format)
def print_tree(tree, indent="", is_last=True):
    """Recursively print a decision tree as an indented box-drawing diagram.

    Args:
        tree: Either a leaf label or a nested dict of the form
            ``{attribute: {value: subtree_or_label, ...}}``; only the first
            key of each dict is used as the node's attribute.
        indent: Prefix printed before this node's connector.
        is_last: Whether this node is the last child of its parent; selects
            the "└── " connector (True) or "├── " (False) and the matching
            continuation prefix for the node's children.
    """
    connector = "└── " if is_last else "├── "
    if not isinstance(tree, dict):
        print(indent + connector + str(tree))
        return

    root = list(tree.keys())[0]
    print(indent + connector + f"[{root}]")

    # All value lines of this node share one prefix, so compute it once.
    child_indent = indent + ("    " if is_last else "│   ")
    children = tree[root]
    n = len(children)
    for i, (val, subtree) in enumerate(children.items()):
        last = (i == n - 1)
        label = child_indent + ("└── " if last else "├── ") + f"{val} → "
        if isinstance(subtree, dict):
            print(label)
            # The nested attribute node is the only child of this value line,
            # so it is always drawn as a last child ("└── "). The vertical bar
            # for any remaining sibling values is carried by the indent.
            print_tree(subtree, child_indent + ("    " if last else "│   "), True)
        else:
            print(label + str(subtree))


In [11]:
#STEP 7: Output Final Tree
# Heading line followed by the rendered tree, emitted as a single print call
# for the heading and a recursive walk for the tree itself.
print("Final Decision Tree (Graph Format):", end="\n")
print_tree(tree=decision_tree)

Final Decision Tree (Graph Format):
└── [Age]
    ├── 31-40 → Yes
    ├── >40 → 
    │   ├── [Credit]
    │   │   ├── Excellent → No
    │   │   └── Fair → Yes
    └── <30 → 
        └── [Student]
            ├── No → No
            └── Yes → Yes
