In [1]:
import pandas as pd
import math

In [2]:
# Step 1: Load Dataset
# Read the raw table, then discard every column whose name contains "s.no" or
# "unnamed" (case-insensitive) in a single drop call.
data = pd.read_csv("buy.csv")
data = data.drop(
    columns=[
        name
        for name in data.columns
        if "s.no" in name.lower() or "unnamed" in name.lower()
    ]
)

In [4]:
#Step 2: Gini Index Function
def gini_index(groups, classes, target='Buy'):
    """Return the size-weighted Gini impurity of a candidate split.

    Parameters
    ----------
    groups : iterable of pandas.DataFrame
        The partitions produced by a split (for example ``[left, right]``).
        Any iterable is accepted; it is materialised once because the
        computation needs two passes over it.
    classes : iterable
        Class labels whose proportions are measured inside each group.
    target : str, default 'Buy'
        Name of the column that holds the class label.

    Returns
    -------
    float
        Sum over the non-empty groups of ``(1 - sum(p_c ** 2))`` weighted by
        the group's share of all rows.  0.0 means every non-empty group is
        pure; 0.0 is also returned when every group is empty.
    """
    # Two passes are needed (total size, then per-group score), so a one-shot
    # iterator must be captured before the first pass consumes it.
    groups = list(groups)
    n_instances = sum(len(group) for group in groups)
    gini = 0.0
    for group in groups:
        size = len(group)
        if size == 0:
            # An empty group contributes nothing and would divide by zero.
            continue
        labels = group[target]
        score = 0.0
        for class_val in classes:
            p = (labels == class_val).sum() / size
            score += p * p
        gini += (1 - score) * (size / n_instances)
    return gini

In [5]:
# Step 3: Split Dataset
def test_split(attribute, value, dataset):
    left = dataset[dataset[attribute] == value]
    right = dataset[dataset[attribute] != value]
    return left, right


In [6]:
# Step 4: Find Best Split
def get_best_split(dataset, attributes):
    """Find the equality split with the lowest weighted Gini index.

    Every ``attribute == value`` test over the values present in ``dataset``
    is scored with ``gini_index``; the first split reaching the minimum wins.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Rows to split; the class label is read from the ``'Buy'`` column.
    attributes : iterable of str
        Column names that may be used as split attributes.

    Returns
    -------
    dict or None
        ``{'attribute', 'value', 'left', 'right'}`` describing the best split,
        or ``None`` when no split separates the rows into two non-empty
        groups (no attributes, or every attribute is constant).
    """
    class_values = dataset['Buy'].unique()
    # Candidate scores are compared against this starting bound with a strict '<'.
    best_gini = 1.0
    best_split = None
    for attr in attributes:
        values = dataset[attr].unique()
        if len(values) < 2:
            # A constant column cannot separate the rows at all.
            continue
        for val in values:
            left, right = test_split(attr, val, dataset)
            if len(left) == 0 or len(right) == 0:
                # A one-sided "split" returns the dataset unchanged; accepting
                # it would make the tree builder recurse on identical data.
                continue
            gini = gini_index([left, right], class_values)
            if gini < best_gini:
                best_gini = gini
                best_split = {
                    'attribute': attr,
                    'value': val,
                    'left': left,
                    'right': right
                }
    return best_split

In [7]:
# Step 5: Recursive Tree Builder

def _majority_label(dataset):
    """Return 'Yes' unless 'No' rows strictly outnumber 'Yes' rows in 'Buy'."""
    labels = dataset['Buy']
    return 'Yes' if (labels == 'Yes').sum() >= (labels == 'No').sum() else 'No'


def build_tree(dataset, attributes, depth=0, max_depth=10):
    """Recursively grow a binary decision tree over equality splits.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Rows reaching this node; the class label is read from ``'Buy'``.
    attributes : iterable of str
        Column names that may be used as split attributes.
    depth : int, default 0
        Depth of the current node (the root is 0).
    max_depth : int, default 10
        Nodes at this depth become leaves even if they are still impure.

    Returns
    -------
    label or dict
        A leaf is the class label itself.  An internal node is a dict with
        keys ``'attribute'``, ``'value'``, ``'left'`` (subtree for rows equal
        to ``value``), ``'right'`` (subtree for the other rows) and
        ``'groups'`` (the ``(left, right)`` DataFrames of the split).
    """
    if len(dataset) == 0:
        # No rows: the vote is a 0-0 tie, which resolves to 'Yes'.
        return _majority_label(dataset)
    if len(dataset['Buy'].unique()) == 1:
        # Pure node: every row carries the same label.
        return dataset['Buy'].iloc[0]
    if depth >= max_depth:
        # Depth budget exhausted: stop splitting and vote.
        return _majority_label(dataset)

    split = get_best_split(dataset, attributes)
    # A missing split, or one that leaves a side empty, cannot make progress;
    # recursing on it would repeat this exact call forever.
    if split is None or len(split['left']) == 0 or len(split['right']) == 0:
        return _majority_label(dataset)

    left_tree = build_tree(split['left'], attributes, depth + 1, max_depth)
    right_tree = build_tree(split['right'], attributes, depth + 1, max_depth)

    return {
        'attribute': split['attribute'],
        'value': split['value'],
        'left': left_tree,
        'right': right_tree,
        'groups': (split['left'], split['right'])
    }

In [8]:
#Step 6: Print Tree (Pretty)
def print_tree(node, indent=""):
    """Print a tree built by ``build_tree`` as an indented text diagram.

    Parameters
    ----------
    node : dict or label
        An internal node (dict with ``'attribute'``, ``'value'``, ``'left'``,
        ``'right'`` and ``'groups'``) or a leaf label.
    indent : str, default ""
        Prefix written before every line of this subtree.

    The first branch is labelled with the split value.  The second branch is
    labelled with every value of the split attribute found in the right-hand
    group, converted to text and sorted with non-numeric labels first.
    """
    if not isinstance(node, dict):
        print(indent + str(node))
        return

    attr = node['attribute']
    val = node['value']
    left = node['left']
    right = node['right']

    print(f"{indent}[{attr}]")
    print(f"{indent}├── {val} → ", end="")
    if isinstance(left, dict):
        print()
        print_tree(left, indent + "│   ")
    else:
        print(left)

    # Attribute values may be numbers or other non-string objects, so convert
    # them to text before sorting and joining.
    remaining_labels = {str(value) for value in node['groups'][1][attr]}
    remaining_str = ", ".join(
        sorted(remaining_labels, key=lambda label: (label.isdigit(), label))
    )
    print(f"{indent}└── {remaining_str} → ", end="")
    if isinstance(right, dict):
        print()
        print_tree(right, indent + "    ")
    else:
        print(right)

In [9]:
# Step 7: Build & Print Tree
# Columns offered to the tree as candidate split attributes.
attributes = ['Age', 'Income', 'Student', 'Credit']
# Grow the tree from the full dataset, starting at depth 0 with the default max_depth.
tree = build_tree(data, attributes)
# Render the resulting nested-dict tree as an indented text diagram.
print_tree(tree)

[Age]
├── 31-40 → Yes
└── <30, >40 → 
    [Student]
    ├── No → 
    │   [Age]
    │   ├── <30 → No
    │   └── >40 → 
    │       [Credit]
    │       ├── Fair → Yes
    │       └── Excellent → No
    └── Yes → 
        [Credit]
        ├── Fair → Yes
        └── Excellent → 
            [Age]
            ├── >40 → No
            └── <30 → Yes
