323051 22311368 PRITHVI PATIL

Decision Tree Using the Gini Index, Without a Machine-Learning Library

In [1]:
import pandas as pd
from collections import defaultdict, Counter

In [2]:
def load_dataset(filename):
    """Read CSV data from ``filename`` and return it as a pandas DataFrame."""
    frame = pd.read_csv(filename)
    return frame

def gini_index(labels):
    """Return the Gini impurity of a collection of class labels.

    The impurity is ``1 - sum(p_k ** 2)`` where ``p_k`` is the relative
    frequency of each distinct label. An empty collection yields ``0``.
    """
    n_labels = len(labels)
    if not n_labels:
        return 0

    impurity = 1.0
    # Subtract each class's squared share one at a time.
    for frequency in Counter(labels).values():
        share = frequency / n_labels
        impurity -= share ** 2
    return impurity

def gini_for_attribute(data, attribute, target="Buys"):
    """Return the weighted Gini index obtained by splitting ``data`` on ``attribute``.

    For every distinct value of ``attribute`` the rows carrying that value are
    selected, the Gini impurity of their ``target`` labels is computed, and the
    impurities are summed, each weighted by the fraction of rows in its group.
    """
    n_rows = len(data)
    weighted_gini = 0.0
    for category in data[attribute].unique():
        matches = data[attribute] == category
        branch_labels = data[matches][target].tolist()
        weight = len(branch_labels) / n_rows
        weighted_gini += weight * gini_index(branch_labels)
    return weighted_gini

def build_tree(data, attributes, target="Buys"):
    """Recursively build a decision tree by minimising the weighted Gini index.

    Args:
        data: pandas DataFrame containing the attribute columns and the
            ``target`` column.
        attributes: Column names that may still be used for splitting.
        target: Name of the class-label column.

    Returns:
        A class label when the node is a leaf, otherwise a nested dict of the
        form ``{attribute: {value: subtree, ...}}``.

    Raises:
        IndexError: If ``data`` has no rows.
    """
    labels = data[target].tolist()
    if not labels:
        # Same exception type the unguarded ``labels[0]`` lookup produced,
        # but with a message that explains what went wrong.
        raise IndexError("cannot build a decision tree from an empty dataset")

    # Pure node: every row carries the same label.
    if labels.count(labels[0]) == len(labels):
        return labels[0]

    majority_label = Counter(labels).most_common(1)[0][0]

    # Nothing left to split on: fall back to the most frequent label.
    if len(attributes) == 0:
        return majority_label

    gini_scores = {
        attr: gini_for_attribute(data, attr, target) for attr in attributes
    }
    best_attr = min(gini_scores, key=gini_scores.get)

    # The remaining attributes are identical for every branch; compute once.
    remaining = [a for a in attributes if a != best_attr]
    column = data[best_attr]

    branches = {}
    for value in column.unique():
        subset = data[column == value]
        if subset.empty:
            # ``unique()`` reports NaN for missing values, but NaN never
            # compares equal to itself, so the selection is empty. Recursing
            # on it would fail, so rows with a missing value are skipped.
            continue
        branches[value] = build_tree(subset, remaining, target)

    if not branches:
        # Every value of the chosen attribute was missing; no split is possible.
        return majority_label

    return {best_attr: branches}

def print_tree(tree, indent=""):
    """Print a nested-dict decision tree as indented ``if attr == value:`` rules.

    A non-dict ``tree`` is treated as a leaf and printed as ``=> label``.
    Each nesting level is indented by two further spaces.
    """
    if not isinstance(tree, dict):
        print(f"{indent}=> {tree}")
        return

    child_indent = indent + "  "
    for attr, branches in tree.items():
        for val, sub in branches.items():
            print(f"{indent}if {attr} == {val}:")
            print_tree(sub, child_indent)


In [3]:
# Load the dataset from Lipstick.csv into a DataFrame.
df = load_dataset("Lipstick.csv")
# Columns offered to the tree as split candidates; the label column is
# build_tree's default target, "Buys".
attributes = ["Age", "Income", "Gender", "Marital Status"]
# Build the nested-dict tree, choosing the lowest weighted Gini index at each node.
tree = build_tree(df, attributes)
print(" Decision Tree (based on Gini index):")
# Print the tree as indented if-rules.
print_tree(tree)

 Decision Tree (based on Gini index):
if Age == <21:
  if Gender == Male:
    => No
  if Gender == Female:
    => Yes
if Age == 21-35:
  => Yes
if Age == >35:
  if Marital Status == Single:
    => Yes
  if Marital Status == Married:
    => No
