In [1]:
import pandas as pd
import numpy as np
from collections import Counter


In [2]:
def gini_impurity(data):
    """Return the Gini impurity of the labels stored in the last column of ``data``.

    The impurity is ``1 - sum(p_k ** 2)``, where ``p_k`` is the share of rows
    carrying label ``k``. It is 0 when every row has the same label.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose last column holds the class labels.

    Returns
    -------
    float
        The impurity, or ``0.0`` when there are no labelled rows to measure.
    """
    label_counts = data.iloc[:, -1].value_counts()
    # value_counts() leaves out missing labels, so normalise by what was
    # actually counted; dividing by len(data) would make the shares sum to < 1.
    total = label_counts.sum()
    if total == 0:
        # No labelled rows: there is nothing to be impure about.
        return 0.0
    return 1 - sum((count / total) ** 2 for count in label_counts)


In [3]:
def gini_split(data, feature, value):
    """Return the size-weighted Gini impurity of splitting ``data`` at a threshold.

    Rows with ``data[feature] <= value`` form the left side and rows with
    ``data[feature] > value`` form the right side. Each side's impurity is
    weighted by its share of ``len(data)``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose last column holds the class labels.
    feature : column label
        Column to threshold on.
    value : scalar
        Threshold compared against ``data[feature]``.

    Returns
    -------
    float
        Weighted impurity of the two sides; ``0.0`` when ``data`` has no rows.
    """
    total = len(data)
    if total == 0:
        # An empty frame has nothing to partition; returning early also avoids
        # dividing by zero when computing the weights below.
        return 0.0

    left_split = data[data[feature] <= value]
    right_split = data[data[feature] > value]

    weight_left = len(left_split) / total
    weight_right = len(right_split) / total

    return weight_left * gini_impurity(left_split) + weight_right * gini_impurity(right_split)


In [13]:
def best_split(data):
    """Find the threshold split with the lowest weighted Gini impurity.

    Every column except the last is treated as a feature, and every distinct
    value of a feature is tried as the threshold of a ``feature <= value``
    split scored with ``gini_split``. When several candidates tie, the first
    one encountered is kept.

    Candidates that would leave one side without rows are skipped: they do not
    partition the data, and accepting one would make the tree builder recurse
    on an empty frame.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose last column holds the class labels.

    Returns
    -------
    tuple
        ``(feature, value)`` of the best split, or ``(None, None)`` when no
        candidate separates the rows into two non-empty groups.
    """
    features = data.columns[:-1]
    best_gini = float('inf')
    best_feature, best_value = None, None

    for feature in features:
        column = data[feature]
        for value in column.unique():
            # Count both sides explicitly so a threshold that matches no rows
            # on either side is rejected as well.
            rows_left = (column <= value).sum()
            rows_right = (column > value).sum()
            if rows_left == 0 or rows_right == 0:
                continue

            gini = gini_split(data, feature, value)
            if gini < best_gini:
                best_gini = gini
                best_feature, best_value = feature, value

    return best_feature, best_value


In [14]:
def cart(data, max_depth, depth=0):
    """Recursively build a classification tree from ``data``.

    The last column of ``data`` holds the labels. A node becomes a leaf when
    its labels are all identical, when ``depth`` has reached ``max_depth``,
    when ``best_split`` finds no split, or when the chosen split would leave
    one side without rows.

    Parameters
    ----------
    data : pandas.DataFrame
        Training rows; features first, label in the last column.
    max_depth : int
        Depth at which nodes are turned into leaves.
    depth : int, optional
        Depth of the current node; callers normally leave this at 0.

    Returns
    -------
    dict or label
        A leaf is the most frequent label of the node. An internal node is a
        dict with keys ``'feature'``, ``'value'``, ``'left'`` (rows with
        ``feature <= value``) and ``'right'`` (rows with ``feature > value``).

    Raises
    ------
    ValueError
        If ``data`` has no rows.
    """
    labels = data.iloc[:, -1]
    if len(labels) == 0:
        # Without rows there is no majority label to fall back on.
        raise ValueError("cart() needs at least one row to build a tree")

    if len(labels.unique()) == 1 or depth >= max_depth:
        return labels.mode()[0]

    feature, value = best_split(data)
    if feature is None:
        return labels.mode()[0]

    left_split = data[data[feature] <= value]
    right_split = data[data[feature] > value]
    if len(left_split) == 0 or len(right_split) == 0:
        # The split did not separate anything; recursing would hand an empty
        # frame to the next level, so stop here with the majority label.
        return labels.mode()[0]

    return {
        'feature': feature,
        'value': value,
        'left': cart(left_split, max_depth, depth + 1),
        'right': cart(right_split, max_depth, depth + 1),
    }


In [12]:
def load_and_prepare_data(file_path):
    """Read a CSV file and replace every text column with integer codes.

    Each column with an object or string dtype is passed through
    ``pd.factorize`` and overwritten with the resulting codes. All other
    columns are returned as read.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file, handed straight to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The loaded frame with its text columns integer-encoded.
    """
    data = pd.read_csv(file_path)

    for column in data.columns:
        dtype = data[column].dtype
        # Comparing the dtype to 'object' alone misses pandas' dedicated
        # string dtypes, which would leave text columns unencoded.
        if pd.api.types.is_object_dtype(dtype) or pd.api.types.is_string_dtype(dtype):
            # NOTE: factorize() marks missing values with the sentinel code -1.
            codes, _ = pd.factorize(data[column])
            data[column] = codes

    return data


In [15]:
# CSV file holding the training data (relative path).
file_path = 'play_tennis.csv'  


# Load the CSV; load_and_prepare_data() integer-encodes its text columns.
data = load_and_prepare_data(file_path)

# Preview the encoded frame.
# NOTE(review): best_split() treats every column except the last as a feature,
# so the `day` column shown here takes part in splitting. It looks like a
# per-row identifier — confirm, and consider dropping it before fitting.
data.head()

Unnamed: 0,day,outlook,temp,humidity,wind,play
0,0,0,0,0,0,0
1,1,0,0,0,1,0
2,2,1,0,0,0,1
3,3,2,1,0,0,1
4,4,2,2,1,0,1


In [16]:
# Depth limit handed to cart(); nodes that reach it become leaves.
max_depth = 3
# Build the tree on the encoded frame; its last column is used as the label.
tree = cart(data, max_depth)
# Internal nodes print as nested dicts (feature/value/left/right), leaves as bare labels.
# NOTE(review): the recorded output splits only on `day`, which looks like a row
# identifier rather than a predictive feature — confirm the column should be used.
print(tree)


{'feature': 'day', 'value': 1, 'left': 0, 'right': {'feature': 'day', 'value': 12, 'left': {'feature': 'day', 'value': 7, 'left': 1, 'right': 1}, 'right': 0}}
