In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [3]:
# Read the banknote-authentication CSV (path is relative to the working
# directory) into a DataFrame and preview its first five rows.
data = pd.read_csv('BankNote_Authentication.csv')
data.head()

Unnamed: 0,variance,skewness,curtosis,entropy,class
0,3.6216,8.6661,-2.8073,-0.44699,0
1,4.5459,8.1674,-2.4586,-1.4621,0
2,3.866,-2.6383,1.9242,0.10645,0
3,3.4566,9.5228,-4.0112,-3.5944,0
4,0.32924,-4.4552,4.5718,-0.9888,0


In [4]:
def gini_impurity(y):
    """Return the Gini impurity of the label collection ``y``.

    The impurity is ``1 - sum(p_k ** 2)``, where ``p_k`` is the fraction of
    entries in ``y`` equal to the k-th distinct label. A collection holding a
    single distinct label scores 0.
    """
    # Only the per-label counts matter; the label values themselves are unused.
    _, label_counts = np.unique(y, return_counts=True)
    total = label_counts.sum()
    squared_shares = (label_counts / total) ** 2
    return 1 - np.sum(squared_shares)

In [1]:
def best_split(x, y, feature_index):
    """Find the threshold on one feature that minimises weighted Gini impurity.

    Every distinct value of column ``feature_index`` is tried as a threshold:
    rows with ``x[:, feature_index] <= threshold`` go to the left side and all
    remaining rows go to the right side. Thresholds that leave either side
    empty are skipped because they do not actually partition the rows.

    Parameters
    ----------
    x : array indexable as ``x[:, feature_index]``
        Feature matrix, one row per sample.
    y : array indexable by a boolean mask
        Labels aligned with the rows of ``x``.
    feature_index : int
        Column of ``x`` to search.

    Returns
    -------
    tuple
        ``(best_value, best_left, best_right, best_gini)`` where the masks are
        boolean arrays over the rows of ``x``. If no threshold separates the
        rows (empty input or a single distinct value), the result is
        ``(None, None, None, float('inf'))``.
    """
    column = x[:, feature_index]  # hoisted: identical for every threshold
    best_gini, best_value, best_left, best_right = float('inf'), None, None, None

    for val in np.unique(column):
        left_mask = column <= val
        right_mask = ~left_mask

        n_left = left_mask.sum()
        n_right = right_mask.sum()
        # A one-sided "split" (e.g. the column maximum sends every row left)
        # would hand an empty partition to the caller, so it is not a candidate.
        if n_left == 0 or n_right == 0:
            continue

        left_gini = gini_impurity(y[left_mask])
        right_gini = gini_impurity(y[right_mask])

        # Impurity of each side weighted by the share of rows it receives.
        weighted_gini = (left_gini * n_left + right_gini * n_right) / len(y)

        if weighted_gini < best_gini:
            best_gini, best_value = weighted_gini, val
            best_left, best_right = left_mask, right_mask

    # Return only after every candidate threshold has been evaluated.
    return best_value, best_left, best_right, best_gini

In [2]:
def best_feature(x, y):
    """Pick the feature whose best split has the lowest weighted Gini impurity.

    Calls ``best_split`` once per column of ``x`` and keeps the result with
    the strictly smallest impurity; on ties the earliest column wins.

    Returns
    -------
    tuple
        ``(feature_index, threshold, left_mask, right_mask)`` for the winning
        split, or ``(None, None, None, None)`` if no column reported an
        impurity below infinity.
    """
    lowest_gini = float('inf')
    chosen = (None, None, None, None)

    for column in range(x.shape[1]):
        threshold, left_rows, right_rows, score = best_split(x, y, column)

        if score < lowest_gini:
            lowest_gini = score
            chosen = (column, threshold, left_rows, right_rows)

    return chosen

In [3]:
def build_tree(x, y, depth = 0, max_depth = 3):
    if depth >= max_depth or len(np.unique(y)) == 1:
        return np.bincount(y).argmax()
    
    feature, value, left_mask, right_mask = best_feature(x, y)

    if feature is None:
        return np.bincount(y).argmax()
    
    left_subtree = build_tree(x[left_mask], y[left_mask], depth + 1, max_depth)
    right_subtree = build_tree(x[right_mask], y[right_mask], depth + 1, max_depth)