In [None]:
import numpy as np
from collections import Counter
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

def gini(y):
    """Return the Gini impurity of a collection of labels.

    The impurity is ``1 - sum(p_k ** 2)``, where ``p_k`` is the fraction of
    entries in ``y`` that carry label ``k``.

    Args:
        y: Sized iterable of hashable labels.

    Returns:
        The impurity as a float, or the integer ``1`` when ``y`` is empty
        (no class term is ever subtracted in that case).
    """
    total = len(y)
    impurity = 1
    # Subtract one squared class frequency at a time.
    for occurrences in Counter(y).values():
        impurity -= (occurrences / total) ** 2
    return impurity


In [None]:
def split(X, y, feature, threshold):
    """Partition samples and labels on one feature at a threshold.

    Args:
        X: 2-D array indexed as ``X[:, feature]``.
        y: Array of labels that accepts the same boolean row mask as ``X``.
        feature: Column index to compare.
        threshold: Rows with a column value ``<= threshold`` go left, rows
            with a value ``> threshold`` go right.

    Returns:
        ``(X_left, y_left, X_right, y_right)``.
    """
    column = X[:, feature]
    # Two independent comparisons: a row matching neither (e.g. NaN) lands on
    # neither side.
    goes_left = column <= threshold
    goes_right = column > threshold
    return X[goes_left], y[goes_left], X[goes_right], y[goes_right]


def best_split(X, y):
    """Find the (feature, threshold) pair with the largest Gini gain.

    Every distinct value of every column of ``X`` is tried as a threshold.
    Candidates that leave one side empty are ignored.

    Args:
        X: 2-D array of samples by features.
        y: Array of labels aligned with the rows of ``X``.

    Returns:
        ``(feature_index, threshold)`` for the best split, or ``(None, None)``
        when no candidate has a gain strictly greater than zero.
    """
    n_features = X.shape[1]
    parent_impurity = gini(y)
    n_samples = len(y)

    chosen_feature = chosen_threshold = None
    top_gain = 0

    for col_idx in range(n_features):
        for candidate in np.unique(X[:, col_idx]):
            _, left_labels, _, right_labels = split(X, y, col_idx, candidate)
            if len(left_labels) and len(right_labels):
                # Child impurities are weighted by each side's sample share.
                left_weight = len(left_labels) / n_samples
                children_impurity = (
                    left_weight * gini(left_labels)
                    + (1 - left_weight) * gini(right_labels)
                )
                gain = parent_impurity - children_impurity
                # Strict comparison: on ties the earliest candidate wins.
                if gain > top_gain:
                    chosen_feature = col_idx
                    chosen_threshold = candidate
                    top_gain = gain
    return chosen_feature, chosen_threshold

In [None]:
class Node:
    """One node of the decision tree.

    ``build_tree`` creates internal nodes with ``feature``, ``threshold``,
    ``left`` and ``right`` set, and leaf nodes with only ``value`` set.
    """

    def __init__(self, feature=None, threshold=None, left=None, right=None, value=None):
        # Split rule: column index and the cut-off compared against it.
        self.feature, self.threshold = feature, threshold
        # Subtrees for the "<= threshold" and "> threshold" sides.
        self.left, self.right = left, right
        # Predicted label; stays None on internal nodes.
        self.value = value


def build_tree(X, y, depth=0, max_depth=5):
    """Recursively grow a decision tree and return its root ``Node``.

    Args:
        X: 2-D array of samples by features (the helpers index it as
            ``X[:, feature]``).
        y: Array of labels aligned with the rows of ``X``.
        depth: Depth of the node being built; the root call uses 0.
        max_depth: Depth at which growth stops and a leaf is produced.
            ``None`` disables the depth limit.

    Returns:
        A leaf ``Node`` holding the most common label in ``y`` when the labels
        are pure, the depth limit is reached, or ``best_split`` finds no split;
        otherwise an internal ``Node`` whose children are grown from the two
        sides of the best split.
    """

    def majority_leaf():
        # Leaf predicting the most frequent label among the samples here.
        return Node(value=Counter(y).most_common(1)[0][0])

    # `>=` rather than `==`: a call that starts at a depth already beyond
    # `max_depth` must still stop, instead of never matching the limit.
    depth_exhausted = max_depth is not None and depth >= max_depth
    if len(set(y)) == 1 or depth_exhausted:
        return majority_leaf()

    feature, threshold = best_split(X, y)
    if feature is None:
        # No candidate split improved impurity, so stop here.
        return majority_leaf()

    X_left, y_left, X_right, y_right = split(X, y, feature, threshold)
    left_node = build_tree(X_left, y_left, depth + 1, max_depth)
    right_node = build_tree(X_right, y_right, depth + 1, max_depth)
    return Node(feature=feature, threshold=threshold, left=left_node, right=right_node)

In [None]:
def predict_one(x, node):
    """Return the label the tree rooted at ``node`` assigns to one sample.

    Args:
        x: Sample indexable by feature index.
        node: Root of the (sub)tree; a node counts as a leaf when its
            ``value`` is not ``None``.

    Returns:
        The ``value`` of the leaf reached by following the split rules.
    """
    current = node
    # Descend until a leaf is reached.
    while current.value is None:
        go_left = x[current.feature] <= current.threshold
        current = current.left if go_left else current.right
    return current.value

def predict(X, tree):
    """Predict a label for every sample in ``X``.

    Args:
        X: Iterable of samples, each accepted by ``predict_one``.
        tree: Root node of the fitted tree.

    Returns:
        A list of predicted labels, in the same order as ``X``.
    """
    labels = []
    for sample in X:
        labels.append(predict_one(sample, tree))
    return labels

In [None]:
iris = load_iris()
# Binary task: drop every row labelled 2 so only two classes remain.
X, y = iris.data[iris.target != 2], iris.target[iris.target != 2]

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

tree = build_tree(X_train, y_train, max_depth=3)
y_pred = predict(X_test, tree)

# `predict` returns a plain list, so convert it before the element-wise comparison.
accuracy = np.mean(np.asarray(y_pred) == y_test)
print(f"✅ Accuracy: {accuracy * 100:.2f}%")