# In [1]:
import numpy as np
from collections import Counter
import math

# Toy training set: 10 samples (rows) with 4 numeric features (columns).
# NOTE(review): the column meanings are not stated anywhere in this file;
# they look like (age, income, a 0/1 flag, a score) -- TODO confirm.
X = np.array([
    [25, 50000, 1, 650],
    [35, 60000, 0, 700],
    [45, 80000, 0, 720],
    [20, 30000, 1, 600],
    [23, 40000, 1, 620],
    [52, 110000, 0, 750],
    [43, 90000, 0, 710],
    [33, 65000, 1, 690],
    [27, 48000, 1, 640],
    [48, 100000, 0, 730]
])

# Binary class label (0 or 1) for each row of X, in the same order.
y = np.array([1, 1, 1, 0, 0, 1, 1, 1, 0, 1])


def entropy(y):
    """Return the Shannon entropy, in bits, of the labels in *y*.

    Args:
        y: A sized iterable of hashable class labels.

    Returns:
        ``-sum(p * log2(p))`` over the relative frequency ``p`` of each
        distinct label. An empty *y*, or one holding a single distinct
        label, yields zero.
    """
    n_samples = len(y)
    result = 0
    for n_in_class in Counter(y).values():
        fraction = n_in_class / n_samples
        result = result - fraction * math.log2(fraction)
    return result


def information_gain(X_column, y, threshold):
    """Return the entropy reduction from splitting *y* at *threshold*.

    Samples whose feature value is ``<= threshold`` form one group and those
    with a value ``> threshold`` form the other.

    Args:
        X_column: Array of feature values, one per sample.
        y: Array of class labels aligned with *X_column*.
        threshold: Split point compared against *X_column*.

    Returns:
        The entropy of *y* minus the size-weighted entropy of the two
        groups, or ``0`` when either group is empty.
    """
    entropy_before = entropy(y)

    below = X_column <= threshold
    above = X_column > threshold
    y_below, y_above = y[below], y[above]
    size_below, size_above = len(y_below), len(y_above)

    # A split that leaves one side empty separates nothing.
    if size_below == 0 or size_above == 0:
        return 0

    total = len(y)
    entropy_after = (size_below / total) * entropy(y_below) + \
                    (size_above / total) * entropy(y_above)

    return entropy_before - entropy_after


class DecisionTree:
    """Depth-limited binary decision-tree classifier using information gain.

    A fitted tree is a nested structure: an internal node is a dict with the
    keys ``"feature"`` (column index), ``"threshold"``, ``"left"`` and
    ``"right"``; a leaf is the bare class label. Samples whose feature value
    is ``<= threshold`` follow ``"left"``, all others follow ``"right"``.
    """

    def __init__(self, max_depth=3):
        """Create an unfitted tree.

        Args:
            max_depth: Maximum number of splits on any root-to-leaf path.
        """
        self.max_depth = max_depth
        # None until fit() has run; afterwards the root node (dict or label).
        self.tree = None

    def fit(self, X, y):
        """Build the tree from training data.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            y: 1-D array-like of n_samples hashable class labels.

        Returns:
            This instance, so calls can be chained.

        Raises:
            ValueError: If the training set is empty, *X* is not 2-D, *y* is
                not 1-D, or *X* and *y* disagree on the number of samples.
        """
        # Accept lists/tuples as well as arrays: the tree builder relies on
        # ``.shape`` and boolean-mask indexing, which plain lists lack.
        X = np.asarray(X)
        y = np.asarray(y)

        if y.size == 0:
            raise ValueError("cannot fit a DecisionTree on an empty training set")
        if X.ndim != 2:
            raise ValueError(f"X must be 2-D, got {X.ndim}-D input")
        if y.ndim != 1:
            raise ValueError(f"y must be 1-D, got {y.ndim}-D input")
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X has {X.shape[0]} samples but y has {y.shape[0]} labels"
            )

        self.tree = self._build_tree(X, y, depth=0)
        return self

    @staticmethod
    def _majority_label(y):
        """Return the most frequent label in *y* (first seen wins a tie)."""
        return Counter(y).most_common(1)[0][0]

    def _best_split(self, X, y):
        """Return ``(feature, threshold, gain)`` of the highest-gain split.

        Every distinct value of every column is tried as a threshold. The
        first candidate reaching the maximum gain is kept. When there are no
        candidates at all the result is ``(None, None, -inf)``.
        """
        best_feature = None
        best_threshold = None
        best_gain = float("-inf")

        for feature in range(X.shape[1]):
            column = X[:, feature]
            for threshold in np.unique(column):
                gain = information_gain(column, y, threshold)
                if gain > best_gain:
                    best_feature = feature
                    best_threshold = threshold
                    best_gain = gain

        return best_feature, best_threshold, best_gain

    def _build_tree(self, X, y, depth):
        """Recursively build the subtree for the samples ``(X, y)``.

        Returns a leaf label when the depth limit is reached, the labels are
        pure, or no split improves on the parent; otherwise a node dict.
        """
        if depth >= self.max_depth or len(set(y)) == 1:
            return self._majority_label(y)

        feature, threshold, gain = self._best_split(X, y)

        # ``feature is None`` covers inputs with no candidate split (for
        # example zero columns); indexing with None would otherwise fail.
        if feature is None or gain <= 0:
            return self._majority_label(y)

        goes_left = X[:, feature] <= threshold
        goes_right = X[:, feature] > threshold

        return {
            "feature": feature,
            "threshold": threshold,
            "left": self._build_tree(X[goes_left], y[goes_left], depth + 1),
            "right": self._build_tree(X[goes_right], y[goes_right], depth + 1),
        }

    def predict(self, X):
        """Return an array with the predicted label for each row of *X*.

        Raises:
            AttributeError: If called before ``fit``.
        """
        if self.tree is None:
            raise AttributeError(
                "DecisionTree has not been fitted; call fit() before predict()"
            )
        return np.array([self._traverse(x, self.tree) for x in X])

    def _traverse(self, x, node):
        """Walk from *node* down to a leaf for sample *x*; return its label."""
        # Internal nodes are dicts; anything else is a leaf label.
        while isinstance(node, dict):
            if x[node["feature"]] <= node["threshold"]:
                node = node["left"]
            else:
                node = node["right"]
        return node

# Train a depth-limited tree on the toy data set.
model = DecisionTree(max_depth=3)
model.fit(X, y)

# NOTE: the model is scored on the same rows it was trained on, so this is
# training accuracy rather than an estimate of performance on unseen data.
predictions = model.predict(X)
accuracy = (predictions == y).mean()

print("Predictions:", predictions)
print("Actual:", y)
print("Accuracy:", accuracy)
print("\nTree Structure:\n", model.tree)


# Predictions: [1 1 1 0 0 1 1 1 0 1]
# Actual: [1 1 1 0 0 1 1 1 0 1]
# Accuracy: 1.0
#
# Tree Structure:
#  {'feature': 1, 'threshold': np.int64(48000), 'left': np.int64(0), 'right': np.int64(1)}
