In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

class DecisionTree:
    """Classification tree grown greedily by minimizing weighted Gini impurity.

    After ``fit`` the model is stored in ``self.tree`` as nested dicts:

    * leaf: ``{"type": "leaf", "class": label}``
    * internal node: ``{"type": "node", "feature": column_index,
      "threshold": value, "left": subtree, "right": subtree}``

    A sample goes to ``"left"`` when ``x[feature] <= threshold`` and to
    ``"right"`` otherwise.
    """

    def __init__(self, max_depth=None):
        """Create an unfitted tree.

        Args:
            max_depth: Maximum number of splits on any root-to-leaf path.
                ``None`` (default) leaves the depth unbounded; ``0`` produces
                a single leaf predicting the majority class.
        """
        self.max_depth = max_depth
        self.tree = None

    def fit(self, X, y):
        """Build the tree from training data and store it in ``self.tree``.

        Args:
            X: 2-D array-like of shape (num_samples, num_features).
            y: 1-D array-like with one class label per row of ``X``.

        Raises:
            ValueError: If ``X`` is not 2-D or contains no samples.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (num_samples, num_features).")
        if X.shape[0] == 0:
            raise ValueError("Cannot fit a decision tree on an empty dataset.")
        self.tree = self._build_tree(X, y)

    def predict(self, X):
        """Return an array with the predicted class label for each row of ``X``."""
        return np.array([self._traverse_tree(x, self.tree) for x in np.asarray(X)])

    def _build_tree(self, X, y, depth=0):
        """Recursively grow the subtree for the samples ``X`` / labels ``y``.

        Recursion stops with a leaf when the node is pure, holds at most one
        sample, has reached ``max_depth``, or cannot be split into two
        non-empty parts.
        """
        num_samples = X.shape[0]
        num_classes = len(np.unique(y))

        # Compare against None explicitly so that max_depth=0 is honoured
        # instead of being treated as "no limit" by truthiness.
        depth_exhausted = self.max_depth is not None and depth >= self.max_depth
        if num_classes == 1 or num_samples <= 1 or depth_exhausted:
            return self._make_leaf(y)

        best_feature, best_threshold = self._find_best_split(X, y)
        if best_feature is None:
            # No threshold separates the samples (e.g. duplicate rows with
            # conflicting labels), so fall back to the majority class.
            return self._make_leaf(y)

        left_idx = X[:, best_feature] <= best_threshold
        right_idx = X[:, best_feature] > best_threshold
        left_subtree = self._build_tree(X[left_idx], y[left_idx], depth + 1)
        right_subtree = self._build_tree(X[right_idx], y[right_idx], depth + 1)

        return {"type": "node", "feature": best_feature, "threshold": best_threshold,
                "left": left_subtree, "right": right_subtree}

    def _make_leaf(self, y):
        """Return a leaf node predicting the majority class of ``y``."""
        return {"type": "leaf", "class": self._majority_class(y)}

    def _majority_class(self, y):
        """Return the most frequent label in ``y``.

        Ties go to the smallest label because ``np.unique`` returns labels in
        sorted order and ``np.argmax`` returns the first maximum.
        """
        labels, counts = np.unique(y, return_counts=True)
        return labels[np.argmax(counts)]

    def _find_best_split(self, X, y):
        """Search every feature/threshold pair for the lowest weighted Gini.

        Candidate thresholds are the distinct values of each feature.

        Returns:
            ``(feature_index, threshold)`` of the best split, or
            ``(None, None)`` when no candidate yields two non-empty sides.
        """
        best_feature, best_threshold = None, None
        best_impurity = float("inf")

        for feature in range(X.shape[1]):
            column = X[:, feature]
            for threshold in np.unique(column):
                left_idx = column <= threshold
                right_idx = column > threshold
                # The masks always have one entry per sample, so their length
                # says nothing about emptiness; check for selected rows.
                if not left_idx.any() or not right_idx.any():
                    continue

                impurity = self._weighted_impurity(y[left_idx], y[right_idx])
                if impurity < best_impurity:
                    best_impurity = impurity
                    best_feature = feature
                    best_threshold = threshold

        return best_feature, best_threshold

    def _weighted_impurity(self, left_y, right_y):
        """Return the Gini impurity of both sides, weighted by their sizes."""
        n = len(left_y) + len(right_y)
        left_impurity = self._gini(left_y)
        right_impurity = self._gini(right_y)
        return (len(left_y) / n) * left_impurity + (len(right_y) / n) * right_impurity

    def _gini(self, y):
        """Return the Gini impurity ``1 - sum(p_k ** 2)`` of the labels ``y``."""
        _, counts = np.unique(y, return_counts=True)
        prob = counts / len(y)
        return 1 - np.sum(prob ** 2)

    def _traverse_tree(self, x, node):
        """Follow the split rules from ``node`` down and return the leaf's class."""
        while node["type"] != "leaf":
            branch = "left" if x[node["feature"]] <= node["threshold"] else "right"
            node = node[branch]
        return node["class"]


if __name__ == "__main__":
    # Small hand-written dataset: 20 samples, two integer features each,
    # with a binary class label per sample.
    X = np.array([
        [2, 3], [1, 2], [3, 6], [5, 7],
        [6, 8], [7, 10], [8, 12], [5, 9],
        [4, 5], [7, 6], [3, 4], [2, 2],
        [6, 7], [4, 3], [8, 11], [9, 13],
        [10, 15], [6, 6], [5, 5], [7, 8],
    ])
    y = np.array([
        0, 0, 1, 1, 1, 1, 1, 0, 0, 1,
        0, 0, 1, 0, 1, 1, 1, 1, 0, 0,
    ])

    # Hold out 30% of the samples; the fixed seed keeps the split reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42
    )

    # Fit a depth-limited tree on the training portion only.
    tree = DecisionTree(max_depth=3)
    tree.fit(X_train, y_train)

    # Score the held-out samples against their true labels.
    y_pred = tree.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    print("Predictions:", y_pred)
    print("Actual:", y_test)
    print("Accuracy:", accuracy)


Predictions: [0 1 1 0 0 0]
Actual: [0 1 1 0 0 1]
Accuracy: 0.8333333333333334
