In [2]:
import numpy as np

class DecisionTreeNode:
    """
    One node of a binary decision tree.

    A node stores a split rule (``feature_index`` / ``threshold``), references
    to its two children (``left`` / ``right``) and a ``value`` payload. The
    tree classes in this file treat ``value is None`` as "internal node" and
    any other value as the prediction stored in a leaf.
    """

    def __init__(self, feature_index=None, threshold=None, left=None, right=None, value=None):
        # Split rule: which column is compared and against what cut-off.
        self.feature_index, self.threshold = feature_index, threshold
        # Child nodes; both stay None unless the node is given children.
        self.left, self.right = left, right
        # Leaf payload: class (classification) or float (regression).
        self.value = value

class DecisionTreeClassifier:
    """
    Decision Tree for Classification supporting Gini impurity and Entropy.

    Parameters
    ----------
    max_depth : int
        Depth limit. The root is at depth 0; a node at ``max_depth`` is never
        split.
    min_samples_split : int
        A node holding fewer samples than this is never split.
    criterion : str
        ``"gini"`` selects Gini impurity; any other value selects entropy.

    Attributes set by ``fit``
    -------------------------
    classes_ : ndarray
        Sorted unique labels found in ``y``. Labels may be any values that
        ``np.unique`` can sort (not only 0..k-1).
    n_classes_ : int
        Number of distinct labels.
    root : DecisionTreeNode
        Root of the fitted tree.
    """
    def __init__(self, max_depth=5, min_samples_split=2, criterion="gini"):
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.criterion = criterion  # "gini" or "entropy"
        self.root = None

    def fit(self, X, y):
        """
        Build the tree from a 2-D feature matrix ``X`` and a label vector ``y``.

        Array-likes (e.g. nested lists) are accepted. Returns ``self`` so that
        calls can be chained.

        Raises
        ------
        ValueError
            If ``y`` contains no samples.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if y.size == 0:
            raise ValueError("Cannot fit a decision tree on zero samples.")
        # Remember the actual label values so leaves store real labels rather
        # than positional indices.
        self.classes_ = np.unique(y)
        self.n_classes_ = len(self.classes_)
        self.root = self._grow_tree(X, y, depth=0)
        return self

    def _gini(self, y):
        """Gini impurity of the label vector ``y``: 1 - sum(p_c ** 2)."""
        m = len(y)
        return 1.0 - sum((np.sum(y == c) / m) ** 2 for c in np.unique(y))

    def _entropy(self, y):
        """Shannon entropy (base 2) of the label vector ``y``."""
        m = len(y)
        entropy = 0.0
        for c in np.unique(y):
            p = np.sum(y == c) / m
            # The small epsilon keeps the argument of the log strictly positive.
            entropy -= p * np.log2(p + 1e-9)
        return entropy

    def _best_split(self, X, y):
        """
        Search every feature for the threshold with the lowest weighted child
        impurity.

        Returns
        -------
        (feature_index, threshold)
            ``(None, None)`` when there are fewer than two samples or no
            feature has two distinct values.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        m, n = X.shape
        if m <= 1:
            return None, None
        # Pick the impurity measure once instead of once per candidate split.
        impurity_fn = self._gini if self.criterion == "gini" else self._entropy
        best_impurity = np.inf
        best_idx, best_thr = None, None

        for idx in range(n):
            # Order samples by this feature; labels follow the same order.
            order = np.argsort(X[:, idx], kind="stable")
            values = X[order, idx]
            labels = y[order]
            for i in range(1, m):
                # Equal neighbours cannot be separated by a threshold.
                if values[i] == values[i - 1]:
                    continue
                impurity = (
                    i * impurity_fn(labels[:i]) + (m - i) * impurity_fn(labels[i:])
                ) / m
                if impurity < best_impurity:
                    best_impurity = impurity
                    best_idx = idx
                    # Cut halfway between the two neighbouring values.
                    best_thr = (values[i] + values[i - 1]) / 2
        return best_idx, best_thr

    def _grow_tree(self, X, y, depth):
        """
        Recursively build the subtree for the samples ``(X, y)``.

        Every node starts as a leaf holding the majority label (ties go to the
        first label in ``classes_``). It is turned into an internal node only
        if the stopping rules allow it and a usable split exists.
        """
        num_samples_per_class = [np.sum(y == c) for c in self.classes_]
        predicted_class = self.classes_[np.argmax(num_samples_per_class)]
        node = DecisionTreeNode(value=predicted_class)

        may_split = (
            depth < self.max_depth
            and len(y) >= self.min_samples_split
            and len(set(y)) > 1
        )
        if not may_split:
            return node

        idx, thr = self._best_split(X, y)
        if idx is None:
            return node

        indices_left = X[:, idx] <= thr
        # A midpoint between two adjacent floats can round onto the larger one,
        # which would send every sample left. Keep the node as a leaf then.
        if indices_left.all() or not indices_left.any():
            return node

        node.feature_index = idx
        node.threshold = thr
        node.left = self._grow_tree(X[indices_left], y[indices_left], depth + 1)
        node.right = self._grow_tree(X[~indices_left], y[~indices_left], depth + 1)
        node.value = None  # Not a leaf
        return node

    def predict(self, X):
        """Return an array with one predicted label per row of ``X``."""
        return np.array([self._predict(inputs) for inputs in X])

    def _predict(self, inputs):
        """Walk from the root to a leaf for one sample and return its label."""
        node = self.root
        # Internal nodes are the ones whose value was reset to None.
        while node.value is None:
            if inputs[node.feature_index] <= node.threshold:
                node = node.left
            else:
                node = node.right
        return node.value

class DecisionTreeRegressor:
    """
    Decision Tree for Regression supporting mean squared error (MSE) and mean absolute error (MAE).

    Parameters
    ----------
    max_depth : int
        Depth limit. The root is at depth 0; a node at ``max_depth`` is never
        split.
    min_samples_split : int
        A node holding fewer samples than this is never split.
    criterion : str
        ``"mse"`` selects mean squared error; any other value selects the
        mean absolute deviation.

    Notes
    -----
    Both criteria measure deviation from the *mean* of the targets, and every
    leaf predicts the mean of its targets (also when ``criterion="mae"``).
    """
    def __init__(self, max_depth=5, min_samples_split=2, criterion="mse"):
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.criterion = criterion  # "mse" or "mae"
        self.root = None

    def fit(self, X, y):
        """
        Build the tree from a 2-D feature matrix ``X`` and a target vector ``y``.

        Array-likes (e.g. nested lists) are accepted. Returns ``self`` so that
        calls can be chained.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        self.root = self._grow_tree(X, y, depth=0)
        return self

    def _mse(self, y):
        """Mean squared deviation of ``y`` from its mean."""
        return np.mean((y - np.mean(y))**2)

    def _mae(self, y):
        """Mean absolute deviation of ``y`` from its mean (not its median)."""
        return np.mean(np.abs(y - np.mean(y)))

    def _best_split(self, X, y):
        """
        Search every feature for the threshold with the lowest weighted child
        error.

        Returns
        -------
        (feature_index, threshold)
            ``(None, None)`` when there are fewer than two samples or no
            feature has two distinct values.
        """
        X = np.asarray(X)
        m, n = X.shape
        if m <= 1:
            return None, None
        # Pick the error measure once instead of once per candidate split.
        error_fn = self._mse if self.criterion == "mse" else self._mae
        best_error = np.inf
        best_idx, best_thr = None, None

        for idx in range(n):
            # Sort (feature value, target) pairs so candidate cuts are prefixes.
            thresholds, targets = zip(*sorted(zip(X[:, idx], y)))
            for i in range(1, m):
                # Equal neighbours cannot be separated by a threshold.
                if thresholds[i] == thresholds[i - 1]:
                    continue
                error_left = error_fn(np.array(targets[:i]))
                error_right = error_fn(np.array(targets[i:]))
                error = (i * error_left + (m - i) * error_right) / m
                if error < best_error:
                    best_error = error
                    best_idx = idx
                    # Cut halfway between the two neighbouring values.
                    best_thr = (thresholds[i] + thresholds[i - 1]) / 2
        return best_idx, best_thr

    def _grow_tree(self, X, y, depth):
        """
        Recursively build the subtree for the samples ``(X, y)``.

        Every node starts as a leaf predicting the mean target. It is turned
        into an internal node only if the stopping rules allow it and a usable
        split exists.
        """
        prediction = np.mean(y)
        node = DecisionTreeNode(value=prediction)

        may_split = (
            depth < self.max_depth
            and len(y) >= self.min_samples_split
            and len(set(y)) > 1
        )
        if not may_split:
            return node

        idx, thr = self._best_split(X, y)
        if idx is None:
            return node

        indices_left = X[:, idx] <= thr
        # A midpoint between two adjacent floats can round onto the larger one,
        # which would leave the right child empty and give it a NaN mean.
        # Keep the node as a leaf in that case.
        if indices_left.all() or not indices_left.any():
            return node

        node.feature_index = idx
        node.threshold = thr
        node.left = self._grow_tree(X[indices_left], y[indices_left], depth + 1)
        node.right = self._grow_tree(X[~indices_left], y[~indices_left], depth + 1)
        node.value = None  # Not a leaf
        return node

    def predict(self, X):
        """Return an array with one predicted value per row of ``X``."""
        return np.array([self._predict(inputs) for inputs in X])

    def _predict(self, inputs):
        """Walk from the root to a leaf for one sample and return its value."""
        node = self.root
        # Internal nodes are the ones whose value was reset to None.
        while node.value is None:
            if inputs[node.feature_index] <= node.threshold:
                node = node.left
            else:
                node = node.right
        return node.value

## Example usage

### Classification

In [3]:
# Classification demo: fit one tree per impurity criterion on the same toy data
# and print what each predicts for two query points.
Xc = np.array([[2, 3], [1, 1], [3, 6], [6, 8], [7, 6], [8, 5]])
yc = np.array([0, 0, 0, 1, 1, 1])
Xc_query = np.array([[2, 2], [7, 7]])

clf = DecisionTreeClassifier(max_depth=3, criterion="gini")
clf_e = DecisionTreeClassifier(max_depth=3, criterion="entropy")

for banner, model in (
    ("Classification (Gini):", clf),
    ("Classification (Entropy):", clf_e),
):
    print(banner)
    model.fit(Xc, yc)
    print("Predictions:", model.predict(Xc_query))

Classification (Gini):
Predictions: [0 1]
Classification (Entropy):
Predictions: [0 1]


### Regression

In [4]:
 # Regression
# Fit one tree per error criterion on the same toy data and print what each
# predicts for three query points.
Xr = np.array([[2], [4], [6], [8], [10], [12]])
yr = np.array([1.2, 2.5, 3.7, 6.0, 8.8, 10.2])
Xr_query = np.array([[3], [9], [11]])

reg = DecisionTreeRegressor(max_depth=2, criterion="mse")
reg_mae = DecisionTreeRegressor(max_depth=2, criterion="mae")

for banner, model in (
    ("Regression (MSE):", reg),
    ("Regression (MAE):", reg_mae),
):
    print(banner)
    model.fit(Xr, yr)
    print("Predictions:", model.predict(Xr_query))

Regression (MSE):
Predictions: [1.2 6.  9.5]
Regression (MAE):
Predictions: [1.2 6.  9.5]
