In [3]:
import numpy as np
from collections import Counter

class DecisionTreeNode:
    """
    A single node of a binary decision tree.

    The node is a plain attribute container. All constructor arguments are
    optional and default to None:

        feature_index: column index tested at this node.
        threshold: value the selected feature is compared against.
        left, right: child nodes.
        value: payload stored on the node (the tree classifier in this
            file stores the predicted class here and treats a node whose
            value is None as an internal node).
    """
    def __init__(self, feature_index=None, threshold=None, left=None, right=None, value=None):
        # Split rule: which feature to test and the cut point.
        self.feature_index, self.threshold = feature_index, threshold
        # Sub-trees below this node.
        self.left, self.right = left, right
        # Payload carried by the node.
        self.value = value

class DecisionTreeClassifierRF:
    """
    Decision Tree for Random Forest (Classification).
    Uses Gini impurity for splits. Supports random feature selection.

    Parameters:
        max_depth: maximum depth of the tree; a node at this depth is
            never split.
        min_samples_split: minimum number of samples a node must hold to
            be considered for splitting.
        max_features: number of features drawn at random (without
            replacement, from NumPy's global RNG) as split candidates at
            each node. None means every feature is considered; a value
            larger than the number of features is clamped to it.

    Attributes set by fit():
        classes_: sorted array of the distinct labels found in y.
        n_classes_: number of distinct labels.
        n_features_: number of columns of X.
        root: root node of the fitted tree (None before fitting).
    """
    def __init__(self, max_depth=5, min_samples_split=2, max_features=None):
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.max_features = max_features  # Number of features to consider at each split
        self.root = None

    def fit(self, X, y):
        """
        Build the tree from training data.

        X is an array-like of shape (n_samples, n_features); y is a 1-D
        array-like holding one label per row of X. Labels may be any
        values np.unique can sort (they do not need to be 0..k-1).

        Returns self. Raises ValueError when the shapes are inconsistent
        or the dataset is empty.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        if y.ndim != 1 or y.shape[0] != X.shape[0]:
            raise ValueError("y must be a 1-D array with one label per row of X")
        if X.shape[0] == 0:
            raise ValueError("cannot fit a decision tree on an empty dataset")

        # Remember the real label values so leaves can store labels rather
        # than positions in an assumed 0..k-1 range.
        self.classes_ = np.unique(y)
        self.n_classes_ = len(self.classes_)
        self.n_features_ = X.shape[1]
        self.root = self._grow_tree(X, y, depth=0)
        return self

    def _gini(self, y):
        """Return the Gini impurity 1 - sum(p_c ** 2) of the labels in y."""
        m = len(y)
        _, counts = np.unique(y, return_counts=True)
        proportions = counts / m
        return 1.0 - np.sum(proportions ** 2)

    def _best_split(self, X, y):
        """
        Find the (feature_index, threshold) pair with the lowest weighted
        Gini impurity among the candidate features.

        Candidate thresholds are midpoints between consecutive distinct
        sorted values of a feature. Returns (None, None) when there are
        fewer than two samples or no candidate feature has two distinct
        values.
        """
        m, n = X.shape
        if m <= 1:
            return None, None
        y = np.asarray(y)

        if self.max_features is None:
            features = range(n)
        else:
            # Clamp so asking for more features than exist means "use all"
            # instead of failing inside np.random.choice.
            n_candidates = min(self.max_features, n)
            features = np.random.choice(n, n_candidates, replace=False)

        best_gini = np.inf
        best_idx, best_thr = None, None

        for idx in features:
            # Sort samples by this feature; labels follow the same order.
            order = np.argsort(X[:, idx], kind="stable")
            values = X[order, idx]
            labels = y[order]
            for i in range(1, m):
                # A cut is only possible between two distinct values.
                if values[i] == values[i - 1]:
                    continue
                gini_left = self._gini(labels[:i])
                gini_right = self._gini(labels[i:])
                # Impurity of the two children weighted by their sizes.
                gini = (i * gini_left + (m - i) * gini_right) / m
                if gini < best_gini:
                    best_gini = gini
                    best_idx = idx
                    best_thr = (values[i] + values[i - 1]) / 2
        return best_idx, best_thr

    def _grow_tree(self, X, y, depth):
        """
        Recursively build the subtree for the samples (X, y).

        Every node starts as a leaf holding the majority label of y. It is
        turned into an internal node (value reset to None) only when the
        depth limit, the minimum sample count and label purity all allow a
        split and a usable split exists.
        """
        # Count per known label value; ties resolve to the smallest label
        # because classes_ is sorted and argmax returns the first maximum.
        samples_per_class = [np.sum(y == label) for label in self.classes_]
        predicted_class = self.classes_[int(np.argmax(samples_per_class))]
        node = DecisionTreeNode(value=predicted_class)
        if (
            depth < self.max_depth
            and len(y) >= self.min_samples_split
            and len(np.unique(y)) > 1
        ):
            idx, thr = self._best_split(X, y)
            if idx is not None:
                indices_left = X[:, idx] <= thr
                # A midpoint rounded onto one of its neighbours can send
                # every sample to the same side; keep the leaf rather than
                # growing an empty child.
                if indices_left.all() or not indices_left.any():
                    return node
                X_left, y_left = X[indices_left], y[indices_left]
                X_right, y_right = X[~indices_left], y[~indices_left]
                node.feature_index = idx
                node.threshold = thr
                node.left = self._grow_tree(X_left, y_left, depth + 1)
                node.right = self._grow_tree(X_right, y_right, depth + 1)
                node.value = None  # marks the node as internal for _predict
        return node

    def predict(self, X):
        """
        Predict a label for each row of X and return them as an array.

        Raises ValueError if the tree has not been fitted.
        """
        if self.root is None:
            raise ValueError(
                "This DecisionTreeClassifierRF instance is not fitted yet; call fit() first."
            )
        return np.array([self._predict(inputs) for inputs in X])

    def _predict(self, inputs):
        """Walk from the root to a leaf for one sample and return its label."""
        node = self.root
        # Internal nodes have value None; leaves carry the predicted label.
        while node.value is None:
            if inputs[node.feature_index] <= node.threshold:
                node = node.left
            else:
                node = node.right
        return node.value

class RandomForestClassifier:
    """
    Random Forest classifier using an ensemble of Decision Trees.

    Each tree is a DecisionTreeClassifierRF trained on a bootstrap sample
    of the training data; predictions are combined by majority vote.

    Parameters:
        n_estimators: number of trees to train.
        max_depth, min_samples_split, max_features: passed unchanged to
            every DecisionTreeClassifierRF.

    Randomness comes from NumPy's global RNG, so call np.random.seed(...)
    beforehand for reproducible results.
    """
    def __init__(self, n_estimators=10, max_depth=5, min_samples_split=2, max_features=None):
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.max_features = max_features
        self.trees = []

    def _bootstrap_sample(self, X, y):
        """
        Draw len(X) rows with replacement and return the matching (X, y)
        pair as NumPy arrays.
        """
        # Convert first: plain lists have no .shape and do not support
        # indexing with an index array.
        X = np.asarray(X)
        y = np.asarray(y)
        n_samples = X.shape[0]
        indices = np.random.choice(n_samples, n_samples, replace=True)
        return X[indices], y[indices]

    def fit(self, X, y):
        """
        Train n_estimators trees, each on its own bootstrap sample.

        X is an array-like of shape (n_samples, n_features) and y an
        array-like with one label per row. Returns self. Raises ValueError
        when X and y disagree on the number of samples.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                "X and y must contain the same number of samples "
                f"(got {X.shape[0]} and {y.shape[0]})"
            )

        # Build into a local list so a failure part-way through does not
        # leave the forest holding a partial ensemble.
        trees = []
        for _ in range(self.n_estimators):
            tree = DecisionTreeClassifierRF(
                max_depth=self.max_depth,
                min_samples_split=self.min_samples_split,
                max_features=self.max_features
            )
            X_sample, y_sample = self._bootstrap_sample(X, y)
            tree.fit(X_sample, y_sample)
            trees.append(tree)
        self.trees = trees
        return self

    def predict(self, X):
        """
        Predict a label for each row of X by majority vote over all trees.

        Ties go to the label that was voted first in tree order. Raises
        ValueError if the forest has not been fitted.
        """
        if not self.trees:
            raise ValueError(
                "This RandomForestClassifier instance is not fitted yet; call fit() first."
            )
        X = np.asarray(X)
        # Collect predictions from all trees: shape (n_trees, n_samples)
        tree_preds = np.array([tree.predict(X) for tree in self.trees])
        # Majority vote, one Counter per sample (column of tree_preds)
        majority_votes = [
            Counter(tree_preds[:, i]).most_common(1)[0][0]
            for i in range(X.shape[0])
        ]
        return np.array(majority_votes)

> ## Example usage:

In [4]:
# Fix the seed: bootstrap sampling and per-split feature selection both draw
# from NumPy's global random state.
np.random.seed(42)

# Simple 2D dataset: the first three points are class 0, the last three class 1.
X = np.array([
    [2, 3],
    [1, 1],
    [3, 6],
    [6, 8],
    [7, 6],
    [8, 5],
])
y = np.array([0, 0, 0, 1, 1, 1])  # Binary class

# Five shallow trees, each looking at one randomly chosen feature per split.
clf = RandomForestClassifier(n_estimators=5, max_depth=3, max_features=1)
clf.fit(X, y)

# One query point near each cluster.
X_test = np.array([[2, 2], [7, 7]])
print("Random Forest Predictions:", clf.predict(X_test))

Random Forest Predictions: [0 1]
