In [1]:
import numpy as np

In [2]:
class Node:
    """One vertex of a binary decision tree.

    A node is either an internal split (``feat``/``thr`` plus ``left`` and
    ``right`` children) or a leaf that carries its prediction in ``value``.
    Every constructor argument is keyword-only.
    """

    def __init__(self, *, is_leaf=False, value=None, feat=None, thr=None, left=None, right=None, depth=0, n=0):
        # Bookkeeping supplied by whoever builds the tree.
        self.depth, self.n = depth, n
        # Split description (meaningful for internal nodes).
        self.feat, self.thr = feat, thr
        self.left, self.right = left, right
        # Leaf flag and leaf payload.
        self.is_leaf, self.value = is_leaf, value

In [3]:
class DecisionTree:
    """Binary decision tree for classification or regression.

    Splits are axis-aligned tests ``x[feat] <= thr`` chosen greedily to
    maximise impurity reduction.

    Parameters
    ----------
    max_depth : int or None
        Nodes at this depth become leaves; ``None`` disables the limit.
    min_samples_split : int
        A node holding fewer samples than this becomes a leaf.
    min_gain : float
        The best split is rejected when its gain is below this value.
    task : str
        ``'classification'`` uses class impurity and majority-vote leaves;
        any other value is handled as regression (variance impurity,
        mean-valued leaves).
    criterion : str
        ``'gini'`` or ``'entropy'``; only consulted for classification.
    max_features : int, float or None
        Number of features (int) or fraction of features (float) sampled
        without replacement at every split. The resulting count is clamped
        to ``[1, n_features]``. ``None`` considers every feature.
    random_state : int or None
        Seed of the ``RandomState`` used for feature sampling.
    """

    def __init__(self, max_depth=None, min_samples_split=2, min_gain=0.0,
                 task='classification', criterion='gini', max_features=None, random_state=None):
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.min_gain = min_gain
        self.task = task
        self.criterion = criterion
        self.max_features = max_features
        self.rng = np.random.RandomState(random_state)
        self.root = None

    def fit(self, X, y):
        """Grow the tree on 2-D ``X`` (samples x features) and targets ``y``.

        Returns ``self``.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        self.n_feats = X.shape[1]
        if self.task == 'classification':
            self.classes = np.unique(y)
        self.root = self._build(X, y, depth=0)
        return self

    def _make_leaf(self, node, y):
        """Turn ``node`` into a leaf predicting from the targets ``y``."""
        node.is_leaf = True
        node.value = self._leaf_value(y)
        return node

    def _build(self, X, y, depth):
        """Recursively build and return the subtree for the samples ``X, y``."""
        node = Node(depth=depth, n=X.shape[0])

        if self._stop(y, depth):
            return self._make_leaf(node, y)

        feat, thr, gain = self._best_split(X, y)
        if feat is None or gain < self.min_gain:
            return self._make_leaf(node, y)

        mask = X[:, feat] <= thr
        node.feat = feat
        node.thr = thr
        node.left = self._build(X[mask], y[mask], depth + 1)
        node.right = self._build(X[~mask], y[~mask], depth + 1)
        return node

    def _stop(self, y, depth):
        """Return True when the node holding targets ``y`` must be a leaf."""
        if len(y) < self.min_samples_split:
            return True
        if self.max_depth is not None and depth >= self.max_depth:
            return True
        if self.task == 'classification':
            # A pure node cannot be improved by splitting.
            return np.unique(y).size == 1
        # Regression: treat (numerically) constant targets as pure.
        return np.var(y) < 1e-9

    def _leaf_value(self, y):
        """Return the leaf prediction: majority class, or mean for regression.

        ``np.unique`` returns sorted values and ``argmax`` takes the first
        maximum, so class ties resolve to the smallest label.
        """
        if self.task == 'classification':
            vals, cnts = np.unique(y, return_counts=True)
            return vals[np.argmax(cnts)]
        return np.mean(y)

    def _impurity(self, y):
        """Return the impurity of targets ``y`` (0 for an empty array).

        Raises
        ------
        ValueError
            For classification with a criterion other than ``'gini'`` or
            ``'entropy'``. Returning 0 here would make every gain zero and
            silently produce a single-leaf tree.
        """
        if len(y) == 0:
            return 0
        if self.task != 'classification':
            return np.var(y)

        _, cnts = np.unique(y, return_counts=True)
        p = cnts / len(y)
        if self.criterion == 'gini':
            return 1 - np.sum(p ** 2)
        if self.criterion == 'entropy':
            # Counts from np.unique are always positive, so log2 is safe.
            return -np.sum(p * np.log2(p))
        raise ValueError(
            "unknown classification criterion %r; expected 'gini' or 'entropy'"
            % (self.criterion,)
        )

    def _best_split(self, X, y):
        """Find the split with the largest strictly positive gain.

        Returns
        -------
        tuple
            ``(feature_index, threshold, gain)``, or ``(None, None, 0)``
            when no candidate split has positive gain.
        """
        n, m = X.shape
        best_gain = 0
        best_feat = None
        best_thr = None
        parent_imp = self._impurity(y)

        feats = np.arange(m)
        if self.max_features is not None:
            if isinstance(self.max_features, float):
                k = int(self.max_features * m)
            else:
                k = self.max_features
            # Clamp: a small fraction must not truncate to zero features,
            # and sampling without replacement cannot draw more than m.
            k = min(max(k, 1), m)
            feats = self.rng.choice(m, k, replace=False)

        for j in feats:
            col = X[:, j]
            vals = np.unique(col)
            if len(vals) <= 1:
                continue
            # Candidate thresholds are midpoints of consecutive unique values.
            thresholds = (vals[:-1] + vals[1:]) / 2
            for thr in thresholds:
                mask = col <= thr
                # A midpoint can round onto an endpoint for nearly equal
                # floats, leaving one side empty; skip such degenerate splits.
                if np.all(mask) or np.all(~mask):
                    continue
                y_l, y_r = y[mask], y[~mask]
                imp_l, imp_r = self._impurity(y_l), self._impurity(y_r)
                gain = parent_imp - (len(y_l) / n) * imp_l - (len(y_r) / n) * imp_r
                if gain > best_gain:
                    best_gain = gain
                    best_feat = j
                    best_thr = thr
        return best_feat, best_thr, best_gain

    def predict(self, X):
        """Return an array with one prediction per row of ``X``."""
        X = np.asarray(X)
        return np.array([self._predict_row(x, self.root) for x in X])

    def _predict_row(self, x, node):
        """Walk from ``node`` down to a leaf for sample ``x``; return its value."""
        while not node.is_leaf:
            if x[node.feat] <= node.thr:
                node = node.left
            else:
                node = node.right
        return node.value

In [4]:
class RandomForest:
    """Bagged ensemble of ``DecisionTree`` models.

    Parameters
    ----------
    n_trees : int
        Number of trees to grow.
    max_depth, min_samples_split, min_gain, max_features, task, criterion
        Passed unchanged to every ``DecisionTree``.
    bootstrap : bool
        When true each tree is trained on ``n`` rows drawn with replacement;
        otherwise every tree sees all rows in their original order.
    random_state : int or None
        Seed of the ``RandomState`` that draws the bootstrap samples and the
        per-tree seeds.
    """

    def __init__(self, n_trees=10, max_depth=None, min_samples_split=2, min_gain=0.0,
                 max_features=None, bootstrap=True, task='classification', criterion='gini', random_state=None):
        self.n_trees = n_trees
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.min_gain = min_gain
        self.max_features = max_features
        self.bootstrap = bootstrap
        self.task = task
        self.criterion = criterion
        self.rng = np.random.RandomState(random_state)
        self.trees = []

    def fit(self, X, y):
        """Train ``n_trees`` trees on ``X`` and ``y``; returns ``self``.

        Array-likes (e.g. nested lists) are accepted. Trees from an earlier
        ``fit`` call are discarded so the ensemble always holds exactly
        ``n_trees`` members.
        """
        # Fancy indexing below needs real arrays, not lists.
        X = np.asarray(X)
        y = np.asarray(y)
        n = X.shape[0]

        # Start from scratch: appending to trees left over from a previous
        # fit would let stale models vote in predict().
        self.trees = []
        for _ in range(self.n_trees):
            idx = self.rng.choice(n, n, replace=True) if self.bootstrap else np.arange(n)

            tree = DecisionTree(max_depth=self.max_depth,
                                min_samples_split=self.min_samples_split,
                                min_gain=self.min_gain,
                                task=self.task,
                                criterion=self.criterion,
                                max_features=self.max_features,
                                random_state=self.rng.randint(0, 10 ** 6))
            tree.fit(X[idx], y[idx])
            self.trees.append(tree)
        return self

    def predict(self, X):
        """Aggregate the trees' predictions for every row of ``X``.

        Classification returns the per-sample majority vote (``np.unique``
        sorts and ``argmax`` takes the first maximum, so ties resolve to the
        smallest label); regression returns the per-sample mean.
        """
        # Shape (n_trees, n_samples): one row of predictions per tree.
        preds = np.array([t.predict(X) for t in self.trees])
        if self.task != 'classification':
            return np.mean(preds, axis=0)

        final = []
        for column in preds.T:
            vals, cnts = np.unique(column, return_counts=True)
            final.append(vals[np.argmax(cnts)])
        return np.array(final)

In [9]:
# --- Classification demo: one feature, labels switch from 0 to 1 at x = 3 ---
X = np.array([[v] for v in range(6)])
y = np.array([0] * 3 + [1] * 3)

forest = RandomForest(
    n_trees=5,
    max_depth=2,
    task='classification',
    criterion='gini',
    random_state=42,
)
forest.fit(X, y)

print("=== RandomForest Классификация ===")
print("Прогнозы:", forest.predict([[0], [1.5], [3.5], [5]]))

# --- Regression demo: targets jump from about 0 to about 10 at x = 3 ---
Xr = np.array([[v] for v in range(6)])
yr = np.array([0, 0, 1, 10, 11, 10])

forest_r = RandomForest(
    n_trees=5,
    max_depth=2,
    task='regression',
    criterion='mse',
    random_state=42,
)
forest_r.fit(Xr, yr)

print("\n=== RandomForest Регрессия ===")
print("Прогнозы:", forest_r.predict([[0], [2], [3.5], [5]]))

=== RandomForest Классификация ===
Прогнозы: [0 0 1 1]

=== RandomForest Регрессия ===
Прогнозы: [ 0.2  0.6 10.  10.4]


In [7]:
class GradientBoosting:
    """Gradient boosting over regression ``DecisionTree`` models.

    Regression fits each tree to the residual ``y - pred``. Classification
    keeps ``pred`` as a log-odds score and fits each tree to
    ``y - sigmoid(pred)``; the log-odds prior and this residual assume
    targets encoded as 0/1.

    Parameters
    ----------
    n_estimators : int
        Number of boosting rounds (trees).
    learning_rate : float
        Factor applied to every tree's contribution.
    max_depth, min_samples_split, min_gain, max_features
        Passed unchanged to every ``DecisionTree``.
    task : str
        ``'regression'``; any other value selects binary classification.
    criterion : str
        Stored but not used: the trees are always built with
        ``task='regression'`` and ``criterion='mse'``.
    random_state : int or None
        Seed of the ``RandomState`` that generates the per-tree seeds. The
        attribute ``random_state`` holds that ``RandomState`` object.
    """

    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3,
                 min_samples_split=2, min_gain=0.0, max_features=None,
                 task='regression', criterion='mse', random_state=None):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.min_gain = min_gain
        self.max_features = max_features
        self.task = task
        self.criterion = criterion
        self.random_state = np.random.RandomState(random_state)
        self.trees = []
        self.init_pred = None

    def fit(self, X, y):
        """Fit ``n_estimators`` trees sequentially; returns ``self``.

        Trees from an earlier ``fit`` call are discarded, so ``predict``
        only ever sums trees that belong to the current ``init_pred``.
        """
        X = np.asarray(X)
        y = np.asarray(y)

        # Start from scratch instead of stacking onto a previous fit.
        self.trees = []

        if self.task == 'regression':
            self.init_pred = np.mean(y)
        else:
            # Log-odds of the positive rate. The rate is clipped away from
            # 0 and 1 so single-class targets give a finite prior instead of
            # +/-inf; rates already inside the interval are unchanged.
            eps = 1e-12
            pos_rate = np.clip(np.mean(y), eps, 1 - eps)
            self.init_pred = np.log(pos_rate / (1 - pos_rate))
        pred = np.full_like(y, self.init_pred, dtype=float)

        for _ in range(self.n_estimators):
            if self.task == 'regression':
                residual = y - pred
            else:
                p = 1 / (1 + np.exp(-pred))
                residual = y - p

            tree = DecisionTree(max_depth=self.max_depth,
                                min_samples_split=self.min_samples_split,
                                min_gain=self.min_gain,
                                task='regression',
                                criterion='mse',
                                max_features=self.max_features,
                                random_state=self.random_state.randint(0, 10 ** 6))
            tree.fit(X, residual)
            self.trees.append(tree)

            pred += self.learning_rate * tree.predict(X)

        return self

    def predict(self, X):
        """Predict for every row of ``X``.

        Regression returns the boosted values as floats. Classification
        returns a boolean array that is True where the sigmoid of the score
        is at least 0.5.
        """
        X = np.asarray(X)
        pred = np.full(X.shape[0], self.init_pred, dtype=float)

        for tree in self.trees:
            pred += self.learning_rate * tree.predict(X)

        if self.task == 'regression':
            return pred
        return (1 / (1 + np.exp(-pred))) >= 0.5

In [8]:
# --- Regression demo: targets jump from about 0 to about 10 at x = 3 ---
Xr = np.array([[v] for v in range(6)])
yr = np.array([0, 0, 1, 10, 11, 10])

gb_r = GradientBoosting(
    n_estimators=20,
    learning_rate=0.2,
    max_depth=2,
    random_state=42,
)
gb_r.fit(Xr, yr)

print("=== GradientBoosting Регрессия ===")
print("Прогнозы:", gb_r.predict([[0], [1.5], [3.5], [5]]))


# --- Classification demo: labels switch from 0 to 1 at x = 3 ---
Xc = np.array([[v] for v in range(6)])
yc = np.array([0] * 3 + [1] * 3)

gb_c = GradientBoosting(
    n_estimators=10,
    learning_rate=0.5,
    max_depth=2,
    task='classification',
    random_state=42,
)
gb_c.fit(Xc, yc)

print("\n=== GradientBoosting Классификация ===")
print("Прогнозы:", gb_c.predict([[0], [1.5], [3.5], [5]]))

=== GradientBoosting Регрессия ===
Прогнозы: [ 0.06702857  0.06702857 10.00236848  9.99456021]

=== GradientBoosting Классификация ===
Прогнозы: [False False  True  True]
