In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
class Node:
    
    def __init__(self, feature_index=None, threshold=None, value=None, left=None, right=None):
        self.feature_index = feature_index  # Индекс признака, по которому происходит разделение
        self.threshold = threshold          # Пороговое значение для разделения
        self.value = value                  # Значение узла (для листового узла)
        self.left = left                    # Левое поддерево
        self.right = right                  # Правое поддерево

class CART:
    
    def __init__(self, max_depth=None, min_samples_split=2):
        self.max_depth = max_depth                  # Максимальная глубина дерева
        self.min_samples_split = min_samples_split  # Минимальное количество объектов для разделения

    def gini_impurity(self, y):
        classes, counts = np.unique(y, return_counts=True)
        probabilities = counts / np.sum(counts)
        impurity = 1 - np.sum(probabilities ** 2)
        return impurity

    def split(self, X, y, feature_index, threshold):
        left_mask = X[:, feature_index] <= threshold
        right_mask = ~left_mask
        X_left, y_left = X[left_mask], y[left_mask]
        X_right, y_right = X[right_mask], y[right_mask]
        return X_left, y_left, X_right, y_right

    def find_best_split(self, X, y):
        best_gini = float('inf')
        best_feature_index = None
        best_threshold = None

        n_features = X.shape[1]
        for feature_index in range(n_features):
            thresholds = np.unique(X[:, feature_index])
            for threshold in thresholds:
                X_left, y_left, X_right, y_right = self.split(X, y, feature_index, threshold)
                gini = (y_left.shape[0] / y.shape[0]) * self.gini_impurity(y_left) + \
                        (y_right.shape[0] / y.shape[0]) * self.gini_impurity(y_right)
                if gini < best_gini:
                    best_gini = gini
                    best_feature_index = feature_index
                    best_threshold = threshold

        return best_feature_index, best_threshold

    def build_tree(self, X, y, depth=0):
        # Проверка условий остановки
        if depth == self.max_depth or X.shape[0] < self.min_samples_split or np.unique(y).size == 1:
            value = np.argmax(np.bincount(y))
            return Node(value=value)

        feature_index, threshold = self.find_best_split(X, y)
        X_left, y_left, X_right, y_right = self.split(X, y, feature_index, threshold)
        
        left_child = self.build_tree(X_left, y_left, depth + 1)
        right_child = self.build_tree(X_right, y_right, depth + 1)
        
        return Node(feature_index=feature_index, threshold=threshold, left=left_child, right=right_child)

    def fit(self, X, y):
        self.n_classes = np.unique(y).size
        self.tree = self.build_tree(X, y)

    def predict_single(self, x, node):
        if node.value is not None:
            return node.value
        if x[node.feature_index] <= node.threshold:
            return self.predict_single(x, node.left)
        else:
            return self.predict_single(x, node.right)

    def predict(self, X):
        return np.array([self.predict_single(x, self.tree) for x in X])

In [9]:
iris = load_iris()
X = iris.data
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True)

In [10]:
model = CART()
model.fit(X_train, y_train)

In [11]:
y_pred = model.predict(X_test)

print("Predicted: ", y_pred)
print("     Real: ", y_test)
print("\n Accuracy:", accuracy_score(y_test, y_pred))

Predicted:  [1 1 1 1 0 0 2 0 0 2 0 1 2 0 0 2 0 1 2 1 2 2 0 0 0 1 2 2 2 1]
     Real:  [1 1 1 1 0 0 2 0 0 2 0 1 2 0 0 2 0 1 2 1 2 2 0 0 0 1 2 2 2 1]

 Accuracy: 1.0
