## <h1 id="aa" style="color:white;background:#B00000;padding:8px;border-radius:8px;font-family:newtimeroman;"> Импорт библиотек </h1> 

In [11]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.base import BaseEstimator
from sklearn.metrics import f1_score


## <h1 id="aa" style="color:white;background:#B00000;padding:8px;border-radius:8px;font-family:newtimeroman;"> Загрузка данных </h1> 

In [2]:
# Load the Iris dataset; X holds the feature matrix and y the integer class labels.
data = load_iris()
X, y = data.data, data.target

# Build a DataFrame (feature columns plus a trailing "target" column) for inspection.
df = pd.DataFrame(X, columns=data.feature_names).assign(target=y)
df


Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


## <h1 id="aa" style="color:white;background:#B00000;padding:8px;border-radius:8px;font-family:newtimeroman;"> Предварительная обработка данных</h1> 

In [4]:
# Split BEFORE scaling so that the held-out rows never influence the scaler's
# mean/variance estimates (fitting on the full dataset leaks test information).
# The split itself is unchanged: same number of rows, test_size and random_state.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

# Learn the scaling statistics from the training rows only, then apply that
# same transform to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full dataset under the train-fitted transform; kept so the name X_scaled
# remains available to any cell that refers to it.
X_scaled = scaler.transform(X)


## <h1 id="aa" style="color:white;background:#B00000;padding:8px;border-radius:8px;font-family:newtimeroman;"> Собственная реализация случайного леса </h1> 

In [6]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.base import BaseEstimator

class CustomRandomForest(BaseEstimator):
    """Bagged ensemble of decision trees with a random feature subset per tree.

    Every tree is trained on a bootstrap sample of the rows and on a random
    subset of the columns that is drawn once for the whole tree (not once per
    split). Predictions are made by majority vote over the trees.

    Parameters
    ----------
    n_estimators : int, default=10
        Number of trees to build.
    max_features : {'sqrt', 'log2'}, int, float or None, default='sqrt'
        Number of columns each tree is trained on:
        'sqrt' -> int(sqrt(n_features)); 'log2' -> int(log2(n_features));
        int -> that many columns; float -> that fraction of the columns;
        anything else (including None) -> all columns.
        The result is always clamped to the range [1, n_features].
    max_depth : int or None, default=None
        Passed to every DecisionTreeClassifier.
    random_state : int or None, default=1
        Seed for the bootstrap / feature sampling; also passed unchanged to
        every DecisionTreeClassifier.

    Attributes
    ----------
    trees : list
        Fitted DecisionTreeClassifier objects (empty before ``fit``).
    feature_indices : list
        For each tree, the array of column indices it was trained on.
    classes_ : ndarray
        Sorted unique labels seen in ``fit``.
    n_features_in_ : int
        Number of columns of the training matrix.
    """

    def __init__(self, n_estimators=10, max_features='sqrt', max_depth=None, random_state=1):
        self.n_estimators = n_estimators
        self.max_features = max_features
        self.max_depth = max_depth
        self.random_state = random_state
        self.trees = []
        self.feature_indices = []

    def _n_features_per_tree(self, n_features):
        """Translate ``max_features`` into a column count within [1, n_features]."""
        max_features = self.max_features
        if isinstance(max_features, str):
            if max_features == 'sqrt':
                count = int(np.sqrt(n_features))
            elif max_features == 'log2':
                count = int(np.log2(n_features))
            else:
                # Unrecognised strings keep the historical fallback: all columns.
                count = n_features
        elif isinstance(max_features, (bool, np.bool_)):
            # bool is a subclass of int; do not interpret True/False as a count.
            count = n_features
        elif isinstance(max_features, (int, np.integer)):
            count = int(max_features)
        elif isinstance(max_features, (float, np.floating)):
            count = int(max_features * n_features)
        else:
            count = n_features
        # int(log2(1)) == 0 and tiny fractions would otherwise select no columns.
        return min(max(count, 1), n_features)

    def fit(self, X, y):
        """Train ``n_estimators`` trees on bootstrap samples of ``(X, y)``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,) or (n_samples, 1)
            Class labels of any type accepted by ``np.unique``.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``X`` is not 2-D or is empty, ``y`` is not 1-D, their lengths
            differ, or ``n_estimators`` is smaller than 1.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if y.ndim == 2 and y.shape[1] == 1:
            y = y.ravel()
        if X.ndim != 2:
            raise ValueError(f"X must be 2-dimensional, got {X.ndim} dimension(s).")
        if y.ndim != 1:
            raise ValueError(f"y must be 1-dimensional, got shape {y.shape}.")
        n_samples, n_features = X.shape
        if n_samples == 0 or n_features == 0:
            raise ValueError("X must contain at least one sample and one feature.")
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X and y have inconsistent lengths: {n_samples} != {y.shape[0]}."
            )
        if self.n_estimators < 1:
            raise ValueError("n_estimators must be at least 1.")

        # Private generator: same draw sequence as seeding the global NumPy RNG
        # with this value, but without touching the caller's global state.
        rng = np.random.RandomState(self.random_state)

        # Encode labels as 0..n_classes-1 so that voting works for any label type.
        self.classes_, y_encoded = np.unique(y, return_inverse=True)
        self.n_features_in_ = n_features
        n_selected_features = self._n_features_per_tree(n_features)

        self.trees = []
        self.feature_indices = []
        for _ in range(self.n_estimators):
            # Bootstrap sample of the rows (with replacement).
            bootstrap_indices = rng.choice(n_samples, size=n_samples, replace=True)
            # Random subset of the columns for this tree (without replacement).
            feature_indices = rng.choice(n_features, size=n_selected_features, replace=False)
            self.feature_indices.append(feature_indices)

            # Every tree receives the same random_state; the trees differ through
            # their bootstrap rows and their column subset.
            tree = DecisionTreeClassifier(max_depth=self.max_depth, random_state=self.random_state)
            tree.fit(X[bootstrap_indices][:, feature_indices], y_encoded[bootstrap_indices])
            self.trees.append(tree)
        return self

    def predict(self, X):
        """Predict a label for every row of ``X`` by majority vote.

        Ties are resolved in favour of the class that comes first in
        ``classes_``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        ndarray of shape (n_samples,)
            Predicted labels, taken from ``classes_``.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If called before ``fit``.
        ValueError
            If ``X`` is not 2-D or its column count differs from the training data.
        """
        from sklearn.exceptions import NotFittedError

        if not self.trees or not hasattr(self, "classes_"):
            raise NotFittedError(
                "This CustomRandomForest instance is not fitted yet. "
                "Call 'fit' before using 'predict'."
            )
        X = np.asarray(X)
        if X.ndim != 2:
            raise ValueError(f"X must be 2-dimensional, got {X.ndim} dimension(s).")
        if X.shape[1] != self.n_features_in_:
            raise ValueError(
                f"X has {X.shape[1]} features, but CustomRandomForest was fitted "
                f"with {self.n_features_in_} features."
            )

        # Shape (n_trees, n_samples): encoded label predicted by each tree.
        predictions = np.array([
            tree.predict(X[:, features]) for tree, features in zip(self.trees, self.feature_indices)
        ]).astype(np.intp, copy=False)

        # Shape (n_classes, n_samples): number of trees voting for each class.
        votes = np.apply_along_axis(np.bincount, 0, predictions, minlength=len(self.classes_))
        return self.classes_[votes.argmax(axis=0)]


In [9]:
# Train the hand-written forest on the training split.
custom_rf = CustomRandomForest(
    n_estimators=10,
    max_features='sqrt',
    max_depth=5,
    random_state=1,
)
custom_rf.fit(X_train, y_train)

# Score it on the held-out split and show the per-class metrics.
y_pred_custom = custom_rf.predict(X_test)
print("Custom Random Forest:", classification_report(y_test, y_pred_custom), sep="\n")


Custom Random Forest:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        11
           1       1.00      0.92      0.96        13
           2       0.86      1.00      0.92         6

    accuracy                           0.97        30
   macro avg       0.95      0.97      0.96        30
weighted avg       0.97      0.97      0.97        30



## <h1 id="aa" style="color:white;background:#B00000;padding:8px;border-radius:8px;font-family:newtimeroman;"> Сравнение с RandomForest от Sklearn </h1> 

In [10]:
# Reference model: scikit-learn's forest with the same hyper-parameters as the
# custom one. fit() returns the estimator itself, so it can be chained.
sklearn_rf = RandomForestClassifier(
    n_estimators=10,
    max_features='sqrt',
    max_depth=5,
    random_state=1,
).fit(X_train, y_train)

# Score it on the held-out split and show the per-class metrics.
y_pred_sklearn = sklearn_rf.predict(X_test)
print("Scikit-learn Random Forest:", classification_report(y_test, y_pred_sklearn), sep="\n")


Scikit-learn Random Forest:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        11
           1       1.00      0.92      0.96        13
           2       0.86      1.00      0.92         6

    accuracy                           0.97        30
   macro avg       0.95      0.97      0.96        30
weighted avg       0.97      0.97      0.97        30



In [12]:
# Weighted F1 of both models on the same held-out split.
f1_custom = f1_score(y_test, y_pred_custom, average='weighted')
f1_sklearn = f1_score(y_test, y_pred_sklearn, average='weighted')

print(f"F1 (Custom Random Forest): {f1_custom:.4f}")
print(f"F1 (Scikit-learn Random Forest): {f1_sklearn:.4f}")

if f1_custom == 0:
    # The relative difference is undefined for a zero baseline; avoid dividing by zero.
    f1_improvement = float('nan')
    print("F1 Custom Random Forest равна 0 — относительная разница не определена.")
else:
    # Relative difference of scikit-learn's F1 with respect to the custom model, in percent.
    f1_improvement = ((f1_sklearn - f1_custom) / f1_custom) * 100
    if f1_improvement >= 0:
        print(f"Scikit-learn Random Forest лучше Custom Random Forest на {f1_improvement:.2f}% по F1-мере.")
    else:
        # Negative difference: report the direction correctly instead of "better by -x%".
        print(f"Scikit-learn Random Forest хуже Custom Random Forest на {abs(f1_improvement):.2f}% по F1-мере.")


F1 (Custom Random Forest): 0.9673
F1 (Scikit-learn Random Forest): 0.9673
Scikit-learn Random Forest лучше Custom Random Forest на 0.00% по F1-мере.
