## Лабораторная работа №5

#### Задание 1
Провести классификацию найденного датасета, методами решающего дерева и случайного леса . В формате Markdown написать пояснения. Объяснить почему были выбраны именно такие гиперпараметры, была ли перекрестная проверка, и т.д.

#### Ход работы:

Импортируем библиотеки и загружаем датасет

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm

# Загрузка датасета
file_path = "../Dataset.xls"
df = pd.read_excel(file_path)

df

Unnamed: 0,ID,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,...,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,default payment next month
0,1,20000,2,2,1,24,2,2,-1,-1,...,0,0,0,0,689,0,0,0,0,1
1,2,120000,2,2,2,26,-1,2,0,0,...,3272,3455,3261,0,1000,1000,1000,0,2000,1
2,3,90000,2,2,2,34,0,0,0,0,...,14331,14948,15549,1518,1500,1000,1000,1000,5000,0
3,4,50000,2,2,1,37,0,0,0,0,...,28314,28959,29547,2000,2019,1200,1100,1069,1000,0
4,5,50000,1,2,1,57,-1,0,-1,0,...,20940,19146,19131,2000,36681,10000,9000,689,679,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29995,29996,220000,1,3,1,39,0,0,0,0,...,88004,31237,15980,8500,20000,5003,3047,5000,1000,0
29996,29997,150000,1,3,2,43,-1,-1,-1,-1,...,8979,5190,0,1837,3526,8998,129,0,0,0
29997,29998,30000,1,2,2,37,4,3,2,-1,...,20878,20582,19357,0,0,22000,4200,2000,3100,1
29998,29999,80000,1,3,1,41,1,-1,0,0,...,52774,11855,48944,85900,3409,1178,1926,52964,1804,1


Разделение данных и нормализация

In [2]:
# Разделение на признаки (X) и целевую переменную (y)
X = df.drop('default payment next month', axis=1)
y = df['default payment next month']

# Разделение данных на тренировочный и тестовый наборы
# random_state=42 - гарантирует, что данные каждый раз будут одинакого разбиваться
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Масштабирование признаков (нормализация)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

#### Метод решающего дерева:

Гиперпараметры:
   - max_depth: Максимальная глубина дерева. Ограничивает количество уровней в дереве. Значение None позволяет узлам расширяться до тех пор, пока все листовые узлы не будут содержать минимальное количество выборок.
   - min_samples_split: Минимальное количество выборок, необходимых для разделения внутреннего узла. Если количество выборок в узле меньше этого значения, узел не будет разделяться.
   - min_samples_leaf: Минимальное количество выборок, необходимых для существования листового узла. Это определяет, сколько выборок должно быть на каждом листе.

In [3]:
param_grid_tree = {
    'max_depth': [None, 5, 10, 15],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

In [4]:
# Создание модели решающего дерева
decision_tree = DecisionTreeClassifier()

# Поиск лучших параметров для решающего дерева
grid_search_tree = GridSearchCV(decision_tree, param_grid_tree, refit=True, verbose=3, cv=5)
grid_search_tree.fit(X_train_scaled, y_train)

# Лучшие параметры для решающего дерева
best_params_tree = grid_search_tree.best_params_
best_score_tree = grid_search_tree.best_score_

Fitting 5 folds for each of 36 candidates, totalling 180 fits
[CV 1/5] END max_depth=None, min_samples_leaf=1, min_samples_split=2;, score=0.724 total time=   1.3s
[CV 2/5] END max_depth=None, min_samples_leaf=1, min_samples_split=2;, score=0.735 total time=   1.1s
[CV 3/5] END max_depth=None, min_samples_leaf=1, min_samples_split=2;, score=0.729 total time=   1.1s
[CV 4/5] END max_depth=None, min_samples_leaf=1, min_samples_split=2;, score=0.736 total time=   1.3s
[CV 5/5] END max_depth=None, min_samples_leaf=1, min_samples_split=2;, score=0.724 total time=   1.0s
[CV 1/5] END max_depth=None, min_samples_leaf=1, min_samples_split=5;, score=0.736 total time=   1.4s
[CV 2/5] END max_depth=None, min_samples_leaf=1, min_samples_split=5;, score=0.739 total time=   1.3s
[CV 3/5] END max_depth=None, min_samples_leaf=1, min_samples_split=5;, score=0.734 total time=   1.2s
[CV 4/5] END max_depth=None, min_samples_leaf=1, min_samples_split=5;, score=0.739 total time=   1.3s
[CV 5/5] END max_dep

[CV 2/5] END max_depth=5, min_samples_leaf=4, min_samples_split=5;, score=0.824 total time=   0.2s
[CV 3/5] END max_depth=5, min_samples_leaf=4, min_samples_split=5;, score=0.820 total time=   0.2s
[CV 4/5] END max_depth=5, min_samples_leaf=4, min_samples_split=5;, score=0.818 total time=   0.2s
[CV 5/5] END max_depth=5, min_samples_leaf=4, min_samples_split=5;, score=0.817 total time=   0.2s
[CV 1/5] END max_depth=5, min_samples_leaf=4, min_samples_split=10;, score=0.821 total time=   0.2s
[CV 2/5] END max_depth=5, min_samples_leaf=4, min_samples_split=10;, score=0.824 total time=   0.2s
[CV 3/5] END max_depth=5, min_samples_leaf=4, min_samples_split=10;, score=0.820 total time=   0.3s
[CV 4/5] END max_depth=5, min_samples_leaf=4, min_samples_split=10;, score=0.818 total time=   0.3s
[CV 5/5] END max_depth=5, min_samples_leaf=4, min_samples_split=10;, score=0.817 total time=   0.2s
[CV 1/5] END max_depth=10, min_samples_leaf=1, min_samples_split=2;, score=0.806 total time=   0.5s
[CV 

[CV 4/5] END max_depth=15, min_samples_leaf=2, min_samples_split=10;, score=0.779 total time=   0.9s
[CV 5/5] END max_depth=15, min_samples_leaf=2, min_samples_split=10;, score=0.778 total time=   0.7s
[CV 1/5] END max_depth=15, min_samples_leaf=4, min_samples_split=2;, score=0.778 total time=   0.7s
[CV 2/5] END max_depth=15, min_samples_leaf=4, min_samples_split=2;, score=0.783 total time=   0.8s
[CV 3/5] END max_depth=15, min_samples_leaf=4, min_samples_split=2;, score=0.782 total time=   0.8s
[CV 4/5] END max_depth=15, min_samples_leaf=4, min_samples_split=2;, score=0.778 total time=   0.7s
[CV 5/5] END max_depth=15, min_samples_leaf=4, min_samples_split=2;, score=0.775 total time=   0.7s
[CV 1/5] END max_depth=15, min_samples_leaf=4, min_samples_split=5;, score=0.782 total time=   0.7s
[CV 2/5] END max_depth=15, min_samples_leaf=4, min_samples_split=5;, score=0.781 total time=   0.8s
[CV 3/5] END max_depth=15, min_samples_leaf=4, min_samples_split=5;, score=0.782 total time=   0.7

Обучение решающего дерева:

In [6]:
best_tree = DecisionTreeClassifier(**best_params_tree)
best_tree.fit(X_train_scaled, y_train)

Результаты:

In [9]:
accuracy_tree = best_tree.score(X_test_scaled, y_test)
print("Лучшие параметры для решающего дерева:", best_params_tree)
print("Точность решающего дерева на тестовом наборе:", accuracy_tree)

predictions = best_tree.predict(X_test_scaled)

# Отчет по классификации
print("\nОтчет по классификации:")
print(classification_report(y_test, predictions))

Лучшие параметры для решающего дерева: {'max_depth': 5, 'min_samples_leaf': 4, 'min_samples_split': 2}
Точность решающего дерева на тестовом наборе: 0.8171666666666667

Отчет по классификации:
              precision    recall  f1-score   support

           0       0.84      0.95      0.89      4687
           1       0.66      0.35      0.45      1313

    accuracy                           0.82      6000
   macro avg       0.75      0.65      0.67      6000
weighted avg       0.80      0.82      0.79      6000



#### Метод случаного леса:

Гиперпараметры:
   - n_estimators: Количество деревьев в лесу. Большее количество деревьев может улучшить производительность, но с большими затратами на вычислительные ресурсы.
   - max_depth: Максимальная глубина каждого дерева в лесу. Это ограничивает глубину каждого дерева в лесу, что помогает управлять переобучением.
   - min_samples_split: Минимальное количество выборок, необходимых для разделения внутреннего узла дерева. Определяет, сколько выборок должно быть в узле, чтобы он был разделен.
   - min_samples_leaf: Минимальное количество выборок, необходимых для существования листового узла. Это определяет, сколько выборок должно быть на каждом листе дерева.

In [10]:
# Определение параметров для случайного леса
# Указал маленькие значения для ускорения обучения
param_grid_forest = {
    'n_estimators': range(2, 10),
    'max_depth': [None, 5, 10, 15],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2, 4]
}

In [11]:
# Создание модели случайного леса
random_forest = RandomForestClassifier()

# Поиск лучших параметров для случайного леса
grid_search_forest = GridSearchCV(random_forest, param_grid_forest, refit=True, verbose=3, cv=5)
grid_search_forest.fit(X_train_scaled, y_train)

# Лучшие параметры для случайного леса
best_params_forest = grid_search_forest.best_params_
best_score_forest = grid_search_forest.best_score_

Fitting 5 folds for each of 192 candidates, totalling 960 fits
[CV 1/5] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=2;, score=0.789 total time=   0.2s
[CV 2/5] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=2;, score=0.789 total time=   0.3s
[CV 3/5] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=2;, score=0.786 total time=   0.2s
[CV 4/5] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=2;, score=0.776 total time=   0.3s
[CV 5/5] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=2;, score=0.790 total time=   0.2s
[CV 1/5] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=3;, score=0.774 total time=   0.4s
[CV 2/5] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=3;, score=0.774 total time=   0.5s
[CV 3/5] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=3;, score=0.769 total time= 

[CV 5/5] END max_depth=None, min_samples_leaf=1, min_samples_split=5, n_estimators=7;, score=0.797 total time=   1.0s
[CV 1/5] END max_depth=None, min_samples_leaf=1, min_samples_split=5, n_estimators=8;, score=0.802 total time=   1.2s
[CV 2/5] END max_depth=None, min_samples_leaf=1, min_samples_split=5, n_estimators=8;, score=0.810 total time=   1.2s
[CV 3/5] END max_depth=None, min_samples_leaf=1, min_samples_split=5, n_estimators=8;, score=0.805 total time=   1.0s
[CV 4/5] END max_depth=None, min_samples_leaf=1, min_samples_split=5, n_estimators=8;, score=0.804 total time=   1.2s
[CV 5/5] END max_depth=None, min_samples_leaf=1, min_samples_split=5, n_estimators=8;, score=0.800 total time=   1.1s
[CV 1/5] END max_depth=None, min_samples_leaf=1, min_samples_split=5, n_estimators=9;, score=0.802 total time=   1.3s
[CV 2/5] END max_depth=None, min_samples_leaf=1, min_samples_split=5, n_estimators=9;, score=0.816 total time=   1.3s
[CV 3/5] END max_depth=None, min_samples_leaf=1, min_sam

[CV 5/5] END max_depth=None, min_samples_leaf=2, min_samples_split=5, n_estimators=5;, score=0.794 total time=   0.7s
[CV 1/5] END max_depth=None, min_samples_leaf=2, min_samples_split=5, n_estimators=6;, score=0.802 total time=   0.8s
[CV 2/5] END max_depth=None, min_samples_leaf=2, min_samples_split=5, n_estimators=6;, score=0.803 total time=   0.9s
[CV 3/5] END max_depth=None, min_samples_leaf=2, min_samples_split=5, n_estimators=6;, score=0.804 total time=   0.8s
[CV 4/5] END max_depth=None, min_samples_leaf=2, min_samples_split=5, n_estimators=6;, score=0.805 total time=   0.7s
[CV 5/5] END max_depth=None, min_samples_leaf=2, min_samples_split=5, n_estimators=6;, score=0.803 total time=   0.7s
[CV 1/5] END max_depth=None, min_samples_leaf=2, min_samples_split=5, n_estimators=7;, score=0.799 total time=   1.0s
[CV 2/5] END max_depth=None, min_samples_leaf=2, min_samples_split=5, n_estimators=7;, score=0.810 total time=   0.9s
[CV 3/5] END max_depth=None, min_samples_leaf=2, min_sam

[CV 5/5] END max_depth=None, min_samples_leaf=4, min_samples_split=5, n_estimators=3;, score=0.797 total time=   0.4s
[CV 1/5] END max_depth=None, min_samples_leaf=4, min_samples_split=5, n_estimators=4;, score=0.795 total time=   0.5s
[CV 2/5] END max_depth=None, min_samples_leaf=4, min_samples_split=5, n_estimators=4;, score=0.812 total time=   0.5s
[CV 3/5] END max_depth=None, min_samples_leaf=4, min_samples_split=5, n_estimators=4;, score=0.803 total time=   0.4s
[CV 4/5] END max_depth=None, min_samples_leaf=4, min_samples_split=5, n_estimators=4;, score=0.800 total time=   0.5s
[CV 5/5] END max_depth=None, min_samples_leaf=4, min_samples_split=5, n_estimators=4;, score=0.795 total time=   0.5s
[CV 1/5] END max_depth=None, min_samples_leaf=4, min_samples_split=5, n_estimators=5;, score=0.804 total time=   0.6s
[CV 2/5] END max_depth=None, min_samples_leaf=4, min_samples_split=5, n_estimators=5;, score=0.809 total time=   0.6s
[CV 3/5] END max_depth=None, min_samples_leaf=4, min_sam

[CV 2/5] END max_depth=5, min_samples_leaf=1, min_samples_split=5, n_estimators=2;, score=0.820 total time=   0.0s
[CV 3/5] END max_depth=5, min_samples_leaf=1, min_samples_split=5, n_estimators=2;, score=0.814 total time=   0.0s
[CV 4/5] END max_depth=5, min_samples_leaf=1, min_samples_split=5, n_estimators=2;, score=0.802 total time=   0.0s
[CV 5/5] END max_depth=5, min_samples_leaf=1, min_samples_split=5, n_estimators=2;, score=0.809 total time=   0.0s
[CV 1/5] END max_depth=5, min_samples_leaf=1, min_samples_split=5, n_estimators=3;, score=0.818 total time=   0.0s
[CV 2/5] END max_depth=5, min_samples_leaf=1, min_samples_split=5, n_estimators=3;, score=0.817 total time=   0.0s
[CV 3/5] END max_depth=5, min_samples_leaf=1, min_samples_split=5, n_estimators=3;, score=0.812 total time=   0.0s
[CV 4/5] END max_depth=5, min_samples_leaf=1, min_samples_split=5, n_estimators=3;, score=0.819 total time=   0.1s
[CV 5/5] END max_depth=5, min_samples_leaf=1, min_samples_split=5, n_estimators=

[CV 4/5] END max_depth=5, min_samples_leaf=2, min_samples_split=2, n_estimators=8;, score=0.819 total time=   0.3s
[CV 5/5] END max_depth=5, min_samples_leaf=2, min_samples_split=2, n_estimators=8;, score=0.821 total time=   0.3s
[CV 1/5] END max_depth=5, min_samples_leaf=2, min_samples_split=2, n_estimators=9;, score=0.819 total time=   0.3s
[CV 2/5] END max_depth=5, min_samples_leaf=2, min_samples_split=2, n_estimators=9;, score=0.824 total time=   0.4s
[CV 3/5] END max_depth=5, min_samples_leaf=2, min_samples_split=2, n_estimators=9;, score=0.811 total time=   0.3s
[CV 4/5] END max_depth=5, min_samples_leaf=2, min_samples_split=2, n_estimators=9;, score=0.815 total time=   0.3s
[CV 5/5] END max_depth=5, min_samples_leaf=2, min_samples_split=2, n_estimators=9;, score=0.811 total time=   0.3s
[CV 1/5] END max_depth=5, min_samples_leaf=2, min_samples_split=5, n_estimators=2;, score=0.801 total time=   0.0s
[CV 2/5] END max_depth=5, min_samples_leaf=2, min_samples_split=5, n_estimators=

[CV 1/5] END max_depth=5, min_samples_leaf=4, min_samples_split=2, n_estimators=7;, score=0.810 total time=   0.2s
[CV 2/5] END max_depth=5, min_samples_leaf=4, min_samples_split=2, n_estimators=7;, score=0.824 total time=   0.3s
[CV 3/5] END max_depth=5, min_samples_leaf=4, min_samples_split=2, n_estimators=7;, score=0.820 total time=   0.2s
[CV 4/5] END max_depth=5, min_samples_leaf=4, min_samples_split=2, n_estimators=7;, score=0.819 total time=   0.3s
[CV 5/5] END max_depth=5, min_samples_leaf=4, min_samples_split=2, n_estimators=7;, score=0.815 total time=   0.2s
[CV 1/5] END max_depth=5, min_samples_leaf=4, min_samples_split=2, n_estimators=8;, score=0.809 total time=   0.2s
[CV 2/5] END max_depth=5, min_samples_leaf=4, min_samples_split=2, n_estimators=8;, score=0.818 total time=   0.1s
[CV 3/5] END max_depth=5, min_samples_leaf=4, min_samples_split=2, n_estimators=8;, score=0.816 total time=   0.2s
[CV 4/5] END max_depth=5, min_samples_leaf=4, min_samples_split=2, n_estimators=

[CV 3/5] END max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=5;, score=0.809 total time=   0.3s
[CV 4/5] END max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=5;, score=0.814 total time=   0.4s
[CV 5/5] END max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=5;, score=0.811 total time=   0.3s
[CV 1/5] END max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=6;, score=0.811 total time=   0.4s
[CV 2/5] END max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=6;, score=0.814 total time=   0.4s
[CV 3/5] END max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=6;, score=0.815 total time=   0.4s
[CV 4/5] END max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=6;, score=0.809 total time=   0.5s
[CV 5/5] END max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=6;, score=0.809 total time=   0.4s
[CV 1/5] END max_depth=10, min_samples_leaf=1, min_samples_split=2, n_es

[CV 4/5] END max_depth=10, min_samples_leaf=2, min_samples_split=2, n_estimators=3;, score=0.807 total time=   0.1s
[CV 5/5] END max_depth=10, min_samples_leaf=2, min_samples_split=2, n_estimators=3;, score=0.810 total time=   0.2s
[CV 1/5] END max_depth=10, min_samples_leaf=2, min_samples_split=2, n_estimators=4;, score=0.808 total time=   0.2s
[CV 2/5] END max_depth=10, min_samples_leaf=2, min_samples_split=2, n_estimators=4;, score=0.816 total time=   0.3s
[CV 3/5] END max_depth=10, min_samples_leaf=2, min_samples_split=2, n_estimators=4;, score=0.810 total time=   0.3s
[CV 4/5] END max_depth=10, min_samples_leaf=2, min_samples_split=2, n_estimators=4;, score=0.809 total time=   0.2s
[CV 5/5] END max_depth=10, min_samples_leaf=2, min_samples_split=2, n_estimators=4;, score=0.800 total time=   0.3s
[CV 1/5] END max_depth=10, min_samples_leaf=2, min_samples_split=2, n_estimators=5;, score=0.812 total time=   0.3s
[CV 2/5] END max_depth=10, min_samples_leaf=2, min_samples_split=2, n_es

[CV 5/5] END max_depth=10, min_samples_leaf=2, min_samples_split=5, n_estimators=9;, score=0.815 total time=   0.6s
[CV 1/5] END max_depth=10, min_samples_leaf=4, min_samples_split=2, n_estimators=2;, score=0.806 total time=   0.1s
[CV 2/5] END max_depth=10, min_samples_leaf=4, min_samples_split=2, n_estimators=2;, score=0.809 total time=   0.1s
[CV 3/5] END max_depth=10, min_samples_leaf=4, min_samples_split=2, n_estimators=2;, score=0.798 total time=   0.1s
[CV 4/5] END max_depth=10, min_samples_leaf=4, min_samples_split=2, n_estimators=2;, score=0.804 total time=   0.1s
[CV 5/5] END max_depth=10, min_samples_leaf=4, min_samples_split=2, n_estimators=2;, score=0.802 total time=   0.1s
[CV 1/5] END max_depth=10, min_samples_leaf=4, min_samples_split=2, n_estimators=3;, score=0.809 total time=   0.1s
[CV 2/5] END max_depth=10, min_samples_leaf=4, min_samples_split=2, n_estimators=3;, score=0.817 total time=   0.2s
[CV 3/5] END max_depth=10, min_samples_leaf=4, min_samples_split=2, n_es

[CV 1/5] END max_depth=10, min_samples_leaf=4, min_samples_split=5, n_estimators=8;, score=0.815 total time=   0.6s
[CV 2/5] END max_depth=10, min_samples_leaf=4, min_samples_split=5, n_estimators=8;, score=0.820 total time=   0.6s
[CV 3/5] END max_depth=10, min_samples_leaf=4, min_samples_split=5, n_estimators=8;, score=0.809 total time=   0.6s
[CV 4/5] END max_depth=10, min_samples_leaf=4, min_samples_split=5, n_estimators=8;, score=0.816 total time=   0.6s
[CV 5/5] END max_depth=10, min_samples_leaf=4, min_samples_split=5, n_estimators=8;, score=0.813 total time=   0.6s
[CV 1/5] END max_depth=10, min_samples_leaf=4, min_samples_split=5, n_estimators=9;, score=0.818 total time=   0.7s
[CV 2/5] END max_depth=10, min_samples_leaf=4, min_samples_split=5, n_estimators=9;, score=0.826 total time=   0.6s
[CV 3/5] END max_depth=10, min_samples_leaf=4, min_samples_split=5, n_estimators=9;, score=0.818 total time=   0.6s
[CV 4/5] END max_depth=10, min_samples_leaf=4, min_samples_split=5, n_es

[CV 2/5] END max_depth=15, min_samples_leaf=1, min_samples_split=5, n_estimators=6;, score=0.810 total time=   0.5s
[CV 3/5] END max_depth=15, min_samples_leaf=1, min_samples_split=5, n_estimators=6;, score=0.809 total time=   0.6s
[CV 4/5] END max_depth=15, min_samples_leaf=1, min_samples_split=5, n_estimators=6;, score=0.807 total time=   0.6s
[CV 5/5] END max_depth=15, min_samples_leaf=1, min_samples_split=5, n_estimators=6;, score=0.807 total time=   0.6s
[CV 1/5] END max_depth=15, min_samples_leaf=1, min_samples_split=5, n_estimators=7;, score=0.804 total time=   0.8s
[CV 2/5] END max_depth=15, min_samples_leaf=1, min_samples_split=5, n_estimators=7;, score=0.813 total time=   0.7s
[CV 3/5] END max_depth=15, min_samples_leaf=1, min_samples_split=5, n_estimators=7;, score=0.812 total time=   0.7s
[CV 4/5] END max_depth=15, min_samples_leaf=1, min_samples_split=5, n_estimators=7;, score=0.806 total time=   0.7s
[CV 5/5] END max_depth=15, min_samples_leaf=1, min_samples_split=5, n_es

[CV 3/5] END max_depth=15, min_samples_leaf=2, min_samples_split=5, n_estimators=4;, score=0.802 total time=   0.3s
[CV 4/5] END max_depth=15, min_samples_leaf=2, min_samples_split=5, n_estimators=4;, score=0.804 total time=   0.3s
[CV 5/5] END max_depth=15, min_samples_leaf=2, min_samples_split=5, n_estimators=4;, score=0.799 total time=   0.4s
[CV 1/5] END max_depth=15, min_samples_leaf=2, min_samples_split=5, n_estimators=5;, score=0.810 total time=   0.5s
[CV 2/5] END max_depth=15, min_samples_leaf=2, min_samples_split=5, n_estimators=5;, score=0.811 total time=   0.5s
[CV 3/5] END max_depth=15, min_samples_leaf=2, min_samples_split=5, n_estimators=5;, score=0.806 total time=   0.5s
[CV 4/5] END max_depth=15, min_samples_leaf=2, min_samples_split=5, n_estimators=5;, score=0.807 total time=   0.5s
[CV 5/5] END max_depth=15, min_samples_leaf=2, min_samples_split=5, n_estimators=5;, score=0.803 total time=   0.5s
[CV 1/5] END max_depth=15, min_samples_leaf=2, min_samples_split=5, n_es

[CV 4/5] END max_depth=15, min_samples_leaf=4, min_samples_split=5, n_estimators=2;, score=0.801 total time=   0.2s
[CV 5/5] END max_depth=15, min_samples_leaf=4, min_samples_split=5, n_estimators=2;, score=0.800 total time=   0.1s
[CV 1/5] END max_depth=15, min_samples_leaf=4, min_samples_split=5, n_estimators=3;, score=0.799 total time=   0.2s
[CV 2/5] END max_depth=15, min_samples_leaf=4, min_samples_split=5, n_estimators=3;, score=0.807 total time=   0.3s
[CV 3/5] END max_depth=15, min_samples_leaf=4, min_samples_split=5, n_estimators=3;, score=0.803 total time=   0.2s
[CV 4/5] END max_depth=15, min_samples_leaf=4, min_samples_split=5, n_estimators=3;, score=0.803 total time=   0.3s
[CV 5/5] END max_depth=15, min_samples_leaf=4, min_samples_split=5, n_estimators=3;, score=0.802 total time=   0.2s
[CV 1/5] END max_depth=15, min_samples_leaf=4, min_samples_split=5, n_estimators=4;, score=0.806 total time=   0.3s
[CV 2/5] END max_depth=15, min_samples_leaf=4, min_samples_split=5, n_es

Обучение случайного леса:

In [12]:
best_forest = RandomForestClassifier(**best_params_forest)
best_forest.fit(X_train_scaled, y_train)

Результаты:

In [14]:
accuracy_forest = best_forest.score(X_test_scaled, y_test)
print("Лучшие параметры для случайного леса:", best_params_forest)
print("Точность случайного леса на тестовом наборе:", accuracy_forest)

predictions = best_forest.predict(X_test_scaled)

# Отчет по классификации
print("\nОтчет по классификации:")
print(classification_report(y_test, predictions))

Лучшие параметры для случайного леса: {'max_depth': 5, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 8}
Точность случайного леса на тестовом наборе: 0.8171666666666667

Отчет по классификации:
              precision    recall  f1-score   support

           0       0.84      0.95      0.89      4687
           1       0.66      0.34      0.45      1313

    accuracy                           0.82      6000
   macro avg       0.75      0.64      0.67      6000
weighted avg       0.80      0.82      0.79      6000

