In [21]:
from sklearn.datasets import load_digits, load_breast_cancer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score, classification_report
from sklearn.model_selection import cross_val_score

In [22]:
digits = load_breast_cancer()
X, y = digits.data, digits.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = RandomForestClassifier()

In [23]:
cross_val_scores = cross_val_score(model, X_train, y_train, cv=5, scoring='f1_macro')
print("Cross-Validation F1-Scores:", cross_val_scores)
print("Mean F1-Score:", cross_val_scores.mean())


Cross-Validation F1-Scores: [0.96456846 0.94017094 0.97678571 0.94164422 0.91623932]
Mean F1-Score: 0.9478817309740656


In [24]:
param_grid = {
    'n_estimators': [10, 50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='f1_macro', n_jobs=-1)
grid_search.fit(X_train, y_train)

In [25]:
print("Best Parameters:", grid_search.best_params_)

# Предсказание на тестовой выборке
y_pred = grid_search.predict(X_test)

# Оценка качества модели на тестовой выборке с использованием метрики F1-score
f1_test = f1_score(y_test, y_pred, average='macro')
print("Test F1-Score:", f1_test)

# Вывод отчета о классификации
print("Classification Report:\n", classification_report(y_test, y_pred))

Best Parameters: {'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 10}
Test F1-Score: 0.9623015873015873
Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.93      0.95        43
           1       0.96      0.99      0.97        71

    accuracy                           0.96       114
   macro avg       0.97      0.96      0.96       114
weighted avg       0.97      0.96      0.96       114

