In [6]:
import pandas as pd
import numpy as np

from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import accuracy_score, f1_score, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from catboost import CatBoostClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC

In [7]:
# загрузка данных
df = pd.read_csv('D:\\notebooks\\sem2\\Classic_MO\\course\\df_CC50_median.csv')

In [8]:
random_state = 17
test_size = 0.2

# модели
models = {
    'LogisticRegression': LogisticRegression(max_iter=1000),
    'DecisionTree': DecisionTreeClassifier(random_state=random_state),
    'RandomForest': RandomForestClassifier(random_state=random_state),
    'CatBoost': CatBoostClassifier(verbose=0, random_state=random_state),
    'SVC': SVC(probability=True)
}

# параметры для GridSearch
param_grids = {
    'LogisticRegression': {'C': [0.1, 1, 10]},
    'DecisionTree': {'max_depth': [3, 5, 7, 10, None]},
    'RandomForest': {'n_estimators': [50, 100], 'max_depth': [5, 10, None]},
    'CatBoost': {'depth': [4, 6, 8], 'learning_rate': [0.01, 0.1]},
    'SVC': {'C': [0.1, 1, 10], 'kernel': ['rbf', 'linear']}
}

In [9]:
features = df.drop(columns='greater_median')
target = df['greater_median']

X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=test_size, random_state=random_state)

In [10]:
results = []

# обучение
for name, model in models.items():
    print(f"\n{name}")
    grid = GridSearchCV(model, param_grids[name], cv=5, scoring='f1_macro', n_jobs=-1)
    grid.fit(X_train, y_train)
    best_model = grid.best_estimator_
    
    y_pred = best_model.predict(X_test)
    
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='macro')
    
    print("Best Parameters:", grid.best_params_)
    print("Accuracy:", acc)
    print("F1 Score (macro):", f1)
    print("Classification Report:\n", classification_report(y_test, y_pred))

    results.append({
        'Model': name,
        'Best Params': grid.best_params_,
        'accuracy': acc,
        'f1_score': f1
    })

# сводная таблица результатов
pd.DataFrame(results)


LogisticRegression
Best Parameters: {'C': 10}
Accuracy: 0.7114427860696517
F1 Score (macro): 0.7113784907902555
Classification Report:
               precision    recall  f1-score   support

           0       0.69      0.72      0.71        97
           1       0.73      0.70      0.72       104

    accuracy                           0.71       201
   macro avg       0.71      0.71      0.71       201
weighted avg       0.71      0.71      0.71       201


DecisionTree
Best Parameters: {'max_depth': 7}
Accuracy: 0.6666666666666666
F1 Score (macro): 0.6666336609154144
Classification Report:
               precision    recall  f1-score   support

           0       0.65      0.68      0.66        97
           1       0.69      0.65      0.67       104

    accuracy                           0.67       201
   macro avg       0.67      0.67      0.67       201
weighted avg       0.67      0.67      0.67       201


RandomForest
Best Parameters: {'max_depth': None, 'n_estimators': 100}

Unnamed: 0,Model,Best Params,accuracy,f1_score
0,LogisticRegression,{'C': 10},0.711443,0.711378
1,DecisionTree,{'max_depth': 7},0.666667,0.666634
2,RandomForest,"{'max_depth': None, 'n_estimators': 100}",0.726368,0.726124
3,CatBoost,"{'depth': 4, 'learning_rate': 0.01}",0.746269,0.746244
4,SVC,"{'C': 10, 'kernel': 'rbf'}",0.716418,0.71639
