In [14]:
import pandas as pd
import numpy as np

from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import accuracy_score, f1_score, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from catboost import CatBoostClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC

In [15]:
# загрузка данных
df = pd.read_csv('D:\\notebooks\\sem2\\Classic_MO\\course\\df_IC50_median.csv')

In [16]:
random_state = 17
test_size = 0.2

# модели
models = {
    'LogisticRegression': LogisticRegression(max_iter=1000),
    'DecisionTree': DecisionTreeClassifier(random_state=random_state),
    'RandomForest': RandomForestClassifier(random_state=random_state),
    'CatBoost': CatBoostClassifier(verbose=0, random_state=random_state),
    'SVC': SVC(probability=True)
}

# параметры для GridSearch
param_grids = {
    'LogisticRegression': {'C': [0.1, 1, 10]},
    'DecisionTree': {'max_depth': [3, 5, 7, 10, None]},
    'RandomForest': {'n_estimators': [50, 100], 'max_depth': [5, 10, None]},
    'CatBoost': {'depth': [4, 6, 8], 'learning_rate': [0.01, 0.1]},
    'SVC': {'C': [0.1, 1, 10], 'kernel': ['rbf', 'linear']}
}

In [17]:
features = df.drop(columns='greater_median')
target = df['greater_median']

X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=test_size, random_state=random_state)

In [18]:
results = []

# обучение
for name, model in models.items():
    print(f"\n{name}")
    grid = GridSearchCV(model, param_grids[name], cv=5, scoring='f1_macro', n_jobs=-1)
    grid.fit(X_train, y_train)
    best_model = grid.best_estimator_
    
    y_pred = best_model.predict(X_test)
    
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='macro')
    
    print("Best Parameters:", grid.best_params_)
    print("Accuracy:", acc)
    print("F1 Score (macro):", f1)
    print("Classification Report:\n", classification_report(y_test, y_pred))

    results.append({
        'Model': name,
        'Best Params': grid.best_params_,
        'accuracy': acc,
        'f1_score': f1
    })

# сводная таблица результатов
pd.DataFrame(results)


LogisticRegression
Best Parameters: {'C': 10}
Accuracy: 0.6467661691542289
F1 Score (macro): 0.6467311929103646
Classification Report:
               precision    recall  f1-score   support

           0       0.61      0.69      0.65        95
           1       0.69      0.60      0.64       106

    accuracy                           0.65       201
   macro avg       0.65      0.65      0.65       201
weighted avg       0.65      0.65      0.65       201


DecisionTree
Best Parameters: {'max_depth': None}
Accuracy: 0.582089552238806
F1 Score (macro): 0.5819964349376114
Classification Report:
               precision    recall  f1-score   support

           0       0.55      0.63      0.59        95
           1       0.62      0.54      0.58       106

    accuracy                           0.58       201
   macro avg       0.59      0.58      0.58       201
weighted avg       0.59      0.58      0.58       201


RandomForest
Best Parameters: {'max_depth': None, 'n_estimators': 10

Unnamed: 0,Model,Best Params,accuracy,f1_score
0,LogisticRegression,{'C': 10},0.646766,0.646731
1,DecisionTree,{'max_depth': None},0.58209,0.581996
2,RandomForest,"{'max_depth': None, 'n_estimators': 100}",0.626866,0.626829
3,CatBoost,"{'depth': 4, 'learning_rate': 0.01}",0.686567,0.685446
4,SVC,"{'C': 10, 'kernel': 'rbf'}",0.681592,0.680636
