In [12]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from catboost import CatBoostRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.decomposition import PCA
from umap.umap_ import UMAP
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import matplotlib.patches as mpatches

In [13]:
# загрузка данных
df = pd.read_csv('D:\\notebooks\\sem2\\Classic_MO\\course\\df_SI.csv')

In [14]:
random_state = 17
test_size = 0.2

# модели
models = {
    'LinearRegression': LinearRegression(),
    'DecisionTree': DecisionTreeRegressor(random_state=random_state),
    'RandomForest': RandomForestRegressor(random_state=random_state, n_estimators=100),
    'CatBoost': CatBoostRegressor(verbose=0, random_state=random_state),
    'SVR': SVR()
}

# параметры для GridSearch
param_grids = {
    'LinearRegression': {},
    'DecisionTree': {'max_depth': [3, 5, 7, 10, None]},
    'RandomForest': {'n_estimators': [50, 100], 'max_depth': [5, 10, None]},
    'CatBoost': {'depth': [4, 6, 8], 'learning_rate': [0.01, 0.1]},
    'SVR': {'C': [0.1, 1, 10], 'kernel': ['rbf', 'linear']}
}

In [15]:
features = df.drop(columns='SI')
target = df['SI']

X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=test_size, random_state=random_state)

In [16]:
results = []

# обучение
for name, model in models.items():
    grid = GridSearchCV(model, param_grids[name], cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
    grid.fit(X_train, y_train)
    
    best_model = grid.best_estimator_
    y_pred = best_model.predict(X_test)

    mae = mean_absolute_error(y_test, y_pred)
    rmse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    results.append({
        'Model': name,
        'Best Params': grid.best_params_,
        'MAE': mae,
        'RMSE': rmse,
        'R2': r2
    })

# сводная таблица результатов
pd.DataFrame(results)

Unnamed: 0,Model,Best Params,MAE,RMSE,R2
0,LinearRegression,{},6.205717,145.034423,-0.728224
1,DecisionTree,{'max_depth': None},1.797453,15.149786,0.819476
2,RandomForest,"{'max_depth': 10, 'n_estimators': 100}",1.248469,7.97352,0.904988
3,CatBoost,"{'depth': 4, 'learning_rate': 0.1}",1.000802,4.232411,0.949567
4,SVR,"{'C': 10, 'kernel': 'linear'}",4.965624,68.694177,0.181443
