In [1]:
import numpy as np
import pandas as pd
import warnings
import model_search

from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor, \
    HistGradientBoostingRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from catboost import CatBoostRegressor
from xgboost import XGBRegressor
from nw_kernel import NWScikit

# Silence all Python warnings for the rest of the session: no category, module or
# message filter is given, so the "ignore" action applies to every warning raised
# by any library used below.
warnings.filterwarnings(action="ignore")

In [2]:
# Load the dataset. The last column is used as the regression target and all
# preceding columns as features.
df = pd.read_csv('../data/synth_ds_0.csv')
columns = [*df]

# Convert to NumPy arrays; the target is kept as a column vector (n_samples, 1).
x_data = df[columns[:-1]].to_numpy()
y_data = df[columns[-1]].to_numpy().reshape(-1, 1)

# Split into train and test sets. The seed is fixed (same value as the model
# search uses) so that a fresh Restart & Run All reproduces the same split and
# therefore comparable scores.
x_train, x_test, y_train, y_test = train_test_split(
    x_data, y_data, test_size=0.2, shuffle=True, random_state=42
)

# Standardize features. The scaler is fitted on the training split only and then
# applied to the test split, so no test-set statistics leak into training.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

In [3]:
# Candidate regressor classes that the search will tune and compare.
model_classes = [
    NWScikit,
    HistGradientBoostingRegressor,
    XGBRegressor,
    GradientBoostingRegressor,
    ExtraTreesRegressor,
    RandomForestRegressor,
    CatBoostRegressor,
]

# Hyperparameter search for every candidate on the training split, followed by
# scoring on the held-out test split.
# NOTE(review): MultiModelBayesianSearchCV lives in the project-local
# `model_search` module; presumably `score` stores its metrics internally for
# `get_results` to report, since its return value is not used here — confirm.
BO = model_search.MultiModelBayesianSearchCV(model_classes, n_iter=30, random_state=42)
BO.fit(x_train, y_train)
BO.score(x_test, y_test)

# Save the results table to disk and print it in full.
results = BO.get_results()
results.to_csv('../results/synth_results.csv')
print(f"{results.to_string()} \n")

Оптимизация модели NWScikit
|   iter    |  target   | batch_... |  epoch_n  | n_neurons | n_layers  |    lr     |
-------------------------------------------------------------------------------------
| [39m1        [39m | [39m-1.025   [39m | [39m78       [39m | [39m448      [39m | [39m64       [39m | [39m1        [39m | [39m0.0003920[39m |
| [35m2        [39m | [35m-0.9329  [39m | [35m60       [39m | [35m202      [39m | [35m171      [39m | [35m1        [39m | [35m5.8987...[39m |
| [39m3        [39m | [39m-0.9628  [39m | [39m50       [39m | [39m187      [39m | [39m166      [39m | [39m2        [39m | [39m8.0004...[39m |
| [39m4        [39m | [39m-0.9804  [39m | [39m42       [39m | [39m249      [39m | [39m102      [39m | [39m2        [39m | [39m0.0003637[39m |
| [39m5        [39m | [39m-0.9678  [39m | [39m69       [39m | [39m393      [39m | [39m179      [39m | [39m2        [39m | [39m0.0004961[39m |
| [35m6        [39

In [4]:
# Optimization over the approximated function: for each fitted model, look for
# the input point at which the model's prediction is maximal.
# NOTE(review): `find_max` is defined in the project-local `model_search` module
# and is not visible here. Judging by the printed output, it reports one maximum
# and its x1..x3 coordinates per model. Presumably the fitted `scaler` is passed
# so the search can convert between standardized and original feature units —
# confirm against the implementation.
BO.find_max(scaler)

Model NWScikit found maximum 4.091183662414551 with parameters {'x1': 0.29798189684864473, 'x2': 131.14552906901218, 'x3': 91.661699943136}
Model HistGradientBoostingRegressor found maximum 4.4682829444298955 with parameters {'x1': 1.7034491037373556, 'x2': 133.6556681537032, 'x3': 33.746404980293896}
Model XGBRegressor found maximum 3.0571649074554443 with parameters {'x1': 0.23226387588963782, 'x2': 80.95618812491857, 'x3': 69.85097817564485}
Model GradientBoostingRegressor found maximum 3.8962299371221487 with parameters {'x1': 0.02856496842189582, 'x2': 84.0315536253847, 'x3': 135.16631071050108}
Model ExtraTreesRegressor found maximum 3.702846406256083 with parameters {'x1': 0.08902048869827928, 'x2': 85.80832351648169, 'x3': 92.7327139103365}
Model RandomForestRegressor found maximum 3.3665748393059562 with parameters {'x1': 2.3363247490780603, 'x2': 138.99348285365954, 'x3': 34.15124084840153}
Model CatBoostRegressor found maximum 3.287480958161512 with parameters {'x1': 0.02285