In [1]:
import numpy as np
import pandas as pd
import warnings
import model_search

from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor, \
    HistGradientBoostingRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from catboost import CatBoostRegressor
from xgboost import XGBRegressor
from nw_kernel import NWScikit

warnings.filterwarnings(action="ignore")

In [5]:
# Загрузка данных
df = pd.read_csv('../data/real_ds_0.csv')
columns = [*df]

# Преобразование в массивы
x_data = np.array(df[columns[:-3]].values.tolist())
y_data = np.array(df[columns[-1]].values.tolist()).reshape([-1, 1])

# Разделение на обучающую и тестовую выборки
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.2, shuffle=True)

# Нормализация
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

In [6]:
# Классы моделей
model_classes = [NWScikit, HistGradientBoostingRegressor, XGBRegressor, GradientBoostingRegressor,
                ExtraTreesRegressor,
                RandomForestRegressor, CatBoostRegressor]

# Гиперпараметрический поиск и оценка моделей
BO = model_search.MultiModelBayesianSearchCV(model_classes, n_iter=80, random_state=42)
BO.fit(x_train, y_train)
BO.score(x_test, y_test)
results = BO.get_results()
results.to_csv('../results/real_results.csv')
print(results.to_string(), '\n')

Оптимизация модели NWScikit
|   iter    |  target   | batch_... |  epoch_n  | n_neurons | n_layers  |    lr     |
-------------------------------------------------------------------------------------
| [39m1        [39m | [39m-2.405   [39m | [39m78       [39m | [39m448      [39m | [39m64       [39m | [39m1        [39m | [39m0.0003920[39m |
| [35m2        [39m | [35m-2.287   [39m | [35m60       [39m | [35m202      [39m | [35m171      [39m | [35m1        [39m | [35m5.8987...[39m |
| [35m3        [39m | [35m-2.238   [39m | [35m50       [39m | [35m187      [39m | [35m166      [39m | [35m2        [39m | [35m8.0004...[39m |
| [39m4        [39m | [39m-2.378   [39m | [39m42       [39m | [39m249      [39m | [39m102      [39m | [39m2        [39m | [39m0.0003637[39m |
| [39m5        [39m | [39m-2.329   [39m | [39m69       [39m | [39m393      [39m | [39m179      [39m | [39m2        [39m | [39m0.0004961[39m |
| [39m6        [39

In [9]:
# Классы моделей
model_classes = [NWScikit, HistGradientBoostingRegressor, XGBRegressor, GradientBoostingRegressor,
                ExtraTreesRegressor,
                RandomForestRegressor, CatBoostRegressor]

# Гиперпараметрический поиск и оценка моделей
BO2 = model_search.MultiModelBayesianSearchCV(model_classes, n_iter=30, random_state=42)
BO2.fit(x_train, y_train)
BO2.score(x_test, y_test)
results = BO2.get_results()
results.to_csv('../results/real_results.csv')
print(results.to_string(), '\n')

Оптимизация модели NWScikit
|   iter    |  target   | batch_... |  epoch_n  | n_neurons | n_layers  |    lr     |
-------------------------------------------------------------------------------------
| [39m1        [39m | [39m-2.309   [39m | [39m78       [39m | [39m448      [39m | [39m64       [39m | [39m1        [39m | [39m0.0003920[39m |
| [35m2        [39m | [35m-2.238   [39m | [35m60       [39m | [35m202      [39m | [35m171      [39m | [35m1        [39m | [35m5.8987...[39m |
| [39m3        [39m | [39m-2.346   [39m | [39m50       [39m | [39m187      [39m | [39m166      [39m | [39m2        [39m | [39m8.0004...[39m |
| [39m4        [39m | [39m-2.286   [39m | [39m42       [39m | [39m249      [39m | [39m102      [39m | [39m2        [39m | [39m0.0003637[39m |
| [39m5        [39m | [39m-2.493   [39m | [39m69       [39m | [39m393      [39m | [39m179      [39m | [39m2        [39m | [39m0.0004961[39m |
| [39m6        [39

In [8]:
# Оптимизация над аппроксимируемой функцией
BO.find_max(scaler, True)

Model NWScikit found maximum 17.66986846923828 with parameters {'Pc': 4.0, 'U': 75.35081765518777, 't': 5.0, 'L': 9.62260765332963, 'B': 17.26623492394488}
Model HistGradientBoostingRegressor found maximum 11.089678253117198 with parameters {'Pc': 4.0, 'U': 87.78212144361584, 't': 5.0, 'L': 8.388994512182215, 'B': 14.727187557064267}
Model XGBRegressor found maximum 11.382760047912598 with parameters {'Pc': 3.0, 'U': 76.9224972823769, 't': 5.0, 'L': 10.28814633921898, 'B': 11.995484179449535}
Model GradientBoostingRegressor found maximum 10.947368112969015 with parameters {'Pc': 4.0, 'U': 90.5100815676592, 't': 5.0, 'L': 15.703487550621158, 'B': 14.480177671913005}
Model ExtraTreesRegressor found maximum 8.626948529411765 with parameters {'Pc': 4.0, 'U': 99.58203579915643, 't': 5.0, 'L': 9.102573136662125, 'B': 9.434397113828899}
Model RandomForestRegressor found maximum 10.38728175909058 with parameters {'Pc': 4.0, 'U': 90.64262985736873, 't': 5.0, 'L': 9.23241779696131, 'B': 8.804629

In [13]:
# Оптимизация над аппроксимируемой функцией
BO2.find_max(scaler, True)

Model NWScikit found maximum 17.688493728637695 with parameters {'Pc': 4.0, 'U': 65.8691118867588, 't': 5.0, 'L': 9.55662733891514, 'B': 16.280402708433364}
Model HistGradientBoostingRegressor found maximum 10.563906685065842 with parameters {'Pc': 4.0, 'U': 88.62552124092738, 't': 5.0, 'L': 15.426273107384793, 'B': 8.379813596949198}
Model XGBRegressor found maximum 9.796538352966309 with parameters {'Pc': 4.0, 'U': 82.88332746810894, 't': 5.0, 'L': 14.585554008042887, 'B': 10.14286902793306}
Model GradientBoostingRegressor found maximum 10.76404798552681 with parameters {'Pc': 4.0, 'U': 97.93367574335522, 't': 5.0, 'L': 14.921975676889168, 'B': 12.693789761296522}
Model ExtraTreesRegressor found maximum 8.771920060999616 with parameters {'Pc': 4.0, 'U': 99.93892347715811, 't': 5.0, 'L': 15.863076685150194, 'B': 15.721545164688493}
Model RandomForestRegressor found maximum 10.390077705189546 with parameters {'Pc': 4.0, 'U': 99.58005172796003, 't': 5.0, 'L': 9.582074975496258, 'B': 8.8