In [1]:
import os
import warnings

import numpy as np
import pandas as pd
from catboost import CatBoostRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor, \
    HistGradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from xgboost import XGBRegressor

import model_search
from nw_kernel import NWScikit

warnings.filterwarnings(action="ignore")

In [2]:
# Load the dataset; the CSV column order decides what is a feature and what is the target.
df = pd.read_csv('../data/real_ds_0.csv')
columns = [*df]

# Convert to arrays: features are all columns except the last three, the target is the last column.
# NOTE(review): the columns at positions -3 and -2 are used neither as features nor as target — confirm intended.
x_data = df[columns[:-3]].to_numpy()
# The target is kept as an (n_samples, 1) column vector.
y_data = df[columns[-1]].to_numpy().reshape([-1, 1])

# Train/test split. The shuffle is seeded so the partition, and therefore every
# reported test score, is the same on each Restart & Run All.
SPLIT_SEED = 42
x_train, x_test, y_train, y_test = train_test_split(
    x_data, y_data, test_size=0.1, shuffle=True, random_state=SPLIT_SEED
)

# Standardization: statistics are fitted on the training part only and then
# applied to the test part, so no test information leaks into the scaler.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

In [70]:
# Model classes that take part in the search
model_classes = [NWScikit, HistGradientBoostingRegressor, XGBRegressor, GradientBoostingRegressor,
                ExtraTreesRegressor,
                RandomForestRegressor, CatBoostRegressor]

# Make sure the output directory exists BEFORE the search starts: DataFrame.to_csv
# does not create missing folders, and failing on the save would only surface after
# the whole (expensive) multi-model optimization has already run.
results_path = '../results/real_results.csv'
os.makedirs(os.path.dirname(results_path), exist_ok=True)

# Hyperparameter search and model evaluation
BO = model_search.MultiModelBayesianSearchCV(model_classes, n_iter=80, random_state=42)
BO.fit(x_train, y_train)
# The return value is discarded here; presumably the scores are kept on BO and
# surfaced by get_results() — confirm against model_search.
BO.score(x_test, y_test)
results = BO.get_results()
results.to_csv(results_path)
print(results.to_string(), '\n')

Оптимизация модели NWScikit
|   iter    |  target   | batch_... |  epoch_n  | n_neurons | n_layers  |    lr     |
-------------------------------------------------------------------------------------
| [39m1        [39m | [39m-2.242   [39m | [39m78       [39m | [39m448      [39m | [39m64       [39m | [39m1        [39m | [39m0.0003920[39m |
| [35m2        [39m | [35m-2.172   [39m | [35m60       [39m | [35m202      [39m | [35m171      [39m | [35m1        [39m | [35m5.8987...[39m |
| [39m3        [39m | [39m-2.257   [39m | [39m50       [39m | [39m187      [39m | [39m166      [39m | [39m2        [39m | [39m8.0004...[39m |
| [39m4        [39m | [39m-2.502   [39m | [39m42       [39m | [39m249      [39m | [39m102      [39m | [39m2        [39m | [39m0.0003637[39m |
| [39m5        [39m | [39m-2.308   [39m | [39m69       [39m | [39m393      [39m | [39m179      [39m | [39m2        [39m | [39m0.0004961[39m |
| [39m6        [39

In [73]:
# Optimization over the approximated function: the recorded output reports, for each
# model, the maximum it found and the input parameters at which it was found.
# NOTE(review): the scaler is presumably passed so inputs can be mapped between scaled
# and original units, and the meaning of the positional True flag is defined in
# model_search — confirm both.
BO.find_max(scaler, True)

Model NWScikit found maximum 17.678356170654297 with parameters {'Pc': 4.0, 'U': 76.71835589609606, 't': 5.0, 'L': 9.815029910520092, 'B': 17.488002735798588}
Model HistGradientBoostingRegressor found maximum 9.735505847141878 with parameters {'Pc': 4.0, 'U': 94.483207291312, 't': 5.0, 'L': 10.022212652392511, 'B': 13.302860181748363}
Model XGBRegressor found maximum 10.725809097290039 with parameters {'Pc': 4.0, 'U': 78.01262425625887, 't': 5.0, 'L': 10.733825738399213, 'B': 8.321457551896248}
Model GradientBoostingRegressor found maximum 9.522555628210625 with parameters {'Pc': 4.0, 'U': 78.42016135216693, 't': 5.0, 'L': 15.539479151025525, 'B': 8.183486049973425}
Model ExtraTreesRegressor found maximum 8.175460703442754 with parameters {'Pc': 4.0, 'U': 88.33093627395692, 't': 5.0, 'L': 8.528786974738928, 'B': 12.027860361909639}
Model RandomForestRegressor found maximum 8.849667578410893 with parameters {'Pc': 4.0, 'U': 99.88867772851398, 't': 5.0, 'L': 13.987933794766388, 'B': 8.00