In [None]:
import warnings
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import model_search

warnings.filterwarnings(action="ignore")

In [None]:
# Load the dataset. The last entry of `columns` is used as the regression
# target; all preceding entries are used as input features.
df = pd.read_csv('../data/real_ds_0.csv')
columns = ['U','t','L','B','Pp','D']

# Convert to NumPy arrays: features as a 2-D (n_samples, n_features) matrix,
# target reshaped into an (n_samples, 1) column vector.
x_data = df[columns[:-1]].to_numpy()
y_data = df[columns[-1]].to_numpy().reshape(-1, 1)

# Train/test split. The shuffle is seeded so that "Restart & Run All" always
# produces the same partition (and therefore the same scaler statistics and
# comparable search results between runs).
RANDOM_STATE = 42
x_train, x_test, y_train, y_test = train_test_split(
    x_data,
    y_data,
    test_size=0.01,
    shuffle=True,
    random_state=RANDOM_STATE,
)

# Standardize the features. The scaler is fitted on the training part only and
# then applied unchanged to the test part, so no test statistics leak into it.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

In [None]:
# Model classes to be searched, referenced by class name.
model_classes = [
    'HistGradientBoostingRegressor',
    'XGBRegressor',
    'RandomForestRegressor',
    'CatBoostRegressor',
    'ElasticNet',
    'BayesianRidge',
]

# Hyperparameter search and evaluation of the models.
# NOTE(review): 'loo' presumably selects leave-one-out validation — confirm
# against the model_search module.
OS = model_search.OptunaSearchCV(model_classes, compare_kfold=False)
OS.fit(
    x_train, y_train,
    x_test, y_test,
    ['loo'],
    n_trials=100,
    n_startup_trials=50,
)

In [None]:
# Persist the search results. The target directory is created first so that a
# missing '../results' folder cannot make the write fail after the (long)
# search has already finished.
results_csv_path = Path('../results/real_results1.csv')
results_csv_path.parent.mkdir(parents=True, exist_ok=True)
OS.results_df.to_csv(results_csv_path)

In [None]:
# Reload the saved results from disk and show the full table (no row/column
# truncation), ordered by the `R2_val` column in descending order, NaNs last.
# NOTE: this rebinds `df`, which held the raw dataset in the loading cell.
df = pd.read_csv('../results/real_results1.csv')
results_sorted = df.sort_values(by='R2_val', ascending=False, na_position='last')
with pd.option_context('display.max_columns', None, 'display.max_rows', None):
    display(results_sorted)

In [None]:
# Optimization over the approximated function.
# NOTE(review): the entry at index 1 of `OS.best_models` is hard-coded here —
# confirm that this is the intended model.
model = OS.best_models[1]

OS.optimize(
    [model],
    scaler,
    direction='maximize',
    problem='real',
    plot=True,
    n_trials=200,
)