# Оптимизация

In [None]:
import numpy as np
import pandas as pd
import torch

from scipy.optimize import minimize, basinhopping
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error as mse 
from sklearn.metrics import r2_score

from nw_kernel import NWScikit

from catboost import CatBoostRegressor 

In [10]:
df = pd.read_csv('data/synth_ds_0.csv')
df.head()

Unnamed: 0,x1,x2,x3,y
0,56.567039,51.645444,63.818649,0.791216
1,9.811206,73.933886,99.861635,0.432769
2,95.315984,39.907571,5.629303,1.145016
3,52.790528,98.471771,2.766417,2.084271
4,19.59673,80.399587,3.699458,-1.098826


In [11]:
# Преобразование в массивы
x_data = np.array(df[['x1','x2','x3']].values.tolist())
y_data = np.array(df['y'].values.tolist()).reshape([-1,1])
#x_one_column = x_data.reshape([1, -1]) # Для нормализации преобразование в одномерный

# Нормализация
scaler = StandardScaler()
x_norm = scaler.fit_transform(x_data)

In [12]:
# Разделение на обучающую и тестовую выборки
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.2, shuffle=True)

In [13]:
# Преобразование в тензоры
x_train_tensor = torch.tensor(x_train, dtype=torch.float32)
x_test_tensor = torch.tensor(x_test, dtype=torch.float32)

y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

In [None]:
# Поиск оптимальных параметров
model = NWScikit(
        dist_mode='nam',
        batch_size=24,
        x_val=x_train_tensor, y_val=y_train_tensor,
        kernel_fit_background=True,
        optimizer='Adam',
        lr=1e-3,
        weight_decay=0,
        background_lr=1e-3,
        background_weight_decay=0,
        epoch_n=100,
        pred_batch_size=16,
        verbose=False,
        verbose_tqdm=True,
        n_neurons=8,
        n_layers=3,
        batch_norm=True,
        problem_mode='reg'
    )

param_grid = {'batch_size': [16, 24, 50, 100, 200],
              'dist_mode': ['nam', 'mlp'],
			  'epoch_n': [50, 75, 100, 200],
			  'n_neurons': [8, 16, 32, 64, 128, 256, 512],
			  'n_layers': [1, 2, 3, 4],
			  'optimizer': ['SGD', 'Adam']}

#param_grid = {'batch_size': [16, 24, 50, 100, 200],
#              'dist_mode': ['nam', 'mlp'],}

search = GridSearchCV(model, param_grid, scoring='r2', n_jobs=-1)
search.fit(X=pd.DataFrame(x_train_tensor), y=y_train_tensor.detach().cpu().numpy())
results_df = pd.DataFrame(search.cv_results_)

print("Best Parameters: ", search.best_params_)
print("Best Cross-validation Score: ", search.best_score_)

In [14]:
boost_model = 	CatBoostRegressor(
				loss_function='RMSE',
				depth=4,
				iterations=500,
				rsm=1,
				leaf_estimation_iterations=20,
				random_seed=42
				)

boost_model.fit(x_train, y_train, verbose=0)

y_train_pred = boost_model.predict(x_train) 
y_test_pred = boost_model.predict(x_test) 

print(y_train[0], y_train_pred[0])
print("Training R2: ", r2_score(y_train, y_train_pred))
print((y_test[0], y_test_pred[0]))
print("Testing R2: ", r2_score(y_test, y_test_pred))

[-1.46036073] -1.3653970308958652
Training R2:  0.9915251902464296
(array([0.45741923]), 0.2895841713468371)
Testing R2:  0.7232323907530345


In [None]:
#boost_param_grid = {'iterations': [500],
#          			'depth': [2, 4, 8],
#          			'logging_level':['Silent'],
#					'rsm': [0.8, 1.0],
#          			'random_seed': [42]}

#boost_search = GridSearchCV(boost_model, boost_param_grid, scoring='r2', n_jobs=-1)
#boost_search.fit(X=pd.DataFrame(x_train_tensor), y=y_train_tensor.detach().cpu().numpy())

#print("Best Parameters: ", boost_search.best_params_)
#print("Best Cross-validation Score: ", boost_search.best_score_)

In [None]:
# MSE
pd.concat([pd.DataFrame(search.cv_results_["params"]),pd.DataFrame(search.cv_results_["mean_test_score"], columns=["Accuracy"])],axis=1).sort_values('Accuracy', ascending=False)

In [None]:
# Оптимизация

def func(x):
	return

def objective(params, model):
	x = np.array(params).reshape(1, -1)
	y_pred = model.predict(x)[0]
	return y_pred

initial_guess = np.mean(x_norm, axis=0)
result = basinhopping
result = minimize(objective, initial_guess, args=(boost_model), method='Newton-CG')

optimal_x = result.x
max_y = result.fun  # Максимальное значение y

print(min(x_data[0]), min(x_data[1]), min(x_data[2]), min(y_data))
print(max(x_data[0]), max(x_data[1]), max(x_data[2]), max(y_data))

print("Оптимальные параметры x:", optimal_x.reshape(1, -1))
print("Максимальное значение y:", max_y)

#print("Оптимальные параметры x:", scaler.inverse_transform(optimal_x.reshape(1, -1)))
#print("Максимальное значение y:", max_y)

ValueError: Jacobian is required for Newton-CG method