# Оптимизация

In [1]:
import numpy as np
import pandas as pd
import torch

from scipy.optimize import minimize, basinhopping, shgo
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error as mse 
from sklearn.metrics import r2_score

from nw_kernel import NWScikit

from catboost import CatBoostRegressor 

In [2]:
df = pd.read_csv('data/real_ds_0.csv')
df.head()

Unnamed: 0,Pc,U,t,L,B,Sc,Pp,D
0,2,100,2,21.2,14.0,296.8,1.9,2.17
1,2,100,2,16.0,12.7,203.2,1.3,5.08
2,2,100,2,11.0,16.6,182.6,1.17,3.24
3,3,100,2,13.9,16.3,226.6,2.18,6.06
4,3,100,2,18.2,14.8,269.4,2.59,1.99


In [4]:
# Преобразование в массивы
x_data = np.array(df[['Pc','U','t','L','B','Sc','Pp']].values.tolist())
y_data = np.array(df['D'].values.tolist()).reshape([-1,1])
x_one_column = x_data.reshape([1, -1]) # Для нормализации преобразование в одномерный

# Нормализация
scaler = StandardScaler()
x_norm = scaler.fit_transform(x_data)

In [None]:
# Разделение на обучающую и тестовую выборки
x_train, x_test, y_train, y_test = train_test_split(x_norm, y_data, test_size=0.2, shuffle=True)

In [6]:
# Преобразование в тензоры
x_train_tensor = torch.tensor(x_train, dtype=torch.float32)
x_test_tensor = torch.tensor(x_test, dtype=torch.float32)

y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

In [None]:
# Поиск оптимальных параметров
model = NWScikit(
        dist_mode='nam',
        batch_size=24,
        x_val=x_train_tensor, y_val=y_train_tensor,
        kernel_fit_background=True,
        optimizer='Adam',
        lr=1e-3,
        weight_decay=0,
        background_lr=1e-3,
        background_weight_decay=0,
        epoch_n=100,
        pred_batch_size=16,
        verbose=False,
        verbose_tqdm=True,
        n_neurons=8,
        n_layers=3,
        batch_norm=True,
        problem_mode='reg'
    )

param_grid = {'batch_size': [16, 24, 50, 100, 200],
              'dist_mode': ['nam', 'mlp'],
			  'epoch_n': [50, 75, 100, 200],
			  'n_neurons': [8, 16, 32, 64, 128, 256, 512],
			  'n_layers': [1, 2, 3, 4],
			  'optimizer': ['SGD', 'Adam']}

#param_grid = {'batch_size': [16, 24, 50, 100, 200],
#              'dist_mode': ['nam', 'mlp'],}

search = GridSearchCV(model, param_grid, scoring='r2', n_jobs=-1)
search.fit(X=pd.DataFrame(x_train_tensor), y=y_train_tensor.detach().cpu().numpy())
results_df = pd.DataFrame(search.cv_results_)

print("Best Parameters: ", search.best_params_)
print("Best Cross-validation Score: ", search.best_score_)

In [7]:
boost_model = 	CatBoostRegressor(
				loss_function='RMSE',
				depth=4,
				iterations=500,
				rsm=1,
				leaf_estimation_iterations=20,
				random_seed=42
				)

boost_model.fit(x_train, y_train, verbose=0)

y_train_pred = boost_model.predict(x_train) 
y_test_pred = boost_model.predict(x_test) 

print(y_train[0], y_train_pred[0])
print("Training R2: ", r2_score(y_train, y_train_pred))
print((y_test[0], y_test_pred[0]))
print("Testing R2: ", r2_score(y_test, y_test_pred))

[10.7] 10.597987467470697
Training R2:  0.9924763734259026
(array([2.98]), 5.798370973998657)
Testing R2:  0.0805650476040165


In [None]:
boost_param_grid = {'iterations': [100, 250, 500, 1000],
          			'depth': [2, 4, 8],
          			'logging_level':['Silent'],
					'rsm': [0.8, 1.0],
					'leaf_estimation_iterations': [10, 20, 40],
          			'random_seed': [42]}

boost_search = GridSearchCV(boost_model, boost_param_grid, scoring='r2', n_jobs=-1)
boost_search.fit(X=pd.DataFrame(x_train_tensor), y=y_train_tensor.detach().cpu().numpy())

print("Best Parameters: ", boost_search.best_params_)
print("Best Cross-validation Score: ", boost_search.best_score_)

Best Parameters:  {'depth': 4, 'iterations': 100, 'leaf_estimation_iterations': 20, 'logging_level': 'Silent', 'random_seed': 42, 'rsm': 0.8}
Best Cross-validation Score:  -9.374696796570978


In [None]:
# MSE
pd.concat([pd.DataFrame(search.cv_results_["params"]),pd.DataFrame(search.cv_results_["mean_test_score"], columns=["Accuracy"])],axis=1).sort_values('Accuracy', ascending=False)

In [None]:
# Оптимизация

def func(x):
	return

def objective(params):
	x = np.array(params).reshape(1, -1)
	y_pred = boost_model.predict(x)[0]
	return -y_pred

initial_guess = np.mean(x_norm, axis=0)
bounds = [(0, 100),(0, 100),(0, 100)]
#bounds = [(None, None), ]*3

#result = shgo(objective, bounds, n=1000)
result = basinhopping(objective, initial_guess, niter=10000)
#result = minimize(objective, initial_guess, args=(boost_model), method='Newton-CG')

optimal_x = result.x
max_y = result.fun  # Максимальное значение y

print("Оптимальные параметры x:", optimal_x.reshape(1, -1))
print("Максимальное значение y:", -max_y)

#print("Оптимальные параметры x:", scaler.inverse_transform(optimal_x.reshape(1, -1)))
#print("Максимальное значение y:", max_y)