In [1]:
import numpy as np
import pandas as pd
import model_search

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from matplotlib import pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
# Multi-sine synthetic dataset: a sample used for training and the full set sorted by x1
df = pd.read_csv('../data/synth_ds_0.csv')
full_df = pd.read_csv('../data/f_opt/ds0/synth_full_ds_0.csv')
full_df = full_df.sort_values('x1')
columns = list(df)
feature_columns = columns[:-1]
target_column = columns[-1]

# Convert to arrays: every column except the last is a feature, the last one is the target
x_train = np.array(df[feature_columns].values.tolist())
x_full = np.array(full_df[feature_columns].values.tolist())
y_train = np.array(df[target_column].values.tolist()).reshape(-1, 1)
y_full = np.array(full_df[target_column].values.tolist()).reshape(-1, 1)

# Normalization: the scaler is fitted on the training features only and reused for the full set
scaler = StandardScaler()
scaler.fit(x_train)
x_train_scaled = scaler.transform(x_train)
x_full_scaled = scaler.transform(x_full)

In [2]:
# Branin
full_df = pd.read_csv('../data/f_opt/branin/branin_ds_0.csv')
columns = list(full_df)

# Convert to arrays; the first column is skipped (presumably a saved index — TODO confirm)
x_full = np.array(full_df[columns[1:-1]].values.tolist())
y_full = np.array(full_df[columns[-1]].values.tolist()).reshape(-1, 1)

# Only the 5% "test" part of the split is kept; it serves as the training sample
split_parts = train_test_split(x_full, y_full, test_size=0.05, shuffle=True, random_state=39)
x_train = split_parts[1]
y_train = split_parts[3]

# Normalization
scaler = StandardScaler()
scaler.fit(x_train)
x_train_scaled = scaler.transform(x_train)

In [4]:
# Rosenbrock
# NOTE(review): this cell is labelled Rosenbrock but reads the Branin CSV, exactly like the
# Branin cell — looks like a copy-paste leftover; confirm the intended dataset path.
full_df = pd.read_csv('../data/f_opt/branin/branin_ds_0.csv')
columns = list(full_df)

# Convert to arrays; the first column is skipped (presumably a saved index — TODO confirm)
feature_frame = full_df[columns[1:-1]]
target_series = full_df[columns[-1]]
x_full = np.array(feature_frame.values.tolist())
y_full = np.array(target_series.values.tolist()).reshape(-1, 1)

# Only the 5% "test" part of the split is kept; it serves as the training sample
split_parts = train_test_split(x_full, y_full, test_size=0.05, shuffle=True, random_state=39)
x_train, y_train = split_parts[1], split_parts[3]

# Normalization
scaler = StandardScaler()
scaler.fit(x_train)
x_train_scaled = scaler.transform(x_train)

In [7]:
# Model classes passed to the search
model_classes = [
    'HistGradientBoostingRegressor',
    'XGBRegressor',
    'CatBoostRegressor',
    'RandomForestRegressor',
    'ElasticNet',
    'BayesianRidge',
]

# Hyperparameter search and model evaluation
search_budget = {'n_trials': 200, 'n_startup_trials': 50}
OS = model_search.OptunaSearchCV(model_classes, compare_kfold=False)
OS.fit(x_train_scaled, y_train, **search_budget)


HistGradientBoostingRegressor hyperoptimization


Best trial: 20. Best value: 0.64214:  32%|███▏      | 63/200 [00:15<00:32,  4.16it/s] 

[W 2025-06-12 04:12:21,706] Trial 63 failed with parameters: {'learning_rate': 0.007436908812164796, 'max_depth': 9, 'min_samples_leaf': 7, 'max_features': 0.5580847500269281, 'l2_regularization': 0.5865851678188605, 'n_iter_no_change': 29} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "c:\Users\kiril\AppData\Local\Programs\Python\Python312\Lib\site-packages\optuna\study\_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "c:\Users\kiril\PycharmProjects\SM\src\model_search.py", line 126, in __call__
    y_pred = cross_val_predict(regressor_obj, self.X, self.y, cv=self.cv, verbose=0, n_jobs=-1)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\kiril\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\utils\_param_validation.py", line 216, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^




KeyboardInterrupt: 

In [None]:
# Write the results to csv
# NOTE(review): the "show all results" cell reads '../results/synth_results.csv', not this
# file — confirm whether the different name is intentional.
results_csv_path = '../results/synth_results1.csv'
OS.results_df.to_csv(results_csv_path)

In [None]:
# Plot the test R² score of the best models
model_names = [type(model).__name__ for model in OS.best_models]
# NOTE(review): the first bar is relabelled 'ANN' regardless of the model's actual class;
# confirm that OS.best_models[0] really is the neural network.
model_names[0] = 'ANN'
test_r2_scores = OS.best_models_r_test
val_r2_scores = OS.best_models_r_val  # used by the validation plot cell

plt.figure(figsize=(15, 9))

plt.grid()
# The bars carry a label so that plt.legend() below has an artist to show; without it
# matplotlib warns and draws no legend.
tbars = plt.bar(model_names, test_r2_scores, color='skyblue', label='Test R²')
plt.title('Сравнение моделей по test R²')
plt.xlabel('Модели')
plt.ylabel('Test R² Score')
plt.legend()
plt.xticks(rotation=45, ha='right')
# Write each score just above the centre of its bar
for tbar, tscore in zip(tbars, test_r2_scores):
    plt.text(tbar.get_x() + tbar.get_width() / 2, tbar.get_height(),
             f'{tscore:.2f}', ha='center', va='bottom')
plt.tight_layout()

In [None]:
# Plot the validation R² score of the best models
plt.figure(figsize=(15, 9))
plt.grid()

bars = plt.bar(model_names, val_r2_scores, color='skyblue')

plt.title('Сравнение моделей по val R²')
plt.xlabel('Модели')
plt.ylabel('Val R² Score')
plt.xticks(rotation=45, ha='right')

# Annotate every bar with its score, centred horizontally on top of the bar
for rect, r2_value in zip(bars, val_r2_scores):
    label_x = rect.get_x() + rect.get_width() / 2
    label_y = rect.get_height()
    plt.text(label_x, label_y, f'{r2_value:.2f}', ha='center', va='bottom')

plt.tight_layout()

In [None]:
# Select the model to simulate: the one with the highest validation R²
scores = OS.best_models_r_val

# R² has no lower bound, so a fixed sentinel such as -99 can leave the index undefined;
# take the arg-max directly instead. NaN scores are ignored, and the first maximum wins on ties.
score_array = np.asarray(scores, dtype=float)
if score_array.size == 0 or np.isnan(score_array).all():
    raise ValueError('No valid validation R2 scores to select a model from')

ii = int(np.nanargmax(score_array))
r2_max = scores[ii]

model_to_simulate = OS.best_models[ii]
print(type(model_to_simulate).__name__)
print(r2_max)

In [None]:
# Manual model selection; when run, this replaces any previously assigned model_to_simulate
manual_model_index = 2
model_to_simulate = OS.best_models[manual_model_index]

In [None]:
# Show all results, highest validation R² first
# NOTE(review): this rebinds `df` (the training frame in the synthetic-data cell) and reads
# 'synth_results.csv', while the write cell produces 'synth_results1.csv' — confirm both.
df = pd.read_csv('../results/synth_results.csv')
sorted_results = df.sort_values(by=['R2_val'], ascending=False)
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(sorted_results)

In [None]:
# Optimization over the approximated function
# NOTE(review): the model is picked by a hard-coded index (1), not model_to_simulate — confirm.
OS.optimize(
    [OS.best_models[1]],
    scaler,
    direction='minimize',
    problem='synth',
    plot=True,
    verbose=True,
)

In [None]:
# Experiment simulation
def func(x):
    """Evaluate the synthetic target on the first coordinate of ``x``.

    With ``t = x.T[0]`` the result is
    ``sin(t) + sin(t/2) + sin(t/4) + cos(t/8) + cos(t/16)``.
    """
    t = x.T[0]
    sine_part = np.sin(t) + np.sin(t / 2) + np.sin(t / 4)
    return sine_part + np.cos(t / 8) + np.cos(t / 16)

# Reference optimum: the row of the full dataset with the smallest y
min_y_row = full_df.loc[full_df['y'].idxmin()]
# Index the scalar directly: float() on a one-element Series (min_y_row[['x1']]) is
# deprecated in pandas >= 2.1 and is slated to raise TypeError.
x_min = float(min_y_row['x1'])
y_min = min_y_row['y']

# Settings forwarded to OS.simulate_experiment
tol = 0.5
n_trials = 1000
n_startup_trials = 10
max_iter = 20

(accuracy_history, min_history, x_history,
 y_history, y_true_history) = OS.simulate_experiment(
    func, model_to_simulate, scaler, x_train_scaled, y_train, x_min, y_min, tol,
    direction='minimize',
    n_trials=n_trials,
    n_startup_trials=n_startup_trials,
    max_iter=max_iter,
)

In [None]:
# Show x1 of the full-dataset row with the smallest y (the reference minimum)
print(x_min)

In [None]:
# Keep only the first coordinate of every entry in x_history
x_history_array = np.array(x_history)
x_np = x_history_array[:, 0]
print(x_np)

In [None]:
# Print every y_history entry next to its zero-based position
for step, y_value in enumerate(y_history):
    print(f'{step} {y_value}')

In [None]:
# Plot x (first coordinate of each x_history entry) against the step number
step_numbers = range(1, len(x_np) + 1)

plt.figure(figsize=(10, 5))
plt.plot(step_numbers, x_np, linestyle='-', marker='o')
plt.grid(True)

plt.xlabel('Номер шага')
plt.ylabel('x')
plt.tight_layout()
plt.show()

In [None]:
# Plot |y_pred-y_true| against the step number
step_numbers = range(1, len(accuracy_history) + 1)

plt.figure(figsize=(10, 5))
plt.plot(step_numbers, accuracy_history, linestyle='-', marker='o')
plt.grid(True)

plt.xlabel('Номер шага')
plt.ylabel('|y_pred-y_true|')
plt.tight_layout()
plt.show()

In [None]:
# Plot |y_pred-global_min(y_true)| against the step number
step_numbers = range(1, len(min_history) + 1)

plt.figure(figsize=(10, 5))
plt.plot(step_numbers, min_history, linestyle='-', marker='o')
plt.grid(True)

plt.xlabel('Номер шага')
plt.ylabel('|y_pred-global_min|')
plt.tight_layout()
plt.show()

In [None]:
# Build an animation of the point-selection history
from copy import deepcopy
from matplotlib.animation import FuncAnimation

_x_train = deepcopy(x_train)
_y_train = deepcopy(y_train)

# Frame 0 is the initial training set; every later frame has one more point appended
frames = [(x_train, y_train)]
n_initial = len(x_train)

for x_new, y_new in zip(x_history, y_true_history):
    _x_train = np.vstack([_x_train, x_new])
    # np.append flattens its input, so the target array is 1-D from the first appended point on
    _y_train = np.append(_y_train, y_new)
    frames.append((_x_train, _y_train))

fig, ax = plt.subplots()
# Empty line artist that never receives data; kept in the blit list returned by update()
line, = ax.plot([], [], 'b-')
ax.scatter(x_train.T[0], y_train, color='darkorange', label='Обучающая выборка')
scatter_new = ax.scatter([], [], c='green', s=50, label='Новые точки')
ax.plot(x_full.T[0], y_full, c='skyblue', label='Истинная функция')

# Fixed view limits, set once instead of on every frame
ax.set_xlim(0, 100)
ax.set_ylim(-4, 5)

# Decorate the axes BEFORE saving so the legend, labels and title end up in the GIF
ax.legend()
ax.set_xlabel("x")
ax.set_ylabel("y")
ax.set_title("Итеративное обучение модели")


def update(frame):
    """Show the points added after the initial training set for one frame.

    ``frame`` is an ``(X, y)`` pair from ``frames``; rows from ``n_initial`` on are
    drawn by ``scatter_new``. Returns the artists to redraw, as blitting requires.
    """
    X_tr, y_tr = frame
    scatter_new.set_offsets(np.c_[X_tr[n_initial:, 0], y_tr[n_initial:]])
    return line, scatter_new


ani = FuncAnimation(fig, update, frames=frames, interval=500, blit=True)
ani.save("animated.gif", writer='pillow')
plt.show()