## Общий алгоритм работы с Optuna

In [None]:
!pip install optuna -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m365.3/365.3 KB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m210.5/210.5 KB[0m [31m19.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.7/78.7 KB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[?25h

1. Определяем целевую функцию objective; через аргументы она будет получать специальный объект trial. С его помощью можно назначать различные гиперпараметры. Например, как в примере ниже, мы задаем x в интервале [-10, 10].

2. Далее создаем объект исследования (study) с помощью функции optuna.create_study.

3. Запускаем оптимизацию целевой функции objective на 20 итераций (n_trials=20). Происходит 20 вызовов нашей функции с различными значениями x от -10 до 10. Какие именно параметры выбирает optuna, будет описано ниже.

In [None]:
import optuna

def objective(trial):
    """Return the squared distance between a suggested ``x`` and 2.

    The trial object is asked for a float named ``'x'`` in the range
    [-10, 10]; the returned value is zero exactly when ``x == 2``.
    """
    candidate = trial.suggest_float('x', -10, 10)
    offset = candidate - 2
    return offset ** 2

# Minimise `objective`: build an in-memory study, spend 20 trials on it, and
# leave the best parameters as the value displayed by the cell.
# "minimize" is Optuna's default direction; it is spelled out here for clarity.
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=20)

study.best_params

[32m[I 2023-03-05 14:36:49,304][0m A new study created in memory with name: no-name-c4ac6997-45e1-4792-98a3-66bcc5a4f76c[0m
[32m[I 2023-03-05 14:36:49,314][0m Trial 0 finished with value: 47.09766018867022 and parameters: {'x': -4.862773505563929}. Best is trial 0 with value: 47.09766018867022.[0m
[32m[I 2023-03-05 14:36:49,320][0m Trial 1 finished with value: 14.01070423681723 and parameters: {'x': 5.743087527271735}. Best is trial 1 with value: 14.01070423681723.[0m
[32m[I 2023-03-05 14:36:49,322][0m Trial 2 finished with value: 5.10330391026164 and parameters: {'x': -0.25904933772187455}. Best is trial 2 with value: 5.10330391026164.[0m
[32m[I 2023-03-05 14:36:49,332][0m Trial 3 finished with value: 21.666504578094543 and parameters: {'x': 6.654729270118139}. Best is trial 2 with value: 5.10330391026164.[0m
[32m[I 2023-03-05 14:36:49,337][0m Trial 4 finished with value: 0.019053192885253242 and parameters: {'x': 1.8619666964633055}. Best is trial 4 with value: 0.019

{'x': 1.8619666964633055}

## Загрузка данных и импорт библиотек

In [None]:
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.metrics import r2_score

from sklearn.datasets import fetch_california_housing

In [None]:
# Seed passed as random_state to train_test_split so the split is repeatable.
RANDOM_STATE = 42

In [None]:
!pip install lightgbm -q

In [None]:
from lightgbm import LGBMRegressor

In [None]:
# Load the California housing dataset; as_frame=True requests pandas objects
# instead of plain arrays.
data = fetch_california_housing(as_frame=True)

# Feature table and regression target used throughout the rest of the notebook.
X = data.data 
y = data.target

In [None]:
# Hold out 25% of the rows as a test set; the split is seeded with RANDOM_STATE.
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.25, random_state=RANDOM_STATE)

## Подбор гиперпараметров с Optuna

Разобьем данные на тренировочную и тестовую части. На тренировочной части по кросс-валидации подберем гиперпараметры моделей, а затем проверим качество на тестовой части.

In [None]:
def objective_lgbm(trial):
    """Score one LightGBM configuration proposed by the given trial.

    The trial suggests ``max_depth`` (int, 2..20), ``learning_rate``
    (float, 1e-5..1, sampled with ``log=True``) and ``n_estimators``
    (int, 10..1000). An ``LGBMRegressor`` built from them is evaluated with
    3-fold cross-validation on ``Xtrain``/``ytrain`` using the ``'r2'``
    scorer, and the mean fold score is returned.
    """
    # Dict literals evaluate in order, so the suggest_* calls happen in the
    # same sequence: max_depth, learning_rate, n_estimators.
    params = {
        "max_depth": trial.suggest_int("max_depth", 2, 20),
        "learning_rate": trial.suggest_float("learning_rate", 1e-5, 1, log=True),
        "n_estimators": trial.suggest_int("n_estimators", 10, 1000),
    }

    fold_scores = cross_val_score(
        LGBMRegressor(**params),
        Xtrain,
        ytrain,
        cv=3,
        scoring='r2',
        n_jobs=-1,
    )
    return fold_scores.mean()


# Maximise the cross-validated R^2 returned by objective_lgbm over 30 trials.
# The sampler is seeded with RANDOM_STATE so that re-running the cell proposes
# the same sequence of hyperparameters; without a seed the search (and the
# reported best_params) changes from run to run. TPESampler is the sampler
# Optuna uses by default, so only the seeding differs from the default setup.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=RANDOM_STATE),
)
study.optimize(objective_lgbm, n_trials=30)

[32m[I 2023-03-05 14:38:48,549][0m A new study created in memory with name: no-name-74cc1834-2ec8-47d3-a9d8-108acc25d179[0m
[32m[I 2023-03-05 14:38:51,759][0m Trial 0 finished with value: 0.3677673885978045 and parameters: {'max_depth': 6, 'learning_rate': 0.013501587495049401, 'n_estimators': 30}. Best is trial 0 with value: 0.3677673885978045.[0m
[32m[I 2023-03-05 14:39:02,387][0m Trial 1 finished with value: 0.42605147785600367 and parameters: {'max_depth': 10, 'learning_rate': 0.0006579522897687368, 'n_estimators': 759}. Best is trial 1 with value: 0.42605147785600367.[0m
[32m[I 2023-03-05 14:39:07,830][0m Trial 2 finished with value: 0.805802126506022 and parameters: {'max_depth': 13, 'learning_rate': 0.4309027882979761, 'n_estimators': 558}. Best is trial 2 with value: 0.805802126506022.[0m
[32m[I 2023-03-05 14:39:15,789][0m Trial 3 finished with value: 0.13940211792410437 and parameters: {'max_depth': 14, 'learning_rate': 0.00012103562004268691, 'n_estimators': 998

In [None]:
# Display the hyperparameters of the best trial in the study.
study.best_params

{'max_depth': 18, 'learning_rate': 0.06301704090957079, 'n_estimators': 898}

In [None]:
# Refit LightGBM on the full training split with the best hyperparameters
# found by the study (fit returns the estimator itself, so it can be chained).
model = LGBMRegressor(**study.best_params).fit(Xtrain, ytrain)

# Score the held-out test split; the R^2 value is the cell's displayed result.
pred = model.predict(Xtest)
r2_score(ytest, pred)

0.8542513730157428