# optunaの使い方サンプル

In [2]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import optuna

# California Housingデータセットをロード
data = fetch_california_housing()
X = data.data
X = X[:1000] # 学習時間を減らすために最初の1000個を使用
y = data.target
y = y[:1000]

# データをトレーニングセットとテストセットに分割
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# データを標準化
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# モデルの評価関数（目的関数）を定義
def objective(trial):
    # ハイパーパラメータの範囲を指定
    n_estimators = trial.suggest_int('n_estimators', 50, 200)
    max_depth = trial.suggest_int('max_depth', 2, 32, log=True)

    # Random Forest回帰モデルを作成
    model = RandomForestRegressor(n_estimators=n_estimators, max_depth=max_depth, random_state=42)

    # モデルをトレーニング
    model.fit(X_train, y_train)

    # モデルを使用してテストデータを予測
    y_pred = model.predict(X_test)

    # 平均二乗誤差（MSE）を計算して返す
    mse = mean_squared_error(y_test, y_pred)
    return mse

# Optunaの最適化を実行
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=100)

# 最適なハイパーパラメータを取得
best_params = study.best_params
best_n_estimators = best_params['n_estimators']
best_max_depth = best_params['max_depth']

# 最適なハイパーパラメータでモデルを再トレーニング
best_model = RandomForestRegressor(n_estimators=best_n_estimators, max_depth=best_max_depth, random_state=42)
best_model.fit(X_train, y_train)

# 最終的なモデルの評価
y_pred = best_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print("最適なハイパーパラメータ:", best_params)
print("最適なハイパーパラメータでのMSE:", mse)


[I 2023-09-10 23:59:10,877] A new study created in memory with name: no-name-f46086c1-f3d5-45c6-bf52-d83172710d65
[I 2023-09-10 23:59:11,375] Trial 0 finished with value: 0.2556642306444964 and parameters: {'n_estimators': 187, 'max_depth': 2}. Best is trial 0 with value: 0.2556642306444964.
[I 2023-09-10 23:59:13,304] Trial 1 finished with value: 0.11578061416515242 and parameters: {'n_estimators': 163, 'max_depth': 13}. Best is trial 1 with value: 0.11578061416515242.
[I 2023-09-10 23:59:15,360] Trial 2 finished with value: 0.11579947719137734 and parameters: {'n_estimators': 157, 'max_depth': 29}. Best is trial 1 with value: 0.11578061416515242.
[I 2023-09-10 23:59:16,157] Trial 3 finished with value: 0.14728475753849557 and parameters: {'n_estimators': 146, 'max_depth': 5}. Best is trial 1 with value: 0.11578061416515242.
[I 2023-09-10 23:59:17,091] Trial 4 finished with value: 0.13634686799682944 and parameters: {'n_estimators': 144, 'max_depth': 6}. Best is trial 1 with value: 0.

最適なハイパーパラメータ: {'n_estimators': 181, 'max_depth': 15}
最適なハイパーパラメータでのMSE: 0.11378463292953037
