In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error
import optuna

# California Housingデータセットをロード
data = fetch_california_housing()
X = data.data
X = X[:1000] # 学習時間を減らすために最初の1000個を使用
y = data.target
y = y[:1000]

# データをトレーニングセットとテストセットに分割
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# データを標準化
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# モデルの評価関数（目的関数）を定義
def objective(trial):
    # ハイパーパラメータの範囲を指定
    n_estimators = trial.suggest_int('n_estimators', 50, 200)
    max_depth = trial.suggest_int('max_depth', 2, 32, log=True)

    # アルゴリズムを選択
    algorithm = trial.suggest_categorical('algorithm', ['RandomForest', 'GradientBoosting', 'SVR', 'KNeighbors'])

    if algorithm == 'RandomForest':
        model = RandomForestRegressor(n_estimators=n_estimators, max_depth=max_depth, random_state=42)
    elif algorithm == 'GradientBoosting':
        model = GradientBoostingRegressor(n_estimators=n_estimators, max_depth=max_depth, random_state=42)
    elif algorithm == 'SVR':
        C = trial.suggest_loguniform('C', 1e-3, 1e3)
        model = SVR(C=C)
    elif algorithm == 'KNeighbors':
        n_neighbors = trial.suggest_int('n_neighbors', 1, 20)
        model = KNeighborsRegressor(n_neighbors=n_neighbors)

    # モデルをトレーニング
    model.fit(X_train, y_train)

    # モデルを使用してテストデータを予測
    y_pred = model.predict(X_test)

    # 平均二乗誤差（MSE）を計算して返す
    mse = mean_squared_error(y_test, y_pred)
    return mse

# Optunaの最適化を実行
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=100)

# 最適なハイパーパラメータを取得
best_params = study.best_params
best_n_estimators = best_params.get('n_estimators')
best_max_depth = best_params.get('max_depth')
best_algorithm = best_params.get('algorithm')

# 最適なハイパーパラメータでモデルを再トレーニング
if best_algorithm == 'RandomForest':
    best_model = RandomForestRegressor(n_estimators=best_n_estimators, max_depth=best_max_depth, random_state=42)
elif best_algorithm == 'GradientBoosting':
    best_model = GradientBoostingRegressor(n_estimators=best_n_estimators, max_depth=best_max_depth, random_state=42)
elif best_algorithm == 'SVR':
    best_C = best_params.get('C')
    best_model = SVR(C=best_C)
elif best_algorithm == 'KNeighbors':
    best_n_neighbors = best_params.get('n_neighbors')
    best_model = KNeighborsRegressor(n_neighbors=best_n_neighbors)

best_model.fit(X_train, y_train)

# 最終的なモデルの評価
y_pred = best_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)

# 結果をデータフレームにまとめて返す
results_df = pd.DataFrame({
    'Algorithm': [best_algorithm],
    'n_estimators': [best_n_estimators],
    'max_depth': [best_max_depth],
    'Mean Squared Error': [mse]
})

print("最適なハイパーパラメータ:", best_params)
print("最適なハイパーパラメータでのMSE:", mse)
print(results_df)


[I 2023-09-11 00:01:40,595] A new study created in memory with name: no-name-f5cb2c67-0d49-4968-9d7f-1f7c75429f9e
[I 2023-09-11 00:01:42,529] Trial 0 finished with value: 0.24331811124544545 and parameters: {'n_estimators': 92, 'max_depth': 25, 'algorithm': 'GradientBoosting'}. Best is trial 0 with value: 0.24331811124544545.
[I 2023-09-11 00:01:44,241] Trial 1 finished with value: 0.11945015153264098 and parameters: {'n_estimators': 187, 'max_depth': 9, 'algorithm': 'RandomForest'}. Best is trial 1 with value: 0.11945015153264098.
[I 2023-09-11 00:01:45,815] Trial 2 finished with value: 0.12563227786449946 and parameters: {'n_estimators': 156, 'max_depth': 7, 'algorithm': 'GradientBoosting'}. Best is trial 1 with value: 0.11945015153264098.
  C = trial.suggest_loguniform('C', 1e-3, 1e3)
[I 2023-09-11 00:01:45,852] Trial 3 finished with value: 0.18462249205513132 and parameters: {'n_estimators': 139, 'max_depth': 10, 'algorithm': 'SVR', 'C': 0.07103444909233934}. Best is trial 1 with v

最適なハイパーパラメータ: {'n_estimators': 188, 'max_depth': 18, 'algorithm': 'SVR', 'C': 2.3297936228966947}
最適なハイパーパラメータでのMSE: 0.10264400251078047
  Algorithm  n_estimators  max_depth  Mean Squared Error
0       SVR           188         18            0.102644
