In [1]:
from sklearn.model_selection import GridSearchCV, StratifiedKFold
import lightgbm as lgb
import numpy as np
import pandas as pd
import optuna
import importlib
import model

importlib.reload(model) 



<module 'model' from 'c:\\Users\\mocch\\OneDrive\\デスクトップ\\vsc_file\\data_final\\experiment\\model.py'>

In [None]:
param_grid = {
    'learning_rate': [0.05, 0.1],
    'num_leaves': [31, 50, 100],
    'max_depth': [-1, 5, 10],
    'n_estimators': [100, 150, 200],
    'feature_fraction': [0.8, 0.9, 1.0],
    'bagging_fraction' : [0.8, 0.9, 1.0]
}

# KFold交差検証（StratifiedKFoldを使用してクラスの分布を保つ）
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# LightGBMのインスタンス
lgbm = lgb.LGBMClassifier()

# グリッドサーチの設定
grid_search = GridSearchCV(
    estimator=lgbm,  # 使用するモデル
    param_grid=param_grid,  # 探索するパラメーターのグリッド
    cv=kf,  # KFold交差検証
    scoring='accuracy',  # 精度を評価指標として使用
    n_jobs=-1,  # 並列計算を使って計算を高速化
    verbose=1  # グリッドサーチの進捗を表示
)

train = pd.read_csv('../data/baseline/train.csv')
x_train = train.drop(['Survived'], axis=1, inplace=False)
y_train = train['Survived']

# グリッドサーチの実行
grid_search.fit(x_train, y_train)

# 最適なパラメーターとスコア
print(f"Best Parameters: {grid_search.best_params_}")
print(f"Best Cross-validation Accuracy: {grid_search.best_score_}")

Best Parameters: {'bagging_fraction': 0.8, 
                  'feature_fraction': 0.8,
                    'learning_rate': 0.05, 
                    'max_depth': 5, 
                    'n_estimators': 100, 
                    'num_leaves': 31
                    }
Best Cross-validation Accuracy: 0.8462243424769318

In [None]:
input_train = pd.read_csv('../data/new_base/train.csv')
input_test = pd.read_csv('../data/new_base/test.csv')

def objective(trial):
    # ここでパラメータの範囲を定義
    params = {
        "objective": "binary",
        "metric": "binary_error",
        "verbosity": -1,
        "boosting_type": "gbdt",
        "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),
        "num_leaves": trial.suggest_int("num_leaves", 10, 100),
        "max_depth": trial.suggest_int("max_depth", 3, 12),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 50),
        "subsample": trial.suggest_uniform("subsample", 0.3, 0.9),
        "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.3, 0.9),
        "lambda_l1": trial.suggest_loguniform("lambda_l1", 1e-8, 10.0),
        "lambda_l2": trial.suggest_loguniform("lambda_l2", 1e-8, 10.0),
    }

    # k_gbm を呼び出してスコアを計算
    _, _, scores = model.k_gbm(input_train, input_test, params, n_splits=5)
    
    # 平均スコアを返す（Optuna はこれを最大化する）
    return np.mean(scores)

# Optuna の最適化実行
study = optuna.create_study(direction="maximize")  # 精度を最大化
study.optimize(objective, n_trials=50)

# 最適なパラメータの表示
print("Best parameters:", study.best_params)
print("Best Accuracy:", study.best_value)

Best parameters: {'learning_rate': 0.1109006773045451, 'num_leaves': 64, 'max_depth': 11, 'min_child_samples': 9, 'subsample': 0.5623226907045367, 'colsample_bytree': 0.5737897762821246, 'lambda_l1': 0.15874498150345134, 'lambda_l2': 0.0008756575871445052}
Best Accuracy: 0.8574602975331116

best_params = {
     'learning_rate': 0.1109006773045451,
     'num_leaves': 64,
     'max_depth': 11,
     'min_child_samples': 9,
     'subsample': 0.5623226907045367,
     'colsample_bytree': 0.5737897762821246, 
     'lambda_l1': 0.15874498150345134, 
     'lambda_l2': 0.0008756575871445052
     }