## パイプライン

### パイプラインとは
#### 変換器(特徴量の生成と選択)と予測気を直列に繋げ，一連の処理をまとめて実行できるようにする仕組みのことをパイプラインと言う．
### メリット
1. いくつかの前処理を実行している場合でも，fit や predict をパイプラインに対して1度だけ呼ぶだけで良い
2. パイプラインに対して，グリッドサーチを行う際，パラメータを1度に指定できる．fit を読んだ後の変換器をキャッシュでき，性能向上が期待できる．
3. transformer と estimatorに同じデータが使われるため，交差検証の際にデータがリークされることを防げる

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import scipy
import pickle

from sklearn.model_selection import train_test_split, KFold
from sklearn.feature_selection import SelectFromModel

# 変換器
from sklearn.preprocessing import StandardScaler

# 予測器
import lightgbm as lgb

# パイプライン
from sklearn.pipeline import Pipeline

# パラメーター調整
import optuna
import optuna.integration.lightgbm as lgb_tune
from lightgbm import early_stopping

# 評価関数
from sklearn.metrics import mean_squared_error, r2_score

# ベイズ最適化
import optuna.integration.lightgbm as ilgbm

In [2]:
df = pd.read_csv('..//Data-science//data//boston.csv')

X = df[['INDUS', 'RM', 'TAX', 'PTRATIO', 'LSTAT']]
y = df['house prices']

X_train_val, X_test, y_train_val, y_test = train_test_split(X,y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, random_state=53)

X_train_val = X_train_val.values
X_train = X_train.values
X_val = X_val.values
X_test = X_test.values

y_train_val = y_train_val.values
y_train = y_train.values
y_val = y_val.values
y_test = y_test.values

### ベイズ最適化

In [3]:
model = lgb.LGBMRegressor()

param_distributions = {
    'n_estimators':optuna.distributions.IntLogUniformDistribution(1e+0, 1e+3, 1),
    'max_depth': optuna.distributions.IntUniformDistribution(1, 20, 1),
    'reg_alpha': optuna.distributions.IntLogUniformDistribution(1e+0, 10.0, 0.1),
    'reg_lambda': optuna.distributions.IntLogUniformDistribution(1e+0, 10.0, 0.1),
    #'subsample_freq':optuna.distributions.IntUniformDistribution(1, 200, 5),
    'min_child_samples':optuna.distributions.IntUniformDistribution(1, 100, 5),
    #'bagging_freq':optuna.distributions.IntUniformDistribution(1, 10, 1),
    'num_leaves': optuna.distributions.IntUniformDistribution(100, 200, 5),
}

# クロスバリデーションによる、ハイパーパラメータの探索設定.cvのデフォルト設定値は5
optuna_search = optuna.integration.OptunaSearchCV(model,param_distributions,cv=5, n_jobs=3,n_trials=25,random_state=42,scoring='neg_root_mean_squared_error')

# 探索の実行
optuna_search.fit(X_train_val, y_train_val)

  optuna_search = optuna.integration.OptunaSearchCV(model,param_distributions,cv=5, n_jobs=3,n_trials=25,random_state=42,scoring='neg_root_mean_squared_error')
[32m[I 2022-01-09 15:39:29,894][0m A new study created in memory with name: no-name-fcfda2da-9ffe-4bbd-880e-a980970dca5c[0m
[32m[I 2022-01-09 15:39:30,130][0m Trial 1 finished with value: -6.918626393663386 and parameters: {'n_estimators': 6, 'max_depth': 18, 'reg_alpha': 1, 'reg_lambda': 1, 'min_child_samples': 61, 'num_leaves': 165}. Best is trial 1 with value: -6.918626393663386.[0m
[32m[I 2022-01-09 15:39:30,709][0m Trial 0 finished with value: -4.585284920440033 and parameters: {'n_estimators': 23, 'max_depth': 12, 'reg_alpha': 1, 'reg_lambda': 1, 'min_child_samples': 41, 'num_leaves': 100}. Best is trial 0 with value: -4.585284920440033.[0m
[32m[I 2022-01-09 15:39:32,506][0m Trial 3 finished with value: -4.242445653729601 and parameters: {'n_estimators': 106, 'max_depth': 3, 'reg_alpha': 2, 'reg_lambda': 1, 'min

OptunaSearchCV(estimator=LGBMRegressor(), n_jobs=3, n_trials=25,
               param_distributions={'max_depth': IntUniformDistribution(high=20, low=1, step=1),
                                    'min_child_samples': IntUniformDistribution(high=96, low=1, step=5),
                                    'n_estimators': IntLogUniformDistribution(high=1000.0, low=1.0, step=1),
                                    'num_leaves': IntUniformDistribution(high=200, low=100, step=5),
                                    'reg_alpha': IntLogUniformDistribution(high=10.0, low=1.0, step=0.1),
                                    'reg_lambda': IntLogUniformDistribution(high=10.0, low=1.0, step=0.1)},
               random_state=42, scoring='neg_root_mean_squared_error')

In [4]:
best_params = optuna_search.best_params_
best_params

{'n_estimators': 79,
 'max_depth': 7,
 'reg_alpha': 5,
 'reg_lambda': 3,
 'min_child_samples': 1,
 'num_leaves': 115}

In [5]:
optuna_search.best_params_

{'n_estimators': 79,
 'max_depth': 7,
 'reg_alpha': 5,
 'reg_lambda': 3,
 'min_child_samples': 1,
 'num_leaves': 115}

### パイプライン構築

In [6]:
pipe = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('regressor', lgb.LGBMRegressor(
            objective="regression",
            n_estimators= 68,
            max_depth=11,
            reg_alpha=2,
            reg_lambda=4,
            subsample_freq=151,
            min_child_samples=1,
            bagging_freq=96,
            num_leaves=160
        ))
    ]
)

In [7]:
pipe.fit(X_train, y_train)

Pipeline(steps=[('scaler', StandardScaler()),
                ('regressor',
                 LGBMRegressor(bagging_freq=96, max_depth=11,
                               min_child_samples=1, n_estimators=68,
                               num_leaves=160, objective='regression',
                               reg_alpha=2, reg_lambda=4,
                               subsample_freq=151))])

In [8]:
pipe.get_params

<bound method Pipeline.get_params of Pipeline(steps=[('scaler', StandardScaler()),
                ('regressor',
                 LGBMRegressor(bagging_freq=96, max_depth=11,
                               min_child_samples=1, n_estimators=68,
                               num_leaves=160, objective='regression',
                               reg_alpha=2, reg_lambda=4,
                               subsample_freq=151))])>

In [9]:
# r2
pipe.score(X_test, y_test)

0.804943945266863

In [10]:
pred = pipe.predict(X_test)

In [11]:
rmse = np.sqrt(mean_squared_error(y_test, pred))

print(rmse)

3.782089161020395


In [12]:
# モデルの保存
with open('3_pipe_lgbm.model', 'wb') as f:
  pickle.dump(pipe, f)

### 課題3の精度
- rmse: 3.752752469634236
- r2: 0.7988901108422742

## ベイズ最適化1

In [18]:
model = lgb.LGBMRegressor()

param_distributions = {
    'n_estimators':optuna.distributions.IntLogUniformDistribution(1e+0, 1e+3, 1),
    'max_depth': optuna.distributions.IntUniformDistribution(1, 20, 1)
}

# クロスバリデーションによる、ハイパーパラメータの探索設定.cvのデフォルト設定値は5
optuna_search = optuna.integration.OptunaSearchCV(
    model,
    param_distributions,
    cv=5,
    n_jobs=-1,
    n_trials=100,
    random_state=42,
    scoring='neg_root_mean_squared_error')

# 探索の実行
optuna_search.fit(X_train_val, y_train_val)

  optuna_search = optuna.integration.OptunaSearchCV(
[32m[I 2022-01-09 15:47:11,207][0m A new study created in memory with name: no-name-090479d8-2598-409f-9688-14a1f8474661[0m
[32m[I 2022-01-09 15:47:11,467][0m Trial 1 finished with value: -8.627795117318598 and parameters: {'n_estimators': 1, 'max_depth': 20}. Best is trial 1 with value: -8.627795117318598.[0m
[32m[I 2022-01-09 15:47:11,475][0m Trial 5 finished with value: -8.5306064131743 and parameters: {'n_estimators': 2, 'max_depth': 1}. Best is trial 5 with value: -8.5306064131743.[0m
[32m[I 2022-01-09 15:47:11,554][0m Trial 3 finished with value: -8.038342766329482 and parameters: {'n_estimators': 2, 'max_depth': 12}. Best is trial 3 with value: -8.038342766329482.[0m
[32m[I 2022-01-09 15:47:11,658][0m Trial 7 finished with value: -7.133801739613139 and parameters: {'n_estimators': 4, 'max_depth': 3}. Best is trial 7 with value: -7.133801739613139.[0m
[32m[I 2022-01-09 15:47:11,839][0m Trial 8 finished with val

OptunaSearchCV(estimator=LGBMRegressor(), n_jobs=-1, n_trials=100,
               param_distributions={'max_depth': IntUniformDistribution(high=20, low=1, step=1),
                                    'n_estimators': IntLogUniformDistribution(high=1000.0, low=1.0, step=1)},
               random_state=42, scoring='neg_root_mean_squared_error')

In [19]:
optuna_search.best_score_

-3.7912583934094153

In [20]:
print(optuna_search.score(X_test, y_test))

-3.9127614133880844


In [21]:
best_params = optuna_search.best_params_
best_params

{'n_estimators': 949, 'max_depth': 2}

In [22]:
from sklearn.decomposition import PCA
from lightgbm.basic import param_dict_to_str

pca = PCA()
regressor = lgb.LGBMRegressor(param_dict_to_str(best_params))

estimators = [
    #('StandScaler', scaler),
    #('feature_selection', fr),
    ('pca', pca),
    ('estimator', regressor)
]

In [23]:
pipe = Pipeline(steps=estimators)

In [24]:
# 学習
pipe.fit(X_train, y_train)

# 予測
pred = pipe.predict(X_test)



In [25]:
# r2
r2 = pipe.score(X_test, y_test)
print(f'r2: {r2}')

# rmse
rmse = np.sqrt(mean_squared_error(y_test, pred))

print(rmse)

r2: 0.7890087093274946
3.9335466895531046


### ベイズ最適化2

In [None]:
params = {
        "objective": "regression",
        "metric": "rmse",
        "verbosity": -1,
        "random_seed": 42,
        "boosting_type": "gbdt",
    }

In [None]:
dtrain = lgb.Dataset(X_train, label=y_train)

tuner = ilgbm.LightGBMTunerCV(
    params,
    dtrain,
    folds=KFold(n_splits=5),
    verbose_eval=100,
    early_stopping_rounds=100,
)

# ハイパーパラメータ探索の実行
tuner.run()

[32m[I 2021-12-30 21:54:50,376][0m A new study created in memory with name: no-name-db78b237-09c1-4a00-8264-e8212c3dbb4d[0m


[100]	cv_agg's rmse: 3.89765 + 0.491751
[200]	cv_agg's rmse: 3.87197 + 0.50453


feature_fraction, val_score: 3.864767:  14%|#4        | 1/7 [00:07<00:44,  7.41s/it][32m[I 2021-12-30 21:54:57,796][0m Trial 0 finished with value: 3.8647666105615315 and parameters: {'feature_fraction': 1.0}. Best is trial 0 with value: 3.8647666105615315.[0m
feature_fraction, val_score: 3.864767:  14%|#4        | 1/7 [00:07<00:44,  7.41s/it]

[300]	cv_agg's rmse: 3.87796 + 0.483789




[100]	cv_agg's rmse: 3.93573 + 0.662539


feature_fraction, val_score: 3.864767:  29%|##8       | 2/7 [00:08<00:18,  3.67s/it][32m[I 2021-12-30 21:54:58,846][0m Trial 1 finished with value: 3.892737838821894 and parameters: {'feature_fraction': 0.7}. Best is trial 0 with value: 3.8647666105615315.[0m
feature_fraction, val_score: 3.864767:  29%|##8       | 2/7 [00:08<00:18,  3.67s/it]

[200]	cv_agg's rmse: 3.90767 + 0.606654




[100]	cv_agg's rmse: 3.87807 + 0.541494
[200]	cv_agg's rmse: 3.84702 + 0.529882
[300]	cv_agg's rmse: 3.82931 + 0.502153


feature_fraction, val_score: 3.826909:  43%|####2     | 3/7 [00:10<00:11,  2.87s/it][32m[I 2021-12-30 21:55:00,765][0m Trial 2 finished with value: 3.82690870650089 and parameters: {'feature_fraction': 0.4}. Best is trial 2 with value: 3.82690870650089.[0m


[100]	cv_agg's rmse: 3.93573 + 0.662539
[200]	cv_agg's rmse: 3.90767 + 0.606654


feature_fraction, val_score: 3.826909:  57%|#####7    | 4/7 [00:14<00:10,  3.36s/it][32m[I 2021-12-30 21:55:04,885][0m Trial 3 finished with value: 3.892737838821894 and parameters: {'feature_fraction': 0.8}. Best is trial 2 with value: 3.82690870650089.[0m


[100]	cv_agg's rmse: 3.89765 + 0.491751
[200]	cv_agg's rmse: 3.87197 + 0.50453


feature_fraction, val_score: 3.826909:  71%|#######1  | 5/7 [00:15<00:05,  2.68s/it][32m[I 2021-12-30 21:55:06,361][0m Trial 4 finished with value: 3.8647666105615315 and parameters: {'feature_fraction': 0.8999999999999999}. Best is trial 2 with value: 3.82690870650089.[0m
feature_fraction, val_score: 3.826909:  71%|#######1  | 5/7 [00:15<00:05,  2.68s/it]

[300]	cv_agg's rmse: 3.87796 + 0.483789




[100]	cv_agg's rmse: 3.93561 + 0.656102
[200]	cv_agg's rmse: 3.90947 + 0.621206


feature_fraction, val_score: 3.826909:  86%|########5 | 6/7 [00:17<00:02,  2.20s/it][32m[I 2021-12-30 21:55:07,625][0m Trial 5 finished with value: 3.901078198578754 and parameters: {'feature_fraction': 0.6}. Best is trial 2 with value: 3.82690870650089.[0m


[100]	cv_agg's rmse: 3.93561 + 0.656102
[200]	cv_agg's rmse: 3.90947 + 0.621206


feature_fraction, val_score: 3.826909: 100%|##########| 7/7 [00:18<00:00,  1.88s/it][32m[I 2021-12-30 21:55:08,853][0m Trial 6 finished with value: 3.901078198578754 and parameters: {'feature_fraction': 0.5}. Best is trial 2 with value: 3.82690870650089.[0m
feature_fraction, val_score: 3.826909: 100%|##########| 7/7 [00:18<00:00,  2.64s/it]


[100]	cv_agg's rmse: 3.87807 + 0.541494
[200]	cv_agg's rmse: 3.84702 + 0.529882
[300]	cv_agg's rmse: 3.82931 + 0.502153


num_leaves, val_score: 3.826909:   5%|5         | 1/20 [00:01<00:23,  1.21s/it][32m[I 2021-12-30 21:55:10,076][0m Trial 7 finished with value: 3.82690870650089 and parameters: {'num_leaves': 162}. Best is trial 7 with value: 3.82690870650089.[0m


[100]	cv_agg's rmse: 3.87807 + 0.541494
[200]	cv_agg's rmse: 3.84702 + 0.529882
[300]	cv_agg's rmse: 3.82931 + 0.502153


num_leaves, val_score: 3.826909:  10%|#         | 2/20 [00:02<00:23,  1.29s/it][32m[I 2021-12-30 21:55:11,415][0m Trial 8 finished with value: 3.82690870650089 and parameters: {'num_leaves': 40}. Best is trial 7 with value: 3.82690870650089.[0m


[100]	cv_agg's rmse: 3.87807 + 0.541494
[200]	cv_agg's rmse: 3.84702 + 0.529882


num_leaves, val_score: 3.826909:  15%|#5        | 3/20 [00:03<00:20,  1.19s/it][32m[I 2021-12-30 21:55:12,493][0m Trial 9 finished with value: 3.82690870650089 and parameters: {'num_leaves': 121}. Best is trial 7 with value: 3.82690870650089.[0m
num_leaves, val_score: 3.826909:  15%|#5        | 3/20 [00:03<00:20,  1.19s/it]

[300]	cv_agg's rmse: 3.82931 + 0.502153




[100]	cv_agg's rmse: 3.87807 + 0.541494
[200]	cv_agg's rmse: 3.84702 + 0.529882
[300]	cv_agg's rmse: 3.82931 + 0.502153


num_leaves, val_score: 3.826909:  20%|##        | 4/20 [00:05<00:21,  1.33s/it][32m[I 2021-12-30 21:55:14,040][0m Trial 10 finished with value: 3.82690870650089 and parameters: {'num_leaves': 115}. Best is trial 7 with value: 3.82690870650089.[0m


[100]	cv_agg's rmse: 3.87807 + 0.541494
[200]	cv_agg's rmse: 3.84702 + 0.529882
[300]	cv_agg's rmse: 3.82931 + 0.502153


num_leaves, val_score: 3.826909:  25%|##5       | 5/20 [00:11<00:47,  3.18s/it][32m[I 2021-12-30 21:55:20,487][0m Trial 11 finished with value: 3.82690870650089 and parameters: {'num_leaves': 35}. Best is trial 7 with value: 3.82690870650089.[0m


[100]	cv_agg's rmse: 3.87807 + 0.541494
[200]	cv_agg's rmse: 3.84702 + 0.529882
[300]	cv_agg's rmse: 3.82931 + 0.502153


num_leaves, val_score: 3.826909:  30%|###       | 6/20 [00:13<00:38,  2.74s/it][32m[I 2021-12-30 21:55:22,391][0m Trial 12 finished with value: 3.82690870650089 and parameters: {'num_leaves': 136}. Best is trial 7 with value: 3.82690870650089.[0m


[100]	cv_agg's rmse: 3.87807 + 0.541494
[200]	cv_agg's rmse: 3.84702 + 0.529882


num_leaves, val_score: 3.826909:  35%|###5      | 7/20 [00:14<00:29,  2.23s/it][32m[I 2021-12-30 21:55:23,574][0m Trial 13 finished with value: 3.82690870650089 and parameters: {'num_leaves': 37}. Best is trial 7 with value: 3.82690870650089.[0m
num_leaves, val_score: 3.826909:  35%|###5      | 7/20 [00:14<00:29,  2.23s/it]

[300]	cv_agg's rmse: 3.82931 + 0.502153




[100]	cv_agg's rmse: 3.87807 + 0.541494
[200]	cv_agg's rmse: 3.84702 + 0.529882


num_leaves, val_score: 3.826909:  40%|####      | 8/20 [00:15<00:22,  1.87s/it][32m[I 2021-12-30 21:55:24,657][0m Trial 14 finished with value: 3.82690870650089 and parameters: {'num_leaves': 20}. Best is trial 7 with value: 3.82690870650089.[0m
num_leaves, val_score: 3.826909:  40%|####      | 8/20 [00:15<00:22,  1.87s/it]

[300]	cv_agg's rmse: 3.82931 + 0.502153




[100]	cv_agg's rmse: 3.87807 + 0.541494
[200]	cv_agg's rmse: 3.84702 + 0.529882
[300]	cv_agg's rmse: 3.82931 + 0.502153


num_leaves, val_score: 3.826909:  45%|####5     | 9/20 [00:17<00:19,  1.75s/it][32m[I 2021-12-30 21:55:26,161][0m Trial 15 finished with value: 3.82690870650089 and parameters: {'num_leaves': 242}. Best is trial 7 with value: 3.82690870650089.[0m


[100]	cv_agg's rmse: 3.87807 + 0.541494
[200]	cv_agg's rmse: 3.84702 + 0.529882


num_leaves, val_score: 3.826909:  50%|#####     | 10/20 [00:18<00:15,  1.58s/it][32m[I 2021-12-30 21:55:27,364][0m Trial 16 finished with value: 3.82690870650089 and parameters: {'num_leaves': 52}. Best is trial 7 with value: 3.82690870650089.[0m
num_leaves, val_score: 3.826909:  50%|#####     | 10/20 [00:18<00:15,  1.58s/it]

[300]	cv_agg's rmse: 3.82931 + 0.502153




[100]	cv_agg's rmse: 3.87807 + 0.541494
[200]	cv_agg's rmse: 3.84702 + 0.529882
[300]	cv_agg's rmse: 3.82931 + 0.502153


num_leaves, val_score: 3.826909:  55%|#####5    | 11/20 [00:19<00:13,  1.45s/it][32m[I 2021-12-30 21:55:28,508][0m Trial 17 finished with value: 3.82690870650089 and parameters: {'num_leaves': 225}. Best is trial 7 with value: 3.82690870650089.[0m


[100]	cv_agg's rmse: 3.87807 + 0.541494
[200]	cv_agg's rmse: 3.84702 + 0.529882
[300]	cv_agg's rmse: 3.82931 + 0.502153


num_leaves, val_score: 3.826909:  60%|######    | 12/20 [00:21<00:11,  1.44s/it][32m[I 2021-12-30 21:55:29,927][0m Trial 18 finished with value: 3.82690870650089 and parameters: {'num_leaves': 157}. Best is trial 7 with value: 3.82690870650089.[0m


[100]	cv_agg's rmse: 3.87807 + 0.541494
[200]	cv_agg's rmse: 3.84702 + 0.529882


num_leaves, val_score: 3.826909:  65%|######5   | 13/20 [00:22<00:09,  1.38s/it][32m[I 2021-12-30 21:55:31,157][0m Trial 19 finished with value: 3.82690870650089 and parameters: {'num_leaves': 187}. Best is trial 7 with value: 3.82690870650089.[0m
num_leaves, val_score: 3.826909:  65%|######5   | 13/20 [00:22<00:09,  1.38s/it]

[300]	cv_agg's rmse: 3.82931 + 0.502153




[100]	cv_agg's rmse: 3.87807 + 0.541494
[200]	cv_agg's rmse: 3.84702 + 0.529882
[300]	cv_agg's rmse: 3.82931 + 0.502153


num_leaves, val_score: 3.826909:  70%|#######   | 14/20 [00:23<00:08,  1.37s/it][32m[I 2021-12-30 21:55:32,524][0m Trial 20 finished with value: 3.82690870650089 and parameters: {'num_leaves': 76}. Best is trial 7 with value: 3.82690870650089.[0m


[100]	cv_agg's rmse: 3.87807 + 0.541494
[200]	cv_agg's rmse: 3.84702 + 0.529882
[300]	cv_agg's rmse: 3.82931 + 0.502153


num_leaves, val_score: 3.826909:  75%|#######5  | 15/20 [00:24<00:06,  1.35s/it][32m[I 2021-12-30 21:55:33,821][0m Trial 21 finished with value: 3.82690870650089 and parameters: {'num_leaves': 80}. Best is trial 7 with value: 3.82690870650089.[0m


[100]	cv_agg's rmse: 3.87807 + 0.541494
[200]	cv_agg's rmse: 3.84702 + 0.529882
[300]	cv_agg's rmse: 3.82931 + 0.502153


num_leaves, val_score: 3.826909:  80%|########  | 16/20 [00:26<00:05,  1.34s/it][32m[I 2021-12-30 21:55:35,149][0m Trial 22 finished with value: 3.82690870650089 and parameters: {'num_leaves': 182}. Best is trial 7 with value: 3.82690870650089.[0m


[100]	cv_agg's rmse: 3.87807 + 0.541494
[200]	cv_agg's rmse: 3.84702 + 0.529882
[300]	cv_agg's rmse: 3.82931 + 0.502153


num_leaves, val_score: 3.826909:  85%|########5 | 17/20 [00:27<00:03,  1.32s/it][32m[I 2021-12-30 21:55:36,406][0m Trial 23 finished with value: 3.82690870650089 and parameters: {'num_leaves': 90}. Best is trial 7 with value: 3.82690870650089.[0m


[100]	cv_agg's rmse: 3.87807 + 0.541494
[200]	cv_agg's rmse: 3.84702 + 0.529882


num_leaves, val_score: 3.826909:  90%|######### | 18/20 [00:28<00:02,  1.26s/it][32m[I 2021-12-30 21:55:37,538][0m Trial 24 finished with value: 3.82690870650089 and parameters: {'num_leaves': 88}. Best is trial 7 with value: 3.82690870650089.[0m
num_leaves, val_score: 3.826909:  90%|######### | 18/20 [00:28<00:02,  1.26s/it]

[300]	cv_agg's rmse: 3.82931 + 0.502153




[100]	cv_agg's rmse: 3.87807 + 0.541494
[200]	cv_agg's rmse: 3.84702 + 0.529882
[300]	cv_agg's rmse: 3.82931 + 0.502153


num_leaves, val_score: 3.826909:  95%|#########5| 19/20 [00:30<00:01,  1.33s/it][32m[I 2021-12-30 21:55:39,020][0m Trial 25 finished with value: 3.82690870650089 and parameters: {'num_leaves': 191}. Best is trial 7 with value: 3.82690870650089.[0m


[100]	cv_agg's rmse: 3.87807 + 0.541494
[200]	cv_agg's rmse: 3.84702 + 0.529882


num_leaves, val_score: 3.826909: 100%|##########| 20/20 [00:31<00:00,  1.24s/it][32m[I 2021-12-30 21:55:40,069][0m Trial 26 finished with value: 3.82690870650089 and parameters: {'num_leaves': 151}. Best is trial 7 with value: 3.82690870650089.[0m
num_leaves, val_score: 3.826909: 100%|##########| 20/20 [00:31<00:00,  1.56s/it]


[300]	cv_agg's rmse: 3.82931 + 0.502153




[100]	cv_agg's rmse: 3.90199 + 0.590545
[200]	cv_agg's rmse: 3.87817 + 0.589876


bagging, val_score: 3.826909:  10%|#         | 1/10 [00:01<00:09,  1.07s/it][32m[I 2021-12-30 21:55:41,151][0m Trial 27 finished with value: 3.8571342631271284 and parameters: {'bagging_fraction': 0.851960965413801, 'bagging_freq': 7}. Best is trial 27 with value: 3.8571342631271284.[0m


[100]	cv_agg's rmse: 3.8406 + 0.554623


bagging, val_score: 3.773539:  20%|##        | 2/10 [00:02<00:07,  1.01it/s][32m[I 2021-12-30 21:55:42,082][0m Trial 28 finished with value: 3.773538787013539 and parameters: {'bagging_fraction': 0.7260348407611517, 'bagging_freq': 5}. Best is trial 28 with value: 3.773538787013539.[0m
bagging, val_score: 3.773539:  20%|##        | 2/10 [00:02<00:07,  1.01it/s]

[200]	cv_agg's rmse: 3.79554 + 0.575313




[100]	cv_agg's rmse: 3.86261 + 0.538622
[200]	cv_agg's rmse: 3.82085 + 0.565843


bagging, val_score: 3.773539:  30%|###       | 3/10 [00:05<00:13,  2.00s/it][32m[I 2021-12-30 21:55:45,286][0m Trial 29 finished with value: 3.788702291210447 and parameters: {'bagging_fraction': 0.7381334172441654, 'bagging_freq': 5}. Best is trial 28 with value: 3.773538787013539.[0m


[100]	cv_agg's rmse: 3.89074 + 0.61627
[200]	cv_agg's rmse: 3.85967 + 0.595238


bagging, val_score: 3.773539:  40%|####      | 4/10 [00:06<00:09,  1.64s/it][32m[I 2021-12-30 21:55:46,366][0m Trial 30 finished with value: 3.8389671173053133 and parameters: {'bagging_fraction': 0.9582869752674317, 'bagging_freq': 5}. Best is trial 28 with value: 3.773538787013539.[0m
bagging, val_score: 3.773539:  40%|####      | 4/10 [00:06<00:09,  1.64s/it]

[300]	cv_agg's rmse: 3.86763 + 0.560458




[100]	cv_agg's rmse: 4.17649 + 0.586557
[200]	cv_agg's rmse: 4.00268 + 0.635668


bagging, val_score: 3.773539:  50%|#####     | 5/10 [00:07<00:07,  1.57s/it][32m[I 2021-12-30 21:55:47,822][0m Trial 31 finished with value: 3.8770632643746965 and parameters: {'bagging_fraction': 0.5743656863330115, 'bagging_freq': 7}. Best is trial 28 with value: 3.773538787013539.[0m
bagging, val_score: 3.773539:  50%|#####     | 5/10 [00:07<00:07,  1.57s/it]

[300]	cv_agg's rmse: 3.91783 + 0.642758
[400]	cv_agg's rmse: 3.91507 + 0.613456




[100]	cv_agg's rmse: 4.08331 + 0.555591
[200]	cv_agg's rmse: 3.93284 + 0.569815
[300]	cv_agg's rmse: 3.86011 + 0.576513


bagging, val_score: 3.773539:  60%|######    | 6/10 [00:10<00:08,  2.14s/it][32m[I 2021-12-30 21:55:51,063][0m Trial 32 finished with value: 3.835715985973566 and parameters: {'bagging_fraction': 0.5784145201069739, 'bagging_freq': 5}. Best is trial 28 with value: 3.773538787013539.[0m


[100]	cv_agg's rmse: 3.91262 + 0.614562
[200]	cv_agg's rmse: 3.84976 + 0.619455
[300]	cv_agg's rmse: 3.83259 + 0.569547


bagging, val_score: 3.773539:  70%|#######   | 7/10 [00:12<00:05,  1.82s/it][32m[I 2021-12-30 21:55:52,235][0m Trial 33 finished with value: 3.82718706189321 and parameters: {'bagging_fraction': 0.891555816967092, 'bagging_freq': 4}. Best is trial 28 with value: 3.773538787013539.[0m


[100]	cv_agg's rmse: 4.33562 + 0.433677
[200]	cv_agg's rmse: 4.12378 + 0.532491


bagging, val_score: 3.773539:  80%|########  | 8/10 [00:13<00:03,  1.59s/it][32m[I 2021-12-30 21:55:53,312][0m Trial 34 finished with value: 3.9914279143109574 and parameters: {'bagging_fraction': 0.44308558397621217, 'bagging_freq': 5}. Best is trial 28 with value: 3.773538787013539.[0m
bagging, val_score: 3.773539:  80%|########  | 8/10 [00:13<00:03,  1.59s/it]

[300]	cv_agg's rmse: 4.01824 + 0.584076




[100]	cv_agg's rmse: 4.11259 + 0.557603
[200]	cv_agg's rmse: 3.93275 + 0.613511
[300]	cv_agg's rmse: 3.92312 + 0.549104


bagging, val_score: 3.773539:  90%|######### | 9/10 [00:14<00:01,  1.48s/it][32m[I 2021-12-30 21:55:54,568][0m Trial 35 finished with value: 3.856923550845599 and parameters: {'bagging_fraction': 0.5843136701772395, 'bagging_freq': 4}. Best is trial 28 with value: 3.773538787013539.[0m
bagging, val_score: 3.773539:  90%|######### | 9/10 [00:14<00:01,  1.48s/it]

[400]	cv_agg's rmse: 3.89455 + 0.550216




[100]	cv_agg's rmse: 3.99059 + 0.573779
[200]	cv_agg's rmse: 3.87143 + 0.6021
[300]	cv_agg's rmse: 3.8319 + 0.590891
[400]	cv_agg's rmse: 3.81423 + 0.56274


bagging, val_score: 3.773539: 100%|##########| 10/10 [00:15<00:00,  1.44s/it][32m[I 2021-12-30 21:55:55,917][0m Trial 36 finished with value: 3.799165575879681 and parameters: {'bagging_fraction': 0.6148748297370408, 'bagging_freq': 3}. Best is trial 28 with value: 3.773538787013539.[0m
bagging, val_score: 3.773539: 100%|##########| 10/10 [00:15<00:00,  1.58s/it]


[100]	cv_agg's rmse: 3.8406 + 0.554623


feature_fraction_stage2, val_score: 3.773539:  33%|###3      | 1/3 [00:00<00:01,  1.14it/s][32m[I 2021-12-30 21:55:56,806][0m Trial 37 finished with value: 3.773538787013539 and parameters: {'feature_fraction': 0.41600000000000004}. Best is trial 37 with value: 3.773538787013539.[0m
feature_fraction_stage2, val_score: 3.773539:  33%|###3      | 1/3 [00:00<00:01,  1.14it/s]

[200]	cv_agg's rmse: 3.79554 + 0.575313




[100]	cv_agg's rmse: 3.8406 + 0.554623
[200]	cv_agg's rmse: 3.79554 + 0.575313


feature_fraction_stage2, val_score: 3.773539:  67%|######6   | 2/3 [00:02<00:01,  1.13s/it][32m[I 2021-12-30 21:55:58,114][0m Trial 38 finished with value: 3.773538787013539 and parameters: {'feature_fraction': 0.44800000000000006}. Best is trial 37 with value: 3.773538787013539.[0m


[100]	cv_agg's rmse: 3.8406 + 0.554623
[200]	cv_agg's rmse: 3.79554 + 0.575313


feature_fraction_stage2, val_score: 3.773539: 100%|##########| 3/3 [00:03<00:00,  1.01s/it][32m[I 2021-12-30 21:55:58,990][0m Trial 39 finished with value: 3.773538787013539 and parameters: {'feature_fraction': 0.48000000000000004}. Best is trial 37 with value: 3.773538787013539.[0m
feature_fraction_stage2, val_score: 3.773539: 100%|##########| 3/3 [00:03<00:00,  1.02s/it]


[100]	cv_agg's rmse: 3.84061 + 0.554609


regularization_factors, val_score: 3.773539:   5%|5         | 1/20 [00:00<00:14,  1.28it/s][32m[I 2021-12-30 21:55:59,786][0m Trial 40 finished with value: 3.7745996454663038 and parameters: {'lambda_l1': 0.0014542619305818996, 'lambda_l2': 2.00898380739263e-07}. Best is trial 40 with value: 3.7745996454663038.[0m
regularization_factors, val_score: 3.773539:   5%|5         | 1/20 [00:00<00:14,  1.28it/s]

[200]	cv_agg's rmse: 3.79655 + 0.576607




[100]	cv_agg's rmse: 3.84063 + 0.554598
[200]	cv_agg's rmse: 3.79654 + 0.576603


regularization_factors, val_score: 3.773539:  10%|#         | 2/20 [00:01<00:16,  1.08it/s][32m[I 2021-12-30 21:56:00,815][0m Trial 41 finished with value: 3.7746068044444896 and parameters: {'lambda_l1': 0.004230147128232097, 'lambda_l2': 2.6106524854258748e-06}. Best is trial 40 with value: 3.7745996454663038.[0m


[100]	cv_agg's rmse: 3.83005 + 0.555028


regularization_factors, val_score: 3.773168:  15%|#5        | 3/20 [00:05<00:35,  2.10s/it][32m[I 2021-12-30 21:56:04,304][0m Trial 42 finished with value: 3.7731680433988997 and parameters: {'lambda_l1': 7.169140126536836e-05, 'lambda_l2': 0.016553814565382553}. Best is trial 42 with value: 3.7731680433988997.[0m
regularization_factors, val_score: 3.773168:  15%|#5        | 3/20 [00:05<00:35,  2.10s/it]

[200]	cv_agg's rmse: 3.79059 + 0.583713




[100]	cv_agg's rmse: 3.8406 + 0.554623
[200]	cv_agg's rmse: 3.79554 + 0.575313


regularization_factors, val_score: 3.773168:  20%|##        | 4/20 [00:06<00:26,  1.65s/it][32m[I 2021-12-30 21:56:05,274][0m Trial 43 finished with value: 3.773538943116845 and parameters: {'lambda_l1': 6.303111061006723e-05, 'lambda_l2': 4.436361289555192e-07}. Best is trial 42 with value: 3.7731680433988997.[0m


[100]	cv_agg's rmse: 3.93949 + 0.538601
[200]	cv_agg's rmse: 3.82864 + 0.579964


regularization_factors, val_score: 3.773168:  25%|##5       | 5/20 [00:07<00:21,  1.41s/it][32m[I 2021-12-30 21:56:06,268][0m Trial 44 finished with value: 3.802834852828885 and parameters: {'lambda_l1': 0.0067319326983694095, 'lambda_l2': 6.386423640388836}. Best is trial 42 with value: 3.7731680433988997.[0m
regularization_factors, val_score: 3.773168:  25%|##5       | 5/20 [00:07<00:21,  1.41s/it]

[300]	cv_agg's rmse: 3.81123 + 0.585559




[100]	cv_agg's rmse: 3.82998 + 0.555109


regularization_factors, val_score: 3.773168:  30%|###       | 6/20 [00:08<00:17,  1.26s/it][32m[I 2021-12-30 21:56:07,230][0m Trial 45 finished with value: 3.7768938788866535 and parameters: {'lambda_l1': 6.551076416492275e-05, 'lambda_l2': 0.018627419427505672}. Best is trial 42 with value: 3.7731680433988997.[0m
regularization_factors, val_score: 3.773168:  30%|###       | 6/20 [00:08<00:17,  1.26s/it]

[200]	cv_agg's rmse: 3.79124 + 0.583161




[100]	cv_agg's rmse: 3.83897 + 0.5468
[200]	cv_agg's rmse: 3.78393 + 0.584749


regularization_factors, val_score: 3.762518:  35%|###5      | 7/20 [00:09<00:14,  1.11s/it][32m[I 2021-12-30 21:56:08,024][0m Trial 46 finished with value: 3.7625177926601325 and parameters: {'lambda_l1': 0.00010723161749754091, 'lambda_l2': 0.11607744427424994}. Best is trial 46 with value: 3.7625177926601325.[0m


[100]	cv_agg's rmse: 3.83526 + 0.549704
[200]	cv_agg's rmse: 3.79281 + 0.586694


regularization_factors, val_score: 3.762518:  40%|####      | 8/20 [00:09<00:12,  1.03s/it][32m[I 2021-12-30 21:56:08,878][0m Trial 47 finished with value: 3.7727844256338114 and parameters: {'lambda_l1': 0.03633160144300175, 'lambda_l2': 0.0004445927461802615}. Best is trial 46 with value: 3.7625177926601325.[0m


[100]	cv_agg's rmse: 3.8406 + 0.554614


regularization_factors, val_score: 3.762518:  45%|####5     | 9/20 [00:10<00:10,  1.02it/s][32m[I 2021-12-30 21:56:09,743][0m Trial 48 finished with value: 3.7735416642775084 and parameters: {'lambda_l1': 0.000541902165398374, 'lambda_l2': 1.3074983450614036e-05}. Best is trial 46 with value: 3.7625177926601325.[0m
regularization_factors, val_score: 3.762518:  45%|####5     | 9/20 [00:10<00:10,  1.02it/s]

[200]	cv_agg's rmse: 3.79554 + 0.575307




[100]	cv_agg's rmse: 3.84063 + 0.554607
[200]	cv_agg's rmse: 3.79554 + 0.575307


regularization_factors, val_score: 3.762518:  50%|#####     | 10/20 [00:11<00:09,  1.08it/s][32m[I 2021-12-30 21:56:10,568][0m Trial 49 finished with value: 3.773547030378384 and parameters: {'lambda_l1': 0.00032006813005368815, 'lambda_l2': 0.0027191311367232187}. Best is trial 46 with value: 3.7625177926601325.[0m


[100]	cv_agg's rmse: 3.84999 + 0.536403
[200]	cv_agg's rmse: 3.79753 + 0.551616


regularization_factors, val_score: 3.762518:  55%|#####5    | 11/20 [00:12<00:08,  1.06it/s][32m[I 2021-12-30 21:56:11,549][0m Trial 50 finished with value: 3.782155974001023 and parameters: {'lambda_l1': 6.940896089188272e-08, 'lambda_l2': 0.4710891680293815}. Best is trial 46 with value: 3.7625177926601325.[0m
regularization_factors, val_score: 3.762518:  55%|#####5    | 11/20 [00:12<00:08,  1.06it/s]

[300]	cv_agg's rmse: 3.80341 + 0.56144




[100]	cv_agg's rmse: 3.85937 + 0.51366
[200]	cv_agg's rmse: 3.80297 + 0.559591


regularization_factors, val_score: 3.762518:  60%|######    | 12/20 [00:13<00:07,  1.12it/s][32m[I 2021-12-30 21:56:12,320][0m Trial 51 finished with value: 3.781433388463486 and parameters: {'lambda_l1': 3.4479740157046264, 'lambda_l2': 0.0001167451839241661}. Best is trial 46 with value: 3.7625177926601325.[0m


[100]	cv_agg's rmse: 3.82084 + 0.536733
[200]	cv_agg's rmse: 3.77835 + 0.565428


regularization_factors, val_score: 3.758110:  65%|######5   | 13/20 [00:14<00:05,  1.18it/s][32m[I 2021-12-30 21:56:13,060][0m Trial 52 finished with value: 3.758109733189707 and parameters: {'lambda_l1': 0.821964022476991, 'lambda_l2': 0.0004011295898825152}. Best is trial 52 with value: 3.758109733189707.[0m


[100]	cv_agg's rmse: 3.8406 + 0.554623
[200]	cv_agg's rmse: 3.79554 + 0.575313


regularization_factors, val_score: 3.758110:  70%|#######   | 14/20 [00:15<00:06,  1.05s/it][32m[I 2021-12-30 21:56:14,573][0m Trial 53 finished with value: 3.773538788910431 and parameters: {'lambda_l1': 6.966595612803856e-07, 'lambda_l2': 1.3661256816549483e-08}. Best is trial 52 with value: 3.758109733189707.[0m


[100]	cv_agg's rmse: 3.94216 + 0.496197
[200]	cv_agg's rmse: 3.83865 + 0.542046
[300]	cv_agg's rmse: 3.80074 + 0.563662
[400]	cv_agg's rmse: 3.77876 + 0.578356
[500]	cv_agg's rmse: 3.77522 + 0.575417
[600]	cv_agg's rmse: 3.7722 + 0.575047


regularization_factors, val_score: 3.758110:  75%|#######5  | 15/20 [00:21<00:13,  2.63s/it][32m[I 2021-12-30 21:56:20,856][0m Trial 54 finished with value: 3.763662423970561 and parameters: {'lambda_l1': 8.690978795876562, 'lambda_l2': 0.15980879806344106}. Best is trial 52 with value: 3.758109733189707.[0m


[100]	cv_agg's rmse: 3.83784 + 0.543717


regularization_factors, val_score: 3.758110:  80%|########  | 16/20 [00:22<00:08,  2.12s/it][32m[I 2021-12-30 21:56:21,786][0m Trial 55 finished with value: 3.7748909587672363 and parameters: {'lambda_l1': 0.3134553879846451, 'lambda_l2': 0.00022935736124507413}. Best is trial 52 with value: 3.758109733189707.[0m
regularization_factors, val_score: 3.758110:  80%|########  | 16/20 [00:22<00:08,  2.12s/it]

[200]	cv_agg's rmse: 3.78877 + 0.580617




[100]	cv_agg's rmse: 3.85123 + 0.526895
[200]	cv_agg's rmse: 3.80638 + 0.569517


regularization_factors, val_score: 3.758110:  85%|########5 | 17/20 [00:24<00:05,  1.86s/it][32m[I 2021-12-30 21:56:23,056][0m Trial 56 finished with value: 3.784865207061786 and parameters: {'lambda_l1': 2.902007100691075e-06, 'lambda_l2': 0.5977358193202031}. Best is trial 52 with value: 3.758109733189707.[0m


[100]	cv_agg's rmse: 3.95068 + 0.521676
[200]	cv_agg's rmse: 3.82399 + 0.56025
[300]	cv_agg's rmse: 3.80316 + 0.576006
[400]	cv_agg's rmse: 3.80038 + 0.566758


regularization_factors, val_score: 3.758110:  90%|######### | 18/20 [00:25<00:03,  1.71s/it][32m[I 2021-12-30 21:56:24,430][0m Trial 57 finished with value: 3.794995653444994 and parameters: {'lambda_l1': 0.11435426835369937, 'lambda_l2': 7.509605470483008}. Best is trial 52 with value: 3.758109733189707.[0m
regularization_factors, val_score: 3.758110:  90%|######### | 18/20 [00:25<00:03,  1.71s/it]

[500]	cv_agg's rmse: 3.81134 + 0.560508




[100]	cv_agg's rmse: 3.83592 + 0.548165
[200]	cv_agg's rmse: 3.79837 + 0.573832


regularization_factors, val_score: 3.758110:  95%|#########5| 19/20 [00:26<00:01,  1.52s/it][32m[I 2021-12-30 21:56:25,489][0m Trial 58 finished with value: 3.7799369363554325 and parameters: {'lambda_l1': 4.450561998926284e-06, 'lambda_l2': 0.009307354449504373}. Best is trial 52 with value: 3.758109733189707.[0m


[100]	cv_agg's rmse: 3.83628 + 0.548413
[200]	cv_agg's rmse: 3.79906 + 0.58205


regularization_factors, val_score: 3.758110: 100%|##########| 20/20 [00:27<00:00,  1.28s/it][32m[I 2021-12-30 21:56:26,204][0m Trial 59 finished with value: 3.779133211009783 and parameters: {'lambda_l1': 0.025735168461511963, 'lambda_l2': 2.6239637440799195e-05}. Best is trial 52 with value: 3.758109733189707.[0m
regularization_factors, val_score: 3.758110: 100%|##########| 20/20 [00:27<00:00,  1.36s/it]


[100]	cv_agg's rmse: 4.05196 + 0.548869
[200]	cv_agg's rmse: 3.89468 + 0.596483


min_data_in_leaf, val_score: 3.758110:  20%|##        | 1/5 [00:01<00:04,  1.08s/it][32m[I 2021-12-30 21:56:27,291][0m Trial 60 finished with value: 3.831634458049406 and parameters: {'min_child_samples': 25}. Best is trial 60 with value: 3.831634458049406.[0m
min_data_in_leaf, val_score: 3.758110:  20%|##        | 1/5 [00:01<00:04,  1.08s/it]

[300]	cv_agg's rmse: 3.84441 + 0.590248




[100]	cv_agg's rmse: 6.288 + 0.499431
[200]	cv_agg's rmse: 6.28213 + 0.497649


min_data_in_leaf, val_score: 3.758110:  40%|####      | 2/5 [00:01<00:02,  1.10it/s][32m[I 2021-12-30 21:56:28,089][0m Trial 61 finished with value: 6.226962827235945 and parameters: {'min_child_samples': 100}. Best is trial 60 with value: 3.831634458049406.[0m
min_data_in_leaf, val_score: 3.758110:  40%|####      | 2/5 [00:01<00:02,  1.10it/s]

[300]	cv_agg's rmse: 6.25539 + 0.4884
[400]	cv_agg's rmse: 6.23657 + 0.477211




[100]	cv_agg's rmse: 3.77418 + 0.472748


min_data_in_leaf, val_score: 3.745051:  60%|######    | 3/5 [00:02<00:01,  1.06it/s][32m[I 2021-12-30 21:56:29,077][0m Trial 62 finished with value: 3.74505135521175 and parameters: {'min_child_samples': 10}. Best is trial 62 with value: 3.74505135521175.[0m
min_data_in_leaf, val_score: 3.745051:  60%|######    | 3/5 [00:02<00:01,  1.06it/s]

[200]	cv_agg's rmse: 3.80876 + 0.469713




[100]	cv_agg's rmse: 3.87261 + 0.444554


min_data_in_leaf, val_score: 3.745051:  80%|########  | 4/5 [00:03<00:00,  1.06it/s][32m[I 2021-12-30 21:56:30,016][0m Trial 63 finished with value: 3.8393076165530955 and parameters: {'min_child_samples': 5}. Best is trial 62 with value: 3.74505135521175.[0m


[100]	cv_agg's rmse: 4.91372 + 0.45545
[200]	cv_agg's rmse: 4.74367 + 0.462594
[300]	cv_agg's rmse: 4.6469 + 0.441924
[400]	cv_agg's rmse: 4.56992 + 0.417832
[500]	cv_agg's rmse: 4.53881 + 0.423919
[600]	cv_agg's rmse: 4.48926 + 0.384294
[700]	cv_agg's rmse: 4.48542 + 0.343205


min_data_in_leaf, val_score: 3.745051: 100%|##########| 5/5 [00:19<00:00,  6.38s/it][32m[I 2021-12-30 21:56:46,033][0m Trial 64 finished with value: 4.4682301346922015 and parameters: {'min_child_samples': 50}. Best is trial 62 with value: 3.74505135521175.[0m
min_data_in_leaf, val_score: 3.745051: 100%|##########| 5/5 [00:19<00:00,  3.96s/it]

[800]	cv_agg's rmse: 4.49427 + 0.352493





In [None]:
best_params = tuner.best_params
best_params

{'objective': 'regression',
 'metric': 'rmse',
 'verbosity': -1,
 'random_seed': 0,
 'boosting_type': 'gbdt',
 'feature_pre_filter': False,
 'lambda_l1': 0.821964022476991,
 'lambda_l2': 0.0004011295898825152,
 'num_leaves': 31,
 'feature_fraction': 0.4,
 'bagging_fraction': 0.7260348407611517,
 'bagging_freq': 5,
 'min_child_samples': 10}