In [1]:
import lightgbm as lgb

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_log_error, r2_score, mean_squared_error


In [2]:
df_train = pd.read_csv("processed/processed_train_2.csv")
df_test = pd.read_csv("processed/processed_test_2.csv")
df_train

Unnamed: 0,full_sq,life_sq,floor,max_floor,build_year,num_room,kitch_sq,state,area_m,raion_popul,...,railroad_1line_no,railroad_1line_yes,material_1.0,material_2.0,material_3.0,material_4.0,material_5.0,material_6.0,material_nan,price_doc
0,43,27.000000,4.0,12.559171,1879,1.909844,6.399244,2.105145,6.407578e+06,155572,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,5.668660e+06
1,34,19.000000,3.0,12.559171,1879,1.909844,6.399244,2.105145,9.589337e+06,115352,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,5.814010e+06
2,43,29.000000,2.0,12.559171,1879,1.909844,6.399244,2.105145,4.808270e+06,101708,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,5.523310e+06
3,89,50.000000,9.0,12.559171,1879,1.909844,6.399244,2.105145,1.258354e+07,178473,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.269391e+07
4,77,77.000000,4.0,12.559171,1879,1.909844,6.399244,2.105145,8.398461e+06,108171,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.582519e+07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30464,44,27.000000,7.0,9.000000,1975,2.000000,6.000000,3.000000,1.005305e+07,175518,...,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,7.170610e+06
30465,86,59.000000,3.0,9.000000,1935,4.000000,10.000000,3.000000,7.307411e+06,75377,...,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,2.422501e+07
30466,45,34.404467,10.0,20.000000,1879,1.000000,1.000000,1.000000,2.553630e+07,4001,...,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,6.754869e+06
30467,64,32.000000,5.0,15.000000,2003,2.000000,11.000000,2.000000,6.050065e+06,78616,...,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.308151e+07


In [3]:
X_train, X_val, y_train, y_val = train_test_split(df_train.drop(columns=['price_doc']),
                                                 df_train['price_doc'], test_size=0.15, random_state=42)

In [4]:
## https://neptune.ai/blog/lightgbm-parameters-guide

In [5]:
from sklearn.metrics import mean_squared_error
import optuna

def objective(trial):
    params = {
        "objective": "regression",
        "metric": "rmse",
        "verbosity": -1,
        "n_estimators": trial.suggest_int("n_estimators", 800, 1200),
        "boosting": trial.suggest_categorical("boosting", ["gbdt", "rf", "dart"]),
        "lambda_l2": trial.suggest_float("lambda_l2", 0, 10),
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0, 1),
        "bagging_freq": 1,
        "num_leaves": trial.suggest_int("num_leaves", 2, 2**10),
        "feature_fraction": trial.suggest_float("feature_fraction", 0.5, 1),
        "max_depth": trial.suggest_int("max_depth", 1, 50),
        "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.1, log=True),
        "subsample": trial.suggest_float("subsample", 0.05, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.05, 1.0),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 10, 100),
    }

    model = lgb.LGBMRegressor(**params)
    model.fit(X_train, y_train, verbose=False)
    predictions = model.predict(X_val)
    rmse = mean_squared_error(y_val, predictions, squared=False)
    return rmse

In [6]:
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)

[I 2023-10-30 18:24:38,750] A new study created in memory with name: no-name-698c5a4f-4e01-405f-8f7b-df472a06e2d4




[I 2023-10-30 18:24:44,034] Trial 0 finished with value: 2653111.496233502 and parameters: {'n_estimators': 1152, 'boosting': 'gbdt', 'lambda_l2': 2.229449447868286, 'bagging_fraction': 0.9112454745493027, 'num_leaves': 565, 'feature_fraction': 0.8864492913112696, 'max_depth': 4, 'learning_rate': 0.004187587848234434, 'subsample': 0.9122521994357183, 'colsample_bytree': 0.7292891314199127, 'min_data_in_leaf': 59}. Best is trial 0 with value: 2653111.496233502.




[I 2023-10-30 18:24:54,009] Trial 1 finished with value: 5063542.70817733 and parameters: {'n_estimators': 1171, 'boosting': 'dart', 'lambda_l2': 9.638169946599145, 'bagging_fraction': 0.06697772855853035, 'num_leaves': 26, 'feature_fraction': 0.8368953192500224, 'max_depth': 28, 'learning_rate': 0.002022372163629401, 'subsample': 0.38879502438126395, 'colsample_bytree': 0.07639779896178667, 'min_data_in_leaf': 52}. Best is trial 0 with value: 2653111.496233502.




[I 2023-10-30 18:26:00,911] Trial 2 finished with value: 2649372.3807400647 and parameters: {'n_estimators': 1001, 'boosting': 'dart', 'lambda_l2': 9.434116362659111, 'bagging_fraction': 0.9702489049875914, 'num_leaves': 495, 'feature_fraction': 0.6798971727984808, 'max_depth': 30, 'learning_rate': 0.04656280774257206, 'subsample': 0.27606466614689573, 'colsample_bytree': 0.6693521122102537, 'min_data_in_leaf': 14}. Best is trial 2 with value: 2649372.3807400647.




[I 2023-10-30 18:26:19,552] Trial 3 finished with value: 2887450.253879538 and parameters: {'n_estimators': 929, 'boosting': 'rf', 'lambda_l2': 9.574498793755026, 'bagging_fraction': 0.9984822282939793, 'num_leaves': 254, 'feature_fraction': 0.6440744921664677, 'max_depth': 17, 'learning_rate': 0.01669779151533324, 'subsample': 0.14420802656911746, 'colsample_bytree': 0.19969617221877556, 'min_data_in_leaf': 85}. Best is trial 2 with value: 2649372.3807400647.




[I 2023-10-30 18:27:06,841] Trial 4 finished with value: 2798080.0941294325 and parameters: {'n_estimators': 1163, 'boosting': 'rf', 'lambda_l2': 0.015269066930787556, 'bagging_fraction': 0.8236338611160338, 'num_leaves': 393, 'feature_fraction': 0.627860098154764, 'max_depth': 47, 'learning_rate': 0.07142730700257663, 'subsample': 0.27629327762377853, 'colsample_bytree': 0.6255932193635804, 'min_data_in_leaf': 44}. Best is trial 2 with value: 2649372.3807400647.




[I 2023-10-30 18:27:10,452] Trial 5 finished with value: 2662514.532616621 and parameters: {'n_estimators': 979, 'boosting': 'gbdt', 'lambda_l2': 6.460269341519112, 'bagging_fraction': 0.5344675614153079, 'num_leaves': 244, 'feature_fraction': 0.9267144715298715, 'max_depth': 4, 'learning_rate': 0.0069023200723682, 'subsample': 0.5023640890089518, 'colsample_bytree': 0.6690839925004739, 'min_data_in_leaf': 83}. Best is trial 2 with value: 2649372.3807400647.




[I 2023-10-30 18:27:15,860] Trial 6 finished with value: 2633005.9314145357 and parameters: {'n_estimators': 863, 'boosting': 'gbdt', 'lambda_l2': 7.081957241444616, 'bagging_fraction': 0.3349264969672755, 'num_leaves': 570, 'feature_fraction': 0.668191068538806, 'max_depth': 10, 'learning_rate': 0.0242251622886038, 'subsample': 0.056988194347188, 'colsample_bytree': 0.6146399978037402, 'min_data_in_leaf': 67}. Best is trial 6 with value: 2633005.9314145357.




[I 2023-10-30 18:27:28,762] Trial 7 finished with value: 2783107.544061262 and parameters: {'n_estimators': 889, 'boosting': 'dart', 'lambda_l2': 3.0879600143198283, 'bagging_fraction': 0.09773318745060122, 'num_leaves': 288, 'feature_fraction': 0.8792269504163504, 'max_depth': 9, 'learning_rate': 0.09697202251916945, 'subsample': 0.19105586408410136, 'colsample_bytree': 0.4204890549094711, 'min_data_in_leaf': 71}. Best is trial 6 with value: 2633005.9314145357.




[I 2023-10-30 18:28:22,486] Trial 8 finished with value: 2765679.2942053494 and parameters: {'n_estimators': 1174, 'boosting': 'rf', 'lambda_l2': 5.387873717441025, 'bagging_fraction': 0.7685205860690892, 'num_leaves': 325, 'feature_fraction': 0.9329349147609525, 'max_depth': 46, 'learning_rate': 0.003023517330694156, 'subsample': 0.5353464141652452, 'colsample_bytree': 0.8362991036046267, 'min_data_in_leaf': 24}. Best is trial 6 with value: 2633005.9314145357.




[I 2023-10-30 18:28:35,889] Trial 9 finished with value: 3056635.2707226896 and parameters: {'n_estimators': 1135, 'boosting': 'dart', 'lambda_l2': 4.670572915079738, 'bagging_fraction': 0.11271251738787391, 'num_leaves': 465, 'feature_fraction': 0.7399147173838689, 'max_depth': 5, 'learning_rate': 0.010173360299773405, 'subsample': 0.43317303069872287, 'colsample_bytree': 0.8884746853600546, 'min_data_in_leaf': 33}. Best is trial 6 with value: 2633005.9314145357.




[I 2023-10-30 18:28:41,846] Trial 10 finished with value: 3470731.1580717294 and parameters: {'n_estimators': 801, 'boosting': 'gbdt', 'lambda_l2': 7.466391282843988, 'bagging_fraction': 0.3031762656187183, 'num_leaves': 916, 'feature_fraction': 0.5032090084887042, 'max_depth': 18, 'learning_rate': 0.001192744441625588, 'subsample': 0.07653301741254136, 'colsample_bytree': 0.9823177934971018, 'min_data_in_leaf': 100}. Best is trial 6 with value: 2633005.9314145357.




[I 2023-10-30 18:29:42,674] Trial 11 finished with value: 2650244.7372420058 and parameters: {'n_estimators': 1049, 'boosting': 'gbdt', 'lambda_l2': 8.12786438020596, 'bagging_fraction': 0.5905393581135105, 'num_leaves': 680, 'feature_fraction': 0.7292765580242468, 'max_depth': 33, 'learning_rate': 0.028095363693427217, 'subsample': 0.05381809386711214, 'colsample_bytree': 0.4925608827762196, 'min_data_in_leaf': 14}. Best is trial 6 with value: 2633005.9314145357.




[I 2023-10-30 18:30:45,749] Trial 12 finished with value: 2640762.701465851 and parameters: {'n_estimators': 1039, 'boosting': 'dart', 'lambda_l2': 8.237511108553496, 'bagging_fraction': 0.3902331221313592, 'num_leaves': 759, 'feature_fraction': 0.6604120390449859, 'max_depth': 36, 'learning_rate': 0.036396310546215095, 'subsample': 0.2606242307855746, 'colsample_bytree': 0.5612894342757714, 'min_data_in_leaf': 10}. Best is trial 6 with value: 2633005.9314145357.




[I 2023-10-30 18:30:59,260] Trial 13 finished with value: 2654022.1467867475 and parameters: {'n_estimators': 1069, 'boosting': 'gbdt', 'lambda_l2': 7.654014307146344, 'bagging_fraction': 0.35520695367360766, 'num_leaves': 771, 'feature_fraction': 0.5787815636227573, 'max_depth': 37, 'learning_rate': 0.02579874980233953, 'subsample': 0.05014456627250638, 'colsample_bytree': 0.39370656028609025, 'min_data_in_leaf': 68}. Best is trial 6 with value: 2633005.9314145357.




[I 2023-10-30 18:31:28,453] Trial 14 finished with value: 2655805.174107333 and parameters: {'n_estimators': 817, 'boosting': 'dart', 'lambda_l2': 6.434660979148086, 'bagging_fraction': 0.3647839870950125, 'num_leaves': 1002, 'feature_fraction': 0.791978633198261, 'max_depth': 18, 'learning_rate': 0.042730436436864985, 'subsample': 0.22336892583737755, 'colsample_bytree': 0.5490541511623811, 'min_data_in_leaf': 39}. Best is trial 6 with value: 2633005.9314145357.




[I 2023-10-30 18:31:44,242] Trial 15 finished with value: 2946080.97899715 and parameters: {'n_estimators': 873, 'boosting': 'dart', 'lambda_l2': 8.092201388615823, 'bagging_fraction': 0.21516104907889821, 'num_leaves': 784, 'feature_fraction': 0.6999920734330773, 'max_depth': 41, 'learning_rate': 0.013917680659520103, 'subsample': 0.34489457116436706, 'colsample_bytree': 0.34235105267508753, 'min_data_in_leaf': 56}. Best is trial 6 with value: 2633005.9314145357.




[I 2023-10-30 18:32:14,322] Trial 16 finished with value: 2621276.231498524 and parameters: {'n_estimators': 1071, 'boosting': 'gbdt', 'lambda_l2': 8.468523355212147, 'bagging_fraction': 0.5010040820685515, 'num_leaves': 677, 'feature_fraction': 0.7806206714381678, 'max_depth': 24, 'learning_rate': 0.021390599834400743, 'subsample': 0.18018217771372194, 'colsample_bytree': 0.5591040313982958, 'min_data_in_leaf': 26}. Best is trial 16 with value: 2621276.231498524.




[I 2023-10-30 18:32:42,747] Trial 17 finished with value: 2592383.289728547 and parameters: {'n_estimators': 1099, 'boosting': 'gbdt', 'lambda_l2': 6.34626319719219, 'bagging_fraction': 0.606401497014063, 'num_leaves': 634, 'feature_fraction': 0.825580749602095, 'max_depth': 23, 'learning_rate': 0.019272861579545896, 'subsample': 0.18101363470868598, 'colsample_bytree': 0.7527783817596005, 'min_data_in_leaf': 28}. Best is trial 17 with value: 2592383.289728547.




[I 2023-10-30 18:33:19,679] Trial 18 finished with value: 2577051.200226115 and parameters: {'n_estimators': 1104, 'boosting': 'gbdt', 'lambda_l2': 5.801561587337013, 'bagging_fraction': 0.6494653540018881, 'num_leaves': 626, 'feature_fraction': 0.7892495091448404, 'max_depth': 23, 'learning_rate': 0.008028784316971306, 'subsample': 0.18237713572683323, 'colsample_bytree': 0.7779908511174316, 'min_data_in_leaf': 26}. Best is trial 18 with value: 2577051.200226115.




[I 2023-10-30 18:34:01,408] Trial 19 finished with value: 2561774.161927067 and parameters: {'n_estimators': 1096, 'boosting': 'gbdt', 'lambda_l2': 5.367177824334458, 'bagging_fraction': 0.6432460581908271, 'num_leaves': 897, 'feature_fraction': 0.9874999981988422, 'max_depth': 24, 'learning_rate': 0.007714193394097146, 'subsample': 0.605547027712792, 'colsample_bytree': 0.7598084571106424, 'min_data_in_leaf': 26}. Best is trial 19 with value: 2561774.161927067.




[I 2023-10-30 18:34:23,174] Trial 20 finished with value: 2551799.9930268377 and parameters: {'n_estimators': 1102, 'boosting': 'gbdt', 'lambda_l2': 5.003475419441374, 'bagging_fraction': 0.6823455645912265, 'num_leaves': 897, 'feature_fraction': 0.977922910390989, 'max_depth': 13, 'learning_rate': 0.006790994041786697, 'subsample': 0.6328088366609974, 'colsample_bytree': 0.8005305948508215, 'min_data_in_leaf': 43}. Best is trial 20 with value: 2551799.9930268377.




[I 2023-10-30 18:34:43,092] Trial 21 finished with value: 2549413.6181684 and parameters: {'n_estimators': 1111, 'boosting': 'gbdt', 'lambda_l2': 4.986326594579468, 'bagging_fraction': 0.6726362178929178, 'num_leaves': 903, 'feature_fraction': 0.9997009943799137, 'max_depth': 13, 'learning_rate': 0.0063106163502641725, 'subsample': 0.6300808893105623, 'colsample_bytree': 0.8077059623500861, 'min_data_in_leaf': 41}. Best is trial 21 with value: 2549413.6181684.




[I 2023-10-30 18:35:01,329] Trial 22 finished with value: 2550612.220459148 and parameters: {'n_estimators': 1200, 'boosting': 'gbdt', 'lambda_l2': 4.486716200591772, 'bagging_fraction': 0.6966382622933757, 'num_leaves': 883, 'feature_fraction': 0.9843873411868239, 'max_depth': 11, 'learning_rate': 0.00572818749016262, 'subsample': 0.6615344974491301, 'colsample_bytree': 0.8971625055598238, 'min_data_in_leaf': 45}. Best is trial 21 with value: 2549413.6181684.




[I 2023-10-30 18:35:20,087] Trial 23 finished with value: 2555749.336756417 and parameters: {'n_estimators': 1193, 'boosting': 'gbdt', 'lambda_l2': 4.288608598019909, 'bagging_fraction': 0.7347316911305052, 'num_leaves': 1021, 'feature_fraction': 0.9992692857998761, 'max_depth': 11, 'learning_rate': 0.0050374900817086125, 'subsample': 0.6759225309632929, 'colsample_bytree': 0.9276823738685074, 'min_data_in_leaf': 48}. Best is trial 21 with value: 2549413.6181684.




[I 2023-10-30 18:35:37,685] Trial 24 finished with value: 2562519.9402912995 and parameters: {'n_estimators': 1125, 'boosting': 'gbdt', 'lambda_l2': 3.8071506702904476, 'bagging_fraction': 0.7056472249570577, 'num_leaves': 903, 'feature_fraction': 0.9595641763527119, 'max_depth': 14, 'learning_rate': 0.012717897529775963, 'subsample': 0.7124883254513367, 'colsample_bytree': 0.8599221455757927, 'min_data_in_leaf': 39}. Best is trial 21 with value: 2549413.6181684.




[I 2023-10-30 18:36:07,113] Trial 25 finished with value: 2554234.460186504 and parameters: {'n_estimators': 1199, 'boosting': 'gbdt', 'lambda_l2': 4.790814353843039, 'bagging_fraction': 0.8326952531358638, 'num_leaves': 843, 'feature_fraction': 0.9983410473109503, 'max_depth': 14, 'learning_rate': 0.006043633618865978, 'subsample': 0.7854620767524065, 'colsample_bytree': 0.9653633978219514, 'min_data_in_leaf': 41}. Best is trial 21 with value: 2549413.6181684.




[I 2023-10-30 18:36:09,280] Trial 26 finished with value: 4031936.2253305092 and parameters: {'n_estimators': 979, 'boosting': 'rf', 'lambda_l2': 3.67558595594568, 'bagging_fraction': 0.6844874108924593, 'num_leaves': 974, 'feature_fraction': 0.954389311052118, 'max_depth': 1, 'learning_rate': 0.004255355503337488, 'subsample': 0.6013673505360652, 'colsample_bytree': 0.820667723183611, 'min_data_in_leaf': 62}. Best is trial 21 with value: 2549413.6181684.




[I 2023-10-30 18:36:17,206] Trial 27 finished with value: 2578325.7526316266 and parameters: {'n_estimators': 1027, 'boosting': 'gbdt', 'lambda_l2': 5.21304469068181, 'bagging_fraction': 0.5601759139416629, 'num_leaves': 829, 'feature_fraction': 0.9047538110631537, 'max_depth': 8, 'learning_rate': 0.010273293403359134, 'subsample': 0.7929225488161564, 'colsample_bytree': 0.8964980069945112, 'min_data_in_leaf': 50}. Best is trial 21 with value: 2549413.6181684.




[I 2023-10-30 18:36:46,160] Trial 28 finished with value: 2640007.222354662 and parameters: {'n_estimators': 1125, 'boosting': 'gbdt', 'lambda_l2': 5.7505928162548905, 'bagging_fraction': 0.45392505117629467, 'num_leaves': 924, 'feature_fraction': 0.9608915583789369, 'max_depth': 14, 'learning_rate': 0.002711061674793462, 'subsample': 0.5149679914809937, 'colsample_bytree': 0.996825040502983, 'min_data_in_leaf': 33}. Best is trial 21 with value: 2549413.6181684.




[I 2023-10-30 18:36:48,863] Trial 29 finished with value: 3288514.521730242 and parameters: {'n_estimators': 1139, 'boosting': 'gbdt', 'lambda_l2': 2.669127746326127, 'bagging_fraction': 0.8711201797141817, 'num_leaves': 830, 'feature_fraction': 0.9003735097328458, 'max_depth': 1, 'learning_rate': 0.004862595922254778, 'subsample': 0.9806988965125001, 'colsample_bytree': 0.8068384389181539, 'min_data_in_leaf': 60}. Best is trial 21 with value: 2549413.6181684.




[I 2023-10-30 18:37:36,186] Trial 30 finished with value: 2605840.6292526894 and parameters: {'n_estimators': 1081, 'boosting': 'gbdt', 'lambda_l2': 4.358929040963914, 'bagging_fraction': 0.7826113001215316, 'num_leaves': 720, 'feature_fraction': 0.8755398525728846, 'max_depth': 21, 'learning_rate': 0.0034827536108475504, 'subsample': 0.8449506366697725, 'colsample_bytree': 0.7192202050220958, 'min_data_in_leaf': 34}. Best is trial 21 with value: 2549413.6181684.




[I 2023-10-30 18:38:01,253] Trial 31 finished with value: 2550995.470563494 and parameters: {'n_estimators': 1197, 'boosting': 'gbdt', 'lambda_l2': 4.831064715879882, 'bagging_fraction': 0.8443482510104968, 'num_leaves': 845, 'feature_fraction': 0.9901932615805181, 'max_depth': 14, 'learning_rate': 0.005892148750564004, 'subsample': 0.7473365237861109, 'colsample_bytree': 0.9428811662663968, 'min_data_in_leaf': 43}. Best is trial 21 with value: 2549413.6181684.




[I 2023-10-30 18:38:11,495] Trial 32 finished with value: 2578527.5581561276 and parameters: {'n_estimators': 1159, 'boosting': 'gbdt', 'lambda_l2': 3.969512888912289, 'bagging_fraction': 0.718194152123905, 'num_leaves': 953, 'feature_fraction': 0.9728844728959027, 'max_depth': 7, 'learning_rate': 0.004998201074700581, 'subsample': 0.6958109464656097, 'colsample_bytree': 0.934275924506481, 'min_data_in_leaf': 46}. Best is trial 21 with value: 2549413.6181684.




[I 2023-10-30 18:38:32,938] Trial 33 finished with value: 2553172.3803349594 and parameters: {'n_estimators': 1177, 'boosting': 'gbdt', 'lambda_l2': 4.6959513329125455, 'bagging_fraction': 0.8909593724160273, 'num_leaves': 112, 'feature_fraction': 0.9338190261876019, 'max_depth': 12, 'learning_rate': 0.006560505599254823, 'subsample': 0.6539009605761733, 'colsample_bytree': 0.8816666835160427, 'min_data_in_leaf': 54}. Best is trial 21 with value: 2549413.6181684.




[I 2023-10-30 18:39:24,050] Trial 34 finished with value: 2578793.7204321646 and parameters: {'n_estimators': 1149, 'boosting': 'gbdt', 'lambda_l2': 3.5573510419516436, 'bagging_fraction': 0.9128813524320016, 'num_leaves': 864, 'feature_fraction': 0.9686023965681402, 'max_depth': 28, 'learning_rate': 0.0038328250486311002, 'subsample': 0.7476931407691383, 'colsample_bytree': 0.8409765583515667, 'min_data_in_leaf': 52}. Best is trial 21 with value: 2549413.6181684.




[I 2023-10-30 18:40:03,578] Trial 35 finished with value: 2783970.6939438456 and parameters: {'n_estimators': 1199, 'boosting': 'rf', 'lambda_l2': 5.021669125612922, 'bagging_fraction': 0.7691299164696312, 'num_leaves': 788, 'feature_fraction': 0.9993588583504732, 'max_depth': 17, 'learning_rate': 0.008852885110836334, 'subsample': 0.61278124467288, 'colsample_bytree': 0.9266813357404338, 'min_data_in_leaf': 42}. Best is trial 21 with value: 2549413.6181684.




[I 2023-10-30 18:40:34,588] Trial 36 finished with value: 2570056.040555223 and parameters: {'n_estimators': 1116, 'boosting': 'gbdt', 'lambda_l2': 1.981693517447689, 'bagging_fraction': 0.6630723058653598, 'num_leaves': 971, 'feature_fraction': 0.9415178200533473, 'max_depth': 20, 'learning_rate': 0.005973227233740056, 'subsample': 0.8648192464341404, 'colsample_bytree': 0.7211642006498968, 'min_data_in_leaf': 37}. Best is trial 21 with value: 2549413.6181684.




[I 2023-10-30 18:41:10,191] Trial 37 finished with value: 2685323.9548551305 and parameters: {'n_estimators': 1017, 'boosting': 'gbdt', 'lambda_l2': 5.962223484927767, 'bagging_fraction': 0.9454848425028018, 'num_leaves': 723, 'feature_fraction': 0.9135893057092159, 'max_depth': 14, 'learning_rate': 0.002414405296770414, 'subsample': 0.736539404319797, 'colsample_bytree': 0.7995697198115451, 'min_data_in_leaf': 43}. Best is trial 21 with value: 2549413.6181684.




[I 2023-10-30 18:41:17,467] Trial 38 finished with value: 2914960.8727299874 and parameters: {'n_estimators': 1174, 'boosting': 'rf', 'lambda_l2': 4.350688293460479, 'bagging_fraction': 0.8208316595284443, 'num_leaves': 554, 'feature_fraction': 0.8715027945679933, 'max_depth': 5, 'learning_rate': 0.012231093569833483, 'subsample': 0.6542269966311706, 'colsample_bytree': 0.6951195641985387, 'min_data_in_leaf': 48}. Best is trial 21 with value: 2549413.6181684.




[I 2023-10-30 18:41:26,900] Trial 39 finished with value: 2626190.4231553953 and parameters: {'n_estimators': 941, 'boosting': 'gbdt', 'lambda_l2': 6.8485880037292235, 'bagging_fraction': 0.5984627262583391, 'num_leaves': 871, 'feature_fraction': 0.9734185223404805, 'max_depth': 7, 'learning_rate': 0.0036137647080579055, 'subsample': 0.5474803263993347, 'colsample_bytree': 0.8600319719040397, 'min_data_in_leaf': 20}. Best is trial 21 with value: 2549413.6181684.




[I 2023-10-30 18:41:37,238] Trial 40 finished with value: 2581930.8607286746 and parameters: {'n_estimators': 1155, 'boosting': 'gbdt', 'lambda_l2': 5.427133100809459, 'bagging_fraction': 0.7065179352441608, 'num_leaves': 40, 'feature_fraction': 0.9292803158587715, 'max_depth': 11, 'learning_rate': 0.007293189000427967, 'subsample': 0.45919899597593294, 'colsample_bytree': 0.6679813712793786, 'min_data_in_leaf': 74}. Best is trial 21 with value: 2549413.6181684.




[I 2023-10-30 18:41:50,660] Trial 41 finished with value: 2551106.333738107 and parameters: {'n_estimators': 1172, 'boosting': 'gbdt', 'lambda_l2': 4.6780967321967735, 'bagging_fraction': 0.847698431302929, 'num_leaves': 80, 'feature_fraction': 0.9445937893176352, 'max_depth': 12, 'learning_rate': 0.006241601881996193, 'subsample': 0.6527227515159228, 'colsample_bytree': 0.8970574414107435, 'min_data_in_leaf': 56}. Best is trial 21 with value: 2549413.6181684.




[I 2023-10-30 18:42:14,418] Trial 42 finished with value: 2558400.166142341 and parameters: {'n_estimators': 1182, 'boosting': 'gbdt', 'lambda_l2': 4.986533334096474, 'bagging_fraction': 0.853663021408189, 'num_leaves': 326, 'feature_fraction': 0.9431758089029016, 'max_depth': 16, 'learning_rate': 0.005715030735582296, 'subsample': 0.5659335368950589, 'colsample_bytree': 0.9166615277239789, 'min_data_in_leaf': 58}. Best is trial 21 with value: 2549413.6181684.




[I 2023-10-30 18:42:37,265] Trial 43 finished with value: 2555558.266530529 and parameters: {'n_estimators': 1149, 'boosting': 'gbdt', 'lambda_l2': 3.319773389526874, 'bagging_fraction': 0.7962064194704177, 'num_leaves': 395, 'feature_fraction': 0.9717754785485588, 'max_depth': 20, 'learning_rate': 0.009404426853481444, 'subsample': 0.6440187963145113, 'colsample_bytree': 0.9629583970588135, 'min_data_in_leaf': 52}. Best is trial 21 with value: 2549413.6181684.




[I 2023-10-30 18:42:48,954] Trial 44 finished with value: 2595292.8274113345 and parameters: {'n_estimators': 1164, 'boosting': 'gbdt', 'lambda_l2': 4.2157035912507395, 'bagging_fraction': 0.7546886827462935, 'num_leaves': 72, 'feature_fraction': 0.9178510262087918, 'max_depth': 9, 'learning_rate': 0.004358616980066335, 'subsample': 0.697483629613902, 'colsample_bytree': 0.996087897863684, 'min_data_in_leaf': 64}. Best is trial 21 with value: 2549413.6181684.




[I 2023-10-30 18:43:23,570] Trial 45 finished with value: 2912017.3474910227 and parameters: {'n_estimators': 1135, 'boosting': 'rf', 'lambda_l2': 4.746382339743225, 'bagging_fraction': 0.993112201331293, 'num_leaves': 195, 'feature_fraction': 0.9823539277137114, 'max_depth': 12, 'learning_rate': 0.006761340728401816, 'subsample': 0.576550060949434, 'colsample_bytree': 0.8328101692202289, 'min_data_in_leaf': 45}. Best is trial 21 with value: 2549413.6181684.




[I 2023-10-30 18:44:13,134] Trial 46 finished with value: 2988950.2695148652 and parameters: {'n_estimators': 1186, 'boosting': 'dart', 'lambda_l2': 5.90886049199866, 'bagging_fraction': 0.9372728714568633, 'num_leaves': 187, 'feature_fraction': 0.9498143809096139, 'max_depth': 16, 'learning_rate': 0.008366845986012955, 'subsample': 0.6185059584380675, 'colsample_bytree': 0.8830075237603163, 'min_data_in_leaf': 31}. Best is trial 21 with value: 2549413.6181684.




[I 2023-10-30 18:44:19,763] Trial 47 finished with value: 2680709.8353226334 and parameters: {'n_estimators': 1050, 'boosting': 'gbdt', 'lambda_l2': 5.354028330937584, 'bagging_fraction': 0.8126658192858758, 'num_leaves': 496, 'feature_fraction': 0.979223789248514, 'max_depth': 5, 'learning_rate': 0.003208130251315147, 'subsample': 0.665584952727149, 'colsample_bytree': 0.784036351565008, 'min_data_in_leaf': 56}. Best is trial 21 with value: 2549413.6181684.




[I 2023-10-30 18:44:59,970] Trial 48 finished with value: 2580768.5035512666 and parameters: {'n_estimators': 1115, 'boosting': 'gbdt', 'lambda_l2': 3.016277230613636, 'bagging_fraction': 0.8669421694188698, 'num_leaves': 940, 'feature_fraction': 0.9432936910178845, 'max_depth': 28, 'learning_rate': 0.010848963811765235, 'subsample': 0.5074098278779302, 'colsample_bytree': 0.945578138094481, 'min_data_in_leaf': 36}. Best is trial 21 with value: 2549413.6181684.




[I 2023-10-30 18:45:53,806] Trial 49 finished with value: 2711446.8093136037 and parameters: {'n_estimators': 1083, 'boosting': 'dart', 'lambda_l2': 4.01454113985347, 'bagging_fraction': 0.7500441391343291, 'num_leaves': 428, 'feature_fraction': 0.8547780179213449, 'max_depth': 13, 'learning_rate': 0.015175162966046466, 'subsample': 0.7438130051417878, 'colsample_bytree': 0.8901535529942564, 'min_data_in_leaf': 20}. Best is trial 21 with value: 2549413.6181684.


In [7]:
print('Best hyperparameters:', study.best_params)
print('Best RMSE:', study.best_value)

Best hyperparameters: {'n_estimators': 1111, 'boosting': 'gbdt', 'lambda_l2': 4.986326594579468, 'bagging_fraction': 0.6726362178929178, 'num_leaves': 903, 'feature_fraction': 0.9997009943799137, 'max_depth': 13, 'learning_rate': 0.0063106163502641725, 'subsample': 0.6300808893105623, 'colsample_bytree': 0.8077059623500861, 'min_data_in_leaf': 41}
Best RMSE: 2549413.6181684
