In [6]:
import os
import pandas as pd
import numpy as np
from preprocess import *
import optuna
from sklearn.metrics import r2_score 
from optuna.samplers import TPESampler


from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
import lightgbm as lgb
import xgboost as xgb

In [23]:
df_train = pd.read_csv("../data/train.csv")
df_test = pd.read_csv("../data/test.csv")

df_train = df_train.drop('Id',axis=1)
df_train = df_train.drop('parentspecies',axis=1)



X_train = df_train.loc[:,df_train.columns != 'pSat_Pa']
y_train = np.log10(df_train.loc[:, 'pSat_Pa'])
y_test = df_test

In [24]:
RANDOM_SEED = 42
train_x, test_x, train_y, test_y = train_test_split(X_train, y_train, test_size=0.25, random_state=RANDOM_SEED)

In [25]:

train_x.dtypes

MW                              float64
NumOfAtoms                        int64
NumOfC                            int64
NumOfO                            int64
NumOfN                            int64
NumHBondDonors                    int64
NumOfConf                         int64
NumOfConfUsed                     int64
C.C..non.aromatic.                int64
C.C.C.O.in.non.aromatic.ring      int64
hydroxyl..alkyl.                  int64
aldehyde                          int64
ketone                            int64
carboxylic.acid                   int64
ester                             int64
ether..alicyclic.                 int64
nitrate                           int64
nitro                             int64
aromatic.hydroxyl                 int64
carbonylperoxynitrate             int64
peroxide                          int64
hydroperoxide                     int64
carbonylperoxyacid                int64
nitroester                        int64
dtype: object

In [26]:
print(f"The dimension of train_x is {train_x.shape}")
print(f"The dimension of train_y is {train_y.shape}")
print(f"The dimension of test_x is {test_x.shape}")
print(f"The dimension of train_x is {test_y.shape}")

The dimension of train_x is (20360, 24)
The dimension of train_y is (20360,)
The dimension of test_x is (6787, 24)
The dimension of train_x is (6787,)


In [28]:

def objective(trial):
    
    params = {
        "n_estimators": 1000,
        "learning_rate": trial.suggest_float("learning_rate", 0.001, 0.1, log=True),
        "subsample": trial.suggest_float("subsample", 0.25, 0.75),
        "subsample_freq": 1,
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.05, 0.5),
        "min_child_samples": trial.suggest_int("min_child_samples", 1, 50),
        "num_leaves": trial.suggest_int("num_leaves", 2, 2**10),
        "random_state": RANDOM_SEED
        
    }
    datas = lgb.Dataset(train_x, label=train_y)
    model = lgb.train(params, datas)

    predictions = model.predict(test_x)
    r2 = r2_score(test_y, predictions)
    return r2
    

# Remember to assign "lgbm-wine-1" as the study_name to your Optuna study. 
study_name = "lgbm-svp-1"

# For this assignment, it is enough to use a simple sqlite3 database for persisting study history
storage = "sqlite:///optuna.sqlite3"

# Create (and run) the study and record the history in the SQlite3 database file

study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=RANDOM_SEED),
    study_name=study_name,
    storage=storage,
    load_if_exists=True
)
study.optimize(objective, n_trials=100)


print("Best R2", study.best_value)
print("Best params:", study.best_trial.params)

[I 2023-11-30 20:01:54,031] A new study created in RDB with name: lgbm-svp-1


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000679 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 659
[LightGBM] [Info] Number of data points in the train set: 20360, number of used features: 23
[LightGBM] [Info] Start training from score -3.851741


[I 2023-11-30 20:01:59,631] Trial 0 finished with value: 0.7324485362602042 and parameters: {'learning_rate': 0.005611516415334507, 'subsample': 0.7253571532049581, 'colsample_bytree': 0.3793972738151323, 'min_child_samples': 30, 'num_leaves': 161}. Best is trial 0 with value: 0.7324485362602042.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000700 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 659
[LightGBM] [Info] Number of data points in the train set: 20360, number of used features: 23
[LightGBM] [Info] Start training from score -3.851741


[I 2023-11-30 20:02:04,174] Trial 1 finished with value: 0.6806990454304943 and parameters: {'learning_rate': 0.002051110418843397, 'subsample': 0.27904180608409973, 'colsample_bytree': 0.4397792655987208, 'min_child_samples': 31, 'num_leaves': 726}. Best is trial 0 with value: 0.7324485362602042.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000620 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 663
[LightGBM] [Info] Number of data points in the train set: 20360, number of used features: 24
[LightGBM] [Info] Start training from score -3.851741


[I 2023-11-30 20:02:09,702] Trial 2 finished with value: 0.5778313651221276 and parameters: {'learning_rate': 0.0010994335574766201, 'subsample': 0.7349549260809971, 'colsample_bytree': 0.4245991883601898, 'min_child_samples': 11, 'num_leaves': 188}. Best is trial 0 with value: 0.7324485362602042.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000766 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 659
[LightGBM] [Info] Number of data points in the train set: 20360, number of used features: 23
[LightGBM] [Info] Start training from score -3.851741


[I 2023-11-30 20:02:15,605] Trial 3 finished with value: 0.6749517206804324 and parameters: {'learning_rate': 0.002327067708383781, 'subsample': 0.40212112147976886, 'colsample_bytree': 0.28614039423450705, 'min_child_samples': 22, 'num_leaves': 299}. Best is trial 0 with value: 0.7324485362602042.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000530 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 659
[LightGBM] [Info] Number of data points in the train set: 20360, number of used features: 23
[LightGBM] [Info] Start training from score -3.851741


[I 2023-11-30 20:02:19,661] Trial 4 finished with value: 0.7238767327728273 and parameters: {'learning_rate': 0.01673808578875214, 'subsample': 0.3197469303260209, 'colsample_bytree': 0.1814650918408482, 'min_child_samples': 19, 'num_leaves': 468}. Best is trial 0 with value: 0.7324485362602042.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000572 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 659
[LightGBM] [Info] Number of data points in the train set: 20360, number of used features: 23
[LightGBM] [Info] Start training from score -3.851741


[I 2023-11-30 20:02:22,077] Trial 5 finished with value: 0.728945706025099 and parameters: {'learning_rate': 0.037183641805732096, 'subsample': 0.34983689107917987, 'colsample_bytree': 0.28140549728612524, 'min_child_samples': 30, 'num_leaves': 49}. Best is trial 0 with value: 0.7324485362602042.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000566 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 659
[LightGBM] [Info] Number of data points in the train set: 20360, number of used features: 23
[LightGBM] [Info] Start training from score -3.851741


[I 2023-11-30 20:02:23,843] Trial 6 finished with value: 0.70178723808792 and parameters: {'learning_rate': 0.016409286730647923, 'subsample': 0.33526206184364576, 'colsample_bytree': 0.0792732168433758, 'min_child_samples': 48, 'num_leaves': 989}. Best is trial 0 with value: 0.7324485362602042.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000688 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 659
[LightGBM] [Info] Number of data points in the train set: 20360, number of used features: 23
[LightGBM] [Info] Start training from score -3.851741


[I 2023-11-30 20:02:25,941] Trial 7 finished with value: 0.7183281973890157 and parameters: {'learning_rate': 0.041380401125610165, 'subsample': 0.40230688458668534, 'colsample_bytree': 0.09395245130287275, 'min_child_samples': 35, 'num_leaves': 452}. Best is trial 0 with value: 0.7324485362602042.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000539 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 659
[LightGBM] [Info] Number of data points in the train set: 20360, number of used features: 23
[LightGBM] [Info] Start training from score -3.851741


[I 2023-11-30 20:02:27,758] Trial 8 finished with value: 0.34478236187346956 and parameters: {'learning_rate': 0.0017541893487450805, 'subsample': 0.4975884550556351, 'colsample_bytree': 0.06547483450184828, 'min_child_samples': 46, 'num_leaves': 266}. Best is trial 0 with value: 0.7324485362602042.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003345 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 659
[LightGBM] [Info] Number of data points in the train set: 20360, number of used features: 23
[LightGBM] [Info] Start training from score -3.851741


[I 2023-11-30 20:02:32,655] Trial 9 finished with value: 0.7291311760187937 and parameters: {'learning_rate': 0.02113705944064573, 'subsample': 0.4058555380447055, 'colsample_bytree': 0.28403060953001485, 'min_child_samples': 28, 'num_leaves': 191}. Best is trial 0 with value: 0.7324485362602042.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000763 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 663
[LightGBM] [Info] Number of data points in the train set: 20360, number of used features: 24
[LightGBM] [Info] Start training from score -3.851741


[I 2023-11-30 20:02:34,303] Trial 10 finished with value: 0.721734806429222 and parameters: {'learning_rate': 0.0059057803847388166, 'subsample': 0.7164309999726112, 'colsample_bytree': 0.48629169985453957, 'min_child_samples': 1, 'num_leaves': 17}. Best is trial 0 with value: 0.7324485362602042.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000625 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 659
[LightGBM] [Info] Number of data points in the train set: 20360, number of used features: 23
[LightGBM] [Info] Start training from score -3.851741


[I 2023-11-30 20:02:41,036] Trial 11 finished with value: 0.7316514550993345 and parameters: {'learning_rate': 0.006316680188035293, 'subsample': 0.6303908711954849, 'colsample_bytree': 0.3376873425799389, 'min_child_samples': 38, 'num_leaves': 660}. Best is trial 0 with value: 0.7324485362602042.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000590 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 659
[LightGBM] [Info] Number of data points in the train set: 20360, number of used features: 23
[LightGBM] [Info] Start training from score -3.851741


[I 2023-11-30 20:02:52,882] Trial 12 finished with value: 0.7313499217641974 and parameters: {'learning_rate': 0.006206423255207313, 'subsample': 0.637859185644367, 'colsample_bytree': 0.3557251924593569, 'min_child_samples': 37, 'num_leaves': 685}. Best is trial 0 with value: 0.7324485362602042.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000682 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 659
[LightGBM] [Info] Number of data points in the train set: 20360, number of used features: 23
[LightGBM] [Info] Start training from score -3.851741


[I 2023-11-30 20:03:01,728] Trial 13 finished with value: 0.7339027539103458 and parameters: {'learning_rate': 0.007398172895155005, 'subsample': 0.6299039764773626, 'colsample_bytree': 0.37083019063619194, 'min_child_samples': 41, 'num_leaves': 682}. Best is trial 13 with value: 0.7339027539103458.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000599 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 659
[LightGBM] [Info] Number of data points in the train set: 20360, number of used features: 23
[LightGBM] [Info] Start training from score -3.851741


[I 2023-11-30 20:03:12,260] Trial 14 finished with value: 0.6975644944204156 and parameters: {'learning_rate': 0.06665510953435545, 'subsample': 0.6454178991106394, 'colsample_bytree': 0.3799627810522696, 'min_child_samples': 43, 'num_leaves': 886}. Best is trial 13 with value: 0.7339027539103458.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005306 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 663
[LightGBM] [Info] Number of data points in the train set: 20360, number of used features: 24
[LightGBM] [Info] Start training from score -3.851741


[I 2023-11-30 20:03:25,971] Trial 15 finished with value: 0.7279379408360941 and parameters: {'learning_rate': 0.003513170046246134, 'subsample': 0.5718466804981882, 'colsample_bytree': 0.498207439286346, 'min_child_samples': 13, 'num_leaves': 541}. Best is trial 13 with value: 0.7339027539103458.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000575 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 659
[LightGBM] [Info] Number of data points in the train set: 20360, number of used features: 23
[LightGBM] [Info] Start training from score -3.851741


[I 2023-11-30 20:03:33,793] Trial 16 finished with value: 0.7330634563528774 and parameters: {'learning_rate': 0.012407786783122397, 'subsample': 0.7328177444450796, 'colsample_bytree': 0.3948484214921534, 'min_child_samples': 41, 'num_leaves': 818}. Best is trial 13 with value: 0.7339027539103458.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002166 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 659
[LightGBM] [Info] Number of data points in the train set: 20360, number of used features: 23
[LightGBM] [Info] Start training from score -3.851741


[I 2023-11-30 20:03:40,722] Trial 17 finished with value: 0.7352919627131734 and parameters: {'learning_rate': 0.010770448552311282, 'subsample': 0.6785439691843469, 'colsample_bytree': 0.4259046273493144, 'min_child_samples': 50, 'num_leaves': 811}. Best is trial 17 with value: 0.7352919627131734.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000665 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 659
[LightGBM] [Info] Number of data points in the train set: 20360, number of used features: 23
[LightGBM] [Info] Start training from score -3.851741


[I 2023-11-30 20:03:45,977] Trial 18 finished with value: 0.7358335738375563 and parameters: {'learning_rate': 0.009408119302910922, 'subsample': 0.5627442989042388, 'colsample_bytree': 0.4444854244703367, 'min_child_samples': 50, 'num_leaves': 916}. Best is trial 18 with value: 0.7358335738375563.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001825 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 659
[LightGBM] [Info] Number of data points in the train set: 20360, number of used features: 23
[LightGBM] [Info] Start training from score -3.851741


[I 2023-11-30 20:03:51,255] Trial 19 finished with value: 0.7357042999890455 and parameters: {'learning_rate': 0.009558040777181883, 'subsample': 0.5397830247330323, 'colsample_bytree': 0.4220635437172493, 'min_child_samples': 48, 'num_leaves': 987}. Best is trial 18 with value: 0.7358335738375563.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000673 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 659
[LightGBM] [Info] Number of data points in the train set: 20360, number of used features: 23
[LightGBM] [Info] Start training from score -3.851741


[I 2023-11-30 20:03:56,820] Trial 20 finished with value: 0.7245143165371435 and parameters: {'learning_rate': 0.0037875256896075554, 'subsample': 0.5292109787412766, 'colsample_bytree': 0.4526971596408333, 'min_child_samples': 45, 'num_leaves': 1003}. Best is trial 18 with value: 0.7358335738375563.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000648 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 659
[LightGBM] [Info] Number of data points in the train set: 20360, number of used features: 23
[LightGBM] [Info] Start training from score -3.851741


[I 2023-11-30 20:04:02,188] Trial 21 finished with value: 0.7358778065510256 and parameters: {'learning_rate': 0.009780923841071695, 'subsample': 0.5666783444891919, 'colsample_bytree': 0.45587007281343905, 'min_child_samples': 50, 'num_leaves': 881}. Best is trial 21 with value: 0.7358778065510256.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002334 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 659
[LightGBM] [Info] Number of data points in the train set: 20360, number of used features: 23
[LightGBM] [Info] Start training from score -3.851741


[I 2023-11-30 20:04:07,457] Trial 22 finished with value: 0.735520927916071 and parameters: {'learning_rate': 0.010081690151018043, 'subsample': 0.5581141161404466, 'colsample_bytree': 0.46940643057631004, 'min_child_samples': 50, 'num_leaves': 917}. Best is trial 21 with value: 0.7358778065510256.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000706 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 659
[LightGBM] [Info] Number of data points in the train set: 20360, number of used features: 23
[LightGBM] [Info] Start training from score -3.851741


[I 2023-11-30 20:04:13,296] Trial 23 finished with value: 0.7353228031808391 and parameters: {'learning_rate': 0.008820695767877218, 'subsample': 0.49362787245797746, 'colsample_bytree': 0.45205930239027575, 'min_child_samples': 45, 'num_leaves': 910}. Best is trial 21 with value: 0.7358778065510256.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000590 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 659
[LightGBM] [Info] Number of data points in the train set: 20360, number of used features: 23
[LightGBM] [Info] Start training from score -3.851741


[I 2023-11-30 20:04:20,587] Trial 24 finished with value: 0.7233091561758633 and parameters: {'learning_rate': 0.022381257676742344, 'subsample': 0.5856184836752758, 'colsample_bytree': 0.49918542175143643, 'min_child_samples': 35, 'num_leaves': 810}. Best is trial 21 with value: 0.7358778065510256.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000682 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 659
[LightGBM] [Info] Number of data points in the train set: 20360, number of used features: 23
[LightGBM] [Info] Start training from score -3.851741


[I 2023-11-30 20:04:26,313] Trial 25 finished with value: 0.7363444503622264 and parameters: {'learning_rate': 0.01267513058971133, 'subsample': 0.4664565157755191, 'colsample_bytree': 0.4147928366143744, 'min_child_samples': 50, 'num_leaves': 1005}. Best is trial 25 with value: 0.7363444503622264.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000540 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 659
[LightGBM] [Info] Number of data points in the train set: 20360, number of used features: 23
[LightGBM] [Info] Start training from score -3.851741


[I 2023-11-30 20:04:31,970] Trial 26 finished with value: 0.7335177860201609 and parameters: {'learning_rate': 0.01366401828179319, 'subsample': 0.47351596210884217, 'colsample_bytree': 0.4681643773566547, 'min_child_samples': 41, 'num_leaves': 593}. Best is trial 25 with value: 0.7363444503622264.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000701 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 659
[LightGBM] [Info] Number of data points in the train set: 20360, number of used features: 23
[LightGBM] [Info] Start training from score -3.851741


[I 2023-11-30 20:04:36,622] Trial 27 finished with value: 0.7312468778405843 and parameters: {'learning_rate': 0.02604934893123108, 'subsample': 0.4542800839886473, 'colsample_bytree': 0.333169340330269, 'min_child_samples': 50, 'num_leaves': 774}. Best is trial 25 with value: 0.7363444503622264.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001399 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 659
[LightGBM] [Info] Number of data points in the train set: 20360, number of used features: 23
[LightGBM] [Info] Start training from score -3.851741


[I 2023-11-30 20:04:43,638] Trial 28 finished with value: 0.7327047642013205 and parameters: {'learning_rate': 0.013951059559990495, 'subsample': 0.5939544298534774, 'colsample_bytree': 0.40830325872238016, 'min_child_samples': 44, 'num_leaves': 1024}. Best is trial 25 with value: 0.7363444503622264.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001937 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 659
[LightGBM] [Info] Number of data points in the train set: 20360, number of used features: 23
[LightGBM] [Info] Start training from score -3.851741


[I 2023-11-30 20:04:50,064] Trial 29 finished with value: 0.7278492975810973 and parameters: {'learning_rate': 0.004500512971962959, 'subsample': 0.5247765589057445, 'colsample_bytree': 0.3921999671477501, 'min_child_samples': 38, 'num_leaves': 891}. Best is trial 25 with value: 0.7363444503622264.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000730 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 659
[LightGBM] [Info] Number of data points in the train set: 20360, number of used features: 23
[LightGBM] [Info] Start training from score -3.851741


[I 2023-11-30 20:04:58,743] Trial 30 finished with value: 0.7346514903010304 and parameters: {'learning_rate': 0.0076347804718793376, 'subsample': 0.5976496797896869, 'colsample_bytree': 0.45747957283883184, 'min_child_samples': 33, 'num_leaves': 946}. Best is trial 25 with value: 0.7363444503622264.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000970 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 659
[LightGBM] [Info] Number of data points in the train set: 20360, number of used features: 23
[LightGBM] [Info] Start training from score -3.851741


[I 2023-11-30 20:05:05,279] Trial 31 finished with value: 0.7353327467414945 and parameters: {'learning_rate': 0.009478342027153717, 'subsample': 0.5515346142191582, 'colsample_bytree': 0.4049174195730026, 'min_child_samples': 46, 'num_leaves': 959}. Best is trial 25 with value: 0.7363444503622264.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001647 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 659
[LightGBM] [Info] Number of data points in the train set: 20360, number of used features: 23
[LightGBM] [Info] Start training from score -3.851741


[I 2023-11-30 20:05:11,650] Trial 32 finished with value: 0.7361122143220027 and parameters: {'learning_rate': 0.010474736765941062, 'subsample': 0.5315241945139286, 'colsample_bytree': 0.43291376776023927, 'min_child_samples': 48, 'num_leaves': 846}. Best is trial 25 with value: 0.7363444503622264.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000799 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 659
[LightGBM] [Info] Number of data points in the train set: 20360, number of used features: 23
[LightGBM] [Info] Start training from score -3.851741


[I 2023-11-30 20:05:17,544] Trial 33 finished with value: 0.7356809330489109 and parameters: {'learning_rate': 0.012124511248184141, 'subsample': 0.5035968036724809, 'colsample_bytree': 0.43684532232144285, 'min_child_samples': 48, 'num_leaves': 862}. Best is trial 25 with value: 0.7363444503622264.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000735 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 659
[LightGBM] [Info] Number of data points in the train set: 20360, number of used features: 23
[LightGBM] [Info] Start training from score -3.851741


[I 2023-11-30 20:05:23,622] Trial 34 finished with value: 0.7287760290302832 and parameters: {'learning_rate': 0.004627691207086896, 'subsample': 0.4527314620335023, 'colsample_bytree': 0.43390181649801085, 'min_child_samples': 42, 'num_leaves': 763}. Best is trial 25 with value: 0.7363444503622264.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000736 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 659
[LightGBM] [Info] Number of data points in the train set: 20360, number of used features: 23
[LightGBM] [Info] Start training from score -3.851741


[I 2023-11-30 20:05:34,570] Trial 35 finished with value: 0.7249510912040071 and parameters: {'learning_rate': 0.016059761097420102, 'subsample': 0.5248282711685983, 'colsample_bytree': 0.4751689744218492, 'min_child_samples': 22, 'num_leaves': 865}. Best is trial 25 with value: 0.7363444503622264.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001906 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 659
[LightGBM] [Info] Number of data points in the train set: 20360, number of used features: 23
[LightGBM] [Info] Start training from score -3.851741


[I 2023-11-30 20:05:42,297] Trial 36 finished with value: 0.7349438322770391 and parameters: {'learning_rate': 0.007406126591934742, 'subsample': 0.5634285522811873, 'colsample_bytree': 0.4173601488098583, 'min_child_samples': 47, 'num_leaves': 725}. Best is trial 25 with value: 0.7363444503622264.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.284159 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 663
[LightGBM] [Info] Number of data points in the train set: 20360, number of used features: 24
[LightGBM] [Info] Start training from score -3.851741


In [27]:
optuna.delete_study(study_name="lgbm-svp-1", storage="sqlite:///optuna.sqlite3")