In [1]:
%load_ext autoreload
%autoreload 2

In [71]:
import pandas as pd
from xgboost import XGBClassifier
from sklearn.model_selection import RepeatedStratifiedKFold, GridSearchCV

from src.paths import PREPROCESSED_DATA_DIR, MODEL_DIR

import pickle

In [3]:
# read training data
data_train = pd.read_pickle(PREPROCESSED_DATA_DIR / 'data_train.pkl')
labels = data_train.pop('WnvPresent')

In [57]:
import optuna
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.model_selection import cross_validate


def objective(trial):
    model = trial.suggest_categorical('model', ['lgbm', 'xgb'])
    if model == 'lgbm':
        params = {
            'max_depth': trial.suggest_int('max_depth',2,10), 
            "min_child_weight": trial.suggest_categorical('min_child_weight',[1,3,5]),
            "subsample": trial.suggest_float('subsample', 0.5,1.0),
            "learning_rate": trial.suggest_float('learning_rate', 1e-4, 1, log=True),
            "reg_lambda": trial.suggest_float('reg_lambda', 0.1,10)
        }
        clf = LGBMClassifier(**params)
    else:
        params = {
            'max_depth': trial.suggest_int('max_depth',2,10), 
            "min_child_weight": trial.suggest_categorical('min_child_weight',[1,3,5]),
            "subsample": trial.suggest_float('subsample', 0.5,1.0),
            "eta": trial.suggest_float('eta', 1e-4, 1, log=True),
            "lambda": trial.suggest_float('lambda', 0.1,10)
        }
        clf = XGBClassifier(**params)
    cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3)
    res = cross_validate(clf, data_train, labels, scoring='roc_auc', cv=cv)
    return res['test_score'].mean()

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20)

[I 2024-09-06 13:07:56,711] A new study created in memory with name: no-name-c1a11cad-9b93-4d12-a340-1b613b22aadf


[LightGBM] [Info] Number of positive: 402, number of negative: 682
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000304 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 481
[LightGBM] [Info] Number of data points in the train set: 1084, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.370849 -> initscore=-0.528578
[LightGBM] [Info] Start training from score -0.528578
[LightGBM] [Info] Number of positive: 403, number of negative: 682
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000087 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 485
[LightGBM] [Info] Number of data points in the train set: 1085, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.371429 -> initscore=-0.526093
[LightGBM] [In

[I 2024-09-06 13:07:57,661] Trial 0 finished with value: 0.7470484763646644 and parameters: {'model': 'lgbm', 'max_depth': 4, 'min_child_weight': 1, 'subsample': 0.6219790477507119, 'learning_rate': 0.0003117532332638189, 'reg_lambda': 8.057906818865964}. Best is trial 0 with value: 0.7470484763646644.


[LightGBM] [Info] Number of positive: 402, number of negative: 683
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000196 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 481
[LightGBM] [Info] Number of data points in the train set: 1085, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.370507 -> initscore=-0.530043
[LightGBM] [Info] Start training from score -0.530043
[LightGBM] [Info] Number of positive: 402, number of negative: 682
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000262 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 484
[LightGBM] [Info] Number of data points in the train set: 1084, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.370849 -> initscore=-0.528578
[LightGBM] [Info] Start training from score -0.528578
[LightGBM] [Info] Number

[I 2024-09-06 13:07:58,947] Trial 1 finished with value: 0.7630470197690564 and parameters: {'model': 'lgbm', 'max_depth': 8, 'min_child_weight': 3, 'subsample': 0.5721386166417219, 'learning_rate': 0.0005592423752743132, 'reg_lambda': 6.835386620063744}. Best is trial 1 with value: 0.7630470197690564.


[LightGBM] [Info] Number of positive: 402, number of negative: 683
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000269 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 486
[LightGBM] [Info] Number of data points in the train set: 1085, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.370507 -> initscore=-0.530043
[LightGBM] [Info] Start training from score -0.530043
[LightGBM] [Info] Number of positive: 402, number of negative: 682
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000171 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 485
[LightGBM] [Info] Number of data points in the train set: 1084, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.370849 -> initscore=-0.528578
[LightGBM] [Info] Start training from score -0.528578
[LightGBM] [Info] Number

[I 2024-09-06 13:08:00,169] Trial 2 finished with value: 0.7562567684012984 and parameters: {'model': 'lgbm', 'max_depth': 10, 'min_child_weight': 5, 'subsample': 0.9375625664775937, 'learning_rate': 0.0006799343460677248, 'reg_lambda': 7.834189895956998}. Best is trial 1 with value: 0.7630470197690564.


[LightGBM] [Info] Number of positive: 402, number of negative: 683
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000086 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 485
[LightGBM] [Info] Number of data points in the train set: 1085, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.370507 -> initscore=-0.530043
[LightGBM] [Info] Start training from score -0.530043


[I 2024-09-06 13:08:02,178] Trial 3 finished with value: 0.7379105039502919 and parameters: {'model': 'xgb', 'max_depth': 2, 'min_child_weight': 5, 'subsample': 0.8963816228160101, 'eta': 0.00337274626958856, 'lambda': 5.17221640635797}. Best is trial 1 with value: 0.7630470197690564.
[I 2024-09-06 13:08:05,942] Trial 4 finished with value: 0.7763737297362348 and parameters: {'model': 'xgb', 'max_depth': 9, 'min_child_weight': 3, 'subsample': 0.7999009111999482, 'eta': 0.00024377465426680527, 'lambda': 3.892307173064092}. Best is trial 4 with value: 0.7763737297362348.
[I 2024-09-06 13:08:09,120] Trial 5 finished with value: 0.7736096573537643 and parameters: {'model': 'xgb', 'max_depth': 7, 'min_child_weight': 3, 'subsample': 0.607261403252082, 'eta': 0.11026871205980965, 'lambda': 1.1822435010368195}. Best is trial 4 with value: 0.7763737297362348.


[LightGBM] [Info] Number of positive: 402, number of negative: 682
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001433 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 485
[LightGBM] [Info] Number of data points in the train set: 1084, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.370849 -> initscore=-0.528578
[LightGBM] [Info] Start training from score -0.528578
[LightGBM] [Info] Number of positive: 403, number of negative: 682
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000219 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 487
[LightGBM] [Info] Number of data points in the train set: 1085, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.371429 -> initscore=-0.526093
[LightGBM] [Info] Start training from score -0.526093
[LightGBM] [Info] Number

[I 2024-09-06 13:08:10,756] Trial 6 finished with value: 0.7279636566339813 and parameters: {'model': 'lgbm', 'max_depth': 10, 'min_child_weight': 3, 'subsample': 0.9815698394663561, 'learning_rate': 0.819817026551946, 'reg_lambda': 4.295593489133612}. Best is trial 4 with value: 0.7763737297362348.


[LightGBM] [Info] Number of positive: 402, number of negative: 683
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000301 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 487
[LightGBM] [Info] Number of data points in the train set: 1085, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.370507 -> initscore=-0.530043
[LightGBM] [Info] Start training from score -0.530043


[I 2024-09-06 13:08:13,238] Trial 7 finished with value: 0.7876078896847375 and parameters: {'model': 'xgb', 'max_depth': 4, 'min_child_weight': 3, 'subsample': 0.5338033220901559, 'eta': 0.12688406626430593, 'lambda': 8.826782245785422}. Best is trial 7 with value: 0.7876078896847375.


[LightGBM] [Info] Number of positive: 402, number of negative: 682
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000227 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 487
[LightGBM] [Info] Number of data points in the train set: 1084, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.370849 -> initscore=-0.528578
[LightGBM] [Info] Start training from score -0.528578
[LightGBM] [Info] Number of positive: 403, number of negative: 682
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000230 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 481
[LightGBM] [Info] Number of data points in the train set: 1085, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.371429 -> initscore=-0.526093
[LightGBM] [Info] Start training from score -0.526093
[LightGBM] [Info] Number

[I 2024-09-06 13:08:14,025] Trial 8 finished with value: 0.7932913758868148 and parameters: {'model': 'lgbm', 'max_depth': 3, 'min_child_weight': 1, 'subsample': 0.5036108914088027, 'learning_rate': 0.12254939979960823, 'reg_lambda': 0.9014701965290473}. Best is trial 8 with value: 0.7932913758868148.


[LightGBM] [Info] Number of positive: 403, number of negative: 682
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000232 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 485
[LightGBM] [Info] Number of data points in the train set: 1085, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.371429 -> initscore=-0.526093
[LightGBM] [Info] Start training from score -0.526093
[LightGBM] [Info] Number of positive: 402, number of negative: 683
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000213 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 477
[LightGBM] [Info] Number of data points in the train set: 1085, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.370507 -> initscore=-0.530043
[LightGBM] [Info] Start training from score -0.530043
[LightGBM] [Info] Number

[I 2024-09-06 13:08:16,150] Trial 9 finished with value: 0.7766069848470915 and parameters: {'model': 'lgbm', 'max_depth': 9, 'min_child_weight': 3, 'subsample': 0.9846707595177402, 'learning_rate': 0.09817808933402825, 'reg_lambda': 3.806137981567359}. Best is trial 8 with value: 0.7932913758868148.


[LightGBM] [Info] Number of positive: 402, number of negative: 682
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000148 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 477
[LightGBM] [Info] Number of data points in the train set: 1084, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.370849 -> initscore=-0.528578
[LightGBM] [Info] Start training from score -0.528578
[LightGBM] [Info] Number of positive: 403, number of negative: 682
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000282 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 485
[LightGBM] [Info] Number of data points in the train set: 1085, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.371429 -> initscore=-0.526093
[LightGBM] [Info] Start training from score -0.526093
[LightGBM] [Info] Number

[I 2024-09-06 13:08:16,828] Trial 10 finished with value: 0.7752304758855658 and parameters: {'model': 'lgbm', 'max_depth': 2, 'min_child_weight': 1, 'subsample': 0.7084476760011218, 'learning_rate': 0.026833711628812085, 'reg_lambda': 0.23476659506375785}. Best is trial 8 with value: 0.7932913758868148.


[LightGBM] [Info] Number of positive: 402, number of negative: 682
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000227 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 487
[LightGBM] [Info] Number of data points in the train set: 1084, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.370849 -> initscore=-0.528578
[LightGBM] [Info] Start training from score -0.528578
[LightGBM] [Info] Number of positive: 403, number of negative: 682
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000077 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 485
[LightGBM] [Info] Number of data points in the train set: 1085, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.371429 -> initscore=-0.526093
[LightGBM] [In

[I 2024-09-06 13:08:19,293] Trial 11 finished with value: 0.7581851954029251 and parameters: {'model': 'xgb', 'max_depth': 5, 'min_child_weight': 1, 'subsample': 0.5018076553891415, 'eta': 0.5232928661048156, 'lambda': 9.640552265253081}. Best is trial 8 with value: 0.7932913758868148.
[I 2024-09-06 13:08:21,679] Trial 12 finished with value: 0.7864818708909074 and parameters: {'model': 'xgb', 'max_depth': 4, 'min_child_weight': 1, 'subsample': 0.50379047941692, 'eta': 0.027181716642598143, 'lambda': 9.752836970952414}. Best is trial 8 with value: 0.7932913758868148.
[I 2024-09-06 13:08:23,588] Trial 13 finished with value: 0.7444607735283332 and parameters: {'model': 'xgb', 'max_depth': 4, 'min_child_weight': 1, 'subsample': 0.697778531116668, 'eta': 0.9917600316598492, 'lambda': 7.035913689585481}. Best is trial 8 with value: 0.7932913758868148.


[LightGBM] [Info] Number of positive: 402, number of negative: 682
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000197 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 481
[LightGBM] [Info] Number of data points in the train set: 1084, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.370849 -> initscore=-0.528578
[LightGBM] [Info] Start training from score -0.528578
[LightGBM] [Info] Number of positive: 403, number of negative: 682
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000220 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 476
[LightGBM] [Info] Number of data points in the train set: 1085, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.371429 -> initscore=-0.526093
[LightGBM] [Info] Start training from score -0.526093
[LightGBM] [Info] Number

[I 2024-09-06 13:08:24,488] Trial 14 finished with value: 0.7780244113162607 and parameters: {'model': 'lgbm', 'max_depth': 3, 'min_child_weight': 5, 'subsample': 0.5579605178631066, 'learning_rate': 0.3183991306810289, 'reg_lambda': 0.19820205536359636}. Best is trial 8 with value: 0.7932913758868148.




[I 2024-09-06 13:08:27,459] Trial 15 finished with value: 0.7841968231910911 and parameters: {'model': 'xgb', 'max_depth': 6, 'min_child_weight': 3, 'subsample': 0.8066225943038864, 'eta': 0.01026473243208279, 'lambda': 7.128234698868248}. Best is trial 8 with value: 0.7932913758868148.
[I 2024-09-06 13:08:30,000] Trial 16 finished with value: 0.7758379750255727 and parameters: {'model': 'xgb', 'max_depth': 5, 'min_child_weight': 1, 'subsample': 0.6667882575288165, 'eta': 0.0005882246230326327, 'lambda': 7.390670300691246}. Best is trial 8 with value: 0.7932913758868148.


[LightGBM] [Info] Number of positive: 402, number of negative: 682
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000246 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 485
[LightGBM] [Info] Number of data points in the train set: 1084, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.370849 -> initscore=-0.528578
[LightGBM] [Info] Start training from score -0.528578
[LightGBM] [Info] Number of positive: 403, number of negative: 682
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000230 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 482
[LightGBM] [Info] Number of data points in the train set: 1085, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.371429 -> initscore=-0.526093
[LightGBM] [Info] Start training from score -0.526093
[LightGBM] [Info] Number

[I 2024-09-06 13:08:30,805] Trial 17 finished with value: 0.7670458367818206 and parameters: {'model': 'lgbm', 'max_depth': 3, 'min_child_weight': 1, 'subsample': 0.5515488542720166, 'learning_rate': 0.005675843491602072, 'reg_lambda': 2.485172694708947}. Best is trial 8 with value: 0.7932913758868148.


[LightGBM] [Info] Number of positive: 402, number of negative: 683
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000196 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 487
[LightGBM] [Info] Number of data points in the train set: 1085, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.370507 -> initscore=-0.530043
[LightGBM] [Info] Start training from score -0.530043
[LightGBM] [Info] Number of positive: 402, number of negative: 683
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000044 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 480
[LightGBM] [Info] Number of data points in the train set: 1085, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.370507 -> initscore=-0.530043
[LightGBM] [In

[I 2024-09-06 13:08:33,593] Trial 18 finished with value: 0.7820663698980838 and parameters: {'model': 'xgb', 'max_depth': 6, 'min_child_weight': 3, 'subsample': 0.7689127438977329, 'eta': 0.07977513200252705, 'lambda': 0.8811216455135646}. Best is trial 8 with value: 0.7932913758868148.


[LightGBM] [Info] Number of positive: 402, number of negative: 682
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000068 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 485
[LightGBM] [Info] Number of data points in the train set: 1084, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.370849 -> initscore=-0.528578
[LightGBM] [Info] Start training from score -0.528578
[LightGBM] [Info] Number of positive: 403, number of negative: 682
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000196 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 485
[LightGBM] [Info] Number of data points in the train set: 1085, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.371429 -> initscore=-0.526093
[LightGBM] [In

[I 2024-09-06 13:08:34,407] Trial 19 finished with value: 0.7675597130404476 and parameters: {'model': 'lgbm', 'max_depth': 3, 'min_child_weight': 5, 'subsample': 0.6251269294831603, 'learning_rate': 0.007436095289516033, 'reg_lambda': 9.802484459704644}. Best is trial 8 with value: 0.7932913758868148.


[LightGBM] [Info] Number of positive: 403, number of negative: 682
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000193 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 478
[LightGBM] [Info] Number of data points in the train set: 1085, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.371429 -> initscore=-0.526093
[LightGBM] [Info] Start training from score -0.526093
[LightGBM] [Info] Number of positive: 402, number of negative: 683
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000053 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 487
[LightGBM] [Info] Number of data points in the train set: 1085, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.370507 -> initscore=-0.530043
[LightGBM] [In

In [58]:
study.best_params

{'model': 'lgbm',
 'max_depth': 3,
 'min_child_weight': 1,
 'subsample': 0.5036108914088027,
 'learning_rate': 0.12254939979960823,
 'reg_lambda': 0.9014701965290473}

In [61]:
study.best_trial

FrozenTrial(number=8, state=TrialState.COMPLETE, values=[0.7932913758868148], datetime_start=datetime.datetime(2024, 9, 6, 13, 8, 13, 240063), datetime_complete=datetime.datetime(2024, 9, 6, 13, 8, 14, 24846), params={'model': 'lgbm', 'max_depth': 3, 'min_child_weight': 1, 'subsample': 0.5036108914088027, 'learning_rate': 0.12254939979960823, 'reg_lambda': 0.9014701965290473}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'model': CategoricalDistribution(choices=('lgbm', 'xgb')), 'max_depth': IntDistribution(high=10, log=False, low=2, step=1), 'min_child_weight': CategoricalDistribution(choices=(1, 3, 5)), 'subsample': FloatDistribution(high=1.0, log=False, low=0.5, step=None), 'learning_rate': FloatDistribution(high=1.0, log=True, low=0.0001, step=None), 'reg_lambda': FloatDistribution(high=10.0, log=False, low=0.1, step=None)}, trial_id=8, value=None)

In [62]:
study.best_value

0.7932913758868148

In [69]:
def train_best(best_params, data_train, labels):
    model = best_params.pop('model', None)
    
    if model == 'lgbm':
        clf = LGBMClassifier(**best_params)
    else:
        clf = XGBClassifier(**best_params)
    
    clf.fit(data_train, labels)
    
    return clf

best_model = train_best(study.best_params, data_train, labels)
        

[LightGBM] [Info] Number of positive: 503, number of negative: 853
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000144 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 487
[LightGBM] [Info] Number of data points in the train set: 1356, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.370944 -> initscore=-0.528169
[LightGBM] [Info] Start training from score -0.528169


In [73]:
with open(MODEL_DIR / 'best_model.pkl', 'wb') as f:
    pickle.dump(best_model, f)

In [68]:
lgbm