# 特征工程

In [1]:
import numpy as np
import pandas as pd
from jesse import helpers, research

from custom_indicators.all_features import feature_matrix

# Fetch BTC-USDT 3-minute candles from Binance perpetual futures for the whole
# study window. No warm-up candles are requested (warmup_candles_num=0), and
# `warmup_candles` is not referenced again in the cells visible here.
start_ts = helpers.date_to_timestamp("2021-01-01")
finish_ts = helpers.date_to_timestamp("2024-12-31")
candle_options = dict(warmup_candles_num=0, caching=False, is_for_jesse=False)

warmup_candles, trading_candles = research.get_candles(
    "Binance Perpetual Futures",
    "BTC-USDT",
    "3m",
    start_ts,
    finish_ts,
    **candle_options,
)

# Build the full feature matrix from the candles. Later cells pair its rows
# one-to-one with `trading_candles` (same length, same order).
final_fe = feature_matrix(trading_candles, sequential=True)
final_fe.features.shape

(700800, 394)

In [2]:
# Largest number of non-finite entries (NaN or +/-inf) found in any single
# feature column. Presumably these are indicator warm-up gaps at the start of
# the series, but the count alone does not prove they are all leading rows.
invalid_len = (~np.isfinite(final_fe.features)).sum(axis=0).max()
invalid_len

50

# 模型

In [3]:
# Number of rows dropped from the head of each split in a later cell.
# Hard-coded here; it replaces whatever value `invalid_len` held before.
invalid_len = 200

# Chronological split on the candle timestamp column (column 0):
#   train      : strictly before 2024-06-30
#   validation : from 2024-06-30 (inclusive) to 2024-09-30 (exclusive)
#   test       : from 2024-09-30 onwards
candle_ts = trading_candles[:, 0]
vali_start_ts = helpers.date_to_timestamp("2024-06-30")
test_start_ts = helpers.date_to_timestamp("2024-09-30")

train_mask = candle_ts < vali_start_ts
vali_mask = (candle_ts >= vali_start_ts) & (candle_ts < test_start_ts)
test_mask = candle_ts >= test_start_ts

train_len, vali_len, test_len = (
    mask.sum() for mask in (train_mask, vali_mask, test_mask)
)
# The three masks must partition the candles: every row lands in exactly one split.
assert train_len + vali_len + test_len == len(trading_candles)

# Wrap the raw feature array in a DataFrame, named by feature and indexed by
# candle timestamp.
final_fe_df = pd.DataFrame(
    final_fe.features,
    index=candle_ts,
    columns=final_fe.names,
)
final_fe_df.head()

Unnamed: 0,acc_swing_index,acc_swing_index_lag1,acc_swing_index_lag2,acc_swing_index_dt,acc_swing_index_ddt,ehlers_early_onset_trend_ddt,bandpass,bandpass_dt,bandpass_ddt,highpass_bp,...,swamicharts_stochastic_39,swamicharts_stochastic_40,swamicharts_stochastic_41,swamicharts_stochastic_42,swamicharts_stochastic_43,swamicharts_stochastic_44,swamicharts_stochastic_45,swamicharts_stochastic_46,swamicharts_stochastic_47,swamicharts_stochastic_48
1609459000000.0,0.0,,,,,,1.0,,,1.0,...,,,,,,,,,,
1609459000000.0,-0.038929,0.0,,-0.038929,,,0.983821,-0.016179,,0.852647,...,,,,,,,,,,
1609460000000.0,-0.064255,-0.038929,0.0,-0.025326,0.013604,1.0,0.874514,-0.109307,-0.093129,0.63781,...,,,,,,,,,,
1609460000000.0,-0.152038,-0.064255,-0.038929,-0.087783,-0.062457,-1.0,0.689142,-0.185372,-0.076064,0.380348,...,,,,,,,,,,
1609460000000.0,-0.217581,-0.152038,-0.064255,-0.065544,0.022239,-0.228989,0.45063,-0.238512,-0.05314,0.107308,...,,,,,,,,,,


In [4]:
# Load the pre-computed labels from a zstd-compressed CSV; the first CSV column
# becomes the index (datetime strings in the recorded output).
# NOTE(review): later cells index this frame with boolean masks built from
# `trading_candles`, so it presumably holds one row per candle in the same
# order - confirm against whatever produced data/label.csv.zst.
label_df = pd.read_csv("data/label.csv.zst", index_col=0)
label_df.head()

Unnamed: 0,close,ret,trgt,bin,side
2021-01-01 00:00:00+00:00,29016.23,,,0.0,0.0
2021-01-01 00:03:00+00:00,28965.02,,,0.0,0.0
2021-01-01 00:06:00+00:00,28928.06,,,0.0,0.0
2021-01-01 00:09:00+00:00,28851.82,,,0.0,0.0
2021-01-01 00:12:00+00:00,28786.75,,,0.0,0.0


In [5]:
def _select_split(features, labels, mask, skip):
    """Return the (features, integer labels) pair for one chronological split.

    Args:
        features: DataFrame of features, one row per candle.
        labels: DataFrame with at least the columns "ret" and "bin",
            row-aligned with `features`.
        mask: boolean array choosing the rows that belong to the split.
        skip: number of rows to drop from the head of the selected rows.

    Returns:
        Tuple ``(split_features, split_labels)`` where rows whose "ret" is
        missing have been removed from both, and ``split_labels`` is the "bin"
        column cast to int.
    """
    # The mask is applied positionally to both frames, so they must line up.
    assert len(features) == len(labels) == len(mask), (
        "features, labels and mask must be row-aligned"
    )

    split_features = features[mask].iloc[skip:]
    split_labels = labels[mask].iloc[skip:]

    # Keep only rows that actually have a label outcome ("ret" is not NaN).
    has_ret = split_labels["ret"].notna().to_numpy()
    split_features = split_features[has_ret]
    split_labels = split_labels[has_ret]["bin"].astype(int)

    assert split_features.shape[0] == split_labels.shape[0]
    return split_features, split_labels


train_fe, train_label = _select_split(final_fe_df, label_df, train_mask, invalid_len)
print(train_label.value_counts())

vali_fe, vali_label = _select_split(final_fe_df, label_df, vali_mask, invalid_len)
print(vali_label.value_counts())

test_fe, test_label = _select_split(final_fe_df, label_df, test_mask, invalid_len)
print(test_label.value_counts())

bin
0    437622
1     32211
Name: count, dtype: int64
bin
0    31550
1     2299
Name: count, dtype: int64
bin
0    31315
1     2399
Name: count, dtype: int64


In [7]:
import optuna
import lightgbm as lgb
from sklearn.metrics import f1_score, precision_score, recall_score, roc_auc_score


def objective(trial):
    """Optuna objective: fit LightGBM on the training split, score on validation.

    Samples a set of LightGBM hyper-parameters plus a probability threshold
    from `trial`, trains a binary classifier on `train_fe` / `train_label`
    (no explicit number of boosting rounds is passed, so LightGBM's default
    applies), predicts on `vali_fe`, prints AUC / precision / recall / F1 and
    returns the F1 score.

    Args:
        trial: the `optuna` trial used to sample parameters.

    Returns:
        F1 score of the thresholded predictions on the validation split.
    """
    dtrain = lgb.Dataset(train_fe, label=train_label)

    # Search space for the hyper-parameters being tuned.
    # `suggest_float` (optionally with log=True) replaces the deprecated
    # `suggest_uniform` / `suggest_loguniform`; names and ranges are unchanged.
    param = {
        "objective": "binary",
        "boosting_type": trial.suggest_categorical("boosting_type", ["gbdt", "dart"]),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.1, log=True),
        "num_leaves": trial.suggest_int("num_leaves", 31, 512),
        "max_depth": trial.suggest_int("max_depth", -1, 16),
        "feature_fraction": trial.suggest_float("feature_fraction", 0.5, 1.0),
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0.5, 1.0),
        "bagging_freq": trial.suggest_int("bagging_freq", 0, 5),
        "lambda_l1": trial.suggest_float("lambda_l1", 1e-8, 10.0, log=True),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-8, 10.0, log=True),
        # The positive class is heavily outnumbered, so let the search also
        # tune scale_pos_weight.
        "scale_pos_weight": trial.suggest_float("scale_pos_weight", 1.0, 20.0),
        "verbosity": -1,
    }

    gbm = lgb.train(param, dtrain)
    preds = gbm.predict(vali_fe)

    # The decision threshold is tuned together with the model parameters.
    label_threshold = trial.suggest_float("label_threshold", 0.5, 1.0)
    pred_labels = (preds >= label_threshold).astype(int)

    # Compute AUC, precision and recall (plus F1, the optimisation target).
    auc_score = roc_auc_score(vali_label, preds, average="weighted")
    precision = precision_score(vali_label, pred_labels, zero_division=0)
    recall = recall_score(vali_label, pred_labels, zero_division=0)
    f1 = f1_score(vali_label, pred_labels, zero_division=0)
    # Print the metrics for reference; only F1 is returned to Optuna.
    print(
        f"AUC: {auc_score:.5f}, Precision: {precision:.5f}, Recall: {recall:.5f}, F1: {f1:.5f}"
    )

    return f1


# Maximise validation F1 over 500 trials.
# The sampler is seeded so the sequence of suggested hyper-parameters is
# repeatable after a kernel restart. TPESampler is Optuna's default sampler
# for single-objective studies, so only the seeding is new.
# NOTE(review): model training itself may still vary slightly between runs
# (e.g. multi-threading) - confirm if exact reproducibility is required.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=500, show_progress_bar=True)

[I 2025-02-10 23:31:12,913] A new study created in memory with name: no-name-cadcd388-2e07-447f-9e3d-c30df33de88a
Best trial: 0. Best value: 0:   0%|          | 1/500 [00:11<1:35:01, 11.43s/it]

AUC: 0.67709, Precision: 0.00000, Recall: 0.00000, F1: 0.00000
[I 2025-02-10 23:31:24,395] Trial 0 finished with value: 0.0 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.01162869786456743, 'num_leaves': 279, 'max_depth': 8, 'feature_fraction': 0.9198128745615626, 'bagging_fraction': 0.9114948512185802, 'bagging_freq': 5, 'lambda_l1': 1.2256021271607538, 'lambda_l2': 0.7069892785932729, 'scale_pos_weight': 5.825699005269939, 'label_threshold': 0.7205625132168395}. Best is trial 0 with value: 0.0.


Best trial: 1. Best value: 0.2144:   0%|          | 2/500 [00:19<1:16:26,  9.21s/it]

AUC: 0.67368, Precision: 0.15484, Recall: 0.34841, F1: 0.21440
[I 2025-02-10 23:31:32,053] Trial 1 finished with value: 0.21440042826552463 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.05185197073809939, 'num_leaves': 398, 'max_depth': 7, 'feature_fraction': 0.8970305822423044, 'bagging_fraction': 0.8111453724083731, 'bagging_freq': 5, 'lambda_l1': 0.10520460763681902, 'lambda_l2': 3.957691804579501e-05, 'scale_pos_weight': 15.057044781910013, 'label_threshold': 0.5866676541692166}. Best is trial 1 with value: 0.21440042826552463.


Best trial: 1. Best value: 0.2144:   1%|          | 3/500 [00:30<1:25:04, 10.27s/it]

AUC: 0.68016, Precision: 0.35263, Recall: 0.02914, F1: 0.05384
[I 2025-02-10 23:31:43,585] Trial 2 finished with value: 0.05383688228204098 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.0175976358293552, 'num_leaves': 261, 'max_depth': 12, 'feature_fraction': 0.5339816239506954, 'bagging_fraction': 0.832279563078165, 'bagging_freq': 3, 'lambda_l1': 7.585923641777299e-07, 'lambda_l2': 0.0025583637030590265, 'scale_pos_weight': 10.591405265159347, 'label_threshold': 0.6043183179896394}. Best is trial 1 with value: 0.21440042826552463.


Best trial: 1. Best value: 0.2144:   1%|          | 4/500 [00:35<1:06:19,  8.02s/it]

AUC: 0.67574, Precision: 0.00000, Recall: 0.00000, F1: 0.00000
[I 2025-02-10 23:31:48,164] Trial 3 finished with value: 0.0 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.017918129987156216, 'num_leaves': 124, 'max_depth': 5, 'feature_fraction': 0.523021080832353, 'bagging_fraction': 0.9328453262803074, 'bagging_freq': 2, 'lambda_l1': 6.879565197826979e-08, 'lambda_l2': 8.139167818279505, 'scale_pos_weight': 11.45820623390146, 'label_threshold': 0.827191977002603}. Best is trial 1 with value: 0.21440042826552463.


Best trial: 1. Best value: 0.2144:   1%|          | 5/500 [00:51<1:30:36, 10.98s/it]

AUC: 0.67052, Precision: 0.29651, Recall: 0.02218, F1: 0.04128
[I 2025-02-10 23:32:04,395] Trial 4 finished with value: 0.04127883447996762 and parameters: {'boosting_type': 'dart', 'learning_rate': 0.05699568372208754, 'num_leaves': 416, 'max_depth': 10, 'feature_fraction': 0.5444962706583688, 'bagging_fraction': 0.9415592056620021, 'bagging_freq': 2, 'lambda_l1': 0.0003196213750808435, 'lambda_l2': 0.005166803486453249, 'scale_pos_weight': 6.894680783632456, 'label_threshold': 0.5880481971137725}. Best is trial 1 with value: 0.21440042826552463.


Best trial: 1. Best value: 0.2144:   1%|          | 6/500 [01:06<1:41:39, 12.35s/it]

AUC: 0.66002, Precision: 0.19960, Recall: 0.13006, F1: 0.15749
[I 2025-02-10 23:32:19,390] Trial 5 finished with value: 0.15749275744008429 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.08093115534188423, 'num_leaves': 290, 'max_depth': -1, 'feature_fraction': 0.7107900201602713, 'bagging_fraction': 0.9512279463641479, 'bagging_freq': 4, 'lambda_l1': 0.013561774224200747, 'lambda_l2': 9.332853157448419e-06, 'scale_pos_weight': 18.36869892380298, 'label_threshold': 0.6409059969277833}. Best is trial 1 with value: 0.21440042826552463.


Best trial: 1. Best value: 0.2144:   1%|▏         | 7/500 [01:19<1:42:06, 12.43s/it]

AUC: 0.67222, Precision: 0.00000, Recall: 0.00000, F1: 0.00000
[I 2025-02-10 23:32:31,979] Trial 6 finished with value: 0.0 and parameters: {'boosting_type': 'dart', 'learning_rate': 0.014264681836788662, 'num_leaves': 107, 'max_depth': 13, 'feature_fraction': 0.8486768703952077, 'bagging_fraction': 0.742671160450109, 'bagging_freq': 5, 'lambda_l1': 0.7937498121355829, 'lambda_l2': 0.01086808116695164, 'scale_pos_weight': 8.564129632066482, 'label_threshold': 0.5860017803426185}. Best is trial 1 with value: 0.21440042826552463.


Best trial: 1. Best value: 0.2144:   2%|▏         | 8/500 [01:34<1:49:06, 13.31s/it]

AUC: 0.65988, Precision: 0.27165, Recall: 0.03001, F1: 0.05405
[I 2025-02-10 23:32:47,168] Trial 7 finished with value: 0.05405405405405406 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.09600010756912199, 'num_leaves': 281, 'max_depth': 16, 'feature_fraction': 0.8490606701743606, 'bagging_fraction': 0.9453498867868035, 'bagging_freq': 2, 'lambda_l1': 6.263097461629604e-06, 'lambda_l2': 4.863613390046522e-07, 'scale_pos_weight': 9.980822558014312, 'label_threshold': 0.650369760608682}. Best is trial 1 with value: 0.21440042826552463.


Best trial: 1. Best value: 0.2144:   2%|▏         | 9/500 [01:48<1:52:35, 13.76s/it]

AUC: 0.67488, Precision: 0.27243, Recall: 0.07264, F1: 0.11470
[I 2025-02-10 23:33:01,926] Trial 8 finished with value: 0.11469780219780219 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.022873205083769017, 'num_leaves': 373, 'max_depth': 13, 'feature_fraction': 0.533004592494787, 'bagging_fraction': 0.6407581663924067, 'bagging_freq': 0, 'lambda_l1': 1.2000408848170245, 'lambda_l2': 2.9804231244767275e-07, 'scale_pos_weight': 16.934196123805762, 'label_threshold': 0.6615599424659087}. Best is trial 1 with value: 0.21440042826552463.


Best trial: 1. Best value: 0.2144:   2%|▏         | 10/500 [01:59<1:44:35, 12.81s/it]

AUC: 0.68237, Precision: 0.00000, Recall: 0.00000, F1: 0.00000
[I 2025-02-10 23:33:12,602] Trial 9 finished with value: 0.0 and parameters: {'boosting_type': 'dart', 'learning_rate': 0.06171021416011658, 'num_leaves': 70, 'max_depth': 11, 'feature_fraction': 0.7103507845838302, 'bagging_fraction': 0.8252517618471269, 'bagging_freq': 3, 'lambda_l1': 1.2482452442663884e-08, 'lambda_l2': 3.8999867252383696e-08, 'scale_pos_weight': 13.866541940585682, 'label_threshold': 0.8435343218576574}. Best is trial 1 with value: 0.21440042826552463.


Best trial: 1. Best value: 0.2144:   2%|▏         | 11/500 [02:05<1:27:55, 10.79s/it]

AUC: 0.66279, Precision: 0.00000, Recall: 0.00000, F1: 0.00000
[I 2025-02-10 23:33:18,813] Trial 10 finished with value: 0.0 and parameters: {'boosting_type': 'dart', 'learning_rate': 0.036098241781296836, 'num_leaves': 509, 'max_depth': 4, 'feature_fraction': 0.9969386958073478, 'bagging_fraction': 0.5213171693386652, 'bagging_freq': 0, 'lambda_l1': 0.008013179485793765, 'lambda_l2': 3.65652758642515e-05, 'scale_pos_weight': 15.225107239006348, 'label_threshold': 0.9693808356267579}. Best is trial 1 with value: 0.21440042826552463.


Best trial: 1. Best value: 0.2144:   2%|▏         | 12/500 [02:22<1:42:36, 12.62s/it]

AUC: 0.65113, Precision: 0.16071, Recall: 0.23271, F1: 0.19012
[I 2025-02-10 23:33:35,609] Trial 11 finished with value: 0.19012082444918266 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.09957167215603506, 'num_leaves': 371, 'max_depth': -1, 'feature_fraction': 0.6707645254931291, 'bagging_fraction': 0.7399086651298046, 'bagging_freq': 4, 'lambda_l1': 0.01416179384235006, 'lambda_l2': 2.4340137314263835e-05, 'scale_pos_weight': 19.905594513858354, 'label_threshold': 0.5105574134605662}. Best is trial 1 with value: 0.21440042826552463.


Best trial: 1. Best value: 0.2144:   3%|▎         | 13/500 [02:40<1:55:25, 14.22s/it]

AUC: 0.67599, Precision: 0.12866, Recall: 0.49326, F1: 0.20409
[I 2025-02-10 23:33:53,521] Trial 12 finished with value: 0.20408530549806533 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.04035084472836989, 'num_leaves': 413, 'max_depth': -1, 'feature_fraction': 0.6355338594692882, 'bagging_fraction': 0.7044815511581132, 'bagging_freq': 4, 'lambda_l1': 0.020476082076962856, 'lambda_l2': 5.080813923523291e-05, 'scale_pos_weight': 19.899284349428537, 'label_threshold': 0.5027418734525263}. Best is trial 1 with value: 0.21440042826552463.


Best trial: 1. Best value: 0.2144:   3%|▎         | 14/500 [02:44<1:30:30, 11.17s/it]

AUC: 0.66783, Precision: 0.11153, Recall: 0.58112, F1: 0.18714
[I 2025-02-10 23:33:57,656] Trial 13 finished with value: 0.18714105617033197 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.03720747767342018, 'num_leaves': 483, 'max_depth': 3, 'feature_fraction': 0.7955436777068645, 'bagging_fraction': 0.6531017895040178, 'bagging_freq': 4, 'lambda_l1': 0.00022149668607256915, 'lambda_l2': 0.00022164830503312874, 'scale_pos_weight': 14.112691064098968, 'label_threshold': 0.5183856377899065}. Best is trial 1 with value: 0.21440042826552463.


Best trial: 1. Best value: 0.2144:   3%|▎         | 15/500 [02:48<1:11:25,  8.84s/it]

AUC: 0.65816, Precision: 0.50000, Recall: 0.00217, F1: 0.00433
[I 2025-02-10 23:34:01,071] Trial 14 finished with value: 0.0043308791684712 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.04938185718903758, 'num_leaves': 438, 'max_depth': 2, 'feature_fraction': 0.6276365392243914, 'bagging_fraction': 0.6543856574968366, 'bagging_freq': 5, 'lambda_l1': 8.520342381228192, 'lambda_l2': 2.4250347250766384e-06, 'scale_pos_weight': 3.333289872386265, 'label_threshold': 0.5040660427747823}. Best is trial 1 with value: 0.21440042826552463.


Best trial: 1. Best value: 0.2144:   3%|▎         | 16/500 [02:54<1:04:25,  7.99s/it]

AUC: 0.67734, Precision: 0.29697, Recall: 0.02131, F1: 0.03977
[I 2025-02-10 23:34:07,088] Trial 15 finished with value: 0.03977272727272727 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.028335784861910355, 'num_leaves': 363, 'max_depth': 6, 'feature_fraction': 0.621687646285768, 'bagging_fraction': 0.8278145257552312, 'bagging_freq': 4, 'lambda_l1': 0.06774438474940696, 'lambda_l2': 0.00024558066989019043, 'scale_pos_weight': 16.890055285270243, 'label_threshold': 0.7492798331549495}. Best is trial 1 with value: 0.21440042826552463.


Best trial: 1. Best value: 0.2144:   3%|▎         | 17/500 [02:57<52:48,  6.56s/it]  

AUC: 0.64843, Precision: 0.16547, Recall: 0.18095, F1: 0.17287
[I 2025-02-10 23:34:10,327] Trial 16 finished with value: 0.17286515686681903 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.04732385272496287, 'num_leaves': 198, 'max_depth': 1, 'feature_fraction': 0.7825456335912477, 'bagging_fraction': 0.5654767607836357, 'bagging_freq': 5, 'lambda_l1': 0.0007287739208991656, 'lambda_l2': 0.016790186765627487, 'scale_pos_weight': 12.85235665358821, 'label_threshold': 0.5585315332047265}. Best is trial 1 with value: 0.21440042826552463.


Best trial: 1. Best value: 0.2144:   4%|▎         | 18/500 [03:08<1:03:15,  7.87s/it]

AUC: 0.67291, Precision: 0.22512, Recall: 0.14659, F1: 0.17756
[I 2025-02-10 23:34:21,263] Trial 17 finished with value: 0.17755532139093783 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.027539679515902367, 'num_leaves': 434, 'max_depth': 8, 'feature_fraction': 0.9915835001262348, 'bagging_fraction': 0.7844562980650335, 'bagging_freq': 3, 'lambda_l1': 0.08728137414295215, 'lambda_l2': 1.3019056671212689e-08, 'scale_pos_weight': 19.788304741618447, 'label_threshold': 0.6825902306139838}. Best is trial 1 with value: 0.21440042826552463.


Best trial: 1. Best value: 0.2144:   4%|▍         | 19/500 [03:30<1:36:49, 12.08s/it]

AUC: 0.67519, Precision: 0.00000, Recall: 0.00000, F1: 0.00000
[I 2025-02-10 23:34:43,136] Trial 18 finished with value: 0.0 and parameters: {'boosting_type': 'dart', 'learning_rate': 0.0693441002562208, 'num_leaves': 336, 'max_depth': 16, 'feature_fraction': 0.9165928638761597, 'bagging_fraction': 0.7002981353335392, 'bagging_freq': 1, 'lambda_l1': 1.3605179690859234e-05, 'lambda_l2': 0.0004991995837159103, 'scale_pos_weight': 17.029815366146863, 'label_threshold': 0.9987588664379334}. Best is trial 1 with value: 0.21440042826552463.


Best trial: 1. Best value: 0.2144:   4%|▍         | 20/500 [03:35<1:21:21, 10.17s/it]

AUC: 0.68184, Precision: 0.30909, Recall: 0.00739, F1: 0.01444
[I 2025-02-10 23:34:48,855] Trial 19 finished with value: 0.014443500424808835 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.040296489084238385, 'num_leaves': 467, 'max_depth': 6, 'feature_fraction': 0.6006715609436234, 'bagging_fraction': 0.8650174778365185, 'bagging_freq': 4, 'lambda_l1': 0.0016393177240013868, 'lambda_l2': 0.16434642002598007, 'scale_pos_weight': 15.628784825662763, 'label_threshold': 0.8156490238606171}. Best is trial 1 with value: 0.21440042826552463.


Best trial: 1. Best value: 0.2144:   4%|▍         | 21/500 [03:39<1:05:57,  8.26s/it]

AUC: 0.64921, Precision: 0.00000, Recall: 0.00000, F1: 0.00000
[I 2025-02-10 23:34:52,669] Trial 20 finished with value: 0.0 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.049054499090128476, 'num_leaves': 225, 'max_depth': 1, 'feature_fraction': 0.8823792216693441, 'bagging_fraction': 0.7012565366441552, 'bagging_freq': 5, 'lambda_l1': 0.15855452695697356, 'lambda_l2': 4.953456032656076e-06, 'scale_pos_weight': 1.5390023963689838, 'label_threshold': 0.5417511441821039}. Best is trial 1 with value: 0.21440042826552463.


Best trial: 1. Best value: 0.2144:   4%|▍         | 22/500 [03:57<1:28:51, 11.15s/it]

AUC: 0.65123, Precision: 0.16126, Recall: 0.22271, F1: 0.18707
[I 2025-02-10 23:35:10,568] Trial 21 finished with value: 0.18706613080014614 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.09930143838501403, 'num_leaves': 388, 'max_depth': -1, 'feature_fraction': 0.6790900285567552, 'bagging_fraction': 0.7434668736957346, 'bagging_freq': 4, 'lambda_l1': 0.008099122344978869, 'lambda_l2': 6.950955654743168e-05, 'scale_pos_weight': 19.203648239059763, 'label_threshold': 0.5006936983132592}. Best is trial 1 with value: 0.21440042826552463.


Best trial: 1. Best value: 0.2144:   5%|▍         | 23/500 [04:13<1:39:54, 12.57s/it]

AUC: 0.66252, Precision: 0.18210, Recall: 0.20705, F1: 0.19377
[I 2025-02-10 23:35:26,433] Trial 22 finished with value: 0.19377162629757785 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.0743513571445979, 'num_leaves': 337, 'max_depth': 0, 'feature_fraction': 0.6454641644059208, 'bagging_fraction': 0.7747006445765252, 'bagging_freq': 4, 'lambda_l1': 0.03171205691268127, 'lambda_l2': 2.1803553066547632e-05, 'scale_pos_weight': 17.89975458906689, 'label_threshold': 0.5698462964096824}. Best is trial 1 with value: 0.21440042826552463.


Best trial: 1. Best value: 0.2144:   5%|▍         | 24/500 [04:16<1:17:42,  9.79s/it]

AUC: 0.65287, Precision: 0.13317, Recall: 0.38191, F1: 0.19748
[I 2025-02-10 23:35:29,758] Trial 23 finished with value: 0.197480881691408 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.07405549303382197, 'num_leaves': 323, 'max_depth': 1, 'feature_fraction': 0.5901025220887725, 'bagging_fraction': 0.7915835245875534, 'bagging_freq': 3, 'lambda_l1': 0.30255043878369964, 'lambda_l2': 6.326683036698837e-07, 'scale_pos_weight': 18.10175474431838, 'label_threshold': 0.6155952244646209}. Best is trial 1 with value: 0.21440042826552463.


Best trial: 24. Best value: 0.215712:   5%|▌         | 25/500 [04:20<1:03:46,  8.06s/it]

AUC: 0.67289, Precision: 0.15453, Recall: 0.35711, F1: 0.21571
[I 2025-02-10 23:35:33,761] Trial 24 finished with value: 0.21571203363110877 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.05704594288346679, 'num_leaves': 326, 'max_depth': 3, 'feature_fraction': 0.5839286452240832, 'bagging_fraction': 0.8798391390249707, 'bagging_freq': 3, 'lambda_l1': 8.574241411296786, 'lambda_l2': 5.706079688794327e-07, 'scale_pos_weight': 15.665587322346273, 'label_threshold': 0.6212400377226356}. Best is trial 24 with value: 0.21571203363110877.


Best trial: 24. Best value: 0.215712:   5%|▌         | 26/500 [04:24<54:09,  6.85s/it]  

AUC: 0.66884, Precision: 0.31212, Recall: 0.04480, F1: 0.07836
[I 2025-02-10 23:35:37,812] Trial 25 finished with value: 0.07835678965386078 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.04450681897028026, 'num_leaves': 401, 'max_depth': 3, 'feature_fraction': 0.5771445056861318, 'bagging_fraction': 0.9976894104786622, 'bagging_freq': 3, 'lambda_l1': 8.23059141367895, 'lambda_l2': 1.0833499634789443e-07, 'scale_pos_weight': 12.251899383808844, 'label_threshold': 0.6944638418831744}. Best is trial 24 with value: 0.21571203363110877.


Best trial: 26. Best value: 0.217792:   5%|▌         | 27/500 [04:34<1:00:01,  7.61s/it]

AUC: 0.68345, Precision: 0.14192, Recall: 0.46803, F1: 0.21779
[I 2025-02-10 23:35:47,196] Trial 26 finished with value: 0.21779172148567957 and parameters: {'boosting_type': 'dart', 'learning_rate': 0.05806849270546989, 'num_leaves': 452, 'max_depth': 7, 'feature_fraction': 0.7271100286649295, 'bagging_fraction': 0.8868645548505083, 'bagging_freq': 1, 'lambda_l1': 3.977220099132249, 'lambda_l2': 2.075396889286104e-06, 'scale_pos_weight': 15.004231273996535, 'label_threshold': 0.555573499688569}. Best is trial 26 with value: 0.21779172148567957.


Best trial: 26. Best value: 0.217792:   6%|▌         | 28/500 [04:49<1:17:11,  9.81s/it]

AUC: 0.66357, Precision: 0.21307, Recall: 0.14746, F1: 0.17429
[I 2025-02-10 23:36:02,141] Trial 27 finished with value: 0.17429305912596402 and parameters: {'boosting_type': 'dart', 'learning_rate': 0.061320355036794616, 'num_leaves': 461, 'max_depth': 9, 'feature_fraction': 0.7509074642769888, 'bagging_fraction': 0.8862872921805622, 'bagging_freq': 1, 'lambda_l1': 3.0031203221304423, 'lambda_l2': 2.019622242009217e-06, 'scale_pos_weight': 15.639210562018189, 'label_threshold': 0.6250766113524584}. Best is trial 26 with value: 0.21779172148567957.


Best trial: 26. Best value: 0.217792:   6%|▌         | 29/500 [04:56<1:12:18,  9.21s/it]

AUC: 0.67639, Precision: 0.28571, Recall: 0.00087, F1: 0.00173
[I 2025-02-10 23:36:09,948] Trial 28 finished with value: 0.0017346053772766695 and parameters: {'boosting_type': 'dart', 'learning_rate': 0.055416336174373866, 'num_leaves': 194, 'max_depth': 6, 'feature_fraction': 0.7842383255476157, 'bagging_fraction': 0.8600461307333597, 'bagging_freq': 1, 'lambda_l1': 0.5600611372957621, 'lambda_l2': 8.761995132336774e-08, 'scale_pos_weight': 13.935226382712768, 'label_threshold': 0.7774344363193065}. Best is trial 26 with value: 0.21779172148567957.


Best trial: 26. Best value: 0.217792:   6%|▌         | 30/500 [05:07<1:14:35,  9.52s/it]

AUC: 0.67587, Precision: 0.00000, Recall: 0.00000, F1: 0.00000
[I 2025-02-10 23:36:20,197] Trial 29 finished with value: 0.0 and parameters: {'boosting_type': 'dart', 'learning_rate': 0.031764633078234454, 'num_leaves': 510, 'max_depth': 7, 'feature_fraction': 0.8281905798773644, 'bagging_fraction': 0.9037892156408077, 'bagging_freq': 1, 'lambda_l1': 2.880574543367942, 'lambda_l2': 1.2184693246704085e-06, 'scale_pos_weight': 8.990595431968508, 'label_threshold': 0.7194692958301467}. Best is trial 26 with value: 0.21779172148567957.


Best trial: 26. Best value: 0.217792:   6%|▌         | 31/500 [05:23<1:30:13, 11.54s/it]

AUC: 0.66555, Precision: 0.00000, Recall: 0.00000, F1: 0.00000
[I 2025-02-10 23:36:36,454] Trial 30 finished with value: 0.0 and parameters: {'boosting_type': 'dart', 'learning_rate': 0.08394583922911596, 'num_leaves': 306, 'max_depth': 9, 'feature_fraction': 0.9541454774017, 'bagging_fraction': 0.864492139313923, 'bagging_freq': 0, 'lambda_l1': 2.76007001710924, 'lambda_l2': 6.333789586572381e-06, 'scale_pos_weight': 14.918293678318626, 'label_threshold': 0.9095219048547662}. Best is trial 26 with value: 0.21779172148567957.


Best trial: 26. Best value: 0.217792:   6%|▋         | 32/500 [05:28<1:14:32,  9.56s/it]

AUC: 0.66676, Precision: 0.40000, Recall: 0.00087, F1: 0.00174
[I 2025-02-10 23:36:41,378] Trial 31 finished with value: 0.001736111111111111 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.010119665051029002, 'num_leaves': 437, 'max_depth': 4, 'feature_fraction': 0.7269068401264521, 'bagging_fraction': 0.6988159275165179, 'bagging_freq': 5, 'lambda_l1': 0.38453615044237105, 'lambda_l2': 0.0001099901134666181, 'scale_pos_weight': 16.254880891137933, 'label_threshold': 0.5434731167662935}. Best is trial 26 with value: 0.21779172148567957.


Best trial: 26. Best value: 0.217792:   7%|▋         | 33/500 [05:37<1:12:15,  9.28s/it]

AUC: 0.67980, Precision: 0.15563, Recall: 0.36190, F1: 0.21766
[I 2025-02-10 23:36:50,022] Trial 32 finished with value: 0.21765860039241333 and parameters: {'boosting_type': 'dart', 'learning_rate': 0.04104211134478948, 'num_leaves': 410, 'max_depth': 7, 'feature_fraction': 0.6695730484258666, 'bagging_fraction': 0.8111096520436738, 'bagging_freq': 2, 'lambda_l1': 0.003048774209808288, 'lambda_l2': 0.0011497366153120684, 'scale_pos_weight': 13.709194541196426, 'label_threshold': 0.5453829950487922}. Best is trial 26 with value: 0.21779172148567957.


Best trial: 26. Best value: 0.217792:   7%|▋         | 34/500 [05:45<1:09:50,  8.99s/it]

AUC: 0.68181, Precision: 0.23309, Recall: 0.18138, F1: 0.20401
[I 2025-02-10 23:36:58,334] Trial 33 finished with value: 0.20401174168297456 and parameters: {'boosting_type': 'dart', 'learning_rate': 0.05363792332290064, 'num_leaves': 252, 'max_depth': 7, 'feature_fraction': 0.676047670497939, 'bagging_fraction': 0.8060579259119697, 'bagging_freq': 2, 'lambda_l1': 0.002332344265877491, 'lambda_l2': 0.001216957449431659, 'scale_pos_weight': 12.278246211636379, 'label_threshold': 0.6045021624524191}. Best is trial 26 with value: 0.21779172148567957.


Best trial: 26. Best value: 0.217792:   7%|▋         | 35/500 [05:51<1:03:02,  8.13s/it]

AUC: 0.67551, Precision: 0.19977, Recall: 0.22793, F1: 0.21292
[I 2025-02-10 23:37:04,468] Trial 34 finished with value: 0.21292157659488012 and parameters: {'boosting_type': 'dart', 'learning_rate': 0.06498792069916466, 'num_leaves': 345, 'max_depth': 5, 'feature_fraction': 0.7487900862428386, 'bagging_fraction': 0.9076483880316797, 'bagging_freq': 2, 'lambda_l1': 6.444342493425625e-05, 'lambda_l2': 0.05105628593446761, 'scale_pos_weight': 10.940516503534129, 'label_threshold': 0.5737182263109589}. Best is trial 26 with value: 0.21779172148567957.


Best trial: 26. Best value: 0.217792:   7%|▋         | 36/500 [06:04<1:13:30,  9.50s/it]

AUC: 0.66960, Precision: 0.19409, Recall: 0.18269, F1: 0.18821
[I 2025-02-10 23:37:17,171] Trial 35 finished with value: 0.1882142056912391 and parameters: {'boosting_type': 'dart', 'learning_rate': 0.032566084514245315, 'num_leaves': 398, 'max_depth': 9, 'feature_fraction': 0.5632302534377135, 'bagging_fraction': 0.8439857036473528, 'bagging_freq': 1, 'lambda_l1': 9.996213597240311, 'lambda_l2': 0.000707927588711727, 'scale_pos_weight': 12.805977218219466, 'label_threshold': 0.5461759745808542}. Best is trial 26 with value: 0.21779172148567957.


Best trial: 26. Best value: 0.217792:   7%|▋         | 37/500 [06:10<1:05:41,  8.51s/it]

AUC: 0.66966, Precision: 0.19901, Recall: 0.20966, F1: 0.20419
[I 2025-02-10 23:37:23,370] Trial 36 finished with value: 0.20419402668926076 and parameters: {'boosting_type': 'dart', 'learning_rate': 0.043250276092918344, 'num_leaves': 476, 'max_depth': 5, 'feature_fraction': 0.7006462586403672, 'bagging_fraction': 0.8880851342833032, 'bagging_freq': 2, 'lambda_l1': 0.17379799750453845, 'lambda_l2': 1.6002862788027903, 'scale_pos_weight': 14.373019494091333, 'label_threshold': 0.5964054229701882}. Best is trial 26 with value: 0.21779172148567957.


Best trial: 26. Best value: 0.217792:   8%|▊         | 38/500 [06:20<1:08:54,  8.95s/it]

AUC: 0.66933, Precision: 0.25524, Recall: 0.09004, F1: 0.13312
[I 2025-02-10 23:37:33,338] Trial 37 finished with value: 0.13311897106109324 and parameters: {'boosting_type': 'dart', 'learning_rate': 0.05690823191653845, 'num_leaves': 304, 'max_depth': 8, 'feature_fraction': 0.5056620396195834, 'bagging_fraction': 0.9715694831129693, 'bagging_freq': 2, 'lambda_l1': 1.2791903648701386, 'lambda_l2': 0.002367469315955347, 'scale_pos_weight': 13.27911081164662, 'label_threshold': 0.6371179738715037}. Best is trial 26 with value: 0.21779172148567957.


Best trial: 26. Best value: 0.217792:   8%|▊         | 39/500 [06:39<1:32:13, 12.00s/it]

AUC: 0.67397, Precision: 0.00000, Recall: 0.00000, F1: 0.00000
[I 2025-02-10 23:37:52,469] Trial 38 finished with value: 0.0 and parameters: {'boosting_type': 'dart', 'learning_rate': 0.02021564344382161, 'num_leaves': 441, 'max_depth': 11, 'feature_fraction': 0.6569739812765429, 'bagging_fraction': 0.8063236441122529, 'bagging_freq': 1, 'lambda_l1': 4.189685664281005e-07, 'lambda_l2': 1.2179089238142366e-05, 'scale_pos_weight': 11.59805715675364, 'label_threshold': 0.6881110455167117}. Best is trial 26 with value: 0.21779172148567957.


Best trial: 26. Best value: 0.217792:   8%|▊         | 40/500 [06:54<1:39:09, 12.93s/it]

AUC: 0.66943, Precision: 0.27536, Recall: 0.01653, F1: 0.03119
[I 2025-02-10 23:38:07,569] Trial 39 finished with value: 0.031185884283955682 and parameters: {'boosting_type': 'dart', 'learning_rate': 0.08143303180404182, 'num_leaves': 362, 'max_depth': 10, 'feature_fraction': 0.5541186170897009, 'bagging_fraction': 0.7648500567686234, 'bagging_freq': 2, 'lambda_l1': 0.0038285827369063407, 'lambda_l2': 1.7078558186384972e-07, 'scale_pos_weight': 9.354699931331218, 'label_threshold': 0.6589681685318786}. Best is trial 26 with value: 0.21779172148567957.


In [15]:
# Show the best trial found by the study.
# NOTE(review): the output recorded under this cell is an int32 array, which is
# not what this expression displays, and the execution counts jump from 7 to
# 15 - the saved output looks stale. Re-run the notebook top to bottom.
study.best_trial

array([ 72,  81,  78,  69,  54,  65, 110, 134,  80,  97], dtype=int32)