In [1]:
import numpy as np

np.show_config()

Build Dependencies:
  blas:
    detection method: pkgconfig
    found: true
    include directory: /opt/arm64-builds/include
    lib directory: /opt/arm64-builds/lib
    name: openblas64
    openblas configuration: USE_64BITINT=1 DYNAMIC_ARCH=1 DYNAMIC_OLDER= NO_CBLAS=
      NO_LAPACK= NO_LAPACKE= NO_AFFINITY=1 USE_OPENMP= SANDYBRIDGE MAX_THREADS=3
    pc file directory: /usr/local/lib/pkgconfig
    version: 0.3.23.dev
  lapack:
    detection method: internal
    found: true
    include directory: unknown
    lib directory: unknown
    name: dep4350285776
    openblas configuration: unknown
    pc file directory: unknown
    version: 1.26.4
Compilers:
  c:
    args: -fno-strict-aliasing, -DBLAS_SYMBOL_SUFFIX=64_, -DHAVE_BLAS_ILP64
    commands: cc
    linker: ld64
    linker args: -fno-strict-aliasing, -DBLAS_SYMBOL_SUFFIX=64_, -DHAVE_BLAS_ILP64
    name: clang
    version: 14.0.0
  c++:
    args: -DBLAS_SYMBOL_SUFFIX=64_, -DHAVE_BLAS_ILP64
    commands: c++
    linker: ld64
    linker

# side model

In [2]:
import pandas as pd

df_features = pd.read_parquet("data/features_15m.parquet")
df_label_side = pd.read_parquet("data/label_15m_side.parquet")

df_features = df_features.iloc[240:]
df_label_side = df_label_side.iloc[240:]

print(df_features.shape)
print(df_label_side.shape)

df_label_side.head()

(178032, 1263)
(178032, 8)


Unnamed: 0_level_0,open,high,low,close,volume,ret,trgt,bin
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-01-03 12:00:00+00:00,7338.4,7347.0,7320.01,7344.13,1485.629,,,0.0
2020-01-03 12:15:00+00:00,7344.84,7348.52,7335.1,7336.76,1387.831,,,0.0
2020-01-03 12:30:00+00:00,7337.05,7337.06,7315.39,7317.8,1354.678,,,0.0
2020-01-03 12:45:00+00:00,7317.82,7323.67,7306.01,7321.57,1362.688,,,0.0
2020-01-03 13:00:00+00:00,7321.31,7329.0,7310.85,7324.98,783.078,,,0.0


In [3]:
from custom_indicators.selection import SIDE_ALL

valid_mask = (df_label_side["bin"].astype(int) != 0).tolist()
side_features = df_features[valid_mask][SIDE_ALL]
side_label = (df_label_side[valid_mask]["bin"].astype(int) == 1).astype(int)
print(side_label.value_counts())

train_test_split_point = int(side_features.shape[0] * 0.8)
side_features_train = side_features.iloc[:train_test_split_point]
side_features_test = side_features.iloc[train_test_split_point:]
side_label_train = side_label.iloc[:train_test_split_point]
side_label_test = side_label.iloc[train_test_split_point:]

side_label_test.value_counts()

bin
1    10427
0    10157
Name: count, dtype: int64


bin
1    2076
0    2041
Name: count, dtype: int64

In [4]:
import lightgbm as lgb
import optuna
from sklearn.metrics import f1_score, precision_score, recall_score, roc_auc_score


def objective(trial):
    params = {
        "objective": "binary",
        "metric": "auc",
        "num_threads": -1,
        "verbose": -1,
        "early_stopping_round": 100,
        "boosting": "gbdt",
        "num_leaves": trial.suggest_int("num_leaves", 31, 300),
        "max_depth": trial.suggest_int("max_depth", 30, 100),
        "min_gain_to_split": trial.suggest_float("min_gain_to_split", 1e-8, 1),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 20, 200),
        "lambda_l1": trial.suggest_float("lambda_l1", 1e-8, 1),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-8, 1),
    }
    dtrain = lgb.Dataset(side_features_train, side_label_train)
    dtest = lgb.Dataset(side_features_test, side_label_test)
    model = lgb.train(
        params,
        dtrain,
        valid_sets=[dtest],
        num_boost_round=trial.suggest_int("num_boost_round", 100, 1000),
    )
    pred_proba = model.predict(side_features_test)
    pred_label = (pred_proba > 0.5).astype(int)
    precision = precision_score(side_label_test, pred_label)
    recall = recall_score(side_label_test, pred_label)
    f1 = f1_score(side_label_test, pred_label)
    auc = roc_auc_score(side_label_test, pred_proba)
    print(f"precision: {precision}, recall: {recall}, f1: {f1}")
    return auc


study = optuna.create_study(
    direction="maximize",
    pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
    sampler=optuna.samplers.TPESampler(),
)
study.optimize(objective, n_trials=50, n_jobs=1)

  from .autonotebook import tqdm as notebook_tqdm
  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[I 2025-02-24 22:07:48,996] A new study created in memory with name: no-name-32e79e13-feed-42d3-a041-896b2e8867ff
[I 2025-02-24 22:07:55,197] Trial 0 finished with value: 0.6826700047862744 and parameters: {'num_leaves': 245, 'max_depth': 54, 'min_gain_to_split': 0.36985648123091536, 'min_data_in_leaf': 155, 'lambda_l1': 0.5033598133739919, 'lambda_l2': 0.09125418421821364, 'num_boost_round': 541}. Best is trial 0 with value: 0.6826700047862744.


precision: 0.6694267515923567, recall: 0.50626204238921, f1: 0.5765222161272627


[I 2025-02-24 22:08:01,313] Trial 1 finished with value: 0.6830511602703349 and parameters: {'num_leaves': 70, 'max_depth': 67, 'min_gain_to_split': 0.42619246639701935, 'min_data_in_leaf': 77, 'lambda_l1': 0.44950345382201606, 'lambda_l2': 0.5323917774159685, 'num_boost_round': 876}. Best is trial 1 with value: 0.6830511602703349.


precision: 0.6597938144329897, recall: 0.4932562620423892, f1: 0.5644983461962514


[I 2025-02-24 22:08:08,042] Trial 2 finished with value: 0.6854645943136795 and parameters: {'num_leaves': 133, 'max_depth': 43, 'min_gain_to_split': 0.35067837054033263, 'min_data_in_leaf': 84, 'lambda_l1': 0.2033801720255551, 'lambda_l2': 0.6199748120929661, 'num_boost_round': 887}. Best is trial 2 with value: 0.6854645943136795.


precision: 0.6608369768894441, recall: 0.5096339113680154, f1: 0.5754691324449279


[I 2025-02-24 22:08:16,300] Trial 3 finished with value: 0.6924526021945115 and parameters: {'num_leaves': 130, 'max_depth': 96, 'min_gain_to_split': 0.13843516048066246, 'min_data_in_leaf': 108, 'lambda_l1': 0.7084546848465194, 'lambda_l2': 0.3104856658488992, 'num_boost_round': 317}. Best is trial 3 with value: 0.6924526021945115.


precision: 0.6681557115507338, recall: 0.5043352601156069, f1: 0.5748009881965414


[I 2025-02-24 22:08:20,918] Trial 4 finished with value: 0.6878176099025848 and parameters: {'num_leaves': 75, 'max_depth': 31, 'min_gain_to_split': 0.5388161593977823, 'min_data_in_leaf': 152, 'lambda_l1': 0.48149770624429333, 'lambda_l2': 0.2930681279149222, 'num_boost_round': 178}. Best is trial 3 with value: 0.6924526021945115.


precision: 0.6568986568986569, recall: 0.5183044315992292, f1: 0.5794291868605277


[I 2025-02-24 22:08:26,065] Trial 5 finished with value: 0.6833726053287188 and parameters: {'num_leaves': 63, 'max_depth': 81, 'min_gain_to_split': 0.5235741350349659, 'min_data_in_leaf': 29, 'lambda_l1': 0.5237892561843204, 'lambda_l2': 0.1526255120831417, 'num_boost_round': 790}. Best is trial 3 with value: 0.6924526021945115.


precision: 0.6599749058971142, recall: 0.5067437379576107, f1: 0.5732970027247957


[I 2025-02-24 22:08:31,092] Trial 6 finished with value: 0.6821120781210617 and parameters: {'num_leaves': 63, 'max_depth': 93, 'min_gain_to_split': 0.6073079652278717, 'min_data_in_leaf': 36, 'lambda_l1': 0.9716555616149423, 'lambda_l2': 0.4630657134241842, 'num_boost_round': 599}. Best is trial 3 with value: 0.6924526021945115.


precision: 0.6565589980224127, recall: 0.4797687861271676, f1: 0.5544113554133037


[I 2025-02-24 22:08:37,499] Trial 7 finished with value: 0.6834781016144 and parameters: {'num_leaves': 123, 'max_depth': 53, 'min_gain_to_split': 0.3230036462075524, 'min_data_in_leaf': 94, 'lambda_l1': 0.7492476613457116, 'lambda_l2': 0.4245332000635647, 'num_boost_round': 509}. Best is trial 3 with value: 0.6924526021945115.


precision: 0.6607032695866749, recall: 0.5158959537572254, f1: 0.5793886935352989


[I 2025-02-24 22:08:41,489] Trial 8 finished with value: 0.6921578262195323 and parameters: {'num_leaves': 109, 'max_depth': 88, 'min_gain_to_split': 0.8517412852353262, 'min_data_in_leaf': 173, 'lambda_l1': 0.87882229265951, 'lambda_l2': 0.16725835567898725, 'num_boost_round': 785}. Best is trial 3 with value: 0.6924526021945115.


precision: 0.6631016042780749, recall: 0.5375722543352601, f1: 0.5937749401436552


[I 2025-02-24 22:08:45,749] Trial 9 finished with value: 0.6825137664392478 and parameters: {'num_leaves': 102, 'max_depth': 97, 'min_gain_to_split': 0.9615765819475235, 'min_data_in_leaf': 20, 'lambda_l1': 0.6452160768309655, 'lambda_l2': 0.42942118280942576, 'num_boost_round': 894}. Best is trial 3 with value: 0.6924526021945115.


precision: 0.6565406075635462, recall: 0.5101156069364162, f1: 0.5741393331526159


[I 2025-02-24 22:08:53,549] Trial 10 finished with value: 0.6877293423168023 and parameters: {'num_leaves': 202, 'max_depth': 76, 'min_gain_to_split': 0.00673545778214818, 'min_data_in_leaf': 122, 'lambda_l1': 0.13475453924029607, 'lambda_l2': 0.8741865336270114, 'num_boost_round': 191}. Best is trial 3 with value: 0.6924526021945115.


precision: 0.6603658536585366, recall: 0.5216763005780347, f1: 0.5828848223896663


[I 2025-02-24 22:08:57,267] Trial 11 finished with value: 0.6857452097133996 and parameters: {'num_leaves': 183, 'max_depth': 87, 'min_gain_to_split': 0.9771701042084764, 'min_data_in_leaf': 181, 'lambda_l1': 0.9282763229845462, 'lambda_l2': 7.069824408512115e-06, 'num_boost_round': 363}. Best is trial 3 with value: 0.6924526021945115.


precision: 0.661892901618929, recall: 0.5120423892100193, f1: 0.5774035850081477


[I 2025-02-24 22:09:01,467] Trial 12 finished with value: 0.6854811149848152 and parameters: {'num_leaves': 152, 'max_depth': 99, 'min_gain_to_split': 0.7722591710466229, 'min_data_in_leaf': 198, 'lambda_l1': 0.7785201306149986, 'lambda_l2': 0.2395105947816429, 'num_boost_round': 703}. Best is trial 3 with value: 0.6924526021945115.


precision: 0.659963436928702, recall: 0.5216763005780347, f1: 0.58272800645682


[I 2025-02-24 22:09:06,639] Trial 13 finished with value: 0.6770820057794028 and parameters: {'num_leaves': 223, 'max_depth': 74, 'min_gain_to_split': 0.1222192586482036, 'min_data_in_leaf': 125, 'lambda_l1': 0.8103909297012878, 'lambda_l2': 0.27953344833802557, 'num_boost_round': 341}. Best is trial 3 with value: 0.6924526021945115.


precision: 0.6407185628742516, recall: 0.5669556840077071, f1: 0.6015844620495784


[I 2025-02-24 22:09:11,134] Trial 14 finished with value: 0.6850765945515771 and parameters: {'num_leaves': 299, 'max_depth': 85, 'min_gain_to_split': 0.7488572084057598, 'min_data_in_leaf': 156, 'lambda_l1': 0.6531341097546054, 'lambda_l2': 0.7088174488335026, 'num_boost_round': 993}. Best is trial 3 with value: 0.6924526021945115.


precision: 0.6611213801601972, recall: 0.516859344894027, f1: 0.5801567991349014


[I 2025-02-24 22:09:17,874] Trial 15 finished with value: 0.6820957934595135 and parameters: {'num_leaves': 105, 'max_depth': 90, 'min_gain_to_split': 0.23634342191994953, 'min_data_in_leaf': 64, 'lambda_l1': 0.32191663295517337, 'lambda_l2': 0.34022168089966465, 'num_boost_round': 373}. Best is trial 3 with value: 0.6924526021945115.


precision: 0.65113427345187, recall: 0.5115606936416185, f1: 0.5729700566495819


[I 2025-02-24 22:09:21,437] Trial 16 finished with value: 0.69642865571771 and parameters: {'num_leaves': 33, 'max_depth': 67, 'min_gain_to_split': 0.744866412641944, 'min_data_in_leaf': 112, 'lambda_l1': 0.8829589836533817, 'lambda_l2': 0.1729864564667037, 'num_boost_round': 669}. Best is trial 16 with value: 0.69642865571771.


precision: 0.6838046272493573, recall: 0.51252408477842, f1: 0.5859030837004405


[I 2025-02-24 22:09:25,570] Trial 17 finished with value: 0.6828378076030961 and parameters: {'num_leaves': 171, 'max_depth': 62, 'min_gain_to_split': 0.6636616422916429, 'min_data_in_leaf': 107, 'lambda_l1': 0.010041501770463757, 'lambda_l2': 0.008475967531244355, 'num_boost_round': 104}. Best is trial 16 with value: 0.69642865571771.


precision: 0.6702599873176919, recall: 0.5091522157996147, f1: 0.5787024363536819


[I 2025-02-24 22:09:33,749] Trial 18 finished with value: 0.68790658551713 and parameters: {'num_leaves': 40, 'max_depth': 67, 'min_gain_to_split': 0.16403454458056452, 'min_data_in_leaf': 134, 'lambda_l1': 0.6519545281223604, 'lambda_l2': 0.8170611454252187, 'num_boost_round': 451}. Best is trial 16 with value: 0.69642865571771.


precision: 0.6818181818181818, recall: 0.4841040462427746, f1: 0.5661971830985916


[I 2025-02-24 22:09:36,805] Trial 19 finished with value: 0.6896037304619463 and parameters: {'num_leaves': 31, 'max_depth': 43, 'min_gain_to_split': 0.6615474523127666, 'min_data_in_leaf': 60, 'lambda_l1': 0.9966868088454767, 'lambda_l2': 0.558963606089438, 'num_boost_round': 661}. Best is trial 16 with value: 0.69642865571771.


precision: 0.6627475247524752, recall: 0.5158959537572254, f1: 0.5801733477789816


[I 2025-02-24 22:09:47,082] Trial 20 finished with value: 0.6888319791103195 and parameters: {'num_leaves': 263, 'max_depth': 58, 'min_gain_to_split': 0.012967777084986198, 'min_data_in_leaf': 106, 'lambda_l1': 0.7141392331947407, 'lambda_l2': 0.3580535904083962, 'num_boost_round': 268}. Best is trial 16 with value: 0.69642865571771.


precision: 0.6689612015018773, recall: 0.5149325626204239, f1: 0.5819270549809472


[I 2025-02-24 22:09:50,323] Trial 21 finished with value: 0.6799997923115628 and parameters: {'num_leaves': 147, 'max_depth': 79, 'min_gain_to_split': 0.8360983964210157, 'min_data_in_leaf': 174, 'lambda_l1': 0.8790948229574196, 'lambda_l2': 0.1678228271290514, 'num_boost_round': 743}. Best is trial 16 with value: 0.69642865571771.


precision: 0.6422067487948581, recall: 0.5775529865125241, f1: 0.6081663707836673


[I 2025-02-24 22:09:54,285] Trial 22 finished with value: 0.6782670099190109 and parameters: {'num_leaves': 99, 'max_depth': 100, 'min_gain_to_split': 0.8772657858938503, 'min_data_in_leaf': 141, 'lambda_l1': 0.8477924034608613, 'lambda_l2': 0.1928410419802397, 'num_boost_round': 611}. Best is trial 16 with value: 0.69642865571771.


precision: 0.65625, recall: 0.4956647398843931, f1: 0.5647639956092206


[I 2025-02-24 22:09:58,184] Trial 23 finished with value: 0.6979620100086946 and parameters: {'num_leaves': 122, 'max_depth': 71, 'min_gain_to_split': 0.889013478088318, 'min_data_in_leaf': 176, 'lambda_l1': 0.8768625349591431, 'lambda_l2': 0.07968899701661042, 'num_boost_round': 775}. Best is trial 23 with value: 0.6979620100086946.


precision: 0.6697530864197531, recall: 0.5226396917148363, f1: 0.5871212121212122


[I 2025-02-24 22:10:02,032] Trial 24 finished with value: 0.6791598341891041 and parameters: {'num_leaves': 199, 'max_depth': 71, 'min_gain_to_split': 0.6932247688444994, 'min_data_in_leaf': 198, 'lambda_l1': 0.733702926827802, 'lambda_l2': 0.08983431784820538, 'num_boost_round': 465}. Best is trial 23 with value: 0.6979620100086946.


precision: 0.6568933250155957, recall: 0.5072254335260116, f1: 0.5724381625441696


[I 2025-02-24 22:10:06,026] Trial 25 finished with value: 0.6912581576713972 and parameters: {'num_leaves': 89, 'max_depth': 49, 'min_gain_to_split': 0.9161535961812594, 'min_data_in_leaf': 119, 'lambda_l1': 0.6019697853771693, 'lambda_l2': 0.08969244472894891, 'num_boost_round': 680}. Best is trial 23 with value: 0.6979620100086946.


precision: 0.6597484276729559, recall: 0.5052986512524085, f1: 0.5722858701582105


[I 2025-02-24 22:10:10,810] Trial 26 finished with value: 0.6850919351747746 and parameters: {'num_leaves': 126, 'max_depth': 62, 'min_gain_to_split': 0.7808658694329409, 'min_data_in_leaf': 51, 'lambda_l1': 0.9170415545446714, 'lambda_l2': 0.3511400643998431, 'num_boost_round': 826}. Best is trial 23 with value: 0.6979620100086946.


precision: 0.6580565805658056, recall: 0.5154142581888247, f1: 0.5780659103187467


[I 2025-02-24 22:10:16,421] Trial 27 finished with value: 0.692284091348927 and parameters: {'num_leaves': 146, 'max_depth': 70, 'min_gain_to_split': 0.4688664705344348, 'min_data_in_leaf': 96, 'lambda_l1': 0.8189456302123187, 'lambda_l2': 0.23373559406624217, 'num_boost_round': 607}. Best is trial 23 with value: 0.6979620100086946.


precision: 0.662969808995687, recall: 0.5183044315992292, f1: 0.5817788591511219


[I 2025-02-24 22:10:21,370] Trial 28 finished with value: 0.6932998766141876 and parameters: {'num_leaves': 55, 'max_depth': 83, 'min_gain_to_split': 0.586472915384668, 'min_data_in_leaf': 139, 'lambda_l1': 0.5690123875695006, 'lambda_l2': 0.9697060779725075, 'num_boost_round': 977}. Best is trial 23 with value: 0.6979620100086946.


precision: 0.6710444027517198, recall: 0.516859344894027, f1: 0.5839455782312926


[I 2025-02-24 22:10:26,585] Trial 29 finished with value: 0.6925767432376173 and parameters: {'num_leaves': 46, 'max_depth': 80, 'min_gain_to_split': 0.611700177351087, 'min_data_in_leaf': 157, 'lambda_l1': 0.3917215010805073, 'lambda_l2': 0.7646159689499417, 'num_boost_round': 942}. Best is trial 23 with value: 0.6979620100086946.


precision: 0.6823295830575777, recall: 0.49662813102119463, f1: 0.5748536381377195


[I 2025-02-24 22:10:31,938] Trial 30 finished with value: 0.6884442153578045 and parameters: {'num_leaves': 52, 'max_depth': 74, 'min_gain_to_split': 0.5782128511665143, 'min_data_in_leaf': 144, 'lambda_l1': 0.567106877558714, 'lambda_l2': 0.9439307463471278, 'num_boost_round': 957}. Best is trial 23 with value: 0.6979620100086946.


precision: 0.6683870967741935, recall: 0.49903660886319845, f1: 0.5714285714285714


[I 2025-02-24 22:10:37,014] Trial 31 finished with value: 0.6942689319810929 and parameters: {'num_leaves': 46, 'max_depth': 82, 'min_gain_to_split': 0.6964714071899222, 'min_data_in_leaf': 162, 'lambda_l1': 0.3395584788755338, 'lambda_l2': 0.9947667378953065, 'num_boost_round': 933}. Best is trial 23 with value: 0.6979620100086946.


precision: 0.6735218508997429, recall: 0.5048169556840078, f1: 0.5770925110132159


[I 2025-02-24 22:10:41,503] Trial 32 finished with value: 0.6896875138655633 and parameters: {'num_leaves': 81, 'max_depth': 83, 'min_gain_to_split': 0.7087115938158156, 'min_data_in_leaf': 165, 'lambda_l1': 0.3116126342694324, 'lambda_l2': 0.9716330729329605, 'num_boost_round': 847}. Best is trial 23 with value: 0.6979620100086946.


precision: 0.6658639373871162, recall: 0.5327552986512524, f1: 0.5919186513245919


[I 2025-02-24 22:10:44,595] Trial 33 finished with value: 0.686073735059413 and parameters: {'num_leaves': 49, 'max_depth': 65, 'min_gain_to_split': 0.7961326372143852, 'min_data_in_leaf': 185, 'lambda_l1': 0.39424947091831875, 'lambda_l2': 0.898042804565702, 'num_boost_round': 925}. Best is trial 23 with value: 0.6979620100086946.


precision: 0.6612021857923497, recall: 0.5245664739884393, f1: 0.5850120870265915


[I 2025-02-24 22:10:47,180] Trial 34 finished with value: 0.689592402001739 and parameters: {'num_leaves': 32, 'max_depth': 76, 'min_gain_to_split': 0.8990740744585525, 'min_data_in_leaf': 131, 'lambda_l1': 0.2200676512660214, 'lambda_l2': 0.6012409912085924, 'num_boost_round': 988}. Best is trial 23 with value: 0.6979620100086946.


precision: 0.6587254318046456, recall: 0.5327552986512524, f1: 0.589081225033289


[I 2025-02-24 22:10:51,443] Trial 35 finished with value: 0.6876271501653484 and parameters: {'num_leaves': 62, 'max_depth': 71, 'min_gain_to_split': 0.4128801778588434, 'min_data_in_leaf': 164, 'lambda_l1': 0.4216982633415054, 'lambda_l2': 0.9891845454268106, 'num_boost_round': 872}. Best is trial 23 with value: 0.6979620100086946.


precision: 0.653610771113831, recall: 0.5144508670520231, f1: 0.5757412398921833


[I 2025-02-24 22:10:56,369] Trial 36 finished with value: 0.6865020924610041 and parameters: {'num_leaves': 83, 'max_depth': 92, 'min_gain_to_split': 0.7211685260780176, 'min_data_in_leaf': 143, 'lambda_l1': 0.3164456112494036, 'lambda_l2': 0.6674125781871598, 'num_boost_round': 749}. Best is trial 23 with value: 0.6979620100086946.


precision: 0.6620947630922693, recall: 0.5115606936416185, f1: 0.5771739130434783


[I 2025-02-24 22:11:00,281] Trial 37 finished with value: 0.7009130738927138 and parameters: {'num_leaves': 60, 'max_depth': 58, 'min_gain_to_split': 0.48176635792830663, 'min_data_in_leaf': 191, 'lambda_l1': 0.4842426606577862, 'lambda_l2': 0.8401075140310387, 'num_boost_round': 812}. Best is trial 37 with value: 0.7009130738927138.


precision: 0.6701570680628273, recall: 0.5549132947976878, f1: 0.6071146245059289


[I 2025-02-24 22:11:04,231] Trial 38 finished with value: 0.6850640860434314 and parameters: {'num_leaves': 76, 'max_depth': 56, 'min_gain_to_split': 0.4842441861489823, 'min_data_in_leaf': 189, 'lambda_l1': 0.46081268444043844, 'lambda_l2': 0.8395701087800302, 'num_boost_round': 806}. Best is trial 37 with value: 0.7009130738927138.


precision: 0.6527369040612124, recall: 0.5342003853564548, f1: 0.5875496688741721


[I 2025-02-24 22:11:10,621] Trial 39 finished with value: 0.6894600006230653 and parameters: {'num_leaves': 70, 'max_depth': 48, 'min_gain_to_split': 0.3713380286197713, 'min_data_in_leaf': 171, 'lambda_l1': 0.5209006820991333, 'lambda_l2': 0.7519738487024856, 'num_boost_round': 742}. Best is trial 37 with value: 0.7009130738927138.


precision: 0.6727154893065457, recall: 0.5, f1: 0.5736391268306162


[I 2025-02-24 22:11:15,024] Trial 40 finished with value: 0.6839522448759958 and parameters: {'num_leaves': 118, 'max_depth': 60, 'min_gain_to_split': 0.8265885195636422, 'min_data_in_leaf': 191, 'lambda_l1': 0.20971104688783465, 'lambda_l2': 0.06585122073239573, 'num_boost_round': 900}. Best is trial 37 with value: 0.7009130738927138.


precision: 0.664204163868368, recall: 0.47639691714836224, f1: 0.5548387096774193


[I 2025-02-24 22:11:19,783] Trial 41 finished with value: 0.690904143289917 and parameters: {'num_leaves': 56, 'max_depth': 65, 'min_gain_to_split': 0.5566577238293475, 'min_data_in_leaf': 180, 'lambda_l1': 0.3674990783219024, 'lambda_l2': 0.9253375433236353, 'num_boost_round': 861}. Best is trial 37 with value: 0.7009130738927138.


precision: 0.678927680798005, recall: 0.5245664739884393, f1: 0.5918478260869565


[I 2025-02-24 22:11:22,638] Trial 42 finished with value: 0.6812718839890152 and parameters: {'num_leaves': 40, 'max_depth': 69, 'min_gain_to_split': 0.6277418588529937, 'min_data_in_leaf': 152, 'lambda_l1': 0.5902067272107429, 'lambda_l2': 0.8469352509403584, 'num_boost_round': 555}. Best is trial 37 with value: 0.7009130738927138.


precision: 0.6506928406466512, recall: 0.5428709055876686, f1: 0.5919117647058824


[I 2025-02-24 22:11:26,622] Trial 43 finished with value: 0.6892242270449994 and parameters: {'num_leaves': 65, 'max_depth': 31, 'min_gain_to_split': 0.4980649074381538, 'min_data_in_leaf': 164, 'lambda_l1': 0.4930787651845505, 'lambda_l2': 0.9970983590165944, 'num_boost_round': 803}. Best is trial 37 with value: 0.7009130738927138.


precision: 0.6519976838448176, recall: 0.5423892100192679, f1: 0.5921640809886931


[I 2025-02-24 22:11:31,580] Trial 44 finished with value: 0.6854181004249116 and parameters: {'num_leaves': 87, 'max_depth': 79, 'min_gain_to_split': 0.4360513438584094, 'min_data_in_leaf': 92, 'lambda_l1': 0.23983417484506264, 'lambda_l2': 0.9082638119042359, 'num_boost_round': 927}. Best is trial 37 with value: 0.7009130738927138.


precision: 0.6587155963302752, recall: 0.5187861271676301, f1: 0.5804365400161682


[I 2025-02-24 22:11:35,378] Trial 45 finished with value: 0.6754027975632482 and parameters: {'num_leaves': 92, 'max_depth': 53, 'min_gain_to_split': 0.5460884955361351, 'min_data_in_leaf': 147, 'lambda_l1': 0.26491672902658353, 'lambda_l2': 0.784501598810701, 'num_boost_round': 674}. Best is trial 37 with value: 0.7009130738927138.


precision: 0.6382179280730005, recall: 0.5727360308285164, f1: 0.6037065244986037


[I 2025-02-24 22:11:39,631] Trial 46 finished with value: 0.6867253575309245 and parameters: {'num_leaves': 55, 'max_depth': 84, 'min_gain_to_split': 0.9383806632618883, 'min_data_in_leaf': 133, 'lambda_l1': 0.10376804249822136, 'lambda_l2': 0.5180150369720965, 'num_boost_round': 760}. Best is trial 37 with value: 0.7009130738927138.


precision: 0.6708526107072043, recall: 0.4889210019267823, f1: 0.5656171635553079


[I 2025-02-24 22:11:44,540] Trial 47 finished with value: 0.6809794681099126 and parameters: {'num_leaves': 113, 'max_depth': 36, 'min_gain_to_split': 0.6695319189596964, 'min_data_in_leaf': 118, 'lambda_l1': 0.9433895777654612, 'lambda_l2': 0.9455397448204813, 'num_boost_round': 715}. Best is trial 37 with value: 0.7009130738927138.


precision: 0.6635279347143754, recall: 0.5091522157996147, f1: 0.5761787953120742


[I 2025-02-24 22:11:47,826] Trial 48 finished with value: 0.6851057417356523 and parameters: {'num_leaves': 71, 'max_depth': 76, 'min_gain_to_split': 0.7365268372871164, 'min_data_in_leaf': 176, 'lambda_l1': 0.5513145488532778, 'lambda_l2': 0.8685216866340054, 'num_boost_round': 998}. Best is trial 37 with value: 0.7009130738927138.


precision: 0.6377708978328174, recall: 0.5953757225433526, f1: 0.6158445440956651


[I 2025-02-24 22:11:51,480] Trial 49 finished with value: 0.6777928666574151 and parameters: {'num_leaves': 136, 'max_depth': 95, 'min_gain_to_split': 0.30760941392472074, 'min_data_in_leaf': 194, 'lambda_l1': 0.7760910144660571, 'lambda_l2': 0.042658072393528146, 'num_boost_round': 641}. Best is trial 37 with value: 0.7009130738927138.


precision: 0.6439732142857143, recall: 0.5558766859344894, f1: 0.5966907962771458


In [6]:
study.best_params

{'num_leaves': 60,
 'max_depth': 58,
 'min_gain_to_split': 0.48176635792830663,
 'min_data_in_leaf': 191,
 'lambda_l1': 0.4842426606577862,
 'lambda_l2': 0.8401075140310387,
 'num_boost_round': 812}

In [7]:
dtrain = lgb.Dataset(side_features_train, side_label_train)
model = lgb.train(study.best_params, dtrain)

In [9]:
import plotly.express as px

res = model.predict(df_features[SIDE_ALL][df_label_side["bin"].astype(int) != 0])

fig = px.histogram(res, nbins=100)
fig.show()

In [10]:
import plotly.express as px

res = model.predict(df_features[SIDE_ALL][df_label_side["bin"].astype(int) == 0])

fig = px.histogram(res, nbins=100)
fig.show()

In [11]:
model.save_model("custom_indicators/models/model_side.txt")

<lightgbm.basic.Booster at 0x3c2c41cd0>

# meta model

In [1]:
import lightgbm as lgb
import numpy as np
import pandas as pd

from custom_indicators.selection import SIDE_ALL

df_features = pd.read_parquet("data/features_15m.parquet")
df_label = pd.read_parquet("data/label_15m_meta.parquet")

model_side = lgb.Booster(model_file="custom_indicators/models/model_side.txt")
model_side_res = model_side.predict(df_features[SIDE_ALL])
df_features["model_side_res"] = model_side_res

df_features = df_features[df_label["ret"].notna()].iloc[240:]
df_label = df_label[df_label["ret"].notna()].iloc[240:]

print(df_features.shape)

assert df_features.shape[0] == df_label.shape[0]
print(df_label.shape)
df_label.head()

(34977, 1264)
(34977, 9)


Unnamed: 0_level_0,open,high,low,close,volume,ret,trgt,bin,side
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-01-14 02:45:00+00:00,8421.35,8440.0,8410.0,8438.64,1784.328,0.009365,0.005059,0.0,1.0
2020-01-14 04:00:00+00:00,8469.54,8598.27,8469.4,8548.31,8772.286,-0.002025,0.005723,0.0,1.0
2020-01-14 04:15:00+00:00,8548.94,8581.45,8515.8,8532.9,3475.529,-0.000105,0.005871,0.0,1.0
2020-01-14 04:30:00+00:00,8533.92,8554.85,8525.0,8547.34,1797.736,-0.002497,0.005882,0.0,1.0
2020-01-14 09:15:00+00:00,8529.99,8550.0,8522.69,8550.0,1916.405,0.000583,0.002437,0.0,-1.0


In [2]:
from custom_indicators.selection import META_ALL

# valid_mask = df_label["ret"].notna().tolist()
valid_mask = [True] * df_features.shape[0]

meta_features = df_features[valid_mask][META_ALL]
print(meta_features.shape)
meta_label = df_label[valid_mask]["bin"].astype(int)

train_test_split_point = int(meta_features.shape[0] * 0.8)
meta_features_train = meta_features.iloc[:train_test_split_point]
meta_features_test = meta_features.iloc[train_test_split_point:]
meta_label_train = meta_label.iloc[:train_test_split_point]
meta_label_test = meta_label.iloc[train_test_split_point:]

meta_label_test.value_counts()

(34977, 1027)


bin
0    4287
1    2709
Name: count, dtype: int64

In [3]:
import lightgbm as lgb
import optuna
from sklearn.metrics import f1_score, precision_score, recall_score, roc_auc_score


def objective(trial):
    params = {
        "objective": "binary",
        "is_unbalance": True,
        "metric": "auc",
        "num_threads": -1,
        "verbose": -1,
        "early_stopping_round": 100,
        "boosting": "gbdt",
        "num_leaves": trial.suggest_int("num_leaves", 31, 300),
        "max_depth": trial.suggest_int("max_depth", 30, 100),
        "min_gain_to_split": trial.suggest_float("min_gain_to_split", 1e-8, 1),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 20, 200),
        "lambda_l1": trial.suggest_float("lambda_l1", 1e-8, 1),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-8, 1),
    }
    dtrain = lgb.Dataset(meta_features_train, meta_label_train)
    dtest = lgb.Dataset(meta_features_test, meta_label_test)
    model = lgb.train(
        params,
        dtrain,
        valid_sets=[dtest],
        num_boost_round=trial.suggest_int("num_boost_round", 300, 1000),
    )
    pred_proba = model.predict(meta_features_test)
    pred_label = (pred_proba > 0.5).astype(int)
    precision = precision_score(meta_label_test, pred_label)
    recall = recall_score(meta_label_test, pred_label)
    f1 = f1_score(meta_label_test, pred_label)
    auc = roc_auc_score(meta_label_test, pred_proba)
    print(f"precision: {precision}, recall: {recall}, f1: {f1}, auc: {auc}")
    return f1


study = optuna.create_study(
    direction="maximize",
    pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
    sampler=optuna.samplers.TPESampler(),
)
study.optimize(objective, n_trials=50, n_jobs=1)

  from .autonotebook import tqdm as notebook_tqdm
  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[I 2025-02-24 22:33:02,692] A new study created in memory with name: no-name-a75245b0-375a-4e77-8e37-cde8d8fd4ba2
[I 2025-02-24 22:33:12,387] Trial 0 finished with value: 0.4785714285714286 and parameters: {'num_leaves': 83, 'max_depth': 55, 'min_gain_to_split': 0.08476357935115539, 'min_data_in_leaf': 29, 'lambda_l1': 0.6458443074124002, 'lambda_l2': 0.9961686382023126, 'num_boost_round': 361}. Best is trial 0 with value: 0.4785714285714286.


precision: 0.5173745173745173, recall: 0.44518272425249167, f1: 0.4785714285714286, auc: 0.618831749269362


[I 2025-02-24 22:33:19,954] Trial 1 finished with value: 0.48114129372679304 and parameters: {'num_leaves': 96, 'max_depth': 46, 'min_gain_to_split': 0.82380446174861, 'min_data_in_leaf': 142, 'lambda_l1': 0.45150273354693926, 'lambda_l2': 0.0809410949918626, 'num_boost_round': 389}. Best is trial 1 with value: 0.48114129372679304.


precision: 0.5112126245847176, recall: 0.4544112218530823, f1: 0.48114129372679304, auc: 0.6208832440707064


[I 2025-02-24 22:33:28,431] Trial 2 finished with value: 0.4852855193919314 and parameters: {'num_leaves': 45, 'max_depth': 77, 'min_gain_to_split': 0.6801512619003673, 'min_data_in_leaf': 29, 'lambda_l1': 0.06569805133012406, 'lambda_l2': 0.056980666564593684, 'num_boost_round': 493}. Best is trial 2 with value: 0.4852855193919314.


precision: 0.514037985136251, recall: 0.45957918050941304, f1: 0.4852855193919314, auc: 0.6233604509517084


[I 2025-02-24 22:33:40,555] Trial 3 finished with value: 0.48306389530408006 and parameters: {'num_leaves': 225, 'max_depth': 100, 'min_gain_to_split': 0.35195515734825106, 'min_data_in_leaf': 164, 'lambda_l1': 0.14320302955059971, 'lambda_l2': 0.8975252020185219, 'num_boost_round': 498}. Best is trial 2 with value: 0.4852855193919314.


precision: 0.5046240450341777, recall: 0.46327057954964934, f1: 0.48306389530408006, auc: 0.6180202786709208


[I 2025-02-24 22:33:48,162] Trial 4 finished with value: 0.4835294117647059 and parameters: {'num_leaves': 168, 'max_depth': 45, 'min_gain_to_split': 0.9846514178057538, 'min_data_in_leaf': 69, 'lambda_l1': 0.3607293486588619, 'lambda_l2': 0.6465768120411154, 'num_boost_round': 465}. Best is trial 2 with value: 0.4852855193919314.


precision: 0.5156838143036386, recall: 0.45514950166112955, f1: 0.4835294117647059, auc: 0.6189404160663946


[I 2025-02-24 22:33:57,945] Trial 5 finished with value: 0.4867273784150358 and parameters: {'num_leaves': 289, 'max_depth': 99, 'min_gain_to_split': 0.6494159004294724, 'min_data_in_leaf': 90, 'lambda_l1': 0.5893423692481125, 'lambda_l2': 0.9034053783146286, 'num_boost_round': 639}. Best is trial 5 with value: 0.4867273784150358.


precision: 0.5122349102773246, recall: 0.46363971945367294, f1: 0.4867273784150358, auc: 0.6189830389384476


[I 2025-02-24 22:34:07,253] Trial 6 finished with value: 0.47877358490566035 and parameters: {'num_leaves': 238, 'max_depth': 41, 'min_gain_to_split': 0.6640333925494523, 'min_data_in_leaf': 42, 'lambda_l1': 0.7137637783932135, 'lambda_l2': 0.4245738213604363, 'num_boost_round': 797}. Best is trial 5 with value: 0.4867273784150358.


precision: 0.5119798234552333, recall: 0.4496124031007752, f1: 0.47877358490566035, auc: 0.6200403444858016


[I 2025-02-24 22:34:16,791] Trial 7 finished with value: 0.4757204895380971 and parameters: {'num_leaves': 278, 'max_depth': 66, 'min_gain_to_split': 0.4211050176458926, 'min_data_in_leaf': 198, 'lambda_l1': 0.5938764068027919, 'lambda_l2': 0.4393085422575057, 'num_boost_round': 518}. Best is trial 5 with value: 0.4867273784150358.


precision: 0.5112431056427662, recall: 0.44481358434846807, f1: 0.4757204895380971, auc: 0.6182917734498772


[I 2025-02-24 22:34:33,402] Trial 8 finished with value: 0.48321623731459795 and parameters: {'num_leaves': 267, 'max_depth': 57, 'min_gain_to_split': 0.157784696556477, 'min_data_in_leaf': 47, 'lambda_l1': 0.3479983704684421, 'lambda_l2': 0.2958664857608363, 'num_boost_round': 676}. Best is trial 5 with value: 0.4867273784150358.


precision: 0.5126293995859214, recall: 0.4569952011812477, f1: 0.48321623731459795, auc: 0.6167385787708992


[I 2025-02-24 22:34:39,512] Trial 9 finished with value: 0.47859450726978997 and parameters: {'num_leaves': 57, 'max_depth': 84, 'min_gain_to_split': 0.6598838383334821, 'min_data_in_leaf': 66, 'lambda_l1': 0.9969098931420801, 'lambda_l2': 0.6935347195948918, 'num_boost_round': 423}. Best is trial 5 with value: 0.4867273784150358.


precision: 0.5283102987070887, recall: 0.4374307862679956, f1: 0.47859450726978997, auc: 0.6188676558100614


[I 2025-02-24 22:34:52,263] Trial 10 finished with value: 0.4919827923347673 and parameters: {'num_leaves': 164, 'max_depth': 99, 'min_gain_to_split': 0.24287989866157395, 'min_data_in_leaf': 106, 'lambda_l1': 0.9960166687476688, 'lambda_l2': 0.7765638922809274, 'num_boost_round': 960}. Best is trial 10 with value: 0.4919827923347673.


precision: 0.5230769230769231, recall: 0.4643779992617202, f1: 0.4919827923347673, auc: 0.6205730873330594


[I 2025-02-24 22:35:02,761] Trial 11 finished with value: 0.4723779023218575 and parameters: {'num_leaves': 148, 'max_depth': 100, 'min_gain_to_split': 0.2509224690185293, 'min_data_in_leaf': 95, 'lambda_l1': 0.981001294121158, 'lambda_l2': 0.7872254325903489, 'num_boost_round': 998}. Best is trial 10 with value: 0.4919827923347673.


precision: 0.5159597726278968, recall: 0.43558508674787744, f1: 0.4723779023218575, auc: 0.6165277892945638


[I 2025-02-24 22:35:12,234] Trial 12 finished with value: 0.4824101068999028 and parameters: {'num_leaves': 176, 'max_depth': 89, 'min_gain_to_split': 0.5597222679802442, 'min_data_in_leaf': 110, 'lambda_l1': 0.8328482766140276, 'lambda_l2': 0.8117589859221794, 'num_boost_round': 833}. Best is trial 10 with value: 0.4919827923347673.


precision: 0.5094417077175698, recall: 0.45810262089331855, f1: 0.4824101068999028, auc: 0.6175985275046254


[I 2025-02-24 22:35:23,285] Trial 13 finished with value: 0.48253968253968255 and parameters: {'num_leaves': 128, 'max_depth': 90, 'min_gain_to_split': 0.014384910868500245, 'min_data_in_leaf': 132, 'lambda_l1': 0.8102884151081841, 'lambda_l2': 0.6126344147279998, 'num_boost_round': 647}. Best is trial 10 with value: 0.4919827923347673.


precision: 0.5216645216645217, recall: 0.44887412329272797, f1: 0.48253968253968255, auc: 0.6137657410787101


[I 2025-02-24 22:35:36,620] Trial 14 finished with value: 0.47685554668794894 and parameters: {'num_leaves': 203, 'max_depth': 30, 'min_gain_to_split': 0.2631836225232122, 'min_data_in_leaf': 87, 'lambda_l1': 0.2095740974085859, 'lambda_l2': 0.9824017365288158, 'num_boost_round': 994}. Best is trial 10 with value: 0.4919827923347673.


precision: 0.5188884064264003, recall: 0.44112218530823183, f1: 0.47685554668794894, auc: 0.6197642860457969


[I 2025-02-24 22:35:46,396] Trial 15 finished with value: 0.48231385577486807 and parameters: {'num_leaves': 296, 'max_depth': 74, 'min_gain_to_split': 0.49949832327554033, 'min_data_in_leaf': 137, 'lambda_l1': 0.8354351848444351, 'lambda_l2': 0.8053947195936303, 'num_boost_round': 634}. Best is trial 10 with value: 0.4919827923347673.


precision: 0.5124584717607974, recall: 0.4555186415651532, f1: 0.48231385577486807, auc: 0.6185964193515416


[I 2025-02-24 22:35:54,186] Trial 16 finished with value: 0.48051192553810357 and parameters: {'num_leaves': 195, 'max_depth': 93, 'min_gain_to_split': 0.8543544059508039, 'min_data_in_leaf': 106, 'lambda_l1': 0.5237943786814465, 'lambda_l2': 0.5508706346284551, 'num_boost_round': 857}. Best is trial 10 with value: 0.4919827923347673.


precision: 0.5061274509803921, recall: 0.4573643410852713, f1: 0.48051192553810357, auc: 0.6178337713156337


[I 2025-02-24 22:36:04,492] Trial 17 finished with value: 0.48247177658942364 and parameters: {'num_leaves': 133, 'max_depth': 78, 'min_gain_to_split': 0.3631180367317303, 'min_data_in_leaf': 74, 'lambda_l1': 0.3215437260280372, 'lambda_l2': 0.7393185237598247, 'num_boost_round': 736}. Best is trial 10 with value: 0.4919827923347673.


precision: 0.5205128205128206, recall: 0.4496124031007752, f1: 0.48247177658942364, auc: 0.619665177104922


[I 2025-02-24 22:36:14,439] Trial 18 finished with value: 0.4828526890101325 and parameters: {'num_leaves': 245, 'max_depth': 69, 'min_gain_to_split': 0.49984581848748205, 'min_data_in_leaf': 124, 'lambda_l1': 0.7348854757410045, 'lambda_l2': 0.8886476229463921, 'num_boost_round': 560}. Best is trial 10 with value: 0.4919827923347673.


precision: 0.5113495666529096, recall: 0.4573643410852713, f1: 0.4828526890101325, auc: 0.6148798771221347


[I 2025-02-24 22:36:23,088] Trial 19 finished with value: 0.4782258064516129 and parameters: {'num_leaves': 100, 'max_depth': 85, 'min_gain_to_split': 0.21301179034857948, 'min_data_in_leaf': 154, 'lambda_l1': 0.9110209462070233, 'lambda_l2': 0.30651780233267056, 'num_boost_round': 921}. Best is trial 10 with value: 0.4919827923347673.


precision: 0.5268769435806309, recall: 0.4377999261720192, f1: 0.4782258064516129, auc: 0.6176082575744073


[I 2025-02-24 22:36:30,583] Trial 20 finished with value: 0.4831974547623782 and parameters: {'num_leaves': 212, 'max_depth': 95, 'min_gain_to_split': 0.7797712217831668, 'min_data_in_leaf': 176, 'lambda_l1': 0.49224012833640735, 'lambda_l2': 0.8866809040526177, 'num_boost_round': 574}. Best is trial 10 with value: 0.4919827923347673.


precision: 0.5237068965517241, recall: 0.4485049833887043, f1: 0.4831974547623782, auc: 0.6228116922373761


[I 2025-02-24 22:36:36,138] Trial 21 finished with value: 0.4675916338786498 and parameters: {'num_leaves': 43, 'max_depth': 79, 'min_gain_to_split': 0.6559156462149203, 'min_data_in_leaf': 25, 'lambda_l1': 0.08218532749026025, 'lambda_l2': 0.08731958367951437, 'num_boost_round': 303}. Best is trial 10 with value: 0.4919827923347673.


precision: 0.5325471698113208, recall: 0.41675895164267257, f1: 0.4675916338786498, auc: 0.6220928725688926


[I 2025-02-24 22:36:44,009] Trial 22 finished with value: 0.4802710243124751 and parameters: {'num_leaves': 77, 'max_depth': 96, 'min_gain_to_split': 0.7290896926054706, 'min_data_in_leaf': 50, 'lambda_l1': 0.02575926526682959, 'lambda_l2': 0.2401184262896961, 'num_boost_round': 753}. Best is trial 10 with value: 0.4919827923347673.


precision: 0.5218709398007796, recall: 0.44481358434846807, f1: 0.4802710243124751, auc: 0.6217820269767477


[I 2025-02-24 22:36:48,810] Trial 23 finished with value: 0.47170959237477184 and parameters: {'num_leaves': 32, 'max_depth': 84, 'min_gain_to_split': 0.5840613571964761, 'min_data_in_leaf': 114, 'lambda_l1': 0.23903806691689972, 'lambda_l2': 0.16479015660195212, 'num_boost_round': 611}. Best is trial 10 with value: 0.4919827923347673.


precision: 0.5234023402340234, recall: 0.4293097083794758, f1: 0.47170959237477184, auc: 0.6210830118750765


[I 2025-02-24 22:36:56,134] Trial 24 finished with value: 0.4859086491739553 and parameters: {'num_leaves': 128, 'max_depth': 60, 'min_gain_to_split': 0.9440321371111311, 'min_data_in_leaf': 87, 'lambda_l1': 0.6187454520897803, 'lambda_l2': 0.008477820195075814, 'num_boost_round': 708}. Best is trial 10 with value: 0.4919827923347673.


precision: 0.513136288998358, recall: 0.4614248800295312, f1: 0.4859086491739553, auc: 0.6197278628642243


[I 2025-02-24 22:37:03,352] Trial 25 finished with value: 0.48286604361370716 and parameters: {'num_leaves': 123, 'max_depth': 56, 'min_gain_to_split': 0.9674313092470125, 'min_data_in_leaf': 89, 'lambda_l1': 0.6022222397488864, 'lambda_l2': 0.5193806412846335, 'num_boost_round': 715}. Best is trial 10 with value: 0.4919827923347673.


precision: 0.5109188298310672, recall: 0.45773348098929495, f1: 0.48286604361370716, auc: 0.6180251867592177


[I 2025-02-24 22:37:10,982] Trial 26 finished with value: 0.4827452500969368 and parameters: {'num_leaves': 154, 'max_depth': 61, 'min_gain_to_split': 0.9034199809941867, 'min_data_in_leaf': 80, 'lambda_l1': 0.7563950562802163, 'lambda_l2': 0.4216350028414757, 'num_boost_round': 940}. Best is trial 10 with value: 0.4919827923347673.


precision: 0.5083707635769702, recall: 0.45957918050941304, f1: 0.4827452500969368, auc: 0.618964698187443


[I 2025-02-24 22:37:22,208] Trial 27 finished with value: 0.478515625 and parameters: {'num_leaves': 183, 'max_depth': 70, 'min_gain_to_split': 0.4042986430517611, 'min_data_in_leaf': 101, 'lambda_l1': 0.6533427031348964, 'lambda_l2': 0.5995646083838727, 'num_boost_round': 883}. Best is trial 10 with value: 0.4919827923347673.


precision: 0.5080879303193696, recall: 0.45219638242894056, f1: 0.478515625, auc: 0.6165534491246081


[I 2025-02-24 22:37:30,016] Trial 28 finished with value: 0.49275921992662675 and parameters: {'num_leaves': 111, 'max_depth': 52, 'min_gain_to_split': 0.9195525272241841, 'min_data_in_leaf': 58, 'lambda_l1': 0.9027259499319511, 'lambda_l2': 0.00040518311288572154, 'num_boost_round': 693}. Best is trial 28 with value: 0.49275921992662675.


precision: 0.5165991902834008, recall: 0.4710225175341454, f1: 0.49275921992662675, auc: 0.6189734810822902


[I 2025-02-24 22:37:39,108] Trial 29 finished with value: 0.4814004376367615 and parameters: {'num_leaves': 75, 'max_depth': 47, 'min_gain_to_split': 0.10503815260498586, 'min_data_in_leaf': 59, 'lambda_l1': 0.9093662368570216, 'lambda_l2': 0.9392623644377549, 'num_boost_round': 787}. Best is trial 28 with value: 0.49275921992662675.


precision: 0.5220017256255393, recall: 0.4466592838685862, f1: 0.4814004376367615, auc: 0.6221192212534344


[I 2025-02-24 22:37:48,491] Trial 30 finished with value: 0.4843901493116153 and parameters: {'num_leaves': 98, 'max_depth': 34, 'min_gain_to_split': 0.574592271202969, 'min_data_in_leaf': 120, 'lambda_l1': 0.9132324054366667, 'lambda_l2': 0.7210816857327803, 'num_boost_round': 590}. Best is trial 28 with value: 0.49275921992662675.


precision: 0.5102124183006536, recall: 0.46105574012550754, f1: 0.4843901493116153, auc: 0.6179445907829718


[I 2025-02-24 22:37:56,114] Trial 31 finished with value: 0.47842218316735013 and parameters: {'num_leaves': 113, 'max_depth': 51, 'min_gain_to_split': 0.9367001554066485, 'min_data_in_leaf': 86, 'lambda_l1': 0.6797222416380452, 'lambda_l2': 0.004287601857056763, 'num_boost_round': 677}. Best is trial 28 with value: 0.49275921992662675.


precision: 0.50787728026534, recall: 0.45219638242894056, f1: 0.47842218316735013, auc: 0.6149516902035332


[I 2025-02-24 22:38:04,307] Trial 32 finished with value: 0.47940368772067476 and parameters: {'num_leaves': 150, 'max_depth': 62, 'min_gain_to_split': 0.856898332152754, 'min_data_in_leaf': 57, 'lambda_l1': 0.4326314163780053, 'lambda_l2': 0.13897437482460548, 'num_boost_round': 711}. Best is trial 28 with value: 0.49275921992662675.


precision: 0.5115110925073253, recall: 0.4510889627168697, f1: 0.47940368772067476, auc: 0.6195135430085876


[I 2025-02-24 22:38:12,514] Trial 33 finished with value: 0.48922539312754804 and parameters: {'num_leaves': 145, 'max_depth': 53, 'min_gain_to_split': 0.9054053461896744, 'min_data_in_leaf': 36, 'lambda_l1': 0.5785904737966804, 'lambda_l2': 0.18869708388607453, 'num_boost_round': 805}. Best is trial 28 with value: 0.49275921992662675.


precision: 0.515970515970516, recall: 0.46511627906976744, f1: 0.48922539312754804, auc: 0.6190925668036023


[I 2025-02-24 22:38:21,431] Trial 34 finished with value: 0.48805855161787365 and parameters: {'num_leaves': 144, 'max_depth': 42, 'min_gain_to_split': 0.7906865554201086, 'min_data_in_leaf': 36, 'lambda_l1': 0.5658120556065652, 'lambda_l2': 0.19716136296121003, 'num_boost_round': 767}. Best is trial 28 with value: 0.49275921992662675.


precision: 0.5102698348771647, recall: 0.46770025839793283, f1: 0.48805855161787365, auc: 0.6203263913160246


[I 2025-02-24 22:38:30,154] Trial 35 finished with value: 0.4761152783260955 and parameters: {'num_leaves': 161, 'max_depth': 39, 'min_gain_to_split': 0.7944836714852116, 'min_data_in_leaf': 38, 'lambda_l1': 0.5372754731683222, 'lambda_l2': 0.18955534925537776, 'num_boost_round': 786}. Best is trial 28 with value: 0.49275921992662675.


precision: 0.5116673737802291, recall: 0.44518272425249167, f1: 0.4761152783260955, auc: 0.6153571671823173


[I 2025-02-24 22:38:38,252] Trial 36 finished with value: 0.47695390781563124 and parameters: {'num_leaves': 112, 'max_depth': 51, 'min_gain_to_split': 0.8799061099214113, 'min_data_in_leaf': 33, 'lambda_l1': 0.4251760994131756, 'lambda_l2': 0.3065895110460843, 'num_boost_round': 916}. Best is trial 28 with value: 0.49275921992662675.


precision: 0.5217010083296799, recall: 0.4392764857881137, f1: 0.47695390781563124, auc: 0.6173567395758878


[I 2025-02-24 22:38:47,237] Trial 37 finished with value: 0.4895051030233006 and parameters: {'num_leaves': 139, 'max_depth': 51, 'min_gain_to_split': 0.7400405817182787, 'min_data_in_leaf': 33, 'lambda_l1': 0.7812652578285452, 'lambda_l2': 0.11693860814393917, 'num_boost_round': 845}. Best is trial 28 with value: 0.49275921992662675.


precision: 0.5116747181964574, recall: 0.46917681801402733, f1: 0.4895051030233006, auc: 0.6196402061293756


[I 2025-02-24 22:38:56,449] Trial 38 finished with value: 0.483200621479899 and parameters: {'num_leaves': 185, 'max_depth': 49, 'min_gain_to_split': 0.7267761656732004, 'min_data_in_leaf': 23, 'lambda_l1': 0.7908826916952485, 'lambda_l2': 0.09244462080712437, 'num_boost_round': 836}. Best is trial 28 with value: 0.49275921992662675.


precision: 0.5098360655737705, recall: 0.45921004060538945, f1: 0.483200621479899, auc: 0.6217347543368341


[I 2025-02-24 22:39:03,855] Trial 39 finished with value: 0.48661324995114325 and parameters: {'num_leaves': 165, 'max_depth': 53, 'min_gain_to_split': 0.9775289505315026, 'min_data_in_leaf': 54, 'lambda_l1': 0.8654285328518355, 'lambda_l2': 0.05870138083514377, 'num_boost_round': 871}. Best is trial 28 with value: 0.49275921992662675.


precision: 0.5170265780730897, recall: 0.45957918050941304, f1: 0.48661324995114325, auc: 0.6203920047069428


[I 2025-02-24 22:39:12,771] Trial 40 finished with value: 0.48409405255878285 and parameters: {'num_leaves': 88, 'max_depth': 45, 'min_gain_to_split': 0.724363702509332, 'min_data_in_leaf': 20, 'lambda_l1': 0.940775451587337, 'lambda_l2': 0.2446090976155099, 'num_boost_round': 966}. Best is trial 28 with value: 0.49275921992662675.


precision: 0.5208333333333334, recall: 0.45219638242894056, f1: 0.48409405255878285, auc: 0.6216639745371824


[I 2025-02-24 22:39:21,611] Trial 41 finished with value: 0.48517729122263126 and parameters: {'num_leaves': 147, 'max_depth': 41, 'min_gain_to_split': 0.7897714666466541, 'min_data_in_leaf': 39, 'lambda_l1': 0.6907654000690705, 'lambda_l2': 0.37499255299890855, 'num_boost_round': 809}. Best is trial 28 with value: 0.49275921992662675.


precision: 0.5106035889070146, recall: 0.46216315983757844, f1: 0.48517729122263126, auc: 0.6145096178295522


[I 2025-02-24 22:39:29,638] Trial 42 finished with value: 0.4876256767208043 and parameters: {'num_leaves': 135, 'max_depth': 38, 'min_gain_to_split': 0.9121183875342089, 'min_data_in_leaf': 34, 'lambda_l1': 0.9618921486191939, 'lambda_l2': 0.19890301062574756, 'num_boost_round': 759}. Best is trial 28 with value: 0.49275921992662675.


precision: 0.511977263499797, recall: 0.4654854189737911, f1: 0.4876256767208043, auc: 0.6240444834680517


[I 2025-02-24 22:39:37,857] Trial 43 finished with value: 0.48522550544323484 and parameters: {'num_leaves': 111, 'max_depth': 56, 'min_gain_to_split': 0.8253531708973809, 'min_data_in_leaf': 46, 'lambda_l1': 0.8687410506995772, 'lambda_l2': 0.13714363478359656, 'num_boost_round': 906}. Best is trial 28 with value: 0.49275921992662675.


precision: 0.5125256673511294, recall: 0.46068660022148394, f1: 0.48522550544323484, auc: 0.6197801296992469


[I 2025-02-24 22:39:45,172] Trial 44 finished with value: 0.4854064642507346 and parameters: {'num_leaves': 139, 'max_depth': 44, 'min_gain_to_split': 0.992868686208939, 'min_data_in_leaf': 60, 'lambda_l1': 0.7613508209550991, 'lambda_l2': 0.1114539430035884, 'num_boost_round': 824}. Best is trial 28 with value: 0.49275921992662675.


precision: 0.5171118530884808, recall: 0.4573643410852713, f1: 0.4854064642507346, auc: 0.6194018624731272


[I 2025-02-24 22:39:52,978] Trial 45 finished with value: 0.4818847752920214 and parameters: {'num_leaves': 170, 'max_depth': 36, 'min_gain_to_split': 0.8268603784866027, 'min_data_in_leaf': 66, 'lambda_l1': 0.9880886326581377, 'lambda_l2': 0.25930227125776206, 'num_boost_round': 679}. Best is trial 28 with value: 0.49275921992662675.


precision: 0.5196413321947054, recall: 0.44924326319675156, f1: 0.4818847752920214, auc: 0.6194309665756603


[I 2025-02-24 22:40:02,015] Trial 46 finished with value: 0.48530842576376726 and parameters: {'num_leaves': 117, 'max_depth': 64, 'min_gain_to_split': 0.7445920910431079, 'min_data_in_leaf': 28, 'lambda_l1': 0.5441082943431721, 'lambda_l2': 0.048575288489866475, 'num_boost_round': 779}. Best is trial 28 with value: 0.49275921992662675.


precision: 0.5131687242798354, recall: 0.4603174603174603, f1: 0.48530842576376726, auc: 0.6242305602892776


[I 2025-02-24 22:40:11,655] Trial 47 finished with value: 0.4848250531606418 and parameters: {'num_leaves': 143, 'max_depth': 43, 'min_gain_to_split': 0.6126790585179721, 'min_data_in_leaf': 42, 'lambda_l1': 0.8644190824769088, 'lambda_l2': 0.03622023383423653, 'num_boost_round': 964}. Best is trial 28 with value: 0.49275921992662675.


precision: 0.5089285714285714, recall: 0.4629014396456257, f1: 0.4848250531606418, auc: 0.6154406046833667


[I 2025-02-24 22:40:20,160] Trial 48 finished with value: 0.4833036949219522 and parameters: {'num_leaves': 63, 'max_depth': 59, 'min_gain_to_split': 0.46566966360674733, 'min_data_in_leaf': 32, 'lambda_l1': 0.5680023813737746, 'lambda_l2': 0.35982088344591023, 'num_boost_round': 850}. Best is trial 28 with value: 0.49275921992662675.


precision: 0.5199829931972789, recall: 0.4514581026208933, f1: 0.4833036949219522, auc: 0.6211783321162135


[I 2025-02-24 22:40:29,353] Trial 49 finished with value: 0.4866217516843118 and parameters: {'num_leaves': 156, 'max_depth': 53, 'min_gain_to_split': 0.6973169280001594, 'min_data_in_leaf': 74, 'lambda_l1': 0.47670671391293745, 'lambda_l2': 0.1991752632557778, 'num_boost_round': 899}. Best is trial 28 with value: 0.49275921992662675.


precision: 0.5084473049074819, recall: 0.46659283868586193, f1: 0.4866217516843118, auc: 0.6176893701915265


In [5]:
study.best_params

{'num_leaves': 111,
 'max_depth': 52,
 'min_gain_to_split': 0.9195525272241841,
 'min_data_in_leaf': 58,
 'lambda_l1': 0.9027259499319511,
 'lambda_l2': 0.00040518311288572154,
 'num_boost_round': 693}

In [6]:
params = {
    "is_unbalance": True,
    **study.best_params,
}

model = lgb.train(
    study.best_params,
    lgb.Dataset(meta_features_train, meta_label_train),
)

In [8]:
import plotly.express as px

res = model.predict(df_features[META_ALL])

fig = px.histogram(res, nbins=100)
fig.show()

In [9]:
model.save_model("custom_indicators/models/model_meta.txt")

<lightgbm.basic.Booster at 0x1107f60d0>