# model L5

In [1]:
import numpy as np
import pandas as pd

from jesse.helpers import date_to_timestamp

from strategies.BinanceBtcDeapV1Voting.models.config import FEAT_L5, FEAT_FRACDIFF, DeepSSMContainer, LGSSMContainer

df_feat_raw = pd.read_parquet("data/feat_hard_L5.parquet")
model_deep_ssm = DeepSSMContainer()
model_lg_ssm = LGSSMContainer()

train_mask = df_feat_raw.index.to_numpy() < date_to_timestamp("2025-03-01")
train_raw = df_feat_raw.iloc[train_mask]
train_deep_ssm = model_deep_ssm.transform(df_feat_raw[FEAT_FRACDIFF].iloc[train_mask])
train_lg_ssm = model_lg_ssm.transform(df_feat_raw[FEAT_FRACDIFF].iloc[train_mask])

df_feat_l5 = pd.concat([train_deep_ssm, train_lg_ssm, train_raw], axis=1)[FEAT_L5]
print(f"{df_feat_l5.shape = }")

label = np.load("data/label_hard_L5.npy")
train_y = label[train_mask]

assert df_feat_l5.shape[0] == train_y.shape[0]

print(np.unique(train_y, return_counts=True))

  import pkg_resources


PyTorch configured: device=cpu, dtype=torch.float32
Model loaded from /Users/yangqiuyu/Github/jesse-trade/strategies/BinanceBtcDeapV1Voting/models/deep_ssm.safetensors and /Users/yangqiuyu/Github/jesse-trade/strategies/BinanceBtcDeapV1Voting/models/deep_ssm.json
Model loaded from /Users/yangqiuyu/Github/jesse-trade/strategies/BinanceBtcDeapV1Voting/models/lg_ssm.safetensors and /Users/yangqiuyu/Github/jesse-trade/strategies/BinanceBtcDeapV1Voting/models/lg_ssm.json
df_feat_l5.shape = (4632, 2152)
(array([0, 1]), array([2010, 2622]))


In [2]:
import lightgbm as lgb
import optuna
from sklearn.metrics import f1_score

METRIC = "f1"

def eval_metric(preds, eval_dataset):
    metric_name = METRIC
    y_true = eval_dataset.get_label()
    value = f1_score(y_true, preds > 0.5, average="weighted")
    higher_better = True
    return metric_name, value, higher_better


def objective(trial):
    params = {
        "objective": "binary",
        "metric": METRIC,
        "num_threads": -1,
        "verbose": -1,
        "is_unbalance": trial.suggest_categorical("is_unbalance", [True, False]),
        "extra_trees": trial.suggest_categorical("extra_trees", [True, False]),
        "boosting": trial.suggest_categorical("boosting", ["gbdt", "dart"]),
        "num_leaves": trial.suggest_int("num_leaves", 31, 300),
        "max_depth": trial.suggest_int("max_depth", 30, 1000),
        "min_gain_to_split": trial.suggest_float("min_gain_to_split", 1e-8, 1),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 20, 500),
        "lambda_l1": trial.suggest_float("lambda_l1", 1e-4, 100),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-4, 100),
    }
    dtrain = lgb.Dataset(df_feat_l5, train_y)
    # dtest = lgb.Dataset(side_features_test, side_label_test)
    model_res = lgb.cv(
        params,
        dtrain,
        num_boost_round=trial.suggest_int("num_boost_round", 100, 1500),
        feval=eval_metric,
    )
    return model_res[f"valid {METRIC}-mean"][-1]


study = optuna.create_study(
    direction="maximize",
    pruner=optuna.pruners.HyperbandPruner(),
    sampler=optuna.samplers.TPESampler(),
)
study.optimize(objective, n_trials=150, n_jobs=1)

[I 2025-09-19 16:54:34,116] A new study created in memory with name: no-name-d0bc2376-6d8a-4f5b-9a7a-b3e4f9bf0aff
[I 2025-09-19 16:54:46,015] Trial 0 finished with value: 0.8237307313035298 and parameters: {'is_unbalance': False, 'extra_trees': True, 'boosting': 'gbdt', 'num_leaves': 69, 'max_depth': 528, 'min_gain_to_split': 0.7405368216062204, 'min_data_in_leaf': 338, 'lambda_l1': 59.43977204992401, 'lambda_l2': 56.698515005011636, 'num_boost_round': 1466}. Best is trial 0 with value: 0.8237307313035298.
[I 2025-09-19 16:54:56,388] Trial 1 finished with value: 0.837971977134836 and parameters: {'is_unbalance': False, 'extra_trees': False, 'boosting': 'gbdt', 'num_leaves': 241, 'max_depth': 966, 'min_gain_to_split': 0.5678949485550338, 'min_data_in_leaf': 248, 'lambda_l1': 17.80427050565514, 'lambda_l2': 49.49664872345951, 'num_boost_round': 1076}. Best is trial 1 with value: 0.837971977134836.
[I 2025-09-19 16:55:04,080] Trial 2 finished with value: 0.8321178731487955 and parameters:

In [3]:
print("side model long label: ")
study.best_params

side model long label: 


{'is_unbalance': False,
 'extra_trees': True,
 'boosting': 'gbdt',
 'num_leaves': 73,
 'max_depth': 226,
 'min_gain_to_split': 0.03044353403282171,
 'min_data_in_leaf': 304,
 'lambda_l1': 0.217338158287114,
 'lambda_l2': 58.933518814058424,
 'num_boost_round': 1176}

In [4]:
params = {
    "objective": "binary",
    "num_threads": -1,
    "verbose": -1,
    **study.best_params,
}

dtrain = lgb.Dataset(df_feat_l5, train_y)
side_model = lgb.train(params, dtrain)
# side_model_prod = lgb.train(params, lgb.Dataset(features, label))

In [5]:
from strategies.BinanceBtcDeapV1Voting.models.config import FEAT_L5, FEAT_FRACDIFF, DeepSSMContainer, LGSSMContainer

df_feat_raw = pd.read_parquet("data/feat_hard_L5.parquet")
model_deep_ssm = DeepSSMContainer()
model_lg_ssm = LGSSMContainer()

df_deep_ssm = model_deep_ssm.transform(df_feat_raw[FEAT_FRACDIFF])
df_lg_ssm = model_lg_ssm.transform(df_feat_raw[FEAT_FRACDIFF])
df_feat_l5_full = pd.concat([df_deep_ssm, df_lg_ssm, df_feat_raw], axis=1)[FEAT_L5]

label_full = np.load("data/label_hard_L5.npy")

dtrain = lgb.Dataset(df_feat_l5_full, label_full)
side_model_prod = lgb.train(params, dtrain)

Model loaded from /Users/yangqiuyu/Github/jesse-trade/strategies/BinanceBtcDeapV1Voting/models/deep_ssm.safetensors and /Users/yangqiuyu/Github/jesse-trade/strategies/BinanceBtcDeapV1Voting/models/deep_ssm.json
Model loaded from /Users/yangqiuyu/Github/jesse-trade/strategies/BinanceBtcDeapV1Voting/models/lg_ssm.safetensors and /Users/yangqiuyu/Github/jesse-trade/strategies/BinanceBtcDeapV1Voting/models/lg_ssm.json


In [7]:
import plotly.express as px

test_features = df_feat_l5_full[df_feat_l5_full.index.to_numpy() >= date_to_timestamp("2025-03-01")]
res = side_model.predict(test_features)

fig = px.histogram(res, nbins=100)
fig.show()

In [8]:
side_model.save_model("strategies/BinanceBtcDeapV1Voting/models/model_l5.txt")
side_model_prod.save_model(
    "strategies/BinanceBtcDeapV1Voting/models/model_l5_prod.txt"
)

<lightgbm.basic.Booster at 0x3032962d0>

# model L6

In [10]:
import numpy as np
import pandas as pd

from jesse.helpers import date_to_timestamp

from strategies.BinanceBtcDeapV1Voting.models.config import FEAT_L6, FEAT_FRACDIFF, DeepSSMContainer, LGSSMContainer

df_feat_raw = pd.read_parquet("data/feat_hard_L6.parquet")
model_deep_ssm = DeepSSMContainer()
model_lg_ssm = LGSSMContainer()

train_mask = df_feat_raw.index.to_numpy() < date_to_timestamp("2025-03-01")
train_raw = df_feat_raw.iloc[train_mask]
train_deep_ssm = model_deep_ssm.transform(df_feat_raw[FEAT_FRACDIFF].iloc[train_mask])
train_lg_ssm = model_lg_ssm.transform(df_feat_raw[FEAT_FRACDIFF].iloc[train_mask])

df_feat_l6 = pd.concat([train_deep_ssm, train_lg_ssm, train_raw], axis=1)[FEAT_L6]
print(f"{df_feat_l6.shape = }")

label = np.load("data/label_hard_L6.npy")
train_y = label[train_mask]

assert df_feat_l6.shape[0] == train_y.shape[0]

print(np.unique(train_y, return_counts=True))

Model loaded from /Users/yangqiuyu/Github/jesse-trade/strategies/BinanceBtcDeapV1Voting/models/deep_ssm.safetensors and /Users/yangqiuyu/Github/jesse-trade/strategies/BinanceBtcDeapV1Voting/models/deep_ssm.json
Model loaded from /Users/yangqiuyu/Github/jesse-trade/strategies/BinanceBtcDeapV1Voting/models/lg_ssm.safetensors and /Users/yangqiuyu/Github/jesse-trade/strategies/BinanceBtcDeapV1Voting/models/lg_ssm.json
df_feat_l6.shape = (4632, 2427)
(array([0, 1]), array([2133, 2499]))


In [11]:
import lightgbm as lgb
import optuna
from sklearn.metrics import f1_score

METRIC = "f1"

def eval_metric(preds, eval_dataset):
    metric_name = METRIC
    y_true = eval_dataset.get_label()
    value = f1_score(y_true, preds > 0.5, average="weighted")
    higher_better = True
    return metric_name, value, higher_better


def objective(trial):
    params = {
        "objective": "binary",
        "metric": METRIC,
        "num_threads": -1,
        "verbose": -1,
        "is_unbalance": trial.suggest_categorical("is_unbalance", [True, False]),
        "extra_trees": trial.suggest_categorical("extra_trees", [True, False]),
        "boosting": trial.suggest_categorical("boosting", ["gbdt", "dart"]),
        "num_leaves": trial.suggest_int("num_leaves", 31, 300),
        "max_depth": trial.suggest_int("max_depth", 30, 1000),
        "min_gain_to_split": trial.suggest_float("min_gain_to_split", 1e-8, 1),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 20, 500),
        "lambda_l1": trial.suggest_float("lambda_l1", 1e-4, 100),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-4, 100),
    }
    dtrain = lgb.Dataset(df_feat_l6, train_y)
    # dtest = lgb.Dataset(side_features_test, side_label_test)
    model_res = lgb.cv(
        params,
        dtrain,
        num_boost_round=trial.suggest_int("num_boost_round", 100, 1500),
        feval=eval_metric,
    )
    return model_res[f"valid {METRIC}-mean"][-1]


study = optuna.create_study(
    direction="maximize",
    pruner=optuna.pruners.HyperbandPruner(),
    sampler=optuna.samplers.TPESampler(),
)
study.optimize(objective, n_trials=150, n_jobs=1)

[I 2025-09-19 17:38:08,323] A new study created in memory with name: no-name-b39422ea-6b69-4ddb-abf2-de8ff96aa16e
[I 2025-09-19 17:38:14,105] Trial 0 finished with value: 0.7788259700551328 and parameters: {'is_unbalance': False, 'extra_trees': True, 'boosting': 'gbdt', 'num_leaves': 286, 'max_depth': 260, 'min_gain_to_split': 0.9955972036630282, 'min_data_in_leaf': 161, 'lambda_l1': 34.26344801090784, 'lambda_l2': 55.856475107126414, 'num_boost_round': 495}. Best is trial 0 with value: 0.7788259700551328.
[I 2025-09-19 17:38:37,132] Trial 1 finished with value: 0.7778392500064372 and parameters: {'is_unbalance': False, 'extra_trees': True, 'boosting': 'dart', 'num_leaves': 127, 'max_depth': 107, 'min_gain_to_split': 0.35914709812779877, 'min_data_in_leaf': 372, 'lambda_l1': 52.252414313686295, 'lambda_l2': 10.433037929239747, 'num_boost_round': 1079}. Best is trial 0 with value: 0.7788259700551328.
[I 2025-09-19 17:38:49,694] Trial 2 finished with value: 0.775946610927677 and paramete

In [12]:
print("side model long label: ")
study.best_params

side model long label: 


{'is_unbalance': True,
 'extra_trees': False,
 'boosting': 'gbdt',
 'num_leaves': 285,
 'max_depth': 942,
 'min_gain_to_split': 0.013693137449950854,
 'min_data_in_leaf': 335,
 'lambda_l1': 0.19773490532867394,
 'lambda_l2': 22.42907170348074,
 'num_boost_round': 947}

In [13]:
params = {
    "objective": "binary",
    "num_threads": -1,
    "verbose": -1,
    **study.best_params,
}

dtrain = lgb.Dataset(df_feat_l6, train_y)
side_model = lgb.train(params, dtrain)

In [14]:
from strategies.BinanceBtcDeapV1Voting.models.config import FEAT_L6, FEAT_FRACDIFF, DeepSSMContainer, LGSSMContainer

df_feat_raw = pd.read_parquet("data/feat_hard_L6.parquet")
model_deep_ssm = DeepSSMContainer()
model_lg_ssm = LGSSMContainer()

df_deep_ssm = model_deep_ssm.transform(df_feat_raw[FEAT_FRACDIFF])
df_lg_ssm = model_lg_ssm.transform(df_feat_raw[FEAT_FRACDIFF])
df_feat_l6_full = pd.concat([df_deep_ssm, df_lg_ssm, df_feat_raw], axis=1)[FEAT_L6]

label_full = np.load("data/label_hard_L6.npy")

dtrain = lgb.Dataset(df_feat_l6_full, label_full)
side_model_prod = lgb.train(params, dtrain)

Model loaded from /Users/yangqiuyu/Github/jesse-trade/strategies/BinanceBtcDeapV1Voting/models/deep_ssm.safetensors and /Users/yangqiuyu/Github/jesse-trade/strategies/BinanceBtcDeapV1Voting/models/deep_ssm.json
Model loaded from /Users/yangqiuyu/Github/jesse-trade/strategies/BinanceBtcDeapV1Voting/models/lg_ssm.safetensors and /Users/yangqiuyu/Github/jesse-trade/strategies/BinanceBtcDeapV1Voting/models/lg_ssm.json


In [15]:
import plotly.express as px

test_features = df_feat_l6_full[df_feat_l6_full.index.to_numpy() >= date_to_timestamp("2025-03-01")]
res = side_model.predict(test_features)

fig = px.histogram(res, nbins=100)
fig.show()

In [16]:
side_model.save_model("strategies/BinanceBtcDeapV1Voting/models/model_l6.txt")
side_model_prod.save_model(
    "strategies/BinanceBtcDeapV1Voting/models/model_l6_prod.txt"
)

<lightgbm.basic.Booster at 0x30aba7e30>

# model L7

In [17]:
import numpy as np
import pandas as pd

from jesse.helpers import date_to_timestamp

from strategies.BinanceBtcDeapV1Voting.models.config import FEAT_L7, FEAT_FRACDIFF, DeepSSMContainer, LGSSMContainer

df_feat_raw = pd.read_parquet("data/feat_hard_L7.parquet")
model_deep_ssm = DeepSSMContainer()
model_lg_ssm = LGSSMContainer()

train_mask = df_feat_raw.index.to_numpy() < date_to_timestamp("2025-03-01")
train_raw = df_feat_raw.iloc[train_mask]
train_deep_ssm = model_deep_ssm.transform(df_feat_raw[FEAT_FRACDIFF].iloc[train_mask])
train_lg_ssm = model_lg_ssm.transform(df_feat_raw[FEAT_FRACDIFF].iloc[train_mask])

df_feat_l7 = pd.concat([train_deep_ssm, train_lg_ssm, train_raw], axis=1)[FEAT_L7]
print(f"{df_feat_l7.shape = }")

label = np.load("data/label_hard_L7.npy")
train_y = label[train_mask]

assert df_feat_l7.shape[0] == train_y.shape[0]

print(np.unique(train_y, return_counts=True))

Model loaded from /Users/yangqiuyu/Github/jesse-trade/strategies/BinanceBtcDeapV1Voting/models/deep_ssm.safetensors and /Users/yangqiuyu/Github/jesse-trade/strategies/BinanceBtcDeapV1Voting/models/deep_ssm.json
Model loaded from /Users/yangqiuyu/Github/jesse-trade/strategies/BinanceBtcDeapV1Voting/models/lg_ssm.safetensors and /Users/yangqiuyu/Github/jesse-trade/strategies/BinanceBtcDeapV1Voting/models/lg_ssm.json
df_feat_l7.shape = (4632, 277)
(array([0, 1]), array([1779, 2853]))


In [19]:
import lightgbm as lgb
import optuna
from sklearn.metrics import f1_score

METRIC = "f1"

def eval_metric(preds, eval_dataset):
    metric_name = METRIC
    y_true = eval_dataset.get_label()
    value = f1_score(y_true, preds > 0.5, average="weighted")
    higher_better = True
    return metric_name, value, higher_better


def objective(trial):
    params = {
        "objective": "binary",
        "metric": METRIC,
        "num_threads": -1,
        "verbose": -1,
        "is_unbalance": trial.suggest_categorical("is_unbalance", [True, False]),
        "extra_trees": trial.suggest_categorical("extra_trees", [True, False]),
        "boosting": trial.suggest_categorical("boosting", ["gbdt", "dart"]),
        "num_leaves": trial.suggest_int("num_leaves", 31, 300),
        "max_depth": trial.suggest_int("max_depth", 30, 1000),
        "min_gain_to_split": trial.suggest_float("min_gain_to_split", 1e-8, 1),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 20, 500),
        "lambda_l1": trial.suggest_float("lambda_l1", 1e-4, 100),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-4, 100),
    }
    dtrain = lgb.Dataset(df_feat_l7, train_y)
    # dtest = lgb.Dataset(side_features_test, side_label_test)
    model_res = lgb.cv(
        params,
        dtrain,
        num_boost_round=trial.suggest_int("num_boost_round", 100, 1500),
        feval=eval_metric,
    )
    return model_res[f"valid {METRIC}-mean"][-1]


study = optuna.create_study(
    direction="maximize",
    pruner=optuna.pruners.HyperbandPruner(),
    sampler=optuna.samplers.TPESampler(n_startup_trials=50),
)
study.optimize(objective, n_trials=300, n_jobs=1)

[I 2025-09-19 18:20:45,124] A new study created in memory with name: no-name-7e015d3c-08a6-43b4-b54b-fcd39a92d4f4
[I 2025-09-19 18:20:47,918] Trial 0 finished with value: 0.5442930586736014 and parameters: {'is_unbalance': True, 'extra_trees': True, 'boosting': 'gbdt', 'num_leaves': 290, 'max_depth': 968, 'min_gain_to_split': 0.4366156469613521, 'min_data_in_leaf': 27, 'lambda_l1': 35.85236561468527, 'lambda_l2': 25.925052747459354, 'num_boost_round': 566}. Best is trial 0 with value: 0.5442930586736014.
[I 2025-09-19 18:20:49,173] Trial 1 finished with value: 0.5408619753592612 and parameters: {'is_unbalance': True, 'extra_trees': True, 'boosting': 'gbdt', 'num_leaves': 76, 'max_depth': 187, 'min_gain_to_split': 0.06750576444327246, 'min_data_in_leaf': 422, 'lambda_l1': 67.69477210984113, 'lambda_l2': 64.93790456950119, 'num_boost_round': 248}. Best is trial 0 with value: 0.5442930586736014.
[I 2025-09-19 18:20:54,594] Trial 2 finished with value: 0.46954069219968336 and parameters: {

KeyboardInterrupt: 

In [5]:
print("side model long label: ")
study.best_params

side model long label: 


{'is_unbalance': True,
 'extra_trees': False,
 'boosting': 'gbdt',
 'num_leaves': 102,
 'max_depth': 942,
 'min_gain_to_split': 0.016293100493589244,
 'min_data_in_leaf': 87,
 'lambda_l1': 1.626035317109549,
 'lambda_l2': 45.58793271832796,
 'num_boost_round': 398}

In [None]:
params = {
    "objective": "binary",
    "num_threads": -1,
    "verbose": -1,
    **study.best_params,
}

dtrain = lgb.Dataset(df_feat_l7, train_y)
side_model = lgb.train(params, dtrain)

In [None]:
from strategies.BinanceBtcDeapV1Voting.models.config import FEAT_L7, FEAT_FRACDIFF, DeepSSMContainer, LGSSMContainer

df_feat_raw = pd.read_parquet("data/feat_hard_L7.parquet")
model_deep_ssm = DeepSSMContainer()
model_lg_ssm = LGSSMContainer()

df_deep_ssm = model_deep_ssm.transform(df_feat_raw[FEAT_FRACDIFF])
df_lg_ssm = model_lg_ssm.transform(df_feat_raw[FEAT_FRACDIFF])
df_feat_l7_full = pd.concat([df_deep_ssm, df_lg_ssm, df_feat_raw], axis=1)[FEAT_L7]

label_full = np.load("data/label_hard_L7.npy")

dtrain = lgb.Dataset(df_feat_l7_full, label_full)
side_model_prod = lgb.train(params, dtrain)

In [None]:
import plotly.express as px

test_features = df_feat_l7_full[df_feat_l7_full.index.to_numpy() >= date_to_timestamp("2025-03-01")]
res = side_model.predict(test_features)

fig = px.histogram(res, nbins=100)
fig.show()

In [9]:
side_model.save_model("strategies/BinanceBtcDeapV1Voting/models/model_l7.txt")
side_model_prod.save_model(
    "strategies/BinanceBtcDeapV1Voting/models/model_l7_prod.txt"
)

<lightgbm.basic.Booster at 0x15e233450>

# meta model

In [1]:
import numpy as np
import pandas as pd

from jesse.helpers import date_to_timestamp

from strategies.BinanceBtcEntropyBarV1.config import (
    META_ALL,
    SIDE,
    get_side_model,
)

df_features = pd.read_parquet("data/features.parquet")
meta_label = np.load("data/label_meta.npy")
print(f"{np.unique(meta_label, return_counts=True) = }")

side_model = get_side_model(False)

side_model_res = side_model.predict(df_features[SIDE])

df_features["model"] = side_model_res

meta_features = df_features[META_ALL]
print(meta_features.shape)
print(meta_label.shape)

mask = meta_features.index < date_to_timestamp("2025-01-01")
meta_features_masked = meta_features[mask]
meta_label_masked = meta_label[mask]

print(meta_features_masked.shape)
print(meta_label_masked.shape)
print(f"{np.unique(meta_label_masked, return_counts=True) = }")

meta_features.isna().sum(axis=0).sort_values(ascending=False)

np.unique(meta_label, return_counts=True) = (array([0., 1.]), array([  669, 12550]))
(13219, 2664)
(13219,)
(12115, 2664)
(12115,)
np.unique(meta_label_masked, return_counts=True) = (array([0., 1.]), array([  632, 11483]))


approximate_entropy_win128_spot    0
cwt_win1024_13_lag2                0
cwt_win256_11_dt_lag8              0
cwt_win128_8_lag4                  0
cwt_win64_12_dt                    0
                                  ..
cwt_win512_20_dt_lag11             0
cwt_win1024_11_dt_lag14            0
cwt_win128_1_dt_lag2               0
adx_7_lag3                         0
model                              0
Length: 2664, dtype: int64

In [2]:
import lightgbm as lgb
from sklearn.metrics import (
    f1_score,
    fbeta_score,
)

METRIC = "f1"


def eval_metric(preds, eval_dataset):
    metric_name = METRIC
    y_true = eval_dataset.get_label()
    value = f1_score(y_true, preds > 0.5, average="weighted")
    higher_better = True
    return metric_name, value, higher_better


params = {
    "objective": "binary",
    "num_threads": -1,
    "verbose": -1,
    "is_unbalance": True,
    "extra_trees": False,
    "num_leaves": 100,
    "max_depth": 20,
    "min_gain_to_split": 1e-8,
    "min_data_in_leaf": 20,
    "lambda_l1": 1e-4,
    "lambda_l2": 1e-4,
}
dtrain = lgb.Dataset(meta_features, meta_label)
res = lgb.cv(
    params, dtrain, num_boost_round=100, nfold=5, stratified=True, feval=eval_metric
)
res.keys()

dict_keys(['valid binary_logloss-mean', 'valid binary_logloss-stdv', 'valid f1-mean', 'valid f1-stdv'])

In [3]:
import optuna


def objective(trial):
    params = {
        "objective": "binary",
        "is_unbalance": trial.suggest_categorical("is_unbalance", [True, False]),
        "num_threads": -1,
        "verbose": -1,
        "extra_trees": trial.suggest_categorical("extra_trees", [True, False]),
        "boosting": trial.suggest_categorical("boosting", ["gbdt", "dart"]),
        "num_leaves": trial.suggest_int("num_leaves", 31, 500),
        "max_depth": trial.suggest_int("max_depth", 30, 1000),
        "min_gain_to_split": trial.suggest_float("min_gain_to_split", 1e-8, 1),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 20, 300),
        "lambda_l1": trial.suggest_float("lambda_l1", 1e-8, 100),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-8, 100),
    }
    dtrain = lgb.Dataset(meta_features, meta_label)
    # dtest = lgb.Dataset(meta_features_test, meta_label_test)
    model_res = lgb.cv(
        params,
        dtrain,
        num_boost_round=trial.suggest_int("num_boost_round", 100, 1500),
        stratified=True,
        feval=eval_metric,
    )
    return model_res[f"valid {METRIC}-mean"][-1]


study = optuna.create_study(
    direction="maximize",
    pruner=optuna.pruners.HyperbandPruner(),
    sampler=optuna.samplers.TPESampler(n_startup_trials=50),
)
study.optimize(objective, n_trials=200, n_jobs=1)

[I 2025-07-01 10:10:04,971] A new study created in memory with name: no-name-1188c647-119a-443b-b088-01c9353277eb
[I 2025-07-01 10:10:12,087] Trial 0 finished with value: 0.924743520258233 and parameters: {'is_unbalance': False, 'extra_trees': True, 'boosting': 'dart', 'num_leaves': 140, 'max_depth': 951, 'min_gain_to_split': 0.8206526039318547, 'min_data_in_leaf': 201, 'lambda_l1': 12.089838569582271, 'lambda_l2': 95.92885685399514, 'num_boost_round': 193}. Best is trial 0 with value: 0.924743520258233.
[I 2025-07-01 10:10:32,056] Trial 1 finished with value: 0.924743520258233 and parameters: {'is_unbalance': False, 'extra_trees': True, 'boosting': 'gbdt', 'num_leaves': 72, 'max_depth': 576, 'min_gain_to_split': 0.5274289677732332, 'min_data_in_leaf': 250, 'lambda_l1': 36.60501958746357, 'lambda_l2': 9.341901524626236, 'num_boost_round': 1043}. Best is trial 0 with value: 0.924743520258233.
[I 2025-07-01 10:10:37,441] Trial 2 finished with value: 0.9280243308865458 and parameters: {'i

In [4]:
print("meta model: ")
study.best_params

meta model: 


{'is_unbalance': True,
 'extra_trees': False,
 'boosting': 'dart',
 'num_leaves': 234,
 'max_depth': 579,
 'min_gain_to_split': 0.4783776963600167,
 'min_data_in_leaf': 254,
 'lambda_l1': 5.143714268925239,
 'lambda_l2': 21.356313125652914,
 'num_boost_round': 715}

In [5]:
import lightgbm as lgb

params = {
    "objective": "binary",
    "num_threads": -1,
    "verbose": -1,
    **study.best_params,
}

model = lgb.train(
    params,
    lgb.Dataset(meta_features_masked, meta_label_masked),
)

prod_model = lgb.train(params, lgb.Dataset(meta_features, meta_label))

In [6]:
import plotly.express as px

res = model.predict(
    meta_features[meta_features.index > date_to_timestamp("2025-01-01")]
)
pred_label = (res > 0.5).astype(int)

fig = px.histogram(res, nbins=100)
fig.show()

In [7]:
model.save_model("strategies/BinanceBtcEntropyBarV1/model/model_meta.txt")
prod_model.save_model("strategies/BinanceBtcEntropyBarV1/model/model_meta_prod.txt")

<lightgbm.basic.Booster at 0x11afd6690>