# side model

In [1]:
import numpy as np
import pandas as pd

from jesse.helpers import date_to_timestamp

from strategies.BinanceBtcEntropyBarV1.config import SIDE_LONG

df_features = pd.read_parquet("data/features.parquet")

features_long = df_features[SIDE_LONG]
label_long = np.load("data/side_label_long.npy")
len_gap = len(label_long) - len(df_features)
label_long = label_long[len_gap:]

print(features_long.shape)
print(label_long.shape)

mask = features_long.index < date_to_timestamp("2025-01-01")
features_long_masked = features_long[mask]
label_long_masked = label_long[mask]

print(features_long_masked.shape)
print(label_long_masked.shape)
print(np.unique(label_long_masked, return_counts=True))

features_long.isna().sum().sort_values(ascending=False)

(7996, 295)
(7996,)
(7470, 295)
(7470,)
(array([0, 1]), array([5529, 1941]))


williams_r                        0
bekker_parkinson_vol_dt_lag1      0
kyle_lambda_lag5                  0
fisher_ddt_lag2                   0
evenbetter_sinewave_short_lag4    0
                                 ..
bandpass_dt_lag2                  0
swamicharts_stochastic_9          0
forecast_oscillator_lag1          0
voss_dt_lag2                      0
voss_ddt                          0
Length: 295, dtype: int64

In [None]:
import lightgbm as lgb

params = {
    "objective": "binary",
    "metric": "auc",
    "num_threads": -1,
    "verbose": -1,
    "is_unbalance": True,
    "extra_trees": False,
    "num_leaves": 100,
    "max_depth": 20,
    "min_gain_to_split": 1e-8,
    "min_data_in_leaf": 20,
    "lambda_l1": 1e-4,
    "lambda_l2": 1e-4,
}
dtrain = lgb.Dataset(features_long_masked, label_long_masked)
res = lgb.cv(params, dtrain, num_boost_round=100, nfold=5, stratified=True)
res.keys()

In [None]:
res["valid auc-mean"][-1]

In [2]:
import lightgbm as lgb
import optuna


def objective(trial):
    METRIC = "auc"

    params = {
        "objective": "binary",
        "metric": METRIC,
        "num_threads": -1,
        "verbose": -1,
        "is_unbalance": trial.suggest_categorical("is_unbalance", [True, False]),
        "extra_trees": trial.suggest_categorical("extra_trees", [True, False]),
        "boosting": trial.suggest_categorical("boosting", ["gbdt", "dart"]),
        "num_leaves": trial.suggest_int("num_leaves", 31, 300),
        "max_depth": trial.suggest_int("max_depth", 30, 1000),
        "min_gain_to_split": trial.suggest_float("min_gain_to_split", 1e-8, 1),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 20, 500),
        "lambda_l1": trial.suggest_float("lambda_l1", 1e-4, 100),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-4, 100),
    }
    dtrain = lgb.Dataset(features_long, label_long)
    # dtest = lgb.Dataset(side_features_test, side_label_test)
    model_res = lgb.cv(
        params,
        dtrain,
        num_boost_round=trial.suggest_int("num_boost_round", 100, 1000),
        metrics=METRIC,
    )
    return model_res[f"valid {METRIC}-mean"][-1]


study = optuna.create_study(
    direction="maximize",
    pruner=optuna.pruners.HyperbandPruner(),
    sampler=optuna.samplers.TPESampler(),
)
study.optimize(objective, n_trials=100, n_jobs=1)

[I 2025-06-01 12:21:46,022] A new study created in memory with name: no-name-723a5446-d71b-4bd9-b738-ce7649ccc7b4
[I 2025-06-01 12:21:48,674] Trial 0 finished with value: 0.913598765340596 and parameters: {'is_unbalance': False, 'extra_trees': False, 'boosting': 'gbdt', 'num_leaves': 204, 'max_depth': 118, 'min_gain_to_split': 0.29316835861940116, 'min_data_in_leaf': 381, 'lambda_l1': 28.43242970180437, 'lambda_l2': 73.36660695946217, 'num_boost_round': 592}. Best is trial 0 with value: 0.913598765340596.
[I 2025-06-01 12:22:10,196] Trial 1 finished with value: 0.9049012478402458 and parameters: {'is_unbalance': False, 'extra_trees': False, 'boosting': 'dart', 'num_leaves': 90, 'max_depth': 454, 'min_gain_to_split': 0.8403299667653158, 'min_data_in_leaf': 74, 'lambda_l1': 84.05607383141059, 'lambda_l2': 55.38724378768311, 'num_boost_round': 868}. Best is trial 0 with value: 0.913598765340596.
[I 2025-06-01 12:22:15,587] Trial 2 finished with value: 0.9151662245757907 and parameters: {'

In [3]:
print("side model long label: ")
study.best_params

side model long label: 


{'is_unbalance': True,
 'extra_trees': False,
 'boosting': 'dart',
 'num_leaves': 196,
 'max_depth': 805,
 'min_gain_to_split': 0.9147660872927446,
 'min_data_in_leaf': 112,
 'lambda_l1': 0.12229909274412121,
 'lambda_l2': 14.48956578126283,
 'num_boost_round': 657}

In [4]:
import lightgbm as lgb  # noqa

params = {
    "objective": "binary",
    "num_threads": -1,
    "verbose": -1,
    **study.best_params,
}

dtrain = lgb.Dataset(features_long_masked, label_long_masked)
side_model_long = lgb.train(params, dtrain)
side_model_long_prod = lgb.train(params, lgb.Dataset(features_long, label_long))

In [5]:
import plotly.express as px

res = side_model_long.predict(features_long)

fig = px.histogram(res, nbins=100)
fig.show()

In [6]:
side_model_long.save_model(
    "strategies/BinanceBtcEntropyBarV1/model/model_side_long.txt"
)
side_model_long_prod.save_model(
    "strategies/BinanceBtcEntropyBarV1/model/model_side_long_prod.txt"
)

<lightgbm.basic.Booster at 0x335e2e410>

In [7]:
from strategies.BinanceBtcEntropyBarV1.config import SIDE_SHORT

df_features = pd.read_parquet("data/features.parquet")

features_short = df_features[SIDE_SHORT]
label_short = np.load("data/side_label_short.npy")
len_gap = len(label_short) - len(df_features)
label_short = label_short[len_gap:]

print(features_short.shape)
print(label_short.shape)

mask = features_short.index < date_to_timestamp("2025-01-01")
features_short_masked = features_short[mask]
label_short_masked = label_short[mask]

print(features_short_masked.shape)
print(label_short_masked.shape)
print(np.unique(label_short_masked, return_counts=True))

features_short.isna().sum().sort_values(ascending=False)


(7996, 68)
(7996,)
(7470, 68)
(7470,)
(array([0, 1]), array([5601, 1869]))


roofing_filter_ddt_lag1              0
reactivity                           0
trendflex_dt                         0
ehlers_early_onset_trend_ddt_lag1    0
natr_lag8                            0
                                    ..
fisher_dt                            0
voss_filt                            0
norm_on_balance_volume_dt_lag1       0
fisher_dt_lag1                       0
stc_dt_lag1                          0
Length: 68, dtype: int64

In [8]:
def objective(trial):
    METRIC = "auc"

    params = {
        "objective": "binary",
        "metric": METRIC,
        "num_threads": -1,
        "verbose": -1,
        "is_unbalance": trial.suggest_categorical("is_unbalance", [True, False]),
        "extra_trees": trial.suggest_categorical("extra_trees", [True, False]),
        "boosting": trial.suggest_categorical("boosting", ["gbdt", "dart"]),
        "num_leaves": trial.suggest_int("num_leaves", 31, 300),
        "max_depth": trial.suggest_int("max_depth", 30, 1000),
        "min_gain_to_split": trial.suggest_float("min_gain_to_split", 1e-8, 1),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 20, 500),
        "lambda_l1": trial.suggest_float("lambda_l1", 1e-4, 100),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-4, 100),
    }
    dtrain = lgb.Dataset(features_short, label_short)
    # dtest = lgb.Dataset(side_features_test, side_label_test)
    model_res = lgb.cv(
        params,
        dtrain,
        num_boost_round=trial.suggest_int("num_boost_round", 100, 1000),
        metrics=METRIC,
    )
    return model_res[f"valid {METRIC}-mean"][-1]


study = optuna.create_study(
    direction="maximize",
    pruner=optuna.pruners.HyperbandPruner(),
    sampler=optuna.samplers.TPESampler(),
)
study.optimize(objective, n_trials=100, n_jobs=1)


[I 2025-06-01 12:56:26,876] A new study created in memory with name: no-name-f7958510-e64f-426a-b865-1dc6bd5c2394
[I 2025-06-01 12:56:28,465] Trial 0 finished with value: 0.9168281979697476 and parameters: {'is_unbalance': False, 'extra_trees': True, 'boosting': 'gbdt', 'num_leaves': 55, 'max_depth': 197, 'min_gain_to_split': 0.47738754456281957, 'min_data_in_leaf': 37, 'lambda_l1': 7.010563794048015, 'lambda_l2': 66.02881857837937, 'num_boost_round': 203}. Best is trial 0 with value: 0.9168281979697476.
[I 2025-06-01 12:56:29,329] Trial 1 finished with value: 0.9111017065136238 and parameters: {'is_unbalance': True, 'extra_trees': False, 'boosting': 'gbdt', 'num_leaves': 140, 'max_depth': 918, 'min_gain_to_split': 0.43117036956123406, 'min_data_in_leaf': 417, 'lambda_l1': 72.9694746575118, 'lambda_l2': 20.12955244516621, 'num_boost_round': 125}. Best is trial 0 with value: 0.9168281979697476.
[I 2025-06-01 12:56:33,218] Trial 2 finished with value: 0.9164177928056997 and parameters: {

In [9]:
print("side model short label: ")
study.best_params

side model short label: 


{'is_unbalance': True,
 'extra_trees': True,
 'boosting': 'dart',
 'num_leaves': 169,
 'max_depth': 475,
 'min_gain_to_split': 0.3254571273750752,
 'min_data_in_leaf': 382,
 'lambda_l1': 0.5136437096602151,
 'lambda_l2': 30.0769200516287,
 'num_boost_round': 570}

In [10]:
import lightgbm as lgb  # noqa

params = {
    "objective": "binary",
    "num_threads": -1,
    "verbose": -1,
    **study.best_params,
}

dtrain = lgb.Dataset(features_short_masked, label_short_masked)
short_model = lgb.train(params, dtrain)

short_model_prod = lgb.train(params, lgb.Dataset(features_short, label_short))


In [11]:
import plotly.express as px

res = short_model.predict(features_short)

fig = px.histogram(res, nbins=100)
fig.show()


In [None]:
# import plotly.express as px
# from sklearn.metrics import roc_auc_score  # noqa

# res = model.predict(side_features_test)
# print(roc_auc_score(side_label_test, res))

# fig = px.histogram(res, nbins=100)
# fig.show()


In [12]:
short_model.save_model("strategies/BinanceBtcEntropyBarV1/model/model_side_short.txt")
short_model_prod.save_model(
    "strategies/BinanceBtcEntropyBarV1/model/model_side_short_prod.txt"
)


<lightgbm.basic.Booster at 0x334f675d0>

# meta model

In [5]:
import numpy as np
import pandas as pd

from jesse.helpers import date_to_timestamp

from strategies.BinanceBtcEntropyBarV1.config import (
    META_ALL,
    SIDE_LONG,
    SIDE_SHORT,
    get_side_model,
)

df_features = pd.read_parquet("data/features.parquet")
meta_label = np.load("data/label_meta.npy")

model_long = get_side_model(False, "long")
model_short = get_side_model(False, "short")

side_model_res_long = model_long.predict(df_features[SIDE_LONG])
side_model_res_short = model_short.predict(df_features[SIDE_SHORT])

df_features["model_long"] = side_model_res_long
df_features["model_short"] = side_model_res_short

meta_features = df_features[META_ALL]
print(meta_features.shape)
print(meta_label.shape)

mask = meta_features.index < date_to_timestamp("2025-01-01")
meta_features_masked = meta_features[mask]
meta_label_masked = meta_label[mask]

print(meta_features_masked.shape)
print(meta_label_masked.shape)

meta_features.isna().sum(axis=0).sort_values(ascending=False)

(7996, 76)
(7996,)
(7470, 76)
(7470,)


natr_lag2                       0
acc_swing_index_lag11           0
amihud_lambda_lag9              0
natr                            0
acc_swing_index_lag3            0
                               ..
hasbrouck_lambda_lag15          0
hasbrouck_lambda_lag17          0
hasbrouck_lambda_lag18          0
price_variance_ratio_dt_lag1    0
model_short                     0
Length: 76, dtype: int64

In [6]:
import lightgbm as lgb
from sklearn.metrics import (
    f1_score,  # noqa
    fbeta_score,  # noqa
)

METRIC = "f1"


def eval_metric(preds, eval_dataset):
    metric_name = METRIC
    y_true = eval_dataset.get_label()
    value = f1_score(y_true, preds > 0.5, average="macro")
    higher_better = True
    return metric_name, value, higher_better


params = {
    "objective": "binary",
    "num_threads": -1,
    "verbose": -1,
    "is_unbalance": True,
    "extra_trees": False,
    "num_leaves": 100,
    "max_depth": 20,
    "min_gain_to_split": 1e-8,
    "min_data_in_leaf": 20,
    "lambda_l1": 1e-4,
    "lambda_l2": 1e-4,
}
dtrain = lgb.Dataset(meta_features, meta_label)
res = lgb.cv(
    params, dtrain, num_boost_round=100, nfold=5, stratified=True, feval=eval_metric
)
res.keys()

dict_keys(['valid binary_logloss-mean', 'valid binary_logloss-stdv', 'valid f1-mean', 'valid f1-stdv'])

In [9]:
import optuna


def objective(trial):
    params = {
        "objective": "binary",
        "is_unbalance": trial.suggest_categorical("is_unbalance", [True, False]),
        "num_threads": -1,
        "verbose": -1,
        "extra_trees": trial.suggest_categorical("extra_trees", [True, False]),
        "boosting": trial.suggest_categorical("boosting", ["gbdt", "dart"]),
        "num_leaves": trial.suggest_int("num_leaves", 31, 500),
        "max_depth": trial.suggest_int("max_depth", 30, 1000),
        "min_gain_to_split": trial.suggest_float("min_gain_to_split", 1e-8, 1),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 20, 300),
        "lambda_l1": trial.suggest_float("lambda_l1", 1e-8, 100),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-8, 100),
    }
    dtrain = lgb.Dataset(meta_features, meta_label)
    # dtest = lgb.Dataset(meta_features_test, meta_label_test)
    model_res = lgb.cv(
        params,
        dtrain,
        num_boost_round=trial.suggest_int("num_boost_round", 100, 1000),
        stratified=True,
        feval=eval_metric,
    )
    return model_res[f"valid {METRIC}-mean"][-1]


study = optuna.create_study(
    direction="maximize",
    pruner=optuna.pruners.HyperbandPruner(),
    sampler=optuna.samplers.TPESampler(),
)
study.optimize(objective, n_trials=100, n_jobs=1)

[I 2025-06-01 16:54:43,464] A new study created in memory with name: no-name-65d4a03c-fd55-40c1-a799-e209fa0c9db1
[I 2025-06-01 16:54:48,367] Trial 0 finished with value: 0.9299663355282715 and parameters: {'is_unbalance': False, 'extra_trees': False, 'boosting': 'dart', 'num_leaves': 234, 'max_depth': 320, 'min_gain_to_split': 0.28694172396719664, 'min_data_in_leaf': 100, 'lambda_l1': 16.904307089899824, 'lambda_l2': 43.70312614046174, 'num_boost_round': 251}. Best is trial 0 with value: 0.9299663355282715.
[I 2025-06-01 16:54:56,706] Trial 1 finished with value: 0.9064050586382019 and parameters: {'is_unbalance': True, 'extra_trees': True, 'boosting': 'dart', 'num_leaves': 47, 'max_depth': 52, 'min_gain_to_split': 0.5181666529704779, 'min_data_in_leaf': 234, 'lambda_l1': 80.23587247703705, 'lambda_l2': 46.6109541478363, 'num_boost_round': 470}. Best is trial 0 with value: 0.9299663355282715.
[I 2025-06-01 16:55:06,275] Trial 2 finished with value: 0.9271906780300053 and parameters: {

In [10]:
print("meta model: ")
study.best_params

meta model: 


{'is_unbalance': True,
 'extra_trees': False,
 'boosting': 'gbdt',
 'num_leaves': 446,
 'max_depth': 137,
 'min_gain_to_split': 0.3183467819989737,
 'min_data_in_leaf': 37,
 'lambda_l1': 0.3842301846264844,
 'lambda_l2': 12.182212902800401,
 'num_boost_round': 696}

In [11]:
import lightgbm as lgb

params = {
    "objective": "binary",
    "num_threads": -1,
    "verbose": -1,
    **study.best_params,
}

model = lgb.train(
    params,
    lgb.Dataset(meta_features_masked, meta_label_masked),
)

prod_model = lgb.train(params, lgb.Dataset(meta_features, meta_label))

In [12]:
import plotly.express as px

res = model.predict(meta_features)
pred_label = (res > 0.5).astype(int)

fig = px.histogram(res, nbins=100)
fig.show()

In [14]:
model.save_model("strategies/BinanceBtcEntropyBarV1/model/model_meta.txt")
prod_model.save_model("strategies/BinanceBtcEntropyBarV1/model/model_meta_prod.txt")

<lightgbm.basic.Booster at 0x322769010>