# side model

In [1]:
import numpy as np
import pandas as pd

from jesse.helpers import date_to_timestamp

from strategies.BinanceBtcEntropyBarV1.config import SIDE

df_features = pd.read_parquet("data/features.parquet")

features = df_features[SIDE]
label = np.load("data/side_label.npy")
len_gap = len(label) - len(df_features)
label = label[len_gap:]

print(features.shape)
print(label.shape)

mask = features.index < date_to_timestamp("2024-09-30")
features_masked = features[mask]
label_masked = label[mask]

print(features_masked.shape)
print(label_masked.shape)
print(np.unique(label_masked, return_counts=True))

features.isna().sum().sort_values(ascending=False)

(13219, 3355)
(13219,)
(11485, 3355)
(11485,)
(array([0, 1]), array([4536, 6949]))


cmma                              0
cwt_win64_2_ddt_lag9              0
cwt_win32_10_dt_lag18             0
cwt_win32_7_ddt_lag18             0
chaiken_money_flow_ddt_lag13      0
                                 ..
williams_r_dt_lag9                0
cwt_win32_19_dt_lag6              0
cwt_win128_17_dt_lag19            0
ehlers_early_onset_trend_lag19    0
fti_best_period_dt_lag12          0
Length: 3355, dtype: int64

In [2]:
import lightgbm as lgb

params = {
    "objective": "binary",
    "metric": "auc",
    "num_threads": -1,
    "verbose": -1,
    "is_unbalance": True,
    "extra_trees": False,
    "num_leaves": 100,
    "max_depth": 20,
    "min_gain_to_split": 1e-8,
    "min_data_in_leaf": 20,
    "lambda_l1": 1e-4,
    "lambda_l2": 1e-4,
}
dtrain = lgb.Dataset(features_masked, label_masked)
res = lgb.cv(params, dtrain, num_boost_round=100, nfold=5, stratified=True)
res.keys()

dict_keys(['valid auc-mean', 'valid auc-stdv'])

In [3]:
res["valid auc-mean"][-1]

0.9013170017639288

In [3]:
import lightgbm as lgb
import optuna


def objective(trial):
    METRIC = "auc"

    params = {
        "objective": "binary",
        "metric": METRIC,
        "num_threads": -1,
        "verbose": -1,
        "is_unbalance": trial.suggest_categorical("is_unbalance", [True, False]),
        "extra_trees": trial.suggest_categorical("extra_trees", [True, False]),
        "boosting": trial.suggest_categorical("boosting", ["gbdt", "dart"]),
        "num_leaves": trial.suggest_int("num_leaves", 31, 300),
        "max_depth": trial.suggest_int("max_depth", 30, 1000),
        "min_gain_to_split": trial.suggest_float("min_gain_to_split", 1e-8, 1),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 20, 500),
        "lambda_l1": trial.suggest_float("lambda_l1", 1e-4, 100),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-4, 100),
    }
    dtrain = lgb.Dataset(features, label)
    # dtest = lgb.Dataset(side_features_test, side_label_test)
    model_res = lgb.cv(
        params,
        dtrain,
        num_boost_round=trial.suggest_int("num_boost_round", 100, 1500),
        metrics=METRIC,
    )
    return model_res[f"valid {METRIC}-mean"][-1]


study = optuna.create_study(
    direction="maximize",
    pruner=optuna.pruners.HyperbandPruner(),
    sampler=optuna.samplers.TPESampler(),
)
study.optimize(objective, n_trials=100, n_jobs=1)

[I 2025-07-01 00:27:35,648] A new study created in memory with name: no-name-6e965e6b-2709-42ed-b884-89acc09c4fdc
[I 2025-07-01 00:28:13,372] Trial 0 finished with value: 0.8995340528111477 and parameters: {'is_unbalance': True, 'extra_trees': False, 'boosting': 'dart', 'num_leaves': 133, 'max_depth': 938, 'min_gain_to_split': 0.876320364197209, 'min_data_in_leaf': 75, 'lambda_l1': 53.2519406675719, 'lambda_l2': 94.57298271892145, 'num_boost_round': 499}. Best is trial 0 with value: 0.8995340528111477.
[I 2025-07-01 00:29:03,126] Trial 1 finished with value: 0.8954669099234398 and parameters: {'is_unbalance': False, 'extra_trees': True, 'boosting': 'dart', 'num_leaves': 198, 'max_depth': 161, 'min_gain_to_split': 0.6686003280320008, 'min_data_in_leaf': 215, 'lambda_l1': 54.76931807774014, 'lambda_l2': 51.01154999848247, 'num_boost_round': 1204}. Best is trial 0 with value: 0.8995340528111477.
[I 2025-07-01 00:29:17,358] Trial 2 finished with value: 0.8952561704810634 and parameters: {'

In [4]:
print("side model long label: ")
study.best_params

side model long label: 


{'is_unbalance': True,
 'extra_trees': False,
 'boosting': 'gbdt',
 'num_leaves': 293,
 'max_depth': 378,
 'min_gain_to_split': 0.06585562895792825,
 'min_data_in_leaf': 454,
 'lambda_l1': 3.3546439468310396,
 'lambda_l2': 88.63416062569485,
 'num_boost_round': 1443}

In [5]:
import lightgbm as lgb  # noqa

params = {
    "objective": "binary",
    "num_threads": -1,
    "verbose": -1,
    **study.best_params,
}

dtrain = lgb.Dataset(features_masked, label_masked)
side_model = lgb.train(params, dtrain)
side_model_prod = lgb.train(params, lgb.Dataset(features, label))

In [6]:
import plotly.express as px

test_features = features[features.index > date_to_timestamp("2024-09-30")]
res = side_model.predict(test_features)

fig = px.histogram(res, nbins=100)
fig.show()

In [7]:
side_model.save_model("strategies/BinanceBtcEntropyBarV1/model/model_side.txt")
side_model_prod.save_model(
    "strategies/BinanceBtcEntropyBarV1/model/model_side_prod.txt"
)

<lightgbm.basic.Booster at 0x13792d690>

# meta model

In [1]:
import numpy as np
import pandas as pd

from jesse.helpers import date_to_timestamp

from strategies.BinanceBtcEntropyBarV1.config import (
    META_ALL,
    SIDE,
    get_side_model,
)

df_features = pd.read_parquet("data/features.parquet")
meta_label = np.load("data/label_meta.npy")
print(f"{np.unique(meta_label, return_counts=True) = }")

side_model = get_side_model(False)

side_model_res = side_model.predict(df_features[SIDE])

df_features["model"] = side_model_res

meta_features = df_features[META_ALL]
print(meta_features.shape)
print(meta_label.shape)

mask = meta_features.index < date_to_timestamp("2025-01-01")
meta_features_masked = meta_features[mask]
meta_label_masked = meta_label[mask]

print(meta_features_masked.shape)
print(meta_label_masked.shape)
print(f"{np.unique(meta_label_masked, return_counts=True) = }")

meta_features.isna().sum(axis=0).sort_values(ascending=False)

np.unique(meta_label, return_counts=True) = (array([0., 1.]), array([  669, 12550]))
(13219, 2664)
(13219,)
(12115, 2664)
(12115,)
np.unique(meta_label_masked, return_counts=True) = (array([0., 1.]), array([  632, 11483]))


approximate_entropy_win128_spot    0
cwt_win1024_13_lag2                0
cwt_win256_11_dt_lag8              0
cwt_win128_8_lag4                  0
cwt_win64_12_dt                    0
                                  ..
cwt_win512_20_dt_lag11             0
cwt_win1024_11_dt_lag14            0
cwt_win128_1_dt_lag2               0
adx_7_lag3                         0
model                              0
Length: 2664, dtype: int64

In [2]:
import lightgbm as lgb
from sklearn.metrics import (
    f1_score,  # noqa
    fbeta_score,  # noqa
)

METRIC = "f1"


def eval_metric(preds, eval_dataset):
    metric_name = METRIC
    y_true = eval_dataset.get_label()
    value = f1_score(y_true, preds > 0.5, average="weighted")
    higher_better = True
    return metric_name, value, higher_better


params = {
    "objective": "binary",
    "num_threads": -1,
    "verbose": -1,
    "is_unbalance": True,
    "extra_trees": False,
    "num_leaves": 100,
    "max_depth": 20,
    "min_gain_to_split": 1e-8,
    "min_data_in_leaf": 20,
    "lambda_l1": 1e-4,
    "lambda_l2": 1e-4,
}
dtrain = lgb.Dataset(meta_features, meta_label)
res = lgb.cv(
    params, dtrain, num_boost_round=100, nfold=5, stratified=True, feval=eval_metric
)
res.keys()

dict_keys(['valid binary_logloss-mean', 'valid binary_logloss-stdv', 'valid f1-mean', 'valid f1-stdv'])

In [3]:
import optuna


def objective(trial):
    params = {
        "objective": "binary",
        "is_unbalance": trial.suggest_categorical("is_unbalance", [True, False]),
        "num_threads": -1,
        "verbose": -1,
        "extra_trees": trial.suggest_categorical("extra_trees", [True, False]),
        "boosting": trial.suggest_categorical("boosting", ["gbdt", "dart"]),
        "num_leaves": trial.suggest_int("num_leaves", 31, 500),
        "max_depth": trial.suggest_int("max_depth", 30, 1000),
        "min_gain_to_split": trial.suggest_float("min_gain_to_split", 1e-8, 1),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 20, 300),
        "lambda_l1": trial.suggest_float("lambda_l1", 1e-8, 100),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-8, 100),
    }
    dtrain = lgb.Dataset(meta_features, meta_label)
    # dtest = lgb.Dataset(meta_features_test, meta_label_test)
    model_res = lgb.cv(
        params,
        dtrain,
        num_boost_round=trial.suggest_int("num_boost_round", 100, 1500),
        stratified=True,
        feval=eval_metric,
    )
    return model_res[f"valid {METRIC}-mean"][-1]


study = optuna.create_study(
    direction="maximize",
    pruner=optuna.pruners.HyperbandPruner(),
    sampler=optuna.samplers.TPESampler(n_startup_trials=50),
)
study.optimize(objective, n_trials=200, n_jobs=1)

[I 2025-07-01 10:10:04,971] A new study created in memory with name: no-name-1188c647-119a-443b-b088-01c9353277eb
[I 2025-07-01 10:10:12,087] Trial 0 finished with value: 0.924743520258233 and parameters: {'is_unbalance': False, 'extra_trees': True, 'boosting': 'dart', 'num_leaves': 140, 'max_depth': 951, 'min_gain_to_split': 0.8206526039318547, 'min_data_in_leaf': 201, 'lambda_l1': 12.089838569582271, 'lambda_l2': 95.92885685399514, 'num_boost_round': 193}. Best is trial 0 with value: 0.924743520258233.
[I 2025-07-01 10:10:32,056] Trial 1 finished with value: 0.924743520258233 and parameters: {'is_unbalance': False, 'extra_trees': True, 'boosting': 'gbdt', 'num_leaves': 72, 'max_depth': 576, 'min_gain_to_split': 0.5274289677732332, 'min_data_in_leaf': 250, 'lambda_l1': 36.60501958746357, 'lambda_l2': 9.341901524626236, 'num_boost_round': 1043}. Best is trial 0 with value: 0.924743520258233.
[I 2025-07-01 10:10:37,441] Trial 2 finished with value: 0.9280243308865458 and parameters: {'i

In [4]:
print("meta model: ")
study.best_params

meta model: 


{'is_unbalance': True,
 'extra_trees': False,
 'boosting': 'dart',
 'num_leaves': 234,
 'max_depth': 579,
 'min_gain_to_split': 0.4783776963600167,
 'min_data_in_leaf': 254,
 'lambda_l1': 5.143714268925239,
 'lambda_l2': 21.356313125652914,
 'num_boost_round': 715}

In [5]:
import lightgbm as lgb

params = {
    "objective": "binary",
    "num_threads": -1,
    "verbose": -1,
    **study.best_params,
}

model = lgb.train(
    params,
    lgb.Dataset(meta_features_masked, meta_label_masked),
)

prod_model = lgb.train(params, lgb.Dataset(meta_features, meta_label))

In [6]:
import plotly.express as px

res = model.predict(
    meta_features[meta_features.index > date_to_timestamp("2025-01-01")]
)
pred_label = (res > 0.5).astype(int)

fig = px.histogram(res, nbins=100)
fig.show()

In [7]:
model.save_model("strategies/BinanceBtcEntropyBarV1/model/model_meta.txt")
prod_model.save_model("strategies/BinanceBtcEntropyBarV1/model/model_meta_prod.txt")

<lightgbm.basic.Booster at 0x11afd6690>