
# Tune side model

In [None]:
import lightgbm as lgb
import optuna
import pandas as pd
import numpy as np
from jesse import helpers, research
from sklearn.metrics import roc_auc_score, f1_score
from sklearn.preprocessing import LabelBinarizer

from custom_indicators.all_features import feature_bundle
from offline.labeling.labeling import TripleBarrierLabeler, expand_labels

def _load_side_candles(timeframe):
    """Fetch BTC-USDT perpetual candles for ``timeframe``.

    Every timeframe shares the same exchange, symbol, date range
    (2020-06-01 to 2025-01-31) and loader flags, so only the timeframe varies.
    Returns the 2-tuple produced by ``research.get_candles``.
    """
    return research.get_candles(
        "Binance Perpetual Futures",
        "BTC-USDT",
        timeframe,
        helpers.date_to_timestamp("2020-06-01"),
        helpers.date_to_timestamp("2025-01-31"),
        warmup_candles_num=0,
        caching=False,
        is_for_jesse=False,
    )


# Only the second element of each result is kept; the first is discarded.
_, trading_3m = _load_side_candles("3m")
_, trading_15m = _load_side_candles("15m")
_, trading_1h = _load_side_candles("1h")

def _feature_frame(candles, prefix):
    """Return the feature bundle of ``candles`` as a timestamp-indexed DataFrame.

    Column names are ``"<prefix>_<feature name>"``; the index is built from
    candle column 0 converted with ``helpers.timestamp_to_time``.
    """
    bundle = feature_bundle(candles, sequential=True)
    named_columns = {f"{prefix}_{name}": values for name, values in bundle.items()}
    row_times = [helpers.timestamp_to_time(ts) for ts in candles[:, 0]]
    return pd.DataFrame(named_columns, index=pd.DatetimeIndex(row_times))


def _last_per_15min(frame):
    """Resample ``frame`` to 15-minute bins, keeping the last value per column."""
    return frame.resample("15min").agg(dict.fromkeys(frame.columns, "last"))


# NOTE(review): if candle timestamps are open times and features use the
# candle's close, forward-filling 1h rows onto 15-minute rows would expose
# information from later in the hour — confirm before trusting scores.
features_3m = _last_per_15min(_feature_frame(trading_3m, "3m"))
features_15m = _feature_frame(trading_15m, "15m")
# Hourly rows only populate one 15-minute bin in four; forward-fill the rest.
features_1h = _last_per_15min(_feature_frame(trading_1h, "1h")).ffill()

# Column-wise join of the three timeframes on the shared timestamp index.
df_features = pd.concat([features_3m, features_15m, features_1h], axis=1)
print(df_features.shape)


def get_side_label(candles, num_hours, target_ret, pt_sl):
    """Label ``candles`` with symmetric triple-barrier side labels.

    Args:
        candles: Candle array handed to ``TripleBarrierLabeler`` and
            ``expand_labels``.
        num_hours: Forwarded to the labeler as ``num_hours`` (``num_minutes``
            is fixed at 1).
        target_ret: Forwarded to ``side_labels`` as ``target_ret``.
        pt_sl: Single multiplier used for both ``pt`` and ``sl``.

    Returns:
        The result of ``expand_labels`` with ``fill=0``; callers in this file
        read its ``"ret"`` and ``"bin"`` columns.
    """
    barrier_labeler = TripleBarrierLabeler(
        candles, num_hours=num_hours, num_minutes=1, verbose=False
    )
    # The same multiplier is used for the profit-taking and stop-loss barriers.
    raw_labels = barrier_labeler.side_labels(
        pt=pt_sl, sl=pt_sl, target_ret=target_ret
    )
    return expand_labels(raw_labels, candles, fill=0)

In [None]:
# Persist the combined multi-timeframe feature table for later reuse.
df_features.to_parquet("data/features.parquet")

In [None]:
def objective(trial):
    """Optuna objective for the side model: macro F1 on a chronological hold-out.

    Each trial samples triple-barrier settings (``num_hours``, ``target_ret``,
    ``pt_sl``) and LightGBM settings (``num_leaves``, ``max_depth``,
    ``num_boost_round``). It rebuilds the side labels on the 15m candles,
    trains a 3-class one-vs-all model on the first 75% of the labelled rows
    and scores it on the remaining 25%.

    Args:
        trial: The ``optuna`` trial used to sample hyper-parameters.

    Returns:
        The unweighted mean of the per-class F1 scores on the hold-out rows,
        or ``0`` when the hold-out contains at most one distinct label.
    """
    num_hours = trial.suggest_int("num_hours", 1, 24)
    target_ret = trial.suggest_float("target_ret", 0.0005, 0.05)
    pt_sl = trial.suggest_float("pt_sl", 0.1, 3)

    # Skip the first 240 rows of both tables (presumably indicator warm-up).
    label_df = get_side_label(trading_15m, num_hours, target_ret, pt_sl).iloc[240:]
    side_features = df_features.iloc[240:]

    # Keep only rows that received a label. The mask is applied positionally
    # (NumPy array) so it also works when the label table and the feature
    # table do not share an index; an index-aligned boolean Series would
    # raise in that case.
    valid_mask = label_df["ret"].notna().to_numpy()
    label_df = label_df[valid_mask]
    side_features = side_features[valid_mask]

    # Chronological split: no shuffling, the most recent 25% is the hold-out.
    split_point = int(len(label_df) * 0.75)
    train_features = side_features.iloc[:split_point]
    test_features = side_features.iloc[split_point:]
    train_labels = label_df["bin"].iloc[:split_point].astype(int)
    test_labels = label_df["bin"].iloc[split_point:].astype(int)
    if test_labels.nunique() <= 1:
        return 0

    params = {
        "objective": "multiclassova",
        "num_class": 3,
        "num_threads": -1,
        "verbose": -1,
        "boosting": "gbdt",
        "num_leaves": trial.suggest_int("num_leaves", 31, 200),
        "max_depth": trial.suggest_int("max_depth", 30, 100),
    }
    # LightGBM multiclass objectives require labels in [0, num_class).
    # Side labels are assumed to be -1 / 0 / 1 (the prediction decode below
    # subtracts 1), so shift them by +1 for training — TODO confirm against
    # TripleBarrierLabeler.
    dtrain = lgb.Dataset(train_features, train_labels + 1)
    model = lgb.train(
        params,
        dtrain,
        num_boost_round=trial.suggest_int("num_boost_round", 300, 800),
    )

    # Column index of the highest score -> class index -> original label.
    pred_proba = model.predict(test_features)
    pred_labels = np.argmax(pred_proba, axis=1) - 1
    # Macro F1 over the three classes, pinned so a class missing from the
    # hold-out still counts (as 0) in the mean.
    return f1_score(test_labels, pred_labels, labels=[-1, 0, 1], average="macro")


# Search configuration for the side model: maximise the objective value with
# the TPE sampler and a median pruner wrapped in a patience of 2.
# NOTE(review): the objective never calls trial.report(), so the pruner
# presumably never triggers — confirm whether it is needed.
_side_pruner = optuna.pruners.PatientPruner(
    optuna.pruners.MedianPruner(),
    patience=2,
)
_side_sampler = optuna.samplers.TPESampler()
study = optuna.create_study(
    direction="maximize",
    pruner=_side_pruner,
    sampler=_side_sampler,
)

# 500 sequential trials (n_jobs=1) with a progress bar.
study.optimize(objective, n_trials=500, show_progress_bar=True, n_jobs=1)

In [None]:
from jesse import helpers, research

from offline.labeling.labeling import TripleBarrierLabeler, expand_labels

# Reload the 3m candles used to build the final side labels.
warmup_3m, trading_3m = research.get_candles(
    "Binance Perpetual Futures",
    "BTC-USDT",
    "3m",
    helpers.date_to_timestamp("2020-06-01"),
    helpers.date_to_timestamp("2025-01-31"),
    warmup_candles_num=0,
    caching=False,
    is_for_jesse=False,
)

# Fixed labeling settings for the exported side labels.
num_minutes = 10
pt_sl = 0.9656765406603607
target_ret = 0.005978786042921383

# Use the num_minutes variable rather than repeating the literal, so the
# setting is defined in exactly one place.
labeler = TripleBarrierLabeler(trading_3m, num_minutes=num_minutes, verbose=True)
# The same multiplier is used for the profit-taking and stop-loss barriers.
side_labels = labeler.side_labels(pt=pt_sl, sl=pt_sl, target_ret=target_ret)
# Class balance of the labels, ordered by class value.
print(side_labels["bin"].value_counts().sort_index().to_numpy())
candle_df = expand_labels(side_labels, trading_3m, fill=0)

In [None]:
# Persist the expanded side labels built from the 3m candles.
candle_df.to_parquet("data/label_side.parquet")

# Tune meta model

In [None]:
import lightgbm as lgb
import optuna
import pandas as pd
from jesse import helpers, research
from sklearn.metrics import roc_auc_score

from custom_indicators.all_features import feature_bundle
from custom_indicators.selection import SIDE_1M, SIDE_3M, SIDE_15M
from offline.labeling.labeling import TripleBarrierLabeler, expand_labels

# Load BTC-USDT perpetual candles (2020-06-01 to 2025-01-31) for the three
# timeframes used by the meta model. The calls run in the order 1m, 3m, 15m
# and each returned pair is unpacked into its (warmup, trading) names.
(
    (warmup_1m, trading_1m),
    (warmup_3m, trading_3m),
    (warmup_15m, trading_15m),
) = (
    research.get_candles(
        "Binance Perpetual Futures",
        "BTC-USDT",
        timeframe,
        helpers.date_to_timestamp("2020-06-01"),
        helpers.date_to_timestamp("2025-01-31"),
        warmup_candles_num=0,
        caching=False,
        is_for_jesse=False,
    )
    for timeframe in ("1m", "3m", "15m")
)

# Build one feature frame per timeframe, each indexed by candle timestamp
# and with columns prefixed by the timeframe, then align all of them on a
# 3-minute grid.
#
# The resample rule is spelled "3min": the "T" minute alias is deprecated in
# recent pandas releases, and "min" matches the "15min" spelling used for the
# side-model features earlier in this file.
#
# NOTE(review): if candle timestamps are open times and features use the
# candle's close, forward-filling 15m rows onto 3-minute rows would expose
# information from later in the 15-minute window — confirm before trusting
# scores.
features_1m = feature_bundle(trading_1m, sequential=True)
features_1m = pd.DataFrame(
    {f"1m_{k}": v for k, v in features_1m.items()},
    index=pd.DatetimeIndex([helpers.timestamp_to_time(i) for i in trading_1m[:, 0]]),
)
# Down-sample 1m features: keep the last value inside each 3-minute bin.
features_1m = features_1m.resample("3min").agg(
    {k: "last" for k in features_1m.columns}
)

features_3m = feature_bundle(trading_3m, sequential=True)
features_3m = pd.DataFrame(
    {f"3m_{k}": v for k, v in features_3m.items()},
    index=pd.DatetimeIndex([helpers.timestamp_to_time(i) for i in trading_3m[:, 0]]),
)

features_15m = feature_bundle(trading_15m, sequential=True)
features_15m = pd.DataFrame(
    {f"15m_{k}": v for k, v in features_15m.items()},
    index=pd.DatetimeIndex([helpers.timestamp_to_time(i) for i in trading_15m[:, 0]]),
)
# Up-sample 15m features: only one 3-minute bin in five has a value, so
# forward-fill the gaps.
features_15m = (
    features_15m.resample("3min")
    .agg({k: "last" for k in features_15m.columns})
    .ffill()
)

df_features = pd.concat([features_1m, features_3m, features_15m], axis=1)
print(df_features.shape)

# Columns the saved side model is scored on: the 1m, 3m and 15m selections
# concatenated in that order.
side_features_col = SIDE_1M + SIDE_3M + SIDE_15M

# Load the saved side model and score every row of the feature table.
side_model = lgb.Booster(model_file="custom_indicators/models/model_side.txt")
side_model_pred = side_model.predict(df_features[side_features_col])

# Expose the side model's output as an extra feature column.
df_features["model_side_res"] = side_model_pred


def get_meta_label(candles, target_ret, pt, sl, num_minutes=0, num_hours=0):
    """Build per-candle meta labels from the side model's predictions.

    Args:
        candles: Candle array handed to ``TripleBarrierLabeler`` and
            ``expand_labels``.
        target_ret: Forwarded to both ``side_labels`` and ``meta_labels``.
        pt: Profit-taking multiplier forwarded to both labeling calls.
        sl: Stop-loss multiplier forwarded to both labeling calls.
        num_minutes: Forwarded to the labeler as ``num_minutes``.
        num_hours: Forwarded to the labeler as ``num_hours``.

    Returns:
        The result of ``expand_labels`` on the meta labels with ``fill=0``.

    Reads the module-level ``side_model_pred``, which must provide one
    prediction per row of the expanded side-label frame.
    """
    barrier_labeler = TripleBarrierLabeler(
        candles, num_minutes=num_minutes, num_hours=num_hours, verbose=False
    )
    raw_side = barrier_labeler.side_labels(pt=pt, sl=sl, target_ret=target_ret)
    side_frame = expand_labels(raw_side, candles)

    # Turn each side-model score into a direction: above 0.5 -> 1, else -1.
    predicted_sides = []
    for score in side_model_pred:
        predicted_sides.append(1 if score > 0.5 else -1)
    side_frame["pred"] = predicted_sides

    raw_meta = barrier_labeler.meta_labels(
        side_frame, pt=pt, sl=sl, target_ret=target_ret
    )
    return expand_labels(raw_meta, candles, fill=0)

In [None]:
def objective(trial):
    """Optuna objective for the meta model: ROC AUC on a chronological hold-out.

    Each trial samples meta-labeling settings (``num_minutes``,
    ``target_ret``, ``pt``, ``sl``) and LightGBM settings. It rebuilds the
    meta labels on the 3m candles, trains a binary model on the first 80% of
    the rows and scores it on the remaining 20%.

    Args:
        trial: The ``optuna`` trial used to sample hyper-parameters.

    Returns:
        The ROC AUC on the hold-out rows, or ``0`` when the hold-out contains
        at most one distinct label.
    """
    num_minutes = trial.suggest_int("num_minutes", 10, 120)
    target_ret = trial.suggest_float("target_ret", 0.0005, 0.01)
    pt = trial.suggest_float("pt", 0.1, 1.2)
    sl = trial.suggest_float("sl", 0.1, 1.2)
    # get_meta_label's signature is
    # (candles, target_ret, pt, sl, num_minutes=0, num_hours=0). Passing the
    # sampled values positionally as (num_minutes, target_ret, pt, sl) would
    # bind each one to the wrong parameter, so pass them by keyword.
    label_df = get_meta_label(
        trading_3m,
        target_ret=target_ret,
        pt=pt,
        sl=sl,
        num_minutes=num_minutes,
    )

    # Chronological split: no shuffling, the most recent 20% is the hold-out.
    train_test_split_point = int(len(label_df) * 0.8)
    train_features = df_features.iloc[:train_test_split_point]
    test_features = df_features.iloc[train_test_split_point:]
    train_labels = label_df.iloc[:train_test_split_point]
    test_labels = label_df.iloc[train_test_split_point:]

    # Positional boolean masks (plain arrays) so the feature table and the
    # label table do not need to share an index.
    train_valid_mask = train_labels["ret"].notna().to_numpy()
    test_valid_mask = test_labels["ret"].notna().to_numpy()

    train_features = train_features[train_valid_mask]
    train_labels = train_labels[train_valid_mask]["bin"].astype(int)
    test_features = test_features[test_valid_mask]
    test_labels = test_labels[test_valid_mask]["bin"].astype(int)
    if test_labels.nunique() <= 1:
        return 0

    params = {
        "objective": "binary",
        "metric": "auc",
        "num_threads": -1,
        "verbose": -1,
        "early_stopping_round": 100,
        "boosting": "gbdt",
        "num_leaves": trial.suggest_int("num_leaves", 31, 200),
        "max_depth": trial.suggest_int("max_depth", 30, 100),
        "min_gain_to_split": trial.suggest_float("min_gain_to_split", 1e-8, 1),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 20, 200),
        "lambda_l1": trial.suggest_float("lambda_l1", 1e-8, 1),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-8, 1),
    }
    dtrain = lgb.Dataset(train_features, train_labels)
    dtest = lgb.Dataset(test_features, test_labels)
    # NOTE(review): the hold-out set is also the early-stopping validation
    # set, so the returned AUC is measured on data that influenced the number
    # of boosting rounds — consider a separate validation split.
    model = lgb.train(
        params,
        dtrain,
        valid_sets=[dtest],
        num_boost_round=trial.suggest_int("num_boost_round", 300, 800),
    )
    pred_proba = model.predict(test_features)
    auc = roc_auc_score(test_labels, pred_proba, average="weighted")
    return auc


# Search configuration for the meta model: maximise the objective value with
# the TPE sampler and a median pruner wrapped in a patience of 2.
# NOTE(review): the objective never calls trial.report(), so the pruner
# presumably never triggers — confirm whether it is needed.
_meta_pruner = optuna.pruners.PatientPruner(
    optuna.pruners.MedianPruner(),
    patience=2,
)
_meta_sampler = optuna.samplers.TPESampler()
study = optuna.create_study(
    direction="maximize",
    pruner=_meta_pruner,
    sampler=_meta_sampler,
)

# 500 sequential trials (n_jobs=1) with a progress bar.
study.optimize(objective, n_trials=500, show_progress_bar=True, n_jobs=1)

In [None]:
# Hyper-parameter values kept for reference (presumably copied from a tuning
# run's best trial — confirm). The first four values are the ones passed to
# get_meta_label in the next cell.
{
    "num_minutes": 12,
    "target_ret": 0.00999169655418248,
    "pt": 1.0586409809460318,
    "sl": 0.6472137540480188,
    "num_leaves": 43,
    "max_depth": 71,
    "min_gain_to_split": 0.5374875726311118,
    "min_data_in_leaf": 46,
    "lambda_l1": 0.8426077111660407,
    "lambda_l2": 0.6195771592283434,
    "num_boost_round": 707,
}

In [None]:
# Build the final meta labels from the recorded settings. The values are
# passed by keyword because get_meta_label's signature is
# (candles, target_ret, pt, sl, num_minutes=0, num_hours=0): passing
# (12, target_ret, pt, sl) positionally would bind 12 to target_ret and shift
# every other value by one slot.
# NOTE(review): if these values came from a tuning run that used the
# positional call, re-run the tuning before relying on them.
df_meta_label = get_meta_label(
    trading_3m,
    target_ret=0.00999169655418248,
    pt=1.0586409809460318,
    sl=0.6472137540480188,
    num_minutes=12,
)
print(df_meta_label.shape)
df_meta_label.head()

In [None]:
# Persist the expanded meta labels built from the 3m candles.
df_meta_label.to_parquet("data/label_meta.parquet")