In [2]:
from custom_indicators.all_features import feature_bundle
import optuna
import numpy as np
import pandas as pd
from jesse import research, helpers
import lightgbm as lgb
from sklearn.metrics import roc_auc_score

from offline.labeling.labeling import (
    TripleBarrierLabeler,
    expand_labels,
)

warmup_1m, trading_1m = research.get_candles(
    "Binance Perpetual Futures",
    "BTC-USDT",
    "1m",
    helpers.date_to_timestamp("2020-01-01"),
    helpers.date_to_timestamp("2025-01-31"),
    warmup_candles_num=0,
    caching=False,
    is_for_jesse=False,
)
warmup_3m, trading_3m = research.get_candles(
    "Binance Perpetual Futures",
    "BTC-USDT",
    "3m",
    helpers.date_to_timestamp("2020-01-01"),
    helpers.date_to_timestamp("2025-01-31"),
    warmup_candles_num=0,
    caching=False,
    is_for_jesse=False,
)
warmup_15m, trading_15m = research.get_candles(
    "Binance Perpetual Futures",
    "BTC-USDT",
    "15m",
    helpers.date_to_timestamp("2020-01-01"),
    helpers.date_to_timestamp("2025-01-31"),
    warmup_candles_num=0,
    caching=False,
    is_for_jesse=False,
)

features_1m = feature_bundle(trading_1m, sequential=True)
features_1m = {f"1m_{k}": v[::3] for k, v in features_1m.items()}
features_3m = feature_bundle(trading_3m, sequential=True)
features_3m = {f"3m_{k}": v for k, v in features_3m.items()}
features_15m = feature_bundle(trading_15m, sequential=True)
features_15m = {f"15m_{k}": np.repeat(v, 5) for k, v in features_15m.items()}
df_features = pd.DataFrame({**features_1m, **features_3m, **features_15m})

def get_label(candles, num_minutes, target_ret, pt_sl):
    labeler = TripleBarrierLabeler(
        candles, num_minutes=num_minutes, verbose=False
    )
    side_labels = labeler.side_labels(pt=pt_sl, sl=pt_sl, target_ret=target_ret)
    meta_labels = labeler.meta_labels(side_labels, pt=pt_sl, sl=pt_sl, target_ret=target_ret)
    candle_df = expand_labels(meta_labels, candles, fill=0)
    return candle_df

def objective(trial):
    num_minutes = trial.suggest_int("num_minutes", 10, 120)
    target_ret = trial.suggest_float("target_ret", 0.0005, 0.01)
    pt_sl = trial.suggest_float("pt_sl", 0.1, 1.5)
    label_df = get_label(trading_3m, num_minutes, target_ret, pt_sl)

    train_test_split_point = int(len(label_df) * 0.8)
    train_features = df_features.iloc[:train_test_split_point]
    test_features = df_features.iloc[train_test_split_point:]
    train_labels = label_df.iloc[:train_test_split_point]
    test_labels = label_df.iloc[train_test_split_point:]

    train_valid_mask = train_labels['ret'].notna().tolist()
    test_valid_mask = test_labels['ret'].notna().tolist()

    train_features = train_features[train_valid_mask]
    train_labels = train_labels[train_valid_mask]['bin'].astype(int)
    test_features = test_features[test_valid_mask]
    test_labels = test_labels[test_valid_mask]['bin'].astype(int)

    params = {
        "objective": "binary",
        "metric": "auc",
        "num_threads": -1,
        "verbose": -1,
        "early_stopping_round": 100,
        "boosting": "gbdt",
        "num_leaves": trial.suggest_int("num_leaves", 31, 300),
        "max_depth": trial.suggest_int("max_depth", 30, 100),
        "min_gain_to_split": trial.suggest_float("min_gain_to_split", 1e-6, 1),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 20, 200),
        "lambda_l1": trial.suggest_float("lambda_l1", 1e-4, 1),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-4, 1),
    }
    dtrain = lgb.Dataset(train_features, train_labels)
    dtest = lgb.Dataset(test_features, test_labels)
    model = lgb.train(
        params,
        dtrain,
        valid_sets=[dtest],
        num_boost_round=trial.suggest_int("num_boost_round", 300, 1000),
    )
    pred_proba = model.predict(test_features)
    auc = roc_auc_score(test_labels, pred_proba, average="weighted")
    return auc

study = optuna.create_study(
    direction="maximize",
    pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=3),
    sampler=optuna.samplers.TPESampler(),
)
study.optimize(objective, n_trials=1000, show_progress_bar=True, n_jobs=1)

  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=3),
[I 2025-02-19 00:13:56,879] A new study created in memory with name: no-name-16c2bdfb-4d57-422b-a720-abfb035ca3da
  0%|          | 0/1000 [07:41<?, ?it/s]

[W 2025-02-19 00:21:38,388] Trial 0 failed with parameters: {'num_minutes': 109, 'target_ret': 0.0008411962528446362, 'pt_sl': 0.15811996827219882} because of the following error: ValueError('Item wrong length 2139264 instead of 891360.').
Traceback (most recent call last):
  File "/opt/homebrew/Caskroom/miniforge/base/envs/jesse/lib/python3.11/site-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "/var/folders/2g/md1t2j7s6hjdjc5jlvkmn7_c0000gn/T/ipykernel_42268/3930331556.py", line 77, in objective
    train_features = train_features[train_valid_mask]
                     ~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/Caskroom/miniforge/base/envs/jesse/lib/python3.11/site-packages/pandas/core/frame.py", line 4093, in __getitem__
    return self._getitem_bool_array(key)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/Caskroom/miniforge/base/envs/jesse/lib/python3.11/site-packag




ValueError: Item wrong length 2139264 instead of 891360.

In [16]:
features_1m['1m_conv_0'].shape

(891360,)

In [17]:
features_3m['3m_conv_0'].shape

(891360,)

In [18]:
features_15m['15m_conv_0'].shape

(891360,)