In [1]:
import sys
from pathlib import Path

sys.path.append(Path(".").parent.resolve().as_posix())

import numpy as np
from jesse import helpers, research
import jesse.indicators as ta

warmup_1m, trading_1m = research.get_candles(
    "Binance Perpetual Futures",
    "BTC-USDT",
    "1m",
    helpers.date_to_timestamp("2024-01-01"),
    helpers.date_to_timestamp("2024-12-31"),
    warmup_candles_num=0,
    caching=False,
    is_for_jesse=False,
)
trading_1m.shape

(525600, 6)

In [2]:
from research_tools import TrailingStopLabel

trailing_stop_label = TrailingStopLabel(trading_1m, n_bar=15, k=1.5)
np.unique(trailing_stop_label.labels, return_counts=True)

(array([-1,  0,  1], dtype=int8), array([128779, 266633, 130188]))

In [3]:
np.unique(trailing_stop_label.bar_duration, return_counts=True)

(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16]),
 array([266633,  29877,  42600,  37711,  30973,  24774,  19873,  15915,
         12648,  10477,   8395,   6968,   5776,   4625,   3934,   3320,
          1101]))

# 特征工程

In [4]:
from custom_indicators.td_sequential import td_sequential

st_trend, st_changed = ta.supertrend(trading_1m, sequential=True)
fe_supertrend = (helpers.get_candle_source(trading_1m, "close") / st_trend).reshape(
    -1, 1
)

kama = ta.kama(trading_1m, sequential=True)
fe_kama = (helpers.get_candle_source(trading_1m, "close") / kama).reshape(-1, 1)

lrsi = ta.lrsi(trading_1m, sequential=True).reshape(-1, 1)

boll_upper, boll_middle, boll_lower = ta.bollinger_bands(trading_1m, sequential=True)
fe_boll_upper = (helpers.get_candle_source(trading_1m, "close") / boll_upper).reshape(
    -1, 1
)
fe_boll_middle = (helpers.get_candle_source(trading_1m, "close") / boll_middle).reshape(
    -1, 1
)
fe_boll_lower = (helpers.get_candle_source(trading_1m, "close") / boll_lower).reshape(
    -1, 1
)

boll_width = ta.bollinger_bands_width(trading_1m, sequential=True).reshape(-1, 1)

atr = ta.atr(trading_1m, sequential=True).reshape(-1, 1)
vwap = ta.vwap(trading_1m, anchor="h", sequential=True)
fe_vwap = (helpers.get_candle_source(trading_1m, "close") / vwap).reshape(-1, 1)

sar = ta.sar(trading_1m, sequential=True)
fe_sar = (helpers.get_candle_source(trading_1m, "close") / sar).reshape(-1, 1)

td_buy, td_sell = td_sequential(trading_1m, sequential=True)
fe_td_buy = td_buy.reshape(-1, 1)
fe_td_sell = td_sell.reshape(-1, 1)

final_fe = np.concatenate(
    [
        fe_supertrend,
        fe_kama,
        lrsi,
        fe_boll_upper,
        fe_boll_middle,
        fe_boll_lower,
        boll_width,
        atr,
        fe_vwap,
        fe_sar,
    ],
    axis=1,
)
final_fe.shape

  fe_supertrend = (helpers.get_candle_source(trading_1m, "close") / st_trend).reshape(


(525600, 10)

In [19]:
np.diff(kama, prepend=np.nan).shape

(525600,)

In [5]:
invalid_len = (np.isfinite(final_fe) == False).sum(axis=0).max()
invalid_len

30

# 模型

In [7]:
train_len = trading_1m[
    trading_1m[:, 0] < helpers.date_to_timestamp("2024-06-30")
].shape[0]
vali_len = trading_1m[
    (trading_1m[:, 0] >= helpers.date_to_timestamp("2024-06-30"))
    & (trading_1m[:, 0] < helpers.date_to_timestamp("2024-09-30"))
].shape[0]
test_len = trading_1m[
    trading_1m[:, 0] >= helpers.date_to_timestamp("2024-09-30")
].shape[0]
assert train_len + vali_len + test_len == trading_1m.shape[0]

train_start, train_end = invalid_len, train_len
vali_start, vali_end = invalid_len * 2 + train_len, train_len + vali_len
test_start = invalid_len * 2 + train_len + vali_len

train_fe = final_fe[train_start:train_end]
vali_fe = final_fe[vali_start:vali_end]
test_fe = final_fe[test_start:]

train_label = trailing_stop_label.labels[train_start:train_end]
train_weights = trailing_stop_label.realized_r[train_start:train_end] * 100
vali_label = trailing_stop_label.labels[vali_start:vali_end]
vali_weights = trailing_stop_label.realized_r[vali_start:vali_end] * 100
test_label = trailing_stop_label.labels[test_start:]
test_weights = trailing_stop_label.realized_r[test_start:] * 100

train_fe = train_fe[train_label != 0]
train_label = (train_label[train_label != 0] == 1).astype(int)

vali_fe = vali_fe[vali_label != 0]
vali_label = (vali_label[vali_label != 0] == 1).astype(int)

test_fe = test_fe[test_label != 0]
test_label = (test_label[test_label != 0] == 1).astype(int)

# 验证测试集中中性特征的预测结果
test_fe_extra = test_fe[test_label == 0]

print(train_fe.shape, train_label.shape)
print(vali_fe.shape, vali_label.shape)
print(test_fe.shape, test_label.shape)
print(test_fe_extra.shape)

(127215, 10) (127215,)
(64012, 10) (64012,)
(67671, 10) (67671,)
(34059, 10)


In [11]:
import lightgbm as lgb

train_data = lgb.Dataset(train_fe, label=train_label)
vali_data = lgb.Dataset(vali_fe, label=vali_label)
test_data = lgb.Dataset(test_fe, label=test_label)

param = {
    "objective": "binary",
    "boosting_type": "gbdt",
    "num_iterations": 1000,
    "learning_rate": 0.05,
    "num_leaves": 31,
    "max_depth": -1,
    "min_data_in_leaf": 20,
    "bagging_fraction": 0.8,
    "bagging_freq": 1,
    "feature_fraction": 0.8,
    "lambda_l1": 0.0,
    "lambda_l2": 0.0,
    "early_stopping_round": 50,
    "max_bin": 2048,
    "data_sample_strategy": "bagging",
    "metric": "auc,average_precision,binary_logloss",
    "verbose": 1,
}

model = lgb.train(param, train_data, valid_sets=[vali_data])

[LightGBM] [Info] Number of positive: 64389, number of negative: 62826
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001335 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20480
[LightGBM] [Info] Number of data points in the train set: 127215, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.506143 -> initscore=0.024574
[LightGBM] [Info] Start training from score 0.024574
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[28]	valid_0's auc: 0.543176	valid_0's average_precision: 0.534383	valid_0's binary_logloss: 0.690362


In [15]:
model.feature_importance(importance_type="split")

array([ 72,  81,  78,  69,  54,  65, 110, 134,  80,  97], dtype=int32)