In [1]:
import sys
from pathlib import Path

# Extend the import search path before the project imports below.
# `Path(".").parent` is still ".", so what gets appended is the resolved
# current working directory, not its parent.
# NOTE(review): if the parent directory was the intended target this would
# have to be `Path(".").resolve().parent` — confirm.
search_dir = Path(".").parent.resolve()
sys.path.append(search_dir.as_posix())

import jesse.indicators as ta
import numpy as np
from jesse import helpers, research

# Date range of the request, converted to timestamps once.
range_start_ts = helpers.date_to_timestamp("2021-01-01")
range_end_ts = helpers.date_to_timestamp("2024-12-31")

# Fetch 1m BTC-USDT candles from Binance Perpetual Futures with no warm-up
# candles and caching disabled; the call returns a (warm-up, trading) pair.
warmup_1m, trading_1m = research.get_candles(
    "Binance Perpetual Futures",
    "BTC-USDT",
    "1m",
    range_start_ts,
    range_end_ts,
    warmup_candles_num=0,
    caching=False,
    is_for_jesse=False,
)

# Sanity check: (rows, columns) of the trading candles.
trading_1m.shape

(2102400, 6)

In [2]:
from research_tools import TrailingStopLabel

# Label every 1m candle with the trailing-stop labeler (n_bar=15, k=1).
trailing_stop_label = TrailingStopLabel(trading_1m, n_bar=15, k=1)

# Class balance: the distinct label values and how often each one occurs.
label_values, label_counts = np.unique(
    trailing_stop_label.labels,
    return_counts=True,
)
(label_values, label_counts)

(array([-1,  0,  1], dtype=int8), array([972092, 163062, 967246]))

In [3]:
# Display the labeler's `return_of_label` attribute.
# NOTE(review): presumably an aggregate return implied by the labels — confirm
# its exact definition against TrailingStopLabel.
trailing_stop_label.return_of_label

7.0287453215383495

# 特征工程

In [4]:
from custom_indicators.all_features import feature_matrix

# Build the feature matrix from the 1m candles with sequential=True, then
# display its shape as a quick check of the row and feature counts.
# NOTE(review): sequential=True presumably yields one feature row per candle — confirm.
final_fe = feature_matrix(trading_1m, sequential=True)
final_fe.features.shape

(2102400, 394)

In [5]:
# Count the non-finite (NaN / inf) entries in each feature column and keep the
# largest count across columns.
invalid_len = (
    (~np.isfinite(final_fe.features))
    .sum(axis=0)
    .max()
)
invalid_len

48

# 模型

In [5]:
# Number of rows skipped at the head of every split.
# NOTE(review): this hard-coded 240 replaces the `invalid_len` measured from the
# feature matrix in the previous cell (48 in the recorded output); presumably a
# deliberately conservative margin — confirm.
invalid_len = 240

# Split boundaries, converted to timestamps once and reused below.
vali_boundary_ts = helpers.date_to_timestamp("2024-06-30")
test_boundary_ts = helpers.date_to_timestamp("2024-09-30")

# Column 0 of the candle array is what gets compared against the boundaries.
# Counting on the boolean masks avoids materialising filtered copies of the
# whole candle array just to read their length.
candle_ts = trading_1m[:, 0]
train_len = int(np.count_nonzero(candle_ts < vali_boundary_ts))
vali_len = int(
    np.count_nonzero((candle_ts >= vali_boundary_ts) & (candle_ts < test_boundary_ts))
)
test_len = int(np.count_nonzero(candle_ts >= test_boundary_ts))
assert train_len + vali_len + test_len == trading_1m.shape[0]

# Row ranges of the three splits. Every range starts `invalid_len` rows after
# its nominal boundary, so the first `invalid_len` rows of the validation and
# test periods are skipped as well.
# NOTE(review): presumably intended as a gap between consecutive splits — confirm.
train_start, train_end = invalid_len, train_len
vali_start, vali_end = invalid_len + train_len, train_len + vali_len
test_start = invalid_len + train_len + vali_len


def _take_rows(start, end=None):
    """Return ``(features, labels, weights)`` for rows ``start:end``.

    Reads the notebook-level ``final_fe`` and ``trailing_stop_label``.
    Weights are ``abs(realized_r * 100)``. ``end=None`` runs to the last row.
    """
    rows = slice(start, end)
    return (
        final_fe.features[rows],
        trailing_stop_label.labels[rows],
        np.abs(trailing_stop_label.realized_r[rows] * 100),
    )


def _keep_directional(fe, label, weights):
    """Drop rows labelled 0 and recode the rest as 1 (label == 1) or 0 (otherwise)."""
    directional = label != 0
    return (
        fe[directional],
        (label[directional] == 1).astype(int),
        weights[directional],
    )


train_fe, train_label, train_weights = _keep_directional(
    *_take_rows(train_start, train_end)
)
vali_fe, vali_label, vali_weights = _keep_directional(
    *_take_rows(vali_start, vali_end)
)

test_fe, test_label, test_weights = _take_rows(test_start)

# Keep the neutral (label == 0) test rows aside so the model's predictions on
# them can be inspected separately.
test_fe_extra = test_fe[test_label == 0]

test_fe, test_label, test_weights = _keep_directional(
    test_fe, test_label, test_weights
)


print(train_fe.shape, train_label.shape, train_weights.shape)
print(vali_fe.shape, vali_label.shape, vali_weights.shape)
print(test_fe.shape, test_label.shape, test_weights.shape)
print(test_fe_extra.shape)

(1685465, 394) (1685465,) (1685465,)
(125939, 394) (125939,) (125939,)
(127292, 394) (127292,) (127292,)
(4948, 394)


In [9]:
from lightgbm import LGBMClassifier
from sklearn.metrics import roc_auc_score

# Hyper-parameters of the gradient-boosted tree classifier, kept in one place.
lgbm_params = dict(
    boosting_type="gbdt",
    num_leaves=300,
    max_depth=20,
    learning_rate=0.05,
    n_estimators=300,
    reg_alpha=1.0,
    reg_lambda=1.0,
    n_jobs=-1,
)
model = LGBMClassifier(**lgbm_params)

# Fit on the training split.
# NOTE(review): the notebook computes train_weights / vali_weights, but no
# sample_weight is passed to fit() or to the metric — confirm this is intended.
model.fit(train_fe, train_label)

# Validation ROC AUC, scored on the predicted probability of class 1.
vali_proba = model.predict_proba(vali_fe)[:, 1]
auc_score = roc_auc_score(vali_label, vali_proba)
auc_score

[LightGBM] [Info] Number of positive: 840377, number of negative: 845088
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.371832 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100217
[LightGBM] [Info] Number of data points in the train set: 1685465, number of used features: 394
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498602 -> initscore=-0.005590
[LightGBM] [Info] Start training from score -0.005590


0.5215815804854896

In [15]:
# Split-count importance of each feature in the fitted model.
# `feature_importance(...)` is a method of LightGBM's Booster, not of the
# scikit-learn style LGBMClassifier, so it is reached through `booster_`.
model.booster_.feature_importance(importance_type="split")

array([ 72,  81,  78,  69,  54,  65, 110, 134,  80,  97], dtype=int32)