In [1]:
import numpy as np
from jesse import helpers, research

from custom_indicators.all_features import feature_bundle

_, trading_1m = research.get_candles(
    "Binance Perpetual Futures",
    "BTC-USDT",
    "1m",
    helpers.date_to_timestamp("2021-01-01"),
    helpers.date_to_timestamp("2025-01-31"),
    warmup_candles_num=0,
    caching=False,
    is_for_jesse=False,
)

_, trading_3m = research.get_candles(
    "Binance Perpetual Futures",
    "BTC-USDT",
    "3m",
    helpers.date_to_timestamp("2021-01-01"),
    helpers.date_to_timestamp("2025-01-31"),
    warmup_candles_num=0,
    caching=False,
    is_for_jesse=False,
)

_, trading_15m = research.get_candles(
    "Binance Perpetual Futures",
    "BTC-USDT",
    "15m",
    helpers.date_to_timestamp("2021-01-01"),
    helpers.date_to_timestamp("2025-01-31"),
    warmup_candles_num=0,
    caching=False,
    is_for_jesse=False,
)

In [2]:
features_1m = feature_bundle(trading_1m, sequential=True)
features_1m = {f"1m_{k}": v[::3] for k, v in features_1m.items()}
features_3m = feature_bundle(trading_3m, sequential=True)
features_15m = feature_bundle(trading_15m, sequential=True)
features_15m = {f"15m_{k}": np.repeat(v, 5) for k, v in features_15m.items()}

len_3m = trading_3m.shape[0]
for k, v in features_1m.items():
    assert len(v) == len_3m, f"{k} has length {len(v)} not {len_3m}"
for k, v in features_3m.items():
    assert len(v) == len_3m, f"{k} has length {len(v)} not {len_3m}"
for k, v in features_15m.items():
    assert len(v) == len_3m, f"{k} has length {len(v)} not {len_3m}"

In [3]:
import pandas as pd

df_features = pd.DataFrame({**features_1m, **features_3m, **features_15m})
print(df_features.shape)
df_features.head()

(715680, 1263)


Unnamed: 0,1m_acc_swing_index,1m_acc_swing_index_lag1,1m_acc_swing_index_lag2,1m_acc_swing_index_lag3,1m_acc_swing_index_dt,1m_acc_swing_index_ddt,1m_ehlers_early_onset_trend,1m_ehlers_early_onset_trend_lag1,1m_ehlers_early_onset_trend_lag2,1m_ehlers_early_onset_trend_lag3,...,15m_swamicharts_stochastic_34,15m_swamicharts_stochastic_35,15m_swamicharts_stochastic_36,15m_swamicharts_stochastic_37,15m_swamicharts_stochastic_38,15m_swamicharts_stochastic_39,15m_swamicharts_stochastic_40,15m_swamicharts_stochastic_41,15m_swamicharts_stochastic_42,15m_swamicharts_stochastic_43
0,0.0,,,,,,0.0,,,,...,,,,,,,,,,
1,0.0285,0.036568,0.048011,0.0,-0.008068,0.003376,-1.0,-1.0,0.0,0.0,...,,,,,,,,,,
2,-0.011203,-0.023382,0.026734,0.0285,0.012179,0.062296,-1.0,-1.0,-1.0,-1.0,...,,,,,,,,,,
3,-0.121147,-0.070155,-0.020838,-0.011203,-0.050992,-0.001675,-1.0,-1.0,-1.0,-1.0,...,,,,,,,,,,
4,-0.129019,-0.160702,-0.119638,-0.121147,0.031683,0.072747,-1.0,-1.0,-1.0,-1.0,...,,,,,,,,,,


In [None]:
df_features.to_csv("data/features_d002.csv.zst", index=False)

In [4]:
df_label = pd.read_csv("data/label_d002.csv.zst", index_col=0)
print(df_label.shape)
df_label.head()

(715680, 5)


Unnamed: 0,close,ret,trgt,bin,side
2021-01-01 00:00:00+00:00,29016.23,,,0.0,0.0
2021-01-01 00:03:00+00:00,28965.02,,,0.0,0.0
2021-01-01 00:06:00+00:00,28928.06,,,0.0,0.0
2021-01-01 00:09:00+00:00,28851.82,,,0.0,0.0
2021-01-01 00:12:00+00:00,28786.75,,,0.0,0.0


In [10]:
meta_mask = df_label["ret"].notna().tolist()
side_mask = (df_label["side"].astype(int) != 0).tolist()

meta_label = df_label[meta_mask]["bin"].astype(np.int8).reset_index(drop=True)
meta_label.name = "label"
side_label = (
    (df_label[side_mask]["side"].astype(int) == 1)
    .astype(np.int8)
    .reset_index(drop=True)
)
side_label.name = "label"

meta_features = df_features[meta_mask].reset_index(drop=True)
side_features = df_features[side_mask].reset_index(drop=True)

assert meta_features.shape[0] == meta_label.shape[0]
assert side_features.shape[0] == side_label.shape[0]

invalid_len = 300
meta_features = meta_features.iloc[invalid_len:]
meta_label = meta_label.iloc[invalid_len:]
side_features = side_features.iloc[invalid_len:]
side_label = side_label.iloc[invalid_len:]

meta_split = int(meta_features.shape[0] * 0.7)
side_split = int(side_features.shape[0] * 0.7)

meta_x_train = meta_features.iloc[:meta_split]
meta_x_test = meta_features.iloc[meta_split:]
meta_y_train = meta_label.iloc[:meta_split]
meta_y_test = meta_label.iloc[meta_split:]

assert meta_x_train.shape[0] == meta_y_train.shape[0]
assert meta_x_test.shape[0] == meta_y_test.shape[0]

side_x_train = side_features.iloc[:side_split]
side_x_test = side_features.iloc[side_split:]
side_y_train = side_label.iloc[:side_split]
side_y_test = side_label.iloc[side_split:]

assert side_x_train.shape[0] == side_y_train.shape[0]
assert side_x_test.shape[0] == side_y_test.shape[0]

In [11]:
# meta feature selection
from featurewiz import FeatureWiz

fwiz_meta = FeatureWiz(corr_limit=0.95, verbose=0, imbalanced=True)
fwiz_meta.fit(meta_x_train, meta_y_train)
fwiz_meta.transform(meta_x_test)
fwiz_meta.features

featurewiz is given 0.9 as correlation limit...
    Skipping feature engineering since no feature_engg input...
    final list of category encoders given: ['label', 'label']
    final list of scalers given: []
Loaded input data. Shape = (373650, 1263)
#### Starting featurewiz transform for test data ####
Loaded input data. Shape = (160137, 1263)
#### Starting lazytransform for test data ####
LazyTransformer has not been fit yet. Fit it first and try again.


TypeError: object of type 'NoneType' has no len()

In [None]:
# side feature selection
fwiz_side = FeatureWiz(corr_limit=0.99, verbose=0)
fwiz_side.fit(side_x_train, side_y_train)
fwiz_side.transform(side_x_test)
fwiz_side.features

In [6]:
import json

with open("offline/fwiz.json", "w") as f:
    json.dump(sorted(fwiz.features), f)