In [1]:
import numpy as np

# Load the three pre-computed bar arrays from disk.
# The label file is loaded whole and then stripped of its last column
# (presumably the label/side column, going by the file name -- TODO confirm).
label_dollar_bar = np.load("data/label_side_45m.npy")
label_dollar_bar = label_dollar_bar[:, :-1]

dollar_bar_15m = np.load("data/dollar_bar_15m.npy")
dollar_bar_4h = np.load("data/dollar_bar_4h.npy")

# Sanity check: show the shape of every array on a single line.
print(*(bars.shape for bars in (label_dollar_bar, dollar_bar_15m, dollar_bar_4h)))

(59138, 6) (177384, 6) (11226, 6)


In [2]:
import gc

import pandas as pd
from jesse import helpers

from custom_indicators.all_features import feature_bundle

# Column-name prefixes, one per bar series.
SHORT_TERM = "15m"
MID_TERM = "45m"
LONG_TERM = "4h"


def _build_feature_frame(bars: np.ndarray, prefix: str) -> pd.DataFrame:
    """Compute the feature bundle for one bar array and return it as a DataFrame.

    Args:
        bars: 2-D bar array; column 0 is passed to ``helpers.timestamp_to_time``
            to build the index (presumably a millisecond timestamp -- TODO confirm).
        prefix: String prepended (with an underscore) to every feature name so
            that columns from different bar series do not collide.

    Returns:
        A DataFrame with one ``"{prefix}_{feature}"`` column per entry returned
        by ``feature_bundle(bars, sequential=True)``, indexed by a
        ``DatetimeIndex`` built from the first column of ``bars``.
    """
    features = {
        f"{prefix}_{name}": values
        for name, values in feature_bundle(bars, sequential=True).items()
    }
    index = pd.DatetimeIndex([helpers.timestamp_to_time(t) for t in bars[:, 0]])
    return pd.DataFrame(features, index=index)


# Build one frame per bar series. Each source array is deleted right after use
# and a collection is forced to keep peak memory down. Because the arrays are
# removed from the namespace, the loading cell must be re-run before this cell
# can be executed again.
df_short = _build_feature_frame(dollar_bar_15m, SHORT_TERM)
del dollar_bar_15m
gc.collect()

df_mid = _build_feature_frame(label_dollar_bar, MID_TERM)
del label_dollar_bar
gc.collect()

df_long = _build_feature_frame(dollar_bar_4h, LONG_TERM)
del dollar_bar_4h
gc.collect()

# Column-wise concat aligns the three frames on the union of their timestamps.
df_features = pd.concat([df_short, df_mid, df_long], axis=1)

# Keep only rows where the mid-term `acr` feature is present, then forward-fill
# the remaining gaps from the previous kept row.
# NOTE(review): the filter runs before ffill, so short/long-term values that sit
# on timestamps without a mid-term bar are dropped rather than carried forward.
# Confirm this is intended and not meant to be "ffill, then select mid-term rows".
df_features = df_features[df_features[f"{MID_TERM}_acr"].notna()].ffill()
df_features.shape

(59138, 11841)

In [3]:
# Number of missing values left in each column, worst columns first.
(
    df_features
    .isna()
    .sum()  # column-wise: axis=0 is the DataFrame.sum default
    .sort_values(ascending=False)
)

4h_williams_r_ddt_lag39         538
4h_acc_swing_index_lag22        538
4h_hurst_coef_fast_ddt_lag21    538
4h_hurst_coef_fast_ddt_lag20    538
4h_hurst_coef_fast_ddt_lag19    538
                               ... 
45m_mod_stochastic                0
45m_conv_5                        0
45m_conv_4                        0
45m_conv_3                        0
45m_ac_45                         0
Length: 11841, dtype: int64

In [4]:
# Persist the combined feature table; the file name carries the mid-term prefix.
df_features.to_parquet(path=f"data/features_{MID_TERM}.parquet")