In [1]:
import numpy as np

# Load the label-bearing 14m bars. The file carries one more column than the
# plain bar files (presumably the label side, judging by the file name --
# confirm); it is sliced off so all three arrays end up with the same width.
label_dollar_bar = np.load("data/label_side_14m.npy")
label_dollar_bar = label_dollar_bar[:, :-1]

# Plain dollar bars for the shorter and longer horizons.
dollar_bar_7m = np.load("data/dollar_bar_7m.npy")
dollar_bar_84m = np.load("data/dollar_bar_84m.npy")

# Sanity check: row counts differ per horizon, column counts should match.
print(
    label_dollar_bar.shape,
    dollar_bar_7m.shape,
    dollar_bar_84m.shape,
)

(190209, 6) (379735, 6) (32006, 6)


In [2]:
import pandas as pd
from jesse import helpers
import gc

from custom_indicators.all_features import feature_bundle

SHORT_TERM = "7m"
MID_TERM = "14m"
LONG_TERM = "84m"


def _build_feature_frame(bars, term):
    """Compute the feature bundle for one bar series as a time-indexed frame.

    Args:
        bars: 2-D bar array whose first column holds the bar timestamps
            (each one is converted with ``helpers.timestamp_to_time``).
        term: Bar-size tag (e.g. ``"7m"``) prefixed to every feature name so
            that columns from different bar sizes stay distinct once the
            frames are concatenated side by side.

    Returns:
        DataFrame with one ``"{term}_{feature}"`` column per item returned by
        ``feature_bundle(bars, sequential=True)`` and a ``DatetimeIndex``
        built from ``bars[:, 0]``.
    """
    features = {
        f"{term}_{name}": values
        for name, values in feature_bundle(bars, sequential=True).items()
    }
    index = pd.DatetimeIndex([helpers.timestamp_to_time(t) for t in bars[:, 0]])
    return pd.DataFrame(features, index=index)


# Each source array is deleted as soon as its frame exists to keep peak memory
# down. As a consequence this cell cannot be re-run on its own: the loading
# cell has to be executed again first.
df_short = _build_feature_frame(dollar_bar_7m, SHORT_TERM)
del dollar_bar_7m
gc.collect()

df_mid = _build_feature_frame(label_dollar_bar, MID_TERM)
del label_dollar_bar
gc.collect()

df_long = _build_feature_frame(dollar_bar_84m, LONG_TERM)
del dollar_bar_84m
gc.collect()

# Align all horizons on the union of their timestamps, keep only the rows that
# have a mid-term bar (detected via the mid-term "acr" feature), then
# forward-fill the gaps left by the other horizons.
# NOTE(review): filtering happens BEFORE the forward fill, so a short/long-term
# value whose timestamp is not also a mid-term timestamp is dropped instead of
# being carried forward -- confirm the bar timestamps are nested as intended.
df_features = pd.concat([df_short, df_mid, df_long], axis=1)
df_features = df_features[df_features[f"{MID_TERM}_acr"].notna()].ffill()
df_features.shape

(190209, 6381)

In [3]:
# Remaining missing values per feature column, worst offenders first.
(
    df_features
    .isna()
    .sum(axis=0)
    .sort_values(ascending=False)
)

84m_williams_r_ddt_lag19       694
84m_natr_dt_lag19              694
84m_dft_dom_cycle_ddt_lag16    694
84m_dft_dom_cycle_ddt_lag15    694
84m_dft_dom_cycle_ddt_lag14    694
                              ... 
14m_conv_1                       0
14m_conv_2                       0
14m_conv_3                       0
14m_vwap                         0
14m_conv_14                      0
Length: 6381, dtype: int64

In [4]:
# Persist the merged feature table; the file name is keyed by the mid-term
# bar size.
df_features.to_parquet(
    f"data/features_{MID_TERM}.parquet",
)