In [1]:
import numpy as np

# Load the pre-computed bar arrays. Column 0 of each array is later used as
# the bar timestamp when the feature frames are indexed.
dollar_bar_4h = np.load("data/dollar_bar_4h.npy")
dollar_bar_10m = np.load("data/dollar_bar_10m.npy")
# The 45m file has one extra trailing column that is sliced off here
# (presumably the label, judging by the file name — TODO confirm).
label_dollar_bar = np.load("data/label_dollar_bar_45m.npy")[:, :-1]

# Quick sanity check: all three arrays should have the same number of columns.
print(*(bars.shape for bars in (label_dollar_bar, dollar_bar_10m, dollar_bar_4h)))

(59137, 6) (268408, 6) (11225, 6)


In [2]:
from jesse import helpers
import pandas as pd
from custom_indicators.all_features import feature_bundle

# Timeframe tags; each is used as the column-name prefix for its features.
SHORT_TERM = "10m"
MID_TERM = "45m"
LONG_TERM = "4h"


def _prefixed_features(bars, term):
    """Compute the feature bundle for ``bars`` and prefix every key with ``term``.

    Args:
        bars: 2-D bar array, passed unchanged to ``feature_bundle``.
        term: Timeframe tag (e.g. ``"10m"``); keys become ``"<term>_<name>"``.

    Returns:
        dict mapping each prefixed feature name to the value that
        ``feature_bundle(bars, sequential=True)`` returned for it.
    """
    return {
        f"{term}_{name}": values
        for name, values in feature_bundle(bars, sequential=True).items()
    }


def _feature_frame(features, bars):
    """Wrap ``features`` in a DataFrame indexed by the timestamps in ``bars[:, 0]``.

    Args:
        features: dict of column name -> per-bar values.
        bars: the bar array the features were computed from; column 0 is
            converted with ``helpers.timestamp_to_time`` to build the index.

    Returns:
        ``pd.DataFrame`` with one row per bar and a ``DatetimeIndex``.
    """
    bar_times = pd.DatetimeIndex(
        [helpers.timestamp_to_time(t) for t in bars[:, 0]]
    )
    return pd.DataFrame(features, index=bar_times)


# Feature dicts first (the expensive part), then the frames built from them.
features_short = _prefixed_features(dollar_bar_10m, SHORT_TERM)
features_mid = _prefixed_features(label_dollar_bar, MID_TERM)
features_long = _prefixed_features(dollar_bar_4h, LONG_TERM)

df_short = _feature_frame(features_short, dollar_bar_10m)
df_mid = _feature_frame(features_mid, label_dollar_bar)
df_long = _feature_frame(features_long, dollar_bar_4h)

In [3]:
# Anchor the combined frame on the mid-term bars whose `acr` feature is
# available; these are the rows the output keeps.
mid_rows = df_mid[df_mid[f"{MID_TERM}_acr"].notna()]
anchor_index = mid_rows.index

# Align the other timeframes to the anchor timestamps with an as-of lookup:
# each anchor row takes the latest short/long bar at or before it.
# Filtering the outer-joined frame first and forward-filling afterwards (the
# previous approach) discarded every short/long bar whose timestamp was not
# exactly an anchor timestamp, so unmatched rows inherited values from the
# last *matching* row instead of the most recent bar. Reindexing per frame
# also avoids materialising the full outer-joined frame.
# `reindex(method="ffill")` requires a monotonic index, i.e. bars in
# chronological order.
# NOTE(review): this assumes a bar's features are already known at the bar's
# own timestamp — confirm, otherwise the join leaks future information.
df_features = pd.concat(
    [
        df_short.reindex(anchor_index, method="ffill"),
        mid_rows,
        df_long.reindex(anchor_index, method="ffill"),
    ],
    axis=1,
).ffill()  # reindex does not fill NaNs already present in the source rows
df_features.shape

(59137, 11841)

In [4]:
# Remaining NaNs per feature column, worst offenders first.
(
    df_features
    .isna()
    .sum()
    .sort_values(ascending=False)
)

4h_williams_r_ddt_lag39         538
4h_adaptive_stochastic_lag28    538
4h_adaptive_stochastic_lag21    538
4h_adaptive_stochastic_lag22    538
4h_adaptive_stochastic_lag23    538
                               ... 
45m_comb_spectrum_pwr_38          0
45m_comb_spectrum_pwr_37          0
45m_comb_spectrum_pwr_36          0
45m_comb_spectrum_pwr_35          0
45m_comb_spectrum_pwr_28          0
Length: 11841, dtype: int64

In [5]:
# Persist the merged feature table; the file name carries the mid-term tag.
features_path = f"data/features_{MID_TERM}.parquet"
df_features.to_parquet(features_path)