In [1]:
import numpy as np

# Load the three pre-built bar arrays from disk.
# The last column of the label file is dropped here (presumably the label/side
# column, leaving only the bar fields — TODO confirm against the file's writer).
label_dollar_bar = np.load("data/label_side_25m.npy")
label_dollar_bar = label_dollar_bar[:, :-1]
dollar_bar_10m = np.load("data/dollar_bar_10m.npy")
dollar_bar_2h = np.load("data/dollar_bar_2h.npy")

# Sanity check: show the (rows, columns) shape of each array on one line.
print(*(bars.shape for bars in (label_dollar_bar, dollar_bar_10m, dollar_bar_2h)))

(106174, 6) (268413, 6) (22243, 6)


In [2]:
import gc

import pandas as pd
from jesse import helpers

from custom_indicators.all_features import feature_bundle

# Prefixes used to namespace the feature columns of each bar series.
SHORT_TERM = "10m"
MID_TERM = "25m"
LONG_TERM = "2h"


def _build_feature_frame(bars: np.ndarray, prefix: str) -> pd.DataFrame:
    """Compute the feature bundle for one bar series and return it as a DataFrame.

    Args:
        bars: 2-D bar array. Column 0 is converted element-wise with
            ``helpers.timestamp_to_time`` to build the index (presumably an
            epoch timestamp — TODO confirm).
        prefix: Label prepended to every feature name as ``f"{prefix}_{name}"``.

    Returns:
        A DataFrame with one column per feature returned by
        ``feature_bundle(bars, sequential=True)`` and a ``DatetimeIndex``
        derived from column 0 of ``bars``. Assumes each feature holds one value
        per row of ``bars`` — verify against ``feature_bundle``.
    """
    features = {
        f"{prefix}_{k}": v
        for k, v in feature_bundle(bars, sequential=True).items()
    }
    index = pd.DatetimeIndex([helpers.timestamp_to_time(t) for t in bars[:, 0]])
    return pd.DataFrame(features, index=index)


# Each source array is deleted as soon as its frame exists and the garbage
# collector is run, to keep peak memory down. As a consequence this cell cannot
# be re-run without re-running the loading cell first.
df_short = _build_feature_frame(dollar_bar_10m, SHORT_TERM)
del dollar_bar_10m
gc.collect()

df_mid = _build_feature_frame(label_dollar_bar, MID_TERM)
del label_dollar_bar
gc.collect()

df_long = _build_feature_frame(dollar_bar_2h, LONG_TERM)
del dollar_bar_2h
gc.collect()

# Column-wise concat aligns the three frames on the union of their timestamps.
df_features = pd.concat([df_short, df_mid, df_long], axis=1)

# Keep only rows where the mid-term `acr` feature is present, then forward-fill
# the remaining gaps from the previous *kept* row.
# NOTE(review): because the filter runs before the fill, 10m/2h rows whose
# timestamp does not coincide with a 25m bar are discarded without contributing
# to the fill. Confirm this order is intended.
df_features = df_features[df_features[f"{MID_TERM}_acr"].notna()].ffill()
df_features.shape

(106174, 11841)

In [3]:
# Number of missing values in each feature column, worst offenders first.
(
    df_features
    .isna()
    .sum(axis="index")
    .sort_values(ascending=False)
)

2h_williams_r_ddt_lag39               808
2h_evenbetter_sinewave_short_lag28    808
2h_evenbetter_sinewave_short_lag20    808
2h_evenbetter_sinewave_short_lag21    808
2h_evenbetter_sinewave_short_lag22    808
                                     ... 
25m_comb_spectrum_pwr_9                 0
25m_comb_spectrum_pwr_8                 0
25m_comb_spectrum_pwr_7                 0
25m_comb_spectrum_pwr_6                 0
25m_ac_37                               0
Length: 11841, dtype: int64

In [4]:
# Persist the combined feature table; the file name carries the mid-term label.
features_path = f"data/features_{MID_TERM}.parquet"
df_features.to_parquet(features_path)