In [1]:
import numpy as np

# Pre-built dollar-bar arrays for the short (15m) and long (4h) horizons.
dollar_bar_15m = np.load("data/dollar_bar_15m.npy")
dollar_bar_4h = np.load("data/dollar_bar_4h.npy")

# Mid-term (1h) array; the slice keeps every column except the last one
# (presumably the label/side column -- confirm against the file's producer).
label_dollar_bar = np.load("data/label_side_1h.npy")[:, :-1]

print(*(bars.shape for bars in (label_dollar_bar, dollar_bar_15m, dollar_bar_4h)))

(44867, 6) (177382, 6) (11225, 6)


In [2]:
import gc

import pandas as pd
from jesse import helpers

from custom_indicators.all_features import feature_bundle

# Timeframe tags; each one is prepended to the feature names of its horizon.
SHORT_TERM, MID_TERM, LONG_TERM = "15m", "1h", "4h"

# Features over the whole mid-term series (sequential=True), with every
# feature name tagged by the mid-term prefix.
feature_mid = {
    f"{MID_TERM}_{name}": values
    for name, values in feature_bundle(label_dollar_bar, sequential=True).items()
}
df_feature_mid = pd.DataFrame(feature_mid)
print(df_feature_mid.shape)
df_feature_mid.head(1)

(44867, 3947)


Unnamed: 0,1h_ac_0,1h_ac_1,1h_ac_2,1h_ac_3,1h_ac_4,1h_ac_5,1h_ac_6,1h_ac_7,1h_ac_8,1h_ac_9,...,1h_williams_r_ddt_lag30,1h_williams_r_ddt_lag31,1h_williams_r_ddt_lag32,1h_williams_r_ddt_lag33,1h_williams_r_ddt_lag34,1h_williams_r_ddt_lag35,1h_williams_r_ddt_lag36,1h_williams_r_ddt_lag37,1h_williams_r_ddt_lag38,1h_williams_r_ddt_lag39
0,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,...,,,,,,,,,,


In [3]:
# For every mid-term timestamp, build a dollar-bar snapshot as of that time directly from the 1-minute bars
from tqdm.auto import tqdm

from custom_indicators.config import (
    DOLLAR_BAR_THRESHOLD_LONG,
    DOLLAR_BAR_THRESHOLD_SHORT,
)
from custom_indicators.toolbox.dollar_bar import build_dollar_bar

candles_1m = np.load("data/btc_1m.npy")

# Per-snapshot frames are merged in batches of this size. This avoids
# re-copying the whole accumulated frame on every iteration (quadratic) while
# also not keeping tens of thousands of tiny DataFrames alive at once.
CONCAT_BATCH_SIZE = 512


class _FrameAccumulator:
    """Collect per-snapshot frames and stack them row-wise in batches."""

    def __init__(self, batch_size=CONCAT_BATCH_SIZE):
        self._batch_size = batch_size
        self._pending = []  # frames not merged yet
        self._chunks = []  # merged batches, in insertion order

    def add(self, frame):
        """Queue one frame; merge the queue once it holds a full batch."""
        self._pending.append(frame)
        if len(self._pending) >= self._batch_size:
            self._merge_pending()

    def _merge_pending(self):
        """Collapse the queued frames into a single chunk."""
        if self._pending:
            self._chunks.append(pd.concat(self._pending, axis=0, ignore_index=True))
            self._pending = []

    def result(self):
        """Return every added row as one frame, or None if nothing was added."""
        self._merge_pending()
        if not self._chunks:
            return None
        return pd.concat(self._chunks, axis=0, ignore_index=True)


def _snapshot_features(candles, threshold, prefix):
    """Compute the feature frame for one dollar-bar snapshot.

    Args:
        candles: 1m candles to aggregate, already cut off at the snapshot time.
        threshold: threshold forwarded to ``build_dollar_bar``.
        prefix: timeframe tag prepended to every feature name.

    Returns:
        A DataFrame built from the prefixed feature dict, or ``None`` when the
        feature computation raised for this snapshot.
    """
    bars = build_dollar_bar(candles, threshold, max_bars=5000)
    try:
        features = feature_bundle(bars, sequential=False)
        features = {f"{prefix}_{k}": v for k, v in features.items()}
    except Exception:
        # Best-effort skip of snapshots the feature code cannot handle.
        # `Exception` (rather than a bare `except:`) lets KeyboardInterrupt
        # through, so this long-running loop can still be stopped.
        return None
    return pd.DataFrame(features)


long_frames = _FrameAccumulator()
short_frames = _FrameAccumulator()
skipped_long = 0
skipped_short = 0

timestamps_1m = candles_1m[:, 0]

for tp in tqdm(label_dollar_bar[:, 0]):
    # Only candles up to and including the snapshot time are visible.
    # The slice is computed once and shared by both horizons.
    # NOTE(review): assumes build_dollar_bar does not modify its input -- confirm.
    candles_until_tp = candles_1m[timestamps_1m <= tp]

    # The two horizons are handled independently: a failure on one must not
    # drop the other's row for this timestamp.
    frame_long = _snapshot_features(
        candles_until_tp, DOLLAR_BAR_THRESHOLD_LONG, LONG_TERM
    )
    if frame_long is None:
        skipped_long += 1
    else:
        long_frames.add(frame_long)

    frame_short = _snapshot_features(
        candles_until_tp, DOLLAR_BAR_THRESHOLD_SHORT, SHORT_TERM
    )
    if frame_short is None:
        skipped_short += 1
    else:
        short_frames.add(frame_short)

# Each result holds only the snapshots that succeeded, in timestamp order
# (None when every snapshot failed).
df_feature_long = long_frames.result()
df_feature_short = short_frames.result()

print(f"skipped snapshots - long: {skipped_long}, short: {skipped_short}")
print(df_feature_long.shape)
print(df_feature_short.shape)

  from .autonotebook import tqdm as notebook_tqdm
100%|██████████| 44867/44867 [1:56:42<00:00,  6.41it/s]  


(44662, 3947)


100%|██████████| 44867/44867 [1:55:35<00:00,  6.47it/s]  

(44856, 3947)





In [4]:
def _pad_front_with_nan(df, target_len):
    """Return ``df`` with all-NaN rows prepended so it has ``target_len`` rows.

    Args:
        df: frame to pad; its columns are reused for the padding rows.
        target_len: number of rows the returned frame must have.

    Returns:
        A new frame with a fresh 0..target_len-1 index. When ``df`` already has
        ``target_len`` rows it is returned unpadded (index reset only).

    Raises:
        ValueError: if ``df`` has more rows than ``target_len``.
    """
    rows_to_add = target_len - len(df)
    if rows_to_add < 0:
        raise ValueError(
            f"frame has {len(df)} rows, more than the target length {target_len}"
        )
    if rows_to_add == 0:
        # Nothing to pad; still hand back a frame so the caller's name is
        # always bound (previously this case left the aligned variable undefined).
        return df.reset_index(drop=True)
    empty_rows = pd.DataFrame(np.nan, index=range(rows_to_add), columns=df.columns)
    return pd.concat([empty_rows, df], ignore_index=True)


# NOTE(review): padding at the front assumes every missing snapshot belongs to
# the start of the series (e.g. not enough history yet) -- confirm; a snapshot
# skipped mid-series would shift all later rows by one.
df_feature_long_aligned = _pad_front_with_nan(df_feature_long, len(df_feature_mid))
df_feature_short_aligned = _pad_front_with_nan(df_feature_short, len(df_feature_mid))

assert len(df_feature_long_aligned) == len(df_feature_mid)
assert len(df_feature_short_aligned) == len(df_feature_mid)

In [5]:
# Side-by-side join in short / mid / long column order; pd.concat matches
# rows across the three frames by index label.
df_features = pd.concat(
    (df_feature_short_aligned, df_feature_mid, df_feature_long_aligned),
    axis="columns",
)
print(df_features.shape)
df_features.head(1)

(44867, 11841)


Unnamed: 0,15m_ac_0,15m_ac_1,15m_ac_2,15m_ac_3,15m_ac_4,15m_ac_5,15m_ac_6,15m_ac_7,15m_ac_8,15m_ac_9,...,4h_williams_r_ddt_lag30,4h_williams_r_ddt_lag31,4h_williams_r_ddt_lag32,4h_williams_r_ddt_lag33,4h_williams_r_ddt_lag34,4h_williams_r_ddt_lag35,4h_williams_r_ddt_lag36,4h_williams_r_ddt_lag37,4h_williams_r_ddt_lag38,4h_williams_r_ddt_lag39
0,,,,,,,,,,,...,,,,,,,,,,


In [6]:
# Number of missing values per feature column, worst columns first.
(
    df_features.isna()
    .sum(axis="index")
    .sort_values(ascending=False)
)

4h_dft_dom_cycle_ddt_lag39    364
4h_dft_dom_cycle_ddt_lag38    360
4h_dft_dom_cycle_dt_lag39     360
4h_dft_dom_cycle_ddt_lag37    356
4h_dft_dom_cycle_lag39        356
                             ... 
1h_ac_23                        0
1h_ac_22                        0
1h_mod_stochastic               0
1h_trendflex                    0
1h_ac_31                        0
Length: 11841, dtype: int64

In [7]:
# Persist the combined feature matrix; the file name carries the mid-term tag.
df_features.to_parquet(path=f"data/features_{MID_TERM}.parquet")