In [1]:
import numpy as np

# Load the saved label array, then keep every column except the last one.
label_dollar_bar = np.load("data/label_side.npy")
label_dollar_bar = label_dollar_bar[:, :-1]
print(label_dollar_bar.shape)

(11332, 6)


In [3]:
import pandas as pd

from custom_indicators.all_features import feature_bundle

# Horizon labels; each one is used as a column-name prefix for its feature set.
SHORT_TERM = "1h"
MID_TERM = "4h"
LONG_TERM = "16h"

# Compute the sequential feature bundle on the label bars and prefix every
# feature name with the mid-term label in a single pass.
feature_mid = {
    f"{MID_TERM}_{name}": values
    for name, values in feature_bundle(label_dollar_bar, sequential=True).items()
}
df_feature_mid = pd.DataFrame(feature_mid)
print(df_feature_mid.shape)
df_feature_mid.head(1)

(11332, 3947)


Unnamed: 0,4h_ac_0,4h_ac_1,4h_ac_2,4h_ac_3,4h_ac_4,4h_ac_5,4h_ac_6,4h_ac_7,4h_ac_8,4h_ac_9,...,4h_williams_r_ddt_lag30,4h_williams_r_ddt_lag31,4h_williams_r_ddt_lag32,4h_williams_r_ddt_lag33,4h_williams_r_ddt_lag34,4h_williams_r_ddt_lag35,4h_williams_r_ddt_lag36,4h_williams_r_ddt_lag37,4h_williams_r_ddt_lag38,4h_williams_r_ddt_lag39
0,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,...,,,,,,,,,,


In [4]:
# For each mid-term time point, build a dollar-bar snapshot as of that time directly from the 1-minute bars
from tqdm.auto import tqdm

from custom_indicators.toolbox.bar.dollar_bar import build_dollar_bar
from model.config import DOLLAR_BAR_THRESHOLD_LONG, DOLLAR_BAR_THRESHOLD_SHORT

# Source candles for the snapshot bars built below; build_bar filters them by
# comparing column 0 against each cut-off value.
# NOTE(review): presumably 1-minute BTC candles with a timestamp in column 0 — confirm.
candles_1m = np.load("data/btc_1m.npy")


def build_bar(candles, threshold, tp_list, func, prefix, max_bars=5000):
    """Build one feature row per cut-off from point-in-time bar snapshots.

    For every ``tp`` in ``tp_list`` the rows of ``candles`` whose first column
    is ``<= tp`` are passed to ``func(rows, threshold, max_bars=max_bars)``.
    ``feature_bundle`` is evaluated on the resulting bars (non-sequential
    mode) and the last element of each feature is stored under the key
    ``f"{prefix}_{name}"``.

    Args:
        candles: 2-D array; column 0 is compared against each ``tp``.
        threshold: Forwarded unchanged to ``func``.
        tp_list: Iterable of cut-off values, processed in order.
        func: Bar builder, called as ``func(rows, threshold, max_bars=...)``.
        prefix: Prepended to every feature name and shown in the progress bar.
        max_bars: Forwarded to ``func``.

    Returns:
        pandas.DataFrame with one row per cut-off whose features could be
        computed, in ``tp_list`` order. Cut-offs for which the feature
        computation raised are skipped, so the frame may have fewer rows than
        ``tp_list``; the number skipped is printed, and a warning is issued
        when the skipped cut-offs are not all at the start of ``tp_list``.
    """
    import warnings

    # Column 0 is the same view on every iteration; slice it once.
    timestamps = candles[:, 0]

    records = []
    skipped = []  # positions in tp_list whose features could not be computed
    total = 0
    for pos, tp in enumerate(tqdm(tp_list, desc=f"Building {prefix} bars")):
        total = pos + 1
        bars = func(candles[timestamps <= tp], threshold, max_bars=max_bars)
        try:
            feature = feature_bundle(bars, sequential=False)
            feature = {f"{prefix}_{k}": v[-1] for k, v in feature.items()}
        except Exception:
            # Only ordinary errors are treated as "no features for this
            # cut-off". A bare ``except`` would also swallow KeyboardInterrupt
            # and SystemExit, making this long loop impossible to stop.
            skipped.append(pos)
            continue

        records.append(feature)

    df_feature = pd.DataFrame.from_records(records)

    print(df_feature.shape)

    if skipped:
        print(f"{prefix}: skipped {len(skipped)} of {total} timestamps")
        # Callers that re-align the result by padding rows at the front rely
        # on every skipped cut-off being at the start; flag it when not.
        if skipped != list(range(len(skipped))):
            warnings.warn(
                f"{prefix}: skipped timestamps are not a leading prefix of "
                f"tp_list (first skipped position {skipped[0]}, last "
                f"{skipped[-1]}); front-padding the result will misalign rows",
                stacklevel=2,
            )

    return df_feature


# Long-horizon snapshot features, one row per label-bar value in column 0.
df_dollar_bar_long = build_bar(
    candles=candles_1m,
    threshold=DOLLAR_BAR_THRESHOLD_LONG,
    tp_list=label_dollar_bar[:, 0],
    func=build_dollar_bar,
    prefix=LONG_TERM,
)

# Short-horizon snapshot features over the same cut-offs.
df_dollar_bar_short = build_bar(
    candles=candles_1m,
    threshold=DOLLAR_BAR_THRESHOLD_SHORT,
    tp_list=label_dollar_bar[:, 0],
    func=build_dollar_bar,
    prefix=SHORT_TERM,
)

print(f"{df_dollar_bar_long.shape = }")
print(f"{df_dollar_bar_short.shape = }")

  from .autonotebook import tqdm as notebook_tqdm
Building 16h bars: 100%|██████████| 11332/11332 [10:47<00:00, 17.51it/s]


(11129, 3947)


Building 1h bars: 100%|██████████| 11332/11332 [09:42<00:00, 19.44it/s]


(11319, 3947)
df_dollar_bar_long.shape = (11129, 3947)
df_dollar_bar_short.shape = (11319, 3947)


In [5]:
def fill_row(df, target_df):
    """Pad ``df`` with leading all-NaN rows until it is as long as ``target_df``.

    When ``df`` already has at least as many rows as ``target_df`` it is
    returned unchanged (the same object). Otherwise the missing rows are
    inserted at the top and the result gets a fresh 0..n-1 index.
    """
    missing = len(target_df) - len(df)
    if missing <= 0:
        return df

    # The padding goes in front, so the original rows end up at the bottom.
    padding = pd.DataFrame(np.nan, index=range(missing), columns=df.columns)
    return pd.concat([padding, df], ignore_index=True)


# Pad both snapshot frames to the row count of the mid-term feature frame.
df_dollar_bar_long_aligned, df_dollar_bar_short_aligned = (
    fill_row(frame, df_feature_mid)
    for frame in (df_dollar_bar_long, df_dollar_bar_short)
)

# All three frames must have the same number of rows before they are joined.
assert (
    len(df_dollar_bar_long_aligned)
    == len(df_dollar_bar_short_aligned)
    == len(df_feature_mid)
)

In [6]:
# Join the long, mid and short feature frames side by side (long first).
df_features = pd.concat(
    (df_dollar_bar_long_aligned, df_feature_mid, df_dollar_bar_short_aligned),
    axis="columns",
)
print(df_features.shape)
df_features.head(1)

(11332, 11841)


Unnamed: 0,16h_ac_0,16h_ac_1,16h_ac_2,16h_ac_3,16h_ac_4,16h_ac_5,16h_ac_6,16h_ac_7,16h_ac_8,16h_ac_9,...,1h_williams_r_ddt_lag30,1h_williams_r_ddt_lag31,1h_williams_r_ddt_lag32,1h_williams_r_ddt_lag33,1h_williams_r_ddt_lag34,1h_williams_r_ddt_lag35,1h_williams_r_ddt_lag36,1h_williams_r_ddt_lag37,1h_williams_r_ddt_lag38,1h_williams_r_ddt_lag39
0,,,,,,,,,,,...,,,,,,,,,,


In [7]:
# Missing-value count per feature column, most incomplete columns first.
(
    df_features
    .isna()
    .sum()
    .sort_values(ascending=False)
)

16h_dft_dom_cycle_ddt_lag39    361
16h_dft_dom_cycle_ddt_lag38    357
16h_dft_dom_cycle_dt_lag39     357
16h_dft_dom_cycle_lag39        353
16h_dft_dom_cycle_ddt_lag37    353
                              ... 
4h_comb_spectrum_pwr_28          0
4h_comb_spectrum_pwr_27          0
4h_comb_spectrum_pwr_26          0
4h_comb_spectrum_pwr_24          0
4h_ac_42                         0
Length: 11841, dtype: int64

In [9]:
# Persist the combined feature frame; the file name carries the MID_TERM label.
df_features.to_parquet(f"data/features_{MID_TERM}.parquet")