In [1]:
import numpy as np

# Load the saved bar array and keep every column except the last one
# (presumably the label/side column, given the file name -- TODO confirm).
# Column 0 of the result is later used as the per-row cut-off value.
label_dollar_bar = np.load("data/label_side.npy")[:, :-1]
print(label_dollar_bar.shape)

(9058, 6)


In [2]:
import pandas as pd

from src.features.all_features import feature_bundle

# Horizon tags; each one is used as the column-name prefix of its feature group.
SHORT_TERM, MID_TERM, LONG_TERM = "100m", "5h", "15h"

# Mid-term features are computed in sequential mode over the whole bar array,
# then every feature name is tagged with the mid-term prefix.
feature_mid = {
    f"{MID_TERM}_{name}": values
    for name, values in feature_bundle(label_dollar_bar, sequential=True).items()
}
df_feature_mid = pd.DataFrame(feature_mid)
print(df_feature_mid.shape)
df_feature_mid.head(1)

(9058, 3947)


Unnamed: 0,5h_ac_0,5h_ac_1,5h_ac_2,5h_ac_3,5h_ac_4,5h_ac_5,5h_ac_6,5h_ac_7,5h_ac_8,5h_ac_9,...,5h_williams_r_ddt_lag30,5h_williams_r_ddt_lag31,5h_williams_r_ddt_lag32,5h_williams_r_ddt_lag33,5h_williams_r_ddt_lag34,5h_williams_r_ddt_lag35,5h_williams_r_ddt_lag36,5h_williams_r_ddt_lag37,5h_williams_r_ddt_lag38,5h_williams_r_ddt_lag39
0,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,...,,,,,,,,,,


In [3]:
# For each mid-term time point, build a dollar-bar snapshot for that time point
# directly from the 1-minute bars.
from tqdm.auto import tqdm

from bar import build_dollar_bar
from model.config import DOLLAR_BAR_THRESHOLD_LONG, DOLLAR_BAR_THRESHOLD_SHORT

# 1-minute candle array; its column 0 is compared against each mid-term time
# point when the snapshots are built.
candles_1m = np.load("data/btc_1m.npy")


def build_bar(candles, threshold, tp_list, func, prefix, max_bars=5000, pad_failed=False):
    """Build one row of point-in-time features for every cut-off in ``tp_list``.

    For each ``tp`` the rows of ``candles`` whose column 0 is ``<= tp`` are
    handed to ``func`` to build bars, ``feature_bundle`` is evaluated on those
    bars in non-sequential mode, and the last element of every feature is kept
    under the key ``"{prefix}_{feature_name}"``.

    Args:
        candles: 2-D array; column 0 is compared against each ``tp``
            (presumably a timestamp -- confirm against the data source).
        threshold: Forwarded unchanged to ``func``.
        tp_list: Iterable of cut-off values; one feature row is attempted per value.
        func: Bar builder, called as ``func(subset, threshold, max_bars=max_bars)``.
        prefix: String prepended to every feature name and shown in the progress bar.
        max_bars: Forwarded to ``func`` as a keyword argument.
        pad_failed: If False (default), cut-offs whose feature computation
            raises are skipped, so the result can have fewer rows than
            ``tp_list`` and is then no longer row-aligned with it. If True, an
            all-NaN row is emitted for each failure so row ``i`` always
            corresponds to the ``i``-th cut-off.

    Returns:
        pandas.DataFrame with one row per successful cut-off (or per cut-off
        when ``pad_failed`` is True) and one column per prefixed feature.
    """
    records = []
    n_total = 0
    n_failed = 0
    for tp in tqdm(tp_list, desc=f"Building {prefix} bars"):
        n_total += 1
        # Errors raised by the bar builder itself are not swallowed.
        bars = func(candles[candles[:, 0] <= tp], threshold, max_bars=max_bars)
        try:
            raw = feature_bundle(bars, sequential=False)
            record = {f"{prefix}_{k}": v[-1] for k, v in raw.items()}
        except Exception:
            # Only ordinary errors are tolerated; KeyboardInterrupt/SystemExit
            # must still be able to stop this long-running loop.
            n_failed += 1
            if pad_failed:
                # An empty record becomes an all-NaN row once the frame is built.
                records.append({})
            continue

        records.append(record)

    df_feature = pd.DataFrame.from_records(records)

    if n_failed:
        print(f"{prefix}: feature computation failed for {n_failed} of {n_total} cut-offs")
    print(df_feature.shape)

    return df_feature


# The long- and short-horizon snapshots share the same candles, cut-off column
# and bar builder; only the dollar threshold and the column prefix differ.
df_dollar_bar_long = build_bar(
    candles=candles_1m,
    threshold=DOLLAR_BAR_THRESHOLD_LONG,
    tp_list=label_dollar_bar[:, 0],
    func=build_dollar_bar,
    prefix=LONG_TERM,
)
df_dollar_bar_short = build_bar(
    candles=candles_1m,
    threshold=DOLLAR_BAR_THRESHOLD_SHORT,
    tp_list=label_dollar_bar[:, 0],
    func=build_dollar_bar,
    prefix=SHORT_TERM,
)

# Report both resulting shapes, one per line.
print(
    f"{df_dollar_bar_long.shape = }",
    f"{df_dollar_bar_short.shape = }",
    sep="\n",
)

  from .autonotebook import tqdm as notebook_tqdm
Building 15h bars: 100%|██████████| 9058/9058 [09:01<00:00, 16.72it/s]


(8907, 3947)


Building 100m bars: 100%|██████████| 9058/9058 [07:59<00:00, 18.88it/s]


(9042, 3947)
df_dollar_bar_long.shape = (8907, 3947)
df_dollar_bar_short.shape = (9042, 3947)


In [8]:
def fill_row(df, target_df):
    """Front-pad ``df`` with all-NaN rows until it is as long as ``target_df``.

    Args:
        df: Frame to pad; its columns define the columns of the padding rows.
        target_df: Frame whose row count is the desired length.

    Returns:
        ``df`` itself when it already has at least as many rows as
        ``target_df``; otherwise a new frame with the NaN rows placed first,
        followed by the rows of ``df``, and a fresh 0..n-1 index.
    """
    n_missing = len(target_df) - len(df)
    if n_missing <= 0:
        # Nothing to add: hand back the caller's frame untouched.
        return df

    padding = pd.DataFrame(np.nan, index=range(n_missing), columns=df.columns)
    return pd.concat([padding, df], ignore_index=True)


# Pad both snapshot frames at the front so they have as many rows as the
# mid-term frame. NOTE(review): this presumes the missing rows are the earliest
# ones -- confirm, since build_bar can skip any cut-off.
df_dollar_bar_long_aligned, df_dollar_bar_short_aligned = (
    fill_row(frame, df_feature_mid)
    for frame in (df_dollar_bar_long, df_dollar_bar_short)
)

# Both padded frames must now match the mid-term frame row for row.
assert all(
    len(frame) == len(df_feature_mid)
    for frame in (df_dollar_bar_long_aligned, df_dollar_bar_short_aligned)
)

In [9]:
# Place the long-, mid- and short-horizon feature frames side by side; rows are
# matched on index, so row i of each frame lands in row i of the result.
df_features = pd.concat(
    [df_dollar_bar_long_aligned, df_feature_mid, df_dollar_bar_short_aligned],
    axis="columns",
)
print(df_features.shape)
df_features.head(1)

(9058, 11841)


Unnamed: 0,15h_ac_0,15h_ac_1,15h_ac_2,15h_ac_3,15h_ac_4,15h_ac_5,15h_ac_6,15h_ac_7,15h_ac_8,15h_ac_9,...,100m_williams_r_ddt_lag30,100m_williams_r_ddt_lag31,100m_williams_r_ddt_lag32,100m_williams_r_ddt_lag33,100m_williams_r_ddt_lag34,100m_williams_r_ddt_lag35,100m_williams_r_ddt_lag36,100m_williams_r_ddt_lag37,100m_williams_r_ddt_lag38,100m_williams_r_ddt_lag39
0,,,,,,,,,,,...,,,,,,,,,,


In [10]:
# Count missing values per column and list the columns with the most NaNs first.
df_features.isna().sum(axis=0).sort_values(ascending=False)

15h_dft_dom_cycle_ddt_lag39    268
15h_dft_dom_cycle_ddt_lag38    265
15h_dft_dom_cycle_dt_lag39     265
15h_dft_dom_cycle_lag39        262
15h_dft_dom_cycle_ddt_lag37    262
                              ... 
5h_comb_spectrum_pwr_37          0
5h_comb_spectrum_pwr_36          0
5h_comb_spectrum_pwr_35          0
5h_comb_spectrum_pwr_34          0
5h_comb_spectrum_pwr_26          0
Length: 11841, dtype: int64

In [12]:
# Persist the combined feature table; the file name carries the mid-term horizon tag.
df_features.to_parquet(f"data/features_{MID_TERM}.parquet")