In [1]:
import numpy as np

# Load the saved array and drop its last column via [:, :-1].
# NOTE(review): the file name suggests the dropped column is the side label and
# the remaining columns are the bar data — confirm against the script that wrote it.
label_dollar_bar = np.load("data/label_side.npy")[:, :-1]
print(label_dollar_bar.shape)

(22445, 6)


In [2]:
import pandas as pd

from custom_indicators.all_features import feature_bundle

# Horizon tags. MID_TERM prefixes the feature columns built in this cell.
SHORT_TERM, MID_TERM, LONG_TERM = "30m", "2h", "6h"

# Run the feature bundle over the label bars and tag every feature name with
# the mid-term prefix in a single pass.
feature_mid = {
    f"{MID_TERM}_{name}": values
    for name, values in feature_bundle(label_dollar_bar, sequential=True).items()
}
df_feature_mid = pd.DataFrame(feature_mid)
print(df_feature_mid.shape)
df_feature_mid.head(1)

(22445, 3947)


Unnamed: 0,2h_ac_0,2h_ac_1,2h_ac_2,2h_ac_3,2h_ac_4,2h_ac_5,2h_ac_6,2h_ac_7,2h_ac_8,2h_ac_9,...,2h_williams_r_ddt_lag30,2h_williams_r_ddt_lag31,2h_williams_r_ddt_lag32,2h_williams_r_ddt_lag33,2h_williams_r_ddt_lag34,2h_williams_r_ddt_lag35,2h_williams_r_ddt_lag36,2h_williams_r_ddt_lag37,2h_williams_r_ddt_lag38,2h_williams_r_ddt_lag39
0,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,...,,,,,,,,,,


In [3]:
# For each mid-term timestamp, build a dollar-bar snapshot as of that timestamp directly from the 1-minute bars.
from tqdm.auto import tqdm

from model.config import (
    DOLLAR_BAR_THRESHOLD_LONG,
    DOLLAR_BAR_THRESHOLD_SHORT,
)
from custom_indicators.toolbox.bar.dollar_bar import build_dollar_bar

# Candle array consumed by build_bar, which filters it by comparing column 0
# against each cut-off value (presumably column 0 is the candle timestamp — confirm).
candles_1m = np.load("data/btc_1m.npy")


def build_bar(candles, threshold, tp_list, func, prefix, max_bars=5000):
    """Build prefixed feature rows from bars reconstructed up to each cut-off.

    For every ``tp`` in ``tp_list`` the rows of ``candles`` whose first column
    is ``<= tp`` are passed to ``func(subset, threshold, max_bars=max_bars)``.
    The resulting bars go through ``feature_bundle(bars, sequential=False)``
    and every feature name is prefixed with ``"{prefix}_"``.

    Cut-offs whose feature computation raises are skipped (best effort), so the
    result may contain fewer rows than ``tp_list`` has entries and row positions
    do NOT map one-to-one onto ``tp_list``. The number of skipped cut-offs is
    printed so the gap is visible to the reader.

    Args:
        candles: 2-D array; column 0 is compared against each ``tp``.
        threshold: Forwarded unchanged to ``func``.
        tp_list: Iterable of cut-off values.
        func: Bar builder called as ``func(candles_subset, threshold, max_bars=...)``.
        prefix: String prepended (with an underscore) to every feature name.
        max_bars: Forwarded to ``func`` as the ``max_bars`` keyword.

    Returns:
        A DataFrame with the per-cut-off feature frames stacked in order under
        a fresh 0..n-1 index; an empty DataFrame if every cut-off was skipped.
    """
    frames = []
    skipped = 0
    for tp in tqdm(tp_list, desc=f"Building {prefix} bars"):
        # Errors from the bar builder itself are not swallowed (same as before).
        bars = func(candles[candles[:, 0] <= tp], threshold, max_bars=max_bars)
        try:
            feature = feature_bundle(bars, sequential=False)
            feature = {f"{prefix}_{k}": v for k, v in feature.items()}
        except Exception:
            # Narrowed from a bare ``except`` so that KeyboardInterrupt /
            # SystemExit can still stop this long-running loop.
            skipped += 1
            continue

        frames.append(pd.DataFrame(feature))

    if skipped:
        print(f"{prefix}: skipped {skipped} timestamps whose feature computation failed")

    # Concatenate once instead of re-copying the growing frame on every iteration.
    if frames:
        df_feature = pd.concat(frames, axis=0, ignore_index=True)
    else:
        # Previously this path crashed on ``None.shape``.
        df_feature = pd.DataFrame()

    print(df_feature.shape)

    return df_feature


# Long-horizon snapshot features; column 0 of the label bars supplies the
# cut-off values. The prefix reuses LONG_TERM (value "6h") instead of repeating
# the literal, so the column prefix cannot drift from the constant.
df_dollar_bar_long = build_bar(
    candles_1m,
    DOLLAR_BAR_THRESHOLD_LONG,
    label_dollar_bar[:, 0],
    build_dollar_bar,
    LONG_TERM,
)

# Short-horizon snapshot features, same cut-offs; prefix is SHORT_TERM ("30m").
df_dollar_bar_short = build_bar(
    candles_1m,
    DOLLAR_BAR_THRESHOLD_SHORT,
    label_dollar_bar[:, 0],
    build_dollar_bar,
    SHORT_TERM,
)

print(f"{df_dollar_bar_long.shape = }")
print(f"{df_dollar_bar_short.shape = }")

  from .autonotebook import tqdm as notebook_tqdm
Building 6h bars: 100%|██████████| 22445/22445 [42:35<00:00,  8.78it/s]


(22293, 3947)


Building 30m bars: 100%|██████████| 22445/22445 [40:54<00:00,  9.14it/s]

(22433, 3947)
df_dollar_bar_long.shape = (22293, 3947)
df_dollar_bar_short.shape = (22433, 3947)





In [4]:
def fill_row(df, target_df):
    """Left-pad ``df`` with all-NaN rows until it is as long as ``target_df``.

    The padding rows are placed *before* the existing rows and the result gets
    a fresh 0..n-1 index. When ``df`` is already at least as long as
    ``target_df`` it is returned as-is (the same object, no copy).
    """
    missing = len(target_df) - len(df)
    if missing <= 0:
        return df

    padding = pd.DataFrame(np.nan, index=range(missing), columns=df.columns)
    return pd.concat([padding, df], ignore_index=True)


# fill_row prepends NaN rows, so the shorter snapshot frames end up
# right-aligned against df_feature_mid.
# NOTE(review): build_bar drops any timestamp whose feature computation raised,
# wherever it occurs in the series; prepending padding lines the rows up
# correctly only if every dropped timestamp was at the start — confirm.
df_dollar_bar_long_aligned = fill_row(df_dollar_bar_long, df_feature_mid)
df_dollar_bar_short_aligned = fill_row(df_dollar_bar_short, df_feature_mid)

# These checks cover row counts only, not that rows refer to the same timestamps.
assert len(df_dollar_bar_long_aligned) == len(df_feature_mid)
assert len(df_dollar_bar_short_aligned) == len(df_feature_mid)

In [5]:
# Join the three horizons side by side in the order long, mid, short;
# pd.concat matches rows by index label when concatenating along columns.
df_features = pd.concat(
    (df_dollar_bar_long_aligned, df_feature_mid, df_dollar_bar_short_aligned),
    axis="columns",
)
print(df_features.shape)
df_features.head(1)

(22445, 11841)


Unnamed: 0,6h_ac_0,6h_ac_1,6h_ac_2,6h_ac_3,6h_ac_4,6h_ac_5,6h_ac_6,6h_ac_7,6h_ac_8,6h_ac_9,...,30m_williams_r_ddt_lag30,30m_williams_r_ddt_lag31,30m_williams_r_ddt_lag32,30m_williams_r_ddt_lag33,30m_williams_r_ddt_lag34,30m_williams_r_ddt_lag35,30m_williams_r_ddt_lag36,30m_williams_r_ddt_lag37,30m_williams_r_ddt_lag38,30m_williams_r_ddt_lag39
0,,,,,,,,,,,...,,,,,,,,,,


In [6]:
# Missing-value count per feature column, largest first.
df_features.isna().sum(axis=0).sort_values(ascending=False)

6h_dft_dom_cycle_ddt_lag39    271
6h_dft_dom_cycle_ddt_lag38    268
6h_dft_dom_cycle_dt_lag39     268
6h_dft_dom_cycle_lag39        265
6h_dft_dom_cycle_ddt_lag37    265
                             ... 
2h_conv_9                       0
2h_conv_8                       0
2h_conv_7                       0
2h_conv_6                       0
2h_conv_23                      0
Length: 11841, dtype: int64

In [7]:
# Persist the combined feature table; the file name embeds the MID_TERM tag.
df_features.to_parquet(f"data/features_{MID_TERM}.parquet")