In [1]:
import numpy as np

# Load the labelled bars from disk, then keep every column except the last one
# (presumably the label itself, given the file name -- TODO confirm).
label_dollar_bar = np.load("data/label_side.npy")
label_dollar_bar = label_dollar_bar[:, :-1]
print(label_dollar_bar.shape)

(20650, 6)


In [2]:
import pandas as pd

from custom_indicators.all_features import feature_bundle

# Term tags. MID_TERM is prepended to every mid-term feature name below and is
# also used in the output file name; SHORT_TERM and LONG_TERM are not used in
# this cell.
SHORT_TERM = "p0045"
MID_TERM = "p009"
LONG_TERM = "p018"

# Compute the mid-term feature bundle over the labelled bars in sequential
# mode and tag each feature name with the MID_TERM prefix in a single pass.
feature_mid = {
    f"{MID_TERM}_{name}": values
    for name, values in feature_bundle(label_dollar_bar, sequential=True).items()
}
df_feature_mid = pd.DataFrame(feature_mid)
print(df_feature_mid.shape)
df_feature_mid.head(1)

(20650, 2127)


Unnamed: 0,p009_ac_0,p009_ac_1,p009_ac_2,p009_ac_3,p009_ac_4,p009_ac_5,p009_ac_6,p009_ac_7,p009_ac_8,p009_ac_9,...,p009_williams_r_ddt_lag10,p009_williams_r_ddt_lag11,p009_williams_r_ddt_lag12,p009_williams_r_ddt_lag13,p009_williams_r_ddt_lag14,p009_williams_r_ddt_lag15,p009_williams_r_ddt_lag16,p009_williams_r_ddt_lag17,p009_williams_r_ddt_lag18,p009_williams_r_ddt_lag19
0,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,...,,,,,,,,,,


In [3]:
# For each mid-term timestamp, build a snapshot of the dollar bars (and range bars) as of that moment directly from the 1-minute bars.
from tqdm.auto import tqdm

from custom_indicators.config import (
    DOLLAR_BAR_THRESHOLD_LONG,
    DOLLAR_BAR_THRESHOLD_MID,
    DOLLAR_BAR_THRESHOLD_SHORT,
    RANGE_BAR_THRESHOLD_LONG,
    RANGE_BAR_THRESHOLD_SHORT,
)
from custom_indicators.toolbox.bar.dollar_bar import build_dollar_bar
from custom_indicators.toolbox.bar.range_bar import build_range_bar

# 1-minute BTC candles loaded from disk. build_bar() selects the rows whose
# column 0 is <= each snapshot timestamp before rebuilding bars from them.
candles_1m = np.load("data/btc_1m.npy")


def build_bar(candles, threshold, tp_list, func, prefix, max_bars=5000):
    """Build one block of features per timestamp from bars rebuilt at that timestamp.

    For every ``tp`` in ``tp_list`` the rows of ``candles`` whose column 0 is
    ``<= tp`` are handed to ``func`` together with ``threshold`` and
    ``max_bars``. The bars it returns go through
    ``feature_bundle(bars, sequential=False)`` and every feature name is
    prefixed with ``"{prefix}_"``.

    Timestamps for which the feature computation raises are skipped, so the
    result may have fewer rows than ``tp_list`` and does not record which
    timestamps were dropped.
    NOTE(review): padding the result back to ``len(tp_list)`` at the top is
    only row-aligned if the failures all occur at the start of ``tp_list`` --
    confirm before relying on it.

    Args:
        candles: 2-D array; column 0 is compared against each ``tp``.
        threshold: Passed through to ``func`` as its second positional argument.
        tp_list: Iterable of cut-off values, processed in order.
        func: Bar builder called as ``func(candles_subset, threshold, max_bars=...)``.
        prefix: String prepended (with an underscore) to every feature name;
            also shown in the progress-bar description.
        max_bars: Forwarded to ``func`` as the ``max_bars`` keyword.

    Returns:
        A DataFrame with the per-timestamp feature frames stacked in
        ``tp_list`` order under a fresh 0..n-1 index. An empty DataFrame is
        returned when no timestamp produced features.
    """
    # Collect the per-timestamp frames and concatenate once at the end;
    # concatenating inside the loop re-copies all accumulated rows each time.
    frames = []
    for tp in tqdm(tp_list, desc=f"Building {prefix} bars"):
        bars = func(candles[candles[:, 0] <= tp], threshold, max_bars=max_bars)
        try:
            feature = feature_bundle(bars, sequential=False)
            feature = {f"{prefix}_{k}": v for k, v in feature.items()}
        except Exception:
            # Best effort: skip snapshots whose features cannot be computed.
            # Catching Exception (not a bare except) lets KeyboardInterrupt
            # and SystemExit stop this long-running loop.
            continue

        frames.append(pd.DataFrame(feature))

    if frames:
        df_feature = pd.concat(frames, axis=0, ignore_index=True)
    else:
        # Nothing succeeded (or tp_list was empty): return an empty frame
        # instead of failing on ``None.shape`` below.
        df_feature = pd.DataFrame()

    print(df_feature.shape)

    return df_feature


# Every snapshot below is taken at the values in column 0 of label_dollar_bar,
# which build_bar compares against column 0 of the 1-minute candles.

# Dollar-bar snapshots at the long / mid / short thresholds.
df_dollar_bar_long = build_bar(
    candles_1m,
    DOLLAR_BAR_THRESHOLD_LONG,
    label_dollar_bar[:, 0],
    build_dollar_bar,
    "4h",
)

df_dollar_bar_mid = build_bar(
    candles_1m,
    DOLLAR_BAR_THRESHOLD_MID,
    label_dollar_bar[:, 0],
    build_dollar_bar,
    "1h",
)

df_dollar_bar_short = build_bar(
    candles_1m,
    DOLLAR_BAR_THRESHOLD_SHORT,
    label_dollar_bar[:, 0],
    build_dollar_bar,
    "15m",
)

# Range-bar snapshots. The prefixes come from the LONG_TERM / SHORT_TERM
# constants (same values as the previous literals) so the column names cannot
# drift away from the constants defined next to MID_TERM.
df_range_bar_long = build_bar(
    candles_1m,
    RANGE_BAR_THRESHOLD_LONG,
    label_dollar_bar[:, 0],
    build_range_bar,
    LONG_TERM,
)

df_range_bar_short = build_bar(
    candles_1m,
    RANGE_BAR_THRESHOLD_SHORT,
    label_dollar_bar[:, 0],
    build_range_bar,
    SHORT_TERM,
)

print(f"{df_dollar_bar_long.shape = }")
print(f"{df_dollar_bar_mid.shape = }")
print(f"{df_dollar_bar_short.shape = }")
print(f"{df_range_bar_long.shape = }")
print(f"{df_range_bar_short.shape = }")

  from .autonotebook import tqdm as notebook_tqdm
Building 4h bars: 100%|██████████| 20650/20650 [19:44<00:00, 17.43it/s]


(18039, 2127)


Building 1h bars: 100%|██████████| 20650/20650 [19:52<00:00, 17.31it/s]


(19754, 2127)


Building 15m bars: 100%|██████████| 20650/20650 [20:09<00:00, 17.07it/s]


(20511, 2127)


Building p018 bars: 100%|██████████| 20650/20650 [21:05<00:00, 16.32it/s]


(20471, 2127)


Building p0045 bars: 100%|██████████| 20650/20650 [20:11<00:00, 17.05it/s]

(20635, 2127)
df_dollar_bar_long.shape = (18039, 2127)
df_dollar_bar_mid.shape = (19754, 2127)
df_dollar_bar_short.shape = (20511, 2127)
df_range_bar_long.shape = (20471, 2127)
df_range_bar_short.shape = (20635, 2127)





In [4]:
def fill_row(df, target_df):
    """Pad ``df`` at the top with all-NaN rows until it is as long as ``target_df``.

    Args:
        df: Frame to pad.
        target_df: Frame whose row count is the target length.

    Returns:
        ``df`` itself when it already has at least as many rows as
        ``target_df``; otherwise a new frame with the missing number of NaN
        rows placed before the rows of ``df`` and a fresh 0..n-1 index.
    """
    missing = len(target_df) - len(df)
    if missing <= 0:
        # Already long enough (or longer): hand the input back untouched.
        return df

    padding = pd.DataFrame(np.nan, index=range(missing), columns=df.columns)
    return pd.concat([padding, df], ignore_index=True)


# Pad each snapshot frame at the top with NaN rows so that it has as many rows
# as df_feature_mid.
df_dollar_bar_long_aligned = fill_row(df_dollar_bar_long, df_feature_mid)
df_dollar_bar_mid_aligned = fill_row(df_dollar_bar_mid, df_feature_mid)
df_dollar_bar_short_aligned = fill_row(df_dollar_bar_short, df_feature_mid)
df_range_bar_long_aligned = fill_row(df_range_bar_long, df_feature_mid)
df_range_bar_short_aligned = fill_row(df_range_bar_short, df_feature_mid)

# fill_row never shortens a frame, so verify that none of them was longer than
# the mid-term frame to begin with.
assert all(
    len(frame) == len(df_feature_mid)
    for frame in (
        df_dollar_bar_long_aligned,
        df_dollar_bar_mid_aligned,
        df_dollar_bar_short_aligned,
        df_range_bar_long_aligned,
        df_range_bar_short_aligned,
    )
)

In [5]:
# Join all feature blocks side by side; the list order fixes the column order
# of the combined table.
df_features = pd.concat(
    objs=[
        df_range_bar_long_aligned,
        df_feature_mid,
        df_range_bar_short_aligned,
        df_dollar_bar_long_aligned,
        df_dollar_bar_mid_aligned,
        df_dollar_bar_short_aligned,
    ],
    axis="columns",
)
print(df_features.shape)
df_features.head(1)

(20650, 12762)


Unnamed: 0,p018_ac_0,p018_ac_1,p018_ac_2,p018_ac_3,p018_ac_4,p018_ac_5,p018_ac_6,p018_ac_7,p018_ac_8,p018_ac_9,...,15m_williams_r_ddt_lag10,15m_williams_r_ddt_lag11,15m_williams_r_ddt_lag12,15m_williams_r_ddt_lag13,15m_williams_r_ddt_lag14,15m_williams_r_ddt_lag15,15m_williams_r_ddt_lag16,15m_williams_r_ddt_lag17,15m_williams_r_ddt_lag18,15m_williams_r_ddt_lag19
0,,,,,,,,,,,...,,,,,,,,,,


In [6]:
# Number of missing values per feature column, columns with the most first.
(
    df_features
    .isna()
    .sum()
    .sort_values(ascending=False)
)

4h_dft_dom_cycle_ddt_lag19    2963
4h_dft_dom_cycle_dt_lag19     2956
4h_dft_dom_cycle_ddt_lag18    2956
4h_dft_dom_cycle_lag19        2945
4h_dft_dom_cycle_dt_lag18     2945
                              ... 
p009_conv_16                     0
p009_conv_15                     0
p009_conv_14                     0
p009_conv_13                     0
p009_ac_32                       0
Length: 12762, dtype: int64

In [8]:
# Persist the combined feature table; the file name embeds the MID_TERM tag.
df_features.to_parquet(f"data/features_{MID_TERM}.parquet")