In [2]:
import numpy as np

# Load the saved array and drop its last column. Later cells use column 0 of
# the result as the timestamp list and pass the whole array to feature_bundle.
# NOTE(review): the dropped column is presumably the label/side value — confirm.
label_dollar_bar = np.load("data/label_side.npy")[:, :-1]
print(label_dollar_bar.shape)

(42410, 6)


In [None]:
# For each mid-term timestamp, build a dollar-bar snapshot at that timestamp directly from the 1-minute bars.
import gc

import pandas as pd
from tqdm.auto import tqdm

from custom_indicators.all_features import feature_bundle
from custom_indicators.toolbox.bar.dollar_bar import build_dollar_bar
from strategies.BinanceMLV2.config import (
    DOLLAR_BAR_LONG_TERM,
    DOLLAR_BAR_SHORT_TERM,
    DOLLAR_BAR_SUPER_LONG_TERM,
    DOLLAR_BAR_SUPER_SHORT_TERM,
    DOLLAR_BAR_THRESHOLD_LONG,
    DOLLAR_BAR_THRESHOLD_SHORT,
    DOLLAR_BAR_THRESHOLD_SUPER_LONG,
    DOLLAR_BAR_THRESHOLD_SUPER_SHORT,
)

# Candle array used as the raw input for every bar rebuild below; column 0 is
# compared against the timestamps when slicing history.
# NOTE(review): presumably 1-minute BTC candles, judging by the file name — confirm.
candles_1m = np.load("data/btc_1m.npy")


def build_bar(candles, threshold, tp_list, func, prefix, max_bars=5000):
    """Build one feature row per timestamp from bars rebuilt at that timestamp.

    Timestamps are visited from the last to the first. For each timestamp the
    rows of ``candles`` whose column 0 is ``<= tp`` are passed to ``func`` and
    the resulting bars are fed to ``feature_bundle(..., sequential=False)``.
    The last element of every returned feature becomes one value of the row,
    stored under the key ``f"{prefix}_{name}"``.

    Processing stops at the first timestamp for which the features cannot be
    computed. That timestamp and every earlier one are left out, so the result
    may have fewer rows than ``tp_list`` and lines up with its tail. The stop
    is reported through a ``RuntimeWarning`` so that an unexpected error is not
    mistaken for the (presumably intended) "not enough history" case.

    Args:
        candles: 2-D array; column 0 is compared against each timestamp.
        threshold: Forwarded unchanged to ``func`` as its second argument.
        tp_list: Sliceable sequence of timestamps to build snapshots for.
        func: Bar builder called as ``func(candles_subset, threshold, max_bars=...)``.
        prefix: String prepended to every feature name and shown in the
            progress-bar description.
        max_bars: Forwarded to ``func`` as the ``max_bars`` keyword.

    Returns:
        pandas.DataFrame with one row per successfully processed timestamp,
        ordered like ``tp_list``.
    """
    import warnings  # local import: this block must not depend on other cells' imports

    df_feature = []
    for tp in tqdm(tp_list[::-1], desc=f"Building {prefix} bars"):
        # NOTE(review): a fresh boolean mask over all candles is built on every
        # iteration; if candles are sorted by column 0 a searchsorted-based
        # slice would be cheaper — confirm ordering before changing.
        bars = func(candles[candles[:, 0] <= tp], threshold, max_bars=max_bars)
        try:
            feature = feature_bundle(bars, sequential=False)
            feature = {f"{prefix}_{k}": v[-1] for k, v in feature.items()}
        except Exception as exc:
            # Keep the best-effort early stop, but surface why it happened
            # instead of swallowing the error silently.
            warnings.warn(
                f"{prefix}: stopped at timestamp {tp!r} after {len(df_feature)} "
                f"rows ({type(exc).__name__}: {exc}); earlier timestamps are skipped",
                RuntimeWarning,
                stacklevel=2,
            )
            break

        # Release the per-timestamp bars before the next rebuild to cap memory.
        del bars
        gc.collect()

        df_feature.append(feature)

    # Rows were collected newest-first; restore chronological order.
    df_feature.reverse()
    df_feature = pd.DataFrame.from_records(df_feature)

    print(df_feature.shape)

    return df_feature


# Rebuild the four non-mid horizons at the timestamps in column 0 of
# label_dollar_bar. Only the threshold and the column prefix differ per call.
df_dollar_bar_super_short = build_bar(
    candles=candles_1m,
    threshold=DOLLAR_BAR_THRESHOLD_SUPER_SHORT,
    tp_list=label_dollar_bar[:, 0],
    func=build_dollar_bar,
    prefix=DOLLAR_BAR_SUPER_SHORT_TERM,
)

df_dollar_bar_super_long = build_bar(
    candles=candles_1m,
    threshold=DOLLAR_BAR_THRESHOLD_SUPER_LONG,
    tp_list=label_dollar_bar[:, 0],
    func=build_dollar_bar,
    prefix=DOLLAR_BAR_SUPER_LONG_TERM,
)

df_dollar_bar_short = build_bar(
    candles=candles_1m,
    threshold=DOLLAR_BAR_THRESHOLD_SHORT,
    tp_list=label_dollar_bar[:, 0],
    func=build_dollar_bar,
    prefix=DOLLAR_BAR_SHORT_TERM,
)

df_dollar_bar_long = build_bar(
    candles=candles_1m,
    threshold=DOLLAR_BAR_THRESHOLD_LONG,
    tp_list=label_dollar_bar[:, 0],
    func=build_dollar_bar,
    prefix=DOLLAR_BAR_LONG_TERM,
)

# Summarise the resulting shapes, longest horizon first.
print(
    f"{df_dollar_bar_super_long.shape = }",
    f"{df_dollar_bar_long.shape = }",
    f"{df_dollar_bar_short.shape = }",
    f"{df_dollar_bar_super_short.shape = }",
    sep="\n",
)

# The candle array is not used again in this cell; drop it and reclaim memory.
del candles_1m
gc.collect()

Building 15m bars:   0%|          | 6/42410 [00:33<65:48:04,  5.59s/it] 


KeyboardInterrupt: 

In [2]:
from strategies.BinanceMLV2.config import DOLLAR_BAR_MID_TERM

# Compute the full (sequential) feature series on the mid-term bars and prefix
# every feature name with the mid-term label in a single pass.
feature_mid = {
    f"{DOLLAR_BAR_MID_TERM}_{name}": values
    for name, values in feature_bundle(label_dollar_bar, sequential=True).items()
}
df_feature_mid = pd.DataFrame(feature_mid)
print(df_feature_mid.shape)
df_feature_mid.head(1)

(45975, 5017)


Unnamed: 0,60m_adx_7,60m_adx_7_dt,60m_adx_7_ddt,60m_adx_7_lag1,60m_adx_7_lag2,60m_adx_7_lag3,60m_adx_7_lag4,60m_adx_7_lag5,60m_adx_7_lag6,60m_adx_7_lag7,...,60m_williams_r_ddt_lag20,60m_williams_r_ddt_lag21,60m_williams_r_ddt_lag22,60m_williams_r_ddt_lag23,60m_williams_r_ddt_lag24,60m_williams_r_ddt_lag25,60m_williams_r_ddt_lag26,60m_williams_r_ddt_lag27,60m_williams_r_ddt_lag28,60m_williams_r_ddt_lag29
0,,,,,,,,,,,...,,,,,,,,,,


In [8]:
def fill_row(df, target_df):
    """Left-pad ``df`` with all-NaN rows until it is as long as ``target_df``.

    When ``df`` already has at least as many rows as ``target_df`` it is
    returned unchanged (same object). Otherwise the missing rows are added in
    front of the existing ones and the index of the result is renumbered
    from 0.
    """
    missing = len(target_df) - len(df)
    if missing <= 0:
        return df

    padding = pd.DataFrame(np.nan, index=range(missing), columns=df.columns)
    return pd.concat([padding, df], ignore_index=True)


# Front-pad each horizon's frame with NaN rows so it has the same number of
# rows as the mid-term feature frame.
df_dollar_bar_super_long_aligned = fill_row(
    df=df_dollar_bar_super_long, target_df=df_feature_mid
)
df_dollar_bar_long_aligned = fill_row(
    df=df_dollar_bar_long, target_df=df_feature_mid
)
df_dollar_bar_short_aligned = fill_row(
    df=df_dollar_bar_short, target_df=df_feature_mid
)
df_dollar_bar_super_short_aligned = fill_row(
    df=df_dollar_bar_super_short, target_df=df_feature_mid
)

# Padding only ever adds rows, so a frame longer than the target fails here.
assert all(
    len(aligned_frame) == len(df_feature_mid)
    for aligned_frame in (
        df_dollar_bar_super_long_aligned,
        df_dollar_bar_long_aligned,
        df_dollar_bar_short_aligned,
        df_dollar_bar_super_short_aligned,
    )
)

In [9]:
# Join all horizons side by side, longest horizon first.
# NOTE(review): column-wise concat aligns on the index; this assumes every
# frame carries the same 0..n-1 index — confirm for df_feature_mid.
df_features = pd.concat(
    objs=[
        df_dollar_bar_super_long_aligned,
        df_dollar_bar_long_aligned,
        df_feature_mid,
        df_dollar_bar_short_aligned,
        df_dollar_bar_super_short_aligned,
    ],
    axis="columns",
)
print(df_features.shape)
df_features.head(1)

(9058, 11841)


Unnamed: 0,15h_ac_0,15h_ac_1,15h_ac_2,15h_ac_3,15h_ac_4,15h_ac_5,15h_ac_6,15h_ac_7,15h_ac_8,15h_ac_9,...,100m_williams_r_ddt_lag30,100m_williams_r_ddt_lag31,100m_williams_r_ddt_lag32,100m_williams_r_ddt_lag33,100m_williams_r_ddt_lag34,100m_williams_r_ddt_lag35,100m_williams_r_ddt_lag36,100m_williams_r_ddt_lag37,100m_williams_r_ddt_lag38,100m_williams_r_ddt_lag39
0,,,,,,,,,,,...,,,,,,,,,,


In [10]:
# Count missing values per column and list the columns with the most NaNs first.
df_features.isna().sum(axis=0).sort_values(ascending=False)

15h_dft_dom_cycle_ddt_lag39    268
15h_dft_dom_cycle_ddt_lag38    265
15h_dft_dom_cycle_dt_lag39     265
15h_dft_dom_cycle_lag39        262
15h_dft_dom_cycle_ddt_lag37    262
                              ... 
5h_comb_spectrum_pwr_37          0
5h_comb_spectrum_pwr_36          0
5h_comb_spectrum_pwr_35          0
5h_comb_spectrum_pwr_34          0
5h_comb_spectrum_pwr_26          0
Length: 11841, dtype: int64

In [12]:
# Persist the combined feature frame; the file name embeds the mid-term label.
df_features.to_parquet(f"data/features_{DOLLAR_BAR_MID_TERM}.parquet")