In [1]:
import numpy as np

# Load the bar array from disk; the trailing expression displays its shape.
candles = np.load("data/bar_deap_v1.npy")
candles.shape

(5866, 6)

In [2]:
from src.features.simple_feature_calculator import SimpleFeatureCalculator

def _build_calculator(sequential):
    """Create a SimpleFeatureCalculator and load `candles` into it with the given `sequential` flag."""
    calculator = SimpleFeatureCalculator()
    calculator.load(candles, sequential=sequential)
    return calculator


# Same candles, two modes: one calculator loaded with sequential=False and
# one loaded with sequential=True.
feature_calculator = _build_calculator(sequential=False)
feature_calculator_seq = _build_calculator(sequential=True)

PyTorch configured: device=cpu, dtype=torch.float32


  import pkg_resources


In [3]:
from src.features.simple_feature_calculator.buildin.feature_names import (
    BUILDIN_FEATURES,
)

WINDOW = 20  # number embedded in the windowed feature-name suffixes (e.g. "_mean20")
MAX_LAG = 5  # lagged names are generated for lag 1..MAX_LAG
BASIC = ["bar_open_dt", "bar_high_dt", "bar_low_dt", "bar_close_dt"]


def _with_suffix(names, suffix):
    """Return a new list with `suffix` appended to every name in `names`."""
    return [f"{name}{suffix}" for name in names]


# Windowed variants requested for the four basic bar "_dt" features.
basic_hurst_feats = _with_suffix(BASIC, f"_hurst{WINDOW}")
basic_curv_feats = _with_suffix(BASIC, f"_curv{WINDOW}")
basic_phent_feats = _with_suffix(BASIC, f"_phent{WINDOW}")

# Windowed variants of every built-in feature.
mean_feats = _with_suffix(BUILDIN_FEATURES, f"_mean{WINDOW}")
std_feats = _with_suffix(BUILDIN_FEATURES, f"_std{WINDOW}")
hurst_feats = _with_suffix(BUILDIN_FEATURES, f"_hurst{WINDOW}")
curv_feats = _with_suffix(BUILDIN_FEATURES, f"_curv{WINDOW}")
phent_feats = _with_suffix(BUILDIN_FEATURES, f"_phent{WINDOW}")

# "_dt" / "_ddt" variants of every built-in feature.
dt_feats = _with_suffix(BUILDIN_FEATURES, "_dt")
ddt_feats = _with_suffix(BUILDIN_FEATURES, "_ddt")

# Everything in `feats` also gets lagged copies below.
feats = (
    BUILDIN_FEATURES
    + basic_hurst_feats
    + basic_curv_feats
    + basic_phent_feats
    + mean_feats
    + std_feats
    + hurst_feats
    + curv_feats
    + dt_feats
    + ddt_feats
)

# One lagged name per (feature, lag) pair, feature-major order.
lag_feats = [f"{name}_lag{lag}" for name in feats for lag in range(1, MAX_LAG + 1)]

# `phent_feats` is appended outside `feats`, so it is requested without lagged copies.
ALL_FEATS = feats + phent_feats + lag_feats
print(f"{len(ALL_FEATS)} features")

# Disabled sanity check for the non-sequential calculator (kept for reference):
# features = {}
#
# for f in ALL_FEATS:
#     res = feature_calculator.get([f])
#     assert len(res[f]) == 1, f"{f} not equal to 1"
#     assert res[f].shape == (1,), f"{f} shape not equal to 1"
#     features = {**features, **res}

23507 features


In [4]:
from tqdm.auto import tqdm

# Compute every requested feature with the sequential calculator, one name at a
# time, and collect the results in a single dict keyed by feature name.
features = {}

for feat_name in tqdm(ALL_FEATS):
    res = feature_calculator_seq.get([feat_name])
    assert len(res[feat_name]) == len(candles), (
        f"{feat_name} length {len(res[feat_name])} != {len(candles)}"
    )
    # Merge in place: rebuilding the dict with {**features, **res} on every
    # iteration re-copies all previously collected entries (quadratic overall).
    features.update(res)

  0%|          | 0/23507 [00:00<?, ?it/s]

In [5]:
import pandas as pd

# Assemble the collected per-feature results into one DataFrame (one column
# per dict key) and preview the first row.
df = pd.DataFrame(features)
df.head(1)

Unnamed: 0,frac_o_o1_diff,frac_o_o2_diff,frac_o_o3_diff,frac_o_o4_diff,frac_o_o5_diff,frac_o_h1_diff,frac_o_h2_diff,frac_o_h3_diff,frac_o_h4_diff,frac_o_h5_diff,...,vmd_w256_1_ddt_lag1,vmd_w256_1_ddt_lag2,vmd_w256_1_ddt_lag3,vmd_w256_1_ddt_lag4,vmd_w256_1_ddt_lag5,vmd_w256_2_ddt_lag1,vmd_w256_2_ddt_lag2,vmd_w256_2_ddt_lag3,vmd_w256_2_ddt_lag4,vmd_w256_2_ddt_lag5
0,,,,,,,,,,,...,,,,,,,,,,


In [6]:
# Missing-value count for each feature column, largest first.
na_counts = df.isna().sum()
na_counts.sort_values(ascending=False)

frac_h_h5_diff_std20_lag5     400
frac_h_c5_diff_mean20_lag5    400
frac_h_c5_diff_std20_lag5     400
frac_o_h5_diff_mean20_lag5    400
frac_c_h5_diff_std20_lag5     400
                             ... 
acp_pwr_36_curv20               0
acp_pwr_37_curv20               0
acp_pwr_38_curv20               0
acr_curv20                      0
ac_23_curv20                    0
Length: 23507, dtype: int64

In [7]:
# Display the feature matrix dimensions as (rows, columns).
df.shape

(5866, 23507)

# 标签特征对齐

In [8]:
from jesse.helpers import date_to_timestamp

# Count how many bars fall before / on-or-after the 2025-03-01 boundary.
split_ts = date_to_timestamp('2025-03-01')
timestamps = candles[:, 0]

print(timestamps.shape)
for side_mask in (timestamps < split_ts, timestamps >= split_ts):
    print(timestamps[side_mask].shape)

(5866,)
(5032,)
(834,)


In [9]:
# Index the feature rows by the first candle column (the bar timestamps), cast to int64.
df.index = candles[:, 0].astype(np.int64)
df.head(1)

Unnamed: 0,frac_o_o1_diff,frac_o_o2_diff,frac_o_o3_diff,frac_o_o4_diff,frac_o_o5_diff,frac_o_h1_diff,frac_o_h2_diff,frac_o_h3_diff,frac_o_h4_diff,frac_o_h5_diff,...,vmd_w256_1_ddt_lag1,vmd_w256_1_ddt_lag2,vmd_w256_1_ddt_lag3,vmd_w256_1_ddt_lag4,vmd_w256_1_ddt_lag5,vmd_w256_2_ddt_lag1,vmd_w256_2_ddt_lag2,vmd_w256_2_ddt_lag3,vmd_w256_2_ddt_lag4,vmd_w256_2_ddt_lag5
1656661200000,,,,,,,,,,,...,,,,,,,,,,


In [11]:
# Labels: build aligned feature/label files for the direction labels L5, L6 and L7.
for label_len in (5, 6, 7):
    raw_label = np.load(f"data/raw_label_direction_L{label_len}.npy")

    # Predict the next label: skip the first label_len - 1 feature rows and the last row.
    window_rows = slice(label_len - 1, -1)
    feats_aligned = df.iloc[window_rows]
    assert len(feats_aligned) == len(raw_label)

    # Trim NaNs: drop as many leading rows as the largest per-column NaN count.
    # NOTE(review): this clears every NaN only if NaNs sit at the start of each column — confirm.
    n_drop = feats_aligned.isna().sum().max()
    feats_trimmed = feats_aligned.iloc[n_drop:]
    label_trimmed = raw_label[n_drop:]
    assert len(feats_trimmed) == len(label_trimmed)

    feats_trimmed.to_parquet(f"data/feat_direction_L{label_len}.parquet")
    np.save(f"data/label_direction_L{label_len}.npy", label_trimmed)

In [13]:
# Direction label L6 aligned with a 3-row offset (the "L6_N3" variant).
# The two constants drive both the slice and the file names so they cannot drift apart.
L_SIZE = 6   # matches the "L6" part of the file names
N_SHIFT = 3  # matches the "N3" part of the output file names

label = np.load(f"data/raw_label_direction_L{L_SIZE}.npy")

# Skip the first L_SIZE - N_SHIFT feature rows and the last N_SHIFT rows.
df_align = df.iloc[L_SIZE - N_SHIFT:-N_SHIFT]
assert len(df_align) == len(label)

# Trim NaNs: drop as many leading rows as the largest per-column NaN count.
# NOTE(review): this clears every NaN only if NaNs sit at the start of each column — confirm.
max_na_len = df_align.isna().sum().max()

df_align = df_align.iloc[max_na_len:]
label_no_na = label[max_na_len:]
assert len(df_align) == len(label_no_na)

df_align.to_parquet(f"data/feat_direction_L{L_SIZE}_N{N_SHIFT}.parquet")
np.save(f"data/label_direction_L{L_SIZE}_N{N_SHIFT}.npy", label_no_na)

In [10]:
# 标签
label_l5 = np.load("data/raw_label_hard_L5.npy")

# 预测下一个标签
df_l5 = df.iloc[4:-1]
assert len(df_l5) == len(label_l5)

# 切除na
max_na_len = df_l5.isna().sum().max()

df_l5 = df_l5.iloc[max_na_len:]
label_l5 = label_l5[max_na_len:]
assert len(df_l5) == len(label_l5)

df_l5.shape

(5451, 23507)

In [11]:
# Persist the trimmed L5 features (parquet) and their labels (npy).
df_l5.to_parquet("data/feat_hard_L5.parquet")
np.save("data/label_hard_L5.npy", label_l5)

In [12]:
# 标签
label_l6 = np.load("data/raw_label_hard_L6.npy")

# 预测下2个标签
df_l6 = df.iloc[4:-2]
assert len(df_l6) == len(label_l6)

# 切除na
max_na_len = df_l6.isna().sum().max()

df_l6 = df_l6.iloc[max_na_len:]
label_l6 = label_l6[max_na_len:]
assert len(df_l6) == len(label_l6)

df_l6.shape

(5450, 23507)

In [13]:
# Persist the trimmed L6 features (parquet) and their labels (npy).
df_l6.to_parquet("data/feat_hard_L6.parquet")
np.save("data/label_hard_L6.npy", label_l6)

In [14]:
# 标签
label_l7 = np.load("data/raw_label_hard_L7.npy")

# 预测下3个标签
df_l7 = df.iloc[4:-3]
assert len(df_l7) == len(label_l7)

# 切除na
max_na_len = df_l7.isna().sum().max()

df_l7 = df_l7.iloc[max_na_len:]
label_l7 = label_l7[max_na_len:]
assert len(df_l7) == len(label_l7)

df_l7.shape

(5449, 23507)

In [15]:
# Persist the trimmed L7 features (parquet) and their labels (npy).
df_l7.to_parquet("data/feat_hard_L7.parquet")
np.save("data/label_hard_L7.npy", label_l7)

### 确认实际的nan数量

In [1]:
import numpy as np

from src.indicators.prod import VMD_NRBO
from src.indicators.prod.wavelets.cls_cwt_swt import CWT_SWT

merged_bar = np.load("data/merged_bar.npy")
print(merged_bar.shape)

# Both indicators are built twice on the same bars with the same second
# argument: first with sequential=True, then with sequential=False.
IND_ARG = 32  # presumably the window length — TODO confirm against the indicator signatures
indicator_classes = (VMD_NRBO, CWT_SWT)

vmd_nrbo, cwt_swt = [
    cls(merged_bar, IND_ARG, sequential=True) for cls in indicator_classes
]

vmd_nrbo_single, cwt_swt_single = [
    cls(merged_bar, IND_ARG, sequential=False) for cls in indicator_classes
]

(15063, 6)


In [7]:
# Compare the sequential=False result with the last row of the sequential=True
# result for the same `res` arguments; the displayed difference shows how far apart they are.
res_kwargs = {"dt": True, "lag": 1}
single_res = cwt_swt_single.res(**res_kwargs)
sequential_res = cwt_swt.res(**res_kwargs)[-1]

single_res - sequential_res

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0.]])

In [3]:
# Display the result of the VMD_NRBO instance that was built with sequential=False.
vmd_nrbo_single.res()

array([[-660.2680612 ,  136.15266516,   67.72702177]])

In [None]:
import numpy as np

from src.indicators.prod import _fill_gap

# For every entry of the sequential raw result, take the difference between
# consecutive elements and keep only the final one, then stack into one array.
last_diffs = [(chunk[1:] - chunk[:-1])[-1] for chunk in cwt_swt.raw_result]

# _fill_gap is a project helper; NOTE(review): presumably it pads the stacked
# rows so they line up with cwt_swt.candles — confirm.
dt_result = _fill_gap(np.array(last_diffs), cwt_swt.candles)

In [11]:
# Display the dimensions of the gap-filled difference array.
dt_result.shape

(15063, 21)

In [1]:
import json

# Collect the distinct feature names referenced by the strategy's feature_info.json.
# JSON text is UTF-8, so the encoding is set explicitly instead of relying on the
# platform default.
with open("strategies/BinanceBtcEntropyBarV1/feature_info.json", "r", encoding="utf-8") as f:
    feature_info = json.load(f)

# Union of the three name lists (meta/meta, side/long, side/short), de-duplicated and sorted.
all_features = sorted(
    set(feature_info["meta"]["meta"])
    | set(feature_info["side"]["long"])
    | set(feature_info["side"]["short"])
)
len(all_features)

343

In [2]:
import pandas as pd

df = pd.read_parquet("data/features.parquet")

# NaN count for each of the selected feature columns, largest first.
selected_na = df[all_features].isna().sum()
selected_na.sort_values(ascending=False)

ac_1                                0
price_variance_ratio_dt_lag3        0
reactivity_lag3                     0
reactivity_lag2                     0
reactivity_lag1                     0
                                   ..
evenbetter_sinewave_long            0
ehlers_early_onset_trend_lag15      0
ehlers_early_onset_trend_lag1       0
ehlers_early_onset_trend_dt_lag2    0
williams_r_lag6                     0
Length: 343, dtype: int64

### 新特征探索

In [6]:
import numpy as np

# Reload the merged bars and pull out column 2.
merged_bar = np.load("data/merged_bar.npy")
# NOTE(review): column 2 is treated as the close price here — confirm against the bar column layout.
close_prices = merged_bar[:, 2]

In [9]:
from src.indicators.prod.wavelets import cwt

# Run cwt with sequential=False on the "close" source and count NaNs in the result.
res = cwt(merged_bar, window=32, source_type="close", sequential=False)
np.isnan(res).sum()

0

In [17]:
# Display the number of dimensions of the cwt result.
res.ndim

1