In [1]:
import numpy as np

candles = np.load("data/bar_deap_v1.npy")
candles.shape

(5841, 6)

In [2]:
from src.features.simple_feature_calculator import SimpleFeatureCalculator

feature_calculator = SimpleFeatureCalculator()
feature_calculator.load(candles, sequential=False)
feature_calculator_seq = SimpleFeatureCalculator()
feature_calculator_seq.load(candles, sequential=True)

  import pkg_resources


In [3]:
from src.features.simple_feature_calculator.buildin.feature_names import BUILDIN_FEATURES

WINDOW = 20
BASIC = ["bar_open_dt", "bar_high_dt", "bar_low_dt", "bar_close_dt"]

basic_hurst_feats = [f"{i}_hurst{WINDOW}" for i in BASIC]
basic_curv_feats = [f"{i}_curv{WINDOW}" for i in BASIC]
basic_phent_feats = [f"{i}_phent{WINDOW}" for i in BASIC]

mean_feats = [f"{i}_mean{WINDOW}" for i in BUILDIN_FEATURES]
std_feats = [f"{i}_std{WINDOW}" for i in BUILDIN_FEATURES]
hurst_feats = [f"{i}_hurst{WINDOW}" for i in BUILDIN_FEATURES]
curv_feats = [f"{i}_curv{WINDOW}" for i in BUILDIN_FEATURES]
phent_feats = [f"{i}_phent{WINDOW}" for i in BUILDIN_FEATURES]

dt_feats = [f"{i}_dt" for i in BUILDIN_FEATURES]
ddt_feats = [f"{i}_ddt" for i in BUILDIN_FEATURES]

all_feats = (
BUILDIN_FEATURES
+ basic_hurst_feats + basic_curv_feats + basic_phent_feats
+ mean_feats + std_feats + hurst_feats + curv_feats
+ dt_feats + ddt_feats
)

lag_feats = [f"{i}_lag{l}" for i in all_feats for l in range(1, 6)]

all_feats = all_feats + phent_feats + lag_feats
print(f"{len(all_feats)} features")

# features = {}
#
# for f in all_feats:
#     res = feature_calculator.get([f])
#     assert len(res[f]) == 1, f"{f} not equal to 1"
#     assert res[f].shape == (1,), f"{f} shape not equal to 1"
#     features = {**features, **res}

24367 features


In [4]:
from tqdm.auto import tqdm

features = {}

for f in tqdm(all_feats):
    res = feature_calculator_seq.get([f])
    assert len(res[f]) == len(candles), f"{f} length {len(res[f])} != {len(candles)}"
    features = {**features, **res}

  0%|          | 0/24367 [00:00<?, ?it/s]

In [5]:
import pandas as pd

df = pd.DataFrame(features)
df.head(1)

Unnamed: 0,bar_duration,adx_7,adx_14,aroon_diff,ac_0,ac_1,ac_2,ac_3,ac_4,ac_5,...,vmd_w256_1_ddt_lag1,vmd_w256_1_ddt_lag2,vmd_w256_1_ddt_lag3,vmd_w256_1_ddt_lag4,vmd_w256_1_ddt_lag5,vmd_w256_2_ddt_lag1,vmd_w256_2_ddt_lag2,vmd_w256_2_ddt_lag3,vmd_w256_2_ddt_lag4,vmd_w256_2_ddt_lag5
0,0.0,,,,,,,,,,...,,,,,,,,,,


In [6]:
df.isna().sum().sort_values(ascending=False)

frac_h_o5_diff_mean20_lag5    400
frac_o_l5_diff_std20_lag5     400
frac_c_o5_diff_std20_lag5     400
frac_c_o5_diff_mean20_lag5    400
frac_c_c5_diff_mean20_lag5    400
                             ... 
cwt_w256_2_hurst20              0
cwt_w256_1_hurst20              0
cwt_w256_0_hurst20              0
cwt_w128_20_hurst20             0
bar_duration                    0
Length: 23507, dtype: int64

In [7]:
df.shape

(5841, 23507)

# 标签特征对齐

In [8]:
from jesse.helpers import date_to_timestamp

timestamps = candles[:, 0]
print(timestamps.shape)
print(timestamps[timestamps < date_to_timestamp('2025-03-01')].shape)
print(timestamps[timestamps >= date_to_timestamp('2025-03-01')].shape)

(5841,)
(5032,)
(809,)


In [9]:
df.index = candles[:, 0].astype(np.int64)
df.head(1)

Unnamed: 0,bar_duration,adx_7,adx_14,aroon_diff,ac_0,ac_1,ac_2,ac_3,ac_4,ac_5,...,vmd_w256_1_ddt_lag1,vmd_w256_1_ddt_lag2,vmd_w256_1_ddt_lag3,vmd_w256_1_ddt_lag4,vmd_w256_1_ddt_lag5,vmd_w256_2_ddt_lag1,vmd_w256_2_ddt_lag2,vmd_w256_2_ddt_lag3,vmd_w256_2_ddt_lag4,vmd_w256_2_ddt_lag5
1656661200000,0.0,,,,,,,,,,...,,,,,,,,,,


In [10]:
# 标签
label_l4 = np.load("data/raw_label_hard_L4.npy")

# 预测下一个标签
df_l4 = df.iloc[3:-1]
assert len(df_l4) == len(label_l4)

# 切除na
max_na_len = df_l4.isna().sum().max()

df_l4 = df_l4.iloc[max_na_len:]
label_l4 = label_l4[max_na_len:]
assert len(df_l4) == len(label_l4)

df_l4.shape

(5440, 23507)

In [11]:
df_l4.to_parquet("data/feat_hard_L4.parquet")
np.save("data/label_hard_L4.npy", label_l4)

In [12]:
# 标签
label_l5 = np.load("data/raw_label_hard_L5.npy")

# 预测下2个标签
df_l5 = df.iloc[3:-2]
assert len(df_l5) == len(label_l5)

# 切除na
max_na_len = df_l5.isna().sum().max()

df_l5 = df_l5.iloc[max_na_len:]
label_l5 = label_l5[max_na_len:]
assert len(df_l5) == len(label_l5)

df_l5.shape

(5439, 23507)

In [13]:
df_l5.to_parquet("data/feat_hard_L5.parquet")
np.save("data/label_hard_L5.npy", label_l5)

In [14]:
# 标签
label_l6 = np.load("data/raw_label_hard_L6.npy")

# 预测下3个标签
df_l6 = df.iloc[3:-3]
assert len(df_l6) == len(label_l6)

# 切除na
max_na_len = df_l6.isna().sum().max()

df_l6 = df_l6.iloc[max_na_len:]
label_l6 = label_l6[max_na_len:]
assert len(df_l6) == len(label_l6)

df_l6.shape

(5438, 23507)

In [15]:
df_l6.to_parquet("data/feat_hard_L6.parquet")
np.save("data/label_hard_L6.npy", label_l6)

### 确认实际的nan数量

In [1]:
import numpy as np

from src.indicators.prod import VMD_NRBO
from src.indicators.prod.wavelets.cls_cwt_swt import CWT_SWT

merged_bar = np.load("data/merged_bar.npy")
print(merged_bar.shape)

vmd_nrbo = VMD_NRBO(merged_bar, 32, sequential=True)
cwt_swt = CWT_SWT(merged_bar, 32, sequential=True)

vmd_nrbo_single = VMD_NRBO(merged_bar, 32, sequential=False)
cwt_swt_single = CWT_SWT(merged_bar, 32, sequential=False)

(15063, 6)


In [7]:
single_res = cwt_swt_single.res(dt=True, lag=1)
sequential_res = cwt_swt.res(dt=True, lag=1)[-1]

single_res - sequential_res

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0.]])

In [3]:
vmd_nrbo_single.res()

array([[-660.2680612 ,  136.15266516,   67.72702177]])

In [None]:
import numpy as np

from src.indicators.prod import _fill_gap

dt_result = [i[1:] - i[:-1] for i in cwt_swt.raw_result]
dt_result = np.array([i[-1] for i in dt_result])
dt_result = _fill_gap(dt_result, cwt_swt.candles)

In [11]:
dt_result.shape

(15063, 21)

In [1]:
import json

with open("strategies/BinanceBtcEntropyBarV1/feature_info.json", "r") as f:
    feature_info = json.load(f)

all_features = []
all_features.extend(feature_info["meta"]["meta"])
all_features.extend(feature_info["side"]["long"])
all_features.extend(feature_info["side"]["short"])

all_features = sorted(list(set(all_features)))
len(all_features)

343

In [2]:
import pandas as pd

df = pd.read_parquet("data/features.parquet")
df[all_features].isna().sum().sort_values(ascending=False)

ac_1                                0
price_variance_ratio_dt_lag3        0
reactivity_lag3                     0
reactivity_lag2                     0
reactivity_lag1                     0
                                   ..
evenbetter_sinewave_long            0
ehlers_early_onset_trend_lag15      0
ehlers_early_onset_trend_lag1       0
ehlers_early_onset_trend_dt_lag2    0
williams_r_lag6                     0
Length: 343, dtype: int64

### 新特征探索

In [6]:
import numpy as np

merged_bar = np.load("data/merged_bar.npy")
close_prices = merged_bar[:, 2]

In [9]:
from src.indicators.prod.wavelets import cwt

res = cwt(merged_bar, window=32, source_type="close", sequential=False)
np.isnan(res).sum()

0

In [17]:
res.ndim

1