In [1]:
import numpy as np

# Load the bar array stored at data/bar_deap_v1.npy; the trailing expression
# displays its shape as the cell output.
merged_bar = np.load("data/bar_deap_v1.npy")
merged_bar.shape

(5789, 6)

In [2]:
import pickle
from pathlib import Path
from src.features.all_features_func import feature_bundle

# Reuse the pickled feature bundle when the cache file is present; otherwise
# compute the bundle once and write it to the cache for the next run.
# NOTE(review): pickle.load can execute arbitrary code, so this cache file
# should only ever be one that this notebook produced itself.
temp_feature_path = Path("data/temp_feature.pkl")

if temp_feature_path.exists():
    with temp_feature_path.open("rb") as cache_file:
        all_features = pickle.load(cache_file)
else:
    all_features = feature_bundle(merged_bar, sequential=True, lightweighted=True)
    with temp_feature_path.open("wb") as cache_file:
        pickle.dump(all_features, cache_file)

  import pkg_resources


In [3]:
# Every feature series must have exactly as many entries as `merged_bar`.
expected_len = len(merged_bar)
for name, values in all_features.items():
    actual_len = len(values)
    assert actual_len == expected_len, f"{name} feature length not equal, {actual_len} != {expected_len}"

In [4]:
from src.features.simple_feature_calculator import SimpleFeatureCalculator

def _loaded_calculator(sequential):
    """Create a SimpleFeatureCalculator and load `merged_bar` into it.

    `sequential` is forwarded unchanged to `SimpleFeatureCalculator.load`.
    """
    calculator = SimpleFeatureCalculator()
    calculator.load(merged_bar, sequential=sequential)
    return calculator


# One calculator per mode so their outputs can be compared against each other.
feature_calculator = _loaded_calculator(False)
feature_calculator_seq = _loaded_calculator(True)

In [5]:
import traceback

# Cross-check the two calculators: for every feature, the first value returned
# by the non-sequential calculator must match the last value of the sequential
# series. Stops at the first feature that fails and prints a diagnostic.
tolerance = 1e-4  # maximum allowed absolute difference

for feature_name in all_features:
    # Reset per iteration so the failure report never shows values left over
    # from an earlier feature, and never hits a NameError when `get` itself
    # raises before either result has been assigned.
    res = seq_res = None
    try:
        res = feature_calculator.get([feature_name])[feature_name]
        seq_res = feature_calculator_seq.get([feature_name])[feature_name]
        # Compare the absolute difference: a signed difference would let any
        # negative deviation, however large, satisfy `< tolerance`.
        assert abs(res[0] - seq_res[-1]) < tolerance, (
            f"{feature_name} not equal, {res[0]} != {seq_res[-1]}"
        )
    except Exception:
        # `Exception` (not a bare `except`) so that KeyboardInterrupt and
        # SystemExit still interrupt the loop instead of being reported as a
        # feature failure.
        print(f"failed to get feature {feature_name}")
        print(f"{res = }")
        print(f"{seq_res = }")
        traceback.print_exc()
        break

failed to get feature bandpass
res = array([-0.04073687])
seq_res = array([ 1.        ,  0.96737826,  0.84350617, ...,  0.4009752 ,
       -0.08446878, -0.47270685])


Traceback (most recent call last):
  File "/var/folders/h9/d_xpszfn6yz9r8zww450m44w0000gn/T/ipykernel_80081/351568017.py", line 7, in <module>
    assert res[0] - seq_res[-1] < 1e-4, f"{f} not equal, {res[0]} != {seq_res[-1]}"
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError: bandpass not equal, -0.04073686610207047 != -0.47270685242923394


In [2]:
import pandas as pd

from src.features.all_features import feature_bundle

# Compute the sequential feature bundle and tabulate it, indexing the rows by
# the integer-cast first column of `merged_bar`.
features = feature_bundle(merged_bar, sequential=True)
bar_index = merged_bar[:, 0].astype(int)
df_features = pd.DataFrame(data=features, index=bar_index)

print(df_features.shape)
nan_counts = df_features.isna().sum()  # NaN count per feature column
print(f"max nan: {nan_counts.max()}")
df_features.head()

(15063, 12127)
max nan: 1844


Unnamed: 0,adx_7,adx_7_dt,adx_7_ddt,adx_7_lag1,adx_7_lag2,adx_7_lag3,adx_7_lag4,adx_7_lag5,adx_7_lag6,adx_7_lag7,...,williams_r_ddt_lag10,williams_r_ddt_lag11,williams_r_ddt_lag12,williams_r_ddt_lag13,williams_r_ddt_lag14,williams_r_ddt_lag15,williams_r_ddt_lag16,williams_r_ddt_lag17,williams_r_ddt_lag18,williams_r_ddt_lag19
1577874960000,,,,,,,,,,,...,,,,,,,,,,
1577896860000,,,,,,,,,,,...,,,,,,,,,,
1577923140000,,,,,,,,,,,...,,,,,,,,,,
1577944080000,,,,,,,,,,,...,,,,,,,,,,
1577967720000,,,,,,,,,,,...,,,,,,,,,,


In [3]:
# Drop the leading rows that still contain NaNs. The number of rows to skip is
# the largest per-column NaN count, derived from the data rather than
# hard-coded, so the cell stays correct when the bars or the feature set change.
# NOTE(review): this assumes NaNs only occur at the start of each column; the
# final expression displays the remaining max NaN count, which should be 0.
nan_counts = df_features.isna().sum()
warmup_rows = int(nan_counts.max()) if len(nan_counts) else 0  # no columns -> nothing to trim
df = df_features.iloc[warmup_rows:]
print(df.shape)
df.isna().sum().max()

(13219, 12127)


0

In [4]:
# Persist `df` to disk as a Parquet file.
df.to_parquet("data/features.parquet")

### 确认实际的 NaN 数量

In [1]:
import numpy as np

from src.indicators.prod import VMD_NRBO
from src.indicators.prod.wavelets.cls_cwt_swt import CWT_SWT

# Load the merged bars and report their shape.
merged_bar = np.load("data/merged_bar.npy")
print(merged_bar.shape)

# The same positional value is passed to every indicator below;
# presumably a window length — TODO confirm against the indicator signatures.
shared_size = 32

# Sequential instances first, then their non-sequential counterparts.
vmd_nrbo, cwt_swt = (
    VMD_NRBO(merged_bar, shared_size, sequential=True),
    CWT_SWT(merged_bar, shared_size, sequential=True),
)
vmd_nrbo_single, cwt_swt_single = (
    VMD_NRBO(merged_bar, shared_size, sequential=False),
    CWT_SWT(merged_bar, shared_size, sequential=False),
)

(15063, 6)


In [7]:
# Request the same output from both CWT_SWT instances and display the
# difference between the non-sequential result and the last sequential entry.
res_options = {"dt": True, "lag": 1}
single_res = cwt_swt_single.res(**res_options)
sequential_res = cwt_swt.res(**res_options)[-1]

single_res - sequential_res

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0.]])

In [3]:
# Display the output of the non-sequential VMD_NRBO instance.
vmd_nrbo_single.res()

array([[-660.2680612 ,  136.15266516,   67.72702177]])

In [None]:
import numpy as np

from src.indicators.prod import _fill_gap

# For each entry of `cwt_swt.raw_result`, take the first difference along
# axis 0 and keep only its final row; stack those rows into one array and pass
# it, together with the indicator's candles, to `_fill_gap`.
last_step_deltas = np.array(
    [np.diff(entry, axis=0)[-1] for entry in cwt_swt.raw_result]
)
dt_result = _fill_gap(last_step_deltas, cwt_swt.candles)

In [11]:
# Inspect the shape of the array returned by `_fill_gap`.
dt_result.shape

(15063, 21)

In [1]:
import json

# Read the strategy's feature configuration.
with open("strategies/BinanceBtcEntropyBarV1/feature_info.json", "r") as info_file:
    feature_info = json.load(info_file)

# Union of the meta, long-side and short-side feature names, de-duplicated and
# sorted; the trailing expression displays how many distinct names there are.
all_features = sorted(
    {
        *feature_info["meta"]["meta"],
        *feature_info["side"]["long"],
        *feature_info["side"]["short"],
    }
)
len(all_features)

343

In [2]:
import pandas as pd

# Load the saved feature table and show, for the selected feature columns only,
# the NaN count per column with the largest counts first.
df = pd.read_parquet("data/features.parquet")
selected_nan_counts = df[all_features].isna().sum()
selected_nan_counts.sort_values(ascending=False)

ac_1                                0
price_variance_ratio_dt_lag3        0
reactivity_lag3                     0
reactivity_lag2                     0
reactivity_lag1                     0
                                   ..
evenbetter_sinewave_long            0
ehlers_early_onset_trend_lag15      0
ehlers_early_onset_trend_lag1       0
ehlers_early_onset_trend_dt_lag2    0
williams_r_lag6                     0
Length: 343, dtype: int64

### 新特征探索

In [6]:
import numpy as np

# Reload the merged bar array from disk.
merged_bar = np.load("data/merged_bar.npy")
# Column index 2 of every row; presumably the close price, going by the
# variable name — TODO confirm against the bar column layout.
close_prices = merged_bar[:, 2]

In [9]:
from src.indicators.prod.wavelets import cwt

# Run the non-sequential `cwt` indicator on the close source and count how many
# NaN entries its result contains.
res = cwt(
    merged_bar,
    window=32,
    source_type="close",
    sequential=False,
)
nan_mask = np.isnan(res)
nan_mask.sum()

0

In [17]:
# Check how many dimensions the `cwt` result has.
res.ndim

1