# side model feature selection

In [1]:
import pandas as pd

# Load the precomputed feature matrix used for side-model feature selection.
df_features = pd.read_parquet("data/features.parquet")
print(df_features.shape)

# Missing-value audit: NaN count per column, largest first, so any column that
# still contains gaps shows up at the top of the displayed Series.
nan_counts = df_features.isna().sum()
nan_counts.sort_values(ascending=False)

(18718, 3447)


adx_7                               0
mod_rsi_lag17                       0
mod_rsi_lag19                       0
mod_rsi_dt_lag1                     0
mod_rsi_dt_lag2                     0
                                   ..
comb_spectrum_dom_cycle_ddt_lag5    0
comb_spectrum_dom_cycle_ddt_lag6    0
comb_spectrum_dom_cycle_ddt_lag7    0
comb_spectrum_dom_cycle_ddt_lag8    0
williams_r_ddt_lag19                0
Length: 3447, dtype: int64

In [2]:
import numpy as np

# Load the side label and trim it to the length of the feature matrix.
# The raw array is kept under its own name so re-running this cell always starts
# from the unmodified data.
label_raw = np.load("data/side_label.npy")

print(label_raw.shape)

# Number of surplus label entries relative to the feature rows.
# NOTE(review): the surplus is dropped from the FRONT, presumably because the
# leading feature rows were lost to indicator warm-up — confirm the alignment.
len_gap = len(label_raw) - len(df_features)

# A negative gap would make the slice below keep only the last |len_gap| labels
# instead of failing, silently producing a misaligned target.
assert len_gap >= 0, (
    f"label has {len(label_raw)} rows but features have {len(df_features)}; "
    "cannot align by trimming the label"
)

label = label_raw[len_gap:]

print(label.shape)

(18782,)
(18718,)


In [3]:
from custom_indicators.toolbox.feature_selection.rfcq_selector import RFCQSelector

# Fit the project's RFCQ feature selector on the full feature matrix against the
# side label. Per the fit log printed below, relevance is computed with a random
# forest and is followed by an MRMR iterative selection pass.
# NOTE(review): no random seed is passed here — confirm whether RFCQSelector fixes
# one internally; otherwise the selected set may differ between runs.
selector = RFCQSelector()
# Last expression of the cell: the notebook displays whatever fit() returns.
selector.fit(df_features, label)

➤ 识别数值型变量...
➤ 计算特征与目标变量的相关性(使用随机森林)...
✓ 选择第1个特征: williams_r (最大重要性: 0.0428)
➤ 计算特征冗余度...
➤ 总计选择689个特征 (已选择1个，还需选择688个)...
➤ 开始MRMR迭代选择过程...


选择特征:   0%|                                                           | 0/688 [00:00<?, ?特征/s]


✅ 特征选择完成：从3447个特征中选择了689个，舍弃了2758个
✅ 选择的特征: ['williams_r', 'conv_1', 'highpass_bp_dt', 'vwap_dt_lag1', 'vwap_dt', 'ehlers_early_onset_trend_ddt', 'cmma', 'ehlers_early_onset_trend_ddt_lag1', 'evenbetter_sinewave_short_dt', 'iqr_ratio_dt', 'fisher_ddt', 'roofing_filter_ddt', 'trendflex_dt', 'cmma_lag1', 'fisher_dt', 'stc', 'roofing_filter_ddt_lag1', 'acc_swing_index_dt', 'acr', 'roofing_filter_dt', 'stc_dt', 'norm_on_balance_volume', 'adx_7_dt', 'trendflex_dt_lag1', 'evenbetter_sinewave_long_dt', 'fisher_ddt_lag1', 'williams_r_dt', 'adaptive_cci_dt', 'bekker_parkinson_vol_ddt_lag2', 'reactivity', 'trendflex_ddt_lag1', 'cmma_dt', 'fisher_dt_lag1', 'evenbetter_sinewave_short_dt_lag1', 'evenbetter_sinewave_short', 'voss_filt_dt', 'hasbrouck_lambda_ddt', 'williams_r_lag1', 'voss_dt_lag1', 'adaptive_stochastic_dt', 'evenbetter_sinewave_long_ddt', 'conv_4', 'fisher', 'voss', 'adx_14_dt_lag2', 'conv_0', 'bandpass_dt', 'chaiken_money_flow', 'stc_dt_lag1', 'adaptive_stochastic_ddt', 'ac_0', 'ev

<custom_indicators.toolbox.feature_selection.rfcq_selector.RFCQSelector at 0x359cfd790>

In [6]:
# Pair each variable seen by the selector with its relevance score, then rank the
# features from most to least relevant.
relevance_by_feature = pd.Series(selector.relevance_, index=selector.variables_)
side_res_long = relevance_by_feature.sort_values(ascending=False)

# Number of features with strictly positive relevance.
print(side_res_long.gt(0).sum())
side_res_long

472


williams_r                      0.042827
cmma                            0.038767
trendflex_dt                    0.034349
vwap_dt                         0.033840
evenbetter_sinewave_short_dt    0.029488
                                  ...   
conv_26                         0.000000
conv_27                         0.000000
conv_28                         0.000000
conv_29                         0.000000
williams_r_ddt_lag19            0.000000
Length: 3447, dtype: float64

In [7]:
import json

# Side-model features: every feature whose relevance score is strictly positive,
# in descending relevance order (the order of `side_res_long`).
feature_long = side_res_long.loc[side_res_long.gt(0)].index.tolist()

# Layout of feature_info.json. The meta lists are written out empty here; only
# the side list is populated by this cell.
feature_info = {
    "side": {
        "side": feature_long,
    },
    "meta": {
        "meta": [],
        "model": [],
    },
}

# Overwrites any existing file at this path.
with open("strategies/BinanceBtcEntropyBarV1/feature_info.json", "w") as out_file:
    json.dump(feature_info, out_file, indent=4)

# meta model feature selection

In [1]:
import numpy as np
import pandas as pd

# Load the feature matrix and the meta label, then report both shapes so a
# row-count mismatch is visible immediately.
df_features = pd.read_parquet("data/features.parquet")
meta_label = np.load("data/label_meta.npy")
for loaded in (df_features, meta_label):
    print(loaded.shape)

# Missing-value audit: NaN count per column, largest first.
nan_counts = df_features.isna().sum()
nan_counts.sort_values(ascending=False)

(18718, 3447)
(18718,)


adx_7                               0
mod_rsi_lag17                       0
mod_rsi_lag19                       0
mod_rsi_dt_lag1                     0
mod_rsi_dt_lag2                     0
                                   ..
comb_spectrum_dom_cycle_ddt_lag5    0
comb_spectrum_dom_cycle_ddt_lag6    0
comb_spectrum_dom_cycle_ddt_lag7    0
comb_spectrum_dom_cycle_ddt_lag8    0
williams_r_ddt_lag19                0
Length: 3447, dtype: int64

In [2]:
from strategies.BinanceBtcEntropyBarV1.config import (
    SIDE,
    get_side_model,
)

# Obtain the side model from the strategy config.
# NOTE(review): the meaning of the positional `False` is defined in
# strategies.BinanceBtcEntropyBarV1.config — confirm there.
side_model = get_side_model(False)

# Predict on the SIDE feature columns only.
side_inputs = df_features[SIDE]
side_res = side_model.predict(side_inputs)

# One prediction per feature row is required before attaching it as a column.
assert len(side_res) == len(df_features)

# Expose the side model's output as an extra candidate feature named "model".
df_features["model"] = side_res

In [3]:
# meta feature selection
from custom_indicators.toolbox.feature_selection.rfcq_selector import RFCQSelector

# Fit the RFCQ selector on the feature matrix (now including the "model" column)
# against the meta label.
selector = RFCQSelector()
selector.fit(df_features, meta_label)

# Relevance score per variable, ranked from most to least relevant.
meta_relevance = pd.Series(selector.relevance_, index=selector.variables_)
meta_res = meta_relevance.sort_values(ascending=False)

➤ 识别数值型变量...
➤ 计算特征与目标变量的相关性(使用随机森林)...
✓ 选择第1个特征: corwin_schultz_estimator_lag1 (最大重要性: 0.0600)
➤ 计算特征冗余度...
➤ 总计选择689个特征 (已选择1个，还需选择688个)...
➤ 开始MRMR迭代选择过程...


选择特征:   0%|                                                           | 0/688 [00:00<?, ?特征/s]


✅ 特征选择完成：从3448个特征中选择了689个，舍弃了2759个
✅ 选择的特征: ['corwin_schultz_estimator_lag1', 'acc_swing_index_ddt', 'reactivity_ddt_lag17', 'amihud_lambda_ddt_lag11', 'model', 'fti_dt', 'acc_swing_index_lag14', 'evenbetter_sinewave_long_ddt_lag9', 'hurst_coef_fast_ddt_lag1', 'adaptive_bp_lead_dt_lag11', 'adx_7_dt', 'hasbrouck_lambda_dt', 'natr_ddt_lag3', 'ac_1', 'acc_swing_index_lag5', 'williams_r', 'price_variance_ratio_dt_lag1', 'ma_difference_ddt', 'highpass_bp_ddt', 'hasbrouck_lambda_dt_lag3', 'natr_ddt_lag1', 'acc_swing_index_lag2', 'price_change_oscillator_lag1', 'iqr_ratio_dt_lag1', 'stc_dt', 'kyle_lambda_dt', 'amihud_lambda_dt_lag11', 'acc_swing_index_lag6', 'fti_ddt', 'bandpass_dt_lag6', 'adx_7_ddt_lag1', 'corwin_schultz_estimator_dt_lag2', 'adx_7', 'vwap_dt_lag2', 'vwap', 'vwap_dt', 'amihud_lambda_dt', 'fisher_ddt', 'natr_dt', 'vwap_lag18', 'adx_14_dt_lag1', 'forecast_oscillator_lag1', 'bekker_parkinson_vol_dt_lag1', 'ehlers_early_onset_trend_dt', 'bekker_parkinson_vol_dt_lag2', 'vwap_lag1

In [4]:
# Number of features with strictly positive relevance for the meta label.
meta_res.gt(0).sum()

73

In [5]:
import json
from pathlib import Path

# Location of the strategy's feature list. Kept in its own variable so the name
# `feature_info` always refers to the dict, never to the path.
feature_info_path = Path("strategies/BinanceBtcEntropyBarV1/feature_info.json")

# Start from the file's current contents so the existing "side" section is
# preserved. If the file does not exist yet, fall back to an empty skeleton
# instead of failing when the Path object is indexed like a dict.
if feature_info_path.exists():
    with open(feature_info_path, "r") as f:
        feature_info = json.load(f)
else:
    feature_info = {
        "side": {"side": []},
        "meta": {"meta": [], "model": []},
    }

# Features with strictly positive relevance, in descending relevance order.
meta_features = meta_res[meta_res > 0].index.tolist()

# The side model's own prediction column ("model") is stored separately from the
# ordinary features. Both lists are rebuilt from scratch on every run;
# setdefault tolerates an existing file that has no "meta" section.
meta_section = feature_info.setdefault("meta", {})
meta_section["meta"] = [k for k in meta_features if k != "model"]
meta_section["model"] = [k for k in meta_features if k == "model"]

with open(feature_info_path, "w") as f:
    json.dump(feature_info, f, indent=4)