# side model feature selection

In [None]:
import pandas as pd

# Load the features_25m table and report its (rows, columns) shape.
df_features = pd.read_parquet("data/features_25m.parquet")
print(df_features.shape)
# Missing-value count per column, largest first.  As the last expression of the
# cell, the resulting Series is what the notebook renders.
df_features.isnull().sum().sort_values(ascending=False)

In [None]:
import numpy as np
from jesse import utils

# Raw label array: columns 0-5 are handed to jesse's candle-to-DataFrame helper,
# column 6 is stored alongside them as the side label.
label = np.load("data/label_side_25m.npy")
df_label = utils.numpy_candles_to_dataframe(label[:, :6])
df_label = df_label.set_index("date")
df_label["side_label"] = label[:, 6]
# Show the first row as a quick look at the resulting layout.
df_label.head(1)

In [None]:
# Number of leading rows dropped from BOTH the features and the labels.  Keeping
# it in one place guarantees the two slices stay row-aligned.
# NOTE(review): presumably an indicator warm-up window - confirm where 808 comes from.
SIDE_SKIP_ROWS = 808

side_features = df_features.iloc[SIDE_SKIP_ROWS:]

# Binarize straight from the label column: 1 where the label equals 1, else 0.
# Building the array directly avoids assigning a column into a slice of
# df_label, which is what triggered pandas' SettingWithCopyWarning before.
side_label = (
    (df_label["side_label"].iloc[SIDE_SKIP_ROWS:].astype(int) == 1)
    .astype(int)
    .to_numpy()
)

# Fail early with a clear message instead of fitting on misaligned data.
if side_features.shape[0] != side_label.shape[0]:
    raise ValueError(
        f"side features/labels are misaligned: "
        f"{side_features.shape[0]} feature rows vs {side_label.shape[0]} labels"
    )

print(side_features.shape)
print(side_label.shape)

# Remaining missing values per feature column, largest first.
side_features.isna().sum(axis=0).sort_values(ascending=False)

In [None]:
# FCQSelector is a project-local selector from the mrmr_accelerated module.
# NOTE(review): presumably an accelerated mRMR (FCQ variant) implementation - confirm.
from custom_indicators.mrmr_accelerated import FCQSelector

# Fit on the trimmed side features and the 0/1 side label.  The fitted
# selector's relevance_ and variables_ attributes are read by the next cell.
selector = FCQSelector()
selector.fit(side_features, side_label)

In [None]:
# Pair each variable name with its relevance score, then rank highest first.
side_res = pd.Series(selector.relevance_, index=selector.variables_)
side_res = side_res.sort_values(ascending=False)
side_res

In [10]:
import json

# Save the ranking as JSON; the file name embeds the selector's class name.
with open(f"data/side_features_{selector.__class__.__name__}.json", "w") as f:
    f.write(json.dumps(side_res.to_dict(), indent=4))

In [11]:
import json

import pandas as pd

# Reload the ranking previously saved for FCQSelector so the following cell can
# work from the file on disk.  The path contains no placeholders, so it is a
# plain string literal rather than an f-string.
with open("data/side_features_FCQSelector.json", "r") as f:
    side_res = pd.Series(json.load(f))

In [12]:
# Time-frame prefixes carried by the side model's feature names.
SHORT_TERM = "10m"
MEDIUM_TERM = "25m"
LONG_TERM = "2h"

# Skeleton of the feature-info file: one bucket per time frame for each model,
# plus a "model_res" bucket on the meta side.  Only the "side" buckets are
# filled in by this cell.
feature_info = {
    "side": {SHORT_TERM: [], MEDIUM_TERM: [], LONG_TERM: []},
    "meta": {SHORT_TERM: [], MEDIUM_TERM: [], LONG_TERM: [], "model_res": []},
}

# Take the first fifth of the ranking and file each name under the first time
# frame whose "<timeframe>_" prefix it carries; names matching none are skipped.
for name in side_res[: len(side_res) // 5].to_dict():
    for timeframe in (SHORT_TERM, MEDIUM_TERM, LONG_TERM):
        if name.startswith(f"{timeframe}_"):
            feature_info["side"][timeframe].append(name)
            break

# Normalise every bucket: drop duplicates and sort alphabetically.
for buckets in feature_info.values():
    for bucket_key in list(buckets):
        buckets[bucket_key] = sorted(set(buckets[bucket_key]))

with open("custom_indicators/feature_info.json", "w") as f:
    json.dump(feature_info, f, indent=4)

# meta model feature selection

In [None]:
import lightgbm as lgb
import numpy as np
import pandas as pd

from custom_indicators.config import SIDE_ALL

# Load the meta-model inputs: the features_45m table and its label table.
df_features = pd.read_parquet("data/features_45m.parquet")
df_label = pd.read_parquet("data/label_meta_45m.parquet")

# Run the saved side model over its input columns (SIDE_ALL) and append the
# prediction to the feature table as an extra column.
model_side = lgb.Booster(model_file="custom_indicators/models/model_side.txt")
model_side_res = model_side.predict(df_features[SIDE_ALL])
df_features["model_side_res"] = model_side_res
print(df_features.shape)

# Features and labels must have the same number of rows.
assert len(df_features) == len(df_label)
print(df_label.shape)
df_label.head()

In [None]:
# Number of leading rows dropped from BOTH the features and the labels.  Keeping
# it in one place guarantees the two slices stay row-aligned.
# NOTE(review): presumably an indicator warm-up window - confirm where 583 comes from.
META_SKIP_ROWS = 583

# .iloc states the positional intent explicitly, matching the side-model section.
meta_features = df_features.iloc[META_SKIP_ROWS:]
meta_label = df_label["bin"].iloc[META_SKIP_ROWS:]

# NOTE(review): a disabled earlier variant additionally kept only the rows whose
# "ret" label was not NaN; reinstate that filter deliberately if it is needed.

print(meta_features.shape)
print(meta_label.shape)

# Distribution of the label values.
meta_label.value_counts()

In [None]:
# Meta feature selection with feature_engine's MRMR selector.
from feature_engine.selection import MRMR

sel = MRMR(
    method="FCQ",
    # Keep at most half of the candidate columns.
    max_features=meta_features.shape[1] // 2,
    # NOTE(review): scoring / cv presumably only matter for the model-based
    # "RFCQ" method and are unused with "FCQ" - confirm against the
    # feature_engine documentation.
    scoring="f1",
    cv=3,
    # The target is treated as a class label, not a continuous value.
    regression=False,
    n_jobs=-1,
)
# The fitted selector's relevance_ and variables_ attributes are read below.
sel.fit(meta_features, meta_label)

In [None]:
import matplotlib.pyplot as plt

# Bar chart of every variable's relevance score, tallest bar first.
ranked_relevance = pd.Series(sel.relevance_, index=sel.variables_)
ranked_relevance = ranked_relevance.sort_values(ascending=False)
ranked_relevance.plot.bar(figsize=(15, 4))
plt.title("Relevance")
plt.show()

In [None]:
# Pair each variable name with its relevance score, then rank highest first.
meta_res = pd.Series(sel.relevance_, index=sel.variables_)
meta_res = meta_res.sort_values(ascending=False)
meta_res

In [6]:
import json

# Save the meta ranking (name -> relevance score) as indented JSON.
with open("data/mrmr_meta_features.json", "w") as f:
    f.write(json.dumps(meta_res.to_dict(), indent=4))

In [5]:
import json
import pandas as pd

# Read the saved name -> relevance mapping back and wrap it in a Series.
with open("data/mrmr_meta_features.json", "r") as f:
    meta_scores = json.load(f)
meta_res = pd.Series(meta_scores)

In [None]:
# Relevance score assigned to the side model's prediction column
# (raises KeyError if "model_side_res" is not part of the ranking).
meta_res.to_dict()["model_side_res"]

In [None]:
# How many features have a relevance score above 10?
sum(1 for score in meta_res.to_dict().values() if score > 10)

In [8]:
import json
from pathlib import Path

# Time-frame prefixes carried by the meta model's feature names.  They are set
# BEFORE the buckets are reset: resetting first would use whatever values an
# earlier cell left behind (the side section's "25m"/"2h"), and the loop below
# would then raise KeyError on the "45m"/"4h" buckets.
SHORT_TERM = "10m"
MEDIUM_TERM = "45m"
LONG_TERM = "4h"
META_TIMEFRAMES = (SHORT_TERM, MEDIUM_TERM, LONG_TERM)

# A feature is kept only when its relevance score exceeds this value.
META_RELEVANCE_THRESHOLD = 10

feature_info_path = Path("custom_indicators/feature_info.json")

# Start from the file on disk so the side-model selection is preserved; fall
# back to an empty structure when it does not exist yet (previously the name
# stayed bound to the Path object and the first subscript failed).
if feature_info_path.exists():
    with open(feature_info_path, "r") as f:
        feature_info = json.load(f)
else:
    feature_info = {}

# Make sure every section used below exists; the file written by the side
# section has no "all" list.
side_buckets = feature_info.setdefault("side", {})
meta_buckets = feature_info.setdefault("meta", {})
all_features = feature_info.setdefault("all", [])

# Reset only the meta buckets this cell owns; any other keys are left untouched.
for bucket_key in (*META_TIMEFRAMES, "model_res"):
    meta_buckets[bucket_key] = []

for name, score in meta_res.to_dict().items():
    # "not >" rather than "<=" so NaN scores are skipped as well.
    if not score > META_RELEVANCE_THRESHOLD:
        continue
    for timeframe in META_TIMEFRAMES:
        prefix = f"{timeframe}_"
        if name.startswith(prefix):
            meta_buckets[timeframe].append(name)
            # Strip only the leading prefix; str.replace would also delete
            # any later occurrence inside the feature name.
            all_features.append(name[len(prefix):])
            break
    else:
        # No time-frame prefix (e.g. the side model's prediction column).
        meta_buckets["model_res"].append(name)

# Normalise the buckets: drop duplicates and sort alphabetically.  The side
# buckets are keyed by the side model's own time frames, so iterate over the
# keys that exist instead of indexing them with the meta time frames.
for bucket_key in list(side_buckets):
    side_buckets[bucket_key] = sorted(set(side_buckets[bucket_key]))
for bucket_key in (*META_TIMEFRAMES, "model_res"):
    meta_buckets[bucket_key] = sorted(set(meta_buckets[bucket_key]))

# Re-running the cell must not grow "all" with repeats; keep first-seen order.
feature_info["all"] = list(dict.fromkeys(all_features))

with open(feature_info_path, "w") as f:
    json.dump(feature_info, f, indent=4)