# Feature Diagnostics

This notebook checks multi-scale feature stability across the raw M1 data and the aggregated M5/M15/H1 timeframes, and visualizes the time-based session-overlap flag alongside the volatility-regime label. Run it after generating the aggregated datasets in `data/processed/timeframes/` and the normalized splits, then tweak calculator parameters as needed.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

from feature_engineering.pipeline import build_default_feature_pipeline, run_feature_pipeline

# CSV location for each timeframe key. M1 is read from the raw export; the
# other timeframes share one naming pattern under the aggregated directory.
_TIMEFRAMES_DIR = "data/processed/timeframes"
DATA_PATHS = {
    "M1": "data/raw/EURUSD_M1_202306010000_202412302358.csv",
    **{tf: f"{_TIMEFRAMES_DIR}/EURUSD_{tf}.csv" for tf in ("M5", "M15", "H1")},
}

def load_dataset(tf="M5", n=5000):
    """Load the leading rows of a timeframe CSV with a parsed ``TIMESTAMP`` column.

    Args:
        tf: Key into ``DATA_PATHS`` selecting which CSV to read.
        n: Number of leading rows to return. The value is handed to
            ``DataFrame.head`` unchanged, so anything ``head`` accepts
            keeps its pandas meaning.

    Returns:
        A DataFrame whose ``TIMESTAMP`` column is datetime-typed. It is
        parsed from the existing ``TIMESTAMP`` column when present, otherwise
        from ``END_TIME``. Values that cannot be parsed become ``NaT``.

    Raises:
        KeyError: If ``tf`` is not a key of ``DATA_PATHS``.
        ValueError: If the CSV has neither a ``TIMESTAMP`` nor an
            ``END_TIME`` column.
    """
    try:
        path = DATA_PATHS[tf]
    except KeyError:
        raise KeyError(
            f"Unknown timeframe {tf!r}; expected one of {sorted(DATA_PATHS)}"
        ) from None

    # Only parse the rows that will be returned: the source files can be far
    # larger than the requested sample. For anything other than a plain
    # non-negative int, read everything and let ``head`` decide.
    nrows = n if isinstance(n, int) and n >= 0 else None
    df = pd.read_csv(path, nrows=nrows)

    if "TIMESTAMP" in df.columns:
        source_column = "TIMESTAMP"
    elif "END_TIME" in df.columns:
        source_column = "END_TIME"
    else:
        raise ValueError("Dataset missing TIMESTAMP/END_TIME columns")

    df["TIMESTAMP"] = pd.to_datetime(df[source_column], errors="coerce")
    return df.head(n)

# Build the default feature pipeline once; the same instance is reused for
# every timeframe in the summary loop further down.
pipeline = build_default_feature_pipeline()
# Sample: the first 2000 rows of the M5 dataset.
sample_df = load_dataset("M5", n=2000)
result = run_feature_pipeline(sample_df, pipeline=pipeline)
# The feature table is read from the result's `.dataframe` attribute; the
# plotting cell draws from `feature_df`.
feature_df = result.dataframe
# Last expression of the cell, so the notebook displays the leading rows.
feature_df.head()

In [None]:
# Feature columns whose means are compared across timeframes.
tracked_columns = [
    "BOS_BULLISH",
    "BOS_BEARISH",
    "SR_SUPPORT_STRENGTH",
    "SR_RESISTANCE_STRENGTH",
    "VOLATILITY_REGIME",
]


def _feature_means(timeframe):
    """Return the per-column mean of the tracked features for one timeframe."""
    frame = run_feature_pipeline(
        load_dataset(timeframe, n=2000), pipeline=pipeline
    ).dataframe
    return frame[tracked_columns].mean()


# One column of means per timeframe, in M1 -> H1 order.
summary = {tf: _feature_means(tf) for tf in ("M1", "M5", "M15", "H1")}

pd.DataFrame(summary)

In [None]:
# Plot the last 500 rows of the feature table: close price on the left axis,
# indicator series on a twin right axis sharing the same x-axis.
plot_df = feature_df.tail(500)
timestamps = plot_df["TIMESTAMP"]

fig, ax1 = plt.subplots(figsize=(12, 5))
ax1.plot(timestamps, plot_df["CLOSE"], label="Close", color="black")
ax1.set_ylabel("Price")
ax1.set_title("Session Overlaps and Volatility Regimes")

# (column, legend label, color, alpha) for each series on the twin axis.
overlay_specs = (
    ("VOLATILITY_REGIME", "Volatility Regime", "tab:orange", 0.6),
    ("SESSION_OVERLAP", "Session Overlap", "tab:blue", 0.4),
)
ax1_twin = ax1.twinx()
for column, series_label, series_color, series_alpha in overlay_specs:
    ax1_twin.plot(
        timestamps,
        plot_df[column],
        label=series_label,
        color=series_color,
        alpha=series_alpha,
    )
ax1_twin.set_ylabel("Indicator Value")

# Each axis tracks its own legend entries; merge them into a single legend.
price_handles, price_labels = ax1.get_legend_handles_labels()
overlay_handles, overlay_labels = ax1_twin.get_legend_handles_labels()
ax1.legend(
    price_handles + overlay_handles,
    price_labels + overlay_labels,
    loc="upper left",
)

plt.tight_layout()
plt.show()