<a href="https://colab.research.google.com/github/Kojack84/FuturesTrainer/blob/main/MNQ_FeatureSelector.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive at /content/gdrive; the paths configured in the next cell
# (BASE_DIR and everything derived from it) live under this mount point.
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [2]:
# --- Locations on the mounted Drive -----------------------------------------
# Project root; also passed to the scripts below as --gdrive-root.
BASE_DIR = "/content/gdrive/MyDrive/FuturesTrainer"

# Input dataset handed to both scripts via --input.
INPUT_CSV = BASE_DIR + "/MNQ_Advanced_MFE_MAE.csv"

# Output folders, expressed relative to BASE_DIR.
FEATURE_SELECTION_DIR = "feature_selection_v1"
MODELS_DIR = "models_v1"

# --- Parameter grids (comma-separated strings, as the CLI flags expect) ------
PROFIT_TARGETS = ",".join(str(target) for target in (25, 50, 75, 100))

# Stop sizes used by top_features.py (--stop-sizes).
STOP_SIZES_FSEL = ",".join(str(stop) for stop in (50, 100, 150, 200))

# Stop sizes used by train_models.py; derived from the feature-selection grid
# so the two stay in sync.
STOP_SIZES_TRAIN = STOP_SIZES_FSEL


In [None]:
# Run the feature-selection script on INPUT_CSV for both directions and every
# (profit target, stop size) combination, capped at 50 features.
# NOTE(review): results are presumably written under BASE_DIR/FEATURE_SELECTION_DIR
# (via --output-dir / --gdrive-root) — confirm against top_features.py.
!python /content/top_features.py --input "{INPUT_CSV}" --direction both --profit-targets {PROFIT_TARGETS} --stop-sizes {STOP_SIZES_FSEL} --max-features 50 --output-dir "{FEATURE_SELECTION_DIR}" --gdrive-root "{BASE_DIR}"

Initial candidate features after filters: 54
Number of (direction,T,S) configs: 32
Config long_T25_S50: positives=133503, samples=503738
Config long_T25_S100: positives=148894, samples=503738
Config long_T25_S150: positives=153327, samples=503738
Config long_T25_S200: positives=155009, samples=503738


In [None]:
# Feature list passed to the training script via --features.
# NOTE(review): assumed to be produced by the feature-selection cell above — confirm.
FEATURES_FILE = f"{BASE_DIR}/{FEATURE_SELECTION_DIR}/selected_features.txt"

# Train models for both directions over the profit-target / stop-size grid using
# 5 splits. The summary cell below reads thresholds.json and metrics_*.csv from
# BASE_DIR/MODELS_DIR, which are presumably written by this run — confirm against
# train_models.py.
!python /content/train_models.py --input "{INPUT_CSV}" --features "{FEATURES_FILE}" --direction both --profit-targets {PROFIT_TARGETS} --stop-sizes {STOP_SIZES_TRAIN} --n-splits 5 --min-positives 100 --threshold-min-trades 200 --output-dir "{MODELS_DIR}" --gdrive-root "{BASE_DIR}"


In [None]:
import os
import json
import glob
import pandas as pd

# Build a per-configuration summary table from the training outputs and save it
# next to the models as summary_configs.csv.
#
# BASE_DIR and MODELS_DIR are taken from the configuration cell near the top of
# the notebook. They were previously re-declared here as
# "/content/drive/MyDrive/FeatureTrainer", which matches neither the Drive mount
# point ("/content/gdrive") nor the project folder ("FuturesTrainer") used by the
# training cell, so this cell looked in the wrong directory and silently
# overwrote the shared configuration.
models_path = os.path.join(BASE_DIR, MODELS_DIR)
thresholds_path = os.path.join(models_path, "thresholds.json")

# 1) Load thresholds.json
# NOTE(review): assumes the JSON is a list of records (one per configuration)
# with at least direction / profit_target / stop_size — confirm against
# train_models.py.
with open(thresholds_path, "r") as f:
    thresholds_data = json.load(f)

thr_df = pd.DataFrame(thresholds_data)

# 2) Load all metrics_*.csv and aggregate basic stats per config_key.
# The raw rows of every file are concatenated *before* grouping, so a config_key
# whose rows are spread over several files still yields exactly one aggregate
# row (per-file aggregation would produce duplicates and wrong std values).
# sorted() makes the load order deterministic.
metrics_files = sorted(glob.glob(os.path.join(models_path, "metrics_*.csv")))
fold_frames = [pd.read_csv(path) for path in metrics_files]

if fold_frames:
    # Each metrics file is expected to carry: config_key, auc,
    # best_ev_per_trade, best_n_trades (one row per fold).
    metrics_df = (
        pd.concat(fold_frames, ignore_index=True)
        .groupby("config_key")
        .agg(
            cv_auc_mean=("auc", "mean"),
            cv_auc_std=("auc", "std"),
            cv_ev_per_trade=("best_ev_per_trade", "mean"),
            cv_ev_std=("best_ev_per_trade", "std"),
            cv_trades_mean=("best_n_trades", "mean"),
            cv_trades_std=("best_n_trades", "std"),
        )
        .reset_index()
    )
else:
    # No metrics files found: keep the expected schema so the merge still works.
    metrics_df = pd.DataFrame(columns=[
        "config_key", "cv_auc_mean", "cv_auc_std",
        "cv_ev_per_trade", "cv_ev_std",
        "cv_trades_mean", "cv_trades_std"
    ])

# 3) Merge the thresholds table with the aggregated fold metrics.
merged = thr_df.copy()

# Build config_key in the same "<direction>_T<target>_S<stop>" form the metrics
# files use. Vectorized so it also works when the thresholds table is empty.
merged["config_key"] = (
    merged["direction"].astype(str)
    + "_T" + merged["profit_target"].astype(int).astype(str)
    + "_S" + merged["stop_size"].astype(int).astype(str)
)

# Columns present in both frames get the "_thr" (thresholds.json) and "_m"
# (metrics aggregate) suffixes; columns present in only one frame keep their
# plain name.
summary = pd.merge(
    merged,
    metrics_df,
    on="config_key",
    how="left",
    suffixes=("_thr", "_m")
)


def _resolve_sort_column(frame, base_name):
    """Return the column of ``frame`` to sort on for ``base_name``, or None.

    Preference order: the thresholds.json value (``<base>_thr``), then the
    unsuffixed column (present when only one side of the merge had it), then
    the metrics aggregate (``<base>_m``).
    """
    for candidate in (f"{base_name}_thr", base_name, f"{base_name}_m"):
        if candidate in frame.columns:
            return candidate
    return None


# 4) Sort by primary objective: high EV, then high AUC, then more trades.
# Sort keys are resolved against the columns that actually exist, because a
# hard-coded "_thr" name raises KeyError whenever thresholds.json does not
# carry that statistic (merge only adds suffixes on overlapping names).
sort_columns = [
    column
    for column in (
        _resolve_sort_column(summary, base_name)
        for base_name in ("cv_ev_per_trade", "cv_auc_mean", "cv_trades_mean")
    )
    if column is not None
]

if sort_columns:
    summary_sorted = summary.sort_values(
        by=sort_columns,
        ascending=[False] * len(sort_columns)
    )
else:
    summary_sorted = summary

display(summary_sorted)

# 5) Save to Drive for offline analysis (Excel, Sheets, etc.)
summary_path = os.path.join(models_path, "summary_configs.csv")
summary_sorted.to_csv(summary_path, index=False)
print(f"Saved summary to: {summary_path}")
