# In [None]:
# ==============================================================================
# Ensemble Tuning & Evaluation
# ==============================================================================

import os
import sys
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt  # noqa: F401  # used in notebooks

# --- Locate project root ---
def _locate_repo_root(start: Path) -> Path:
    """Return the nearest directory at or above *start* that contains ``src``.

    Only *start* itself and its closest ancestors are inspected, five
    candidates in total (fewer when the filesystem root is reached first).
    When none of them contains an entry named ``src``, the resolved *start*
    is returned unchanged.
    """
    origin = start.resolve()
    # The resolved start directory followed by its ancestors, nearest first.
    candidates = (origin, *origin.parents)[:5]
    for candidate in candidates:
        if (candidate / "src").exists():
            return candidate
    return origin


# The working directory stands in for the notebook location; the project root
# derived from it is published through the environment and put first on
# sys.path (unless already listed) so the ``src`` package can be imported.
NOTEBOOK_DIR = Path.cwd()
PROJECT_ROOT = _locate_repo_root(NOTEBOOK_DIR)
os.environ.update(PROJECT_ROOT=str(PROJECT_ROOT))
if sys.path.count(str(PROJECT_ROOT)) == 0:
    sys.path.insert(0, str(PROJECT_ROOT))

# Imports
from src.config import GlobalConfig  # noqa: F401
from src.evaluation import rmse, mae  # noqa: F401
from src.models.ensemble import (
    load_level0_pool,
    load_calibration_pool,
    equal_weight_ensemble,
    trimmed_mean_ensemble,
    median_ensemble,
    fit_stacking_ensemble,
    fit_ewa_ensemble,
)

print(f"Project root: {PROJECT_ROOT}")

# --- CONFIG ---
# Base models to combine. Every entry has to match a folder name under
# outputs/stageB/ exactly.
BASE_MODELS = [
    "elastic_net_with_target_700",
    "lightgbm_with_target_mac",
    "svr_with_target",
    "tabpfn_with_target",
]

# Candidate values passed to the stacking fit (lambdas) and to the EWA fit
# (etas, delta) further down. The order of the grids is kept as given.
STACKING_LAMBDAS = [0.01, 0.1, 1.0]
EWA_ETAS = [0.1, 0.3, 0.2, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 2, 3]
EWA_DELTA = 0.95

# Destination for the combined prediction table; created on first run.
OUT_DIR = PROJECT_ROOT.joinpath("outputs", "ensembles")
OUT_DIR.mkdir(parents=True, exist_ok=True)

# --- LOAD DATA (Stage A + Stage B) ---

print("\n1) Loading Stage B (test set)...")
try:
    pool_test = load_level0_pool(BASE_MODELS)
    # Forecast matrix, realised target and date axis of the test pool.
    F_test, y_test, dates_test = pool_test.F, pool_test.y_true, pool_test.dates
    print(f"   Test period: {dates_test[0].date()} to {dates_test[-1].date()} (n={len(dates_test)})")
except Exception as load_error:
    # Stage B is mandatory: report the problem and let the error propagate.
    print(f"   ERROR: Could not load Stage B: {load_error}")
    raise

print("\n2) Loading Stage A (calibration set, block 3)...")
try:
    pool_cal = load_calibration_pool(BASE_MODELS)
    F_cal = pool_cal.F
    y_cal = pool_cal.y_true
    dates_cal = pool_cal.dates
    print(f"   Cal period:  {dates_cal[0].date()} to {dates_cal[-1].date()} (n={len(dates_cal)})")

    # Merge history for training (A + B)
    F_full = pd.concat([F_cal, F_test])
    y_full = pd.concat([y_cal, y_test])

    # Keep index sorted
    F_full.sort_index(inplace=True)
    y_full.sort_index(inplace=True)

    # Train/tune only on Stage A dates
    cal_dates_list = list(dates_cal)

except Exception as e:
    # NOTE(review): this handler is deliberately broad (best-effort fallback),
    # so a failure while merging Stage A and Stage B also ends up here.
    print(f"   WARNING: Could not load Stage A ({e}).")
    print("   Fallback: using the first 24 months of Stage B for calibration (reduces test data!).")
    split_idx = 24
    if len(dates_test) <= split_idx:
        # Without this guard the hold-out below would leave an empty test set.
        raise RuntimeError(
            f"Stage B has only {len(dates_test)} periods; the fallback needs more than "
            f"{split_idx} to keep any test data after calibration."
        ) from e

    # The fitters still receive the full Stage B history ...
    F_full = F_test
    y_full = y_test
    cal_dates_list = list(dates_test[:split_idx])

    # ... but the calibration months are removed from the evaluation set, so
    # tuned ensembles are not scored on the data they were tuned on.
    # Assumes F_test and y_test are indexed by the labels in dates_test
    # (the same assumption as the later `.loc[dates_test]` lookups).
    dates_test = dates_test[split_idx:]
    F_test = F_test.loc[dates_test]
    y_test = y_test.loc[dates_test]

# --- ENSEMBLES ---

print("\n--- Benchmark ensembles (equal/trimmed/median) ---")
# Benchmark combinations computed directly from the test forecasts (F_test).
ens_equal = equal_weight_ensemble(F_test)
ens_trim10 = trimmed_mean_ensemble(F_test, alpha=0.1)
ens_median = median_ensemble(F_test)

print("\n--- Stacking (trained on Stage A) ---")
stack_res = fit_stacking_ensemble(
    F=F_full,
    y=y_full,
    lambdas=STACKING_LAMBDAS,
    cal_dates=cal_dates_list,
)
# Keep only the predictions for the evaluation dates.
ens_stack = stack_res.y_pred.loc[dates_test]

print(
    f"  Best lambda: {stack_res.lambda_opt}",
    "  Weights (from Stage A):",
    stack_res.weights,
    sep="\n",
)

print("\n--- EWA (online learning A -> B) ---")
ewa_res = fit_ewa_ensemble(
    F=F_full,
    y=y_full,
    etas=EWA_ETAS,
    delta=EWA_DELTA,
    cal_dates=cal_dates_list,  # eta is tuned on the calibration dates
)
# Keep only the predictions for the evaluation dates.
ens_ewa = ewa_res.y_pred.loc[dates_test]

print(f"  Best eta: {ewa_res.eta_opt}")

# --- EVALUATION ---

# Collect the target, every base-model forecast and every ensemble forecast
# in one frame indexed by the test dates.
# Presumably each right-hand side is a Series indexed by those dates, so the
# assignments align on the index - verify against the loaders.
df_ens = pd.DataFrame(index=dates_test)
df_ens["y_true"] = y_test
for m in BASE_MODELS:
    df_ens[f"base_{m}"] = F_test[m]

df_ens["ens_equal"] = ens_equal
# The trimmed-mean and median benchmarks must be stored as well: the export
# step only writes ensembles that exist as columns of df_ens, so leaving them
# out means they are never exported.
df_ens["ens_trim10"] = ens_trim10
df_ens["ens_median"] = ens_median
df_ens["ens_stack"] = ens_stack
df_ens["ens_ewa"] = ens_ewa
# Row-wise standard deviation across the base-model forecasts.
df_ens["dispersion"] = F_test.std(axis=1)

print("\n" + "=" * 60)
print(f"RESULTS (Full Stage B: {dates_test[0].date()} - {dates_test[-1].date()})")
print("=" * 60)


def score(y, y_hat, label):
    """Print one result row: *label* padded to 25 characters, then the RMSE.

    The RMSE of ``y_hat`` against ``y`` comes from the project's ``rmse``
    helper and is shown with four decimals. Nothing is returned.
    """
    print(f"{label:<25} | RMSE: {rmse(y, y_hat):.4f}")


print("--- Base models ---")
for m in BASE_MODELS:
    score(y_test, F_test[m], m)

print("\n--- Ensembles ---")
score(y_test, ens_equal, "Equal-weight")
# The trimmed-mean and median benchmarks are computed alongside the
# equal-weight one, so they are reported here too.
score(y_test, ens_trim10, "Trimmed mean (10%)")
score(y_test, ens_median, "Median")
score(y_test, ens_stack, "Stacked (static)")
score(y_test, ens_ewa, "EWA (online)")

# Save the combined prediction table next to the other ensemble outputs.
df_ens.to_csv(OUT_DIR / "ensemble_predictions.csv")
print(f"\nSaved to {OUT_DIR}")


def export_ensemble_to_stageB(
    df_results: pd.DataFrame,
    col_name: str,
    model_export_name: str,
    base_dir: Path,
    *,
    config_id: str = "ensemble_v1",
) -> None:
    """Save one ensemble column in the Stage B prediction format.

    Two files are written below ``base_dir / "stageB" / model_export_name``:
    ``monthly/preds.csv`` with one row per index entry of ``df_results`` and
    ``summary/summary.csv`` with a single row holding the overall RMSE.

    Args:
        df_results: Frame whose index holds the target dates and which has a
            ``"y_true"`` column plus the column named by ``col_name``.
        col_name: Column of ``df_results`` to export as ``y_pred``.
        model_export_name: Folder created under ``stageB``; also written to
            the ``model`` field of the summary.
        base_dir: Directory under which the ``stageB`` folder lives.
        config_id: Identifier written to the ``config_id`` field of both
            files. Keyword-only; the default keeps the previous output.

    Raises:
        KeyError: If ``"y_true"`` or ``col_name`` is not a column of
            ``df_results``. Raised before any directory is created.
    """
    # Validate first so a bad column name does not leave empty folders behind.
    missing = [c for c in ("y_true", col_name) if c not in df_results.columns]
    if missing:
        raise KeyError(f"df_results is missing required column(s): {missing}")

    model_dir = base_dir / "stageB" / model_export_name
    monthly_dir = model_dir / "monthly"
    summary_dir = model_dir / "summary"

    monthly_dir.mkdir(parents=True, exist_ok=True)
    summary_dir.mkdir(parents=True, exist_ok=True)

    # 1) preds.csv (match the format of other models)
    df_out = pd.DataFrame()
    df_out["date_t_plus_1"] = df_results.index
    # Dummy origin date: one calendar month before the target date.
    # NOTE(review): a calendar-month shift does not keep month-end dates on
    # month-end (30 Apr -> 30 Mar); confirm if real origin dates are needed.
    df_out["t"] = df_results.index - pd.DateOffset(months=1)
    df_out["y_true"] = df_results["y_true"].values
    df_out["y_pred"] = df_results[col_name].values

    # Fake metadata so analysis scripts keep working
    df_out["config_id"] = config_id
    df_out["is_active"] = True

    out_path = monthly_dir / "preds.csv"
    df_out.to_csv(out_path, index=False)
    print(f"  Exported: {out_path}")

    # 2) summary.csv (often used to load configs)
    mse = ((df_out["y_true"] - df_out["y_pred"]) ** 2).mean()
    rmse_val = np.sqrt(mse)

    df_sum = pd.DataFrame(
        [
            {
                "config_id": config_id,
                "model": model_export_name,
                "rmse_overall": rmse_val,
                "setup_name": "Ensemble",
            }
        ]
    )
    df_sum.to_csv(summary_dir / "summary.csv", index=False)


print("\n--- Exporting ensembles into Stage B structure ---")

# Mapping: df column name -> folder name under outputs/stageB/
ensembles_to_export = {
    "ens_equal": "ensemble_equal_weight",
    "ens_trim10": "ensemble_trimmed",
    "ens_median": "ensemble_median",
    "ens_stack": "ensemble_stacking",
    "ens_ewa": "ensemble_ewa",
}

OUTPUTS_ROOT = PROJECT_ROOT / "outputs"

for col, folder_name in ensembles_to_export.items():
    if col not in df_ens.columns:
        # Report the gap instead of skipping silently, so a missing ensemble
        # column is visible in the run output.
        print(f"  Skipped: column '{col}' not found in df_ens (no export to {folder_name})")
        continue
    export_ensemble_to_stageB(df_ens, col, folder_name, OUTPUTS_ROOT)

print("\nDone! The ensembles are now ready for the comparison notebooks.")
