In [2]:

# ==============================================================================
# Masterarbeit Ensemble Tuning & Evaluation
# ==============================================================================

import os
import sys
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# --- 1. Projekt-Root finden ---

def _locate_repo_root(start: Path) -> Path:
    """Return the nearest directory at or above *start* that contains ``src``.

    The resolved start directory and at most four of its ancestors are
    inspected, nearest first. If none of them has a ``src`` entry, the
    resolved *start* itself is returned as a fallback.
    """
    origin = start.resolve()
    # The directory itself followed by its ancestors, capped at five levels;
    # ``parents`` ends at the filesystem root, so the walk stops there too.
    candidates = (origin, *origin.parents)[:5]
    hit = next((folder for folder in candidates if (folder / "src").exists()), None)
    return origin if hit is None else hit

# Start the search for the repository root at the current working directory.
NOTEBOOK_DIR = Path.cwd()
PROJECT_ROOT = _locate_repo_root(NOTEBOOK_DIR)
# Expose the root through the process environment
# (presumably read by project modules such as src.config — TODO confirm).
os.environ["PROJECT_ROOT"] = str(PROJECT_ROOT)

# Prepend the root to the import path (only once) so that the ``src`` package
# imported below can be found. This has to run before the project imports.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

# --- Project imports ---
from src.config import GlobalConfig
from src.evaluation import rmse, mae
from src.ensemble import (
    load_level0_pool,
    equal_weight_ensemble,
    trimmed_mean_ensemble,
    median_ensemble,
    fit_stacking_ensemble,
    fit_ewa_ensemble,
)

# Show which directory was picked as the project root.
print(f"Projekt-Root: {PROJECT_ROOT}")

# --- 2. Konfiguration ---

# IMPORTANT: this list must match the folder names in outputs/stageB/ exactly.
BASE_MODELS = [
    "elastic_net",
    "lightgbm",
    "extra_trees",  # or "kernel_ridge", "tabpfn", depending on the setup
    # "tabpfn",
]

# Hyperparameters as specified in the thesis.
STACKING_LAMBDAS = [0.01, 1.0]  # ridge shrinkage values passed to the stacking fit
EWA_ETAS = [0.1, 0.3]  # learning rates passed to the EWA fit
EWA_DELTA = 0.95  # forgetting factor

# Output directory for all ensemble artefacts; created if it does not exist yet.
OUT_DIR = PROJECT_ROOT.joinpath("outputs", "ensembles")
OUT_DIR.mkdir(parents=True, exist_ok=True)

print(f"Modell-Pool: {BASE_MODELS}")

# --- 3. Daten laden (Level-0 OOS Predictions) ---

# Load the level-0 out-of-sample predictions of all models in BASE_MODELS and
# unpack the pieces used by the rest of the notebook.
try:
    pool = load_level0_pool(BASE_MODELS)
    dates = pool.dates
    y_true = pool.y_true
    F = pool.F  # matrix of nowcasts (T x M)

    print("\nLevel-0 Pool erfolgreich geladen.")
    # NOTE(review): dates[0] / dates[-1] raise on an empty pool; that error is
    # then reported by the handler below as well.
    print(f"Zeitraum: {dates[0].date()} bis {dates[-1].date()} (n={len(dates)})")
except Exception as e:
    # Print the error together with a hint, then re-raise so that execution
    # stops here instead of continuing without data.
    print(f"\nKRITISCHER FEHLER: {e}")
    print("Stelle sicher, dass 'run_stageB' für alle Modelle gelaufen ist.")
    raise

# --- 4. Kalibrierungs-Split definieren ---

# The thesis describes a "time-blocked calibration period" for stacking/EWA.
# Here the first 24 periods of the OOS phase (or all of them, if fewer are
# available) are used to learn the weights.
CAL_HORIZON = min(24, len(dates))
cal_dates = dates[:CAL_HORIZON]

# Report the calibration span and how many periods remain for evaluation
# (zero when the sample has no more than 24 entries).
print(f"Kalibrierung: {len(cal_dates)} Monate ({cal_dates[0].date()} - {cal_dates[-1].date()})")
print(f"Evaluation:   {len(dates) - len(cal_dates)} Monate")

# --- 5. Benchmark Ensembles ---

# Benchmark combinations computed directly from the forecast matrix F via the
# project helpers (no calibration arguments are passed).
print("\n--- Berechne Benchmarks ---")
ens_equal  = equal_weight_ensemble(F)
ens_trim10 = trimmed_mean_ensemble(F, alpha=0.10)  # 10% trimmed mean
ens_median = median_ensemble(F)

# --- 6. Stacking (Static Meta-Learner) ---

# Fit the static stacking meta-learner through the project helper, passing the
# target, the forecast matrix, the calibration dates and the ridge penalties.
# NOTE(review): how the penalty is selected is decided inside
# fit_stacking_ensemble and is not visible here.
print("\n--- Berechne Stacking ---")
stack_res = fit_stacking_ensemble(
    y=y_true,
    F=F,
    cal_dates=cal_dates,
    lambdas=STACKING_LAMBDAS,
)

# Report the selected penalty, the calibration RMSE and the static weights.
print(f"  Best Lambda: {stack_res.lambda_opt}")
print(f"  RMSE (Cal):  {stack_res.rmse_cal:.4f}")
print("  Statische Gewichte:")
print(stack_res.weights.to_string())

# Keep the stacked predictions for the result table.
ens_stack = stack_res.y_pred

# --- 7. EWA / Hedge (Online) ---

# Fit the online EWA/Hedge combination through the project helper, passing the
# target, the forecast matrix, the calibration dates, the learning rates and
# the forgetting factor.
# NOTE(review): how the learning rate is selected is decided inside
# fit_ewa_ensemble and is not visible here.
print("\n--- Berechne EWA (Online) ---")
ewa_res = fit_ewa_ensemble(
    y=y_true,
    F=F,
    cal_dates=cal_dates,
    etas=EWA_ETAS,
    delta=EWA_DELTA,
)

# Report the selected learning rate, the forgetting factor and the calibration RMSE.
print(f"  Best Eta:   {ewa_res.eta_opt}")
print(f"  Delta:      {ewa_res.delta}")
print(f"  RMSE (Cal): {ewa_res.rmse_cal:.4f}")

# Keep the EWA predictions for the result table.
ens_ewa = ewa_res.y_pred

# --- 8. Visualisierung der EWA-Gewichte ---

# Plot the EWA weight history and store the figure as a PNG in OUT_DIR.
plt.figure(figsize=(12, 6))
ewa_res.weights_history.plot(ax=plt.gca(), linewidth=1.5)
# Raw f-string: "\e" and "\d" are invalid escape sequences in a regular string
# literal (SyntaxWarning on Python 3.12+). The backslashes have to reach
# matplotlib's mathtext unchanged; the rendered title is identical.
plt.title(rf"EWA Weights Evolution ($\eta={ewa_res.eta_opt}$, $\delta={ewa_res.delta}$)")
plt.xlabel("Date")
plt.ylabel("Weight")
# Anchor the legend outside the axes, to the right of the plot area.
plt.legend(bbox_to_anchor=(1.02, 1), loc='upper left', borderaxespad=0.)
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig(OUT_DIR / "ewa_weights_plot.png", dpi=300)
print(f"  Plot gespeichert unter: {OUT_DIR / 'ewa_weights_plot.png'}")
# Close the figure after saving so it is not kept open.
plt.close()

# --- 9. Ergebnis-DataFrame erstellen & Dispersion ergänzen ---

# Result table indexed by date: target first, then base models, then ensembles.
df_ens = pd.DataFrame(index=dates)
df_ens["y_true"] = y_true

# One "base_<model>" column per level-0 model.
for name in BASE_MODELS:
    df_ens[f"base_{name}"] = F[name]

# Ensemble columns, added in a fixed order.
for _column, _values in (
    ("ens_equal", ens_equal),
    ("ens_trim10", ens_trim10),
    ("ens_median", ens_median),
    ("ens_stack", ens_stack),
    ("ens_ewa", ens_ewa),
):
    df_ens[_column] = _values

# IMPORTANT FOR THE CPA TEST: cross-sectional dispersion of the base models,
# i.e. the standard deviation of the forecasts along axis 1 for each time t.
df_ens["dispersion"] = F.std(axis=1)

# --- 10. Evaluation (Metriken) ---

def _print_scores(label: str, y_true: pd.Series, y_pred: pd.Series, mask: pd.Series):
    """Print RMSE and MAE of *y_pred* against *y_true* on a single line.

    Two pairs of scores are reported: one over all observations ("Total") and
    one over the observations selected by *mask* ("Post-Cal").

    Args:
        label: Row label, left-aligned and padded to 20 characters.
        y_true: Realised values.
        y_pred: Predictions to be scored.
        mask: Boolean selector applied to both series for the second pair.
    """

    def _score_pair(truth, forecast):
        # Both metrics are evaluated on the underlying value arrays.
        observed, predicted = truth.values, forecast.values
        return rmse(observed, predicted), mae(observed, predicted)

    # Whole sample.
    r_all, a_all = _score_pair(y_true, y_pred)
    # Post-calibration part only (pure test performance).
    r_post, a_post = _score_pair(y_true[mask], y_pred[mask])

    print(f"{label:<20} | Total: RMSE={r_all:.4f}, MAE={a_all:.4f} | Post-Cal: RMSE={r_post:.4f}, MAE={a_post:.4f}")

# Mask selecting the period AFTER the calibration window.
post_mask = df_ens.index > cal_dates[-1]

# dates[len(cal_dates)] only exists when the calibration window is shorter than
# the sample. CAL_HORIZON = min(24, len(dates)) allows the window to cover the
# whole sample, in which case the lookup would raise IndexError.
has_post_cal = len(cal_dates) < len(dates)
post_start = dates[len(cal_dates)].date() if has_post_cal else "n/a"

print("\n" + "="*80)
print(f"EVALUATION RESULTS (Post-Cal ab {post_start})")
print("="*80)
if not has_post_cal:
    # The Post-Cal columns below are then computed on an empty selection; what
    # rmse/mae return for that is decided by the project helpers.
    print("WARNUNG: Kalibrierung deckt die gesamte OOS-Phase ab - keine Post-Cal-Beobachtungen.")

# Scores of the individual level-0 models.
print("--- Base Models ---")
for name in BASE_MODELS:
    _print_scores(name, df_ens["y_true"], df_ens[f"base_{name}"], post_mask)

# Scores of the forecast combinations.
print("\n--- Ensembles ---")
_print_scores("Equal-Weight", df_ens["y_true"], df_ens["ens_equal"], post_mask)
_print_scores("Trimmed (10%)", df_ens["y_true"], df_ens["ens_trim10"], post_mask)
_print_scores("Median", df_ens["y_true"], df_ens["ens_median"], post_mask)
_print_scores("Stacked", df_ens["y_true"], df_ens["ens_stack"], post_mask)
_print_scores("EWA (Online)", df_ens["y_true"], df_ens["ens_ewa"], post_mask)

# --- 11. Speichern ---

# Main CSV for the statistical tests (contains y, predictions and dispersion);
# the index is written under the column name "date".
df_ens.to_csv(OUT_DIR / "ensemble_predictions.csv", index_label="date")

# Metadata: static stacking weights and the EWA weight history.
stack_res.weights.to_csv(OUT_DIR / "stacking_weights.csv")
ewa_res.weights_history.to_csv(OUT_DIR / "ewa_weights_history.csv", index_label="date")

# Closing banner with the output location.
print("\n" + "="*80)
print(f"FERTIG. Alle Ergebnisse gespeichert in: {OUT_DIR}")
print("="*80)

PROJECT_ROOT = /Users/jonasschernich/Documents/Masterarbeit/Code
[INFO] Modell-Setup: I
[INFO] Modell-Tags: ['sfm_setup_I', 'lgbm_setup_I']
[INFO] OUTPUTS-Basis: /Users/jonasschernich/Documents/Masterarbeit/Code/outputs
[INFO] Lade Stage-A Level-1 Daten (echte OOS für Stacking-Fit) ...
            y_true  y_pred_sfm_setup_I  y_pred_lgbm_setup_I
1995-03-01 -1.4157              0.4491              -0.1713
1995-04-01  0.5222              0.5412              -0.1713
1995-05-01  0.6494              0.5412              -0.1713
1995-06-01 -1.1613              0.5412              -0.1713
1995-07-01 -0.2611              0.5412              -0.1713
[INFO] Tune Stacking-Gewichte auf Stage-A OOS ...

--- Gefrorene Stacking-Gewichte ---
y_pred_lgbm_setup_I   0.6663
y_pred_sfm_setup_I    0.3337
dtype: float64
Summe: 1.000000
[INFO] Lade Stage-B Level-1 Daten (Anwendung/Evaluierung) ...
            y_true  y_pred_sfm_setup_I  y_pred_lgbm_setup_I
1997-04-01 -0.3876              0.6812              -0.