In [10]:
# # ==============================================================================
# Masterarbeit Ensemble Tuning & Evaluation (Full Timeline 2011-2024)
# ==============================================================================

import os
import sys
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# --- Projekt-Root finden ---
def _locate_repo_root(start: Path) -> Path:
    cur = start.resolve()
    for _ in range(5):
        if (cur / "src").exists(): return cur
        if cur.parent == cur: break
        cur = cur.parent
    return start.resolve()

NOTEBOOK_DIR = Path.cwd()
PROJECT_ROOT = _locate_repo_root(NOTEBOOK_DIR)
os.environ["PROJECT_ROOT"] = str(PROJECT_ROOT)
if str(PROJECT_ROOT) not in sys.path: sys.path.insert(0, str(PROJECT_ROOT))

# Imports
from src.config import GlobalConfig
from src.evaluation import rmse, mae
from src.models.ensemble import (
    load_level0_pool,
    load_calibration_pool,
    equal_weight_ensemble,
    trimmed_mean_ensemble,
    median_ensemble,
    fit_stacking_ensemble,
    fit_ewa_ensemble,
)

print(f"Projekt-Root: {PROJECT_ROOT}")

# --- KONFIGURATION ---
# Die exakten Ordnernamen unter outputs/stageB/
BASE_MODELS = [
    "elastic_net_without_target_700",
    "lightgbm_without_target_mac",
    "svr_without_target",
    "tabpfn_without_target",
]

STACKING_LAMBDAS = [1e-2, 0.1, 1.0]
EWA_ETAS = [0.1, 0.3, 0.2, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 2, 3]
EWA_DELTA = 0.95

OUT_DIR = PROJECT_ROOT / "outputs" / "ensembles"
OUT_DIR.mkdir(parents=True, exist_ok=True)

# --- DATEN LADEN (Stage A + Stage B) ---

print("\n1. Lade Stage B (Test Set)...")
try:
    pool_test = load_level0_pool(BASE_MODELS)
    F_test = pool_test.F
    y_test = pool_test.y_true
    dates_test = pool_test.dates
    print(f"   Test Period: {dates_test[0].date()} bis {dates_test[-1].date()} (n={len(dates_test)})")
except Exception as e:
    print(f"   FEHLER: Konnte Stage B nicht laden: {e}")
    raise

print("\n2. Lade Stage A (Calibration Set, Block 3)...")
try:
    pool_cal = load_calibration_pool(BASE_MODELS)
    F_cal = pool_cal.F
    y_cal = pool_cal.y_true
    dates_cal = pool_cal.dates
    print(f"   Cal Period:  {dates_cal[0].date()} bis {dates_cal[-1].date()} (n={len(dates_cal)})")

    # Merge für Training (Historie A + B)
    F_full = pd.concat([F_cal, F_test])
    y_full = pd.concat([y_cal, y_test])
    # Sicherstellen, dass Index sortiert ist
    F_full.sort_index(inplace=True)
    y_full.sort_index(inplace=True)

    cal_dates_list = list(dates_cal) # Wir trainieren NUR auf A

except Exception as e:
    print(f"   WARNUNG: Konnte Stage A nicht laden ({e}).")
    print("   Fallback: Nutze die ersten 24 Monate von Stage B als Kalibrierung (Verlust von Testdaten!).")
    split_idx = 24
    F_full = F_test
    y_full = y_test
    cal_dates_list = list(dates_test[:split_idx])
    # Für die Evaluation später schneiden wir dann ab

# --- ENSEMBLE BERECHNUNG ---

print("\n--- Benchmark Ensembles (Equal/Trim/Median) ---")
# Diese brauchen kein Training, können direkt auf Test berechnet werden
ens_equal = equal_weight_ensemble(F_test)
ens_trim10 = trimmed_mean_ensemble(F_test, alpha=0.10)
ens_median = median_ensemble(F_test)

print("\n--- Stacking (Learned on Stage A) ---")
stack_res = fit_stacking_ensemble(
    y=y_full,
    F=F_full,
    cal_dates=cal_dates_list,
    lambdas=STACKING_LAMBDAS
)
# Extrahiere Vorhersagen nur für Stage B
ens_stack = stack_res.y_pred.loc[dates_test]

print(f"  Best Lambda: {stack_res.lambda_opt}")
print("  Weights (from Stage A):")
print(stack_res.weights)

print("\n--- EWA (Online Learning A -> B) ---")
# EWA läuft über die ganze Historie (A+B), damit es bei B schon "schlau" ist
ewa_res = fit_ewa_ensemble(
    y=y_full,
    F=F_full,
    cal_dates=cal_dates_list, # Tuning von Eta auf A
    etas=EWA_ETAS,
    delta=EWA_DELTA
)
ens_ewa = ewa_res.y_pred.loc[dates_test]

print(f"  Best Eta: {ewa_res.eta_opt}")

# --- EVALUATION ---

df_ens = pd.DataFrame(index=dates_test)
df_ens["y_true"] = y_test
for m in BASE_MODELS:
    df_ens[f"base_{m}"] = F_test[m]

df_ens["ens_equal"] = ens_equal
df_ens["ens_stack"] = ens_stack
df_ens["ens_ewa"] = ens_ewa
df_ens["dispersion"] = F_test.std(axis=1)

print("\n" + "="*60)
print(f"RESULTS (Full Stage B: {dates_test[0].date()} - {dates_test[-1].date()})")
print("="*60)

def score(y, y_hat, label):
    r = rmse(y, y_hat)
    print(f"{label:<25} | RMSE: {r:.4f}")

print("--- Base Models ---")
for m in BASE_MODELS:
    score(y_test, F_test[m], m)

print("\n--- Ensembles ---")
score(y_test, ens_equal, "Equal-Weight")
score(y_test, ens_stack, "Stacked (Static)")
score(y_test, ens_ewa, "EWA (Online)")

# Speichern
df_ens.to_csv(OUT_DIR / "ensemble_predictions.csv")
print(f"\nSaved to {OUT_DIR}")
# ==============================================================================
# 12. Export als "Fake" Stage B Modelle für einfache Vergleichbarkeit
# ==============================================================================
# Ziel: Erstelle Ordnerstruktur wie bei normalen Modellen:
# outputs/stageB/ensemble_stacking/monthly/preds.csv
# outputs/stageB/ensemble_ewa/monthly/preds.csv
# ...

def export_ensemble_to_stageB(
    df_results: pd.DataFrame,
    col_name: str,
    model_export_name: str,
    base_dir: Path
):
    """
    Speichert eine Ensemble-Spalte im Format der Stage B Predictions.
    """
    # Zielverzeichnis erstellen
    model_dir = base_dir / "stageB" / model_export_name
    monthly_dir = model_dir / "monthly"
    summary_dir = model_dir / "summary"

    monthly_dir.mkdir(parents=True, exist_ok=True)
    summary_dir.mkdir(parents=True, exist_ok=True)

    # 1. preds.csv erstellen
    # Das Format muss exakt den anderen Modellen entsprechen
    df_out = pd.DataFrame()
    df_out["date_t_plus_1"] = df_results.index
    # Wir brauchen eine Dummy 't' Spalte (Origin), nehmen wir einfach Monat davor
    df_out["t"] = df_results.index - pd.DateOffset(months=1)
    df_out["y_true"] = df_results["y_true"].values
    df_out["y_pred"] = df_results[col_name].values

    # Metadaten faken, damit die Analysis-Skripte nicht abstürzen
    df_out["config_id"] = "ensemble_v1" # Dummy Config ID
    df_out["is_active"] = True          # Ensembles sind immer "aktiv"

    # Speichern
    out_path = monthly_dir / "preds.csv"
    df_out.to_csv(out_path, index=False)
    print(f"  Exportiert: {out_path}")

    # 2. summary.csv erstellen (wird oft zum Laden der Configs gesucht)
    # Berechne RMSE für dieses Ensemble
    mse = ((df_out["y_true"] - df_out["y_pred"])**2).mean()
    rmse_val = np.sqrt(mse)

    df_sum = pd.DataFrame([{
        "config_id": "ensemble_v1",
        "model": model_export_name,
        "rmse_overall": rmse_val, # Gesamt-RMSE
        "setup_name": "Ensemble"
    }])
    df_sum.to_csv(summary_dir / "summary.csv", index=False)

print("\n--- Exportiere Ensembles in Stage B Struktur ---")

# Mapping: Spaltenname im DataFrame -> Name des Ordners in outputs/stageB
ensembles_to_export = {
    "ens_equal":  "ensemble_equal_weight",
    "ens_trim10": "ensemble_trimmed",
    "ens_median": "ensemble_median",
    "ens_stack":  "ensemble_stacking",
    "ens_ewa":    "ensemble_ewa"
}

OUTPUTS_ROOT = PROJECT_ROOT / "outputs"

for col, folder_name in ensembles_to_export.items():
    if col in df_ens.columns:
        export_ensemble_to_stageB(df_ens, col, folder_name, OUTPUTS_ROOT)

print("\nFertig! Die Ensembles sind jetzt bereit für die Vergleichs-Notebooks.")

Projekt-Root: /Users/jonasschernich/Documents/Masterarbeit/Code

1. Lade Stage B (Test Set)...
   Test Period: 2011-03-01 bis 2024-12-01 (n=166)

2. Lade Stage A (Calibration Set, Block 3)...
   Cal Period:  2009-07-01 bis 2011-02-01 (n=20)

--- Benchmark Ensembles (Equal/Trim/Median) ---

--- Stacking (Learned on Stage A) ---
  Best Lambda: 0.01
  Weights (from Stage A):
elastic_net_without_target_700    4.858722e-01
lightgbm_without_target_mac       1.721713e-16
svr_without_target                2.711475e-01
tabpfn_without_target             2.429803e-01
Name: weights, dtype: float64

--- EWA (Online Learning A -> B) ---
  Best Eta: 0.2

RESULTS (Full Stage B: 2011-03-01 - 2024-12-01)
--- Base Models ---
elastic_net_without_target_700 | RMSE: 2.3126
lightgbm_without_target_mac | RMSE: 2.3156
svr_without_target        | RMSE: 2.3072
tabpfn_without_target     | RMSE: 2.1976

--- Ensembles ---
Equal-Weight              | RMSE: 2.2398
Stacked (Static)          | RMSE: 2.2363
EWA (Online)