# In [None]:
# ==============================================================================
# TabPFN – Thesis Tuning Pipeline
# ==============================================================================
# Spezifikation gemäß Masterarbeit:
# - Modell: Pre-Trained (kein HP-Tuning). Fokus liegt auf Input-Optimierung.
# - Setup I & II: FE-Grid mit DR-Constraint (TabPFN braucht kompakte Inputs).
#   -> Wenn SIS > 300 Features, ist DR (PCA/PLS) Pflicht.
# - Setup III: Dynamic FI (Strikt Top 20 Features).
# - Stage A Shortlist: Top 10 frozen.
# ==============================================================================

import os, sys
from pathlib import Path
import numpy as np
import pandas as pd
from itertools import product

# --- 1) Pfad-Setup ---
def _locate_repo_root(start: Path, *, marker: str = "src", max_levels: int = 6) -> Path:
    """Walk upwards from *start* to find the directory that holds the project package.

    The resolved *start* directory is inspected first, followed by its
    ancestors. The first directory that contains a sub-directory named
    *marker* is returned.

    Args:
        start: Directory where the search begins.
        marker: Name of the sub-directory that identifies the project root.
        max_levels: Maximum number of directories to inspect, counting
            *start* itself. Values below 1 disable the search.

    Returns:
        The first matching directory, or the resolved *start* when no
        inspected directory contains *marker*.
    """
    origin = start.resolve()
    # origin followed by its ancestors up to the filesystem root, truncated
    # to the requested search depth.
    candidates = (origin, *origin.parents)[: max(max_levels, 0)]
    for candidate in candidates:
        # is_dir() rather than exists(): a plain file that happens to be
        # called like the marker cannot be the importable package directory.
        if (candidate / marker).is_dir():
            return candidate
    return origin

# Derive the project root from the current working directory, publish it via
# the PROJECT_ROOT environment variable and put it at the front of sys.path
# (unless it is already listed) so the `src.*` imports below can be resolved.
NOTEBOOK_DIR = Path.cwd()
PROJECT_ROOT = _locate_repo_root(NOTEBOOK_DIR)
_project_root_str = str(PROJECT_ROOT)
os.environ["PROJECT_ROOT"] = _project_root_str
if _project_root_str not in sys.path:
    sys.path.insert(0, _project_root_str)

# --- 2) Imports ---
from src.config import (
    GlobalConfig,
    DEFAULT_CORR_SPEC,    # expanding
    EWMA_CORR_SPEC,       # ewma
    outputs_for_model,
)
from src.tuning import run_stageA, run_stageB
from src.io_timesplits import (
    load_target,
    load_ifo_features,
    load_full_lagged_features,
    load_rolling_importance,
)
# WICHTIG: Modell-Import für TabPFN
from src.models.TabPFN import ForecastModel

# --- 3) Configuration ---
# True  = dynamic feature-importance pipeline (Setup III)
# False = standard setup (Setup I & II)
USE_DYNAMIC_FI_PIPELINE = True
SEED = 42

# GPU settings that are forwarded to TabPFN through the grid entries.
USE_GPU = True
FORCE_DEVICE = None  # e.g. "cuda" or "mps" in case auto-detection fails

# The model name follows the selected pipeline.
MODEL_NAME = "tabpfn_dynamic_fi" if USE_DYNAMIC_FI_PIPELINE else "tabpfn_with_target"

# Called for its side effect only (the return value is discarded);
# presumably prepares the output location for this model - confirm in src.config.
outputs_for_model(MODEL_NAME)
print(f"--- Starte Tuning für: {MODEL_NAME} ---")

# --- 4) Load data ---
y = load_target()
X_ifo = load_ifo_features()

# Restrict the target and the ifo features to the index labels they share.
idx_common = y.index.intersection(X_ifo.index)
y, X_ifo = y.loc[idx_common], X_ifo.loc[idx_common]

# Inputs that only the dynamic-FI pipeline fills; they stay None otherwise.
X_full_lagged = rolling_imp = y_fi = None

if not USE_DYNAMIC_FI_PIPELINE:
    print(f"Full FE Modus (Setup I/II): {X_ifo.shape[1]} Basis-Features.")
else:
    FI_BASE_DIR = PROJECT_ROOT.joinpath("outputs", "feature_importance", "outputs_no_missing")
    try:
        X_full_lagged = load_full_lagged_features(base_dir=FI_BASE_DIR)
        rolling_imp = load_rolling_importance(base_dir=FI_BASE_DIR)

        # Keep only the index labels present in the target, the lagged
        # features and the rolling importance table.
        idx_fi = y.index.intersection(X_full_lagged.index)
        idx_fi = idx_fi.intersection(rolling_imp.index)
        y_fi = y.loc[idx_fi]
        X_full_lagged = X_full_lagged.loc[idx_fi]
        rolling_imp = rolling_imp.loc[idx_fi]
        print(f"Dynamic FI Modus: {X_full_lagged.shape[1]} Features geladen.")
    except FileNotFoundError:
        # Missing artefacts: report and stop with exit status 1.
        print("FEHLER: Dynamic FI Artefakte nicht gefunden.")
        sys.exit(1)
# --- 5) Config Defaults (Thesis Policy) ---
def get_thesis_cfg() -> GlobalConfig:
    """Create the ``GlobalConfig`` used for the thesis runs.

    Starts from the ``"thesis"`` preset and then sets ``policy_window``,
    ``policy_decay`` and ``selection_mode`` on the instance.

    Returns:
        The configured ``GlobalConfig`` instance.
    """
    thesis_cfg = GlobalConfig(preset="thesis")
    # Applied in this order on top of the preset.
    policy_overrides = {
        "policy_window": 24,
        "policy_decay": 0.97,
        "selection_mode": "decayed_best",
    }
    for attr_name, attr_value in policy_overrides.items():
        setattr(thesis_cfg, attr_name, attr_value)
    return thesis_cfg

# Shared configuration object; handed to run_stageA / run_stageB as `cfg`.
cfg_obj = get_thesis_cfg()

# --- 6) Grid Definition ---------------------------------------

def build_grid_full_fe():
    """Build the grid for Setup I (ifo) and Setup II (ifo + target blocks).

    Each entry is a dict that combines the feature-engineering settings
    (``lag_candidates``, ``corr_spec``, ``k1_topk``, ``redundancy_param``),
    one dimensionality-reduction option, the target block set, the
    sample-weight setting and the TabPFN runtime options (``use_gpu``,
    ``device``, ``seed``) taken from the module-level constants.

    Combinations with ``dr_method == "none"`` and ``k1_topk`` above 300 are
    left out. Rationale from the original notes: TabPFN needs compact
    inputs, so a DR step (PCA/PLS) is mandatory beyond 300 features.

    Returns:
        list[dict]: one hyper-parameter dict per retained combination.
    """
    # Above this many pre-selected features a DR step is required.
    max_k1_without_dr = 300

    # A) Feature engineering & dimensionality reduction
    lag_sets = [tuple(range(7))]  # lags 0..6
    corr_variants = [
        # dict(...) copies the imported spec (expanding first, then EWMA).
        {"corr_spec": dict(spec)}
        for spec in (DEFAULT_CORR_SPEC, EWMA_CORR_SPEC)
    ]
    k1_values = [700]
    redundancy_values = [0.9, 1.0]
    dr_variants = [
        # With the only k1 value currently listed (700), every "none"
        # combination is dropped by the constraint below.
        {"dr_method": "none"},
        {"dr_method": "pca", "pca_kmax": 30, "pca_var_target": 0.99},
        {"dr_method": "pls", "pls_components": 30},
    ]

    # B) Target blocks (Setup II)
    block_sets = [
        None,  # Setup I
        # ["AR1", "Chronos", "TSFresh"]  # Setup II b (currently disabled)
    ]

    # C) Sample weights: the original notes state that TabPFN does not
    # support sample weights (see src/models/TabPFN.py), so only None is
    # listed to avoid redundant runs.
    weight_variants = [{"sample_weight_decay": None}]

    axes = (
        lag_sets,
        corr_variants,
        k1_values,
        redundancy_values,
        dr_variants,
        block_sets,
        weight_variants,
    )

    configs = []
    # The right-most axes vary fastest, which matches nesting the
    # block/weight loop inside the feature-engineering loop.
    for lags, corr, k1, red, dr, blocks, weights in product(*axes):
        if dr["dr_method"] == "none" and k1 > max_k1_without_dr:
            continue
        configs.append({
            "lag_candidates": lags,
            "k1_topk": k1,
            "redundancy_param": red,
            **dr,
            **corr,
            "target_block_set": blocks,
            **weights,
            # TabPFN specific
            "use_gpu": USE_GPU,
            "device": FORCE_DEVICE,
            "seed": SEED,
        })
    return configs

def build_grid_dynamic_fi():
    """Build the grid for Setup III (dynamic feature importance, strict top-N).

    Each entry holds ``n_features_to_use``, the sample-weight setting and the
    TabPFN runtime options (``use_gpu``, ``device``, ``seed``) taken from the
    module-level constants.

    Returns:
        list[dict]: one hyper-parameter dict per combination.
    """
    top_n_choices = [20]

    # As in the full-FE grid, only None is listed: the original notes state
    # that TabPFN ignores sample weights.
    weight_variants = [{"sample_weight_decay": None}]

    return [
        {
            "n_features_to_use": top_n,
            **weights,
            "use_gpu": USE_GPU,
            "device": FORCE_DEVICE,
            "seed": SEED,
        }
        for top_n, weights in product(top_n_choices, weight_variants)
    ]

# --- 7) Execution ---------------------------------------------

# Pick the grid, the target and the extra inputs for the active pipeline.
if USE_DYNAMIC_FI_PIPELINE:
    grid = build_grid_dynamic_fi()
    print(f"Dynamic FI Grid Größe (Setup III): {len(grid)} Konfigurationen.")
    _stage_y = y_fi
    # Only the dynamic-FI pipeline hands these two inputs to the stages.
    # NOTE(review): X_ifo is still passed as X below (labelled "Dummy" in the
    # original code); it is aligned to idx_common, not idx_fi - confirm that
    # the stages do not use X in this mode.
    _fi_kwargs = {"X_full_lagged": X_full_lagged, "rolling_imp": rolling_imp}
else:
    grid = build_grid_full_fe()
    print(f"Full FE Grid Größe (Setup I & II): {len(grid)} Konfigurationen.")
    # The DR constraint keeps this grid moderate in size.
    _stage_y = y
    _fi_kwargs = {}

# Arguments that Stage A and Stage B have in common.
_stage_kwargs = dict(
    model_name=MODEL_NAME,
    model_ctor=lambda hp: ForecastModel(hp),
    X=X_ifo,
    y=_stage_y,
    cfg=cfg_obj,
    **_fi_kwargs,
)

# NOTE(review): the file header mentions "Stage A Shortlist: Top 10", while
# keep_top_k_final=5 is passed here - confirm which value is intended.
shortlist = run_stageA(
    model_grid=grid,
    keep_top_k_final=5,
    min_survivors_per_block=5,
    **_stage_kwargs,
)

run_stageB(shortlist=shortlist, **_stage_kwargs)

print("\nTuning abgeschlossen.")