In [1]:
# ============================
# TabPFN – Minimaler Debug-Workflow
# ============================
# Ziel:
# - Gleiche Logik wie ET/EN/LGBM-Notebooks (run_stageA / run_stageB)
# - Aber: preset="debug" + extrem kleines Grid
# - Kein Dynamic-FI, nur Full-FE (Gleis 1/2)
# ============================

import os, sys
from pathlib import Path
import numpy as np
import pandas as pd
from itertools import product

# --- 1) Pfad-Setup (analog ET.ipynb) --------------------------------------
def _locate_repo_root(
    start: Path, *, marker: str = "src", max_levels: int = 6
) -> Path:
    """Find the nearest directory at or above ``start`` that contains ``marker``.

    Args:
        start: Directory where the upward search begins.
        marker: Name of the directory entry whose presence identifies the
            repository root.
        max_levels: Number of directories to inspect, counting ``start``
            itself. Values below 1 disable the search.

    Returns:
        The first inspected directory that contains ``marker``. If none of
        the inspected directories does, the resolved ``start`` is returned.
    """
    origin = start.resolve()
    # ``parents`` ends at the filesystem root, so the walk cannot run past it.
    candidates = [origin, *origin.parents][: max(max_levels, 0)]
    for candidate in candidates:
        if (candidate / marker).exists():
            return candidate
    return origin

# The current working directory is used as the starting point of the search.
NOTEBOOK_DIR = Path.cwd()
PROJECT_ROOT = _locate_repo_root(NOTEBOOK_DIR)
# Publish the root through the environment.
# NOTE(review): presumably read by project code under ``src`` - confirm.
os.environ["PROJECT_ROOT"] = str(PROJECT_ROOT)
# Put the root first on sys.path so the ``src.*`` imports further down resolve.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

print("PROJECT_ROOT =", PROJECT_ROOT)

# --- 2) Repo-Imports -------------------------------------------------------
from src.config import (
    GlobalConfig,
    DEFAULT_CORR_SPEC,
    EWMA_CORR_SPEC,
    outputs_for_model,
)
from src.tuning import run_stageA, run_stageB
from src.io_timesplits import (
    load_target,
    load_ifo_features,
    # Für Debug: wir nutzen hier nur Full-FE, also KEIN Dynamic-FI
    # load_full_lagged_features, load_rolling_importance
)
from src.models.TabPFN import ForecastModel   # dein TabPFN-Wrapper

# --- 3) Debug-Meta ---------------------------------------------------------
# This debug workflow only supports the Full-FE pipeline, so the flag stays False.
USE_DYNAMIC_FI_PIPELINE = False
SEED = 42

# Each pipeline variant gets its own model name.
MODEL_NAME = "tabpfn_dynamic_fi_debug" if USE_DYNAMIC_FI_PIPELINE else "tabpfn_debug"

# NOTE(review): presumably prepares the output locations for this model name -
# confirm in src.config.
outputs_for_model(MODEL_NAME)
print(f"Modell {MODEL_NAME} (Debug) wird getunt.")

# --- 4) Daten laden (Full-FE / Gleis 1+2) ---------------------------------
y = load_target()
X_ifo = load_ifo_features()

# Full-FE only: restrict target and features to the index labels they share.
idx = y.index.intersection(X_ifo.index)
y, X_ifo = y.loc[idx], X_ifo.loc[idx]

# The Dynamic-FI inputs are not loaded in this workflow and stay None.
X_full_lagged = rolling_imp = None

print("Full-FE Debug-Daten geladen. Shapes:", X_ifo.shape, y.shape)

# --- 5) Debug-Config (kleiner als Thesis) ---------------------------------
def base_cfg_debug() -> GlobalConfig:
    """Build the ``GlobalConfig`` used by this debug run.

    Starts from ``GlobalConfig(preset="debug")`` and then overrides four
    policy attributes.

    NOTE(review): this relies on ``src.config`` defining a "debug" preset. If
    it does not, start from another preset (e.g. "thesis") and shrink the
    individual settings by hand.

    Returns:
        The configured ``GlobalConfig`` instance.
    """
    # Further tightening is possible here; the debug preset is expected to be
    # reduced already.
    overrides = {
        "policy_window": 12,  # optional: smaller than 24
        "policy_decay": 0.95,
        "policy_gain_min": 0.03,
        "policy_cooldown": 3,
    }
    cfg = GlobalConfig(preset="debug")
    for attr_name, attr_value in overrides.items():
        setattr(cfg, attr_name, attr_value)
    return cfg

# Single config instance that is handed to both tuning stages further down.
cfg0 = base_cfg_debug()

# --- 6) Korrelations-Helper (wie ET/EN/LGBM) ------------------------------
def make_corr_spec(kind: str) -> dict:
    """Return a fresh top-level copy of the correlation spec named by ``kind``.

    Args:
        kind: ``"expanding"`` selects ``DEFAULT_CORR_SPEC``; ``"ewm"`` selects
            ``EWMA_CORR_SPEC``.

    Returns:
        A new dict built from the selected spec, so callers do not share the
        module-level template's top-level mapping.

    Raises:
        ValueError: If ``kind`` is neither ``"expanding"`` nor ``"ewm"``.
    """
    if kind not in ("expanding", "ewm"):
        raise ValueError("kind must be 'expanding' or 'ewm'")
    template = DEFAULT_CORR_SPEC if kind == "expanding" else EWMA_CORR_SPEC
    return dict(template)

# --- 7) Minimal-Grid (Full FE, TabPFN-HP klein gehalten) ------------------
# Guard: Dynamic-FI is intentionally left out of this debug workflow. A small
# Dynamic-FI grid could be added later, analogous to the EN/LGBM notebooks.
if USE_DYNAMIC_FI_PIPELINE:
    raise RuntimeError("Debug-Notebook ist nur für Full-FE (USE_DYNAMIC_FI_PIPELINE=False) gedacht.")

print("Erstelle MINIMALES HP-Grid für 'Full FE' (Debug)...")

# A) Feature-engineering / dimensionality-reduction options: ONE choice each.
corr_options = [
    ("expanding", make_corr_spec("expanding")),  # expanding only
    # To try EWMA as well: ("ewm", make_corr_spec("ewm")),
]
lag_candidates_list = [(1, 2, 3, 6, 12)]  # could also be reduced to (1, 3)
top_k_lags_list = [1]
use_rm3_list = [True]  # [False] would do just as well for a debug run
k1_topk_list = [100]  # a single variant only
redundancy_param_list = [0.90]
dr_options_list = [
    {"dr_method": "none"},  # no PCA/PLS in debug
]

# B) TabPFN-specific hyper-parameters: deliberately a single combination to
#    save compute. Further wrapper-specific lists (e.g. a max-time list) could
#    be added here.
n_estimators_list = [1]

# C) No target blocks and no sample-weight decay for the debug run.
target_block_options = [None]
weighting_options = [{"sample_weight_decay": None}]


def build_model_grid_full_fe_debug():
    """Expand the module-level option lists into a list of hyper-parameter dicts.

    The result is the Cartesian product of all option lists. The correlation
    option varies slowest and the weighting option fastest. Each dict holds the
    feature-engineering keys, the entries of the chosen DR option,
    ``corr_tag``/``corr_spec``, the model keys (``n_estimators``, ``seed``),
    ``target_block_set`` and finally the entries of the chosen weighting
    option. Later entries win on key collisions.

    Returns:
        A list with one dict per combination; empty if any option list is empty.
    """
    axes = (
        corr_options,
        lag_candidates_list,
        top_k_lags_list,
        use_rm3_list,
        k1_topk_list,
        redundancy_param_list,
        dr_options_list,
        n_estimators_list,
        target_block_options,
        weighting_options,
    )
    return [
        {
            "lag_candidates": lags,
            "top_k_lags_per_feature": k_lags,
            "use_rm3": rm3,
            "k1_topk": k1,
            "redundancy_param": red,
            **dr_opt,  # 'dr_method': 'none'
            "corr_tag": corr_tag,
            "corr_spec": corr_spec,
            "n_estimators": n_est,
            "seed": SEED,
            # Further keys required by the TabPFN wrapper (e.g. "use_gpu")
            # would go here.
            "target_block_set": block_set,
            **weighting,
        }
        for (
            (corr_tag, corr_spec),
            lags,
            k_lags,
            rm3,
            k1,
            red,
            dr_opt,
            n_est,
            block_set,
            weighting,
        ) in product(*axes)
    ]


model_grid = build_model_grid_full_fe_debug()

print("Anzahl HP-Kombinationen (Debug):", len(model_grid))
print("Erstes HP-Set (Debug):", model_grid[0] if model_grid else "Grid ist leer")

# --- 8) Stage A/B – identische Logik wie bei ET/EN/LGBM -------------------
# Run Stage A and Stage B with the same call pattern as the ET/EN/LGBM
# notebooks. Both stages are skipped when the grid is empty.
if not model_grid:
    print("Keine gültigen HP-Kombinationen gefunden, Stages übersprungen.")
else:
    # Stage A evaluates the whole grid; its return value is the shortlist that
    # Stage B receives.
    shortlist = run_stageA(
        model_name=MODEL_NAME,
        model_ctor=lambda hp: ForecastModel(hp),
        model_grid=model_grid,
        X=X_ifo,
        y=y,
        cfg=cfg0,
        # Keep the run tiny: at most two final configs, one survivor per block.
        keep_top_k_final=min(2, len(model_grid)),
        min_survivors_per_block=1,
    )

    run_stageB(
        model_name=MODEL_NAME,
        model_ctor=lambda hp: ForecastModel(hp),
        shortlist=shortlist,
        X=X_ifo,
        y=y,
        cfg=cfg0,
    )

    # Report completion only when the stages actually ran. Previously this was
    # printed even after the "skipped" message, pointing at outputs that were
    # never written.
    print(f"\nDebug-Run fertig. Check outputs/stageA|stageB/{MODEL_NAME} für Ergebnisse.")


PROJECT_ROOT = /Users/jonasschernich/Documents/Masterarbeit/Code
Modell tabpfn_debug (Debug) wird getunt.
INFO in load_ifo_features: Renaming columns to ensure validity.
Full-FE Debug-Daten geladen. Shapes: (407, 2160) (407,)
Erstelle MINIMALES HP-Grid für 'Full FE' (Debug)...
Anzahl HP-Kombinationen (Debug): 1
Erstes HP-Set (Debug): {'lag_candidates': (1, 2, 3, 6, 12), 'top_k_lags_per_feature': 1, 'use_rm3': True, 'k1_topk': 100, 'redundancy_param': 0.9, 'dr_method': 'none', 'corr_tag': 'expanding', 'corr_spec': {'mode': 'expanding', 'window': None, 'lam': None}, 'n_estimators': 1, 'seed': 42, 'target_block_set': None, 'sample_weight_decay': None}
[Stage A] Using FULL FE pipeline (Gleis 1 & 2).
[Stage A][Block 1] train_end=48, OOS=49-60 | configs=1
  - Config 1/1
    · Month 5/12 processed | running...RMSE=0.8429
    · Month 10/12 processed | running...RMSE=1.1182
    · Month 12/12 processed | running...RMSE=1.1768
[Stage A][Block 1] kept 1 configs (floor=1).
[Stage A][Block 2] train_

KeyboardInterrupt: 