# TabPFN Rolling-Origin Workflow (with FE Grid, corr-spec variants, and target-only on/off)

This notebook runs Stage A (calibration) and Stage B (final) for TabPFN
over a **feature-engineering grid** in `tuning.py`, across **correlation specs**
(expanding vs EWMA) and **target-only blocks** (TSFresh + Chronos) on/off.

**Prereqs:** The repo layout with `src/` (config, tuning, io_timesplits, models) must be available,
and the TSFresh/Chronos parquet files must exist whenever `use_target_blocks=True` is selected.

> If `tabpfn` is missing in your env: `pip install tabpfn torch`.


In [1]:
# --- Setup & Imports ---
import os, sys
from pathlib import Path


# Try to locate repo root that contains `src/`
def _locate_repo_root(start: Path, max_levels: int = 5) -> Path:
    """Find the nearest directory at or above ``start`` that contains ``src/``.

    Args:
        start: Directory to begin the search from.
        max_levels: Number of directories to inspect, counting ``start``
            itself first and then its successive parents.

    Returns:
        The first inspected directory that has a ``src`` sub-directory, or
        the resolved ``start`` when none is found within ``max_levels``.
    """
    origin = start.resolve()
    # `origin` first, then its ancestors from nearest to farthest; the
    # sequence ends at the filesystem root, so no explicit root check is needed.
    for candidate in (origin, *origin.parents)[:max_levels]:
        # Require a directory: a stray file named `src` does not mark a repo root.
        if (candidate / 'src').is_dir():
            return candidate
    return origin


# Anchor the repo-root search at the current working directory.
NOTEBOOK_DIR = Path.cwd()
PROJECT_ROOT = _locate_repo_root(NOTEBOOK_DIR)
# Publish the root through the environment and prepend it to sys.path
# (only once) so that the `src.*` imports below can be resolved.
os.environ['PROJECT_ROOT'] = str(PROJECT_ROOT)
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

from src.config import GlobalConfig, DEFAULT_CORR_SPEC, outputs_for_model
from src.tuning import run_stageA, run_stageB
from src.io_timesplits import load_target, load_ifo_features
from src.models.tabpfn import ForecastModel

import numpy as np, pandas as pd

# Ensure base output structure exists - only ONE model name is used:
MODEL_NAME = "tabpfn"
outputs_for_model(MODEL_NAME)
# Echo the resolved root so a wrong working directory is easy to spot.
print('PROJECT_ROOT =', PROJECT_ROOT)
print('Imports ok. If `tabpfn` is missing: `pip install tabpfn torch`.')

PROJECT_ROOT = /Users/jonasschernich/Documents/Masterarbeit/Code
Imports ok. If `tabpfn` is missing: `pip install tabpfn torch`.


In [2]:
# --- Load data ---
y = load_target()  # ΔIP with DatetimeIndex
X = load_ifo_features()  # ifo panel

# Keep only the index labels present in both the target and the feature panel,
# so that y and X are aligned row by row.
idx = y.index.intersection(X.index)
y, X = y.loc[idx], X.loc[idx]
print('Shapes:', X.shape, y.shape)
# Summary statistics of the aligned target as a quick sanity check.
display(y.describe())


Shapes: (407, 20) (407,)


count    407.000000
mean       0.042741
std        1.905304
min      -18.219895
25%       -0.822431
50%        0.105485
75%        1.063048
max       10.000000
Name: IP_change, dtype: float64

In [3]:
# --- Corr-spec helper ---
def make_corr_spec(kind: str, window: int = 60, lam: float = 0.98) -> dict:
    """Build a correlation-spec dict derived from ``DEFAULT_CORR_SPEC``.

    Args:
        kind: ``'expanding'`` or ``'ewm'``.
        window: Window length stored under ``'window'`` (used for ``'ewm'``).
        lam: Decay factor stored for ``'ewm'``.

    Returns:
        A new dict; ``DEFAULT_CORR_SPEC`` itself is not modified.

    Raises:
        ValueError: If ``kind`` is neither ``'expanding'`` nor ``'ewm'``.
    """
    if kind not in ('expanding', 'ewm'):
        raise ValueError("kind must be 'expanding' or 'ewm'")

    spec = dict(DEFAULT_CORR_SPEC)  # shallow copy so the shared default stays untouched
    spec['mode'] = kind
    if kind == 'expanding':
        spec.pop('window', None)
        spec.pop('lambda', None)
    else:
        spec['window'] = int(window)
        decay = float(lam)
        # The default corr_spec printed by GlobalConfig names this key 'lam';
        # writing only 'lambda' would leave 'lam' at its default value.
        # 'lambda' is still written for any consumer that reads the old key.
        # NOTE(review): confirm which key the consumers in src/ actually read.
        spec['lam'] = decay
        spec['lambda'] = decay
    return spec

# Correlation options become part of the HP grid. Each entry is (tag, spec);
# tags must be unique and should reflect the decay factor they label.
corr_options = [
    ("ewm_098", make_corr_spec("ewm", window=40, lam=0.98)),
    ("ewm_096", make_corr_spec("ewm", window=60, lam=0.96)),
    ("expanding", make_corr_spec("expanding")),
]


In [4]:
# --- Base config (splits & policy as in thesis) ---
def base_cfg() -> GlobalConfig:
    """Create the base ``GlobalConfig`` for the Stage A / Stage B runs.

    Returns:
        A fresh ``GlobalConfig`` with the rolling-origin splits, refresh
        cadence, policy settings and target-block switch assigned below.
    """
    cfg = GlobalConfig()
    # Stage A/B rolling-origin splits
    cfg.W0_A     = 180
    cfg.BLOCKS_A = [(181,200), (201,220), (221,240)]
    cfg.W0_B     = 240
    # FE refresh cadence (months). The GlobalConfig field is named
    # `refresh_cadence_months`; the shorter attribute is kept as well in case
    # other code reads it.
    cfg.refresh_cadence_months = 12
    cfg.refresh_cadence = 12
    cfg.policy_window   = 24
    cfg.policy_decay    = 0.95
    cfg.policy_gain_min = 0.03
    cfg.policy_cooldown = 3
    # Target-only blocks (TSFresh + Chronos parquet) are switched ON here.
    cfg.use_target_blocks = True
    return cfg

# Instantiate the shared base config; the bare name on the last line makes the
# notebook display its repr.
cfg0 = base_cfg()
cfg0


GlobalConfig(seed=123, refresh_cadence_months=12, corr_spec={'mode': 'expanding', 'window': None, 'lam': None}, lag_candidates=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), top_k_lags_per_feature=1, use_rm3=True, k1_topk=50, screen_threshold=None, redundancy_method='greedy', redundancy_param=0.9, dr_method='none', pca_var_target=0.95, pca_kmax=25, pls_components=2, W0_A=180, BLOCKS_A=[(181, 200), (201, 220), (221, 240)], W0_B=240, policy_window=24, policy_gain_min=0.03, policy_cooldown=3)

In [6]:
# --- FE/DR/Screening grid (corr is now part of the HP) ---
# Each entry below is ONE target-block configuration to be tested.
target_block_options = [
    None,                       # 1. ifo features only (baseline)
    ["AR1"],                    # 2. ifo + AR1
    ["Chronos"],                # 3. ifo + Chronos
    ["TSFresh"],                # 4. ifo + TSFresh
    ["AR1", "TSFresh", "Chronos"] # 5. all blocks together
]
# -----------------------------------------------------------------


# Candidate lag sets; a single set is used here.
lag_sets = [
    (1, 2, 3, 4, 6, 12),
]
# Single-valued axes: each list contributes exactly one value to the grid.
topk_lags     = [1]
use_rm3_flags = [True]
k1_topk_vals  = [20]
redund_params = [0.99]

# Dimensionality-reduction variants; each dict is merged into the HP dict.
dr_options = [
    {"dr_method": "none"},
    {"dr_method": "pca", "pca_var_target": 0.9, "pca_kmax": 50},
    {"dr_method": "pls", "pls_components": 4},
]

def build_model_grid():
    """Build the list of hyper-parameter dicts for the model grid.

    The grid is the Cartesian product of the module-level axes
    ``corr_options``, ``lag_sets``, ``topk_lags``, ``use_rm3_flags``,
    ``k1_topk_vals``, ``redund_params``, ``dr_options`` and
    ``target_block_options``, with the last axis varying fastest.

    Returns:
        A list of dicts, one per combination. Each dict owns its own copy of
        the correlation spec and target-block set, so changing one entry in
        place cannot affect another entry or the module-level option lists.
    """
    import copy
    from itertools import product

    combos = product(
        corr_options,
        lag_sets,
        topk_lags,
        use_rm3_flags,
        k1_topk_vals,
        redund_params,
        dr_options,
        target_block_options,
    )

    hp_grid = []
    for (corr_tag, corr_spec), lags, k_top, rm3, k1, red, dr, block_set in combos:
        hp = {
            'lag_candidates': lags,
            'top_k_lags_per_feature': k_top,
            'use_rm3': rm3,
            'k1_topk': k1,
            'screen_threshold': None,
            'redundancy_method': 'greedy',
            'redundancy_param': red,

            # TabPFN specifics
            'use_gpu': False,          # set True if CUDA is stable
            'posterior_samples': 8,

            # Correlation spec as a hyper-parameter (copied to avoid aliasing)
            'corr_tag': corr_tag,
            'corr_spec': dict(corr_spec),

            # Target-block set as a hyper-parameter (copied; None stays None)
            'target_block_set': copy.copy(block_set),
        }
        # DR keys are merged last, so they follow the fixed keys above.
        hp.update(dr)
        hp_grid.append(hp)
    return hp_grid

# Materialise the grid once and report how many HP combinations it contains
# (the printed label is German for "HP combinations").
model_grid = build_model_grid()
print("HP-Kombinationen:", len(model_grid))


HP-Kombinationen: 45


In [7]:
# --- Stage A/B: ONE run, ONE output folder ---
# Stage A evaluates the full grid and returns the shortlist that Stage B consumes.
shortlist = run_stageA(
    model_name=MODEL_NAME,
    model_ctor=lambda hp: ForecastModel(hp),
    model_grid=model_grid,
    X=X, y=y, cfg=cfg0,
    keep_top_k_final=4,
    min_survivors_per_block=3,
)

# Stage B runs only the shortlisted configurations, with the same data and config.
run_stageB(
    model_name=MODEL_NAME,
    model_ctor=lambda hp: ForecastModel(hp),
    shortlist=shortlist,
    X=X, y=y, cfg=cfg0,
    # max_months=24,   # optional throttling
)

print("\nDone. Check outputs/stageA|stageB/tabpfn for results.")

[Stage A][Block 1] train_end=180, OOS=181-200 | configs=45
  - Config 1/45
    · Month 5/20 processed | running...RMSE=1.6210
    · Month 10/20 processed | running...RMSE=1.2966
    · Month 15/20 processed | running...RMSE=1.2222
    · Month 20/20 processed | running...RMSE=1.0928
  - Config 2/45
    · Month 5/20 processed | running...RMSE=1.6210
    · Month 10/20 processed | running...RMSE=1.2966
    · Month 15/20 processed | running...RMSE=1.2222
    · Month 20/20 processed | running...RMSE=1.0928
  - Config 3/45
    · Month 5/20 processed | running...RMSE=1.6006
    · Month 10/20 processed | running...RMSE=1.2716
    · Month 15/20 processed | running...RMSE=1.2068
    · Month 20/20 processed | running...RMSE=1.1161
  - Config 4/45
    · Month 5/20 processed | running...RMSE=1.4624
    · Month 10/20 processed | running...RMSE=1.1710
    · Month 15/20 processed | running...RMSE=1.1116
    · Month 20/20 processed | running...RMSE=0.9822
  - Config 5/45
    · Month 5/20 processed | runn

KeyboardInterrupt: 