# 02 — Ejecutar E0 (ablación FS0/FS1/FS2)

Este notebook ejecuta el experimento **E0** para comparar el impacto de diferentes conjuntos de features (FS0/FS1/FS2) sobre modelos deep globales (LSTM / Transformer).

## Split
Usamos un split temporal global:
- `val_weeks = 8`
- `test_weeks = 39` (coherente con la evaluación final del proyecto)

Estrategia de evaluación:
1) Entrenar con **train** y evaluar en **val**.
2) Reentrenar con **train+val** y evaluar en **test**.

## Anti-leakage
- Features causales (lags/rollings) vienen de `src.common.make_features`.
- En modelos deep, el escalado se ajusta con el dataframe de entrenamiento del paso correspondiente.

**Requisito**: ejecuta antes el notebook 01 para generar `outputs/E0_ablation/feature_sets.json`.

In [77]:
from __future__ import annotations

import json
import time
import sys
from pathlib import Path

import pandas as pd
import torch

# Resolve the project root so that `import src.*` works: start from the current
# working directory and step up one level when only the parent contains `src/`.
NOTEBOOK_DIR = Path.cwd()
PROJECT_ROOT = NOTEBOOK_DIR
if not (PROJECT_ROOT / "src").exists() and (PROJECT_ROOT.parent / "src").exists():
    PROJECT_ROOT = PROJECT_ROOT.parent
sys.path.insert(0, str(PROJECT_ROOT))

from src.e0_ablation_utils import (
    collect_versions,
    get_project_paths,
    get_torch_device,
    set_global_seed,
)

SEED = 42
# Within this notebook DEBUG is only recorded in the run metadata; the quick
# smoke-test behaviour is switched on with FAST_MODE further below.
DEBUG = False

# Data and output locations as resolved by the project helper.
paths = get_project_paths(project_root=PROJECT_ROOT, output_dir="outputs/E0_ablation")
DATA_PATH, OUTPUT_DIR = paths.data_path, paths.output_dir

seed_info = set_global_seed(SEED, deterministic=False)

# Force CPU explicitly: whatever the helper reports, the experiment runs on "cpu".
raw_device, device_details = get_torch_device(prefer_cuda=False)
device = "cpu"
if isinstance(device_details, dict):
    # Annotate the helper's details with what was requested and what is available.
    device_details.update(
        requested="cpu",
        torch_cuda_available=bool(torch.cuda.is_available()),
    )

print(f"PROJECT_ROOT: {PROJECT_ROOT}")
print(f"DATA_PATH: {DATA_PATH}")
print(f"OUTPUT_DIR: {OUTPUT_DIR}")
print(f"DEVICE_SELECTED: {device}")
print(f"torch.cuda.is_available: {torch.cuda.is_available()}")
print(f"seed: {seed_info}")

PROJECT_ROOT: c:\Users\usuario\Documents\Master AI\TFM\MEMORIA 2.0
DATA_PATH: C:\Users\usuario\Documents\Master AI\TFM\MEMORIA 2.0\data\Walmart_Sales.csv
OUTPUT_DIR: C:\Users\usuario\Documents\Master AI\TFM\MEMORIA 2.0\outputs\E0_ablation
DEVICE_SELECTED: cpu
torch.cuda.is_available: False
seed: {'seed': 42, 'deterministic': False, 'numpy': 'ok', 'torch': 'ok'}


In [78]:
# Clean only the E0 outputs, but keep `feature_sets.json`: it is the artefact
# produced by notebook 01 that this notebook declares as a prerequisite, so
# wiping the whole directory would silently discard it on every run.
import shutil

OUT = OUTPUT_DIR
PRESERVED_OUTPUTS = {"feature_sets.json"}

OUT.mkdir(parents=True, exist_ok=True)
for _entry in OUT.iterdir():
    if _entry.name in PRESERVED_OUTPUTS:
        continue
    if _entry.is_dir() and not _entry.is_symlink():
        # Per-run folders (predictions/metrics/figures) are removed recursively.
        shutil.rmtree(_entry)
    else:
        # Plain files and symlinks are unlinked without following the link.
        _entry.unlink()
print("Cleaned:", OUT)

Cleaned: C:\Users\usuario\Documents\Master AI\TFM\MEMORIA 2.0\outputs\E0_ablation


In [79]:
# Load the feature sets (FS0/FS1/FS2) from disk, or rebuild the default ones.
feature_sets_path = OUTPUT_DIR / "feature_sets.json"
if not feature_sets_path.exists():
    # The fallback must work on a fresh kernel run top to bottom, where the
    # feature-config cell (LAGS/ROLLINGS) and the `src.common` import cell have
    # not been executed yet, so everything it needs is resolved right here.
    from src.common import EXOG_COLUMNS

    try:
        fallback_lags, fallback_rollings = list(LAGS), list(ROLLINGS)
    except NameError:
        # Same values as the official LAGS/ROLLINGS configured later in this notebook.
        fallback_lags, fallback_rollings = [1, 2, 4, 8, 52], [4, 8, 12]

    lag_cols = [f"lag_{k}" for k in fallback_lags]
    roll_cols = [c for w in fallback_rollings for c in (f"roll_mean_{w}", f"roll_std_{w}")]
    calendar_cols = ["weekofyear", "month", "year"]

    feature_sets = {
        # FS0: autoregressive features only (lags + rolling statistics).
        "FS0": {
            "add_calendar": False,
            "exog_cols": [],
            "feature_cols": lag_cols + roll_cols,
        },
        # FS1: FS0 plus calendar columns.
        "FS1": {
            "add_calendar": True,
            "exog_cols": [],
            "feature_cols": lag_cols + roll_cols + calendar_cols,
        },
        # FS2: FS1 plus the exogenous columns declared in src.common.
        "FS2": {
            "add_calendar": True,
            "exog_cols": list(EXOG_COLUMNS),
            "feature_cols": lag_cols + roll_cols + list(EXOG_COLUMNS) + calendar_cols,
        },
    }
    feature_sets_path.write_text(json.dumps(feature_sets, indent=2, ensure_ascii=False), encoding="utf-8")
    print("Created feature_sets.json at", feature_sets_path)
else:
    feature_sets = json.loads(feature_sets_path.read_text(encoding="utf-8"))
    print("Loaded feature sets:", list(feature_sets.keys()))

# Fail early, with a readable message, if the file lacks one of the sets
# that the rest of the notebook iterates over.
missing_feature_sets = [name for name in ("FS0", "FS1", "FS2") if name not in feature_sets]
if missing_feature_sets:
    raise KeyError(f"feature_sets.json is missing feature sets: {missing_feature_sets}")

# Quick peek at the size of each feature set.
for k in ["FS0", "FS1", "FS2"]:
    print(k, "n_features=", len(feature_sets[k]["feature_cols"]))

Created feature_sets.json at C:\Users\usuario\Documents\Master AI\TFM\MEMORIA 2.0\outputs\E0_ablation\feature_sets.json
FS0 n_features= 11
FS1 n_features= 14
FS2 n_features= 19


## Configuración del experimento
Ajusta hiperparámetros aquí. En modo `FAST_MODE=True` se reducen los epochs y se limita el número de tiendas para acelerar; `DEBUG` solo se registra en los metadatos de cada ejecución.

In [None]:
import importlib
import src.common as common_mod

# Re-execute src.common before importing names from it (presumably so that
# edits to the module are picked up without restarting the kernel).
common_mod = importlib.reload(common_mod)
from src.common import (
    EXOG_COLUMNS,
    TEST_WEEKS as TEST_WEEKS_DEFAULT,
    get_E2_test_mask,
    load_data,
    make_features,
    temporal_split,
)

# Split sizes: validation is fixed here, test follows the project-wide default.
VAL_WEEKS = 8
TEST_WEEKS = TEST_WEEKS_DEFAULT

# One entry per model: (label used in output folders, key into MODEL_CTORS).
MODEL_SPECS = [
    ("lstm_exog", "LSTMForecaster"),
    ("transformer_exog", "TransformerForecaster"),
]

# Load the base dataset.
df_raw = load_data(DATA_PATH)
print(f"df_raw shape: {df_raw.shape}")
print(f"date range: {df_raw['Date'].min()} → {df_raw['Date'].max()}")

df_raw shape: (6435, 8)
date range: 2010-02-05 00:00:00 → 2012-10-26 00:00:00


In [None]:
# Official feature configuration (kept aligned with 10_Run_All_Experiments).
LAGS = [1, 2, 4, 8, 52]
ROLLINGS = [4, 8, 12]

# BASE_CFG holds copies, so editing it in place does not alter LAGS/ROLLINGS.
BASE_CFG = dict(lags=LAGS.copy(), rollings=ROLLINGS.copy())

print(f"LAGS: {LAGS}")
print(f"ROLLINGS: {ROLLINGS}")

LAGS: [1, 2, 4, 8, 52]
ROLLINGS: [4, 8, 12]


In [82]:
# Fast/Full mode switch (both variants are sized for CPU runs).
FAST_MODE = False  # set to True for a quick smoke test

# Only `epochs` and `patience` differ between the two modes.
TRAINING_CFG = {
    "lookback": 52,
    "epochs": 3 if FAST_MODE else 20,
    "batch_size": 64,
    "lr": 1e-3,
    "suppress_lookback_warning": False,
    "early_stopping": True,
    "patience": 1 if FAST_MODE else 3,
    "device": device,
}
MAX_STORES = 5 if FAST_MODE else None  # None means every store is used
RUN_VALIDATION = "strategic"  # prefer "strategic" on CPU to avoid running K folds
K_FOLDS = 1

# Optionally restrict the data to the first MAX_STORES stores (by sorted id)
# so the smoke test stays cheap on CPU.
if MAX_STORES is None:
    df = df_raw.copy()
    print("FAST_MODE=False → usando todas las tiendas")
else:
    keep_stores = sorted(df_raw["Store"].unique())[:MAX_STORES]
    df = df_raw[df_raw["Store"].isin(keep_stores)].copy()
    print(f"FAST_MODE={FAST_MODE} → usando {len(keep_stores)} tiendas: {keep_stores}")

print("TRAINING_CFG:", TRAINING_CFG)
print("RUN_VALIDATION:", RUN_VALIDATION, "K_FOLDS:", K_FOLDS)

FAST_MODE=False → usando todas las tiendas
RUN_VALIDATION: strategic K_FOLDS: 1


In [83]:
# Global temporal split into train / validation / test.
train_raw, val_raw, test_raw, split_cfg = temporal_split(
    df,
    val_weeks=VAL_WEEKS,
    test_weeks=TEST_WEEKS,
)
print(split_cfg.as_dict())

# Sanity check: consecutive partitions must not overlap in time.
assert all(
    earlier["Date"].max() < later["Date"].min()
    for earlier, later in ((train_raw, val_raw), (val_raw, test_raw))
)
print("train/val/test shapes:", *(part.shape for part in (train_raw, val_raw, test_raw)))

{'train_start': '2010-02-05', 'train_end': '2011-12-02', 'val_start': '2011-12-09', 'val_end': '2012-01-27', 'test_start': '2012-02-03', 'test_end': '2012-10-26'}
train/val/test shapes: (4320, 8) (360, 8) (1755, 8)


In [84]:
# Build the engineered features once with the official config (LAGS/ROLLINGS)
# so that every feature set is evaluated on the same rows.
lag_cols_expected = [f"lag_{k}" for k in LAGS]
roll_cols_expected = [f"roll_mean_{w}" for w in ROLLINGS] + [f"roll_std_{w}" for w in ROLLINGS]

# make_features returns (df_feat_full, feature_cols).
df_feat_full, feature_cols_full = make_features(
    df,
    lags=LAGS,
    rollings=ROLLINGS,
    add_calendar=True,
)

# The official config must actually be reflected in the engineered frame.
missing_expected = [
    c for c in lag_cols_expected + roll_cols_expected if c not in df_feat_full.columns
]
if missing_expected:
    raise ValueError(f"make_features did not produce the expected columns: {missing_expected}")

lag_cols = [c for c in df_feat_full.columns if c.startswith("lag_")]
roll_cols = [c for c in df_feat_full.columns if c.startswith("roll_")]
print("LAGS:", LAGS)
print("ROLLINGS:", ROLLINGS)
print("n_lag_cols:", len(lag_cols), "n_roll_cols:", len(roll_cols))
print("sample lag/roll cols:", (lag_cols + roll_cols)[:10])

# Common evaluation mask: E2 test window ∩ rows without NaNs in the FS2 columns
# and the target. FS2 is used because it is the widest set this notebook defines.
fs2_cols = list(feature_sets["FS2"]["feature_cols"])

# A feature_sets.json built with a different lag/rolling config would otherwise
# fail below with an opaque KeyError; report the offending columns instead.
missing_fs2 = [c for c in fs2_cols if c not in df_feat_full.columns]
if missing_fs2:
    raise ValueError(
        f"feature_sets.json (FS2) references columns missing from the engineered frame: {missing_fs2}"
    )

e2_mask, e2_split = get_E2_test_mask(df_feat_full, test_weeks=TEST_WEEKS)
mask_no_nan_fs2 = df_feat_full[fs2_cols + ["Weekly_Sales"]].notna().all(axis=1)
eval_mask_common = e2_mask & mask_no_nan_fs2

print("E2 test window:", e2_split["test_start"], "→", e2_split["test_end"])
print("Stores in mask:", df_feat_full.loc[e2_mask, "Store"].nunique())
print("Points in eval_mask_common:", int(eval_mask_common.sum()))

LAGS: [1, 2, 4, 8, 52]
ROLLINGS: [4, 8, 12]
n_lag_cols: 5 n_roll_cols: 6
sample lag/roll cols: ['lag_1', 'lag_2', 'lag_4', 'lag_8', 'lag_52', 'roll_mean_4', 'roll_std_4', 'roll_mean_8', 'roll_std_8', 'roll_mean_12']
E2 test window: 2012-02-03 → 2012-10-26
Stores in mask: 45
Points in eval_mask_common: 1755


## Runner
Ejecuta una combinación (modelo, feature set), guarda predicciones y métricas en `outputs/E0_ablation/<model>__<FS>/...`

In [85]:
import importlib
import src.common as common
import src.models.lstm_forecaster as lstm_mod
import src.models.transformer_forecaster as transf_mod

# Re-execute the project modules (shared helpers first, then the two model
# modules) before binding the names used by the runner below.
common = importlib.reload(common)
lstm_mod = importlib.reload(lstm_mod)
transf_mod = importlib.reload(transf_mod)

from src.common import evaluate_predictions
from src.models.lstm_forecaster import LSTMForecaster
from src.models.transformer_forecaster import TransformerForecaster

# Maps the constructor names used in MODEL_SPECS to the model classes.
MODEL_CTORS = dict(
    LSTMForecaster=LSTMForecaster,
    TransformerForecaster=TransformerForecaster,
)


def _fit_predict_evaluate(
    model_ctor_name: str,
    model_label: str,
    fs_name: str,
    split_label: str,
    history_df: pd.DataFrame,
    target_df: pd.DataFrame,
    cfg: dict,
    run_dir: Path,
) -> tuple:
    """Fit a fresh model on `history_df`, predict `target_df`, score and persist.

    Writes `<split>_predictions.csv` under `run_dir/predictions` and
    `<split>_metrics_global.csv` / `<split>_metrics_by_store.csv` under
    `run_dir/metrics`, where `<split>` is `split_label` in lower case.

    Returns:
        (metrics_global, metrics_by_store, fit_predict_sec), where the timing
        covers only model construction, `fit` and `predict`.
    """
    import warnings

    started = time.time()
    model = MODEL_CTORS[model_ctor_name]()
    model.fit(history_df, cfg)
    preds = model.predict(history_df, target_df, cfg)
    fit_predict_sec = time.time() - started

    # Attach the ground truth for the predicted (Store, Date) pairs.
    truth = target_df[["Store", "Date", "Weekly_Sales"]].rename(columns={"Weekly_Sales": "y_true"})
    preds = preds.merge(truth, on=["Store", "Date"], how="left")

    # A left merge keeps predictions that found no target row; surface that
    # instead of letting NaN y_true flow silently into the metrics.
    n_without_truth = int(preds["y_true"].isna().sum())
    if n_without_truth:
        warnings.warn(
            f"[{model_label}/{fs_name}/{split_label}] {n_without_truth} predictions have no y_true after the merge"
        )

    preds["model"] = model_label
    preds["feature_set"] = fs_name

    # The timestamp suffix keeps debug reports from different runs apart.
    report_path = (
        run_dir / "metrics" / f"debug_report_{model_label}_{fs_name}_{split_label}_{int(time.time())}.json"
    )
    metrics_global, metrics_by_store, _ = evaluate_predictions(
        preds[["Store", "Date", "y_true", "y_pred"]],
        group_keys=["Store"],
        model_name=model_label,
        feature_set=fs_name,
        group_label=split_label,
        report_path=report_path,
    )

    prefix = split_label.lower()
    preds.to_csv(run_dir / "predictions" / f"{prefix}_predictions.csv", index=False)
    metrics_global.to_csv(run_dir / "metrics" / f"{prefix}_metrics_global.csv", index=False)
    metrics_by_store.to_csv(run_dir / "metrics" / f"{prefix}_metrics_by_store.csv", index=False)

    return metrics_global, metrics_by_store, fit_predict_sec


def run_one(model_label: str, model_ctor_name: str, fs_name: str) -> dict:
    """Run one (model, feature set) combination and persist its artefacts.

    Two passes are executed: train -> validation, then train+validation -> test
    (the test rows are the ones selected by `eval_mask_common`). Predictions,
    metrics and a `run_metadata.json` file are written under
    `OUTPUT_DIR/<model_label>__<fs_name>/`.

    The function reads notebook-level state defined in earlier cells
    (`feature_sets`, `df_feat_full`, `train_raw`, `val_raw`, `eval_mask_common`,
    `BASE_CFG`, `TRAINING_CFG`, `split_cfg`, `device`, ...).

    Args:
        model_label: Name used in output folders and in the result rows.
        model_ctor_name: Key into `MODEL_CTORS` selecting the model class.
        fs_name: Key into `feature_sets` ("FS0", "FS1" or "FS2").

    Returns:
        A flat dict with identifiers, timings and the global test metrics
        (MAE micro/macro, RMSE, sMAPE).

    Raises:
        ValueError: If a column required by the feature set is missing from
            the engineered feature frame.
    """
    t_run_start = time.time()
    fs = feature_sets[fs_name]
    used_cols = list(fs["feature_cols"])

    # Sanity: every column of the feature set must exist in the shared frame.
    missing = [c for c in used_cols if c not in df_feat_full.columns]
    if missing:
        raise ValueError(f"Missing engineered columns for {fs_name}: {missing[:10]}")

    # Slice the shared feature frame by the dates of each raw partition; the
    # slices are copied, so the shared frame itself is never modified here.
    train_df = df_feat_full[df_feat_full["Date"].isin(train_raw["Date"].unique())].copy()
    val_df = df_feat_full[df_feat_full["Date"].isin(val_raw["Date"].unique())].copy()
    test_df = df_feat_full.loc[eval_mask_common].copy()

    # Config passed to the model.
    cfg = {
        **BASE_CFG,
        **TRAINING_CFG,
        "add_calendar": bool(fs["add_calendar"]),
        "exog_cols": list(fs["exog_cols"]),
        "feature_cols": used_cols,
        "device": device,
        "run_validation": RUN_VALIDATION,
        "k_folds": K_FOLDS,
        "max_stores": MAX_STORES,
    }

    run_dir = OUTPUT_DIR / f"{model_label}__{fs_name}"
    for subdir in ("predictions", "metrics", "figures"):
        (run_dir / subdir).mkdir(parents=True, exist_ok=True)

    print(f"[{model_label}] device={cfg['device']}")
    print(f"[{model_label}] torch.cuda.is_available={torch.cuda.is_available()}")

    # 1) Train -> predict validation. Only the timing is used afterwards; the
    #    validation metrics are persisted to disk by the helper.
    _, _, val_sec = _fit_predict_evaluate(
        model_ctor_name, model_label, fs_name, "VAL", train_df, val_df, cfg, run_dir
    )

    # 2) Train+Val -> predict test (rows of the common evaluation mask).
    trainval_df = pd.concat([train_df, val_df], ignore_index=True)
    mglob_test, mstore_test, test_sec = _fit_predict_evaluate(
        model_ctor_name, model_label, fs_name, "TEST", trainval_df, test_df, cfg, run_dir
    )

    # Micro MAE comes from the global metrics row; macro MAE averages per-store MAE.
    mae_global_micro = float(mglob_test.iloc[0]["MAE"])
    mae_store_macro = float(mstore_test["MAE"].mean()) if not mstore_test.empty else float("nan")

    runtime_sec = float(time.time() - t_run_start)

    # Save run metadata.
    meta = {
        "seed": SEED,
        "debug": DEBUG,
        "model": model_label,
        "feature_set": fs_name,
        "split": split_cfg.as_dict(),
        "config": cfg,
        "device": device,
        "device_details": device_details,
        "versions": collect_versions(),
        "timing_sec": {
            "val_fit_predict": float(val_sec),
            "test_fit_predict": float(test_sec),
            "runtime_sec": runtime_sec,
        },
        "eval_mask_common_n": int(eval_mask_common.sum()),
    }
    # default=str: values coming from project helpers (device details, versions)
    # may not be JSON-native; stringify them rather than fail after training.
    (run_dir / "run_metadata.json").write_text(
        json.dumps(meta, indent=2, ensure_ascii=False, default=str),
        encoding="utf-8",
    )

    return {
        "model": model_label,
        "feature_set": fs_name,
        "val_fit_predict_sec": float(val_sec),
        "test_fit_predict_sec": float(test_sec),
        "runtime_sec": runtime_sec,
        "mae_global_micro": mae_global_micro,
        "mae_store_macro": mae_store_macro,
        "MAE": mae_global_micro,
        "RMSE": float(mglob_test.iloc[0]["RMSE"]),
        "sMAPE": float(mglob_test.iloc[0]["sMAPE"]),
    }

In [86]:
# Run every (model, feature set) combination and collect one result row per run.
results = []

print(f"FAST_MODE={FAST_MODE} | RUN_VALIDATION={RUN_VALIDATION} | device={device}")
for model_label, ctor_name in MODEL_SPECS:
    for fs_name in ("FS0", "FS1", "FS2"):
        print("===", model_label, fs_name, "===")
        row = run_one(model_label=model_label, model_ctor_name=ctor_name, fs_name=fs_name)
        results.append(row)

# Columns kept in the summary table, in display order.
summary_columns = [
    "model",
    "feature_set",
    "mae_global_micro",
    "mae_store_macro",
    "RMSE",
    "sMAPE",
    "val_fit_predict_sec",
    "test_fit_predict_sec",
    "runtime_sec",
]
summary = pd.DataFrame(results)
summary = summary.sort_values(["model", "feature_set"]).reset_index(drop=True)
summary = summary[summary_columns]
display(summary)

# Persist the summary next to the per-run folders.
summary_path = OUTPUT_DIR / "summary_metrics.csv"
summary.to_csv(summary_path, index=False)
print("Saved:", summary_path)

FAST_MODE=False | RUN_VALIDATION=strategic | device=cpu
=== lstm_exog FS0 ===
[lstm_exog] device=cpu
[lstm_exog] torch.cuda.is_available=False
[LSTM] Reducing lookback from 52 to 43 due to limited per-store history (min len=44 after dropna).


[LSTM] Reducing lookback from 52 to 51 due to limited per-store history (min len=52 after dropna).
=== lstm_exog FS1 ===
[lstm_exog] device=cpu
[lstm_exog] torch.cuda.is_available=False
[LSTM] Reducing lookback from 52 to 43 due to limited per-store history (min len=44 after dropna).
[LSTM] Reducing lookback from 52 to 51 due to limited per-store history (min len=52 after dropna).
=== lstm_exog FS2 ===
[lstm_exog] device=cpu
[lstm_exog] torch.cuda.is_available=False
[LSTM] Reducing lookback from 52 to 43 due to limited per-store history (min len=44 after dropna).
[LSTM] Reducing lookback from 52 to 51 due to limited per-store history (min len=52 after dropna).
=== transformer_exog FS0 ===
[transformer_exog] device=cpu
[transformer_exog] torch.cuda.is_available=False
[Transformer] Reducing lookback from 52 to 43 due to limited per-store history (min len=44 after dropna).
[Transformer] Reducing lookback from 52 to 51 due to limited per-store history (min len=52 after dropna).
=== transfo

Unnamed: 0,model,feature_set,mae_global_micro,mae_store_macro,RMSE,sMAPE,val_fit_predict_sec,test_fit_predict_sec,runtime_sec
0,lstm_exog,FS0,498803.916637,498803.916637,615527.68482,60.957842,4.47747,7.257845,11.888426
1,lstm_exog,FS1,715894.886941,715894.886941,819447.415702,109.518243,4.266515,7.655337,12.078657
2,lstm_exog,FS2,484668.400916,484668.400916,596656.506938,54.880722,3.776755,7.740952,11.646127
3,transformer_exog,FS0,330957.363771,330957.363771,392481.497741,36.759372,6.555587,17.018241,23.731664
4,transformer_exog,FS1,490950.961268,490950.961268,577818.38943,57.917571,6.490634,14.738915,21.345433
5,transformer_exog,FS2,433880.420127,433880.420127,501104.942122,51.617952,6.94434,16.509493,23.569092


Saved: C:\Users\usuario\Documents\Master AI\TFM\MEMORIA 2.0\outputs\E0_ablation\summary_metrics.csv


In [87]:
import numpy as np

# Sanity check: recompute the test metrics of one run from its saved predictions
# and compare them with the values stored in the summary table.
sample_pred_path = OUTPUT_DIR / "transformer_exog__FS2" / "predictions" / "test_predictions.csv"
if not sample_pred_path.exists():
    print("Sanity check skipped (missing predictions for transformer_exog__FS2).")
else:
    mask_df = df_feat_full.loc[eval_mask_common, ["Store", "Date"]]

    # Dates come back from CSV as strings; convert them before joining on them.
    sample_pred = pd.read_csv(sample_pred_path)
    sample_pred["Date"] = pd.to_datetime(sample_pred["Date"])
    # Keep only the predictions that fall inside the common evaluation mask.
    sample_pred_common = sample_pred.merge(mask_df, on=["Store", "Date"], how="inner")

    mglob_chk, mstore_chk, _ = evaluate_predictions(
        sample_pred_common[["Store", "Date", "y_true", "y_pred"]],
        group_keys=["Store"],
        model_name="transformer_exog",
        feature_set="FS2",
        group_label="TEST_SANITY",
    )

    mae_micro_chk = float(mglob_chk.iloc[0]["MAE"])
    if mstore_chk.empty:
        mae_macro_chk = float("nan")
    else:
        mae_macro_chk = float(mstore_chk["MAE"].mean())

    # Matching row of the summary table.
    row = summary.loc[
        (summary["model"] == "transformer_exog") & (summary["feature_set"] == "FS2")
    ].iloc[0]

    assert np.isclose(mae_micro_chk, row["mae_global_micro"], atol=1e-6)
    assert np.isclose(mae_macro_chk, row["mae_store_macro"], atol=1e-6)

    print("Sanity check passed. n_points:", len(sample_pred_common))
    print("n_stores:", sample_pred_common["Store"].nunique())
    print(
        "date_min/date_max:",
        sample_pred_common["Date"].min().date(),
        "→",
        sample_pred_common["Date"].max().date(),
    )
    print(
        "y_true_mean/y_pred_mean:",
        sample_pred_common["y_true"].mean(),
        "/",
        sample_pred_common["y_pred"].mean(),
    )

Sanity check passed. n_points: 1755
n_stores: 45
date_min/date_max: 2012-02-03 → 2012-10-26
y_true_mean/y_pred_mean: 1043440.6767464387 / 613494.9440552092


Siguiente: ejecutar **09_results_summary_and_plots.ipynb** para consolidación, deltas y visualizaciones.