# 03 — GluonTS DeepAR con exógenas (global multi-serie)

**Objetivo:** forecasting de `Weekly_Sales` semanal por `Store` usando DeepAR con covariables exógenas.

## Supuesto experimental (oracle exog)
Se asume disponibilidad de todas las covariables exógenas durante el horizonte de predicción (escenario oracle).

## Outputs estándar
- `outputs/predictions/deepar_exog_predictions.csv` con: `Store, Date, y_true, y_pred, model`
- `outputs/metrics/deepar_exog_metrics_global.csv`
- `outputs/metrics/deepar_exog_metrics_by_store.csv`
- `outputs/figures/deepar_exog_plot_*.png`

In [1]:
# 0) Imports and configuration
from __future__ import annotations

import json
import sys
from pathlib import Path

import numpy as np
import pandas as pd

# The project root is taken to be the parent of the current working directory
# (assumes the notebook is launched from a sub-folder of the project — TODO confirm).
# It is placed first on sys.path so the local `src` package below can be imported.
PROJECT_ROOT = Path.cwd().parent
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

from src.common import (
    compute_metrics,
    load_data,
    make_features,
    save_outputs,
    temporal_split,
)

# Identifier used to name the standard output files of this notebook.
MODEL_NAME = 'deepar_exog'
# Seed for NumPy's global RNG only; other RNGs are not seeded in this cell.
SEED = 42
np.random.seed(SEED)

# Input data, shared metadata (split + feature list) and output root folder.
DATA_PATH = PROJECT_ROOT / 'data' / 'Walmart_Sales.csv'
METADATA_PATH = PROJECT_ROOT / 'outputs' / 'metadata.json'
OUTPUTS_DIR = PROJECT_ROOT / 'outputs'

## 1) Cargar metadata (split + features)
Esto garantiza consistencia entre modelos.

In [2]:
# Read the shared metadata file and pull out the split definition and the
# list of feature columns used by this notebook.
with METADATA_PATH.open(encoding='utf-8') as metadata_file:
    metadata = json.load(metadata_file)

split, feature_cols = metadata['split'], metadata['features']

print('Split:', split)
print('N features:', len(feature_cols))

Split: {'train_start': '2010-02-05', 'train_end': '2012-07-06', 'val_start': '2012-07-13', 'val_end': '2012-08-31', 'test_start': '2012-09-07', 'test_end': '2012-10-26'}
N features: 16


## 2) Carga de datos + features
- Parseo/orden
- Construcción de lags/rolling (sin leakage)
- Exógenas alineadas por fecha

In [3]:
df = load_data(DATA_PATH)
df_feat, _ = make_features(df, add_calendar=True)

# Lag/rolling features leave NaNs in the first rows of each store.
# Rows missing any feature or the target are dropped before modelling.
required_cols = [*feature_cols, 'Weekly_Sales']
model_df = df_feat.dropna(subset=required_cols).copy()
model_df.shape

(4095, 22)

## 3) Split temporal
Reutiliza exactamente el split definido en el notebook 00.

In [4]:
train_df, val_df, test_df, split_cfg = temporal_split(df)


def _rows_between(start, end):
    """Return a copy of the model_df rows whose Date lies in [start, end]."""
    return model_df[model_df['Date'].between(start, end)].copy()


# Apply the split boundaries to model_df (rows with NaN lags already removed).
train = _rows_between(split_cfg.train_start, split_cfg.train_end)
val = _rows_between(split_cfg.val_start, split_cfg.val_end)
test = _rows_between(split_cfg.test_start, split_cfg.test_end)

print(len(train), len(val), len(test))

3375 360 360


## 4) Entrenamiento del modelo
Implementación DeepAR global multi-serie con covariables exógenas.

In [5]:
# Template placeholder: a constant forecast, one value per test row.
# NOTE(review): the constant is the mean of the TEST target itself, so it is
# not a valid forecast; the DeepAR training lives in a later cell of this notebook.
y_pred_test = np.full(len(test), test['Weekly_Sales'].mean())

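## 4b) Búsqueda de hiperparámetros DeepAR (entrenamiento + evaluación en test)
Para cada configuración se calculan y guardan métricas (MAE, RMSE, sMAPE):
- Global
- Por store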

In [None]:
from warnings import filterwarnings

# Silence every warning globally for the rest of the session.
filterwarnings("ignore")

# Optional dependencies: fail early with an actionable message when missing.
# Only ImportError is translated; any other failure raised while importing
# (e.g. a native-library load error) propagates unchanged instead of being
# mislabelled as "package not installed".
try:
    from gluonts.dataset.common import ListDataset
    from gluonts.dataset.field_names import FieldName
    from gluonts.torch.model.deepar import DeepAREstimator
except ImportError as exc:
    raise ImportError("GluonTS no está instalado. Instala con: pip install gluonts") from exc

try:
    from pytorch_lightning.callbacks import EarlyStopping
except ImportError as exc:
    raise ImportError("pytorch_lightning es necesario para early stopping en GluonTS") from exc

np.random.seed(SEED)

# Anti-leakage guard: the three windows must be strictly ordered in time.
assert split_cfg.train_end < split_cfg.val_start < split_cfg.val_end < split_cfg.test_start <= split_cfg.test_end

# DeepAR is fed the exogenous columns only: lag_* / roll_* features are excluded.
deepar_exog_cols = [c for c in feature_cols if not c.startswith(("lag_", "roll_"))]

# Full feature frame, not NaN-filtered, so every date keeps its exogenous values
# (oracle-exog scenario: covariates are assumed known over the horizon).
df_full, _ = make_features(df, add_calendar=True)

# Split boundaries used below.
train_end = split_cfg.train_end
val_end = split_cfg.val_end
test_start = split_cfg.test_start
test_end = split_cfg.test_end

# Infer the sampling frequency from the distinct dates; fall back to weekly-on-Friday.
dates_all = df_full.sort_values("Date")["Date"].drop_duplicates().values
freq = pd.infer_freq(pd.to_datetime(dates_all)) or "W-FRI"

# Forecast horizon = number of periods in the test window.
test_dates = pd.date_range(start=test_start, end=test_end, freq=freq)
prediction_length = len(test_dates)



def build_series(store_df: pd.DataFrame) -> tuple[np.ndarray, np.ndarray, pd.Timestamp]:
    """Turn the rows of one store into the arrays used to build a dataset record.

    Rows are ordered by ``Date`` first. Returns a tuple of:
    the ``Weekly_Sales`` values as floats, the ``deepar_exog_cols`` values as a
    float matrix transposed to (n_features, n_rows), and the first date as a
    ``pd.Timestamp``.
    """
    ordered = store_df.sort_values("Date")
    first_date = pd.Timestamp(ordered["Date"].iloc[0])
    target = ordered["Weekly_Sales"].values.astype(float)
    covariates = np.transpose(ordered[deepar_exog_cols].values.astype(float))
    return target, covariates, first_date



# Controlled grid (<= 8 configurations). Each row lists the values in the
# order given by _GRID_KEYS.
_GRID_KEYS = ("hidden_size", "num_layers", "dropout_rate", "lr", "batch_size")
_GRID_VALUES = (
    (40, 2, 0.1, 1e-3, 32),
    (40, 3, 0.2, 5e-4, 64),
    (80, 2, 0.1, 5e-4, 32),
    (80, 3, 0.2, 1e-3, 64),
)
deepar_search = [dict(zip(_GRID_KEYS, row)) for row in _GRID_VALUES]

# Filled by the grid-search loop: one entry per evaluated configuration.
results = []



# --- DeepAR grid search: build datasets once, train/evaluate each config, rank ---
import torch  # required by GluonTS' torch backend; used here only for seeding


def _store_records(frame: pd.DataFrame) -> list[dict]:
    """Return one GluonTS record per store, built from every row of *frame*."""
    records = []
    for store, g in frame.groupby("Store"):
        y, exog, start = build_series(g)
        records.append({
            FieldName.TARGET: y,
            FieldName.START: start,
            FieldName.FEAT_DYNAMIC_REAL: exog,
            FieldName.ITEM_ID: str(int(store)),
        })
    return records


# None of the datasets depends on the hyper-parameters, so they are built once
# instead of once per configuration.

# Training set: target and covariates up to train_end only.
train_records = _store_records(df_full[df_full["Date"] <= train_end])
train_ds = ListDataset(train_records, freq=freq)

# Validation set (monitored by early stopping): target and covariates up to val_end.
val_records = _store_records(df_full[df_full["Date"] <= val_end])
val_ds = ListDataset(val_records, freq=freq)

# Prediction set. The predictor forecasts the `prediction_length` steps that
# follow the END of the supplied target, so the target must stop at the last
# observation before test_start. Stopping at train_end (as before) produced
# forecasts for the validation weeks that were then scored against test rows.
# Validation targets precede the test window, so this is not test leakage.
# Covariates extend `prediction_length` steps past the target (oracle exog).
pred_records = []
for store, g in df_full[df_full["Date"] <= test_end].groupby("Store"):
    g = g.sort_values("Date")
    g_context = g[g["Date"] < test_start]
    if g_context.empty:
        continue
    y, _, start = build_series(g_context)
    exog_full = g[deepar_exog_cols].iloc[: len(y) + prediction_length].values.astype(float).T
    pred_records.append({
        FieldName.TARGET: y,
        FieldName.START: start,
        FieldName.FEAT_DYNAMIC_REAL: exog_full,
        FieldName.ITEM_ID: str(int(store)),
    })
pred_ds = ListDataset(pred_records, freq=freq)

# Constant used for stores that end up without a forecast.
fallback_pred = train["Weekly_Sales"].mean()

for cfg in deepar_search:
    print(f"\n=== DeepAR config {cfg} ===")

    # Re-seed NumPy and torch so every configuration starts from the same RNG
    # state and the comparison does not depend on the order of the grid.
    np.random.seed(SEED)
    torch.manual_seed(SEED)

    estimator = DeepAREstimator(
        prediction_length=prediction_length,
        context_length=52,
        freq=freq,
        num_feat_dynamic_real=len(deepar_exog_cols),
        batch_size=cfg["batch_size"],
        num_batches_per_epoch=50,
        lr=cfg["lr"],
        num_layers=cfg["num_layers"],
        hidden_size=cfg["hidden_size"],
        dropout_rate=cfg["dropout_rate"],
        scaling=True,
        num_parallel_samples=100,
        trainer_kwargs={
            "max_epochs": 200,
            # A fresh callback per configuration: it keeps internal state.
            "callbacks": [EarlyStopping(monitor="val_loss", patience=15, min_delta=1e-4, mode="min")],
        },
    )

    predictor = estimator.train(training_data=train_ds, validation_data=val_ds)

    # Mean forecast per store; forecasts are paired with records by position.
    store_preds = {}
    for forecast, item in zip(predictor.predict(pred_ds), pred_records):
        store_preds[int(item[FieldName.ITEM_ID])] = forecast.mean

    # Align each store's forecast with its test rows (by position, after
    # sorting by Date); truncate or pad with the last value if lengths differ.
    preds = []
    for store, g in test.groupby("Store"):
        g = g.sort_values("Date")
        yhat = store_preds.get(int(store))
        if yhat is None:
            yhat = np.full(len(g), fallback_pred)
        else:
            yhat = np.asarray(yhat)[: len(g)]
            if len(yhat) < len(g):
                yhat = np.pad(yhat, (0, len(g) - len(yhat)), constant_values=yhat[-1])
        preds.append(
            pd.DataFrame(
                {
                    "Store": g["Store"].values,
                    "Date": g["Date"].values,
                    "y_pred": yhat,
                }
            )
        )

    if not preds:
        continue

    pred_df = pd.concat(preds, ignore_index=True)
    pred_df = pred_df.merge(
        test[["Store", "Date", "Weekly_Sales"]], on=["Store", "Date"], how="left"
    ).rename(columns={"Weekly_Sales": "y_true"})

    name = (
        f"deepar_exog__hs{cfg['hidden_size']}__nl{cfg['num_layers']}"
        f"__do{cfg['dropout_rate']}__lr{cfg['lr']}__bs{cfg['batch_size']}"
    )
    pred_df["model"] = name
    metrics = compute_metrics(pred_df["y_true"].values, pred_df["y_pred"].values)
    # Predictions are kept so the best configuration can be restored after ranking.
    results.append({"cfg": cfg, "metrics": metrics, "name": name, "predictions": pred_df})

    save_outputs(
        model_name=name,
        predictions=pred_df,
        metrics_global=pd.DataFrame([{"model": name, **metrics}]),
        metrics_by_store=pred_df.groupby("Store").apply(
            lambda g: pd.Series(compute_metrics(g["y_true"].values, g["y_pred"].values))
        ).reset_index().assign(model=name),
        output_dir=OUTPUTS_DIR,
    )

if not results:
    raise RuntimeError(
        "No DeepAR configuration produced predictions; check the test split and the prediction dataset."
    )

# Rank configurations by sMAPE, then MAE.
# NOTE(review): the ranking uses TEST metrics, so the winner's test score is
# optimistically biased; consider ranking on the validation window instead.
results_df = pd.DataFrame([
    {"model": r["name"], "MAE": r["metrics"]["MAE"], "RMSE": r["metrics"]["RMSE"], "sMAPE": r["metrics"]["sMAPE"]}
    for r in results
])
results_df = results_df.sort_values(["sMAPE", "MAE"]).reset_index(drop=True)
best_model_name = results_df.loc[0, "model"]

# Leave pred_df bound to the BEST configuration (not merely the last one
# evaluated) so later cells that read pred_df work on the selected model.
pred_df = next(r["predictions"] for r in results if r["name"] == best_model_name)

results_df.head()


=== DeepAR config {'hidden_size': 40, 'num_layers': 2, 'dropout_rate': 0.1, 'lr': 0.001, 'batch_size': 32} ===


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type        | Params | In sizes | Out sizes  
---------------------------------------------------------------
0 | model | DeepARModel | 25.1 K | ?        | [1, 100, 8]
---------------------------------------------------------------
25.1 K    Trainable params
0         Non-trainable params
25.1 K    Total params
0.100     Total estimated model params size (MB)


Epoch 0: : 52it [00:03, 16.16it/s, loss=10.8, v_num=43, val_loss=13.00, train_loss=11.30]

Epoch 0, global step 50: 'val_loss' reached 13.01339 (best 13.01339), saving model to 'c:\\Users\\usuario\\Documents\\Master AI\\TFM\\MEMORIA 2.0\\notebooks\\lightning_logs\\version_43\\checkpoints\\epoch=0-step=50.ckpt' as top 1


Epoch 1: : 52it [00:03, 15.90it/s, loss=10.8, v_num=43, val_loss=12.90, train_loss=10.70]

Epoch 1, global step 100: 'val_loss' reached 12.89910 (best 12.89910), saving model to 'c:\\Users\\usuario\\Documents\\Master AI\\TFM\\MEMORIA 2.0\\notebooks\\lightning_logs\\version_43\\checkpoints\\epoch=1-step=100.ckpt' as top 1


Epoch 2: : 52it [00:03, 16.86it/s, loss=10.6, v_num=43, val_loss=12.90, train_loss=10.60]

Epoch 2, global step 150: 'val_loss' reached 12.88313 (best 12.88313), saving model to 'c:\\Users\\usuario\\Documents\\Master AI\\TFM\\MEMORIA 2.0\\notebooks\\lightning_logs\\version_43\\checkpoints\\epoch=2-step=150.ckpt' as top 1


Epoch 3: : 52it [00:03, 16.60it/s, loss=10.5, v_num=43, val_loss=12.90, train_loss=10.50]

Epoch 3, global step 200: 'val_loss' reached 12.88148 (best 12.88148), saving model to 'c:\\Users\\usuario\\Documents\\Master AI\\TFM\\MEMORIA 2.0\\notebooks\\lightning_logs\\version_43\\checkpoints\\epoch=3-step=200.ckpt' as top 1


Epoch 4: : 52it [00:03, 16.67it/s, loss=10.5, v_num=43, val_loss=12.90, train_loss=10.40]

Epoch 4, global step 250: 'val_loss' reached 12.86806 (best 12.86806), saving model to 'c:\\Users\\usuario\\Documents\\Master AI\\TFM\\MEMORIA 2.0\\notebooks\\lightning_logs\\version_43\\checkpoints\\epoch=4-step=250.ckpt' as top 1


Epoch 5: : 52it [00:03, 15.82it/s, loss=10.3, v_num=43, val_loss=12.90, train_loss=10.40]

Epoch 5, global step 300: 'val_loss' reached 12.86639 (best 12.86639), saving model to 'c:\\Users\\usuario\\Documents\\Master AI\\TFM\\MEMORIA 2.0\\notebooks\\lightning_logs\\version_43\\checkpoints\\epoch=5-step=300.ckpt' as top 1


Epoch 6: : 52it [00:03, 14.25it/s, loss=10.6, v_num=43, val_loss=12.90, train_loss=10.40]

Epoch 6, global step 350: 'val_loss' reached 12.86351 (best 12.86351), saving model to 'c:\\Users\\usuario\\Documents\\Master AI\\TFM\\MEMORIA 2.0\\notebooks\\lightning_logs\\version_43\\checkpoints\\epoch=6-step=350.ckpt' as top 1


Epoch 7: : 25it [00:03,  8.14it/s, loss=10.6, v_num=43, val_loss=12.90, train_loss=10.40]

## 5) Métricas (MAE, RMSE, sMAPE)
Se reporta:
- Global
- Por store

In [None]:
# Quick summary: best configuration and, when the file exists, the baseline
# metrics stored under the standard output name.
best_row = results_df.iloc[0]
print("Mejor config (ordenada por sMAPE):")
print(best_row)

import pathlib

baseline_path = pathlib.Path(OUTPUTS_DIR).joinpath("metrics", "deepar_exog_metrics_global.csv")

if not baseline_path.exists():
    print("\nBaseline (EPOCHS=20) no encontrado; ejecuta el baseline para comparar.")
else:
    baseline_df = pd.read_csv(baseline_path)
    print("\nBaseline (EPOCHS=20) encontrado:")
    print(baseline_df)
    # Negative delta means the best grid configuration has the lower sMAPE.
    smape_delta = float(best_row["sMAPE"]) - float(baseline_df.loc[0, "sMAPE"])
    print("Comparación sMAPE delta:", smape_delta)

(         model           MAE         RMSE     sMAPE
 0  deepar_exog  59573.684606  86287.61717  6.041327,
          model  Store           MAE          RMSE     sMAPE
 0  deepar_exog      1  63187.098125  72535.226686  4.063790
 1  deepar_exog      2  57557.482500  71639.894989  3.080493
 2  deepar_exog      3  10764.173750  15598.596845  2.592597
 3  deepar_exog      4  39508.113750  50746.641501  1.861186
 4  deepar_exog      5  14828.231719  17973.278986  4.584132)

## 6) Guardado de outputs estándar

In [10]:
# 6) Save the standard outputs under MODEL_NAME.
# metrics_global_df / metrics_by_store_df are not assigned in any earlier cell
# shown in this notebook, so they are derived here from pred_df to keep the
# cell runnable on a fresh kernel.
# Predictions and metrics are labelled with MODEL_NAME so the `model` column
# matches the output file names.
# NOTE(review): confirm downstream notebooks expect this label rather than the
# per-configuration name stored in pred_df["model"].
final_pred_df = pred_df.assign(model=MODEL_NAME)

metrics_global_df = pd.DataFrame([
    {
        "model": MODEL_NAME,
        **compute_metrics(final_pred_df["y_true"].values, final_pred_df["y_pred"].values),
    }
])

metrics_by_store_df = (
    final_pred_df.groupby("Store")
    .apply(lambda g: pd.Series(compute_metrics(g["y_true"].values, g["y_pred"].values)))
    .reset_index()
)
metrics_by_store_df.insert(0, "model", MODEL_NAME)

paths = save_outputs(
    model_name=MODEL_NAME,
    predictions=final_pred_df,
    metrics_global=metrics_global_df,
    metrics_by_store=metrics_by_store_df,
    output_dir=OUTPUTS_DIR,
)
paths

{'predictions': 'c:\\Users\\usuario\\Documents\\Master AI\\TFM\\MEMORIA 2.0\\outputs\\predictions\\deepar_exog_predictions.csv',
 'metrics_global': 'c:\\Users\\usuario\\Documents\\Master AI\\TFM\\MEMORIA 2.0\\outputs\\metrics\\deepar_exog_metrics_global.csv',
 'metrics_by_store': 'c:\\Users\\usuario\\Documents\\Master AI\\TFM\\MEMORIA 2.0\\outputs\\metrics\\deepar_exog_metrics_by_store.csv'}

## 7) Figuras
- 3 tiendas: real vs predicción en test
- Distribución del error (`y_true - y_pred`)

Guardar PNGs en `outputs/figures/`.

In [11]:
import matplotlib.pyplot as plt
import seaborn as sns

FIG_DIR = OUTPUTS_DIR / "figures"
FIG_DIR.mkdir(parents=True, exist_ok=True)

# Pick the 3 stores with the highest mean y_true in pred_df.
mean_sales_by_store = pred_df.groupby("Store")["y_true"].mean()
top_stores = mean_sales_by_store.sort_values(ascending=False).head(3).index.tolist()

# One actual-vs-predicted line chart per selected store.
for store in top_stores:
    g = pred_df[pred_df["Store"] == store].sort_values("Date")
    fig, ax = plt.subplots(figsize=(10, 4))
    for column in ("y_true", "y_pred"):
        ax.plot(g["Date"], g[column], label=column)
    ax.set_title(f"Store {store} — DeepAR")
    ax.set_xlabel("Date")
    ax.set_ylabel("Weekly_Sales")
    ax.legend()
    fig.tight_layout()
    fig.savefig(FIG_DIR / f"{MODEL_NAME}_plot_store_{store}.png", dpi=150)
    plt.close(fig)

# Histogram (with KDE) of the residuals over all rows of pred_df.
errors = pred_df["y_true"] - pred_df["y_pred"]
fig, ax = plt.subplots(figsize=(8, 4))
sns.histplot(errors, bins=30, kde=True, ax=ax)
ax.set_title("Error distribution (y_true - y_pred)")
ax.set_xlabel("Error")
fig.tight_layout()
fig.savefig(FIG_DIR / f"{MODEL_NAME}_plot_error_dist.png", dpi=150)
plt.close(fig)