# 03 — GluonTS DeepAR con exógenas (global multi-serie)

**Objetivo:** forecasting de `Weekly_Sales` semanal por `Store` usando DeepAR con covariables exógenas.

## Supuesto experimental (oracle exog)
Se asume disponibilidad de todas las covariables exógenas durante el horizonte de predicción (escenario oracle).

## Outputs estándar
- `outputs/predictions/deepar_exog_predictions.csv` con: `Store, Date, y_true, y_pred, model`
- `outputs/metrics/deepar_exog_metrics_global.csv`
- `outputs/metrics/deepar_exog_metrics_by_store.csv`
- `outputs/figures/deepar_exog_plot_*.png`

In [1]:
# 0) Imports y configuración
from __future__ import annotations

import json
import sys
from pathlib import Path

import numpy as np
import pandas as pd

PROJECT_ROOT = Path.cwd().parent
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

from src.common import (
    SplitConfig,
    TEST_WEEKS,
    compute_metrics,
    load_data,
    make_features,
    save_outputs,
    temporal_split,
 )

MODEL_NAME = 'deepar_exog'
SEED = 42
np.random.seed(SEED)

DATA_PATH = PROJECT_ROOT / 'data' / 'Walmart_Sales.csv'
METADATA_PATH = PROJECT_ROOT / 'outputs' / 'metadata.json'
OUTPUTS_DIR = PROJECT_ROOT / 'outputs'

## 1) Cargar metadata (split + features)
Esto garantiza consistencia entre modelos.

In [2]:
if METADATA_PATH.exists():
    metadata = json.loads(METADATA_PATH.read_text(encoding='utf-8'))
    split = metadata['split']
    feature_cols = metadata['features']
    split_cfg = SplitConfig(
        train_start=pd.Timestamp(split['train_start']),
        train_end=pd.Timestamp(split['train_end']),
        val_start=pd.Timestamp(split['val_start']),
        val_end=pd.Timestamp(split['val_end']),
        test_start=pd.Timestamp(split['test_start']),
        test_end=pd.Timestamp(split['test_end']),
    )
    print('Split (metadata):', split)
    print('N features:', len(feature_cols))
else:
    metadata = None
    split = None
    feature_cols = None
    split_cfg = None
    print('metadata.json not found; will compute split with TEST_WEEKS=', TEST_WEEKS)

Split (metadata): {'train_start': '2010-02-05', 'train_end': '2011-12-02', 'val_start': '2011-12-09', 'val_end': '2012-01-27', 'test_start': '2012-02-03', 'test_end': '2012-10-26'}
N features: 19


## 2) Carga de datos + features
- Parseo/orden
- Construcción de lags/rolling (sin leakage)
- Exógenas alineadas por fecha

In [3]:
df = load_data(DATA_PATH)
df_feat, feature_cols_auto = make_features(df, add_calendar=True)
if feature_cols is None:
    feature_cols = feature_cols_auto

# Importante: para entrenar, debes decidir cómo tratar NaNs creados por lags/rolling
# Opción típica: descartar filas con NaNs en features (por store al inicio)
model_df = df_feat.dropna(subset=feature_cols + ['Weekly_Sales']).copy()
model_df.shape

(4095, 22)

## 3) Split temporal
Reutiliza exactamente el split definido en el notebook 00.

In [4]:
if split_cfg is None:
    train_df, val_df, test_df, split_cfg = temporal_split(df, test_weeks=TEST_WEEKS)
else:
    train_df = df[df['Date'].between(split_cfg.train_start, split_cfg.train_end)].copy()
    val_df = df[df['Date'].between(split_cfg.val_start, split_cfg.val_end)].copy()
    test_df = df[df['Date'].between(split_cfg.test_start, split_cfg.test_end)].copy()

# Aplicar el split sobre model_df (ya sin NaNs por lags)
train = model_df[model_df['Date'].between(split_cfg.train_start, split_cfg.train_end)].copy()
val = model_df[model_df['Date'].between(split_cfg.val_start, split_cfg.val_end)].copy()
test = model_df[model_df['Date'].between(split_cfg.test_start, split_cfg.test_end)].copy()

print(len(train), len(val), len(test))

1980 360 1755


## 4) Entrenamiento del modelo
Implementación DeepAR global multi-serie con covariables exógenas.

In [5]:
# TODO: implementar entrenamiento DeepAR con exógenas
# Debe producir predicciones para TEST (ideal: también para VAL).
y_pred_test = np.full(shape=len(test), fill_value=test['Weekly_Sales'].mean())

## 5) Métricas (MAE, RMSE, sMAPE)
Se reporta:
- Global
- Por store

In [6]:
from warnings import filterwarnings

filterwarnings("ignore")



try:

    from gluonts.dataset.common import ListDataset

    from gluonts.dataset.field_names import FieldName

    from gluonts.torch.model.deepar import DeepAREstimator

except Exception as exc:

    raise ImportError("GluonTS no está instalado. Instala con: pip install gluonts") from exc



try:

    from lightning.pytorch.callbacks import EarlyStopping

except Exception as exc:

    raise ImportError("lightning es necesario para early stopping en GluonTS") from exc



np.random.seed(SEED)



# Anti-leakage

assert split_cfg.train_end < split_cfg.val_start < split_cfg.val_end < split_cfg.test_start <= split_cfg.test_end



# DeepAR usa solo exógenas (sin lags/rolling)

deepar_exog_cols = [c for c in feature_cols if not c.startswith("lag_") and not c.startswith("roll_")]



# Dataset completo con exógenas (oracle exog permitida)

df_full, _ = make_features(df, add_calendar=True)



# Fechas del split

train_end = split_cfg.train_end

val_end = split_cfg.val_end

test_start = split_cfg.test_start

test_end = split_cfg.test_end



# Frecuencia semanal

dates_all = df_full.sort_values("Date")["Date"].drop_duplicates().values

freq = pd.infer_freq(pd.to_datetime(dates_all)) or "W-FRI"



# Horizonte de predicción

test_dates = pd.date_range(start=test_start, end=test_end, freq=freq)

prediction_length = len(test_dates)



def build_series(store_df: pd.DataFrame) -> tuple[np.ndarray, np.ndarray, pd.Timestamp]:

    store_df = store_df.sort_values("Date")

    y = store_df["Weekly_Sales"].values.astype(float)

    exog = store_df[deepar_exog_cols].values.astype(float).T

    start = pd.Timestamp(store_df["Date"].iloc[0])

    return y, exog, start



# Grilla controlada (<=8 configs)

deepar_search = [

    {"hidden_size": 40, "num_layers": 2, "dropout_rate": 0.1, "lr": 1e-3, "batch_size": 32},

    {"hidden_size": 40, "num_layers": 3, "dropout_rate": 0.2, "lr": 5e-4, "batch_size": 64},

    {"hidden_size": 80, "num_layers": 2, "dropout_rate": 0.1, "lr": 5e-4, "batch_size": 32},

    {"hidden_size": 80, "num_layers": 3, "dropout_rate": 0.2, "lr": 1e-3, "batch_size": 64},

]



results = []



for cfg in deepar_search:

    print(f"\n=== DeepAR config {cfg} ===")



    # Dataset de entrenamiento: target solo hasta train_end

    train_records = []

    for store, g in df_full[df_full["Date"] <= train_end].groupby("Store"):

        y, exog, start = build_series(g)

        train_records.append({

            FieldName.TARGET: y,

            FieldName.START: start,

            FieldName.FEAT_DYNAMIC_REAL: exog,

            FieldName.ITEM_ID: str(int(store)),

        })

    train_ds = ListDataset(train_records, freq=freq)



    # Dataset de validación para early stopping: target hasta val_end

    val_records = []

    for store, g in df_full[df_full["Date"] <= val_end].groupby("Store"):

        y, exog, start = build_series(g)

        val_records.append({

            FieldName.TARGET: y,

            FieldName.START: start,

            FieldName.FEAT_DYNAMIC_REAL: exog,

            FieldName.ITEM_ID: str(int(store)),

        })

    val_ds = ListDataset(val_records, freq=freq)



    # Dataset para predicción: target hasta train_end + exógenas completas hasta test_end

    pred_records = []

    for store, g in df_full[df_full["Date"] <= test_end].groupby("Store"):

        g_train = g[g["Date"] <= train_end]

        if g_train.empty:

            continue

        y, _, start = build_series(g_train)

        g_full = g.sort_values("Date")

        max_len = len(g_train) + prediction_length

        exog_full = g_full[deepar_exog_cols].iloc[:max_len].values.astype(float).T

        pred_records.append({

            FieldName.TARGET: y,

            FieldName.START: start,

            FieldName.FEAT_DYNAMIC_REAL: exog_full,

            FieldName.ITEM_ID: str(int(store)),

        })

    pred_ds = ListDataset(pred_records, freq=freq)



    estimator = DeepAREstimator(

        prediction_length=prediction_length,

        context_length=52,

        freq=freq,

        num_feat_dynamic_real=len(deepar_exog_cols),

        batch_size=cfg["batch_size"],

        num_batches_per_epoch=50,

        lr=cfg["lr"],

        num_layers=cfg["num_layers"],

        hidden_size=cfg["hidden_size"],

        dropout_rate=cfg["dropout_rate"],

        scaling=True,

        num_parallel_samples=100,

        trainer_kwargs={

            "max_epochs": 200,

            "callbacks": [EarlyStopping(monitor="val_loss", patience=15, min_delta=1e-4, mode="min")],

        },

    )



    predictor = estimator.train(training_data=train_ds, validation_data=val_ds)



    store_preds = {}

    for forecast, item in zip(predictor.predict(pred_ds), pred_records):

        store_id = int(item[FieldName.ITEM_ID])

        store_preds[store_id] = forecast.mean



    preds = []

    for store, g in test.groupby("Store"):

        g = g.sort_values("Date")

        yhat = store_preds.get(int(store))

        if yhat is None:

            yhat = np.full(len(g), train["Weekly_Sales"].mean())

        else:

            yhat = np.asarray(yhat)[: len(g)]

            if len(yhat) < len(g):

                yhat = np.pad(yhat, (0, len(g) - len(yhat)), constant_values=yhat[-1])

        preds.append(

            pd.DataFrame(

                {

                    "Store": g["Store"].values,

                    "Date": g["Date"].values,

                    "y_pred": yhat,

                }

            )

        )



    if not preds:

        continue



    pred_df = pd.concat(preds, ignore_index=True)

    pred_df = pred_df.merge(

        test[["Store", "Date", "Weekly_Sales"]], on=["Store", "Date"], how="left"

    ).rename(columns={"Weekly_Sales": "y_true"})



    name = (

        f"deepar_exog__hs{cfg['hidden_size']}__nl{cfg['num_layers']}"

        f"__do{cfg['dropout_rate']}__lr{cfg['lr']}__bs{cfg['batch_size']}"

    )

    pred_df["model"] = name

    metrics = compute_metrics(pred_df["y_true"].values, pred_df["y_pred"].values)

    results.append({"cfg": cfg, "metrics": metrics, "name": name})



    save_outputs(

        model_name=name,

        predictions=pred_df,

        metrics_global=pd.DataFrame([{**{"model": name}, **metrics}]),

        metrics_by_store=pred_df.groupby("Store").apply(

            lambda g: pd.Series(compute_metrics(g["y_true"].values, g["y_pred"].values))

        ).reset_index().assign(model=name),

        output_dir=OUTPUTS_DIR,

    )



results_df = pd.DataFrame([

    {"model": r["name"], "MAE": r["metrics"]["MAE"], "RMSE": r["metrics"]["RMSE"], "sMAPE": r["metrics"]["sMAPE"]}

    for r in results

])

results_df = results_df.sort_values(["sMAPE", "MAE"]).reset_index(drop=True)

best_model_name = results_df.loc[0, "model"]

results_df.head()


=== DeepAR config {'hidden_size': 40, 'num_layers': 2, 'dropout_rate': 0.1, 'lr': 0.001, 'batch_size': 32} ===


INFO: GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO: HPU available: False, using: 0 HPUs
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name  | Type        | Params | Mode  | In sizes                                                        | Out sizes   
-------------------------------------------------------------------------------------------------------------------------------
0 | model | DeepARModel | 25.1 K | train | [[1, 1], [1, 1], [1, 208, 11], [1, 208], [1, 208], [1, 39, 11]] | [1, 100, 39]
-------------------------------------------------------------------------------------------------------------------------------
25.1 K    Trainable params
0         Non-trainable params
25.1 K    Total params
0.100     Total estimated model params size (MB)
11        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 0, global step 50: 'val_loss' reached 13.52132 (best 13.52132), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_73/checkpoints/epoch=0-step=50.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 1, global step 100: 'val_loss' reached 13.12546 (best 13.12546), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_73/checkpoints/epoch=1-step=100.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 2, global step 150: 'val_loss' reached 13.10425 (best 13.10425), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_73/checkpoints/epoch=2-step=150.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 3, global step 200: 'val_loss' reached 13.09079 (best 13.09079), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_73/checkpoints/epoch=3-step=200.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 4, global step 250: 'val_loss' reached 13.08328 (best 13.08328), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_73/checkpoints/epoch=4-step=250.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 5, global step 300: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 6, global step 350: 'val_loss' reached 13.07224 (best 13.07224), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_73/checkpoints/epoch=6-step=350.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 7, global step 400: 'val_loss' reached 13.06078 (best 13.06078), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_73/checkpoints/epoch=7-step=400.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 8, global step 450: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 9, global step 500: 'val_loss' reached 13.05267 (best 13.05267), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_73/checkpoints/epoch=9-step=500.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 10, global step 550: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 11, global step 600: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 12, global step 650: 'val_loss' reached 13.04401 (best 13.04401), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_73/checkpoints/epoch=12-step=650.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 13, global step 700: 'val_loss' reached 13.03772 (best 13.03772), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_73/checkpoints/epoch=13-step=700.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 14, global step 750: 'val_loss' reached 13.03716 (best 13.03716), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_73/checkpoints/epoch=14-step=750.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 15, global step 800: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 16, global step 850: 'val_loss' reached 13.03219 (best 13.03219), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_73/checkpoints/epoch=16-step=850.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 17, global step 900: 'val_loss' reached 13.03114 (best 13.03114), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_73/checkpoints/epoch=17-step=900.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 18, global step 950: 'val_loss' reached 13.02335 (best 13.02335), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_73/checkpoints/epoch=18-step=950.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 19, global step 1000: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 20, global step 1050: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 21, global step 1100: 'val_loss' reached 13.02013 (best 13.02013), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_73/checkpoints/epoch=21-step=1100.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 22, global step 1150: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 23, global step 1200: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 24, global step 1250: 'val_loss' reached 13.01546 (best 13.01546), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_73/checkpoints/epoch=24-step=1250.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 25, global step 1300: 'val_loss' reached 13.01534 (best 13.01534), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_73/checkpoints/epoch=25-step=1300.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 26, global step 1350: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 27, global step 1400: 'val_loss' reached 13.01387 (best 13.01387), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_73/checkpoints/epoch=27-step=1400.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 28, global step 1450: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 29, global step 1500: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 30, global step 1550: 'val_loss' reached 13.01006 (best 13.01006), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_73/checkpoints/epoch=30-step=1550.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 31, global step 1600: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 32, global step 1650: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 33, global step 1700: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 34, global step 1750: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 35, global step 1800: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 36, global step 1850: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 37, global step 1900: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 38, global step 1950: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 39, global step 2000: 'val_loss' reached 13.00314 (best 13.00314), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_73/checkpoints/epoch=39-step=2000.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 40, global step 2050: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 41, global step 2100: 'val_loss' reached 12.99696 (best 12.99696), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_73/checkpoints/epoch=41-step=2100.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 42, global step 2150: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 43, global step 2200: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 44, global step 2250: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 45, global step 2300: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 46, global step 2350: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 47, global step 2400: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 48, global step 2450: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 49, global step 2500: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 50, global step 2550: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 51, global step 2600: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 52, global step 2650: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 53, global step 2700: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 54, global step 2750: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 55, global step 2800: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 56, global step 2850: 'val_loss' was not in top 1



=== DeepAR config {'hidden_size': 40, 'num_layers': 3, 'dropout_rate': 0.2, 'lr': 0.0005, 'batch_size': 64} ===


INFO: GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO: HPU available: False, using: 0 HPUs
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name  | Type        | Params | Mode  | In sizes                                                        | Out sizes   
-------------------------------------------------------------------------------------------------------------------------------
0 | model | DeepARModel | 38.2 K | train | [[1, 1], [1, 1], [1, 208, 11], [1, 208], [1, 208], [1, 39, 11]] | [1, 100, 39]
-------------------------------------------------------------------------------------------------------------------------------
38.2 K    Trainable params
0         Non-trainable params
38.2 K    Total params
0.153     Total estimated model params size (MB)
11        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 0, global step 50: 'val_loss' reached 13.86644 (best 13.86644), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_74/checkpoints/epoch=0-step=50.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 1, global step 100: 'val_loss' reached 13.11876 (best 13.11876), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_74/checkpoints/epoch=1-step=100.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 2, global step 150: 'val_loss' reached 13.09574 (best 13.09574), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_74/checkpoints/epoch=2-step=150.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 3, global step 200: 'val_loss' reached 13.08784 (best 13.08784), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_74/checkpoints/epoch=3-step=200.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 4, global step 250: 'val_loss' reached 13.08322 (best 13.08322), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_74/checkpoints/epoch=4-step=250.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 5, global step 300: 'val_loss' reached 13.07195 (best 13.07195), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_74/checkpoints/epoch=5-step=300.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 6, global step 350: 'val_loss' reached 13.06566 (best 13.06566), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_74/checkpoints/epoch=6-step=350.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 7, global step 400: 'val_loss' reached 13.06018 (best 13.06018), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_74/checkpoints/epoch=7-step=400.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 8, global step 450: 'val_loss' reached 13.05514 (best 13.05514), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_74/checkpoints/epoch=8-step=450.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 9, global step 500: 'val_loss' reached 13.05154 (best 13.05154), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_74/checkpoints/epoch=9-step=500.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 10, global step 550: 'val_loss' reached 13.04451 (best 13.04451), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_74/checkpoints/epoch=10-step=550.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 11, global step 600: 'val_loss' reached 13.03956 (best 13.03956), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_74/checkpoints/epoch=11-step=600.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 12, global step 650: 'val_loss' reached 13.03434 (best 13.03434), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_74/checkpoints/epoch=12-step=650.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 13, global step 700: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 14, global step 750: 'val_loss' reached 13.02491 (best 13.02491), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_74/checkpoints/epoch=14-step=750.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 15, global step 800: 'val_loss' reached 13.00837 (best 13.00837), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_74/checkpoints/epoch=15-step=800.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 16, global step 850: 'val_loss' reached 12.98827 (best 12.98827), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_74/checkpoints/epoch=16-step=850.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 17, global step 900: 'val_loss' reached 12.97276 (best 12.97276), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_74/checkpoints/epoch=17-step=900.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 18, global step 950: 'val_loss' reached 12.93674 (best 12.93674), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_74/checkpoints/epoch=18-step=950.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 19, global step 1000: 'val_loss' reached 12.91331 (best 12.91331), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_74/checkpoints/epoch=19-step=1000.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 20, global step 1050: 'val_loss' reached 12.89124 (best 12.89124), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_74/checkpoints/epoch=20-step=1050.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 21, global step 1100: 'val_loss' reached 12.87878 (best 12.87878), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_74/checkpoints/epoch=21-step=1100.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 22, global step 1150: 'val_loss' reached 12.86209 (best 12.86209), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_74/checkpoints/epoch=22-step=1150.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 23, global step 1200: 'val_loss' reached 12.85251 (best 12.85251), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_74/checkpoints/epoch=23-step=1200.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 24, global step 1250: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 25, global step 1300: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 26, global step 1350: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 27, global step 1400: 'val_loss' reached 12.84096 (best 12.84096), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_74/checkpoints/epoch=27-step=1400.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 28, global step 1450: 'val_loss' reached 12.82869 (best 12.82869), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_74/checkpoints/epoch=28-step=1450.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 29, global step 1500: 'val_loss' reached 12.82130 (best 12.82130), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_74/checkpoints/epoch=29-step=1500.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 30, global step 1550: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 31, global step 1600: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 32, global step 1650: 'val_loss' reached 12.81771 (best 12.81771), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_74/checkpoints/epoch=32-step=1650.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 33, global step 1700: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 34, global step 1750: 'val_loss' reached 12.81467 (best 12.81467), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_74/checkpoints/epoch=34-step=1750.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 35, global step 1800: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 36, global step 1850: 'val_loss' reached 12.81114 (best 12.81114), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_74/checkpoints/epoch=36-step=1850.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 37, global step 1900: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 38, global step 1950: 'val_loss' reached 12.81074 (best 12.81074), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_74/checkpoints/epoch=38-step=1950.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 39, global step 2000: 'val_loss' reached 12.81011 (best 12.81011), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_74/checkpoints/epoch=39-step=2000.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 40, global step 2050: 'val_loss' reached 12.80662 (best 12.80662), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_74/checkpoints/epoch=40-step=2050.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 41, global step 2100: 'val_loss' reached 12.80625 (best 12.80625), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_74/checkpoints/epoch=41-step=2100.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 42, global step 2150: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 43, global step 2200: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 44, global step 2250: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 45, global step 2300: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 46, global step 2350: 'val_loss' reached 12.80066 (best 12.80066), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_74/checkpoints/epoch=46-step=2350.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 47, global step 2400: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 48, global step 2450: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 49, global step 2500: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 50, global step 2550: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 51, global step 2600: 'val_loss' reached 12.79801 (best 12.79801), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_74/checkpoints/epoch=51-step=2600.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 52, global step 2650: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 53, global step 2700: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 54, global step 2750: 'val_loss' reached 12.79393 (best 12.79393), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_74/checkpoints/epoch=54-step=2750.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 55, global step 2800: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 56, global step 2850: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 57, global step 2900: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 58, global step 2950: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 59, global step 3000: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 60, global step 3050: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 61, global step 3100: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 62, global step 3150: 'val_loss' reached 12.79304 (best 12.79304), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_74/checkpoints/epoch=62-step=3150.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 63, global step 3200: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 64, global step 3250: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 65, global step 3300: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 66, global step 3350: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 67, global step 3400: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 68, global step 3450: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 69, global step 3500: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 70, global step 3550: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 71, global step 3600: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 72, global step 3650: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 73, global step 3700: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 74, global step 3750: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 75, global step 3800: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 76, global step 3850: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 77, global step 3900: 'val_loss' was not in top 1



=== DeepAR config {'hidden_size': 80, 'num_layers': 2, 'dropout_rate': 0.1, 'lr': 0.0005, 'batch_size': 32} ===


INFO: GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO: HPU available: False, using: 0 HPUs
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name  | Type        | Params | Mode  | In sizes                                                        | Out sizes   
-------------------------------------------------------------------------------------------------------------------------------
0 | model | DeepARModel | 88.6 K | train | [[1, 1], [1, 1], [1, 208, 11], [1, 208], [1, 208], [1, 39, 11]] | [1, 100, 39]
-------------------------------------------------------------------------------------------------------------------------------
88.6 K    Trainable params
0         Non-trainable params
88.6 K    Total params
0.354     Total estimated model params size (MB)
11        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 0, global step 50: 'val_loss' reached 13.13478 (best 13.13478), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_75/checkpoints/epoch=0-step=50.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 1, global step 100: 'val_loss' reached 13.07429 (best 13.07429), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_75/checkpoints/epoch=1-step=100.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 2, global step 150: 'val_loss' reached 13.06288 (best 13.06288), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_75/checkpoints/epoch=2-step=150.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 3, global step 200: 'val_loss' reached 13.05758 (best 13.05758), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_75/checkpoints/epoch=3-step=200.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 4, global step 250: 'val_loss' reached 13.04633 (best 13.04633), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_75/checkpoints/epoch=4-step=250.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 5, global step 300: 'val_loss' reached 13.04088 (best 13.04088), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_75/checkpoints/epoch=5-step=300.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 6, global step 350: 'val_loss' reached 13.02911 (best 13.02911), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_75/checkpoints/epoch=6-step=350.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 7, global step 400: 'val_loss' reached 13.02432 (best 13.02432), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_75/checkpoints/epoch=7-step=400.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 8, global step 450: 'val_loss' reached 13.02072 (best 13.02072), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_75/checkpoints/epoch=8-step=450.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 9, global step 500: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 10, global step 550: 'val_loss' reached 13.01861 (best 13.01861), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_75/checkpoints/epoch=10-step=550.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 11, global step 600: 'val_loss' reached 13.01023 (best 13.01023), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_75/checkpoints/epoch=11-step=600.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 12, global step 650: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 13, global step 700: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 14, global step 750: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 15, global step 800: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 16, global step 850: 'val_loss' reached 12.99520 (best 12.99520), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_75/checkpoints/epoch=16-step=850.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 17, global step 900: 'val_loss' reached 12.98711 (best 12.98711), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_75/checkpoints/epoch=17-step=900.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 18, global step 950: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 19, global step 1000: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 20, global step 1050: 'val_loss' reached 12.97198 (best 12.97198), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_75/checkpoints/epoch=20-step=1050.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 21, global step 1100: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 22, global step 1150: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 23, global step 1200: 'val_loss' reached 12.96034 (best 12.96034), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_75/checkpoints/epoch=23-step=1200.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 24, global step 1250: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 25, global step 1300: 'val_loss' reached 12.93875 (best 12.93875), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_75/checkpoints/epoch=25-step=1300.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 26, global step 1350: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 27, global step 1400: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 28, global step 1450: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 29, global step 1500: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 30, global step 1550: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 31, global step 1600: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 32, global step 1650: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 33, global step 1700: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 34, global step 1750: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 35, global step 1800: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 36, global step 1850: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 37, global step 1900: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 38, global step 1950: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 39, global step 2000: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 40, global step 2050: 'val_loss' was not in top 1



=== DeepAR config {'hidden_size': 80, 'num_layers': 3, 'dropout_rate': 0.2, 'lr': 0.001, 'batch_size': 64} ===


INFO: GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO: HPU available: False, using: 0 HPUs
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name  | Type        | Params | Mode  | In sizes                                                        | Out sizes   
-------------------------------------------------------------------------------------------------------------------------------
0 | model | DeepARModel | 140 K  | train | [[1, 1], [1, 1], [1, 208, 11], [1, 208], [1, 208], [1, 39, 11]] | [1, 100, 39]
-------------------------------------------------------------------------------------------------------------------------------
140 K     Trainable params
0         Non-trainable params
140 K     Total params
0.562     Total estimated model params size (MB)
11        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 0, global step 50: 'val_loss' reached 13.11508 (best 13.11508), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_76/checkpoints/epoch=0-step=50.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 1, global step 100: 'val_loss' reached 13.08791 (best 13.08791), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_76/checkpoints/epoch=1-step=100.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 2, global step 150: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 3, global step 200: 'val_loss' reached 13.06167 (best 13.06167), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_76/checkpoints/epoch=3-step=200.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 4, global step 250: 'val_loss' reached 13.04484 (best 13.04484), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_76/checkpoints/epoch=4-step=250.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 5, global step 300: 'val_loss' reached 13.03481 (best 13.03481), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_76/checkpoints/epoch=5-step=300.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 6, global step 350: 'val_loss' reached 13.01991 (best 13.01991), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_76/checkpoints/epoch=6-step=350.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 7, global step 400: 'val_loss' reached 13.00343 (best 13.00343), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_76/checkpoints/epoch=7-step=400.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 8, global step 450: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 9, global step 500: 'val_loss' reached 12.98909 (best 12.98909), saving model to '/home/sagemaker-user/TFMAXEL/notebooks/lightning_logs/version_76/checkpoints/epoch=9-step=500.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 10, global step 550: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 11, global step 600: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 12, global step 650: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 13, global step 700: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 14, global step 750: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 15, global step 800: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 16, global step 850: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 17, global step 900: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 18, global step 950: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 19, global step 1000: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 20, global step 1050: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 21, global step 1100: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 22, global step 1150: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 23, global step 1200: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 24, global step 1250: 'val_loss' was not in top 1


Unnamed: 0,model,MAE,RMSE,sMAPE
0,deepar_exog__hs40__nl2__do0.1__lr0.001__bs32,67160.080318,96982.031497,6.89216
1,deepar_exog__hs80__nl3__do0.2__lr0.001__bs64,67506.510994,97688.276763,6.983673
2,deepar_exog__hs80__nl2__do0.1__lr0.0005__bs32,72776.266658,106678.840978,7.461332
3,deepar_exog__hs40__nl3__do0.2__lr0.0005__bs64,81907.719651,120732.97771,8.232743


## 5) Métricas (MAE, RMSE, sMAPE)
Se reporta:
- Global
- Por store

In [7]:
# Resumen rápido: mejor config y (si existe) baseline de 20 epochs

best_row = results_df.iloc[0]

print("Mejor config (ordenada por sMAPE):")

print(best_row)



import pathlib

baseline_path = pathlib.Path(OUTPUTS_DIR) / "metrics" / "deepar_exog_metrics_global.csv"

if baseline_path.exists():

    baseline_df = pd.read_csv(baseline_path)

    print("\nBaseline (EPOCHS=20) encontrado:")

    print(baseline_df)

    print("Comparación sMAPE delta:", float(best_row["sMAPE"]) - float(baseline_df.loc[0, "sMAPE"]))

else:

    print("\nBaseline (EPOCHS=20) no encontrado; ejecuta el baseline para comparar.")

Mejor config (ordenada por sMAPE):
model    deepar_exog__hs40__nl2__do0.1__lr0.001__bs32
MAE                                      67160.080318
RMSE                                     96982.031497
sMAPE                                         6.89216
Name: 0, dtype: object

Baseline (EPOCHS=20) encontrado:
         model           MAE         RMSE     sMAPE
0  deepar_exog  59573.684606  86287.61717  6.041327
Comparación sMAPE delta: 0.8508328327916894


## 6) Guardado de outputs estándar

In [8]:
# Guardar outputs para la última corrida (usa el último `pred_df` disponible)
try:
    model_name_to_save = name
except NameError:
    model_name_to_save = MODEL_NAME

# Construir métricas globales y por tienda a partir de `pred_df`
metrics_global_df = pd.DataFrame([{**{"model": model_name_to_save}, **compute_metrics(pred_df["y_true"].values, pred_df["y_pred"].values)}])
metrics_by_store_df = (
    pred_df.groupby("Store").apply(
        lambda g: pd.Series(compute_metrics(g["y_true"].values, g["y_pred"].values))
    ).reset_index().assign(model=model_name_to_save)
)

paths = save_outputs(
    model_name=model_name_to_save,
    predictions=pred_df,
    metrics_global=metrics_global_df,
    metrics_by_store=metrics_by_store_df,
    output_dir=OUTPUTS_DIR,
 )
paths

{'predictions': '/home/sagemaker-user/TFMAXEL/outputs/predictions/deepar_exog__hs80__nl3__do0.2__lr0.001__bs64_predictions.csv',
 'metrics_global': '/home/sagemaker-user/TFMAXEL/outputs/metrics/deepar_exog__hs80__nl3__do0.2__lr0.001__bs64_metrics_global.csv',
 'metrics_by_store': '/home/sagemaker-user/TFMAXEL/outputs/metrics/deepar_exog__hs80__nl3__do0.2__lr0.001__bs64_metrics_by_store.csv'}

## 7) Figuras
- 3 tiendas: real vs predicción en test
- Distribución del error (`y_true - y_pred`)

Guardar PNGs en `outputs/figures/`.

In [9]:
import matplotlib.pyplot as plt
import seaborn as sns

FIG_DIR = OUTPUTS_DIR / "figures"
FIG_DIR.mkdir(parents=True, exist_ok=True)

# Selección de 3 tiendas (mayor media de ventas en test)
top_stores = (
    pred_df.groupby("Store")["y_true"]
    .mean()
    .sort_values(ascending=False)
    .head(3)
    .index
    .tolist()
)

for store in top_stores:
    g = pred_df[pred_df["Store"] == store].sort_values("Date")
    plt.figure(figsize=(10, 4))
    plt.plot(g["Date"], g["y_true"], label="y_true")
    plt.plot(g["Date"], g["y_pred"], label="y_pred")
    plt.title(f"Store {store} — DeepAR")
    plt.xlabel("Date")
    plt.ylabel("Weekly_Sales")
    plt.legend()
    plt.tight_layout()
    plt.savefig(FIG_DIR / f"{MODEL_NAME}_plot_store_{store}.png", dpi=150)
    plt.close()

# Distribución de error
errors = pred_df["y_true"] - pred_df["y_pred"]
plt.figure(figsize=(8, 4))
sns.histplot(errors, bins=30, kde=True)
plt.title("Error distribution (y_true - y_pred)")
plt.xlabel("Error")
plt.tight_layout()
plt.savefig(FIG_DIR / f"{MODEL_NAME}_plot_error_dist.png", dpi=150)
plt.close()