In [1]:
# -------------------------
# Configuración base
# -------------------------
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error, mean_absolute_error

BASE_DIR = r"C:\Users\juanf\Downloads\EAFIT\Semestre II\PI-2\PI_2"

print("BASE_DIR:", BASE_DIR)

BASE_DIR: C:\Users\juanf\Downloads\EAFIT\Semestre II\PI-2\PI_2


In [2]:
# ==========================
#  Entrenar SARIMA en una sola serie
# ==========================

def train_and_validate_sarima(df_series, order, seasonal_order, train_start, train_end, valid_start, valid_end):
    """
    df_series: DataFrame con columnas ['date','sales'] para UN store-item
    order: tuple (p,d,q)
    seasonal_order: tuple (P,D,Q,s)
    Fechas para train/valid tomadas del diccionario splits
    """

    # Filtramos datos
    train_mask = (df_series['date'] >= train_start) & (df_series['date'] <= train_end)
    valid_mask = (df_series['date'] >= valid_start) & (df_series['date'] <= valid_end)

    train_data = df_series.loc[train_mask, 'sales']
    valid_data = df_series.loc[valid_mask, 'sales']

    # Entrenar modelo
    model = SARIMAX(train_data,
                    order=order,
                    seasonal_order=seasonal_order,
                    enforce_stationarity=False,
                    enforce_invertibility=False)
    model_fit = model.fit(disp=False)

    # Hacer forecast del tamaño del tramo valid
    n_valid = len(valid_data)
    forecast = model_fit.forecast(n_valid)

    # Métricas
    rmse = np.sqrt(mean_squared_error(valid_data, forecast))
    mae  = mean_absolute_error(valid_data, forecast)
    mape = (np.abs((valid_data - forecast) / valid_data).replace([np.inf, -np.inf], np.nan).mean()) * 100

    return forecast, rmse, mae, mape

In [3]:
# ==========================
#  Prueba con Store=1, Item=1
# ==========================
train = pd.read_csv(r"C:\Users\juanf\Downloads\EAFIT\Semestre II\PI-2\PI_2\data\raw\train.csv", parse_dates=["date"])
test  = pd.read_csv(r"C:\Users\juanf\Downloads\EAFIT\Semestre II\PI-2\PI_2\data\raw\test.csv",  parse_dates=["date"])

store_test = 1
item_test = 1

df_s = train[(train["store"] == store_test) & (train["item"] == item_test)][["date","sales"]].reset_index(drop=True)

order = (1,1,1)
seasonal_order = (0,1,1,7)

results_sarima = {}

from datetime import datetime

splits = {
    "fold1": {
        "train_start": datetime(2013, 1, 1),
        "train_end"  : datetime(2016, 12, 31),
        "valid_start": datetime(2017, 1, 1),
        "valid_end"  : datetime(2017, 3, 31),
    },
    "fold2": {
        "train_start": datetime(2013, 1, 1),
        "train_end"  : datetime(2017, 3, 31),
        "valid_start": datetime(2017, 4, 1),
        "valid_end"  : datetime(2017, 6, 30),
    },
    "fold3": {
        "train_start": datetime(2013, 1, 1),
        "train_end"  : datetime(2017, 6, 30),
        "valid_start": datetime(2017, 7, 1),
        "valid_end"  : datetime(2017, 9, 30),
    },
    "test_interno": {
        "start": datetime(2017, 10, 1),
        "end"  : datetime(2017, 12, 31),
    }
}

for fold_name, f in splits.items():
    if fold_name == "test_interno":
        continue

    forecast, rmse, mae, mape = train_and_validate_sarima(
        df_s,
        order,
        seasonal_order,
        f["train_start"], f["train_end"],
        f["valid_start"], f["valid_end"]
    )

    results_sarima[fold_name] = {"RMSE": rmse, "MAE": mae, "MAPE": mape}

results_sarima

{'fold1': {'RMSE': np.float64(4.2704411054491285),
  'MAE': 3.4412378938112567,
  'MAPE': np.float64(23.207669430645964)},
 'fold2': {'RMSE': np.float64(9.368262245216297),
  'MAE': 7.695190912629693,
  'MAPE': np.float64(27.722874041231538)},
 'fold3': {'RMSE': np.float64(6.45915579383533),
  'MAE': 5.4453193028855225,
  'MAPE': np.float64(25.264710504874643)}}

Con el objetivo de evaluar la capacidad predictiva del modelo SARIMA, se implementó una estrategia de validación basada en expanding windows con tres folds secuenciales, cubriendo el período de entrenamiento desde enero de 2013 hasta septiembre de 2017. El desempeño del modelo se midió utilizando tres métricas: RMSE (Root Mean Squared Error), MAE (Mean Absolute Error) y MAPE (Mean Absolute Percentage Error).  

El modelo SARIMA muestra un comportamiento general aceptable para series temporales agregadas como las utilizadas en este caso. Sin embargo, sus limitaciones se evidencian en contextos de alta variabilidad (como en el segundo trimestre de 2017).  

Importancia del backtesting temporal: Esta estrategia permitió identificar cómo varía el desempeño del modelo conforme se le expone a nuevos datos, simulando su uso real en producción.  

In [4]:
# ==========================
#  SARIMA para múltiples series
# ==========================

import time

def evaluate_sarima_multiple(train_df, stores, items, order, seasonal_order, splits):
    results = []

    for s in stores:
        for i in items:

            df_s = train_df[(train_df["store"] == s) & (train_df["item"] == i)][["date","sales"]]

            for fold_name, f in splits.items():
                if fold_name == "test_interno":
                    continue

                try:
                    forecast, rmse, mae, mape = train_and_validate_sarima(
                        df_s,
                        order,
                        seasonal_order,
                        f["train_start"], f["train_end"],
                        f["valid_start"], f["valid_end"]
                    )

                    results.append({
                        "store": s,
                        "item": i,
                        "fold": fold_name,
                        "RMSE": rmse,
                        "MAE": mae,
                        "MAPE": mape
                    })

                except Exception as e:
                    print(f"Error en store={s}, item={i}, fold={fold_name}: {e}")

    return pd.DataFrame(results)

In [5]:
# ==========================
#  Prueba con pocas series
# ==========================

stores_test = [1, 2]
items_test = [1, 2]

order = (1,1,1)
seasonal_order = (0,1,1,7)

df_sarima_test = evaluate_sarima_multiple(train, stores_test, items_test, order, seasonal_order, splits)
df_sarima_test

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates

Unnamed: 0,store,item,fold,RMSE,MAE,MAPE
0,1,1,fold1,4.270441,3.441238,23.207669
1,1,1,fold2,9.368262,7.695191,27.722874
2,1,1,fold3,6.459156,5.445319,25.264711
3,1,2,fold1,10.024674,7.992756,
4,1,2,fold2,22.135358,19.852356,
5,1,2,fold3,10.868581,8.886757,
6,2,1,fold1,4.760542,3.814247,
7,2,1,fold2,11.131217,9.588308,
8,2,1,fold3,6.889552,5.71145,
9,2,2,fold1,20.489449,16.772078,


El experimento con modelos SARIMA entrenados individualmente para cada combinación store–item permitió evaluar la capacidad de este enfoque clásico de series temporales en un entorno multiserial realista. Los resultados muestran un desempeño altamente heterogéneo, fuertemente dependiente de la estructura y regularidad de cada serie.  

SARIMA funciona adecuadamente solo en series con patrones históricos regulares, como es el caso de store=1, item=1 y store=2, item=1, donde los errores (RMSE 4–7) son relativamente bajos. Series más inestables o con altas fluctuaciones exhiben un deterioro evidente del modelo, con RMSE superiores a 10 e incluso 22 en ciertos folds. Esto confirma que el modelo es sensible a cambios estructurales, outliers y ventas irregulares.  

SARIMA funciona muy bien siempre y cuando el futuro se parezca al pasado, pero cuando los patrones cambian, se rompe.

In [6]:
# ==========================
# Guardado de resultados
# ==========================
output_path = Path(BASE_DIR) / "data" / "processed" / "sarima_local_test.csv"
df_sarima_test.to_csv(output_path, index=False)