# CC3045 — Laboratorio 8: Pronóstico de Demanda con LSTM/GRU/Conv1D

- Diego Valenzuela: 22309
- Gerson Ramierez: 22281

In [2]:
import sys, platform, os, warnings, random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf

warnings.filterwarnings("ignore")

SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

print("Python:", sys.version)
print("OS:", platform.platform())
print("TensorFlow:", tf.__version__)
print("Num GPUs Available:", len(tf.config.list_physical_devices('GPU')))


Python: 3.12.4 (tags/v3.12.4:8e8a4ba, Jun  6 2024, 19:30:16) [MSC v.1940 64 bit (AMD64)]
OS: Windows-11-10.0.22631-SP0
TensorFlow: 2.17.0
Num GPUs Available: 0


# Laboratorio — Pronóstico de Demanda (CPU)
Dataset: Store Item Demand Forecasting (train/test/submission).  
En este notebook: EDA mínima, baseline ingenuo, LSTM sencillo, evaluación (MAE/RMSE) y generación de `submission.csv`.


### Imports, paths y utilidades

In [3]:
import os, sys, warnings, math, random
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from sklearn.metrics import mean_absolute_error, mean_squared_error

warnings.filterwarnings("ignore")

SEED = 42
random.seed(SEED); np.random.seed(SEED); tf.random.set_seed(SEED)

DATA_DIR = "data"
TRAIN_CSV = os.path.join(DATA_DIR, "train.csv")
TEST_CSV  = os.path.join(DATA_DIR, "test.csv")
SAMPLE_SUB = os.path.join(DATA_DIR, "sample_submission.csv")

for p in [TRAIN_CSV, TEST_CSV, SAMPLE_SUB]:
    assert os.path.exists(p), f"No existe: {p}"

print("Python:", sys.version.split()[0])
print("TensorFlow:", tf.__version__)
print("Num GPUs Available:", len(tf.config.list_physical_devices('GPU')))


Python: 3.12.4
TensorFlow: 2.17.0
Num GPUs Available: 0


### Carga y sanity checks

In [4]:
train = pd.read_csv(TRAIN_CSV, parse_dates=["date"])
test  = pd.read_csv(TEST_CSV,  parse_dates=["date"])
sample = pd.read_csv(SAMPLE_SUB)

print("train:", train.shape, train.dtypes.to_dict())
print("test:",  test.shape,  test.dtypes.to_dict())
train.head(), test.head()


train: (913000, 4) {'date': dtype('<M8[ns]'), 'store': dtype('int64'), 'item': dtype('int64'), 'sales': dtype('int64')}
test: (45000, 4) {'id': dtype('int64'), 'date': dtype('<M8[ns]'), 'store': dtype('int64'), 'item': dtype('int64')}


(        date  store  item  sales
 0 2013-01-01      1     1     13
 1 2013-01-02      1     1     11
 2 2013-01-03      1     1     14
 3 2013-01-04      1     1     13
 4 2013-01-05      1     1     10,
    id       date  store  item
 0   0 2018-01-01      1     1
 1   1 2018-01-02      1     1
 2   2 2018-01-03      1     1
 3   3 2018-01-04      1     1
 4   4 2018-01-05      1     1)

#### EDA mínima (distribuciones y rango temporal)

In [5]:
full_last_date = train["date"].max()
valid_horizon = 30

cut_date = full_last_date - pd.Timedelta(days=valid_horizon-1)
train_df = train[train["date"] < cut_date].copy()
valid_df = train[(train["date"] >= cut_date)].copy()

print("cut_date:", cut_date.date())
print("train_df:", train_df["date"].min(), "→", train_df["date"].max(), len(train_df))
print("valid_df:", valid_df["date"].min(), "→", valid_df["date"].max(), len(valid_df))


cut_date: 2017-12-02
train_df: 2013-01-01 00:00:00 → 2017-12-01 00:00:00 898000
valid_df: 2017-12-02 00:00:00 → 2017-12-31 00:00:00 15000


### Baseline ingenuo (last value) por serie

In [6]:
valid_df = valid_df.sort_values(["store","item","date"])
train_df = train_df.sort_values(["store","item","date"])

prev = train_df[["store","item","date","sales"]].copy()
prev["date"] = prev["date"] + pd.Timedelta(days=1)
prev = prev.rename(columns={"sales":"yesterday"})

valid_join = valid_df.merge(prev, on=["store","item","date"], how="left")
means = train_df.groupby(["store","item"])["sales"].mean().rename("mean_sales")
valid_join = valid_join.merge(means, on=["store","item"], how="left")
valid_join["yhat_naive"] = valid_join["yesterday"].fillna(valid_join["mean_sales"]).clip(lower=0)

mae_naive = mean_absolute_error(valid_join["sales"], valid_join["yhat_naive"])
rmse_naive = mean_squared_error(valid_join["sales"], valid_join["yhat_naive"], squared=False)
mae_naive, rmse_naive


(9.63321959910913, 12.43507187525516)

### Preparación para LSTM (windowing por serie)

In [7]:
WIN = 30
STEP_AHEAD = 1

def make_complete(df):
    out = []
    for (s,i), g in df.groupby(["store","item"]):
        g = g.sort_values("date")
        idx = pd.date_range(g["date"].min(), g["date"].max(), freq="D")
        gg = g.set_index("date").reindex(idx).rename_axis("date").reset_index()
        gg["store"]=s; gg["item"]=i
        gg["sales"] = gg["sales"].interpolate().fillna(method="bfill").fillna(method="ffill")
        out.append(gg)
    return pd.concat(out, ignore_index=True)

train_c = make_complete(train_df)
valid_c = make_complete(valid_df)

def zscore_by_series(train_c, valid_c):
    stats = train_c.groupby(["store","item"])["sales"].agg(["mean","std"]).reset_index()
    stats["std"] = stats["std"].replace(0, 1.0)
    train_c = train_c.merge(stats, on=["store","item"])
    valid_c = valid_c.merge(stats, on=["store","item"], how="left")

    for col in ["mean","std"]:
        valid_c[col] = valid_c[col].fillna(valid_c[col].median())

    train_c["y"] = (train_c["sales"] - train_c["mean"]) / train_c["std"]
    valid_c["y"] = (valid_c["sales"] - valid_c["mean"]) / valid_c["std"]
    return train_c, valid_c

train_c, valid_c = zscore_by_series(train_c, valid_c)

def build_windows(df, win=30, step_ahead=1):
    Xs, ys, meta = [], [], []
    for (s,i), g in df.groupby(["store","item"]):
        g = g.sort_values("date").reset_index(drop=True)
        y = g["y"].values.astype(np.float32)
        dates = g["date"].values
        for t in range(win, len(g)-step_ahead+1):
            Xs.append(y[t-win:t].reshape(win,1))
            ys.append(y[t+step_ahead-1])
            meta.append((s,i,dates[t+step_ahead-1]))
    X = np.stack(Xs) if Xs else np.empty((0,win,1), dtype=np.float32)
    y = np.array(ys, dtype=np.float32)
    return X, y, meta

Xtr, ytr, _ = build_windows(train_c, WIN, STEP_AHEAD)
Xva, yva, meta_va = build_windows(valid_c, WIN, STEP_AHEAD)

Xtr.shape, Xva.shape


((883000, 30, 1), (0, 30, 1))


### Modelo LSTM compacto

In [8]:
from tensorflow.keras import layers, models, callbacks

def build_lstm(win=30):
    model = models.Sequential([
        layers.Input(shape=(win,1)),
        layers.LSTM(32, return_sequences=False),
        layers.Dense(16, activation="relu"),
        layers.Dense(1)
    ])
    model.compile(optimizer="adam", loss="mse")
    return model

model = build_lstm(WIN)
cb = [
    callbacks.EarlyStopping(patience=5, restore_best_weights=True, monitor="val_loss")
]
hist = model.fit(
    Xtr, ytr,
    validation_data=(Xva, yva),
    epochs=50,
    batch_size=256,
    verbose=1,
    callbacks=cb
)


Epoch 1/50
[1m 695/3450[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m53s[0m 20ms/step - loss: 0.5766

KeyboardInterrupt: 