# LSTM model

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from tqdm import tqdm

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

DEVICE

device(type='cpu')

In [8]:
# Load the engineered feature table and order it by store, then family, then
# date, so that rows belonging to one (store, family) series are contiguous
# and chronologically sorted.
df = (
    pd.read_csv("./data/processed/features.csv", parse_dates=["date"], index_col=0)
    .sort_values(by=["store_nbr", "family", "date"])
    .reset_index(drop=True)
)

# Distinct families / stores in order of first appearance in the sorted frame.
categories = df["family"].unique()
store_nbrs = df["store_nbr"].unique()
df.head()

Unnamed: 0,store_nbr,family,sales,cluster,is_holiday,oil_price,year,day_of_year_sin,day_of_year_cos,day_of_month_sin,...,day_of_week_sin,day_of_week_cos,month_sin,month_cos,encoding_store_nbr,encoding_family,encoding_year,lag_4,lag_8,lag_oil_14
0,1,AUTOMOTIVE,0.001654,13,0,0.815705,0,0.385663,0.92264,-0.998717,...,0.974928,-0.222521,0.5,0.866025,0.070964,0.002897,0.0588,0.000551,0.001103,0.792965
1,1,AUTOMOTIVE,0.0,13,0,0.81914,0,0.401488,0.915864,-0.988468,...,0.433884,-0.900969,0.5,0.866025,0.07079,0.002892,0.058822,0.000551,0.001654,0.790951
2,1,AUTOMOTIVE,0.002756,13,0,0.816771,0,0.417194,0.908818,-0.937752,...,-0.433884,-0.900969,0.5,0.866025,0.070964,0.002897,0.0588,0.0,0.001103,0.792728
3,1,AUTOMOTIVE,0.001654,13,0,0.826247,0,0.46355,0.886071,-0.571268,...,0.0,1.0,0.5,0.866025,0.070894,0.0029,0.058626,0.000551,0.000551,0.793675
4,1,AUTOMOTIVE,0.001103,13,0,0.846026,0,0.478734,0.87796,-0.394356,...,0.781831,0.62349,0.5,0.866025,0.070894,0.0029,0.058626,0.001654,0.000551,0.793794


In [22]:
# Keep only rows whose family is among the first N_families categories and
# whose store is among the first N_stores stores.
# NOTE(review): N_families and N_stores are not defined in any cell visible
# here — confirm they are set before this cell on a fresh kernel.
keep_family = df["family"].isin(categories[:N_families])
keep_store = df["store_nbr"].isin(store_nbrs[:N_stores])
df = df.loc[keep_family & keep_store].copy()

In [23]:
SEQUENCE_LENGTH = 4


# NOTE(review): the defaults dynamic_num_cols / static_cat_cols are evaluated
# when this `def` runs and are not defined in any cell visible here — confirm
# they exist before this cell on a fresh kernel.
def create_sequences(
    data,
    seq_length=SEQUENCE_LENGTH,
    dynamic_cols=dynamic_num_cols,
    static_cols=static_cat_cols,
    target_col="sales",
):
    """Build sliding-window samples for every (family, store_nbr) series.

    For each group, every run of ``seq_length`` consecutive rows of
    ``dynamic_cols`` becomes one input window. The row immediately after the
    window supplies the target (``target_col``) and the static features
    (``static_cols``).

    Args:
        data: DataFrame containing "family", "store_nbr", the dynamic columns,
            the static columns and the target column. Rows are consumed in the
            order they appear inside each group.
        seq_length: Number of consecutive rows per input window.
        dynamic_cols: Column names forming the per-timestep features.
        static_cols: List of column names taken from the target row.
        target_col: Name of the column to predict.

    Returns:
        Tuple ``(sequences, static_features, targets)`` of NumPy arrays:
        windows of shape (n, seq_length, len(dynamic_cols)), static features
        with one row per window, and targets reshaped to (n, 1).
    """
    window_list = []
    label_list = []
    static_list = []

    for _, entity_rows in tqdm(data.groupby(["family", "store_nbr"])):
        dynamic_block = entity_rows[dynamic_cols].values
        # Static columns and the target are extracted together; the target is
        # the last column of this array.
        static_and_target = entity_rows[static_cols + [target_col]].values

        n_windows = len(entity_rows) - seq_length
        for start in range(n_windows):
            stop = start + seq_length
            window_list.append(dynamic_block[start:stop])

            # Row `stop` is the step right after the window.
            row_after_window = static_and_target[stop]
            label_list.append(row_after_window[-1])
            static_list.append(row_after_window[:-1])

    sequences = np.array(window_list)
    static_features = np.array(static_list)
    targets = np.array(label_list).reshape(-1, 1)
    return sequences, static_features, targets


# Turn the filtered frame into (window, static features, target) arrays.
X_dynamic, X_static, y = create_sequences(df, seq_length=SEQUENCE_LENGTH)

# Report the resulting array shapes, one per line.
print(
    f"X_dynamic shape: {X_dynamic.shape}",
    f"X_static shape: {X_static.shape}",
    f"y shape: {y.shape}",
    sep="\n",
)


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 100/100 [00:00<00:00, 214.13it/s]


X_dynamic shape: (117386, 4, 12)
X_static shape: (117386, 2)
y shape: (117386, 1)


In [24]:
# Hold out 20% of the samples for validation with a fixed seed.
# NOTE(review): the samples are overlapping stride-1 windows and this split
# shuffles them, so neighbouring windows of the same series can land on both
# sides — validation loss may be optimistic. Consider a chronological split.
X_dynamic_train, X_dynamic_val, X_static_train, X_static_val, y_train, y_val = train_test_split(
    X_dynamic,
    X_static,
    y,
    test_size=0.2,
    random_state=42,
)

In [26]:
BATCH_SIZE = 64


class SalesDataset(Dataset):
    """In-memory dataset of (dynamic window, static features, target) triples.

    The three inputs are converted to float32 tensors once at construction;
    indexing returns the corresponding entry of each tensor.
    """

    @staticmethod
    def _as_float_tensor(values):
        """Copy `values` into a new float32 tensor."""
        return torch.tensor(values, dtype=torch.float32)

    def __init__(self, dynamic_features, static_features, targets):
        self.dynamic_features = self._as_float_tensor(dynamic_features)
        self.static_features = self._as_float_tensor(static_features)
        self.targets = self._as_float_tensor(targets)

    def __len__(self):
        # The number of samples is defined by the target tensor.
        return len(self.targets)

    def __getitem__(self, idx):
        fields = (self.dynamic_features, self.static_features, self.targets)
        return tuple(field[idx] for field in fields)


# Wrap the train / validation arrays as tensor datasets.
train_dataset = SalesDataset(
    dynamic_features=X_dynamic_train,
    static_features=X_static_train,
    targets=y_train,
)
val_dataset = SalesDataset(
    dynamic_features=X_dynamic_val,
    static_features=X_static_val,
    targets=y_val,
)

# Only the training loader reshuffles; validation keeps a fixed order.
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE, shuffle=False)


In [31]:
from lstm import SalesPredictorLSTM, HIDDEN_SIZE_LSTM, HIDDEN_SIZE_FC

# Feature counts are read off the training arrays: the last axis of the
# dynamic windows and the column count of the static matrix.
model = SalesPredictorLSTM(
    num_dynamic_features=X_dynamic_train.shape[2],
    num_static_features=X_static_train.shape[1],
    lstm_hidden_size=HIDDEN_SIZE_LSTM,
    fc_hidden_size=HIDDEN_SIZE_FC,
)
model = model.to(DEVICE)

print("\nModel Architecture:", model, sep="\n")



Model Architecture:
SalesPredictorLSTM(
  (static_mlp): Sequential(
    (0): Linear(in_features=2, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=16, bias=True)
    (3): ReLU()
  )
  (lstm): LSTM(12, 64, batch_first=True)
  (fc1): Linear(in_features=80, out_features=32, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=32, out_features=1, bias=True)
)


In [None]:
# Number of full passes over the training set. This must be > 0: with 0 the
# loop below never executes and the model keeps its initial weights. The
# recorded output of this cell shows a 2-epoch run.
EPOCHS = 2
LEARNING_RATE = 5e-4

# Mean squared error between predicted and actual sales, optimised with Adam.
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

print("\nStarting Training...")
for epoch in range(EPOCHS):
    # ---- Training pass: gradients enabled, weights updated per batch ----
    model.train()
    train_loss_epoch = 0.0
    for dynamic_batch, static_batch, target_batch in tqdm(train_loader):
        dynamic_batch, static_batch, target_batch = (
            dynamic_batch.to(DEVICE),
            static_batch.to(DEVICE),
            target_batch.to(DEVICE),
        )

        optimizer.zero_grad()
        outputs = model(dynamic_batch, static_batch)
        loss = criterion(outputs, target_batch)
        loss.backward()
        optimizer.step()
        # `criterion` returns the batch mean; weight it by the batch size so
        # the (usually shorter) final batch is not over-represented.
        train_loss_epoch += loss.item() * target_batch.size(0)

    # Per-sample mean over the whole training set.
    train_loss_epoch /= len(train_loader.dataset)

    # ---- Validation pass: no gradient tracking, no weight updates ----
    model.eval()
    val_loss_epoch = 0.0
    with torch.no_grad():
        for dynamic_batch, static_batch, target_batch in tqdm(val_loader):
            dynamic_batch, static_batch, target_batch = (
                dynamic_batch.to(DEVICE),
                static_batch.to(DEVICE),
                target_batch.to(DEVICE),
            )
            outputs = model(dynamic_batch, static_batch)
            loss = criterion(outputs, target_batch)
            val_loss_epoch += loss.item() * target_batch.size(0)
    # Per-sample mean over the whole validation set.
    val_loss_epoch /= len(val_loader.dataset)

    print(
        f"Epoch {epoch + 1}/{EPOCHS}, Train Loss: {train_loss_epoch:.6f}, Val Loss: {val_loss_epoch:.6f}"
    )



Starting Training...


100%|██████████| 1468/1468 [00:09<00:00, 149.16it/s]
100%|██████████| 367/367 [00:00<00:00, 912.52it/s]


Epoch 1/2, Train Loss: 0.082204, Val Loss: 0.083683


100%|██████████| 1468/1468 [00:06<00:00, 243.73it/s]
100%|██████████| 367/367 [00:00<00:00, 649.02it/s]

Epoch 2/2, Train Loss: 0.082177, Val Loss: 0.083683





In [None]:
# Persist only the model's parameters (its state_dict), not the module object.
torch.save(obj=model.state_dict(), f='./lstm_sales.pth')
# To load: