# Time Series Forecasting with LSTM - Pytorch

- Since I intend to predict both the Sales Value and Product Name, given the customer name and city, we will develop a model using deep learning techniques, not limited to `LSTM` but I am experimenting that first.
- The steps included in this notebook are for experimenting before choosing the model to add to the live trainng pipeline for production.

I am Glad🤓💻

**Important Notes**

- Predicting the Sales Value is a Linear function, thus I will utilize `nn.Linear` class to create it's linear model
- Predicting the **Product Name** is a classification problem, thus we will have to assign all classes a probability as we predict them. (`Softmax` is a good fit)

In [4]:
import torch
import torchmetrics
import torch.nn as nn
import torch.nn.init as init
from torch.utils.data import DataLoader, Dataset, TensorDataset
import torch.optim as optim
import pandas as pd
import numpy as np
import os
import random
import pickle
import time
import warnings
from pathlib import Path
from typing import List, Tuple

warnings.filterwarnings("ignore")

In [5]:
# TODO: Load the train and validation data
data_path = (Path.cwd() / "../data/processed").resolve()
print(f"Loading data from {data_path}")
train_file = data_path / "train.csv"
val_file = data_path / "val.csv"

if not train_file.exists() or not val_file.exists():
	print("Data files not found.")
else:
	train_data = pd.read_csv(train_file)
	val_data = pd.read_csv(val_file)

Loading data from /home/dan/Coding/sales_time_series_prediction/data/processed


In [6]:
train_data.head()

Unnamed: 0,Sub-Category,Product Name,dayofweek_cos,Customer Name,State,City,month_cos,Sales
0,5,790,1.0,380,40,271,-0.8660254,1.140088
1,12,1720,0.62349,645,3,415,-0.5,-0.129866
2,3,1589,0.62349,357,11,388,1.0,-0.39446
3,10,210,0.62349,700,28,346,-1.83697e-16,-0.362179
4,13,1135,-0.900969,686,20,115,-0.5,0.551033


In [7]:
train_data = train_data[["Sub-Category", "Customer Name", "State", "City", "dayofweek_cos", "month_cos","Product Name", "Sales"]]
train_data.head()

Unnamed: 0,Sub-Category,Customer Name,State,City,dayofweek_cos,month_cos,Product Name,Sales
0,5,380,40,271,1.0,-0.8660254,790,1.140088
1,12,645,3,415,0.62349,-0.5,1720,-0.129866
2,3,357,11,388,0.62349,1.0,1589,-0.39446
3,10,700,28,346,0.62349,-1.83697e-16,210,-0.362179
4,13,686,20,115,-0.900969,-0.5,1135,0.551033


In [8]:
# TODO: Train the Linear model for predicting sales
class LinearModel(nn.Module):
    def __init__(self, input_dim: int, output_dim: int) -> None:
        super().__init__()
        self.linear = nn.Linear(input_dim, output_dim)
        self.elu = nn.ELU()
        self.dropout = nn.Dropout(p=0.3)

        # Weight Initialization
        init.kaiming_uniform_(self.linear.weight, nonlinearity='linear')

    def forward(self, x):
        x = self.elu(self.linear(x))
        x = self.dropout(x)
        x = self.elu(x)
        return x

# set random seed for reproducability
random.seed(42)
torch.manual_seed(42)
np.random.seed(42)

# TODO: Create the dataset class for training
class SalesDataset(Dataset):
    def __init__(self, data: pd.DataFrame) -> None:
        super().__init__()
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        features = torch.tensor(row[:-1].values, dtype=torch.float32)
        target = torch.tensor(row[-1], dtype=torch.float32)
        return features, target

# TODO: Create the dataset for training and validation
train_dataset = SalesDataset(train_data)
val_dataset = SalesDataset(val_data)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# TODO: Initialize the model, loss func and optimizer for a training loop of 5 epochs
input_dim = train_data.shape[1] - 1
output_dim = 1
model = LinearModel(input_dim, output_dim)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=0.1)

metrics = torchmetrics.MeanAbsoluteError()
num_epochs = 10
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# TODO: Training the model
for epoch in range(num_epochs):
    for i, (features, target) in enumerate(train_loader):
        features = features.to(device)
        target = target.to(device)

        optimizer.zero_grad()
        outputs = model(features)

        loss = criterion(outputs, target.view(-1, 1))
        mae = metrics(outputs, target.view(-1, 1))

        loss.backward()
        optimizer.step()
        if (i + 1) % 100 == 0:
            print(f"Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{len(train_loader)}], Loss: {loss.item():.4f}, MAE: {mae.item():.4f}")

# TODO: Evaluate the model on the validation set
model.eval()
val_loss = 0.0
with torch.no_grad():
    for features, target in val_loader:
        features = features.to(device)
        target = target.to(device)

        outputs = model(features)
        loss = criterion(outputs, target.view(-1, 1))
        mae = metrics(outputs, target.view(-1, 1))
        val_loss += loss.item()
    val_loss /= len(val_loader)
print(f"Validation Loss: {val_loss:.4f}, MAE: {mae.item():.4f}")

Epoch [1/10], Step [100/221], Loss: 1.9988, MAE: 0.7370
Epoch [1/10], Step [200/221], Loss: 0.5967, MAE: 0.5792
Epoch [2/10], Step [100/221], Loss: 0.9105, MAE: 0.6350
Epoch [2/10], Step [200/221], Loss: 1.7289, MAE: 0.8149
Epoch [3/10], Step [100/221], Loss: 1.1534, MAE: 0.6400
Epoch [3/10], Step [200/221], Loss: 0.6158, MAE: 0.6073
Epoch [4/10], Step [100/221], Loss: 1.2604, MAE: 0.6923
Epoch [4/10], Step [200/221], Loss: 1.4457, MAE: 0.7997
Epoch [5/10], Step [100/221], Loss: 1.3793, MAE: 0.8038
Epoch [5/10], Step [200/221], Loss: 2.1161, MAE: 0.8009
Epoch [6/10], Step [100/221], Loss: 0.3293, MAE: 0.5181
Epoch [6/10], Step [200/221], Loss: 0.6788, MAE: 0.6287
Epoch [7/10], Step [100/221], Loss: 1.2167, MAE: 0.7752
Epoch [7/10], Step [200/221], Loss: 0.8031, MAE: 0.7184
Epoch [8/10], Step [100/221], Loss: 0.2142, MAE: 0.4122
Epoch [8/10], Step [200/221], Loss: 1.1306, MAE: 0.6531
Epoch [9/10], Step [100/221], Loss: 0.4598, MAE: 0.5307
Epoch [9/10], Step [200/221], Loss: 0.5343, MAE:

In [9]:
# TODO: Create data sequences function for the train and test sets, this \
# sequences are entirely for predicting the sales value based on the sales data provided

def create_sales_sequences(data: pd.DataFrame, target_col: int | List[int], seq_len: int = 20) -> Tuple:
    """This function creates the data sequences for the time series data"""
    inputs, targets = [], []
    values = data.values

    for i in range(len(data) - seq_len):
        input_seq = values[i: i+seq_len]
        if isinstance(target_col, int):
            output_seq = values[i+seq_len, target_col]
        else:
            output_seq = values[i+seq_len, target_col].tolist()
                

        inputs.append(input_seq)
        targets.append(output_seq)

    return np.array(inputs), np.array(targets)

X_train, y_train = create_sales_sequences(train_data, [-2, -1], 30)

# print(X_train[:10])
print(y_train[:1])
print("Shape of y: ", y_train.shape)

[[784.           5.28187581]]
Shape of y:  (7018, 2)


In [10]:
# TODO: Create a model and pretrain it on one epoch for testing best hyperparameters for Deep Learning
print("Length of the train set: ", len(X_train))
print("Shape of the X_train data: ", X_train.shape)

Length of the train set:  7018
Shape of the X_train data:  (7018, 30, 8)


In [16]:
class MultiOutputModel(nn.Module):
    """this model inputs several features and outputs two labels, the product name and the sales value
    # LSTM-based Multi task model
        - classifies product name
        - regresses sales value
    """
    def __init__(
            self, 
            input_size: int,
            num_product_classes: int,
            num_layers: int = 2,
            hidden_size: int = 64,
            dropout: int = 0.3
    ):
        super().__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size

        self.product_predict = nn.LSTM(
            input_size=input_size,
            hidden_size=self.hidden_size,
            num_layers=self.num_layers,
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0
        )

        self.softplus = nn.Softplus()
        self.elu = nn.ELU()

        # Classifier for the products using Linear model
        self.product_head = nn.Linear(hidden_size, num_product_classes)
        self.sales_head = nn.Linear(hidden_size, 1)


    def forward(self, x):
        batch_size = x.size(0)

        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)

        # TODO: predict the product using LSTM model
        output, _ = self.product_predict(x, (h0, c0))
        last_hidden = output[:, -1, :]

        # TODO: predict the sales value for the product predicted
        product = self.softplus(self.product_head(last_hidden))
        sales = self.elu(self.sales_head(last_hidden)).squeeze(-1)
        return product, sales


# TODO: Parameters for training the model
num_product_names = train_data["Product Name"].nunique()
input_size = X_train.shape[-1]

# TODO: Instantiate the model
model = MultiOutputModel(
    input_size,
    num_product_names
)


X_tensor = torch.from_numpy(X_train).float()
y_tensor = torch.from_numpy(y_train).float()

train_dataset = TensorDataset(X_tensor, y_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

In [17]:
# TODO: Train the model on the train data
num_epochs = 1
final_training_loss = 0
regression_criterion = nn.MSELoss()
classification_criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)


model.train()
for epoch in range(num_epochs):
    for x_batch, y_batch in train_loader:
        x_batch = x_batch.float()
        y_class_batch = y_batch[:, 0].long()
        y_sales_batch = y_batch[:, 1].float()

        optimizer.zero_grad()

        product_pred, sales_pred = model(x_batch)

        product_loss = classification_criterion(product_pred, y_class_batch)
        sales_loss = regression_criterion(sales_pred.squeeze(), y_sales_batch)

        loss = product_loss + 0.5 * sales_loss

        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss}")

Epoch 1, Loss: 8.024569511413574


In [18]:
# TODO: Test the model on the test dataset
encoders_path = (Path.cwd() / "../models/encoders").resolve()

product_name_encoder = encoders_path / "Product Name_encoder.pkl"

with open(product_name_encoder, "rb") as f:
    encoder = pickle.load(f)
    f.close()

encoder

In [19]:
# Set path
scalers_path = (Path.cwd() / "../models/scalers").resolve()
sales_scaler_path = scalers_path / "Sales_scaler.pkl"

# Load scaler
with open(sales_scaler_path, "rb") as f:
    scaler = pickle.load(f)

# Optional: sanity check
print("Scaler type:", type(scaler))

# Prepare test data
X_test, y_test = create_sales_sequences(train_data, [-2, -1], 30)
X_test_tensor = torch.from_numpy(X_test).float()

# Run inference
model.eval()
with torch.no_grad():
    product_logits, sales_preds = model(X_test_tensor)

# Postprocess predictions
predicted_product_classes = torch.argmax(product_logits, dim=1).numpy()
predicted_sales = sales_preds.squeeze().numpy()
predicted_values = scaler.inverse_transform(predicted_sales.reshape(-1, 1)).flatten()
predicted_product_names = encoder.inverse_transform(predicted_product_classes)

# Display
for name, sale in zip(predicted_product_names[:20], predicted_values[:20]):
    print(f"Predicted Product: {name}, Predicted Sales: {sale:.2f}")

Scaler type: <class 'sklearn.preprocessing._data.StandardScaler'>
Predicted Product: Staple envelope, Predicted Sales: 273.83
Predicted Product: Logitech Desktop MK120 Mouse and keyboard Combo, Predicted Sales: 269.75
Predicted Product: Staple envelope, Predicted Sales: 279.54
Predicted Product: Staple envelope, Predicted Sales: 271.54
Predicted Product: Staple envelope, Predicted Sales: 254.07
Predicted Product: Staple envelope, Predicted Sales: 261.25
Predicted Product: Staple envelope, Predicted Sales: 259.84
Predicted Product: Logitech Desktop MK120 Mouse and keyboard Combo, Predicted Sales: 266.26
Predicted Product: Staple envelope, Predicted Sales: 276.39
Predicted Product: Staple envelope, Predicted Sales: 256.34
Predicted Product: Staple envelope, Predicted Sales: 247.91
Predicted Product: Staple envelope, Predicted Sales: 245.73
Predicted Product: Staple envelope, Predicted Sales: 273.31
Predicted Product: Logitech Desktop MK120 Mouse and keyboard Combo, Predicted Sales: 259.6

In [15]:
scaler