# Decision Optimisation for Continuous Outcomes

- skip_exec: true


In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import Dataset, DataLoader

# Show every column when a DataFrame is displayed.
pd.set_option("display.max_columns", None)

# Project root is two directories above the notebook's working directory.
PROJECT_ROOT = Path.cwd().parent.parent

# Seed the global RNGs so weight initialisation and DataLoader shuffling are
# repeatable under Restart Kernel -> Run All.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

# Transparent figure and axes backgrounds.
plt.rcParams["figure.facecolor"] = (1, 1, 1, 0)  # RGBA tuple with alpha=0
plt.rcParams["axes.facecolor"] = (1, 1, 1, 0)  # RGBA tuple with alpha=0

The data that we will use comes from the [Grupo Bimbo Inventory Demand](https://www.kaggle.com/competitions/grupo-bimbo-inventory-demand) Kaggle competition.


In [None]:
DATA_DIR = PROJECT_ROOT / "data" / "grupo-bimbo-inventory-demand"

# Fact table (first 200,000 rows only) plus the three lookup tables.
data = pd.read_csv(DATA_DIR / "train.csv", nrows=200000, low_memory=False)
clientes = pd.read_csv(DATA_DIR / "cliente_tabla.csv", low_memory=False)
productos = pd.read_csv(DATA_DIR / "producto_tabla.csv", low_memory=False)
town_state = pd.read_csv(DATA_DIR / "town_state.csv", low_memory=False)

n_rows_loaded = len(data)

# A left merge against a lookup table that repeats its key duplicates the
# matching fact rows, which inflates the dataset and lets copies of one
# observation land in both the train and validation splits. Keep the first
# lookup row per key and let pandas verify the relationship is many-to-one.
lookups = [
    (clientes, "Cliente_ID"),
    (productos, "Producto_ID"),
    (town_state, "Agencia_ID"),
]
for lookup, key in lookups:
    data = pd.merge(
        data,
        lookup.drop_duplicates(subset=key),
        on=key,
        how="left",
        validate="many_to_one",
    )

assert len(data) == n_rows_loaded, "merging lookup tables changed the number of rows"

In [None]:
# Display the merged frame: train rows joined with the client, product and town/state lookups.
data

Unnamed: 0,Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,Venta_uni_hoy,Venta_hoy,Dev_uni_proxima,Dev_proxima,Demanda_uni_equil,NombreCliente,NombreProducto,Town,State
0,3,1110,7,3301,15766,1212,3,25.14,0,0.0,3,PUESTO DE PERIODICOS LAZARO,Roles Canela 2p 120g BIM 1212,2008 AG. LAGO FILT,"MÉXICO, D.F."
1,3,1110,7,3301,15766,1216,4,33.52,0,0.0,4,PUESTO DE PERIODICOS LAZARO,Roles Glass 2p 135g BIM 1216,2008 AG. LAGO FILT,"MÉXICO, D.F."
2,3,1110,7,3301,15766,1238,4,39.32,0,0.0,4,PUESTO DE PERIODICOS LAZARO,Panquecito Gota Choc 2p 140g BIM 1238,2008 AG. LAGO FILT,"MÉXICO, D.F."
3,3,1110,7,3301,15766,1240,4,33.52,0,0.0,4,PUESTO DE PERIODICOS LAZARO,Mantecadas Vainilla 4p 125g BIM 1240,2008 AG. LAGO FILT,"MÉXICO, D.F."
4,3,1110,7,3301,15766,1242,3,22.92,0,0.0,3,PUESTO DE PERIODICOS LAZARO,Donitas Espolvoreadas 6p 105g BIM 1242,2008 AG. LAGO FILT,"MÉXICO, D.F."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200735,3,1116,1,1466,2309869,1238,8,78.64,0,0.0,8,UNION DEL VALLE 2,Panquecito Gota Choc 2p 140g BIM 1238,2011 AG. SAN ANTONIO,"MÉXICO, D.F."
200736,3,1116,1,1466,2309869,1240,8,67.04,0,0.0,8,UNION DEL VALLE 2,Mantecadas Vainilla 4p 125g BIM 1240,2011 AG. SAN ANTONIO,"MÉXICO, D.F."
200737,3,1116,1,1466,2309869,1242,6,45.84,0,0.0,6,UNION DEL VALLE 2,Donitas Espolvoreadas 6p 105g BIM 1242,2011 AG. SAN ANTONIO,"MÉXICO, D.F."
200738,3,1116,1,1466,2309869,1250,27,206.28,0,0.0,27,UNION DEL VALLE 2,Donas Azucar 4p 105g BIM 1250,2011 AG. SAN ANTONIO,"MÉXICO, D.F."


In [None]:
categorical_cols = ["Agencia_ID", "Canal_ID", "Ruta_SAK", "Cliente_ID", "Producto_ID"]

# Overwrite each ID column with integer codes and keep the fitted encoder
# per column so the codes can be mapped back to the original IDs.
label_encoders = {}
for col in categorical_cols:
    le = label_encoders[col] = LabelEncoder()
    data[col] = le.fit_transform(data[col])


In [None]:
# Number of distinct codes per categorical column (the embedding table height).
num_unique_vals = {col: data[col].nunique() for col in categorical_cols}
# Embedding width: half the cardinality, capped at 50 and floored at 1 so a
# single-valued column does not get a zero-width embedding.
embedding_sizes = {col: max(1, min(50, n_unique // 2)) for col, n_unique in num_unique_vals.items()}

In [None]:
# Distinct-value count for each categorical column.
num_unique_vals


{'Agencia_ID': 6,
 'Canal_ID': 6,
 'Ruta_SAK': 343,
 'Cliente_ID': 10472,
 'Producto_ID': 478}

In [None]:
# Embedding width chosen for each categorical column.
embedding_sizes


{'Agencia_ID': 3,
 'Canal_ID': 3,
 'Ruta_SAK': 50,
 'Cliente_ID': 50,
 'Producto_ID': 50}

In [None]:
# Features: the encoded categorical columns, in `categorical_cols` order.
X = data.loc[:, categorical_cols].to_numpy()
# Target: the Demanda_uni_equil column.
y = data["Demanda_uni_equil"].to_numpy()


In [None]:
# Encoded feature matrix, one column per entry in categorical_cols.
X

array([[   0,    3,  293,    3,   43],
       [   0,    3,  293,    3,   44],
       [   0,    3,  293,    3,   48],
       ...,
       [   5,    0,  235, 7722,   50],
       [   5,    0,  235, 7722,   51],
       [   5,    0,  235, 7722,   53]])

In [None]:
# Regression target values taken from the Demanda_uni_equil column.
y

array([ 3,  4,  4, ...,  6, 27, 13])

In [None]:
# Hold out 20% of the rows for validation; the fixed random_state keeps the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [None]:
class BimboDataset(Dataset):
    """Dataset that serves one index tensor per feature column plus a float target.

    Args:
        X: 2-D array of integer codes, one column per categorical feature.
        y: 1-D array of targets aligned with the rows of ``X``.
    """

    def __init__(self, X, y):
        # Store the features column-wise so each column can feed its own embedding.
        columns = []
        for col_idx in range(X.shape[1]):
            columns.append(torch.tensor(X[:, col_idx], dtype=torch.long))
        self.X = columns
        self.y = torch.tensor(y, dtype=torch.float32)

    def __len__(self):
        """Number of samples."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return ``([code for each feature column], target)`` for sample ``idx``."""
        features = [column[idx] for column in self.X]
        target = self.y[idx]
        return features, target


In [None]:
# Wrap both splits as datasets.
train_dataset, val_dataset = BimboDataset(X_train, y_train), BimboDataset(X_val, y_val)

# Shuffle only the training batches; validation keeps its row order.
train_loader = DataLoader(dataset=train_dataset, batch_size=128, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=128, shuffle=False)


In [None]:
class SimpleModel(nn.Module):
    """Embedding + two-hidden-layer MLP regressor over categorical features.

    Each categorical column gets its own embedding table. The embeddings are
    concatenated and passed through two ReLU-activated linear layers and a
    final linear layer with a single output.

    Args:
        embedding_sizes: Mapping from column name to embedding width.
        hidden_size: Width of both hidden layers.
        cardinalities: Mapping from column name to number of distinct codes.
            Defaults to the notebook-level ``num_unique_vals``.
        columns: Ordered column names; the order must match the order of the
            tensors passed to ``forward``. Defaults to the notebook-level
            ``categorical_cols``.
    """

    def __init__(self, embedding_sizes, hidden_size=128, cardinalities=None, columns=None):
        super().__init__()
        # Fall back to the notebook globals so existing `SimpleModel(embedding_sizes)` calls still work.
        if columns is None:
            columns = categorical_cols
        if cardinalities is None:
            cardinalities = num_unique_vals
        columns = list(columns)

        self.embeddings = nn.ModuleList(
            [nn.Embedding(cardinalities[col], embedding_sizes[col]) for col in columns]
        )
        # Size fc1 from the embeddings actually built, so extra keys in
        # `embedding_sizes` cannot cause a shape mismatch in `forward`.
        concat_dim = sum(embedding_sizes[col] for col in columns)
        self.fc1 = nn.Linear(concat_dim, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Predict one value per sample.

        Args:
            x: Sequence of index tensors, one per column, in the same order as
                the embeddings.

        Returns:
            Tensor of predictions with the trailing size-1 axis removed.
        """
        x = [embedding(x_i) for x_i, embedding in zip(x, self.embeddings)]
        x = torch.cat(x, dim=-1)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        # Drop only the last axis so a batch of one keeps its batch dimension.
        x = self.fc3(x).squeeze(-1)
        return x

In [None]:
def train_model(loss_fn, num_epochs=5):
    """Train a fresh ``SimpleModel`` and evaluate it on the validation set after every epoch.

    Reads the notebook-level ``embedding_sizes``, ``train_loader`` and
    ``val_loader``. After each epoch it prints a dict with the mean training
    loss, mean validation loss and validation R^2.

    Args:
        loss_fn: Callable taking ``(outputs, targets)`` and returning a scalar
            tensor to minimise.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        Tuple ``(model, val_preds, val_targets)``: the trained model and NumPy
        arrays of the predictions and targets from the last validation pass
        (empty arrays when ``num_epochs`` is 0).
    """
    model = SimpleModel(embedding_sizes)
    optimizer = optim.Adam(model.parameters(), lr=0.005)

    # Defined up front so the return statement is valid even when no epoch runs.
    val_preds = []
    val_targets = []

    # Training loop
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            # The model output is already 1-D (batch,). An extra .squeeze() would
            # collapse a final batch of size 1 to a 0-d tensor, which then
            # broadcasts in the loss and breaks `.tolist()` below.
            outputs = model(inputs)
            loss = loss_fn(outputs, targets)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        # Mean of the per-batch losses.
        train_loss /= len(train_loader)
        # Validation loop
        model.eval()
        val_loss = 0.0
        val_preds = []
        val_targets = []
        with torch.no_grad():
            for inputs, targets in val_loader:
                outputs = model(inputs)
                loss = loss_fn(outputs, targets)
                val_loss += loss.item()
                val_preds.extend(outputs.tolist())
                val_targets.extend(targets.tolist())

        val_loss /= len(val_loader)
        r2 = r2_score(val_targets, val_preds)
        print({"epoch": epoch, "train_loss": train_loss, "val_loss": val_loss, "r_squared": r2})
    return model, np.array(val_preds), np.array(val_targets)

In [None]:
def log_business_metrics(stocking_decisions, actual_demand, name, tags):
    """Print stocking-quality and regression metrics for a set of stocking decisions.

    The utility weights every understocked unit three times as heavily as an
    overstocked unit.

    Args:
        stocking_decisions: Array of stocked quantities.
        actual_demand: Array of realised demand, aligned with ``stocking_decisions``.
        name: Run label; accepted but not used by this function.
        tags: Run tags; accepted but not used by this function.

    Returns:
        None. The metrics are printed as a single dict.
    """
    # Per-row shortfall and surplus, floored at zero.
    shortfall = (actual_demand - stocking_decisions).clip(0)
    surplus = (stocking_decisions - actual_demand).clip(0)

    total_understocked_amt = shortfall.sum()
    total_overstocked_amt = surplus.sum()

    metrics = {
        "frac_understocks": (stocking_decisions < actual_demand).mean(),
        "total_understocked_amt": total_understocked_amt,
        "frac_overstocks": (stocking_decisions > actual_demand).mean(),
        "total_overstocked_amt": total_overstocked_amt,
        "utility": -3 * total_understocked_amt - total_overstocked_amt,
        "mae": mean_absolute_error(actual_demand, stocking_decisions),
        "mse": mean_squared_error(actual_demand, stocking_decisions),
        "r2_score": r2_score(actual_demand, stocking_decisions),
    }
    print(metrics)

In [None]:
# Baseline: train with mean-squared-error loss.
loss = nn.MSELoss()
mse_model, mse_val_preds, mse_val_targets = train_model(loss_fn=loss, num_epochs=5)

# Stock the predicted demand rounded up to whole units.
mse_val_stock = np.ceil(mse_val_preds)
log_business_metrics(
    stocking_decisions=mse_val_stock,
    actual_demand=mse_val_targets,
    name="mse_loss_predictions",
    tags=["mse_loss", "stock_predicted_sales"],
)


{'epoch': 0, 'train_loss': 377.6694202195126, 'val_loss': 248.30723958106557, 'r_squared': 0.5973987522699671}
{'epoch': 1, 'train_loss': 257.1380700016402, 'val_loss': 203.92301788147847, 'r_squared': 0.6693401307703193}
{'epoch': 2, 'train_loss': 189.56163436312124, 'val_loss': 184.47238147638407, 'r_squared': 0.7008787059328843}
{'epoch': 3, 'train_loss': 152.40557876906072, 'val_loss': 166.08334452331445, 'r_squared': 0.7307139029616806}
{'epoch': 4, 'train_loss': 130.24035612463476, 'val_loss': 200.58960523423116, 'r_squared': 0.6748133574345319}
{'frac_understocks': 0.3161054099830627, 'total_understocked_amt': 80497.0, 'frac_overstocks': 0.5401514396732091, 'total_overstocked_amt': 89001.0, 'utility': -330492.0, 'mae': 4.221829231842184, 'mse': 200.7810102620305, 'r2_score': 0.6747476477175408}


In [None]:
# Alternative rule: stock 50% above the MSE model's prediction, rounded up.
alternative_stocking_rule = np.ceil(mse_val_preds * 1.5)
log_business_metrics(
    stocking_decisions=alternative_stocking_rule,
    actual_demand=mse_val_targets,
    name="50_pct_above_mse_loss_predictions",
    tags=["mse_loss", "stock_50_pct_above_predicted_sales"],
)

{'frac_understocks': 0.14772840490186312, 'total_understocked_amt': 36936.0, 'frac_overstocks': 0.7803875660057786, 'total_overstocked_amt': 210560.0, 'utility': -321368.0, 'mae': 6.164591013250972, 'mse': 490.4333964331972, 'r2_score': 0.20552936943788813}


In [None]:
# Train with mean-absolute-error (L1) loss.
loss = nn.L1Loss()
mae_model, mae_val_preds, mae_val_targets = train_model(loss_fn=loss, num_epochs=5)

# Stock the predicted demand rounded up to whole units.
mae_val_stock = np.ceil(mae_val_preds)
log_business_metrics(
    stocking_decisions=mae_val_stock,
    actual_demand=mae_val_targets,
    name="mae_loss_predictions",
    tags=["mae_loss", "stock_predicted_sales"],
)


{'epoch': 0, 'train_loss': 4.907322489597883, 'val_loss': 4.367740741201267, 'r_squared': 0.450005703275947}
{'epoch': 1, 'train_loss': 4.029110068344025, 'val_loss': 3.9602960006446595, 'r_squared': 0.6196152860668505}
{'epoch': 2, 'train_loss': 3.7393676879396476, 'val_loss': 3.8238000915308668, 'r_squared': 0.6381785889739313}
{'epoch': 3, 'train_loss': 3.5717267953067187, 'val_loss': 3.8336028012500445, 'r_squared': 0.67578641814034}
{'epoch': 4, 'train_loss': 3.4153030153289734, 'val_loss': 3.8017366418413294, 'r_squared': 0.6876270556981994}
{'frac_understocks': 0.36973199163096543, 'total_understocked_amt': 93943.0, 'frac_overstocks': 0.45912623293812893, 'total_overstocked_amt': 58574.0, 'utility': -340403.0, 'mae': 3.798869184019129, 'mse': 191.7978728703796, 'r2_score': 0.68929975383404}


In [None]:
# Stock 50% above the MAE model's prediction, rounded up to whole units.
above_mae_stocking_rule = np.ceil(1.5 * mae_val_preds)
# Score against the targets returned by the MAE run and tag the run with the
# loss that produced these predictions.
log_business_metrics(
    above_mae_stocking_rule,
    mae_val_targets,
    "50_pct_above_mae_loss_predictions",
    tags=["mae_loss", "stock_50_pct_above_predicted_sales"],
)

{'frac_understocks': 0.17936136295705887, 'total_understocked_amt': 44235.0, 'frac_overstocks': 0.7339095347215303, 'total_overstocked_amt': 153916.0, 'utility': -286621.0, 'mae': 4.93551359968118, 'mse': 230.86268307263126, 'r2_score': 0.6260172681389624}


In [None]:
class CustomLoss(nn.Module):
    """Asymmetric linear loss that penalises under-prediction more than over-prediction.

    Per element, the loss is ``overstock_penalty * (output - actual)`` when the
    output exceeds the actual value, and ``understock_penalty * (actual - output)``
    otherwise. The result is the mean over all elements.

    Args:
        understock_penalty: Cost per unit predicted below the actual value.
            The default of 3.0 reproduces the original hard-coded weighting.
        overstock_penalty: Cost per unit predicted above the actual value.
    """

    def __init__(self, understock_penalty=3.0, overstock_penalty=1.0):
        super().__init__()
        self.understock_penalty = understock_penalty
        self.overstock_penalty = overstock_penalty

    def forward(self, outputs, actual):
        """Return the mean asymmetric loss between ``outputs`` and ``actual``."""
        diff = outputs - actual
        loss = torch.where(
            outputs > actual,
            self.overstock_penalty * diff,
            # diff is <= 0 on this branch, so negate it to get a positive cost.
            -self.understock_penalty * diff,
        )
        return loss.mean()

In [None]:
# Train directly on the asymmetric utility-based loss.
custom_model, custom_val_preds, custom_val_targets = train_model(loss_fn=CustomLoss(), num_epochs=5)

# Stock the predicted demand rounded up to whole units.
custom_val_stock = np.ceil(custom_val_preds)
log_business_metrics(
    stocking_decisions=custom_val_stock,
    actual_demand=custom_val_targets,
    name="utility_fn_loss_predictions",
    tags=["stock_predicted_sales"],
)