<a href="https://colab.research.google.com/github/Cimbir/Store-Sales-Forecasting/blob/main/model_inference_patchtst.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Setup

In [1]:
!pip install kaggle wandb onnx -Uq
from google.colab import drive
drive.mount('/content/drive')

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m22.2/22.2 MB[0m [31m39.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.6/17.6 MB[0m [31m56.3 MB/s[0m eta [36m0:00:00[0m
[?25hMounted at /content/drive


In [2]:
!pip install neuralforecast datasetsforecast

Collecting neuralforecast
  Downloading neuralforecast-3.0.2-py3-none-any.whl.metadata (14 kB)
Collecting datasetsforecast
  Downloading datasetsforecast-1.0.0-py3-none-any.whl.metadata (2.2 kB)
Collecting coreforecast>=0.0.6 (from neuralforecast)
  Downloading coreforecast-0.0.16-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.7 kB)
Collecting pytorch-lightning>=2.0.0 (from neuralforecast)
  Downloading pytorch_lightning-2.5.2-py3-none-any.whl.metadata (21 kB)
Collecting ray>=2.2.0 (from ray[tune]>=2.2.0->neuralforecast)
  Downloading ray-2.47.1-cp311-cp311-manylinux2014_x86_64.whl.metadata (20 kB)
Collecting optuna (from neuralforecast)
  Downloading optuna-4.4.0-py3-none-any.whl.metadata (17 kB)
Collecting utilsforecast>=0.2.3 (from neuralforecast)
  Downloading utilsforecast-0.2.12-py3-none-any.whl.metadata (7.6 kB)
Collecting torchmetrics>=0.7.0 (from pytorch-lightning>=2.0.0->neuralforecast)
  Downloading torchmetrics-1.7.4-py3-none-any.whl.metadata (21 kB)

In [3]:
!mkdir ~/.kaggle
!touch ~/.kaggle/kaggle.json
!echo "{\"username\":\"dachisuramelashvili\",\"key\":\"4202ec60e20b612a9947450bb8aeebb5\"}" > ~/.kaggle/kaggle.json
!chmod 600 ~/.kaggle/kaggle.json

In [4]:
!kaggle competitions download -c walmart-recruiting-store-sales-forecasting
!unzip walmart-recruiting-store-sales-forecasting.zip

Downloading walmart-recruiting-store-sales-forecasting.zip to /content
  0% 0.00/2.70M [00:00<?, ?B/s]
100% 2.70M/2.70M [00:00<00:00, 613MB/s]
Archive:  walmart-recruiting-store-sales-forecasting.zip
  inflating: features.csv.zip        
  inflating: sampleSubmission.csv.zip  
  inflating: stores.csv              
  inflating: test.csv.zip            
  inflating: train.csv.zip           


In [5]:
!unzip train.csv.zip
!unzip stores.csv.zip
!unzip features.csv.zip
!unzip test.csv.zip

Archive:  train.csv.zip
  inflating: train.csv               
unzip:  cannot find or open stores.csv.zip, stores.csv.zip.zip or stores.csv.zip.ZIP.
Archive:  features.csv.zip
  inflating: features.csv            
Archive:  test.csv.zip
  inflating: test.csv                


In [6]:
# Basic imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
from time import time
import pickle
import seaborn as sns
import random

# PyTorch imports
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F

# Sklearn
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline

# Set random seeds for reproducibility
torch.manual_seed(42)
np.random.seed(42)

# Check if CUDA is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

Using device: cpu


In [7]:
import warnings
warnings.filterwarnings('ignore')

# Evaluate

In [8]:
def WMAE(dataset, real, predict):
    weights = dataset['IsHoliday_x'].apply(lambda x: 5 if x else 1)
    return (np.sum(weights * abs(real - predict)) / np.sum(weights))

In [9]:
class WeightedMAELoss(nn.Module):
    def __init__(self):
        super(WeightedMAELoss, self).__init__()

    def forward(self, y_pred, y_true, holidays):
      weights = torch.where(holidays == 1, torch.tensor(5.0).to(holidays.device), torch.tensor(1.0).to(holidays.device))
      mae = torch.abs(y_pred - y_true)
      return (weights * mae).mean()

In [10]:
class WeightedMSELoss(nn.Module):
    def __init__(self):
        super(WeightedMSELoss, self).__init__()

    def forward(self, y_pred, y_true, holidays):
      weights = torch.where(holidays == 1, torch.tensor(5.0).to(holidays.device), torch.tensor(1.0).to(holidays.device))
      mse = (y_pred - y_true) ** 2
      return (weights * mse).mean()

# Get Data

In [11]:
train = pd.read_csv('train.csv')
stores = pd.read_csv('stores.csv')
features = pd.read_csv('features.csv')

test_df = pd.read_csv('test.csv')

In [12]:
df = pd.merge(train, features, on=['Store', 'Date'], how='left')
df = pd.merge(df, stores, on='Store', how='left')
df = df.drop('IsHoliday_y', axis=1)

df['Date'] = pd.to_datetime(df['Date'])

print(f"Dataset shape: {df.shape}")
print(f"Date range: {df['Date'].min()} to {df['Date'].max()}")
print(f"Unique stores: {df['Store'].nunique()}")
print(f"Unique departments: {df['Dept'].nunique()}")

test_df['Date'] = pd.to_datetime(test_df['Date'])

print(f"Test dataset shape: {test_df.shape}")
print(f"Test date range: {test_df['Date'].min()} to {test_df['Date'].max()}")

display(df.head())

Dataset shape: (421570, 16)
Date range: 2010-02-05 00:00:00 to 2012-10-26 00:00:00
Unique stores: 45
Unique departments: 81
Test dataset shape: (115064, 4)
Test date range: 2012-11-02 00:00:00 to 2013-07-26 00:00:00


Unnamed: 0,Store,Dept,Date,Weekly_Sales,IsHoliday_x,Temperature,Fuel_Price,MarkDown1,MarkDown2,MarkDown3,MarkDown4,MarkDown5,CPI,Unemployment,Type,Size
0,1,1,2010-02-05,24924.5,False,42.31,2.572,,,,,,211.096358,8.106,A,151315
1,1,1,2010-02-12,46039.49,True,38.51,2.548,,,,,,211.24217,8.106,A,151315
2,1,1,2010-02-19,41595.55,False,39.93,2.514,,,,,,211.289143,8.106,A,151315
3,1,1,2010-02-26,19403.54,False,46.63,2.561,,,,,,211.319643,8.106,A,151315
4,1,1,2010-03-05,21827.9,False,46.5,2.625,,,,,,211.350143,8.106,A,151315


In [24]:
test_df

Unnamed: 0,Store,Dept,Date,IsHoliday
0,1,1,2012-11-02,False
1,1,1,2012-11-09,False
2,1,1,2012-11-16,False
3,1,1,2012-11-23,True
4,1,1,2012-11-30,False
...,...,...,...,...
115059,45,98,2013-06-28,False
115060,45,98,2013-07-05,False
115061,45,98,2013-07-12,False
115062,45,98,2013-07-19,False


In [25]:
print(f"Latest date in test_df: {test_df['Date'].max()}")

Latest date in test_df: 2013-07-26 00:00:00


# Definitions

In [14]:
class SalesScaler(BaseEstimator, TransformerMixin):
    def __init__(self, target_column='Weekly_Sales'):
        self.target_column = target_column
        self.scaler = MinMaxScaler() # Or StandardScaler()

    def fit(self, X, y=None):
        # Fit on the target column
        self.scaler.fit(X[[self.target_column]])
        return self

    def transform(self, X):
        X = X.copy()
        # Transform the target column
        X[self.target_column] = self.scaler.transform(X[[self.target_column]])
        return X

    def inverse_transform(self, X_scaled):
        # Assuming X_scaled is the scaled data containing the target column
        # If only the scaled target is passed, adjust accordingly
        X_original = X_scaled.copy()
        X_original[self.target_column] = self.scaler.inverse_transform(X_scaled[[self.target_column]])
        return X_original

    def inverse_transform_array(self, array_scaled):
        # Use this if you have a numpy array or tensor of just the scaled target
        return self.scaler.inverse_transform(array_scaled.reshape(-1, 1))

    def transform_array(self, array_original):
        # Use this if you have a numpy array or tensor of just the original target
        return self.scaler.transform(array_original.reshape(-1, 1))


In [15]:
class FormatterPreprocessor(BaseEstimator, TransformerMixin):
    """
    Preprocessor to:
    - Create 'unique_id' from Store and Dept
    - Rename columns for NeuralForecast (ds, y)
    - Keep only needed columns
    - Ensure datetime type for 'ds'
    """
    def __init__(self):
        pass

    def fit(self, X, y=None):
        return self  # No fitting needed

    def transform(self, X):
        X = X.copy()
        # Create unique_id
        X['unique_id'] = X['Store'].astype(str) + '_' + X['Dept'].astype(str)
        # Rename columns
        X = X.rename(columns={
            'Weekly_Sales': 'y',
            'Date': 'ds'
        })
        # Keep only needed columns
        feats = ['unique_id', 'ds', 'y', 'IsHoliday_x']
        X = X[feats]
        # Ensure datetime type
        X['ds'] = pd.to_datetime(X['ds'])
        return X

In [16]:
preprocessor = Pipeline([
    ('scaler', SalesScaler()),
    ('formatter', FormatterPreprocessor()),
])

In [17]:
class SalesUnscaler(BaseEstimator, TransformerMixin):
    def __init__(self, scaler):
        self.scaler = scaler

    def fit(self, X, y=None):
        return self  # No fitting needed

    def transform(self, X):
        X = X.copy()
        X['PatchTST'] = self.scaler.inverse_transform_array(X['PatchTST'].values)
        return X

In [18]:
postprocessor = SalesUnscaler(scaler=preprocessor.named_steps['scaler'])

# Start MLFlow

In [19]:
!pip install dagshub mlflow==2.22.0 -Uq

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m29.0/29.0 MB[0m [31m66.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m96.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m261.0/261.0 kB[0m [31m16.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m147.8/147.8 kB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m114.9/114.9 kB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.0/85.0 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.9/139.9 kB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m82.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [20]:
import dagshub
import mlflow
from mlflow.models.signature import infer_signature
dagshub.init(repo_owner='Cimbir', repo_name='Store-Sales-Forecasting', mlflow=True)
mlflow.set_tracking_uri("https://dagshub.com/Cimbir/Store-Sales-Forecasting.mlflow")

Output()



Open the following link in your browser to authorize the client:
https://dagshub.com/login/oauth/authorize?state=001fa7fb-0e88-4ade-a61a-9f029518c6f3&client_id=32b60ba385aa7cecf24046d8195a71c07dd345d9657977863b52e7748e0f0f28&middleman_request_id=07301d43438c51aa6d6ff8938c42e94923950f3a49e1838adf5e3f4ee87be8ce




In [21]:
# Preprocessor
run_id = "6ccd85f8892c44d4ae42fdb0bd2fcf06"
preprocessor_name = "preprocessor.pkl"
local_path = mlflow.artifacts.download_artifacts(run_id=run_id, artifact_path=preprocessor_name, dst_path=".")
print(f"Downloaded artifact to: {local_path}")

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloaded artifact to: /content/preprocessor.pkl


In [22]:
# Model
run_id = "37c3db63b1444338afa6b41de0dd84df"
model_name = "model"
local_path = mlflow.artifacts.download_artifacts(run_id=run_id, artifact_path=model_name, dst_path=".")
print(f"Downloaded artifact to: {local_path}")

Downloading artifacts:   0%|          | 0/4 [00:00<?, ?it/s]

Downloaded artifact to: /content/model


In [23]:
# Postprocessor
run_id = "f26fb5d944b348e1bc6c21396adbc51c"
postprocessor_name = "postprocessor.pkl"
local_path = mlflow.artifacts.download_artifacts(run_id=run_id, artifact_path=postprocessor_name, dst_path=".")
print(f"Downloaded artifact to: {local_path}")

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloaded artifact to: /content/postprocessor.pkl


# Load

In [27]:
from neuralforecast.core import NeuralForecast

with open(preprocessor_name, 'rb') as f:
    preprocessor = pickle.load(f)

with open(postprocessor_name, 'rb') as f:
    postprocessor = pickle.load(f)

model = NeuralForecast.load(path=model_name)

INFO:lightning_fabric.utilities.seed:Seed set to 1


# Predict

In [34]:
preprocessed = preprocessor.transform(df)

In [35]:
predicted = model.predict(preprocessed)

INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

In [36]:
postprocessed = postprocessor.transform(predicted)

In [37]:
postprocessed

Unnamed: 0,unique_id,ds,PatchTST
0,10_1,2012-11-02,58767.824219
1,10_1,2012-11-09,57786.515625
2,10_1,2012-11-16,60501.265625
3,10_1,2012-11-23,70116.132812
4,10_1,2012-11-30,58443.742188
...,...,...,...
133235,9_98,2013-01-25,144.746170
133236,9_98,2013-02-01,235.484375
133237,9_98,2013-02-08,29.745211
133238,9_98,2013-02-15,452.955719


# Submission

In [40]:
submission = postprocessed.copy()

submission['Id'] = submission['unique_id'] + '_' + submission['ds'].dt.strftime('%Y-%m-%d')
submission['Weekly_Sales'] = submission['PatchTST']
submission = submission[['Id', 'Weekly_Sales']]

In [45]:
submission['Store_num'] = submission['Id'].apply(lambda x: int(x.split('_')[0]))
submission['Dept_num'] = submission['Id'].apply(lambda x: int(x.split('_')[1]))
submission['Date_str'] = submission['Id'].apply(lambda x: x.split('_')[2])

submission = submission.sort_values(by=['Store_num', 'Dept_num', 'Date_str']).drop(columns=['Store_num', 'Dept_num', 'Date_str'])

submission

Unnamed: 0,Id,Weekly_Sales
30960,1_1_2012-11-02,26635.636719
30961,1_1_2012-11-09,31239.052734
30962,1_1_2012-11-16,26638.937500
30963,1_1_2012-11-23,32600.875000
30964,1_1_2012-11-30,17060.630859
...,...,...
115155,45_98_2013-07-05,1093.822876
115156,45_98_2013-07-12,1097.555420
115157,45_98_2013-07-19,1079.532715
115158,45_98_2013-07-26,1115.418823


In [46]:
submission.to_csv('submission.csv', index=False)

In [49]:
test_df

Unnamed: 0,Store,Dept,Date,IsHoliday,Id
0,1,1,2012-11-02,False,1_1_2012-11-02
1,1,1,2012-11-09,False,1_1_2012-11-09
2,1,1,2012-11-16,False,1_1_2012-11-16
3,1,1,2012-11-23,True,1_1_2012-11-23
4,1,1,2012-11-30,False,1_1_2012-11-30
...,...,...,...,...,...
115059,45,98,2013-06-28,False,45_98_2013-06-28
115060,45,98,2013-07-05,False,45_98_2013-07-05
115061,45,98,2013-07-12,False,45_98_2013-07-12
115062,45,98,2013-07-19,False,45_98_2013-07-19


In [125]:
print("\nMissing values per column:")
print(submission.isnull().sum())


Missing values per column:
Id                0
Weekly_Sales    346
dtype: int64


In [127]:
submission['Weekly_Sales'].fillna(0, inplace=True)

In [129]:
submission.to_csv('submission.csv', index=False)