<a href="https://colab.research.google.com/github/azhgh22/Walmart-Recruiting-Store-Sales-Forecasting/blob/main/notebooks/03_patch_tst.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Setup Environment


In [1]:
# Mount Google Drive at /content/drive; the Kaggle credentials file
# (kaggle.json) is copied from MyDrive by a later cell.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Install the third-party packages used by this notebook (quiet mode).
# NOTE(review): versions are not pinned, so results may drift between runs.
!pip install -q wandb kaggle onnx pandas numpy xgboost scikit-learn dagshub mlflow neuralforecast

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.6/17.6 MB[0m [31m112.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m261.0/261.0 kB[0m [31m25.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.7/24.7 MB[0m [31m81.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m89.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m261.0/261.0 kB[0m [31m24.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m242.7/242.7 kB[0m [31m23.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m285.8/285.8 kB[0m [31m27.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m147.8/147.8 kB[0m [31m16.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [3]:
! mkdir ~/.kaggle

!cp /content/drive/MyDrive/kaggle.json ~/.kaggle/kaggle.json

! chmod 600 ~/.kaggle/kaggle.json

! kaggle competitions download -c walmart-recruiting-store-sales-forecasting

! unzip walmart-recruiting-store-sales-forecasting
! rm walmart-recruiting-store-sales-forecasting.zip
! unzip -q '*.zip'

Downloading walmart-recruiting-store-sales-forecasting.zip to /content
  0% 0.00/2.70M [00:00<?, ?B/s]
100% 2.70M/2.70M [00:00<00:00, 818MB/s]
Archive:  walmart-recruiting-store-sales-forecasting.zip
  inflating: features.csv.zip        
  inflating: sampleSubmission.csv.zip  
  inflating: stores.csv              
  inflating: test.csv.zip            
  inflating: train.csv.zip           

4 archives were successfully processed.


In [4]:
# Authenticate the Weights & Biases CLI; prompts for an API key when none is stored.
!wandb login

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit: 
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mlchik22[0m ([33mlchik22-free-uni[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


## Load and Split Data

In [6]:
import pandas as pd
import numpy as np
import logging
import torch
from itertools import product
from neuralforecast import NeuralForecast
from typing import Dict, List, Optional

In [7]:
def process_dates_and_sort(df: pd.DataFrame) -> pd.DataFrame:
    """Parse the ``Date`` column to datetimes and sort the rows.

    Rows are ordered by whichever of ``Date``, ``Store`` and ``Dept`` exist
    (in that priority) and the index is reset to ``0..n-1``.

    Args:
        df: Frame to process. It is never modified; a frame without a
            ``Date`` column is returned unchanged (same object).

    Returns:
        A new, sorted DataFrame with ``Date`` converted by ``pd.to_datetime``,
        or ``df`` itself when it has no ``Date`` column.
    """
    if "Date" not in df.columns:
        return df

    # assign() builds a new frame, so the caller's DataFrame keeps its raw
    # Date column instead of being converted behind its back.
    parsed = df.assign(Date=pd.to_datetime(df["Date"]))

    # "Date" is guaranteed to be present here, so the key list is never empty.
    sort_keys = [col for col in ("Date", "Store", "Dept") if col in parsed.columns]
    return parsed.sort_values(by=sort_keys).reset_index(drop=True)

def _merge_features(df: pd.DataFrame, features_df: pd.DataFrame) -> pd.DataFrame:
    return pd.merge(df, features_df, on=['Store', 'Date', 'IsHoliday'], how='left')

def _merge_stores(df: pd.DataFrame, stores_df: pd.DataFrame) -> pd.DataFrame:
    return pd.merge(df, stores_df, on=['Store'], how='left')

In [8]:
def run_preprocessing(
    dataframes: Dict[str, pd.DataFrame],
    process_train: bool = True,
    process_test: bool = True,
    merge_features: bool = True,
    merge_stores: bool = True,
    drop_raw_components: bool = False
) -> Dict[str, pd.DataFrame]:
    """Merge auxiliary tables into the train/test frames and sort them by date.

    Args:
        dataframes: Raw tables keyed by name (``"train"``, ``"test"``,
            ``"features"``, ``"stores"``). Missing keys are simply skipped.
        process_train: Process ``dataframes["train"]`` when present.
        process_test: Process ``dataframes["test"]`` when present.
        merge_features: Left-join ``dataframes["features"]`` when present.
        merge_stores: Left-join ``dataframes["stores"]`` when present.
        drop_raw_components: When True, delete the consumed raw tables from
            ``dataframes`` (the caller's dict is modified in place).

    Returns:
        A dict with one processed frame per primary table that was handled;
        empty when neither train nor test was requested/available.
    """
    requested = (("train", process_train), ("test", process_test))
    primary_to_process = [
        name for name, wanted in requested if wanted and name in dataframes
    ]

    if not primary_to_process:
        return {}

    processed_dfs = {}

    for name in primary_to_process:
        # Work on a copy so the raw table stays untouched.
        df = dataframes[name].copy()

        if merge_features and "features" in dataframes:
            df = _merge_features(df, dataframes["features"])
        if merge_stores and "stores" in dataframes:
            df = _merge_stores(df, dataframes["stores"])

        processed_dfs[name] = process_dates_and_sort(df)

    if drop_raw_components:
        # Copy the list: appending to an alias would also grow
        # primary_to_process with the auxiliary table names.
        keys_to_drop = list(primary_to_process)
        if merge_features:
            keys_to_drop.append("features")
        if merge_stores:
            keys_to_drop.append("stores")

        for key in keys_to_drop:
            dataframes.pop(key, None)

    return processed_dfs

In [9]:
def split_data_by_ratio(
    dataframe: pd.DataFrame,
    separate_target: bool = True,
    target_column: str = "Weekly_Sales",
    train_ratio: float = 0.8
):
    """Split a frame by row position into a leading train part and a trailing validation part.

    The split is purely positional: the first ``int(train_ratio * len(dataframe))``
    rows become the training set, the remainder the validation set. The rows
    are not shuffled or re-sorted, so the caller decides the ordering.

    Args:
        dataframe: Frame to split.
        separate_target: When True, return features and target separately.
        target_column: Name of the target column (used only when
            ``separate_target`` is True).
        train_ratio: Fraction of rows assigned to the training part; must lie
            in ``[0, 1]``. Defaults to 0.8.

    Returns:
        ``(X_train, y_train, X_valid, y_valid)`` when ``separate_target`` is
        True, otherwise ``(train_df, valid_df)``.

    Raises:
        ValueError: If ``train_ratio`` is outside ``[0, 1]``.
    """
    if not 0.0 <= train_ratio <= 1.0:
        raise ValueError(f"train_ratio must be between 0 and 1, got {train_ratio}")

    split_index = int(train_ratio * len(dataframe))
    train_df = dataframe.iloc[:split_index]
    valid_df = dataframe.iloc[split_index:]

    if not separate_target:
        return train_df, valid_df

    X_train = train_df.drop(columns=[target_column])
    y_train = train_df[target_column]
    X_valid = valid_df.drop(columns=[target_column])
    y_valid = valid_df[target_column]
    return X_train, y_train, X_valid, y_valid

In [10]:
def load_raw_data(
    dataframes_to_load: Optional[List[str]] = None
) -> Dict[str, pd.DataFrame]:
    """Read the requested competition CSV files into DataFrames.

    Args:
        dataframes_to_load: Names of the tables to read (any of ``"stores"``,
            ``"features"``, ``"train"``, ``"test"``). ``None`` loads all four.

    Returns:
        A dict mapping each requested name to the DataFrame read from its
        CSV file under ``/content``.

    Raises:
        ValueError: If a requested name is not one of the known tables.
    """
    csv_paths = {
        "stores": '/content/stores.csv',
        "features": '/content/features.csv',
        "train": '/content/train.csv',
        "test": '/content/test.csv'
    }

    requested = list(csv_paths.keys()) if dataframes_to_load is None else dataframes_to_load

    # Validate every name up front so nothing is read when the request is invalid.
    for name in requested:
        if name not in csv_paths:
            raise ValueError(
                f"'{name}' is not a valid dataframe name. "
                f"Choose from: {list(csv_paths.keys())}"
            )

    return {name: pd.read_csv(csv_paths[name]) for name in requested}

In [11]:
# Load all raw CSVs, merge the features/stores tables into the training table
# and split the date-sorted rows 80/20 into train and validation parts.
dataframes = load_raw_data()
df = run_preprocessing(dataframes, process_test=False, merge_features=True, merge_stores=True)['train']
X_train, y_train, X_valid, y_valid = split_data_by_ratio(df, separate_target=True)

# NOTE(review): the split is by row position, not by date, so the boundary can
# fall inside a single week (the frames displayed below both contain 2012-04-13).
print(f"Shapes of train_df and valid_df: {X_train.shape}, {X_valid.shape}")

Shapes of train_df and valid_df: (337256, 15), (84314, 15)


In [12]:
# Display the training features (first 80% of the sorted rows).
X_train

Unnamed: 0,Store,Dept,Date,IsHoliday,Temperature,Fuel_Price,MarkDown1,MarkDown2,MarkDown3,MarkDown4,MarkDown5,CPI,Unemployment,Type,Size
0,1,1,2010-02-05,False,42.31,2.572,,,,,,211.096358,8.106,A,151315
1,1,2,2010-02-05,False,42.31,2.572,,,,,,211.096358,8.106,A,151315
2,1,3,2010-02-05,False,42.31,2.572,,,,,,211.096358,8.106,A,151315
3,1,4,2010-02-05,False,42.31,2.572,,,,,,211.096358,8.106,A,151315
4,1,5,2010-02-05,False,42.31,2.572,,,,,,211.096358,8.106,A,151315
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
337251,22,27,2012-04-13,False,49.89,4.025,5981.5,10877.85,9.5,1633.96,1932.86,141.843393,7.671,B,119557
337252,22,28,2012-04-13,False,49.89,4.025,5981.5,10877.85,9.5,1633.96,1932.86,141.843393,7.671,B,119557
337253,22,29,2012-04-13,False,49.89,4.025,5981.5,10877.85,9.5,1633.96,1932.86,141.843393,7.671,B,119557
337254,22,30,2012-04-13,False,49.89,4.025,5981.5,10877.85,9.5,1633.96,1932.86,141.843393,7.671,B,119557


In [13]:
# Display the validation features (remaining 20% of the sorted rows).
X_valid

Unnamed: 0,Store,Dept,Date,IsHoliday,Temperature,Fuel_Price,MarkDown1,MarkDown2,MarkDown3,MarkDown4,MarkDown5,CPI,Unemployment,Type,Size
337256,22,32,2012-04-13,False,49.89,4.025,5981.50,10877.85,9.5,1633.96,1932.86,141.843393,7.671,B,119557
337257,22,33,2012-04-13,False,49.89,4.025,5981.50,10877.85,9.5,1633.96,1932.86,141.843393,7.671,B,119557
337258,22,34,2012-04-13,False,49.89,4.025,5981.50,10877.85,9.5,1633.96,1932.86,141.843393,7.671,B,119557
337259,22,35,2012-04-13,False,49.89,4.025,5981.50,10877.85,9.5,1633.96,1932.86,141.843393,7.671,B,119557
337260,22,36,2012-04-13,False,49.89,4.025,5981.50,10877.85,9.5,1633.96,1932.86,141.843393,7.671,B,119557
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
421565,45,93,2012-10-26,False,58.85,3.882,4018.91,58.08,100.0,211.94,858.33,192.308899,8.667,B,118221
421566,45,94,2012-10-26,False,58.85,3.882,4018.91,58.08,100.0,211.94,858.33,192.308899,8.667,B,118221
421567,45,95,2012-10-26,False,58.85,3.882,4018.91,58.08,100.0,211.94,858.33,192.308899,8.667,B,118221
421568,45,97,2012-10-26,False,58.85,3.882,4018.91,58.08,100.0,211.94,858.33,192.308899,8.667,B,118221


# Train

In [14]:
import pandas as pd
from neuralforecast import NeuralForecast
from sklearn.base import BaseEstimator, RegressorMixin

class NeuralForecastModels(BaseEstimator, RegressorMixin):
    """scikit-learn style wrapper that trains ``NeuralForecast`` models on tabular data.

    The wrapper turns a feature frame plus target into the long format
    (``unique_id``, ``ds``, ``y``) built by ``_prepare_df`` and aligns the
    resulting forecasts back onto the rows of a test frame.

    Args:
        models: Model instances handed to ``NeuralForecast``.
        model_names: Forecast column name of each model, same length and
            order as ``models``.
        freq: Frequency string passed to ``NeuralForecast``.
        group_cols: Columns whose string values are joined with ``'-'`` to
            form the series identifier.
        one_model: When True, ``predict`` returns only the first model's
            predictions instead of a dict.
        date_col: Name of the timestamp column in the input frames.
    """

    # The default for group_cols is a tuple so that no mutable object is shared
    # between instances; it is converted to a list wherever pandas needs one.
    def __init__(self, models, model_names, freq='W-FRI', group_cols=('Store', 'Dept'), one_model=False, date_col='Date'):
        assert len(models) == len(model_names), "Each model must have a corresponding name."
        self.models = models
        self.model_names = model_names
        self.freq = freq
        self.group_cols = group_cols
        self.date_col = date_col
        self.nf = None
        self.fitted = False
        self.one_model = one_model

    def _prepare_df(self, X, y=None):
        """Build the long-format frame (``unique_id``, ``ds`` and optionally ``y``).

        ``X`` is copied, so the caller's frame is not modified.
        """
        df = X.copy()
        df['ds'] = df[self.date_col]
        # list(): a tuple passed to DataFrame[...] would be read as a single key.
        df['unique_id'] = df[list(self.group_cols)].astype(str).agg('-'.join, axis=1)
        if y is None:
            return df[['unique_id', 'ds']]

        # Use the raw values of a Series so the target is assigned by position,
        # not aligned on index labels.
        df['y'] = y.values if isinstance(y, pd.Series) else y
        return df[['unique_id', 'ds', 'y']]

    def fit(self, X, y):
        """Train all wrapped models on ``X``/``y`` and return ``self``."""
        df = self._prepare_df(X, y)
        self.nf = NeuralForecast(models=self.models, freq=self.freq)
        self.nf.fit(df)
        self.fitted = True
        # Returning self follows the scikit-learn estimator convention.
        return self

    def predict(self, X_test):
        """Return forecasts aligned to the rows of ``X_test``.

        The forecast produced after training is left-joined onto the
        (``unique_id``, ``ds``) pairs of ``X_test``; rows without a matching
        forecast receive a prediction of 0.

        Returns:
            A Series for the first model when ``one_model`` is True,
            otherwise a dict mapping each model name to its Series.

        Raises:
            ValueError: If called before ``fit``.
        """
        forecast = self.forecast()
        test_df = self._prepare_df(X_test)

        # NOTE(review): older neuralforecast releases return unique_id as the
        # index rather than a column; normalise so the merge below works.
        if 'unique_id' not in forecast.columns:
            forecast = forecast.reset_index()

        predictions = {}
        for name in self.model_names:
            merged = test_df.merge(
                forecast[['unique_id', 'ds', name]],
                on=['unique_id', 'ds'],
                how='left'
            )
            # Only the prediction column needs the zero fill.
            predictions[name] = merged[name].fillna(0)

        if self.one_model:
            return predictions[self.model_names[0]]

        return predictions

    def forecast(self):
        """Return the raw forecast frame from ``NeuralForecast.predict()``.

        Raises:
            ValueError: If called before ``fit``.
        """
        if not self.fitted:
            raise ValueError("Model is not fitted. Call fit() first.")
        return self.nf.predict()


In [15]:
import numpy as np
from sklearn.metrics import mean_absolute_error

def compute_wmae(y_true, y_pred, is_holiday):
    """Weighted mean absolute error with holiday rows weighted 5 and all others 1.

    Args:
        y_true: Actual target values.
        y_pred: Predicted values, in the same order as ``y_true``.
        is_holiday: Boolean flags, one per row, marking holiday weeks.

    Returns:
        The weighted MAE computed by ``mean_absolute_error``.
    """
    holiday_mask = np.array(is_holiday)
    sample_weights = np.where(holiday_mask, 5, 1)
    return mean_absolute_error(y_true, y_pred, sample_weight=sample_weights)

In [16]:
from itertools import product
from neuralforecast.models import PatchTST
import logging

# Silence INFO-level chatter from the root logger and the training libraries
# so the sweep output only shows one summary line per run.
logging.getLogger().setLevel(logging.WARNING)
for noisy_logger in ("neuralforecast", "pytorch_lightning", "lightning_fabric"):
    logging.getLogger(noisy_logger).setLevel(logging.WARNING)

def run_patchtst_cv(X_train, y_train, X_valid, y_valid,
                            param_grid,
                            fixed_params,
                            return_all=False):
    """Grid-search PatchTST hyperparameters against a single hold-out set.

    One model is trained per combination of the values in ``param_grid``
    and scored with ``compute_wmae`` on the validation data.

    Args:
        X_train, y_train: Training features and target.
        X_valid, y_valid: Validation features and target; ``X_valid`` must
            contain an ``IsHoliday`` column for the metric weights.
        param_grid: Mapping of parameter name to the list of values to try.
            An empty mapping runs a single model with ``fixed_params`` only.
        fixed_params: Parameters shared by every run; they override
            ``param_grid`` entries with the same name. ``None`` means none.
        return_all: Return every result instead of only the best one.

    Returns:
        A list of result dicts when ``return_all`` is True, otherwise the
        dict with the lowest ``'wmae'``. Each dict holds ``'wmae'``,
        ``'preds'`` and the parameters used for that run.

    Raises:
        ValueError: If no combination was evaluated (some grid entry has an
            empty value list) and the best result was requested.
    """
    hidden_keys = ('enable_progress_bar', 'enable_model_summary')
    results = []

    # product() of zero iterables yields one empty combination, so an empty
    # grid still trains one model instead of failing on tuple unpacking.
    keys = list(param_grid)
    for vals in product(*(param_grid[key] for key in keys)):
        params = dict(zip(keys, vals))
        params.update(fixed_params or {})

        # Keep the Lightning trainer quiet during the sweep.
        params['enable_progress_bar'] = False
        params['enable_model_summary'] = False

        model = PatchTST(**params)

        nf_model = NeuralForecastModels(models=[model], model_names=['PatchTST'], freq='W-FRI', one_model=True)
        nf_model.fit(X_train, y_train)
        y_pred = nf_model.predict(X_valid)
        score = compute_wmae(y_valid, y_pred, X_valid['IsHoliday'])

        result = {'wmae': score, 'preds': y_pred}
        result.update(params)

        results.append(result)
        print(" → ".join(f"{k}={v}" for k,v in params.items() if k not in hidden_keys) + f" → WMAE={score:.4f}")

    if return_all:
        return results

    if not results:
        raise ValueError("param_grid produced no parameter combinations to evaluate.")
    return min(results, key=lambda r: r['wmae'])


In [17]:
from neuralforecast import NeuralForecast
from neuralforecast.models import PatchTST

# Sweep 1: context length (input_size) and batch size.
param_grid = {
    'input_size' : [40, 52, 70],
    'batch_size' : [32, 64, 128]
}

# Settings shared by every run of this sweep.
# NOTE(review): the rationale for 25 * 104 training steps and h=53 is not
# stated in the notebook — confirm before changing.
fixed_params = {
    'max_steps': 25 * 104,
    'h': 53,
    'random_seed': 42,
}

best_result = run_patchtst_cv(
    X_train, y_train, X_valid, y_valid,
    param_grid=param_grid,
    fixed_params=fixed_params,
    return_all=False
)

# Report the winning value of each swept parameter and its validation WMAE.
print("\nBest hyperparameters found:")
for param in param_grid.keys():
    print(f"  {param}: {best_result[param]}")
print(f"Best WMAE: {best_result['wmae']:.4f}")


input_size=40 → batch_size=32 → max_steps=2600 → h=53 → random_seed=42 → WMAE=1727.5545
input_size=40 → batch_size=64 → max_steps=2600 → h=53 → random_seed=42 → WMAE=1702.6652
input_size=40 → batch_size=128 → max_steps=2600 → h=53 → random_seed=42 → WMAE=1716.7269
input_size=52 → batch_size=32 → max_steps=2600 → h=53 → random_seed=42 → WMAE=1562.4860
input_size=52 → batch_size=64 → max_steps=2600 → h=53 → random_seed=42 → WMAE=1538.3386
input_size=52 → batch_size=128 → max_steps=2600 → h=53 → random_seed=42 → WMAE=1549.5148
input_size=70 → batch_size=32 → max_steps=2600 → h=53 → random_seed=42 → WMAE=1637.8330
input_size=70 → batch_size=64 → max_steps=2600 → h=53 → random_seed=42 → WMAE=1670.5391
input_size=70 → batch_size=128 → max_steps=2600 → h=53 → random_seed=42 → WMAE=1643.3104

Best hyperparameters found:
  input_size: 52
  batch_size: 64
Best WMAE: 1538.3386


In [18]:
# Sweep 2: dropout, with input_size and batch_size fixed to the best values
# from the previous sweep.
param_grid = {
    'dropout': [0.0, 0.1, 0.2],
    # 'patch_len': [2, 4],
    # 'stride': [1, 2, 4],
}

fixed_params = {
    'max_steps': 25 * 104,
    'h': 53,
    'random_seed': 42,
    'input_size': 52,
    'batch_size' : 64,
}

best_result = run_patchtst_cv(
    X_train, y_train, X_valid, y_valid,
    param_grid=param_grid,
    fixed_params=fixed_params,
    return_all=False
)

# Report the winning value of each swept parameter and its validation WMAE.
print("\nBest hyperparameters found:")
for param in param_grid.keys():
    print(f"  {param}: {best_result[param]}")
print(f"Best WMAE: {best_result['wmae']:.4f}")


dropout=0.0 → max_steps=2600 → h=53 → random_seed=42 → input_size=52 → batch_size=64 → WMAE=1573.5200
dropout=0.1 → max_steps=2600 → h=53 → random_seed=42 → input_size=52 → batch_size=64 → WMAE=1540.9934
dropout=0.2 → max_steps=2600 → h=53 → random_seed=42 → input_size=52 → batch_size=64 → WMAE=1538.3386

Best hyperparameters found:
  dropout: 0.2
Best WMAE: 1538.3386


In [19]:
# Sweep 3: higher dropout combined with small patch lengths.
param_grid = {
    'dropout': [0.2, 0.4],
    'patch_len': [2, 4],
}

fixed_params = {
    'max_steps': 25 * 104,
    'h': 53,
    'random_seed': 42,
    'input_size': 52,
    'batch_size' : 64,
}

best_result = run_patchtst_cv(
    X_train, y_train, X_valid, y_valid,
    param_grid=param_grid,
    fixed_params=fixed_params,
    return_all=False
)

# Report the winning value of each swept parameter and its validation WMAE.
print("\nBest hyperparameters found:")
for param in param_grid.keys():
    print(f"  {param}: {best_result[param]}")
print(f"Best WMAE: {best_result['wmae']:.4f}")


dropout=0.2 → patch_len=2 → max_steps=2600 → h=53 → random_seed=42 → input_size=52 → batch_size=64 → WMAE=1769.1498
dropout=0.2 → patch_len=4 → max_steps=2600 → h=53 → random_seed=42 → input_size=52 → batch_size=64 → WMAE=1657.5459
dropout=0.4 → patch_len=2 → max_steps=2600 → h=53 → random_seed=42 → input_size=52 → batch_size=64 → WMAE=1823.3388
dropout=0.4 → patch_len=4 → max_steps=2600 → h=53 → random_seed=42 → input_size=52 → batch_size=64 → WMAE=1692.0812

Best hyperparameters found:
  dropout: 0.2
  patch_len: 4
Best WMAE: 1657.5459


In [20]:
# Sweep 4: larger patch lengths together with the patch stride; dropout is now
# fixed at 0.2.
param_grid = {
    'patch_len': [16, 32],
    'stride' : [8, 16]
}

fixed_params = {
    'max_steps': 25 * 104,
    'h': 53,
    'dropout' : 0.2,
    'random_seed': 42,
    'input_size': 52,
    'batch_size' : 64,
}

best_result = run_patchtst_cv(
    X_train, y_train, X_valid, y_valid,
    param_grid=param_grid,
    fixed_params=fixed_params,
    return_all=False
)

# Report the winning value of each swept parameter and its validation WMAE.
print("\nBest hyperparameters found:")
for param in param_grid.keys():
    print(f"  {param}: {best_result[param]}")
print(f"Best WMAE: {best_result['wmae']:.4f}")


patch_len=16 → stride=8 → max_steps=2600 → h=53 → dropout=0.2 → random_seed=42 → input_size=52 → batch_size=64 → WMAE=1538.3386
patch_len=16 → stride=16 → max_steps=2600 → h=53 → dropout=0.2 → random_seed=42 → input_size=52 → batch_size=64 → WMAE=1558.7274
patch_len=32 → stride=8 → max_steps=2600 → h=53 → dropout=0.2 → random_seed=42 → input_size=52 → batch_size=64 → WMAE=1554.5784
patch_len=32 → stride=16 → max_steps=2600 → h=53 → dropout=0.2 → random_seed=42 → input_size=52 → batch_size=64 → WMAE=1560.6312

Best hyperparameters found:
  patch_len: 16
  stride: 8
Best WMAE: 1538.3386


In [21]:
import torch.optim as optim

# Sweep 5: optimizer class crossed with learning rate.
param_grid = {
    'optimizer': [optim.Adam, optim.AdamW],
    'learning_rate': [5e-3, 1e-3, 5e-4]
}

fixed_params = {
    'max_steps': 25 * 104,
    'h': 53,
    'dropout' : 0.2,
    'random_seed': 42,
    'input_size': 52,
    'batch_size' : 64,
}

best_result = run_patchtst_cv(
    X_train, y_train, X_valid, y_valid,
    param_grid=param_grid,
    fixed_params=fixed_params,
    return_all=False
)

# Report the winning value of each swept parameter and its validation WMAE.
print("\nBest hyperparameters found:")
for param in param_grid.keys():
    print(f"  {param}: {best_result[param]}")
print(f"Best WMAE: {best_result['wmae']:.4f}")


optimizer=<class 'torch.optim.adam.Adam'> → learning_rate=0.005 → max_steps=2600 → h=53 → dropout=0.2 → random_seed=42 → input_size=52 → batch_size=64 → WMAE=1813.3649
optimizer=<class 'torch.optim.adam.Adam'> → learning_rate=0.001 → max_steps=2600 → h=53 → dropout=0.2 → random_seed=42 → input_size=52 → batch_size=64 → WMAE=1580.1102
optimizer=<class 'torch.optim.adam.Adam'> → learning_rate=0.0005 → max_steps=2600 → h=53 → dropout=0.2 → random_seed=42 → input_size=52 → batch_size=64 → WMAE=1568.4398
optimizer=<class 'torch.optim.adamw.AdamW'> → learning_rate=0.005 → max_steps=2600 → h=53 → dropout=0.2 → random_seed=42 → input_size=52 → batch_size=64 → WMAE=1764.6460
optimizer=<class 'torch.optim.adamw.AdamW'> → learning_rate=0.001 → max_steps=2600 → h=53 → dropout=0.2 → random_seed=42 → input_size=52 → batch_size=64 → WMAE=1576.4206
optimizer=<class 'torch.optim.adamw.AdamW'> → learning_rate=0.0005 → max_steps=2600 → h=53 → dropout=0.2 → random_seed=42 → input_size=52 → batch_size=64 →

In [22]:
import torch.optim as optim

# Sweep 6: smaller learning rates with the optimizer left at its default.
# NOTE(review): the recorded 1e-4 run reproduces the WMAE of the earlier best
# runs exactly, suggesting 1e-4 is already the model default — confirm.
param_grid = {
    'learning_rate': [1e-4, 5e-4, 1e-5]
}

fixed_params = {
    'max_steps': 25 * 104,
    'h': 53,
    'dropout' : 0.2,
    'random_seed': 42,
    'input_size': 52,
    'batch_size' : 64,
}

best_result = run_patchtst_cv(
    X_train, y_train, X_valid, y_valid,
    param_grid=param_grid,
    fixed_params=fixed_params,
    return_all=False
)

# Report the winning value of each swept parameter and its validation WMAE.
print("\nBest hyperparameters found:")
for param in param_grid.keys():
    print(f"  {param}: {best_result[param]}")
print(f"Best WMAE: {best_result['wmae']:.4f}")


learning_rate=0.0001 → max_steps=2600 → h=53 → dropout=0.2 → random_seed=42 → input_size=52 → batch_size=64 → WMAE=1538.3386
learning_rate=0.0005 → max_steps=2600 → h=53 → dropout=0.2 → random_seed=42 → input_size=52 → batch_size=64 → WMAE=1568.4398
learning_rate=1e-05 → max_steps=2600 → h=53 → dropout=0.2 → random_seed=42 → input_size=52 → batch_size=64 → WMAE=1626.1420

Best hyperparameters found:
  learning_rate: 0.0001
Best WMAE: 1538.3386


In [23]:
# Sweep 7: activation function.
param_grid = {
    'activation': ['relu', 'gelu']
}


fixed_params = {
    'max_steps': 25 * 104,
    'h': 53,
    'dropout' : 0.2,
    'random_seed': 42,
    'input_size': 52,
    'batch_size' : 64,
}

best_result = run_patchtst_cv(
    X_train, y_train, X_valid, y_valid,
    param_grid=param_grid,
    fixed_params=fixed_params,
    return_all=False
)

# Report the winning value of each swept parameter and its validation WMAE.
print("\nBest hyperparameters found:")
for param in param_grid.keys():
    print(f"  {param}: {best_result[param]}")
print(f"Best WMAE: {best_result['wmae']:.4f}")


activation=relu → max_steps=2600 → h=53 → dropout=0.2 → random_seed=42 → input_size=52 → batch_size=64 → WMAE=1537.9701
activation=gelu → max_steps=2600 → h=53 → dropout=0.2 → random_seed=42 → input_size=52 → batch_size=64 → WMAE=1538.3386

Best hyperparameters found:
  activation: relu
Best WMAE: 1537.9701


## Final Model

In [25]:
from neuralforecast.models import PatchTST

# Retrain with the selected hyperparameters and a longer schedule
# (60 * 104 steps instead of the 25 * 104 used during the sweeps).
model = PatchTST(
    input_size=52,
    dropout = 0.2,
    h=53,
    max_steps= 60 * 104,
    batch_size=64,
    random_seed=42,
    activation='relu',
    enable_progress_bar=False,
    enable_model_summary=False,
)
nf_model = NeuralForecastModels(models=[model], model_names=['PatchTST'], freq='W-FRI', one_model=True)

# Train on the training split only and score on the hold-out split.
nf_model.fit(X_train, y_train)
y_pred = nf_model.predict(X_valid)
wmae = compute_wmae(y_valid, y_pred, X_valid['IsHoliday'])

print(wmae)

1526.4649587770039


In [26]:
# Build a fresh model with the same settings and refit it on the complete
# training table (train + validation rows); this rebinds `model` and `nf_model`.
model = PatchTST(
    input_size=52,
    dropout = 0.2,
    h=53,
    max_steps= 60 * 104,
    batch_size=64,
    random_seed=42,
    activation='relu',
    enable_progress_bar=False,
    enable_model_summary=False,
)
nf_model = NeuralForecastModels(models=[model], model_names=['PatchTST'], freq='W-FRI', one_model=True)

nf_model.fit(df.drop(columns='Weekly_Sales'), df['Weekly_Sales'])

In [27]:
# Run metadata logged with the experiment: how the input table was assembled
# and how the score is computed.
# NOTE(review): 'store' in merge1 presumably refers to the 'stores' table.
minimal_config = {
    # Data preparation.
    'merge1': 'train, store, how=left, on=Store',
    'merge2': 'train, features, how=left, on=Store, Date, IsHoliday',
    'merged_tables': ['train', 'stores', 'features'],
    'start_date': '2010-02-05',
    # Evaluation.
    'score_metric': 'WMAE',
    'score_policy': {
        'weight on holidays': 5,
        'weight on non_holidays': 1,
    },
}

In [28]:
# Hyperparameters of the final PatchTST model, recorded for experiment logging.
patchtst_config = dict(
    input_size=52,
    h=53,
    max_steps=60 * 104,
    batch_size=64,
    random_seed=42,
    activation='relu',
    dropout=0.2,
)

In [29]:
import joblib
import wandb
def log_to_wandb(model, train_score, val_score, config,
                       run_name="run_00",
                       project_name="Walmart Recruiting - Store Sales Forecasting",
                       artifact_name="pipeline",
                       artifact_type="model",
                       artifact_description=""):
    """Serialize ``model`` and record it, its scores and its config in a W&B run.

    The model is written to ``"<run_name>.pkl"`` in the current working
    directory with ``joblib.dump`` and uploaded as an artifact of the run.

    Args:
        model: Object to serialize and upload.
        train_score: Value logged as ``train_wmae``.
        val_score: Value logged as ``val_wmae``.
        config: Mapping stored as the run configuration.
        run_name: Name of the run; also the stem of the pickle file.
        project_name: W&B project that receives the run.
        artifact_name: Name of the uploaded artifact.
        artifact_type: Type label of the artifact.
        artifact_description: Free-text description of the artifact.
    """
    filename = f"{run_name}.pkl"
    joblib.dump(model, filename)

    # Using the run as a context manager guarantees it is finished even when
    # logging or the artifact upload raises, so no run is left open.
    with wandb.init(project=project_name, name=run_name) as run:
        run.config.update(config)

        run.log({
            'train_wmae': train_score,
            'val_wmae': val_score
        })

        artifact = wandb.Artifact(
            name=artifact_name,
            type=artifact_type,
            description=artifact_description
        )
        artifact.add_file(filename)
        run.log_artifact(artifact)


In [30]:
from sklearn.pipeline import Pipeline

# Upload the final model and its validation score.
# NOTE(review): `nf_model` is the instance refit on the full training table,
# while `wmae` was measured with the model trained on the training split only.
# train_score=-1 is a placeholder; no training-set WMAE is computed in this notebook.
log_to_wandb(
    model=nf_model,
    train_score=-1,
    val_score=wmae,
    config= minimal_config | patchtst_config,
    run_name='patch_tst_01',
    artifact_name="patch_tst",
)

[34m[1mwandb[0m: Currently logged in as: [33mlchik22[0m ([33mlchik22-free-uni[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


0,1
train_wmae,▁
val_wmae,▁

0,1
train_wmae,-1.0
val_wmae,1526.46496
