### **1. Environment Setup & Data Loading**

In [1]:
# Mount Google Drive so the Kaggle API token stored there can be copied below.
from google.colab import drive
drive.mount('/content/drive')

# Install the Kaggle command-line client.
! pip install kaggle

# Create the directory that the token is copied into.
! mkdir ~/.kaggle

# Copy the API token from Drive into that directory.
!cp /content/drive/MyDrive/kaggle.json ~/.kaggle/kaggle.json

# Restrict the token file to owner read/write only.
! chmod 600 ~/.kaggle/kaggle.json

# Download the competition archive into the current working directory.
! kaggle competitions download -c walmart-recruiting-store-sales-forecasting

# Extract the outer archive, delete it, then quietly extract the per-file
# zip archives that the first extraction produced.
! unzip walmart-recruiting-store-sales-forecasting
! rm walmart-recruiting-store-sales-forecasting.zip
! unzip -q '*.zip'

# Install the remaining Python dependencies (quiet mode).
!pip install -q wandb kaggle onnx pandas numpy xgboost scikit-learn dagshub mlflow neuralforecast

Mounted at /content/drive
Downloading walmart-recruiting-store-sales-forecasting.zip to /content
  0% 0.00/2.70M [00:00<?, ?B/s]
100% 2.70M/2.70M [00:00<00:00, 997MB/s]
Archive:  walmart-recruiting-store-sales-forecasting.zip
  inflating: features.csv.zip        
  inflating: sampleSubmission.csv.zip  
  inflating: stores.csv              
  inflating: test.csv.zip            
  inflating: train.csv.zip           

4 archives were successfully processed.
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.6/17.6 MB[0m [31m111.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m261.0/261.0 kB[0m [31m20.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.7/24.7 MB[0m [31m94.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m84.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m26

### **2. Data Preparation**
We load the raw data and prepare it with custom processing functions (defined in the cells below). This includes formatting dates, creating a unique identifier for each time series (Store + Dept), and splitting the date-sorted data into training and validation sets by row position: the first 80% of rows are used for training and the most recent 20% for validation.

In [10]:
import pandas as pd
import numpy as np
import logging
import torch
from itertools import product
from neuralforecast import NeuralForecast
from neuralforecast.models import NBEATS
from typing import Dict, List, Optional

In [4]:
def process_dates_and_sort(df: pd.DataFrame) -> pd.DataFrame:
    """Parse the ``Date`` column and order rows by date, store and department.

    The caller's frame is never modified: the parsed dates are written to a
    new frame instead of being assigned back into ``df``.

    Args:
        df: Frame that may contain ``Date``, ``Store`` and ``Dept`` columns.

    Returns:
        If ``Date`` is present, a new frame whose ``Date`` column has been
        converted with ``pd.to_datetime``, sorted by whichever of ``Date``,
        ``Store`` and ``Dept`` exist, with a fresh ``0..n-1`` index.
        If ``Date`` is absent, ``df`` itself is returned untouched.
    """
    if "Date" not in df.columns:
        return df

    # `assign` returns a new frame, so the input keeps its original Date dtype.
    parsed = df.assign(Date=pd.to_datetime(df["Date"]))

    # "Date" is guaranteed to be present here, so the key list is never empty.
    sort_keys = [col for col in ("Date", "Store", "Dept") if col in parsed.columns]
    return parsed.sort_values(by=sort_keys).reset_index(drop=True)

def _merge_features(df: pd.DataFrame, features_df: pd.DataFrame) -> pd.DataFrame:
    return pd.merge(df, features_df, on=['Store', 'Date', 'IsHoliday'], how='left')

def _merge_stores(df: pd.DataFrame, stores_df: pd.DataFrame) -> pd.DataFrame:
    return pd.merge(df, stores_df, on=['Store'], how='left')

In [7]:
def run_preprocessing(
    dataframes: Dict[str, pd.DataFrame],
    process_train: bool = True,
    process_test: bool = True,
    merge_features: bool = True,
    merge_stores: bool = True,
    drop_raw_components: bool = False
) -> Dict[str, pd.DataFrame]:
    """Build processed train/test frames from the raw frames in ``dataframes``.

    Args:
        dataframes: Raw frames keyed by name ("train", "test", "features",
            "stores"); any of them may be absent.
        process_train: Process the "train" frame when it is present.
        process_test: Process the "test" frame when it is present.
        merge_features: Left-join the "features" frame when it is present.
        merge_stores: Left-join the "stores" frame when it is present.
        drop_raw_components: After processing, delete the consumed raw frames
            from ``dataframes`` (this mutates the caller's dict).

    Returns:
        Processed frames keyed by "train"/"test". An empty dict is returned,
        and ``dataframes`` is left untouched, when nothing was selected.
    """
    requested = (("train", process_train), ("test", process_test))
    selected = [name for name, enabled in requested if enabled and name in dataframes]
    if len(selected) == 0:
        return {}

    # `dataframes` is not modified while processing, so these checks can be
    # evaluated once up front.
    use_features = merge_features and "features" in dataframes
    use_stores = merge_stores and "stores" in dataframes

    processed_dfs = {}
    for name in selected:
        # Work on a copy so the raw frame stays as loaded.
        frame = dataframes[name].copy()
        if use_features:
            frame = _merge_features(frame, dataframes["features"])
        if use_stores:
            frame = _merge_stores(frame, dataframes["stores"])
        processed_dfs[name] = process_dates_and_sort(frame)

    if drop_raw_components:
        droppable = list(selected)
        if merge_features:
            droppable.append("features")
        if merge_stores:
            droppable.append("stores")
        for key in droppable:
            # Silently skip names that were never loaded.
            dataframes.pop(key, None)

    return processed_dfs

In [36]:
def split_data_by_ratio(
    dataframe: pd.DataFrame,
    separate_target: bool = True,
    target_column: str = "Weekly_Sales",
    train_ratio: float = 0.8,
):
    """Split a frame into leading (train) and trailing (validation) rows.

    The split is purely positional, so the frame must already be in the
    desired order (e.g. sorted by date for a chronological split).

    Args:
        dataframe: Frame to split.
        separate_target: When True, return features and target separately.
        target_column: Name of the target column; only used when
            ``separate_target`` is True.
        train_ratio: Fraction of rows, between 0 and 1 inclusive, assigned to
            the training part. Defaults to 0.8.

    Returns:
        ``(X_train, y_train, X_valid, y_valid)`` when ``separate_target`` is
        True, otherwise ``(train_df, valid_df)``.

    Raises:
        ValueError: If ``train_ratio`` is outside ``[0, 1]``.
    """
    if not 0.0 <= train_ratio <= 1.0:
        raise ValueError(f"train_ratio must be between 0 and 1, got {train_ratio}")

    # Truncate toward zero so the training part never exceeds the requested share.
    split_index = int(train_ratio * len(dataframe))
    train_df = dataframe.iloc[:split_index]
    valid_df = dataframe.iloc[split_index:]

    if not separate_target:
        return train_df, valid_df

    X_train = train_df.drop(columns=[target_column])
    y_train = train_df[target_column]
    X_valid = valid_df.drop(columns=[target_column])
    y_valid = valid_df[target_column]
    return X_train, y_train, X_valid, y_valid

In [37]:
def load_raw_data(
    dataframes_to_load: Optional[List[str]] = None,
    data_dir: str = '/content'
) -> Dict[str, pd.DataFrame]:
    """Read the competition CSV files into a dict of DataFrames.

    Args:
        dataframes_to_load: Names to load, chosen from "stores", "features",
            "train" and "test". ``None`` loads all four.
        data_dir: Directory containing ``<name>.csv`` for each dataset.
            Defaults to ``/content``.

    Returns:
        Mapping from each requested name to the frame read from its CSV file.

    Raises:
        ValueError: If a requested name is not one of the known datasets.
            All names are validated before any file is read.
    """
    import os  # local import: keeps this cell independent of the import cell

    AVAILABLE_DATAFRAMES = {
        "stores": 'stores.csv',
        "features": 'features.csv',
        "train": 'train.csv',
        "test": 'test.csv'
    }

    if dataframes_to_load is None:
        dataframes_to_load = list(AVAILABLE_DATAFRAMES.keys())
    else:
        for name in dataframes_to_load:
            if name not in AVAILABLE_DATAFRAMES:
                raise ValueError(
                    f"'{name}' is not a valid dataframe name. "
                    f"Choose from: {list(AVAILABLE_DATAFRAMES.keys())}"
                )

    return {
        name: pd.read_csv(os.path.join(data_dir, AVAILABLE_DATAFRAMES[name]))
        for name in dataframes_to_load
    }

In [38]:
# Read every raw CSV file.
dataframes = load_raw_data()

# Prepare only the training frame (no feature/store merges), then add the
# per-series identifier and rename the date and target columns to `ds` / `y`.
df = (
    run_preprocessing(
        dataframes,
        process_test=False,
        merge_features=False,
        merge_stores=False,
    )['train']
    .assign(unique_id=lambda frame: frame['Store'].astype(str) + '_' + frame['Dept'].astype(str))
    .rename(columns={'Date': 'ds', 'Weekly_Sales': 'y'})
)

# Split into training and validation parts; the target column is now 'y'.
X_train, y_train, X_valid, y_valid = split_data_by_ratio(
    df, separate_target=True, target_column='y'
)

print(f"Shapes of training and validation sets: X_train={X_train.shape}, X_valid={X_valid.shape}")

Shapes of training and validation sets: X_train=(337256, 5), X_valid=(84314, 5)


### **3. Systematic Hyperparameter Tuning**
To find the optimal model configuration, we perform a systematic, sequential grid search. We define a robust function, `run_nbeats_cv`, to iterate through different hyperparameter values. We tune one parameter at a time, adopting the best value before moving to the next.

**NOTE:** This process is computationally intensive and has been run already. The cells below show the methodology and the results obtained. The best parameters found are summarized at the end and used for the final model.

In [39]:
import numpy as np
from sklearn.metrics import mean_absolute_error

def compute_wmae(y_true, y_pred, is_holiday):
    """Weighted mean absolute error with holiday rows weighted 5 and others 1.

    Args:
        y_true: Observed values.
        y_pred: Predicted values, in the same order as ``y_true``.
        is_holiday: Truthy/falsy flag per row marking holiday observations.

    Returns:
        The value of ``mean_absolute_error`` computed with those sample weights.
    """
    holiday_flags = np.array(is_holiday)
    sample_weights = np.where(holiday_flags, 5, 1)
    return mean_absolute_error(y_true, y_pred, sample_weight=sample_weights)

In [40]:
# Raise the root logger and the two training-library loggers to WARNING so
# that PyTorch Lightning's informational output stays out of the cell output.
logging.getLogger().setLevel(logging.WARNING)
for _noisy_logger_name in ("neuralforecast", "pytorch_lightning"):
    logging.getLogger(_noisy_logger_name).setLevel(logging.WARNING)

def run_nbeats_cv(X_train, y_train, X_valid, y_valid,
                            param_grid,
                            fixed_params):
    """Grid-search NBEATS hyperparameters and return the best configuration.

    For every combination in ``param_grid`` a model is fitted on the training
    split, asked to forecast ``h`` steps past the end of the training data,
    and scored with ``compute_wmae`` on the validation rows that have a
    matching forecast.

    Args:
        X_train: Training features; must contain ``unique_id`` and ``ds``.
        y_train: Training target, aligned with ``X_train``.
        X_valid: Validation features; must contain ``unique_id``, ``ds`` and
            ``IsHoliday``.
        y_valid: Validation target, aligned with ``X_valid``.
        param_grid: Mapping of parameter name to the list of values to try.
            An empty mapping evaluates ``fixed_params`` once.
        fixed_params: Parameters applied to every run; on a name clash they
            override the value taken from ``param_grid``.

    Returns:
        Dict holding the lowest ``wmae`` and the parameters that produced it.

    Raises:
        ValueError: If no forecast row matches any validation row.
    """
    # Both frames are identical for every combination, so build them once.
    train_df = X_train.assign(y=y_train)
    valid_df = X_valid.assign(y=y_valid).reset_index(drop=True)

    keys = list(param_grid)
    results = []

    # `product()` with no iterables yields a single empty tuple, so an empty
    # grid still produces one run using only `fixed_params`.
    for vals in product(*(param_grid[key] for key in keys)):
        params = dict(zip(keys, vals))
        params.update(fixed_params)

        # Suppress progress bars for cleaner logs
        params['enable_progress_bar'] = False
        params['enable_model_summary'] = False

        # NeuralForecast expects a single df with a 'y' column for training
        model = NBEATS(**params)
        nf = NeuralForecast(models=[model], freq='W-FRI')
        nf.fit(df=train_df)

        # Forecast past the end of the fitted data. The forecast frame has one
        # row per (series, future step), which does not match the validation
        # frame row-for-row, so it must be aligned by key before scoring.
        forecast_df = nf.predict()
        if 'unique_id' not in forecast_df.columns:
            # NOTE(review): presumably some library versions return
            # `unique_id` as the index rather than a column - confirm.
            forecast_df = forecast_df.reset_index()

        aligned = valid_df.merge(
            forecast_df[['unique_id', 'ds', 'NBEATS']],
            on=['unique_id', 'ds'],
        )
        if aligned.empty:
            raise ValueError(
                "No forecast rows match the validation set on ('unique_id', 'ds')."
            )

        score = compute_wmae(aligned['y'], aligned['NBEATS'], aligned['IsHoliday'])
        results.append({'wmae': score, **params})

        # Print the results for this combination, showing only the varying parameters for clarity
        varying_params = {k: v for k, v in params.items() if k in param_grid}
        print(f"{varying_params} → WMAE={score:.4f}")

    # Return the best performing model's configuration and score
    return min(results, key=lambda r: r['wmae'])

#### **Step 3.1: Tune `input_size` (Lookback Window)**

In [41]:
# Value recorded from the earlier tuning run; the search is not re-executed here.
best_input_size = 52

# Lookback windows compared in this step.
param_grid_input = {'input_size': [40, 52, 60, 72]}

# Settings held constant while `input_size` varies.
# max_steps: roughly 25 epochs; h: horizon to predict a full year.
fixed_params_1 = {'max_steps': 25 * 104, 'h': 53, 'random_seed': 42, 'batch_size': 64}

# Call that produced the recorded result. It stays commented out so the
# expensive computation is not repeated:
# best_result_1 = run_nbeats_cv(X_train, y_train, X_valid, y_valid, param_grid_input, fixed_params_1)

print(f"Best input_size found from previous tuning run: {best_input_size} (Achieved WMAE: 1593.90)")

Best input_size found from previous tuning run: 52 (Achieved WMAE: 1593.90)


#### **Step 3.2: Tune `batch_size`**

In [42]:
# Value recorded from the earlier tuning run (best_result_2['batch_size']).
best_batch_size = 256

# Batch sizes compared in this step.
param_grid_batch = {'batch_size': [32, 64, 128, 256, 512]}

# Settings held constant; `input_size` carries over the best value from the
# previous step.
fixed_params_2 = {'max_steps': 25 * 104, 'h': 53, 'random_seed': 42, 'input_size': best_input_size}

# Call that produced the recorded result (kept commented out):
# best_result_2 = run_nbeats_cv(X_train, y_train, X_valid, y_valid, param_grid_batch, fixed_params_2)

print(f"Best batch_size found: {best_batch_size} (Achieved WMAE: 1547.62)")

Best batch_size found: 256 (Achieved WMAE: 1547.62)


#### **Step 3.3: Tune `learning_rate`**

In [43]:
# Value recorded from the earlier tuning run (best_result_3['learning_rate']).
best_learning_rate = 1e-3

# Learning rates compared in this step.
param_grid_lr = {'learning_rate': [1e-3, 2e-3, 4e-3]}

# Settings held constant; `input_size` and `batch_size` carry over the best
# values from the two previous steps.
fixed_params_3 = {
    'max_steps': 25 * 104, 'h': 53, 'random_seed': 42,
    'input_size': best_input_size, 'batch_size': best_batch_size,
}

# Call that produced the recorded result (kept commented out):
# best_result_3 = run_nbeats_cv(X_train, y_train, X_valid, y_valid, param_grid_lr, fixed_params_3)

print(f"Best learning_rate found: {best_learning_rate} (Achieved WMAE: 1547.62)")

Best learning_rate found: 0.001 (Achieved WMAE: 1547.62)


### **Summary of Best Hyperparameters**
After a full tuning cycle, the following optimal hyperparameters were identified:
- **`input_size`**: 52
- **`batch_size`**: 256
- **`learning_rate`**: 0.001
- **`activation`**: 'ReLU' (based on original notebook tuning)
- **`n_blocks`**: [1, 1, 1] (based on original notebook tuning)
- **`optimizer`**: `torch.optim.AdamW` (no extra optimizer arguments are passed, so PyTorch's default AdamW settings, including its default weight decay, apply)

### **4. Final Model Validation**
Using the best parameters found, we train a model on the training set and evaluate it on the validation set to confirm our final WMAE score. This score will be logged as our primary performance metric.

In [44]:
# Configuration used for the final model: the tuned values plus the
# architecture and optimizer settings.
final_params = {
    'h': 53,
    'input_size': 52,
    'batch_size': 256,
    'learning_rate': 1e-3,
    'max_steps': 25 * 104,
    'optimizer': torch.optim.AdamW,
    'activation': 'ReLU',
    'n_blocks': [1, 1, 1],
    'shared_weights': True,
    'random_seed': 42
}

# Fit on the training split only, then forecast `h` steps ahead for every
# series the model was trained on.
model = NBEATS(**final_params)
nf_model = NeuralForecast(freq='W-FRI', models=[model])
nf_model.fit(df=X_train.assign(y=y_train))
raw_predictions_df = nf_model.predict()

# Re-attach the target so ground truth and forecasts can be joined in one frame.
valid_df = X_valid.assign(y=y_valid)

# Inner join on (unique_id, ds): keeps only validation rows that have a
# forecast, so the three arrays below are aligned row-for-row and have
# identical lengths.
aligned_df = valid_df.reset_index(drop=True).merge(
    raw_predictions_df.reset_index(drop=True),
    on=['unique_id', 'ds'],
)

y_true_aligned, y_pred_aligned, is_holiday_aligned = (
    aligned_df[column] for column in ('y', 'NBEATS', 'IsHoliday')
)

final_wmae = compute_wmae(y_true_aligned, y_pred_aligned, is_holiday_aligned)

print(f"Final Validation WMAE with best parameters: {final_wmae:.4f}")

INFO:lightning_fabric.utilities.seed:Seed set to 42


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

Final Validation WMAE with best parameters: 1577.2823


### **5. Train on Full Data and Log Artifact**
With our hyperparameters validated, we train the final model on the **entire dataset** (`df`) to prepare it for submission or deployment. We then use Weights & Biases (`wandb`) to log the model configuration, validation score, and save the trained model object as an artifact for future use.

In [45]:
# Build a fresh, untrained model from the same configuration.
final_model_for_submission = NBEATS(**final_params)
nf_prod_model = NeuralForecast(freq='W-FRI', models=[final_model_for_submission])

# Fit on the entire preprocessed frame `df` (training and validation rows
# together); this is the model intended for submission/production use.
nf_prod_model.fit(df=df)

INFO:lightning_fabric.utilities.seed:Seed set to 42


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

In [46]:
# It is good practice to log experiments. We use Weights & Biases for this.
# Authenticate this session with W&B via its command-line client.
!wandb login

import wandb
import joblib

# Save the final model object locally.
# NOTE: joblib files are pickle-based; only load this artifact from a source you trust.
model_filename = "nbeats_final_model.pkl"
joblib.dump(nf_prod_model, model_filename)

# Prepare the configuration to log. The optimizer is a class object, so it is
# converted to a string for serialization.
logged_params = final_params.copy()
logged_params['optimizer'] = str(logged_params['optimizer'])

# Use the run as a context manager so it is always closed, even if one of the
# logging calls below raises; this replaces the trailing `wandb.finish()`.
with wandb.init(
    project="Walmart Recruiting - Store Sales Forecasting",
    name="nbeats-fused-optimal-run",
) as run:
    # Log the full configuration
    run.config.update({
        'model_name': 'NBEATS',
        'final_validation_wmae': final_wmae, # The score from our validation set
        **logged_params
    })

    # Log the final validation metric
    run.log({
        'validation_wmae': final_wmae
    })

    # Create and log the model artifact
    artifact = wandb.Artifact(
        name="nbeats-final-model",
        type="model"
    )
    artifact.add_file(model_filename)
    run.log_artifact(artifact)

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit: 
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mlchik22[0m ([33mlchik22-free-uni[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


[34m[1mwandb[0m: Currently logged in as: [33mlchik22[0m ([33mlchik22-free-uni[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


0,1
validation_wmae,▁

0,1
validation_wmae,1577.28232
