#### Model Training

Import Libraries

In [22]:
import pandas as pd
import numpy as np
import lightgbm as lgb
from sklearn.metrics import mean_absolute_error

Import Preprocessed data

In [23]:
df = pd.read_csv('data/df_model.csv')
df.head()

Unnamed: 0,date,store_id,item_id,category,sub_category,units_sold,revenue,avg_price,footfall,promotion_discount_pct,...,dow_cos,month_sin,month_cos,y_t_plus_1,y_t_plus_2,y_t_plus_3,y_t_plus_4,y_t_plus_5,y_t_plus_6,y_t_plus_7
0,2025-01-29,Store_1,Clo_Kid_1,Clothing,Kids,16.0,1027.15,71.33,232.0,10.0,...,-0.222521,0.5,0.866025,0.0,0.0,0.0,0.0,0.0,19.0,13.0
1,2025-01-30,Store_1,Clo_Kid_1,Clothing,Kids,0.0,0.0,71.33,232.0,0.0,...,-0.900969,0.5,0.866025,0.0,0.0,0.0,0.0,19.0,13.0,0.0
2,2025-01-31,Store_1,Clo_Kid_1,Clothing,Kids,0.0,0.0,71.33,232.0,0.0,...,-0.900969,0.5,0.866025,0.0,0.0,0.0,19.0,13.0,0.0,0.0
3,2025-02-01,Store_1,Clo_Kid_1,Clothing,Kids,0.0,0.0,71.33,232.0,0.0,...,-0.222521,0.866025,0.5,0.0,0.0,19.0,13.0,0.0,0.0,0.0
4,2025-02-02,Store_1,Clo_Kid_1,Clothing,Kids,0.0,0.0,71.33,232.0,0.0,...,0.62349,0.866025,0.5,0.0,19.0,13.0,0.0,0.0,0.0,0.0


In [24]:
df.columns

Index(['date', 'store_id', 'item_id', 'category', 'sub_category', 'units_sold',
       'revenue', 'avg_price', 'footfall', 'promotion_discount_pct',
       'promotion_type', 'is_weekend', 'is_holiday', 'holiday_type',
       'weather_condition', 'temperature_c', 'month', 'day_of_week',
       'week_of_year', 'is_month_start', 'is_month_end', 'units_sold_lag_1',
       'units_sold_lag_3', 'units_sold_lag_7', 'units_sold_lag_14',
       'units_sold_lag_28', 'rolling_mean_7', 'rolling_mean_14',
       'rolling_mean_28', 'rolling_std_7', 'rolling_std_14', 'rolling_std_28',
       'rolling_max_7', 'zero_sales_flag', 'price_lag_7', 'price_change_pct_7',
       'promo_flag', 'promo_days_last_7', 'dow_sin', 'dow_cos', 'month_sin',
       'month_cos', 'y_t_plus_1', 'y_t_plus_2', 'y_t_plus_3', 'y_t_plus_4',
       'y_t_plus_5', 'y_t_plus_6', 'y_t_plus_7'],
      dtype='object')

Direct multi-horizon forecasting using a single global model
- One model
- Predicts y_t+1 … y_t+7
- Learns across all stores & items

In [25]:
FEATURE_COLS = [
    # Lag features
    'units_sold_lag_1', 'units_sold_lag_3', 'units_sold_lag_7',
    'units_sold_lag_14', 'units_sold_lag_28',

    # Rolling stats
    'rolling_mean_7', 'rolling_mean_14', 'rolling_mean_28',
    'rolling_std_7', 'rolling_std_14', 'rolling_std_28',
    'rolling_max_7',

    # Intermittent demand
    'zero_sales_flag',

    # Price & promo
    'avg_price', 'price_change_pct_7',
    'promotion_discount_pct', 'promo_days_last_7',

    # Calendar
    'dow_sin', 'dow_cos',
    'month_sin', 'month_cos',
    'is_weekend', 'is_holiday'
]

In [26]:
# Targets (7-day horizon)
TARGET_COLS = [f'y_t_plus_{i}' for i in range(1, 8)]

Train–Validation Split (Time-Aware)

In [47]:
# Convert the column to datetime
df['date'] = pd.to_datetime(df['date'])

train_cutoff = df['date'].quantile(0.8)

train_df = df[df['date'] <= train_cutoff]
val_df   = df[df['date'] > train_cutoff]

X_train = train_df[FEATURE_COLS]
y_train = train_df[TARGET_COLS]

X_val = val_df[FEATURE_COLS]
y_val = val_df[TARGET_COLS]

Hyper parameter tuning is not the first or most critical step in demand forecasting.

In [39]:
# WAPE (Weighted Absolute Percentage Error)
def wape(y_true, y_pred):
    return np.sum(np.abs(y_true - y_pred)) / np.sum(y_true)


In [48]:
X_train.head()

Unnamed: 0,units_sold_lag_1,units_sold_lag_3,units_sold_lag_7,units_sold_lag_14,units_sold_lag_28,rolling_mean_7,rolling_mean_14,rolling_mean_28,rolling_std_7,rolling_std_14,...,avg_price,price_change_pct_7,promotion_discount_pct,promo_days_last_7,dow_sin,dow_cos,month_sin,month_cos,is_weekend,is_holiday
0,14.0,0.0,0.0,0.0,15.0,8.428571,6.0,4.678571,7.955232,7.473028,...,71.33,-0.050326,10.0,2.0,0.974928,-0.222521,0.5,0.866025,0.0,0.0
1,16.0,14.0,0.0,0.0,0.0,10.714286,7.142857,4.714286,7.409775,7.704715,...,71.33,-0.050326,0.0,3.0,0.433884,-0.900969,0.5,0.866025,0.0,0.0
2,0.0,14.0,14.0,8.0,17.0,10.714286,7.142857,4.714286,7.409775,7.704715,...,71.33,-0.099937,0.0,3.0,-0.433884,-0.900969,0.5,0.866025,0.0,0.0
3,0.0,16.0,17.0,0.0,0.0,8.714286,6.571429,4.107143,8.220184,7.929636,...,71.33,-0.066361,0.0,3.0,-0.974928,-0.222521,0.866025,0.5,1.0,0.0
4,0.0,0.0,0.0,0.0,0.0,6.285714,6.571429,4.107143,7.867958,7.929636,...,71.33,-0.066361,0.0,3.0,-0.781831,0.62349,0.866025,0.5,1.0,0.0


In [49]:
X_train.shape

(13200, 23)

In [50]:
y_train.head()

Unnamed: 0,y_t_plus_1,y_t_plus_2,y_t_plus_3,y_t_plus_4,y_t_plus_5,y_t_plus_6,y_t_plus_7
0,0.0,0.0,0.0,0.0,0.0,19.0,13.0
1,0.0,0.0,0.0,0.0,19.0,13.0,0.0
2,0.0,0.0,0.0,19.0,13.0,0.0,0.0
3,0.0,0.0,19.0,13.0,0.0,0.0,0.0
4,0.0,19.0,13.0,0.0,0.0,0.0,0.0


In [51]:
y_train.shape

(13200, 7)

Baseline Model

In [52]:
# Seasonal Naive (Lag-7) for minimum acceptable performance
baseline_preds = val_df[['units_sold_lag_7']].values.repeat(7, axis=1)

In [35]:
X_val.shape

(3300, 23)

In [53]:
y_val.shape

(3300, 7)

In [54]:
baseline_preds

array([[17., 17., 17., ..., 17., 17., 17.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 7.,  7.,  7., ...,  7.,  7.,  7.],
       ...,
       [20., 20., 20., ..., 20., 20., 20.],
       [18., 18., 18., ..., 18., 18., 18.],
       [17., 17., 17., ..., 17., 17., 17.]], shape=(3300, 7))

#### Model 1: LightGBM Multi-Output Model

In [61]:
models_lgb = {}
val_preds_lgb = {}

for i, target in enumerate(TARGET_COLS):
    model_lgb = lgb.LGBMRegressor(
        n_estimators=500,
        learning_rate=0.05,
        max_depth=8,
        num_leaves=64,
        subsample=0.8,
        colsample_bytree=0.8,
        random_state=42
    )

    model_lgb.fit(
        X_train, y_train[target],
        eval_set=[(X_val, y_val[target])],
        eval_metric='l1',
        callbacks=[lgb.early_stopping(50)],
    )

    models_lgb[target] = model_lgb
    val_preds_lgb[target] = model_lgb.predict(X_val)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000604 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2249
[LightGBM] [Info] Number of data points in the train set: 13200, number of used features: 23
[LightGBM] [Info] Start training from score 13.213333
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[52]	valid_0's l1: 4.86829	valid_0's l2: 35.5704
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000422 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2249
[LightGBM] [Info] Number of data points in the train set: 13200, number of used features: 23
[LightGBM] [Info] Start training from score 13.219015
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[59]	vali

#### Model 2: XGBoost

In [62]:
from xgboost import XGBRegressor

models_xgb = {}
val_preds_xgb = {}

for i, target in enumerate(TARGET_COLS):
    model_xgb = xgb = XGBRegressor(
    n_estimators=500,
    learning_rate=0.05,
    max_depth=8,
    subsample=0.8,
    colsample_bytree=0.8,
    objective='reg:squarederror',
    random_state=42
)

    model_xgb.fit(
        X_train, y_train[target],
        eval_set=[(X_val, y_val[target])],
        verbose=False,
    )

    models_xgb[target] = model_xgb
    val_preds_xgb[target] = model_xgb.predict(X_val)


In [64]:
# Evaluate per horizon
for target in TARGET_COLS:
    score_lgb = wape(y_val[target].values, val_preds_lgb[target])
    print(f"{target} WAPE: {score_lgb:.3f}")

print("----")

for target in TARGET_COLS:
    score_xgb = wape(y_val[target].values, val_preds_xgb[target])
    print(f"{target} WAPE: {score_xgb:.3f}")
    

y_t_plus_1 WAPE: 0.371
y_t_plus_2 WAPE: 0.383
y_t_plus_3 WAPE: 0.376
y_t_plus_4 WAPE: 0.376
y_t_plus_5 WAPE: 0.369
y_t_plus_6 WAPE: 0.368
y_t_plus_7 WAPE: 0.368
----
y_t_plus_1 WAPE: 0.388
y_t_plus_2 WAPE: 0.395
y_t_plus_3 WAPE: 0.391
y_t_plus_4 WAPE: 0.393
y_t_plus_5 WAPE: 0.380
y_t_plus_6 WAPE: 0.370
y_t_plus_7 WAPE: 0.377


In [65]:
# Baseline comparision
baseline_wape = wape(y_val.values, baseline_preds)
print("Baseline WAPE:", baseline_wape)


Baseline WAPE: 0.4629100509964194


In average,
- ML WAPE of LGB ≈ 0.37
- ML WAPE of XGB ≈ 0.388
- Baseline WAPE ≈ 0.463

Since ML WAPE shows good results, hyperparameter tuning is not necessary