# Part IV: Modelling

## Basic settings

In [22]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [23]:
import json
import os
import pickle
import sys
import warnings
from datetime import datetime, timedelta

import lightgbm as lgbm
import matplotlib.pyplot as plt
import numpy as np
import optuna
import pandas as pd
import seaborn as sns
import shap
from prophet import Prophet
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import TimeSeriesSplit

# Suppress every Python warning for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation and data-quality
# warnings raised by the libraries imported above — consider narrowing it.
warnings.filterwarnings("ignore")

# Set plotting style
plt.style.use("seaborn-v0_8-whitegrid")
sns.set_palette("deep")

# Set random seed for reproducibility
# NOTE(review): this seeds only NumPy's global RNG; libraries that take their
# own seed parameter (e.g. LightGBM, Optuna) presumably need seeding
# separately — confirm.
np.random.seed(2025)

In [24]:
# Make the project's `src` directory (two levels up) importable from this notebook.
src_path = os.path.abspath(os.path.join(os.pardir, os.pardir, "src"))
already_on_path = src_path in sys.path
if not already_on_path:
    sys.path.append(src_path)

In [25]:
from utils.plots import plot_forecast_single
from utils.utils import flatten_prophet_predictions, weighted_absolute_percentage_error

In [26]:
# `os`, `pandas` and `numpy` are already imported in the import cell at the
# top of the notebook, so they are not re-imported here.
DATA_DIR = "../../data"

# 1. Load the preprocessed and feature-engineered data
df_sales = pd.read_csv(
    os.path.join(DATA_DIR, "data_processed/sales_data_preprocessed.csv"),
    parse_dates=["date"]
)
df_weather = pd.read_csv(
    os.path.join(DATA_DIR, "data_processed/weather_preprocessed.csv"),
    parse_dates=["date"]
)
df_weather_key_store_merged = pd.read_csv(
    os.path.join(DATA_DIR, "data_processed/weather_key_store_merged.csv"),
    parse_dates=["date"]
)

# This file already contains the is_kaggle_test flag and the full feature set
df_features = pd.read_feather(os.path.join(DATA_DIR,'data_processed/feature_engineered_data_89_features.feather'))

# Row counts: everything, the unlabeled Kaggle-test rows, and the remaining rows.
print("Full feature data:", df_features.shape)
print("Kaggle test rows:", df_features['is_kaggle_test'].sum())
print("Train rows:", (df_features['is_kaggle_test'] == 0).sum())




Full feature data: (686187, 89)
Kaggle test rows: 526917
Train rows: 159270


In [27]:
# List the columns of the merged weather/store frame.
df_weather_key_store_merged.columns

Index(['date', 'store_nbr', 'item_nbr', 'units', 'logunits', 'is_kaggle_test',
       'station_nbr', 'tmax', 'tmin', 'tavg', 'depart', 'dewpoint', 'wetbulb',
       'heat', 'cool', 'sunrise', 'sunset', 'snowfall', 'preciptotal',
       'stnpressure', 'sealevel', 'resultspeed', 'resultdir', 'avgspeed',
       'BCFG', 'BLDU', 'BLSN', 'BR', 'DU', 'DZ', 'FG', 'FG+', 'FU', 'FZDZ',
       'FZFG', 'FZRA', 'GR', 'GS', 'HZ', 'MIFG', 'PL', 'PRFG', 'RA', 'SG',
       'SN', 'SQ', 'TS', 'TSRA', 'TSSN', 'UP', 'VCFG', 'VCTS'],
      dtype='object')

## Split data into train / validation / Kaggle test

In [28]:
# 1. Flag the validation rows: rows that are not Kaggle-test rows and are
#    dated on or after the cutoff get is_valid = 1, everything else 0.
mask_train = df_features['is_kaggle_test'] == 0
cutoff_date = pd.Timestamp("2014-08-01")
df_features['is_valid'] = (
    mask_train & (df_features['date'] >= cutoff_date)
).astype('int64')

# 2. Split into train / valid and Kaggle test
df_train = df_features[mask_train & df_features['is_valid'].eq(0)].copy()
df_valid = df_features[mask_train & df_features['is_valid'].eq(1)].copy()
df_kaggle_test = df_features[df_features['is_kaggle_test'] == 1].copy()

print("Final splits:")
for split_label, split_frame in (
    ("  Train:", df_train),
    ("  Valid:", df_valid),
    ("  Kaggle test:", df_kaggle_test),
):
    print(split_label, split_frame.shape)

Final splits:
  Train: (153496, 90)
  Valid: (5774, 90)
  Kaggle test: (526917, 90)


In [29]:
# Peek at the first rows of the Kaggle-test split.
df_kaggle_test.head()

Unnamed: 0,date,store_nbr,item_nbr,units,logunits,is_kaggle_test,station_nbr,tmax,depart,cool,...,logunits_ewma_14d_a05,logunits_ewma_28d_a05,logunits_ewma_7d_a075,logunits_ewma_14d_a075,logunits_ewma_28d_a075,store_sum_7d,store_mean_7d,item_sum_7d,item_mean_7d,is_valid
159270,2013-04-01,2,1,,,True,14,71.0,1.0,0.0,...,,,,,,8.670772,1.238682,7.203406,1.029058,0
159271,2013-04-01,3,1,,,True,7,68.0,6.2,0.0,...,,,,,,8.82556,1.260794,6.510258,1.085043,0
159272,2013-04-01,6,1,,,True,14,71.0,1.0,0.0,...,,,,,,12.102488,1.728927,5.817111,1.163422,0
159273,2013-04-01,7,1,,,True,6,86.0,6.0,5.0,...,,,,,,8.648221,1.23546,5.123964,1.280991,0
159274,2013-04-01,8,1,,,True,4,87.0,8.0,9.0,...,,,,,,9.57248,1.367497,3.178054,1.059351,0


In [30]:
# Summarize column dtypes and non-null counts of the training split.
df_train.info()

<class 'pandas.core.frame.DataFrame'>
Index: 153496 entries, 0 to 159249
Data columns (total 90 columns):
 #   Column                  Non-Null Count   Dtype         
---  ------                  --------------   -----         
 0   date                    153496 non-null  datetime64[ns]
 1   store_nbr               153496 non-null  int64         
 2   item_nbr                153496 non-null  int64         
 3   units                   153496 non-null  float64       
 4   logunits                153496 non-null  float64       
 5   is_kaggle_test          153496 non-null  bool          
 6   station_nbr             153496 non-null  int64         
 7   tmax                    153496 non-null  float64       
 8   depart                  153496 non-null  float64       
 9   cool                    153496 non-null  float64       
 10  sunrise                 153496 non-null  float64       
 11  sunset                  153496 non-null  float64       
 12  snowfall                153496 non-

## Build LightGBM model

In [31]:
# Columns excluded from the model inputs.
drop_cols = [
    # date, targets and split flags
    'date', 'units', 'logunits', 'is_kaggle_test', 'is_valid',
    # store / item / station identifiers
    'store_nbr', 'item_nbr', 'station_nbr',
    # weather measurement columns
    'tmax', 'depart', 'cool', 'sunrise', 'sunset', 'snowfall',
    'preciptotal', 'stnpressure', 'sealevel', 'resultspeed', 'resultdir',
    # weather code columns
    'BCFG', 'BLDU', 'BLSN', 'BR', 'DU', 'DZ', 'FG', 'FG+', 'FU', 'FZDZ',
    'FZFG', 'FZRA', 'GR', 'GS', 'HZ', 'MIFG', 'PL', 'PRFG', 'RA', 'SG',
    'SN', 'SQ', 'TS', 'TSRA', 'TSSN', 'UP', 'VCFG', 'VCTS',
]

# Every remaining column of df_features, in its original order, is a model feature.
feature_cols = df_features.columns[~df_features.columns.isin(drop_cols)].tolist()

### Build a LightGBM model

In [32]:
def create_lightgbm_model(df_train, df_valid, feature_cols):
    """Fit one LightGBM regressor per (store_nbr, item_nbr) pair and score it.

    Every pair with at least 20 training rows gets its own ``LGBMRegressor``
    trained on ``feature_cols`` to predict ``logunits``. Pairs that also have
    rows in ``df_valid`` are scored there with MAE, RMSE and WAPE.

    Parameters
    ----------
    df_train, df_valid : pandas.DataFrame
        Must contain ``store_nbr``, ``item_nbr``, ``logunits`` and every
        column listed in ``feature_cols``.
    feature_cols : list of str
        Names of the columns passed to the model as inputs.

    Returns
    -------
    lgbm_models : dict
        ``{(store_nbr, item_nbr): fitted LGBMRegressor}`` keyed by plain ints.
    lgbm_metrics : list of tuple
        One ``(store_nbr, item_nbr, mae, rmse, wape)`` entry per scored pair.

    Notes
    -----
    The printed "overall" MAE and RMSE are unweighted means of the per-pair
    scores, whereas the printed WAPE is computed once over all validation
    rows pooled together.
    """
    print("Building LightGBM models per (store, item)...")

    pair_cols = ['store_nbr', 'item_nbr']

    # Same hyperparameters for every pair.
    model_params = dict(
        objective="regression",
        metric="rmse",
        boosting_type="gbdt",
        num_leaves=31,
        learning_rate=0.05,
        feature_fraction=0.9,
        n_estimators=200,
        verbose=-1,
    )

    # Split the validation frame once instead of re-scanning it for every pair.
    valid_by_pair = {
        (int(s), int(i)): frame
        for (s, i), frame in df_valid.groupby(pair_cols, sort=False)
    }

    lgbm_models = {}
    lgbm_metrics = []
    all_actual, all_pred = [], []

    # sort=False keeps the pairs in order of first appearance in df_train.
    for (s, i), train_si in df_train.groupby(pair_cols, sort=False):
        key = (int(s), int(i))

        # Skip pairs with too little data (same rule as the Prophet models)
        if len(train_si) < 20:
            continue

        model = lgbm.LGBMRegressor(**model_params)
        model.fit(train_si[feature_cols], train_si['logunits'])
        lgbm_models[key] = model

        valid_si = valid_by_pair.get(key)
        if valid_si is None or valid_si.empty:
            # The model is kept, but there is nothing to score it on.
            continue

        y_va = valid_si['logunits']
        preds = model.predict(valid_si[feature_cols])
        mae = mean_absolute_error(y_va, preds)
        rmse = np.sqrt(mean_squared_error(y_va, preds))
        wape = weighted_absolute_percentage_error(y_va, preds)
        lgbm_metrics.append((key[0], key[1], mae, rmse, wape))
        all_actual.extend(y_va)
        all_pred.extend(preds)

    if lgbm_metrics:
        mae_avg = np.mean([m[2] for m in lgbm_metrics])
        rmse_avg = np.mean([m[3] for m in lgbm_metrics])
        wape_avg = weighted_absolute_percentage_error(
            np.array(all_actual), np.array(all_pred)
        )
        print("\nOverall LGBM per-(store,item) valid metrics:")
        print(f"MAE: {mae_avg:.4f} | RMSE: {rmse_avg:.4f} | WAPE: {wape_avg:.4f}")

    print("Models built:", len(lgbm_models))
    return lgbm_models, lgbm_metrics

In [33]:
# Train the per-(store, item) models and collect their validation metrics.
lgbm_models, lgbm_metrics = create_lightgbm_model(
    df_train=df_train,
    df_valid=df_valid,
    feature_cols=feature_cols,
)

Building LightGBM models per (store, item)...



Overall LGBM per-(store,item) valid metrics:
MAE: 0.2648 | RMSE: 0.3311 | WAPE: 17.8656
Models built: 255


In [42]:
def create_lightgbm_submission(
    df_kaggle_test, lgbm_models, feature_cols,
    filename="submission_lgbm_per_store_item.csv"
):
    """Predict units for every Kaggle-test row and write the submission CSV.

    Rows are predicted with the model stored under their
    ``(store_nbr, item_nbr)`` key. The model output is mapped back with
    ``expm1`` and clipped at 0. Rows whose pair has no model get 0 units.

    Parameters
    ----------
    df_kaggle_test : pandas.DataFrame
        Must contain ``date`` (datetime), ``store_nbr``, ``item_nbr`` and
        every column in ``feature_cols``. It is not modified.
    lgbm_models : dict
        ``{(store_nbr, item_nbr): model}``; each model needs ``predict``.
    feature_cols : list of str
        Columns passed to ``predict``.
    filename : str
        Path of the CSV file to write.

    Returns
    -------
    pandas.DataFrame
        Columns ``id`` (``"<store>_<item>_<YYYY-MM-DD>"``) and ``units``,
        sorted by date, store and item, with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If ``df_kaggle_test`` has no rows.
    """
    # reset_index returns a new frame, so the caller's data stays untouched and
    # the index labels equal row positions (used to fill `units` below).
    df_kaggle_pred = df_kaggle_test.reset_index(drop=True)
    if len(df_kaggle_pred) == 0:
        raise ValueError("Không có dòng nào được dự đoán.")

    # Default of 0 units covers every pair without a trained model.
    units = np.zeros(len(df_kaggle_pred), dtype=float)

    # One pass over the frame instead of a full boolean scan per pair.
    for (s, i), test_si in df_kaggle_pred.groupby(['store_nbr', 'item_nbr'], sort=False):
        model = lgbm_models.get((int(s), int(i)))
        if model is None:
            continue
        yhat_log = model.predict(test_si[feature_cols])
        units[test_si.index.to_numpy()] = np.clip(np.expm1(yhat_log), 0, None)

    df_kaggle_pred["units"] = units

    df_kaggle_pred["date_str"] = df_kaggle_pred["date"].dt.strftime("%Y-%m-%d")
    df_kaggle_pred = df_kaggle_pred.sort_values(["date_str", "store_nbr", "item_nbr"])

    df_kaggle_pred["id"] = (
        df_kaggle_pred["store_nbr"].astype(str) + "_" +
        df_kaggle_pred["item_nbr"].astype(str) + "_" +
        df_kaggle_pred["date_str"]
    )

    submission = df_kaggle_pred[["id", "units"]].reset_index(drop=True)
    submission.to_csv(filename, index=False)
    print(f"✓ Saved {filename} ({len(submission)} rows)")
    return submission

# Build the Kaggle submission from the per-(store, item) models and save it.
submission_lgbm = create_lightgbm_submission(
    df_kaggle_test=df_kaggle_test,
    lgbm_models=lgbm_models,
    feature_cols=feature_cols,
    filename="submission_lgbm_per_store_item.csv",
)

✓ Saved submission_lgbm_per_store_item.csv (526917 rows)


In [43]:
# Quick check: count the submission rows with a strictly positive prediction
has_sales = submission_lgbm['units'].gt(0)
non_zero_preds = submission_lgbm.loc[has_sales]
print(f"Số lượng dòng có dự đoán bán hàng: {non_zero_preds.shape[0]}")
print("Ví dụ 5 dòng có số liệu:")
print(non_zero_preds.head())

Số lượng dòng có dự đoán bán hàng: 21847
Ví dụ 5 dòng có số liệu:
                  id       units
4     2_5_2013-04-01   82.745288
10   2_11_2013-04-01    1.213815
43   2_44_2013-04-01  159.684847
92   2_93_2013-04-01    0.075407
112   3_2_2013-04-01    0.827853


### (Optional) Fine-tuning using Optuna

In [None]:
# def optimize_lightgbm(X_train, y_train, X_valid, y_valid, n_trials=50):
#     print("\nOptimizing LightGBM model with Optuna...")

#     def objective(trial):
#         # Hyperparameters search space
#         params = {
#             "objective": "regression",
#             "metric": "rmse", 
#             "boosting_type": "gbdt",
#             "verbosity": -1,
#             "num_leaves": trial.suggest_int("num_leaves", 31, 127),
#             "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.1, log=True),
#             "feature_fraction": trial.suggest_float("feature_fraction", 0.7, 1.0),
#             "bagging_fraction": trial.suggest_float("bagging_fraction", 0.7, 1.0),
#             "bagging_freq": trial.suggest_int("bagging_freq", 1, 7),
#             "min_child_samples": trial.suggest_int("min_child_samples", 20, 100),
#             "lambda_l1": trial.suggest_float("lambda_l1", 1e-8, 10.0, log=True),
#             "lambda_l2": trial.suggest_float("lambda_l2", 1e-8, 10.0, log=True),
#             "max_depth": trial.suggest_int("max_depth", 5, 15),
#             "n_estimators": 2000,
#         }

#         model = lgbm.LGBMRegressor(**params)
#         # Train with early stopping
#         model.fit(
#             X_train, y_train,
#             eval_set=[(X_valid, y_valid)],
#             callbacks=[lgbm.early_stopping(stopping_rounds=100), lgbm.log_evaluation(0)],
#         )
        
#         # Predict and compute the target metric (WAPE)
#         preds = model.predict(X_valid)
#         wape = weighted_absolute_percentage_error(y_valid, preds)
#         return wape  # Optimize WAPE directly

#     # Run Optuna
#     study = optuna.create_study(direction="minimize")
#     study.optimize(objective, n_trials=n_trials)

#     print("\nBest params found:")
#     best_params = study.best_params
#     best_params.update({
#         "objective": "regression",
#         "metric": "rmse",
#         "boosting_type": "gbdt",
#         "verbosity": -1,
#         "n_estimators": 2000
#     })
    
#     for k, v in best_params.items():
#         print(f"  {k}: {v}")

#     # Train the final model with the best params + the best iteration found
#     final_model = lgbm.LGBMRegressor(**best_params)
#     final_model.fit(
#         X_train, y_train,
#         eval_set=[(X_valid, y_valid)],
#         callbacks=[lgbm.early_stopping(stopping_rounds=100), lgbm.log_evaluation(100)]
#     )
    
#     # Re-evaluate
#     valid_preds = final_model.predict(X_valid)
#     test_mae = mean_absolute_error(y_valid, valid_preds)
#     test_rmse = np.sqrt(mean_squared_error(y_valid, valid_preds))
#     test_wape = weighted_absolute_percentage_error(y_valid, valid_preds)

#     print(f"\nOptimized LightGBM Valid Metrics - MAE: {test_mae:.3f}, RMSE: {test_rmse:.3f}, WAPE: {test_wape:.3f}")

#     return final_model, best_params, (test_mae, test_rmse, test_wape)

In [None]:
# optimized_model, best_params, optimized_metrics = optimize_lightgbm(
#     X_train, y_train, X_valid, y_valid, n_trials=50
# )


[I 2025-11-29 04:23:49,946] A new study created in memory with name: no-name-c3196c8c-4586-46cc-80c0-051ed1084767



Optimizing LightGBM model with Optuna...
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:23:55,842] Trial 0 finished with value: 14.823612109046277 and parameters: {'num_leaves': 108, 'learning_rate': 0.07028361509874961, 'feature_fraction': 0.8979200588036067, 'bagging_fraction': 0.9352906501638865, 'bagging_freq': 5, 'min_child_samples': 66, 'lambda_l1': 5.803801679413682e-07, 'lambda_l2': 1.2363809845200926, 'max_depth': 10}. Best is trial 0 with value: 14.823612109046277.


Early stopping, best iteration is:
[247]	valid_0's rmse: 0.363119
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:24:31,672] Trial 1 finished with value: 14.726500982575583 and parameters: {'num_leaves': 80, 'learning_rate': 0.012204424379013217, 'feature_fraction': 0.8383414947848707, 'bagging_fraction': 0.7904500968473178, 'bagging_freq': 2, 'min_child_samples': 73, 'lambda_l1': 2.1459966988171709e-07, 'lambda_l2': 3.623957818166913e-08, 'max_depth': 13}. Best is trial 1 with value: 14.726500982575583.


Early stopping, best iteration is:
[946]	valid_0's rmse: 0.360915
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:24:39,028] Trial 2 finished with value: 14.725015821560463 and parameters: {'num_leaves': 96, 'learning_rate': 0.08828299883955089, 'feature_fraction': 0.7685720809466393, 'bagging_fraction': 0.7419929302997711, 'bagging_freq': 7, 'min_child_samples': 31, 'lambda_l1': 5.0627757056731806e-05, 'lambda_l2': 0.2722740771324839, 'max_depth': 9}. Best is trial 2 with value: 14.725015821560463.


Early stopping, best iteration is:
[124]	valid_0's rmse: 0.361737
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:24:57,246] Trial 3 finished with value: 14.771366389446698 and parameters: {'num_leaves': 124, 'learning_rate': 0.041718828759479715, 'feature_fraction': 0.7952611973173175, 'bagging_fraction': 0.7143506821416507, 'bagging_freq': 4, 'min_child_samples': 50, 'lambda_l1': 1.1412621001260629e-07, 'lambda_l2': 1.4129976533789238e-08, 'max_depth': 15}. Best is trial 2 with value: 14.725015821560463.


Early stopping, best iteration is:
[212]	valid_0's rmse: 0.361519
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:25:07,756] Trial 4 finished with value: 14.842580192920858 and parameters: {'num_leaves': 83, 'learning_rate': 0.05890108851338524, 'feature_fraction': 0.9637165304862501, 'bagging_fraction': 0.8306532930455248, 'bagging_freq': 3, 'min_child_samples': 60, 'lambda_l1': 2.1602372870790567e-08, 'lambda_l2': 0.011402377625881205, 'max_depth': 15}. Best is trial 2 with value: 14.725015821560463.


Early stopping, best iteration is:
[127]	valid_0's rmse: 0.362314
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:25:14,870] Trial 5 finished with value: 14.777755576528625 and parameters: {'num_leaves': 67, 'learning_rate': 0.07527925505538538, 'feature_fraction': 0.7831791870110666, 'bagging_fraction': 0.8607312820045689, 'bagging_freq': 1, 'min_child_samples': 33, 'lambda_l1': 4.0796779171964745e-08, 'lambda_l2': 0.00972636762406267, 'max_depth': 9}. Best is trial 2 with value: 14.725015821560463.


Early stopping, best iteration is:
[159]	valid_0's rmse: 0.361662
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:25:21,868] Trial 6 finished with value: 14.750776387736131 and parameters: {'num_leaves': 109, 'learning_rate': 0.07629654546508471, 'feature_fraction': 0.9023365898790173, 'bagging_fraction': 0.7199067454969396, 'bagging_freq': 6, 'min_child_samples': 32, 'lambda_l1': 4.520508993952092e-06, 'lambda_l2': 1.354961737169886e-06, 'max_depth': 7}. Best is trial 2 with value: 14.725015821560463.


Early stopping, best iteration is:
[139]	valid_0's rmse: 0.361287
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:25:34,457] Trial 7 finished with value: 14.737789516903053 and parameters: {'num_leaves': 120, 'learning_rate': 0.057302134340748316, 'feature_fraction': 0.7698570586607419, 'bagging_fraction': 0.8782840513473065, 'bagging_freq': 4, 'min_child_samples': 50, 'lambda_l1': 1.0369203373244923e-08, 'lambda_l2': 0.3441182798810448, 'max_depth': 8}. Best is trial 2 with value: 14.725015821560463.


Early stopping, best iteration is:
[208]	valid_0's rmse: 0.360967
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:25:41,026] Trial 8 finished with value: 14.810070896230112 and parameters: {'num_leaves': 43, 'learning_rate': 0.08555570713491299, 'feature_fraction': 0.9203522453850931, 'bagging_fraction': 0.927757627658029, 'bagging_freq': 2, 'min_child_samples': 65, 'lambda_l1': 0.0007415336155941435, 'lambda_l2': 4.301644964046593e-06, 'max_depth': 6}. Best is trial 2 with value: 14.725015821560463.


Early stopping, best iteration is:
[157]	valid_0's rmse: 0.362897
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:26:05,251] Trial 9 finished with value: 14.772834749048105 and parameters: {'num_leaves': 86, 'learning_rate': 0.030358344308965262, 'feature_fraction': 0.7627487791019085, 'bagging_fraction': 0.9554429546853769, 'bagging_freq': 4, 'min_child_samples': 100, 'lambda_l1': 5.081779210368637, 'lambda_l2': 0.03065872115430187, 'max_depth': 9}. Best is trial 2 with value: 14.725015821560463.


Early stopping, best iteration is:
[491]	valid_0's rmse: 0.361554
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:26:19,645] Trial 10 finished with value: 14.791280460156521 and parameters: {'num_leaves': 35, 'learning_rate': 0.02234060054926844, 'feature_fraction': 0.7030997144425118, 'bagging_fraction': 0.7824253306953639, 'bagging_freq': 7, 'min_child_samples': 21, 'lambda_l1': 0.001708959378637727, 'lambda_l2': 8.694966298011908, 'max_depth': 12}. Best is trial 2 with value: 14.725015821560463.


Early stopping, best iteration is:
[466]	valid_0's rmse: 0.362149
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[878]	valid_0's rmse: 0.361815


[I 2025-11-29 04:26:52,678] Trial 11 finished with value: 14.771629092030501 and parameters: {'num_leaves': 62, 'learning_rate': 0.011314571576946958, 'feature_fraction': 0.8386665995258176, 'bagging_fraction': 0.7791216726716234, 'bagging_freq': 1, 'min_child_samples': 85, 'lambda_l1': 1.3212492392769839e-05, 'lambda_l2': 0.00012991777662883693, 'max_depth': 12}. Best is trial 2 with value: 14.725015821560463.


Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:27:25,724] Trial 12 finished with value: 14.861517188452929 and parameters: {'num_leaves': 95, 'learning_rate': 0.011579712261846741, 'feature_fraction': 0.8429138446118314, 'bagging_fraction': 0.7703106551527603, 'bagging_freq': 7, 'min_child_samples': 81, 'lambda_l1': 0.026471583759221366, 'lambda_l2': 2.0154498193698343e-08, 'max_depth': 13}. Best is trial 2 with value: 14.725015821560463.


Early stopping, best iteration is:
[603]	valid_0's rmse: 0.362119
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:27:43,653] Trial 13 finished with value: 14.805051617359485 and parameters: {'num_leaves': 66, 'learning_rate': 0.02082083125786681, 'feature_fraction': 0.7160849283323604, 'bagging_fraction': 0.811328860956311, 'bagging_freq': 2, 'min_child_samples': 80, 'lambda_l1': 2.8728989870950944e-05, 'lambda_l2': 0.0002146408036111805, 'max_depth': 5}. Best is trial 2 with value: 14.725015821560463.


Early stopping, best iteration is:
[584]	valid_0's rmse: 0.362433
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[726]	valid_0's rmse: 0.360851


[I 2025-11-29 04:28:15,502] Trial 14 finished with value: 14.704454211177813 and parameters: {'num_leaves': 97, 'learning_rate': 0.016766396563196537, 'feature_fraction': 0.8195640619026312, 'bagging_fraction': 0.744554970085436, 'bagging_freq': 6, 'min_child_samples': 45, 'lambda_l1': 1.332403354580151e-06, 'lambda_l2': 1.6026848495022659e-06, 'max_depth': 11}. Best is trial 14 with value: 14.704454211177813.


Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:28:42,967] Trial 15 finished with value: 14.72843052607287 and parameters: {'num_leaves': 101, 'learning_rate': 0.017144281901321192, 'feature_fraction': 0.8120436451652304, 'bagging_fraction': 0.7429903958247845, 'bagging_freq': 6, 'min_child_samples': 40, 'lambda_l1': 0.02864535137133335, 'lambda_l2': 6.7822079492884155e-06, 'max_depth': 11}. Best is trial 14 with value: 14.704454211177813.


Early stopping, best iteration is:
[492]	valid_0's rmse: 0.360639
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:28:46,378] Trial 16 finished with value: 14.735022608326283 and parameters: {'num_leaves': 92, 'learning_rate': 0.034976501859683505, 'feature_fraction': 0.7337362727877805, 'bagging_fraction': 0.7431503074439119, 'bagging_freq': 6, 'min_child_samples': 22, 'lambda_l1': 0.00013297734221664434, 'lambda_l2': 5.025394139701171e-07, 'max_depth': 10}. Best is trial 14 with value: 14.704454211177813.


Early stopping, best iteration is:
[351]	valid_0's rmse: 0.361334
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:28:50,215] Trial 17 finished with value: 14.69707895569648 and parameters: {'num_leaves': 110, 'learning_rate': 0.016642298442853396, 'feature_fraction': 0.8758089595326213, 'bagging_fraction': 0.8792968047457024, 'bagging_freq': 7, 'min_child_samples': 45, 'lambda_l1': 1.6819730344110138e-06, 'lambda_l2': 3.7921402932479935e-05, 'max_depth': 7}. Best is trial 17 with value: 14.69707895569648.


Early stopping, best iteration is:
[555]	valid_0's rmse: 0.360669
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:28:55,076] Trial 18 finished with value: 14.70417682718366 and parameters: {'num_leaves': 111, 'learning_rate': 0.016722215028602726, 'feature_fraction': 0.8873429978145101, 'bagging_fraction': 0.8881691664941507, 'bagging_freq': 5, 'min_child_samples': 50, 'lambda_l1': 1.6275557541215745e-06, 'lambda_l2': 2.9382801595246656e-05, 'max_depth': 7}. Best is trial 17 with value: 14.69707895569648.


Early stopping, best iteration is:
[760]	valid_0's rmse: 0.361121
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:28:59,453] Trial 19 finished with value: 14.774971575808433 and parameters: {'num_leaves': 115, 'learning_rate': 0.01481433136676817, 'feature_fraction': 0.8776742613713682, 'bagging_fraction': 0.9023347704607768, 'bagging_freq': 5, 'min_child_samples': 54, 'lambda_l1': 1.5493614780193324e-06, 'lambda_l2': 2.9355155014964806e-05, 'max_depth': 5}. Best is trial 17 with value: 14.69707895569648.


Early stopping, best iteration is:
[1245]	valid_0's rmse: 0.362013
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:29:03,322] Trial 20 finished with value: 14.690207289901482 and parameters: {'num_leaves': 125, 'learning_rate': 0.02420998661250615, 'feature_fraction': 0.9866300684639262, 'bagging_fraction': 0.9907825888575935, 'bagging_freq': 5, 'min_child_samples': 39, 'lambda_l1': 0.011942841143779335, 'lambda_l2': 0.001426423268924865, 'max_depth': 7}. Best is trial 20 with value: 14.690207289901482.


Early stopping, best iteration is:
[518]	valid_0's rmse: 0.361062
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:29:07,657] Trial 21 finished with value: 14.762737342182334 and parameters: {'num_leaves': 125, 'learning_rate': 0.02475036056255681, 'feature_fraction': 0.9937273128465403, 'bagging_fraction': 0.9999760500489914, 'bagging_freq': 5, 'min_child_samples': 42, 'lambda_l1': 0.006042112886520896, 'lambda_l2': 0.001247841970476562, 'max_depth': 7}. Best is trial 20 with value: 14.690207289901482.


Early stopping, best iteration is:
[723]	valid_0's rmse: 0.361372
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:29:11,805] Trial 22 finished with value: 14.723691133095045 and parameters: {'num_leaves': 113, 'learning_rate': 0.0267913488870701, 'feature_fraction': 0.9429929988100578, 'bagging_fraction': 0.9792390918209197, 'bagging_freq': 5, 'min_child_samples': 38, 'lambda_l1': 0.6353098585581948, 'lambda_l2': 0.0012672602401803499, 'max_depth': 7}. Best is trial 20 with value: 14.690207289901482.


Early stopping, best iteration is:
[591]	valid_0's rmse: 0.361285
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:29:15,046] Trial 23 finished with value: 14.71602136119459 and parameters: {'num_leaves': 104, 'learning_rate': 0.01856393276939904, 'feature_fraction': 0.871062380428426, 'bagging_fraction': 0.8879340813947679, 'bagging_freq': 3, 'min_child_samples': 56, 'lambda_l1': 0.15650162134528284, 'lambda_l2': 5.0102278982069924e-05, 'max_depth': 6}. Best is trial 20 with value: 14.690207289901482.


Early stopping, best iteration is:
[573]	valid_0's rmse: 0.361846
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:29:19,699] Trial 24 finished with value: 14.807266545855956 and parameters: {'num_leaves': 117, 'learning_rate': 0.014515101860822954, 'feature_fraction': 0.9937312114244282, 'bagging_fraction': 0.8508392225018587, 'bagging_freq': 6, 'min_child_samples': 49, 'lambda_l1': 0.0001723566624830133, 'lambda_l2': 0.0010560397325008084, 'max_depth': 8}. Best is trial 20 with value: 14.690207289901482.


Early stopping, best iteration is:
[522]	valid_0's rmse: 0.362034
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:29:21,777] Trial 25 finished with value: 14.748460693108731 and parameters: {'num_leaves': 127, 'learning_rate': 0.03681186444200689, 'feature_fraction': 0.9551592366485679, 'bagging_fraction': 0.91405618112373, 'bagging_freq': 4, 'min_child_samples': 29, 'lambda_l1': 4.838358050068177e-06, 'lambda_l2': 1.7879276425646143e-07, 'max_depth': 6}. Best is trial 20 with value: 14.690207289901482.


Early stopping, best iteration is:
[318]	valid_0's rmse: 0.362939
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:29:27,718] Trial 26 finished with value: 14.716154083865863 and parameters: {'num_leaves': 108, 'learning_rate': 0.014038342702137689, 'feature_fraction': 0.9224850048314364, 'bagging_fraction': 0.9553671530115927, 'bagging_freq': 7, 'min_child_samples': 37, 'lambda_l1': 0.002196243100360395, 'lambda_l2': 1.8733213393825144e-05, 'max_depth': 8}. Best is trial 20 with value: 14.690207289901482.


Early stopping, best iteration is:
[720]	valid_0's rmse: 0.360796
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:29:31,383] Trial 27 finished with value: 14.745991466676523 and parameters: {'num_leaves': 119, 'learning_rate': 0.020722786436469225, 'feature_fraction': 0.8683230995544039, 'bagging_fraction': 0.8233572335099291, 'bagging_freq': 5, 'min_child_samples': 45, 'lambda_l1': 0.01187849084275572, 'lambda_l2': 0.0003238769441137367, 'max_depth': 7}. Best is trial 20 with value: 14.690207289901482.


Early stopping, best iteration is:
[526]	valid_0's rmse: 0.361404
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:29:33,422] Trial 28 finished with value: 14.778973740868606 and parameters: {'num_leaves': 74, 'learning_rate': 0.026593713691838685, 'feature_fraction': 0.9713981295280238, 'bagging_fraction': 0.8894848762944879, 'bagging_freq': 3, 'min_child_samples': 26, 'lambda_l1': 0.15950414878479938, 'lambda_l2': 0.007839155357593929, 'max_depth': 5}. Best is trial 20 with value: 14.690207289901482.


Early stopping, best iteration is:
[440]	valid_0's rmse: 0.362387
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:29:38,766] Trial 29 finished with value: 14.742351834936985 and parameters: {'num_leaves': 107, 'learning_rate': 0.01830919107389759, 'feature_fraction': 0.8967922658512785, 'bagging_fraction': 0.952319275347869, 'bagging_freq': 6, 'min_child_samples': 64, 'lambda_l1': 1.6694139041186289e-07, 'lambda_l2': 5.519388722276737e-05, 'max_depth': 8}. Best is trial 20 with value: 14.690207289901482.


Early stopping, best iteration is:
[665]	valid_0's rmse: 0.361685
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:29:41,599] Trial 30 finished with value: 14.7256294528554 and parameters: {'num_leaves': 112, 'learning_rate': 0.045632560199771785, 'feature_fraction': 0.932269611480923, 'bagging_fraction': 0.8684111278641534, 'bagging_freq': 5, 'min_child_samples': 54, 'lambda_l1': 0.00034997323657092865, 'lambda_l2': 0.0005559487473678976, 'max_depth': 6}. Best is trial 20 with value: 14.690207289901482.


Early stopping, best iteration is:
[604]	valid_0's rmse: 0.361233
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:29:48,206] Trial 31 finished with value: 14.67722722129744 and parameters: {'num_leaves': 102, 'learning_rate': 0.016500529640972483, 'feature_fraction': 0.8244310370428632, 'bagging_fraction': 0.835924590197089, 'bagging_freq': 6, 'min_child_samples': 45, 'lambda_l1': 1.1770406848027417e-06, 'lambda_l2': 2.8996067922966443e-06, 'max_depth': 11}. Best is trial 31 with value: 14.67722722129744.


Early stopping, best iteration is:
[702]	valid_0's rmse: 0.360046
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:29:56,076] Trial 32 finished with value: 14.715053678241476 and parameters: {'num_leaves': 101, 'learning_rate': 0.01381936737800613, 'feature_fraction': 0.8583219346125557, 'bagging_fraction': 0.8077343337049406, 'bagging_freq': 7, 'min_child_samples': 46, 'lambda_l1': 5.850801997998001e-07, 'lambda_l2': 7.154320877900356e-06, 'max_depth': 10}. Best is trial 31 with value: 14.67722722129744.


Early stopping, best iteration is:
[937]	valid_0's rmse: 0.361149
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:30:01,226] Trial 33 finished with value: 14.726392287781545 and parameters: {'num_leaves': 87, 'learning_rate': 0.016116827649770647, 'feature_fraction': 0.8905212904725827, 'bagging_fraction': 0.8390215150624435, 'bagging_freq': 5, 'min_child_samples': 36, 'lambda_l1': 5.335691199659864e-06, 'lambda_l2': 1.5860162419476603e-07, 'max_depth': 9}. Best is trial 31 with value: 14.67722722129744.


Early stopping, best iteration is:
[609]	valid_0's rmse: 0.361221
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:30:08,615] Trial 34 finished with value: 14.78393354130079 and parameters: {'num_leaves': 121, 'learning_rate': 0.01049068715013089, 'feature_fraction': 0.8234829757830354, 'bagging_fraction': 0.921694264641238, 'bagging_freq': 6, 'min_child_samples': 59, 'lambda_l1': 6.706483274905167e-07, 'lambda_l2': 1.4414305187683576e-05, 'max_depth': 10}. Best is trial 31 with value: 14.67722722129744.


Early stopping, best iteration is:
[714]	valid_0's rmse: 0.361582
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:30:17,683] Trial 35 finished with value: 14.675032358008016 and parameters: {'num_leaves': 127, 'learning_rate': 0.012528590527215604, 'feature_fraction': 0.8044026085648729, 'bagging_fraction': 0.9028865865109351, 'bagging_freq': 7, 'min_child_samples': 42, 'lambda_l1': 7.327783159306205e-08, 'lambda_l2': 0.0035687388325977474, 'max_depth': 11}. Best is trial 35 with value: 14.675032358008016.


Early stopping, best iteration is:
[850]	valid_0's rmse: 0.360807
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:30:25,769] Trial 36 finished with value: 14.73280251536954 and parameters: {'num_leaves': 127, 'learning_rate': 0.011897651480022767, 'feature_fraction': 0.7965965937724934, 'bagging_fraction': 0.8493368071974804, 'bagging_freq': 7, 'min_child_samples': 42, 'lambda_l1': 7.815414402072842e-08, 'lambda_l2': 0.02251301595865726, 'max_depth': 11}. Best is trial 35 with value: 14.675032358008016.


Early stopping, best iteration is:
[752]	valid_0's rmse: 0.361371
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:30:33,004] Trial 37 finished with value: 14.741330937448735 and parameters: {'num_leaves': 120, 'learning_rate': 0.013245906777569445, 'feature_fraction': 0.8010950724304893, 'bagging_fraction': 0.9397122965210836, 'bagging_freq': 7, 'min_child_samples': 28, 'lambda_l1': 4.0993065286158705e-08, 'lambda_l2': 0.004294048553246019, 'max_depth': 14}. Best is trial 35 with value: 14.675032358008016.


Early stopping, best iteration is:
[650]	valid_0's rmse: 0.360962
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:30:39,301] Trial 38 finished with value: 14.696915580105026 and parameters: {'num_leaves': 115, 'learning_rate': 0.023127746578535426, 'feature_fraction': 0.7514327771538281, 'bagging_fraction': 0.9746906643069181, 'bagging_freq': 7, 'min_child_samples': 35, 'lambda_l1': 2.3353169929213544e-07, 'lambda_l2': 0.0035738552647434432, 'max_depth': 12}. Best is trial 35 with value: 14.675032358008016.


Early stopping, best iteration is:
[638]	valid_0's rmse: 0.361021
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:30:43,275] Trial 39 finished with value: 14.843560926506886 and parameters: {'num_leaves': 116, 'learning_rate': 0.02308650450403877, 'feature_fraction': 0.753183036862579, 'bagging_fraction': 0.9801414475002135, 'bagging_freq': 6, 'min_child_samples': 34, 'lambda_l1': 1.0391423677244211e-08, 'lambda_l2': 0.046197519808779564, 'max_depth': 12}. Best is trial 35 with value: 14.675032358008016.


Early stopping, best iteration is:
[300]	valid_0's rmse: 0.361543
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:30:48,648] Trial 40 finished with value: 14.729521569899983 and parameters: {'num_leaves': 123, 'learning_rate': 0.027472357110973705, 'feature_fraction': 0.7795889889910346, 'bagging_fraction': 0.9756520972739777, 'bagging_freq': 7, 'min_child_samples': 32, 'lambda_l1': 2.672375174073725e-07, 'lambda_l2': 0.13958393468550678, 'max_depth': 13}. Best is trial 35 with value: 14.675032358008016.


Early stopping, best iteration is:
[450]	valid_0's rmse: 0.36115
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:30:53,864] Trial 41 finished with value: 14.722964549195716 and parameters: {'num_leaves': 104, 'learning_rate': 0.019958185991624428, 'feature_fraction': 0.7439013387331627, 'bagging_fraction': 0.9991165200217107, 'bagging_freq': 7, 'min_child_samples': 43, 'lambda_l1': 3.489321156994954e-07, 'lambda_l2': 0.0033281300856433838, 'max_depth': 11}. Best is trial 35 with value: 14.675032358008016.


Early stopping, best iteration is:
[487]	valid_0's rmse: 0.361341
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:31:04,259] Trial 42 finished with value: 14.725260293530077 and parameters: {'num_leaves': 116, 'learning_rate': 0.010075891915034, 'feature_fraction': 0.7257856853560626, 'bagging_fraction': 0.9406471539979308, 'bagging_freq': 7, 'min_child_samples': 48, 'lambda_l1': 8.467829994986898e-08, 'lambda_l2': 0.000104409273854199, 'max_depth': 13}. Best is trial 35 with value: 14.675032358008016.


Early stopping, best iteration is:
[1028]	valid_0's rmse: 0.360913
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:31:07,231] Trial 43 finished with value: 14.750982533445695 and parameters: {'num_leaves': 52, 'learning_rate': 0.02979437221051602, 'feature_fraction': 0.8233382405493695, 'bagging_fraction': 0.9070697475279454, 'bagging_freq': 6, 'min_child_samples': 39, 'lambda_l1': 1.8234920896885087e-05, 'lambda_l2': 0.003318652676505908, 'max_depth': 12}. Best is trial 35 with value: 14.675032358008016.


Early stopping, best iteration is:
[447]	valid_0's rmse: 0.361788
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:31:11,731] Trial 44 finished with value: 14.729564694990177 and parameters: {'num_leaves': 123, 'learning_rate': 0.022946277946878483, 'feature_fraction': 0.7816312183033179, 'bagging_fraction': 0.8660432879768447, 'bagging_freq': 7, 'min_child_samples': 72, 'lambda_l1': 4.311538474159159e-08, 'lambda_l2': 0.061033130926399226, 'max_depth': 11}. Best is trial 35 with value: 14.675032358008016.


Early stopping, best iteration is:
[398]	valid_0's rmse: 0.360675
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:31:19,124] Trial 45 finished with value: 14.736951564253047 and parameters: {'num_leaves': 109, 'learning_rate': 0.012741626148078779, 'feature_fraction': 0.9111307222009462, 'bagging_fraction': 0.9876074358407272, 'bagging_freq': 6, 'min_child_samples': 25, 'lambda_l1': 6.821120400134116e-05, 'lambda_l2': 0.5364348172988718, 'max_depth': 14}. Best is trial 35 with value: 14.675032358008016.


Early stopping, best iteration is:
[705]	valid_0's rmse: 0.361383
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:31:25,634] Trial 46 finished with value: 14.704258046339627 and parameters: {'num_leaves': 90, 'learning_rate': 0.018843639843081194, 'feature_fraction': 0.8478035809157987, 'bagging_fraction': 0.9633774768777661, 'bagging_freq': 7, 'min_child_samples': 35, 'lambda_l1': 3.288295826040359e-06, 'lambda_l2': 2.3515400621771854e-06, 'max_depth': 12}. Best is trial 35 with value: 14.675032358008016.


Early stopping, best iteration is:
[771]	valid_0's rmse: 0.36072
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:31:30,367] Trial 47 finished with value: 14.772914448388708 and parameters: {'num_leaves': 78, 'learning_rate': 0.01617592615702073, 'feature_fraction': 0.7595714996206282, 'bagging_fraction': 0.9681773947272607, 'bagging_freq': 6, 'min_child_samples': 52, 'lambda_l1': 2.2636399069787455e-08, 'lambda_l2': 0.01691037749637806, 'max_depth': 10}. Best is trial 35 with value: 14.675032358008016.


Early stopping, best iteration is:
[559]	valid_0's rmse: 0.36141
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:31:35,487] Trial 48 finished with value: 14.724125230565805 and parameters: {'num_leaves': 96, 'learning_rate': 0.0216736748580356, 'feature_fraction': 0.8090616951039101, 'bagging_fraction': 0.8114673953207123, 'bagging_freq': 4, 'min_child_samples': 60, 'lambda_l1': 1.589625974691346e-07, 'lambda_l2': 0.00041724139767755636, 'max_depth': 11}. Best is trial 35 with value: 14.675032358008016.


Early stopping, best iteration is:
[542]	valid_0's rmse: 0.361224
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 04:31:42,595] Trial 49 finished with value: 14.710277199744219 and parameters: {'num_leaves': 102, 'learning_rate': 0.015149133328414935, 'feature_fraction': 0.7900585768548863, 'bagging_fraction': 0.9329870476567588, 'bagging_freq': 7, 'min_child_samples': 31, 'lambda_l1': 1.031868786034895e-05, 'lambda_l2': 0.00013172326777407766, 'max_depth': 9}. Best is trial 35 with value: 14.675032358008016.


Early stopping, best iteration is:
[830]	valid_0's rmse: 0.361216

Best params found:
  num_leaves: 127
  learning_rate: 0.012528590527215604
  feature_fraction: 0.8044026085648729
  bagging_fraction: 0.9028865865109351
  bagging_freq: 7
  min_child_samples: 42
  lambda_l1: 7.327783159306205e-08
  lambda_l2: 0.0035687388325977474
  max_depth: 11
  objective: regression
  metric: rmse
  boosting_type: gbdt
  verbosity: -1
  n_estimators: 2000
Training until validation scores don't improve for 100 rounds
[100]	valid_0's rmse: 0.603716
[200]	valid_0's rmse: 0.389372
[300]	valid_0's rmse: 0.364696
[400]	valid_0's rmse: 0.361933
[500]	valid_0's rmse: 0.361409
[600]	valid_0's rmse: 0.361126
[700]	valid_0's rmse: 0.361064
[800]	valid_0's rmse: 0.360847
[900]	valid_0's rmse: 0.360956
Early stopping, best iteration is:
[850]	valid_0's rmse: 0.360807

Optimized LightGBM Valid Metrics - MAE: 0.200, RMSE: 0.361, WAPE: 14.675


In [None]:
# submission_lgbm = create_lightgbm_submission(df_kaggle_test, optimized_model)

✓ Saved submission_lightgbm.csv (526917 rows)


## Evaluating model

In [None]:
def evaluate_model(model, X_test, y_test, data):
    """
    Evaluate a fitted model on a held-out set and report error diagnostics.

    Predicts on ``X_test``, prints MAE / RMSE / WAPE, prints a per-month error
    table in calendar order, and draws two figures: actual vs. predicted
    values and the distribution of prediction errors.

    Args:
        model: Fitted estimator exposing ``predict(X_test)``.
        X_test: Feature matrix passed to ``model.predict``.
        y_test: True target values corresponding to ``X_test``.
        data: DataFrame with a boolean ``is_test`` column, a ``sales`` column
            and a datetime ``date`` column. The rows flagged by ``is_test``
            must line up one-to-one, in order, with the rows of ``X_test``.

    Returns:
        Tuple ``(test_mae, test_rmse, test_wape, test_preds, y_test,
        test_results)`` where ``test_results`` is a copy of the flagged rows
        of ``data`` with ``prediction``, ``error``, ``abs_error``,
        ``month_name`` and ``year_month`` columns added.

    Raises:
        ValueError: If the number of rows flagged by ``is_test`` differs from
            the number of predictions.
    """
    print("\nEvaluating model performance on test set...")

    # Make predictions on the test set
    test_preds = model.predict(X_test)

    # Calculate metrics
    test_mae = mean_absolute_error(y_test, test_preds)
    test_rmse = np.sqrt(mean_squared_error(y_test, test_preds))
    test_wape = weighted_absolute_percentage_error(y_test, test_preds)

    # Print evaluation results
    print("Final Model Test Evaluation:")
    print(f"    MAE: {test_mae:.2f}")
    print(f"    RMSE: {test_rmse:.2f}")
    print(f"    WAPE: {test_wape:.2f}%")

    # Attach predictions to the flagged rows for error analysis
    test_results = data[data["is_test"]].copy()
    if len(test_results) != len(test_preds):
        # Fail with an explicit message instead of an opaque assignment error
        raise ValueError(
            f"data has {len(test_results)} rows flagged by 'is_test' but the model "
            f"produced {len(test_preds)} predictions; they must match row for row"
        )
    test_results["prediction"] = test_preds
    test_results["error"] = test_results["sales"] - test_results["prediction"]
    test_results["abs_error"] = np.abs(test_results["error"])
    test_results["month_name"] = test_results["date"].dt.strftime("%B")

    # Summarize errors by month. groupby sorts its keys alphabetically, so the
    # earliest date of each month is carried along to restore calendar order.
    monthly_errors = (
        test_results.groupby("month_name")
        .agg({"abs_error": "mean", "error": "mean", "sales": "mean", "date": "min"})
        .sort_values("date")
        .drop(columns="date")
        .reset_index()
    )
    monthly_errors["error_pct"] = (
        100 * monthly_errors["abs_error"] / monthly_errors["sales"]
    )

    print("\nError Analysis by Month:")
    print(
        monthly_errors[["month_name", "abs_error", "error_pct"]].to_string(index=False)
    )

    # Year-month key kept on the returned frame for more granular analysis
    test_results["year_month"] = test_results["date"].dt.strftime("%Y-%m")

    # Plot actual vs predicted, with the y = x reference line
    lowest, highest = np.min(y_test), np.max(y_test)
    fig_scatter, ax_scatter = plt.subplots(figsize=(10, 6))
    ax_scatter.scatter(y_test, test_preds, alpha=0.5)
    ax_scatter.plot([lowest, highest], [lowest, highest], "r--")
    ax_scatter.set_title("Actual vs Predicted Sales (Test Set)")
    ax_scatter.set_xlabel("Actual Sales")
    ax_scatter.set_ylabel("Predicted Sales")
    fig_scatter.tight_layout()
    # fig_scatter.savefig('actual_vs_predicted_test.png')

    # Plot error distribution
    fig_errors, ax_errors = plt.subplots(figsize=(10, 6))
    sns.histplot(test_results["error"], kde=True, ax=ax_errors)
    ax_errors.set_title("Error Distribution")
    ax_errors.set_xlabel("Prediction Error")
    fig_errors.tight_layout()
    # fig_errors.savefig('error_distribution.png')

    return test_mae, test_rmse, test_wape, test_preds, y_test, test_results

In [None]:
# Prophet Model Results:
# MAE: 9.03 | RMSE: 11.86 | WAPE: 29.13%

In [None]:
# Run the held-out evaluation for the LightGBM model.
# NOTE(review): the recorded output of this cell is
# "NameError: name 'X_test' is not defined" - the evaluation inputs have to be
# created by an earlier cell before this one can run on a fresh kernel.
# NOTE(review): the third value returned by evaluate_model is the WAPE, even
# though it is bound to the name `test_smape` here.
(
    test_mae,
    test_rmse,
    test_smape,
    test_preds,
    y_test_values,
    test_results,
) = evaluate_model(model=lightgbm_model, X_test=X_test, y_test=y_test, data=df_features)

NameError: name 'X_test' is not defined

## Save trained models

In [None]:
def save_model(model, X_train, feature_names, output_dir="../models"):
    """
    Save the trained model and related artifacts for API use

    Writes two files into ``output_dir``: ``sales_forecast_model.pkl`` (the
    pickled model) and ``feature_stats.json`` (feature names, default values
    and binning metadata).

    Args:
        model: Trained model (e.g., LightGBM model); serialised with pickle
        X_train: Not read by this function; kept so existing calls keep working
        feature_names: Iterable of feature names, stored in the given order.
            It is materialised once, so a one-shot iterable is supported.
        output_dir: Directory to save model artifacts (created if missing)

    Returns:
        Tuple ``(model_path, stats_path)`` with the paths of the two files.
    """

    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Save the model
    model_path = os.path.join(output_dir, "sales_forecast_model.pkl")
    with open(model_path, "wb") as f:
        pickle.dump(model, f)
    print(f"Model saved to {model_path}")

    # Materialise the names once: calling list() twice on a generator would
    # leave the second list empty.
    columns = list(feature_names)

    # Create feature statistics
    # NOTE: the infinite bin edges are written by json.dump as the
    # non-standard tokens -Infinity / Infinity, which Python's json module
    # reads back but strict JSON parsers reject.
    feature_stats = {
        "model_version": "1.0.0",
        "last_trained": pd.Timestamp.now().strftime("%Y-%m-%d"),
        "required_columns": columns,
        "column_order": list(columns),
        # Default values for date features
        "default_values": {
            "year": 2017,
            "month": 11,
            "day": 15,
            "day_of_week": 2,
            "is_weekend": 0,
            "quarter": 4,
            "is_holiday": 0,
        },
        "temperature_bins": [-np.inf, 20, 25, 30, np.inf],
        "temperature_labels": ["Cold", "Cool", "Warm", "Hot"],
        "humidity_bins": [-np.inf, 60, 75, np.inf],
        "humidity_labels": ["Low", "Medium", "High"],
    }

    # Save feature stats
    stats_path = os.path.join(output_dir, "feature_stats.json")
    with open(stats_path, "w") as f:
        json.dump(feature_stats, f, indent=4)
    print(f"Feature statistics saved to {stats_path}")

    print(f"All model artifacts saved successfully to {output_dir}/")

    return model_path, stats_path

In [None]:
# Persist the tuned model together with its feature metadata; the returned
# (model_path, stats_path) tuple is left as the cell's displayed value.
save_model(optimized_model, X_train, X_train.columns, output_dir="../models")

Model saved to ../models/sales_forecast_model.pkl
Feature statistics saved to ../models/feature_stats.json
All model artifacts saved successfully to ../models/


('../models/sales_forecast_model.pkl', '../models/feature_stats.json')