# Part IV: Modelling

## Basic settings

In [1]:
# Enable IPython's autoreload extension; mode 2 re-imports all modules before
# each cell runs, so edits to the local helper modules are picked up live.
%load_ext autoreload
%autoreload 2

In [2]:
import json
import os
import pickle
import sys
import warnings
from datetime import datetime, timedelta

import lightgbm as lgbm
import matplotlib.pyplot as plt
import numpy as np
import optuna
import pandas as pd
import seaborn as sns
import shap
from prophet import Prophet
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import TimeSeriesSplit

# NOTE(review): this silences every warning for the rest of the session;
# comment it out while debugging so library warnings stay visible.
warnings.filterwarnings("ignore")

# Set plotting style
plt.style.use("seaborn-v0_8-whitegrid")
sns.set_palette("deep")

# Set random seed for reproducibility
# (seeds NumPy's global RNG only)
np.random.seed(2025)

  from .autonotebook import tqdm as notebook_tqdm
Importing plotly failed. Interactive plots will not work.


In [3]:
# Register the project's `src` directory (two levels above the working
# directory) on the import path, once, so the local `utils` package resolves.
src_path = os.path.abspath(os.path.join("../..", "src"))
if sys.path.count(src_path) == 0:
    sys.path.append(src_path)

In [4]:
from utils.plots import plot_forecast_single
from utils.utils import flatten_prophet_predictions, weighted_absolute_percentage_error

In [5]:
import os
import pandas as pd
import numpy as np

DATA_DIR = "../../data"

# 1. Load the data that has already been preprocessed and feature-engineered.
# The three CSV exports are read the same way: `date` is parsed to datetime.
df_sales, df_weather, df_weather_key_store_merged = (
    pd.read_csv(os.path.join(DATA_DIR, csv_name), parse_dates=["date"])
    for csv_name in (
        "data_processed/sales_data_preprocessed.csv",
        "data_processed/weather_preprocessed.csv",
        "data_processed/weather_key_store_merged.csv",
    )
)

# This file already carries the `is_kaggle_test` flag and the full feature set.
df_features = pd.read_feather(
    os.path.join(DATA_DIR,'data_processed/feature_engineered_data_89_features.feather')
)

# Size of the full table and of the labelled / unlabelled partitions.
print("Full feature data:", df_features.shape)
print("Kaggle test rows:", df_features['is_kaggle_test'].sum())
print("Train rows:", (df_features['is_kaggle_test'] == 0).sum())




Full feature data: (686187, 89)
Kaggle test rows: 526917
Train rows: 159270


In [6]:
# List the columns available in df_weather_key_store_merged.
df_weather_key_store_merged.columns

Index(['date', 'store_nbr', 'item_nbr', 'units', 'logunits', 'is_kaggle_test',
       'station_nbr', 'tmax', 'tmin', 'tavg', 'depart', 'dewpoint', 'wetbulb',
       'heat', 'cool', 'sunrise', 'sunset', 'snowfall', 'preciptotal',
       'stnpressure', 'sealevel', 'resultspeed', 'resultdir', 'avgspeed',
       'BCFG', 'BLDU', 'BLSN', 'BR', 'DU', 'DZ', 'FG', 'FG+', 'FU', 'FZDZ',
       'FZFG', 'FZRA', 'GR', 'GS', 'HZ', 'MIFG', 'PL', 'PRFG', 'RA', 'SG',
       'SN', 'SQ', 'TS', 'TSRA', 'TSSN', 'UP', 'VCFG', 'VCTS'],
      dtype='object')

## Split data into train / validation / Kaggle test

In [7]:
# 1. Flag the validation rows: labelled (non-Kaggle-test) rows dated on or
#    after the cutoff get is_valid = 1, every other row gets 0.
cutoff_date = pd.Timestamp("2014-08-01")
mask_train = df_features['is_kaggle_test'] == 0
df_features['is_valid'] = (
    mask_train & (df_features['date'] >= cutoff_date)
).astype("int64")

# 2. Split into train / valid and the Kaggle test set (independent copies).
df_train = df_features[mask_train & (df_features['is_valid'] == 0)].copy()
df_valid = df_features[mask_train & (df_features['is_valid'] == 1)].copy()
df_kaggle_test = df_features[df_features['is_kaggle_test'] == 1].copy()

print("Final splits:")
for split_label, split_frame in (
    ("  Train:", df_train),
    ("  Valid:", df_valid),
    ("  Kaggle test:", df_kaggle_test),
):
    print(split_label, split_frame.shape)

Final splits:
  Train: (153496, 90)
  Valid: (5774, 90)
  Kaggle test: (526917, 90)


In [25]:
# Preview the first rows of the Kaggle test split.
df_kaggle_test.head()

Unnamed: 0,date,store_nbr,item_nbr,units,logunits,is_kaggle_test,station_nbr,tmax,depart,cool,...,logunits_ewma_14d_a05,logunits_ewma_28d_a05,logunits_ewma_7d_a075,logunits_ewma_14d_a075,logunits_ewma_28d_a075,store_sum_7d,store_mean_7d,item_sum_7d,item_mean_7d,is_valid
159270,2013-04-01,2,1,,,True,14,71.0,1.0,0.0,...,,,,,,8.670772,1.238682,7.203406,1.029058,0
159271,2013-04-01,3,1,,,True,7,68.0,6.2,0.0,...,,,,,,8.82556,1.260794,6.510258,1.085043,0
159272,2013-04-01,6,1,,,True,14,71.0,1.0,0.0,...,,,,,,12.102488,1.728927,5.817111,1.163422,0
159273,2013-04-01,7,1,,,True,6,86.0,6.0,5.0,...,,,,,,8.648221,1.23546,5.123964,1.280991,0
159274,2013-04-01,8,1,,,True,4,87.0,8.0,9.0,...,,,,,,9.57248,1.367497,3.178054,1.059351,0


In [9]:
# Column dtypes and non-null counts of the training split.
df_train.info()

<class 'pandas.core.frame.DataFrame'>
Index: 153496 entries, 0 to 159249
Data columns (total 90 columns):
 #   Column                  Non-Null Count   Dtype         
---  ------                  --------------   -----         
 0   date                    153496 non-null  datetime64[ns]
 1   store_nbr               153496 non-null  int64         
 2   item_nbr                153496 non-null  int64         
 3   units                   153496 non-null  float64       
 4   logunits                153496 non-null  float64       
 5   is_kaggle_test          153496 non-null  bool          
 6   station_nbr             153496 non-null  int64         
 7   tmax                    153496 non-null  float64       
 8   depart                  153496 non-null  float64       
 9   cool                    153496 non-null  float64       
 10  sunrise                 153496 non-null  float64       
 11  sunset                  153496 non-null  float64       
 12  snowfall                153496 non-

## Build lightgbm model

In [10]:
# Columns that must not be fed to the model.
drop_cols = [
    'date',           # raw datetime is not used directly (original note: year/month/day already exist — verify)
    'units',          # original target (data leakage!)
    'logunits',       # transformed target (data leakage!)
    'is_kaggle_test', # data-split flag
    'is_valid',       # data-split flag
    'station_nbr',    # metadata, not needed
    'store_nbr',
    'item_nbr',
]

# Feature matrix and target (`logunits`) for the training split ...
X_train, y_train = df_train.drop(columns=drop_cols), df_train['logunits']

# ... and for the validation split.
X_valid, y_valid = df_valid.drop(columns=drop_cols), df_valid['logunits']

print(f"X_train shape: {X_train.shape}")
print(f"X_valid shape: {X_valid.shape}")
print(f"Total features: {X_train.shape[1]}")

X_train shape: (153496, 82)
X_valid shape: (5774, 82)
Total features: 82


### Build a lightgbm model

In [11]:
def _regression_metrics(y_true, y_pred):
    """Return the (MAE, RMSE, WAPE) triple for one set of predictions."""
    mae = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    wape = weighted_absolute_percentage_error(y_true, y_pred)
    return mae, rmse, wape


def create_lightgbm_model(X_train, y_train, X_test, y_test, n_splits=5):
    """
    Train a baseline LightGBM regressor and report its error metrics.

    The function first cross-validates a fixed-parameter model on the training
    data with ``TimeSeriesSplit``, prints the fold-averaged metrics, then fits
    a final model on all of ``X_train`` and evaluates it on the hold-out set.

    Parameters
    ----------
    X_train, y_train : training features and target (indexed with ``.iloc``,
        so pandas objects are expected).
    X_test, y_test : hold-out features and target used for the final report.
    n_splits : number of ``TimeSeriesSplit`` folds (default 5).

    Returns
    -------
    (final_model, (mae, rmse, wape)) where the metrics are those of the final
    model on the hold-out set.

    Notes
    -----
    * ``TimeSeriesSplit`` splits by row position, so the folds are only
      "train on past, validate on future" if the rows of ``X_train`` are in
      chronological order — TODO confirm the upstream ordering.
    * All metrics are computed on the scale of the target that is passed in
      (no inverse transform is applied here).
    """
    print("\nCreating base lightgbm model...")

    # Expanding-window splitter: each fold validates on rows located after
    # the rows it trains on.
    tscv = TimeSeriesSplit(n_splits=n_splits)

    # Basic LightGBM parameters
    params = {
        "objective": "regression",
        "metric": "rmse",
        "boosting_type": "gbdt",
        "num_leaves": 31,
        "learning_rate": 0.05,
        "feature_fraction": 0.9,
        "n_estimators": 100,
        "verbose": -1,
    }

    # Cross-validate on the training data only.
    cv_scores = []
    for train_idx, val_idx in tscv.split(X_train):
        X_train_cv, X_val_cv = X_train.iloc[train_idx], X_train.iloc[val_idx]
        y_train_cv, y_val_cv = y_train.iloc[train_idx], y_train.iloc[val_idx]

        # No eval_set here: without early stopping it has no effect on the
        # fitted model and only adds per-iteration evaluation cost.
        model = lgbm.LGBMRegressor(**params)
        model.fit(X_train_cv, y_train_cv)

        cv_scores.append(_regression_metrics(y_val_cv, model.predict(X_val_cv)))

    # Print average scores from cross-validation
    mae_avg, rmse_avg, wape_avg = np.mean(cv_scores, axis=0)
    print(
        f"Baseline Model CV - MAE: {mae_avg:.2f}, RMSE: {rmse_avg:.2f}, WAPE: {wape_avg:.2f}%"
    )

    # Train a final model on all training data
    final_model = lgbm.LGBMRegressor(**params)
    final_model.fit(X_train, y_train)

    # Evaluate on the hold-out set supplied by the caller.
    test_mae, test_rmse, test_wape = _regression_metrics(
        y_test, final_model.predict(X_test)
    )

    print(
        f"Baseline Model Test - MAE: {test_mae:.2f}, RMSE: {test_rmse:.2f}, WAPE: {test_wape:.2f}%"
    )

    return final_model, (test_mae, test_rmse, test_wape)

In [12]:
# Train the baseline model; the held-out validation split is passed in as the
# function's X_test / y_test.
lightgbm_model, lightgbm_metrics = create_lightgbm_model(
    X_train, y_train, X_valid, y_valid
)


Creating base lightgbm model...


Baseline Model CV - MAE: 0.28, RMSE: 0.43, WAPE: 51.86%
Baseline Model Test - MAE: 0.21, RMSE: 0.36, WAPE: 15.28%


In [13]:
# Accuracy of the baseline LightGBM model (metrics returned by create_lightgbm_model)
print(
    f"LightGBM Model Results:\nMAE: {lightgbm_metrics[0]:.2f} | RMSE: {lightgbm_metrics[1]:.2f} | WAPE: {lightgbm_metrics[2]:.2f}%"
)

LightGBM Model Results:
MAE: 0.21 | RMSE: 0.36 | WAPE: 15.28%


In [14]:
def create_lightgbm_submission(df_kaggle_test, lightgbm_model, filename="submission_lightgbm.csv"):
    """
    Build and save a submission file from a trained LightGBM model.

    - df_kaggle_test: full test dataframe (must contain is_kaggle_test, date,
      store_nbr, item_nbr and the other columns dropped below).
    - lightgbm_model: fitted model whose predictions are on the log1p scale
      (they are mapped back with expm1).
    - filename: path of the CSV file to write.

    Returns the submission dataframe with columns ``id`` and ``units``.
    """
    # 1. Keep only flagged test rows dated from 2013-04-01 onwards.
    min_test_date = "2013-04-01"
    is_test_row = df_kaggle_test['is_kaggle_test'] == True
    in_test_period = df_kaggle_test['date'] >= min_test_date
    test_rows = df_kaggle_test[is_test_row & in_test_period].copy()

    # 2. Everything except these columns is a model feature
    #    (store_nbr / item_nbr are identifiers only, not model inputs).
    non_feature_cols = [
        'date', 'units', 'logunits',
        'is_kaggle_test', 'is_valid', 'station_nbr',
        'store_nbr', 'item_nbr'
    ]
    log_preds = lightgbm_model.predict(test_rows.drop(columns=non_feature_cols))

    # 3. Attach predictions to the identifier columns, undo the log transform
    #    (negative results are clipped to 0) and order the rows.
    preds = test_rows[['store_nbr', 'item_nbr', 'date']].copy()
    preds['yhat'] = log_preds
    preds = preds.assign(
        units=np.expm1(preds['yhat']).clip(lower=0),
        date_str=preds['date'].dt.strftime('%Y-%m-%d'),
    ).sort_values(['date_str', 'store_nbr', 'item_nbr'])

    # 4. Row id in the form <store_nbr>_<item_nbr>_<YYYY-MM-DD>.
    store_part = preds['store_nbr'].astype(str)
    item_part = preds['item_nbr'].astype(str)
    preds['id'] = store_part + '_' + item_part + '_' + preds['date_str']

    # 5. Keep the two submission columns and write them out.
    submission = preds[['id', 'units']].reset_index(drop=True)
    submission.to_csv(filename, index=False)

    print(f"✓ Saved {filename} ({len(submission)} rows)")
    return submission

# Build and save the baseline submission under the default filename.
submission_lgbm = create_lightgbm_submission(df_kaggle_test, lightgbm_model)

✓ Saved submission_lightgbm.csv (526917 rows)


In [15]:
# Quick check: count the rows whose predicted units are strictly positive
# and show a few of them.
non_zero_preds = submission_lgbm.loc[submission_lgbm['units'].gt(0)]
print(f"Số lượng dòng có dự đoán bán hàng: {len(non_zero_preds)}")
print("Ví dụ 5 dòng có số liệu:")
print(non_zero_preds.head())

Số lượng dòng có dự đoán bán hàng: 526917
Ví dụ 5 dòng có số liệu:
               id      units
0  2_1_2013-04-01   0.157354
1  2_2_2013-04-01   0.093314
2  2_3_2013-04-01   0.157354
3  2_4_2013-04-01   0.093314
4  2_5_2013-04-01  64.844115


### (Optional) Fine-tuning using Optuna

In [22]:
def optimize_lightgbm(X_train, y_train, X_valid, y_valid, n_trials=50, seed=None):
    """
    Tune LightGBM hyperparameters with Optuna and refit the best configuration.

    Each trial fits an ``LGBMRegressor`` on the training data with early
    stopping monitored on the validation data, and is scored by WAPE on that
    same validation data (the study minimises WAPE).

    Parameters
    ----------
    X_train, y_train : training features and target.
    X_valid, y_valid : validation features and target; used for early
        stopping, for trial scoring and for the final report.
    n_trials : number of Optuna trials (default 50).
    seed : optional seed for the Optuna TPE sampler. ``None`` (default) keeps
        the unseeded behaviour; pass an int for a reproducible search.

    Returns
    -------
    (final_model, best_params, (mae, rmse, wape)) — the refit model, the
    parameter dict it was built from, and its validation metrics.

    Notes
    -----
    The same validation set drives early stopping, hyperparameter selection
    and the reported metrics, so the reported numbers are optimistic; confirm
    on data the search never saw.
    """
    print("\nOptimizing LightGBM model with Optuna...")

    # Parameters that are not searched. Defined once so the trial models and
    # the final model cannot drift apart.
    fixed_params = {
        "objective": "regression",
        "metric": "rmse",  # metric LightGBM monitors for early stopping
        "boosting_type": "gbdt",
        "verbosity": -1,
        "n_estimators": 2000,  # upper bound; early stopping picks the actual count
    }

    def objective(trial):
        """Fit one candidate configuration and return its validation WAPE."""
        # Hyperparameter search space
        params = {
            **fixed_params,
            "num_leaves": trial.suggest_int("num_leaves", 31, 127),
            "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.1, log=True),
            "feature_fraction": trial.suggest_float("feature_fraction", 0.7, 1.0),
            "bagging_fraction": trial.suggest_float("bagging_fraction", 0.7, 1.0),
            "bagging_freq": trial.suggest_int("bagging_freq", 1, 7),
            "min_child_samples": trial.suggest_int("min_child_samples", 20, 100),
            "lambda_l1": trial.suggest_float("lambda_l1", 1e-8, 10.0, log=True),
            "lambda_l2": trial.suggest_float("lambda_l2", 1e-8, 10.0, log=True),
            "max_depth": trial.suggest_int("max_depth", 5, 15),
        }

        model = lgbm.LGBMRegressor(**params)
        # Train with early stopping; log_evaluation(0) is passed to keep
        # per-iteration logging off during the search.
        model.fit(
            X_train, y_train,
            eval_set=[(X_valid, y_valid)],
            callbacks=[lgbm.early_stopping(stopping_rounds=100), lgbm.log_evaluation(0)],
        )

        # Score the trial directly on the target metric (WAPE).
        preds = model.predict(X_valid)
        return weighted_absolute_percentage_error(y_valid, preds)

    # Run the Optuna search. TPESampler(seed=None) is the same sampler Optuna
    # uses by default, so behaviour only changes when a seed is supplied.
    study = optuna.create_study(
        direction="minimize",
        sampler=optuna.samplers.TPESampler(seed=seed),
    )
    study.optimize(objective, n_trials=n_trials)

    print("\nBest params found:")
    best_params = {**study.best_params, **fixed_params}
    for k, v in best_params.items():
        print(f"  {k}: {v}")

    # Refit with the best parameters. Early stopping runs again here, it does
    # not reuse the iteration count stored by the winning trial.
    final_model = lgbm.LGBMRegressor(**best_params)
    final_model.fit(
        X_train, y_train,
        eval_set=[(X_valid, y_valid)],
        callbacks=[lgbm.early_stopping(stopping_rounds=100), lgbm.log_evaluation(100)]
    )

    # Re-evaluate on the validation set.
    valid_preds = final_model.predict(X_valid)
    valid_mae = mean_absolute_error(y_valid, valid_preds)
    valid_rmse = np.sqrt(mean_squared_error(y_valid, valid_preds))
    valid_wape = weighted_absolute_percentage_error(y_valid, valid_preds)

    # WAPE is printed with "%" to match the baseline model's reports.
    print(f"\nOptimized LightGBM Valid Metrics - MAE: {valid_mae:.3f}, RMSE: {valid_rmse:.3f}, WAPE: {valid_wape:.3f}%")

    return final_model, best_params, (valid_mae, valid_rmse, valid_wape)

In [23]:
# Run the Optuna search (50 trials) and keep the refit model, its parameter
# dict and its validation metrics.
optimized_model, best_params, optimized_metrics = optimize_lightgbm(
    X_train, y_train, X_valid, y_valid, n_trials=50
)


[I 2025-11-29 03:13:36,939] A new study created in memory with name: no-name-07d7e38f-3341-406b-b322-f2c3e24b9ace



Optimizing LightGBM model with Optuna...
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:13:42,945] Trial 0 finished with value: 14.72858622915484 and parameters: {'num_leaves': 87, 'learning_rate': 0.02037059806581178, 'feature_fraction': 0.8775438737762816, 'bagging_fraction': 0.7733030644099166, 'bagging_freq': 2, 'min_child_samples': 56, 'lambda_l1': 7.439095528804422, 'lambda_l2': 4.107096893390313e-05, 'max_depth': 11}. Best is trial 0 with value: 14.72858622915484.


Early stopping, best iteration is:
[484]	valid_0's rmse: 0.362576
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:13:45,401] Trial 1 finished with value: 14.776010944314603 and parameters: {'num_leaves': 55, 'learning_rate': 0.028907832434754206, 'feature_fraction': 0.8963806470015416, 'bagging_fraction': 0.7369212744225895, 'bagging_freq': 1, 'min_child_samples': 61, 'lambda_l1': 0.0001695482184600973, 'lambda_l2': 2.194044483519614e-06, 'max_depth': 12}. Best is trial 0 with value: 14.72858622915484.


Early stopping, best iteration is:
[312]	valid_0's rmse: 0.362298
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:13:49,018] Trial 2 finished with value: 14.765183053492853 and parameters: {'num_leaves': 117, 'learning_rate': 0.043022062702866785, 'feature_fraction': 0.9468469941476481, 'bagging_fraction': 0.8251309933765285, 'bagging_freq': 7, 'min_child_samples': 70, 'lambda_l1': 1.755790548187145e-05, 'lambda_l2': 0.00019072687756834729, 'max_depth': 15}. Best is trial 0 with value: 14.72858622915484.


Early stopping, best iteration is:
[216]	valid_0's rmse: 0.364136
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:13:51,323] Trial 3 finished with value: 14.83447106975851 and parameters: {'num_leaves': 66, 'learning_rate': 0.03109675684225438, 'feature_fraction': 0.7635174584593963, 'bagging_fraction': 0.7179939631510744, 'bagging_freq': 1, 'min_child_samples': 37, 'lambda_l1': 4.1003058324582455e-08, 'lambda_l2': 0.12257755396551176, 'max_depth': 10}. Best is trial 0 with value: 14.72858622915484.


Early stopping, best iteration is:
[248]	valid_0's rmse: 0.363067
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:13:59,126] Trial 4 finished with value: 14.748218335308506 and parameters: {'num_leaves': 123, 'learning_rate': 0.014230464268002565, 'feature_fraction': 0.8577633812370746, 'bagging_fraction': 0.8418712328270476, 'bagging_freq': 6, 'min_child_samples': 81, 'lambda_l1': 0.0004889376431682281, 'lambda_l2': 6.427723599836863, 'max_depth': 15}. Best is trial 0 with value: 14.72858622915484.


Early stopping, best iteration is:
[545]	valid_0's rmse: 0.362707
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:14:05,443] Trial 5 finished with value: 14.708068350485359 and parameters: {'num_leaves': 53, 'learning_rate': 0.04059975315354588, 'feature_fraction': 0.7333532453076038, 'bagging_fraction': 0.8143257833791376, 'bagging_freq': 4, 'min_child_samples': 34, 'lambda_l1': 2.11859375154384e-06, 'lambda_l2': 8.476014488188824e-07, 'max_depth': 14}. Best is trial 5 with value: 14.708068350485359.


Early stopping, best iteration is:
[1006]	valid_0's rmse: 0.360842
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:14:08,774] Trial 6 finished with value: 14.86522729405901 and parameters: {'num_leaves': 43, 'learning_rate': 0.019208205179484968, 'feature_fraction': 0.9087651396227683, 'bagging_fraction': 0.8998941659496238, 'bagging_freq': 6, 'min_child_samples': 38, 'lambda_l1': 0.25864627566479, 'lambda_l2': 1.019619567768518e-05, 'max_depth': 5}. Best is trial 5 with value: 14.708068350485359.


Early stopping, best iteration is:
[706]	valid_0's rmse: 0.363575
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:14:11,597] Trial 7 finished with value: 14.731944624326642 and parameters: {'num_leaves': 113, 'learning_rate': 0.04053400988034387, 'feature_fraction': 0.9162922716849482, 'bagging_fraction': 0.7471770387336237, 'bagging_freq': 2, 'min_child_samples': 44, 'lambda_l1': 0.0003398974052681038, 'lambda_l2': 8.430925996060889e-05, 'max_depth': 7}. Best is trial 5 with value: 14.708068350485359.


Early stopping, best iteration is:
[283]	valid_0's rmse: 0.362772
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:14:16,941] Trial 8 finished with value: 14.805310988331165 and parameters: {'num_leaves': 101, 'learning_rate': 0.018149080462208013, 'feature_fraction': 0.7656364385871316, 'bagging_fraction': 0.79202705050796, 'bagging_freq': 6, 'min_child_samples': 21, 'lambda_l1': 0.0001902156273240126, 'lambda_l2': 1.5756000938793544, 'max_depth': 14}. Best is trial 5 with value: 14.708068350485359.


Early stopping, best iteration is:
[444]	valid_0's rmse: 0.362878
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:14:17,824] Trial 9 finished with value: 14.800867006836285 and parameters: {'num_leaves': 89, 'learning_rate': 0.07878064852490027, 'feature_fraction': 0.757624106520894, 'bagging_fraction': 0.718873044667026, 'bagging_freq': 6, 'min_child_samples': 24, 'lambda_l1': 1.6599240454302967e-08, 'lambda_l2': 4.2205441561300253e-07, 'max_depth': 5}. Best is trial 5 with value: 14.708068350485359.


Early stopping, best iteration is:
[97]	valid_0's rmse: 0.363098
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:14:19,815] Trial 10 finished with value: 14.776804825887645 and parameters: {'num_leaves': 33, 'learning_rate': 0.09302938857100634, 'feature_fraction': 0.7011451391565036, 'bagging_fraction': 0.9984209902918623, 'bagging_freq': 4, 'min_child_samples': 94, 'lambda_l1': 1.3821712952479429e-06, 'lambda_l2': 1.9365615054365578e-08, 'max_depth': 9}. Best is trial 5 with value: 14.708068350485359.


Early stopping, best iteration is:
[262]	valid_0's rmse: 0.362956
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:14:24,787] Trial 11 finished with value: 14.678545276491574 and parameters: {'num_leaves': 77, 'learning_rate': 0.04818819802032588, 'feature_fraction': 0.9872971889059428, 'bagging_fraction': 0.8950369007528659, 'bagging_freq': 3, 'min_child_samples': 51, 'lambda_l1': 4.353716799254172, 'lambda_l2': 0.006408093736740222, 'max_depth': 13}. Best is trial 11 with value: 14.678545276491574.


Early stopping, best iteration is:
[441]	valid_0's rmse: 0.362588
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:14:26,790] Trial 12 finished with value: 14.840657121176987 and parameters: {'num_leaves': 69, 'learning_rate': 0.058844248825754675, 'feature_fraction': 0.997416714902491, 'bagging_fraction': 0.890600846541502, 'bagging_freq': 4, 'min_child_samples': 48, 'lambda_l1': 0.03406526467823194, 'lambda_l2': 0.012211438897815577, 'max_depth': 13}. Best is trial 11 with value: 14.678545276491574.


Early stopping, best iteration is:
[112]	valid_0's rmse: 0.362403
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:14:29,556] Trial 13 finished with value: 14.76393442355097 and parameters: {'num_leaves': 57, 'learning_rate': 0.047894608370171496, 'feature_fraction': 0.8271461496859531, 'bagging_fraction': 0.9082661865249376, 'bagging_freq': 3, 'min_child_samples': 32, 'lambda_l1': 8.500039144591401e-07, 'lambda_l2': 0.0030217131720484867, 'max_depth': 13}. Best is trial 11 with value: 14.678545276491574.


Early stopping, best iteration is:
[252]	valid_0's rmse: 0.363703
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:14:31,974] Trial 14 finished with value: 14.731328309268957 and parameters: {'num_leaves': 76, 'learning_rate': 0.06674190505709386, 'feature_fraction': 0.8180385888395789, 'bagging_fraction': 0.9526856951024617, 'bagging_freq': 3, 'min_child_samples': 55, 'lambda_l1': 0.0055124093145997016, 'lambda_l2': 2.564413587143419e-08, 'max_depth': 12}. Best is trial 11 with value: 14.678545276491574.


Early stopping, best iteration is:
[144]	valid_0's rmse: 0.362651
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:14:35,665] Trial 15 finished with value: 14.723440728459108 and parameters: {'num_leaves': 47, 'learning_rate': 0.0264783523068049, 'feature_fraction': 0.7038685892843894, 'bagging_fraction': 0.866440478486078, 'bagging_freq': 5, 'min_child_samples': 68, 'lambda_l1': 3.008929587824189, 'lambda_l2': 0.006823651464023221, 'max_depth': 9}. Best is trial 11 with value: 14.678545276491574.


Early stopping, best iteration is:
[504]	valid_0's rmse: 0.361922
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:14:39,492] Trial 16 finished with value: 14.717413683580078 and parameters: {'num_leaves': 86, 'learning_rate': 0.051477002414821775, 'feature_fraction': 0.9732271821815688, 'bagging_fraction': 0.8126637133756099, 'bagging_freq': 3, 'min_child_samples': 29, 'lambda_l1': 1.7830007398894546e-06, 'lambda_l2': 3.000637890344605e-07, 'max_depth': 14}. Best is trial 11 with value: 14.678545276491574.


Early stopping, best iteration is:
[321]	valid_0's rmse: 0.362901
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:14:43,401] Trial 17 finished with value: 14.666331035281404 and parameters: {'num_leaves': 99, 'learning_rate': 0.03502630702916288, 'feature_fraction': 0.8000504084687489, 'bagging_fraction': 0.936461493334479, 'bagging_freq': 5, 'min_child_samples': 46, 'lambda_l1': 0.1932004381564138, 'lambda_l2': 0.1019384967904647, 'max_depth': 11}. Best is trial 17 with value: 14.666331035281404.


Early stopping, best iteration is:
[278]	valid_0's rmse: 0.361653
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:14:51,098] Trial 18 finished with value: 14.806198743319266 and parameters: {'num_leaves': 98, 'learning_rate': 0.010291832784167983, 'feature_fraction': 0.8019983178607648, 'bagging_fraction': 0.9446255573996551, 'bagging_freq': 5, 'min_child_samples': 48, 'lambda_l1': 0.325399039250061, 'lambda_l2': 0.15862749489963146, 'max_depth': 10}. Best is trial 17 with value: 14.666331035281404.


Early stopping, best iteration is:
[649]	valid_0's rmse: 0.3619
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:14:53,930] Trial 19 finished with value: 14.704957021270952 and parameters: {'num_leaves': 104, 'learning_rate': 0.03757073963375102, 'feature_fraction': 0.7923129951363866, 'bagging_fraction': 0.9427764607736131, 'bagging_freq': 5, 'min_child_samples': 69, 'lambda_l1': 0.5407860633903839, 'lambda_l2': 0.0012432420086897452, 'max_depth': 7}. Best is trial 17 with value: 14.666331035281404.


Early stopping, best iteration is:
[306]	valid_0's rmse: 0.361934
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:14:59,040] Trial 20 finished with value: 14.7526201041817 and parameters: {'num_leaves': 71, 'learning_rate': 0.023257122677019463, 'feature_fraction': 0.8526358840202656, 'bagging_fraction': 0.9867060636930679, 'bagging_freq': 2, 'min_child_samples': 84, 'lambda_l1': 0.020522931693870535, 'lambda_l2': 0.07462492759961961, 'max_depth': 12}. Best is trial 17 with value: 14.666331035281404.


Early stopping, best iteration is:
[421]	valid_0's rmse: 0.362567
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:15:01,388] Trial 21 finished with value: 14.766132767283176 and parameters: {'num_leaves': 103, 'learning_rate': 0.03912147616453237, 'feature_fraction': 0.7981221674656992, 'bagging_fraction': 0.9410174195794374, 'bagging_freq': 5, 'min_child_samples': 66, 'lambda_l1': 0.6727736768709608, 'lambda_l2': 0.001120513423719783, 'max_depth': 7}. Best is trial 17 with value: 14.666331035281404.


Early stopping, best iteration is:
[217]	valid_0's rmse: 0.362555
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:15:04,011] Trial 22 finished with value: 14.752206481746923 and parameters: {'num_leaves': 109, 'learning_rate': 0.034413983556150995, 'feature_fraction': 0.792562279143913, 'bagging_fraction': 0.9213497812136041, 'bagging_freq': 4, 'min_child_samples': 77, 'lambda_l1': 1.497042688565866, 'lambda_l2': 0.0007837764957802183, 'max_depth': 7}. Best is trial 17 with value: 14.666331035281404.


Early stopping, best iteration is:
[248]	valid_0's rmse: 0.362978
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:15:06,857] Trial 23 finished with value: 14.686793725606176 and parameters: {'num_leaves': 94, 'learning_rate': 0.056166815815907774, 'feature_fraction': 0.8384969383818199, 'bagging_fraction': 0.869371827383872, 'bagging_freq': 5, 'min_child_samples': 53, 'lambda_l1': 0.0776558831348561, 'lambda_l2': 0.026856962828259376, 'max_depth': 8}. Best is trial 17 with value: 14.666331035281404.


Early stopping, best iteration is:
[264]	valid_0's rmse: 0.361972
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:15:09,620] Trial 24 finished with value: 14.708101979312058 and parameters: {'num_leaves': 93, 'learning_rate': 0.06468576167031587, 'feature_fraction': 0.8399204021440589, 'bagging_fraction': 0.8678730666157044, 'bagging_freq': 7, 'min_child_samples': 43, 'lambda_l1': 0.06046573333827737, 'lambda_l2': 0.02727309428484983, 'max_depth': 9}. Best is trial 17 with value: 14.666331035281404.


Early stopping, best iteration is:
[224]	valid_0's rmse: 0.362522
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:15:12,876] Trial 25 finished with value: 14.690252808105072 and parameters: {'num_leaves': 80, 'learning_rate': 0.05293983859252686, 'feature_fraction': 0.8752712057804333, 'bagging_fraction': 0.8704506436860577, 'bagging_freq': 3, 'min_child_samples': 53, 'lambda_l1': 0.003530930069646676, 'lambda_l2': 0.8414810427339896, 'max_depth': 11}. Best is trial 17 with value: 14.666331035281404.


Early stopping, best iteration is:
[263]	valid_0's rmse: 0.362255
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:15:14,778] Trial 26 finished with value: 14.773108265178415 and parameters: {'num_leaves': 80, 'learning_rate': 0.08133526910590906, 'feature_fraction': 0.9453155767207247, 'bagging_fraction': 0.969859662038412, 'bagging_freq': 5, 'min_child_samples': 61, 'lambda_l1': 0.09985224023410573, 'lambda_l2': 0.6334515374271655, 'max_depth': 8}. Best is trial 17 with value: 14.666331035281404.


Early stopping, best iteration is:
[140]	valid_0's rmse: 0.364032
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:15:17,498] Trial 27 finished with value: 14.768160500665376 and parameters: {'num_leaves': 94, 'learning_rate': 0.07035505234109651, 'feature_fraction': 0.9398861608464028, 'bagging_fraction': 0.8844601174030056, 'bagging_freq': 4, 'min_child_samples': 49, 'lambda_l1': 0.0047565559594977555, 'lambda_l2': 0.025474083323595846, 'max_depth': 11}. Best is trial 17 with value: 14.666331035281404.


Early stopping, best iteration is:
[182]	valid_0's rmse: 0.364557
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:15:20,769] Trial 28 finished with value: 14.695178583727717 and parameters: {'num_leaves': 121, 'learning_rate': 0.04821077938000315, 'feature_fraction': 0.8768463603894121, 'bagging_fraction': 0.8466772520280725, 'bagging_freq': 5, 'min_child_samples': 42, 'lambda_l1': 7.737248666946984, 'lambda_l2': 5.593747049150257, 'max_depth': 8}. Best is trial 17 with value: 14.666331035281404.


Early stopping, best iteration is:
[226]	valid_0's rmse: 0.36269
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:15:25,638] Trial 29 finished with value: 14.717576564902231 and parameters: {'num_leaves': 88, 'learning_rate': 0.023676329399057713, 'feature_fraction': 0.8246692293594744, 'bagging_fraction': 0.9132063901872558, 'bagging_freq': 3, 'min_child_samples': 59, 'lambda_l1': 3.77752827978874, 'lambda_l2': 2.4345468610693027e-05, 'max_depth': 13}. Best is trial 17 with value: 14.666331035281404.


Early stopping, best iteration is:
[352]	valid_0's rmse: 0.362558
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:15:28,717] Trial 30 finished with value: 14.701177024012654 and parameters: {'num_leaves': 109, 'learning_rate': 0.05739688375588565, 'feature_fraction': 0.8629459097303763, 'bagging_fraction': 0.9263219769516972, 'bagging_freq': 2, 'min_child_samples': 55, 'lambda_l1': 0.013924346898936813, 'lambda_l2': 0.0053053554826061776, 'max_depth': 11}. Best is trial 17 with value: 14.666331035281404.


Early stopping, best iteration is:
[159]	valid_0's rmse: 0.362584
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:15:32,100] Trial 31 finished with value: 14.709428853701183 and parameters: {'num_leaves': 81, 'learning_rate': 0.05356416727108439, 'feature_fraction': 0.8855728499031069, 'bagging_fraction': 0.8751037616478687, 'bagging_freq': 3, 'min_child_samples': 52, 'lambda_l1': 0.002461830619935767, 'lambda_l2': 0.5665408886688734, 'max_depth': 11}. Best is trial 17 with value: 14.666331035281404.


Early stopping, best iteration is:
[267]	valid_0's rmse: 0.363165
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:15:35,166] Trial 32 finished with value: 14.733215913277634 and parameters: {'num_leaves': 62, 'learning_rate': 0.03282531276223484, 'feature_fraction': 0.9770458240696712, 'bagging_fraction': 0.8568034312621587, 'bagging_freq': 4, 'min_child_samples': 63, 'lambda_l1': 0.0012990479933789845, 'lambda_l2': 0.38371140190463565, 'max_depth': 10}. Best is trial 17 with value: 14.666331035281404.


Early stopping, best iteration is:
[311]	valid_0's rmse: 0.362274
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:15:38,137] Trial 33 finished with value: 14.717808554998523 and parameters: {'num_leaves': 74, 'learning_rate': 0.045800280331412944, 'feature_fraction': 0.9140128641253156, 'bagging_fraction': 0.8294806205150775, 'bagging_freq': 3, 'min_child_samples': 56, 'lambda_l1': 0.1278992259120044, 'lambda_l2': 1.4364099193911448, 'max_depth': 12}. Best is trial 17 with value: 14.666331035281404.


Early stopping, best iteration is:
[228]	valid_0's rmse: 0.362858
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:15:40,450] Trial 34 finished with value: 14.810903351400086 and parameters: {'num_leaves': 83, 'learning_rate': 0.058046633316359585, 'feature_fraction': 0.7783591289658438, 'bagging_fraction': 0.8931726830786701, 'bagging_freq': 2, 'min_child_samples': 41, 'lambda_l1': 3.0282646796877846e-05, 'lambda_l2': 0.04665008390587631, 'max_depth': 12}. Best is trial 17 with value: 14.666331035281404.


Early stopping, best iteration is:
[115]	valid_0's rmse: 0.363177
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:15:43,415] Trial 35 finished with value: 14.899678985214376 and parameters: {'num_leaves': 94, 'learning_rate': 0.028112777607183653, 'feature_fraction': 0.839106556406644, 'bagging_fraction': 0.9689549941458039, 'bagging_freq': 1, 'min_child_samples': 50, 'lambda_l1': 2.01500167333996, 'lambda_l2': 0.00033164296882026825, 'max_depth': 10}. Best is trial 17 with value: 14.666331035281404.


Early stopping, best iteration is:
[224]	valid_0's rmse: 0.363027
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:15:44,594] Trial 36 finished with value: 14.850573608826915 and parameters: {'num_leaves': 65, 'learning_rate': 0.09707205445216001, 'feature_fraction': 0.8849595182603383, 'bagging_fraction': 0.7928086123574538, 'bagging_freq': 7, 'min_child_samples': 76, 'lambda_l1': 0.014453559351853043, 'lambda_l2': 0.21916456327687805, 'max_depth': 6}. Best is trial 17 with value: 14.666331035281404.


Early stopping, best iteration is:
[113]	valid_0's rmse: 0.363417
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:15:47,015] Trial 37 finished with value: 14.815838021071007 and parameters: {'num_leaves': 77, 'learning_rate': 0.04349336441458241, 'feature_fraction': 0.7312179598474341, 'bagging_fraction': 0.837927466945701, 'bagging_freq': 4, 'min_child_samples': 37, 'lambda_l1': 9.722728877161979, 'lambda_l2': 2.07116426309411, 'max_depth': 15}. Best is trial 17 with value: 14.666331035281404.


Early stopping, best iteration is:
[173]	valid_0's rmse: 0.362247
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:15:49,923] Trial 38 finished with value: 14.84944611846433 and parameters: {'num_leaves': 98, 'learning_rate': 0.035488092055788456, 'feature_fraction': 0.8679045543913619, 'bagging_fraction': 0.8530697315237424, 'bagging_freq': 6, 'min_child_samples': 45, 'lambda_l1': 4.483891311523359e-05, 'lambda_l2': 0.01711492142578662, 'max_depth': 11}. Best is trial 17 with value: 14.666331035281404.


Early stopping, best iteration is:
[176]	valid_0's rmse: 0.362247
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:15:54,712] Trial 39 finished with value: 14.710057535536007 and parameters: {'num_leaves': 127, 'learning_rate': 0.029944422864931902, 'feature_fraction': 0.897915467151521, 'bagging_fraction': 0.9285345018567125, 'bagging_freq': 1, 'min_child_samples': 53, 'lambda_l1': 0.0010656570683799016, 'lambda_l2': 9.62961007166249, 'max_depth': 13}. Best is trial 17 with value: 14.666331035281404.


Early stopping, best iteration is:
[341]	valid_0's rmse: 0.362732
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:15:56,344] Trial 40 finished with value: 14.798391093877639 and parameters: {'num_leaves': 91, 'learning_rate': 0.07664804030064536, 'feature_fraction': 0.9318688535391142, 'bagging_fraction': 0.8994623046705857, 'bagging_freq': 3, 'min_child_samples': 60, 'lambda_l1': 0.18046912313749852, 'lambda_l2': 0.07391106380983684, 'max_depth': 8}. Best is trial 17 with value: 14.666331035281404.


Early stopping, best iteration is:
[86]	valid_0's rmse: 0.362576
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:15:59,570] Trial 41 finished with value: 14.727802195714506 and parameters: {'num_leaves': 120, 'learning_rate': 0.048493801851298396, 'feature_fraction': 0.8777912268975177, 'bagging_fraction': 0.8482393776048845, 'bagging_freq': 5, 'min_child_samples': 40, 'lambda_l1': 0.9560920531329344, 'lambda_l2': 3.5694305939791637, 'max_depth': 8}. Best is trial 17 with value: 14.666331035281404.


Early stopping, best iteration is:
[255]	valid_0's rmse: 0.36264
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:16:02,299] Trial 42 finished with value: 14.840497404667733 and parameters: {'num_leaves': 109, 'learning_rate': 0.042844301252784454, 'feature_fraction': 0.8090041455455689, 'bagging_fraction': 0.8854537999097742, 'bagging_freq': 5, 'min_child_samples': 45, 'lambda_l1': 8.381234726991329, 'lambda_l2': 0.6630774410477722, 'max_depth': 9}. Best is trial 17 with value: 14.666331035281404.


Early stopping, best iteration is:
[149]	valid_0's rmse: 0.362385
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:16:03,778] Trial 43 finished with value: 14.82063456625092 and parameters: {'num_leaves': 115, 'learning_rate': 0.05271779813855949, 'feature_fraction': 0.8420477159672787, 'bagging_fraction': 0.8084418366479675, 'bagging_freq': 6, 'min_child_samples': 36, 'lambda_l1': 3.796088113728154, 'lambda_l2': 4.204676795351545, 'max_depth': 6}. Best is trial 17 with value: 14.666331035281404.


Early stopping, best iteration is:
[137]	valid_0's rmse: 0.362721
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:16:06,181] Trial 44 finished with value: 14.790664490613054 and parameters: {'num_leaves': 127, 'learning_rate': 0.05967225198774889, 'feature_fraction': 0.9607300258251972, 'bagging_fraction': 0.7640241746318834, 'bagging_freq': 5, 'min_child_samples': 29, 'lambda_l1': 0.05394226466467227, 'lambda_l2': 0.17735967191905258, 'max_depth': 10}. Best is trial 17 with value: 14.666331035281404.


Early stopping, best iteration is:
[113]	valid_0's rmse: 0.363109
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:16:08,111] Trial 45 finished with value: 15.056930435537172 and parameters: {'num_leaves': 85, 'learning_rate': 0.04895951012912417, 'feature_fraction': 0.9029764748843716, 'bagging_fraction': 0.8354613496068843, 'bagging_freq': 4, 'min_child_samples': 46, 'lambda_l1': 0.2877284018205826, 'lambda_l2': 1.4670302591254731, 'max_depth': 8}. Best is trial 17 with value: 14.666331035281404.


Early stopping, best iteration is:
[111]	valid_0's rmse: 0.363466
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:16:10,610] Trial 46 finished with value: 14.84699870387378 and parameters: {'num_leaves': 73, 'learning_rate': 0.03924614735639539, 'feature_fraction': 0.9238574151673992, 'bagging_fraction': 0.8711794509480056, 'bagging_freq': 6, 'min_child_samples': 41, 'lambda_l1': 0.9729316561395894, 'lambda_l2': 0.004430137931418963, 'max_depth': 11}. Best is trial 17 with value: 14.666331035281404.


Early stopping, best iteration is:
[178]	valid_0's rmse: 0.363288
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:16:13,004] Trial 47 finished with value: 14.868209249213317 and parameters: {'num_leaves': 98, 'learning_rate': 0.06380504366707777, 'feature_fraction': 0.9966870501650493, 'bagging_fraction': 0.9049471213249239, 'bagging_freq': 4, 'min_child_samples': 58, 'lambda_l1': 0.006447716659390582, 'lambda_l2': 9.773423056969248, 'max_depth': 14}. Best is trial 17 with value: 14.666331035281404.


Early stopping, best iteration is:
[112]	valid_0's rmse: 0.364076
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:16:15,212] Trial 48 finished with value: 14.75351399372674 and parameters: {'num_leaves': 120, 'learning_rate': 0.07213096611430724, 'feature_fraction': 0.77450790412831, 'bagging_fraction': 0.8582499448654947, 'bagging_freq': 5, 'min_child_samples': 52, 'lambda_l1': 4.564825159236479, 'lambda_l2': 0.06615701647130455, 'max_depth': 9}. Best is trial 17 with value: 14.666331035281404.


Early stopping, best iteration is:
[107]	valid_0's rmse: 0.363384
Training until validation scores don't improve for 100 rounds


[I 2025-11-29 03:16:18,077] Trial 49 finished with value: 14.760196278904003 and parameters: {'num_leaves': 67, 'learning_rate': 0.04524492797498136, 'feature_fraction': 0.7427802482650141, 'bagging_fraction': 0.8790914061962506, 'bagging_freq': 3, 'min_child_samples': 33, 'lambda_l1': 1.1852707077618506e-07, 'lambda_l2': 0.01123390019997144, 'max_depth': 6}. Best is trial 17 with value: 14.666331035281404.


Early stopping, best iteration is:
[414]	valid_0's rmse: 0.362369

Best params found:
  num_leaves: 99
  learning_rate: 0.03502630702916288
  feature_fraction: 0.8000504084687489
  bagging_fraction: 0.936461493334479
  bagging_freq: 5
  min_child_samples: 46
  lambda_l1: 0.1932004381564138
  lambda_l2: 0.1019384967904647
  max_depth: 11
  objective: regression
  metric: rmse
  boosting_type: gbdt
  verbosity: -1
  n_estimators: 2000
Training until validation scores don't improve for 100 rounds
[100]	valid_0's rmse: 0.366997
[200]	valid_0's rmse: 0.362041
[300]	valid_0's rmse: 0.361834
Early stopping, best iteration is:
[278]	valid_0's rmse: 0.361653

Optimized LightGBM Valid Metrics - MAE: 0.200, RMSE: 0.362, WAPE: 14.666


In [24]:
# Build the Kaggle submission from the tuned model (`optimized_model`).
# The recorded output of this cell reports saving submission_lightgbm.csv
# with 526917 rows.
submission_lgbm = create_lightgbm_submission(df_kaggle_test, optimized_model)

✓ Saved submission_lightgbm.csv (526917 rows)


## Evaluating model

In [19]:
def evaluate_model(model, X_test, y_test, data):
    """
    Score a fitted model on a hold-out set and print/plot basic diagnostics.

    Args:
        model: Fitted estimator exposing ``predict(X_test)``.
        X_test: Feature matrix of the hold-out rows.
        y_test: True target values matching ``X_test``.
        data: DataFrame with an ``is_test`` flag plus ``date`` (datetime) and
            ``sales`` columns. The flagged rows are assumed to be the same
            rows, in the same order, as ``X_test`` -- TODO confirm in caller.

    Returns:
        Tuple ``(test_mae, test_rmse, test_wape, test_preds, y_test,
        test_results)``. ``test_results`` is the flagged slice of ``data``
        extended with ``prediction``, ``error``, ``abs_error``, ``month_name``
        and ``year_month`` columns.

    Raises:
        ValueError: If the number of rows flagged by ``is_test`` differs from
            the number of predictions.
    """
    print("\nEvaluating model performance on test set...")

    # Make predictions on the test set
    test_preds = model.predict(X_test)

    # Calculate metrics
    test_mae = mean_absolute_error(y_test, test_preds)
    test_rmse = np.sqrt(mean_squared_error(y_test, test_preds))
    test_wape = weighted_absolute_percentage_error(y_test, test_preds)

    # Print evaluation results
    print("Final Model Test Evaluation:")
    print(f"    MAE: {test_mae:.2f}")
    print(f"    RMSE: {test_rmse:.2f}")
    print(f"    WAPE: {test_wape:.2f}%")

    # Coerce the flag to bool: a 0/1 integer column would otherwise be
    # interpreted by pandas as column selection rather than a row mask.
    test_mask = data["is_test"].astype(bool)
    test_results = data[test_mask].copy()

    # Fail early with an actionable message instead of a pandas length error.
    if len(test_results) != len(test_preds):
        raise ValueError(
            f"data has {len(test_results)} rows flagged by 'is_test' but the "
            f"model produced {len(test_preds)} predictions; X_test and the "
            "flagged rows of data must describe the same observations"
        )

    # NOTE(review): the error columns compare `data["sales"]` (not `y_test`)
    # with the predictions, so both are assumed to be on the same scale --
    # TODO confirm, especially if the target was transformed before training.
    test_results["prediction"] = test_preds
    test_results["error"] = test_results["sales"] - test_results["prediction"]
    test_results["abs_error"] = np.abs(test_results["error"])
    test_results["month_name"] = test_results["date"].dt.strftime("%B")

    # Summarize errors by month. The earliest date of each month is carried
    # along only as a sort key so the table reads chronologically rather than
    # alphabetically by month name.
    monthly_errors = (
        test_results.groupby("month_name")
        .agg({"abs_error": "mean", "error": "mean", "sales": "mean", "date": "min"})
        .sort_values("date")
        .drop(columns="date")
        .reset_index()
    )
    monthly_errors["error_pct"] = (
        100 * monthly_errors["abs_error"] / monthly_errors["sales"]
    )

    print("\nError Analysis by Month:")
    print(
        monthly_errors[["month_name", "abs_error", "error_pct"]].to_string(index=False)
    )

    # Include a year-month key for more granular downstream analysis
    test_results["year_month"] = test_results["date"].dt.strftime("%Y-%m")

    # Plot actual vs predicted, with the y = x reference line.
    # np.min / np.max are vectorized, unlike the builtins on a Series.
    actual_min, actual_max = np.min(y_test), np.max(y_test)
    plt.figure(figsize=(10, 6))
    plt.scatter(y_test, test_preds, alpha=0.5)
    plt.plot([actual_min, actual_max], [actual_min, actual_max], "r--")
    plt.title("Actual vs Predicted Sales (Test Set)")
    plt.xlabel("Actual Sales")
    plt.ylabel("Predicted Sales")
    plt.tight_layout()

    # Plot error distribution
    plt.figure(figsize=(10, 6))
    sns.histplot(test_results["error"], kde=True)
    plt.title("Error Distribution")
    plt.xlabel("Prediction Error")
    plt.tight_layout()

    return test_mae, test_rmse, test_wape, test_preds, y_test, test_results

In [20]:
# Prophet Model Results:
# MAE: 9.03 | RMSE: 11.86 | WAPE: 29.13%

In [21]:
# Evaluate the lightgbm model
# NOTE(review): the recorded output of this cell is
# "NameError: name 'X_test' is not defined" -- X_test / y_test must be created
# by an earlier cell before this runs on a fresh kernel.
# NOTE(review): the third value returned by evaluate_model is the WAPE, even
# though it is bound to the name `test_smape` here.
# NOTE(review): evaluate_model filters its `data` argument on an "is_test"
# column -- confirm that df_features carries that column.
test_mae, test_rmse, test_smape, test_preds, y_test_values, test_results = (
    evaluate_model(lightgbm_model, X_test, y_test, df_features)
)

NameError: name 'X_test' is not defined

## Save trained models

In [None]:
def save_model(model, X_train, feature_names, output_dir="../models"):
    """
    Persist a trained model and a JSON description of its expected inputs.

    Args:
        model: Trained model object (e.g., LightGBM model); pickled as-is.
        X_train: Accepted for interface compatibility; not read here.
        feature_names: Iterable of feature names, recorded both as the
            required columns and as the column order.
        output_dir: Directory receiving the artifacts; created when missing.

    Returns:
        Tuple ``(model_path, stats_path)`` with the paths of the pickle file
        and the JSON metadata file that were written.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Serialize the model itself
    model_path = os.path.join(output_dir, "sales_forecast_model.pkl")
    with open(model_path, "wb") as model_file:
        pickle.dump(model, model_file)
    print(f"Model saved to {model_path}")

    # Fallback values for the date-derived features
    date_defaults = {
        "year": 2017,
        "month": 11,
        "day": 15,
        "day_of_week": 2,
        "is_weekend": 0,
        "quarter": 4,
        "is_holiday": 0,
    }

    # Metadata written next to the model. The open-ended bin edges are
    # emitted by json as -Infinity / Infinity (Python's default allow_nan).
    trained_on = pd.Timestamp.now().strftime("%Y-%m-%d")
    feature_stats = {
        "model_version": "1.0.0",
        "last_trained": trained_on,
        "required_columns": list(feature_names),
        "column_order": list(feature_names),
        "default_values": date_defaults,
        "temperature_bins": [-np.inf, 20, 25, 30, np.inf],
        "temperature_labels": ["Cold", "Cool", "Warm", "Hot"],
        "humidity_bins": [-np.inf, 60, 75, np.inf],
        "humidity_labels": ["Low", "Medium", "High"],
    }

    stats_path = os.path.join(output_dir, "feature_stats.json")
    with open(stats_path, "w") as stats_file:
        json.dump(feature_stats, stats_file, indent=4)
    print(f"Feature statistics saved to {stats_path}")

    print(f"All model artifacts saved successfully to {output_dir}/")

    return model_path, stats_path

In [None]:
# Save model
# Persists `optimized_model` together with the training column names.
# NOTE(review): the src and data paths earlier in this notebook are built from
# "../.." -- confirm that '../models' is the intended artifact location.
save_model(
    model=optimized_model,
    X_train=X_train,
    feature_names=X_train.columns,
    output_dir='../models'
)

Model saved to ../models/sales_forecast_model.pkl
Feature statistics saved to ../models/feature_stats.json
All model artifacts saved successfully to ../models/


('../models/sales_forecast_model.pkl', '../models/feature_stats.json')