In [5]:
import pandas as pd
import numpy as np
import yaml
import pickle
import warnings
from statsmodels.tools.sm_exceptions import ValueWarning

# Silence statsmodels' ValueWarning specifically.
warnings.filterwarnings("ignore", category=ValueWarning)
# NOTE(review): this second filter ignores every warning category, which makes the
# targeted filter above redundant and hides all warnings raised by later cells.
# Confirm that a blanket suppression is really intended.
warnings.filterwarnings("ignore")

In [6]:
def load_model(model_path):
    """Deserialize and return the pickled object stored at ``model_path``.

    Parameters
    ----------
    model_path : str or path-like
        Location of a file written with ``pickle``.

    Returns
    -------
    object
        Whatever object the pickle file contains.
    """
    # NOTE: unpickling can execute arbitrary code, so only point this at
    # files that were produced by a trusted source.
    with open(model_path, 'rb') as handle:
        return pickle.load(handle)

def load_config(config_path=r'C:\Users\MSI 11uc\Documents\ML\CW\Q5 Final\Q5_sales_forecasting_final\Q5_sales_forecasting\config\config.yaml'):
    """Parse the YAML file at ``config_path`` and return its contents.

    The default is an absolute path on one specific machine; pass
    ``config_path`` explicitly when running the notebook anywhere else.

    Parameters
    ----------
    config_path : str
        Location of the YAML configuration file.

    Returns
    -------
    object
        The structure produced by ``yaml.safe_load`` for that file.
    """
    with open(config_path, 'r') as stream:
        settings = yaml.safe_load(stream)
    return settings

# Read the notebook configuration first; everything below is looked up in it.
config = load_config()

# Data set referenced by the `featured_test_data_output_path` setting.
df = pd.read_csv(config['data']['processed']['featured_test_data_output_path'])

# Unpickle the four artefacts in the same order as before:
# xgboost, arima, lstm, xgboost_encoders.
xgb_model, arima_models, lstm_model, xgb_encoders = (
    load_model(config['model_paths'][key])
    for key in ('xgboost', 'arima', 'lstm', 'xgboost_encoders')
)

# Column that the evaluation below treats as the actual (ground-truth) values.
target_column = config['target_column']

In [7]:
def evaluate_models_with_mape(df, xgb_model, label_encoders, target=None):
    """Print the XGBoost model's MAPE on ``df`` at two grouping granularities.

    Predictions are produced once for the whole frame. For each granularity
    the rows are grouped, the mean absolute percentage error is computed
    inside every group, and the unweighted average of the per-group values is
    printed as ``"XGBoost MAPE (<granularity>): <value>%"``.

    Rows whose actual value is zero (or missing) cannot yield a percentage
    error and are left out; a group made up only of such rows is excluded
    from the average instead of turning the result into ``inf``/``nan``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``store``, ``item_dept``, ``profile``, ``size``,
        ``date_id`` and the target column. Every column except the target and
        ``date_id`` is passed to the model. The frame is not modified.
    xgb_model : object
        Model exposing ``predict(features)`` that returns one value per row.
    label_encoders : mapping
        Maps ``'store'``, ``'item_dept'``, ``'profile'`` and ``'size'`` to
        encoders exposing ``transform(values)``.
    target : str, optional
        Name of the column holding the actual values. When omitted, the
        notebook-level ``target_column`` is used.

    Returns
    -------
    None
        Results are printed, not returned.

    Raises
    ------
    ValueError
        If an unsupported granularity label is requested.
    """
    # Resolved at call time so the original three-argument call keeps reading
    # the notebook-level setting.
    target_name = target_column if target is None else target

    encoded_columns = ('store', 'item_dept', 'profile', 'size')
    group_keys = {
        'date_id | store | item_dept': ['store', 'item_dept', 'date_id'],
        'date_id | store': ['store', 'date_id'],
    }

    def absolute_percentage_error(y_true, y_pred):
        """Return |y_true - y_pred| / |y_true| per row, NaN where y_true is 0."""
        actual = np.asarray(y_true, dtype=float)
        predicted = np.asarray(y_pred, dtype=float)
        ratios = np.full(actual.shape, np.nan)
        usable = actual != 0  # a zero actual has no defined percentage error
        ratios[usable] = np.abs(
            (actual[usable] - predicted[usable]) / actual[usable]
        )
        return ratios

    def predict_xgboost(frame):
        """Encode the categorical columns on a copy of ``frame`` and predict."""
        if len(frame) == 0:
            # Nothing to score; avoid handing empty input to encoders/model.
            return np.empty(0, dtype=float)
        features = frame.copy()
        for column in encoded_columns:
            features[column] = label_encoders[column].transform(features[column])
        # Drop the target and the date key so only model features remain.
        features = features.drop(columns=[target_name, 'date_id'])
        return np.asarray(xgb_model.predict(features), dtype=float)

    def evaluate_model(frame, ratios, model_name, granularity):
        """Average the per-group MAPE for one granularity and print it."""
        if granularity not in group_keys:
            raise ValueError("Unsupported granularity")
        keys = group_keys[granularity]

        # NOTE(review): this averages row-level percentage errors inside each
        # group; it does not compare aggregated (summed) actuals against
        # aggregated predictions. With equally sized groups both granularities
        # therefore report the same number. Confirm which definition is wanted.
        scored = frame[keys].assign(ratio=ratios)
        per_group_mape = scored.groupby(keys)['ratio'].mean() * 100

        # Groups without any usable row come out as NaN and are skipped.
        avg_mape = per_group_mape.dropna().mean()
        print(f"{model_name} MAPE ({granularity}): {avg_mape}%")

    # Encoding and prediction do not depend on the grouping, so do them once.
    predictions = predict_xgboost(df)
    ratios = absolute_percentage_error(df[target_name], predictions)

    # Evaluate XGBoost
    evaluate_model(df, ratios, 'XGBoost', 'date_id | store | item_dept')
    evaluate_model(df, ratios, 'XGBoost', 'date_id | store')

In [8]:
# Score the loaded XGBoost model on `df`; the function prints one MAPE line per granularity.
evaluate_models_with_mape(df, xgb_model, xgb_encoders)

XGBoost MAPE (date_id | store | item_dept): 13.490848014621909%
XGBoost MAPE (date_id | store): 13.490848014621912%


<div style="font-family: Arial, sans-serif; font-size: 14px; line-height: 1.6;">
    <h3>XGBoost Mean Absolute Percentage Error (MAPE)</h3>
    <ul>
        <li><strong>date_id | store | item_dept:</strong> 13.490848014621909%</li>
        <li><strong>date_id | store:</strong> 13.490848014621912%</li>
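    </ul>
    <p>Both figures agree to within floating-point rounding: the evaluation averages row-level percentage errors inside each group, so regrouping the same rows leaves the overall mean unchanged when the groups are equally sized.</p>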
</div>
