# Model Testing

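This notebook loads the trained demand model and RL pricing agent, runs specific test cases through them, and then compares the demand model's in-sample fit with its 5-fold cross-validated fit.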

In [1]:
import pandas as pd
import numpy as np
import pickle
from stable_baselines3 import SAC
from sklearn.preprocessing import LabelEncoder

%matplotlib inline

Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
Users of this version of Gym should be able to simply replace 'import gym' with 'import gymnasium as gym' in the vast majority of cases.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.


In [2]:
# Load Artifacts
# NOTE(security): pickle.load can execute arbitrary code stored in the file.
# Only load artifacts that were written by this project's own notebooks.
with open('../outcomes/demand_model_lgbm.pkl', 'rb') as f:
    demand_model = pickle.load(f)

with open('../outcomes/label_encoders.pkl', 'rb') as f:
    le_dict = pickle.load(f)

# Per-product price bounds, indexed by product so they can be looked up with .loc
price_constraints = pd.read_csv('../outcomes/price_constraints.csv').set_index('Product ID')

# Load RL Agent
# The agent is optional: rl_model stays None when it cannot be loaded, and the
# cause is reported. A missing file and any other load failure get different
# messages, and KeyboardInterrupt / SystemExit are no longer swallowed.
rl_model = None
try:
    rl_model = SAC.load('../outcomes/sac_pricing_model')
    print("RL Model loaded.")
except FileNotFoundError:
    print("RL Model not found. Please run 03_rl_pricing.ipynb first.")
except Exception as exc:
    print(f"RL Model could not be loaded ({type(exc).__name__}: {exc}).")

RL Model loaded.


## Define Test Case Helper

In [3]:
def run_test_case(product_id, store_id, category, region, price, inventory,
                  discount, weather, promotion, competitor_price, seasonality, epidemic, date):
    """Print the demand forecast and the RL price recommendation for one scenario.

    The scenario is turned into a single-row frame, given Month/Day/Weekday
    columns derived from ``date``, and label-encoded with ``le_dict``. The
    function then prints the demand predicted at ``price``. When ``rl_model``
    is set it also prints the agent's recommended price, the demand predicted
    at that price, and the resulting revenue estimate.

    Reads the module-level ``le_dict``, ``demand_model``, ``rl_model`` and
    ``price_constraints``. Returns None; all results are printed.

    Args:
        product_id, store_id, category, region, weather, seasonality: raw
            (un-encoded) values; a value unknown to its encoder is replaced
            by -1 and a warning is printed.
        price: current price; it is also appended to the agent's observation.
        inventory, discount, promotion, competitor_price, epidemic: passed to
            the model as given.
        date: anything ``pd.to_datetime`` accepts.
    """
    scenario = pd.DataFrame({
        'Store ID': [store_id],
        'Product ID': [product_id],
        'Category': [category],
        'Region': [region],
        'Inventory Level': [inventory],
        'Price': [price],
        'Discount': [discount],
        'Weather Condition': [weather],
        'Promotion': [promotion],
        'Competitor Pricing': [competitor_price],
        'Seasonality': [seasonality],
        'Epidemic': [epidemic],
        'Date': [pd.to_datetime(date)],
    })

    # Calendar features derived from the date
    scenario['Month'] = scenario['Date'].dt.month
    scenario['Day'] = scenario['Date'].dt.day
    scenario['Weekday'] = scenario['Date'].dt.weekday

    # Label-encode every column that has a fitted encoder
    for column, encoder in le_dict.items():
        if column not in scenario.columns:
            continue
        raw_value = scenario[column].iloc[0]
        if raw_value not in encoder.classes_:
            print(f"Warning: Unseen label {raw_value} for {column}")
            scenario[column] = -1  # sentinel for labels the encoder never saw
            continue
        scenario[column] = encoder.transform([raw_value])

    # 1. Demand at the current price.
    # The column order below has to match the features used in training.
    features = [
        'Store ID', 'Product ID', 'Category', 'Region', 'Inventory Level',
        'Price', 'Discount', 'Weather Condition', 'Promotion',
        'Competitor Pricing', 'Seasonality', 'Epidemic', 'Month', 'Day', 'Weekday',
    ]
    demand_pred = demand_model.predict(scenario[features])[0]
    print(f"Predicted Demand (at Price {price}): {demand_pred:.2f}")

    # 2. Price recommendation; nothing more to do without an agent
    if not rl_model:
        return

    # Observation = every feature except Price, followed by the current price,
    # which gives the 15-element vector the original comment says the env uses.
    state_columns = [name for name in features if name != 'Price']
    state = scenario[state_columns].iloc[0].values.astype(np.float32)
    obs = np.concatenate([state, np.array([np.float32(price)])])

    # A single 1-D observation is passed, so action[0] is the only component read
    action, _ = rl_model.predict(obs, deterministic=True)

    # Per-product bounds; products missing from the constraints table fall back to 10-1000
    try:
        min_price, max_price = (
            price_constraints.loc[product_id, 'min_price'],
            price_constraints.loc[product_id, 'max_price'],
        )
    except KeyError:
        min_price, max_price = 10, 1000

    # Linear map of the action from [-1, 1] onto [min_price, max_price]
    price_span = max_price - min_price
    recommended_price = min_price + price_span * (action[0] + 1) / 2
    print(f"Recommended Price (RL): {recommended_price:.2f} (Bounds: {min_price}-{max_price})")

    # Re-score demand with the recommended price substituted in
    scenario['Price'] = recommended_price
    opt_demand = demand_model.predict(scenario[features])[0]
    print(f"Predicted Demand at Recommended Price: {opt_demand:.2f}")
    print(f"Estimated Revenue: {recommended_price * opt_demand:.2f}")

## Run Test Cases

In [12]:
# Example Test Case 1
# The scenario is kept in a dict so it can be inspected or tweaked before the run.
test_case_1 = {
    'product_id': 'P0001',
    'store_id': 'S001',
    'category': 'Electronics',
    'region': 'North',
    'price': 80.0,
    'inventory': 50,
    'discount': 5,
    'weather': 'Sunny',
    'promotion': 0,
    'competitor_price': 75.0,
    'seasonality': 'Summer',
    'epidemic': 0,
    'date': '2023-06-15',
}
run_test_case(**test_case_1)

Predicted Demand (at Price 80.0): 130.67
Recommended Price (RL): 77.86 (Bounds: 7.3-121.94)
Predicted Demand at Recommended Price: 124.11
Estimated Revenue: 9662.46


In [13]:
# Example Test Case 2 (High Seasonality)
# The scenario is kept in a dict so it can be inspected or tweaked before the run.
test_case_2 = {
    'product_id': 'P0002',
    'store_id': 'S002',
    'category': 'Clothing',
    'region': 'East',
    'price': 120.0,
    'inventory': 100,
    'discount': 10,
    'weather': 'Snowy',
    'promotion': 1,
    'competitor_price': 110.0,
    'seasonality': 'Winter',
    'epidemic': 1,
    'date': '2023-12-20',
}
run_test_case(**test_case_2)

Predicted Demand (at Price 120.0): 266.45
Recommended Price (RL): 71.05 (Bounds: 11.59-99.74)
Predicted Demand at Recommended Price: 165.24
Estimated Revenue: 11740.62


In [None]:
# Model fit diagnostics: in-sample vs 5-fold CV
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.base import clone
import matplotlib.pyplot as plt
import numpy as np

# Load full dataset and derive the calendar features from the date
df_all = pd.read_csv('../data/sales_data.csv')
df_all['Date'] = pd.to_datetime(df_all['Date'])
df_all['Month'] = df_all['Date'].dt.month
df_all['Day'] = df_all['Date'].dt.day
df_all['Weekday'] = df_all['Date'].dt.weekday

# Apply the fitted label encoders.
# A label's code is its position in le.classes_, so the label -> code table is
# built once per column and applied with a single vectorised map. The previous
# version called le.transform once per row. Labels the encoder never saw (and
# missing values) come back from map() as NaN and are turned into -1.
for col, le in le_dict.items():
    if col in df_all.columns:
        code_by_label = {label: code for code, label in enumerate(le.classes_)}
        df_all[col] = df_all[col].map(code_by_label).fillna(-1).astype('int64')

# Model inputs and target. The feature list (and its order) must match training.
features = [
    'Store ID', 'Product ID', 'Category', 'Region', 'Inventory Level',
    'Price', 'Discount', 'Weather Condition', 'Promotion',
    'Competitor Pricing', 'Seasonality', 'Epidemic', 'Month', 'Day', 'Weekday',
]
target = 'Demand'

# Keep only rows with every feature and the target present, then renumber
# the rows from 0 so labels and positions coincide.
data = df_all.dropna(subset=[*features, target]).reset_index(drop=True)
X, y = data[features], data[target]

print(f"Dataset size for diagnostics: {len(data)} rows")

# Both prediction sets are optional: each stays None when its step fails, and
# the failure is printed so the rest of the cell can still run.

# 1) Predictions from the already-fitted, loaded model on the full dataset.
# NOTE(review): this is only truly "in-sample" if the model was trained on
# these same rows - confirm against the training notebook.
y_pred_in = None
try:
    y_pred_in = demand_model.predict(X)
except Exception as e:
    print('Error predicting with loaded model (in-sample):', e)

# 2) Out-of-fold predictions (5-fold) from an unfitted clone of the estimator,
# so the loaded model itself is never refitted.
y_pred_cv = None
try:
    est = clone(demand_model)
    y_pred_cv = cross_val_predict(est, X, y, cv=5, n_jobs=1)
except Exception as e:
    print('Error during cross_val_predict:', e)

# Metric helper
def metrics(y_true, y_pred):
    """Return ``(rmse, mae, r2)`` for ``y_pred`` measured against ``y_true``.

    RMSE is obtained as the square root of the MSE, so the result does not
    depend on whether the installed sklearn offers a dedicated RMSE option.
    """
    return (
        np.sqrt(mean_squared_error(y_true, y_pred)),
        mean_absolute_error(y_true, y_pred),
        r2_score(y_true, y_pred),
    )

# Report the error metrics for whichever prediction sets are available
has_in_sample = y_pred_in is not None
has_cv = y_pred_cv is not None

if has_in_sample:
    rmse_in, mae_in, r2_in = metrics(y, y_pred_in)
    print(f"In-sample   -> RMSE: {rmse_in:.3f}, MAE: {mae_in:.3f}, R2: {r2_in:.3f}")
if has_cv:
    rmse_cv, mae_cv, r2_cv = metrics(y, y_pred_cv)
    print(f"5-fold CV    -> RMSE: {rmse_cv:.3f}, MAE: {mae_cv:.3f}, R2: {r2_cv:.3f}")

# Simple interpretation heuristic; it needs both error figures
if has_in_sample and has_cv:
    # The 1e-9 floor guards against dividing by a zero in-sample error
    ratio = rmse_cv / max(1e-9, rmse_in)
    print(f"RMSE_CV / RMSE_In-sample = {ratio:.2f}")

    # Errors above 80% of the mean target count as large relative to its scale
    large_error = 0.8 * y.mean()
    if ratio > 1.5:
        verdict = "Interpretation: The model likely OVERFITS (CV error significantly worse than in-sample). Consider regularization, more data, or simpler model.)"
    elif rmse_in > large_error and rmse_cv > large_error:
        verdict = "Interpretation: The model likely UNDERFITS (errors are large relative to target scale). Consider richer features, more complex model, or tuning.)"
    else:
        verdict = "Interpretation: No strong over/under-fitting signal; errors look comparable between in-sample and CV."
    print(verdict)

# Parity plots: predicted vs. actual demand, with the y = x line dashed in red.
# Each panel keeps its fixed slot (1 = in-sample, 2 = CV) and is skipped
# entirely when its predictions are missing.
parity_panels = [
    (y_pred_in, 'Predicted Demand (in-sample)', 'In-sample parity'),
    (y_pred_cv, 'Predicted Demand (CV)', 'Cross-validated parity'),
]
plt.figure(figsize=(12,5))
for slot, (pred, pred_label, panel_title) in enumerate(parity_panels, start=1):
    if pred is None:
        continue
    plt.subplot(1, 2, slot)
    plt.scatter(y, pred, alpha=0.3, s=8)
    plt.plot([y.min(), y.max()], [y.min(), y.max()], 'r--')
    plt.xlabel('Actual Demand')
    plt.ylabel(pred_label)
    plt.title(panel_title)

plt.tight_layout()
plt.show()

# Residual histograms (actual minus predicted), same slot layout as above
residual_panels = [
    (y_pred_in, 'Residuals (in-sample)'),
    (y_pred_cv, 'Residuals (CV)'),
]
plt.figure(figsize=(12,4))
for slot, (pred, panel_title) in enumerate(residual_panels, start=1):
    if pred is None:
        continue
    plt.subplot(1, 2, slot)
    plt.hist(y - pred, bins=50)
    plt.title(panel_title)
plt.tight_layout()
plt.show()

# Print a small table of the worst errors (CV)
if y_pred_cv is not None:
    # Everything below is positional and done on plain arrays. The previous
    # version mixed positional lookups (iloc, ndarray indexing) with label-based
    # ones (Series indexing, assign alignment), which agree only while the
    # index of `data` is exactly 0..n-1.
    cv_pred = np.asarray(y_pred_cv)
    abs_err = np.abs(y.to_numpy() - cv_pred)
    worst = np.argsort(abs_err)[::-1][:10]  # positions of the 10 largest errors, largest first
    worst_rows = data.iloc[worst][features + [target]].assign(
        predicted=cv_pred[worst],
        abs_error=abs_err[worst],
    )
    print('\nTop 10 absolute errors (CV):')
    print(worst_rows)

Dataset size for diagnostics: 76000 rows
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001627 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 869
[LightGBM] [Info] Number of data points in the train set: 60800, number of used features: 15
[LightGBM] [Info] Start training from score 104.570855
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001627 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 869
[LightGBM] [Info] Number of data points in the train set: 60800, number of used features: 15
[LightGBM] [Info] Start training from score 104.570855
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001301 seconds.
You can set `force_row_wise=true` to remo

TypeError: got an unexpected keyword argument 'squared'