# Model Testing Notebook

This notebook tests ML models from `New_ML_Models` on test data from the `Testing` folder.

## Models to Test:
1. **stock_forecaster**: LSTM (time series)
2. **revenue_predictor**: XGBoost (time series)
3. **customer_churn**: Random Forest (classification)
4. **Campaign_ROI_Predictor**: Random Forest Classifier (classification only)

**Note:** Operational Risk testing has been moved to `operational_risk_test.ipynb`


In [1]:
# Import libraries
import pandas as pd
import numpy as np
import joblib
import json
import os
from tensorflow.keras.models import load_model
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    mean_squared_error, mean_absolute_error, r2_score
)
import warnings
# Suppress every warning for the rest of the kernel session to keep cell output compact.
# NOTE(review): this blanket filter also hides any warnings the libraries emit while
# loading or scoring the models in later cells — consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

# Confirmation banner so a reader can see the import cell finished.
print("✓ Libraries imported successfully")


  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


✓ Libraries imported successfully


## 1. Load Test Data


In [3]:
# Folder containing the four test CSVs. The trailing slash is part of the value,
# so file names are appended to it directly.
test_data_dir = "D:/Deloitte/Testing/"

# Read one DataFrame per model under test.
stock_test = pd.read_csv(test_data_dir + "Stock_Forecasting_Test.csv")
revenue_test = pd.read_csv(test_data_dir + "revenue_prediction_test.csv")
customer_churn_test = pd.read_csv(test_data_dir + "customer_churn_test.csv")
campaign_test = pd.read_csv(test_data_dir + "campaign_success_test.csv")

# Report the row count of each dataset, in the same order they were loaded.
for label, frame in (
    ("Stock", stock_test),
    ("Revenue", revenue_test),
    ("Customer churn", customer_churn_test),
    ("Campaign", campaign_test),
):
    print(f"✓ {label} test data: {len(frame)} rows")


✓ Stock test data: 238 rows
✓ Revenue test data: 879 rows
✓ Customer churn test data: 575 rows
✓ Campaign test data: 129 rows


## 2. Test Stock Forecaster (LSTM)


In [7]:
# Load stock forecaster model
print("Loading Stock Forecaster LSTM model...")

model_dir = "D:/Deloitte/New_ML_Models/stock_forecaster/models/lstm_models/"
scaler_dir = "D:/Deloitte/New_ML_Models/stock_forecaster/models/scalers/"
mse_path = "D:/Deloitte/New_ML_Models/stock_forecaster/MSE.json"

# Load category mapping (kept in the namespace; not consumed by this cell)
with open("D:/Deloitte/New_ML_Models/stock_forecaster/full_menu_mapping.json", 'r', encoding='utf-8') as f:
    category_mapping = json.load(f)

# One LSTM model and one scaler per category, keyed by category name
lstm_models = {}
scalers = {}
categories = [
    "Beverages", "Breakfast_&_Brunch", "Desserts_&_Sweets", 
    "Handhelds", "Main_Courses", "Misc_Services", 
    "Other_Uncategorized", "Salads_&_Greens", 
    "Sides_&_Snacks", "Sushi_&_Asian"
]

# A category is only usable when BOTH its model file and its scaler file exist.
# Categories that fail this check are remembered so they can be reported instead
# of disappearing silently (predict_stock returns None for them).
missing_categories = []
for cat in categories:
    model_path = f"{model_dir}{cat}_lstm.h5"
    scaler_path = f"{scaler_dir}{cat}_scaler.joblib"
    if os.path.exists(model_path) and os.path.exists(scaler_path):
        # compile=False: the model is loaded without compiling it.
        lstm_models[cat] = load_model(model_path, compile=False)
        scalers[cat] = joblib.load(scaler_path)
    else:
        missing_categories.append(cat)

# Load MSE errors (kept in the namespace; not consumed by this cell)
with open(mse_path, 'r', encoding='utf-8') as f:
    mse_errors = json.load(f)

print(f"✓ Loaded {len(lstm_models)} LSTM models")
if missing_categories:
    print(f"⚠ Skipped {len(missing_categories)} categories with a missing model or scaler file: {missing_categories}")

# Test stock forecaster
def predict_stock(category_name, month, last_qty):
    """Forecast a quantity for one category from the month and the last known quantity.

    Returns None when no model is loaded for ``category_name``. Otherwise the
    model output is mapped back through the category's scaler and any
    non-positive result is returned as 0.
    """
    if category_name not in lstm_models:
        return None

    category_scaler = scalers[category_name]

    # Only the quantity goes through the scaler; the month is passed as-is.
    scaled_qty = float(category_scaler.transform([[last_qty]])[0][0])

    # Model input has shape (1, 1, 2): [month, scaled quantity].
    lstm_input = np.array([[month, scaled_qty]]).reshape(1, 1, -1)

    scaled_forecast = lstm_models[category_name].predict(lstm_input, verbose=0)
    forecast = float(category_scaler.inverse_transform(scaled_forecast)[0][0])

    # Floor at zero.
    return forecast if forecast > 0 else 0

# Test on stock test data
# Fallback model used when a row's category cannot be resolved.
default_category = categories[0]

# NOTE(review): assumes a per-row category, if present, lives in a column named
# 'category' — confirm against Stock_Forecasting_Test.csv. Without it every row
# is scored with the default category's model, which is what this cell did before.
has_category_column = 'category' in stock_test.columns
if not has_category_column:
    print(f"Note: no 'category' column in stock test data; all rows scored with '{default_category}'")

stock_results = []
for idx, row in stock_test.iterrows():
    # Missing month falls back to 1 and a missing lagged quantity to 0.
    month = int(row['month']) if pd.notna(row['month']) else 1
    lagged_qt = row['lagged_qt'] if pd.notna(row['lagged_qt']) else 0

    # Use the row's own category when it maps onto a loaded model
    # (spaces are normalised to underscores to match the model keys);
    # otherwise fall back to the default category.
    category = default_category
    if has_category_column and pd.notna(row['category']):
        candidate = str(row['category']).strip().replace(' ', '_')
        if candidate in lstm_models:
            category = candidate

    try:
        pred = predict_stock(category, month, lagged_qt)
        stock_results.append({
            'month': month,
            'lagged_qt': lagged_qt,
            'predicted_qt': pred,
            'category': category
        })
    except Exception as e:
        # Keep the row in the results so failures stay visible in the export.
        print(f"Error at row {idx}: {e}")
        stock_results.append({
            'month': month,
            'lagged_qt': lagged_qt,
            'predicted_qt': None,
            'category': category,
            'error': str(e)
        })

stock_df = pd.DataFrame(stock_results)
print(f"\n✓ Stock forecaster tested on {len(stock_results)} samples")
print(f"  Predictions made: {stock_df['predicted_qt'].notna().sum()}")


Loading Stock Forecaster LSTM model...
✓ Loaded 10 LSTM models

✓ Stock forecaster tested on 238 samples
  Predictions made: 238


## 3. Test Revenue Predictor (XGBoost)


In [9]:
# Location of the serialized revenue model.
revenue_model_path = "D:/Deloitte/New_ML_Models/revenue_predictor/revenue_predictor_xgb.pkl"

# Announce, deserialize with joblib, then confirm.
print("Loading Revenue Predictor XGBoost model...")
revenue_model = joblib.load(revenue_model_path)
print("✓ Revenue model loaded")

# Test revenue predictor
def predict_revenue(is_weekend, is_holiday, lagged_revenue):
    """Score one sample with the loaded revenue model.

    The three inputs are passed to the model as a single row in the order
    (is_weekend, is_holiday, lagged_revenue). The model's first output is
    returned as a float, with non-positive values returned as 0.
    """
    feature_row = np.array([[is_weekend, is_holiday, lagged_revenue]])
    raw_output = revenue_model.predict(feature_row)
    revenue = float(raw_output[0])
    return revenue if revenue > 0 else 0

# Test on revenue test data
revenue_results = []
for idx, row in revenue_test.iterrows():
    # Missing feature values are scored as 0.0; a missing target is kept as None
    # so the row is excluded from the metrics below.
    is_weekend = float(row['is_weekend']) if pd.notna(row['is_weekend']) else 0.0
    is_holiday = float(row['is_holiday']) if pd.notna(row['is_holiday']) else 0.0
    lagged_revenue = float(row['lagged_revenue']) if pd.notna(row['lagged_revenue']) else 0.0
    actual = float(row['val']) if pd.notna(row['val']) else None

    # Fields shared by the success and failure records.
    record = {
        'is_weekend': is_weekend,
        'is_holiday': is_holiday,
        'lagged_revenue': lagged_revenue,
        'actual_revenue': actual,
    }
    try:
        record['predicted_revenue'] = predict_revenue(is_weekend, is_holiday, lagged_revenue)
    except Exception as e:
        # Keep the row in the results so failures stay visible in the export.
        print(f"Error at row {idx}: {e}")
        record['predicted_revenue'] = None
        record['error'] = str(e)
    revenue_results.append(record)

revenue_df = pd.DataFrame(revenue_results)

# Rows usable for metrics need both an actual value and a prediction.
has_prediction = revenue_df['predicted_revenue'].notna()
valid_mask = revenue_df['actual_revenue'].notna() & has_prediction

# The header is printed unconditionally: previously nothing at all was reported
# when actuals existed but no prediction succeeded.
print(f"\n✓ Revenue predictor tested on {len(revenue_results)} samples")
if valid_mask.sum() > 0:
    actual_vals = revenue_df.loc[valid_mask, 'actual_revenue']
    pred_vals = revenue_df.loc[valid_mask, 'predicted_revenue']

    mse = mean_squared_error(actual_vals, pred_vals)
    mae = mean_absolute_error(actual_vals, pred_vals)
    r2 = r2_score(actual_vals, pred_vals)

    print(f"  Valid predictions: {valid_mask.sum()}")
    print(f"  MSE: {mse:.2f}")
    print(f"  MAE: {mae:.2f}")
    print(f"  R²: {r2:.4f}")
else:
    # No (actual, prediction) pairs to score: report only the prediction count.
    print(f"  Predictions made: {has_prediction.sum()}")


Loading Revenue Predictor XGBoost model...
✓ Revenue model loaded

✓ Revenue predictor tested on 879 samples
  Valid predictions: 879
  MSE: 3510913.61
  MAE: 1087.91
  R²: 0.8762


## 4. Test Customer Churn Predictor (Random Forest)


In [11]:
# Location of the serialized churn bundle.
churn_model_path = "D:/Deloitte/New_ML_Models/customer_churn/model_bundle.joblib"

print("Loading Customer Churn Random Forest model...")

# The bundle is indexed by key: "model" holds the estimator and "scaler" the
# feature scaler applied before prediction.
churn_bundle = joblib.load(churn_model_path)
churn_model, churn_scaler = churn_bundle["model"], churn_bundle["scaler"]

print("✓ Customer churn model loaded")

# Test customer churn predictor
def predict_churn(discount_amount, points_earned, price, waiting_time):
    """Score one customer with the loaded churn model.

    The four inputs form a single row (in the order given), which is passed
    through ``churn_scaler`` before prediction.

    Returns a ``(label, probability)`` tuple, where ``probability`` is the
    second column of ``predict_proba`` for that row.
    """
    raw_row = [[discount_amount, points_earned, price, waiting_time]]
    scaled_row = churn_scaler.transform(raw_row)
    label = churn_model.predict(scaled_row)[0]
    churn_probability = churn_model.predict_proba(scaled_row)[0][1]
    return label, churn_probability

# Test on customer churn test data
churn_results = []
for idx, row in customer_churn_test.iterrows():
    # Missing feature values are scored as 0.0; a missing label is kept as None
    # so the row is excluded from the metrics below.
    discount_amount = float(row['discount_amount']) if pd.notna(row['discount_amount']) else 0.0
    points_earned = float(row['points_earned']) if pd.notna(row['points_earned']) else 0.0
    price = float(row['price']) if pd.notna(row['price']) else 0.0
    waiting_time = float(row['waiting_time']) if pd.notna(row['waiting_time']) else 0.0
    actual = int(row['is_churned']) if pd.notna(row['is_churned']) else None

    # Fields shared by the success and failure records, so a failed row keeps its
    # inputs in the export (the failure record used to drop the feature columns).
    record = {
        'user_id': row.get('user_id', idx),  # falls back to the row index when absent
        'discount_amount': discount_amount,
        'points_earned': points_earned,
        'price': price,
        'waiting_time': waiting_time,
        'actual_churn': actual,
    }
    try:
        pred, prob = predict_churn(discount_amount, points_earned, price, waiting_time)
        record['predicted_churn'] = int(pred)
        record['churn_probability'] = float(prob)
    except Exception as e:
        print(f"Error at row {idx}: {e}")
        record['predicted_churn'] = None
        record['churn_probability'] = None
        record['error'] = str(e)
    churn_results.append(record)

churn_df = pd.DataFrame(churn_results)

# Rows usable for metrics need both a label and a prediction.
has_prediction = churn_df['predicted_churn'].notna()
valid_mask = churn_df['actual_churn'].notna() & has_prediction

# The header is printed unconditionally: previously nothing at all was reported
# when labels existed but no prediction succeeded.
print(f"\n✓ Customer churn predictor tested on {len(churn_results)} samples")
if valid_mask.sum() > 0:
    # Cast back to int: a single missing value elsewhere in the column turns it
    # into floats, which would change the labels shown in the report.
    actual_vals = churn_df.loc[valid_mask, 'actual_churn'].astype(int)
    pred_vals = churn_df.loc[valid_mask, 'predicted_churn'].astype(int)

    # NOTE(review): the saved run shows only 2 of 575 rows with label 0, so
    # accuracy mostly reflects the majority class — read the per-class rows of
    # the report, and confirm the label polarity of `is_churned`.
    accuracy = accuracy_score(actual_vals, pred_vals)

    print(f"  Valid predictions: {valid_mask.sum()}")
    print(f"  Accuracy: {accuracy:.4f}")
    print(f"\nClassification Report:")
    print(classification_report(actual_vals, pred_vals))
else:
    # No (label, prediction) pairs to score: report only the prediction count.
    print(f"  Predictions made: {has_prediction.sum()}")


Loading Customer Churn Random Forest model...
✓ Customer churn model loaded

✓ Customer churn predictor tested on 575 samples
  Valid predictions: 575
  Accuracy: 0.9948

Classification Report:
              precision    recall  f1-score   support

           0       0.33      0.50      0.40         2
           1       1.00      1.00      1.00       573

    accuracy                           0.99       575
   macro avg       0.67      0.75      0.70       575
weighted avg       1.00      0.99      1.00       575



## 5. Test Campaign Success Predictor (Random Forest Classifier)


In [12]:
# Load campaign classifier model (only classifier, not regressor)
print("Loading Campaign Success Classifier Random Forest model...")

campaign_model_dir = "D:/Deloitte/New_ML_Models/Campaign_ROI_Predictor/models/"
campaign_classifier = joblib.load(os.path.join(campaign_model_dir, 'campaign_success_classifier.pkl'))
campaign_scaler = joblib.load(os.path.join(campaign_model_dir, 'campaign_scaler.pkl'))
campaign_features = joblib.load(os.path.join(campaign_model_dir, 'campaign_features.pkl'))

print(f"✓ Campaign classifier loaded")
print(f"  Feature columns: {campaign_features}")

# predict_campaign_success builds its input row in this fixed order. If the saved
# feature list disagrees, the scaler and classifier would receive columns in the
# wrong positions without raising, so surface the mismatch here.
expected_feature_order = ['duration_days', 'discount', 'redemptions', 'redemptions_per_duration']
if list(campaign_features) != expected_feature_order:
    print(f"⚠ Saved feature columns differ from the order used for prediction: {expected_feature_order}")

# Test campaign classifier
def predict_campaign_success(duration_days, discount, redemptions, redemptions_per_duration):
    """Score one campaign with the loaded campaign classifier.

    The four inputs form a single row (in the order given), which is passed
    through ``campaign_scaler`` before prediction.

    Returns a ``(label, probability)`` tuple, where ``probability`` is the
    second column of ``predict_proba`` for that row.
    """
    raw_row = [[duration_days, discount, redemptions, redemptions_per_duration]]
    scaled_row = campaign_scaler.transform(raw_row)
    label = campaign_classifier.predict(scaled_row)[0]
    success_probability = campaign_classifier.predict_proba(scaled_row)[0][1]
    return label, success_probability

# Test on campaign test data
campaign_results = []
for idx, row in campaign_test.iterrows():
    # Missing feature values are scored as 0.0; a missing label is kept as None
    # so the row is excluded from the metrics below.
    duration_days = float(row['duration_days']) if pd.notna(row['duration_days']) else 0.0
    discount = float(row['discount']) if pd.notna(row['discount']) else 0.0
    redemptions = float(row['redemptions']) if pd.notna(row['redemptions']) else 0.0
    redemptions_per_duration = float(row['redemptions_per_duration']) if pd.notna(row['redemptions_per_duration']) else 0.0
    actual = int(row['val']) if pd.notna(row['val']) else None

    # Fields shared by the success and failure records.
    record = {
        'duration_days': duration_days,
        'discount': discount,
        'redemptions': redemptions,
        'redemptions_per_duration': redemptions_per_duration,
        'actual_success': actual,
    }
    try:
        pred, prob = predict_campaign_success(duration_days, discount, redemptions, redemptions_per_duration)
        record['predicted_success'] = int(pred)
        record['success_probability'] = float(prob)
    except Exception as e:
        # Keep the row in the results so failures stay visible in the export.
        print(f"Error at row {idx}: {e}")
        record['predicted_success'] = None
        record['success_probability'] = None
        record['error'] = str(e)
    campaign_results.append(record)

campaign_df = pd.DataFrame(campaign_results)

# Rows usable for metrics need both a label and a prediction.
has_prediction = campaign_df['predicted_success'].notna()
valid_mask = campaign_df['actual_success'].notna() & has_prediction

# The header is printed unconditionally: previously nothing at all was reported
# when labels existed but no prediction succeeded.
print(f"\n✓ Campaign success predictor tested on {len(campaign_results)} samples")
if valid_mask.sum() > 0:
    # Cast back to int: a single missing value elsewhere in the column turns it
    # into floats, which would change the labels shown in the report.
    actual_vals = campaign_df.loc[valid_mask, 'actual_success'].astype(int)
    pred_vals = campaign_df.loc[valid_mask, 'predicted_success'].astype(int)

    accuracy = accuracy_score(actual_vals, pred_vals)

    print(f"  Valid predictions: {valid_mask.sum()}")
    print(f"  Accuracy: {accuracy:.4f}")
    print(f"\nClassification Report:")
    print(classification_report(actual_vals, pred_vals))
else:
    # No (label, prediction) pairs to score: report only the prediction count.
    print(f"  Predictions made: {has_prediction.sum()}")


Loading Campaign Success Classifier Random Forest model...
✓ Campaign classifier loaded
  Feature columns: ['duration_days', 'discount', 'redemptions', 'redemptions_per_duration']

✓ Campaign success predictor tested on 129 samples
  Valid predictions: 129
  Accuracy: 0.7984

Classification Report:
              precision    recall  f1-score   support

           0       0.81      0.97      0.88       103
           1       0.50      0.12      0.19        26

    accuracy                           0.80       129
   macro avg       0.66      0.54      0.54       129
weighted avg       0.75      0.80      0.74       129



## 6. Export Results


In [13]:
# Output folder for the result CSVs (relative to the kernel's working directory);
# created on demand.
exports_dir = "Testing/exports"
os.makedirs(exports_dir, exist_ok=True)

# Single table of file name -> result frame, so the files written and the
# files listed below cannot drift apart.
result_files = {
    "stock_forecaster_results.csv": stock_df,
    "revenue_predictor_results.csv": revenue_df,
    "customer_churn_predictor_results.csv": churn_df,
    "campaign_success_predictor_results.csv": campaign_df,
}

# Write every frame without its index column.
for file_name, frame in result_files.items():
    frame.to_csv(f"{exports_dir}/{file_name}", index=False)

# List what was written.
print("✓ All results exported to CSV files:")
for file_name in result_files:
    print(f"  - {exports_dir}/{file_name}")


✓ All results exported to CSV files:
  - Testing/exports/stock_forecaster_results.csv
  - Testing/exports/revenue_predictor_results.csv
  - Testing/exports/customer_churn_predictor_results.csv
  - Testing/exports/campaign_success_predictor_results.csv


## 7. Summary Statistics


In [14]:
# Print a consolidated summary of all four test runs.
banner = "=" * 80
print(banner)
print("TESTING SUMMARY")
print(banner)

# 1. Stock forecaster: counts only.
print("\n1. Stock Forecaster (LSTM):")
print(f"   Total samples: {len(stock_df)}")
print(f"   Successful predictions: {stock_df['predicted_qt'].notna().sum()}")

# 2. Revenue predictor: counts, plus MSE/MAE over rows that have both an
#    actual value and a prediction.
print("\n2. Revenue Predictor (XGBoost):")
print(f"   Total samples: {len(revenue_df)}")
print(f"   Successful predictions: {revenue_df['predicted_revenue'].notna().sum()}")
valid = revenue_df['actual_revenue'].notna() & revenue_df['predicted_revenue'].notna()
if valid.sum() > 0:
    scored_actual = revenue_df.loc[valid, 'actual_revenue']
    scored_pred = revenue_df.loc[valid, 'predicted_revenue']
    print(f"   MSE: {mean_squared_error(scored_actual, scored_pred):.2f}")
    print(f"   MAE: {mean_absolute_error(scored_actual, scored_pred):.2f}")

# 3 and 4. The two classifiers share one layout: counts, plus accuracy over rows
#          that have both a label and a prediction.
classifier_sections = (
    ("\n3. Customer Churn Predictor (Random Forest):", churn_df, 'actual_churn', 'predicted_churn'),
    ("\n4. Campaign Success Predictor (Random Forest Classifier):", campaign_df, 'actual_success', 'predicted_success'),
)
for heading, frame, actual_col, pred_col in classifier_sections:
    print(heading)
    print(f"   Total samples: {len(frame)}")
    print(f"   Successful predictions: {frame[pred_col].notna().sum()}")
    valid = frame[actual_col].notna() & frame[pred_col].notna()
    if valid.sum() > 0:
        print(f"   Accuracy: {accuracy_score(frame.loc[valid, actual_col], frame.loc[valid, pred_col]):.4f}")

print("\n" + banner)
print("Testing complete! All results exported to CSV files.")
print(banner)


TESTING SUMMARY

1. Stock Forecaster (LSTM):
   Total samples: 238
   Successful predictions: 238

2. Revenue Predictor (XGBoost):
   Total samples: 879
   Successful predictions: 879
   MSE: 3510913.61
   MAE: 1087.91

3. Customer Churn Predictor (Random Forest):
   Total samples: 575
   Successful predictions: 575
   Accuracy: 0.9948

4. Campaign Success Predictor (Random Forest Classifier):
   Total samples: 129
   Successful predictions: 129
   Accuracy: 0.7984

Testing complete! All results exported to CSV files.
