In [None]:
# Evaluate the incentive recommender: load the pickled model and label encoder,
# score them on X_test, and plot a confusion matrix plus feature importances.
#
# SECURITY: pickle.load can execute arbitrary code embedded in the file. Only
# load artifacts written by this project's own training pipeline.
with open('../models/incentive_recommender.pkl', 'rb') as f:
    xgb_model = pickle.load(f)
with open('../models/incentive_label_encoder.pkl', 'rb') as f:
    label_encoder = pickle.load(f)

# Create synthetic test data for incentives.
# CAVEAT: these "ground-truth" labels are drawn uniformly at random, so the
# metrics below only smoke-test the pipeline; they do not measure real
# recommendation quality. A fixed seed keeps the numbers stable across runs.
incentive_types = label_encoder.classes_
rng = np.random.default_rng(42)
y_test_incentive = rng.choice(incentive_types, size=len(X_test))
y_test_encoded = label_encoder.transform(y_test_incentive)

# Fix the full set of encoded class ids and their display names up front so the
# report and the confusion matrix always have one row/column per known class,
# even when a class is absent from both the labels and the predictions.
class_ids = np.arange(len(incentive_types))
class_names = [str(name) for name in incentive_types]

# Generate predictions
# assumes the model predicts encoded class ids (same space as y_test_encoded)
# -- TODO confirm against the training notebook
y_pred_incentive = xgb_model.predict(X_test)
y_pred_proba_incentive = xgb_model.predict_proba(X_test)

# Calculate metrics
accuracy = accuracy_score(y_test_encoded, y_pred_incentive)
print(f"Accuracy: {accuracy:.4f}")
print("\nClassification Report:")
print(classification_report(y_test_encoded, y_pred_incentive,
                            labels=class_ids,
                            target_names=class_names,
                            zero_division=0))

# Plot confusion matrix
conf_matrix = confusion_matrix(y_test_encoded, y_pred_incentive, labels=class_ids)
# px.imshow has no `text` argument; `text_auto=True` prints each cell's count.
fig = px.imshow(conf_matrix,
                labels=dict(x="Predicted", y="Actual"),
                x=class_names,
                y=class_names,
                text_auto=True,
                color_continuous_scale='RdBu',
                title='Confusion Matrix - Incentive Recommendation')
fig.update_xaxes(tickangle=45)
fig.show()

# Analyze feature importance
# assumes the model was trained on exactly the columns of X_test, in this order;
# otherwise the DataFrame constructor fails on mismatched lengths.
importance_df = pd.DataFrame({
    'feature': X_test.columns,
    'importance': xgb_model.feature_importances_
}).sort_values('importance', ascending=False)

fig = px.bar(importance_df.head(10),
             x='importance',
             y='feature',
             title='Top 10 Features for Incentive Recommendation',
             orientation='h')
# Horizontal bars are drawn bottom-to-top; reverse so the top feature is on top.
fig.update_yaxes(autorange='reversed')
fig.show()

## 3. Incentive Recommendation Evaluation

In [None]:
# Evaluate the customer segmentation: load the pickled clustering model and
# persona profiles, score the clustering on X_test, summarise cluster sizes,
# and plot the clusters in two PCA dimensions.
from sklearn.decomposition import PCA

# Load clustering model
# SECURITY: pickle.load can execute arbitrary code embedded in the file. Only
# load artifacts written by this project's own training pipeline.
with open('../models/kmeans_model.pkl', 'rb') as f:
    kmeans_model = pickle.load(f)
with open('../models/persona_profiles.pkl', 'rb') as f:
    persona_profiles = pickle.load(f)

# Assign every test sample to a cluster once and reuse the labels below.
cluster_labels = kmeans_model.predict(X_test)

# Calculate silhouette score
silhouette_avg = silhouette_score(X_test, cluster_labels)
print(f"Silhouette Score: {silhouette_avg:.4f}")

# Analyze cluster characteristics
cluster_sizes = pd.Series(cluster_labels).value_counts()
n_samples = len(cluster_labels)

print("\nCluster Sizes:")
for cluster, size in cluster_sizes.items():
    print(f"Cluster {cluster}: {size} samples ({size/n_samples*100:.2f}%)")
    # presumably persona_profiles maps cluster id -> dict with a 'description'
    # entry; verify against the notebook that wrote persona_profiles.pkl
    print(f"Profile: {persona_profiles[cluster]['description']}\n")

# Visualize clusters in 2D
# random_state pins any randomized SVD solver so the projection is repeatable.
pca = PCA(n_components=2, random_state=42)
X_pca = pca.fit_transform(X_test)

# Cluster ids are categories, not quantities: passing them as strings makes
# plotly use discrete colours with a legend instead of a continuous colour bar.
fig = px.scatter(
    x=X_pca[:, 0], y=X_pca[:, 1],
    color=cluster_labels.astype(str),
    title='Customer Segments Visualization (PCA)',
    labels={'x': 'First Principal Component',
            'y': 'Second Principal Component',
            'color': 'Cluster'}
)
fig.show()

## 2. Customer Segmentation Evaluation

In [None]:
# Evaluate the purchase predictor: load the held-out test split and the pickled
# model, report accuracy / ROC-AUC / confusion matrix / classification report,
# then explain the predictions with SHAP.

# Load data and model
X_test = pd.read_csv('../data/processed/X_test.csv')
y_test = pd.read_csv('../data/processed/y_test.csv')

# SECURITY: pickle.load can execute arbitrary code embedded in the file. Only
# load artifacts written by this project's own training pipeline.
with open('../models/purchase_predictor_catboost.pkl', 'rb') as f:
    catboost_model = pickle.load(f)

# Generate predictions
y_pred = catboost_model.predict(X_test)
# Column 1 of predict_proba is used as the positive-class score for ROC-AUC.
y_pred_proba = catboost_model.predict_proba(X_test)[:, 1]

# Calculate metrics
accuracy = accuracy_score(y_test, y_pred)
auc_score = roc_auc_score(y_test, y_pred_proba)
conf_matrix = confusion_matrix(y_test, y_pred)

print(f"Accuracy: {accuracy:.4f}")
print(f"ROC-AUC Score: {auc_score:.4f}")
print("\nConfusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Plot confusion matrix
# px.imshow has no `text` argument; `text_auto=True` prints each cell's count.
# assumes the sorted label order is (no purchase, purchase) so the tick names
# line up with the matrix rows/columns -- TODO confirm label encoding
fig = px.imshow(conf_matrix,
                labels=dict(x="Predicted", y="Actual"),
                x=['No Purchase', 'Purchase'],
                y=['No Purchase', 'Purchase'],
                text_auto=True,
                color_continuous_scale='RdBu',
                title='Confusion Matrix - Purchase Prediction')
fig.show()

# Generate SHAP values
explainer = shap.TreeExplainer(catboost_model)
shap_values = explainer.shap_values(X_test)

# Plot SHAP summary
shap.summary_plot(shap_values, X_test)

## 1. Purchase Prediction Model Evaluation

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
from sklearn.metrics import (
    accuracy_score, roc_auc_score, confusion_matrix,
    classification_report, silhouette_score
)
import plotly.express as px
import plotly.graph_objects as go
import pickle
import shap

# 📊 Model Evaluation - Online Shopper Intention

This notebook evaluates the performance of our three models:
1. Purchase Prediction: Accuracy, ROC-AUC, Confusion Matrix, and SHAP Feature Importance
2. Customer Segmentation: Silhouette Score, Cluster Analysis, and PCA Visualization
3. Incentive Recommendation: Multi-class Metrics, Confusion Matrix, and Feature Importance