# Telecom Customer Churn Prediction - Model Deployment

This notebook demonstrates how to load trained models and use them for prediction on new data.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sys
import os
import joblib

# Add the scripts directory to the path
sys.path.append('../scripts')

# Import our modules
from base_model import BaseModel
from gradient_boosting import XGBoostModel, LightGBMModel
from neural_network import NeuralNetworkModel
from training_pipeline import ModelTrainer
from utils import align_features

# Global plot appearance: apply the style sheet first, then override
# the default figure size and font size on top of it.
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams.update({
    'figure.figsize': (12, 8),
    'font.size': 12,
})

## 1. Load the Data

In [None]:
# Load the preprocessed data.
# Feature files are kept as DataFrames; target files are collapsed to a Series.
# NOTE: `pd.read_csv(..., squeeze=True)` was deprecated in pandas 1.4 and
# removed in pandas 2.0, so the single-column target frames are squeezed
# explicitly with `.squeeze("columns")`, which is the documented replacement.
X_train = pd.read_csv('../data/processed/X_train.csv')
y_train = pd.read_csv('../data/processed/y_train.csv').squeeze('columns')
X_val = pd.read_csv('../data/processed/X_val.csv')
y_val = pd.read_csv('../data/processed/y_val.csv').squeeze('columns')
X_holdout = pd.read_csv('../data/processed/X_holdout.csv')
y_holdout = pd.read_csv('../data/processed/y_holdout.csv').squeeze('columns')

print(f"Training set: {X_train.shape}")
print(f"Validation set: {X_val.shape}")
print(f"Holdout set: {X_holdout.shape}")

## 2. Load the Best Model

Based on our previous evaluation, we'll load the best-performing model.

In [None]:
# Location of the saved model artifact - point this at your best model
model_path = '../models/XGBoost_Churn_Predictor'

# Restore the model through the project's XGBoostModel loader and report its name
best_model = XGBoostModel.load_model(model_path)
print("Loaded model: {}".format(best_model.model_name))

## 3. Align Features for Prediction

Before making predictions, we need to ensure that the features in our holdout set match those used during training.

In [None]:
# Compare the column names of the training and holdout frames as sets
train_features = set(X_train.columns)
holdout_features = set(X_holdout.columns)

# Columns present on one side only (set difference in each direction)
only_in_train = train_features - holdout_features
only_in_holdout = holdout_features - train_features

print(f"Number of features in training set: {len(train_features)}")
print(f"Number of features in holdout set: {len(holdout_features)}")
print(f"Features in training but not in holdout: {only_in_train}")
print(f"Features in holdout but not in training: {only_in_holdout}")

In [None]:
# Run the holdout frame through the project's align_features helper,
# using the training frame as the reference
X_holdout_aligned = align_features(X_train, X_holdout)

# Sanity check: the column lists must be equal, order included
columns_match = X_train.columns.tolist() == X_holdout_aligned.columns.tolist()
print(f"Aligned holdout set shape: {X_holdout_aligned.shape}")
print(f"Features match training set: {columns_match}")

## 4. Make Predictions on Holdout Set

In [None]:
# Decision threshold taken from the previous evaluation.
# Load it from the saved model history or set it by hand.
optimal_threshold = 0.38  # Update this with your optimal threshold

# Probability of the positive class (column 1), then the thresholded label
y_pred_proba = best_model.predict_proba(X_holdout_aligned)[:, 1]
y_pred = (y_pred_proba >= optimal_threshold).astype(int)

# Use the CustomerID column as the identifier when it is present;
# otherwise fall back to the frame's index.
if 'CustomerID' in X_holdout_aligned.columns:
    customer_ids = X_holdout_aligned['CustomerID']
else:
    customer_ids = X_holdout_aligned.index

# Assemble the prediction table
predictions_df = pd.DataFrame({
    'customer_id': customer_ids,
    'churn_probability': y_pred_proba,
    'predicted_churn': y_pred,
})

# Preview the first rows
predictions_df.head()

## 5. Evaluate Predictions on Holdout Set

If we have the ground truth for the holdout set, we can evaluate our predictions.

In [None]:
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

if y_holdout is None:
    print("No ground truth available for holdout set.")
else:
    # Per-class metrics computed on the thresholded predictions
    print("Classification Report:")
    print(classification_report(y_holdout, y_pred))

    # AUC is computed from the raw probabilities, not the thresholded labels
    auc = roc_auc_score(y_holdout, y_pred_proba)
    print(f"AUC: {auc:.4f}")

    # Confusion matrix heatmap; the axes returned by seaborn are used for labelling
    class_labels = ['No Churn', 'Churn']
    plt.figure(figsize=(10, 8))
    cm = confusion_matrix(y_holdout, y_pred)
    ax = sns.heatmap(
        cm,
        annot=True,
        fmt="d",
        cmap="Blues",
        cbar=False,
        xticklabels=class_labels,
        yticklabels=class_labels,
    )
    ax.set_title(f"Confusion Matrix (Threshold={optimal_threshold:.2f})")
    ax.set_xlabel("Predicted")
    ax.set_ylabel("Actual")
    plt.tight_layout()
    plt.show()

## 6. Identify High-Risk Customers

Let's identify customers with the highest churn probability.

In [None]:
# Rank every prediction from highest to lowest churn probability,
# then keep the first 20 rows of that ranking
ranked_predictions = predictions_df.sort_values(by='churn_probability', ascending=False)
high_risk_customers = ranked_predictions.head(20)
high_risk_customers

## 7. Feature Importance Analysis

Let's examine which features are most important for predicting churn.

In [None]:
# Plot feature importance (only when the loaded model exposes the helper)
if hasattr(best_model, 'plot_feature_importance'):
    importance_df = best_model.plot_feature_importance(X_train, y_train, top_n=20)
    # Jupyter only auto-renders the last *top-level* expression of a cell.
    # A bare expression nested inside this `if` would be silently discarded,
    # so the table is shown explicitly. `display` is provided as a builtin
    # by the IPython kernel, so no import is needed inside a notebook.
    display(importance_df.head(20))

## 8. Save Predictions

Finally, let's save our predictions for further analysis or deployment.

In [None]:
# Save predictions to CSV
predictions_path = '../data/predictions/holdout_predictions.csv'

# `to_csv` does not create missing parent directories, so make sure the
# output folder exists (no-op when it is already there).
os.makedirs(os.path.dirname(predictions_path), exist_ok=True)

predictions_df.to_csv(predictions_path, index=False)
print(f"Predictions saved to '{predictions_path}'")