In [1]:
# Essential imports for data science and machine learning
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

# PyTorch for deep learning
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import (classification_report, confusion_matrix,
                           roc_auc_score, roc_curve, precision_recall_curve,
                           f1_score, precision_score, recall_score)
from sklearn.utils.class_weight import compute_class_weight

# Reproducibility: seed NumPy and PyTorch (plus the CUDA generator when a GPU is present)
np.random.seed(42)
torch.manual_seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed(42)

# Plot defaults: stock matplotlib style, larger canvas and font, seaborn "husl" palette
plt.style.use('default')
plt.rcParams.update({
    'figure.figsize': (12, 8),
    'font.size': 12,
})
sns.set_palette("husl")

# Choose the compute device once; later cells reuse `device`
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Report the environment so the run can be reproduced later
print(f"🚀 Environment Setup:")
print(f"   Device: {device}")
print(f"   PyTorch Version: {torch.__version__}")
print(f"   Pandas Version: {pd.__version__}")
print(f"   NumPy Version: {np.__version__}")

# GPU name and total memory (bytes converted to GiB) are only available with CUDA
if torch.cuda.is_available():
    print(f"   GPU: {torch.cuda.get_device_name(0)}")
    print(f"   GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")

print("\n✅ All libraries imported successfully!")
print("📊 Ready to begin customer churn analysis...")

🚀 Environment Setup:
   Device: cpu
   PyTorch Version: 2.8.0+cpu
   Pandas Version: 2.3.3
   NumPy Version: 2.3.3

✅ All libraries imported successfully!
📊 Ready to begin customer churn analysis...


In [2]:
# Redefining Model Architecture to perform serialization

class ChurnPredictor(nn.Module):
    """
    Deep Neural Network for Customer Churn Prediction

    Architecture:
    - Input: one row of ``input_size`` features per sample
    - Hidden layers: Linear -> BatchNorm1d -> ReLU -> Dropout, one group per
      entry of ``hidden_sizes``
    - Output: a single raw logit per sample. No sigmoid is applied inside the
      network, so pair it with ``nn.BCEWithLogitsLoss`` for training and apply
      ``torch.sigmoid`` to the output to obtain a churn probability.

    Args:
        input_size (int): Number of input features.
        hidden_sizes (sequence of int): Width of each hidden layer, in order.
            An empty sequence yields a single Linear layer.
        dropout_rate (float): Dropout probability used after every hidden layer.
    """

    def __init__(self, input_size, hidden_sizes=(256, 128, 64), dropout_rate=0.2):
        super().__init__()

        # Build the stack dynamically. The layer order must stay exactly
        # Linear, BatchNorm1d, ReLU, Dropout so that the ``network.<index>``
        # keys of previously saved state dicts keep matching.
        layers = []
        prev_size = input_size

        for hidden_size in hidden_sizes:
            layers.extend([
                nn.Linear(prev_size, hidden_size),
                nn.BatchNorm1d(hidden_size),
                nn.ReLU(),
                nn.Dropout(dropout_rate),
            ])
            prev_size = hidden_size

        # Output layer: one logit (sigmoid intentionally omitted, see class docstring)
        layers.append(nn.Linear(prev_size, 1))

        self.network = nn.Sequential(*layers)

        # Initialize weights
        self._initialize_weights()

    def _initialize_weights(self):
        """Apply Xavier/Glorot uniform init to every Linear weight and zero its bias."""
        for module in self.modules():
            if isinstance(module, nn.Linear):
                nn.init.xavier_uniform_(module.weight)
                nn.init.constant_(module.bias, 0)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the raw logits."""
        return self.network(x)

In [10]:
import pickle
import torch

# 1. Load preprocessing pipeline
# NOTE: pickle.load can execute arbitrary code - only open artifacts produced by this project.
with open('../outputs/preprocessing_pipeline.pkl', 'rb') as f:
    pipeline_objects = pickle.load(f)

# Unpack every preprocessing component stored in the pipeline dict
(scaler, label_encoders, feature_columns, categorical_features,
 numerical_features, binary_features, class_weight_dict) = (
    pipeline_objects[key]
    for key in (
        'scaler', 'label_encoders', 'feature_columns', 'categorical_features',
        'numerical_features', 'binary_features', 'class_weight_dict',
    )
)

# Subset of the components that gets bundled with the serialized model later on
preprocessing_objects = dict(
    scaler=scaler,
    label_encoders=label_encoders,
    feature_columns=feature_columns,
    class_weights=class_weight_dict,
)

# 2. Load PyTorch tensors
with open('../outputs/tensors/tensor_data.pkl', 'rb') as f:
    tensor_objects = pickle.load(f)

# Unpack the train/test feature and label tensors
X_train_tensor, X_test_tensor, y_train_tensor, y_test_tensor = (
    tensor_objects[key]
    for key in ('X_train_tensor', 'X_test_tensor', 'y_train_tensor', 'y_test_tensor')
)

# 3. Select the compute device (the trained weights themselves are loaded in a later cell)
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# 4. Load model result metadata
with open('../outputs/model_metadata.pkl', 'rb') as f:
    metadata_object = pickle.load(f)

# Test-set metrics and decision threshold recorded at training time
roc_auc_test, test_f1, test_precision, test_recall, optimal_threshold = (
    metadata_object[key]
    for key in ('auc_score', 'f1_score', 'precision', 'recall', 'optimal_threshold')
)

In [11]:
# The column count of the training tensor fixes the width of the input layer
input_size = X_train_tensor.shape[1]
print(f"   ✅ Input size: {input_size} features")

# Instantiate the network (hidden widths shrink progressively), then place it on the device
model = ChurnPredictor(input_size=input_size, hidden_sizes=[256, 128, 64], dropout_rate=0.2)
model = model.to(device)

print(f"   ✅ Model created and moved to {device}")
print(f"   ✅ Total parameters: {sum(p.numel() for p in model.parameters()):,}")

# Layer-by-layer summary of the architecture
print(f"\n📋 MODEL ARCHITECTURE:")
print(model)

   ✅ Input size: 41 features
   ✅ Model created and moved to cpu
   ✅ Total parameters: 52,865

📋 MODEL ARCHITECTURE:
ChurnPredictor(
  (network): Sequential(
    (0): Linear(in_features=41, out_features=256, bias=True)
    (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.2, inplace=False)
    (4): Linear(in_features=256, out_features=128, bias=True)
    (5): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): Dropout(p=0.2, inplace=False)
    (8): Linear(in_features=128, out_features=64, bias=True)
    (9): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU()
    (11): Dropout(p=0.2, inplace=False)
    (12): Linear(in_features=64, out_features=1, bias=True)
  )
)


In [12]:
# map_location remaps tensors that were saved on another device (e.g. a GPU
# checkpoint) onto the device selected for this session, so the checkpoint
# also loads on CPU-only machines.
model.load_state_dict(torch.load('../outputs/best_churn_model.pth', map_location=device))

<All keys matched successfully>

In [19]:
# Preprocessed feature/label tables (read_csv returns a DataFrame for each file)
X_train = pd.read_csv("../data/preprocessed/X_train.csv")
y_train = pd.read_csv("../data/preprocessed/y_train.csv")
y_test = pd.read_csv("../data/preprocessed/y_test.csv")
y_val_split = pd.read_csv("../data/val-split/y_val_split.csv")

# Train/validation split tensors; note that this rebinds `tensor_objects`,
# replacing the dict loaded from tensor_data.pkl in the earlier cell
with open('../outputs/tensors/train_validation_split_tensors.pkl', 'rb') as f:
    tensor_objects = pickle.load(f)

# Unpack the four split tensors
(X_train_split_tensor, X_val_split_tensor,
 y_train_split_tensor, y_val_split_tensor) = (
    tensor_objects[key]
    for key in (
        'X_train_split_tensor', 'X_val_split_tensor',
        'y_train_split_tensor', 'y_val_split_tensor',
    )
)


In [14]:
# Model Serialization Implementation
import torch
import json
import os
from datetime import datetime

def _json_default(value):
    """Fallback for ``json.dump``: convert array-like scalars/arrays through ``tolist()``."""
    to_list = getattr(value, "tolist", None)
    if callable(to_list):
        return to_list()
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


def serialize_model(model, preprocessing_objects, metadata, base_path="../outputs/model-artifacts"):
    """
    Comprehensive model serialization for production deployment

    Writes four artifacts into ``<base_path>/<model_version>/``: the state
    dict (``model.pt``), a traced TorchScript model (``model_optimized.pt``),
    the pickled preprocessing objects (``preprocessing.pkl``) and the
    metadata (``metadata.json``).

    Side effects: the model is switched to evaluation mode for tracing and is
    left in that mode. The caller's ``metadata`` dict is NOT modified; the
    extended copy is what gets written to disk.

    Args:
        model (nn.Module): Trained PyTorch model; its first layer must be
            reachable as ``model.network[0]`` and expose ``in_features``
        preprocessing_objects (dict): Dictionary of preprocessing components
        metadata (dict): Model metadata and performance metrics
        base_path (str): Directory to save model artifacts

    Returns:
        dict: Paths to all serialized components (``model_torchscript`` is
        ``None`` when the TorchScript conversion failed) plus the ``version``
        string
    """
    # Create model versioning with timestamp and performance
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    model_version = f"churn_model_v{timestamp}"

    # Compare against None so a legitimate score of 0.0 still appears in the name
    auc_score = metadata.get('auc_score')
    if auc_score is not None:
        model_version += f"_auc{auc_score:.3f}"

    # Create directory structure
    model_dir = os.path.join(base_path, model_version)
    os.makedirs(model_dir, exist_ok=True)

    # Save paths
    paths = {
        'base_dir': model_dir,
        'model': os.path.join(model_dir, "model.pt"),
        'model_torchscript': os.path.join(model_dir, "model_optimized.pt"),
        'preprocessing': os.path.join(model_dir, "preprocessing.pkl"),
        'metadata': os.path.join(model_dir, "metadata.json"),
        'version': model_version
    }

    input_features = model.network[0].in_features
    model_device = next(model.parameters()).device

    # 1. Serialize model weights
    print(f"📦 Serializing model state dictionary...")
    torch.save(model.state_dict(), paths['model'])

    # 2. Create TorchScript version for optimized inference
    print(f"🚀 Creating TorchScript optimized model...")
    try:
        # Create example input for tracing (one all-zero row on the model's device)
        example_input = torch.zeros((1, input_features),
                                    dtype=torch.float32).to(model_device)

        # Use tracing to create TorchScript model
        model.eval()  # Set to evaluation mode
        traced_model = torch.jit.trace(model, example_input)
        torch.jit.save(traced_model, paths['model_torchscript'])
        print(f"   ✅ TorchScript model created successfully")
    except Exception as e:
        # Best effort: the state dict is already saved, so report and carry on
        print(f"   ⚠️ TorchScript conversion failed: {str(e)}")
        paths['model_torchscript'] = None

    # 3. Save preprocessing components
    print(f"🔧 Saving preprocessing components...")
    import pickle
    with open(paths['preprocessing'], 'wb') as f:
        pickle.dump(preprocessing_objects, f)

    # 4. Save metadata with extended information
    print(f"📝 Saving model metadata...")

    # Extend a copy so the dict passed in by the caller stays untouched
    full_metadata = dict(metadata)
    full_metadata.update({
        'serialization_timestamp': timestamp,
        'model_version': model_version,
        'pytorch_version': torch.__version__,
        'input_features': input_features,
        'architecture': [str(layer) for layer in model.network],
        'device': str(model_device)
    })

    # Save as JSON; the fallback converts NumPy/tensor scalars that the
    # default encoder rejects
    with open(paths['metadata'], 'w') as f:
        json.dump(full_metadata, f, indent=2, default=_json_default)

    print(f"✅ Model serialization complete!")
    print(f"📁 Model artifacts saved to {model_dir}")

    return paths

In [20]:
# Example usage with our trained model
model_metadata = {
    'training_date': datetime.now().strftime("%Y-%m-%d"),
    'auc_score': roc_auc_test,
    'f1_score': test_f1,
    'precision': test_precision,
    'recall': test_recall,
    'optimal_threshold': optimal_threshold,
    'training_epochs': 500, #hard-coded for now
    'training_dataset_size': len(X_train),
    'class_distribution': {
        # y_train is loaded with pd.read_csv and is therefore a DataFrame, so
        # y_train.sum() is a Series. Summing the underlying array yields a
        # scalar directly instead of relying on the deprecated
        # int(<single-element Series>) conversion.
        'churn': int(y_train.to_numpy().sum()),
        'no_churn': int(len(y_train) - y_train.to_numpy().sum())
    },
    'feature_count': X_train.shape[1],
    'hyperparameters': {
        'hidden_layers': [256, 128, 64],
        'dropout_rate': 0.2,
        'learning_rate': 0.0001,
        'weight_decay': 1e-5
    }
}

# Serialize all model components
model_paths = serialize_model(model, preprocessing_objects, model_metadata)

# Document the serialized artifacts (entries set to None, e.g. a failed
# TorchScript export, are skipped)
print("\n📋 Serialized Model Components:")
for key, path in model_paths.items():
    if path:
        print(f"   • {key}: {path}")

# Demonstrate model loading (partial implementation)
print("\n🔄 Model Loading Demonstration:")

def load_model(model_dir):
    """Read ``metadata.json`` from ``model_dir`` and print a short summary.

    Only the metadata is read; model weights and preprocessing objects are
    not loaded. Returns ``None``.
    """
    metadata_path = os.path.join(model_dir, "metadata.json")
    with open(metadata_path, 'r') as metadata_file:
        metadata = json.load(metadata_file)

    # Report the version found and the headline metrics stored alongside it
    print(f"   📊 Loaded model: {metadata['model_version']}")
    print(f"   📈 Performance: AUC={metadata['auc_score']:.3f}, F1={metadata['f1_score']:.3f}")
    print(f"   🎯 Optimal threshold: {metadata['optimal_threshold']:.3f}")

    # In production: would continue by loading model weights and preprocessing

# Demonstrate loading: read back metadata.json from the directory that
# serialize_model just created and print its summary
load_model(model_paths['base_dir'])

📦 Serializing model state dictionary...
🚀 Creating TorchScript optimized model...
   ✅ TorchScript model created successfully
🔧 Saving preprocessing components...
📝 Saving model metadata...
✅ Model serialization complete!
📁 Model artifacts saved to ../outputs/model-artifacts\churn_model_v20251006_001514_auc0.817

📋 Serialized Model Components:
   • base_dir: ../outputs/model-artifacts\churn_model_v20251006_001514_auc0.817
   • model: ../outputs/model-artifacts\churn_model_v20251006_001514_auc0.817\model.pt
   • model_torchscript: ../outputs/model-artifacts\churn_model_v20251006_001514_auc0.817\model_optimized.pt
   • preprocessing: ../outputs/model-artifacts\churn_model_v20251006_001514_auc0.817\preprocessing.pkl
   • metadata: ../outputs/model-artifacts\churn_model_v20251006_001514_auc0.817\metadata.json
   • version: churn_model_v20251006_001514_auc0.817

🔄 Model Loading Demonstration:
   📊 Loaded model: churn_model_v20251006_001514_auc0.817
   📈 Performance: AUC=0.817, F1=0.608
   🎯