# TensorFlow Deep Learning Model for Customer Retention

This notebook creates and trains a deep learning model to predict customer repurchase probability:
- Load processed features
- Data preprocessing and scaling
- Neural network architecture design
- Model training with validation
- Model evaluation and metrics
- Save trained model

The trained model is saved for future predictions.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import os
import json
import pickle
import warnings
warnings.filterwarnings('ignore')

# TensorFlow and ML libraries
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve
from sklearn.utils.class_weight import compute_class_weight

# Reproducibility: NumPy and TensorFlow are seeded with the same value so a
# fresh "Restart Kernel -> Run All" draws the same random numbers each time.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Plot appearance. The matplotlib style is applied first and the seaborn
# palette second; keep this order, the two calls are not independent.
plt.style.use('default')
sns.set_palette('husl')

# Environment report: TensorFlow build and the GPUs it can see (an empty list
# means no GPU device was detected).
gpu_devices = tf.config.list_physical_devices('GPU')
print(f"TensorFlow version: {tf.__version__}")
print(f"GPU Available: {gpu_devices}")

## Load Processed Data

In [None]:
# Directory holding the processed feature files read below.
processed_data_path = '../data/processed/'

# Feature matrix (X) and the binary target column 'will_repurchase' (y).
X = pd.read_csv(f'{processed_data_path}/X_features.csv')
target_frame = pd.read_csv(f'{processed_data_path}/y_target.csv')
y = target_frame['will_repurchase']

# JSON summary stored next to the feature files; it is loaded here so that it
# is available in the notebook namespace.
with open(f'{processed_data_path}/feature_engineering_summary.json', 'r') as summary_file:
    feature_summary = json.load(summary_file)

# Quick shape / class-balance report.
class_counts = y.value_counts().to_dict()
print("=== PROCESSED DATA LOADED ===")
print(f"Features shape: {X.shape}")
print(f"Target shape: {y.shape}")
print(f"Target distribution: {class_counts}")
print(f"Features: {X.shape[1]} total")

# Per-column summary statistics of the feature matrix.
print("\n=== FEATURE STATISTICS ===")
print(X.describe())

## Data Preprocessing and Splitting

In [None]:
# Two-stage stratified split with a fixed random_state:
#   1. hold out 20% of all rows as the test set;
#   2. hold out 20% of the remaining rows as the validation set.
# This leaves roughly 64% / 16% / 20% for train / validation / test.
X_train_full, X_test, y_train_full, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train_full, y_train_full, test_size=0.2, random_state=42, stratify=y_train_full
)

# Row counts per split.
print("=== DATA SPLIT ===")
for split_label, split_features in (
    ("Training", X_train),
    ("Validation", X_val),
    ("Test", X_test),
):
    print(f"{split_label} set: {split_features.shape[0]:,} samples")

# Class shares per split; stratification should keep them nearly identical.
print("\nTarget distribution:")
for split_label, split_target in (("Train", y_train), ("Val", y_val), ("Test", y_test)):
    class_shares = split_target.value_counts(normalize=True).round(3).to_dict()
    print(f"{split_label}: {class_shares}")

## Feature Scaling

In [None]:
# Standardize the features. The scaler is fitted on the training split only
# and then applied unchanged to the validation and test splits, so statistics
# from held-out rows never enter preprocessing.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

# Sanity report: the aggregate mean/std over the scaled training matrix
# should come out close to 0 and 1.
print("=== FEATURE SCALING ===")
print(f"Original feature range: [{X_train.min().min():.3f}, {X_train.max().max():.3f}]")
print(f"Scaled feature range: [{X_train_scaled.min():.3f}, {X_train_scaled.max():.3f}]")
print(f"Scaled mean: {X_train_scaled.mean():.6f}")
print(f"Scaled std: {X_train_scaled.std():.6f}")

# Convert features and labels to float32 NumPy arrays for TensorFlow.
X_train_scaled = X_train_scaled.astype(np.float32)
X_val_scaled = X_val_scaled.astype(np.float32)
X_test_scaled = X_test_scaled.astype(np.float32)

# np.asarray accepts both the pandas Series produced by the split cell and
# the ndarray left behind by an earlier run of this cell, so re-executing the
# cell no longer fails with AttributeError on `.values`.
y_train = np.asarray(y_train, dtype=np.float32)
y_val = np.asarray(y_val, dtype=np.float32)
y_test = np.asarray(y_test, dtype=np.float32)

## Handle Class Imbalance

In [None]:
# Balanced class weights: each class is weighted inversely to its frequency
# in the training labels so that the minority class is not drowned out.
class_labels = np.unique(y_train)
class_weights = compute_class_weight(
    'balanced',
    classes=class_labels,
    y=y_train
)

# Keras' `class_weight` argument is documented as a mapping from integer
# class indices to float weights. The labels are float32 at this point, so
# convert the keys to int (and the weights to plain floats) instead of
# passing NumPy scalar keys; this also keeps the printed dict readable.
class_weight_dict = {
    int(label): float(weight)
    for label, weight in zip(class_labels, class_weights)
}

print("=== CLASS IMBALANCE HANDLING ===")
print(f"Class weights: {class_weight_dict}")

# Absolute and relative class sizes in the training split.
neg_count = np.sum(y_train == 0)
pos_count = np.sum(y_train == 1)
total = len(y_train)

print(f"Negative samples: {neg_count:,} ({neg_count/total*100:.1f}%)")
print(f"Positive samples: {pos_count:,} ({pos_count/total*100:.1f}%)")
print(f"Imbalance ratio: {neg_count/pos_count:.2f}:1")

## Neural Network Architecture

In [None]:
# Create neural network model
def create_retention_model(input_dim, dropout_rate=0.3, l2_reg=0.001):
    """
    Create a deep neural network for customer retention prediction.

    The network is a stack of four ReLU Dense blocks (256, 128, 64, 32 units)
    followed by a single sigmoid unit. The first three blocks use batch
    normalization; the first two apply ``dropout_rate`` and the last two apply
    half of it. Every hidden Dense kernel carries an L2 penalty.

    Args:
        input_dim: Number of input features per sample.
        dropout_rate: Dropout rate of the first two blocks; blocks three and
            four use ``dropout_rate / 2``.
        l2_reg: L2 regularization factor for the hidden Dense kernels.

    Returns:
        An uncompiled ``keras.Sequential`` model whose output has one unit.
    """
    # (units, use_batch_norm, dropout) for each hidden block, in order.
    hidden_blocks = [
        (256, True, dropout_rate),
        (128, True, dropout_rate),
        (64, True, dropout_rate / 2),
        (32, False, dropout_rate / 2),
    ]

    # Declare the input with an explicit Input object rather than passing
    # `input_shape` to the first Dense layer, which Keras 3 deprecates.
    stack = [keras.Input(shape=(input_dim,))]

    for block_no, (units, use_batch_norm, rate) in enumerate(hidden_blocks, start=1):
        stack.append(
            layers.Dense(units, activation='relu',
                         kernel_regularizer=regularizers.l2(l2_reg),
                         name=f'dense_{block_no}')
        )
        if use_batch_norm:
            stack.append(layers.BatchNormalization(name=f'batch_norm_{block_no}'))
        stack.append(layers.Dropout(rate, name=f'dropout_{block_no}'))

    # Output layer: one sigmoid unit producing a value in [0, 1].
    stack.append(layers.Dense(1, activation='sigmoid', name='output'))

    return keras.Sequential(stack)

# Build the network with one input per scaled feature column.
input_dimension = X_train_scaled.shape[1]
model = create_retention_model(input_dimension)

# Binary cross-entropy matches the single sigmoid output. The metric names
# set here ('precision', 'recall', 'auc') become the keys of the training
# history and the names that callbacks can monitor (e.g. 'val_auc').
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=[
        'accuracy',
        keras.metrics.Precision(name='precision'),
        keras.metrics.Recall(name='recall'),
        keras.metrics.AUC(name='auc')
    ]
)

# Display model architecture
print("=== NEURAL NETWORK ARCHITECTURE ===")
model.summary()

# Architecture diagram. plot_model does not draw a matplotlib figure, so
# plt.show() never displayed anything; its return value has to be the cell's
# output instead. It also depends on the optional pydot / Graphviz packages,
# so a missing install is reported rather than aborting the notebook.
try:
    architecture_plot = keras.utils.plot_model(
        model, show_shapes=True, show_layer_names=True, dpi=150
    )
except (ImportError, OSError) as plot_error:
    architecture_plot = None
    print(f"Skipping architecture diagram: {plot_error}")

# Last expression of the cell: rendered by Jupyter when a diagram was produced.
architecture_plot

## Model Training

In [None]:
# ModelCheckpoint writes into ../models/, which is otherwise only created by
# the "Save Model and Artifacts" cell further down. Create it here so that a
# fresh Restart & Run All does not fail at the first checkpoint save.
checkpoint_path = '../models/best_retention_model.h5'
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

# Callbacks:
# - EarlyStopping: stop after 15 epochs without val_loss improvement and
#   restore the best weights seen.
# - ReduceLROnPlateau: halve the learning rate after 8 stagnant epochs,
#   never going below 1e-6.
# - ModelCheckpoint: keep the epoch with the highest val_auc on disk.
# Note that the checkpoint tracks val_auc while early stopping tracks
# val_loss, so the checkpoint file and the in-memory model may come from
# different epochs.
callbacks = [
    keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=15,
        restore_best_weights=True,
        verbose=1
    ),
    keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=8,
        min_lr=1e-6,
        verbose=1
    ),
    keras.callbacks.ModelCheckpoint(
        checkpoint_path,
        monitor='val_auc',
        save_best_only=True,
        mode='max',
        verbose=1
    )
]

# Train the model
print("=== STARTING MODEL TRAINING ===")
print(f"Training samples: {len(X_train_scaled):,}")
print(f"Validation samples: {len(X_val_scaled):,}")
print(f"Features: {input_dimension}")

# Up to 100 epochs; class_weight re-weights the loss per class using the
# weights computed in the class-imbalance cell.
history = model.fit(
    X_train_scaled, y_train,
    validation_data=(X_val_scaled, y_val),
    epochs=100,
    batch_size=64,
    class_weight=class_weight_dict,
    callbacks=callbacks,
    verbose=1
)

print("\n✅ Model training completed!")

## Training History Visualization

In [None]:
# Plot training history
def plot_training_history(history):
    """
    Plot the per-epoch curves recorded during training on a 2x2 grid.

    Panels: loss, accuracy, AUC, and either the learning-rate schedule (when
    the history contains one) or precision/recall as a fallback.

    Args:
        history: Object exposing a ``history`` dict that maps metric names to
            per-epoch value sequences. 'loss', 'accuracy', 'auc' and their
            'val_' counterparts must be present; 'precision' / 'recall' (and
            their 'val_' counterparts) are needed only when no learning rate
            was logged.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    logs = history.history
    fig, axes = plt.subplots(2, 2, figsize=(15, 12))

    def _draw_panel(ax, curves, title, ylabel, log_scale=False):
        """Plot each (history key, legend label) pair on ``ax`` and decorate it."""
        for key, label in curves:
            ax.plot(logs[key], label=label)
        ax.set_title(title)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(ylabel)
        if log_scale:
            ax.set_yscale('log')
        ax.legend()
        ax.grid(True)

    _draw_panel(
        axes[0, 0],
        [('loss', 'Training Loss'), ('val_loss', 'Validation Loss')],
        'Model Loss', 'Loss',
    )
    _draw_panel(
        axes[0, 1],
        [('accuracy', 'Training Accuracy'), ('val_accuracy', 'Validation Accuracy')],
        'Model Accuracy', 'Accuracy',
    )
    _draw_panel(
        axes[1, 0],
        [('auc', 'Training AUC'), ('val_auc', 'Validation AUC')],
        'Model AUC', 'AUC',
    )

    # The learning rate is logged under 'lr' by older Keras releases and
    # under 'learning_rate' by Keras 3; accept either key.
    lr_key = next((key for key in ('lr', 'learning_rate') if key in logs), None)

    if lr_key is not None:
        _draw_panel(
            axes[1, 1],
            [(lr_key, 'Learning Rate')],
            'Learning Rate Schedule', 'Learning Rate',
            log_scale=True,
        )
    else:
        # No learning rate in the history: show precision and recall instead.
        _draw_panel(
            axes[1, 1],
            [
                ('precision', 'Training Precision'),
                ('val_precision', 'Validation Precision'),
                ('recall', 'Training Recall'),
                ('val_recall', 'Validation Recall'),
            ],
            'Precision & Recall', 'Score',
        )

    plt.tight_layout()
    plt.show()

plot_training_history(history)

# Training summary. "Best" is defined by validation AUC; `best_epoch` is a
# zero-based index and is printed one-based. The two "final" losses are the
# values of the last epoch that was run.
epoch_logs = history.history
val_auc_per_epoch = epoch_logs['val_auc']
best_epoch = np.argmax(val_auc_per_epoch)

summary_lines = [
    "\n=== TRAINING SUMMARY ===",
    f"Best epoch: {best_epoch + 1}",
    f"Best validation AUC: {max(val_auc_per_epoch):.4f}",
    f"Best validation accuracy: {epoch_logs['val_accuracy'][best_epoch]:.4f}",
    f"Final training loss: {epoch_logs['loss'][-1]:.4f}",
    f"Final validation loss: {epoch_logs['val_loss'][-1]:.4f}",
]
print("\n".join(summary_lines))

## Model Evaluation

In [None]:
# Display names for the two classes, in label order (0, 1).
class_names = ['Will Not Repurchase', 'Will Repurchase']

# Predicted probabilities for the test set and hard labels at a 0.5 cut-off.
y_pred_proba = model.predict(X_test_scaled)
y_pred = (y_pred_proba > 0.5).astype(int).ravel()

# Headline metrics: AUC uses the probabilities, accuracy the hard labels.
test_auc = roc_auc_score(y_test, y_pred_proba)
test_accuracy = (y_pred == y_test).mean()

print("=== MODEL EVALUATION ON TEST SET ===")
print(f"Test AUC: {test_auc:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")

# Per-class precision / recall / F1.
print("\n=== CLASSIFICATION REPORT ===")
print(classification_report(y_test, y_pred, target_names=class_names))

# Confusion matrix heatmap (rows: actual class, columns: predicted class).
cm = confusion_matrix(y_test, y_pred)
fig_cm, ax_cm = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names,
            yticklabels=class_names,
            ax=ax_cm)
ax_cm.set_title('Confusion Matrix')
ax_cm.set_xlabel('Predicted')
ax_cm.set_ylabel('Actual')
plt.show()

# Side-by-side figure: ROC curve (left) and score distributions (right).
fpr, tpr, thresholds = roc_curve(y_test, y_pred_proba)
fig_eval, (ax_roc, ax_dist) = plt.subplots(1, 2, figsize=(10, 4))

# Left panel: ROC curve with the chance diagonal for reference.
ax_roc.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (AUC = {test_auc:.4f})')
ax_roc.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
ax_roc.set_xlim([0.0, 1.0])
ax_roc.set_ylim([0.0, 1.05])
ax_roc.set_xlabel('False Positive Rate')
ax_roc.set_ylabel('True Positive Rate')
ax_roc.set_title('Receiver Operating Characteristic (ROC) Curve')
ax_roc.legend(loc="lower right")
ax_roc.grid(True)

# Right panel: predicted-probability histogram per actual class.
for class_value, class_label in enumerate(class_names):
    ax_dist.hist(y_pred_proba[y_test == class_value], alpha=0.7,
                 label=class_label, bins=30, density=True)
ax_dist.axvline(x=0.5, color='red', linestyle='--', label='Decision Threshold')
ax_dist.set_xlabel('Predicted Probability')
ax_dist.set_ylabel('Density')
ax_dist.set_title('Prediction Probability Distribution')
ax_dist.legend()
ax_dist.grid(True)

fig_eval.tight_layout()
plt.show()

## Feature Importance Analysis

In [None]:
# Calculate permutation feature importance (simplified approach)
def calculate_feature_importance(model, X_test, y_test, feature_names):
    """
    Calculate feature importance using the permutation method.

    Each feature column is shuffled in turn (one shuffle per feature) and the
    resulting drop in ROC AUC relative to the unshuffled baseline is recorded.
    A larger drop means the model relies more on that feature; values near
    zero or negative mean shuffling did not hurt.

    Args:
        model: Fitted model exposing ``predict(X, verbose=0)`` (Keras-style)
            and returning one score per row.
        X_test: 2-D array of features, shape (n_samples, n_features).
        y_test: True binary labels for the rows of ``X_test``.
        feature_names: One name per column of ``X_test``.

    Returns:
        DataFrame with columns 'feature' and 'importance', sorted by
        importance in descending order.

    Raises:
        ValueError: If the number of names differs from the number of columns.
    """
    feature_names = list(feature_names)
    # Work on a private copy so the caller's array is never modified.
    X_work = np.array(X_test, copy=True)

    # Fail before the expensive loop rather than when the DataFrame is built.
    if X_work.shape[1] != len(feature_names):
        raise ValueError(
            f"Expected {X_work.shape[1]} feature names, got {len(feature_names)}"
        )

    def _auc(features):
        """ROC AUC of the model on ``features``; verbose=0 silences the per-call progress bar."""
        return roc_auc_score(y_test, model.predict(features, verbose=0))

    baseline_score = _auc(X_work)
    importances = []

    for col in range(X_work.shape[1]):
        # Shuffle one column in place, score, then restore it. This avoids
        # copying the whole matrix once per feature.
        original_column = X_work[:, col].copy()
        np.random.shuffle(X_work[:, col])

        # Importance is the decrease in performance
        importances.append(baseline_score - _auc(X_work))

        X_work[:, col] = original_column

    # Create feature importance dataframe
    feature_importance = pd.DataFrame({
        'feature': feature_names,
        'importance': importances
    }).sort_values('importance', ascending=False)

    return feature_importance

# Permutation importance runs one extra prediction pass per feature, so it is
# computed on a random subset of at most 1000 test rows (drawn without
# replacement).
n_test_rows = len(X_test_scaled)
sample_size = min(1000, n_test_rows)
sample_indices = np.random.choice(n_test_rows, sample_size, replace=False)
X_sample, y_sample = X_test_scaled[sample_indices], y_test[sample_indices]

feature_importance = calculate_feature_importance(model, X_sample, y_sample, X.columns)

# Horizontal bar chart of the 15 highest-ranked features. Green bars mark
# features whose shuffling lowered the AUC, red bars the rest.
top_features = feature_importance.head(15)
bar_positions = range(len(top_features))
colors = top_features['importance'].gt(0).map({True: 'green', False: 'red'}).tolist()

fig_imp, ax_imp = plt.subplots(figsize=(12, 8))
ax_imp.barh(bar_positions, top_features['importance'], color=colors, alpha=0.7)
ax_imp.set_yticks(bar_positions)
ax_imp.set_yticklabels(top_features['feature'])
ax_imp.set_xlabel('Feature Importance (AUC decrease when permuted)')
ax_imp.set_title('Top 15 Most Important Features for Customer Retention Prediction')
ax_imp.axvline(x=0, color='black', linestyle='-', alpha=0.3)
ax_imp.grid(axis='x', alpha=0.3)
fig_imp.tight_layout()
plt.show()

print("=== TOP 10 MOST IMPORTANT FEATURES ===")
print(feature_importance.head(10))

## Model Interpretation and Business Insights

In [None]:
def _driver_note(feature, importance):
    """
    Return the one-line explanation printed for a top-ranked feature.

    The feature name is matched case-insensitively against a fixed list of
    keywords, in order; the first match wins. Names that match nothing get a
    generic line that includes the importance value.
    """
    name = feature.lower()
    # 'age' must be a whole underscore-separated token: a plain substring
    # test would also fire on names such as 'average_order_value'.
    has_age_token = 'age' in name.split('_')

    if 'recency' in name:
        return f"• {feature}: Recent customer activity is crucial for retention"
    if 'frequency' in name:
        return f"• {feature}: Purchase frequency strongly indicates future behavior"
    if 'monetary' in name:
        return f"• {feature}: Customer spending level is a key retention factor"
    if 'total_amount' in name:
        return f"• {feature}: Historical spend directly correlates with retention"
    if has_age_token:
        return f"• {feature}: Customer demographics influence repurchase behavior"
    if 'days_since' in name:
        return f"• {feature}: Time since last interaction is critical"
    return f"• {feature}: Important predictor (importance: {importance:.4f})"


# Generate business insights
print("=== BUSINESS INSIGHTS FROM MODEL ===")
print("\n🎯 KEY RETENTION DRIVERS:")

top_5_features = feature_importance.head(5)
for feature, importance in zip(top_5_features['feature'], top_5_features['importance']):
    print(_driver_note(feature, importance))

# Model performance summary. The qualitative rating uses fixed AUC bands.
if test_auc > 0.9:
    auc_rating = 'Excellent'
elif test_auc > 0.8:
    auc_rating = 'Good'
elif test_auc > 0.7:
    auc_rating = 'Fair'
else:
    auc_rating = 'Needs Improvement'

print(f"\n📊 MODEL PERFORMANCE SUMMARY:")
print(f"• Test AUC: {test_auc:.3f} - {auc_rating}")
print(f"• Test Accuracy: {test_accuracy:.3f}")
# Only claim the model separates the classes well when the AUC supports it.
if test_auc > 0.7:
    print(f"• The model can effectively distinguish between customers who will and won't repurchase")
else:
    print(f"• The model only weakly separates customers who will and won't repurchase - treat its predictions with caution")

# Prediction examples: first ten test customers.
print(f"\n🔍 PREDICTION EXAMPLES:")
sample_predictions = y_pred_proba[:10].flatten()
sample_actuals = y_test[:10]

for i, (prob, actual) in enumerate(zip(sample_predictions, sample_actuals)):
    # A high probability means the customer is LIKELY to repurchase, so the
    # band is reported as a likelihood rather than as a "risk".
    likelihood = "High" if prob > 0.7 else "Medium" if prob > 0.3 else "Low"
    actual_text = "Repurchased" if actual == 1 else "Did not repurchase"
    print(f"Customer {i+1}: {prob:.3f} retention probability ({likelihood} repurchase likelihood) - {actual_text}")

## Save Model and Artifacts

In [None]:
# Output directory for every artifact written by this cell.
models_path = '../models/'
os.makedirs(models_path, exist_ok=True)

# Artifact locations. `models_path` already ends with '/', so the joined
# paths contain a doubled separator.
model_file = f'{models_path}/retention_model.h5'
scaler_file = f'{models_path}/scaler.pkl'
importance_file = f'{models_path}/feature_importance.csv'
metadata_file = f'{models_path}/model_metadata.json'
history_file = f'{models_path}/training_history.csv'

# Trained model as currently held in memory.
model.save(model_file)
print(f"✅ Model saved to {model_file}")

# Fitted scaler; inference must apply the same transformation.
with open(scaler_file, 'wb') as scaler_out:
    pickle.dump(scaler, scaler_out)
print(f"✅ Scaler saved to {scaler_file}")

# Permutation feature importances.
feature_importance.to_csv(importance_file, index=False)
print(f"✅ Feature importance saved to {importance_file}")


def _positive_share(labels):
    """Fraction of entries in ``labels`` that equal 1."""
    return labels.tolist().count(1) / len(labels)


# Metadata sections. The architecture and optimizer settings are written out
# by hand and must be kept in sync with the model-definition and training
# cells.
epoch_logs = history.history

architecture_info = {
    'layers': [
        {'type': 'Dense', 'units': units, 'activation': activation}
        for units, activation in (
            (256, 'relu'),
            (128, 'relu'),
            (64, 'relu'),
            (32, 'relu'),
            (1, 'sigmoid'),
        )
    ],
    'dropout': 0.3,
    'l2_regularization': 0.001
}

training_info = {
    'optimizer': 'Adam',
    'learning_rate': 0.001,
    'batch_size': 64,
    'epochs_trained': len(epoch_logs['loss']),
    'best_epoch': int(best_epoch + 1),
    'early_stopping': True,
    'class_weights_used': True
}

performance_info = {
    'test_auc': float(test_auc),
    'test_accuracy': float(test_accuracy),
    'best_val_auc': float(max(epoch_logs['val_auc'])),
    'best_val_accuracy': float(epoch_logs['val_accuracy'][best_epoch])
}

model_metadata = {
    'model_created': datetime.now().isoformat(),
    'model_type': 'TensorFlow Neural Network',
    'input_features': len(X.columns),
    'training_samples': len(X_train_scaled),
    'validation_samples': len(X_val_scaled),
    'test_samples': len(X_test_scaled),
    'architecture': architecture_info,
    'training_config': training_info,
    'performance_metrics': performance_info,
    'feature_names': list(X.columns),
    'target_classes': ['will_not_repurchase', 'will_repurchase'],
    'class_distribution': {
        'train': _positive_share(y_train),
        'test': _positive_share(y_test)
    }
}

with open(metadata_file, 'w') as metadata_out:
    json.dump(model_metadata, metadata_out, indent=2)

print(f"✅ Model metadata saved to {metadata_file}")

# Per-epoch training history, one row per epoch.
history_df = pd.DataFrame(epoch_logs)
history_df.to_csv(history_file, index=False)
print(f"✅ Training history saved to {history_file}")

print(f"\n🎉 Model training and evaluation completed successfully!")
print(f"📁 All artifacts saved to: {models_path}")
print(f"🚀 Model is ready for production deployment!")

## Model Usage Example

In [None]:
# Printed snippet showing how to reload the saved model and scaler and score
# new customers. It is emitted as text only; nothing in it is executed here.
usage_snippet_lines = (
    "=== MODEL USAGE EXAMPLE ===",
    "",
    "# Load the trained model and scaler:",
    "import tensorflow as tf",
    "import pickle",
    "import numpy as np",
    "",
    "# Load model and scaler",
    "model = tf.keras.models.load_model('../models/retention_model.h5')",
    "with open('../models/scaler.pkl', 'rb') as f:",
    "    scaler = pickle.load(f)",
    "",
    "# For new customer data:",
    "# 1. Ensure features are in the same order as training",
    "# 2. Scale features using the saved scaler",
    "# 3. Make prediction",
    "",
    "new_customer_features = scaler.transform(new_customer_data)",
    "retention_probability = model.predict(new_customer_features)[0][0]",
    "print(f'Retention probability: {retention_probability:.3f}')",
)
print("\n".join(usage_snippet_lines))

# Live example: score the first test customer. The slice keeps the batch
# dimension, so the model receives a single-row 2-D array.
example_customer = X_test_scaled[:1]
example_prediction = model.predict(example_customer)[0][0]
example_actual = y_test[0]

if example_actual == 1:
    actual_outcome = 'Repurchased'
else:
    actual_outcome = 'Did not repurchase'

if example_prediction > 0.5:
    predicted_outcome = 'Will repurchase'
else:
    predicted_outcome = 'Will not repurchase'

# Confidence is the probability assigned to whichever class was predicted.
prediction_confidence = max(example_prediction, 1-example_prediction)

print(f"\n📝 ACTUAL EXAMPLE:")
print(f"Customer retention probability: {example_prediction:.3f}")
print(f"Actual outcome: {actual_outcome}")
print(f"Model prediction: {predicted_outcome}")
print(f"Prediction confidence: {prediction_confidence:.3f}")