In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import tensorflow as tf


keras = tf.keras
from keras import layers, callbacks
from keras.models import Sequential
from keras.layers import Dense, Dropout, BatchNormalization
from keras.callbacks import EarlyStopping,ReduceLROnPlateau, ModelCheckpoint

from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, confusion_matrix, classification_report,
    roc_curve, precision_recall_curve
)
import pickle
import warnings
warnings.filterwarnings('ignore')

#random seed for reproducibility
np.random.seed(42)
tf.random.set_seed(42)

os.makedirs('results/figures', exist_ok=True)
os.makedirs('models', exist_ok=True)



In [5]:
#neural network classifier
print("Neural network classifier")
print("="*60)

#load preprocessed data
print("Step 1: Loading preprocesses data")
print("="*60)

with open('models/preprocessed_data.pkl','rb') as f:
    preprocessed_data= pickle.load(f)

feature_names = preprocessed_data['feature_names']
X_train = preprocessed_data['X_train']
X_test = preprocessed_data['X_test']
y_train = preprocessed_data['y_train']
y_test= preprocessed_data['y_test']

print("Data loaded successfully.")
print(f"Training data samples: {X_train.shape[0]}")
print(f"Test samples: {X_test.shape[0]}")
print(f"Features: {X_train.shape[1]}")
print(f"Class distribution (train): {np.bincount(y_train)}")
print(f"Class distribution (test): {np.bincount(y_test)}")

input_dimension = X_train.shape[1]
print(f"\nInput dimension for neural network: {input_dimension}")


Neural network classifier
Step 1: Loading preprocesses data
Data loaded successfully.
Training data samples: 8278
Test samples: 1409
Features: 19
Class distribution (train): [4139 4139]
Class distribution (test): [1035  374]

Input dimension for neural network: 19


In [8]:
#define neural network architecture
print("Step 2: Neural network architecture design")
print("="*60)

def create_nn_model(hidden_layers=[64,32], dropout_rate=0.3, learning_rate=0.001):
    """
    create a nural network model with specified architecture
    Parameters:
    :param hidden_layers: list of integers [neurons in each hidden layer]
    :param dropout_rate: dropout rate for regularization
    :param learning_rate: learning rate for optimizer
    """
    model = Sequential(name='ChurnPredictionNN')

    #Input layer
    model.add(Dense(hidden_layers[0], activation='relu', input_dim=input_dimension))
    model.add(BatchNormalization())
    model.add(Dropout(dropout_rate))

    #hidden layers
    for i, units in enumerate(hidden_layers[1:], start=2):
        model.add(Dense(units, activation='relu'))
        model.add(BatchNormalization())
        model.add(Dropout(dropout_rate))

    #output layer
    model.add(Dense(1, activation='sigmoid', name='output_layer'))

    #compile model
    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(
        optimizer=optimizer,
        loss='binary_crossentropy',
        metrics=['accuracy',
                 keras.metrics.Precision(),
                 keras.metrics.Recall(),
                 keras.metrics.AUC()]
    )

    return model



Step 2: Neural network architecture design


In [10]:
#baseline model
print("Step 3: Baseline neural network model")
print("="*60)

print("Architecture: [64, 32] neurons, dropout=0.3, learning_rate=0.001")

#create baseline model
bs_model = create_nn_model(hidden_layers=[64,32], dropout_rate=0.3, learning_rate=0.001)

#display model architecture
print("\nModel architecture:")
bs_model.summary()

#define callbacks
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=15,
    restore_best_weights=True,
    verbose=1
)

#train baseline
print("\nTraining baseline model")
print("="*60)

history_baseline = bs_model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=100,
    batch_size=32,
    callbacks=[early_stopping],
    verbose=1
)
print("\nBaseline model training completed")

#make prediction
y_pred_prob_baseline = bs_model.predict(X_test, verbose=0).flatten()
y_pred_baseline = (y_pred_prob_baseline > 0.5).astype(int)

#evaluate baseline model
print("Baseline model performance")
print("="*60)

bs_accuracy= accuracy_score(y_test, y_pred_baseline) #percentage of correct predictions

bs_precision = precision_score(y_test, y_pred_baseline) #Of all predicted "Churn" customers, how many actually churned?

bs_recall = recall_score(y_test, y_pred_baseline)#Of all actual churners, how many did the model detect?

bs_f1 = f1_score(y_test, y_pred_baseline) #Harmonic mean of Precision & Recall

bs_roc_auc = roc_auc_score(y_test, y_pred_prob_baseline)
#Measures overall ranking ability of the model: [0.5 - random guessing, 1- perfect]

print(f"Accuracy: {bs_accuracy:.4f}")
print(f"Precision: {bs_precision:.4f}")
print(f"Recall: {bs_recall:.4f}")
print(f"F1-Score: {bs_f1:.4f}")
print(f"ROC-AUC: {bs_roc_auc:.4f}")

print("\nClassification report:")
print(classification_report(y_test, y_pred_baseline, target_names=['No Churn', 'Churn']))

#confusion matrix
cm = confusion_matrix(y_test, y_pred_baseline)
print("\nConfusion Matrix")
print(cm)



Step 3: Baseline neural network model
Architecture: [64, 32] neurons, dropout=0.3, learning_rate=0.001

Model architecture:



Training baseline model
Epoch 1/100
[1m207/207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step - accuracy: 0.6927 - auc_1: 0.7483 - loss: 0.6192 - precision_1: 0.5776 - recall_1: 0.6714 - val_accuracy: 0.7391 - val_auc_1: 0.0000e+00 - val_loss: 0.5817 - val_precision_1: 1.0000 - val_recall_1: 0.7391
Epoch 2/100
[1m207/207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7452 - auc_1: 0.8103 - loss: 0.5204 - precision_1: 0.6582 - recall_1: 0.6669 - val_accuracy: 0.7434 - val_auc_1: 0.0000e+00 - val_loss: 0.5612 - val_precision_1: 1.0000 - val_recall_1: 0.7434
Epoch 3/100
[1m207/207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7496 - auc_1: 0.8177 - loss: 0.5077 - precision_1: 0.6672 - recall_1: 0.6629 - val_accuracy: 0.7560 - val_auc_1: 0.0000e+00 - val_loss: 0.5523 - val_precision_1: 1.0000 - val_recall_1: 0.7560
Epoch 4/100
[1m207/207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accurac

In [11]:
#hyperparameter tuning
print("Step 4: Hyperparameter tuning")
print("="*60)

#define configs to test
configs = [
    {'name': 'Config 1: Small Network', 'layers': [32, 16], 'dropout': 0.2, 'batch_size': 32, 'lr': 0.001},
    {'name': 'Config 2: Medium Network', 'layers': [64, 32], 'dropout': 0.3, 'batch_size': 32, 'lr': 0.001},
    {'name': 'Config 3: Large Network', 'layers': [128, 64, 32], 'dropout': 0.3, 'batch_size': 32, 'lr': 0.001},
    {'name': 'Config 4: Deep Network', 'layers': [64, 64, 32, 16], 'dropout': 0.4, 'batch_size': 64, 'lr': 0.0005},
    {'name': 'Config 5: Wide Network', 'layers': [256, 128], 'dropout': 0.4, 'batch_size': 64, 'lr': 0.001},
]

print(f"\nTotal configurations to test: {len(configs)}")
print("="*60)

best_score = 0
best_config = None
best_model = None
best_history =None
config_results = []

for idx, config in enumerate(configs,1):
    print(f"\n[{idx}/{len(configs)}] Testing: {config['name']}")
    print(f"Layers: {config['layers']}")
    print(f"Dropout: {config['dropout']}")
    print(f"Batch Size: {config['batch_size']}")
    print(f"Learning Rate: {config['lr']}")

    #create model
    model = create_nn_model(
        hidden_layers=config['layers'],
        dropout_rate=config['dropout'],
        learning_rate=config['lr']
    )

    #define callbacks
    early_stop = EarlyStopping(
        monitor='val_loss',
        patience=15,
        restore_best_weights=True,
        verbose=0
    )

    reduce_lr = ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=5,
        min_lr=0.00001,
        verbose=0
    )

    #train model
    history = model.fit(
        X_train, y_train,
        validation_split=0.2,
        epochs=100,
        batch_size=config['batch_size'],
        callbacks=[early_stop, reduce_lr],
        verbose=0
    )

    #evaluate on validation set
    val_loss, val_accuracy, val_precision, val_recall, val_auc = model.evaluate(
        X_train[int(0.8*len(X_train)):],
        y_train[int(0.8*len(y_train)):],
        verbose=0
    )

    # Calculate F1 score on validation set
    val_pred_proba = model.predict(X_train[int(0.8*len(X_train)):], verbose=0).flatten()
    val_pred = (val_pred_proba > 0.5).astype(int)
    val_f1 = f1_score(y_train[int(0.8*len(y_train)):], val_pred)

    print(f"Validation F1-Score: {val_f1:.4f}")
    print(f"Validation Loss: {val_loss:.4f}")
    print(f"Epochs trained: {len(history.history['loss'])}")

    # Store results
    config_results.append({
        'config': config['name'],
        'f1_score': val_f1,
        'loss': val_loss,
        'accuracy': val_accuracy,
        'auc': val_auc
    })
    # Check if this is the best model
    if val_f1 > best_score:
        best_score = val_f1
        best_config = config
        best_model = model
        best_history = history
        print(f"New best model. F1-Score: {val_f1:.4f}")

print("Hyperparameter tuning results")
print("="*60)

results_df = pd.DataFrame(config_results).sort_values('f1_score', ascending=False)
print("\nAll Configurations (sorted by F1-Score):")
print(results_df.to_string(index=False))

print("Best configuration")
print("="*60)
print(f"Configuration: {best_config['name']}")
print(f"Layers: {best_config['layers']}")
print(f"Dropout: {best_config['dropout']}")
print(f"Batch Size: {best_config['batch_size']}")
print(f"Learning Rate: {best_config['lr']}")
print(f"Best Validation F1-Score: {best_score:.4f}")

Step 4: Hyperparameter tuning

Total configurations to test: 5

[1/5] Testing: Config 1: Small Network
Layers: [32, 16]
Dropout: 0.2
Batch Size: 32
Learning Rate: 0.001
Validation F1-Score: 0.8528
Validation Loss: 0.5494
Epochs trained: 80
New best model. F1-Score: 0.8528

[2/5] Testing: Config 2: Medium Network
Layers: [64, 32]
Dropout: 0.3
Batch Size: 32
Learning Rate: 0.001
Validation F1-Score: 0.8867
Validation Loss: 0.4814
Epochs trained: 48
New best model. F1-Score: 0.8867

[3/5] Testing: Config 3: Large Network
Layers: [128, 64, 32]
Dropout: 0.3
Batch Size: 32
Learning Rate: 0.001
Validation F1-Score: 0.8727
Validation Loss: 0.5013
Epochs trained: 18

[4/5] Testing: Config 4: Deep Network
Layers: [64, 64, 32, 16]
Dropout: 0.4
Batch Size: 64
Learning Rate: 0.0005
Validation F1-Score: 0.8500
Validation Loss: 0.6017
Epochs trained: 36

[5/5] Testing: Config 5: Wide Network
Layers: [256, 128]
Dropout: 0.4
Batch Size: 64
Learning Rate: 0.001
Validation F1-Score: 0.9155
Validation Los

In [13]:
#train optimized model
print("Step 5: Training final optimized model")
print("="*60)

#create final model with best config
nn_optimized = create_nn_model(
    hidden_layers=best_config['layers'],
    dropout_rate=best_config['dropout'],
    learning_rate=best_config['lr']
)

#define callbacks for final training
early_stop_final = EarlyStopping(
    monitor='val_loss',
    patience=20,
    restore_best_weights=True,
    verbose=1
)

reduce_lr_final = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=7,
    min_lr=0.00001,
    verbose=1
)

checkpoint = ModelCheckpoint(
    'models/neural_network_best.keras',
    monitor='val_loss',
    save_best_only=True,
    verbose=1
)

#train
print("\nTraining optimized model")
print("="*60)

history_optimized = nn_optimized.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=150,
    batch_size=best_config['batch_size'],
    callbacks=[early_stop_final, reduce_lr_final, checkpoint],
    verbose=1
)

print("\nOptimized model training completed")
print(f"Total epochs: {len(history_optimized.history['loss'])}")

Step 5: Training final optimized model

Training optimized model
Epoch 1/150
[1m 86/104[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m0s[0m 2ms/step - accuracy: 0.6607 - auc_8: 0.7169 - loss: 0.7483 - precision_8: 0.5328 - recall_8: 0.6713
Epoch 1: val_loss improved from None to 0.89233, saving model to models/neural_network_best.keras
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step - accuracy: 0.7101 - auc_8: 0.7726 - loss: 0.6336 - precision_8: 0.5977 - recall_8: 0.6935 - val_accuracy: 0.2766 - val_auc_8: 0.0000e+00 - val_loss: 0.8923 - val_precision_8: 1.0000 - val_recall_8: 0.2766 - learning_rate: 0.0010
Epoch 2/150
[1m 87/104[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m0s[0m 4ms/step - accuracy: 0.7396 - auc_8: 0.8059 - loss: 0.5419 - precision_8: 0.6427 - recall_8: 0.6654
Epoch 2: val_loss improved from 0.89233 to 0.79535, saving model to models/neural_network_best.keras
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step 

In [14]:
#evaluate optimized model
print("Step 6: Optimized model evaluation")
y_pred_prob_opt= nn_optimized.predict(X_test, verbose=0).flatten()
y_pred_opt = (y_pred_prob_opt>0.5).astype(int)

optimized_accuracy = accuracy_score(y_test, y_pred_opt)
optimized_precision = precision_score(y_test, y_pred_opt)
optimized_recall = recall_score(y_test, y_pred_opt)
optimized_f1 = f1_score(y_test, y_pred_opt)
optimized_roc_auc = roc_auc_score(y_test, y_pred_prob_opt)

print("Optimized model performance")
print("="*60)

print(f"Accuracy:  {optimized_accuracy:.4f}")
print(f"Precision: {optimized_precision:.4f}")
print(f"Recall:    {optimized_recall:.4f}")
print(f"F1-Score:  {optimized_f1:.4f}")
print(f"ROC-AUC:   {optimized_roc_auc:.4f}")

print("\nClassification Report:")
print(classification_report(y_test, y_pred_opt, target_names=['No Churn', 'Churn']))

cm_opt = confusion_matrix(y_test, y_pred_opt)
print("\nConfusion Matrix:")
print(cm_opt)
print(f"\nTrue Negatives: {cm_opt[0,0]}")
print(f"False Positives: {cm_opt[0,1]}")
print(f"False Negatives: {cm_opt[1,0]}")
print(f"True Positives: {cm_opt[1,1]}")


Step 6: Optimized model evaluation
Optimized model performance
Accuracy:  0.7757
Precision: 0.5729
Recall:    0.6096
F1-Score:  0.5907
ROC-AUC:   0.8090

Classification Report:
              precision    recall  f1-score   support

    No Churn       0.86      0.84      0.85      1035
       Churn       0.57      0.61      0.59       374

    accuracy                           0.78      1409
   macro avg       0.71      0.72      0.72      1409
weighted avg       0.78      0.78      0.78      1409


Confusion Matrix:
[[865 170]
 [146 228]]

True Negatives: 865
False Positives: 170
False Negatives: 146
True Positives: 228


In [15]:
#compare baseline and optimized
print("Step 6: Baseline VS Optimized comparison")
print("="*60)

comparison_df= pd.DataFrame({
    'Metric':['Accuracy', 'Precision','Recall','F1-Score','ROC-AUC'],
    'Baseline':[bs_accuracy,bs_precision,bs_recall,bs_f1,bs_roc_auc],
    'Optimized': [optimized_accuracy,optimized_precision, optimized_recall, optimized_f1, optimized_roc_auc]
})

comparison_df['Improvement'] = ((comparison_df['Optimized'] - comparison_df['Baseline']) / comparison_df['Baseline']*100).round(2)

#compare baseline and optimized

print("Step 6: Baseline VS Optimized comparison")
print("=" * 60)

comparison_df = pd.DataFrame({
    'Metric': ['Accuracy', 'Precision', 'Recall', 'F1-Score', 'ROC-AUC'],
    'Baseline': [bs_accuracy, bs_precision, bs_recall, bs_f1, bs_roc_auc],
    'Optimized': [optimized_accuracy, optimized_precision, optimized_recall, optimized_f1, optimized_roc_auc]
})

comparison_df['Improvement'] = (
            (comparison_df['Optimized'] - comparison_df['Baseline']) / comparison_df['Baseline'] * 100).round(2)

print("\nPerformance Comparison:")
print(comparison_df.to_string(index=False))
print("\nImprovement Summary:")
for idx, row in comparison_df.iterrows():
    if row['Improvement'] > 0:
        print(f"{row['Metric']}: +{row['Improvement']:.2f}% improvement")
    elif row['Improvement'] < 0:
        print(f"{row['Metric']}: {row['Improvement']:.2f}% decrease")
    else:
        print(f"{row['Metric']}: No change")


Step 6: Baseline VS Optimized comparison
Step 6: Baseline VS Optimized comparison

Performance Comparison:
   Metric  Baseline  Optimized  Improvement
 Accuracy  0.765791   0.775727         1.30
Precision  0.549327   0.572864         4.28
   Recall  0.655080   0.609626        -6.94
 F1-Score  0.597561   0.590674        -1.15
  ROC-AUC  0.827559   0.808951        -2.25

Improvement Summary:
Accuracy: +1.30% improvement
Precision: +4.28% improvement
Recall: -6.94% decrease
F1-Score: -1.15% decrease
ROC-AUC: -2.25% decrease
