# ANN Model Training – Telecom Customer Churn

This notebook trains an Artificial Neural Network (ANN) to predict customer churn.

Steps covered:
- Load engineered dataset
- Scale features (ANN requirement)
- Build ANN architecture
- Train the model
- Evaluate performance using multiple metrics

In [1]:
# import required libraries 

import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler 
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix

import matplotlib.pyplot as plt
import seaborn as sns

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset

In [5]:
# Directory holding the processed splits, and the CSV file name of each split.

data_dir = "../data/processed/"
X_train_file, y_train_file = "X_train.csv", "y_train.csv"
X_test_file, y_test_file = "X_test.csv", "y_test.csv"

In [6]:
# load the files 
import os

# Read the four processed CSVs (train features, train labels, test features,
# test labels — in that order) and keep only their underlying NumPy arrays.
X_train, y_train, X_test, y_test = (
    pd.read_csv(os.path.join(data_dir, split_file)).values
    for split_file in (X_train_file, y_train_file, X_test_file, y_test_file)
)

In [7]:
# Standard scaling
# The scaler is fitted on the training features only; the same fitted
# statistics are then applied to both the training and the test features.

scaler = StandardScaler().fit(X_train)

X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [8]:
# Convert the NumPy arrays into float32 PyTorch tensors.

# Features
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)

# Labels
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

# If a label tensor came out 1D, convert it to 2D with shape (num_samples, 1)
# by adding a new dimension at index 1; tensors that are not 1D are left as-is.
if y_train_tensor.dim() == 1:
  y_train_tensor = y_train_tensor.reshape(-1, 1)

if y_test_tensor.dim() == 1:
  y_test_tensor = y_test_tensor.reshape(-1, 1)

In [15]:
# Create Dataset and DataLoader

from torch.utils.data import random_split

# Seed for the train/validation split and the single batch size shared by all
# loaders, kept as named constants instead of repeated literals.
SPLIT_SEED = 42
BATCH_SIZE = 32

full_train_dataset = TensorDataset(X_train_tensor, y_train_tensor)

# Split train into train + validation (90% train, 10% val).
val_size = int(0.1 * len(full_train_dataset))
train_size = len(full_train_dataset) - val_size

# A dedicated, seeded generator makes the partition identical on every run,
# so validation losses and early-stopping decisions are comparable across runs.
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_dataset, val_dataset = random_split(
    full_train_dataset, [train_size, val_size], generator=split_generator
)

# Only the training loader shuffles; validation and test order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(
    TensorDataset(X_test_tensor, y_test_tensor), batch_size=BATCH_SIZE, shuffle=False
)


#### Build Artificial Neural Network Model

In [None]:
class churnANN(nn.Module):
  """Fully connected binary classifier: input_dim -> 64 -> 32 -> 1.

  Each hidden Linear layer is followed by a ReLU (Rectified Linear Unit);
  the single output unit is passed through a Sigmoid, so every output
  lies in (0, 1).
  """

  def __init__(self, input_dim):
    super().__init__()
    hidden_sizes = (64, 32)

    layers = []
    in_features = input_dim
    for out_features in hidden_sizes:
      layers.append(nn.Linear(in_features, out_features))  # fully connected layer
      layers.append(nn.ReLU())
      in_features = out_features
    layers.append(nn.Linear(in_features, 1))
    layers.append(nn.Sigmoid())

    # Kept under the attribute name `model` so state_dict keys stay `model.<idx>.*`.
    self.model = nn.Sequential(*layers)

  def forward(self, x):
    """Run `x` through the sequential stack and return its output."""
    return self.model(x)

In [13]:
# Seed PyTorch's global RNG before the model is built so the initial weights
# (and any later draws from the global generator, such as DataLoader shuffling)
# are the same on every Restart & Run All.
torch.manual_seed(42)

# One input unit per feature column of the scaled training matrix.
input_dim = X_train.shape[1]
model = churnANN(input_dim)

In [14]:
# Loss and optimizer
# Binary Cross Entropy is computed on the probabilities the model outputs;
# Adam updates every model parameter with a learning rate of 0.001.

criterion = nn.BCELoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [16]:
import mlflow
import mlflow.pytorch

models_dir = './models'
os.makedirs(models_dir, exist_ok=True)
# Single location for the best-so-far checkpoint, reused for saving and restoring.
best_model_path = os.path.join(models_dir, 'best_churn_model.pth')

# Training hyperparameters and early-stopping state (defined once).
num_epochs = 50
patience = 5                   # epochs without validation improvement before stopping
best_val_loss = float('inf')
counter = 0                    # consecutive epochs without improvement

# Start MLflow experiment
mlflow.set_experiment("Churn_ANN_Experiment")

with mlflow.start_run():
    # Log parameters. Batch size and learning rate are read from the objects
    # actually used for training so the logged values cannot drift from reality.
    mlflow.log_param("input_dim", input_dim)
    mlflow.log_param("epochs", num_epochs)
    mlflow.log_param("batch_size", train_loader.batch_size)
    mlflow.log_param("learning_rate", optimizer.param_groups[0]["lr"])
    mlflow.log_param("patience", patience)

    for epoch in range(num_epochs):
        # ---- training pass ----
        model.train()
        running_loss = 0.0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            # Weight by batch size so the epoch loss is a per-sample mean
            # even when the last batch is smaller.
            running_loss += loss.item() * X_batch.size(0)
        train_loss = running_loss / len(train_loader.dataset)

        # ---- validation pass (no gradient tracking) ----
        model.eval()
        val_loss_total = 0.0
        with torch.no_grad():
            for X_val, y_val in val_loader:
                val_outputs = model(X_val)
                val_loss = criterion(val_outputs, y_val)
                val_loss_total += val_loss.item() * X_val.size(0)
        val_loss_avg = val_loss_total / len(val_loader.dataset)

        mlflow.log_metric("train_loss", train_loss, step=epoch)
        mlflow.log_metric("val_loss", val_loss_avg, step=epoch)

        if val_loss_avg < best_val_loss:
            # New best: reset the patience counter and checkpoint the weights.
            best_val_loss = val_loss_avg
            counter = 0
            torch.save(model.state_dict(), best_model_path)
        else:
            counter += 1
            if counter >= patience:
                print(f"Early stopping triggered at epoch {epoch+1}")
                break

    # Restore the best checkpoint whether or not early stopping fired, so that
    # a run which completes every epoch does not log last-epoch weights.
    # The finite check ensures a checkpoint was written during THIS run
    # (a stale file from an earlier run is never loaded).
    if best_val_loss != float('inf'):
        model.load_state_dict(torch.load(best_model_path))
        mlflow.log_metric("best_val_loss", best_val_loss)

    # Log the final (best-validation) model to MLflow
    mlflow.pytorch.log_model(model, "churn_ann_model")


2025/12/19 14:49:55 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2025/12/19 14:49:55 INFO mlflow.store.db.utils: Updating database tables
2025/12/19 14:49:55 INFO alembic.runtime.migration: Context impl SQLiteImpl.
2025/12/19 14:49:55 INFO alembic.runtime.migration: Will assume non-transactional DDL.
2025/12/19 14:49:55 INFO alembic.runtime.migration: Running upgrade  -> 451aebb31d03, add metric step
2025/12/19 14:49:55 INFO alembic.runtime.migration: Running upgrade 451aebb31d03 -> 90e64c465722, migrate user column to tags
2025/12/19 14:49:55 INFO alembic.runtime.migration: Running upgrade 90e64c465722 -> 181f10493468, allow nulls for metric values
2025/12/19 14:49:55 INFO alembic.runtime.migration: Running upgrade 181f10493468 -> df50e92ffc5e, Add Experiment Tags Table
2025/12/19 14:49:55 INFO alembic.runtime.migration: Running upgrade df50e92ffc5e -> 7ac759974ad8, Update run tags with larger limit
2025/12/19 14:49:55 INFO alembic.runtime.migration: Running 

Early stopping triggered at epoch 43


In [18]:
# Persist the model's current weights under a name tagged with the last epoch run.
# NOTE: the training cell may have rolled the model back to its best-validation
# checkpoint, so the epoch in the file name is where training stopped, not
# necessarily the epoch these weights were produced in.
final_checkpoint_path = f'./models/epoch_{epoch+1}.pth'
torch.save(model.state_dict(), final_checkpoint_path)

In [20]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, classification_report

# Evaluation mode, no gradient tracking: one forward pass over the full test set.
model.eval()
with torch.no_grad():
    y_pred = model(X_test_tensor)
    # Outputs at or above 0.5 are labelled as the positive class (1.0).
    y_pred_class = y_pred.ge(0.5).float()

# sklearn works on NumPy arrays
y_true = y_test_tensor.numpy()
y_pred_prob = y_pred.numpy()
y_pred_class = y_pred_class.numpy()

# Threshold-based metrics use the hard labels; ROC-AUC uses the raw outputs.
accuracy = accuracy_score(y_true, y_pred_class)
precision = precision_score(y_true, y_pred_class)
recall = recall_score(y_true, y_pred_class)
f1 = f1_score(y_true, y_pred_class)
roc_auc = roc_auc_score(y_true, y_pred_prob)

# Each label is left-padded to 11 characters so the values line up in a column.
metric_rows = (
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1 Score:", f1),
    ("ROC-AUC:", roc_auc),
)
for label, value in metric_rows:
    print(f"{label:<11}{value:.4f}")

# Confusion matrix
print("Confusion Matrix:")
print(confusion_matrix(y_true, y_pred_class))

# Per-class precision / recall / F1 breakdown
print("Classification Report:")
print(classification_report(y_true, y_pred_class))


Accuracy:  0.9250
Precision: 0.8052
Recall:    0.6392
F1 Score:  0.7126
ROC-AUC:   0.8850
Confusion Matrix:
[[555  15]
 [ 35  62]]
Classification Report:
              precision    recall  f1-score   support

         0.0       0.94      0.97      0.96       570
         1.0       0.81      0.64      0.71        97

    accuracy                           0.93       667
   macro avg       0.87      0.81      0.83       667
weighted avg       0.92      0.93      0.92       667

