# Set up

In [1]:
import copy
import json
import random

import numpy as np
import pandas as pd
import torch
import torch.nn as nn

In [2]:
# Single seed shared by set_seeds() and the explicit torch seeding cells below
SEED = 1234

In [3]:
def set_seeds(seed=1234):
    """Seed every random number generator used in this notebook.

    Args:
        seed: Integer applied to NumPy, Python's `random`, and PyTorch
            (CPU generator, current CUDA device, and all CUDA devices).
    """
    seeders = (
        np.random.seed,
        random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,  # multi-GPU
    )
    for apply_seed in seeders:
        apply_seed(seed)

In [4]:
# Set seeds for reproducibility
set_seeds(seed=SEED)

In [5]:
# Set device
cuda = True  # set to False to force CPU even when a GPU is available
device = torch.device("cuda" if (
    torch.cuda.is_available() and cuda) else "cpu")
# Default new tensors to CPU floats, then switch the default to CUDA floats
# when a GPU was selected above.
# NOTE(review): confirm `torch.set_default_tensor_type` is still supported by
# the installed PyTorch version.
torch.set_default_tensor_type("torch.FloatTensor")
if device.type == "cuda":
    torch.set_default_tensor_type("torch.cuda.FloatTensor")
print (device)

cpu


## Load data

In [6]:
import matplotlib.pyplot as plt
import pandas as pd

In [7]:
# Load data
path = '../processed_churn.csv'  # relative to the notebook's working directory
df = pd.read_csv(path) # load
# Shuffle every row and renumber the index from 0. No `random_state` is given.
# NOTE(review): presumably reproducibility here relies on the global NumPy
# seed set by set_seeds() — confirm.
df = df.sample(frac=1).reset_index(drop=True) # shuffle
df.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,PaperlessBilling,MonthlyCharges,TotalCharges,Churn,...,TechSupport_Yes,StreamingTV_No_internet_service,StreamingTV_Yes,StreamingMovies_No_internet_service,StreamingMovies_Yes,Contract_One_year,Contract_Two_year,PaymentMethod_Credit_card__automatic_,PaymentMethod_Electronic_check,PaymentMethod_Mailed_check
0,0,0,0,0,41,1,1,74.65,3090.65,0,...,1,0,0,0,1,0,0,0,0,0
1,1,0,1,1,17,1,1,66.7,1077.05,0,...,1,0,0,0,0,0,0,0,0,1
2,0,0,1,1,58,1,1,24.5,1497.9,0,...,0,1,0,1,0,1,0,1,0,0
3,1,0,0,0,1,1,1,50.45,50.45,1,...,0,0,0,0,0,0,0,0,1,0
4,0,0,0,0,9,1,1,44.2,403.35,1,...,0,0,0,0,0,0,0,0,0,0


In [8]:
# Feature scaling: rescale the continuous columns to the [0, 1] range.
from sklearn.preprocessing import MinMaxScaler

# Continuous columns; the remaining features are already 0/1 indicators.
NUMERIC_COLS = ['tenure', 'MonthlyCharges', 'TotalCharges']

# Fit a single scaler on all numeric columns in one call. MinMaxScaler scales
# each column independently, so the resulting values are the same as fitting
# column by column, but `sc` now keeps the min/max of every column (previously
# each refit discarded the earlier column's statistics) and can be reused to
# transform new data.
sc = MinMaxScaler()
df[NUMERIC_COLS] = sc.fit_transform(df[NUMERIC_COLS])

# NOTE(review): the scaler is fitted on the full dataset before the
# train/val/test split, so validation/test statistics leak into preprocessing.
# Consider fitting on the training split only.

In [9]:
# Separate the feature matrix from the target column and report their shapes
X = df.drop(columns=['Churn']).values
y = df.loc[:, 'Churn'].values
print ("X: ", X.shape)
print ("y: ", y.shape)

X:  (7043, 30)
y:  (7043,)


## Split data

In [10]:
import collections
from sklearn.model_selection import train_test_split

In [11]:
# Intended fraction of the data in each split (the three values sum to 1.0)
TRAIN_SIZE = 0.7
# NOTE(review): VAL_SIZE and TEST_SIZE are not referenced by the split helper
# below, which instead halves whatever is left after the training split.
VAL_SIZE = 0.15
TEST_SIZE = 0.15

In [12]:
def train_val_test_split(X, y, train_size):
    """Split a dataset into stratified train, validation and test subsets.

    Args:
        X: Feature matrix with one row per sample.
        y: Labels aligned with the rows of `X`; also used for stratification.
        train_size: Portion of the data assigned to the training split
            (forwarded unchanged to `train_test_split`).

    Returns:
        Tuple `(X_train, X_val, X_test, y_train, y_val, y_test)`.
    """
    # Honor the caller's `train_size` instead of the global TRAIN_SIZE constant.
    X_train, X_, y_train, y_ = train_test_split(
        X, y, train_size=train_size, stratify=y)
    # Divide the held-out remainder evenly between validation and test.
    X_val, X_test, y_val, y_test = train_test_split(
        X_, y_, train_size=0.5, stratify=y_)
    return X_train, X_val, X_test, y_train, y_val, y_test

In [13]:
# Create data splits
# (y is passed for stratification inside the helper; the remainder after the
# training split is halved into validation and test)
X_train, X_val, X_test, y_train, y_val, y_test = train_val_test_split(
    X=X, y=y, train_size=TRAIN_SIZE)
print (f"X_train: {X_train.shape}, y_train: {y_train.shape}")
print (f"X_val: {X_val.shape}, y_val: {y_val.shape}")
print (f"X_test: {X_test.shape}, y_test: {y_test.shape}")
print (f"Sample point: {X_train[0]} → {y_train[0]}")

X_train: (4930, 30), y_train: (4930,)
X_val: (1056, 30), y_val: (1056,)
X_test: (1057, 30), y_test: (1057,)
Sample point: [0.         0.         1.         1.         0.26388889 1.
 0.         0.01890547 0.04256866 0.         0.         0.
 1.         1.         0.         1.         0.         1.
 0.         1.         0.         1.         0.         1.
 0.         0.         1.         0.         0.         1.        ] → 0


## LabelEncoder

In [14]:
import itertools

In [15]:
class LabelEncoder(object):
    """Bidirectional mapping between class labels and integer indices.

    Attributes:
        class_to_index: dict mapping each label to its integer index.
        index_to_class: inverse mapping, index -> label.
        classes: list of known labels, in mapping order.
    """
    def __init__(self, class_to_index=None):
        """Create an encoder, optionally from an existing label -> index dict.

        Args:
            class_to_index: Optional initial mapping. It is copied, so the
                caller's dict is never mutated by `fit`.
        """
        # A `{}` default would be one dict shared by every instance, so a
        # `fit` on one encoder would leak classes into all later encoders.
        self.class_to_index = dict(class_to_index) if class_to_index else {}
        self._refresh_views()

    def _refresh_views(self):
        """Rebuild the derived `index_to_class` and `classes` attributes."""
        self.index_to_class = {v: k for k, v in self.class_to_index.items()}
        self.classes = list(self.class_to_index.keys())

    def __len__(self):
        return len(self.class_to_index)

    def __str__(self):
        return f"<LabelEncoder(num_classes={len(self)})>"

    def fit(self, y):
        """Assign an index to every distinct label in `y` (sorted order).

        Returns:
            self, to allow chaining.
        """
        # `.tolist()` yields native Python scalars rather than NumPy scalar
        # types, which keeps the mapping serializable by `json.dump` in `save`.
        for i, class_ in enumerate(np.unique(y).tolist()):
            self.class_to_index[class_] = i
        self._refresh_views()
        return self

    def encode(self, y):
        """Map an iterable of labels to a NumPy integer array of indices.

        Raises:
            KeyError: if a label was never seen by `fit`.
        """
        return np.array([self.class_to_index[item] for item in y], dtype=int)

    def decode(self, y):
        """Map an iterable of indices back to a list of labels.

        Raises:
            KeyError: if an index is not in the mapping.
        """
        return [self.index_to_class[item] for item in y]

    def save(self, fp):
        """Write the label -> index mapping to `fp` as JSON.

        Note: JSON object keys are always strings, so non-string labels come
        back as strings when the file is read with `load`.
        """
        contents = {'class_to_index': self.class_to_index}
        with open(fp, "w") as out_file:
            json.dump(contents, out_file, indent=4, sort_keys=False)

    @classmethod
    def load(cls, fp):
        """Build an encoder from a JSON file previously written by `save`."""
        with open(fp, "r") as in_file:
            kwargs = json.load(fp=in_file)
        return cls(**kwargs)

In [16]:
# Encode
# Fit on the training labels only, then display the learned label -> index mapping
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
label_encoder.class_to_index

{0: 0, 1: 1}

In [17]:
# Convert labels to integer class indices with the fitted encoder.
# The first training label is printed before and after encoding for comparison.
# Note: this cell overwrites y_train / y_val / y_test in place.
print (f"y_train[0]: {y_train[0]}")
y_train = label_encoder.encode(y_train)
y_val = label_encoder.encode(y_val)
y_test = label_encoder.encode(y_test)
print (f"y_train[0]: {y_train[0]}")

y_train[0]: 0
y_train[0]: 0


In [18]:
# Class weights
# Each class is weighted by the inverse of its count in the training labels,
# so the less frequent class receives the larger weight.
counts = np.bincount(y_train)  # counts[i] = number of training labels equal to i
class_weights = {i: 1.0/count for i, count in enumerate(counts)}
print (f"counts: {counts}\nweights: {class_weights}")

counts: [3622 1308]
weights: {0: 0.0002760905577029266, 1: 0.0007645259938837921}


# DataLoader

We're going to place our data into a [`Dataset`](https://pytorch.org/docs/stable/data.html#torch.utils.data.Dataset) and use a [`DataLoader`](https://pytorch.org/docs/stable/data.html#torch.utils.data.DataLoader) to efficiently create batches for training and evaluation.

In [23]:
import torch

In [24]:
# Set seed for reproducibility
torch.manual_seed(SEED)

<torch._C.Generator at 0x296b1dd8730>

In [25]:
class Dataset(torch.utils.data.Dataset):
    """In-memory dataset that pairs each feature row with its label."""

    def __init__(self, X, y):
        self.X, self.y = X, y

    def __len__(self):
        n_samples = len(self.y)
        return n_samples

    def __str__(self):
        return f"<Dataset(N={len(self)})>"

    def __getitem__(self, index):
        # One sample as a two-element list: [features, label]
        return [self.X[index], self.y[index]]

    def collate_fn(self, batch):
        """Turn a list of `[features, label]` samples into a pair of tensors.

        Returns:
            Tuple `(X, y)`: a float tensor of stacked features and a long
            tensor of stacked labels.
        """
        # Object array with one row per sample: column 0 features, column 1 label
        samples = np.array(batch, dtype=object)
        features = np.stack(samples[:, 0], axis=0).astype(np.float32)
        labels = np.stack(samples[:, 1], axis=0).astype(np.int32)
        return torch.FloatTensor(features), torch.LongTensor(labels)

    def create_dataloader(self, batch_size, shuffle=False, drop_last=False):
        """Wrap this dataset in a DataLoader that batches with `collate_fn`."""
        loader_kwargs = dict(
            batch_size=batch_size, collate_fn=self.collate_fn,
            shuffle=shuffle, drop_last=drop_last, pin_memory=True)
        return torch.utils.data.DataLoader(dataset=self, **loader_kwargs)

In [26]:
# Create datasets
# One Dataset per split; the summary below also shows the first training sample
train_dataset = Dataset(X=X_train, y=y_train)
val_dataset = Dataset(X=X_val, y=y_val)
test_dataset = Dataset(X=X_test, y=y_test)
print ("Datasets:\n"
    f"  Train dataset:{train_dataset.__str__()}\n"
    f"  Val dataset: {val_dataset.__str__()}\n"
    f"  Test dataset: {test_dataset.__str__()}\n"
    "Sample point:\n"
    f"  X: {train_dataset[0][0]}\n"
    f"  y: {train_dataset[0][1]}")

Datasets:
  Train dataset:<Dataset(N=4930)>
  Val dataset: <Dataset(N=1056)>
  Test dataset: <Dataset(N=1057)>
Sample point:
  X: [0.         0.         1.         1.         0.26388889 1.
 0.         0.01890547 0.04256866 0.         0.         0.
 1.         1.         0.         1.         0.         1.
 0.         1.         0.         1.         0.         1.
 0.         0.         1.         0.         0.         1.        ]
  y: 0


In [27]:
# Create dataloaders
batch_size = 64
# NOTE(review): `shuffle` is left at its default (False) for every split,
# including training, so batches are served in the same order each epoch —
# confirm this is intended.
train_dataloader = train_dataset.create_dataloader(batch_size=batch_size)
val_dataloader = val_dataset.create_dataloader(batch_size=batch_size)
test_dataloader = test_dataset.create_dataloader(batch_size=batch_size)
batch_X, batch_y = next(iter(train_dataloader))  # peek at the first training batch
print ("Sample batch:\n"
    f"  X: {list(batch_X.size())}\n"
    f"  y: {list(batch_y.size())}\n"
    "Sample point:\n"
    f"  X: {batch_X[0]}\n"
    f"  y: {batch_y[0]}")

Sample batch:
  X: [64, 30]
  y: [64]
Sample point:
  X: tensor([0.0000, 0.0000, 1.0000, 1.0000, 0.2639, 1.0000, 0.0000, 0.0189, 0.0426,
        0.0000, 0.0000, 0.0000, 1.0000, 1.0000, 0.0000, 1.0000, 0.0000, 1.0000,
        0.0000, 1.0000, 0.0000, 1.0000, 0.0000, 1.0000, 0.0000, 0.0000, 1.0000,
        0.0000, 0.0000, 1.0000])
  y: 0


# Device

In [28]:
# Set CUDA seeds
# (set_seeds() in the setup section already makes these two calls with SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED) # multi-GPU

In [29]:
# Set device
# (repeats the device-selection cell from the setup section verbatim)
cuda = True  # set to False to force CPU even when a GPU is available
device = torch.device("cuda" if (
    torch.cuda.is_available() and cuda) else "cpu")
# Default tensor type follows the selected device: CPU floats unless CUDA is used
torch.set_default_tensor_type("torch.FloatTensor")
if device.type == "cuda":
    torch.set_default_tensor_type("torch.cuda.FloatTensor")
print (device)

cpu


# Model

Let's initialize the model we'll be using to show the capabilities of training utilities.

In [30]:
import math
from torch import nn
import torch.nn.functional as F

In [31]:
INPUT_DIM = X_train.shape[1] # number of feature columns per sample
HIDDEN_DIM = 100  # width of every hidden layer
DROPOUT_P = 0.1  # probability passed to the model's dropout layer
NUM_CLASSES = len(label_encoder.classes)
NUM_EPOCHS = 10  # NOTE(review): reassigned to 500 in the training section before it is used

In [32]:
class MLP(nn.Module):
    """Feed-forward classifier: four SELU-activated linear layers, alpha
    dropout, then a linear layer that outputs one logit per class."""

    def __init__(self, input_dim, hidden_dim, dropout_p, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(in_features=input_dim, out_features=hidden_dim)
        self.fc2 = nn.Linear(in_features=hidden_dim, out_features=hidden_dim)
        self.fc3 = nn.Linear(in_features=hidden_dim, out_features=hidden_dim)
        self.fc4 = nn.Linear(in_features=hidden_dim, out_features=hidden_dim)
        self.dropout = nn.AlphaDropout(p=dropout_p)
        self.fc5 = nn.Linear(in_features=hidden_dim, out_features=num_classes)

    def forward(self, inputs):
        """Return logits for `inputs`, a one-element sequence holding the
        feature tensor (any other length raises on unpacking)."""
        (features,) = inputs
        hidden = features
        for layer in (self.fc1, self.fc2, self.fc3, self.fc4):
            hidden = F.selu(layer(hidden))
        # Dropout is applied once, just before the output layer
        return self.fc5(self.dropout(hidden))

In [34]:
# Initialize model
model = MLP(
    input_dim=INPUT_DIM, hidden_dim=HIDDEN_DIM,
    dropout_p=DROPOUT_P, num_classes=NUM_CLASSES)
model = model.to(device) # set device
# Print the module itself to show the architecture; `model.named_parameters`
# without parentheses only prints the repr of the bound method.
print (model)

<bound method Module.named_parameters of MLP(
  (fc1): Linear(in_features=30, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=100, bias=True)
  (fc3): Linear(in_features=100, out_features=100, bias=True)
  (fc4): Linear(in_features=100, out_features=100, bias=True)
  (dropout): AlphaDropout(p=0.1, inplace=False)
  (fc5): Linear(in_features=100, out_features=2, bias=True)
)>


# Training

Let's put all of this together now to train our model.

In [35]:
from torch.optim import Adam

In [36]:
LEARNING_RATE = 1e-1  # initial learning rate handed to the optimizer
NUM_EPOCHS = 500  # upper bound on training epochs (replaces the earlier value of 10)
PATIENCE = 50  # epochs without a new best validation loss before stopping early

In [37]:
# Define Loss
# The weights are read in dict order, i.e. by class index 0, 1, ... as built
# with enumerate() in the class-weights cell, and moved to the active device.
class_weights_tensor = torch.Tensor(list(class_weights.values())).to(device)
loss_fn = nn.CrossEntropyLoss(weight=class_weights_tensor)

In [38]:
# Define optimizer & scheduler
optimizer = Adam(model.parameters(), lr=LEARNING_RATE)
# Multiply the learning rate by `factor` when the monitored value (the
# validation loss passed to `scheduler.step`) has stopped decreasing for more
# than `patience` epochs.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode="min", factor=0.1, patience=3)

In [39]:
class Trainer(object):
    """Minimal train / evaluate / predict loop around a PyTorch model.

    Every batch is a sequence of tensors whose last element is the target;
    the preceding elements are handed to the model as a list.
    """

    def __init__(self, model, device, loss_fn=None, optimizer=None, scheduler=None):
        """Store the model and the (optional) training components.

        Args:
            model: Module called as `model(inputs)` with a list of tensors.
            device: Device every batch is moved to before the forward pass.
            loss_fn: Callable `(logits, targets) -> loss`; needed by
                `train_step` and `eval_step`.
            optimizer: Optimizer over the model's parameters; needed by
                `train_step`.
            scheduler: Optional scheduler stepped with the validation loss
                once per epoch in `train`.
        """
        # Set params
        self.model = model
        self.device = device
        self.loss_fn = loss_fn
        self.optimizer = optimizer
        self.scheduler = scheduler

    def _to_device(self, batch):
        """Move every tensor of a batch to the trainer's device."""
        return [item.to(self.device) for item in batch]

    def train_step(self, dataloader):
        """Run one optimization pass over `dataloader`.

        Returns:
            Mean of the per-batch losses.
        """
        # Set model to train mode
        self.model.train()
        loss = 0.0

        # Iterate over train batches
        for i, batch in enumerate(dataloader):

            # Step
            batch = self._to_device(batch)
            inputs, targets = batch[:-1], batch[-1]
            self.optimizer.zero_grad()  # Reset gradients
            z = self.model(inputs)  # Forward pass
            J = self.loss_fn(z, targets)  # Define loss
            J.backward()  # Backward pass
            self.optimizer.step()  # Update weights

            # Running mean of the batch losses
            loss += (J.detach().item() - loss) / (i + 1)

        return loss

    def eval_step(self, dataloader):
        """Evaluate on a validation or test dataloader without gradients.

        Returns:
            Tuple `(loss, y_true, y_prob)`: mean batch loss, targets stacked
            as a column, and per-class probabilities with one row per sample.
        """
        # Set model to eval mode
        self.model.eval()
        loss = 0.0
        y_trues, y_probs = [], []

        # Iterate over val batches
        with torch.inference_mode():
            for i, batch in enumerate(dataloader):

                # Step
                batch = self._to_device(batch)
                inputs, y_true = batch[:-1], batch[-1]
                z = self.model(inputs)  # Forward pass
                J = self.loss_fn(z, y_true).item()

                # Running mean of the batch losses
                loss += (J - loss) / (i + 1)

                # Store outputs. Softmax over the class dimension gives
                # probabilities that sum to 1 per row, consistent with
                # `predict_step` (an element-wise sigmoid does not).
                y_prob = F.softmax(z, dim=1).cpu().numpy()
                y_probs.extend(y_prob)
                y_trues.extend(y_true.cpu().numpy())

        return loss, np.vstack(y_trues), np.vstack(y_probs)

    def predict_step(self, dataloader):
        """Return per-class probabilities for every sample in `dataloader`.

        The last element of each batch (the target) is ignored.
        """
        # Set model to eval mode
        self.model.eval()
        y_probs = []

        # Iterate over val batches
        with torch.inference_mode():
            for i, batch in enumerate(dataloader):

                # Forward pass w/ inputs (moved to the model's device first,
                # as in the train and eval steps)
                batch = self._to_device(batch)
                inputs, targets = batch[:-1], batch[-1]
                z = self.model(inputs)

                # Store outputs
                y_prob = F.softmax(z, dim=1).cpu().numpy()
                y_probs.extend(y_prob)

        return np.vstack(y_probs)

    def train(self, num_epochs, patience, train_dataloader, val_dataloader):
        """Train with early stopping on the validation loss.

        Args:
            num_epochs: Maximum number of epochs.
            patience: Number of consecutive epochs without a new best
                validation loss after which training stops.
            train_dataloader: Batches used for optimization.
            val_dataloader: Batches used to compute the validation loss.

        Returns:
            A copy of the model taken at the epoch with the lowest validation
            loss (in eval mode, since it is copied right after `eval_step`).
            If no epoch improved on the initial value, the current model is
            returned.
        """
        best_val_loss = np.inf
        # Defined up front so they exist even if no epoch runs or the first
        # validation loss is NaN (which never compares as an improvement).
        best_model = self.model
        _patience = patience
        for epoch in range(num_epochs):
            # Steps
            train_loss = self.train_step(dataloader=train_dataloader)
            val_loss, _, _ = self.eval_step(dataloader=val_dataloader)
            if self.scheduler is not None:  # scheduler is optional
                self.scheduler.step(val_loss)

            # Early stopping
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                # Snapshot the weights: keeping a reference to self.model
                # would track every later update and return the last model.
                best_model = copy.deepcopy(self.model)
                _patience = patience  # reset _patience
            else:
                _patience -= 1
            if _patience == 0:
                print("Stopping early!")
                break

            # Logging
            print(
                f"Epoch: {epoch+1} | "
                f"train_loss: {train_loss:.5f}, "
                f"val_loss: {val_loss:.5f}, "
                f"lr: {self.optimizer.param_groups[0]['lr']:.2E}, "
                f"_patience: {_patience}"
            )
        return best_model

In [40]:
# Trainer module
trainer = Trainer(
    model=model, device=device, loss_fn=loss_fn, 
    optimizer=optimizer, scheduler=scheduler)

In [41]:
# Train
best_model = trainer.train(
    NUM_EPOCHS, PATIENCE, train_dataloader, val_dataloader)



Epoch: 1 | train_loss: 20.31044, val_loss: 0.68700, lr: 1.00E-01, _patience: 50
Epoch: 2 | train_loss: 0.63845, val_loss: 0.55642, lr: 1.00E-01, _patience: 50
Epoch: 3 | train_loss: 0.58212, val_loss: 0.61291, lr: 1.00E-01, _patience: 49
Epoch: 4 | train_loss: 0.59687, val_loss: 0.53653, lr: 1.00E-01, _patience: 50
Epoch: 5 | train_loss: 0.60348, val_loss: 0.54040, lr: 1.00E-01, _patience: 49
Epoch: 6 | train_loss: 0.63544, val_loss: 0.61451, lr: 1.00E-01, _patience: 48
Epoch: 7 | train_loss: 0.64937, val_loss: 0.60846, lr: 1.00E-01, _patience: 47
Epoch: 8 | train_loss: 0.61215, val_loss: 0.58630, lr: 1.00E-02, _patience: 46
Epoch: 9 | train_loss: 0.55867, val_loss: 0.53751, lr: 1.00E-02, _patience: 45
Epoch: 10 | train_loss: 0.56185, val_loss: 0.54140, lr: 1.00E-02, _patience: 44
Epoch: 11 | train_loss: 0.55912, val_loss: 0.53611, lr: 1.00E-02, _patience: 50
Epoch: 12 | train_loss: 0.55196, val_loss: 0.55058, lr: 1.00E-02, _patience: 49
Epoch: 13 | train_loss: 0.56029, val_loss: 0.545

## Evaluation

In [42]:
import json
from sklearn.metrics import precision_recall_fscore_support, accuracy_score, average_precision_score, roc_auc_score, confusion_matrix

In [43]:
def get_performance(y_true, y_pred, classes, y_score=None):
    """Compute overall and per-class classification metrics.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted (hard) labels, aligned with `y_true`.
        classes: Class names; `classes[i]` is paired with the i-th entry of
            the per-class precision/recall/F1/support arrays.
        y_score: Optional continuous scores for the positive class (for
            example the predicted probability of class 1), used for AUPRC
            and AUROC. When omitted, the hard predictions in `y_pred` are
            used, which matches the previous behavior but only evaluates a
            single operating point.

    Returns:
        Dict with two keys: "overall" (accuracy, weighted precision/recall/
        F1, AUPRC, AUROC, num_samples) and "class" (one dict per class name).

    Side effects:
        Prints the confusion matrix.
    """
    # Performance
    performance = {"overall": {}, "class": {}}

    # Ranking metrics are meant for continuous scores; fall back to the hard
    # labels only when the caller has no scores to offer.
    ranking_input = y_pred if y_score is None else y_score

    # Overall performance
    metrics = precision_recall_fscore_support(y_true, y_pred, average="weighted")
    accuracy = accuracy_score(y_true, y_pred)
    AUPRC = average_precision_score(y_true, ranking_input)
    AUROC = roc_auc_score(y_true, ranking_input)
    ConfMatrix = confusion_matrix(y_true, y_pred)
    performance["overall"]["accuracy"] = accuracy
    performance["overall"]["precision"] = metrics[0]
    performance["overall"]["recall"] = metrics[1]
    performance["overall"]["f1"] = metrics[2]
    performance["overall"]["AUPRC"] = AUPRC
    performance["overall"]["AUROC"] = AUROC
    performance["overall"]["num_samples"] = np.float64(len(y_true))

    # Per-class performance
    metrics = precision_recall_fscore_support(y_true, y_pred, average=None)
    for i, class_name in enumerate(classes):
        performance["class"][class_name] = {
            # accuracy, AUPRC and AUROC are the overall values repeated for
            # every class; only precision/recall/f1/num_samples are per class.
            "accuracy": accuracy,
            "precision": metrics[0][i],
            "recall": metrics[1][i],
            "f1": metrics[2][i],
            "AUPRC": AUPRC,
            "AUROC": AUROC,
            "num_samples": np.float64(metrics[3][i]),
        }

    print(f'Confusion Matrix: {ConfMatrix}')
    return performance

In [44]:
# Get predictions
# eval_step returns the mean test loss, the targets and the per-class scores
test_loss, y_true, y_prob = trainer.eval_step(dataloader=test_dataloader)
y_pred = np.argmax(y_prob, axis=1)  # predicted class = column with the highest score in each row

In [45]:
# Determine performance
# Labels come from `y_test`; the test dataloader was created without
# shuffling, so its rows are in the same order as `y_pred`.
performance = get_performance(
    y_true=y_test, y_pred=y_pred, classes=label_encoder.classes)
print (json.dumps(performance["overall"], indent=2))

Confusion Matrix: [[666 110]
 [101 180]]
{
  "accuracy": 0.8003784295175024,
  "precision": 0.8024869027497133,
  "recall": 0.8003784295175024,
  "f1": 0.8013694536094588,
  "AUPRC": 0.4931482500769433,
  "AUROC": 0.7494084088491031,
  "num_samples": 1057.0
}
