### Data Source - https://www.kaggle.com/janiobachmann/bank-marketing-dataset
* The original dataset comes from the UCI Machine Learning Repository and was contributed by [Moro et al., 2014].


In [None]:
#Listing 5-1  - Import required libraries
#Import required libraries
import torch.nn as nn
import torch as tch
import numpy as np, pandas as pd
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.metrics import  precision_score, recall_score,roc_curve, auc, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import matplotlib.pyplot as plt
from tqdm import tqdm  # Import tqdm for progress bar

In [None]:
#Listing 5-2  - Load data into memory
# Read the bank-marketing CSV into a pandas DataFrame.
df = pd.read_csv("Data/bank.csv")
print(f"DF Shape: {df.shape}")
df.head()  # last expression of the cell: the notebook renders the first 5 rows

In [None]:
#Listing 5-3  - Distribution of target values
print("Distribution of Target Values in Dataset -")
# value_counts has to be *called*; the bare attribute only displays the bound
# method's repr instead of the per-class counts.
df["deposit"].value_counts()

In [None]:
#Listing 5-4  - Distribution of na (null) values in dataset
# Flag every missing cell, then total the flags per column.
missing_mask = df.isna()
missing_mask.sum()

In [None]:
#Listing 5-5  - Distribution of distinct datatypes
# Tally how many columns share each dtype.
column_dtypes = df.dtypes
column_dtypes.value_counts()

In [None]:
#Listing 5-6  - Extract categorical columns from dataset
# Keep only the object-dtype columns and remember their names; later cells
# encode these columns.
object_frame = df.select_dtypes(include="object")
categorical_columns = object_frame.columns
print("Categorical columns:", categorical_columns.tolist())

In [None]:
#For each categorical column whose values are exactly yes/no, convert into a 1/0 flag
yes_no_labels = {"yes", "no"}
for col in categorical_columns:
    # Compare the actual labels rather than only counting distinct values:
    #  - a two-valued column that is not yes/no is left for one-hot encoding
    #    instead of being collapsed to all zeros, and
    #  - re-running this cell is harmless, because an already-converted 0/1
    #    column no longer matches and is skipped.
    if set(df[col].dropna().unique()) == yes_no_labels:
        df[col] = np.where(df[col] == "yes", 1, 0)

df.head()

In [None]:
#Listing 5-7  - Onehot encoding for remaining non-binary categorical variables
# Replace each remaining categorical column with one indicator column per
# category, producing the one-hot encoded version of the dataset.
new_df = pd.get_dummies(data=df)
print(new_df)

In [None]:
#Define target and predictors for the model
target = "deposit"
# Preserve the DataFrame's own column order. Building the list from a set
# difference yields an order that can change between interpreter sessions
# (string hashing is randomized), which would permute the model's input
# features and defeat the fixed random seeds used later on.
predictors = [column for column in new_df.columns if column != target]
print(predictors)
print("new_df.shape:",new_df.shape)
new_df[predictors].head()

In [None]:
#Listing 5-8  - Prepare data for training and validation

# Cast every column to float32 so the arrays convert to float32 tensors
# (compatibility with the PyTorch layers used below).
new_df = new_df.astype(np.float32)

# First cut: 70% of the rows for training, 30% held back.
X_train, X_temp, y_train, y_temp = train_test_split(
    new_df[predictors],
    new_df[target],
    test_size=0.3,
    random_state=42,
)

# Second cut: divide the held-back rows evenly into validation and test sets.
X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=0.5,
    random_state=42,
)

# pandas -> NumPy -> torch. Targets get a trailing axis so they are (N, 1)
# column vectors.
X_train, X_test, X_val = (
    tch.from_numpy(frame.to_numpy()) for frame in (X_train, X_test, X_val)
)
y_train, y_test, y_val = (
    tch.from_numpy(series.to_numpy()).unsqueeze(1)
    for series in (y_train, y_test, y_val)
)

# Print the tensor sizes to verify the split.
for _label, _tensor in (
    ("X_train", X_train),
    ("X_test", X_test),
    ("X_val", X_val),
    ("y_train", y_train),
    ("y_test", y_test),
    ("y_val", y_val),
):
    print(f"{_label}.shape:", _tensor.shape)

In [None]:
#Listing 5-9  - Define function to train model

#Define function to train the network

# 1. Function Definition: This line defines the function train_network with several parameters
# def train_network(model,optimizer,loss_function,num_epochs,batch_size,X_train,Y_train,lambda_L1=0.0):
def train_network(model, optimizer, loss_function, num_epochs, batch_size, X_train, Y_train, lambda_L1=0.0, X_val=None, Y_val=None):
    """Train ``model`` on sequential mini-batches and return the loss per epoch.

    Every epoch walks ``X_train``/``Y_train`` front to back in slices of
    ``batch_size`` rows (the data is not shuffled here). When ``lambda_L1`` is
    non-zero, ``lambda_L1 * sum(|p|)`` over all model parameters is added to
    each batch loss before backpropagation.

    Args:
        model: The neural network to train; called as ``model(batch)``.
        optimizer: Optimizer that updates the model's weights.
        loss_function: Callable ``loss_function(output, labels)`` returning a
            scalar tensor.
        num_epochs: Number of full passes over the training data.
        batch_size: Number of rows per training batch.
        X_train, Y_train: Training inputs and the matching labels.
        lambda_L1: L1 regularization coefficient (default 0.0 = disabled).
        X_val, Y_val: Optional validation inputs and labels. When both are
            given, the validation loss is printed after every epoch.

    Returns:
        list[float]: One entry per epoch: the batch losses (including any L1
        penalty), each weighted by its batch size, summed and divided by the
        number of training rows.
    """
    num_samples = X_train.shape[0]
    # A zero coefficient adds nothing to the loss or to its gradients, so the
    # sweep over every parameter is skipped entirely in that case.
    apply_l1 = lambda_L1 != 0
    run_validation = X_val is not None and Y_val is not None

    loss_across_epochs = []

    for epoch in range(num_epochs):
        train_loss = 0.0
        # Training mode matters for layers such as dropout.
        model.train()

        for start in tqdm(range(0, num_samples, batch_size), desc=f"Epoch {epoch+1}/{num_epochs}"):
            # The final batch may be shorter than batch_size.
            stop = min(num_samples, start + batch_size)
            input_data = X_train[start:stop]
            labels = Y_train[start:stop]

            # Clear gradients left over from the previous step.
            optimizer.zero_grad()

            output_data = model(input_data)
            loss = loss_function(output_data, labels)

            if apply_l1:
                l1_penalty = sum(p.abs().sum() for p in model.parameters())
                loss = loss + lambda_L1 * l1_penalty

            loss.backward()
            optimizer.step()

            # Weight by the batch size so the epoch figure is a per-row mean
            # even when the last batch is short.
            train_loss += loss.item() * input_data.size(0)

        epoch_loss = train_loss / num_samples
        loss_across_epochs.append(epoch_loss)

        # Report the training loss every 500 epochs.
        if epoch % 500 == 0:
            print(f"Epoch: {epoch} - Loss:{epoch_loss:.4f}")

        if run_validation:
            # Evaluation mode and no autograd bookkeeping for the validation pass.
            model.eval()
            with tch.no_grad():
                val_loss = loss_function(model(X_val), Y_val)
            print(f"Validation Loss: {val_loss.item():.4f}")

    return loss_across_epochs

In [None]:
#Listing 5-10  - Define function to evaluate model

#Define function for evaluating NN
def _print_metrics(split_name, y_true, y_pred, y_prob):
    """Print accuracy, precision, recall and ROC AUC for one data split."""
    print(f"{split_name} Accuracy--", round(accuracy_score(y_true, y_pred), 3))
    print(f"{split_name} Precision-", round(precision_score(y_true, y_pred), 3))
    print(f"{split_name} Recall----", round(recall_score(y_true, y_pred), 3))
    print(f"{split_name} ROCAUC----", round(roc_auc_score(y_true, y_prob), 3))


def evaluate_model(model,X_test,y_test,X_train,y_train,loss_list):
    """Print classification metrics and plot the loss and ROC curves.

    Args:
        model: Trained network; ``model(X)`` must return one probability per row.
        X_test, y_test: Held-out inputs and labels (reported as "Validation").
        X_train, y_train: Training inputs and labels (reported as "Training").
        loss_list: Per-epoch training losses, drawn as the loss curve.

    Returns:
        None. Metrics are printed and a two-panel matplotlib figure is shown.
    """
    model.eval()  # evaluation mode: dropout and similar layers are disabled

    # Pure inference: no autograd graph is needed, and the probabilities are
    # converted to NumPy once for use by every metric below.
    with tch.no_grad():
        y_test_prob = model(X_test).numpy()
        y_train_prob = model(X_train).numpy()

    # Threshold the probabilities at 0.5 to obtain hard 0/1 predictions.
    y_test_pred = np.where(y_test_prob > 0.5, 1, 0)
    y_train_pred = np.where(y_train_prob > 0.5, 1, 0)

    # The scores are printed as-is; a stray unary minus used to negate
    # accuracy, precision and recall in this report.
    print("\n Model Performance -")
    _print_metrics("Training", y_train, y_train_pred, y_train_prob)
    _print_metrics("Validation", y_test, y_test_pred, y_test_prob)
    print("\n")

    # Left panel: training loss across epochs.
    plt.figure(figsize=(20,5))
    plt.subplot(1, 2, 1)
    plt.plot(loss_list)
    plt.title('Loss across epochs')
    plt.ylabel('Loss')
    plt.xlabel('Epochs')

    # Right panel: ROC curves for the held-out and training splits.
    plt.subplot(1, 2, 2)
    fpr_v, tpr_v, _ = roc_curve(y_test, y_test_prob)
    roc_auc_v = auc(fpr_v, tpr_v)
    fpr_t, tpr_t, _ = roc_curve(y_train, y_train_prob)
    roc_auc_t = auc(fpr_t, tpr_t)

    plt.title('Receiver Operating Characteristic:Validation')
    plt.plot(fpr_v, tpr_v, 'b', label = 'Validation AUC = %0.2f' % roc_auc_v)
    plt.plot(fpr_t, tpr_t, 'r', label = 'Training AUC = %0.2f' % roc_auc_t)
    plt.legend(loc = 'lower right')
    plt.plot([0, 1], [0, 1],'r--')  # chance-level diagonal for reference
    plt.xlim([0, 1])
    plt.ylim([0, 1])
    plt.ylabel('True Positive Rate')
    plt.xlabel('False Positive Rate')

    plt.show()

In [None]:
#Listing 5-11  - Define Neural Network

#Define Neural Network
# 1. class NeuralNetwork(nn.Module): This line defines a new class NeuralNetwork that inherits from nn.Module, which is a base class for all neural network modules in PyTorch.
class NeuralNetwork(nn.Module):
    """Fully connected binary classifier: input -> 96 -> 192 -> 384 -> 1.

    Each hidden layer is followed by ReLU; the single output unit goes through
    a sigmoid, so the forward pass returns one probability per input row.

    Args:
        input_dim: Number of input features. Defaults to 48, the width this
            notebook has always used.
    """

    def __init__(self, input_dim=48):
        super().__init__()
        # Seed before creating the layers so every new instance starts from
        # the same initial weights. Note that this resets torch's global RNG.
        tch.manual_seed(2020)
        self.fc1 = nn.Linear(input_dim, 96)
        self.fc2 = nn.Linear(96, 192)
        self.fc3 = nn.Linear(192, 384)
        self.out = nn.Linear(384, 1)
        self.relu = nn.ReLU()
        self.final = nn.Sigmoid()

    def forward(self, x):
        """Return the predicted probability for every row of ``x``."""
        op = self.relu(self.fc1(x))
        op = self.relu(self.fc2(op))
        op = self.relu(self.fc3(op))
        y = self.final(self.out(op))
        return y
    
#Baseline run: no regularization of any kind
num_epochs = 256
batch_size = 64
loss_function = nn.BCELoss()  # binary cross-entropy

#Hyperparameters
weight_decay = 0.0  # handed to the optimizer; 0 means no L2 regularization
lambda_L1 = 0.0     # handed to train_network; 0 means no L1 regularization

#Fresh model and its optimizer
model = NeuralNetwork()
adam_optimizer = tch.optim.Adam(
    params=model.parameters(),
    lr=0.001,
    weight_decay=weight_decay,
)

#Train, printing the validation loss after every epoch
adam_loss = train_network(
    model=model,
    optimizer=adam_optimizer,
    loss_function=loss_function,
    num_epochs=num_epochs,
    batch_size=batch_size,
    X_train=X_train,
    Y_train=y_train,
    lambda_L1=lambda_L1,
    X_val=X_val,
    Y_val=y_val,
)

#Report metrics on the test and training sets
evaluate_model(
    model=model,
    X_test=X_test,
    y_test=y_test,
    X_train=X_train,
    y_train=y_train,
    loss_list=adam_loss,
)


In [None]:
#Listing 5-12  - L1 Regularization
num_epochs = 256
batch_size = 64

#Hyperparameters
weight_decay = 0.0   # no L2 regularization
lambda_L1 = 0.0001   # non-zero coefficient switches on the L1 penalty in train_network

#Fresh model, loss and optimizer
model = NeuralNetwork()
loss_function = nn.BCELoss()  # binary cross-entropy
adam_optimizer = tch.optim.Adam(
    params=model.parameters(),
    lr=0.001,
    weight_decay=weight_decay,
)

#Train with the L1 penalty added to every batch loss
adam_loss = train_network(
    model=model,
    optimizer=adam_optimizer,
    loss_function=loss_function,
    num_epochs=num_epochs,
    batch_size=batch_size,
    X_train=X_train,
    Y_train=y_train,
    lambda_L1=lambda_L1,
)

#Report metrics on the test and training sets
evaluate_model(
    model=model,
    X_test=X_test,
    y_test=y_test,
    X_train=X_train,
    y_train=y_train,
    loss_list=adam_loss,
)

In [None]:
#Listing 5-13  - L2 Regularization
num_epochs = 256
batch_size = 64

#Hyperparameters
weight_decay = 0.001  # non-zero weight decay switches on L2 regularization
lambda_L1 = 0.00      # no L1 regularization

#Fresh model and loss
model = NeuralNetwork()
loss_function = nn.BCELoss()  # binary cross-entropy

# Adam's main tuning parameters, for reference:
#   params       - the parameters to optimize, normally model.parameters()
#   lr           - learning rate / step size (default 0.001)
#   betas        - (beta1, beta2) coefficients for the running averages of
#                  the gradient and its square (default (0.9, 0.999))
#   eps          - small constant for numerical stability (default 1e-8)
#   weight_decay - L2 penalty coefficient (default 0)
#   amsgrad      - whether to use the AMSGrad variant (default False)
adam_optimizer = tch.optim.Adam(
    params=model.parameters(),
    lr=0.001,
    weight_decay=weight_decay,
)

#Train; the L2 penalty is applied by the optimizer
adam_loss = train_network(
    model=model,
    optimizer=adam_optimizer,
    loss_function=loss_function,
    num_epochs=num_epochs,
    batch_size=batch_size,
    X_train=X_train,
    Y_train=y_train,
    lambda_L1=lambda_L1,
)

#Report metrics on the test and training sets
evaluate_model(
    model=model,
    X_test=X_test,
    y_test=y_test,
    X_train=X_train,
    y_train=y_train,
    loss_list=adam_loss,
)

In [None]:
#Listing 5-14  - Dropout Regularization

#Define Network with Dropout Layers
class NeuralNetwork(nn.Module):
    """Binary classifier (input -> 96 -> 192 -> 384 -> 1) with dropout.

    Dropout randomly zeroes a fraction of its inputs while the model is in
    training mode and is a no-op in evaluation mode, which discourages the
    network from relying too heavily on any particular unit.

    Args:
        input_dim: Number of input features (default 48).
        dropout_p: Probability of zeroing an element (default 0.1).
    """

    def __init__(self, input_dim=48, dropout_p=0.1):
        super().__init__()
        # Seed before creating the layers so every new instance starts from
        # the same initial weights. Note that this resets torch's global RNG.
        tch.manual_seed(2020)
        self.fc1 = nn.Linear(input_dim, 96)
        self.fc2 = nn.Linear(96, 192)
        self.fc3 = nn.Linear(192, 384)
        self.relu = nn.ReLU()
        self.out = nn.Linear(384, 1)
        self.final = nn.Sigmoid()
        # One Dropout module is shared by every dropout point in forward().
        self.drop = nn.Dropout(p=dropout_p)

    def forward(self, x):
        """Return the predicted probability for every row of ``x``.

        Dropout is applied to the raw input and after each hidden
        activation. That includes the activations feeding the output layer,
        a placement often advised against because it can make the outputs
        noisier during training.
        """
        op = self.drop(x)                         # dropout on the input features
        op = self.drop(self.relu(self.fc1(op)))   # hidden layer 1 + dropout
        op = self.drop(self.relu(self.fc2(op)))   # hidden layer 2 + dropout
        op = self.drop(self.relu(self.fc3(op)))   # hidden layer 3 + dropout
        y = self.final(self.out(op))
        return y
    
#Dropout-only run: L1 and L2 are both switched off
num_epochs = 256
batch_size = 64

#Hyperparameters
weight_decay = 0.0  # no L2 regularization
lambda_L1 = 0.0     # no L1 regularization

#Fresh dropout model, loss and optimizer
model = NeuralNetwork()
loss_function = nn.BCELoss()  # binary cross-entropy
adam_optimizer = tch.optim.Adam(
    params=model.parameters(),
    lr=0.001,
    weight_decay=weight_decay,
)

#Train model
adam_loss = train_network(
    model=model,
    optimizer=adam_optimizer,
    loss_function=loss_function,
    num_epochs=num_epochs,
    batch_size=batch_size,
    X_train=X_train,
    Y_train=y_train,
    lambda_L1=lambda_L1,
)

#Evaluate model
evaluate_model(
    model=model,
    X_test=X_test,
    y_test=y_test,
    X_train=X_train,
    y_train=y_train,
    loss_list=adam_loss,
)

In [None]:
#Listing 5-15  - L1, L2 + Dropout Regularization


#Create a network with Dropout layer
class NeuralNetwork(nn.Module):
    """Binary classifier (input -> 96 -> 192 -> 384 -> 1) with dropout.

    Used for the combined L1 + L2 + dropout experiment. The forward pass
    returns one sigmoid probability per input row.

    Args:
        input_dim: Number of input features (default 48).
        dropout_p: Probability of zeroing an element (default 0.1).
    """

    def __init__(self, input_dim=48, dropout_p=0.1):
        super().__init__()
        # Seed before creating the layers so every new instance starts from
        # the same initial weights. Note that this resets torch's global RNG.
        tch.manual_seed(2020)
        self.fc1 = nn.Linear(input_dim, 96)
        self.fc2 = nn.Linear(96, 192)
        self.fc3 = nn.Linear(192, 384)
        self.relu = nn.ReLU()
        self.out = nn.Linear(384, 1)
        self.final = nn.Sigmoid()
        # One Dropout module is shared by every dropout point in forward().
        self.drop = nn.Dropout(dropout_p)

    def forward(self, x):
        """Apply dropout to the input and after each hidden activation."""
        op = self.drop(x)                         # dropout on the input features
        op = self.drop(self.relu(self.fc1(op)))   # hidden layer 1 + dropout
        op = self.drop(self.relu(self.fc2(op)))   # hidden layer 2 + dropout
        op = self.drop(self.relu(self.fc3(op)))   # hidden layer 3 + dropout
        y = self.final(self.out(op))
        return y
    
#Combined run: L1 penalty, L2 weight decay and dropout together
num_epochs = 256
batch_size = 64

#Hyperparameters
lambda_L1 = 0.0001    # L1 enabled (added to the loss inside train_network)
weight_decay = 0.001  # L2 enabled (handled by the optimizer)

#Fresh dropout model, loss and optimizer
model = NeuralNetwork()
loss_function = nn.BCELoss()
adam_optimizer = tch.optim.Adam(
    params=model.parameters(),
    lr=0.001,
    weight_decay=weight_decay,
)

#Train model
adam_loss = train_network(
    model=model,
    optimizer=adam_optimizer,
    loss_function=loss_function,
    num_epochs=num_epochs,
    batch_size=batch_size,
    X_train=X_train,
    Y_train=y_train,
    lambda_L1=lambda_L1,
)

#Evaluate model
evaluate_model(
    model=model,
    X_test=X_test,
    y_test=y_test,
    X_train=X_train,
    y_train=y_train,
    loss_list=adam_loss,
)