# MCDA5511 Assignment 3 : Transformers

Submitted By:
- Louise Fear
- Muhammad Abdul Thoufiq
- Sudeep Raj Badal
- Sukanta Dey Amit

#### Necessary Imports

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

#### Load Data

In [13]:
# Read the loan dataset from the working directory and preview its first five rows.
df = pd.read_csv("loan_data.csv")
df.head(5)

Unnamed: 0,person_age,person_gender,person_education,person_income,person_emp_exp,person_home_ownership,loan_amnt,loan_intent,loan_int_rate,loan_percent_income,cb_person_cred_hist_length,credit_score,previous_loan_defaults_on_file,loan_status
0,22.0,female,Master,71948.0,0,RENT,35000.0,PERSONAL,16.02,0.49,3.0,561,No,1
1,21.0,female,High School,12282.0,0,OWN,1000.0,EDUCATION,11.14,0.08,2.0,504,Yes,0
2,25.0,female,High School,12438.0,3,MORTGAGE,5500.0,MEDICAL,12.87,0.44,3.0,635,No,1
3,23.0,female,Bachelor,79753.0,0,RENT,35000.0,MEDICAL,15.23,0.44,2.0,675,No,1
4,24.0,male,Master,66135.0,1,RENT,35000.0,MEDICAL,14.27,0.53,4.0,586,No,1


#### Data Prep

Drop rows with missing values, label-encode the categorical features, and standardize all feature columns (zero mean, unit variance).

In [14]:
# Drop every row that contains at least one missing value.
df.dropna(inplace=True)

# Label-encode each categorical column, keeping the fitted encoder per column
# so the integer codes can later be mapped back to the original labels.
categorical_cols = [
    "person_gender",
    "person_education",
    "person_home_ownership",
    "loan_intent",
    "previous_loan_defaults_on_file",
]
label_encoders = {col: LabelEncoder().fit(df[col]) for col in categorical_cols}
for col, le in label_encoders.items():
    df[col] = le.transform(df[col])

# Target is the loan_status column; every other column is used as a feature.
y = df["loan_status"]
X = df.drop(columns=["loan_status"])

# Standardize every feature column (the encoded categorical ones included).
# NOTE(review): the scaler is fitted on the full dataset, i.e. before the
# train/test split performed later, so test-set statistics influence the
# scaling. Consider fitting on the training rows only.
scaler = StandardScaler()
X = scaler.fit_transform(X)


#### Split Data

Split the data into training (80%) and test (20%) sets.

In [15]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

#### Logistic Regression Model

In [16]:
# Baseline model: scikit-learn logistic regression with default hyperparameters.
log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)

# Accuracy of the baseline on the test split.
y_pred_log_reg = log_reg.predict(X_test)
log_reg_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_log_reg)
print(f"Logistic Regression Accuracy: {log_reg_accuracy:.4f}")

Logistic Regression Accuracy: 0.8901


### Neural Network Implementation

In [None]:
class NeuralNetwork:
    """Single sigmoid output unit trained with full-batch gradient descent.

    The model computes ``sigmoid(X @ weights + bias)``; there is no hidden
    layer, so it has the same functional form as logistic regression.

    Parameters
    ----------
    input_size : int
        Number of input features (rows of the weight column vector).
    learning_rate : float, default 0.01
        Step size applied to the gradients on every epoch.
    epochs : int, default 1000
        Number of full passes of gradient descent performed by ``train``.
    seed : int or None, default None
        Seed for the weight initialisation. ``None`` draws from NumPy's
        global random state (the previous behaviour); an integer makes the
        initial weights reproducible.
    """

    def __init__(self, input_size, learning_rate=0.01, epochs=1000, seed=None):
        self.input_size = input_size
        self.learning_rate = learning_rate
        self.epochs = epochs
        # Use the global generator unless a seed is given, so existing callers
        # (and any np.random.seed() they rely on) behave exactly as before.
        rng = np.random if seed is None else np.random.RandomState(seed)
        self.weights = rng.randn(input_size, 1) * 0.01    # small random weights, shape (input_size, 1)
        self.bias = np.zeros((1,))

    @staticmethod
    def _as_column(y):
        """Return the targets as a float column vector of shape (n, 1).

        Accepts a pandas Series, a NumPy array or a plain sequence.
        """
        return np.asarray(y, dtype=float).reshape(-1, 1)

    def sigmoid(self, z):
        """Numerically stable logistic function 1 / (1 + exp(-z)).

        Only exp(-|z|) is evaluated, which lies in (0, 1], so large-magnitude
        inputs cannot overflow.
        """
        z = np.asarray(z, dtype=float)
        e = np.exp(-np.abs(z))
        # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z))
        return np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))

    def sigmoid_derivative(self, z):
        """Derivative of the sigmoid at ``z``: s * (1 - s) with s = sigmoid(z)."""
        s = self.sigmoid(z)
        return s * (1 - s)

    def forward(self, X):
        """Forward pass: predicted probabilities, shape (n_samples, 1)."""
        z = np.dot(X, self.weights) + self.bias
        return self.sigmoid(z)

    def backward(self, X, y, y_pred):
        """Gradients of the mean binary cross-entropy w.r.t. weights and bias.

        Parameters
        ----------
        X : ndarray of shape (n_samples, input_size)
        y : array-like of length n_samples (Series, ndarray or list)
        y_pred : ndarray of shape (n_samples, 1), output of ``forward``

        Returns
        -------
        (dw, db) : dw has shape (input_size, 1); db is a scalar.
        """
        n = X.shape[0]
        dz = y_pred - self._as_column(y)
        dw = np.dot(X.T, dz) / n
        db = np.sum(dz) / n
        return dw, db

    def train(self, X, y):
        """Fit weights and bias with ``self.epochs`` steps of gradient descent.

        Prints the binary cross-entropy loss every 100 epochs. The reported
        loss is computed from the predictions made before that epoch's update.
        """
        # Reshape the targets once instead of on every epoch.
        y_col = self._as_column(y)

        for epoch in range(self.epochs):
            # Compute predictions from forward propagation
            y_pred = self.forward(X)

            # Get gradients from backpropagation
            dw, db = self.backward(X, y_col, y_pred)

            # Update weights and bias using gradients
            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

            # Print the binary cross-entropy loss every 100 epochs;
            # the 1e-8 offset guards against log(0).
            if epoch % 100 == 0:
                loss = -np.mean(y_col * np.log(y_pred + 1e-8) + (1 - y_col) * np.log(1 - y_pred + 1e-8))
                print(f"Epoch: {epoch}, Loss: {loss:.4f}")

    def predict(self, X):
        """Predict class labels (0/1) by thresholding the probability at 0.5.

        Returns an integer array of shape (n_samples, 1).
        """
        return (self.forward(X) >= 0.5).astype(int)


#### Train Neural Network

Training the model with 1,000 epochs:

In [46]:
# Fresh network sized to the number of training features; 1,000 epochs of gradient descent.
nn = NeuralNetwork(X_train.shape[1], epochs=1000, learning_rate=0.01)
nn.train(X_train, y_train)

Epoch: 0, Loss: 0.6953
Epoch: 100, Loss: 0.5547
Epoch: 200, Loss: 0.4763
Epoch: 300, Loss: 0.4277
Epoch: 400, Loss: 0.3948
Epoch: 500, Loss: 0.3711
Epoch: 600, Loss: 0.3533
Epoch: 700, Loss: 0.3394
Epoch: 800, Loss: 0.3283
Epoch: 900, Loss: 0.3191


In [47]:
# Accuracy of the 1,000-epoch network on the test split.
y_pred_nn = nn.predict(X_test)
nn_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_nn)
print(f"Neural Network Accuracy: {nn_accuracy:.4f}")

Neural Network Accuracy: 0.8843


Training the model with 5,000 epochs:

In [48]:
# Re-initialise the network and train from scratch for 5,000 epochs.
nn = NeuralNetwork(X_train.shape[1], epochs=5000, learning_rate=0.01)
nn.train(X_train, y_train)

Epoch: 0, Loss: 0.6982
Epoch: 100, Loss: 0.5563
Epoch: 200, Loss: 0.4773
Epoch: 300, Loss: 0.4283
Epoch: 400, Loss: 0.3953
Epoch: 500, Loss: 0.3715
Epoch: 600, Loss: 0.3536
Epoch: 700, Loss: 0.3396
Epoch: 800, Loss: 0.3285
Epoch: 900, Loss: 0.3193
Epoch: 1000, Loss: 0.3117
Epoch: 1100, Loss: 0.3052
Epoch: 1200, Loss: 0.2996
Epoch: 1300, Loss: 0.2948
Epoch: 1400, Loss: 0.2906
Epoch: 1500, Loss: 0.2869
Epoch: 1600, Loss: 0.2836
Epoch: 1700, Loss: 0.2807
Epoch: 1800, Loss: 0.2780
Epoch: 1900, Loss: 0.2757
Epoch: 2000, Loss: 0.2735
Epoch: 2100, Loss: 0.2715
Epoch: 2200, Loss: 0.2697
Epoch: 2300, Loss: 0.2680
Epoch: 2400, Loss: 0.2665
Epoch: 2500, Loss: 0.2650
Epoch: 2600, Loss: 0.2637
Epoch: 2700, Loss: 0.2625
Epoch: 2800, Loss: 0.2613
Epoch: 2900, Loss: 0.2603
Epoch: 3000, Loss: 0.2592
Epoch: 3100, Loss: 0.2583
Epoch: 3200, Loss: 0.2574
Epoch: 3300, Loss: 0.2566
Epoch: 3400, Loss: 0.2558
Epoch: 3500, Loss: 0.2551
Epoch: 3600, Loss: 0.2543
Epoch: 3700, Loss: 0.2537
Epoch: 3800, Loss: 0.253

In [49]:
# Accuracy of the 5,000-epoch network on the test split.
y_pred_nn = nn.predict(X_test)
nn_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_nn)
print(f"Neural Network Accuracy: {nn_accuracy:.4f}")

Neural Network Accuracy: 0.8881


Training the model with 10,000 epochs:

In [55]:
# Re-initialise the network and train from scratch for 10,000 epochs.
nn = NeuralNetwork(X_train.shape[1], epochs=10000, learning_rate=0.01)
nn.train(X_train, y_train)

Epoch: 0, Loss: 0.6937
Epoch: 100, Loss: 0.5540
Epoch: 200, Loss: 0.4760
Epoch: 300, Loss: 0.4275
Epoch: 400, Loss: 0.3947
Epoch: 500, Loss: 0.3710
Epoch: 600, Loss: 0.3532
Epoch: 700, Loss: 0.3393
Epoch: 800, Loss: 0.3282
Epoch: 900, Loss: 0.3191
Epoch: 1000, Loss: 0.3114
Epoch: 1100, Loss: 0.3050
Epoch: 1200, Loss: 0.2995
Epoch: 1300, Loss: 0.2947
Epoch: 1400, Loss: 0.2905
Epoch: 1500, Loss: 0.2868
Epoch: 1600, Loss: 0.2835
Epoch: 1700, Loss: 0.2806
Epoch: 1800, Loss: 0.2779
Epoch: 1900, Loss: 0.2755
Epoch: 2000, Loss: 0.2734
Epoch: 2100, Loss: 0.2714
Epoch: 2200, Loss: 0.2696
Epoch: 2300, Loss: 0.2679
Epoch: 2400, Loss: 0.2664
Epoch: 2500, Loss: 0.2649
Epoch: 2600, Loss: 0.2636
Epoch: 2700, Loss: 0.2624
Epoch: 2800, Loss: 0.2613
Epoch: 2900, Loss: 0.2602
Epoch: 3000, Loss: 0.2592
Epoch: 3100, Loss: 0.2582
Epoch: 3200, Loss: 0.2574
Epoch: 3300, Loss: 0.2565
Epoch: 3400, Loss: 0.2557
Epoch: 3500, Loss: 0.2550
Epoch: 3600, Loss: 0.2543
Epoch: 3700, Loss: 0.2536
Epoch: 3800, Loss: 0.253

In [56]:
# Accuracy of the 10,000-epoch network on the test split.
y_pred_nn = nn.predict(X_test)
nn_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_nn)
print(f"Neural Network Accuracy: {nn_accuracy:.4f}")

Neural Network Accuracy: 0.8893


Training the model with 15,000 epochs:

In [57]:
# Re-initialise the network and train from scratch for 15,000 epochs.
nn = NeuralNetwork(X_train.shape[1], epochs=15000, learning_rate=0.01)
nn.train(X_train, y_train)

Epoch: 0, Loss: 0.6955
Epoch: 100, Loss: 0.5544
Epoch: 200, Loss: 0.4759
Epoch: 300, Loss: 0.4273
Epoch: 400, Loss: 0.3944
Epoch: 500, Loss: 0.3708
Epoch: 600, Loss: 0.3530
Epoch: 700, Loss: 0.3391
Epoch: 800, Loss: 0.3280
Epoch: 900, Loss: 0.3189
Epoch: 1000, Loss: 0.3113
Epoch: 1100, Loss: 0.3049
Epoch: 1200, Loss: 0.2994
Epoch: 1300, Loss: 0.2946
Epoch: 1400, Loss: 0.2904
Epoch: 1500, Loss: 0.2867
Epoch: 1600, Loss: 0.2834
Epoch: 1700, Loss: 0.2805
Epoch: 1800, Loss: 0.2779
Epoch: 1900, Loss: 0.2755
Epoch: 2000, Loss: 0.2733
Epoch: 2100, Loss: 0.2713
Epoch: 2200, Loss: 0.2695
Epoch: 2300, Loss: 0.2679
Epoch: 2400, Loss: 0.2663
Epoch: 2500, Loss: 0.2649
Epoch: 2600, Loss: 0.2636
Epoch: 2700, Loss: 0.2624
Epoch: 2800, Loss: 0.2612
Epoch: 2900, Loss: 0.2602
Epoch: 3000, Loss: 0.2592
Epoch: 3100, Loss: 0.2582
Epoch: 3200, Loss: 0.2573
Epoch: 3300, Loss: 0.2565
Epoch: 3400, Loss: 0.2557
Epoch: 3500, Loss: 0.2550
Epoch: 3600, Loss: 0.2543
Epoch: 3700, Loss: 0.2536
Epoch: 3800, Loss: 0.253

In [58]:
# Accuracy of the 15,000-epoch network on the test split.
y_pred_nn = nn.predict(X_test)
nn_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_nn)
print(f"Neural Network Accuracy: {nn_accuracy:.4f}")

Neural Network Accuracy: 0.8892


The accuracy is essentially the same with 10,000 and 15,000 epochs (0.8893 vs. 0.8892), so training beyond 10,000 epochs gives no further improvement.