# MCDA5511 Assignment 3: Transformers

Submitted By:
- Louise Fear
- Muhammad Abdul Thoufiq
- Sudeep Raj Badal
- Sukanta Dey Amit

#### Necessary Imports

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

#### Load Data

In [13]:
# Read the loan dataset from a CSV file in the current working directory.
df = pd.read_csv("loan_data.csv")
# Preview the first five rows to sanity-check the columns and their values.
df.head()

Unnamed: 0,person_age,person_gender,person_education,person_income,person_emp_exp,person_home_ownership,loan_amnt,loan_intent,loan_int_rate,loan_percent_income,cb_person_cred_hist_length,credit_score,previous_loan_defaults_on_file,loan_status
0,22.0,female,Master,71948.0,0,RENT,35000.0,PERSONAL,16.02,0.49,3.0,561,No,1
1,21.0,female,High School,12282.0,0,OWN,1000.0,EDUCATION,11.14,0.08,2.0,504,Yes,0
2,25.0,female,High School,12438.0,3,MORTGAGE,5500.0,MEDICAL,12.87,0.44,3.0,635,No,1
3,23.0,female,Bachelor,79753.0,0,RENT,35000.0,MEDICAL,15.23,0.44,2.0,675,No,1
4,24.0,male,Master,66135.0,1,RENT,35000.0,MEDICAL,14.27,0.53,4.0,586,No,1


#### Data Prep

Drop rows with missing values, label-encode the categorical features, and standardize the resulting feature matrix.

In [14]:
# Discard every row that contains at least one missing value.
df.dropna(inplace=True)

# Integer-encode each categorical column; the fitted encoder for every column
# is kept in `label_encoders`, keyed by column name.
categorical_cols = [
    "person_gender",
    "person_education",
    "person_home_ownership",
    "loan_intent",
    "previous_loan_defaults_on_file",
]
label_encoders = {col: LabelEncoder() for col in categorical_cols}
for col, le in label_encoders.items():
    df[col] = le.fit_transform(df[col])

# The target is the loan_status column; every other column is a feature.
y = df["loan_status"]
X = df.drop(columns=["loan_status"])

# Standardize the whole feature matrix with StandardScaler.
# NOTE(review): the scaler is fitted on all rows here, i.e. before the
# train/test split performed later in the notebook, so test-set statistics
# influence the scaling. Consider fitting on the training split only.
scaler = StandardScaler()
X = scaler.fit_transform(X)


#### Split Data

Split the data into training (80%) and test (20%) sets.

In [15]:
# Hold out 20% of the rows as the test set (test_size=0.2); random_state=42
# pins the shuffle so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#### Logistic Regression Model

In [16]:
# Baseline: scikit-learn logistic regression with default settings, fitted on
# the training split (fit returns the estimator itself, so it can be chained).
log_reg = LogisticRegression().fit(X_train, y_train)

# Predict on the held-out split and report plain accuracy.
y_pred_log_reg = log_reg.predict(X_test)
log_reg_accuracy = accuracy_score(y_test, y_pred_log_reg)
print(f"Logistic Regression Accuracy: {log_reg_accuracy:.4f}")

Logistic Regression Accuracy: 0.8901


### Neural Network Implementation

In [22]:
class NeuralNetwork:
    """Single-layer binary classifier trained with full-batch gradient descent.

    The model computes ``sigmoid(X @ weights + bias)`` with no hidden layer and
    is optimised against the binary cross-entropy loss.

    Parameters
    ----------
    input_size : int
        Number of input features (columns of ``X``).
    learning_rate : float, default 0.01
        Step size used for every gradient-descent update.
    epochs : int, default 1000
        Number of full passes over the training data.
    random_state : int or None, default None
        Seed for the weight initialisation. When ``None`` the weights are drawn
        from NumPy's global random state, exactly as before this parameter
        existed.

    Attributes
    ----------
    weights : numpy.ndarray of shape (input_size, 1)
    bias : numpy.ndarray of shape (1,)
    """

    def __init__(self, input_size, learning_rate=0.01, epochs=1000, random_state=None):
        self.input_size = input_size
        self.learning_rate = learning_rate
        self.epochs = epochs
        if random_state is None:
            # Keep the historical behaviour: draw from the global NumPy RNG.
            initial = np.random.randn(input_size, 1)
        else:
            # A private generator makes the initialisation reproducible without
            # touching global state.
            initial = np.random.RandomState(random_state).randn(input_size, 1)
        # Small initial weights keep the sigmoid away from its flat regions.
        self.weights = initial * 0.01
        self.bias = np.zeros((1,))

    @staticmethod
    def _as_column(y):
        """Return ``y`` (Series, ndarray or sequence) as a float (n, 1) column."""
        return np.asarray(y, dtype=float).reshape(-1, 1)

    @staticmethod
    def _binary_cross_entropy(y_true, y_pred):
        """Mean binary cross-entropy; 1e-8 guards against log(0)."""
        return -np.mean(
            y_true * np.log(y_pred + 1e-8)
            + (1 - y_true) * np.log(1 - y_pred + 1e-8)
        )

    def sigmoid(self, z):
        """Numerically stable logistic function 1 / (1 + exp(-z)).

        Only ``exp(-|z|)`` is evaluated, which lies in (0, 1], so large
        negative inputs no longer overflow.
        """
        z = np.asarray(z, dtype=float)
        e = np.exp(-np.abs(z))
        return np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))

    def sigmoid_derivative(self, z):
        """Derivative of the sigmoid, s(z) * (1 - s(z))."""
        s = self.sigmoid(z)
        return s * (1 - s)

    def forward(self, X):
        """Forward pass: predicted probabilities of shape (n_samples, 1)."""
        z = np.dot(X, self.weights) + self.bias
        return self.sigmoid(z)

    def backward(self, X, y, y_pred):
        """Gradients of the mean cross-entropy loss w.r.t. weights and bias.

        ``y`` may be a pandas Series, a NumPy array or a plain sequence of
        0/1 labels. Returns ``(dw, db)`` where ``dw`` has the shape of
        ``self.weights`` and ``db`` is a scalar.
        """
        n = X.shape[0]
        # For sigmoid + cross-entropy the gradient w.r.t. z is simply (p - y).
        dz = y_pred - self._as_column(y)
        dw = np.dot(X.T, dz) / n
        db = np.sum(dz) / n
        return dw, db

    def train(self, X, y):
        """Fit weights and bias with ``self.epochs`` gradient-descent steps.

        The loss is printed every 100 epochs; it is evaluated on the
        predictions made *before* that epoch's parameter update.
        """
        # Convert the labels once instead of on every epoch.
        y_col = self._as_column(y)
        for epoch in range(self.epochs):
            y_pred = self.forward(X)
            dw, db = self.backward(X, y_col, y_pred)
            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

            # Print loss every 100 epochs
            if epoch % 100 == 0:
                loss = self._binary_cross_entropy(y_col, y_pred)
                print(f"Epoch: {epoch}, Loss: {loss:.4f}")

    def predict(self, X):
        """Return 0/1 class labels, shape (n_samples, 1), thresholded at 0.5."""
        return (self.forward(X) >= 0.5).astype(int)


#### Train Neural Network

In [23]:
# Seed NumPy's global RNG before building the network: its initial weights are
# drawn with np.random.randn, so without a seed the loss trace and the final
# accuracy change on every Restart & Run All. 42 matches the split's seed.
np.random.seed(42)

# One weight per feature column; 1000 full-batch gradient-descent steps.
nn = NeuralNetwork(input_size=X_train.shape[1], learning_rate=0.01, epochs=1000)
nn.train(X_train, y_train)

Epoch: 0, Loss: 0.6908
Epoch: 100, Loss: 0.5525
Epoch: 200, Loss: 0.4751
Epoch: 300, Loss: 0.4269
Epoch: 400, Loss: 0.3942
Epoch: 500, Loss: 0.3707
Epoch: 600, Loss: 0.3529
Epoch: 700, Loss: 0.3391
Epoch: 800, Loss: 0.3280
Epoch: 900, Loss: 0.3189


In [24]:
# Score the hand-written network on the held-out test split.
# nn.predict thresholds the predicted probabilities at 0.5 and returns 0/1 labels.
y_pred_nn = nn.predict(X_test)
nn_accuracy = accuracy_score(y_test, y_pred_nn)
print(f"Neural Network Accuracy: {nn_accuracy:.4f}")

Neural Network Accuracy: 0.8844
