In [263]:
import pandas as pd
import torch
import torch.nn as nn
from torch.optim import Adam
from torch.utils.data import Dataset,DataLoader
from torchsummary import summary
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np


In [264]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [265]:
df = pd.read_csv('loan_approval.csv')
df.head(3)
orginal_df = df.copy()

In [266]:
df = df.drop(columns=['name','city'])
df

Unnamed: 0,income,credit_score,loan_amount,years_employed,points,loan_approved
0,113810,389,39698,27,50.0,False
1,44592,729,15446,28,55.0,False
2,33278,584,11189,13,45.0,False
3,127196,344,48823,29,50.0,False
4,66048,496,47174,4,25.0,False
...,...,...,...,...,...,...
1995,92163,770,12251,13,85.0,True
1996,38799,635,48259,17,40.0,False
1997,41957,763,16752,5,60.0,True
1998,139022,360,24031,35,55.0,False


In [267]:
X_train ,X_temp,Y_train,Y_temp = train_test_split(df.drop('loan_approved',axis=1),df['loan_approved'],test_size=.4,random_state=69,stratify=df['loan_approved'])
X_val,X_test,Y_val,Y_test = train_test_split(X_temp,Y_temp,test_size=.5,random_state=69,stratify=Y_temp)
print(X_train.shape,Y_train.shape,X_val.shape,X_test.shape)

(1200, 5) (1200,) (400, 5) (400, 5)


In [268]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()

In [269]:
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

In [270]:
class dataset(Dataset):
    def __init__(self,X,Y):
        super().__init__()
        self.X = torch.tensor(X,dtype=torch.float32).to(device)
        self.Y= torch.tensor(Y,dtype=torch.float32).to(device)
    
    def __getitem__(self,index):
        return self.X[index] , self.Y[index]
    
    def __len__(self):
        return len(self.X)

In [271]:
train_dataset = dataset(X_train_scaled,Y_train.values)
valid_dataset = dataset(X_val_scaled,Y_val.values)
test_dataset = dataset(X_test_scaled,Y_test.values)

In [272]:
train_dataloader = DataLoader(train_dataset,batch_size=32,shuffle=True)
valid_dataloader = DataLoader(valid_dataset,batch_size=32,shuffle=True)
test_dataloader = DataLoader(test_dataset,batch_size=32,shuffle=True)

In [273]:
class MyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.input_layer = nn.Linear(df.drop('loan_approved',axis=1).shape[1],64)
        self.linear = nn.Linear(64,16)
        self.output = nn.Linear(16,1)

        
    def forward(self,x):
        x = self.output(self.linear(self.input_layer(x)))
        return x

In [274]:
model = MyModel().to(device)

In [275]:
summary(model,(df.drop('loan_approved',axis=1).shape[1],))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                   [-1, 64]             384
            Linear-2                   [-1, 16]           1,040
            Linear-3                    [-1, 1]              17
Total params: 1,441
Trainable params: 1,441
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.01
Estimated Total Size (MB): 0.01
----------------------------------------------------------------


In [276]:
critertion = nn.BCEWithLogitsLoss()
optimizer = Adam(model.parameters(),lr=2e-3)


In [277]:
EPOCHS = 20
total_loss_train_plot = []
total_loss_validation_plot = []
total_acc_train_plot = []
total_acc_validation_plot = []

for epoch in range(EPOCHS):
    total_acc_train = 0
    total_loss_train = 0 
    total_acc_val = 0
    total_loss_val = 0
    model.train()
    for input,label in train_dataloader:
        input ,label = input.to(device),label.to(device)
        optimizer.zero_grad()
        pred = model(input).squeeze(1)
        batch_loss = critertion(pred,label)
        total_loss_train +=batch_loss.item()
        batch_loss.backward()

        preds = torch.sigmoid(pred)
        acc = (preds.round()==label).sum().item()
        total_acc_train +=acc
        
        optimizer.step()

    with torch.no_grad():
        model.eval()
        for input,label in valid_dataloader:
            input ,label = input.to(device),label.to(device)
            pred = model(input).squeeze(1)
            batch_loss = critertion(pred,label)
            total_loss_val +=batch_loss.item()

            preds = torch.sigmoid(pred)
            acc = (preds.round()==label).sum().item()
            total_acc_val +=acc

    total_loss_train_plot.append(round(total_loss_train/len(train_dataloader),4))
    total_loss_validation_plot.append(round(total_loss_val/len(valid_dataloader),4))
    
    total_acc_train_plot.append(round(total_acc_train/len(train_dataset)*100,4))
    total_acc_validation_plot.append(round(total_acc_val/len(valid_dataset)*100,4))
    

    print(f'Epoch:{epoch+1} Train Loss : {total_loss_train_plot[epoch]}, Train Acc : {total_acc_train_plot[epoch]}%')
    print(f'Epoch:{epoch+1} Val Loss : {total_loss_validation_plot[epoch]}, Val Acc : {total_acc_validation_plot[epoch]}%')
    print('='*55)


Epoch:1 Train Loss : 0.5882, Train Acc : 75.5833%
Epoch:1 Val Loss : 0.4668, Val Acc : 87.75%
Epoch:2 Train Loss : 0.3526, Train Acc : 89.0%
Epoch:2 Val Loss : 0.2788, Val Acc : 89.5%
Epoch:3 Train Loss : 0.2262, Train Acc : 90.5833%
Epoch:3 Val Loss : 0.2253, Val Acc : 89.75%
Epoch:4 Train Loss : 0.1804, Train Acc : 93.0%
Epoch:4 Val Loss : 0.1855, Val Acc : 92.25%
Epoch:5 Train Loss : 0.1546, Train Acc : 93.9167%
Epoch:5 Val Loss : 0.1704, Val Acc : 91.75%
Epoch:6 Train Loss : 0.138, Train Acc : 94.5%
Epoch:6 Val Loss : 0.1512, Val Acc : 94.0%
Epoch:7 Train Loss : 0.1292, Train Acc : 94.8333%
Epoch:7 Val Loss : 0.1433, Val Acc : 94.25%
Epoch:8 Train Loss : 0.1199, Train Acc : 95.1667%
Epoch:8 Val Loss : 0.1384, Val Acc : 94.5%
Epoch:9 Train Loss : 0.1124, Train Acc : 95.3333%
Epoch:9 Val Loss : 0.1445, Val Acc : 94.0%
Epoch:10 Train Loss : 0.1154, Train Acc : 94.75%
Epoch:10 Val Loss : 0.1287, Val Acc : 95.0%
Epoch:11 Train Loss : 0.1106, Train Acc : 95.1667%
Epoch:11 Val Loss : 0.10

In [278]:
all_preds = []
all_labels=[]
with torch.no_grad():
    model.eval()
    total_loss_test =0
    total_acc_test = 0
    for input,label in test_dataloader:
        input,label = input.to(device),label.to(device)
        pred = model(input).squeeze(1)
        probs = torch.sigmoid(pred)
        preds = probs.round()
        
        all_preds.append(preds.cpu())
        all_labels.append(label.cpu())
        
        
all_preds = torch.cat(all_preds).numpy()
all_labels = torch.cat(all_labels).numpy()

In [279]:
from sklearn.metrics import classification_report
cm=classification_report(all_labels,all_preds,target_names=['No Diabetes','Diabetes'])
print(cm)

              precision    recall  f1-score   support

 No Diabetes       1.00      0.96      0.98       224
    Diabetes       0.95      1.00      0.97       176

    accuracy                           0.97       400
   macro avg       0.97      0.98      0.97       400
weighted avg       0.98      0.97      0.98       400

