In [1]:
# Importing Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import random

#### Loading the Data

In [2]:
df = pd.read_csv("Bank_Personal_Loan_Modelling.csv")

In [3]:
print("Shape : ", df.shape)
df.head()

Shape :  (5000, 14)


Unnamed: 0,ID,Age,Experience,Income,ZIP Code,Family,CCAvg,Education,Mortgage,Personal Loan,Securities Account,CD Account,Online,CreditCard
0,1,25,1,49,91107,4,1.6,1,0,0,1,0,0,0
1,2,45,19,34,90089,3,1.5,1,0,0,1,0,0,0
2,3,39,15,11,94720,1,1.0,1,0,0,0,0,0,0
3,4,35,9,100,94112,1,2.7,2,0,0,0,0,0,0
4,5,35,8,45,91330,4,1.0,2,0,0,0,0,0,1


#### Data Cleaning and Feature Engineering

In [4]:
# Deleting Columns which are not necessary
df.drop(["ID", "ZIP Code"],axis=1,inplace=True)

In [5]:
print("Duplicate : ", df.duplicated().sum())
print("Null : ", df.isnull().sum().sum())

Duplicate :  13
Null :  0


In [6]:
print(df["Experience"].unique())
df["Experience"] = abs(df["Experience"])

[ 1 19 15  9  8 13 27 24 10 39  5 23 32 41 30 14 18 21 28 31 11 16 20 35
  6 25  7 12 26 37 17  2 36 29  3 22 -1 34  0 38 40 33  4 -2 42 -3 43]


In [7]:
df.columns

Index(['Age', 'Experience', 'Income', 'Family', 'CCAvg', 'Education',
       'Mortgage', 'Personal Loan', 'Securities Account', 'CD Account',
       'Online', 'CreditCard'],
      dtype='object')

In [8]:
df = df[['Age', 'Experience', 'Income', 'Family', 'CCAvg','Education', 'Mortgage', 'Securities Account','CD Account', 'Online', 'CreditCard', 'Personal Loan']]

In [9]:
X = df.iloc[:,:-1].values
Y = df.iloc[:,-1].values

### Train Test Split

In [10]:
x,y = X,Y

In [11]:
from sklearn.model_selection import train_test_split

x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.25,random_state=69)

In [12]:
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

In [13]:
x_train.shape, y_train.shape

((3750, 11), (3750,))

# PyTorch 

In [14]:
import torch
from torch.utils.data import DataLoader, TensorDataset

In [15]:
BATCH_SIZE = 32
LEARNING_RATE = 0.003
EPOCH = 50

In [16]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [17]:
train_x = torch.from_numpy(x_train).to(torch.float32)
train_y = torch.from_numpy(y_train).to(torch.float32)

In [18]:
train_x.shape, train_y.shape

(torch.Size([3750, 11]), torch.Size([3750]))

In [19]:
data = TensorDataset(train_x,train_y)
data = DataLoader(data,batch_size=BATCH_SIZE,shuffle=True)

## Building Model

In [20]:
class Model(torch.nn.Module):
    
    def __init__(self):
        super(Model,self).__init__()
        
        self.layer1 = torch.nn.Linear(11,16)
        self.layer2 = torch.nn.Linear(16,1)
        self.sigmoid = torch.nn.Sigmoid()
        self.relu = torch.nn.ReLU()
        
    def forward(self, x):
        x = self.layer1(x)
        x = self.relu(x)
        x = self.layer2(x)
        x = self.sigmoid(x)
        return x

In [21]:
model = Model()
# model.to(device)
print(model)

Model(
  (layer1): Linear(in_features=11, out_features=16, bias=True)
  (layer2): Linear(in_features=16, out_features=1, bias=True)
  (sigmoid): Sigmoid()
  (relu): ReLU()
)


## Defining Loss function and Optimizer

In [22]:
loss_function = torch.nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(),lr=LEARNING_RATE)

## Training

In [23]:
training_loss = [0]*EPOCH
training_accuracy = [0]*EPOCH

for i in range(1,EPOCH+1):
    epoch_loss = 0
    accuracy = 0
    for x_batch,y_batch in data:
        # x_batch,y_batch = x_batch.to(device),y_batch.to(device)
        optimizer.zero_grad()
        
        y_pred = model(x_batch)
        
        loss = loss_function(y_pred,y_batch.unsqueeze(1))
        loss.backward()
        
        optimizer.step()
        epoch_loss += loss.item() * len(x_batch)
        cnt = (torch.where(y_pred>=0.5, 1, 0) == y_batch.unsqueeze(1)).sum().float()
        accuracy += cnt.item()
    if(i%10 == 0):
        print(i,epoch_loss/len(data.dataset),(accuracy)/len(data.dataset))

10 0.06467689396937688 0.9762666666666666
20 0.053047611598173776 0.9808
30 0.046935501634764176 0.9837333333333333
40 0.042700892545779544 0.9858666666666667
50 0.039998591079811255 0.9874666666666667


## Testing 

In [24]:
test_x = torch.from_numpy(x_test).to(torch.float32)
test_y = torch.from_numpy(y_test).to(torch.float32)

In [25]:
test = TensorDataset(test_x,test_y)
test = DataLoader(test,batch_size=1)

In [26]:
y_pred = model(test_x)
y_pred = torch.where(y_pred>=0.5, 1, 0).flatten()
accuracy = (y_pred == test_y).sum().float().item() / len(test.dataset)
print(accuracy)

0.9808


### Classification Report

In [27]:
from sklearn.metrics import classification_report

print(classification_report(y_pred,test_y))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      1131
           1       0.91      0.88      0.90       119

    accuracy                           0.98      1250
   macro avg       0.95      0.94      0.94      1250
weighted avg       0.98      0.98      0.98      1250

