<a href="https://colab.research.google.com/github/Nagalakshmi136/loan_prediction/blob/main/loan_prediction_nn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


Connecting to Google Drive:

In [None]:
from google.colab import drive

In [None]:
# Mount Google Drive at /content/drive so files stored there can be opened by path.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Importing required packages:

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix,classification_report

Loading data from Google Drive into a DataFrame:

In [None]:
# Read the loan-approval CSV from the mounted Drive into a DataFrame.
# NOTE: the column names in this file carry a leading space (e.g. ' education').
df = pd.read_csv('/content/drive/MyDrive/dataset/loan_approval_dataset.csv')
# Print a text preview of the frame (pandas truncates long/wide frames).
print(df)

      loan_id   no_of_dependents      education  self_employed   income_annum  \
0           1                  2       Graduate             No        9600000   
1           2                  0   Not Graduate            Yes        4100000   
2           3                  3       Graduate             No        9100000   
3           4                  3       Graduate             No        8200000   
4           5                  5   Not Graduate            Yes        9800000   
...       ...                ...            ...            ...            ...   
4264     4265                  5       Graduate            Yes        1000000   
4265     4266                  0   Not Graduate            Yes        3300000   
4266     4267                  2   Not Graduate             No        6500000   
4267     4268                  1   Not Graduate             No        4100000   
4268     4269                  1       Graduate             No        9200000   

       loan_amount   loan_t

Assigning values to categorical data:

In [None]:
# Integer code for every label of each categorical column. Column names and
# labels keep the leading space they have in the CSV file.
category_codes = {
  ' education': {' Graduate': 2, ' Not Graduate': 1},
  ' self_employed': {' No': 1, ' Yes': 2},
  ' loan_status': {' Approved': 1, ' Rejected': 0},
}
for column, codes in category_codes.items():
  # Assign the encoded column back to the frame. Calling
  # `df[column].replace(..., inplace=True)` acts on an intermediate Series
  # (chained assignment): pandas warns about it and, with Copy-on-Write,
  # `df` itself is left unchanged.
  # `codes.get(value, value)` passes through anything that is not a known
  # label, so re-running this cell on already-encoded data is a no-op.
  df[column] = df[column].map(lambda value: codes.get(value, value))

Normalization of data:

In [None]:
# Min-max scale every column to [0, 1]:
#   (value - column_min) / (column_max - column_min)
# Requesting a float array makes a column that is still non-numeric fail here
# instead of silently producing an object-dtype array.
data = df.to_numpy(dtype=float)
max_arr = np.max(data, axis = 0, keepdims = True)
min_arr = np.min(data, axis = 0, keepdims = True)
max_min = max_arr - min_arr
# A constant column has zero range; dividing by 1 instead maps it to 0
# rather than producing NaN from 0/0.
max_min[max_min == 0] = 1
data = (data - min_arr)/max_min
print(data)


[[0.00000000e+00 4.00000000e-01 1.00000000e+00 ... 5.75835476e-01
  5.44217687e-01 1.00000000e+00]
 [2.34301781e-04 0.00000000e+00 0.00000000e+00 ... 2.18508997e-01
  2.24489796e-01 0.00000000e+00]
 [4.68603561e-04 6.00000000e-01 1.00000000e+00 ... 8.48329049e-01
  8.70748299e-01 0.00000000e+00]
 ...
 [9.99531396e-01 4.00000000e-01 0.00000000e+00 ... 4.57583548e-01
  4.96598639e-01 0.00000000e+00]
 [9.99765698e-01 2.00000000e-01 0.00000000e+00 ... 3.54755784e-01
  3.94557823e-01 1.00000000e+00]
 [1.00000000e+00 2.00000000e-01 1.00000000e+00 ... 9.10025707e-01
  8.16326531e-01 1.00000000e+00]]


Splitting into train and test datasets:

In [None]:
# The first 3000 rows form the training set, the remaining rows the test set.
# Columns 1-11 are the inputs (column 0 is skipped) and column 12 onwards is
# the target. Each slice is transposed so that every example is a column.
x_train, x_test = data[:3000, 1:12].T, data[3000:, 1:12].T
y_train, y_test = data[:3000, 12:].T, data[3000:, 12:].T
# nx: number of input features, m: number of training examples.
nx, m = x_train.shape

Functions used to train the model:

In [None]:
def init_params(n_features=None, seed=None):
  """Create the initial weights, biases and training hyperparameters.

  The network has `n_features` inputs, a hidden layer of 2 units and a single
  output unit. Weights and the hidden bias are drawn uniformly from [0, 1).

  Args:
    n_features: Number of input features (rows of the input matrix). When
      omitted, the notebook-level `nx` is used.
    seed: Optional seed for a private random generator, making the initial
      values reproducible. When omitted, values come from NumPy's global
      random state.

  Returns:
    Tuple `(w1, b1, w2, b2, alpha, itr)` where `w1` has shape
    (n_features, 2), `b1` (2, 1), `w2` (2, 1), `b2` is the scalar 0,
    `alpha` is the learning rate (0.02) and `itr` the number of
    gradient-descent iterations (5000).
  """
  if n_features is None:
    # Fall back to the feature count computed when the data was split.
    n_features = nx
  # `np.random` and `RandomState` expose the same `rand` API.
  rng = np.random if seed is None else np.random.RandomState(seed)
  w1 = rng.rand(n_features, 2)
  b1 = rng.rand(2, 1)
  w2 = rng.rand(2, 1)
  b2 = 0
  alpha = 0.02
  itr = 5000
  return w1,b1,w2,b2,alpha,itr

def sigmoid(z):
  """Logistic function 1 / (1 + exp(-z)), applied element-wise.

  Computed as exp(-log(1 + exp(-z))) using `np.logaddexp`, which avoids the
  overflow (and RuntimeWarning) that `np.exp(-z)` produces for large
  negative `z`.

  Args:
    z: Scalar or array of pre-activations.

  Returns:
    Values in [0, 1] with the same shape as `z`.
  """
  # logaddexp(0, -z) == log(exp(0) + exp(-z)) == log(1 + exp(-z))
  return np.exp(-np.logaddexp(0, -z))

def relu(z):
  """Rectified linear unit: element-wise maximum of `z` and 0."""
  rectified = np.maximum(z, 0)
  return rectified

def drelu(z):
  """Derivative of ReLU as a boolean mask: True where `z` is strictly positive."""
  is_active = z > 0
  return is_active

def forward_prop(w1,b1,w2,b2,x):
  """Run one forward pass through the two-layer network.

  Args:
    w1, b1: Hidden-layer weights and bias.
    w2, b2: Output-layer weights and bias.
    x: Input matrix with one example per column.

  Returns:
    Tuple `(z1, a1, z2, a2)`: hidden pre-activation, hidden ReLU activation,
    output pre-activation and output sigmoid activation.
  """
  hidden_pre = w1.T.dot(x) + b1
  hidden_act = relu(hidden_pre)
  out_pre = w2.T.dot(hidden_act) + b2
  out_act = sigmoid(out_pre)
  return hidden_pre, hidden_act, out_pre, out_act

def backward_prop(w2,z1,a1,z2,a2,x=None,y=None):
  """Compute the gradients for both layers from one forward pass.

  The output error is taken as `a2 - y`, then propagated back through `w2`
  and the ReLU mask of `z1`. All gradients are averaged over the examples.

  Args:
    w2: Output-layer weights.
    z1: Hidden pre-activations from the forward pass.
    a1: Hidden activations from the forward pass.
    z2: Output pre-activations (accepted for symmetry with `forward_prop`;
      not used in the computation).
    a2: Output activations from the forward pass.
    x: Inputs the forward pass was run on, one example per column. Defaults
      to the notebook-level `x_train`.
    y: Targets matching `x`. Defaults to the notebook-level `y_train`.

  Returns:
    Tuple `(dw1, db1, dw2, db2)` of gradients for `w1`, `b1`, `w2`, `b2`.
  """
  if x is None:
    x = x_train
  if y is None:
    y = y_train
  # Average over the examples actually passed in, not a fixed global count.
  n_examples = x.shape[1]
  dz2 = a2-y
  dw2 = 1/n_examples*np.matmul(a1,dz2.T)
  db2 = 1/n_examples*np.sum(dz2, axis=1, keepdims=True)
  dz1 = np.matmul(w2,dz2)*drelu(z1)
  dw1 = 1/n_examples*np.matmul(x,dz1.T)
  db1 = 1/n_examples*np.sum(dz1, axis=1, keepdims=True)
  return dw1,db1,dw2,db2

def update_params(w1,b1,w2,b2,dw1,db1,dw2,db2,alpha):
  """Apply one gradient-descent step to every parameter.

  Each parameter is decreased by `alpha` times its gradient using augmented
  assignment, so array arguments are updated in place.

  Returns:
    Tuple `(w1, b1, w2, b2)` of updated parameters.
  """
  step_w1 = alpha * dw1
  step_b1 = alpha * db1
  step_w2 = alpha * dw2
  step_b2 = alpha * db2
  w1 -= step_w1
  b1 -= step_b1
  w2 -= step_w2
  b2 -= step_b2
  return w1, b1, w2, b2


Training of model:

In [None]:
def get_prediction(a):
  """Threshold the first row of `a` at 0.5.

  Returns:
    A nested list `[[...]]` holding 1 where the value is greater than 0.5
    and 0 otherwise.
  """
  labels = []
  for prob in a[0]:
    labels.append(1 if prob > 0.5 else 0)
  return [labels]

def get_accuracy(a,y):
  """Fraction of predictions in `a` that equal the targets in `y`.

  Counts positions where `a - y` is exactly 0 and divides by the number of
  columns of `y`.
  """
  matches = np.equal(np.subtract(a, y), 0)
  n_examples = y.shape[1]
  return matches.sum() / n_examples

def gradient_descendent(w1,b1,w2,b2,alpha,itr):
  """Train the network on `x_train` / `y_train` with batch gradient descent.

  Runs `itr` iterations of forward pass, backward pass and parameter update
  with learning rate `alpha`. Every 500th iteration (starting at 0) the
  training accuracy of that iteration's forward pass is printed.

  Returns:
    Tuple `(w1, b1, w2, b2)` of trained parameters.
  """
  report_every = 500
  for step in range(itr):
    z1, a1, z2, a2 = forward_prop(w1, b1, w2, b2, x_train)
    grads = backward_prop(w2, z1, a1, z2, a2)
    w1, b1, w2, b2 = update_params(w1, b1, w2, b2, *grads, alpha)
    if step % report_every != 0:
      continue
    # a2 was computed before this step's update was applied.
    pre = get_prediction(a2)
    print("accuracy: ",get_accuracy(pre,y_train))
  return w1, b1, w2, b2

In [None]:
# Draw the initial parameters together with the learning rate and iteration count.
w1,b1,w2,b2,alpha,itr = init_params()
# Train on the training set; the parameter names are rebound to the trained values.
w1,b1,w2,b2 = gradient_descendent(w1,b1,w2,b2,alpha,itr)

accuracy:  0.621
accuracy:  0.6223333333333333
accuracy:  0.6633333333333333
accuracy:  0.85
accuracy:  0.9023333333333333
accuracy:  0.9143333333333333
accuracy:  0.92
accuracy:  0.923
accuracy:  0.9263333333333333
accuracy:  0.9283333333333333


In [None]:
# Forward pass of the trained network on the test inputs.
z1,a1,z2,a2 = forward_prop(w1,b1,w2,b2,x_test)
# Threshold the outputs at 0.5 and report the fraction matching y_test.
res = get_prediction(a2)
print('accuracy: ',get_accuracy(res,y_test))

accuracy:  0.9180457052797478
