<a href="https://colab.research.google.com/github/Maximus125/Predicting_Loan/blob/master/Loan_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [349]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## *Reading the data*

In [350]:
# Read the loan data from the CSV file into a DataFrame.
df = pd.read_csv('loan_data.csv')
# Preview the first five rows.
df.head()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,Male,No,0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y
1,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y


## *Looking into the data*

In [351]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 614 entries, 0 to 613
Data columns (total 13 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Loan_ID            614 non-null    object 
 1   Gender             601 non-null    object 
 2   Married            611 non-null    object 
 3   Dependents         599 non-null    object 
 4   Education          614 non-null    object 
 5   Self_Employed      582 non-null    object 
 6   ApplicantIncome    614 non-null    int64  
 7   CoapplicantIncome  614 non-null    float64
 8   LoanAmount         592 non-null    float64
 9   Loan_Amount_Term   600 non-null    float64
 10  Credit_History     564 non-null    float64
 11  Property_Area      614 non-null    object 
 12  Loan_Status        614 non-null    object 
dtypes: float64(4), int64(1), object(8)
memory usage: 62.5+ KB


In [352]:
df.shape

(614, 13)

## *Dividing the data into input features and target variables*

In [353]:
# Predictors: every column except the label (Loan_Status) and Loan_ID.
X = df.drop(columns=['Loan_Status', 'Loan_ID'])
# Target: the Loan_Status column.
y = df['Loan_Status']

In [354]:
# Show the dimensions of the feature matrix and the target, one per line.
print(X.shape, y.shape, sep='\n')

(614, 11)
(614,)


## *From now on we will work only on the training set*
## *Let's check for null values*

In [355]:
# Count missing entries: per feature column for X, then the total for y.
print(X.isna().sum(), y.isna().sum(), sep='\n')

Gender               13
Married               3
Dependents           15
Education             0
Self_Employed        32
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount           22
Loan_Amount_Term     14
Credit_History       50
Property_Area         0
dtype: int64
0


In [356]:
# So some features of the training set do indeed have null values.
# Some of these features are numerical, whereas others are categorical.
# We need to deal with each kind accordingly.

In [357]:
# Impute missing numeric values with each column's median.
# The filled column is assigned back instead of calling
# `X[col].fillna(..., inplace=True)`: that form mutates a temporary Series,
# which emits a FutureWarning on pandas 2.x and leaves X unchanged once
# copy-on-write is enabled.
# NOTE(review): the medians are computed on the full dataset, before the
# train/test split further down, so test rows influence the fill values.
for col in ['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount',
            'Loan_Amount_Term', 'Credit_History']:
    X[col] = X[col].fillna(X[col].median())

In [358]:
X.isnull().sum()
# The numerical features no longer contain any null values

Gender               13
Married               3
Dependents           15
Education             0
Self_Employed        32
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount            0
Loan_Amount_Term      0
Credit_History        0
Property_Area         0
dtype: int64

In [359]:
# Let's do the same with the categorical features, filling missing entries
# with each column's most frequent value (the first mode).
# The filled column is assigned back instead of calling
# `X[col].fillna(..., inplace=True)`: that form mutates a temporary Series,
# which emits a FutureWarning on pandas 2.x and leaves X unchanged once
# copy-on-write is enabled.
for col in ['Gender', 'Married', 'Dependents', 'Self_Employed']:
    X[col] = X[col].fillna(X[col].mode()[0])

In [360]:
X.isnull().sum()

Gender               0
Married              0
Dependents           0
Education            0
Self_Employed        0
ApplicantIncome      0
CoapplicantIncome    0
LoanAmount           0
Loan_Amount_Term     0
Credit_History       0
Property_Area        0
dtype: int64

In [361]:
# So there are no null values left in our training input set

## *Changing Categorical Features into Numerical Features*

In [362]:
# Encode Gender as an integer code: Male -> 0, Female -> 1.
gender_codes = {'Male':0, 'Female':1}
X['Gender'] = X['Gender'].map(gender_codes)

# The remaining categorical features are encoded the same way

In [363]:
# Integer codes for each remaining categorical column, applied in order.
category_codes = {
    'Married': {'No':0, 'Yes':1},
    'Dependents': {'0':0, '1':1, '2':2, '3+':3},
    'Education': {'Graduate':1, 'Not Graduate':0},
    'Self_Employed': {'No':0, 'Yes':1},
    'Property_Area': {'Rural':0, 'Semiurban':1, 'Urban':2},
}
for column, codes in category_codes.items():
    # Series.map leaves NaN for any value that is not a key of `codes`.
    X[column] = X[column].map(codes)

In [364]:
X.head()

# The categorical features in X have been converted to numerical codes

Unnamed: 0,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area
0,0,0,0,1,0,5849,0.0,128.0,360.0,1.0,2
1,0,1,1,1,0,4583,1508.0,128.0,360.0,1.0,0
2,0,1,0,1,1,3000,0.0,66.0,360.0,1.0,2
3,0,1,0,0,0,2583,2358.0,120.0,360.0,1.0,2
4,0,0,0,1,0,6000,0.0,141.0,360.0,1.0,2


In [365]:
y = y.map({'Y':1, 'N':0})

# The categorical target y is now encoded numerically (Y -> 1, N -> 0)

In [366]:
y

0      1
1      0
2      1
3      1
4      1
      ..
609    1
610    1
611    1
612    1
613    0
Name: Loan_Status, Length: 614, dtype: int64

In [367]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=10,
)

In [368]:
# Show the dimensions of each split, one per line:
# train features, test features, train target, test target.
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape, sep='\n')

(491, 11)
(123, 11)
(491,)
(123,)


## *Defining the Neural Network Model for the problem*

In [369]:
# We will use Keras to define our model, so let's import it
import keras
keras.__version__

'2.4.3'

In [370]:
# We will also use TensorFlow as the backend, so let's import that as well
import tensorflow as tf
tf.__version__

'2.3.0'

In [371]:
X_train.shape

(491, 11)

In [372]:
from keras import models
from keras import layers

In [373]:
# Network layout:
#   Input layer    : one unit per feature column of X_train (11 here)
#   Hidden layer 1 : 10 neurons, ReLU
#   Hidden layer 2 : 6 neurons, ReLU
#   Hidden layer 3 : 3 neurons, ReLU
#   Output layer   : 1 sigmoid neuron, because this is a binary
#                    classification problem (Yes/No)

# Read the input width from the data so it cannot drift from X_train.
n_features = X_train.shape[1]

model = models.Sequential()
model.add(layers.InputLayer(input_shape=(n_features,)))
model.add(layers.Dense(10, activation='relu'))
model.add(layers.Dense(6, activation='relu'))
model.add(layers.Dense(3, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

In [374]:
model.summary()

Model: "sequential_13"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_40 (Dense)             (None, 10)                120       
_________________________________________________________________
dense_41 (Dense)             (None, 6)                 66        
_________________________________________________________________
dense_42 (Dense)             (None, 3)                 21        
_________________________________________________________________
dense_43 (Dense)             (None, 1)                 4         
Total params: 211
Trainable params: 211
Non-trainable params: 0
_________________________________________________________________


## *Compiling the Model*

In [375]:
# Configure training: binary cross-entropy loss, the Adam optimizer, and
# accuracy as the reported metric.
model.compile(loss='binary_crossentropy', optimizer='Adam', metrics=['accuracy'])

#### *Creating a Validation Data Set*

## *Training our Model*

In [376]:
# Train for 30 epochs and keep the value returned by fit() in `history`.
# NOTE(review): the held-out test split (X_test, y_test) is passed as
# validation_data, so the validation metrics are measured on the test set.
# Confirm whether a separate validation split was intended.
history = model.fit(X_train, y_train, epochs=30, validation_data=(X_test, y_test))

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
