# Loan Eligibility Prediction

## Data Loading

In [1]:
import pandas as pd

In [2]:
df = pd.read_csv("data/proj18/Loan_Data.csv")
df.head()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,Male,No,0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y
1,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y


## Cleaning and Preparing Data

In [3]:
df.drop("Loan_ID", inplace=True, axis=1)
df.isna().sum()

Gender               13
Married               3
Dependents           15
Education             0
Self_Employed        32
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount           22
Loan_Amount_Term     14
Credit_History       50
Property_Area         0
Loan_Status           0
dtype: int64

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 614 entries, 0 to 613
Data columns (total 12 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Gender             601 non-null    object 
 1   Married            611 non-null    object 
 2   Dependents         599 non-null    object 
 3   Education          614 non-null    object 
 4   Self_Employed      582 non-null    object 
 5   ApplicantIncome    614 non-null    int64  
 6   CoapplicantIncome  614 non-null    float64
 7   LoanAmount         592 non-null    float64
 8   Loan_Amount_Term   600 non-null    float64
 9   Credit_History     564 non-null    float64
 10  Property_Area      614 non-null    object 
 11  Loan_Status        614 non-null    object 
dtypes: float64(4), int64(1), object(7)
memory usage: 57.7+ KB


In [5]:
len(df)

614

In [6]:
import numpy as np
df["Credit_History"].fillna(np.mean(df["Credit_History"]), inplace=True)
df["Loan_Amount_Term"].fillna(np.mean(df["Loan_Amount_Term"]), inplace=True)
df["LoanAmount"].fillna(np.mean(df["LoanAmount"]), inplace=True)
df["CoapplicantIncome"].fillna(np.mean(df["CoapplicantIncome"]), inplace=True)

df.isna().sum()

Gender               13
Married               3
Dependents           15
Education             0
Self_Employed        32
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount            0
Loan_Amount_Term      0
Credit_History        0
Property_Area         0
Loan_Status           0
dtype: int64

In [7]:
df["Gender"].fillna(df["Gender"].value_counts().keys()[0], inplace=True)
df["Married"].fillna(df["Married"].value_counts().keys()[0], inplace=True)
df["Dependents"].fillna(df["Dependents"].value_counts().keys()[0], inplace=True)
df["Self_Employed"].fillna(df["Self_Employed"].value_counts().keys()[0], inplace=True)

In [8]:
df.isna().sum()

Gender               0
Married              0
Dependents           0
Education            0
Self_Employed        0
ApplicantIncome      0
CoapplicantIncome    0
LoanAmount           0
Loan_Amount_Term     0
Credit_History       0
Property_Area        0
Loan_Status          0
dtype: int64

In [9]:
df.keys()

Index(['Gender', 'Married', 'Dependents', 'Education', 'Self_Employed',
       'ApplicantIncome', 'CoapplicantIncome', 'LoanAmount',
       'Loan_Amount_Term', 'Credit_History', 'Property_Area', 'Loan_Status'],
      dtype='object')

In [10]:
from sklearn.preprocessing import LabelEncoder
for stuff in df.keys():
    if pd.api.types.is_object_dtype(df[stuff]) == True:
        le = LabelEncoder()
        df[stuff] = le.fit_transform(df[stuff])

In [11]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 614 entries, 0 to 613
Data columns (total 12 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Gender             614 non-null    int64  
 1   Married            614 non-null    int64  
 2   Dependents         614 non-null    int64  
 3   Education          614 non-null    int64  
 4   Self_Employed      614 non-null    int64  
 5   ApplicantIncome    614 non-null    int64  
 6   CoapplicantIncome  614 non-null    float64
 7   LoanAmount         614 non-null    float64
 8   Loan_Amount_Term   614 non-null    float64
 9   Credit_History     614 non-null    float64
 10  Property_Area      614 non-null    int64  
 11  Loan_Status        614 non-null    int64  
dtypes: float64(4), int64(8)
memory usage: 57.7 KB


In [12]:
X = df.drop("Loan_Status", axis=1)
y = df["Loan_Status"]

In [13]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y)
len(X_train), len(y_train), len(X_test), len(y_test)

(460, 460, 154, 154)

## Training the Model

## Kneighour Classifier

In [14]:
from sklearn.neighbors import KNeighborsClassifier

KNClf = KNeighborsClassifier()
KNClf.fit(X_train, y_train)


In [15]:
print(f"Accuracy: {(KNClf.score(X_test, y_test))*100} %")

Accuracy: 62.33766233766234 %


In [16]:
KNClf.predict([[1, 0, 1, 1, 1, 4000, 1200, 100, 32, 3, 1]])



array([1])

## Random Forest Classifier

In [17]:
from sklearn.ensemble import RandomForestClassifier

rclf = RandomForestClassifier()
rclf.fit(X_train, y_train)
rclf.score(X_test, y_test)

0.7597402597402597

## SVM

In [18]:
from sklearn import svm

svmModel = svm.SVC()
svmModel.fit(X_train, y_train)
svmModel.score(X_test, y_test)

0.7337662337662337

## Artificial Neural Network

In [20]:
import tensorflow as tf

In [84]:
model = tf.keras.models.Sequential()

In [85]:
model.add(tf.keras.layers.Dense(11, activation='relu'))

In [86]:
model.add(tf.keras.layers.Dense(11, activation='relu'))
model.add(tf.keras.layers.Dense(2, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

In [87]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [88]:
model.layers

[<keras.layers.core.dense.Dense at 0x7f74e57fb760>,
 <keras.layers.core.dense.Dense at 0x7f74e57c6280>,
 <keras.layers.core.dense.Dense at 0x7f74e57cd4f0>,
 <keras.layers.core.dense.Dense at 0x7f74e57fb700>]

In [89]:
history = model.fit(X, y, epochs=100, batch_size=32)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
