# **Importing Libraries**

In [694]:
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import sklearn
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier , RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import RandomizedSearchCV
import imblearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score

import warnings
warnings.filterwarnings('ignore')

# **Splitting data into test & train sets**

In [695]:
data = pd.read_csv('loan_data_dp.csv')

In [696]:
x = data.drop(["Loan_Status"],axis=1)
x

Unnamed: 0,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area
0,1,0,0,0,0,5849,0,120,360,1,2
1,1,1,1,0,0,4583,1508,128,360,1,0
2,1,1,0,0,1,3000,0,66,360,1,2
3,1,1,0,1,0,2583,2358,120,360,1,2
4,1,0,0,0,0,6000,0,141,360,1,2
...,...,...,...,...,...,...,...,...,...,...,...
701,1,0,0,0,0,6444,472,143,360,0,0
702,1,1,1,0,0,3814,3021,161,360,1,1
703,0,0,0,1,0,2799,0,63,274,0,2
704,0,0,0,0,0,2555,0,137,360,1,2


In [697]:
y = data.Loan_Status
y

0      1
1      0
2      1
3      1
4      1
      ..
701    0
702    0
703    0
704    0
705    0
Name: Loan_Status, Length: 706, dtype: int64

In [698]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.33,random_state=42)

# **Decision Tree**

In [699]:
def decisionTree(x_train, x_test, y_train, y_test):
    dt=DecisionTreeClassifier() 
    dt.fit(x_train,y_train) 
    yPred = dt.predict(x_test)
    print('Decision Tree Classifier:') 
    print('Confusion matrix')
    print(confusion_matrix(y_test,yPred)) 
    print('Classification report') 
    print(classification_report (y_test,yPred))
    print("score")
    print(dt.score(x_test,y_test))

# **Random Forest**

In [700]:
def randomForest(x_train, x_test, y_train, y_test):
    rf = RandomForestClassifier() 
    rf.fit(x_train,y_train) 
    yPred = rf.predict(x_test) 
    print('Random Forest Classifier:') 
    print('Confusion matrix')
    print(confusion_matrix(y_test,yPred))
    print('Classification report')
    print(classification_report(y_test,yPred))
    print("score")
    print(rf.score(x_test,y_test))

# **KNN**

In [701]:
def KNN(x_train, x_test, y_train, y_test):
    knn = KNeighborsClassifier()
    knn.fit(x_train,y_train)
    yPred = knn.predict(x_test)
    print('K-Neighbors Classifier:') 
    print('Confusion matrix')
    print(confusion_matrix(y_test,yPred)) 
    print('Classification report')
    print(classification_report(y_test,yPred))
    print("score")
    print(knn.score(x_test,y_test))

# **XGBoost**

In [702]:
def xgboost(x_train, x_test, y_train, y_test):
    xg = GradientBoostingClassifier()
    xg.fit(x_train,y_train)
    yPred = xg.predict(x_test)
    print('Gradient Boosting Classifier:') 
    print('Confusion matrix')
    print(confusion_matrix(y_test,yPred)) 
    print('Classification report')
    print(classification_report(y_test,yPred))
    print("score")
    print(xg.score(x_test,y_test))

# **Comparison of Models**

In [703]:
decisionTree(x_train, x_test, y_train, y_test)

Decision Tree Classifier:
Confusion matrix
[[84 23]
 [35 91]]
Classification report
              precision    recall  f1-score   support

           0       0.71      0.79      0.74       107
           1       0.80      0.72      0.76       126

    accuracy                           0.75       233
   macro avg       0.75      0.75      0.75       233
weighted avg       0.76      0.75      0.75       233

score
0.7510729613733905


In [704]:
randomForest(x_train, x_test, y_train, y_test)

Random Forest Classifier:
Confusion matrix
[[ 83  24]
 [ 20 106]]
Classification report
              precision    recall  f1-score   support

           0       0.81      0.78      0.79       107
           1       0.82      0.84      0.83       126

    accuracy                           0.81       233
   macro avg       0.81      0.81      0.81       233
weighted avg       0.81      0.81      0.81       233

score
0.8111587982832618


In [705]:
KNN(x_train, x_test, y_train, y_test)

K-Neighbors Classifier:
Confusion matrix
[[78 29]
 [59 67]]
Classification report
              precision    recall  f1-score   support

           0       0.57      0.73      0.64       107
           1       0.70      0.53      0.60       126

    accuracy                           0.62       233
   macro avg       0.63      0.63      0.62       233
weighted avg       0.64      0.62      0.62       233

score
0.6223175965665236


In [706]:
xgboost(x_train, x_test, y_train, y_test)

Gradient Boosting Classifier:
Confusion matrix
[[ 78  29]
 [ 21 105]]
Classification report
              precision    recall  f1-score   support

           0       0.79      0.73      0.76       107
           1       0.78      0.83      0.81       126

    accuracy                           0.79       233
   macro avg       0.79      0.78      0.78       233
weighted avg       0.79      0.79      0.78       233

score
0.7854077253218884


# **Performance Evaluation**

In [707]:
from sklearn.model_selection import cross_val_score
rf = RandomForestClassifier()
rf.fit(x_train,y_train)
yPred = rf.predict(x_test)
f1_score(yPred,y_test, average='weighted')
cv = cross_val_score(rf,x,y,cv=5)
np.mean(cv)

0.8145440015982419

In [708]:
pickle.dump(rf,open('../rf.pkl','wb'))