In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn import metrics

In [2]:
#save in dataframe
url="https://raw.githubusercontent.com/callxpert/datasets/master/Loan-applicant-details.csv"
names = ['Loan_ID','Gender','Married','Dependents','Education','Self_Employed','ApplicantIncome','CoapplicantIncome','LoanAmount','Loan_Amount_Term','Credit_History','Property_Area','Loan_Status']
df=pd.read_csv(url,names=names)
df.head()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128,360,1,Rural,N
1,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66,360,1,Urban,Y
2,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120,360,1,Urban,Y
3,LP001008,Male,No,0,Graduate,No,6000,0.0,141,360,1,Urban,Y
4,LP001011,Male,Yes,2,Graduate,Yes,5417,4196.0,267,360,1,Urban,Y


In [3]:
#convert categorical values to integer values
number=LabelEncoder()
df['Gender']=number.fit_transform(df['Gender'].astype(str))
df['Married']=number.fit_transform(df['Married'].astype(str))
df['Dependents']=number.fit_transform(df['Dependents'].astype(str))
df['Education']=number.fit_transform(df['Education'].astype(str))
df['Self_Employed']=number.fit_transform(df['Self_Employed'].astype(str))
df['Property_Area']=number.fit_transform(df['Property_Area'].astype(str))
df['Loan_Status']=number.fit_transform(df['Loan_Status'].astype(str))
df.head()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001003,1,1,1,0,0,4583,1508.0,128,360,1,0,0
1,LP001005,1,1,0,0,1,3000,0.0,66,360,1,2,1
2,LP001006,1,1,0,1,0,2583,2358.0,120,360,1,2,1
3,LP001008,1,0,0,0,0,6000,0.0,141,360,1,2,1
4,LP001011,1,1,2,0,1,5417,4196.0,267,360,1,2,1


In [4]:
#predictor values
X=df[['Gender','Married','Dependents','Education','Self_Employed','ApplicantIncome','CoapplicantIncome','LoanAmount','Loan_Amount_Term','Credit_History','Property_Area']].values
#scaling of parameters
X=preprocessing.StandardScaler().fit(X).transform(X.astype(float))
X[0:5]

array([[ 0.46719815,  0.73716237,  0.21859918, -0.50325312, -0.3992747 ,
        -0.13796959, -0.02795204, -0.20808917,  0.27554157,  0.41319694,
        -1.31886834],
       [ 0.46719815,  0.73716237, -0.76203265, -0.50325312,  2.50454133,
        -0.4175358 , -0.604633  , -0.97900085,  0.27554157,  0.41319694,
         1.25977445],
       [ 0.46719815,  0.73716237, -0.76203265,  1.98707162, -0.3992747 ,
        -0.49118022,  0.29710022, -0.30756164,  0.27554157,  0.41319694,
         1.25977445],
       [ 0.46719815, -1.35655324, -0.76203265, -0.50325312, -0.3992747 ,
         0.11228014, -0.604633  , -0.04644639,  0.27554157,  0.41319694,
         1.25977445],
       [ 0.46719815,  0.73716237,  1.19923101, -0.50325312,  2.50454133,
         0.00931924,  0.99997795,  1.52024509,  0.27554157,  0.41319694,
         1.25977445]])

In [5]:
#target values
y=df['Loan_Status']
y[0:5]


0    0
1    1
2    1
3    1
4    1
Name: Loan_Status, dtype: int32

In [6]:
#splitting
X_train,X_test,Y_train,Y_test=train_test_split(X,y,test_size=0.2,random_state=4)

In [7]:
#KNN
k=5
neigh=KNeighborsClassifier(n_neighbors=k).fit(X_train,Y_train)
y_=neigh.predict(X_test)
y_

array([1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1,
       0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
       1, 1, 1, 1, 1, 1, 0, 1])

In [8]:
#KNN Accuracy
print("Training Set Accuracy:",metrics.accuracy_score(Y_train,neigh.predict(X_train)))
print("Test Set Accuracy:",metrics.accuracy_score(Y_test,y_))
print(metrics.confusion_matrix(Y_test,y_))
print(metrics.classification_report(Y_test,y_))

Training Set Accuracy: 0.8463541666666666
Test Set Accuracy: 0.7395833333333334
[[14 21]
 [ 4 57]]
              precision    recall  f1-score   support

           0       0.78      0.40      0.53        35
           1       0.73      0.93      0.82        61

    accuracy                           0.74        96
   macro avg       0.75      0.67      0.67        96
weighted avg       0.75      0.74      0.71        96



In [9]:
#Decision Tree
tree=DecisionTreeClassifier(criterion="entropy",max_depth=4)
tree.fit(X_train,Y_train)
ptree=tree.predict(X_test)
ptree

array([1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1,
       0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0,
       1, 1, 1, 0, 1, 1, 1, 1])

In [10]:
#Decision Tree Accuracy
print("Training Set Accuracy:",metrics.accuracy_score(Y_train,tree.predict(X_train)))
print("Test Set Accuracy:",metrics.accuracy_score(Y_test,ptree))
print(metrics.confusion_matrix(Y_test,ptree))
print(metrics.classification_report(Y_test,ptree))

Training Set Accuracy: 0.8567708333333334
Test Set Accuracy: 0.7291666666666666
[[13 22]
 [ 4 57]]
              precision    recall  f1-score   support

           0       0.76      0.37      0.50        35
           1       0.72      0.93      0.81        61

    accuracy                           0.73        96
   macro avg       0.74      0.65      0.66        96
weighted avg       0.74      0.73      0.70        96



In [11]:
#Logistic Regression
lr=LogisticRegression(C=0.01,solver='liblinear').fit(X_train,Y_train)
y_=lr.predict(X_test)
y_

array([1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1,
       1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
       1, 1, 1, 0, 1, 1, 1, 1])

In [12]:
#target values probability
y_prob=lr.predict_proba(X_test)
y_prob

array([[0.41596384, 0.58403616],
       [0.37331114, 0.62668886],
       [0.32794038, 0.67205962],
       [0.29055953, 0.70944047],
       [0.30946594, 0.69053406],
       [0.37654068, 0.62345932],
       [0.34105504, 0.65894496],
       [0.68354292, 0.31645708],
       [0.40098458, 0.59901542],
       [0.32930771, 0.67069229],
       [0.73280381, 0.26719619],
       [0.38770951, 0.61229049],
       [0.3623352 , 0.6376648 ],
       [0.42625185, 0.57374815],
       [0.32182288, 0.67817712],
       [0.35163895, 0.64836105],
       [0.33343987, 0.66656013],
       [0.35186104, 0.64813896],
       [0.3478113 , 0.6521887 ],
       [0.3616733 , 0.6383267 ],
       [0.72128013, 0.27871987],
       [0.32094684, 0.67905316],
       [0.49430728, 0.50569272],
       [0.41550308, 0.58449692],
       [0.67348276, 0.32651724],
       [0.64418607, 0.35581393],
       [0.68407624, 0.31592376],
       [0.40569657, 0.59430343],
       [0.38213646, 0.61786354],
       [0.34803109, 0.65196891],
       [0.

In [13]:
#Logistic Regression Accuracy
print("Training Set Accuracy:",metrics.accuracy_score(Y_train,lr.predict(X_train)))
print("Testing Set Accuracy:",metrics.accuracy_score(Y_test,y_))
print(metrics.confusion_matrix(Y_test,y_))
print(metrics.classification_report(Y_test,y_))

Training Set Accuracy: 0.8255208333333334
Testing Set Accuracy: 0.75
[[13 22]
 [ 2 59]]
              precision    recall  f1-score   support

           0       0.87      0.37      0.52        35
           1       0.73      0.97      0.83        61

    accuracy                           0.75        96
   macro avg       0.80      0.67      0.68        96
weighted avg       0.78      0.75      0.72        96

