In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MaxAbsScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.model_selection import cross_val_score
from sklearn.metrics import f1_score
import pickle

In [2]:
scaler = MaxAbsScaler()

In [3]:
train = pd.read_csv('train.csv')

In [4]:
test = pd.read_csv('test.csv')

In [5]:
train.head()

Unnamed: 0,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,1,1,0,0,0,8.699515,2250.0,5.57973,360.0,1,1,0
1,1,1,0,0,0,7.992269,2900.0,4.875197,360.0,1,1,1
2,1,1,2,0,0,8.740337,1695.0,5.347108,360.0,1,1,1
3,1,1,0,0,0,7.641564,3150.0,4.85203,360.0,1,1,1
4,1,0,0,0,0,8.334712,0.0,4.584967,360.0,0,1,0


In [6]:
train_y = train.iloc[:,-1]

In [7]:
train_x = train.drop('Loan_Status',axis=1)

In [8]:
test_y = test.iloc[:,-1]

In [9]:
test_x = test.drop('Loan_Status',axis=1)

In [10]:
x = pd.concat([train_x,test_x],axis=0)
y = pd.concat([train_y,test_y],axis=0)

In [11]:
train_x = scaler.fit_transform(train_x)

In [12]:
test_x = scaler.transform(test_x)

# DecisionTree

In [13]:
def decisionTree(train_x,test_x,train_y,test_y):
    dt = DecisionTreeClassifier()
    dt.fit(train_x,train_y)
    y_pred = dt.predict(test_x)
    print("**** Decision Tree Classifier ****")
    print('Confusion Matrix')
    print(confusion_matrix(test_y,y_pred))
    print('Classification Report')
    print(classification_report(test_y,y_pred))

# RandomForest

In [14]:
def randomForest(train_x,test_x,train_y,test_y):
    rf = RandomForestClassifier()
    rf.fit(train_x,train_y)
    y_pred = rf.predict(test_x)
    print("**** Random Forest Classifier ****")
    print('Confusion Matrix')
    print(confusion_matrix(test_y,y_pred))
    print('Classification Report')
    print(classification_report(test_y,y_pred))

# Knn

In [15]:
def knn(train_x,test_x,train_y,test_y):
    knn = KNeighborsClassifier()
    knn.fit(train_x,train_y)
    y_pred = knn.predict(test_x)
    print("**** KNeighbour Classifier ****")
    print('Confusion Matrix')
    print(confusion_matrix(test_y,y_pred))
    print('Classification Report')
    print(classification_report(test_y,y_pred))

# xgboost

In [16]:
def xgboost(train_x,test_x,train_y,test_y):
    xg = GradientBoostingClassifier()
    xg.fit(train_x,train_y)
    y_pred = xg.predict(test_x)
    print("**** Gradient Boosting Classifier ****")
    print('Confusion Matrix')
    print(confusion_matrix(test_y,y_pred))
    print('Classification Report')
    print(classification_report(test_y,y_pred))

# Comparing The Models

In [17]:
decisionTree(train_x,test_x,train_y,test_y)

**** Decision Tree Classifier ****
Confusion Matrix
[[23 20]
 [14 66]]
Classification Report
              precision    recall  f1-score   support

           0       0.62      0.53      0.57        43
           1       0.77      0.82      0.80        80

    accuracy                           0.72       123
   macro avg       0.69      0.68      0.69       123
weighted avg       0.72      0.72      0.72       123



In [18]:
randomForest(train_x,test_x,train_y,test_y)

**** Random Forest Classifier ****
Confusion Matrix
[[18 25]
 [ 5 75]]
Classification Report
              precision    recall  f1-score   support

           0       0.78      0.42      0.55        43
           1       0.75      0.94      0.83        80

    accuracy                           0.76       123
   macro avg       0.77      0.68      0.69       123
weighted avg       0.76      0.76      0.73       123



In [19]:
knn(train_x,test_x,train_y,test_y)

**** KNeighbour Classifier ****
Confusion Matrix
[[18 25]
 [ 7 73]]
Classification Report
              precision    recall  f1-score   support

           0       0.72      0.42      0.53        43
           1       0.74      0.91      0.82        80

    accuracy                           0.74       123
   macro avg       0.73      0.67      0.67       123
weighted avg       0.74      0.74      0.72       123



In [20]:
xgboost(train_x,test_x,train_y,test_y)

**** Gradient Boosting Classifier ****
Confusion Matrix
[[19 24]
 [ 7 73]]
Classification Report
              precision    recall  f1-score   support

           0       0.73      0.44      0.55        43
           1       0.75      0.91      0.82        80

    accuracy                           0.75       123
   macro avg       0.74      0.68      0.69       123
weighted avg       0.74      0.75      0.73       123



In [27]:
rf = RandomForestClassifier()
rf.fit(train_x,train_y)
ypred = rf.predict(test_x)

In [23]:
f1_score(ypred,test_y,average='weighted')

0.8067630090820815

In [24]:
cv = cross_val_score(rf,x,y,cv=5)

In [25]:
np.mean(cv)

0.7948020791683327

In [26]:
pickle.dump(rf,open('rdf.pkl','wb'))
pickle.dump(scaler,open('scale.pkl','wb'))