In [80]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB

In [54]:
# Load the labelled loans and separate the features from the target column.
train = pd.read_csv("data/final_loan_train.csv")
Y_all = train["loan_success"]
X_all = train.drop(["loan_id", "loan_success"], axis=1)

# Hold out 30% of the rows for evaluation, stratified on the label so both
# splits keep the same class balance.
split_size = 0.3
X_train, X_test, Y_train, Y_test = train_test_split(
    X_all,
    Y_all,
    test_size=split_size,
    stratify=Y_all,
    random_state=42,
)

# Standardisation statistics are learned from the training split only and then
# applied unchanged to the evaluation split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

## Decision Tree

In [79]:
# Decision-tree baseline trained on the standardised training split.
# random_state is pinned (same seed as the train/test split) so the fitted tree,
# and therefore the submission generated from it further down, is reproducible
# across kernel restarts. Scores may differ slightly from earlier unseeded runs.
decisionTree = DecisionTreeClassifier(random_state=42)
decisionTree.fit(X_train, Y_train)

# Evaluate on the held-out split and print per-class precision/recall/F1.
Y_pred = decisionTree.predict(X_test)
print(classification_report(Y_test, Y_pred))

              precision    recall  f1-score   support

          -1       0.43      0.43      0.43        14
           1       0.91      0.91      0.91        85

    accuracy                           0.84        99
   macro avg       0.67      0.67      0.67        99
weighted avg       0.84      0.84      0.84        99



## KNN

In [38]:
# k-nearest-neighbours baseline (k = 5) fitted on the training split.
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, Y_train)

# Score the held-out split and print the per-class report.
Y_pred = knn.predict(X_test)
print(classification_report(y_true=Y_test, y_pred=Y_pred))

              precision    recall  f1-score   support

          -1       0.00      0.00      0.00        14
           1       0.86      0.98      0.91        85

    accuracy                           0.84        99
   macro avg       0.43      0.49      0.46        99
weighted avg       0.73      0.84      0.78        99



In [42]:
# Logistic-regression baseline with scikit-learn's default settings.
logreg = LogisticRegression().fit(X_train, Y_train)

# Score the held-out split and print the per-class report.
Y_pred = logreg.predict(X_test)
print(classification_report(y_true=Y_test, y_pred=Y_pred))

              precision    recall  f1-score   support

          -1       0.42      0.36      0.38        14
           1       0.90      0.92      0.91        85

    accuracy                           0.84        99
   macro avg       0.66      0.64      0.65        99
weighted avg       0.83      0.84      0.83        99



## SVM

In [75]:
# Support-vector classifier with a linear kernel, fitted on the training split.
svm = SVC(kernel="linear").fit(X_train, Y_train)

# Score the held-out split and print the per-class report.
Y_pred = svm.predict(X_test)
print(classification_report(y_true=Y_test, y_pred=Y_pred))

              precision    recall  f1-score   support

          -1       0.36      0.36      0.36        14
           1       0.89      0.89      0.89        85

    accuracy                           0.82        99
   macro avg       0.63      0.63      0.63        99
weighted avg       0.82      0.82      0.82        99



## Naive Bayes

In [81]:
# Gaussian naive Bayes baseline with default settings.
gnb = GaussianNB().fit(X_train, Y_train)

# Score the held-out split and print the per-class report.
Y_pred = gnb.predict(X_test)
print(classification_report(y_true=Y_test, y_pred=Y_pred))

              precision    recall  f1-score   support

          -1       0.14      1.00      0.25        14
           1       1.00      0.02      0.05        85

    accuracy                           0.16        99
   macro avg       0.57      0.51      0.15        99
weighted avg       0.88      0.16      0.08        99



# Save result

In [82]:
# Pick the fitted decision tree as the estimator used to generate the
# submission file; point `model` at another fitted classifier to change it.
model = decisionTree

In [83]:
# Score the unlabelled competition file with the selected model and write the
# submission CSV (columns: Id, Predicted).
test = pd.read_csv("data/final_loan_test.csv")
X = test.drop(columns=["loan_id", "loan_success"])

# Reuse the scaler that was fitted on the training split. Re-fitting it here
# would standardise the test rows with different means/variances than the
# model was trained on, and would also clobber the training-fitted scaler.
X = scaler.transform(X)

Y = model.predict(X)
test["loan_success"] = Y

submission = test[["loan_id", "loan_success"]].rename(
    columns={"loan_id": "Id", "loan_success": "Predicted"}
)
submission.to_csv("prediction.csv", index=False)