In [20]:
%matplotlib inline
import time
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB

In [2]:
# Load the labelled loans and separate the feature columns from the target.
train = pd.read_csv("data/final_loan_train.csv")
Y_all = train["loan_success"]
X_all = train.drop(columns=["loan_id", "loan_success"])

# Hold out a stratified 30% of the rows for evaluation (fixed seed).
split_size = 0.3
X_train, X_test, Y_train, Y_test = train_test_split(
    X_all,
    Y_all,
    test_size=split_size,
    random_state=42,
    stratify=Y_all,
)

# Fit the scaler on the training partition only, then apply the same
# transformation to both partitions.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

## Decision Tree

In [3]:
# Decision tree baseline trained on the scaled training split and scored on
# the held-out split.
# random_state is pinned (same seed as the train/test split) because the tree
# chooses randomly among equally good candidate splits; without a seed the
# fitted tree, and therefore the report, can change from run to run.
decisionTree = DecisionTreeClassifier(random_state=42)
decisionTree.fit(X_train, Y_train)
Y_pred = decisionTree.predict(X_test)
print(classification_report(Y_test, Y_pred))

              precision    recall  f1-score   support

          -1       0.32      0.43      0.36        14
           1       0.90      0.85      0.87        85

    accuracy                           0.79        99
   macro avg       0.61      0.64      0.62        99
weighted avg       0.82      0.79      0.80        99



## KNN

In [4]:
# k-nearest-neighbours baseline (k = 5): fit on the training split, then
# report per-class metrics on the held-out split.
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, Y_train)
Y_pred = knn.predict(X_test)
print(classification_report(y_true=Y_test, y_pred=Y_pred))

              precision    recall  f1-score   support

          -1       0.67      0.14      0.24        14
           1       0.88      0.99      0.93        85

    accuracy                           0.87        99
   macro avg       0.77      0.57      0.58        99
weighted avg       0.85      0.87      0.83        99



In [5]:
# Logistic regression baseline with library-default settings: fit on the
# training split, then report per-class metrics on the held-out split.
logreg = LogisticRegression().fit(X_train, Y_train)
Y_pred = logreg.predict(X_test)
print(classification_report(y_true=Y_test, y_pred=Y_pred))

              precision    recall  f1-score   support

          -1       0.38      0.21      0.27        14
           1       0.88      0.94      0.91        85

    accuracy                           0.84        99
   macro avg       0.63      0.58      0.59        99
weighted avg       0.81      0.84      0.82        99



## SVM

In [6]:
# Support vector classifier with a linear kernel: fit on the training split,
# then report per-class metrics on the held-out split.
svm = SVC(kernel='linear').fit(X_train, Y_train)
Y_pred = svm.predict(X_test)
print(classification_report(y_true=Y_test, y_pred=Y_pred))

              precision    recall  f1-score   support

          -1       0.60      0.21      0.32        14
           1       0.88      0.98      0.93        85

    accuracy                           0.87        99
   macro avg       0.74      0.60      0.62        99
weighted avg       0.84      0.87      0.84        99



## Naive Bayes

In [7]:
# Gaussian naive Bayes baseline: fit on the training split, then report
# per-class metrics on the held-out split.
gnb = GaussianNB().fit(X_train, Y_train)
Y_pred = gnb.predict(X_test)
print(classification_report(y_true=Y_test, y_pred=Y_pred))

              precision    recall  f1-score   support

          -1       0.15      1.00      0.26        14
           1       1.00      0.06      0.11        85

    accuracy                           0.19        99
   macro avg       0.57      0.53      0.19        99
weighted avg       0.88      0.19      0.13        99



# Save result

In [18]:
# Classifier whose predictions are exported.
model = knn

# Load the test file and drop the same non-feature columns that were dropped
# from the training data.
test = pd.read_csv("data/final_loan_test.csv")
X = test.drop(columns=["loan_id", "loan_success"])

# Reuse the scaler that was fitted on the training split. Re-fitting it on the
# test file would standardise these features with different means/variances
# than the ones the model saw during training, and would also overwrite the
# scaler's state for every other cell in the notebook.
X = scaler.transform(X)
Y = model.predict(X)
test["loan_success"] = Y

# Write an (Id, Predicted) file named after the model's class.
(
    test[["loan_id", "loan_success"]]
    .rename(columns={"loan_id": "Id", "loan_success": "Predicted"})
    .to_csv("predictions/" + model.__class__.__name__ + "_prediction.csv", index=False)
)

In [59]:
# Capture the current local time as a struct_time; the bare `t` on the last
# line displays it as the cell output.
t = time.localtime()
# NOTE(review): leftover, incomplete draft of a "year.month.day" string built
# from t — presumably intended as a date stamp for output names; confirm or delete.
#str(t.tm_year) + "." + str(t.tm_mon) +  "." + str(t.tm_mday) + 
t

time.struct_time(tm_year=2021, tm_mon=12, tm_mday=3, tm_hour=10, tm_min=51, tm_sec=58, tm_wday=4, tm_yday=337, tm_isdst=0)