In [4]:
import warnings
from sklearn.model_selection import cross_val_score
warnings.filterwarnings('ignore')
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix,classification_report,accuracy_score


In [5]:
# Read the pre-split feature tables and label tables from ../data.
X_train, X_test = pd.read_csv('../data/X_train.csv'), pd.read_csv('../data/X_test.csv')
Y_train, Y_test = pd.read_csv('../data/Y_train.csv'), pd.read_csv('../data/Y_test.csv')

### Imbalanced dataset

In [6]:
# Count how many training rows fall into each 'Loan Status' class.
loan_status = Y_train['Loan Status']
loan_status.value_counts()

Fully Paid     41021
Charged Off    12971
Name: Loan Status, dtype: int64

### One-hot encoding

In [7]:
# One-hot encode the categorical columns. The training split defines the
# dummy-column layout (with the first level of each category dropped).
X_train = pd.get_dummies(X_train, drop_first=True)
Y_train = pd.get_dummies(Y_train, drop_first=True)

# Encode the test split fully (no drop_first) and project it onto the training
# columns. Encoding each split independently with drop_first=True breaks when
# a category is absent from one split: the column sets differ, or a different
# level gets dropped as the baseline. Reindexing discards levels the training
# data never produced a column for and zero-fills levels missing from the test
# data, so both splits always share the same columns in the same order.
X_test = pd.get_dummies(X_test).reindex(columns=X_train.columns, fill_value=0)
Y_test = pd.get_dummies(Y_test).reindex(columns=Y_train.columns, fill_value=0)

### KNN

In [8]:
## Train a default KNN classifier and predict the held-out set.
# NOTE(review): KNN is distance-based and no feature scaling is visible in
# this notebook; confirm the CSVs were scaled upstream.
knn = KNeighborsClassifier()
# Y_train is expected to be a single-column DataFrame at this point, but
# scikit-learn wants a 1-D target of shape (n_samples,). Flatten it explicitly
# rather than relying on the conversion warning that is silenced globally.
knn.fit(X_train, Y_train.values.ravel())
y_predict = knn.predict(X_test)

In [9]:
## Evaluate the KNN predictions currently held in `y_predict`.
print(confusion_matrix(Y_test, y_predict))
print("----------")
print(classification_report(Y_test, y_predict))
print("----------")
# Compute accuracy from the predictions already in hand. knn.score(X_test,
# Y_test) would run predict() over the whole test set a second time to arrive
# at the same mean accuracy, and using y_predict keeps all three metrics tied
# to the same set of predictions.
print("Score: ", accuracy_score(Y_test, y_predict))

[[1117 2126]
 [ 659 9596]]
----------
              precision    recall  f1-score   support

           0       0.63      0.34      0.45      3243
           1       0.82      0.94      0.87     10255

    accuracy                           0.79     13498
   macro avg       0.72      0.64      0.66     13498
weighted avg       0.77      0.79      0.77     13498

----------
Score:  0.7936731367610016


### SVM

In [10]:
## Train a default SVC and predict the held-out set.
svc = SVC()
# Y_train is expected to be a single-column DataFrame at this point, but
# scikit-learn wants a 1-D target of shape (n_samples,). Flatten it explicitly
# rather than relying on the conversion warning that is silenced globally.
svc.fit(X_train, Y_train.values.ravel())
y_predict = svc.predict(X_test)

In [11]:
## Evaluate the SVC predictions currently held in `y_predict`.
print(confusion_matrix(Y_test, y_predict))
print("----------")
print(classification_report(Y_test, y_predict))
print("----------")
# Compute accuracy from the predictions already in hand. svc.score(X_test,
# Y_test) would run predict() over the whole test set a second time to arrive
# at the same mean accuracy, and using y_predict keeps all three metrics tied
# to the same set of predictions.
print("Score: ", accuracy_score(Y_test, y_predict))



[[  864  2379]
 [    0 10255]]
----------
              precision    recall  f1-score   support

           0       1.00      0.27      0.42      3243
           1       0.81      1.00      0.90     10255

    accuracy                           0.82     13498
   macro avg       0.91      0.63      0.66     13498
weighted avg       0.86      0.82      0.78     13498

----------
Score:  0.8237516669136168


## Cross Validation (K-Fold)


In [12]:
# 10-fold cross-validation of both classifiers on the training split.
# The target is flattened once to the 1-D shape scikit-learn expects, instead
# of passing the single-column DataFrame and relying on silenced warnings.
y_train_1d = Y_train.values.ravel()
scores1 = cross_val_score(estimator=knn, X=X_train, y=y_train_1d, cv=10)
scores2 = cross_val_score(estimator=svc, X=X_train, y=y_train_1d, cv=10)

In [14]:
# Per-fold scores rounded to three decimals for display.
scores1_list = [round(fold_score, 3) for fold_score in scores1]
scores2_list = [round(fold_score, 3) for fold_score in scores2]

# One (heading, raw scores, rounded scores) entry per model, printed in order.
cv_summaries = [
    ("KNN: ", scores1, scores1_list),
    ("SVC: ", scores2, scores2_list),
]
for position, (heading, fold_scores, rounded_scores) in enumerate(cv_summaries):
    if position > 0:
        print("")  # blank separator between model summaries, none after the last
    print(heading)
    print("---------------")
    print("scores: ", rounded_scores)
    print("Mean score: ", round(fold_scores.mean(), 3))
    print("standart deviation: ", round(fold_scores.std(), 3))

KNN: 
---------------
scores:  [0.8, 0.797, 0.798, 0.798, 0.787, 0.793, 0.784, 0.794, 0.791, 0.789]
Mean score:  0.793
standart deviation:  0.005

SVC: 
---------------
scores:  [0.829, 0.819, 0.828, 0.828, 0.818, 0.823, 0.816, 0.823, 0.826, 0.821]
Mean score:  0.823
standart deviation:  0.004
