In [2]:
import pandas as pd
import numpy as np
import joblib

import metrics_summary as ms

In [3]:
# Seed NumPy's legacy global RNG once, up front, so that any later step that
# draws from that global stream starts from a known state on a fresh
# Restart & Run All.
SEED = 170
np.random.seed(SEED)

In [4]:
# Feature tables are kept as DataFrames exactly as read from disk.
X_train = pd.read_csv("../data/X_train.csv")
X_test = pd.read_csv("../data/X_test.csv")

# Target files are flattened to 1-D NumPy arrays after loading.
y_train = pd.read_csv("../data/y_train.csv").to_numpy().ravel()
y_test = pd.read_csv("../data/y_test.csv").to_numpy().ravel()

## KNN

In [5]:
from sklearn.neighbors import KNeighborsClassifier

# Baseline model: KNN with the library's default hyper-parameters, fitted on
# the training split exactly as loaded.
knn = KNeighborsClassifier().fit(X_train, y_train)

# Column 1 of predict_proba is used as the score, alongside the hard labels.
y_prob = knn.predict_proba(X_test)[:, 1]
y_pred = knn.predict(X_test)

# Hold-out report, then the cross-validated report on the training split.
# NOTE(review): in the recorded output the "Average F1 Score" line is identical
# to "Average Accuracy Score" -- check the scorer used inside
# metrics_summary.crossval_summary.
ms.metrics_summary(y_test, y_pred, y_prob)
ms.crossval_summary(knn, X_train, y_train)

# Persist the fitted estimator; the returned file list is the cell's output.
joblib.dump(knn, "knn.pkl")

[[49750   988]
 [ 5602   397]]
Kappa Score: 0.07066844221618163
Accuracy Score: 0.8838500449442163
Precision: 0.28664259927797836
Recall: 0.06617769628271379
F1 Score: 0.10752979414951247
AUC Score: 0.6074571595298731
Average Accuracy Score: 0.8841040608557004
Average Precision Score: 0.28324331271643965
Average Recall Score: 0.06287472566733017
Average F1 Score: 0.8841040608557004
[0.88390362 0.88269064 0.88412585 0.88473014 0.88507006]
Average AUC Score: 0.6040671415717143
[0.60142812 0.59739794 0.60941099 0.609951   0.60214766]


['knn.pkl']

#### KNN Using SMOTE

In [6]:
# Oversample the minority class with SMOTE, then fit KNN on the resampled data.
# Both steps live in an imblearn Pipeline so they are fitted together.
# NOTE(review): per the imbalanced-learn docs the sampler step is applied only
# while fitting, so predictions on X_test use the untouched test rows -- confirm
# against the installed version.
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline

# random_state is pinned (same value as the notebook-level seed) so that the
# synthetic samples do not depend on how much of the global NumPy random stream
# earlier cells have consumed; re-running this cell alone now gives the same
# model and the same metrics.
knn_os = Pipeline(steps=[
    ('over', SMOTE(random_state=170)),
    ('model', KNeighborsClassifier()),
])
knn_os.fit(X_train, y_train)

# Hard labels and the column-1 probability used as the score.
y_pred = knn_os.predict(X_test)
y_prob = knn_os.predict_proba(X_test)[:,1]

# Hold-out report, then the cross-validated report on the training split.
# NOTE(review): in the recorded output the "Average F1 Score" line is identical
# to "Average Accuracy Score" -- check the scorer used inside
# metrics_summary.crossval_summary.
ms.metrics_summary(y_test, y_pred, y_prob)
ms.crossval_summary(knn_os, X_train, y_train)

# Persist the fitted pipeline; the returned file list is the cell's output.
joblib.dump(knn_os, "knn_os.pkl")

[[34990 15748]
 [ 2982  3017]]
Kappa Score: 0.09933835350664688
Accuracy Score: 0.6698803250083719
Precision: 0.16077804423128164
Recall: 0.5029171528588098
F1 Score: 0.24366015183330644
AUC Score: 0.6236659129945127
Average Accuracy Score: 0.6704409466511392
Average Precision Score: 0.15972077457782424
Average Recall Score: 0.49685548410146474
Average F1 Score: 0.6704409466511392
[0.67527759 0.67163954 0.6721683  0.66582317 0.66729614]
Average AUC Score: 0.6220795196658842
[0.62479353 0.61970448 0.62904944 0.6120562  0.62479395]


['knn_os.pkl']