In [None]:
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

In [None]:
# Feature table used as model input (presumably the cleaned churn data,
# judging by the file name -- confirm against the cleaning notebook).
X = pd.read_csv(
    "Resources/clean_churn_db.csv",
    low_memory=False,
)
# Preview the first rows as the cell output.
X.head()

In [None]:
# Load the original export; only its "Attrition_Flag" column is used later.
# NOTE(review): X and y come from different files -- confirm that the rows
# of both files are in the same order and that no rows were dropped from X.
y = pd.read_csv(
    "Resources/BankChurners.csv",
    low_memory=False,
)
# Preview the first rows as the cell output.
y.head()

In [None]:
# Keep only the target column as an array.
# NOTE: this rebinds `y` from a DataFrame to an array, so the cell cannot be
# re-run on its own without first re-running the cell that loads `y`.
y = y.loc[:, "Attrition_Flag"].values

In [None]:
# Hold out a test partition; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=1,
)

In [None]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training rows only, so the scaling statistics are
# derived from the training partition and not from the test partition.
X_scaler = StandardScaler().fit(X_train)
# Show the fitted scaler as the cell output.
X_scaler

In [None]:
# Apply the already-fitted scaler to both partitions.
X_train_scaled, X_test_scaled = (
    X_scaler.transform(partition) for partition in (X_train, X_test)
)

In [None]:
# Sweep odd values of k, fitting one KNN model per value and recording its
# accuracy on the training and test partitions.
k_values = range(1, 20, 2)  # 1, 3, ..., 19; shared by the loop and the plot
train_scores = []
test_scores = []
for k in k_values:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train_scaled, y_train)
    train_score = knn.score(X_train_scaled, y_train)
    test_score = knn.score(X_test_scaled, y_test)
    train_scores.append(train_score)
    test_scores.append(test_score)
    print(f"k: {k}, Train/Test Score: {train_score:.3f}/{test_score:.3f}")

# Both curves share one axis, so each series is labelled and the y-axis uses
# a neutral name (it previously claimed to show only the testing score).
plt.plot(k_values, train_scores, marker='o', label="Train")
plt.plot(k_values, test_scores, marker="x", label="Test")
plt.xlabel("k neighbors")
plt.ylabel("Accuracy score")
plt.legend()
plt.show()

In [None]:
# Refit a single model at the chosen k and report its test-set accuracy.
knn = KNeighborsClassifier(n_neighbors=15)
knn.fit(X_train_scaled, y_train)
# Read k back from the estimator so the printed label always matches the
# model (the message previously said k=5 while the model used k=15).
print('k=%d Test Acc: %.3f' % (knn.n_neighbors, knn.score(X_test_scaled, y_test)))

In [None]:
# Pair the held-out labels with the model's predictions for the metric cells.
y_true, y_pred = y_test, knn.predict(X_test_scaled)

In [None]:
# Confusion matrix of true vs. predicted labels, shown as the cell output.
confusion_matrix(y_true=y_true, y_pred=y_pred)

In [None]:
# Text report of the per-class metrics for the test predictions.
print(classification_report(y_true=y_true, y_pred=y_pred))

In [None]:
# Predicted labels for the first ten test rows.
knn.predict(X_test_scaled[:10])

In [None]:
# Class probabilities for the first ten test rows (one column per class).
knn.predict_proba(X_test_scaled[:10])

In [None]:
# Second probability column only, for the first ten test rows.
knn.predict_proba(X_test_scaled[:10])[:, 1]

In [None]:
# Import the roc_curve function from scikit-learn
from sklearn.metrics import roc_curve

In [None]:
# Scores for the ROC curve: the second probability column, which belongs to
# the class stored at knn.classes_[1].
y_pred_proba_knn = knn.predict_proba(X_test_scaled)[:,1]
# Name the positive class explicitly so it matches the probability column
# above. Without pos_label, roc_curve raises a ValueError when the labels are
# not {0, 1} or {-1, 1} (e.g. string labels); for 0/1 labels this is the same
# as the default.
fpr_knn, tpr_knn, thresholds_knn = roc_curve(
    y_test, y_pred_proba_knn, pos_label=knn.classes_[1]
)

In [None]:
# Draw the KNN ROC curve together with a dashed diagonal reference line.
plt.figure()
plt.plot(fpr_knn, tpr_knn, color='darkorange')
plt.plot((0, 1), (0, 1), color='navy', linestyle='--')
plt.title('ROC Curve - KNN Model')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
# A little headroom above 1.0 keeps the top of the curve off the frame.
plt.xlim(0.0, 1.0)
plt.ylim(0.0, 1.05)
plt.show()

In [None]:
# Import the auc function from scikit-learn
from sklearn.metrics import auc

In [None]:
# Area under the ROC curve, computed from the FPR/TPR points
roc_auc_knn = auc(x=fpr_knn, y=tpr_knn)
print(f"AUC for the KNN Model: {roc_auc_knn}")