# K-Nearest Neighbors (KNN)

In [5]:
import pandas as pd

from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [6]:
# Load the train/test feature tables and their label tables from ./data.
# (The "_scaled" file names suggest feature scaling happened upstream -- TODO confirm.)
X_train, X_test, y_train, y_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        "./data/X_train_scaled.csv",
        "./data/X_test_scaled.csv",
        "./data/y_train.csv",
        "./data/y_test.csv",
    )
)

In [7]:
# Flatten the label tables (presumably one column each -- TODO confirm) into
# 1-D NumPy arrays before they are handed to the classifier and the metrics.
#
# The names are rebound in place, so this cell must tolerate being re-run:
# after the first run y_train / y_test are ndarrays, which have no `.values`
# attribute, and the previous `y_train.values.ravel()` then raised
# AttributeError. Wrapping in pd.DataFrame accepts both the original frame and
# an already-flattened array and yields the same 1-D result either way.
y_train = pd.DataFrame(y_train).to_numpy().ravel()
y_test = pd.DataFrame(y_test).to_numpy().ravel()

In [8]:
# Baseline KNN with k=5: fit on the training split, predict the test split,
# and report the fraction of test labels predicted correctly.
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)  # fit() returns the estimator itself
y_pred = knn.predict(X_test)

print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))

Accuracy: 0.8100558659217877


In [9]:
# Confusion matrix for the k=5 baseline predictions.
# In scikit-learn's layout, rows are the true classes and columns the predicted ones.
print(
    "Confusion Matrix:\n",
    confusion_matrix(y_true=y_test, y_pred=y_pred),
)

Confusion Matrix:
 [[91 14]
 [20 54]]


In [10]:
# Per-class precision / recall / F1 and support for the k=5 baseline predictions.
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.82      0.87      0.84       105
           1       0.79      0.73      0.76        74

    accuracy                           0.81       179
   macro avg       0.81      0.80      0.80       179
weighted avg       0.81      0.81      0.81       179



## Effect of K Value

In [11]:
# Sweep K from 1 to 20: for each K, fit a fresh classifier on the training
# split and record its accuracy on the test split. accuracy_list[j] holds the
# score for K = j + 1.
# NOTE(review): every K is scored on the test split, so any K picked from these
# numbers is tuned to the test data -- consider a validation split or
# cross-validation on the training data instead.
accuracy_list = [
    accuracy_score(
        y_test,
        KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train).predict(X_test),
    )
    for k in range(1, 21)
]

# One line per K, in the same order the scores were collected.
for k, score in zip(range(1, 21), accuracy_list):
    print(f"K={k}  Accuracy={score}")

K=1  Accuracy=0.7262569832402235
K=2  Accuracy=0.7430167597765364
K=3  Accuracy=0.7597765363128491
K=4  Accuracy=0.8044692737430168
K=5  Accuracy=0.8100558659217877
K=6  Accuracy=0.8100558659217877
K=7  Accuracy=0.8268156424581006
K=8  Accuracy=0.8156424581005587
K=9  Accuracy=0.8100558659217877
K=10  Accuracy=0.8044692737430168
K=11  Accuracy=0.8156424581005587
K=12  Accuracy=0.8100558659217877
K=13  Accuracy=0.8044692737430168
K=14  Accuracy=0.8044692737430168
K=15  Accuracy=0.7932960893854749
K=16  Accuracy=0.8100558659217877
K=17  Accuracy=0.7988826815642458
K=18  Accuracy=0.8044692737430168
K=19  Accuracy=0.8044692737430168
K=20  Accuracy=0.8044692737430168


In [12]:
# Choose the K whose recorded accuracy is highest.
# accuracy_list is ordered K = 1, 2, ..., so position j maps to K = j + 1;
# max() keeps the first maximal item, so the smallest K wins on ties.
best_k = max(
    range(1, len(accuracy_list) + 1),
    key=lambda k: accuracy_list[k - 1],
)
print("Best K value:", best_k)

Best K value: 7


## Final Model with Best K

In [13]:
# Refit KNN with the selected K on the training split and score it on the test split.
# NOTE(review): best_k was chosen using accuracy on this same test split, so the
# number printed here is likely optimistic as an estimate on unseen data.
final_model = KNeighborsClassifier(n_neighbors=best_k).fit(X_train, y_train)
final_pred = final_model.predict(X_test)

print("Final Accuracy:", accuracy_score(y_true=y_test, y_pred=final_pred))

Final Accuracy: 0.8268156424581006
