In [7]:
# run_knn_sklearn.py
import numpy as np
import joblib

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier

# ---------------------------------------
# Load SAME features used by SVM
# ---------------------------------------
# np.load on a .npz archive returns a lazy NpzFile that keeps the file open.
# Reading both arrays inside the context manager materialises them in memory
# and then closes the archive instead of leaking the file handle.
with np.load("../features_dataset.npz") as data:
    X = data["X"]  # feature matrix, one row per sample
    y = data["y"]  # class label for each row of X

# ---------------------------------------
# Hold out a stratified test set
# ---------------------------------------
# 20% of the samples go to the test partition. Stratifying on y keeps the
# class proportions equal in both partitions, and the fixed seed makes the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=42, test_size=0.2
)

# ---------------------------------------
# Standardise features (IMPORTANT for KNN)
# ---------------------------------------
# The scaler learns its statistics from the training partition only; the
# same fitted transform is then applied to both partitions so no test-set
# statistics leak into training.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# ---------------------------------------
# Build and train the KNN model (scikit-learn)
# ---------------------------------------
# 3 nearest neighbours under cosine distance, each vote weighted by the
# inverse of its distance. fit() returns the estimator itself, so the
# construction and training steps can be chained.
knn = KNeighborsClassifier(
    weights="distance",
    metric="cosine",
    n_neighbors=3,
).fit(X_train, y_train)

# ---------------------------------------
# Predict + Unknown rejection
# ---------------------------------------
UNKNOWN_CLASS_ID = 6        # label assigned to samples the model rejects
rejection_threshold = 0.55  # minimum top-class probability to accept

# One neighbour search yields the per-class probabilities for every sample.
probs = knn.predict_proba(X_test)
max_probs = np.max(probs, axis=1)

# Take the winning class straight from the probabilities instead of calling
# knn.predict(), which would repeat the whole neighbour search. Columns of
# `probs` follow the order of knn.classes_. Any sample that survives the
# threshold has a top probability above 0.5, so its argmax is unambiguous.
top_class = knn.classes_[np.argmax(probs, axis=1)]

# Low-confidence samples are relabelled as "unknown"; the rest keep their
# most probable class.
y_pred = np.where(
    max_probs < rejection_threshold,
    UNKNOWN_CLASS_ID,
    top_class
)

# ---------------------------------------
# Evaluate
# ---------------------------------------
# Rejected samples carry the "unknown" label, so they count as errors in the
# accuracy and appear as an extra row/column in the confusion matrix.
print("\nKNN Accuracy:", accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
# When the unknown label has no true samples in y_test its recall is
# undefined. zero_division=0 reports it as 0.00 (the same value as before)
# without emitting an UndefinedMetricWarning for each affected metric.
print(
    "\nClassification Report:\n",
    classification_report(y_test, y_pred, zero_division=0),
)

# ---------------------------------------
# Persist model, scaler and threshold together
# ---------------------------------------
# Everything needed to reproduce the predictions above is stored in one
# file: the fitted classifier, the fitted scaler that must be applied to new
# inputs first, and the rejection threshold used for the "unknown" decision.
model_path = "knn_best_model.pkl"
bundle = {
    "model": knn,
    "scaler": scaler,
    "rejection_threshold": rejection_threshold,
}
joblib.dump(bundle, model_path)

print("\nModel saved: knn_best_model.pkl")



KNN Accuracy: 0.8666666666666667

Confusion Matrix:
 [[86  0  0  5  7  2  0]
 [ 0 85  2  2  3  3  5]
 [ 0  3 91  0  0  1  5]
 [ 4  0  1 86  3  3  3]
 [11  0  0  1 83  1  4]
 [ 1  1  1  2  5 89  1]
 [ 0  0  0  0  0  0  0]]

Classification Report:
               precision    recall  f1-score   support

           0       0.84      0.86      0.85       100
           1       0.96      0.85      0.90       100
           2       0.96      0.91      0.93       100
           3       0.90      0.86      0.88       100
           4       0.82      0.83      0.83       100
           5       0.90      0.89      0.89       100
           6       0.00      0.00      0.00         0

    accuracy                           0.87       600
   macro avg       0.77      0.74      0.75       600
weighted avg       0.90      0.87      0.88       600


Model saved: knn_best_model.pkl


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
