In [8]:
import numpy as np
import joblib

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier

# ---------------------------------------
# Load SAME features file used by SVM
# ---------------------------------------
data = np.load("../features_dataset.npz")

X = data["X"]
y = data["y"]

# ---------------------------------------
# Split
# ---------------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
    stratify=y
)

# ---------------------------------------
# Scaling (IMPORTANT for KNN)
# ---------------------------------------
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test  = scaler.transform(X_test)

# ---------------------------------------
# Hyperparameter search
# ---------------------------------------
k_values = [3, 5, 7, 9]
metrics = ['euclidean', 'manhattan','cosine']
weights_list = ['uniform', 'distance']

best_acc = -1
best_params = None
best_model = None

rejection_threshold = 0.55  # Unknown class = 6

for k in k_values:
    for metric in metrics:
        for weights in weights_list:

            knn = KNeighborsClassifier(
                n_neighbors=k,
                metric=metric,
                weights=weights
            )

            knn.fit(X_train, y_train)

            # ---- Prediction with rejection
            probs = knn.predict_proba(X_test)
            max_probs = np.max(probs, axis=1)

            preds = np.where(
                max_probs < rejection_threshold,
                6,                  # Unknown class ID
                knn.predict(X_test)
            )

            acc = accuracy_score(y_test, preds)

            print(f"K={k}, Metric={metric}, Weights={weights} → Acc={acc:.4f}")

            if acc > best_acc:
                best_acc = acc
                best_params = (k, metric, weights)
                best_model = knn

# ---------------------------------------
# Results
# ---------------------------------------
print("\nBest Model Parameters:", best_params)
print("Best Accuracy:", best_acc)

# ---------------------------------------
# Save best model + scaler
# ---------------------------------------
joblib.dump(
    {
        "model": best_model,
        "scaler": scaler,
        "rejection_threshold": rejection_threshold,
        "best_params": best_params
    },
    "knn_best_model.pkl"
)

print("\nBest KNN model saved: knn_best_model.pkl")


K=3, Metric=euclidean, Weights=uniform → Acc=0.8200
K=3, Metric=euclidean, Weights=distance → Acc=0.8200
K=3, Metric=manhattan, Weights=uniform → Acc=0.8167
K=3, Metric=manhattan, Weights=distance → Acc=0.8167
K=3, Metric=cosine, Weights=uniform → Acc=0.8583
K=3, Metric=cosine, Weights=distance → Acc=0.8667
K=5, Metric=euclidean, Weights=uniform → Acc=0.7850
K=5, Metric=euclidean, Weights=distance → Acc=0.7867
K=5, Metric=manhattan, Weights=uniform → Acc=0.7867
K=5, Metric=manhattan, Weights=distance → Acc=0.7900
K=5, Metric=cosine, Weights=uniform → Acc=0.8350
K=5, Metric=cosine, Weights=distance → Acc=0.8417
K=7, Metric=euclidean, Weights=uniform → Acc=0.7767
K=7, Metric=euclidean, Weights=distance → Acc=0.7800
K=7, Metric=manhattan, Weights=uniform → Acc=0.7667
K=7, Metric=manhattan, Weights=distance → Acc=0.7700
K=7, Metric=cosine, Weights=uniform → Acc=0.8117
K=7, Metric=cosine, Weights=distance → Acc=0.8167
K=9, Metric=euclidean, Weights=uniform → Acc=0.7533
K=9, Metric=euclidean