In [None]:
# preprocessing
import numpy as np

from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.preprocessing import StandardScaler



# Load the pre-extracted feature matrix and labels. For an .npz archive,
# np.load returns an NpzFile that keeps the file open and only reads an array
# when it is indexed, so both arrays are pulled out inside a context manager
# to guarantee the file handle is closed afterwards.
with np.load("../features_dataset.npz") as data:
    X = data["X"]
    y = data["y"]

# Hold out 20% of the samples as a test split. Stratifying on y keeps the
# label proportions the same in both splits; the fixed seed makes the split
# reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
    stratify=y
)

# Standardize features with statistics estimated on the training split only,
# then apply that same fitted transform to the test split so no test-set
# information leaks into the scaling.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test  = scaler.transform(X_test)

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Hyperparameter grid for the KNN classifier.
k_values = [3, 5, 7, 9]
metrics = ['euclidean', 'manhattan', 'cosine']
weights_list = ['uniform', 'distance']

# A prediction is accepted only if its top class probability reaches this
# threshold; otherwise the sample is labelled as the "unknown" class.
rejection_threshold = 0.55
UNKNOWN_CLASS_ID = 6
N_CV_FOLDS = 5


def predict_with_rejection(model, X, threshold=rejection_threshold,
                           unknown_id=UNKNOWN_CLASS_ID):
    """Predict labels with `model`, replacing low-confidence ones by `unknown_id`.

    Parameters
    ----------
    model : fitted classifier exposing `predict_proba` and `classes_`.
    X : array of samples to classify.
    threshold : minimum top-class probability required to keep a prediction.
    unknown_id : label assigned to samples whose top probability is below
        `threshold`.

    Returns
    -------
    numpy.ndarray with one label per row of `X`.
    """
    probs = model.predict_proba(X)
    # Reuse the probabilities instead of calling model.predict(X), which would
    # run the neighbor search a second time for the same samples.
    top_class = model.classes_[np.argmax(probs, axis=1)]
    return np.where(np.max(probs, axis=1) < threshold, unknown_id, top_class)


# Model selection is done with stratified cross-validation on the training
# split. Choosing hyperparameters by their score on X_test and then reporting
# that same score would give an optimistically biased estimate.
# NOTE: X_train was standardized with statistics from the whole training
# split, so validation folds are not perfectly isolated from the scaler; the
# effect is usually small, but a Pipeline would remove it entirely.
cv = StratifiedKFold(n_splits=N_CV_FOLDS, shuffle=True, random_state=42)

best_acc = -1          # best mean cross-validated accuracy seen so far
best_params = None
best_model = None

for k in k_values:
    for metric in metrics:
        for weights in weights_list:

            fold_accs = []
            for fit_idx, val_idx in cv.split(X_train, y_train):
                knn = KNeighborsClassifier(
                    n_neighbors=k,
                    metric=metric,
                    weights=weights
                )
                knn.fit(X_train[fit_idx], y_train[fit_idx])

                fold_preds = predict_with_rejection(knn, X_train[val_idx])
                fold_accs.append(accuracy_score(y_train[val_idx], fold_preds))

            acc = float(np.mean(fold_accs))

            print(f"K={k}, Metric={metric}, Weights={weights} → CV Acc={acc:.4f}")

            # Strict '>' keeps the first configuration encountered on ties.
            if acc > best_acc:
                best_acc = acc
                best_params = (k, metric, weights)

# Refit the winning configuration on the full training split, then touch the
# test split exactly once to obtain an unbiased performance estimate.
best_k, best_metric, best_weights = best_params
best_model = KNeighborsClassifier(
    n_neighbors=best_k,
    metric=best_metric,
    weights=best_weights
)
best_model.fit(X_train, y_train)

test_acc = accuracy_score(y_test, predict_with_rejection(best_model, X_test))

print("\nBest Model Parameters:", best_params)
print("Best CV Accuracy:", best_acc)
print("Held-out Test Accuracy:", test_acc)


In [None]:
import joblib
# Persist the selected classifier together with the objects stored next to it:
# the fitted scaler, the rejection threshold and the winning hyperparameters.
model_bundle = {
    "model": best_model,
    "scaler": scaler,
    "rejection_threshold": rejection_threshold,
    "best_params": best_params,
}
joblib.dump(model_bundle, "knn_best_model.pkl")