In [23]:
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, accuracy_score
import joblib
import matplotlib.pyplot as plt

## Load Features


In [24]:
X_train = np.load("../features/X_train_scaled.npy")
X_val = np.load("../features/X_val_scaled.npy")
X_test = np.load("../features/X_test_scaled.npy")
y_train = np.load("../features/y_train.npy")
y_val = np.load("../features/y_val.npy")
y_test = np.load("../features/y_test.npy")


## Global Parameters



In [25]:
param_grid = {
    'n_neighbors': [3, 5, 7, 9 , 11],
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan']
}

In [26]:
CLASS_MAPPING = {
    "cardboard": 0,
    "glass": 1,
    "metal": 2,
    "paper": 3,
    "plastic": 4,
    "trash": 5,
    "unknown": 6
}


## Model Training


In [27]:
grid_search = GridSearchCV(
    KNeighborsClassifier(),
    param_grid,
    cv=3,
    scoring='accuracy',
    n_jobs=-1,
    verbose=2
)
grid_search.fit(X_train, y_train)

print(f"\n Best parameters: {grid_search.best_params_}")
print(f" Best CV score: {grid_search.best_score_:.4f}")

Fitting 3 folds for each of 20 candidates, totalling 60 fits

 Best parameters: {'metric': 'euclidean', 'n_neighbors': 5, 'weights': 'distance'}
 Best CV score: 0.8535


 0.83535354 0.84090909 0.83232323 0.84040404        nan 0.83636364
        nan 0.83838384        nan 0.84343434        nan 0.83383838
        nan 0.83181818]


## Model Evaluation

In [28]:
val_score = grid_search.score(X_val, y_val)
print(f"Validation accuracy: {val_score:.4f}")


Validation accuracy: 0.8972


## Save Model


In [29]:
# Train final model on train+val if validation score is good
if val_score >= 0.80:
    print("\nTraining final model on train+val...")
    X_combined = np.vstack([X_train, X_val])
    y_combined = np.concatenate([y_train, y_val])

    knn = KNeighborsClassifier(**grid_search.best_params_)
    knn.fit(X_combined, y_combined)
else:
    print("\nUsing train-only model (validation score too low)")
    knn = grid_search.best_estimator_


Training final model on train+val...


In [30]:
# Model Evaluation on test set
y_pred = knn.predict(X_test)
print(f"\nTest Accuracy: {accuracy_score(y_test, y_pred):.4f}")
print(f"\nClassification Report:\n{classification_report(y_test, y_pred)}")



Test Accuracy: 0.8947

Classification Report:
              precision    recall  f1-score   support

   cardboard       1.00      1.00      1.00        37
       glass       0.85      0.85      0.85        59
       metal       0.80      0.94      0.87        48
       paper       0.94      0.97      0.96        69
     plastic       0.95      0.75      0.84        56
       trash       0.78      0.88      0.82        16

    accuracy                           0.89       285
   macro avg       0.89      0.90      0.89       285
weighted avg       0.90      0.89      0.89       285



In [31]:
joblib.dump(knn, "../models/knn_model.pkl")


['../models/knn_model.pkl']

## Model Prediction with Rejection


In [32]:
def knn_predict_with_rejection(model, X, threshold=0.6):
    distances, indices = model.kneighbors(X)
    neighbor_labels = model._y[indices]

    preds = model.predict(X)
    final_preds = []

    for i, pred in enumerate(preds):
        votes = np.sum(neighbor_labels[i] == pred)
        confidence = votes / model.n_neighbors

        if confidence < threshold:
            final_preds.append(6)  # Unknown
        else:
            final_preds.append(CLASS_MAPPING[pred])

    return np.array(final_preds)


In [33]:
# Model Evaluation on test set
y_pred = knn_predict_with_rejection(knn, X_test)
print(f"\nTest Accuracy: {accuracy_score(y_test, y_pred):.4f}")
print(f"\nClassification Report:\n{classification_report(y_test, y_pred)}")



Test Accuracy: 0.0000


ValueError: Mix of label input types (string and number)