# KNN classification

This notebook aims to solve task 4 by using a KNN classifier. 

In [1]:
import pandas as pd
from os import path
import numpy as np
from preprocessing import get_train_test_data

X_train, y_train, X_test, y_test, columns_to_keep = get_train_test_data()

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

# Definizione della griglia degli iperparametri
param_grid = {
    'n_neighbors': [3, 5, 7, 9],       # Numero di vicini da considerare
    'weights': ['uniform', 'distance'],  # Pesi uniformi o basati sulla distanza
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],  # Algoritmo per il calcolo dei vicini
    'metric': ['euclidean', 'manhattan', 'minkowski'],  # Metriche di distanza
    'p': [1, 2]                          # Parametro "p" per la distanza di Minkowski
}

# Modello KNN
knn = KNeighborsClassifier()

# Grid search
grid_search = GridSearchCV(
    estimator=knn,
    param_grid=param_grid,
    scoring='f1',  # Usa il punteggio F1
    cv=4,          # 4-fold cross-validation
    verbose=1,     # Stampa le informazioni
    n_jobs=-1      # Usa tutti i core disponibili
)

# Esegui la grid search
grid_search.fit(X_train, y_train)

# Miglior modello
best_model = grid_search.best_estimator_

# Stampa i migliori iperparametri
print("Best parameters:", grid_search.best_params_)

# Stampa il miglior punteggio F1
print("Best f1 score:", grid_search.best_score_)


Fitting 4 folds for each of 48 candidates, totalling 192 fits


In [None]:
test_pred_knn = best_model.predict(X_test)

In [5]:
test_pred_knn = knn.predict(X_test)

In [9]:
from sklearn.metrics import classification_report
print(classification_report(y_test, 
                            test_pred_knn, 
                            target_names=['0', '1']))

              precision    recall  f1-score   support

           0       0.87      0.93      0.90     30219
           1       0.31      0.17      0.22      5187

    accuracy                           0.82     35406
   macro avg       0.59      0.55      0.56     35406
weighted avg       0.79      0.82      0.80     35406

