# K Nearest Neighbour Classifier

In [2]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

In [6]:
from sklearn.datasets import make_classification

x, y = make_classification(
    n_samples=1000, # 1000 observations 
    n_features=3, # 3 total features
     n_redundant=1,
    n_classes=2, # binary target/label 
    random_state=999 
)

In [8]:
x

array([[-0.33504974,  0.02852654,  1.16193084],
       [-1.37746253, -0.4058213 ,  0.44359618],
       [-1.04520026, -0.72334759, -3.10470423],
       ...,
       [-0.75602574, -0.51816111, -2.20382324],
       [ 0.56066316, -0.07335845, -2.15660348],
       [-1.87521902, -1.11380394, -4.04620773]])

In [10]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.33,random_state =42)

In [12]:
from sklearn.neighbors import KNeighborsClassifier

In [14]:
knn = KNeighborsClassifier(n_neighbors=5,algorithm='auto',n_jobs=-1)
knn.fit(x_train,y_train)

In [16]:
y_pred = knn.predict(x_test)

In [18]:
from sklearn.metrics import confusion_matrix,accuracy_score,classification_report

In [20]:
print(confusion_matrix(y_pred,y_test))
print(accuracy_score(y_pred,y_test))
print(classification_report(y_pred,y_test))

[[158  20]
 [ 11 141]]
0.906060606060606
              precision    recall  f1-score   support

           0       0.93      0.89      0.91       178
           1       0.88      0.93      0.90       152

    accuracy                           0.91       330
   macro avg       0.91      0.91      0.91       330
weighted avg       0.91      0.91      0.91       330



# Hyperparameter tunning

In [31]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV, StratifiedKFold

params = dict(
    n_neighbors=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20],
    weights=['uniform', 'distance'],
    algorithm=['auto', 'ball_tree', 'kd_tree', 'brute'],
    leaf_size=[10, 20, 30, 40, 50],
    p=[1, 2],  # p=1 for Manhattan, p=2 for Euclidean
    metric=['minkowski', 'euclidean', 'manhattan', 'chebyshev']
)

model = KNeighborsClassifier()

cv = StratifiedKFold()

grid = GridSearchCV(estimator=model, param_grid=params, cv=cv, n_jobs=-1)


In [33]:
grid.fit(x_train,y_train)

In [37]:
grid.best_params_

{'algorithm': 'auto',
 'leaf_size': 10,
 'metric': 'minkowski',
 'n_neighbors': 9,
 'p': 2,
 'weights': 'uniform'}

In [41]:
grid_pred = grid.predict(x_test)

In [43]:
print(confusion_matrix(grid_pred,y_test))
print(accuracy_score(grid_pred,y_test))
print(classification_report(grid_pred,y_test))

[[156  16]
 [ 13 145]]
0.9121212121212121
              precision    recall  f1-score   support

           0       0.92      0.91      0.91       172
           1       0.90      0.92      0.91       158

    accuracy                           0.91       330
   macro avg       0.91      0.91      0.91       330
weighted avg       0.91      0.91      0.91       330

