 # KNN Classifier

In [1]:
import pandas as pd
import seaborn as sns 
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

In [2]:
from sklearn.datasets import make_classification

X,y=make_classification(n_samples=1000,
    n_features=3,
    n_redundant=1,
    n_classes=2, #binary outputs
    random_state=999)

In [3]:
from sklearn.model_selection import train_test_split

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.30,random_state=42)

In [4]:
from sklearn.neighbors import KNeighborsClassifier

In [5]:
KNclassifier=KNeighborsClassifier(n_neighbors=5,algorithm='auto')

In [6]:
KNclassifier.fit(X_train,y_train)

In [7]:
y_pred=KNclassifier.predict(X_test)

In [19]:
from sklearn.metrics import confusion_matrix,accuracy_score,classification_report,r2_score

In [9]:
print(confusion_matrix(y_pred,y_test))
print(accuracy_score(y_pred,y_test))
print(classification_report(y_pred,y_test))


[[144  18]
 [ 10 128]]
0.9066666666666666
              precision    recall  f1-score   support

           0       0.94      0.89      0.91       162
           1       0.88      0.93      0.90       138

    accuracy                           0.91       300
   macro avg       0.91      0.91      0.91       300
weighted avg       0.91      0.91      0.91       300



Hyperparameter Tuning

In [11]:
from sklearn.model_selection import GridSearchCV

In [13]:
param_grid={'n_neighbors':[1,2,3,4,5],
            'weights': ['uniform', 'distance'],
            'metric': ['euclidean', 'manhattan', 'minkowski']
           }

In [14]:
grid=GridSearchCV(KNeighborsClassifier(),param_grid=param_grid,refit=True,cv=5,verbose=3)

In [15]:
grid.fit(X_train,y_train)

Fitting 5 folds for each of 30 candidates, totalling 150 fits
[CV 1/5] END metric=euclidean, n_neighbors=1, weights=uniform;, score=0.850 total time=   0.0s
[CV 2/5] END metric=euclidean, n_neighbors=1, weights=uniform;, score=0.907 total time=   0.0s
[CV 3/5] END metric=euclidean, n_neighbors=1, weights=uniform;, score=0.900 total time=   0.0s
[CV 4/5] END metric=euclidean, n_neighbors=1, weights=uniform;, score=0.871 total time=   0.0s
[CV 5/5] END metric=euclidean, n_neighbors=1, weights=uniform;, score=0.886 total time=   0.0s
[CV 1/5] END metric=euclidean, n_neighbors=1, weights=distance;, score=0.850 total time=   0.0s
[CV 2/5] END metric=euclidean, n_neighbors=1, weights=distance;, score=0.907 total time=   0.0s
[CV 3/5] END metric=euclidean, n_neighbors=1, weights=distance;, score=0.900 total time=   0.0s
[CV 4/5] END metric=euclidean, n_neighbors=1, weights=distance;, score=0.871 total time=   0.0s
[CV 5/5] END metric=euclidean, n_neighbors=1, weights=distance;, score=0.886 to

In [17]:
grid.best_params_

{'metric': 'manhattan', 'n_neighbors': 3, 'weights': 'uniform'}

In [18]:
y_pred4=grid.predict(X_test)

In [21]:
print(confusion_matrix(y_pred4,y_test))
print(accuracy_score(y_pred4,y_test))
print(classification_report(y_pred4,y_test))
print(r2_score(y_pred4,y_test))

[[139  19]
 [ 15 127]]
0.8866666666666667
              precision    recall  f1-score   support

           0       0.90      0.88      0.89       158
           1       0.87      0.89      0.88       142

    accuracy                           0.89       300
   macro avg       0.89      0.89      0.89       300
weighted avg       0.89      0.89      0.89       300

0.5453735068639686
