## K Nearest Neighbour Classifier

In [31]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [32]:
from sklearn.datasets import make_classification

X,y = make_classification(
    n_samples=1000,  # number of samples
    n_features=3,   # number of features
    n_redundant=1,
    n_classes=2 ,
    random_state=999
)

In [33]:
X

array([[-0.33504974,  0.02852654,  1.16193084],
       [-1.37746253, -0.4058213 ,  0.44359618],
       [-1.04520026, -0.72334759, -3.10470423],
       ...,
       [-0.75602574, -0.51816111, -2.20382324],
       [ 0.56066316, -0.07335845, -2.15660348],
       [-1.87521902, -1.11380394, -4.04620773]])

In [34]:
from sklearn.model_selection import train_test_split

In [35]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y , test_size=0.33, random_state=42
)

In [36]:
from sklearn.neighbors import KNeighborsClassifier

In [37]:
classifier = KNeighborsClassifier(n_neighbors=5,algorithm='auto')
classifier.fit(X_train,y_train)

In [38]:
y_pred = classifier.predict(X_test)

In [39]:
from sklearn.metrics  import confusion_matrix, accuracy_score,classification_report

In [40]:
print(confusion_matrix(y_pred,y_test))
print(accuracy_score(y_pred,y_test))
print(classification_report(y_pred,y_test))

[[158  20]
 [ 11 141]]
0.906060606060606
              precision    recall  f1-score   support

           0       0.93      0.89      0.91       178
           1       0.88      0.93      0.90       152

    accuracy                           0.91       330
   macro avg       0.91      0.91      0.91       330
weighted avg       0.91      0.91      0.91       330



In [41]:
classifier = KNeighborsClassifier()

In [42]:
from sklearn.model_selection import GridSearchCV

In [43]:
params = {
    'weights':['uniform', 'distance'],
    'n_neighbors':[1,2,3,4,5,6,7,8,9,10],
    'algorithm':['auto', 'ball_tree', 'kd_tree', 'brute'],
    'p':[1,2],
    
    
}

In [44]:
cv = GridSearchCV(classifier,param_grid=params,refit=True,cv=5,verbose=3)

In [45]:
cv.fit(X_train,y_train)

Fitting 5 folds for each of 160 candidates, totalling 800 fits
[CV 1/5] END algorithm=auto, n_neighbors=1, p=1, weights=uniform;, score=0.881 total time=   0.0s
[CV 2/5] END algorithm=auto, n_neighbors=1, p=1, weights=uniform;, score=0.896 total time=   0.0s
[CV 3/5] END algorithm=auto, n_neighbors=1, p=1, weights=uniform;, score=0.881 total time=   0.0s
[CV 4/5] END algorithm=auto, n_neighbors=1, p=1, weights=uniform;, score=0.881 total time=   0.0s
[CV 5/5] END algorithm=auto, n_neighbors=1, p=1, weights=uniform;, score=0.881 total time=   0.0s
[CV 1/5] END algorithm=auto, n_neighbors=1, p=1, weights=distance;, score=0.881 total time=   0.0s
[CV 2/5] END algorithm=auto, n_neighbors=1, p=1, weights=distance;, score=0.896 total time=   0.0s
[CV 3/5] END algorithm=auto, n_neighbors=1, p=1, weights=distance;, score=0.881 total time=   0.0s
[CV 4/5] END algorithm=auto, n_neighbors=1, p=1, weights=distance;, score=0.881 total time=   0.0s
[CV 5/5] END algorithm=auto, n_neighbors=1, p=1, we

[CV 5/5] END algorithm=auto, n_neighbors=3, p=2, weights=uniform;, score=0.888 total time=   0.0s
[CV 1/5] END algorithm=auto, n_neighbors=3, p=2, weights=distance;, score=0.910 total time=   0.0s
[CV 2/5] END algorithm=auto, n_neighbors=3, p=2, weights=distance;, score=0.903 total time=   0.0s
[CV 3/5] END algorithm=auto, n_neighbors=3, p=2, weights=distance;, score=0.881 total time=   0.0s
[CV 4/5] END algorithm=auto, n_neighbors=3, p=2, weights=distance;, score=0.873 total time=   0.0s
[CV 5/5] END algorithm=auto, n_neighbors=3, p=2, weights=distance;, score=0.896 total time=   0.0s
[CV 1/5] END algorithm=auto, n_neighbors=4, p=1, weights=uniform;, score=0.933 total time=   0.0s
[CV 2/5] END algorithm=auto, n_neighbors=4, p=1, weights=uniform;, score=0.881 total time=   0.0s
[CV 3/5] END algorithm=auto, n_neighbors=4, p=1, weights=uniform;, score=0.851 total time=   0.0s
[CV 4/5] END algorithm=auto, n_neighbors=4, p=1, weights=uniform;, score=0.873 total time=   0.0s
[CV 5/5] END al

In [46]:
cv.best_params_

{'algorithm': 'auto', 'n_neighbors': 9, 'p': 2, 'weights': 'uniform'}

In [49]:
classifier = KNeighborsClassifier(n_neighbors=9,algorithm='auto',p= 2, weights= 'uniform')
classifier.fit(X_train,y_train)

In [50]:
print(confusion_matrix(y_pred,y_test))
print(accuracy_score(y_pred,y_test))
print(classification_report(y_pred,y_test))

[[158  20]
 [ 11 141]]
0.906060606060606
              precision    recall  f1-score   support

           0       0.93      0.89      0.91       178
           1       0.88      0.93      0.90       152

    accuracy                           0.91       330
   macro avg       0.91      0.91      0.91       330
weighted avg       0.91      0.91      0.91       330

