# K Nearest Neighbour Classifier and Regressor

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
from sklearn.datasets import make_classification

# Synthetic two-class dataset: 1000 samples, 3 features (one of them redundant).
# The fixed random_state makes the generated data reproducible.
X, y = make_classification(
    n_samples=1000,
    n_features=3,
    n_redundant=1,
    n_classes=2,
    random_state=999,
)

In [3]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the samples for testing; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [4]:
from sklearn.neighbors import KNeighborsClassifier

# Baseline model: 5 nearest neighbours, fitted on the training split.
# fit() returns the estimator itself, so construction and fitting are chained.
classifier = KNeighborsClassifier(n_neighbors=5, algorithm='auto').fit(X_train, y_train)

# Predicted labels for the held-out test split.
y_pred = classifier.predict(X_test)

In [5]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

# Report the baseline classifier on the test split, in order:
# confusion matrix, per-class report, overall accuracy.
for score_fn in (confusion_matrix, classification_report, accuracy_score):
    print(score_fn(y_test, y_pred))

[[158  11]
 [ 20 141]]
              precision    recall  f1-score   support

           0       0.89      0.93      0.91       169
           1       0.93      0.88      0.90       161

    accuracy                           0.91       330
   macro avg       0.91      0.91      0.91       330
weighted avg       0.91      0.91      0.91       330

0.906060606060606


In [6]:
# Hyperparameter tuning with GridSearchCV
from sklearn.model_selection import GridSearchCV

# Candidate neighbourhood sizes: every integer from 1 to 10.
param_grid = {'n_neighbors': list(range(1, 11))}

# 5-fold cross-validated search; refit=True refits the best candidate
# so that `grid` can be used directly for prediction afterwards.
grid = GridSearchCV(
    estimator=KNeighborsClassifier(),
    param_grid=param_grid,
    cv=5,
    refit=True,
    verbose=3,
)
grid.fit(X_train, y_train)

Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV 1/5] END .....................n_neighbors=1;, score=0.873 total time=   0.0s
[CV 2/5] END .....................n_neighbors=1;, score=0.903 total time=   0.0s
[CV 3/5] END .....................n_neighbors=1;, score=0.866 total time=   0.0s
[CV 4/5] END .....................n_neighbors=1;, score=0.858 total time=   0.0s
[CV 5/5] END .....................n_neighbors=1;, score=0.888 total time=   0.0s
[CV 1/5] END .....................n_neighbors=2;, score=0.881 total time=   0.0s
[CV 2/5] END .....................n_neighbors=2;, score=0.903 total time=   0.0s
[CV 3/5] END .....................n_neighbors=2;, score=0.858 total time=   0.0s
[CV 4/5] END .....................n_neighbors=2;, score=0.843 total time=   0.0s
[CV 5/5] END .....................n_neighbors=2;, score=0.873 total time=   0.0s
[CV 1/5] END .....................n_neighbors=3;, score=0.925 total time=   0.0s
[CV 2/5] END .....................n_neighbors=3;

0,1,2
,estimator,KNeighborsClassifier()
,param_grid,"{'n_neighbors': [1, 2, ...]}"
,scoring,
,n_jobs,
,refit,True
,cv,5
,verbose,3
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,n_neighbors,9
,weights,'uniform'
,algorithm,'auto'
,leaf_size,30
,p,2
,metric,'minkowski'
,metric_params,
,n_jobs,


In [7]:
# Show the n_neighbors value selected by the cross-validated grid search.
grid.best_params_

{'n_neighbors': 9}

In [8]:
# Predict the test split through the fitted search object; because the search
# was built with refit=True, this uses the refitted best estimator.
grid_prediction = grid.predict(X_test)

In [9]:
# Same three reports as for the baseline, now for the tuned model's predictions:
# confusion matrix, per-class report, overall accuracy.
for score_fn in (confusion_matrix, classification_report, accuracy_score):
    print(score_fn(y_test, grid_prediction))

[[156  13]
 [ 16 145]]
              precision    recall  f1-score   support

           0       0.91      0.92      0.91       169
           1       0.92      0.90      0.91       161

    accuracy                           0.91       330
   macro avg       0.91      0.91      0.91       330
weighted avg       0.91      0.91      0.91       330

0.9121212121212121


In [10]:
from sklearn.datasets import make_regression

# Synthetic regression dataset: 1000 samples, 2 features, noise=1.
# Note: this rebinds X and y, replacing the classification data used earlier.
X, y = make_regression(
    n_samples=1000,
    n_features=2,
    noise=1,
    random_state=42,
)

In [11]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the regression samples for testing, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [12]:
from sklearn.neighbors import KNeighborsRegressor

# Baseline regressor: 6 nearest neighbours, fitted on the training split.
# fit() returns the estimator itself, so construction and fitting are chained.
regressor = KNeighborsRegressor(n_neighbors=6, algorithm='auto').fit(X_train, y_train)

# Predicted targets for the held-out test split.
y_pred = regressor.predict(X_test)

In [13]:
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

# Report the baseline regressor on the test split, in order:
# R^2, mean squared error, mean absolute error.
for score_fn in (r2_score, mean_squared_error, mean_absolute_error):
    print(score_fn(y_test, y_pred))

0.9907541610084923
13.942678125561427
2.2843909710467045


In [15]:
# Hyperparameter tuning with GridSearchCV (regressor)

# Candidate neighbourhood sizes: every integer from 1 to 10.
param_grid = {'n_neighbors': list(range(1, 11))}

# 5-fold cross-validated search; refit=True refits the best candidate
# so that `grid` can be used directly for prediction afterwards.
grid = GridSearchCV(
    estimator=KNeighborsRegressor(),
    param_grid=param_grid,
    cv=5,
    refit=True,
    verbose=3,
)
grid.fit(X_train, y_train)

Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV 1/5] END .....................n_neighbors=1;, score=0.985 total time=   0.0s
[CV 2/5] END .....................n_neighbors=1;, score=0.978 total time=   0.0s
[CV 3/5] END .....................n_neighbors=1;, score=0.986 total time=   0.0s
[CV 4/5] END .....................n_neighbors=1;, score=0.992 total time=   0.0s
[CV 5/5] END .....................n_neighbors=1;, score=0.990 total time=   0.0s
[CV 1/5] END .....................n_neighbors=2;, score=0.986 total time=   0.0s
[CV 2/5] END .....................n_neighbors=2;, score=0.983 total time=   0.0s
[CV 3/5] END .....................n_neighbors=2;, score=0.987 total time=   0.0s
[CV 4/5] END .....................n_neighbors=2;, score=0.993 total time=   0.0s
[CV 5/5] END .....................n_neighbors=2;, score=0.993 total time=   0.0s
[CV 1/5] END .....................n_neighbors=3;, score=0.982 total time=   0.0s
[CV 2/5] END .....................n_neighbors=3;

0,1,2
,estimator,KNeighborsRegressor()
,param_grid,"{'n_neighbors': [1, 2, ...]}"
,scoring,
,n_jobs,
,refit,True
,cv,5
,verbose,3
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,n_neighbors,4
,weights,'uniform'
,algorithm,'auto'
,leaf_size,30
,p,2
,metric,'minkowski'
,metric_params,
,n_jobs,


In [16]:
# Show the n_neighbors value selected by the grid search over the regressor.
grid.best_params_

{'n_neighbors': 4}

In [17]:
# Predict the test split with the tuned regressor (via the fitted search object).
grid_prediction = grid.predict(X_test)

# Report in order: R^2, mean squared error, mean absolute error.
for score_fn in (r2_score, mean_squared_error, mean_absolute_error):
    print(score_fn(y_test, grid_prediction))

0.9926575739783021
11.072341057994874
2.217110626968026
