## K Nearest Neighbour Classifier

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [5]:
from sklearn.datasets import make_classification

In [7]:
# Synthetic binary-classification data: 1000 samples with 3 features
# (one of them redundant), seeded so the notebook is reproducible.
X, y = make_classification(
    n_samples=1000,
    n_features=3,
    n_redundant=1,
    n_classes=2,
    random_state=999,
)

In [9]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the samples for testing; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.25,
)

In [11]:
from sklearn.neighbors import KNeighborsClassifier

In [13]:
# Baseline model: 5 nearest neighbours, Minkowski distance with p=2 (Euclidean).
classifier = KNeighborsClassifier(
    p=2,
    algorithm='auto',
    n_neighbors=5,
)
# Kept as the last expression so the fitted estimator is the cell's output.
classifier.fit(X_train, y_train)

In [15]:
# Predict class labels for the held-out test features with the baseline model.
y_pred = classifier.predict(X_test)

In [17]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [19]:
# Compare the baseline predictions against the true test labels.
cm = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)
score = accuracy_score(y_test, y_pred)

# Emitted in the order: accuracy, per-class report, confusion matrix.
print(score, report, cm, sep='\n')

0.916
              precision    recall  f1-score   support

           0       0.89      0.95      0.92       126
           1       0.95      0.88      0.91       124

    accuracy                           0.92       250
   macro avg       0.92      0.92      0.92       250
weighted avg       0.92      0.92      0.92       250

[[120   6]
 [ 15 109]]


## Grid Search CV

In [22]:
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

In [24]:
# Hyper-parameter search space: 10 * 4 * 2 * 2 = 160 combinations.
# NOTE(review): `algorithm` picks how neighbours are looked up and presumably
# does not change accuracy; dropping it would cut the grid 4x — confirm.
param_grid = {
    # candidate values of k: 1 through 10
    'n_neighbors' : list(range(1, 11)),
    'algorithm' : ['auto', 'ball_tree', 'kd_tree', 'brute'],
    # Minkowski power: 1 = Manhattan, 2 = Euclidean
    'p' : [1, 2],
    'weights' : ['uniform', 'distance'],
}

In [26]:
# Fresh, unfitted estimator used as the template for the grid search.
# NOTE(review): this rebinds `classifier`, so the baseline model fitted earlier
# in the notebook is no longer reachable under that name; re-running the
# earlier predict cell after this one would use this unfitted object.
classifier = KNeighborsClassifier()

In [30]:
# Exhaustive search over `param_grid`, scored by accuracy with 5-fold
# cross-validation; n_jobs=-1 asks for all available processors.
grid_search = GridSearchCV(classifier, param_grid, scoring='accuracy', cv=5, n_jobs=-1)

In [32]:
# Run the search on the training split only; the test split stays untouched.
grid_search.fit(X_train, y_train)

In [34]:
# Parameter combination that achieved the best cross-validated score.
grid_search.best_params_

{'algorithm': 'auto', 'n_neighbors': 4, 'p': 1, 'weights': 'uniform'}

In [36]:
# Cross-validated accuracy of the best combination (measured on training folds,
# not on the held-out test split).
grid_search.best_score_

0.9026666666666667

In [38]:
# Estimator carrying the best parameters found by the search
# (refit is left at its default in the GridSearchCV call — presumably refit
# on the whole training split; confirm against the scikit-learn docs).
best_classifier = grid_search.best_estimator_

In [40]:
# Score the tuned model on the held-out test split for comparison with the
# baseline accuracy printed earlier.
test_accuracy = best_classifier.score(X_test, y_test)

In [42]:
# Show the tuned model's test-set score.
print(test_accuracy)

0.884
