# <font color = 'orange'> K Nearest Neighbour Classifier

---

In [1]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

import warnings 
# Install a blanket 'ignore' filter: from this point on every warning category
# (including deprecation warnings) is hidden from the cell outputs.
warnings.filterwarnings('ignore')

---

### <font color = 'blue'> 1. Create classification Dataset

In [2]:
from sklearn.datasets import make_classification

# Synthetic two-class dataset: 1000 samples, 3 features (1 of them redundant).
# The fixed random_state makes the generated data reproducible.
x, y = make_classification(
    n_samples=1000,
    n_features=3,
    n_redundant=1,
    n_classes=2,
    random_state=999,
)

---

### <font color = 'blue'> 2. Train test split

In [3]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the samples for testing; the fixed random_state keeps the
# split reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.33,
    random_state=42,
)

---

### <font color = 'blue'> 3. Model Training

In [4]:
from sklearn.neighbors import KNeighborsClassifier

# `algorithm` selects the neighbour-search strategy: 'brute', 'kd_tree' or
# 'ball_tree'; 'auto' lets scikit-learn pick one based on the training data.
knn_classifier = KNeighborsClassifier(
    n_neighbors=5,
    algorithm='auto',
)

In [5]:
# Train the classifier on the training split; the fitted estimator is the
# cell's last expression, so its repr is displayed.
knn_classifier.fit(X=x_train, y=y_train)

KNeighborsClassifier()

---

### <font color = 'blue'> 4. Prediction

In [6]:
# Predict class labels for the held-out test samples.
y_pred = knn_classifier.predict(X=x_test)

---

### <font color = 'blue'> 5. Performance Metrics

In [7]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

# Print accuracy, the confusion matrix and the per-class report in that order,
# each separated from the next by one blank line.
print(
    accuracy_score(y_test, y_pred),
    confusion_matrix(y_test, y_pred),
    classification_report(y_test, y_pred),
    sep='\n\n',
)

0.906060606060606

[[158  11]
 [ 20 141]]

              precision    recall  f1-score   support

           0       0.89      0.93      0.91       169
           1       0.93      0.88      0.90       161

    accuracy                           0.91       330
   macro avg       0.91      0.91      0.91       330
weighted avg       0.91      0.91      0.91       330



---

## Internal Assignment - Perform Hyperparameter tuning and get the best K value

In [8]:
from sklearn.model_selection import GridSearchCV

In [9]:
# Candidate values of K to evaluate: 1 through 10.
parameter = {'n_neighbors': list(range(1, 11))}

estimator = KNeighborsClassifier()

# 5-fold cross-validated grid search over the candidates, scored by accuracy
# on the training split only.
grid_cv = GridSearchCV(
    estimator=estimator,
    param_grid=parameter,
    scoring='accuracy',
    cv=5,
)
grid_cv.fit(X=x_train, y=y_train)

# Show the best-scoring parameter setting as the cell output.
best_param = grid_cv.best_params_
best_param

{'n_neighbors': 9}

---