# K Nearest Neighbour

# 1.) KNN Classification

In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np

In [2]:
from sklearn.datasets import make_classification

# Synthetic binary-classification dataset: 1000 samples, 5 features
# (1 of them redundant). The fixed random_state makes the data reproducible.
x, y = make_classification(
    n_samples=1000,
    n_features=5,
    n_redundant=1,
    n_classes=2,
    random_state=42,
)

In [5]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.30,
    random_state=42,
)

In [6]:
# Shapes of the four splits, in (x_train, x_test, y_train, y_test) order.
tuple(split.shape for split in (x_train, x_test, y_train, y_test))

((700, 5), (300, 5), (700,), (300,))

In [7]:
from sklearn.neighbors import KNeighborsClassifier

# Baseline classifier using the 5 nearest neighbours; algorithm='auto'
# leaves the choice of neighbour-search structure to scikit-learn.
knn = KNeighborsClassifier(
    n_neighbors=5,
    algorithm='auto',
)
knn

In [8]:
# Fit the baseline classifier on the training split.
knn.fit(X=x_train, y=y_train)

In [9]:
# Predict class labels for the held-out test split.
y_pred = knn.predict(X=x_test)

In [10]:
# Performance metrics
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

# Score the baseline predictions against the held-out labels, then print
# each metric under its own heading.
baseline_confusion = confusion_matrix(y_test, y_pred)
baseline_accuracy = accuracy_score(y_test, y_pred)
baseline_report = classification_report(y_test, y_pred)

print("confusion matrix:\n", baseline_confusion)
print("accuracy score:", baseline_accuracy)
print("classification report:\n", baseline_report)

confusion matrix:
 [[138  14]
 [ 25 123]]
accuracy score: 0.87
classification report:
               precision    recall  f1-score   support

           0       0.85      0.91      0.88       152
           1       0.90      0.83      0.86       148

    accuracy                           0.87       300
   macro avg       0.87      0.87      0.87       300
weighted avg       0.87      0.87      0.87       300



## Hyperparameter tuning

In [11]:
# Candidate neighbour counts for the grid search: K = 1 through 10 inclusive.
parameters = {'n_neighbors': list(range(1, 11))}

In [30]:
from sklearn.model_selection import GridSearchCV

# Exhaustive search over the candidate K values with 3-fold
# cross-validation, scoring each candidate by accuracy.
knn_cv = GridSearchCV(
    estimator=knn,
    param_grid=parameters,
    cv=3,
    scoring='accuracy',
)
knn_cv

In [31]:
# Run the grid search on the training split only; the test split stays unseen.
knn_cv.fit(X=x_train, y=y_train)

In [32]:
# Parameter setting that scored best in the grid search.
knn_cv.best_params_

{'n_neighbors': 7}

In [33]:
# Mean cross-validated accuracy of the best setting (measured on the
# training folds, not on the held-out test split).
knn_cv.best_score_

0.889983248352347

### Creating a KNN model with the best parameters

In [37]:
# Build the tuned model from the grid-search result rather than a
# hand-copied K, so this cell stays correct if the data, seed, or search
# grid changes. A fresh (unfitted) estimator is created here; it is fitted
# in the next cell.
knn_best = KNeighborsClassifier(algorithm='auto', **knn_cv.best_params_)
knn_best

In [38]:
# Fit the tuned classifier on the training split.
knn_best.fit(X=x_train, y=y_train)

In [39]:
# Predict with the tuned classifier and score it on the same held-out
# split used for the baseline, so the two runs are directly comparable.
y_pred_best = knn_best.predict(X=x_test)

tuned_confusion = confusion_matrix(y_test, y_pred_best)
tuned_accuracy = accuracy_score(y_test, y_pred_best)
tuned_report = classification_report(y_test, y_pred_best)

print("confusion matrix:\n", tuned_confusion)
print("accuracy score:", tuned_accuracy)
print("classification report:\n", tuned_report)


confusion matrix:
 [[137  15]
 [ 26 122]]
accuracy score: 0.8633333333333333
classification report:
               precision    recall  f1-score   support

           0       0.84      0.90      0.87       152
           1       0.89      0.82      0.86       148

    accuracy                           0.86       300
   macro avg       0.87      0.86      0.86       300
weighted avg       0.87      0.86      0.86       300



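Analysis: Hyperparameter tuning selected K = 7, with a mean 3-fold cross-validation accuracy of about 0.89 on the training set. When I refit the model with K = 7, the test accuracy was 0.863, slightly lower than the 0.87 obtained with the initial K = 5. The difference is only 2 of the 300 test samples (259 vs. 261 correct), so it is within the noise of a single train/test split rather than evidence that K = 7 is a worse choice; the cross-validation score and the test score are also measured on different data, so they are not expected to match exactly.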

# 2.) KNN Regression

In [40]:
from sklearn.datasets import make_regression

# Synthetic regression dataset: 100 samples, 4 features, generated with
# noise=10 and a fixed random_state for reproducibility.
x1, y1 = make_regression(
    n_samples=100,
    n_features=4,
    noise=10,
    random_state=42,
)

In [42]:
# Hold out 33% of the regression samples for testing; fixed seed for a reproducible split.
x1_train, x1_test, y1_train, y1_test = train_test_split(
    x1,
    y1,
    test_size=0.33,
    random_state=42,
)

In [43]:
from sklearn.neighbors import KNeighborsRegressor

In [45]:
# KNN regressor using the 5 nearest neighbours; algorithm='auto' leaves
# the choice of neighbour-search structure to scikit-learn.
knn_regressor = KNeighborsRegressor(
    n_neighbors=5,
    algorithm='auto',
)
knn_regressor

In [46]:
# Fit the regressor on the regression training split.
knn_regressor.fit(X=x1_train, y=y1_train)

In [50]:
# Predict target values for the held-out regression test split.
y1_pred = knn_regressor.predict(X=x1_test)

In [55]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Score the regression predictions against the held-out targets, then
# print each metric on its own line.
knn_reg_mse = mean_squared_error(y1_test, y1_pred)
knn_reg_mae = mean_absolute_error(y1_test, y1_pred)
knn_reg_r2 = r2_score(y1_test, y1_pred)

print("mean squared error:", knn_reg_mse)
print("mean absolute error:", knn_reg_mae)
print("r2 score:", knn_reg_r2)

mean squared error: 858.2572724130301
mean absolute error: 23.05034138537195
r2 score: 0.8618769501597305
