## K Nearest Neighbors

### Importing libraries and dataset

<a href="https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_wine.html#sklearn.datasets.load_wine">
    Wine dataset
</a>

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [7]:
from sklearn.datasets import load_wine

# Load the wine dataset as pandas objects, split into features and labels.
data, target = load_wine(
    as_frame=True,    # pandas DataFrame / Series instead of NumPy arrays
    return_X_y=True,  # (features, labels) tuple instead of a Bunch object
)

In [17]:
# Append the class labels as a 'target' column so features and labels sit in one frame.
data = data.assign(target=target)
# Preview the last five rows.
data.tail(5)

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline,target
173,13.71,5.65,2.45,20.5,95.0,1.68,0.61,0.52,1.06,7.7,0.64,1.74,740.0,2
174,13.4,3.91,2.48,23.0,102.0,1.8,0.75,0.43,1.41,7.3,0.7,1.56,750.0,2
175,13.27,4.28,2.26,20.0,120.0,1.59,0.69,0.43,1.35,10.2,0.59,1.56,835.0,2
176,13.17,2.59,2.37,20.0,120.0,1.65,0.68,0.53,1.46,9.3,0.6,1.62,840.0,2
177,14.13,4.1,2.74,24.5,96.0,2.05,0.76,0.56,1.35,9.2,0.61,1.6,560.0,2


In [59]:
# Read the Social Network Ads CSV from the working directory.
# NOTE: this rebinds `data`, replacing the wine DataFrame built in the cells above.
data = pd.read_csv(filepath_or_buffer='Social_Network_Ads.csv')
# Preview the first five rows.
data.head(n=5)

Unnamed: 0,Age,EstimatedSalary,Purchased
0,19,19000,0
1,35,20000,0
2,26,43000,0
3,27,57000,0
4,19,76000,0


In [60]:
# Every column except the last becomes the feature matrix X;
# the last column becomes the label vector y. Both are NumPy arrays.
X, y = data.iloc[:, :-1].values, data.iloc[:, -1].values

### Train test split

In [61]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=0,
    test_size=0.2,
)

### Feature scaling

In [62]:
from sklearn.preprocessing import StandardScaler

# Standardizer with its default behaviour spelled out: centre each feature, then scale it.
scaler = StandardScaler(with_mean=True, with_std=True)

In [63]:
# Fit the scaler on the training split only, then apply that same fitted scaler
# to both splits, so the test split never influences the fitted statistics.
X_train = scaler.fit(X_train).transform(X_train)
X_test = scaler.transform(X_test)

### Training and prediction

<a href="https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html">
    KNN
</a>

In [64]:
from sklearn.neighbors import KNeighborsClassifier

# Baseline classifier: each prediction is a vote among the 5 nearest training points.
knn_classifier = KNeighborsClassifier(
    n_neighbors=5,
)

In [65]:
# Fit the baseline KNN on the scaled training split (the fitted estimator is the cell output).
knn_classifier.fit(X=X_train, y=y_train)

KNeighborsClassifier()

In [66]:
# Hard class predictions of the baseline KNN on the held-out test split.
y_predicted = knn_classifier.predict(X=X_test)

### Hyperparameter tuning

<a href="https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html#sklearn.model_selection.GridSearchCV">
    Grid Search
</a>

In [67]:
from sklearn.model_selection import GridSearchCV

# Candidate neighbour counts to evaluate: 3, 4, 5 and 6.
param_grid = [{'n_neighbors': list(range(3, 7))}]

# Fresh, unfitted estimator for the search to clone and refit per candidate.
knn = KNeighborsClassifier()

# Search over the grid above, scoring each candidate with 5-fold cross-validation.
grid_search = GridSearchCV(knn, param_grid, cv=5)

In [68]:
# Run the cross-validated search on the scaled training split (the fitted search is the cell output).
grid_search.fit(X=X_train, y=y_train)

GridSearchCV(cv=5, estimator=KNeighborsClassifier(),
             param_grid=[{'n_neighbors': [3, 4, 5, 6]}])

In [69]:
# Predict the test split through the fitted grid search object.
y_predicted_tuned = grid_search.predict(X=X_test)

### Validation

In [70]:
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score

# Confusion matrix of the baseline model: rows are true classes, columns are predicted classes.
print(confusion_matrix(y_test, y_predicted))

# Micro-averaged precision, recall and F1, printed in that order.
# NOTE: with micro averaging on single-label data all three equal plain accuracy,
# which is why the three printed values match.
for micro_metric in (precision_score, recall_score, f1_score):
    print(micro_metric(y_test, y_predicted, average='micro'))

[[55  3]
 [ 1 21]]
0.95
0.95
0.9500000000000001


In [71]:
# Same report as for the baseline model, now for the grid-search predictions.
print(confusion_matrix(y_test, y_predicted_tuned))

# Micro-averaged precision, recall and F1, printed in that order.
for micro_metric in (precision_score, recall_score, f1_score):
    print(micro_metric(y_test, y_predicted_tuned, average='micro'))

# Hyperparameter combination that scored best during cross-validation.
print(grid_search.best_params_)

[[55  3]
 [ 1 21]]
0.95
0.95
0.9500000000000001
{'n_neighbors': 5}


#### Precision-recall and ROC curves

In [76]:
from sklearn.metrics import precision_recall_curve

# Precision-recall curves are only defined for binary targets (invalid for multi-class).
# The curve is traced by sweeping a decision threshold over a continuous score, so it
# must be fed scores rather than hard class predictions; already-thresholded 0/1 labels
# collapse the curve to a single operating point.
# Use the estimated probability of the positive class: column 1 of predict_proba,
# which corresponds to knn_classifier.classes_[1].
positive_class_scores = knn_classifier.predict_proba(X_test)[:, 1]
precisions, recalls, thresholds = precision_recall_curve(y_test, positive_class_scores)

In [None]:
from sklearn.metrics import roc_curve, roc_auc_score

# ROC curves are only defined for binary targets (invalid for multi-class).
# Like the precision-recall curve, the ROC curve sweeps a threshold over a continuous
# score, so it needs class-membership scores rather than hard 0/1 predictions; hard
# labels yield only one non-trivial (fpr, tpr) point.
# Use the estimated probability of the positive class: column 1 of predict_proba,
# which corresponds to knn_classifier.classes_[1].
positive_class_scores = knn_classifier.predict_proba(X_test)[:, 1]
# NOTE: `thresholds` is rebound here, replacing the precision-recall thresholds
# assigned in the previous cell.
fpr, tpr, thresholds = roc_curve(y_test, positive_class_scores)