# Shrey Viradiya
### 18BCE259

# KNN classifier using sklearn


In [1]:
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import *
import plotly.graph_objects as go
import plotly.express as px
from collections import Counter

In [2]:
# Seed NumPy's legacy global random generator so that any NumPy-based
# randomness used in this notebook is repeatable between runs.
np.random.seed(259)

In [3]:
# Load the Iris dataset bundled with scikit-learn; later cells read its
# 'data', 'target', 'feature_names' and 'target_names' entries.
iris = load_iris()

In [4]:
# Show which measurements (features) and which species (targets) the dataset holds.
print("IRIS Dataset")
print(f"Feature names: {iris['feature_names']}")
print(f"Target names: {iris['target_names']}")

IRIS Dataset
Feature names: ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
Target names: ['setosa' 'versicolor' 'virginica']


In [5]:
# Feature matrix and class labels, taken from the loaded dataset.
X, y = iris['data'], iris['target']

In [6]:
# Hold out half of the samples for testing; the fixed random_state makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.5,
    random_state=259,
)

### n_neighbors = 5

In [7]:
# Build the 5-nearest-neighbour classifier, then train it on the training split.
# The fit call stays as the last expression so the cell still displays the estimator.
neigh5 = KNeighborsClassifier(n_neighbors=5)
neigh5.fit(X=X_train, y=y_train)

KNeighborsClassifier()

In [8]:
# Predicted class index for every row of X_test, from the k=5 model.
prediction = neigh5.predict(X_test)

In [9]:
# Evaluate the k=5 model's predictions against the true test labels:
# overall accuracy, the confusion matrix, and the per-class
# precision / recall / F1 report.
print("Report: ____________________________\n")
print(f"Accuracy: {accuracy_score(y_test, prediction)}")
print("Confusion matrix:")
print(confusion_matrix(y_test, prediction))
print("Classification Report:")
print(classification_report(y_test, prediction))

Report: ____________________________

Accuracy: 0.9733333333333334
Confisuion metrix:
[[28  0  0]
 [ 0 20  1]
 [ 0  1 25]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        28
           1       0.95      0.95      0.95        21
           2       0.96      0.96      0.96        26

    accuracy                           0.97        75
   macro avg       0.97      0.97      0.97        75
weighted avg       0.97      0.97      0.97        75



### n_neighbors = 3

In [10]:
# Build the 3-nearest-neighbour classifier, then train it on the training split.
# The fit call stays as the last expression so the cell still displays the estimator.
neigh3 = KNeighborsClassifier(n_neighbors=3)
neigh3.fit(X=X_train, y=y_train)

KNeighborsClassifier(n_neighbors=3)

In [11]:
# Predicted class index for every row of X_test, from the k=3 model.
# NOTE: this rebinds `prediction`, replacing the k=5 results computed earlier;
# cells further down that read `prediction` therefore see the k=3 output.
prediction = neigh3.predict(X_test)

In [12]:
# Evaluate the k=3 model's predictions against the true test labels:
# overall accuracy, the confusion matrix, and the per-class
# precision / recall / F1 report.
print("Report: ____________________________\n")
print(f"Accuracy: {accuracy_score(y_test, prediction)}")
print("Confusion matrix:")
print(confusion_matrix(y_test, prediction))
print("Classification Report:")
print(classification_report(y_test, prediction))

Report: ____________________________

Accuracy: 0.9733333333333334
Confisuion metrix:
[[28  0  0]
 [ 0 20  1]
 [ 0  1 25]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        28
           1       0.95      0.95      0.95        21
           2       0.96      0.96      0.96        26

    accuracy                           0.97        75
   macro avg       0.97      0.97      0.97        75
weighted avg       0.97      0.97      0.97        75



## KNN classifier without using sklearn

In [13]:
def KNN(test_x, k = 3, train_x = None, train_y = None):
    """Classify one sample by a majority vote among its k nearest training samples.

    Parameters
    ----------
    test_x : array-like
        Feature vector of the sample to classify.
    k : int, default 3
        Number of nearest training samples that take part in the vote.
        Must be positive. If it exceeds the number of training samples,
        every training sample votes.
    train_x, train_y : array-like, optional
        Training feature rows and their labels. When omitted, the
        notebook-level ``X_train`` / ``y_train`` are used, so existing calls
        such as ``map(KNN, X_test)`` behave as before.

    Returns
    -------
    int
        The most frequent label among the k nearest neighbours, using
        Euclidean distance. On a tied vote, the tied label met first when
        walking the neighbours from nearest to farthest wins.

    Raises
    ------
    AssertionError
        If ``k`` is not positive.
    ValueError
        If the training set contains no samples.
    """
    assert k>0

    # Fall back to the notebook's training split only when the caller did not
    # supply one, so the function no longer *requires* hidden global state.
    if train_x is None:
        train_x = X_train
    if train_y is None:
        train_y = y_train
    train_x = np.asarray(train_x)
    train_y = np.asarray(train_y)

    if train_x.shape[0] == 0:
        raise ValueError("KNN needs at least one training sample")

    # One Euclidean distance per training row, computed in a single vectorised
    # step. Flattening each row's difference keeps the result equal to the norm
    # of (test_x - row) whatever the per-sample shape is.
    diffs = (train_x - test_x).reshape(train_x.shape[0], -1)
    distances = np.linalg.norm(diffs, axis=1)

    # A stable sort keeps equally distant samples in training order, matching
    # the behaviour of a stable list sort on the distances.
    nearest = np.argsort(distances, kind="stable")[:k]

    # Count labels as plain Python values (no float coercion); Counter keeps
    # first-seen order, which decides ties in favour of the closer neighbour.
    votes = Counter(train_y[nearest].tolist())
    return int(votes.most_common(1)[0][0])

In [14]:
# Classify every test row with the hand-written KNN, using its default k.
predictions = [KNN(sample) for sample in X_test]

In [15]:
# Evaluate the hand-written KNN's predictions against the true test labels:
# overall accuracy, the confusion matrix, and the per-class
# precision / recall / F1 report.
print("Report: ____________________________\n")
print(f"Accuracy: {accuracy_score(y_test, predictions)}")
print("Confusion matrix:")
print(confusion_matrix(y_test, predictions))
print("Classification Report:")
print(classification_report(y_test, predictions))

Report: ____________________________

Accuracy: 0.9733333333333334
Confisuion metrix:
[[28  0  0]
 [ 0 20  1]
 [ 0  1 25]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        28
           1       0.95      0.95      0.95        21
           2       0.96      0.96      0.96        26

    accuracy                           0.97        75
   macro avg       0.97      0.97      0.97        75
weighted avg       0.97      0.97      0.97        75



## Exploration

In [16]:
# Boolean mask over the test rows: True where the predicted label differs from
# the true label. Built once and reused for all three selections below.
# `prediction` is whatever was assigned most recently above (the k=3 sklearn
# model when the notebook is run top to bottom).
misclassified = np.asarray(y_test) != np.asarray(prediction)

error_input = X_test[misclassified]        # feature rows that were misclassified
error_output = prediction[misclassified]   # labels predicted for those rows
expected_output = y_test[misclassified]    # their true labels

In [17]:
# Display the misclassified feature rows together with the labels predicted for them.
error_input, error_output

(array([[5.9, 3.2, 4.8, 1.8],
        [6. , 2.2, 5. , 1.5]]),
 array([2, 1]))

In [18]:
# Feature columns drawn on the x and y axes (indices into iris['feature_names']).
fx = 2
fy = 3
# One colour per class index for: training points, test points, error markers.
clr = ['blue','green','red']
clr2 = ['cyan','chartreuse','orange']
clr3 = ['Black','Black','Black']

fig = go.Figure()

# Train Data: coloured by true class, hover text is the class name.
fig.add_trace(
    go.Scatter(
        x=X_train[:, fx],
        y=X_train[:, fy],
        mode='markers',
        marker={
            'size': 10,
            'color': [clr[label] for label in y_train],
        },
        hovertext=[iris['target_names'][label] for label in y_train],
    )
)

# Test Data: coloured by true class, hover text is the predicted class name.
fig.add_trace(
    go.Scatter(
        x=X_test[:, fx],
        y=X_test[:, fy],
        mode='markers',
        marker={
            'size': 8,
            'color': [clr2[label] for label in y_test],
        },
        hovertext=[iris['target_names'][label] for label in prediction],
    )
)

# Error: misclassified test points drawn on top; hover text reads
# "<predicted> -> <expected>".
fig.add_trace(
    go.Scatter(
        x=error_input[:, fx],
        y=error_input[:, fy],
        mode='markers',
        marker={
            'color': [clr3[label] for label in error_output],
        },
        hovertext=[
            f"{iris['target_names'][x[0]]} -> {iris['target_names'][x[1]]} "
            for x in zip(error_output, expected_output)
        ],
    )
)

fig.update_layout(
    autosize=False,
    width=1100,
    height=600,
    title_x=0.5,
    title_text='Classification on 2 features',
    yaxis={'title': iris['feature_names'][fy]},
    xaxis={'title': iris['feature_names'][fx]},
    showlegend=False,
)
fig.show()