# Experiment 9

### SHIVAM SINGHAL 

### 2K18/CO/340

**AIM** - Write a program to implement k-Nearest Neighbor algorithm to classify any dataset of your
choice. Print both correct and wrong predictions.

In [1]:
import pandas as pd 
import numpy as np

In [2]:
# Location of the raw Iris data file and the column names supplied when reading it.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
header = [
    'sepal length in cm',
    'sepal width in cm',
    'petal length in cm',
    'petal width in cm',
    'class',
]

In [3]:
# Read the Iris CSV, assigning the column names from `header`.
df = pd.read_csv(url, names=header)
# Encode the species names as integer codes numbered in order of first
# appearance (first species seen -> 0, next -> 1, ...). Only the 'class'
# column is touched, the number of classes is not hard-coded, and the result
# is a genuine integer column instead of depending on replace() downcasting
# an object column.
df['class'] = pd.factorize(df['class'])[0]

In [4]:
# Preview the first five rows to check the column names and the encoded 'class' values.
df.head()

Unnamed: 0,sepal length in cm,sepal width in cm,petal length in cm,petal width in cm,class
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


## KNN Algorithm
- For each example in the data, calculate the distance between the query example and that example.
- Add the distance and the index of the example to an ordered collection.
- Sort the ordered collection of distances and indices from smallest to largest (in ascending order) by the distances
- Pick the first K entries from the sorted collection
- Get the labels of the selected K entries
    - If regression, return the mean of the K labels
    - If classification, return the mode of the K labels

In [5]:
def distance(v1, v2):
    """Return the Euclidean distance between the vectors v1 and v2."""
    delta = v1 - v2
    squared_total = (delta ** 2).sum()
    return np.sqrt(squared_total)

In [6]:
def knn(X_train, Y_train , test , k, metric=None):
    """Classify one query point by majority vote among its k nearest training points.

    Parameters
    ----------
    X_train : array-like, shape (n_samples, n_features)
        Training feature vectors. Lists, NumPy arrays and DataFrames are
        accepted; the data is converted with ``np.asarray`` and then indexed
        by row position.
    Y_train : array-like, shape (n_samples,)
        Label of each training row, in the same order as ``X_train``.
    test : array-like, shape (n_features,)
        The query point to classify.
    k : int
        Number of nearest neighbours that vote. A value larger than
        ``n_samples`` lets every training point vote.
    metric : callable, optional
        Function called as ``metric(test, row)`` that returns the distance
        between the query and one training row. Defaults to the module-level
        ``distance`` function.

    Returns
    -------
    NumPy scalar
        The most frequent label among the k nearest neighbours, in the dtype
        of ``Y_train``. When several labels share the highest count, the one
        that sorts first (``np.unique`` order) is returned.

    Raises
    ------
    ValueError
        If the training set is empty, if ``X_train`` and ``Y_train`` differ in
        length, or if ``k`` is smaller than 1.
    """
    if metric is None:
        metric = distance

    # Work on plain arrays so that row access is positional regardless of
    # whether the caller passed lists, arrays, or pandas objects.
    X_train = np.asarray(X_train)
    Y_train = np.asarray(Y_train)
    test = np.asarray(test)

    n_samples = X_train.shape[0]
    if n_samples == 0:
        raise ValueError("X_train must contain at least one sample")
    if Y_train.shape[0] != n_samples:
        raise ValueError(
            "X_train and Y_train must have the same number of samples "
            f"({n_samples} != {Y_train.shape[0]})"
        )
    if k < 1:
        raise ValueError(f"k must be at least 1, got {k}")

    # Distance from the query to every training row.
    dists = np.array(
        [metric(test, X_train[i]) for i in range(n_samples)], dtype=float
    )

    # Stable sort: equidistant neighbours keep their training-set order.
    nearest = np.argsort(dists, kind="stable")[:k]

    # Vote on the labels themselves; they are kept separate from the
    # distances so their dtype is preserved.
    values, counts = np.unique(Y_train[nearest], return_counts=True)
    return values[np.argmax(counts)]

In [7]:
# Convert the frame to a single NumPy matrix, then slice it column-wise.
samples = np.array(df)
X = samples[:, :4]   # the four measurement columns
Y = samples[:, 4]    # the encoded 'class' column

In [8]:
# Show the shapes of the feature matrix and the label vector, one per line.
print(X.shape, Y.shape, sep='\n')

(150, 4)
(150,)


In [9]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for testing. The split is shuffled with a fixed
# seed so it is reproducible, and stratified on Y so each class is represented
# proportionally in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=6,
    shuffle=True,
    stratify=Y,
)
# Display the shape of every part of the split.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

((105, 4), (45, 4), (105,), (45,))

In [10]:
# Number of neighbours consulted for each prediction.
k = 3
# Classify every held-out sample against the training set.
y_pred = [knn(X_train, y_train, sample, k) for sample in X_test]

In [11]:
# Turn the list of predictions into a NumPy array.
y_pred = np.array(y_pred)

In [12]:
from sklearn.metrics import confusion_matrix

# Rows correspond to the true classes and columns to the predicted classes,
# so off-diagonal entries count the misclassified test samples.
confusion_matrix(y_test , y_pred)

array([[15,  0,  0],
       [ 0, 13,  2],
       [ 0,  1, 14]], dtype=int64)

In [13]:
from sklearn.metrics import classification_report

# Print per-class precision, recall, F1-score and support for the test predictions.
print(classification_report(y_test , y_pred))

              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00        15
         1.0       0.93      0.87      0.90        15
         2.0       0.88      0.93      0.90        15

    accuracy                           0.93        45
   macro avg       0.93      0.93      0.93        45
weighted avg       0.93      0.93      0.93        45



In [14]:
# Misclassified test samples: print the input with its predicted and true label.
for features, predicted, actual in zip(X_test, y_pred, y_test):
    if predicted != actual:
        print('Input -', features)
        print('Predicted - ', predicted)
        print('True -', actual)

Input - [4.9 2.5 4.5 1.7]
Predicted -  1.0
True - 2.0
Input - [6.3 2.5 4.9 1.5]
Predicted -  2.0
True - 1.0
Input - [6.  2.7 5.1 1.6]
Predicted -  2.0
True - 1.0


In [15]:
# Correctly classified test samples: print the input with its predicted and true label.
for features, predicted, actual in zip(X_test, y_pred, y_test):
    if predicted == actual:
        print('Input -', features)
        print('Predicted - ', predicted)
        print('True -', actual)

Input - [7.6 3.  6.6 2.1]
Predicted -  2.0
True - 2.0
Input - [6.9 3.2 5.7 2.3]
Predicted -  2.0
True - 2.0
Input - [5.7 4.4 1.5 0.4]
Predicted -  0.0
True - 0.0
Input - [6.7 3.1 4.7 1.5]
Predicted -  1.0
True - 1.0
Input - [6.2 2.2 4.5 1.5]
Predicted -  1.0
True - 1.0
Input - [5.  3.2 1.2 0.2]
Predicted -  0.0
True - 0.0
Input - [6.4 3.2 4.5 1.5]
Predicted -  1.0
True - 1.0
Input - [5.1 2.5 3.  1.1]
Predicted -  1.0
True - 1.0
Input - [6.8 3.  5.5 2.1]
Predicted -  2.0
True - 2.0
Input - [4.9 3.1 1.5 0.1]
Predicted -  0.0
True - 0.0
Input - [5.4 3.4 1.7 0.2]
Predicted -  0.0
True - 0.0
Input - [4.9 2.4 3.3 1. ]
Predicted -  1.0
True - 1.0
Input - [6.9 3.1 5.1 2.3]
Predicted -  2.0
True - 2.0
Input - [6.4 2.8 5.6 2.1]
Predicted -  2.0
True - 2.0
Input - [5.5 4.2 1.4 0.2]
Predicted -  0.0
True - 0.0
Input - [6.1 3.  4.9 1.8]
Predicted -  2.0
True - 2.0
Input - [4.4 3.2 1.3 0.2]
Predicted -  0.0
True - 0.0
Input - [6.7 3.  5.2 2.3]
Predicted -  2.0
True - 2.0
Input - [6.  3.4 4.5 1.6]
Pr