# Using KNN to predict iris

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import neighbors, datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
iris = datasets.load_iris()
iris_X = iris.data
iris_Y = iris.target
iris_name_label = iris.target_names

In [3]:
print("Number of classes %d" % len((np.unique(iris_Y))))
print("Number of data points %d" % (len(iris_Y)))

Number of classes 3
Number of data points 150


In [4]:
X0 = iris_X[iris_Y == 0, :]
print(f"Samples of class 0: \n{X0[:5, :]}")

X1 = iris_X[iris_Y == 1, :]
print(f"Samples of class 1: \n{X1[:5, :]}")

X2 = iris_X[iris_Y == 2, :]
print(f"Samples of class 2: \n{X2[:5, :]}")


Samples of class 0: 
[[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]]
Samples of class 1: 
[[7.  3.2 4.7 1.4]
 [6.4 3.2 4.5 1.5]
 [6.9 3.1 4.9 1.5]
 [5.5 2.3 4.  1.3]
 [6.5 2.8 4.6 1.5]]
Samples of class 2: 
[[6.3 3.3 6.  2.5]
 [5.8 2.7 5.1 1.9]
 [7.1 3.  5.9 2.1]
 [6.3 2.9 5.6 1.8]
 [6.5 3.  5.8 2.2]]


In [5]:
x_train, x_test, y_train, y_test = train_test_split(iris_X, iris_Y, test_size=50)

print("Traning size %d" % (len(x_train)))
print("Validation size: %d" % (len(x_test)))

clf = neighbors.KNeighborsClassifier(n_neighbors = 1, p = 2)
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)

print("Samples of first 20 values")
print(f"Predict data:\n {y_pred}")
print(f"Ground truth:\n {y_test}")
print("Accuracy: %.2f%%" % (accuracy_score(y_pred, y_test) * 100))

Traning size 100
Validation size: 50
Samples of first 20 values
Predict data:
 [2 0 1 1 1 1 0 1 1 0 0 1 2 0 1 2 2 1 2 2 0 2 2 1 1 0 0 1 1 0 0 1 0 1 1 0 1
 0 0 0 1 2 1 0 2 0 0 1 0 2]
Ground truth:
 [1 0 2 1 1 2 0 1 1 0 0 1 2 0 1 2 2 1 2 2 0 2 2 1 1 0 0 1 1 0 0 1 0 1 1 0 1
 0 0 0 1 2 2 0 2 0 0 1 0 2]
Accuracy: 92.00%


In [8]:
def weight(distance):
    sigma2 = .5
    return np.exp(-distance**2)/sigma2
    
# major voting
# The main concept is to find more than 1 data point nearest. They will find the class that is the majority will predict this class. 
clf = neighbors.KNeighborsClassifier(n_neighbors = 1, p = 2, weights='distance')
clf.fit(x_train, y_train)

y_pred = clf.predict(x_test)

print("Samples of first 20 values")
print(f"Predict data:\n {y_pred}")
print(f"Ground truth:\n {y_test}")
print("Accuracy: %.2f%%" % (accuracy_score(y_pred, y_test) * 100))

Samples of first 20 values
Predict data:
 [2 0 1 1 1 1 0 1 1 0 0 1 2 0 1 2 2 1 2 2 0 2 2 1 1 0 0 1 1 0 0 1 0 1 1 0 1
 0 0 0 1 2 1 0 2 0 0 1 0 2]
Ground truth:
 [1 0 2 1 1 2 0 1 1 0 0 1 2 0 1 2 2 1 2 2 0 2 2 1 1 0 0 1 1 0 0 1 0 1 1 0 1
 0 0 0 1 2 2 0 2 0 0 1 0 2]
Accuracy: 92.00%


In [7]:
vector1 = [1, 2, 3, 4, 5, 6, 7]
vector2 = [4, 5, 6, 7, 8, 9, 10]

def norm(vect1, vect2=[], p=2):
    diff = 0
    if not vect2:
        vect2 = [0]*len(vect1)
        
    for i in range(len(vect1)):
        diff += np.abs((vect2[i] - vect1[i]))**p
    return diff**(1/p)

print(norm(vect1=vector1, p=2))
print(np.linalg.norm(vector1))
        

11.832159566199232
11.832159566199232
