In [43]:
import numpy as np
from collections import Counter
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split


In [44]:
def euclidean_distance(A, B):
    """Return the Euclidean (L2) distance between two points.

    Parameters
    ----------
    A, B : array-like of numbers
        Coordinates of the two points. Both must have the same shape.

    Returns
    -------
    numpy.float64
        Square root of the summed squared coordinate differences.
        Two empty inputs give 0.0.

    Raises
    ------
    ValueError
        If A and B do not have the same shape. Without this check a longer
        B would be silently truncated to len(A) and a wrong distance returned.
    """
    # Convert to float arrays so list/tuple inputs work and integer inputs
    # cannot overflow when squared.
    a = np.asarray(A, dtype=float)
    b = np.asarray(B, dtype=float)
    if a.shape != b.shape:
        raise ValueError(
            f"A and B must have the same shape, got {a.shape} and {b.shape}"
        )
    diff = a - b
    # Reduce over the first axis, the same axis the element-wise loop walked.
    return np.sqrt(np.sum(diff * diff, axis=0))

In [45]:
def sort_by_distance(X_train, Y_train, Z):
    """Pair every training label with its distance to Z, nearest first.

    X_train: indexable collection of training points.
    Y_train: labels, indexed in parallel with X_train.
    Z: query point.

    Returns a list of (distance, label) tuples in ascending order of
    distance. The sort is stable, so equally distant points keep their
    training order.
    """
    pairs = [
        (euclidean_distance(X_train[idx], Z), Y_train[idx])
        for idx in range(len(X_train))
    ]
    # Sort on the distance only, so labels are never compared with each other.
    pairs.sort(key=lambda pair: pair[0])
    return pairs
    

In [46]:
def nearest_neighbors(X_train, Y_train, Z, k):
    """Return the k training points closest to the query point Z.

    Parameters
    ----------
    X_train : indexable collection of training points
    Y_train : labels, indexed in parallel with X_train
    Z : query point
    k : int or None
        Number of neighbours to keep. If k exceeds the number of training
        points, all of them are returned; None also returns all of them.

    Returns
    -------
    list of (distance, label) tuples, nearest first, at most k long.

    Raises
    ------
    ValueError
        If k is negative. A negative k would otherwise be read as a
        from-the-end slice bound and silently drop the |k| farthest points
        instead of selecting neighbours.
    """
    # Validate before doing any distance work so bad input fails fast.
    if k is not None and k < 0:
        raise ValueError(f"k must be non-negative, got {k}")
    return sort_by_distance(X_train, Y_train, Z)[:k]
    

In [47]:
def KNN_predict(X_train, Y_train, Z, k):
    """Predict the label of Z by majority vote among its k nearest neighbours.

    X_train, Y_train: training points and their labels.
    Z: query point to classify.
    k: number of neighbours that get a vote.

    Returns the label that occurs most often among the neighbours.
    """
    # Neighbours arrive nearest first; only their labels take part in the vote.
    votes = Counter(label for _distance, label in nearest_neighbors(X_train, Y_train, Z, k))
    winner, _count = votes.most_common(1)[0]
    return winner
    

In [48]:



# Load the Iris dataset and pull out the feature matrix and the targets.
iris = load_iris()
X, Y = iris.data, iris.target

# Replace each target value with the matching entry of target_names so the
# classifier works with readable labels.
label_names = iris.target_names
Y_labels = [label_names[class_id] for class_id in Y]


In [49]:

# Hold out 20% of the samples for evaluation; the fixed random_state keeps the
# split identical from run to run.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y_labels, random_state=42, test_size=0.2)


In [50]:
# Score the classifier on the held-out samples: one prediction per test point.
k = 5
correct = 0

for features, true_label in zip(X_test, Y_test):
    predicted_label = KNN_predict(X_train, Y_train, features, k)
    # A match contributes 1 to the tally, a miss contributes 0.
    correct += 1 if predicted_label == true_label else 0

accuracy = correct / len(Y_test)
print(f"Accuracy of your KNN on Iris test set: {accuracy:.2%}")


Accuracy of your KNN on Iris test set: 100.00%
