### Import Libraries

In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from math import sqrt
from sklearn.model_selection import train_test_split

In [2]:
# Load the Iris dataset once at module level. iris_dataset() further down
# reads iris.data as the feature matrix and iris.target as the labels.

iris = load_iris()

### Training KNN model

In [3]:
# Calculate the Euclidean distance between a test sample and a training sample.

def calc_distances(x1, x2):
    """Return the Euclidean distance between two equal-length points.

    Parameters
    ----------
    x1, x2 : sequence of numbers
        Coordinates of the two points. Both must support ``len()`` and
        iteration (lists, tuples and 1-D NumPy arrays all work).

    Returns
    -------
    float
        ``sqrt(sum((x2[i] - x1[i]) ** 2))``; ``0.0`` for two empty points.

    Raises
    ------
    ValueError
        If the two points do not have the same number of coordinates.
    """
    # Iterating over len(x1) alone would silently ignore trailing coordinates
    # of a longer x2 (and fail with an opaque IndexError on a shorter one),
    # so reject mismatched dimensions explicitly.
    if len(x1) != len(x2):
        raise ValueError(
            "points must have the same number of dimensions, got %d and %d"
            % (len(x1), len(x2))
        )

    squared_sum = 0
    for a, b in zip(x1, x2):
        squared_sum += (b - a) ** 2
    return sqrt(squared_sum)

In [4]:
# Find labels of K nearest neighbors of the test sample
# Predict the test sample's label as the most frequent label among those neighbors, without sorting the distances.

def prediction(X_train, Y_train, X_test, k, distance_fn=None):
    """Predict the label of one test sample by majority vote of its k nearest neighbors.

    The training set is scanned once while a candidate list of at most ``k``
    neighbors is maintained; the distances are never sorted. Once the list is
    full, a new training sample replaces the currently farthest candidate only
    if it is strictly closer than at least one candidate.

    Parameters
    ----------
    X_train : indexable sequence of points
        Training samples; ``X_train[i]`` is the i-th point.
    Y_train : indexable sequence
        Training labels; ``Y_train[i]`` is the label of ``X_train[i]``.
    X_test : sequence of numbers
        The single sample to classify.
    k : int
        Number of neighbors that vote. If ``k`` exceeds the number of training
        samples, every training sample votes.
    distance_fn : callable, optional
        ``distance_fn(X_test, train_point)`` returning a number. Defaults to
        ``calc_distances`` (Euclidean distance).

    Returns
    -------
    The label that occurs most often among the selected neighbors. When
    several labels share the highest count, the winner is the first of them
    in the iteration order of ``set(labels)``.

    Raises
    ------
    ValueError
        If ``X_train`` is empty, if ``X_train`` and ``Y_train`` differ in
        length, or if ``k`` is smaller than 1.
    """
    if distance_fn is None:
        distance_fn = calc_distances

    n_train = len(X_train)
    # Without these checks an empty neighbor list reaches max() below and
    # fails with the unhelpful "max() arg is an empty sequence".
    if n_train == 0:
        raise ValueError("X_train must contain at least one sample")
    if len(Y_train) != n_train:
        raise ValueError(
            "X_train and Y_train must have the same length, got %d and %d"
            % (n_train, len(Y_train))
        )
    if k < 1:
        raise ValueError("k must be at least 1, got %r" % (k,))

    # Parallel lists: label and distance of each current candidate neighbor.
    neighbor_labels = []
    neighbor_dists = []

    for idx in range(n_train):
        dist = distance_fn(X_test, X_train[idx])

        if len(neighbor_dists) < k:
            # Still filling the candidate list: accept unconditionally.
            neighbor_labels.append(Y_train[idx])
            neighbor_dists.append(dist)
        elif any(kept > dist for kept in neighbor_dists):
            # Strictly closer than some candidate: evict the farthest one
            # (np.argmax returns the first index on equal distances).
            farthest = int(np.argmax(neighbor_dists))
            neighbor_labels[farthest] = Y_train[idx]
            neighbor_dists[farthest] = dist

    return max(set(neighbor_labels), key=neighbor_labels.count)

In [5]:
# Repeat the prediction for every sample in the test dataset.

def knn_model(X_train, X_test, Y_train, k):
    """Classify every sample in ``X_test`` with the k-nearest-neighbors rule.

    Each test sample is passed, together with the training data and ``k``,
    to ``prediction``; the individual results are collected in test-set order.

    Returns
    -------
    numpy.ndarray
        One predicted label per entry of ``X_test``.
    """
    predicted_labels = [
        prediction(X_train, Y_train, query_point, k) for query_point in X_test
    ]
    return np.array(predicted_labels)

In [6]:
# Calculate the error rate by comparing each predicted label with the actual label of the same sample.

def calc_error_rate(Y_pred, Y_test, dataset):
    """Print the misclassification count, error rate and accuracy.

    Parameters
    ----------
    Y_pred : sequence
        Predicted labels.
    Y_test : sequence
        Actual labels, in the same order and of the same length as ``Y_pred``.
    dataset : str
        Name shown in the report header (printed as "<dataset> Dataset:").

    Returns
    -------
    None
        The report is written to standard output only.

    Raises
    ------
    ValueError
        If ``Y_pred`` is empty (the error rate is undefined) or if the two
        label sequences differ in length.
    """
    total = len(Y_pred)
    # An empty prediction set would otherwise surface as ZeroDivisionError.
    if total == 0:
        raise ValueError("Y_pred is empty; the error rate is undefined")
    # Comparing only the first len(Y_pred) labels would silently hide a
    # mismatch between predictions and ground truth.
    if len(Y_test) != total:
        raise ValueError(
            "Y_pred and Y_test must have the same length, got %d and %d"
            % (total, len(Y_test))
        )

    errors = sum(1 for predicted, actual in zip(Y_pred, Y_test) if predicted != actual)

    error_rate = errors / total
    accuracy = (1 - error_rate) * 100

    print("\n", dataset, "Dataset:")
    print("No. of Errors =", errors, "out of", total, "samples.")
    print("Error Rate = %.3f" % error_rate)
    print("Correct Predictions (Accuracy) = %.2f%%" % accuracy)

In [7]:
# Prediction and Error rate of Iris dataset

def iris_dataset(k, test_size=0.2, random_state=501):
    """Run the KNN classifier on the Iris dataset and print its error report.

    The module-level ``iris`` object supplies the features (``iris.data``)
    and labels (``iris.target``). The data is shuffled and split into
    training and test parts, stratified on the labels; the test part is then
    classified with ``knn_model`` and scored with ``calc_error_rate``.

    Parameters
    ----------
    k : int or int-convertible
        Number of neighbors; converted with ``int(k)`` before use.
    test_size : float or int, optional
        Forwarded to ``train_test_split``. Defaults to 0.2, the value that
        was previously hard-coded.
    random_state : int, optional
        Seed forwarded to ``train_test_split`` so that the split is
        reproducible. Defaults to 501, the value that was previously
        hard-coded.

    Returns
    -------
    None
        Results are printed by ``calc_error_rate``.
    """
    features = iris.data
    labels = iris.target

    X_train, X_test, Y_train, Y_test = train_test_split(
        features,
        labels,
        test_size=test_size,
        random_state=random_state,
        shuffle=True,
        stratify=labels,
    )
    Y_pred = knn_model(X_train, X_test, Y_train, int(k))
    calc_error_rate(Y_pred, Y_test, "Iris")

# Evaluate the classifier using only the single nearest neighbor.
print("\nFor k = 1,")
iris_dataset(1)

# Repeat with three neighbors so the two reports can be compared.
print("\nFor k = 3,")
iris_dataset(3)


For k = 1,

 Iris Dataset:
No. of Errors = 1 out of 30 samples.
Error Rate = 0.033
Correct Predictions (Accuracy) = 96.67%

For k = 3,

 Iris Dataset:
No. of Errors = 1 out of 30 samples.
Error Rate = 0.033
Correct Predictions (Accuracy) = 96.67%
