In [1]:
# import necessary libraries
import numpy as np
from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt

# Implementing KNN Classifier from Scratch

In [2]:
class KNN:
    def __init__(self, k):
        # assign the neighbour count
         self.k = k

    def euclidean_distance(self, point1, point2):
        # return the euclidean distance between two points
        distance = 0
        for i in range(len(point1)):
            distance = (point1[i] - point2[i]) ** 2
        return np.sqrt(distance)

    def fit(self, X_train, y_train):
        # assign the train and test data
        self.X_train = X_train
        self.y_train = y_train

    def predict(self, X_test):
        # this method iterates over all the data points in our test dataset and stores their individual labels
        # Hint: during each iteration, you have to call the predict_label method below to get the label
        # returns the prediction of each dataset altogether as an array
        predictions = []
        for x in X_test:
            label = self.predict_label(x)  #Predicts the label for a single test point
            predictions.append(label)
        return predictions

    #this predict_label method will return the most common label for the individual data point called from the predict method
    def predict_label(self, x):
        # Compute distances to all training points
        # Sort by distance and get the indices of the nearest neighbors
        # Get the labels of the nearest neighbors
        # Return the most common class label among the neighbors
        distances = []
        for train_point in self.X_train:
            dist = self.euclidean_distance(x, train_point)
            distances.append(dist)


        sorted_indices = np.argsort(distances)
        k_indices = []
        for i in range(self.k):
            k_indices.append(sorted_indices[i])


        k_labels = []
        for i in k_indices:
            k_labels.append(self.y_train[i])


        label_counts = {}
        for label in k_labels:
            if label in label_counts:
                label_counts[label] += 1
            else:
                label_counts[label] = 1


        most_common_label = max(label_counts, key=label_counts.get)
        return most_common_label



In [3]:
# Load the Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=41)

# Initialize KNN classifier with the value of n
knn = KNN(3)

# Fit the model
knn.fit(X_train, y_train)

# Predict on the test set
y_pred = knn.predict(X_test)

# Calculate accuracy (write down the equation to calculate accuracy, and uncomment the print function)

correct_predictions = 0

for i in range(len(y_test)):
    if y_pred[i] == y_test[i]:
        correct_predictions += 1

# Calculate accuracy
accuracy = correct_predictions / len(y_test)

print(f"Accuracy of KNN from scratch: {accuracy * 100:.2f}%")

Accuracy of KNN from scratch: 93.33%
