In [1]:
import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
# Loading the data
# Forward slashes are used on purpose: "\D" and "\m" are invalid escape
# sequences inside a normal string literal, and "/" is accepted as a path
# separator on Windows as well as on POSIX systems
data = np.load("../Datasets/mnist_train_small.npy")

In [3]:
# Column 0 holds the target (y); every remaining column is a feature (X)
y = data[:, 0]
X = data[:, 1:]

In [4]:
# Holding out half of the samples for testing; the fixed random_state keeps
# the split identical from run to run
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.50,
    random_state=42,
)

In [5]:
# Implementation of the KNN algorithm
class ImplementKNN:
    """Brute-force K-nearest-neighbours classifier.

    Distances are squared Euclidean distances, computed on features that were
    standardized with one global mean and one global standard deviation taken
    from the training data.
    """

    # Constructor
    def __init__(self, K = 5):
        """Store ``K``, the number of nearest neighbours that vote on a prediction."""
        self.K = K

    # Training the model with data
    def fit(self, X, y):
        """Memorize the training set.

        Parameters
        ----------
        X : array-like
            Training samples, one sample per entry along the first axis.
        y : array-like
            Class of each training sample, in the same order as ``X``.
        """
        X = np.asarray(X, dtype = float)

        # Keeping the training statistics so that predict() can put new points
        # on exactly the same scale as the stored training points
        self.mean = X.mean()
        spread = X.std()

        # A dataset without any spread cannot be scaled; dividing by 1 avoids NaNs
        self.std = spread if spread > 0 else 1.0

        # Standardizing the values of X
        self.X = (X - self.mean) / self.std
        self.y = np.asarray(y)

    # Predicting the class to which the point belongs to
    def predict_point(self, point):
        """Return the majority class among the ``K`` training points nearest to ``point``.

        ``point`` must already be on the standardized scale produced by
        ``fit``; ``predict`` performs that conversion for raw inputs.

        Points at equal distance are taken in training order, and when several
        classes receive the same number of votes the smallest class wins
        (``np.unique`` returns the classes sorted and ``np.argmax`` picks the
        first maximum).

        Raises
        ------
        ValueError
            If ``K`` is smaller than 1 or the training set is empty.
        """
        if self.K < 1:
            raise ValueError("K must be at least 1")
        if len(self.X) == 0:
            raise ValueError("the model has no training data")

        # Computing the squared distance from every point in the dataset at once
        squared_diff = (self.X - point) ** 2
        distances = squared_diff.reshape(len(squared_diff), -1).sum(axis = 1)

        # Indices of the 'K' closest points (a stable sort keeps training order on ties)
        closest = np.argsort(distances, kind = "stable")[: self.K]

        # Counting how often each class occurs among the 'K' closest points
        classes, counts = np.unique(np.asarray(self.y)[closest], return_counts = True)

        # Getting the most probable class
        return classes[np.argmax(counts)]

    # Predicting the classes of all the points
    def predict(self, X):
        """Predict the class of every sample in ``X``.

        Parameters
        ----------
        X : array-like
            Raw (unstandardized) samples, one per entry along the first axis.

        Returns
        -------
        list
            One predicted class per sample, in input order.
        """
        # Standardization with the statistics learned in fit(), not those of X
        # itself, so the result does not depend on which points share a batch
        X = (np.asarray(X, dtype = float) - self.mean) / self.std

        # Performing KNN on each point
        return [self.predict_point(x) for x in X]

    # Finding out the accuracy of the algorithm
    def get_accuracy(self, predictions, answers):
        """Return the percentage of ``predictions`` that equal their ``answers``.

        The two sequences are paired element by element; surplus elements of
        the longer one are ignored.

        Raises
        ------
        ZeroDivisionError
            If there is no pair to compare.
        """
        pairs = list(zip(predictions, answers))

        # Finding out the number of correct answers from the predictions
        correct = sum(1 for prediction, answer in pairs if prediction == answer)

        # Computing the percentage of accuracy
        return correct / len(pairs) * 100

In [6]:
# Building the classifier with its default number of neighbours, then
# handing it the training half of the data
model = ImplementKNN()
model.fit(X=X_train, y=y_train)

In [7]:
# Predicting only the first 20 held-out samples
ans = model.predict(X=X_test[:20])

In [8]:
# Percentage of the 20 predictions above that match the true labels
model.get_accuracy(predictions=ans, answers=y_test[:20])

95.0