In [1]:
from collections import Counter
from math import sqrt
from random import randrange

import numpy as np

class KNearestNeighbour():
    """Minimal k-nearest-neighbours classifier based on Euclidean distance.

    ``fit`` only stores the training samples; all distance computation
    happens at prediction time.
    """

    # Initializing values
    def __init__(self, k):
        """Create an unfitted classifier.

        Args:
            k: Number of nearest neighbours that vote on each prediction.
        """
        self.k = k
        self.X_train = []
        self.y_train = []

    # Evaluate an algorithm using a train/test split
    def train_test_split(self, X, y, split):
        """Randomly partition ``X``/``y`` into training and test subsets.

        Samples are drawn without replacement (via ``randrange``) into the
        training set until it holds at least ``split * len(X)`` items; the
        remaining samples form the test set. Inputs are copied, not mutated.

        Args:
            X: Sequence of feature vectors.
            y: Sequence of labels, aligned with ``X``.
            split: Fraction of the samples to put in the training set.

        Returns:
            Tuple ``(X_train, X_test, y_train, y_test)`` of lists.

        Raises:
            ValueError: If ``split`` is greater than 1.
        """
        # Without this guard the loop below drains the pool and then fails
        # inside randrange() with an unhelpful "empty range" ValueError.
        if split > 1:
            raise ValueError(
                "split must be a fraction no greater than 1, got {!r}".format(split)
            )

        X_train, y_train = [], []
        # Work on copies so the caller's data is left untouched.
        X_test, y_test = list(X), list(y)
        train_size = split * len(X_test)

        while len(X_train) < train_size:
            index = randrange(len(X_test))
            # Pop the same index from both lists to keep samples and labels paired.
            X_train.append(X_test.pop(index))
            y_train.append(y_test.pop(index))

        return X_train, X_test, y_train, y_test

    # Fitting model
    def fit(self, X_train, y_train):
        """Store the training samples and their labels for later predictions.

        Args:
            X_train: Sequence of feature vectors.
            y_train: Sequence of labels, aligned with ``X_train``.
        """
        self.X_train = X_train
        self.y_train = y_train

    # Finding euclidean distance
    def euclidean_distance(self, v1, v2):
        """Return the Euclidean distance between two feature vectors.

        Every component takes part in the distance, including the last one.

        Args:
            v1: First vector (any array-like of numbers).
            v2: Second vector, with the same shape as ``v1``.

        Returns:
            The distance as a numpy floating-point scalar.

        Raises:
            ValueError: If the two vectors do not have the same shape.
        """
        a = np.asarray(v1, dtype=float)
        b = np.asarray(v2, dtype=float)
        # Refuse mismatched inputs explicitly; numpy would otherwise
        # broadcast e.g. a length-1 vector silently.
        if a.shape != b.shape:
            raise ValueError(
                "vectors must have the same shape, got {} and {}".format(
                    a.shape, b.shape
                )
            )
        diff = a - b
        return np.sqrt(np.sum(diff * diff))

    # Predicting value
    def predict(self, test_instance):
        """Predict the label of one sample by majority vote of its neighbours.

        The ``k`` training samples closest to ``test_instance`` vote with
        their labels. When several labels share the highest vote count, the
        label of the closest neighbour among them wins. A ``k`` below 1 is
        treated as 1, and a ``k`` larger than the training set uses every
        training sample.

        Args:
            test_instance: Feature vector to classify.

        Returns:
            The predicted label, taken from the stored training labels.

        Raises:
            IndexError: If the model holds no training data.
        """
        if len(self.X_train) == 0:
            raise IndexError(
                "cannot predict: the model has no training data (call fit first)"
            )

        distances = [
            (self.y_train[i], self.euclidean_distance(sample, test_instance))
            for i, sample in enumerate(self.X_train)
        ]
        # list.sort is stable, so equally distant samples keep training order.
        distances.sort(key=lambda pair: pair[1])

        n_neighbours = max(1, int(self.k))
        nearest_labels = [label for label, _ in distances[:n_neighbours]]

        votes = Counter(nearest_labels)
        top_count = max(votes.values())
        # Walk the labels nearest-first so ties go to the closest neighbour.
        for label in nearest_labels:
            if votes[label] == top_count:
                return label

    # Calculate score
    def score(self, X_test, y_test):
        """Return the fraction of test samples whose prediction is correct.

        Args:
            X_test: Sequence of feature vectors to classify.
            y_test: Sequence of expected labels, aligned with ``X_test``.

        Returns:
            Accuracy as a float between 0 and 1.

        Raises:
            ZeroDivisionError: If ``y_test`` is empty.
        """
        n_correct = sum(
            1
            for i, actual in enumerate(y_test)
            if self.predict(X_test[i]) == actual
        )
        return n_correct / len(y_test)

if __name__ == "__main__":
    # Demo: train on a random half of the scikit-learn digits dataset,
    # classify one hard-coded sample, then report accuracy on the other half.

    from sklearn.datasets import load_digits

    # Load the dataset once and read both attributes from the same object.
    digits = load_digits()

    # Flatten every image into a 64-value feature vector.
    X = digits.images.reshape(-1, 64)
    y = digits.target

    model = KNearestNeighbour(5)
    # The split is random, so the printed results can differ between runs.
    X_train, X_test, y_train, y_test = model.train_test_split(X, y, .5)
    model.fit(X_train, y_train)

    # Classify a single hard-coded 64-value sample.
    print(model.predict([ 0.,  0.,  3., 13., 16., 16., 13.,  0.,  0.,  0., 11., 14.,  8.,
        15.,  9.,  0.,  0.,  0.,  3.,  5.,  2., 14.,  2.,  0.,  0.,  0.,
         0.,  0.,  9., 11.,  1.,  0.,  0.,  0.,  2., 15., 15., 16.,  9.,
         0.,  0.,  0.,  2., 15., 14.,  8.,  2.,  0.,  0.,  0.,  0., 11.,
        10.,  0.,  0.,  0.,  0.,  0.,  2., 15.,  4.,  0.,  0.,  0.]))

    # Accuracy on the held-out half.
    print(model.score(X_test, y_test))

7
0.9832962138084632
