In [1]:
import pandas as pd
import numpy as np
import time

In [2]:
# Read both MNIST splits with header=None (so the first line is treated as
# data, not column names) and keep only the underlying NumPy arrays.
train = pd.read_csv('MNIST_train_small.csv', header=None, sep=',').values
test = pd.read_csv('MNIST_test_small.csv', header=None, sep=',').values

# Column 0 is used as the label vector; every remaining column is a feature.
train_Y, train_X = train[:, 0], train[:, 1:]
test_Y, test_X = test[:, 0], test[:, 1:]

In [3]:
def euclidean_dist(vec1, vec2):
    """Return the Euclidean (L2) distance between two vectors.

    Parameters
    ----------
    vec1, vec2 : array-like
        Numeric inputs of equal (or broadcastable) shape. NumPy arrays,
        lists and tuples are all accepted.

    Returns
    -------
    numpy floating scalar
        ``np.linalg.norm(vec1 - vec2)``, with the subtraction carried out in
        floating point whenever both inputs are integer- or bool-typed.
    """
    a = np.asarray(vec1)
    b = np.asarray(vec2)
    # dtype.kind: 'b' = bool, 'i' = signed int, 'u' = unsigned int.
    # Integer arithmetic happens in the input dtype, so unsigned data (e.g.
    # raw uint8 pixels) would wrap around on negative differences and
    # silently inflate the distance. Promote to float64 before subtracting.
    if np.result_type(a.dtype, b.dtype).kind in 'biu':
        a = a.astype(np.float64)
        b = b.astype(np.float64)
    return np.linalg.norm(a - b)

In [4]:
def precompute_distances(X_train):
    """Build a lookup table of pairwise distances between training points.

    Returns a dict that maps every ordered index pair ``(i, j)`` with
    ``i != j`` to ``euclidean_dist(X_train[i], X_train[j])``. Both ``(i, j)``
    and ``(j, i)`` are stored, so ``n`` points produce ``n * (n - 1)``
    entries; self-pairs ``(i, i)`` are left out.
    """
    n_points = len(X_train)
    return {
        (i, j): euclidean_dist(X_train[i], X_train[j])
        for i in range(n_points)
        for j in range(n_points)
        if i != j
    }

In [5]:
# precomputed_distances = precompute_distances(train_X)

In [6]:
def get_accuracy(method, knn, train_X, train_Y, test_X, test_Y, report_every=100):
    """Evaluate a classifier on a test set and return its accuracy.

    Parameters
    ----------
    method : callable
        Called as ``method(knn, train_X, train_Y, example)`` for each test
        point; its return value is compared with the true label using ``==``.
    knn : passed through unchanged as the first argument of ``method``.
    train_X, train_Y : passed through unchanged to ``method``.
    test_X : indexable sequence of test examples.
    test_Y : indexable sequence of true labels, aligned with ``test_X``.
    report_every : int, optional
        Print the running accuracy and the average time per point whenever
        the zero-based index of the current point is a multiple of this
        value (so the first point is always reported). Defaults to 100;
        pass 0 or None to disable progress output.

    Returns
    -------
    float
        Fraction of test points whose predicted label equals the true label.

    Raises
    ------
    ZeroDivisionError
        If ``test_X`` is empty.
    """
    # perf_counter is monotonic and high resolution, so the elapsed time is
    # not distorted by system clock adjustments or millisecond rounding.
    start_time = time.perf_counter()
    correct = 0
    for x in range(len(test_X)):
        true_label = test_Y[x]
        predicted_label = method(knn, train_X, train_Y, test_X[x])
        if true_label == predicted_label:
            correct += 1
        if report_every and x % report_every == 0:
            tested = x + 1
            elapsed_ms = (time.perf_counter() - start_time) * 1000
            print(f'Current accuracy: {correct / tested}')
            print(f'Points tested: {tested}, average time per point: {elapsed_ms / tested:.2f}ms\n')
    return correct / len(test_X)

In [7]:
def naive_knn(knn, train_X, train_Y, example):
    """Classify ``example`` by majority vote among its ``knn`` nearest training points.

    Every training point is compared with ``example`` via ``euclidean_dist``
    in a single linear scan, while a running set of the ``knn`` closest
    points seen so far is maintained.

    Parameters
    ----------
    knn : int
        Number of neighbours that take part in the vote.
    train_X : indexable sequence of training examples.
    train_Y : indexable sequence of labels aligned with ``train_X``; the
        labels are fed to ``np.bincount``, which expects non-negative ints.
    example : the point to classify.

    Returns
    -------
    The most frequent label among the selected neighbours. ``np.argmax``
    returns the first maximum, so a tied vote goes to the smallest label.
    """
    # The first `knn` training points act as the provisional neighbour set.
    neighbour_dists = np.array([euclidean_dist(train_X[j], example) for j in range(knn)])
    neighbour_ids = np.array(range(knn))

    # Scan the remaining points; any point that is strictly closer than the
    # current farthest neighbour takes over that neighbour's slot.
    for candidate in range(knn, len(train_X)):
        candidate_dist = euclidean_dist(train_X[candidate], example)
        worst_slot = np.argmax(neighbour_dists)
        if candidate_dist < neighbour_dists[worst_slot]:
            neighbour_dists[worst_slot] = candidate_dist
            neighbour_ids[worst_slot] = candidate

    # Majority vote over the labels of the selected neighbours.
    votes = np.bincount([train_Y[j] for j in neighbour_ids])
    return np.argmax(votes)

In [8]:
from tqdm import tqdm, tqdm_notebook

In [9]:
# accuracies = []
# for i in tqdm_notebook(range(1,20)):
#     acc = get_accuracy(naive_knn, i, train_X, train_Y, test_X, test_Y)
#     accuracies.append(acc)