## Nearest Neighbor Classifier 

#### Imports 

In [1]:
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
import cifar

#### Class

In [2]:
class NearestNeighbor:
    """Distance-weighted k-nearest-neighbour classifier using the L1 metric.

    The classifier simply memorises the training set. At prediction time the
    k closest training rows vote for their label with weight ``d_k - d``,
    where ``d`` is the row's L1 distance to the query and ``d_k`` is the
    distance of the k-th (farthest selected) neighbour. The k-th neighbour
    therefore always carries zero weight; when *every* weight is zero
    (k == 1, or all k distances tie) an unweighted majority vote is used.
    """

    def __init__(self):
        pass

    def train(self, X, y):
        """Remember the training data.

        X is N x D where each row is an example. y is 1-dimensional of size N
        and must hold non-negative integer class labels (they are used as
        indices into the per-class score table in ``predict``).
        """
        # the nearest neighbor classifier simply remembers all the training data
        self.Xtr = X
        self.ytr = y

    @staticmethod
    def _widen(arr):
        """Return ``arr`` as int64 if it has a bool/integer dtype, else unchanged.

        Subtracting unsigned or narrow integer arrays (e.g. uint8 pixels)
        wraps around instead of going negative, which corrupts the L1
        distance. Note this makes a widened copy of integer inputs.
        """
        arr = np.asarray(arr)
        if arr.dtype.kind in 'biu':
            return arr.astype(np.int64)
        return arr

    def predict(self, X, k):
        """Predict a label for every row of X.

        Parameters
        ----------
        X : array of shape M x D, one example per row.
        k : number of neighbours to consult; must be >= 1. Values larger
            than the number of training rows are clamped to that number.

        Returns
        -------
        1-D array of length M with the same dtype as the training labels.

        Raises
        ------
        ValueError if k < 1 or no training rows are stored.
        """
        if k < 1:
            raise ValueError('k must be at least 1, got %r' % (k,))

        # hoisted out of the loop: dtype widening and label bookkeeping
        train_rows = self._widen(self.Xtr)
        test_rows = self._widen(X)
        labels = np.asarray(self.ytr)

        num_train = train_rows.shape[0]
        if num_train == 0:
            raise ValueError('cannot predict without training data')
        k = min(int(k), num_train)  # never select the same training row twice

        num_test = test_rows.shape[0]
        # lets make sure that the output type matches the input type
        Ypred = np.zeros(num_test, dtype=labels.dtype)

        vote_labels = labels.astype(np.intp)      # bincount needs plain ints
        num_classes = int(vote_labels.max()) + 1  # size of the score table

        # loop over all test rows
        for i in range(num_test):
            # L1 distance (sum of absolute value differences) to every training row
            distances = np.sum(np.abs(train_rows - test_rows[i, :]), axis=1)

            # indices of the k smallest distances, ascending; the stable sort
            # breaks ties in favour of the lower training index
            nearest = np.argsort(distances, kind='stable')[:k]
            nearest_dist = distances[nearest]
            nearest_labels = vote_labels[nearest]

            # closer neighbours get larger weights; the farthest of the k gets 0
            weights = nearest_dist[-1] - nearest_dist

            if np.any(weights > 0):
                label_scores = np.bincount(
                    nearest_labels, weights=weights, minlength=num_classes)
            else:
                # degenerate case (k == 1 or all k distances equal): every
                # weight is zero, so fall back to a plain majority vote
                label_scores = np.bincount(nearest_labels, minlength=num_classes)

            # ties between labels resolve to the smallest label
            Ypred[i] = np.argmax(label_scores)

        return Ypred

A quick test shows the result:

 - First --> Load data

In [3]:
# Read the CIFAR-10 train/test splits through the project's loader helper
Xtr, Ytr, Xte, Yte = cifar.load_CIFAR10('data/cifar10/')

# Collapse each 32 x 32 x 3 image into a single row vector
pixels_per_image = 32 * 32 * 3
Xtr_rows = Xtr.reshape(Xtr.shape[0], pixels_per_image)  # expected: 50000 x 3072
Xte_rows = Xte.reshape(Xte.shape[0], pixels_per_image)  # expected: 10000 x 3072

 - Then --> Predict the labels

In [4]:
# Build the classifier and hand it the flattened training images with their labels
nn = NearestNeighbor()
nn.train(Xtr_rows, Ytr)

# Only the first `num_eval` test images are scored, using `num_neighbors` neighbours
num_eval = 1000
num_neighbors = 5
Yte_predict = nn.predict(Xte_rows[:num_eval], num_neighbors)

# Accuracy is the fraction of predictions that equal the true label
accuracy = np.mean(Yte_predict == Yte[:num_eval])
print('accuracy: %f' % accuracy)

accuracy: 0.410000
