In [1]:
import os
import numpy as np
import cv2 as cv
import platform
from six.moves import cPickle as pickle

In [2]:
class NearestNeighbor(object):
    """k-nearest-neighbour classifier based on Euclidean (L2) distance.

    ``train`` only memorises the data. ``distanceMatrix`` computes the
    test-to-train distances, and ``predict_labels`` turns a distance matrix
    into labels by majority vote among the k closest training rows.
    """

    def __init__(self):
        pass

    def train(self, X, y):
        """Memorise the training set.

        X: array-like, one training example per row.
        y: array-like of labels, one per row of X. Labels must be
           non-negative integers because ``predict_labels`` counts votes
           with ``np.bincount``.
        """
        # asarray returns ndarrays unchanged (no copy) but lets plain lists be
        # used with .shape and fancy indexing later on.
        self.Xtr = np.asarray(X)
        self.ytr = np.asarray(y)

    @staticmethod
    def _as_float(values):
        """Return ``values`` as float64 if it has an integer/bool dtype.

        Subtracting and squaring unsigned integers (e.g. uint8 pixels) wraps
        around silently and yields wrong distances. Arrays of any other dtype
        are returned untouched so existing float behaviour is unchanged.
        """
        if values.dtype.kind in "iub":
            return values.astype(np.float64)
        return values

    def distanceMatrix(self, X):
        """Compute L2 distances between every row of X and every training row.

        X: array-like with one test example per row and the same number of
           columns as the training data.

        Returns an array of shape (X.shape[0], number of training rows);
        entry [i, j] is the distance between X[i] and training row j.
        """
        train_rows = self._as_float(self.Xtr)
        test_rows = self._as_float(np.asarray(X))

        num_test = test_rows.shape[0]
        dis = np.zeros((num_test, train_rows.shape[0]))

        # One test row at a time: the broadcasted difference is already as
        # large as the whole training set.
        for i in range(num_test):
            diff = train_rows - test_rows[i, :]
            dis[i] = np.sqrt(np.sum(np.square(diff), axis=1))

        return dis

    def predict_labels(self, dists, k=1):
        """Predict a label for each row of a distance matrix.

        dists: array as returned by ``distanceMatrix`` (one row per test
               example, one column per training example).
        k:     number of nearest neighbours that vote; must be >= 1. A k
               larger than the number of training rows uses all of them.

        Returns a float array with one predicted label per test example.
        Ties are resolved in favour of the smallest label (first maximum of
        ``np.bincount``).

        Raises ValueError if k < 1.
        """
        if k < 1:
            # A negative k would otherwise slice neighbours off the end of the
            # sorted list and silently vote with the wrong set.
            raise ValueError("k must be a positive integer, got {}".format(k))

        num_test = dists.shape[0]
        y_pred = np.zeros(num_test)
        for i in range(num_test):
            # Indices of the k training rows closest to test row i.
            closest_y = np.argsort(dists[i])[:k]
            y_pred[i] = np.bincount(self.ytr[closest_y]).argmax()
        return y_pred

    

In [3]:
def load_CIFAR10(ROOT):
    """Load the CIFAR-10 training and test sets stored under ROOT.

    Reads the five files ``data_batch_1`` .. ``data_batch_5`` plus
    ``test_batch`` through ``load_CIFAR_batch``.

    Returns (Xtr, Ytr, Xte, Yte): the training images and labels of all five
    batches concatenated along the first axis, followed by the test images
    and labels.
    """
    train_paths = [os.path.join(ROOT, 'data_batch_%d' % (b, ))
                   for b in range(1, 6)]
    batches = [load_CIFAR_batch(path) for path in train_paths]

    Xtr = np.concatenate([images for images, _ in batches])
    Ytr = np.concatenate([labels for _, labels in batches])

    test_path = os.path.join(ROOT, 'test_batch')
    Xte, Yte = load_CIFAR_batch(test_path)
    return Xtr, Ytr, Xte, Yte

In [4]:
def load_CIFAR_batch(filename):
    """Load a single CIFAR batch file.

    filename: path to a pickled batch; the unpickled dict must provide the
              keys 'data' (one flattened image per row) and 'labels'.

    Returns (X, Y): X is a float array of shape (N, 32, 32, 3) and Y is an
    array of the N labels, where N is the number of images in the batch.
    """
    with open(filename, 'rb') as f:
        datadict = load_pickle(f)

    # The file is fully read at this point, so the remaining work happens
    # outside the ``with`` block and the handle is released early.
    X = np.asarray(datadict['data'])
    Y = np.array(datadict['labels'])

    # -1 lets NumPy infer the batch size instead of insisting on exactly
    # 10000 images. Each row is unpacked to (3, 32, 32) and the 3-sized axis
    # is then moved last, giving (N, 32, 32, 3).
    X = X.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1).astype("float")
    return X, Y

In [5]:
def load_pickle(f):
    """Unpickle and return the object stored in the open binary file ``f``.

    On Python 3 the data is decoded with ``encoding='latin1'``; on Python 2
    a plain ``pickle.load`` is used. Any other major version raises
    ValueError.

    NOTE: unpickling can execute arbitrary code, so only use this on files
    from a trusted source.
    """
    version = platform.python_version_tuple()
    major = version[0]

    if major == '3':
        return pickle.load(f, encoding='latin1')
    if major == '2':
        return pickle.load(f)

    raise ValueError("invalid python version: {}".format(version))

In [6]:
# Build the dataset location with os.path.join so the notebook also runs where
# the path separator is not a backslash (Linux, macOS, Colab).
CIFAR10_DIR = os.path.join('assignment1_colab', 'assignment1', 'data',
                           'cifar-10-python', 'cifar-10-batches-py')
Xtr, Ytr, Xte, Yte = load_CIFAR10(CIFAR10_DIR)

# Flatten every image into a single row; -1 infers the row length
# (32 * 32 * 3 = 3072) from the image shape.
Xtr_rows = Xtr.reshape(Xtr.shape[0], -1) # Xtr_rows becomes 50000 x 3072
Xte_rows = Xte.reshape(Xte.shape[0], -1) # Xte_rows becomes 10000 x 3072


In [7]:
# Choose k on a held-out validation set. Expects Xtr, Xtr_rows and Ytr from the
# loading cell and the NearestNeighbor class defined earlier.
NUM_VALIDATION = 1000                  # leading training images held out
K_CHOICES = [1, 3, 5, 10, 20, 50, 100]

# The split overwrites Xtr_rows and Ytr, so re-running this cell unguarded
# would carve another NUM_VALIDATION rows off the training set each time.
# Ytr still has one label per image in Xtr only before the first split.
if Ytr.shape[0] == Xtr.shape[0]:
    Xval_rows = Xtr_rows[:NUM_VALIDATION, :]  # first rows become validation data
    Yval = Ytr[:NUM_VALIDATION]
    Xtr_rows = Xtr_rows[NUM_VALIDATION:, :]   # the remaining rows stay in training
    Ytr = Ytr[NUM_VALIDATION:]

# (k, accuracy) pairs measured on the validation set
validation_accuracies = []

nn = NearestNeighbor()
nn.train(Xtr_rows, Ytr)

# The distances do not depend on k: compute them once and reuse them below.
distance = nn.distanceMatrix(Xval_rows)

for k in K_CHOICES:
    # evaluate this value of k on the validation data
    Yval_predict = nn.predict_labels(distance, k = k)
    acc = np.mean(Yval_predict == Yval)
    print ('accuracy: %f' % (acc,))
    # keep track of what works on the validation set
    validation_accuracies.append((k, acc))
  

accuracy: 0.352000
accuracy: 0.339000
accuracy: 0.354000
accuracy: 0.353000
accuracy: 0.334000
accuracy: 0.312000
accuracy: 0.302000
