In [1]:
import tensorflow as tf
import numpy as np

### Prepare the training data and test data

In [13]:
# Fetch CIFAR-10 as (images, labels) pairs for the train and the test split.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
# Stretch every image into one row: one sample per row, all remaining axes
# collapsed into the columns.
x_train_rows = np.reshape(x_train, (len(x_train), -1))
x_test_rows = np.reshape(x_test, (len(x_test), -1))

(10000, 3072)

In [None]:
# Hold out the first 1000 samples as the validation/development set and keep
# the remaining samples as the training set.
# NOTE: this cell re-slices x_train_rows / y_train in place, so running it a
# second time removes another 1000 samples; re-run the loading cell first.
x_val, x_train_rows = np.split(x_train_rows, [1000])
y_val, y_train = np.split(y_train, [1000])

### RUN THIS: Utils for Nearest Neighbor Classifier

In [14]:
class NearestNeighbor():
    """
    k-nearest-neighbour classifier: it memorises the training set and labels
    every query with the majority label among its k closest training samples.
    """

    def __init__(self):
        pass

    def train(self, X, y):
        """
        "Train" the classifier by storing the training set as-is.
        :param X: training set data (one training sample per row)
        :param y: training set labels, shape (N,) or (N, 1)
        """
        self.x_train = X
        self.y_train = y

    def predict(self, X, loss = "l1", k = 1):
        """
        Predict a label for every row of the given data
        :param X: query samples, one per row, with as many columns as the training data
        :param loss: the distance metric. "l2" - l2 norm; any other value - l1 norm
        :param k: number of nearest neighbours that vote on each label (default 1).
                  Vote ties go to the smallest label; distance ties go to the
                  training sample that was stored first.
        :return: 1-D array of predicted labels with the dtype of the training labels
        :raises ValueError: if k < 1, or if k exceeds the number of training samples
        """
        train = np.asarray(self.x_train)
        # Labels may be a column vector of shape (N, 1); flatten them so that
        # every lookup below yields a scalar label.
        labels = np.asarray(self.y_train).reshape(-1)
        queries = np.asarray(X)

        testset_num = queries.shape[0]
        if k < 1:
            raise ValueError("k must be at least 1")
        if testset_num > 0 and k > train.shape[0]:
            raise ValueError("k cannot exceed the number of training samples")

        # Image data is usually uint8; subtracting uint8 arrays wraps around
        # modulo 256 and yields wrong distances. Compute in a floating dtype.
        calc_dtype = np.result_type(train.dtype, queries.dtype, np.float32)
        queries = queries.astype(calc_dtype, copy = False)
        # Accumulate real-valued sums in at least float64 so that long rows of
        # float32 terms do not lose precision.
        if np.issubdtype(calc_dtype, np.floating):
            sum_dtype = np.result_type(calc_dtype, np.float64)
        else:
            sum_dtype = None

        y_pred = np.zeros(testset_num, dtype = labels.dtype)

        for i in range(testset_num):
            # broadcasting the query row to the x_train shape; the training
            # data is cast on the fly, so no converted copy of it is kept
            diff = np.subtract(train, queries[i], dtype = calc_dtype)
            if (loss == "l2"):
                distances = np.sqrt(np.sum(np.square(diff), axis = 1, dtype = sum_dtype))
            else:
                distances = np.sum(np.abs(diff), axis = 1, dtype = sum_dtype)

            if k == 1:
                # argmin returns the first minimum
                y_pred[i] = labels[np.argmin(distances)]
            else:
                # a stable sort keeps equally distant samples in training order
                nearest = np.argsort(distances, kind = "stable")[:k]
                # np.unique sorts the labels, so argmax picks the smallest
                # label among those with the highest vote count
                candidates, votes = np.unique(labels[nearest], return_counts = True)
                y_pred[i] = candidates[np.argmax(votes)]

        return y_pred

### Now we have all images stretched out as rows.

### We're going to train the Nearest Neighbor classifier

In [15]:
# initialize a classifier
nearest_neighbor_classifier = NearestNeighbor()

# "training" only stores the data inside the classifier
nearest_neighbor_classifier.train(x_train_rows, y_train)

# predict on the test set and evaluate the result
# Note: this may take a long time -- every test image is compared against
# the whole training set with NumPy.
y_pred = nearest_neighbor_classifier.predict(x_test_rows)
# y_pred has shape (N,) while the Keras CIFAR-10 labels have shape (N, 1).
# Without flattening, `==` broadcasts to an (N, N) matrix and its mean is
# not the accuracy.
print(f"Accuracy: {np.mean(y_pred == y_test.ravel())}")

KeyboardInterrupt: 

In [None]:
# evaluate the model with the L2 norm
y_pred = nearest_neighbor_classifier.predict(x_test_rows, loss = "l2")
# flatten the (N, 1) label array so the comparison is element-wise
print(f"Accuracy: {np.mean(y_pred == y_test.ravel())}")

### Tuning the hyperparameter k
Compare the validation accuracy obtained with different values of k.

In [None]:
validation_accuracy = []

# storing the training set does not depend on k, so set the classifier up once
nn = NearestNeighbor()
nn.train(x_train_rows, y_train)

for k in [1, 3, 5, 10, 20, 50, 100]:
    # predict must accept the hyperparameter k (number of neighbours that vote)
    y_pred = nn.predict(x_val, k = k)
    # the predictions are for x_val, so score them against the validation
    # labels; flatten the labels so the comparison is element-wise
    acc = np.mean(y_pred == y_val.ravel())
    print(f"Accuracy: {acc}")

    validation_accuracy.append((k, acc))