In [1]:
import torch
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np

Data preprocessing

In [2]:
transform = transforms.Compose([
    transforms.ToTensor(),  # Convert images to PyTorch tensors
])

# Load CIFAR-10 dataset
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

Files already downloaded and verified
Files already downloaded and verified


In [3]:
# Create DataLoaders
train_loader = DataLoader(dataset=train_dataset, batch_size=1, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=1, shuffle=True)

In [4]:
# Separate X and y data
X_train = []
y_train = []
X_test = []
y_test = []

for data, label in train_loader:
    X_train.append(data)
    y_train.append(label)

for data, label in test_loader:
    X_test.append(data)
    y_test.append(label)

# Convert lists to tensors
X_train = torch.cat(X_train, dim=0)
y_train = torch.cat(y_train, dim=0)
X_test = torch.cat(X_test, dim=0)
y_test = torch.cat(y_test, dim=0)

In [5]:
# Flatten all arrays
X_train_rows = X_train.reshape(X_train.shape[0], 32*32*3)
X_test_rows = X_test.reshape(X_test.shape[0], 32*32*3)

Nearest neighbour class definition

In [6]:
class NearestNeighbour():
    def __init__(self):
        pass
    def train(self, X, y):
        self.Xtr = X
        self.ytr = y
    def predict(self, X):
        num_test = X.shape[0]
        Ypred = torch.zeros(num_test, dtype=self.ytr.dtype)

        # Loop over all test rows
        for i in range(num_test):
            distances = torch.sum(torch.abs(self.Xtr - X[i]), axis=1)
            min_index = torch.argmin(distances)
            Ypred[i] = self.ytr[min_index]
        return Ypred

"Train" and run inference (runs in 46m 8s locally)

In [8]:
nn = NearestNeighbour()
nn.train(X_train_rows, y_train)
Yte_predict = nn.predict(X_test_rows)
accuracy = torch.mean((Yte_predict == y_test).float())  # Cast the boolean tensor to float
print((Yte_predict == y_test).float())
print('Accuracy: %f' % accuracy) # Prints 'Accuracy: 0.385900'

tensor([0., 0., 1.,  ..., 1., 0., 1.])
Accuracy: 0.385900


Accuracy almost 4 times better than expected from random predictions, but given it is still less than half what is possible with a CNN, I won't implement a k nearest neighbours or sweep k choices for lowest validation loss, and leave playing around with cross-validation until we get to CNNs.