In [1]:
# Imports
import torch
import torchvision
import torchvision.transforms as transforms
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import numpy as np
from sklearn.decomposition import PCA

In [2]:
# Load the CIFAR-10 dataset

# transforms.Compose() chains together the transformations we want applied to every image.
# ToTensor() converts each image to a tensor and scales it to between 0 and 1; Normalize()
# then normalizes each RGB channel with a mean and std of 0.5.
channel_mean = (0.5, 0.5, 0.5)
channel_std = (0.5, 0.5, 0.5)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std),
])

# Both splits share the same download location and preprocessing; only the `train` flag
# differs. download=True fetches the archive into ./data when needed.
cifar_kwargs = dict(root='./data', download=True, transform=transform)
trainset = torchvision.datasets.CIFAR10(train=True, **cifar_kwargs)
testset = torchvision.datasets.CIFAR10(train=False, **cifar_kwargs)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:05<00:00, 29977018.06it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [3]:
# Split the training and test data into images and class labels and convert them into numpy arrays


def _load_full_batch(dataset):
    """Load an entire dataset as a single, unshuffled batch.

    Args:
        dataset: A dataset accepted by torch.utils.data.DataLoader that supports len().

    Returns:
        A (loader, batch) tuple: the DataLoader that was built and the first
        batch it yields. Because batch_size equals len(dataset), that batch
        holds every sample.
    """
    loader = torch.utils.data.DataLoader(dataset, batch_size=len(dataset), shuffle=False)
    return loader, next(iter(loader))


def _flatten_images(images):
    """Reshape an array of N images to shape (N, -1): one flat feature row per image."""
    return images.reshape(images.shape[0], -1)


# --- Training split ---
data_loader_train, train_data = _load_full_batch(trainset)
print(type(data_loader_train))

# Split the batch into the images and class labels
train_images, train_labels = train_data
# Convert the tensors to numpy arrays since this is what the sklearn model requires
train_images = train_images.numpy()
train_labels = train_labels.numpy()
print(f"the initial dimensions of the images are {train_images.shape}")
# Flatten out the images for the distance calculations
train_images = _flatten_images(train_images)
print(f"the new dimensions of the images are {train_images.shape}")


# --- Test split: same pipeline as above, without the diagnostic prints ---
data_loader_test, test_data = _load_full_batch(testset)
test_images, test_labels = test_data
test_images = _flatten_images(test_images.numpy())
test_labels = test_labels.numpy()

<class 'torch.utils.data.dataloader.DataLoader'>
the initial dimensions of the images are (50000, 3, 32, 32)
the new dimensions of the images are (50000, 3072)


In [4]:
# Create the KNN classifier and fit it on the flattened training images
n_neighbors = 5  # number of neighbours consulted for each prediction
knn = KNeighborsClassifier(n_neighbors=n_neighbors)
knn.fit(X=train_images, y=train_labels)

In [5]:
# Evaluate the model: predict a label for every test image, then compare the
# predictions against the true test labels.
y_pred = knn.predict(X=test_images)
accuracy = accuracy_score(y_true=test_labels, y_pred=y_pred)
# accuracy is multiplied by 100 below so it is reported as a percentage
print(f"Test Accuracy: {accuracy * 100:.2f}%")



Test Accuracy: 33.98%
