<a href="https://colab.research.google.com/github/AcidCannon/CMPUT466-Mini-project/blob/master/task1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import torch
from torchvision import datasets, transforms
import numpy as np

In [0]:
# Experiment settings shared by load_data() and run().
config = {
  # Number of samples per DataLoader batch (same value for all three loaders).
  'batch_size': 1000,
  # Per-split shuffle flags forwarded to the DataLoaders.
  'training_shuffle': True,
  'validation_shuffle': True,
  'test_shuffle': False,
  # Number of target classes, forwarded to knn().
  'num_of_classes': 10,
  # Number of neighbours that vote on each prediction.
  'k': 3,
  # 'cpu' forces CPU; any other value selects CUDA when it is available.
  'device': 'gpu',
  # Human-readable label, only printed by run().
  'algorithm': 'K-nearest Neighbors',
}

In [0]:
def load_data(config):
  """Build the CIFAR-10 training, validation and test DataLoaders.

  The official training split is divided by index into a training subset
  (samples 0..44999) and a validation subset (samples 45000..49999). Every
  image goes through the same pipeline: single-channel grayscale, conversion
  to a tensor, then normalisation with mean 0.5 and std 0.5.

  Args:
    config: dict providing 'batch_size', 'training_shuffle',
      'validation_shuffle' and 'test_shuffle'.

  Returns:
    Tuple (training_dataloader, validation_dataloader, test_dataloader).
  """
  # One preprocessing pipeline shared by both dataset objects.
  preprocess = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
  ])

  # Data is stored under ./data and downloaded on first use.
  full_training = datasets.CIFAR10('data', train=True, download=True,
                                   transform=preprocess)
  test_set = datasets.CIFAR10('data', train=False, download=True,
                              transform=preprocess)

  # train:validation = 45000:5000, split by position in the training set.
  training_set = torch.utils.data.Subset(full_training, range(0, 45000))
  validation_set = torch.utils.data.Subset(full_training, range(45000, 50000))

  def make_loader(dataset, shuffle_key):
    # All loaders share the batch size; only the shuffle flag differs.
    return torch.utils.data.DataLoader(dataset,
                                       batch_size=config['batch_size'],
                                       shuffle=config[shuffle_key])

  return (make_loader(training_set, 'training_shuffle'),
          make_loader(validation_set, 'validation_shuffle'),
          make_loader(test_set, 'test_shuffle'))

In [0]:
def knn(x_train, y_train, x_test, k, num_of_classes, device):
  """Classify test samples by majority vote among their k nearest neighbours.

  Distances are Euclidean (L2 norm) between flattened samples. For each test
  sample the k closest training samples vote with their labels; the class
  with the most votes wins, and ties are resolved by torch.argmax over the
  per-class vote counts.

  Args:
    x_train: tensor whose first dimension indexes training samples. Any
      trailing dimensions are flattened into one feature vector per sample.
    y_train: 1-D tensor of integer class labels, one per training sample,
      expected to lie in [0, num_of_classes).
    x_test: tensor whose first dimension indexes test samples; each sample
      must flatten to the same number of features as a training sample.
    k: number of neighbours that vote; must satisfy 1 <= k <= len(x_train).
    num_of_classes: number of classes, i.e. the minimum length of the vote
      histogram.
    device: torch device on which the computation is carried out.

  Returns:
    numpy.ndarray of dtype float64 and shape (len(x_test),) holding the
    predicted class label of every test sample.

  Raises:
    ValueError: if k is smaller than 1 or larger than the number of
      training samples.
  """
  n_train, n_test = x_train.shape[0], x_test.shape[0]
  # k == 0 would otherwise silently predict class 0 for every sample, and
  # k > n_train would fail deep inside torch.topk with an opaque message.
  if not 1 <= k <= n_train:
    raise ValueError(
      'k must be between 1 and the number of training samples ({}), got {}.'
      .format(n_train, k))

  # using tensor for hardware acceleration by using GPU support
  # Flattening here lets callers pass image-shaped tensors directly; inputs
  # that are already 2-D are left unchanged.
  tensor_x_train = x_train.to(device).float().flatten(start_dim=1)
  tensor_x_test = x_test.to(device).float().flatten(start_dim=1)
  print(tensor_x_train.shape)
  print(tensor_x_test.shape)
  # bincount needs integer labels.
  tensor_y_train = y_train.to(device).long()
  print(tensor_y_train.shape)

  # Keep predictions on the compute device and copy them to the host once at
  # the end instead of converting one tensor element per test sample.
  predictions = torch.zeros(n_test, dtype=torch.long, device=device)
  for i in range(n_test):
    # L2 distance from test sample i to every training sample
    distances = torch.norm(tensor_x_train - tensor_x_test[i], dim=1)
    # indices of the k closest training samples
    _, neighbour_indices = torch.topk(distances, k, largest=False)
    # per-class vote histogram of the neighbours' labels
    votes = torch.bincount(tensor_y_train[neighbour_indices],
                           minlength=num_of_classes)
    predictions[i] = torch.argmax(votes)

  # Preserve the original return type: a float64 NumPy vector.
  return predictions.cpu().numpy().astype(np.float64)


In [0]:
def run(config):
  """Run k-nearest-neighbours on one batch of CIFAR-10 and report accuracy.

  Only the first batch from the training loader is used as the reference set
  and only the first batch from the test loader is classified, so both sizes
  are bounded by config['batch_size'].

  Args:
    config: dict providing 'algorithm', 'device', 'k', 'num_of_classes' and
      the keys read by load_data(). The dict is not modified.

  Returns:
    float: fraction of samples in the first test batch whose predicted label
    equals the true label.
  """
  print('Using algorithm: {}.'.format(config['algorithm']))

  # Resolve the device into a local instead of writing it back into config.
  # Overwriting config['device'] with a torch.device made a second call see
  # torch.device('cpu') != 'cpu' as true and switch to the GPU.
  # Comparing via str() also accepts a torch.device left in config earlier.
  wants_cpu = str(config['device']) == 'cpu'
  if not wants_cpu and torch.cuda.is_available():
    device = torch.device('cuda')
    print('Using GPU: {}.'.format(torch.cuda.get_device_name(0)))
  else:
    device = torch.device('cpu')
    print('Using CPU.')

  print("Running...")

  training_dataloader, _, test_dataloader = load_data(config)

  # Take just the first batch of each loader.
  x_train, y_train = next(iter(training_dataloader))
  x_test, y_test = next(iter(test_dataloader))

  # Flatten every image into a single feature vector.
  x_train = x_train.reshape((x_train.shape[0], -1))
  x_test = x_test.reshape((x_test.shape[0], -1))

  predicted_y_test = knn(x_train, y_train, x_test, config['k'], config['num_of_classes'], device)

  # The predictions used to be discarded; compare them with the true labels
  # so the run produces a result.
  accuracy = float(np.mean(predicted_y_test == y_test.numpy()))
  return accuracy

In [160]:
# Execute the KNN pipeline with the settings defined in `config`.
run(config)

Using algorithm: K-nearest Neighbors.
Using GPU: Tesla K80.
Running...
Files already downloaded and verified
Files already downloaded and verified
torch.Size([1000, 1024])
torch.Size([1000, 1024])
torch.Size([1000])
