# Finding optimal hyper-parameters for CIFAR10 Images

#Student Name:

#Student id:

In [1]:
import torch
import torchvision
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

# Notebook-wide default hyper-parameters.
# NOTE(review): in the cells visible here, train() has its own defaults and
# tune_hyper_parameter() passes explicit values (and shadows `epochs` locally),
# so epochs, learning_rate, momentum, log_interval and optimizer_name do not
# appear to be read anywhere — confirm before relying on them.
epochs = 5
# batch_size_train is used for both the training and the validation DataLoader;
# batch_size_test is used for the test DataLoader.
batch_size_train = 128
batch_size_test = 1000
learning_rate = 1e-3
momentum = 0.5
log_interval = 100
optimizer_name="Adam"

# Seed PyTorch's global RNG so repeated runs start from the same state.
random_seed = 1
# cuDNN is switched off entirely — presumably to avoid non-deterministic
# kernels; TODO confirm this is the intent.
torch.backends.cudnn.enabled = False
torch.manual_seed(random_seed)

# Checking GPU availability
# `device` is a module-level global that the model/training cells move tensors to.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)


cuda:0


## Divide CIFAR10 into training, validation and test sets
## Use DataLoader iterator for loading data in batches

In [2]:
from torch.utils.data import random_split


def _load_cifar10(train):
    """Return one CIFAR10 split, downloading it on demand.

    Every image is converted to a tensor and then normalised per channel
    with mean 0.5 and standard deviation 0.5.
    """
    preprocessing = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    return torchvision.datasets.CIFAR10(
        '/CIFAR10_dataset/', train=train, download=True, transform=preprocessing)


CIFAR10_training = _load_cifar10(train=True)
CIFAR10_test_set = _load_cifar10(train=False)

# Hold out 5000 of the 50000 training images as a validation set.
CIFAR10_training_set, CIFAR10_validation_set = random_split(CIFAR10_training, [45000, 5000])

# The validation loader deliberately reuses the training batch size;
# only the test loader uses batch_size_test.
train_loader = torch.utils.data.DataLoader(
    CIFAR10_training_set, batch_size=batch_size_train, shuffle=True)

validation_loader = torch.utils.data.DataLoader(
    CIFAR10_validation_set, batch_size=batch_size_train, shuffle=True)

test_loader = torch.utils.data.DataLoader(
    CIFAR10_test_set, batch_size=batch_size_test, shuffle=True)

0it [00:00, ?it/s]

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /CIFAR10_dataset/cifar-10-python.tar.gz


170500096it [00:02, 79042015.30it/s]                               


Extracting /CIFAR10_dataset/cifar-10-python.tar.gz to /CIFAR10_dataset/
Files already downloaded and verified


In [0]:
# Multiple Linear regression
class MultipleLinearRegression(nn.Module):
    """Single fully-connected layer mapping a flattened input to class scores.

    Args:
        in_features: Number of values per sample after flattening. Defaults
            to 32*32*3, the size this notebook uses for its images.
        out_features: Number of outputs per sample. Defaults to 10.
    """

    def __init__(self, in_features=32*32*3, out_features=10):
        super(MultipleLinearRegression, self).__init__()
        self.fc = nn.Linear(in_features, out_features)

    def forward(self, x):
        """Flatten every sample in the batch and apply the linear layer.

        Args:
            x: Tensor whose first dimension is the batch; the remaining
                dimensions must multiply out to ``in_features``.

        Returns:
            Tensor of shape ``(batch, out_features)``.
        """
        # reshape (rather than view) also accepts non-contiguous inputs,
        # e.g. a batch that has been permuted into channel-first layout.
        x = x.reshape(x.size(0), -1)
        return self.fc(x)

In [0]:
# Following code appears at:  https://lirnli.wordpress.com/2017/09/03/one-hot-encoding-in-pytorch/
class One_Hot(nn.Module):
    """Turn integer class labels into one-hot rows.

    Args:
        depth: Number of classes, i.e. the length of each one-hot row.
    """

    def __init__(self, depth):
        super(One_Hot,self).__init__()
        self.depth = depth
        # Row i of the identity matrix is the one-hot vector for class i.
        # Built on the CPU and moved lazily in forward(), so the module does
        # not depend on a notebook-level `device` global.
        self.ones = torch.eye(depth)

    def forward(self, X_in):
        """Return the one-hot rows selected by ``X_in``.

        Args:
            X_in: 1-D tensor of class indices in ``[0, depth)``; it is cast
                to ``long`` before indexing.

        Returns:
            Float tensor of shape ``(len(X_in), depth)`` on the same device
            as ``X_in``.
        """
        indices = X_in.detach().long()
        # `ones` is a plain attribute, so Module.to() does not move it;
        # follow the device of the incoming labels instead.
        if self.ones.device != indices.device:
            self.ones = self.ones.to(indices.device)
        return self.ones.index_select(0, indices)

    def __repr__(self):
        return self.__class__.__name__ + "({})".format(self.depth)

In [0]:
def train(multi_linear_model, learning_rate=0.0001, momentum=0.5, epochs=2, optimizer_name="Adam"):
  """Train the model on the global ``train_loader`` with an MSE objective.

  The loss is the mean squared error between the raw model outputs and the
  one-hot encoded targets (via the global ``one_hot`` encoder). Batches are
  moved to the global ``device`` before the forward pass.

  Args:
    multi_linear_model: The ``nn.Module`` to optimise in place.
    learning_rate: Learning rate passed to the optimizer.
    momentum: Momentum for SGD; ignored when ``optimizer_name`` is "Adam".
    epochs: Number of full passes over ``train_loader``.
    optimizer_name: Either "Adam" or "SGD".

  Returns:
    The loss of the last mini-batch processed (not an epoch average), or
    NaN if no batch was processed at all.

  Raises:
    ValueError: If ``optimizer_name`` is neither "Adam" nor "SGD".
  """
  if optimizer_name == "Adam":
    optimizer = optim.Adam(multi_linear_model.parameters(), lr=learning_rate)
  elif optimizer_name == "SGD":
    optimizer = optim.SGD(multi_linear_model.parameters(), lr=learning_rate, momentum=momentum)
  else:
    # Fail early with a clear message instead of hitting an unbound
    # `optimizer` inside the training loop.
    raise ValueError(
        "Unsupported optimizer_name {!r}; expected 'Adam' or 'SGD'".format(optimizer_name))

  multi_linear_model.train()
  # Defined up front so the function still returns when epochs == 0 or the
  # loader yields no batches.
  error = float("nan")

  for epoch in range(1, epochs + 1):
    for data, target in train_loader:
      data = data.to(device)
      target = target.to(device)
      optimizer.zero_grad()
      output = multi_linear_model(data)
      loss = F.mse_loss(output, one_hot(target))
      loss.backward()
      optimizer.step()
      error = loss.item()
    # Reports the loss of the final mini-batch of this epoch.
    print('EPOCH {} completed. learning_rate= {:.6f}, Training Loss: {:.4f}'.format( epoch,learning_rate,error))
  return error


In [0]:
def validation(multi_linear_model):
  """Evaluate the model on the global ``validation_loader``.

  Prints the average loss and the accuracy over the validation set. The
  loss is the squared error against one-hot targets, summed over all
  outputs and divided by the number of samples.

  Args:
    multi_linear_model: The ``nn.Module`` to evaluate; it is switched to
      eval mode.

  Returns:
    The accuracy in percent, computed as ``100. * correct / N``. Because
    ``correct`` is accumulated from tensor sums, this is a 0-dim tensor
    whenever the loader yields at least one batch; use ``.item()`` or
    ``float()`` to get a plain Python number.
  """
  multi_linear_model.eval()
  num_samples = len(validation_loader.dataset)
  validation_loss = 0
  correct = 0
  with torch.no_grad(): # no gradients are needed for evaluation
    for data, target in validation_loader:
      data = data.to(device)
      target = target.to(device)
      output = multi_linear_model(data)
      # Predicted class = index of the largest output.
      pred = output.argmax(dim=1)
      correct += pred.eq(target).sum()
      # reduction='sum' replaces the deprecated size_average=False.
      validation_loss += F.mse_loss(output, one_hot(target), reduction='sum').item()
  validation_loss /= num_samples
  accuracy = 100. * correct / num_samples
  print('Validation set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(validation_loss, correct, num_samples, accuracy))
  return accuracy

In [0]:
def test(multi_linear_model):
  """Evaluate the model on the global ``test_loader`` and print the result.

  Prints the average loss and the accuracy over the test set. The loss is
  the squared error against one-hot targets, summed over all outputs and
  divided by the number of samples. Nothing is returned.

  Args:
    multi_linear_model: The ``nn.Module`` to evaluate; it is switched to
      eval mode.
  """
  multi_linear_model.eval()
  num_samples = len(test_loader.dataset)
  test_loss = 0
  correct = 0
  with torch.no_grad():
    for data, target in test_loader:
      data = data.to(device)
      target = target.to(device)
      output = multi_linear_model(data)
      # reduction='sum' replaces the deprecated size_average=False.
      test_loss += F.mse_loss(output, one_hot(target), reduction='sum').item()
      # Predicted class = index of the largest output.
      pred = output.argmax(dim=1)
      correct += pred.eq(target).sum()
  test_loss /= num_samples
  print('Test set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(test_loss, correct, num_samples, 100. * correct / num_samples))

In [0]:
def tune_hyper_parameter():
  """Grid-search learning rate (and momentum for SGD) on the validation set.

  For every configuration a fresh ``MultipleLinearRegression`` is trained
  with ``train`` and scored with ``validation``. Whenever a configuration
  beats the best validation accuracy seen so far, the model is also run
  through ``test`` so its test accuracy is printed. Adam configurations
  are tried first, then SGD.

  The last line printed reports the best validation accuracy and the
  configuration that achieved it. Nothing is returned.
  """
  adam_learning_rates = [0.00004, 0.00006, 0.00008, 0.0001]
  sgd_learning_rates = [0.00008, 0.0001, 0.0004, 0.0006]
  sgd_momenta = [0.9, 0.7, 0.5, 0.3]
  # Known issue: as one of the TAs mentioned, 5 epochs is not practical for
  # reaching an accuracy of 38%, so 10 epochs are used instead.
  epochs = 10

  # One entry per optimizer: its name and the keyword arguments for each
  # configuration, in the order they are tried.
  searches = [
      ("Adam", [{"learning_rate": lr} for lr in adam_learning_rates]),
      ("SGD", [{"learning_rate": lr, "momentum": mmt}
               for lr in sgd_learning_rates for mmt in sgd_momenta]),
  ]

  max_accuracy = 0.0
  best_optimizer, best_config = None, None

  for optimizer_name, configs in searches:
    print("Performing hyper-parameter grid search for {}...\n".format(optimizer_name))
    for config in configs:
      multi_linear_model = MultipleLinearRegression().to(device)
      train(multi_linear_model, epochs=epochs, optimizer_name=optimizer_name, **config)
      # float() accepts both a 0-dim tensor and a plain number, so the
      # search does not depend on the exact return type of validation().
      current_accuracy = float(validation(multi_linear_model))
      if current_accuracy > max_accuracy:
        max_accuracy = current_accuracy
        best_optimizer, best_config = optimizer_name, config
        print("Performing test...\n")
        test(multi_linear_model)

  ##Final output will be like:

  #Best performance: Validation Accuracy=38% , with Adam optimizer learning_rate=0.??????

  #or

  #Best performance: Validation Accuracy=37% , with SGD optimizer learning_rate=0.?????? and momentum=0.???
  if best_optimizer == "Adam":
    print("Best performance: Validation Accuracy={:.0f}% , with Adam optimizer learning_rate={:f}\n".format(max_accuracy, best_config["learning_rate"]))
  elif best_optimizer == "SGD":
    print("Best performance: Validation Accuracy={:.0f}% , with SGD optimizer learning_rate={:f} and momentum={:.3f}\n".format(max_accuracy, best_config["learning_rate"], best_config["momentum"]))
  else:
    # No configuration scored above 0%; say so rather than printing nothing.
    print("Best performance: no configuration achieved a validation accuracy above 0%\n")
    

In [9]:
##Final Block
##Keep the output block of this section while submitting your solution 
##The last line of the output must contain the accuracy and best configuration information
# Fresh, untrained model; used only for the baseline validation run below.
multi_linear_model = MultipleLinearRegression().to(device)
# train(), validation() and test() look up `one_hot` as a global, so the
# encoder must be created before any of them is called.
one_hot = One_Hot(10).to(device)
# Baseline: validation accuracy of the untrained model.
validation(multi_linear_model)
# Run the grid search; it builds its own model per configuration and prints
# the best configuration at the end.
tune_hyper_parameter()
        





Validation set: Avg. loss: 1.8523, Accuracy: 479/5000 (9%)

Performing hyper-parameter grid search for Adam...

EPOCH 1 completed. learning_rate= 0.000040, Training Loss: 0.0888
EPOCH 2 completed. learning_rate= 0.000040, Training Loss: 0.0876
EPOCH 3 completed. learning_rate= 0.000040, Training Loss: 0.0866
EPOCH 4 completed. learning_rate= 0.000040, Training Loss: 0.0878
EPOCH 5 completed. learning_rate= 0.000040, Training Loss: 0.0827
EPOCH 6 completed. learning_rate= 0.000040, Training Loss: 0.0805
EPOCH 7 completed. learning_rate= 0.000040, Training Loss: 0.0800
EPOCH 8 completed. learning_rate= 0.000040, Training Loss: 0.0765
EPOCH 9 completed. learning_rate= 0.000040, Training Loss: 0.0811
EPOCH 10 completed. learning_rate= 0.000040, Training Loss: 0.0766
Validation set: Avg. loss: 0.8095, Accuracy: 1862/5000 (37%)

Performing test...

Test set: Avg. loss: 0.8095, Accuracy: 3730/10000 (37%)

EPOCH 1 completed. learning_rate= 0.000060, Training Loss: 0.0887
EPOCH 2 completed. lea