**Install requirements**

In [1]:
#!pip3 install 'torch==1.3.1'
#!pip3 install 'torchvision==0.5.0'
#!pip3 install 'Pillow-SIMD'
#!pip3 install 'tqdm'

**Import libraries**

In [2]:
import os
import logging

import torch
import torch.nn as nn
import torch.optim as optim

from torch.utils.data import Subset, DataLoader
from torch.backends import cudnn

import torchvision
from torchvision import transforms
from torchvision.models import alexnet

from PIL import Image
from tqdm import tqdm

import numpy as np
import matplotlib.pyplot as plt

**Set arguments**


In [3]:
DEVICE = 'cuda' # 'cuda' or 'cpu'

NUM_CLASSES = 7 

ALPHA = 0.5

BATCH_SIZE = 256     # Higher batch sizes allows for larger learning rates. An empirical heuristic suggests that, when changing
                     # the batch size, learning rate should change by the same factor to have comparable results

LR = 1e-3            # The initial Learning Rate
MOMENTUM = 0.9       # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 5e-5  # Regularization, you can keep this at the default

NUM_EPOCHS = 30      # Total number of training epochs (iterations over dataset)
STEP_SIZE = 20      # How many epochs before decreasing learning rate (if using a step-down policy)
GAMMA = 0.001          # Multiplicative factor for learning rate step-down

LOG_FREQUENCY = 5

**Define Data Preprocessing**

In [4]:
# Define transforms for training phase
train_transform = transforms.Compose([transforms.Resize(256),      # Resizes short size of the PIL image to 256
                                      transforms.CenterCrop(224),  # Crops a central square patch of the image
                                                                   # 224 because torchvision's AlexNet needs a 224x224 input!
                                                                   # Remember this when applying different transformations, otherwise you get an error
                                      transforms.ToTensor(), # Turn PIL Image to torch.Tensor
                                      transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) # Normalizes tensor with mean and standard deviation
])
# Define transforms for the evaluation phase
eval_transform = transforms.Compose([transforms.Resize(256),
                                      transforms.CenterCrop(224),
                                      transforms.ToTensor(),
                                      transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))                                    
])

**Prepare Dataset**

In [5]:
# Clone github repository with data
if not os.path.isdir('./pacs'):
  !git clone https://github.com/MachineLearning2020/Homework3-PACS
  !mv 'Homework3-PACS' 'pacs'

DATA_DIR = 'pacs/PACS'

# Prepare Pytorch train/test Datasets
p_set=torchvision.datasets.ImageFolder(DATA_DIR+'/photo', transform = train_transform)
a_set=torchvision.datasets.ImageFolder(DATA_DIR+'/art_painting', transform = eval_transform)
c_set=torchvision.datasets.ImageFolder(DATA_DIR+'/cartoon', transform = eval_transform)
s_set=torchvision.datasets.ImageFolder(DATA_DIR+'/sketch', transform = eval_transform)

**Prepare Dataloaders**

In [6]:
# Dataloaders iterate over pytorch datasets and transparently provide useful functions (e.g. parallelization and shuffling)
train_dataloader = DataLoader(p_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)

test_dataloader = DataLoader(a_set, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

**Prepare Network**

In [7]:
!rm -r aiml2020

In [8]:
if not os.path.isdir('./aiml2020'):
  !git clone https://github.com/GianlucaMega/aiml2020.git

from aiml2020.alexnet_modified2 import alexnet2

net = alexnet2() # Loading AlexNet model

net.dann_classifier.load_state_dict(net.classifier.state_dict())
# AlexNet has 1000 output neurons, corresponding to the 1000 ImageNet's classes
# We need 7 outputs for PACS
net.classifier[6] = nn.Linear(4096, NUM_CLASSES) # nn.Linear in pytorch is a fully connected layer
                                                 # The convolutional layer is nn.Conv2d
net.dann_classifier[6] = nn.Linear(4096, 2)

initial_parameters = net.state_dict()
# We just changed the last layer of AlexNet with a new fully connected layer with 7 outputs
# It is strongly suggested to study torchvision.models.alexnet source code

Cloning into 'aiml2020'...
remote: Enumerating objects: 46, done.[K
remote: Counting objects:   2% (1/46)[Kremote: Counting objects:   4% (2/46)[Kremote: Counting objects:   6% (3/46)[Kremote: Counting objects:   8% (4/46)[Kremote: Counting objects:  10% (5/46)[Kremote: Counting objects:  13% (6/46)[Kremote: Counting objects:  15% (7/46)[Kremote: Counting objects:  17% (8/46)[Kremote: Counting objects:  19% (9/46)[Kremote: Counting objects:  21% (10/46)[Kremote: Counting objects:  23% (11/46)[Kremote: Counting objects:  26% (12/46)[Kremote: Counting objects:  28% (13/46)[Kremote: Counting objects:  30% (14/46)[Kremote: Counting objects:  32% (15/46)[Kremote: Counting objects:  34% (16/46)[Kremote: Counting objects:  36% (17/46)[Kremote: Counting objects:  39% (18/46)[Kremote: Counting objects:  41% (19/46)[Kremote: Counting objects:  43% (20/46)[Kremote: Counting objects:  45% (21/46)[Kremote: Counting objects:  47% (22/46)[Kremote: Counting

**Prepare Training**

In [9]:
# Define loss function
criterion = nn.CrossEntropyLoss() # for classification, we use Cross Entropy

# Choose parameters to optimize
# To access a different set of parameters, you have to access submodules of AlexNet
# (nn.Module objects, like AlexNet, implement the Composite Pattern)
# e.g.: parameters of the fully connected layers: net.classifier.parameters()
# e.g.: parameters of the convolutional layers: look at alexnet's source code ;) 
parameters_to_optimize = net.parameters() # In this case we optimize over all the parameters of AlexNet

# Define optimizer
# An optimizer updates the weights based on loss
# We use SGD with momentum
optimizer = optim.SGD(parameters_to_optimize, lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)

# Define scheduler
# A scheduler dynamically changes learning rate
# The most common schedule is the step(-down), which multiplies learning rate by gamma every STEP_SIZE epochs
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)

**Train**

In [10]:
# By default, everything is loaded to cpu
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda

cudnn.benchmark # Calling this optimizes runtime

par_array = [
             {'lr':0.1, 'ALPHA':None, 'test':'cartoon'},
             {'lr':0.1, 'ALPHA':None, 'test':'sketch'},
             {'lr':0.01, 'ALPHA':None, 'test':'cartoon'},
             {'lr':0.01, 'ALPHA':None, 'test':'sketch'},
             {'lr':0.005, 'ALPHA':None, 'test':'cartoon'},
             {'lr':0.005, 'ALPHA':None, 'test':'sketch'},
             {'lr':0.001, 'ALPHA':None, 'test':'cartoon'},
             {'lr':0.001, 'ALPHA':None, 'test':'sketch'},
             {'lr':0.1, 'ALPHA':0.5, 'test':'cartoon'},
             {'lr':0.1, 'ALPHA':0.5, 'test':'sketch'},
             {'lr':0.01, 'ALPHA':0.5, 'test':'cartoon'},
             {'lr':0.01, 'ALPHA':0.5, 'test':'sketch'},
             {'lr':0.005, 'ALPHA':0.5, 'test':'cartoon'},
             {'lr':0.005, 'ALPHA':0.5, 'test':'sketch'},
             {'lr':0.001, 'ALPHA':0.5, 'test':'cartoon'},
             {'lr':0.001, 'ALPHA':0.5, 'test':'sketch'},
             {'lr':0.1, 'ALPHA':0.1, 'test':'cartoon'},
             {'lr':0.1, 'ALPHA':0.1, 'test':'sketch'},
             {'lr':0.01, 'ALPHA':0.1, 'test':'cartoon'},
             {'lr':0.01, 'ALPHA':0.1, 'test':'sketch'},
             {'lr':0.005, 'ALPHA':0.1, 'test':'cartoon'},
             {'lr':0.005, 'ALPHA':0.1, 'test':'sketch'},
             {'lr':0.001, 'ALPHA':0.1, 'test':'cartoon'},
             {'lr':0.001, 'ALPHA':0.1, 'test':'sketch'},
             {'lr':0.1, 'ALPHA':0.05, 'test':'cartoon'},
             {'lr':0.1, 'ALPHA':0.05, 'test':'sketch'},
             {'lr':0.01, 'ALPHA':0.05, 'test':'cartoon'},
             {'lr':0.01, 'ALPHA':0.05, 'test':'sketch'},
             {'lr':0.005, 'ALPHA':0.05, 'test':'cartoon'},
             {'lr':0.005, 'ALPHA':0.05, 'test':'sketch'},
             {'lr':0.001, 'ALPHA':0.05, 'test':'cartoon'},
             {'lr':0.001, 'ALPHA':0.05, 'test':'sketch'},
]
for params in par_array:
  net.load_state_dict(initial_parameters)
  LR=params['lr']
  ALPHA=params['ALPHA']
  if params['test'] == 'cartoon':
    test_dataloader = DataLoader(c_set, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
    length=float(len(c_set))
  else:
    test_dataloader = DataLoader(s_set, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
    length=float(len(s_set))

  print('lr= {} alpha= {}'.format(LR,ALPHA))

  criterion = nn.CrossEntropyLoss()
  parameters_to_optimize = net.parameters()
  optimizer = optim.SGD(parameters_to_optimize, lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
  scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)


  # Start iterating over the epochs
  for epoch in range(NUM_EPOCHS):

    dataloader_iterator = iter(test_dataloader)

    # Iterate over the dataset
    for images, labels in train_dataloader:
      # Bring data over the device of choice
      images = images.to(DEVICE)
      labels = labels.to(DEVICE)

      zero_labels = torch.zeros(images.size()[0],dtype=torch.long).to(DEVICE)
      net.train() # Sets module in training mode

      # PyTorch, by default, accumulates gradients after each backward pass
      # We need to manually set the gradients to zero before starting a new iteration
      optimizer.zero_grad() # Zero-ing the gradients

      # Forward pass to the network
      outputs = net(images)

      # Compute loss based on output and ground truth
      loss = criterion(outputs, labels)

      # Compute gradients for each layer and update weights
      loss.backward()  # backward pass: computes gradients


      #Point b.ii
      outputs = net(images,ALPHA)
      loss = criterion(outputs, zero_labels)

      # Compute gradients for each layer and update weights
      loss.backward()  # backward pass: computes gradients


      #Point b.iii
      
      try:
          images, labels = next(dataloader_iterator)
      except StopIteration:
          dataloader_iterator = iter(dataloader)
          images, labels = next(dataloader_iterator)

      images = images.to(DEVICE)
      one_labels = torch.ones(images.size()[0],dtype=torch.long).to(DEVICE)
      outputs = net(images,ALPHA)
      loss = criterion(outputs, one_labels)

      # Compute gradients for each layer and update weights
      loss.backward()  # backward pass: computes gradients


      #############################
      optimizer.step() # update weights based on accumulated gradients


    # Step the scheduler
    scheduler.step() 

  #MEASURE ACCURACY AFTER TRAINING
  net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
  net.train(False) # Set Network to evaluation mode

  running_corrects = 0
  for images, labels in tqdm(test_dataloader):
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    # Forward Pass
    outputs = net(images)

    # Get predictions
    _, preds = torch.max(outputs.data, 1)

    # Update Corrects
    running_corrects += torch.sum(preds == labels.data).data.item()

  # Calculate Accuracy
  
  accuracy = running_corrects / length

  print('PARAMS:{} Test Accuracy: {}'.format(params,accuracy))
  
  


lr= 0.1 alpha= None


100%|██████████| 10/10 [00:07<00:00,  1.37it/s]


PARAMS:{'lr': 0.1, 'ALPHA': None, 'test': 'cartoon'} Test Accuracy: 0.1659556313993174
lr= 0.1 alpha= None


100%|██████████| 16/16 [00:12<00:00,  1.31it/s]


PARAMS:{'lr': 0.1, 'ALPHA': None, 'test': 'sketch'} Test Accuracy: 0.1964876558920845
lr= 0.01 alpha= None


100%|██████████| 10/10 [00:07<00:00,  1.32it/s]


PARAMS:{'lr': 0.01, 'ALPHA': None, 'test': 'cartoon'} Test Accuracy: 0.20093856655290102
lr= 0.01 alpha= None


100%|██████████| 16/16 [00:12<00:00,  1.23it/s]


PARAMS:{'lr': 0.01, 'ALPHA': None, 'test': 'sketch'} Test Accuracy: 0.1964876558920845
lr= 0.005 alpha= None


100%|██████████| 10/10 [00:07<00:00,  1.28it/s]


PARAMS:{'lr': 0.005, 'ALPHA': None, 'test': 'cartoon'} Test Accuracy: 0.19795221843003413
lr= 0.005 alpha= None


100%|██████████| 16/16 [00:13<00:00,  1.19it/s]


PARAMS:{'lr': 0.005, 'ALPHA': None, 'test': 'sketch'} Test Accuracy: 0.1883430898447442
lr= 0.001 alpha= None


100%|██████████| 10/10 [00:07<00:00,  1.27it/s]


PARAMS:{'lr': 0.001, 'ALPHA': None, 'test': 'cartoon'} Test Accuracy: 0.1992320819112628
lr= 0.001 alpha= None


100%|██████████| 16/16 [00:12<00:00,  1.29it/s]


PARAMS:{'lr': 0.001, 'ALPHA': None, 'test': 'sketch'} Test Accuracy: 0.1883430898447442
lr= 0.1 alpha= 0.5


100%|██████████| 10/10 [00:07<00:00,  1.36it/s]


PARAMS:{'lr': 0.1, 'ALPHA': 0.5, 'test': 'cartoon'} Test Accuracy: 0.1659556313993174
lr= 0.1 alpha= 0.5


100%|██████████| 16/16 [00:12<00:00,  1.29it/s]


PARAMS:{'lr': 0.1, 'ALPHA': 0.5, 'test': 'sketch'} Test Accuracy: 0.1964876558920845
lr= 0.01 alpha= 0.5


100%|██████████| 10/10 [00:07<00:00,  1.37it/s]


PARAMS:{'lr': 0.01, 'ALPHA': 0.5, 'test': 'cartoon'} Test Accuracy: 0.1659556313993174
lr= 0.01 alpha= 0.5


100%|██████████| 16/16 [00:12<00:00,  1.31it/s]


PARAMS:{'lr': 0.01, 'ALPHA': 0.5, 'test': 'sketch'} Test Accuracy: 0.1964876558920845
lr= 0.005 alpha= 0.5


100%|██████████| 10/10 [00:07<00:00,  1.38it/s]


PARAMS:{'lr': 0.005, 'ALPHA': 0.5, 'test': 'cartoon'} Test Accuracy: 0.1659556313993174
lr= 0.005 alpha= 0.5


100%|██████████| 16/16 [00:12<00:00,  1.31it/s]


PARAMS:{'lr': 0.005, 'ALPHA': 0.5, 'test': 'sketch'} Test Accuracy: 0.1964876558920845
lr= 0.001 alpha= 0.5


100%|██████████| 10/10 [00:07<00:00,  1.38it/s]


PARAMS:{'lr': 0.001, 'ALPHA': 0.5, 'test': 'cartoon'} Test Accuracy: 0.1659556313993174
lr= 0.001 alpha= 0.5


100%|██████████| 16/16 [00:12<00:00,  1.30it/s]


PARAMS:{'lr': 0.001, 'ALPHA': 0.5, 'test': 'sketch'} Test Accuracy: 0.29015016543649785
lr= 0.1 alpha= 0.1


100%|██████████| 10/10 [00:07<00:00,  1.36it/s]


PARAMS:{'lr': 0.1, 'ALPHA': 0.1, 'test': 'cartoon'} Test Accuracy: 0.1659556313993174
lr= 0.1 alpha= 0.1


100%|██████████| 16/16 [00:12<00:00,  1.30it/s]


PARAMS:{'lr': 0.1, 'ALPHA': 0.1, 'test': 'sketch'} Test Accuracy: 0.1964876558920845
lr= 0.01 alpha= 0.1


100%|██████████| 10/10 [00:07<00:00,  1.34it/s]


PARAMS:{'lr': 0.01, 'ALPHA': 0.1, 'test': 'cartoon'} Test Accuracy: 0.1659556313993174
lr= 0.01 alpha= 0.1


100%|██████████| 16/16 [00:12<00:00,  1.28it/s]


PARAMS:{'lr': 0.01, 'ALPHA': 0.1, 'test': 'sketch'} Test Accuracy: 0.1964876558920845
lr= 0.005 alpha= 0.1


100%|██████████| 10/10 [00:07<00:00,  1.37it/s]


PARAMS:{'lr': 0.005, 'ALPHA': 0.1, 'test': 'cartoon'} Test Accuracy: 0.4931740614334471
lr= 0.005 alpha= 0.1


100%|██████████| 16/16 [00:12<00:00,  1.29it/s]


PARAMS:{'lr': 0.005, 'ALPHA': 0.1, 'test': 'sketch'} Test Accuracy: 0.2522270297785696
lr= 0.001 alpha= 0.1


100%|██████████| 10/10 [00:07<00:00,  1.37it/s]


PARAMS:{'lr': 0.001, 'ALPHA': 0.1, 'test': 'cartoon'} Test Accuracy: 0.24872013651877134
lr= 0.001 alpha= 0.1


100%|██████████| 16/16 [00:12<00:00,  1.31it/s]


PARAMS:{'lr': 0.001, 'ALPHA': 0.1, 'test': 'sketch'} Test Accuracy: 0.26597098498345634
lr= 0.1 alpha= 0.05


100%|██████████| 10/10 [00:07<00:00,  1.38it/s]


PARAMS:{'lr': 0.1, 'ALPHA': 0.05, 'test': 'cartoon'} Test Accuracy: 0.1659556313993174
lr= 0.1 alpha= 0.05


100%|██████████| 16/16 [00:12<00:00,  1.28it/s]


PARAMS:{'lr': 0.1, 'ALPHA': 0.05, 'test': 'sketch'} Test Accuracy: 0.1964876558920845
lr= 0.01 alpha= 0.05


100%|██████████| 10/10 [00:07<00:00,  1.31it/s]


PARAMS:{'lr': 0.01, 'ALPHA': 0.05, 'test': 'cartoon'} Test Accuracy: 0.37457337883959047
lr= 0.01 alpha= 0.05


100%|██████████| 16/16 [00:12<00:00,  1.32it/s]


PARAMS:{'lr': 0.01, 'ALPHA': 0.05, 'test': 'sketch'} Test Accuracy: 0.27895138712140494
lr= 0.005 alpha= 0.05


100%|██████████| 10/10 [00:07<00:00,  1.34it/s]


PARAMS:{'lr': 0.005, 'ALPHA': 0.05, 'test': 'cartoon'} Test Accuracy: 0.2713310580204778
lr= 0.005 alpha= 0.05


100%|██████████| 16/16 [00:11<00:00,  1.34it/s]


PARAMS:{'lr': 0.005, 'ALPHA': 0.05, 'test': 'sketch'} Test Accuracy: 0.23975566301857978
lr= 0.001 alpha= 0.05


100%|██████████| 10/10 [00:07<00:00,  1.37it/s]


PARAMS:{'lr': 0.001, 'ALPHA': 0.05, 'test': 'cartoon'} Test Accuracy: 0.2529863481228669
lr= 0.001 alpha= 0.05


100%|██████████| 16/16 [00:11<00:00,  1.34it/s]

PARAMS:{'lr': 0.001, 'ALPHA': 0.05, 'test': 'sketch'} Test Accuracy: 0.2837872232120132





**Test**

In [11]:
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
net.train(False) # Set Network to evaluation mode

running_corrects = 0
for images, labels in tqdm(test_dataloader):
  images = images.to(DEVICE)
  labels = labels.to(DEVICE)

  # Forward Pass
  outputs = net(images)

  # Get predictions
  _, preds = torch.max(outputs.data, 1)

  # Update Corrects
  running_corrects += torch.sum(preds == labels.data).data.item()

# Calculate Accuracy
accuracy = running_corrects / float(len(a_set))

print('Test Accuracy: {}'.format(accuracy))

100%|██████████| 16/16 [00:10<00:00,  1.46it/s]

Test Accuracy: 0.54443359375



