<a href="https://colab.research.google.com/github/SimoneDutto/MachineLearning/blob/master/HW3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


**Install requirements**

In [0]:
!pip3 install 'torch==1.3.1'
!pip3 install 'torchvision==0.4.2'
!pip3 install 'Pillow-SIMD'
!pip3 install 'tqdm'

Collecting Pillow-SIMD
[?25l  Downloading https://files.pythonhosted.org/packages/b1/19/b7043190f481abb94dcdd1e69c4432432aaa73455cf1128eae39b8eb2518/Pillow-SIMD-6.0.0.post0.tar.gz (621kB)
[K     |████████████████████████████████| 624kB 4.8MB/s 
[?25hBuilding wheels for collected packages: Pillow-SIMD
  Building wheel for Pillow-SIMD (setup.py) ... [?25l[?25hdone
  Created wheel for Pillow-SIMD: filename=Pillow_SIMD-6.0.0.post0-cp36-cp36m-linux_x86_64.whl size=1062836 sha256=bd9225ef558df4eb016cee1470965e8cf16f8ca487f0039bbae31d5d1378e619
  Stored in directory: /root/.cache/pip/wheels/06/60/65/cc9afa345ccbf10a34cc208266b992941a8608010b592f43d1
Successfully built Pillow-SIMD
Installing collected packages: Pillow-SIMD
Successfully installed Pillow-SIMD-6.0.0.post0




**Import libraries**

In [0]:
import os
import logging

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Subset, DataLoader
from torch.backends import cudnn

import torchvision
from torchvision import transforms
from torchvision.models import alexnet

import numpy as np

from PIL import Image
from tqdm import tqdm

import matplotlib.pyplot as plt
from google.colab import files


**Set Arguments**

In [0]:
DEVICE = 'cuda' # 'cuda' or 'cpu'

NUM_CLASSES = 102 # 101 + 1: There is am extra Background class that should be removed 

BATCH_SIZE = 256     # Higher batch sizes allows for larger learning rates. An empirical heuristic suggests that, when changing
                     # the batch size, learning rate should change by the same factor to have comparable results

LR = 1e-3            # The initial Learning Rate
MOMENTUM = 0.9       # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 5e-5  # Regularization, you can keep this at the default
ALPHA_D = 0.1        # Coef for discriminator reverse gradient

NUM_EPOCHS = 20      # Total number of training epochs (iterations over dataset)
STEP_SIZE = 15       # How many epochs before decreasing learning rate (if using a step-down policy)
GAMMA = 0.1          # Multiplicative factor for learning rate step-down

LOG_FREQUENCY = 10

**Define Data Preprocessing**

In [0]:
# Define transforms for training phase
train_transform = transforms.Compose([transforms.Resize(256),      # Resizes short size of the PIL image to 256
                                      transforms.CenterCrop(224),  # Crops a central square patch of the image
                                                                   # 224 because torchvision's AlexNet needs a 224x224 input!
                                                                   # Remember this when applying different transformations, otherwise you get an error
                                      transforms.ToTensor(), # Turn PIL Image to torch.Tensor
                                      transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
                                      #transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)) # Normalizes tensor with mean and standard deviation
])
# Define transforms for the evaluation phase
eval_transform = transforms.Compose([transforms.Resize(256),
                                      transforms.CenterCrop(224),
                                      transforms.ToTensor(),
                                      transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
                                      #transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))                                    
])

# Define DNN

In [0]:
import torch
import torch.nn as nn
from torch.autograd import Function
try:
    from torch.hub import load_state_dict_from_url
except ImportError:
    from torch.utils.model_zoo import load_url as load_state_dict_from_url

__all__ = ['AlexNet', 'alexnet']

model_urls = {
    'alexnet': 'https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth',
}

class ReverseLayerF(Function):
    # Forwards identity
    # Sends backward reversed gradients
    @staticmethod
    def forward(ctx, x, alpha):
        ctx.alpha = alpha
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        output = grad_output.neg() * ctx.alpha
        return output, None

class DANN(nn.Module):

    def __init__(self, num_classes=1000):
        super(DANN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )
        self.classifier_domain = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, 2), # classifier between domain source or target
        )

    def forward(self, x, alpha=None):
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        if alpha:
          x = ReverseLayerF.apply(x, alpha)
          x = self.classifier_domain(x)
        else:
          x = self.classifier(x)
        return x


def dann( **kwargs):
    """AlexNet model architecture from the
    `"One weird trick..." <https://arxiv.org/abs/1404.5997>`_ paper.
    """
  
    model = DANN(**kwargs)
    #state_dict = load_state_dict_from_url(model_urls['alexnet'],
    #                                      progress=True)
    model.load_state_dict(torch.load("alexnet-owt-4df8aa71.pth"), strict=False)
    return model

# Utility Function

In [0]:
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap, Normalize
from matplotlib.ticker import FormatStrFormatter, StrMethodFormatter
import numpy as np

def heatmap(datas, row_labels, col_labels,  ax=None,
            cbar_kw={}, cbarlabel="", **kwargs):
    """
    Create a heatmap from a numpy array and two lists of labels.
    Parameters
    ----------
    data
        A 2D numpy array of shape (N, M).
    row_labels
        A list or array of length N with the labels for the rows.
    col_labels
        A list or array of length M with the labels for the columns.
    ax
        A `matplotlib.axes.Axes` instance to which the heatmap is plotted.  If
        not provided, use current axes or create a new one.  Optional.
    cbar_kw
        A dictionary with arguments to `matplotlib.Figure.colorbar`.  Optional.
    cbarlabel
        The label for the colorbar.  Optional.
    **kwargs
        All other arguments are forwarded to `imshow`.
    """

    if not ax:
        ax = plt.gca()

    # Plot the heatmap
    #print(datas)
    im = ax.imshow(datas)

    # Create colorbar
    cbar = ax.figure.colorbar(im, ax=ax, **cbar_kw)
    cbar.ax.set_ylabel(cbarlabel, rotation=-90, va="bottom")

    # We want to show all ticks...
    ax.set_xticks(np.arange(datas.shape[1]))
    ax.set_yticks(np.arange(datas.shape[0]))
    # ... and label them with the respective list entries.
    ax.set_xticklabels(row_labels)
    ax.set_yticklabels(col_labels)

    # Let the horizontal axes labeling appear on top.
    ax.tick_params(top=True, bottom=False,
                   labeltop=True, labelbottom=False)

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=-30, ha="right",
             rotation_mode="anchor")

    # Turn spines off and create white grid.
    for edge, spine in ax.spines.items():
        spine.set_visible(False)

    ax.set_xticks(np.arange(datas.shape[1]+1)-.5, minor=True)
    ax.set_yticks(np.arange(datas.shape[0]+1)-.5, minor=True)
    ax.grid(which="minor", color="w", linestyle='-', linewidth=3)
    ax.tick_params(which="minor", bottom=False, left=False)

    return im, cbar


def annotate_heatmap(im, data=None, valfmt ="{x:.2f}",
                     textcolors=["black", "white"],
                     threshold=None, **textkw):
    """
    A function to annotate a heatmap.
    Parameters
    ----------
    im
        The AxesImage to be labeled.
    data
        Data used to annotate.  If None, the image's data is used.  Optional.
    valfmt
        The format of the annotations inside the heatmap.  This should either
        use the string format method, e.g. "$ {x:.2f}", or be a
        `matplotlib.ticker.Formatter`.  Optional.
    textcolors
        A list or array of two color specifications.  The first is used for
        values below a threshold, the second for those above.  Optional.
    threshold
        Value in data units according to which the colors from textcolors are
        applied.  If None (the default) uses the middle of the colormap as
        separation.  Optional.
    **kwargs
        All other arguments are forwarded to each call to `text` used to create
        the text labels.
    """

    if not isinstance(data, (list, np.ndarray)):
        data = im.get_array()

    # Normalize the threshold to the images color range.
    if threshold is not None:
        threshold = im.norm(threshold)
    else:
        threshold = im.norm(data.max())/2.

    # Set default alignment to center, but allow it to be
    # overwritten by textkw.
    kw = dict(horizontalalignment="center",
              verticalalignment="center")
    kw.update(textkw)

    if isinstance(valfmt, str):
        valfmt = StrMethodFormatter(valfmt)

    # Loop over the data and create a `Text` for each "pixel".
    # Change the text's color depending on the data.
    texts = []
    for i in range(data.shape[0]):
        for j in range(data.shape[1]):
            kw.update(color=textcolors[int(im.norm(data[i, j]) > threshold)])
            text = im.axes.text(j, i, valfmt(data[i, j], None), **kw)
            texts.append(text)

    return texts

**Prepare Dataset**

In [6]:
# Clone github repository with data
if not os.path.isdir('./Homework3-PACS'):
  !git clone https://github.com/MachineLearning2020/Homework3-PACS

!wget https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth
PHOTO_DIR = 'Homework3-PACS/PACS/photo'
ART_DIR = 'Homework3-PACS/PACS/art_painting'
CAR_DIR = 'Homework3-PACS/PACS/cartoon'
SKT_DIR = 'Homework3-PACS/PACS/sketch'
# Prepare Pytorch train/test Datasets
photo_dataset = torchvision.datasets.ImageFolder(PHOTO_DIR, transform=train_transform)
art_dataset = torchvision.datasets.ImageFolder(ART_DIR, transform=eval_transform)
car_dataset = torchvision.datasets.ImageFolder(CAR_DIR, transform=eval_transform)
skt_dataset = torchvision.datasets.ImageFolder(SKT_DIR, transform=eval_transform)
# Check dataset sizes
print('Train Dataset: {}'.format(len(photo_dataset)))
print('Test Dataset: {}'.format(len(art_dataset)))
print('Train Dataset: {}'.format(len(car_dataset)))
print('Test Dataset: {}'.format(len(skt_dataset)))

--2019-12-26 17:23:13--  https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth
Resolving download.pytorch.org (download.pytorch.org)... 13.35.7.126, 13.35.7.106, 13.35.7.18, ...
Connecting to download.pytorch.org (download.pytorch.org)|13.35.7.126|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 244418560 (233M) [application/octet-stream]
Saving to: ‘alexnet-owt-4df8aa71.pth.8’


2019-12-26 17:23:14 (231 MB/s) - ‘alexnet-owt-4df8aa71.pth.8’ saved [244418560/244418560]

Train Dataset: 1670
Test Dataset: 2048
Train Dataset: 2344
Test Dataset: 3929


**Prepare Dataloaders**

In [0]:
# Dataloaders iterate over pytorch datasets and transparently provide useful functions (e.g. parallelization and shuffling)
photo_dataloader = DataLoader(photo_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)
art_dataloader = DataLoader(art_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4, drop_last=True)
car_dataloader = DataLoader(car_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4,drop_last=True)
skt_dataloader = DataLoader(skt_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4, drop_last=True)

**Prepare Training**

In [0]:
# Define loss function
criterion_classifier = nn.CrossEntropyLoss() # for classification, we use Cross Entropy
criterion_domain = nn.CrossEntropyLoss()
# Choose parameters to optimize
# To access a different set of parameters, you have to access submodules of AlexNet
# (nn.Module objects, like AlexNet, implement the Composite Pattern)
# e.g.: parameters of the fully connected layers: net.classifier.parameters()
# e.g.: parameters of the convolutional layers: look at alexnet's source code ;) 
 # In this case we optimize over all the parameters of AlexNet

# Define optimizer
# An optimizer updates the weights based on loss
# We use SGD with momentum


# Define scheduler
# A scheduler dynamically changes learning rate
# The most common schedule is the step(-down), which multiplies learning rate by gamma every STEP_SIZE epochs


**Train**

In [9]:
# By default, everything is loaded to cpu
 # this will bring the network to GPU if DEVICE is cuda

cudnn.benchmark # Calling this optimizes runtime


loss_c = []
loss_d0 = []
loss_d1 = []
loss_t = []

accuracies = []
accuracy_max=0
best_alpha = 0
best_lr = 0
max_max = 0
best_best = 0
alpha_alpha = 0
lr_lr = 0

#alphas = [1, 0.1, 0.01]
alphas = [1]
lrs = [0.001]
#lrs = [0.01]
accuracies = np.zeros((len(alphas), len(lrs)))

# It is possible to deativate domain adaptation or validation
da = True
validation = True

nan = False
i=0
len_dataloader = len(photo_dataset)

for alpha in alphas:
  j=0
  for lr in lrs:
    net = dann()
    net.classifier[6] = nn.Linear(4096, 7)
    net = net.to(DEVICE)
    parameters_to_optimize = net.parameters()
    optimizer = optim.SGD(parameters_to_optimize, lr=lr, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)
    accuracy_max = 0
    current_step = 0
# Start iterating over the epochs
    for epoch in range(NUM_EPOCHS):
      print('Starting epoch {}/{}, LR = {}, ALPHA = {}'.format(epoch+1, NUM_EPOCHS, scheduler.get_lr(), alpha))
      m = 0
      # Iterate over the dataset
      for images, labels in photo_dataloader:
        
        p = float(m + (epoch+1) * len_dataloader) / (NUM_EPOCHS+1) / len_dataloader
        alpha = 2. / (1. + np.exp(-10 * p)) - 1
        if current_step % LOG_FREQUENCY == 0:
          print("Alpha "+str(alpha)+"\n")
        # Bring data over the device of choice
        images = images.to(DEVICE)
        labels = labels.to(DEVICE)

        net.train() # Sets module in training mode

        # PyTorch, by default, accumulates gradients after each backward pass
        # We need to manually set the gradients to zero before starting a new iteration
        optimizer.zero_grad() # Zero-ing the gradients

        # Forward pass to the network
        outputs = net(images)

        # Compute loss based on output and ground truth
        loss_classifier = criterion_classifier(outputs, labels)
        if not da:
          loss_classifier.backward()
          optimizer.step()
        else:
          loss_classifier.backward(retain_graph=True)
        # Log loss
        if current_step % LOG_FREQUENCY == 0:
          loss_c.append(loss_classifier.item())
          print('Step {}, Loss Net {}'.format(current_step, loss_classifier.item()))
        if torch.isnan(loss_classifier):
          nan = True
          break
        # Compute gradients for each layer and update weights

        # Forward pass to the discriminator class label 0
        if not da:
          continue
        
        domain_labels = torch.zeros(BATCH_SIZE)
        domain_labels = domain_labels.long().to(DEVICE)

        outputs_domain = net(images, alpha=alpha)

        # Compute loss based on output and ground truth
        loss_discriminator0 = criterion_domain(outputs, domain_labels)
        loss_discriminator0.backward(retain_graph=True)
        # Log loss
        if current_step % LOG_FREQUENCY == 0:
          loss_d0.append(loss_discriminator0.item())
          print('Step {}, Loss Discriminator Class Label0 {}'.format(current_step, loss_discriminator0.item()))

        # Forward pass to the discriminator class label 1
        images, labels = next(iter(art_dataloader))
        images = images.to(DEVICE)
        labels = labels.to(DEVICE)

        domain_labels = torch.ones(BATCH_SIZE)
        domain_labels = domain_labels.long().to(DEVICE)

        outputs = net(images, alpha=alpha)
        loss_discriminator1 = criterion_domain(outputs, domain_labels)
        loss_discriminator1.backward()
        
        # Log loss
        if current_step % LOG_FREQUENCY == 0:
          loss_d1.append(loss_discriminator1.item())
          print('Step {}, Loss Discriminator Class Label1 {}'.format(current_step, loss_discriminator1.item()))
        
        # if current_step % LOG_FREQUENCY == 0:
        #   loss_t.append(loss_tot)
        #   print('Step {}, Total Loss {}'.format(current_step, loss_tot.item()))
        

        optimizer.step() # update weights based on accumulated gradients

        current_step += 1
        m += 1
      
      if nan:
        break
      scheduler.step()

      if not validation:
        continue

      net.train(False) # Set Network to evaluation mode

      running_corrects = 0

      for images, labels in tqdm(skt_dataloader):
        images = images.to(DEVICE)
        labels = labels.to(DEVICE)

        # Forward Pass
        outputs = net(images)

        # Get predictions
        _, preds = torch.max(outputs.data, 1)

        # Update Corrects
        running_corrects += torch.sum(preds == labels.data).data.item()

        # Calculate Accuracy
      for images, labels in tqdm(car_dataloader):
        images = images.to(DEVICE)
        labels = labels.to(DEVICE)

        # Forward Pass
        outputs = net(images)

        # Get predictions
        _, preds = torch.max(outputs.data, 1)

        # Update Corrects
        running_corrects += torch.sum(preds == labels.data).data.item()
      
      accuracy_val = running_corrects / float(len(skt_dataset)+len(car_dataloader))

      print('\nValidation Accuracy on Sketch/Car: {}\n'.format(accuracy_val))
      if accuracy_val > accuracy_max:
        accuracy_max = accuracy_val
        best_model = net
        best_alpha = alpha
        best_lr = lr
      # Step the scheduler
      
      
    if accuracy_max > max_max:
      max_max = accuracy_max
      best_best = best_model
      alpha_alpha = best_alpha
      lr_lr = best_lr
    accuracies[i][j] = accuracy_max
    j+=1
  i+=1
  if not da:
    break 

im, cbar = heatmap(accuracies, alphas, lrs, cmap="RdYlGn")
annotate_heatmap(im)
plt.savefig("grid.png")
plt.show()

plot_loss = False
if plot_loss:
  plt.figure()
  # naming the x axis 
  plt.xlabel('step') 
  # naming the y axis 
  plt.ylabel('loss_tot')
  step_values = range(0,len(loss_t)) 
  plt.xticks(step_values)
  plt.plot(step_values, loss_t, '--bo') 
  # giving a title to my graph 
  plt.title('Total Loss') 
  plt.savefig("loss_tot.png")
  plt.show()

  #files.download("loss_tot.png") 

  plt.figure()
  # naming the x axis 
  plt.xlabel('step') 
  # naming the y axis 
  plt.ylabel('') 
  plt.xticks(step_values)
  plt.plot(step_values, loss_c, '--bo') 
  # giving a title to my graph 
  plt.title('Classifier Loss') 
  plt.savefig("loss_c.png")
  plt.show()

  #files.download("loss_c.png") 

  plt.figure()
  # naming the x axis 
  plt.xlabel('step') 
  # naming the y axis 
  plt.ylabel('Discriminator Loss') 
  plt.xticks(step_values)
  plt.plot(step_values, loss_d0, '--bo', color='olive', label="d0")
  plt.plot(step_values, loss_d1, '--bo', color='skyblue', label="d1")
  plt.legend()

  # giving a title to my graph 
  plt.title('Discriminator Loss') 
  plt.savefig("loss_d.png")
  plt.show()

  #files.download("loss_d.png") 

Starting epoch 1/20, LR = [0.001], ALPHA = 1
Alpha 0.2336958171850616

Step 0, Loss Net 2.0007407665252686
Step 0, Loss Discriminator Class Label0 2.111807346343994
Step 0, Loss Discriminator Class Label1 0.6164483428001404


100%|██████████| 15/15 [00:12<00:00,  1.48it/s]
100%|██████████| 9/9 [00:08<00:00,  1.11it/s]


Validation Accuracy on Sketch/Car: 0.2905027932960894

Starting epoch 2/20, LR = [0.001], ALPHA = 0.23436963286585155





Alpha 0.4436456681367129

Step 10, Loss Net 1.0168030261993408
Step 10, Loss Discriminator Class Label0 0.8729730248451233
Step 10, Loss Discriminator Class Label1 0.002880917862057686


100%|██████████| 15/15 [00:12<00:00,  1.47it/s]
100%|██████████| 9/9 [00:08<00:00,  1.06s/it]


Validation Accuracy on Sketch/Car: 0.39385474860335196

Starting epoch 3/20, LR = [0.001], ALPHA = 0.44376017166062365



100%|██████████| 15/15 [00:12<00:00,  1.55it/s]
100%|██████████| 9/9 [00:08<00:00,  1.28it/s]


Validation Accuracy on Sketch/Car: 0.3420518029456577

Starting epoch 4/20, LR = [0.001], ALPHA = 0.6138017429580398





Alpha 0.7409876704272496

Step 20, Loss Net 0.8841145038604736
Step 20, Loss Discriminator Class Label0 0.6665748357772827
Step 20, Loss Discriminator Class Label1 0.00043579377233982086


  0%|          | 0/15 [00:00<?, ?it/s]

KeyboardInterrupt: ignored

**Test**

In [0]:
print("Test with best model with (apha, lr) ("+str(alpha_alpha) +","+str(lr_lr)+"): accuracy "+str(max_max))

net = best_best # this will bring the network to GPU if DEVICE is cuda
net.train(False) # Set Network to evaluation mode

running_corrects = 0
for images, labels in tqdm(art_dataloader):
  images = images.to(DEVICE)
  labels = labels.to(DEVICE)

  # Forward Pass
  outputs = net(images)

  # Get predictions
  _, preds = torch.max(outputs.data, 1)

  # Update Corrects
  running_corrects += torch.sum(preds == labels.data).data.item()

# Calculate Accuracy
accuracy = running_corrects / float(len(art_dataset))

print('Test Accuracy: {}'.format(accuracy))