<a href="https://colab.research.google.com/github/SimoneDutto/MachineLearning/blob/master/HW3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


**Install requirements**

In [0]:
!pip3 install 'torch==1.3.1'
!pip3 install 'torchvision==0.4.2'
!pip3 install 'Pillow-SIMD'
!pip3 install 'tqdm'

Collecting Pillow-SIMD
[?25l  Downloading https://files.pythonhosted.org/packages/b1/19/b7043190f481abb94dcdd1e69c4432432aaa73455cf1128eae39b8eb2518/Pillow-SIMD-6.0.0.post0.tar.gz (621kB)
[K     |████████████████████████████████| 624kB 4.8MB/s 
[?25hBuilding wheels for collected packages: Pillow-SIMD
  Building wheel for Pillow-SIMD (setup.py) ... [?25l[?25hdone
  Created wheel for Pillow-SIMD: filename=Pillow_SIMD-6.0.0.post0-cp36-cp36m-linux_x86_64.whl size=1062836 sha256=bd9225ef558df4eb016cee1470965e8cf16f8ca487f0039bbae31d5d1378e619
  Stored in directory: /root/.cache/pip/wheels/06/60/65/cc9afa345ccbf10a34cc208266b992941a8608010b592f43d1
Successfully built Pillow-SIMD
Installing collected packages: Pillow-SIMD
Successfully installed Pillow-SIMD-6.0.0.post0




**Import libraries**

In [0]:
import os
import logging

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Subset, DataLoader
from torch.backends import cudnn

import torchvision
from torchvision import transforms
from torchvision.models import alexnet

from PIL import Image
from tqdm import tqdm

import matplotlib.pyplot as plt
from google.colab import files


**Set Arguments**

In [0]:
DEVICE = 'cuda' # 'cuda' or 'cpu'

NUM_CLASSES = 102 # 101 + 1: There is am extra Background class that should be removed 

BATCH_SIZE = 256     # Higher batch sizes allows for larger learning rates. An empirical heuristic suggests that, when changing
                     # the batch size, learning rate should change by the same factor to have comparable results

LR = 1e-3            # The initial Learning Rate
MOMENTUM = 0.9       # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 5e-5  # Regularization, you can keep this at the default
ALPHA_D = 0.1        # Coef for discriminator reverse gradient

NUM_EPOCHS = 20      # Total number of training epochs (iterations over dataset)
STEP_SIZE = 15       # How many epochs before decreasing learning rate (if using a step-down policy)
GAMMA = 0.1          # Multiplicative factor for learning rate step-down

LOG_FREQUENCY = 10

**Define Data Preprocessing**

In [0]:
# Define transforms for training phase
train_transform = transforms.Compose([transforms.Resize(256),      # Resizes short size of the PIL image to 256
                                      transforms.CenterCrop(224),  # Crops a central square patch of the image
                                                                   # 224 because torchvision's AlexNet needs a 224x224 input!
                                                                   # Remember this when applying different transformations, otherwise you get an error
                                      transforms.ToTensor(), # Turn PIL Image to torch.Tensor
                                      transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)) # Normalizes tensor with mean and standard deviation
])
# Define transforms for the evaluation phase
eval_transform = transforms.Compose([transforms.Resize(256),
                                      transforms.CenterCrop(224),
                                      transforms.ToTensor(),
                                      transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))                                    
])

# Define DNN

In [0]:
import torch
import torch.nn as nn
from torch.autograd import Function
try:
    from torch.hub import load_state_dict_from_url
except ImportError:
    from torch.utils.model_zoo import load_url as load_state_dict_from_url

__all__ = ['AlexNet', 'alexnet']

model_urls = {
    'alexnet': 'https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth',
}

class ReverseLayerF(Function):
    # Forwards identity
    # Sends backward reversed gradients
    @staticmethod
    def forward(ctx, x, alpha):
        ctx.alpha = alpha

        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        output = grad_output.neg() * ctx.alpha

        return output, None

class DANN(nn.Module):

    def __init__(self, num_classes=1000):
        super(DANN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )
        self.classifier_domain = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, 2), # classifier between domain source or target
        )

    def forward(self, x, alpha=None):
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        if alpha:
          x = ReverseLayerF.apply(x, alpha)
          x = self.classifier_domain(x)
        else:
          x = self.classifier(x)
        return x


def dann( **kwargs):
    """AlexNet model architecture from the
    `"One weird trick..." <https://arxiv.org/abs/1404.5997>`_ paper.
    """
  
    model = DANN(**kwargs)
    state_dict = load_state_dict_from_url(model_urls['alexnet'],
                                          progress=True)
    model.load_state_dict(state_dict, strict=False)
    return model

**Prepare Dataset**

In [24]:
# Clone github repository with data
if not os.path.isdir('./Homework3-PACS'):
  !git clone https://github.com/MachineLearning2020/Homework3-PACS

PHOTO_DIR = 'Homework3-PACS/PACS/photo'
ART_DIR = 'Homework3-PACS/PACS/art_painting'
CAR_DIR = 'Homework3-PACS/PACS/cartoon'
SKT_DIR = 'Homework3-PACS/PACS/sketch'
# Prepare Pytorch train/test Datasets
photo_dataset = torchvision.datasets.ImageFolder(PHOTO_DIR, transform=train_transform)
art_dataset = torchvision.datasets.ImageFolder(ART_DIR, transform=eval_transform)
car_dataset = torchvision.datasets.ImageFolder(CAR_DIR, transform=eval_transform)
skt_dataset = torchvision.datasets.ImageFolder(SKT_DIR, transform=eval_transform)
# Check dataset sizes
print('Train Dataset: {}'.format(len(photo_dataset)))
print('Test Dataset: {}'.format(len(art_dataset)))
print('Train Dataset: {}'.format(len(car_dataset)))
print('Test Dataset: {}'.format(len(skt_dataset)))

Train Dataset: 1670
Test Dataset: 2048
Train Dataset: 2344
Test Dataset: 3929


**Prepare Dataloaders**

In [0]:
# Dataloaders iterate over pytorch datasets and transparently provide useful functions (e.g. parallelization and shuffling)
photo_dataloader = DataLoader(photo_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)
art_dataloader = DataLoader(art_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)
car_dataloader = DataLoader(car_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
skt_dataloader = DataLoader(skt_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

**Prepare Network**

In [0]:
net = dann() # Loading AlexNet model

**Prepare Training**

In [0]:
# Define loss function
criterion_classifier = nn.CrossEntropyLoss() # for classification, we use Cross Entropy
criterion_domain = nn.CrossEntropyLoss()
# Choose parameters to optimize
# To access a different set of parameters, you have to access submodules of AlexNet
# (nn.Module objects, like AlexNet, implement the Composite Pattern)
# e.g.: parameters of the fully connected layers: net.classifier.parameters()
# e.g.: parameters of the convolutional layers: look at alexnet's source code ;) 
 # In this case we optimize over all the parameters of AlexNet

# Define optimizer
# An optimizer updates the weights based on loss
# We use SGD with momentum


# Define scheduler
# A scheduler dynamically changes learning rate
# The most common schedule is the step(-down), which multiplies learning rate by gamma every STEP_SIZE epochs


**Train**

In [0]:
# By default, everything is loaded to cpu
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda

cudnn.benchmark # Calling this optimizes runtime

current_step = 0
loss_c = []
loss_d0 = []
loss_d1 = []
loss_t = []

accuracies = []
accuracy_max=0
best_alpha = 0
best_lr = 0

alphas = [1, 0.1, 0.01]
lrs = [0.001, 0.01]

da = True
validation = True
nan = False

for alpha in alphas:
  for lr in lrs:
    parameters_to_optimize = net.parameters()
    optimizer = optim.SGD(parameters_to_optimize, lr=lr, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)
# Start iterating over the epochs
    for epoch in range(NUM_EPOCHS):
      print('Starting epoch {}/{}, LR = {}, ALPHA = {}'.format(epoch+1, NUM_EPOCHS, scheduler.get_lr(), alpha))

      # Iterate over the dataset
      for images, labels in photo_dataloader:
        # Bring data over the device of choice
        images = images.to(DEVICE)
        labels = labels.to(DEVICE)

        net.train() # Sets module in training mode

        # PyTorch, by default, accumulates gradients after each backward pass
        # We need to manually set the gradients to zero before starting a new iteration
        optimizer.zero_grad() # Zero-ing the gradients

        # Forward pass to the network
        outputs = net(images)

        # Compute loss based on output and ground truth
        loss_classifier = criterion_classifier(outputs, labels)
        if not da:
          loss_classifier.backward()
          optimizer.step()
        else:
          loss_classifier.backward(retain_graph=True)
        # Log loss
        if current_step % LOG_FREQUENCY == 0:
          loss_c.append(loss_classifier.item())
          print('Step {}, Loss Net {}'.format(current_step, loss_classifier.item()))
        if torch.isnan(loss_classifier):
          nan = True
          break
        # Compute gradients for each layer and update weights

        # Forward pass to the discriminator class label 0
        if not da:
          continue
        
        domain_labels = torch.zeros(BATCH_SIZE)
        domain_labels = domain_labels.long().to(DEVICE)

        outputs_domain = net(images, alpha=alpha)

        # Compute loss based on output and ground truth
        loss_discriminator0 = criterion_domain(outputs, domain_labels)
        loss_discriminator0.backward(retain_graph=True)
        # Log loss
        if current_step % LOG_FREQUENCY == 0:
          loss_d0.append(loss_discriminator0.item())
          print('Step {}, Loss Discriminator Class Label0 {}'.format(current_step, loss_discriminator0.item()))

        # Forward pass to the discriminator class label 1
        images, labels = next(iter(art_dataloader))
        images = images.to(DEVICE)
        labels = labels.to(DEVICE)

        domain_labels = torch.ones(BATCH_SIZE)
        domain_labels = domain_labels.long().to(DEVICE)

        outputs = net(images, alpha=alpha)
        loss_discriminator1 = criterion_domain(outputs, domain_labels)
        loss_discriminator1.backward()


        # Log loss
        if current_step % LOG_FREQUENCY == 0:
          loss_d1.append(loss_discriminator1.item())
          print('Step {}, Loss Discriminator Class Label1 {}'.format(current_step, loss_discriminator1.item()))
        
        # if current_step % LOG_FREQUENCY == 0:
        #   loss_t.append(loss_tot)
        #   print('Step {}, Total Loss {}'.format(current_step, loss_tot.item()))
        

        optimizer.step() # update weights based on accumulated gradients

        current_step += 1

      if not validation:
        continue

      net.train(False) # Set Network to evaluation mode

      running_corrects = 0

      for images, labels in tqdm(skt_dataloader):
        images = images.to(DEVICE)
        labels = labels.to(DEVICE)

        # Forward Pass
        outputs = net(images)

        # Get predictions
        _, preds = torch.max(outputs.data, 1)

        # Update Corrects
        running_corrects += torch.sum(preds == labels.data).data.item()

        # Calculate Accuracy
      for images, labels in tqdm(car_dataloader):
        images = images.to(DEVICE)
        labels = labels.to(DEVICE)

        # Forward Pass
        outputs = net(images)

        # Get predictions
        _, preds = torch.max(outputs.data, 1)

        # Update Corrects
        running_corrects += torch.sum(preds == labels.data).data.item()
      
      accuracy_val = running_corrects / float(len(skt_dataset)+len(car_dataloader))

      print('\nValidation Accuracy on Sketch/Car: {}\n'.format(accuracy_val))
      
      accuracies.append(accuracy_val)

      if accuracy_val > accuracy_max:
        accuracy_max = accuracy_val
        best_model = net
        best_alpha = alpha
        best_lr = lr
      # Step the scheduler
      scheduler.step()
      if nan:
        break
  if not da:
    break 

print(accuracies)
plot_loss = False
if plot_loss:
  plt.figure()
  # naming the x axis 
  plt.xlabel('step') 
  # naming the y axis 
  plt.ylabel('loss_tot')
  step_values = range(0,len(loss_t)) 
  plt.xticks(step_values)
  plt.plot(step_values, loss_t, '--bo') 
  # giving a title to my graph 
  plt.title('Total Loss') 
  plt.savefig("loss_tot.png")
  plt.show()

  #files.download("loss_tot.png") 

  plt.figure()
  # naming the x axis 
  plt.xlabel('step') 
  # naming the y axis 
  plt.ylabel('') 
  plt.xticks(step_values)
  plt.plot(step_values, loss_c, '--bo') 
  # giving a title to my graph 
  plt.title('Classifier Loss') 
  plt.savefig("loss_c.png")
  plt.show()

  #files.download("loss_c.png") 

  plt.figure()
  # naming the x axis 
  plt.xlabel('step') 
  # naming the y axis 
  plt.ylabel('Discriminator Loss') 
  plt.xticks(step_values)
  plt.plot(step_values, loss_d0, '--bo', color='olive', label="d0")
  plt.plot(step_values, loss_d1, '--bo', color='skyblue', label="d1")
  plt.legend()

  # giving a title to my graph 
  plt.title('Discriminator Loss') 
  plt.savefig("loss_d.png")
  plt.show()

  #files.download("loss_d.png") 

Starting epoch 1/20, LR = [0.001], ALPHA = 1
Step 0, Loss Net 17.714834213256836
Step 0, Loss Discriminator Class Label0 15.459487915039062
Step 0, Loss Discriminator Class Label1 0.530855119228363


100%|██████████| 16/16 [00:12<00:00,  1.29it/s]
100%|██████████| 10/10 [00:08<00:00,  1.22it/s]


Validation Accuracy on Sketch/Car: 0.09570957095709572

Starting epoch 2/20, LR = [0.001], ALPHA = 1





Step 10, Loss Net 3.1180098056793213
Step 10, Loss Discriminator Class Label0 0.4778149425983429
Step 10, Loss Discriminator Class Label1 0.002801820170134306


100%|██████████| 16/16 [00:12<00:00,  1.27it/s]
100%|██████████| 10/10 [00:08<00:00,  1.15it/s]


Validation Accuracy on Sketch/Car: 0.09900990099009901

Starting epoch 3/20, LR = [0.001], ALPHA = 1



  0%|          | 0/16 [00:00<?, ?it/s]

**Test**

In [0]:
print("Test with best model with (apha, lr) ("+str(best_alpha) +","+str(best_lr)+"): accuracy "+str(accuracy_max))

net = best_model # this will bring the network to GPU if DEVICE is cuda
net.train(False) # Set Network to evaluation mode

running_corrects = 0
for images, labels in tqdm(art_dataloader):
  images = images.to(DEVICE)
  labels = labels.to(DEVICE)

  # Forward Pass
  outputs = net(images)

  # Get predictions
  _, preds = torch.max(outputs.data, 1)

  # Update Corrects
  running_corrects += torch.sum(preds == labels.data).data.item()

# Calculate Accuracy
accuracy = running_corrects / float(len(art_dataset))

print('Test Accuracy: {}'.format(accuracy))