<a href="https://colab.research.google.com/github/LilMowgli/Homework2-Caltech101/blob/master/Copy_of_My_Homework2_MLDL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


**Install requirements**

In [None]:
# Pin torch 1.4.0 together with torchvision 0.5.0; the recorded log of this cell shows the
# preinstalled torch 1.5.0+cu101 / torchvision 0.6.0+cu101 being uninstalled in favour of the pins.
!pip3 install 'torch==1.4.0'
!pip3 install 'torchvision==0.5.0'
# Pillow-SIMD and tqdm are installed without a version pin.
!pip3 install 'Pillow-SIMD'
!pip3 install 'tqdm'

Collecting torch==1.4.0
[?25l  Downloading https://files.pythonhosted.org/packages/24/19/4804aea17cd136f1705a5e98a00618cb8f6ccc375ad8bfa437408e09d058/torch-1.4.0-cp36-cp36m-manylinux1_x86_64.whl (753.4MB)
[K     |████████████████████████████████| 753.4MB 22kB/s 
[31mERROR: torchvision 0.6.0+cu101 has requirement torch==1.5.0, but you'll have torch 1.4.0 which is incompatible.[0m
[?25hInstalling collected packages: torch
  Found existing installation: torch 1.5.0+cu101
    Uninstalling torch-1.5.0+cu101:
      Successfully uninstalled torch-1.5.0+cu101
Successfully installed torch-1.4.0
Collecting torchvision==0.5.0
[?25l  Downloading https://files.pythonhosted.org/packages/7e/90/6141bf41f5655c78e24f40f710fdd4f8a8aff6c8b7c6f0328240f649bdbe/torchvision-0.5.0-cp36-cp36m-manylinux1_x86_64.whl (4.0MB)
[K     |████████████████████████████████| 4.0MB 5.1MB/s 
Installing collected packages: torchvision
  Found existing installation: torchvision 0.6.0+cu101
    Uninstalling torchvision-0



**Import libraries**

In [None]:
import os
import logging

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Subset, DataLoader
from torch.utils.tensorboard import SummaryWriter
from torch.backends import cudnn

import torchvision
from torchvision import transforms
from torchvision.models import alexnet


from PIL import Image
from tqdm import tqdm

from random import sample
import numpy as np
from itertools import product
from matplotlib import pyplot as plt
from google.colab import files

**Set Arguments**

In [None]:
DEVICE = 'cuda' # Device string handed to the managers: 'cuda' or 'cpu'

NUM_CLASSES = 101 # Output size of the replaced classifier layer; also the one-hot width used by TemporaryManager.to_onehot

BATCH_SIZE = 256     # Larger batch sizes allow for larger learning rates. An empirical heuristic suggests that, when changing
                     # the batch size, the learning rate should change by the same factor to obtain comparable results

LR = 1e-3           # The initial learning rate (NOTE(review): the visible experiment cells set their own `lr` instead)
MOMENTUM = 0.9       # Momentum passed to optim.SGD; keep this at 0.9 when using SGD
WEIGHT_DECAY = 5e-5  # Weight decay passed to optim.SGD (regularization); you can keep this at the default

NUM_EPOCHS = 30      # Total number of training epochs (passes over the dataset)
STEP_SIZE = 20       # How many epochs before decreasing the learning rate (if using a step-down policy)
GAMMA = 0.1          # Multiplicative factor for the learning-rate step-down

LOG_FREQUENCY = 10   # NOTE(review): not referenced in the visible cells; presumably a logging interval -- confirm


**Define Data Preprocessing**

In [None]:
# Define transforms for the training phase
train_transform = transforms.Compose([transforms.Resize(256),      # Resizes the shorter side of the PIL image to 256
                                      transforms.CenterCrop(224),  # Crops a central 224x224 square patch of the image
                                                                   # 224 because torchvision's AlexNet needs a 224x224 input!
                                                                   # Remember this when applying different transformations, otherwise you get an error
                                      transforms.ToTensor(), # Turn PIL Image to torch.Tensor
                                      transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) # Per channel (x - 0.5) / 0.5; matplotlib_imshow undoes this with img / 2 + 0.5
])
# Define transforms for the evaluation phase (currently the same pipeline as train_transform,
# i.e. no training-time augmentation is applied)
eval_transform = transforms.Compose([transforms.Resize(256),
                                      transforms.CenterCrop(224),
                                      transforms.ToTensor(),
                                      transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))                                    
])



**Prepare Dataset**

In [None]:
# Clone the GitHub repository holding the data and the dataset class
# (skipped when ./Caltech101 already exists, so re-running the cell is safe)
if not os.path.isdir('./Caltech101'):
  !git clone https://github.com/LilMowgli/Homework2-Caltech101.git
  !mv 'Homework2-Caltech101' 'Caltech101'

DATA_DIR = 'Caltech101/101_ObjectCategories'
from Caltech101.caltech_dataset import Caltech  # importable only once the clone above has run

train_dataset = Caltech(DATA_DIR, split='train',  transform=train_transform)
test_dataset = Caltech(DATA_DIR, split='test', transform=eval_transform)

# Create the index lists for the training / validation split of the 'train' data:


validation_split = .5 # fraction of the 'train' split held out for validation
random_seed= 42
len_dataset = len(train_dataset)
indices = list(range(len_dataset))
split = int(np.floor(validation_split * len_dataset)) # number of validation samples
np.random.seed(random_seed) # seed NumPy's global generator so the shuffle is repeatable
np.random.shuffle(indices) # shuffle in place so that both subsets are random draws from the 'train' split
train_indexes, val_indexes = indices[split:], indices[:split] # first `split` shuffled indices -> validation, the rest -> training

# Both subsets wrap the same Caltech object, so validation samples also go through train_transform.
# val_dataset has to be built first: the line after it rebinds train_dataset to its training subset.
val_dataset = Subset(train_dataset, val_indexes)
train_dataset = Subset(train_dataset, train_indexes)

# Check dataset sizes
print('Train Dataset: {}'.format(len(train_dataset)))
print('Valid Dataset: {}'.format(len(val_dataset)))
print('Test Dataset: {}'.format(len(test_dataset)))

Cloning into 'Homework2-Caltech101'...
remote: Enumerating objects: 84, done.[K
remote: Counting objects: 100% (84/84), done.[K
remote: Compressing objects: 100% (84/84), done.[K
remote: Total 9340 (delta 53), reused 0 (delta 0), pack-reused 9256[K
Receiving objects: 100% (9340/9340), 129.50 MiB | 40.50 MiB/s, done.
Resolving deltas: 100% (58/58), done.
Checking out files: 100% (9150/9150), done.
Train Dataset: 2892
Valid Dataset: 2892
Test Dataset: 2893


**Prepare Dataloaders**

In [None]:
# Dataloaders iterate over pytorch datasets and transparently provide useful functions (e.g. parallelization and shuffling)
# Training loader: reshuffled on every pass; drop_last=True discards the final incomplete batch
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)
# Validation / test loaders: fixed order, every sample is kept
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

In [None]:
from Caltech101.Manager import Manager

In [None]:
# Helper function to show an image grid

def matplotlib_imshow(img, one_channel=False):
    """Draw an image tensor on the current matplotlib axes.

    Args:
        img: image tensor with the channel axis first (it is averaged over
            dim 0 or moved to the last axis before plotting).
        one_channel (bool): if True, average the channels and draw the
            result with the "Greys" colormap; otherwise draw all channels.
    """
    tensor = img.mean(dim=0) if one_channel else img

    # Undo Normalize(mean=0.5, std=0.5) before converting to NumPy
    pixels = (tensor / 2 + 0.5).numpy()

    if not one_channel:
        # matplotlib wants the channel axis last
        plt.imshow(np.transpose(pixels, (1, 2, 0)))
    else:
        plt.imshow(pixels, cmap="Greys")

In [None]:
# Sanity check: visualize a batch of images
dataiter = iter(val_dataloader)
images, labels = dataiter.next()

# Create grid of images
img_grid = torchvision.utils.make_grid(images)

# Show images
matplotlib_imshow(img_grid, one_channel=False)
labels

In [None]:
# Plot Train vs Validation loss and Train vs Validation Accuracy

def plot_scores(train_loss, validation_loss, train_accuracy, validation_accuracy, save_directory):
  """Plot train vs. validation loss and accuracy side by side, save and show the figure.

  Args:
      train_loss, validation_loss: mappings whose keys are plotted on the
          x axis (labelled "epoch") and whose values are the losses.
      train_accuracy, validation_accuracy: same layout, for the accuracies.
      save_directory: handed to ``fig.savefig``; despite the name this is
          the output file path of the figure.
  """
  train_color, val_color = '#2E84D5', '#FF9232'

  # Left panel: losses, right panel: accuracies
  fig, axes = plt.subplots(1, 2, figsize = [15, 5])

  panels = (
      (axes[0], train_loss, validation_loss,
       'Train Loss', 'Validation Loss', "Val vs Train Loss", "loss"),
      (axes[1], train_accuracy, validation_accuracy,
       'Train Accuracy', 'Validation Accuracy', "Val vs Train Accuracy", "accuracy"),
  )

  for ax, train_scores, val_scores, train_label, val_label, title, ylabel in panels:
    ax.plot(list(train_scores.keys()), list(train_scores.values()),
            color = train_color, linewidth = 2.5, label = train_label)
    ax.plot(list(val_scores.keys()), list(val_scores.values()),
            color = val_color, linewidth = 2.5, label = val_label)
    ax.set_title(title)
    ax.set_xlabel("epoch")
    ax.set_ylabel(ylabel)

  plt.tight_layout()

  for ax in (axes[0], axes[1]):
    ax.legend()
  for ax in (axes[0], axes[1]):
    ax.grid(True)

  fig.savefig(save_directory)

  plt.show()

## FMManager

In [None]:
import torch
import torch.nn as nn
from torch.backends import cudnn
from copy import deepcopy

class TemporaryManager():
    """Manage training, validation and testing of a neural network.

    Labels are one-hot encoded before being handed to ``criterion``, so the
    criterion must accept ``(outputs, one_hot_targets)`` of identical shape
    (e.g. a BCE-style loss computed on the raw network outputs).

    Args:
        device (string): chosen device to run network operations on
        net: network to train
        criterion: loss function
        optimizer: optimization algorithm to change the attributes of the
            neural network, e.g., stochastic gradient descent (SGD)
        scheduler: learning rate scheduling policy, e.g., MultiStepLR
        train_dataloader: DataLoader instance of the training set
        val_dataloader: DataLoader instance of the validation set
        test_dataloader: DataLoader instance of the test set

    Attributes:
        best_net: network used by ``test()``. It aliases ``net`` until
            ``train()`` finds a model with a better validation accuracy and
            stores a deep copy of it.
        best_accuracy, best_epoch: set by ``train()``; ``best_epoch`` is the
            0-based index of the epoch that produced ``best_net``.
    """

    def __init__(self, device, net, criterion, optimizer, scheduler, train_dataloader, val_dataloader, test_dataloader):
        self.device = device

        self.net = net
        self.best_net = self.net

        self.criterion = criterion
        self.optimizer = optimizer
        self.scheduler = scheduler

        self.train_dataloader = train_dataloader
        self.val_dataloader = val_dataloader
        self.test_dataloader = test_dataloader

    def set_dataloaders(self, train_dataloader=None, val_dataloader=None, test_dataloader=None):
        """Update dataloaders.

        Args:
            train_dataloader, val_dataloader, test_dataloader: if not None,
                update the respective dataloader.
        """

        if train_dataloader is not None:
            self.train_dataloader = train_dataloader

        if val_dataloader is not None:
            self.val_dataloader = val_dataloader

        if test_dataloader is not None:
            self.test_dataloader = test_dataloader

    # @todo: do we already initialize to 100 output nodes, as in the iCaRL paper?
    # if so, is increment_classes needed?
    def increment_classes(self, n=10):
        """Add n classes in the final fully connected layer.

        The network must expose that layer as ``net.fc``. The weights and
        the bias of the existing output neurons are carried over; the n new
        neurons keep the default ``nn.Linear`` initialization. The new layer
        is created on the device of the layer it replaces.

        Note: the replacement layer owns new parameters, so an optimizer
        built before this call does not update them.

        Args:
            n (int): number of output neurons to add.
        """

        old_fc = self.net.fc
        in_features = old_fc.in_features  # size of each input sample
        out_features = old_fc.out_features  # size of each output sample
        has_bias = old_fc.bias is not None

        new_fc = nn.Linear(in_features, out_features + n, bias=has_bias)
        new_fc = new_fc.to(old_fc.weight.device)

        # Copy the learned parameters of the already known classes
        with torch.no_grad():
            new_fc.weight[:out_features] = old_fc.weight
            if has_bias:
                new_fc.bias[:out_features] = old_fc.bias

        self.net.fc = new_fc

    def output_neurons_count(self):
        """Return the number of output neurons of the current network."""

        return self.net.fc.out_features

    def to_onehot(self, targets, num_classes=None):
        """One-hot encode a batch of class indices.

        Args:
            targets: class indices (tensor or sequence of ints).
            num_classes (int, optional): width of the encoding. Defaults to
                the notebook-level constant ``NUM_CLASSES``.
        Returns:
            Float tensor on ``self.device`` with one row per target and
            ``num_classes`` columns.
        """

        if num_classes is None:
            num_classes = NUM_CLASSES

        # Build the identity matrix on the same device as the indices:
        # indexing a CPU tensor with device-resident indices is an error in
        # recent PyTorch releases.
        index = torch.as_tensor(targets, device=self.device)
        one_hot_targets = torch.eye(num_classes, device=self.device)[index]

        return one_hot_targets.to(self.device)

    def train(self, num_epochs):
        """Train the network for a specified number of epochs, and save
        the best performing model on the validation set.

        Args:
            num_epochs (int): number of epochs for training the network.
        Returns:
            train_loss: loss computed on the last epoch
            train_accuracy: accuracy computed on the last epoch
            val_loss: average loss on the validation set of the last epoch
            val_accuracy: accuracy on the validation set of the last epoch

            All four are None when ``num_epochs`` is 0.
        """

        # @todo: is the return behaviour intended? (scores of the last epoch)

        self.net.to(self.device)
        # The flag has to be assigned; merely referencing it has no effect
        cudnn.benchmark = True  # Setting this optimizes runtime

        self.best_accuracy = 0 # @todo: should we use best_loss instead?
        self.best_epoch = 0

        # Defined up front so that a zero-epoch run still returns cleanly
        train_loss = train_accuracy = val_loss = val_accuracy = None

        for epoch in range(num_epochs):
            # Run an epoch (start counting from 1)
            train_loss, train_accuracy = self.do_epoch(epoch+1)

            # Validate after each epoch
            val_loss, val_accuracy = self.validate()

            # Best validation model
            if val_accuracy > self.best_accuracy:
                self.best_accuracy = val_accuracy
                self.best_net = deepcopy(self.net)
                self.best_epoch = epoch
                print("Best model updated")

            print("")

        return (train_loss, train_accuracy,
                val_loss, val_accuracy)

    def do_epoch(self, current_epoch):
        """Trains model for one epoch.

        Args:
            current_epoch (int): current epoch number (begins from 1)
        Returns:
            train_loss: average training loss over all batches of the
                current epoch.
            train_accuracy: training accuracy of the current epoch over
                all samples.
        """

        self.net.train()  # Set network in training mode

        running_train_loss = 0
        running_corrects = 0
        total = 0
        batch_idx = 0

        print(f"Epoch: {current_epoch}, LR: {self.scheduler.get_last_lr()}")

        for images, labels in self.train_dataloader:
            loss, corrects = self.do_batch(images, labels)

            running_train_loss += loss.item()
            running_corrects += corrects
            total += labels.size(0)
            batch_idx += 1

        # The learning rate schedule advances once per epoch
        self.scheduler.step()

        # Calculate average scores
        train_loss = running_train_loss / batch_idx # Average over all batches
        train_accuracy = running_corrects / float(total) # Average over all samples

        print(f"Train loss: {train_loss}, Train accuracy: {train_accuracy}")

        return (train_loss, train_accuracy)

    def do_batch(self, batch, labels):
        """Trains model for one batch.

        Args:
            batch: batch of images for the model to train.
            labels: labels of the batch of images.

        Returns:
            loss: loss function computed on the network outputs of the
                forward pass.
            running_corrects: number of correctly classified images.
        """

        batch = batch.to(self.device)
        labels = labels.to(self.device)

        # Zero-ing the gradients
        self.optimizer.zero_grad()

        # Forward pass
        outputs = self.net(batch)

        # One hot encoding of the labels, as wide as the network output
        # Size = [batch size, number of output neurons]
        one_hot_labels = self.to_onehot(labels, outputs.size(1))

        loss = self.criterion(outputs, one_hot_labels) # BCE Loss with sigmoids over outputs

        # Get predictions
        _, preds = torch.max(outputs.detach(), 1)

        # Compute the number of correctly classified images
        running_corrects = (preds == labels).sum().item()

        # Backward pass: computes gradients
        loss.backward()

        # Update weights based on accumulated gradients
        self.optimizer.step()

        return (loss, running_corrects)

    def validate(self):
        """Validate the model.

        Returns:
            val_loss: average loss function computed on the network outputs
                of the validation set (val_dataloader).
            val_accuracy: accuracy computed on the validation set.
        """

        self.net.eval()  # Set network in evaluation mode

        running_val_loss = 0
        running_corrects = 0
        total = 0
        batch_idx = 0

        # No gradients are needed here; skipping them saves memory and time
        with torch.no_grad():
            for images, labels in self.val_dataloader:
                images = images.to(self.device)
                labels = labels.to(self.device)
                total += labels.size(0)

                # Forward pass
                outputs = self.net(images)

                # One hot encoding of the labels, as wide as the network output
                one_hot_labels = self.to_onehot(labels, outputs.size(1))
                loss = self.criterion(outputs, one_hot_labels) # BCE Loss with sigmoids over outputs

                running_val_loss += loss.item()

                # Get predictions
                _, preds = torch.max(outputs, 1)

                # Update the number of correctly classified validation samples
                running_corrects += (preds == labels).sum().item()

                batch_idx += 1

        # Calculate scores
        val_loss = running_val_loss / batch_idx
        val_accuracy = running_corrects / float(total)

        print(f"Validation loss: {val_loss}, Validation accuracy: {val_accuracy}")

        return (val_loss, val_accuracy)

    def test(self):
        """Test the best model found so far (``best_net``).

        Returns:
            accuracy (float): accuracy of the model on the test set
            all_preds: tensor on ``self.device`` with the predicted class
                index of every test sample, in dataloader order
        """

        # best_net still aliases the untrained net if train() was never run
        self.best_net.to(self.device)
        self.best_net.eval()  # Set Network to evaluation mode

        running_corrects = 0
        total = 0

        batch_preds = []  # predictions of every batch, concatenated at the end

        with torch.no_grad():
            for images, labels in self.test_dataloader:
                images = images.to(self.device)
                labels = labels.to(self.device)
                total += labels.size(0)

                # Forward Pass
                outputs = self.best_net(images)

                # Get predictions
                _, preds = torch.max(outputs, 1)

                # Update Corrects
                running_corrects += (preds == labels).sum().item()

                batch_preds.append(preds)

        # Calculate accuracy
        accuracy = running_corrects / float(total)

        all_preds = torch.cat(batch_preds, dim=0).to(self.device)

        print(f"Test accuracy: {accuracy}")

        return (accuracy, all_preds)

## FMLOSS

In [None]:
# Also trying it as a distillation loss

class FMLoss(nn.Module):
  """Element-wise loss on sigmoid activations of the network outputs.

  For outputs ``x`` and targets ``t`` each element contributes

      -t * log(sigmoid(x) + EPS) + beta * (1 - t) * sigmoid(x)**2

  With the default ``beta = 0`` only the first term is active.

  Args:
      weight (Tensor, optional): rescaling factor multiplied into the
          element-wise loss (must broadcast against it). Default: None.
      reduction (str): 'mean' (default), 'sum' or 'none', applied to the
          element-wise loss.

  Raises:
      ValueError: if ``reduction`` is not one of the supported values.
  """

  EPS = 1e-10  # keeps log() finite when the sigmoid underflows to 0

  def __init__(self, weight = None, reduction = 'mean'):
    super(FMLoss, self).__init__()

    if reduction not in ('mean', 'sum', 'none'):
      raise ValueError(f"Unsupported reduction: {reduction!r}")

    if weight is not None:
      weight = torch.as_tensor(weight, dtype=torch.get_default_dtype())
    # Registered as a buffer so that it follows the module across devices
    self.register_buffer('weight', weight)
    self.reduction = reduction

  def forward(self, outputs, targets, beta = 0):
    """Compute the loss.

    Args:
        outputs: raw network outputs (the sigmoid is applied here).
        targets: tensor broadcastable to ``outputs`` (one-hot labels in
            this notebook).
        beta: factor of the squared-activation term on the entries where
            the target is 0.
    Returns:
        Scalar tensor for 'mean' / 'sum', element-wise tensor for 'none'.
    """
    probs = torch.sigmoid(outputs)  # computed once, used by both terms
    elementwise = -targets * torch.log(probs + self.EPS) \
                  + beta * (1 - targets) * torch.pow(probs, 2)

    if self.weight is not None:
      elementwise = elementwise * self.weight

    if self.reduction == 'mean':
      return torch.mean(elementwise)
    if self.reduction == 'sum':
      return torch.sum(elementwise)
    return elementwise

**2A. Tuning LR, step_size**

In [None]:
# Score histories: one entry is appended for every completed run
train_loss, validation_loss = [], []
train_accuracy, validation_accuracy = [], []

# Hyperparameters of this single run
lr, step_size, num_epochs = 0.2, 50, 60

# Define network: AlexNet whose last classifier layer is replaced so that
# it emits NUM_CLASSES scores
net = alexnet()
net.classifier[6] = nn.Linear(4096, NUM_CLASSES)

# Prepare network: loss, SGD over all parameters, step-down LR schedule
criterion = FMLoss()
parameters_to_optimize = net.parameters()
optimizer = optim.SGD(
    parameters_to_optimize,
    lr=lr,
    momentum=MOMENTUM,
    weight_decay=WEIGHT_DECAY,
)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=GAMMA)

manager = TemporaryManager(
    DEVICE, net, criterion, optimizer, scheduler,
    train_dataloader, val_dataloader, test_dataloader,
)

# train() returns the scores of the last epoch
scores = manager.train(num_epochs)
training_loss, training_accuracy, val_loss, val_accuracy = scores

train_loss.append(training_loss)
validation_loss.append(val_loss)
train_accuracy.append(training_accuracy)
validation_accuracy.append(val_accuracy)

Epoch: 1, LR: [0.2]


Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f924c2122b0>>
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 961, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 941, in _shutdown_workers
    w.join()
  File "/usr/lib/python3.6/multiprocessing/process.py", line 122, in join
    assert self._parent_pid == os.getpid(), 'can only join a child process'
AssertionError: can only join a child process
Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f924c2122b0>>
Traceback (most recent call last):
Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x

Train loss: 0.006863799454136329, Train accuracy: 0.005326704545454545


Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f924c2122b0>>
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 961, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 941, in _shutdown_workers
Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f924c2122b0>>
    w.join()
  File "/usr/lib/python3.6/multiprocessing/process.py", line 122, in join
Traceback (most recent call last):
    assert self._parent_pid == os.getpid(), 'can only join a child process'
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 961, in __del__
AssertionError: can only join a child process
    self._shutdown_workers()
  File "/usr/local/l

Validation loss: 0.006857875967398286, Validation accuracy: 0.007607192254495159
Best model updated

Epoch: 2, LR: [0.2]


Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f924c2122b0>>
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 961, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 941, in _shutdown_workers
    w.join()
  File "/usr/lib/python3.6/multiprocessing/process.py", line 122, in join
    assert self._parent_pid == os.getpid(), 'can only join a child process'
AssertionError: can only join a child process
Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f924c2122b0>>
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 961, in __del__
Exception ignored in: <bound method _MultiProcess

Train loss: 0.006849730598994277, Train accuracy: 0.009943181818181818


Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f924c2122b0>>
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 961, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 941, in _shutdown_workers
    w.join()
  File "/usr/lib/python3.6/multiprocessing/process.py", line 122, in join
    assert self._parent_pid == os.getpid(), 'can only join a child process'
AssertionError: can only join a child process
Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f924c2122b0>>
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 961, in __del__
    self._shutdown_workers()
  File "/usr/local/l

Validation loss: 0.006838845554739237, Validation accuracy: 0.007607192254495159

Epoch: 3, LR: [0.2]


Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f924c2122b0>>
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 961, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 941, in _shutdown_workers
    w.join()
  File "/usr/lib/python3.6/multiprocessing/process.py", line 122, in join
    assert self._parent_pid == os.getpid(), 'can only join a child process'
AssertionError: can only join a child process
Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f924c2122b0>>
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 961, in __del__
    self._shutdown_workers()
  File "/usr/local/l

Train loss: 0.006830105431039225, Train accuracy: 0.049360795454545456


Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f924c2122b0>>
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 961, in __del__
Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f924c2122b0>>
    self._shutdown_workers()
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 941, in _shutdown_workers
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 961, in __del__
    self._shutdown_workers()
    w.join()
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 941, in _shutdown_workers
  File "/usr/lib/python3.6/multiprocessing/process.py", line 122, in join
    assert self._parent_pid == 

Validation loss: 0.006817463164528211, Validation accuracy: 0.09923928077455048
Best model updated

Epoch: 4, LR: [0.2]


Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f924c2122b0>>
Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f924c2122b0>>
Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 961, in __del__
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 961, in __del__
    self._shutdown_workers()
    self._shutdown_workers()
Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f924c2122b0>>
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 941, in _shutdown_workers
Traceback (most recent call last):
  File "/usr/local/lib/python

Train loss: 0.0068089076402512464, Train accuracy: 0.08700284090909091


Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f924c2122b0>>
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 961, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 941, in _shutdown_workers
    w.join()
  File "/usr/lib/python3.6/multiprocessing/process.py", line 122, in join
    assert self._parent_pid == os.getpid(), 'can only join a child process'
AssertionError: can only join a child process
Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f924c2122b0>>
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 961, in __del__
Exception ignored in: <bound method _MultiProcess

Validation loss: 0.006795661019471784, Validation accuracy: 0.09923928077455048

Epoch: 5, LR: [0.2]


Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f924c2122b0>>
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 961, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 941, in _shutdown_workers
    w.join()
  File "/usr/lib/python3.6/multiprocessing/process.py", line 122, in join
    assert self._parent_pid == os.getpid(), 'can only join a child process'
AssertionError: can only join a child process
Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f924c2122b0>>
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 961, in __del__
    self._shutdown_workers()
  File "/usr/local/l

Train loss: 0.0067881816490129995, Train accuracy: 0.08309659090909091


Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f924c2122b0>>
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 961, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 941, in _shutdown_workers
    w.join()
  File "/usr/lib/python3.6/multiprocessing/process.py", line 122, in join
    assert self._parent_pid == os.getpid(), 'can only join a child process'
AssertionError: can only join a child process
Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f924c2122b0>>
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 961, in __del__
    self._shutdown_workers()
  File "/usr/local/l

Validation loss: 0.0067736535565927625, Validation accuracy: 0.09854771784232365

Epoch: 6, LR: [0.2]


Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f924c2122b0>>
Traceback (most recent call last):
Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f924c2122b0>>
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 961, in __del__
Traceback (most recent call last):
    self._shutdown_workers()
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 961, in __del__
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 941, in _shutdown_workers
    w.join()
    self._shutdown_workers()
Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f924c2122b0>>
  File "/usr/lib/python3.6/multiprocessing/process.

KeyboardInterrupt: ignored

In [None]:
# Echo val_accuracy, the name bound when `scores` is unpacked after manager.train()
val_accuracy

In [None]:
# Search space for the SGD grid search. itertools.product (used in the next
# cell) builds the cartesian product of these value lists.
# lr = 1e-1 diverges, so it is left out of the candidates.
lr_values = [1e-3, 5e-3, 1e-2, 5e-2]

parameters = {
    'lr': lr_values,
    'step_size': [30, 50],
    'num_epochs': [50],
}

# Value lists in insertion order: lr, step_size, num_epochs
param_values = list(parameters.values())

In [None]:
# Score histories, one entry per hyperparameter combination (in product() order)
train_loss, train_accuracy = [], []
validation_loss, validation_accuracy = [], []

# Grid search: a fresh network is trained for every (lr, step_size, num_epochs)
for lr, step_size, num_epochs in product(*param_values):

  # AlexNet with its last classifier layer replaced by a NUM_CLASSES-output layer
  net = alexnet()
  net.classifier[6] = nn.Linear(4096, NUM_CLASSES)

  # Loss, SGD over all network parameters, step-wise learning-rate decay
  criterion = nn.CrossEntropyLoss()
  parameters_to_optimize = net.parameters()
  optimizer = optim.SGD(parameters_to_optimize,
                        lr=lr,
                        momentum=MOMENTUM,
                        weight_decay=WEIGHT_DECAY)
  scheduler = optim.lr_scheduler.StepLR(optimizer,
                                        step_size=step_size,
                                        gamma=GAMMA)

  manager = Manager(DEVICE, net, criterion,
                    train_dataloader, val_dataloader, test_dataloader)

  scores = manager.train(optimizer, scheduler, num_epochs)
  training_loss, training_accuracy, val_loss, val_accuracy = scores

  # Keep this run's four histories for the plotting cell
  train_loss.append(training_loss)
  train_accuracy.append(training_accuracy)
  validation_loss.append(val_loss)
  validation_accuracy.append(val_accuracy)

In [None]:
# Rebuild the hyperparameter combinations in the same order the training loop used
combinations = []
for lr, step_size, epochs in product(*param_values):
  combinations.append([lr, step_size, epochs])

# One call to plot_scores per combination; the name encodes lr and step size
for i, comb in enumerate(combinations):
  name = 'lr{}_step{}.jpg'.format(*comb[:2])
  print(name)
  plot_scores(train_loss[i],
              validation_loss[i],
              train_accuracy[i],
              validation_accuracy[i],
              save_directory=name)

**Tuning number of epochs**

Check whether increasing the number of epochs improves performance, or whether the scores instead level off (asymptotic behaviour).

In [None]:
# Search space for the epoch-count study: the learning rate is fixed,
# step size and number of epochs vary.
parameters = {
    'lr': [1e-2],
    'step_size': [40, 50],
    'num_epochs': [60, 70],
}

# Value lists in insertion order: lr, step_size, num_epochs
param_values = list(parameters.values())

In [None]:
# Score histories, one entry per hyperparameter combination (in product() order)
train_loss = []
validation_loss = []
train_accuracy = []
validation_accuracy = []

for lr, step_size, num_epochs in product(*param_values): # grid search over cartesian products of hyper parameters

  #define network
  net = alexnet() # Loading AlexNet model
  net.classifier[6] = nn.Linear(4096, NUM_CLASSES)

  #prepare network
  criterion = nn.CrossEntropyLoss()
  parameters_to_optimize = net.parameters()
  optimizer = optim.SGD(parameters_to_optimize, lr=lr, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
  scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=GAMMA)

  manager = Manager(DEVICE, net, criterion,
                          train_dataloader, val_dataloader, test_dataloader)

  scores = manager.train(optimizer, scheduler, num_epochs)

  training_loss, training_accuracy, val_loss, val_accuracy = scores

  # Store the training loss returned by this run. The previous code appended
  # `loss`, a name this cell never assigns.
  train_loss.append(training_loss)
  validation_loss.append(val_loss)
  train_accuracy.append(training_accuracy)
  validation_accuracy.append(val_accuracy)

In [None]:
# Rebuild the hyperparameter combinations in the same order the training loop used
combinations = []
for lr, step_size, epochs in product(*param_values):
  combinations.append([lr, step_size, epochs])

for i, comb in enumerate(combinations):
  name = 'lr{}_step{}_epochs{}.jpg'.format(comb[0], comb[1], comb[2])
  print(name)
  # plot_scores takes the four histories in the order
  # (train loss, validation loss, train accuracy, validation accuracy);
  # the validation loss was missing from this call.
  plot_scores(train_loss[i], validation_loss[i], train_accuracy[i], validation_accuracy[i], save_directory = name)

**Tuned Hyper Parameters** 
1. lr = 1e-2
2. step_size = 50
3. num_epochs = 50

<br>

**Train a Model with Tuned Hyper Parameters**

In [None]:
# Final from-scratch training run using the hyperparameters chosen after validation
lr = 1e-2
step_size = 50
num_epochs = 50

# AlexNet with its last classifier layer replaced by a NUM_CLASSES-output layer
net = alexnet()
net.classifier[6] = nn.Linear(4096, NUM_CLASSES)

# Loss, SGD over all network parameters, step-wise learning-rate decay
criterion = nn.CrossEntropyLoss()
parameters_to_optimize = net.parameters()
optimizer = optim.SGD(parameters_to_optimize,
                      lr=lr,
                      momentum=MOMENTUM,
                      weight_decay=WEIGHT_DECAY)
scheduler = optim.lr_scheduler.StepLR(optimizer,
                                      step_size=step_size,
                                      gamma=GAMMA)

manager = Manager(DEVICE, net, criterion,
                  train_dataloader, val_dataloader, test_dataloader)

# train() is called with validation=False and its return value replaces `net`
net = manager.train(optimizer, scheduler, num_epochs, validation=False)

**Test the Model**

In [None]:
# Uses the Manager built in the previous cell, which was given test_dataloader
manager.test() # test trained model

**2B. Tuning LR with Adam optimizer** <br>
Tune the learning rate using a different optimizer.

**Training and Validation**

In [None]:
# Only the learning rate varies in this search; step size and epoch count are fixed
lr_values = [1e-3, 5e-3, 1e-2, 5e-2] # lr values for tuning
step_size = 50
num_epochs = 50

In [None]:
# Score histories, one entry per learning rate (in lr_values order)
train_loss = []
validation_loss = []
train_accuracy = []
validation_accuracy = []

for lr in lr_values: # one training run per candidate learning rate

  #define network
  net = alexnet() # Loading AlexNet model
  net.classifier[6] = nn.Linear(4096, NUM_CLASSES)

  #prepare network
  criterion = nn.CrossEntropyLoss()
  parameters_to_optimize = net.parameters()
  # This section tunes the learning rate for Adam (its plots are saved as
  # '..._Adam.jpg'), so build an Adam optimizer. The previous code reused SGD.
  # Adam takes no momentum argument.
  optimizer = optim.Adam(parameters_to_optimize, lr=lr, weight_decay=WEIGHT_DECAY)
  scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=GAMMA)

  manager = Manager(DEVICE, net, criterion,
                          train_dataloader, val_dataloader, test_dataloader)

  scores = manager.train(optimizer, scheduler, num_epochs)
  training_loss, training_accuracy, val_loss, val_accuracy = scores

  # Store the training loss returned by this run. The previous code appended
  # `loss`, a name this cell never assigns.
  train_loss.append(training_loss)
  validation_loss.append(val_loss)
  train_accuracy.append(training_accuracy)
  validation_accuracy.append(val_accuracy)

In [None]:
# `combinations` is filled for symmetry with the other plotting cells;
# the plotting loop below walks lr_values directly.
combinations = []
for lr in lr_values:
  combinations.append([lr])

# One call to plot_scores per learning rate
for i, comb in enumerate(lr_values):
  name = 'lr{}_Adam.jpg'.format(comb)
  print(name)
  plot_scores(train_loss[i],
              validation_loss[i],
              train_accuracy[i],
              validation_accuracy[i],
              save_directory=name)

In [None]:
# Try a larger step_size and number of epochs.
# These values are read by the training cell that follows.

lr = 1e-2
step_size = 55
num_epochs = 60

In [None]:
# NOTE(review): this cell sits under the "Tuning LR with Adam optimizer" heading
# but builds an SGD optimizer — confirm which optimizer was intended for this run.
#define network
net = alexnet() # Loading AlexNet model
net.classifier[6] = nn.Linear(4096, NUM_CLASSES)

#prepare network
criterion = nn.CrossEntropyLoss()
parameters_to_optimize = net.parameters()
optimizer = optim.SGD(parameters_to_optimize, lr=lr, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=GAMMA)

manager = Manager(DEVICE, net, criterion,
                          train_dataloader, val_dataloader, test_dataloader)

# train() returns four score histories, unpacked here for the plotting cell
scores = manager.train(optimizer, scheduler, num_epochs) 
training_loss, training_accuracy, val_loss, val_accuracy = scores

In [None]:
# NOTE(review): unlike the other plot names in this notebook, this one has no '.jpg' suffix — confirm plot_scores handles that
plot_scores(training_loss, val_loss, training_accuracy, val_accuracy, save_directory = 'step_size55_epochs60' )

**Testing**

In [None]:
# Uses the Manager from the training cell above, which was given test_dataloader
manager.test() #test last model trained with lr = 0.01, epochs = 60, step size = 55

<br><br>
**3. TRANSFER LEARNING**

**Define Data Preprocessing**

In [None]:
# Training-phase preprocessing
train_transform = transforms.Compose([
    transforms.Resize(256),       # shorter side of the PIL image -> 256
    transforms.CenterCrop(224),   # central square patch; torchvision's AlexNet needs a 224x224 input,
                                  # so keep this size when changing the transformations
    transforms.ToTensor(),        # PIL Image -> torch.Tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),  # mean / std required by pre-trained models
])

# Evaluation-phase preprocessing (the same pipeline as for training in this section)
eval_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])


**Prepare Datasets**

In [None]:
# Clone github repository with data
# (skipped when ./Caltech101 already exists; the clone is renamed so that it
# can be imported as the `Caltech101` package below)
if not os.path.isdir('./Caltech101'):
  !git clone https://github.com/LilMowgli/Homework2-Caltech101.git
  !mv 'Homework2-Caltech101' 'Caltech101'

DATA_DIR = 'Caltech101/101_ObjectCategories'
from Caltech101.caltech_dataset import Caltech

train_dataset = Caltech(DATA_DIR, split='train',  transform=train_transform)
test_dataset = Caltech(DATA_DIR, split='test', transform=eval_transform)

validation_split = .5  # fraction of the 'train' split held out for validation
shuffle_dataset = True  # not read anywhere in this cell
random_seed= 42

# Creating data indices for training and validation splits
len_dataset = len(train_dataset)
indices = list(range(len_dataset))
split = int(np.floor(validation_split * len_dataset))
# Seeded shuffle, so the same index split is produced on every run of this cell
np.random.seed(random_seed)
np.random.shuffle(indices)
train_indexes, val_indexes = indices[split:], indices[:split]

# Both subsets wrap the same Caltech object, so validation samples go through
# train_transform. val_dataset must be built before train_dataset is rebound.
val_dataset = Subset(train_dataset, val_indexes)
train_dataset = Subset(train_dataset, train_indexes)

**Prepare DataLoaders**

In [None]:
# DataLoaders iterate over the datasets and take care of batching, shuffling and parallel loading.
# Only the training loader shuffles and drops the last incomplete batch.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4,
    drop_last=True,
)
val_dataloader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4,
)
test_dataloader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4,
)

**Training**

In [None]:
# Seeded so the randomly sampled learning rates are the same on every run
random_seed = 100
np.random.seed(random_seed)
r = np.random.uniform(-3, -1, size = 8)  # randomly sample exponents from a range 
                                  # [-3, -1) (sample lr from a log scale)
lr_values = [10**exp for exp in r] 

s = np.arange(start = -3, stop = 1, dtype = float) # exponents -3, -2, -1, 0 (stop is exclusive)
eps = [10**expo for expo in s] 

# Three search spaces, indexed as parameters[0], parameters[1], parameters[2] by the tuning cells
parameters = [dict(lr = lr_values, #find best batch size and lr
                  batch_size = [128, 256, 512], # allows exploring new lr values
                  step_size = [5],
                  epochs = [7]), 
              dict(lr = [1e-2], # optimize step_size and num_epochs
                   step_size = [5, 7], epochs = [5, 7, 10]),  #7 vs 7, 10
              dict(lr = [1e-2, 1e-3], # optimize learning rate and eps for Adam optimizer
                   step_size = [5], 
                   epochs = [7], 
                   eps = eps)] 

**Tuning 1**
- Best with lr circa 0.01, batch size 256. <br>
  Further considerations are in the report.

**Tuning 2**
- Best with lr = 1e-2, epochs = 10 (comparable results) step = 5 <br>

**Tuning 3 Adam**
- Comparable performance to SGD for eps = 0.1
- Up to eps = 0.01 the loss does not decrease or, worse, diverges (TODO: explain why ==> eps is in the denominator; for over-confident probabilities, divergence)
- For eps = 0.1 the results are comparable with SGD. Training is much faster (2 epochs), with accuracy about 1% lower. The choice depends on the requirements: for maximum accuracy I would choose SGD, otherwise Adam is faster. Since this is an exercise meant for experimenting, there is no criterion for expressing a preference, so I stay with the default SGD.

**Lr, Batch Size Tuning**

In [None]:
# Value lists of the first search space: lr, batch_size, step_size, epochs
param_values = [value for value in parameters[0].values()]

# Score histories, one entry per hyperparameter combination (in product() order)
train_loss = []
validation_loss = []
train_accuracy = []
validation_accuracy = []

for lr, batch_size, step_size, num_epochs in product(*param_values):
  # Loaders private to this search. Rebinding the notebook-wide
  # train_dataloader / val_dataloader here would leave every later cell
  # running with the last batch size tried by this loop.
  tuning_train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4, drop_last=True)
  tuning_val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

  #define network
  net = alexnet(pretrained = True) # Loading AlexNet model
  net.classifier[6] = nn.Linear(4096, NUM_CLASSES)

  #prepare network
  criterion = nn.CrossEntropyLoss()
  parameters_to_optimize = net.parameters()
  optimizer = optim.SGD(parameters_to_optimize, lr=lr, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
  scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=GAMMA)

  manager = Manager(DEVICE, net, criterion,
                          tuning_train_dataloader, tuning_val_dataloader, test_dataloader)

  scores = manager.train(optimizer, scheduler, num_epochs)
  training_loss, training_accuracy, val_loss, val_accuracy = scores

  # Store the training loss returned by this run. The previous code appended
  # `loss`, a name this cell never assigns.
  train_loss.append(training_loss)
  validation_loss.append(val_loss)
  train_accuracy.append(training_accuracy)
  validation_accuracy.append(val_accuracy)

In [None]:
# Rebuild the hyperparameter combinations in the same order the training loop used
combinations = []
for lr, batch_size, step_size, epochs in product(*param_values):
  combinations.append([lr, batch_size, step_size, epochs])

for i, comb in enumerate(combinations):
  name = 'lr{}_batch_size{}_step{}_epochs{}.jpg'.format(comb[0], comb[1], comb[2], comb[3])
  print(name)
  # plot_scores takes the four histories in the order
  # (train loss, validation loss, train accuracy, validation accuracy);
  # the validation loss was missing from this call.
  plot_scores(train_loss[i], validation_loss[i], train_accuracy[i], validation_accuracy[i], save_directory = name)

**Lr, Step, Epochs Tuning**

In [None]:
param_values = [value for value in parameters[1].values()] #tune step_size and epochs

# Score histories, one entry per hyperparameter combination (in product() order)
train_loss = []
validation_loss = []
train_accuracy = []
validation_accuracy = []

for lr, step_size, num_epochs in product(*param_values):

  #define network
  net = alexnet(pretrained = True) # Loading AlexNet model
  net.classifier[6] = nn.Linear(4096, NUM_CLASSES)

  #prepare network
  criterion = nn.CrossEntropyLoss()
  parameters_to_optimize = net.parameters()
  optimizer = optim.SGD(parameters_to_optimize, lr=lr, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
  scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=GAMMA)

  manager = Manager(DEVICE, net, criterion,
                          train_dataloader, val_dataloader, test_dataloader)

  scores = manager.train(optimizer, scheduler, num_epochs)
  training_loss, training_accuracy, val_loss, val_accuracy = scores

  # Store the training loss returned by this run. The previous code appended
  # `loss`, a name this cell never assigns.
  train_loss.append(training_loss)
  validation_loss.append(val_loss)
  train_accuracy.append(training_accuracy)
  validation_accuracy.append(val_accuracy)

In [None]:
# Rebuild the hyperparameter combinations in the same order the training loop used
combinations = []
for lr, step_size, epochs in product(*param_values):
  combinations.append([lr, step_size, epochs])

# Iterate the combinations that were actually trained. The previous loop walked
# lr_values (8 sampled learning rates) while only len(combinations) runs exist,
# and reused the Adam file name from another section.
for i, comb in enumerate(combinations):
  name = 'lr{}_step{}_epochs{}.jpg'.format(comb[0], comb[1], comb[2])
  print(name)
  plot_scores(train_loss[i],  validation_loss[i], train_accuracy[i], validation_accuracy[i], save_directory = name)

**Adam Optimizer, LR and Epsilon Tuning**


In [None]:
param_values = [value for value in parameters[2].values()] # tune lr and eps for the Adam optimizer

# Score histories, one entry per hyperparameter combination (in product() order)
train_loss = []
validation_loss = []
train_accuracy = []
validation_accuracy = []

for lr, step_size, num_epochs, eps in product(*param_values):

  #define network
  net = alexnet(pretrained = True) # Loading AlexNet model
  net.classifier[6] = nn.Linear(4096, NUM_CLASSES)

  #prepare network
  criterion = nn.CrossEntropyLoss()
  parameters_to_optimize = net.parameters()
  optimizer = optim.Adam(parameters_to_optimize, lr=lr, eps=eps, weight_decay=WEIGHT_DECAY)
  scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=GAMMA)

  manager = Manager(DEVICE, net, criterion,
                          train_dataloader, val_dataloader, test_dataloader)

  scores = manager.train(optimizer, scheduler, num_epochs)
  training_loss, training_accuracy, val_loss, val_accuracy = scores

  # Store the training loss returned by this run. The previous code appended
  # `loss`, a name this cell never assigns.
  train_loss.append(training_loss)
  validation_loss.append(val_loss)
  train_accuracy.append(training_accuracy)
  validation_accuracy.append(val_accuracy)

In [None]:
# Rebuild the hyperparameter combinations in the same order the training loop used
combinations = []
for lr, step_size, epochs, eps in product(*param_values):
  combinations.append([lr, step_size, epochs, eps])

# One call to plot_scores per combination; the name encodes lr (first item) and eps (last item)
for i, comb in enumerate(combinations):
  name = 'lr{}_eps{}Adam.jpg'.format(comb[0], comb[-1])
  print(name)
  plot_scores(train_loss[i],
              validation_loss[i],
              train_accuracy[i],
              validation_accuracy[i],
              save_directory=name)

**Train Validated Model**

In [None]:
# Best hyperparameters found by the tuning cells above.
# NOTE(review): the batch size actually used is whatever train_dataloader was
# last built with — confirm it is the intended default of 256.
lr = 1e-2
num_epochs = 10
step_size = 5


net = alexnet(pretrained = True)
net.classifier[6] = nn.Linear(4096, NUM_CLASSES)

criterion = nn.CrossEntropyLoss() # for classification, we use Cross Entropy

parameters_to_optimize = net.parameters() # In this case we optimize over all the parameters of AlexNet

# SGD optimizer
optimizer = optim.SGD(parameters_to_optimize, lr=lr, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)

scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=GAMMA)

manager = Manager(DEVICE, net, criterion,
                          train_dataloader, val_dataloader, test_dataloader)

# train() is called with validation = False and its return value replaces `net`
net = manager.train(optimizer, scheduler, num_epochs, validation = False) 

**Test**

In [None]:
# Uses the Manager from the training cell above, which was given test_dataloader
manager.test()

**Freeze Convolutional Layers**


In [None]:
#best parameters learned (lower number of epochs)
lr = 1e-2
num_epochs = 7
step_size = 5


net = alexnet(pretrained = True) # Loading pre-trained AlexNet model

net.classifier[6] = nn.Linear(4096, NUM_CLASSES) # nn.Linear in pytorch is a fully connected layer
                                                 # The convolutional layer is nn.Conv2d

# Define loss function
criterion = nn.CrossEntropyLoss() # for classification, we use Cross Entropy

#freeze convolution weights
for param in net.features.parameters():
  param.requires_grad = False

parameters_to_update = []

# sanity check frozen parameters
for name, param in net.named_parameters():
  if param.requires_grad == True:
    parameters_to_update.append(param)
  print("\t",name, param.requires_grad) #False for frozen layer

# Hand the optimizer only the parameters that still require gradients.
# The list collected above was previously built but never used.
parameters_to_optimize = parameters_to_update

# SGD optimizer
optimizer = optim.SGD(parameters_to_optimize, lr=lr, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)

scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=GAMMA)

manager = Manager(DEVICE, net, criterion,
                          train_dataloader, val_dataloader, test_dataloader)

scores = manager.train(optimizer, scheduler, num_epochs)
training_loss, training_accuracy, val_loss, val_accuracy = scores

# Validation accuracy of the last epoch. Indexing with num_epochs is out of
# range when the history holds one entry per epoch; -1 always picks the final entry.
freezeConvAccuracy = val_accuracy[-1]

In [None]:
# Hand the four histories of the frozen-convolution run to plot_scores; `name` is passed as save_directory
name = 'freezed_CNN.jpg'
plot_scores(training_loss, val_loss, training_accuracy, val_accuracy, save_directory = name)

In [None]:
# Validation accuracy recorded at the end of the frozen-convolution training cell
print(freezeConvAccuracy)

In [None]:
# Uses the Manager from the frozen-convolution training cell, which was given test_dataloader
manager.test()

**Freeze FC Layers**

In [None]:
#best lr learned. Increase epochs and step size
lr = 1e-2
num_epochs = 40
step_size = 20

net = alexnet(pretrained = True) # Loading pre-trained AlexNet model

net.classifier[6] = nn.Linear(4096, NUM_CLASSES) # nn.Linear in pytorch is a fully connected layer
                                                 # The convolutional layer is nn.Conv2d

# Define loss function
criterion = nn.CrossEntropyLoss() # for classification, we use Cross Entropy

#freeze FC weights
for param in net.classifier.parameters():
  param.requires_grad = False

parameters_to_update = []

# sanity check frozen parameters
for name, param in net.named_parameters():
  if param.requires_grad == True:
    parameters_to_update.append(param)
  print("\t",name, param.requires_grad) #False for frozen layer

# Hand the optimizer only the parameters that still require gradients.
# The list collected above was previously built but never used.
parameters_to_optimize = parameters_to_update

# SGD optimizer. Use the lr / step_size chosen at the top of this cell;
# the previous code silently fell back to the global LR / STEP_SIZE constants.
optimizer = optim.SGD(parameters_to_optimize, lr=lr, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)

scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=GAMMA)

manager = Manager(DEVICE, net, criterion,
                          train_dataloader, val_dataloader, test_dataloader)

scores = manager.train(optimizer, scheduler, num_epochs)
training_loss, training_accuracy, val_loss, val_accuracy = scores

# Validation accuracy of the last epoch. Indexing with num_epochs is out of
# range when the history holds one entry per epoch; -1 always picks the final entry.
freezeFCAccuracy = val_accuracy[-1]

In [None]:
# Separate file name for the frozen-FC run. Reusing 'freezed_CNN.jpg' would
# overwrite the plot of the frozen-convolution experiment.
name = 'freezed_FC.jpg'
plot_scores(training_loss, val_loss, training_accuracy, val_accuracy, save_directory = name)

In [None]:
# Call the method: a bare `manager.test` only displays the bound method and never runs the test
manager.test()

**Conclusions freezed layers**

Freezing the FC layers works much worse than freezing the convolutional layers. This is because the two tasks share the same semantics on the input side but have different semantics on the output side.

**4. DATA AUGMENTATION**

**TRANSFORMATIONS SET 1**

1. CenterCrop
2. RandomHorizontalFlip(p=0.1)
3. ColorJitter brightness = 0.5

With num_epochs = 13, step_size = 6: Accuracy = 0.85


In [None]:
# Clone github repository with data
# (skipped when ./Caltech101 already exists; the clone is renamed so that it
# can be imported as the `Caltech101` package below)
if not os.path.isdir('./Caltech101'):
  !git clone https://github.com/LilMowgli/Homework2-Caltech101.git
  !mv 'Homework2-Caltech101' 'Caltech101'

DATA_DIR = 'Caltech101/101_ObjectCategories'
from Caltech101.caltech_dataset import Caltech

In [None]:
# Define transforms for training phase (augmentation set 1)
train_transform = transforms.Compose([transforms.Resize(256),
                                      transforms.CenterCrop(224),
                                      transforms.RandomHorizontalFlip(p=0.1),
                                      transforms.ColorJitter(brightness = 0.5),
                                      transforms.ToTensor(),
                                      transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
])

# Transformations for validation and test datasets (no random augmentation)
eval_transform = transforms.Compose([transforms.Resize(256),
                                      transforms.CenterCrop(224),
                                      transforms.ToTensor(),
                                      transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
])


train_dataset = Caltech(DATA_DIR, split='train',  transform=train_transform)
# Second view of the same 'train' split through eval_transform. The validation
# subset is taken from it so that validation samples are not randomly augmented.
# Assumes Caltech lists its samples in the same order on every construction —
# TODO confirm against caltech_dataset.py.
val_source_dataset = Caltech(DATA_DIR, split='train', transform=eval_transform)
test_dataset = Caltech(DATA_DIR, split='test', transform=eval_transform)

# Creating data indices for training and validation splits:


validation_split = .5
random_seed= 42
len_dataset = len(train_dataset)
indices = list(range(len_dataset))
split = int(np.floor(validation_split * len_dataset))
# Seeded shuffle, so the same index split is produced on every run of this cell
np.random.seed(random_seed)
np.random.shuffle(indices)
train_indexes, val_indexes = indices[split:], indices[:split]

val_dataset = Subset(val_source_dataset, val_indexes)
train_dataset = Subset(train_dataset, train_indexes)

#Define dataloader object to iterate over datasets
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
# Rebuilt here, like in the other augmentation sections, so the test loader
# wraps the test_dataset created in this cell rather than a leftover one.
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

In [None]:
#best hyper parameters learned (batch size kept at default 256)
#As suggested in the explanation, the training epochs or the learning rate have to be increased
lr = 1e-2
num_epochs = 14
step_size = 7


#define network
net = alexnet(pretrained = True) # Loading AlexNet model
net.classifier[6] = nn.Linear(4096, NUM_CLASSES)

#prepare network
criterion = nn.CrossEntropyLoss()
parameters_to_optimize = net.parameters()
optimizer = optim.SGD(parameters_to_optimize, lr=lr, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=GAMMA)

manager = Manager(DEVICE, net, criterion,
                          train_dataloader, val_dataloader, test_dataloader)

# train() returns four score histories, unpacked here for the plotting cell
scores = manager.train(optimizer, scheduler, num_epochs) 
training_loss, training_accuracy, val_loss, val_accuracy = scores

In [None]:
# Hand the four histories of the augmentation-set-1 run to plot_scores; `name` is passed as save_directory
name = 'DA1_CNN.jpg'
plot_scores(training_loss, val_loss, training_accuracy, val_accuracy, save_directory = name)

In [None]:
# Uses the Manager from the augmentation-set-1 training cell, which was given test_dataloader
manager.test()

**TENCROP**<br>

1. CenterCrop
2. RandomHorizontalFlip(p=0.1)
3. ColorJitter(brightness = 0.5)
4. TenCrop on Validation

Can not run due to memory issues

In [None]:
# Clone github repository with data
# (skipped when ./Caltech101 already exists; the clone is renamed so that it
# can be imported as the `Caltech101` package below)
if not os.path.isdir('./Caltech101'):
  !git clone https://github.com/LilMowgli/Homework2-Caltech101.git
  !mv 'Homework2-Caltech101' 'Caltech101'

DATA_DIR = 'Caltech101/101_ObjectCategories'
from Caltech101.caltech_dataset import Caltech

In [None]:
# Define transforms for training phase
train_transform = transforms.Compose([transforms.Resize(256),      
                                      transforms.CenterCrop(224),
                                      transforms.RandomHorizontalFlip(p=0.1),
                                      transforms.ColorJitter(brightness = 0.5), 
                                      transforms.ToTensor(), 
                                      transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]) 
])
# Define transforms for the evaluation phase
# TenCrop yields several crops per image; each crop is converted to a tensor and
# normalized on its own, and the crops are stacked into one tensor per image.
eval_transform = transforms.Compose([transforms.Resize(256),
                                      transforms.TenCrop(224),
                                      transforms.Lambda(lambda crops: torch.stack([transforms.ToTensor()(crop) for crop in crops])),
                                      transforms.Lambda(lambda crops: torch.stack([transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                      std=[0.229, 0.224, 0.225])(crop) for crop in crops]))                                 
])



#create dataset with defined transformations
train_dataset = Caltech(DATA_DIR, split='train',  transform=train_transform)
test_dataset = Caltech(DATA_DIR, split='test', transform=eval_transform)

validation_split = .5
random_seed= 42
len_dataset = len(train_dataset)
indices = list(range(len_dataset))
split = int(np.floor(validation_split * len_dataset))
# Seeded shuffle, so the same index split is produced on every run of this cell
np.random.seed(random_seed)
np.random.shuffle(indices)
train_indexes, val_indexes = indices[split:], indices[:split]

# Both subsets wrap the train_transform dataset, so only test_dataset uses the
# TenCrop eval_transform. val_dataset must be built before train_dataset is rebound.
val_dataset = Subset(train_dataset, val_indexes)
train_dataset = Subset(train_dataset, train_indexes)

batch_size = 12 # larger values leads to out of memory errors

#Define dataloader object to iterate over datasets
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4, drop_last=True)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=10)

In [None]:
def test_tencrops(model, test_dataloader):
  """Evaluate `model` on multi-crop batches and return its accuracy.

  Each batch from `test_dataloader` is unpacked as a 5-D image tensor
  (batch, crops, channels, height, width) plus labels. All crops are pushed
  through the network in one forward pass, the outputs of the crops belonging
  to the same image are averaged, and the arg-max of the average is the
  prediction.

  Args:
    model: network to evaluate; it is moved to DEVICE and put in eval mode.
    test_dataloader: DataLoader yielding (images, labels) batches as above.

  Returns:
    Fraction of correctly classified samples, computed over
    len(test_dataloader.dataset).
  """

  model = model.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
  model.train(False) # Set Network to evaluation mode

  running_corrects = 0
  # Inference only: without no_grad autograd keeps the graph of every
  # (batch * crops) forward pass alive, which multiplies memory use.
  with torch.no_grad():
    for images, labels in tqdm(test_dataloader):
      images = images.to(DEVICE)
      labels = labels.to(DEVICE)

      bs, ncrops, c, h, w = images.size()
      outputs = model(images.view(-1, c, h, w)) # fuse batch size and ncrops
      outputs_avg = outputs.view(bs, ncrops, -1).mean(1) # average over the crops of each image

      # Get predictions
      _, preds = torch.max(outputs_avg.data, 1)

      # Update Corrects
      running_corrects += torch.sum(preds == labels.data).data.item()

  # Calculate Accuracy over the dataset behind the loader that was passed in,
  # not over whatever global `test_dataset` happens to exist.
  accuracy = running_corrects / float(len(test_dataloader.dataset))

  print('Test Accuracy: {}'.format(accuracy))

  return accuracy

In [None]:
#hyper parameters for the TenCrop run (the dataloaders of this section are built with batch_size = 12)
#As suggested in the explanation, the training epochs or the learning rate have to be increased
lr = 1e-3
num_epochs = 13
step_size = 7

#define network
net = alexnet(pretrained = True) # Loading AlexNet model
net.classifier[6] = nn.Linear(4096, NUM_CLASSES)

#prepare network
criterion = nn.CrossEntropyLoss()
parameters_to_optimize = net.parameters()
optimizer = optim.SGD(parameters_to_optimize, lr=lr, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=GAMMA)

manager = Manager(DEVICE, net, criterion,
                          train_dataloader, val_dataloader, test_dataloader)

# train() is called with validation = False and its return value replaces `net`
net = manager.train(optimizer, scheduler, num_epochs, validation = False) 

In [None]:
# Evaluate the network returned by the training cell above on the TenCrop test loader
test_tencrops(net, test_dataloader)

**TRANSFORMATIONS SET 2**
1. CenterCrop
2. RandomHorizontalFlip
3. RandomVerticalFlip with lower probability
4. Random Perspective

Accuracy = 0.81



Using RandomGrayscale with p = 0.1 and dropping the random perspective:
Accuracy = 0.84 without the vertical flip,
0.83 with the vertical flip

In [None]:
# Clone github repository with data
# (skipped when ./Caltech101 already exists; the clone is renamed so that it
# can be imported as the `Caltech101` package below)
if not os.path.isdir('./Caltech101'):
  !git clone https://github.com/LilMowgli/Homework2-Caltech101.git
  !mv 'Homework2-Caltech101' 'Caltech101'

DATA_DIR = 'Caltech101/101_ObjectCategories'
from Caltech101.caltech_dataset import Caltech

In [None]:
# Define transforms for training phase (augmentation set 2).
# The pipeline is defined once; an identical second definition that
# immediately overwrote it was dropped.
train_transform = transforms.Compose([transforms.Resize(256),      # Resizes short size of the PIL image to 256
                                      transforms.CenterCrop(224),  # Remember: AlexNet needs a 224x224 input!
                                      transforms.RandomHorizontalFlip(p=0.1),
                                      transforms.RandomVerticalFlip(p = 0.05),
                                      transforms.ToTensor(), # Turn PIL Image to torch.Tensor
                                      transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]), # Normalizes tensor with mean and standard deviation
                                 transforms.RandomErasing(p = 0.1) # placed after ToTensor, so it receives the tensor
])

# Define transforms for the evaluation phase (no random augmentation)
eval_transform = transforms.Compose([transforms.Resize(256),
                                      transforms.CenterCrop(224),
                                      transforms.ToTensor(),
                                      transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
])


#create dataset with defined transformations
train_dataset = Caltech(DATA_DIR, split='train',  transform=train_transform)
# Second view of the same 'train' split through eval_transform. The validation
# subset is taken from it so that validation samples are not randomly augmented.
# Assumes Caltech lists its samples in the same order on every construction —
# TODO confirm against caltech_dataset.py.
val_source_dataset = Caltech(DATA_DIR, split='train', transform=eval_transform)
test_dataset = Caltech(DATA_DIR, split='test', transform=eval_transform)

validation_split = .5
random_seed= 42
len_dataset = len(train_dataset)
indices = list(range(len_dataset))
split = int(np.floor(validation_split * len_dataset))
# Seeded shuffle, so the same index split is produced on every run of this cell
np.random.seed(random_seed)
np.random.shuffle(indices)
train_indexes, val_indexes = indices[split:], indices[:split]

val_dataset = Subset(val_source_dataset, val_indexes)
train_dataset = Subset(train_dataset, train_indexes)


#Define dataloader object to iterate over datasets
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

In [None]:
#best hyper parameters learned (batch size kept at default 256)
#As suggested in the explanation, the training epochs or the learning rate have to be increased
lr = 1e-2
num_epochs = 11
step_size = 8

#define network
net = alexnet(pretrained = True) # Loading AlexNet model
net.classifier[6] = nn.Linear(4096, NUM_CLASSES)

#prepare network
criterion = nn.CrossEntropyLoss()
parameters_to_optimize = net.parameters()
optimizer = optim.SGD(parameters_to_optimize, lr=lr, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=GAMMA)

manager = Manager(DEVICE, net, criterion,
                          train_dataloader, val_dataloader, test_dataloader)

# train() returns four score histories, unpacked here for the plotting cell
scores = manager.train(optimizer, scheduler, num_epochs) 
training_loss, training_accuracy, val_loss, val_accuracy = scores

In [None]:
# Hand the four histories of the augmentation-set-2 run to plot_scores; `name` is passed as save_directory
name = 'DA2_CNN.jpg'
plot_scores(training_loss, val_loss, training_accuracy, val_accuracy, save_directory = name)

In [None]:
# Uses the Manager from the augmentation-set-2 training cell, which was given test_dataloader
manager.test()

**TRANSFORMATIONS SET 3**
1. RandomCrop
2. Horizontal
3. VerticalFlip
4. RandomPerspective

In [None]:
# Define transforms for training phase (augmentation set 3)
train_transform = transforms.Compose([transforms.Resize(256),
                                      transforms.RandomCrop(224),
                                      transforms.RandomHorizontalFlip(p=0.1),
                                      transforms.RandomPerspective(p = 0.2),
                                      transforms.ToTensor(),
                                      transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
])

# Define transforms for the evaluation phase (no random augmentation)
eval_transform = transforms.Compose([transforms.Resize(256),
                                      transforms.CenterCrop(224),
                                      transforms.ToTensor(),
                                      transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
])


#create dataset with defined transformations
train_dataset = Caltech(DATA_DIR, split='train',  transform=train_transform)
# Second view of the same 'train' split through eval_transform. The validation
# subset is taken from it so that validation samples are not randomly augmented.
# Assumes Caltech lists its samples in the same order on every construction —
# TODO confirm against caltech_dataset.py.
val_source_dataset = Caltech(DATA_DIR, split='train', transform=eval_transform)
test_dataset = Caltech(DATA_DIR, split='test', transform=eval_transform)

validation_split = .5
random_seed= 42
len_dataset = len(train_dataset)
indices = list(range(len_dataset))
split = int(np.floor(validation_split * len_dataset))
# Seeded shuffle, so the same index split is produced on every run of this cell
np.random.seed(random_seed)
np.random.shuffle(indices)
train_indexes, val_indexes = indices[split:], indices[:split]

val_dataset = Subset(val_source_dataset, val_indexes)
train_dataset = Subset(train_dataset, train_indexes)


#Define dataloader object to iterate over datasets
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

In [None]:
# Best hyperparameters found (batch size kept at the default of 256).
# A larger number of epochs is necessary for convergence.
lr = 1e-2
num_epochs = 14
step_size = 10

# Define the network: load the pretrained AlexNet, then replace the last
# classifier layer with one that has NUM_CLASSES outputs.
net = alexnet(pretrained=True)
net.classifier[6] = nn.Linear(4096, NUM_CLASSES)

# Prepare training: cross-entropy loss, SGD on all network parameters,
# and a StepLR schedule driven by step_size / GAMMA.
criterion = nn.CrossEntropyLoss()
parameters_to_optimize = net.parameters()
optimizer = optim.SGD(parameters_to_optimize,
                      lr=lr,
                      momentum=MOMENTUM,
                      weight_decay=WEIGHT_DECAY)
scheduler = optim.lr_scheduler.StepLR(optimizer,
                                      step_size=step_size,
                                      gamma=GAMMA)

manager = Manager(DEVICE, net, criterion, train_dataloader, val_dataloader, test_dataloader)

# Keep the raw result in `scores` and unpack it for the plotting cell.
training_loss, training_accuracy, val_loss, val_accuracy = scores = manager.train(
    optimizer, scheduler, num_epochs
)

In [None]:
# Plot the loss/accuracy curves of the run above; `name` is handed to
# plot_scores as its save_directory argument.
name = 'DA3_CNN.jpg'
plot_scores(
    training_loss,
    val_loss,
    training_accuracy,
    val_accuracy,
    save_directory=name,
)

In [None]:
# Run the test routine of the manager built in the training cell above
# (it was constructed with test_dataloader; presumably this evaluates the
# trained network on it -- confirm against the Manager class).
manager.test()

**5. RESNET**

Compare Resnet18 and Alexnet performance

In [None]:
from torchvision.models import resnet18, resnet34

**Define and Train the Network**

Pretrained weights on ImageNet are loaded

In [None]:
# Best parameters found (lower number of epochs than the AlexNet runs).
lr = 1e-2
num_epochs = 10
step_size = 7

# Load the pretrained ResNet-18: this section compares ResNet-18 with AlexNet
# and the resulting plot is saved as 'resnet18.jpg'.
resnet = resnet18(pretrained=True)

# Replace the final fully connected layer with a new NUM_CLASSES-way layer.
# Assigning to `fc.out_features` only overwrites an attribute: the weight and
# bias tensors keep their pretrained shape, so the head must be rebuilt.
resnet.fc = nn.Linear(resnet.fc.in_features, NUM_CLASSES)

# Define loss function: cross entropy for classification.
criterion = nn.CrossEntropyLoss()

# Optimize over all parameters (collected AFTER the head has been replaced so
# the new layer is included).
parameters_to_optimize = resnet.parameters()

# SGD optimizer with a step-wise learning-rate decay.
optimizer = optim.SGD(parameters_to_optimize, lr=lr, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=GAMMA)

# Hand the ResNet to the manager. Passing the AlexNet `net` here would train
# and test a model whose parameters this optimizer does not hold.
manager = Manager(DEVICE, resnet, criterion,
                  train_dataloader, val_dataloader, test_dataloader)

scores = manager.train(optimizer, scheduler, num_epochs)
training_loss, training_accuracy, val_loss, val_accuracy = scores

In [None]:
# Plot the loss/accuracy curves of the run above; `name` is handed to
# plot_scores as its save_directory argument.
name = 'resnet18.jpg'
plot_scores(
    training_loss,
    val_loss,
    training_accuracy,
    val_accuracy,
    save_directory=name,
)

In [None]:
# Run the test routine of the manager built in the training cell above
# (it was constructed with test_dataloader; presumably this evaluates the
# model held by the manager on it -- confirm against the Manager class).
manager.test()

In [None]:
# Rebuild datasets and dataloaders with a smaller batch size.
# The 'train' split is instantiated twice: once with the augmenting
# train_transform (used for training) and once with eval_transform (used for
# validation), so validation is not run on randomly augmented images.
train_dataset = Caltech(DATA_DIR, split='train', transform=train_transform)
val_source_dataset = Caltech(DATA_DIR, split='train', transform=eval_transform)
test_dataset = Caltech(DATA_DIR, split='test', transform=eval_transform)

batch_size = 64

# Creating data indices for training and validation splits:

validation_split = .5  # define portion of validation split
random_seed = 42
len_dataset = len(train_dataset)
# Both views must index the same samples for the shared indices to be valid
# (assumes Caltech loads a split in a deterministic order -- TODO confirm).
assert len(val_source_dataset) == len_dataset, "train/val views of the split differ in size"

indices = list(range(len_dataset))
split = int(np.floor(validation_split * len_dataset))
np.random.seed(random_seed)  # seed the generator
np.random.shuffle(indices)   # shuffle indices to get a balanced distribution in training and validation set
train_indexes, val_indexes = indices[split:], indices[:split]

val_dataset = Subset(val_source_dataset, val_indexes)
train_dataset = Subset(train_dataset, train_indexes)

# Dataloaders iterate over pytorch datasets and transparently provide useful functions (e.g. parallelization and shuffling)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4, drop_last=True)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

In [None]:
# Best parameters found (lower number of epochs).
lr = 1e-2
num_epochs = 6
step_size = 6

# Load the pretrained ResNet-34.
resnet = resnet34(pretrained=True)

# Replace the final fully connected layer with a new NUM_CLASSES-way layer.
# Assigning to `fc.out_features` only overwrites an attribute: the weight and
# bias tensors keep their pretrained shape, so the head must be rebuilt.
resnet.fc = nn.Linear(resnet.fc.in_features, NUM_CLASSES)

# Define loss function: cross entropy for classification.
criterion = nn.CrossEntropyLoss()

# Optimize over all parameters (collected AFTER the head has been replaced so
# the new layer is included).
parameters_to_optimize = resnet.parameters()

# SGD optimizer with a step-wise learning-rate decay.
optimizer = optim.SGD(parameters_to_optimize, lr=lr, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=GAMMA)

# Hand the ResNet to the manager. Passing the AlexNet `net` here would train
# and test a model whose parameters this optimizer does not hold.
manager = Manager(DEVICE, resnet, criterion,
                  train_dataloader, val_dataloader, test_dataloader)

scores = manager.train(optimizer, scheduler, num_epochs)
training_loss, training_accuracy, val_loss, val_accuracy = scores

In [None]:
# Plot the loss/accuracy curves of the run above; `name` is handed to
# plot_scores as its save_directory argument.
name = 'resnet34.jpg'
plot_scores(
    training_loss,
    val_loss,
    training_accuracy,
    val_accuracy,
    save_directory=name,
)

In [None]:
# Run the test routine of the manager built in the training cell above
# (it was constructed with test_dataloader; presumably this evaluates the
# model held by the manager on it -- confirm against the Manager class).
manager.test()