<a href="https://colab.research.google.com/github/Nicordaro/Project_MLDL/blob/master/main_finetuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

*Cloning files needed*

In [1]:
# Clone github repository with data
# if os.path.isdir('./Project_MLDL'):
!rm -rf Project_MLDL
if not os.path.isdir('./CIFAR100_tError'):
  !git clone https://github.com/Nicordaro/Project_MLDL


NameError: ignored

**Imports**

---

In [0]:
import logging
import matplotlib.pyplot as plt
import numpy as np
import os
import random
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim


from PIL import Image
from Project_MLDL.CIFAR100_tError import CIFAR100_tError
from Project_MLDL.model_finetuning import ResNet18
from torch.utils.data import Subset, DataLoader
from torch.backends import cudnn
from torchvision import transforms
from torchvision.models import resnet18
from torchvision.models import resnet34
from tqdm import tqdm

**Arguments**

---





In [0]:
# Device every model and batch is moved to with `.to(DEVICE)`
DEVICE = 'cuda' # 'cuda' or 'cpu'

# Init at 10 because first train is on 10 classes
NUM_CLASSES = 10

# Seed for Python's `random` module; fixes the shuffled order of the class labels
SEED = 12

BATCH_SIZE = 128     # Higher batch sizes allow for larger learning rates. An empirical heuristic suggests that, when changing
                     # the batch size, learning rate should change by the same factor to have comparable results

LR = 2         # The initial learning rate handed to SGD
MOMENTUM = 0.9       # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 1e-5  # Regularization, you can keep this at the default

NUM_EPOCHS = 70     # Number of training epochs run for EACH group of new classes
MILESTONES = [48, 62]  # Epoch indices at which MultiStepLR multiplies the learning rate by GAMMA
GAMMA = 0.2          # Multiplicative factor for learning rate step-down

LOG_FREQUENCY = 50   # The training loss is printed every LOG_FREQUENCY optimisation steps

**Transformations definition**

---



In [0]:
# Training-time pipeline: light augmentation, then tensor conversion and
# channel-wise normalisation.
# Mean/std reference: https://gist.github.com/weiaicunzai/e623931921efefd4c331622c344d8151
# NOTE(review): the constants below look like the widely used ImageNet
# statistics rather than CIFAR-100 ones - confirm this is intended.
train_transform = transforms.Compose(
    [
        transforms.RandomHorizontalFlip(p=0.5),  # mirror half of the images
        transforms.Pad(padding=4),               # grow each side by 4 pixels
        transforms.RandomCrop(size=32),          # cut a random 32x32 window
        transforms.ToTensor(),                   # PIL Image -> torch.Tensor
        transforms.Normalize(
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),
    ]
)

# Evaluation-time pipeline: no augmentation, same conversion and normalisation.
eval_transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),
    ]
)

*New label function*

In [0]:
DATA_DIR = './CIFAR100'

# Integer class ids 0..99, shuffled once with a fixed seed so the order in
# which classes are introduced is the same on every run.
lbls = list(range(100))
random.seed(SEED)
random.shuffle(lbls)

def make_data_labels(lbls, n_new=10):
    """Split the next group of class labels off the front of ``lbls``.

    Args:
        lbls: Sliceable sequence of the class labels not yet used.
        n_new: How many labels to take from the front (default 10, the
            group size used throughout this notebook).

    Returns:
        A pair ``(remaining, new_labels)``: ``remaining`` is ``lbls`` without
        its first ``n_new`` items and ``new_labels`` is a new list holding
        those first items. If fewer than ``n_new`` labels are left, all of
        them are returned and ``remaining`` is empty.

    Raises:
        ValueError: If ``n_new`` is negative.
    """
    if n_new < 0:
        raise ValueError('n_new must be non-negative')

    # Copy the head so the caller gets a list independent of the input.
    new_labels = list(lbls[:n_new])
    return lbls[n_new:], new_labels

**Network Definition**

---

In [0]:
# `net` is the model that gets trained. `best_model` starts as a second,
# independent instance and is later replaced by a deep copy of `net`
# whenever the validation accuracy improves.
net = ResNet18()
best_model = ResNet18()

In [0]:
def testNet(datasetLength, dataloader, net):
    net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
    net.train(False) # Set Network to evaluation mode

    running_corrects = 0

    for images, labels in dataloader:
        images = images.to(DEVICE)
        labels = labels.to(DEVICE)

    # Forward Pass
        outputs = net(images)

    # Get predictions
        _, preds = torch.max(outputs.data, 1)

    #Debugging purpose, print labels of predictions
    ##print(preds)

    # Update Corrects
        running_corrects += torch.sum(preds == labels.data).data.item()

    # Calculate Accuracy
    accuracy = running_corrects / datasetLength

    return accuracy

**Train, Validation** *(not so fair: the validation data is taken from the test split)* **and Test**

---



In [0]:
import copy

BATCH_TO_TEST = 10    # number of incremental steps; each step adds 10 classes
TOTAL_CLASSES = 100   # width of the one-hot targets built for the BCE loss
test_accuracies = []  # one test accuracy per incremental step

# cuDNN autotuner. This must be an assignment: the bare expression
# `cudnn.benchmark` only reads the flag and changes nothing.
cudnn.benchmark = True

# The test dataset is created once, outside the loop, and incremented at every
# step instead of being re-initialised.
test_dataset = CIFAR100_tError(DATA_DIR, train=False, transform=eval_transform, download=True)

for i in range(BATCH_TO_TEST):  # one iteration per group of 10 classes
    print(f'Starting training with batch {i+1}')
    lbls, new_labels = make_data_labels(lbls)

    # Indices the new labels are mapped to: 0-9 on the first step, 10-19 on
    # the second, and so on.
    first_idx, last_idx = i * 10, (i + 1) * 10

    # Train and validation datasets are rebuilt at every step and receive only
    # the new labels here. Both validation and test data come from the
    # train=False split.
    train_dataset = CIFAR100_tError(DATA_DIR, train=True, transform=train_transform, download=True)
    eval_dataset = CIFAR100_tError(DATA_DIR, train=False, transform=eval_transform, download=True)
    train_dataset.increment(new_labels, list(range(first_idx, last_idx)))
    eval_dataset.increment(new_labels, list(range(first_idx, last_idx)))
    test_dataset.increment(new_labels, list(range(first_idx, last_idx)))

    # Dataloaders. Only the training loader needs shuffling; accuracy does not
    # depend on the order of evaluation batches.
    train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)
    eval_dataloader = DataLoader(eval_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
    test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

    # Loss: binary cross-entropy on logits against one-hot targets
    criterion = nn.BCEWithLogitsLoss()

    # A fresh optimizer and scheduler for every step, over all parameters
    parameters_to_optimize = net.parameters()
    optimizer = optim.SGD(parameters_to_optimize, lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=MILESTONES, gamma=GAMMA, last_epoch=-1)

    # By default everything is loaded to the cpu
    net = net.to(DEVICE)  # brings the network to the GPU if DEVICE is cuda

    # Model selection is restarted at every step. Keeping the best score of a
    # previous step could prevent `best_model` from ever being refreshed, so
    # the test phase would run on a stale snapshot. Starting below zero
    # guarantees the first epoch always produces a snapshot.
    best_eval_accuracy = -1.0

    current_step = 0

    for epoch in range(NUM_EPOCHS):
        print('Starting epoch {}/{}, LR = {}'.format(epoch+1, NUM_EPOCHS, scheduler.get_last_lr()))

        # Validation at the end of the previous epoch left the network in
        # evaluation mode; switch back once per epoch, not once per batch.
        net.train()

        for images, labels in train_dataloader:
            # Bring data over the device of choice
            images = images.to(DEVICE)
            labels = labels.to(DEVICE)

            # PyTorch accumulates gradients across backward passes, so they
            # are zeroed before each new iteration.
            optimizer.zero_grad()

            # Forward pass
            outputs = net(images)

            # One-hot encode the labels for the binary cross-entropy loss
            labels_onehot = nn.functional.one_hot(labels, TOTAL_CLASSES)
            labels_onehot = labels_onehot.type_as(outputs)

            loss = criterion(outputs, labels_onehot)

            if current_step % LOG_FREQUENCY == 0:
                print('Step {}, Loss {}'.format(current_step, loss.item()))

            loss.backward()   # backward pass: computes gradients
            optimizer.step()  # update weights based on the gradients
            current_step += 1

        # Validation: keep a snapshot of the best network seen in this step
        eval_accuracy = testNet(float(len(eval_dataset)), eval_dataloader, net)
        if eval_accuracy > best_eval_accuracy:
            best_eval_accuracy = eval_accuracy
            best_model = copy.deepcopy(net)
            print('Validation Accuracy: {}'.format(eval_accuracy))

        # Step the scheduler once per epoch
        scheduler.step()

    # Test phase: evaluate the best snapshot on the accumulated test dataset
    test_accuracy = testNet(float(len(test_dataset)), test_dataloader, best_model)
    test_accuracies.append(test_accuracy)
    print('Test Accuracy: {}'.format(test_accuracy))

**Define plot function**

---



In [0]:
def accuracy_plot(accuracies, filename='test.png'):
  """Plot accuracy against the number of classes, save it and show it.

  The i-th accuracy (0-based) is drawn at x = 10 * (i + 1). Each point is
  annotated with its value rounded to 3 decimals, and an interpolating
  spline is drawn through the points when there are at least two of them.

  Args:
      accuracies: Sequence of accuracy values, one per incremental step.
      filename: Path the PNG is written to (default 'test.png').
  """
  ### FOR MEAN STD PLOT https://stackoverflow.com/questions/22481854/plot-mean-and-standard-deviation
  from scipy import interpolate

  class_counts = [10 * step for step in range(1, len(accuracies) + 1)]

  fig, ax = plt.subplots(figsize=(15,14), facecolor='white')

  plt.rc('font', size=20)

  # Measured points
  ax.plot(class_counts, accuracies, '.', label="accuracy", c='orange')

  # Interpolating curve. splprep needs more points than the spline degree,
  # so the degree is lowered for short inputs (the default cubic spline
  # fails with fewer than 4 points) and the curve is skipped below 2 points.
  if len(accuracies) >= 2:
    degree = min(3, len(accuracies) - 1)
    tck, _ = interpolate.splprep([class_counts, accuracies], s=0, k=degree)
    xnew, ynew = interpolate.splev(np.linspace(0, 1, 100), tck, der=0)
    ax.plot(xnew, ynew, label="accuracy", c='orange')

  ax.set_ylabel("Accuracy")
  ax.set_xlabel("Classes")
  ax.minorticks_on()
  ax.set_title("Accuracies obtained with finetuning of a ResNet network")
  ax.set_yticks(np.arange(0, 1.1, .1))
  ax.set_xticks(np.arange(0, 110, 10))
  ax.grid(axis='y', which='major', linestyle='-', linewidth=0.5, color='black')
  ax.grid(axis='y', which='minor', linestyle=':', linewidth=0.5, color='grey')

  # Write the value of every point next to it
  for x_pos, acc in zip(class_counts, accuracies):
    ax.annotate(str(round(acc, 3)), xy=(x_pos, acc))

  fig.savefig(filename, format='png', dpi=300)
  plt.show()

*Print & Plot*

In [0]:
# Show the raw per-step test accuracies, then draw and save the accuracy curve
print(test_accuracies)

accuracy_plot(test_accuracies)
# Values recorded from a previous run, one entry per group of 10 classes added:
# [0.844, 0.4525, 0.2976666666666667, 0.224, 0.1808, 0.1535, 0.125, 0.113625, 0.10577777777777778, 0.0929]