<a href="https://colab.research.google.com/github/Nicordaro/Project_MLDL/blob/master/main_lwf.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Imports**

In [0]:
# Import Github folder
import os
# if os.path.isdir('./Project_MLDL'):
!rm -rf Project_MLDL
if not os.path.isdir('./CIFAR100_tError'):
  !git clone https://github.com/Nicordaro/Project_MLDL

import copy
import logging
import matplotlib.pyplot as plt
import numpy as np
import random
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim

from PIL import Image
from Project_MLDL.CIFAR100_tError import CIFAR100_tError
from Project_MLDL.model_finetuning import ResNet18
from scipy import interpolate
from torch.utils.data import Subset, DataLoader
from torch.backends import cudnn
from torchvision import transforms
from torchvision.models import resnet18
from torchvision.models import resnet34
from tqdm import tqdm

Cloning into 'Project_MLDL'...
remote: Enumerating objects: 72, done.[K
remote: Counting objects:   1% (1/72)[Kremote: Counting objects:   2% (2/72)[Kremote: Counting objects:   4% (3/72)[Kremote: Counting objects:   5% (4/72)[Kremote: Counting objects:   6% (5/72)[Kremote: Counting objects:   8% (6/72)[Kremote: Counting objects:   9% (7/72)[Kremote: Counting objects:  11% (8/72)[Kremote: Counting objects:  12% (9/72)[Kremote: Counting objects:  13% (10/72)[Kremote: Counting objects:  15% (11/72)[Kremote: Counting objects:  16% (12/72)[Kremote: Counting objects:  18% (13/72)[Kremote: Counting objects:  19% (14/72)[Kremote: Counting objects:  20% (15/72)[Kremote: Counting objects:  22% (16/72)[Kremote: Counting objects:  23% (17/72)[Kremote: Counting objects:  25% (18/72)[Kremote: Counting objects:  26% (19/72)[Kremote: Counting objects:  27% (20/72)[Kremote: Counting objects:  29% (21/72)[Kremote: Counting objects:  30% (22/72)[Kremote: Coun

---

**Functions**

In [0]:
def make_data_labels(input_list):
  """
  Split the leading group of ten items off a sequence.

  Returns a pair (remaining, taken):
    remaining -- the slice of `input_list` from index 10 onwards
    taken     -- a new list holding the first 10 elements of `input_list`
                 (fewer when the input is shorter)
  """
  taken = list(input_list[:10])
  remaining = input_list[10:]
  return remaining, taken


def testNet(datasetLength, dataloader, net):
  """
  This function tests the accuracy of the neural network on the images of the
  input dataloader.
  """
  net = net.to(DEVICE)
  net.train(False)

  running_corrects = 0

  for images, labels in dataloader:
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    outputs = net(images)  # Forward Pass
    _, preds = torch.max(outputs.data, 1)  # Get predictions

    #Debugging purpose, print labels of predictions
    ##print(preds)

    # Update Corrects
    running_corrects += torch.sum(preds == labels.data).data.item()

  # Calculate Accuracy
  accuracy = running_corrects / datasetLength

  return accuracy


def accuracy_plot(accuracies):
  """
  Plot one point per entry of `accuracies`, placed at x = 10, 20, ...,
  together with a spline interpolated through those points. Each point is
  annotated with its value rounded to 3 decimals. The figure is saved to
  'test.png' and then displayed.
  """
  # FOR MEAN STD PLOT https://stackoverflow.com/questions/22481854/plot-mean-and-standard-deviation

  # x position of every accuracy value (10 per entry)
  classes_seen = [10 * step for step in range(1, len(accuracies) + 1)]

  # Parametric spline fitted with s=0, evaluated at 100 evenly spaced parameter values
  spline, _ = interpolate.splprep([classes_seen, accuracies], s=0)
  curve_x, curve_y = interpolate.splev(np.linspace(0, 1, 100), spline, der=0)

  fig, ax = plt.subplots(figsize=(15, 14), facecolor='white')

  plt.rc('font', size=20)
  plt.plot(classes_seen, accuracies, '.', curve_x, curve_y, label="accuracy", c='orange')
  ax.set_ylabel("Accuracy")
  ax.set_xlabel("Classes")
  ax.minorticks_on()
  ax.set_title("Accuracies obtained with finetuning of a ResNet network")
  ax.set_yticks(np.arange(0, 1.1, .1))
  ax.set_xticks(np.arange(0, 110, 10))
  ax.grid(axis='y', which='major', linestyle='-', linewidth='0.5', color='black')
  ax.grid(axis='y', which='minor', linestyle=':', linewidth='0.5', color='grey')

  # Write the value of every point next to it
  for x_pos, acc in zip(classes_seen, accuracies):
    ax.annotate(str(round(acc, 3)), xy=(x_pos, acc))

  plt.savefig('test.png', format='png', dpi=300)
  plt.show()

---

**Arguments**





In [0]:
# Use the GPU when one is available and fall back to the CPU otherwise, so the
# notebook also runs on CPU-only runtimes
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

NUM_CLASSES = 10  # Init at 10 because first train is on 10 classes

SEED = 12  # Used for the pseudorandom shuffle of the split

BATCH_SIZE = 128  # Higher batch sizes allow for larger learning rates. An empirical heuristic suggests that, when changing
                  # the batch size, the learning rate should change by the same factor to have comparable results

LR = 2  # The initial Learning Rate
MOMENTUM = 0.9  # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 1e-5  # Regularization, you can keep this at the default

NUM_EPOCHS = 70  # Total number of training epochs
MILESTONES = [48, 62]  # Epochs at which the learning rate is multiplied by GAMMA (MultiStepLR milestones)
GAMMA = 0.2  # Multiplicative factor for learning rate step-down

LOG_FREQUENCY = 50  # The training loss is printed every LOG_FREQUENCY optimization steps

---

**Transformations definition**



In [0]:
# Per-channel mean and std applied by both pipelines
# Reference linked by the original author: https://gist.github.com/weiaicunzai/e623931921efefd4c331622c344d8151
_NORM_MEAN = (0.485, 0.456, 0.406)
_NORM_STD = (0.229, 0.224, 0.225)

# Training pipeline: light augmentation followed by tensor conversion and normalization
_train_steps = [
    transforms.RandomHorizontalFlip(),  # Randomly flip the image with probability of 0.5
    transforms.Pad(4),                  # Add 4 pixels of padding
    transforms.RandomCrop(32),          # Take a random 32x32 crop of the padded image
    transforms.ToTensor(),              # Turn PIL Image to torch.Tensor
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
]
train_transform = transforms.Compose(_train_steps)

# Evaluation pipeline: no augmentation, only tensor conversion and normalization
_eval_steps = [
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
]
eval_transform = transforms.Compose(_eval_steps)

---

**Network Initialization**

In [0]:
# Network that is trained incrementally, one group of classes at a time
net = ResNet18()
# Holder for the best checkpoint: the training loop replaces it with a deep
# copy of `net` whenever the validation accuracy improves
best_model = ResNet18()

---

**Train, Validation** (*not so fair*: it is run on test-set images of the current classes) **and Test**

In [0]:
# Incremental training with a distillation term.
# The 100 classes are shuffled once and learned in NUM_GROUPS consecutive
# groups. From the second group on, a copy of the network taken at the start
# of the group provides soft targets for the outputs of the earlier groups,
# so that learning the new classes is also penalized for drifting from them.
from torch.autograd import Variable  # No longer used by this cell; kept in case other cells rely on it

TOTAL_CLASSES = 100  # Width of the one-hot targets (must match the number of network outputs)
NUM_GROUPS = 10  # Number of training groups in which to divide the 100 classes
CLASSES_PER_GROUP = 10  # Must match the group size hard-coded in make_data_labels

# List of 100 classes in random order
lbls = list(range(TOTAL_CLASSES))
random.seed(SEED)
random.shuffle(lbls)

test_accuracies = []

# Folder where to store CIFAR100 Dataset
DATA_DIR = './CIFAR100'

# Let cuDNN benchmark and pick its fastest kernels. The flag has to be
# assigned: the bare expression `cudnn.benchmark` has no effect.
cudnn.benchmark = True

# Define test dataset outside in order to increment it, instead of initializing it every cycle iteration
test_dataset = CIFAR100_tError(DATA_DIR, train=False, transform=eval_transform, download=True)

for i in range(NUM_GROUPS):  # Iterating on all the groups of classes
  print('\n', f'Training network on group {i+1} of {NUM_GROUPS}', '\n')
  best_eval_accuracy = 0

  # Output columns [0, seen_classes) are the ones used by the earlier groups;
  # the current group uses [seen_classes, group_end).
  # NOTE(review): presumably increment() remaps new_labels to these indices — confirm
  seen_classes = i * CLASSES_PER_GROUP
  group_end = seen_classes + CLASSES_PER_GROUP

  # Every iteration the first 10 classes of the shuffled list of all unseen
  # classes are selected to train the network and removed from the shuffled list
  lbls, new_labels = make_data_labels(lbls)

  # Training set is the train dataset of exactly 10 new classes
  train_dataset = CIFAR100_tError(DATA_DIR, train=True, transform=train_transform, download=True)
  train_dataset.increment(new_labels, list(range(seen_classes, group_end)))

  # Evaluation set is the subset of the test dataset of the only 10 new classes
  # (for this reason -> "Not so fair" Validation)
  eval_dataset = CIFAR100_tError(DATA_DIR, train=False, transform=eval_transform, download=True)
  eval_dataset.increment(new_labels, list(range(seen_classes, group_end)))

  # Test dataset is the test dataset of the 10 new classes and all the previous
  test_dataset.increment(new_labels, list(range(seen_classes, group_end)))

  # Define dataloaders. Only the training data is shuffled: accuracy does not
  # depend on the order in which evaluation samples are visited.
  train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)
  eval_dataloader = DataLoader(eval_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
  test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

  # Prepare Training
  criterion = nn.BCEWithLogitsLoss()  # Classification loss, applied to raw logits
  criterion2 = nn.BCELoss()  # Distillation loss, applied to sigmoid probabilities
  parameters_to_optimize = net.parameters()  # Parameters to optimize
  optimizer = optim.SGD(parameters_to_optimize, lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
  scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=MILESTONES, gamma=GAMMA, last_epoch=-1)
  net = net.to(DEVICE)  # this will bring the network to GPU if DEVICE is cuda
  current_step = 0

  if i > 0:
    # Frozen snapshot of the network as it is before learning the new group.
    # Evaluation mode is set explicitly instead of relying on the mode the
    # last testNet call happened to leave `net` in.
    old = copy.deepcopy(net)
    old.eval()

  # Training
  for epoch in range(NUM_EPOCHS):
    print('Starting epoch {}/{}, LR = {}'.format(epoch+1, NUM_EPOCHS, scheduler.get_last_lr()))

    for images, labels in train_dataloader:
      # Bring data over the device of choice
      images = images.to(DEVICE)
      labels = labels.to(DEVICE)

      net.train()  # Back to training mode (testNet switches the network to evaluation mode)
      optimizer.zero_grad()  # Zero-ing the gradients
      outputs = net(images)  # Forward pass to the network

      # One hot encoding labels for binary cross-entropy loss.
      # Only the columns from the current group onwards enter the
      # classification loss; the columns of earlier groups are left to the
      # distillation term below.
      labels_onehot = nn.functional.one_hot(labels, TOTAL_CLASSES)
      labels_onehot = labels_onehot.type_as(outputs)
      loss = criterion(outputs[:, seen_classes:], labels_onehot[:, seen_classes:])

      if i > 0:
        # Soft targets from the frozen snapshot; no gradient is tracked for them
        with torch.no_grad():
          old_probs = torch.sigmoid(old(images))[:, :seen_classes]
        new_probs = torch.sigmoid(outputs)[:, :seen_classes]
        loss = loss + criterion2(new_probs, old_probs)

      if current_step % LOG_FREQUENCY == 0:
        print('Step {}, Loss {}'.format(current_step, loss.item()))

      loss.backward()  # backward pass: computes gradients
      optimizer.step()  # update weights based on accumulated gradients
      current_step += 1

    # Validation is "Not so fair" because it is performed on eval_dataset
    # which is a subset of the current 10 new training classes of the
    # Test dataset. (Validation is not to be performed on Test data)
    eval_accuracy = testNet(float(len(eval_dataset)), eval_dataloader, net)
    if eval_accuracy > best_eval_accuracy:
      best_eval_accuracy = eval_accuracy
      best_model = copy.deepcopy(net)
      print('Validation Accuracy: {}'.format(eval_accuracy))

    scheduler.step()  # Step the scheduler

  # Test Phase (Performed on current 10 new classes and all the previous ones)
  test_accuracy = testNet(float(len(test_dataset)), test_dataloader, best_model)
  test_accuracies.append(test_accuracy)
  print('Test Accuracy: {}'.format(test_accuracy))

Files already downloaded and verified

 Training network on group 1 of 10 

Files already downloaded and verified
Files already downloaded and verified
Starting epoch 1/70, LR = [2]
Step 0, Loss 0.6720407009124756
Validation Accuracy: 0.387
Starting epoch 2/70, LR = [2]
Step 50, Loss 0.026718996465206146
Validation Accuracy: 0.445
Starting epoch 3/70, LR = [2]
Step 100, Loss 0.021802863106131554
Validation Accuracy: 0.49
Starting epoch 4/70, LR = [2]
Step 150, Loss 0.02308138832449913
Validation Accuracy: 0.552
Starting epoch 5/70, LR = [2]
Starting epoch 6/70, LR = [2]
Step 200, Loss 0.020000986754894257
Validation Accuracy: 0.575
Starting epoch 7/70, LR = [2]
Step 250, Loss 0.019739314913749695
Validation Accuracy: 0.607
Starting epoch 8/70, LR = [2]
Step 300, Loss 0.016790248453617096
Starting epoch 9/70, LR = [2]
Step 350, Loss 0.01582823507487774
Starting epoch 10/70, LR = [2]
Validation Accuracy: 0.649
Starting epoch 11/70, LR = [2]
Step 400, Loss 0.013747407123446465
Starting ep

---

**Prints & Plots**

In [0]:
# Show the test accuracy recorded after each group of classes, then plot the values
print(test_accuracies)

accuracy_plot(test_accuracies)
# Values obtained in a previous run:
# [0.844, 0.4525, 0.2976666666666667, 0.224, 0.1808, 0.1535, 0.125, 0.113625, 0.10577777777777778, 0.0929]