<a href="https://colab.research.google.com/github/Nicordaro/Project_MLDL/blob/master/Copy_of_main_icarl_marco.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

*Clone the project repository containing the required helper modules*

In [0]:
import os
# Clone github repository with data
# if os.path.isdir('./Project_MLDL'):
!rm -rf Project_MLDL
if not os.path.isdir('./CIFAR100_tError'):
  !git clone https://github.com/Nicordaro/Project_MLDL


Cloning into 'Project_MLDL'...
remote: Enumerating objects: 20, done.[K
remote: Counting objects: 100% (20/20), done.[K
remote: Compressing objects: 100% (20/20), done.[K
remote: Total 240 (delta 11), reused 0 (delta 0), pack-reused 220[K
Receiving objects: 100% (240/240), 892.95 KiB | 13.74 MiB/s, done.
Resolving deltas: 100% (118/118), done.


**Imports**

---

In [0]:
import logging
import matplotlib.pyplot as plt
import numpy as np
import random
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim


from PIL import Image
from Project_MLDL.CIFAR100_tError import CIFAR100_tError
from Project_MLDL.model_finetuning import ResNet18
from torch.utils.data import Subset, DataLoader
from torch.backends import cudnn
from torchvision import transforms
from torchvision.models import resnet18
from torchvision.models import resnet34
from tqdm import tqdm

**Arguments**

---





In [0]:
DEVICE = 'cuda' # Device that the model and every batch are moved to: 'cuda' or 'cpu'

# Number of classes in the first training step (10 classes per step).
# NOTE(review): not referenced by any other cell visible in this notebook.
NUM_CLASSES = 10

# Seed for the pseudorandom shuffle of the class order
SEED = 12

BATCH_SIZE = 128     # Mini-batch size of the training and test loaders. Larger batches allow larger learning rates:
                     # as a rule of thumb, scale the learning rate by the same factor as the batch size

LR = 2         # Initial learning rate
MOMENTUM = 0.9       # SGD momentum
WEIGHT_DECAY = 1e-5  # L2 regularisation strength

NUM_EPOCHS = 70     # Training epochs (full passes over the training set) per incremental step
MILESTONES = [48, 62]  # Epochs at which MultiStepLR multiplies the learning rate by GAMMA
GAMMA = 0.2          # Multiplicative factor applied to the learning rate at each milestone

LOG_FREQUENCY = 50   # The training loss is printed every LOG_FREQUENCY optimisation steps

**Transformations definition**

---



In [0]:
# Training pipeline: light augmentation followed by tensor conversion and
# per-channel normalisation.
# Normalisation statistics reference:
# https://gist.github.com/weiaicunzai/e623931921efefd4c331622c344d8151
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),  # mirror the image with probability 0.5
    transforms.Pad(4),                  # pad every border by 4 pixels
    transforms.RandomCrop(32),          # take a random 32x32 crop of the padded image
    transforms.ToTensor(),              # PIL Image -> torch.Tensor
    transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])

# Evaluation pipeline: no augmentation, same tensor conversion and normalisation.
eval_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])

*Class-order shuffle and helper that picks the next 10 classes*

In [0]:
DATA_DIR = './CIFAR100'

# Integer class ids 0..99, shuffled reproducibly with SEED; the resulting
# order decides which classes are introduced at each incremental step.
random.seed(SEED)
lbls = list(range(100))
random.shuffle(lbls)

def make_data_labels(lbls):
    """Split off the next group of (up to) 10 class labels.

    Args:
        lbls: sequence of class labels that have not been used yet.

    Returns:
        A pair ``(remaining, new_labels)``: ``remaining`` is ``lbls`` without
        its first 10 entries and ``new_labels`` is a list holding those first
        10 entries. The input sequence itself is not modified.
    """
    head = list(lbls[:10])
    tail = lbls[10:]
    return tail, head

In [0]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

class iCaRL(nn.Module):
  """Incremental classifier in the style of iCaRL.

  The network is a ResNet18 backbone whose last layer is replaced by a
  512 -> 2048 linear layer (the feature extractor), followed by ReLU and a
  2048 -> 100 linear layer producing one logit per class.

  Training (`update_representation`) combines a binary cross-entropy
  classification loss with a distillation loss on the outputs of the classes
  that already own an exemplar set. Prediction (`classify`) assigns each
  sample to the class whose normalised exemplar-feature mean is nearest.

  The class relies on the notebook-level settings DEVICE, BATCH_SIZE, LR,
  MOMENTUM, WEIGHT_DECAY, NUM_EPOCHS, MILESTONES, GAMMA and LOG_FREQUENCY.

  Attributes:
    exemplar_sets: list with one entry per known class; each entry is a list
      of image tensors stored with a leading batch dimension of size 1.
    exemplar_means: per-class normalised feature means computed by the most
      recent call to `classify`.
  """

  def __init__(self):
    super(iCaRL, self).__init__()

    self.feature_extractor = ResNet18()
    self.feature_extractor.linear = nn.Linear(512, 2048)
    self.relu = nn.ReLU()
    self.fc = nn.Linear(2048, 100)

    self.cls_loss = nn.BCEWithLogitsLoss()
    self.dist_loss = nn.BCELoss()

    # Same values as before (lr=2, weight_decay=1e-5, momentum=0.9), now read
    # from the notebook configuration so both stay in sync.
    self.optimizer = optim.SGD(self.parameters(), lr=LR, weight_decay=WEIGHT_DECAY, momentum=MOMENTUM)

    self.exemplar_sets = []
    self.exemplar_means = []

  def forward(self, x):
    """Return the 100 class logits for a batch of images."""
    x = self.feature_extractor(x)
    x = self.relu(x)
    x = self.fc(x)
    return x

  def classify(self, x):
    """Nearest-mean-of-exemplars prediction.

    Args:
      x: batch of images, already on DEVICE.

    Returns:
      1-D tensor with, for every sample, the index (into `exemplar_sets`) of
      the class whose normalised exemplar mean is closest in feature space.

    Raises:
      RuntimeError: if no exemplar set has been constructed yet.
    """
    if not self.exemplar_sets:
      raise RuntimeError("classify() needs at least one exemplar set; call construct_exemplar_set() first")

    was_training = self.training
    self.eval()  # inference: fixed BatchNorm statistics, no running-stat updates
    try:
      with torch.no_grad():
        exemplar_means = []
        for P_y in self.exemplar_sets:
          # Every exemplar carries a leading batch dim of 1, so concatenating
          # them yields one batch per class and a single forward pass.
          exemplars = torch.cat(P_y, dim=0).to(DEVICE)
          mu_y = self.feature_extractor(exemplars).mean(0, keepdim=True)  # (1, feature_size)
          exemplar_means.append(mu_y / mu_y.norm())
        self.exemplar_means = exemplar_means

        means = torch.cat(exemplar_means, dim=0)  # (n_classes, feature_size)
        feature_x = self.feature_extractor(x)     # (batch_size, feature_size)
        feature_x = feature_x / feature_x.norm(dim=1, keepdim=True)

        # Squared Euclidean distance of every sample to every class mean.
        distances = (feature_x.unsqueeze(1) - means.unsqueeze(0)).pow(2).sum(2)  # (batch_size, n_classes)
        labels = torch.argmin(distances, dim=1)
    finally:
      self.train(was_training)

    return labels

  def reduce_exemplar_sets(self, m):
    """Keep only the first `m` exemplars of every stored class.

    `m` is truncated to an integer so that a float budget such as `K / t`
    can be passed without breaking the slice.
    """
    m = int(m)
    for y, P_y in enumerate(self.exemplar_sets):
      self.exemplar_sets[y] = P_y[:m]

  def construct_exemplar_set(self, X, m):
    """Select `m` exemplars for a new class by herding and store them.

    Args:
      X: dataset with the samples of a single class; indexing it must return
        a tuple whose first element is the image tensor.
      m: number of exemplars to select (truncated to an integer).

    At step k the sample is chosen whose feature, averaged with the features
    selected so far, lies closest to the class mean. The selected images are
    appended to `exemplar_sets` as a new list of (1, C, H, W) tensors.
    """
    m = int(m)

    # The loader must NOT shuffle: row i of the feature matrix has to describe
    # X[i], otherwise the index chosen below points at a different image.
    loader = DataLoader(X, batch_size=128, shuffle=False, drop_last=False, num_workers=4)

    was_training = self.training
    self.eval()
    try:
      with torch.no_grad():
        features = [self.feature_extractor(images.to(DEVICE)) for images, _ in loader]
    finally:
      self.train(was_training)

    features_stacked = torch.cat(features)               # (n_samples, feature_size)
    class_mean = features_stacked.mean(0, keepdim=True)  # (1, feature_size)

    P = []  # exemplar set
    # Running sum of the already selected features; this replaces the repeated
    # forward pass over all previously chosen exemplars at every step.
    selected_sum = torch.zeros_like(class_mean)
    for k in range(1, m + 1):
      candidate_means = (features_stacked + selected_sum) / k
      index = torch.argmin((class_mean - candidate_means).pow(2).sum(1), dim=0).item()
      selected_sum = selected_sum + features_stacked[index:index + 1]
      # NOTE(review): X[index] runs the dataset transform again; if that
      # transform is random, the stored image is a fresh augmentation of the
      # selected sample rather than the exact tensor used for its feature.
      P.append(X[index][0].unsqueeze(dim=0))

    self.exemplar_sets.append(P)

  def update_representation(self, X):
    """Train the network on the new classes plus the stored exemplars.

    Args:
      X: iterable of datasets (one per new class); iterating a dataset must
        yield `(image, label)` pairs.

    The outputs of the current network for the already known classes are
    recorded first and used as distillation targets during training.
    """
    training = []
    index = 0
    # Exemplars of class y are labelled with their position in exemplar_sets.
    for y, P_y in enumerate(self.exemplar_sets):
      for tensor_img in P_y:
        # Exemplars are stored as (1, C, H, W); drop the batch dim so they can
        # be collated together with the (C, H, W) images of the new classes.
        training.append((tensor_img.squeeze(0), y, index))
        index += 1
    # NOTE(review): each new image is transformed once here, so a random
    # training transform is frozen for all epochs.
    for img_class in X:
      for image, label in img_class:
        training.append((image, label, index))
        index += 1

    self.to(DEVICE)
    n_old = len(self.exemplar_sets)

    # Distillation targets: sigmoid outputs of the network before this update.
    # A dedicated loader without shuffling or drop_last guarantees that every
    # sample receives a target (a dropped sample would keep an all-zero one).
    q = torch.zeros(len(training), 100).to(DEVICE)
    if n_old > 0:
      target_loader = DataLoader(training, batch_size=BATCH_SIZE, shuffle=False, num_workers=4, drop_last=False)
      self.eval()
      with torch.no_grad():
        for images, _, indices in target_loader:
          images = images.to(DEVICE)
          indices = indices.to(DEVICE)
          q[indices] = torch.sigmoid(self.forward(images))

    train_dataloader = DataLoader(training, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)

    optimizer = self.optimizer
    # The optimizer is shared across incremental steps and the previous
    # schedule left its learning rate decayed; restart every step from LR.
    for group in optimizer.param_groups:
      group['lr'] = LR
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=MILESTONES, gamma=GAMMA, last_epoch=-1)

    cudnn.benchmark = True  # let cuDNN auto-tune its kernels (a bare attribute access does nothing)

    self.train()
    current_step = 0
    for epoch in range(NUM_EPOCHS):
      print('Starting epoch {}/{}, LR = {}'.format(epoch+1, NUM_EPOCHS, scheduler.get_last_lr()))

      for images, labels, indices in train_dataloader:
        images = images.to(DEVICE)
        labels = labels.to(DEVICE)
        indices = indices.to(DEVICE)

        # Gradients accumulate across backward passes; clear them first.
        optimizer.zero_grad()

        outputs = self.forward(images)

        # Classification term: BCE-with-logits against one-hot targets.
        labels_onehot = nn.functional.one_hot(labels, 100).type_as(outputs)
        loss = self.cls_loss(outputs, labels_onehot)

        # Distillation term: sum over the old classes of the batch-mean BCE
        # between current and recorded sigmoid outputs. The mean over the
        # (batch, n_old) slice times n_old equals that per-class sum.
        if n_old > 0:
          g = torch.sigmoid(outputs)
          loss = loss + n_old * self.dist_loss(g[:, :n_old], q[indices][:, :n_old])

        if current_step % LOG_FREQUENCY == 0:
          print('Step {}, Loss {}'.format(current_step, loss.item()))

        loss.backward()   # compute gradients
        optimizer.step()  # update weights
        current_step += 1

      scheduler.step()


In [0]:
net = iCaRL()
K = 1000  # total exemplar budget, shared by all classes seen so far
t = 0     # number of classes seen so far; also the next remapped label
test_accuracies = []  # one test accuracy per incremental step (plotted at the end of the notebook)

# Work on a copy so that re-running this cell does not consume the shuffled
# class order held in `lbls`.
remaining_labels = list(lbls)

test_dataset = CIFAR100_tError(DATA_DIR, train=False, transform=eval_transform, download=True)
for i in range(0,10): # batches of 10
  print(f"processing batch {i+1}")

  # One training dataset per new class.
  # NOTE(review): `increment([label], [t])` is assumed to add original class
  # `label` to the dataset under the new label `t` - confirm in CIFAR100_tError.
  train_datasets = []
  remaining_labels, new_labels = make_data_labels(remaining_labels)
  for label in new_labels:
    train_dataset = CIFAR100_tError(DATA_DIR, train=True, transform=train_transform, download=True)
    train_dataset.increment([label],[t])
    test_dataset.increment([label],[t])
    train_datasets.append(train_dataset)
    t += 1

  # Build the test loader once per step, after all new classes were added.
  test_dataloader = DataLoader(test_dataset,batch_size=BATCH_SIZE,shuffle=False,num_workers=4)

  net.update_representation(train_datasets)

  # Exemplars per class; integer division because the value is used as a
  # slice bound and as a loop count.
  m = K // t
  net.reduce_exemplar_sets(m)
  for X in train_datasets:
    net.construct_exemplar_set(X,m) #new exemplar sets

  # Evaluate on the test samples of every class seen so far.
  running_corrects = 0
  with torch.no_grad():
    for images,labels in test_dataloader:
      images = images.to(DEVICE)
      labels = labels.to(DEVICE)
      preds = net.classify(images)
      running_corrects += torch.sum(preds == labels).item()
  accuracy = running_corrects / float(len(test_dataset))
  test_accuracies.append(accuracy)
  print(f"Test Accuracy: {accuracy}")

Files already downloaded and verified
processing batch 1
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Starting epoch 1/70, LR = [2]
Step 0, Loss 0.7028124332427979
Starting epoch 2/70, LR = [2]
Step 50, Loss 0.032497428357601166
Starting epoch 3/70, LR = [2]
Step 100, Loss 0.03195340558886528
Starting epoch 4/70, LR = [2]
Step 150, Loss 0.029297754168510437
Starting epoch 5/70, LR = [2]
Starting epoch 6/70, LR = [2]
Step 200, Loss 0.02739444002509117
Starting epoch 7/70, LR = [2]
Step 250, Loss 0.024822918698191643
Starting epoch 8/70, LR = [2]
Step 300, Loss 0.02214529924094677
Starting epoch 9/70, LR = [2]
Step 350, Loss 0.02330085076391697
Starting epoch

KeyboardInterrupt: 

In [0]:
# Debug inspection: shape of the predictions from the last evaluated test batch.
# NOTE(review): only works once the training/evaluation cell has assigned `preds`.
preds.size()

**Define plot function**

---



In [0]:
def accuracy_plot(accuracies):
  """Plot the test accuracy after each incremental step and save it to 'test.png'.

  Args:
    accuracies: sequence of accuracies in [0, 1]; entry i is plotted at
      x = 10 * (i + 1) classes.

  Every point is drawn and annotated with its value rounded to 3 decimals.
  With at least two points an interpolating spline is drawn through them;
  its degree is lowered for short sequences, because a cubic spline needs at
  least four points.
  """
  ### FOR MEAN STD PLOT https://stackoverflow.com/questions/22481854/plot-mean-and-standard-deviation
  from scipy import interpolate

  accuracies = list(accuracies)
  n_points = len(accuracies)
  classes_seen = [10 * step for step in range(1, n_points + 1)]

  fig, ax = plt.subplots(figsize=(15,14), facecolor='white')

  plt.rc('font', size=20)
  ax.plot(classes_seen, accuracies, '.', c='orange', label="accuracy")
  if n_points >= 2:
    # splprep requires more points than the spline degree.
    degree = min(3, n_points - 1)
    tck, _ = interpolate.splprep([classes_seen, accuracies], s=0, k=degree)
    xnew, ynew = interpolate.splev(np.linspace(0, 1, 100), tck, der=0)
    ax.plot(xnew, ynew, c='orange')

  ax.set_ylabel("Accuracy")
  ax.set_xlabel("Classes")
  ax.minorticks_on()
  ax.set_title("Accuracies obtained with finetuning of a ResNet network")
  ax.set_yticks(np.arange(0, 1.1, .1))
  ax.set_xticks(np.arange(0, 110, 10))
  ax.grid(axis='y', which='major', linestyle='-', linewidth=0.5, color='black')
  ax.grid(axis='y', which='minor', linestyle=':', linewidth=0.5, color='grey')
  for in_i, in_j in zip(classes_seen, accuracies):  # Write each value next to its point
    ax.annotate(str(round(in_j, 3)), xy=(in_i, in_j))

  fig.savefig('test.png', format='png', dpi=300)
  plt.show()

*Print & Plot*

In [0]:
# Show the collected per-step test accuracies, then plot them.
# NOTE(review): `test_accuracies` must have been defined by an earlier cell
# before this one runs.
print(test_accuracies)

accuracy_plot(test_accuracies)
