# Incremental learning on image classification

## Libraries and packages


In [None]:
!pip3 install 'torch==1.4.0'
!pip3 install 'torchvision==0.5.0'
!pip3 install 'Pillow-SIMD'
!pip3 install 'tqdm'

In [2]:
import os
import urllib
import logging

import numpy as np

import torch
import torch.nn as nn
import torch.nn.init as init
import torch.optim as optim
from torch.utils.data import Dataset, Subset, DataLoader, ConcatDataset
from torch.backends import cudnn

import torchvision
from torchvision import transforms
from torchvision.models import resnet34

from PIL import Image
from tqdm import tqdm

from copy import deepcopy

from sklearn.metrics import confusion_matrix

In [None]:
# GitHub credentials for cloning private repository.
# Fill these in before running the cell and do not commit real values.
username = ''
password = ''

# Download packages from repository
# Percent-encode the password so that special characters survive being
# embedded in the clone URL below.
# NOTE(review): only `import urllib` is visible in the import cell;
# `urllib.parse` is presumably loaded as a side effect of another import -
# confirm, or import `urllib.parse` explicitly.
password = urllib.parse.quote(password)
!git clone https://$username:$password@github.com/manuelemacchia/incremental-learning-image-classification.git
# Clear the password from the notebook namespace once the clone is done.
password = ''

# Move the repository contents into the working directory, then delete the
# clone directory and the README.
!mv -v incremental-learning-image-classification/* .
!rm -rf incremental-learning-image-classification README.md

In [None]:
from data.cifar100 import Cifar100
from model.resnet_cifar import resnet32
from model.manager import Manager
from model.icarl import Exemplars
from model.icarl import iCaRL
from utils import plot

In [None]:
import pickle
import time
from google.colab import files

# Make sure the directory used by obj_save/obj_load exists.
# os.makedirs replaces the `!mkdir` shell magic, so the cell also runs outside
# IPython, and exist_ok=True removes the separate check-then-create step.
os.makedirs('obj', exist_ok=True)

def obj_save(obj, name):
    """Pickle ``obj`` to ``obj/<name>.pkl`` and trigger a Colab download.

    Args:
        obj: Any picklable object.
        name: File name without the ``.pkl`` extension.
    """
    target = 'obj/' + name + '.pkl'

    with open(target, 'wb') as handle:
        pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)

    # Pause before starting the download.
    # NOTE(review): the reason for the 5 s wait is not documented; the file
    # is already closed at this point.
    time.sleep(5)

    files.download(target)

def obj_load(name):
    """Return the object stored in ``obj/<name>.pkl``.

    Only load files that come from a trusted source: unpickling can execute
    arbitrary code.

    Args:
        name: File name without the ``.pkl`` extension.
    """
    source = 'obj/' + name + '.pkl'
    with open(source, 'rb') as handle:
        loaded = pickle.load(handle)
    return loaded

## Arguments

In [5]:
# Directories
DATA_DIR = 'data'       # Directory where the dataset will be downloaded

# Settings
DEVICE = 'cuda'         # Device handed to Manager, LWF and iCaRL

# Dataset

RANDOM_STATE = None     # NOTE(review): not referenced by the visible cells; RANDOM_STATES is used instead

RANDOM_STATES = [658, 423, 422]      # One seed per run, for reproducibility of results
                                     # Note: different random states give very different
                                     # splits and therefore very different results.

NUM_CLASSES = 100       # Total number of classes
NUM_BATCHES = 10        # Presumably the number of class batches (NUM_CLASSES / CLASS_BATCH_SIZE) - TODO confirm
CLASS_BATCH_SIZE = 10   # Size of batch of classes for incremental learning

VAL_SIZE = 0.1          # Proportion of validation set with respect to training set (between 0 and 1)

# Training
BATCH_SIZE = 64         # Batch size (iCaRL sets this to 128)
LR = 2                  # Initial learning rate

MOMENTUM = 0.9          # Momentum for stochastic gradient descent (SGD)
WEIGHT_DECAY = 1e-5     # Weight decay from iCaRL

NUM_RUNS = 3            # Number of runs of every method
                        # Note: this should be at least 3 to have a fair benchmark
                        # RANDOM_STATES must provide at least NUM_RUNS seeds

NUM_EPOCHS = 70         # Total number of training epochs
MILESTONES = [49, 63]   # Step down policy from iCaRL (MultiStepLR)
                        # Decrease the learning rate by gamma at each milestone
GAMMA = 0.2             # Gamma factor from iCaRL

## Fine tuning

### Data preparation

In [None]:
# Training pipeline: random 32x32 crop (padding 4) and random horizontal flip,
# followed by tensor conversion and per-channel normalization.
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),  # Turn PIL Image to torch.Tensor
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# Evaluation pipeline: no augmentation, same tensor conversion and normalization.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

In [None]:
# Build, for every run, one train/validation/test DataLoader per split.
# train_dataloaders[run][split] and val_dataloaders[run][split] only contain
# the classes of that split; test_dataloaders[run][split] contains every class
# seen up to and including that split.
train_dataloaders = [[] for _ in range(NUM_RUNS)]
val_dataloaders = [[] for _ in range(NUM_RUNS)]
test_dataloaders = [[] for _ in range(NUM_RUNS)]

# Number of incremental splits. The loop below indexes batch_splits, so it
# must run once per batch of classes, not once per class inside a batch.
num_splits = NUM_CLASSES // CLASS_BATCH_SIZE

for run_i in range(NUM_RUNS):

    random_state = RANDOM_STATES[run_i]

    for split_i in range(num_splits):

        # Download dataset only at first instantiation
        download = (run_i == 0 and split_i == 0)

        # Create CIFAR100 dataset
        train_dataset = Cifar100(DATA_DIR, train=True, download=download,
                                 random_state=random_state, transform=train_transform)
        test_dataset = Cifar100(DATA_DIR, train=False, download=False,
                                random_state=random_state, transform=test_transform)

        # Subspace of CIFAR100 of 10 classes
        train_dataset.set_classes_batch(train_dataset.batch_splits[split_i])
        test_dataset.set_classes_batch(
            [test_dataset.batch_splits[i] for i in range(0, split_i+1)])

        # Define train and validation indices
        train_indices, val_indices = train_dataset.train_val_split(
            VAL_SIZE, random_state)

        train_dataloaders[run_i].append(DataLoader(Subset(train_dataset, train_indices),
                                                   batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True))

        # Evaluate on the whole validation set in a fixed order: shuffling
        # adds nothing at evaluation time and drop_last would silently
        # discard the samples of the last incomplete batch.
        val_dataloaders[run_i].append(DataLoader(Subset(train_dataset, val_indices),
                                                 batch_size=BATCH_SIZE, shuffle=False, num_workers=4, drop_last=False))

        # Dataset with all seen class.
        # Keep dataset order so that the predictions collected while testing
        # line up with test_dataset.targets (used for the confusion matrix).
        test_dataloaders[run_i].append(DataLoader(test_dataset,
                                                  batch_size=BATCH_SIZE, shuffle=False, num_workers=4))

In [None]:
# Sanity check: visualize a batch of images
dataiter = iter(test_dataloaders[0][5])
# Use the builtin next(): the `.next()` method is a Python 2-era alias that is
# not part of the iterator protocol and is not available on every PyTorch release.
images, labels = next(dataiter)

plot.image_grid(images, one_channel=False)
# Labels present in the batch and how many times each one occurs
unique_labels = np.unique(labels, return_counts=True)
unique_labels

### Execution

In [None]:
# Score containers: one dictionary per run, keyed by "Split <i>".
train_loss_history = []
train_accuracy_history = []
val_loss_history = []
val_accuracy_history = []
test_accuracy_history = []

# Iterate over runs
for train_dataloader, val_dataloader, test_dataloader in zip(
        train_dataloaders, val_dataloaders, test_dataloaders):

    # Open a new score dictionary for this run
    train_loss_history.append({})
    train_accuracy_history.append({})
    val_loss_history.append({})
    val_accuracy_history.append({})
    test_accuracy_history.append({})

    # The same network and loss are reused across all splits of the run
    net = resnet32()
    criterion = nn.BCEWithLogitsLoss()

    i = 0
    for train_split, val_split, test_split in zip(
            train_dataloader, val_dataloader, test_dataloader):

        current_split = f"Split {i}"
        print(current_split)

        # New optimizer and scheduler for every split, built on the
        # current parameters of the network
        parameters_to_optimize = net.parameters()
        optimizer = optim.SGD(parameters_to_optimize,
                              lr=LR,
                              momentum=MOMENTUM,
                              weight_decay=WEIGHT_DECAY)
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                                   milestones=MILESTONES,
                                                   gamma=GAMMA)

        manager = Manager(DEVICE, net, criterion, optimizer, scheduler,
                          train_split, val_split, test_split)

        # Train on the current split and record the returned scores
        scores = manager.train(NUM_EPOCHS)

        train_loss_history[-1][current_split] = scores[0]
        train_accuracy_history[-1][current_split] = scores[1]
        val_loss_history[-1][current_split] = scores[2]
        val_accuracy_history[-1][current_split] = scores[3]

        # Test the model on classes seen until now
        test_accuracy, all_preds = manager.test()
        test_accuracy_history[-1][current_split] = test_accuracy

        # Add 10 output nodes to the last FC layer for the next split
        manager.increment_classes(n=10)

        i += 1
        

In [None]:
# Confusion matrix over last run test predictions
# `test_dataset` and `all_preds` are the globals left behind by the data
# preparation and execution cells: the last dataset that was built and the
# predictions returned by the last call to manager.test().
# NOTE(review): `targets` is in dataset order, while `all_preds` presumably
# follows the iteration order of the test DataLoader. If that loader is
# created with shuffle=True the two are misaligned and the matrix is
# meaningless - confirm the loader does not shuffle.
targets = test_dataset.targets
preds = all_preds.to('cpu').numpy()

plot.heatmap_cm(targets, preds)

In [None]:
def mean_std_scores(train_loss_history, train_accuracy_history,
                    val_loss_history, val_accuracy_history, test_accuracy_history):
    """Compute mean and standard deviation of every score across runs.

    Each history is a list with one dictionary per run, mapping a split name
    (e.g. ``'Split 0'``) to the score obtained on that split. The runs are
    taken from the histories themselves, so the function does not depend on
    the global ``NUM_RUNS`` and works with any number of runs.

    Args:
        train_loss_history: Per-run training losses.
        train_accuracy_history: Per-run training accuracies.
        val_loss_history: Per-run validation losses.
        val_accuracy_history: Per-run validation accuracies.
        test_accuracy_history: Per-run test accuracies.

    Returns:
        Tuple ``(train_loss, train_accuracy, val_loss, val_accuracy,
        test_accuracy)``. Each element is an array with one row per split, in
        the key order of the first run of ``train_loss_history``; column 0 is
        the mean over runs and column 1 the (population) standard deviation.

    Raises:
        IndexError: If ``train_loss_history`` is empty.
        KeyError: If a run is missing one of the splits.
    """
    # Split names, e.g. 'Split 0', 'Split 1', ...
    keys = list(train_loss_history[0].keys())

    def _mean_std(history):
        # One [mean, std] row per split, computed over all runs of `history`.
        rows = []
        for key in keys:
            values = np.array([run_scores[key] for run_scores in history])
            rows.append([values.mean(), values.std()])
        return np.array(rows)

    return (_mean_std(train_loss_history),
            _mean_std(train_accuracy_history),
            _mean_std(val_loss_history),
            _mean_std(val_accuracy_history),
            _mean_std(test_accuracy_history))

In [None]:
# Get the average scores
train_loss, train_accuracy, val_loss, val_accuracy,\
test_accuracy = mean_std_scores(train_loss_history, train_accuracy_history,
                                   val_loss_history, val_accuracy_history, test_accuracy_history)

In [None]:
plot.train_val_scores(train_loss, train_accuracy, val_loss, val_accuracy, None)

In [None]:
plot.test_scores(test_accuracy, None)

In [None]:
# @todo: create utils package for functions

import ast

def load_json_scores(root):
    """Load the five score histories saved in directory ``root``.

    Files are parsed as JSON, which also accepts the ``NaN``, ``Infinity``,
    ``null``, ``true`` and ``false`` tokens that ``json.dump`` can write and
    that ``ast.literal_eval`` rejects. A file that is not valid JSON is parsed
    as a Python literal instead, so files readable by the previous
    implementation stay readable.

    Args:
        root: Directory containing ``train_loss_history.json``,
            ``train_accuracy_history.json``, ``val_loss_history.json``,
            ``val_accuracy_history.json`` and ``test_accuracy_history.json``.

    Returns:
        Tuple ``(train_loss_history, train_accuracy_history, val_loss_history,
        val_accuracy_history, test_accuracy_history)``.

    Raises:
        FileNotFoundError: If one of the files is missing.
    """
    # Local import: the notebook imports json only in a later cell.
    import json

    def _read(stem):
        with open(os.path.join(root, stem + '.json')) as f:
            text = f.read()
        try:
            return json.loads(text)
        except json.JSONDecodeError:
            # Not strict JSON: fall back to a Python literal
            return ast.literal_eval(text)

    return (_read('train_loss_history'),
            _read('train_accuracy_history'),
            _read('val_loss_history'),
            _read('val_accuracy_history'),
            _read('test_accuracy_history'))

In [None]:
# @todo: create utils package for functions
import json

def save_json_scores(root, train_loss_history, train_accuracy_history,
                     val_loss_history, val_accuracy_history, test_accuracy_history):
    """Write the five score histories as JSON files in directory ``root``.

    The directory is created when it does not exist yet. Each history is
    written to ``<root>/<name>.json``, where ``<name>`` is the name of the
    corresponding parameter.

    Args:
        root: Output directory.
        train_loss_history: Per-run training losses.
        train_accuracy_history: Per-run training accuracies.
        val_loss_history: Per-run validation losses.
        val_accuracy_history: Per-run validation accuracies.
        test_accuracy_history: Per-run test accuracies.
    """
    # An empty root means "current directory", which needs no creation.
    if root:
        os.makedirs(root, exist_ok=True)

    histories = {
        'train_loss_history': train_loss_history,
        'train_accuracy_history': train_accuracy_history,
        'val_loss_history': val_loss_history,
        'val_accuracy_history': val_accuracy_history,
        'test_accuracy_history': test_accuracy_history,
    }

    for stem, history in histories.items():
        with open(os.path.join(root, stem + '.json'), 'w') as fout:
            json.dump(history, fout)

In [None]:
save_json_scores('scores', train_loss_history, train_accuracy_history,
                   val_loss_history, val_accuracy_history, test_accuracy_history)

In [None]:
from google.colab import files
!zip -r scores.zip scores
files.download("scores.zip")

## Learning Without Forgetting

### Arguments

In [None]:
# Training settings for Learning Without Forgetting
# These assignments override the values set in the Arguments section.
RANDOM_STATES = [658, 423, 422]  # Same seeds as in the Arguments section
BATCH_SIZE = 128                 # The Arguments section sets 64
LR = 2                           # Initial learning rate

### Data preparation

In [None]:
# Transformations for Learning Without Forgetting

# Training: augmentation (random crop with padding 4, random horizontal flip),
# then conversion to tensor and per-channel normalization.
train_transform = transforms.Compose(
    [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),  # Turn PIL Image to torch.Tensor
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# Evaluation: conversion to tensor and the same normalization, no augmentation.
test_transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

In [None]:
# Build, for every run, one train/validation/test DataLoader per split.
# Train and validation loaders only contain the classes of their split; the
# test loader of a split contains every class seen up to and including it.
train_dataloaders = [[] for _ in range(NUM_RUNS)]
val_dataloaders = [[] for _ in range(NUM_RUNS)]
test_dataloaders = [[] for _ in range(NUM_RUNS)]

# Number of incremental splits. The loop below indexes batch_splits, so it
# must run once per batch of classes, not once per class inside a batch.
num_splits = NUM_CLASSES // CLASS_BATCH_SIZE

for run_i in range(NUM_RUNS):

    random_state = RANDOM_STATES[run_i]

    for split_i in range(num_splits):

        # Download dataset only at first instantiation
        download = (run_i == 0 and split_i == 0)

        # Create CIFAR100 dataset
        train_dataset = Cifar100(DATA_DIR, train=True, download=download,
                                 random_state=random_state, transform=train_transform)
        test_dataset = Cifar100(DATA_DIR, train=False, download=False,
                                random_state=random_state, transform=test_transform)

        # Subspace of CIFAR100 of 10 classes
        train_dataset.set_classes_batch(train_dataset.batch_splits[split_i])
        test_dataset.set_classes_batch(
            [test_dataset.batch_splits[i] for i in range(0, split_i+1)])

        # Define train and validation indices
        train_indices, val_indices = train_dataset.train_val_split(VAL_SIZE, random_state)

        train_dataloaders[run_i].append(DataLoader(Subset(train_dataset, train_indices),
                                                   batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True))

        # Evaluate on the whole validation set in a fixed order: shuffling
        # adds nothing at evaluation time and drop_last would silently
        # discard the samples of the last incomplete batch.
        val_dataloaders[run_i].append(DataLoader(Subset(train_dataset, val_indices),
                                                 batch_size=BATCH_SIZE, shuffle=False, num_workers=4, drop_last=False))

        # Dataset with all seen class.
        # Keep dataset order so that collected predictions can be compared
        # position by position with the dataset targets.
        test_dataloaders[run_i].append(DataLoader(test_dataset,
                                                  batch_size=BATCH_SIZE, shuffle=False, num_workers=4))

In [None]:
# Sanity check: visualize a batch of images
dataiter = iter(test_dataloaders[0][5])
# Use the builtin next(): the `.next()` method is a Python 2-era alias that is
# not part of the iterator protocol and is not available on every PyTorch release.
images, labels = next(dataiter)

plot.image_grid(images, one_channel=False)
# Labels present in the batch and how many times each one occurs
unique_labels = np.unique(labels, return_counts=True)
unique_labels

In [None]:
from torch.nn import BCEWithLogitsLoss
from copy import deepcopy

# BCE formulation:
#   let x = logits, z = labels. The logistic loss is
#
#     z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x))

CLASS_BATCH_SIZE = 10


class LWF():
    """Train and evaluate one incremental split with Learning Without Forgetting.

    The loss is binary cross-entropy on logits. On every split after the
    first, the targets of the previously seen classes are the sigmoid outputs
    of ``old_net`` and the targets of the new classes are the one-hot
    ground-truth labels.

    Args:
        device: Device on which networks and batches are placed.
        net: Network to train. Its last layer must be reachable as ``net.fc``
            for ``increment_classes`` to work.
        old_net: Network that produces the targets of the old classes, or
            ``None`` for the first split.
        criterion: Accepted for interface compatibility but not used; the
            loss is always ``BCEWithLogitsLoss``.
        optimizer: Optimizer stepped once per training batch.
        scheduler: Learning rate scheduler stepped once per epoch.
        train_dataloader: Iterable of ``(images, labels)`` training batches.
        val_dataloader: Iterable of ``(images, labels)`` validation batches.
        test_dataloader: Iterable of ``(images, labels)`` test batches.
        num_classes: Number of classes seen so far, current split included.
            Labels are used as indices into a ``num_classes`` wide one-hot
            encoding, so they must lie in ``[0, num_classes)``.
        class_batch_size: Number of classes added by every split.
    """

    def __init__(self, device, net, old_net, criterion, optimizer, scheduler,
                 train_dataloader, val_dataloader, test_dataloader, num_classes=10,
                 class_batch_size=CLASS_BATCH_SIZE):

        self.device = device

        self.net = net
        self.best_net = self.net  # replaced by a copy of the best model during train()
        self.old_net = old_net  # None for first ten classes

        self.criterion = BCEWithLogitsLoss()  # Classifier criterion
        self.optimizer = optimizer
        self.scheduler = scheduler

        self.train_dataloader = train_dataloader
        self.val_dataloader = val_dataloader
        self.test_dataloader = test_dataloader

        self.num_classes = num_classes  # can be incremented outside methods in the main, or inside methods
        self.class_batch_size = class_batch_size
        # Maps a position in the targets to the output column of that class
        self.order = np.arange(100)

        self.sigmoid = nn.Sigmoid()

    def warm_up(self):
        """Placeholder, does nothing."""
        pass

    def increment_classes(self, n=10):
        """Add n classes in the final fully connected layer.

        Weights and bias of the existing outputs are copied into the new
        layer, which is created on the device and with the dtype of the old
        one. ``self.num_classes`` is not modified.
        """
        old_fc = self.net.fc
        in_features = old_fc.in_features  # size of each input sample
        out_features = old_fc.out_features  # size of each output sample

        new_fc = nn.Linear(in_features, out_features + n,
                           bias=old_fc.bias is not None)
        new_fc = new_fc.to(device=old_fc.weight.device, dtype=old_fc.weight.dtype)

        with torch.no_grad():
            new_fc.weight[:out_features] = old_fc.weight
            if old_fc.bias is not None:
                # Without this the old classes would restart from a random bias
                new_fc.bias[:out_features] = old_fc.bias

        self.net.fc = new_fc

    def to_onehot(self, targets):
        """Return the one-hot encoding of ``targets`` on ``self.device``.

        Args:
            targets: Integer class labels in ``[0, self.num_classes)``.

        Returns:
            Float tensor of shape ``(len(targets), self.num_classes)``.
        """
        # Build the identity on the target device and move the indices there
        # too, so that indexing never mixes devices.
        targets = torch.as_tensor(targets, dtype=torch.long, device=self.device)
        return torch.eye(self.num_classes, device=self.device)[targets]

    def _columns(self, tensor, start, stop):
        """Return the columns of ``tensor`` listed in ``self.order[start:stop]``."""
        indices = torch.as_tensor(np.asarray(self.order)[start:stop],
                                  dtype=torch.long, device=tensor.device)
        return tensor.index_select(1, indices)

    def _targets(self, batch, labels, distill):
        """Build the loss targets for a batch.

        With ``distill`` False the targets are the one-hot labels of the
        classes of the current split. With ``distill`` True they are preceded
        by the sigmoid outputs of the old network for the old classes.
        """
        first_new = self.num_classes - self.class_batch_size
        new_targets = self._columns(self.to_onehot(labels), first_new, self.num_classes)

        if not distill:
            return new_targets

        # The old network only provides fixed targets: no gradient is needed
        with torch.no_grad():
            old_outputs = self.sigmoid(self.old_net(batch))
        old_targets = self._columns(old_outputs, 0, first_new)

        # Combine new and old class targets
        return torch.cat((old_targets, new_targets), 1)

    def do_first_batch(self, batch, labels):
        """Run one optimization step on the first split (no old network).

        Returns:
            Tuple ``(loss, running_corrects)``: the loss tensor of the batch
            and the number of correctly classified samples.
        """
        batch = batch.to(self.device)
        labels = labels.to(self.device)  # new classes labels

        # Zero-ing the gradients
        self.optimizer.zero_grad()

        # One hot encoding of new task labels
        one_hot_labels = self.to_onehot(labels)

        # New net forward pass
        outputs = self.net(batch)

        loss = self.criterion(outputs, one_hot_labels)  # BCE Loss with sigmoids over outputs

        # Get predictions
        _, preds = torch.max(outputs.detach(), 1)

        # Accuracy over NEW IMAGES, not over all images
        # NOTE(review): it may be necessary to compare against targets
        # instead of labels here.
        running_corrects = torch.sum(preds == labels).item()

        # Backward pass: computes gradients
        loss.backward()

        self.optimizer.step()

        return loss, running_corrects

    def do_batch(self, batch, labels):
        """Run one optimization step with distillation from the old network.

        Returns:
            Tuple ``(loss, running_corrects)``: the loss tensor of the batch
            and the number of correctly classified samples.
        """
        batch = batch.to(self.device)
        labels = labels.to(self.device)  # new classes labels

        # Zero-ing the gradients
        self.optimizer.zero_grad()

        # Old network outputs for the old classes, one-hot labels for the new
        targets = self._targets(batch, labels, distill=True)

        # New net forward pass, columns arranged like the targets
        outputs = self._columns(self.net(batch), 0, self.num_classes)

        # BCE Loss with sigmoids over outputs (over targets must be done manually)
        loss = self.criterion(outputs, targets)

        # Get predictions
        _, preds = torch.max(outputs.detach(), 1)

        # Accuracy over NEW IMAGES, not over all images
        running_corrects = torch.sum(preds == labels).item()

        # Backward pass: computes gradients
        loss.backward()

        self.optimizer.step()

        return loss, running_corrects

    def do_epoch(self, current_epoch):
        """Train for one epoch and step the scheduler.

        Args:
            current_epoch: Epoch number, only used for logging.

        Returns:
            Tuple ``(train_loss, train_accuracy)``: loss averaged over the
            batches and accuracy over the samples of the epoch.
        """
        self.net.train()

        running_train_loss = 0
        running_corrects = 0
        total = 0
        batch_idx = 0

        print(f"Epoch: {current_epoch}, LR: {self.scheduler.get_last_lr()}")

        # The first split has no old classes to preserve
        first_split = self.num_classes == self.class_batch_size
        step = self.do_first_batch if first_split else self.do_batch

        for images, labels in self.train_dataloader:
            loss, corrects = step(images, labels)

            running_train_loss += loss.item()
            running_corrects += corrects
            total += labels.size(0)
            batch_idx += 1

        self.scheduler.step()

        # Calculate average scores
        train_loss = running_train_loss / batch_idx  # Average over all batches
        train_accuracy = running_corrects / float(total)  # Average over all samples

        print(f"Train loss: {train_loss}, Train accuracy: {train_accuracy}")

        return (train_loss, train_accuracy)

    def train(self, num_epochs):
        """Train the network for a specified number of epochs, and save
        the best performing model on the validation set.

        The model with the lowest validation loss is kept in
        ``self.best_net``, which is the model used by ``test``.

        Args:
            num_epochs (int): number of epochs for training the network.
        Returns:
            train_loss: loss computed on the last epoch
            train_accuracy: accuracy computed on the last epoch
            val_loss: average loss on the validation set of the last epoch
            val_accuracy: accuracy on the validation set of the last epoch

            All four are None when num_epochs is 0.
        """

        # @todo: is the return behaviour intended? (scores of the last epoch)

        self.net = self.net.to(self.device)
        if self.old_net is not None:
            self.old_net = self.old_net.to(self.device)
            self.old_net.train(False)

        cudnn.benchmark = True  # Setting this flag optimizes runtime

        self.best_loss = float("inf")
        self.best_epoch = 0

        train_loss = train_accuracy = val_loss = val_accuracy = None

        for epoch in range(num_epochs):
            # Run an epoch (start counting from 1)
            train_loss, train_accuracy = self.do_epoch(epoch+1)

            # Validate after each epoch
            val_loss, val_accuracy = self.validate()

            # Best validation model
            if val_loss < self.best_loss:
                self.best_loss = val_loss
                self.best_net = deepcopy(self.net)
                self.best_epoch = epoch
                print("Best model updated")

            print("")

        return (train_loss, train_accuracy,
                val_loss, val_accuracy)

    def validate(self):
        """Validate the model.

        Returns:
            val_loss: average loss function computed on the network outputs
                of the validation set (val_dataloader).
            val_accuracy: accuracy computed on the validation set.
        """

        self.net.train(False)

        running_val_loss = 0
        running_corrects = 0
        total = 0
        batch_idx = 0

        # Old classes only exist after the first split
        distill = self.num_classes > self.class_batch_size

        # No gradient is needed for evaluation
        with torch.no_grad():
            for batch, labels in self.val_dataloader:
                batch = batch.to(self.device)
                labels = labels.to(self.device)
                total += labels.size(0)

                targets = self._targets(batch, labels, distill)

                # New net forward pass, columns arranged like the targets
                outputs = self._columns(self.net(batch), 0, self.num_classes)

                # BCE Loss with sigmoids over outputs (over targets must be done manually)
                loss = self.criterion(outputs, targets)

                # Get predictions
                _, preds = torch.max(outputs, 1)

                # Update the number of correctly classified validation samples
                running_corrects += torch.sum(preds == labels).item()
                running_val_loss += loss.item()

                batch_idx += 1

        # Calculate scores
        val_loss = running_val_loss / batch_idx
        val_accuracy = running_corrects / float(total)

        print(f"Validation loss: {val_loss}, Validation accuracy: {val_accuracy}")

        return (val_loss, val_accuracy)

    def test(self):
        """Test the best model found by ``train``.

        Returns:
            accuracy (float): accuracy of the model on the test set
            all_preds: predicted classes, in the order in which the test
                dataloader yields the samples
        """

        self.best_net.train(False)  # Set Network to evaluation mode

        running_corrects = 0
        total = 0

        batch_preds = []  # predictions of every batch, concatenated at the end

        # No gradient is needed for evaluation
        with torch.no_grad():
            for images, labels in self.test_dataloader:
                images = images.to(self.device)
                labels = labels.to(self.device)
                total += labels.size(0)

                # Forward Pass
                outputs = self.best_net(images)

                # Get predictions
                _, preds = torch.max(outputs, 1)

                # Update Corrects
                running_corrects += torch.sum(preds == labels).item()

                batch_preds.append(preds)

        # Calculate accuracy
        accuracy = running_corrects / float(total)

        all_preds = torch.cat(batch_preds, dim=0)

        print(f"Test accuracy: {accuracy}")

        return (accuracy, all_preds)

In [None]:
# Score containers: one dictionary per run, keyed by "Split <i>".
train_loss_history = []
train_accuracy_history = []
val_loss_history = []
val_accuracy_history = []
test_accuracy_history = []

# Iterate over runs
for train_dataloader, val_dataloader, test_dataloader in zip(
        train_dataloaders, val_dataloaders, test_dataloaders):

    # Open a new score dictionary for this run
    train_loss_history.append({})
    train_accuracy_history.append({})
    val_loss_history.append({})
    val_accuracy_history.append({})
    test_accuracy_history.append({})

    # The same network and loss are reused across all splits of the run
    net = resnet32()
    criterion = nn.BCEWithLogitsLoss()

    i = 0
    for train_split, val_split, test_split in zip(
            train_dataloader, val_dataloader, test_dataloader):

        # Redefine optimizer at each split (pass by reference issue)
        parameters_to_optimize = net.parameters()
        optimizer = optim.SGD(parameters_to_optimize,
                              lr=LR,
                              momentum=MOMENTUM,
                              weight_decay=WEIGHT_DECAY)
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                                   milestones=MILESTONES,
                                                   gamma=GAMMA)

        current_split = f"Split {i}"
        print(current_split)

        # Classes seen so far, current split included
        num_classes = CLASS_BATCH_SIZE * (i + 1)

        # The first split has no previous network; later splits use the copy
        # saved at the end of the previous iteration
        previous_net = None if num_classes == CLASS_BATCH_SIZE else old_net
        lwf = LWF(DEVICE, net, previous_net, criterion, optimizer, scheduler,
                  train_split, val_split, test_split, num_classes)

        # Train on the current split and record the returned scores
        scores = lwf.train(NUM_EPOCHS)

        train_loss_history[-1][current_split] = scores[0]
        train_accuracy_history[-1][current_split] = scores[1]
        val_loss_history[-1][current_split] = scores[2]
        val_accuracy_history[-1][current_split] = scores[3]

        # Test the model on classes seen until now
        test_accuracy, all_preds = lwf.test()
        test_accuracy_history[-1][current_split] = test_accuracy

        # Save the trained network before growing its last FC layer: the
        # copy provides the old-class targets of the next split
        old_net = deepcopy(lwf.net)
        lwf.increment_classes()

        i = i + 1

## iCaRL

### Data preparation

In [6]:
# Transformations for iCaRL

# Training: random crop with padding 4 and random horizontal flip, followed by
# conversion to tensor and per-channel normalization.
_icarl_train_steps = [
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),  # Turn PIL Image to torch.Tensor
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
]
train_transform = transforms.Compose(_icarl_train_steps)

# Evaluation: conversion to tensor and the same normalization, no augmentation.
_icarl_test_steps = [
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
]
test_transform = transforms.Compose(_icarl_test_steps)

In [None]:
# Build, for every run, one train/validation/test dataset per split.
# Train and validation subsets only contain the classes of their split; the
# test dataset of a split contains every class seen up to and including it.
train_subsets = [[] for _ in range(NUM_RUNS)]
val_subsets = [[] for _ in range(NUM_RUNS)]
test_subsets = [[] for _ in range(NUM_RUNS)]

# Number of incremental splits. The loop below indexes batch_splits, so it
# must run once per batch of classes, not once per class inside a batch.
num_splits = NUM_CLASSES // CLASS_BATCH_SIZE

for run_i in range(NUM_RUNS):
    random_state = RANDOM_STATES[run_i]

    for split_i in range(num_splits):
        # Download dataset only at first instantiation
        download = (run_i == 0 and split_i == 0)

        # Create CIFAR100 dataset
        train_dataset = Cifar100(DATA_DIR, train=True, download=download,
                                 random_state=random_state, transform=train_transform)
        test_dataset = Cifar100(DATA_DIR, train=False, download=False,
                                random_state=random_state, transform=test_transform)

        # Subspace of CIFAR100 of 10 classes
        train_dataset.set_classes_batch(train_dataset.batch_splits[split_i])
        test_dataset.set_classes_batch([test_dataset.batch_splits[i] for i in range(0, split_i+1)])

        # Define train and validation indices
        train_indices, val_indices = train_dataset.train_val_split(VAL_SIZE, random_state)

        # Define subsets
        train_subsets[run_i].append(Subset(train_dataset, train_indices))
        val_subsets[run_i].append(Subset(train_dataset, val_indices))
        test_subsets[run_i].append(test_dataset)

### Execution

In [8]:
# iCaRL hyperparameters
# These override the values of the Arguments section and are handed to the
# iCaRL constructor in the execution cell.
LR = 2                  # Initial learning rate
MOMENTUM = 0.9          # Momentum for SGD
WEIGHT_DECAY = 0.00001  # Weight decay
MILESTONES = [49, 63]   # Presumably the epochs at which the learning rate is decreased - TODO confirm in model.icarl
GAMMA = 0.2             # Presumably the factor applied to the learning rate at each milestone - TODO confirm
NUM_EPOCHS = 70         # Training epochs
BATCH_SIZE = 64         # Batch size

In [9]:
# Define what tests to run
TEST_ICARL = True # Run test with iCaRL (exemplars + train dataset)
TEST_HYBRID1 = True # Run test with hybrid1

# Initialize logs
# logs_*[run_i] is a list with one dictionary per split, filled below.
logs_icarl = [[] for _ in range(NUM_RUNS)]
logs_hybrid1 = [[] for _ in range(NUM_RUNS)]

for run_i in range(NUM_RUNS):
    # A fresh network and iCaRL object for every run
    net = resnet32()
    icarl = iCaRL(DEVICE, net, LR, MOMENTUM, WEIGHT_DECAY, MILESTONES, GAMMA, NUM_EPOCHS, BATCH_SIZE, train_transform, test_transform)

    # NOTE(review): the number of splits is hard-coded to 10 here.
    for split_i in range(10):
        print(f"## Split {split_i} of run {run_i} ##")
        
        # train_logs is indexed below as (train_loss, train_accuracy, val_loss, val_accuracy)
        train_logs = icarl.incremental_train(split_i, train_subsets[run_i][split_i], val_subsets[run_i][split_i])

        # Collect the test labels in dataset order by iterating the test set
        # with a default DataLoader (one sample per batch, no shuffling).
        all_targets = torch.stack([label[0] for _, label in DataLoader(test_subsets[run_i][split_i])])

        if TEST_ICARL:
            # The dictionary appended here is the one addressed as
            # logs_icarl[run_i][split_i] below: exactly one append per split.
            logs_icarl[run_i].append({})

            acc, all_preds = icarl.test(test_subsets[run_i][split_i], train_subsets[run_i][split_i])

            logs_icarl[run_i][split_i]['accuracy'] = acc
            # NOTE(review): in the printed logs the matrix of the first split
            # is close to uniform although its accuracy is 0.9, which suggests
            # that all_preds is not in the same order as all_targets - verify
            # that icarl.test iterates the test set without shuffling.
            logs_icarl[run_i][split_i]['conf_mat'] = confusion_matrix(all_targets.to('cpu'), all_preds.to('cpu'))

            logs_icarl[run_i][split_i]['train_loss'] = train_logs[0]
            logs_icarl[run_i][split_i]['train_accuracy'] = train_logs[1]
            logs_icarl[run_i][split_i]['val_loss'] = train_logs[2]
            logs_icarl[run_i][split_i]['val_accuracy'] = train_logs[3]

        if TEST_HYBRID1:
            logs_hybrid1[run_i].append({})

            # The predictions returned here are not stored, only the accuracy
            acc, all_preds = icarl.test_without_classifier(test_subsets[run_i][split_i])

            logs_hybrid1[run_i][split_i]['accuracy'] = acc

Streaming output truncated to the last 5000 lines.
Randomly extracting exemplars from class 8 of current split... Extracted 25 exemplars.
Randomly extracting exemplars from class 9 of current split... Extracted 25 exemplars.
Computing mean of exemplars... done
Test accuracy (iCaRL): 0.526125 (exemplars and training data)
Test accuracy (hybrid1): 0.448875
## Split 8 of run 0 ##
Length of exemplars set: 2000
Epoch: 1, LR: [2]
Train loss: 0.09822838923128524, Train accuracy: 0.3213180693069307
Validation loss: 0.06753472025905337, Validation accuracy: 0.2544642857142857
Epoch: 2, LR: [2]
Train loss: 0.09202185782170531, Train accuracy: 0.4274443069306931
Validation loss: 0.06396962808711189, Validation accuracy: 0.33035714285714285
Epoch: 3, LR: [2]
Train loss: 0.09086757418837878, Train accuracy: 0.47926980198019803
Validation loss: 0.06287309740270887, Validation accuracy: 0.4575892857142857
Epoch: 4, LR: [2]
Train loss: 0.08965109178040287, Train accuracy: 0.5205754950495

In [10]:
print(logs_icarl)

[[{'accuracy': 0.9, 'conf_mat': array([[ 9,  8,  6,  9,  9,  9, 17, 12, 12,  9],
       [11,  9, 16,  9,  7,  9, 11, 13,  7,  8],
       [10, 13,  7,  6,  4, 10, 15, 15, 12,  8],
       [ 9, 11,  9,  7, 10, 11, 13, 12,  9,  9],
       [12,  7, 13,  8, 11,  9, 11, 11,  9,  9],
       [13,  9,  9, 13, 11, 13,  7,  7,  6, 12],
       [ 9, 11,  8,  7, 11, 10,  7,  7, 11, 19],
       [14, 12, 12,  6,  9,  9,  8,  9, 10, 11],
       [10, 10, 10,  9, 12,  8, 10, 12,  9, 10],
       [ 8, 12,  7, 12, 12,  9,  7, 10, 11, 12]]), 'train_loss': 0.012425291248863296, 'train_accuracy': 0.9832589285714286, 'val_loss': 0.05387524887919426, 'val_accuracy': 0.9174107142857143}, {'accuracy': 0.809, 'conf_mat': array([[ 5,  8,  5,  3, 11,  9,  5,  5,  3,  1,  7,  4,  1,  0,  9,  7,
         5,  4,  4,  4],
       [11,  6,  6,  4,  3,  2,  5,  7,  3,  6,  5,  6,  8,  6,  4,  5,
         4,  3,  5,  1],
       [ 5,  5,  2,  7,  3,  6,  6, 10,  2,  6,  5,  4,  2,  3,  5,  7,
         9,  4,  4,  5],
       [ 

In [11]:
print(logs_hybrid1)

[[{'accuracy': 0.894}, {'accuracy': 0.7855}, {'accuracy': 0.693}, {'accuracy': 0.64275}, {'accuracy': 0.5652}, {'accuracy': 0.5178333333333334}, {'accuracy': 0.49414285714285716}, {'accuracy': 0.448875}, {'accuracy': 0.4066666666666667}, {'accuracy': 0.375}], [{'accuracy': 0.877}, {'accuracy': 0.784}, {'accuracy': 0.734}, {'accuracy': 0.659}, {'accuracy': 0.6066}, {'accuracy': 0.535}, {'accuracy': 0.47485714285714287}, {'accuracy': 0.444625}, {'accuracy': 0.41688888888888886}, {'accuracy': 0.3867}], [{'accuracy': 0.866}, {'accuracy': 0.75}, {'accuracy': 0.6673333333333333}, {'accuracy': 0.60325}, {'accuracy': 0.5588}, {'accuracy': 0.517}, {'accuracy': 0.4665714285714286}, {'accuracy': 0.4395}, {'accuracy': 0.41788888888888887}, {'accuracy': 0.3776}]]


### Plots

In [None]:
def _icarl_metric(key):
    """Return logs_icarl[run][split][key] as an array with one row per run."""
    rows = []
    for run_i in range(NUM_RUNS):
        rows.append([logs_icarl[run_i][i][key] for i in range(10)])
    return np.array(rows)


def _mean_std_over_runs(values):
    """Return one (mean, std) row per split, computed over the runs."""
    return np.column_stack((values.mean(0), values.std(0)))


# Scores of every run (rows) on every split (columns)
train_loss = _icarl_metric('train_loss')
train_accuracy = _icarl_metric('train_accuracy')
val_loss = _icarl_metric('val_loss')
val_accuracy = _icarl_metric('val_accuracy')
test_accuracy = _icarl_metric('accuracy')

# Mean and standard deviation over the runs, one row per split
train_loss_stats = _mean_std_over_runs(train_loss)
train_accuracy_stats = _mean_std_over_runs(train_accuracy)
val_loss_stats = _mean_std_over_runs(val_loss)
val_accuracy_stats = _mean_std_over_runs(val_accuracy)
test_accuracy_stats = _mean_std_over_runs(test_accuracy)

In [None]:
plot.train_val_scores(train_loss_stats, train_accuracy_stats, val_loss_stats, val_accuracy_stats)

In [None]:
plot.test_scores(test_accuracy_stats)