# Import libraries and packages

---



In [None]:
import torch
import torch.nn as nn
import torch.nn.init as init
import torch.optim as optim
from torch.utils.data import Dataset, Subset, DataLoader, ConcatDataset

from torchvision import transforms
from torchvision.models import resnet34

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

from PIL import Image
from copy import deepcopy

import numpy as np
import sys
import os
np.set_printoptions(threshold=sys.maxsize)  # never abbreviate arrays with "..." when printing; the author needs this for the logs to be printed correctly

Helper cells that make it possible to import functions and classes from other Jupyter notebooks.

In [None]:
# Mount the user's Google Drive inside the Colab runtime at /content/drive.
# The project folders referenced by the %cd commands in the following cells
# live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
%cd "/content/drive/MyDrive/MLDL2021_Calderaro_Giannuzzi_Scorca/File colab/File progetto"
!pip install import-ipynb

import import_ipynb 

In [None]:
# Project code lives in notebooks spread over several folders. The working
# directory is switched with %cd before each group of imports, presumably so
# that the notebooks in that folder can be found by the import machinery.

# retrieve from folder 'data'
%cd "/content/drive/MyDrive/MLDL2021_Calderaro_Giannuzzi_Scorca/File colab/File progetto/data"                          
from DataClean import Cifar100 as datasetManager   # pattern: from <notebook name> import <class or function to import>

# retrieve from folder 'models'
%cd "/content/drive/MyDrive/MLDL2021_Calderaro_Giannuzzi_Scorca/File colab/File progetto/models"
from resnet import resnet32
from Manager import Manager
from LwFMC import LWF
from iCaRL import iCaRL_rand
from iCaRL import Exemplars
from iCaRL import iCaRL_herd


# retrieve from folder 'logs'
%cd "/content/drive/MyDrive/MLDL2021_Calderaro_Giannuzzi_Scorca/File colab/File progetto/logs"
from Save_logs import save_logs

# go back to the main project folder
%cd "/content/drive/MyDrive/MLDL2021_Calderaro_Giannuzzi_Scorca/File colab/File progetto"


# Arguments

In [None]:
# Use the first GPU when one is visible; otherwise fall back to the CPU so the
# notebook can still be executed (slowly) on a runtime without CUDA.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda:0")
else:
    DEVICE = torch.device("cpu")
    print("WARNING: CUDA is not available, falling back to CPU (training will be very slow).")

DIR = '/content/data'   # Root folder passed to the dataset class

RANDOM_SEED = [1993, 30, 423]   # One seed per run; the number of seeds is the number of runs
NUM_CLASSES = 100       # Total number of classes

# Training
BATCH_SIZE = 64         # Batch size
NUM_EPOCHS = 70         # Total number of training epochs
LR = 2                  # Initial learning rate
MOMENTUM = 0.9          # Momentum for stochastic gradient descent (SGD)
WEIGHT_DECAY = 1e-5     # Weight decay from iCaRL
MILESTONES = [49, 63]   # Step down policy from iCaRL (MultiStepLR):
                        # the learning rate is multiplied by GAMMA at each milestone epoch
GAMMA = 0.2             # Gamma factor from iCaRL


### Dataset transformations ###

# Training pipeline: random augmentation first, then conversion to a tensor
# and per-channel normalisation with mean 0.5 and std 0.5.
_train_steps = [
    transforms.RandomCrop(32, padding=4),   # augmentation: random 32x32 crop with 4 px of padding
    transforms.RandomHorizontalFlip(),      # augmentation: random left/right flip
    transforms.ToTensor(),                  # PIL Image -> torch.Tensor
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
]
train_transform = transforms.Compose(_train_steps)

# Test pipeline: no augmentation, same tensor conversion and normalisation.
_test_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
]
test_transform = transforms.Compose(_test_steps)

# Fine tuning implementation
At every split we train on 10 new classes and then test on all the classes seen so far.
 


In [None]:
RUN_NAME = 'fine_tuning'

# logs[run_i][split_i] is a dict holding the test accuracy and the confusion
# matrix measured right after training on split `split_i` of run `run_i`.
logs = [[] for _ in RANDOM_SEED]

for run_i, random_seed in enumerate(RANDOM_SEED):
  # Seed the global torch / numpy generators as well. Before, the seed was only
  # forwarded to the dataset and to train_test_split, so anything drawn from
  # the global generators (e.g. DataLoader shuffling) differed on every execution.
  torch.manual_seed(random_seed)
  np.random.seed(random_seed)

  # A fresh network and loss for every run.
  net = resnet32()
  criterion = nn.BCEWithLogitsLoss()

  # The same seed is given to both datasets.
  # presumably this keeps the class splits of train and test aligned - confirm in DataClean
  train_dataset = datasetManager(DIR, train=True, download=True, random_state=random_seed, transform=train_transform)
  test_dataset = datasetManager(DIR, train=False, download=False, random_state=random_seed, transform=test_transform)

  for split_i in range(10):
    print(f"-- Split {split_i} of run {run_i} --")

    # ---- optimizer / scheduler ----
    # Rebuilt at every split so the learning-rate schedule restarts from LR.
    optimizer = optim.SGD(net.parameters(), lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=MILESTONES, gamma=GAMMA)

    # ---- dataset preparation ----
    # Train on the classes of the current split only; test on the classes of
    # every split seen so far (0..split_i).
    train_dataset.set_classes_batch(train_dataset.batch_splits[split_i])
    test_dataset.set_classes_batch([test_dataset.batch_splits[i] for i in range(0, split_i+1)])

    # Hold out 10% of the current training data for validation.
    train_idx, val_idx = train_test_split(list(range(len(train_dataset))),
                                          random_state=random_seed,
                                          test_size=0.1)   # choose here the validation size

    train_data_split = Subset(train_dataset, train_idx)
    # NOTE(review): the validation subset is built on train_dataset, so it
    # presumably goes through train_transform (augmentation) too - confirm this is intended.
    val_data_split = Subset(train_dataset, val_idx)

    train_dataloader = DataLoader(train_data_split, batch_size=BATCH_SIZE,
                                  shuffle=True, num_workers=4, drop_last=True)
    # NOTE(review): drop_last=True discards the final partial batch, so a few
    # validation samples are never evaluated; kept as in the original.
    val_dataloader = DataLoader(val_data_split, batch_size=BATCH_SIZE,
                                shuffle=True, num_workers=4, drop_last=True)

    # Test dataloader with all the classes seen until the current split.
    test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)

    # ---- model training ----
    manager = Manager(DEVICE, net, criterion, optimizer, scheduler,
                      train_dataloader,
                      val_dataloader,
                      test_dataloader)

    manager.train(NUM_EPOCHS)

    # ---- evaluation on the classes seen until now ----
    test_accuracy, all_targets, all_preds = manager.test()

    logs[run_i].append({})
    logs[run_i][split_i]['test_accuracy'] = test_accuracy
    logs[run_i][split_i]['conf_mat'] = confusion_matrix(all_targets.to('cpu'), all_preds.to('cpu'))

    # Add 10 nodes to the last FC layer for the next split.
    manager.increment_classes(n=10)


# Persist the results of all runs once every run has finished.
save_logs(logs, RANDOM_SEED, NUM_EPOCHS, RUN_NAME, BATCH_SIZE)
  

In [None]:
 #save test results
# NOTE(review): this looks like a manual leftover that re-records the most
# recent test results still held in the kernel (run_i, split_i, test_accuracy,
# all_targets, all_preds). It only works after the training cell above has run.
# The entry is appended only when the slot for this split does not exist yet,
# so running the cell after a completed loop (or running it twice) can no
# longer add a spurious empty record to logs[run_i].
if len(logs[run_i]) <= split_i:
    logs[run_i].append({})
logs[run_i][split_i]['test_accuracy'] = test_accuracy
logs[run_i][split_i]['conf_mat'] = confusion_matrix(all_targets.to('cpu'), all_preds.to('cpu'))


# Learning Without Forgetting

In [None]:
RUN_NAME = 'lwf'

# NOTE: the former `CUDA_LAUNCH_BLOCKING=1` line was removed. It only created an
# unused Python variable and never changed how CUDA runs. To really get
# synchronous kernel launches while debugging, set
# os.environ['CUDA_LAUNCH_BLOCKING'] = '1' at the very top of the notebook,
# before the first CUDA call.

# logs[run_i][split_i] is a dict holding the test accuracy and the confusion
# matrix measured right after training on split `split_i` of run `run_i`.
logs = [[] for _ in RANDOM_SEED]

for run_i, random_seed in enumerate(RANDOM_SEED):
  # Seed the global torch / numpy generators as well. Before, the seed was only
  # forwarded to the dataset and to train_test_split, so anything drawn from
  # the global generators (e.g. DataLoader shuffling) differed on every execution.
  torch.manual_seed(random_seed)
  np.random.seed(random_seed)

  # A fresh network and loss for every run.
  net = resnet32()
  criterion = nn.BCEWithLogitsLoss()

  # Load CIFAR-100; `random_seed` makes the class splits differ between runs.
  # The download is requested only at the very first instantiation.
  train_dataset = datasetManager(DIR, train=True, download=(run_i == 0), random_state=random_seed, transform=train_transform)
  test_dataset = datasetManager(DIR, train=False, download=False, random_state=random_seed, transform=test_transform)

  for split_i in range(10):

    # ---- optimizer / scheduler ----
    # Rebuilt at every split so the learning-rate schedule restarts from LR.
    optimizer = optim.SGD(net.parameters(), lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=MILESTONES, gamma=GAMMA)

    # ---- dataset preparation ----
    # Train on the classes of the current split only; test on the classes of
    # every split seen so far (0..split_i).
    train_dataset.set_classes_batch(train_dataset.batch_splits[split_i])
    test_dataset.set_classes_batch([test_dataset.batch_splits[i] for i in range(0, split_i+1)])

    # Hold out 10% of the current training data for validation.
    train_idx, val_idx = train_test_split(list(range(len(train_dataset))),
                                          random_state=random_seed,
                                          test_size=0.1)   # choose here the validation size

    train_data_split = Subset(train_dataset, train_idx)
    # NOTE(review): the validation subset is built on train_dataset, so it
    # presumably goes through train_transform (augmentation) too - confirm this is intended.
    val_data_split = Subset(train_dataset, val_idx)

    train_dataloader = DataLoader(train_data_split, batch_size=BATCH_SIZE,
                                  shuffle=True, num_workers=4, drop_last=True)
    # NOTE(review): drop_last=True discards the final partial batch, so a few
    # validation samples are never evaluated; kept as in the original.
    val_dataloader = DataLoader(val_data_split, batch_size=BATCH_SIZE,
                                shuffle=True, num_workers=4, drop_last=True)

    # Test dataloader with all the classes seen until the current split.
    test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)

    # ---- model training ----
    num_classes = 10*(split_i+1)

    # On the first split there is no previous network to pass along. `old_net`
    # is only read from the second split on, after it has been assigned below.
    previous_net = None if split_i == 0 else old_net

    lwf = LWF(DEVICE, net, previous_net, criterion, optimizer, scheduler,
              train_dataloader,
              val_dataloader,
              test_dataloader,
              num_classes)

    lwf.train(NUM_EPOCHS)

    # ---- evaluation on the classes seen until now ----
    logs[run_i].append({})

    test_accuracy, all_targets, all_preds = lwf.test()

    logs[run_i][split_i]['test_accuracy'] = test_accuracy
    logs[run_i][split_i]['conf_mat'] = confusion_matrix(all_targets.to('cpu'), all_preds.to('cpu'))

    # Snapshot the trained network BEFORE its output layer is extended: the
    # copy is handed to LWF as the previous network at the next split.
    old_net = deepcopy(lwf.net)

    lwf.increment_classes()


# Persist the results of all runs once every run has finished.
save_logs(logs, RANDOM_SEED, NUM_EPOCHS, RUN_NAME, BATCH_SIZE)



# iCaRL (with random exemplar set construction)
iCaRL with random exemplar set construction, used as a comparison to the herding strategy.

In [None]:
RUN_NAME = 'iCarl_no_herding_3run'

# NOTE: the former `CUDA_LAUNCH_BLOCKING=1` line was removed. It only created an
# unused Python variable and never changed how CUDA runs. To really get
# synchronous kernel launches while debugging, set
# os.environ['CUDA_LAUNCH_BLOCKING'] = '1' at the very top of the notebook,
# before the first CUDA call.
NUM_RUNS = len(RANDOM_SEED)

# logs_icarl[run_i][split_i] is a dict holding the accuracy and the confusion
# matrix measured right after training on split `split_i` of run `run_i`.
logs_icarl = [[] for _ in range(NUM_RUNS)]

for run_i in range(NUM_RUNS):
  run_seed = RANDOM_SEED[run_i]

  # Seed the global torch / numpy generators as well. Before, the seed was only
  # forwarded to the dataset and to train_test_split, so anything drawn from
  # the global generators differed on every execution.
  torch.manual_seed(run_seed)
  np.random.seed(run_seed)

  train_dataset = datasetManager(DIR, train=True, download=True, random_state=run_seed, transform=train_transform)
  test_dataset = datasetManager(DIR, train=False, download=False, random_state=run_seed, transform=test_transform)

  # A fresh network and a fresh iCaRL model (random exemplar selection) per run.
  net = resnet32()
  icarl = iCaRL_rand(DEVICE, net, LR, MOMENTUM, WEIGHT_DECAY, MILESTONES, GAMMA, NUM_EPOCHS, BATCH_SIZE, train_transform, test_transform)

  for split_i in range(10):
    print(f"## Split {split_i} of run {run_i} ##")

    # Train on the classes of the current split only; test on the classes of
    # every split seen so far (0..split_i).
    train_dataset.set_classes_batch(train_dataset.batch_splits[split_i])
    test_dataset.set_classes_batch([test_dataset.batch_splits[i] for i in range(0, split_i+1)])

    # Hold out 10% of the current training data for validation.
    train_idx, val_idx = train_test_split(list(range(len(train_dataset))),
                                          random_state=run_seed,
                                          test_size=0.1)

    train_data_split = Subset(train_dataset, train_idx)
    val_data_split = Subset(train_dataset, val_idx)

    # N.B.: the dataloaders are instantiated inside the iCaRL model.
    icarl.incremental_train(split_i, train_data_split, val_data_split)

    # Create the log entry for the current split, then fill it.
    logs_icarl[run_i].append({})

    acc, all_targets, all_preds = icarl.test(test_dataset, train_data_split)

    logs_icarl[run_i][split_i]['accuracy'] = acc
    logs_icarl[run_i][split_i]['conf_mat'] = confusion_matrix(all_targets.to('cpu'), all_preds.to('cpu'))

# Persist the results of all runs once every run has finished.
save_logs(logs_icarl, RANDOM_SEED, NUM_EPOCHS, RUN_NAME, BATCH_SIZE)

# iCaRL
Official iCaRL as in the paper, with herding as the construction strategy for the exemplar set.

In [None]:
RUN_NAME = 'iCaRL_herding'

# NOTE: the former `CUDA_LAUNCH_BLOCKING=1` line was removed. It only created an
# unused Python variable and never changed how CUDA runs. To really get
# synchronous kernel launches while debugging, set
# os.environ['CUDA_LAUNCH_BLOCKING'] = '1' at the very top of the notebook,
# before the first CUDA call.
NUM_RUNS = len(RANDOM_SEED)

# logs_icarl_herd[run_i][split_i] is a dict holding the accuracy and the
# confusion matrix measured right after training on split `split_i` of run `run_i`.
logs_icarl_herd = [[] for _ in range(NUM_RUNS)]

for run_i in range(NUM_RUNS):
  run_seed = RANDOM_SEED[run_i]

  # Seed the global torch / numpy generators as well. Before, the seed was only
  # forwarded to the dataset and to train_test_split, so anything drawn from
  # the global generators differed on every execution.
  torch.manual_seed(run_seed)
  np.random.seed(run_seed)

  train_dataset = datasetManager(DIR, train=True, download=True, random_state=run_seed, transform=train_transform)
  test_dataset = datasetManager(DIR, train=False, download=False, random_state=run_seed, transform=test_transform)

  # A fresh network and a fresh iCaRL model (herding exemplar selection) per run.
  net = resnet32()
  icarl = iCaRL_herd(DEVICE, net, LR, MOMENTUM, WEIGHT_DECAY, MILESTONES, GAMMA, NUM_EPOCHS, BATCH_SIZE, train_transform, test_transform)

  for split_i in range(10):
    print(f"## Split {split_i} of run {run_i} ##")

    # Train on the classes of the current split only; test on the classes of
    # every split seen so far (0..split_i).
    train_dataset.set_classes_batch(train_dataset.batch_splits[split_i])
    test_dataset.set_classes_batch([test_dataset.batch_splits[i] for i in range(0, split_i+1)])

    # Hold out 10% of the current training data for validation.
    train_idx, val_idx = train_test_split(list(range(len(train_dataset))),
                                          random_state=run_seed,
                                          test_size=0.1)

    train_data_split = Subset(train_dataset, train_idx)
    val_data_split = Subset(train_dataset, val_idx)

    # N.B.: the dataloaders are instantiated inside the iCaRL model.
    icarl.incremental_train(split_i, train_data_split, val_data_split)

    # Create the log entry for the current split, then fill it.
    logs_icarl_herd[run_i].append({})

    acc, all_targets, all_preds = icarl.test(test_dataset, train_data_split)

    logs_icarl_herd[run_i][split_i]['accuracy'] = acc
    logs_icarl_herd[run_i][split_i]['conf_mat'] = confusion_matrix(all_targets.to('cpu'), all_preds.to('cpu'))


# Persist the results of all runs once every run has finished.
save_logs(logs_icarl_herd, RANDOM_SEED, NUM_EPOCHS, RUN_NAME, BATCH_SIZE)