# Import libraries and packages

---



In [None]:
import torch
import torch.nn as nn
import torch.nn.init as init
import torch.optim as optim
from torch.utils.data import Dataset, Subset, DataLoader, ConcatDataset

from torchvision import transforms
from torchvision.models import resnet34

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

from PIL import Image
from copy import deepcopy

import numpy as np
import sys
import os
# Raise NumPy's print threshold to sys.maxsize so that arrays are printed in
# full instead of being summarised; needed to print the logs correctly.
np.set_printoptions(threshold=sys.maxsize)

The following cells are needed to import functions from other Jupyter notebooks.

In [None]:
# Mount Google Drive in the Colab runtime at /content/drive; the project
# folders used by the following cells are reached through this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Move into the main project folder on the mounted Drive (IPython magic).
%cd "/content/drive/MyDrive/MLDL2021_Calderaro_Giannuzzi_Scorca/File colab/File progetto"
# Install the import-ipynb package into the current runtime (shell escape).
!pip install import-ipynb

# Must be imported after the install above.
# NOTE(review): presumably this is what allows the later cells to import
# names from other notebooks - confirm against the import-ipynb docs.
import import_ipynb 

In [None]:
# Every %cd below switches the working directory to one project sub-folder
# right before the matching import; keep each %cd/import pair together and in
# this order.
# NOTE(review): the imported modules are presumably notebooks loaded through
# import_ipynb - confirm against the folder contents.

#retrieve from folder 'data'
%cd "/content/drive/MyDrive/MLDL2021_Calderaro_Giannuzzi_Scorca/File colab/File progetto/data"                          
from DataClean import Cifar100 as datasetManager   # from **notebook name** import **class or function to import**

#retrieve from folder 'models'
%cd "/content/drive/MyDrive/MLDL2021_Calderaro_Giannuzzi_Scorca/File colab/File progetto/models"
from resnet import resnet32
from Manager import Manager

#retrieve from folder 'extension'
%cd "/content/drive/MyDrive/MLDL2021_Calderaro_Giannuzzi_Scorca/File colab/File progetto/extension"
from iCaRL_extension import iCaRL_ext

#retrieve from folder 'logs'
%cd "/content/drive/MyDrive/MLDL2021_Calderaro_Giannuzzi_Scorca/File colab/File progetto/logs"
from Save_logs import save_logs

# return in the main project folder
%cd "/content/drive/MyDrive/MLDL2021_Calderaro_Giannuzzi_Scorca/File colab/File progetto"


In [None]:
# ---------------------------------------------------------------------------
# Global experiment configuration
# ---------------------------------------------------------------------------
DEVICE = torch.device("cuda:0")   # device handed to the incremental learners
DIR = '/content/data'             # root folder given to the dataset wrapper

RANDOM_SEED = [1993, 30, 423]     # one seed per run (indexed by run number)
NUM_CLASSES = 100                 # total number of classes

# ---------------------------------------------------------------------------
# Training hyperparameters (the experiment cells may override some of them)
# ---------------------------------------------------------------------------
BATCH_SIZE = 64                   # batch size
NUM_EPOCHS = 70                   # total number of training epochs
LR = 2                            # initial learning rate
MOMENTUM = 0.9                    # momentum for stochastic gradient descent (SGD)
WEIGHT_DECAY = 1e-5               # weight decay from iCaRL
MILESTONES = [49, 63]             # step-down policy from iCaRL (MultiStepLR):
                                  # the learning rate is decreased by GAMMA
                                  # at each milestone
GAMMA = 0.2                       # gamma factor from iCaRL

# ---------------------------------------------------------------------------
# Dataset transformations
# ---------------------------------------------------------------------------
# Per-channel mean / std used by Normalize in both pipelines.
_NORM_MEAN = (0.5, 0.5, 0.5)
_NORM_STD = (0.5, 0.5, 0.5)

# Training pipeline: data augmentation first, then tensor conversion and
# normalisation.
train_transform = transforms.Compose(
    [
        transforms.RandomCrop(32, padding=4),     # augmentation: padded random crop
        transforms.RandomHorizontalFlip(),        # augmentation: random mirror
        transforms.ToTensor(),                    # PIL Image -> torch.Tensor
        transforms.Normalize(_NORM_MEAN, _NORM_STD),
    ]
)

# Evaluation pipeline: no augmentation, same tensor conversion and
# normalisation as the training pipeline.
test_transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(_NORM_MEAN, _NORM_STD),
    ]
)

# iCaRL extension

In [None]:
# Experiment driver for the iCaRL extension: for every seed in RANDOM_SEED it
# builds fresh datasets and a fresh network, trains incrementally over 10
# class splits, and records accuracy and confusion matrix after each split.
RUN_NAME = 'iCarl_extension'

# Debugging aid: ask CUDA to launch kernels synchronously. A plain Python
# variable is invisible to the CUDA runtime, so the value is exported through
# the process environment (`os` is imported at the top of the notebook).
# It only takes effect if it is set before the CUDA context is created.
CUDA_LAUNCH_BLOCKING = 1
os.environ['CUDA_LAUNCH_BLOCKING'] = str(CUDA_LAUNCH_BLOCKING)

NUM_RUNS = len(RANDOM_SEED)

#### customize hyperparameter for tuning ######
############ (debug purpose) ##################
# These override the values set in the configuration cell.
LR = 0.1
NUM_EPOCHS = 70
WEIGHT_DECAY = 1e-4
###############################################


# Initialize logs: one list per run, one dict appended per split.
logs = [[] for _ in range(NUM_RUNS)]

for run_i in range(NUM_RUNS):
    seed = RANDOM_SEED[run_i]

    # Download CIFAR-100 (only the train constructor asks for the download).
    train_dataset = datasetManager(DIR, train=True, download=True, random_state=seed, transform=train_transform)
    test_dataset = datasetManager(DIR, train=False, download=False, random_state=seed, transform=test_transform)

    # A new network and a new learner for every run.
    net = resnet32()
    icarl = iCaRL_ext(DEVICE, net, LR, MOMENTUM, WEIGHT_DECAY, MILESTONES, GAMMA, NUM_EPOCHS, BATCH_SIZE, train_transform, test_transform)

    for split_i in range(10):
        print(f"-- Split {split_i} of run {run_i} (SEED: {RANDOM_SEED[run_i]}) --")

        ############## 10-class split management: ##################
        # Train set: the class batch of the current split only.
        # Test set: the class batches of every split up to and including
        # the current one.
        train_dataset.set_classes_batch(train_dataset.batch_splits[split_i])
        test_dataset.set_classes_batch([test_dataset.batch_splits[i] for i in range(split_i + 1)])

        ############## extraction of validation index ##############
        # Split the sample indices of the current training set 90% / 10%,
        # seeded with the run seed.
        train_idx, val_idx = train_test_split(
            list(range(len(train_dataset))),
            random_state=seed,
            test_size=0.1,   # choose here the validation size
        )

        # Both subsets are index views over the same train_dataset.
        train_data_split = Subset(train_dataset, train_idx)
        val_data_split = Subset(train_dataset, val_idx)

        ############## training + log generation ###################
        icarl.incremental_train(split_i, train_data_split, val_data_split)

        logs[run_i].append({})

        acc, all_targets, all_preds = icarl.test(test_dataset, train_data_split)
        logs[run_i][split_i]['accuracy'] = acc
        # The results are moved to the CPU before building the confusion matrix.
        logs[run_i][split_i]['conf_mat'] = confusion_matrix(all_targets.to('cpu'), all_preds.to('cpu'))


# Persist the logs collected above (`logs` is the list filled by the loops).
save_logs(logs, RANDOM_SEED, NUM_EPOCHS, RUN_NAME, BATCH_SIZE)

# Tests on semantic drift compensation

In [None]:
# Experiment driver for the semantic-drift-compensation variant: same
# protocol as the iCaRL extension run, with iCaRL_SDC as the learner.
from iCaRL_extension import iCaRL_SDC

# Autograd anomaly detection is switched on globally from this point.
torch.autograd.set_detect_anomaly(True)

RUN_NAME = 'icarl_SDC'

NUM_RUNS = len(RANDOM_SEED)
NUM_EPOCHS = 70

# One list per run; one dict is appended to it for every split.
logs = [[] for _ in range(NUM_RUNS)]

for run_i in range(NUM_RUNS):
    run_seed = RANDOM_SEED[run_i]

    # Fresh datasets for this run; only the train constructor asks for the download.
    train_dataset = datasetManager(
        DIR, train=True, download=True,
        random_state=run_seed, transform=train_transform,
    )
    test_dataset = datasetManager(
        DIR, train=False, download=False,
        random_state=run_seed, transform=test_transform,
    )

    # Fresh network and learner for this run.
    net = resnet32()
    icarl = iCaRL_SDC(
        DEVICE, net, LR, MOMENTUM, WEIGHT_DECAY, MILESTONES, GAMMA,
        NUM_EPOCHS, BATCH_SIZE, train_transform, test_transform,
    )

    for split_i in range(10):
        print(f"## Split {split_i} of run {run_i} ##")

        # Train on the class batch of the current split; test on the class
        # batches of every split up to and including the current one.
        train_dataset.set_classes_batch(train_dataset.batch_splits[split_i])
        seen_batches = [test_dataset.batch_splits[i] for i in range(split_i + 1)]
        test_dataset.set_classes_batch(seen_batches)

        # 90% / 10% train / validation split over the sample indices,
        # seeded with the run seed.
        all_indices = list(range(len(train_dataset)))
        train_idx, val_idx = train_test_split(
            all_indices, random_state=run_seed, test_size=0.1
        )
        train_data_split = Subset(train_dataset, train_idx)
        val_data_split = Subset(train_dataset, val_idx)

        # The return value is kept in `train_logs` for interactive inspection.
        train_logs = icarl.incremental_train(split_i, train_data_split, val_data_split)

        # Register the entry for this split first, then fill it in.
        split_log = {}
        logs[run_i].append(split_log)

        acc, all_targets, all_preds = icarl.test(test_dataset, train_data_split)

        split_log['accuracy'] = acc
        split_log['conf_mat'] = confusion_matrix(all_targets.to('cpu'), all_preds.to('cpu'))


save_logs(logs, RANDOM_SEED, NUM_EPOCHS, RUN_NAME, BATCH_SIZE)