<a href="https://colab.research.google.com/github/danielegenta/Progetto-MLDL/blob/master/ProjectMLDL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Install requirements**

In [0]:
"""!pip3 install 'torch==1.3.1'
!pip3 install 'torchvision==0.5.0'
!pip3 install 'Pillow-SIMD'
!pip3 install 'tqdm'
!pip install --upgrade wandb"""

"!pip3 install 'torch==1.3.1'\n!pip3 install 'torchvision==0.5.0'\n!pip3 install 'Pillow-SIMD'\n!pip3 install 'tqdm'\n!pip install --upgrade wandb"

**Import libraries**

In [0]:
import os
import logging

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Subset, DataLoader
from torch.backends import cudnn

import torchvision
from torchvision import transforms
from torchvision.models import alexnet, resnet18, resnet34

from PIL import Image
from tqdm import tqdm
import random
import wandb

# Everything available at https://app.wandb.ai/danver/progetto-mldl
#wandb.login(key=os.environ['WANDB_API_KEY'])  # read the key from the environment; never commit API keys (revoke the one previously published here)

**Set arguments**

In [0]:
DEVICE = 'cuda' # 'cuda' or 'cpu'; the model and every batch are moved to this device
DATA_DIR = 'DATA/cifar-100-python' # the extracted CIFAR-100 archive is moved here after download

NUM_CLASSES = 100 

# TODO: update the following values (look at the iCaRL paper)

BATCH_SIZE = 128     # Higher batch sizes allow for larger learning rates. An empirical heuristic suggests that, when changing
                     # the batch size, the learning rate should change by the same factor to have comparable results

LR = 0.02            # The initial learning rate
MOMENTUM = 0.9       # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 5e-5  # Regularization, you can keep this at the default

NUM_EPOCHS = 2      # Number of training epochs (iterations over the dataset) for each call to train()
STEP_SIZE = 49       # How many epochs before decreasing learning rate (StepLR step-down policy)
                     # NOTE(review): larger than NUM_EPOCHS, so the step-down is never reached with these values
GAMMA = 0.1          # Multiplicative factor for learning rate step-down

LOG_FREQUENCY = 10   # The training loss is printed every LOG_FREQUENCY optimisation steps

**Retrieving dataset CIFAR100**

In [0]:
# Clone github repository with dataset handler
# NOTE(review): the two removals below run unconditionally (marked as a debug aid),
# so the checkout and the dataset directory are deleted on every execution; after
# that the isdir guard normally evaluates to True and the repository is cloned again.
# On a fresh runtime `rm -r` only prints a "No such file or directory" message.
!rm -r Cifar100/ #debug purposes
!rm -r $DATA_DIR
if not os.path.isdir('./Cifar100'):
  # The repository is renamed to `Cifar100` so it can be imported as a package,
  # then the files that are not needed at runtime are removed.
  !git clone https://github.com/danielegenta/Progetto-MLDL.git
  !mv 'Progetto-MLDL' 'Cifar100'
  !rm -r Cifar100/Theoretical-Sources
  !rm -rf Cifar100/ProjectMLDL.ipynb

In [0]:
from Cifar100.Dataset.cifar100 import CIFAR100
# Download dataset from the official source and save it into DATA

if not os.path.isdir('./{}'.format(DATA_DIR)):
    !wget https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz
    !tar -xf 'cifar-100-python.tar.gz'  
    !mv 'cifar-100-python' $DATA_DIR
    !rm -rf 'cifar-100-python.tar.gz'

**Define data preprocessing**

In [0]:
# Preprocessing pipelines.
# Using 0.5 for both mean and std would also be acceptable (faq1).
# @tocheck
# ref: https://github.com/chengyangfu/pytorch-vgg-cifar10/blob/master/main.py + pytorch resnet documentation

# Training-time preprocessing: resize, convert to tensor, normalise per channel
train_transform = transforms.Compose([
    transforms.Resize(32),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

# Evaluation-time preprocessing: currently the same steps as for training
eval_transform = transforms.Compose([
    transforms.Resize(32),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])


**Prepare dataset**

In [0]:
# Import dataset
# NOTE(review): the root is the literal "DATA" while the download cell uses DATA_DIR
# ('DATA/cifar-100-python') — confirm the handler resolves the same directory.
train_dataset = CIFAR100("DATA", split='train', transform=train_transform)
test_dataset = CIFAR100("DATA", split='test', transform=eval_transform)

# @todo
# split into train, test, 
# Sanity check: number of samples in each split
print(len(train_dataset))
print(len(test_dataset))

# Both splits use seed=30. The cells below read `train_splits` as a dict whose values
# hold 'train' and 'val' index collections, and `test_splits` as a dict whose values
# are index collections; the exact grouping is defined by the dataset handler.
train_splits = train_dataset.split_in_train_val_groups(ratio=0.99, seed=30)
test_splits = test_dataset.split_classes(seed=30, dictionary_of='indices')

50000
10000


**Build reverse index**  
Builds the reverse index used to map the network's output indices back to the original class labels after the classes have been shuffled into groups.

In [0]:
import pandas as pd
import numpy as np

def build_reverse_index(splits=None, labels_df=None):
    """Build the table that maps a row position to a (group, label) pair.

    For every group in `splits`, the distinct labels found among that group's
    'train' rows are listed in the order returned by `value_counts()` (most
    frequent first). Rows are numbered consecutively across groups, so the
    row index can be used as a network output index by `getLabels`.

    The frame is assembled in a single constructor call: `DataFrame.append`
    was removed in pandas 2.0 and copied the whole frame on every iteration.

    Args:
        splits: dict mapping a group key to a dict with a 'train' entry holding
            row labels of `labels_df`. Defaults to the notebook-level
            `train_splits`.
        labels_df: DataFrame with a 'labels' column. Defaults to
            `train_dataset.df`.

    Returns:
        DataFrame with columns ['group', 'labels'] and a 0..N-1 index.
    """
    if splits is None:
        splits = train_splits
    if labels_df is None:
        labels_df = train_dataset.df

    records = []
    for group, split in splits.items():
        group_labels = labels_df.loc[split['train'], 'labels'].value_counts().index
        records.extend({'group': group, 'labels': label} for label in group_labels)

    # Passing `columns` keeps the expected schema even when there are no records
    return pd.DataFrame(records, columns=['group', 'labels'])

def getLabels(reverse_index, outputs):
    """Translate predicted output indices into the labels stored in `reverse_index`.

    Args:
        reverse_index: DataFrame with a 'labels' column whose index contains
            every value present in `outputs`.
        outputs: 1-D integer tensor of predicted output indices.

    Returns:
        Tensor of labels, one per prediction, placed on the same device as
        `outputs` so it can be compared with the targets without relying on a
        global device setting.
    """
    # pandas needs host memory; detach() guards against tensors that track gradients
    positions = outputs.detach().cpu().numpy()
    labels = reverse_index.loc[positions, 'labels']

    return torch.tensor(list(labels), device=outputs.device)


# Built once at notebook level: train() and validate() read this global when
# converting predictions to labels. The bare name on the last line displays it.
outputs_labels_mapping = build_reverse_index()
outputs_labels_mapping

Unnamed: 0,group,labels
0,0,99
1,0,39
2,0,23
3,0,19
4,0,98
...,...,...
95,9,81
96,9,33
97,9,72
98,9,48


**Prepare dataloaders**

In [0]:
# Wrap every class group in a torch Subset (one entry per group, in split order).
# The DataLoaders themselves are created later, inside jointTraining.
train_subsets = [Subset(train_dataset, split['train']) for split in train_splits.values()]
val_subsets = [Subset(train_dataset, split['val']) for split in train_splits.values()]
test_subsets = [Subset(test_dataset, indices) for indices in test_splits.values()]



In [0]:
import time

def train(net, train_dataloader, criterion, optimizer, scheduler, num_epochs=NUM_EPOCHS):
    """Train `net` in place for `num_epochs` epochs and print progress.

    Args:
        net: model to train; it is moved to DEVICE and switched to training mode.
        train_dataloader: iterable of (images, labels) batches; the length of
            its `.dataset` is used to average the epoch statistics.
        criterion: loss callable, invoked as criterion(outputs, labels).
        optimizer: optimizer stepped once per batch. It must have been built
            over `net`'s parameters, otherwise no weight is updated.
        scheduler: learning-rate scheduler, stepped once per epoch.
        num_epochs: number of passes over `train_dataloader`.

    Returns:
        None. Loss and accuracy are only printed.
    """
    # By default, everything is loaded to cpu
    net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda

    # The flag must be assigned: a bare `cudnn.benchmark` expression has no effect
    cudnn.benchmark = True

    net.train()
    current_step = 0
    start_time = time.time()
    for epoch in range(num_epochs):
        # Read the rate the optimizer will actually use. scheduler.get_lr() is
        # not reliable outside scheduler.step() on recent PyTorch releases.
        current_lrs = [group['lr'] for group in optimizer.param_groups]
        print('Starting epoch {}/{}, LR = {}'.format(epoch+1, num_epochs, current_lrs))

        running_corrects = 0
        running_loss = 0.0
        for images, labels in train_dataloader:
            # Bring data over the device of choice
            images = images.to(DEVICE)
            labels = labels.to(DEVICE)

            optimizer.zero_grad() # Zero-ing the gradients

            outputs = net(images)

            # NOTE(review): the loss receives `labels` as they come from the
            # dataset, while accuracy compares them with predictions mapped
            # through outputs_labels_mapping — confirm which label space the
            # dataset yields.
            loss = criterion(outputs, labels)

            # Get predictions and translate output indices into labels
            _, preds = torch.max(outputs.detach(), 1)
            preds = getLabels(outputs_labels_mapping, preds)

            # Update Corrects & Loss (loss is weighted by batch size so the
            # epoch average is per sample)
            running_loss += loss.item() * images.size(0)
            running_corrects += torch.sum(preds == labels).item()

            # Log loss
            if current_step % LOG_FREQUENCY == 0:
                print('Train step - Step {}, Loss {}'.format(current_step, loss.item()))

            # Compute gradients for each layer and update weights
            loss.backward()  # backward pass: computes gradients
            optimizer.step() # update weights based on accumulated gradients

            current_step += 1

        # Step the scheduler
        scheduler.step()

        # Calculate Accuracy & Loss
        epoch_loss = running_loss / float(len(train_dataloader.dataset))
        epoch_acc = running_corrects / float(len(train_dataloader.dataset))

        #wandb.log({'Epochs': epoch, 'Train Accuracy': epoch_acc, 'Train Loss': epoch_loss})
        print('Train epoch - Accuracy: {} Loss: {} Corrects: {}'.format(epoch_acc, epoch_loss, running_corrects))
    print('Training finished in {} seconds'.format(time.time() - start_time))

def validate(net, val_dataloader, criterion=None):
    """Evaluate `net` on `val_dataloader`.

    The network is switched to evaluation mode and left in that mode.

    Args:
        net: model to evaluate; expected to already live on DEVICE.
        val_dataloader: iterable of (images, labels) batches; the length of
            its `.dataset` is used to average the statistics.
        criterion: optional loss callable. When None, no loss is computed and
            the returned loss is 0.0.

    Returns:
        Tuple (accuracy, average loss per sample).
    """
    net.eval()

    running_corrects = 0
    running_loss = 0.0
    # No gradients are needed for evaluation; disabling autograd avoids
    # building a graph for every batch and lowers memory use.
    with torch.no_grad():
        for images, labels in val_dataloader:
            # Bring data over the device of choice
            images = images.to(DEVICE)
            labels = labels.to(DEVICE)

            # Forward pass to the network
            outputs = net(images)

            # Update Corrects & Loss
            if criterion is not None:
                loss = criterion(outputs, labels)
                running_loss += loss.item() * images.size(0)

            # Translate predicted output indices into labels before comparing
            _, preds = torch.max(outputs, 1)
            preds = getLabels(outputs_labels_mapping, preds)
            running_corrects += torch.sum(preds == labels).item()

    # Calculate Accuracy & Loss
    loss = running_loss / float(len(val_dataloader.dataset))
    acc = running_corrects / float(len(val_dataloader.dataset))

    return acc, loss

def test(net, test_dataloader):
    """Return only the accuracy of `net` on `test_dataloader` (no loss is computed)."""
    accuracy = validate(net, test_dataloader)[0]
    return accuracy

# Merges several Subsets of the same dataset into a single Subset
def joinSubsets(dataset, subsets):
    """Return a Subset of `dataset` holding the indices of all `subsets`, in order.

    Indices are concatenated as-is, so duplicates are kept.
    """
    merged = [index for subset in subsets for index in subset.indices]
    return Subset(dataset, merged)

def _rebindOptimizer(optimizer, net):
    """Make a freshly constructed optimizer update the parameters of `net`.

    The factory passed to jointTraining builds its optimizer over a brand-new
    network. Re-pointing the single parameter group keeps the factory's
    hyper-parameters (and any scheduler attached to the optimizer) while
    targeting the network that is actually being trained.
    """
    if len(optimizer.param_groups) != 1:
        raise ValueError('Cannot rebind an optimizer with {} parameter groups'.format(len(optimizer.param_groups)))
    optimizer.param_groups[0]['params'] = list(net.parameters())
    return optimizer


def jointTraining(getNet, addOutputs, train_subsets, val_subsets, test_subsets):
    """Train one network on a growing union of class groups.

    For every class group the train/test sets are extended with the new group,
    the output layer is enlarged (from the second group on), and the network
    is retrained on all the data seen so far with a fresh optimizer and
    scheduler. Validation uses the current group only; testing uses all the
    groups seen so far. Results are printed.

    Args:
        getNet: zero-argument factory returning (net, criterion, optimizer, scheduler).
        addOutputs: callable (net, n) that adds n output nodes to `net` in place.
        train_subsets, val_subsets, test_subsets: per-group Subsets, consumed in parallel.

    Returns:
        None.
    """
    #wandb.init(project="progetto-mldl", name='joint-training', anonymous='never')

    net, criterion, _, _ = getNet()
    #wandb.watch(net)

    outputs_per_group = 10  # output nodes added for each new class group

    train_set = None
    test_set = None
    for group_idx, (train_subset, val_subset, test_subset) in enumerate(zip(train_subsets, val_subsets, test_subsets)):

        # Builds growing train and test set. The new sets include data from previous class groups and current class group
        if train_set is None:
            train_set = train_subset
        else:
            train_set = joinSubsets(train_dataset, [train_set, train_subset])
        if test_set is None:
            test_set = test_subset
        else:
            test_set = joinSubsets(test_dataset, [test_set, test_subset])

        # Adds new output nodes to the network for the new incoming classes
        if group_idx > 0:
            addOutputs(net, outputs_per_group)

        # Resets optimizer & scheduler. The pair returned by getNet() belongs to
        # a different, newly created network, so it is rebound to `net`
        # (including the output layer just replaced); without this, `net` would
        # never be updated.
        _, _, optimizer, scheduler = getNet()
        _rebindOptimizer(optimizer, net)

        # Trains model on previous and current class groups. Shuffling matters:
        # the joined set lists the groups one after another.
        train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=False)
        train(net, train_loader, criterion, optimizer, scheduler)

        # Validate model on current class group
        val_loader = DataLoader(val_subset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4, drop_last=False)
        acc, loss = validate(net, val_loader, criterion)
        print(acc, loss)

        # Test the model on previous and current class groups
        test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False, num_workers=4, drop_last=False)
        acc = test(net, test_loader)
        print(acc)



In [0]:
def getResNet34(output_size):
    """Create an untrained ResNet-34 with `output_size` outputs plus its training objects.

    Returns:
        Tuple (net, criterion, optimizer, scheduler): the network, a
        cross-entropy loss, an SGD optimizer over all of the network's
        parameters, and a StepLR scheduler attached to that optimizer.
    """
    model = resnet34(pretrained=False, progress=True)
    # Replace the final fully connected layer so it emits `output_size` scores
    model.fc = nn.Linear(model.fc.in_features, output_size)

    sgd = optim.SGD(model.parameters(), lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
    step_lr = optim.lr_scheduler.StepLR(sgd, step_size=STEP_SIZE, gamma=GAMMA)
    loss_fn = nn.CrossEntropyLoss()

    return model, loss_fn, sgd, step_lr

def addOutputsToResNet(net, new_outputs):
    """Grow the final `fc` layer of `net` by `new_outputs` output nodes, in place.

    The rows of the existing outputs keep their learned weights AND biases;
    only the added rows start from the default nn.Linear initialisation. The
    new layer is created on the same device and with the same dtype as the
    layer it replaces.

    Args:
        net: module exposing an nn.Linear as `net.fc`.
        new_outputs: number of output nodes to add.

    Returns:
        None. `net.fc` is replaced.
    """
    old_fc = net.fc
    in_features = old_fc.in_features
    out_features = old_fc.out_features
    has_bias = old_fc.bias is not None

    new_fc = nn.Linear(in_features, out_features + new_outputs, bias=has_bias)
    new_fc = new_fc.to(device=old_fc.weight.device, dtype=old_fc.weight.dtype)

    # Copy without recording the assignment in autograd
    with torch.no_grad():
        new_fc.weight[:out_features] = old_fc.weight
        if has_bias:
            # Previously the old bias was dropped and randomly re-initialised
            new_fc.bias[:out_features] = old_fc.bias

    net.fc = new_fc

In [0]:
def getNet():
    """Factory used by jointTraining: a ResNet-34 sized for the first class group."""
    initial_outputs = 10
    return getResNet34(initial_outputs)


# Run joint training over all class groups with the ResNet-34 factory
jointTraining(getNet, addOutputsToResNet, train_subsets, val_subsets, test_subsets)