# Training

First, let's install Avalanche. You can skip this step if you have installed it already.

In [None]:
!pip install avalanche-lib=0.2.0

## Benchmarks

You can import benchmarks from the `avalanche.benchmarks` module.

In [7]:
from avalanche.benchmarks import SplitMNIST

In [8]:
# SplitMNIST cut into five experiences, without per-experience task ids.
benchmark = SplitMNIST(n_experiences=5, return_task_id=False)
train_stream, test_stream = benchmark.train_stream, benchmark.test_stream

# Both streams should contain one entry per experience.
print(f"len train: {len(train_stream)}, len test: {len(test_stream)}")

len train: 5, len test: 5


In [10]:
# Summarise each training experience: its index, task label and classes.
for exp in train_stream:
    eid, curr_classes, tid = (
        exp.current_experience,
        exp.classes_in_this_experience,
        exp.task_label,
    )
    print(f"({eid}) - T{tid}, classes={curr_classes}")

(0) - T0, classes=[3, 7]
(1) - T0, classes=[4, 6]
(2) - T0, classes=[0, 5]
(3) - T0, classes=[8, 2]
(4) - T0, classes=[1, 9]


Now let's try with a multi-task benchmark.

In [11]:
# Same five-way split, but this time each experience is given a task id.
benchmark = SplitMNIST(n_experiences=5, return_task_id=True)
train_stream, test_stream = benchmark.train_stream, benchmark.test_stream

# Both streams should contain one entry per experience.
print(f"len train: {len(train_stream)}, len test: {len(test_stream)}")

len train: 5, len test: 5


In [12]:
# Summarise each training experience: its index, task label and classes.
for exp in train_stream:
    eid, curr_classes, tid = (
        exp.current_experience,
        exp.classes_in_this_experience,
        exp.task_label,
    )
    print(f"({eid}) - T{tid}, classes={curr_classes}")

(0) - T0, classes=[0, 1]
(1) - T1, classes=[0, 1]
(2) - T2, classes=[0, 1]
(3) - T3, classes=[0, 1]
(4) - T4, classes=[0, 1]


## Multi-Task Model

You can use PyTorch models.

In [19]:
import torch
from torch import nn

class MLP(nn.Module):
    """Multi-layer perceptron with two hidden ReLU layers and a linear output.

    The network is split into a ``features`` extractor and a ``classifier``
    output layer, so the output layer can be addressed by name.

    Args:
        input_size: number of features per sample after flattening
            (default 784).
        hidden_size: width of both hidden layers (default 512).
        num_classes: number of output units of the classifier (default 10).
    """

    def __init__(self, input_size=784, hidden_size=512, num_classes=10):
        super().__init__()
        self.features = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU()
        )
        self.classifier = nn.Linear(hidden_size, num_classes)

    def forward(self, x, **kwargs):
        """Return the classifier outputs for a batch ``x``.

        Every dimension after the first is flattened before the first linear
        layer. Extra keyword arguments (e.g. ``task_labels``) are accepted
        but ignored.
        """
        x = x.reshape(x.shape[0], -1)
        x = self.features(x)
        return self.classifier(x)
    
model = MLP()

# Smoke test: one forward pass on a random batch of 32 flattened inputs.
sample_batch = torch.randn(32, 784)
model(sample_batch)

# Show the layer structure.
print(model)

MLP(
  (features): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
  )
  (classifier): Linear(in_features=512, out_features=10, bias=True)
)


The MLP ignores `task_labels`. You can use Avalanche's `MultiHeadClassifier` to split the output layer by task id.

In [22]:
# Random batch of 32 flattened inputs.
x = torch.randn(32, 784)
# One task label per sample. `high` is exclusive, so use 5 to cover all
# five task ids (0-4) of the benchmark.
t = torch.randint(low=0, high=5, size=(32,))
# `task_labels` is passed as an extra keyword argument to the model.
model(x, task_labels=t)

tensor([[-5.2818e-02, -1.1711e-01, -4.0755e-02,  6.2580e-02, -2.7195e-02,
          1.6112e-01,  8.8212e-02,  5.4748e-02,  1.1080e-01, -1.1024e-01],
        [-4.8843e-02, -6.7356e-02,  9.0853e-02,  8.9037e-02,  4.0431e-02,
          9.2167e-02,  5.9377e-02, -4.3629e-02,  6.8562e-02, -1.4341e-01],
        [-1.0278e-01, -5.2737e-03,  7.3208e-02,  2.5740e-02, -5.0809e-02,
          1.0314e-01,  6.2482e-02,  4.6415e-02, -2.7389e-02,  1.1042e-02],
        [-5.0386e-02, -8.8294e-02,  3.5914e-03, -5.0179e-02, -1.0267e-01,
          7.3686e-02,  2.9858e-04,  5.6256e-03, -3.8574e-03, -5.6767e-02],
        [ 2.0078e-01, -1.4191e-01,  4.9989e-02,  1.3214e-02, -2.4189e-04,
         -4.0756e-03,  8.3634e-02, -9.3482e-02,  1.7649e-02, -8.6874e-02],
        [-7.5609e-02,  1.2334e-02, -1.2911e-02, -4.7168e-03,  4.7949e-02,
          1.2498e-01,  8.0643e-02,  3.9433e-02,  5.1224e-02, -8.4854e-02],
        [ 1.9634e-02, -3.1073e-02,  1.3481e-01,  3.3806e-02,  2.6018e-02,
         -9.8696e-04,  5.1616e-0

In [25]:
from avalanche.models import as_multitask

# Wrap a fresh MLP, naming its output layer, to get a multi-task model.
base_mlp = MLP()
model_mt = as_multitask(base_mlp, 'classifier')

print(model_mt)

MultiTaskDecorator(
  (model): MLP(
    (features): Sequential(
      (0): Linear(in_features=784, out_features=512, bias=True)
      (1): ReLU()
      (2): Linear(in_features=512, out_features=512, bias=True)
      (3): ReLU()
    )
    (classifier): Sequential()
  )
  (classifier): MultiHeadClassifier(
    (classifiers): ModuleDict(
      (0): IncrementalClassifier(
        (classifier): Linear(in_features=512, out_features=10, bias=True)
      )
    )
  )
)


The model still doesn't know about all the tasks because it has never seen them.

In [27]:
# The model has not been adapted yet, so a forward pass with these task
# labels is expected to fail. Catch and display the error so the notebook
# still runs top to bottom.
try:
    model_mt(x, task_labels=t)
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: '1'

You have to adapt the model to each experience in the stream:

In [31]:
from avalanche.models import DynamicModule

# Let every dynamic sub-module see each training experience in turn.
for exp in benchmark.train_stream:
    for module in model_mt.modules():
        if not isinstance(module, DynamicModule):
            continue  # static layers have nothing to adapt
        module.adaptation(exp)

print(model_mt)

MultiTaskDecorator(
  (model): MLP(
    (features): Sequential(
      (0): Linear(in_features=784, out_features=512, bias=True)
      (1): ReLU()
      (2): Linear(in_features=512, out_features=512, bias=True)
      (3): ReLU()
    )
    (classifier): Sequential()
  )
  (classifier): MultiHeadClassifier(
    (classifiers): ModuleDict(
      (0): IncrementalClassifier(
        (classifier): Linear(in_features=512, out_features=10, bias=True)
      )
      (1): IncrementalClassifier(
        (classifier): Linear(in_features=512, out_features=10, bias=True)
      )
      (2): IncrementalClassifier(
        (classifier): Linear(in_features=512, out_features=10, bias=True)
      )
      (3): IncrementalClassifier(
        (classifier): Linear(in_features=512, out_features=10, bias=True)
      )
      (4): IncrementalClassifier(
        (classifier): Linear(in_features=512, out_features=10, bias=True)
      )
    )
  )
)


Now the model has been adapted to all the tasks. A separate head for each task is available for classification.

In [32]:
# Same call as before the adaptation loop; the returned tensor is shown
# as the cell output.
model_mt(x, task_labels=t)

tensor([[ 1.1171e-02, -9.0120e-02, -1.0000e+03, -1.0000e+03, -1.0000e+03,
         -1.0000e+03, -1.0000e+03, -1.0000e+03, -1.0000e+03, -1.0000e+03],
        [-9.5024e-02, -4.7357e-02, -1.0000e+03, -1.0000e+03, -1.0000e+03,
         -1.0000e+03, -1.0000e+03, -1.0000e+03, -1.0000e+03, -1.0000e+03],
        [-9.1795e-02,  1.4599e-02, -1.0000e+03, -1.0000e+03, -1.0000e+03,
         -1.0000e+03, -1.0000e+03, -1.0000e+03, -1.0000e+03, -1.0000e+03],
        [-4.4824e-02,  8.8849e-03, -1.0000e+03, -1.0000e+03, -1.0000e+03,
         -1.0000e+03, -1.0000e+03, -1.0000e+03, -1.0000e+03, -1.0000e+03],
        [-1.4652e-02, -8.7687e-02, -1.0000e+03, -1.0000e+03, -1.0000e+03,
         -1.0000e+03, -1.0000e+03, -1.0000e+03, -1.0000e+03, -1.0000e+03],
        [-5.0053e-02, -1.0758e-01, -1.0000e+03, -1.0000e+03, -1.0000e+03,
         -1.0000e+03, -1.0000e+03, -1.0000e+03, -1.0000e+03, -1.0000e+03],
        [ 4.0553e-02,  1.9057e-01, -1.0000e+03, -1.0000e+03, -1.0000e+03,
         -1.0000e+03, -1.0000e+0

## Training

Each strategy object offers two main methods: `train` and `eval`. Both of them accept either a _single experience_ (`Experience`) or a _list of them_, for maximum flexibility.

We can train the model continually by iterating over the `train_stream` provided by the scenario.

In [37]:
from torch.optim import SGD
from torch.nn import CrossEntropyLoss
from avalanche.models import SimpleMLP
from avalanche.training import Naive

# Scenario: multi-task SplitMNIST with five experiences and a fixed seed.
benchmark = SplitMNIST(n_experiences=5, return_task_id=True, seed=1)

# Model, loss and optimizer.
model = as_multitask(MLP(), 'classifier')
criterion = CrossEntropyLoss()
optimizer = SGD(model.parameters(), lr=0.001, momentum=0.9)

# Strategy: naive fine-tuning, one epoch per experience.
cl_strategy = Naive(
    model=model,
    optimizer=optimizer,
    criterion=criterion,
    train_epochs=1,
    train_mb_size=100,
    eval_mb_size=100,
)

In [38]:
# Train on the experiences one at a time, in stream order.
for experience in benchmark.train_stream:
    cl_strategy.train(experience)

# Evaluate on the full test stream; the returned metrics are the cell output.
cl_strategy.eval(benchmark.test_stream)

-- >> Start of training phase << --
100%|████████████████████████████████████████████████████████████████████████████████| 114/114 [00:04<00:00, 24.26it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.3752
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.8766
-- >> End of training phase << --
-- >> Start of training phase << --
100%|████████████████████████████████████████████████████████████████████████████████| 127/127 [00:05<00:00, 24.36it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task001 = 0.2639
	Top1_Acc_Epoch/train_phase/train_stream/Task001 = 0.9317
-- >> End of training phase << --
-- >> Start of training phase << --
100%|████████████████████████████████████████████████████████████████████████████████| 118/118 [00:04<00:00, 24.47it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task002 = 0.2196
	Top1_Acc_Epoch/train_phase/train_stream/Task002 = 0.9381
-- >> End of training phase << --
-- >> Start of training phase << --
100%|████████

{'Top1_Acc_Epoch/train_phase/train_stream/Task000': 0.8766205132727753,
 'Loss_Epoch/train_phase/train_stream/Task000': 0.3752482113228745,
 'Top1_Acc_Epoch/train_phase/train_stream/Task001': 0.9317322834645669,
 'Loss_Epoch/train_phase/train_stream/Task001': 0.2638802383769685,
 'Top1_Acc_Epoch/train_phase/train_stream/Task002': 0.9380839137081706,
 'Loss_Epoch/train_phase/train_stream/Task002': 0.2196187972971837,
 'Top1_Acc_Epoch/train_phase/train_stream/Task003': 0.9188741721854304,
 'Loss_Epoch/train_phase/train_stream/Task003': 0.27645327155755844,
 'Top1_Acc_Epoch/train_phase/train_stream/Task004': 0.9092260675642191,
 'Loss_Epoch/train_phase/train_stream/Task004': 0.2734750110715716,
 'Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp000': 0.947027027027027,
 'Loss_Exp/eval_phase/test_stream/Task000/Exp000': 0.15023965730860428,
 'Top1_Acc_Exp/eval_phase/test_stream/Task001/Exp001': 0.9709275496077526,
 'Loss_Exp/eval_phase/test_stream/Task001/Exp001': 0.08084066453538956,
 'Top1

In [43]:
from avalanche.training.plugins import EvaluationPlugin
from avalanche.logging import InteractiveLogger
from avalanche.evaluation.metrics import accuracy_metrics

# Fresh multi-head model, optimizer and loss for this run.
model = as_multitask(MLP(), 'classifier')
optimizer = SGD(model.parameters(), lr=0.001, momentum=0.9)
criterion = CrossEntropyLoss()

# Accuracy at minibatch, epoch, experience and stream level, reported
# through an interactive logger.
acc_metrics = accuracy_metrics(minibatch=True, epoch=True, experience=True, stream=True)
evaluator = EvaluationPlugin(
    acc_metrics,
    loggers=[InteractiveLogger()],
    suppress_warnings=True,
)

# Naive strategy with the evaluator attached.
cl_strategy = Naive(
    model=model,
    optimizer=optimizer,
    criterion=criterion,
    train_mb_size=100,
    train_epochs=1,
    eval_mb_size=100,
    eval_every=1,  # how often you want the evaluation during training
    evaluator=evaluator,
)

# Train on the first two experiences, evaluating on those same two.
for exp in benchmark.train_stream[:2]:
    cl_strategy.train(exp, eval_streams=[benchmark.train_stream[:2]])

-- >> Start of training phase << --
-- >> Start of eval phase << --
-- Starting eval on experience 0 (Task 0) from train stream --




100%|████████████████████████████████████████████████████████████████████████████████| 114/114 [00:03<00:00, 31.59it/s]
> Eval on experience 0 (Task 0) from train stream ended.
	Top1_Acc_Exp/eval_phase/train_stream/Task000/Exp000 = 0.4781
-- Starting eval on experience 1 (Task 1) from train stream --
100%|████████████████████████████████████████████████████████████████████████████████| 127/127 [00:04<00:00, 31.45it/s]
> Eval on experience 1 (Task 1) from train stream ended.
	Top1_Acc_Exp/eval_phase/train_stream/Task001/Exp001 = 0.5309
-- >> End of eval phase << --
	Top1_Acc_Stream/eval_phase/train_stream/Task000 = 0.4781
	Top1_Acc_Stream/eval_phase/train_stream/Task001 = 0.5309
100%|████████████████████████████████████████████████████████████████████████████████| 114/114 [00:04<00:00, 26.49it/s]
Epoch 0 ended.
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9016
	Top1_Acc_MB/train_phase/train_stream/Task000 = 1.0000
-- >> Start of eval phase << --
-- Starting eval on experience 0 

In [44]:
# Display everything the evaluator has recorded so far. The output maps each
# metric name to a pair of lists.
evaluator.get_all_metrics()

defaultdict(<function avalanche.training.plugins.evaluation.EvaluationPlugin.__init__.<locals>.<lambda>()>,
            {'Top1_Acc_Exp/eval_phase/train_stream/Task000/Exp000': ([0,
               114,
               114,
               241],
              [0.47808448716818064,
               0.9645471381956081,
               0.9645471381956081,
               0.9510538848222947]),
             'Top1_Acc_Exp/eval_phase/train_stream/Task001/Exp001': ([0,
               114,
               114,
               241],
              [0.5308661417322834,
               0.5308661417322834,
               0.5308661417322834,
               0.981732283464567]),
             'Top1_Acc_Stream/eval_phase/train_stream/Task000': ([0,
               114,
               114,
               241],
              [0.47808448716818064,
               0.9645471381956081,
               0.9645471381956081,
               0.9510538848222947]),
             'Top1_Acc_Stream/eval_phase/train_stream/Task001': ([0

### Adding Plugins

Most continual learning strategies follow roughly the same training/evaluation loops, i.e. a simple naive strategy (a.k.a. finetuning) augmented with additional behavior to counteract catastrophic forgetting. The plugin system in Avalanche is designed to easily augment continual learning strategies with custom behavior, without having to rewrite the training loop from scratch. Avalanche strategies accept an optional list of `plugins` that will be executed during the training/evaluation loops.

For example, early stopping is implemented as a plugin:

In [3]:
from avalanche.training.plugins import EarlyStoppingPlugin

# Build the plugin first, then hand it to the strategy.
early_stopping = EarlyStoppingPlugin(patience=10, val_stream_name='train')
strategy = Naive(model, optimizer, criterion, plugins=[early_stopping])

In Avalanche, most continual learning strategies are implemented using plugins, which makes it easy to combine them together. For example, it is extremely easy to create a hybrid strategy that combines replay and EWC together by passing the appropriate `plugins` list to the `SupervisedTemplate`:

In [4]:
from avalanche.training.templates import SupervisedTemplate
from avalanche.training.plugins import ReplayPlugin, EWCPlugin

# Hybrid strategy: replay and EWC plugged into the same template.
replay = ReplayPlugin(mem_size=100)
ewc = EWCPlugin(ewc_lambda=0.001)
hybrid_plugins = [replay, ewc]

strategy = SupervisedTemplate(model, optimizer, criterion, plugins=hybrid_plugins)

Beware that most strategy plugins modify the internal state. As a result, not all the strategy plugins can be combined together. For example, it does not make sense to use multiple replay plugins since they will try to modify the same strategy variables (mini-batches, dataloaders), and therefore they will be in conflict.

# Example: Replay Buffers

In [47]:
from avalanche.training.storage_policy import ReservoirSamplingBuffer
from types import SimpleNamespace

# Five-experience SplitMNIST without task ids, plus a buffer capped at 30.
benchmark = SplitMNIST(n_experiences=5, return_task_id=False)
storage_p = ReservoirSamplingBuffer(max_size=30)

# Report the buffer capacity and its current size.
print(f"Max buffer size: {storage_p.max_size}, current size: {len(storage_p.buffer)}")

Max buffer size: 30, current size: 0


In [48]:
from avalanche.training.storage_policy import ParametricBuffer, RandomExemplarsSelectionStrategy
from types import SimpleNamespace


# Buffer of at most 30 samples, grouped by class, filled with randomly
# selected exemplars.
exemplar_selector = RandomExemplarsSelectionStrategy()
storage_p = ParametricBuffer(
    max_size=30,
    groupby='class',
    selection_strategy=exemplar_selector,
)

print(f"Max buffer size: {storage_p.max_size}, current size: {len(storage_p.buffer)}")
for exp_idx in range(5):
    # you can use a SimpleNamespace if you want to use Avalanche components with your own code.
    strategy_state = SimpleNamespace(experience=benchmark.train_stream[exp_idx])
    storage_p.update(strategy_state)
    # Size and class composition of the buffer after this update.
    print(f"Max buffer size: {storage_p.max_size}, current size: {len(storage_p.buffer)}")
    print(f"class targets: {storage_p.buffer.targets}\n")

Max buffer size: 30, current size: 0
Max buffer size: 30, current size: 30
class targets: [5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7]

Max buffer size: 30, current size: 30
class targets: [5, 5, 5, 5, 5, 5, 5, 5, 7, 7, 7, 7, 7, 7, 7, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6]

Max buffer size: 30, current size: 30
class targets: [5, 5, 5, 5, 5, 7, 7, 7, 7, 7, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 4, 4, 4, 4, 4, 1, 1, 1, 1, 1]

Max buffer size: 30, current size: 30
class targets: [5, 5, 5, 5, 7, 7, 7, 2, 2, 2, 2, 6, 6, 6, 6, 4, 4, 4, 4, 1, 1, 1, 1, 0, 0, 0, 0, 9, 9, 9]

Max buffer size: 30, current size: 30
class targets: [5, 5, 5, 7, 7, 7, 2, 2, 2, 6, 6, 6, 4, 4, 4, 1, 1, 1, 0, 0, 0, 9, 9, 9, 3, 3, 3, 8, 8, 8]



## 🤝 Run it on Google Colab

You can run _this chapter_ and play with it on Google Colaboratory: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/AntonioCarta/avalanche-demo/blob/main/avl_demo.ipynb)

https://github.com/AntonioCarta/avalanche-demo/blob/main/avl_demo.ipynb