From 136c69ebde7b3ca037444c333542bae21fb71165 Mon Sep 17 00:00:00 2001 From: hamedhemati Date: Tue, 12 Jul 2022 16:47:53 +0200 Subject: [PATCH 01/10] Initialize new template structure --- .../supervised/strategy_wrappers_temp.py | 103 ++++ .../training/templates/base_general_sgd.py | 516 ++++++++++++++++++ .../training/templates/common_templates.py | 227 ++++++++ .../templates/observation_type/__init__.py | 6 + .../observation_type/batch_observation.py | 46 ++ .../observation_type/online_observation.py | 58 ++ .../templates/problem_type/__init__.py | 5 + .../problem_type/supervised_problem.py | 31 ++ .../templates/update_type/__init__.py | 5 + .../templates/update_type/meta_update.py | 27 + .../templates/update_type/sgd_update.py | 36 ++ 11 files changed, 1060 insertions(+) create mode 100644 avalanche/training/supervised/strategy_wrappers_temp.py create mode 100644 avalanche/training/templates/base_general_sgd.py create mode 100644 avalanche/training/templates/common_templates.py create mode 100644 avalanche/training/templates/observation_type/__init__.py create mode 100644 avalanche/training/templates/observation_type/batch_observation.py create mode 100644 avalanche/training/templates/observation_type/online_observation.py create mode 100644 avalanche/training/templates/problem_type/__init__.py create mode 100644 avalanche/training/templates/problem_type/supervised_problem.py create mode 100644 avalanche/training/templates/update_type/__init__.py create mode 100644 avalanche/training/templates/update_type/meta_update.py create mode 100644 avalanche/training/templates/update_type/sgd_update.py diff --git a/avalanche/training/supervised/strategy_wrappers_temp.py b/avalanche/training/supervised/strategy_wrappers_temp.py new file mode 100644 index 000000000..19bcbba36 --- /dev/null +++ b/avalanche/training/supervised/strategy_wrappers_temp.py @@ -0,0 +1,103 @@ +from typing import Optional, List +from torch.nn import Module, CrossEntropyLoss +from torch.optim import Optimizer + +from avalanche.training.plugins import EvaluationPlugin +from avalanche.training.templates.common_templates import ( + SupervisedTemplate, + OnlineSupervisedTemplate +) +from avalanche.training.plugins.evaluation import default_evaluator +from avalanche.training.plugins import SupervisedPlugin + + +class Naive(SupervisedTemplate): + def __init__( + self, + model: Module, + optimizer: Optimizer, + criterion=CrossEntropyLoss(), + train_mb_size: int = 1, + train_epochs: int = 1, + eval_mb_size: Optional[int] = None, + device=None, + plugins: Optional[List[SupervisedPlugin]] = None, + evaluator: EvaluationPlugin = default_evaluator, + eval_every=-1, + **base_kwargs + ): + super().__init__( + model, + optimizer, + criterion, + train_mb_size=train_mb_size, + train_epochs=train_epochs, + eval_mb_size=eval_mb_size, + device=device, + plugins=plugins, + evaluator=evaluator, + eval_every=eval_every, + **base_kwargs + ) + + +class OnlineNaive(OnlineSupervisedTemplate): + """Online naive finetuning. + + The simplest (and least effective) Continual Learning strategy. Naive just + incrementally fine tunes a single model without employing any method + to contrast the catastrophic forgetting of previous knowledge. + This strategy does not use task identities. + + Naive is easy to set up and its results are commonly used to show the worst + performing baseline. 
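+
+    A minimal usage sketch, assuming an online stream of short
+    sub-experiences has already been built from a regular benchmark stream
+    (the helper used to produce that split is not shown here)::
+
+        strategy = OnlineNaive(model, optimizer, train_mb_size=10)
+        for sub_experience in online_train_stream:
+            strategy.train(sub_experience)
+        results = strategy.eval(benchmark.test_stream)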
+ """ + + def __init__( + self, + model: Module, + optimizer: Optimizer, + criterion=CrossEntropyLoss(), + train_passes: int = 1, + train_mb_size: int = 1, + eval_mb_size: int = None, + device=None, + plugins: Optional[List[SupervisedPlugin]] = None, + evaluator: EvaluationPlugin = default_evaluator, + eval_every=-1, + ): + """ + Creates an instance of the Naive strategy. + + :param model: The model. + :param optimizer: The optimizer to use. + :param criterion: The loss criterion to use. + :param num_passes: The number of passes for each sub-experience. + Defaults to 1. + :param train_mb_size: The train minibatch size. Defaults to 1. + :param eval_mb_size: The eval minibatch size. Defaults to 1. + :param device: The device to use. Defaults to None (cpu). + :param plugins: Plugins to be added. Defaults to None. + :param evaluator: (optional) instance of EvaluationPlugin for logging + and metric computations. + :param eval_every: the frequency of the calls to `eval` inside the + training loop. -1 disables the evaluation. 0 means `eval` is called + only at the end of the learning experience. Values >0 mean that + `eval` is called every `eval_every` epochs and at the end of the + learning experience. + """ + super().__init__( + model, + optimizer, + criterion, + train_passes=train_passes, + train_mb_size=train_mb_size, + eval_mb_size=eval_mb_size, + device=device, + plugins=plugins, + evaluator=evaluator, + eval_every=eval_every, + ) + + +__all__ = ["Naive", "OnlineNaive"] diff --git a/avalanche/training/templates/base_general_sgd.py b/avalanche/training/templates/base_general_sgd.py new file mode 100644 index 000000000..954bfe9d0 --- /dev/null +++ b/avalanche/training/templates/base_general_sgd.py @@ -0,0 +1,516 @@ +from typing import Iterable, Sequence, Optional, Union, List +from pkg_resources import parse_version + +import torch +from torch.nn import Module, CrossEntropyLoss +from torch.optim import Optimizer +from torch.utils.data import DataLoader + +from avalanche.benchmarks import CLExperience, CLStream +from avalanche.core import BaseSGDPlugin +from avalanche.training.plugins import SupervisedPlugin, EvaluationPlugin +from avalanche.training.plugins.clock import Clock +from avalanche.training.plugins.evaluation import default_evaluator +from avalanche.training.templates.base import BaseTemplate, ExpSequence +from avalanche.models.utils import avalanche_model_adaptation +from avalanche.benchmarks.utils.data_loader import TaskBalancedDataLoader, \ + collate_from_data_or_kwargs +from avalanche.training.utils import trigger_plugins + + +class BaseGeneralSGDTemplate(BaseTemplate): + """Base SGD class for continual learning skeletons. + + **Training loop** + The training loop is organized as follows:: + + train + train_exp # for each experience + + **Evaluation loop** + The evaluation loop is organized as follows:: + + eval + eval_exp # for each experience + + """ + + PLUGIN_CLASS = BaseSGDPlugin + + def __init__( + self, + model: Module, + optimizer: Optimizer, + criterion=CrossEntropyLoss(), + train_mb_size: int = 1, + train_epochs: int = 1, + eval_mb_size: Optional[int] = 1, + device="cpu", + plugins: Optional[List["SupervisedPlugin"]] = None, + evaluator: EvaluationPlugin = default_evaluator, + eval_every=-1, + peval_mode="epoch", + ): + """Init. + + :param model: PyTorch model. + :param optimizer: PyTorch optimizer. + :param criterion: loss function. + :param train_mb_size: mini-batch size for training. + :param train_epochs: number of training epochs. 
+ :param eval_mb_size: mini-batch size for eval. + :param evaluator: (optional) instance of EvaluationPlugin for logging + and metric computations. None to remove logging. + :param eval_every: the frequency of the calls to `eval` inside the + training loop. -1 disables the evaluation. 0 means `eval` is called + only at the end of the learning experience. Values >0 mean that + `eval` is called every `eval_every` epochs and at the end of the + learning experience. + :param peval_mode: one of {'epoch', 'iteration'}. Decides whether the + periodic evaluation during training should execute every + `eval_every` epochs or iterations (Default='epoch'). + """ + super().__init__(model=model, device=device, plugins=plugins) + + self.optimizer: Optimizer = optimizer + """ PyTorch optimizer. """ + + self._criterion = criterion + """ Criterion. """ + + self.train_epochs: int = train_epochs + """ Number of training epochs. """ + + self.train_mb_size: int = train_mb_size + """ Training mini-batch size. """ + + self.eval_mb_size: int = ( + train_mb_size if eval_mb_size is None else eval_mb_size + ) + """ Eval mini-batch size. """ + + if evaluator is None: + evaluator = EvaluationPlugin() + self.plugins.append(evaluator) + self.evaluator = evaluator + """ EvaluationPlugin used for logging and metric computations. """ + + # Configure periodic evaluation. + assert peval_mode in {"experience", "epoch", "iteration"} + self.eval_every = eval_every + peval = PeriodicEval(eval_every, peval_mode) + self.plugins.append(peval) + + self.clock = Clock() + """ Incremental counters for strategy events. """ + # WARNING: Clock needs to be the last plugin, otherwise + # counters will be wrong for plugins called after it. + self.plugins.append(self.clock) + + ################################################################### + # State variables. These are updated during the train/eval loops. # + ################################################################### + + self.adapted_dataset = None + """ Data used to train. It may be modified by plugins. Plugins can + append data to it (e.g. for replay). + + .. note:: + + This dataset may contain samples from different experiences. If you + want the original data for the current experience + use :attr:`.BaseTemplate.experience`. + """ + + self.dataloader = None + """ Dataloader. """ + + self.mbatch = None + """ Current mini-batch. """ + + self.mb_output = None + """ Model's output computed on the current mini-batch. """ + + self.loss = None + """ Loss of the current mini-batch. """ + + self._stop_training = False + + def train(self, + experiences: Union[CLExperience, + ExpSequence], + eval_streams: Optional[Sequence[Union[CLExperience, + ExpSequence]]] = None, + **kwargs): + super().train(experiences, eval_streams, **kwargs) + return self.evaluator.get_last_metrics() + + @torch.no_grad() + def eval(self, exp_list: Union[CLExperience, CLStream], **kwargs): + """ + Evaluate the current model on a series of experiences and + returns the last recorded value for each metric. + + :param exp_list: CL experience information. + :param kwargs: custom arguments. 
+ + :return: dictionary containing last recorded value for + each metric name + """ + super().eval(exp_list, **kwargs) + return self.evaluator.get_last_metrics() + + def _train_exp( + self, experience: CLExperience, eval_streams, **kwargs + ): + # Should be implemented in Observation Type + raise NotImplementedError() + + def _eval_exp(self, **kwargs): + self.eval_epoch(**kwargs) + + def make_optimizer(self, **kwargs): + """Optimizer initialization.""" + # Should be implemented in Observation Type + raise NotImplementedError() + + def criterion(self): + """Compute loss function.""" + raise NotImplementedError() + + def forward(self): + """Compute the model's output given the current mini-batch.""" + raise NotImplementedError() + + def model_adaptation(self, model=None): + """Adapts the model to the current experience.""" + raise NotImplementedError() + + def stop_training(self): + """Signals to stop training at the next iteration.""" + self._stop_training = True + + def training_epoch(self, **kwargs): + # Should be implemented in Update Type + raise NotADirectoryError() + + def backward(self): + """Run the backward pass.""" + self.loss.backward() + + def optimizer_step(self): + """Execute the optimizer step (weights update).""" + self.optimizer.step() + + def eval_epoch(self, **kwargs): + """Evaluation loop over the current `self.dataloader`.""" + for self.mbatch in self.dataloader: + self._unpack_minibatch() + self._before_eval_iteration(**kwargs) + + self._before_eval_forward(**kwargs) + self.mb_output = self.forward() + self._after_eval_forward(**kwargs) + self.loss = self.criterion() + + self._after_eval_iteration(**kwargs) + + # ==================================================================> NEW + + def maybe_adapt_model_and_make_optimizer(self): + # Should be implemented in observation type + raise NotImplementedError() + + def _before_training_exp(self, **kwargs): + """Setup to train on a single experience.""" + # Data Adaptation (e.g. add new samples/data augmentation) + self._before_train_dataset_adaptation(**kwargs) + self.train_dataset_adaptation(**kwargs) + self._after_train_dataset_adaptation(**kwargs) + + self.make_train_dataloader(**kwargs) + + # Model Adaptation (e.g. freeze/add new units) + # self.model = self.model_adaptation() + # self.make_optimizer() + self.maybe_adapt_model_and_make_optimizer() + + super()._before_training_exp(**kwargs) + + def _save_train_state(self): + """Save the training state which may be modified by the eval loop. + + This currently includes: experience, adapted_dataset, dataloader, + is_training, and train/eval modes for each module. + + TODO: we probably need a better way to do this. + """ + state = super()._save_train_state() + new_state = { + "adapted_dataset": self.adapted_dataset, + "dataloader": self.dataloader, + } + return {**state, **new_state} + + def train_dataset_adaptation(self, **kwargs): + """Initialize `self.adapted_dataset`.""" + self.adapted_dataset = self.experience.dataset + self.adapted_dataset = self.adapted_dataset.train() + + def _load_train_state(self, prev_state): + super()._load_train_state(prev_state) + self.adapted_dataset = prev_state["adapted_dataset"] + self.dataloader = prev_state["dataloader"] + + def _before_eval_exp(self, **kwargs): + + # Data Adaptation + self._before_eval_dataset_adaptation(**kwargs) + self.eval_dataset_adaptation(**kwargs) + self._after_eval_dataset_adaptation(**kwargs) + + self.make_eval_dataloader(**kwargs) + # Model Adaptation (e.g. 
freeze/add new units) + self.model = self.model_adaptation() + + super()._before_eval_exp(**kwargs) + + def make_train_dataloader( + self, + num_workers=0, + shuffle=True, + pin_memory=True, + persistent_workers=False, + **kwargs + ): + """Data loader initialization. + + Called at the start of each learning experience after the dataset + adaptation. + + :param num_workers: number of thread workers for the data loading. + :param shuffle: True if the data should be shuffled, False otherwise. + :param pin_memory: If True, the data loader will copy Tensors into CUDA + pinned memory before returning them. Defaults to True. + """ + + other_dataloader_args = {} + + if parse_version(torch.__version__) >= parse_version("1.7.0"): + other_dataloader_args["persistent_workers"] = persistent_workers + for k, v in kwargs.items(): + other_dataloader_args[k] = v + + self.dataloader = TaskBalancedDataLoader( + self.adapted_dataset, + oversample_small_groups=True, + num_workers=num_workers, + batch_size=self.train_mb_size, + shuffle=shuffle, + pin_memory=pin_memory, + **other_dataloader_args + ) + + def make_eval_dataloader( + self, num_workers=0, pin_memory=True, persistent_workers=False, **kwargs + ): + """ + Initializes the eval data loader. + :param num_workers: How many subprocesses to use for data loading. + 0 means that the data will be loaded in the main process. + (default: 0). + :param pin_memory: If True, the data loader will copy Tensors into CUDA + pinned memory before returning them. Defaults to True. + :param kwargs: + :return: + """ + other_dataloader_args = {} + + if parse_version(torch.__version__) >= parse_version("1.7.0"): + other_dataloader_args["persistent_workers"] = persistent_workers + for k, v in kwargs.items(): + other_dataloader_args[k] = v + + collate_from_data_or_kwargs(self.adapted_dataset, + other_dataloader_args) + self.dataloader = DataLoader( + self.adapted_dataset, + num_workers=num_workers, + batch_size=self.eval_mb_size, + pin_memory=pin_memory, + **other_dataloader_args + ) + + def eval_dataset_adaptation(self, **kwargs): + """Initialize `self.adapted_dataset`.""" + self.adapted_dataset = self.experience.dataset + self.adapted_dataset = self.adapted_dataset.eval() + + def model_adaptation(self, model=None): + """Adapts the model to the current data. + + Calls the :class:`~avalanche.models.DynamicModule`s adaptation. 
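+
+        For dynamic architectures (e.g. a multi-head or incremental
+        classifier), this is where new units for the classes and tasks of
+        the current experience are added, before the optimizer is
+        (re-)initialized for training.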
+ """ + if model is None: + model = self.model + avalanche_model_adaptation(model, self.experience) + return model.to(self.device) + + def _unpack_minibatch(self): + """Move to device""" + # First verify the mini-batch + self._check_minibatch() + + for i in range(len(self.mbatch)): + self.mbatch[i] = self.mbatch[i].to(self.device) + + ######################################################### + # Plugin Triggers # + ######################################################### + + def _before_training_epoch(self, **kwargs): + trigger_plugins(self, "before_training_epoch", **kwargs) + + def _after_training_epoch(self, **kwargs): + trigger_plugins(self, "after_training_epoch", **kwargs) + + def _before_training_iteration(self, **kwargs): + trigger_plugins(self, "before_training_iteration", **kwargs) + + def _before_forward(self, **kwargs): + trigger_plugins(self, "before_forward", **kwargs) + + def _after_forward(self, **kwargs): + trigger_plugins(self, "after_forward", **kwargs) + + def _before_backward(self, **kwargs): + trigger_plugins(self, "before_backward", **kwargs) + + def _after_backward(self, **kwargs): + trigger_plugins(self, "after_backward", **kwargs) + + def _after_training_iteration(self, **kwargs): + trigger_plugins(self, "after_training_iteration", **kwargs) + + def _before_update(self, **kwargs): + trigger_plugins(self, "before_update", **kwargs) + + def _after_update(self, **kwargs): + trigger_plugins(self, "after_update", **kwargs) + + def _before_eval_iteration(self, **kwargs): + trigger_plugins(self, "before_eval_iteration", **kwargs) + + def _before_eval_forward(self, **kwargs): + trigger_plugins(self, "before_eval_forward", **kwargs) + + def _after_eval_forward(self, **kwargs): + trigger_plugins(self, "after_eval_forward", **kwargs) + + def _after_eval_iteration(self, **kwargs): + trigger_plugins(self, "after_eval_iteration", **kwargs) + + # ==================================================================> NEW + + def _before_train_dataset_adaptation(self, **kwargs): + trigger_plugins(self, "before_train_dataset_adaptation", **kwargs) + + def _after_train_dataset_adaptation(self, **kwargs): + trigger_plugins(self, "after_train_dataset_adaptation", **kwargs) + + def _before_eval_dataset_adaptation(self, **kwargs): + trigger_plugins(self, "before_eval_dataset_adaptation", **kwargs) + + def _after_eval_dataset_adaptation(self, **kwargs): + trigger_plugins(self, "after_eval_dataset_adaptation", **kwargs) + + +class PeriodicEval(SupervisedPlugin): + """Schedules periodic evaluation during training. + + This plugin is automatically configured and added by the BaseTemplate. + """ + + def __init__(self, eval_every=-1, peval_mode="epoch", do_initial=True): + """Init. + + :param eval_every: the frequency of the calls to `eval` inside the + training loop. -1 disables the evaluation. 0 means `eval` is called + only at the end of the learning experience. Values >0 mean that + `eval` is called every `eval_every` epochs and at the end of the + learning experience. + :param peval_mode: one of {'epoch', 'iteration'}. Decides whether the + periodic evaluation during training should execute every + `eval_every` epochs or iterations (Default='epoch'). + :param do_initial: whether to evaluate before each `train` call. + Occasionally needed becuase some metrics need to know the + accuracy before training. 
+ """ + super().__init__() + assert peval_mode in {"experience", "epoch", "iteration"} + self.eval_every = eval_every + self.peval_mode = peval_mode + self.do_initial = do_initial and eval_every > -1 + self.do_final = None + self._is_eval_updated = False + + def before_training(self, strategy, **kwargs): + """Eval before each learning experience. + + Occasionally needed because some metrics need the accuracy before + training. + """ + if self.do_initial: + self._peval(strategy, **kwargs) + + def before_training_exp(self, strategy, **kwargs): + # We evaluate at the start of each experience because train_epochs + # could change. + self.do_final = True + if self.peval_mode == "epoch": + if ( + self.eval_every > 0 + and (strategy.train_epochs - 1) % self.eval_every == 0 + ): + self.do_final = False + else: # peval_mode == 'iteration' + # we may need to fix this but we don't have a way to know + # the number of total iterations. + # Right now there may be two eval calls at the last iterations. + pass + self.do_final = self.do_final and self.eval_every > -1 + + def _peval(self, strategy, **kwargs): + for el in strategy._eval_streams: + strategy.eval(el, **kwargs) + + def _maybe_peval(self, strategy, counter, **kwargs): + if self.eval_every > 0 and counter % self.eval_every == 0: + self._peval(strategy, **kwargs) + + def after_training_epoch(self, strategy: "BaseSGDTemplate", **kwargs): + """Periodic eval controlled by `self.eval_every` and + `self.peval_mode`.""" + if self.peval_mode == "epoch": + self._maybe_peval(strategy, strategy.clock.train_exp_epochs, + **kwargs) + + def after_training_iteration(self, strategy: "BaseSGDTemplate", **kwargs): + """Periodic eval controlled by `self.eval_every` and + `self.peval_mode`.""" + if self.peval_mode == "iteration": + self._maybe_peval(strategy, strategy.clock.train_exp_iterations, + **kwargs) + + # ---> New + def after_training_exp(self, strategy, **kwargs): + """Final eval after a learning experience.""" + if self.do_final: + self._peval(strategy, **kwargs) + + # def after_training_exp(self, strategy: "BaseOnlineSGDTemplate", **kwargs): + # """Periodic eval controlled by `self.eval_every` and + # `self.peval_mode`.""" + # if self.peval_mode == "experience": + # self._maybe_peval(strategy, strategy.clock.train_exp_counter, + # **kwargs) diff --git a/avalanche/training/templates/common_templates.py b/avalanche/training/templates/common_templates.py new file mode 100644 index 000000000..f5b055e17 --- /dev/null +++ b/avalanche/training/templates/common_templates.py @@ -0,0 +1,227 @@ +from typing import Sequence, Optional + +from torch.nn import Module, CrossEntropyLoss +from torch.optim import Optimizer + +from avalanche.training.plugins import SupervisedPlugin +from avalanche.training.plugins.evaluation import default_evaluator + +from .observation_type import * +from .problem_type import * +from .update_type import * +from .base_general_sgd import BaseGeneralSGDTemplate + + +class SupervisedTemplate(BatchObservation, SupervisedProblem, SGDUpdate, + BaseGeneralSGDTemplate): + """Base class for continual learning strategies. + + BaseTemplate is the super class of all task-based continual learning + strategies. It implements a basic training loop and callback system + that allows to execute code at each experience of the training loop. + Plugins can be used to implement callbacks to augment the training + loop with additional behavior (e.g. a memory buffer for replay). 
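+
+    The class itself is assembled from three mixins (``BatchObservation``,
+    ``SupervisedProblem`` and ``SGDUpdate``) on top of the general SGD
+    template, so concrete strategies are usually thin wrappers around it.
+    A minimal sketch, assuming a classification ``model`` and a
+    ``benchmark`` are already available::
+
+        from torch.optim import SGD
+
+        strategy = SupervisedTemplate(
+            model, SGD(model.parameters(), lr=0.01),
+            train_mb_size=32, train_epochs=2)
+        for experience in benchmark.train_stream:
+            strategy.train(experience)
+            strategy.eval(benchmark.test_stream)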
+ + **Scenarios** + This strategy supports several continual learning scenarios: + + * class-incremental scenarios (no task labels) + * multi-task scenarios, where task labels are provided) + * multi-incremental scenarios, where the same task may be revisited + + The exact scenario depends on the data stream and whether it provides + the task labels. + + **Training loop** + The training loop is organized as follows:: + + train + train_exp # for each experience + adapt_train_dataset + train_dataset_adaptation + make_train_dataloader + train_epoch # for each epoch + # forward + # backward + # model update + + **Evaluation loop** + The evaluation loop is organized as follows:: + + eval + eval_exp # for each experience + adapt_eval_dataset + eval_dataset_adaptation + make_eval_dataloader + eval_epoch # for each epoch + # forward + # backward + # model update + + """ + + PLUGIN_CLASS = SupervisedPlugin + + def __init__( + self, + model: Module, + optimizer: Optimizer, + criterion=CrossEntropyLoss(), + train_mb_size: int = 1, + train_epochs: int = 1, + eval_mb_size: Optional[int] = 1, + device="cpu", + plugins: Optional[Sequence["SupervisedPlugin"]] = None, + evaluator=default_evaluator, + eval_every=-1, + peval_mode="epoch", + ): + """Init. + + :param model: PyTorch model. + :param optimizer: PyTorch optimizer. + :param criterion: loss function. + :param train_mb_size: mini-batch size for training. + :param train_epochs: number of training epochs. + :param eval_mb_size: mini-batch size for eval. + :param device: PyTorch device where the model will be allocated. + :param plugins: (optional) list of StrategyPlugins. + :param evaluator: (optional) instance of EvaluationPlugin for logging + and metric computations. None to remove logging. + :param eval_every: the frequency of the calls to `eval` inside the + training loop. -1 disables the evaluation. 0 means `eval` is called + only at the end of the learning experience. Values >0 mean that + `eval` is called every `eval_every` epochs and at the end of the + learning experience. + :param peval_mode: one of {'epoch', 'iteration'}. Decides whether the + periodic evaluation during training should execute every + `eval_every` epochs or iterations (Default='epoch'). + """ + super().__init__( + model=model, + optimizer=optimizer, + criterion=criterion, + train_mb_size=train_mb_size, + train_epochs=train_epochs, + eval_mb_size=eval_mb_size, + device=device, + plugins=plugins, + evaluator=evaluator, + eval_every=eval_every, + peval_mode=peval_mode, + ) + ################################################################### + # State variables. These are updated during the train/eval loops. # + ################################################################### + + # self.adapted_dataset = None + # """ Data used to train. It may be modified by plugins. Plugins can + # append data to it (e.g. for replay). + # + # .. note:: + # + # This dataset may contain samples from different experiences. If you + # want the original data for the current experience + # use :attr:`.BaseTemplate.experience`. + + +class OnlineSupervisedTemplate(OnlineObservation, SupervisedProblem, SGDUpdate, + BaseGeneralSGDTemplate): + """Base class for continual learning strategies. + + BaseTemplate is the super class of all task-based continual learning + strategies. It implements a basic training loop and callback system + that allows to execute code at each experience of the training loop. 
+ Plugins can be used to implement callbacks to augment the training + loop with additional behavior (e.g. a memory buffer for replay). + + **Scenarios** + This strategy supports several continual learning scenarios: + + * class-incremental scenarios (no task labels) + * multi-task scenarios, where task labels are provided) + * multi-incremental scenarios, where the same task may be revisited + + The exact scenario depends on the data stream and whether it provides + the task labels. + + **Training loop** + The training loop is organized as follows:: + + train + train_exp # for each experience + adapt_train_dataset + train_dataset_adaptation + make_train_dataloader + train_pass # for each pass + # forward + # backward + # model update + + **Evaluation loop** + The evaluation loop is organized as follows:: + + eval + eval_exp # for each experience + adapt_eval_dataset + eval_dataset_adaptation + make_eval_dataloader + eval_epoch # for each epoch + # forward + # backward + # model update + + """ + + PLUGIN_CLASS = SupervisedPlugin + + def __init__( + self, + model: Module, + optimizer: Optimizer, + criterion=CrossEntropyLoss(), + train_mb_size: int = 1, + train_passes: int = 1, + eval_mb_size: Optional[int] = 1, + device="cpu", + plugins: Optional[Sequence["SupervisedPlugin"]] = None, + evaluator=default_evaluator, + eval_every=-1, + peval_mode="experience", + ): + """Init. + + :param model: PyTorch model. + :param optimizer: PyTorch optimizer. + :param criterion: loss function. + :param train_mb_size: mini-batch size for training. + :param train_passes: number of training passes. + :param eval_mb_size: mini-batch size for eval. + :param device: PyTorch device where the model will be allocated. + :param plugins: (optional) list of StrategyPlugins. + :param evaluator: (optional) instance of EvaluationPlugin for logging + and metric computations. None to remove logging. + :param eval_every: the frequency of the calls to `eval` inside the + training loop. -1 disables the evaluation. 0 means `eval` is called + only at the end of the learning experience. Values >0 mean that + `eval` is called every `eval_every` experiences and at the end of + the learning experience. + :param peval_mode: one of {'experience', 'iteration'}. Decides whether + the periodic evaluation during training should execute every + `eval_every` experience or iterations (Default='experience'). + """ + super().__init__( + model=model, + optimizer=optimizer, + criterion=criterion, + train_mb_size=train_mb_size, + train_epochs=1, + eval_mb_size=eval_mb_size, + device=device, + plugins=plugins, + evaluator=evaluator, + eval_every=eval_every, + peval_mode=peval_mode, + ) + + self.train_passes = train_passes diff --git a/avalanche/training/templates/observation_type/__init__.py b/avalanche/training/templates/observation_type/__init__.py new file mode 100644 index 000000000..4391bfbd7 --- /dev/null +++ b/avalanche/training/templates/observation_type/__init__.py @@ -0,0 +1,6 @@ +"""Observation types mainly define the way data samples are observed: + batch(multiple epochs) vs. 
online(one epoch) + +""" +from .batch_observation import BatchObservation +from .online_observation import OnlineObservation diff --git a/avalanche/training/templates/observation_type/batch_observation.py b/avalanche/training/templates/observation_type/batch_observation.py new file mode 100644 index 000000000..4f67ef91b --- /dev/null +++ b/avalanche/training/templates/observation_type/batch_observation.py @@ -0,0 +1,46 @@ +from typing import Iterable + +from avalanche.benchmarks import CLExperience +from avalanche.models.dynamic_optimizers import reset_optimizer + + +class BatchObservation: + def _train_exp( + self, experience: CLExperience, eval_streams=None, **kwargs + ): + """Training loop over a single Experience object. + + :param experience: CL experience information. + :param eval_streams: list of streams for evaluation. + If None: use the training experience for evaluation. + Use [] if you do not want to evaluate during training. + :param kwargs: custom arguments. + """ + if eval_streams is None: + eval_streams = [experience] + for i, exp in enumerate(eval_streams): + if not isinstance(exp, Iterable): + eval_streams[i] = [exp] + for _ in range(self.train_epochs): + self._before_training_epoch(**kwargs) + + if self._stop_training: # Early stopping + self._stop_training = False + break + + self.training_epoch(**kwargs) + self._after_training_epoch(**kwargs) + + def make_optimizer(self): + """Optimizer initialization. + + Called before each training experiene to configure the optimizer. + """ + # we reset the optimizer's state after each experience. + # This allows to add new parameters (new heads) and + # freezing old units during the model's adaptation phase. + reset_optimizer(self.optimizer, self.model) + + def maybe_adapt_model_and_make_optimizer(self): + self.model = self.model_adaptation() + self.make_optimizer() diff --git a/avalanche/training/templates/observation_type/online_observation.py b/avalanche/training/templates/observation_type/online_observation.py new file mode 100644 index 000000000..faf98dca0 --- /dev/null +++ b/avalanche/training/templates/observation_type/online_observation.py @@ -0,0 +1,58 @@ +from typing import Iterable + +from avalanche.benchmarks import OnlineCLExperience +from avalanche.models.dynamic_optimizers import reset_optimizer +from avalanche.models.dynamic_optimizers import update_optimizer + + +class OnlineObservation: + def _train_exp( + self, experience: OnlineCLExperience, eval_streams=None, **kwargs + ): + """Training loop over a single Experience object. + + :param experience: Online CL experience information. + :param eval_streams: list of streams for evaluation. + If None: use the training experience for evaluation. + Use [] if you do not want to evaluate during training. + :param kwargs: custom arguments. + """ + if eval_streams is None: + eval_streams = [experience] + for i, exp in enumerate(eval_streams): + if not isinstance(exp, Iterable): + eval_streams[i] = [exp] + + self.training_epoch(**kwargs) + + def make_optimizer(self): + """Optimizer initialization. + + Called before each training experience to configure the optimizer. + """ + # We reset the optimizer's state after each experience if task + # boundaries are given, otherwise it updates the optimizer only if + # new parameters are added to the model after each adaptation step. 
+ + # We assume the current experience is an OnlineCLExperience: + if self.experience.access_task_boundaries: + reset_optimizer(self.optimizer, self.model) + + else: + update_optimizer(self.optimizer, + self.model_params_before_adaptation, + self.model.parameters(), + reset_state=False) + + def maybe_adapt_model_and_make_optimizer(self): + # If strategy has access to the task boundaries, and the current + # sub-experience is the first sub-experience in the online (sub-)stream, + # then adapt the model with the full origin experience: + if self.experience.access_task_boundaries: + if self.experience.is_first_subexp: + self.model = self.model_adaptation() + self.make_optimizer() + # Otherwise, adapt to the current sub-experience: + else: + self.model = self.model_adaptation() + self.make_optimizer() diff --git a/avalanche/training/templates/problem_type/__init__.py b/avalanche/training/templates/problem_type/__init__.py new file mode 100644 index 000000000..0932beb4c --- /dev/null +++ b/avalanche/training/templates/problem_type/__init__.py @@ -0,0 +1,5 @@ +"""Problem types mainly define the properties and criterions depending on + how inputs should be mapped to outputs. + +""" +from .supervised_problem import SupervisedProblem diff --git a/avalanche/training/templates/problem_type/supervised_problem.py b/avalanche/training/templates/problem_type/supervised_problem.py new file mode 100644 index 000000000..9432e04ef --- /dev/null +++ b/avalanche/training/templates/problem_type/supervised_problem.py @@ -0,0 +1,31 @@ +from avalanche.models import avalanche_forward + + +class SupervisedProblem: + @property + def mb_x(self): + """Current mini-batch input.""" + return self.mbatch[0] + + @property + def mb_y(self): + """Current mini-batch target.""" + return self.mbatch[1] + + @property + def mb_task_id(self): + """Current mini-batch task labels.""" + assert len(self.mbatch) >= 3 + return self.mbatch[-1] + + def criterion(self): + """Loss function for supervised problems.""" + return self._criterion(self.mb_output, self.mb_y) + + def forward(self): + """Compute the model's output given the current mini-batch.""" + return avalanche_forward(self.model, self.mb_x, self.mb_task_id) + + def _check_minibatch(self): + """Check if the current mini-batch has 3 components.""" + assert len(self.mbatch) >= 3 diff --git a/avalanche/training/templates/update_type/__init__.py b/avalanche/training/templates/update_type/__init__.py new file mode 100644 index 000000000..3cc498524 --- /dev/null +++ b/avalanche/training/templates/update_type/__init__.py @@ -0,0 +1,5 @@ +"""Update types define how the model is updated for every batch of data. +""" + +from .sgd_update import SGDUpdate +from .meta_update import MetaUpdate diff --git a/avalanche/training/templates/update_type/meta_update.py b/avalanche/training/templates/update_type/meta_update.py new file mode 100644 index 000000000..a04559189 --- /dev/null +++ b/avalanche/training/templates/update_type/meta_update.py @@ -0,0 +1,27 @@ + +class MetaUpdate: + def training_epoch(self, **kwargs): + """Training epoch. 
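+
+        Unlike the plain SGD update, the per-iteration parameter updates
+        are delegated to plugins through the fast-update and slow-update
+        callbacks.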
+ + :param kwargs: + :return: + """ + for self.mbatch in self.dataloader: + if self._stop_training: + break + + self._unpack_minibatch() + self._before_training_iteration(**kwargs) + + self.optimizer.zero_grad() + self.loss = 0 + + # Fast updates + self._before_fast_update(**kwargs) + self._after_fast_updates(**kwargs) + + # Slow updates + self._before_slow_update(**kwargs) + self._after_slow_updates(**kwargs) + + self._after_training_iteration(**kwargs) diff --git a/avalanche/training/templates/update_type/sgd_update.py b/avalanche/training/templates/update_type/sgd_update.py new file mode 100644 index 000000000..d85365f49 --- /dev/null +++ b/avalanche/training/templates/update_type/sgd_update.py @@ -0,0 +1,36 @@ + +class SGDUpdate: + def training_epoch(self, **kwargs): + """Training epoch. + + :param kwargs: + :return: + """ + for self.mbatch in self.dataloader: + if self._stop_training: + break + + self._unpack_minibatch() + self._before_training_iteration(**kwargs) + + self.optimizer.zero_grad() + self.loss = 0 + + # Forward + self._before_forward(**kwargs) + self.mb_output = self.forward() + self._after_forward(**kwargs) + + # Loss & Backward + self.loss += self.criterion() + + self._before_backward(**kwargs) + self.backward() + self._after_backward(**kwargs) + + # Optimization step + self._before_update(**kwargs) + self.optimizer_step() + self._after_update(**kwargs) + + self._after_training_iteration(**kwargs) From 38e0cef559cf6a4057bcd376517074a674fdeb31 Mon Sep 17 00:00:00 2001 From: hamedhemati Date: Tue, 12 Jul 2022 16:52:41 +0200 Subject: [PATCH 02/10] Syntax fix --- avalanche/training/templates/base_general_sgd.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/avalanche/training/templates/base_general_sgd.py b/avalanche/training/templates/base_general_sgd.py index 954bfe9d0..f20a6414b 100644 --- a/avalanche/training/templates/base_general_sgd.py +++ b/avalanche/training/templates/base_general_sgd.py @@ -488,14 +488,16 @@ def _maybe_peval(self, strategy, counter, **kwargs): if self.eval_every > 0 and counter % self.eval_every == 0: self._peval(strategy, **kwargs) - def after_training_epoch(self, strategy: "BaseSGDTemplate", **kwargs): + def after_training_epoch(self, strategy: "BaseGeneralSGDTemplate", + **kwargs): """Periodic eval controlled by `self.eval_every` and `self.peval_mode`.""" if self.peval_mode == "epoch": self._maybe_peval(strategy, strategy.clock.train_exp_epochs, **kwargs) - def after_training_iteration(self, strategy: "BaseSGDTemplate", **kwargs): + def after_training_iteration(self, strategy: "BaseGeneralSGDTemplate", + **kwargs): """Periodic eval controlled by `self.eval_every` and `self.peval_mode`.""" if self.peval_mode == "iteration": From 346017beb1a6d259cc3133c853063ed6118e19fa Mon Sep 17 00:00:00 2001 From: hamedhemati Date: Sun, 24 Jul 2022 20:36:47 +0200 Subject: [PATCH 03/10] Update meta-learning template and add examples --- avalanche/NEW_core.py | 226 ++++++++++++++++ avalanche/training/plugins/NEW_lamaml.py | 251 ++++++++++++++++++ ...ppers_temp.py => NEW_strategy_wrappers.py} | 2 +- .../{base_general_sgd.py => NEW_base_sgd.py} | 3 +- ...n_templates.py => NEW_common_templates.py} | 129 ++++++++- .../templates/update_type/meta_update.py | 36 ++- examples/NEW_lamaml.py | 73 +++++ examples/NEW_naive.py | 59 ++++ examples/NEW_online_naive.py | 137 ++++++++++ 9 files changed, 900 insertions(+), 16 deletions(-) create mode 100644 avalanche/NEW_core.py create mode 100644 avalanche/training/plugins/NEW_lamaml.py rename 
avalanche/training/supervised/{strategy_wrappers_temp.py => NEW_strategy_wrappers.py} (98%) rename avalanche/training/templates/{base_general_sgd.py => NEW_base_sgd.py} (99%) rename avalanche/training/templates/{common_templates.py => NEW_common_templates.py} (63%) create mode 100644 examples/NEW_lamaml.py create mode 100644 examples/NEW_naive.py create mode 100644 examples/NEW_online_naive.py diff --git a/avalanche/NEW_core.py b/avalanche/NEW_core.py new file mode 100644 index 000000000..baf59efb6 --- /dev/null +++ b/avalanche/NEW_core.py @@ -0,0 +1,226 @@ +from abc import ABC +from typing import TypeVar, Generic +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from avalanche.training.templates.base import BaseTemplate + +CallbackResult = TypeVar("CallbackResult") +Template = TypeVar("Template", bound="BaseTemplate") + + +class BasePlugin(Generic[Template], ABC): + """ABC for BaseTemplate plugins. + + A plugin is simply an object implementing some strategy callbacks. + Plugins are called automatically during the strategy execution. + + Callbacks provide access before/after each phase of the execution. + In general, for each method of the training and evaluation loops, + `StrategyCallbacks` + provide two functions `before_{method}` and `after_{method}`, called + before and after the method, respectively. + Therefore plugins can "inject" additional code by implementing callbacks. + Each callback has a `strategy` argument that gives access to the state. + + In Avalanche, callbacks are used to implement continual strategies, metrics + and loggers. + """ + + def __init__(self): + pass + + def before_training(self, strategy: Template, *args, **kwargs): + """Called before `train` by the `BaseTemplate`.""" + pass + + def before_training_exp(self, strategy: Template, *args, **kwargs): + """Called before `train_exp` by the `BaseTemplate`.""" + pass + + def after_training_exp(self, strategy: Template, *args, **kwargs): + """Called after `train_exp` by the `BaseTemplate`.""" + pass + + def after_training(self, strategy: Template, *args, **kwargs): + """Called after `train` by the `BaseTemplate`.""" + pass + + def before_eval( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called before `eval` by the `BaseTemplate`.""" + pass + + def before_eval_exp( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called before `eval_exp` by the `BaseTemplate`.""" + pass + + def after_eval_exp( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called after `eval_exp` by the `BaseTemplate`.""" + pass + + def after_eval(self, strategy: Template, *args, **kwargs) -> CallbackResult: + """Called after `eval` by the `BaseTemplate`.""" + pass + + +class BaseSGDPlugin(BasePlugin[Template], ABC): + """ABC for BaseSGDTemplate plugins. + + See `BaseSGDTemplate` for complete description of the train/eval loop. 
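+
+    A minimal sketch of a custom plugin (the callback overridden below is
+    one of the hooks defined in this class; the logging is purely
+    illustrative)::
+
+        class IterationLossLogger(BaseSGDPlugin):
+            def after_training_iteration(self, strategy, *args, **kwargs):
+                print(f"loss: {float(strategy.loss):.4f}")
+
+    An instance of such a plugin is passed to a strategy through its
+    ``plugins`` argument.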
+ """ + + def before_training_epoch( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called before `train_epoch` by the `BaseTemplate`.""" + pass + + def before_training_iteration( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called before the start of a training iteration by the + `BaseTemplate`.""" + pass + + def before_forward( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called before `model.forward()` by the `BaseTemplate`.""" + pass + + def after_forward( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called after `model.forward()` by the `BaseTemplate`.""" + pass + + def before_backward( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called before `criterion.backward()` by the `BaseTemplate`.""" + pass + + def after_backward( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called after `criterion.backward()` by the `BaseTemplate`.""" + pass + + def after_training_iteration( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called after the end of a training iteration by the + `BaseTemplate`.""" + pass + + def before_update( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called before `optimizer.update()` by the `BaseTemplate`.""" + pass + + def after_update( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called after `optimizer.update()` by the `BaseTemplate`.""" + pass + + def after_training_epoch( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called after `train_epoch` by the `BaseTemplate`.""" + pass + + def before_eval_iteration( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called before the start of a training iteration by the + `BaseTemplate`.""" + pass + + def before_eval_forward( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called before `model.forward()` by the `BaseTemplate`.""" + pass + + def after_eval_forward( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called after `model.forward()` by the `BaseTemplate`.""" + pass + + def after_eval_iteration( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called after the end of an iteration by the + `BaseTemplate`.""" + pass + + def before_train_dataset_adaptation( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called before `train_dataset_adapatation` by the `BaseTemplate`.""" + pass + + def after_train_dataset_adaptation( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called after `train_dataset_adapatation` by the `BaseTemplate`.""" + pass + + def before_eval_dataset_adaptation( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called before `eval_dataset_adaptation` by the `BaseTemplate`.""" + pass + + def after_eval_dataset_adaptation( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called after `eval_dataset_adaptation` by the `BaseTemplate`.""" + pass + + # ====================================================================> NEW + + def before_inner_updates( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called before `_inner_updates` by the `BaseTemplate`.""" + pass + + def inner_updates( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called before `_inner_updates` by the `BaseTemplate`.""" + pass + + def after_inner_updates( + 
self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called before `_outer_updates` by the `BaseTemplate`.""" + pass + + def before_outer_update( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called before `_outer_updates` by the `BaseTemplate`.""" + pass + + def outer_update( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called before `_inner_updates` by the `BaseTemplate`.""" + pass + + def after_outer_update( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called before `_outer_updates` by the `BaseTemplate`.""" + pass diff --git a/avalanche/training/plugins/NEW_lamaml.py b/avalanche/training/plugins/NEW_lamaml.py new file mode 100644 index 000000000..a44e8057d --- /dev/null +++ b/avalanche/training/plugins/NEW_lamaml.py @@ -0,0 +1,251 @@ +from typing import TYPE_CHECKING + +import torch +import torch.nn as nn +import torch.nn.functional as F + +import math + +try: + import higher +except ImportError: + raise ModuleNotFoundError("higher not found, if you want to use " + "MAML please install avalanche with " + "the extra dependencies: " + "pip install avalanche-lib[extra]") + +from avalanche.NEW_core import BaseSGDPlugin +from avalanche.models.utils import avalanche_forward + + +class LaMAMLPlugin(BaseSGDPlugin): + """LaMAML Plugin. + """ + + def __init__( + self, + n_inner_updates: int = 5, + second_order: bool = True, + grad_clip_norm: float = 1.0, + learn_lr: bool = True, + lr_alpha: float = 0.25, + sync_update: bool = False, + alpha_init: float = 0.1, + ): + """Implementation of Look-ahead MAML (LaMAML) algorithm in Avalanche + using Higher library for applying fast updates. + + :param n_inner_updates: number of inner updates. + :param second_order: If True, it computes the second-order derivative + of the inner update trajectory for the meta-loss. Otherwise, + it computes the meta-loss with a first-order approximation. + :param grad_clip_norm: gradient clipping norm. + :param learn_lr: if True, it learns the LR for each batch of data. + :param lr_alpha: LR for learning the main update's learning rate. + :param sync_update: if True, it updates the meta-model with a fixed + learning rate. Mutually exclusive with learn_lr and + lr_alpha. + :param alpha_init: initialization value for learnable LRs. 
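+
+        A minimal usage sketch, assuming a multi-task model, optimizer,
+        criterion and benchmark have already been created (as in the
+        accompanying example script)::
+
+            strategy = SupervisedMetaLearningTemplate(
+                model, optimizer, criterion,
+                train_mb_size=32, train_epochs=1,
+                plugins=[LaMAMLPlugin(n_inner_updates=5)],
+            )
+            for experience in benchmark.train_stream:
+                strategy.train(experience)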
+ + """ + + super().__init__() + + self.n_inner_updates = n_inner_updates + self.second_order = second_order + self.grad_clip_norm = grad_clip_norm + self.learn_lr = learn_lr + self.lr_alpha = lr_alpha + self.sync_update = sync_update + self.alpha_init = alpha_init + self.alpha_params = None + self.is_model_initialized = False + + def before_training(self, strategy, **kwargs): + if not self.is_model_initialized: + strategy.model.apply(init_kaiming_normal) + self.is_model_initialized = True + + def before_training_exp(self, strategy, **kwargs): + # Initialize alpha-lr parameters + if self.alpha_params is None: + self.alpha_params = nn.ParameterList([]) + # Iterate through model parameters and add the corresponding + # alpha_lr parameter + for p in strategy.model.parameters(): + alpha_param = nn.Parameter( + torch.ones(p.shape) * self.alpha_init, requires_grad=True + ) + self.alpha_params.append(alpha_param) + self.alpha_params.to(strategy.device) + + # Create optimizer for the alpha_lr parameters + self.optimizer_alpha = torch.optim.SGD( + self.alpha_params.parameters(), lr=self.lr_alpha + ) + + # For task-incremental heads: + # If new parameters are added to the model, update alpha_lr + # parameters respectively + if len(self.alpha_params) < len(list(strategy.model.parameters())): + for iter_p, p in enumerate(strategy.model.parameters()): + # Skip the older parameters + if iter_p < len(self.alpha_params): + continue + # Add new alpha_lr for the new parameter + alpha_param = nn.Parameter( + torch.ones(p.shape) * self.alpha_init, requires_grad=True + ) + self.alpha_params.append(alpha_param) + + self.alpha_params.to(strategy.device) + # Re-init optimizer for the new set of alpha_lr parameters + self.optimizer_alpha = torch.optim.SGD( + self.alpha_params.parameters(), lr=self.lr_alpha + ) + + def before_inner_updates(self, strategy, **kwargs): + # Create a stateless copy of the model for inner-updates + self.fast_model = higher.patch.monkeypatch( + strategy.model, + copy_initial_weights=True, + track_higher_grads=self.second_order, + ) + if strategy.clock.train_exp_counter > 0: + self.batch_x = strategy.mb_x[: strategy.train_mb_size] + self.batch_y = strategy.mb_y[: strategy.train_mb_size] + self.batch_t = strategy.mb_task_id[: strategy.train_mb_size] + else: + self.batch_x = strategy.mb_x + self.batch_y = strategy.mb_y + self.batch_t = strategy.mb_task_id + + bsize_data = self.batch_x.shape[0] + self.rough_sz = math.ceil(bsize_data / self.n_inner_updates) + self.meta_losses = [0 for _ in range(self.n_inner_updates)] + + def single_inner_update(self, x, y, t, criterion): + logits = avalanche_forward(self.fast_model, x, t) + loss = criterion(logits, y) + + # Compute gradient with respect to the current fast weights + grads = list( + torch.autograd.grad( + loss, + self.fast_model.fast_params, + create_graph=self.second_order, + retain_graph=self.second_order, + allow_unused=True, + ) + ) + + # Clip grad norms + grads = [ + torch.clamp(g, min=-self.grad_clip_norm, max=self.grad_clip_norm) + if g is not None + else g + for g in grads + ] + + # New fast parameters + new_fast_params = [ + param - alpha * grad if grad is not None else param + for (param, alpha, grad) in zip( + self.fast_model.fast_params, self.alpha_params.parameters(), + grads + ) + ] + + # Update fast model's weights + self.fast_model.update_params(new_fast_params) + + def inner_updates(self, strategy, **kwargs): + """Update fast weights using current samples and + return the updated fast model. 
+ """ + for i in range(self.n_inner_updates): + batch_x_i = self.batch_x[i * self.rough_sz: + (i + 1) * self.rough_sz] + batch_y_i = self.batch_y[i * self.rough_sz: + (i + 1) * self.rough_sz] + batch_t_i = self.batch_t[i * self.rough_sz: + (i + 1) * self.rough_sz] + + # We assume that samples for inner update are from the same task + self.single_inner_update(batch_x_i, batch_y_i, batch_t_i, + strategy._criterion) + + # Compute meta-loss with the combination of batch and buffer samples + logits_meta = avalanche_forward( + self.fast_model, strategy.mb_x, strategy.mb_task_id + ) + meta_loss = strategy._criterion(logits_meta, strategy.mb_y) + self.meta_losses[i] = meta_loss + + def apply_grad(self, module, grads, device): + for i, p in enumerate(module.parameters()): + grad = grads[i] + if grad is None: + grad = torch.zeros(p.shape).float().to(device) + + if p.grad is None: + p.grad = grad + else: + p.grad += grad + + def outer_update(self, strategy, **kwargs): + # Compute meta-gradient for the main model + meta_loss = sum(self.meta_losses) / len(self.meta_losses) + meta_grad_model = torch.autograd.grad( + meta_loss, + self.fast_model.parameters(time=0), + retain_graph=True, + allow_unused=True, + ) + strategy.model.zero_grad() + self.apply_grad(strategy.model, meta_grad_model, strategy.device) + + # Clip gradients + torch.nn.utils.clip_grad_norm_( + strategy.model.parameters(), self.grad_clip_norm + ) + + if self.learn_lr: + # Compute meta-gradient for alpha-lr parameters + meta_grad_alpha = torch.autograd.grad( + meta_loss, self.alpha_params.parameters(), allow_unused=True + ) + self.alpha_params.zero_grad() + self.apply_grad(self.alpha_params, meta_grad_alpha, strategy.device) + + torch.nn.utils.clip_grad_norm_( + self.alpha_params.parameters(), self.grad_clip_norm + ) + self.optimizer_alpha.step() + + # If sync-update: update with self.optimizer + # o.w: use the learned LRs to update the model + if self.sync_update: + self.optimizer.step() + else: + for p, alpha in zip( + strategy.model.parameters(), self.alpha_params.parameters() + ): + # Use relu on updated LRs to avoid negative values + p.data = p.data - p.grad * F.relu(alpha) + + strategy.loss = meta_loss + + +def init_kaiming_normal(m): + if isinstance(m, nn.Conv2d): + torch.nn.init.constant_(m.weight.data, 1.0) + torch.nn.init.kaiming_normal_(m.weight.data) + if m.bias is not None: + m.bias.data.zero_() + + elif isinstance(m, nn.Linear): + torch.nn.init.constant_(m.weight.data, 1.0) + torch.nn.init.kaiming_normal_(m.weight.data) + if m.bias is not None: + m.bias.data.zero_() diff --git a/avalanche/training/supervised/strategy_wrappers_temp.py b/avalanche/training/supervised/NEW_strategy_wrappers.py similarity index 98% rename from avalanche/training/supervised/strategy_wrappers_temp.py rename to avalanche/training/supervised/NEW_strategy_wrappers.py index 19bcbba36..e5abd5119 100644 --- a/avalanche/training/supervised/strategy_wrappers_temp.py +++ b/avalanche/training/supervised/NEW_strategy_wrappers.py @@ -3,7 +3,7 @@ from torch.optim import Optimizer from avalanche.training.plugins import EvaluationPlugin -from avalanche.training.templates.common_templates import ( +from avalanche.training.templates.NEW_common_templates import ( SupervisedTemplate, OnlineSupervisedTemplate ) diff --git a/avalanche/training/templates/base_general_sgd.py b/avalanche/training/templates/NEW_base_sgd.py similarity index 99% rename from avalanche/training/templates/base_general_sgd.py rename to avalanche/training/templates/NEW_base_sgd.py index 
f20a6414b..8b5545aa3 100644 --- a/avalanche/training/templates/base_general_sgd.py +++ b/avalanche/training/templates/NEW_base_sgd.py @@ -18,7 +18,7 @@ from avalanche.training.utils import trigger_plugins -class BaseGeneralSGDTemplate(BaseTemplate): +class BaseSGDTemplate(BaseTemplate): """Base SGD class for continual learning skeletons. **Training loop** @@ -142,6 +142,7 @@ def train(self, eval_streams: Optional[Sequence[Union[CLExperience, ExpSequence]]] = None, **kwargs): + super().train(experiences, eval_streams, **kwargs) return self.evaluator.get_last_metrics() diff --git a/avalanche/training/templates/common_templates.py b/avalanche/training/templates/NEW_common_templates.py similarity index 63% rename from avalanche/training/templates/common_templates.py rename to avalanche/training/templates/NEW_common_templates.py index f5b055e17..eb3fdd6be 100644 --- a/avalanche/training/templates/common_templates.py +++ b/avalanche/training/templates/NEW_common_templates.py @@ -3,17 +3,17 @@ from torch.nn import Module, CrossEntropyLoss from torch.optim import Optimizer -from avalanche.training.plugins import SupervisedPlugin +from avalanche.core import BaseSGDPlugin from avalanche.training.plugins.evaluation import default_evaluator from .observation_type import * from .problem_type import * from .update_type import * -from .base_general_sgd import BaseGeneralSGDTemplate +from .NEW_base_sgd import BaseSGDTemplate class SupervisedTemplate(BatchObservation, SupervisedProblem, SGDUpdate, - BaseGeneralSGDTemplate): + BaseSGDTemplate): """Base class for continual learning strategies. BaseTemplate is the super class of all task-based continual learning @@ -60,7 +60,7 @@ class SupervisedTemplate(BatchObservation, SupervisedProblem, SGDUpdate, """ - PLUGIN_CLASS = SupervisedPlugin + PLUGIN_CLASS = BaseSGDPlugin def __init__( self, @@ -71,7 +71,120 @@ def __init__( train_epochs: int = 1, eval_mb_size: Optional[int] = 1, device="cpu", - plugins: Optional[Sequence["SupervisedPlugin"]] = None, + plugins: Optional[Sequence["BaseSGDPlugin"]] = None, + evaluator=default_evaluator, + eval_every=-1, + peval_mode="epoch", + ): + """Init. + + :param model: PyTorch model. + :param optimizer: PyTorch optimizer. + :param criterion: loss function. + :param train_mb_size: mini-batch size for training. + :param train_epochs: number of training epochs. + :param eval_mb_size: mini-batch size for eval. + :param device: PyTorch device where the model will be allocated. + :param plugins: (optional) list of StrategyPlugins. + :param evaluator: (optional) instance of EvaluationPlugin for logging + and metric computations. None to remove logging. + :param eval_every: the frequency of the calls to `eval` inside the + training loop. -1 disables the evaluation. 0 means `eval` is called + only at the end of the learning experience. Values >0 mean that + `eval` is called every `eval_every` epochs and at the end of the + learning experience. + :param peval_mode: one of {'epoch', 'iteration'}. Decides whether the + periodic evaluation during training should execute every + `eval_every` epochs or iterations (Default='epoch'). + """ + super().__init__( + model=model, + optimizer=optimizer, + criterion=criterion, + train_mb_size=train_mb_size, + train_epochs=train_epochs, + eval_mb_size=eval_mb_size, + device=device, + plugins=plugins, + evaluator=evaluator, + eval_every=eval_every, + peval_mode=peval_mode, + ) + ################################################################### + # State variables. 
These are updated during the train/eval loops. # + ################################################################### + + # self.adapted_dataset = None + # """ Data used to train. It may be modified by plugins. Plugins can + # append data to it (e.g. for replay). + # + # .. note:: + # + # This dataset may contain samples from different experiences. If you + # want the original data for the current experience + # use :attr:`.BaseTemplate.experience`. + + +class SupervisedMetaLearningTemplate(BatchObservation, SupervisedProblem, + MetaUpdate, BaseSGDTemplate): + """Base class for continual learning strategies. + + BaseTemplate is the super class of all task-based continual learning + strategies. It implements a basic training loop and callback system + that allows to execute code at each experience of the training loop. + Plugins can be used to implement callbacks to augment the training + loop with additional behavior (e.g. a memory buffer for replay). + + **Scenarios** + This strategy supports several continual learning scenarios: + + * class-incremental scenarios (no task labels) + * multi-task scenarios, where task labels are provided) + * multi-incremental scenarios, where the same task may be revisited + + The exact scenario depends on the data stream and whether it provides + the task labels. + + **Training loop** + The training loop is organized as follows:: + + train + train_exp # for each experience + adapt_train_dataset + train_dataset_adaptation + make_train_dataloader + train_epoch # for each epoch + # forward + # backward + # model update + + **Evaluation loop** + The evaluation loop is organized as follows:: + + eval + eval_exp # for each experience + adapt_eval_dataset + eval_dataset_adaptation + make_eval_dataloader + eval_epoch # for each epoch + # forward + # backward + # model update + + """ + + PLUGIN_CLASS = BaseSGDPlugin + + def __init__( + self, + model: Module, + optimizer: Optimizer, + criterion=CrossEntropyLoss(), + train_mb_size: int = 1, + train_epochs: int = 1, + eval_mb_size: Optional[int] = 1, + device="cpu", + plugins: Optional[Sequence["BaseSGDPlugin"]] = None, evaluator=default_evaluator, eval_every=-1, peval_mode="epoch", @@ -126,7 +239,7 @@ def __init__( class OnlineSupervisedTemplate(OnlineObservation, SupervisedProblem, SGDUpdate, - BaseGeneralSGDTemplate): + BaseSGDTemplate): """Base class for continual learning strategies. 
BaseTemplate is the super class of all task-based continual learning @@ -173,7 +286,7 @@ class OnlineSupervisedTemplate(OnlineObservation, SupervisedProblem, SGDUpdate, """ - PLUGIN_CLASS = SupervisedPlugin + PLUGIN_CLASS = BaseSGDPlugin def __init__( self, @@ -184,7 +297,7 @@ def __init__( train_passes: int = 1, eval_mb_size: Optional[int] = 1, device="cpu", - plugins: Optional[Sequence["SupervisedPlugin"]] = None, + plugins: Optional[Sequence["BaseSGDPlugin"]] = None, evaluator=default_evaluator, eval_every=-1, peval_mode="experience", diff --git a/avalanche/training/templates/update_type/meta_update.py b/avalanche/training/templates/update_type/meta_update.py index a04559189..cc1090385 100644 --- a/avalanche/training/templates/update_type/meta_update.py +++ b/avalanche/training/templates/update_type/meta_update.py @@ -1,3 +1,5 @@ +from avalanche.training.utils import trigger_plugins + class MetaUpdate: def training_epoch(self, **kwargs): @@ -16,12 +18,34 @@ def training_epoch(self, **kwargs): self.optimizer.zero_grad() self.loss = 0 - # Fast updates - self._before_fast_update(**kwargs) - self._after_fast_updates(**kwargs) + # Inner updates + self._before_inner_updates(**kwargs) + self._inner_updates(**kwargs) + self._after_inner_updates(**kwargs) + + # Outer update + self._before_outer_update(**kwargs) + self._outer_update(**kwargs) + self._after_outer_update(**kwargs) - # Slow updates - self._before_slow_update(**kwargs) - self._after_slow_updates(**kwargs) + self.mb_output = self.forward() self._after_training_iteration(**kwargs) + + def _before_inner_updates(self, **kwargs): + trigger_plugins(self, "before_inner_updates", **kwargs) + + def _inner_updates(self, **kwargs): + trigger_plugins(self, "inner_updates", **kwargs) + + def _after_inner_updates(self, **kwargs): + trigger_plugins(self, "after_inner_updates", **kwargs) + + def _before_outer_update(self, **kwargs): + trigger_plugins(self, "before_outer_update", **kwargs) + + def _outer_update(self, **kwargs): + trigger_plugins(self, "outer_update", **kwargs) + + def _after_outer_update(self, **kwargs): + trigger_plugins(self, "after_outer_update", **kwargs) diff --git a/examples/NEW_lamaml.py b/examples/NEW_lamaml.py new file mode 100644 index 000000000..ec456461d --- /dev/null +++ b/examples/NEW_lamaml.py @@ -0,0 +1,73 @@ +import torch +from os.path import expanduser + +from avalanche.models import MTSimpleMLP +from avalanche.evaluation.metrics import ( + accuracy_metrics, + loss_metrics, +) +from avalanche.training.plugins import EvaluationPlugin +from avalanche.benchmarks.classic import SplitMNIST +from avalanche.logging import InteractiveLogger +from avalanche.training.templates.NEW_common_templates import ( + SupervisedMetaLearningTemplate +) +from avalanche.training.plugins.NEW_lamaml import LaMAMLPlugin + + +def main(): + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + print(f"Using device: {device}") + + scenario = SplitMNIST( + n_experiences=5, + dataset_root=expanduser("~") + "/.avalanche/data/mnist/", + return_task_id=True + ) + + # choose some metrics and evaluation method + interactive_logger = InteractiveLogger() + eval_plugin = EvaluationPlugin( + accuracy_metrics( + minibatch=True, epoch=True, experience=True, stream=True + ), + loss_metrics(minibatch=True, epoch=True, experience=True, stream=True), + loggers=[interactive_logger], + ) + + model = MTSimpleMLP(hidden_size=128) + optimizer = torch.optim.SGD(model.parameters(), lr=0.01) + criterion = torch.nn.CrossEntropyLoss() + + # LaMAML 
plugin + lamaml_plugin = LaMAMLPlugin( + n_inner_updates=5, + second_order=True, + grad_clip_norm=1.0, + learn_lr=True, + lr_alpha=0.25, + sync_update=False, + alpha_init=0.1, + ) + + # create strategy + strategy = SupervisedMetaLearningTemplate( + model, + optimizer, + criterion, + train_epochs=1, + device=device, + train_mb_size=32, + evaluator=eval_plugin, + plugins=[lamaml_plugin] + ) + + # train on the selected scenario with the chosen strategy + for experience in scenario.train_stream: + print("Start training on experience ", experience.current_experience) + strategy.train(experience) + strategy.eval(scenario.test_stream[:]) + + +if __name__ == "__main__": + main() diff --git a/examples/NEW_naive.py b/examples/NEW_naive.py new file mode 100644 index 000000000..5aa29388a --- /dev/null +++ b/examples/NEW_naive.py @@ -0,0 +1,59 @@ +import torch +from os.path import expanduser + +from avalanche.models import SimpleMLP +from avalanche.evaluation.metrics import ( + accuracy_metrics, + loss_metrics, +) +from avalanche.training.plugins import EvaluationPlugin +from avalanche.benchmarks.classic import SplitMNIST +from avalanche.logging import InteractiveLogger +from avalanche.training.supervised.NEW_strategy_wrappers import ( + Naive +) + + +def main(): + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + print(f"Using device: {device}") + + scenario = SplitMNIST( + n_experiences=5, + dataset_root=expanduser("~") + "/.avalanche/data/mnist/" + ) + + # choose some metrics and evaluation method + interactive_logger = InteractiveLogger() + eval_plugin = EvaluationPlugin( + accuracy_metrics( + minibatch=True, epoch=True, experience=True, stream=True + ), + loss_metrics(minibatch=True, epoch=True, experience=True, stream=True), + loggers=[interactive_logger], + ) + + model = SimpleMLP(hidden_size=128) + optimizer = torch.optim.SGD(model.parameters(), lr=0.01) + criterion = torch.nn.CrossEntropyLoss() + + # create strategy + strategy = Naive( + model, + optimizer, + criterion, + train_epochs=1, + device=device, + train_mb_size=32, + evaluator=eval_plugin, + ) + + # train on the selected scenario with the chosen strategy + for experience in scenario.train_stream: + print("Start training on experience ", experience.current_experience) + strategy.train(experience) + strategy.eval(scenario.test_stream[:]) + + +if __name__ == "__main__": + main() diff --git a/examples/NEW_online_naive.py b/examples/NEW_online_naive.py new file mode 100644 index 000000000..249bf9437 --- /dev/null +++ b/examples/NEW_online_naive.py @@ -0,0 +1,137 @@ +################################################################################ +# Copyright (c) 2021 ContinualAI. # +# Copyrights licensed under the MIT License. # +# See the accompanying LICENSE file for terms. # +# # +# Date: 12-10-2020 # +# Author(s): Vincenzo Lomonaco, Hamed Hemati # +# E-mail: contact@continualai.org # +# Website: avalanche.continualai.org # +################################################################################ + +""" +This is a simple example on how to use the Naive strategy. 
+""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from os.path import expanduser + +import argparse +import torch +from torch.nn import CrossEntropyLoss +from torchvision import transforms +from torchvision.datasets import MNIST +from torchvision.transforms import ToTensor, RandomCrop +import torch.optim.lr_scheduler +from avalanche.benchmarks import nc_benchmark +from avalanche.models import SimpleMLP +from avalanche.training.supervised.NEW_strategy_wrappers import OnlineNaive +from avalanche.benchmarks.scenarios.online_scenario import OnlineCLScenario +from avalanche.evaluation.metrics import ( + forgetting_metrics, + accuracy_metrics, + loss_metrics, +) +from avalanche.logging import InteractiveLogger +from avalanche.training.plugins import EvaluationPlugin + + +def main(args): + # --- CONFIG + device = torch.device( + f"cuda:{args.cuda}" + if torch.cuda.is_available() and args.cuda >= 0 + else "cpu" + ) + n_batches = 5 + # --------- + + # --- TRANSFORMATIONS + train_transform = transforms.Compose( + [ + RandomCrop(28, padding=4), + ToTensor(), + transforms.Normalize((0.1307,), (0.3081,)), + ] + ) + test_transform = transforms.Compose( + [ToTensor(), transforms.Normalize((0.1307,), (0.3081,))] + ) + # --------- + + # --- SCENARIO CREATION + mnist_train = MNIST( + root=expanduser("~") + "/.avalanche/data/mnist/", + train=True, + download=True, + transform=train_transform, + ) + mnist_test = MNIST( + root=expanduser("~") + "/.avalanche/data/mnist/", + train=False, + download=True, + transform=test_transform, + ) + scenario = nc_benchmark( + mnist_train, mnist_test, n_batches, task_labels=False, seed=1234 + ) + # --------- + + # MODEL CREATION + model = SimpleMLP(num_classes=scenario.n_classes) + + # choose some metrics and evaluation method + interactive_logger = InteractiveLogger() + + eval_plugin = EvaluationPlugin( + accuracy_metrics( + minibatch=True, epoch=True, experience=True, stream=True + ), + loss_metrics(minibatch=True, epoch=True, experience=True, stream=True), + forgetting_metrics(experience=True), + loggers=[interactive_logger], + ) + + # CREATE THE STRATEGY INSTANCE (ONLINE-NAIVE) + cl_strategy = OnlineNaive( + model, + torch.optim.Adam(model.parameters(), lr=0.1), + CrossEntropyLoss(), + train_passes=1, + train_mb_size=1, + eval_mb_size=32, + device=device, + evaluator=eval_plugin, + ) + + # TRAINING LOOP + print("Starting experiment...") + results = [] + + # Create online benchmark + batch_streams = scenario.streams.values() + # ocl_benchmark = OnlineCLScenario(batch_streams) + for i, exp in enumerate(scenario.train_stream): + # Create online scenario from experience exp + ocl_benchmark = OnlineCLScenario(original_streams=batch_streams, + experiences=exp, + experience_size=1, + access_task_boundaries=True) + # Train on the online train stream of the scenario + cl_strategy.train(ocl_benchmark.train_stream) + results.append(cl_strategy.eval(scenario.original_test_stream)) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--cuda", + type=int, + default=0, + help="Select zero-indexed cuda device. 
-1 to use CPU.", + ) + args = parser.parse_args() + main(args) From 6c5bfb0ed5d47657a4de15813aa220a2aa0e19a2 Mon Sep 17 00:00:00 2001 From: hamedhemati Date: Sun, 24 Jul 2022 20:42:17 +0200 Subject: [PATCH 04/10] Fix syntax --- avalanche/training/templates/NEW_base_sgd.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/avalanche/training/templates/NEW_base_sgd.py b/avalanche/training/templates/NEW_base_sgd.py index 8b5545aa3..41c2f57e5 100644 --- a/avalanche/training/templates/NEW_base_sgd.py +++ b/avalanche/training/templates/NEW_base_sgd.py @@ -489,7 +489,7 @@ def _maybe_peval(self, strategy, counter, **kwargs): if self.eval_every > 0 and counter % self.eval_every == 0: self._peval(strategy, **kwargs) - def after_training_epoch(self, strategy: "BaseGeneralSGDTemplate", + def after_training_epoch(self, strategy: "BaseSGDTemplate", **kwargs): """Periodic eval controlled by `self.eval_every` and `self.peval_mode`.""" @@ -497,7 +497,7 @@ def after_training_epoch(self, strategy: "BaseGeneralSGDTemplate", self._maybe_peval(strategy, strategy.clock.train_exp_epochs, **kwargs) - def after_training_iteration(self, strategy: "BaseGeneralSGDTemplate", + def after_training_iteration(self, strategy: "BaseSGDTemplate", **kwargs): """Periodic eval controlled by `self.eval_every` and `self.peval_mode`.""" From 3e0c54a4ce7c88873f21dca96a1632ec60397700 Mon Sep 17 00:00:00 2001 From: hamedhemati Date: Wed, 31 Aug 2022 17:01:18 +0200 Subject: [PATCH 05/10] Remove `NEW_` prefixes and split plugin templates --- avalanche/NEW_core.py | 226 -------- avalanche/core.py | 30 + .../supervised/NEW_strategy_wrappers.py | 103 ---- .../training/supervised/strategy_wrappers.py | 2 +- .../supervised/strategy_wrappers_online.py | 6 +- avalanche/training/templates/NEW_base_sgd.py | 519 ------------------ avalanche/training/templates/__init__.py | 8 +- avalanche/training/templates/base_sgd.py | 303 ++++++---- ...ommon_templates.py => common_templates.py} | 2 +- examples/NEW_lamaml.py | 2 +- examples/NEW_online_naive.py | 137 ----- examples/lamaml_cifar100.py | 2 +- examples/{NEW_naive.py => naive.py} | 2 +- 13 files changed, 251 insertions(+), 1091 deletions(-) delete mode 100644 avalanche/NEW_core.py delete mode 100644 avalanche/training/supervised/NEW_strategy_wrappers.py delete mode 100644 avalanche/training/templates/NEW_base_sgd.py rename avalanche/training/templates/{NEW_common_templates.py => common_templates.py} (99%) delete mode 100644 examples/NEW_online_naive.py rename examples/{NEW_naive.py => naive.py} (96%) diff --git a/avalanche/NEW_core.py b/avalanche/NEW_core.py deleted file mode 100644 index baf59efb6..000000000 --- a/avalanche/NEW_core.py +++ /dev/null @@ -1,226 +0,0 @@ -from abc import ABC -from typing import TypeVar, Generic -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from avalanche.training.templates.base import BaseTemplate - -CallbackResult = TypeVar("CallbackResult") -Template = TypeVar("Template", bound="BaseTemplate") - - -class BasePlugin(Generic[Template], ABC): - """ABC for BaseTemplate plugins. - - A plugin is simply an object implementing some strategy callbacks. - Plugins are called automatically during the strategy execution. - - Callbacks provide access before/after each phase of the execution. - In general, for each method of the training and evaluation loops, - `StrategyCallbacks` - provide two functions `before_{method}` and `after_{method}`, called - before and after the method, respectively. 
- Therefore plugins can "inject" additional code by implementing callbacks. - Each callback has a `strategy` argument that gives access to the state. - - In Avalanche, callbacks are used to implement continual strategies, metrics - and loggers. - """ - - def __init__(self): - pass - - def before_training(self, strategy: Template, *args, **kwargs): - """Called before `train` by the `BaseTemplate`.""" - pass - - def before_training_exp(self, strategy: Template, *args, **kwargs): - """Called before `train_exp` by the `BaseTemplate`.""" - pass - - def after_training_exp(self, strategy: Template, *args, **kwargs): - """Called after `train_exp` by the `BaseTemplate`.""" - pass - - def after_training(self, strategy: Template, *args, **kwargs): - """Called after `train` by the `BaseTemplate`.""" - pass - - def before_eval( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called before `eval` by the `BaseTemplate`.""" - pass - - def before_eval_exp( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called before `eval_exp` by the `BaseTemplate`.""" - pass - - def after_eval_exp( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called after `eval_exp` by the `BaseTemplate`.""" - pass - - def after_eval(self, strategy: Template, *args, **kwargs) -> CallbackResult: - """Called after `eval` by the `BaseTemplate`.""" - pass - - -class BaseSGDPlugin(BasePlugin[Template], ABC): - """ABC for BaseSGDTemplate plugins. - - See `BaseSGDTemplate` for complete description of the train/eval loop. - """ - - def before_training_epoch( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called before `train_epoch` by the `BaseTemplate`.""" - pass - - def before_training_iteration( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called before the start of a training iteration by the - `BaseTemplate`.""" - pass - - def before_forward( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called before `model.forward()` by the `BaseTemplate`.""" - pass - - def after_forward( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called after `model.forward()` by the `BaseTemplate`.""" - pass - - def before_backward( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called before `criterion.backward()` by the `BaseTemplate`.""" - pass - - def after_backward( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called after `criterion.backward()` by the `BaseTemplate`.""" - pass - - def after_training_iteration( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called after the end of a training iteration by the - `BaseTemplate`.""" - pass - - def before_update( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called before `optimizer.update()` by the `BaseTemplate`.""" - pass - - def after_update( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called after `optimizer.update()` by the `BaseTemplate`.""" - pass - - def after_training_epoch( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called after `train_epoch` by the `BaseTemplate`.""" - pass - - def before_eval_iteration( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called before the start of a training iteration by the - `BaseTemplate`.""" - pass - - def before_eval_forward( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called before 
`model.forward()` by the `BaseTemplate`.""" - pass - - def after_eval_forward( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called after `model.forward()` by the `BaseTemplate`.""" - pass - - def after_eval_iteration( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called after the end of an iteration by the - `BaseTemplate`.""" - pass - - def before_train_dataset_adaptation( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called before `train_dataset_adapatation` by the `BaseTemplate`.""" - pass - - def after_train_dataset_adaptation( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called after `train_dataset_adapatation` by the `BaseTemplate`.""" - pass - - def before_eval_dataset_adaptation( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called before `eval_dataset_adaptation` by the `BaseTemplate`.""" - pass - - def after_eval_dataset_adaptation( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called after `eval_dataset_adaptation` by the `BaseTemplate`.""" - pass - - # ====================================================================> NEW - - def before_inner_updates( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called before `_inner_updates` by the `BaseTemplate`.""" - pass - - def inner_updates( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called before `_inner_updates` by the `BaseTemplate`.""" - pass - - def after_inner_updates( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called before `_outer_updates` by the `BaseTemplate`.""" - pass - - def before_outer_update( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called before `_outer_updates` by the `BaseTemplate`.""" - pass - - def outer_update( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called before `_inner_updates` by the `BaseTemplate`.""" - pass - - def after_outer_update( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called before `_outer_updates` by the `BaseTemplate`.""" - pass diff --git a/avalanche/core.py b/avalanche/core.py index 829daa7e2..ac13aac9f 100644 --- a/avalanche/core.py +++ b/avalanche/core.py @@ -193,3 +193,33 @@ def after_eval_dataset_adaptation( ) -> CallbackResult: """Called after `eval_dataset_adaptation` by the `BaseTemplate`.""" pass + + +class SupervisedMetaLearningPlugin(SupervisedPlugin[Template], ABC): + """ABC for SupervisedMetaLearningTemplate plugins. + + See `BaseTemplate` for complete description of the train/eval loop. 
+ """ + def before_inner_updates( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called before `_inner_updates` by the `BaseTemplate`.""" + pass + + def after_inner_updates( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called before `_outer_updates` by the `BaseTemplate`.""" + pass + + def before_outer_update( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called before `_outer_updates` by the `BaseTemplate`.""" + pass + + def after_outer_update( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called before `_outer_updates` by the `BaseTemplate`.""" + pass diff --git a/avalanche/training/supervised/NEW_strategy_wrappers.py b/avalanche/training/supervised/NEW_strategy_wrappers.py deleted file mode 100644 index e5abd5119..000000000 --- a/avalanche/training/supervised/NEW_strategy_wrappers.py +++ /dev/null @@ -1,103 +0,0 @@ -from typing import Optional, List -from torch.nn import Module, CrossEntropyLoss -from torch.optim import Optimizer - -from avalanche.training.plugins import EvaluationPlugin -from avalanche.training.templates.NEW_common_templates import ( - SupervisedTemplate, - OnlineSupervisedTemplate -) -from avalanche.training.plugins.evaluation import default_evaluator -from avalanche.training.plugins import SupervisedPlugin - - -class Naive(SupervisedTemplate): - def __init__( - self, - model: Module, - optimizer: Optimizer, - criterion=CrossEntropyLoss(), - train_mb_size: int = 1, - train_epochs: int = 1, - eval_mb_size: Optional[int] = None, - device=None, - plugins: Optional[List[SupervisedPlugin]] = None, - evaluator: EvaluationPlugin = default_evaluator, - eval_every=-1, - **base_kwargs - ): - super().__init__( - model, - optimizer, - criterion, - train_mb_size=train_mb_size, - train_epochs=train_epochs, - eval_mb_size=eval_mb_size, - device=device, - plugins=plugins, - evaluator=evaluator, - eval_every=eval_every, - **base_kwargs - ) - - -class OnlineNaive(OnlineSupervisedTemplate): - """Online naive finetuning. - - The simplest (and least effective) Continual Learning strategy. Naive just - incrementally fine tunes a single model without employing any method - to contrast the catastrophic forgetting of previous knowledge. - This strategy does not use task identities. - - Naive is easy to set up and its results are commonly used to show the worst - performing baseline. - """ - - def __init__( - self, - model: Module, - optimizer: Optimizer, - criterion=CrossEntropyLoss(), - train_passes: int = 1, - train_mb_size: int = 1, - eval_mb_size: int = None, - device=None, - plugins: Optional[List[SupervisedPlugin]] = None, - evaluator: EvaluationPlugin = default_evaluator, - eval_every=-1, - ): - """ - Creates an instance of the Naive strategy. - - :param model: The model. - :param optimizer: The optimizer to use. - :param criterion: The loss criterion to use. - :param num_passes: The number of passes for each sub-experience. - Defaults to 1. - :param train_mb_size: The train minibatch size. Defaults to 1. - :param eval_mb_size: The eval minibatch size. Defaults to 1. - :param device: The device to use. Defaults to None (cpu). - :param plugins: Plugins to be added. Defaults to None. - :param evaluator: (optional) instance of EvaluationPlugin for logging - and metric computations. - :param eval_every: the frequency of the calls to `eval` inside the - training loop. -1 disables the evaluation. 0 means `eval` is called - only at the end of the learning experience. 
Values >0 mean that - `eval` is called every `eval_every` epochs and at the end of the - learning experience. - """ - super().__init__( - model, - optimizer, - criterion, - train_passes=train_passes, - train_mb_size=train_mb_size, - eval_mb_size=eval_mb_size, - device=device, - plugins=plugins, - evaluator=evaluator, - eval_every=eval_every, - ) - - -__all__ = ["Naive", "OnlineNaive"] diff --git a/avalanche/training/supervised/strategy_wrappers.py b/avalanche/training/supervised/strategy_wrappers.py index 743c74b20..713ba8c7c 100644 --- a/avalanche/training/supervised/strategy_wrappers.py +++ b/avalanche/training/supervised/strategy_wrappers.py @@ -34,7 +34,7 @@ MASPlugin, ) from avalanche.training.templates.base import BaseTemplate -from avalanche.training.templates.supervised import SupervisedTemplate +from avalanche.training.templates.common_templates import SupervisedTemplate from avalanche.models.generator import MlpVAE, VAE_loss from avalanche.logging import InteractiveLogger diff --git a/avalanche/training/supervised/strategy_wrappers_online.py b/avalanche/training/supervised/strategy_wrappers_online.py index 3eb5d5003..24464690e 100644 --- a/avalanche/training/supervised/strategy_wrappers_online.py +++ b/avalanche/training/supervised/strategy_wrappers_online.py @@ -15,13 +15,13 @@ from avalanche.training.plugins.evaluation import default_evaluator from avalanche.training.plugins import SupervisedPlugin, EvaluationPlugin -from avalanche.training.templates.online_supervised import ( +from avalanche.training.templates.common_templates import ( OnlineSupervisedTemplate, ) class OnlineNaive(OnlineSupervisedTemplate): - """Naive finetuning. + """Online naive finetuning. The simplest (and least effective) Continual Learning strategy. Naive just incrementally fine tunes a single model without employing any method @@ -42,7 +42,7 @@ def __init__( eval_mb_size: int = None, device=None, plugins: Optional[List[SupervisedPlugin]] = None, - evaluator: EvaluationPlugin = default_evaluator(), + evaluator: EvaluationPlugin = default_evaluator, eval_every=-1, ): """ diff --git a/avalanche/training/templates/NEW_base_sgd.py b/avalanche/training/templates/NEW_base_sgd.py deleted file mode 100644 index 41c2f57e5..000000000 --- a/avalanche/training/templates/NEW_base_sgd.py +++ /dev/null @@ -1,519 +0,0 @@ -from typing import Iterable, Sequence, Optional, Union, List -from pkg_resources import parse_version - -import torch -from torch.nn import Module, CrossEntropyLoss -from torch.optim import Optimizer -from torch.utils.data import DataLoader - -from avalanche.benchmarks import CLExperience, CLStream -from avalanche.core import BaseSGDPlugin -from avalanche.training.plugins import SupervisedPlugin, EvaluationPlugin -from avalanche.training.plugins.clock import Clock -from avalanche.training.plugins.evaluation import default_evaluator -from avalanche.training.templates.base import BaseTemplate, ExpSequence -from avalanche.models.utils import avalanche_model_adaptation -from avalanche.benchmarks.utils.data_loader import TaskBalancedDataLoader, \ - collate_from_data_or_kwargs -from avalanche.training.utils import trigger_plugins - - -class BaseSGDTemplate(BaseTemplate): - """Base SGD class for continual learning skeletons. 
- - **Training loop** - The training loop is organized as follows:: - - train - train_exp # for each experience - - **Evaluation loop** - The evaluation loop is organized as follows:: - - eval - eval_exp # for each experience - - """ - - PLUGIN_CLASS = BaseSGDPlugin - - def __init__( - self, - model: Module, - optimizer: Optimizer, - criterion=CrossEntropyLoss(), - train_mb_size: int = 1, - train_epochs: int = 1, - eval_mb_size: Optional[int] = 1, - device="cpu", - plugins: Optional[List["SupervisedPlugin"]] = None, - evaluator: EvaluationPlugin = default_evaluator, - eval_every=-1, - peval_mode="epoch", - ): - """Init. - - :param model: PyTorch model. - :param optimizer: PyTorch optimizer. - :param criterion: loss function. - :param train_mb_size: mini-batch size for training. - :param train_epochs: number of training epochs. - :param eval_mb_size: mini-batch size for eval. - :param evaluator: (optional) instance of EvaluationPlugin for logging - and metric computations. None to remove logging. - :param eval_every: the frequency of the calls to `eval` inside the - training loop. -1 disables the evaluation. 0 means `eval` is called - only at the end of the learning experience. Values >0 mean that - `eval` is called every `eval_every` epochs and at the end of the - learning experience. - :param peval_mode: one of {'epoch', 'iteration'}. Decides whether the - periodic evaluation during training should execute every - `eval_every` epochs or iterations (Default='epoch'). - """ - super().__init__(model=model, device=device, plugins=plugins) - - self.optimizer: Optimizer = optimizer - """ PyTorch optimizer. """ - - self._criterion = criterion - """ Criterion. """ - - self.train_epochs: int = train_epochs - """ Number of training epochs. """ - - self.train_mb_size: int = train_mb_size - """ Training mini-batch size. """ - - self.eval_mb_size: int = ( - train_mb_size if eval_mb_size is None else eval_mb_size - ) - """ Eval mini-batch size. """ - - if evaluator is None: - evaluator = EvaluationPlugin() - self.plugins.append(evaluator) - self.evaluator = evaluator - """ EvaluationPlugin used for logging and metric computations. """ - - # Configure periodic evaluation. - assert peval_mode in {"experience", "epoch", "iteration"} - self.eval_every = eval_every - peval = PeriodicEval(eval_every, peval_mode) - self.plugins.append(peval) - - self.clock = Clock() - """ Incremental counters for strategy events. """ - # WARNING: Clock needs to be the last plugin, otherwise - # counters will be wrong for plugins called after it. - self.plugins.append(self.clock) - - ################################################################### - # State variables. These are updated during the train/eval loops. # - ################################################################### - - self.adapted_dataset = None - """ Data used to train. It may be modified by plugins. Plugins can - append data to it (e.g. for replay). - - .. note:: - - This dataset may contain samples from different experiences. If you - want the original data for the current experience - use :attr:`.BaseTemplate.experience`. - """ - - self.dataloader = None - """ Dataloader. """ - - self.mbatch = None - """ Current mini-batch. """ - - self.mb_output = None - """ Model's output computed on the current mini-batch. """ - - self.loss = None - """ Loss of the current mini-batch. 
""" - - self._stop_training = False - - def train(self, - experiences: Union[CLExperience, - ExpSequence], - eval_streams: Optional[Sequence[Union[CLExperience, - ExpSequence]]] = None, - **kwargs): - - super().train(experiences, eval_streams, **kwargs) - return self.evaluator.get_last_metrics() - - @torch.no_grad() - def eval(self, exp_list: Union[CLExperience, CLStream], **kwargs): - """ - Evaluate the current model on a series of experiences and - returns the last recorded value for each metric. - - :param exp_list: CL experience information. - :param kwargs: custom arguments. - - :return: dictionary containing last recorded value for - each metric name - """ - super().eval(exp_list, **kwargs) - return self.evaluator.get_last_metrics() - - def _train_exp( - self, experience: CLExperience, eval_streams, **kwargs - ): - # Should be implemented in Observation Type - raise NotImplementedError() - - def _eval_exp(self, **kwargs): - self.eval_epoch(**kwargs) - - def make_optimizer(self, **kwargs): - """Optimizer initialization.""" - # Should be implemented in Observation Type - raise NotImplementedError() - - def criterion(self): - """Compute loss function.""" - raise NotImplementedError() - - def forward(self): - """Compute the model's output given the current mini-batch.""" - raise NotImplementedError() - - def model_adaptation(self, model=None): - """Adapts the model to the current experience.""" - raise NotImplementedError() - - def stop_training(self): - """Signals to stop training at the next iteration.""" - self._stop_training = True - - def training_epoch(self, **kwargs): - # Should be implemented in Update Type - raise NotADirectoryError() - - def backward(self): - """Run the backward pass.""" - self.loss.backward() - - def optimizer_step(self): - """Execute the optimizer step (weights update).""" - self.optimizer.step() - - def eval_epoch(self, **kwargs): - """Evaluation loop over the current `self.dataloader`.""" - for self.mbatch in self.dataloader: - self._unpack_minibatch() - self._before_eval_iteration(**kwargs) - - self._before_eval_forward(**kwargs) - self.mb_output = self.forward() - self._after_eval_forward(**kwargs) - self.loss = self.criterion() - - self._after_eval_iteration(**kwargs) - - # ==================================================================> NEW - - def maybe_adapt_model_and_make_optimizer(self): - # Should be implemented in observation type - raise NotImplementedError() - - def _before_training_exp(self, **kwargs): - """Setup to train on a single experience.""" - # Data Adaptation (e.g. add new samples/data augmentation) - self._before_train_dataset_adaptation(**kwargs) - self.train_dataset_adaptation(**kwargs) - self._after_train_dataset_adaptation(**kwargs) - - self.make_train_dataloader(**kwargs) - - # Model Adaptation (e.g. freeze/add new units) - # self.model = self.model_adaptation() - # self.make_optimizer() - self.maybe_adapt_model_and_make_optimizer() - - super()._before_training_exp(**kwargs) - - def _save_train_state(self): - """Save the training state which may be modified by the eval loop. - - This currently includes: experience, adapted_dataset, dataloader, - is_training, and train/eval modes for each module. - - TODO: we probably need a better way to do this. 
- """ - state = super()._save_train_state() - new_state = { - "adapted_dataset": self.adapted_dataset, - "dataloader": self.dataloader, - } - return {**state, **new_state} - - def train_dataset_adaptation(self, **kwargs): - """Initialize `self.adapted_dataset`.""" - self.adapted_dataset = self.experience.dataset - self.adapted_dataset = self.adapted_dataset.train() - - def _load_train_state(self, prev_state): - super()._load_train_state(prev_state) - self.adapted_dataset = prev_state["adapted_dataset"] - self.dataloader = prev_state["dataloader"] - - def _before_eval_exp(self, **kwargs): - - # Data Adaptation - self._before_eval_dataset_adaptation(**kwargs) - self.eval_dataset_adaptation(**kwargs) - self._after_eval_dataset_adaptation(**kwargs) - - self.make_eval_dataloader(**kwargs) - # Model Adaptation (e.g. freeze/add new units) - self.model = self.model_adaptation() - - super()._before_eval_exp(**kwargs) - - def make_train_dataloader( - self, - num_workers=0, - shuffle=True, - pin_memory=True, - persistent_workers=False, - **kwargs - ): - """Data loader initialization. - - Called at the start of each learning experience after the dataset - adaptation. - - :param num_workers: number of thread workers for the data loading. - :param shuffle: True if the data should be shuffled, False otherwise. - :param pin_memory: If True, the data loader will copy Tensors into CUDA - pinned memory before returning them. Defaults to True. - """ - - other_dataloader_args = {} - - if parse_version(torch.__version__) >= parse_version("1.7.0"): - other_dataloader_args["persistent_workers"] = persistent_workers - for k, v in kwargs.items(): - other_dataloader_args[k] = v - - self.dataloader = TaskBalancedDataLoader( - self.adapted_dataset, - oversample_small_groups=True, - num_workers=num_workers, - batch_size=self.train_mb_size, - shuffle=shuffle, - pin_memory=pin_memory, - **other_dataloader_args - ) - - def make_eval_dataloader( - self, num_workers=0, pin_memory=True, persistent_workers=False, **kwargs - ): - """ - Initializes the eval data loader. - :param num_workers: How many subprocesses to use for data loading. - 0 means that the data will be loaded in the main process. - (default: 0). - :param pin_memory: If True, the data loader will copy Tensors into CUDA - pinned memory before returning them. Defaults to True. - :param kwargs: - :return: - """ - other_dataloader_args = {} - - if parse_version(torch.__version__) >= parse_version("1.7.0"): - other_dataloader_args["persistent_workers"] = persistent_workers - for k, v in kwargs.items(): - other_dataloader_args[k] = v - - collate_from_data_or_kwargs(self.adapted_dataset, - other_dataloader_args) - self.dataloader = DataLoader( - self.adapted_dataset, - num_workers=num_workers, - batch_size=self.eval_mb_size, - pin_memory=pin_memory, - **other_dataloader_args - ) - - def eval_dataset_adaptation(self, **kwargs): - """Initialize `self.adapted_dataset`.""" - self.adapted_dataset = self.experience.dataset - self.adapted_dataset = self.adapted_dataset.eval() - - def model_adaptation(self, model=None): - """Adapts the model to the current data. - - Calls the :class:`~avalanche.models.DynamicModule`s adaptation. 
- """ - if model is None: - model = self.model - avalanche_model_adaptation(model, self.experience) - return model.to(self.device) - - def _unpack_minibatch(self): - """Move to device""" - # First verify the mini-batch - self._check_minibatch() - - for i in range(len(self.mbatch)): - self.mbatch[i] = self.mbatch[i].to(self.device) - - ######################################################### - # Plugin Triggers # - ######################################################### - - def _before_training_epoch(self, **kwargs): - trigger_plugins(self, "before_training_epoch", **kwargs) - - def _after_training_epoch(self, **kwargs): - trigger_plugins(self, "after_training_epoch", **kwargs) - - def _before_training_iteration(self, **kwargs): - trigger_plugins(self, "before_training_iteration", **kwargs) - - def _before_forward(self, **kwargs): - trigger_plugins(self, "before_forward", **kwargs) - - def _after_forward(self, **kwargs): - trigger_plugins(self, "after_forward", **kwargs) - - def _before_backward(self, **kwargs): - trigger_plugins(self, "before_backward", **kwargs) - - def _after_backward(self, **kwargs): - trigger_plugins(self, "after_backward", **kwargs) - - def _after_training_iteration(self, **kwargs): - trigger_plugins(self, "after_training_iteration", **kwargs) - - def _before_update(self, **kwargs): - trigger_plugins(self, "before_update", **kwargs) - - def _after_update(self, **kwargs): - trigger_plugins(self, "after_update", **kwargs) - - def _before_eval_iteration(self, **kwargs): - trigger_plugins(self, "before_eval_iteration", **kwargs) - - def _before_eval_forward(self, **kwargs): - trigger_plugins(self, "before_eval_forward", **kwargs) - - def _after_eval_forward(self, **kwargs): - trigger_plugins(self, "after_eval_forward", **kwargs) - - def _after_eval_iteration(self, **kwargs): - trigger_plugins(self, "after_eval_iteration", **kwargs) - - # ==================================================================> NEW - - def _before_train_dataset_adaptation(self, **kwargs): - trigger_plugins(self, "before_train_dataset_adaptation", **kwargs) - - def _after_train_dataset_adaptation(self, **kwargs): - trigger_plugins(self, "after_train_dataset_adaptation", **kwargs) - - def _before_eval_dataset_adaptation(self, **kwargs): - trigger_plugins(self, "before_eval_dataset_adaptation", **kwargs) - - def _after_eval_dataset_adaptation(self, **kwargs): - trigger_plugins(self, "after_eval_dataset_adaptation", **kwargs) - - -class PeriodicEval(SupervisedPlugin): - """Schedules periodic evaluation during training. - - This plugin is automatically configured and added by the BaseTemplate. - """ - - def __init__(self, eval_every=-1, peval_mode="epoch", do_initial=True): - """Init. - - :param eval_every: the frequency of the calls to `eval` inside the - training loop. -1 disables the evaluation. 0 means `eval` is called - only at the end of the learning experience. Values >0 mean that - `eval` is called every `eval_every` epochs and at the end of the - learning experience. - :param peval_mode: one of {'epoch', 'iteration'}. Decides whether the - periodic evaluation during training should execute every - `eval_every` epochs or iterations (Default='epoch'). - :param do_initial: whether to evaluate before each `train` call. - Occasionally needed becuase some metrics need to know the - accuracy before training. 
- """ - super().__init__() - assert peval_mode in {"experience", "epoch", "iteration"} - self.eval_every = eval_every - self.peval_mode = peval_mode - self.do_initial = do_initial and eval_every > -1 - self.do_final = None - self._is_eval_updated = False - - def before_training(self, strategy, **kwargs): - """Eval before each learning experience. - - Occasionally needed because some metrics need the accuracy before - training. - """ - if self.do_initial: - self._peval(strategy, **kwargs) - - def before_training_exp(self, strategy, **kwargs): - # We evaluate at the start of each experience because train_epochs - # could change. - self.do_final = True - if self.peval_mode == "epoch": - if ( - self.eval_every > 0 - and (strategy.train_epochs - 1) % self.eval_every == 0 - ): - self.do_final = False - else: # peval_mode == 'iteration' - # we may need to fix this but we don't have a way to know - # the number of total iterations. - # Right now there may be two eval calls at the last iterations. - pass - self.do_final = self.do_final and self.eval_every > -1 - - def _peval(self, strategy, **kwargs): - for el in strategy._eval_streams: - strategy.eval(el, **kwargs) - - def _maybe_peval(self, strategy, counter, **kwargs): - if self.eval_every > 0 and counter % self.eval_every == 0: - self._peval(strategy, **kwargs) - - def after_training_epoch(self, strategy: "BaseSGDTemplate", - **kwargs): - """Periodic eval controlled by `self.eval_every` and - `self.peval_mode`.""" - if self.peval_mode == "epoch": - self._maybe_peval(strategy, strategy.clock.train_exp_epochs, - **kwargs) - - def after_training_iteration(self, strategy: "BaseSGDTemplate", - **kwargs): - """Periodic eval controlled by `self.eval_every` and - `self.peval_mode`.""" - if self.peval_mode == "iteration": - self._maybe_peval(strategy, strategy.clock.train_exp_iterations, - **kwargs) - - # ---> New - def after_training_exp(self, strategy, **kwargs): - """Final eval after a learning experience.""" - if self.do_final: - self._peval(strategy, **kwargs) - - # def after_training_exp(self, strategy: "BaseOnlineSGDTemplate", **kwargs): - # """Periodic eval controlled by `self.eval_every` and - # `self.peval_mode`.""" - # if self.peval_mode == "experience": - # self._maybe_peval(strategy, strategy.clock.train_exp_counter, - # **kwargs) diff --git a/avalanche/training/templates/__init__.py b/avalanche/training/templates/__init__.py index 191c78e28..5438e8d9b 100644 --- a/avalanche/training/templates/__init__.py +++ b/avalanche/training/templates/__init__.py @@ -11,6 +11,8 @@ """ from .base import BaseTemplate from .base_sgd import BaseSGDTemplate -from .base_online_sgd import BaseOnlineSGDTemplate -from .online_supervised import OnlineSupervisedTemplate -from .supervised import SupervisedTemplate +from .common_templates import ( + SupervisedTemplate, + SupervisedMetaLearningTemplate, + OnlineSupervisedTemplate +) diff --git a/avalanche/training/templates/base_sgd.py b/avalanche/training/templates/base_sgd.py index 36e550f45..41c2f57e5 100644 --- a/avalanche/training/templates/base_sgd.py +++ b/avalanche/training/templates/base_sgd.py @@ -1,8 +1,10 @@ from typing import Iterable, Sequence, Optional, Union, List +from pkg_resources import parse_version import torch -from torch.nn import Module +from torch.nn import Module, CrossEntropyLoss from torch.optim import Optimizer +from torch.utils.data import DataLoader from avalanche.benchmarks import CLExperience, CLStream from avalanche.core import BaseSGDPlugin @@ -10,17 +12,14 @@ from 
avalanche.training.plugins.clock import Clock from avalanche.training.plugins.evaluation import default_evaluator from avalanche.training.templates.base import BaseTemplate, ExpSequence - -from typing import TYPE_CHECKING - +from avalanche.models.utils import avalanche_model_adaptation +from avalanche.benchmarks.utils.data_loader import TaskBalancedDataLoader, \ + collate_from_data_or_kwargs from avalanche.training.utils import trigger_plugins -if TYPE_CHECKING: - from avalanche.training.templates.supervised import SupervisedTemplate - class BaseSGDTemplate(BaseTemplate): - """Base class for continual learning skeletons. + """Base SGD class for continual learning skeletons. **Training loop** The training loop is organized as follows:: @@ -42,12 +41,13 @@ def __init__( self, model: Module, optimizer: Optimizer, + criterion=CrossEntropyLoss(), train_mb_size: int = 1, train_epochs: int = 1, eval_mb_size: Optional[int] = 1, device="cpu", plugins: Optional[List["SupervisedPlugin"]] = None, - evaluator: EvaluationPlugin = default_evaluator(), + evaluator: EvaluationPlugin = default_evaluator, eval_every=-1, peval_mode="epoch", ): @@ -55,6 +55,7 @@ def __init__( :param model: PyTorch model. :param optimizer: PyTorch optimizer. + :param criterion: loss function. :param train_mb_size: mini-batch size for training. :param train_epochs: number of training epochs. :param eval_mb_size: mini-batch size for eval. @@ -74,6 +75,9 @@ def __init__( self.optimizer: Optimizer = optimizer """ PyTorch optimizer. """ + self._criterion = criterion + """ Criterion. """ + self.train_epochs: int = train_epochs """ Number of training epochs. """ @@ -92,7 +96,7 @@ def __init__( """ EvaluationPlugin used for logging and metric computations. """ # Configure periodic evaluation. - assert peval_mode in {"epoch", "iteration"} + assert peval_mode in {"experience", "epoch", "iteration"} self.eval_every = eval_every peval = PeriodicEval(eval_every, peval_mode) self.plugins.append(peval) @@ -107,6 +111,17 @@ def __init__( # State variables. These are updated during the train/eval loops. # ################################################################### + self.adapted_dataset = None + """ Data used to train. It may be modified by plugins. Plugins can + append data to it (e.g. for replay). + + .. note:: + + This dataset may contain samples from different experiences. If you + want the original data for the current experience + use :attr:`.BaseTemplate.experience`. + """ + self.dataloader = None """ Dataloader. """ @@ -127,6 +142,7 @@ def train(self, eval_streams: Optional[Sequence[Union[CLExperience, ExpSequence]]] = None, **kwargs): + super().train(experiences, eval_streams, **kwargs) return self.evaluator.get_last_metrics() @@ -145,58 +161,18 @@ def eval(self, exp_list: Union[CLExperience, CLStream], **kwargs): super().eval(exp_list, **kwargs) return self.evaluator.get_last_metrics() - def _before_training_exp(self, **kwargs): - self.make_train_dataloader(**kwargs) - # Model Adaptation (e.g. freeze/add new units) - self.model = self.model_adaptation() - self.make_optimizer() - super()._before_training_exp(**kwargs) - def _train_exp( - self, experience: CLExperience, eval_streams=None, **kwargs + self, experience: CLExperience, eval_streams, **kwargs ): - """Training loop over a single Experience object. - - :param experience: CL experience information. - :param eval_streams: list of streams for evaluation. - If None: use the training experience for evaluation. - Use [] if you do not want to evaluate during training. 
- :param kwargs: custom arguments. - """ - if eval_streams is None: - eval_streams = [experience] - for i, exp in enumerate(eval_streams): - if not isinstance(exp, Iterable): - eval_streams[i] = [exp] - for _ in range(self.train_epochs): - self._before_training_epoch(**kwargs) - - if self._stop_training: # Early stopping - self._stop_training = False - break - - self.training_epoch(**kwargs) - self._after_training_epoch(**kwargs) - - def _before_eval_exp(self, **kwargs): - self.make_eval_dataloader(**kwargs) - # Model Adaptation (e.g. freeze/add new units) - self.model = self.model_adaptation() - super()._before_eval_exp(**kwargs) + # Should be implemented in Observation Type + raise NotImplementedError() def _eval_exp(self, **kwargs): self.eval_epoch(**kwargs) - def make_train_dataloader(self, **kwargs): - """Assign dataloader to self.dataloader.""" - raise NotImplementedError() - - def make_eval_dataloader(self, **kwargs): - """Assign dataloader to self.dataloader.""" - raise NotImplementedError() - def make_optimizer(self, **kwargs): """Optimizer initialization.""" + # Should be implemented in Observation Type raise NotImplementedError() def criterion(self): @@ -216,39 +192,8 @@ def stop_training(self): self._stop_training = True def training_epoch(self, **kwargs): - """Training epoch. - - :param kwargs: - :return: - """ - for self.mbatch in self.dataloader: - if self._stop_training: - break - - self._unpack_minibatch() - self._before_training_iteration(**kwargs) - - self.optimizer.zero_grad() - self.loss = 0 - - # Forward - self._before_forward(**kwargs) - self.mb_output = self.forward() - self._after_forward(**kwargs) - - # Loss & Backward - self.loss += self.criterion() - - self._before_backward(**kwargs) - self.backward() - self._after_backward(**kwargs) - - # Optimization step - self._before_update(**kwargs) - self.optimizer_step() - self._after_update(**kwargs) - - self._after_training_iteration(**kwargs) + # Should be implemented in Update Type + raise NotADirectoryError() def backward(self): """Run the backward pass.""" @@ -271,8 +216,152 @@ def eval_epoch(self, **kwargs): self._after_eval_iteration(**kwargs) + # ==================================================================> NEW + + def maybe_adapt_model_and_make_optimizer(self): + # Should be implemented in observation type + raise NotImplementedError() + + def _before_training_exp(self, **kwargs): + """Setup to train on a single experience.""" + # Data Adaptation (e.g. add new samples/data augmentation) + self._before_train_dataset_adaptation(**kwargs) + self.train_dataset_adaptation(**kwargs) + self._after_train_dataset_adaptation(**kwargs) + + self.make_train_dataloader(**kwargs) + + # Model Adaptation (e.g. freeze/add new units) + # self.model = self.model_adaptation() + # self.make_optimizer() + self.maybe_adapt_model_and_make_optimizer() + + super()._before_training_exp(**kwargs) + + def _save_train_state(self): + """Save the training state which may be modified by the eval loop. + + This currently includes: experience, adapted_dataset, dataloader, + is_training, and train/eval modes for each module. + + TODO: we probably need a better way to do this. 
+ """ + state = super()._save_train_state() + new_state = { + "adapted_dataset": self.adapted_dataset, + "dataloader": self.dataloader, + } + return {**state, **new_state} + + def train_dataset_adaptation(self, **kwargs): + """Initialize `self.adapted_dataset`.""" + self.adapted_dataset = self.experience.dataset + self.adapted_dataset = self.adapted_dataset.train() + + def _load_train_state(self, prev_state): + super()._load_train_state(prev_state) + self.adapted_dataset = prev_state["adapted_dataset"] + self.dataloader = prev_state["dataloader"] + + def _before_eval_exp(self, **kwargs): + + # Data Adaptation + self._before_eval_dataset_adaptation(**kwargs) + self.eval_dataset_adaptation(**kwargs) + self._after_eval_dataset_adaptation(**kwargs) + + self.make_eval_dataloader(**kwargs) + # Model Adaptation (e.g. freeze/add new units) + self.model = self.model_adaptation() + + super()._before_eval_exp(**kwargs) + + def make_train_dataloader( + self, + num_workers=0, + shuffle=True, + pin_memory=True, + persistent_workers=False, + **kwargs + ): + """Data loader initialization. + + Called at the start of each learning experience after the dataset + adaptation. + + :param num_workers: number of thread workers for the data loading. + :param shuffle: True if the data should be shuffled, False otherwise. + :param pin_memory: If True, the data loader will copy Tensors into CUDA + pinned memory before returning them. Defaults to True. + """ + + other_dataloader_args = {} + + if parse_version(torch.__version__) >= parse_version("1.7.0"): + other_dataloader_args["persistent_workers"] = persistent_workers + for k, v in kwargs.items(): + other_dataloader_args[k] = v + + self.dataloader = TaskBalancedDataLoader( + self.adapted_dataset, + oversample_small_groups=True, + num_workers=num_workers, + batch_size=self.train_mb_size, + shuffle=shuffle, + pin_memory=pin_memory, + **other_dataloader_args + ) + + def make_eval_dataloader( + self, num_workers=0, pin_memory=True, persistent_workers=False, **kwargs + ): + """ + Initializes the eval data loader. + :param num_workers: How many subprocesses to use for data loading. + 0 means that the data will be loaded in the main process. + (default: 0). + :param pin_memory: If True, the data loader will copy Tensors into CUDA + pinned memory before returning them. Defaults to True. + :param kwargs: + :return: + """ + other_dataloader_args = {} + + if parse_version(torch.__version__) >= parse_version("1.7.0"): + other_dataloader_args["persistent_workers"] = persistent_workers + for k, v in kwargs.items(): + other_dataloader_args[k] = v + + collate_from_data_or_kwargs(self.adapted_dataset, + other_dataloader_args) + self.dataloader = DataLoader( + self.adapted_dataset, + num_workers=num_workers, + batch_size=self.eval_mb_size, + pin_memory=pin_memory, + **other_dataloader_args + ) + + def eval_dataset_adaptation(self, **kwargs): + """Initialize `self.adapted_dataset`.""" + self.adapted_dataset = self.experience.dataset + self.adapted_dataset = self.adapted_dataset.eval() + + def model_adaptation(self, model=None): + """Adapts the model to the current data. + + Calls the :class:`~avalanche.models.DynamicModule`s adaptation. 
+ """ + if model is None: + model = self.model + avalanche_model_adaptation(model, self.experience) + return model.to(self.device) + def _unpack_minibatch(self): """Move to device""" + # First verify the mini-batch + self._check_minibatch() + for i in range(len(self.mbatch)): self.mbatch[i] = self.mbatch[i].to(self.device) @@ -322,6 +411,20 @@ def _after_eval_forward(self, **kwargs): def _after_eval_iteration(self, **kwargs): trigger_plugins(self, "after_eval_iteration", **kwargs) + # ==================================================================> NEW + + def _before_train_dataset_adaptation(self, **kwargs): + trigger_plugins(self, "before_train_dataset_adaptation", **kwargs) + + def _after_train_dataset_adaptation(self, **kwargs): + trigger_plugins(self, "after_train_dataset_adaptation", **kwargs) + + def _before_eval_dataset_adaptation(self, **kwargs): + trigger_plugins(self, "before_eval_dataset_adaptation", **kwargs) + + def _after_eval_dataset_adaptation(self, **kwargs): + trigger_plugins(self, "after_eval_dataset_adaptation", **kwargs) + class PeriodicEval(SupervisedPlugin): """Schedules periodic evaluation during training. @@ -345,7 +448,7 @@ def __init__(self, eval_every=-1, peval_mode="epoch", do_initial=True): accuracy before training. """ super().__init__() - assert peval_mode in {"epoch", "iteration"} + assert peval_mode in {"experience", "epoch", "iteration"} self.eval_every = eval_every self.peval_mode = peval_mode self.do_initial = do_initial and eval_every > -1 @@ -378,11 +481,6 @@ def before_training_exp(self, strategy, **kwargs): pass self.do_final = self.do_final and self.eval_every > -1 - def after_training_exp(self, strategy, **kwargs): - """Final eval after a learning experience.""" - if self.do_final: - self._peval(strategy, **kwargs) - def _peval(self, strategy, **kwargs): for el in strategy._eval_streams: strategy.eval(el, **kwargs) @@ -391,16 +489,31 @@ def _maybe_peval(self, strategy, counter, **kwargs): if self.eval_every > 0 and counter % self.eval_every == 0: self._peval(strategy, **kwargs) - def after_training_epoch(self, strategy: "BaseSGDTemplate", **kwargs): + def after_training_epoch(self, strategy: "BaseSGDTemplate", + **kwargs): """Periodic eval controlled by `self.eval_every` and `self.peval_mode`.""" if self.peval_mode == "epoch": self._maybe_peval(strategy, strategy.clock.train_exp_epochs, **kwargs) - def after_training_iteration(self, strategy: "BaseSGDTemplate", **kwargs): + def after_training_iteration(self, strategy: "BaseSGDTemplate", + **kwargs): """Periodic eval controlled by `self.eval_every` and `self.peval_mode`.""" if self.peval_mode == "iteration": self._maybe_peval(strategy, strategy.clock.train_exp_iterations, **kwargs) + + # ---> New + def after_training_exp(self, strategy, **kwargs): + """Final eval after a learning experience.""" + if self.do_final: + self._peval(strategy, **kwargs) + + # def after_training_exp(self, strategy: "BaseOnlineSGDTemplate", **kwargs): + # """Periodic eval controlled by `self.eval_every` and + # `self.peval_mode`.""" + # if self.peval_mode == "experience": + # self._maybe_peval(strategy, strategy.clock.train_exp_counter, + # **kwargs) diff --git a/avalanche/training/templates/NEW_common_templates.py b/avalanche/training/templates/common_templates.py similarity index 99% rename from avalanche/training/templates/NEW_common_templates.py rename to avalanche/training/templates/common_templates.py index eb3fdd6be..54aefd058 100644 --- a/avalanche/training/templates/NEW_common_templates.py +++ 
b/avalanche/training/templates/common_templates.py @@ -9,7 +9,7 @@ from .observation_type import * from .problem_type import * from .update_type import * -from .NEW_base_sgd import BaseSGDTemplate +from .base_sgd import BaseSGDTemplate class SupervisedTemplate(BatchObservation, SupervisedProblem, SGDUpdate, diff --git a/examples/NEW_lamaml.py b/examples/NEW_lamaml.py index ec456461d..5e30e303e 100644 --- a/examples/NEW_lamaml.py +++ b/examples/NEW_lamaml.py @@ -9,7 +9,7 @@ from avalanche.training.plugins import EvaluationPlugin from avalanche.benchmarks.classic import SplitMNIST from avalanche.logging import InteractiveLogger -from avalanche.training.templates.NEW_common_templates import ( +from avalanche.training.templates.common_templates import ( SupervisedMetaLearningTemplate ) from avalanche.training.plugins.NEW_lamaml import LaMAMLPlugin diff --git a/examples/NEW_online_naive.py b/examples/NEW_online_naive.py deleted file mode 100644 index 249bf9437..000000000 --- a/examples/NEW_online_naive.py +++ /dev/null @@ -1,137 +0,0 @@ -################################################################################ -# Copyright (c) 2021 ContinualAI. # -# Copyrights licensed under the MIT License. # -# See the accompanying LICENSE file for terms. # -# # -# Date: 12-10-2020 # -# Author(s): Vincenzo Lomonaco, Hamed Hemati # -# E-mail: contact@continualai.org # -# Website: avalanche.continualai.org # -################################################################################ - -""" -This is a simple example on how to use the Naive strategy. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from os.path import expanduser - -import argparse -import torch -from torch.nn import CrossEntropyLoss -from torchvision import transforms -from torchvision.datasets import MNIST -from torchvision.transforms import ToTensor, RandomCrop -import torch.optim.lr_scheduler -from avalanche.benchmarks import nc_benchmark -from avalanche.models import SimpleMLP -from avalanche.training.supervised.NEW_strategy_wrappers import OnlineNaive -from avalanche.benchmarks.scenarios.online_scenario import OnlineCLScenario -from avalanche.evaluation.metrics import ( - forgetting_metrics, - accuracy_metrics, - loss_metrics, -) -from avalanche.logging import InteractiveLogger -from avalanche.training.plugins import EvaluationPlugin - - -def main(args): - # --- CONFIG - device = torch.device( - f"cuda:{args.cuda}" - if torch.cuda.is_available() and args.cuda >= 0 - else "cpu" - ) - n_batches = 5 - # --------- - - # --- TRANSFORMATIONS - train_transform = transforms.Compose( - [ - RandomCrop(28, padding=4), - ToTensor(), - transforms.Normalize((0.1307,), (0.3081,)), - ] - ) - test_transform = transforms.Compose( - [ToTensor(), transforms.Normalize((0.1307,), (0.3081,))] - ) - # --------- - - # --- SCENARIO CREATION - mnist_train = MNIST( - root=expanduser("~") + "/.avalanche/data/mnist/", - train=True, - download=True, - transform=train_transform, - ) - mnist_test = MNIST( - root=expanduser("~") + "/.avalanche/data/mnist/", - train=False, - download=True, - transform=test_transform, - ) - scenario = nc_benchmark( - mnist_train, mnist_test, n_batches, task_labels=False, seed=1234 - ) - # --------- - - # MODEL CREATION - model = SimpleMLP(num_classes=scenario.n_classes) - - # choose some metrics and evaluation method - interactive_logger = InteractiveLogger() - - eval_plugin = EvaluationPlugin( - accuracy_metrics( - minibatch=True, epoch=True, 
experience=True, stream=True - ), - loss_metrics(minibatch=True, epoch=True, experience=True, stream=True), - forgetting_metrics(experience=True), - loggers=[interactive_logger], - ) - - # CREATE THE STRATEGY INSTANCE (ONLINE-NAIVE) - cl_strategy = OnlineNaive( - model, - torch.optim.Adam(model.parameters(), lr=0.1), - CrossEntropyLoss(), - train_passes=1, - train_mb_size=1, - eval_mb_size=32, - device=device, - evaluator=eval_plugin, - ) - - # TRAINING LOOP - print("Starting experiment...") - results = [] - - # Create online benchmark - batch_streams = scenario.streams.values() - # ocl_benchmark = OnlineCLScenario(batch_streams) - for i, exp in enumerate(scenario.train_stream): - # Create online scenario from experience exp - ocl_benchmark = OnlineCLScenario(original_streams=batch_streams, - experiences=exp, - experience_size=1, - access_task_boundaries=True) - # Train on the online train stream of the scenario - cl_strategy.train(ocl_benchmark.train_stream) - results.append(cl_strategy.eval(scenario.original_test_stream)) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "--cuda", - type=int, - default=0, - help="Select zero-indexed cuda device. -1 to use CPU.", - ) - args = parser.parse_args() - main(args) diff --git a/examples/lamaml_cifar100.py b/examples/lamaml_cifar100.py index a974031f8..7ac903238 100644 --- a/examples/lamaml_cifar100.py +++ b/examples/lamaml_cifar100.py @@ -10,7 +10,7 @@ import torch.optim.lr_scheduler from avalanche.benchmarks.classic import SplitCIFAR100 from avalanche.models import MTSimpleCNN -from avalanche.training.supervised import LaMAML +from avalanche.training.supervised.lamaml import LaMAML from avalanche.training.plugins import ReplayPlugin from avalanche.training.storage_policy import ReservoirSamplingBuffer from avalanche.evaluation.metrics import ( diff --git a/examples/NEW_naive.py b/examples/naive.py similarity index 96% rename from examples/NEW_naive.py rename to examples/naive.py index 5aa29388a..91e895dac 100644 --- a/examples/NEW_naive.py +++ b/examples/naive.py @@ -9,7 +9,7 @@ from avalanche.training.plugins import EvaluationPlugin from avalanche.benchmarks.classic import SplitMNIST from avalanche.logging import InteractiveLogger -from avalanche.training.supervised.NEW_strategy_wrappers import ( +from avalanche.training.supervised import ( Naive ) From 50f2683e30e2dca508fa973021f15f169bb14f53 Mon Sep 17 00:00:00 2001 From: hamedhemati Date: Wed, 31 Aug 2022 17:38:06 +0200 Subject: [PATCH 06/10] Update meta-learning example w.r.t the new template --- avalanche/training/plugins/NEW_lamaml.py | 251 ------------------ avalanche/training/supervised/lamaml.py | 93 +++---- .../templates/update_type/meta_update.py | 4 +- examples/NEW_lamaml.py | 73 ----- examples/lamaml_cifar100.py | 2 +- 5 files changed, 43 insertions(+), 380 deletions(-) delete mode 100644 avalanche/training/plugins/NEW_lamaml.py delete mode 100644 examples/NEW_lamaml.py diff --git a/avalanche/training/plugins/NEW_lamaml.py b/avalanche/training/plugins/NEW_lamaml.py deleted file mode 100644 index a44e8057d..000000000 --- a/avalanche/training/plugins/NEW_lamaml.py +++ /dev/null @@ -1,251 +0,0 @@ -from typing import TYPE_CHECKING - -import torch -import torch.nn as nn -import torch.nn.functional as F - -import math - -try: - import higher -except ImportError: - raise ModuleNotFoundError("higher not found, if you want to use " - "MAML please install avalanche with " - "the extra dependencies: " - "pip install avalanche-lib[extra]") - -from 
avalanche.NEW_core import BaseSGDPlugin -from avalanche.models.utils import avalanche_forward - - -class LaMAMLPlugin(BaseSGDPlugin): - """LaMAML Plugin. - """ - - def __init__( - self, - n_inner_updates: int = 5, - second_order: bool = True, - grad_clip_norm: float = 1.0, - learn_lr: bool = True, - lr_alpha: float = 0.25, - sync_update: bool = False, - alpha_init: float = 0.1, - ): - """Implementation of Look-ahead MAML (LaMAML) algorithm in Avalanche - using Higher library for applying fast updates. - - :param n_inner_updates: number of inner updates. - :param second_order: If True, it computes the second-order derivative - of the inner update trajectory for the meta-loss. Otherwise, - it computes the meta-loss with a first-order approximation. - :param grad_clip_norm: gradient clipping norm. - :param learn_lr: if True, it learns the LR for each batch of data. - :param lr_alpha: LR for learning the main update's learning rate. - :param sync_update: if True, it updates the meta-model with a fixed - learning rate. Mutually exclusive with learn_lr and - lr_alpha. - :param alpha_init: initialization value for learnable LRs. - - """ - - super().__init__() - - self.n_inner_updates = n_inner_updates - self.second_order = second_order - self.grad_clip_norm = grad_clip_norm - self.learn_lr = learn_lr - self.lr_alpha = lr_alpha - self.sync_update = sync_update - self.alpha_init = alpha_init - self.alpha_params = None - self.is_model_initialized = False - - def before_training(self, strategy, **kwargs): - if not self.is_model_initialized: - strategy.model.apply(init_kaiming_normal) - self.is_model_initialized = True - - def before_training_exp(self, strategy, **kwargs): - # Initialize alpha-lr parameters - if self.alpha_params is None: - self.alpha_params = nn.ParameterList([]) - # Iterate through model parameters and add the corresponding - # alpha_lr parameter - for p in strategy.model.parameters(): - alpha_param = nn.Parameter( - torch.ones(p.shape) * self.alpha_init, requires_grad=True - ) - self.alpha_params.append(alpha_param) - self.alpha_params.to(strategy.device) - - # Create optimizer for the alpha_lr parameters - self.optimizer_alpha = torch.optim.SGD( - self.alpha_params.parameters(), lr=self.lr_alpha - ) - - # For task-incremental heads: - # If new parameters are added to the model, update alpha_lr - # parameters respectively - if len(self.alpha_params) < len(list(strategy.model.parameters())): - for iter_p, p in enumerate(strategy.model.parameters()): - # Skip the older parameters - if iter_p < len(self.alpha_params): - continue - # Add new alpha_lr for the new parameter - alpha_param = nn.Parameter( - torch.ones(p.shape) * self.alpha_init, requires_grad=True - ) - self.alpha_params.append(alpha_param) - - self.alpha_params.to(strategy.device) - # Re-init optimizer for the new set of alpha_lr parameters - self.optimizer_alpha = torch.optim.SGD( - self.alpha_params.parameters(), lr=self.lr_alpha - ) - - def before_inner_updates(self, strategy, **kwargs): - # Create a stateless copy of the model for inner-updates - self.fast_model = higher.patch.monkeypatch( - strategy.model, - copy_initial_weights=True, - track_higher_grads=self.second_order, - ) - if strategy.clock.train_exp_counter > 0: - self.batch_x = strategy.mb_x[: strategy.train_mb_size] - self.batch_y = strategy.mb_y[: strategy.train_mb_size] - self.batch_t = strategy.mb_task_id[: strategy.train_mb_size] - else: - self.batch_x = strategy.mb_x - self.batch_y = strategy.mb_y - self.batch_t = strategy.mb_task_id - - bsize_data = 
self.batch_x.shape[0] - self.rough_sz = math.ceil(bsize_data / self.n_inner_updates) - self.meta_losses = [0 for _ in range(self.n_inner_updates)] - - def single_inner_update(self, x, y, t, criterion): - logits = avalanche_forward(self.fast_model, x, t) - loss = criterion(logits, y) - - # Compute gradient with respect to the current fast weights - grads = list( - torch.autograd.grad( - loss, - self.fast_model.fast_params, - create_graph=self.second_order, - retain_graph=self.second_order, - allow_unused=True, - ) - ) - - # Clip grad norms - grads = [ - torch.clamp(g, min=-self.grad_clip_norm, max=self.grad_clip_norm) - if g is not None - else g - for g in grads - ] - - # New fast parameters - new_fast_params = [ - param - alpha * grad if grad is not None else param - for (param, alpha, grad) in zip( - self.fast_model.fast_params, self.alpha_params.parameters(), - grads - ) - ] - - # Update fast model's weights - self.fast_model.update_params(new_fast_params) - - def inner_updates(self, strategy, **kwargs): - """Update fast weights using current samples and - return the updated fast model. - """ - for i in range(self.n_inner_updates): - batch_x_i = self.batch_x[i * self.rough_sz: - (i + 1) * self.rough_sz] - batch_y_i = self.batch_y[i * self.rough_sz: - (i + 1) * self.rough_sz] - batch_t_i = self.batch_t[i * self.rough_sz: - (i + 1) * self.rough_sz] - - # We assume that samples for inner update are from the same task - self.single_inner_update(batch_x_i, batch_y_i, batch_t_i, - strategy._criterion) - - # Compute meta-loss with the combination of batch and buffer samples - logits_meta = avalanche_forward( - self.fast_model, strategy.mb_x, strategy.mb_task_id - ) - meta_loss = strategy._criterion(logits_meta, strategy.mb_y) - self.meta_losses[i] = meta_loss - - def apply_grad(self, module, grads, device): - for i, p in enumerate(module.parameters()): - grad = grads[i] - if grad is None: - grad = torch.zeros(p.shape).float().to(device) - - if p.grad is None: - p.grad = grad - else: - p.grad += grad - - def outer_update(self, strategy, **kwargs): - # Compute meta-gradient for the main model - meta_loss = sum(self.meta_losses) / len(self.meta_losses) - meta_grad_model = torch.autograd.grad( - meta_loss, - self.fast_model.parameters(time=0), - retain_graph=True, - allow_unused=True, - ) - strategy.model.zero_grad() - self.apply_grad(strategy.model, meta_grad_model, strategy.device) - - # Clip gradients - torch.nn.utils.clip_grad_norm_( - strategy.model.parameters(), self.grad_clip_norm - ) - - if self.learn_lr: - # Compute meta-gradient for alpha-lr parameters - meta_grad_alpha = torch.autograd.grad( - meta_loss, self.alpha_params.parameters(), allow_unused=True - ) - self.alpha_params.zero_grad() - self.apply_grad(self.alpha_params, meta_grad_alpha, strategy.device) - - torch.nn.utils.clip_grad_norm_( - self.alpha_params.parameters(), self.grad_clip_norm - ) - self.optimizer_alpha.step() - - # If sync-update: update with self.optimizer - # o.w: use the learned LRs to update the model - if self.sync_update: - self.optimizer.step() - else: - for p, alpha in zip( - strategy.model.parameters(), self.alpha_params.parameters() - ): - # Use relu on updated LRs to avoid negative values - p.data = p.data - p.grad * F.relu(alpha) - - strategy.loss = meta_loss - - -def init_kaiming_normal(m): - if isinstance(m, nn.Conv2d): - torch.nn.init.constant_(m.weight.data, 1.0) - torch.nn.init.kaiming_normal_(m.weight.data) - if m.bias is not None: - m.bias.data.zero_() - - elif isinstance(m, nn.Linear): - 
torch.nn.init.constant_(m.weight.data, 1.0) - torch.nn.init.kaiming_normal_(m.weight.data) - if m.bias is not None: - m.bias.data.zero_() diff --git a/avalanche/training/supervised/lamaml.py b/avalanche/training/supervised/lamaml.py index e7852b029..b13c1148f 100644 --- a/avalanche/training/supervised/lamaml.py +++ b/avalanche/training/supervised/lamaml.py @@ -5,6 +5,7 @@ import torch.nn.functional as F from torch.nn import Module, CrossEntropyLoss from torch.optim import Optimizer +import math try: import higher @@ -13,29 +14,14 @@ "MAML please install avalanche with " "the extra dependencies: " "pip install avalanche-lib[extra]") -import math from avalanche.training.plugins import SupervisedPlugin, EvaluationPlugin from avalanche.training.plugins.evaluation import default_evaluator -from avalanche.training.templates import SupervisedTemplate +from avalanche.training.templates import SupervisedMetaLearningTemplate from avalanche.models.utils import avalanche_forward -def init_kaiming_normal(m): - if isinstance(m, nn.Conv2d): - torch.nn.init.constant_(m.weight.data, 1.0) - torch.nn.init.kaiming_normal_(m.weight.data) - if m.bias is not None: - m.bias.data.zero_() - - elif isinstance(m, nn.Linear): - torch.nn.init.constant_(m.weight.data, 1.0) - torch.nn.init.kaiming_normal_(m.weight.data) - if m.bias is not None: - m.bias.data.zero_() - - -class LaMAML(SupervisedTemplate): +class LaMAML(SupervisedMetaLearningTemplate): def __init__( self, model: Module, @@ -139,21 +125,18 @@ def _before_training_exp(self, **kwargs): self.alpha_params.parameters(), lr=self.lr_alpha ) - def training_epoch(self, **kwargs): - for self.mbatch in self.dataloader: - if self._stop_training: - break - - self._unpack_minibatch() - self._before_training_iteration(**kwargs) - self.loss = 0 - - self.train_batch() + def apply_grad(self, module, grads): + for i, p in enumerate(module.parameters()): + grad = grads[i] + if grad is None: + grad = torch.zeros(p.shape).float().to(self.device) - self.mb_output = self.forward() - self._after_training_iteration(**kwargs) + if p.grad is None: + p.grad = grad + else: + p.grad += grad - def inner_update(self, fast_model, x, y, t): + def inner_update_step(self, fast_model, x, y, t): """Update fast weights using current samples and return the updated fast model. 
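This refactor moves the fast-weight loop from the plugin into the strategy itself: `_inner_updates` builds a stateless `higher` copy of the model, performs `n_inner_updates` steps on slices of the current batch and accumulates `self.meta_losses`, and `_outer_update` averages those losses into the meta-gradient. Construction matches the CIFAR-100 example touched below; a minimal sketch under those assumptions (hyperparameters are the example's, not prescriptive, and the model/benchmark choice is illustrative):

import torch

from avalanche.benchmarks.classic import SplitCIFAR100
from avalanche.models import MTSimpleCNN
from avalanche.training.supervised.lamaml import LaMAML

benchmark = SplitCIFAR100(n_experiences=20, return_task_id=True,
                          class_ids_from_zero_in_each_exp=True)
model = MTSimpleCNN()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

strategy = LaMAML(
    model,
    optimizer,
    torch.nn.CrossEntropyLoss(),
    n_inner_updates=5,       # fast-weight steps taken per training batch
    second_order=True,       # backprop through the inner trajectory
    grad_clip_norm=1.0,
    learn_lr=True,           # learn per-parameter LRs (the alphas)
    lr_alpha=0.25,           # LR used to update the alphas themselves
    sync_update=False,       # False: apply the learned alphas in the outer step
    train_mb_size=10,
    train_epochs=1,
    eval_mb_size=100,
)

for experience in benchmark.train_stream:
    strategy.train(experience)
    strategy.eval(benchmark.test_stream)
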
""" @@ -190,20 +173,9 @@ def inner_update(self, fast_model, x, y, t): # Update fast model's weights fast_model.update_params(new_fast_params) - def apply_grad(self, module, grads): - for i, p in enumerate(module.parameters()): - grad = grads[i] - if grad is None: - grad = torch.zeros(p.shape).float().to(self.device) - - if p.grad is None: - p.grad = grad - else: - p.grad += grad - - def train_batch(self): + def _inner_updates(self, **kwargs): # Create a stateless copy of the model for inner-updates - fast_model = higher.patch.monkeypatch( + self.fast_model = higher.patch.monkeypatch( self.model, copy_initial_weights=True, track_higher_grads=self.second_order, @@ -217,28 +189,29 @@ def train_batch(self): bsize_data = batch_x.shape[0] rough_sz = math.ceil(bsize_data / self.n_inner_updates) - meta_losses = [0 for _ in range(self.n_inner_updates)] + self.meta_losses = [0 for _ in range(self.n_inner_updates)] for i in range(self.n_inner_updates): - batch_x_i = batch_x[i * rough_sz : (i + 1) * rough_sz] - batch_y_i = batch_y[i * rough_sz : (i + 1) * rough_sz] - batch_t_i = batch_t[i * rough_sz : (i + 1) * rough_sz] + batch_x_i = batch_x[i * rough_sz: (i + 1) * rough_sz] + batch_y_i = batch_y[i * rough_sz: (i + 1) * rough_sz] + batch_t_i = batch_t[i * rough_sz: (i + 1) * rough_sz] # We assume that samples for inner update are from the same task - self.inner_update(fast_model, batch_x_i, batch_y_i, batch_t_i) + self.inner_update_step(self.fast_model, batch_x_i, batch_y_i, batch_t_i) # Compute meta-loss with the combination of batch and buffer samples logits_meta = avalanche_forward( - fast_model, self.mb_x, self.mb_task_id + self.fast_model, self.mb_x, self.mb_task_id ) meta_loss = self._criterion(logits_meta, self.mb_y) - meta_losses[i] = meta_loss + self.meta_losses[i] = meta_loss + def _outer_update(self, **kwargs): # Compute meta-gradient for the main model - meta_loss = sum(meta_losses) / len(meta_losses) + meta_loss = sum(self.meta_losses) / len(self.meta_losses) meta_grad_model = torch.autograd.grad( meta_loss, - fast_model.parameters(time=0), + self.fast_model.parameters(time=0), retain_graph=True, allow_unused=True, ) @@ -269,9 +242,23 @@ def train_batch(self): self.optimizer.step() else: for p, alpha in zip( - self.model.parameters(), self.alpha_params.parameters() + self.model.parameters(), self.alpha_params.parameters() ): # Use relu on updated LRs to avoid negative values p.data = p.data - p.grad * F.relu(alpha) self.loss = meta_loss + + +def init_kaiming_normal(m): + if isinstance(m, nn.Conv2d): + torch.nn.init.constant_(m.weight.data, 1.0) + torch.nn.init.kaiming_normal_(m.weight.data) + if m.bias is not None: + m.bias.data.zero_() + + elif isinstance(m, nn.Linear): + torch.nn.init.constant_(m.weight.data, 1.0) + torch.nn.init.kaiming_normal_(m.weight.data) + if m.bias is not None: + m.bias.data.zero_() diff --git a/avalanche/training/templates/update_type/meta_update.py b/avalanche/training/templates/update_type/meta_update.py index cc1090385..d387db9c0 100644 --- a/avalanche/training/templates/update_type/meta_update.py +++ b/avalanche/training/templates/update_type/meta_update.py @@ -36,7 +36,7 @@ def _before_inner_updates(self, **kwargs): trigger_plugins(self, "before_inner_updates", **kwargs) def _inner_updates(self, **kwargs): - trigger_plugins(self, "inner_updates", **kwargs) + raise NotImplementedError() def _after_inner_updates(self, **kwargs): trigger_plugins(self, "after_inner_updates", **kwargs) @@ -45,7 +45,7 @@ def _before_outer_update(self, **kwargs): 
trigger_plugins(self, "before_outer_update", **kwargs) def _outer_update(self, **kwargs): - trigger_plugins(self, "outer_update", **kwargs) + raise NotImplementedError() def _after_outer_update(self, **kwargs): trigger_plugins(self, "after_outer_update", **kwargs) diff --git a/examples/NEW_lamaml.py b/examples/NEW_lamaml.py deleted file mode 100644 index 5e30e303e..000000000 --- a/examples/NEW_lamaml.py +++ /dev/null @@ -1,73 +0,0 @@ -import torch -from os.path import expanduser - -from avalanche.models import MTSimpleMLP -from avalanche.evaluation.metrics import ( - accuracy_metrics, - loss_metrics, -) -from avalanche.training.plugins import EvaluationPlugin -from avalanche.benchmarks.classic import SplitMNIST -from avalanche.logging import InteractiveLogger -from avalanche.training.templates.common_templates import ( - SupervisedMetaLearningTemplate -) -from avalanche.training.plugins.NEW_lamaml import LaMAMLPlugin - - -def main(): - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - print(f"Using device: {device}") - - scenario = SplitMNIST( - n_experiences=5, - dataset_root=expanduser("~") + "/.avalanche/data/mnist/", - return_task_id=True - ) - - # choose some metrics and evaluation method - interactive_logger = InteractiveLogger() - eval_plugin = EvaluationPlugin( - accuracy_metrics( - minibatch=True, epoch=True, experience=True, stream=True - ), - loss_metrics(minibatch=True, epoch=True, experience=True, stream=True), - loggers=[interactive_logger], - ) - - model = MTSimpleMLP(hidden_size=128) - optimizer = torch.optim.SGD(model.parameters(), lr=0.01) - criterion = torch.nn.CrossEntropyLoss() - - # LaMAML plugin - lamaml_plugin = LaMAMLPlugin( - n_inner_updates=5, - second_order=True, - grad_clip_norm=1.0, - learn_lr=True, - lr_alpha=0.25, - sync_update=False, - alpha_init=0.1, - ) - - # create strategy - strategy = SupervisedMetaLearningTemplate( - model, - optimizer, - criterion, - train_epochs=1, - device=device, - train_mb_size=32, - evaluator=eval_plugin, - plugins=[lamaml_plugin] - ) - - # train on the selected scenario with the chosen strategy - for experience in scenario.train_stream: - print("Start training on experience ", experience.current_experience) - strategy.train(experience) - strategy.eval(scenario.test_stream[:]) - - -if __name__ == "__main__": - main() diff --git a/examples/lamaml_cifar100.py b/examples/lamaml_cifar100.py index 7ac903238..5e713e972 100644 --- a/examples/lamaml_cifar100.py +++ b/examples/lamaml_cifar100.py @@ -77,7 +77,7 @@ def main(args): lr_alpha=0.25, sync_update=False, train_mb_size=10, - train_epochs=10, + train_epochs=1, eval_mb_size=100, device=device, plugins=[replay_plugin], From a66a6a36c3512094689da8729520fda1ac98ba5f Mon Sep 17 00:00:00 2001 From: hamedhemati Date: Wed, 31 Aug 2022 18:50:12 +0200 Subject: [PATCH 07/10] Add `model_adaptation` to `observation_type` implementation --- avalanche/training/supervised/ar1.py | 2 +- avalanche/training/supervised/cumulative.py | 2 +- avalanche/training/supervised/deep_slda.py | 2 +- avalanche/training/supervised/icarl.py | 2 +- .../training/supervised/joint_training.py | 2 +- avalanche/training/supervised/lamaml.py | 3 ++- .../training/supervised/strategy_wrappers.py | 2 +- .../supervised/strategy_wrappers_online.py | 2 +- avalanche/training/templates/base_sgd.py | 12 +-------- .../observation_type/batch_observation.py | 11 ++++++++ .../observation_type/online_observation.py | 27 +++++++++++++++++++ examples/lamaml_cifar100.py | 3 ++- 
tests/training/test_online_strategies.py | 7 +++-- 13 files changed, 55 insertions(+), 22 deletions(-) diff --git a/avalanche/training/supervised/ar1.py b/avalanche/training/supervised/ar1.py index ceaa06fb7..a514c57e8 100644 --- a/avalanche/training/supervised/ar1.py +++ b/avalanche/training/supervised/ar1.py @@ -16,7 +16,7 @@ SynapticIntelligencePlugin, CWRStarPlugin, ) -from avalanche.training.templates.supervised import SupervisedTemplate +from avalanche.training.templates import SupervisedTemplate from avalanche.training.utils import ( replace_bn_with_brn, get_last_fc_layer, diff --git a/avalanche/training/supervised/cumulative.py b/avalanche/training/supervised/cumulative.py index a143fcad1..28123c809 100644 --- a/avalanche/training/supervised/cumulative.py +++ b/avalanche/training/supervised/cumulative.py @@ -7,7 +7,7 @@ from avalanche.benchmarks.utils import AvalancheConcatDataset from avalanche.training.plugins.evaluation import default_evaluator from avalanche.training.plugins import SupervisedPlugin, EvaluationPlugin -from avalanche.training.templates.supervised import SupervisedTemplate +from avalanche.training.templates import SupervisedTemplate class Cumulative(SupervisedTemplate): diff --git a/avalanche/training/supervised/deep_slda.py b/avalanche/training/supervised/deep_slda.py index 06446d86d..676652636 100644 --- a/avalanche/training/supervised/deep_slda.py +++ b/avalanche/training/supervised/deep_slda.py @@ -5,7 +5,7 @@ import torch from avalanche.training.plugins import SupervisedPlugin -from avalanche.training.templates.supervised import SupervisedTemplate +from avalanche.training.templates import SupervisedTemplate from avalanche.training.plugins.evaluation import default_evaluator from avalanche.models.dynamic_modules import MultiTaskModule from avalanche.models import FeatureExtractorBackbone diff --git a/avalanche/training/supervised/icarl.py b/avalanche/training/supervised/icarl.py index 3b559fae9..0f5cd63a6 100644 --- a/avalanche/training/supervised/icarl.py +++ b/avalanche/training/supervised/icarl.py @@ -18,7 +18,7 @@ from avalanche.training.plugins.strategy_plugin import SupervisedPlugin from torch.nn import Module from torch.utils.data import DataLoader -from avalanche.training.templates.supervised import SupervisedTemplate +from avalanche.training.templates import SupervisedTemplate class ICaRL(SupervisedTemplate): diff --git a/avalanche/training/supervised/joint_training.py b/avalanche/training/supervised/joint_training.py index d84627151..335f5db43 100644 --- a/avalanche/training/supervised/joint_training.py +++ b/avalanche/training/supervised/joint_training.py @@ -17,7 +17,7 @@ from avalanche.benchmarks.scenarios import ClassificationExperience from avalanche.benchmarks.utils import AvalancheConcatDataset from avalanche.training.plugins.evaluation import default_evaluator -from avalanche.training.templates.supervised import SupervisedTemplate +from avalanche.training.templates import SupervisedTemplate from avalanche.models import DynamicModule if TYPE_CHECKING: diff --git a/avalanche/training/supervised/lamaml.py b/avalanche/training/supervised/lamaml.py index b13c1148f..431f3f55c 100644 --- a/avalanche/training/supervised/lamaml.py +++ b/avalanche/training/supervised/lamaml.py @@ -197,7 +197,8 @@ def _inner_updates(self, **kwargs): batch_t_i = batch_t[i * rough_sz: (i + 1) * rough_sz] # We assume that samples for inner update are from the same task - self.inner_update_step(self.fast_model, batch_x_i, batch_y_i, batch_t_i) + 
self.inner_update_step(self.fast_model, batch_x_i, batch_y_i, + batch_t_i) # Compute meta-loss with the combination of batch and buffer samples logits_meta = avalanche_forward( diff --git a/avalanche/training/supervised/strategy_wrappers.py b/avalanche/training/supervised/strategy_wrappers.py index 713ba8c7c..195c75ce1 100644 --- a/avalanche/training/supervised/strategy_wrappers.py +++ b/avalanche/training/supervised/strategy_wrappers.py @@ -34,7 +34,7 @@ MASPlugin, ) from avalanche.training.templates.base import BaseTemplate -from avalanche.training.templates.common_templates import SupervisedTemplate +from avalanche.training.templates import SupervisedTemplate from avalanche.models.generator import MlpVAE, VAE_loss from avalanche.logging import InteractiveLogger diff --git a/avalanche/training/supervised/strategy_wrappers_online.py b/avalanche/training/supervised/strategy_wrappers_online.py index 24464690e..d757e2401 100644 --- a/avalanche/training/supervised/strategy_wrappers_online.py +++ b/avalanche/training/supervised/strategy_wrappers_online.py @@ -15,7 +15,7 @@ from avalanche.training.plugins.evaluation import default_evaluator from avalanche.training.plugins import SupervisedPlugin, EvaluationPlugin -from avalanche.training.templates.common_templates import ( +from avalanche.training.templates import ( OnlineSupervisedTemplate, ) diff --git a/avalanche/training/templates/base_sgd.py b/avalanche/training/templates/base_sgd.py index 41c2f57e5..46813a9ed 100644 --- a/avalanche/training/templates/base_sgd.py +++ b/avalanche/training/templates/base_sgd.py @@ -47,7 +47,7 @@ def __init__( eval_mb_size: Optional[int] = 1, device="cpu", plugins: Optional[List["SupervisedPlugin"]] = None, - evaluator: EvaluationPlugin = default_evaluator, + evaluator: EvaluationPlugin = default_evaluator(), eval_every=-1, peval_mode="epoch", ): @@ -347,16 +347,6 @@ def eval_dataset_adaptation(self, **kwargs): self.adapted_dataset = self.experience.dataset self.adapted_dataset = self.adapted_dataset.eval() - def model_adaptation(self, model=None): - """Adapts the model to the current data. - - Calls the :class:`~avalanche.models.DynamicModule`s adaptation. - """ - if model is None: - model = self.model - avalanche_model_adaptation(model, self.experience) - return model.to(self.device) - def _unpack_minibatch(self): """Move to device""" # First verify the mini-batch diff --git a/avalanche/training/templates/observation_type/batch_observation.py b/avalanche/training/templates/observation_type/batch_observation.py index 4f67ef91b..35b887d11 100644 --- a/avalanche/training/templates/observation_type/batch_observation.py +++ b/avalanche/training/templates/observation_type/batch_observation.py @@ -2,6 +2,7 @@ from avalanche.benchmarks import CLExperience from avalanche.models.dynamic_optimizers import reset_optimizer +from avalanche.models.utils import avalanche_model_adaptation class BatchObservation: @@ -31,6 +32,16 @@ def _train_exp( self.training_epoch(**kwargs) self._after_training_epoch(**kwargs) + def model_adaptation(self, model=None): + """Adapts the model to the current data. + + Calls the :class:`~avalanche.models.DynamicModule`s adaptation. + """ + if model is None: + model = self.model + avalanche_model_adaptation(model, self.experience) + return model.to(self.device) + def make_optimizer(self): """Optimizer initialization. 
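Moving `model_adaptation` onto the observation-type mixins lets batch and online observation adapt dynamic modules differently: the online variant can adapt against the whole origin experience when task boundaries are accessible, as the next hunk shows. The concrete templates are then compositions of an observation type, a problem type and an update type over `BaseSGDTemplate`. A sketch of such a composition, assuming the mixin names exported by the `observation_type`, `problem_type` and `update_type` packages introduced in this series (the composed class itself is hypothetical):

from avalanche.training.templates.base_sgd import BaseSGDTemplate
from avalanche.training.templates.observation_type import OnlineObservation
from avalanche.training.templates.problem_type import SupervisedProblem
from avalanche.training.templates.update_type import SGDUpdate


class MyOnlineSupervisedTemplate(OnlineObservation, SupervisedProblem,
                                 SGDUpdate, BaseSGDTemplate):
    # Per the structure introduced in this patch series, OnlineObservation
    # supplies model_adaptation()/make_optimizer(), while the problem and
    # update mixins supply the minibatch properties, the loss and the
    # per-iteration SGD loop inherited from BaseSGDTemplate.
    pass
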
diff --git a/avalanche/training/templates/observation_type/online_observation.py b/avalanche/training/templates/observation_type/online_observation.py index faf98dca0..e542375a4 100644 --- a/avalanche/training/templates/observation_type/online_observation.py +++ b/avalanche/training/templates/observation_type/online_observation.py @@ -3,6 +3,7 @@ from avalanche.benchmarks import OnlineCLExperience from avalanche.models.dynamic_optimizers import reset_optimizer from avalanche.models.dynamic_optimizers import update_optimizer +from avalanche.models.utils import avalanche_model_adaptation class OnlineObservation: @@ -44,6 +45,32 @@ def make_optimizer(self): self.model.parameters(), reset_state=False) + def model_adaptation(self, model=None): + """Adapts the model to the current data. + + Calls the :class:`~avalanche.models.DynamicModule`s adaptation. + """ + if model is None: + model = self.model + + # For training: + if isinstance(self.experience, OnlineCLExperience): + # If the strategy has access to task boundaries, adapt the model + # for the whole origin experience to add the + if self.experience.access_task_boundaries: + avalanche_model_adaptation(model, + self.experience.origin_experience) + else: + self.model_params_before_adaptation = list(model.parameters()) + avalanche_model_adaptation(model, self.experience) + + # For evaluation, the experience is not necessarily an online + # experience: + else: + avalanche_model_adaptation(model, self.experience) + + return model.to(self.device) + def maybe_adapt_model_and_make_optimizer(self): # If strategy has access to the task boundaries, and the current # sub-experience is the first sub-experience in the online (sub-)stream, diff --git a/examples/lamaml_cifar100.py b/examples/lamaml_cifar100.py index 5e713e972..a85b7ba0b 100644 --- a/examples/lamaml_cifar100.py +++ b/examples/lamaml_cifar100.py @@ -31,7 +31,8 @@ def main(args): ) # --- SCENARIO CREATION - scenario = SplitCIFAR100(n_experiences=20, return_task_id=True) + scenario = SplitCIFAR100(n_experiences=20, return_task_id=True, + class_ids_from_zero_in_each_exp=True) config = {"scenario": "SplitCIFAR100"} # MODEL CREATION diff --git a/tests/training/test_online_strategies.py b/tests/training/test_online_strategies.py index e5cf6c3eb..fdced8935 100644 --- a/tests/training/test_online_strategies.py +++ b/tests/training/test_online_strategies.py @@ -10,6 +10,7 @@ from avalanche.benchmarks.scenarios.online_scenario import OnlineCLScenario from avalanche.training import OnlineNaive from tests.unit_tests_utils import get_fast_benchmark +from avalanche.training.plugins.evaluation import default_evaluator class StrategyTest(unittest.TestCase): @@ -51,7 +52,8 @@ def test_naive(self): criterion, train_mb_size=1, device=self.device, - eval_mb_size=50 + eval_mb_size=50, + evaluator=default_evaluator(), ) ocl_benchmark = OnlineCLScenario(benchmark_streams, access_task_boundaries=True) @@ -65,7 +67,8 @@ def test_naive(self): criterion, train_mb_size=1, device=self.device, - eval_mb_size=50 + eval_mb_size=50, + evaluator=default_evaluator(), ) ocl_benchmark = OnlineCLScenario(benchmark_streams, access_task_boundaries=False) From 1cd1924249118a9e377cf28013efbba46b37eaee Mon Sep 17 00:00:00 2001 From: hamedhemati Date: Fri, 30 Sep 2022 16:34:54 +0200 Subject: [PATCH 08/10] Update imports and delete old template files --- avalanche/evaluation/metric_definitions.py | 2 +- avalanche/evaluation/metric_utils.py | 2 +- avalanche/evaluation/metrics/checkpoint.py | 2 +- 
.../evaluation/metrics/confusion_matrix.py | 2 +- .../evaluation/metrics/forgetting_bwt.py | 2 +- .../evaluation/metrics/forward_transfer.py | 2 +- avalanche/evaluation/metrics/gpu_usage.py | 2 +- .../evaluation/metrics/images_samples.py | 2 +- .../evaluation/metrics/labels_repartition.py | 2 +- avalanche/evaluation/metrics/mean_scores.py | 2 +- avalanche/evaluation/metrics/ram_usage.py | 2 +- avalanche/evaluation/metrics/timing.py | 2 +- avalanche/logging/interactive_logging.py | 6 +- avalanche/logging/wandb_logger.py | 2 +- avalanche/training/plugins/evaluation.py | 2 +- avalanche/training/plugins/gdumb.py | 2 +- avalanche/training/plugins/gss_greedy.py | 2 +- avalanche/training/plugins/lr_scheduling.py | 2 +- avalanche/training/plugins/replay.py | 2 +- .../training/plugins/synaptic_intelligence.py | 2 +- avalanche/training/storage_policy.py | 2 +- .../training/templates/base_online_sgd.py | 386 ------------------ .../training/templates/online_supervised.py | 341 ---------------- avalanche/training/templates/supervised.py | 311 -------------- tests/training/test_replay.py | 2 +- tests/training/test_strategies.py | 2 +- 26 files changed, 27 insertions(+), 1061 deletions(-) delete mode 100644 avalanche/training/templates/base_online_sgd.py delete mode 100644 avalanche/training/templates/online_supervised.py delete mode 100644 avalanche/training/templates/supervised.py diff --git a/avalanche/evaluation/metric_definitions.py b/avalanche/evaluation/metric_definitions.py index 78defbbd8..a27f60794 100644 --- a/avalanche/evaluation/metric_definitions.py +++ b/avalanche/evaluation/metric_definitions.py @@ -18,7 +18,7 @@ if TYPE_CHECKING: from .metric_results import MetricResult - from ..training.templates.supervised import SupervisedTemplate + from ..training.templates import SupervisedTemplate TResult = TypeVar("TResult") TAggregated = TypeVar("TAggregated", bound="PluginMetric") diff --git a/avalanche/evaluation/metric_utils.py b/avalanche/evaluation/metric_utils.py index 2e3ca3fbc..a2a53351b 100644 --- a/avalanche/evaluation/metric_utils.py +++ b/avalanche/evaluation/metric_utils.py @@ -19,7 +19,7 @@ from torch import Tensor if TYPE_CHECKING: - from avalanche.training.templates.supervised import SupervisedTemplate + from avalanche.training.templates import SupervisedTemplate from avalanche.benchmarks.scenarios import ClassificationExperience from avalanche.evaluation import PluginMetric diff --git a/avalanche/evaluation/metrics/checkpoint.py b/avalanche/evaluation/metrics/checkpoint.py index 08b1c8ce8..ed3f0feb2 100644 --- a/avalanche/evaluation/metrics/checkpoint.py +++ b/avalanche/evaluation/metrics/checkpoint.py @@ -19,7 +19,7 @@ from avalanche.evaluation.metric_utils import get_metric_name if TYPE_CHECKING: - from avalanche.training.templates.supervised import SupervisedTemplate + from avalanche.training.templates import SupervisedTemplate class WeightCheckpoint(PluginMetric[Tensor]): diff --git a/avalanche/evaluation/metrics/confusion_matrix.py b/avalanche/evaluation/metrics/confusion_matrix.py index 88a3bc419..a0910104f 100644 --- a/avalanche/evaluation/metrics/confusion_matrix.py +++ b/avalanche/evaluation/metrics/confusion_matrix.py @@ -41,7 +41,7 @@ ) if TYPE_CHECKING: - from avalanche.training.templates.supervised import SupervisedTemplate + from avalanche.training.templates import SupervisedTemplate class ConfusionMatrix(Metric[Tensor]): diff --git a/avalanche/evaluation/metrics/forgetting_bwt.py b/avalanche/evaluation/metrics/forgetting_bwt.py index d0652a3d0..2abfae036 100644 --- 
a/avalanche/evaluation/metrics/forgetting_bwt.py +++ b/avalanche/evaluation/metrics/forgetting_bwt.py @@ -21,7 +21,7 @@ ) if TYPE_CHECKING: - from avalanche.training.templates.supervised import SupervisedTemplate + from avalanche.training.templates import SupervisedTemplate class Forgetting(Metric[Union[float, None, Dict[int, float]]]): diff --git a/avalanche/evaluation/metrics/forward_transfer.py b/avalanche/evaluation/metrics/forward_transfer.py index f6eb934bd..fdd41b482 100644 --- a/avalanche/evaluation/metrics/forward_transfer.py +++ b/avalanche/evaluation/metrics/forward_transfer.py @@ -21,7 +21,7 @@ ) if TYPE_CHECKING: - from avalanche.training.templates.supervised import SupervisedTemplate + from avalanche.training.templates import SupervisedTemplate class ForwardTransfer(Metric[Union[float, None, Dict[int, float]]]): diff --git a/avalanche/evaluation/metrics/gpu_usage.py b/avalanche/evaluation/metrics/gpu_usage.py index 7ae1f4648..6304a6213 100644 --- a/avalanche/evaluation/metrics/gpu_usage.py +++ b/avalanche/evaluation/metrics/gpu_usage.py @@ -20,7 +20,7 @@ from avalanche.evaluation.metric_results import MetricResult if TYPE_CHECKING: - from avalanche.training.templates.supervised import SupervisedTemplate + from avalanche.training.templates import SupervisedTemplate class MaxGPU(Metric[float]): diff --git a/avalanche/evaluation/metrics/images_samples.py b/avalanche/evaluation/metrics/images_samples.py index cb568dd29..d64f6c3f1 100644 --- a/avalanche/evaluation/metrics/images_samples.py +++ b/avalanche/evaluation/metrics/images_samples.py @@ -21,7 +21,7 @@ if TYPE_CHECKING: - from avalanche.training.templates.supervised import SupervisedTemplate + from avalanche.training.templates import SupervisedTemplate from avalanche.benchmarks.utils import AvalancheDataset diff --git a/avalanche/evaluation/metrics/labels_repartition.py b/avalanche/evaluation/metrics/labels_repartition.py index 2b85f855c..436e40849 100644 --- a/avalanche/evaluation/metrics/labels_repartition.py +++ b/avalanche/evaluation/metrics/labels_repartition.py @@ -26,7 +26,7 @@ if TYPE_CHECKING: - from avalanche.training.templates.supervised import SupervisedTemplate + from avalanche.training.templates import SupervisedTemplate from avalanche.evaluation.metric_results import MetricResult diff --git a/avalanche/evaluation/metrics/mean_scores.py b/avalanche/evaluation/metrics/mean_scores.py index bdc422f83..d4d927891 100644 --- a/avalanche/evaluation/metrics/mean_scores.py +++ b/avalanche/evaluation/metrics/mean_scores.py @@ -32,7 +32,7 @@ from typing_extensions import Literal if TYPE_CHECKING: - from avalanche.training.templates.supervised import SupervisedTemplate + from avalanche.training.templates import SupervisedTemplate from avalanche.evaluation.metric_results import MetricResult diff --git a/avalanche/evaluation/metrics/ram_usage.py b/avalanche/evaluation/metrics/ram_usage.py index 1dd022469..b358248b5 100644 --- a/avalanche/evaluation/metrics/ram_usage.py +++ b/avalanche/evaluation/metrics/ram_usage.py @@ -19,7 +19,7 @@ from avalanche.evaluation.metric_results import MetricResult if TYPE_CHECKING: - from avalanche.training.templates.supervised import SupervisedTemplate + from avalanche.training.templates import SupervisedTemplate class MaxRAM(Metric[float]): diff --git a/avalanche/evaluation/metrics/timing.py b/avalanche/evaluation/metrics/timing.py index 2704fc4c0..eb09ca8c2 100644 --- a/avalanche/evaluation/metrics/timing.py +++ b/avalanche/evaluation/metrics/timing.py @@ -18,7 +18,7 @@ from 
avalanche.evaluation.metrics.mean import Mean if TYPE_CHECKING: - from avalanche.training.templates.supervised import SupervisedTemplate + from avalanche.training.templates import SupervisedTemplate class ElapsedTime(Metric[float]): diff --git a/avalanche/logging/interactive_logging.py b/avalanche/logging/interactive_logging.py index dac6699c5..03dee5461 100644 --- a/avalanche/logging/interactive_logging.py +++ b/avalanche/logging/interactive_logging.py @@ -19,7 +19,7 @@ from tqdm import tqdm if TYPE_CHECKING: - from avalanche.training.templates.supervised import SupervisedTemplate + from avalanche.training.templates import SupervisedTemplate class InteractiveLogger(TextLogger, SupervisedPlugin): @@ -61,6 +61,8 @@ def before_training_epoch( metric_values: List["MetricValue"], **kwargs ): + if isinstance(strategy.experience, OnlineCLExperience): + return super().before_training_epoch(strategy, metric_values, **kwargs) self._progress.total = len(strategy.dataloader) @@ -70,6 +72,8 @@ def after_training_epoch( metric_values: List["MetricValue"], **kwargs ): + if isinstance(strategy.experience, OnlineCLExperience): + return self._end_progress() super().after_training_epoch(strategy, metric_values, **kwargs) diff --git a/avalanche/logging/wandb_logger.py b/avalanche/logging/wandb_logger.py index dd523916d..5c4902992 100644 --- a/avalanche/logging/wandb_logger.py +++ b/avalanche/logging/wandb_logger.py @@ -34,7 +34,7 @@ if TYPE_CHECKING: from avalanche.evaluation.metric_results import MetricValue - from avalanche.training.templates.supervised import SupervisedTemplate + from avalanche.training.templates import SupervisedTemplate class WandBLogger(BaseLogger, SupervisedPlugin): diff --git a/avalanche/training/plugins/evaluation.py b/avalanche/training/plugins/evaluation.py index 7e092534a..10e8eadfc 100644 --- a/avalanche/training/plugins/evaluation.py +++ b/avalanche/training/plugins/evaluation.py @@ -10,7 +10,7 @@ if TYPE_CHECKING: from avalanche.evaluation import PluginMetric from avalanche.logging import BaseLogger - from avalanche.training.templates.supervised import SupervisedTemplate + from avalanche.training.templates import SupervisedTemplate class EvaluationPlugin: diff --git a/avalanche/training/plugins/gdumb.py b/avalanche/training/plugins/gdumb.py index 85c921be5..be44c8cdc 100644 --- a/avalanche/training/plugins/gdumb.py +++ b/avalanche/training/plugins/gdumb.py @@ -5,7 +5,7 @@ from avalanche.training.storage_policy import ClassBalancedBuffer if TYPE_CHECKING: - from avalanche.training.templates.supervised import SupervisedTemplate + from avalanche.training.templates import SupervisedTemplate class GDumbPlugin(SupervisedPlugin): diff --git a/avalanche/training/plugins/gss_greedy.py b/avalanche/training/plugins/gss_greedy.py index 30121dc23..6d1914755 100644 --- a/avalanche/training/plugins/gss_greedy.py +++ b/avalanche/training/plugins/gss_greedy.py @@ -6,7 +6,7 @@ from avalanche.training.plugins.strategy_plugin import SupervisedPlugin if TYPE_CHECKING: - from ..templates.supervised import SupervisedTemplate + from ..templates import SupervisedTemplate class GSS_greedyPlugin(SupervisedPlugin): diff --git a/avalanche/training/plugins/lr_scheduling.py b/avalanche/training/plugins/lr_scheduling.py index 46c29ba63..e288ff915 100644 --- a/avalanche/training/plugins/lr_scheduling.py +++ b/avalanche/training/plugins/lr_scheduling.py @@ -8,7 +8,7 @@ import inspect if TYPE_CHECKING: - from avalanche.training.templates.supervised import SupervisedTemplate + from avalanche.training.templates 
import SupervisedTemplate class LRSchedulerPlugin(SupervisedPlugin): diff --git a/avalanche/training/plugins/replay.py b/avalanche/training/plugins/replay.py index 62add9f4d..0b8946cb1 100644 --- a/avalanche/training/plugins/replay.py +++ b/avalanche/training/plugins/replay.py @@ -9,7 +9,7 @@ ) if TYPE_CHECKING: - from avalanche.training.templates.supervised import SupervisedTemplate + from avalanche.training.templates import SupervisedTemplate class ReplayPlugin(SupervisedPlugin): diff --git a/avalanche/training/plugins/synaptic_intelligence.py b/avalanche/training/plugins/synaptic_intelligence.py index 79379fce0..6efd4da42 100644 --- a/avalanche/training/plugins/synaptic_intelligence.py +++ b/avalanche/training/plugins/synaptic_intelligence.py @@ -13,7 +13,7 @@ from avalanche.training.utils import get_layers_and_params if TYPE_CHECKING: - from ..templates.supervised import SupervisedTemplate + from ..templates import SupervisedTemplate SynDataType = Dict[str, Dict[str, Tensor]] diff --git a/avalanche/training/storage_policy.py b/avalanche/training/storage_policy.py index 576d92540..7a1cbd262 100644 --- a/avalanche/training/storage_policy.py +++ b/avalanche/training/storage_policy.py @@ -16,7 +16,7 @@ from avalanche.models import FeatureExtractorBackbone if TYPE_CHECKING: - from .templates.supervised import SupervisedTemplate + from .templates import SupervisedTemplate class ExemplarsBuffer(ABC): diff --git a/avalanche/training/templates/base_online_sgd.py b/avalanche/training/templates/base_online_sgd.py deleted file mode 100644 index 176547932..000000000 --- a/avalanche/training/templates/base_online_sgd.py +++ /dev/null @@ -1,386 +0,0 @@ -from typing import Iterable, Sequence, Optional, Union, List - -import torch -from torch.nn import Module -from torch.optim import Optimizer - -from avalanche.benchmarks import CLExperience, CLStream -from avalanche.core import BaseSGDPlugin -from avalanche.training.plugins import SupervisedPlugin, EvaluationPlugin -from avalanche.training.plugins.clock import Clock -from avalanche.training.plugins.evaluation import default_evaluator -from avalanche.training.templates.base import BaseTemplate, ExpSequence - -from typing import TYPE_CHECKING - -from avalanche.training.utils import trigger_plugins - -if TYPE_CHECKING: - from avalanche.training.templates.supervised import SupervisedTemplate - - -class BaseOnlineSGDTemplate(BaseTemplate): - """Base class for continual learning skeletons. - - **Training loop** - The training loop is organized as follows:: - - train - train_exp # for each experience - - **Evaluation loop** - The evaluation loop is organized as follows:: - - eval - eval_exp # for each experience - - """ - - PLUGIN_CLASS = BaseSGDPlugin - - def __init__( - self, - model: Module, - optimizer: Optimizer, - train_mb_size: int = 1, - train_passes: int = 1, - eval_mb_size: Optional[int] = 1, - device="cpu", - plugins: Optional[List["SupervisedPlugin"]] = None, - evaluator: EvaluationPlugin = default_evaluator(), - eval_every=-1, - peval_mode="experience", - ): - """Init. - - :param model: PyTorch model. - :param optimizer: PyTorch optimizer. - :param train_mb_size: mini-batch size for training. - :param train_passes: number of training passes. - :param eval_mb_size: mini-batch size for eval. - :param evaluator: (optional) instance of EvaluationPlugin for logging - and metric computations. None to remove logging. - :param eval_every: the frequency of the calls to `eval` inside the - training loop. -1 disables the evaluation. 
0 means `eval` is called - only at the end of the learning experience. Values >0 mean - that `eval` is called every `eval_every` experience and at the end - of the learning experience. - :param peval_mode: one of {'experience', 'iteration'}. Decides whether - the periodic evaluation during training should execute every - `eval_every` experiences or iterations (Default='experience'). - """ - super().__init__(model=model, device=device, plugins=plugins) - - self.optimizer: Optimizer = optimizer - """ PyTorch optimizer. """ - - self.train_passes: int = train_passes - """ Number of training passes. """ - - self.train_mb_size: int = train_mb_size - """ Training mini-batch size. """ - - self.eval_mb_size: int = ( - train_mb_size if eval_mb_size is None else eval_mb_size - ) - """ Eval mini-batch size. """ - - if evaluator is None: - evaluator = EvaluationPlugin() - self.plugins.append(evaluator) - self.evaluator = evaluator - """ EvaluationPlugin used for logging and metric computations. """ - - # Configure periodic evaluation. - assert peval_mode in {"experience", "iteration"} - self.eval_every = eval_every - peval = PeriodicEval(eval_every, peval_mode) - self.plugins.append(peval) - - self.clock = Clock() - """ Incremental counters for strategy events. """ - # WARNING: Clock needs to be the last plugin, otherwise - # counters will be wrong for plugins called after it. - self.plugins.append(self.clock) - - ################################################################### - # State variables. These are updated during the train/eval loops. # - ################################################################### - - self.dataloader = None - """ Dataloader. """ - - self.mbatch = None - """ Current mini-batch. """ - - self.mb_output = None - """ Model's output computed on the current mini-batch. """ - - self.loss = None - """ Loss of the current mini-batch. """ - - self._stop_training = False - - def train(self, - experiences: Union[CLExperience, - ExpSequence], - eval_streams: Optional[Sequence[Union[CLExperience, - ExpSequence]]] = None, - **kwargs): - super().train(experiences, eval_streams, **kwargs) - return self.evaluator.get_last_metrics() - - @torch.no_grad() - def eval(self, exp_list: Union[CLExperience, CLStream], **kwargs): - """ - Evaluate the current model on a series of experiences and - returns the last recorded value for each metric. - - :param exp_list: CL experience information. - :param kwargs: custom arguments. - - :return: dictionary containing last recorded value for - each metric name - """ - super().eval(exp_list, **kwargs) - return self.evaluator.get_last_metrics() - - def _before_training_exp(self, **kwargs): - self.make_train_dataloader(**kwargs) - # Model Adaptation (e.g. freeze/add new units) - - # If strategy has access to the task boundaries, and the current - # sub-experience is the first sub-experience in the online (sub-)stream, - # then adapt the model with the full origin experience: - if self.experience.access_task_boundaries: - if self.experience.is_first_subexp: - self.model = self.model_adaptation() - self.make_optimizer() - # Otherwise, adapt to the current sub-experience: - else: - self.model = self.model_adaptation() - self.make_optimizer() - - super()._before_training_exp(**kwargs) - - def _train_exp( - self, experience: CLExperience, eval_streams=None, **kwargs - ): - """Training loop over a single Experience object. - - :param experience: CL experience information. - :param eval_streams: list of streams for evaluation. 
- If None: use the training experience for evaluation. - Use [] if you do not want to evaluate during training. - :param kwargs: custom arguments. - """ - if eval_streams is None: - eval_streams = [experience] - for i, exp in enumerate(eval_streams): - if not isinstance(exp, Iterable): - eval_streams[i] = [exp] - - self.training_pass(**kwargs) - - def _before_eval_exp(self, **kwargs): - self.make_eval_dataloader(**kwargs) - # Model Adaptation (e.g. freeze/add new units) - self.model = self.model_adaptation() - super()._before_eval_exp(**kwargs) - - def _eval_exp(self, **kwargs): - self.eval_epoch(**kwargs) - - def make_train_dataloader(self, **kwargs): - """Assign dataloader to self.dataloader.""" - raise NotImplementedError() - - def make_eval_dataloader(self, **kwargs): - """Assign dataloader to self.dataloader.""" - raise NotImplementedError() - - def make_optimizer(self, **kwargs): - """Optimizer initialization.""" - raise NotImplementedError() - - def criterion(self): - """Compute loss function.""" - raise NotImplementedError() - - def forward(self): - """Compute the model's output given the current mini-batch.""" - raise NotImplementedError() - - def model_adaptation(self, model=None): - """Adapts the model to the current experience.""" - raise NotImplementedError() - - def stop_training(self): - """Signals to stop training at the next iteration.""" - self._stop_training = True - - def training_pass(self, **kwargs): - """Training pass. - - :param kwargs: - :return: - """ - for self.pass_itr in range(self.train_passes): - for self.mbatch in self.dataloader: - if self._stop_training: - break - - self._unpack_minibatch() - self._before_training_iteration(**kwargs) - - self.optimizer.zero_grad() - self.loss = 0 - - # Forward - self._before_forward(**kwargs) - self.mb_output = self.forward() - self._after_forward(**kwargs) - - # Loss & Backward - self.loss += self.criterion() - - self._before_backward(**kwargs) - self.backward() - self._after_backward(**kwargs) - - # Optimization step - self._before_update(**kwargs) - self.optimizer_step() - self._after_update(**kwargs) - - self._after_training_iteration(**kwargs) - - def backward(self): - """Run the backward pass.""" - self.loss.backward() - - def optimizer_step(self): - """Execute the optimizer step (weights update).""" - self.optimizer.step() - - def eval_epoch(self, **kwargs): - """Evaluation loop over the current `self.dataloader`.""" - for self.mbatch in self.dataloader: - self._unpack_minibatch() - self._before_eval_iteration(**kwargs) - - self._before_eval_forward(**kwargs) - self.mb_output = self.forward() - self._after_eval_forward(**kwargs) - self.loss = self.criterion() - - self._after_eval_iteration(**kwargs) - - def _unpack_minibatch(self): - """Move to device""" - for i in range(len(self.mbatch)): - self.mbatch[i] = self.mbatch[i].to(self.device) - - ######################################################### - # Plugin Triggers # - ######################################################### - - def _before_training_iteration(self, **kwargs): - trigger_plugins(self, "before_training_iteration", **kwargs) - - def _before_forward(self, **kwargs): - trigger_plugins(self, "before_forward", **kwargs) - - def _after_forward(self, **kwargs): - trigger_plugins(self, "after_forward", **kwargs) - - def _before_backward(self, **kwargs): - trigger_plugins(self, "before_backward", **kwargs) - - def _after_backward(self, **kwargs): - trigger_plugins(self, "after_backward", **kwargs) - - def _after_training_iteration(self, **kwargs): - 
trigger_plugins(self, "after_training_iteration", **kwargs) - - def _before_update(self, **kwargs): - trigger_plugins(self, "before_update", **kwargs) - - def _after_update(self, **kwargs): - trigger_plugins(self, "after_update", **kwargs) - - def _before_eval_iteration(self, **kwargs): - trigger_plugins(self, "before_eval_iteration", **kwargs) - - def _before_eval_forward(self, **kwargs): - trigger_plugins(self, "before_eval_forward", **kwargs) - - def _after_eval_forward(self, **kwargs): - trigger_plugins(self, "after_eval_forward", **kwargs) - - def _after_eval_iteration(self, **kwargs): - trigger_plugins(self, "after_eval_iteration", **kwargs) - - -class PeriodicEval(SupervisedPlugin): - """Schedules periodic evaluation during training. - - This plugin is automatically configured and added by the BaseTemplate. - """ - - def __init__(self, eval_every=-1, peval_mode="experience", - do_initial=True): - """Init. - - :param eval_every: the frequency of the calls to `eval` inside the - training loop. -1 disables the evaluation. 0 means `eval` is called - only at the end of the learning experience. Values >0 mean - that `eval` is called every `eval_every` experience and at the - end of the learning experience. - :param peval_mode: one of {'experience', 'iteration'}. Decides whether - the periodic evaluation during training should execute every - `eval_every` experience or iterations - (Default='experience'). - :param do_initial: whether to evaluate before each `train` call. - Occasionally needed becuase some metrics need to know the - accuracy before training. - """ - super().__init__() - assert peval_mode in {"experience", "iteration"} - self.eval_every = eval_every - self.peval_mode = peval_mode - self.do_initial = do_initial and eval_every > -1 - self.do_final = None - self._is_eval_updated = False - - def before_training(self, strategy, **kwargs): - """Eval before each learning experience. - - Occasionally needed because some metrics need the accuracy before - training. 
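The same scheduling contract survives in the merged template (with `peval_mode` extended to cover "experience"): `eval_every` controls how often `eval` runs from inside `train`, `peval_mode` picks which counter drives it, and `do_initial` triggers one evaluation before training starts. A minimal sketch of wiring it from a strategy constructor, assuming the `Naive` wrapper and a standard split benchmark (the values are illustrative):

import torch

from avalanche.benchmarks.classic import SplitMNIST
from avalanche.models import SimpleMLP
from avalanche.training.supervised import Naive

benchmark = SplitMNIST(n_experiences=5)
model = SimpleMLP(num_classes=benchmark.n_classes)

strategy = Naive(
    model,
    torch.optim.SGD(model.parameters(), lr=0.01),
    torch.nn.CrossEntropyLoss(),
    train_mb_size=32,
    train_epochs=4,
    eval_every=2,   # evaluate every 2 epochs (default peval_mode="epoch")
)

for experience in benchmark.train_stream:
    # eval_streams is what PeriodicEval iterates over in _peval().
    strategy.train(experience, eval_streams=[benchmark.test_stream])
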
- """ - if self.do_initial: - self._peval(strategy, **kwargs) - - def _peval(self, strategy, **kwargs): - for el in strategy._eval_streams: - strategy.eval(el, **kwargs) - - def _maybe_peval(self, strategy, counter, **kwargs): - if self.eval_every > 0 and counter % self.eval_every == 0: - self._peval(strategy, **kwargs) - - def after_training_exp(self, strategy: "BaseOnlineSGDTemplate", **kwargs): - """Periodic eval controlled by `self.eval_every` and - `self.peval_mode`.""" - if self.peval_mode == "experience": - self._maybe_peval(strategy, strategy.clock.train_exp_counter, - **kwargs) - - def after_training_iteration(self, strategy: "BaseOnlineSGDTemplate", - **kwargs): - """Periodic eval controlled by `self.eval_every` and - `self.peval_mode`.""" - if self.peval_mode == "iteration": - self._maybe_peval(strategy, strategy.clock.train_exp_iterations, - **kwargs) diff --git a/avalanche/training/templates/online_supervised.py b/avalanche/training/templates/online_supervised.py deleted file mode 100644 index 6354471cb..000000000 --- a/avalanche/training/templates/online_supervised.py +++ /dev/null @@ -1,341 +0,0 @@ -from typing import Sequence, Optional -from pkg_resources import parse_version - -import torch -from torch.nn import Module, CrossEntropyLoss -from torch.optim import Optimizer -from torch.utils.data import DataLoader - -from avalanche.benchmarks.utils.data_loader import TaskBalancedDataLoader, \ - collate_from_data_or_kwargs -from avalanche.models import avalanche_forward -from avalanche.models.dynamic_optimizers import reset_optimizer -from avalanche.models.utils import avalanche_model_adaptation -from avalanche.training.plugins import SupervisedPlugin -from avalanche.training.plugins.evaluation import default_evaluator -from avalanche.training.templates.base_online_sgd import BaseOnlineSGDTemplate -from avalanche.training.utils import trigger_plugins -from avalanche.benchmarks.scenarios import OnlineCLExperience -from avalanche.models.dynamic_optimizers import update_optimizer - - -class OnlineSupervisedTemplate(BaseOnlineSGDTemplate): - """Base class for continual learning strategies. - - BaseTemplate is the super class of all task-based continual learning - strategies. It implements a basic training loop and callback system - that allows to execute code at each experience of the training loop. - Plugins can be used to implement callbacks to augment the training - loop with additional behavior (e.g. a memory buffer for replay). - - **Scenarios** - This strategy supports several continual learning scenarios: - - * class-incremental scenarios (no task labels) - * multi-task scenarios, where task labels are provided) - * multi-incremental scenarios, where the same task may be revisited - - The exact scenario depends on the data stream and whether it provides - the task labels. 
- - **Training loop** - The training loop is organized as follows:: - - train - train_exp # for each experience - adapt_train_dataset - train_dataset_adaptation - make_train_dataloader - train_pass # for each pass - # forward - # backward - # model update - - **Evaluation loop** - The evaluation loop is organized as follows:: - - eval - eval_exp # for each experience - adapt_eval_dataset - eval_dataset_adaptation - make_eval_dataloader - eval_epoch # for each epoch - # forward - # backward - # model update - - """ - - PLUGIN_CLASS = SupervisedPlugin - - def __init__( - self, - model: Module, - optimizer: Optimizer, - criterion=CrossEntropyLoss(), - train_mb_size: int = 1, - train_passes: int = 1, - eval_mb_size: Optional[int] = 1, - device="cpu", - plugins: Optional[Sequence["SupervisedPlugin"]] = None, - evaluator=default_evaluator(), - eval_every=-1, - peval_mode="experience", - ): - """Init. - - :param model: PyTorch model. - :param optimizer: PyTorch optimizer. - :param criterion: loss function. - :param train_mb_size: mini-batch size for training. - :param train_passes: number of training passes. - :param eval_mb_size: mini-batch size for eval. - :param device: PyTorch device where the model will be allocated. - :param plugins: (optional) list of StrategyPlugins. - :param evaluator: (optional) instance of EvaluationPlugin for logging - and metric computations. None to remove logging. - :param eval_every: the frequency of the calls to `eval` inside the - training loop. -1 disables the evaluation. 0 means `eval` is called - only at the end of the learning experience. Values >0 mean that - `eval` is called every `eval_every` experiences and at the end of - the learning experience. - :param peval_mode: one of {'experience', 'iteration'}. Decides whether - the periodic evaluation during training should execute every - `eval_every` experience or iterations (Default='experience'). - """ - super().__init__( - model=model, - optimizer=optimizer, - train_mb_size=train_mb_size, - train_passes=train_passes, - eval_mb_size=eval_mb_size, - device=device, - plugins=plugins, - evaluator=evaluator, - eval_every=eval_every, - peval_mode=peval_mode, - ) - self._criterion = criterion - - ################################################################### - # State variables. These are updated during the train/eval loops. # - ################################################################### - - self.adapted_dataset = None - """ Data used to train. It may be modified by plugins. Plugins can - append data to it (e.g. for replay). - - .. note:: - - This dataset may contain samples from different experiences. If you - want the original data for the current experience - use :attr:`.BaseTemplate.experience`. - """ - - @property - def mb_x(self): - """Current mini-batch input.""" - return self.mbatch[0] - - @property - def mb_y(self): - """Current mini-batch target.""" - return self.mbatch[1] - - @property - def mb_task_id(self): - """Current mini-batch task labels.""" - assert len(self.mbatch) >= 3 - return self.mbatch[-1] - - def criterion(self): - """Loss function.""" - return self._criterion(self.mb_output, self.mb_y) - - def _before_training_exp(self, **kwargs): - """Setup to train on a single experience.""" - # Data Adaptation (e.g. 
add new samples/data augmentation) - self._before_train_dataset_adaptation(**kwargs) - self.train_dataset_adaptation(**kwargs) - self._after_train_dataset_adaptation(**kwargs) - super()._before_training_exp(**kwargs) - - def _load_train_state(self, prev_state): - super()._load_train_state(prev_state) - self.adapted_dataset = prev_state["adapted_dataset"] - self.dataloader = prev_state["dataloader"] - - def _save_train_state(self): - """Save the training state which may be modified by the eval loop. - - This currently includes: experience, adapted_dataset, dataloader, - is_training, and train/eval modes for each module. - - TODO: we probably need a better way to do this. - """ - state = super()._save_train_state() - new_state = { - "adapted_dataset": self.adapted_dataset, - "dataloader": self.dataloader, - } - return {**state, **new_state} - - def train_dataset_adaptation(self, **kwargs): - """Initialize `self.adapted_dataset`.""" - self.adapted_dataset = self.experience.dataset - self.adapted_dataset = self.adapted_dataset.train() - - def _before_eval_exp(self, **kwargs): - # Data Adaptation - self._before_eval_dataset_adaptation(**kwargs) - self.eval_dataset_adaptation(**kwargs) - self._after_eval_dataset_adaptation(**kwargs) - super()._before_eval_exp(**kwargs) - - def make_train_dataloader( - self, - num_workers=0, - shuffle=True, - pin_memory=True, - persistent_workers=False, - **kwargs - ): - """Data loader initialization. - - Called at the start of each learning experience after the dataset - adaptation. - - :param num_workers: number of thread workers for the data loading. - :param shuffle: True if the data should be shuffled, False otherwise. - :param pin_memory: If True, the data loader will copy Tensors into CUDA - pinned memory before returning them. Defaults to True. - """ - - other_dataloader_args = {} - - if parse_version(torch.__version__) >= parse_version("1.7.0"): - other_dataloader_args["persistent_workers"] = persistent_workers - for k, v in kwargs.items(): - other_dataloader_args[k] = v - - collate_from_data_or_kwargs(self.adapted_dataset, - other_dataloader_args) - self.dataloader = TaskBalancedDataLoader( - self.adapted_dataset, - oversample_small_groups=True, - num_workers=num_workers, - batch_size=self.train_mb_size, - shuffle=shuffle, - pin_memory=pin_memory, - **other_dataloader_args - ) - - def make_eval_dataloader( - self, num_workers=0, pin_memory=True, persistent_workers=False, - **kwargs - ): - """ - Initializes the eval data loader. - :param num_workers: How many subprocesses to use for data loading. - 0 means that the data will be loaded in the main process. - (default: 0). - :param pin_memory: If True, the data loader will copy Tensors into CUDA - pinned memory before returning them. Defaults to True. - :param kwargs: - :return: - """ - other_dataloader_args = {} - - if parse_version(torch.__version__) >= parse_version("1.7.0"): - other_dataloader_args["persistent_workers"] = persistent_workers - for k, v in kwargs.items(): - other_dataloader_args[k] = v - - collate_from_data_or_kwargs(self.adapted_dataset, - other_dataloader_args) - self.dataloader = DataLoader( - self.adapted_dataset, - num_workers=num_workers, - batch_size=self.eval_mb_size, - pin_memory=pin_memory, - **other_dataloader_args - ) - - def forward(self): - """Compute the model's output given the current mini-batch.""" - return avalanche_forward(self.model, self.mb_x, self.mb_task_id) - - def model_adaptation(self, model=None): - """Adapts the model to the current data. 
-
-        Calls the :class:`~avalanche.models.DynamicModule`s adaptation.
-        """
-        if model is None:
-            model = self.model
-
-        # For training:
-        if isinstance(self.experience, OnlineCLExperience):
-            # If the strategy has access to task boundaries, adapt the model
-            # for the whole origin experience to add the new units.
-            if self.experience.access_task_boundaries:
-                avalanche_model_adaptation(model,
-                                           self.experience.origin_experience)
-            else:
-                self.model_params_before_adaptation = list(model.parameters())
-                avalanche_model_adaptation(model, self.experience)
-
-        # For evaluation, the experience is not necessarily an online
-        # experience:
-        else:
-            avalanche_model_adaptation(model, self.experience)
-
-        return model.to(self.device)
-
-    def _unpack_minibatch(self):
-        """We assume mini-batches have the form <x, y, ..., t>.
-        This allows for arbitrary tensors between y and t.
-        Keep in mind that in the most general case mb_task_id is a tensor
-        which may contain different labels for each sample.
-        """
-        assert len(self.mbatch) >= 3
-        super()._unpack_minibatch()
-
-    def eval_dataset_adaptation(self, **kwargs):
-        """Initialize `self.adapted_dataset`."""
-        self.adapted_dataset = self.experience.dataset
-        self.adapted_dataset = self.adapted_dataset.eval()
-
-    def make_optimizer(self):
-        """Optimizer initialization.
-
-        Called before each training experience to configure the optimizer.
-        """
-        # We reset the optimizer's state after each experience if task
-        # boundaries are given, otherwise it updates the optimizer only if
-        # new parameters are added to the model after each adaptation step.
-
-        # We assume the current experience is an OnlineCLExperience:
-        if self.experience.access_task_boundaries:
-            reset_optimizer(self.optimizer, self.model)
-
-        else:
-            update_optimizer(self.optimizer,
-                             self.model_params_before_adaptation,
-                             self.model.parameters(),
-                             reset_state=False)
-
-    #########################################################
-    # Plugin Triggers                                       #
-    #########################################################
-
-    def _before_train_dataset_adaptation(self, **kwargs):
-        trigger_plugins(self, "before_train_dataset_adaptation", **kwargs)
-
-    def _after_train_dataset_adaptation(self, **kwargs):
-        trigger_plugins(self, "after_train_dataset_adaptation", **kwargs)
-
-    def _before_eval_dataset_adaptation(self, **kwargs):
-        trigger_plugins(self, "before_eval_dataset_adaptation", **kwargs)
-
-    def _after_eval_dataset_adaptation(self, **kwargs):
-        trigger_plugins(self, "after_eval_dataset_adaptation", **kwargs)
diff --git a/avalanche/training/templates/supervised.py b/avalanche/training/templates/supervised.py
deleted file mode 100644
index 9b1eb104a..000000000
--- a/avalanche/training/templates/supervised.py
+++ /dev/null
@@ -1,311 +0,0 @@
-from typing import Sequence, Optional
-from pkg_resources import parse_version
-
-import torch
-from torch.nn import Module, CrossEntropyLoss
-from torch.optim import Optimizer
-from torch.utils.data import DataLoader
-
-from avalanche.benchmarks.utils.data_loader import TaskBalancedDataLoader, \
-    collate_from_data_or_kwargs
-from avalanche.models import avalanche_forward
-from avalanche.models.dynamic_optimizers import reset_optimizer
-from avalanche.models.utils import avalanche_model_adaptation
-from avalanche.training.plugins import SupervisedPlugin
-from avalanche.training.plugins.evaluation import default_evaluator
-from avalanche.training.templates.base_sgd import BaseSGDTemplate
-from avalanche.training.utils import trigger_plugins
-
-
-class SupervisedTemplate(BaseSGDTemplate):
-    """Base
class for continual learning strategies. - - BaseTemplate is the super class of all task-based continual learning - strategies. It implements a basic training loop and callback system - that allows to execute code at each experience of the training loop. - Plugins can be used to implement callbacks to augment the training - loop with additional behavior (e.g. a memory buffer for replay). - - **Scenarios** - This strategy supports several continual learning scenarios: - - * class-incremental scenarios (no task labels) - * multi-task scenarios, where task labels are provided) - * multi-incremental scenarios, where the same task may be revisited - - The exact scenario depends on the data stream and whether it provides - the task labels. - - **Training loop** - The training loop is organized as follows:: - - train - train_exp # for each experience - adapt_train_dataset - train_dataset_adaptation - make_train_dataloader - train_epoch # for each epoch - # forward - # backward - # model update - - **Evaluation loop** - The evaluation loop is organized as follows:: - - eval - eval_exp # for each experience - adapt_eval_dataset - eval_dataset_adaptation - make_eval_dataloader - eval_epoch # for each epoch - # forward - # backward - # model update - - """ - - PLUGIN_CLASS = SupervisedPlugin - - def __init__( - self, - model: Module, - optimizer: Optimizer, - criterion=CrossEntropyLoss(), - train_mb_size: int = 1, - train_epochs: int = 1, - eval_mb_size: Optional[int] = 1, - device="cpu", - plugins: Optional[Sequence["SupervisedPlugin"]] = None, - evaluator=default_evaluator(), - eval_every=-1, - peval_mode="epoch", - ): - """Init. - - :param model: PyTorch model. - :param optimizer: PyTorch optimizer. - :param criterion: loss function. - :param train_mb_size: mini-batch size for training. - :param train_epochs: number of training epochs. - :param eval_mb_size: mini-batch size for eval. - :param device: PyTorch device where the model will be allocated. - :param plugins: (optional) list of StrategyPlugins. - :param evaluator: (optional) instance of EvaluationPlugin for logging - and metric computations. None to remove logging. - :param eval_every: the frequency of the calls to `eval` inside the - training loop. -1 disables the evaluation. 0 means `eval` is called - only at the end of the learning experience. Values >0 mean that - `eval` is called every `eval_every` epochs and at the end of the - learning experience. - :param peval_mode: one of {'epoch', 'iteration'}. Decides whether the - periodic evaluation during training should execute every - `eval_every` epochs or iterations (Default='epoch'). - """ - super().__init__( - model=model, - optimizer=optimizer, - train_mb_size=train_mb_size, - train_epochs=train_epochs, - eval_mb_size=eval_mb_size, - device=device, - plugins=plugins, - evaluator=evaluator, - eval_every=eval_every, - peval_mode=peval_mode, - ) - self._criterion = criterion - - ################################################################### - # State variables. These are updated during the train/eval loops. # - ################################################################### - - self.adapted_dataset = None - """ Data used to train. It may be modified by plugins. Plugins can - append data to it (e.g. for replay). - - .. note:: - - This dataset may contain samples from different experiences. If you - want the original data for the current experience - use :attr:`.BaseTemplate.experience`. 
- """ - - @property - def mb_x(self): - """Current mini-batch input.""" - return self.mbatch[0] - - @property - def mb_y(self): - """Current mini-batch target.""" - return self.mbatch[1] - - @property - def mb_task_id(self): - """Current mini-batch task labels.""" - assert len(self.mbatch) >= 3 - return self.mbatch[-1] - - def criterion(self): - """Loss function.""" - return self._criterion(self.mb_output, self.mb_y) - - def _before_training_exp(self, **kwargs): - """Setup to train on a single experience.""" - # Data Adaptation (e.g. add new samples/data augmentation) - self._before_train_dataset_adaptation(**kwargs) - self.train_dataset_adaptation(**kwargs) - self._after_train_dataset_adaptation(**kwargs) - super()._before_training_exp(**kwargs) - - def _load_train_state(self, prev_state): - super()._load_train_state(prev_state) - self.adapted_dataset = prev_state["adapted_dataset"] - self.dataloader = prev_state["dataloader"] - - def _save_train_state(self): - """Save the training state which may be modified by the eval loop. - - This currently includes: experience, adapted_dataset, dataloader, - is_training, and train/eval modes for each module. - - TODO: we probably need a better way to do this. - """ - state = super()._save_train_state() - new_state = { - "adapted_dataset": self.adapted_dataset, - "dataloader": self.dataloader, - } - return {**state, **new_state} - - def train_dataset_adaptation(self, **kwargs): - """Initialize `self.adapted_dataset`.""" - self.adapted_dataset = self.experience.dataset - self.adapted_dataset = self.adapted_dataset.train() - - def _before_eval_exp(self, **kwargs): - # Data Adaptation - self._before_eval_dataset_adaptation(**kwargs) - self.eval_dataset_adaptation(**kwargs) - self._after_eval_dataset_adaptation(**kwargs) - super()._before_eval_exp(**kwargs) - - def make_train_dataloader( - self, - num_workers=0, - shuffle=True, - pin_memory=True, - persistent_workers=False, - **kwargs - ): - """Data loader initialization. - - Called at the start of each learning experience after the dataset - adaptation. - - :param num_workers: number of thread workers for the data loading. - :param shuffle: True if the data should be shuffled, False otherwise. - :param pin_memory: If True, the data loader will copy Tensors into CUDA - pinned memory before returning them. Defaults to True. - """ - - other_dataloader_args = {} - - if parse_version(torch.__version__) >= parse_version("1.7.0"): - other_dataloader_args["persistent_workers"] = persistent_workers - for k, v in kwargs.items(): - other_dataloader_args[k] = v - - self.dataloader = TaskBalancedDataLoader( - self.adapted_dataset, - oversample_small_groups=True, - num_workers=num_workers, - batch_size=self.train_mb_size, - shuffle=shuffle, - pin_memory=pin_memory, - **other_dataloader_args - ) - - def make_eval_dataloader( - self, num_workers=0, pin_memory=True, persistent_workers=False, **kwargs - ): - """ - Initializes the eval data loader. - :param num_workers: How many subprocesses to use for data loading. - 0 means that the data will be loaded in the main process. - (default: 0). - :param pin_memory: If True, the data loader will copy Tensors into CUDA - pinned memory before returning them. Defaults to True. 
-        :param kwargs:
-        :return:
-        """
-        other_dataloader_args = {}
-
-        if parse_version(torch.__version__) >= parse_version("1.7.0"):
-            other_dataloader_args["persistent_workers"] = persistent_workers
-        for k, v in kwargs.items():
-            other_dataloader_args[k] = v
-
-        collate_from_data_or_kwargs(self.adapted_dataset,
-                                    other_dataloader_args)
-        self.dataloader = DataLoader(
-            self.adapted_dataset,
-            num_workers=num_workers,
-            batch_size=self.eval_mb_size,
-            pin_memory=pin_memory,
-            **other_dataloader_args
-        )
-
-    def forward(self):
-        """Compute the model's output given the current mini-batch."""
-        return avalanche_forward(self.model, self.mb_x, self.mb_task_id)
-
-    def model_adaptation(self, model=None):
-        """Adapts the model to the current data.
-
-        Calls the :class:`~avalanche.models.DynamicModule`s adaptation.
-        """
-        if model is None:
-            model = self.model
-        avalanche_model_adaptation(model, self.experience)
-        return model.to(self.device)
-
-    def _unpack_minibatch(self):
-        """We assume mini-batches have the form <x, y, ..., t>.
-        This allows for arbitrary tensors between y and t.
-        Keep in mind that in the most general case mb_task_id is a tensor
-        which may contain different labels for each sample.
-        """
-        assert len(self.mbatch) >= 3
-        super()._unpack_minibatch()
-
-    def eval_dataset_adaptation(self, **kwargs):
-        """Initialize `self.adapted_dataset`."""
-        self.adapted_dataset = self.experience.dataset
-        self.adapted_dataset = self.adapted_dataset.eval()
-
-    def make_optimizer(self):
-        """Optimizer initialization.
-
-        Called before each training experience to configure the optimizer.
-        """
-        # We reset the optimizer's state after each experience.
-        # This allows adding new parameters (new heads) and
-        # freezing old units during the model's adaptation phase.
-        reset_optimizer(self.optimizer, self.model)
-
-    #########################################################
-    # Plugin Triggers                                       #
-    #########################################################
-
-    def _before_train_dataset_adaptation(self, **kwargs):
-        trigger_plugins(self, "before_train_dataset_adaptation", **kwargs)
-
-    def _after_train_dataset_adaptation(self, **kwargs):
-        trigger_plugins(self, "after_train_dataset_adaptation", **kwargs)
-
-    def _before_eval_dataset_adaptation(self, **kwargs):
-        trigger_plugins(self, "before_eval_dataset_adaptation", **kwargs)
-
-    def _after_eval_dataset_adaptation(self, **kwargs):
-        trigger_plugins(self, "after_eval_dataset_adaptation", **kwargs)
diff --git a/tests/training/test_replay.py b/tests/training/test_replay.py
index 26afa78bc..7ae908161 100644
--- a/tests/training/test_replay.py
+++ b/tests/training/test_replay.py
@@ -22,7 +22,7 @@
     ParametricBuffer,
 )
 from avalanche.training.supervised import Naive
-from avalanche.training.templates.supervised import SupervisedTemplate
+from avalanche.training.templates import SupervisedTemplate
 
 from tests.unit_tests_utils import get_fast_benchmark
diff --git a/tests/training/test_strategies.py b/tests/training/test_strategies.py
index c7a9b773e..95889c818 100644
--- a/tests/training/test_strategies.py
+++ b/tests/training/test_strategies.py
@@ -47,7 +47,7 @@
 from avalanche.training.supervised.icarl import ICaRL
 from avalanche.training.supervised.joint_training import AlreadyTrainedError
 from avalanche.training.supervised.strategy_wrappers import PNNStrategy
-from avalanche.training.templates.supervised import SupervisedTemplate
+from avalanche.training.templates import SupervisedTemplate
 from avalanche.training.utils import get_last_fc_layer
 from tests.unit_tests_utils import
get_fast_benchmark, get_device From aee1d2efc403124c9b9bbc6bd04295f8eeea59ce Mon Sep 17 00:00:00 2001 From: hamedhemati Date: Fri, 30 Sep 2022 16:35:27 +0200 Subject: [PATCH 09/10] Update `observation_type` and `base_sgd` --- avalanche/logging/base_logger.py | 2 +- avalanche/training/templates/base_sgd.py | 30 +++++++++++++++++-- .../training/templates/common_templates.py | 2 +- .../observation_type/batch_observation.py | 28 +---------------- .../observation_type/online_observation.py | 21 +------------ examples/online_replay.py | 2 +- 6 files changed, 33 insertions(+), 52 deletions(-) diff --git a/avalanche/logging/base_logger.py b/avalanche/logging/base_logger.py index 9e03daa87..77b86864e 100644 --- a/avalanche/logging/base_logger.py +++ b/avalanche/logging/base_logger.py @@ -4,7 +4,7 @@ if TYPE_CHECKING: from avalanche.evaluation.metric_results import MetricValue - from avalanche.training.templates.supervised import SupervisedTemplate + from avalanche.training.templates import SupervisedTemplate class BaseLogger(ABC): diff --git a/avalanche/training/templates/base_sgd.py b/avalanche/training/templates/base_sgd.py index 46813a9ed..dc0ba9d38 100644 --- a/avalanche/training/templates/base_sgd.py +++ b/avalanche/training/templates/base_sgd.py @@ -218,7 +218,7 @@ def eval_epoch(self, **kwargs): # ==================================================================> NEW - def maybe_adapt_model_and_make_optimizer(self): + def check_model_and_optimizer(self): # Should be implemented in observation type raise NotImplementedError() @@ -234,10 +234,36 @@ def _before_training_exp(self, **kwargs): # Model Adaptation (e.g. freeze/add new units) # self.model = self.model_adaptation() # self.make_optimizer() - self.maybe_adapt_model_and_make_optimizer() + self.check_model_and_optimizer() super()._before_training_exp(**kwargs) + def _train_exp( + self, experience: CLExperience, eval_streams=None, **kwargs + ): + """Training loop over a single Experience object. + + :param experience: CL experience information. + :param eval_streams: list of streams for evaluation. + If None: use the training experience for evaluation. + Use [] if you do not want to evaluate during training. + :param kwargs: custom arguments. + """ + if eval_streams is None: + eval_streams = [experience] + for i, exp in enumerate(eval_streams): + if not isinstance(exp, Iterable): + eval_streams[i] = [exp] + for _ in range(self.train_epochs): + self._before_training_epoch(**kwargs) + + if self._stop_training: # Early stopping + self._stop_training = False + break + + self.training_epoch(**kwargs) + self._after_training_epoch(**kwargs) + def _save_train_state(self): """Save the training state which may be modified by the eval loop. 
diff --git a/avalanche/training/templates/common_templates.py b/avalanche/training/templates/common_templates.py index 54aefd058..a8f5e35e6 100644 --- a/avalanche/training/templates/common_templates.py +++ b/avalanche/training/templates/common_templates.py @@ -328,7 +328,7 @@ def __init__( optimizer=optimizer, criterion=criterion, train_mb_size=train_mb_size, - train_epochs=1, + train_epochs=train_passes, eval_mb_size=eval_mb_size, device=device, plugins=plugins, diff --git a/avalanche/training/templates/observation_type/batch_observation.py b/avalanche/training/templates/observation_type/batch_observation.py index 35b887d11..4ec073849 100644 --- a/avalanche/training/templates/observation_type/batch_observation.py +++ b/avalanche/training/templates/observation_type/batch_observation.py @@ -6,32 +6,6 @@ class BatchObservation: - def _train_exp( - self, experience: CLExperience, eval_streams=None, **kwargs - ): - """Training loop over a single Experience object. - - :param experience: CL experience information. - :param eval_streams: list of streams for evaluation. - If None: use the training experience for evaluation. - Use [] if you do not want to evaluate during training. - :param kwargs: custom arguments. - """ - if eval_streams is None: - eval_streams = [experience] - for i, exp in enumerate(eval_streams): - if not isinstance(exp, Iterable): - eval_streams[i] = [exp] - for _ in range(self.train_epochs): - self._before_training_epoch(**kwargs) - - if self._stop_training: # Early stopping - self._stop_training = False - break - - self.training_epoch(**kwargs) - self._after_training_epoch(**kwargs) - def model_adaptation(self, model=None): """Adapts the model to the current data. @@ -52,6 +26,6 @@ def make_optimizer(self): # freezing old units during the model's adaptation phase. reset_optimizer(self.optimizer, self.model) - def maybe_adapt_model_and_make_optimizer(self): + def check_model_and_optimizer(self): self.model = self.model_adaptation() self.make_optimizer() diff --git a/avalanche/training/templates/observation_type/online_observation.py b/avalanche/training/templates/observation_type/online_observation.py index e542375a4..d3dbfaac5 100644 --- a/avalanche/training/templates/observation_type/online_observation.py +++ b/avalanche/training/templates/observation_type/online_observation.py @@ -7,25 +7,6 @@ class OnlineObservation: - def _train_exp( - self, experience: OnlineCLExperience, eval_streams=None, **kwargs - ): - """Training loop over a single Experience object. - - :param experience: Online CL experience information. - :param eval_streams: list of streams for evaluation. - If None: use the training experience for evaluation. - Use [] if you do not want to evaluate during training. - :param kwargs: custom arguments. - """ - if eval_streams is None: - eval_streams = [experience] - for i, exp in enumerate(eval_streams): - if not isinstance(exp, Iterable): - eval_streams[i] = [exp] - - self.training_epoch(**kwargs) - def make_optimizer(self): """Optimizer initialization. 
@@ -71,7 +52,7 @@ def model_adaptation(self, model=None): return model.to(self.device) - def maybe_adapt_model_and_make_optimizer(self): + def check_model_and_optimizer(self): # If strategy has access to the task boundaries, and the current # sub-experience is the first sub-experience in the online (sub-)stream, # then adapt the model with the full origin experience: diff --git a/examples/online_replay.py b/examples/online_replay.py index b01144471..d998d72a8 100644 --- a/examples/online_replay.py +++ b/examples/online_replay.py @@ -127,7 +127,7 @@ def main(args): experiences=exp, experience_size=1) # Train on the online train stream of the scenario - cl_strategy.train(ocl_benchmark.online_train_stream) + cl_strategy.train(ocl_benchmark.train_stream) results.append(cl_strategy.eval(scenario.test_stream)) From 678a23433558eca8bf22fcadbb2559e3e385edd5 Mon Sep 17 00:00:00 2001 From: hamedhemati Date: Mon, 3 Oct 2022 16:27:33 +0200 Subject: [PATCH 10/10] Update `observation_type` and `base_sgd` --- examples/online_naive.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/online_naive.py b/examples/online_naive.py index 04c87919e..d4b7b581f 100644 --- a/examples/online_naive.py +++ b/examples/online_naive.py @@ -120,8 +120,10 @@ def main(args): experiences=exp, experience_size=1, access_task_boundaries=True) + # Train on the online train stream of the scenario cl_strategy.train(ocl_benchmark.train_stream) + results.append(cl_strategy.eval(scenario.original_test_stream))
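
The net effect of these last two patches is that all per-experience setup now goes through the single `check_model_and_optimizer()` hook, which `_before_training_exp` in `base_sgd.py` calls before the epoch loop. The sketch below illustrates how a custom observation-type mixin could satisfy that contract. It is a minimal, hypothetical example, not part of this change set: the class name `MyObservation` is invented for illustration, while `avalanche_model_adaptation` and `reset_optimizer` are the helpers already imported by the templates touched in this series.

    from avalanche.models.dynamic_optimizers import reset_optimizer
    from avalanche.models.utils import avalanche_model_adaptation


    class MyObservation:
        """Illustrative observation-type mixin for an SGD-based template."""

        def model_adaptation(self, model=None):
            # Adapt dynamic modules (e.g. add new output units) to the data
            # of the current experience.
            if model is None:
                model = self.model
            avalanche_model_adaptation(model, self.experience)
            return model.to(self.device)

        def make_optimizer(self):
            # Reset the optimizer so that it tracks the model's current
            # parameters after adaptation.
            reset_optimizer(self.optimizer, self.model)

        def check_model_and_optimizer(self):
            # Called once per training experience by
            # BaseSGDTemplate._before_training_exp, before the epoch loop.
            self.model = self.model_adaptation()
            self.make_optimizer()

A concrete strategy would mix such a class into a template in the same slot that `BatchObservation` and `OnlineObservation` occupy, so the renamed hook is picked up automatically by the training loop.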