diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
index d37000545..264a18a27 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -11,7 +11,7 @@ assignees: ''
 A clear and concise description of what the bug is.
 
 🐜 **To Reproduce**
-Steps / minimal snipped of code to reproduce the issue.
+A [minimal working example](https://en.wikipedia.org/wiki/Minimal_working_example) to reproduce the issue. The code should be executable without modifications.
 
 🐝 **Expected behavior**
 A clear and concise description of what you expected to happen.
diff --git a/README.md b/README.md
index c67a11ec8..642b5b9ed 100644
--- a/README.md
+++ b/README.md
@@ -83,10 +83,10 @@ Current Release
 
 Avalanche is a framework in constant development. Thanks to the support of the [ContinualAI]() community and its active members we are quickly extending its features and improving its usability based on the demands of our research community!
 
-A the moment, Avalanche is in [**Alpha v0.0.1**](https://avalanche.continualai.org/getting-started/alpha-version), but we already support [several *Benchmarks*, *Strategies* and *Metrics*](https://avalanche.continualai.org/getting-started/alpha-version), that make it, we believe, the best tool out there for your continual learning research! 💪
+At the moment, Avalanche is in [**Beta (v0.1.0)**](https://github.com/ContinualAI/avalanche/releases/tag/v0.1.0). We support [several *Benchmarks*, *Strategies* and *Metrics*](https://avalanche.continualai.org/getting-started/alpha-version), which make it, we believe, the best tool out there for your continual learning research! 💪
 
-*Please note that, at the moment, we **do not** support stable releases and packaged versions of the library.*
-*We do this intentionally as in this early phase we would like to stimulate contributions only from experienced CL researchers and coders.*
+**You can install Avalanche by running `pip install avalanche-lib`.**
+Look [here](https://avalanche.continualai.org/getting-started/how-to-install) for a more complete guide on the different ways available to install Avalanche.
 
 Getting Started
 ----------------
diff --git a/avalanche/__init__.py b/avalanche/__init__.py
index 1f26f41bb..36febab02 100644
--- a/avalanche/__init__.py
+++ b/avalanche/__init__.py
@@ -5,7 +5,7 @@
 from avalanche import training
 
-__version__ = "0.0.1"
+__version__ = "0.1.0"
 
 _dataset_add = None
diff --git a/avalanche/benchmarks/classic/__init__.py b/avalanche/benchmarks/classic/__init__.py
index 240040530..d43f247b8 100644
--- a/avalanche/benchmarks/classic/__init__.py
+++ b/avalanche/benchmarks/classic/__init__.py
@@ -3,10 +3,12 @@
 from .ccub200 import *
 from .cfashion_mnist import *
 from .cimagenet import *
+from .cinaturalist import *
 from .cmnist import *
 from .comniglot import *
 from .core50 import CORe50
 from .ctiny_imagenet import *
+from .ctrl import *
 from .endless_cl_sim import *
 from .openloris import *
 from .stream51 import *
diff --git a/avalanche/benchmarks/classic/ctrl.py b/avalanche/benchmarks/classic/ctrl.py
new file mode 100644
index 000000000..a8a9994b0
--- /dev/null
+++ b/avalanche/benchmarks/classic/ctrl.py
@@ -0,0 +1,105 @@
+################################################################################
+# Copyright (c) 2021 ContinualAI.                                              #
+# Copyrights licensed under the MIT License.                                   #
+# See the accompanying LICENSE file for terms.                                 #
+#                                                                              #
+# Date: 22-06-2021                                                             #
+# Author(s): Tom Veniat                                                        #
+# E-mail: contact@continualai.org                                              #
+# Website: avalanche.continualai.org                                           #
+################################################################################
+
+import random
+import sys
+from pathlib import Path
+
+import torchvision.transforms.functional as F
+from torchvision import transforms
+from tqdm import tqdm
+
+import ctrl
+from avalanche.benchmarks import dataset_benchmark
+from avalanche.benchmarks.datasets import default_dataset_location
+from avalanche.benchmarks.utils import AvalancheTensorDataset, \
+    common_paths_root, AvalancheDataset, PathsDataset
+
+
+def CTrL(stream_name: str, save_to_disk: bool = False,
+         path: Path = default_dataset_location(''), seed: int = None,
+         n_tasks: int = None):
+    """
+    Gives access to the Continual Transfer Learning benchmark streams
+    introduced in https://arxiv.org/abs/2012.12631.
+
+    :param stream_name: Name of the test stream to generate. Must be one of
+        `s_plus`, `s_minus`, `s_in`, `s_out` and `s_pl`.
+    :param save_to_disk: Whether to save each stream on the disk or load
+        everything in memory. Setting it to `True` will save memory but will
+        take more time on the first generation using the corresponding seed.
+    :param path: The path under which the generated stream will be saved if
+        save_to_disk is True.
+    :param seed: The seed to use to generate the streams. If no seed is
+        given, a random one will be drawn; since the concrete seed fully
+        determines the stream (and appears in the save path), the generated
+        stream can still be reproduced.
+    :param n_tasks: The number of tasks to generate. This parameter is only
+        relevant for the `s_long` stream, as all other streams have a fixed
+        number of tasks.
+    :return: A scenario containing 3 streams: train, val and test.
+    """
+    seed = seed or random.randint(0, sys.maxsize)
+    if stream_name != 's_long' and n_tasks is not None:
+        raise ValueError('The n_tasks parameter can only be used with the '
+                         f'"s_long" stream (got {n_tasks} for {stream_name})')
+    elif stream_name == 's_long' and n_tasks is None:
+        n_tasks = 100
+
+    stream = ctrl.get_stream(stream_name, seed)
+
+    if save_to_disk:
+        folder = path / 'ctrl' / stream_name / f'seed_{seed}'
+
+    # Train, val and test experiences
+    exps = [[], [], []]
+    for t_id, t in enumerate(tqdm(stream, desc=f'Loading {stream_name}')):
+        trans = transforms.Normalize(t.statistics['mean'],
+                                     t.statistics['std'])
+        for split, split_name, exp in zip(t.datasets, t.split_names, exps):
+            samples, labels = split.tensors
+            task_labels = [t.id] * samples.size(0)
+            if save_to_disk:
+                exp_folder = folder / f'exp_{t_id}' / split_name
+                exp_folder.mkdir(parents=True, exist_ok=True)
+                files = []
+                for i, (sample, label) in enumerate(zip(samples, labels)):
+                    sample_path = exp_folder / f'sample_{i}.png'
+                    if not sample_path.exists():
+                        F.to_pil_image(sample).save(sample_path)
+                    files.append((sample_path, label.item()))
+
+                common_root, exp_paths_list = common_paths_root(files)
+                paths_dataset = PathsDataset(common_root, exp_paths_list)
+                dataset = AvalancheDataset(
+                    paths_dataset,
+                    task_labels=task_labels,
+                    transform=transforms.Compose([
+                        transforms.ToTensor(),
+                        trans
+                    ])
+                )
+            else:
+                dataset = AvalancheTensorDataset(samples, labels.squeeze(1),
+                                                 task_labels=task_labels,
+                                                 transform=trans)
+            exp.append(dataset)
+        if stream_name == 's_long' and t_id == n_tasks - 1:
+            break
+
+    return dataset_benchmark(
+        train_datasets=exps[0],
+        test_datasets=exps[2],
+        other_streams_datasets=dict(val=exps[1]),
+    )
+
+
+__all__ = [
+    'CTrL'
+]
diff --git a/avalanche/benchmarks/classic/stream51.py
b/avalanche/benchmarks/classic/stream51.py index 1be59742f..72d8d79a0 100644 --- a/avalanche/benchmarks/classic/stream51.py +++ b/avalanche/benchmarks/classic/stream51.py @@ -247,7 +247,7 @@ def CLStream51( __all__ = [ - 'Stream51' + 'CLStream51' ] if __name__ == "__main__": diff --git a/avalanche/benchmarks/datasets/__init__.py b/avalanche/benchmarks/datasets/__init__.py index cbe8e9f08..f5c5f0148 100644 --- a/avalanche/benchmarks/datasets/__init__.py +++ b/avalanche/benchmarks/datasets/__init__.py @@ -2,10 +2,11 @@ from .downloadable_dataset import * from .core50 import * from .cub200 import * +from .endless_cl_sim import * from .mini_imagenet import * from .openloris import * +from .stream51 import * from .tiny_imagenet import * from .omniglot import * -from .stream51 import * from .torchvision_wrapper import * from .inaturalist import * diff --git a/avalanche/benchmarks/utils/avalanche_dataset.py b/avalanche/benchmarks/utils/avalanche_dataset.py index fa27b4790..f2e9ef2a2 100644 --- a/avalanche/benchmarks/utils/avalanche_dataset.py +++ b/avalanche/benchmarks/utils/avalanche_dataset.py @@ -2004,20 +2004,34 @@ def concat_datasets_sequentially( new_class_ids_per_dataset = [] for dataset_idx in range(len(train_dataset_list)): + + # Get the train and test sets of the dataset + train_set = train_dataset_list[dataset_idx] + test_set = test_dataset_list[dataset_idx] + + # Get the classes in the dataset + dataset_classes = set(map(int, train_set.targets)) + # The class IDs for this dataset will be in range # [n_classes_in_previous_datasets, # n_classes_in_previous_datasets + classes_in_this_dataset) - class_mapping = list( + new_classes = list( range(next_remapped_idx, next_remapped_idx + classes_per_dataset[dataset_idx])) - new_class_ids_per_dataset.append(class_mapping) - - train_set = train_dataset_list[dataset_idx] - test_set = test_dataset_list[dataset_idx] - + new_class_ids_per_dataset.append(new_classes) + # AvalancheSubset is used to apply the class IDs transformation. # Remember, the class_mapping parameter must be a list in which: # new_class_id = class_mapping[original_class_id] + # Hence, a list of size equal to the maximum class index is created + # Only elements corresponding to the present classes are remapped + class_mapping = [-1] * (max(dataset_classes) + 1) + j = 0 + for i in dataset_classes: + class_mapping[i] = new_classes[j] + j += 1 + + # Create remapped datasets and append them to the final list remapped_train_datasets.append( AvalancheSubset(train_set, class_mapping=class_mapping)) remapped_test_datasets.append( diff --git a/avalanche/benchmarks/utils/data_loader.py b/avalanche/benchmarks/utils/data_loader.py index 104c8d406..9339b5d71 100644 --- a/avalanche/benchmarks/utils/data_loader.py +++ b/avalanche/benchmarks/utils/data_loader.py @@ -41,6 +41,8 @@ def _default_collate_mbatches_fn(mbatches): class TaskBalancedDataLoader: + """ Task-balanced data loader for Avalanche's datasets.""" + def __init__(self, data: AvalancheDataset, oversample_small_tasks: bool = False, collate_mbatches=_default_collate_mbatches_fn, @@ -95,9 +97,12 @@ def __len__(self): class GroupBalancedDataLoader: + """ Data loader that balances data from multiple datasets.""" + def __init__(self, datasets: Sequence[AvalancheDataset], oversample_small_groups: bool = False, collate_mbatches=_default_collate_mbatches_fn, + batch_size: int = 32, **kwargs): """ Data loader that balances data from multiple datasets. 
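+
+        A minimal usage sketch (illustrative only; assumes `d1` and `d2`
+        are existing AvalancheDatasets)::
+
+            loader = GroupBalancedDataLoader([d1, d2], batch_size=32)
+            for mbatch in loader:
+                pass  # each mini-batch mixes ~16 samples from each dataset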
@@ -115,6 +120,8 @@ def __init__(self, datasets: Sequence[AvalancheDataset],
         :param collate_mbatches: function that given a sequence of
             mini-batches (one for each task) combines them into a single
             mini-batch. Used to combine the mini-batches obtained separately
            from each task.
+        :param batch_size: the size of the batch. It must be greater than or
+            equal to the number of groups.
         :param kwargs: data loader arguments used to instantiate the loader for
            each group separately. See pytorch :class:`DataLoader`.
         """
@@ -123,8 +130,19 @@ def __init__(self, datasets: Sequence[AvalancheDataset],
         self.oversample_small_groups = oversample_small_groups
         self.collate_mbatches = collate_mbatches
 
+        # check if batch_size is larger than or equal to the number of datasets
+        assert batch_size >= len(datasets)
+
+        # divide the batch between all datasets in the group
+        ds_batch_size = batch_size // len(datasets)
+        remaining = batch_size % len(datasets)
+
         for data in self.datasets:
-            self.dataloaders.append(DataLoader(data, **kwargs))
+            bs = ds_batch_size
+            if remaining > 0:
+                bs += 1
+                remaining -= 1
+            self.dataloaders.append(DataLoader(data, batch_size=bs, **kwargs))
         self.max_len = max([len(d) for d in self.dataloaders])
 
     def __iter__(self):
@@ -166,6 +184,9 @@ def __len__(self):
 
 class GroupBalancedInfiniteDataLoader:
+    """ Data loader that balances data from multiple datasets emitting an
+    infinite stream."""
+
     def __init__(self, datasets: Sequence[AvalancheDataset],
                  collate_mbatches=_default_collate_mbatches_fn,
                  **kwargs):
@@ -214,6 +235,8 @@ def __len__(self):
 
 class ReplayDataLoader:
+    """ Custom data loader for rehearsal/replay strategies."""
+
     def __init__(self, data: AvalancheDataset, memory: AvalancheDataset = None,
                  oversample_small_tasks: bool = False,
                  collate_mbatches=_default_collate_mbatches_fn,
@@ -240,7 +263,9 @@ def __init__(self, data: AvalancheDataset, memory: AvalancheDataset = None,
             combine the mini-batches obtained separately from each task.
         :param batch_size: the size of the batch. It must be greater than or
             equal to the number of tasks.
-        :param ratio_data_mem: How many of the samples should be from
+        :param force_data_batch_size: How many of the samples should be from the
+            current `data`. If None, it will equally divide each batch between
+            samples from all seen tasks in the current `data` and `memory`.
         :param kwargs: data loader arguments used to instantiate the loader for
             each task separately. See pytorch :class:`DataLoader`.
         """
@@ -256,19 +281,23 @@ def __init__(self, data: AvalancheDataset, memory: AvalancheDataset = None,
             assert force_data_batch_size <= batch_size, \
                 "Forced batch size of data must be <= entire batch size"
 
-            mem_batch_size = batch_size - force_data_batch_size
-            remaining_example = 0
+            remaining_example_data = 0
 
+            mem_keys = len(self.memory.task_set)
+            mem_batch_size = batch_size - force_data_batch_size
+            mem_batch_size_k = mem_batch_size // mem_keys
+            remaining_example_mem = mem_batch_size % mem_keys
+
+            assert mem_batch_size >= mem_keys, \
+                "Batch size must be greater than or equal " \
+                "to the number of tasks in the memory."
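+
+            # Worked example of the split above (illustrative values only):
+            # batch_size=32, force_data_batch_size=20 and a memory holding
+            # 4 task keys gives mem_batch_size=12, mem_batch_size_k=3 and
+            # remaining_example_mem=0, i.e. 3 memory samples per task key
+            # in every mini-batch.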
             self.loader_data, _ = self._create_dataloaders(
                 data, force_data_batch_size,
-                remaining_example, **kwargs)
+                remaining_example_data, **kwargs)
 
             self.loader_memory, _ = self._create_dataloaders(
-                memory, mem_batch_size,
-                remaining_example, **kwargs)
+                memory, mem_batch_size_k,
+                remaining_example_mem, **kwargs)
         else:
             num_keys = len(self.data.task_set) + len(self.memory.task_set)
             assert batch_size >= num_keys, \
diff --git a/avalanche/evaluation/metrics/images_samples.py b/avalanche/evaluation/metrics/images_samples.py
index 407a3ef5d..8429ff0e8 100644
--- a/avalanche/evaluation/metrics/images_samples.py
+++ b/avalanche/evaluation/metrics/images_samples.py
@@ -22,35 +22,40 @@
 if TYPE_CHECKING:
     from avalanche.training.strategies import BaseStrategy
+    from avalanche.benchmarks.utils import AvalancheDataset
 
 
 class ImagesSamplePlugin(PluginMetric):
-    """
-    A metric used to sample images at random.
-    No data augmentation is shown.
+    """Metric used to sample random images.
 
+    Only images in `strategy.adapted_dataset` are used. Images added by the
+    dataloader (as the replay plugins do) will not be sampled.
+    By default, data augmentations are removed.
 
     :param n_rows: The number of rows to use in the grid of images.
     :param n_cols: The number of columns to use in the grid of images.
     :param group: If True, images will be grouped by (task, label)
     :param mode: The plugin can be used at train or eval time.
+    :param disable_augmentations: determines whether to show the augmented
+        images or the raw images (default: True).
     :return: The corresponding plugins.
     """
 
     def __init__(
         self,
         *,
-        mode: Literal["train", "eval"],
+        mode: Literal["train", "eval", "both"],
         n_cols: int,
         n_rows: int,
         group: bool = True,
+        disable_augmentations: bool = True
     ):
         super().__init__()
         self.group = group
         self.n_rows = n_rows
         self.n_cols = n_cols
         self.mode = mode
+        self.disable_augmentations = disable_augmentations
         self.images: List[Tensor] = []
         self.n_wanted_images = self.n_cols * self.n_rows
@@ -58,17 +63,26 @@ def __init__(
     def after_train_dataset_adaptation(
         self, strategy: "BaseStrategy"
     ) -> "MetricResult":
-        if self.mode == "train":
-            return self.make_grid_sample(strategy)
+        if self.mode == "train" or self.mode == "both":
+            return self._make_grid_sample(strategy)
 
     def after_eval_dataset_adaptation(
         self, strategy: "BaseStrategy"
     ) -> "MetricResult":
-        if self.mode == "eval":
-            return self.make_grid_sample(strategy)
+        if self.mode == "eval" or self.mode == "both":
+            return self._make_grid_sample(strategy)
+
+    def reset(self) -> None:
+        self.images = []
+
+    def result(self) -> List[Tensor]:
+        return self.images
+
+    def __str__(self):
+        return "images"
 
-    def make_grid_sample(self, strategy: "BaseStrategy") -> "MetricResult":
-        self.load_sorted_images(strategy)
+    def _make_grid_sample(self, strategy: "BaseStrategy") -> "MetricResult":
+        self._load_sorted_images(strategy)
 
         return [
             MetricValue(
@@ -88,16 +102,17 @@ def make_grid_sample(self, strategy: "BaseStrategy") -> "MetricResult":
             )
         ]
 
-    def load_sorted_images(self, strategy: "BaseStrategy"):
+    def _load_sorted_images(self, strategy: "BaseStrategy"):
         self.reset()
-        self.images, labels, tasks = self.load_data(strategy)
+        self.images, labels, tasks = self._load_data(strategy)
         if self.group:
-            self.sort_images(labels, tasks)
+            self._sort_images(labels, tasks)
 
-    def load_data(
+    def _load_data(
         self, strategy: "BaseStrategy"
     ) -> Tuple[List[Tensor], List[int], List[int]]:
-        dataloader = self.make_dataloader(strategy)
+        dataloader = self._make_dataloader(strategy.adapted_dataset,
strategy.eval_mb_size) images, labels, tasks = [], [], [] @@ -109,7 +124,7 @@ def load_data( if len(images) == self.n_wanted_images: return images, labels, tasks - def sort_images(self, labels: List[int], tasks: List[int]): + def _sort_images(self, labels: List[int], tasks: List[int]): self.images = [ image for task, label, image in sorted( @@ -117,23 +132,35 @@ def sort_images(self, labels: List[int], tasks: List[int]): ) ] - def make_dataloader(self, strategy: "BaseStrategy") -> DataLoader: + def _make_dataloader(self, data: "AvalancheDataset", mb_size: int)\ + -> DataLoader: + if self.disable_augmentations: + data = data.replace_transforms( + transform=MaybeToTensor(), target_transform=None, + ) return DataLoader( - dataset=strategy.adapted_dataset.replace_transforms( - transform=ToTensor(), target_transform=None, - ), - batch_size=min(strategy.eval_mb_size, self.n_wanted_images), + dataset=data, + batch_size=min(mb_size, self.n_wanted_images), shuffle=True, ) - def reset(self) -> None: - self.images = [] - def result(self) -> List[Tensor]: - return self.images +class MaybeToTensor(ToTensor): + """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor. Pytorch tensors + are left as is. + """ - def __str__(self): - return "images" + def __call__(self, pic): + """ + Args: + pic (PIL Image or numpy.ndarray): Image to be converted to tensor. + + Returns: + Tensor: Converted image. + """ + if isinstance(pic, Tensor): + return pic + return super().__call__(pic) def images_samples_metrics( diff --git a/avalanche/logging/tensorboard_logger.py b/avalanche/logging/tensorboard_logger.py index 945553d4c..88ecaffbb 100644 --- a/avalanche/logging/tensorboard_logger.py +++ b/avalanche/logging/tensorboard_logger.py @@ -21,8 +21,9 @@ from matplotlib.pyplot import Figure from torchvision.transforms.functional import to_tensor from avalanche.evaluation.metric_results import AlternativeValues, \ - MetricValue, TensorImage + TensorImage from avalanche.logging import StrategyLogger +import weakref class TensorboardLogger(StrategyLogger): @@ -61,8 +62,11 @@ def __init__(self, tb_log_dir: Union[str, Path] = "./tb_data", self.writer = SummaryWriter(tb_log_dir, filename_suffix=filename_suffix) - def __del__(self): - self.writer.close() + # Shuts down the writer gracefully on process exit + # or when this logger gets GCed. Fixes issue #864. + # For more info see: + # https://docs.python.org/3/library/weakref.html#comparing-finalizers-with-del-methods + weakref.finalize(self, SummaryWriter.close, self.writer) def log_single_metric(self, name, value, x_plot): if isinstance(value, AlternativeValues): diff --git a/avalanche/training/plugins/lr_scheduling.py b/avalanche/training/plugins/lr_scheduling.py index cdce9c535..62acc57e9 100644 --- a/avalanche/training/plugins/lr_scheduling.py +++ b/avalanche/training/plugins/lr_scheduling.py @@ -1,4 +1,12 @@ +import warnings +from typing import TYPE_CHECKING + +from avalanche.evaluation.metrics import Mean from avalanche.training.plugins import StrategyPlugin +import inspect + +if TYPE_CHECKING: + from avalanche.training import BaseStrategy class LRSchedulerPlugin(StrategyPlugin): @@ -7,30 +15,73 @@ class LRSchedulerPlugin(StrategyPlugin): This plugin manages learning rate scheduling inside of a strategy using the PyTorch scheduler passed to the constructor. The step() method of the scheduler is called after each training epoch. + + Metric-based schedulers (like ReduceLROnPlateau) are supported as well. 
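+
+    A minimal usage sketch (illustrative only; assumes `model`, `optimizer`
+    and `criterion` are already defined)::
+
+        from torch.optim.lr_scheduler import ReduceLROnPlateau
+
+        scheduler = ReduceLROnPlateau(optimizer)
+        plugin = LRSchedulerPlugin(scheduler, metric='train_loss')
+        strategy = Naive(model, optimizer, criterion, plugins=[plugin])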
""" - def __init__(self, scheduler, reset_scheduler=True, reset_lr=True): + def __init__(self, scheduler, reset_scheduler=True, reset_lr=True, + metric=None): """ Creates a ``LRSchedulerPlugin`` instance. :param scheduler: a learning rate scheduler that can be updated through - a step() method and can be reset by setting last_epoch=0 + a step() method and can be reset by setting last_epoch=0. :param reset_scheduler: If True, the scheduler is reset at the end of - the experience. - Defaults to True. + the experience. Defaults to True. :param reset_lr: If True, the optimizer learning rate is reset to its - original value. - Default to True. + original value. Default to True. + :param metric: the metric to use. Must be set when using + metric-based scheduling (like ReduceLROnPlateau). Only "train_loss" + and "val_loss" are supported at the moment. Beware that, + when using "val_loss", the periodic evaluation flow must be enabled + in the strategy. By default, the `eval_every` parameter of the + base strategy is -1, which means that the validation set is never + evaluated. Set that value to 1 to obtain the correct results. + Also, when using `metric="val_loss"`, remember to pass a proper + validation stream to the strategy train method, otherwise the + periodic evaluation stream will use the training set to compute + the validation loss. """ + super().__init__() self.scheduler = scheduler self.reset_scheduler = reset_scheduler self.reset_lr = reset_lr + self.metric = metric + self.rolling_metric = Mean() + + # Used to detect and manage the periodic eval phase + self._was_training = False + self._eval_train_epoch = 0 + + arg_names = inspect.getfullargspec(self.scheduler.step)[0] + needs_metrics = 'metrics' in arg_names + + if needs_metrics and self.metric is None: + raise ValueError( + 'The step method of this scheduler requires a metric ' + '(usually the loss) to be passed. 
Please set a proper '
+                'metric parameter when creating this plugin.')
+        elif (not needs_metrics) and self.metric is not None:
+            warnings.warn('You are passing a metric value but the scheduler '
+                          "doesn't seem to support metrics...")
 
-    def after_training_epoch(self, strategy, **kwargs):
-        self.scheduler.step()
+        if self.metric not in [None, 'train_loss', 'val_loss']:
+            raise ValueError(
+                'Only scheduling based on "train_loss" and "val_loss" '
+                f'is supported at the moment (got {metric}).')
 
-    def after_training_exp(self, strategy, **kwargs):
+        LRSchedulerPlugin._patch_lr_on_plateau(self.scheduler)
+
+    def after_training_epoch(self, strategy: 'BaseStrategy', **kwargs):
+        if self.metric == 'train_loss':
+            self.scheduler.step(metrics=self.rolling_metric.result())
+            self.rolling_metric.reset()
+        elif self.metric != 'val_loss':
+            self.scheduler.step()
+            self.rolling_metric.reset()
+
+    def after_training_exp(self, strategy: 'BaseStrategy', **kwargs):
         param_groups = strategy.optimizer.param_groups
         base_lrs = self.scheduler.base_lrs
@@ -40,3 +91,88 @@ def after_training_exp(self, strategy, **kwargs):
 
         if self.reset_scheduler:
             self.scheduler.last_epoch = 0
+
+            # Manage the reset of the scheduler
+            # Mainly used to call _reset on ReduceLROnPlateau, but may come
+            # in handy for other schedulers in the future
+            reset_method = getattr(self.scheduler, 'reset', None)
+            if not callable(reset_method):
+                reset_method = getattr(self.scheduler, '_reset', None)
+
+            if callable(reset_method):
+                # print('Calling reset method of scheduler')
+                reset_method()
+
+    # Methods used to manage ReduceLROnPlateau (keep track of the
+    # periodic eval)
+    def before_training(self, strategy: 'BaseStrategy', **kwargs):
+        self._was_training = True
+
+    def after_training(self, strategy: 'BaseStrategy', **kwargs):
+        self._was_training = False
+
+    def after_eval(self, strategy: 'BaseStrategy', **kwargs):
+        if self.metric == 'val_loss' and self._was_training:
+            if strategy.clock.train_exp_epochs == 0:
+                # The base strategy may run an evaluation pass on the
+                # validation set before running the training loop. In that
+                # case, we should just discard the result.
+                # print('Ignoring pre-training validation')
+                pass
+            elif self._eval_train_epoch == strategy.clock.train_exp_epochs:
+                # The base strategy may run an evaluation pass on the
+                # validation set after the training loop. In that
+                # case, we should discard the result only if the validation
+                # pass has been duplicated.
+
+                # In fact, the previous branch of the "if" could be omitted
+                # because this one can cover both the pre-training and
+                # duplicate post-training cases...
+ # print('Ignoring post-training duplicate validation ' + # f'{self._eval_train_epoch}') + pass + else: + # print('Stepping after validation', + # self.rolling_metric.result()) + self.scheduler.step(metrics=self.rolling_metric.result()) + self.rolling_metric.reset() + self._eval_train_epoch = strategy.clock.train_exp_epochs + + def after_training_iteration(self, strategy: 'BaseStrategy', **kwargs): + if self.metric != 'train_loss': + return + self.rolling_metric.update(strategy.loss, weight=len(strategy.mb_x)) + + def after_eval_iteration(self, strategy: 'BaseStrategy', **kwargs): + if self.metric != 'val_loss': + return + + # Check if switched to eval mid-training + # This only happens when running periodic validation + if self._was_training: + self.rolling_metric.update(strategy.loss, + weight=len(strategy.mb_x)) + + @staticmethod + def _patch_lr_on_plateau(scheduler): + # All PyTorch schedulers have the base_lrs field (needed to reset the + # initial LRs before each experience) with the only exception being + # ReduceLROnPlateau. This method will add that field to + # ReduceLROnPlateau. + + if hasattr(scheduler, 'base_lrs'): + return + + # Initialize epoch and base learning rates + for group in scheduler.optimizer.param_groups: + group.setdefault('initial_lr', group['lr']) + + scheduler.base_lrs = list( + map(lambda group_param: group_param['initial_lr'], + scheduler.optimizer.param_groups)) + + +__all__ = [ + 'LRSchedulerPlugin' +] diff --git a/avalanche/training/plugins/replay.py b/avalanche/training/plugins/replay.py index 168543e79..a6fad6d26 100644 --- a/avalanche/training/plugins/replay.py +++ b/avalanche/training/plugins/replay.py @@ -28,16 +28,19 @@ class ReplayPlugin(StrategyPlugin): The :mem_size: attribute controls the total number of patterns to be stored in the external memory. + :param storage_policy: The policy that controls how to add new exemplars + in memory + :param force_data_batch_size: How many of the samples should be from the + current `data`. If None, it will equally divide each batch between + samples from all seen tasks in the current `data` and `memory`. """ def __init__(self, mem_size: int = 200, - storage_policy: Optional["ExemplarsBuffer"] = None): - """ - :param storage_policy: The policy that controls how to add new exemplars - in memory - """ + storage_policy: Optional["ExemplarsBuffer"] = None, + force_data_batch_size: int = None): super().__init__() self.mem_size = mem_size + self.force_data_batch_size = force_data_batch_size if storage_policy is not None: # Use other storage policy self.storage_policy = storage_policy @@ -68,6 +71,7 @@ def before_training_exp(self, strategy: "BaseStrategy", oversample_small_tasks=True, num_workers=num_workers, batch_size=strategy.train_mb_size, + force_data_batch_size=self.force_data_batch_size, shuffle=shuffle) def after_training_exp(self, strategy: "BaseStrategy", **kwargs): diff --git a/avalanche/training/storage_policy.py b/avalanche/training/storage_policy.py index 7574c5046..d2d685e99 100644 --- a/avalanche/training/storage_policy.py +++ b/avalanche/training/storage_policy.py @@ -24,7 +24,8 @@ class ExemplarsBuffer(ABC): """ def __init__(self, max_size: int): - """ + """Init. + :param max_size: max number of input samples in the replay memory. """ self.max_size = max_size @@ -83,6 +84,11 @@ def update(self, strategy: 'BaseStrategy', **kwargs): self.update_from_dataset(strategy.experience.dataset) def update_from_dataset(self, new_data: AvalancheDataset): + """Update the buffer using the given dataset. 
+
+        :param new_data: the new dataset of samples to add to the buffer.
+        """
         new_weights = torch.rand(len(new_data))
 
         cat_weights = torch.cat([new_weights, self._buffer_weights])
@@ -317,7 +323,7 @@ def __init__(self, max_size: int,
 
     def update(self, strategy: "BaseStrategy", **kwargs):
         new_data = strategy.experience.dataset
-        new_groups = self.make_groups(strategy, new_data)
+        new_groups = self._make_groups(strategy, new_data)
         self.seen_groups.update(new_groups.keys())
 
         # associate lengths to classes
@@ -344,7 +350,8 @@ def update(self, strategy: "BaseStrategy", **kwargs):
             self.buffer_groups[group_id].resize(strategy,
                                                 group_to_len[group_id])
 
-    def make_groups(self, strategy, data):
+    def _make_groups(self, strategy, data):
+        """Split the data by group according to `self.groupby`."""
         if self.groupby is None:
             return {0: data}
         elif self.groupby == 'task':
diff --git a/avalanche/training/strategies/base_strategy.py b/avalanche/training/strategies/base_strategy.py
index 1ee40490b..8dcc0dd9f 100644
--- a/avalanche/training/strategies/base_strategy.py
+++ b/avalanche/training/strategies/base_strategy.py
@@ -28,10 +28,10 @@
 from typing import TYPE_CHECKING
 
 from avalanche.training.plugins import EvaluationPlugin
+from avalanche.training.plugins import StrategyPlugin
 
 if TYPE_CHECKING:
     from avalanche.core import StrategyCallbacks
-    from avalanche.training.plugins import StrategyPlugin
 
 logger = logging.getLogger(__name__)
 
@@ -84,13 +84,15 @@ class BaseStrategy:
     """
 
     DISABLED_CALLBACKS: Sequence[str] = ()
+    """Internal class attribute used to disable some callbacks if a strategy
+    does not support them."""
 
     def __init__(self, model: Module, optimizer: Optimizer,
                  criterion=CrossEntropyLoss(),
                  train_mb_size: int = 1, train_epochs: int = 1,
                  eval_mb_size: int = 1, device='cpu',
                  plugins: Optional[Sequence['StrategyPlugin']] = None,
-                 evaluator=default_logger, eval_every=-1):
+                 evaluator=default_logger, eval_every=-1, peval_mode='epoch'):
         """ Init.
 
         :param model: PyTorch model.
@@ -108,6 +110,9 @@ def __init__(self, model: Module, optimizer: Optimizer,
             only at the end of the learning experience. Values >0 mean that
             `eval` is called every `eval_every` epochs and at the end of the
             learning experience.
+        :param peval_mode: one of {'epoch', 'iteration'}. Decides whether the
+            periodic evaluation during training should execute every
+            `eval_every` epochs or iterations (Default='epoch').
         """
         self._criterion = criterion
 
@@ -139,15 +144,18 @@ def __init__(self, model: Module, optimizer: Optimizer,
         self.evaluator = evaluator
         """ EvaluationPlugin used for logging and metric computations. """
 
+        # Configure periodic evaluation.
+        assert peval_mode in {'epoch', 'iteration'}
+        self.eval_every = eval_every
+        peval = PeriodicEval(eval_every, peval_mode)
+        self.plugins.append(peval)
+
         self.clock = Clock()
         """ Incremental counters for strategy events. """
         # WARNING: Clock needs to be the last plugin, otherwise
         # counters will be wrong for plugins called after it.
         self.plugins.append(self.clock)
 
-        self.eval_every = eval_every
-        """ Frequency of the evaluation during training. """
-
         ###################################################################
         # State variables. These are updated during the train/eval loops.
# ################################################################### @@ -231,6 +239,7 @@ def mb_y(self): @property def mb_task_id(self): + """Current mini-batch task labels.""" assert len(self.mbatch) >= 3 return self.mbatch[-1] @@ -267,11 +276,10 @@ def train(self, experiences: Union[Experience, Sequence[Experience]], experiences = [experiences] if eval_streams is None: eval_streams = [experiences] + self._eval_streams = eval_streams self._before_training(**kwargs) - self._periodic_eval(eval_streams, do_final=False, do_initial=True) - for self.experience in experiences: self.train_exp(self.experience, eval_streams, **kwargs) self._after_training(**kwargs) @@ -308,11 +316,6 @@ def train_exp(self, experience: Experience, eval_streams=None, **kwargs): self.make_optimizer() self._before_training_exp(**kwargs) - - do_final = True - if self.eval_every > 0 and \ - (self.train_epochs - 1) % self.eval_every == 0: - do_final = False for _ in range(self.train_epochs): self._before_training_epoch(**kwargs) @@ -323,46 +326,42 @@ def train_exp(self, experience: Experience, eval_streams=None, **kwargs): self.training_epoch(**kwargs) self._after_training_epoch(**kwargs) - self._periodic_eval(eval_streams, do_final=False) - # Final evaluation - self._periodic_eval(eval_streams, do_final=do_final) self._after_training_exp(**kwargs) - def _periodic_eval(self, eval_streams, do_final, do_initial=False): - """ Periodic eval controlled by `self.eval_every`. """ - # Since we are switching from train to eval model inside the training - # loop, we need to save the training state, and restore it after the - # eval is done. + def _load_train_state(self, _prev_model_training_modes, _prev_state): + # restore train-state variables and training mode. + self.experience, self.adapted_dataset = _prev_state[:2] + self.dataloader = _prev_state[2] + self.is_training = _prev_state[3] + # restore each layer's training mode to original + for name, layer in self.model.named_modules(): + try: + prev_mode = _prev_model_training_modes[name] + layer.train(mode=prev_mode) + except KeyError: + # Unknown parameter, probably added during the eval + # model's adaptation. We set it to train mode. + layer.train() + + def _save_train_state(self): + """Save the training state which may be modified by the eval loop. + + This currently includes: experience, adapted_dataset, dataloader, + is_training, and train/eval modes for each module. + + TODO: we probably need a better way to do this. + """ _prev_state = ( self.experience, self.adapted_dataset, self.dataloader, self.is_training) - # save each layer's training mode, to restore it later _prev_model_training_modes = {} for name, layer in self.model.named_modules(): _prev_model_training_modes[name] = layer.training - - curr_epoch = self.clock.train_exp_epochs - if (self.eval_every == 0 and (do_final or do_initial)) or \ - (self.eval_every > 0 and do_initial) or \ - (self.eval_every > 0 and curr_epoch % self.eval_every == 0): - # in the first case we are outside epoch loop - # in the second case we are within epoch loop - for exp in eval_streams: - self.eval(exp) - - # restore train-state variables and training mode. 
-        self.experience, self.adapted_dataset = _prev_state[:2]
-        self.dataloader = _prev_state[2]
-        self.is_training = _prev_state[3]
-
-        # restore each layer's training mode to original
-        for name, layer in self.model.named_modules():
-            prev_mode = _prev_model_training_modes[name]
-            layer.train(mode=prev_mode)
+        return _prev_model_training_modes, _prev_state
 
     def stop_training(self):
         """ Signals to stop training at the next iteration. """
@@ -387,6 +386,9 @@ def eval(self,
         :return: dictionary containing last recorded value for each metric
             name
         """
+        # eval can be called inside the train method.
+        # Save the shared state here to restore before returning.
+        train_state = self._save_train_state()
         self.is_training = False
         self.model.eval()
 
@@ -410,9 +412,10 @@ def eval(self,
             self._after_eval_exp(**kwargs)
 
         self._after_eval(**kwargs)
 
         res = self.evaluator.get_last_metrics()
-
+        # restore previous shared state.
+        self._load_train_state(*train_state)
         return res
 
     def _before_training_exp(self, **kwargs):
@@ -595,6 +598,7 @@ def _after_eval_dataset_adaptation(self, **kwargs):
             p.after_eval_dataset_adaptation(self, **kwargs)
 
     def eval_epoch(self, **kwargs):
+        """Evaluation loop over the current `self.dataloader`."""
         for self.mbatch in self.dataloader:
             self._unpack_minibatch()
             self._before_eval_iteration(**kwargs)
@@ -635,6 +639,10 @@ def _before_train_dataset_adaptation(self, **kwargs):
             p.before_train_dataset_adaptation(self, **kwargs)
 
     def model_adaptation(self, model=None):
+        """Adapts the model to the current data.
+
+        Calls the :class:`~avalanche.models.DynamicModule`s adaptation.
+        """
         if model is None:
             model = self.model
 
@@ -644,9 +652,14 @@ def model_adaptation(self, model=None):
         return model.to(self.device)
 
     def forward(self):
+        """Compute the model's output given the current mini-batch."""
         return avalanche_forward(self.model, self.mb_x, self.mb_task_id)
 
     def make_optimizer(self):
+        """Optimizer initialization.
+
+        Called before each training experience to configure the optimizer.
+        """
         # we reset the optimizer's state after each experience.
         # This allows to add new parameters (new heads) and
         # freezing old units during the model's adaptation phase.
@@ -683,4 +696,83 @@ def _warn_for_disabled_callbacks(
     )
 
 
+class PeriodicEval(StrategyPlugin):
+    """Schedules periodic evaluation during training.
+
+    This plugin is automatically configured and added by the BaseStrategy.
+    """
+
+    def __init__(self, eval_every=-1, peval_mode='epoch', do_initial=True):
+        """Init.
+
+        :param eval_every: the frequency of the calls to `eval` inside the
+            training loop. -1 disables the evaluation. 0 means `eval` is called
+            only at the end of the learning experience. Values >0 mean that
+            `eval` is called every `eval_every` epochs and at the end of the
+            learning experience.
+        :param peval_mode: one of {'epoch', 'iteration'}. Decides whether the
+            periodic evaluation during training should execute every
+            `eval_every` epochs or iterations (Default='epoch').
+        :param do_initial: whether to evaluate before each `train` call.
+            Occasionally needed because some metrics need to know the
+            accuracy before training.
+        """
+        super().__init__()
+        assert peval_mode in {'epoch', 'iteration'}
+        self.eval_every = eval_every
+        self.peval_mode = peval_mode
+        self.do_initial = do_initial and eval_every > -1
+        self.do_final = None
+        self._is_eval_updated = False
+
+    def before_training(self, strategy, **kwargs):
+        """Eval before each learning experience.
+
+        Occasionally needed because some metrics need the accuracy before
+        training.
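+
+        A sketch of the knobs involved (illustrative only; assumes `model`,
+        `optimizer`, `criterion`, an `experience` and a `valid_stream` are
+        already defined)::
+
+            strategy = Naive(model, optimizer, criterion,
+                             eval_every=1, peval_mode='epoch')
+            strategy.train(experience, eval_streams=[valid_stream])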
+ """ + if self.do_initial: + self._peval(strategy) + + def before_training_exp(self, strategy, **kwargs): + # We evaluate at the start of each experience because train_epochs + # could change. + self.do_final = True + if self.peval_mode == 'epoch': + if self.eval_every > 0 and \ + (strategy.train_epochs - 1) % self.eval_every == 0: + self.do_final = False + else: # peval_mode == 'iteration' + # we may need to fix this but we don't have a way to know + # the number of total iterations. + # Right now there may be two eval calls at the last iterations. + pass + self.do_final = self.do_final and self.eval_every > -1 + + def after_training_exp(self, strategy, **kwargs): + """Final eval after a learning experience.""" + if self.do_final: + self._peval(strategy) + + def _peval(self, strategy): + for el in strategy._eval_streams: + strategy.eval(el) + + def _maybe_peval(self, strategy, counter): + if self.eval_every > 0 and counter % self.eval_every == 0: + self._peval(strategy) + + def after_training_epoch(self, strategy: 'BaseStrategy', **kwargs): + """Periodic eval controlled by `self.eval_every` and + `self.peval_mode`.""" + if self.peval_mode == 'epoch': + self._maybe_peval(strategy, strategy.clock.train_exp_epochs) + + def after_training_iteration(self, strategy: 'BaseStrategy', **kwargs): + """Periodic eval controlled by `self.eval_every` and + `self.peval_mode`.""" + if self.peval_mode == 'iteration': + self._maybe_peval(strategy, strategy.clock.train_exp_iterations) + + __all__ = ['BaseStrategy'] diff --git a/avalanche/training/strategies/deep_slda.py b/avalanche/training/strategies/deep_slda.py index 9acfe5257..a7ad76282 100644 --- a/avalanche/training/strategies/deep_slda.py +++ b/avalanche/training/strategies/deep_slda.py @@ -32,8 +32,8 @@ def __init__(self, slda_model, criterion, eval_mb_size: int = 1, device='cpu', plugins: Optional[Sequence['StrategyPlugin']] = None, evaluator=default_logger, eval_every=-1): - """ - Init function for the SLDA model. + """Init function for the SLDA model. + :param slda_model: a PyTorch model :param criterion: loss function :param output_layer_name: if not None, wrap model to retrieve @@ -82,6 +82,7 @@ def __init__(self, slda_model, criterion, self.prev_num_updates = -1 def forward(self, return_features=False): + """Compute the model's output given the current mini-batch.""" self.model.eval() if isinstance(self.model, MultiTaskModule): feat = self.model(self.mb_x, self.mb_task_id) @@ -124,6 +125,8 @@ def training_epoch(self, **kwargs): self._after_training_iteration(**kwargs) def make_optimizer(self): + """Empty function. + Deep SLDA does not need a Pytorch optimizer.""" pass @torch.no_grad() diff --git a/avalanche/training/strategies/icarl.py b/avalanche/training/strategies/icarl.py index 9dda258af..8de9e07e9 100644 --- a/avalanche/training/strategies/icarl.py +++ b/avalanche/training/strategies/icarl.py @@ -31,7 +31,7 @@ def __init__(self, feature_extractor: Module, classifier: Module, eval_mb_size: int = None, device=None, plugins: Optional[List[StrategyPlugin]] = None, evaluator: EvaluationPlugin = default_logger, eval_every=-1): - """ + """Init. :param feature_extractor: The feature extractor. 
:param classifier: The differentiable classifier that takes as input diff --git a/avalanche/training/strategies/joint_training.py b/avalanche/training/strategies/joint_training.py index c2e52923a..1b5dd9f73 100644 --- a/avalanche/training/strategies/joint_training.py +++ b/avalanche/training/strategies/joint_training.py @@ -48,7 +48,8 @@ def __init__(self, model: Module, optimizer: Optimizer, criterion, eval_mb_size: int = 1, device='cpu', plugins: Optional[Sequence['StrategyPlugin']] = None, evaluator=default_logger, eval_every=-1): - """ + """Init. + :param model: PyTorch model. :param optimizer: PyTorch optimizer. :param criterion: loss function. diff --git a/avalanche/training/strategies/strategy_wrappers.py b/avalanche/training/strategies/strategy_wrappers.py index 96f5c6f23..91498d8ea 100644 --- a/avalanche/training/strategies/strategy_wrappers.py +++ b/avalanche/training/strategies/strategy_wrappers.py @@ -39,7 +39,8 @@ def __init__(self, model: Module, optimizer: Optimizer, train_mb_size: int = 1, train_epochs: int = 1, eval_mb_size: int = None, device=None, plugins: Optional[List[StrategyPlugin]] = None, - evaluator: EvaluationPlugin = default_logger, eval_every=-1): + evaluator: EvaluationPlugin = default_logger, eval_every=-1, + **base_kwargs): """ Creates an instance of the Naive strategy. @@ -58,12 +59,14 @@ def __init__(self, model: Module, optimizer: Optimizer, only at the end of the learning experience. Values >0 mean that `eval` is called every `eval_every` epochs and at the end of the learning experience. + :param **base_kwargs: any additional + :class:`~avalanche.training.BaseStrategy` constructor arguments. """ super().__init__( model, optimizer, criterion, train_mb_size=train_mb_size, train_epochs=train_epochs, eval_mb_size=eval_mb_size, device=device, plugins=plugins, - evaluator=evaluator, eval_every=eval_every) + evaluator=evaluator, eval_every=eval_every, **base_kwargs) class PNNStrategy(BaseStrategy): @@ -77,7 +80,8 @@ def __init__(self, num_layers: int, in_features: int, train_mb_size: int = 1, train_epochs: int = 1, eval_mb_size: int = None, device=None, plugins: Optional[List[StrategyPlugin]] = None, - evaluator: EvaluationPlugin = default_logger, eval_every=-1): + evaluator: EvaluationPlugin = default_logger, eval_every=-1, + **base_kwargs): """ Progressive Neural Network strategy. :param num_layers: Number of layers for the PNN architecture. @@ -103,6 +107,8 @@ def __init__(self, num_layers: int, in_features: int, only at the end of the learning experience. Values >0 mean that `eval` is called every `eval_every` epochs and at the end of the learning experience. + :param **base_kwargs: any additional + :class:`~avalanche.training.BaseStrategy` constructor arguments. """ model = PNN( num_layers=num_layers, @@ -117,7 +123,7 @@ def __init__(self, num_layers: int, in_features: int, model, optimizer, criterion, train_mb_size=train_mb_size, train_epochs=train_epochs, eval_mb_size=eval_mb_size, device=device, plugins=plugins, - evaluator=evaluator, eval_every=eval_every) + evaluator=evaluator, eval_every=eval_every, **base_kwargs) class CWRStar(BaseStrategy): @@ -126,7 +132,8 @@ def __init__(self, model: Module, optimizer: Optimizer, criterion, cwr_layer_name: str, train_mb_size: int = 1, train_epochs: int = 1, eval_mb_size: int = None, device=None, plugins: Optional[List[StrategyPlugin]] = None, - evaluator: EvaluationPlugin = default_logger, eval_every=-1): + evaluator: EvaluationPlugin = default_logger, eval_every=-1, + **base_kwargs): """ :param model: The model. 
@@ -146,6 +153,8 @@ def __init__(self, model: Module, optimizer: Optimizer, criterion, only at the end of the learning experience. Values >0 mean that `eval` is called every `eval_every` epochs and at the end of the learning experience. + :param **base_kwargs: any additional + :class:`~avalanche.training.BaseStrategy` constructor arguments. """ cwsp = CWRStarPlugin(model, cwr_layer_name, freeze_remaining_model=True) if plugins is None: @@ -156,7 +165,7 @@ def __init__(self, model: Module, optimizer: Optimizer, criterion, model, optimizer, criterion, train_mb_size=train_mb_size, train_epochs=train_epochs, eval_mb_size=eval_mb_size, device=device, plugins=plugins, - evaluator=evaluator, eval_every=eval_every) + evaluator=evaluator, eval_every=eval_every, **base_kwargs) class Replay(BaseStrategy): @@ -170,8 +179,10 @@ def __init__(self, model: Module, optimizer: Optimizer, criterion, train_mb_size: int = 1, train_epochs: int = 1, eval_mb_size: int = None, device=None, plugins: Optional[List[StrategyPlugin]] = None, - evaluator: EvaluationPlugin = default_logger, eval_every=-1): - """ + evaluator: EvaluationPlugin = default_logger, eval_every=-1, + **base_kwargs): + """ Init. + :param model: The model. :param optimizer: The optimizer to use. :param criterion: The loss criterion to use. @@ -188,6 +199,8 @@ def __init__(self, model: Module, optimizer: Optimizer, criterion, only at the end of the learning experience. Values >0 mean that `eval` is called every `eval_every` epochs and at the end of the learning experience. + :param **base_kwargs: any additional + :class:`~avalanche.training.BaseStrategy` constructor arguments. """ rp = ReplayPlugin(mem_size) @@ -201,7 +214,7 @@ def __init__(self, model: Module, optimizer: Optimizer, criterion, train_epochs=train_epochs, eval_mb_size=eval_mb_size, device=device, plugins=plugins, - evaluator=evaluator, eval_every=eval_every) + evaluator=evaluator, eval_every=eval_every, **base_kwargs) class GSS_greedy(BaseStrategy): @@ -215,8 +228,10 @@ def __init__(self, model: Module, optimizer: Optimizer, criterion, train_mb_size: int = 1, train_epochs: int = 1, eval_mb_size: int = None, device=None, plugins: Optional[List[StrategyPlugin]] = None, - evaluator: EvaluationPlugin = default_logger, eval_every=-1): - """ + evaluator: EvaluationPlugin = default_logger, eval_every=-1, + **base_kwargs): + """Init. + :param model: The model. :param optimizer: The optimizer to use. :param criterion: The loss criterion to use. @@ -234,6 +249,8 @@ def __init__(self, model: Module, optimizer: Optimizer, criterion, only at the end of the learning experience. Values >0 mean that `eval` is called every `eval_every` epochs and at the end of the learning experience. + :param **base_kwargs: any additional + :class:`~avalanche.training.BaseStrategy` constructor arguments. 
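+
+        For example (an illustrative sketch, with `model`, `optimizer` and
+        `criterion` already defined), any extra BaseStrategy argument can be
+        forwarded through ``**base_kwargs``::
+
+            strategy = GSS_greedy(model, optimizer, criterion,
+                                  peval_mode='iteration')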
""" rp = GSS_greedyPlugin(mem_size=mem_size, mem_strength=mem_strength, input_size=input_size) @@ -245,7 +262,7 @@ def __init__(self, model: Module, optimizer: Optimizer, criterion, model, optimizer, criterion, train_mb_size=train_mb_size, train_epochs=train_epochs, eval_mb_size=eval_mb_size, device=device, plugins=plugins, - evaluator=evaluator, eval_every=eval_every) + evaluator=evaluator, eval_every=eval_every, **base_kwargs) class GDumb(BaseStrategy): @@ -259,8 +276,10 @@ def __init__(self, model: Module, optimizer: Optimizer, criterion, train_mb_size: int = 1, train_epochs: int = 1, eval_mb_size: int = None, device=None, plugins: Optional[List[StrategyPlugin]] = None, - evaluator: EvaluationPlugin = default_logger, eval_every=-1): - """ + evaluator: EvaluationPlugin = default_logger, eval_every=-1, + **base_kwargs): + """Init. + :param model: The model. :param optimizer: The optimizer to use. :param criterion: The loss criterion to use. @@ -277,6 +296,8 @@ def __init__(self, model: Module, optimizer: Optimizer, criterion, only at the end of the learning experience. Values >0 mean that `eval` is called every `eval_every` epochs and at the end of the learning experience. + :param **base_kwargs: any additional + :class:`~avalanche.training.BaseStrategy` constructor arguments. """ gdumb = GDumbPlugin(mem_size) @@ -289,7 +310,7 @@ def __init__(self, model: Module, optimizer: Optimizer, criterion, model, optimizer, criterion, train_mb_size=train_mb_size, train_epochs=train_epochs, eval_mb_size=eval_mb_size, device=device, plugins=plugins, - evaluator=evaluator, eval_every=eval_every) + evaluator=evaluator, eval_every=eval_every, **base_kwargs) class LwF(BaseStrategy): @@ -304,8 +325,10 @@ def __init__(self, model: Module, optimizer: Optimizer, criterion, train_mb_size: int = 1, train_epochs: int = 1, eval_mb_size: int = None, device=None, plugins: Optional[List[StrategyPlugin]] = None, - evaluator: EvaluationPlugin = default_logger, eval_every=-1): - """ + evaluator: EvaluationPlugin = default_logger, eval_every=-1, + **base_kwargs): + """Init. + :param model: The model. :param optimizer: The optimizer to use. :param criterion: The loss criterion to use. @@ -324,6 +347,8 @@ def __init__(self, model: Module, optimizer: Optimizer, criterion, only at the end of the learning experience. Values >0 mean that `eval` is called every `eval_every` epochs and at the end of the learning experience. + :param **base_kwargs: any additional + :class:`~avalanche.training.BaseStrategy` constructor arguments. """ lwf = LwFPlugin(alpha, temperature) @@ -336,7 +361,7 @@ def __init__(self, model: Module, optimizer: Optimizer, criterion, model, optimizer, criterion, train_mb_size=train_mb_size, train_epochs=train_epochs, eval_mb_size=eval_mb_size, device=device, plugins=plugins, - evaluator=evaluator, eval_every=eval_every) + evaluator=evaluator, eval_every=eval_every, **base_kwargs) class AGEM(BaseStrategy): @@ -351,7 +376,8 @@ def __init__(self, model: Module, optimizer: Optimizer, criterion, train_mb_size: int = 1, train_epochs: int = 1, eval_mb_size: int = None, device=None, plugins: Optional[List[StrategyPlugin]] = None, - evaluator: EvaluationPlugin = default_logger, eval_every=-1): + evaluator: EvaluationPlugin = default_logger, eval_every=-1, + **base_kwargs): """ Init. :param model: The model. @@ -372,6 +398,8 @@ def __init__(self, model: Module, optimizer: Optimizer, criterion, only at the end of the learning experience. 
Values >0 mean that `eval` is called every `eval_every` epochs and at the end of the learning experience. + :param **base_kwargs: any additional + :class:`~avalanche.training.BaseStrategy` constructor arguments. """ agem = AGEMPlugin(patterns_per_exp, sample_size) @@ -384,7 +412,7 @@ def __init__(self, model: Module, optimizer: Optimizer, criterion, model, optimizer, criterion, train_mb_size=train_mb_size, train_epochs=train_epochs, eval_mb_size=eval_mb_size, device=device, plugins=plugins, - evaluator=evaluator, eval_every=eval_every) + evaluator=evaluator, eval_every=eval_every, **base_kwargs) class GEM(BaseStrategy): @@ -399,7 +427,8 @@ def __init__(self, model: Module, optimizer: Optimizer, criterion, train_mb_size: int = 1, train_epochs: int = 1, eval_mb_size: int = None, device=None, plugins: Optional[List[StrategyPlugin]] = None, - evaluator: EvaluationPlugin = default_logger, eval_every=-1): + evaluator: EvaluationPlugin = default_logger, eval_every=-1, + **base_kwargs): """ Init. :param model: The model. @@ -420,6 +449,8 @@ def __init__(self, model: Module, optimizer: Optimizer, criterion, only at the end of the learning experience. Values >0 mean that `eval` is called every `eval_every` epochs and at the end of the learning experience. + :param **base_kwargs: any additional + :class:`~avalanche.training.BaseStrategy` constructor arguments. """ gem = GEMPlugin(patterns_per_exp, memory_strength) @@ -432,7 +463,7 @@ def __init__(self, model: Module, optimizer: Optimizer, criterion, model, optimizer, criterion, train_mb_size=train_mb_size, train_epochs=train_epochs, eval_mb_size=eval_mb_size, device=device, plugins=plugins, - evaluator=evaluator, eval_every=eval_every) + evaluator=evaluator, eval_every=eval_every, **base_kwargs) class EWC(BaseStrategy): @@ -449,7 +480,8 @@ def __init__(self, model: Module, optimizer: Optimizer, criterion, train_mb_size: int = 1, train_epochs: int = 1, eval_mb_size: int = None, device=None, plugins: Optional[List[StrategyPlugin]] = None, - evaluator: EvaluationPlugin = default_logger, eval_every=-1): + evaluator: EvaluationPlugin = default_logger, eval_every=-1, + **base_kwargs): """ Init. :param model: The model. @@ -480,6 +512,8 @@ def __init__(self, model: Module, optimizer: Optimizer, criterion, only at the end of the learning experience. Values >0 mean that `eval` is called every `eval_every` epochs and at the end of the learning experience. + :param **base_kwargs: any additional + :class:`~avalanche.training.BaseStrategy` constructor arguments. """ ewc = EWCPlugin(ewc_lambda, mode, decay_factor, keep_importance_data) if plugins is None: @@ -491,7 +525,7 @@ def __init__(self, model: Module, optimizer: Optimizer, criterion, model, optimizer, criterion, train_mb_size=train_mb_size, train_epochs=train_epochs, eval_mb_size=eval_mb_size, device=device, plugins=plugins, - evaluator=evaluator, eval_every=eval_every) + evaluator=evaluator, eval_every=eval_every, **base_kwargs) class SynapticIntelligence(BaseStrategy): @@ -515,7 +549,7 @@ def __init__(self, model: Module, optimizer: Optimizer, criterion, eps: float = 0.0000001, train_mb_size: int = 1, train_epochs: int = 1, eval_mb_size: int = 1, device='cpu', plugins: Optional[Sequence['StrategyPlugin']] = None, - evaluator=default_logger, eval_every=-1): + evaluator=default_logger, eval_every=-1, **base_kwargs): """ Init. Creates an instance of the Synaptic Intelligence strategy. 
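+
+        A small usage sketch (illustrative only; `model`, `optimizer` and
+        `criterion` are assumed to exist, and `si_lambda` is the SI
+        regularization strength)::
+
+            strategy = SynapticIntelligence(model, optimizer, criterion,
+                                            si_lambda=0.0001)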
@@ -540,6 +574,8 @@ def __init__(self, model: Module, optimizer: Optimizer, criterion, only at the end of the learning experience. Values >0 mean that `eval` is called every `eval_every` epochs and at the end of the learning experience. + :param **base_kwargs: any additional + :class:`~avalanche.training.BaseStrategy` constructor arguments. """ if plugins is None: plugins = [] @@ -551,7 +587,7 @@ def __init__(self, model: Module, optimizer: Optimizer, criterion, super(SynapticIntelligence, self).__init__( model, optimizer, criterion, train_mb_size, train_epochs, eval_mb_size, device=device, plugins=plugins, evaluator=evaluator, - eval_every=eval_every + eval_every=eval_every, **base_kwargs ) @@ -569,7 +605,7 @@ def __init__(self, model: Module, optimizer: Optimizer, criterion, eval_mb_size: int = None, device=None, plugins: Optional[List[StrategyPlugin]] = None, evaluator: EvaluationPlugin = default_logger, - eval_every=-1): + eval_every=-1, **base_kwargs): """ Init. :param model: The model. @@ -597,6 +633,8 @@ def __init__(self, model: Module, optimizer: Optimizer, criterion, only at the end of the learning experience. Values >0 mean that `eval` is called every `eval_every` epochs and at the end of the learning experience. + :param **base_kwargs: any additional + :class:`~avalanche.training.BaseStrategy` constructor arguments. """ copep = CoPEPlugin(mem_size, n_classes, p_size, alpha, T) if plugins is None: @@ -607,7 +645,7 @@ def __init__(self, model: Module, optimizer: Optimizer, criterion, model, optimizer, criterion, train_mb_size=train_mb_size, train_epochs=train_epochs, eval_mb_size=eval_mb_size, device=device, plugins=plugins, - evaluator=evaluator, eval_every=eval_every) + evaluator=evaluator, eval_every=eval_every, **base_kwargs) class LFL(BaseStrategy): @@ -623,7 +661,8 @@ def __init__(self, model: Module, optimizer: Optimizer, criterion, train_mb_size: int = 1, train_epochs: int = 1, eval_mb_size: int = None, device=None, plugins: Optional[List[StrategyPlugin]] = None, - evaluator: EvaluationPlugin = default_logger, eval_every=-1): + evaluator: EvaluationPlugin = default_logger, eval_every=-1, + **base_kwargs): """ Init. :param model: The model. @@ -643,6 +682,8 @@ def __init__(self, model: Module, optimizer: Optimizer, criterion, only at the end of the learning experience. Values >0 mean that `eval` is called every `eval_every` epochs and at the end of the learning experience. + :param **base_kwargs: any additional + :class:`~avalanche.training.BaseStrategy` constructor arguments. """ lfl = LFLPlugin(lambda_e) @@ -655,7 +696,7 @@ def __init__(self, model: Module, optimizer: Optimizer, criterion, model, optimizer, criterion, train_mb_size=train_mb_size, train_epochs=train_epochs, eval_mb_size=eval_mb_size, device=device, plugins=plugins, - evaluator=evaluator, eval_every=eval_every) + evaluator=evaluator, eval_every=eval_every, **base_kwargs) __all__ = [ diff --git a/docs/benchmarks.rst b/docs/benchmarks.rst index 0e28d100b..f51f3846b 100644 --- a/docs/benchmarks.rst +++ b/docs/benchmarks.rst @@ -2,3 +2,236 @@ Benchmarks module ============================ | This module provides popular continual learning benchmarks and generic facilities to build custom benchmarks. + +* Popular benchmarks (like SplitMNIST, PermutedMNIST, SplitCIFAR, ...) are contained in the ``classic`` sub-module. +* Dataset implementations are available in the ``datasets`` sub-module. +* One can create new benchmarks by using the utilities found in the ``generators`` sub-module. 
+* Avalanche uses custom dataset and dataloader implementations contained in the ``utils`` sub-module. More info can be found in these two How-Tos `here `_ and `here `_. + + +avalanche.benchmarks +---------------------------------------- + +.. contents:: + :depth: 2 + :local: + :backlinks: top + +.. currentmodule:: avalanche.benchmarks.classic + +Classic Benchmarks +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +| The **classic benchmarks** sub-module covers all mainstream benchmarks. Expect this list to grow over time! + + +CORe50-based benchmarks +............................ +Benchmarks based on the `CORe50 `_ dataset. + +.. autosummary:: + :toctree: generated + + CORe50 + + +CIFAR-based benchmarks +............................ +Benchmarks based on the `CIFAR-10 and CIFAR-100 `_ datasets. + +.. autosummary:: + :toctree: generated + + SplitCIFAR10 + SplitCIFAR100 + SplitCIFAR110 + + +CUB200-based benchmarks +............................ +Benchmarks based on the `Caltech-UCSD Birds 200 `_ dataset. + +.. autosummary:: + :toctree: generated + + SplitCUB200 + + +EndlessCLSim-based benchmarks +............................ +Benchmarks based on the `EndlessCLSim `_ derived datasets. + +.. autosummary:: + :toctree: generated + + EndlessCLSim + + +FashionMNIST-based benchmarks +............................ +Benchmarks based on the `Fashion MNIST `_ dataset. + +.. autosummary:: + :toctree: generated + + SplitFMNIST + + +ImageNet-based benchmarks +............................ +Benchmarks based on the `ImageNet ILSVRC-2012 `_ dataset. + +.. autosummary:: + :toctree: generated + + SplitImageNet + SplitTinyImageNet + + +iNaturalist-based benchmarks +............................ +Benchmarks based on the `iNaturalist-2018 `_ dataset. + +.. autosummary:: + :toctree: generated + + SplitInaturalist + + +MNIST-based benchmarks +............................ +Benchmarks based on the `MNIST `_ dataset. + +.. autosummary:: + :toctree: generated + + SplitMNIST + PermutedMNIST + RotatedMNIST + + +Omniglot-based benchmarks +............................ +Benchmarks based on the `Omniglot `_ dataset. + +.. autosummary:: + :toctree: generated + + SplitOmniglot + PermutedOmniglot + RotatedOmniglot + + +OpenLORIS-based benchmarks +............................ +Benchmarks based on the `OpenLORIS `_ dataset. + +.. autosummary:: + :toctree: generated + + OpenLORIS + + +Stream51-based benchmarks +............................ +Benchmarks based on the `Stream-51 `_ dataset. + +.. autosummary:: + :toctree: generated + + CLStream51 + + +.. currentmodule:: avalanche.benchmarks.datasets + +Datasets +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +| The **datasets** sub-module provides PyTorch dataset implementations for datasets missing from the torchvision/audio/* libraries. These datasets can also be used in a standalone way! + +.. autosummary:: + :toctree: generated + + CORe50Dataset + CUB200 + EndlessCLSimDataset + INATURALIST2018 + MiniImageNetDataset + Omniglot + OpenLORIS + Stream51 + TinyImagenet + +.. currentmodule:: avalanche.benchmarks.generators + +Benchmark Generators +^^^^^^^^^^^^^^^^^^^^^^^^^^ +| The **generators** sub-module provides a number of functions that can be used to create a new benchmark. +| This set of functions tries to cover the most common use cases (Class/Task-Incremental, Domain-Incremental, ...) but it also allows for the creation of entirely custom benchmarks (based on lists of tensors, on file lists, ...). + + +Generators for Class/Task/Domain-incremental benchmarks +............................ + +..
autosummary:: + :toctree: generated + + nc_benchmark + ni_benchmark + + +Starting from tensor lists, file lists, PyTorch datasets +............................ + +.. autosummary:: + :toctree: generated + + dataset_benchmark + filelist_benchmark + paths_benchmark + tensors_benchmark + + +Misc (make data-incremental, add a validation stream, ...) +............................ + +| Avalanche offers utilities to adapt a previously instantiated benchmark object. +| More utilities to come! + +.. autosummary:: + :toctree: generated + + data_incremental_benchmark + benchmark_with_validation_stream + +.. currentmodule:: avalanche.benchmarks.utils + +Utils (Data Loading and AvalancheDataset) +^^^^^^^^^^^^^^^^^^^^^^^^^^ +| The custom dataset and dataloader implementations contained in this sub-module are described in more detail in the How-Tos `here `_ and `here `_. + + +.. currentmodule:: avalanche.benchmarks.utils.data_loader + +Data Loaders +............................ +.. autosummary:: + :toctree: generated + + TaskBalancedDataLoader + GroupBalancedDataLoader + ReplayDataLoader + GroupBalancedInfiniteDataLoader + + +.. currentmodule:: avalanche.benchmarks.utils.avalanche_dataset + +AvalancheDataset +............................ +.. autosummary:: + :toctree: generated + + AvalancheDataset + AvalancheSubset + AvalancheTensorDataset + AvalancheConcatDataset diff --git a/docs/gitbook/.gitbook/assets/avalanche (1).png b/docs/gitbook/.gitbook/assets/avalanche (1).png new file mode 100644 index 000000000..141b3481b Binary files /dev/null and b/docs/gitbook/.gitbook/assets/avalanche (1).png differ diff --git a/docs/gitbook/.gitbook/assets/avalanche (2).png b/docs/gitbook/.gitbook/assets/avalanche (2).png new file mode 100644 index 000000000..141b3481b Binary files /dev/null and b/docs/gitbook/.gitbook/assets/avalanche (2).png differ diff --git a/docs/gitbook/.gitbook/assets/avalanche (3).png b/docs/gitbook/.gitbook/assets/avalanche (3).png new file mode 100644 index 000000000..141b3481b Binary files /dev/null and b/docs/gitbook/.gitbook/assets/avalanche (3).png differ diff --git a/docs/gitbook/.gitbook/assets/avalanche.png b/docs/gitbook/.gitbook/assets/avalanche.png new file mode 100644 index 000000000..141b3481b Binary files /dev/null and b/docs/gitbook/.gitbook/assets/avalanche.png differ diff --git a/docs/gitbook/.gitbook/assets/avalanche_api (1).png b/docs/gitbook/.gitbook/assets/avalanche_api (1).png new file mode 100644 index 000000000..d27967d12 Binary files /dev/null and b/docs/gitbook/.gitbook/assets/avalanche_api (1).png differ diff --git a/docs/gitbook/.gitbook/assets/avalanche_api.png b/docs/gitbook/.gitbook/assets/avalanche_api.png new file mode 100644 index 000000000..d27967d12 Binary files /dev/null and b/docs/gitbook/.gitbook/assets/avalanche_api.png differ diff --git a/docs/gitbook/README.md b/docs/gitbook/README.md index 84dfb5c04..5a53873e6 100644 --- a/docs/gitbook/README.md +++ b/docs/gitbook/README.md @@ -176,8 +176,8 @@ for task_id, train_dataset in enumerate(list_train_dataset): acc_results = [] for task_id, test_dataset in enumerate(list_test_dataset): - train_data_loader = DataLoader( - train_dataset, num_workers=num_workers, batch_size=train_mb_size) + test_data_loader = DataLoader( + test_dataset, num_workers=num_workers, batch_size=test_mb_size) correct = 0 for iteration, (test_mb_x, test_mb_y) in enumerate(test_data_loader): diff --git a/docs/gitbook/SUMMARY.md b/docs/gitbook/SUMMARY.md index 33826d7aa..639a0ef57 100644 --- a/docs/gitbook/SUMMARY.md +++
b/docs/gitbook/SUMMARY.md @@ -9,6 +9,26 @@ * [How to Install](getting-started/how-to-install.md) * [Learn Avalanche in 5 Minutes](getting-started/learn-avalanche-in-5-minutes.md) +## 📙 From Zero to Hero Tutorial + +* [Introduction](from-zero-to-hero-tutorial/01\_introduction.md) +* [Models](from-zero-to-hero-tutorial/02\_models.md) +* [Benchmarks](from-zero-to-hero-tutorial/03\_benchmarks.md) +* [Training](from-zero-to-hero-tutorial/04\_training.md) +* [Evaluation](from-zero-to-hero-tutorial/05\_evaluation.md) +* [Loggers](from-zero-to-hero-tutorial/06\_loggers.md) +* [Putting All Together](from-zero-to-hero-tutorial/07\_putting-all-together.md) +* [Extending Avalanche](from-zero-to-hero-tutorial/08\_extending-avalanche.md) +* [Contribute to Avalanche](from-zero-to-hero-tutorial/09\_contribute-to-avalanche.md) + +## How-Tos + +* [AvalancheDataset](how-tos/avalanchedataset/README.md) + * [Preamble: PyTorch Datasets](how-tos/avalanchedataset/preamble-pytorch-datasets.md) + * [Creating AvalancheDatasets](how-tos/avalanchedataset/creating-avalanchedatasets.md) + * [Advanced Transformations](how-tos/avalanchedataset/advanced-transformations.md) +* [Dataloaders, Buffers, and Replay](how-tos/dataloading\_buffers\_replay.md) + ## 📝 Examples * [Models](examples/models.md) @@ -17,22 +37,6 @@ * [Evaluation](examples/evaluation.md) * [Loggers](examples/loggers.md) -## 📙 From Zero to Hero Tutorial - -* [Introduction](from-zero-to-hero-tutorial/01_introduction.md) -* [Models](from-zero-to-hero-tutorial/02_models.md) -* [Benchmarks](from-zero-to-hero-tutorial/03_benchmarks.md) -* [Training](from-zero-to-hero-tutorial/04_training.md) -* [Evaluation](from-zero-to-hero-tutorial/05_evaluation.md) -* [Loggers](from-zero-to-hero-tutorial/06_loggers.md) -* [Putting All Together](from-zero-to-hero-tutorial/07_putting-all-together.md) -* [Extending Avalanche](from-zero-to-hero-tutorial/08_extending-avalanche.md) -* [Contribute to Avalanche](from-zero-to-hero-tutorial/09_contribute-to-avalanche.md) - -## How-Tos - -* [Dataloaders, Buffers, and Replay](how-tos/dataloading_buffers_replay.md) - ## 💻 Code Documentation * [Avalanche API](https://avalanche-api.continualai.org) @@ -48,11 +52,10 @@ * [Request a Feature](questions-and-issues/request-a-feature.md) * [Give Feedback](questions-and-issues/give-feedback.md) -## 👪 About Us +## 👪 About Us * [The People](contacts-and-links/the-team.md) * [Join Us!](contacts-and-links/join-us.md) -* [Slack](https://join.slack.com/t/continualai/shared_invite/enQtNjQxNDYwMzkxNzk0LTBhYjg2MjM0YTM2OWRkNDYzOGE0ZTIzNDQ0ZGMzNDE3ZGUxNTZmNmM1YzJiYzgwMTkyZDQxYTlkMTI3NzZkNjU) +* [Slack](https://join.slack.com/t/continualai/shared\_invite/enQtNjQxNDYwMzkxNzk0LTBhYjg2MjM0YTM2OWRkNDYzOGE0ZTIzNDQ0ZGMzNDE3ZGUxNTZmNmM1YzJiYzgwMTkyZDQxYTlkMTI3NzZkNjU) * [Email](mailto:contact@continualai.org) * [Twitter](https://twitter.com/AvalancheLib) - diff --git a/docs/gitbook/contacts-and-links/join-us.md b/docs/gitbook/contacts-and-links/join-us.md index 4db43db48..ae612d26d 100644 --- a/docs/gitbook/contacts-and-links/join-us.md +++ b/docs/gitbook/contacts-and-links/join-us.md @@ -4,16 +4,15 @@ description: Happiness is only Real when Shared # Join Us! -> _Do you want to make Avalanche more suitable for your own research project? 
-> Or maybe you just want to learn more about it and sharpen your coding skills in this area?_ +> _Do you want to make Avalanche more suitable for your own research project?_\ +> _Or maybe you just want to learn more about it and sharpen your coding skills in this area?_ -No matter the reasons, we are always looking for new members __that can help _help us improve Avalanche and make it a better tool for everyone!_ +No matter the reasons, we are always looking for new members that can _help us improve Avalanche and make it a better tool for everyone!_ -Building something great together 👪 is _fun_ and _fulfilling_ 🎈. Joining our team you will also join a family of _mentors_ and _friends_ that can let you collaborate, fave fun and ultimately achieve more in this area. +Building something great together 👪 is _fun_ and _fulfilling_ 🎈. By joining our team, you will also join a family of _mentors_ and _friends_ with whom you can collaborate, have fun and ultimately achieve more in this area. No matter your research or coding expertise level, we believe everyone has their own strengths that can help us build a wonderful tool, with _passion_ and _time_ being the fundamental ingredients. So, don't hesitate to contact [our team](the-team.md) to learn more about how you can help. Do it now! 😊 -![](../.gitbook/assets/join-us-you-5cae9e.jpg) - +![](../../../.gitbook/assets/join-us-you-5cae9e.jpg) diff --git a/docs/gitbook/contacts-and-links/the-team.md b/docs/gitbook/contacts-and-links/the-team.md index 14e11ddc9..113a4b0a0 100644 --- a/docs/gitbook/contacts-and-links/the-team.md +++ b/docs/gitbook/contacts-and-links/the-team.md @@ -4,17 +4,17 @@ description: All the People that Made Avalanche Great # The People -![Avalanche: Coming soon to your computer screens! 😂](../.gitbook/assets/avalanche_maintaners.png) +![Avalanche: Coming soon to your computer screens! 😂](../../../.gitbook/assets/avalanche\_maintaners.png) ## 🗂️ Maintainers The Project is maintained mostly by [ContinualAI Lab](https://www.continualai.org/lab/) members, with the core mission of supporting the production, organization and dissemination of original research on CL with **technical research**, **open source projects** and **tools** that can make the life of a CL researcher easier. -* [**Antonio Carta**](http://pages.di.unipi.it/carta/) \(Lead Mantainer\) -* [**Lorenzo Pellegrini** ](https://www.unibo.it/sitoweb/l.pellegrini)\(Mantainer\) -* [**Andrea Cossu**](https://andreacossu.github.io/) **\(**Mantainer\) -* [**Gabriele Graffieti**](https://www.unibo.it/sitoweb/gabriele.graffieti/en) \(Mantainer\) -* [**Vincenzo Lomonaco**](https://www.vincenzolomonaco.com/) \(Project Manager\) +* [**Antonio Carta**](http://pages.di.unipi.it/carta/) (Lead Maintainer) +* [**Lorenzo Pellegrini** ](https://www.unibo.it/sitoweb/l.pellegrini)(Maintainer) +* [**Andrea Cossu**](https://andreacossu.github.io) (Maintainer) +* [**Gabriele Graffieti**](https://www.unibo.it/sitoweb/gabriele.graffieti/en) (Maintainer) +* [**Vincenzo Lomonaco**](https://www.vincenzolomonaco.com) (Project Manager) ## 🔨 Contributors @@ -26,21 +26,20 @@ _Tyler Hayes, Matthias De Lange, Marc Masana, Jary Pomponi, Gido van de Ven, Mar _Avalanche_ is a great tool also thanks to its many users.
Here we list some research groups using _Avalanche_ for their continual learning research: -* [**ContinualAI Lab**](https://www.continualai.org/lab/) \(PI: Vincenzo Lomonaco\) -* [**Pervasive AI Lab**](http://pai.di.unipi.it/) \(PI: Davide Bacciu\) -* [**BioLab** ](http://biolab.csr.unibo.it/home.asp)\(PI: Davide Maltoni, University of Bologna\) -* [**Computational Intelligence & Machine Learning Group**](http://ciml.di.unipi.it/index.html) \(PI: Alessio Micheli, University of Pisa\) -* [**Italian Association for Machine Learning**](https://iaml.it/) \(President: Simone Scardapane, Sapienza University\) -* [**AIforPeople**](https://www.aiforpeople.org/) \(President: Marta Ziosi, University of Oxford\) -* [**Learning and Machine Perception Team**](http://www.cvc.uab.es/lamp/) \(PI: Joost van de Weijer\) -* [**Tinne Tuytelaars’ group**](https://homes.esat.kuleuven.be/~tuytelaa/) \(PI: Tinne Tuytelaars\) -* [**Machine and Neuromorphic Perception Laboratory**](http://klab.cis.rit.edu/) \(PI: Christopher Kanan\) -* [**LASTI Lab**](https://kalisteo.cea.fr/index.php/textual-and-visual-semantic/) \(PI: Adrian Popescu\) -* [**Visual Artificial Intelligence Laboratory**](https://cms.brookes.ac.uk/staff/FabioCuzzolin) \(PI: Fabio Cuzzolin\) -* [**Eugenio Culurciello’s group**](https://scholar.google.com/citations?user=SeGmqkIAAAAJ&hl=en) \(PI: Eugenio Culurciello\) +* [**ContinualAI Lab**](https://www.continualai.org/lab/) (PI: Vincenzo Lomonaco) +* [**Pervasive AI Lab**](http://pai.di.unipi.it) (PI: Davide Bacciu) +* [**BioLab** ](http://biolab.csr.unibo.it/home.asp)(PI: Davide Maltoni, University of Bologna) +* [**Computational Intelligence & Machine Learning Group**](http://ciml.di.unipi.it/index.html) (PI: Alessio Micheli, University of Pisa) +* [**Italian Association for Machine Learning**](https://iaml.it) (President: Simone Scardapane, Sapienza University) +* [**AIforPeople**](https://www.aiforpeople.org) (President: Marta Ziosi, University of Oxford) +* [**Learning and Machine Perception Team**](http://www.cvc.uab.es/lamp/) (PI: Joost van de Weijer) +* [**Tinne Tuytelaars’ group**](https://homes.esat.kuleuven.be/\~tuytelaa/) (PI: Tinne Tuytelaars) +* [**Machine and Neuromorphic Perception Laboratory**](http://klab.cis.rit.edu) (PI: Christopher Kanan) +* [**LASTI Lab**](https://kalisteo.cea.fr/index.php/textual-and-visual-semantic/) (PI: Adrian Popescu) +* [**Visual Artificial Intelligence Laboratory**](https://cms.brookes.ac.uk/staff/FabioCuzzolin) (PI: Fabio Cuzzolin) +* [**Eugenio Culurciello’s group**](https://scholar.google.com/citations?user=SeGmqkIAAAAJ\&hl=en) (PI: Eugenio Culurciello) * _..._[_and many more!_ ](https://www.continualai.org/research) ## 📫 Contacts -If you want to contact us don't hesitate to send an email to `vincenzo.lomonaco@continualai.org`, `contact@continualai.org`, or you can join us [on slack](https://join.slack.com/t/continualai/shared_invite/enQtNjQxNDYwMzkxNzk0LTBhYjg2MjM0YTM2OWRkNDYzOGE0ZTIzNDQ0ZGMzNDE3ZGUxNTZmNmM1YzJiYzgwMTkyZDQxYTlkMTI3NzZkNjU) and chat with us all! 😃 - +If you want to contact us don't hesitate to send an email to `vincenzo.lomonaco@continualai.org`, `contact@continualai.org`, or you can join us [on slack](https://join.slack.com/t/continualai/shared\_invite/enQtNjQxNDYwMzkxNzk0LTBhYjg2MjM0YTM2OWRkNDYzOGE0ZTIzNDQ0ZGMzNDE3ZGUxNTZmNmM1YzJiYzgwMTkyZDQxYTlkMTI3NzZkNjU) and chat with us all! 
😃 diff --git a/docs/gitbook/examples/benchmarks.md b/docs/gitbook/examples/benchmarks.md index f56856798..3079f95d8 100644 --- a/docs/gitbook/examples/benchmarks.md +++ b/docs/gitbook/examples/benchmarks.md @@ -4,7 +4,7 @@ description: Benchmarks and DatasetCode Examples # Benchmarks -{% code title="\"All MNIST\" Example" %} +{% code title=""All MNIST" Example" %} ```python # Device config device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") @@ -58,4 +58,3 @@ You can run _this chapter_ and play with it on Google Colaboratory: {% hint style="danger" %} Notebook currently unavailable. {% endhint %} - diff --git a/docs/gitbook/examples/evaluation.md b/docs/gitbook/examples/evaluation.md index 5ae93fcdb..6c4028d3b 100644 --- a/docs/gitbook/examples/evaluation.md +++ b/docs/gitbook/examples/evaluation.md @@ -4,7 +4,7 @@ description: Protocols and Metrics Code Examples # Evaluation -{% code title="\"Evaluation Pluging\" Example" %} +{% code title=""Evaluation Plugin" Example" %} ```python # --- CONFIG device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") @@ -89,4 +89,3 @@ You can run _this chapter_ and play with it on Google Colaboratory: {% hint style="danger" %} Notebook currently unavailable. {% endhint %} - diff --git a/docs/gitbook/examples/loggers.md b/docs/gitbook/examples/loggers.md index 1f8bcbdf0..abb22f901 100644 --- a/docs/gitbook/examples/loggers.md +++ b/docs/gitbook/examples/loggers.md @@ -4,7 +4,7 @@ description: Examples for the Loggers module offered in Avalanche # Loggers -{% code title="\"Loggers\" Example" %} +{% code title=""Loggers" Example" %} ```python # --- CONFIG device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") @@ -89,4 +89,3 @@ You can run _this chapter_ and play with it on Google Colaboratory: {% hint style="danger" %} Notebook currently unavailable. {% endhint %} - diff --git a/docs/gitbook/examples/models.md b/docs/gitbook/examples/models.md index 11df6251b..eb8e8edac 100644 --- a/docs/gitbook/examples/models.md +++ b/docs/gitbook/examples/models.md @@ -4,7 +4,7 @@ description: Examples for the Models module offered in Avalanche # Models -{% code title="\"Available Models\" Example" %} +{% code title=""Available Models" Example" %} ```python from avalanche.models import SimpleCNN from avalanche.models import SimpleMLP @@ -20,4 +20,3 @@ You can run _this chapter_ and play with it on Google Colaboratory: {% hint style="danger" %} Notebook currently unavailable. {% endhint %} - diff --git a/docs/gitbook/examples/training.md b/docs/gitbook/examples/training.md index 0412010ee..4442ee509 100644 --- a/docs/gitbook/examples/training.md +++ b/docs/gitbook/examples/training.md @@ -4,7 +4,7 @@ description: Baselines and Strategies Code Examples # Training -{% code title="\"LWF\" Example" %} +{% code title=""LWF" Example" %} ```python model = SimpleMLP(hidden_size=args.hs) optimizer = torch.optim.SGD(model.parameters(), lr=args.lr) @@ -58,4 +58,3 @@ You can run _this chapter_ and play with it on Google Colaboratory: {% hint style="danger" %} Notebook currently unavailable.
{% endhint %} - diff --git a/docs/gitbook/from-zero-to-hero-tutorial/01_introduction.md b/docs/gitbook/from-zero-to-hero-tutorial/01_introduction.md index a9aa19a8c..9e25055a9 100644 --- a/docs/gitbook/from-zero-to-hero-tutorial/01_introduction.md +++ b/docs/gitbook/from-zero-to-hero-tutorial/01_introduction.md @@ -1,21 +1,51 @@ --- -description: Understand the Avalanche Package Structure +description: Understand the Avalanche Structure --- # Introduction -Welcome to the "_Introduction_" tutorial of the "_From Zero to Hero_" series. We will start our journey by taking a quick look at the _Avalanche_ main modules to understand its **general architecture**. +![](../.gitbook/assets/avalanche\_api.png) -As hinted in the getting started introduction _Avalanche_ is organized in **five main modules**: +Welcome to the "_Introduction_" tutorial of the "_From Zero to Hero_" series. We will start our journey by taking a quick look at _Avalanche_'s main modules and its **general architecture**. -* **`Benchmarks`**: This module maintains a uniform API for data handling: mostly generating a stream of data from one or more datasets. It contains all the major CL benchmarks \(similar to what has been done for [torchvision](https://pytorch.org/docs/stable/torchvision/index.html)\). -* **`Training`**: This module provides all the necessary utilities concerning model training. This includes simple and efficient ways of implement new _continual learning_ strategies as well as a set pre-implemented CL baselines and state-of-the-art algorithms you will be able to use for comparison! -* **`Evaluation`**: This module provides all the utilities and metrics that can help evaluate a CL algorithm with respect to all the factors we believe to be important for a continually learning system. It also includes advanced logging and plotting features, including native [Tensorboard](https://www.tensorflow.org/tensorboard) support. -* **`Models`**: In this module you'll find several model architectures and pre-trained models that can be used for your continual learning experiment \(similar to what has been done in [torchvision.models](https://pytorch.org/docs/stable/torchvision/index.html)\). Furthermore, we provide everything you need to implement architectural strategies, task-aware models, and dynamic model expansion. -* **`Logging`**: It includes advanced logging and plotting features, including native _stdout_, _file_ and [Tensorboard](https://www.tensorflow.org/tensorboard) support \(How cool it is to have a complete, interactive dashboard, tracking your experiment metrics in real-time with a single line of code?\) +_Avalanche_ is organized in **five main modules**: Benchmarks, Training, Evaluation, Models, Logging. + +#### Benchmarks + +This module provides a uniform API for data handling: mostly generating a stream of data from one or more datasets. It contains all the major CL benchmarks (similar to what has been done for [torchvision](https://pytorch.org/docs/stable/torchvision/index.html)). Its main components are: + +* **Benchmarks**: these are complete benchmarks, providing access to train/test streams. +* **Streams**: a sequential list of learning experiences. +* **Experience**: a bunch of data available at a specific point in time. +* **AvalancheDataset**: a dataset that provides support for train/eval transformations, concatenation and subsampling, and other operations needed to manipulate data in continual learning strategies.
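As a quick sketch of how these pieces fit together (a hypothetical snippet, not part of this PR, assuming the classic `SplitMNIST` benchmark with default arguments), iterating over a benchmark's train stream yields experiences whose data is exposed as an `AvalancheDataset`:

```python
from avalanche.benchmarks.classic import SplitMNIST

# A benchmark provides the train/test streams of experiences
benchmark = SplitMNIST(n_experiences=5)

for experience in benchmark.train_stream:
    # Each experience knows its position in the stream and its classes
    print(experience.current_experience,
          experience.classes_in_this_experience)
    # The data carried by the experience is an AvalancheDataset
    dataset = experience.dataset
    x, y, t = dataset[0]  # input, target class, task label
```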
+ +#### Training + +This module provides all the necessary utilities concerning model training. This includes simple and efficient ways of implementing new _continual learning_ strategies as well as a set of pre-implemented CL baselines and state-of-the-art algorithms you will be able to use for comparison! + +* **BaseStrategy** provides the default training and eval loops. +* **Plugins** extend the basic loops with additional functionality. + +#### Evaluation + +This module provides all the utilities and metrics that can help evaluate a CL algorithm with respect to all the factors we believe to be important for a continually learning system. + +* **EvaluationPlugin**: the plugin that connects metrics and the `BaseStrategy`. +* **MetricPlugins**: plugins that provide a bridge between metrics and the evaluation plugin. They emit `MetricValue`s that will be collected by the `EvaluationPlugin` and serialized by the loggers. +* **Metric**: basic logic to compute a metric. Provides the `update`, `result` and `reset` operations used to compute and retrieve the metric value. + +#### Models + +In this module you'll find model architectures, pre-trained models, and utilities to implement continual learning models. We provide everything you need to implement architectural strategies, task-aware models, and dynamic model expansion. + +#### Logging + +Metrics are automatically logged using native _stdout_, _file_ and [Tensorboard](https://www.tensorflow.org/tensorboard) support (how cool is it to have a complete, interactive dashboard tracking your experiment metrics in real time with a single line of code?) + +## File Structure {% code title="Avalanche Main Modules and Sub-Modules" %} -```text +``` Avalanche ├── Benchmarks │ ├── Classic @@ -33,7 +63,6 @@ Avalanche | └── Utils ├── Models └── Loggers - ``` {% endcode %} @@ -45,4 +74,4 @@ In the following tutorials we will assume you have already installed _Avalanche ## 🤝 Run it on Google Colab -You can run _this chapter_ and play with it on Google Colaboratory: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ContinualAI/avalanche/blob/master/notebooks/from-zero-to-hero-tutorial/01_introduction.ipynb) +You can run _this chapter_ and play with it on Google Colaboratory: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ContinualAI/avalanche/blob/master/notebooks/from-zero-to-hero-tutorial/01\_introduction.ipynb) diff --git a/docs/gitbook/from-zero-to-hero-tutorial/04_training.md b/docs/gitbook/from-zero-to-hero-tutorial/04_training.md index d81c55abe..467e1f34e 100644 --- a/docs/gitbook/from-zero-to-hero-tutorial/04_training.md +++ b/docs/gitbook/from-zero-to-hero-tutorial/04_training.md @@ -5,6 +5,8 @@ description: Continual Learning Algorithms Prototyping Made Easy Welcome to the "_Training_" tutorial of the "_From Zero to Hero_" series. In this part we will present the functionalities offered by the `training` module. +First, let's install Avalanche. You can skip this step if you have installed it already. + ```python !pip install git+https://github.com/ContinualAI/avalanche.git @@ -187,7 +189,7 @@ The strategy state is accessible via several attributes. Most of these can be mo - `self.experience`: the current experience. - `self.adapted_dataset`: the data modified by the dataset adaptation phase. - `self.dataloader`: the current dataloader. -- `self.mbatch`: the current mini-batch.
For classification problems, mini-batches have the form `<x, y, t>`, where `x` is the input, `y` is the label, and `t` is the target. +- `self.mbatch`: the current mini-batch. For classification problems, mini-batches have the form `<x, y, t>`, where `x` is the input, `y` is the target class, and `t` is the task label. - `self.mb_output`: the current model's output. - `self.loss`: the current loss. - `self.is_training`: `True` if the strategy is in training mode. diff --git a/docs/gitbook/getting-started/alpha-version.md b/docs/gitbook/getting-started/alpha-version.md index dc2e416d1..87ebfe53e 100644 --- a/docs/gitbook/getting-started/alpha-version.md +++ b/docs/gitbook/getting-started/alpha-version.md @@ -1,133 +1,65 @@ --- -description: 'Supported Benchmarks, Strategies & Metrics' +description: 'Avalanche Features: Benchmarks, Strategies & Metrics' --- # Current Release -_Avalanche_ is a framework in constant development. Thanks to the support of the [ContinualAI](https://www.continualai.org/) community and its active members we plan to **extend its features** and **improve its usability** based on the demands of our research community! - -A the moment, _Avalanche_ is in **Alpha \(v0.0.1\)**, but we already support a number of _Benchmarks_, _Strategies_ and _Metrics_, that makes it, we believe, **the best tool out there for your continual learning research!** 💪 +_Avalanche_ is a framework in constant development. Thanks to the support of the [ContinualAI](https://www.continualai.org) community and its active members we plan to **extend its features** and **improve its usability** based on the demands of our research community!\ +\ +At the moment, _Avalanche_ is in **Beta (v0.1.0).** We support a large number of _Benchmarks_, _Strategies_ and _Metrics_, which make it, we believe, **the best tool out there for your continual learning research!** 💪 + +You can find the full list of available features on the [API documentation](https://avalanche-api.continualai.org). {% hint style="info" %} -Check out below what we support in details, and please let us know if you think [we are missing out something important](../questions-and-issues/request-a-feature.md)! We deeply value [your feedback](../questions-and-issues/give-feedback.md)! +Do you think we are missing some important features? Please [let us know](../questions-and-issues/request-a-feature.md)! We deeply value [your feedback](../questions-and-issues/give-feedback.md)! {% endhint %} -## 🖼️ Supported Datasets - -In the Table below, we list all the Pytorch datasets used in _Continual Learning_ \(along with some references\) and indicating if we **support** them in _Avalanche_ or not. Some of them were already available in [_Torchvision_](https://pytorch.org/docs/stable/torchvision/index.html), while other have been integrated by us. - -| Name | Dataset Support | From Torch Vision | Automatic Download | References | -| :--- | :--- | :--- | :--- | :--- | -| **CORe50** | ✔️ | ✔️ | ✔️ | [\[1\]](http://proceedings.mlr.press/v78/lomonaco17a.html) | -| **MNIST** | ✔️ | ✔️ | ✔️ | n.a. | -| **CIFAR-10** | ✔️ | ✔️ | ✔️ | n.a. | -| **CIFAR-100** | ✔️ | ✔️ | ✔️ | n.a. | -| **FashionMNIST** | ✔️ | ✔️ | ✔️ | n.a. | -| **TinyImagenet** | ✔️ | ✔️ | ✔️ | n.a. | -| **MiniImagenet** | ✔️ | ❌ | ❌ | n.a. | -| **Imagenet** | ✔️ | ✔️ | ❌ | n.a. | -| **CUB200** | ✔️ | ❌ | ✔️ | n.a. | -| **CRIB** | ❌ | ❌ | ❌ | n.a. | -| **OpenLORIS** | ✔️ | ❌ | ✔️ | n.a. | -| **Stream-51** | ✔️ | ❌ | ✔️ | n.a. | -| **KMNIST** | ✔️ | ✔️ | ✔️ | n.a. | -| **EMNIST** | ✔️ | ✔️ | ✔️ | n.a.
| -| **QMNIST** | ✔️ | ✔️ | ✔️ | n.a. | -| **FakeData** | ✔️ | ✔️ | ✔️ | n.a. | -| **CocoCaption** | ✔️ | ✔️ | ❌ | n.a. | -| **CocoDetection** | ✔️ | ❌ | ❌ | n.a. | -| **LSUN** | ✔️ | ❌ | ❌ | n.a. | -| **STL10** | ✔️ | ❌ | ✔️ | n.a. | -| **SVHN** | ✔️ | ❌ | ✔️ | n.a. | -| **PhotoTour** | ✔️ | ❌ | ✔️ | n.a. | -| **SBU** | ✔️ | ✔️ | ✔️ | n.a. | -| **Flickr8k** | ✔️ | ✔️ | ❌ | n.a. | -| **Flickr30k** | ✔️ | ✔️ | ❌ | n.a. | -| **VOCDetection** | ✔️ | ✔️ | ✔️ | n.a. | -| **VOCSegmentation** | ✔️ | ✔️ | ✔️ | n.a. | -| **Cityscapes** | ✔️ | ✔️ | ❌ | n.a. | -| **SBDataset** | ✔️ | ✔️ | ✔️ | n.a. | -| **USPS** | ✔️ | ✔️ | ✔️ | n.a. | -| **Kinetics400** | ✔️ | ✔️ | ❌ | n.a. | -| **HMDB51** | ✔️ | ✔️ | ❌ | n.a. | -| **UCF101** | ✔️ | ✔️ | ❌ | n.a. | -| **CelebA** | ✔️ | ✔️ | ✔️ | n.a. | -| **Caltech101** | ❌ | ❌ | ❌ | n.a. | -| **Caltech256** | ❌ | ❌ | ❌ | n.a. | - -## 📚 Supported Benchmarks - -In the Table below, we list all the major benchmarks used in _Continual Learning_ \(along with some references\) and indicating if we **support** them in _Avalanche_ or not. - -_""Benchmark Support"_ is checked if the actual _continual learning benchmark_ \(with the actual stream of data\) is also provided. - -| Name | Benchmark Support | References | -| :--- | :--- | :--- | -| **CORe50** | ✔️ | [\[1\]](http://proceedings.mlr.press/v78/lomonaco17a.html) | -| **RotatedMNIST** | ✔️ | n.a. | -| **PermutedMNIST** | ✔️ | n.a. | -| **SplitMNIST** | ✔️ | n.a. | -| **FashionMNIST** | ✔️ | n.a. | -| **CIFAR-10** | ✔️ | n.a. | -| **CIFAR-100** | ✔️ | n.a. | -| **CIFAR-110** | ✔️ | n.a. | -| **TinyImagenet** | ✔️ | n.a. | -| **CUB200** | ✔️ | n.a. | -| **SplitImagenet** | ✔️ | n.a. | -| **CRIB** | ❌ | n.a. | -| **OpenLORIS** | ✔️ | n.a. | -| **MiniImagenet** | ❌ | n.a. | -| **Stream-51** | ✔️ | n.a. | - -## 📈 Supported Strategies - -In the Table below, we list all the _Continual Learning_ algorithms \(also known as _strategies_\) we currently support in _Avalanche_. - -_"Strategy Support"_ is checked if the algorithm is already available in _Avalanche_, whereas _"Plugin Support"_ is checked if the **corresponding plugin** of the strategy \(that can be used in conjunction with other strategies\) is is also provided. - -| Name | Strategy Support | Plugin Support | References | -| :--- | :--- | :--- | :--- | -| **Naive \(a.k.a. "Finetuning"\)** | ✔️ | ❌ | n.a. | -| **Naive Multi-Head** | ✔️ | ✔️ | n.a. | -| **Joint Training \(a.k.a. "Multi-Task"\)** | ✔️ | ❌ | n.a. | -| **Cumulative** | ✔️ | ❌ | n.a. | -| **GDumb** | ✔️ | ✔️ | n.a. | -| **Experience Replay \(a.k.a. "Rehearsal"\)** | ✔️ | ✔️ | n.a. | -| **EWC** | ✔️ | ✔️ | n.a. | -| **LWF** | ✔️ | ✔️ | n.a. | -| **GEM** | ✔️ | ✔️ | n.a. | -| **AGEM** | ✔️ | ✔️ | n.a. | -| **CWR** | ✔️ | ✔️ | n.a. | -| **SI** | ✔️ | ✔️ | n.a. | -| **iCaRL** | ❌ | ❌ | n.a. | -| **AR1** | ✔️ | ❌ | n.a. | - -## 📊 Supported Metrics - -In the Table below, we list all the _Continual Learning_ **Metrics** we currently support in _Avalanche_. All the metrics by default can be **collected** during runtime, **logged on stdout** or on **log file**. - -With _"Tensorboard"_ is checked if the metrics can be also visualized in **Tensorboard** is already available in _Avalanche_, whereas _"Wandb"_ is checked if the metrics can be visualized in **Wandb**. - -| Name | Support | Tensorboard | Wandb | References | -| :--- | :--- | :--- | :--- | :--- | -| **Accuracy** | ✔️ | ✔️ | ❌ | n.a. | -| **Loss** | ✔️ | ✔️ | ❌ | n.a. 
| -| **ACC** | ❌ | ❌ | ❌ | [\(Lopez-Paz, 2017\)](https://arxiv.org/pdf/1706.08840.pdf) | -| **BWT** | ❌ | ❌ | ❌ | [\(Lopez-Paz, 2017\)](https://arxiv.org/pdf/1706.08840.pdf) | -| **FWT** | ❌ | ❌ | ❌ | [\(Lopez-Paz, 2017\)](https://arxiv.org/pdf/1706.08840.pdf) | -| **Catastrophic Forgetting** | ✔️ | ✔️ | ❌ | n.a. | -| **Remembering** | ❌ | ❌ | ❌ | n.a. | -| **A** | ❌ | ❌ | ❌ | [\(Díaz-Rodríguez, 2018\)](https://arxiv.org/pdf/1810.13166.pdf) | -| **MS** | ❌ | ❌ | ❌ | [\(Díaz-Rodríguez, 2018\)](https://arxiv.org/pdf/1810.13166.pdf) | -| **SSS** | ❌ | ❌ | ❌ | [\(Díaz-Rodríguez, 2018\)](https://arxiv.org/pdf/1810.13166.pdf) | -| **CE** | ❌ | ❌ | ❌ | [\(Díaz-Rodríguez, 2018\)](https://arxiv.org/pdf/1810.13166.pdf) | -| **Confusion Matrix** | ✔️ | ✔️ | ❌ | n.a. | -| **MAC** | ✔️ | ✔️ | ❌ | n.a. | -| **CPU Usage** | ✔️ | ✔️ | ❌ | n.a. | -| **Disk Usage** | ✔️ | ✔️ | ❌ | n.a. | -| **GPU Usage** | ✔️ | ✔️ | ❌ | n.a. | -| **RAM Usage** | ✔️ | ✔️ | ❌ | n.a. | -| **Running Time** | ✔️ | ✔️ | ❌ | n.a. | -| **CLScore** | ❌ | ❌ | ❌ | [\(Díaz-Rodríguez, 2018\)](https://arxiv.org/pdf/1810.13166.pdf) | -| **CLStability** | ❌ | ❌ | ❌ | [\(Díaz-Rodríguez, 2018\)](https://arxiv.org/pdf/1810.13166.pdf) | +## Benchmarks and Datasets + +You can find a complete list of the features on the [benchmarks API documentation](https://avalanche-api.continualai.org/en/latest/benchmarks.html). + +### 🖼️ Datasets + +Avalanche supports all the most popular computer vision datasets used in _Continual Learning_. Some of them are available in [_Torchvision_](https://pytorch.org/docs/stable/torchvision/index.html), while others have been integrated by us. Most datasets are automatically downloaded by Avalanche. + +* **Toy datasets**: MNIST, Fashion MNIST, KMNIST, EMNIST, QMNIST. +* **CIFAR:** CIFAR10, CIFAR100. +* **ImageNet**: TinyImagenet, MiniImagenet, Imagenet. +* **Others**: EndlessCLDataset, CUB200, OpenLORIS, Stream-51, INATURALIST2018, Omniglot, ... + +### 📚 Benchmarks + +All the major continual learning benchmarks are available and ready to use. Benchmarks split the datasets and create the train and test streams: + +* **MNIST**: SplitMNIST, RotatedMNIST, PermutedMNIST, SplitFashionMNIST. +* **CIFAR10**: SplitCIFAR10, SplitCIFAR100, SplitCIFAR110. +* **CORe50**: all the CORe50 scenarios are supported. +* **Others**: SplitCUB200, CLStream51, OpenLORIS. + +## 📈 Continual Learning Strategies + +Avalanche provides _Continual Learning_ algorithms (_strategies_). We are continuously expanding the library with new algorithms. + +* **Baselines**: Naive, JointTraining, Cumulative. +* **Rehearsal**: Replay with reservoir sampling and balanced buffers, GSS greedy, CoPE. +* **Regularization**: EWC, LwF, GEM, AGEM, CWR\*, Synaptic Intelligence. +* **Architectural**: Progressive Neural Networks, multi-head, incremental classifier. +* **Others**: GDumb, iCaRL, AR1, Streaming LDA, LFL. + +## Models + +Avalanche uses and extends PyTorch `nn.Module`s to define continual learning models: + +* Support for `nn.Module`s and `torchvision` models. +* Dynamic output heads for class-incremental scenarios and multi heads for task-incremental scenarios. +* Support for architectural strategies and dynamically expanding models such as progressive neural networks. + +## 📊 Metrics and Evaluation + +Avalanche provides continuous evaluation of CL strategies with a large set of **Metrics**. They are collected and logged automatically by the strategy during the training and evaluation loops. + +* Accuracy, loss, confusion matrix (averaged over streams or experiences).
+* **CL-Metrics**: backward/forward transfer, forgetting. +* **Computational Resources**: CPU and RAM usage, MAC, execution times. +* ...and [many more](https://avalanche-api.continualai.org/en/latest/evaluation.html). diff --git a/docs/gitbook/getting-started/how-to-install.md b/docs/gitbook/getting-started/how-to-install.md index e976e5bad..ff9114bc0 100644 --- a/docs/gitbook/getting-started/how-to-install.md +++ b/docs/gitbook/getting-started/how-to-install.md @@ -6,50 +6,19 @@ description: Installing Avalanche has Never Been so Simple _Avalanche_ has been designed for extreme **portability** and **usability**. Indeed, it can be run on every OS and native python environment. 💻🍎🐧 -In order to install _Avalanche_ we have three main options: - -1. [Installing it with Pip](how-to-install.md#installing-avalanche-with-pip) -2. [Installing it with Anaconda](how-to-install.md#install-avalanche-with-anaconda) -3. [Developer Mode Install](how-to-install.md#developer-mode-install) \(for contributing to _Avalanche_!\) - -## 🔂 Avalanche Dependencies - -The _Avalanche_ dependencies are the following: - -`python>=3.6,<=3.9.2`, `typing-extensions`, `psutil`, `torch`, `torchvision`, `tensorboard`, `scikit-learn`, `matplotlib`, `numpy`, `pytorchcv`, `quadprog`, `tqdm`, `gdown`, `pycocotools`. - -{% hint style="info" %} -_Avalanche may work on lower Python versions as well but we don't officially support it, nor recommend it._ -{% endhint %} - -At the moment, we cannot provide a swift installation experience as some of the dependencies cannot be installed automatically. However, in the sections below we detail how you can install Avalanche **in a matter of minutes** on any platform! - ## 📦 Installing Avalanche with Pip -Within an Anaconda environment or not you can install _Avalanche_ also with Pip with the following steps: - -1. Install the package with pip. -2. [Install Pytorch + TorchVision](https://pytorch.org/) \(follow the instructions on the website to use pip\) - -Step 1. can be done with the following line of code: +You can install Avalanche with pip: ```bash -pip install git+https://github.com/ContinualAI/avalanche.git +pip install avalanche-lib ``` -That's it. now we have _Avalanche_ up and running and we can start using it! - -## 🐍 Install Avalanche with Anaconda +That's it. Now you can start using Avalanche. -This is the **safest option** since it allows you to build an isolated environment for your Avalanche experiments. This means that you'll be able to _pin particular versions_ of your dependencies that may differ from the ones you want to maintain in the rest of your system. This will in turn increase reproducibility of any experiment you may produce. +## Installing the Master Branch Using Anaconda -Assuming you have **Anaconda \(or Miniconda\) installed** on your system, you can follow these simple steps: - -1. Install the `avalanche-env` environment and activate it. -2. [Install Pytorch + TorchVision](https://pytorch.org/) \(follow the instructions on the website to use conda\) -3. Update the Conda Environment - -These steps can be accomplished with the following lines of code: +We suggest you use the pip package, but if you need some recent features you may want to install directly from the master branch. In general, the master branch is well tested and safe to use. However, the API of new features may change more frequently or break backward compatibility. Reproducibility is also easier if you use the pip package.
```bash # choose your python version @@ -76,12 +45,12 @@ You can test your installation by running the `examples/test_install.py` script. ## 💻 Developer Mode Install -If you want to expand _Avalanche_ and help us improve it \(see the "[_From Zero to Hero_](../from-zero-to-hero-tutorial/03_benchmarks.md)" Tutorial\). In this case we suggest to create an environment in _**developer-mode**_ as follows \(just a couple of more dependencies will be installed\). +If you want to expand _Avalanche_ and help us improve it (see the "[_From Zero to Hero_](../from-zero-to-hero-tutorial/03\_benchmarks.md)" Tutorial), we suggest creating an environment in _**developer-mode**_ as follows (just a couple more dependencies will be installed). -Assuming you have **Anaconda \(or Miniconda\) installed** on your system, you can follow these simple steps: +Assuming you have **Anaconda (or Miniconda) installed** on your system, you can follow these simple steps: 1. Install the `avalanche-dev-env` environment and activate it. -2. [Install Pytorch + TorchVision](https://pytorch.org/) \(follow the instructions on the website to use conda\). +2. [Install Pytorch + TorchVision](https://pytorch.org) (follow the instructions on the website to use conda). 3. Update the Conda Environment. These three steps can be accomplished with the following lines of code: @@ -115,5 +84,6 @@ That's it. now we have _Avalanche_ up and running and we can start contribute to You can run _this chapter_ and play with it on Google Colaboratory: -{% embed url="https://colab.research.google.com/drive/1pSTUgftqqg2sFNlvM6ourNYLpt2lnCQf?usp=sharing" caption="Run the \"How to Install\" Chapter on Google Colab" %} - +{% embed url="https://colab.research.google.com/drive/1pSTUgftqqg2sFNlvM6ourNYLpt2lnCQf?usp=sharing" %} +Run the "How to Install" Chapter on Google Colab +{% endembed %} diff --git a/docs/gitbook/getting-started/learn-avalanche-in-5-minutes.md b/docs/gitbook/getting-started/learn-avalanche-in-5-minutes.md index 1320ce213..09f4ad449 100644 --- a/docs/gitbook/getting-started/learn-avalanche-in-5-minutes.md +++ b/docs/gitbook/getting-started/learn-avalanche-in-5-minutes.md @@ -6,13 +6,17 @@ description: A Short Guide for Researchers on the Run _Avalanche_ is mostly about making the life of a continual learning researcher easier. -> #### What are the **three pillars** of any respectful continual learning research project? +![Avalanche modules.](<../.gitbook/assets/avalanche (1).png>) + +> +> +> **What are the three pillars of any respectful continual learning research project?** * **`Benchmarks`**: Machine learning researchers need multiple benchmarks with efficient data handling utils to design and prototype new algorithms. Quantitative results on ever-changing benchmarks have been one of the driving forces of _Deep Learning_. * **`Training`**: Efficient implementation and training of continual learning algorithms; comparisons with other baselines and state-of-the-art methods become fundamental to assess the quality of an original algorithmic proposal. * **`Evaluation`**: _Training_ utils and _Benchmarks_ are not enough alone to push continual learning research forward. Comprehensive and sound _evaluation protocols_ and _metrics_ need to be employed as well.
-> #### _With Avalanche, you can find all these three fundamental pieces together and much more, in a single and coherent, well-maintained codebase._ +> _**With Avalanche, you can find all these three fundamental pieces together and much more, in a single and coherent, well-maintained codebase.**_ Let's take a quick tour on how you can use Avalanche for your research projects with a **5-minute guide**, for _researchers on the run_! @@ -27,7 +31,7 @@ _Avalanche_ is organized in **five main modules**: * **`Benchmarks`**: This module maintains a uniform API for data handling: mostly generating a stream of data from one or more datasets. It contains all the major CL benchmarks (similar to what has been done for [torchvision](https://pytorch.org/docs/stable/torchvision/index.html)). * **`Training`**: This module provides all the necessary utilities concerning model training. This includes simple and efficient ways of implementing new _continual learning_ strategies as well as a set of pre-implemented CL baselines and state-of-the-art algorithms you will be able to use for comparison! * **`Evaluation`**: This module provides all the utilities and metrics that can help in evaluating a CL algorithm with respect to all the factors we believe to be important for a continually learning system. -* **`Models`**: In this module you'll be able to find several model architectures and pre-trained models that can be used for your continual learning experiment (similar to what has been done in [torchvision.models](https://pytorch.org/docs/stable/torchvision/index.html)). +* **`Models`**: In this module you'll be able to find several model architectures and pre-trained models that can be used for your continual learning experiment (similar to what has been done in [torchvision.models](https://pytorch.org/docs/stable/torchvision/index.html)). * **`Logging`**: It includes advanced logging and plotting features, including native _stdout_, _file_ and [Tensorboard](https://www.tensorflow.org/tensorboard) support (how cool is it to have a complete, interactive dashboard, tracking your experiment metrics in real-time with a single line of code?) In the graphic below, you can see how _Avalanche_ sub-modules are available and organized as well: @@ -253,7 +257,7 @@ Check out more details about what Avalanche can offer in this module following t The `evaluation` module is quite straightforward: it offers all the basic functionalities to evaluate and keep track of a continual learning experiment. -This is mostly done through the **Metrics** and the **Loggers.** The **Metrics** provide a set of classes which implements the main continual learning metrics like A_ccuracy_, F_orgetting_, M_emory Usage_, R_unning Times_, etc.\ +This is mostly done through the **Metrics** and the **Loggers.** The **Metrics** provide a set of classes which implement the main continual learning metrics like _Accuracy_, _Forgetting_, _Memory Usage_, _Running Times_, etc.\ Metrics should be created via the utility functions (e.g. `accuracy_metrics`, `timing_metrics` and others) specifying in the arguments when those metrics should be computed (after each minibatch, epoch, experience etc...).\ The **Loggers** specify a way to report the metrics (e.g. with Tensorboard, on console or others). Loggers are created by instantiating the respective class.
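To make the above concrete, here is a minimal sketch (not part of this PR; it only combines the `accuracy_metrics`/`timing_metrics` helpers and the `InteractiveLogger` mentioned above) of how metrics and a logger are wired together into an `EvaluationPlugin`:

```python
from avalanche.evaluation.metrics import accuracy_metrics, timing_metrics
from avalanche.logging import InteractiveLogger
from avalanche.training.plugins import EvaluationPlugin

# The metric helpers take arguments that choose *when* each metric
# is computed: after each minibatch, epoch, experience or stream.
eval_plugin = EvaluationPlugin(
    accuracy_metrics(minibatch=True, epoch=True,
                     experience=True, stream=True),
    timing_metrics(epoch=True),
    loggers=[InteractiveLogger()])  # report on the console

# The plugin can then be passed to a strategy via its `evaluator` argument.
```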
diff --git a/docs/gitbook/getting-started/why-avalanche.md b/docs/gitbook/getting-started/why-avalanche.md index 40d7e1833..769666da9 100644 --- a/docs/gitbook/getting-started/why-avalanche.md +++ b/docs/gitbook/getting-started/why-avalanche.md @@ -4,30 +4,31 @@ description: A Brief Introduction to Avalanche # Introduction -**Avalanche** was born within [ContinualAI](https://www.continualai.org/) with a clear goal in mind: +**Avalanche** was born within [ContinualAI](https://www.continualai.org) with a clear goal in mind: -> ### _Pushing Continual Learning to the next level, providing a shared and collaborative library for fast prototyping, training and reproducible evaluation of continual learning algorithms._ +> #### _Pushing Continual Learning to the next level, providing a shared and collaborative library for fast prototyping, training and reproducible evaluation of continual learning algorithms._ As a powerful _avalanche_, a _Continual Learning_ agent _incrementally_ _improves_ its knowledge and skills over time, building upon the previously acquired ones and learning how to interact with the external world. We hope _Avalanche_ may trigger the same _**positive reinforcement loop**_ within our community, moving towards a more _**collaborative**_ **and inclusive** way of doing research and helping us tackle bigger problems, faster and better, but together! 👪 -{% embed url="https://www.youtube.com/watch?v=EyO1eM0-Hi8" caption="A complete Introduction to Avalanche: an End-to-End Library for Continual Learning." %} +{% embed url="https://www.youtube.com/watch?v=y924wXP86Mo" %} +Avalanche 5 minutes introduction +{% endembed %} ## 💪The Avalanche Advantage Avalanche has several advantages: -* **Shared & Coherent Codebase**: Aren't you tired of re-inventing the wheel in continual learning? We are. Re-producing paper results has always been daunting in machine learning and it is even more so in continual learning. _Avalanche_ makes you stop re-write your \(and other people\) code all over again with a coherent and shared codebase that provides already all the utilities, benchmark, metrics and baselines you may need for your next great continual learning research project! -* **Errors Reduction**: The more code we write, the more bugs we introduce in our code. This is the rule, not the exception. _Avalanche_, let you focus on what really matters: defining your CL solution. _Benchmarks_ preparation to _training,_ _evaluation_ and _comparison_ with other methods will be already there for you. This in turn, massively reduce the amount of errors introduced and the time needed to debug your code. -* **Faster Prototyping**: As researchers or data scientists, we have dozens ideas every day and time is always too little to execute them. However, if we think about it, most of the time spent in bringing our ideas to life is consumed in installing software, preparing and cleaning our data, preparing the experiments code infrastructure and so on. _Avalanche_ lets you focus just on the original algorithmic proposal, taking care of most of the rest! -* **Improved Reproducibility & Portability**: One of the great features of _Avalanche_, is the possibility of reproducing experimental results easily and on any OS. Researchers can simply plug-in their algorithm into the codebase and see how it goes with respect of other researchers' methods. Their algorithm in turn, is used as a baseline for other methods, creating a virtuous circle. 
This is only possible thanks to the simple, yet powerful idea of providing shared _benchmarks_, _training_ and _evaluation_ in a single place. -* **Improved Modularity**: _Avalanche_ has been designed with modularity in mind. As you will learn more about Avalanche, you will realize we have sometimes forego simplicity in favor of modularity and reusability \(we hate code replication as you do 🤪\). However, we believe this will help us scale in the near future as we collaboratively bring this codebase into maturity. -* **Increased Efficiency & Scalability**: Full-stack researchers & data scientists know this, making your algorithm memory and computationally efficient is tough. _Avalanche_ is already optimized for you, so that you can run your ImageNet continual learning experiment on your 8GB laptop \(buy a cooling fan 💨\) or even try it on embedded devices of your latest product! +* **Shared & Coherent Codebase**: Aren't you tired of re-inventing the wheel in continual learning? We are. Reproducing paper results has always been daunting in machine learning and it is even more so in continual learning. _Avalanche_ saves you from re-writing your (and other people's) code all over again, with a coherent and shared codebase that already provides all the utilities, benchmarks, metrics and baselines you may need for your next great continual learning research project! +* **Errors Reduction**: The more code we write, the more bugs we introduce in our code. This is the rule, not the exception. _Avalanche_ lets you focus on what really matters: defining your CL solution. Everything from _benchmark_ preparation to _training_, _evaluation_ and _comparison_ with other methods will already be there for you. This, in turn, massively reduces the number of errors introduced and the time needed to debug your code. +* **Faster Prototyping**: As researchers or data scientists, we have dozens of ideas every day and the time to execute them is always too little. However, if we think about it, most of the time spent in bringing our ideas to life is consumed by installing software, preparing and cleaning our data, preparing the experiment's code infrastructure and so on. _Avalanche_ lets you focus just on the original algorithmic proposal, taking care of most of the rest! +* **Improved Reproducibility & Portability**: One of the great features of _Avalanche_ is the possibility of reproducing experimental results easily and on any OS. Researchers can simply plug their algorithm into the codebase and see how it fares with respect to other researchers' methods. Their algorithm, in turn, is used as a baseline for other methods, creating a virtuous circle. This is only possible thanks to the simple, yet powerful idea of providing shared _benchmarks_, _training_ and _evaluation_ in a single place. +* **Improved Modularity**: _Avalanche_ has been designed with modularity in mind. As you learn more about Avalanche, you will realize we have sometimes foregone simplicity in favor of modularity and reusability (we hate code replication as much as you do 🤪). However, we believe this will help us scale in the near future as we collaboratively bring this codebase to maturity. +* **Increased Efficiency & Scalability**: Full-stack researchers & data scientists know this: making your algorithm memory- and computationally-efficient is tough. _Avalanche_ is already optimized for you, so that you can run your ImageNet continual learning experiment on your 8GB laptop (buy a cooling fan 💨) or even try it on the embedded devices of your latest product!
But most of all, _Avalanche_ can help us standardize our field and work better together, more collaboratively, towards our shared goal of making machines learn over time like humans do. _Avalanche_ is the first experiment of an **End-to-end Library** for reproducible _continual learning_ research where you can find _benchmarks_, _algorithms_, _evaluation utilities_ and much more in the same place. Let's make it together 👫 a wonderful ride! 🎈 - diff --git a/docs/gitbook/how-to-contribute/guidelines.md b/docs/gitbook/how-to-contribute/guidelines.md index f6484559a..734a603fe 100644 --- a/docs/gitbook/how-to-contribute/guidelines.md +++ b/docs/gitbook/how-to-contribute/guidelines.md @@ -8,9 +8,14 @@ If you are here it means you are considering contributing to _Avalanche_. It is In order to contribute to this awesome framework, we recommend going through the "_From Zero to Hero_" _Avalanche_ Tutorial: +{% content-ref url="broken-reference" %} +[Broken link](broken-reference) +{% endcontent-ref %} + In this tutorial you'll learn Avalanche _in-depth_ and learn how to extend and contribute back to the community! In particular, be sure to read the "_Contribute to Avalanche_" chapter: -{% page-ref page="../from-zero-to-hero-tutorial/09\_contribute-to-avalanche.md" %} +{% content-ref url="../from-zero-to-hero-tutorial/09_contribute-to-avalanche.md" %} +[09\_contribute-to-avalanche.md](../from-zero-to-hero-tutorial/09\_contribute-to-avalanche.md) +{% endcontent-ref %} At the moment, we don't have a lot of rules for contributing or a strict _code of conduct_; please enjoy this freedom with a grain of salt! 😁 - diff --git a/docs/gitbook/how-tos/avalanchedataset/README.md b/docs/gitbook/how-tos/avalanchedataset/README.md new file mode 100644 index 000000000..83b69bd1a --- /dev/null +++ b/docs/gitbook/how-tos/avalanchedataset/README.md @@ -0,0 +1,15 @@ +--- +description: Dealing with AvalancheDatasets +--- + +# AvalancheDataset + +The `AvalancheDataset` is an implementation of the PyTorch `Dataset` class that comes with many useful out-of-the-box functionalities. For most users, the _AvalancheDataset_ can be used as a plain PyTorch Dataset that will return `x, y, t` elements. However, the AvalancheDataset is much more powerful than a simple PyTorch Dataset. + +**A series of **_**Mini How-Tos**_ will guide you through the functionalities of the _AvalancheDataset_ and its subclasses: + +* [Preamble: PyTorch Datasets](https://avalanche.continualai.org/how-tos/avalanchedataset/preamble-pytorch-datasets) +* [Creating AvalancheDatasets](https://avalanche.continualai.org/how-tos/avalanchedataset/creating-avalanchedatasets) +* [Advanced Transformations](https://avalanche.continualai.org/how-tos/avalanchedataset/advanced-transformations) + +Before jumping to the actual _Mini How-To_s, **we recommend having a look at the basic notions of Dataset and DataLoader by reading the** [**Preamble page**](https://avalanche.continualai.org/how-tos/avalanchedataset/preamble-pytorch-datasets). diff --git a/docs/gitbook/how-tos/avalanchedataset/advanced-transformations.md b/docs/gitbook/how-tos/avalanchedataset/advanced-transformations.md new file mode 100644 index 000000000..0f029b712 --- /dev/null +++ b/docs/gitbook/how-tos/avalanchedataset/advanced-transformations.md @@ -0,0 +1,321 @@ +--- +description: Dealing with transformations (groups, appending, replacing, freezing).
+--- + +# Advanced Transformations + +AvalancheDataset (and its subclasses like the Avalanche_Tensor/Subset/Concat_Dataset) allows for finer control over transformations. While torchvision (and other) datasets allow for a minimal mechanism to apply transformations, with AvalancheDataset one can: + +1. Have multiple **transformation "groups"** in the same dataset (like separate train and test transformations). +2. **Append, replace and remove transformations**, even by using nested Subset/Concat Datasets. +3. **Freeze transformations**, so that they can't be changed. + +The following sub-sections show examples of how to use these features. Please note that all the constructor parameters and the methods described in this How-To can be used on AvalancheDataset subclasses as well. For more info on all the available subclasses, refer to [this Mini How-To](https://avalanche.continualai.org/how-tos/avalanchedataset/creating-avalanchedatasets). + +It is warmly recommended to **run this page as a notebook** using Colab (info at the bottom of this page). + +Let's start by installing Avalanche: + +```python +!pip install git+https://github.com/ContinualAI/avalanche.git + +# Or, if you cloned Avalanche on your PC: +# import sys +# from pathlib import Path +# sys.path.append(str(Path.cwd() / '../../..')) +``` + +## Transformation groups + +AvalancheDatasets can contain multiple **transformation groups**. This can be useful to keep train and test transformations in the same dataset and to have different sets of transformations. This may come in handy in many situations (for instance, to apply ad-hoc transformations to replay data). + +As in torchvision datasets, AvalancheDataset supports two kinds of transformations: the `transform`, which is applied to X values, and the `target_transform`, which is applied to Y values. The latter is rarely used. This means that **a transformation group is a pair of transformations to be applied to the X and Y values** of each instance returned by the dataset. In both torchvision and Avalanche implementations, **a transformation must be a function (or other callable object)** that accepts one input (the X or Y value) and outputs its transformed version. This pair of functions is stored in the `transform` and `target_transform` fields of the dataset. A comprehensive guide on transformations can be found in the [torchvision documentation](https://pytorch.org/vision/stable/transforms.html). + +In the following example, an MNIST dataset is created and then wrapped in an AvalancheDataset. When creating the AvalancheDataset, we can set _train_ and _eval_ transformations by passing a _transform\_groups_ parameter. Train transformations usually include some form of random augmentation, while eval transformations usually include a sequence of deterministic transformations only. Here we define the sequence of train transformations as a random rotation followed by the ToTensor operation. The eval transformations only include the ToTensor operation.
+ +```python +from torchvision import transforms +from torchvision.datasets import MNIST +from avalanche.benchmarks.utils import AvalancheDataset + +mnist_dataset = MNIST('mnist_data', download=True) + +# Define the training transformation for X values +train_transformation = transforms.Compose([ + transforms.RandomRotation(45), + transforms.ToTensor(), +]) +# Define the training transformation for Y values (rarely used) +train_target_transformation = None + +# Define the test transformation for X values +eval_transformation = transforms.ToTensor() +# Define the test transformation for Y values (rarely used) +eval_target_transformation = None + +transform_groups = { + 'train': (train_transformation, train_target_transformation), + 'eval': (eval_transformation, eval_target_transformation) +} + +avl_mnist_transform = AvalancheDataset(mnist_dataset, transform_groups=transform_groups) +``` + +Of course, one can also just use the `transform` and `target_transform` constructor parameters to set the transformations for both the _train_ and the _eval_ groups. However, it is recommended to use the approach based on _transform\_groups_ (shown in the code above) as it is much more flexible. + +```python +# Not recommended: use transform_groups instead +avl_mnist_same_transforms = AvalancheDataset(mnist_dataset, transform=train_transformation) +``` + +### Using `.train()` and `.eval()` + +**The default behaviour of the AvalancheDataset is to use transformations from the **_**train**_** group.** However, one can easily obtain a version of the dataset where the _eval_ group is used. Note: when obtaining the dataset of experiences from the test stream, those datasets will already be using the _eval_ group of transformations, so you don't need to switch to the eval group ;). + +As noted before, transformations for the current group are loaded in the `transform` and `target_transform` fields. These fields can be changed directly, but this is _NOT_ recommended, as this will not create a copy of the dataset and will likely affect other parts of the code in which the dataset is used. + +The recommended way to switch between the _train_ and _eval_ groups is to use the `.train()` and `.eval()` methods to obtain a copy (view) of the dataset with the proper transformations enabled. This is another very handy feature of the AvalancheDataset: **methods that manipulate the AvalancheDataset fields (and transformations) always create a view of the dataset. The original dataset is never changed.** + +In the following cell we use the _avl\_mnist\_transform_ dataset created in the cells above. We first obtain a view of it in which _eval_ transformations are enabled. Then, starting from this view, we obtain a version of it in which _train_ transformations are enabled. We want to double-stress that `.train()` and `.eval()` never change the group of the dataset on which they are called: they always create a view. + +One can check that the correct transformation group is in use by looking at the content of the _transform/target\_transform_ fields.
+ +```python +# Obtain a view of the dataset in which eval transformations are enabled +avl_mnist_eval = avl_mnist_transform.eval() + +# Obtain a view of the dataset in which we get back to train transforms +# Basically, avl_mnist_transform ~= avl_mnist_train +avl_mnist_train = avl_mnist_eval.train() + +# Check the current transformations function for the 3 datasets +print('Original dataset transformation:', avl_mnist_transform.transform) +# Output: +# Original dataset transformation: Compose( +#     RandomRotation(degrees=[-45.0, 45.0], interpolation=nearest, expand=False, fill=0) +#     ToTensor() +# ) +print('--------------------------------') +print('Eval version of the dataset:', avl_mnist_eval.transform) +# Output: "Eval version of the dataset: ToTensor()" +print('--------------------------------') +print('Back to train transformations:', avl_mnist_train.transform) +# Output: +# Back to train transformations: Compose( +#     RandomRotation(degrees=[-45.0, 45.0], interpolation=nearest, expand=False, fill=0) +#     ToTensor() +# ) +``` + +### Custom transformation groups + +In _AvalancheDataset_s the _**train**_** and **_**eval**_** transformation groups are always available**. However, _AvalancheDataset_ also supports **custom transformation groups**. + +The following example shows how to create an AvalancheDataset with an additional group named _replay_. We define the replay transformation as a random crop followed by the ToTensor operation. + +```python +replay_transform = transforms.Compose([ + transforms.RandomCrop(28, padding=4), + transforms.ToTensor() +]) + +replay_target_transform = None + +transform_groups_with_replay = { + 'train': (None, None), + 'eval': (None, None), + 'replay': (replay_transform, replay_target_transform) +} + +AvalancheDataset(mnist_dataset, transform_groups=transform_groups_with_replay) +``` + +However, once created, the dataset will use the _train_ group. There are two ways to **switch to our custom group**: + +* Set the group when creating the dataset using the `initial_transform_group` constructor parameter +* Switch to the group using the `.with_transforms(group_name)` method + +The `.with_transforms(group_name)` method behaves in the same way `.train()` and `.eval()` do by creating a view of the original dataset. + +The following example shows how to use both methods: + +```python +# Method 1: create the dataset with "replay" as the default group +avl_mnist_custom_transform_1 = AvalancheDataset( + mnist_dataset, + transform_groups=transform_groups_with_replay, + initial_transform_group='replay') + +print(avl_mnist_custom_transform_1.transform) + +# Method 2: switch to "replay" using `.with_transforms(group_name)` +avl_mnist_custom_transform_not_enabled = AvalancheDataset( + mnist_dataset, + transform_groups=transform_groups_with_replay) + +avl_mnist_custom_transform_2 = avl_mnist_custom_transform_not_enabled.with_transforms('replay') +print(avl_mnist_custom_transform_2.transform) + +# Both prints output: +# Compose( +#     RandomCrop(size=(28, 28), padding=4) +#     ToTensor() +# ) +``` + +## Appending transformations + +In standard torchvision datasets, the only way to append (that is, add a new transformation step to the list of existing ones) is to change the _transform_ field directly by doing something like this: + +```python +# Append a transform by using torchvision datasets (>>> DON'T DO THIS!
<<<) + +# Create the dataset +mnist_dataset_w_totensor = MNIST('mnist_data', download=True, transform=transforms.ToTensor()) + +# Append a transform +to_append_transform = transforms.RandomCrop(size=(28, 28), padding=4) +mnist_dataset_w_totensor.transform = transforms.Compose( + [mnist_dataset_w_totensor.transform, to_append_transform] +) +print(mnist_dataset_w_totensor.transform) +# Prints: +# Compose( +#     ToTensor() +#     RandomCrop(size=(28, 28), padding=4) +# ) +``` + +This solution has many huge drawbacks: + +* The transformation field of the dataset is changed directly. This will affect other parts of the code that use that dataset instance. +* If the initial transform is `None`, then `Compose` will not complain, but the process will crash later (try it by yourself: replace the first element of Compose in cell above with `None`, then try obtaining a data point from the dataset). +* If you need to change transformations only temporarily to do some specific things in a limited part of the code, then you need to store the previous set of transformations in some variable in order to switch back to them later. + +AvalancheDataset offers a very simple method to append transformations without incurring those issues. The `.add_transforms(transform=None, target_transform=None)` method will append the given transform(s) **to the currently enabled transform group** and will return a new dataset (actually, a view) with the given transformations appended to the existing ones. The original dataset is not affected. One can also use `.add_transforms_to_group(group_name, transform, target_transform)` to change transformations for a different group. + +The next cell shows how to use `.add_transforms(...)` to append the _to\_append\_transform_ transform defined in the cell above. + +```python +# Create the dataset +avl_mnist = AvalancheDataset(MNIST('mnist_data', download=True), transform=transforms.ToTensor()) + +# Append a transformation. Simple as: +avl_mnist_appended_transform = avl_mnist.add_transforms(to_append_transform) + +print('With appended transforms:', avl_mnist_appended_transform.transform) +# Prints: +# With appended transforms: Compose( +#     ToTensor() +#     RandomCrop(size=(28, 28), padding=4) +# ) + +# Check that the original dataset was not affected: +print('Original dataset:', avl_mnist.transform) +# Prints: "Original dataset: ToTensor()" +``` + +Note that by using `.add_transforms(...)`: + +* The original dataset is not changed, which means that other parts of the code that use that dataset instance are not affected. +* You don't need to worry about _None_ transformations. +* In order to revert to the original transformations you don't need to keep a copy of them: the original dataset is not affected! + +## Replacing transformations + +The replacement operation follows the same idea (and has the same benefits) as the append one. By using `.replace_transforms(transform, target_transform)` one can obtain a view of the original dataset in which the **transformations for the current group** are replaced with the given ones. One may also change transformations for other groups by passing the name of the group as the optional parameter `group`. As with any transform-related operation, the original dataset is not affected. + +Note: one can use `.replace_transforms(...)` to remove previous transformations (by passing `None` as the new transform).
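+ +As a quick illustration, here is a minimal sketch of such a removal (this snippet is our addition, reusing the `avl_mnist` dataset from the appending example above; the printed values are assumptions): + +```python +# Remove the transformations of the current group by replacing them with None. +# As with all transform-related operations, a view is returned: the original +# avl_mnist keeps its ToTensor transform. +avl_mnist_no_transforms = avl_mnist.replace_transforms(None, None) + +print('Without transforms:', avl_mnist_no_transforms.transform) +# Expected to print: "Without transforms: None" + +print('Original dataset:', avl_mnist.transform) +# Prints: "Original dataset: ToTensor()" +```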
+ +The following cell shows how to use `.replace_transforms(...)` to replace the transformations of the current group: + +```python +new_transform = transforms.RandomCrop(size=(28, 28), padding=4) + +# Replace the transformations. Simple as: +avl_mnist_replaced_transform = avl_mnist.replace_transforms(new_transform, None) + +print('With replaced transform:', avl_mnist_replaced_transform.transform) +# Prints: "With replaced transform: RandomCrop(size=(28, 28), padding=4)" + +# Check that the original dataset was not affected: +print('Original dataset:', avl_mnist.transform) +# Prints: "Original dataset: ToTensor()" +``` + +## Freezing transformations + +One last functionality regarding transformations is the ability to "freeze" transformations. Freezing transformations means **permanently gluing transformations to the dataset so that they can't be replaced or changed in any way** (usually by mistake). Frozen transformations cannot be changed by using `.replace_transforms(...)` or even by changing the `transform` field directly. + +One may wonder when this may come in handy... in fact, you will probably rarely need to freeze transformations. However, imagine having to instantiate the PermutedMNIST benchmark. You want the permutation transformation to not be changed by mistake. However, the end users do not know how the internal implementation of the benchmark works, so they may end up messing with those transformations. By freezing the permutation transformation, users cannot mess with it. + +Transformations for all transform groups can be frozen at once by using `.freeze_transforms()`. Transformations can be frozen for a single group by using `.freeze_group_transforms(group_name)`. As always, those methods return a view of the original dataset. + +The cell below shows a simplified excerpt from the [PermutedMNIST benchmark implementation](../../../../avalanche/benchmarks/classic/cmnist.py). First, a _PixelsPermutation_ instance is created. That instance is a transformation that will permute the pixels of the input image. We then create the train and test sets. Once created, transformations for those datasets are frozen using `.freeze_transforms()`. + +```python +from avalanche.benchmarks.classic.cmnist import PixelsPermutation +import numpy as np +import torch + +# Instantiate MNIST train and test sets +mnist_train = MNIST('mnist_data', train=True, download=True) +mnist_test = MNIST('mnist_data', train=False, download=True) + +# Define the transformation used to permute the pixels +rng_seed = 4321 +rng_permute = np.random.RandomState(rng_seed) +idx_permute = torch.from_numpy(rng_permute.permutation(784)).type(torch.int64) +permutation_transform = PixelsPermutation(idx_permute) + +# Define the transforms group +perm_group_transforms = dict( + train=(permutation_transform, None), + eval=(permutation_transform, None) +) + +# Create the datasets and freeze transforms +# Note: one can call "freeze_transforms" on the constructor result +# or do this in 2 steps. The result is the same (obviously). +# The next part shows both ways: + +# Train set +permuted_train_set = AvalancheDataset( + mnist_train, + transform_groups=perm_group_transforms).freeze_transforms() + +# Test set +permuted_test_set = AvalancheDataset( + mnist_test, transform_groups=perm_group_transforms, + initial_transform_group='eval') +permuted_test_set = permuted_test_set.freeze_transforms() +``` + +In this way, that transform can't be removed. However, remember that one can always append other transforms atop frozen transforms.
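+ +For instance, a minimal sketch (this snippet is our addition, building on the `permuted_train_set` created above; the printed type is an assumption) that appends a `ToTensor` step atop the frozen permutation transform: + +```python +# Appending is still allowed on frozen transforms: add_transforms returns a +# view whose ToTensor step runs after the frozen permutation transform. +permuted_train_tensors = permuted_train_set.add_transforms(transforms.ToTensor()) + +x, y, t = permuted_train_tensors[0] +print(type(x)) +# Expected to print: <class 'torch.Tensor'> +```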
+ +The cell below shows that `replace_transforms` can't remove frozen transformations: + +```python +# First, show that the image pixels are permuted +print('Before replace_transforms:') +display(permuted_train_set[0][0].resize((192, 192), 0)) + +# Try to remove the permutation +with_removed_transforms = permuted_train_set.replace_transforms(None, None) + +print('After replace_transforms:') +display(permuted_train_set[0][0].resize((192, 192), 0)) +display(with_removed_transforms[0][0].resize((192, 192), 0)) +``` + +## Transformations wrap-up + +This completes the _Mini How-To_ for the functionalities of the _AvalancheDataset_ related to **transformations**. + +Here you learned how to use **transformation groups** and how to **append/replace/freeze transformations** in a simple way. + +Other _Mini How-To_s will guide you through the other functionalities offered by the _AvalancheDataset_ class. The list of _Mini How-To_s can be found [here](https://avalanche.continualai.org/how-tos/avalanchedataset). + +## 🤝 Run it on Google Colab + +You can run _this chapter_ and play with it on Google Colaboratory by clicking here: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ContinualAI/avalanche/blob/master/notebooks/how-tos/avalanchedataset/advanced-transformations.ipynb) diff --git a/docs/gitbook/how-tos/avalanchedataset/creating-avalanchedatasets.md b/docs/gitbook/how-tos/avalanchedataset/creating-avalanchedatasets.md new file mode 100644 index 000000000..6f9c95a88 --- /dev/null +++ b/docs/gitbook/how-tos/avalanchedataset/creating-avalanchedatasets.md @@ -0,0 +1,228 @@ +--- +description: Creation and manipulation of AvalancheDatasets and their subclasses. +--- + +# Creating AvalancheDatasets + +The _AvalancheDataset_ is an implementation of the PyTorch Dataset class which comes with many out-of-the-box functionalities. The _AvalancheDataset_ (and its few subclasses) are extensively used throughout the whole Avalanche library as the reference way to manipulate datasets: + +* The dataset carried by the `experience.dataset` field is always an _AvalancheDataset_. +* Benchmark creation functions accept _AvalancheDataset_s to create benchmarks where a finer control over task labels is required. +* Internally, benchmarks are created by manipulating _AvalancheDataset_s. + +This first _Mini How-To_ will guide you through the main ways to **instantiate an **_**AvalancheDataset**_ while the **other Mini How-Tos (**[**complete list here**](https://avalanche.continualai.org/how-tos/avalanchedataset)**) will show how to use its functionalities**. + +It is warmly recommended to **run this page as a notebook** using Colab (info at the bottom of this page). + +Let's start by installing Avalanche: + +```python +!pip install git+https://github.com/ContinualAI/avalanche.git + +# Or, if you cloned Avalanche on your PC: +# import sys +# from pathlib import Path +# sys.path.append(str(Path.cwd() / '../../..')) +``` + +## AvalancheDataset vs PyTorch Dataset + +This mini How-To will guide you through the main ways used to instantiate an _AvalancheDataset_. + +First thing: the base class `AvalancheDataset` is a **wrapper for existing datasets**. Only two things must be considered when wrapping an existing dataset: + +* Apart from the x and y values, the resulting AvalancheDataset will also return a third value: the task label (which defaults to 0). +* The wrapped dataset must contain a valid **targets** field.
+ +The **targets field** is available in nearly all _torchvision_ datasets. It must be a list containing the label for each data point (usually the y value). In this way, Avalanche can use that field when instantiating benchmarks like the _Class/Task-Incremental_ and _Domain-Incremental_ ones. + +Avalanche exposes 4 classes of _AvalancheDataset_s which map exactly to the 4 _Dataset_ classes offered by PyTorch: + +* `AvalancheDataset`: the base class, which acts as a wrapper for existing _Dataset_ instances. +* `AvalancheTensorDataset`: equivalent to PyTorch `TensorDataset`. +* `AvalancheSubset`: equivalent to PyTorch `Subset`. +* `AvalancheConcatDataset`: equivalent to PyTorch `ConcatDataset`. + +## 🛠️ Create an AvalancheDataset + +Given a dataset (like MNIST), an _AvalancheDataset_ can be instantiated as follows: + +```python +from avalanche.benchmarks.utils import AvalancheDataset +from torchvision.datasets import MNIST + +# Instantiate the MNIST train dataset from torchvision +mnist_dataset = MNIST('mnist_data', download=True) + +# Create the AvalancheDataset +mnist_avalanche_dataset = AvalancheDataset(mnist_dataset) +``` + +Just like any other Dataset, a data point can be obtained using the `x, y = dataset[idx]` syntax. **When obtaining a data point from an AvalancheDataset, an additional third value (the task label) will be returned**: + +```python +# Obtain the first instance from the original dataset +x, y = mnist_dataset[0] +print(f'x={x}, y={y}') +# Output: "x=, y=5" + +# Obtain the first instance from the AvalancheDataset +x, y, t = mnist_avalanche_dataset[0] +print(f'x={x}, y={y}, t={t}') +# Output: "x=, y=5, t=0" +``` + +**Useful tip:** if you are not sure if you are dealing with a PyTorch _Dataset_ or an _AvalancheDataset_, or if you want to ignore task labels, you can use this syntax: + +```python +# You can use "x, y, *_" to manage both kinds of Datasets +x, y, *_ = mnist_dataset[0] # OK +x, y, *_ = mnist_avalanche_dataset[0] # OK +``` + +## The AvalancheTensorDataset + +The PyTorch _TensorDataset_ is one of the most useful Dataset classes as it can be used to quickly prototype the data loading part of your code. + +A _TensorDataset_ can be wrapped in an AvalancheDataset just like any Dataset, but this is not very convenient, as shown below: + +```python +import torch +from torch.utils.data import TensorDataset + + +# Create 10 instances described by 7 features +x_data = torch.rand(10, 7) + +# Create the class labels for the 10 instances +y_data = torch.randint(0, 5, (10,)) + +# Create the tensor dataset +tensor_dataset = TensorDataset(x_data, y_data) + +# Wrap it in an AvalancheDataset +wrapped_tensor_dataset = AvalancheDataset(tensor_dataset) + +# Obtain the first instance from the dataset +x, y, t = wrapped_tensor_dataset[0] +print(f'x={x}, y={y}, t={t}') +# Output: "x=tensor([0.6329, 0.8495, 0.1853, 0.7254, 0.7893, 0.8079, 0.1106]), y=4, t=0" +``` + +**Instead, it is recommended to use the AvalancheTensorDataset** class to get the same result. In this way, you can just skip one intermediate step.
+ +```python +from avalanche.benchmarks.utils import AvalancheTensorDataset + +# Create the tensor dataset +avl_tensor_dataset = AvalancheTensorDataset(x_data, y_data) + +# Obtain the first instance from the AvalancheTensorDataset +x, y, t = avl_tensor_dataset[0] +print(f'x={x}, y={y}, t={t}') +# Output: "x=tensor([0.6329, 0.8495, 0.1853, 0.7254, 0.7893, 0.8079, 0.1106]), y=4, t=0" +``` + +In both cases, **AvalancheDataset will automatically populate its **_**targets**_** field by using the values from the second Tensor** (which usually contains the Y values). This behaviour can be customized by passing a custom `targets` constructor parameter (by either passing a list of targets or the index of the Tensor to use). + +The cell below shows the content of the target field of the dataset created in the cell above. Notice that the _targets_ field has been filled with the content of the second Tensor (_y\_data_). + +```python +# Check the targets field +print('y_data=', y_data) + # Output: "y_data= tensor([4, 3, 3, 2, 0, 1, 3, 3, 3, 2])" + +print('targets field=', avl_tensor_dataset.targets) +# Output: "targets field= [tensor(4), tensor(3), tensor(3), tensor(2), +# tensor(0), tensor(1), tensor(3), tensor(3), tensor(3), tensor(2)]" +``` + +## The AvalancheSubset and AvalancheConcatDataset classes + +Avalanche offers the `AvalancheSubset` and `AvalancheConcatDataset` implementations that extend the functionalities of PyTorch _Subset_ and _ConcatDataset_. + +Regarding the subsetting operation, `AvalancheSubset` behaves in the same way the PyTorch `Subset` class does: both implementations accept a dataset and a list of indices as parameters. The resulting Subset is not a copy of the dataset, it's just a view. This is similar to creating a view of a NumPy array by passing a list of indices using the `numpy_array[list_of_indices]` syntax. This can be used to both _create a smaller dataset_ and to _change the order of data points_ in the dataset. + +Here we create a toy dataset in which the X and Y values are _int_s. We then obtain a subset of it by creating an **AvalancheSubset**: + +```python +from avalanche.benchmarks.utils import AvalancheSubset + +# Define the X values of 10 instances (each instance is an int) +x_data_toy = [50, 51, 52, 53, 54, 55, 56, 57, 58, 59] + +# Define the class labels for the 10 instances +y_data_toy = [10, 11, 12, 13, 14, 15, 16, 17, 18, 19] + +# Create the tensor dataset +# Note: AvalancheSubset can also be applied to PyTorch TensorDataset directly! +# However, note that PyTorch TensorDataset doesn't support Python lists... +# ... (it only supports Tensors) while AvalancheTensorDataset does. +toy_dataset = AvalancheTensorDataset(x_data_toy, y_data_toy) + +# Define the indices for the subset +# Here we want to obtain a subset containing only the data points... +# ... at indices 0, 5, 8, 2 (in this specific order) +subset_indices = [0, 5, 8, 2] + +# Create the subset +avl_subset = AvalancheSubset(toy_dataset, indices=subset_indices) +print('The subset contains', len(avl_subset), 'instances.') +# Output: "The subset contains 4 instances." + +# Obtain instances from the AvalancheSubset +for x, y, t in avl_subset: + print(f'x={x}, y={y}, t={t}') +# Output: +# x=50, y=10, t=0 +# x=55, y=15, t=0 +# x=58, y=18, t=0 +# x=52, y=12, t=0 +``` + +Concatenation is even simpler. Just like with PyTorch _ConcatDataset_, one can easily concatenate datasets with **AvalancheConcatDataset**.
+ +Both _AvalancheConcatDataset_ and PyTorch _ConcatDataset_ accept a list of datasets to concatenate. + +```python +from avalanche.benchmarks.utils import AvalancheConcatDataset + +# Define the 2 datasets to be concatenated +x_data_toy_1 = [50, 51, 52, 53, 54] +y_data_toy_1 = [10, 11, 12, 13, 14] +x_data_toy_2 = [60, 61, 62, 63, 64] +y_data_toy_2 = [20, 21, 22, 23, 24] + +# Create the datasets +toy_dataset_1 = AvalancheTensorDataset(x_data_toy_1, y_data_toy_1) +toy_dataset_2 = AvalancheTensorDataset(x_data_toy_2, y_data_toy_2) + +# Create the concat dataset +avl_concat = AvalancheConcatDataset([toy_dataset_1, toy_dataset_2]) +print('The concat dataset contains', len(avl_concat), 'instances.') +# Output: "The concat dataset contains 10 instances." + +# Obtain instances from the AvalancheConcatDataset +for x, y, t in avl_concat: + print(f'x={x}, y={y}, t={t}') +# Output: +# x=50, y=10, t=0 +# x=51, y=11, t=0 +# x=52, y=12, t=0 +# x=53, y=13, t=0 +# x=54, y=14, t=0 +# x=60, y=20, t=0 +# x=61, y=21, t=0 +# x=62, y=22, t=0 +# x=63, y=23, t=0 +# x=64, y=24, t=0 +``` + +## Dataset Creation wrap-up + +This _Mini How-To_ showed you how to **create instances of AvalancheDataset (and its subclasses)**. + +Other _Mini How-To_s will guide you through the functionalities offered by AvalancheDataset. The list of _Mini How-To_s can be found [here](https://avalanche.continualai.org/how-tos/avalanchedataset). + +## 🤝 Run it on Google Colab + +You can run _this chapter_ and play with it on Google Colaboratory by clicking here: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ContinualAI/avalanche/blob/master/notebooks/how-tos/avalanchedataset/creating-avalanchedatasets.ipynb) diff --git a/docs/gitbook/how-tos/avalanchedataset/preamble-pytorch-datasets.md b/docs/gitbook/how-tos/avalanchedataset/preamble-pytorch-datasets.md new file mode 100644 index 000000000..9030f3854 --- /dev/null +++ b/docs/gitbook/how-tos/avalanchedataset/preamble-pytorch-datasets.md @@ -0,0 +1,86 @@ +--- +description: A few words about PyTorch Datasets +--- + +# Preamble: PyTorch Datasets + +This short preamble will briefly go through the basic notions of Dataset offered natively by PyTorch. A solid grasp of these notions is needed to understand: + +1. How PyTorch data loading works in general +2. How AvalancheDataset differs from PyTorch Datasets + +## 📚 Dataset: general definition + +In PyTorch, **a `Dataset` is a class** exposing two methods: + +* `__len__()`, which returns the number of instances in the dataset (as an `int`). +* `__getitem__(idx)`, which returns the data point at index `idx`. + +In other words, a Dataset instance is just an object for which, similarly to a list, one can simply: + +* Obtain its length using the Python `len(dataset)` function. +* Obtain a single data point using the `x, y = dataset[idx]` syntax. + +The content of the dataset can be either loaded in memory when the dataset is instantiated (like the torchvision MNIST dataset does) or, for big datasets like ImageNet, the content is kept on disk, with the dataset keeping the list of files in an internal field. In this case, data is loaded from the storage on-the-fly when `__getitem__(idx)` is called. The way those things are managed is specific to each dataset implementation. + +## PyTorch Datasets + +The PyTorch library offers 4 Dataset implementations: + +* `Dataset`: an interface defining the `__len__` and `__getitem__` methods. +* `TensorDataset`: instantiated by passing X and Y tensors.
Each row of the X and Y tensors is interpreted as a data point. The `__getitem__(idx)` method will simply return the `idx`-th row of the X and Y tensors. +* `ConcatDataset`: instantiated by passing a list of datasets. The resulting dataset is a concatenation of those datasets. +* `Subset`: instantiated by passing a dataset and a list of indices. The resulting dataset will only contain the data points described by that list of indices. + +As explained in the mini _How-To_s, Avalanche offers a customized version of all these 4 datasets. + +## Transformations + +Most datasets from the _torchvision_ libraries (as well as datasets found "in the wild") allow for a `transformation` function to be passed to the dataset constructor. The support for transformations is not mandatory for a dataset, but it is quite common to support them. The transformation is used to process the X value of a data point before returning it. This is used to normalize values, apply augmentations, etcetera. + +As explained in the mini _How-To_s, the `AvalancheDataset` class implements a very rich and powerful set of functionalities for managing transformations. + +## Quick note on the IterableDataset class + +A variation of the standard `Dataset` exists in PyTorch: the [IterableDataset](https://pytorch.org/docs/stable/data.html#iterable-style-datasets). When using an `IterableDataset`, one can load the data points in a sequential way only (by using a tape-like approach). The `dataset[idx]` syntax and `len(dataset)` function are not allowed. **Avalanche does NOT support `IterableDataset`s.** You shouldn't worry about this because, realistically, you will never encounter such datasets. + +## DataLoader + +The `Dataset` is a very simple object that only returns one data point given its index. In order to create minibatches and speed up the data loading process, a `DataLoader` is required. + +The PyTorch `DataLoader` class is a very efficient mechanism that, given a `Dataset`, will return **minibatches** by optionally **shuffling** data before each epoch and by **loading data in parallel** by using multiple workers. + +## Preamble wrap-up + +To wrap up, let's see how the native, _non-Avalanche_, PyTorch components work in practice. In the following code we create a `TensorDataset` and then load it in minibatches using a `DataLoader`. + +```python +import torch +from torch.utils.data.dataset import TensorDataset +from torch.utils.data.dataloader import DataLoader + +# Create a dataset of 100 data points described by 22 features + 1 class label +x_data = torch.rand(100, 22) +y_data = torch.randint(0, 5, (100,)) + +# Create the Dataset +my_dataset = TensorDataset(x_data, y_data) + +# Create the DataLoader +my_dataloader = DataLoader(my_dataset, batch_size=10, shuffle=True, num_workers=4) + +# Run one epoch +for x_minibatch, y_minibatch in my_dataloader: + print('Loaded minibatch of', len(x_minibatch), 'instances') +# Output: "Loaded minibatch of 10 instances" x10 times +``` + +## Next steps + +With these notions in mind, you can start your journey towards understanding the functionalities offered by AvalancheDataset by going through the _Mini How-To_s. + +Please refer to the [list of the _Mini How-To_s regarding AvalancheDatasets](https://avalanche.continualai.org/how-tos/avalanchedataset). It is recommended to start with the **"Creating AvalancheDatasets"** _Mini How-To_.
+ +## 🤝 Run it on Google Colab + +You can run _this chapter_ and play with it on Google Colaboratory by clicking here: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ContinualAI/avalanche/blob/master/notebooks/how-tos/avalanchedataset/preamble-pytorch-datasets.ipynb) diff --git a/docs/gitbook/how-tos/dataloading_buffers_replay.md b/docs/gitbook/how-tos/dataloading_buffers_replay.md index 4675437fa..8d31204b8 100644 --- a/docs/gitbook/how-tos/dataloading_buffers_replay.md +++ b/docs/gitbook/how-tos/dataloading_buffers_replay.md @@ -1,3 +1,6 @@ +--- +description: How to implement replay and data loading +--- # Dataloading, Memory Buffers, and Replay Avalanche provides several components that help you to balance data loading and implement rehearsal strategies. @@ -8,6 +11,13 @@ Avalanche provides several components that help you to balance data loading and Finally, **Replay** strategies implement rehearsal by using Avalanche's plugin system. Most rehearsal strategies use a custom dataloader to balance the buffer with the current experience and a buffer that is updated for each experience. +First, let's install Avalanche. You can skip this step if you have installed it already. + + +```python +!pip install git+https://github.com/ContinualAI/avalanche.git +``` + ## Dataloaders Avalanche dataloaders are simple iterators, located under `avalanche.benchmarks.utils.data_loader`. Their interface is equivalent to PyTorch's dataloaders. For example, `GroupBalancedDataLoader` takes a sequence of datasets and iterates over them by providing balanced mini-batches, where the number of samples is split equally among groups. Internally, it instantiates a `DataLoader` for each separate group. More specialized dataloaders exist such as `TaskBalancedDataLoader`. diff --git a/docs/gitbook/questions-and-issues/add-your-issue.md b/docs/gitbook/questions-and-issues/add-your-issue.md index f6ac1cd35..aea080e38 100644 --- a/docs/gitbook/questions-and-issues/add-your-issue.md +++ b/docs/gitbook/questions-and-issues/add-your-issue.md @@ -4,13 +4,12 @@ description: Help us Find Bug in Avalanche # Add Your Issue -If you encounter a problem in _Avalanche_, please do not give up on us and help us fix it as soon as possible**. This first of all means reporting it**. We are grateful to all the people who took the time _to report an issue_ or even fix it with a _Pull Request_. +If you encounter a problem in _Avalanche_, please do not give up on us and help us fix it as soon as possible**. This first of all means reporting it**. We are grateful to all the people who took the time _to report an issue_ or even fix it with a _Pull Request_. -![Open Issues for the Avalanche Project](../.gitbook/assets/issues.png) +![Open Issues for the Avalanche Project](../../../.gitbook/assets/issues.png) Check the current _Avalanche_ issues or **submit a new one** here: {% embed url="https://github.com/ContinualAI/avalanche" %} Please try to use the _appropriate tags_ and explain your issue with a simple _code snippet_ to reproduce it.
- diff --git a/docs/gitbook/questions-and-issues/ask-your-question.md b/docs/gitbook/questions-and-issues/ask-your-question.md index 0db6be777..db07e5f2e 100644 --- a/docs/gitbook/questions-and-issues/ask-your-question.md +++ b/docs/gitbook/questions-and-issues/ask-your-question.md @@ -1,5 +1,5 @@ --- -description: 'To get Answers of Life, Ask Questions' +description: To get Answers of Life, Ask Questions --- # Ask Your Question @@ -10,13 +10,13 @@ We know that learning a new tool may be tough at times. This is why we are here Don't be afraid to ask questions, there are no stupid questions and we will always answer you. {% endhint %} -_However, in order to help you, we need you to help us first._ - -First of all, if the question is more of a _code issue_ **please use** the ****[**GitHub Issues**](https://github.com/ContinualAI/avalanche/issues) ****page. +_However, in order to help you, we need you to help us first._ \ +\ +First of all, if the question is more of a _code issue_ **please use** the **** [**GitHub Issues**](https://github.com/ContinualAI/avalanche/issues) **** page.\ For _general questions, ideas,_ [_feature-requests_](request-a-feature.md) _and discussions_ use [**GitHub Discussions**](https://github.com/ContinualAI/avalanche/discussions). -If instead, this is a _quick question about Avalanche or a request for support_, in this case you can ask us directly [on slack](https://join.slack.com/t/continualai/shared_invite/enQtNjQxNDYwMzkxNzk0LTBhYjg2MjM0YTM2OWRkNDYzOGE0ZTIzNDQ0ZGMzNDE3ZGUxNTZmNmM1YzJiYzgwMTkyZDQxYTlkMTI3NzZkNjU) \(**\#avalanche** channel\). - +If, instead, this is a _quick question about Avalanche or a request for support_, you can ask us directly [on slack](https://join.slack.com/t/continualai/shared\_invite/enQtNjQxNDYwMzkxNzk0LTBhYjg2MjM0YTM2OWRkNDYzOGE0ZTIzNDQ0ZGMzNDE3ZGUxNTZmNmM1YzJiYzgwMTkyZDQxYTlkMTI3NzZkNjU) (**#avalanche** channel).\ +\ In any case, please make sure to follow the steps below: 1. _Clarify your information needs._ @@ -26,5 +26,3 @@ In any case, please make sure to follow the steps below: Then we will try to answer as swiftly as possible! 🤗 - - diff --git a/docs/gitbook/questions-and-issues/give-feedback.md b/docs/gitbook/questions-and-issues/give-feedback.md index 24c0ccf93..40ca420ea 100644 --- a/docs/gitbook/questions-and-issues/give-feedback.md +++ b/docs/gitbook/questions-and-issues/give-feedback.md @@ -4,15 +4,17 @@ description: We are all ears! # Give Feedback -_Avalanche_ is a tool **from** the continual learning research community and **for** the continual learning research community. We try to keep the design of _Avalanche_ as _open_, _collaborative_ and _inclusive_ as possible. - -This is why **we are always keen to hear your feedback** about _Avalanche!_ Join directly [**on slack**](https://join.slack.com/t/continualai/shared_invite/enQtNjQxNDYwMzkxNzk0LTBhYjg2MjM0YTM2OWRkNDYzOGE0ZTIzNDQ0ZGMzNDE3ZGUxNTZmNmM1YzJiYzgwMTkyZDQxYTlkMTI3NzZkNjU) \(**\#avalanche** channel\) for a quick feedback or write a post on [**GitHub Discussions**](https://github.com/ContinualAI/avalanche/discussions)! +_Avalanche_ is a tool **from** the continual learning research community and **for** the continual learning research community.
We try to keep the design of _Avalanche_ as _open_, _collaborative_ and _inclusive_ as possible.\ +\ +This is why **we are always keen to hear your feedback** about _Avalanche!_ Join directly [**on slack**](https://join.slack.com/t/continualai/shared\_invite/enQtNjQxNDYwMzkxNzk0LTBhYjg2MjM0YTM2OWRkNDYzOGE0ZTIzNDQ0ZGMzNDE3ZGUxNTZmNmM1YzJiYzgwMTkyZDQxYTlkMTI3NzZkNjU) (**#avalanche** channel) for quick feedback or write a post on [**GitHub Discussions**](https://github.com/ContinualAI/avalanche/discussions)! -![General Feedback Section of the Avalanche GitHub "Discussions" Tab.](../.gitbook/assets/feedback.png) - -{% embed url="https://join.slack.com/t/continualai/shared\_invite/enQtNjQxNDYwMzkxNzk0LTBhYjg2MjM0YTM2OWRkNDYzOGE0ZTIzNDQ0ZGMzNDE3ZGUxNTZmNmM1YzJiYzgwMTkyZDQxYTlkMTI3NzZkNjU" caption="Click Above to Join ContinualAI Slack" %} - -{% embed url="https://github.com/ContinualAI/avalanche/discussions" caption="Click Above to Join the Discussion!" %} +![General Feedback Section of the Avalanche GitHub "Discussions" Tab.](../../../.gitbook/assets/feedback.png) +{% embed url="https://join.slack.com/t/continualai/shared_invite/enQtNjQxNDYwMzkxNzk0LTBhYjg2MjM0YTM2OWRkNDYzOGE0ZTIzNDQ0ZGMzNDE3ZGUxNTZmNmM1YzJiYzgwMTkyZDQxYTlkMTI3NzZkNjU" %} +Click Above to Join ContinualAI Slack +{% endembed %} +{% embed url="https://github.com/ContinualAI/avalanche/discussions" %} +Click Above to Join the Discussion! +{% endembed %} diff --git a/docs/gitbook/questions-and-issues/request-a-feature.md b/docs/gitbook/questions-and-issues/request-a-feature.md index 36a84777c..747102403 100644 --- a/docs/gitbook/questions-and-issues/request-a-feature.md +++ b/docs/gitbook/questions-and-issues/request-a-feature.md @@ -8,11 +8,10 @@ description: Help us Design Avalanche of the Future Do you think an **important feature is missing** in Avalanche? You are in the right place! {% endhint %} -We try to keep the design of _Avalanche_ as _open_, _collaborative_ and _inclusive_ as possible. This means discussing _Avalanche_ issues, development and future ideas openly through general [ContinualAI projects meetups](https://www.continualai.org/news/#meetup), its [slack channel](https://join.slack.com/t/continualai/shared_invite/enQtNjQxNDYwMzkxNzk0LTBhYjg2MjM0YTM2OWRkNDYzOGE0ZTIzNDQ0ZGMzNDE3ZGUxNTZmNmM1YzJiYzgwMTkyZDQxYTlkMTI3NzZkNjU), [Github](https://github.com/vlomonaco) and [forum](https://continualai.discourse.group). +We try to keep the design of _Avalanche_ as _open_, _collaborative_ and _inclusive_ as possible. This means discussing _Avalanche_ issues, development and future ideas openly through general [ContinualAI projects meetups](https://www.continualai.org/news/#meetup), its [slack channel](https://join.slack.com/t/continualai/shared\_invite/enQtNjQxNDYwMzkxNzk0LTBhYjg2MjM0YTM2OWRkNDYzOGE0ZTIzNDQ0ZGMzNDE3ZGUxNTZmNmM1YzJiYzgwMTkyZDQxYTlkMTI3NzZkNjU), [Github](https://github.com/vlomonaco) and [forum](https://continualai.discourse.group). If you'd like to add a new feature to _Avalanche_ please let us know, so we can work on it, or team up with you to make it happen! 😄 Feature requests can be opened on the appropriate [GitHub Discussion Feature-Request section](https://github.com/ContinualAI/avalanche/discussions/categories/feature-request). Vote for your preferred features and we will try to implement the most voted first! -![Feature-request section of the Avalanche GitHub "Discussions" Tab.
](../../../.gitbook/assets/requerst.png) diff --git a/environment-dev.yml b/environment-dev.yml index 5fc4786b3..59b09edf2 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -31,4 +31,5 @@ dependencies: - pip: - pytorchcv - gdown + - ctrl-benchmark - gym diff --git a/environment.yml b/environment.yml index 71e01d05e..81d8143a5 100644 --- a/environment.yml +++ b/environment.yml @@ -26,4 +26,5 @@ dependencies: - pip: - pytorchcv - gdown + - ctrl-benchmark - gym diff --git a/examples/simple_ctrl.py b/examples/simple_ctrl.py new file mode 100644 index 000000000..c03a47261 --- /dev/null +++ b/examples/simple_ctrl.py @@ -0,0 +1,128 @@ +################################################################################ +# Copyright (c) 2021 ContinualAI. # +# Copyrights licensed under the MIT License. # +# See the accompanying LICENSE file for terms. # +# # +# Date: 22-06-2021 # +# Author(s): Tom Veniat # +# E-mail: contact@continualai.org # +# Website: avalanche.continualai.org # +################################################################################ + +""" +In this example we show a simple way to use the CTrL benchmark presented +in https://arxiv.org/abs/2012.12631. +The training procedure will report the Transfer metric as defined in +eq. 3 of the article for all streams but the long one, for which +the average accuracy after training on the whole stream is reported. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +from copy import deepcopy + +import torch +from torch.nn import CrossEntropyLoss +from torch.optim import SGD + +from avalanche.benchmarks.classic.ctrl import CTrL +from avalanche.evaluation.metrics import accuracy_metrics +from avalanche.logging import InteractiveLogger +from avalanche.models import SimpleCNN +from avalanche.training.plugins import EvaluationPlugin +from avalanche.training.plugins.early_stopping import EarlyStoppingPlugin +from avalanche.training.strategies import Naive + + +def main(args): + # Device config + device = torch.device(f"cuda:{args.cuda}" + if torch.cuda.is_available() and + args.cuda >= 0 else "cpu") + + # Initialize the model, stream and training strategy + model = SimpleCNN(num_classes=10) + if args.stream != 's_long': + model_init = deepcopy(model) + + scenario = CTrL(stream_name=args.stream, save_to_disk=args.save, + path=args.path, seed=10) + + train_stream = scenario.train_stream + test_stream = scenario.test_stream + val_stream = scenario.val_stream + + optimizer = SGD(model.parameters(), lr=0.001, momentum=0.9) + criterion = CrossEntropyLoss() + + logger = EvaluationPlugin( + accuracy_metrics(minibatch=False, epoch=False, experience=True, + stream=True), + loggers=[InteractiveLogger()]) + + cl_strategy = Naive( + model, optimizer, criterion, train_mb_size=32, device=device, + train_epochs=args.max_epochs, eval_mb_size=128, evaluator=logger, + plugins=[EarlyStoppingPlugin(50, 'val_stream')], eval_every=5 + ) + + # Train and test loop + for train_task, val_task in zip(train_stream, val_stream): + cl_strategy.train(train_task, eval_streams=[val_task]) + cl_strategy.eval(test_stream) + + transfer_mat = [] + for tid in range(len(train_stream)): + transfer_mat.append( + logger.all_metric_results['Top1_Acc_Exp/eval_phase/test_stream/' + f'Task00{tid}/Exp00{tid}'][1]) + + if args.stream == 's_long': + res = logger.last_metric_results['Top1_Acc_Stream/eval_phase/' + 'test_stream'] + print(f'Average accuracy on S_long : {res}') + else: + optimizer = 
SGD(model_init.parameters(), lr=0.001, momentum=0.9) + cl_strategy = Naive( + model_init, optimizer, criterion, train_mb_size=32, device=device, + train_epochs=args.max_epochs, eval_mb_size=128, + plugins=[EarlyStoppingPlugin(50, 'val_stream')], eval_every=5 + ) + + cl_strategy.train(train_stream[-1]) + res = cl_strategy.eval([test_stream[-1]]) + + acc_last_stream = transfer_mat[-1][-1] + acc_last_only = res['Top1_Acc_Exp/eval_phase/test_stream/' + 'Task005/Exp-01'] + transfer_value = acc_last_stream - acc_last_only + + print(f'Accuracy on probe task after training on the whole ' + f'stream: {acc_last_stream}') + print(f'Accuracy on probe task when trained ' + f'independently: {acc_last_only}') + print(f'T({args.stream})={transfer_value}') + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--stream', type=str, default='s_plus', + choices=['s_plus', 's_minus', 's_in', 's_out', 's_pl', + 's_long'], + help='Select the CTrL Stream to train on: [s_plus], ' + 's_minus, s_in, s_out, s_pl or s_long.') + parser.add_argument('--save', type=bool, default=False, + help='Whether to save the generated experiences to' + ' disk or load them all in memory.') + parser.add_argument('--path', type=str, + help='Path used to save the generated stream.') + parser.add_argument('--max-epochs', type=int, default=200, + help='The maximum number of training epochs for each ' + 'task. Defaults to 200.') + parser.add_argument('--cuda', type=int, default=0, + help='Select zero-indexed cuda device. -1 to use CPU.') + args = parser.parse_args() + main(args) diff --git a/notebooks/from-zero-to-hero-tutorial/04_training.ipynb b/notebooks/from-zero-to-hero-tutorial/04_training.ipynb index dbeda1573..92e48b45e 100644 --- a/notebooks/from-zero-to-hero-tutorial/04_training.ipynb +++ b/notebooks/from-zero-to-hero-tutorial/04_training.ipynb @@ -13,7 +13,9 @@ "---\n", "# Training\n", "\n", - "Welcome to the \"_Training_\" tutorial of the \"_From Zero to Hero_\" series. In this part we will present the functionalities offered by the `training` module." + "Welcome to the \"_Training_\" tutorial of the \"_From Zero to Hero_\" series. In this part we will present the functionalities offered by the `training` module.\n", + "\n", + "First, let's install Avalanche. You can skip this step if you have installed it already."
] }, { @@ -33,9 +35,9 @@ " Cloning https://github.com/ContinualAI/avalanche.git to /tmp/pip-req-build-f00959wq\r\n", " Running command git clone -q https://github.com/ContinualAI/avalanche.git /tmp/pip-req-build-f00959wq\r\n", "^C\r\n", - "\u001B[31mERROR: Operation cancelled by user\u001B[0m\r\n", - "\u001B[33mWARNING: You are using pip version 21.2.4; however, version 21.3 is available.\r\n", - "You should consider upgrading via the '/home/carta/anaconda3/envs/avalanche/bin/python -m pip install --upgrade pip' command.\u001B[0m\r\n" + "\u001b[31mERROR: Operation cancelled by user\u001b[0m\r\n", + "\u001b[33mWARNING: You are using pip version 21.2.4; however, version 21.3 is available.\r\n", + "You should consider upgrading via the '/home/carta/anaconda3/envs/avalanche/bin/python -m pip install --upgrade pip' command.\u001b[0m\r\n" ] } ], @@ -435,6 +437,11 @@ { "cell_type": "code", "execution_count": 3, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "from avalanche.training.plugins import EarlyStoppingPlugin\n", @@ -442,29 +449,27 @@ "strategy = Naive(\n", " model, optimizer, criterion,\n", " plugins=[EarlyStoppingPlugin(patience=10, val_stream_name='train')])" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - } + ] }, { "cell_type": "markdown", - "source": [ - "In Avalanche, most continual learning strategies are implemented using plugins, which makes it easy to combine them together. For example, it is extremely easy to create a hybrid strategy that combines replay and EWC together by passing the appropriate `plugins` list to the `BaseStrategy`:" - ], "metadata": { - "collapsed": false, "pycharm": { "name": "#%% md\n" } - } + }, + "source": [ + "In Avalanche, most continual learning strategies are implemented using plugins, which makes it easy to combine them together. For example, it is extremely easy to create a hybrid strategy that combines replay and EWC together by passing the appropriate `plugins` list to the `BaseStrategy`:" + ] }, { "cell_type": "code", "execution_count": 4, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "from avalanche.training.strategies import BaseStrategy\n", @@ -475,25 +480,18 @@ "strategy = BaseStrategy(\n", " model, optimizer, criterion,\n", " plugins=[replay, ewc])" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - } + ] }, { "cell_type": "markdown", - "source": [ - "Beware that most strategy plugins modify the internal state. As a result, not all the strategy plugins can be combined together. For example, it does not make sense to use multiple replay plugins since they will try to modify the same strategy variables (mini-batches, dataloaders), and therefore they will be in conflict." - ], "metadata": { - "collapsed": false, "pycharm": { "name": "#%% md\n" } - } + }, + "source": [ + "Beware that most strategy plugins modify the internal state. As a result, not all the strategy plugins can be combined together. For example, it does not make sense to use multiple replay plugins since they will try to modify the same strategy variables (mini-batches, dataloaders), and therefore they will be in conflict." + ] }, { "cell_type": "markdown", @@ -580,7 +578,7 @@ "- `self.experience`: the current experience.\n", "- `self.adapted_dataset`: the data modified by the dataset adaptation phase.\n", "- `self.dataloader`: the current dataloader.\n", - "- `self.mbatch`: the current mini-batch. 
For classification problems, mini-batches have the form `<x, y, t>`, where `x` is the input, `y` is the label, and `t` is the target.\n", + "- `self.mbatch`: the current mini-batch. For classification problems, mini-batches have the form `<x, y, t>`, where `x` is the input, `y` is the target class, and `t` is the task label.\n", + "- `self.mb_output`: the current model's output.\n", + "- `self.loss`: the current loss.\n", + "- `self.is_training`: `True` if the strategy is in training mode.\n", @@ -664,7 +662,22 @@ }, { "data": { - "text/plain": "{'Top1_Acc_Epoch/train_phase/train_stream/Task000': 0.8854205685270737,\n 'Loss_Epoch/train_phase/train_stream/Task000': 0.4297838434443649,\n 'Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp000': 0.9205405405405406,\n 'Loss_Exp/eval_phase/test_stream/Task000/Exp000': 0.23593760548411188,\n 'Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp001': 0.9418550992155053,\n 'Loss_Exp/eval_phase/test_stream/Task000/Exp001': 0.2060205339147685,\n 'Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp002': 0.9365404298874105,\n 'Loss_Exp/eval_phase/test_stream/Task000/Exp002': 0.225994213805472,\n 'Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp003': 0.8395245170876672,\n 'Loss_Exp/eval_phase/test_stream/Task000/Exp003': 0.48296057826637806,\n 'Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp004': 0.9701492537313433,\n 'Loss_Exp/eval_phase/test_stream/Task000/Exp004': 0.10016660355772841,\n 'Top1_Acc_Stream/eval_phase/test_stream/Task000': 0.9219,\n 'Loss_Stream/eval_phase/test_stream/Task000': 0.2500956041585654}" + "text/plain": [ + "{'Top1_Acc_Epoch/train_phase/train_stream/Task000': 0.8854205685270737,\n", + " 'Loss_Epoch/train_phase/train_stream/Task000': 0.4297838434443649,\n", + " 'Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp000': 0.9205405405405406,\n", + " 'Loss_Exp/eval_phase/test_stream/Task000/Exp000': 0.23593760548411188,\n", + " 'Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp001': 0.9418550992155053,\n", + " 'Loss_Exp/eval_phase/test_stream/Task000/Exp001': 0.2060205339147685,\n", + " 'Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp002': 0.9365404298874105,\n", + " 'Loss_Exp/eval_phase/test_stream/Task000/Exp002': 0.225994213805472,\n", + " 'Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp003': 0.8395245170876672,\n", + " 'Loss_Exp/eval_phase/test_stream/Task000/Exp003': 0.48296057826637806,\n", + " 'Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp004': 0.9701492537313433,\n", + " 'Loss_Exp/eval_phase/test_stream/Task000/Exp004': 0.10016660355772841,\n", + " 'Top1_Acc_Stream/eval_phase/test_stream/Task000': 0.9219,\n", + " 'Loss_Stream/eval_phase/test_stream/Task000': 0.2500956041585654}" + ] }, "execution_count": 7, "metadata": {}, @@ -811,9 +824,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.2" + "version": "3.7.4" } }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/notebooks/how-tos/avalanchedataset/README.ipynb b/notebooks/how-tos/avalanchedataset/README.ipynb new file mode 100644 index 000000000..35d3fa571 --- /dev/null +++ b/notebooks/how-tos/avalanchedataset/README.ipynb @@ -0,0 +1,52 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "d369a312", + "metadata": { + "pycharm": { + "name": "#%% md\n" + }, + "tags": [] + }, + "source": [ + "---\n", + "description: Dealing with AvalancheDatasets\n", + "---\n", + "\n", + "# AvalancheDataset\n", + "\n", + "The `AvalancheDataset` is an implementation of the PyTorch `Dataset` class that comes with many useful out-of-the-box
functionalities. For most users, the *AvalancheDataset* can be used as a plain PyTorch Dataset that will return `x, y, t` elements. However, the AvalancheDataset is much more powerful than a simple PyTorch Dataset. \n", + "\n", + "**A series of _Mini How-Tos_** will guide you through the functionalities of the *AvalancheDataset* and its subclasses:\n", + "\n", + "- [Preamble: PyTorch Datasets](https://avalanche.continualai.org/how-tos/avalanchedataset/preamble-pytorch-datasets)\n", + "- [Creating AvalancheDatasets](https://avalanche.continualai.org/how-tos/avalanchedataset/creating-avalanchedatasets)\n", + "- [Advanced Transformations](https://avalanche.continualai.org/how-tos/avalanchedataset/advanced-transformations)\n", + "\n", + "Before jumping to the actual *Mini How-To*s, **we recommend having a look at the basic notions of Dataset and DataLoader by reading the [Preamble page](https://avalanche.continualai.org/how-tos/avalanchedataset/preamble-pytorch-datasets)**." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/how-tos/avalanchedataset/advanced-transformations.ipynb b/notebooks/how-tos/avalanchedataset/advanced-transformations.ipynb new file mode 100644 index 000000000..f7eae1cdb --- /dev/null +++ b/notebooks/how-tos/avalanchedataset/advanced-transformations.ipynb @@ -0,0 +1,623 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "53277e45-b7d7-4857-a79d-d7218bd6b6a9", + "metadata": {}, + "source": [ + "---\n", + "description: Dealing with transformations (groups, appending, replacing, freezing).\n", + "---\n", + "\n", + "# Advanced Transformations\n", + "AvalancheDataset (and its subclasses like the Avalanche*Tensor/Subset/Concat*Dataset) allows for finer control over transformations. While torchvision (and other) datasets allow for a minimal mechanism to apply transformations, with AvalancheDataset one can:\n", + "1. Have multiple **transformation \"groups\"** in the same dataset (like separate train and test transformations).\n", + "2. **Append, replace and remove transformations**, even by using nested Subset/Concat Datasets.\n", + "3. **Freeze transformations**, so that they can't be changed.\n", + "\n", + "The following sub-sections show examples of how to use these features. Please note that all the constructor parameters and the methods described in this How-To can be used on AvalancheDataset subclasses as well.
For more info on all the available subclasses, refer to [this Mini How-To](https://avalanche.continualai.org/how-tos/avalanchedataset/creating-avalanchedatasets).\n",
+    "\n",
+    "It is warmly recommended to **run this page as a notebook** using Colab (info at the bottom of this page).\n",
+    "\n",
+    "Let's start by installing Avalanche:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "470e6f0b-ecc2-45c3-af53-112da5d7c37e",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "/home/lorenzo/Desktop/github_repos/avalanche/notebooks\n"
+     ]
+    }
+   ],
+   "source": [
+    "!pip install git+https://github.com/ContinualAI/avalanche.git\n",
+    "\n",
+    "# Or, if you cloned Avalanche on your PC:\n",
+    "# import sys\n",
+    "# from pathlib import Path\n",
+    "# sys.path.append(str(Path.cwd() / '../../..'))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e770eef5-86d4-4601-b26f-ea12013515df",
+   "metadata": {},
+   "source": [
+    "## Transformation groups\n",
+    "AvalancheDatasets can contain multiple **transformation groups**. This can be useful to keep train and test transformations in the same dataset and, more generally, to maintain different sets of transformations. This may come in handy in many situations (for instance, to apply ad-hoc transformations to replay data).\n",
+    "\n",
+    "As in torchvision datasets, AvalancheDataset supports two kinds of transformations: the `transform`, which is applied to X values, and the `target_transform`, which is applied to Y values. The latter is rarely used. This means that **a transformation group is a pair of transformations to be applied to the X and Y values** of each instance returned by the dataset. In both torchvision and Avalanche implementations, **a transformation must be a function (or other callable object)** that accepts one input (the X or Y value) and outputs its transformed version. This pair of functions is stored in the `transform` and `target_transform` fields of the dataset. A comprehensive guide on transformations can be found in the [torchvision documentation](https://pytorch.org/vision/stable/transforms.html).\n",
+    "\n",
+    "In the following example, a MNIST dataset is created and then wrapped in an AvalancheDataset. When creating the AvalancheDataset, we can set *train* and *eval* transformations by passing a *transform\_groups* parameter. Train transformations usually include some form of random augmentation, while eval transformations usually include a sequence of deterministic transformations only. Here we define the sequence of train transformations as a random rotation followed by the ToTensor operation. The eval transformations only include the ToTensor operation."
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "482132af-cfa8-4c16-ae27-5e7a67c75a0c", + "metadata": {}, + "outputs": [], + "source": [ + "from torchvision import transforms\n", + "from torchvision.datasets import MNIST\n", + "from avalanche.benchmarks.utils import AvalancheDataset\n", + "\n", + "mnist_dataset = MNIST('mnist_data', download=True)\n", + "\n", + "# Define the training transformation for X values\n", + "train_transformation = transforms.Compose([\n", + " transforms.RandomRotation(45),\n", + " transforms.ToTensor(),\n", + "])\n", + "# Define the training transformation for Y values (rarely used)\n", + "train_target_transformation = None\n", + "\n", + "# Define the test transformation for X values\n", + "eval_transformation = transforms.ToTensor()\n", + "# Define the test transformation for Y values (rarely used)\n", + "eval_target_transformation = None\n", + "\n", + "transform_groups = {\n", + " 'train': (train_transformation, train_target_transformation),\n", + " 'eval': (eval_transformation, eval_target_transformation)\n", + "}\n", + "\n", + "avl_mnist_transform = AvalancheDataset(mnist_dataset, transform_groups=transform_groups)" + ] + }, + { + "cell_type": "markdown", + "id": "d5f7cca4-e2e7-456b-b6da-54c1c8579eb1", + "metadata": {}, + "source": [ + "Of course, one can also just use the `transform` and `target_transform` constructor parameters to set the transformations for both the *train* and the *eval* groups. However, it is recommended to use the approach based on *transform\\_groups* (shown in the code above) as it is much more flexible." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "51fbd4bf-9535-446a-bc95-6d1cd0dd96f9", + "metadata": {}, + "outputs": [], + "source": [ + "# Not recommended: use transform_groups instead\n", + "avl_mnist_same_transforms = AvalancheDataset(mnist_dataset, transform=train_transformation)" + ] + }, + { + "cell_type": "markdown", + "id": "fbdfa287-347e-4847-95f8-c5a00a4a2107", + "metadata": {}, + "source": [ + "### Using `.train()` and `.eval()`\n", + "\n", + "**The default behaviour of the AvalancheDataset is to use transformations from the _train_ group.** However, one can easily obtain a version of the dataset where the *eval* group is used. Note: when obtaining the dataset of experiences from the test stream, those datasets will already be using the *eval* group of transformations so you don't need to switch to the eval group ;).\n", + "\n", + "As noted before, transformations for the current group are loaded in the `transform` and `target_transform` fields. These fields can be changed directly, but this is *NOT* recommended, as this will not create a copy of the dataset and may probably affect other parts of the code in which the dataset is used.\n", + "\n", + "The recommended way to switch between the *train* and *eval* groups is to use the `.train()` and `.eval()` methods to obtain a copy (view) of the dataset with the proper transformations enabled. This is another very handy feature of the AvalancheDataset: **methods that manipulate the AvalancheDataset fields (and transformations) always create a view of the dataset. The original dataset is never changed.**\n", + "\n", + "In the following cell we use the *avl\\_mnist\\_transform* dataset created in the cells above. We first obtain a view of it in which *eval* transformations are enabled. Then, starting from this view, we obtain a version of it in which *train* transformations are enabled. 
We want to double-stress that `.train()` and `.eval()` never change the group of the dataset on which they are called: they always create a view.\n", + "\n", + "One can check that the correct transformation group is in use by looking at the content of the *transform/target_transform* fields." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "6f8500fa-0f7f-48f7-a26c-d77a1588a244", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Original dataset transformation: Compose(\n", + " RandomRotation(degrees=[-45.0, 45.0], interpolation=nearest, expand=False, fill=0)\n", + " ToTensor()\n", + ")\n", + "--------------------------------\n", + "Eval version of the dataset: ToTensor()\n", + "--------------------------------\n", + "Back to train transformations: Compose(\n", + " RandomRotation(degrees=[-45.0, 45.0], interpolation=nearest, expand=False, fill=0)\n", + " ToTensor()\n", + ")\n" + ] + } + ], + "source": [ + "# Obtain a view of the dataset in which eval transformations are enabled\n", + "avl_mnist_eval = avl_mnist_transform.eval()\n", + "\n", + "# Obtain a view of the dataset in which we get back to train transforms\n", + "# Basically, avl_mnist_transform ~= avl_mnist_train\n", + "avl_mnist_train = avl_mnist_eval.train()\n", + "\n", + "# Check the current transformations function for the 3 datasets\n", + "print('Original dataset transformation:', avl_mnist_transform.transform)\n", + "# Output:\n", + "# Original dataset transformation: Compose(\n", + "# RandomRotation(degrees=[-45.0, 45.0], interpolation=nearest, expand=False, fill=0)\n", + "# ToTensor()\n", + "# )\n", + "print('--------------------------------')\n", + "print('Eval version of the dataset:', avl_mnist_eval.transform)\n", + "# Output: \"Eval version of the dataset: ToTensor()\"\n", + "print('--------------------------------')\n", + "print('Back to train transformations:', avl_mnist_train.transform)\n", + "# Output:\n", + "# Back to train transformations: Compose(\n", + "# RandomRotation(degrees=[-45.0, 45.0], interpolation=nearest, expand=False, fill=0)\n", + "# ToTensor()\n", + "# )" + ] + }, + { + "cell_type": "markdown", + "id": "8172b379-d60a-43ba-8e0d-c4fcdc0e997e", + "metadata": {}, + "source": [ + "### Custom transformation groups\n", + "In *AvalancheDataset*s the **_train_ and _eval_ transformation groups are always available**. However, *AvalancheDataset* also supports **custom transformation groups**.\n", + "\n", + "The following example shows how to create an AvalancheDataset with an additional group named *replay*. We define the replay transformation as a random crop followed by the ToTensor operation." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "69c8912d-b826-4265-ba71-c33501a1f956", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "replay_transform = transforms.Compose([\n", + " transforms.RandomCrop(28, padding=4),\n", + " transforms.ToTensor()\n", + "])\n", + "\n", + "replay_target_transform = None\n", + "\n", + "transform_groups_with_replay = {\n", + " 'train': (None, None),\n", + " 'eval': (None, None),\n", + " 'replay': (replay_transform, replay_target_transform)\n", + "}\n", + "\n", + "AvalancheDataset(mnist_dataset, transform_groups=transform_groups_with_replay)" + ] + }, + { + "cell_type": "markdown", + "id": "6bc0508a-bc4d-4896-984c-609c5803f9e6", + "metadata": {}, + "source": [ + "However, once created the dataset will use the *train* group. There are two ways to **switch to our custom group**:\n", + "- Set the group when creating the dataset using the `initial_transform_group` constructor parameter\n", + "- Switch to the group using the `.with_transforms(group_name)` method\n", + "\n", + "The `.with_transforms(group_name)` method behaves in the same way `.train()` and `.eval()` do by creating a view of the original dataset.\n", + "\n", + "The following example shows how to use both methods:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "2fd29287-da97-4aad-ab3c-cfd514629ad8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Compose(\n", + " RandomCrop(size=(28, 28), padding=4)\n", + " ToTensor()\n", + ")\n", + "Compose(\n", + " RandomCrop(size=(28, 28), padding=4)\n", + " ToTensor()\n", + ")\n" + ] + } + ], + "source": [ + "# Method 1: create the dataset with \"replay\" as the default group\n", + "avl_mnist_custom_transform_1 = AvalancheDataset(\n", + " mnist_dataset,\n", + " transform_groups=transform_groups_with_replay,\n", + " initial_transform_group='replay')\n", + "\n", + "print(avl_mnist_custom_transform_1.transform)\n", + "\n", + "# Method 2: switch to \"replay\" using `.with_transforms(group_name)`\n", + "avl_mnist_custom_transform_not_enabled = AvalancheDataset(\n", + " mnist_dataset,\n", + " transform_groups=transform_groups_with_replay)\n", + "\n", + "avl_mnist_custom_transform_2 = avl_mnist_custom_transform_not_enabled.with_transforms('replay')\n", + "print(avl_mnist_custom_transform_2.transform)\n", + "\n", + "# Both prints output:\n", + "# Compose(\n", + "# RandomCrop(size=(28, 28), padding=4)\n", + "# ToTensor()\n", + "# )" + ] + }, + { + "cell_type": "markdown", + "id": "08fc9f3f-f95a-49ed-ba94-1bbec8960235", + "metadata": {}, + "source": [ + "## Appending transformations\n", + "\n", + "In the standard torchvision datasets the only way to append (that is, add a new transformation step to the list of existing one) is to change the *transform* field directly by doing something like this:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "e4d02b6e-0e73-4205-a497-a1540ff03185", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Compose(\n", + " ToTensor()\n", + " RandomCrop(size=(28, 28), padding=4)\n", + ")\n" + ] + } + ], + "source": [ + "# Append a transform by using torchvision datasets (>>> DON'T DO THIS! 
<<<)\n",
+    "\n",
+    "# Create the dataset\n",
+    "mnist_dataset_w_totensor = MNIST('mnist_data', download=True, transform=transforms.ToTensor())\n",
+    "\n",
+    "# Append a transform\n",
+    "to_append_transform = transforms.RandomCrop(size=(28, 28), padding=4)\n",
+    "mnist_dataset_w_totensor.transform = transforms.Compose(\n",
+    "    [mnist_dataset_w_totensor.transform, to_append_transform]\n",
+    ")\n",
+    "print(mnist_dataset_w_totensor.transform)\n",
+    "# Prints:\n",
+    "# Compose(\n",
+    "#     ToTensor()\n",
+    "#     RandomCrop(size=(28, 28), padding=4)\n",
+    "# )"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "55cb1749-2f72-4780-9699-99b97544f4da",
+   "metadata": {},
+   "source": [
+    "This solution has several major drawbacks:\n",
+    "- The transformation field of the dataset is changed directly. This will affect other parts of the code that use that dataset instance.\n",
+    "- If the initial transform is `None`, then `Compose` will not complain, but the process will crash later (try it yourself: replace the first element of Compose in the cell above with `None`, then try obtaining a data point from the dataset).\n",
+    "- If you need to change transformations only temporarily to do some specific things in a limited part of the code, then you need to store the previous set of transformations in some variable in order to switch back to them later.\n",
+    "\n",
+    "AvalancheDataset offers a very simple method to append transformations without incurring those issues. The `.add_transforms(transform=None, target_transform=None)` method will append the given transform(s) **to the currently enabled transform group** and will return a new dataset (actually, a view) with the given transformations appended to the existing ones. The original dataset is not affected. One can also use `.add_transforms_to_group(group_name, transform, target_transform)` to change transformations for a different group.\n",
+    "\n",
+    "The next cell shows how to use `.add_transforms(...)` to append the *to\_append\_transform* transform defined in the cell above."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "99a295d6-7a3f-4e8a-89af-9f97915848c2",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "With appended transforms: Compose(\n",
+      "    ToTensor()\n",
+      "    RandomCrop(size=(28, 28), padding=4)\n",
+      ")\n",
+      "Original dataset: ToTensor()\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Create the dataset\n",
+    "avl_mnist = AvalancheDataset(MNIST('mnist_data', download=True), transform=transforms.ToTensor())\n",
+    "\n",
+    "# Append a transformation.
Simple as:\n",
+    "avl_mnist_appended_transform = avl_mnist.add_transforms(to_append_transform)\n",
+    "\n",
+    "print('With appended transforms:', avl_mnist_appended_transform.transform)\n",
+    "# Prints:\n",
+    "# With appended transforms: Compose(\n",
+    "#     ToTensor()\n",
+    "#     RandomCrop(size=(28, 28), padding=4)\n",
+    "# )\n",
+    "\n",
+    "# Check that the original dataset was not affected:\n",
+    "print('Original dataset:', avl_mnist.transform)\n",
+    "# Prints: \"Original dataset: ToTensor()\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "19f6a174-4073-4b24-a604-95df9ff5a0f3",
+   "metadata": {},
+   "source": [
+    "Note that by using `.add_transforms(...)`:\n",
+    "\n",
+    "- The original dataset is not changed, which means that other parts of the code that use that dataset instance are not affected.\n",
+    "- You don't need to worry about *None* transformations.\n",
+    "- In order to revert to the original transformations you don't need to keep a copy of them: the original dataset is not affected!"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c5ee8e22-982d-4fa1-ab2a-97e47bb1d693",
+   "metadata": {},
+   "source": [
+    "## Replacing transformations\n",
+    "\n",
+    "The replacement operation follows the same idea (and benefits) of the append one. By using `.replace_transforms(transform, target_transform)` one can obtain a view of the original dataset in which the **transformations for the current group** are replaced with the given ones. One may also change transformations for other groups by passing the name of the group as the optional parameter `group`. As with any transform-related operation, the original dataset is not affected. \n",
+    "\n",
+    "Note: one can use `.replace_transforms(...)` to remove previous transformations (by passing `None` as the new transform).\n",
+    "\n",
+    "The following cell shows how to use `.replace_transforms(...)` to replace the transformations of the current group:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "0e9f9c46-9d3d-40eb-b7b8-afad15668bc9",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "With replaced transform: RandomCrop(size=(28, 28), padding=4)\n",
+      "Original dataset: ToTensor()\n"
+     ]
+    }
+   ],
+   "source": [
+    "new_transform = transforms.RandomCrop(size=(28, 28), padding=4)\n",
+    "\n",
+    "# Replace the transformations. Simple as:\n",
+    "avl_mnist_replaced_transform = avl_mnist.replace_transforms(new_transform, None)\n",
+    "\n",
+    "print('With replaced transform:', avl_mnist_replaced_transform.transform)\n",
+    "# Prints: \"With replaced transform: RandomCrop(size=(28, 28), padding=4)\"\n",
+    "\n",
+    "# Check that the original dataset was not affected:\n",
+    "print('Original dataset:', avl_mnist.transform)\n",
+    "# Prints: \"Original dataset: ToTensor()\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bc2e4781-0f2f-4eb6-b61e-affc8aaf255c",
+   "metadata": {},
+   "source": [
+    "## Freezing transformations\n",
+    "\n",
+    "One last functionality regarding transformations is the ability to \"freeze\" transformations. Freezing transformations means **permanently gluing transformations to the dataset so that they can't be replaced or changed in any way** (usually by mistake). Frozen transformations cannot be changed by using `.replace_transforms(...)` or even by changing the `transform` field directly.\n",
+    "\n",
+    "One may wonder when this may come in handy... in fact, you will probably rarely need to freeze transformations.
However, imagine having to instantiate the PermutedMNIST benchmark. You want the permutation transformation to not be changed by mistake. However, the end users do not know how the internal implementation of the benchmark works, so they may end up messing with those transformations. By freezing the permutation transformation, users cannot mess with it.\n",
+    "\n",
+    "Transformations for all transform groups can be frozen at once by using `.freeze_transforms()`. Transformations can be frozen for a single group by using `.freeze_group_transforms(group_name)`. As always, those methods return a view of the original dataset.\n",
+    "\n",
+    "The cell below shows a simplified excerpt from the [PermutedMNIST benchmark implementation](https://github.com/ContinualAI/avalanche/blob/master/avalanche/benchmarks/classic/cmnist.py). First, a *PixelsPermutation* instance is created. That instance is a transformation that will permute the pixels of the input image. We then create the train and test sets. Once created, transformations for those datasets are frozen using `.freeze_transforms()`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "af97e04b-01b4-4a32-ba1f-42ed94977c56",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from avalanche.benchmarks.classic.cmnist import PixelsPermutation\n",
+    "import numpy as np\n",
+    "import torch\n",
+    "\n",
+    "# Instantiate MNIST train and test sets\n",
+    "mnist_train = MNIST('mnist_data', train=True, download=True)\n",
+    "mnist_test = MNIST('mnist_data', train=False, download=True)\n",
+    "    \n",
+    "# Define the transformation used to permute the pixels\n",
+    "rng_seed = 4321\n",
+    "rng_permute = np.random.RandomState(rng_seed)\n",
+    "idx_permute = torch.from_numpy(rng_permute.permutation(784)).type(torch.int64)\n",
+    "permutation_transform = PixelsPermutation(idx_permute)\n",
+    "\n",
+    "# Define the transforms group\n",
+    "perm_group_transforms = dict(\n",
+    "    train=(permutation_transform, None),\n",
+    "    eval=(permutation_transform, None)\n",
+    ")\n",
+    "\n",
+    "# Create the datasets and freeze transforms\n",
+    "# Note: one can call \"freeze_transforms\" on the constructor result\n",
+    "# or you can do this in 2 steps. The result is the same (obviously).\n",
+    "# The next part shows both ways:\n",
+    "\n",
+    "# Train set\n",
+    "permuted_train_set = AvalancheDataset(\n",
+    "    mnist_train, \n",
+    "    transform_groups=perm_group_transforms).freeze_transforms()\n",
+    "\n",
+    "# Test set\n",
+    "permuted_test_set = AvalancheDataset(\n",
+    "    mnist_test, transform_groups=perm_group_transforms, \n",
+    "    initial_transform_group='eval')\n",
+    "permuted_test_set = permuted_test_set.freeze_transforms()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3f5fea92-cb21-4144-bee8-6e2b37606d25",
+   "metadata": {},
+   "source": [
+    "In this way, that transform can't be removed.
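As an aside, the single-group variant `.freeze_group_transforms(group_name)` named above is not demonstrated in this notebook. The following is a minimal, hedged sketch of how it could be used, reusing `mnist_train` and `perm_group_transforms` from the cell above; the `group` keyword of `replace_transforms` is the optional parameter described in the previous section.

```python
# Sketch only: freeze the 'train' group while leaving 'eval' editable.
from avalanche.benchmarks.utils import AvalancheDataset

train_frozen_set = AvalancheDataset(
    mnist_train,
    transform_groups=perm_group_transforms
).freeze_group_transforms('train')

# The 'train' transformations can no longer be replaced, while the
# 'eval' group can still be changed through the optional `group` parameter:
still_editable = train_frozen_set.replace_transforms(None, None, group='eval')
```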
However, remember that one can always append other transforms atop of frozen transforms.\n", + "\n", + "The cell below shows that `replace_transforms` can't remove frozen transformations:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "62ae4c3e-da05-494c-9df4-394774a42908", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Before replace_transforms:\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAMAAAADACAAAAAB3tzPbAAAEP0lEQVR4nO2dTYhOYRTHzzWDBTFZ+WrIR5ShJoXU1DRKkYXJR1HKyoJZKQtNEeWjpqywkCLKAg2lEJms1IiUYSULFpjNGKLJ51j8z9Q9t+e997kz7+u8p85v8+957vP179TpPu99n3sTEvxJF/pFXRtF0gHpi23/BTKzsCEvpEGUJsVOU6+4AW3cgDbmDSQyN7VDHqXrmI2QZ5AhyDfI9Njp5Gyi7gmkrbChXJb5CLgBbdyANuYNNMqk9BgiKpdAHoS6R+dP5lSgriFQl2FruqHMpuYj4Aa0cQPamDeQRLdcAXkNkbnsAhER7cvpfROyPXCpFbIAcrvyGL8gk0Wl+Qi4AW3cgDZJO7QXMit9TWaaUe5Qeax7kE2ha8H9rKi7A9lcefwg5iPgBrRxA9qYN5DJinchZXPZPCIieo9CxB53fPATo7UQzvjmI+AGtHED2pg3EL8nZq5Bdhc2nAbZBrlS2J5/utwAic7F5iPgBrRxA9qYNxCRRvdALkEGIbMhfIO4vnAQuanvISKig6JF2ZvYZRDzEXAD2rgBbcwbCKdRJL0a7M5zfiKN5hWkBWI+Am5AGzegjXkDjbJ4GrIj0PI6ZGdolOK0WzZ/roK8DFxqESXzEXAD2rgBbcwbyNyN8k+e+AH0BQqtoX68IT8DmUNERB9R6ITcqs4CU7yDzIf43y7rBDegjXkD1dwTv4EsFZUXIYsgfEzxCBERHQ/MGT/pD4j5CLgBbdyANuYNjB1F5F1wb+WWbPUv5CtkRvVWMhfyIba9n+iuE9yANm5Am2QY2lSyX+g84BYUnkI+xQ7FUw+XXAGlpzaMG9DGDWhj3kDpv11KJvTgegSyEDI4vhWYj4Ab0MYNaGPeQEQa5Scu9yEj4hqfxDlERESHS86dl4MPQM5BmomIaBcKJ0U38xFwA9q4AW3MG5jg3WjZeX5DRObks5H9oW58PLO58sDmI+AGtHED2pg3EE6jsMVPk9ZBBiDfQ+2PEhHRsSquKwQ/z/oM8bvROsENaGPeQBL946Bs+ByyumQ3BlvpIdGCYhfC+FHEOsENaOMGtKnBnjiYOFdCBqo+m/kIuAFt3IA25g00FjcpS0+oMpQ/b0BC5x7z8FeX1xduQBs3oE1yAtpd2LIJMlyrlfDRxc6S3cxHwA1o4wa0MW8gs6k/D9lPRNV5GVA8/HbM4BOgHMxHwA1o4wa0MW8gk0ZzMuf/TarRmI+AG9DGDWhj3kDt/nb5ELIGUvwJcUZm68uQvUQ09tWzzGfPzEfADWjjBrTJZKHFkLeF/XJu7fhV51ch/BIhvFFIvoKoD4WOwhHzMB8BN6CNG9DGvIGIPXHwHkqCT8fKL8fyyKOFS+DP1S4XlcXZlFO9+Qi4AW3cgDbmDYylUT6aPVTY4SdkCuQspCvdoktckvArKqem6/JuQ8U1fucuf8ybj5abj4Ab0MYNaGPewD8mbYqJbB1JxgAAAABJRU5ErkJggg==\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "After replace_transforms:\n" + ] + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAMAAAADACAAAAAB3tzPbAAAEP0lEQVR4nO2dTYhOYRTHzzWDBTFZ+WrIR5ShJoXU1DRKkYXJR1HKyoJZKQtNEeWjpqywkCLKAg2lEJms1IiUYSULFpjNGKLJ51j8z9Q9t+e997kz7+u8p85v8+957vP179TpPu99n3sTEvxJF/pFXRtF0gHpi23/BTKzsCEvpEGUJsVOU6+4AW3cgDbmDSQyN7VDHqXrmI2QZ5AhyDfI9Njp5Gyi7gmkrbChXJb5CLgBbdyANuYNNMqk9BgiKpdAHoS6R+dP5lSgriFQl2FruqHMpuYj4Aa0cQPamDeQRLdcAXkNkbnsAhER7cvpfROyPXCpFbIAcrvyGL8gk0Wl+Qi4AW3cgDZJO7QXMit9TWaaUe5Qeax7kE2ha8H9rKi7A9lcefwg5iPgBrRxA9qYN5DJinchZXPZPCIieo9CxB53fPATo7UQzvjmI+AGtHED2pg3EL8nZq5Bdhc2nAbZBrlS2J5/utwAic7F5iPgBrRxA9qYNxCRRvdALkEGIbMhfIO4vnAQuanvISKig6JF2ZvYZRDzEXAD2rgBbcwbCKdRJL0a7M5zfiKN5hWkBWI+Am5AGzegjXkDjbJ4GrIj0PI6ZGdolOK0WzZ/roK8DFxqESXzEXAD2rgBbcwbyNyN8k+e+AH0BQqtoX68IT8DmUNERB9R6ITcqs4CU7yDzIf43y7rBDegjXkD1dwTv4EsFZUXIYsgfEzxCBERHQ/MGT/pD4j5CLgBbdyANuYNjB1F5F1wb+WWbPUv5CtkRvVWMhfyIba9n+iuE9yANm5Am2QY2lSyX+g84BYUnkI+xQ7FUw+XXAGlpzaMG9DGDWhj3kDpv11KJvTgegSyEDI4vhWYj4Ab0MYNaGPeQEQa5Scu9yEj4hqfxDlERESHS86dl4MPQM5BmomIaBcKJ0U38xFwA9q4AW3MG5jg3WjZeX5DRObks5H9oW58PLO58sDmI+AGtHED2pg3EE6jsMVPk9ZBBiDfQ+2PEhHRsSquKwQ/z/oM8bvROsENaGPeQBL946Bs+ByyumQ3BlvpIdGCYhfC+FHEOsENaOMGtKnBnjiYOFdCBqo+m/kIuAFt3IA25g00FjcpS0+oMpQ/b0BC5x7z8FeX1xduQBs3oE1yAtpd2LIJMlyrlfDRxc6S3cxHwA1o4wa0MW8gs6k/D9lPRNV5GVA8/HbM4BOgHMxHwA1o4wa0MW8gk0ZzMuf/TarRmI+AG9DGDWhj3kDt/nb5ELIGUvwJcUZm68uQvUQ09tWzzGfPzEfADWjjBrTJZKHFkLeF/XJu7fhV51ch/BIhvFFIvoKoD4WOwhHzMB8BN6CNG9DGvIGIPXHwHkqCT8fKL8fyyKOFS+DP1S4XlcXZlFO9+Qi4AW3cgDbmDYylUT6aPVTY4SdkCuQspCvdoktckvArKqem6/JuQ8U1fucuf8ybj5abj4Ab0MYNaGPewD8mbYqJbB1JxgAAAABJRU5ErkJggg==\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAMAAAADACAAAAAB3tzPbAAAEP0lEQVR4nO2dTYhOYRTHzzWDBTFZ+WrIR5ShJoXU1DRKkYXJR1HKyoJZKQtNEeWjpqywkCLKAg2lEJms1IiUYSULFpjNGKLJ51j8z9Q9t+e997kz7+u8p85v8+957vP179TpPu99n3sTEvxJF/pFXRtF0gHpi23/BTKzsCEvpEGUJsVOU6+4AW3cgDbmDSQyN7VDHqXrmI2QZ5AhyDfI9Njp5Gyi7gmkrbChXJb5CLgBbdyANuYNNMqk9BgiKpdAHoS6R+dP5lSgriFQl2FruqHMpuYj4Aa0cQPamDeQRLdcAXkNkbnsAhER7cvpfROyPXCpFbIAcrvyGL8gk0Wl+Qi4AW3cgDZJO7QXMit9TWaaUe5Qeax7kE2ha8H9rKi7A9lcefwg5iPgBrRxA9qYN5DJinchZXPZPCIieo9CxB53fPATo7UQzvjmI+AGtHED2pg3EL8nZq5Bdhc2nAbZBrlS2J5/utwAic7F5iPgBrRxA9qYNxCRRvdALkEGIbMhfIO4vnAQuanvISKig6JF2ZvYZRDzEXAD2rgBbcwbCKdRJL0a7M5zfiKN5hWkBWI+Am5AGzegjXkDjbJ4GrIj0PI6ZGdolOK0WzZ/roK8DFxqESXzEXAD2rgBbcwbyNyN8k+e+AH0BQqtoX68IT8DmUNERB9R6ITcqs4CU7yDzIf43y7rBDegjXkD1dwTv4EsFZUXIYsgfEzxCBERHQ/MGT/pD4j5CLgBbdyANuYNjB1F5F1wb+WWbPUv5CtkRvVWMhfyIba9n+iuE9yANm5Am2QY2lSyX+g84BYUnkI+xQ7FUw+XXAGlpzaMG9DGDWhj3kDpv11KJvTgegSyEDI4vhWYj4Ab0MYNaGPeQEQa5Scu9yEj4hqfxDlERESHS86dl4MPQM5BmomIaBcKJ0U38xFwA9q4AW3MG5jg3WjZeX5DRObks5H9oW58PLO58sDmI+AGtHED2pg3EE6jsMVPk9ZBBiDfQ+2PEhHRsSquKwQ/z/oM8bvROsENaGPeQBL946Bs+ByyumQ3BlvpIdGCYhfC+FHEOsENaOMGtKnBnjiYOFdCBqo+m/kIuAFt3IA25g00FjcpS0+oMpQ/b0BC5x7z8FeX1xduQBs3oE1yAtpd2LIJMlyrlfDRxc6S3cxHwA1o4wa0MW8gs6k/D9lPRNV5GVA8/HbM4BOgHMxHwA1o4wa0MW8gk0ZzMuf/TarRmI+AG9DGDWhj3kDt/nb5ELIGUvwJcUZm68uQvUQ09tWzzGfPzEfADWjjBrTJZKHFkLeF/XJu7fhV51ch/BIhvFFIvoKoD4WOwhHzMB8BN6CNG9DGvIGIPXHwHkqCT8fKL8fyyKOFS+DP1S4XlcXZlFO9+Qi4AW3cgDbmDYylUT6aPVTY4SdkCuQspCvdoktckvArKqem6/JuQ8U1fucuf8ybj5abj4Ab0MYNaGPewD8mbYqJbB1JxgAAAABJRU5ErkJggg==\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# First, show that the image pixels are permuted\n", + "print('Before replace_transforms:')\n", + "display(permuted_train_set[0][0].resize((192, 192), 0))\n", + "\n", + "# Try to remove the permutation\n", + "with_removed_transforms = 
permuted_train_set.replace_transforms(None, None)\n", + "\n", + "print('After replace_transforms:')\n", + "display(permuted_train_set[0][0].resize((192, 192), 0))\n", + "display(with_removed_transforms[0][0].resize((192, 192), 0))" + ] + }, + { + "cell_type": "markdown", + "id": "553d4633-c7aa-46c4-b2bb-90267b998e74", + "metadata": {}, + "source": [ + "## Transformations wrap-up\n", + "This completes the *Mini How-To* for the functionalities of the *AvalancheDataset* related to **transformations**. \n", + "\n", + "Here you learned how to use **transformation groups** and how to **append/replace/freeze transformations** in a simple way.\n", + "\n", + "Other *Mini How-To*s will guide you through the other functionalities offered by the *AvalancheDataset* class. The list of *Mini How-To*s can be found [here](https://avalanche.continualai.org/how-tos/avalanchedataset)." + ] + }, + { + "cell_type": "markdown", + "id": "792a197d-a0c3-44c2-87fb-fae43f687195", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "## 🤝 Run it on Google Colab\n", + "\n", + "You can run _this chapter_ and play with it on Google Colaboratory by clicking here: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ContinualAI/avalanche/blob/master/notebooks/how-tos/avalanchedataset/advanced-transformations.ipynb)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/how-tos/avalanchedataset/creating-avalanchedatasets.ipynb b/notebooks/how-tos/avalanchedataset/creating-avalanchedatasets.ipynb new file mode 100644 index 000000000..d62ac92bf --- /dev/null +++ b/notebooks/how-tos/avalanchedataset/creating-avalanchedatasets.ipynb @@ -0,0 +1,440 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "9d2b1a53-2326-41ca-a439-ad9264cff63b", + "metadata": {}, + "source": [ + "---\n", + "description: Creation and manipulation of AvalancheDatasets and its subclasses.\n", + "---\n", + "\n", + "# Creating AvalancheDatasets\n", + "\n", + "The *AvalancheDataset* is an implementation of the PyTorch Dataset class which comes with many out-of-the-box functionalities. 
The *AvalancheDataset* (and its few subclasses) are used extensively throughout the whole Avalanche library as the reference way to manipulate datasets:\n",
+    "\n",
+    "- The dataset carried by the `experience.dataset` field is always an *AvalancheDataset*.\n",
+    "- Benchmark creation functions accept *AvalancheDataset*s to create benchmarks where a finer control over task labels is required.\n",
+    "- Internally, benchmarks are created by manipulating *AvalancheDataset*s.\n",
+    "\n",
+    "This first *Mini How-To* will guide you through the main ways to **instantiate an _AvalancheDataset_**, while the **other Mini How-Tos ([complete list here](https://avalanche.continualai.org/how-tos/avalanchedataset)) will show how to use its functionalities**.\n",
+    "\n",
+    "It is warmly recommended to **run this page as a notebook** using Colab (info at the bottom of this page).\n",
+    "\n",
+    "Let's start by installing Avalanche:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "e583617f-2ad9-4ba9-a581-1d53eec5e205",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip install git+https://github.com/ContinualAI/avalanche.git\n",
+    "\n",
+    "# Or, if you cloned Avalanche on your PC:\n",
+    "# import sys\n",
+    "# from pathlib import Path\n",
+    "# sys.path.append(str(Path.cwd() / '../../..'))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "114da4e5-8a44-4faa-b55e-3b0242b7a1c8",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "## AvalancheDataset vs PyTorch Dataset\n",
+    "This mini How-To will guide you through the main ways used to instantiate an *AvalancheDataset*.\n",
+    "\n",
+    "First thing: the base class `AvalancheDataset` is a **wrapper for existing datasets**. Only two things must be considered when wrapping an existing dataset:\n",
+    "\n",
+    "- Apart from the x and y values, the resulting AvalancheDataset will also return a third value: the task label (which defaults to 0).\n",
+    "- The wrapped dataset must contain a valid **targets** field.\n",
+    "\n",
+    "The **targets field** is available in nearly all *torchvision* datasets. It must be a list containing the label for each data point (usually the y value).
In this way, Avalanche can use that field when instantiating benchmarks like the *Class/Task-Incremental* and *Domain-Incremental* ones.\n",
+    "\n",
+    "Avalanche exposes 4 classes of *AvalancheDataset*s which map exactly to the 4 *Dataset* classes offered by PyTorch:\n",
+    "- `AvalancheDataset`: the base class, which acts as a wrapper for existing *Dataset* instances.\n",
+    "- `AvalancheTensorDataset`: equivalent to PyTorch `TensorDataset`.\n",
+    "- `AvalancheSubset`: equivalent to PyTorch `Subset`.\n",
+    "- `AvalancheConcatDataset`: equivalent to PyTorch `ConcatDataset`.\n",
+    "\n",
+    "## 🛠️ Create an AvalancheDataset\n",
+    "Given a dataset (like MNIST), an *AvalancheDataset* can be instantiated as follows:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "2acfeea9-e8fe-4370-9a59-e034087b0acd",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from avalanche.benchmarks.utils import AvalancheDataset\n",
+    "from torchvision.datasets import MNIST\n",
+    "\n",
+    "# Instantiate the MNIST train dataset from torchvision\n",
+    "mnist_dataset = MNIST('mnist_data', download=True)\n",
+    "\n",
+    "# Create the AvalancheDataset\n",
+    "mnist_avalanche_dataset = AvalancheDataset(mnist_dataset)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4f0ffacc-3990-41b9-bc85-cf6f053e8847",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "Just like any other Dataset, a data point can be obtained using the `x, y = dataset[idx]` syntax. **When obtaining a data point from an AvalancheDataset, an additional third value (the task label) will be returned**:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "d4069fe0-a458-4308-b018-3f6a9f9193e3",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "x=, y=5\n",
+      "x=, y=5, t=0\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Obtain the first instance from the original dataset\n",
+    "x, y = mnist_dataset[0]\n",
+    "print(f'x={x}, y={y}')\n",
+    "# Output: \"x=, y=5\"\n",
+    "\n",
+    "# Obtain the first instance from the AvalancheDataset\n",
+    "x, y, t = mnist_avalanche_dataset[0]\n",
+    "print(f'x={x}, y={y}, t={t}')\n",
+    "# Output: \"x=, y=5, t=0\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0df2be15-0471-473b-a603-089f41f54fc6",
+   "metadata": {},
+   "source": [
+    "**Useful tip:** if you are not sure if you are dealing with a PyTorch *Dataset* or an *AvalancheDataset*, or if you want to ignore task labels, you can use this syntax:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "ae39b667-6a3f-4c89-bdf8-196a42844ec5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# You can use \"x, y, *_\" to manage both kinds of Datasets\n",
+    "x, y, *_ = mnist_dataset[0]  # OK\n",
+    "x, y, *_ = mnist_avalanche_dataset[0]  # OK"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1a5cdbbb-d3db-48f7-b946-d5a3c4151353",
+   "metadata": {},
+   "source": [
+    "## The AvalancheTensorDataset\n",
+    "The PyTorch *TensorDataset* is one of the most useful Dataset classes as it can be used to quickly prototype the data loading part of your code.\n",
+    "\n",
+    "A *TensorDataset* can be wrapped in an AvalancheDataset just like any Dataset, but this is not very convenient, as shown below:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "8d3e9ca2-b971-4dea-9a80-f7f368e07ed5",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "x=tensor([0.8383, 0.1409, 0.7622, 0.6625, 0.6322, 0.1188, 0.7383]), y=4, t=0\n"
+     ]
+    }
+   ],
+ "source": [ + "import torch\n", + "from torch.utils.data import TensorDataset\n", + "\n", + "\n", + "# Create 10 instances described by 7 features \n", + "x_data = torch.rand(10, 7)\n", + "\n", + "# Create the class labels for the 10 instances\n", + "y_data = torch.randint(0, 5, (10,))\n", + "\n", + "# Create the tensor dataset\n", + "tensor_dataset = TensorDataset(x_data, y_data)\n", + "\n", + "# Wrap it in an AvalancheDataset\n", + "wrapped_tensor_dataset = AvalancheDataset(tensor_dataset)\n", + "\n", + "# Obtain the first instance from the dataset\n", + "x, y, t = wrapped_tensor_dataset[0]\n", + "print(f'x={x}, y={y}, t={t}')\n", + "# Output: \"x=tensor([0.6329, 0.8495, 0.1853, 0.7254, 0.7893, 0.8079, 0.1106]), y=4, t=0\"" + ] + }, + { + "cell_type": "markdown", + "id": "669670c9-e3ea-44a2-aecf-40bd8cb9cdb8", + "metadata": {}, + "source": [ + "**Instead, it is recommended to use the AvalancheTensorDataset** class to get the same result. In this way, you can just skip one intermediate step." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "ab827b7a-de79-4a38-bfd5-18dd582404aa", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "x=tensor([0.8383, 0.1409, 0.7622, 0.6625, 0.6322, 0.1188, 0.7383]), y=4, t=0\n" + ] + } + ], + "source": [ + "from avalanche.benchmarks.utils import AvalancheTensorDataset\n", + "\n", + "# Create the tensor dataset\n", + "avl_tensor_dataset = AvalancheTensorDataset(x_data, y_data)\n", + "\n", + "# Obtain the first instance from the AvalancheTensorDataset\n", + "x, y, t = avl_tensor_dataset[0]\n", + "print(f'x={x}, y={y}, t={t}')\n", + "# Output: \"x=tensor([0.6329, 0.8495, 0.1853, 0.7254, 0.7893, 0.8079, 0.1106]), y=4, t=0\"" + ] + }, + { + "cell_type": "markdown", + "id": "6f614c68-c0dc-49d9-a3e2-03348e248ca9", + "metadata": {}, + "source": [ + "In both cases, **AvalancheDataset will automatically populate its _targets_ field by using the values from the second Tensor** (which usually contains the Y values). This behaviour can be customized by passing a custom `targets` constructor parameter (by either passing a list of targets or the index of the Tensor to use).\n", + "\n", + "The cell below shows the content of the target field of the dataset created in the cell above. Notice that the *targets* field has been filled with the content of the second Tensor (*y\\_data*)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "1f81487c-e05e-4b98-addd-25ef5f7d9746", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "y_data= tensor([4, 0, 2, 2, 1, 1, 0, 0, 0, 1])\n", + "targets field= [tensor(4), tensor(0), tensor(2), tensor(2), tensor(1), tensor(1), tensor(0), tensor(0), tensor(0), tensor(1)]\n" + ] + } + ], + "source": [ + "# Check the targets field\n", + "print('y_data=', y_data)\n", + " # Output: \"y_data= tensor([4, 3, 3, 2, 0, 1, 3, 3, 3, 2])\"\n", + "\n", + "print('targets field=', avl_tensor_dataset.targets)\n", + "# Output: \"targets field= [tensor(4), tensor(3), tensor(3), tensor(2), \n", + "# tensor(0), tensor(1), tensor(3), tensor(3), tensor(3), tensor(2)]\"" + ] + }, + { + "cell_type": "markdown", + "id": "62bdf7d2-97be-4e20-9ef5-2b51bebcfecd", + "metadata": {}, + "source": [ + "## The AvalancheSubset and AvalancheConcatDataset classes\n", + "Avalanche offers the `AvalancheSubset` and `AvalancheConcatDataset` implementations that extend the functionalities of PyTorch *Subset* and *ConcatDataset*.\n", + "\n", + "Regarding the subsetting operation, `AvalancheSubset` behaves in the same way the PyTorch `Subset` class does: both implementations accept a dataset and a list of indices as parameters. The resulting Subset is not a copy of the dataset, it's just a view. This is similar to creating a view of a NumPy array by passing a list of indexes using the `numpy_array[list_of_indices]` syntax. This can be used to both *create a smaller dataset* and to *change the order of data points* in the dataset.\n", + "\n", + "Here we create a toy dataset in which each X and Y values are *int*s. We then obtain a subset of it by creating an **AvalancheSubset**:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "02efbde8-5216-4595-acab-999c6386845b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The subset contains 4 instances.\n", + "x=50, y=10, t=0\n", + "x=55, y=15, t=0\n", + "x=58, y=18, t=0\n", + "x=52, y=12, t=0\n" + ] + } + ], + "source": [ + "from avalanche.benchmarks.utils import AvalancheSubset\n", + "\n", + "# Define the X values of 10 instances (each instance is an int)\n", + "x_data_toy = [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]\n", + "\n", + "# Define the class labels for the 10 instances\n", + "y_data_toy = [10, 11, 12, 13, 14, 15, 16, 17, 18, 19]\n", + "\n", + "# Create the tensor dataset\n", + "# Note: AvalancheSubset can also be applied to PyTorch TensorDataset directly!\n", + "# However, note that PyTorch TensorDataset doesn't support Python lists...\n", + "# ... (it only supports Tensors) while AvalancheTensorDataset does.\n", + "toy_dataset = AvalancheTensorDataset(x_data_toy, y_data_toy) \n", + "\n", + "# Define the indices for the subset\n", + "# Here we want to obtain a subset containing only the data points...\n", + "# ... 
at indices 0, 5, 8, 2 (in this specific order)\n",
+    "subset_indices = [0, 5, 8, 2]\n",
+    "\n",
+    "# Create the subset\n",
+    "avl_subset = AvalancheSubset(toy_dataset, indices=subset_indices)\n",
+    "print('The subset contains', len(avl_subset), 'instances.')\n",
+    "# Output: \"The subset contains 4 instances.\"\n",
+    "\n",
+    "# Obtain instances from the AvalancheSubset\n",
+    "for x, y, t in avl_subset:\n",
+    "    print(f'x={x}, y={y}, t={t}')\n",
+    "# Output:\n",
+    "# x=50, y=10, t=0\n",
+    "# x=55, y=15, t=0\n",
+    "# x=58, y=18, t=0\n",
+    "# x=52, y=12, t=0"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6ccde422-314d-4c6a-9d36-3bfd2e43d3c3",
+   "metadata": {},
+   "source": [
+    "Concatenation is even simpler. Just like with PyTorch *ConcatDataset*, one can easily concatenate datasets with **AvalancheConcatDataset**.\n",
+    "\n",
+    "Both *AvalancheConcatDataset* and PyTorch *ConcatDataset* accept a list of datasets to concatenate."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "fdf11bef-17cf-499d-8407-3c7c4e9f0a0b",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The concat dataset contains 10 instances.\n",
+      "x=50, y=10, t=0\n",
+      "x=51, y=11, t=0\n",
+      "x=52, y=12, t=0\n",
+      "x=53, y=13, t=0\n",
+      "x=54, y=14, t=0\n",
+      "x=60, y=20, t=0\n",
+      "x=61, y=21, t=0\n",
+      "x=62, y=22, t=0\n",
+      "x=63, y=23, t=0\n",
+      "x=64, y=24, t=0\n"
+     ]
+    }
+   ],
+   "source": [
+    "from avalanche.benchmarks.utils import AvalancheConcatDataset\n",
+    "\n",
+    "# Define the 2 datasets to be concatenated\n",
+    "x_data_toy_1 = [50, 51, 52, 53, 54]\n",
+    "y_data_toy_1 = [10, 11, 12, 13, 14]\n",
+    "x_data_toy_2 = [60, 61, 62, 63, 64]\n",
+    "y_data_toy_2 = [20, 21, 22, 23, 24]\n",
+    "\n",
+    "# Create the datasets\n",
+    "toy_dataset_1 = AvalancheTensorDataset(x_data_toy_1, y_data_toy_1) \n",
+    "toy_dataset_2 = AvalancheTensorDataset(x_data_toy_2, y_data_toy_2) \n",
+    "\n",
+    "# Create the concat dataset\n",
+    "avl_concat = AvalancheConcatDataset([toy_dataset_1, toy_dataset_2])\n",
+    "print('The concat dataset contains', len(avl_concat), 'instances.')\n",
+    "# Output: \"The concat dataset contains 10 instances.\"\n",
+    "\n",
+    "# Obtain instances from the AvalancheConcatDataset\n",
+    "for x, y, t in avl_concat:\n",
+    "    print(f'x={x}, y={y}, t={t}')\n",
+    "# Output:\n",
+    "# x=50, y=10, t=0\n",
+    "# x=51, y=11, t=0\n",
+    "# x=52, y=12, t=0\n",
+    "# x=53, y=13, t=0\n",
+    "# x=54, y=14, t=0\n",
+    "# x=60, y=20, t=0\n",
+    "# x=61, y=21, t=0\n",
+    "# x=62, y=22, t=0\n",
+    "# x=63, y=23, t=0\n",
+    "# x=64, y=24, t=0"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2901ba6e-7653-4356-852b-e402693897ca",
+   "metadata": {},
+   "source": [
+    "## Dataset Creation wrap-up\n",
+    "This *Mini How-To* showed you how to **create instances of AvalancheDataset (and its subclasses)**.\n",
+    "\n",
+    "Other *Mini How-To*s will guide you through the functionalities offered by AvalancheDataset. The list of *Mini How-To*s can be found [here](https://avalanche.continualai.org/how-tos/avalanchedataset)."
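One combination the cells above do not show is pairing the `targets` field with `AvalancheSubset` to filter a dataset by class. A minimal, hedged sketch (not part of the notebook; all names are illustrative):

```python
# Sketch only: keep the instances of classes 0 and 1 using `targets`.
import torch
from avalanche.benchmarks.utils import (AvalancheSubset,
                                        AvalancheTensorDataset)

x = torch.rand(10, 7)
y = torch.randint(0, 5, (10,))
dataset = AvalancheTensorDataset(x, y)

# `targets` holds one label per data point, as described earlier
keep_indices = [i for i, c in enumerate(dataset.targets) if int(c) in (0, 1)]
class_filtered = AvalancheSubset(dataset, indices=keep_indices)
print(len(class_filtered), 'instances kept')
```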
+ ] + }, + { + "cell_type": "markdown", + "id": "18a9baff-7fab-4757-b155-fdb388bad8c9", + "metadata": {}, + "source": [ + "## 🤝 Run it on Google Colab\n", + "\n", + "You can run _this chapter_ and play with it on Google Colaboratory by clicking here: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ContinualAI/avalanche/blob/master/notebooks/how-tos/avalanchedataset/creating-avalanchedatasets.ipynb)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/how-tos/avalanchedataset/preamble-pytorch-datasets.ipynb b/notebooks/how-tos/avalanchedataset/preamble-pytorch-datasets.ipynb new file mode 100644 index 000000000..93b75136f --- /dev/null +++ b/notebooks/how-tos/avalanchedataset/preamble-pytorch-datasets.ipynb @@ -0,0 +1,146 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ccdb4767", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "---\n", + "description: Few words about PyTorch Datasets\n", + "---\n", + "\n", + "# Preamble: PyTorch Datasets\n", + "This short preamble will briefly go through the basic notions of Dataset offered natively by PyTorch. A solid grasp of these notions are needed to understand:\n", + "1. How PyTorch data loading works in general\n", + "2. How AvalancheDatasets differs from PyTorch Datasets\n", + "\n", + "## 📚 Dataset: general definition\n", + "\n", + "In PyTorch, **a `Dataset` is a class** exposing two methods:\n", + "- `__len__()`, which returns the amount of instances in the dataset (as an `int`). \n", + "- `__getitem__(idx)`, which returns the data point at index `idx`.\n", + "\n", + "In other words, a Dataset instance is just an object for which, similarly to a list, one can simply:\n", + "- Obtain its length using the Python `len(dataset)` function.\n", + "- Obtain a single data point using the `x, y = dataset[idx]` syntax.\n", + "\n", + "The content of the dataset can be either loaded in memory when the dataset is instantiated (like the torchvision MNIST dataset does) or, for big datasets like ImageNet, the content is kept on disk, with the dataset keeping the list of files in an internal field. In this case, data is loaded from the storage on-the-fly when `__getitem__(idx)` is called. The way those things are managed is specific to each dataset implementation.\n", + "\n", + "## PyTorch Datasets\n", + "The PyTorch library offers 4 Dataset implementations:\n", + "- `Dataset`: an interface defining the `__len__` and `__getitem__` methods.\n", + "- `TensorDataset`: instantiated by passing X and Y tensors. Each row of the X and Y tensors is interpreted as a data point. The `__getitem__(idx)` method will simply return the `idx`-th row of X and Y tensors.\n", + "- `ConcatDataset`: instantiated by passing a list of datasets. The resulting dataset is a concatenation of those datasets.\n", + "- `Subset`: instantiated by passing a dataset and a list of indices. 
The resulting dataset will only contain the data points described by that list of indices.\n",
+    "\n",
+    "As explained in the mini *How-To*s, Avalanche offers a customized version of all these 4 datasets.\n",
+    "\n",
+    "## Transformations\n",
+    "Most datasets from the *torchvision* library (as well as datasets found \"in the wild\") allow for a `transformation` function to be passed to the dataset constructor. The support for transformations is not mandatory for a dataset, but it is quite common to support them. The transformation is used to process the X value of a data point before returning it. This is used to normalize values, apply augmentations, etcetera.\n",
+    "\n",
+    "As explained in the mini *How-To*s, the `AvalancheDataset` class implements a very rich and powerful set of functionalities for managing transformations.\n",
+    "\n",
+    "## Quick note on the IterableDataset class\n",
+    "A variation of the standard `Dataset` exists in PyTorch: the [IterableDataset](https://pytorch.org/docs/stable/data.html#iterable-style-datasets). When using an `IterableDataset`, one can load the data points in a sequential way only (by using a tape-like approach). The `dataset[idx]` syntax and `len(dataset)` function are not allowed. **Avalanche does NOT support `IterableDataset`s.** You shouldn't worry about this because, realistically, you will never encounter such datasets.\n",
+    "\n",
+    "## DataLoader\n",
+    "The `Dataset` is a very simple object that only returns one data point given its index. In order to create minibatches and speed-up the data loading process, a `DataLoader` is required.\n",
+    "\n",
+    "The PyTorch `DataLoader` class is a very efficient mechanism that, given a `Dataset`, will return **minibatches** by optionally **shuffling** data before each epoch and by **loading data in parallel** by using multiple workers.\n",
+    "\n",
+    "## Preamble wrap-up\n",
+    "To wrap up, let's see how the native, *non-Avalanche*, PyTorch components work in practice. In the code cell that closes this preamble, we create a `TensorDataset` and then we load it in minibatches using a `DataLoader`."
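As promised in the Dataset definition above, here is a minimal sketch of a custom map-style `Dataset` implementing the two-method interface. The class and its data are purely illustrative and not taken from the notebook.

```python
from torch.utils.data import Dataset


class SquaresDataset(Dataset):
    """Illustrative map-style dataset: x = i, y = i ** 2."""

    def __init__(self, n_instances: int):
        self.n_instances = n_instances

    def __len__(self) -> int:
        # Number of instances in the dataset
        return self.n_instances

    def __getitem__(self, idx: int):
        # The data point at index `idx`
        return idx, idx ** 2


dataset = SquaresDataset(5)
print(len(dataset))  # 5
print(dataset[2])    # (2, 4)
```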
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "8bce4be3-91ef-4816-9a3f-5c392ef05027", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loaded minibatch of 10 instances\n", + "Loaded minibatch of 10 instances\n", + "Loaded minibatch of 10 instances\n", + "Loaded minibatch of 10 instances\n", + "Loaded minibatch of 10 instances\n", + "Loaded minibatch of 10 instances\n", + "Loaded minibatch of 10 instances\n", + "Loaded minibatch of 10 instances\n", + "Loaded minibatch of 10 instances\n", + "Loaded minibatch of 10 instances\n" + ] + } + ], + "source": [ + "import torch\n", + "from torch.utils.data.dataset import TensorDataset\n", + "from torch.utils.data.dataloader import DataLoader\n", + "\n", + "# Create a dataset of 100 data points described by 22 features + 1 class label\n", + "x_data = torch.rand(100, 22)\n", + "y_data = torch.randint(0, 5, (100,))\n", + "\n", + "# Create the Dataset\n", + "my_dataset = TensorDataset(x_data, y_data)\n", + "\n", + "# Create the DataLoader\n", + "my_dataloader = DataLoader(my_dataset, batch_size=10, shuffle=True, num_workers=4)\n", + "\n", + "# Run one epoch\n", + "for x_minibatch, y_minibatch in my_dataloader:\n", + " print('Loaded minibatch of', len(x_minibatch), 'instances')\n", + "# Output: \"Loaded minibatch of 10 instances\" x10 times" + ] + }, + { + "cell_type": "markdown", + "id": "93f6fdec-f1d0-4cdc-a6e0-6e7c0a6b3be7", + "metadata": {}, + "source": [ + "## Next steps\n", + "With these notions in mind, you can start start your journey on understanding the functionalities offered by the AvalancheDatasets by going through the *Mini How-To*s.\n", + "\n", + "Please refer to the [list of the *Mini How-To*s regarding AvalancheDatasets](https://avalanche.continualai.org/how-tos/avalanchedataset) for a complete list. It is recommended to start with the **\"Creating AvalancheDatasets\"** *Mini How-To*." + ] + }, + { + "cell_type": "markdown", + "id": "02de0ce4-2711-4832-8b0e-516040483ae5", + "metadata": {}, + "source": [ + "## 🤝 Run it on Google Colab\n", + "\n", + "You can run _this chapter_ and play with it on Google Colaboratory by clicking here: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ContinualAI/avalanche/blob/master/notebooks/how-tos/avalanchedataset/preamble-pytorch-datasets.ipynb)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/how-tos/dataloading_buffers_replay.ipynb b/notebooks/how-tos/dataloading_buffers_replay.ipynb index d2a04cf3b..b14515f46 100644 --- a/notebooks/how-tos/dataloading_buffers_replay.ipynb +++ b/notebooks/how-tos/dataloading_buffers_replay.ipynb @@ -9,6 +9,9 @@ } }, "source": [ + "---\n", + "description: How to implement replay and data loading\n", + "---\n", "# Dataloading, Memory Buffers, and Replay\n", "\n", "Avalanche provides several components that help you to balance data loading and implement rehearsal strategies.\n", @@ -17,27 +20,42 @@ "\n", "**Buffers** are used to store data from the previous experiences. 
They are dynamic datasets with a fixed maximum size, and they can be updated with new data continuously.\n",
     "\n",
-    "Finally, **Replay** strategies implement rehearsal by using Avalanche's plugin system. Most rehearsal strategies use a custom dataloader to balance the buffer with the current experience and a buffer that is updated for each experience."
+    "Finally, **Replay** strategies implement rehearsal by using Avalanche's plugin system. Most rehearsal strategies use a custom dataloader to balance the buffer with the current experience and a buffer that is updated for each experience.\n",
+    "\n",
+    "First, let's install Avalanche. You can skip this step if you have installed it already."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip install git+https://github.com/ContinualAI/avalanche.git"
+   ]
+  },
  {
   "cell_type": "markdown",
+   "metadata": {
+    "pycharm": {
+     "name": "#%% md\n"
+    }
+   },
   "source": [
    "## Dataloaders\n",
    "Avalanche dataloaders are simple iterators, located under `avalanche.benchmarks.utils.data_loader`. Their interface is equivalent to PyTorch's dataloaders. For example, `GroupBalancedDataLoader` takes a sequence of datasets and iterates over them by providing balanced mini-batches, where the number of samples is split equally among groups. Internally, it instantiates a `DataLoader` for each separate group. More specialized dataloaders exist such as `TaskBalancedDataLoader`.\n",
    "\n",
    "All the dataloaders accept keyword arguments (`**kwargs`) that are passed directly to the dataloaders for each group."
-   ],
-   "metadata": {
-    "collapsed": false,
-    "pycharm": {
-     "name": "#%% md\n"
-    }
-   }
+   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
+   "metadata": {
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   },
   "outputs": [
    {
     "name": "stdout",
@@ -56,16 +74,15 @@
    "for x, y, t in dl:\n",
    "    print(t.tolist())\n",
    "    break"
-   ],
-   "metadata": {
-    "collapsed": false,
-    "pycharm": {
-     "name": "#%%\n"
-    }
-   }
+   ]
  },
  {
   "cell_type": "markdown",
+   "metadata": {
+    "pycharm": {
+     "name": "#%% md\n"
+    }
+   },
   "source": [
    "## Memory Buffers\n",
    "Memory buffers store data up to a maximum capacity, and they implement policies to select which data to store and which to remove when the buffer is full. They are available in the module `avalanche.training.storage_policy`. The base class is the `ExemplarsBuffer`, which implements two methods:\n",
@@ -73,17 +90,16 @@
    "- `resize(strategy, new_size)` - updates the maximum size and updates the buffer accordingly.\n",
    "\n",
    "The data can be accessed using the attribute `buffer`."
-   ],
-   "metadata": {
-    "collapsed": false,
-    "pycharm": {
-     "name": "#%% md\n"
-    }
-   }
+   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
+   "metadata": {
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   },
   "outputs": [
    {
     "name": "stdout",
@@ -101,31 +117,29 @@
    "storage_p = ReservoirSamplingBuffer(max_size=30)\n",
    "\n",
    "print(f\"Max buffer size: {storage_p.max_size}, current size: {len(storage_p.buffer)}\")"
-   ],
-   "metadata": {
-    "collapsed": false,
-    "pycharm": {
-     "name": "#%%\n"
-    }
-   }
+   ]
  },
  {
   "cell_type": "markdown",
-   "source": [
-    "At first, the buffer is empty. We can update it with data from a new experience.\n",
-    "\n",
-    "Notice that we use a `SimpleNamespace` because we want to use the buffer standalone, without instantiating an Avalanche strategy. Reservoir sampling requires only the `experience` from the strategy's state."
-   ],
   "metadata": {
-    "collapsed": false,
    "pycharm": {
     "name": "#%% md\n"
    }
-   }
+   },
+   "source": [
+    "At first, the buffer is empty. We can update it with data from a new experience.\n",
+    "\n",
+    "Notice that we use a `SimpleNamespace` because we want to use the buffer standalone, without instantiating an Avalanche strategy. Reservoir sampling requires only the `experience` from the strategy's state."
+   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
+   "metadata": {
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   },
   "outputs": [
    {
     "name": "stdout",
@@ -155,31 +169,29 @@
    "    storage_p.update(strategy_state)\n",
    "    print(f\"Max buffer size: {storage_p.max_size}, current size: {len(storage_p.buffer)}\")\n",
    "    print(f\"class targets: {storage_p.buffer.targets}\\n\")"
-   ],
-   "metadata": {
-    "collapsed": false,
-    "pycharm": {
-     "name": "#%%\n"
-    }
-   }
+   ]
  },
  {
   "cell_type": "markdown",
-   "source": [
-    "Notice that after each update some samples are substituted with new data. Reservoir sampling selects these samples randomly.\n",
-    "\n",
-    "Avalanche offers many more storage policies. For example, `ParametricBuffer` is a buffer split into several groups according to the `groupby` parameters (`None`, 'class', 'task', 'experience'), and according to an optional `ExemplarsSelectionStrategy` (random selection is the default choice)."
-   ],
   "metadata": {
-    "collapsed": false,
    "pycharm": {
     "name": "#%% md\n"
    }
-   }
+   },
+   "source": [
+    "Notice that after each update some samples are substituted with new data. Reservoir sampling selects these samples randomly.\n",
+    "\n",
+    "Avalanche offers many more storage policies. For example, `ParametricBuffer` is a buffer split into several groups according to the `groupby` parameters (`None`, 'class', 'task', 'experience'), and according to an optional `ExemplarsSelectionStrategy` (random selection is the default choice)."
+   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
+   "metadata": {
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   },
   "outputs": [
    {
     "name": "stdout",
@@ -218,29 +230,27 @@
    "    storage_p.update(strategy_state)\n",
    "    print(f\"Max buffer size: {storage_p.max_size}, current size: {len(storage_p.buffer)}\")\n",
    "    print(f\"class targets: {storage_p.buffer.targets}\\n\")"
-   ],
-   "metadata": {
-    "collapsed": false,
-    "pycharm": {
-     "name": "#%%\n"
-    }
-   }
+   ]
  },
  {
   "cell_type": "markdown",
-   "source": [
-    "The advantage of using grouping buffers is that you get a balanced rehearsal buffer. You can even access the groups separately with the `buffer_groups` attribute. Combined with balanced dataloaders, you can ensure that the mini-batches stay balanced during training."
-   ],
   "metadata": {
-    "collapsed": false,
    "pycharm": {
     "name": "#%% md\n"
    }
-   }
+   },
+   "source": [
+    "The advantage of using grouping buffers is that you get a balanced rehearsal buffer. You can even access the groups separately with the `buffer_groups` attribute. Combined with balanced dataloaders, you can ensure that the mini-batches stay balanced during training."
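A hedged sketch of that combination follows (not part of the diff). The construction of `dl` in the next cells is elided by the diff context, so this is a reconstruction under stated assumptions: `GroupBalancedDataLoader` is documented above as accepting a sequence of datasets plus keyword arguments forwarded to each per-group dataloader, and `storage_p` is the grouped `ParametricBuffer` from the cells above.

```python
# Sketch only: balanced mini-batches from a grouped rehearsal buffer.
from avalanche.benchmarks.utils.data_loader import GroupBalancedDataLoader

# One dataset per buffer group (storage_p grouped by class, as above)
datasets_from_buffer = [g.buffer for g in storage_p.buffer_groups.values()]

# kwargs such as batch_size are forwarded to each group's dataloader
dl = GroupBalancedDataLoader(datasets_from_buffer, batch_size=8)

for x, y, t in dl:
    # Each mini-batch draws an equal share of samples from every group
    print(y.tolist())
    break
```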
+ ] }, { "cell_type": "code", "execution_count": 34, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [ { "name": "stdout", @@ -262,17 +272,16 @@ "source": [ "for k, v in storage_p.buffer_groups.items():\n", " print(f\"(group {k}) -> size {len(v.buffer)}\")" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - } + ] }, { "cell_type": "code", "execution_count": 35, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [ { "name": "stdout", @@ -289,31 +298,29 @@ "for x, y, t in dl:\n", " print(y.tolist())\n", " break" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - } + ] }, { "cell_type": "markdown", - "source": [ - "## Replay Plugins\n", - "\n", - "Avalanche's strategy plugins can be used to update the rehearsal buffer and set the dataloader. This makes it easy to implement replay strategies:" - ], "metadata": { - "collapsed": false, "pycharm": { "name": "#%% md\n" } - } + }, + "source": [ + "## Replay Plugins\n", + "\n", + "Avalanche's strategy plugins can be used to update the rehearsal buffer and set the dataloader. This makes it easy to implement replay strategies:" + ] }, { "cell_type": "code", "execution_count": 36, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "from avalanche.benchmarks.utils.data_loader import ReplayDataLoader\n", @@ -350,29 +357,27 @@ " \"\"\"\n", " print(\"Buffer update.\")\n", " self.storage_policy.update(strategy, **kwargs)\n" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - } + ] }, { "cell_type": "markdown", - "source": [ - "And of course, we can use the plugin to train our continual model." - ], "metadata": { - "collapsed": false, "pycharm": { "name": "#%% md\n" } - } + }, + "source": [ + "And of course, we can use the plugin to train our continual model." + ] }, { "cell_type": "code", "execution_count": 38, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [ { "name": "stdout", @@ -588,34 +593,28 @@ "\n", " print('Computing accuracy on the whole test set')\n", " results.append(cl_strategy.eval(scenario.test_stream))" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - } + ] } ], "metadata": { "kernelspec": { - "name": "avalanche-env", + "display_name": "Python 3", "language": "python", - "display_name": "Python (avalanche-env)" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" + "pygments_lexer": "ipython3", + "version": "3.7.4" } }, "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file + "nbformat_minor": 1 +} diff --git a/setup.py b/setup.py index 24f88ea88..a7b4f120c 100644 --- a/setup.py +++ b/setup.py @@ -51,5 +51,7 @@ def get_version(rel_path): 'torch', 'torchvision', 'gdown', + 'ctrl-benchmark', + 'setuptools<=59.5.0' ] ) diff --git a/tests/target_metrics/__init__.py b/tests/evaluation/__init__.py similarity index 100% rename from tests/target_metrics/__init__.py rename to tests/evaluation/__init__.py diff --git a/tests/evaluation/test_image_samples.py b/tests/evaluation/test_image_samples.py new file mode 100644 index 000000000..626b58bf9 --- /dev/null +++ b/tests/evaluation/test_image_samples.py @@ -0,0 +1,113 @@ +import unittest +from unittest.mock import MagicMock + +from torch.utils.data import
DataLoader +from torchvision.transforms import Resize, Compose, ToTensor +from torchvision.utils import save_image + +from avalanche.benchmarks import SplitMNIST +from avalanche.benchmarks.utils import AvalancheTensorDataset +from avalanche.evaluation.metrics import ImagesSamplePlugin + + +class ImageSamplesTests(unittest.TestCase): + def test_image_samples(self): + p_metric = ImagesSamplePlugin( + n_cols=5, + n_rows=5, + group=True, + mode="train" + ) + + scenario = SplitMNIST(5) + curr_exp = scenario.train_stream[0] + curr_dataset = curr_exp.dataset + strategy_mock = MagicMock( + eval_mb_size=32, + experience=curr_exp, + adapted_dataset=curr_dataset + ) + + mval = p_metric.after_train_dataset_adaptation(strategy_mock) + img_grid = mval[0].value.image + + # save_image(img_grid, './logs/test_image_grid.png') + + def test_tensor_samples(self): + p_metric = ImagesSamplePlugin( + n_cols=5, + n_rows=5, + group=True, + mode="train" + ) + + scenario = SplitMNIST(5) + curr_exp = scenario.train_stream[0] + for mb in DataLoader(curr_exp.dataset, batch_size=32): + break + curr_dataset = AvalancheTensorDataset(*mb[:2], targets=mb[1]) + + strategy_mock = MagicMock( + eval_mb_size=32, + experience=curr_exp, + adapted_dataset=curr_dataset + ) + + mval = p_metric.after_train_dataset_adaptation(strategy_mock) + img_grid = mval[0].value.image + + # save_image(img_grid, './logs/test_tensor_grid.png') + + def test_samples_augmentations(self): + scenario = SplitMNIST(5) + curr_exp = scenario.train_stream[0] + + # we use a Resize transform because it's easy to detect if it's been + # applied without looking at the image: the grid shape changes. + curr_dataset = curr_exp.dataset.replace_transforms( + transform=Compose([Resize(8), ToTensor()]), target_transform=None + ) + + ########################################## + # WITH AUGMENTATIONS + ########################################## + p_metric = ImagesSamplePlugin( + n_cols=5, + n_rows=5, + group=True, + mode="train", + disable_augmentations=False + ) + + strategy_mock = MagicMock( + eval_mb_size=32, + experience=curr_exp, + adapted_dataset=curr_dataset + ) + + mval = p_metric.after_train_dataset_adaptation(strategy_mock) + img_grid = mval[0].value.image + assert img_grid.shape == (3, 52, 52) + # save_image(img_grid, './logs/test_image_with_aug.png') + + ########################################## + # WITHOUT AUGMENTATIONS + ########################################## + p_metric = ImagesSamplePlugin( + n_cols=5, + n_rows=5, + group=True, + mode="train", + disable_augmentations=True + ) + + strategy_mock = MagicMock( + eval_mb_size=32, + experience=curr_exp, + adapted_dataset=curr_dataset + ) + + mval = p_metric.after_train_dataset_adaptation(strategy_mock) + img_grid = mval[0].value.image + assert img_grid.shape == (3, 152, 152) + # save_image(img_grid, './logs/test_image_without_aug.png') diff --git a/tests/target_metrics/mt.pickle b/tests/target_metrics/mt.pickle index 6d47b4d01..53f02c071 100644 Binary files a/tests/target_metrics/mt.pickle and b/tests/target_metrics/mt.pickle differ diff --git a/tests/target_metrics/sit.pickle b/tests/target_metrics/sit.pickle index fe28219c9..9c13709c3 100644 Binary files a/tests/target_metrics/sit.pickle and b/tests/target_metrics/sit.pickle differ diff --git a/tests/target_metrics/tpp.pickle b/tests/target_metrics/tpp.pickle index c56f55574..b02386526 100644 Binary files a/tests/target_metrics/tpp.pickle and b/tests/target_metrics/tpp.pickle differ diff --git a/tests/test_avalanche_dataset.py b/tests/test_avalanche_dataset.py index
81f4c736e..f5e0f2da5 100644 --- a/tests/test_avalanche_dataset.py +++ b/tests/test_avalanche_dataset.py @@ -23,7 +23,7 @@ create_generic_benchmark_from_tensor_lists from avalanche.benchmarks.utils import AvalancheDataset, \ AvalancheSubset, AvalancheConcatDataset, AvalancheDatasetType, \ - AvalancheTensorDataset + AvalancheTensorDataset, concat_datasets_sequentially from avalanche.benchmarks.utils.dataset_utils import ConstantSequence from avalanche.training.utils import load_all_dataset import random @@ -1325,6 +1325,41 @@ def test_avalanche_avalanche_subset_concat_stack_overflow(self): self.assertTrue(torch.equal(tensor_t, leaf[d_sz*dataset_hierarchy_depth:][2])) + def test_avalanche_concat_datasets_sequentially(self): + # create list of training datasets + train = [AvalancheDataset(TensorDataset(torch.randn(20, 10), + torch.randint(0, 2, (20,)))), + AvalancheDataset(TensorDataset(torch.randn(20, 10), + torch.randint(2, 4, (20,)))), + AvalancheDataset(TensorDataset(torch.randn(20, 10), + torch.randint(4, 6, (20,)))), + AvalancheDataset(TensorDataset(torch.randn(20, 10), + torch.randint(0, 2, (20,))))] + + # create list of test datasets + test = [AvalancheDataset(TensorDataset(torch.randn(20, 10), + torch.randint(0, 2, (20,)))), + AvalancheDataset(TensorDataset(torch.randn(20, 10), + torch.randint(2, 4, (20,)))), + AvalancheDataset(TensorDataset(torch.randn(20, 10), + torch.randint(4, 6, (20,)))), + AvalancheDataset(TensorDataset(torch.randn(20, 10), + torch.randint(0, 2, (20,))))] + + # concatenate datasets + final_train, _, classes = concat_datasets_sequentially(train, test) + + # merge all classes into a single list + classes_all = [] + for class_list in classes: + classes_all.extend(class_list) + + # get the target set of classes + target_classes = list(set(map(int, final_train.targets))) + + # test for correctness + self.assertEqual(classes_all, target_classes) + class TransformationSubsetTests(unittest.TestCase): def test_avalanche_subset_transform(self): diff --git a/tests/test_ctrl.py b/tests/test_ctrl.py new file mode 100644 index 000000000..d3720ba6f --- /dev/null +++ b/tests/test_ctrl.py @@ -0,0 +1,69 @@ +import unittest +from pathlib import Path + +from tempfile import TemporaryDirectory +import torch + +from avalanche.benchmarks.classic.ctrl import CTrL + + +def custom_equals(item, other) -> bool: + """ + Helper function that tests whether two items are equal. + The function recurses into lists and tuples, and uses `torch.equal` + when the items to compare are Tensors.
+ """ + if type(item) != type(other): + return False + if isinstance(item, (tuple, list)): + if len(item) != len(other): + return False + return all(custom_equals(*elts) for elts in zip(item, other)) + if isinstance(item, torch.Tensor): + return torch.equal(item, other) + return item == other + + +class CTrLTests(unittest.TestCase): + stream_lengths = dict( + s_plus=6, + s_minus=6, + s_in=6, + s_out=6, + s_pl=5, + ) + + long_stream_lengths = [8, 15] + + def test_length(self): + for stream, length in self.stream_lengths.items(): + with self.subTest(stream=stream, length=length): + bench = CTrL(stream) + self.assertEqual(length, bench.n_experiences) + + def test_length_long(self): + for n_tasks in self.long_stream_lengths: + with self.subTest(n_tasks=n_tasks), TemporaryDirectory() as tmp: + bench = CTrL('s_long', save_to_disk=True, path=Path(tmp), + n_tasks=n_tasks) + self.assertEqual(n_tasks, bench.n_experiences) + + def test_n_tasks_param(self): + for stream in self.stream_lengths.keys(): + with self.subTest(stream=stream): + with self.assertRaises(ValueError): + CTrL(stream, n_tasks=3) + + with self.subTest(stream='s_long'): + CTrL('s_long', n_tasks=3) + + def test_determinism(self): + for stream in self.stream_lengths.keys(): + with self.subTest(stream=stream): + bench_1 = CTrL(stream, seed=1) + bench_2 = CTrL(stream, seed=1) + + for exp1, exp2 in zip(bench_1.train_stream, + bench_2.train_stream): + for sample1, sample2 in zip(exp1.dataset, exp2.dataset): + self.assertTrue(custom_equals(sample1, sample2)) diff --git a/tests/test_metrics.py b/tests/test_metrics.py index 0e2079a8e..a560983d0 100644 --- a/tests/test_metrics.py +++ b/tests/test_metrics.py @@ -306,9 +306,13 @@ def metric_check(self, name): self.assertTrue(el >= init) init = el for el, elref in zip(v[0], vref[0]): - self.assertEqual(el, elref) + emsg = f"wrong timestep for {kref} (Expected={elref}, " \ + f"Actual={el})." + self.assertEqual(el, elref, msg=emsg) for el, elref in zip(v[1], vref[1]): - self.assertAlmostEqual(el, elref, delta=DELTA) + emsg = f"wrong value for {kref} (Expected={elref}, " \ + f"Actual={el})." + self.assertAlmostEqual(el, elref, delta=DELTA, msg=emsg) def test_accuracy(self): self.metric_check('Acc') @@ -432,9 +436,13 @@ def metric_check(self, name): self.assertTrue(el >= init) init = el for el, elref in zip(v[0], vref[0]): - self.assertEqual(el, elref) + emsg = f"wrong value for {kref} (Expected={elref}," \ + " Actual={el})." + self.assertEqual(el, elref, msg=emsg) for el, elref in zip(v[1], vref[1]): - self.assertAlmostEqual(el, elref, delta=DELTA) + emsg = f"wrong value for {kref} (Expected={elref},"\ + " Actual={el})." 
+ self.assertAlmostEqual(el, elref, delta=DELTA, msg=emsg) def test_accuracy(self): self.metric_check('Acc') @@ -534,7 +542,7 @@ def setUpClass(cls) -> None: collect_all=True) # collect all metrics (set to True by default) cl_strategy = BaseStrategy( model, SGD(model.parameters(), lr=0.001, momentum=0.9), - CrossEntropyLoss(), train_mb_size=2, train_epochs=2, + CrossEntropyLoss(), train_mb_size=4, train_epochs=2, eval_mb_size=2, device=DEVICE, evaluator=eval_plugin, eval_every=1) for i, experience in enumerate(benchmark.train_stream): diff --git a/tests/training/test_plugins.py b/tests/training/test_plugins.py index 9877ec00f..1fa974369 100644 --- a/tests/training/test_plugins.py +++ b/tests/training/test_plugins.py @@ -1,18 +1,24 @@ +import itertools import sys import torch +from torch import nn import unittest from sklearn.datasets import make_classification from sklearn.model_selection import train_test_split from torch.nn import CrossEntropyLoss from torch.optim import SGD -from torch.optim.lr_scheduler import MultiStepLR +from torch.optim.lr_scheduler import MultiStepLR, ReduceLROnPlateau from torch.utils.data import TensorDataset +from torch.utils.data.dataloader import DataLoader -from avalanche.benchmarks import nc_benchmark +from avalanche.benchmarks import nc_benchmark, GenericCLScenario, \ + benchmark_with_validation_stream +from avalanche.benchmarks.utils.data_loader import TaskBalancedDataLoader +from avalanche.evaluation.metrics import Mean from avalanche.logging import TextLogger -from avalanche.models import SimpleMLP +from avalanche.models import BaseModel from avalanche.training.plugins import StrategyPlugin from avalanche.training.plugins.lr_scheduling import LRSchedulerPlugin from avalanche.training.strategies import Naive @@ -95,10 +101,10 @@ class PluginTests(unittest.TestCase): def test_callback_reachability(self): # Check that all the callbacks are called during # training and test loops. 
- model = SimpleMLP(input_size=6, hidden_size=10) + model = _PlainMLP(input_size=6, hidden_size=10) optimizer = SGD(model.parameters(), lr=1e-3) criterion = CrossEntropyLoss() - benchmark = self.create_benchmark() + benchmark = PluginTests.create_benchmark() plug = MockPlugin() strategy = Naive(model, optimizer, criterion, @@ -110,19 +116,21 @@ def test_callback_reachability(self): strategy.eval([benchmark.test_stream[0]], num_workers=4) assert all(plug.activated) - def create_benchmark(self, task_labels=False): + @staticmethod + def create_benchmark(task_labels=False, seed=None): n_samples_per_class = 20 dataset = make_classification( n_samples=10 * n_samples_per_class, n_classes=10, - n_features=6, n_informative=6, n_redundant=0) + n_features=6, n_informative=6, n_redundant=0, + random_state=seed) X = torch.from_numpy(dataset[0]).float() y = torch.from_numpy(dataset[1]).long() train_X, test_X, train_y, test_y = train_test_split( - X, y, train_size=0.6, shuffle=True, stratify=y) + X, y, train_size=0.6, shuffle=True, stratify=y, random_state=seed) train_dataset = TensorDataset(train_X, train_y) test_dataset = TensorDataset(test_X, test_y) @@ -131,75 +139,407 @@ def create_benchmark(self, task_labels=False): fixed_class_order=list(range(10))) def test_scheduler_plugin(self): - self._test_scheduler_plugin(gamma=1 / 2., - milestones=[2, 3], - base_lr=4., - epochs=3, - reset_lr=True, - reset_scheduler=True, - expected=[[4., 2., 1.], - [4., 2., 1.]], - ) - - self._test_scheduler_plugin(gamma=1 / 2., - milestones=[2, 3], - base_lr=4., - epochs=3, - reset_lr=False, - reset_scheduler=True, - expected=[[4., 2., 1.], - [1., .5, .25]], - ) - - self._test_scheduler_plugin(gamma=1 / 2., - milestones=[2, 3], - base_lr=4., - epochs=3, - reset_lr=True, - reset_scheduler=False, - expected=[[4., 2., 1.], - [4., 4., 4.]], - ) - - self._test_scheduler_plugin(gamma=1 / 2., - milestones=[2, 3], - base_lr=4., - epochs=3, - reset_lr=False, - reset_scheduler=False, - expected=[[4., 2., 1.], - [1., 1., 1.]], - ) - - def _test_scheduler_plugin(self, gamma, milestones, base_lr, epochs, - reset_lr, reset_scheduler, expected): - - class TestPlugin(StrategyPlugin): - def __init__(self, expected_lrs): - super().__init__() - self.expected_lrs = expected_lrs - - def after_training_epoch(self, strategy, **kwargs): - exp_id = strategy.clock.train_exp_counter - curr_epoch = strategy.clock.train_exp_epochs - expected_lr = self.expected_lrs[exp_id][curr_epoch] - for group in strategy.optimizer.param_groups: - assert group['lr'] == expected_lr - - benchmark = self.create_benchmark() - model = SimpleMLP(input_size=6, hidden_size=10) - + PluginTests._test_scheduler_multi_step_lr_plugin( + gamma=1 / 2., + milestones=[2, 3], + base_lr=4., + epochs=3, + reset_lr=True, + reset_scheduler=True, + expected=[[4., 2., 1.], [4., 2., 1.]]) + + PluginTests._test_scheduler_multi_step_lr_plugin( + gamma=1 / 2., + milestones=[2, 3], + base_lr=4., + epochs=3, + reset_lr=False, + reset_scheduler=True, + expected=[[4., 2., 1.], [1., .5, .25]]) + + PluginTests._test_scheduler_multi_step_lr_plugin( + gamma=1 / 2., + milestones=[2, 3], + base_lr=4., + epochs=3, + reset_lr=True, + reset_scheduler=False, + expected=[[4., 2., 1.], [4., 4., 4.]]) + + PluginTests._test_scheduler_multi_step_lr_plugin( + gamma=1 / 2., + milestones=[2, 3], + base_lr=4., + epochs=3, + reset_lr=False, + reset_scheduler=False, + expected=[[4., 2., 1.], [1., 1., 1.]]) + + @staticmethod + def _test_scheduler_multi_step_lr_plugin( + gamma, milestones, base_lr, epochs, + reset_lr, 
reset_scheduler, expected): + + benchmark = PluginTests.create_benchmark() + model = _PlainMLP(input_size=6, hidden_size=10) optim = SGD(model.parameters(), lr=base_lr) - lrSchedulerPlugin = LRSchedulerPlugin( - MultiStepLR(optim, milestones=milestones, gamma=gamma), - reset_lr=reset_lr, reset_scheduler=reset_scheduler) + scheduler = MultiStepLR(optim, milestones=milestones, gamma=gamma) + + PluginTests._test_scheduler_plugin( + benchmark, model, optim, scheduler, + epochs, reset_lr, reset_scheduler, expected) + + def assert_model_equals(self, model1, model2): + dict1 = model1.state_dict() + dict2 = model2.state_dict() + + # compare keys + self.assertSetEqual(set(dict1.keys()), set(dict2.keys())) + + # compare params + for (k, v) in dict1.items(): + self.assertTrue(torch.equal(v, dict2[k])) + + def assert_benchmark_equals( + self, + bench1: GenericCLScenario, + bench2: GenericCLScenario): + self.assertSetEqual(set(bench1.streams.keys()), + set(bench2.streams.keys())) + + for stream_name in list(bench1.streams.keys()): + for exp1, exp2 in zip(bench1.streams[stream_name], + bench2.streams[stream_name]): + dataset1 = exp1.dataset + dataset2 = exp2.dataset + for t_idx in range(3): + dataset1_content = dataset1[:][t_idx] + dataset2_content = dataset2[:][t_idx] + self.assertTrue(torch.equal(dataset1_content, + dataset2_content)) + + def _verify_rop_tests_reproducibility( + self, init_strategy, n_epochs, criterion): + # This doesn't actually test the support for the specific scheduler + # (ReduceLROnPlateau), but it's only used to check if: + # - the same model+benchmark pair can be instantiated in a + # deterministic way. + # - the same results could be obtained in a standard training loop in a + # deterministic way. + models_rnd = [] + benchmarks_rnd = [] + for _ in range(2): + benchmark, model = init_strategy() + models_rnd.append(model) + benchmarks_rnd.append(benchmark) + + self.assert_model_equals(*models_rnd) + self.assert_benchmark_equals(*benchmarks_rnd) + + expected_lrs_rnd = [] + for _ in range(2): + benchmark, model = init_strategy() + + expected_lrs = [] + model.train() + for exp in benchmark.train_stream: + optimizer = SGD(model.parameters(), lr=0.001) + scheduler = ReduceLROnPlateau(optimizer) + expected_lrs.append([]) + train_loss = Mean() + for epoch in range(n_epochs): + train_loss.reset() + for x, y, t in TaskBalancedDataLoader( + exp.dataset, + oversample_small_groups=True, + num_workers=0, + batch_size=32, + shuffle=False, + pin_memory=False): + optimizer.zero_grad() + outputs = model(x) + loss = criterion(outputs, y) + train_loss.update(loss, weight=len(x)) + loss.backward() + optimizer.step() + scheduler.step(train_loss.result()) + for group in optimizer.param_groups: + expected_lrs[-1].append(group['lr']) + break + expected_lrs_rnd.append(expected_lrs) + self.assertEqual(expected_lrs_rnd[0], expected_lrs_rnd[1]) + + def test_scheduler_reduce_on_plateau_plugin(self): + # Regression test for issue #858 + n_epochs = 20 + criterion = CrossEntropyLoss() + + def _prepare_rng_critical_parts(seed=1234): + torch.random.manual_seed(seed) + return (PluginTests.create_benchmark(seed=seed), + _PlainMLP(input_size=6, hidden_size=10)) + + self._verify_rop_tests_reproducibility( + _prepare_rng_critical_parts, + n_epochs, + criterion) + + # Everything is in order, now we can test the plugin support for the + # ReduceLROnPlateau scheduler! 
+ + for reset_lr, reset_scheduler in itertools.product( + (True, False), (True, False)): + with self.subTest(reset_lr=reset_lr, + reset_scheduler=reset_scheduler): + # First, obtain the reference (expected) lr timeline by running + # a plain PyTorch training loop with ReduceLROnPlateau. + benchmark, model = _prepare_rng_critical_parts() + model.train() + expected_lrs = [] + + optimizer = SGD(model.parameters(), lr=0.001) + scheduler = ReduceLROnPlateau(optimizer) + for exp in benchmark.train_stream: + if reset_lr: + for group in optimizer.param_groups: + group['lr'] = 0.001 + + if reset_scheduler: + scheduler = ReduceLROnPlateau(optimizer) + + expected_lrs.append([]) + train_loss = Mean() + for epoch in range(n_epochs): + train_loss.reset() + for x, y, t in TaskBalancedDataLoader( + exp.dataset, + oversample_small_groups=True, + num_workers=0, + batch_size=32, + shuffle=False, + pin_memory=False): + optimizer.zero_grad() + outputs = model(x) + loss = criterion(outputs, y) + train_loss.update(loss, weight=len(x)) + loss.backward() + optimizer.step() + scheduler.step(train_loss.result()) + for group in optimizer.param_groups: + expected_lrs[-1].append(group['lr']) + break + + # Now we have the correct timeline stored in expected_lrs. + # Let's test the plugin! + benchmark, model = _prepare_rng_critical_parts() + optimizer = SGD(model.parameters(), lr=0.001) + scheduler = ReduceLROnPlateau(optimizer) + + PluginTests._test_scheduler_plugin( + benchmark, model, optimizer, scheduler, + n_epochs, reset_lr, reset_scheduler, expected_lrs, + criterion=criterion, + metric='train_loss') + + # Other tests + benchmark, model = _prepare_rng_critical_parts() + optimizer = SGD(model.parameters(), lr=0.001) + scheduler = ReduceLROnPlateau(optimizer) + scheduler2 = MultiStepLR(optimizer, [1, 2, 3]) + + # The metric must be set + with self.assertRaises(Exception): + LRSchedulerPlugin( + scheduler, + metric=None) + + # Doesn't make sense to set the metric when using a non-metric + # based scheduler (should warn) + with self.assertWarns(Warning): + LRSchedulerPlugin( + scheduler2, + metric='train_loss') + + # Must raise an error on unsupported metric + with self.assertRaises(Exception): + LRSchedulerPlugin( + scheduler, + metric='cuteness') + + def test_scheduler_reduce_on_plateau_plugin_with_val_stream(self): + # Regression test for issue #858 (part 2) + n_epochs = 20 + criterion = CrossEntropyLoss() + + def _prepare_rng_critical_parts(seed=1234): + torch.random.manual_seed(seed) + initial_benchmark = PluginTests.create_benchmark(seed=seed) + val_benchmark = benchmark_with_validation_stream( + initial_benchmark, 0.3, shuffle=True) + return (val_benchmark, + _PlainMLP(input_size=6, hidden_size=10)) + + self._verify_rop_tests_reproducibility( + _prepare_rng_critical_parts, + n_epochs, + criterion) + + # Everything is in order, now we can test the plugin support for the + # ReduceLROnPlateau scheduler! + for reset_lr, reset_scheduler in itertools.product( + (True, False), (True, False)): + with self.subTest(reset_lr=reset_lr, + reset_scheduler=reset_scheduler): + # First, obtain the reference (expected) lr timeline by running + # a plain PyTorch training loop with ReduceLROnPlateau. 
+ benchmark, model = _prepare_rng_critical_parts() + + expected_lrs = [] + + optimizer = SGD(model.parameters(), lr=0.001) + scheduler = ReduceLROnPlateau(optimizer) + for exp_idx, exp in enumerate(benchmark.train_stream): + expected_lrs.append([]) + model.train() + if reset_lr: + for group in optimizer.param_groups: + group['lr'] = 0.001 + + if reset_scheduler: + scheduler = ReduceLROnPlateau(optimizer) + + for epoch in range(n_epochs): + for x, y, t in TaskBalancedDataLoader( + exp.dataset, + oversample_small_groups=True, + num_workers=0, + batch_size=32, + shuffle=False, + pin_memory=False): + optimizer.zero_grad() + outputs = model(x) + loss = criterion(outputs, y) + loss.backward() + optimizer.step() + for group in optimizer.param_groups: + expected_lrs[-1].append(group['lr']) + break + + val_loss = Mean() + val_exp = benchmark.valid_stream[exp_idx] + + model.eval() + with torch.no_grad(): + for x, y, t in DataLoader( + val_exp.dataset, + num_workers=0, + batch_size=100, + pin_memory=False): + outputs = model(x) + loss = criterion(outputs, y) + val_loss.update(loss, weight=len(x)) + + scheduler.step(val_loss.result()) + + # Now we have the correct timeline stored in expected_lrs + # Let's test the plugin! + benchmark, model = _prepare_rng_critical_parts() + optimizer = SGD(model.parameters(), lr=0.001) + scheduler = ReduceLROnPlateau(optimizer) + + PluginTests._test_scheduler_plugin( + benchmark, model, optimizer, scheduler, + n_epochs, reset_lr, reset_scheduler, expected_lrs, + criterion=criterion, + metric='val_loss', + eval_on_valid_stream=True) + + @staticmethod + def _test_scheduler_plugin( + benchmark, model, optim, scheduler, epochs, + reset_lr, reset_scheduler, expected, criterion=None, + metric=None, eval_on_valid_stream=False): + lr_scheduler_plugin = LRSchedulerPlugin( + scheduler, + reset_lr=reset_lr, + reset_scheduler=reset_scheduler, + metric=metric) + + verifier_plugin = SchedulerPluginTestPlugin(expected) + + if criterion is None: + criterion = CrossEntropyLoss() + if eval_on_valid_stream: + cl_strategy = Naive( + model, optim, criterion, train_mb_size=32, + train_epochs=epochs, eval_mb_size=100, + plugins=[lr_scheduler_plugin, verifier_plugin], + eval_every=1, evaluator=None) + + cl_strategy.train(benchmark.train_stream[0], shuffle=False, + eval_streams=[benchmark.valid_stream[0]]) + cl_strategy.train(benchmark.train_stream[1], shuffle=False, + eval_streams=[benchmark.valid_stream[1]]) + else: + cl_strategy = Naive( + model, optim, criterion, train_mb_size=32, + train_epochs=epochs, eval_mb_size=100, + plugins=[lr_scheduler_plugin, verifier_plugin], + evaluator=None) + + cl_strategy.train(benchmark.train_stream[0], shuffle=False) + cl_strategy.train(benchmark.train_stream[1], shuffle=False) + + +class SchedulerPluginTestPlugin(StrategyPlugin): + def __init__(self, expected_lrs): + super().__init__() + self.expected_lrs = expected_lrs - cl_strategy = Naive(model, optim, CrossEntropyLoss(), train_mb_size=32, - train_epochs=epochs, eval_mb_size=100, - plugins=[lrSchedulerPlugin, TestPlugin(expected)]) + def after_training_epoch(self, strategy, **kwargs): + exp_id = strategy.clock.train_exp_counter + curr_epoch = strategy.clock.train_exp_epochs + expected_lr = self.expected_lrs[exp_id][curr_epoch] + for group in strategy.optimizer.param_groups: + assert group['lr'] == expected_lr,\ + f"LR mismatch: {group['lr']} vs {expected_lr}" + + +class _PlainMLP(nn.Module, BaseModel): + """ + An internal MLP implementation without Dropout. 
+ + Needed to reproduce tests for the ReduceLROnPlateau scheduler + """ + def __init__(self, num_classes=10, input_size=28 * 28, + hidden_size=512, hidden_layers=1): + + super().__init__() - cl_strategy.train(benchmark.train_stream[0]) - cl_strategy.train(benchmark.train_stream[1]) + layers = nn.Sequential(*(nn.Linear(input_size, hidden_size), + nn.ReLU(inplace=True))) + for layer_idx in range(hidden_layers - 1): + layers.add_module( + f"fc{layer_idx + 1}", nn.Sequential( + *(nn.Linear(hidden_size, hidden_size), + nn.ReLU(inplace=True)))) + + self.features = nn.Sequential(*layers) + self.classifier = nn.Linear(hidden_size, num_classes) + self._input_size = input_size + + def forward(self, x): + x = x.contiguous() + x = x.view(x.size(0), self._input_size) + x = self.features(x) + x = self.classifier(x) + return x + + def get_features(self, x): + x = x.contiguous() + x = x.view(x.size(0), self._input_size) + x = self.features(x) + return x if __name__ == '__main__': diff --git a/tests/training/test_strategies.py b/tests/training/test_strategies.py index e3882cf5f..cda029841 100644 --- a/tests/training/test_strategies.py +++ b/tests/training/test_strategies.py @@ -73,9 +73,19 @@ def test_periodic_eval(self): strategy = Naive(model, optimizer, criterion, train_epochs=2, eval_every=1, evaluator=EvaluationPlugin(acc)) strategy.train(benchmark.train_stream[0]) - # eval is called after every epoch + the end of the training loop curve = strategy.evaluator.get_all_metrics()[curve_key][1] - assert len(curve) == 4 + assert len(curve) == 3 + + ################### + # Case #4: Eval in iteration mode + ################### + acc = StreamAccuracy() + strategy = Naive(model, optimizer, criterion, train_epochs=2, + eval_every=100, evaluator=EvaluationPlugin(acc), + peval_mode='iteration') + strategy.train(benchmark.train_stream[0]) + curve = strategy.evaluator.get_all_metrics()[curve_key][1] + assert len(curve) == 5 def test_forward_hooks(self): model = SimpleMLP(input_size=6, hidden_size=10)
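For context on the scheduler tests above, the plugin wiring they verify can be summarized in a short usage sketch. This mirrors what `_test_scheduler_plugin` does with `metric='val_loss'` and is only an illustration of the tested behavior, not code from this PR; the four-epoch setup and the SplitMNIST/SimpleMLP pairing are arbitrary choices for the example.

from torch.nn import CrossEntropyLoss
from torch.optim import SGD
from torch.optim.lr_scheduler import ReduceLROnPlateau

from avalanche.benchmarks import SplitMNIST, benchmark_with_validation_stream
from avalanche.models import SimpleMLP
from avalanche.training.plugins.lr_scheduling import LRSchedulerPlugin
from avalanche.training.strategies import Naive

# Split part of the training data into a validation stream, as done in
# test_scheduler_reduce_on_plateau_plugin_with_val_stream.
benchmark = benchmark_with_validation_stream(SplitMNIST(5), 0.3, shuffle=True)
model = SimpleMLP()
optimizer = SGD(model.parameters(), lr=0.001)
scheduler = ReduceLROnPlateau(optimizer)

# metric='val_loss' makes the plugin step the scheduler with the loss
# computed on the validation stream, so periodic evaluation must be on
# (eval_every=1) and the validation experience passed via eval_streams.
strategy = Naive(
    model, optimizer, CrossEntropyLoss(),
    train_mb_size=32, train_epochs=4, eval_mb_size=100,
    plugins=[LRSchedulerPlugin(scheduler, metric='val_loss')],
    eval_every=1)

for train_exp, val_exp in zip(benchmark.train_stream, benchmark.valid_stream):
    strategy.train(train_exp, eval_streams=[val_exp])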