Add force_data_batch_size to ReplayPlugin for manual assignment of data-mem ratio #834

Merged
merged 86 commits into from
Jan 7, 2022
Commits
4aef41d
add CTrL integration
TomVeniat Apr 23, 2021
e2e6e60
Add normalization to each task
TomVeniat Apr 23, 2021
ae65f14
Add tests
TomVeniat May 4, 2021
d0e7936
Add example script
TomVeniat May 23, 2021
1ca7cca
Clone model to compute transfer
TomVeniat May 25, 2021
648062a
Fix PEP8 errors
TomVeniat Jun 10, 2021
3cda277
Example script cleaning
TomVeniat Jun 14, 2021
67f611c
Add support for the long stream + early stopping
TomVeniat Jun 16, 2021
ac183eb
Add modes to the early stopping plugin
TomVeniat Jun 16, 2021
21426fa
Add S_long to the demo
TomVeniat Jun 17, 2021
dc75875
Use early stopping plugin + add license
TomVeniat Jun 22, 2021
e8e0741
Shorten the long_stream for testing
TomVeniat Jun 24, 2021
6a0e97f
Fix PEP8 error
TomVeniat Jun 24, 2021
f43544a
update ctrl dependency
TomVeniat Aug 16, 2021
511ff18
Merge remote-tracking branch 'upstream/master'
lrzpellegrini Nov 17, 2021
d751a05
FIX an issue with mem_batch_size in ReplayDataLoader when forcing dat…
HamedHemati Nov 19, 2021
42edd44
Add force_data_batch_size option to ReplayPlugin to enable manual ass…
HamedHemati Nov 19, 2021
4858b48
FIX an issue with mem_batch_size in ReplayDataLoader [PEP8-FIX]
HamedHemati Nov 19, 2021
17fc1a8
minor changes to docstrings
AntonioCarta Nov 25, 2021
89a4989
minor changes to docstrings
AntonioCarta Nov 25, 2021
39a49a6
Merge branch 'doc'
AntonioCarta Nov 25, 2021
8964dcd
Merge branch 'master' of https://github.com/ContinualAI/avalanche
AntonioCarta Nov 25, 2021
454534e
change GroupBalancedDataloader to have a fixed batch size for any num…
HamedHemati Nov 26, 2021
1cd5609
Merge remote-tracking branch 'upstream/master'
lrzpellegrini Nov 28, 2021
d59a9e2
Added first 3 notebooks of the AvalancheDataset How-To series.
lrzpellegrini Nov 28, 2021
51c2ac4
ImageSamples metric now works with tensors
AntonioCarta Nov 29, 2021
660c905
Merge branch 'master' into fixes
AntonioCarta Nov 29, 2021
ac97308
GitBook: [#123] how-to added
vlomonaco Nov 29, 2021
0a35699
Merge remote-tracking branch 'upstream/master'
lrzpellegrini Nov 29, 2021
e03b399
REFACTOR tests
AntonioCarta Nov 29, 2021
6d60a28
GitBook: [#125] Adding preamble page to AvalancheDataset How-To
vlomonaco Nov 29, 2021
f24d325
Merge remote-tracking branch 'upstream/master'
lrzpellegrini Nov 29, 2021
b6aaf1d
Adapted AvalancheDataset How-To notebooks to the GitBook structure.
lrzpellegrini Nov 29, 2021
508821b
Remove old How-To structure.
lrzpellegrini Nov 29, 2021
a238df7
Merge pull request #842 from lrzpellegrini/master
vlomonaco Nov 29, 2021
11e374f
Update gitbook documentation
ContinualAI-bot Nov 29, 2021
f25043b
update notebooks
AntonioCarta Nov 30, 2021
e3855b0
Merge pull request #840 from AntonioCarta/master
AntonioCarta Nov 30, 2021
5d01290
Update gitbook documentation
ContinualAI-bot Nov 30, 2021
8bba427
Update bug_report.md
AntonioCarta Dec 1, 2021
466d1aa
ADD periodic eval at each iteration
AntonioCarta Dec 2, 2021
d3f58b2
FIX issue #838
AntonioCarta Dec 2, 2021
d9a0e7b
UPDATE target metrics
AntonioCarta Dec 2, 2021
4d3d923
Fixed
ashok-arjun Dec 5, 2021
6ad2428
Merge remote-tracking branch 'upstream/master'
lrzpellegrini Dec 14, 2021
3efab4c
update metric targets
AntonioCarta Dec 14, 2021
896a2be
Added sphinx doc for the benchmarks module. Fixed import issues.
lrzpellegrini Dec 14, 2021
8ce0f49
Merge pull request #852 from lrzpellegrini/master
AntonioCarta Dec 15, 2021
66878cb
Changed version
AndreaCossu Dec 16, 2021
30ba7a9
GitBook: [#126] No subject
Dec 16, 2021
ac83c45
Fixed bug in setuptools
AndreaCossu Dec 16, 2021
c82526a
Added beta release and pip install details
AndreaCossu Dec 17, 2021
7fa8655
Merge remote-tracking branch 'upstream/master'
lrzpellegrini Dec 19, 2021
eb68a71
GitBook: [#129] No subject
AntonioCarta Dec 20, 2021
0cb205b
GitBook: [#130] No subject
AntonioCarta Dec 20, 2021
c3e9f33
GitBook: [#131] No subject
AntonioCarta Dec 20, 2021
49fc725
Merge remote-tracking branch 'upstream/master'
lrzpellegrini Dec 20, 2021
1ff70f7
Support for ReduceLROnPlateau. Fixes #858. Refactored scheduler tests.
lrzpellegrini Dec 21, 2021
9ceb11d
Updated metrics files
AndreaCossu Dec 21, 2021
572bcb5
Updated metrics
AndreaCossu Dec 21, 2021
8fb4e9b
Merge branch 'ContinualAI:master' into Issue#771
ashok-arjun Dec 22, 2021
7cb42ae
Change tensor to int type
ashok-arjun Dec 22, 2021
b6e9239
Merge pull request #843 from AntonioCarta/peval_iterations
AndreaCossu Dec 22, 2021
e068e77
Merge remote-tracking branch 'origin/master'
lrzpellegrini Dec 24, 2021
382e052
Merge pull request #859 from lrzpellegrini/master
lrzpellegrini Dec 24, 2021
8606ec1
Prevents TensorboardLogger from blocking process exit. Fixes issue #864
lrzpellegrini Dec 24, 2021
5db9dd0
Fixes PEP8 issue.
lrzpellegrini Dec 24, 2021
c92174c
GitBook: fixed a bug in documentation
zalakbhalani Dec 24, 2021
3aeac8b
Merge branch 'ContinualAI:master' into Issue#771
ashok-arjun Dec 25, 2021
f67492b
Merge pull request #866 from zalakbhalani/master
lrzpellegrini Dec 25, 2021
b875547
Merge pull request #865 from lrzpellegrini/master
lrzpellegrini Dec 25, 2021
390c9e5
Merge branch 'ContinualAI:master' into Issue#771
ashok-arjun Dec 29, 2021
8484008
Get classes by `dataset.targets`
ashok-arjun Dec 29, 2021
66337bb
Correct error in var name
ashok-arjun Dec 29, 2021
095460d
Merge branch 'master' into master
TomVeniat Dec 30, 2021
2b62cb8
Added tests for `concat_datasets_sequentially`
ashok-arjun Dec 31, 2021
438f700
Fix PEP8 errors
ashok-arjun Dec 31, 2021
2164acf
Force added ctrl-benchmark dependency
AndreaCossu Jan 3, 2022
bf5776f
Force add ctrl-benchmark as dependency
AndreaCossu Jan 3, 2022
743d232
Added ctrl-benchmark as dependency
AndreaCossu Jan 3, 2022
6e89f99
Merge remote-tracking branch 'upstream/master'
AndreaCossu Jan 3, 2022
93fb791
Fixed pep
AndreaCossu Jan 3, 2022
bf67e20
Merge pull request #844 from ashok-arjun/Issue#771
AndreaCossu Jan 3, 2022
21589ea
Merge pull request #561 from TomVeniat/master
AndreaCossu Jan 3, 2022
0313607
Merge branch 'fixes' of https://github.com/hamedhemati/avalanche into…
AntonioCarta Jan 5, 2022
19a5ffd
update tests and target metrics
AntonioCarta Jan 7, 2022
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/bug_report.md
@@ -11,7 +11,7 @@ assignees: ''
A clear and concise description of what the bug is.

🐜 **To Reproduce**
Steps / minimal snipped of code to reproduce the issue.
A [minimal working example](https://en.wikipedia.org/wiki/Minimal_working_example) to reproduce the issue. The code should be executable without modifications.

🐝 **Expected behavior**
A clear and concise description of what you expected to happen.
6 changes: 3 additions & 3 deletions README.md
@@ -83,10 +83,10 @@ Current Release

Avalanche is a framework in constant development. Thanks to the support of the [ContinualAI]() community and its active members, we are quickly extending its features and improving its usability based on the demands of our research community!

A the moment, Avalanche is in [**Alpha v0.0.1**](https://avalanche.continualai.org/getting-started/alpha-version), but we already support [several *Benchmarks*, *Strategies* and *Metrics*](https://avalanche.continualai.org/getting-started/alpha-version), that make it, we believe, the best tool out there for your continual learning research! 💪
At the moment, Avalanche is in [**Beta (v0.1.0)**](https://github.com/ContinualAI/avalanche/releases/tag/v0.1.0). We support [several *Benchmarks*, *Strategies* and *Metrics*](https://avalanche.continualai.org/getting-started/alpha-version), which make it, we believe, the best tool out there for your continual learning research! 💪

*Please note that, at the moment, we **do not** support stable releases and packaged versions of the library.*
*We do this intentionally as in this early phase we would like to stimulate contributions only from experienced CL researchers and coders.*
**You can install Avalanche by running `pip install avalanche-lib`.**
Look [here](https://avalanche.continualai.org/getting-started/how-to-install) for a more complete guide on the different ways available to install Avalanche.

Getting Started
----------------
2 changes: 1 addition & 1 deletion avalanche/__init__.py
@@ -5,7 +5,7 @@
from avalanche import training


__version__ = "0.0.1"
__version__ = "0.1.0"

_dataset_add = None

2 changes: 2 additions & 0 deletions avalanche/benchmarks/classic/__init__.py
@@ -3,10 +3,12 @@
from .ccub200 import *
from .cfashion_mnist import *
from .cimagenet import *
from .cinaturalist import *
from .cmnist import *
from .comniglot import *
from .core50 import CORe50
from .ctiny_imagenet import *
from .ctrl import *
from .endless_cl_sim import *
from .openloris import *
from .stream51 import *
105 changes: 105 additions & 0 deletions avalanche/benchmarks/classic/ctrl.py
@@ -0,0 +1,105 @@
################################################################################
# Copyright (c) 2021 ContinualAI. #
# Copyrights licensed under the MIT License. #
# See the accompanying LICENSE file for terms. #
# #
# Date: 22-06-2021 #
# Author(s): Tom Veniat #
# E-mail: contact@continualai.org #
# Website: avalanche.continualai.org #
################################################################################

import random
import sys
from pathlib import Path

import torchvision.transforms.functional as F
from torchvision import transforms
from tqdm import tqdm

import ctrl
from avalanche.benchmarks import dataset_benchmark
from avalanche.benchmarks.datasets import default_dataset_location
from avalanche.benchmarks.utils import AvalancheTensorDataset, \
common_paths_root, AvalancheDataset, PathsDataset


def CTrL(stream_name: str, save_to_disk: bool = False,
path: Path = default_dataset_location(''), seed: int = None,
n_tasks: int = None):
"""
Gives access to the Continual Transfer Learning benchmark streams
introduced in https://arxiv.org/abs/2012.12631.
:param stream_name: Name of the test stream to generate. Must be one of
`s_plus`, `s_minus`, `s_in`, `s_out` and `s_pl`.
:param save_to_disk: Whether to save each stream on the disk or load
everything in memory. Setting it to `True` will save memory but take more
time on the first generation using the corresponding seed.
:param path: The path under which the generated stream will be saved if
save_to_disk is True.
:param seed: The seed to use to generate the streams. If no seed is given,
a random one is drawn; since the seed fully determines the streams,
reusing the same seed reproduces the same generated streams.
:param n_tasks: The number of tasks to generate. This parameter is only
relevant for the `s_long` stream, as all other streams have a fixed number
of tasks.
:return: A scenario containing 3 streams: train, val and test.
"""
seed = seed or random.randint(0, sys.maxsize)
if stream_name != 's_long' and n_tasks is not None:
raise ValueError('The n_tasks parameter can only be used with the '
f'"s_long" stream, asked {n_tasks} for {stream_name}')
elif stream_name == 's_long' and n_tasks is None:
n_tasks = 100

stream = ctrl.get_stream(stream_name, seed)

if save_to_disk:
folder = path / 'ctrl' / stream_name / f'seed_{seed}'

# Train, val and test experiences
exps = [[], [], []]
for t_id, t in enumerate(tqdm(stream, desc=f'Loading {stream_name}')):
trans = transforms.Normalize(t.statistics['mean'],
t.statistics['std'])
for split, split_name, exp in zip(t.datasets, t.split_names, exps):
samples, labels = split.tensors
task_labels = [t.id] * samples.size(0)
if save_to_disk:
exp_folder = folder / f'exp_{t_id}' / split_name
exp_folder.mkdir(parents=True, exist_ok=True)
files = []
for i, (sample, label) in enumerate(zip(samples, labels)):
sample_path = exp_folder / f'sample_{i}.png'
if not sample_path.exists():
F.to_pil_image(sample).save(sample_path)
files.append((sample_path, label.item()))

common_root, exp_paths_list = common_paths_root(files)
paths_dataset = PathsDataset(common_root, exp_paths_list)
dataset = AvalancheDataset(
paths_dataset,
task_labels=task_labels,
transform=transforms.Compose([
transforms.ToTensor(),
trans
])
)
else:
dataset = AvalancheTensorDataset(samples, labels.squeeze(1),
task_labels=task_labels,
transform=trans)
exp.append(dataset)
if stream_name == 's_long' and t_id == n_tasks - 1:
break

return dataset_benchmark(
train_datasets=exps[0],
test_datasets=exps[2],
other_streams_datasets=dict(val=exps[1]),
)


__all__ = [
'CTrL'
]
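The seed handling and on-disk layout in `CTrL` above can be sketched in isolation. This is a minimal sketch, not Avalanche code: the helper name and the `/tmp/data` path are illustrative, and an explicit `is None` check is used so that an intentional seed of 0 would not be replaced (the `seed or random.randint(...)` form above would replace it).

```python
import random
import sys
from pathlib import Path


def resolve_seed_and_folder(stream_name, root, seed=None):
    # Mirror `seed = seed or random.randint(0, sys.maxsize)` from CTrL,
    # but with an explicit None check so a deliberate seed of 0 survives.
    if seed is None:
        seed = random.randint(0, sys.maxsize)
    # When save_to_disk is True, the stream lands under
    # <root>/ctrl/<stream_name>/seed_<seed>, as in the function above.
    return seed, root / 'ctrl' / stream_name / f'seed_{seed}'


seed, folder = resolve_seed_and_folder('s_plus', Path('/tmp/data'), seed=7)
print(folder)  # → /tmp/data/ctrl/s_plus/seed_7
```

Because the seed fully determines the generated streams, passing the same seed (or reusing the folder name it produced) reproduces the same benchmark.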
2 changes: 1 addition & 1 deletion avalanche/benchmarks/classic/stream51.py
@@ -247,7 +247,7 @@ def CLStream51(


__all__ = [
'Stream51'
'CLStream51'
]

if __name__ == "__main__":
3 changes: 2 additions & 1 deletion avalanche/benchmarks/datasets/__init__.py
@@ -2,10 +2,11 @@
from .downloadable_dataset import *
from .core50 import *
from .cub200 import *
from .endless_cl_sim import *
from .mini_imagenet import *
from .openloris import *
from .stream51 import *
from .tiny_imagenet import *
from .omniglot import *
from .stream51 import *
from .torchvision_wrapper import *
from .inaturalist import *
26 changes: 20 additions & 6 deletions avalanche/benchmarks/utils/avalanche_dataset.py
@@ -2004,20 +2004,34 @@ def concat_datasets_sequentially(

new_class_ids_per_dataset = []
for dataset_idx in range(len(train_dataset_list)):

# Get the train and test sets of the dataset
train_set = train_dataset_list[dataset_idx]
test_set = test_dataset_list[dataset_idx]

# Get the classes in the dataset
dataset_classes = set(map(int, train_set.targets))

# The class IDs for this dataset will be in range
# [n_classes_in_previous_datasets,
# n_classes_in_previous_datasets + classes_in_this_dataset)
class_mapping = list(
new_classes = list(
range(next_remapped_idx,
next_remapped_idx + classes_per_dataset[dataset_idx]))
new_class_ids_per_dataset.append(class_mapping)

train_set = train_dataset_list[dataset_idx]
test_set = test_dataset_list[dataset_idx]

new_class_ids_per_dataset.append(new_classes)

# AvalancheSubset is used to apply the class IDs transformation.
# Remember, the class_mapping parameter must be a list in which:
# new_class_id = class_mapping[original_class_id]
# Hence, a list of size equal to the maximum class index + 1 is created;
# only elements corresponding to the classes present in this dataset
# are remapped, while the rest stay at -1
class_mapping = [-1] * (max(dataset_classes) + 1)
j = 0
for i in dataset_classes:
class_mapping[i] = new_classes[j]
j += 1

# Create remapped datasets and append them to the final list
remapped_train_datasets.append(
AvalancheSubset(train_set, class_mapping=class_mapping))
remapped_test_datasets.append(
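The sparse `class_mapping` construction in this hunk can be illustrated standalone. A minimal sketch under two assumptions: the helper name is invented for illustration, and iteration over the class set is sorted here for determinism (the original iterates the set directly).

```python
def build_class_mapping(dataset_classes, new_classes):
    # Positions for class IDs absent from this dataset stay -1; present IDs
    # are remapped so that new_class_id = class_mapping[original_class_id],
    # as required by AvalancheSubset's class_mapping parameter.
    class_mapping = [-1] * (max(dataset_classes) + 1)
    for original_id, remapped_id in zip(sorted(dataset_classes), new_classes):
        class_mapping[original_id] = remapped_id
    return class_mapping


print(build_class_mapping({1, 3, 5}, [10, 11, 12]))
# → [-1, 10, -1, 11, -1, 12]
```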
43 changes: 36 additions & 7 deletions avalanche/benchmarks/utils/data_loader.py
@@ -41,6 +41,8 @@ def _default_collate_mbatches_fn(mbatches):


class TaskBalancedDataLoader:
""" Task-balanced data loader for Avalanche's datasets."""

def __init__(self, data: AvalancheDataset,
oversample_small_tasks: bool = False,
collate_mbatches=_default_collate_mbatches_fn,
@@ -95,9 +97,12 @@ def __len__(self):


class GroupBalancedDataLoader:
""" Data loader that balances data from multiple datasets."""

def __init__(self, datasets: Sequence[AvalancheDataset],
oversample_small_groups: bool = False,
collate_mbatches=_default_collate_mbatches_fn,
batch_size: int = 32,
**kwargs):
""" Data loader that balances data from multiple datasets.

@@ -115,6 +120,8 @@ def __init__(self, datasets: Sequence[AvalancheDataset],
:param collate_mbatches: function that given a sequence of mini-batches
(one for each task) combines them into a single mini-batch. Used to
combine the mini-batches obtained separately from each task.
:param batch_size: the size of the batch. It must be greater than or
equal to the number of groups.
:param kwargs: data loader arguments used to instantiate the loader for
each group separately. See pytorch :class:`DataLoader`.
"""
@@ -123,8 +130,19 @@ def __init__(self, datasets: Sequence[AvalancheDataset],
self.oversample_small_groups = oversample_small_groups
self.collate_mbatches = collate_mbatches

# check if batch_size is larger than or equal to the number of datasets
assert batch_size >= len(datasets)

# divide the batch between all datasets in the group
ds_batch_size = batch_size // len(datasets)
remaining = batch_size % len(datasets)

for data in self.datasets:
self.dataloaders.append(DataLoader(data, **kwargs))
bs = ds_batch_size
if remaining > 0:
bs += 1
remaining -= 1
self.dataloaders.append(DataLoader(data, batch_size=bs, **kwargs))
self.max_len = max([len(d) for d in self.dataloaders])

def __iter__(self):
@@ -166,6 +184,9 @@ def __len__(self):
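The batch-size division added to `GroupBalancedDataLoader` above can be sketched as a standalone helper (the function name is assumed for illustration): the integer quotient goes to every group, and the first `remainder` groups each get one extra sample.

```python
def split_batch_size(batch_size, n_groups):
    # Divide batch_size across groups, spreading the remainder over the
    # first groups, mirroring the loop in GroupBalancedDataLoader.__init__.
    assert batch_size >= n_groups
    base, remaining = divmod(batch_size, n_groups)
    sizes = []
    for _ in range(n_groups):
        bs = base
        if remaining > 0:
            bs += 1
            remaining -= 1
        sizes.append(bs)
    return sizes


print(split_batch_size(32, 5))  # → [7, 7, 6, 6, 6]
```

Note the per-group sizes always sum to `batch_size`, so the combined mini-batch keeps a fixed size regardless of the number of groups, which is the point of this change.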


class GroupBalancedInfiniteDataLoader:
""" Data loader that balances data from multiple datasets emitting an
infinite stream."""

def __init__(self, datasets: Sequence[AvalancheDataset],
collate_mbatches=_default_collate_mbatches_fn,
**kwargs):
@@ -214,6 +235,8 @@ def __len__(self):


class ReplayDataLoader:
""" Custom data loader for rehearsal/replay strategies."""

def __init__(self, data: AvalancheDataset, memory: AvalancheDataset = None,
oversample_small_tasks: bool = False,
collate_mbatches=_default_collate_mbatches_fn,
@@ -240,7 +263,9 @@ def __init__(self, data: AvalancheDataset, memory: AvalancheDataset = None,
combine the mini-batches obtained separately from each task.
:param batch_size: the size of the batch. It must be greater than or
equal to the number of tasks.
:param ratio_data_mem: How many of the samples should be from
:param force_data_batch_size: How many of the samples should be from the
current `data`. If None, it will equally divide each batch between
samples from all seen tasks in the current `data` and `memory`.
:param kwargs: data loader arguments used to instantiate the loader for
each task separately. See pytorch :class:`DataLoader`.
"""
@@ -256,19 +281,23 @@ def __init__(self, data: AvalancheDataset, memory: AvalancheDataset = None,
assert force_data_batch_size <= batch_size, \
"Forced batch size of data must be <= entire batch size"

mem_batch_size = batch_size - force_data_batch_size
remaining_example = 0
remaining_example_data = 0

mem_keys = len(self.memory.task_set)
mem_batch_size = batch_size - force_data_batch_size
mem_batch_size_k = mem_batch_size // mem_keys
remaining_example_mem = mem_batch_size % mem_keys

assert mem_batch_size >= mem_keys, \
"Batch size must be greater than or equal " \
"to the number of tasks in the memory."

self.loader_data, _ = self._create_dataloaders(
data, force_data_batch_size,
remaining_example, **kwargs)
remaining_example_data, **kwargs)
self.loader_memory, _ = self._create_dataloaders(
memory, mem_batch_size,
remaining_example, **kwargs)
memory, mem_batch_size_k,
remaining_example_mem, **kwargs)
else:
num_keys = len(self.data.task_set) + len(self.memory.task_set)
assert batch_size >= num_keys, \
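The `force_data_batch_size` branch above reserves a fixed share of each mini-batch for the current data and divides the rest across the tasks held in memory. A minimal sketch of that arithmetic (the helper name is assumed, not part of Avalanche):

```python
def replay_split(batch_size, force_data_batch_size, n_mem_tasks):
    # Reserve force_data_batch_size samples for the current experience and
    # split the remainder evenly over the memory tasks, as in the
    # force_data_batch_size branch of ReplayDataLoader.__init__.
    assert force_data_batch_size <= batch_size, \
        "Forced batch size of data must be <= entire batch size"
    mem_batch_size = batch_size - force_data_batch_size
    assert mem_batch_size >= n_mem_tasks, \
        "Batch size must be greater than or equal to the number of " \
        "tasks in the memory."
    per_task, leftover = divmod(mem_batch_size, n_mem_tasks)
    return force_data_batch_size, per_task, leftover


print(replay_split(32, 20, 4))  # → (20, 3, 0)
```

With `force_data_batch_size=None`, the plugin instead falls back to dividing the whole batch equally across all tasks seen in `data` and `memory`, as described in the docstring.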