
Merge pull request #1155 from AndreaCossu/master
Fixed lwf bug for alpha and prev classes per task dict
AndreaCossu committed Oct 19, 2022
2 parents 4e338f2 + 75a34cc commit 21e1672
Showing 14 changed files with 40 additions and 35 deletions.
4 changes: 2 additions & 2 deletions avalanche/benchmarks/classic/ccifar100.py
@@ -165,12 +165,12 @@ def SplitCIFAR110(
 scenario are "Class Incremental", "New Classes", etc. By default,
 an equal amount of classes will be assigned to each experience.
-This generator will apply a task label "0" to all experiences.
+This generator will apply a task label 0 to all experiences.
 The benchmark instance returned by this method will have two fields,
 `train_stream` and `test_stream`, which can be iterated to obtain
 training and test :class:`Experience`. Each Experience contains the
-`dataset` and the associated task label (always "0" for this specific
+`dataset` and the associated task label (always 0 for this specific
 benchmark).
 The benchmark API is quite simple and is uniform across all benchmark
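For context, the API these docstrings describe can be exercised directly. A minimal sketch (not part of this commit, assuming Avalanche ~0.3 and a local torchvision download; the split size is an arbitrary choice):

```python
from avalanche.benchmarks.classic import SplitCIFAR110

benchmark = SplitCIFAR110(n_experiences=5)
for experience in benchmark.train_stream:
    # task_label is the plain int 0 for every experience of this
    # benchmark, matching the corrected docstring.
    print(experience.task_label, len(experience.dataset))
```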
2 changes: 1 addition & 1 deletion avalanche/benchmarks/classic/clear.py
@@ -84,7 +84,7 @@ def CLEAR(
 For 'streaming' protocol, train stream is 100% of current task data,
 and test stream is just a duplicate of train stream.
-The task label "0" will be assigned to each experience.
+The task label 0 will be assigned to each experience.
 :param evaluation_protocol: Choose from ['iid', 'streaming']
     if chosen 'iid', then must specify a seed between [0,1,2,3,4];
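A sketch of the two protocols this docstring mentions (argument names are taken from the docstring itself; defaults may differ across Avalanche versions):

```python
from avalanche.benchmarks.classic import CLEAR

# 'iid' requires a seed in [0, 1, 2, 3, 4] per the docstring.
clear_iid = CLEAR(evaluation_protocol="iid", seed=0)

# 'streaming' duplicates the train stream as the test stream.
clear_streaming = CLEAR(evaluation_protocol="streaming")
```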
4 changes: 2 additions & 2 deletions avalanche/benchmarks/classic/cmnist.py
@@ -183,7 +183,7 @@ def PermutedMNIST(
 training and test :class:`Experience`. Each Experience contains the
 `dataset` and the associated task label.
-A progressive task label, starting from "0", is applied to each experience.
+A progressive task label, starting from 0, is applied to each experience.
 The benchmark API is quite simple and is uniform across all benchmark
 generators. It is recommended to check the tutorial of the "benchmark" API,
@@ -281,7 +281,7 @@ def RotatedMNIST(
 training and test :class:`Experience`. Each Experience contains the
 `dataset` and the associated task label.
-A progressive task label, starting from "0", is applied to each experience.
+A progressive task label, starting from 0, is applied to each experience.
 The benchmark API is quite simple and is uniform across all benchmark
 generators. It is recommended to check the tutorial of the "benchmark" API,
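Unlike the single-task benchmarks, the MNIST variants apply a progressive task label. A small sketch (same Avalanche ~0.3 assumption):

```python
from avalanche.benchmarks.classic import PermutedMNIST

benchmark = PermutedMNIST(n_experiences=3)
# One int task label per experience, increasing from 0 (not the
# string "0"): prints [0, 1, 2].
print([exp.task_label for exp in benchmark.train_stream])
```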
2 changes: 1 addition & 1 deletion avalanche/benchmarks/classic/core50.py
@@ -82,7 +82,7 @@ def CORe50(
 training and test :class:`Experience`. Each Experience contains the
 `dataset` and the associated task label.
-The task label "0" will be assigned to each experience.
+The task label 0 will be assigned to each experience.
 The benchmark API is quite simple and is uniform across all benchmark
 generators. It is recommended to check the tutorial of the "benchmark" API,
2 changes: 1 addition & 1 deletion avalanche/benchmarks/classic/openloris.py
@@ -69,7 +69,7 @@ def OpenLORIS(
 training and test :class:`Experience`. Each Experience contains the
 `dataset` and the associated task label.
-The task label "0" will be assigned to each experience.
+The task label 0 will be assigned to each experience.
 The benchmark API is quite simple and is uniform across all benchmark
 generators. It is recommended to check the tutorial of the "benchmark" API,
4 changes: 2 additions & 2 deletions avalanche/benchmarks/generators/benchmark_generators.py
@@ -81,7 +81,7 @@ class won't be assigned to more than one experience.
 Task-Incremental benchmarks.
 The ``task_labels`` parameter determines if each incremental experience has
-an increasing task label or if, at the contrary, a default task label "0"
+an increasing task label or if, at the contrary, a default task label 0
 has to be assigned to all experiences. This can be useful when
 differentiating between Single-Incremental-Task and Multi-Task scenarios.
@@ -270,7 +270,7 @@ def ni_benchmark(
 Domain-Incremental benchmarks.
 The ``task_labels`` parameter determines if each incremental experience has
-an increasing task label or if, at the contrary, a default task label "0"
+an increasing task label or if, at the contrary, a default task label 0
 has to be assigned to all experiences. This can be useful when
 differentiating between Single-Incremental-Task and Multi-Task scenarios.
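The effect of the ``task_labels`` flag is easiest to see side by side. A sketch assuming torchvision MNIST as the source dataset:

```python
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
from avalanche.benchmarks.generators import nc_benchmark

train_set = MNIST("./data", train=True, download=True, transform=ToTensor())
test_set = MNIST("./data", train=False, download=True, transform=ToTensor())

# task_labels=False: Single-Incremental-Task, every experience
# carries the default task label 0.
sit = nc_benchmark(train_set, test_set, n_experiences=5, task_labels=False)

# task_labels=True: Multi-Task, task labels increase 0, 1, 2, ...
mt = nc_benchmark(train_set, test_set, n_experiences=5, task_labels=True)
```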
4 changes: 2 additions & 2 deletions avalanche/benchmarks/generators/scenario_generators.py
@@ -67,7 +67,7 @@ def nc_scenario(
 class won't be assigned to more than one experience.
 The ``task_labels`` parameter determines if each incremental experience has
-an increasing task label or if, at the contrary, a default task label "0"
+an increasing task label or if, at the contrary, a default task label 0
 has to be assigned to all experiences. This can be useful when
 differentiating between Single-Incremental-Task and Multi-Task scenarios.
@@ -238,7 +238,7 @@ def ni_scenario(
 experience will contain patterns belonging to the same classes.
 The ``task_labels`` parameter determines if each incremental experience has
-an increasing task label or if, at the contrary, a default task label "0"
+an increasing task label or if, at the contrary, a default task label 0
 has to be assigned to all experiences. This can be useful when
 differentiating between Single-Incremental-Task and Multi-Task scenarios.
10 changes: 5 additions & 5 deletions avalanche/benchmarks/utils/classification_dataset.py
@@ -154,7 +154,7 @@ def make_classification_dataset(
 This dataset will try to inherit the task labels from the input
 dataset. If none are available and none are given via the `task_labels`
-parameter, each pattern will be assigned a default task label "0".
+parameter, each pattern will be assigned a default task label 0.
 Creates a ``AvalancheDataset`` instance.
@@ -187,7 +187,7 @@ def make_classification_dataset(
 task label for all the instances. Defaults to None, which means that
 the dataset will try to obtain the task labels from the original
 dataset. If no task labels could be found, a default task label
-"0" will be applied to all instances.
+0 will be applied to all instances.
 :param targets: The label of each pattern. Defaults to None, which
 means that the targets will be retrieved from the dataset (if
 possible).
@@ -410,7 +410,7 @@ def classification_subset(
 that value will be used as the task label for all the instances.
 Defaults to None, which means that the dataset will try to
 obtain the task labels from the original dataset. If no task labels
-could be found, a default task label "0" will be applied to all
+could be found, a default task label 0 will be applied to all
 instances.
 :param targets: The label of each pattern. Defaults to None, which
 means that the targets will be retrieved from the dataset (if
@@ -530,7 +530,7 @@ def make_tensor_classification_dataset(
 of ints, one for each pattern in the dataset. Alternatively can be a
 single int value, in which case that value will be used as the task
 label for all the instances. Defaults to None, which means that a
-default task label "0" will be applied to all patterns.
+default task label 0 will be applied to all patterns.
 :param targets: The label of each pattern. Defaults to None, which
 means that the targets will be retrieved from the second tensor of
 the dataset. Otherwise, it can be a sequence of values containing
@@ -645,7 +645,7 @@ def concat_classification_datasets(
 in which case that value will be used as the task label for all
 instances. Defaults to None, which means that the dataset will try
 to obtain the task labels from the original datasets. If no task
-labels could be found for a dataset, a default task label "0" will
+labels could be found for a dataset, a default task label 0 will
 be applied to all patterns of that dataset.
 :param collate_fn: The function to use when slicing to merge single
 patterns. In the future this function may become the function
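A sketch of the task-label fallback these docstrings describe; the TensorDataset and the explicit ``targets`` argument are illustrative assumptions:

```python
import torch
from torch.utils.data import TensorDataset
from avalanche.benchmarks.utils import make_classification_dataset

xs = torch.randn(10, 3)
ys = torch.randint(0, 2, (10,))
plain = TensorDataset(xs, ys)

# A plain TensorDataset carries no task labels, so we pass an int:
# every pattern is tagged with task label 0 (an int, per the new docs).
dataset = make_classification_dataset(plain, task_labels=0, targets=ys)
print(set(dataset.targets_task_labels))  # {0}
```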
2 changes: 1 addition & 1 deletion avalanche/evaluation/metrics/amca.py
@@ -89,7 +89,7 @@ class to 0.0.
 :param classes: The classes to keep track of. If None (default), all
 classes seen are tracked. Otherwise, it can be a dict of classes
 to be tracked (as "task-id" -> "list of class ids") or, if running
-a task-free benchmark (with only task "0"), a simple list of class
+a task-free benchmark (with only task 0), a simple list of class
 ids. By passing this parameter, the list of classes to be considered
 is created immediately. This will ensure that the mean class
 accuracy is correctly computed. In addition, this can be used to
2 changes: 1 addition & 1 deletion avalanche/evaluation/metrics/class_accuracy.py
@@ -70,7 +70,7 @@ def __init__(self, classes: Optional[TrackedClassesType] = None):
 :param classes: The classes to keep track of. If None (default), all
 classes seen are tracked. Otherwise, it can be a dict of classes
 to be tracked (as "task-id" -> "list of class ids") or, if running
-a task-free benchmark (with only task "0"), a simple list of class
+a task-free benchmark (with only task 0), a simple list of class
 ids. By passing this parameter, the plot of each class is
 created immediately (with a default value of 0.0) and plots
 will be aligned across all classes. In addition, this can be used to
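The two accepted shapes of the ``classes`` parameter, sketched with the metric helpers (helper name as in Avalanche ~0.3; the class ids are arbitrary):

```python
from avalanche.evaluation.metrics import class_accuracy_metrics

# Multi-task form: a dict mapping task id -> list of class ids.
per_task = class_accuracy_metrics(
    experience=True, classes={0: [0, 1], 1: [2, 3]}
)

# Task-free form (only task 0): a flat list of class ids.
task_free = class_accuracy_metrics(experience=True, classes=[0, 1, 2, 3])
```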
7 changes: 4 additions & 3 deletions avalanche/models/dynamic_modules.py
@@ -351,8 +351,8 @@ def adaptation(self, experience: CLExperience):
             task_labels = [task_labels[0]]

         for tid in set(task_labels):
-            tid = str(tid)
             # head adaptation
+            tid = str(tid)  # need str keys
             if tid not in self.classifiers:  # create new head
                 new_head = IncrementalClassifier(
                     self.in_features, self.starting_out_features
@@ -379,7 +379,7 @@ def adaptation(self, experience: CLExperience):
             )

             au_name = f"active_units_T{tid}"
-            curr_head = self.classifiers[str(tid)]
+            curr_head = self.classifiers[tid]
             old_nunits = self._buffers[au_name].shape[0]

             new_nclasses = max(
@@ -405,7 +405,8 @@ def forward_single_task(self, x, task_label):
         :param task_label:
         :return:
         """
-        out = self.classifiers[str(task_label)](x)
+        task_label = str(task_label)
+        out = self.classifiers[task_label](x)
         if self.masking:
             au_name = f"active_units_T{task_label}"
             curr_au = self._buffers[au_name]
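The str() round-trips above exist because torch.nn.ModuleDict only accepts string keys, while Avalanche task labels are ints. A standalone illustration (not Avalanche code):

```python
import torch.nn as nn

heads = nn.ModuleDict()
tid = 1  # task labels arrive as ints

heads[str(tid)] = nn.Linear(8, 2)  # fine: ModuleDict keys must be str
# heads[tid] = nn.Linear(8, 2)     # would raise TypeError

# Converting once, as forward_single_task now does, keeps the head
# lookup and the buffer-name f-string consistent.
task_label = str(tid)
out_layer = heads[task_label]
au_name = f"active_units_T{task_label}"
```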
1 change: 1 addition & 0 deletions avalanche/training/plugins/lwf.py
@@ -23,6 +23,7 @@ def before_backward(self, strategy, **kwargs):
         """
         Add distillation loss
         """
+
         strategy.loss += self.lwf(
             strategy.mb_x, strategy.mb_output, strategy.model)
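This before_backward hook runs once the plugin is attached to a strategy. A usage sketch (model and hyperparameters are arbitrary choices, assuming the Avalanche ~0.3 training API):

```python
from torch.nn import CrossEntropyLoss
from torch.optim import SGD
from avalanche.models import SimpleMLP
from avalanche.training import Naive
from avalanche.training.plugins import LwFPlugin

model = SimpleMLP(num_classes=10)
strategy = Naive(
    model,
    SGD(model.parameters(), lr=0.01),
    CrossEntropyLoss(),
    train_mb_size=32,
    # The plugin adds the distillation term to strategy.loss
    # right before each backward pass.
    plugins=[LwFPlugin(alpha=1, temperature=2)],
)
```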
29 changes: 16 additions & 13 deletions avalanche/training/regularization.py
@@ -4,11 +4,11 @@
 import torch

 from avalanche.models import MultiTaskModule, avalanche_forward
+from collections import defaultdict


 class RegularizationMethod:
     """RegularizationMethod implement regularization strategies.
     RegularizationMethod is a callable.
     The method `update` is called to update the loss, typically at the end
     of an experience.
@@ -37,13 +37,14 @@ def __init__(self, alpha=1, temperature=2):
         self.alpha = alpha
         self.temperature = temperature
         self.prev_model = None
-        self.expcount = 0  # count number of experiences (used to increase alpha)
-
-        self.prev_classes_by_task = {"0": set()}
+        self.expcount = 0
+        # count number of experiences (used to increase alpha)
+        self.prev_classes_by_task = defaultdict(set)
         """ In Avalanche, targets of different experiences are not ordered.
         As a result, some units may be allocated even though their
         corresponding class has never been seen by the model.
-        Knowledge distillation uses only units corresponding to old classes.
+        Knowledge distillation uses only units corresponding
+        to old classes.
         """

     def _distillation_loss(self, out, prev_out, active_units):
@@ -69,16 +70,16 @@ def _lwf_penalty(self, out, x, curr_model):
             # output from previous output heads.
             with torch.no_grad():
                 y_prev = avalanche_forward(self.prev_model, x, None)
-            y_prev = {str(k): v for k, v in y_prev.items()}
+            y_prev = {k: v for k, v in y_prev.items()}
             # in a multitask scenario we need to compute the output
             # from all the heads, so we need to call forward again.
             # TODO: can we avoid this?
             y_curr = avalanche_forward(curr_model, x, None)
-            y_curr = {str(k): v for k, v in y_curr.items()}
+            y_curr = {k: v for k, v in y_curr.items()}
         else:  # no task labels. Single task LwF
             with torch.no_grad():
-                y_prev = {"0": self.prev_model(x)}
-            y_curr = {"0": out}
+                y_prev = {0: self.prev_model(x)}
+            y_curr = {0: out}

         dist_loss = 0
         for task_id in y_prev.keys():
@@ -95,7 +96,7 @@ def __call__(self, mb_x, mb_pred, model):
         Add distillation loss
         """
         alpha = (
-            self.expcount
+            self.alpha[self.expcount]
             if isinstance(self.alpha, (list, tuple))
             else self.alpha
         )
@@ -108,22 +109,24 @@ def update(self, experience, model):
         :param experience: current experience
         :param model: current model
         """
+
         self.expcount += 1
         self.prev_model = copy.deepcopy(model)
         task_ids = experience.dataset.targets_task_labels.uniques

         for task_id in task_ids:
             task_data = experience.dataset.task_set[task_id]
             pc = set(task_data.targets.uniques)

             if task_id not in self.prev_classes_by_task:
-                self.prev_classes_by_task[str(task_id)] = pc
+                self.prev_classes_by_task[task_id] = pc
             else:
-                self.prev_classes_by_task[str(task_id)] = self.prev_classes_by_task[
+                self.prev_classes_by_task[task_id] = self.prev_classes_by_task[
                     task_id
                 ].union(pc)


 __all__ = [
     "RegularizationMethod",
     "LearningWithoutForgetting"
-]
\ No newline at end of file
+]
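The alpha hunk above is the bug named in the commit title: with a list-valued alpha (one coefficient per experience), __call__ previously used the raw experience counter self.expcount as the coefficient instead of indexing the list. A sketch of the corrected behaviour (values are arbitrary):

```python
from avalanche.training.regularization import LearningWithoutForgetting

# One distillation coefficient per experience.
lwf = LearningWithoutForgetting(alpha=[0.5, 1.0, 2.0], temperature=2)

# After two update() calls, expcount == 2, so the next __call__
# scales the distillation loss by alpha[2] == 2.0. Before the fix
# it would have used the counter value 2 itself, whatever the list.
```

The defaultdict(set) change in the same file removes the str-keyed {"0": set()} initialisation, so update() and _lwf_penalty() now agree on int task-id keys.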
2 changes: 1 addition & 1 deletion tests/training/test_regularization.py
@@ -47,7 +47,7 @@ def test_lwf(self):
         # non-zero loss for all the previous heads
         loss.backward()
         for tid in lwf.prev_classes_by_task.keys():
-            head = model.classifier.classifiers[tid]
+            head = model.classifier.classifiers[str(tid)]
             weight = head.classifier.weight
             assert weight.grad is not None
             assert torch.norm(weight.grad) > 0
