From 136c69ebde7b3ca037444c333542bae21fb71165 Mon Sep 17 00:00:00 2001 From: hamedhemati Date: Tue, 12 Jul 2022 16:47:53 +0200 Subject: [PATCH 01/10] Initialize new template structure --- .../supervised/strategy_wrappers_temp.py | 103 ++++ .../training/templates/base_general_sgd.py | 516 ++++++++++++++++++ .../training/templates/common_templates.py | 227 ++++++++ .../templates/observation_type/__init__.py | 6 + .../observation_type/batch_observation.py | 46 ++ .../observation_type/online_observation.py | 58 ++ .../templates/problem_type/__init__.py | 5 + .../problem_type/supervised_problem.py | 31 ++ .../templates/update_type/__init__.py | 5 + .../templates/update_type/meta_update.py | 27 + .../templates/update_type/sgd_update.py | 36 ++ 11 files changed, 1060 insertions(+) create mode 100644 avalanche/training/supervised/strategy_wrappers_temp.py create mode 100644 avalanche/training/templates/base_general_sgd.py create mode 100644 avalanche/training/templates/common_templates.py create mode 100644 avalanche/training/templates/observation_type/__init__.py create mode 100644 avalanche/training/templates/observation_type/batch_observation.py create mode 100644 avalanche/training/templates/observation_type/online_observation.py create mode 100644 avalanche/training/templates/problem_type/__init__.py create mode 100644 avalanche/training/templates/problem_type/supervised_problem.py create mode 100644 avalanche/training/templates/update_type/__init__.py create mode 100644 avalanche/training/templates/update_type/meta_update.py create mode 100644 avalanche/training/templates/update_type/sgd_update.py diff --git a/avalanche/training/supervised/strategy_wrappers_temp.py b/avalanche/training/supervised/strategy_wrappers_temp.py new file mode 100644 index 000000000..19bcbba36 --- /dev/null +++ b/avalanche/training/supervised/strategy_wrappers_temp.py @@ -0,0 +1,103 @@ +from typing import Optional, List +from torch.nn import Module, CrossEntropyLoss +from torch.optim import Optimizer + +from avalanche.training.plugins import EvaluationPlugin +from avalanche.training.templates.common_templates import ( + SupervisedTemplate, + OnlineSupervisedTemplate +) +from avalanche.training.plugins.evaluation import default_evaluator +from avalanche.training.plugins import SupervisedPlugin + + +class Naive(SupervisedTemplate): + def __init__( + self, + model: Module, + optimizer: Optimizer, + criterion=CrossEntropyLoss(), + train_mb_size: int = 1, + train_epochs: int = 1, + eval_mb_size: Optional[int] = None, + device=None, + plugins: Optional[List[SupervisedPlugin]] = None, + evaluator: EvaluationPlugin = default_evaluator, + eval_every=-1, + **base_kwargs + ): + super().__init__( + model, + optimizer, + criterion, + train_mb_size=train_mb_size, + train_epochs=train_epochs, + eval_mb_size=eval_mb_size, + device=device, + plugins=plugins, + evaluator=evaluator, + eval_every=eval_every, + **base_kwargs + ) + + +class OnlineNaive(OnlineSupervisedTemplate): + """Online naive finetuning. + + The simplest (and least effective) Continual Learning strategy. Naive just + incrementally fine tunes a single model without employing any method + to contrast the catastrophic forgetting of previous knowledge. + This strategy does not use task identities. + + Naive is easy to set up and its results are commonly used to show the worst + performing baseline. 
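+
+    A minimal usage sketch, assuming an online stream of short
+    sub-experiences has already been built from a regular benchmark stream
+    (the helper used to produce that split is not shown here)::
+
+        strategy = OnlineNaive(model, optimizer, train_mb_size=10)
+        for sub_experience in online_train_stream:
+            strategy.train(sub_experience)
+        results = strategy.eval(benchmark.test_stream)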
+ """ + + def __init__( + self, + model: Module, + optimizer: Optimizer, + criterion=CrossEntropyLoss(), + train_passes: int = 1, + train_mb_size: int = 1, + eval_mb_size: int = None, + device=None, + plugins: Optional[List[SupervisedPlugin]] = None, + evaluator: EvaluationPlugin = default_evaluator, + eval_every=-1, + ): + """ + Creates an instance of the Naive strategy. + + :param model: The model. + :param optimizer: The optimizer to use. + :param criterion: The loss criterion to use. + :param num_passes: The number of passes for each sub-experience. + Defaults to 1. + :param train_mb_size: The train minibatch size. Defaults to 1. + :param eval_mb_size: The eval minibatch size. Defaults to 1. + :param device: The device to use. Defaults to None (cpu). + :param plugins: Plugins to be added. Defaults to None. + :param evaluator: (optional) instance of EvaluationPlugin for logging + and metric computations. + :param eval_every: the frequency of the calls to `eval` inside the + training loop. -1 disables the evaluation. 0 means `eval` is called + only at the end of the learning experience. Values >0 mean that + `eval` is called every `eval_every` epochs and at the end of the + learning experience. + """ + super().__init__( + model, + optimizer, + criterion, + train_passes=train_passes, + train_mb_size=train_mb_size, + eval_mb_size=eval_mb_size, + device=device, + plugins=plugins, + evaluator=evaluator, + eval_every=eval_every, + ) + + +__all__ = ["Naive", "OnlineNaive"] diff --git a/avalanche/training/templates/base_general_sgd.py b/avalanche/training/templates/base_general_sgd.py new file mode 100644 index 000000000..954bfe9d0 --- /dev/null +++ b/avalanche/training/templates/base_general_sgd.py @@ -0,0 +1,516 @@ +from typing import Iterable, Sequence, Optional, Union, List +from pkg_resources import parse_version + +import torch +from torch.nn import Module, CrossEntropyLoss +from torch.optim import Optimizer +from torch.utils.data import DataLoader + +from avalanche.benchmarks import CLExperience, CLStream +from avalanche.core import BaseSGDPlugin +from avalanche.training.plugins import SupervisedPlugin, EvaluationPlugin +from avalanche.training.plugins.clock import Clock +from avalanche.training.plugins.evaluation import default_evaluator +from avalanche.training.templates.base import BaseTemplate, ExpSequence +from avalanche.models.utils import avalanche_model_adaptation +from avalanche.benchmarks.utils.data_loader import TaskBalancedDataLoader, \ + collate_from_data_or_kwargs +from avalanche.training.utils import trigger_plugins + + +class BaseGeneralSGDTemplate(BaseTemplate): + """Base SGD class for continual learning skeletons. + + **Training loop** + The training loop is organized as follows:: + + train + train_exp # for each experience + + **Evaluation loop** + The evaluation loop is organized as follows:: + + eval + eval_exp # for each experience + + """ + + PLUGIN_CLASS = BaseSGDPlugin + + def __init__( + self, + model: Module, + optimizer: Optimizer, + criterion=CrossEntropyLoss(), + train_mb_size: int = 1, + train_epochs: int = 1, + eval_mb_size: Optional[int] = 1, + device="cpu", + plugins: Optional[List["SupervisedPlugin"]] = None, + evaluator: EvaluationPlugin = default_evaluator, + eval_every=-1, + peval_mode="epoch", + ): + """Init. + + :param model: PyTorch model. + :param optimizer: PyTorch optimizer. + :param criterion: loss function. + :param train_mb_size: mini-batch size for training. + :param train_epochs: number of training epochs. 
+ :param eval_mb_size: mini-batch size for eval. + :param evaluator: (optional) instance of EvaluationPlugin for logging + and metric computations. None to remove logging. + :param eval_every: the frequency of the calls to `eval` inside the + training loop. -1 disables the evaluation. 0 means `eval` is called + only at the end of the learning experience. Values >0 mean that + `eval` is called every `eval_every` epochs and at the end of the + learning experience. + :param peval_mode: one of {'epoch', 'iteration'}. Decides whether the + periodic evaluation during training should execute every + `eval_every` epochs or iterations (Default='epoch'). + """ + super().__init__(model=model, device=device, plugins=plugins) + + self.optimizer: Optimizer = optimizer + """ PyTorch optimizer. """ + + self._criterion = criterion + """ Criterion. """ + + self.train_epochs: int = train_epochs + """ Number of training epochs. """ + + self.train_mb_size: int = train_mb_size + """ Training mini-batch size. """ + + self.eval_mb_size: int = ( + train_mb_size if eval_mb_size is None else eval_mb_size + ) + """ Eval mini-batch size. """ + + if evaluator is None: + evaluator = EvaluationPlugin() + self.plugins.append(evaluator) + self.evaluator = evaluator + """ EvaluationPlugin used for logging and metric computations. """ + + # Configure periodic evaluation. + assert peval_mode in {"experience", "epoch", "iteration"} + self.eval_every = eval_every + peval = PeriodicEval(eval_every, peval_mode) + self.plugins.append(peval) + + self.clock = Clock() + """ Incremental counters for strategy events. """ + # WARNING: Clock needs to be the last plugin, otherwise + # counters will be wrong for plugins called after it. + self.plugins.append(self.clock) + + ################################################################### + # State variables. These are updated during the train/eval loops. # + ################################################################### + + self.adapted_dataset = None + """ Data used to train. It may be modified by plugins. Plugins can + append data to it (e.g. for replay). + + .. note:: + + This dataset may contain samples from different experiences. If you + want the original data for the current experience + use :attr:`.BaseTemplate.experience`. + """ + + self.dataloader = None + """ Dataloader. """ + + self.mbatch = None + """ Current mini-batch. """ + + self.mb_output = None + """ Model's output computed on the current mini-batch. """ + + self.loss = None + """ Loss of the current mini-batch. """ + + self._stop_training = False + + def train(self, + experiences: Union[CLExperience, + ExpSequence], + eval_streams: Optional[Sequence[Union[CLExperience, + ExpSequence]]] = None, + **kwargs): + super().train(experiences, eval_streams, **kwargs) + return self.evaluator.get_last_metrics() + + @torch.no_grad() + def eval(self, exp_list: Union[CLExperience, CLStream], **kwargs): + """ + Evaluate the current model on a series of experiences and + returns the last recorded value for each metric. + + :param exp_list: CL experience information. + :param kwargs: custom arguments. 
+ + :return: dictionary containing last recorded value for + each metric name + """ + super().eval(exp_list, **kwargs) + return self.evaluator.get_last_metrics() + + def _train_exp( + self, experience: CLExperience, eval_streams, **kwargs + ): + # Should be implemented in Observation Type + raise NotImplementedError() + + def _eval_exp(self, **kwargs): + self.eval_epoch(**kwargs) + + def make_optimizer(self, **kwargs): + """Optimizer initialization.""" + # Should be implemented in Observation Type + raise NotImplementedError() + + def criterion(self): + """Compute loss function.""" + raise NotImplementedError() + + def forward(self): + """Compute the model's output given the current mini-batch.""" + raise NotImplementedError() + + def model_adaptation(self, model=None): + """Adapts the model to the current experience.""" + raise NotImplementedError() + + def stop_training(self): + """Signals to stop training at the next iteration.""" + self._stop_training = True + + def training_epoch(self, **kwargs): + # Should be implemented in Update Type + raise NotADirectoryError() + + def backward(self): + """Run the backward pass.""" + self.loss.backward() + + def optimizer_step(self): + """Execute the optimizer step (weights update).""" + self.optimizer.step() + + def eval_epoch(self, **kwargs): + """Evaluation loop over the current `self.dataloader`.""" + for self.mbatch in self.dataloader: + self._unpack_minibatch() + self._before_eval_iteration(**kwargs) + + self._before_eval_forward(**kwargs) + self.mb_output = self.forward() + self._after_eval_forward(**kwargs) + self.loss = self.criterion() + + self._after_eval_iteration(**kwargs) + + # ==================================================================> NEW + + def maybe_adapt_model_and_make_optimizer(self): + # Should be implemented in observation type + raise NotImplementedError() + + def _before_training_exp(self, **kwargs): + """Setup to train on a single experience.""" + # Data Adaptation (e.g. add new samples/data augmentation) + self._before_train_dataset_adaptation(**kwargs) + self.train_dataset_adaptation(**kwargs) + self._after_train_dataset_adaptation(**kwargs) + + self.make_train_dataloader(**kwargs) + + # Model Adaptation (e.g. freeze/add new units) + # self.model = self.model_adaptation() + # self.make_optimizer() + self.maybe_adapt_model_and_make_optimizer() + + super()._before_training_exp(**kwargs) + + def _save_train_state(self): + """Save the training state which may be modified by the eval loop. + + This currently includes: experience, adapted_dataset, dataloader, + is_training, and train/eval modes for each module. + + TODO: we probably need a better way to do this. + """ + state = super()._save_train_state() + new_state = { + "adapted_dataset": self.adapted_dataset, + "dataloader": self.dataloader, + } + return {**state, **new_state} + + def train_dataset_adaptation(self, **kwargs): + """Initialize `self.adapted_dataset`.""" + self.adapted_dataset = self.experience.dataset + self.adapted_dataset = self.adapted_dataset.train() + + def _load_train_state(self, prev_state): + super()._load_train_state(prev_state) + self.adapted_dataset = prev_state["adapted_dataset"] + self.dataloader = prev_state["dataloader"] + + def _before_eval_exp(self, **kwargs): + + # Data Adaptation + self._before_eval_dataset_adaptation(**kwargs) + self.eval_dataset_adaptation(**kwargs) + self._after_eval_dataset_adaptation(**kwargs) + + self.make_eval_dataloader(**kwargs) + # Model Adaptation (e.g. 
freeze/add new units) + self.model = self.model_adaptation() + + super()._before_eval_exp(**kwargs) + + def make_train_dataloader( + self, + num_workers=0, + shuffle=True, + pin_memory=True, + persistent_workers=False, + **kwargs + ): + """Data loader initialization. + + Called at the start of each learning experience after the dataset + adaptation. + + :param num_workers: number of thread workers for the data loading. + :param shuffle: True if the data should be shuffled, False otherwise. + :param pin_memory: If True, the data loader will copy Tensors into CUDA + pinned memory before returning them. Defaults to True. + """ + + other_dataloader_args = {} + + if parse_version(torch.__version__) >= parse_version("1.7.0"): + other_dataloader_args["persistent_workers"] = persistent_workers + for k, v in kwargs.items(): + other_dataloader_args[k] = v + + self.dataloader = TaskBalancedDataLoader( + self.adapted_dataset, + oversample_small_groups=True, + num_workers=num_workers, + batch_size=self.train_mb_size, + shuffle=shuffle, + pin_memory=pin_memory, + **other_dataloader_args + ) + + def make_eval_dataloader( + self, num_workers=0, pin_memory=True, persistent_workers=False, **kwargs + ): + """ + Initializes the eval data loader. + :param num_workers: How many subprocesses to use for data loading. + 0 means that the data will be loaded in the main process. + (default: 0). + :param pin_memory: If True, the data loader will copy Tensors into CUDA + pinned memory before returning them. Defaults to True. + :param kwargs: + :return: + """ + other_dataloader_args = {} + + if parse_version(torch.__version__) >= parse_version("1.7.0"): + other_dataloader_args["persistent_workers"] = persistent_workers + for k, v in kwargs.items(): + other_dataloader_args[k] = v + + collate_from_data_or_kwargs(self.adapted_dataset, + other_dataloader_args) + self.dataloader = DataLoader( + self.adapted_dataset, + num_workers=num_workers, + batch_size=self.eval_mb_size, + pin_memory=pin_memory, + **other_dataloader_args + ) + + def eval_dataset_adaptation(self, **kwargs): + """Initialize `self.adapted_dataset`.""" + self.adapted_dataset = self.experience.dataset + self.adapted_dataset = self.adapted_dataset.eval() + + def model_adaptation(self, model=None): + """Adapts the model to the current data. + + Calls the :class:`~avalanche.models.DynamicModule`s adaptation. 
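+
+        For dynamic architectures (e.g. a multi-head or incremental
+        classifier), this is where new units for the classes and tasks of
+        the current experience are added, before the optimizer is
+        (re-)initialized for training.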
+ """ + if model is None: + model = self.model + avalanche_model_adaptation(model, self.experience) + return model.to(self.device) + + def _unpack_minibatch(self): + """Move to device""" + # First verify the mini-batch + self._check_minibatch() + + for i in range(len(self.mbatch)): + self.mbatch[i] = self.mbatch[i].to(self.device) + + ######################################################### + # Plugin Triggers # + ######################################################### + + def _before_training_epoch(self, **kwargs): + trigger_plugins(self, "before_training_epoch", **kwargs) + + def _after_training_epoch(self, **kwargs): + trigger_plugins(self, "after_training_epoch", **kwargs) + + def _before_training_iteration(self, **kwargs): + trigger_plugins(self, "before_training_iteration", **kwargs) + + def _before_forward(self, **kwargs): + trigger_plugins(self, "before_forward", **kwargs) + + def _after_forward(self, **kwargs): + trigger_plugins(self, "after_forward", **kwargs) + + def _before_backward(self, **kwargs): + trigger_plugins(self, "before_backward", **kwargs) + + def _after_backward(self, **kwargs): + trigger_plugins(self, "after_backward", **kwargs) + + def _after_training_iteration(self, **kwargs): + trigger_plugins(self, "after_training_iteration", **kwargs) + + def _before_update(self, **kwargs): + trigger_plugins(self, "before_update", **kwargs) + + def _after_update(self, **kwargs): + trigger_plugins(self, "after_update", **kwargs) + + def _before_eval_iteration(self, **kwargs): + trigger_plugins(self, "before_eval_iteration", **kwargs) + + def _before_eval_forward(self, **kwargs): + trigger_plugins(self, "before_eval_forward", **kwargs) + + def _after_eval_forward(self, **kwargs): + trigger_plugins(self, "after_eval_forward", **kwargs) + + def _after_eval_iteration(self, **kwargs): + trigger_plugins(self, "after_eval_iteration", **kwargs) + + # ==================================================================> NEW + + def _before_train_dataset_adaptation(self, **kwargs): + trigger_plugins(self, "before_train_dataset_adaptation", **kwargs) + + def _after_train_dataset_adaptation(self, **kwargs): + trigger_plugins(self, "after_train_dataset_adaptation", **kwargs) + + def _before_eval_dataset_adaptation(self, **kwargs): + trigger_plugins(self, "before_eval_dataset_adaptation", **kwargs) + + def _after_eval_dataset_adaptation(self, **kwargs): + trigger_plugins(self, "after_eval_dataset_adaptation", **kwargs) + + +class PeriodicEval(SupervisedPlugin): + """Schedules periodic evaluation during training. + + This plugin is automatically configured and added by the BaseTemplate. + """ + + def __init__(self, eval_every=-1, peval_mode="epoch", do_initial=True): + """Init. + + :param eval_every: the frequency of the calls to `eval` inside the + training loop. -1 disables the evaluation. 0 means `eval` is called + only at the end of the learning experience. Values >0 mean that + `eval` is called every `eval_every` epochs and at the end of the + learning experience. + :param peval_mode: one of {'epoch', 'iteration'}. Decides whether the + periodic evaluation during training should execute every + `eval_every` epochs or iterations (Default='epoch'). + :param do_initial: whether to evaluate before each `train` call. + Occasionally needed becuase some metrics need to know the + accuracy before training. 
+ """ + super().__init__() + assert peval_mode in {"experience", "epoch", "iteration"} + self.eval_every = eval_every + self.peval_mode = peval_mode + self.do_initial = do_initial and eval_every > -1 + self.do_final = None + self._is_eval_updated = False + + def before_training(self, strategy, **kwargs): + """Eval before each learning experience. + + Occasionally needed because some metrics need the accuracy before + training. + """ + if self.do_initial: + self._peval(strategy, **kwargs) + + def before_training_exp(self, strategy, **kwargs): + # We evaluate at the start of each experience because train_epochs + # could change. + self.do_final = True + if self.peval_mode == "epoch": + if ( + self.eval_every > 0 + and (strategy.train_epochs - 1) % self.eval_every == 0 + ): + self.do_final = False + else: # peval_mode == 'iteration' + # we may need to fix this but we don't have a way to know + # the number of total iterations. + # Right now there may be two eval calls at the last iterations. + pass + self.do_final = self.do_final and self.eval_every > -1 + + def _peval(self, strategy, **kwargs): + for el in strategy._eval_streams: + strategy.eval(el, **kwargs) + + def _maybe_peval(self, strategy, counter, **kwargs): + if self.eval_every > 0 and counter % self.eval_every == 0: + self._peval(strategy, **kwargs) + + def after_training_epoch(self, strategy: "BaseSGDTemplate", **kwargs): + """Periodic eval controlled by `self.eval_every` and + `self.peval_mode`.""" + if self.peval_mode == "epoch": + self._maybe_peval(strategy, strategy.clock.train_exp_epochs, + **kwargs) + + def after_training_iteration(self, strategy: "BaseSGDTemplate", **kwargs): + """Periodic eval controlled by `self.eval_every` and + `self.peval_mode`.""" + if self.peval_mode == "iteration": + self._maybe_peval(strategy, strategy.clock.train_exp_iterations, + **kwargs) + + # ---> New + def after_training_exp(self, strategy, **kwargs): + """Final eval after a learning experience.""" + if self.do_final: + self._peval(strategy, **kwargs) + + # def after_training_exp(self, strategy: "BaseOnlineSGDTemplate", **kwargs): + # """Periodic eval controlled by `self.eval_every` and + # `self.peval_mode`.""" + # if self.peval_mode == "experience": + # self._maybe_peval(strategy, strategy.clock.train_exp_counter, + # **kwargs) diff --git a/avalanche/training/templates/common_templates.py b/avalanche/training/templates/common_templates.py new file mode 100644 index 000000000..f5b055e17 --- /dev/null +++ b/avalanche/training/templates/common_templates.py @@ -0,0 +1,227 @@ +from typing import Sequence, Optional + +from torch.nn import Module, CrossEntropyLoss +from torch.optim import Optimizer + +from avalanche.training.plugins import SupervisedPlugin +from avalanche.training.plugins.evaluation import default_evaluator + +from .observation_type import * +from .problem_type import * +from .update_type import * +from .base_general_sgd import BaseGeneralSGDTemplate + + +class SupervisedTemplate(BatchObservation, SupervisedProblem, SGDUpdate, + BaseGeneralSGDTemplate): + """Base class for continual learning strategies. + + BaseTemplate is the super class of all task-based continual learning + strategies. It implements a basic training loop and callback system + that allows to execute code at each experience of the training loop. + Plugins can be used to implement callbacks to augment the training + loop with additional behavior (e.g. a memory buffer for replay). 
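+
+    The class itself is assembled from three mixins (``BatchObservation``,
+    ``SupervisedProblem`` and ``SGDUpdate``) on top of the general SGD
+    template, so concrete strategies are usually thin wrappers around it.
+    A minimal sketch, assuming a classification ``model`` and a
+    ``benchmark`` are already available::
+
+        from torch.optim import SGD
+
+        strategy = SupervisedTemplate(
+            model, SGD(model.parameters(), lr=0.01),
+            train_mb_size=32, train_epochs=2)
+        for experience in benchmark.train_stream:
+            strategy.train(experience)
+            strategy.eval(benchmark.test_stream)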
+ + **Scenarios** + This strategy supports several continual learning scenarios: + + * class-incremental scenarios (no task labels) + * multi-task scenarios, where task labels are provided) + * multi-incremental scenarios, where the same task may be revisited + + The exact scenario depends on the data stream and whether it provides + the task labels. + + **Training loop** + The training loop is organized as follows:: + + train + train_exp # for each experience + adapt_train_dataset + train_dataset_adaptation + make_train_dataloader + train_epoch # for each epoch + # forward + # backward + # model update + + **Evaluation loop** + The evaluation loop is organized as follows:: + + eval + eval_exp # for each experience + adapt_eval_dataset + eval_dataset_adaptation + make_eval_dataloader + eval_epoch # for each epoch + # forward + # backward + # model update + + """ + + PLUGIN_CLASS = SupervisedPlugin + + def __init__( + self, + model: Module, + optimizer: Optimizer, + criterion=CrossEntropyLoss(), + train_mb_size: int = 1, + train_epochs: int = 1, + eval_mb_size: Optional[int] = 1, + device="cpu", + plugins: Optional[Sequence["SupervisedPlugin"]] = None, + evaluator=default_evaluator, + eval_every=-1, + peval_mode="epoch", + ): + """Init. + + :param model: PyTorch model. + :param optimizer: PyTorch optimizer. + :param criterion: loss function. + :param train_mb_size: mini-batch size for training. + :param train_epochs: number of training epochs. + :param eval_mb_size: mini-batch size for eval. + :param device: PyTorch device where the model will be allocated. + :param plugins: (optional) list of StrategyPlugins. + :param evaluator: (optional) instance of EvaluationPlugin for logging + and metric computations. None to remove logging. + :param eval_every: the frequency of the calls to `eval` inside the + training loop. -1 disables the evaluation. 0 means `eval` is called + only at the end of the learning experience. Values >0 mean that + `eval` is called every `eval_every` epochs and at the end of the + learning experience. + :param peval_mode: one of {'epoch', 'iteration'}. Decides whether the + periodic evaluation during training should execute every + `eval_every` epochs or iterations (Default='epoch'). + """ + super().__init__( + model=model, + optimizer=optimizer, + criterion=criterion, + train_mb_size=train_mb_size, + train_epochs=train_epochs, + eval_mb_size=eval_mb_size, + device=device, + plugins=plugins, + evaluator=evaluator, + eval_every=eval_every, + peval_mode=peval_mode, + ) + ################################################################### + # State variables. These are updated during the train/eval loops. # + ################################################################### + + # self.adapted_dataset = None + # """ Data used to train. It may be modified by plugins. Plugins can + # append data to it (e.g. for replay). + # + # .. note:: + # + # This dataset may contain samples from different experiences. If you + # want the original data for the current experience + # use :attr:`.BaseTemplate.experience`. + + +class OnlineSupervisedTemplate(OnlineObservation, SupervisedProblem, SGDUpdate, + BaseGeneralSGDTemplate): + """Base class for continual learning strategies. + + BaseTemplate is the super class of all task-based continual learning + strategies. It implements a basic training loop and callback system + that allows to execute code at each experience of the training loop. 
+ Plugins can be used to implement callbacks to augment the training + loop with additional behavior (e.g. a memory buffer for replay). + + **Scenarios** + This strategy supports several continual learning scenarios: + + * class-incremental scenarios (no task labels) + * multi-task scenarios, where task labels are provided) + * multi-incremental scenarios, where the same task may be revisited + + The exact scenario depends on the data stream and whether it provides + the task labels. + + **Training loop** + The training loop is organized as follows:: + + train + train_exp # for each experience + adapt_train_dataset + train_dataset_adaptation + make_train_dataloader + train_pass # for each pass + # forward + # backward + # model update + + **Evaluation loop** + The evaluation loop is organized as follows:: + + eval + eval_exp # for each experience + adapt_eval_dataset + eval_dataset_adaptation + make_eval_dataloader + eval_epoch # for each epoch + # forward + # backward + # model update + + """ + + PLUGIN_CLASS = SupervisedPlugin + + def __init__( + self, + model: Module, + optimizer: Optimizer, + criterion=CrossEntropyLoss(), + train_mb_size: int = 1, + train_passes: int = 1, + eval_mb_size: Optional[int] = 1, + device="cpu", + plugins: Optional[Sequence["SupervisedPlugin"]] = None, + evaluator=default_evaluator, + eval_every=-1, + peval_mode="experience", + ): + """Init. + + :param model: PyTorch model. + :param optimizer: PyTorch optimizer. + :param criterion: loss function. + :param train_mb_size: mini-batch size for training. + :param train_passes: number of training passes. + :param eval_mb_size: mini-batch size for eval. + :param device: PyTorch device where the model will be allocated. + :param plugins: (optional) list of StrategyPlugins. + :param evaluator: (optional) instance of EvaluationPlugin for logging + and metric computations. None to remove logging. + :param eval_every: the frequency of the calls to `eval` inside the + training loop. -1 disables the evaluation. 0 means `eval` is called + only at the end of the learning experience. Values >0 mean that + `eval` is called every `eval_every` experiences and at the end of + the learning experience. + :param peval_mode: one of {'experience', 'iteration'}. Decides whether + the periodic evaluation during training should execute every + `eval_every` experience or iterations (Default='experience'). + """ + super().__init__( + model=model, + optimizer=optimizer, + criterion=criterion, + train_mb_size=train_mb_size, + train_epochs=1, + eval_mb_size=eval_mb_size, + device=device, + plugins=plugins, + evaluator=evaluator, + eval_every=eval_every, + peval_mode=peval_mode, + ) + + self.train_passes = train_passes diff --git a/avalanche/training/templates/observation_type/__init__.py b/avalanche/training/templates/observation_type/__init__.py new file mode 100644 index 000000000..4391bfbd7 --- /dev/null +++ b/avalanche/training/templates/observation_type/__init__.py @@ -0,0 +1,6 @@ +"""Observation types mainly define the way data samples are observed: + batch(multiple epochs) vs. 
online(one epoch) + +""" +from .batch_observation import BatchObservation +from .online_observation import OnlineObservation diff --git a/avalanche/training/templates/observation_type/batch_observation.py b/avalanche/training/templates/observation_type/batch_observation.py new file mode 100644 index 000000000..4f67ef91b --- /dev/null +++ b/avalanche/training/templates/observation_type/batch_observation.py @@ -0,0 +1,46 @@ +from typing import Iterable + +from avalanche.benchmarks import CLExperience +from avalanche.models.dynamic_optimizers import reset_optimizer + + +class BatchObservation: + def _train_exp( + self, experience: CLExperience, eval_streams=None, **kwargs + ): + """Training loop over a single Experience object. + + :param experience: CL experience information. + :param eval_streams: list of streams for evaluation. + If None: use the training experience for evaluation. + Use [] if you do not want to evaluate during training. + :param kwargs: custom arguments. + """ + if eval_streams is None: + eval_streams = [experience] + for i, exp in enumerate(eval_streams): + if not isinstance(exp, Iterable): + eval_streams[i] = [exp] + for _ in range(self.train_epochs): + self._before_training_epoch(**kwargs) + + if self._stop_training: # Early stopping + self._stop_training = False + break + + self.training_epoch(**kwargs) + self._after_training_epoch(**kwargs) + + def make_optimizer(self): + """Optimizer initialization. + + Called before each training experiene to configure the optimizer. + """ + # we reset the optimizer's state after each experience. + # This allows to add new parameters (new heads) and + # freezing old units during the model's adaptation phase. + reset_optimizer(self.optimizer, self.model) + + def maybe_adapt_model_and_make_optimizer(self): + self.model = self.model_adaptation() + self.make_optimizer() diff --git a/avalanche/training/templates/observation_type/online_observation.py b/avalanche/training/templates/observation_type/online_observation.py new file mode 100644 index 000000000..faf98dca0 --- /dev/null +++ b/avalanche/training/templates/observation_type/online_observation.py @@ -0,0 +1,58 @@ +from typing import Iterable + +from avalanche.benchmarks import OnlineCLExperience +from avalanche.models.dynamic_optimizers import reset_optimizer +from avalanche.models.dynamic_optimizers import update_optimizer + + +class OnlineObservation: + def _train_exp( + self, experience: OnlineCLExperience, eval_streams=None, **kwargs + ): + """Training loop over a single Experience object. + + :param experience: Online CL experience information. + :param eval_streams: list of streams for evaluation. + If None: use the training experience for evaluation. + Use [] if you do not want to evaluate during training. + :param kwargs: custom arguments. + """ + if eval_streams is None: + eval_streams = [experience] + for i, exp in enumerate(eval_streams): + if not isinstance(exp, Iterable): + eval_streams[i] = [exp] + + self.training_epoch(**kwargs) + + def make_optimizer(self): + """Optimizer initialization. + + Called before each training experience to configure the optimizer. + """ + # We reset the optimizer's state after each experience if task + # boundaries are given, otherwise it updates the optimizer only if + # new parameters are added to the model after each adaptation step. 
+ + # We assume the current experience is an OnlineCLExperience: + if self.experience.access_task_boundaries: + reset_optimizer(self.optimizer, self.model) + + else: + update_optimizer(self.optimizer, + self.model_params_before_adaptation, + self.model.parameters(), + reset_state=False) + + def maybe_adapt_model_and_make_optimizer(self): + # If strategy has access to the task boundaries, and the current + # sub-experience is the first sub-experience in the online (sub-)stream, + # then adapt the model with the full origin experience: + if self.experience.access_task_boundaries: + if self.experience.is_first_subexp: + self.model = self.model_adaptation() + self.make_optimizer() + # Otherwise, adapt to the current sub-experience: + else: + self.model = self.model_adaptation() + self.make_optimizer() diff --git a/avalanche/training/templates/problem_type/__init__.py b/avalanche/training/templates/problem_type/__init__.py new file mode 100644 index 000000000..0932beb4c --- /dev/null +++ b/avalanche/training/templates/problem_type/__init__.py @@ -0,0 +1,5 @@ +"""Problem types mainly define the properties and criterions depending on + how inputs should be mapped to outputs. + +""" +from .supervised_problem import SupervisedProblem diff --git a/avalanche/training/templates/problem_type/supervised_problem.py b/avalanche/training/templates/problem_type/supervised_problem.py new file mode 100644 index 000000000..9432e04ef --- /dev/null +++ b/avalanche/training/templates/problem_type/supervised_problem.py @@ -0,0 +1,31 @@ +from avalanche.models import avalanche_forward + + +class SupervisedProblem: + @property + def mb_x(self): + """Current mini-batch input.""" + return self.mbatch[0] + + @property + def mb_y(self): + """Current mini-batch target.""" + return self.mbatch[1] + + @property + def mb_task_id(self): + """Current mini-batch task labels.""" + assert len(self.mbatch) >= 3 + return self.mbatch[-1] + + def criterion(self): + """Loss function for supervised problems.""" + return self._criterion(self.mb_output, self.mb_y) + + def forward(self): + """Compute the model's output given the current mini-batch.""" + return avalanche_forward(self.model, self.mb_x, self.mb_task_id) + + def _check_minibatch(self): + """Check if the current mini-batch has 3 components.""" + assert len(self.mbatch) >= 3 diff --git a/avalanche/training/templates/update_type/__init__.py b/avalanche/training/templates/update_type/__init__.py new file mode 100644 index 000000000..3cc498524 --- /dev/null +++ b/avalanche/training/templates/update_type/__init__.py @@ -0,0 +1,5 @@ +"""Update types define how the model is updated for every batch of data. +""" + +from .sgd_update import SGDUpdate +from .meta_update import MetaUpdate diff --git a/avalanche/training/templates/update_type/meta_update.py b/avalanche/training/templates/update_type/meta_update.py new file mode 100644 index 000000000..a04559189 --- /dev/null +++ b/avalanche/training/templates/update_type/meta_update.py @@ -0,0 +1,27 @@ + +class MetaUpdate: + def training_epoch(self, **kwargs): + """Training epoch. 
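+
+        Unlike the plain SGD update, the per-iteration parameter updates
+        are delegated to plugins through the fast-update and slow-update
+        callbacks.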
+ + :param kwargs: + :return: + """ + for self.mbatch in self.dataloader: + if self._stop_training: + break + + self._unpack_minibatch() + self._before_training_iteration(**kwargs) + + self.optimizer.zero_grad() + self.loss = 0 + + # Fast updates + self._before_fast_update(**kwargs) + self._after_fast_updates(**kwargs) + + # Slow updates + self._before_slow_update(**kwargs) + self._after_slow_updates(**kwargs) + + self._after_training_iteration(**kwargs) diff --git a/avalanche/training/templates/update_type/sgd_update.py b/avalanche/training/templates/update_type/sgd_update.py new file mode 100644 index 000000000..d85365f49 --- /dev/null +++ b/avalanche/training/templates/update_type/sgd_update.py @@ -0,0 +1,36 @@ + +class SGDUpdate: + def training_epoch(self, **kwargs): + """Training epoch. + + :param kwargs: + :return: + """ + for self.mbatch in self.dataloader: + if self._stop_training: + break + + self._unpack_minibatch() + self._before_training_iteration(**kwargs) + + self.optimizer.zero_grad() + self.loss = 0 + + # Forward + self._before_forward(**kwargs) + self.mb_output = self.forward() + self._after_forward(**kwargs) + + # Loss & Backward + self.loss += self.criterion() + + self._before_backward(**kwargs) + self.backward() + self._after_backward(**kwargs) + + # Optimization step + self._before_update(**kwargs) + self.optimizer_step() + self._after_update(**kwargs) + + self._after_training_iteration(**kwargs) From 38e0cef559cf6a4057bcd376517074a674fdeb31 Mon Sep 17 00:00:00 2001 From: hamedhemati Date: Tue, 12 Jul 2022 16:52:41 +0200 Subject: [PATCH 02/10] Syntax fix --- avalanche/training/templates/base_general_sgd.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/avalanche/training/templates/base_general_sgd.py b/avalanche/training/templates/base_general_sgd.py index 954bfe9d0..f20a6414b 100644 --- a/avalanche/training/templates/base_general_sgd.py +++ b/avalanche/training/templates/base_general_sgd.py @@ -488,14 +488,16 @@ def _maybe_peval(self, strategy, counter, **kwargs): if self.eval_every > 0 and counter % self.eval_every == 0: self._peval(strategy, **kwargs) - def after_training_epoch(self, strategy: "BaseSGDTemplate", **kwargs): + def after_training_epoch(self, strategy: "BaseGeneralSGDTemplate", + **kwargs): """Periodic eval controlled by `self.eval_every` and `self.peval_mode`.""" if self.peval_mode == "epoch": self._maybe_peval(strategy, strategy.clock.train_exp_epochs, **kwargs) - def after_training_iteration(self, strategy: "BaseSGDTemplate", **kwargs): + def after_training_iteration(self, strategy: "BaseGeneralSGDTemplate", + **kwargs): """Periodic eval controlled by `self.eval_every` and `self.peval_mode`.""" if self.peval_mode == "iteration": From 346017beb1a6d259cc3133c853063ed6118e19fa Mon Sep 17 00:00:00 2001 From: hamedhemati Date: Sun, 24 Jul 2022 20:36:47 +0200 Subject: [PATCH 03/10] Update meta-learning template and add examples --- avalanche/NEW_core.py | 226 ++++++++++++++++ avalanche/training/plugins/NEW_lamaml.py | 251 ++++++++++++++++++ ...ppers_temp.py => NEW_strategy_wrappers.py} | 2 +- .../{base_general_sgd.py => NEW_base_sgd.py} | 3 +- ...n_templates.py => NEW_common_templates.py} | 129 ++++++++- .../templates/update_type/meta_update.py | 36 ++- examples/NEW_lamaml.py | 73 +++++ examples/NEW_naive.py | 59 ++++ examples/NEW_online_naive.py | 137 ++++++++++ 9 files changed, 900 insertions(+), 16 deletions(-) create mode 100644 avalanche/NEW_core.py create mode 100644 avalanche/training/plugins/NEW_lamaml.py rename 
avalanche/training/supervised/{strategy_wrappers_temp.py => NEW_strategy_wrappers.py} (98%) rename avalanche/training/templates/{base_general_sgd.py => NEW_base_sgd.py} (99%) rename avalanche/training/templates/{common_templates.py => NEW_common_templates.py} (63%) create mode 100644 examples/NEW_lamaml.py create mode 100644 examples/NEW_naive.py create mode 100644 examples/NEW_online_naive.py diff --git a/avalanche/NEW_core.py b/avalanche/NEW_core.py new file mode 100644 index 000000000..baf59efb6 --- /dev/null +++ b/avalanche/NEW_core.py @@ -0,0 +1,226 @@ +from abc import ABC +from typing import TypeVar, Generic +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from avalanche.training.templates.base import BaseTemplate + +CallbackResult = TypeVar("CallbackResult") +Template = TypeVar("Template", bound="BaseTemplate") + + +class BasePlugin(Generic[Template], ABC): + """ABC for BaseTemplate plugins. + + A plugin is simply an object implementing some strategy callbacks. + Plugins are called automatically during the strategy execution. + + Callbacks provide access before/after each phase of the execution. + In general, for each method of the training and evaluation loops, + `StrategyCallbacks` + provide two functions `before_{method}` and `after_{method}`, called + before and after the method, respectively. + Therefore plugins can "inject" additional code by implementing callbacks. + Each callback has a `strategy` argument that gives access to the state. + + In Avalanche, callbacks are used to implement continual strategies, metrics + and loggers. + """ + + def __init__(self): + pass + + def before_training(self, strategy: Template, *args, **kwargs): + """Called before `train` by the `BaseTemplate`.""" + pass + + def before_training_exp(self, strategy: Template, *args, **kwargs): + """Called before `train_exp` by the `BaseTemplate`.""" + pass + + def after_training_exp(self, strategy: Template, *args, **kwargs): + """Called after `train_exp` by the `BaseTemplate`.""" + pass + + def after_training(self, strategy: Template, *args, **kwargs): + """Called after `train` by the `BaseTemplate`.""" + pass + + def before_eval( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called before `eval` by the `BaseTemplate`.""" + pass + + def before_eval_exp( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called before `eval_exp` by the `BaseTemplate`.""" + pass + + def after_eval_exp( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called after `eval_exp` by the `BaseTemplate`.""" + pass + + def after_eval(self, strategy: Template, *args, **kwargs) -> CallbackResult: + """Called after `eval` by the `BaseTemplate`.""" + pass + + +class BaseSGDPlugin(BasePlugin[Template], ABC): + """ABC for BaseSGDTemplate plugins. + + See `BaseSGDTemplate` for complete description of the train/eval loop. 
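+
+    A minimal sketch of a custom plugin (the callback overridden below is
+    one of the hooks defined in this class; the logging is purely
+    illustrative)::
+
+        class IterationLossLogger(BaseSGDPlugin):
+            def after_training_iteration(self, strategy, *args, **kwargs):
+                print(f"loss: {float(strategy.loss):.4f}")
+
+    An instance of such a plugin is passed to a strategy through its
+    ``plugins`` argument.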
+ """ + + def before_training_epoch( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called before `train_epoch` by the `BaseTemplate`.""" + pass + + def before_training_iteration( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called before the start of a training iteration by the + `BaseTemplate`.""" + pass + + def before_forward( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called before `model.forward()` by the `BaseTemplate`.""" + pass + + def after_forward( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called after `model.forward()` by the `BaseTemplate`.""" + pass + + def before_backward( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called before `criterion.backward()` by the `BaseTemplate`.""" + pass + + def after_backward( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called after `criterion.backward()` by the `BaseTemplate`.""" + pass + + def after_training_iteration( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called after the end of a training iteration by the + `BaseTemplate`.""" + pass + + def before_update( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called before `optimizer.update()` by the `BaseTemplate`.""" + pass + + def after_update( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called after `optimizer.update()` by the `BaseTemplate`.""" + pass + + def after_training_epoch( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called after `train_epoch` by the `BaseTemplate`.""" + pass + + def before_eval_iteration( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called before the start of a training iteration by the + `BaseTemplate`.""" + pass + + def before_eval_forward( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called before `model.forward()` by the `BaseTemplate`.""" + pass + + def after_eval_forward( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called after `model.forward()` by the `BaseTemplate`.""" + pass + + def after_eval_iteration( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called after the end of an iteration by the + `BaseTemplate`.""" + pass + + def before_train_dataset_adaptation( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called before `train_dataset_adapatation` by the `BaseTemplate`.""" + pass + + def after_train_dataset_adaptation( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called after `train_dataset_adapatation` by the `BaseTemplate`.""" + pass + + def before_eval_dataset_adaptation( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called before `eval_dataset_adaptation` by the `BaseTemplate`.""" + pass + + def after_eval_dataset_adaptation( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called after `eval_dataset_adaptation` by the `BaseTemplate`.""" + pass + + # ====================================================================> NEW + + def before_inner_updates( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called before `_inner_updates` by the `BaseTemplate`.""" + pass + + def inner_updates( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called before `_inner_updates` by the `BaseTemplate`.""" + pass + + def after_inner_updates( + 
self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called before `_outer_updates` by the `BaseTemplate`.""" + pass + + def before_outer_update( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called before `_outer_updates` by the `BaseTemplate`.""" + pass + + def outer_update( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called before `_inner_updates` by the `BaseTemplate`.""" + pass + + def after_outer_update( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called before `_outer_updates` by the `BaseTemplate`.""" + pass diff --git a/avalanche/training/plugins/NEW_lamaml.py b/avalanche/training/plugins/NEW_lamaml.py new file mode 100644 index 000000000..a44e8057d --- /dev/null +++ b/avalanche/training/plugins/NEW_lamaml.py @@ -0,0 +1,251 @@ +from typing import TYPE_CHECKING + +import torch +import torch.nn as nn +import torch.nn.functional as F + +import math + +try: + import higher +except ImportError: + raise ModuleNotFoundError("higher not found, if you want to use " + "MAML please install avalanche with " + "the extra dependencies: " + "pip install avalanche-lib[extra]") + +from avalanche.NEW_core import BaseSGDPlugin +from avalanche.models.utils import avalanche_forward + + +class LaMAMLPlugin(BaseSGDPlugin): + """LaMAML Plugin. + """ + + def __init__( + self, + n_inner_updates: int = 5, + second_order: bool = True, + grad_clip_norm: float = 1.0, + learn_lr: bool = True, + lr_alpha: float = 0.25, + sync_update: bool = False, + alpha_init: float = 0.1, + ): + """Implementation of Look-ahead MAML (LaMAML) algorithm in Avalanche + using Higher library for applying fast updates. + + :param n_inner_updates: number of inner updates. + :param second_order: If True, it computes the second-order derivative + of the inner update trajectory for the meta-loss. Otherwise, + it computes the meta-loss with a first-order approximation. + :param grad_clip_norm: gradient clipping norm. + :param learn_lr: if True, it learns the LR for each batch of data. + :param lr_alpha: LR for learning the main update's learning rate. + :param sync_update: if True, it updates the meta-model with a fixed + learning rate. Mutually exclusive with learn_lr and + lr_alpha. + :param alpha_init: initialization value for learnable LRs. 
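+
+        A minimal usage sketch, assuming a multi-task model, optimizer,
+        criterion and benchmark have already been created (as in the
+        accompanying example script)::
+
+            strategy = SupervisedMetaLearningTemplate(
+                model, optimizer, criterion,
+                train_mb_size=32, train_epochs=1,
+                plugins=[LaMAMLPlugin(n_inner_updates=5)],
+            )
+            for experience in benchmark.train_stream:
+                strategy.train(experience)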
+ + """ + + super().__init__() + + self.n_inner_updates = n_inner_updates + self.second_order = second_order + self.grad_clip_norm = grad_clip_norm + self.learn_lr = learn_lr + self.lr_alpha = lr_alpha + self.sync_update = sync_update + self.alpha_init = alpha_init + self.alpha_params = None + self.is_model_initialized = False + + def before_training(self, strategy, **kwargs): + if not self.is_model_initialized: + strategy.model.apply(init_kaiming_normal) + self.is_model_initialized = True + + def before_training_exp(self, strategy, **kwargs): + # Initialize alpha-lr parameters + if self.alpha_params is None: + self.alpha_params = nn.ParameterList([]) + # Iterate through model parameters and add the corresponding + # alpha_lr parameter + for p in strategy.model.parameters(): + alpha_param = nn.Parameter( + torch.ones(p.shape) * self.alpha_init, requires_grad=True + ) + self.alpha_params.append(alpha_param) + self.alpha_params.to(strategy.device) + + # Create optimizer for the alpha_lr parameters + self.optimizer_alpha = torch.optim.SGD( + self.alpha_params.parameters(), lr=self.lr_alpha + ) + + # For task-incremental heads: + # If new parameters are added to the model, update alpha_lr + # parameters respectively + if len(self.alpha_params) < len(list(strategy.model.parameters())): + for iter_p, p in enumerate(strategy.model.parameters()): + # Skip the older parameters + if iter_p < len(self.alpha_params): + continue + # Add new alpha_lr for the new parameter + alpha_param = nn.Parameter( + torch.ones(p.shape) * self.alpha_init, requires_grad=True + ) + self.alpha_params.append(alpha_param) + + self.alpha_params.to(strategy.device) + # Re-init optimizer for the new set of alpha_lr parameters + self.optimizer_alpha = torch.optim.SGD( + self.alpha_params.parameters(), lr=self.lr_alpha + ) + + def before_inner_updates(self, strategy, **kwargs): + # Create a stateless copy of the model for inner-updates + self.fast_model = higher.patch.monkeypatch( + strategy.model, + copy_initial_weights=True, + track_higher_grads=self.second_order, + ) + if strategy.clock.train_exp_counter > 0: + self.batch_x = strategy.mb_x[: strategy.train_mb_size] + self.batch_y = strategy.mb_y[: strategy.train_mb_size] + self.batch_t = strategy.mb_task_id[: strategy.train_mb_size] + else: + self.batch_x = strategy.mb_x + self.batch_y = strategy.mb_y + self.batch_t = strategy.mb_task_id + + bsize_data = self.batch_x.shape[0] + self.rough_sz = math.ceil(bsize_data / self.n_inner_updates) + self.meta_losses = [0 for _ in range(self.n_inner_updates)] + + def single_inner_update(self, x, y, t, criterion): + logits = avalanche_forward(self.fast_model, x, t) + loss = criterion(logits, y) + + # Compute gradient with respect to the current fast weights + grads = list( + torch.autograd.grad( + loss, + self.fast_model.fast_params, + create_graph=self.second_order, + retain_graph=self.second_order, + allow_unused=True, + ) + ) + + # Clip grad norms + grads = [ + torch.clamp(g, min=-self.grad_clip_norm, max=self.grad_clip_norm) + if g is not None + else g + for g in grads + ] + + # New fast parameters + new_fast_params = [ + param - alpha * grad if grad is not None else param + for (param, alpha, grad) in zip( + self.fast_model.fast_params, self.alpha_params.parameters(), + grads + ) + ] + + # Update fast model's weights + self.fast_model.update_params(new_fast_params) + + def inner_updates(self, strategy, **kwargs): + """Update fast weights using current samples and + return the updated fast model. 
+ """ + for i in range(self.n_inner_updates): + batch_x_i = self.batch_x[i * self.rough_sz: + (i + 1) * self.rough_sz] + batch_y_i = self.batch_y[i * self.rough_sz: + (i + 1) * self.rough_sz] + batch_t_i = self.batch_t[i * self.rough_sz: + (i + 1) * self.rough_sz] + + # We assume that samples for inner update are from the same task + self.single_inner_update(batch_x_i, batch_y_i, batch_t_i, + strategy._criterion) + + # Compute meta-loss with the combination of batch and buffer samples + logits_meta = avalanche_forward( + self.fast_model, strategy.mb_x, strategy.mb_task_id + ) + meta_loss = strategy._criterion(logits_meta, strategy.mb_y) + self.meta_losses[i] = meta_loss + + def apply_grad(self, module, grads, device): + for i, p in enumerate(module.parameters()): + grad = grads[i] + if grad is None: + grad = torch.zeros(p.shape).float().to(device) + + if p.grad is None: + p.grad = grad + else: + p.grad += grad + + def outer_update(self, strategy, **kwargs): + # Compute meta-gradient for the main model + meta_loss = sum(self.meta_losses) / len(self.meta_losses) + meta_grad_model = torch.autograd.grad( + meta_loss, + self.fast_model.parameters(time=0), + retain_graph=True, + allow_unused=True, + ) + strategy.model.zero_grad() + self.apply_grad(strategy.model, meta_grad_model, strategy.device) + + # Clip gradients + torch.nn.utils.clip_grad_norm_( + strategy.model.parameters(), self.grad_clip_norm + ) + + if self.learn_lr: + # Compute meta-gradient for alpha-lr parameters + meta_grad_alpha = torch.autograd.grad( + meta_loss, self.alpha_params.parameters(), allow_unused=True + ) + self.alpha_params.zero_grad() + self.apply_grad(self.alpha_params, meta_grad_alpha, strategy.device) + + torch.nn.utils.clip_grad_norm_( + self.alpha_params.parameters(), self.grad_clip_norm + ) + self.optimizer_alpha.step() + + # If sync-update: update with self.optimizer + # o.w: use the learned LRs to update the model + if self.sync_update: + self.optimizer.step() + else: + for p, alpha in zip( + strategy.model.parameters(), self.alpha_params.parameters() + ): + # Use relu on updated LRs to avoid negative values + p.data = p.data - p.grad * F.relu(alpha) + + strategy.loss = meta_loss + + +def init_kaiming_normal(m): + if isinstance(m, nn.Conv2d): + torch.nn.init.constant_(m.weight.data, 1.0) + torch.nn.init.kaiming_normal_(m.weight.data) + if m.bias is not None: + m.bias.data.zero_() + + elif isinstance(m, nn.Linear): + torch.nn.init.constant_(m.weight.data, 1.0) + torch.nn.init.kaiming_normal_(m.weight.data) + if m.bias is not None: + m.bias.data.zero_() diff --git a/avalanche/training/supervised/strategy_wrappers_temp.py b/avalanche/training/supervised/NEW_strategy_wrappers.py similarity index 98% rename from avalanche/training/supervised/strategy_wrappers_temp.py rename to avalanche/training/supervised/NEW_strategy_wrappers.py index 19bcbba36..e5abd5119 100644 --- a/avalanche/training/supervised/strategy_wrappers_temp.py +++ b/avalanche/training/supervised/NEW_strategy_wrappers.py @@ -3,7 +3,7 @@ from torch.optim import Optimizer from avalanche.training.plugins import EvaluationPlugin -from avalanche.training.templates.common_templates import ( +from avalanche.training.templates.NEW_common_templates import ( SupervisedTemplate, OnlineSupervisedTemplate ) diff --git a/avalanche/training/templates/base_general_sgd.py b/avalanche/training/templates/NEW_base_sgd.py similarity index 99% rename from avalanche/training/templates/base_general_sgd.py rename to avalanche/training/templates/NEW_base_sgd.py index 
f20a6414b..8b5545aa3 100644 --- a/avalanche/training/templates/base_general_sgd.py +++ b/avalanche/training/templates/NEW_base_sgd.py @@ -18,7 +18,7 @@ from avalanche.training.utils import trigger_plugins -class BaseGeneralSGDTemplate(BaseTemplate): +class BaseSGDTemplate(BaseTemplate): """Base SGD class for continual learning skeletons. **Training loop** @@ -142,6 +142,7 @@ def train(self, eval_streams: Optional[Sequence[Union[CLExperience, ExpSequence]]] = None, **kwargs): + super().train(experiences, eval_streams, **kwargs) return self.evaluator.get_last_metrics() diff --git a/avalanche/training/templates/common_templates.py b/avalanche/training/templates/NEW_common_templates.py similarity index 63% rename from avalanche/training/templates/common_templates.py rename to avalanche/training/templates/NEW_common_templates.py index f5b055e17..eb3fdd6be 100644 --- a/avalanche/training/templates/common_templates.py +++ b/avalanche/training/templates/NEW_common_templates.py @@ -3,17 +3,17 @@ from torch.nn import Module, CrossEntropyLoss from torch.optim import Optimizer -from avalanche.training.plugins import SupervisedPlugin +from avalanche.core import BaseSGDPlugin from avalanche.training.plugins.evaluation import default_evaluator from .observation_type import * from .problem_type import * from .update_type import * -from .base_general_sgd import BaseGeneralSGDTemplate +from .NEW_base_sgd import BaseSGDTemplate class SupervisedTemplate(BatchObservation, SupervisedProblem, SGDUpdate, - BaseGeneralSGDTemplate): + BaseSGDTemplate): """Base class for continual learning strategies. BaseTemplate is the super class of all task-based continual learning @@ -60,7 +60,7 @@ class SupervisedTemplate(BatchObservation, SupervisedProblem, SGDUpdate, """ - PLUGIN_CLASS = SupervisedPlugin + PLUGIN_CLASS = BaseSGDPlugin def __init__( self, @@ -71,7 +71,120 @@ def __init__( train_epochs: int = 1, eval_mb_size: Optional[int] = 1, device="cpu", - plugins: Optional[Sequence["SupervisedPlugin"]] = None, + plugins: Optional[Sequence["BaseSGDPlugin"]] = None, + evaluator=default_evaluator, + eval_every=-1, + peval_mode="epoch", + ): + """Init. + + :param model: PyTorch model. + :param optimizer: PyTorch optimizer. + :param criterion: loss function. + :param train_mb_size: mini-batch size for training. + :param train_epochs: number of training epochs. + :param eval_mb_size: mini-batch size for eval. + :param device: PyTorch device where the model will be allocated. + :param plugins: (optional) list of StrategyPlugins. + :param evaluator: (optional) instance of EvaluationPlugin for logging + and metric computations. None to remove logging. + :param eval_every: the frequency of the calls to `eval` inside the + training loop. -1 disables the evaluation. 0 means `eval` is called + only at the end of the learning experience. Values >0 mean that + `eval` is called every `eval_every` epochs and at the end of the + learning experience. + :param peval_mode: one of {'epoch', 'iteration'}. Decides whether the + periodic evaluation during training should execute every + `eval_every` epochs or iterations (Default='epoch'). + """ + super().__init__( + model=model, + optimizer=optimizer, + criterion=criterion, + train_mb_size=train_mb_size, + train_epochs=train_epochs, + eval_mb_size=eval_mb_size, + device=device, + plugins=plugins, + evaluator=evaluator, + eval_every=eval_every, + peval_mode=peval_mode, + ) + ################################################################### + # State variables. 
These are updated during the train/eval loops. # + ################################################################### + + # self.adapted_dataset = None + # """ Data used to train. It may be modified by plugins. Plugins can + # append data to it (e.g. for replay). + # + # .. note:: + # + # This dataset may contain samples from different experiences. If you + # want the original data for the current experience + # use :attr:`.BaseTemplate.experience`. + + +class SupervisedMetaLearningTemplate(BatchObservation, SupervisedProblem, + MetaUpdate, BaseSGDTemplate): + """Base class for continual learning strategies. + + BaseTemplate is the super class of all task-based continual learning + strategies. It implements a basic training loop and callback system + that allows to execute code at each experience of the training loop. + Plugins can be used to implement callbacks to augment the training + loop with additional behavior (e.g. a memory buffer for replay). + + **Scenarios** + This strategy supports several continual learning scenarios: + + * class-incremental scenarios (no task labels) + * multi-task scenarios, where task labels are provided) + * multi-incremental scenarios, where the same task may be revisited + + The exact scenario depends on the data stream and whether it provides + the task labels. + + **Training loop** + The training loop is organized as follows:: + + train + train_exp # for each experience + adapt_train_dataset + train_dataset_adaptation + make_train_dataloader + train_epoch # for each epoch + # forward + # backward + # model update + + **Evaluation loop** + The evaluation loop is organized as follows:: + + eval + eval_exp # for each experience + adapt_eval_dataset + eval_dataset_adaptation + make_eval_dataloader + eval_epoch # for each epoch + # forward + # backward + # model update + + """ + + PLUGIN_CLASS = BaseSGDPlugin + + def __init__( + self, + model: Module, + optimizer: Optimizer, + criterion=CrossEntropyLoss(), + train_mb_size: int = 1, + train_epochs: int = 1, + eval_mb_size: Optional[int] = 1, + device="cpu", + plugins: Optional[Sequence["BaseSGDPlugin"]] = None, evaluator=default_evaluator, eval_every=-1, peval_mode="epoch", @@ -126,7 +239,7 @@ def __init__( class OnlineSupervisedTemplate(OnlineObservation, SupervisedProblem, SGDUpdate, - BaseGeneralSGDTemplate): + BaseSGDTemplate): """Base class for continual learning strategies. 
BaseTemplate is the super class of all task-based continual learning @@ -173,7 +286,7 @@ class OnlineSupervisedTemplate(OnlineObservation, SupervisedProblem, SGDUpdate, """ - PLUGIN_CLASS = SupervisedPlugin + PLUGIN_CLASS = BaseSGDPlugin def __init__( self, @@ -184,7 +297,7 @@ def __init__( train_passes: int = 1, eval_mb_size: Optional[int] = 1, device="cpu", - plugins: Optional[Sequence["SupervisedPlugin"]] = None, + plugins: Optional[Sequence["BaseSGDPlugin"]] = None, evaluator=default_evaluator, eval_every=-1, peval_mode="experience", diff --git a/avalanche/training/templates/update_type/meta_update.py b/avalanche/training/templates/update_type/meta_update.py index a04559189..cc1090385 100644 --- a/avalanche/training/templates/update_type/meta_update.py +++ b/avalanche/training/templates/update_type/meta_update.py @@ -1,3 +1,5 @@ +from avalanche.training.utils import trigger_plugins + class MetaUpdate: def training_epoch(self, **kwargs): @@ -16,12 +18,34 @@ def training_epoch(self, **kwargs): self.optimizer.zero_grad() self.loss = 0 - # Fast updates - self._before_fast_update(**kwargs) - self._after_fast_updates(**kwargs) + # Inner updates + self._before_inner_updates(**kwargs) + self._inner_updates(**kwargs) + self._after_inner_updates(**kwargs) + + # Outer update + self._before_outer_update(**kwargs) + self._outer_update(**kwargs) + self._after_outer_update(**kwargs) - # Slow updates - self._before_slow_update(**kwargs) - self._after_slow_updates(**kwargs) + self.mb_output = self.forward() self._after_training_iteration(**kwargs) + + def _before_inner_updates(self, **kwargs): + trigger_plugins(self, "before_inner_updates", **kwargs) + + def _inner_updates(self, **kwargs): + trigger_plugins(self, "inner_updates", **kwargs) + + def _after_inner_updates(self, **kwargs): + trigger_plugins(self, "after_inner_updates", **kwargs) + + def _before_outer_update(self, **kwargs): + trigger_plugins(self, "before_outer_update", **kwargs) + + def _outer_update(self, **kwargs): + trigger_plugins(self, "outer_update", **kwargs) + + def _after_outer_update(self, **kwargs): + trigger_plugins(self, "after_outer_update", **kwargs) diff --git a/examples/NEW_lamaml.py b/examples/NEW_lamaml.py new file mode 100644 index 000000000..ec456461d --- /dev/null +++ b/examples/NEW_lamaml.py @@ -0,0 +1,73 @@ +import torch +from os.path import expanduser + +from avalanche.models import MTSimpleMLP +from avalanche.evaluation.metrics import ( + accuracy_metrics, + loss_metrics, +) +from avalanche.training.plugins import EvaluationPlugin +from avalanche.benchmarks.classic import SplitMNIST +from avalanche.logging import InteractiveLogger +from avalanche.training.templates.NEW_common_templates import ( + SupervisedMetaLearningTemplate +) +from avalanche.training.plugins.NEW_lamaml import LaMAMLPlugin + + +def main(): + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + print(f"Using device: {device}") + + scenario = SplitMNIST( + n_experiences=5, + dataset_root=expanduser("~") + "/.avalanche/data/mnist/", + return_task_id=True + ) + + # choose some metrics and evaluation method + interactive_logger = InteractiveLogger() + eval_plugin = EvaluationPlugin( + accuracy_metrics( + minibatch=True, epoch=True, experience=True, stream=True + ), + loss_metrics(minibatch=True, epoch=True, experience=True, stream=True), + loggers=[interactive_logger], + ) + + model = MTSimpleMLP(hidden_size=128) + optimizer = torch.optim.SGD(model.parameters(), lr=0.01) + criterion = torch.nn.CrossEntropyLoss() + + # LaMAML 
plugin + lamaml_plugin = LaMAMLPlugin( + n_inner_updates=5, + second_order=True, + grad_clip_norm=1.0, + learn_lr=True, + lr_alpha=0.25, + sync_update=False, + alpha_init=0.1, + ) + + # create strategy + strategy = SupervisedMetaLearningTemplate( + model, + optimizer, + criterion, + train_epochs=1, + device=device, + train_mb_size=32, + evaluator=eval_plugin, + plugins=[lamaml_plugin] + ) + + # train on the selected scenario with the chosen strategy + for experience in scenario.train_stream: + print("Start training on experience ", experience.current_experience) + strategy.train(experience) + strategy.eval(scenario.test_stream[:]) + + +if __name__ == "__main__": + main() diff --git a/examples/NEW_naive.py b/examples/NEW_naive.py new file mode 100644 index 000000000..5aa29388a --- /dev/null +++ b/examples/NEW_naive.py @@ -0,0 +1,59 @@ +import torch +from os.path import expanduser + +from avalanche.models import SimpleMLP +from avalanche.evaluation.metrics import ( + accuracy_metrics, + loss_metrics, +) +from avalanche.training.plugins import EvaluationPlugin +from avalanche.benchmarks.classic import SplitMNIST +from avalanche.logging import InteractiveLogger +from avalanche.training.supervised.NEW_strategy_wrappers import ( + Naive +) + + +def main(): + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + print(f"Using device: {device}") + + scenario = SplitMNIST( + n_experiences=5, + dataset_root=expanduser("~") + "/.avalanche/data/mnist/" + ) + + # choose some metrics and evaluation method + interactive_logger = InteractiveLogger() + eval_plugin = EvaluationPlugin( + accuracy_metrics( + minibatch=True, epoch=True, experience=True, stream=True + ), + loss_metrics(minibatch=True, epoch=True, experience=True, stream=True), + loggers=[interactive_logger], + ) + + model = SimpleMLP(hidden_size=128) + optimizer = torch.optim.SGD(model.parameters(), lr=0.01) + criterion = torch.nn.CrossEntropyLoss() + + # create strategy + strategy = Naive( + model, + optimizer, + criterion, + train_epochs=1, + device=device, + train_mb_size=32, + evaluator=eval_plugin, + ) + + # train on the selected scenario with the chosen strategy + for experience in scenario.train_stream: + print("Start training on experience ", experience.current_experience) + strategy.train(experience) + strategy.eval(scenario.test_stream[:]) + + +if __name__ == "__main__": + main() diff --git a/examples/NEW_online_naive.py b/examples/NEW_online_naive.py new file mode 100644 index 000000000..249bf9437 --- /dev/null +++ b/examples/NEW_online_naive.py @@ -0,0 +1,137 @@ +################################################################################ +# Copyright (c) 2021 ContinualAI. # +# Copyrights licensed under the MIT License. # +# See the accompanying LICENSE file for terms. # +# # +# Date: 12-10-2020 # +# Author(s): Vincenzo Lomonaco, Hamed Hemati # +# E-mail: contact@continualai.org # +# Website: avalanche.continualai.org # +################################################################################ + +""" +This is a simple example on how to use the Naive strategy. 
+""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from os.path import expanduser + +import argparse +import torch +from torch.nn import CrossEntropyLoss +from torchvision import transforms +from torchvision.datasets import MNIST +from torchvision.transforms import ToTensor, RandomCrop +import torch.optim.lr_scheduler +from avalanche.benchmarks import nc_benchmark +from avalanche.models import SimpleMLP +from avalanche.training.supervised.NEW_strategy_wrappers import OnlineNaive +from avalanche.benchmarks.scenarios.online_scenario import OnlineCLScenario +from avalanche.evaluation.metrics import ( + forgetting_metrics, + accuracy_metrics, + loss_metrics, +) +from avalanche.logging import InteractiveLogger +from avalanche.training.plugins import EvaluationPlugin + + +def main(args): + # --- CONFIG + device = torch.device( + f"cuda:{args.cuda}" + if torch.cuda.is_available() and args.cuda >= 0 + else "cpu" + ) + n_batches = 5 + # --------- + + # --- TRANSFORMATIONS + train_transform = transforms.Compose( + [ + RandomCrop(28, padding=4), + ToTensor(), + transforms.Normalize((0.1307,), (0.3081,)), + ] + ) + test_transform = transforms.Compose( + [ToTensor(), transforms.Normalize((0.1307,), (0.3081,))] + ) + # --------- + + # --- SCENARIO CREATION + mnist_train = MNIST( + root=expanduser("~") + "/.avalanche/data/mnist/", + train=True, + download=True, + transform=train_transform, + ) + mnist_test = MNIST( + root=expanduser("~") + "/.avalanche/data/mnist/", + train=False, + download=True, + transform=test_transform, + ) + scenario = nc_benchmark( + mnist_train, mnist_test, n_batches, task_labels=False, seed=1234 + ) + # --------- + + # MODEL CREATION + model = SimpleMLP(num_classes=scenario.n_classes) + + # choose some metrics and evaluation method + interactive_logger = InteractiveLogger() + + eval_plugin = EvaluationPlugin( + accuracy_metrics( + minibatch=True, epoch=True, experience=True, stream=True + ), + loss_metrics(minibatch=True, epoch=True, experience=True, stream=True), + forgetting_metrics(experience=True), + loggers=[interactive_logger], + ) + + # CREATE THE STRATEGY INSTANCE (ONLINE-NAIVE) + cl_strategy = OnlineNaive( + model, + torch.optim.Adam(model.parameters(), lr=0.1), + CrossEntropyLoss(), + train_passes=1, + train_mb_size=1, + eval_mb_size=32, + device=device, + evaluator=eval_plugin, + ) + + # TRAINING LOOP + print("Starting experiment...") + results = [] + + # Create online benchmark + batch_streams = scenario.streams.values() + # ocl_benchmark = OnlineCLScenario(batch_streams) + for i, exp in enumerate(scenario.train_stream): + # Create online scenario from experience exp + ocl_benchmark = OnlineCLScenario(original_streams=batch_streams, + experiences=exp, + experience_size=1, + access_task_boundaries=True) + # Train on the online train stream of the scenario + cl_strategy.train(ocl_benchmark.train_stream) + results.append(cl_strategy.eval(scenario.original_test_stream)) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--cuda", + type=int, + default=0, + help="Select zero-indexed cuda device. 
-1 to use CPU.", + ) + args = parser.parse_args() + main(args) From 6c5bfb0ed5d47657a4de15813aa220a2aa0e19a2 Mon Sep 17 00:00:00 2001 From: hamedhemati Date: Sun, 24 Jul 2022 20:42:17 +0200 Subject: [PATCH 04/10] Fix syntax --- avalanche/training/templates/NEW_base_sgd.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/avalanche/training/templates/NEW_base_sgd.py b/avalanche/training/templates/NEW_base_sgd.py index 8b5545aa3..41c2f57e5 100644 --- a/avalanche/training/templates/NEW_base_sgd.py +++ b/avalanche/training/templates/NEW_base_sgd.py @@ -489,7 +489,7 @@ def _maybe_peval(self, strategy, counter, **kwargs): if self.eval_every > 0 and counter % self.eval_every == 0: self._peval(strategy, **kwargs) - def after_training_epoch(self, strategy: "BaseGeneralSGDTemplate", + def after_training_epoch(self, strategy: "BaseSGDTemplate", **kwargs): """Periodic eval controlled by `self.eval_every` and `self.peval_mode`.""" @@ -497,7 +497,7 @@ def after_training_epoch(self, strategy: "BaseGeneralSGDTemplate", self._maybe_peval(strategy, strategy.clock.train_exp_epochs, **kwargs) - def after_training_iteration(self, strategy: "BaseGeneralSGDTemplate", + def after_training_iteration(self, strategy: "BaseSGDTemplate", **kwargs): """Periodic eval controlled by `self.eval_every` and `self.peval_mode`.""" From 3e0c54a4ce7c88873f21dca96a1632ec60397700 Mon Sep 17 00:00:00 2001 From: hamedhemati Date: Wed, 31 Aug 2022 17:01:18 +0200 Subject: [PATCH 05/10] Remove `NEW_` prefixes and split plugin templates --- avalanche/NEW_core.py | 226 -------- avalanche/core.py | 30 + .../supervised/NEW_strategy_wrappers.py | 103 ---- .../training/supervised/strategy_wrappers.py | 2 +- .../supervised/strategy_wrappers_online.py | 6 +- avalanche/training/templates/NEW_base_sgd.py | 519 ------------------ avalanche/training/templates/__init__.py | 8 +- avalanche/training/templates/base_sgd.py | 303 ++++++---- ...ommon_templates.py => common_templates.py} | 2 +- examples/NEW_lamaml.py | 2 +- examples/NEW_online_naive.py | 137 ----- examples/lamaml_cifar100.py | 2 +- examples/{NEW_naive.py => naive.py} | 2 +- 13 files changed, 251 insertions(+), 1091 deletions(-) delete mode 100644 avalanche/NEW_core.py delete mode 100644 avalanche/training/supervised/NEW_strategy_wrappers.py delete mode 100644 avalanche/training/templates/NEW_base_sgd.py rename avalanche/training/templates/{NEW_common_templates.py => common_templates.py} (99%) delete mode 100644 examples/NEW_online_naive.py rename examples/{NEW_naive.py => naive.py} (96%) diff --git a/avalanche/NEW_core.py b/avalanche/NEW_core.py deleted file mode 100644 index baf59efb6..000000000 --- a/avalanche/NEW_core.py +++ /dev/null @@ -1,226 +0,0 @@ -from abc import ABC -from typing import TypeVar, Generic -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from avalanche.training.templates.base import BaseTemplate - -CallbackResult = TypeVar("CallbackResult") -Template = TypeVar("Template", bound="BaseTemplate") - - -class BasePlugin(Generic[Template], ABC): - """ABC for BaseTemplate plugins. - - A plugin is simply an object implementing some strategy callbacks. - Plugins are called automatically during the strategy execution. - - Callbacks provide access before/after each phase of the execution. - In general, for each method of the training and evaluation loops, - `StrategyCallbacks` - provide two functions `before_{method}` and `after_{method}`, called - before and after the method, respectively. 
- Therefore plugins can "inject" additional code by implementing callbacks. - Each callback has a `strategy` argument that gives access to the state. - - In Avalanche, callbacks are used to implement continual strategies, metrics - and loggers. - """ - - def __init__(self): - pass - - def before_training(self, strategy: Template, *args, **kwargs): - """Called before `train` by the `BaseTemplate`.""" - pass - - def before_training_exp(self, strategy: Template, *args, **kwargs): - """Called before `train_exp` by the `BaseTemplate`.""" - pass - - def after_training_exp(self, strategy: Template, *args, **kwargs): - """Called after `train_exp` by the `BaseTemplate`.""" - pass - - def after_training(self, strategy: Template, *args, **kwargs): - """Called after `train` by the `BaseTemplate`.""" - pass - - def before_eval( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called before `eval` by the `BaseTemplate`.""" - pass - - def before_eval_exp( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called before `eval_exp` by the `BaseTemplate`.""" - pass - - def after_eval_exp( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called after `eval_exp` by the `BaseTemplate`.""" - pass - - def after_eval(self, strategy: Template, *args, **kwargs) -> CallbackResult: - """Called after `eval` by the `BaseTemplate`.""" - pass - - -class BaseSGDPlugin(BasePlugin[Template], ABC): - """ABC for BaseSGDTemplate plugins. - - See `BaseSGDTemplate` for complete description of the train/eval loop. - """ - - def before_training_epoch( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called before `train_epoch` by the `BaseTemplate`.""" - pass - - def before_training_iteration( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called before the start of a training iteration by the - `BaseTemplate`.""" - pass - - def before_forward( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called before `model.forward()` by the `BaseTemplate`.""" - pass - - def after_forward( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called after `model.forward()` by the `BaseTemplate`.""" - pass - - def before_backward( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called before `criterion.backward()` by the `BaseTemplate`.""" - pass - - def after_backward( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called after `criterion.backward()` by the `BaseTemplate`.""" - pass - - def after_training_iteration( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called after the end of a training iteration by the - `BaseTemplate`.""" - pass - - def before_update( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called before `optimizer.update()` by the `BaseTemplate`.""" - pass - - def after_update( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called after `optimizer.update()` by the `BaseTemplate`.""" - pass - - def after_training_epoch( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called after `train_epoch` by the `BaseTemplate`.""" - pass - - def before_eval_iteration( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called before the start of a training iteration by the - `BaseTemplate`.""" - pass - - def before_eval_forward( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called before 
`model.forward()` by the `BaseTemplate`.""" - pass - - def after_eval_forward( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called after `model.forward()` by the `BaseTemplate`.""" - pass - - def after_eval_iteration( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called after the end of an iteration by the - `BaseTemplate`.""" - pass - - def before_train_dataset_adaptation( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called before `train_dataset_adapatation` by the `BaseTemplate`.""" - pass - - def after_train_dataset_adaptation( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called after `train_dataset_adapatation` by the `BaseTemplate`.""" - pass - - def before_eval_dataset_adaptation( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called before `eval_dataset_adaptation` by the `BaseTemplate`.""" - pass - - def after_eval_dataset_adaptation( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called after `eval_dataset_adaptation` by the `BaseTemplate`.""" - pass - - # ====================================================================> NEW - - def before_inner_updates( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called before `_inner_updates` by the `BaseTemplate`.""" - pass - - def inner_updates( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called before `_inner_updates` by the `BaseTemplate`.""" - pass - - def after_inner_updates( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called before `_outer_updates` by the `BaseTemplate`.""" - pass - - def before_outer_update( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called before `_outer_updates` by the `BaseTemplate`.""" - pass - - def outer_update( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called before `_inner_updates` by the `BaseTemplate`.""" - pass - - def after_outer_update( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: - """Called before `_outer_updates` by the `BaseTemplate`.""" - pass diff --git a/avalanche/core.py b/avalanche/core.py index 829daa7e2..ac13aac9f 100644 --- a/avalanche/core.py +++ b/avalanche/core.py @@ -193,3 +193,33 @@ def after_eval_dataset_adaptation( ) -> CallbackResult: """Called after `eval_dataset_adaptation` by the `BaseTemplate`.""" pass + + +class SupervisedMetaLearningPlugin(SupervisedPlugin[Template], ABC): + """ABC for SupervisedMetaLearningTemplate plugins. + + See `BaseTemplate` for complete description of the train/eval loop. 
+ """ + def before_inner_updates( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called before `_inner_updates` by the `BaseTemplate`.""" + pass + + def after_inner_updates( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called before `_outer_updates` by the `BaseTemplate`.""" + pass + + def before_outer_update( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called before `_outer_updates` by the `BaseTemplate`.""" + pass + + def after_outer_update( + self, strategy: Template, *args, **kwargs + ) -> CallbackResult: + """Called before `_outer_updates` by the `BaseTemplate`.""" + pass diff --git a/avalanche/training/supervised/NEW_strategy_wrappers.py b/avalanche/training/supervised/NEW_strategy_wrappers.py deleted file mode 100644 index e5abd5119..000000000 --- a/avalanche/training/supervised/NEW_strategy_wrappers.py +++ /dev/null @@ -1,103 +0,0 @@ -from typing import Optional, List -from torch.nn import Module, CrossEntropyLoss -from torch.optim import Optimizer - -from avalanche.training.plugins import EvaluationPlugin -from avalanche.training.templates.NEW_common_templates import ( - SupervisedTemplate, - OnlineSupervisedTemplate -) -from avalanche.training.plugins.evaluation import default_evaluator -from avalanche.training.plugins import SupervisedPlugin - - -class Naive(SupervisedTemplate): - def __init__( - self, - model: Module, - optimizer: Optimizer, - criterion=CrossEntropyLoss(), - train_mb_size: int = 1, - train_epochs: int = 1, - eval_mb_size: Optional[int] = None, - device=None, - plugins: Optional[List[SupervisedPlugin]] = None, - evaluator: EvaluationPlugin = default_evaluator, - eval_every=-1, - **base_kwargs - ): - super().__init__( - model, - optimizer, - criterion, - train_mb_size=train_mb_size, - train_epochs=train_epochs, - eval_mb_size=eval_mb_size, - device=device, - plugins=plugins, - evaluator=evaluator, - eval_every=eval_every, - **base_kwargs - ) - - -class OnlineNaive(OnlineSupervisedTemplate): - """Online naive finetuning. - - The simplest (and least effective) Continual Learning strategy. Naive just - incrementally fine tunes a single model without employing any method - to contrast the catastrophic forgetting of previous knowledge. - This strategy does not use task identities. - - Naive is easy to set up and its results are commonly used to show the worst - performing baseline. - """ - - def __init__( - self, - model: Module, - optimizer: Optimizer, - criterion=CrossEntropyLoss(), - train_passes: int = 1, - train_mb_size: int = 1, - eval_mb_size: int = None, - device=None, - plugins: Optional[List[SupervisedPlugin]] = None, - evaluator: EvaluationPlugin = default_evaluator, - eval_every=-1, - ): - """ - Creates an instance of the Naive strategy. - - :param model: The model. - :param optimizer: The optimizer to use. - :param criterion: The loss criterion to use. - :param num_passes: The number of passes for each sub-experience. - Defaults to 1. - :param train_mb_size: The train minibatch size. Defaults to 1. - :param eval_mb_size: The eval minibatch size. Defaults to 1. - :param device: The device to use. Defaults to None (cpu). - :param plugins: Plugins to be added. Defaults to None. - :param evaluator: (optional) instance of EvaluationPlugin for logging - and metric computations. - :param eval_every: the frequency of the calls to `eval` inside the - training loop. -1 disables the evaluation. 0 means `eval` is called - only at the end of the learning experience. 
Values >0 mean that - `eval` is called every `eval_every` epochs and at the end of the - learning experience. - """ - super().__init__( - model, - optimizer, - criterion, - train_passes=train_passes, - train_mb_size=train_mb_size, - eval_mb_size=eval_mb_size, - device=device, - plugins=plugins, - evaluator=evaluator, - eval_every=eval_every, - ) - - -__all__ = ["Naive", "OnlineNaive"] diff --git a/avalanche/training/supervised/strategy_wrappers.py b/avalanche/training/supervised/strategy_wrappers.py index 743c74b20..713ba8c7c 100644 --- a/avalanche/training/supervised/strategy_wrappers.py +++ b/avalanche/training/supervised/strategy_wrappers.py @@ -34,7 +34,7 @@ MASPlugin, ) from avalanche.training.templates.base import BaseTemplate -from avalanche.training.templates.supervised import SupervisedTemplate +from avalanche.training.templates.common_templates import SupervisedTemplate from avalanche.models.generator import MlpVAE, VAE_loss from avalanche.logging import InteractiveLogger diff --git a/avalanche/training/supervised/strategy_wrappers_online.py b/avalanche/training/supervised/strategy_wrappers_online.py index 3eb5d5003..24464690e 100644 --- a/avalanche/training/supervised/strategy_wrappers_online.py +++ b/avalanche/training/supervised/strategy_wrappers_online.py @@ -15,13 +15,13 @@ from avalanche.training.plugins.evaluation import default_evaluator from avalanche.training.plugins import SupervisedPlugin, EvaluationPlugin -from avalanche.training.templates.online_supervised import ( +from avalanche.training.templates.common_templates import ( OnlineSupervisedTemplate, ) class OnlineNaive(OnlineSupervisedTemplate): - """Naive finetuning. + """Online naive finetuning. The simplest (and least effective) Continual Learning strategy. Naive just incrementally fine tunes a single model without employing any method @@ -42,7 +42,7 @@ def __init__( eval_mb_size: int = None, device=None, plugins: Optional[List[SupervisedPlugin]] = None, - evaluator: EvaluationPlugin = default_evaluator(), + evaluator: EvaluationPlugin = default_evaluator, eval_every=-1, ): """ diff --git a/avalanche/training/templates/NEW_base_sgd.py b/avalanche/training/templates/NEW_base_sgd.py deleted file mode 100644 index 41c2f57e5..000000000 --- a/avalanche/training/templates/NEW_base_sgd.py +++ /dev/null @@ -1,519 +0,0 @@ -from typing import Iterable, Sequence, Optional, Union, List -from pkg_resources import parse_version - -import torch -from torch.nn import Module, CrossEntropyLoss -from torch.optim import Optimizer -from torch.utils.data import DataLoader - -from avalanche.benchmarks import CLExperience, CLStream -from avalanche.core import BaseSGDPlugin -from avalanche.training.plugins import SupervisedPlugin, EvaluationPlugin -from avalanche.training.plugins.clock import Clock -from avalanche.training.plugins.evaluation import default_evaluator -from avalanche.training.templates.base import BaseTemplate, ExpSequence -from avalanche.models.utils import avalanche_model_adaptation -from avalanche.benchmarks.utils.data_loader import TaskBalancedDataLoader, \ - collate_from_data_or_kwargs -from avalanche.training.utils import trigger_plugins - - -class BaseSGDTemplate(BaseTemplate): - """Base SGD class for continual learning skeletons. 
- - **Training loop** - The training loop is organized as follows:: - - train - train_exp # for each experience - - **Evaluation loop** - The evaluation loop is organized as follows:: - - eval - eval_exp # for each experience - - """ - - PLUGIN_CLASS = BaseSGDPlugin - - def __init__( - self, - model: Module, - optimizer: Optimizer, - criterion=CrossEntropyLoss(), - train_mb_size: int = 1, - train_epochs: int = 1, - eval_mb_size: Optional[int] = 1, - device="cpu", - plugins: Optional[List["SupervisedPlugin"]] = None, - evaluator: EvaluationPlugin = default_evaluator, - eval_every=-1, - peval_mode="epoch", - ): - """Init. - - :param model: PyTorch model. - :param optimizer: PyTorch optimizer. - :param criterion: loss function. - :param train_mb_size: mini-batch size for training. - :param train_epochs: number of training epochs. - :param eval_mb_size: mini-batch size for eval. - :param evaluator: (optional) instance of EvaluationPlugin for logging - and metric computations. None to remove logging. - :param eval_every: the frequency of the calls to `eval` inside the - training loop. -1 disables the evaluation. 0 means `eval` is called - only at the end of the learning experience. Values >0 mean that - `eval` is called every `eval_every` epochs and at the end of the - learning experience. - :param peval_mode: one of {'epoch', 'iteration'}. Decides whether the - periodic evaluation during training should execute every - `eval_every` epochs or iterations (Default='epoch'). - """ - super().__init__(model=model, device=device, plugins=plugins) - - self.optimizer: Optimizer = optimizer - """ PyTorch optimizer. """ - - self._criterion = criterion - """ Criterion. """ - - self.train_epochs: int = train_epochs - """ Number of training epochs. """ - - self.train_mb_size: int = train_mb_size - """ Training mini-batch size. """ - - self.eval_mb_size: int = ( - train_mb_size if eval_mb_size is None else eval_mb_size - ) - """ Eval mini-batch size. """ - - if evaluator is None: - evaluator = EvaluationPlugin() - self.plugins.append(evaluator) - self.evaluator = evaluator - """ EvaluationPlugin used for logging and metric computations. """ - - # Configure periodic evaluation. - assert peval_mode in {"experience", "epoch", "iteration"} - self.eval_every = eval_every - peval = PeriodicEval(eval_every, peval_mode) - self.plugins.append(peval) - - self.clock = Clock() - """ Incremental counters for strategy events. """ - # WARNING: Clock needs to be the last plugin, otherwise - # counters will be wrong for plugins called after it. - self.plugins.append(self.clock) - - ################################################################### - # State variables. These are updated during the train/eval loops. # - ################################################################### - - self.adapted_dataset = None - """ Data used to train. It may be modified by plugins. Plugins can - append data to it (e.g. for replay). - - .. note:: - - This dataset may contain samples from different experiences. If you - want the original data for the current experience - use :attr:`.BaseTemplate.experience`. - """ - - self.dataloader = None - """ Dataloader. """ - - self.mbatch = None - """ Current mini-batch. """ - - self.mb_output = None - """ Model's output computed on the current mini-batch. """ - - self.loss = None - """ Loss of the current mini-batch. 
""" - - self._stop_training = False - - def train(self, - experiences: Union[CLExperience, - ExpSequence], - eval_streams: Optional[Sequence[Union[CLExperience, - ExpSequence]]] = None, - **kwargs): - - super().train(experiences, eval_streams, **kwargs) - return self.evaluator.get_last_metrics() - - @torch.no_grad() - def eval(self, exp_list: Union[CLExperience, CLStream], **kwargs): - """ - Evaluate the current model on a series of experiences and - returns the last recorded value for each metric. - - :param exp_list: CL experience information. - :param kwargs: custom arguments. - - :return: dictionary containing last recorded value for - each metric name - """ - super().eval(exp_list, **kwargs) - return self.evaluator.get_last_metrics() - - def _train_exp( - self, experience: CLExperience, eval_streams, **kwargs - ): - # Should be implemented in Observation Type - raise NotImplementedError() - - def _eval_exp(self, **kwargs): - self.eval_epoch(**kwargs) - - def make_optimizer(self, **kwargs): - """Optimizer initialization.""" - # Should be implemented in Observation Type - raise NotImplementedError() - - def criterion(self): - """Compute loss function.""" - raise NotImplementedError() - - def forward(self): - """Compute the model's output given the current mini-batch.""" - raise NotImplementedError() - - def model_adaptation(self, model=None): - """Adapts the model to the current experience.""" - raise NotImplementedError() - - def stop_training(self): - """Signals to stop training at the next iteration.""" - self._stop_training = True - - def training_epoch(self, **kwargs): - # Should be implemented in Update Type - raise NotADirectoryError() - - def backward(self): - """Run the backward pass.""" - self.loss.backward() - - def optimizer_step(self): - """Execute the optimizer step (weights update).""" - self.optimizer.step() - - def eval_epoch(self, **kwargs): - """Evaluation loop over the current `self.dataloader`.""" - for self.mbatch in self.dataloader: - self._unpack_minibatch() - self._before_eval_iteration(**kwargs) - - self._before_eval_forward(**kwargs) - self.mb_output = self.forward() - self._after_eval_forward(**kwargs) - self.loss = self.criterion() - - self._after_eval_iteration(**kwargs) - - # ==================================================================> NEW - - def maybe_adapt_model_and_make_optimizer(self): - # Should be implemented in observation type - raise NotImplementedError() - - def _before_training_exp(self, **kwargs): - """Setup to train on a single experience.""" - # Data Adaptation (e.g. add new samples/data augmentation) - self._before_train_dataset_adaptation(**kwargs) - self.train_dataset_adaptation(**kwargs) - self._after_train_dataset_adaptation(**kwargs) - - self.make_train_dataloader(**kwargs) - - # Model Adaptation (e.g. freeze/add new units) - # self.model = self.model_adaptation() - # self.make_optimizer() - self.maybe_adapt_model_and_make_optimizer() - - super()._before_training_exp(**kwargs) - - def _save_train_state(self): - """Save the training state which may be modified by the eval loop. - - This currently includes: experience, adapted_dataset, dataloader, - is_training, and train/eval modes for each module. - - TODO: we probably need a better way to do this. 
- """ - state = super()._save_train_state() - new_state = { - "adapted_dataset": self.adapted_dataset, - "dataloader": self.dataloader, - } - return {**state, **new_state} - - def train_dataset_adaptation(self, **kwargs): - """Initialize `self.adapted_dataset`.""" - self.adapted_dataset = self.experience.dataset - self.adapted_dataset = self.adapted_dataset.train() - - def _load_train_state(self, prev_state): - super()._load_train_state(prev_state) - self.adapted_dataset = prev_state["adapted_dataset"] - self.dataloader = prev_state["dataloader"] - - def _before_eval_exp(self, **kwargs): - - # Data Adaptation - self._before_eval_dataset_adaptation(**kwargs) - self.eval_dataset_adaptation(**kwargs) - self._after_eval_dataset_adaptation(**kwargs) - - self.make_eval_dataloader(**kwargs) - # Model Adaptation (e.g. freeze/add new units) - self.model = self.model_adaptation() - - super()._before_eval_exp(**kwargs) - - def make_train_dataloader( - self, - num_workers=0, - shuffle=True, - pin_memory=True, - persistent_workers=False, - **kwargs - ): - """Data loader initialization. - - Called at the start of each learning experience after the dataset - adaptation. - - :param num_workers: number of thread workers for the data loading. - :param shuffle: True if the data should be shuffled, False otherwise. - :param pin_memory: If True, the data loader will copy Tensors into CUDA - pinned memory before returning them. Defaults to True. - """ - - other_dataloader_args = {} - - if parse_version(torch.__version__) >= parse_version("1.7.0"): - other_dataloader_args["persistent_workers"] = persistent_workers - for k, v in kwargs.items(): - other_dataloader_args[k] = v - - self.dataloader = TaskBalancedDataLoader( - self.adapted_dataset, - oversample_small_groups=True, - num_workers=num_workers, - batch_size=self.train_mb_size, - shuffle=shuffle, - pin_memory=pin_memory, - **other_dataloader_args - ) - - def make_eval_dataloader( - self, num_workers=0, pin_memory=True, persistent_workers=False, **kwargs - ): - """ - Initializes the eval data loader. - :param num_workers: How many subprocesses to use for data loading. - 0 means that the data will be loaded in the main process. - (default: 0). - :param pin_memory: If True, the data loader will copy Tensors into CUDA - pinned memory before returning them. Defaults to True. - :param kwargs: - :return: - """ - other_dataloader_args = {} - - if parse_version(torch.__version__) >= parse_version("1.7.0"): - other_dataloader_args["persistent_workers"] = persistent_workers - for k, v in kwargs.items(): - other_dataloader_args[k] = v - - collate_from_data_or_kwargs(self.adapted_dataset, - other_dataloader_args) - self.dataloader = DataLoader( - self.adapted_dataset, - num_workers=num_workers, - batch_size=self.eval_mb_size, - pin_memory=pin_memory, - **other_dataloader_args - ) - - def eval_dataset_adaptation(self, **kwargs): - """Initialize `self.adapted_dataset`.""" - self.adapted_dataset = self.experience.dataset - self.adapted_dataset = self.adapted_dataset.eval() - - def model_adaptation(self, model=None): - """Adapts the model to the current data. - - Calls the :class:`~avalanche.models.DynamicModule`s adaptation. 
- """ - if model is None: - model = self.model - avalanche_model_adaptation(model, self.experience) - return model.to(self.device) - - def _unpack_minibatch(self): - """Move to device""" - # First verify the mini-batch - self._check_minibatch() - - for i in range(len(self.mbatch)): - self.mbatch[i] = self.mbatch[i].to(self.device) - - ######################################################### - # Plugin Triggers # - ######################################################### - - def _before_training_epoch(self, **kwargs): - trigger_plugins(self, "before_training_epoch", **kwargs) - - def _after_training_epoch(self, **kwargs): - trigger_plugins(self, "after_training_epoch", **kwargs) - - def _before_training_iteration(self, **kwargs): - trigger_plugins(self, "before_training_iteration", **kwargs) - - def _before_forward(self, **kwargs): - trigger_plugins(self, "before_forward", **kwargs) - - def _after_forward(self, **kwargs): - trigger_plugins(self, "after_forward", **kwargs) - - def _before_backward(self, **kwargs): - trigger_plugins(self, "before_backward", **kwargs) - - def _after_backward(self, **kwargs): - trigger_plugins(self, "after_backward", **kwargs) - - def _after_training_iteration(self, **kwargs): - trigger_plugins(self, "after_training_iteration", **kwargs) - - def _before_update(self, **kwargs): - trigger_plugins(self, "before_update", **kwargs) - - def _after_update(self, **kwargs): - trigger_plugins(self, "after_update", **kwargs) - - def _before_eval_iteration(self, **kwargs): - trigger_plugins(self, "before_eval_iteration", **kwargs) - - def _before_eval_forward(self, **kwargs): - trigger_plugins(self, "before_eval_forward", **kwargs) - - def _after_eval_forward(self, **kwargs): - trigger_plugins(self, "after_eval_forward", **kwargs) - - def _after_eval_iteration(self, **kwargs): - trigger_plugins(self, "after_eval_iteration", **kwargs) - - # ==================================================================> NEW - - def _before_train_dataset_adaptation(self, **kwargs): - trigger_plugins(self, "before_train_dataset_adaptation", **kwargs) - - def _after_train_dataset_adaptation(self, **kwargs): - trigger_plugins(self, "after_train_dataset_adaptation", **kwargs) - - def _before_eval_dataset_adaptation(self, **kwargs): - trigger_plugins(self, "before_eval_dataset_adaptation", **kwargs) - - def _after_eval_dataset_adaptation(self, **kwargs): - trigger_plugins(self, "after_eval_dataset_adaptation", **kwargs) - - -class PeriodicEval(SupervisedPlugin): - """Schedules periodic evaluation during training. - - This plugin is automatically configured and added by the BaseTemplate. - """ - - def __init__(self, eval_every=-1, peval_mode="epoch", do_initial=True): - """Init. - - :param eval_every: the frequency of the calls to `eval` inside the - training loop. -1 disables the evaluation. 0 means `eval` is called - only at the end of the learning experience. Values >0 mean that - `eval` is called every `eval_every` epochs and at the end of the - learning experience. - :param peval_mode: one of {'epoch', 'iteration'}. Decides whether the - periodic evaluation during training should execute every - `eval_every` epochs or iterations (Default='epoch'). - :param do_initial: whether to evaluate before each `train` call. - Occasionally needed becuase some metrics need to know the - accuracy before training. 
- """ - super().__init__() - assert peval_mode in {"experience", "epoch", "iteration"} - self.eval_every = eval_every - self.peval_mode = peval_mode - self.do_initial = do_initial and eval_every > -1 - self.do_final = None - self._is_eval_updated = False - - def before_training(self, strategy, **kwargs): - """Eval before each learning experience. - - Occasionally needed because some metrics need the accuracy before - training. - """ - if self.do_initial: - self._peval(strategy, **kwargs) - - def before_training_exp(self, strategy, **kwargs): - # We evaluate at the start of each experience because train_epochs - # could change. - self.do_final = True - if self.peval_mode == "epoch": - if ( - self.eval_every > 0 - and (strategy.train_epochs - 1) % self.eval_every == 0 - ): - self.do_final = False - else: # peval_mode == 'iteration' - # we may need to fix this but we don't have a way to know - # the number of total iterations. - # Right now there may be two eval calls at the last iterations. - pass - self.do_final = self.do_final and self.eval_every > -1 - - def _peval(self, strategy, **kwargs): - for el in strategy._eval_streams: - strategy.eval(el, **kwargs) - - def _maybe_peval(self, strategy, counter, **kwargs): - if self.eval_every > 0 and counter % self.eval_every == 0: - self._peval(strategy, **kwargs) - - def after_training_epoch(self, strategy: "BaseSGDTemplate", - **kwargs): - """Periodic eval controlled by `self.eval_every` and - `self.peval_mode`.""" - if self.peval_mode == "epoch": - self._maybe_peval(strategy, strategy.clock.train_exp_epochs, - **kwargs) - - def after_training_iteration(self, strategy: "BaseSGDTemplate", - **kwargs): - """Periodic eval controlled by `self.eval_every` and - `self.peval_mode`.""" - if self.peval_mode == "iteration": - self._maybe_peval(strategy, strategy.clock.train_exp_iterations, - **kwargs) - - # ---> New - def after_training_exp(self, strategy, **kwargs): - """Final eval after a learning experience.""" - if self.do_final: - self._peval(strategy, **kwargs) - - # def after_training_exp(self, strategy: "BaseOnlineSGDTemplate", **kwargs): - # """Periodic eval controlled by `self.eval_every` and - # `self.peval_mode`.""" - # if self.peval_mode == "experience": - # self._maybe_peval(strategy, strategy.clock.train_exp_counter, - # **kwargs) diff --git a/avalanche/training/templates/__init__.py b/avalanche/training/templates/__init__.py index 191c78e28..5438e8d9b 100644 --- a/avalanche/training/templates/__init__.py +++ b/avalanche/training/templates/__init__.py @@ -11,6 +11,8 @@ """ from .base import BaseTemplate from .base_sgd import BaseSGDTemplate -from .base_online_sgd import BaseOnlineSGDTemplate -from .online_supervised import OnlineSupervisedTemplate -from .supervised import SupervisedTemplate +from .common_templates import ( + SupervisedTemplate, + SupervisedMetaLearningTemplate, + OnlineSupervisedTemplate +) diff --git a/avalanche/training/templates/base_sgd.py b/avalanche/training/templates/base_sgd.py index 36e550f45..41c2f57e5 100644 --- a/avalanche/training/templates/base_sgd.py +++ b/avalanche/training/templates/base_sgd.py @@ -1,8 +1,10 @@ from typing import Iterable, Sequence, Optional, Union, List +from pkg_resources import parse_version import torch -from torch.nn import Module +from torch.nn import Module, CrossEntropyLoss from torch.optim import Optimizer +from torch.utils.data import DataLoader from avalanche.benchmarks import CLExperience, CLStream from avalanche.core import BaseSGDPlugin @@ -10,17 +12,14 @@ from 
avalanche.training.plugins.clock import Clock from avalanche.training.plugins.evaluation import default_evaluator from avalanche.training.templates.base import BaseTemplate, ExpSequence - -from typing import TYPE_CHECKING - +from avalanche.models.utils import avalanche_model_adaptation +from avalanche.benchmarks.utils.data_loader import TaskBalancedDataLoader, \ + collate_from_data_or_kwargs from avalanche.training.utils import trigger_plugins -if TYPE_CHECKING: - from avalanche.training.templates.supervised import SupervisedTemplate - class BaseSGDTemplate(BaseTemplate): - """Base class for continual learning skeletons. + """Base SGD class for continual learning skeletons. **Training loop** The training loop is organized as follows:: @@ -42,12 +41,13 @@ def __init__( self, model: Module, optimizer: Optimizer, + criterion=CrossEntropyLoss(), train_mb_size: int = 1, train_epochs: int = 1, eval_mb_size: Optional[int] = 1, device="cpu", plugins: Optional[List["SupervisedPlugin"]] = None, - evaluator: EvaluationPlugin = default_evaluator(), + evaluator: EvaluationPlugin = default_evaluator, eval_every=-1, peval_mode="epoch", ): @@ -55,6 +55,7 @@ def __init__( :param model: PyTorch model. :param optimizer: PyTorch optimizer. + :param criterion: loss function. :param train_mb_size: mini-batch size for training. :param train_epochs: number of training epochs. :param eval_mb_size: mini-batch size for eval. @@ -74,6 +75,9 @@ def __init__( self.optimizer: Optimizer = optimizer """ PyTorch optimizer. """ + self._criterion = criterion + """ Criterion. """ + self.train_epochs: int = train_epochs """ Number of training epochs. """ @@ -92,7 +96,7 @@ def __init__( """ EvaluationPlugin used for logging and metric computations. """ # Configure periodic evaluation. - assert peval_mode in {"epoch", "iteration"} + assert peval_mode in {"experience", "epoch", "iteration"} self.eval_every = eval_every peval = PeriodicEval(eval_every, peval_mode) self.plugins.append(peval) @@ -107,6 +111,17 @@ def __init__( # State variables. These are updated during the train/eval loops. # ################################################################### + self.adapted_dataset = None + """ Data used to train. It may be modified by plugins. Plugins can + append data to it (e.g. for replay). + + .. note:: + + This dataset may contain samples from different experiences. If you + want the original data for the current experience + use :attr:`.BaseTemplate.experience`. + """ + self.dataloader = None """ Dataloader. """ @@ -127,6 +142,7 @@ def train(self, eval_streams: Optional[Sequence[Union[CLExperience, ExpSequence]]] = None, **kwargs): + super().train(experiences, eval_streams, **kwargs) return self.evaluator.get_last_metrics() @@ -145,58 +161,18 @@ def eval(self, exp_list: Union[CLExperience, CLStream], **kwargs): super().eval(exp_list, **kwargs) return self.evaluator.get_last_metrics() - def _before_training_exp(self, **kwargs): - self.make_train_dataloader(**kwargs) - # Model Adaptation (e.g. freeze/add new units) - self.model = self.model_adaptation() - self.make_optimizer() - super()._before_training_exp(**kwargs) - def _train_exp( - self, experience: CLExperience, eval_streams=None, **kwargs + self, experience: CLExperience, eval_streams, **kwargs ): - """Training loop over a single Experience object. - - :param experience: CL experience information. - :param eval_streams: list of streams for evaluation. - If None: use the training experience for evaluation. - Use [] if you do not want to evaluate during training. 
- :param kwargs: custom arguments. - """ - if eval_streams is None: - eval_streams = [experience] - for i, exp in enumerate(eval_streams): - if not isinstance(exp, Iterable): - eval_streams[i] = [exp] - for _ in range(self.train_epochs): - self._before_training_epoch(**kwargs) - - if self._stop_training: # Early stopping - self._stop_training = False - break - - self.training_epoch(**kwargs) - self._after_training_epoch(**kwargs) - - def _before_eval_exp(self, **kwargs): - self.make_eval_dataloader(**kwargs) - # Model Adaptation (e.g. freeze/add new units) - self.model = self.model_adaptation() - super()._before_eval_exp(**kwargs) + # Should be implemented in Observation Type + raise NotImplementedError() def _eval_exp(self, **kwargs): self.eval_epoch(**kwargs) - def make_train_dataloader(self, **kwargs): - """Assign dataloader to self.dataloader.""" - raise NotImplementedError() - - def make_eval_dataloader(self, **kwargs): - """Assign dataloader to self.dataloader.""" - raise NotImplementedError() - def make_optimizer(self, **kwargs): """Optimizer initialization.""" + # Should be implemented in Observation Type raise NotImplementedError() def criterion(self): @@ -216,39 +192,8 @@ def stop_training(self): self._stop_training = True def training_epoch(self, **kwargs): - """Training epoch. - - :param kwargs: - :return: - """ - for self.mbatch in self.dataloader: - if self._stop_training: - break - - self._unpack_minibatch() - self._before_training_iteration(**kwargs) - - self.optimizer.zero_grad() - self.loss = 0 - - # Forward - self._before_forward(**kwargs) - self.mb_output = self.forward() - self._after_forward(**kwargs) - - # Loss & Backward - self.loss += self.criterion() - - self._before_backward(**kwargs) - self.backward() - self._after_backward(**kwargs) - - # Optimization step - self._before_update(**kwargs) - self.optimizer_step() - self._after_update(**kwargs) - - self._after_training_iteration(**kwargs) + # Should be implemented in Update Type + raise NotADirectoryError() def backward(self): """Run the backward pass.""" @@ -271,8 +216,152 @@ def eval_epoch(self, **kwargs): self._after_eval_iteration(**kwargs) + # ==================================================================> NEW + + def maybe_adapt_model_and_make_optimizer(self): + # Should be implemented in observation type + raise NotImplementedError() + + def _before_training_exp(self, **kwargs): + """Setup to train on a single experience.""" + # Data Adaptation (e.g. add new samples/data augmentation) + self._before_train_dataset_adaptation(**kwargs) + self.train_dataset_adaptation(**kwargs) + self._after_train_dataset_adaptation(**kwargs) + + self.make_train_dataloader(**kwargs) + + # Model Adaptation (e.g. freeze/add new units) + # self.model = self.model_adaptation() + # self.make_optimizer() + self.maybe_adapt_model_and_make_optimizer() + + super()._before_training_exp(**kwargs) + + def _save_train_state(self): + """Save the training state which may be modified by the eval loop. + + This currently includes: experience, adapted_dataset, dataloader, + is_training, and train/eval modes for each module. + + TODO: we probably need a better way to do this. 
+ """ + state = super()._save_train_state() + new_state = { + "adapted_dataset": self.adapted_dataset, + "dataloader": self.dataloader, + } + return {**state, **new_state} + + def train_dataset_adaptation(self, **kwargs): + """Initialize `self.adapted_dataset`.""" + self.adapted_dataset = self.experience.dataset + self.adapted_dataset = self.adapted_dataset.train() + + def _load_train_state(self, prev_state): + super()._load_train_state(prev_state) + self.adapted_dataset = prev_state["adapted_dataset"] + self.dataloader = prev_state["dataloader"] + + def _before_eval_exp(self, **kwargs): + + # Data Adaptation + self._before_eval_dataset_adaptation(**kwargs) + self.eval_dataset_adaptation(**kwargs) + self._after_eval_dataset_adaptation(**kwargs) + + self.make_eval_dataloader(**kwargs) + # Model Adaptation (e.g. freeze/add new units) + self.model = self.model_adaptation() + + super()._before_eval_exp(**kwargs) + + def make_train_dataloader( + self, + num_workers=0, + shuffle=True, + pin_memory=True, + persistent_workers=False, + **kwargs + ): + """Data loader initialization. + + Called at the start of each learning experience after the dataset + adaptation. + + :param num_workers: number of thread workers for the data loading. + :param shuffle: True if the data should be shuffled, False otherwise. + :param pin_memory: If True, the data loader will copy Tensors into CUDA + pinned memory before returning them. Defaults to True. + """ + + other_dataloader_args = {} + + if parse_version(torch.__version__) >= parse_version("1.7.0"): + other_dataloader_args["persistent_workers"] = persistent_workers + for k, v in kwargs.items(): + other_dataloader_args[k] = v + + self.dataloader = TaskBalancedDataLoader( + self.adapted_dataset, + oversample_small_groups=True, + num_workers=num_workers, + batch_size=self.train_mb_size, + shuffle=shuffle, + pin_memory=pin_memory, + **other_dataloader_args + ) + + def make_eval_dataloader( + self, num_workers=0, pin_memory=True, persistent_workers=False, **kwargs + ): + """ + Initializes the eval data loader. + :param num_workers: How many subprocesses to use for data loading. + 0 means that the data will be loaded in the main process. + (default: 0). + :param pin_memory: If True, the data loader will copy Tensors into CUDA + pinned memory before returning them. Defaults to True. + :param kwargs: + :return: + """ + other_dataloader_args = {} + + if parse_version(torch.__version__) >= parse_version("1.7.0"): + other_dataloader_args["persistent_workers"] = persistent_workers + for k, v in kwargs.items(): + other_dataloader_args[k] = v + + collate_from_data_or_kwargs(self.adapted_dataset, + other_dataloader_args) + self.dataloader = DataLoader( + self.adapted_dataset, + num_workers=num_workers, + batch_size=self.eval_mb_size, + pin_memory=pin_memory, + **other_dataloader_args + ) + + def eval_dataset_adaptation(self, **kwargs): + """Initialize `self.adapted_dataset`.""" + self.adapted_dataset = self.experience.dataset + self.adapted_dataset = self.adapted_dataset.eval() + + def model_adaptation(self, model=None): + """Adapts the model to the current data. + + Calls the :class:`~avalanche.models.DynamicModule`s adaptation. 
+ """ + if model is None: + model = self.model + avalanche_model_adaptation(model, self.experience) + return model.to(self.device) + def _unpack_minibatch(self): """Move to device""" + # First verify the mini-batch + self._check_minibatch() + for i in range(len(self.mbatch)): self.mbatch[i] = self.mbatch[i].to(self.device) @@ -322,6 +411,20 @@ def _after_eval_forward(self, **kwargs): def _after_eval_iteration(self, **kwargs): trigger_plugins(self, "after_eval_iteration", **kwargs) + # ==================================================================> NEW + + def _before_train_dataset_adaptation(self, **kwargs): + trigger_plugins(self, "before_train_dataset_adaptation", **kwargs) + + def _after_train_dataset_adaptation(self, **kwargs): + trigger_plugins(self, "after_train_dataset_adaptation", **kwargs) + + def _before_eval_dataset_adaptation(self, **kwargs): + trigger_plugins(self, "before_eval_dataset_adaptation", **kwargs) + + def _after_eval_dataset_adaptation(self, **kwargs): + trigger_plugins(self, "after_eval_dataset_adaptation", **kwargs) + class PeriodicEval(SupervisedPlugin): """Schedules periodic evaluation during training. @@ -345,7 +448,7 @@ def __init__(self, eval_every=-1, peval_mode="epoch", do_initial=True): accuracy before training. """ super().__init__() - assert peval_mode in {"epoch", "iteration"} + assert peval_mode in {"experience", "epoch", "iteration"} self.eval_every = eval_every self.peval_mode = peval_mode self.do_initial = do_initial and eval_every > -1 @@ -378,11 +481,6 @@ def before_training_exp(self, strategy, **kwargs): pass self.do_final = self.do_final and self.eval_every > -1 - def after_training_exp(self, strategy, **kwargs): - """Final eval after a learning experience.""" - if self.do_final: - self._peval(strategy, **kwargs) - def _peval(self, strategy, **kwargs): for el in strategy._eval_streams: strategy.eval(el, **kwargs) @@ -391,16 +489,31 @@ def _maybe_peval(self, strategy, counter, **kwargs): if self.eval_every > 0 and counter % self.eval_every == 0: self._peval(strategy, **kwargs) - def after_training_epoch(self, strategy: "BaseSGDTemplate", **kwargs): + def after_training_epoch(self, strategy: "BaseSGDTemplate", + **kwargs): """Periodic eval controlled by `self.eval_every` and `self.peval_mode`.""" if self.peval_mode == "epoch": self._maybe_peval(strategy, strategy.clock.train_exp_epochs, **kwargs) - def after_training_iteration(self, strategy: "BaseSGDTemplate", **kwargs): + def after_training_iteration(self, strategy: "BaseSGDTemplate", + **kwargs): """Periodic eval controlled by `self.eval_every` and `self.peval_mode`.""" if self.peval_mode == "iteration": self._maybe_peval(strategy, strategy.clock.train_exp_iterations, **kwargs) + + # ---> New + def after_training_exp(self, strategy, **kwargs): + """Final eval after a learning experience.""" + if self.do_final: + self._peval(strategy, **kwargs) + + # def after_training_exp(self, strategy: "BaseOnlineSGDTemplate", **kwargs): + # """Periodic eval controlled by `self.eval_every` and + # `self.peval_mode`.""" + # if self.peval_mode == "experience": + # self._maybe_peval(strategy, strategy.clock.train_exp_counter, + # **kwargs) diff --git a/avalanche/training/templates/NEW_common_templates.py b/avalanche/training/templates/common_templates.py similarity index 99% rename from avalanche/training/templates/NEW_common_templates.py rename to avalanche/training/templates/common_templates.py index eb3fdd6be..54aefd058 100644 --- a/avalanche/training/templates/NEW_common_templates.py +++ 
b/avalanche/training/templates/common_templates.py @@ -9,7 +9,7 @@ from .observation_type import * from .problem_type import * from .update_type import * -from .NEW_base_sgd import BaseSGDTemplate +from .base_sgd import BaseSGDTemplate class SupervisedTemplate(BatchObservation, SupervisedProblem, SGDUpdate, diff --git a/examples/NEW_lamaml.py b/examples/NEW_lamaml.py index ec456461d..5e30e303e 100644 --- a/examples/NEW_lamaml.py +++ b/examples/NEW_lamaml.py @@ -9,7 +9,7 @@ from avalanche.training.plugins import EvaluationPlugin from avalanche.benchmarks.classic import SplitMNIST from avalanche.logging import InteractiveLogger -from avalanche.training.templates.NEW_common_templates import ( +from avalanche.training.templates.common_templates import ( SupervisedMetaLearningTemplate ) from avalanche.training.plugins.NEW_lamaml import LaMAMLPlugin diff --git a/examples/NEW_online_naive.py b/examples/NEW_online_naive.py deleted file mode 100644 index 249bf9437..000000000 --- a/examples/NEW_online_naive.py +++ /dev/null @@ -1,137 +0,0 @@ -################################################################################ -# Copyright (c) 2021 ContinualAI. # -# Copyrights licensed under the MIT License. # -# See the accompanying LICENSE file for terms. # -# # -# Date: 12-10-2020 # -# Author(s): Vincenzo Lomonaco, Hamed Hemati # -# E-mail: contact@continualai.org # -# Website: avalanche.continualai.org # -################################################################################ - -""" -This is a simple example on how to use the Naive strategy. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from os.path import expanduser - -import argparse -import torch -from torch.nn import CrossEntropyLoss -from torchvision import transforms -from torchvision.datasets import MNIST -from torchvision.transforms import ToTensor, RandomCrop -import torch.optim.lr_scheduler -from avalanche.benchmarks import nc_benchmark -from avalanche.models import SimpleMLP -from avalanche.training.supervised.NEW_strategy_wrappers import OnlineNaive -from avalanche.benchmarks.scenarios.online_scenario import OnlineCLScenario -from avalanche.evaluation.metrics import ( - forgetting_metrics, - accuracy_metrics, - loss_metrics, -) -from avalanche.logging import InteractiveLogger -from avalanche.training.plugins import EvaluationPlugin - - -def main(args): - # --- CONFIG - device = torch.device( - f"cuda:{args.cuda}" - if torch.cuda.is_available() and args.cuda >= 0 - else "cpu" - ) - n_batches = 5 - # --------- - - # --- TRANSFORMATIONS - train_transform = transforms.Compose( - [ - RandomCrop(28, padding=4), - ToTensor(), - transforms.Normalize((0.1307,), (0.3081,)), - ] - ) - test_transform = transforms.Compose( - [ToTensor(), transforms.Normalize((0.1307,), (0.3081,))] - ) - # --------- - - # --- SCENARIO CREATION - mnist_train = MNIST( - root=expanduser("~") + "/.avalanche/data/mnist/", - train=True, - download=True, - transform=train_transform, - ) - mnist_test = MNIST( - root=expanduser("~") + "/.avalanche/data/mnist/", - train=False, - download=True, - transform=test_transform, - ) - scenario = nc_benchmark( - mnist_train, mnist_test, n_batches, task_labels=False, seed=1234 - ) - # --------- - - # MODEL CREATION - model = SimpleMLP(num_classes=scenario.n_classes) - - # choose some metrics and evaluation method - interactive_logger = InteractiveLogger() - - eval_plugin = EvaluationPlugin( - accuracy_metrics( - minibatch=True, epoch=True, 
experience=True, stream=True - ), - loss_metrics(minibatch=True, epoch=True, experience=True, stream=True), - forgetting_metrics(experience=True), - loggers=[interactive_logger], - ) - - # CREATE THE STRATEGY INSTANCE (ONLINE-NAIVE) - cl_strategy = OnlineNaive( - model, - torch.optim.Adam(model.parameters(), lr=0.1), - CrossEntropyLoss(), - train_passes=1, - train_mb_size=1, - eval_mb_size=32, - device=device, - evaluator=eval_plugin, - ) - - # TRAINING LOOP - print("Starting experiment...") - results = [] - - # Create online benchmark - batch_streams = scenario.streams.values() - # ocl_benchmark = OnlineCLScenario(batch_streams) - for i, exp in enumerate(scenario.train_stream): - # Create online scenario from experience exp - ocl_benchmark = OnlineCLScenario(original_streams=batch_streams, - experiences=exp, - experience_size=1, - access_task_boundaries=True) - # Train on the online train stream of the scenario - cl_strategy.train(ocl_benchmark.train_stream) - results.append(cl_strategy.eval(scenario.original_test_stream)) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "--cuda", - type=int, - default=0, - help="Select zero-indexed cuda device. -1 to use CPU.", - ) - args = parser.parse_args() - main(args) diff --git a/examples/lamaml_cifar100.py b/examples/lamaml_cifar100.py index a974031f8..7ac903238 100644 --- a/examples/lamaml_cifar100.py +++ b/examples/lamaml_cifar100.py @@ -10,7 +10,7 @@ import torch.optim.lr_scheduler from avalanche.benchmarks.classic import SplitCIFAR100 from avalanche.models import MTSimpleCNN -from avalanche.training.supervised import LaMAML +from avalanche.training.supervised.lamaml import LaMAML from avalanche.training.plugins import ReplayPlugin from avalanche.training.storage_policy import ReservoirSamplingBuffer from avalanche.evaluation.metrics import ( diff --git a/examples/NEW_naive.py b/examples/naive.py similarity index 96% rename from examples/NEW_naive.py rename to examples/naive.py index 5aa29388a..91e895dac 100644 --- a/examples/NEW_naive.py +++ b/examples/naive.py @@ -9,7 +9,7 @@ from avalanche.training.plugins import EvaluationPlugin from avalanche.benchmarks.classic import SplitMNIST from avalanche.logging import InteractiveLogger -from avalanche.training.supervised.NEW_strategy_wrappers import ( +from avalanche.training.supervised import ( Naive ) From 50f2683e30e2dca508fa973021f15f169bb14f53 Mon Sep 17 00:00:00 2001 From: hamedhemati Date: Wed, 31 Aug 2022 17:38:06 +0200 Subject: [PATCH 06/10] Update meta-learning example w.r.t the new template --- avalanche/training/plugins/NEW_lamaml.py | 251 ------------------ avalanche/training/supervised/lamaml.py | 93 +++---- .../templates/update_type/meta_update.py | 4 +- examples/NEW_lamaml.py | 73 ----- examples/lamaml_cifar100.py | 2 +- 5 files changed, 43 insertions(+), 380 deletions(-) delete mode 100644 avalanche/training/plugins/NEW_lamaml.py delete mode 100644 examples/NEW_lamaml.py diff --git a/avalanche/training/plugins/NEW_lamaml.py b/avalanche/training/plugins/NEW_lamaml.py deleted file mode 100644 index a44e8057d..000000000 --- a/avalanche/training/plugins/NEW_lamaml.py +++ /dev/null @@ -1,251 +0,0 @@ -from typing import TYPE_CHECKING - -import torch -import torch.nn as nn -import torch.nn.functional as F - -import math - -try: - import higher -except ImportError: - raise ModuleNotFoundError("higher not found, if you want to use " - "MAML please install avalanche with " - "the extra dependencies: " - "pip install avalanche-lib[extra]") - -from 
avalanche.NEW_core import BaseSGDPlugin -from avalanche.models.utils import avalanche_forward - - -class LaMAMLPlugin(BaseSGDPlugin): - """LaMAML Plugin. - """ - - def __init__( - self, - n_inner_updates: int = 5, - second_order: bool = True, - grad_clip_norm: float = 1.0, - learn_lr: bool = True, - lr_alpha: float = 0.25, - sync_update: bool = False, - alpha_init: float = 0.1, - ): - """Implementation of Look-ahead MAML (LaMAML) algorithm in Avalanche - using Higher library for applying fast updates. - - :param n_inner_updates: number of inner updates. - :param second_order: If True, it computes the second-order derivative - of the inner update trajectory for the meta-loss. Otherwise, - it computes the meta-loss with a first-order approximation. - :param grad_clip_norm: gradient clipping norm. - :param learn_lr: if True, it learns the LR for each batch of data. - :param lr_alpha: LR for learning the main update's learning rate. - :param sync_update: if True, it updates the meta-model with a fixed - learning rate. Mutually exclusive with learn_lr and - lr_alpha. - :param alpha_init: initialization value for learnable LRs. - - """ - - super().__init__() - - self.n_inner_updates = n_inner_updates - self.second_order = second_order - self.grad_clip_norm = grad_clip_norm - self.learn_lr = learn_lr - self.lr_alpha = lr_alpha - self.sync_update = sync_update - self.alpha_init = alpha_init - self.alpha_params = None - self.is_model_initialized = False - - def before_training(self, strategy, **kwargs): - if not self.is_model_initialized: - strategy.model.apply(init_kaiming_normal) - self.is_model_initialized = True - - def before_training_exp(self, strategy, **kwargs): - # Initialize alpha-lr parameters - if self.alpha_params is None: - self.alpha_params = nn.ParameterList([]) - # Iterate through model parameters and add the corresponding - # alpha_lr parameter - for p in strategy.model.parameters(): - alpha_param = nn.Parameter( - torch.ones(p.shape) * self.alpha_init, requires_grad=True - ) - self.alpha_params.append(alpha_param) - self.alpha_params.to(strategy.device) - - # Create optimizer for the alpha_lr parameters - self.optimizer_alpha = torch.optim.SGD( - self.alpha_params.parameters(), lr=self.lr_alpha - ) - - # For task-incremental heads: - # If new parameters are added to the model, update alpha_lr - # parameters respectively - if len(self.alpha_params) < len(list(strategy.model.parameters())): - for iter_p, p in enumerate(strategy.model.parameters()): - # Skip the older parameters - if iter_p < len(self.alpha_params): - continue - # Add new alpha_lr for the new parameter - alpha_param = nn.Parameter( - torch.ones(p.shape) * self.alpha_init, requires_grad=True - ) - self.alpha_params.append(alpha_param) - - self.alpha_params.to(strategy.device) - # Re-init optimizer for the new set of alpha_lr parameters - self.optimizer_alpha = torch.optim.SGD( - self.alpha_params.parameters(), lr=self.lr_alpha - ) - - def before_inner_updates(self, strategy, **kwargs): - # Create a stateless copy of the model for inner-updates - self.fast_model = higher.patch.monkeypatch( - strategy.model, - copy_initial_weights=True, - track_higher_grads=self.second_order, - ) - if strategy.clock.train_exp_counter > 0: - self.batch_x = strategy.mb_x[: strategy.train_mb_size] - self.batch_y = strategy.mb_y[: strategy.train_mb_size] - self.batch_t = strategy.mb_task_id[: strategy.train_mb_size] - else: - self.batch_x = strategy.mb_x - self.batch_y = strategy.mb_y - self.batch_t = strategy.mb_task_id - - bsize_data = 
self.batch_x.shape[0] - self.rough_sz = math.ceil(bsize_data / self.n_inner_updates) - self.meta_losses = [0 for _ in range(self.n_inner_updates)] - - def single_inner_update(self, x, y, t, criterion): - logits = avalanche_forward(self.fast_model, x, t) - loss = criterion(logits, y) - - # Compute gradient with respect to the current fast weights - grads = list( - torch.autograd.grad( - loss, - self.fast_model.fast_params, - create_graph=self.second_order, - retain_graph=self.second_order, - allow_unused=True, - ) - ) - - # Clip grad norms - grads = [ - torch.clamp(g, min=-self.grad_clip_norm, max=self.grad_clip_norm) - if g is not None - else g - for g in grads - ] - - # New fast parameters - new_fast_params = [ - param - alpha * grad if grad is not None else param - for (param, alpha, grad) in zip( - self.fast_model.fast_params, self.alpha_params.parameters(), - grads - ) - ] - - # Update fast model's weights - self.fast_model.update_params(new_fast_params) - - def inner_updates(self, strategy, **kwargs): - """Update fast weights using current samples and - return the updated fast model. - """ - for i in range(self.n_inner_updates): - batch_x_i = self.batch_x[i * self.rough_sz: - (i + 1) * self.rough_sz] - batch_y_i = self.batch_y[i * self.rough_sz: - (i + 1) * self.rough_sz] - batch_t_i = self.batch_t[i * self.rough_sz: - (i + 1) * self.rough_sz] - - # We assume that samples for inner update are from the same task - self.single_inner_update(batch_x_i, batch_y_i, batch_t_i, - strategy._criterion) - - # Compute meta-loss with the combination of batch and buffer samples - logits_meta = avalanche_forward( - self.fast_model, strategy.mb_x, strategy.mb_task_id - ) - meta_loss = strategy._criterion(logits_meta, strategy.mb_y) - self.meta_losses[i] = meta_loss - - def apply_grad(self, module, grads, device): - for i, p in enumerate(module.parameters()): - grad = grads[i] - if grad is None: - grad = torch.zeros(p.shape).float().to(device) - - if p.grad is None: - p.grad = grad - else: - p.grad += grad - - def outer_update(self, strategy, **kwargs): - # Compute meta-gradient for the main model - meta_loss = sum(self.meta_losses) / len(self.meta_losses) - meta_grad_model = torch.autograd.grad( - meta_loss, - self.fast_model.parameters(time=0), - retain_graph=True, - allow_unused=True, - ) - strategy.model.zero_grad() - self.apply_grad(strategy.model, meta_grad_model, strategy.device) - - # Clip gradients - torch.nn.utils.clip_grad_norm_( - strategy.model.parameters(), self.grad_clip_norm - ) - - if self.learn_lr: - # Compute meta-gradient for alpha-lr parameters - meta_grad_alpha = torch.autograd.grad( - meta_loss, self.alpha_params.parameters(), allow_unused=True - ) - self.alpha_params.zero_grad() - self.apply_grad(self.alpha_params, meta_grad_alpha, strategy.device) - - torch.nn.utils.clip_grad_norm_( - self.alpha_params.parameters(), self.grad_clip_norm - ) - self.optimizer_alpha.step() - - # If sync-update: update with self.optimizer - # o.w: use the learned LRs to update the model - if self.sync_update: - self.optimizer.step() - else: - for p, alpha in zip( - strategy.model.parameters(), self.alpha_params.parameters() - ): - # Use relu on updated LRs to avoid negative values - p.data = p.data - p.grad * F.relu(alpha) - - strategy.loss = meta_loss - - -def init_kaiming_normal(m): - if isinstance(m, nn.Conv2d): - torch.nn.init.constant_(m.weight.data, 1.0) - torch.nn.init.kaiming_normal_(m.weight.data) - if m.bias is not None: - m.bias.data.zero_() - - elif isinstance(m, nn.Linear): - 
torch.nn.init.constant_(m.weight.data, 1.0) - torch.nn.init.kaiming_normal_(m.weight.data) - if m.bias is not None: - m.bias.data.zero_() diff --git a/avalanche/training/supervised/lamaml.py b/avalanche/training/supervised/lamaml.py index e7852b029..b13c1148f 100644 --- a/avalanche/training/supervised/lamaml.py +++ b/avalanche/training/supervised/lamaml.py @@ -5,6 +5,7 @@ import torch.nn.functional as F from torch.nn import Module, CrossEntropyLoss from torch.optim import Optimizer +import math try: import higher @@ -13,29 +14,14 @@ "MAML please install avalanche with " "the extra dependencies: " "pip install avalanche-lib[extra]") -import math from avalanche.training.plugins import SupervisedPlugin, EvaluationPlugin from avalanche.training.plugins.evaluation import default_evaluator -from avalanche.training.templates import SupervisedTemplate +from avalanche.training.templates import SupervisedMetaLearningTemplate from avalanche.models.utils import avalanche_forward -def init_kaiming_normal(m): - if isinstance(m, nn.Conv2d): - torch.nn.init.constant_(m.weight.data, 1.0) - torch.nn.init.kaiming_normal_(m.weight.data) - if m.bias is not None: - m.bias.data.zero_() - - elif isinstance(m, nn.Linear): - torch.nn.init.constant_(m.weight.data, 1.0) - torch.nn.init.kaiming_normal_(m.weight.data) - if m.bias is not None: - m.bias.data.zero_() - - -class LaMAML(SupervisedTemplate): +class LaMAML(SupervisedMetaLearningTemplate): def __init__( self, model: Module, @@ -139,21 +125,18 @@ def _before_training_exp(self, **kwargs): self.alpha_params.parameters(), lr=self.lr_alpha ) - def training_epoch(self, **kwargs): - for self.mbatch in self.dataloader: - if self._stop_training: - break - - self._unpack_minibatch() - self._before_training_iteration(**kwargs) - self.loss = 0 - - self.train_batch() + def apply_grad(self, module, grads): + for i, p in enumerate(module.parameters()): + grad = grads[i] + if grad is None: + grad = torch.zeros(p.shape).float().to(self.device) - self.mb_output = self.forward() - self._after_training_iteration(**kwargs) + if p.grad is None: + p.grad = grad + else: + p.grad += grad - def inner_update(self, fast_model, x, y, t): + def inner_update_step(self, fast_model, x, y, t): """Update fast weights using current samples and return the updated fast model. 
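This refactor moves the fast-weight loop from the plugin into the strategy itself: `_inner_updates` builds a stateless `higher` copy of the model, performs `n_inner_updates` steps on slices of the current batch and accumulates `self.meta_losses`, and `_outer_update` averages those losses into the meta-gradient. Construction matches the CIFAR-100 example touched below; a minimal sketch under those assumptions (hyperparameters are the example's, not prescriptive, and the model/benchmark choice is illustrative):

import torch

from avalanche.benchmarks.classic import SplitCIFAR100
from avalanche.models import MTSimpleCNN
from avalanche.training.supervised.lamaml import LaMAML

benchmark = SplitCIFAR100(n_experiences=20, return_task_id=True,
                          class_ids_from_zero_in_each_exp=True)
model = MTSimpleCNN()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

strategy = LaMAML(
    model,
    optimizer,
    torch.nn.CrossEntropyLoss(),
    n_inner_updates=5,       # fast-weight steps taken per training batch
    second_order=True,       # backprop through the inner trajectory
    grad_clip_norm=1.0,
    learn_lr=True,           # learn per-parameter LRs (the alphas)
    lr_alpha=0.25,           # LR used to update the alphas themselves
    sync_update=False,       # False: apply the learned alphas in the outer step
    train_mb_size=10,
    train_epochs=1,
    eval_mb_size=100,
)

for experience in benchmark.train_stream:
    strategy.train(experience)
    strategy.eval(benchmark.test_stream)
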
""" @@ -190,20 +173,9 @@ def inner_update(self, fast_model, x, y, t): # Update fast model's weights fast_model.update_params(new_fast_params) - def apply_grad(self, module, grads): - for i, p in enumerate(module.parameters()): - grad = grads[i] - if grad is None: - grad = torch.zeros(p.shape).float().to(self.device) - - if p.grad is None: - p.grad = grad - else: - p.grad += grad - - def train_batch(self): + def _inner_updates(self, **kwargs): # Create a stateless copy of the model for inner-updates - fast_model = higher.patch.monkeypatch( + self.fast_model = higher.patch.monkeypatch( self.model, copy_initial_weights=True, track_higher_grads=self.second_order, @@ -217,28 +189,29 @@ def train_batch(self): bsize_data = batch_x.shape[0] rough_sz = math.ceil(bsize_data / self.n_inner_updates) - meta_losses = [0 for _ in range(self.n_inner_updates)] + self.meta_losses = [0 for _ in range(self.n_inner_updates)] for i in range(self.n_inner_updates): - batch_x_i = batch_x[i * rough_sz : (i + 1) * rough_sz] - batch_y_i = batch_y[i * rough_sz : (i + 1) * rough_sz] - batch_t_i = batch_t[i * rough_sz : (i + 1) * rough_sz] + batch_x_i = batch_x[i * rough_sz: (i + 1) * rough_sz] + batch_y_i = batch_y[i * rough_sz: (i + 1) * rough_sz] + batch_t_i = batch_t[i * rough_sz: (i + 1) * rough_sz] # We assume that samples for inner update are from the same task - self.inner_update(fast_model, batch_x_i, batch_y_i, batch_t_i) + self.inner_update_step(self.fast_model, batch_x_i, batch_y_i, batch_t_i) # Compute meta-loss with the combination of batch and buffer samples logits_meta = avalanche_forward( - fast_model, self.mb_x, self.mb_task_id + self.fast_model, self.mb_x, self.mb_task_id ) meta_loss = self._criterion(logits_meta, self.mb_y) - meta_losses[i] = meta_loss + self.meta_losses[i] = meta_loss + def _outer_update(self, **kwargs): # Compute meta-gradient for the main model - meta_loss = sum(meta_losses) / len(meta_losses) + meta_loss = sum(self.meta_losses) / len(self.meta_losses) meta_grad_model = torch.autograd.grad( meta_loss, - fast_model.parameters(time=0), + self.fast_model.parameters(time=0), retain_graph=True, allow_unused=True, ) @@ -269,9 +242,23 @@ def train_batch(self): self.optimizer.step() else: for p, alpha in zip( - self.model.parameters(), self.alpha_params.parameters() + self.model.parameters(), self.alpha_params.parameters() ): # Use relu on updated LRs to avoid negative values p.data = p.data - p.grad * F.relu(alpha) self.loss = meta_loss + + +def init_kaiming_normal(m): + if isinstance(m, nn.Conv2d): + torch.nn.init.constant_(m.weight.data, 1.0) + torch.nn.init.kaiming_normal_(m.weight.data) + if m.bias is not None: + m.bias.data.zero_() + + elif isinstance(m, nn.Linear): + torch.nn.init.constant_(m.weight.data, 1.0) + torch.nn.init.kaiming_normal_(m.weight.data) + if m.bias is not None: + m.bias.data.zero_() diff --git a/avalanche/training/templates/update_type/meta_update.py b/avalanche/training/templates/update_type/meta_update.py index cc1090385..d387db9c0 100644 --- a/avalanche/training/templates/update_type/meta_update.py +++ b/avalanche/training/templates/update_type/meta_update.py @@ -36,7 +36,7 @@ def _before_inner_updates(self, **kwargs): trigger_plugins(self, "before_inner_updates", **kwargs) def _inner_updates(self, **kwargs): - trigger_plugins(self, "inner_updates", **kwargs) + raise NotImplementedError() def _after_inner_updates(self, **kwargs): trigger_plugins(self, "after_inner_updates", **kwargs) @@ -45,7 +45,7 @@ def _before_outer_update(self, **kwargs): 
trigger_plugins(self, "before_outer_update", **kwargs) def _outer_update(self, **kwargs): - trigger_plugins(self, "outer_update", **kwargs) + raise NotImplementedError() def _after_outer_update(self, **kwargs): trigger_plugins(self, "after_outer_update", **kwargs) diff --git a/examples/NEW_lamaml.py b/examples/NEW_lamaml.py deleted file mode 100644 index 5e30e303e..000000000 --- a/examples/NEW_lamaml.py +++ /dev/null @@ -1,73 +0,0 @@ -import torch -from os.path import expanduser - -from avalanche.models import MTSimpleMLP -from avalanche.evaluation.metrics import ( - accuracy_metrics, - loss_metrics, -) -from avalanche.training.plugins import EvaluationPlugin -from avalanche.benchmarks.classic import SplitMNIST -from avalanche.logging import InteractiveLogger -from avalanche.training.templates.common_templates import ( - SupervisedMetaLearningTemplate -) -from avalanche.training.plugins.NEW_lamaml import LaMAMLPlugin - - -def main(): - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - print(f"Using device: {device}") - - scenario = SplitMNIST( - n_experiences=5, - dataset_root=expanduser("~") + "/.avalanche/data/mnist/", - return_task_id=True - ) - - # choose some metrics and evaluation method - interactive_logger = InteractiveLogger() - eval_plugin = EvaluationPlugin( - accuracy_metrics( - minibatch=True, epoch=True, experience=True, stream=True - ), - loss_metrics(minibatch=True, epoch=True, experience=True, stream=True), - loggers=[interactive_logger], - ) - - model = MTSimpleMLP(hidden_size=128) - optimizer = torch.optim.SGD(model.parameters(), lr=0.01) - criterion = torch.nn.CrossEntropyLoss() - - # LaMAML plugin - lamaml_plugin = LaMAMLPlugin( - n_inner_updates=5, - second_order=True, - grad_clip_norm=1.0, - learn_lr=True, - lr_alpha=0.25, - sync_update=False, - alpha_init=0.1, - ) - - # create strategy - strategy = SupervisedMetaLearningTemplate( - model, - optimizer, - criterion, - train_epochs=1, - device=device, - train_mb_size=32, - evaluator=eval_plugin, - plugins=[lamaml_plugin] - ) - - # train on the selected scenario with the chosen strategy - for experience in scenario.train_stream: - print("Start training on experience ", experience.current_experience) - strategy.train(experience) - strategy.eval(scenario.test_stream[:]) - - -if __name__ == "__main__": - main() diff --git a/examples/lamaml_cifar100.py b/examples/lamaml_cifar100.py index 7ac903238..5e713e972 100644 --- a/examples/lamaml_cifar100.py +++ b/examples/lamaml_cifar100.py @@ -77,7 +77,7 @@ def main(args): lr_alpha=0.25, sync_update=False, train_mb_size=10, - train_epochs=10, + train_epochs=1, eval_mb_size=100, device=device, plugins=[replay_plugin], From a66a6a36c3512094689da8729520fda1ac98ba5f Mon Sep 17 00:00:00 2001 From: hamedhemati Date: Wed, 31 Aug 2022 18:50:12 +0200 Subject: [PATCH 07/10] Add `model_adaptation` to `observation_type` implementation --- avalanche/training/supervised/ar1.py | 2 +- avalanche/training/supervised/cumulative.py | 2 +- avalanche/training/supervised/deep_slda.py | 2 +- avalanche/training/supervised/icarl.py | 2 +- .../training/supervised/joint_training.py | 2 +- avalanche/training/supervised/lamaml.py | 3 ++- .../training/supervised/strategy_wrappers.py | 2 +- .../supervised/strategy_wrappers_online.py | 2 +- avalanche/training/templates/base_sgd.py | 12 +-------- .../observation_type/batch_observation.py | 11 ++++++++ .../observation_type/online_observation.py | 27 +++++++++++++++++++ examples/lamaml_cifar100.py | 3 ++- 
tests/training/test_online_strategies.py | 7 +++-- 13 files changed, 55 insertions(+), 22 deletions(-) diff --git a/avalanche/training/supervised/ar1.py b/avalanche/training/supervised/ar1.py index ceaa06fb7..a514c57e8 100644 --- a/avalanche/training/supervised/ar1.py +++ b/avalanche/training/supervised/ar1.py @@ -16,7 +16,7 @@ SynapticIntelligencePlugin, CWRStarPlugin, ) -from avalanche.training.templates.supervised import SupervisedTemplate +from avalanche.training.templates import SupervisedTemplate from avalanche.training.utils import ( replace_bn_with_brn, get_last_fc_layer, diff --git a/avalanche/training/supervised/cumulative.py b/avalanche/training/supervised/cumulative.py index a143fcad1..28123c809 100644 --- a/avalanche/training/supervised/cumulative.py +++ b/avalanche/training/supervised/cumulative.py @@ -7,7 +7,7 @@ from avalanche.benchmarks.utils import AvalancheConcatDataset from avalanche.training.plugins.evaluation import default_evaluator from avalanche.training.plugins import SupervisedPlugin, EvaluationPlugin -from avalanche.training.templates.supervised import SupervisedTemplate +from avalanche.training.templates import SupervisedTemplate class Cumulative(SupervisedTemplate): diff --git a/avalanche/training/supervised/deep_slda.py b/avalanche/training/supervised/deep_slda.py index 06446d86d..676652636 100644 --- a/avalanche/training/supervised/deep_slda.py +++ b/avalanche/training/supervised/deep_slda.py @@ -5,7 +5,7 @@ import torch from avalanche.training.plugins import SupervisedPlugin -from avalanche.training.templates.supervised import SupervisedTemplate +from avalanche.training.templates import SupervisedTemplate from avalanche.training.plugins.evaluation import default_evaluator from avalanche.models.dynamic_modules import MultiTaskModule from avalanche.models import FeatureExtractorBackbone diff --git a/avalanche/training/supervised/icarl.py b/avalanche/training/supervised/icarl.py index 3b559fae9..0f5cd63a6 100644 --- a/avalanche/training/supervised/icarl.py +++ b/avalanche/training/supervised/icarl.py @@ -18,7 +18,7 @@ from avalanche.training.plugins.strategy_plugin import SupervisedPlugin from torch.nn import Module from torch.utils.data import DataLoader -from avalanche.training.templates.supervised import SupervisedTemplate +from avalanche.training.templates import SupervisedTemplate class ICaRL(SupervisedTemplate): diff --git a/avalanche/training/supervised/joint_training.py b/avalanche/training/supervised/joint_training.py index d84627151..335f5db43 100644 --- a/avalanche/training/supervised/joint_training.py +++ b/avalanche/training/supervised/joint_training.py @@ -17,7 +17,7 @@ from avalanche.benchmarks.scenarios import ClassificationExperience from avalanche.benchmarks.utils import AvalancheConcatDataset from avalanche.training.plugins.evaluation import default_evaluator -from avalanche.training.templates.supervised import SupervisedTemplate +from avalanche.training.templates import SupervisedTemplate from avalanche.models import DynamicModule if TYPE_CHECKING: diff --git a/avalanche/training/supervised/lamaml.py b/avalanche/training/supervised/lamaml.py index b13c1148f..431f3f55c 100644 --- a/avalanche/training/supervised/lamaml.py +++ b/avalanche/training/supervised/lamaml.py @@ -197,7 +197,8 @@ def _inner_updates(self, **kwargs): batch_t_i = batch_t[i * rough_sz: (i + 1) * rough_sz] # We assume that samples for inner update are from the same task - self.inner_update_step(self.fast_model, batch_x_i, batch_y_i, batch_t_i) + 
self.inner_update_step(self.fast_model, batch_x_i, batch_y_i, + batch_t_i) # Compute meta-loss with the combination of batch and buffer samples logits_meta = avalanche_forward( diff --git a/avalanche/training/supervised/strategy_wrappers.py b/avalanche/training/supervised/strategy_wrappers.py index 713ba8c7c..195c75ce1 100644 --- a/avalanche/training/supervised/strategy_wrappers.py +++ b/avalanche/training/supervised/strategy_wrappers.py @@ -34,7 +34,7 @@ MASPlugin, ) from avalanche.training.templates.base import BaseTemplate -from avalanche.training.templates.common_templates import SupervisedTemplate +from avalanche.training.templates import SupervisedTemplate from avalanche.models.generator import MlpVAE, VAE_loss from avalanche.logging import InteractiveLogger diff --git a/avalanche/training/supervised/strategy_wrappers_online.py b/avalanche/training/supervised/strategy_wrappers_online.py index 24464690e..d757e2401 100644 --- a/avalanche/training/supervised/strategy_wrappers_online.py +++ b/avalanche/training/supervised/strategy_wrappers_online.py @@ -15,7 +15,7 @@ from avalanche.training.plugins.evaluation import default_evaluator from avalanche.training.plugins import SupervisedPlugin, EvaluationPlugin -from avalanche.training.templates.common_templates import ( +from avalanche.training.templates import ( OnlineSupervisedTemplate, ) diff --git a/avalanche/training/templates/base_sgd.py b/avalanche/training/templates/base_sgd.py index 41c2f57e5..46813a9ed 100644 --- a/avalanche/training/templates/base_sgd.py +++ b/avalanche/training/templates/base_sgd.py @@ -47,7 +47,7 @@ def __init__( eval_mb_size: Optional[int] = 1, device="cpu", plugins: Optional[List["SupervisedPlugin"]] = None, - evaluator: EvaluationPlugin = default_evaluator, + evaluator: EvaluationPlugin = default_evaluator(), eval_every=-1, peval_mode="epoch", ): @@ -347,16 +347,6 @@ def eval_dataset_adaptation(self, **kwargs): self.adapted_dataset = self.experience.dataset self.adapted_dataset = self.adapted_dataset.eval() - def model_adaptation(self, model=None): - """Adapts the model to the current data. - - Calls the :class:`~avalanche.models.DynamicModule`s adaptation. - """ - if model is None: - model = self.model - avalanche_model_adaptation(model, self.experience) - return model.to(self.device) - def _unpack_minibatch(self): """Move to device""" # First verify the mini-batch diff --git a/avalanche/training/templates/observation_type/batch_observation.py b/avalanche/training/templates/observation_type/batch_observation.py index 4f67ef91b..35b887d11 100644 --- a/avalanche/training/templates/observation_type/batch_observation.py +++ b/avalanche/training/templates/observation_type/batch_observation.py @@ -2,6 +2,7 @@ from avalanche.benchmarks import CLExperience from avalanche.models.dynamic_optimizers import reset_optimizer +from avalanche.models.utils import avalanche_model_adaptation class BatchObservation: @@ -31,6 +32,16 @@ def _train_exp( self.training_epoch(**kwargs) self._after_training_epoch(**kwargs) + def model_adaptation(self, model=None): + """Adapts the model to the current data. + + Calls the :class:`~avalanche.models.DynamicModule`s adaptation. + """ + if model is None: + model = self.model + avalanche_model_adaptation(model, self.experience) + return model.to(self.device) + def make_optimizer(self): """Optimizer initialization. 
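Moving `model_adaptation` onto the observation-type mixins lets batch and online observation adapt dynamic modules differently: the online variant can adapt against the whole origin experience when task boundaries are accessible, as the next hunk shows. The concrete templates are then compositions of an observation type, a problem type and an update type over `BaseSGDTemplate`. A sketch of such a composition, assuming the mixin names exported by the `observation_type`, `problem_type` and `update_type` packages introduced in this series (the composed class itself is hypothetical):

from avalanche.training.templates.base_sgd import BaseSGDTemplate
from avalanche.training.templates.observation_type import OnlineObservation
from avalanche.training.templates.problem_type import SupervisedProblem
from avalanche.training.templates.update_type import SGDUpdate


class MyOnlineSupervisedTemplate(OnlineObservation, SupervisedProblem,
                                 SGDUpdate, BaseSGDTemplate):
    # Per the structure introduced in this patch series, OnlineObservation
    # supplies model_adaptation()/make_optimizer(), while the problem and
    # update mixins supply the minibatch properties, the loss and the
    # per-iteration SGD loop inherited from BaseSGDTemplate.
    pass
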
diff --git a/avalanche/training/templates/observation_type/online_observation.py b/avalanche/training/templates/observation_type/online_observation.py index faf98dca0..e542375a4 100644 --- a/avalanche/training/templates/observation_type/online_observation.py +++ b/avalanche/training/templates/observation_type/online_observation.py @@ -3,6 +3,7 @@ from avalanche.benchmarks import OnlineCLExperience from avalanche.models.dynamic_optimizers import reset_optimizer from avalanche.models.dynamic_optimizers import update_optimizer +from avalanche.models.utils import avalanche_model_adaptation class OnlineObservation: @@ -44,6 +45,32 @@ def make_optimizer(self): self.model.parameters(), reset_state=False) + def model_adaptation(self, model=None): + """Adapts the model to the current data. + + Calls the :class:`~avalanche.models.DynamicModule`s adaptation. + """ + if model is None: + model = self.model + + # For training: + if isinstance(self.experience, OnlineCLExperience): + # If the strategy has access to task boundaries, adapt the model + # for the whole origin experience to add the + if self.experience.access_task_boundaries: + avalanche_model_adaptation(model, + self.experience.origin_experience) + else: + self.model_params_before_adaptation = list(model.parameters()) + avalanche_model_adaptation(model, self.experience) + + # For evaluation, the experience is not necessarily an online + # experience: + else: + avalanche_model_adaptation(model, self.experience) + + return model.to(self.device) + def maybe_adapt_model_and_make_optimizer(self): # If strategy has access to the task boundaries, and the current # sub-experience is the first sub-experience in the online (sub-)stream, diff --git a/examples/lamaml_cifar100.py b/examples/lamaml_cifar100.py index 5e713e972..a85b7ba0b 100644 --- a/examples/lamaml_cifar100.py +++ b/examples/lamaml_cifar100.py @@ -31,7 +31,8 @@ def main(args): ) # --- SCENARIO CREATION - scenario = SplitCIFAR100(n_experiences=20, return_task_id=True) + scenario = SplitCIFAR100(n_experiences=20, return_task_id=True, + class_ids_from_zero_in_each_exp=True) config = {"scenario": "SplitCIFAR100"} # MODEL CREATION diff --git a/tests/training/test_online_strategies.py b/tests/training/test_online_strategies.py index e5cf6c3eb..fdced8935 100644 --- a/tests/training/test_online_strategies.py +++ b/tests/training/test_online_strategies.py @@ -10,6 +10,7 @@ from avalanche.benchmarks.scenarios.online_scenario import OnlineCLScenario from avalanche.training import OnlineNaive from tests.unit_tests_utils import get_fast_benchmark +from avalanche.training.plugins.evaluation import default_evaluator class StrategyTest(unittest.TestCase): @@ -51,7 +52,8 @@ def test_naive(self): criterion, train_mb_size=1, device=self.device, - eval_mb_size=50 + eval_mb_size=50, + evaluator=default_evaluator(), ) ocl_benchmark = OnlineCLScenario(benchmark_streams, access_task_boundaries=True) @@ -65,7 +67,8 @@ def test_naive(self): criterion, train_mb_size=1, device=self.device, - eval_mb_size=50 + eval_mb_size=50, + evaluator=default_evaluator(), ) ocl_benchmark = OnlineCLScenario(benchmark_streams, access_task_boundaries=False) From 1cd1924249118a9e377cf28013efbba46b37eaee Mon Sep 17 00:00:00 2001 From: hamedhemati Date: Fri, 30 Sep 2022 16:34:54 +0200 Subject: [PATCH 08/10] Update imports and delete old template files --- avalanche/evaluation/metric_definitions.py | 2 +- avalanche/evaluation/metric_utils.py | 2 +- avalanche/evaluation/metrics/checkpoint.py | 2 +- 
.../evaluation/metrics/confusion_matrix.py | 2 +- .../evaluation/metrics/forgetting_bwt.py | 2 +- .../evaluation/metrics/forward_transfer.py | 2 +- avalanche/evaluation/metrics/gpu_usage.py | 2 +- .../evaluation/metrics/images_samples.py | 2 +- .../evaluation/metrics/labels_repartition.py | 2 +- avalanche/evaluation/metrics/mean_scores.py | 2 +- avalanche/evaluation/metrics/ram_usage.py | 2 +- avalanche/evaluation/metrics/timing.py | 2 +- avalanche/logging/interactive_logging.py | 6 +- avalanche/logging/wandb_logger.py | 2 +- avalanche/training/plugins/evaluation.py | 2 +- avalanche/training/plugins/gdumb.py | 2 +- avalanche/training/plugins/gss_greedy.py | 2 +- avalanche/training/plugins/lr_scheduling.py | 2 +- avalanche/training/plugins/replay.py | 2 +- .../training/plugins/synaptic_intelligence.py | 2 +- avalanche/training/storage_policy.py | 2 +- .../training/templates/base_online_sgd.py | 386 ------------------ .../training/templates/online_supervised.py | 341 ---------------- avalanche/training/templates/supervised.py | 311 -------------- tests/training/test_replay.py | 2 +- tests/training/test_strategies.py | 2 +- 26 files changed, 27 insertions(+), 1061 deletions(-) delete mode 100644 avalanche/training/templates/base_online_sgd.py delete mode 100644 avalanche/training/templates/online_supervised.py delete mode 100644 avalanche/training/templates/supervised.py diff --git a/avalanche/evaluation/metric_definitions.py b/avalanche/evaluation/metric_definitions.py index 78defbbd8..a27f60794 100644 --- a/avalanche/evaluation/metric_definitions.py +++ b/avalanche/evaluation/metric_definitions.py @@ -18,7 +18,7 @@ if TYPE_CHECKING: from .metric_results import MetricResult - from ..training.templates.supervised import SupervisedTemplate + from ..training.templates import SupervisedTemplate TResult = TypeVar("TResult") TAggregated = TypeVar("TAggregated", bound="PluginMetric") diff --git a/avalanche/evaluation/metric_utils.py b/avalanche/evaluation/metric_utils.py index 2e3ca3fbc..a2a53351b 100644 --- a/avalanche/evaluation/metric_utils.py +++ b/avalanche/evaluation/metric_utils.py @@ -19,7 +19,7 @@ from torch import Tensor if TYPE_CHECKING: - from avalanche.training.templates.supervised import SupervisedTemplate + from avalanche.training.templates import SupervisedTemplate from avalanche.benchmarks.scenarios import ClassificationExperience from avalanche.evaluation import PluginMetric diff --git a/avalanche/evaluation/metrics/checkpoint.py b/avalanche/evaluation/metrics/checkpoint.py index 08b1c8ce8..ed3f0feb2 100644 --- a/avalanche/evaluation/metrics/checkpoint.py +++ b/avalanche/evaluation/metrics/checkpoint.py @@ -19,7 +19,7 @@ from avalanche.evaluation.metric_utils import get_metric_name if TYPE_CHECKING: - from avalanche.training.templates.supervised import SupervisedTemplate + from avalanche.training.templates import SupervisedTemplate class WeightCheckpoint(PluginMetric[Tensor]): diff --git a/avalanche/evaluation/metrics/confusion_matrix.py b/avalanche/evaluation/metrics/confusion_matrix.py index 88a3bc419..a0910104f 100644 --- a/avalanche/evaluation/metrics/confusion_matrix.py +++ b/avalanche/evaluation/metrics/confusion_matrix.py @@ -41,7 +41,7 @@ ) if TYPE_CHECKING: - from avalanche.training.templates.supervised import SupervisedTemplate + from avalanche.training.templates import SupervisedTemplate class ConfusionMatrix(Metric[Tensor]): diff --git a/avalanche/evaluation/metrics/forgetting_bwt.py b/avalanche/evaluation/metrics/forgetting_bwt.py index d0652a3d0..2abfae036 100644 --- 
a/avalanche/evaluation/metrics/forgetting_bwt.py +++ b/avalanche/evaluation/metrics/forgetting_bwt.py @@ -21,7 +21,7 @@ ) if TYPE_CHECKING: - from avalanche.training.templates.supervised import SupervisedTemplate + from avalanche.training.templates import SupervisedTemplate class Forgetting(Metric[Union[float, None, Dict[int, float]]]): diff --git a/avalanche/evaluation/metrics/forward_transfer.py b/avalanche/evaluation/metrics/forward_transfer.py index f6eb934bd..fdd41b482 100644 --- a/avalanche/evaluation/metrics/forward_transfer.py +++ b/avalanche/evaluation/metrics/forward_transfer.py @@ -21,7 +21,7 @@ ) if TYPE_CHECKING: - from avalanche.training.templates.supervised import SupervisedTemplate + from avalanche.training.templates import SupervisedTemplate class ForwardTransfer(Metric[Union[float, None, Dict[int, float]]]): diff --git a/avalanche/evaluation/metrics/gpu_usage.py b/avalanche/evaluation/metrics/gpu_usage.py index 7ae1f4648..6304a6213 100644 --- a/avalanche/evaluation/metrics/gpu_usage.py +++ b/avalanche/evaluation/metrics/gpu_usage.py @@ -20,7 +20,7 @@ from avalanche.evaluation.metric_results import MetricResult if TYPE_CHECKING: - from avalanche.training.templates.supervised import SupervisedTemplate + from avalanche.training.templates import SupervisedTemplate class MaxGPU(Metric[float]): diff --git a/avalanche/evaluation/metrics/images_samples.py b/avalanche/evaluation/metrics/images_samples.py index cb568dd29..d64f6c3f1 100644 --- a/avalanche/evaluation/metrics/images_samples.py +++ b/avalanche/evaluation/metrics/images_samples.py @@ -21,7 +21,7 @@ if TYPE_CHECKING: - from avalanche.training.templates.supervised import SupervisedTemplate + from avalanche.training.templates import SupervisedTemplate from avalanche.benchmarks.utils import AvalancheDataset diff --git a/avalanche/evaluation/metrics/labels_repartition.py b/avalanche/evaluation/metrics/labels_repartition.py index 2b85f855c..436e40849 100644 --- a/avalanche/evaluation/metrics/labels_repartition.py +++ b/avalanche/evaluation/metrics/labels_repartition.py @@ -26,7 +26,7 @@ if TYPE_CHECKING: - from avalanche.training.templates.supervised import SupervisedTemplate + from avalanche.training.templates import SupervisedTemplate from avalanche.evaluation.metric_results import MetricResult diff --git a/avalanche/evaluation/metrics/mean_scores.py b/avalanche/evaluation/metrics/mean_scores.py index bdc422f83..d4d927891 100644 --- a/avalanche/evaluation/metrics/mean_scores.py +++ b/avalanche/evaluation/metrics/mean_scores.py @@ -32,7 +32,7 @@ from typing_extensions import Literal if TYPE_CHECKING: - from avalanche.training.templates.supervised import SupervisedTemplate + from avalanche.training.templates import SupervisedTemplate from avalanche.evaluation.metric_results import MetricResult diff --git a/avalanche/evaluation/metrics/ram_usage.py b/avalanche/evaluation/metrics/ram_usage.py index 1dd022469..b358248b5 100644 --- a/avalanche/evaluation/metrics/ram_usage.py +++ b/avalanche/evaluation/metrics/ram_usage.py @@ -19,7 +19,7 @@ from avalanche.evaluation.metric_results import MetricResult if TYPE_CHECKING: - from avalanche.training.templates.supervised import SupervisedTemplate + from avalanche.training.templates import SupervisedTemplate class MaxRAM(Metric[float]): diff --git a/avalanche/evaluation/metrics/timing.py b/avalanche/evaluation/metrics/timing.py index 2704fc4c0..eb09ca8c2 100644 --- a/avalanche/evaluation/metrics/timing.py +++ b/avalanche/evaluation/metrics/timing.py @@ -18,7 +18,7 @@ from 
avalanche.evaluation.metrics.mean import Mean if TYPE_CHECKING: - from avalanche.training.templates.supervised import SupervisedTemplate + from avalanche.training.templates import SupervisedTemplate class ElapsedTime(Metric[float]): diff --git a/avalanche/logging/interactive_logging.py b/avalanche/logging/interactive_logging.py index dac6699c5..03dee5461 100644 --- a/avalanche/logging/interactive_logging.py +++ b/avalanche/logging/interactive_logging.py @@ -19,7 +19,7 @@ from tqdm import tqdm if TYPE_CHECKING: - from avalanche.training.templates.supervised import SupervisedTemplate + from avalanche.training.templates import SupervisedTemplate class InteractiveLogger(TextLogger, SupervisedPlugin): @@ -61,6 +61,8 @@ def before_training_epoch( metric_values: List["MetricValue"], **kwargs ): + if isinstance(strategy.experience, OnlineCLExperience): + return super().before_training_epoch(strategy, metric_values, **kwargs) self._progress.total = len(strategy.dataloader) @@ -70,6 +72,8 @@ def after_training_epoch( metric_values: List["MetricValue"], **kwargs ): + if isinstance(strategy.experience, OnlineCLExperience): + return self._end_progress() super().after_training_epoch(strategy, metric_values, **kwargs) diff --git a/avalanche/logging/wandb_logger.py b/avalanche/logging/wandb_logger.py index dd523916d..5c4902992 100644 --- a/avalanche/logging/wandb_logger.py +++ b/avalanche/logging/wandb_logger.py @@ -34,7 +34,7 @@ if TYPE_CHECKING: from avalanche.evaluation.metric_results import MetricValue - from avalanche.training.templates.supervised import SupervisedTemplate + from avalanche.training.templates import SupervisedTemplate class WandBLogger(BaseLogger, SupervisedPlugin): diff --git a/avalanche/training/plugins/evaluation.py b/avalanche/training/plugins/evaluation.py index 7e092534a..10e8eadfc 100644 --- a/avalanche/training/plugins/evaluation.py +++ b/avalanche/training/plugins/evaluation.py @@ -10,7 +10,7 @@ if TYPE_CHECKING: from avalanche.evaluation import PluginMetric from avalanche.logging import BaseLogger - from avalanche.training.templates.supervised import SupervisedTemplate + from avalanche.training.templates import SupervisedTemplate class EvaluationPlugin: diff --git a/avalanche/training/plugins/gdumb.py b/avalanche/training/plugins/gdumb.py index 85c921be5..be44c8cdc 100644 --- a/avalanche/training/plugins/gdumb.py +++ b/avalanche/training/plugins/gdumb.py @@ -5,7 +5,7 @@ from avalanche.training.storage_policy import ClassBalancedBuffer if TYPE_CHECKING: - from avalanche.training.templates.supervised import SupervisedTemplate + from avalanche.training.templates import SupervisedTemplate class GDumbPlugin(SupervisedPlugin): diff --git a/avalanche/training/plugins/gss_greedy.py b/avalanche/training/plugins/gss_greedy.py index 30121dc23..6d1914755 100644 --- a/avalanche/training/plugins/gss_greedy.py +++ b/avalanche/training/plugins/gss_greedy.py @@ -6,7 +6,7 @@ from avalanche.training.plugins.strategy_plugin import SupervisedPlugin if TYPE_CHECKING: - from ..templates.supervised import SupervisedTemplate + from ..templates import SupervisedTemplate class GSS_greedyPlugin(SupervisedPlugin): diff --git a/avalanche/training/plugins/lr_scheduling.py b/avalanche/training/plugins/lr_scheduling.py index 46c29ba63..e288ff915 100644 --- a/avalanche/training/plugins/lr_scheduling.py +++ b/avalanche/training/plugins/lr_scheduling.py @@ -8,7 +8,7 @@ import inspect if TYPE_CHECKING: - from avalanche.training.templates.supervised import SupervisedTemplate + from avalanche.training.templates 
import SupervisedTemplate class LRSchedulerPlugin(SupervisedPlugin): diff --git a/avalanche/training/plugins/replay.py b/avalanche/training/plugins/replay.py index 62add9f4d..0b8946cb1 100644 --- a/avalanche/training/plugins/replay.py +++ b/avalanche/training/plugins/replay.py @@ -9,7 +9,7 @@ ) if TYPE_CHECKING: - from avalanche.training.templates.supervised import SupervisedTemplate + from avalanche.training.templates import SupervisedTemplate class ReplayPlugin(SupervisedPlugin): diff --git a/avalanche/training/plugins/synaptic_intelligence.py b/avalanche/training/plugins/synaptic_intelligence.py index 79379fce0..6efd4da42 100644 --- a/avalanche/training/plugins/synaptic_intelligence.py +++ b/avalanche/training/plugins/synaptic_intelligence.py @@ -13,7 +13,7 @@ from avalanche.training.utils import get_layers_and_params if TYPE_CHECKING: - from ..templates.supervised import SupervisedTemplate + from ..templates import SupervisedTemplate SynDataType = Dict[str, Dict[str, Tensor]] diff --git a/avalanche/training/storage_policy.py b/avalanche/training/storage_policy.py index 576d92540..7a1cbd262 100644 --- a/avalanche/training/storage_policy.py +++ b/avalanche/training/storage_policy.py @@ -16,7 +16,7 @@ from avalanche.models import FeatureExtractorBackbone if TYPE_CHECKING: - from .templates.supervised import SupervisedTemplate + from .templates import SupervisedTemplate class ExemplarsBuffer(ABC): diff --git a/avalanche/training/templates/base_online_sgd.py b/avalanche/training/templates/base_online_sgd.py deleted file mode 100644 index 176547932..000000000 --- a/avalanche/training/templates/base_online_sgd.py +++ /dev/null @@ -1,386 +0,0 @@ -from typing import Iterable, Sequence, Optional, Union, List - -import torch -from torch.nn import Module -from torch.optim import Optimizer - -from avalanche.benchmarks import CLExperience, CLStream -from avalanche.core import BaseSGDPlugin -from avalanche.training.plugins import SupervisedPlugin, EvaluationPlugin -from avalanche.training.plugins.clock import Clock -from avalanche.training.plugins.evaluation import default_evaluator -from avalanche.training.templates.base import BaseTemplate, ExpSequence - -from typing import TYPE_CHECKING - -from avalanche.training.utils import trigger_plugins - -if TYPE_CHECKING: - from avalanche.training.templates.supervised import SupervisedTemplate - - -class BaseOnlineSGDTemplate(BaseTemplate): - """Base class for continual learning skeletons. - - **Training loop** - The training loop is organized as follows:: - - train - train_exp # for each experience - - **Evaluation loop** - The evaluation loop is organized as follows:: - - eval - eval_exp # for each experience - - """ - - PLUGIN_CLASS = BaseSGDPlugin - - def __init__( - self, - model: Module, - optimizer: Optimizer, - train_mb_size: int = 1, - train_passes: int = 1, - eval_mb_size: Optional[int] = 1, - device="cpu", - plugins: Optional[List["SupervisedPlugin"]] = None, - evaluator: EvaluationPlugin = default_evaluator(), - eval_every=-1, - peval_mode="experience", - ): - """Init. - - :param model: PyTorch model. - :param optimizer: PyTorch optimizer. - :param train_mb_size: mini-batch size for training. - :param train_passes: number of training passes. - :param eval_mb_size: mini-batch size for eval. - :param evaluator: (optional) instance of EvaluationPlugin for logging - and metric computations. None to remove logging. - :param eval_every: the frequency of the calls to `eval` inside the - training loop. -1 disables the evaluation. 
0 means `eval` is called - only at the end of the learning experience. Values >0 mean - that `eval` is called every `eval_every` experience and at the end - of the learning experience. - :param peval_mode: one of {'experience', 'iteration'}. Decides whether - the periodic evaluation during training should execute every - `eval_every` experiences or iterations (Default='experience'). - """ - super().__init__(model=model, device=device, plugins=plugins) - - self.optimizer: Optimizer = optimizer - """ PyTorch optimizer. """ - - self.train_passes: int = train_passes - """ Number of training passes. """ - - self.train_mb_size: int = train_mb_size - """ Training mini-batch size. """ - - self.eval_mb_size: int = ( - train_mb_size if eval_mb_size is None else eval_mb_size - ) - """ Eval mini-batch size. """ - - if evaluator is None: - evaluator = EvaluationPlugin() - self.plugins.append(evaluator) - self.evaluator = evaluator - """ EvaluationPlugin used for logging and metric computations. """ - - # Configure periodic evaluation. - assert peval_mode in {"experience", "iteration"} - self.eval_every = eval_every - peval = PeriodicEval(eval_every, peval_mode) - self.plugins.append(peval) - - self.clock = Clock() - """ Incremental counters for strategy events. """ - # WARNING: Clock needs to be the last plugin, otherwise - # counters will be wrong for plugins called after it. - self.plugins.append(self.clock) - - ################################################################### - # State variables. These are updated during the train/eval loops. # - ################################################################### - - self.dataloader = None - """ Dataloader. """ - - self.mbatch = None - """ Current mini-batch. """ - - self.mb_output = None - """ Model's output computed on the current mini-batch. """ - - self.loss = None - """ Loss of the current mini-batch. """ - - self._stop_training = False - - def train(self, - experiences: Union[CLExperience, - ExpSequence], - eval_streams: Optional[Sequence[Union[CLExperience, - ExpSequence]]] = None, - **kwargs): - super().train(experiences, eval_streams, **kwargs) - return self.evaluator.get_last_metrics() - - @torch.no_grad() - def eval(self, exp_list: Union[CLExperience, CLStream], **kwargs): - """ - Evaluate the current model on a series of experiences and - returns the last recorded value for each metric. - - :param exp_list: CL experience information. - :param kwargs: custom arguments. - - :return: dictionary containing last recorded value for - each metric name - """ - super().eval(exp_list, **kwargs) - return self.evaluator.get_last_metrics() - - def _before_training_exp(self, **kwargs): - self.make_train_dataloader(**kwargs) - # Model Adaptation (e.g. freeze/add new units) - - # If strategy has access to the task boundaries, and the current - # sub-experience is the first sub-experience in the online (sub-)stream, - # then adapt the model with the full origin experience: - if self.experience.access_task_boundaries: - if self.experience.is_first_subexp: - self.model = self.model_adaptation() - self.make_optimizer() - # Otherwise, adapt to the current sub-experience: - else: - self.model = self.model_adaptation() - self.make_optimizer() - - super()._before_training_exp(**kwargs) - - def _train_exp( - self, experience: CLExperience, eval_streams=None, **kwargs - ): - """Training loop over a single Experience object. - - :param experience: CL experience information. - :param eval_streams: list of streams for evaluation. 
- If None: use the training experience for evaluation. - Use [] if you do not want to evaluate during training. - :param kwargs: custom arguments. - """ - if eval_streams is None: - eval_streams = [experience] - for i, exp in enumerate(eval_streams): - if not isinstance(exp, Iterable): - eval_streams[i] = [exp] - - self.training_pass(**kwargs) - - def _before_eval_exp(self, **kwargs): - self.make_eval_dataloader(**kwargs) - # Model Adaptation (e.g. freeze/add new units) - self.model = self.model_adaptation() - super()._before_eval_exp(**kwargs) - - def _eval_exp(self, **kwargs): - self.eval_epoch(**kwargs) - - def make_train_dataloader(self, **kwargs): - """Assign dataloader to self.dataloader.""" - raise NotImplementedError() - - def make_eval_dataloader(self, **kwargs): - """Assign dataloader to self.dataloader.""" - raise NotImplementedError() - - def make_optimizer(self, **kwargs): - """Optimizer initialization.""" - raise NotImplementedError() - - def criterion(self): - """Compute loss function.""" - raise NotImplementedError() - - def forward(self): - """Compute the model's output given the current mini-batch.""" - raise NotImplementedError() - - def model_adaptation(self, model=None): - """Adapts the model to the current experience.""" - raise NotImplementedError() - - def stop_training(self): - """Signals to stop training at the next iteration.""" - self._stop_training = True - - def training_pass(self, **kwargs): - """Training pass. - - :param kwargs: - :return: - """ - for self.pass_itr in range(self.train_passes): - for self.mbatch in self.dataloader: - if self._stop_training: - break - - self._unpack_minibatch() - self._before_training_iteration(**kwargs) - - self.optimizer.zero_grad() - self.loss = 0 - - # Forward - self._before_forward(**kwargs) - self.mb_output = self.forward() - self._after_forward(**kwargs) - - # Loss & Backward - self.loss += self.criterion() - - self._before_backward(**kwargs) - self.backward() - self._after_backward(**kwargs) - - # Optimization step - self._before_update(**kwargs) - self.optimizer_step() - self._after_update(**kwargs) - - self._after_training_iteration(**kwargs) - - def backward(self): - """Run the backward pass.""" - self.loss.backward() - - def optimizer_step(self): - """Execute the optimizer step (weights update).""" - self.optimizer.step() - - def eval_epoch(self, **kwargs): - """Evaluation loop over the current `self.dataloader`.""" - for self.mbatch in self.dataloader: - self._unpack_minibatch() - self._before_eval_iteration(**kwargs) - - self._before_eval_forward(**kwargs) - self.mb_output = self.forward() - self._after_eval_forward(**kwargs) - self.loss = self.criterion() - - self._after_eval_iteration(**kwargs) - - def _unpack_minibatch(self): - """Move to device""" - for i in range(len(self.mbatch)): - self.mbatch[i] = self.mbatch[i].to(self.device) - - ######################################################### - # Plugin Triggers # - ######################################################### - - def _before_training_iteration(self, **kwargs): - trigger_plugins(self, "before_training_iteration", **kwargs) - - def _before_forward(self, **kwargs): - trigger_plugins(self, "before_forward", **kwargs) - - def _after_forward(self, **kwargs): - trigger_plugins(self, "after_forward", **kwargs) - - def _before_backward(self, **kwargs): - trigger_plugins(self, "before_backward", **kwargs) - - def _after_backward(self, **kwargs): - trigger_plugins(self, "after_backward", **kwargs) - - def _after_training_iteration(self, **kwargs): - 
trigger_plugins(self, "after_training_iteration", **kwargs) - - def _before_update(self, **kwargs): - trigger_plugins(self, "before_update", **kwargs) - - def _after_update(self, **kwargs): - trigger_plugins(self, "after_update", **kwargs) - - def _before_eval_iteration(self, **kwargs): - trigger_plugins(self, "before_eval_iteration", **kwargs) - - def _before_eval_forward(self, **kwargs): - trigger_plugins(self, "before_eval_forward", **kwargs) - - def _after_eval_forward(self, **kwargs): - trigger_plugins(self, "after_eval_forward", **kwargs) - - def _after_eval_iteration(self, **kwargs): - trigger_plugins(self, "after_eval_iteration", **kwargs) - - -class PeriodicEval(SupervisedPlugin): - """Schedules periodic evaluation during training. - - This plugin is automatically configured and added by the BaseTemplate. - """ - - def __init__(self, eval_every=-1, peval_mode="experience", - do_initial=True): - """Init. - - :param eval_every: the frequency of the calls to `eval` inside the - training loop. -1 disables the evaluation. 0 means `eval` is called - only at the end of the learning experience. Values >0 mean - that `eval` is called every `eval_every` experience and at the - end of the learning experience. - :param peval_mode: one of {'experience', 'iteration'}. Decides whether - the periodic evaluation during training should execute every - `eval_every` experience or iterations - (Default='experience'). - :param do_initial: whether to evaluate before each `train` call. - Occasionally needed becuase some metrics need to know the - accuracy before training. - """ - super().__init__() - assert peval_mode in {"experience", "iteration"} - self.eval_every = eval_every - self.peval_mode = peval_mode - self.do_initial = do_initial and eval_every > -1 - self.do_final = None - self._is_eval_updated = False - - def before_training(self, strategy, **kwargs): - """Eval before each learning experience. - - Occasionally needed because some metrics need the accuracy before - training. 
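The same scheduling contract survives in the merged template (with `peval_mode` extended to cover "experience"): `eval_every` controls how often `eval` runs from inside `train`, `peval_mode` picks which counter drives it, and `do_initial` triggers one evaluation before training starts. A minimal sketch of wiring it from a strategy constructor, assuming the `Naive` wrapper and a standard split benchmark (the values are illustrative):

import torch

from avalanche.benchmarks.classic import SplitMNIST
from avalanche.models import SimpleMLP
from avalanche.training.supervised import Naive

benchmark = SplitMNIST(n_experiences=5)
model = SimpleMLP(num_classes=benchmark.n_classes)

strategy = Naive(
    model,
    torch.optim.SGD(model.parameters(), lr=0.01),
    torch.nn.CrossEntropyLoss(),
    train_mb_size=32,
    train_epochs=4,
    eval_every=2,   # evaluate every 2 epochs (default peval_mode="epoch")
)

for experience in benchmark.train_stream:
    # eval_streams is what PeriodicEval iterates over in _peval().
    strategy.train(experience, eval_streams=[benchmark.test_stream])
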
- """ - if self.do_initial: - self._peval(strategy, **kwargs) - - def _peval(self, strategy, **kwargs): - for el in strategy._eval_streams: - strategy.eval(el, **kwargs) - - def _maybe_peval(self, strategy, counter, **kwargs): - if self.eval_every > 0 and counter % self.eval_every == 0: - self._peval(strategy, **kwargs) - - def after_training_exp(self, strategy: "BaseOnlineSGDTemplate", **kwargs): - """Periodic eval controlled by `self.eval_every` and - `self.peval_mode`.""" - if self.peval_mode == "experience": - self._maybe_peval(strategy, strategy.clock.train_exp_counter, - **kwargs) - - def after_training_iteration(self, strategy: "BaseOnlineSGDTemplate", - **kwargs): - """Periodic eval controlled by `self.eval_every` and - `self.peval_mode`.""" - if self.peval_mode == "iteration": - self._maybe_peval(strategy, strategy.clock.train_exp_iterations, - **kwargs) diff --git a/avalanche/training/templates/online_supervised.py b/avalanche/training/templates/online_supervised.py deleted file mode 100644 index 6354471cb..000000000 --- a/avalanche/training/templates/online_supervised.py +++ /dev/null @@ -1,341 +0,0 @@ -from typing import Sequence, Optional -from pkg_resources import parse_version - -import torch -from torch.nn import Module, CrossEntropyLoss -from torch.optim import Optimizer -from torch.utils.data import DataLoader - -from avalanche.benchmarks.utils.data_loader import TaskBalancedDataLoader, \ - collate_from_data_or_kwargs -from avalanche.models import avalanche_forward -from avalanche.models.dynamic_optimizers import reset_optimizer -from avalanche.models.utils import avalanche_model_adaptation -from avalanche.training.plugins import SupervisedPlugin -from avalanche.training.plugins.evaluation import default_evaluator -from avalanche.training.templates.base_online_sgd import BaseOnlineSGDTemplate -from avalanche.training.utils import trigger_plugins -from avalanche.benchmarks.scenarios import OnlineCLExperience -from avalanche.models.dynamic_optimizers import update_optimizer - - -class OnlineSupervisedTemplate(BaseOnlineSGDTemplate): - """Base class for continual learning strategies. - - BaseTemplate is the super class of all task-based continual learning - strategies. It implements a basic training loop and callback system - that allows to execute code at each experience of the training loop. - Plugins can be used to implement callbacks to augment the training - loop with additional behavior (e.g. a memory buffer for replay). - - **Scenarios** - This strategy supports several continual learning scenarios: - - * class-incremental scenarios (no task labels) - * multi-task scenarios, where task labels are provided) - * multi-incremental scenarios, where the same task may be revisited - - The exact scenario depends on the data stream and whether it provides - the task labels. 
- - **Training loop** - The training loop is organized as follows:: - - train - train_exp # for each experience - adapt_train_dataset - train_dataset_adaptation - make_train_dataloader - train_pass # for each pass - # forward - # backward - # model update - - **Evaluation loop** - The evaluation loop is organized as follows:: - - eval - eval_exp # for each experience - adapt_eval_dataset - eval_dataset_adaptation - make_eval_dataloader - eval_epoch # for each epoch - # forward - # backward - # model update - - """ - - PLUGIN_CLASS = SupervisedPlugin - - def __init__( - self, - model: Module, - optimizer: Optimizer, - criterion=CrossEntropyLoss(), - train_mb_size: int = 1, - train_passes: int = 1, - eval_mb_size: Optional[int] = 1, - device="cpu", - plugins: Optional[Sequence["SupervisedPlugin"]] = None, - evaluator=default_evaluator(), - eval_every=-1, - peval_mode="experience", - ): - """Init. - - :param model: PyTorch model. - :param optimizer: PyTorch optimizer. - :param criterion: loss function. - :param train_mb_size: mini-batch size for training. - :param train_passes: number of training passes. - :param eval_mb_size: mini-batch size for eval. - :param device: PyTorch device where the model will be allocated. - :param plugins: (optional) list of StrategyPlugins. - :param evaluator: (optional) instance of EvaluationPlugin for logging - and metric computations. None to remove logging. - :param eval_every: the frequency of the calls to `eval` inside the - training loop. -1 disables the evaluation. 0 means `eval` is called - only at the end of the learning experience. Values >0 mean that - `eval` is called every `eval_every` experiences and at the end of - the learning experience. - :param peval_mode: one of {'experience', 'iteration'}. Decides whether - the periodic evaluation during training should execute every - `eval_every` experience or iterations (Default='experience'). - """ - super().__init__( - model=model, - optimizer=optimizer, - train_mb_size=train_mb_size, - train_passes=train_passes, - eval_mb_size=eval_mb_size, - device=device, - plugins=plugins, - evaluator=evaluator, - eval_every=eval_every, - peval_mode=peval_mode, - ) - self._criterion = criterion - - ################################################################### - # State variables. These are updated during the train/eval loops. # - ################################################################### - - self.adapted_dataset = None - """ Data used to train. It may be modified by plugins. Plugins can - append data to it (e.g. for replay). - - .. note:: - - This dataset may contain samples from different experiences. If you - want the original data for the current experience - use :attr:`.BaseTemplate.experience`. - """ - - @property - def mb_x(self): - """Current mini-batch input.""" - return self.mbatch[0] - - @property - def mb_y(self): - """Current mini-batch target.""" - return self.mbatch[1] - - @property - def mb_task_id(self): - """Current mini-batch task labels.""" - assert len(self.mbatch) >= 3 - return self.mbatch[-1] - - def criterion(self): - """Loss function.""" - return self._criterion(self.mb_output, self.mb_y) - - def _before_training_exp(self, **kwargs): - """Setup to train on a single experience.""" - # Data Adaptation (e.g. 
add new samples/data augmentation) - self._before_train_dataset_adaptation(**kwargs) - self.train_dataset_adaptation(**kwargs) - self._after_train_dataset_adaptation(**kwargs) - super()._before_training_exp(**kwargs) - - def _load_train_state(self, prev_state): - super()._load_train_state(prev_state) - self.adapted_dataset = prev_state["adapted_dataset"] - self.dataloader = prev_state["dataloader"] - - def _save_train_state(self): - """Save the training state which may be modified by the eval loop. - - This currently includes: experience, adapted_dataset, dataloader, - is_training, and train/eval modes for each module. - - TODO: we probably need a better way to do this. - """ - state = super()._save_train_state() - new_state = { - "adapted_dataset": self.adapted_dataset, - "dataloader": self.dataloader, - } - return {**state, **new_state} - - def train_dataset_adaptation(self, **kwargs): - """Initialize `self.adapted_dataset`.""" - self.adapted_dataset = self.experience.dataset - self.adapted_dataset = self.adapted_dataset.train() - - def _before_eval_exp(self, **kwargs): - # Data Adaptation - self._before_eval_dataset_adaptation(**kwargs) - self.eval_dataset_adaptation(**kwargs) - self._after_eval_dataset_adaptation(**kwargs) - super()._before_eval_exp(**kwargs) - - def make_train_dataloader( - self, - num_workers=0, - shuffle=True, - pin_memory=True, - persistent_workers=False, - **kwargs - ): - """Data loader initialization. - - Called at the start of each learning experience after the dataset - adaptation. - - :param num_workers: number of thread workers for the data loading. - :param shuffle: True if the data should be shuffled, False otherwise. - :param pin_memory: If True, the data loader will copy Tensors into CUDA - pinned memory before returning them. Defaults to True. - """ - - other_dataloader_args = {} - - if parse_version(torch.__version__) >= parse_version("1.7.0"): - other_dataloader_args["persistent_workers"] = persistent_workers - for k, v in kwargs.items(): - other_dataloader_args[k] = v - - collate_from_data_or_kwargs(self.adapted_dataset, - other_dataloader_args) - self.dataloader = TaskBalancedDataLoader( - self.adapted_dataset, - oversample_small_groups=True, - num_workers=num_workers, - batch_size=self.train_mb_size, - shuffle=shuffle, - pin_memory=pin_memory, - **other_dataloader_args - ) - - def make_eval_dataloader( - self, num_workers=0, pin_memory=True, persistent_workers=False, - **kwargs - ): - """ - Initializes the eval data loader. - :param num_workers: How many subprocesses to use for data loading. - 0 means that the data will be loaded in the main process. - (default: 0). - :param pin_memory: If True, the data loader will copy Tensors into CUDA - pinned memory before returning them. Defaults to True. - :param kwargs: - :return: - """ - other_dataloader_args = {} - - if parse_version(torch.__version__) >= parse_version("1.7.0"): - other_dataloader_args["persistent_workers"] = persistent_workers - for k, v in kwargs.items(): - other_dataloader_args[k] = v - - collate_from_data_or_kwargs(self.adapted_dataset, - other_dataloader_args) - self.dataloader = DataLoader( - self.adapted_dataset, - num_workers=num_workers, - batch_size=self.eval_mb_size, - pin_memory=pin_memory, - **other_dataloader_args - ) - - def forward(self): - """Compute the model's output given the current mini-batch.""" - return avalanche_forward(self.model, self.mb_x, self.mb_task_id) - - def model_adaptation(self, model=None): - """Adapts the model to the current data. 
-
-        Calls the :class:`~avalanche.models.DynamicModule`s adaptation.
-        """
-        if model is None:
-            model = self.model
-
-        # For training:
-        if isinstance(self.experience, OnlineCLExperience):
-            # If the strategy has access to task boundaries, adapt the model
-            # for the whole origin experience to add the new units.
-            if self.experience.access_task_boundaries:
-                avalanche_model_adaptation(model,
-                                           self.experience.origin_experience)
-            else:
-                self.model_params_before_adaptation = list(model.parameters())
-                avalanche_model_adaptation(model, self.experience)
-
-        # For evaluation, the experience is not necessarily an online
-        # experience:
-        else:
-            avalanche_model_adaptation(model, self.experience)
-
-        return model.to(self.device)
-
-    def _unpack_minibatch(self):
-        """We assume mini-batches have the form <x, y, ..., t>.
-        This allows for arbitrary tensors between y and t.
-        Keep in mind that in the most general case mb_task_id is a tensor
-        which may contain different labels for each sample.
-        """
-        assert len(self.mbatch) >= 3
-        super()._unpack_minibatch()
-
-    def eval_dataset_adaptation(self, **kwargs):
-        """Initialize `self.adapted_dataset`."""
-        self.adapted_dataset = self.experience.dataset
-        self.adapted_dataset = self.adapted_dataset.eval()
-
-    def make_optimizer(self):
-        """Optimizer initialization.
-
-        Called before each training experience to configure the optimizer.
-        """
-        # We reset the optimizer's state after each experience if task
-        # boundaries are given, otherwise it updates the optimizer only if
-        # new parameters are added to the model after each adaptation step.
-
-        # We assume the current experience is an OnlineCLExperience:
-        if self.experience.access_task_boundaries:
-            reset_optimizer(self.optimizer, self.model)
-
-        else:
-            update_optimizer(self.optimizer,
-                             self.model_params_before_adaptation,
-                             self.model.parameters(),
-                             reset_state=False)
-
-    #########################################################
-    # Plugin Triggers                                       #
-    #########################################################
-
-    def _before_train_dataset_adaptation(self, **kwargs):
-        trigger_plugins(self, "before_train_dataset_adaptation", **kwargs)
-
-    def _after_train_dataset_adaptation(self, **kwargs):
-        trigger_plugins(self, "after_train_dataset_adaptation", **kwargs)
-
-    def _before_eval_dataset_adaptation(self, **kwargs):
-        trigger_plugins(self, "before_eval_dataset_adaptation", **kwargs)
-
-    def _after_eval_dataset_adaptation(self, **kwargs):
-        trigger_plugins(self, "after_eval_dataset_adaptation", **kwargs)
diff --git a/avalanche/training/templates/supervised.py b/avalanche/training/templates/supervised.py
deleted file mode 100644
index 9b1eb104a..000000000
--- a/avalanche/training/templates/supervised.py
+++ /dev/null
@@ -1,311 +0,0 @@
-from typing import Sequence, Optional
-from pkg_resources import parse_version
-
-import torch
-from torch.nn import Module, CrossEntropyLoss
-from torch.optim import Optimizer
-from torch.utils.data import DataLoader
-
-from avalanche.benchmarks.utils.data_loader import TaskBalancedDataLoader, \
-    collate_from_data_or_kwargs
-from avalanche.models import avalanche_forward
-from avalanche.models.dynamic_optimizers import reset_optimizer
-from avalanche.models.utils import avalanche_model_adaptation
-from avalanche.training.plugins import SupervisedPlugin
-from avalanche.training.plugins.evaluation import default_evaluator
-from avalanche.training.templates.base_sgd import BaseSGDTemplate
-from avalanche.training.utils import trigger_plugins
-
-
-class SupervisedTemplate(BaseSGDTemplate):
-    """Base
class for continual learning strategies. - - BaseTemplate is the super class of all task-based continual learning - strategies. It implements a basic training loop and callback system - that allows to execute code at each experience of the training loop. - Plugins can be used to implement callbacks to augment the training - loop with additional behavior (e.g. a memory buffer for replay). - - **Scenarios** - This strategy supports several continual learning scenarios: - - * class-incremental scenarios (no task labels) - * multi-task scenarios, where task labels are provided) - * multi-incremental scenarios, where the same task may be revisited - - The exact scenario depends on the data stream and whether it provides - the task labels. - - **Training loop** - The training loop is organized as follows:: - - train - train_exp # for each experience - adapt_train_dataset - train_dataset_adaptation - make_train_dataloader - train_epoch # for each epoch - # forward - # backward - # model update - - **Evaluation loop** - The evaluation loop is organized as follows:: - - eval - eval_exp # for each experience - adapt_eval_dataset - eval_dataset_adaptation - make_eval_dataloader - eval_epoch # for each epoch - # forward - # backward - # model update - - """ - - PLUGIN_CLASS = SupervisedPlugin - - def __init__( - self, - model: Module, - optimizer: Optimizer, - criterion=CrossEntropyLoss(), - train_mb_size: int = 1, - train_epochs: int = 1, - eval_mb_size: Optional[int] = 1, - device="cpu", - plugins: Optional[Sequence["SupervisedPlugin"]] = None, - evaluator=default_evaluator(), - eval_every=-1, - peval_mode="epoch", - ): - """Init. - - :param model: PyTorch model. - :param optimizer: PyTorch optimizer. - :param criterion: loss function. - :param train_mb_size: mini-batch size for training. - :param train_epochs: number of training epochs. - :param eval_mb_size: mini-batch size for eval. - :param device: PyTorch device where the model will be allocated. - :param plugins: (optional) list of StrategyPlugins. - :param evaluator: (optional) instance of EvaluationPlugin for logging - and metric computations. None to remove logging. - :param eval_every: the frequency of the calls to `eval` inside the - training loop. -1 disables the evaluation. 0 means `eval` is called - only at the end of the learning experience. Values >0 mean that - `eval` is called every `eval_every` epochs and at the end of the - learning experience. - :param peval_mode: one of {'epoch', 'iteration'}. Decides whether the - periodic evaluation during training should execute every - `eval_every` epochs or iterations (Default='epoch'). - """ - super().__init__( - model=model, - optimizer=optimizer, - train_mb_size=train_mb_size, - train_epochs=train_epochs, - eval_mb_size=eval_mb_size, - device=device, - plugins=plugins, - evaluator=evaluator, - eval_every=eval_every, - peval_mode=peval_mode, - ) - self._criterion = criterion - - ################################################################### - # State variables. These are updated during the train/eval loops. # - ################################################################### - - self.adapted_dataset = None - """ Data used to train. It may be modified by plugins. Plugins can - append data to it (e.g. for replay). - - .. note:: - - This dataset may contain samples from different experiences. If you - want the original data for the current experience - use :attr:`.BaseTemplate.experience`. 
- """ - - @property - def mb_x(self): - """Current mini-batch input.""" - return self.mbatch[0] - - @property - def mb_y(self): - """Current mini-batch target.""" - return self.mbatch[1] - - @property - def mb_task_id(self): - """Current mini-batch task labels.""" - assert len(self.mbatch) >= 3 - return self.mbatch[-1] - - def criterion(self): - """Loss function.""" - return self._criterion(self.mb_output, self.mb_y) - - def _before_training_exp(self, **kwargs): - """Setup to train on a single experience.""" - # Data Adaptation (e.g. add new samples/data augmentation) - self._before_train_dataset_adaptation(**kwargs) - self.train_dataset_adaptation(**kwargs) - self._after_train_dataset_adaptation(**kwargs) - super()._before_training_exp(**kwargs) - - def _load_train_state(self, prev_state): - super()._load_train_state(prev_state) - self.adapted_dataset = prev_state["adapted_dataset"] - self.dataloader = prev_state["dataloader"] - - def _save_train_state(self): - """Save the training state which may be modified by the eval loop. - - This currently includes: experience, adapted_dataset, dataloader, - is_training, and train/eval modes for each module. - - TODO: we probably need a better way to do this. - """ - state = super()._save_train_state() - new_state = { - "adapted_dataset": self.adapted_dataset, - "dataloader": self.dataloader, - } - return {**state, **new_state} - - def train_dataset_adaptation(self, **kwargs): - """Initialize `self.adapted_dataset`.""" - self.adapted_dataset = self.experience.dataset - self.adapted_dataset = self.adapted_dataset.train() - - def _before_eval_exp(self, **kwargs): - # Data Adaptation - self._before_eval_dataset_adaptation(**kwargs) - self.eval_dataset_adaptation(**kwargs) - self._after_eval_dataset_adaptation(**kwargs) - super()._before_eval_exp(**kwargs) - - def make_train_dataloader( - self, - num_workers=0, - shuffle=True, - pin_memory=True, - persistent_workers=False, - **kwargs - ): - """Data loader initialization. - - Called at the start of each learning experience after the dataset - adaptation. - - :param num_workers: number of thread workers for the data loading. - :param shuffle: True if the data should be shuffled, False otherwise. - :param pin_memory: If True, the data loader will copy Tensors into CUDA - pinned memory before returning them. Defaults to True. - """ - - other_dataloader_args = {} - - if parse_version(torch.__version__) >= parse_version("1.7.0"): - other_dataloader_args["persistent_workers"] = persistent_workers - for k, v in kwargs.items(): - other_dataloader_args[k] = v - - self.dataloader = TaskBalancedDataLoader( - self.adapted_dataset, - oversample_small_groups=True, - num_workers=num_workers, - batch_size=self.train_mb_size, - shuffle=shuffle, - pin_memory=pin_memory, - **other_dataloader_args - ) - - def make_eval_dataloader( - self, num_workers=0, pin_memory=True, persistent_workers=False, **kwargs - ): - """ - Initializes the eval data loader. - :param num_workers: How many subprocesses to use for data loading. - 0 means that the data will be loaded in the main process. - (default: 0). - :param pin_memory: If True, the data loader will copy Tensors into CUDA - pinned memory before returning them. Defaults to True. 
-        :param kwargs:
-        :return:
-        """
-        other_dataloader_args = {}
-
-        if parse_version(torch.__version__) >= parse_version("1.7.0"):
-            other_dataloader_args["persistent_workers"] = persistent_workers
-        for k, v in kwargs.items():
-            other_dataloader_args[k] = v
-
-        collate_from_data_or_kwargs(self.adapted_dataset,
-                                    other_dataloader_args)
-        self.dataloader = DataLoader(
-            self.adapted_dataset,
-            num_workers=num_workers,
-            batch_size=self.eval_mb_size,
-            pin_memory=pin_memory,
-            **other_dataloader_args
-        )
-
-    def forward(self):
-        """Compute the model's output given the current mini-batch."""
-        return avalanche_forward(self.model, self.mb_x, self.mb_task_id)
-
-    def model_adaptation(self, model=None):
-        """Adapts the model to the current data.
-
-        Calls the :class:`~avalanche.models.DynamicModule`s adaptation.
-        """
-        if model is None:
-            model = self.model
-        avalanche_model_adaptation(model, self.experience)
-        return model.to(self.device)
-
-    def _unpack_minibatch(self):
-        """We assume mini-batches have the form <x, y, ..., t>.
-        This allows for arbitrary tensors between y and t.
-        Keep in mind that in the most general case mb_task_id is a tensor
-        which may contain different labels for each sample.
-        """
-        assert len(self.mbatch) >= 3
-        super()._unpack_minibatch()
-
-    def eval_dataset_adaptation(self, **kwargs):
-        """Initialize `self.adapted_dataset`."""
-        self.adapted_dataset = self.experience.dataset
-        self.adapted_dataset = self.adapted_dataset.eval()
-
-    def make_optimizer(self):
-        """Optimizer initialization.
-
-        Called before each training experience to configure the optimizer.
-        """
-        # We reset the optimizer's state after each experience.
-        # This allows adding new parameters (new heads) and
-        # freezing old units during the model's adaptation phase.
-        reset_optimizer(self.optimizer, self.model)
-
-    #########################################################
-    # Plugin Triggers                                       #
-    #########################################################
-
-    def _before_train_dataset_adaptation(self, **kwargs):
-        trigger_plugins(self, "before_train_dataset_adaptation", **kwargs)
-
-    def _after_train_dataset_adaptation(self, **kwargs):
-        trigger_plugins(self, "after_train_dataset_adaptation", **kwargs)
-
-    def _before_eval_dataset_adaptation(self, **kwargs):
-        trigger_plugins(self, "before_eval_dataset_adaptation", **kwargs)
-
-    def _after_eval_dataset_adaptation(self, **kwargs):
-        trigger_plugins(self, "after_eval_dataset_adaptation", **kwargs)
diff --git a/tests/training/test_replay.py b/tests/training/test_replay.py
index 26afa78bc..7ae908161 100644
--- a/tests/training/test_replay.py
+++ b/tests/training/test_replay.py
@@ -22,7 +22,7 @@
     ParametricBuffer,
 )
 from avalanche.training.supervised import Naive
-from avalanche.training.templates.supervised import SupervisedTemplate
+from avalanche.training.templates import SupervisedTemplate
 
 from tests.unit_tests_utils import get_fast_benchmark
diff --git a/tests/training/test_strategies.py b/tests/training/test_strategies.py
index c7a9b773e..95889c818 100644
--- a/tests/training/test_strategies.py
+++ b/tests/training/test_strategies.py
@@ -47,7 +47,7 @@
 from avalanche.training.supervised.icarl import ICaRL
 from avalanche.training.supervised.joint_training import AlreadyTrainedError
 from avalanche.training.supervised.strategy_wrappers import PNNStrategy
-from avalanche.training.templates.supervised import SupervisedTemplate
+from avalanche.training.templates import SupervisedTemplate
 from avalanche.training.utils import get_last_fc_layer
 from tests.unit_tests_utils import
get_fast_benchmark, get_device From aee1d2efc403124c9b9bbc6bd04295f8eeea59ce Mon Sep 17 00:00:00 2001 From: hamedhemati Date: Fri, 30 Sep 2022 16:35:27 +0200 Subject: [PATCH 09/10] Update `observation_type` and `base_sgd` --- avalanche/logging/base_logger.py | 2 +- avalanche/training/templates/base_sgd.py | 30 +++++++++++++++++-- .../training/templates/common_templates.py | 2 +- .../observation_type/batch_observation.py | 28 +---------------- .../observation_type/online_observation.py | 21 +------------ examples/online_replay.py | 2 +- 6 files changed, 33 insertions(+), 52 deletions(-) diff --git a/avalanche/logging/base_logger.py b/avalanche/logging/base_logger.py index 9e03daa87..77b86864e 100644 --- a/avalanche/logging/base_logger.py +++ b/avalanche/logging/base_logger.py @@ -4,7 +4,7 @@ if TYPE_CHECKING: from avalanche.evaluation.metric_results import MetricValue - from avalanche.training.templates.supervised import SupervisedTemplate + from avalanche.training.templates import SupervisedTemplate class BaseLogger(ABC): diff --git a/avalanche/training/templates/base_sgd.py b/avalanche/training/templates/base_sgd.py index 46813a9ed..dc0ba9d38 100644 --- a/avalanche/training/templates/base_sgd.py +++ b/avalanche/training/templates/base_sgd.py @@ -218,7 +218,7 @@ def eval_epoch(self, **kwargs): # ==================================================================> NEW - def maybe_adapt_model_and_make_optimizer(self): + def check_model_and_optimizer(self): # Should be implemented in observation type raise NotImplementedError() @@ -234,10 +234,36 @@ def _before_training_exp(self, **kwargs): # Model Adaptation (e.g. freeze/add new units) # self.model = self.model_adaptation() # self.make_optimizer() - self.maybe_adapt_model_and_make_optimizer() + self.check_model_and_optimizer() super()._before_training_exp(**kwargs) + def _train_exp( + self, experience: CLExperience, eval_streams=None, **kwargs + ): + """Training loop over a single Experience object. + + :param experience: CL experience information. + :param eval_streams: list of streams for evaluation. + If None: use the training experience for evaluation. + Use [] if you do not want to evaluate during training. + :param kwargs: custom arguments. + """ + if eval_streams is None: + eval_streams = [experience] + for i, exp in enumerate(eval_streams): + if not isinstance(exp, Iterable): + eval_streams[i] = [exp] + for _ in range(self.train_epochs): + self._before_training_epoch(**kwargs) + + if self._stop_training: # Early stopping + self._stop_training = False + break + + self.training_epoch(**kwargs) + self._after_training_epoch(**kwargs) + def _save_train_state(self): """Save the training state which may be modified by the eval loop. 
diff --git a/avalanche/training/templates/common_templates.py b/avalanche/training/templates/common_templates.py index 54aefd058..a8f5e35e6 100644 --- a/avalanche/training/templates/common_templates.py +++ b/avalanche/training/templates/common_templates.py @@ -328,7 +328,7 @@ def __init__( optimizer=optimizer, criterion=criterion, train_mb_size=train_mb_size, - train_epochs=1, + train_epochs=train_passes, eval_mb_size=eval_mb_size, device=device, plugins=plugins, diff --git a/avalanche/training/templates/observation_type/batch_observation.py b/avalanche/training/templates/observation_type/batch_observation.py index 35b887d11..4ec073849 100644 --- a/avalanche/training/templates/observation_type/batch_observation.py +++ b/avalanche/training/templates/observation_type/batch_observation.py @@ -6,32 +6,6 @@ class BatchObservation: - def _train_exp( - self, experience: CLExperience, eval_streams=None, **kwargs - ): - """Training loop over a single Experience object. - - :param experience: CL experience information. - :param eval_streams: list of streams for evaluation. - If None: use the training experience for evaluation. - Use [] if you do not want to evaluate during training. - :param kwargs: custom arguments. - """ - if eval_streams is None: - eval_streams = [experience] - for i, exp in enumerate(eval_streams): - if not isinstance(exp, Iterable): - eval_streams[i] = [exp] - for _ in range(self.train_epochs): - self._before_training_epoch(**kwargs) - - if self._stop_training: # Early stopping - self._stop_training = False - break - - self.training_epoch(**kwargs) - self._after_training_epoch(**kwargs) - def model_adaptation(self, model=None): """Adapts the model to the current data. @@ -52,6 +26,6 @@ def make_optimizer(self): # freezing old units during the model's adaptation phase. reset_optimizer(self.optimizer, self.model) - def maybe_adapt_model_and_make_optimizer(self): + def check_model_and_optimizer(self): self.model = self.model_adaptation() self.make_optimizer() diff --git a/avalanche/training/templates/observation_type/online_observation.py b/avalanche/training/templates/observation_type/online_observation.py index e542375a4..d3dbfaac5 100644 --- a/avalanche/training/templates/observation_type/online_observation.py +++ b/avalanche/training/templates/observation_type/online_observation.py @@ -7,25 +7,6 @@ class OnlineObservation: - def _train_exp( - self, experience: OnlineCLExperience, eval_streams=None, **kwargs - ): - """Training loop over a single Experience object. - - :param experience: Online CL experience information. - :param eval_streams: list of streams for evaluation. - If None: use the training experience for evaluation. - Use [] if you do not want to evaluate during training. - :param kwargs: custom arguments. - """ - if eval_streams is None: - eval_streams = [experience] - for i, exp in enumerate(eval_streams): - if not isinstance(exp, Iterable): - eval_streams[i] = [exp] - - self.training_epoch(**kwargs) - def make_optimizer(self): """Optimizer initialization. 
@@ -71,7 +52,7 @@ def model_adaptation(self, model=None): return model.to(self.device) - def maybe_adapt_model_and_make_optimizer(self): + def check_model_and_optimizer(self): # If strategy has access to the task boundaries, and the current # sub-experience is the first sub-experience in the online (sub-)stream, # then adapt the model with the full origin experience: diff --git a/examples/online_replay.py b/examples/online_replay.py index b01144471..d998d72a8 100644 --- a/examples/online_replay.py +++ b/examples/online_replay.py @@ -127,7 +127,7 @@ def main(args): experiences=exp, experience_size=1) # Train on the online train stream of the scenario - cl_strategy.train(ocl_benchmark.online_train_stream) + cl_strategy.train(ocl_benchmark.train_stream) results.append(cl_strategy.eval(scenario.test_stream)) From 678a23433558eca8bf22fcadbb2559e3e385edd5 Mon Sep 17 00:00:00 2001 From: hamedhemati Date: Mon, 3 Oct 2022 16:27:33 +0200 Subject: [PATCH 10/10] Update `observation_type` and `base_sgd` --- examples/online_naive.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/online_naive.py b/examples/online_naive.py index 04c87919e..d4b7b581f 100644 --- a/examples/online_naive.py +++ b/examples/online_naive.py @@ -120,8 +120,10 @@ def main(args): experiences=exp, experience_size=1, access_task_boundaries=True) + # Train on the online train stream of the scenario cl_strategy.train(ocl_benchmark.train_stream) + results.append(cl_strategy.eval(scenario.original_test_stream))
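
The net effect of these last two patches is that all per-experience setup now goes through the single `check_model_and_optimizer()` hook, which `_before_training_exp` in `base_sgd.py` calls before the epoch loop. The sketch below illustrates how a custom observation-type mixin could satisfy that contract. It is a minimal, hypothetical example, not part of this change set: the class name `MyObservation` is invented for illustration, while `avalanche_model_adaptation` and `reset_optimizer` are the helpers already imported by the templates touched in this series.

    from avalanche.models.dynamic_optimizers import reset_optimizer
    from avalanche.models.utils import avalanche_model_adaptation


    class MyObservation:
        """Illustrative observation-type mixin for an SGD-based template."""

        def model_adaptation(self, model=None):
            # Adapt dynamic modules (e.g. add new output units) to the data
            # of the current experience.
            if model is None:
                model = self.model
            avalanche_model_adaptation(model, self.experience)
            return model.to(self.device)

        def make_optimizer(self):
            # Reset the optimizer so that it tracks the model's current
            # parameters after adaptation.
            reset_optimizer(self.optimizer, self.model)

        def check_model_and_optimizer(self):
            # Called once per training experience by
            # BaseSGDTemplate._before_training_exp, before the epoch loop.
            self.model = self.model_adaptation()
            self.make_optimizer()

A concrete strategy would mix such a class into a template in the same slot that `BatchObservation` and `OnlineObservation` occupy, so the renamed hook is picked up automatically by the training loop.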