
Commit 49a4a36

carmocca and awaelchli authored
Have the outputs match the loops format (#12182)
Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com>
1 parent 821ca7e commit 49a4a36

10 files changed: +350 additions, -153 deletions


CHANGELOG.md

Lines changed: 7 additions & 1 deletion
@@ -30,7 +30,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
  * Broadcast the `_terminate_gracefully` to all processes and add support for DDP ([#10638](https://github.com/PyTorchLightning/pytorch-lightning/pull/10638))

- - Added support for re-instantiation of custom (subclasses of) `DataLoaders` returned in the `*_dataloader()` methods, i.e., automatic replacement of samplers now works with custom types of `DataLoader` ([#10680](https://github.com/PyTorchLightning/pytorch-lightning/pull/10639))
+ - Added support for re-instantiation of custom (subclasses of) `DataLoaders` returned in the `*_dataloader()` methods, i.e., automatic replacement of samplers now works with custom types of `DataLoader` ([#10680](https://github.com/PyTorchLightning/pytorch-lightning/pull/10680))

  - Added a function to validate if fault tolerant training is supported. ([#10465](https://github.com/PyTorchLightning/pytorch-lightning/pull/10465))
@@ -410,6 +410,12 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
  - Deprecated `TrainerOptimizersMixin` and moved functionality to `core/optimizer.py`([#11155](https://github.com/PyTorchLightning/pytorch-lightning/pull/11155))

+ - Deprecated the `on_train_batch_end(outputs)` format when multiple optimizers are used and TBPTT is enabled ([#12182](https://github.com/PyTorchLightning/pytorch-lightning/pull/12182))
+
+ - Deprecated the `training_epoch_end(outputs)` format when multiple optimizers are used and TBPTT is enabled ([#12182](https://github.com/PyTorchLightning/pytorch-lightning/pull/12182))
+
  - Deprecated `TrainerCallbackHookMixin` ([#11148](https://github.com/PyTorchLightning/pytorch-lightning/pull/11148))
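To illustrate the two new deprecation entries, a minimal sketch of the nesting involved (not part of this commit; the placeholder dicts and the sizes, two optimizers and three truncated-BPTT splits, are assumptions for illustration):

# Hypothetical placeholder for a single `training_step` output dict.
out = {"loss": ...}

# Deprecated `on_train_batch_end(outputs, ...)` nesting: (n_optimizers=2, tbptt_steps=3).
outputs_old = [[out, out, out], [out, out, out]]
# Nesting planned for v1.8: (tbptt_steps=3, n_optimizers=2), matching the order the loops produce.
outputs_new = [[out, out], [out, out], [out, out]]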

pytorch_lightning/core/lightning.py

Lines changed: 3 additions & 4 deletions
@@ -705,10 +705,9 @@ def training_epoch_end(self, outputs: EPOCH_OUTPUT) -> None:
              training_epoch_end(train_outs)

          Args:
-             outputs: List of outputs you defined in :meth:`training_step`.
-                 If there are multiple optimizers, it is a list containing a list of outputs for each optimizer.
-                 If using ``truncated_bptt_steps > 1``, each element is a list of outputs corresponding to the outputs
-                 of each processed split batch.
+             outputs: List of outputs you defined in :meth:`training_step`. If there are multiple optimizers or when
+                 using ``truncated_bptt_steps > 0``, the lists have the dimensions
+                 (n_batches, tbptt_steps, n_optimizers). Dimensions of length 1 are squeezed.

          Return:
              None
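As a brief illustration of the updated docstring (this example is not part of the diff; the module and metric names are hypothetical), in the common single-optimizer case without truncated BPTT the length-1 dimensions are squeezed away, so `outputs` is simply a flat list of the dicts returned by `training_step`:

import pytorch_lightning as pl
import torch


class ExampleModule(pl.LightningModule):  # hypothetical module, for illustration only
    def training_epoch_end(self, outputs):
        # single optimizer, no TBPTT: `outputs` is a flat list of `training_step` dicts,
        # assuming `training_step` returned a dict with a "loss" key
        epoch_loss = torch.stack([out["loss"] for out in outputs]).mean()
        self.log("train/epoch_loss", epoch_loss)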

pytorch_lightning/loops/epoch/training_epoch_loop.py

Lines changed: 47 additions & 27 deletions
@@ -18,10 +18,11 @@
  import numpy as np
  import torch

+ import pytorch_lightning as pl
  from pytorch_lightning import loops  # import as loops to avoid circular imports
  from pytorch_lightning.loops.batch import TrainingBatchLoop
  from pytorch_lightning.loops.batch.training_batch_loop import _OUTPUTS_TYPE as _BATCH_OUTPUTS_TYPE
- from pytorch_lightning.loops.utilities import _get_active_optimizers, _is_max_limit_reached
+ from pytorch_lightning.loops.utilities import _get_active_optimizers, _is_max_limit_reached, _v1_8_output_format
  from pytorch_lightning.trainer.connectors.logger_connector.result import _ResultCollection
  from pytorch_lightning.trainer.progress import BatchProgress, SchedulerProgress
  from pytorch_lightning.trainer.supporters import CombinedLoader
@@ -216,7 +217,7 @@ def advance(self, data_fetcher: AbstractDataFetcher) -> None: # type: ignore[ov
          batch_end_outputs = self._prepare_outputs_training_batch_end(
              batch_output,
-             automatic=self.trainer.lightning_module.trainer.lightning_module.automatic_optimization,
+             lightning_module=self.trainer.lightning_module,
              num_optimizers=len(self.trainer.optimizers),
          )
@@ -337,26 +338,38 @@ def _should_accumulate(self) -> bool:
  @staticmethod
  def _prepare_outputs_training_batch_end(
      batch_output: _BATCH_OUTPUTS_TYPE,
-     automatic: bool,
+     lightning_module: "pl.LightningModule",
      num_optimizers: int,
  ) -> Union[List[List[Dict[str, Any]]], List[Dict[str, Any]]]:
-     """Processes the outputs from the batch loop into the format passed to the ``training_batch_end`` hook.
-
-     ``(tbptt_steps, n_opt) -> (n_opt, tbptt_steps)``. The optimizer dimension might have been squeezed.
-     """
+     """Processes the outputs from the batch loop into the format passed to the ``on_train_batch_end`` hook."""
      if not batch_output:
          return []

      # convert optimizer dicts to list
-     if automatic:
+     if lightning_module.automatic_optimization:
          batch_output = apply_to_collection(
              batch_output, dtype=dict, function=_convert_optim_dict, num_optimizers=num_optimizers
          )
-     array = np.array(batch_output, dtype=object)
-     if array.ndim == 1:
-         array = np.expand_dims(array, 1)

-     array = array.transpose((1, 0))
+     array = np.array(batch_output, dtype=object)
+     # TODO: remove in v1.8
+     if (
+         num_optimizers > 1
+         and lightning_module.truncated_bptt_steps > 0
+         and not _v1_8_output_format(lightning_module.on_train_batch_end)
+     ):
+         rank_zero_deprecation(
+             "You are training with multiple optimizers AND truncated backpropagation through time enabled."
+             " The current format of the `on_train_batch_end(outputs, ...)` is a 2d list with sizes"
+             " (n_optimizers, tbptt_steps), however, this has been deprecated and will change in version v1.8 to"
+             " (tbptt_steps, n_optimizers). You can update your code by adding the following parameter to your"
+             " hook signature: `on_train_batch_end(outputs, ..., new_format=True)`."
+         )
+         # (tbptt_steps, n_opt) -> (n_opt, tbptt_steps)
+         if array.ndim == 1:
+             array = np.expand_dims(array, 1)
+         array = array.transpose((1, 0))
+     # squeeze all single-element dimensions
      array = array.squeeze()
      array = array.tolist()
      array = _recursive_unpad(array)
@@ -365,35 +378,42 @@ def _prepare_outputs_training_batch_end(
  @staticmethod
  def _prepare_outputs_training_epoch_end(
      batch_outputs: _OUTPUTS_TYPE,
-     automatic: bool,
+     lightning_module: "pl.LightningModule",
      num_optimizers: int,
  ) -> Union[List[List[List[Dict[str, Any]]]], List[List[Dict[str, Any]]], List[Dict[str, Any]]]:
-     """Processes the outputs from the batch loop into the format passed to the ``training_epoch_end`` hook.
-
-     ``(n_batches, tbptt_steps, n_opt) -> (n_opt, n_batches, tbptt_steps)``.
-     All single-element dimensions might have been squeezed.
-
-     This processing is necessary because the format of the inputs to the ``training_epoch_end`` hook does not
-     match the loop structure and because empty dimensions are squeezed. This could break with loop customization.
-     """
+     """Processes the outputs from the batch loop into the format passed to the ``training_epoch_end`` hook."""
      # `batch_outputs` (plural) is the same as `epoch_end_output` (singular)
      if not batch_outputs:
          return []

      # convert optimizer dicts to list
-     if automatic:
+     if lightning_module.automatic_optimization:
          batch_outputs = apply_to_collection(
              batch_outputs, dtype=dict, function=_convert_optim_dict, num_optimizers=num_optimizers
          )

      array = _recursive_pad(batch_outputs)
-     if array.ndim == 2:
-         array = np.expand_dims(array, 2)
-     array = array.transpose((2, 0, 1))
+     # TODO: remove in v1.8
+     if (
+         num_optimizers > 1
+         and lightning_module.truncated_bptt_steps > 0
+         and not _v1_8_output_format(lightning_module.on_train_epoch_end)
+     ):
+         rank_zero_deprecation(
+             "You are training with multiple optimizers AND truncated backpropagation through time enabled."
+             " The current format of the `training_epoch_end(outputs)` is a 3d list with sizes"
+             " (n_optimizers, n_batches, tbptt_steps), however, this has been deprecated and will change in version"
+             " v1.8 to (n_batches, tbptt_steps, n_optimizers). You can update your code by adding the following"
+             " parameter to your hook signature: `training_epoch_end(outputs, new_format=True)`."
+         )
+         # (n_batches, tbptt_steps, n_opt) -> (n_opt, n_batches, tbptt_steps)
+         if array.ndim == 2:
+             array = np.expand_dims(array, 2)
+         array = array.transpose((2, 0, 1))
+     # squeeze all single-element dimensions
      array = array.squeeze()
      array = array.tolist()
      array = _recursive_unpad(array)
-
      # in case we squeezed from 1-element array to a 0-dim array
      array = array if isinstance(array, list) else [array]
      # remove residual empty lists
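To make the array handling above concrete, here is a standalone toy reproduction with plain numpy (not the loop code itself; the dict values are made up): the loops now produce outputs in (tbptt_steps, n_optimizers) order, and only the deprecated branch transposes them back to the legacy (n_optimizers, tbptt_steps) layout before single-element dimensions are squeezed:

import numpy as np

# Toy stand-ins for `training_step` output dicts: 3 TBPTT splits x 2 optimizers.
batch_output = [
    [{"loss": 0.1}, {"loss": 0.2}],
    [{"loss": 0.3}, {"loss": 0.4}],
    [{"loss": 0.5}, {"loss": 0.6}],
]

array = np.array(batch_output, dtype=object)  # shape (tbptt_steps=3, n_optimizers=2)
legacy = array.transpose((1, 0))              # deprecated layout: (n_optimizers=2, tbptt_steps=3)
print(legacy.shape)                           # (2, 3)

# With a single optimizer, squeezing removes the length-1 dimension entirely.
single = np.array([[{"loss": 0.1}], [{"loss": 0.2}]], dtype=object).squeeze()
print(single.tolist())                        # [{'loss': 0.1}, {'loss': 0.2}]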
@@ -519,7 +539,7 @@ def _reload_dataloader_state_dict(self, data_fetcher: AbstractDataFetcher) -> No
          self._dataloader_state_dict = None


- def _convert_optim_dict(outs: Dict[int, Dict[str, Any]], num_optimizers: int) -> List[Dict[str, Any]]:
+ def _convert_optim_dict(outs: Dict[int, Dict[str, Any]], num_optimizers: int) -> List[Optional[Dict[str, Any]]]:
      """Converts an optimizer dict to a list in which the key of the dict determines the position of the element.

      Example::

pytorch_lightning/loops/fit_loop.py

Lines changed: 1 addition & 1 deletion
@@ -276,7 +276,7 @@ def on_advance_end(self) -> None:
          if is_overridden("training_epoch_end", model) and self._outputs:
              epoch_end_outputs = self.epoch_loop._prepare_outputs_training_epoch_end(
                  self._outputs,
-                 automatic=model.automatic_optimization,
+                 lightning_module=model,
                  num_optimizers=len(self.trainer.optimizers),
              )
              # run lightning module hook training_epoch_end

pytorch_lightning/loops/utilities.py

Lines changed: 8 additions & 1 deletion
@@ -11,11 +11,12 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
+ import inspect
  from collections import OrderedDict
  from contextlib import contextmanager
  from datetime import timedelta
  from functools import lru_cache
- from typing import Any, Dict, Generator, List, Optional, Sequence, Tuple, Union
+ from typing import Any, Callable, Dict, Generator, List, Optional, Sequence, Tuple, Union

  import numpy as np
  import torch
@@ -221,3 +222,9 @@ def _reset_progress(loop: Loop) -> None:
              v.reset()
          elif isinstance(v, Loop):
              _reset_progress(v)
+
+
+ # TODO: remove in v1.8
+ def _v1_8_output_format(fx: Callable) -> bool:
+     parameters = inspect.signature(fx).parameters
+     return "new_format" in parameters and parameters["new_format"].default is True

tests/deprecated_api/test_remove_1-8.py

Lines changed: 50 additions & 0 deletions
@@ -42,6 +42,7 @@
  from pytorch_lightning.utilities.enums import DeviceType, DistributedType
  from pytorch_lightning.utilities.imports import _TORCHTEXT_LEGACY
  from pytorch_lightning.utilities.rank_zero import rank_zero_only, rank_zero_warn
+ from tests.deprecated_api import no_deprecated_call
  from tests.helpers.boring_model import BoringDataModule, BoringModel
  from tests.helpers.runif import RunIf
  from tests.helpers.torchtext_utils import get_dummy_torchtext_data_iterator
@@ -652,6 +653,55 @@ def test_v1_8_0_weights_save_path(tmpdir):
          _ = trainer.weights_save_path


+ def test_deprecated_epoch_outputs_format(tmpdir):
+     class DeprecationModel(BoringModel):
+         def __init__(self):
+             super().__init__()
+             self.truncated_bptt_steps = 1
+
+         def training_step(self, batch, batch_idx, optimizer_idx, hiddens):
+             output = super().training_step(batch, batch_idx)
+             output["hiddens"] = hiddens
+             return output
+
+         def tbptt_split_batch(self, batch, split_size):
+             return [batch, batch]
+
+         def training_epoch_end(self, outputs):
+             ...
+
+         def on_train_batch_end(self, outputs, batch, batch_idx) -> None:
+             ...
+
+         def configure_optimizers(self):
+             return [torch.optim.Adam(self.parameters()), torch.optim.Adam(self.parameters())]
+
+     trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
+     model = DeprecationModel()
+     batch_match = r"on_train_batch_end.*will change in version v1.8 to \(tbptt_steps, n_optimizers\)"
+     with pytest.deprecated_call(match=batch_match):
+         trainer.fit(model)
+
+     class DeprecationModel2(DeprecationModel):
+         def on_train_batch_end(self, *args, new_format=True):
+             ...
+
+     trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
+     model = DeprecationModel2()
+     epoch_match = r"training_epoch_end.*will change in version v1.8 to \(n_batches, tbptt_steps, n_optimizers\)"
+     with pytest.deprecated_call(match=epoch_match):
+         trainer.fit(model)
+
+     class NoDeprecationModel(DeprecationModel2):
+         def training_epoch_end(self, outputs, new_format=True):
+             ...
+
+     trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
+     model = NoDeprecationModel()
+     with no_deprecated_call(match="will change in version v1.8.*new_format=True"):
+         trainer.fit(model)
+
+
  @pytest.mark.flaky(reruns=3)
  @pytest.mark.parametrize(["action", "expected"], [("a", [3, 1]), ("b", [2]), ("c", [1])])
  def test_simple_profiler_iterable_durations(tmpdir, action: str, expected: list):
