
Commit

Fix for deepspeed
carmocca committed Sep 6, 2021
1 parent b2d4c7e commit 1149a74
Showing 3 changed files with 10 additions and 7 deletions.
2 changes: 1 addition & 1 deletion pytorch_lightning/core/lightning.py

@@ -632,7 +632,7 @@ def training_step(self, *args, **kwargs) -> STEP_OUTPUT:
             - ``None`` - Training will skip to the next batch
         Note:
-            Returning ``None`` is currently not supported for multi-GPU or TPU.
+            Returning ``None`` is currently not supported for multi-GPU or TPU, or using `DeepSpeed`.
         In this step you'd normally do the forward pass and calculate the loss for a batch.
         You can also do fancier things like multiple forward passes or something model specific.
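
The docstring change above concerns the batch-skipping behaviour: returning ``None`` from ``training_step`` makes training skip that batch. A minimal sketch of a module relying on it follows; the module name, layer sizes, and loss threshold are hypothetical and not part of this commit.

    import torch
    from pytorch_lightning import LightningModule

    class SkipTinyLossModel(LightningModule):
        """Hypothetical module that skips optimization when the loss is already tiny."""

        def __init__(self):
            super().__init__()
            self.layer = torch.nn.Linear(32, 2)

        def training_step(self, batch, batch_idx):
            x, y = batch
            loss = torch.nn.functional.cross_entropy(self.layer(x), y)
            if loss < 1e-3:
                return None  # skips this batch; unsupported on multi-GPU, TPU, or with DeepSpeed
            return loss

        def configure_optimizers(self):
            return torch.optim.SGD(self.parameters(), lr=0.1)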
13 changes: 8 additions & 5 deletions pytorch_lightning/plugins/precision/deepspeed_precision.py

@@ -20,6 +20,7 @@
 import pytorch_lightning as pl
 from pytorch_lightning.plugins.precision.precision_plugin import PrecisionPlugin
 from pytorch_lightning.utilities import GradClipAlgorithmType
+from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from pytorch_lightning.utilities.model_helpers import is_overridden
 from pytorch_lightning.utilities.warnings import WarningCache

@@ -44,12 +45,14 @@ def pre_optimizer_step(
         """Hook to do something before each optimizer step."""
         result = lambda_closure()  # DeepSpeed does not support closures
         super().pre_optimizer_step(model, optimizer, optimizer_idx, lambda_closure, **kwargs)
-        skipped_backward = result is None
         # in manual optimization, the closure does not return a value
-        if not model.automatic_optimization or not skipped_backward:
-            # the following should be in a `optimizer_step` hook but we don't have one in the precision plugin.
-            deepspeed_engine = model.trainer.model
-            deepspeed_engine.step()
+        if model.automatic_optimization and result is None:
+            raise MisconfigurationException(
+                "Skipping backward by returning `None` from your `training_step` is not supported by `DeepSpeed`"
+            )
+        # the following should be in a `optimizer_step` hook but we don't have one in the precision plugin.
+        deepspeed_engine = model.trainer.model
+        deepspeed_engine.step()
         return False

     def backward(self, model: "pl.LightningModule", closure_loss: Tensor, *args: Any, **kwargs: Any) -> None:
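
The behavioural change in ``pre_optimizer_step`` above is that a skipped backward under automatic optimization now raises instead of silently bypassing ``deepspeed_engine.step()``. Below is a standalone sketch of that guard; the helper name and signature are invented for illustration and do not exist in the plugin.

    from pytorch_lightning.utilities.exceptions import MisconfigurationException

    def check_deepspeed_closure_result(automatic_optimization: bool, result) -> None:
        # Hypothetical helper mirroring the guard added above. In manual optimization
        # the closure returns no value, so only automatic optimization with a `None`
        # result (i.e. a skipped backward) is rejected.
        if automatic_optimization and result is None:
            raise MisconfigurationException(
                "Skipping backward by returning `None` from your `training_step` is not supported by `DeepSpeed`"
            )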
2 changes: 1 addition & 1 deletion pytorch_lightning/plugins/precision/native_amp.py

@@ -96,7 +96,7 @@ def pre_optimizer_step(
                 f"native PyTorch amp and lbfgs are not compatible (optimizer {optimizer_idx})."
                 " To request, please file a Github issue in PyTorch and tag @mcarilli"
             )
-        result = lambda_closure()
+        result = lambda_closure()  # native amp does not support closures
         self.scaler.unscale_(optimizer)
         super().pre_optimizer_step(model, optimizer, optimizer_idx, lambda_closure, **kwargs)
         skipped_backward = result is None
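
The comment added in ``native_amp.py`` notes the same constraint as the DeepSpeed plugin: the closure has to run eagerly because the scaler cannot defer it, and a ``None`` result (a skipped backward) means the scaled step is skipped. A rough, hypothetical sketch of that general pattern, not the plugin's actual implementation, assuming a standard ``torch.cuda.amp.GradScaler``:

    import torch

    def amp_pre_optimizer_step(scaler: "torch.cuda.amp.GradScaler", optimizer, closure):
        # Hypothetical sketch: run the closure (forward + backward) eagerly because
        # native AMP cannot execute it inside the scaled optimizer step.
        result = closure()
        scaler.unscale_(optimizer)  # expose gradients in their true scale (e.g. for clipping)
        if result is not None:  # `None` means backward was skipped for this batch
            scaler.step(optimizer)
            scaler.update()
        return result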
