Lightning-AI · tchaton · Jun 3, 2021 · Jun 3, 2021 · SeanNaren · Jun 4, 2021
@@ -316,7 +316,7 @@ def model_sharded_context(self) -> Generator[None, None, None]:
         else:
             model_parallel_context = super().model_sharded_context()
 
-        with model_parallel_context:
+        with torch.cuda.amp.autocast(), model_parallel_context:
             yield
 
     def _set_deepspeed_activation_checkpointing(self):

@@ -24,10 +24,10 @@ class ModelParallelBoringModel(BoringModel):
 
     def __init__(self):
         super().__init__()
-        self.linear = None
+        self.layer = None
 
     def configure_sharded_model(self) -> None:
-        self.linear = torch.nn.Linear(32, 2)
+        self.layer = torch.nn.Linear(32, 2)
 
     def on_load_checkpoint(self, checkpoint: Dict[str, Any]) -> None:
         self.configure_sharded_model()