From e439d8f0bbdba66e017fdb6638622d89db05ccb0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrian=20W=C3=A4lchli?=
Date: Tue, 6 Jul 2021 19:33:24 +0200
Subject: [PATCH 1/7] move device

---
 pytorch_lightning/accelerators/gpu.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/pytorch_lightning/accelerators/gpu.py b/pytorch_lightning/accelerators/gpu.py
index 3348727a36e61..82cb9d8cb2298 100644
--- a/pytorch_lightning/accelerators/gpu.py
+++ b/pytorch_lightning/accelerators/gpu.py
@@ -26,16 +26,18 @@ class GPUAccelerator(Accelerator):
     """ Accelerator for GPU devices. """
 
+    def setup_environment(self) -> None:
+        if "cuda" not in str(self.root_device):
+            raise MisconfigurationException(f"Device should be GPU, got {self.root_device} instead")
+        torch.cuda.set_device(self.root_device)
+
     def setup(self, trainer: 'pl.Trainer', model: 'pl.LightningModule') -> None:
         """
         Raises:
             MisconfigurationException:
                 If the selected device is not GPU.
         """
-        if "cuda" not in str(self.root_device):
-            raise MisconfigurationException(f"Device should be GPU, got {self.root_device} instead")
         self.set_nvidia_flags(trainer.local_rank)
-        torch.cuda.set_device(self.root_device)
         return super().setup(trainer, model)
 
     def on_train_start(self) -> None:

From c39dd574ed06a6320fc162c5fc67c657adb34667 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrian=20W=C3=A4lchli?=
Date: Tue, 6 Jul 2021 19:34:37 +0200
Subject: [PATCH 2/7] debug

---
 pytorch_lightning/trainer/trainer.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py
index b984608c87d6d..8c698eb30029d 100644
--- a/pytorch_lightning/trainer/trainer.py
+++ b/pytorch_lightning/trainer/trainer.py
@@ -815,6 +815,8 @@ def _run(self, model: 'pl.LightningModule') -> Optional[Union[_EVALUATE_OUTPUT,
         self.call_hook("on_before_accelerator_backend_setup", model)
         self.accelerator.connect(model)
         self.accelerator.setup_environment()
+        # check if bug is fixed
+        _ = self.log_dir
         self._call_setup_hook(model)  # allow user to setup lightning_module in accelerator environment
 
         # restore modules after setup
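Note on the probe in patch 2 (commentary, not part of the patch series): the added _ = self.log_dir runs immediately after setup_environment(), and patch 3 below guards it behind torch.distributed.is_initialized(), which suggests the access exercises an early cross-rank call. The ordering this series is after can be illustrated with a standalone sketch, assuming a torchrun-style launcher that sets LOCAL_RANK; this is a hypothetical script, not Lightning code:

import os

import torch
import torch.distributed as dist


def init_rank() -> None:
    # With the NCCL backend, collectives that allocate their own buffers use the
    # current CUDA device, so each rank has to pin its GPU before the first
    # cross-rank call, or every process ends up talking through cuda:0.
    local_rank = int(os.environ["LOCAL_RANK"])  # provided by the launcher
    torch.cuda.set_device(local_rank)           # pin the device first ...
    dist.init_process_group(backend="nccl")
    payload = [None]
    dist.broadcast_object_list(payload, src=0)  # ... so an early broadcast like this is safe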
""" def setup_environment(self) -> None: + super().setup_environment() if "cuda" not in str(self.root_device): raise MisconfigurationException(f"Device should be GPU, got {self.root_device} instead") torch.cuda.set_device(self.root_device) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 8c698eb30029d..41ffb4ba1da9e 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -816,7 +816,8 @@ def _run(self, model: 'pl.LightningModule') -> Optional[Union[_EVALUATE_OUTPUT, self.accelerator.connect(model) self.accelerator.setup_environment() # check if bug is fixed - _ = self.log_dir + if torch.distributed.is_available() and torch.distributed.is_initialized(): + _ = self.log_dir self._call_setup_hook(model) # allow user to setup lightning_module in accelerator environment # restore modules after setup From 8561edb3b95bdea1b37450e8b073ad80d6616b51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Tue, 6 Jul 2021 20:50:13 +0200 Subject: [PATCH 4/7] set_device in ddp plugin --- pytorch_lightning/plugins/training_type/ddp.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pytorch_lightning/plugins/training_type/ddp.py b/pytorch_lightning/plugins/training_type/ddp.py index a882390b78b0d..8e4f4c0694e67 100644 --- a/pytorch_lightning/plugins/training_type/ddp.py +++ b/pytorch_lightning/plugins/training_type/ddp.py @@ -367,8 +367,6 @@ def pre_backward(self, closure_loss: torch.Tensor, should_accumulate: bool, opti prepare_for_backward(self.model, closure_loss) def model_to_device(self): - if self.root_device.type == "cuda": - torch.cuda.set_device(self.root_device) self.model.to(self.root_device) def reduce(self, tensor, group: Optional[Any] = None, reduce_op: Union[ReduceOp, str] = "mean") -> torch.Tensor: From 519d01cee558886bd163cfff4b8012b08a544372 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Wed, 7 Jul 2021 01:02:20 +0200 Subject: [PATCH 5/7] redundant set device in single device plugin --- pytorch_lightning/plugins/training_type/single_device.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pytorch_lightning/plugins/training_type/single_device.py b/pytorch_lightning/plugins/training_type/single_device.py index d4a328902eba0..c1ef9028ceb7f 100644 --- a/pytorch_lightning/plugins/training_type/single_device.py +++ b/pytorch_lightning/plugins/training_type/single_device.py @@ -61,9 +61,6 @@ def root_device(self) -> torch.device: return self.device def model_to_device(self) -> None: - if self.on_gpu: - torch.cuda.set_device(self.root_device) - self._model.to(self.root_device) def setup(self, model: torch.nn.Module) -> torch.nn.Module: From 83d6bfaf469776cd5d6d18132ee96e32d7280394 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Wed, 7 Jul 2021 01:23:23 +0200 Subject: [PATCH 6/7] remove redundant set_device in ddp subclasses --- pytorch_lightning/plugins/training_type/deepspeed.py | 2 -- pytorch_lightning/plugins/training_type/fully_sharded.py | 1 - 2 files changed, 3 deletions(-) diff --git a/pytorch_lightning/plugins/training_type/deepspeed.py b/pytorch_lightning/plugins/training_type/deepspeed.py index 4d229e4bff43a..e704b662fd6ca 100644 --- a/pytorch_lightning/plugins/training_type/deepspeed.py +++ b/pytorch_lightning/plugins/training_type/deepspeed.py @@ -339,8 +339,6 @@ def setup_distributed(self): if not self._config_initialized: self._format_config() self._config_initialized = True - if self.on_gpu: - 
From 83d6bfaf469776cd5d6d18132ee96e32d7280394 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrian=20W=C3=A4lchli?=
Date: Wed, 7 Jul 2021 01:23:23 +0200
Subject: [PATCH 6/7] remove redundant set_device in ddp subclasses

---
 pytorch_lightning/plugins/training_type/deepspeed.py     | 2 --
 pytorch_lightning/plugins/training_type/fully_sharded.py | 1 -
 2 files changed, 3 deletions(-)

diff --git a/pytorch_lightning/plugins/training_type/deepspeed.py b/pytorch_lightning/plugins/training_type/deepspeed.py
index 4d229e4bff43a..e704b662fd6ca 100644
--- a/pytorch_lightning/plugins/training_type/deepspeed.py
+++ b/pytorch_lightning/plugins/training_type/deepspeed.py
@@ -339,8 +339,6 @@ def setup_distributed(self):
         if not self._config_initialized:
             self._format_config()
             self._config_initialized = True
-        if self.on_gpu:
-            torch.cuda.set_device(self.root_device)
 
     def pre_dispatch(self):
         self.init_deepspeed()
diff --git a/pytorch_lightning/plugins/training_type/fully_sharded.py b/pytorch_lightning/plugins/training_type/fully_sharded.py
index 476df9be13cfe..a02be35409dc5 100644
--- a/pytorch_lightning/plugins/training_type/fully_sharded.py
+++ b/pytorch_lightning/plugins/training_type/fully_sharded.py
@@ -118,7 +118,6 @@ def setup_distributed(self) -> None:
                 "You selected accelerator to be `ddp_fully_sharded`, but GPU is not available."
             )
         super().setup_distributed()
-        torch.cuda.set_device(self.root_device)
 
     @contextlib.contextmanager
     def model_sharded_context(self) -> Generator:

From 3f9a9e2b47558a57eb92d53e9d002d1288dfbca8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrian=20W=C3=A4lchli?=
Date: Wed, 7 Jul 2021 02:56:30 +0200
Subject: [PATCH 7/7] reset debug changes

---
 pytorch_lightning/trainer/trainer.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py
index 41ffb4ba1da9e..b984608c87d6d 100644
--- a/pytorch_lightning/trainer/trainer.py
+++ b/pytorch_lightning/trainer/trainer.py
@@ -815,9 +815,6 @@ def _run(self, model: 'pl.LightningModule') -> Optional[Union[_EVALUATE_OUTPUT,
         self.call_hook("on_before_accelerator_backend_setup", model)
         self.accelerator.connect(model)
         self.accelerator.setup_environment()
-        # check if bug is fixed
-        if torch.distributed.is_available() and torch.distributed.is_initialized():
-            _ = self.log_dir
         self._call_setup_hook(model)  # allow user to setup lightning_module in accelerator environment
 
         # restore modules after setup
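Net effect of the series (an informal summary, not part of the patches): the accelerator becomes the single place where the CUDA device is pinned, the plugin-level model_to_device implementations shrink to a plain .to(self.root_device) call, and the temporary log_dir probe from patches 2 and 3 is removed again in patch 7. A sketch of the resulting GPUAccelerator, assembled from the hunks above; the imports are assumed from the diff context rather than quoted from the file, and unrelated methods are omitted:

import torch

import pytorch_lightning as pl
from pytorch_lightning.accelerators.accelerator import Accelerator
from pytorch_lightning.utilities.exceptions import MisconfigurationException


class GPUAccelerator(Accelerator):
    """ Accelerator for GPU devices. """

    def setup_environment(self) -> None:
        # runs once per process, before the setup hooks and before any collective call
        super().setup_environment()
        if "cuda" not in str(self.root_device):
            raise MisconfigurationException(f"Device should be GPU, got {self.root_device} instead")
        torch.cuda.set_device(self.root_device)

    def setup(self, trainer: 'pl.Trainer', model: 'pl.LightningModule') -> None:
        # device selection no longer happens here; only the NVIDIA env flags remain
        self.set_nvidia_flags(trainer.local_rank)
        return super().setup(trainer, model)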