Add get_model method in Trainer (#39)
bepuca committed Oct 14, 2022
1 parent 34cd222 commit e26e219
Showing 9 changed files with 17 additions and 9 deletions.
1 change: 1 addition & 0 deletions docs/trainer.rst
@@ -124,6 +124,7 @@ Utility Methods
 .. automethod:: Trainer.load_checkpoint
 .. automethod:: Trainer.print
 .. automethod:: Trainer.gather
+.. automethod:: Trainer.get_model

 Customizing Trainer Behaviour
 ================================
4 changes: 2 additions & 2 deletions pytorch_accelerated/schedulers/cosine_scheduler.py
@@ -125,8 +125,8 @@ def get_updated_values(self, num_updates: int):
                 1
                 + math.cos(
                     math.pi
-                    * num_updates ** self.k_decay
-                    / total_cosine_iterations ** self.k_decay
+                    * num_updates**self.k_decay
+                    / total_cosine_iterations**self.k_decay
                 )
             )
             for lr_max in self.base_lr_values
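For context, the reformatted expression is the cosine term of the scheduler's k-decay annealing. A minimal standalone sketch of that formula follows; the lr_min/lr_max scaling around the cosine term is an assumption about the surrounding code, which this hunk does not show:

import math

def k_decay_cosine_lr(lr_max, num_updates, total_iterations, k=1.0, lr_min=0.0):
    # The (1 + cos(...)) factor is the expression touched in the diff; k
    # controls the decay shape (k=1 reduces to plain cosine annealing).
    cosine_factor = 1 + math.cos(
        math.pi * num_updates**k / total_iterations**k
    )
    return lr_min + 0.5 * (lr_max - lr_min) * cosine_factor

At num_updates == 0 this returns lr_max, and it anneals towards lr_min as num_updates approaches total_iterations.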
17 changes: 12 additions & 5 deletions pytorch_accelerated/trainer.py
@@ -657,7 +657,7 @@ def _create_run_config(
             else False,
             "mixed_precision": self._accelerator.mixed_precision,
             "gradient_clip_value": gradient_clip_value,
-            "num_processes": self._accelerator.num_processes
+            "num_processes": self._accelerator.num_processes,
         }

         return TrainerRunConfig(**config)
@@ -884,7 +884,7 @@ def save_checkpoint(
         # TODO: add save method for run history?

         checkpoint = {
-            "model_state_dict": self._accelerator.unwrap_model(self.model).state_dict(),
+            "model_state_dict": self.get_model().state_dict(),
         }

         if save_optimizer:
@@ -918,9 +918,7 @@ def load_checkpoint(self, checkpoint_path, load_optimizer=True):
         """
         self._accelerator.wait_for_everyone()
         checkpoint = torch.load(checkpoint_path, map_location="cpu")
-        self._accelerator.unwrap_model(self.model).load_state_dict(
-            checkpoint["model_state_dict"]
-        )
+        self.get_model().load_state_dict(checkpoint["model_state_dict"])
         if load_optimizer and "optimizer_state_dict" in checkpoint:
             if self.optimizer is None:
                 raise ValueError(
@@ -930,6 +928,15 @@ def load_checkpoint(self, checkpoint_path, load_optimizer=True):
                 )
             self.optimizer.load_state_dict(checkpoint["optimizer_state_dict"])

+    def get_model(self):
+        """
+        Extract the model in :class:`Trainer` from its distributed containers.
+        Useful before saving a model.
+
+        :return: the model in :class:`Trainer`, subclassed from :class:`~torch.nn.Module`
+        """
+        return self._accelerator.unwrap_model(self.model)
+

 class TrainerWithTimmScheduler(Trainer):
     """Subclass of the :class:`Trainer` that works with `timm schedulers <https://fastai.github.io/timmdocs/schedulers>`_ instead
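The new get_model method gives both checkpoint paths a single way to reach the underlying nn.Module. A usage sketch, assuming the documented pytorch-accelerated API; the toy model/dataset and the positional save path argument are illustrative assumptions, not part of this commit:

import torch
from torch import nn
from pytorch_accelerated import Trainer

# Toy components so the sketch is self-contained
model = nn.Linear(10, 2)
train_dataset = torch.utils.data.TensorDataset(
    torch.randn(64, 10), torch.randint(0, 2, (64,))
)
trainer = Trainer(
    model=model,
    loss_func=nn.CrossEntropyLoss(),
    optimizer=torch.optim.SGD(model.parameters(), lr=0.01),
)
trainer.train(train_dataset=train_dataset, num_epochs=1, per_device_batch_size=8)

# Both checkpoint methods now route through get_model(), so the saved
# state dict always comes from the unwrapped module:
trainer.save_checkpoint("checkpoint.pt", save_optimizer=True)
trainer.load_checkpoint("checkpoint.pt", load_optimizer=True)

# get_model() strips any distributed container (e.g. DistributedDataParallel),
# which is handy when exporting weights directly:
torch.save(trainer.get_model().state_dict(), "weights.pt")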
2 changes: 1 addition & 1 deletion requirements.dev.txt
@@ -1,4 +1,4 @@
 black
 versioneer==0.21
-pytest==1.11.0
+pytest==7.1.3
 pytest-mock==3.6.1
File renamed without changes.
2 changes: 1 addition & 1 deletion test/placeholders.py → test/test_placeholders.py
@@ -38,7 +38,7 @@ def create_run_config(
        is_local_process_zero=True,
        is_world_process_zero=True,
        is_distributed=True,
-       mixed_precision='fp16',
+       mixed_precision="fp16",
        num_processes=1,
        num_update_steps_per_epoch=num_update_steps_per_epoch,
    )
File renamed without changes.
File renamed without changes.
File renamed without changes.
