Construct tensors directly on GPU (#1218)

* Replace .to(device) when possible
* fix numpy dep
* black
* Add warning for device != cpu and copy=False
* Update changelog
* Remove warning
* Update buffers.py
DLR-RM · Dec 19, 2022 · 68a40e0 · 68a40e0
1 parent 0c1bc0b
commit 68a40e0
Show file tree

Hide file tree

Showing 6 changed files with 10 additions and 9 deletions.
diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst
@@ -49,6 +49,7 @@ Others:
 - Fixed ``stable_baselines3/common/atari_wrappers.py`` type hints
 - Exposed modules in ``__init__.py`` with the ``__all__`` attribute (@ZikangXiong)
 - Upgraded GitHub CI/setup-python to v4 and checkout to v3
+- Tensors are now constructed directly on the target device instead of being created first and then moved with ``.to(device)``
 
 Documentation:
 ^^^^^^^^^^^^^^

diff --git a/setup.py b/setup.py
@@ -77,7 +77,7 @@
     package_data={"stable_baselines3": ["py.typed", "version.txt"]},
     install_requires=[
         "gym==0.21",  # Fixed version due to breaking changes in 0.22
-        "numpy",
+        "numpy<1.24",  # Required for gym==0.21
         "torch>=1.11",
         'typing_extensions>=4.0,<5; python_version < "3.8.0"',
         # For saving models

diff --git a/stable_baselines3/common/buffers.py b/stable_baselines3/common/buffers.py
@@ -127,13 +127,13 @@ def to_torch(self, array: np.ndarray, copy: bool = True) -> th.Tensor:
         Note: it copies the data by default
 
         :param array:
-        :param copy: Whether to copy or not the data
-            (may be useful to avoid changing things be reference)
+        :param copy: Whether to copy or not the data (may be useful to avoid changing things
+            by reference). This argument is inoperative if the device is not the CPU.
         :return:
         """
         if copy:
-            return th.tensor(array).to(self.device)
-        return th.as_tensor(array).to(self.device)
+            return th.tensor(array, device=self.device)
+        return th.as_tensor(array, device=self.device)
 
     @staticmethod
     def _normalize_obs(

diff --git a/stable_baselines3/common/policies.py b/stable_baselines3/common/policies.py
@@ -183,7 +183,7 @@ def load_from_vector(self, vector: np.ndarray) -> None:
 
         :param vector:
         """
-        th.nn.utils.vector_to_parameters(th.FloatTensor(vector).to(self.device), self.parameters())
+        th.nn.utils.vector_to_parameters(th.FloatTensor(vector, device=self.device), self.parameters())
 
     def parameters_to_vector(self) -> np.ndarray:
         """

diff --git a/stable_baselines3/common/utils.py b/stable_baselines3/common/utils.py
@@ -461,9 +461,9 @@ def obs_as_tensor(
     :return: PyTorch tensor of the observation on a desired device.
     """
     if isinstance(obs, np.ndarray):
-        return th.as_tensor(obs).to(device)
+        return th.as_tensor(obs, device=device)
     elif isinstance(obs, dict):
-        return {key: th.as_tensor(_obs).to(device) for (key, _obs) in obs.items()}
+        return {key: th.as_tensor(_obs, device=device) for (key, _obs) in obs.items()}
     else:
         raise Exception(f"Unrecognized type of observation {type(obs)}")
 

diff --git a/stable_baselines3/sac/sac.py b/stable_baselines3/sac/sac.py
@@ -181,7 +181,7 @@ def _setup_model(self) -> None:
             # Force conversion to float
             # this will throw an error if a malformed string (different from 'auto')
             # is passed
-            self.ent_coef_tensor = th.tensor(float(self.ent_coef)).to(self.device)
+            self.ent_coef_tensor = th.tensor(float(self.ent_coef), device=self.device)
 
     def _create_aliases(self) -> None:
         self.actor = self.policy.actor