Skip to content

Commit

Permalink
cuda gpu devices clarification
Browse files Browse the repository at this point in the history
From the context, it should hopefully be clear that we are talking about nvidia cuda gpus
update error message


update
  • Loading branch information
awaelchli committed Dec 22, 2022
1 parent 38e8cb8 commit 6500156
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 13 deletions.
21 changes: 10 additions & 11 deletions src/lightning_lite/accelerators/cuda.py
Expand Up @@ -79,17 +79,17 @@ def register_accelerators(cls, accelerator_registry: Dict) -> None:


def find_usable_cuda_devices(num_devices: int = -1) -> List[int]:
"""Returns a list of all available and usable CUDA GPUs.
"""Returns a list of all available and usable CUDA GPU devices.
A GPU is considered usable if we can successfully move a tensor to the device, and this is what this function
tests for each GPU on the system until the target number of usable GPUs is found.
tests for each GPU on the system until the target number of usable devices is found.
A subset of GPUs on the system might be used by other processes, and if the GPU is configured to operate in
'exclusive' mode (configurable by the admin), then only one process is allowed to occupy it.
Args:
num_devices: The number of GPUs you want to request. By default, this function will return as many as there are
usable GPUs available.
num_devices: The number of devices you want to request. By default, this function will return as many as there
are usable CUDA GPU devices available.
Warning:
If multiple processes call this function at the same time, there can be race conditions in the case where
Expand All @@ -98,11 +98,11 @@ def find_usable_cuda_devices(num_devices: int = -1) -> List[int]:
visible_devices = _get_all_visible_cuda_devices()
if not visible_devices:
raise ValueError(
f"You requested to find {num_devices} GPUs but there are no visible CUDA devices on this machine."
f"You requested to find {num_devices} devices but there are no visible CUDA devices on this machine."
)
if num_devices > len(visible_devices):
raise ValueError(
f"You requested to find {num_devices} GPUs but this machine only has {len(visible_devices)} GPUs."
f"You requested to find {num_devices} devices but this machine only has {len(visible_devices)} GPUs."
)

available_devices = []
Expand All @@ -122,17 +122,16 @@ def find_usable_cuda_devices(num_devices: int = -1) -> List[int]:

if len(available_devices) != num_devices:
raise RuntimeError(
f"You requested to find {num_devices} GPUs but only {len(available_devices)} are currently available."
f" GPUs {unavailable_devices} are occupied by other processes and can't be"
" used at the moment."
f"You requested to find {num_devices} devices but only {len(available_devices)} are currently available."
f" The devices {unavailable_devices} are occupied by other processes and can't be used at the moment."
)
return available_devices


def _get_all_visible_cuda_devices() -> List[int]:
"""Returns a list of all visible CUDA GPUs.
"""Returns a list of all visible CUDA GPU devices.
GPUs masked by the environment variable ``CUDA_VISIBLE_DEVICES`` won't be returned here. For example, assume you
Devices masked by the environment variable ``CUDA_VISIBLE_DEVICES`` won't be returned here. For example, assume you
have 8 physical GPUs. If ``CUDA_VISIBLE_DEVICES="1,3,6"``, then this function will return the list ``[0, 1, 2]``
because these are the three visible GPUs after applying the mask ``CUDA_VISIBLE_DEVICES``.
"""
Expand Down
4 changes: 2 additions & 2 deletions tests/tests_lite/accelerators/test_cuda.py
Expand Up @@ -123,7 +123,7 @@ def test_find_usable_cuda_devices_error_handling():

# Asking for GPUs if no GPUs visible
with mock.patch("lightning_lite.accelerators.cuda.num_cuda_devices", return_value=0), pytest.raises(
ValueError, match="You requested to find 2 GPUs but there are no visible CUDA"
ValueError, match="You requested to find 2 devices but there are no visible CUDA"
):
find_usable_cuda_devices(2)

Expand All @@ -137,5 +137,5 @@ def test_find_usable_cuda_devices_error_handling():
tensor_mock = Mock(side_effect=RuntimeError) # simulate device placement fails
with mock.patch("lightning_lite.accelerators.cuda.num_cuda_devices", return_value=2), mock.patch(
"lightning_lite.accelerators.cuda.torch.tensor", tensor_mock
), pytest.raises(RuntimeError, match=escape("GPUs [0, 1] are occupied by other processes")):
), pytest.raises(RuntimeError, match=escape("The devices [0, 1] are occupied by other processes")):
find_usable_cuda_devices(2)

0 comments on commit 6500156

Please sign in to comment.