NVIDIA · mdboom · Nov 24, 2025 · Nov 24, 2025 · Nov 25, 2025 · Dec 1, 2025
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -45,6 +45,7 @@ repos:
     rev: "3e8a8703264a2f4a69428a0aa4dcb512790b2c8c"  # frozen: v6.0.0
     hooks:
     - id: check-added-large-files
+      # The binding source added by this change is _nvml.pyx; match both spellings and anchor the regex.
+      exclude: ^cuda_bindings/cuda/bindings/_?nvml\.pyx$
     - id: check-case-conflict
     - id: check-docstring-first
     - id: check-merge-conflict

diff --git a/cuda_bindings/cuda/bindings/_internal/_nvml.pxd b/cuda_bindings/cuda/bindings/_internal/_nvml.pxd
diff --git a/cuda_bindings/cuda/bindings/_internal/_nvml_linux.pyx b/cuda_bindings/cuda/bindings/_internal/_nvml_linux.pyx
diff --git a/cuda_bindings/cuda/bindings/_internal/_nvml_windows.pyx b/cuda_bindings/cuda/bindings/_internal/_nvml_windows.pyx
diff --git a/cuda_bindings/cuda/bindings/_nvml.pxd b/cuda_bindings/cuda/bindings/_nvml.pxd
diff --git a/cuda_bindings/cuda/bindings/_nvml.pyx b/cuda_bindings/cuda/bindings/_nvml.pyx
diff --git a/cuda_bindings/cuda/bindings/cy_nvml.pxd b/cuda_bindings/cuda/bindings/cy_nvml.pxd
diff --git a/cuda_bindings/cuda/bindings/cy_nvml.pyx b/cuda_bindings/cuda/bindings/cy_nvml.pyx
diff --git a/cuda_bindings/pyproject.toml b/cuda_bindings/pyproject.toml
@@ -41,6 +41,7 @@ test = [
     "pytest>=6.2.4",
     "pytest-benchmark>=3.4.1",
     "pyglet>=2.1.9",
+    "looseversion>=1.3.0",
 ]
 
 [project.urls]

diff --git a/cuda_bindings/tests/nvml/README.md b/cuda_bindings/tests/nvml/README.md
diff --git a/cuda_bindings/tests/nvml/__init__.py b/cuda_bindings/tests/nvml/__init__.py
@@ -0,0 +1,2 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
diff --git a/cuda_bindings/tests/nvml/conftest.py b/cuda_bindings/tests/nvml/conftest.py
@@ -0,0 +1,139 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
+
+from collections import namedtuple
+
+import pytest
+from cuda.bindings import _nvml as nvml
+
+
+class NVMLInitializer:
+    """Context manager that calls ``nvml.init_v2()`` on entry and ``nvml.shutdown()`` on exit."""
+
+    def __enter__(self):
+        # Nothing is returned, so ``with NVMLInitializer() as x`` binds ``x`` to None.
+        nvml.init_v2()
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        # Returns None, so an exception raised inside the ``with`` body still propagates.
+        nvml.shutdown()
+
+
+@pytest.fixture
+def nvml_init():
+    """Keep NVML initialized while the requesting test runs, then shut it down."""
+    nvml.init_v2()
+    try:
+        yield
+    finally:
+        nvml.shutdown()
+
+
+@pytest.fixture(scope="session", autouse=True)
+def device_info():
+    """Collect static details for every accessible GPU once per test session.
+
+    NVML is initialized only while the devices are queried and is shut down
+    again before the fixture returns. Devices whose handle lookup raises
+    ``nvml.NoPermissionError`` are left out of the result.
+
+    Returns:
+        dict mapping each device UUID to a ``BoardDetails`` named tuple with the
+        fields ``name, board, arch_id, bus_id, device_id, serial``. ``serial`` is
+        None when the serial number could not be queried.
+    """
+    # Build the record types once rather than once per device.
+    BoardCfg = namedtuple("BoardCfg", "name, ids_arr")
+    BoardDetails = namedtuple("BoardDetails", "name, board, arch_id, bus_id, device_id, serial")
+
+    uuid_to_board_details = {}
+
+    with NVMLInitializer():
+        # Store some details for each device now when we know NVML is in known state
+        for i in range(nvml.device_get_count_v2()):
+            try:
+                dev = nvml.device_get_handle_by_index_v2(i)
+            except nvml.NoPermissionError:
+                continue
+            pci_info = nvml.device_get_pci_info_v3(dev)
+
+            name = nvml.device_get_name(dev)
+            arch_id = nvml.device_get_architecture(dev)
+            # 1 = NVML_DEVICE_ARCH_KEPLER and 12 = NVML_DEVICE_ARCH_COUNT
+            assert 1 <= arch_id <= 12, "Architecture not found, presumably something newer"
+
+            board = BoardCfg(name, ids_arr=[(pci_info.pci_device_id, pci_info.pci_sub_system_id)])
+
+            try:
+                serial = nvml.device_get_serial(dev)
+            except Exception:
+                # Best effort: the serial is optional. ``Exception`` (unlike a bare
+                # ``except``) lets KeyboardInterrupt and SystemExit through.
+                serial = None
+
+            # NOTE(review): ``device_`` (trailing underscore) is kept exactly as written;
+            # confirm it is the attribute name the binding exposes on the PCI info object.
+            device_id = pci_info.device_
+            uuid = nvml.device_get_uuid(dev)
+
+            uuid_to_board_details[uuid] = BoardDetails(name, board, arch_id, pci_info.bus_id, device_id, serial)
+
+    return uuid_to_board_details
+
+
+def get_devices(device_info):
+    """Yield an NVML handle for each UUID key of ``device_info``.
+
+    UUIDs whose lookup raises ``nvml.NoPermissionError`` are skipped.
+    """
+    for device_uuid in list(device_info):
+        try:
+            handle = nvml.device_get_handle_by_uuid(device_uuid)
+        except nvml.NoPermissionError:
+            # ignore devices that can't be accessed
+            continue
+        yield handle
+
+
+@pytest.fixture
+def for_all_devices(device_info):
+    """Yield the handle of an accessible device while NVML is initialized.
+
+    A pytest generator fixture may yield only once. Yielding once per device
+    makes fixture teardown fail with "fixture function has more than one
+    'yield'" as soon as a second device is present, so exactly one handle is
+    yielded. The test is skipped when no device can be accessed.
+
+    NOTE(review): despite the name, the requesting test therefore runs against
+    the first accessible device only. Covering every device needs test
+    parametrization or a fixture that yields the whole list.
+    """
+    with NVMLInitializer():
+        device = next(get_devices(device_info), None)
+        if device is None:
+            pytest.skip("No accessible NVML device found")
+        yield device
+
+
+@pytest.fixture
+def driver(nvml_init, request):
+    """Return the leading ("major") component of the driver version string as an int."""
+    # Everything before the first "." is the major version
+    major, _, _ = nvml.system_get_driver_version().partition(".")
+    return int(major)
+
+
+@pytest.fixture
+def ngpus(nvml_init):
+    """Return the NVML device count, asserting that at least one device exists."""
+    count = nvml.device_get_count_v2()
+    assert count > 0
+    return count
+
+
+@pytest.fixture
+def handles(ngpus):
+    """Return the device handles for indices ``0 .. ngpus - 1``."""
+    device_handles = []
+    for index in range(ngpus):
+        device_handles.append(nvml.device_get_handle_by_index_v2(index))
+    assert len(device_handles) == ngpus
+    return device_handles
+
+
+@pytest.fixture
+def nmigs(handles):
+    """Return the maximum MIG device count reported for the first device handle."""
+    return nvml.device_get_max_mig_device_count(handles[0])
+
+
+@pytest.fixture
+def mig_handles(nmigs, handles):
+    """Return the MIG device handles for indices ``0 .. nmigs - 1`` of the first device.
+
+    The NVML lookup takes the parent device handle as well as the MIG index.
+    The parent used here is ``handles[0]``, the same device ``nmigs`` is
+    queried from.
+    """
+    parent = handles[0]
+    mig_device_handles = [nvml.device_get_mig_device_handle_by_index(parent, i) for i in range(nmigs)]
+    assert len(mig_device_handles) == nmigs
+    return mig_device_handles
+
+
+@pytest.fixture
+def serials(ngpus, handles):
+    """Return the serial number of every device in ``handles``, in order."""
+    all_serials = [nvml.device_get_serial(handle) for handle in handles]
+    assert len(all_serials) == ngpus
+    return all_serials
+
+
+@pytest.fixture
+def uuids(ngpus, handles):
+    """Return the UUID of every device in ``handles``, in order."""
+    all_uuids = [nvml.device_get_uuid(handle) for handle in handles]
+    assert len(all_uuids) == ngpus
+    return all_uuids
+
+
+@pytest.fixture
+def pci_info(ngpus, handles):
+    """Return the PCI info object of every device in ``handles``, in order."""
+    all_pci_info = [nvml.device_get_pci_info_v3(handle) for handle in handles]
+    assert len(all_pci_info) == ngpus
+    return all_pci_info
diff --git a/cuda_bindings/tests/nvml/test_compute_mode.py b/cuda_bindings/tests/nvml/test_compute_mode.py
@@ -0,0 +1,29 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
+
+
+import sys
+
+import pytest
+from cuda.bindings import _nvml as nvml
+
+# Compute modes that the test in this file attempts to set on the device.
+COMPUTE_MODES = [
+    nvml.ComputeMode.COMPUTEMODE_DEFAULT,
+    nvml.ComputeMode.COMPUTEMODE_PROHIBITED,
+    nvml.ComputeMode.COMPUTEMODE_EXCLUSIVE_PROCESS,
+]
+
+
+@pytest.mark.skipif(sys.platform == "win32", reason="Test not supported on Windows")
+def test_compute_mode_supported_nonroot(for_all_devices):
+    """
+    Checks that every attempt to set a compute mode raises NoPermissionError and
+    leaves the device's compute mode unchanged.
+    """
+    try:
+        mode_before = nvml.device_get_compute_mode(for_all_devices)
+    except nvml.NotSupportedError:
+        pytest.skip("nvmlDeviceGetComputeMode not supported")
+
+    for requested_mode in COMPUTE_MODES:
+        with pytest.raises(nvml.NoPermissionError):
+            nvml.device_set_compute_mode(for_all_devices, requested_mode)
+        mode_after = nvml.device_get_compute_mode(for_all_devices)
+        assert mode_before == mode_after, "Compute mode shouldn't have changed"
diff --git a/cuda_bindings/tests/nvml/test_cuda.py b/cuda_bindings/tests/nvml/test_cuda.py
@@ -0,0 +1,57 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
+
+import cuda.bindings.driver as cuda
+from cuda.bindings import _nvml as nvml
+
+from .conftest import NVMLInitializer
+
+
+def get_nvml_device_names():
+    """Return one ``{"name": ..., "id": ...}`` dict per NVML device, in NVML index order.
+
+    ``"id"`` is the ``bus`` attribute of the device's PCI info.
+    """
+    devices = []
+    with NVMLInitializer():
+        # Walk the NVML device indices and record each device's name and PCI bus
+        for index in range(nvml.device_get_count_v2()):
+            handle = nvml.device_get_handle_by_index_v2(index)
+            name = nvml.device_get_name(handle)
+            info = nvml.device_get_pci_info_v3(handle)
+            assert isinstance(info.bus, int)
+            assert isinstance(name, str)
+            devices.append(dict(name=name, id=info.bus))
+
+    return devices
+
+
+def get_cuda_device_names(sort_by_bus_id=True):
+    """Return one ``{"name": str, "id": int}`` dict per CUDA device.
+
+    ``"id"`` is the device's ``CU_DEVICE_ATTRIBUTE_PCI_BUS_ID`` attribute.
+
+    Args:
+        sort_by_bus_id: when true (the default) the list is sorted by ``"id"``;
+            otherwise it stays in CUDA device-index order.
+    """
+    result = []
+
+    (err,) = cuda.cuInit(0)
+    assert err == cuda.CUresult.CUDA_SUCCESS
+
+    err, device_count = cuda.cuDeviceGetCount()
+    assert err == cuda.CUresult.CUDA_SUCCESS
+
+    for dev in range(device_count):
+        size = 256
+        err, raw_name = cuda.cuDeviceGetName(size, dev)
+        # Check the status before touching the payload, so a failed call is reported
+        # as such rather than as an error from decoding the name.
+        assert err == cuda.CUresult.CUDA_SUCCESS
+        # Keep only the text before the first NUL byte of the returned buffer.
+        name = raw_name.split(b"\x00")[0].decode()
+
+        err, pci_bus_id = cuda.cuDeviceGetAttribute(cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PCI_BUS_ID, dev)
+        assert err == cuda.CUresult.CUDA_SUCCESS
+        assert isinstance(pci_bus_id, int)
+
+        result.append({"name": name, "id": pci_bus_id})
+
+    if sort_by_bus_id:
+        result = sorted(result, key=lambda k: k["id"])
+
+    return result
+
+
+def test_cuda_device_order():
+    """Checks that the CUDA device list (sorted by bus id) equals the NVML device list."""
+    from_cuda = get_cuda_device_names()
+    from_nvml = get_nvml_device_names()
+
+    assert from_cuda == from_nvml, "CUDA and NVML device lists do not match"
diff --git a/cuda_bindings/tests/nvml/test_gpu.py b/cuda_bindings/tests/nvml/test_gpu.py
@@ -0,0 +1,40 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
+
+import pytest
+from cuda.bindings import _nvml as nvml
+
+from . import util
+
+
+def test_gpu_get_module_id(nvml_init):
+    """Checks that every non-vGPU device reports an integer module ID."""
+    device_count = nvml.device_get_count_v2()
+
+    for i in range(device_count):
+        device = nvml.device_get_handle_by_index_v2(i)
+
+        # Devices that util.is_vgpu() flags are not queried for a module ID.
+        if util.is_vgpu(device):
+            continue
+
+        module_id = nvml.device_get_module_id(device)
+        assert isinstance(module_id, int)
+
+
+def test_gpu_get_platform_info(for_all_devices):
+    """Checks that the platform info query returns a ``PlatformInfo_v2`` on non-vGPU devices."""
+    if util.is_vgpu(for_all_devices):
+        pytest.skip("Not supported on vGPU device")
+
+    # TODO
+    # if device.feature_dict.board.chip < board_class.Architecture.Blackwell:
+    #     test_utils.skip_test("Not supported on chip before Blackwell")
+
+    try:
+        info = nvml.device_get_platform_info(for_all_devices)
+    except nvml.NotSupportedError:
+        pytest.skip("Not supported returned, likely NVLink is disabled.")
+
+    assert isinstance(info, nvml.PlatformInfo_v2)
diff --git a/cuda_bindings/tests/nvml/test_init.py b/cuda_bindings/tests/nvml/test_init.py
@@ -0,0 +1,53 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
+
+import sys
+
+import pytest
+from cuda.bindings import _nvml as nvml
+
+
+def assert_nvml_is_initialized():
+    """Assert that NVML answers a device-count query with a positive count."""
+    count = nvml.device_get_count_v2()
+    assert count > 0
+
+
+def assert_nvml_is_uninitialized():
+    """Assert that a device-count query raises ``nvml.UninitializedError``."""
+    with pytest.raises(nvml.UninitializedError):
+        nvml.device_get_count_v2()
+
+
+@pytest.mark.skipif(sys.platform == "win32", reason="Test not supported on Windows")
+def test_init_ref_count():
+    """
+    Verifies that NVML init/shutdown calls are reference counted: NVML stays usable
+    until the number of shutdown calls matches the number of init calls.
+    """
+    # With no outstanding init, shutdown itself must report "uninitialized"
+    with pytest.raises(nvml.UninitializedError):
+        nvml.shutdown()
+
+    assert_nvml_is_uninitialized()
+
+    n_inits = 5
+    for _ in range(3):
+        # Take n_inits references
+        for _ in range(n_inits):
+            nvml.init_v2()
+            assert_nvml_is_initialized()
+
+        # Release all but one reference, NVML should remain initialized
+        for _ in range(n_inits - 1):
+            nvml.shutdown()
+            assert_nvml_is_initialized()
+
+        # Release the last reference
+        nvml.shutdown()
+        assert_nvml_is_uninitialized()
+
+
+def test_init_check_index(nvml_init):
+    """
+    Verifies that nvmlDeviceGetIndex returns the index each handle was looked up by
+    """
+    for expected_index in range(nvml.device_get_count_v2()):
+        device = nvml.device_get_handle_by_index_v2(expected_index)
+        reported_index = nvml.device_get_index(device)
+        # The handle must map back to the index it came from
+        assert expected_index == reported_index
diff --git a/cuda_bindings/tests/nvml/test_nvlink.py b/cuda_bindings/tests/nvml/test_nvlink.py
@@ -0,0 +1,30 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
+
+
+from cuda.bindings import _nvml as nvml
+
+
+def test_nvlink_get_link_count(for_all_devices):
+    """
+    Queries the NVLink link-count field through both argument forms accepted by
+    device_get_field_values and checks the reported count against NVLINK_MAX_LINKS.
+    """
+
+    def check_return(result):
+        # Either the query succeeds or the field is reported as unsupported
+        status = result.nvml_return
+        assert status == nvml.Return.SUCCESS or status == nvml.Return.ERROR_NOT_SUPPORTED, (
+            f"Unexpected return {result.nvml_return} for link count field query"
+        )
+
+    # First form: a pre-built FieldValue array with the field id filled in
+    fields = nvml.FieldValue(1)
+    fields[0].field_id = nvml.FI.DEV_NVLINK_LINK_COUNT
+    value = nvml.device_get_field_values(for_all_devices, fields)[0]
+    check_return(value)
+
+    # Second form: a plain list of field ids
+    value = nvml.device_get_field_values(for_all_devices, [nvml.FI.DEV_NVLINK_LINK_COUNT])[0]
+    check_return(value)
+
+    # The feature_nvlink_supported detection is not robust, so we
+    # can't be more specific about how many links we should find.
+    if value.nvml_return == nvml.Return.SUCCESS:
+        link_count = value.value.ui_val
+        assert link_count <= nvml.NVLINK_MAX_LINKS, f"Unexpected link count {value.value.ui_val}"
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
		# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE