diff --git a/.github/workflows/install_gpu_driver.ps1 b/.github/workflows/install_gpu_driver.ps1
deleted file mode 100644
index 256c5cf3a9..0000000000
--- a/.github/workflows/install_gpu_driver.ps1
+++ /dev/null
@@ -1,35 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-# Install the driver
-function Install-Driver {
-
-    # Set the correct URL, filename, and arguments to the installer
-    # This driver is picked to support Windows 11 & CUDA 13.0
-    $url = 'https://us.download.nvidia.com/tesla/580.88/580.88-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe';
-    $file_dir = 'C:\NVIDIA-Driver\580.88-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe';
-    $install_args = '/s /noeula /noreboot';
-
-    # Create the folder for the driver download
-    if (!(Test-Path -Path 'C:\NVIDIA-Driver')) {
-        New-Item -Path 'C:\' -Name 'NVIDIA-Driver' -ItemType 'directory' | Out-Null
-    }
-
-    # Download the file to a specified directory
-    # Disabling progress bar due to https://github.com/GoogleCloudPlatform/compute-gpu-installation/issues/29
-    $ProgressPreference_tmp = $ProgressPreference
-    $ProgressPreference = 'SilentlyContinue'
-    Write-Output 'Downloading the driver installer...'
-    Invoke-WebRequest $url -OutFile $file_dir
-    $ProgressPreference = $ProgressPreference_tmp
-    Write-Output 'Download complete!'
-
-    # Install the file with the specified path from earlier
-    Write-Output 'Running the driver installer...'
-    Start-Process -FilePath $file_dir -ArgumentList $install_args -Wait
-    Write-Output 'Done!'
-}
-
-# Run the functions
-Install-Driver
diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml
index 9c33e60168..ebcc47af13 100644
--- a/.github/workflows/test-wheel-linux.yml
+++ b/.github/workflows/test-wheel-linux.yml
@@ -74,7 +74,7 @@ jobs:
           echo "MATRIX=${MATRIX}" | tee --append "${GITHUB_OUTPUT}"
 
   test:
-    name: py${{ matrix.PY_VER }}, ${{ matrix.CUDA_VER }}, ${{ (matrix.LOCAL_CTK == '1' && 'local') || 'wheels' }}, GPU ${{ matrix.GPU }}
+    name: py${{ matrix.PY_VER }}, ${{ matrix.CUDA_VER }}, ${{ (matrix.LOCAL_CTK == '1' && 'local') || 'wheels' }}, ${{ matrix.GPU }}
     needs: compute-matrix
     strategy:
       fail-fast: false
diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml
index 9133170bc4..6152df55dc 100644
--- a/.github/workflows/test-wheel-windows.yml
+++ b/.github/workflows/test-wheel-windows.yml
@@ -63,7 +63,7 @@ jobs:
           echo "MATRIX=${MATRIX}" | tee --append "${GITHUB_OUTPUT}"
 
   test:
-    name: py${{ matrix.PY_VER }}, ${{ matrix.CUDA_VER }}, ${{ (matrix.LOCAL_CTK == '1' && 'local') || 'wheels' }}, GPU ${{ matrix.GPU }}
+    name: py${{ matrix.PY_VER }}, ${{ matrix.CUDA_VER }}, ${{ (matrix.LOCAL_CTK == '1' && 'local') || 'wheels' }}, ${{ matrix.GPU }} (${{ matrix.DRIVER_MODE }})
     # The build stage could fail but we want the CI to keep moving.
     needs: compute-matrix
     strategy:
@@ -80,11 +80,23 @@
         continue-on-error: true
 
       - name: Update driver
+        env:
+          DRIVER_MODE: ${{ matrix.DRIVER_MODE }}
+          GPU_TYPE: ${{ matrix.GPU }}
         run: |
-          .github/workflows/install_gpu_driver.ps1
+          ci/tools/install_gpu_driver.ps1
 
       - name: Ensure GPU is working
-        run: nvidia-smi
+        run: |
+          nvidia-smi
+
+          $mode_output = nvidia-smi | Select-String -Pattern "${{ matrix.DRIVER_MODE }}"
+          Write-Output "Driver mode check: $mode_output"
+          if ("$mode_output" -eq "") {
+            Write-Error "Switching to driver mode ${{ matrix.DRIVER_MODE }} failed!"
+            exit 1
+          }
+          Write-Output "Driver mode verified: ${{ matrix.DRIVER_MODE }}"
 
       - name: Set environment variables
         env:
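Note on the "Ensure GPU is working" step above: on Windows, `nvidia-smi` reports the driver model (WDDM or TCC, and MCDM on drivers that support it) in its summary table, so the step only needs to search the plain output for the expected mode string. The sketch below replays the same check in Python for local debugging; it assumes `nvidia-smi` is on `PATH`, and `EXPECTED_MODE` is a hypothetical stand-in for `matrix.DRIVER_MODE`.

```python
# Minimal sketch of the CI check above: search plain `nvidia-smi` output for the
# expected driver-mode string and fail if it is absent.
import os
import subprocess
import sys

expected = os.environ.get("EXPECTED_MODE", "WDDM")  # WDDM, TCC, or MCDM
output = subprocess.run(["nvidia-smi"], capture_output=True, text=True, check=True).stdout

if expected not in output:
    print(f"Switching to driver mode {expected} failed!", file=sys.stderr)
    sys.exit(1)
print(f"Driver mode verified: {expected}")
```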
diff --git a/ci/test-matrix.json b/ci/test-matrix.json
index d1110cfcde..6c5f201acb 100644
--- a/ci/test-matrix.json
+++ b/ci/test-matrix.json
@@ -1,6 +1,6 @@
 {
     "_description": "Test matrix configurations for CUDA Python CI workflows. This file consolidates the test matrices that were previously hardcoded in the workflow files. All GPU and ARCH values are hard-coded for each architecture: l4 GPU for amd64, a100 GPU for arm64.",
-    "_sorted_by": "Please keep matrices sorted in ascending order by [ARCH, PY_VER, CUDA_VER, LOCAL_CTK, GPU, DRIVER]",
+    "_sorted_by": "Please keep matrices sorted in ascending order by [ARCH, PY_VER, CUDA_VER, LOCAL_CTK, GPU, DRIVER]. Windows entries also include DRIVER_MODE.",
     "_notes": "DRIVER: 'earliest' does not work with CUDA 12.9.1 and LOCAL_CTK: 0 does not work with CUDA 12.0.1",
     "linux": {
         "pull-request": [
@@ -25,48 +25,7 @@
             { "ARCH": "arm64", "PY_VER": "3.14", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
             { "ARCH": "arm64", "PY_VER": "3.14t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }
         ],
-        "nightly": [
-            { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "earliest" },
-            { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
-            { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
-            { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" },
-            { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
-            { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "earliest" },
-            { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
-            { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
-            { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" },
-            { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
-            { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "earliest" },
-            { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
-            { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
-            { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" },
-            { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
-            { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "earliest" },
-            { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
-            { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
-            { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" },
-            { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
-            { "ARCH": "arm64", "PY_VER": "3.10", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "earliest" },
-            { "ARCH": "arm64", "PY_VER": "3.10", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
-            { "ARCH": "arm64", "PY_VER": "3.10", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
-            { "ARCH": "arm64", "PY_VER": "3.10", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" },
-            { "ARCH": "arm64", "PY_VER": "3.10", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
-            { "ARCH": "arm64", "PY_VER": "3.11", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "earliest" },
-            { "ARCH": "arm64", "PY_VER": "3.11", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
-            { "ARCH": "arm64", "PY_VER": "3.11", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
-            { "ARCH": "arm64", "PY_VER": "3.11", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" },
-            { "ARCH": "arm64", "PY_VER": "3.11", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
-            { "ARCH": "arm64", "PY_VER": "3.12", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "earliest" },
-            { "ARCH": "arm64", "PY_VER": "3.12", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
-            { "ARCH": "arm64", "PY_VER": "3.12", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
-            { "ARCH": "arm64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" },
-            { "ARCH": "arm64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
-            { "ARCH": "arm64", "PY_VER": "3.13", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "earliest" },
-            { "ARCH": "arm64", "PY_VER": "3.13", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
-            { "ARCH": "arm64", "PY_VER": "3.13", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
-            { "ARCH": "arm64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" },
-            { "ARCH": "arm64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }
-        ],
+        "nightly": [],
         "special_runners": {
             "amd64": [
                 { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "H100", "DRIVER": "latest" }
@@ -75,20 +34,19 @@
     },
     "windows": {
         "pull-request": [
-            { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" },
-            { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "t4", "DRIVER": "latest" },
-            { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "t4", "DRIVER": "latest" },
-            { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
-            { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "t4", "DRIVER": "latest" },
-            { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
-            { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "t4", "DRIVER": "latest" },
-            { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }
+            { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "rtx2080", "DRIVER": "latest", "DRIVER_MODE": "WDDM" },
+            { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "TCC" },
+            { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "v100", "DRIVER": "latest", "DRIVER_MODE": "MCDM" },
+            { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "rtx4090", "DRIVER": "latest", "DRIVER_MODE": "WDDM" },
+            { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "MCDM" },
+            { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "TCC" },
+            { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "TCC" },
+            { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "MCDM" },
+            { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "v100", "DRIVER": "latest", "DRIVER_MODE": "TCC" },
+            { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "MCDM" },
+            { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "TCC" },
+            { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }
         ],
-        "nightly": [
-            { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" },
-            { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "t4", "DRIVER": "latest" },
-            { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "t4", "DRIVER": "latest" },
-            { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }
-        ]
+        "nightly": []
     }
 }
diff --git a/ci/tools/install_gpu_driver.ps1 b/ci/tools/install_gpu_driver.ps1
new file mode 100644
index 0000000000..5602eeb489
--- /dev/null
+++ b/ci/tools/install_gpu_driver.ps1
@@ -0,0 +1,82 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# Install the driver
+function Install-Driver {
+
+    # Set the correct URL, filename, and arguments to the installer
+    # This driver is picked to support Windows 11 & CUDA 13.0
+    $version = '581.15'
+
+    # Get GPU type from environment variable
+    $gpu_type = $env:GPU_TYPE
+
+    $data_center_gpus = @('a100', 'h100', 'l4', 't4', 'v100', 'rtxa6000', 'rtx6000ada')
+    $desktop_gpus = @('rtx2080', 'rtx4090', 'rtxpro6000')
+
+    if ($data_center_gpus -contains $gpu_type) {
+        Write-Output "Data center GPU detected: $gpu_type"
+        $filename="$version-data-center-tesla-desktop-winserver-2022-2025-dch-international.exe"
+        $server_path="tesla/$version"
+    } elseif ($desktop_gpus -contains $gpu_type) {
+        Write-Output "Desktop GPU detected: $gpu_type"
+        $filename="$version-desktop-win10-win11-64bit-international-dch-whql.exe"
+        $server_path="Windows/$version"
+    } else {
+        Write-Output "Unknown GPU type: $gpu_type"
+        exit 1
+    }
+
+    $url="https://us.download.nvidia.com/$server_path/$filename"
+    $filepath="C:\NVIDIA-Driver\$filename"
+
+    Write-Output "Installing NVIDIA driver version $version for GPU type $gpu_type"
+    Write-Output "Download URL: $url"
+
+    # Silent install arguments
+    $install_args = '/s /noeula /noreboot';
+
+    # Create the folder for the driver download
+    if (!(Test-Path -Path 'C:\NVIDIA-Driver')) {
+        New-Item -Path 'C:\' -Name 'NVIDIA-Driver' -ItemType 'directory' | Out-Null
+    }
+
+    # Download the file to a specified directory
+    # Disabling progress bar due to https://github.com/GoogleCloudPlatform/compute-gpu-installation/issues/29
+    $ProgressPreference_tmp = $ProgressPreference
+    $ProgressPreference = 'SilentlyContinue'
+    Write-Output 'Downloading the driver installer...'
+    Invoke-WebRequest $url -OutFile $filepath
+    $ProgressPreference = $ProgressPreference_tmp
+    Write-Output 'Download complete!'
+
+    # Install the file with the specified path from earlier
+    Write-Output 'Running the driver installer...'
+    Start-Process -FilePath $filepath -ArgumentList $install_args -Wait
+    Write-Output 'Done!'
+
+    # Handle driver mode configuration
+    # This assumes prior knowledge of which GPUs support which driver mode.
+    $driver_mode = $env:DRIVER_MODE
+    if ($driver_mode -eq "WDDM") {
+        Write-Output "Setting driver mode to WDDM..."
+        nvidia-smi -fdm 0
+    } elseif ($driver_mode -eq "TCC") {
+        Write-Output "Setting driver mode to TCC..."
+        nvidia-smi -fdm 1
+    } elseif ($driver_mode -eq "MCDM") {
+        Write-Output "Setting driver mode to MCDM..."
+        nvidia-smi -fdm 2
+    } else {
+        Write-Output "Unknown driver mode: $driver_mode"
+        exit 1
+    }
+    pnputil /disable-device /class Display
+    pnputil /enable-device /class Display
+    # Give the devices a few seconds to settle:
+    Start-Sleep -Seconds 5
+}
+
+# Run the functions
+Install-Driver
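The mode handling in the script above reduces to a name-to-code mapping for `nvidia-smi -fdm` (the script passes 0 for WDDM, 1 for TCC, and 2 for MCDM), followed by a display-device restart through `pnputil` so the change takes effect. The helper below restates that mapping for illustration only; it is not an exhaustive list of `-fdm` codes.

```python
# Illustrative restatement of the mode -> `nvidia-smi -fdm` mapping used by the
# script above. Only the three codes the script passes are listed.
DRIVER_MODE_CODES = {"WDDM": 0, "TCC": 1, "MCDM": 2}


def force_driver_model_command(mode: str) -> list[str]:
    """Return the nvidia-smi invocation that forces the given driver model."""
    try:
        code = DRIVER_MODE_CODES[mode]
    except KeyError:
        raise ValueError(f"Unknown driver mode: {mode}") from None
    return ["nvidia-smi", "-fdm", str(code)]


print(force_driver_model_command("TCC"))  # ['nvidia-smi', '-fdm', '1']
```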
diff --git a/cuda_core/cuda/core/experimental/_memory/_virtual_memory_resource.py b/cuda_core/cuda/core/experimental/_memory/_virtual_memory_resource.py
index c17c30bc97..1ebc9dfd48 100644
--- a/cuda_core/cuda/core/experimental/_memory/_virtual_memory_resource.py
+++ b/cuda_core/cuda/core/experimental/_memory/_virtual_memory_resource.py
@@ -70,6 +70,7 @@ class VirtualMemoryResourceOptions:
     peers: Iterable[int] = field(default_factory=tuple)
     self_access: VirtualMemoryAccessTypeT = "rw"
     peer_access: VirtualMemoryAccessTypeT = "rw"
+    win32_handle_metadata: int | None = 0
 
     _a = driver.CUmemAccess_flags
     _access_flags = {"rw": _a.CU_MEM_ACCESS_FLAGS_PROT_READWRITE, "r": _a.CU_MEM_ACCESS_FLAGS_PROT_READ, None: 0}
@@ -212,6 +213,7 @@ def modify_allocation(self, buf: Buffer, new_size: int, config: VirtualMemoryRes
         prop.location.id = self.device.device_id
         prop.allocFlags.gpuDirectRDMACapable = 1 if self.config.gpu_direct_rdma else 0
         prop.requestedHandleTypes = VirtualMemoryResourceOptions._handle_type_to_driver(self.config.handle_type)
+        prop.win32HandleMetaData = self.config.win32_handle_metadata if self.config.win32_handle_metadata else 0
 
         # Query granularity
         gran_flag = VirtualMemoryResourceOptions._granularity_to_driver(self.config.granularity)
@@ -495,11 +497,11 @@ def allocate(self, size: int, stream: Stream = None) -> Buffer:
         # ---- Build allocation properties ----
         prop = driver.CUmemAllocationProp()
         prop.type = VirtualMemoryResourceOptions._allocation_type_to_driver(config.allocation_type)
-
         prop.location.type = VirtualMemoryResourceOptions._location_type_to_driver(config.location_type)
         prop.location.id = self.device.device_id if config.location_type == "device" else -1
         prop.allocFlags.gpuDirectRDMACapable = 1 if config.gpu_direct_rdma else 0
         prop.requestedHandleTypes = VirtualMemoryResourceOptions._handle_type_to_driver(config.handle_type)
+        prop.win32HandleMetaData = config.win32_handle_metadata if config.win32_handle_metadata else 0
 
         # ---- Query and apply granularity ----
         # Choose min vs recommended granularity per config
diff --git a/cuda_core/pyproject.toml b/cuda_core/pyproject.toml
index e64f9d38e1..a920005f21 100644
--- a/cuda_core/pyproject.toml
+++ b/cuda_core/pyproject.toml
@@ -56,6 +56,7 @@ test-cu12 = ["cuda-core[test]", "cupy-cuda12x; python_version < '3.14'", "cuda-t
 test-cu13 = ["cuda-core[test]", "cupy-cuda13x; python_version < '3.14'", "cuda-toolkit[cudart]==13.*"]  # runtime headers needed by CuPy
 # free threaded build, cupy doesn't support free-threaded builds yet, so avoid installing it for now
 # TODO: cupy should support free threaded builds
+test-cu12-ft = ["cuda-core[test]", "cuda-toolkit[cudart]==12.*"]
 test-cu13-ft = ["cuda-core[test]", "cuda-toolkit[cudart]==13.*"]
 
 [project.urls]
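The new `win32_handle_metadata` option is forwarded as-is to `CUmemAllocationProp.win32HandleMetaData`, which for WIN32 shareable handles is expected to carry a pointer to Windows security metadata (the tests below build a `SECURITY_ATTRIBUTES` structure with ctypes); other handle types leave it at zero. The sketch below mirrors the test helper added further down. The top-level import path is an assumption, and the snippet is Windows-only.

```python
# Usage sketch for win32_handle_metadata, mirroring the test helper below.
# Assumptions: the public import path exposes these names; Windows only.
import ctypes
from ctypes import wintypes

from cuda.core.experimental import Device, VirtualMemoryResource, VirtualMemoryResourceOptions


class SECURITY_ATTRIBUTES(ctypes.Structure):
    _fields_ = [
        ("nLength", wintypes.DWORD),
        ("lpSecurityDescriptor", wintypes.LPVOID),
        ("bInheritHandle", wintypes.BOOL),
    ]


sa = SECURITY_ATTRIBUTES()
sa.nLength = ctypes.sizeof(sa)
sa.lpSecurityDescriptor = None
sa.bInheritHandle = False

device = Device()
device.set_current()
options = VirtualMemoryResourceOptions(
    handle_type="win32",
    win32_handle_metadata=ctypes.addressof(sa),  # keep `sa` alive while allocating
)
vmm_mr = VirtualMemoryResource(device, config=options)
buffer = vmm_mr.allocate(1 << 20)
buffer.close()
```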
diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py
index d960e6ee12..21dee23280 100644
--- a/cuda_core/tests/test_memory.py
+++ b/cuda_core/tests/test_memory.py
@@ -1,7 +1,9 @@
 # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
+import ctypes
 import sys
+from ctypes import wintypes
 
 try:
     from cuda.bindings import driver
@@ -11,7 +13,6 @@
     import numpy as np
 except ImportError:
     np = None
-import ctypes
 import platform
 
 import pytest
@@ -28,6 +29,7 @@
 from cuda.core.experimental._memory import IPCBufferDescriptor
 from cuda.core.experimental._utils.cuda_utils import handle_return
 from cuda.core.experimental.utils import StridedMemoryView
+from helpers import IS_WINDOWS
 from helpers.buffers import DummyUnifiedMemoryResource
 
 from cuda_python_test_helpers import supports_ipc_mempool
@@ -312,8 +314,31 @@ def test_device_memory_resource_initialization(mempool_device, use_device_object
     buffer.close()
 
 
+def get_handle_type():
+    def get_sa():
+        class SECURITY_ATTRIBUTES(ctypes.Structure):
+            _fields_ = [
+                ("nLength", wintypes.DWORD),
+                ("lpSecurityDescriptor", wintypes.LPVOID),
+                ("bInheritHandle", wintypes.BOOL),
+            ]
+
+        sa = SECURITY_ATTRIBUTES()
+        sa.nLength = ctypes.sizeof(sa)
+        sa.lpSecurityDescriptor = None
+        sa.bInheritHandle = False  # TODO: why?
+
+        return sa
+
+    if IS_WINDOWS:
+        return (("win32", get_sa()), ("win32_kmt", None))
+    else:
+        return (("posix_fd", None),)
+
+
 @pytest.mark.parametrize("use_device_object", [True, False])
-def test_vmm_allocator_basic_allocation(use_device_object):
+@pytest.mark.parametrize("handle_type", get_handle_type())
+def test_vmm_allocator_basic_allocation(use_device_object, handle_type):
     """Test basic VMM allocation functionality.
 
     This test verifies that VirtualMemoryResource can allocate memory
@@ -326,7 +351,12 @@
     if not device.properties.virtual_memory_management_supported:
         pytest.skip("Virtual memory management is not supported on this device")
 
-    options = VirtualMemoryResourceOptions()
+    handle_type, security_attribute = handle_type  # unpack
+    win32_handle_metadata = ctypes.addressof(security_attribute) if security_attribute else 0
+    options = VirtualMemoryResourceOptions(
+        handle_type=handle_type,
+        win32_handle_metadata=win32_handle_metadata,
+    )
     # Create VMM allocator with default config
     device_arg = device if use_device_object else device.device_id
     vmm_mr = VirtualMemoryResource(device_arg, config=options)
@@ -376,7 +406,7 @@ def test_vmm_allocator_policy_configuration():
         location_type="device",
         granularity="minimum",
         gpu_direct_rdma=True,
-        handle_type="posix_fd" if platform.system() != "Windows" else "win32",
+        handle_type="posix_fd" if not IS_WINDOWS else "win32_kmt",
         peers=(),
         self_access="rw",
         peer_access="rw",
@@ -400,7 +430,7 @@
         location_type="device",
         granularity="recommended",
         gpu_direct_rdma=False,
-        handle_type="posix_fd",
+        handle_type="posix_fd" if not IS_WINDOWS else "win32_kmt",
         peers=(),
         self_access="r",  # Read-only access
         peer_access="r",
@@ -416,7 +446,8 @@
     modified_buffer.close()
 
 
-def test_vmm_allocator_grow_allocation():
+@pytest.mark.parametrize("handle_type", get_handle_type())
+def test_vmm_allocator_grow_allocation(handle_type):
     """Test VMM allocator's ability to grow existing allocations.
 
     This test verifies that VirtualMemoryResource can grow existing
@@ -429,7 +460,12 @@
     if not device.properties.virtual_memory_management_supported:
         pytest.skip("Virtual memory management is not supported on this device")
 
-    options = VirtualMemoryResourceOptions()
+    handle_type, security_attribute = handle_type  # unpack
+    win32_handle_metadata = ctypes.addressof(security_attribute) if security_attribute else 0
+    options = VirtualMemoryResourceOptions(
+        handle_type=handle_type,
+        win32_handle_metadata=win32_handle_metadata,
+    )
     vmm_mr = VirtualMemoryResource(device, config=options)
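The patch is truncated inside `test_vmm_allocator_grow_allocation`; the path it exercises is `VirtualMemoryResource.modify_allocation`, whose `(buf, new_size, config)` signature appears in the `_virtual_memory_resource.py` hunk above. A reduced sketch of that grow path follows, with the same caveat about the import path and with no claims about granularity rounding or address preservation.

```python
# Reduced sketch of the grow path exercised by the truncated test above.
# Import path assumed; default options are used, as the pre-existing version
# of this test did (posix_fd handles on Linux).
from cuda.core.experimental import Device, VirtualMemoryResource, VirtualMemoryResourceOptions

device = Device()
device.set_current()
options = VirtualMemoryResourceOptions()
vmm_mr = VirtualMemoryResource(device, config=options)

buffer = vmm_mr.allocate(1 << 20)                            # initial 1 MiB allocation
buffer = vmm_mr.modify_allocation(buffer, 2 << 20, options)  # grow to 2 MiB
buffer.close()
```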