From f92ee6c0818cbd9c65f8ce79b5af78d22ea52a9f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 16 Nov 2025 19:57:09 +0000 Subject: [PATCH 01/24] Initial plan From 5e08b0778f5d862ec7b2076d629ff084930cc773 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 16 Nov 2025 20:00:45 +0000 Subject: [PATCH 02/24] Move install_gpu_driver.ps1 to ci/tools and update call sites Co-authored-by: leofang <5534781+leofang@users.noreply.github.com> --- .github/workflows/test-wheel-windows.yml | 2 +- {.github/workflows => ci/tools}/install_gpu_driver.ps1 | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename {.github/workflows => ci/tools}/install_gpu_driver.ps1 (100%) diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 93b81ff0a7..c27d9a7206 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -85,7 +85,7 @@ jobs: - name: Update driver run: | - .github/workflows/install_gpu_driver.ps1 + ci/tools/install_gpu_driver.ps1 - name: Ensure GPU is working run: nvidia-smi diff --git a/.github/workflows/install_gpu_driver.ps1 b/ci/tools/install_gpu_driver.ps1 similarity index 100% rename from .github/workflows/install_gpu_driver.ps1 rename to ci/tools/install_gpu_driver.ps1 From 2219f3baee92d3e32bb9d9fbac7d3f2638088f54 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 16 Nov 2025 20:02:00 +0000 Subject: [PATCH 03/24] Update install_gpu_driver.ps1 to support GPU type detection and driver modes Co-authored-by: leofang <5534781+leofang@users.noreply.github.com> --- ci/tools/install_gpu_driver.ps1 | 180 ++++++++++++++++++++++++++++++-- 1 file changed, 170 insertions(+), 10 deletions(-) diff --git a/ci/tools/install_gpu_driver.ps1 b/ci/tools/install_gpu_driver.ps1 index 256c5cf3a9..e7d57a141b 100644 --- a/ci/tools/install_gpu_driver.ps1 +++ b/ci/tools/install_gpu_driver.ps1 @@ -2,14 +2,158 @@ # # SPDX-License-Identifier: Apache-2.0 +# Detect GPU type from JOB_RUNNER environment variable +function Get-GPUType { + param( + [string]$JobRunner = $env:JOB_RUNNER + ) + + if ([string]::IsNullOrEmpty($JobRunner)) { + Write-Output "Warning: JOB_RUNNER environment variable not set. Using default GPU detection." 
+ return "unknown" + } + + # Extract GPU type from runner label (e.g., "windows-amd64-gpu-l4-latest-1") + if ($JobRunner -match "gpu-([^-]+)") { + $gpuType = $matches[1].ToLower() + Write-Output "Detected GPU type: $gpuType" + return $gpuType + } + + Write-Output "Warning: Could not parse GPU type from JOB_RUNNER: $JobRunner" + return "unknown" +} + +# Determine if GPU is a data center GPU +function Is-DataCenterGPU { + param( + [string]$GpuType + ) + + $dataCenterGPUs = @("l4", "a100", "t4", "h100", "a10", "a30", "a40") + return $dataCenterGPUs -contains $GpuType +} + +# Get driver URL and filename based on GPU type +function Get-DriverInfo { + param( + [string]$GpuType, + [string]$DriverMode = $env:DRIVER_MODE + ) + + $isDataCenter = Is-DataCenterGPU -GpuType $GpuType + + # Default driver version that supports Windows 11 & CUDA 13.0 + $driverVersion = "580.88" + + if ($isDataCenter) { + # Data center GPU - use Tesla driver + $filename = "$driverVersion-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe" + $url = "https://us.download.nvidia.com/tesla/$driverVersion/$filename" + } else { + # Desktop GPU - use GeForce/Quadro driver + $filename = "$driverVersion-desktop-win10-win11-64bit-international-dch-whql.exe" + $url = "https://us.download.nvidia.com/Windows/$driverVersion/$filename" + } + + return @{ + Url = $url + Filename = $filename + IsDataCenter = $isDataCenter + } +} + +# Set driver mode using nvidia-smi +function Set-DriverMode { + param( + [string]$DriverMode, + [bool]$IsDataCenter + ) + + if ([string]::IsNullOrEmpty($DriverMode)) { + Write-Output "No driver mode specified, skipping mode configuration" + return + } + + $DriverMode = $DriverMode.ToUpper() + Write-Output "Configuring driver mode: $DriverMode" + + if (-not $IsDataCenter) { + if ($DriverMode -ne "WDDM") { + Write-Output "Warning: Desktop GPUs only support WDDM mode. Requested mode '$DriverMode' will be ignored." + } + # Desktop GPUs are always in WDDM mode, no configuration needed + return + } + + # Data center GPUs support TCC and MCDM (not WDDM) + if ($DriverMode -eq "WDDM") { + Write-Output "Warning: Data center GPUs do not support WDDM mode. Skipping mode configuration." + return + } + + try { + # Check current mode + $currentMode = & nvidia-smi -q | Select-String -Pattern "Driver Mode" | Out-String + Write-Output "Current driver mode: $currentMode" + + if ($DriverMode -eq "TCC") { + # Set TCC mode (nvidia-smi -fdm 0 sets TCC mode) + Write-Output "Setting TCC mode..." + & nvidia-smi -fdm 0 + + # Verify mode was set + Write-Output "Resetting display device..." + # Reset display devices to apply the change + $devcon = "C:\Windows\System32\pnputil.exe" + if (Test-Path $devcon) { + & $devcon /restart-device "PCI\VEN_10DE*" + } + } elseif ($DriverMode -eq "MCDM") { + # Set MCDM mode (nvidia-smi -fdm 2 sets MCDM mode) + Write-Output "Setting MCDM mode..." + & nvidia-smi -fdm 2 + + # Verify mode was set + Write-Output "Resetting display device..." 
+ # Reset display devices to apply the change + $devcon = "C:\Windows\System32\pnputil.exe" + if (Test-Path $devcon) { + & $devcon /restart-device "PCI\VEN_10DE*" + } + } + + # Wait for device reset + Start-Sleep -Seconds 5 + + # Verify new mode + $newMode = & nvidia-smi -q | Select-String -Pattern "Driver Mode" | Out-String + Write-Output "New driver mode: $newMode" + } catch { + Write-Output "Warning: Failed to set driver mode: $_" + } +} + # Install the driver function Install-Driver { - - # Set the correct URL, filename, and arguments to the installer - # This driver is picked to support Windows 11 & CUDA 13.0 - $url = 'https://us.download.nvidia.com/tesla/580.88/580.88-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe'; - $file_dir = 'C:\NVIDIA-Driver\580.88-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe'; - $install_args = '/s /noeula /noreboot'; + param( + [string]$GpuType = (Get-GPUType), + [string]$DriverMode = $env:DRIVER_MODE + ) + + Write-Output "Installing GPU driver for GPU type: $GpuType" + + # Get driver information + $driverInfo = Get-DriverInfo -GpuType $GpuType -DriverMode $DriverMode + $url = $driverInfo.Url + $filename = $driverInfo.Filename + $isDataCenter = $driverInfo.IsDataCenter + + Write-Output "Driver URL: $url" + Write-Output "Is Data Center GPU: $isDataCenter" + + $file_dir = "C:\NVIDIA-Driver\$filename" + $install_args = '/s /noeula /noreboot' # Create the folder for the driver download if (!(Test-Path -Path 'C:\NVIDIA-Driver')) { @@ -21,14 +165,30 @@ function Install-Driver { $ProgressPreference_tmp = $ProgressPreference $ProgressPreference = 'SilentlyContinue' Write-Output 'Downloading the driver installer...' - Invoke-WebRequest $url -OutFile $file_dir - $ProgressPreference = $ProgressPreference_tmp - Write-Output 'Download complete!' + try { + Invoke-WebRequest $url -OutFile $file_dir + $ProgressPreference = $ProgressPreference_tmp + Write-Output 'Download complete!' + } catch { + $ProgressPreference = $ProgressPreference_tmp + Write-Output "Error downloading driver: $_" + Write-Output "Falling back to default driver..." + # Fall back to the original hardcoded driver if download fails + $url = 'https://us.download.nvidia.com/tesla/580.88/580.88-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe' + $file_dir = 'C:\NVIDIA-Driver\580.88-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe' + $ProgressPreference = 'SilentlyContinue' + Invoke-WebRequest $url -OutFile $file_dir + $ProgressPreference = $ProgressPreference_tmp + Write-Output 'Fallback download complete!' + } # Install the file with the specified path from earlier Write-Output 'Running the driver installer...' Start-Process -FilePath $file_dir -ArgumentList $install_args -Wait - Write-Output 'Done!' + Write-Output 'Driver installation complete!' 
+ + # Set driver mode if specified + Set-DriverMode -DriverMode $DriverMode -IsDataCenter $isDataCenter } # Run the functions From de5b109618c27f60a8576428d64a6fb909fce868 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 16 Nov 2025 20:03:05 +0000 Subject: [PATCH 04/24] Make nightly sections empty in ci/test-matrix.json Co-authored-by: leofang <5534781+leofang@users.noreply.github.com> --- ci/test-matrix.json | 50 ++------------------------------------------- 1 file changed, 2 insertions(+), 48 deletions(-) diff --git a/ci/test-matrix.json b/ci/test-matrix.json index d1110cfcde..02352d0e3c 100644 --- a/ci/test-matrix.json +++ b/ci/test-matrix.json @@ -25,48 +25,7 @@ { "ARCH": "arm64", "PY_VER": "3.14", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }, { "ARCH": "arm64", "PY_VER": "3.14t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" } ], - "nightly": [ - { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "earliest" }, - { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "earliest" }, - { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "earliest" }, - { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "earliest" }, - { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "arm64", "PY_VER": "3.10", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "earliest" }, - { "ARCH": "arm64", "PY_VER": "3.10", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }, - { "ARCH": "arm64", "PY_VER": "3.10", 
"CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }, - { "ARCH": "arm64", "PY_VER": "3.10", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" }, - { "ARCH": "arm64", "PY_VER": "3.10", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }, - { "ARCH": "arm64", "PY_VER": "3.11", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "earliest" }, - { "ARCH": "arm64", "PY_VER": "3.11", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }, - { "ARCH": "arm64", "PY_VER": "3.11", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }, - { "ARCH": "arm64", "PY_VER": "3.11", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" }, - { "ARCH": "arm64", "PY_VER": "3.11", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }, - { "ARCH": "arm64", "PY_VER": "3.12", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "earliest" }, - { "ARCH": "arm64", "PY_VER": "3.12", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }, - { "ARCH": "arm64", "PY_VER": "3.12", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }, - { "ARCH": "arm64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" }, - { "ARCH": "arm64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }, - { "ARCH": "arm64", "PY_VER": "3.13", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "earliest" }, - { "ARCH": "arm64", "PY_VER": "3.13", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }, - { "ARCH": "arm64", "PY_VER": "3.13", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }, - { "ARCH": "arm64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" }, - { "ARCH": "arm64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" } - ], + "nightly": [], "special_runners": { "amd64": [ { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "H100", "DRIVER": "latest" } @@ -84,11 +43,6 @@ { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "t4", "DRIVER": "latest" }, { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" } ], - "nightly": [ - { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "t4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "t4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" } - ] + "nightly": [] } } From 585e184e2c6b3707852ad9205101aeb3f42b5be4 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 16 Nov 2025 20:04:45 +0000 Subject: [PATCH 05/24] Expand Windows test matrix with driver mode support Co-authored-by: leofang <5534781+leofang@users.noreply.github.com> --- ci/test-matrix.json | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/ci/test-matrix.json b/ci/test-matrix.json index 02352d0e3c..1e9453f760 100644 --- a/ci/test-matrix.json +++ b/ci/test-matrix.json @@ -1,6 +1,6 @@ { 
"_description": "Test matrix configurations for CUDA Python CI workflows. This file consolidates the test matrices that were previously hardcoded in the workflow files. All GPU and ARCH values are hard-coded for each architecture: l4 GPU for amd64, a100 GPU for arm64.", - "_sorted_by": "Please keep matrices sorted in ascending order by [ARCH, PY_VER, CUDA_VER, LOCAL_CTK, GPU, DRIVER]", + "_sorted_by": "Please keep matrices sorted in ascending order by [ARCH, PY_VER, CUDA_VER, LOCAL_CTK, GPU, DRIVER]. Windows entries also include DRIVER_MODE.", "_notes": "DRIVER: 'earliest' does not work with CUDA 12.9.1 and LOCAL_CTK: 0 does not work with CUDA 12.0.1", "linux": { "pull-request": [ @@ -34,14 +34,18 @@ }, "windows": { "pull-request": [ - { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "t4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "t4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "t4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "t4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" } + { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, + { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, + { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, + { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "rtx6000ada", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, + { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, + { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, + { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, + { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, + { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, + { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "rtx6000ada", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, + { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, + { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "TCC" } ], "nightly": [] } From de4201177fb6e9496f7b1ddab8c60865e93ec27f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 16 Nov 2025 20:05:53 +0000 
Subject: [PATCH 06/24] Wire driver mode from test-matrix.json into Windows workflow Co-authored-by: leofang <5534781+leofang@users.noreply.github.com> --- .github/workflows/test-wheel-windows.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index c27d9a7206..2068d91a29 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -84,6 +84,9 @@ jobs: continue-on-error: true - name: Update driver + env: + DRIVER_MODE: ${{ matrix.DRIVER_MODE }} + JOB_RUNNER: windows-${{ matrix.ARCH }}-gpu-${{ matrix.GPU }}-${{ matrix.DRIVER }}-1 run: | ci/tools/install_gpu_driver.ps1 From 35fa1596c67edd67566dbdfd9354a264e41b0d6a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 16 Nov 2025 20:15:08 +0000 Subject: [PATCH 07/24] Update install_gpu_driver.ps1 to match CCCL implementation with driver mode support Co-authored-by: leofang <5534781+leofang@users.noreply.github.com> --- ci/tools/install_gpu_driver.ps1 | 232 +++++++++----------------------- 1 file changed, 65 insertions(+), 167 deletions(-) diff --git a/ci/tools/install_gpu_driver.ps1 b/ci/tools/install_gpu_driver.ps1 index e7d57a141b..58f825fd39 100644 --- a/ci/tools/install_gpu_driver.ps1 +++ b/ci/tools/install_gpu_driver.ps1 @@ -2,158 +2,44 @@ # # SPDX-License-Identifier: Apache-2.0 -# Detect GPU type from JOB_RUNNER environment variable -function Get-GPUType { - param( - [string]$JobRunner = $env:JOB_RUNNER - ) - - if ([string]::IsNullOrEmpty($JobRunner)) { - Write-Output "Warning: JOB_RUNNER environment variable not set. Using default GPU detection." - return "unknown" - } - - # Extract GPU type from runner label (e.g., "windows-amd64-gpu-l4-latest-1") - if ($JobRunner -match "gpu-([^-]+)") { - $gpuType = $matches[1].ToLower() - Write-Output "Detected GPU type: $gpuType" - return $gpuType - } - - Write-Output "Warning: Could not parse GPU type from JOB_RUNNER: $JobRunner" - return "unknown" -} +# Sourced from https://github.com/NVIDIA/cccl -# Determine if GPU is a data center GPU -function Is-DataCenterGPU { - param( - [string]$GpuType - ) - - $dataCenterGPUs = @("l4", "a100", "t4", "h100", "a10", "a30", "a40") - return $dataCenterGPUs -contains $GpuType -} +# Install the driver +function Install-Driver { + + # Set the correct URL, filename, and arguments to the installer + # This driver is picked to support Windows 11 & CUDA 13.0 + $version = '581.15' -# Get driver URL and filename based on GPU type -function Get-DriverInfo { - param( - [string]$GpuType, - [string]$DriverMode = $env:DRIVER_MODE - ) - - $isDataCenter = Is-DataCenterGPU -GpuType $GpuType - - # Default driver version that supports Windows 11 & CUDA 13.0 - $driverVersion = "580.88" - - if ($isDataCenter) { - # Data center GPU - use Tesla driver - $filename = "$driverVersion-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe" - $url = "https://us.download.nvidia.com/tesla/$driverVersion/$filename" + # extract gpu type from gha runner label: + # Labels are in the form: --gpu--- + $gha_runner_label=$env:JOB_RUNNER + $gpu_type = $gha_runner_label.Split('-')[3] + + $data_center_gpus = @('a100', 'h100', 'l4', 't4', 'v100', 'rtxa6000', 'rtx6000ada') + $desktop_gpus = @('rtx2080', 'rtx4090', 'rtxpro6000') + + if ($data_center_gpus -contains $gpu_type) { + Write-Output "Data center GPU detected: $gpu_type" + 
$filename="$version-data-center-tesla-desktop-winserver-2022-2025-dch-international.exe" + $server_path="tesla/$version" + } elseif ($desktop_gpus -contains $gpu_type) { + Write-Output "Desktop GPU detected: $gpu_type" + $filename="$version-desktop-win10-win11-64bit-international-dch-whql.exe" + $server_path="Windows/$version" } else { - # Desktop GPU - use GeForce/Quadro driver - $filename = "$driverVersion-desktop-win10-win11-64bit-international-dch-whql.exe" - $url = "https://us.download.nvidia.com/Windows/$driverVersion/$filename" - } - - return @{ - Url = $url - Filename = $filename - IsDataCenter = $isDataCenter + Write-Output "Unknown GPU type: $gpu_type" + exit 1 } -} -# Set driver mode using nvidia-smi -function Set-DriverMode { - param( - [string]$DriverMode, - [bool]$IsDataCenter - ) - - if ([string]::IsNullOrEmpty($DriverMode)) { - Write-Output "No driver mode specified, skipping mode configuration" - return - } - - $DriverMode = $DriverMode.ToUpper() - Write-Output "Configuring driver mode: $DriverMode" - - if (-not $IsDataCenter) { - if ($DriverMode -ne "WDDM") { - Write-Output "Warning: Desktop GPUs only support WDDM mode. Requested mode '$DriverMode' will be ignored." - } - # Desktop GPUs are always in WDDM mode, no configuration needed - return - } - - # Data center GPUs support TCC and MCDM (not WDDM) - if ($DriverMode -eq "WDDM") { - Write-Output "Warning: Data center GPUs do not support WDDM mode. Skipping mode configuration." - return - } - - try { - # Check current mode - $currentMode = & nvidia-smi -q | Select-String -Pattern "Driver Mode" | Out-String - Write-Output "Current driver mode: $currentMode" - - if ($DriverMode -eq "TCC") { - # Set TCC mode (nvidia-smi -fdm 0 sets TCC mode) - Write-Output "Setting TCC mode..." - & nvidia-smi -fdm 0 - - # Verify mode was set - Write-Output "Resetting display device..." - # Reset display devices to apply the change - $devcon = "C:\Windows\System32\pnputil.exe" - if (Test-Path $devcon) { - & $devcon /restart-device "PCI\VEN_10DE*" - } - } elseif ($DriverMode -eq "MCDM") { - # Set MCDM mode (nvidia-smi -fdm 2 sets MCDM mode) - Write-Output "Setting MCDM mode..." - & nvidia-smi -fdm 2 - - # Verify mode was set - Write-Output "Resetting display device..." 
- # Reset display devices to apply the change - $devcon = "C:\Windows\System32\pnputil.exe" - if (Test-Path $devcon) { - & $devcon /restart-device "PCI\VEN_10DE*" - } - } - - # Wait for device reset - Start-Sleep -Seconds 5 - - # Verify new mode - $newMode = & nvidia-smi -q | Select-String -Pattern "Driver Mode" | Out-String - Write-Output "New driver mode: $newMode" - } catch { - Write-Output "Warning: Failed to set driver mode: $_" - } -} + $url="https://us.download.nvidia.com/$server_path/$filename" + $filepath="C:\NVIDIA-Driver\$filename" -# Install the driver -function Install-Driver { - param( - [string]$GpuType = (Get-GPUType), - [string]$DriverMode = $env:DRIVER_MODE - ) - - Write-Output "Installing GPU driver for GPU type: $GpuType" - - # Get driver information - $driverInfo = Get-DriverInfo -GpuType $GpuType -DriverMode $DriverMode - $url = $driverInfo.Url - $filename = $driverInfo.Filename - $isDataCenter = $driverInfo.IsDataCenter - - Write-Output "Driver URL: $url" - Write-Output "Is Data Center GPU: $isDataCenter" - - $file_dir = "C:\NVIDIA-Driver\$filename" - $install_args = '/s /noeula /noreboot' + Write-Output "Installing NVIDIA driver version $version for GPU type $gpu_type" + Write-Output "Download URL: $url" + + # Silent install arguments + $install_args = '/s /noeula /noreboot'; # Create the folder for the driver download if (!(Test-Path -Path 'C:\NVIDIA-Driver')) { @@ -165,30 +51,42 @@ function Install-Driver { $ProgressPreference_tmp = $ProgressPreference $ProgressPreference = 'SilentlyContinue' Write-Output 'Downloading the driver installer...' - try { - Invoke-WebRequest $url -OutFile $file_dir - $ProgressPreference = $ProgressPreference_tmp - Write-Output 'Download complete!' - } catch { - $ProgressPreference = $ProgressPreference_tmp - Write-Output "Error downloading driver: $_" - Write-Output "Falling back to default driver..." - # Fall back to the original hardcoded driver if download fails - $url = 'https://us.download.nvidia.com/tesla/580.88/580.88-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe' - $file_dir = 'C:\NVIDIA-Driver\580.88-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe' - $ProgressPreference = 'SilentlyContinue' - Invoke-WebRequest $url -OutFile $file_dir - $ProgressPreference = $ProgressPreference_tmp - Write-Output 'Fallback download complete!' - } + Invoke-WebRequest $url -OutFile $filepath + $ProgressPreference = $ProgressPreference_tmp + Write-Output 'Download complete!' # Install the file with the specified path from earlier Write-Output 'Running the driver installer...' - Start-Process -FilePath $file_dir -ArgumentList $install_args -Wait - Write-Output 'Driver installation complete!' - - # Set driver mode if specified - Set-DriverMode -DriverMode $DriverMode -IsDataCenter $isDataCenter + Start-Process -FilePath $filepath -ArgumentList $install_args -Wait + Write-Output 'Done!' + + # Handle driver mode configuration for data center GPUs + $driver_mode = $env:DRIVER_MODE + if ($data_center_gpus -contains $gpu_type) { + if ($driver_mode -eq "MCDM") { + Write-Output "Setting driver mode to MCDM..." + nvidia-smi -fdm 2 + pnputil /disable-device /class Display + pnputil /enable-device /class Display + # Give it a minute to settle: + Start-Sleep -Seconds 5 + } elseif ($driver_mode -eq "TCC") { + Write-Output "Setting driver mode to TCC..." 
+ nvidia-smi -fdm 0 + pnputil /disable-device /class Display + pnputil /enable-device /class Display + # Give it a minute to settle: + Start-Sleep -Seconds 5 + } else { + # Default: TCC -> MCDM on data center GPUs + Write-Output "Setting driver mode to MCDM (default)..." + nvidia-smi -fdm 2 + pnputil /disable-device /class Display + pnputil /enable-device /class Display + # Give it a minute to settle: + Start-Sleep -Seconds 5 + } + } } # Run the functions From da32f6c086f4791771cca3ad6630689086ed6801 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 16 Nov 2025 20:42:28 +0000 Subject: [PATCH 08/24] Simplify driver mode handling per review feedback Co-authored-by: leofang <5534781+leofang@users.noreply.github.com> --- ci/tools/install_gpu_driver.ps1 | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/ci/tools/install_gpu_driver.ps1 b/ci/tools/install_gpu_driver.ps1 index 58f825fd39..3929ebe720 100644 --- a/ci/tools/install_gpu_driver.ps1 +++ b/ci/tools/install_gpu_driver.ps1 @@ -2,8 +2,6 @@ # # SPDX-License-Identifier: Apache-2.0 -# Sourced from https://github.com/NVIDIA/cccl - # Install the driver function Install-Driver { @@ -60,9 +58,10 @@ function Install-Driver { Start-Process -FilePath $filepath -ArgumentList $install_args -Wait Write-Output 'Done!' - # Handle driver mode configuration for data center GPUs + # Handle driver mode configuration $driver_mode = $env:DRIVER_MODE if ($data_center_gpus -contains $gpu_type) { + # Data center GPUs: TCC -> MCDM if ($driver_mode -eq "MCDM") { Write-Output "Setting driver mode to MCDM..." nvidia-smi -fdm 2 @@ -70,16 +69,11 @@ function Install-Driver { pnputil /enable-device /class Display # Give it a minute to settle: Start-Sleep -Seconds 5 - } elseif ($driver_mode -eq "TCC") { - Write-Output "Setting driver mode to TCC..." - nvidia-smi -fdm 0 - pnputil /disable-device /class Display - pnputil /enable-device /class Display - # Give it a minute to settle: - Start-Sleep -Seconds 5 - } else { - # Default: TCC -> MCDM on data center GPUs - Write-Output "Setting driver mode to MCDM (default)..." + } + } elseif ($desktop_gpus -contains $gpu_type) { + # Desktop GPUs: WDDM -> MCDM + if ($driver_mode -eq "MCDM") { + Write-Output "Setting driver mode to MCDM..." 
nvidia-smi -fdm 2 pnputil /disable-device /class Display pnputil /enable-device /class Display From a4a65ad9090c7119c3f93c89aa61afb307cdb839 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 16 Nov 2025 20:50:03 +0000 Subject: [PATCH 09/24] Use GPU_TYPE env var instead of parsing JOB_RUNNER Co-authored-by: leofang <5534781+leofang@users.noreply.github.com> --- .github/workflows/test-wheel-windows.yml | 2 +- ci/tools/install_gpu_driver.ps1 | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 2068d91a29..be229857b9 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -86,7 +86,7 @@ jobs: - name: Update driver env: DRIVER_MODE: ${{ matrix.DRIVER_MODE }} - JOB_RUNNER: windows-${{ matrix.ARCH }}-gpu-${{ matrix.GPU }}-${{ matrix.DRIVER }}-1 + GPU_TYPE: ${{ matrix.GPU }} run: | ci/tools/install_gpu_driver.ps1 diff --git a/ci/tools/install_gpu_driver.ps1 b/ci/tools/install_gpu_driver.ps1 index 3929ebe720..3a1722cb96 100644 --- a/ci/tools/install_gpu_driver.ps1 +++ b/ci/tools/install_gpu_driver.ps1 @@ -9,10 +9,8 @@ function Install-Driver { # This driver is picked to support Windows 11 & CUDA 13.0 $version = '581.15' - # extract gpu type from gha runner label: - # Labels are in the form: --gpu--- - $gha_runner_label=$env:JOB_RUNNER - $gpu_type = $gha_runner_label.Split('-')[3] + # Get GPU type from environment variable + $gpu_type = $env:GPU_TYPE $data_center_gpus = @('a100', 'h100', 'l4', 't4', 'v100', 'rtxa6000', 'rtx6000ada') $desktop_gpus = @('rtx2080', 'rtx4090', 'rtxpro6000') From 16b0e3fe2e7322b248750f74f9ef46903adb2c36 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 16 Nov 2025 15:56:57 -0500 Subject: [PATCH 10/24] ensure each GPU kind are tested under two modes --- ci/test-matrix.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/test-matrix.json b/ci/test-matrix.json index 1e9453f760..88b99ea0ee 100644 --- a/ci/test-matrix.json +++ b/ci/test-matrix.json @@ -38,12 +38,12 @@ { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "rtx6000ada", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, - { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, - { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, + { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, + { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, - { 
"ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "rtx6000ada", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, + { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "rtx6000ada", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "TCC" } ], From f789922ac35c4e41d2855946487501808e2e3d66 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 16 Nov 2025 16:35:27 -0500 Subject: [PATCH 11/24] fix arch coverage - we do not have access to rtx6000ada - rtxpro6000 is a datacenter card - cover WDDM in at least 2 pipelines --- ci/test-matrix.json | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ci/test-matrix.json b/ci/test-matrix.json index 88b99ea0ee..45fcda5734 100644 --- a/ci/test-matrix.json +++ b/ci/test-matrix.json @@ -34,17 +34,17 @@ }, "windows": { "pull-request": [ - { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, - { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, + { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "rtx2080", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, + { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, - { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "rtx6000ada", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, + { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "rtx4090", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, - { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, + { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, - { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "rtx6000ada", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, - { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, + { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "rtx4090", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, + { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, { "ARCH": "amd64", 
"PY_VER": "3.14t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "TCC" } ], "nightly": [] From f2ffbb18d56a0c84c1879baafb458d281c79b9ce Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 16 Nov 2025 16:55:11 -0500 Subject: [PATCH 12/24] make script more flexible; ensure cover 6 different GPUs, each with 2 different modes rtx2080, rtx4090, rtxpro6000, v100, a100, l4 (t4 nodes are too slow) --- ci/test-matrix.json | 12 +++++------ ci/tools/install_gpu_driver.ps1 | 37 +++++++++++++++------------------ 2 files changed, 23 insertions(+), 26 deletions(-) diff --git a/ci/test-matrix.json b/ci/test-matrix.json index 45fcda5734..d70da90016 100644 --- a/ci/test-matrix.json +++ b/ci/test-matrix.json @@ -39,13 +39,13 @@ { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "rtx4090", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, - { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, - { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, - { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, - { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, + { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "v100", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, + { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "rtx2080", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, + { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, + { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "rtx4090", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, - { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, - { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "TCC" } + { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, + { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "v100", "DRIVER": "latest", "DRIVER_MODE": "MCDM" } ], "nightly": [] } diff --git a/ci/tools/install_gpu_driver.ps1 b/ci/tools/install_gpu_driver.ps1 index 3a1722cb96..5602eeb489 100644 --- a/ci/tools/install_gpu_driver.ps1 +++ b/ci/tools/install_gpu_driver.ps1 @@ -57,28 +57,25 @@ function Install-Driver { Write-Output 'Done!' # Handle driver mode configuration + # This assumes we have the prior knowledge on which GPU can use which mode. 
$driver_mode = $env:DRIVER_MODE - if ($data_center_gpus -contains $gpu_type) { - # Data center GPUs: TCC -> MCDM - if ($driver_mode -eq "MCDM") { - Write-Output "Setting driver mode to MCDM..." - nvidia-smi -fdm 2 - pnputil /disable-device /class Display - pnputil /enable-device /class Display - # Give it a minute to settle: - Start-Sleep -Seconds 5 - } - } elseif ($desktop_gpus -contains $gpu_type) { - # Desktop GPUs: WDDM -> MCDM - if ($driver_mode -eq "MCDM") { - Write-Output "Setting driver mode to MCDM..." - nvidia-smi -fdm 2 - pnputil /disable-device /class Display - pnputil /enable-device /class Display - # Give it a minute to settle: - Start-Sleep -Seconds 5 - } + if ($driver_mode -eq "WDDM") { + Write-Output "Setting driver mode to WDDM..." + nvidia-smi -fdm 0 + } elseif ($driver_mode -eq "TCC") { + Write-Output "Setting driver mode to TCC..." + nvidia-smi -fdm 1 + } elseif ($driver_mode -eq "MCDM") { + Write-Output "Setting driver mode to MCDM..." + nvidia-smi -fdm 2 + } else { + Write-Output "Unknown driver mode: $driver_mode" + exit 1 } + pnputil /disable-device /class Display + pnputil /enable-device /class Display + # Give it a minute to settle: + Start-Sleep -Seconds 5 } # Run the functions From 0293947492808a93c58e8194b991da54a60a38bd Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 17 Nov 2025 00:34:02 +0000 Subject: [PATCH 13/24] Add driver mode verification and change v100 to rtxpro6000 for CUDA 13 Co-authored-by: leofang <5534781+leofang@users.noreply.github.com> --- .github/workflows/test-wheel-windows.yml | 12 ++++++++++++ ci/test-matrix.json | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index be229857b9..2e3737af14 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -93,6 +93,18 @@ jobs: - name: Ensure GPU is working run: nvidia-smi + - name: Verify driver mode + env: + EXPECTED_MODE: ${{ matrix.DRIVER_MODE }} + run: | + $mode_output = nvidia-smi -q | Select-String -Pattern "Driver Model" + Write-Output "Driver mode check: $mode_output" + if ($mode_output -notmatch "$env:EXPECTED_MODE") { + Write-Error "Expected driver mode $env:EXPECTED_MODE but got: $mode_output" + exit 1 + } + Write-Output "Driver mode verified: $env:EXPECTED_MODE" + - name: Set environment variables env: BUILD_CUDA_VER: ${{ inputs.build-ctk-ver }} diff --git a/ci/test-matrix.json b/ci/test-matrix.json index d70da90016..0b7dc3435c 100644 --- a/ci/test-matrix.json +++ b/ci/test-matrix.json @@ -45,7 +45,7 @@ { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "rtx4090", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, - { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "v100", "DRIVER": "latest", "DRIVER_MODE": "MCDM" } + { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "MCDM" } ], "nightly": [] } From 1706a0651aee2e89754f1735991e4c434b5d3082 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 16 Nov 2025 19:51:08 -0500 Subject: [PATCH 14/24] fix --- 
.github/workflows/test-wheel-windows.yml | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 2e3737af14..12d67d7aa0 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -94,16 +94,14 @@ jobs: run: nvidia-smi - name: Verify driver mode - env: - EXPECTED_MODE: ${{ matrix.DRIVER_MODE }} run: | - $mode_output = nvidia-smi -q | Select-String -Pattern "Driver Model" + $mode_output = nvidia-smi | Select-String -Pattern "${{ matrix.DRIVER_MODE }}" Write-Output "Driver mode check: $mode_output" - if ($mode_output -notmatch "$env:EXPECTED_MODE") { - Write-Error "Expected driver mode $env:EXPECTED_MODE but got: $mode_output" + if ($mode_output -notmatch "${{ matrix.DRIVER_MODE }}") { + Write-Error "Expected driver mode ${{ matrix.DRIVER_MODE }} but got: $mode_output" exit 1 } - Write-Output "Driver mode verified: $env:EXPECTED_MODE" + Write-Output "Driver mode verified: ${{ matrix.DRIVER_MODE }}" - name: Set environment variables env: From 2393b68aae4018b125bd3acebd54f68eb380b77d Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 16 Nov 2025 19:52:04 -0500 Subject: [PATCH 15/24] merge Removed redundant 'Ensure GPU is working' step and kept the driver mode verification. --- .github/workflows/test-wheel-windows.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 12d67d7aa0..e20124adc5 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -91,10 +91,9 @@ jobs: ci/tools/install_gpu_driver.ps1 - name: Ensure GPU is working - run: nvidia-smi - - - name: Verify driver mode run: | + nvidia-smi + $mode_output = nvidia-smi | Select-String -Pattern "${{ matrix.DRIVER_MODE }}" Write-Output "Driver mode check: $mode_output" if ($mode_output -notmatch "${{ matrix.DRIVER_MODE }}") { From e363f0ef6b4eb0dc90188788da677d657fd107bd Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 16 Nov 2025 20:39:09 -0500 Subject: [PATCH 16/24] ensure using CTK 12.x with V100 + driver mode check can fail --- .github/workflows/test-wheel-windows.yml | 4 ++-- ci/test-matrix.json | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index e20124adc5..f695ffc3d6 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -96,8 +96,8 @@ jobs: $mode_output = nvidia-smi | Select-String -Pattern "${{ matrix.DRIVER_MODE }}" Write-Output "Driver mode check: $mode_output" - if ($mode_output -notmatch "${{ matrix.DRIVER_MODE }}") { - Write-Error "Expected driver mode ${{ matrix.DRIVER_MODE }} but got: $mode_output" + if ($mode_output -eq "") { + Write-Error "Switching to driver mode ${{ matrix.DRIVER_MODE }} failed!" 
exit 1 } Write-Output "Driver mode verified: ${{ matrix.DRIVER_MODE }}" diff --git a/ci/test-matrix.json b/ci/test-matrix.json index 0b7dc3435c..b950abaed9 100644 --- a/ci/test-matrix.json +++ b/ci/test-matrix.json @@ -36,16 +36,16 @@ "pull-request": [ { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "rtx2080", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, - { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, + { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "v100", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "rtx4090", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, - { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "v100", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, + { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "rtx2080", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, - { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, + { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "v100", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "rtx4090", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, - { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "MCDM" } + { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "MCDM" } ], "nightly": [] } From 3370245c04dda737f35ea4916178576e562a631f Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 16 Nov 2025 21:27:00 -0500 Subject: [PATCH 17/24] fix syntax --- .github/workflows/test-wheel-windows.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index f695ffc3d6..23384e1af6 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -96,7 +96,7 @@ jobs: $mode_output = nvidia-smi | Select-String -Pattern "${{ matrix.DRIVER_MODE }}" Write-Output "Driver mode check: $mode_output" - if ($mode_output -eq "") { + if ("$mode_output" -eq "") { Write-Error "Switching to driver mode ${{ matrix.DRIVER_MODE }} failed!" 
exit 1 } From c7abbdf1b401da6ff652ae67a8ab01c2b19916cd Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 16 Nov 2025 22:14:49 -0500 Subject: [PATCH 18/24] avoid testing Quadro + WDDM; make driver mode show up in pipeline names --- .github/workflows/test-wheel-linux.yml | 2 +- .github/workflows/test-wheel-windows.yml | 2 +- ci/test-matrix.json | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml index f763d65b9d..5d2eeb5282 100644 --- a/.github/workflows/test-wheel-linux.yml +++ b/.github/workflows/test-wheel-linux.yml @@ -76,7 +76,7 @@ jobs: echo "MATRIX=${MATRIX}" | tee --append "${GITHUB_OUTPUT}" test: - name: py${{ matrix.PY_VER }}, ${{ matrix.CUDA_VER }}, ${{ (matrix.LOCAL_CTK == '1' && 'local') || 'wheels' }}, GPU ${{ matrix.GPU }} + name: py${{ matrix.PY_VER }}, ${{ matrix.CUDA_VER }}, ${{ (matrix.LOCAL_CTK == '1' && 'local') || 'wheels' }}, ${{ matrix.GPU }} needs: compute-matrix strategy: fail-fast: false diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 23384e1af6..87b1653bf9 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -65,7 +65,7 @@ jobs: echo "MATRIX=${MATRIX}" | tee --append "${GITHUB_OUTPUT}" test: - name: py${{ matrix.PY_VER }}, ${{ matrix.CUDA_VER }}, ${{ (matrix.LOCAL_CTK == '1' && 'local') || 'wheels' }}, GPU ${{ matrix.GPU }} + name: py${{ matrix.PY_VER }}, ${{ matrix.CUDA_VER }}, ${{ (matrix.LOCAL_CTK == '1' && 'local') || 'wheels' }}, ${{ matrix.GPU }} (${{ matrix.DRIVER_MODE }}) # The build stage could fail but we want the CI to keep moving. needs: compute-matrix strategy: diff --git a/ci/test-matrix.json b/ci/test-matrix.json index b950abaed9..f6f3542316 100644 --- a/ci/test-matrix.json +++ b/ci/test-matrix.json @@ -41,7 +41,7 @@ { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "rtx2080", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, - { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, + { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "v100", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "rtx4090", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, From 7dc0f916a0d406bbce813e292ee5c9630b5a2ab8 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 16 Nov 2025 22:52:07 -0500 Subject: [PATCH 19/24] add missing `test-cu12-ft` dep group --- cuda_core/pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/cuda_core/pyproject.toml b/cuda_core/pyproject.toml index 31ceb5b1a5..45a90bb8c4 100644 --- a/cuda_core/pyproject.toml +++ b/cuda_core/pyproject.toml @@ -56,6 +56,7 @@ test-cu12 = ["cuda-core[test]", "cupy-cuda12x; python_version < '3.14'", "cuda-t test-cu13 = ["cuda-core[test]", 
"cupy-cuda13x; python_version < '3.14'", "cuda-toolkit[cudart]==13.*"] # runtime headers needed by CuPy # free threaded build, cupy doesn't support free-threaded builds yet, so avoid installing it for now # TODO: cupy should support free threaded builds +test-cu12-ft = ["cuda-core[test]", "cuda-toolkit[cudart]==12.*"] test-cu13-ft = ["cuda-core[test]", "cuda-toolkit[cudart]==13.*"] [project.urls] From 85c0059af61dc5785bf9ed15ec15a46215ab53fe Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Mon, 17 Nov 2025 22:16:53 -0800 Subject: [PATCH 20/24] fix VMM on Windows --- .../_memory/_virtual_memory_resource.py | 4 +- cuda_core/tests/test_memory.py | 48 ++++++++++++++++--- 2 files changed, 45 insertions(+), 7 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_memory/_virtual_memory_resource.py b/cuda_core/cuda/core/experimental/_memory/_virtual_memory_resource.py index c17c30bc97..1ebc9dfd48 100644 --- a/cuda_core/cuda/core/experimental/_memory/_virtual_memory_resource.py +++ b/cuda_core/cuda/core/experimental/_memory/_virtual_memory_resource.py @@ -70,6 +70,7 @@ class VirtualMemoryResourceOptions: peers: Iterable[int] = field(default_factory=tuple) self_access: VirtualMemoryAccessTypeT = "rw" peer_access: VirtualMemoryAccessTypeT = "rw" + win32_handle_metadata: int | None = 0 _a = driver.CUmemAccess_flags _access_flags = {"rw": _a.CU_MEM_ACCESS_FLAGS_PROT_READWRITE, "r": _a.CU_MEM_ACCESS_FLAGS_PROT_READ, None: 0} @@ -212,6 +213,7 @@ def modify_allocation(self, buf: Buffer, new_size: int, config: VirtualMemoryRes prop.location.id = self.device.device_id prop.allocFlags.gpuDirectRDMACapable = 1 if self.config.gpu_direct_rdma else 0 prop.requestedHandleTypes = VirtualMemoryResourceOptions._handle_type_to_driver(self.config.handle_type) + prop.win32HandleMetaData = self.config.win32_handle_metadata if self.config.win32_handle_metadata else 0 # Query granularity gran_flag = VirtualMemoryResourceOptions._granularity_to_driver(self.config.granularity) @@ -495,11 +497,11 @@ def allocate(self, size: int, stream: Stream = None) -> Buffer: # ---- Build allocation properties ---- prop = driver.CUmemAllocationProp() prop.type = VirtualMemoryResourceOptions._allocation_type_to_driver(config.allocation_type) - prop.location.type = VirtualMemoryResourceOptions._location_type_to_driver(config.location_type) prop.location.id = self.device.device_id if config.location_type == "device" else -1 prop.allocFlags.gpuDirectRDMACapable = 1 if config.gpu_direct_rdma else 0 prop.requestedHandleTypes = VirtualMemoryResourceOptions._handle_type_to_driver(config.handle_type) + prop.win32HandleMetaData = self.config.win32_handle_metadata if self.config.win32_handle_metadata else 0 # ---- Query and apply granularity ---- # Choose min vs recommended granularity per config diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py index d960e6ee12..9c88f1d0e3 100644 --- a/cuda_core/tests/test_memory.py +++ b/cuda_core/tests/test_memory.py @@ -1,6 +1,8 @@ # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 +import ctypes +from ctypes import wintypes import sys try: @@ -28,6 +30,8 @@ from cuda.core.experimental._memory import IPCBufferDescriptor from cuda.core.experimental._utils.cuda_utils import handle_return from cuda.core.experimental.utils import StridedMemoryView + +from helpers import IS_WINDOWS from helpers.buffers import DummyUnifiedMemoryResource from cuda_python_test_helpers import supports_ipc_mempool @@ -312,8 +316,31 @@ def test_device_memory_resource_initialization(mempool_device, use_device_object buffer.close() +def get_handle_type(): + def get_sa(): + class SECURITY_ATTRIBUTES(ctypes.Structure): + _fields_ = [ + ("nLength", wintypes.DWORD), + ("lpSecurityDescriptor", wintypes.LPVOID), + ("bInheritHandle", wintypes.BOOL) + ] + + sa = SECURITY_ATTRIBUTES() + sa.nLength = ctypes.sizeof(sa) + sa.lpSecurityDescriptor = None + sa.bInheritHandle = False # TODO: why? + + return sa + + if IS_WINDOWS: + return (("win32", get_sa()), ("win32_kmt", None)) + else: + return (("posix", None),) + + @pytest.mark.parametrize("use_device_object", [True, False]) -def test_vmm_allocator_basic_allocation(use_device_object): +@pytest.mark.parametrize("handle_type", get_handle_type()) +def test_vmm_allocator_basic_allocation(use_device_object, handle_type): """Test basic VMM allocation functionality. This test verifies that VirtualMemoryResource can allocate memory @@ -326,7 +353,11 @@ def test_vmm_allocator_basic_allocation(use_device_object): if not device.properties.virtual_memory_management_supported: pytest.skip("Virtual memory management is not supported on this device") - options = VirtualMemoryResourceOptions() + handle_type, security_attribute = handle_type # unpack + win32_handle_metadata = ctypes.addressof(security_attribute) if security_attribute else 0 + options = VirtualMemoryResourceOptions( + handle_type=handle_type, win32_handle_metadata=win32_handle_metadata, + ) # Create VMM allocator with default config device_arg = device if use_device_object else device.device_id vmm_mr = VirtualMemoryResource(device_arg, config=options) @@ -376,7 +407,7 @@ def test_vmm_allocator_policy_configuration(): location_type="device", granularity="minimum", gpu_direct_rdma=True, - handle_type="posix_fd" if platform.system() != "Windows" else "win32", + handle_type="posix_fd" if IS_WINDOWS else "win32_kmt", peers=(), self_access="rw", peer_access="rw", @@ -400,7 +431,7 @@ def test_vmm_allocator_policy_configuration(): location_type="device", granularity="recommended", gpu_direct_rdma=False, - handle_type="posix_fd", + handle_type="posix_fd" if IS_WINDOWS else "win32_kmt", peers=(), self_access="r", # Read-only access peer_access="r", @@ -416,7 +447,8 @@ def test_vmm_allocator_policy_configuration(): modified_buffer.close() -def test_vmm_allocator_grow_allocation(): +@pytest.mark.parametrize("handle_type", get_handle_type()) +def test_vmm_allocator_grow_allocation(handle_type): """Test VMM allocator's ability to grow existing allocations. 
This test verifies that VirtualMemoryResource can grow existing @@ -429,7 +461,11 @@ def test_vmm_allocator_grow_allocation(): if not device.properties.virtual_memory_management_supported: pytest.skip("Virtual memory management is not supported on this device") - options = VirtualMemoryResourceOptions() + handle_type, security_attribute = handle_type # unpack + win32_handle_metadata = ctypes.addressof(security_attribute) if security_attribute else 0 + options = VirtualMemoryResourceOptions( + handle_type=handle_type, win32_handle_metadata=win32_handle_metadata, + ) vmm_mr = VirtualMemoryResource(device, config=options) From ca1aa7474c7b6cbf63befb52fdbf1d04dd7c2f47 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 18 Nov 2025 06:23:30 +0000 Subject: [PATCH 21/24] [pre-commit.ci] auto code formatting --- cuda_core/tests/test_memory.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py index 9c88f1d0e3..60905bee40 100644 --- a/cuda_core/tests/test_memory.py +++ b/cuda_core/tests/test_memory.py @@ -2,8 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 import ctypes -from ctypes import wintypes import sys +from ctypes import wintypes try: from cuda.bindings import driver @@ -13,7 +13,6 @@ import numpy as np except ImportError: np = None -import ctypes import platform import pytest @@ -30,7 +29,6 @@ from cuda.core.experimental._memory import IPCBufferDescriptor from cuda.core.experimental._utils.cuda_utils import handle_return from cuda.core.experimental.utils import StridedMemoryView - from helpers import IS_WINDOWS from helpers.buffers import DummyUnifiedMemoryResource @@ -322,7 +320,7 @@ class SECURITY_ATTRIBUTES(ctypes.Structure): _fields_ = [ ("nLength", wintypes.DWORD), ("lpSecurityDescriptor", wintypes.LPVOID), - ("bInheritHandle", wintypes.BOOL) + ("bInheritHandle", wintypes.BOOL), ] sa = SECURITY_ATTRIBUTES() @@ -356,7 +354,8 @@ def test_vmm_allocator_basic_allocation(use_device_object, handle_type): handle_type, security_attribute = handle_type # unpack win32_handle_metadata = ctypes.addressof(security_attribute) if security_attribute else 0 options = VirtualMemoryResourceOptions( - handle_type=handle_type, win32_handle_metadata=win32_handle_metadata, + handle_type=handle_type, + win32_handle_metadata=win32_handle_metadata, ) # Create VMM allocator with default config device_arg = device if use_device_object else device.device_id @@ -464,7 +463,8 @@ def test_vmm_allocator_grow_allocation(handle_type): handle_type, security_attribute = handle_type # unpack win32_handle_metadata = ctypes.addressof(security_attribute) if security_attribute else 0 options = VirtualMemoryResourceOptions( - handle_type=handle_type, win32_handle_metadata=win32_handle_metadata, + handle_type=handle_type, + win32_handle_metadata=win32_handle_metadata, ) vmm_mr = VirtualMemoryResource(device, config=options) From 3781575ccba82814240de55ff00f6476e541d50f Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Tue, 18 Nov 2025 01:26:09 -0500 Subject: [PATCH 22/24] RTX cards cannot run MCDM, switch back to L4 for now Updated GPU configurations for Python versions 3.13 and 3.14. 
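The constraint behind this change: the data-center GPUs in the matrix (l4, a100, v100) run in TCC or MCDM, while the GeForce cards (rtx2080, rtx4090) stay on WDDM, so GPU and DRIVER_MODE have to be chosen as a pair. Below is a minimal sketch of a sanity check over ci/test-matrix.json, assuming the entry layout shown in the hunks; the helper name and the compatibility map are illustrative, not an authoritative list.

# check_matrix_modes.py -- hypothetical helper, not part of this PR.
import json

# Illustrative compatibility map, inferred from the commit messages in this
# series (GeForce cards stay on WDDM; data-center GPUs use TCC or MCDM).
ALLOWED_MODES = {
    "l4": {"TCC", "MCDM"},
    "a100": {"TCC", "MCDM"},
    "v100": {"TCC", "MCDM"},
    "rtx2080": {"WDDM"},
    "rtx4090": {"WDDM"},
    "rtxpro6000": {"MCDM"},
}


def find_bad_entries(path="ci/test-matrix.json"):
    with open(path) as f:
        matrix = json.load(f)
    # Assumes the file is either a list of entries or an object whose values
    # are lists of entries, each entry being a dict like the ones in the hunks.
    groups = matrix.values() if isinstance(matrix, dict) else [matrix]
    bad = []
    for entries in groups:
        for entry in entries:
            gpu = entry.get("GPU")
            mode = entry.get("DRIVER_MODE")
            if gpu in ALLOWED_MODES and mode and mode not in ALLOWED_MODES[gpu]:
                bad.append((entry.get("PY_VER"), gpu, mode))
    return bad


if __name__ == "__main__":
    for py_ver, gpu, mode in find_bad_entries():
        print(f"py{py_ver}: {gpu} cannot run in {mode} mode")

Run locally before editing the matrix, a check like this could catch pairs such as rtx2080 + MCDM before they reach CI.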
--- ci/test-matrix.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/test-matrix.json b/ci/test-matrix.json index f6f3542316..6c5f201acb 100644 --- a/ci/test-matrix.json +++ b/ci/test-matrix.json @@ -40,10 +40,10 @@ { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "rtx4090", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, - { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "rtx2080", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, + { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "v100", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, - { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "rtx4090", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, + { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "MCDM" } ], From da63359941b18ef6847d89a359e6dca9d2cad006 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Tue, 18 Nov 2025 02:31:10 -0500 Subject: [PATCH 23/24] fix silly typo --- cuda_core/tests/test_memory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py index 60905bee40..e508be03df 100644 --- a/cuda_core/tests/test_memory.py +++ b/cuda_core/tests/test_memory.py @@ -333,7 +333,7 @@ class SECURITY_ATTRIBUTES(ctypes.Structure): if IS_WINDOWS: return (("win32", get_sa()), ("win32_kmt", None)) else: - return (("posix", None),) + return (("posix_fd", None),) @pytest.mark.parametrize("use_device_object", [True, False]) From 6c8cbcb401e2a9cd50ada02a9ec28c272de8a095 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Tue, 18 Nov 2025 15:02:36 +0000 Subject: [PATCH 24/24] fix stupid negation --- cuda_core/tests/test_memory.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py index e508be03df..21dee23280 100644 --- a/cuda_core/tests/test_memory.py +++ b/cuda_core/tests/test_memory.py @@ -406,7 +406,7 @@ def test_vmm_allocator_policy_configuration(): location_type="device", granularity="minimum", gpu_direct_rdma=True, - handle_type="posix_fd" if IS_WINDOWS else "win32_kmt", + handle_type="posix_fd" if not IS_WINDOWS else "win32_kmt", peers=(), self_access="rw", peer_access="rw", @@ -430,7 +430,7 @@ def test_vmm_allocator_policy_configuration(): location_type="device", granularity="recommended", gpu_direct_rdma=False, - handle_type="posix_fd" if IS_WINDOWS else "win32_kmt", + handle_type="posix_fd" if not IS_WINDOWS else "win32_kmt", peers=(), self_access="r", # Read-only access peer_access="r",
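For reference, the Windows path these tests exercise: with handle_type="win32", win32_handle_metadata carries the address of a SECURITY_ATTRIBUTES block (that is what the test helper passes via ctypes.addressof), while "win32_kmt" and "posix_fd" take no metadata. The sketch below mirrors the test helper; the import paths and the convention of returning the ctypes object so the caller keeps it alive are assumptions, not part of the documented API.

import ctypes
from ctypes import wintypes

# Import paths are assumed here.
from cuda.core.experimental import Device, VirtualMemoryResource, VirtualMemoryResourceOptions


class SECURITY_ATTRIBUTES(ctypes.Structure):
    # Same layout as the helper in test_memory.py.
    _fields_ = [
        ("nLength", wintypes.DWORD),
        ("lpSecurityDescriptor", wintypes.LPVOID),
        ("bInheritHandle", wintypes.BOOL),
    ]


def make_win32_vmm(device: Device):
    """Build a VirtualMemoryResource that exports win32 shareable handles."""
    sa = SECURITY_ATTRIBUTES()
    sa.nLength = ctypes.sizeof(sa)
    sa.lpSecurityDescriptor = None  # default security descriptor
    sa.bInheritHandle = False
    options = VirtualMemoryResourceOptions(
        handle_type="win32",
        win32_handle_metadata=ctypes.addressof(sa),
    )
    # Return sa as well: the options only store its address, so the caller
    # must keep the ctypes object alive for the lifetime of the resource.
    return VirtualMemoryResource(device, config=options), sa

Allocation then proceeds as in the tests: the resource's allocate(nbytes) returns a Buffer that is released with close().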