From f92ee6c0818cbd9c65f8ce79b5af78d22ea52a9f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 16 Nov 2025 19:57:09 +0000 Subject: [PATCH 01/24] Initial plan From 5e08b0778f5d862ec7b2076d629ff084930cc773 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 16 Nov 2025 20:00:45 +0000 Subject: [PATCH 02/24] Move install_gpu_driver.ps1 to ci/tools and update call sites Co-authored-by: leofang <5534781+leofang@users.noreply.github.com> --- .github/workflows/test-wheel-windows.yml | 2 +- {.github/workflows => ci/tools}/install_gpu_driver.ps1 | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename {.github/workflows => ci/tools}/install_gpu_driver.ps1 (100%) diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 93b81ff0a7..c27d9a7206 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -85,7 +85,7 @@ jobs: - name: Update driver run: | - .github/workflows/install_gpu_driver.ps1 + ci/tools/install_gpu_driver.ps1 - name: Ensure GPU is working run: nvidia-smi diff --git a/.github/workflows/install_gpu_driver.ps1 b/ci/tools/install_gpu_driver.ps1 similarity index 100% rename from .github/workflows/install_gpu_driver.ps1 rename to ci/tools/install_gpu_driver.ps1 From 2219f3baee92d3e32bb9d9fbac7d3f2638088f54 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 16 Nov 2025 20:02:00 +0000 Subject: [PATCH 03/24] Update install_gpu_driver.ps1 to support GPU type detection and driver modes Co-authored-by: leofang <5534781+leofang@users.noreply.github.com> --- ci/tools/install_gpu_driver.ps1 | 180 ++++++++++++++++++++++++++++++-- 1 file changed, 170 insertions(+), 10 deletions(-) diff --git a/ci/tools/install_gpu_driver.ps1 b/ci/tools/install_gpu_driver.ps1 index 256c5cf3a9..e7d57a141b 100644 --- a/ci/tools/install_gpu_driver.ps1 +++ b/ci/tools/install_gpu_driver.ps1 @@ -2,14 +2,158 @@ # # SPDX-License-Identifier: Apache-2.0 +# Detect GPU type from JOB_RUNNER environment variable +function Get-GPUType { + param( + [string]$JobRunner = $env:JOB_RUNNER + ) + + if ([string]::IsNullOrEmpty($JobRunner)) { + Write-Output "Warning: JOB_RUNNER environment variable not set. Using default GPU detection." 
+ return "unknown" + } + + # Extract GPU type from runner label (e.g., "windows-amd64-gpu-l4-latest-1") + if ($JobRunner -match "gpu-([^-]+)") { + $gpuType = $matches[1].ToLower() + Write-Output "Detected GPU type: $gpuType" + return $gpuType + } + + Write-Output "Warning: Could not parse GPU type from JOB_RUNNER: $JobRunner" + return "unknown" +} + +# Determine if GPU is a data center GPU +function Is-DataCenterGPU { + param( + [string]$GpuType + ) + + $dataCenterGPUs = @("l4", "a100", "t4", "h100", "a10", "a30", "a40") + return $dataCenterGPUs -contains $GpuType +} + +# Get driver URL and filename based on GPU type +function Get-DriverInfo { + param( + [string]$GpuType, + [string]$DriverMode = $env:DRIVER_MODE + ) + + $isDataCenter = Is-DataCenterGPU -GpuType $GpuType + + # Default driver version that supports Windows 11 & CUDA 13.0 + $driverVersion = "580.88" + + if ($isDataCenter) { + # Data center GPU - use Tesla driver + $filename = "$driverVersion-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe" + $url = "https://us.download.nvidia.com/tesla/$driverVersion/$filename" + } else { + # Desktop GPU - use GeForce/Quadro driver + $filename = "$driverVersion-desktop-win10-win11-64bit-international-dch-whql.exe" + $url = "https://us.download.nvidia.com/Windows/$driverVersion/$filename" + } + + return @{ + Url = $url + Filename = $filename + IsDataCenter = $isDataCenter + } +} + +# Set driver mode using nvidia-smi +function Set-DriverMode { + param( + [string]$DriverMode, + [bool]$IsDataCenter + ) + + if ([string]::IsNullOrEmpty($DriverMode)) { + Write-Output "No driver mode specified, skipping mode configuration" + return + } + + $DriverMode = $DriverMode.ToUpper() + Write-Output "Configuring driver mode: $DriverMode" + + if (-not $IsDataCenter) { + if ($DriverMode -ne "WDDM") { + Write-Output "Warning: Desktop GPUs only support WDDM mode. Requested mode '$DriverMode' will be ignored." + } + # Desktop GPUs are always in WDDM mode, no configuration needed + return + } + + # Data center GPUs support TCC and MCDM (not WDDM) + if ($DriverMode -eq "WDDM") { + Write-Output "Warning: Data center GPUs do not support WDDM mode. Skipping mode configuration." + return + } + + try { + # Check current mode + $currentMode = & nvidia-smi -q | Select-String -Pattern "Driver Mode" | Out-String + Write-Output "Current driver mode: $currentMode" + + if ($DriverMode -eq "TCC") { + # Set TCC mode (nvidia-smi -fdm 0 sets TCC mode) + Write-Output "Setting TCC mode..." + & nvidia-smi -fdm 0 + + # Verify mode was set + Write-Output "Resetting display device..." + # Reset display devices to apply the change + $devcon = "C:\Windows\System32\pnputil.exe" + if (Test-Path $devcon) { + & $devcon /restart-device "PCI\VEN_10DE*" + } + } elseif ($DriverMode -eq "MCDM") { + # Set MCDM mode (nvidia-smi -fdm 2 sets MCDM mode) + Write-Output "Setting MCDM mode..." + & nvidia-smi -fdm 2 + + # Verify mode was set + Write-Output "Resetting display device..." 
+ # Reset display devices to apply the change + $devcon = "C:\Windows\System32\pnputil.exe" + if (Test-Path $devcon) { + & $devcon /restart-device "PCI\VEN_10DE*" + } + } + + # Wait for device reset + Start-Sleep -Seconds 5 + + # Verify new mode + $newMode = & nvidia-smi -q | Select-String -Pattern "Driver Mode" | Out-String + Write-Output "New driver mode: $newMode" + } catch { + Write-Output "Warning: Failed to set driver mode: $_" + } +} + # Install the driver function Install-Driver { - - # Set the correct URL, filename, and arguments to the installer - # This driver is picked to support Windows 11 & CUDA 13.0 - $url = 'https://us.download.nvidia.com/tesla/580.88/580.88-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe'; - $file_dir = 'C:\NVIDIA-Driver\580.88-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe'; - $install_args = '/s /noeula /noreboot'; + param( + [string]$GpuType = (Get-GPUType), + [string]$DriverMode = $env:DRIVER_MODE + ) + + Write-Output "Installing GPU driver for GPU type: $GpuType" + + # Get driver information + $driverInfo = Get-DriverInfo -GpuType $GpuType -DriverMode $DriverMode + $url = $driverInfo.Url + $filename = $driverInfo.Filename + $isDataCenter = $driverInfo.IsDataCenter + + Write-Output "Driver URL: $url" + Write-Output "Is Data Center GPU: $isDataCenter" + + $file_dir = "C:\NVIDIA-Driver\$filename" + $install_args = '/s /noeula /noreboot' # Create the folder for the driver download if (!(Test-Path -Path 'C:\NVIDIA-Driver')) { @@ -21,14 +165,30 @@ function Install-Driver { $ProgressPreference_tmp = $ProgressPreference $ProgressPreference = 'SilentlyContinue' Write-Output 'Downloading the driver installer...' - Invoke-WebRequest $url -OutFile $file_dir - $ProgressPreference = $ProgressPreference_tmp - Write-Output 'Download complete!' + try { + Invoke-WebRequest $url -OutFile $file_dir + $ProgressPreference = $ProgressPreference_tmp + Write-Output 'Download complete!' + } catch { + $ProgressPreference = $ProgressPreference_tmp + Write-Output "Error downloading driver: $_" + Write-Output "Falling back to default driver..." + # Fall back to the original hardcoded driver if download fails + $url = 'https://us.download.nvidia.com/tesla/580.88/580.88-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe' + $file_dir = 'C:\NVIDIA-Driver\580.88-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe' + $ProgressPreference = 'SilentlyContinue' + Invoke-WebRequest $url -OutFile $file_dir + $ProgressPreference = $ProgressPreference_tmp + Write-Output 'Fallback download complete!' + } # Install the file with the specified path from earlier Write-Output 'Running the driver installer...' Start-Process -FilePath $file_dir -ArgumentList $install_args -Wait - Write-Output 'Done!' + Write-Output 'Driver installation complete!' 
+ + # Set driver mode if specified + Set-DriverMode -DriverMode $DriverMode -IsDataCenter $isDataCenter } # Run the functions From de5b109618c27f60a8576428d64a6fb909fce868 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 16 Nov 2025 20:03:05 +0000 Subject: [PATCH 04/24] Make nightly sections empty in ci/test-matrix.json Co-authored-by: leofang <5534781+leofang@users.noreply.github.com> --- ci/test-matrix.json | 50 ++------------------------------------------- 1 file changed, 2 insertions(+), 48 deletions(-) diff --git a/ci/test-matrix.json b/ci/test-matrix.json index d1110cfcde..02352d0e3c 100644 --- a/ci/test-matrix.json +++ b/ci/test-matrix.json @@ -25,48 +25,7 @@ { "ARCH": "arm64", "PY_VER": "3.14", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }, { "ARCH": "arm64", "PY_VER": "3.14t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" } ], - "nightly": [ - { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "earliest" }, - { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "earliest" }, - { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "earliest" }, - { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "earliest" }, - { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "arm64", "PY_VER": "3.10", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "earliest" }, - { "ARCH": "arm64", "PY_VER": "3.10", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }, - { "ARCH": "arm64", "PY_VER": "3.10", 
"CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }, - { "ARCH": "arm64", "PY_VER": "3.10", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" }, - { "ARCH": "arm64", "PY_VER": "3.10", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }, - { "ARCH": "arm64", "PY_VER": "3.11", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "earliest" }, - { "ARCH": "arm64", "PY_VER": "3.11", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }, - { "ARCH": "arm64", "PY_VER": "3.11", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }, - { "ARCH": "arm64", "PY_VER": "3.11", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" }, - { "ARCH": "arm64", "PY_VER": "3.11", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }, - { "ARCH": "arm64", "PY_VER": "3.12", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "earliest" }, - { "ARCH": "arm64", "PY_VER": "3.12", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }, - { "ARCH": "arm64", "PY_VER": "3.12", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }, - { "ARCH": "arm64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" }, - { "ARCH": "arm64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }, - { "ARCH": "arm64", "PY_VER": "3.13", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "earliest" }, - { "ARCH": "arm64", "PY_VER": "3.13", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }, - { "ARCH": "arm64", "PY_VER": "3.13", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }, - { "ARCH": "arm64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" }, - { "ARCH": "arm64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" } - ], + "nightly": [], "special_runners": { "amd64": [ { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "H100", "DRIVER": "latest" } @@ -84,11 +43,6 @@ { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "t4", "DRIVER": "latest" }, { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" } ], - "nightly": [ - { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "t4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "t4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" } - ] + "nightly": [] } } From 585e184e2c6b3707852ad9205101aeb3f42b5be4 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 16 Nov 2025 20:04:45 +0000 Subject: [PATCH 05/24] Expand Windows test matrix with driver mode support Co-authored-by: leofang <5534781+leofang@users.noreply.github.com> --- ci/test-matrix.json | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/ci/test-matrix.json b/ci/test-matrix.json index 02352d0e3c..1e9453f760 100644 --- a/ci/test-matrix.json +++ b/ci/test-matrix.json @@ -1,6 +1,6 @@ { 
"_description": "Test matrix configurations for CUDA Python CI workflows. This file consolidates the test matrices that were previously hardcoded in the workflow files. All GPU and ARCH values are hard-coded for each architecture: l4 GPU for amd64, a100 GPU for arm64.", - "_sorted_by": "Please keep matrices sorted in ascending order by [ARCH, PY_VER, CUDA_VER, LOCAL_CTK, GPU, DRIVER]", + "_sorted_by": "Please keep matrices sorted in ascending order by [ARCH, PY_VER, CUDA_VER, LOCAL_CTK, GPU, DRIVER]. Windows entries also include DRIVER_MODE.", "_notes": "DRIVER: 'earliest' does not work with CUDA 12.9.1 and LOCAL_CTK: 0 does not work with CUDA 12.0.1", "linux": { "pull-request": [ @@ -34,14 +34,18 @@ }, "windows": { "pull-request": [ - { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "t4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "t4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "t4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "t4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" } + { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, + { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, + { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, + { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "rtx6000ada", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, + { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, + { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, + { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, + { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, + { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, + { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "rtx6000ada", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, + { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, + { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "TCC" } ], "nightly": [] } From de4201177fb6e9496f7b1ddab8c60865e93ec27f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 16 Nov 2025 20:05:53 +0000 
Subject: [PATCH 06/24] Wire driver mode from test-matrix.json into Windows workflow Co-authored-by: leofang <5534781+leofang@users.noreply.github.com> --- .github/workflows/test-wheel-windows.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index c27d9a7206..2068d91a29 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -84,6 +84,9 @@ jobs: continue-on-error: true - name: Update driver + env: + DRIVER_MODE: ${{ matrix.DRIVER_MODE }} + JOB_RUNNER: windows-${{ matrix.ARCH }}-gpu-${{ matrix.GPU }}-${{ matrix.DRIVER }}-1 run: | ci/tools/install_gpu_driver.ps1 From 35fa1596c67edd67566dbdfd9354a264e41b0d6a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 16 Nov 2025 20:15:08 +0000 Subject: [PATCH 07/24] Update install_gpu_driver.ps1 to match CCCL implementation with driver mode support Co-authored-by: leofang <5534781+leofang@users.noreply.github.com> --- ci/tools/install_gpu_driver.ps1 | 232 +++++++++----------------------- 1 file changed, 65 insertions(+), 167 deletions(-) diff --git a/ci/tools/install_gpu_driver.ps1 b/ci/tools/install_gpu_driver.ps1 index e7d57a141b..58f825fd39 100644 --- a/ci/tools/install_gpu_driver.ps1 +++ b/ci/tools/install_gpu_driver.ps1 @@ -2,158 +2,44 @@ # # SPDX-License-Identifier: Apache-2.0 -# Detect GPU type from JOB_RUNNER environment variable -function Get-GPUType { - param( - [string]$JobRunner = $env:JOB_RUNNER - ) - - if ([string]::IsNullOrEmpty($JobRunner)) { - Write-Output "Warning: JOB_RUNNER environment variable not set. Using default GPU detection." - return "unknown" - } - - # Extract GPU type from runner label (e.g., "windows-amd64-gpu-l4-latest-1") - if ($JobRunner -match "gpu-([^-]+)") { - $gpuType = $matches[1].ToLower() - Write-Output "Detected GPU type: $gpuType" - return $gpuType - } - - Write-Output "Warning: Could not parse GPU type from JOB_RUNNER: $JobRunner" - return "unknown" -} +# Sourced from https://github.com/NVIDIA/cccl -# Determine if GPU is a data center GPU -function Is-DataCenterGPU { - param( - [string]$GpuType - ) - - $dataCenterGPUs = @("l4", "a100", "t4", "h100", "a10", "a30", "a40") - return $dataCenterGPUs -contains $GpuType -} +# Install the driver +function Install-Driver { + + # Set the correct URL, filename, and arguments to the installer + # This driver is picked to support Windows 11 & CUDA 13.0 + $version = '581.15' -# Get driver URL and filename based on GPU type -function Get-DriverInfo { - param( - [string]$GpuType, - [string]$DriverMode = $env:DRIVER_MODE - ) - - $isDataCenter = Is-DataCenterGPU -GpuType $GpuType - - # Default driver version that supports Windows 11 & CUDA 13.0 - $driverVersion = "580.88" - - if ($isDataCenter) { - # Data center GPU - use Tesla driver - $filename = "$driverVersion-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe" - $url = "https://us.download.nvidia.com/tesla/$driverVersion/$filename" + # extract gpu type from gha runner label: + # Labels are in the form: --gpu--- + $gha_runner_label=$env:JOB_RUNNER + $gpu_type = $gha_runner_label.Split('-')[3] + + $data_center_gpus = @('a100', 'h100', 'l4', 't4', 'v100', 'rtxa6000', 'rtx6000ada') + $desktop_gpus = @('rtx2080', 'rtx4090', 'rtxpro6000') + + if ($data_center_gpus -contains $gpu_type) { + Write-Output "Data center GPU detected: $gpu_type" + 
$filename="$version-data-center-tesla-desktop-winserver-2022-2025-dch-international.exe" + $server_path="tesla/$version" + } elseif ($desktop_gpus -contains $gpu_type) { + Write-Output "Desktop GPU detected: $gpu_type" + $filename="$version-desktop-win10-win11-64bit-international-dch-whql.exe" + $server_path="Windows/$version" } else { - # Desktop GPU - use GeForce/Quadro driver - $filename = "$driverVersion-desktop-win10-win11-64bit-international-dch-whql.exe" - $url = "https://us.download.nvidia.com/Windows/$driverVersion/$filename" - } - - return @{ - Url = $url - Filename = $filename - IsDataCenter = $isDataCenter + Write-Output "Unknown GPU type: $gpu_type" + exit 1 } -} -# Set driver mode using nvidia-smi -function Set-DriverMode { - param( - [string]$DriverMode, - [bool]$IsDataCenter - ) - - if ([string]::IsNullOrEmpty($DriverMode)) { - Write-Output "No driver mode specified, skipping mode configuration" - return - } - - $DriverMode = $DriverMode.ToUpper() - Write-Output "Configuring driver mode: $DriverMode" - - if (-not $IsDataCenter) { - if ($DriverMode -ne "WDDM") { - Write-Output "Warning: Desktop GPUs only support WDDM mode. Requested mode '$DriverMode' will be ignored." - } - # Desktop GPUs are always in WDDM mode, no configuration needed - return - } - - # Data center GPUs support TCC and MCDM (not WDDM) - if ($DriverMode -eq "WDDM") { - Write-Output "Warning: Data center GPUs do not support WDDM mode. Skipping mode configuration." - return - } - - try { - # Check current mode - $currentMode = & nvidia-smi -q | Select-String -Pattern "Driver Mode" | Out-String - Write-Output "Current driver mode: $currentMode" - - if ($DriverMode -eq "TCC") { - # Set TCC mode (nvidia-smi -fdm 0 sets TCC mode) - Write-Output "Setting TCC mode..." - & nvidia-smi -fdm 0 - - # Verify mode was set - Write-Output "Resetting display device..." - # Reset display devices to apply the change - $devcon = "C:\Windows\System32\pnputil.exe" - if (Test-Path $devcon) { - & $devcon /restart-device "PCI\VEN_10DE*" - } - } elseif ($DriverMode -eq "MCDM") { - # Set MCDM mode (nvidia-smi -fdm 2 sets MCDM mode) - Write-Output "Setting MCDM mode..." - & nvidia-smi -fdm 2 - - # Verify mode was set - Write-Output "Resetting display device..." 
- # Reset display devices to apply the change - $devcon = "C:\Windows\System32\pnputil.exe" - if (Test-Path $devcon) { - & $devcon /restart-device "PCI\VEN_10DE*" - } - } - - # Wait for device reset - Start-Sleep -Seconds 5 - - # Verify new mode - $newMode = & nvidia-smi -q | Select-String -Pattern "Driver Mode" | Out-String - Write-Output "New driver mode: $newMode" - } catch { - Write-Output "Warning: Failed to set driver mode: $_" - } -} + $url="https://us.download.nvidia.com/$server_path/$filename" + $filepath="C:\NVIDIA-Driver\$filename" -# Install the driver -function Install-Driver { - param( - [string]$GpuType = (Get-GPUType), - [string]$DriverMode = $env:DRIVER_MODE - ) - - Write-Output "Installing GPU driver for GPU type: $GpuType" - - # Get driver information - $driverInfo = Get-DriverInfo -GpuType $GpuType -DriverMode $DriverMode - $url = $driverInfo.Url - $filename = $driverInfo.Filename - $isDataCenter = $driverInfo.IsDataCenter - - Write-Output "Driver URL: $url" - Write-Output "Is Data Center GPU: $isDataCenter" - - $file_dir = "C:\NVIDIA-Driver\$filename" - $install_args = '/s /noeula /noreboot' + Write-Output "Installing NVIDIA driver version $version for GPU type $gpu_type" + Write-Output "Download URL: $url" + + # Silent install arguments + $install_args = '/s /noeula /noreboot'; # Create the folder for the driver download if (!(Test-Path -Path 'C:\NVIDIA-Driver')) { @@ -165,30 +51,42 @@ function Install-Driver { $ProgressPreference_tmp = $ProgressPreference $ProgressPreference = 'SilentlyContinue' Write-Output 'Downloading the driver installer...' - try { - Invoke-WebRequest $url -OutFile $file_dir - $ProgressPreference = $ProgressPreference_tmp - Write-Output 'Download complete!' - } catch { - $ProgressPreference = $ProgressPreference_tmp - Write-Output "Error downloading driver: $_" - Write-Output "Falling back to default driver..." - # Fall back to the original hardcoded driver if download fails - $url = 'https://us.download.nvidia.com/tesla/580.88/580.88-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe' - $file_dir = 'C:\NVIDIA-Driver\580.88-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe' - $ProgressPreference = 'SilentlyContinue' - Invoke-WebRequest $url -OutFile $file_dir - $ProgressPreference = $ProgressPreference_tmp - Write-Output 'Fallback download complete!' - } + Invoke-WebRequest $url -OutFile $filepath + $ProgressPreference = $ProgressPreference_tmp + Write-Output 'Download complete!' # Install the file with the specified path from earlier Write-Output 'Running the driver installer...' - Start-Process -FilePath $file_dir -ArgumentList $install_args -Wait - Write-Output 'Driver installation complete!' - - # Set driver mode if specified - Set-DriverMode -DriverMode $DriverMode -IsDataCenter $isDataCenter + Start-Process -FilePath $filepath -ArgumentList $install_args -Wait + Write-Output 'Done!' + + # Handle driver mode configuration for data center GPUs + $driver_mode = $env:DRIVER_MODE + if ($data_center_gpus -contains $gpu_type) { + if ($driver_mode -eq "MCDM") { + Write-Output "Setting driver mode to MCDM..." + nvidia-smi -fdm 2 + pnputil /disable-device /class Display + pnputil /enable-device /class Display + # Give it a minute to settle: + Start-Sleep -Seconds 5 + } elseif ($driver_mode -eq "TCC") { + Write-Output "Setting driver mode to TCC..." 
+ nvidia-smi -fdm 0 + pnputil /disable-device /class Display + pnputil /enable-device /class Display + # Give it a minute to settle: + Start-Sleep -Seconds 5 + } else { + # Default: TCC -> MCDM on data center GPUs + Write-Output "Setting driver mode to MCDM (default)..." + nvidia-smi -fdm 2 + pnputil /disable-device /class Display + pnputil /enable-device /class Display + # Give it a minute to settle: + Start-Sleep -Seconds 5 + } + } } # Run the functions From da32f6c086f4791771cca3ad6630689086ed6801 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 16 Nov 2025 20:42:28 +0000 Subject: [PATCH 08/24] Simplify driver mode handling per review feedback Co-authored-by: leofang <5534781+leofang@users.noreply.github.com> --- ci/tools/install_gpu_driver.ps1 | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/ci/tools/install_gpu_driver.ps1 b/ci/tools/install_gpu_driver.ps1 index 58f825fd39..3929ebe720 100644 --- a/ci/tools/install_gpu_driver.ps1 +++ b/ci/tools/install_gpu_driver.ps1 @@ -2,8 +2,6 @@ # # SPDX-License-Identifier: Apache-2.0 -# Sourced from https://github.com/NVIDIA/cccl - # Install the driver function Install-Driver { @@ -60,9 +58,10 @@ function Install-Driver { Start-Process -FilePath $filepath -ArgumentList $install_args -Wait Write-Output 'Done!' - # Handle driver mode configuration for data center GPUs + # Handle driver mode configuration $driver_mode = $env:DRIVER_MODE if ($data_center_gpus -contains $gpu_type) { + # Data center GPUs: TCC -> MCDM if ($driver_mode -eq "MCDM") { Write-Output "Setting driver mode to MCDM..." nvidia-smi -fdm 2 @@ -70,16 +69,11 @@ function Install-Driver { pnputil /enable-device /class Display # Give it a minute to settle: Start-Sleep -Seconds 5 - } elseif ($driver_mode -eq "TCC") { - Write-Output "Setting driver mode to TCC..." - nvidia-smi -fdm 0 - pnputil /disable-device /class Display - pnputil /enable-device /class Display - # Give it a minute to settle: - Start-Sleep -Seconds 5 - } else { - # Default: TCC -> MCDM on data center GPUs - Write-Output "Setting driver mode to MCDM (default)..." + } + } elseif ($desktop_gpus -contains $gpu_type) { + # Desktop GPUs: WDDM -> MCDM + if ($driver_mode -eq "MCDM") { + Write-Output "Setting driver mode to MCDM..." 
nvidia-smi -fdm 2 pnputil /disable-device /class Display pnputil /enable-device /class Display From a4a65ad9090c7119c3f93c89aa61afb307cdb839 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 16 Nov 2025 20:50:03 +0000 Subject: [PATCH 09/24] Use GPU_TYPE env var instead of parsing JOB_RUNNER Co-authored-by: leofang <5534781+leofang@users.noreply.github.com> --- .github/workflows/test-wheel-windows.yml | 2 +- ci/tools/install_gpu_driver.ps1 | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 2068d91a29..be229857b9 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -86,7 +86,7 @@ jobs: - name: Update driver env: DRIVER_MODE: ${{ matrix.DRIVER_MODE }} - JOB_RUNNER: windows-${{ matrix.ARCH }}-gpu-${{ matrix.GPU }}-${{ matrix.DRIVER }}-1 + GPU_TYPE: ${{ matrix.GPU }} run: | ci/tools/install_gpu_driver.ps1 diff --git a/ci/tools/install_gpu_driver.ps1 b/ci/tools/install_gpu_driver.ps1 index 3929ebe720..3a1722cb96 100644 --- a/ci/tools/install_gpu_driver.ps1 +++ b/ci/tools/install_gpu_driver.ps1 @@ -9,10 +9,8 @@ function Install-Driver { # This driver is picked to support Windows 11 & CUDA 13.0 $version = '581.15' - # extract gpu type from gha runner label: - # Labels are in the form: --gpu--- - $gha_runner_label=$env:JOB_RUNNER - $gpu_type = $gha_runner_label.Split('-')[3] + # Get GPU type from environment variable + $gpu_type = $env:GPU_TYPE $data_center_gpus = @('a100', 'h100', 'l4', 't4', 'v100', 'rtxa6000', 'rtx6000ada') $desktop_gpus = @('rtx2080', 'rtx4090', 'rtxpro6000') From 16b0e3fe2e7322b248750f74f9ef46903adb2c36 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 16 Nov 2025 15:56:57 -0500 Subject: [PATCH 10/24] ensure each GPU kind are tested under two modes --- ci/test-matrix.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/test-matrix.json b/ci/test-matrix.json index 1e9453f760..88b99ea0ee 100644 --- a/ci/test-matrix.json +++ b/ci/test-matrix.json @@ -38,12 +38,12 @@ { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "rtx6000ada", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, - { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, - { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, + { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, + { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, - { 
"ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "rtx6000ada", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, + { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "rtx6000ada", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "TCC" } ], From f789922ac35c4e41d2855946487501808e2e3d66 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 16 Nov 2025 16:35:27 -0500 Subject: [PATCH 11/24] fix arch coverage - we do not have access to rtx6000ada - rtxpro6000 is a datacenter card - cover WDDM in at least 2 pipelines --- ci/test-matrix.json | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ci/test-matrix.json b/ci/test-matrix.json index 88b99ea0ee..45fcda5734 100644 --- a/ci/test-matrix.json +++ b/ci/test-matrix.json @@ -34,17 +34,17 @@ }, "windows": { "pull-request": [ - { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, - { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, + { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "rtx2080", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, + { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, - { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "rtx6000ada", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, + { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "rtx4090", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, - { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, + { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, - { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "rtx6000ada", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, - { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, + { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "rtx4090", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, + { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, { "ARCH": "amd64", 
"PY_VER": "3.14t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "TCC" } ], "nightly": [] From f2ffbb18d56a0c84c1879baafb458d281c79b9ce Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 16 Nov 2025 16:55:11 -0500 Subject: [PATCH 12/24] make script more flexible; ensure cover 6 different GPUs, each with 2 different modes rtx2080, rtx4090, rtxpro6000, v100, a100, l4 (t4 nodes are too slow) --- ci/test-matrix.json | 12 +++++------ ci/tools/install_gpu_driver.ps1 | 37 +++++++++++++++------------------ 2 files changed, 23 insertions(+), 26 deletions(-) diff --git a/ci/test-matrix.json b/ci/test-matrix.json index 45fcda5734..d70da90016 100644 --- a/ci/test-matrix.json +++ b/ci/test-matrix.json @@ -39,13 +39,13 @@ { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "rtx4090", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, - { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, - { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, - { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, - { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, + { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "v100", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, + { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "rtx2080", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, + { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, + { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "rtx4090", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, - { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, - { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "TCC" } + { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, + { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "v100", "DRIVER": "latest", "DRIVER_MODE": "MCDM" } ], "nightly": [] } diff --git a/ci/tools/install_gpu_driver.ps1 b/ci/tools/install_gpu_driver.ps1 index 3a1722cb96..5602eeb489 100644 --- a/ci/tools/install_gpu_driver.ps1 +++ b/ci/tools/install_gpu_driver.ps1 @@ -57,28 +57,25 @@ function Install-Driver { Write-Output 'Done!' # Handle driver mode configuration + # This assumes we have the prior knowledge on which GPU can use which mode. 
$driver_mode = $env:DRIVER_MODE - if ($data_center_gpus -contains $gpu_type) { - # Data center GPUs: TCC -> MCDM - if ($driver_mode -eq "MCDM") { - Write-Output "Setting driver mode to MCDM..." - nvidia-smi -fdm 2 - pnputil /disable-device /class Display - pnputil /enable-device /class Display - # Give it a minute to settle: - Start-Sleep -Seconds 5 - } - } elseif ($desktop_gpus -contains $gpu_type) { - # Desktop GPUs: WDDM -> MCDM - if ($driver_mode -eq "MCDM") { - Write-Output "Setting driver mode to MCDM..." - nvidia-smi -fdm 2 - pnputil /disable-device /class Display - pnputil /enable-device /class Display - # Give it a minute to settle: - Start-Sleep -Seconds 5 - } + if ($driver_mode -eq "WDDM") { + Write-Output "Setting driver mode to WDDM..." + nvidia-smi -fdm 0 + } elseif ($driver_mode -eq "TCC") { + Write-Output "Setting driver mode to TCC..." + nvidia-smi -fdm 1 + } elseif ($driver_mode -eq "MCDM") { + Write-Output "Setting driver mode to MCDM..." + nvidia-smi -fdm 2 + } else { + Write-Output "Unknown driver mode: $driver_mode" + exit 1 } + pnputil /disable-device /class Display + pnputil /enable-device /class Display + # Give it a minute to settle: + Start-Sleep -Seconds 5 } # Run the functions From 0293947492808a93c58e8194b991da54a60a38bd Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 17 Nov 2025 00:34:02 +0000 Subject: [PATCH 13/24] Add driver mode verification and change v100 to rtxpro6000 for CUDA 13 Co-authored-by: leofang <5534781+leofang@users.noreply.github.com> --- .github/workflows/test-wheel-windows.yml | 12 ++++++++++++ ci/test-matrix.json | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index be229857b9..2e3737af14 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -93,6 +93,18 @@ jobs: - name: Ensure GPU is working run: nvidia-smi + - name: Verify driver mode + env: + EXPECTED_MODE: ${{ matrix.DRIVER_MODE }} + run: | + $mode_output = nvidia-smi -q | Select-String -Pattern "Driver Model" + Write-Output "Driver mode check: $mode_output" + if ($mode_output -notmatch "$env:EXPECTED_MODE") { + Write-Error "Expected driver mode $env:EXPECTED_MODE but got: $mode_output" + exit 1 + } + Write-Output "Driver mode verified: $env:EXPECTED_MODE" + - name: Set environment variables env: BUILD_CUDA_VER: ${{ inputs.build-ctk-ver }} diff --git a/ci/test-matrix.json b/ci/test-matrix.json index d70da90016..0b7dc3435c 100644 --- a/ci/test-matrix.json +++ b/ci/test-matrix.json @@ -45,7 +45,7 @@ { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "rtx4090", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, - { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "v100", "DRIVER": "latest", "DRIVER_MODE": "MCDM" } + { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "MCDM" } ], "nightly": [] } From 1706a0651aee2e89754f1735991e4c434b5d3082 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 16 Nov 2025 19:51:08 -0500 Subject: [PATCH 14/24] fix --- 
.github/workflows/test-wheel-windows.yml | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 2e3737af14..12d67d7aa0 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -94,16 +94,14 @@ jobs: run: nvidia-smi - name: Verify driver mode - env: - EXPECTED_MODE: ${{ matrix.DRIVER_MODE }} run: | - $mode_output = nvidia-smi -q | Select-String -Pattern "Driver Model" + $mode_output = nvidia-smi | Select-String -Pattern "${{ matrix.DRIVER_MODE }}" Write-Output "Driver mode check: $mode_output" - if ($mode_output -notmatch "$env:EXPECTED_MODE") { - Write-Error "Expected driver mode $env:EXPECTED_MODE but got: $mode_output" + if ($mode_output -notmatch "${{ matrix.DRIVER_MODE }}") { + Write-Error "Expected driver mode ${{ matrix.DRIVER_MODE }} but got: $mode_output" exit 1 } - Write-Output "Driver mode verified: $env:EXPECTED_MODE" + Write-Output "Driver mode verified: ${{ matrix.DRIVER_MODE }}" - name: Set environment variables env: From 2393b68aae4018b125bd3acebd54f68eb380b77d Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 16 Nov 2025 19:52:04 -0500 Subject: [PATCH 15/24] merge Removed redundant 'Ensure GPU is working' step and kept the driver mode verification. --- .github/workflows/test-wheel-windows.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 12d67d7aa0..e20124adc5 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -91,10 +91,9 @@ jobs: ci/tools/install_gpu_driver.ps1 - name: Ensure GPU is working - run: nvidia-smi - - - name: Verify driver mode run: | + nvidia-smi + $mode_output = nvidia-smi | Select-String -Pattern "${{ matrix.DRIVER_MODE }}" Write-Output "Driver mode check: $mode_output" if ($mode_output -notmatch "${{ matrix.DRIVER_MODE }}") { From e363f0ef6b4eb0dc90188788da677d657fd107bd Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 16 Nov 2025 20:39:09 -0500 Subject: [PATCH 16/24] ensure using CTK 12.x with V100 + driver mode check can fail --- .github/workflows/test-wheel-windows.yml | 4 ++-- ci/test-matrix.json | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index e20124adc5..f695ffc3d6 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -96,8 +96,8 @@ jobs: $mode_output = nvidia-smi | Select-String -Pattern "${{ matrix.DRIVER_MODE }}" Write-Output "Driver mode check: $mode_output" - if ($mode_output -notmatch "${{ matrix.DRIVER_MODE }}") { - Write-Error "Expected driver mode ${{ matrix.DRIVER_MODE }} but got: $mode_output" + if ($mode_output -eq "") { + Write-Error "Switching to driver mode ${{ matrix.DRIVER_MODE }} failed!" 
exit 1 } Write-Output "Driver mode verified: ${{ matrix.DRIVER_MODE }}" diff --git a/ci/test-matrix.json b/ci/test-matrix.json index 0b7dc3435c..b950abaed9 100644 --- a/ci/test-matrix.json +++ b/ci/test-matrix.json @@ -36,16 +36,16 @@ "pull-request": [ { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "rtx2080", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, - { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, + { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "v100", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "rtx4090", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, - { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "v100", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, + { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "rtx2080", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, - { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, + { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "v100", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "rtx4090", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, - { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "MCDM" } + { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "MCDM" } ], "nightly": [] } From 3370245c04dda737f35ea4916178576e562a631f Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 16 Nov 2025 21:27:00 -0500 Subject: [PATCH 17/24] fix syntax --- .github/workflows/test-wheel-windows.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index f695ffc3d6..23384e1af6 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -96,7 +96,7 @@ jobs: $mode_output = nvidia-smi | Select-String -Pattern "${{ matrix.DRIVER_MODE }}" Write-Output "Driver mode check: $mode_output" - if ($mode_output -eq "") { + if ("$mode_output" -eq "") { Write-Error "Switching to driver mode ${{ matrix.DRIVER_MODE }} failed!" 
exit 1 } From c7abbdf1b401da6ff652ae67a8ab01c2b19916cd Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 16 Nov 2025 22:14:49 -0500 Subject: [PATCH 18/24] avoid testing Quadro + WDDM; make driver mode show up in pipeline names --- .github/workflows/test-wheel-linux.yml | 2 +- .github/workflows/test-wheel-windows.yml | 2 +- ci/test-matrix.json | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml index f763d65b9d..5d2eeb5282 100644 --- a/.github/workflows/test-wheel-linux.yml +++ b/.github/workflows/test-wheel-linux.yml @@ -76,7 +76,7 @@ jobs: echo "MATRIX=${MATRIX}" | tee --append "${GITHUB_OUTPUT}" test: - name: py${{ matrix.PY_VER }}, ${{ matrix.CUDA_VER }}, ${{ (matrix.LOCAL_CTK == '1' && 'local') || 'wheels' }}, GPU ${{ matrix.GPU }} + name: py${{ matrix.PY_VER }}, ${{ matrix.CUDA_VER }}, ${{ (matrix.LOCAL_CTK == '1' && 'local') || 'wheels' }}, ${{ matrix.GPU }} needs: compute-matrix strategy: fail-fast: false diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 23384e1af6..87b1653bf9 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -65,7 +65,7 @@ jobs: echo "MATRIX=${MATRIX}" | tee --append "${GITHUB_OUTPUT}" test: - name: py${{ matrix.PY_VER }}, ${{ matrix.CUDA_VER }}, ${{ (matrix.LOCAL_CTK == '1' && 'local') || 'wheels' }}, GPU ${{ matrix.GPU }} + name: py${{ matrix.PY_VER }}, ${{ matrix.CUDA_VER }}, ${{ (matrix.LOCAL_CTK == '1' && 'local') || 'wheels' }}, ${{ matrix.GPU }} (${{ matrix.DRIVER_MODE }}) # The build stage could fail but we want the CI to keep moving. needs: compute-matrix strategy: diff --git a/ci/test-matrix.json b/ci/test-matrix.json index b950abaed9..f6f3542316 100644 --- a/ci/test-matrix.json +++ b/ci/test-matrix.json @@ -41,7 +41,7 @@ { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "rtx2080", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, - { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, + { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "v100", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "rtx4090", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, From 7dc0f916a0d406bbce813e292ee5c9630b5a2ab8 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 16 Nov 2025 22:52:07 -0500 Subject: [PATCH 19/24] add missing `test-cu12-ft` dep group --- cuda_core/pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/cuda_core/pyproject.toml b/cuda_core/pyproject.toml index 31ceb5b1a5..45a90bb8c4 100644 --- a/cuda_core/pyproject.toml +++ b/cuda_core/pyproject.toml @@ -56,6 +56,7 @@ test-cu12 = ["cuda-core[test]", "cupy-cuda12x; python_version < '3.14'", "cuda-t test-cu13 = ["cuda-core[test]", 
"cupy-cuda13x; python_version < '3.14'", "cuda-toolkit[cudart]==13.*"] # runtime headers needed by CuPy # free threaded build, cupy doesn't support free-threaded builds yet, so avoid installing it for now # TODO: cupy should support free threaded builds +test-cu12-ft = ["cuda-core[test]", "cuda-toolkit[cudart]==12.*"] test-cu13-ft = ["cuda-core[test]", "cuda-toolkit[cudart]==13.*"] [project.urls] From 85c0059af61dc5785bf9ed15ec15a46215ab53fe Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Mon, 17 Nov 2025 22:16:53 -0800 Subject: [PATCH 20/24] fix VMM on Windows --- .../_memory/_virtual_memory_resource.py | 4 +- cuda_core/tests/test_memory.py | 48 ++++++++++++++++--- 2 files changed, 45 insertions(+), 7 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_memory/_virtual_memory_resource.py b/cuda_core/cuda/core/experimental/_memory/_virtual_memory_resource.py index c17c30bc97..1ebc9dfd48 100644 --- a/cuda_core/cuda/core/experimental/_memory/_virtual_memory_resource.py +++ b/cuda_core/cuda/core/experimental/_memory/_virtual_memory_resource.py @@ -70,6 +70,7 @@ class VirtualMemoryResourceOptions: peers: Iterable[int] = field(default_factory=tuple) self_access: VirtualMemoryAccessTypeT = "rw" peer_access: VirtualMemoryAccessTypeT = "rw" + win32_handle_metadata: int | None = 0 _a = driver.CUmemAccess_flags _access_flags = {"rw": _a.CU_MEM_ACCESS_FLAGS_PROT_READWRITE, "r": _a.CU_MEM_ACCESS_FLAGS_PROT_READ, None: 0} @@ -212,6 +213,7 @@ def modify_allocation(self, buf: Buffer, new_size: int, config: VirtualMemoryRes prop.location.id = self.device.device_id prop.allocFlags.gpuDirectRDMACapable = 1 if self.config.gpu_direct_rdma else 0 prop.requestedHandleTypes = VirtualMemoryResourceOptions._handle_type_to_driver(self.config.handle_type) + prop.win32HandleMetaData = self.config.win32_handle_metadata if self.config.win32_handle_metadata else 0 # Query granularity gran_flag = VirtualMemoryResourceOptions._granularity_to_driver(self.config.granularity) @@ -495,11 +497,11 @@ def allocate(self, size: int, stream: Stream = None) -> Buffer: # ---- Build allocation properties ---- prop = driver.CUmemAllocationProp() prop.type = VirtualMemoryResourceOptions._allocation_type_to_driver(config.allocation_type) - prop.location.type = VirtualMemoryResourceOptions._location_type_to_driver(config.location_type) prop.location.id = self.device.device_id if config.location_type == "device" else -1 prop.allocFlags.gpuDirectRDMACapable = 1 if config.gpu_direct_rdma else 0 prop.requestedHandleTypes = VirtualMemoryResourceOptions._handle_type_to_driver(config.handle_type) + prop.win32HandleMetaData = self.config.win32_handle_metadata if self.config.win32_handle_metadata else 0 # ---- Query and apply granularity ---- # Choose min vs recommended granularity per config diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py index d960e6ee12..9c88f1d0e3 100644 --- a/cuda_core/tests/test_memory.py +++ b/cuda_core/tests/test_memory.py @@ -1,6 +1,8 @@ # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 +import ctypes +from ctypes import wintypes import sys try: @@ -28,6 +30,8 @@ from cuda.core.experimental._memory import IPCBufferDescriptor from cuda.core.experimental._utils.cuda_utils import handle_return from cuda.core.experimental.utils import StridedMemoryView + +from helpers import IS_WINDOWS from helpers.buffers import DummyUnifiedMemoryResource from cuda_python_test_helpers import supports_ipc_mempool @@ -312,8 +316,31 @@ def test_device_memory_resource_initialization(mempool_device, use_device_object buffer.close() +def get_handle_type(): + def get_sa(): + class SECURITY_ATTRIBUTES(ctypes.Structure): + _fields_ = [ + ("nLength", wintypes.DWORD), + ("lpSecurityDescriptor", wintypes.LPVOID), + ("bInheritHandle", wintypes.BOOL) + ] + + sa = SECURITY_ATTRIBUTES() + sa.nLength = ctypes.sizeof(sa) + sa.lpSecurityDescriptor = None + sa.bInheritHandle = False # TODO: why? + + return sa + + if IS_WINDOWS: + return (("win32", get_sa()), ("win32_kmt", None)) + else: + return (("posix", None),) + + @pytest.mark.parametrize("use_device_object", [True, False]) -def test_vmm_allocator_basic_allocation(use_device_object): +@pytest.mark.parametrize("handle_type", get_handle_type()) +def test_vmm_allocator_basic_allocation(use_device_object, handle_type): """Test basic VMM allocation functionality. This test verifies that VirtualMemoryResource can allocate memory @@ -326,7 +353,11 @@ def test_vmm_allocator_basic_allocation(use_device_object): if not device.properties.virtual_memory_management_supported: pytest.skip("Virtual memory management is not supported on this device") - options = VirtualMemoryResourceOptions() + handle_type, security_attribute = handle_type # unpack + win32_handle_metadata = ctypes.addressof(security_attribute) if security_attribute else 0 + options = VirtualMemoryResourceOptions( + handle_type=handle_type, win32_handle_metadata=win32_handle_metadata, + ) # Create VMM allocator with default config device_arg = device if use_device_object else device.device_id vmm_mr = VirtualMemoryResource(device_arg, config=options) @@ -376,7 +407,7 @@ def test_vmm_allocator_policy_configuration(): location_type="device", granularity="minimum", gpu_direct_rdma=True, - handle_type="posix_fd" if platform.system() != "Windows" else "win32", + handle_type="posix_fd" if IS_WINDOWS else "win32_kmt", peers=(), self_access="rw", peer_access="rw", @@ -400,7 +431,7 @@ def test_vmm_allocator_policy_configuration(): location_type="device", granularity="recommended", gpu_direct_rdma=False, - handle_type="posix_fd", + handle_type="posix_fd" if IS_WINDOWS else "win32_kmt", peers=(), self_access="r", # Read-only access peer_access="r", @@ -416,7 +447,8 @@ def test_vmm_allocator_policy_configuration(): modified_buffer.close() -def test_vmm_allocator_grow_allocation(): +@pytest.mark.parametrize("handle_type", get_handle_type()) +def test_vmm_allocator_grow_allocation(handle_type): """Test VMM allocator's ability to grow existing allocations. 
This test verifies that VirtualMemoryResource can grow existing @@ -429,7 +461,11 @@ def test_vmm_allocator_grow_allocation(): if not device.properties.virtual_memory_management_supported: pytest.skip("Virtual memory management is not supported on this device") - options = VirtualMemoryResourceOptions() + handle_type, security_attribute = handle_type # unpack + win32_handle_metadata = ctypes.addressof(security_attribute) if security_attribute else 0 + options = VirtualMemoryResourceOptions( + handle_type=handle_type, win32_handle_metadata=win32_handle_metadata, + ) vmm_mr = VirtualMemoryResource(device, config=options) From ca1aa7474c7b6cbf63befb52fdbf1d04dd7c2f47 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 18 Nov 2025 06:23:30 +0000 Subject: [PATCH 21/24] [pre-commit.ci] auto code formatting --- cuda_core/tests/test_memory.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py index 9c88f1d0e3..60905bee40 100644 --- a/cuda_core/tests/test_memory.py +++ b/cuda_core/tests/test_memory.py @@ -2,8 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 import ctypes -from ctypes import wintypes import sys +from ctypes import wintypes try: from cuda.bindings import driver @@ -13,7 +13,6 @@ import numpy as np except ImportError: np = None -import ctypes import platform import pytest @@ -30,7 +29,6 @@ from cuda.core.experimental._memory import IPCBufferDescriptor from cuda.core.experimental._utils.cuda_utils import handle_return from cuda.core.experimental.utils import StridedMemoryView - from helpers import IS_WINDOWS from helpers.buffers import DummyUnifiedMemoryResource @@ -322,7 +320,7 @@ class SECURITY_ATTRIBUTES(ctypes.Structure): _fields_ = [ ("nLength", wintypes.DWORD), ("lpSecurityDescriptor", wintypes.LPVOID), - ("bInheritHandle", wintypes.BOOL) + ("bInheritHandle", wintypes.BOOL), ] sa = SECURITY_ATTRIBUTES() @@ -356,7 +354,8 @@ def test_vmm_allocator_basic_allocation(use_device_object, handle_type): handle_type, security_attribute = handle_type # unpack win32_handle_metadata = ctypes.addressof(security_attribute) if security_attribute else 0 options = VirtualMemoryResourceOptions( - handle_type=handle_type, win32_handle_metadata=win32_handle_metadata, + handle_type=handle_type, + win32_handle_metadata=win32_handle_metadata, ) # Create VMM allocator with default config device_arg = device if use_device_object else device.device_id @@ -464,7 +463,8 @@ def test_vmm_allocator_grow_allocation(handle_type): handle_type, security_attribute = handle_type # unpack win32_handle_metadata = ctypes.addressof(security_attribute) if security_attribute else 0 options = VirtualMemoryResourceOptions( - handle_type=handle_type, win32_handle_metadata=win32_handle_metadata, + handle_type=handle_type, + win32_handle_metadata=win32_handle_metadata, ) vmm_mr = VirtualMemoryResource(device, config=options) From 3781575ccba82814240de55ff00f6476e541d50f Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Tue, 18 Nov 2025 01:26:09 -0500 Subject: [PATCH 22/24] RTX cards cannot run MCDM, switch back to L4 for now Updated GPU configurations for Python versions 3.13 and 3.14. 
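The constraint behind this change: the data-center GPUs in the matrix (l4, a100, v100) run in TCC or MCDM, while the GeForce cards (rtx2080, rtx4090) stay on WDDM, so GPU and DRIVER_MODE have to be chosen as a pair. Below is a minimal sketch of a sanity check over ci/test-matrix.json, assuming the entry layout shown in the hunks; the helper name and the compatibility map are illustrative, not an authoritative list.

# check_matrix_modes.py -- hypothetical helper, not part of this PR.
import json

# Illustrative compatibility map, inferred from the commit messages in this
# series (GeForce cards stay on WDDM; data-center GPUs use TCC or MCDM).
ALLOWED_MODES = {
    "l4": {"TCC", "MCDM"},
    "a100": {"TCC", "MCDM"},
    "v100": {"TCC", "MCDM"},
    "rtx2080": {"WDDM"},
    "rtx4090": {"WDDM"},
    "rtxpro6000": {"MCDM"},
}


def find_bad_entries(path="ci/test-matrix.json"):
    with open(path) as f:
        matrix = json.load(f)
    # Assumes the file is either a list of entries or an object whose values
    # are lists of entries, each entry being a dict like the ones in the hunks.
    groups = matrix.values() if isinstance(matrix, dict) else [matrix]
    bad = []
    for entries in groups:
        for entry in entries:
            gpu = entry.get("GPU")
            mode = entry.get("DRIVER_MODE")
            if gpu in ALLOWED_MODES and mode and mode not in ALLOWED_MODES[gpu]:
                bad.append((entry.get("PY_VER"), gpu, mode))
    return bad


if __name__ == "__main__":
    for py_ver, gpu, mode in find_bad_entries():
        print(f"py{py_ver}: {gpu} cannot run in {mode} mode")

Run locally before editing the matrix, a check like this could catch pairs such as rtx2080 + MCDM before they reach CI.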
--- ci/test-matrix.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/test-matrix.json b/ci/test-matrix.json index f6f3542316..6c5f201acb 100644 --- a/ci/test-matrix.json +++ b/ci/test-matrix.json @@ -40,10 +40,10 @@ { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "rtx4090", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, - { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "rtx2080", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, + { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "v100", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, - { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "rtx4090", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, + { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "MCDM" } ], From da63359941b18ef6847d89a359e6dca9d2cad006 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Tue, 18 Nov 2025 02:31:10 -0500 Subject: [PATCH 23/24] fix silly typo --- cuda_core/tests/test_memory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py index 60905bee40..e508be03df 100644 --- a/cuda_core/tests/test_memory.py +++ b/cuda_core/tests/test_memory.py @@ -333,7 +333,7 @@ class SECURITY_ATTRIBUTES(ctypes.Structure): if IS_WINDOWS: return (("win32", get_sa()), ("win32_kmt", None)) else: - return (("posix", None),) + return (("posix_fd", None),) @pytest.mark.parametrize("use_device_object", [True, False]) From 6c8cbcb401e2a9cd50ada02a9ec28c272de8a095 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Tue, 18 Nov 2025 15:02:36 +0000 Subject: [PATCH 24/24] fix stupid negation --- cuda_core/tests/test_memory.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py index e508be03df..21dee23280 100644 --- a/cuda_core/tests/test_memory.py +++ b/cuda_core/tests/test_memory.py @@ -406,7 +406,7 @@ def test_vmm_allocator_policy_configuration(): location_type="device", granularity="minimum", gpu_direct_rdma=True, - handle_type="posix_fd" if IS_WINDOWS else "win32_kmt", + handle_type="posix_fd" if not IS_WINDOWS else "win32_kmt", peers=(), self_access="rw", peer_access="rw", @@ -430,7 +430,7 @@ def test_vmm_allocator_policy_configuration(): location_type="device", granularity="recommended", gpu_direct_rdma=False, - handle_type="posix_fd" if IS_WINDOWS else "win32_kmt", + handle_type="posix_fd" if not IS_WINDOWS else "win32_kmt", peers=(), self_access="r", # Read-only access peer_access="r",
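For reference, the Windows path these tests exercise: with handle_type="win32", win32_handle_metadata carries the address of a SECURITY_ATTRIBUTES block (that is what the test helper passes via ctypes.addressof), while "win32_kmt" and "posix_fd" take no metadata. The sketch below mirrors the test helper; the import paths and the convention of returning the ctypes object so the caller keeps it alive are assumptions, not part of the documented API.

import ctypes
from ctypes import wintypes

# Import paths are assumed here.
from cuda.core.experimental import Device, VirtualMemoryResource, VirtualMemoryResourceOptions


class SECURITY_ATTRIBUTES(ctypes.Structure):
    # Same layout as the helper in test_memory.py.
    _fields_ = [
        ("nLength", wintypes.DWORD),
        ("lpSecurityDescriptor", wintypes.LPVOID),
        ("bInheritHandle", wintypes.BOOL),
    ]


def make_win32_vmm(device: Device):
    """Build a VirtualMemoryResource that exports win32 shareable handles."""
    sa = SECURITY_ATTRIBUTES()
    sa.nLength = ctypes.sizeof(sa)
    sa.lpSecurityDescriptor = None  # default security descriptor
    sa.bInheritHandle = False
    options = VirtualMemoryResourceOptions(
        handle_type="win32",
        win32_handle_metadata=ctypes.addressof(sa),
    )
    # Return sa as well: the options only store its address, so the caller
    # must keep the ctypes object alive for the lifetime of the resource.
    return VirtualMemoryResource(device, config=options), sa

Allocation then proceeds as in the tests: the resource's allocate(nbytes) returns a Buffer that is released with close().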