-
Notifications
You must be signed in to change notification settings - Fork 296
CI: allow specifying custom driver versions in test matrix #2176
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
b1b6070
3e016b5
c0ca869
4a23b23
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -13,7 +13,16 @@ | |
| # Windows entries also include DRIVER_MODE. | ||
| # | ||
| # Notes: | ||
| # - DRIVER accepts: | ||
| # * 'latest' - use the runner's pre-installed latest driver (no install step) | ||
| # * 'earliest' - use the runner's pre-installed earliest driver (no install step) | ||
| # * a version string (e.g. '580.65.06') | ||
| # - install that version via ci/tools/install_gpu_driver.sh (Linux) | ||
| # or ci/tools/install_gpu_driver.ps1 (Windows) at the start of the | ||
| # job. The matrix row is routed to the 'latest' runner image (the | ||
| # install scripts swap the driver themselves). | ||
| # - DRIVER: 'earliest' does not work with CUDA 12.9.1 | ||
| # - DRIVER: a custom version is not supported with FLAVOR=wsl on Linux. | ||
|
|
||
| linux: | ||
| pull-request: | ||
|
|
@@ -29,10 +38,10 @@ linux: | |
| - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.3.0', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } | ||
| - { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'v100', GPU_COUNT: '1', DRIVER: 'latest' } | ||
| - { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'rtxpro6000', GPU_COUNT: '1', DRIVER: 'latest' } | ||
| - { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.3.0', LOCAL_CTK: '1', GPU: 'rtxpro6000', GPU_COUNT: '1', DRIVER: 'latest' } | ||
| - { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.3.0', LOCAL_CTK: '1', GPU: 'rtxpro6000', GPU_COUNT: '1', DRIVER: '610.43.02' } | ||
| - { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 't4', GPU_COUNT: '1', DRIVER: 'latest' } | ||
| - { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } | ||
| - { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '13.3.0', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } | ||
| - { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '13.3.0', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '1', DRIVER: '610.43.02' } | ||
| - { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 't4', GPU_COUNT: '1', DRIVER: 'latest' } | ||
| - { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } | ||
| - { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '13.3.0', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } | ||
|
|
@@ -74,7 +83,7 @@ linux: | |
| - { MODE: 'nightly-pytorch', ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu130' } | ||
| # nightly-numba-cuda | ||
| - { MODE: 'nightly-numba-cuda', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } | ||
| - { MODE: 'nightly-numba-cuda', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.3.0', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } | ||
| - { MODE: 'nightly-numba-cuda', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.3.0', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: '580.65.06' } | ||
| - { MODE: 'nightly-numba-cuda', ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } | ||
| - { MODE: 'nightly-numba-cuda', ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '13.3.0', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } | ||
| # nightly-standard (arm64 l4×2 — nightly-only per runner team request) | ||
|
|
@@ -113,4 +122,4 @@ windows: | |
| - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu130' } | ||
| # nightly-numba-cuda | ||
| - { MODE: 'nightly-numba-cuda', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC' } | ||
| - { MODE: 'nightly-numba-cuda', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.3.0', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC' } | ||
| - { MODE: 'nightly-numba-cuda', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.3.0', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: '596.36', DRIVER_MODE: 'TCC' } | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. |
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,45 @@ | ||
| # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
| # | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
| # | ||
| # configure_driver_mode.ps1 -- set the NVIDIA driver mode on a Windows CI | ||
| # runner and cycle the display devices so the new mode takes effect | ||
| # without rebooting. Always runs (whether or not install_gpu_driver.ps1 | ||
| # just ran). When install_gpu_driver.ps1 has run, this single device | ||
| # cycle also activates the freshly-installed driver. | ||
| # | ||
| # Inputs (env): | ||
| # DRIVER_MODE One of WDDM, TCC, MCDM. | ||
|
|
||
| function Set-DriverMode { | ||
|
|
||
| # Map the matrix DRIVER_MODE value to its nvidia-smi -fdm code (WDDM=0, TCC=1, MCDM=2); any other value is reported and the script exits with code 1. | ||
| # This assumes prior knowledge of which mode each GPU supports. | ||
| $driver_mode = $env:DRIVER_MODE | ||
| if ($driver_mode -eq "WDDM") { | ||
| Write-Output "Setting driver mode to WDDM..." | ||
| nvidia-smi -fdm 0 | ||
| } elseif ($driver_mode -eq "TCC") { | ||
| Write-Output "Setting driver mode to TCC..." | ||
| nvidia-smi -fdm 1 | ||
| } elseif ($driver_mode -eq "MCDM") { | ||
| Write-Output "Setting driver mode to MCDM..." | ||
| nvidia-smi -fdm 2 | ||
| } else { | ||
| Write-Output "Unknown driver mode: $driver_mode" | ||
| exit 1 | ||
| } | ||
|
|
||
| # Cycle (disable, then re-enable) only the NVIDIA display adapters, leaving other display devices (e.g. QEMU VGA) untouched. | ||
| $nvidia_devices = Get-PnpDevice -Class Display -FriendlyName "NVIDIA*" | ||
| foreach ($device in $nvidia_devices) { | ||
| Write-Output "Restarting device: $($device.FriendlyName) ($($device.InstanceId))" | ||
| pnputil /disable-device "$($device.InstanceId)" | ||
| pnputil /enable-device "$($device.InstanceId)" | ||
| } | ||
|
Comment on lines
+34
to
+39
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It seems this function now cycles display devices on every Windows job, even when no driver was installed and |
||
| # Give the devices a few seconds (5) to settle: | ||
| Start-Sleep -Seconds 5 | ||
| } | ||
|
|
||
| # Run the functions | ||
| Set-DriverMode | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
xref https://github.com/NVIDIA/cuda-python/actions/runs/27081260952/job/79927472925