From 4aadce2b6b0933557c6831eb37dd3cf9419bf47c Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Wed, 29 Apr 2026 02:14:28 +0000 Subject: [PATCH 1/9] Add nightly CI pipeline for optional-dependency testing (PyTorch, numba-cuda) Add ci-nightly.yml that downloads wheels from the latest successful CI run on main and tests them against PyTorch and numba-cuda, without rebuilding. Key changes: - ci-nightly.yml: new orchestrator (schedule 2 AM UTC + workflow_dispatch) - test-wheel-linux/windows.yml: add run-id input for cross-run artifact downloads, and test-mode input (standard/nightly-pytorch/nightly-numba-cuda) with conditional test steps - ci/test-matrix.yml: add nightly entries with MODE field (8 pytorch across linux-64 and win-64 + 6 numba-cuda across linux-64, linux-aarch64, win-64) - ci/tools/run-tests: add nightly-install mode that installs all wheels without running standard tests Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/ci-nightly.yml | 177 +++++++++++++++++++++++ .github/workflows/test-wheel-linux.yml | 73 +++++++++- .github/workflows/test-wheel-windows.yml | 76 +++++++++- ci/test-matrix.yml | 22 ++- ci/tools/run-tests | 35 ++++- 5 files changed, 376 insertions(+), 7 deletions(-) create mode 100644 .github/workflows/ci-nightly.yml diff --git a/.github/workflows/ci-nightly.yml b/.github/workflows/ci-nightly.yml new file mode 100644 index 00000000000..dda13abb5e2 --- /dev/null +++ b/.github/workflows/ci-nightly.yml @@ -0,0 +1,177 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +# Nightly CI pipeline that tests optional dependencies (PyTorch, numba-cuda) +# against the latest cuda-python wheels built on main. +# +# This workflow does NOT build wheels — it downloads them from the latest +# successful CI run on main and runs integration tests with optional deps. 
+ +name: "CI: Nightly optional-deps" + +on: + schedule: + # 2 AM UTC daily, after the midnight main CI build finishes + - cron: "0 2 * * *" + workflow_dispatch: + inputs: + run-id: + description: > + Override the CI run ID to download artifacts from. + Leave empty to auto-detect the latest successful main run. + type: string + default: '' + +jobs: + find-wheels: + runs-on: ubuntu-latest + outputs: + RUN_ID: ${{ steps.find.outputs.run_id }} + CUDA_BUILD_VER: ${{ steps.get-vars.outputs.cuda_build_ver }} + steps: + - name: Checkout repository + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 0 + + - name: Get CUDA build versions + id: get-vars + run: | + cuda_build_ver=$(yq '.cuda.build.version' ci/versions.yml) + echo "cuda_build_ver=$cuda_build_ver" >> $GITHUB_OUTPUT + + - name: Find latest successful CI run on main + id: find + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + if [[ -n "${{ inputs.run-id }}" ]]; then + echo "run_id=${{ inputs.run-id }}" >> $GITHUB_OUTPUT + echo "Using manually specified run ID: ${{ inputs.run-id }}" + exit 0 + fi + + RUN_ID=$(gh run list \ + -b main \ + -L 1 \ + -w "CI" \ + -s success \ + -R "${{ github.repository }}" \ + --json databaseId \ + | jq -r '.[0].databaseId') + + if [[ -z "$RUN_ID" || "$RUN_ID" == "null" ]]; then + echo "::error::No successful CI run found on main" + exit 1 + fi + + echo "run_id=$RUN_ID" >> $GITHUB_OUTPUT + echo "Using latest successful CI run: $RUN_ID" + + # ── PyTorch interop tests ── + + test-pytorch-linux: + name: "Nightly PyTorch (linux-64)" + if: ${{ github.repository_owner == 'nvidia' }} + needs: find-wheels + permissions: + contents: read + actions: read + secrets: inherit + uses: ./.github/workflows/test-wheel-linux.yml + with: + build-type: nightly + host-platform: linux-64 + build-ctk-ver: ${{ needs.find-wheels.outputs.CUDA_BUILD_VER }} + run-id: ${{ needs.find-wheels.outputs.RUN_ID }} + test-mode: nightly-pytorch + matrix_filter: 
'map(select(.MODE == "nightly-pytorch"))' + + test-pytorch-windows: + name: "Nightly PyTorch (win-64)" + if: ${{ github.repository_owner == 'nvidia' }} + needs: find-wheels + permissions: + contents: read + actions: read + secrets: inherit + uses: ./.github/workflows/test-wheel-windows.yml + with: + build-type: nightly + host-platform: win-64 + build-ctk-ver: ${{ needs.find-wheels.outputs.CUDA_BUILD_VER }} + run-id: ${{ needs.find-wheels.outputs.RUN_ID }} + test-mode: nightly-pytorch + matrix_filter: 'map(select(.MODE == "nightly-pytorch"))' + + # ── numba-cuda tests ── + + test-numba-cuda-linux-64: + name: "Nightly numba-cuda (linux-64)" + if: ${{ github.repository_owner == 'nvidia' }} + needs: find-wheels + permissions: + contents: read + actions: read + secrets: inherit + uses: ./.github/workflows/test-wheel-linux.yml + with: + build-type: nightly + host-platform: linux-64 + build-ctk-ver: ${{ needs.find-wheels.outputs.CUDA_BUILD_VER }} + run-id: ${{ needs.find-wheels.outputs.RUN_ID }} + test-mode: nightly-numba-cuda + matrix_filter: 'map(select(.MODE == "nightly-numba-cuda"))' + + test-numba-cuda-linux-aarch64: + name: "Nightly numba-cuda (linux-aarch64)" + if: ${{ github.repository_owner == 'nvidia' }} + needs: find-wheels + permissions: + contents: read + actions: read + secrets: inherit + uses: ./.github/workflows/test-wheel-linux.yml + with: + build-type: nightly + host-platform: linux-aarch64 + build-ctk-ver: ${{ needs.find-wheels.outputs.CUDA_BUILD_VER }} + run-id: ${{ needs.find-wheels.outputs.RUN_ID }} + test-mode: nightly-numba-cuda + matrix_filter: 'map(select(.MODE == "nightly-numba-cuda"))' + + test-numba-cuda-windows: + name: "Nightly numba-cuda (win-64)" + if: ${{ github.repository_owner == 'nvidia' }} + needs: find-wheels + permissions: + contents: read + actions: read + secrets: inherit + uses: ./.github/workflows/test-wheel-windows.yml + with: + build-type: nightly + host-platform: win-64 + build-ctk-ver: ${{ 
needs.find-wheels.outputs.CUDA_BUILD_VER }} + run-id: ${{ needs.find-wheels.outputs.RUN_ID }} + test-mode: nightly-numba-cuda + matrix_filter: 'map(select(.MODE == "nightly-numba-cuda"))' + + # ── Status check ── + + checks: + name: Nightly check status + if: always() + runs-on: ubuntu-latest + needs: + - test-pytorch-linux + - test-pytorch-windows + - test-numba-cuda-linux-64 + - test-numba-cuda-linux-aarch64 + - test-numba-cuda-windows + steps: + - name: Exit + run: | + # if any dependencies were cancelled or failed, that's a failure + jq -e 'all(.[]; . == "success" or . == "skipped")' <<< '${{ toJSON(needs.*.result) }}' diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml index 35c5e6c3734..3e7b5ee97a3 100644 --- a/.github/workflows/test-wheel-linux.yml +++ b/.github/workflows/test-wheel-linux.yml @@ -29,6 +29,18 @@ on: skip-bindings-test: type: boolean default: false + run-id: + description: > + Workflow run ID to download artifacts from. + Defaults to the current run when empty. + type: string + default: '' + test-mode: + description: > + Test mode: 'standard' (default), 'nightly-pytorch', or + 'nightly-numba-cuda'. + type: string + default: 'standard' defaults: run: @@ -128,6 +140,8 @@ jobs: with: name: cuda-pathfinder-wheel path: ./cuda_pathfinder + run-id: ${{ inputs.run-id || github.run_id }} + github-token: ${{ secrets.GITHUB_TOKEN }} - name: Download cuda-python build artifacts if: ${{ env.USE_BACKPORT_BINDINGS == '0' }} @@ -135,6 +149,8 @@ jobs: with: name: cuda-python-wheel path: . 
+ run-id: ${{ inputs.run-id || github.run_id }} + github-token: ${{ secrets.GITHUB_TOKEN }} - name: Download cuda.bindings build artifacts if: ${{ env.USE_BACKPORT_BINDINGS == '0' }} @@ -142,6 +158,8 @@ jobs: with: name: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }} path: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }} + run-id: ${{ inputs.run-id || github.run_id }} + github-token: ${{ secrets.GITHUB_TOKEN }} - name: Download cuda-python & cuda.bindings build artifacts from the prior branch if: ${{ env.USE_BACKPORT_BINDINGS == '1' }} @@ -194,6 +212,8 @@ jobs: with: name: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}-tests path: ${{ env.CUDA_BINDINGS_CYTHON_TESTS_DIR }} + run-id: ${{ inputs.run-id || github.run_id }} + github-token: ${{ secrets.GITHUB_TOKEN }} - name: Display structure of downloaded cuda.bindings Cython tests if: ${{ env.SKIP_CYTHON_TEST == '0' }} @@ -206,6 +226,8 @@ jobs: with: name: ${{ env.CUDA_CORE_ARTIFACT_NAME }} path: ${{ env.CUDA_CORE_ARTIFACTS_DIR }} + run-id: ${{ inputs.run-id || github.run_id }} + github-token: ${{ secrets.GITHUB_TOKEN }} - name: Display structure of downloaded cuda.core build artifacts run: | @@ -218,6 +240,8 @@ jobs: with: name: ${{ env.CUDA_CORE_ARTIFACT_NAME }}-tests path: ${{ env.CUDA_CORE_CYTHON_TESTS_DIR }} + run-id: ${{ inputs.run-id || github.run_id }} + github-token: ${{ secrets.GITHUB_TOKEN }} - name: Display structure of downloaded cuda.core Cython tests if: ${{ env.SKIP_CYTHON_TEST == '0' }} @@ -256,7 +280,9 @@ jobs: - name: Set up test repetition on nightly runs run: echo "PYTEST_ADDOPTS=\"--count=${{ inputs.nruns }}\"" >> "$GITHUB_ENV" + # ── Standard test steps (skipped for nightly modes) ── - name: Run cuda.pathfinder tests with see_what_works + if: ${{ inputs.test-mode == 'standard' }} env: CUDA_PATHFINDER_TEST_LOAD_NVIDIA_DYNAMIC_LIB_STRICTNESS: see_what_works CUDA_PATHFINDER_TEST_FIND_NVIDIA_HEADERS_STRICTNESS: see_what_works @@ -264,14 +290,14 @@ jobs: run: run-tests pathfinder - name: Run cuda.bindings tests - if: ${{ 
env.SKIP_CUDA_BINDINGS_TEST == '0' }} + if: ${{ inputs.test-mode == 'standard' && env.SKIP_CUDA_BINDINGS_TEST == '0' }} env: CUDA_VER: ${{ matrix.CUDA_VER }} LOCAL_CTK: ${{ matrix.LOCAL_CTK }} run: run-tests bindings - name: Run cuda.bindings benchmarks (smoke test) - if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0' }} + if: ${{ inputs.test-mode == 'standard' && env.SKIP_CUDA_BINDINGS_TEST == '0' }} run: | pip install pyperf pushd benchmarks/cuda_bindings @@ -279,12 +305,14 @@ jobs: popd - name: Run cuda.core tests + if: ${{ inputs.test-mode == 'standard' }} env: CUDA_VER: ${{ matrix.CUDA_VER }} LOCAL_CTK: ${{ matrix.LOCAL_CTK }} run: run-tests core - name: Ensure cuda-python installable + if: ${{ inputs.test-mode == 'standard' }} run: | if [[ "${{ matrix.LOCAL_CTK }}" == 1 ]]; then pip install --only-binary=:all: cuda_python*.whl @@ -293,6 +321,7 @@ jobs: fi - name: Install cuda.pathfinder extra wheels for testing + if: ${{ inputs.test-mode == 'standard' }} run: | set -euo pipefail pushd cuda_pathfinder @@ -301,8 +330,48 @@ jobs: popd - name: Run cuda.pathfinder tests with all_must_work + if: ${{ inputs.test-mode == 'standard' }} env: CUDA_PATHFINDER_TEST_LOAD_NVIDIA_DYNAMIC_LIB_STRICTNESS: all_must_work CUDA_PATHFINDER_TEST_FIND_NVIDIA_HEADERS_STRICTNESS: all_must_work CUDA_PATHFINDER_TEST_FIND_NVIDIA_BITCODE_LIB_STRICTNESS: all_must_work run: run-tests pathfinder + + # ── Nightly: install all cuda-python wheels ── + - name: Install cuda-python wheels for nightly testing + if: ${{ inputs.test-mode != 'standard' }} + env: + CUDA_VER: ${{ matrix.CUDA_VER }} + LOCAL_CTK: ${{ matrix.LOCAL_CTK }} + run: run-tests nightly-install + + # ── Nightly: PyTorch interop tests ── + - name: Install PyTorch + if: ${{ inputs.test-mode == 'nightly-pytorch' }} + run: | + TORCH_VER="${{ matrix.TORCH_VER }}" + TORCH_CUDA="${{ matrix.TORCH_CUDA }}" + if [[ "$TORCH_VER" == "latest" ]]; then + pip install torch --index-url "https://download.pytorch.org/whl/${TORCH_CUDA}" + else + pip install 
"torch==${TORCH_VER}" --index-url "https://download.pytorch.org/whl/${TORCH_CUDA}" + fi + python -c "import torch; print(f'PyTorch {torch.__version__}, CUDA {torch.version.cuda}')" + + - name: Run PyTorch interop tests + if: ${{ inputs.test-mode == 'nightly-pytorch' }} + run: | + pushd cuda_core + pytest -rxXs -v --durations=0 tests/test_utils.py tests/example_tests/ + popd + + # ── Nightly: numba-cuda tests ── + - name: Install numba-cuda + if: ${{ inputs.test-mode == 'nightly-numba-cuda' }} + run: | + pip install numba-cuda + python -c "import numba_cuda; print(f'numba-cuda installed')" + + - name: Run numba-cuda tests + if: ${{ inputs.test-mode == 'nightly-numba-cuda' }} + run: python -m numba_cuda.numba.cuda.tests diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 765823c6bfc..ad219a2f30d 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -29,6 +29,18 @@ on: skip-bindings-test: type: boolean default: false + run-id: + description: > + Workflow run ID to download artifacts from. + Defaults to the current run when empty. + type: string + default: '' + test-mode: + description: > + Test mode: 'standard' (default), 'nightly-pytorch', or + 'nightly-numba-cuda'. + type: string + default: 'standard' jobs: compute-matrix: @@ -123,6 +135,8 @@ jobs: with: name: cuda-pathfinder-wheel path: ./cuda_pathfinder + run-id: ${{ inputs.run-id || github.run_id }} + github-token: ${{ secrets.GITHUB_TOKEN }} - name: Download cuda-python build artifacts if: ${{ env.USE_BACKPORT_BINDINGS == '0' }} @@ -130,6 +144,8 @@ jobs: with: name: cuda-python-wheel path: . 
+ run-id: ${{ inputs.run-id || github.run_id }} + github-token: ${{ secrets.GITHUB_TOKEN }} - name: Download cuda.bindings build artifacts if: ${{ env.USE_BACKPORT_BINDINGS == '0' }} @@ -137,6 +153,8 @@ jobs: with: name: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }} path: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }} + run-id: ${{ inputs.run-id || github.run_id }} + github-token: ${{ secrets.GITHUB_TOKEN }} - name: Download cuda-python & cuda.bindings build artifacts from the prior branch if: ${{ env.USE_BACKPORT_BINDINGS == '1' }} @@ -180,6 +198,8 @@ jobs: with: name: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}-tests path: ${{ env.CUDA_BINDINGS_CYTHON_TESTS_DIR }} + run-id: ${{ inputs.run-id || github.run_id }} + github-token: ${{ secrets.GITHUB_TOKEN }} - name: Display structure of downloaded cuda.bindings Cython tests if: ${{ env.SKIP_CYTHON_TEST == '0' }} @@ -192,6 +212,8 @@ jobs: with: name: ${{ env.CUDA_CORE_ARTIFACT_NAME }} path: ${{ env.CUDA_CORE_ARTIFACTS_DIR }} + run-id: ${{ inputs.run-id || github.run_id }} + github-token: ${{ secrets.GITHUB_TOKEN }} - name: Display structure of downloaded cuda.core build artifacts run: | @@ -204,6 +226,8 @@ jobs: with: name: ${{ env.CUDA_CORE_ARTIFACT_NAME }}-tests path: ${{ env.CUDA_CORE_CYTHON_TESTS_DIR }} + run-id: ${{ inputs.run-id || github.run_id }} + github-token: ${{ secrets.GITHUB_TOKEN }} - name: Display structure of downloaded cuda.core Cython tests if: ${{ env.SKIP_CYTHON_TEST == '0' }} @@ -237,7 +261,9 @@ jobs: shell: bash --noprofile --norc -xeuo pipefail {0} run: echo "PYTEST_ADDOPTS=\"--count=${{ inputs.nruns }}\"" >> "$GITHUB_ENV" + # ── Standard test steps (skipped for nightly modes) ── - name: Run cuda.pathfinder tests with see_what_works + if: ${{ inputs.test-mode == 'standard' }} env: CUDA_PATHFINDER_TEST_LOAD_NVIDIA_DYNAMIC_LIB_STRICTNESS: see_what_works CUDA_PATHFINDER_TEST_FIND_NVIDIA_HEADERS_STRICTNESS: see_what_works @@ -246,7 +272,7 @@ jobs: run: run-tests pathfinder - name: Run cuda.bindings tests - if: ${{ 
env.SKIP_CUDA_BINDINGS_TEST == '0' }} + if: ${{ inputs.test-mode == 'standard' && env.SKIP_CUDA_BINDINGS_TEST == '0' }} env: CUDA_VER: ${{ matrix.CUDA_VER }} LOCAL_CTK: ${{ matrix.LOCAL_CTK }} @@ -254,6 +280,7 @@ jobs: run: run-tests bindings - name: Run cuda.core tests + if: ${{ inputs.test-mode == 'standard' }} env: CUDA_VER: ${{ matrix.CUDA_VER }} LOCAL_CTK: ${{ matrix.LOCAL_CTK }} @@ -261,6 +288,7 @@ jobs: run: run-tests core - name: Ensure cuda-python installable + if: ${{ inputs.test-mode == 'standard' }} run: | if ('${{ matrix.LOCAL_CTK }}' -eq '1') { pip install --only-binary=:all: (Get-ChildItem -Filter cuda_python*.whl).FullName @@ -269,6 +297,7 @@ jobs: } - name: Install cuda.pathfinder extra wheels for testing + if: ${{ inputs.test-mode == 'standard' }} shell: bash --noprofile --norc -xeuo pipefail {0} run: | pushd cuda_pathfinder @@ -277,9 +306,54 @@ jobs: popd - name: Run cuda.pathfinder tests with all_must_work + if: ${{ inputs.test-mode == 'standard' }} env: CUDA_PATHFINDER_TEST_LOAD_NVIDIA_DYNAMIC_LIB_STRICTNESS: all_must_work CUDA_PATHFINDER_TEST_FIND_NVIDIA_HEADERS_STRICTNESS: all_must_work CUDA_PATHFINDER_TEST_FIND_NVIDIA_BITCODE_LIB_STRICTNESS: all_must_work shell: bash --noprofile --norc -xeuo pipefail {0} run: run-tests pathfinder + + # ── Nightly: install all cuda-python wheels ── + - name: Install cuda-python wheels for nightly testing + if: ${{ inputs.test-mode != 'standard' }} + env: + CUDA_VER: ${{ matrix.CUDA_VER }} + LOCAL_CTK: ${{ matrix.LOCAL_CTK }} + shell: bash --noprofile --norc -xeuo pipefail {0} + run: run-tests nightly-install + + # ── Nightly: PyTorch interop tests ── + - name: Install PyTorch + if: ${{ inputs.test-mode == 'nightly-pytorch' }} + shell: bash --noprofile --norc -xeuo pipefail {0} + run: | + TORCH_VER="${{ matrix.TORCH_VER }}" + TORCH_CUDA="${{ matrix.TORCH_CUDA }}" + if [[ "$TORCH_VER" == "latest" ]]; then + pip install torch --index-url "https://download.pytorch.org/whl/${TORCH_CUDA}" + else + pip install 
"torch==${TORCH_VER}" --index-url "https://download.pytorch.org/whl/${TORCH_CUDA}" + fi + python -c "import torch; print(f'PyTorch {torch.__version__}, CUDA {torch.version.cuda}')" + + - name: Run PyTorch interop tests + if: ${{ inputs.test-mode == 'nightly-pytorch' }} + shell: bash --noprofile --norc -xeuo pipefail {0} + run: | + pushd cuda_core + pytest -rxXs -v --durations=0 tests/test_utils.py tests/example_tests/ + popd + + # ── Nightly: numba-cuda tests ── + - name: Install numba-cuda + if: ${{ inputs.test-mode == 'nightly-numba-cuda' }} + shell: bash --noprofile --norc -xeuo pipefail {0} + run: | + pip install numba-cuda + python -c "import numba_cuda; print(f'numba-cuda installed')" + + - name: Run numba-cuda tests + if: ${{ inputs.test-mode == 'nightly-numba-cuda' }} + shell: bash --noprofile --norc -xeuo pipefail {0} + run: python -m numba_cuda.numba.cuda.tests diff --git a/ci/test-matrix.yml b/ci/test-matrix.yml index a402e3e4cf7..d8be9b350a5 100644 --- a/ci/test-matrix.yml +++ b/ci/test-matrix.yml @@ -62,7 +62,17 @@ linux: - { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '13.2.1', LOCAL_CTK: '1', GPU: 'h100', GPU_COUNT: '2', DRIVER: 'latest' } - { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 't4', GPU_COUNT: '1', DRIVER: 'latest', FLAVOR: 'wsl' } - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'rtx4090', GPU_COUNT: '1', DRIVER: 'latest', FLAVOR: 'wsl' } - nightly: [] + nightly: + # nightly-pytorch (amd64 only — PyTorch does not ship arm64 GPU wheels) + - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: 'latest', TORCH_CUDA: 'cu126' } + - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: 'latest', TORCH_CUDA: 'cu130' } + - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', 
LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu126' } + - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu130' } + # nightly-numba-cuda + - { MODE: 'nightly-numba-cuda', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } + - { MODE: 'nightly-numba-cuda', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } + - { MODE: 'nightly-numba-cuda', ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } + - { MODE: 'nightly-numba-cuda', ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } windows: pull-request: @@ -85,4 +95,12 @@ windows: - { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC' } - { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'MCDM' } - { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'MCDM' } - nightly: [] + nightly: + # nightly-pytorch + - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', TORCH_VER: 'latest', TORCH_CUDA: 'cu126' } + - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', TORCH_VER: 'latest', TORCH_CUDA: 'cu130' } + - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', 
TORCH_VER: '2.9.1', TORCH_CUDA: 'cu126' } + - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu130' } + # nightly-numba-cuda + - { MODE: 'nightly-numba-cuda', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC' } + - { MODE: 'nightly-numba-cuda', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC' } diff --git a/ci/tools/run-tests b/ci/tools/run-tests index d42634a7073..3500bfa0d28 100755 --- a/ci/tools/run-tests +++ b/ci/tools/run-tests @@ -13,8 +13,8 @@ if [[ ${#} -ne 1 ]]; then echo "Error: This script requires exactly 1 argument. You provided ${#}" exit 1 fi -if [[ "${1}" != "bindings" && "${1}" != "core" && "${1}" != "pathfinder" ]]; then - echo "Error: Invalid test module '${1}'. Must be 'bindings', 'core', or 'pathfinder'" +if [[ "${1}" != "bindings" && "${1}" != "core" && "${1}" != "pathfinder" && "${1}" != "nightly-install" ]]; then + echo "Error: Invalid test module '${1}'. Must be 'bindings', 'core', 'pathfinder', or 'nightly-install'" exit 1 fi @@ -91,4 +91,35 @@ elif [[ "${test_module}" == "core" ]]; then ${SANITIZER_CMD} pytest -rxXs -v --durations=0 --randomly-dont-reorganize tests/cython fi popd +elif [[ "${test_module}" == "nightly-install" ]]; then + # Install all wheels (pathfinder already installed above) without running tests. + # Used by nightly optional-dependency pipelines (pytorch, numba-cuda). + echo "Installing bindings wheel" + pushd ./cuda_bindings + if [[ "${LOCAL_CTK}" == 1 ]]; then + pip install "${CUDA_BINDINGS_ARTIFACTS_DIR}"/*.whl + else + pip install $(ls "${CUDA_BINDINGS_ARTIFACTS_DIR}"/*.whl)[all] + fi + popd + + TEST_CUDA_MAJOR="$(cut -d '.' 
-f 1 <<< ${CUDA_VER})" + echo "Installing core wheel" + + FREE_THREADING="" + if python -c 'import sys; assert not sys._is_gil_enabled()' 2> /dev/null; then + FREE_THREADING+="-ft" + fi + + pushd ./cuda_core + CUDA_VER_MINOR="$(cut -d '.' -f 1-2 <<< "${CUDA_VER}")" + WHL_EXTRA=("${CUDA_CORE_ARTIFACTS_DIR}"/*.whl) + if [[ "${LOCAL_CTK}" != 1 ]]; then + WHL_EXTRA=("${WHL_EXTRA[0]}[cu${TEST_CUDA_MAJOR}]") + fi + pip install "${WHL_EXTRA[@]}" --group "test-cu${TEST_CUDA_MAJOR}${FREE_THREADING}" "cuda-toolkit==${CUDA_VER_MINOR}.*" + popd + + echo "All cuda-python wheels installed for nightly testing" + pip list | grep -i "cuda\|pathfinder" fi From 6dea4c3d5acb0542ad8222699ad80dcd3cfe08ce Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Wed, 29 Apr 2026 02:33:37 +0000 Subject: [PATCH 2/9] Add concurrency group and fix checks job in ci-nightly.yml - Add concurrency group matching ci.yml's pattern - Replace jq one-liner with explicit cancelled/failure checks per ci.yml's battle-tested pattern (see long comment there for rationale) Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/ci-nightly.yml | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci-nightly.yml b/.github/workflows/ci-nightly.yml index dda13abb5e2..f0e76c8d2d8 100644 --- a/.github/workflows/ci-nightly.yml +++ b/.github/workflows/ci-nightly.yml @@ -10,6 +10,10 @@ name: "CI: Nightly optional-deps" +concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }} + cancel-in-progress: true + on: schedule: # 2 AM UTC daily, after the midnight main CI build finishes @@ -173,5 +177,21 @@ jobs: steps: - name: Exit run: | - # if any dependencies were cancelled or failed, that's a failure - jq -e 'all(.[]; . == "success" or . == "skipped")' <<< '${{ toJSON(needs.*.result) }}' + # If any dependency was cancelled or failed, that's a failure. 
+ # + # See ci.yml for the full rationale on why we must use always() + # and explicitly check each result rather than relying on the + # default behaviour. + if ${{ needs.test-pytorch-linux.result == 'cancelled' || + needs.test-pytorch-linux.result == 'failure' || + needs.test-pytorch-windows.result == 'cancelled' || + needs.test-pytorch-windows.result == 'failure' || + needs.test-numba-cuda-linux-64.result == 'cancelled' || + needs.test-numba-cuda-linux-64.result == 'failure' || + needs.test-numba-cuda-linux-aarch64.result == 'cancelled' || + needs.test-numba-cuda-linux-aarch64.result == 'failure' || + needs.test-numba-cuda-windows.result == 'cancelled' || + needs.test-numba-cuda-windows.result == 'failure' }}; then + exit 1 + fi + exit 0 From ac5238ca67d353c1df06d012432e8eb095ee9334 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Wed, 29 Apr 2026 02:35:11 +0000 Subject: [PATCH 3/9] Temporarily add push trigger to ci-nightly.yml for testing Remove before merging. Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/ci-nightly.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/ci-nightly.yml b/.github/workflows/ci-nightly.yml index f0e76c8d2d8..eb40c7979a6 100644 --- a/.github/workflows/ci-nightly.yml +++ b/.github/workflows/ci-nightly.yml @@ -15,6 +15,10 @@ concurrency: cancel-in-progress: true on: + push: + branches: + - "main" + - "pull-request/[0-9]+" schedule: # 2 AM UTC daily, after the midnight main CI build finishes - cron: "0 2 * * *" From cb5aefa876b9f1ca94d06345cfd599dd010363bc Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Wed, 29 Apr 2026 02:39:29 +0000 Subject: [PATCH 4/9] Use shallow clone (fetch-depth: 1) in ci-nightly.yml MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Full history is not needed — we only read ci/versions.yml. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/ci-nightly.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-nightly.yml b/.github/workflows/ci-nightly.yml index eb40c7979a6..d218ace0531 100644 --- a/.github/workflows/ci-nightly.yml +++ b/.github/workflows/ci-nightly.yml @@ -41,7 +41,7 @@ jobs: - name: Checkout repository uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: - fetch-depth: 0 + fetch-depth: 1 - name: Get CUDA build versions id: get-vars From 92865983b14b26c2f8f793f067b2521ac95bd5b7 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Wed, 29 Apr 2026 02:42:26 +0000 Subject: [PATCH 5/9] Fix artifact name mismatch in nightly CI by passing source SHA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Artifact names embed the commit SHA from the build that created them. When the nightly workflow downloads artifacts from a different CI run, it must use that run's SHA — not github.sha (the nightly run's own SHA) — to construct the correct artifact names. 
- ci-nightly.yml: resolve head_sha from the source CI run via `gh run view --json headSha`, pass it to test workflows - test-wheel-linux/windows.yml: add `sha` input (defaults to github.sha for backward compatibility), use it in env-vars Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/ci-nightly.yml | 45 +++++++++++++++--------- .github/workflows/test-wheel-linux.yml | 8 ++++- .github/workflows/test-wheel-windows.yml | 8 ++++- 3 files changed, 43 insertions(+), 18 deletions(-) diff --git a/.github/workflows/ci-nightly.yml b/.github/workflows/ci-nightly.yml index d218ace0531..72778445a91 100644 --- a/.github/workflows/ci-nightly.yml +++ b/.github/workflows/ci-nightly.yml @@ -36,6 +36,7 @@ jobs: runs-on: ubuntu-latest outputs: RUN_ID: ${{ steps.find.outputs.run_id }} + HEAD_SHA: ${{ steps.find.outputs.head_sha }} CUDA_BUILD_VER: ${{ steps.get-vars.outputs.cuda_build_ver }} steps: - name: Checkout repository @@ -55,27 +56,34 @@ jobs: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | if [[ -n "${{ inputs.run-id }}" ]]; then - echo "run_id=${{ inputs.run-id }}" >> $GITHUB_OUTPUT - echo "Using manually specified run ID: ${{ inputs.run-id }}" - exit 0 + RUN_ID="${{ inputs.run-id }}" + echo "Using manually specified run ID: $RUN_ID" + else + RUN_ID=$(gh run list \ + -b main \ + -L 1 \ + -w "CI" \ + -s success \ + -R "${{ github.repository }}" \ + --json databaseId \ + | jq -r '.[0].databaseId') + + if [[ -z "$RUN_ID" || "$RUN_ID" == "null" ]]; then + echo "::error::No successful CI run found on main" + exit 1 + fi + echo "Using latest successful CI run: $RUN_ID" fi - RUN_ID=$(gh run list \ - -b main \ - -L 1 \ - -w "CI" \ - -s success \ + # Resolve the head SHA from the CI run — artifact names embed this. 
+ HEAD_SHA=$(gh run view "$RUN_ID" \ -R "${{ github.repository }}" \ - --json databaseId \ - | jq -r '.[0].databaseId') - - if [[ -z "$RUN_ID" || "$RUN_ID" == "null" ]]; then - echo "::error::No successful CI run found on main" - exit 1 - fi + --json headSha \ + | jq -r '.headSha') echo "run_id=$RUN_ID" >> $GITHUB_OUTPUT - echo "Using latest successful CI run: $RUN_ID" + echo "head_sha=$HEAD_SHA" >> $GITHUB_OUTPUT + echo "Source commit: $HEAD_SHA" # ── PyTorch interop tests ── @@ -93,6 +101,7 @@ jobs: host-platform: linux-64 build-ctk-ver: ${{ needs.find-wheels.outputs.CUDA_BUILD_VER }} run-id: ${{ needs.find-wheels.outputs.RUN_ID }} + sha: ${{ needs.find-wheels.outputs.HEAD_SHA }} test-mode: nightly-pytorch matrix_filter: 'map(select(.MODE == "nightly-pytorch"))' @@ -110,6 +119,7 @@ jobs: host-platform: win-64 build-ctk-ver: ${{ needs.find-wheels.outputs.CUDA_BUILD_VER }} run-id: ${{ needs.find-wheels.outputs.RUN_ID }} + sha: ${{ needs.find-wheels.outputs.HEAD_SHA }} test-mode: nightly-pytorch matrix_filter: 'map(select(.MODE == "nightly-pytorch"))' @@ -129,6 +139,7 @@ jobs: host-platform: linux-64 build-ctk-ver: ${{ needs.find-wheels.outputs.CUDA_BUILD_VER }} run-id: ${{ needs.find-wheels.outputs.RUN_ID }} + sha: ${{ needs.find-wheels.outputs.HEAD_SHA }} test-mode: nightly-numba-cuda matrix_filter: 'map(select(.MODE == "nightly-numba-cuda"))' @@ -146,6 +157,7 @@ jobs: host-platform: linux-aarch64 build-ctk-ver: ${{ needs.find-wheels.outputs.CUDA_BUILD_VER }} run-id: ${{ needs.find-wheels.outputs.RUN_ID }} + sha: ${{ needs.find-wheels.outputs.HEAD_SHA }} test-mode: nightly-numba-cuda matrix_filter: 'map(select(.MODE == "nightly-numba-cuda"))' @@ -163,6 +175,7 @@ jobs: host-platform: win-64 build-ctk-ver: ${{ needs.find-wheels.outputs.CUDA_BUILD_VER }} run-id: ${{ needs.find-wheels.outputs.RUN_ID }} + sha: ${{ needs.find-wheels.outputs.HEAD_SHA }} test-mode: nightly-numba-cuda matrix_filter: 'map(select(.MODE == "nightly-numba-cuda"))' diff --git 
a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml index 3e7b5ee97a3..892bcb19056 100644 --- a/.github/workflows/test-wheel-linux.yml +++ b/.github/workflows/test-wheel-linux.yml @@ -41,6 +41,12 @@ on: 'nightly-numba-cuda'. type: string default: 'standard' + sha: + description: > + Commit SHA used to construct artifact names. + Defaults to github.sha (current run) when empty. + type: string + default: '' defaults: run: @@ -131,7 +137,7 @@ jobs: HOST_PLATFORM: ${{ inputs.host-platform }} LOCAL_CTK: ${{ matrix.LOCAL_CTK }} PY_VER: ${{ matrix.PY_VER }} - SHA: ${{ github.sha }} + SHA: ${{ inputs.sha || github.sha }} SKIP_BINDINGS_TEST_OVERRIDE: ${{ inputs.skip-bindings-test && '1' || '0' }} run: ./ci/tools/env-vars test diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index ad219a2f30d..300ca157bf8 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -41,6 +41,12 @@ on: 'nightly-numba-cuda'. type: string default: 'standard' + sha: + description: > + Commit SHA used to construct artifact names. + Defaults to github.sha (current run) when empty. 
+ type: string + default: '' jobs: compute-matrix: @@ -125,7 +131,7 @@ jobs: HOST_PLATFORM: ${{ inputs.host-platform }} LOCAL_CTK: ${{ matrix.LOCAL_CTK }} PY_VER: ${{ matrix.PY_VER }} - SHA: ${{ github.sha }} + SHA: ${{ inputs.sha || github.sha }} SKIP_BINDINGS_TEST_OVERRIDE: ${{ inputs.skip-bindings-test && '1' || '0' }} shell: bash --noprofile --norc -xeuo pipefail {0} run: ./ci/tools/env-vars test From 0b7cc50c5cea3cbac79059365f31578929f45087 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Wed, 29 Apr 2026 03:14:36 +0000 Subject: [PATCH 6/9] Fix nightly CI: single pip call, display step, numba cmd, Windows VC++ - Install ALL wheels (pathfinder + bindings + core) and optional dep (torch/numba-cuda) in a single pip call so pip resolves everything together and avoids costly reinstall cycles from version conflicts - Fix "Display structure" step: show only artifact files (cuda_python*.whl, cuda_pathfinder/) instead of ls -lahR . which lists the entire repo - Fix numba-cuda test command: python -m numba.runtests numba.cuda.tests - Install Visual C++ Redistributable on Windows before PyTorch (https://github.com/pytorch/pytorch/issues/166628) - run-tests now does pip list at the end of nightly installs Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/test-wheel-linux.yml | 40 ++++------- .github/workflows/test-wheel-windows.yml | 49 +++++++------ ci/tools/run-tests | 88 +++++++++++++++++------- 3 files changed, 100 insertions(+), 77 deletions(-) diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml index 892bcb19056..e1a36bc086b 100644 --- a/.github/workflows/test-wheel-linux.yml +++ b/.github/workflows/test-wheel-linux.yml @@ -205,7 +205,7 @@ jobs: - name: Display structure of downloaded cuda-python artifacts run: | pwd - ls -lahR . 
+ ls -lah cuda_python*.whl cuda_pathfinder/ - name: Display structure of downloaded cuda.bindings artifacts run: | @@ -343,27 +343,24 @@ jobs: CUDA_PATHFINDER_TEST_FIND_NVIDIA_BITCODE_LIB_STRICTNESS: all_must_work run: run-tests pathfinder - # ── Nightly: install all cuda-python wheels ── - - name: Install cuda-python wheels for nightly testing - if: ${{ inputs.test-mode != 'standard' }} + # ── Nightly: install wheels + optional dep together ── + - name: Install cuda-python wheels + PyTorch + if: ${{ inputs.test-mode == 'nightly-pytorch' }} env: CUDA_VER: ${{ matrix.CUDA_VER }} LOCAL_CTK: ${{ matrix.LOCAL_CTK }} - run: run-tests nightly-install + TORCH_VER: ${{ matrix.TORCH_VER }} + TORCH_CUDA: ${{ matrix.TORCH_CUDA }} + run: run-tests nightly-pytorch - # ── Nightly: PyTorch interop tests ── - - name: Install PyTorch - if: ${{ inputs.test-mode == 'nightly-pytorch' }} - run: | - TORCH_VER="${{ matrix.TORCH_VER }}" - TORCH_CUDA="${{ matrix.TORCH_CUDA }}" - if [[ "$TORCH_VER" == "latest" ]]; then - pip install torch --index-url "https://download.pytorch.org/whl/${TORCH_CUDA}" - else - pip install "torch==${TORCH_VER}" --index-url "https://download.pytorch.org/whl/${TORCH_CUDA}" - fi - python -c "import torch; print(f'PyTorch {torch.__version__}, CUDA {torch.version.cuda}')" + - name: Install cuda-python wheels + numba-cuda + if: ${{ inputs.test-mode == 'nightly-numba-cuda' }} + env: + CUDA_VER: ${{ matrix.CUDA_VER }} + LOCAL_CTK: ${{ matrix.LOCAL_CTK }} + run: run-tests nightly-numba-cuda + # ── Nightly: run tests ── - name: Run PyTorch interop tests if: ${{ inputs.test-mode == 'nightly-pytorch' }} run: | @@ -371,13 +368,6 @@ jobs: pytest -rxXs -v --durations=0 tests/test_utils.py tests/example_tests/ popd - # ── Nightly: numba-cuda tests ── - - name: Install numba-cuda - if: ${{ inputs.test-mode == 'nightly-numba-cuda' }} - run: | - pip install numba-cuda - python -c "import numba_cuda; print(f'numba-cuda installed')" - - name: Run numba-cuda tests if: ${{ 
inputs.test-mode == 'nightly-numba-cuda' }} - run: python -m numba_cuda.numba.cuda.tests + run: python -m numba.runtests numba.cuda.tests diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 300ca157bf8..eefc9273594 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -191,7 +191,8 @@ jobs: - name: Display structure of downloaded cuda-python artifacts run: | Get-Location - Get-ChildItem -Recurse -Force | Select-Object Mode, LastWriteTime, Length, FullName + Get-ChildItem cuda_python*.whl | Select-Object Mode, LastWriteTime, Length, FullName + Get-ChildItem cuda_pathfinder/ | Select-Object Mode, LastWriteTime, Length, FullName - name: Display structure of downloaded cuda.bindings artifacts run: | @@ -320,29 +321,33 @@ jobs: shell: bash --noprofile --norc -xeuo pipefail {0} run: run-tests pathfinder - # ── Nightly: install all cuda-python wheels ── - - name: Install cuda-python wheels for nightly testing - if: ${{ inputs.test-mode != 'standard' }} + # ── Nightly: install wheels + optional dep together ── + - name: Install Visual C++ Redistributable (required by PyTorch on Windows) + if: ${{ inputs.test-mode == 'nightly-pytorch' }} + run: | + Invoke-WebRequest -Uri "https://aka.ms/vs/17/release/vc_redist.x64.exe" -OutFile "vc_redist.x64.exe" + Start-Process -FilePath ".\vc_redist.x64.exe" -ArgumentList "/install", "/quiet", "/norestart" -Wait + Remove-Item "vc_redist.x64.exe" + + - name: Install cuda-python wheels + PyTorch + if: ${{ inputs.test-mode == 'nightly-pytorch' }} env: CUDA_VER: ${{ matrix.CUDA_VER }} LOCAL_CTK: ${{ matrix.LOCAL_CTK }} + TORCH_VER: ${{ matrix.TORCH_VER }} + TORCH_CUDA: ${{ matrix.TORCH_CUDA }} shell: bash --noprofile --norc -xeuo pipefail {0} - run: run-tests nightly-install + run: run-tests nightly-pytorch - # ── Nightly: PyTorch interop tests ── - - name: Install PyTorch - if: ${{ inputs.test-mode == 'nightly-pytorch' }} + - name: Install 
cuda-python wheels + numba-cuda + if: ${{ inputs.test-mode == 'nightly-numba-cuda' }} + env: + CUDA_VER: ${{ matrix.CUDA_VER }} + LOCAL_CTK: ${{ matrix.LOCAL_CTK }} shell: bash --noprofile --norc -xeuo pipefail {0} - run: | - TORCH_VER="${{ matrix.TORCH_VER }}" - TORCH_CUDA="${{ matrix.TORCH_CUDA }}" - if [[ "$TORCH_VER" == "latest" ]]; then - pip install torch --index-url "https://download.pytorch.org/whl/${TORCH_CUDA}" - else - pip install "torch==${TORCH_VER}" --index-url "https://download.pytorch.org/whl/${TORCH_CUDA}" - fi - python -c "import torch; print(f'PyTorch {torch.__version__}, CUDA {torch.version.cuda}')" + run: run-tests nightly-numba-cuda + # ── Nightly: run tests ── - name: Run PyTorch interop tests if: ${{ inputs.test-mode == 'nightly-pytorch' }} shell: bash --noprofile --norc -xeuo pipefail {0} @@ -351,15 +356,7 @@ jobs: pytest -rxXs -v --durations=0 tests/test_utils.py tests/example_tests/ popd - # ── Nightly: numba-cuda tests ── - - name: Install numba-cuda - if: ${{ inputs.test-mode == 'nightly-numba-cuda' }} - shell: bash --noprofile --norc -xeuo pipefail {0} - run: | - pip install numba-cuda - python -c "import numba_cuda; print(f'numba-cuda installed')" - - name: Run numba-cuda tests if: ${{ inputs.test-mode == 'nightly-numba-cuda' }} shell: bash --noprofile --norc -xeuo pipefail {0} - run: python -m numba_cuda.numba.cuda.tests + run: python -m numba.runtests numba.cuda.tests diff --git a/ci/tools/run-tests b/ci/tools/run-tests index 3500bfa0d28..f76c45e325c 100755 --- a/ci/tools/run-tests +++ b/ci/tools/run-tests @@ -13,19 +13,22 @@ if [[ ${#} -ne 1 ]]; then echo "Error: This script requires exactly 1 argument. You provided ${#}" exit 1 fi -if [[ "${1}" != "bindings" && "${1}" != "core" && "${1}" != "pathfinder" && "${1}" != "nightly-install" ]]; then - echo "Error: Invalid test module '${1}'. 
Must be 'bindings', 'core', 'pathfinder', or 'nightly-install'" +if [[ "${1}" != "bindings" && "${1}" != "core" && "${1}" != "pathfinder" && "${1}" != "nightly-pytorch" && "${1}" != "nightly-numba-cuda" ]]; then + echo "Error: Invalid test module '${1}'. Must be 'bindings', 'core', 'pathfinder', 'nightly-pytorch', or 'nightly-numba-cuda'" exit 1 fi test_module=${1} -# Unconditionally install pathfinder wheel -# (it is a direct dependency of bindings, and a transitive dependency of core) -pushd ./cuda_pathfinder -echo "Installing pathfinder wheel" -pip install ./*.whl --group test -popd +# For standard modes, install pathfinder up front (it is a direct dependency +# of bindings, and a transitive dependency of core). Nightly modes install +# all wheels together in a single pip call further below. +if [[ "${test_module}" != "nightly-pytorch" && "${test_module}" != "nightly-numba-cuda" ]]; then + pushd ./cuda_pathfinder + echo "Installing pathfinder wheel" + pip install ./*.whl --group test + popd +fi if [[ "${test_module}" == "pathfinder" ]]; then pushd ./cuda_pathfinder @@ -91,35 +94,68 @@ elif [[ "${test_module}" == "core" ]]; then ${SANITIZER_CMD} pytest -rxXs -v --durations=0 --randomly-dont-reorganize tests/cython fi popd -elif [[ "${test_module}" == "nightly-install" ]]; then - # Install all wheels (pathfinder already installed above) without running tests. - # Used by nightly optional-dependency pipelines (pytorch, numba-cuda). - echo "Installing bindings wheel" - pushd ./cuda_bindings - if [[ "${LOCAL_CTK}" == 1 ]]; then - pip install "${CUDA_BINDINGS_ARTIFACTS_DIR}"/*.whl - else - pip install $(ls "${CUDA_BINDINGS_ARTIFACTS_DIR}"/*.whl)[all] - fi - popd +elif [[ "${test_module}" == "nightly-pytorch" || "${test_module}" == "nightly-numba-cuda" ]]; then + # Nightly optional-dependency testing. 
+ # Install ALL wheels (pathfinder + bindings + core) and the optional dep + # in a single pip call so pip resolves version constraints in one shot + # and avoids costly uninstall/reinstall cycles. + # + # We pushd into cuda_core/ so that --group reads test dependency groups + # from cuda_core/pyproject.toml (needed for numpy, cupy, ml_dtypes, etc.). + # All other wheel paths use ../ to reach the repo root. TEST_CUDA_MAJOR="$(cut -d '.' -f 1 <<< ${CUDA_VER})" - echo "Installing core wheel" + CUDA_VER_MINOR="$(cut -d '.' -f 1-2 <<< "${CUDA_VER}")" FREE_THREADING="" if python -c 'import sys; assert not sys._is_gil_enabled()' 2> /dev/null; then FREE_THREADING+="-ft" fi + # Resolve the pathfinder wheel path before pushd (it's relative to repo root). + # CUDA_BINDINGS_ARTIFACTS_DIR and CUDA_CORE_ARTIFACTS_DIR are already absolute + # (set via realpath in env-vars). + PATHFINDER_WHL=($(realpath ./cuda_pathfinder/*.whl)) + pushd ./cuda_core - CUDA_VER_MINOR="$(cut -d '.' -f 1-2 <<< "${CUDA_VER}")" - WHL_EXTRA=("${CUDA_CORE_ARTIFACTS_DIR}"/*.whl) + + # Build wheel specs (paths are absolute, so pushd doesn't affect them) + BINDINGS_WHL=("${CUDA_BINDINGS_ARTIFACTS_DIR}"/*.whl) if [[ "${LOCAL_CTK}" != 1 ]]; then - WHL_EXTRA=("${WHL_EXTRA[0]}[cu${TEST_CUDA_MAJOR}]") + BINDINGS_WHL=("${BINDINGS_WHL[0]}[all]") fi - pip install "${WHL_EXTRA[@]}" --group "test-cu${TEST_CUDA_MAJOR}${FREE_THREADING}" "cuda-toolkit==${CUDA_VER_MINOR}.*" + + CORE_WHL=("${CUDA_CORE_ARTIFACTS_DIR}"/*.whl) + if [[ "${LOCAL_CTK}" != 1 ]]; then + CORE_WHL=("${CORE_WHL[0]}[cu${TEST_CUDA_MAJOR}]") + fi + + # All packages in one pip call: pathfinder + bindings + core + test deps + optional dep + PIP_ARGS=( + "${PATHFINDER_WHL[@]}" + "${BINDINGS_WHL[@]}" + "${CORE_WHL[@]}" + --group "test-cu${TEST_CUDA_MAJOR}${FREE_THREADING}" + "cuda-toolkit==${CUDA_VER_MINOR}.*" + ) + + if [[ "${test_module}" == "nightly-pytorch" ]]; then + # TORCH_VER and TORCH_CUDA must be set by the caller. 
+ echo "Installing pathfinder + bindings + core + test deps + PyTorch ${TORCH_VER} (${TORCH_CUDA})" + if [[ "${TORCH_VER}" == "latest" ]]; then + PIP_ARGS+=(torch) + else + PIP_ARGS+=("torch==${TORCH_VER}") + fi + PIP_ARGS+=(--extra-index-url "https://download.pytorch.org/whl/${TORCH_CUDA}") + elif [[ "${test_module}" == "nightly-numba-cuda" ]]; then + echo "Installing pathfinder + bindings + core + test deps + numba-cuda" + PIP_ARGS+=("numba-cuda[cu${TEST_CUDA_MAJOR}]") + fi + + pip install "${PIP_ARGS[@]}" popd - echo "All cuda-python wheels installed for nightly testing" - pip list | grep -i "cuda\|pathfinder" + echo "Nightly install complete — installed packages:" + pip list fi From edeaa76015bfa221636feefad7c3b6a7c706f994 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Wed, 29 Apr 2026 04:04:16 +0000 Subject: [PATCH 7/9] Match CUDA_VER to TORCH_CUDA in nightly pytorch matrix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CUDA_VER in the test environment should match TORCH_CUDA in major.minor. BUILD_CUDA_VER (from build-ctk-ver input) is used for artifact names, so CUDA_VER can differ. - cu126 → CUDA_VER: 12.6.3 (was 12.9.1) - cu130 → CUDA_VER: 13.0.2 (was 13.2.1) For CUDA 12 entries, USE_BACKPORT_BINDINGS kicks in automatically since BUILD_CUDA_MAJOR (13) != TEST_CUDA_MAJOR (12), pulling bindings from the backport branch. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/test-wheel-linux.yml | 4 + .github/workflows/test-wheel-windows.yml | 5 ++ ci/test-matrix.yml | 16 ++-- ci/tools/patch-numba-cuda | 105 +++++++++++++++++++++++ ci/tools/run-tests | 30 ++++--- 5 files changed, 139 insertions(+), 21 deletions(-) create mode 100755 ci/tools/patch-numba-cuda diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml index e1a36bc086b..a546c8cf47b 100644 --- a/.github/workflows/test-wheel-linux.yml +++ b/.github/workflows/test-wheel-linux.yml @@ -368,6 +368,10 @@ jobs: pytest -rxXs -v --durations=0 tests/test_utils.py tests/example_tests/ popd + - name: Patch numba-cuda (upstream bug workarounds) + if: ${{ inputs.test-mode == 'nightly-numba-cuda' }} + run: python ci/tools/patch-numba-cuda + - name: Run numba-cuda tests if: ${{ inputs.test-mode == 'nightly-numba-cuda' }} run: python -m numba.runtests numba.cuda.tests diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index eefc9273594..0fef1e5b8af 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -356,6 +356,11 @@ jobs: pytest -rxXs -v --durations=0 tests/test_utils.py tests/example_tests/ popd + - name: Patch numba-cuda (upstream bug workarounds) + if: ${{ inputs.test-mode == 'nightly-numba-cuda' }} + shell: bash --noprofile --norc -xeuo pipefail {0} + run: python ci/tools/patch-numba-cuda + - name: Run numba-cuda tests if: ${{ inputs.test-mode == 'nightly-numba-cuda' }} shell: bash --noprofile --norc -xeuo pipefail {0} diff --git a/ci/test-matrix.yml b/ci/test-matrix.yml index d8be9b350a5..19931c3943a 100644 --- a/ci/test-matrix.yml +++ b/ci/test-matrix.yml @@ -64,10 +64,10 @@ linux: - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'rtx4090', GPU_COUNT: '1', DRIVER: 'latest', FLAVOR: 'wsl' } nightly: # nightly-pytorch (amd64 only — PyTorch does not ship arm64 
GPU wheels) - - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: 'latest', TORCH_CUDA: 'cu126' } - - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: 'latest', TORCH_CUDA: 'cu130' } - - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu126' } - - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu130' } + - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: 'latest', TORCH_CUDA: 'cu126' } + - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: 'latest', TORCH_CUDA: 'cu130' } + - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu126' } + - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu130' } # nightly-numba-cuda - { MODE: 'nightly-numba-cuda', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } - { MODE: 'nightly-numba-cuda', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } @@ -97,10 +97,10 @@ windows: - { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'MCDM' } nightly: # 
nightly-pytorch - - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', TORCH_VER: 'latest', TORCH_CUDA: 'cu126' } - - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', TORCH_VER: 'latest', TORCH_CUDA: 'cu130' } - - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu126' } - - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu130' } + - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', TORCH_VER: 'latest', TORCH_CUDA: 'cu126' } + - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', TORCH_VER: 'latest', TORCH_CUDA: 'cu130' } + - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu126' } + - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu130' } # nightly-numba-cuda - { MODE: 'nightly-numba-cuda', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC' } - { MODE: 'nightly-numba-cuda', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', 
DRIVER_MODE: 'TCC' } diff --git a/ci/tools/patch-numba-cuda b/ci/tools/patch-numba-cuda new file mode 100755 index 00000000000..4e0b94c2688 --- /dev/null +++ b/ci/tools/patch-numba-cuda @@ -0,0 +1,105 @@ +#!/usr/bin/env python3 + +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +"""Patch known upstream bugs in installed numba-cuda before running tests. + +These patches are temporary workarounds; each should be removed once the +corresponding upstream fix is released. +""" + +import pathlib +import site + + +def patch_test_linker_indent(): + """Fix indentation bug in test_linker.py. + + add_from_numba and debuggable_kernel reference test_device_functions_ltoir + which is only defined inside ``if TEST_BIN_DIR:``. They must be indented + under that block. + + Upstream: https://github.com/NVIDIA/numba-cuda/blob/200c2b96/ + numba_cuda/numba/cuda/tests/cudadrv/test_linker.py#L120 + """ + # Find the installed test_linker.py across all site-packages paths + rel_path = pathlib.Path("numba_cuda", "numba", "cuda", "tests", "cudadrv", "test_linker.py") + target = None + for sp in site.getsitepackages(): + candidate = pathlib.Path(sp) / rel_path + if candidate.exists(): + target = candidate + break + if target is None: + # Fallback: locate via importlib + try: + import numba_cuda + + pkg_dir = pathlib.Path(numba_cuda.__file__).parent + candidate = pkg_dir / "numba" / "cuda" / "tests" / "cudadrv" / "test_linker.py" + if candidate.exists(): + target = candidate + except ImportError: + pass + if target is None: + print("SKIP: test_linker.py not found in any site-packages") + return + print(f"Found: {target}") + + src = target.read_text() + + old = ( + "\nadd_from_numba = cuda.declare_device(\n" + ' "add_from_numba",\n' + ' "int32(int32, int32)",\n' + " link=[test_device_functions_ltoir],\n" + ")\n" + "\n" + "\n" + "def debuggable_kernel(result):\n" + " i = cuda.grid(1)\n" + " result[i] = 
add_from_numba(i, i)" + ) + + new = ( + "\n add_from_numba = cuda.declare_device(\n" + ' "add_from_numba",\n' + ' "int32(int32, int32)",\n' + " link=[test_device_functions_ltoir],\n" + " )\n" + "\n" + " def debuggable_kernel(result):\n" + " i = cuda.grid(1)\n" + " result[i] = add_from_numba(i, i)" + ) + + if old not in src: + print(f"SKIP: indent patch target not found in {target} (already patched?)") + return + + src = src.replace(old, new) + + # Part 2: add @unittest.skipUnless to test_debug_kernel_with_lto. + # After moving debuggable_kernel inside `if TEST_BIN_DIR:`, the symbol + # is undefined when the env var is not set, so the test must be skipped. + old_test = " def test_debug_kernel_with_lto(self):\n" + new_test = ( + ' @unittest.skipUnless(TEST_BIN_DIR, "NUMBA_CUDA_TEST_BIN_DIR not set")\n' + " def test_debug_kernel_with_lto(self):\n" + ) + + if old_test not in src: + print(f"SKIP: skip-decorator patch target not found in {target}") + elif new_test in src: + print("SKIP: skip decorator already present") + else: + src = src.replace(old_test, new_test, 1) + + target.write_text(src) + print(f"PATCHED: {target}") + + +if __name__ == "__main__": + patch_test_linker_indent() diff --git a/ci/tools/run-tests b/ci/tools/run-tests index f76c45e325c..a4001b7a001 100755 --- a/ci/tools/run-tests +++ b/ci/tools/run-tests @@ -99,10 +99,6 @@ elif [[ "${test_module}" == "nightly-pytorch" || "${test_module}" == "nightly-nu # Install ALL wheels (pathfinder + bindings + core) and the optional dep # in a single pip call so pip resolves version constraints in one shot # and avoids costly uninstall/reinstall cycles. - # - # We pushd into cuda_core/ so that --group reads test dependency groups - # from cuda_core/pyproject.toml (needed for numpy, cupy, ml_dtypes, etc.). - # All other wheel paths use ../ to reach the repo root. TEST_CUDA_MAJOR="$(cut -d '.' -f 1 <<< ${CUDA_VER})" CUDA_VER_MINOR="$(cut -d '.' 
-f 1-2 <<< "${CUDA_VER}")" @@ -112,14 +108,11 @@ elif [[ "${test_module}" == "nightly-pytorch" || "${test_module}" == "nightly-nu FREE_THREADING+="-ft" fi - # Resolve the pathfinder wheel path before pushd (it's relative to repo root). - # CUDA_BINDINGS_ARTIFACTS_DIR and CUDA_CORE_ARTIFACTS_DIR are already absolute - # (set via realpath in env-vars). + # Resolve pathfinder wheel to absolute path before pushd. + # CUDA_BINDINGS_ARTIFACTS_DIR and CUDA_CORE_ARTIFACTS_DIR are already + # absolute (set via realpath in env-vars). PATHFINDER_WHL=($(realpath ./cuda_pathfinder/*.whl)) - pushd ./cuda_core - - # Build wheel specs (paths are absolute, so pushd doesn't affect them) BINDINGS_WHL=("${CUDA_BINDINGS_ARTIFACTS_DIR}"/*.whl) if [[ "${LOCAL_CTK}" != 1 ]]; then BINDINGS_WHL=("${BINDINGS_WHL[0]}[all]") @@ -130,18 +123,23 @@ elif [[ "${test_module}" == "nightly-pytorch" || "${test_module}" == "nightly-nu CORE_WHL=("${CORE_WHL[0]}[cu${TEST_CUDA_MAJOR}]") fi - # All packages in one pip call: pathfinder + bindings + core + test deps + optional dep + # pushd so --group reads test dependency groups from cuda_core/pyproject.toml. + # The explicit cuda-toolkit[...]==X.Y.* pin overrides the group's looser ==X.*. + pushd ./cuda_core + PIP_ARGS=( "${PATHFINDER_WHL[@]}" "${BINDINGS_WHL[@]}" "${CORE_WHL[@]}" --group "test-cu${TEST_CUDA_MAJOR}${FREE_THREADING}" - "cuda-toolkit==${CUDA_VER_MINOR}.*" ) if [[ "${test_module}" == "nightly-pytorch" ]]; then # TORCH_VER and TORCH_CUDA must be set by the caller. + # Use cuda-toolkit[cudart] only — torch brings its own nvcc/nvrtc/etc. + # This avoids version conflicts between our nvidia-* pins and torch's. 
echo "Installing pathfinder + bindings + core + test deps + PyTorch ${TORCH_VER} (${TORCH_CUDA})" + PIP_ARGS+=("cuda-toolkit[cudart]==${CUDA_VER_MINOR}.*") if [[ "${TORCH_VER}" == "latest" ]]; then PIP_ARGS+=(torch) else @@ -150,7 +148,13 @@ elif [[ "${test_module}" == "nightly-pytorch" || "${test_module}" == "nightly-nu PIP_ARGS+=(--extra-index-url "https://download.pytorch.org/whl/${TORCH_CUDA}") elif [[ "${test_module}" == "nightly-numba-cuda" ]]; then echo "Installing pathfinder + bindings + core + test deps + numba-cuda" - PIP_ARGS+=("numba-cuda[cu${TEST_CUDA_MAJOR}]") + # numba-cuda's test-cuXX group deps (can't use --group for a wheel install): + PIP_ARGS+=( + "cuda-toolkit[curand,cublas]==${CUDA_VER_MINOR}.*" + "numba-cuda[cu${TEST_CUDA_MAJOR}]" + "cupy-cuda${TEST_CUDA_MAJOR}x" + psutil cffi pytest-xdist pytest-benchmark filecheck ml_dtypes statistics + ) fi pip install "${PIP_ARGS[@]}" From 6f04205a03f37a1053718c0ccdf93e3ecb1e01fc Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Wed, 29 Apr 2026 04:04:16 +0000 Subject: [PATCH 8/9] Match CUDA_VER to TORCH_CUDA in nightly pytorch matrix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CUDA_VER in the test environment should match TORCH_CUDA in major.minor. BUILD_CUDA_VER (from build-ctk-ver input) is used for artifact names, so CUDA_VER can differ. - cu126 → CUDA_VER: 12.6.3 (was 12.9.1) - cu130 → CUDA_VER: 13.0.2 (was 13.2.1) For CUDA 12 entries, USE_BACKPORT_BINDINGS kicks in automatically since BUILD_CUDA_MAJOR (13) != TEST_CUDA_MAJOR (12), pulling bindings from the backport branch. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- ci/test-matrix.yml | 16 ++++++++-------- ci/tools/run-tests | 30 +++++++++++++++++------------- 2 files changed, 25 insertions(+), 21 deletions(-) diff --git a/ci/test-matrix.yml b/ci/test-matrix.yml index d8be9b350a5..19931c3943a 100644 --- a/ci/test-matrix.yml +++ b/ci/test-matrix.yml @@ -64,10 +64,10 @@ linux: - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'rtx4090', GPU_COUNT: '1', DRIVER: 'latest', FLAVOR: 'wsl' } nightly: # nightly-pytorch (amd64 only — PyTorch does not ship arm64 GPU wheels) - - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: 'latest', TORCH_CUDA: 'cu126' } - - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: 'latest', TORCH_CUDA: 'cu130' } - - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu126' } - - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu130' } + - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: 'latest', TORCH_CUDA: 'cu126' } + - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: 'latest', TORCH_CUDA: 'cu130' } + - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu126' } + - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', 
GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu130' } # nightly-numba-cuda - { MODE: 'nightly-numba-cuda', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } - { MODE: 'nightly-numba-cuda', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } @@ -97,10 +97,10 @@ windows: - { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'MCDM' } nightly: # nightly-pytorch - - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', TORCH_VER: 'latest', TORCH_CUDA: 'cu126' } - - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', TORCH_VER: 'latest', TORCH_CUDA: 'cu130' } - - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu126' } - - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu130' } + - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', TORCH_VER: 'latest', TORCH_CUDA: 'cu126' } + - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', TORCH_VER: 'latest', TORCH_CUDA: 'cu130' } + - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', TORCH_VER: 
'2.9.1', TORCH_CUDA: 'cu126' } + - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu130' } # nightly-numba-cuda - { MODE: 'nightly-numba-cuda', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC' } - { MODE: 'nightly-numba-cuda', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC' } diff --git a/ci/tools/run-tests b/ci/tools/run-tests index f76c45e325c..a4001b7a001 100755 --- a/ci/tools/run-tests +++ b/ci/tools/run-tests @@ -99,10 +99,6 @@ elif [[ "${test_module}" == "nightly-pytorch" || "${test_module}" == "nightly-nu # Install ALL wheels (pathfinder + bindings + core) and the optional dep # in a single pip call so pip resolves version constraints in one shot # and avoids costly uninstall/reinstall cycles. - # - # We pushd into cuda_core/ so that --group reads test dependency groups - # from cuda_core/pyproject.toml (needed for numpy, cupy, ml_dtypes, etc.). - # All other wheel paths use ../ to reach the repo root. TEST_CUDA_MAJOR="$(cut -d '.' -f 1 <<< ${CUDA_VER})" CUDA_VER_MINOR="$(cut -d '.' -f 1-2 <<< "${CUDA_VER}")" @@ -112,14 +108,11 @@ elif [[ "${test_module}" == "nightly-pytorch" || "${test_module}" == "nightly-nu FREE_THREADING+="-ft" fi - # Resolve the pathfinder wheel path before pushd (it's relative to repo root). - # CUDA_BINDINGS_ARTIFACTS_DIR and CUDA_CORE_ARTIFACTS_DIR are already absolute - # (set via realpath in env-vars). + # Resolve pathfinder wheel to absolute path before pushd. + # CUDA_BINDINGS_ARTIFACTS_DIR and CUDA_CORE_ARTIFACTS_DIR are already + # absolute (set via realpath in env-vars). 
PATHFINDER_WHL=($(realpath ./cuda_pathfinder/*.whl)) - pushd ./cuda_core - - # Build wheel specs (paths are absolute, so pushd doesn't affect them) BINDINGS_WHL=("${CUDA_BINDINGS_ARTIFACTS_DIR}"/*.whl) if [[ "${LOCAL_CTK}" != 1 ]]; then BINDINGS_WHL=("${BINDINGS_WHL[0]}[all]") @@ -130,18 +123,23 @@ elif [[ "${test_module}" == "nightly-pytorch" || "${test_module}" == "nightly-nu CORE_WHL=("${CORE_WHL[0]}[cu${TEST_CUDA_MAJOR}]") fi - # All packages in one pip call: pathfinder + bindings + core + test deps + optional dep + # pushd so --group reads test dependency groups from cuda_core/pyproject.toml. + # The explicit cuda-toolkit[...]==X.Y.* pin overrides the group's looser ==X.*. + pushd ./cuda_core + PIP_ARGS=( "${PATHFINDER_WHL[@]}" "${BINDINGS_WHL[@]}" "${CORE_WHL[@]}" --group "test-cu${TEST_CUDA_MAJOR}${FREE_THREADING}" - "cuda-toolkit==${CUDA_VER_MINOR}.*" ) if [[ "${test_module}" == "nightly-pytorch" ]]; then # TORCH_VER and TORCH_CUDA must be set by the caller. + # Use cuda-toolkit[cudart] only — torch brings its own nvcc/nvrtc/etc. + # This avoids version conflicts between our nvidia-* pins and torch's. 
echo "Installing pathfinder + bindings + core + test deps + PyTorch ${TORCH_VER} (${TORCH_CUDA})" + PIP_ARGS+=("cuda-toolkit[cudart]==${CUDA_VER_MINOR}.*") if [[ "${TORCH_VER}" == "latest" ]]; then PIP_ARGS+=(torch) else @@ -150,7 +148,13 @@ elif [[ "${test_module}" == "nightly-pytorch" || "${test_module}" == "nightly-nu PIP_ARGS+=(--extra-index-url "https://download.pytorch.org/whl/${TORCH_CUDA}") elif [[ "${test_module}" == "nightly-numba-cuda" ]]; then echo "Installing pathfinder + bindings + core + test deps + numba-cuda" - PIP_ARGS+=("numba-cuda[cu${TEST_CUDA_MAJOR}]") + # numba-cuda's test-cuXX group deps (can't use --group for a wheel install): + PIP_ARGS+=( + "cuda-toolkit[curand,cublas]==${CUDA_VER_MINOR}.*" + "numba-cuda[cu${TEST_CUDA_MAJOR}]" + "cupy-cuda${TEST_CUDA_MAJOR}x" + psutil cffi pytest-xdist pytest-benchmark filecheck ml_dtypes statistics + ) fi pip install "${PIP_ARGS[@]}" From cec1d8d9f443374970abca8bf8317661831d37b3 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Thu, 30 Apr 2026 19:49:57 +0000 Subject: [PATCH 9/9] Remove numba-cuda test_linker.py patch (fixed upstream) The indentation bug in test_linker.py was fixed in the latest numba-cuda release, so the workaround patch is no longer needed. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/test-wheel-linux.yml | 4 - .github/workflows/test-wheel-windows.yml | 5 -- ci/tools/patch-numba-cuda | 105 ----------------------- 3 files changed, 114 deletions(-) delete mode 100755 ci/tools/patch-numba-cuda diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml index a546c8cf47b..e1a36bc086b 100644 --- a/.github/workflows/test-wheel-linux.yml +++ b/.github/workflows/test-wheel-linux.yml @@ -368,10 +368,6 @@ jobs: pytest -rxXs -v --durations=0 tests/test_utils.py tests/example_tests/ popd - - name: Patch numba-cuda (upstream bug workarounds) - if: ${{ inputs.test-mode == 'nightly-numba-cuda' }} - run: python ci/tools/patch-numba-cuda - - name: Run numba-cuda tests if: ${{ inputs.test-mode == 'nightly-numba-cuda' }} run: python -m numba.runtests numba.cuda.tests diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 0fef1e5b8af..eefc9273594 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -356,11 +356,6 @@ jobs: pytest -rxXs -v --durations=0 tests/test_utils.py tests/example_tests/ popd - - name: Patch numba-cuda (upstream bug workarounds) - if: ${{ inputs.test-mode == 'nightly-numba-cuda' }} - shell: bash --noprofile --norc -xeuo pipefail {0} - run: python ci/tools/patch-numba-cuda - - name: Run numba-cuda tests if: ${{ inputs.test-mode == 'nightly-numba-cuda' }} shell: bash --noprofile --norc -xeuo pipefail {0} diff --git a/ci/tools/patch-numba-cuda b/ci/tools/patch-numba-cuda deleted file mode 100755 index 4e0b94c2688..00000000000 --- a/ci/tools/patch-numba-cuda +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/env python3 - -# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Patch known upstream bugs in installed numba-cuda before running tests. 
- -These patches are temporary workarounds; each should be removed once the -corresponding upstream fix is released. -""" - -import pathlib -import site - - -def patch_test_linker_indent(): - """Fix indentation bug in test_linker.py. - - add_from_numba and debuggable_kernel reference test_device_functions_ltoir - which is only defined inside ``if TEST_BIN_DIR:``. They must be indented - under that block. - - Upstream: https://github.com/NVIDIA/numba-cuda/blob/200c2b96/ - numba_cuda/numba/cuda/tests/cudadrv/test_linker.py#L120 - """ - # Find the installed test_linker.py across all site-packages paths - rel_path = pathlib.Path("numba_cuda", "numba", "cuda", "tests", "cudadrv", "test_linker.py") - target = None - for sp in site.getsitepackages(): - candidate = pathlib.Path(sp) / rel_path - if candidate.exists(): - target = candidate - break - if target is None: - # Fallback: locate via importlib - try: - import numba_cuda - - pkg_dir = pathlib.Path(numba_cuda.__file__).parent - candidate = pkg_dir / "numba" / "cuda" / "tests" / "cudadrv" / "test_linker.py" - if candidate.exists(): - target = candidate - except ImportError: - pass - if target is None: - print("SKIP: test_linker.py not found in any site-packages") - return - print(f"Found: {target}") - - src = target.read_text() - - old = ( - "\nadd_from_numba = cuda.declare_device(\n" - ' "add_from_numba",\n' - ' "int32(int32, int32)",\n' - " link=[test_device_functions_ltoir],\n" - ")\n" - "\n" - "\n" - "def debuggable_kernel(result):\n" - " i = cuda.grid(1)\n" - " result[i] = add_from_numba(i, i)" - ) - - new = ( - "\n add_from_numba = cuda.declare_device(\n" - ' "add_from_numba",\n' - ' "int32(int32, int32)",\n' - " link=[test_device_functions_ltoir],\n" - " )\n" - "\n" - " def debuggable_kernel(result):\n" - " i = cuda.grid(1)\n" - " result[i] = add_from_numba(i, i)" - ) - - if old not in src: - print(f"SKIP: indent patch target not found in {target} (already patched?)") - return - - src = src.replace(old, new) - - 
# Part 2: add @unittest.skipUnless to test_debug_kernel_with_lto. - # After moving debuggable_kernel inside `if TEST_BIN_DIR:`, the symbol - # is undefined when the env var is not set, so the test must be skipped. - old_test = " def test_debug_kernel_with_lto(self):\n" - new_test = ( - ' @unittest.skipUnless(TEST_BIN_DIR, "NUMBA_CUDA_TEST_BIN_DIR not set")\n' - " def test_debug_kernel_with_lto(self):\n" - ) - - if old_test not in src: - print(f"SKIP: skip-decorator patch target not found in {target}") - elif new_test in src: - print("SKIP: skip decorator already present") - else: - src = src.replace(old_test, new_test, 1) - - target.write_text(src) - print(f"PATCHED: {target}") - - -if __name__ == "__main__": - patch_test_linker_indent()