From 933ae57c20b4311198c0aa407d36b52e3d3ad9dd Mon Sep 17 00:00:00 2001
From: Leo Fang
Date: Mon, 29 Sep 2025 02:09:56 +0000
Subject: [PATCH 1/2] backport PR #986

---
Review notes (free-form text in this section is ignored when the patch
is applied):

* The "verify free-threaded build" step added below now reads
  matrix.python-version. It was previously spelled
  "matrix.python-verison", which is not the python-version key the build
  matrix defines; the endsWith() condition therefore never matched and
  the GIL check was skipped on every job, including the "t" builds.
* NOTE(review): leading whitespace on the hunk lines was reconstructed;
  confirm it against the target files or apply with --ignore-whitespace.
* NOTE(review): the "index" blob ids were recorded before the spelling
  fix, so they no longer identify the patched build-wheel.yml exactly.

 .github/workflows/build-wheel.yml |  7 +++++++
 ci/tools/run-tests                | 10 ++++++++--
 cuda_bindings/pyproject.toml      |  4 ++--
 3 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/build-wheel.yml b/.github/workflows/build-wheel.yml
index 34bb146dc7..a25f8444ed 100644
--- a/.github/workflows/build-wheel.yml
+++ b/.github/workflows/build-wheel.yml
@@ -30,6 +30,7 @@ jobs:
           - "3.11"
           - "3.12"
           - "3.13"
+          - "3.13t"
     name: py${{ matrix.python-version }}
     runs-on: ${{ (inputs.host-platform == 'linux-64' && 'linux-amd64-cpu8') ||
                  (inputs.host-platform == 'linux-aarch64' && 'linux-arm64-cpu8') ||
@@ -116,6 +117,7 @@ jobs:
           CIBW_REPAIR_WHEEL_COMMAND_WINDOWS: "delvewheel repair --namespace-pkg cuda -w {dest_dir} {wheel}"
           CIBW_ENVIRONMENT: >
             CUDA_PYTHON_PARALLEL_LEVEL=${{ env.CUDA_PYTHON_PARALLEL_LEVEL }}
+          CIBW_ENABLE: "cpython-freethreading"
         with:
           package-dir: ./cuda_core/
           output-dir: ${{ env.CUDA_CORE_ARTIFACTS_DIR }}
@@ -163,6 +165,7 @@ jobs:
             CUDA_PYTHON_PARALLEL_LEVEL=${{ env.CUDA_PYTHON_PARALLEL_LEVEL }}
           CIBW_BEFORE_BUILD_WINDOWS: "pip install delvewheel"
           CIBW_REPAIR_WHEEL_COMMAND_WINDOWS: "delvewheel repair --namespace-pkg cuda -w {dest_dir} {wheel}"
+          CIBW_ENABLE: "cpython-freethreading"
         with:
           package-dir: ./cuda_bindings/
           output-dir: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}
@@ -223,6 +226,10 @@ jobs:
           # workaround for actions/runner-images#12377 (the cached 3.13.4 is buggy on Windows)
           python-version: ${{ matrix.python-version == '3.13' && '3.13.5' || matrix.python-version }}
 
+      - name: verify free-threaded build
+        if: endsWith(matrix.python-version, 't')
+        run: python -c 'import sys; assert not sys._is_gil_enabled()'
+
       - name: Set up Python include paths
         run: |
           if [[ "${{ inputs.host-platform }}" == linux* ]]; then
diff --git a/ci/tools/run-tests b/ci/tools/run-tests
index ad1dbd6a9e..e8317d40c9 100755
--- a/ci/tools/run-tests
+++ b/ci/tools/run-tests
@@ -75,12 +75,18 @@ elif [[ "${test_module}" == "core" ]]; then
     echo "Installing core wheel"
     pwd
     ls
+
+    FREE_THREADING=""
+    if python -c 'import sys; assert not sys._is_gil_enabled()' 2> /dev/null; then
+        FREE_THREADING+="-ft"
+    fi
+
     if [[ "${LOCAL_CTK}" == 1 ]]; then
         # We already installed cuda-bindings, and all CTK components exist locally,
         # so just install the test dependencies.
-        pip install $(ls *.whl)["test-cu${TEST_CUDA_MAJOR}"]
+        pip install $(ls *.whl)["test-cu${TEST_CUDA_MAJOR}${FREE_THREADING}"]
     else
-        pip install $(ls *.whl)["cu${TEST_CUDA_MAJOR}","test-cu${TEST_CUDA_MAJOR}"]
+        pip install $(ls *.whl)["cu${TEST_CUDA_MAJOR}","test-cu${TEST_CUDA_MAJOR}${FREE_THREADING}"]
     fi
     popd
     pushd ./cuda_core
diff --git a/cuda_bindings/pyproject.toml b/cuda_bindings/pyproject.toml
index 4ef63b6ab6..56323b561d 100644
--- a/cuda_bindings/pyproject.toml
+++ b/cuda_bindings/pyproject.toml
@@ -2,7 +2,7 @@
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
 [build-system]
-requires = ["setuptools>=77.0.0", "cython>=3.0,<3.1.0", "pyclibrary>=0.1.7"]
+requires = ["setuptools>=77.0.0", "cython>=3.1,<3.2", "pyclibrary>=0.1.7"]
 build-backend = "setuptools.build_meta"
 
 [project]
@@ -40,7 +40,7 @@ all = [
 ]
 
 test = [
-    "cython>=3.0,<3.1.0",
+    "cython>=3.1,<3.2",
     "setuptools>=77.0.0",
     "numpy>=1.21.1",
     "pytest>=6.2.4",

From c1f4a95560c0aacbdeb65b46055bc20c6ada993b Mon Sep 17 00:00:00 2001
From: Leo Fang
Date: Mon, 29 Sep 2025 02:16:28 +0000
Subject: [PATCH 2/2] backport PR #1005

---
Review notes (free-form text in this section is ignored when the patch
is applied):

* The unchanged "if: endsWith(matrix.python-version, 't')" context line
  in the second build-wheel.yml hunk uses the corrected key spelling so
  that this patch still applies on top of patch 1/2 of this series.
* NOTE(review): leading whitespace on the hunk lines was reconstructed;
  confirm it against the target files or apply with --ignore-whitespace.

 .github/workflows/build-wheel.yml        | 4 ++++
 .github/workflows/test-wheel-linux.yml   | 1 +
 .github/workflows/test-wheel-windows.yml | 1 +
 ci/test-matrix.json                      | 8 ++++++--
 4 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/build-wheel.yml b/.github/workflows/build-wheel.yml
index a25f8444ed..96b4e501da 100644
--- a/.github/workflows/build-wheel.yml
+++ b/.github/workflows/build-wheel.yml
@@ -31,6 +31,8 @@ jobs:
           - "3.12"
           - "3.13"
           - "3.13t"
+          - "3.14"
+          - "3.14t"
     name: py${{ matrix.python-version }}
     runs-on: ${{ (inputs.host-platform == 'linux-64' && 'linux-amd64-cpu8') ||
                  (inputs.host-platform == 'linux-aarch64' && 'linux-arm64-cpu8') ||
@@ -225,6 +227,8 @@ jobs:
         with:
           # workaround for actions/runner-images#12377 (the cached 3.13.4 is buggy on Windows)
           python-version: ${{ matrix.python-version == '3.13' && '3.13.5' || matrix.python-version }}
+          # allow prereleases only fetches prereleases for unreleased versions of Python
+          allow-prereleases: true
 
       - name: verify free-threaded build
         if: endsWith(matrix.python-version, 't')
diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml
index baca1675ad..0653f99e52 100644
--- a/.github/workflows/test-wheel-linux.yml
+++ b/.github/workflows/test-wheel-linux.yml
@@ -215,6 +215,7 @@ jobs:
         uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
         with:
           python-version: ${{ matrix.PY_VER }}
+          allow-prereleases: true
         env:
           # we use self-hosted runners on which setup-python behaves weirdly...
           AGENT_TOOLSDIRECTORY: "/opt/hostedtoolcache"
diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml
index 27a91982b5..7c3fa73fbf 100644
--- a/.github/workflows/test-wheel-windows.yml
+++ b/.github/workflows/test-wheel-windows.yml
@@ -203,6 +203,7 @@ jobs:
         uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
         with:
           python-version: ${{ matrix.PY_VER }}
+          allow-prereleases: true
 
       - name: Set up mini CTK
         if: ${{ matrix.LOCAL_CTK == '1' }}
diff --git a/ci/test-matrix.json b/ci/test-matrix.json
index f9f89bea49..e30ac1878c 100644
--- a/ci/test-matrix.json
+++ b/ci/test-matrix.json
@@ -14,7 +14,9 @@
       { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "13.0.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" },
       { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" },
       { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
-      { "ARCH": "amd64", "PY_VER": "3.13t", "CUDA_VER": "13.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
+      { "ARCH": "amd64", "PY_VER": "3.13t", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
+      { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
+      { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" },
       { "ARCH": "arm64", "PY_VER": "3.9", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" },
       { "ARCH": "arm64", "PY_VER": "3.9", "CUDA_VER": "13.0.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
       { "ARCH": "arm64", "PY_VER": "3.10", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
@@ -25,7 +27,9 @@
       { "ARCH": "arm64", "PY_VER": "3.12", "CUDA_VER": "13.0.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" },
       { "ARCH": "arm64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" },
       { "ARCH": "arm64", "PY_VER": "3.13", "CUDA_VER": "13.0.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
-      { "ARCH": "arm64", "PY_VER": "3.13t", "CUDA_VER": "13.0.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }
+      { "ARCH": "arm64", "PY_VER": "3.13t", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
+      { "ARCH": "arm64", "PY_VER": "3.14", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
+      { "ARCH": "arm64", "PY_VER": "3.14t", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" }
     ],
     "nightly": [
       { "ARCH": "amd64", "PY_VER": "3.9", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "earliest" },