From 7aa41b286619859bad2a2fbbb8b1b1099b41faca Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Mon, 27 Oct 2025 15:03:08 -0400
Subject: [PATCH 1/9] Reapply "refactor(python): drop support for 3.9, document
3.14 support (#1069)" (#1109)
This reverts commit fcd7b99e2ea81343a3477fbcadec8559f2f4ec4b.
---
.github/workflows/build-wheel.yml | 1 -
CONTRIBUTING.md | 2 +-
ci/test-matrix.json | 14 ----------
cuda_bindings/docs/source/install.rst | 2 +-
cuda_bindings/docs/source/support.rst | 2 +-
cuda_bindings/pyproject.toml | 3 ++-
cuda_core/cuda/core/experimental/__init__.py | 11 --------
cuda_core/cuda/core/experimental/_device.pyx | 10 +++----
.../cuda/core/experimental/_launch_config.py | 6 ++---
cuda_core/cuda/core/experimental/_linker.py | 2 +-
cuda_core/cuda/core/experimental/_module.py | 26 +++++++-----------
cuda_core/cuda/core/experimental/_program.py | 2 +-
cuda_core/docs/source/install.rst | 3 ++-
cuda_core/pyproject.toml | 4 +--
.../_dynamic_libs/find_nvidia_dynamic_lib.py | 27 +++++++++----------
.../_dynamic_libs/load_dl_common.py | 4 +--
.../pathfinder/_dynamic_libs/load_dl_linux.py | 12 ++++-----
.../_dynamic_libs/load_dl_windows.py | 10 +++----
.../_headers/find_nvidia_headers.py | 9 +++----
.../cuda/pathfinder/_utils/env_vars.py | 3 +--
.../_utils/find_site_packages_dll.py | 7 ++++-
.../_utils/find_site_packages_so.py | 6 ++++-
cuda_pathfinder/pyproject.toml | 4 +--
.../tests/spawned_process_runner.py | 10 +++----
cuda_python/pyproject.toml | 4 ++-
ruff.toml | 3 ++-
toolshed/make_site_packages_libdirs.py | 4 +--
27 files changed, 83 insertions(+), 108 deletions(-)
diff --git a/.github/workflows/build-wheel.yml b/.github/workflows/build-wheel.yml
index 57a8581f15..6f683d3aed 100644
--- a/.github/workflows/build-wheel.yml
+++ b/.github/workflows/build-wheel.yml
@@ -28,7 +28,6 @@ jobs:
fail-fast: false
matrix:
python-version:
- - "3.9"
- "3.10"
- "3.11"
- "3.12"
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 183d215865..67bd568d85 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -113,7 +113,7 @@ flowchart TD
B2["linux-aarch64
(Self-hosted)"]
B3["win-64
(GitHub-hosted)"]
end
- BUILD_DETAILS["• Python versions: 3.9, 3.10, 3.11, 3.12, 3.13
- • CUDA version: 13.0.0 (build-time)
- • Components: cuda-core, cuda-bindings,
- cuda-pathfinder, cuda-python"]
+ BUILD_DETAILS["• Python versions: 3.10, 3.11, 3.12, 3.13, 3.14
+ • CUDA version: 13.0.0 (build-time)
+ • Components: cuda-core, cuda-bindings,
+ cuda-pathfinder, cuda-python"]
end
%% Artifact Storage
diff --git a/ci/test-matrix.json b/ci/test-matrix.json
index 4b60779ec9..a8084442dc 100644
--- a/ci/test-matrix.json
+++ b/ci/test-matrix.json
@@ -4,8 +4,6 @@
"_notes": "DRIVER: 'earliest' does not work with CUDA 12.9.1 and LOCAL_CTK: 0 does not work with CUDA 12.0.1",
"linux": {
"pull-request": [
- { "ARCH": "amd64", "PY_VER": "3.9", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" },
- { "ARCH": "amd64", "PY_VER": "3.9", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
{ "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
{ "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" },
{ "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" },
@@ -16,8 +14,6 @@
{ "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
{ "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
{ "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
- { "ARCH": "arm64", "PY_VER": "3.9", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" },
- { "ARCH": "arm64", "PY_VER": "3.9", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
{ "ARCH": "arm64", "PY_VER": "3.10", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
{ "ARCH": "arm64", "PY_VER": "3.10", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" },
{ "ARCH": "arm64", "PY_VER": "3.11", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" },
@@ -30,11 +26,6 @@
{ "ARCH": "arm64", "PY_VER": "3.14t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }
],
"nightly": [
- { "ARCH": "amd64", "PY_VER": "3.9", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "earliest" },
- { "ARCH": "amd64", "PY_VER": "3.9", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
- { "ARCH": "amd64", "PY_VER": "3.9", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
- { "ARCH": "amd64", "PY_VER": "3.9", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" },
- { "ARCH": "amd64", "PY_VER": "3.9", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
{ "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "earliest" },
{ "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
{ "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
@@ -55,11 +46,6 @@
{ "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
{ "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" },
{ "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
- { "ARCH": "arm64", "PY_VER": "3.9", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "earliest" },
- { "ARCH": "arm64", "PY_VER": "3.9", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
- { "ARCH": "arm64", "PY_VER": "3.9", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
- { "ARCH": "arm64", "PY_VER": "3.9", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" },
- { "ARCH": "arm64", "PY_VER": "3.9", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
{ "ARCH": "arm64", "PY_VER": "3.10", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "earliest" },
{ "ARCH": "arm64", "PY_VER": "3.10", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
{ "ARCH": "arm64", "PY_VER": "3.10", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
diff --git a/cuda_bindings/docs/source/install.rst b/cuda_bindings/docs/source/install.rst
index 9836d172ac..58a6a0f31c 100644
--- a/cuda_bindings/docs/source/install.rst
+++ b/cuda_bindings/docs/source/install.rst
@@ -10,7 +10,7 @@ Runtime Requirements
``cuda.bindings`` supports the same platforms as CUDA. Runtime dependencies are:
* Linux (x86-64, arm64) and Windows (x86-64)
-* Python 3.9 - 3.14
+* Python 3.10 - 3.14
* Driver: Linux (580.65.06 or later) Windows (580.88 or later)
* Optionally, NVRTC, nvJitLink, NVVM, and cuFile from CUDA Toolkit 13.x
diff --git a/cuda_bindings/docs/source/support.rst b/cuda_bindings/docs/source/support.rst
index a34a5c49e2..4439d963c0 100644
--- a/cuda_bindings/docs/source/support.rst
+++ b/cuda_bindings/docs/source/support.rst
@@ -19,7 +19,7 @@ The ``cuda.bindings`` module has the following support policy:
depends on the underlying driver and the Toolkit versions, as described in the compatibility
documentation.)
4. The module supports all Python versions following the `CPython EOL schedule`_. As of writing
- Python 3.9 - 3.13 are supported.
+ Python 3.10 - 3.14 are supported.
5. The module exposes a Cython layer from which types and functions could be ``cimport``'d. While
we strive to keep this layer stable, due to Cython limitations a new *minor* release of this
module could require Cython layer users to rebuild their projects and update their pinning to
diff --git a/cuda_bindings/pyproject.toml b/cuda_bindings/pyproject.toml
index bc0dbf1977..7523682591 100644
--- a/cuda_bindings/pyproject.toml
+++ b/cuda_bindings/pyproject.toml
@@ -9,16 +9,17 @@ name = "cuda-bindings"
description = "Python bindings for CUDA"
authors = [{name = "NVIDIA Corporation", email = "cuda-python-conduct@nvidia.com"},]
license = "LicenseRef-NVIDIA-SOFTWARE-LICENSE"
+requires-python = ">=3.10"
classifiers = [
"Intended Audience :: Developers",
"Topic :: Database",
"Topic :: Scientific/Engineering",
"Programming Language :: Python",
- "Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
+ "Programming Language :: Python :: 3.14",
"Environment :: GPU :: NVIDIA CUDA",
]
dynamic = [
diff --git a/cuda_core/cuda/core/experimental/__init__.py b/cuda_core/cuda/core/experimental/__init__.py
index 94fb0aa083..8a60c031c5 100644
--- a/cuda_core/cuda/core/experimental/__init__.py
+++ b/cuda_core/cuda/core/experimental/__init__.py
@@ -26,17 +26,6 @@
finally:
del cuda.bindings, importlib, subdir, cuda_major, cuda_minor
-import sys # noqa: E402
-import warnings # noqa: E402
-
-if sys.version_info < (3, 10):
- warnings.warn(
- "support for Python 3.9 and below is deprecated and subject to future removal",
- category=FutureWarning,
- stacklevel=1,
- )
-del sys, warnings
-
from cuda.core.experimental import utils # noqa: E402
from cuda.core.experimental._device import Device # noqa: E402
from cuda.core.experimental._event import Event, EventOptions # noqa: E402
diff --git a/cuda_core/cuda/core/experimental/_device.pyx b/cuda_core/cuda/core/experimental/_device.pyx
index 1db2adbf8d..d800a3c172 100644
--- a/cuda_core/cuda/core/experimental/_device.pyx
+++ b/cuda_core/cuda/core/experimental/_device.pyx
@@ -10,7 +10,7 @@ from cuda.bindings cimport cydriver
from cuda.core.experimental._utils.cuda_utils cimport HANDLE_RETURN
import threading
-from typing import Optional, Union
+from typing import Union
from cuda.core.experimental._context import Context, ContextOptions
from cuda.core.experimental._event import Event, EventOptions
@@ -951,7 +951,7 @@ class Device:
"""
__slots__ = ("_id", "_mr", "_has_inited", "_properties")
- def __new__(cls, device_id: Optional[int] = None):
+ def __new__(cls, device_id: int | None = None):
global _is_cuInit
if _is_cuInit is False:
with _lock, nogil:
@@ -1223,7 +1223,7 @@ class Device:
"""
raise NotImplementedError("WIP: https://github.com/NVIDIA/cuda-python/issues/189")
- def create_stream(self, obj: Optional[IsStreamT] = None, options: Optional[StreamOptions] = None) -> Stream:
+ def create_stream(self, obj: IsStreamT | None = None, options: StreamOptions | None = None) -> Stream:
"""Create a Stream object.
New stream objects can be created in two different ways:
@@ -1254,7 +1254,7 @@ class Device:
self._check_context_initialized()
return Stream._init(obj=obj, options=options, device_id=self._id)
- def create_event(self, options: Optional[EventOptions] = None) -> Event:
+ def create_event(self, options: EventOptions | None = None) -> Event:
"""Create an Event object without recording it to a Stream.
Note
@@ -1276,7 +1276,7 @@ class Device:
ctx = self._get_current_context()
return Event._init(self._id, ctx, options, True)
- def allocate(self, size, stream: Optional[Stream] = None) -> Buffer:
+ def allocate(self, size, stream: Stream | None = None) -> Buffer:
"""Allocate device memory from a specified stream.
Allocates device memory of `size` bytes on the specified `stream`
diff --git a/cuda_core/cuda/core/experimental/_launch_config.py b/cuda_core/cuda/core/experimental/_launch_config.py
index d82e0ec3a2..c1e08da58d 100644
--- a/cuda_core/cuda/core/experimental/_launch_config.py
+++ b/cuda_core/cuda/core/experimental/_launch_config.py
@@ -3,7 +3,7 @@
# SPDX-License-Identifier: Apache-2.0
from dataclasses import dataclass
-from typing import Optional, Union
+from typing import Union
from cuda.core.experimental._device import Device
from cuda.core.experimental._utils.cuda_utils import (
@@ -68,8 +68,8 @@ class LaunchConfig:
grid: Union[tuple, int] = None
cluster: Union[tuple, int] = None
block: Union[tuple, int] = None
- shmem_size: Optional[int] = None
- cooperative_launch: Optional[bool] = False
+ shmem_size: int | None = None
+ cooperative_launch: bool | None = False
def __post_init__(self):
_lazy_init()
diff --git a/cuda_core/cuda/core/experimental/_linker.py b/cuda_core/cuda/core/experimental/_linker.py
index a3fa4b3e48..5c54a88c8c 100644
--- a/cuda_core/cuda/core/experimental/_linker.py
+++ b/cuda_core/cuda/core/experimental/_linker.py
@@ -343,7 +343,7 @@ def _exception_manager(self):
# our constructor could raise, in which case there's no handle available
error_log = self.get_error_log()
# Starting Python 3.11 we could also use Exception.add_note() for the same purpose, but
- # unfortunately we are still supporting Python 3.9/3.10...
+ # unfortunately we are still supporting Python 3.10...
# Here we rely on both CUDAError and nvJitLinkError have the error string placed in .args[0].
e.args = (e.args[0] + (f"\nLinker error log: {error_log}" if error_log else ""), *e.args[1:])
raise e
diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py
index 2c7ea3a156..f8ce8f95d0 100644
--- a/cuda_core/cuda/core/experimental/_module.py
+++ b/cuda_core/cuda/core/experimental/_module.py
@@ -4,7 +4,7 @@
import weakref
from collections import namedtuple
-from typing import Optional, Union
+from typing import Union
from warnings import warn
from cuda.core.experimental._launch_config import LaunchConfig, _to_native_launch_config
@@ -310,7 +310,7 @@ def available_dynamic_shared_memory_per_block(self, num_blocks_per_multiprocesso
driver.cuOccupancyAvailableDynamicSMemPerBlock(self._handle, num_blocks_per_multiprocessor, block_size)
)
- def max_potential_cluster_size(self, config: LaunchConfig, stream: Optional[Stream] = None) -> int:
+ def max_potential_cluster_size(self, config: LaunchConfig, stream: Stream | None = None) -> int:
"""Maximum potential cluster size.
The maximum potential cluster size for this kernel and given launch configuration.
@@ -332,7 +332,7 @@ def max_potential_cluster_size(self, config: LaunchConfig, stream: Optional[Stre
drv_cfg.hStream = stream.handle
return handle_return(driver.cuOccupancyMaxPotentialClusterSize(self._handle, drv_cfg))
- def max_active_clusters(self, config: LaunchConfig, stream: Optional[Stream] = None) -> int:
+ def max_active_clusters(self, config: LaunchConfig, stream: Stream | None = None) -> int:
"""Maximum number of active clusters on the target device.
The maximum number of clusters that could concurrently execute on the target device.
@@ -469,7 +469,7 @@ def __new__(self, *args, **kwargs):
)
@classmethod
- def _init(cls, module, code_type, *, name: str = "", symbol_mapping: Optional[dict] = None):
+ def _init(cls, module, code_type, *, name: str = "", symbol_mapping: dict | None = None):
self = super().__new__(cls)
assert code_type in self._supported_code_type, f"{code_type=} is not supported"
_lazy_init()
@@ -496,7 +496,7 @@ def __reduce__(self):
return ObjectCode._reduce_helper, (self._module, self._code_type, self._name, self._sym_map)
@staticmethod
- def from_cubin(module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None) -> "ObjectCode":
+ def from_cubin(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
"""Create an :class:`ObjectCode` instance from an existing cubin.
Parameters
@@ -514,7 +514,7 @@ def from_cubin(module: Union[bytes, str], *, name: str = "", symbol_mapping: Opt
return ObjectCode._init(module, "cubin", name=name, symbol_mapping=symbol_mapping)
@staticmethod
- def from_ptx(module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None) -> "ObjectCode":
+ def from_ptx(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
"""Create an :class:`ObjectCode` instance from an existing PTX.
Parameters
@@ -532,7 +532,7 @@ def from_ptx(module: Union[bytes, str], *, name: str = "", symbol_mapping: Optio
return ObjectCode._init(module, "ptx", name=name, symbol_mapping=symbol_mapping)
@staticmethod
- def from_ltoir(module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None) -> "ObjectCode":
+ def from_ltoir(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
"""Create an :class:`ObjectCode` instance from an existing LTOIR.
Parameters
@@ -550,9 +550,7 @@ def from_ltoir(module: Union[bytes, str], *, name: str = "", symbol_mapping: Opt
return ObjectCode._init(module, "ltoir", name=name, symbol_mapping=symbol_mapping)
@staticmethod
- def from_fatbin(
- module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None
- ) -> "ObjectCode":
+ def from_fatbin(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
"""Create an :class:`ObjectCode` instance from an existing fatbin.
Parameters
@@ -570,9 +568,7 @@ def from_fatbin(
return ObjectCode._init(module, "fatbin", name=name, symbol_mapping=symbol_mapping)
@staticmethod
- def from_object(
- module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None
- ) -> "ObjectCode":
+ def from_object(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
"""Create an :class:`ObjectCode` instance from an existing object code.
Parameters
@@ -590,9 +586,7 @@ def from_object(
return ObjectCode._init(module, "object", name=name, symbol_mapping=symbol_mapping)
@staticmethod
- def from_library(
- module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None
- ) -> "ObjectCode":
+ def from_library(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
"""Create an :class:`ObjectCode` instance from an existing library.
Parameters
diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py
index dee6f001e7..1db453fed1 100644
--- a/cuda_core/cuda/core/experimental/_program.py
+++ b/cuda_core/cuda/core/experimental/_program.py
@@ -49,7 +49,7 @@ def _nvvm_exception_manager(self):
except Exception:
error_log = ""
# Starting Python 3.11 we could also use Exception.add_note() for the same purpose, but
- # unfortunately we are still supporting Python 3.9/3.10...
+ # unfortunately we are still supporting Python 3.10...
e.args = (e.args[0] + (f"\nNVVM program log: {error_log}" if error_log else ""), *e.args[1:])
raise e
diff --git a/cuda_core/docs/source/install.rst b/cuda_core/docs/source/install.rst
index cddde851d5..7100dade3b 100644
--- a/cuda_core/docs/source/install.rst
+++ b/cuda_core/docs/source/install.rst
@@ -26,7 +26,7 @@ dependencies are as follows:
.. [#f1] Including ``cuda-python``.
-``cuda.core`` supports Python 3.9 - 3.14, on Linux (x86-64, arm64) and Windows (x86-64). **Experimental** free-threaded builds for Python 3.13 & 3.14 are also provided.
+``cuda.core`` supports Python 3.10 - 3.14, on Linux (x86-64, arm64) and Windows (x86-64). **Experimental** free-threaded builds for Python 3.13 & 3.14 are also provided.
Free-threading Build Support
@@ -42,6 +42,7 @@ As of cuda-core 0.4.0, **experimental** packages for the `free-threaded interpre
.. _built-in modules that are known to be thread-unsafe: https://github.com/python/cpython/issues/116738
.. _free-threaded interpreter: https://docs.python.org/3/howto/free-threading-python.html
+``cuda.core`` supports Python 3.10 - 3.14, on Linux (x86-64, arm64) and Windows (x86-64).
Installing from PyPI
--------------------
diff --git a/cuda_core/pyproject.toml b/cuda_core/pyproject.toml
index 5f12f6f120..31ceb5b1a5 100644
--- a/cuda_core/pyproject.toml
+++ b/cuda_core/pyproject.toml
@@ -14,7 +14,7 @@ dynamic = [
"version",
"readme",
]
-requires-python = '>=3.9'
+requires-python = '>=3.10'
description = "cuda.core: (experimental) pythonic CUDA module"
authors = [
{ name = "NVIDIA Corporation" }
@@ -32,11 +32,11 @@ classifiers = [
"Topic :: Scientific/Engineering",
"Topic :: Software Development :: Libraries",
"Programming Language :: Python :: 3 :: Only",
- "Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
+ "Programming Language :: Python :: 3.14",
"Programming Language :: Python :: Implementation :: CPython",
"Environment :: GPU :: NVIDIA CUDA",
"Environment :: GPU :: NVIDIA CUDA :: 12",
diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py
index 75ebec3a85..65c9f4bf3c 100644
--- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py
+++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py
@@ -4,7 +4,6 @@
import glob
import os
from collections.abc import Sequence
-from typing import Optional
from cuda.pathfinder._dynamic_libs.load_dl_common import DynamicLibNotFoundError
from cuda.pathfinder._dynamic_libs.supported_nvidia_libs import (
@@ -29,7 +28,7 @@ def _no_such_file_in_sub_dirs(
def _find_so_using_nvidia_lib_dirs(
libname: str, so_basename: str, error_messages: list[str], attachments: list[str]
-) -> Optional[str]:
+) -> str | None:
rel_dirs = SITE_PACKAGES_LIBDIRS_LINUX.get(libname)
if rel_dirs is not None:
sub_dirs_searched = []
@@ -52,7 +51,7 @@ def _find_so_using_nvidia_lib_dirs(
return None
-def _find_dll_under_dir(dirpath: str, file_wild: str) -> Optional[str]:
+def _find_dll_under_dir(dirpath: str, file_wild: str) -> str | None:
for path in sorted(glob.glob(os.path.join(dirpath, file_wild))):
if not os.path.isfile(path):
continue
@@ -63,7 +62,7 @@ def _find_dll_under_dir(dirpath: str, file_wild: str) -> Optional[str]:
def _find_dll_using_nvidia_bin_dirs(
libname: str, lib_searched_for: str, error_messages: list[str], attachments: list[str]
-) -> Optional[str]:
+) -> str | None:
rel_dirs = SITE_PACKAGES_LIBDIRS_WINDOWS.get(libname)
if rel_dirs is not None:
sub_dirs_searched = []
@@ -79,7 +78,7 @@ def _find_dll_using_nvidia_bin_dirs(
return None
-def _find_lib_dir_using_anchor_point(libname: str, anchor_point: str, linux_lib_dir: str) -> Optional[str]:
+def _find_lib_dir_using_anchor_point(libname: str, anchor_point: str, linux_lib_dir: str) -> str | None:
# Resolve paths for the four cases:
# Windows/Linux x nvvm yes/no
if IS_WINDOWS:
@@ -107,14 +106,14 @@ def _find_lib_dir_using_anchor_point(libname: str, anchor_point: str, linux_lib_
return None
-def _find_lib_dir_using_cuda_home(libname: str) -> Optional[str]:
+def _find_lib_dir_using_cuda_home(libname: str) -> str | None:
cuda_home = get_cuda_home_or_path()
if cuda_home is None:
return None
return _find_lib_dir_using_anchor_point(libname, anchor_point=cuda_home, linux_lib_dir="lib64")
-def _find_lib_dir_using_conda_prefix(libname: str) -> Optional[str]:
+def _find_lib_dir_using_conda_prefix(libname: str) -> str | None:
conda_prefix = os.environ.get("CONDA_PREFIX")
if not conda_prefix:
return None
@@ -125,7 +124,7 @@ def _find_lib_dir_using_conda_prefix(libname: str) -> Optional[str]:
def _find_so_using_lib_dir(
lib_dir: str, so_basename: str, error_messages: list[str], attachments: list[str]
-) -> Optional[str]:
+) -> str | None:
so_name = os.path.join(lib_dir, so_basename)
if os.path.isfile(so_name):
return so_name
@@ -141,7 +140,7 @@ def _find_so_using_lib_dir(
def _find_dll_using_lib_dir(
lib_dir: str, libname: str, error_messages: list[str], attachments: list[str]
-) -> Optional[str]:
+) -> str | None:
file_wild = libname + "*.dll"
dll_name = _find_dll_under_dir(lib_dir, file_wild)
if dll_name is not None:
@@ -162,9 +161,9 @@ def __init__(self, libname: str):
self.lib_searched_for = f"lib{libname}.so"
self.error_messages: list[str] = []
self.attachments: list[str] = []
- self.abs_path: Optional[str] = None
+ self.abs_path: str | None = None
- def try_site_packages(self) -> Optional[str]:
+ def try_site_packages(self) -> str | None:
if IS_WINDOWS:
return _find_dll_using_nvidia_bin_dirs(
self.libname,
@@ -180,13 +179,13 @@ def try_site_packages(self) -> Optional[str]:
self.attachments,
)
- def try_with_conda_prefix(self) -> Optional[str]:
+ def try_with_conda_prefix(self) -> str | None:
return self._find_using_lib_dir(_find_lib_dir_using_conda_prefix(self.libname))
- def try_with_cuda_home(self) -> Optional[str]:
+ def try_with_cuda_home(self) -> str | None:
return self._find_using_lib_dir(_find_lib_dir_using_cuda_home(self.libname))
- def _find_using_lib_dir(self, lib_dir: Optional[str]) -> Optional[str]:
+ def _find_using_lib_dir(self, lib_dir: str | None) -> str | None:
if lib_dir is None:
return None
if IS_WINDOWS:
diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_common.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_common.py
index 2e6c9eb17c..91e6284a00 100644
--- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_common.py
+++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_common.py
@@ -1,8 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
+from collections.abc import Callable
from dataclasses import dataclass
-from typing import Callable, Optional
from cuda.pathfinder._dynamic_libs.supported_nvidia_libs import DIRECT_DEPENDENCIES
@@ -13,7 +13,7 @@ class DynamicLibNotFoundError(RuntimeError):
@dataclass
class LoadedDL:
- abs_path: Optional[str]
+ abs_path: str | None
was_already_loaded_from_elsewhere: bool
_handle_uint: int # Platform-agnostic unsigned pointer value
found_via: str
diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_linux.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_linux.py
index 040e24705e..4d2bae5b90 100644
--- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_linux.py
+++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_linux.py
@@ -5,7 +5,7 @@
import ctypes
import ctypes.util
import os
-from typing import Optional, cast
+from typing import cast
from cuda.pathfinder._dynamic_libs.load_dl_common import LoadedDL
from cuda.pathfinder._dynamic_libs.supported_nvidia_libs import (
@@ -76,8 +76,8 @@ class _LinkMapLNameView(ctypes.Structure):
assert _LinkMapLNameView.l_name.offset == ctypes.sizeof(ctypes.c_void_p)
-def _dl_last_error() -> Optional[str]:
- msg_bytes = cast(Optional[bytes], LIBDL.dlerror())
+def _dl_last_error() -> str | None:
+ msg_bytes = cast(bytes | None, LIBDL.dlerror())
if not msg_bytes:
return None # no pending error
# Never raises; undecodable bytes are mapped to U+DC80..U+DCFF
@@ -131,7 +131,7 @@ def get_candidate_sonames(libname: str) -> list[str]:
return candidate_sonames
-def check_if_already_loaded_from_elsewhere(libname: str, _have_abs_path: bool) -> Optional[LoadedDL]:
+def check_if_already_loaded_from_elsewhere(libname: str, _have_abs_path: bool) -> LoadedDL | None:
for soname in get_candidate_sonames(libname):
try:
handle = ctypes.CDLL(soname, mode=os.RTLD_NOLOAD)
@@ -151,7 +151,7 @@ def _load_lib(libname: str, filename: str) -> ctypes.CDLL:
return ctypes.CDLL(filename, cdll_mode)
-def load_with_system_search(libname: str) -> Optional[LoadedDL]:
+def load_with_system_search(libname: str) -> LoadedDL | None:
"""Try to load a library using system search paths.
Args:
@@ -195,7 +195,7 @@ def _work_around_known_bugs(libname: str, found_path: str) -> None:
ctypes.CDLL(dep_path, CDLL_MODE)
-def load_with_abs_path(libname: str, found_path: str, found_via: Optional[str] = None) -> LoadedDL:
+def load_with_abs_path(libname: str, found_path: str, found_via: str | None = None) -> LoadedDL:
"""Load a dynamic library from the given path.
Args:
diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_windows.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_windows.py
index d8ac53fe8a..b9f15ea50b 100644
--- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_windows.py
+++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_windows.py
@@ -5,7 +5,6 @@
import ctypes.wintypes
import os
import struct
-from typing import Optional
from cuda.pathfinder._dynamic_libs.load_dl_common import LoadedDL
from cuda.pathfinder._dynamic_libs.supported_nvidia_libs import (
@@ -100,10 +99,7 @@ def abs_path_for_dynamic_library(libname: str, handle: ctypes.wintypes.HMODULE)
return buffer.value
-def check_if_already_loaded_from_elsewhere(
- libname: str,
- have_abs_path: bool,
-) -> Optional[LoadedDL]:
+def check_if_already_loaded_from_elsewhere(libname: str, have_abs_path: bool) -> LoadedDL | None:
for dll_name in SUPPORTED_WINDOWS_DLLS.get(libname, ()):
handle = kernel32.GetModuleHandleW(dll_name)
if handle:
@@ -117,7 +113,7 @@ def check_if_already_loaded_from_elsewhere(
return None
-def load_with_system_search(libname: str) -> Optional[LoadedDL]:
+def load_with_system_search(libname: str) -> LoadedDL | None:
"""Try to load a DLL using system search paths.
Args:
@@ -136,7 +132,7 @@ def load_with_system_search(libname: str) -> Optional[LoadedDL]:
return None
-def load_with_abs_path(libname: str, found_path: str, found_via: Optional[str] = None) -> LoadedDL:
+def load_with_abs_path(libname: str, found_path: str, found_via: str | None = None) -> LoadedDL:
"""Load a dynamic library from the given path.
Args:
diff --git a/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py b/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py
index b141700ab7..d770e99214 100644
--- a/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py
+++ b/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py
@@ -4,7 +4,6 @@
import functools
import glob
import os
-from typing import Optional
from cuda.pathfinder._headers import supported_nvidia_headers
from cuda.pathfinder._utils.env_vars import get_cuda_home_or_path
@@ -12,7 +11,7 @@
from cuda.pathfinder._utils.platform_aware import IS_WINDOWS
-def _abs_norm(path: Optional[str]) -> Optional[str]:
+def _abs_norm(path: str | None) -> str | None:
if path:
return os.path.normpath(os.path.abspath(path))
return None
@@ -31,7 +30,7 @@ def _find_under_site_packages(sub_dir: str, h_basename: str) -> Optional[str]:
return None
-def _find_based_on_ctk_layout(libname: str, h_basename: str, anchor_point: str) -> Optional[str]:
+def _find_based_on_ctk_layout(libname: str, h_basename: str, anchor_point: str) -> str | None:
parts = [anchor_point]
if libname == "nvvm":
parts.append(libname)
@@ -77,7 +76,7 @@ def _find_based_on_conda_layout(libname: str, h_basename: str, ctk_layout: bool)
return _find_based_on_ctk_layout(libname, h_basename, anchor_point)
-def _find_ctk_header_directory(libname: str) -> Optional[str]:
+def _find_ctk_header_directory(libname: str) -> str | None:
h_basename = supported_nvidia_headers.SUPPORTED_HEADERS_CTK[libname]
candidate_dirs = supported_nvidia_headers.SUPPORTED_SITE_PACKAGE_HEADER_DIRS_CTK[libname]
@@ -97,7 +96,7 @@ def _find_ctk_header_directory(libname: str) -> Optional[str]:
@functools.cache
-def find_nvidia_header_directory(libname: str) -> Optional[str]:
+def find_nvidia_header_directory(libname: str) -> str | None:
"""Locate the header directory for a supported NVIDIA library.
Args:
diff --git a/cuda_pathfinder/cuda/pathfinder/_utils/env_vars.py b/cuda_pathfinder/cuda/pathfinder/_utils/env_vars.py
index 3a7de992c0..cf78a627cb 100644
--- a/cuda_pathfinder/cuda/pathfinder/_utils/env_vars.py
+++ b/cuda_pathfinder/cuda/pathfinder/_utils/env_vars.py
@@ -3,7 +3,6 @@
import os
import warnings
-from typing import Optional
def _paths_differ(a: str, b: str) -> bool:
@@ -33,7 +32,7 @@ def _paths_differ(a: str, b: str) -> bool:
return True
-def get_cuda_home_or_path() -> Optional[str]:
+def get_cuda_home_or_path() -> str | None:
cuda_home = os.environ.get("CUDA_HOME")
cuda_path = os.environ.get("CUDA_PATH")
diff --git a/cuda_pathfinder/cuda/pathfinder/_utils/find_site_packages_dll.py b/cuda_pathfinder/cuda/pathfinder/_utils/find_site_packages_dll.py
index 2f5695093c..507355727f 100644
--- a/cuda_pathfinder/cuda/pathfinder/_utils/find_site_packages_dll.py
+++ b/cuda_pathfinder/cuda/pathfinder/_utils/find_site_packages_dll.py
@@ -11,7 +11,12 @@ def find_all_dll_files_via_metadata() -> dict[str, tuple[str, ...]]:
results: collections.defaultdict[str, list[str]] = collections.defaultdict(list)
# sort dists for deterministic output
- for dist in sorted(importlib.metadata.distributions(), key=lambda d: (d.metadata.get("Name", ""), d.version)):
+
+ for dist in sorted(
+ importlib.metadata.distributions(),
+ # `get` exists before 3.12, even though the hints only exist for Python >=3.12
+ key=lambda d: (d.metadata.get("Name", ""), d.version), # type: ignore[attr-defined]
+ ):
files = dist.files
if not files:
continue
diff --git a/cuda_pathfinder/cuda/pathfinder/_utils/find_site_packages_so.py b/cuda_pathfinder/cuda/pathfinder/_utils/find_site_packages_so.py
index 69e7eea3ad..33ee1f1bcf 100644
--- a/cuda_pathfinder/cuda/pathfinder/_utils/find_site_packages_so.py
+++ b/cuda_pathfinder/cuda/pathfinder/_utils/find_site_packages_so.py
@@ -23,7 +23,11 @@ def find_all_so_files_via_metadata() -> dict[str, dict[str, tuple[str, ...]]]:
)
# sort dists for deterministic output
- for dist in sorted(importlib.metadata.distributions(), key=lambda d: (d.metadata.get("Name", ""), d.version)):
+ for dist in sorted(
+ importlib.metadata.distributions(),
+ # `get` exists before 3.12, even though the hints only exist for Python >=3.12
+ key=lambda d: (d.metadata.get("Name", ""), d.version), # type: ignore[attr-defined]
+ ):
files = dist.files
if not files:
continue
diff --git a/cuda_pathfinder/pyproject.toml b/cuda_pathfinder/pyproject.toml
index 3db1aecbc8..9a6e40f8d3 100644
--- a/cuda_pathfinder/pyproject.toml
+++ b/cuda_pathfinder/pyproject.toml
@@ -6,7 +6,7 @@ name = "cuda-pathfinder"
description = "Pathfinder for CUDA components"
authors = [{ name = "NVIDIA Corporation", email = "cuda-python-conduct@nvidia.com" }]
license = "Apache-2.0"
-requires-python = ">=3.9"
+requires-python = ">=3.10"
dynamic = ["version", "readme"]
dependencies = []
@@ -109,7 +109,7 @@ inline-quotes = "double"
[tool.mypy]
# Basic settings
-python_version = "3.9"
+python_version = "3.10"
explicit_package_bases = true
warn_return_any = true
warn_unused_configs = true
diff --git a/cuda_pathfinder/tests/spawned_process_runner.py b/cuda_pathfinder/tests/spawned_process_runner.py
index 154178b2a2..f4440743f5 100644
--- a/cuda_pathfinder/tests/spawned_process_runner.py
+++ b/cuda_pathfinder/tests/spawned_process_runner.py
@@ -5,10 +5,10 @@
import queue # for Empty
import sys
import traceback
-from collections.abc import Sequence
+from collections.abc import Callable, Sequence
from dataclasses import dataclass
from io import StringIO
-from typing import Any, Callable, Optional
+from typing import Any
PROCESS_KILLED = -9
PROCESS_NO_RESULT = -999
@@ -61,9 +61,9 @@ def __call__(self):
def run_in_spawned_child_process(
target: Callable[..., None],
*,
- args: Optional[Sequence[Any]] = None,
- kwargs: Optional[dict[str, Any]] = None,
- timeout: Optional[float] = None,
+ args: Sequence[Any] | None = None,
+ kwargs: dict[str, Any] | None = None,
+ timeout: float | None = None,
rethrow: bool = False,
) -> CompletedProcess:
"""Run `target` in a spawned child process, capturing stdout/stderr.
diff --git a/cuda_python/pyproject.toml b/cuda_python/pyproject.toml
index fd6cacaf2a..9048f5818b 100644
--- a/cuda_python/pyproject.toml
+++ b/cuda_python/pyproject.toml
@@ -22,16 +22,18 @@ classifiers = [
"Intended Audience :: Science/Research",
"Intended Audience :: End Users/Desktop",
"Programming Language :: Python :: 3 :: Only",
- "Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
+ "Programming Language :: Python :: 3.14",
"Programming Language :: Python :: Implementation :: CPython",
"Environment :: GPU :: NVIDIA CUDA",
"Environment :: GPU :: NVIDIA CUDA :: 12",
+ "Environment :: GPU :: NVIDIA CUDA :: 13",
]
dynamic = ["version", "dependencies", "optional-dependencies"]
+requires-python = ">=3.10"
[project.urls]
homepage = "https://nvidia.github.io/cuda-python/"
diff --git a/ruff.toml b/ruff.toml
index 79c66e862c..6312d3e9ef 100644
--- a/ruff.toml
+++ b/ruff.toml
@@ -3,7 +3,7 @@
# SPDX-License-Identifier: Apache-2.0
line-length = 120
respect-gitignore = true
-target-version = "py39"
+target-version = "py310"
[format]
docstring-code-format = true
@@ -40,6 +40,7 @@ ignore = [
"S101", # asserts
"S311", # allow use of the random.* even though many are not cryptographically secure
"S404", # allow importing the subprocess module
+ "B905", # preserve the default behavior of `zip` without the explicit `strict` argument
]
exclude = ["**/_version.py"]
diff --git a/toolshed/make_site_packages_libdirs.py b/toolshed/make_site_packages_libdirs.py
index d84d821700..00a495a095 100755
--- a/toolshed/make_site_packages_libdirs.py
+++ b/toolshed/make_site_packages_libdirs.py
@@ -8,7 +8,7 @@
import argparse
import os
import re
-from typing import Dict, Optional, Set
+from typing import Dict, Set
_SITE_PACKAGES_RE = re.compile(r"(?i)^.*?/site-packages/")
@@ -38,7 +38,7 @@ def parse_lines_linux(lines) -> Dict[str, Set[str]]:
return d
-def extract_libname_from_dll(fname: str) -> Optional[str]:
+def extract_libname_from_dll(fname: str) -> str | None:
"""Return base libname per the heuristic, or None if not a .dll."""
base = os.path.basename(fname)
if not base.lower().endswith(".dll"):
From 9969317306466ae6956c9c76affeab0842dfe657 Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Mon, 27 Oct 2025 15:12:51 -0400
Subject: [PATCH 2/9] docs: remove duplicate information
---
cuda_core/docs/source/install.rst | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/cuda_core/docs/source/install.rst b/cuda_core/docs/source/install.rst
index 7100dade3b..72ec710785 100644
--- a/cuda_core/docs/source/install.rst
+++ b/cuda_core/docs/source/install.rst
@@ -26,7 +26,7 @@ dependencies are as follows:
.. [#f1] Including ``cuda-python``.
-``cuda.core`` supports Python 3.10 - 3.14, on Linux (x86-64, arm64) and Windows (x86-64). **Experimental** free-threaded builds for Python 3.13 & 3.14 are also provided.
+``cuda.core`` supports Python 3.10 - 3.14, on Linux (x86-64, arm64) and Windows (x86-64). **Experimental** free-threaded builds for Python 3.14 are also provided.
Free-threading Build Support
@@ -42,8 +42,6 @@ As of cuda-core 0.4.0, **experimental** packages for the `free-threaded interpre
.. _built-in modules that are known to be thread-unsafe: https://github.com/python/cpython/issues/116738
.. _free-threaded interpreter: https://docs.python.org/3/howto/free-threading-python.html
-``cuda.core`` supports Python 3.10 - 3.14, on Linux (x86-64, arm64) and Windows (x86-64).
-
Installing from PyPI
--------------------
From 05faed3b84f7759059972354299db012b96a4cd7 Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Mon, 27 Oct 2025 15:17:57 -0400
Subject: [PATCH 3/9] test: remove B095 ruff rule and add `zip(...,
strict=True)` in test code
---
cuda_bindings/setup.py | 2 +-
cuda_bindings/tests/test_cuda.py | 4 ++--
cuda_bindings/tests/test_cufile.py | 2 +-
cuda_bindings/tests/test_nvjitlink.py | 2 +-
cuda_core/tests/test_module.py | 8 ++++----
cuda_core/tests/test_system.py | 2 +-
ruff.toml | 2 +-
7 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/cuda_bindings/setup.py b/cuda_bindings/setup.py
index d89d0bccd9..c4138c11d0 100644
--- a/cuda_bindings/setup.py
+++ b/cuda_bindings/setup.py
@@ -125,7 +125,7 @@ def discoverMembers(self, memberDict, prefix, seen=None):
next_seen = set(seen)
next_seen.add(self._name)
- for memberName, memberType in zip(self._member_names, self._member_types):
+ for memberName, memberType in zip(self._member_names, self._member_types, strict=True):
if memberName:
discovered.append(".".join([prefix, memberName]))
diff --git a/cuda_bindings/tests/test_cuda.py b/cuda_bindings/tests/test_cuda.py
index cd723941be..53bcc08f95 100644
--- a/cuda_bindings/tests/test_cuda.py
+++ b/cuda_bindings/tests/test_cuda.py
@@ -432,7 +432,7 @@ def test_cuda_pointer_attr():
# List version
err, attr_value_list_v2 = cuda.cuPointerGetAttributes(len(attr_type_list), attr_type_list, ptr)
assert err == cuda.CUresult.CUDA_SUCCESS
- for attr1, attr2 in zip(attr_value_list, attr_value_list_v2):
+ for attr1, attr2 in zip(attr_value_list, attr_value_list_v2, strict=True):
assert str(attr1) == str(attr2)
# Test setting values
@@ -512,7 +512,7 @@ def test_cuda_mem_range_attr():
attr_type_size_list, attr_type_list, len(attr_type_list), ptr, size
)
assert err == cuda.CUresult.CUDA_SUCCESS
- for attr1, attr2 in zip(attr_value_list, attr_value_list_v2):
+ for attr1, attr2 in zip(attr_value_list, attr_value_list_v2, strict=True):
assert str(attr1) == str(attr2)
(err,) = cuda.cuMemFree(ptr)
diff --git a/cuda_bindings/tests/test_cufile.py b/cuda_bindings/tests/test_cufile.py
index 3716e2bec5..446cfdc83d 100644
--- a/cuda_bindings/tests/test_cufile.py
+++ b/cuda_bindings/tests/test_cufile.py
@@ -318,7 +318,7 @@ def test_buf_register_multiple_buffers():
try:
# Register all buffers
flags = 0
- for buf_ptr, size in zip(buffers, buffer_sizes):
+ for buf_ptr, size in zip(buffers, buffer_sizes, strict=True):
buf_ptr_int = int(buf_ptr)
cufile.buf_register(buf_ptr_int, size, flags)
diff --git a/cuda_bindings/tests/test_nvjitlink.py b/cuda_bindings/tests/test_nvjitlink.py
index 3bfeb8d35a..85c6058010 100644
--- a/cuda_bindings/tests/test_nvjitlink.py
+++ b/cuda_bindings/tests/test_nvjitlink.py
@@ -34,7 +34,7 @@
def _build_arch_ptx_parametrized_callable():
- av = tuple(zip(ARCHITECTURES, PTX_VERSIONS))
+ av = tuple(zip(ARCHITECTURES, PTX_VERSIONS, strict=True))
return pytest.mark.parametrize(
("arch", "ptx_bytes"),
[(a, (PTX_HEADER.format(VERSION=v, ARCH=a) + PTX_KERNEL).encode("utf-8")) for a, v in av],
diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py
index 49df966c08..dffbc04209 100644
--- a/cuda_core/tests/test_module.py
+++ b/cuda_core/tests/test_module.py
@@ -248,9 +248,9 @@ class ExpectedStruct(ctypes.Structure):
sizes = [p.size for p in arg_info]
members = [getattr(ExpectedStruct, name) for name, _ in ExpectedStruct._fields_]
expected_offsets = tuple(m.offset for m in members)
- assert all(actual == expected for actual, expected in zip(offsets, expected_offsets))
+ assert all(actual == expected for actual, expected in zip(offsets, expected_offsets, strict=True))
expected_sizes = tuple(m.size for m in members)
- assert all(actual == expected for actual, expected in zip(sizes, expected_sizes))
+ assert all(actual == expected for actual, expected in zip(sizes, expected_sizes, strict=True))
@pytest.mark.parametrize("nargs", [0, 1, 2, 3, 16])
@@ -274,8 +274,8 @@ class ExpectedStruct(ctypes.Structure):
members = tuple(getattr(ExpectedStruct, f"arg_{i}") for i in range(nargs))
arg_info = krn.arguments_info
- assert all([actual.offset == expected.offset for actual, expected in zip(arg_info, members)])
- assert all([actual.size == expected.size for actual, expected in zip(arg_info, members)])
+ assert all([actual.offset == expected.offset for actual, expected in zip(arg_info, members, strict=True)])
+ assert all([actual.size == expected.size for actual, expected in zip(arg_info, members, strict=True)])
def test_num_args_error_handling(deinit_all_contexts_function, cuda12_4_prerequisite_check):
diff --git a/cuda_core/tests/test_system.py b/cuda_core/tests/test_system.py
index d5195ed872..13f82b98f6 100644
--- a/cuda_core/tests/test_system.py
+++ b/cuda_core/tests/test_system.py
@@ -35,5 +35,5 @@ def test_devices():
expected_num_devices = handle_return(runtime.cudaGetDeviceCount())
expected_devices = tuple(Device(device_id) for device_id in range(expected_num_devices))
assert len(devices) == len(expected_devices), "Number of devices does not match expected value"
- for device, expected_device in zip(devices, expected_devices):
+ for device, expected_device in zip(devices, expected_devices, strict=True):
assert device.device_id == expected_device.device_id, "Device ID does not match expected value"
diff --git a/ruff.toml b/ruff.toml
index 6312d3e9ef..f28ff3cb98 100644
--- a/ruff.toml
+++ b/ruff.toml
@@ -40,7 +40,6 @@ ignore = [
"S101", # asserts
"S311", # allow use of the random.* even though many are not cryptographically secure
"S404", # allow importing the subprocess module
- "B905", # preserve the default behavior of `zip` without the explicit `strict` argument
]
exclude = ["**/_version.py"]
@@ -52,6 +51,7 @@ exclude = ["**/_version.py"]
"cuda_bindings/examples/**" = [
"E722",
"E501", # line too long
+ "B905", # preserve the default behavior of `zip` without the explicit `strict` argument
]
"cuda_bindings/tests/**" = [
From 1c3e7e3e6c8d6632d323b90d43e3dbe57c5a5f4b Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Mon, 27 Oct 2025 15:23:49 -0400
Subject: [PATCH 4/9] chore: bump python in `cuda_python_test_helpers`
---
cuda_python_test_helpers/pyproject.toml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/cuda_python_test_helpers/pyproject.toml b/cuda_python_test_helpers/pyproject.toml
index 85652b61c5..4709c05a07 100644
--- a/cuda_python_test_helpers/pyproject.toml
+++ b/cuda_python_test_helpers/pyproject.toml
@@ -12,7 +12,7 @@ description = "Shared test helpers for CUDA Python projects"
readme = {file = "README.md", content-type = "text/markdown"}
authors = [{ name = "NVIDIA Corporation" }]
license = "Apache-2.0"
-requires-python = ">=3.9"
+requires-python = ">=3.10"
classifiers = [
"Programming Language :: Python :: 3 :: Only",
"Operating System :: POSIX :: Linux",
From 9e46f1f7d125060ab898a3b6256f7d637b7a531c Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Mon, 27 Oct 2025 15:45:42 -0400
Subject: [PATCH 5/9] refactor: modernize dict/set in toolshed
---
toolshed/make_site_packages_libdirs.py | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/toolshed/make_site_packages_libdirs.py b/toolshed/make_site_packages_libdirs.py
index 00a495a095..eba6c68234 100755
--- a/toolshed/make_site_packages_libdirs.py
+++ b/toolshed/make_site_packages_libdirs.py
@@ -8,7 +8,6 @@
import argparse
import os
import re
-from typing import Dict, Set
_SITE_PACKAGES_RE = re.compile(r"(?i)^.*?/site-packages/")
@@ -19,7 +18,7 @@ def strip_site_packages_prefix(p: str) -> str:
return _SITE_PACKAGES_RE.sub("", p)
-def parse_lines_linux(lines) -> Dict[str, Set[str]]:
+def parse_lines_linux(lines) -> dict[str, set[str]]:
d = {} # name -> set of dirs
for raw in lines:
line = raw.strip()
@@ -53,9 +52,9 @@ def extract_libname_from_dll(fname: str) -> str | None:
return name or None
-def parse_lines_windows(lines) -> Dict[str, Set[str]]:
+def parse_lines_windows(lines) -> dict[str, set[str]]:
"""Collect {libname: set(dirnames)} with deduped directories."""
- m: Dict[str, Set[str]] = {}
+ m: dict[str, set[str]] = {}
for raw in lines:
line = raw.strip()
if not line or line.startswith("#"):
@@ -69,7 +68,7 @@ def parse_lines_windows(lines) -> Dict[str, Set[str]]:
return m
-def dict_literal(d: Dict[str, Set[str]]) -> str:
+def dict_literal(d: dict[str, set[str]]) -> str:
"""Pretty, stable dict literal with tuple values (singletons keep trailing comma)."""
lines = ["{"]
for k in sorted(d):
From a7a1f3f30d014a6460d2a5c71fb91bbaa0b910c9 Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Mon, 27 Oct 2025 15:53:55 -0400
Subject: [PATCH 6/9] refactor: modernize unions
---
.../cuda/core/experimental/_launch_config.py | 13 +++---
cuda_core/cuda/core/experimental/_linker.py | 16 +++----
cuda_core/cuda/core/experimental/_module.py | 31 +++++++-------
cuda_core/cuda/core/experimental/_program.py | 42 +++++++++----------
.../cuda_python_test_helpers/__init__.py | 3 +-
5 files changed, 50 insertions(+), 55 deletions(-)
diff --git a/cuda_core/cuda/core/experimental/_launch_config.py b/cuda_core/cuda/core/experimental/_launch_config.py
index c1e08da58d..bd76dae286 100644
--- a/cuda_core/cuda/core/experimental/_launch_config.py
+++ b/cuda_core/cuda/core/experimental/_launch_config.py
@@ -3,7 +3,6 @@
# SPDX-License-Identifier: Apache-2.0
from dataclasses import dataclass
-from typing import Union
from cuda.core.experimental._device import Device
from cuda.core.experimental._utils.cuda_utils import (
@@ -45,15 +44,15 @@ class LaunchConfig:
Attributes
----------
- grid : Union[tuple, int]
+ grid : tuple | int
Collection of threads that will execute a kernel function. When cluster
is not specified, this represents the number of blocks, otherwise
this represents the number of clusters.
- cluster : Union[tuple, int]
+ cluster : tuple | int
Group of blocks (Thread Block Cluster) that will execute on the same
GPU Processing Cluster (GPC). Blocks within a cluster have access to
distributed shared memory and can be explicitly synchronized.
- block : Union[tuple, int]
+ block : tuple | int
Group of threads (Thread Block) that will execute on the same
streaming multiprocessor (SM). Threads within a thread blocks have
access to shared memory and can be explicitly synchronized.
@@ -65,9 +64,9 @@ class LaunchConfig:
"""
# TODO: expand LaunchConfig to include other attributes
- grid: Union[tuple, int] = None
- cluster: Union[tuple, int] = None
- block: Union[tuple, int] = None
+ grid: tuple | int = None
+ cluster: tuple | int = None
+ block: tuple | int = None
shmem_size: int | None = None
cooperative_launch: bool | None = False
diff --git a/cuda_core/cuda/core/experimental/_linker.py b/cuda_core/cuda/core/experimental/_linker.py
index 5c54a88c8c..04c59c9d2d 100644
--- a/cuda_core/cuda/core/experimental/_linker.py
+++ b/cuda_core/cuda/core/experimental/_linker.py
@@ -9,7 +9,7 @@
import weakref
from contextlib import contextmanager
from dataclasses import dataclass
-from typing import TYPE_CHECKING, Union
+from typing import TYPE_CHECKING
from warnings import warn
if TYPE_CHECKING:
@@ -154,14 +154,14 @@ class LinkerOptions:
fma : bool, optional
Use fast multiply-add.
Default: True.
- kernels_used : [Union[str, tuple[str], list[str]]], optional
+ kernels_used : str | tuple[str] | list[str], optional
Pass a kernel or sequence of kernels that are used; any not in the list can be removed.
- variables_used : [Union[str, tuple[str], list[str]]], optional
+ variables_used : str | tuple[str] | list[str], optional
Pass a variable or sequence of variables that are used; any not in the list can be removed.
optimize_unused_variables : bool, optional
Assume that if a variable is not referenced in device code, it can be removed.
Default: False.
- ptxas_options : [Union[str, tuple[str], list[str]]], optional
+ ptxas_options : str | tuple[str] | list[str], optional
Pass options to PTXAS.
split_compile : int, optional
Split compilation maximum thread count. Use 0 to use all available processors. Value of 1 disables split
@@ -191,10 +191,10 @@ class LinkerOptions:
prec_div: bool | None = None
prec_sqrt: bool | None = None
fma: bool | None = None
- kernels_used: Union[str, tuple[str], list[str]] | None = None
- variables_used: Union[str, tuple[str], list[str]] | None = None
+ kernels_used: str | tuple[str] | list[str] | None = None
+ variables_used: str | tuple[str] | list[str] | None = None
optimize_unused_variables: bool | None = None
- ptxas_options: Union[str, tuple[str], list[str]] | None = None
+ ptxas_options: str | tuple[str] | list[str] | None = None
split_compile: int | None = None
split_compile_extended: int | None = None
no_cache: bool | None = None
@@ -350,7 +350,7 @@ def _exception_manager(self):
nvJitLinkHandleT = int
-LinkerHandleT = Union[nvJitLinkHandleT, "cuda.bindings.driver.CUlinkState"]
+LinkerHandleT = nvJitLinkHandleT | cuda.bindings.driver.CUlinkState
class Linker:
diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py
index f8ce8f95d0..dcb5d06f5b 100644
--- a/cuda_core/cuda/core/experimental/_module.py
+++ b/cuda_core/cuda/core/experimental/_module.py
@@ -4,7 +4,6 @@
import weakref
from collections import namedtuple
-from typing import Union
from warnings import warn
from cuda.core.experimental._launch_config import LaunchConfig, _to_native_launch_config
@@ -239,7 +238,7 @@ def max_active_blocks_per_multiprocessor(self, block_size: int, dynamic_shared_m
)
def max_potential_block_size(
- self, dynamic_shared_memory_needed: Union[int, driver.CUoccupancyB2DSize], block_size_limit: int
+ self, dynamic_shared_memory_needed: int | driver.CUoccupancyB2DSize, block_size_limit: int
) -> MaxPotentialBlockSizeOccupancyResult:
"""MaxPotentialBlockSizeOccupancyResult: Suggested launch configuration for reasonable occupancy.
@@ -248,7 +247,7 @@ def max_potential_block_size(
Parameters
----------
- dynamic_shared_memory_needed: Union[int, driver.CUoccupancyB2DSize]
+ dynamic_shared_memory_needed: int | driver.CUoccupancyB2DSize
The amount of dynamic shared memory in bytes needed by block.
Use `0` if block does not need shared memory. Use C-callable
represented by :obj:`~driver.CUoccupancyB2DSize` to encode
@@ -437,7 +436,7 @@ def occupancy(self) -> KernelOccupancy:
# TODO: implement from_handle()
-CodeTypeT = Union[bytes, bytearray, str]
+CodeTypeT = bytes | bytearray | str
class ObjectCode:
@@ -496,12 +495,12 @@ def __reduce__(self):
return ObjectCode._reduce_helper, (self._module, self._code_type, self._name, self._sym_map)
@staticmethod
- def from_cubin(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
+ def from_cubin(module: bytes | str, *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
"""Create an :class:`ObjectCode` instance from an existing cubin.
Parameters
----------
- module : Union[bytes, str]
+ module : bytes | str
Either a bytes object containing the in-memory cubin to load, or
a file path string pointing to the on-disk cubin to load.
name : Optional[str]
@@ -514,12 +513,12 @@ def from_cubin(module: Union[bytes, str], *, name: str = "", symbol_mapping: dic
return ObjectCode._init(module, "cubin", name=name, symbol_mapping=symbol_mapping)
@staticmethod
- def from_ptx(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
+ def from_ptx(module: bytes | str, *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
"""Create an :class:`ObjectCode` instance from an existing PTX.
Parameters
----------
- module : Union[bytes, str]
+ module : bytes | str
Either a bytes object containing the in-memory ptx code to load, or
a file path string pointing to the on-disk ptx file to load.
name : Optional[str]
@@ -532,12 +531,12 @@ def from_ptx(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict
return ObjectCode._init(module, "ptx", name=name, symbol_mapping=symbol_mapping)
@staticmethod
- def from_ltoir(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
+ def from_ltoir(module: bytes | str, *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
"""Create an :class:`ObjectCode` instance from an existing LTOIR.
Parameters
----------
- module : Union[bytes, str]
+        module : bytes | str
Either a bytes object containing the in-memory ltoir code to load, or
a file path string pointing to the on-disk ltoir file to load.
name : Optional[str]
@@ -550,12 +549,12 @@ def from_ltoir(module: Union[bytes, str], *, name: str = "", symbol_mapping: dic
return ObjectCode._init(module, "ltoir", name=name, symbol_mapping=symbol_mapping)
@staticmethod
- def from_fatbin(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
+ def from_fatbin(module: bytes | str, *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
"""Create an :class:`ObjectCode` instance from an existing fatbin.
Parameters
----------
- module : Union[bytes, str]
+        module : bytes | str
Either a bytes object containing the in-memory fatbin to load, or
a file path string pointing to the on-disk fatbin to load.
name : Optional[str]
@@ -568,12 +567,12 @@ def from_fatbin(module: Union[bytes, str], *, name: str = "", symbol_mapping: di
return ObjectCode._init(module, "fatbin", name=name, symbol_mapping=symbol_mapping)
@staticmethod
- def from_object(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
+ def from_object(module: bytes | str, *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
"""Create an :class:`ObjectCode` instance from an existing object code.
Parameters
----------
- module : Union[bytes, str]
+ module : bytes | str
Either a bytes object containing the in-memory object code to load, or
a file path string pointing to the on-disk object code to load.
name : Optional[str]
@@ -586,12 +585,12 @@ def from_object(module: Union[bytes, str], *, name: str = "", symbol_mapping: di
return ObjectCode._init(module, "object", name=name, symbol_mapping=symbol_mapping)
@staticmethod
- def from_library(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
+ def from_library(module: bytes | str, *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
"""Create an :class:`ObjectCode` instance from an existing library.
Parameters
----------
- module : Union[bytes, str]
+ module : bytes | str
Either a bytes object containing the in-memory library to load, or
a file path string pointing to the on-disk library to load.
name : Optional[str]
diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py
index 1db453fed1..7ef24105b1 100644
--- a/cuda_core/cuda/core/experimental/_program.py
+++ b/cuda_core/cuda/core/experimental/_program.py
@@ -7,7 +7,7 @@
import weakref
from contextlib import contextmanager
from dataclasses import dataclass
-from typing import TYPE_CHECKING, Union
+from typing import TYPE_CHECKING
from warnings import warn
if TYPE_CHECKING:
@@ -115,7 +115,7 @@ def _process_define_macro_inner(formatted_options, macro):
def _process_define_macro(formatted_options, macro):
- union_type = "Union[str, tuple[str, str]]"
+ union_type = "str | tuple[str, str]"
if _process_define_macro_inner(formatted_options, macro):
return
if is_nested_sequence(macro):
@@ -154,7 +154,7 @@ class ProgramOptions:
Enable device code optimization. When specified along with ‘-G’, enables limited debug information generation
for optimized device code.
Default: None
- ptxas_options : Union[str, list[str]], optional
+ ptxas_options : str | list[str], optional
Specify one or more options directly to ptxas, the PTX optimizing assembler. Options should be strings.
For example ["-v", "-O2"].
Default: None
@@ -188,17 +188,17 @@ class ProgramOptions:
gen_opt_lto : bool, optional
Run the optimizer passes before generating the LTO IR.
Default: False
- define_macro : Union[str, tuple[str, str], list[Union[str, tuple[str, str]]]], optional
+ define_macro : str | tuple[str, str] | list[str | tuple[str, str]], optional
Predefine a macro. Can be either a string, in which case that macro will be set to 1, a 2 element tuple of
strings, in which case the first element is defined as the second, or a list of strings or tuples.
Default: None
- undefine_macro : Union[str, list[str]], optional
+ undefine_macro : str | list[str], optional
Cancel any previous definition of a macro, or list of macros.
Default: None
- include_path : Union[str, list[str]], optional
+ include_path : str | list[str], optional
Add the directory or directories to the list of directories to be searched for headers.
Default: None
- pre_include : Union[str, list[str]], optional
+ pre_include : str | list[str], optional
Preinclude one or more headers during preprocessing. Can be either a string or a list of strings.
Default: None
no_source_include : bool, optional
@@ -231,13 +231,13 @@ class ProgramOptions:
no_display_error_number : bool, optional
Disable the display of a diagnostic number for warning messages.
Default: False
- diag_error : Union[int, list[int]], optional
+ diag_error : int | list[int], optional
Emit error for a specified diagnostic message number or comma separated list of numbers.
Default: None
- diag_suppress : Union[int, list[int]], optional
+ diag_suppress : int | list[int], optional
Suppress a specified diagnostic message number or comma separated list of numbers.
Default: None
- diag_warn : Union[int, list[int]], optional
+ diag_warn : int | list[int], optional
Emit warning for a specified diagnostic message number or comma separated lis of numbers.
Default: None
brief_diagnostics : bool, optional
@@ -264,7 +264,7 @@ class ProgramOptions:
debug: bool | None = None
lineinfo: bool | None = None
device_code_optimize: bool | None = None
- ptxas_options: Union[str, list[str], tuple[str]] | None = None
+ ptxas_options: str | list[str] | tuple[str, ...] | None = None
max_register_count: int | None = None
ftz: bool | None = None
prec_sqrt: bool | None = None
@@ -274,12 +274,10 @@ class ProgramOptions:
extra_device_vectorization: bool | None = None
link_time_optimization: bool | None = None
gen_opt_lto: bool | None = None
- define_macro: (
- Union[str, tuple[str, str], list[Union[str, tuple[str, str]]], tuple[Union[str, tuple[str, str]]]] | None
- ) = None
- undefine_macro: Union[str, list[str], tuple[str]] | None = None
- include_path: Union[str, list[str], tuple[str]] | None = None
- pre_include: Union[str, list[str], tuple[str]] | None = None
+ define_macro: str | tuple[str, str] | list[str | tuple[str, str]] | tuple[str | tuple[str, str]] | None = None
+ undefine_macro: str | list[str] | tuple[str, ...] | None = None
+ include_path: str | list[str] | tuple[str, ...] | None = None
+ pre_include: str | list[str] | tuple[str, ...] | None = None
no_source_include: bool | None = None
std: str | None = None
builtin_move_forward: bool | None = None
@@ -290,9 +288,9 @@ class ProgramOptions:
device_int128: bool | None = None
optimization_info: str | None = None
no_display_error_number: bool | None = None
- diag_error: Union[int, list[int], tuple[int]] | None = None
- diag_suppress: Union[int, list[int], tuple[int]] | None = None
- diag_warn: Union[int, list[int], tuple[int]] | None = None
+ diag_error: int | list[int] | tuple[int] | None = None
+ diag_suppress: int | list[int] | tuple[int] | None = None
+ diag_warn: int | list[int] | tuple[int] | None = None
brief_diagnostics: bool | None = None
time: str | None = None
split_compile: int | None = None
@@ -428,7 +426,7 @@ def __repr__(self):
return str(self._formatted_options)
-ProgramHandleT = Union["cuda.bindings.nvrtc.nvrtcProgram", LinkerHandleT]
+ProgramHandleT = cuda.bindings.nvrtc.nvrtcProgram | LinkerHandleT
class Program:
@@ -574,7 +572,7 @@ def compile(self, target_type, name_expressions=(), logs=None):
target_type : Any
String of the targeted compilation type.
Supported options are "ptx", "cubin" and "ltoir".
- name_expressions : Union[list, tuple], optional
+ name_expressions : list | tuple, optional
List of explicit name expressions to become accessible.
(Default to no expressions)
logs : Any, optional
diff --git a/cuda_python_test_helpers/cuda_python_test_helpers/__init__.py b/cuda_python_test_helpers/cuda_python_test_helpers/__init__.py
index a661b4f1aa..e0b7261121 100644
--- a/cuda_python_test_helpers/cuda_python_test_helpers/__init__.py
+++ b/cuda_python_test_helpers/cuda_python_test_helpers/__init__.py
@@ -7,7 +7,6 @@
import platform
import sys
from contextlib import suppress
-from typing import Union
from cuda.core.experimental._utils.cuda_utils import handle_return
@@ -38,7 +37,7 @@ def _detect_wsl() -> bool:
@functools.cache
-def supports_ipc_mempool(device_id: Union[int, object]) -> bool:
+def supports_ipc_mempool(device_id: int | object) -> bool:
"""Return True if mempool IPC via POSIX file descriptor is supported.
Uses cuDeviceGetAttribute(CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES)
From 593f9a1904af1092e93e2f34917018f30bbe32c7 Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Mon, 27 Oct 2025 16:17:27 -0400
Subject: [PATCH 7/9] chore: address review-bot comments (docstring union syntax,
 variadic tuple[int, ...] annotations)
---
cuda_core/cuda/core/experimental/_module.py | 26 ++++++++++----------
cuda_core/cuda/core/experimental/_program.py | 6 ++---
2 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py
index dcb5d06f5b..9654cb97d3 100644
--- a/cuda_core/cuda/core/experimental/_module.py
+++ b/cuda_core/cuda/core/experimental/_module.py
@@ -503,9 +503,9 @@ def from_cubin(module: bytes | str, *, name: str = "", symbol_mapping: dict | No
module : bytes | str
Either a bytes object containing the in-memory cubin to load, or
a file path string pointing to the on-disk cubin to load.
- name : Optional[str]
+ name : str | None
A human-readable identifier representing this code object.
- symbol_mapping : Optional[dict]
+ symbol_mapping : dict | None
A dictionary specifying how the unmangled symbol names (as keys)
should be mapped to the mangled names before trying to retrieve
them (default to no mappings).
@@ -521,9 +521,9 @@ def from_ptx(module: bytes | str, *, name: str = "", symbol_mapping: dict | None
module : bytes | str
Either a bytes object containing the in-memory ptx code to load, or
a file path string pointing to the on-disk ptx file to load.
- name : Optional[str]
+ name : str | None
A human-readable identifier representing this code object.
- symbol_mapping : Optional[dict]
+ symbol_mapping : dict | None
A dictionary specifying how the unmangled symbol names (as keys)
should be mapped to the mangled names before trying to retrieve
them (default to no mappings).
@@ -539,9 +539,9 @@ def from_ltoir(module: bytes | str, *, name: str = "", symbol_mapping: dict | No
module : bytes, str
Either a bytes object containing the in-memory ltoir code to load, or
a file path string pointing to the on-disk ltoir file to load.
- name : Optional[str]
+ name : str | None
A human-readable identifier representing this code object.
- symbol_mapping : Optional[dict]
+ symbol_mapping : dict | None
A dictionary specifying how the unmangled symbol names (as keys)
should be mapped to the mangled names before trying to retrieve
them (default to no mappings).
@@ -554,12 +554,12 @@ def from_fatbin(module: bytes | str, *, name: str = "", symbol_mapping: dict | N
Parameters
----------
- module : bytes| str
+ module : bytes | str
Either a bytes object containing the in-memory fatbin to load, or
a file path string pointing to the on-disk fatbin to load.
- name : Optional[str]
+ name : str | None
A human-readable identifier representing this code object.
- symbol_mapping : Optional[dict]
+ symbol_mapping : dict | None
A dictionary specifying how the unmangled symbol names (as keys)
should be mapped to the mangled names before trying to retrieve
them (default to no mappings).
@@ -575,9 +575,9 @@ def from_object(module: bytes | str, *, name: str = "", symbol_mapping: dict | N
module : bytes | str
Either a bytes object containing the in-memory object code to load, or
a file path string pointing to the on-disk object code to load.
- name : Optional[str]
+ name : str | None
A human-readable identifier representing this code object.
- symbol_mapping : Optional[dict]
+ symbol_mapping : dict | None
A dictionary specifying how the unmangled symbol names (as keys)
should be mapped to the mangled names before trying to retrieve
them (default to no mappings).
@@ -593,9 +593,9 @@ def from_library(module: bytes | str, *, name: str = "", symbol_mapping: dict |
module : bytes | str
Either a bytes object containing the in-memory library to load, or
a file path string pointing to the on-disk library to load.
- name : Optional[str]
+ name : str | None
A human-readable identifier representing this code object.
- symbol_mapping : Optional[dict]
+ symbol_mapping : dict | None
A dictionary specifying how the unmangled symbol names (as keys)
should be mapped to the mangled names before trying to retrieve
them (default to no mappings).
diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py
index 7ef24105b1..28c282ffea 100644
--- a/cuda_core/cuda/core/experimental/_program.py
+++ b/cuda_core/cuda/core/experimental/_program.py
@@ -288,9 +288,9 @@ class ProgramOptions:
device_int128: bool | None = None
optimization_info: str | None = None
no_display_error_number: bool | None = None
- diag_error: int | list[int] | tuple[int] | None = None
- diag_suppress: int | list[int] | tuple[int] | None = None
- diag_warn: int | list[int] | tuple[int] | None = None
+ diag_error: int | list[int] | tuple[int, ...] | None = None
+ diag_suppress: int | list[int] | tuple[int, ...] | None = None
+ diag_warn: int | list[int] | tuple[int, ...] | None = None
brief_diagnostics: bool | None = None
time: str | None = None
split_compile: int | None = None
From 041d824a2ab7d437fa0da029b59267b033483ad5 Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Mon, 27 Oct 2025 16:21:24 -0400
Subject: [PATCH 8/9] chore: document `name` as `str` (it has a default and is
 never None)
---
cuda_core/cuda/core/experimental/_module.py | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py
index 9654cb97d3..18a9537ced 100644
--- a/cuda_core/cuda/core/experimental/_module.py
+++ b/cuda_core/cuda/core/experimental/_module.py
@@ -503,7 +503,7 @@ def from_cubin(module: bytes | str, *, name: str = "", symbol_mapping: dict | No
module : bytes | str
Either a bytes object containing the in-memory cubin to load, or
a file path string pointing to the on-disk cubin to load.
- name : str | None
+ name : str
A human-readable identifier representing this code object.
symbol_mapping : dict | None
A dictionary specifying how the unmangled symbol names (as keys)
@@ -521,7 +521,7 @@ def from_ptx(module: bytes | str, *, name: str = "", symbol_mapping: dict | None
module : bytes | str
Either a bytes object containing the in-memory ptx code to load, or
a file path string pointing to the on-disk ptx file to load.
- name : str | None
+ name : str
A human-readable identifier representing this code object.
symbol_mapping : dict | None
A dictionary specifying how the unmangled symbol names (as keys)
@@ -539,7 +539,7 @@ def from_ltoir(module: bytes | str, *, name: str = "", symbol_mapping: dict | No
module : bytes, str
Either a bytes object containing the in-memory ltoir code to load, or
a file path string pointing to the on-disk ltoir file to load.
- name : str | None
+ name : str
A human-readable identifier representing this code object.
symbol_mapping : dict | None
A dictionary specifying how the unmangled symbol names (as keys)
@@ -557,7 +557,7 @@ def from_fatbin(module: bytes | str, *, name: str = "", symbol_mapping: dict | N
module : bytes | str
Either a bytes object containing the in-memory fatbin to load, or
a file path string pointing to the on-disk fatbin to load.
- name : str | None
+ name : str
A human-readable identifier representing this code object.
symbol_mapping : dict | None
A dictionary specifying how the unmangled symbol names (as keys)
@@ -575,7 +575,7 @@ def from_object(module: bytes | str, *, name: str = "", symbol_mapping: dict | N
module : bytes | str
Either a bytes object containing the in-memory object code to load, or
a file path string pointing to the on-disk object code to load.
- name : str | None
+ name : str
A human-readable identifier representing this code object.
symbol_mapping : dict | None
A dictionary specifying how the unmangled symbol names (as keys)
@@ -593,7 +593,7 @@ def from_library(module: bytes | str, *, name: str = "", symbol_mapping: dict |
module : bytes | str
Either a bytes object containing the in-memory library to load, or
a file path string pointing to the on-disk library to load.
- name : str | None
+ name : str
A human-readable identifier representing this code object.
symbol_mapping : dict | None
A dictionary specifying how the unmangled symbol names (as keys)
From 77cc0fcde61a7c2bfece90c022c069f25cd59f0a Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Thu, 30 Oct 2025 13:40:25 -0400
Subject: [PATCH 9/9] chore: fix optionals
---
.../cuda/pathfinder/_headers/find_nvidia_headers.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py b/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py
index d770e99214..63f8a627fd 100644
--- a/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py
+++ b/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py
@@ -21,7 +21,7 @@ def _joined_isfile(dirpath: str, basename: str) -> bool:
return os.path.isfile(os.path.join(dirpath, basename))
-def _find_under_site_packages(sub_dir: str, h_basename: str) -> Optional[str]:
+def _find_under_site_packages(sub_dir: str, h_basename: str) -> str | None:
# Installed from a wheel
hdr_dir: str # help mypy
for hdr_dir in find_sub_dirs_all_sitepackages(tuple(sub_dir.split("/"))):
@@ -52,7 +52,7 @@ def _find_based_on_ctk_layout(libname: str, h_basename: str, anchor_point: str)
return None
-def _find_based_on_conda_layout(libname: str, h_basename: str, ctk_layout: bool) -> Optional[str]:
+def _find_based_on_conda_layout(libname: str, h_basename: str, ctk_layout: bool) -> str | None:
conda_prefix = os.environ.get("CONDA_PREFIX")
if not conda_prefix:
return None
@@ -134,7 +134,7 @@ def find_nvidia_header_directory(libname: str) -> str | None:
raise RuntimeError(f"UNKNOWN {libname=}")
candidate_dirs = supported_nvidia_headers.SUPPORTED_SITE_PACKAGE_HEADER_DIRS_NON_CTK.get(libname, [])
- hdr_dir: Optional[str] # help mypy
+ hdr_dir: str | None # help mypy
for cdir in candidate_dirs:
if hdr_dir := _find_under_site_packages(cdir, h_basename):
return _abs_norm(hdr_dir)