From 7aa41b286619859bad2a2fbbb8b1b1099b41faca Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Mon, 27 Oct 2025 15:03:08 -0400 Subject: [PATCH 1/9] Reapply "refactor(python): drop support for 3.9, document 3.14 support (#1069)" (#1109) This reverts commit fcd7b99e2ea81343a3477fbcadec8559f2f4ec4b. --- .github/workflows/build-wheel.yml | 1 - CONTRIBUTING.md | 2 +- ci/test-matrix.json | 14 ---------- cuda_bindings/docs/source/install.rst | 2 +- cuda_bindings/docs/source/support.rst | 2 +- cuda_bindings/pyproject.toml | 3 ++- cuda_core/cuda/core/experimental/__init__.py | 11 -------- cuda_core/cuda/core/experimental/_device.pyx | 10 +++---- .../cuda/core/experimental/_launch_config.py | 6 ++--- cuda_core/cuda/core/experimental/_linker.py | 2 +- cuda_core/cuda/core/experimental/_module.py | 26 +++++++----------- cuda_core/cuda/core/experimental/_program.py | 2 +- cuda_core/docs/source/install.rst | 3 ++- cuda_core/pyproject.toml | 4 +-- .../_dynamic_libs/find_nvidia_dynamic_lib.py | 27 +++++++++---------- .../_dynamic_libs/load_dl_common.py | 4 +-- .../pathfinder/_dynamic_libs/load_dl_linux.py | 12 ++++----- .../_dynamic_libs/load_dl_windows.py | 10 +++---- .../_headers/find_nvidia_headers.py | 9 +++---- .../cuda/pathfinder/_utils/env_vars.py | 3 +-- .../_utils/find_site_packages_dll.py | 7 ++++- .../_utils/find_site_packages_so.py | 6 ++++- cuda_pathfinder/pyproject.toml | 4 +-- .../tests/spawned_process_runner.py | 10 +++---- cuda_python/pyproject.toml | 4 ++- ruff.toml | 3 ++- toolshed/make_site_packages_libdirs.py | 4 +-- 27 files changed, 83 insertions(+), 108 deletions(-) diff --git a/.github/workflows/build-wheel.yml b/.github/workflows/build-wheel.yml index 57a8581f15..6f683d3aed 100644 --- a/.github/workflows/build-wheel.yml +++ b/.github/workflows/build-wheel.yml @@ -28,7 +28,6 @@ jobs: fail-fast: false matrix: python-version: - - "3.9" - "3.10" - "3.11" - "3.12" diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 
183d215865..67bd568d85 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -113,7 +113,7 @@ flowchart TD B2["linux-aarch64
(Self-hosted)"] B3["win-64
(GitHub-hosted)"] end - BUILD_DETAILS["• Python versions: 3.9, 3.10, 3.11, 3.12, 3.13
• CUDA version: 13.0.0 (build-time)
• Components: cuda-core, cuda-bindings,
cuda-pathfinder, cuda-python"] + BUILD_DETAILS["• Python versions: 3.10, 3.11, 3.12, 3.13, 3.14
• CUDA version: 13.0.0 (build-time)
• Components: cuda-core, cuda-bindings,
cuda-pathfinder, cuda-python"] end %% Artifact Storage diff --git a/ci/test-matrix.json b/ci/test-matrix.json index 4b60779ec9..a8084442dc 100644 --- a/ci/test-matrix.json +++ b/ci/test-matrix.json @@ -4,8 +4,6 @@ "_notes": "DRIVER: 'earliest' does not work with CUDA 12.9.1 and LOCAL_CTK: 0 does not work with CUDA 12.0.1", "linux": { "pull-request": [ - { "ARCH": "amd64", "PY_VER": "3.9", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.9", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" }, { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" }, @@ -16,8 +14,6 @@ { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "arm64", "PY_VER": "3.9", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" }, - { "ARCH": "arm64", "PY_VER": "3.9", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }, { "ARCH": "arm64", "PY_VER": "3.10", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }, { "ARCH": "arm64", "PY_VER": "3.10", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" }, { "ARCH": "arm64", "PY_VER": "3.11", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" }, @@ -30,11 +26,6 @@ { "ARCH": "arm64", "PY_VER": "3.14t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" } ], "nightly": [ - { 
"ARCH": "amd64", "PY_VER": "3.9", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "earliest" }, - { "ARCH": "amd64", "PY_VER": "3.9", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.9", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.9", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.9", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "earliest" }, { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, @@ -55,11 +46,6 @@ { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" }, { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "arm64", "PY_VER": "3.9", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "earliest" }, - { "ARCH": "arm64", "PY_VER": "3.9", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }, - { "ARCH": "arm64", "PY_VER": "3.9", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }, - { "ARCH": "arm64", "PY_VER": "3.9", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" }, - { "ARCH": "arm64", "PY_VER": "3.9", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }, { "ARCH": "arm64", "PY_VER": "3.10", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "earliest" }, { "ARCH": "arm64", "PY_VER": "3.10", "CUDA_VER": 
"11.8.0", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }, { "ARCH": "arm64", "PY_VER": "3.10", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }, diff --git a/cuda_bindings/docs/source/install.rst b/cuda_bindings/docs/source/install.rst index 9836d172ac..58a6a0f31c 100644 --- a/cuda_bindings/docs/source/install.rst +++ b/cuda_bindings/docs/source/install.rst @@ -10,7 +10,7 @@ Runtime Requirements ``cuda.bindings`` supports the same platforms as CUDA. Runtime dependencies are: * Linux (x86-64, arm64) and Windows (x86-64) -* Python 3.9 - 3.14 +* Python 3.10 - 3.14 * Driver: Linux (580.65.06 or later) Windows (580.88 or later) * Optionally, NVRTC, nvJitLink, NVVM, and cuFile from CUDA Toolkit 13.x diff --git a/cuda_bindings/docs/source/support.rst b/cuda_bindings/docs/source/support.rst index a34a5c49e2..4439d963c0 100644 --- a/cuda_bindings/docs/source/support.rst +++ b/cuda_bindings/docs/source/support.rst @@ -19,7 +19,7 @@ The ``cuda.bindings`` module has the following support policy: depends on the underlying driver and the Toolkit versions, as described in the compatibility documentation.) 4. The module supports all Python versions following the `CPython EOL schedule`_. As of writing - Python 3.9 - 3.13 are supported. + Python 3.10 - 3.14 are supported. 5. The module exposes a Cython layer from which types and functions could be ``cimport``'d. 
While we strive to keep this layer stable, due to Cython limitations a new *minor* release of this module could require Cython layer users to rebuild their projects and update their pinning to diff --git a/cuda_bindings/pyproject.toml b/cuda_bindings/pyproject.toml index bc0dbf1977..7523682591 100644 --- a/cuda_bindings/pyproject.toml +++ b/cuda_bindings/pyproject.toml @@ -9,16 +9,17 @@ name = "cuda-bindings" description = "Python bindings for CUDA" authors = [{name = "NVIDIA Corporation", email = "cuda-python-conduct@nvidia.com"},] license = "LicenseRef-NVIDIA-SOFTWARE-LICENSE" +requires-python = ">=3.10" classifiers = [ "Intended Audience :: Developers", "Topic :: Database", "Topic :: Scientific/Engineering", "Programming Language :: Python", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", "Environment :: GPU :: NVIDIA CUDA", ] dynamic = [ diff --git a/cuda_core/cuda/core/experimental/__init__.py b/cuda_core/cuda/core/experimental/__init__.py index 94fb0aa083..8a60c031c5 100644 --- a/cuda_core/cuda/core/experimental/__init__.py +++ b/cuda_core/cuda/core/experimental/__init__.py @@ -26,17 +26,6 @@ finally: del cuda.bindings, importlib, subdir, cuda_major, cuda_minor -import sys # noqa: E402 -import warnings # noqa: E402 - -if sys.version_info < (3, 10): - warnings.warn( - "support for Python 3.9 and below is deprecated and subject to future removal", - category=FutureWarning, - stacklevel=1, - ) -del sys, warnings - from cuda.core.experimental import utils # noqa: E402 from cuda.core.experimental._device import Device # noqa: E402 from cuda.core.experimental._event import Event, EventOptions # noqa: E402 diff --git a/cuda_core/cuda/core/experimental/_device.pyx b/cuda_core/cuda/core/experimental/_device.pyx index 1db2adbf8d..d800a3c172 100644 --- 
a/cuda_core/cuda/core/experimental/_device.pyx +++ b/cuda_core/cuda/core/experimental/_device.pyx @@ -10,7 +10,7 @@ from cuda.bindings cimport cydriver from cuda.core.experimental._utils.cuda_utils cimport HANDLE_RETURN import threading -from typing import Optional, Union +from typing import Union from cuda.core.experimental._context import Context, ContextOptions from cuda.core.experimental._event import Event, EventOptions @@ -951,7 +951,7 @@ class Device: """ __slots__ = ("_id", "_mr", "_has_inited", "_properties") - def __new__(cls, device_id: Optional[int] = None): + def __new__(cls, device_id: int | None = None): global _is_cuInit if _is_cuInit is False: with _lock, nogil: @@ -1223,7 +1223,7 @@ class Device: """ raise NotImplementedError("WIP: https://github.com/NVIDIA/cuda-python/issues/189") - def create_stream(self, obj: Optional[IsStreamT] = None, options: Optional[StreamOptions] = None) -> Stream: + def create_stream(self, obj: IsStreamT | None = None, options: StreamOptions | None = None) -> Stream: """Create a Stream object. New stream objects can be created in two different ways: @@ -1254,7 +1254,7 @@ class Device: self._check_context_initialized() return Stream._init(obj=obj, options=options, device_id=self._id) - def create_event(self, options: Optional[EventOptions] = None) -> Event: + def create_event(self, options: EventOptions | None = None) -> Event: """Create an Event object without recording it to a Stream. Note @@ -1276,7 +1276,7 @@ class Device: ctx = self._get_current_context() return Event._init(self._id, ctx, options, True) - def allocate(self, size, stream: Optional[Stream] = None) -> Buffer: + def allocate(self, size, stream: Stream | None = None) -> Buffer: """Allocate device memory from a specified stream. 
Allocates device memory of `size` bytes on the specified `stream` diff --git a/cuda_core/cuda/core/experimental/_launch_config.py b/cuda_core/cuda/core/experimental/_launch_config.py index d82e0ec3a2..c1e08da58d 100644 --- a/cuda_core/cuda/core/experimental/_launch_config.py +++ b/cuda_core/cuda/core/experimental/_launch_config.py @@ -3,7 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 from dataclasses import dataclass -from typing import Optional, Union +from typing import Union from cuda.core.experimental._device import Device from cuda.core.experimental._utils.cuda_utils import ( @@ -68,8 +68,8 @@ class LaunchConfig: grid: Union[tuple, int] = None cluster: Union[tuple, int] = None block: Union[tuple, int] = None - shmem_size: Optional[int] = None - cooperative_launch: Optional[bool] = False + shmem_size: int | None = None + cooperative_launch: bool | None = False def __post_init__(self): _lazy_init() diff --git a/cuda_core/cuda/core/experimental/_linker.py b/cuda_core/cuda/core/experimental/_linker.py index a3fa4b3e48..5c54a88c8c 100644 --- a/cuda_core/cuda/core/experimental/_linker.py +++ b/cuda_core/cuda/core/experimental/_linker.py @@ -343,7 +343,7 @@ def _exception_manager(self): # our constructor could raise, in which case there's no handle available error_log = self.get_error_log() # Starting Python 3.11 we could also use Exception.add_note() for the same purpose, but - # unfortunately we are still supporting Python 3.9/3.10... + # unfortunately we are still supporting Python 3.10... # Here we rely on both CUDAError and nvJitLinkError have the error string placed in .args[0]. 
e.args = (e.args[0] + (f"\nLinker error log: {error_log}" if error_log else ""), *e.args[1:]) raise e diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py index 2c7ea3a156..f8ce8f95d0 100644 --- a/cuda_core/cuda/core/experimental/_module.py +++ b/cuda_core/cuda/core/experimental/_module.py @@ -4,7 +4,7 @@ import weakref from collections import namedtuple -from typing import Optional, Union +from typing import Union from warnings import warn from cuda.core.experimental._launch_config import LaunchConfig, _to_native_launch_config @@ -310,7 +310,7 @@ def available_dynamic_shared_memory_per_block(self, num_blocks_per_multiprocesso driver.cuOccupancyAvailableDynamicSMemPerBlock(self._handle, num_blocks_per_multiprocessor, block_size) ) - def max_potential_cluster_size(self, config: LaunchConfig, stream: Optional[Stream] = None) -> int: + def max_potential_cluster_size(self, config: LaunchConfig, stream: Stream | None = None) -> int: """Maximum potential cluster size. The maximum potential cluster size for this kernel and given launch configuration. @@ -332,7 +332,7 @@ def max_potential_cluster_size(self, config: LaunchConfig, stream: Optional[Stre drv_cfg.hStream = stream.handle return handle_return(driver.cuOccupancyMaxPotentialClusterSize(self._handle, drv_cfg)) - def max_active_clusters(self, config: LaunchConfig, stream: Optional[Stream] = None) -> int: + def max_active_clusters(self, config: LaunchConfig, stream: Stream | None = None) -> int: """Maximum number of active clusters on the target device. The maximum number of clusters that could concurrently execute on the target device. 
@@ -469,7 +469,7 @@ def __new__(self, *args, **kwargs): ) @classmethod - def _init(cls, module, code_type, *, name: str = "", symbol_mapping: Optional[dict] = None): + def _init(cls, module, code_type, *, name: str = "", symbol_mapping: dict | None = None): self = super().__new__(cls) assert code_type in self._supported_code_type, f"{code_type=} is not supported" _lazy_init() @@ -496,7 +496,7 @@ def __reduce__(self): return ObjectCode._reduce_helper, (self._module, self._code_type, self._name, self._sym_map) @staticmethod - def from_cubin(module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None) -> "ObjectCode": + def from_cubin(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode": """Create an :class:`ObjectCode` instance from an existing cubin. Parameters @@ -514,7 +514,7 @@ def from_cubin(module: Union[bytes, str], *, name: str = "", symbol_mapping: Opt return ObjectCode._init(module, "cubin", name=name, symbol_mapping=symbol_mapping) @staticmethod - def from_ptx(module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None) -> "ObjectCode": + def from_ptx(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode": """Create an :class:`ObjectCode` instance from an existing PTX. Parameters @@ -532,7 +532,7 @@ def from_ptx(module: Union[bytes, str], *, name: str = "", symbol_mapping: Optio return ObjectCode._init(module, "ptx", name=name, symbol_mapping=symbol_mapping) @staticmethod - def from_ltoir(module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None) -> "ObjectCode": + def from_ltoir(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode": """Create an :class:`ObjectCode` instance from an existing LTOIR. 
Parameters @@ -550,9 +550,7 @@ def from_ltoir(module: Union[bytes, str], *, name: str = "", symbol_mapping: Opt return ObjectCode._init(module, "ltoir", name=name, symbol_mapping=symbol_mapping) @staticmethod - def from_fatbin( - module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None - ) -> "ObjectCode": + def from_fatbin(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode": """Create an :class:`ObjectCode` instance from an existing fatbin. Parameters @@ -570,9 +568,7 @@ def from_fatbin( return ObjectCode._init(module, "fatbin", name=name, symbol_mapping=symbol_mapping) @staticmethod - def from_object( - module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None - ) -> "ObjectCode": + def from_object(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode": """Create an :class:`ObjectCode` instance from an existing object code. Parameters @@ -590,9 +586,7 @@ def from_object( return ObjectCode._init(module, "object", name=name, symbol_mapping=symbol_mapping) @staticmethod - def from_library( - module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None - ) -> "ObjectCode": + def from_library(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode": """Create an :class:`ObjectCode` instance from an existing library. Parameters diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index dee6f001e7..1db453fed1 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -49,7 +49,7 @@ def _nvvm_exception_manager(self): except Exception: error_log = "" # Starting Python 3.11 we could also use Exception.add_note() for the same purpose, but - # unfortunately we are still supporting Python 3.9/3.10... + # unfortunately we are still supporting Python 3.10... 
e.args = (e.args[0] + (f"\nNVVM program log: {error_log}" if error_log else ""), *e.args[1:]) raise e diff --git a/cuda_core/docs/source/install.rst b/cuda_core/docs/source/install.rst index cddde851d5..7100dade3b 100644 --- a/cuda_core/docs/source/install.rst +++ b/cuda_core/docs/source/install.rst @@ -26,7 +26,7 @@ dependencies are as follows: .. [#f1] Including ``cuda-python``. -``cuda.core`` supports Python 3.9 - 3.14, on Linux (x86-64, arm64) and Windows (x86-64). **Experimental** free-threaded builds for Python 3.13 & 3.14 are also provided. +``cuda.core`` supports Python 3.10 - 3.14, on Linux (x86-64, arm64) and Windows (x86-64). **Experimental** free-threaded builds for Python 3.13 & 3.14 are also provided. Free-threading Build Support @@ -42,6 +42,7 @@ As of cuda-core 0.4.0, **experimental** packages for the `free-threaded interpre .. _built-in modules that are known to be thread-unsafe: https://github.com/python/cpython/issues/116738 .. _free-threaded interpreter: https://docs.python.org/3/howto/free-threading-python.html +``cuda.core`` supports Python 3.10 - 3.14, on Linux (x86-64, arm64) and Windows (x86-64). 
Installing from PyPI -------------------- diff --git a/cuda_core/pyproject.toml b/cuda_core/pyproject.toml index 5f12f6f120..31ceb5b1a5 100644 --- a/cuda_core/pyproject.toml +++ b/cuda_core/pyproject.toml @@ -14,7 +14,7 @@ dynamic = [ "version", "readme", ] -requires-python = '>=3.9' +requires-python = '>=3.10' description = "cuda.core: (experimental) pythonic CUDA module" authors = [ { name = "NVIDIA Corporation" } @@ -32,11 +32,11 @@ classifiers = [ "Topic :: Scientific/Engineering", "Topic :: Software Development :: Libraries", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", "Programming Language :: Python :: Implementation :: CPython", "Environment :: GPU :: NVIDIA CUDA", "Environment :: GPU :: NVIDIA CUDA :: 12", diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py index 75ebec3a85..65c9f4bf3c 100644 --- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py +++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py @@ -4,7 +4,6 @@ import glob import os from collections.abc import Sequence -from typing import Optional from cuda.pathfinder._dynamic_libs.load_dl_common import DynamicLibNotFoundError from cuda.pathfinder._dynamic_libs.supported_nvidia_libs import ( @@ -29,7 +28,7 @@ def _no_such_file_in_sub_dirs( def _find_so_using_nvidia_lib_dirs( libname: str, so_basename: str, error_messages: list[str], attachments: list[str] -) -> Optional[str]: +) -> str | None: rel_dirs = SITE_PACKAGES_LIBDIRS_LINUX.get(libname) if rel_dirs is not None: sub_dirs_searched = [] @@ -52,7 +51,7 @@ def _find_so_using_nvidia_lib_dirs( return None -def _find_dll_under_dir(dirpath: str, 
file_wild: str) -> Optional[str]: +def _find_dll_under_dir(dirpath: str, file_wild: str) -> str | None: for path in sorted(glob.glob(os.path.join(dirpath, file_wild))): if not os.path.isfile(path): continue @@ -63,7 +62,7 @@ def _find_dll_under_dir(dirpath: str, file_wild: str) -> Optional[str]: def _find_dll_using_nvidia_bin_dirs( libname: str, lib_searched_for: str, error_messages: list[str], attachments: list[str] -) -> Optional[str]: +) -> str | None: rel_dirs = SITE_PACKAGES_LIBDIRS_WINDOWS.get(libname) if rel_dirs is not None: sub_dirs_searched = [] @@ -79,7 +78,7 @@ def _find_dll_using_nvidia_bin_dirs( return None -def _find_lib_dir_using_anchor_point(libname: str, anchor_point: str, linux_lib_dir: str) -> Optional[str]: +def _find_lib_dir_using_anchor_point(libname: str, anchor_point: str, linux_lib_dir: str) -> str | None: # Resolve paths for the four cases: # Windows/Linux x nvvm yes/no if IS_WINDOWS: @@ -107,14 +106,14 @@ def _find_lib_dir_using_anchor_point(libname: str, anchor_point: str, linux_lib_ return None -def _find_lib_dir_using_cuda_home(libname: str) -> Optional[str]: +def _find_lib_dir_using_cuda_home(libname: str) -> str | None: cuda_home = get_cuda_home_or_path() if cuda_home is None: return None return _find_lib_dir_using_anchor_point(libname, anchor_point=cuda_home, linux_lib_dir="lib64") -def _find_lib_dir_using_conda_prefix(libname: str) -> Optional[str]: +def _find_lib_dir_using_conda_prefix(libname: str) -> str | None: conda_prefix = os.environ.get("CONDA_PREFIX") if not conda_prefix: return None @@ -125,7 +124,7 @@ def _find_lib_dir_using_conda_prefix(libname: str) -> Optional[str]: def _find_so_using_lib_dir( lib_dir: str, so_basename: str, error_messages: list[str], attachments: list[str] -) -> Optional[str]: +) -> str | None: so_name = os.path.join(lib_dir, so_basename) if os.path.isfile(so_name): return so_name @@ -141,7 +140,7 @@ def _find_so_using_lib_dir( def _find_dll_using_lib_dir( lib_dir: str, libname: str, error_messages: 
list[str], attachments: list[str] -) -> Optional[str]: +) -> str | None: file_wild = libname + "*.dll" dll_name = _find_dll_under_dir(lib_dir, file_wild) if dll_name is not None: @@ -162,9 +161,9 @@ def __init__(self, libname: str): self.lib_searched_for = f"lib{libname}.so" self.error_messages: list[str] = [] self.attachments: list[str] = [] - self.abs_path: Optional[str] = None + self.abs_path: str | None = None - def try_site_packages(self) -> Optional[str]: + def try_site_packages(self) -> str | None: if IS_WINDOWS: return _find_dll_using_nvidia_bin_dirs( self.libname, @@ -180,13 +179,13 @@ def try_site_packages(self) -> Optional[str]: self.attachments, ) - def try_with_conda_prefix(self) -> Optional[str]: + def try_with_conda_prefix(self) -> str | None: return self._find_using_lib_dir(_find_lib_dir_using_conda_prefix(self.libname)) - def try_with_cuda_home(self) -> Optional[str]: + def try_with_cuda_home(self) -> str | None: return self._find_using_lib_dir(_find_lib_dir_using_cuda_home(self.libname)) - def _find_using_lib_dir(self, lib_dir: Optional[str]) -> Optional[str]: + def _find_using_lib_dir(self, lib_dir: str | None) -> str | None: if lib_dir is None: return None if IS_WINDOWS: diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_common.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_common.py index 2e6c9eb17c..91e6284a00 100644 --- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_common.py +++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_common.py @@ -1,8 +1,8 @@ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 +from collections.abc import Callable from dataclasses import dataclass -from typing import Callable, Optional from cuda.pathfinder._dynamic_libs.supported_nvidia_libs import DIRECT_DEPENDENCIES @@ -13,7 +13,7 @@ class DynamicLibNotFoundError(RuntimeError): @dataclass class LoadedDL: - abs_path: Optional[str] + abs_path: str | None was_already_loaded_from_elsewhere: bool _handle_uint: int # Platform-agnostic unsigned pointer value found_via: str diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_linux.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_linux.py index 040e24705e..4d2bae5b90 100644 --- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_linux.py +++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_linux.py @@ -5,7 +5,7 @@ import ctypes import ctypes.util import os -from typing import Optional, cast +from typing import cast from cuda.pathfinder._dynamic_libs.load_dl_common import LoadedDL from cuda.pathfinder._dynamic_libs.supported_nvidia_libs import ( @@ -76,8 +76,8 @@ class _LinkMapLNameView(ctypes.Structure): assert _LinkMapLNameView.l_name.offset == ctypes.sizeof(ctypes.c_void_p) -def _dl_last_error() -> Optional[str]: - msg_bytes = cast(Optional[bytes], LIBDL.dlerror()) +def _dl_last_error() -> str | None: + msg_bytes = cast(bytes | None, LIBDL.dlerror()) if not msg_bytes: return None # no pending error # Never raises; undecodable bytes are mapped to U+DC80..U+DCFF @@ -131,7 +131,7 @@ def get_candidate_sonames(libname: str) -> list[str]: return candidate_sonames -def check_if_already_loaded_from_elsewhere(libname: str, _have_abs_path: bool) -> Optional[LoadedDL]: +def check_if_already_loaded_from_elsewhere(libname: str, _have_abs_path: bool) -> LoadedDL | None: for soname in get_candidate_sonames(libname): try: handle = ctypes.CDLL(soname, mode=os.RTLD_NOLOAD) @@ -151,7 +151,7 @@ def _load_lib(libname: str, filename: str) -> ctypes.CDLL: return ctypes.CDLL(filename, 
cdll_mode) -def load_with_system_search(libname: str) -> Optional[LoadedDL]: +def load_with_system_search(libname: str) -> LoadedDL | None: """Try to load a library using system search paths. Args: @@ -195,7 +195,7 @@ def _work_around_known_bugs(libname: str, found_path: str) -> None: ctypes.CDLL(dep_path, CDLL_MODE) -def load_with_abs_path(libname: str, found_path: str, found_via: Optional[str] = None) -> LoadedDL: +def load_with_abs_path(libname: str, found_path: str, found_via: str | None = None) -> LoadedDL: """Load a dynamic library from the given path. Args: diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_windows.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_windows.py index d8ac53fe8a..b9f15ea50b 100644 --- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_windows.py +++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_windows.py @@ -5,7 +5,6 @@ import ctypes.wintypes import os import struct -from typing import Optional from cuda.pathfinder._dynamic_libs.load_dl_common import LoadedDL from cuda.pathfinder._dynamic_libs.supported_nvidia_libs import ( @@ -100,10 +99,7 @@ def abs_path_for_dynamic_library(libname: str, handle: ctypes.wintypes.HMODULE) return buffer.value -def check_if_already_loaded_from_elsewhere( - libname: str, - have_abs_path: bool, -) -> Optional[LoadedDL]: +def check_if_already_loaded_from_elsewhere(libname: str, have_abs_path: bool) -> LoadedDL | None: for dll_name in SUPPORTED_WINDOWS_DLLS.get(libname, ()): handle = kernel32.GetModuleHandleW(dll_name) if handle: @@ -117,7 +113,7 @@ def check_if_already_loaded_from_elsewhere( return None -def load_with_system_search(libname: str) -> Optional[LoadedDL]: +def load_with_system_search(libname: str) -> LoadedDL | None: """Try to load a DLL using system search paths. 
Args: @@ -136,7 +132,7 @@ def load_with_system_search(libname: str) -> Optional[LoadedDL]: return None -def load_with_abs_path(libname: str, found_path: str, found_via: Optional[str] = None) -> LoadedDL: +def load_with_abs_path(libname: str, found_path: str, found_via: str | None = None) -> LoadedDL: """Load a dynamic library from the given path. Args: diff --git a/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py b/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py index b141700ab7..d770e99214 100644 --- a/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py +++ b/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py @@ -4,7 +4,6 @@ import functools import glob import os -from typing import Optional from cuda.pathfinder._headers import supported_nvidia_headers from cuda.pathfinder._utils.env_vars import get_cuda_home_or_path @@ -12,7 +11,7 @@ from cuda.pathfinder._utils.platform_aware import IS_WINDOWS -def _abs_norm(path: Optional[str]) -> Optional[str]: +def _abs_norm(path: str | None) -> str | None: if path: return os.path.normpath(os.path.abspath(path)) return None @@ -31,7 +30,7 @@ def _find_under_site_packages(sub_dir: str, h_basename: str) -> Optional[str]: return None -def _find_based_on_ctk_layout(libname: str, h_basename: str, anchor_point: str) -> Optional[str]: +def _find_based_on_ctk_layout(libname: str, h_basename: str, anchor_point: str) -> str | None: parts = [anchor_point] if libname == "nvvm": parts.append(libname) @@ -77,7 +76,7 @@ def _find_based_on_conda_layout(libname: str, h_basename: str, ctk_layout: bool) return _find_based_on_ctk_layout(libname, h_basename, anchor_point) -def _find_ctk_header_directory(libname: str) -> Optional[str]: +def _find_ctk_header_directory(libname: str) -> str | None: h_basename = supported_nvidia_headers.SUPPORTED_HEADERS_CTK[libname] candidate_dirs = supported_nvidia_headers.SUPPORTED_SITE_PACKAGE_HEADER_DIRS_CTK[libname] @@ -97,7 +96,7 @@ def 
_find_ctk_header_directory(libname: str) -> Optional[str]: @functools.cache -def find_nvidia_header_directory(libname: str) -> Optional[str]: +def find_nvidia_header_directory(libname: str) -> str | None: """Locate the header directory for a supported NVIDIA library. Args: diff --git a/cuda_pathfinder/cuda/pathfinder/_utils/env_vars.py b/cuda_pathfinder/cuda/pathfinder/_utils/env_vars.py index 3a7de992c0..cf78a627cb 100644 --- a/cuda_pathfinder/cuda/pathfinder/_utils/env_vars.py +++ b/cuda_pathfinder/cuda/pathfinder/_utils/env_vars.py @@ -3,7 +3,6 @@ import os import warnings -from typing import Optional def _paths_differ(a: str, b: str) -> bool: @@ -33,7 +32,7 @@ def _paths_differ(a: str, b: str) -> bool: return True -def get_cuda_home_or_path() -> Optional[str]: +def get_cuda_home_or_path() -> str | None: cuda_home = os.environ.get("CUDA_HOME") cuda_path = os.environ.get("CUDA_PATH") diff --git a/cuda_pathfinder/cuda/pathfinder/_utils/find_site_packages_dll.py b/cuda_pathfinder/cuda/pathfinder/_utils/find_site_packages_dll.py index 2f5695093c..507355727f 100644 --- a/cuda_pathfinder/cuda/pathfinder/_utils/find_site_packages_dll.py +++ b/cuda_pathfinder/cuda/pathfinder/_utils/find_site_packages_dll.py @@ -11,7 +11,12 @@ def find_all_dll_files_via_metadata() -> dict[str, tuple[str, ...]]: results: collections.defaultdict[str, list[str]] = collections.defaultdict(list) # sort dists for deterministic output - for dist in sorted(importlib.metadata.distributions(), key=lambda d: (d.metadata.get("Name", ""), d.version)): + + for dist in sorted( + importlib.metadata.distributions(), + # `get` exists before 3.12, even though the hints only exist for Python >=3.12 + key=lambda d: (d.metadata.get("Name", ""), d.version), # type: ignore[attr-defined] + ): files = dist.files if not files: continue diff --git a/cuda_pathfinder/cuda/pathfinder/_utils/find_site_packages_so.py b/cuda_pathfinder/cuda/pathfinder/_utils/find_site_packages_so.py index 69e7eea3ad..33ee1f1bcf 100644 
--- a/cuda_pathfinder/cuda/pathfinder/_utils/find_site_packages_so.py +++ b/cuda_pathfinder/cuda/pathfinder/_utils/find_site_packages_so.py @@ -23,7 +23,11 @@ def find_all_so_files_via_metadata() -> dict[str, dict[str, tuple[str, ...]]]: ) # sort dists for deterministic output - for dist in sorted(importlib.metadata.distributions(), key=lambda d: (d.metadata.get("Name", ""), d.version)): + for dist in sorted( + importlib.metadata.distributions(), + # `get` exists before 3.12, even though the hints only exist for Python >=3.12 + key=lambda d: (d.metadata.get("Name", ""), d.version), # type: ignore[attr-defined] + ): files = dist.files if not files: continue diff --git a/cuda_pathfinder/pyproject.toml b/cuda_pathfinder/pyproject.toml index 3db1aecbc8..9a6e40f8d3 100644 --- a/cuda_pathfinder/pyproject.toml +++ b/cuda_pathfinder/pyproject.toml @@ -6,7 +6,7 @@ name = "cuda-pathfinder" description = "Pathfinder for CUDA components" authors = [{ name = "NVIDIA Corporation", email = "cuda-python-conduct@nvidia.com" }] license = "Apache-2.0" -requires-python = ">=3.9" +requires-python = ">=3.10" dynamic = ["version", "readme"] dependencies = [] @@ -109,7 +109,7 @@ inline-quotes = "double" [tool.mypy] # Basic settings -python_version = "3.9" +python_version = "3.10" explicit_package_bases = true warn_return_any = true warn_unused_configs = true diff --git a/cuda_pathfinder/tests/spawned_process_runner.py b/cuda_pathfinder/tests/spawned_process_runner.py index 154178b2a2..f4440743f5 100644 --- a/cuda_pathfinder/tests/spawned_process_runner.py +++ b/cuda_pathfinder/tests/spawned_process_runner.py @@ -5,10 +5,10 @@ import queue # for Empty import sys import traceback -from collections.abc import Sequence +from collections.abc import Callable, Sequence from dataclasses import dataclass from io import StringIO -from typing import Any, Callable, Optional +from typing import Any PROCESS_KILLED = -9 PROCESS_NO_RESULT = -999 @@ -61,9 +61,9 @@ def __call__(self): def 
run_in_spawned_child_process( target: Callable[..., None], *, - args: Optional[Sequence[Any]] = None, - kwargs: Optional[dict[str, Any]] = None, - timeout: Optional[float] = None, + args: Sequence[Any] | None = None, + kwargs: dict[str, Any] | None = None, + timeout: float | None = None, rethrow: bool = False, ) -> CompletedProcess: """Run `target` in a spawned child process, capturing stdout/stderr. diff --git a/cuda_python/pyproject.toml b/cuda_python/pyproject.toml index fd6cacaf2a..9048f5818b 100644 --- a/cuda_python/pyproject.toml +++ b/cuda_python/pyproject.toml @@ -22,16 +22,18 @@ classifiers = [ "Intended Audience :: Science/Research", "Intended Audience :: End Users/Desktop", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", "Programming Language :: Python :: Implementation :: CPython", "Environment :: GPU :: NVIDIA CUDA", "Environment :: GPU :: NVIDIA CUDA :: 12", + "Environment :: GPU :: NVIDIA CUDA :: 13", ] dynamic = ["version", "dependencies", "optional-dependencies"] +requires-python = ">=3.10" [project.urls] homepage = "https://nvidia.github.io/cuda-python/" diff --git a/ruff.toml b/ruff.toml index 79c66e862c..6312d3e9ef 100644 --- a/ruff.toml +++ b/ruff.toml @@ -3,7 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 line-length = 120 respect-gitignore = true -target-version = "py39" +target-version = "py310" [format] docstring-code-format = true @@ -40,6 +40,7 @@ ignore = [ "S101", # asserts "S311", # allow use of the random.* even though many are not cryptographically secure "S404", # allow importing the subprocess module + "B905", # preserve the default behavior of `zip` without the explicit `strict` argument ] exclude = ["**/_version.py"] diff --git a/toolshed/make_site_packages_libdirs.py 
b/toolshed/make_site_packages_libdirs.py index d84d821700..00a495a095 100755 --- a/toolshed/make_site_packages_libdirs.py +++ b/toolshed/make_site_packages_libdirs.py @@ -8,7 +8,7 @@ import argparse import os import re -from typing import Dict, Optional, Set +from typing import Dict, Set _SITE_PACKAGES_RE = re.compile(r"(?i)^.*?/site-packages/") @@ -38,7 +38,7 @@ def parse_lines_linux(lines) -> Dict[str, Set[str]]: return d -def extract_libname_from_dll(fname: str) -> Optional[str]: +def extract_libname_from_dll(fname: str) -> str | None: """Return base libname per the heuristic, or None if not a .dll.""" base = os.path.basename(fname) if not base.lower().endswith(".dll"): From 9969317306466ae6956c9c76affeab0842dfe657 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Mon, 27 Oct 2025 15:12:51 -0400 Subject: [PATCH 2/9] docs: remove duplicate information --- cuda_core/docs/source/install.rst | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cuda_core/docs/source/install.rst b/cuda_core/docs/source/install.rst index 7100dade3b..72ec710785 100644 --- a/cuda_core/docs/source/install.rst +++ b/cuda_core/docs/source/install.rst @@ -26,7 +26,7 @@ dependencies are as follows: .. [#f1] Including ``cuda-python``. -``cuda.core`` supports Python 3.10 - 3.14, on Linux (x86-64, arm64) and Windows (x86-64). **Experimental** free-threaded builds for Python 3.13 & 3.14 are also provided. +``cuda.core`` supports Python 3.10 - 3.14, on Linux (x86-64, arm64) and Windows (x86-64). **Experimental** free-threaded builds for Python 3.14 are also provided. Free-threading Build Support @@ -42,8 +42,6 @@ As of cuda-core 0.4.0, **experimental** packages for the `free-threaded interpre .. _built-in modules that are known to be thread-unsafe: https://github.com/python/cpython/issues/116738 .. 
_free-threaded interpreter: https://docs.python.org/3/howto/free-threading-python.html -``cuda.core`` supports Python 3.10 - 3.14, on Linux (x86-64, arm64) and Windows (x86-64). - Installing from PyPI -------------------- From 05faed3b84f7759059972354299db012b96a4cd7 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Mon, 27 Oct 2025 15:17:57 -0400 Subject: [PATCH 3/9] test: remove B905 ruff rule and add `zip(..., strict=True)` in test code --- cuda_bindings/setup.py | 2 +- cuda_bindings/tests/test_cuda.py | 4 ++-- cuda_bindings/tests/test_cufile.py | 2 +- cuda_bindings/tests/test_nvjitlink.py | 2 +- cuda_core/tests/test_module.py | 8 ++++---- cuda_core/tests/test_system.py | 2 +- ruff.toml | 2 +- 7 files changed, 11 insertions(+), 11 deletions(-) diff --git a/cuda_bindings/setup.py b/cuda_bindings/setup.py index d89d0bccd9..c4138c11d0 100644 --- a/cuda_bindings/setup.py +++ b/cuda_bindings/setup.py @@ -125,7 +125,7 @@ def discoverMembers(self, memberDict, prefix, seen=None): next_seen = set(seen) next_seen.add(self._name) - for memberName, memberType in zip(self._member_names, self._member_types): + for memberName, memberType in zip(self._member_names, self._member_types, strict=True): if memberName: discovered.append(".".join([prefix, memberName])) diff --git a/cuda_bindings/tests/test_cuda.py b/cuda_bindings/tests/test_cuda.py index cd723941be..53bcc08f95 100644 --- a/cuda_bindings/tests/test_cuda.py +++ b/cuda_bindings/tests/test_cuda.py @@ -432,7 +432,7 @@ def test_cuda_pointer_attr(): # List version err, attr_value_list_v2 = cuda.cuPointerGetAttributes(len(attr_type_list), attr_type_list, ptr) assert err == cuda.CUresult.CUDA_SUCCESS - for attr1, attr2 in zip(attr_value_list, attr_value_list_v2): + for attr1, attr2 in zip(attr_value_list, attr_value_list_v2, strict=True): assert str(attr1) == str(attr2) # Test setting values @@ -512,7 +512,7 @@ def test_cuda_mem_range_attr(): attr_type_size_list, attr_type_list,
len(attr_type_list), ptr, size ) assert err == cuda.CUresult.CUDA_SUCCESS - for attr1, attr2 in zip(attr_value_list, attr_value_list_v2): + for attr1, attr2 in zip(attr_value_list, attr_value_list_v2, strict=True): assert str(attr1) == str(attr2) (err,) = cuda.cuMemFree(ptr) diff --git a/cuda_bindings/tests/test_cufile.py b/cuda_bindings/tests/test_cufile.py index 3716e2bec5..446cfdc83d 100644 --- a/cuda_bindings/tests/test_cufile.py +++ b/cuda_bindings/tests/test_cufile.py @@ -318,7 +318,7 @@ def test_buf_register_multiple_buffers(): try: # Register all buffers flags = 0 - for buf_ptr, size in zip(buffers, buffer_sizes): + for buf_ptr, size in zip(buffers, buffer_sizes, strict=True): buf_ptr_int = int(buf_ptr) cufile.buf_register(buf_ptr_int, size, flags) diff --git a/cuda_bindings/tests/test_nvjitlink.py b/cuda_bindings/tests/test_nvjitlink.py index 3bfeb8d35a..85c6058010 100644 --- a/cuda_bindings/tests/test_nvjitlink.py +++ b/cuda_bindings/tests/test_nvjitlink.py @@ -34,7 +34,7 @@ def _build_arch_ptx_parametrized_callable(): - av = tuple(zip(ARCHITECTURES, PTX_VERSIONS)) + av = tuple(zip(ARCHITECTURES, PTX_VERSIONS, strict=True)) return pytest.mark.parametrize( ("arch", "ptx_bytes"), [(a, (PTX_HEADER.format(VERSION=v, ARCH=a) + PTX_KERNEL).encode("utf-8")) for a, v in av], diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index 49df966c08..dffbc04209 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -248,9 +248,9 @@ class ExpectedStruct(ctypes.Structure): sizes = [p.size for p in arg_info] members = [getattr(ExpectedStruct, name) for name, _ in ExpectedStruct._fields_] expected_offsets = tuple(m.offset for m in members) - assert all(actual == expected for actual, expected in zip(offsets, expected_offsets)) + assert all(actual == expected for actual, expected in zip(offsets, expected_offsets, strict=True)) expected_sizes = tuple(m.size for m in members) - assert all(actual == expected for actual, 
expected in zip(sizes, expected_sizes)) + assert all(actual == expected for actual, expected in zip(sizes, expected_sizes, strict=True)) @pytest.mark.parametrize("nargs", [0, 1, 2, 3, 16]) @@ -274,8 +274,8 @@ class ExpectedStruct(ctypes.Structure): members = tuple(getattr(ExpectedStruct, f"arg_{i}") for i in range(nargs)) arg_info = krn.arguments_info - assert all([actual.offset == expected.offset for actual, expected in zip(arg_info, members)]) - assert all([actual.size == expected.size for actual, expected in zip(arg_info, members)]) + assert all([actual.offset == expected.offset for actual, expected in zip(arg_info, members, strict=True)]) + assert all([actual.size == expected.size for actual, expected in zip(arg_info, members, strict=True)]) def test_num_args_error_handling(deinit_all_contexts_function, cuda12_4_prerequisite_check): diff --git a/cuda_core/tests/test_system.py b/cuda_core/tests/test_system.py index d5195ed872..13f82b98f6 100644 --- a/cuda_core/tests/test_system.py +++ b/cuda_core/tests/test_system.py @@ -35,5 +35,5 @@ def test_devices(): expected_num_devices = handle_return(runtime.cudaGetDeviceCount()) expected_devices = tuple(Device(device_id) for device_id in range(expected_num_devices)) assert len(devices) == len(expected_devices), "Number of devices does not match expected value" - for device, expected_device in zip(devices, expected_devices): + for device, expected_device in zip(devices, expected_devices, strict=True): assert device.device_id == expected_device.device_id, "Device ID does not match expected value" diff --git a/ruff.toml b/ruff.toml index 6312d3e9ef..f28ff3cb98 100644 --- a/ruff.toml +++ b/ruff.toml @@ -40,7 +40,6 @@ ignore = [ "S101", # asserts "S311", # allow use of the random.* even though many are not cryptographically secure "S404", # allow importing the subprocess module - "B905", # preserve the default behavior of `zip` without the explicit `strict` argument ] exclude = ["**/_version.py"] @@ -52,6 +51,7 @@ exclude = 
["**/_version.py"] "cuda_bindings/examples/**" = [ "E722", "E501", # line too long + "B905", # preserve the default behavior of `zip` without the explicit `strict` argument ] "cuda_bindings/tests/**" = [ From 1c3e7e3e6c8d6632d323b90d43e3dbe57c5a5f4b Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Mon, 27 Oct 2025 15:23:49 -0400 Subject: [PATCH 4/9] chore: bump python in `cuda_python_test_helpers` --- cuda_python_test_helpers/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_python_test_helpers/pyproject.toml b/cuda_python_test_helpers/pyproject.toml index 85652b61c5..4709c05a07 100644 --- a/cuda_python_test_helpers/pyproject.toml +++ b/cuda_python_test_helpers/pyproject.toml @@ -12,7 +12,7 @@ description = "Shared test helpers for CUDA Python projects" readme = {file = "README.md", content-type = "text/markdown"} authors = [{ name = "NVIDIA Corporation" }] license = "Apache-2.0" -requires-python = ">=3.9" +requires-python = ">=3.10" classifiers = [ "Programming Language :: Python :: 3 :: Only", "Operating System :: POSIX :: Linux", From 9e46f1f7d125060ab898a3b6256f7d637b7a531c Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Mon, 27 Oct 2025 15:45:42 -0400 Subject: [PATCH 5/9] refactor: modernize dict/set in toolshed --- toolshed/make_site_packages_libdirs.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/toolshed/make_site_packages_libdirs.py b/toolshed/make_site_packages_libdirs.py index 00a495a095..eba6c68234 100755 --- a/toolshed/make_site_packages_libdirs.py +++ b/toolshed/make_site_packages_libdirs.py @@ -8,7 +8,6 @@ import argparse import os import re -from typing import Dict, Set _SITE_PACKAGES_RE = re.compile(r"(?i)^.*?/site-packages/") @@ -19,7 +18,7 @@ def strip_site_packages_prefix(p: str) -> str: return _SITE_PACKAGES_RE.sub("", p) -def parse_lines_linux(lines) -> Dict[str, Set[str]]: +def 
parse_lines_linux(lines) -> dict[str, set[str]]: d = {} # name -> set of dirs for raw in lines: line = raw.strip() @@ -53,9 +52,9 @@ def extract_libname_from_dll(fname: str) -> str | None: return name or None -def parse_lines_windows(lines) -> Dict[str, Set[str]]: +def parse_lines_windows(lines) -> dict[str, set[str]]: """Collect {libname: set(dirnames)} with deduped directories.""" - m: Dict[str, Set[str]] = {} + m: dict[str, set[str]] = {} for raw in lines: line = raw.strip() if not line or line.startswith("#"): @@ -69,7 +68,7 @@ def parse_lines_windows(lines) -> Dict[str, Set[str]]: return m -def dict_literal(d: Dict[str, Set[str]]) -> str: +def dict_literal(d: dict[str, set[str]]) -> str: """Pretty, stable dict literal with tuple values (singletons keep trailing comma).""" lines = ["{"] for k in sorted(d): From a7a1f3f30d014a6460d2a5c71fb91bbaa0b910c9 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Mon, 27 Oct 2025 15:53:55 -0400 Subject: [PATCH 6/9] refactor: modernize unions --- .../cuda/core/experimental/_launch_config.py | 13 +++--- cuda_core/cuda/core/experimental/_linker.py | 16 +++---- cuda_core/cuda/core/experimental/_module.py | 31 +++++++------- cuda_core/cuda/core/experimental/_program.py | 42 +++++++++---------- .../cuda_python_test_helpers/__init__.py | 3 +- 5 files changed, 50 insertions(+), 55 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_launch_config.py b/cuda_core/cuda/core/experimental/_launch_config.py index c1e08da58d..bd76dae286 100644 --- a/cuda_core/cuda/core/experimental/_launch_config.py +++ b/cuda_core/cuda/core/experimental/_launch_config.py @@ -3,7 +3,6 @@ # SPDX-License-Identifier: Apache-2.0 from dataclasses import dataclass -from typing import Union from cuda.core.experimental._device import Device from cuda.core.experimental._utils.cuda_utils import ( @@ -45,15 +44,15 @@ class LaunchConfig: Attributes ---------- - grid : Union[tuple, int] + grid : tuple | int Collection 
of threads that will execute a kernel function. When cluster is not specified, this represents the number of blocks, otherwise this represents the number of clusters. - cluster : Union[tuple, int] + cluster : tuple | int Group of blocks (Thread Block Cluster) that will execute on the same GPU Processing Cluster (GPC). Blocks within a cluster have access to distributed shared memory and can be explicitly synchronized. - block : Union[tuple, int] + block : tuple | int Group of threads (Thread Block) that will execute on the same streaming multiprocessor (SM). Threads within a thread blocks have access to shared memory and can be explicitly synchronized. @@ -65,9 +64,9 @@ class LaunchConfig: """ # TODO: expand LaunchConfig to include other attributes - grid: Union[tuple, int] = None - cluster: Union[tuple, int] = None - block: Union[tuple, int] = None + grid: tuple | int = None + cluster: tuple | int = None + block: tuple | int = None shmem_size: int | None = None cooperative_launch: bool | None = False diff --git a/cuda_core/cuda/core/experimental/_linker.py b/cuda_core/cuda/core/experimental/_linker.py index 5c54a88c8c..04c59c9d2d 100644 --- a/cuda_core/cuda/core/experimental/_linker.py +++ b/cuda_core/cuda/core/experimental/_linker.py @@ -9,7 +9,7 @@ import weakref from contextlib import contextmanager from dataclasses import dataclass -from typing import TYPE_CHECKING, Union +from typing import TYPE_CHECKING from warnings import warn if TYPE_CHECKING: @@ -154,14 +154,14 @@ class LinkerOptions: fma : bool, optional Use fast multiply-add. Default: True. - kernels_used : [Union[str, tuple[str], list[str]]], optional + kernels_used : str | tuple[str] | list[str], optional Pass a kernel or sequence of kernels that are used; any not in the list can be removed. 
- variables_used : [Union[str, tuple[str], list[str]]], optional + variables_used : str | tuple[str] | list[str], optional Pass a variable or sequence of variables that are used; any not in the list can be removed. optimize_unused_variables : bool, optional Assume that if a variable is not referenced in device code, it can be removed. Default: False. - ptxas_options : [Union[str, tuple[str], list[str]]], optional + ptxas_options : str | tuple[str] | list[str], optional Pass options to PTXAS. split_compile : int, optional Split compilation maximum thread count. Use 0 to use all available processors. Value of 1 disables split @@ -191,10 +191,10 @@ class LinkerOptions: prec_div: bool | None = None prec_sqrt: bool | None = None fma: bool | None = None - kernels_used: Union[str, tuple[str], list[str]] | None = None - variables_used: Union[str, tuple[str], list[str]] | None = None + kernels_used: str | tuple[str] | list[str] | None = None + variables_used: str | tuple[str] | list[str] | None = None optimize_unused_variables: bool | None = None - ptxas_options: Union[str, tuple[str], list[str]] | None = None + ptxas_options: str | tuple[str] | list[str] | None = None split_compile: int | None = None split_compile_extended: int | None = None no_cache: bool | None = None @@ -350,7 +350,7 @@ def _exception_manager(self): nvJitLinkHandleT = int -LinkerHandleT = Union[nvJitLinkHandleT, "cuda.bindings.driver.CUlinkState"] +LinkerHandleT = nvJitLinkHandleT | cuda.bindings.driver.CUlinkState class Linker: diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py index f8ce8f95d0..dcb5d06f5b 100644 --- a/cuda_core/cuda/core/experimental/_module.py +++ b/cuda_core/cuda/core/experimental/_module.py @@ -4,7 +4,6 @@ import weakref from collections import namedtuple -from typing import Union from warnings import warn from cuda.core.experimental._launch_config import LaunchConfig, _to_native_launch_config @@ -239,7 +238,7 @@ def 
max_active_blocks_per_multiprocessor(self, block_size: int, dynamic_shared_m ) def max_potential_block_size( - self, dynamic_shared_memory_needed: Union[int, driver.CUoccupancyB2DSize], block_size_limit: int + self, dynamic_shared_memory_needed: int | driver.CUoccupancyB2DSize, block_size_limit: int ) -> MaxPotentialBlockSizeOccupancyResult: """MaxPotentialBlockSizeOccupancyResult: Suggested launch configuration for reasonable occupancy. @@ -248,7 +247,7 @@ def max_potential_block_size( Parameters ---------- - dynamic_shared_memory_needed: Union[int, driver.CUoccupancyB2DSize] + dynamic_shared_memory_needed: int | driver.CUoccupancyB2DSize The amount of dynamic shared memory in bytes needed by block. Use `0` if block does not need shared memory. Use C-callable represented by :obj:`~driver.CUoccupancyB2DSize` to encode @@ -437,7 +436,7 @@ def occupancy(self) -> KernelOccupancy: # TODO: implement from_handle() -CodeTypeT = Union[bytes, bytearray, str] +CodeTypeT = bytes | bytearray | str class ObjectCode: @@ -496,12 +495,12 @@ def __reduce__(self): return ObjectCode._reduce_helper, (self._module, self._code_type, self._name, self._sym_map) @staticmethod - def from_cubin(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode": + def from_cubin(module: bytes | str, *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode": """Create an :class:`ObjectCode` instance from an existing cubin. Parameters ---------- - module : Union[bytes, str] + module : bytes | str Either a bytes object containing the in-memory cubin to load, or a file path string pointing to the on-disk cubin to load. 
name : Optional[str] @@ -514,12 +513,12 @@ def from_cubin(module: Union[bytes, str], *, name: str = "", symbol_mapping: dic return ObjectCode._init(module, "cubin", name=name, symbol_mapping=symbol_mapping) @staticmethod - def from_ptx(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode": + def from_ptx(module: bytes | str, *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode": """Create an :class:`ObjectCode` instance from an existing PTX. Parameters ---------- - module : Union[bytes, str] + module : bytes | str Either a bytes object containing the in-memory ptx code to load, or a file path string pointing to the on-disk ptx file to load. name : Optional[str] @@ -532,12 +531,12 @@ def from_ptx(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict return ObjectCode._init(module, "ptx", name=name, symbol_mapping=symbol_mapping) @staticmethod - def from_ltoir(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode": + def from_ltoir(module: bytes | str, *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode": """Create an :class:`ObjectCode` instance from an existing LTOIR. Parameters ---------- - module : Union[bytes, str] + module : bytes, str Either a bytes object containing the in-memory ltoir code to load, or a file path string pointing to the on-disk ltoir file to load. name : Optional[str] @@ -550,12 +549,12 @@ def from_ltoir(module: Union[bytes, str], *, name: str = "", symbol_mapping: dic return ObjectCode._init(module, "ltoir", name=name, symbol_mapping=symbol_mapping) @staticmethod - def from_fatbin(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode": + def from_fatbin(module: bytes | str, *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode": """Create an :class:`ObjectCode` instance from an existing fatbin. 
Parameters ---------- - module : Union[bytes, str] + module : bytes| str Either a bytes object containing the in-memory fatbin to load, or a file path string pointing to the on-disk fatbin to load. name : Optional[str] @@ -568,12 +567,12 @@ def from_fatbin(module: Union[bytes, str], *, name: str = "", symbol_mapping: di return ObjectCode._init(module, "fatbin", name=name, symbol_mapping=symbol_mapping) @staticmethod - def from_object(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode": + def from_object(module: bytes | str, *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode": """Create an :class:`ObjectCode` instance from an existing object code. Parameters ---------- - module : Union[bytes, str] + module : bytes | str Either a bytes object containing the in-memory object code to load, or a file path string pointing to the on-disk object code to load. name : Optional[str] @@ -586,12 +585,12 @@ def from_object(module: Union[bytes, str], *, name: str = "", symbol_mapping: di return ObjectCode._init(module, "object", name=name, symbol_mapping=symbol_mapping) @staticmethod - def from_library(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode": + def from_library(module: bytes | str, *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode": """Create an :class:`ObjectCode` instance from an existing library. Parameters ---------- - module : Union[bytes, str] + module : bytes | str Either a bytes object containing the in-memory library to load, or a file path string pointing to the on-disk library to load. 
name : Optional[str] diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index 1db453fed1..7ef24105b1 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -7,7 +7,7 @@ import weakref from contextlib import contextmanager from dataclasses import dataclass -from typing import TYPE_CHECKING, Union +from typing import TYPE_CHECKING from warnings import warn if TYPE_CHECKING: @@ -115,7 +115,7 @@ def _process_define_macro_inner(formatted_options, macro): def _process_define_macro(formatted_options, macro): - union_type = "Union[str, tuple[str, str]]" + union_type = "str | tuple[str, str]" if _process_define_macro_inner(formatted_options, macro): return if is_nested_sequence(macro): @@ -154,7 +154,7 @@ class ProgramOptions: Enable device code optimization. When specified along with ‘-G’, enables limited debug information generation for optimized device code. Default: None - ptxas_options : Union[str, list[str]], optional + ptxas_options : str | list[str], optional Specify one or more options directly to ptxas, the PTX optimizing assembler. Options should be strings. For example ["-v", "-O2"]. Default: None @@ -188,17 +188,17 @@ class ProgramOptions: gen_opt_lto : bool, optional Run the optimizer passes before generating the LTO IR. Default: False - define_macro : Union[str, tuple[str, str], list[Union[str, tuple[str, str]]]], optional + define_macro : str | tuple[str, str] | list[str | tuple[str, str]], optional Predefine a macro. Can be either a string, in which case that macro will be set to 1, a 2 element tuple of strings, in which case the first element is defined as the second, or a list of strings or tuples. Default: None - undefine_macro : Union[str, list[str]], optional + undefine_macro : str | list[str], optional Cancel any previous definition of a macro, or list of macros. 
Default: None - include_path : Union[str, list[str]], optional + include_path : str | list[str], optional Add the directory or directories to the list of directories to be searched for headers. Default: None - pre_include : Union[str, list[str]], optional + pre_include : str | list[str], optional Preinclude one or more headers during preprocessing. Can be either a string or a list of strings. Default: None no_source_include : bool, optional @@ -231,13 +231,13 @@ class ProgramOptions: no_display_error_number : bool, optional Disable the display of a diagnostic number for warning messages. Default: False - diag_error : Union[int, list[int]], optional + diag_error : int | list[int], optional Emit error for a specified diagnostic message number or comma separated list of numbers. Default: None - diag_suppress : Union[int, list[int]], optional + diag_suppress : int | list[int], optional Suppress a specified diagnostic message number or comma separated list of numbers. Default: None - diag_warn : Union[int, list[int]], optional + diag_warn : int | list[int], optional Emit warning for a specified diagnostic message number or comma separated lis of numbers. Default: None brief_diagnostics : bool, optional @@ -264,7 +264,7 @@ class ProgramOptions: debug: bool | None = None lineinfo: bool | None = None device_code_optimize: bool | None = None - ptxas_options: Union[str, list[str], tuple[str]] | None = None + ptxas_options: str | list[str] | tuple[str, ...] 
| None = None max_register_count: int | None = None ftz: bool | None = None prec_sqrt: bool | None = None @@ -274,12 +274,10 @@ class ProgramOptions: extra_device_vectorization: bool | None = None link_time_optimization: bool | None = None gen_opt_lto: bool | None = None - define_macro: ( - Union[str, tuple[str, str], list[Union[str, tuple[str, str]]], tuple[Union[str, tuple[str, str]]]] | None - ) = None - undefine_macro: Union[str, list[str], tuple[str]] | None = None - include_path: Union[str, list[str], tuple[str]] | None = None - pre_include: Union[str, list[str], tuple[str]] | None = None + define_macro: str | tuple[str, str] | list[str | tuple[str, str]] | tuple[str | tuple[str, str]] | None = None + undefine_macro: str | list[str] | tuple[str, ...] | None = None + include_path: str | list[str] | tuple[str, ...] | None = None + pre_include: str | list[str] | tuple[str, ...] | None = None no_source_include: bool | None = None std: str | None = None builtin_move_forward: bool | None = None @@ -290,9 +288,9 @@ class ProgramOptions: device_int128: bool | None = None optimization_info: str | None = None no_display_error_number: bool | None = None - diag_error: Union[int, list[int], tuple[int]] | None = None - diag_suppress: Union[int, list[int], tuple[int]] | None = None - diag_warn: Union[int, list[int], tuple[int]] | None = None + diag_error: int | list[int] | tuple[int] | None = None + diag_suppress: int | list[int] | tuple[int] | None = None + diag_warn: int | list[int] | tuple[int] | None = None brief_diagnostics: bool | None = None time: str | None = None split_compile: int | None = None @@ -428,7 +426,7 @@ def __repr__(self): return str(self._formatted_options) -ProgramHandleT = Union["cuda.bindings.nvrtc.nvrtcProgram", LinkerHandleT] +ProgramHandleT = cuda.bindings.nvrtc.nvrtcProgram | LinkerHandleT class Program: @@ -574,7 +572,7 @@ def compile(self, target_type, name_expressions=(), logs=None): target_type : Any String of the targeted compilation type. 
Supported options are "ptx", "cubin" and "ltoir". - name_expressions : Union[list, tuple], optional + name_expressions : list | tuple, optional List of explicit name expressions to become accessible. (Default to no expressions) logs : Any, optional diff --git a/cuda_python_test_helpers/cuda_python_test_helpers/__init__.py b/cuda_python_test_helpers/cuda_python_test_helpers/__init__.py index a661b4f1aa..e0b7261121 100644 --- a/cuda_python_test_helpers/cuda_python_test_helpers/__init__.py +++ b/cuda_python_test_helpers/cuda_python_test_helpers/__init__.py @@ -7,7 +7,6 @@ import platform import sys from contextlib import suppress -from typing import Union from cuda.core.experimental._utils.cuda_utils import handle_return @@ -38,7 +37,7 @@ def _detect_wsl() -> bool: @functools.cache -def supports_ipc_mempool(device_id: Union[int, object]) -> bool: +def supports_ipc_mempool(device_id: int | object) -> bool: """Return True if mempool IPC via POSIX file descriptor is supported. Uses cuDeviceGetAttribute(CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES) From 593f9a1904af1092e93e2f34917018f30bbe32c7 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Mon, 27 Oct 2025 16:17:27 -0400 Subject: [PATCH 7/9] chore: fix bot comments --- cuda_core/cuda/core/experimental/_module.py | 26 ++++++++++---------- cuda_core/cuda/core/experimental/_program.py | 6 ++--- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py index dcb5d06f5b..9654cb97d3 100644 --- a/cuda_core/cuda/core/experimental/_module.py +++ b/cuda_core/cuda/core/experimental/_module.py @@ -503,9 +503,9 @@ def from_cubin(module: bytes | str, *, name: str = "", symbol_mapping: dict | No module : bytes | str Either a bytes object containing the in-memory cubin to load, or a file path string pointing to the on-disk cubin to load. 
- name : Optional[str] + name : str | None A human-readable identifier representing this code object. - symbol_mapping : Optional[dict] + symbol_mapping : dict | None A dictionary specifying how the unmangled symbol names (as keys) should be mapped to the mangled names before trying to retrieve them (default to no mappings). @@ -521,9 +521,9 @@ def from_ptx(module: bytes | str, *, name: str = "", symbol_mapping: dict | None module : bytes | str Either a bytes object containing the in-memory ptx code to load, or a file path string pointing to the on-disk ptx file to load. - name : Optional[str] + name : str | None A human-readable identifier representing this code object. - symbol_mapping : Optional[dict] + symbol_mapping : dict | None A dictionary specifying how the unmangled symbol names (as keys) should be mapped to the mangled names before trying to retrieve them (default to no mappings). @@ -539,9 +539,9 @@ def from_ltoir(module: bytes | str, *, name: str = "", symbol_mapping: dict | No module : bytes, str Either a bytes object containing the in-memory ltoir code to load, or a file path string pointing to the on-disk ltoir file to load. - name : Optional[str] + name : str | None A human-readable identifier representing this code object. - symbol_mapping : Optional[dict] + symbol_mapping : dict | None A dictionary specifying how the unmangled symbol names (as keys) should be mapped to the mangled names before trying to retrieve them (default to no mappings). @@ -554,12 +554,12 @@ def from_fatbin(module: bytes | str, *, name: str = "", symbol_mapping: dict | N Parameters ---------- - module : bytes| str + module : bytes | str Either a bytes object containing the in-memory fatbin to load, or a file path string pointing to the on-disk fatbin to load. - name : Optional[str] + name : str | None A human-readable identifier representing this code object. 
- symbol_mapping : Optional[dict] + symbol_mapping : dict | None A dictionary specifying how the unmangled symbol names (as keys) should be mapped to the mangled names before trying to retrieve them (default to no mappings). @@ -575,9 +575,9 @@ def from_object(module: bytes | str, *, name: str = "", symbol_mapping: dict | N module : bytes | str Either a bytes object containing the in-memory object code to load, or a file path string pointing to the on-disk object code to load. - name : Optional[str] + name : str | None A human-readable identifier representing this code object. - symbol_mapping : Optional[dict] + symbol_mapping : dict | None A dictionary specifying how the unmangled symbol names (as keys) should be mapped to the mangled names before trying to retrieve them (default to no mappings). @@ -593,9 +593,9 @@ def from_library(module: bytes | str, *, name: str = "", symbol_mapping: dict | module : bytes | str Either a bytes object containing the in-memory library to load, or a file path string pointing to the on-disk library to load. - name : Optional[str] + name : str | None A human-readable identifier representing this code object. - symbol_mapping : Optional[dict] + symbol_mapping : dict | None A dictionary specifying how the unmangled symbol names (as keys) should be mapped to the mangled names before trying to retrieve them (default to no mappings). 
diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index 7ef24105b1..28c282ffea 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -288,9 +288,9 @@ class ProgramOptions: device_int128: bool | None = None optimization_info: str | None = None no_display_error_number: bool | None = None - diag_error: int | list[int] | tuple[int] | None = None - diag_suppress: int | list[int] | tuple[int] | None = None - diag_warn: int | list[int] | tuple[int] | None = None + diag_error: int | list[int] | tuple[int, ...] | None = None + diag_suppress: int | list[int] | tuple[int, ...] | None = None + diag_warn: int | list[int] | tuple[int, ...] | None = None brief_diagnostics: bool | None = None time: str | None = None split_compile: int | None = None From 041d824a2ab7d437fa0da029b59267b033483ad5 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Mon, 27 Oct 2025 16:21:24 -0400 Subject: [PATCH 8/9] chore: fix bot comments --- cuda_core/cuda/core/experimental/_module.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py index 9654cb97d3..18a9537ced 100644 --- a/cuda_core/cuda/core/experimental/_module.py +++ b/cuda_core/cuda/core/experimental/_module.py @@ -503,7 +503,7 @@ def from_cubin(module: bytes | str, *, name: str = "", symbol_mapping: dict | No module : bytes | str Either a bytes object containing the in-memory cubin to load, or a file path string pointing to the on-disk cubin to load. - name : str | None + name : str A human-readable identifier representing this code object. 
symbol_mapping : dict | None A dictionary specifying how the unmangled symbol names (as keys) @@ -521,7 +521,7 @@ def from_ptx(module: bytes | str, *, name: str = "", symbol_mapping: dict | None module : bytes | str Either a bytes object containing the in-memory ptx code to load, or a file path string pointing to the on-disk ptx file to load. - name : str | None + name : str A human-readable identifier representing this code object. symbol_mapping : dict | None A dictionary specifying how the unmangled symbol names (as keys) @@ -539,7 +539,7 @@ def from_ltoir(module: bytes | str, *, name: str = "", symbol_mapping: dict | No module : bytes, str Either a bytes object containing the in-memory ltoir code to load, or a file path string pointing to the on-disk ltoir file to load. - name : str | None + name : str A human-readable identifier representing this code object. symbol_mapping : dict | None A dictionary specifying how the unmangled symbol names (as keys) @@ -557,7 +557,7 @@ def from_fatbin(module: bytes | str, *, name: str = "", symbol_mapping: dict | N module : bytes | str Either a bytes object containing the in-memory fatbin to load, or a file path string pointing to the on-disk fatbin to load. - name : str | None + name : str A human-readable identifier representing this code object. symbol_mapping : dict | None A dictionary specifying how the unmangled symbol names (as keys) @@ -575,7 +575,7 @@ def from_object(module: bytes | str, *, name: str = "", symbol_mapping: dict | N module : bytes | str Either a bytes object containing the in-memory object code to load, or a file path string pointing to the on-disk object code to load. - name : str | None + name : str A human-readable identifier representing this code object. 
symbol_mapping : dict | None A dictionary specifying how the unmangled symbol names (as keys) @@ -593,7 +593,7 @@ def from_library(module: bytes | str, *, name: str = "", symbol_mapping: dict | module : bytes | str Either a bytes object containing the in-memory library to load, or a file path string pointing to the on-disk library to load. - name : str | None + name : str A human-readable identifier representing this code object. symbol_mapping : dict | None A dictionary specifying how the unmangled symbol names (as keys) From 77cc0fcde61a7c2bfece90c022c069f25cd59f0a Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Thu, 30 Oct 2025 13:40:25 -0400 Subject: [PATCH 9/9] chore: fix optionals --- .../cuda/pathfinder/_headers/find_nvidia_headers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py b/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py index d770e99214..63f8a627fd 100644 --- a/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py +++ b/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py @@ -21,7 +21,7 @@ def _joined_isfile(dirpath: str, basename: str) -> bool: return os.path.isfile(os.path.join(dirpath, basename)) -def _find_under_site_packages(sub_dir: str, h_basename: str) -> Optional[str]: +def _find_under_site_packages(sub_dir: str, h_basename: str) -> str | None: # Installed from a wheel hdr_dir: str # help mypy for hdr_dir in find_sub_dirs_all_sitepackages(tuple(sub_dir.split("/"))): @@ -52,7 +52,7 @@ def _find_based_on_ctk_layout(libname: str, h_basename: str, anchor_point: str) return None -def _find_based_on_conda_layout(libname: str, h_basename: str, ctk_layout: bool) -> Optional[str]: +def _find_based_on_conda_layout(libname: str, h_basename: str, ctk_layout: bool) -> str | None: conda_prefix = os.environ.get("CONDA_PREFIX") if not conda_prefix: return None @@ -134,7 +134,7 @@ def 
find_nvidia_header_directory(libname: str) -> str | None: raise RuntimeError(f"UNKNOWN {libname=}") candidate_dirs = supported_nvidia_headers.SUPPORTED_SITE_PACKAGE_HEADER_DIRS_NON_CTK.get(libname, []) - hdr_dir: Optional[str] # help mypy + hdr_dir: str | None # help mypy for cdir in candidate_dirs: if hdr_dir := _find_under_site_packages(cdir, h_basename): return _abs_norm(hdr_dir)