From 7aa41b286619859bad2a2fbbb8b1b1099b41faca Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Mon, 27 Oct 2025 15:03:08 -0400
Subject: [PATCH 1/9] Reapply "refactor(python): drop support for 3.9, document
3.14 support (#1069)" (#1109)
This reverts commit fcd7b99e2ea81343a3477fbcadec8559f2f4ec4b.
---
.github/workflows/build-wheel.yml | 1 -
CONTRIBUTING.md | 2 +-
ci/test-matrix.json | 14 ----------
cuda_bindings/docs/source/install.rst | 2 +-
cuda_bindings/docs/source/support.rst | 2 +-
cuda_bindings/pyproject.toml | 3 ++-
cuda_core/cuda/core/experimental/__init__.py | 11 --------
cuda_core/cuda/core/experimental/_device.pyx | 10 +++----
.../cuda/core/experimental/_launch_config.py | 6 ++---
cuda_core/cuda/core/experimental/_linker.py | 2 +-
cuda_core/cuda/core/experimental/_module.py | 26 +++++++-----------
cuda_core/cuda/core/experimental/_program.py | 2 +-
cuda_core/docs/source/install.rst | 3 ++-
cuda_core/pyproject.toml | 4 +--
.../_dynamic_libs/find_nvidia_dynamic_lib.py | 27 +++++++++----------
.../_dynamic_libs/load_dl_common.py | 4 +--
.../pathfinder/_dynamic_libs/load_dl_linux.py | 12 ++++-----
.../_dynamic_libs/load_dl_windows.py | 10 +++----
.../_headers/find_nvidia_headers.py | 9 +++----
.../cuda/pathfinder/_utils/env_vars.py | 3 +--
.../_utils/find_site_packages_dll.py | 7 ++++-
.../_utils/find_site_packages_so.py | 6 ++++-
cuda_pathfinder/pyproject.toml | 4 +--
.../tests/spawned_process_runner.py | 10 +++----
cuda_python/pyproject.toml | 4 ++-
ruff.toml | 3 ++-
toolshed/make_site_packages_libdirs.py | 4 +--
27 files changed, 83 insertions(+), 108 deletions(-)
diff --git a/.github/workflows/build-wheel.yml b/.github/workflows/build-wheel.yml
index 57a8581f15..6f683d3aed 100644
--- a/.github/workflows/build-wheel.yml
+++ b/.github/workflows/build-wheel.yml
@@ -28,7 +28,6 @@ jobs:
fail-fast: false
matrix:
python-version:
- - "3.9"
- "3.10"
- "3.11"
- "3.12"
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 183d215865..67bd568d85 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -113,7 +113,7 @@ flowchart TD
B2["linux-aarch64
(Self-hosted)"]
B3["win-64
(GitHub-hosted)"]
end
- BUILD_DETAILS["• Python versions: 3.9, 3.10, 3.11, 3.12, 3.13
- • CUDA version: 13.0.0 (build-time)
- • Components: cuda-core, cuda-bindings,
- cuda-pathfinder, cuda-python"]
+ BUILD_DETAILS["• Python versions: 3.10, 3.11, 3.12, 3.13, 3.14
+ • CUDA version: 13.0.0 (build-time)
+ • Components: cuda-core, cuda-bindings,
+ cuda-pathfinder, cuda-python"]
end
%% Artifact Storage
diff --git a/ci/test-matrix.json b/ci/test-matrix.json
index 4b60779ec9..a8084442dc 100644
--- a/ci/test-matrix.json
+++ b/ci/test-matrix.json
@@ -4,8 +4,6 @@
"_notes": "DRIVER: 'earliest' does not work with CUDA 12.9.1 and LOCAL_CTK: 0 does not work with CUDA 12.0.1",
"linux": {
"pull-request": [
- { "ARCH": "amd64", "PY_VER": "3.9", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" },
- { "ARCH": "amd64", "PY_VER": "3.9", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
{ "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
{ "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" },
{ "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" },
@@ -16,8 +14,6 @@
{ "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
{ "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
{ "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
- { "ARCH": "arm64", "PY_VER": "3.9", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" },
- { "ARCH": "arm64", "PY_VER": "3.9", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
{ "ARCH": "arm64", "PY_VER": "3.10", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
{ "ARCH": "arm64", "PY_VER": "3.10", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" },
{ "ARCH": "arm64", "PY_VER": "3.11", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" },
@@ -30,11 +26,6 @@
{ "ARCH": "arm64", "PY_VER": "3.14t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }
],
"nightly": [
- { "ARCH": "amd64", "PY_VER": "3.9", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "earliest" },
- { "ARCH": "amd64", "PY_VER": "3.9", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
- { "ARCH": "amd64", "PY_VER": "3.9", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
- { "ARCH": "amd64", "PY_VER": "3.9", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" },
- { "ARCH": "amd64", "PY_VER": "3.9", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
{ "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "earliest" },
{ "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
{ "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
@@ -55,11 +46,6 @@
{ "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
{ "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" },
{ "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
- { "ARCH": "arm64", "PY_VER": "3.9", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "earliest" },
- { "ARCH": "arm64", "PY_VER": "3.9", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
- { "ARCH": "arm64", "PY_VER": "3.9", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
- { "ARCH": "arm64", "PY_VER": "3.9", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" },
- { "ARCH": "arm64", "PY_VER": "3.9", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
{ "ARCH": "arm64", "PY_VER": "3.10", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "earliest" },
{ "ARCH": "arm64", "PY_VER": "3.10", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
{ "ARCH": "arm64", "PY_VER": "3.10", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
diff --git a/cuda_bindings/docs/source/install.rst b/cuda_bindings/docs/source/install.rst
index 9836d172ac..58a6a0f31c 100644
--- a/cuda_bindings/docs/source/install.rst
+++ b/cuda_bindings/docs/source/install.rst
@@ -10,7 +10,7 @@ Runtime Requirements
``cuda.bindings`` supports the same platforms as CUDA. Runtime dependencies are:
* Linux (x86-64, arm64) and Windows (x86-64)
-* Python 3.9 - 3.14
+* Python 3.10 - 3.14
* Driver: Linux (580.65.06 or later) Windows (580.88 or later)
* Optionally, NVRTC, nvJitLink, NVVM, and cuFile from CUDA Toolkit 13.x
diff --git a/cuda_bindings/docs/source/support.rst b/cuda_bindings/docs/source/support.rst
index a34a5c49e2..4439d963c0 100644
--- a/cuda_bindings/docs/source/support.rst
+++ b/cuda_bindings/docs/source/support.rst
@@ -19,7 +19,7 @@ The ``cuda.bindings`` module has the following support policy:
depends on the underlying driver and the Toolkit versions, as described in the compatibility
documentation.)
4. The module supports all Python versions following the `CPython EOL schedule`_. As of writing
- Python 3.9 - 3.13 are supported.
+ Python 3.10 - 3.14 are supported.
5. The module exposes a Cython layer from which types and functions could be ``cimport``'d. While
we strive to keep this layer stable, due to Cython limitations a new *minor* release of this
module could require Cython layer users to rebuild their projects and update their pinning to
diff --git a/cuda_bindings/pyproject.toml b/cuda_bindings/pyproject.toml
index bc0dbf1977..7523682591 100644
--- a/cuda_bindings/pyproject.toml
+++ b/cuda_bindings/pyproject.toml
@@ -9,16 +9,17 @@ name = "cuda-bindings"
description = "Python bindings for CUDA"
authors = [{name = "NVIDIA Corporation", email = "cuda-python-conduct@nvidia.com"},]
license = "LicenseRef-NVIDIA-SOFTWARE-LICENSE"
+requires-python = ">=3.10"
classifiers = [
"Intended Audience :: Developers",
"Topic :: Database",
"Topic :: Scientific/Engineering",
"Programming Language :: Python",
- "Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
+ "Programming Language :: Python :: 3.14",
"Environment :: GPU :: NVIDIA CUDA",
]
dynamic = [
diff --git a/cuda_core/cuda/core/experimental/__init__.py b/cuda_core/cuda/core/experimental/__init__.py
index 94fb0aa083..8a60c031c5 100644
--- a/cuda_core/cuda/core/experimental/__init__.py
+++ b/cuda_core/cuda/core/experimental/__init__.py
@@ -26,17 +26,6 @@
finally:
del cuda.bindings, importlib, subdir, cuda_major, cuda_minor
-import sys # noqa: E402
-import warnings # noqa: E402
-
-if sys.version_info < (3, 10):
- warnings.warn(
- "support for Python 3.9 and below is deprecated and subject to future removal",
- category=FutureWarning,
- stacklevel=1,
- )
-del sys, warnings
-
from cuda.core.experimental import utils # noqa: E402
from cuda.core.experimental._device import Device # noqa: E402
from cuda.core.experimental._event import Event, EventOptions # noqa: E402
diff --git a/cuda_core/cuda/core/experimental/_device.pyx b/cuda_core/cuda/core/experimental/_device.pyx
index 1db2adbf8d..d800a3c172 100644
--- a/cuda_core/cuda/core/experimental/_device.pyx
+++ b/cuda_core/cuda/core/experimental/_device.pyx
@@ -10,7 +10,7 @@ from cuda.bindings cimport cydriver
from cuda.core.experimental._utils.cuda_utils cimport HANDLE_RETURN
import threading
-from typing import Optional, Union
+from typing import Union
from cuda.core.experimental._context import Context, ContextOptions
from cuda.core.experimental._event import Event, EventOptions
@@ -951,7 +951,7 @@ class Device:
"""
__slots__ = ("_id", "_mr", "_has_inited", "_properties")
- def __new__(cls, device_id: Optional[int] = None):
+ def __new__(cls, device_id: int | None = None):
global _is_cuInit
if _is_cuInit is False:
with _lock, nogil:
@@ -1223,7 +1223,7 @@ class Device:
"""
raise NotImplementedError("WIP: https://github.com/NVIDIA/cuda-python/issues/189")
- def create_stream(self, obj: Optional[IsStreamT] = None, options: Optional[StreamOptions] = None) -> Stream:
+ def create_stream(self, obj: IsStreamT | None = None, options: StreamOptions | None = None) -> Stream:
"""Create a Stream object.
New stream objects can be created in two different ways:
@@ -1254,7 +1254,7 @@ class Device:
self._check_context_initialized()
return Stream._init(obj=obj, options=options, device_id=self._id)
- def create_event(self, options: Optional[EventOptions] = None) -> Event:
+ def create_event(self, options: EventOptions | None = None) -> Event:
"""Create an Event object without recording it to a Stream.
Note
@@ -1276,7 +1276,7 @@ class Device:
ctx = self._get_current_context()
return Event._init(self._id, ctx, options, True)
- def allocate(self, size, stream: Optional[Stream] = None) -> Buffer:
+ def allocate(self, size, stream: Stream | None = None) -> Buffer:
"""Allocate device memory from a specified stream.
Allocates device memory of `size` bytes on the specified `stream`
diff --git a/cuda_core/cuda/core/experimental/_launch_config.py b/cuda_core/cuda/core/experimental/_launch_config.py
index d82e0ec3a2..c1e08da58d 100644
--- a/cuda_core/cuda/core/experimental/_launch_config.py
+++ b/cuda_core/cuda/core/experimental/_launch_config.py
@@ -3,7 +3,7 @@
# SPDX-License-Identifier: Apache-2.0
from dataclasses import dataclass
-from typing import Optional, Union
+from typing import Union
from cuda.core.experimental._device import Device
from cuda.core.experimental._utils.cuda_utils import (
@@ -68,8 +68,8 @@ class LaunchConfig:
grid: Union[tuple, int] = None
cluster: Union[tuple, int] = None
block: Union[tuple, int] = None
- shmem_size: Optional[int] = None
- cooperative_launch: Optional[bool] = False
+ shmem_size: int | None = None
+ cooperative_launch: bool | None = False
def __post_init__(self):
_lazy_init()
diff --git a/cuda_core/cuda/core/experimental/_linker.py b/cuda_core/cuda/core/experimental/_linker.py
index a3fa4b3e48..5c54a88c8c 100644
--- a/cuda_core/cuda/core/experimental/_linker.py
+++ b/cuda_core/cuda/core/experimental/_linker.py
@@ -343,7 +343,7 @@ def _exception_manager(self):
# our constructor could raise, in which case there's no handle available
error_log = self.get_error_log()
# Starting Python 3.11 we could also use Exception.add_note() for the same purpose, but
- # unfortunately we are still supporting Python 3.9/3.10...
+ # unfortunately we are still supporting Python 3.10...
# Here we rely on both CUDAError and nvJitLinkError have the error string placed in .args[0].
e.args = (e.args[0] + (f"\nLinker error log: {error_log}" if error_log else ""), *e.args[1:])
raise e
diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py
index 2c7ea3a156..f8ce8f95d0 100644
--- a/cuda_core/cuda/core/experimental/_module.py
+++ b/cuda_core/cuda/core/experimental/_module.py
@@ -4,7 +4,7 @@
import weakref
from collections import namedtuple
-from typing import Optional, Union
+from typing import Union
from warnings import warn
from cuda.core.experimental._launch_config import LaunchConfig, _to_native_launch_config
@@ -310,7 +310,7 @@ def available_dynamic_shared_memory_per_block(self, num_blocks_per_multiprocesso
driver.cuOccupancyAvailableDynamicSMemPerBlock(self._handle, num_blocks_per_multiprocessor, block_size)
)
- def max_potential_cluster_size(self, config: LaunchConfig, stream: Optional[Stream] = None) -> int:
+ def max_potential_cluster_size(self, config: LaunchConfig, stream: Stream | None = None) -> int:
"""Maximum potential cluster size.
The maximum potential cluster size for this kernel and given launch configuration.
@@ -332,7 +332,7 @@ def max_potential_cluster_size(self, config: LaunchConfig, stream: Optional[Stre
drv_cfg.hStream = stream.handle
return handle_return(driver.cuOccupancyMaxPotentialClusterSize(self._handle, drv_cfg))
- def max_active_clusters(self, config: LaunchConfig, stream: Optional[Stream] = None) -> int:
+ def max_active_clusters(self, config: LaunchConfig, stream: Stream | None = None) -> int:
"""Maximum number of active clusters on the target device.
The maximum number of clusters that could concurrently execute on the target device.
@@ -469,7 +469,7 @@ def __new__(self, *args, **kwargs):
)
@classmethod
- def _init(cls, module, code_type, *, name: str = "", symbol_mapping: Optional[dict] = None):
+ def _init(cls, module, code_type, *, name: str = "", symbol_mapping: dict | None = None):
self = super().__new__(cls)
assert code_type in self._supported_code_type, f"{code_type=} is not supported"
_lazy_init()
@@ -496,7 +496,7 @@ def __reduce__(self):
return ObjectCode._reduce_helper, (self._module, self._code_type, self._name, self._sym_map)
@staticmethod
- def from_cubin(module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None) -> "ObjectCode":
+ def from_cubin(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
"""Create an :class:`ObjectCode` instance from an existing cubin.
Parameters
@@ -514,7 +514,7 @@ def from_cubin(module: Union[bytes, str], *, name: str = "", symbol_mapping: Opt
return ObjectCode._init(module, "cubin", name=name, symbol_mapping=symbol_mapping)
@staticmethod
- def from_ptx(module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None) -> "ObjectCode":
+ def from_ptx(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
"""Create an :class:`ObjectCode` instance from an existing PTX.
Parameters
@@ -532,7 +532,7 @@ def from_ptx(module: Union[bytes, str], *, name: str = "", symbol_mapping: Optio
return ObjectCode._init(module, "ptx", name=name, symbol_mapping=symbol_mapping)
@staticmethod
- def from_ltoir(module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None) -> "ObjectCode":
+ def from_ltoir(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
"""Create an :class:`ObjectCode` instance from an existing LTOIR.
Parameters
@@ -550,9 +550,7 @@ def from_ltoir(module: Union[bytes, str], *, name: str = "", symbol_mapping: Opt
return ObjectCode._init(module, "ltoir", name=name, symbol_mapping=symbol_mapping)
@staticmethod
- def from_fatbin(
- module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None
- ) -> "ObjectCode":
+ def from_fatbin(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
"""Create an :class:`ObjectCode` instance from an existing fatbin.
Parameters
@@ -570,9 +568,7 @@ def from_fatbin(
return ObjectCode._init(module, "fatbin", name=name, symbol_mapping=symbol_mapping)
@staticmethod
- def from_object(
- module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None
- ) -> "ObjectCode":
+ def from_object(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
"""Create an :class:`ObjectCode` instance from an existing object code.
Parameters
@@ -590,9 +586,7 @@ def from_object(
return ObjectCode._init(module, "object", name=name, symbol_mapping=symbol_mapping)
@staticmethod
- def from_library(
- module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None
- ) -> "ObjectCode":
+ def from_library(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
"""Create an :class:`ObjectCode` instance from an existing library.
Parameters
diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py
index dee6f001e7..1db453fed1 100644
--- a/cuda_core/cuda/core/experimental/_program.py
+++ b/cuda_core/cuda/core/experimental/_program.py
@@ -49,7 +49,7 @@ def _nvvm_exception_manager(self):
except Exception:
error_log = ""
# Starting Python 3.11 we could also use Exception.add_note() for the same purpose, but
- # unfortunately we are still supporting Python 3.9/3.10...
+ # unfortunately we are still supporting Python 3.10...
e.args = (e.args[0] + (f"\nNVVM program log: {error_log}" if error_log else ""), *e.args[1:])
raise e
diff --git a/cuda_core/docs/source/install.rst b/cuda_core/docs/source/install.rst
index cddde851d5..7100dade3b 100644
--- a/cuda_core/docs/source/install.rst
+++ b/cuda_core/docs/source/install.rst
@@ -26,7 +26,7 @@ dependencies are as follows:
.. [#f1] Including ``cuda-python``.
-``cuda.core`` supports Python 3.9 - 3.14, on Linux (x86-64, arm64) and Windows (x86-64). **Experimental** free-threaded builds for Python 3.13 & 3.14 are also provided.
+``cuda.core`` supports Python 3.10 - 3.14, on Linux (x86-64, arm64) and Windows (x86-64). **Experimental** free-threaded builds for Python 3.13 & 3.14 are also provided.
Free-threading Build Support
@@ -42,6 +42,7 @@ As of cuda-core 0.4.0, **experimental** packages for the `free-threaded interpre
.. _built-in modules that are known to be thread-unsafe: https://github.com/python/cpython/issues/116738
.. _free-threaded interpreter: https://docs.python.org/3/howto/free-threading-python.html
+``cuda.core`` supports Python 3.10 - 3.14, on Linux (x86-64, arm64) and Windows (x86-64).
Installing from PyPI
--------------------
diff --git a/cuda_core/pyproject.toml b/cuda_core/pyproject.toml
index 5f12f6f120..31ceb5b1a5 100644
--- a/cuda_core/pyproject.toml
+++ b/cuda_core/pyproject.toml
@@ -14,7 +14,7 @@ dynamic = [
"version",
"readme",
]
-requires-python = '>=3.9'
+requires-python = '>=3.10'
description = "cuda.core: (experimental) pythonic CUDA module"
authors = [
{ name = "NVIDIA Corporation" }
@@ -32,11 +32,11 @@ classifiers = [
"Topic :: Scientific/Engineering",
"Topic :: Software Development :: Libraries",
"Programming Language :: Python :: 3 :: Only",
- "Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
+ "Programming Language :: Python :: 3.14",
"Programming Language :: Python :: Implementation :: CPython",
"Environment :: GPU :: NVIDIA CUDA",
"Environment :: GPU :: NVIDIA CUDA :: 12",
diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py
index 75ebec3a85..65c9f4bf3c 100644
--- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py
+++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py
@@ -4,7 +4,6 @@
import glob
import os
from collections.abc import Sequence
-from typing import Optional
from cuda.pathfinder._dynamic_libs.load_dl_common import DynamicLibNotFoundError
from cuda.pathfinder._dynamic_libs.supported_nvidia_libs import (
@@ -29,7 +28,7 @@ def _no_such_file_in_sub_dirs(
def _find_so_using_nvidia_lib_dirs(
libname: str, so_basename: str, error_messages: list[str], attachments: list[str]
-) -> Optional[str]:
+) -> str | None:
rel_dirs = SITE_PACKAGES_LIBDIRS_LINUX.get(libname)
if rel_dirs is not None:
sub_dirs_searched = []
@@ -52,7 +51,7 @@ def _find_so_using_nvidia_lib_dirs(
return None
-def _find_dll_under_dir(dirpath: str, file_wild: str) -> Optional[str]:
+def _find_dll_under_dir(dirpath: str, file_wild: str) -> str | None:
for path in sorted(glob.glob(os.path.join(dirpath, file_wild))):
if not os.path.isfile(path):
continue
@@ -63,7 +62,7 @@ def _find_dll_under_dir(dirpath: str, file_wild: str) -> Optional[str]:
def _find_dll_using_nvidia_bin_dirs(
libname: str, lib_searched_for: str, error_messages: list[str], attachments: list[str]
-) -> Optional[str]:
+) -> str | None:
rel_dirs = SITE_PACKAGES_LIBDIRS_WINDOWS.get(libname)
if rel_dirs is not None:
sub_dirs_searched = []
@@ -79,7 +78,7 @@ def _find_dll_using_nvidia_bin_dirs(
return None
-def _find_lib_dir_using_anchor_point(libname: str, anchor_point: str, linux_lib_dir: str) -> Optional[str]:
+def _find_lib_dir_using_anchor_point(libname: str, anchor_point: str, linux_lib_dir: str) -> str | None:
# Resolve paths for the four cases:
# Windows/Linux x nvvm yes/no
if IS_WINDOWS:
@@ -107,14 +106,14 @@ def _find_lib_dir_using_anchor_point(libname: str, anchor_point: str, linux_lib_
return None
-def _find_lib_dir_using_cuda_home(libname: str) -> Optional[str]:
+def _find_lib_dir_using_cuda_home(libname: str) -> str | None:
cuda_home = get_cuda_home_or_path()
if cuda_home is None:
return None
return _find_lib_dir_using_anchor_point(libname, anchor_point=cuda_home, linux_lib_dir="lib64")
-def _find_lib_dir_using_conda_prefix(libname: str) -> Optional[str]:
+def _find_lib_dir_using_conda_prefix(libname: str) -> str | None:
conda_prefix = os.environ.get("CONDA_PREFIX")
if not conda_prefix:
return None
@@ -125,7 +124,7 @@ def _find_lib_dir_using_conda_prefix(libname: str) -> Optional[str]:
def _find_so_using_lib_dir(
lib_dir: str, so_basename: str, error_messages: list[str], attachments: list[str]
-) -> Optional[str]:
+) -> str | None:
so_name = os.path.join(lib_dir, so_basename)
if os.path.isfile(so_name):
return so_name
@@ -141,7 +140,7 @@ def _find_so_using_lib_dir(
def _find_dll_using_lib_dir(
lib_dir: str, libname: str, error_messages: list[str], attachments: list[str]
-) -> Optional[str]:
+) -> str | None:
file_wild = libname + "*.dll"
dll_name = _find_dll_under_dir(lib_dir, file_wild)
if dll_name is not None:
@@ -162,9 +161,9 @@ def __init__(self, libname: str):
self.lib_searched_for = f"lib{libname}.so"
self.error_messages: list[str] = []
self.attachments: list[str] = []
- self.abs_path: Optional[str] = None
+ self.abs_path: str | None = None
- def try_site_packages(self) -> Optional[str]:
+ def try_site_packages(self) -> str | None:
if IS_WINDOWS:
return _find_dll_using_nvidia_bin_dirs(
self.libname,
@@ -180,13 +179,13 @@ def try_site_packages(self) -> Optional[str]:
self.attachments,
)
- def try_with_conda_prefix(self) -> Optional[str]:
+ def try_with_conda_prefix(self) -> str | None:
return self._find_using_lib_dir(_find_lib_dir_using_conda_prefix(self.libname))
- def try_with_cuda_home(self) -> Optional[str]:
+ def try_with_cuda_home(self) -> str | None:
return self._find_using_lib_dir(_find_lib_dir_using_cuda_home(self.libname))
- def _find_using_lib_dir(self, lib_dir: Optional[str]) -> Optional[str]:
+ def _find_using_lib_dir(self, lib_dir: str | None) -> str | None:
if lib_dir is None:
return None
if IS_WINDOWS:
diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_common.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_common.py
index 2e6c9eb17c..91e6284a00 100644
--- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_common.py
+++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_common.py
@@ -1,8 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
+from collections.abc import Callable
from dataclasses import dataclass
-from typing import Callable, Optional
from cuda.pathfinder._dynamic_libs.supported_nvidia_libs import DIRECT_DEPENDENCIES
@@ -13,7 +13,7 @@ class DynamicLibNotFoundError(RuntimeError):
@dataclass
class LoadedDL:
- abs_path: Optional[str]
+ abs_path: str | None
was_already_loaded_from_elsewhere: bool
_handle_uint: int # Platform-agnostic unsigned pointer value
found_via: str
diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_linux.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_linux.py
index 040e24705e..4d2bae5b90 100644
--- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_linux.py
+++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_linux.py
@@ -5,7 +5,7 @@
import ctypes
import ctypes.util
import os
-from typing import Optional, cast
+from typing import cast
from cuda.pathfinder._dynamic_libs.load_dl_common import LoadedDL
from cuda.pathfinder._dynamic_libs.supported_nvidia_libs import (
@@ -76,8 +76,8 @@ class _LinkMapLNameView(ctypes.Structure):
assert _LinkMapLNameView.l_name.offset == ctypes.sizeof(ctypes.c_void_p)
-def _dl_last_error() -> Optional[str]:
- msg_bytes = cast(Optional[bytes], LIBDL.dlerror())
+def _dl_last_error() -> str | None:
+ msg_bytes = cast(bytes | None, LIBDL.dlerror())
if not msg_bytes:
return None # no pending error
# Never raises; undecodable bytes are mapped to U+DC80..U+DCFF
@@ -131,7 +131,7 @@ def get_candidate_sonames(libname: str) -> list[str]:
return candidate_sonames
-def check_if_already_loaded_from_elsewhere(libname: str, _have_abs_path: bool) -> Optional[LoadedDL]:
+def check_if_already_loaded_from_elsewhere(libname: str, _have_abs_path: bool) -> LoadedDL | None:
for soname in get_candidate_sonames(libname):
try:
handle = ctypes.CDLL(soname, mode=os.RTLD_NOLOAD)
@@ -151,7 +151,7 @@ def _load_lib(libname: str, filename: str) -> ctypes.CDLL:
return ctypes.CDLL(filename, cdll_mode)
-def load_with_system_search(libname: str) -> Optional[LoadedDL]:
+def load_with_system_search(libname: str) -> LoadedDL | None:
"""Try to load a library using system search paths.
Args:
@@ -195,7 +195,7 @@ def _work_around_known_bugs(libname: str, found_path: str) -> None:
ctypes.CDLL(dep_path, CDLL_MODE)
-def load_with_abs_path(libname: str, found_path: str, found_via: Optional[str] = None) -> LoadedDL:
+def load_with_abs_path(libname: str, found_path: str, found_via: str | None = None) -> LoadedDL:
"""Load a dynamic library from the given path.
Args:
diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_windows.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_windows.py
index d8ac53fe8a..b9f15ea50b 100644
--- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_windows.py
+++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_windows.py
@@ -5,7 +5,6 @@
import ctypes.wintypes
import os
import struct
-from typing import Optional
from cuda.pathfinder._dynamic_libs.load_dl_common import LoadedDL
from cuda.pathfinder._dynamic_libs.supported_nvidia_libs import (
@@ -100,10 +99,7 @@ def abs_path_for_dynamic_library(libname: str, handle: ctypes.wintypes.HMODULE)
return buffer.value
-def check_if_already_loaded_from_elsewhere(
- libname: str,
- have_abs_path: bool,
-) -> Optional[LoadedDL]:
+def check_if_already_loaded_from_elsewhere(libname: str, have_abs_path: bool) -> LoadedDL | None:
for dll_name in SUPPORTED_WINDOWS_DLLS.get(libname, ()):
handle = kernel32.GetModuleHandleW(dll_name)
if handle:
@@ -117,7 +113,7 @@ def check_if_already_loaded_from_elsewhere(
return None
-def load_with_system_search(libname: str) -> Optional[LoadedDL]:
+def load_with_system_search(libname: str) -> LoadedDL | None:
"""Try to load a DLL using system search paths.
Args:
@@ -136,7 +132,7 @@ def load_with_system_search(libname: str) -> Optional[LoadedDL]:
return None
-def load_with_abs_path(libname: str, found_path: str, found_via: Optional[str] = None) -> LoadedDL:
+def load_with_abs_path(libname: str, found_path: str, found_via: str | None = None) -> LoadedDL:
"""Load a dynamic library from the given path.
Args:
diff --git a/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py b/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py
index b141700ab7..d770e99214 100644
--- a/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py
+++ b/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py
@@ -4,7 +4,6 @@
import functools
import glob
import os
-from typing import Optional
from cuda.pathfinder._headers import supported_nvidia_headers
from cuda.pathfinder._utils.env_vars import get_cuda_home_or_path
@@ -12,7 +11,7 @@
from cuda.pathfinder._utils.platform_aware import IS_WINDOWS
-def _abs_norm(path: Optional[str]) -> Optional[str]:
+def _abs_norm(path: str | None) -> str | None:
if path:
return os.path.normpath(os.path.abspath(path))
return None
@@ -31,7 +30,7 @@ def _find_under_site_packages(sub_dir: str, h_basename: str) -> Optional[str]:
return None
-def _find_based_on_ctk_layout(libname: str, h_basename: str, anchor_point: str) -> Optional[str]:
+def _find_based_on_ctk_layout(libname: str, h_basename: str, anchor_point: str) -> str | None:
parts = [anchor_point]
if libname == "nvvm":
parts.append(libname)
@@ -77,7 +76,7 @@ def _find_based_on_conda_layout(libname: str, h_basename: str, ctk_layout: bool)
return _find_based_on_ctk_layout(libname, h_basename, anchor_point)
-def _find_ctk_header_directory(libname: str) -> Optional[str]:
+def _find_ctk_header_directory(libname: str) -> str | None:
h_basename = supported_nvidia_headers.SUPPORTED_HEADERS_CTK[libname]
candidate_dirs = supported_nvidia_headers.SUPPORTED_SITE_PACKAGE_HEADER_DIRS_CTK[libname]
@@ -97,7 +96,7 @@ def _find_ctk_header_directory(libname: str) -> Optional[str]:
@functools.cache
-def find_nvidia_header_directory(libname: str) -> Optional[str]:
+def find_nvidia_header_directory(libname: str) -> str | None:
"""Locate the header directory for a supported NVIDIA library.
Args:
diff --git a/cuda_pathfinder/cuda/pathfinder/_utils/env_vars.py b/cuda_pathfinder/cuda/pathfinder/_utils/env_vars.py
index 3a7de992c0..cf78a627cb 100644
--- a/cuda_pathfinder/cuda/pathfinder/_utils/env_vars.py
+++ b/cuda_pathfinder/cuda/pathfinder/_utils/env_vars.py
@@ -3,7 +3,6 @@
import os
import warnings
-from typing import Optional
def _paths_differ(a: str, b: str) -> bool:
@@ -33,7 +32,7 @@ def _paths_differ(a: str, b: str) -> bool:
return True
-def get_cuda_home_or_path() -> Optional[str]:
+def get_cuda_home_or_path() -> str | None:
cuda_home = os.environ.get("CUDA_HOME")
cuda_path = os.environ.get("CUDA_PATH")
diff --git a/cuda_pathfinder/cuda/pathfinder/_utils/find_site_packages_dll.py b/cuda_pathfinder/cuda/pathfinder/_utils/find_site_packages_dll.py
index 2f5695093c..507355727f 100644
--- a/cuda_pathfinder/cuda/pathfinder/_utils/find_site_packages_dll.py
+++ b/cuda_pathfinder/cuda/pathfinder/_utils/find_site_packages_dll.py
@@ -11,7 +11,12 @@ def find_all_dll_files_via_metadata() -> dict[str, tuple[str, ...]]:
results: collections.defaultdict[str, list[str]] = collections.defaultdict(list)
# sort dists for deterministic output
- for dist in sorted(importlib.metadata.distributions(), key=lambda d: (d.metadata.get("Name", ""), d.version)):
+
+ for dist in sorted(
+ importlib.metadata.distributions(),
+ # `get` exists before 3.12, even though the hints only exist for Python >=3.12
+ key=lambda d: (d.metadata.get("Name", ""), d.version), # type: ignore[attr-defined]
+ ):
files = dist.files
if not files:
continue
diff --git a/cuda_pathfinder/cuda/pathfinder/_utils/find_site_packages_so.py b/cuda_pathfinder/cuda/pathfinder/_utils/find_site_packages_so.py
index 69e7eea3ad..33ee1f1bcf 100644
--- a/cuda_pathfinder/cuda/pathfinder/_utils/find_site_packages_so.py
+++ b/cuda_pathfinder/cuda/pathfinder/_utils/find_site_packages_so.py
@@ -23,7 +23,11 @@ def find_all_so_files_via_metadata() -> dict[str, dict[str, tuple[str, ...]]]:
)
# sort dists for deterministic output
- for dist in sorted(importlib.metadata.distributions(), key=lambda d: (d.metadata.get("Name", ""), d.version)):
+ for dist in sorted(
+ importlib.metadata.distributions(),
+ # `get` exists before 3.12, even though the hints only exist for Python >=3.12
+ key=lambda d: (d.metadata.get("Name", ""), d.version), # type: ignore[attr-defined]
+ ):
files = dist.files
if not files:
continue
diff --git a/cuda_pathfinder/pyproject.toml b/cuda_pathfinder/pyproject.toml
index 3db1aecbc8..9a6e40f8d3 100644
--- a/cuda_pathfinder/pyproject.toml
+++ b/cuda_pathfinder/pyproject.toml
@@ -6,7 +6,7 @@ name = "cuda-pathfinder"
description = "Pathfinder for CUDA components"
authors = [{ name = "NVIDIA Corporation", email = "cuda-python-conduct@nvidia.com" }]
license = "Apache-2.0"
-requires-python = ">=3.9"
+requires-python = ">=3.10"
dynamic = ["version", "readme"]
dependencies = []
@@ -109,7 +109,7 @@ inline-quotes = "double"
[tool.mypy]
# Basic settings
-python_version = "3.9"
+python_version = "3.10"
explicit_package_bases = true
warn_return_any = true
warn_unused_configs = true
diff --git a/cuda_pathfinder/tests/spawned_process_runner.py b/cuda_pathfinder/tests/spawned_process_runner.py
index 154178b2a2..f4440743f5 100644
--- a/cuda_pathfinder/tests/spawned_process_runner.py
+++ b/cuda_pathfinder/tests/spawned_process_runner.py
@@ -5,10 +5,10 @@
import queue # for Empty
import sys
import traceback
-from collections.abc import Sequence
+from collections.abc import Callable, Sequence
from dataclasses import dataclass
from io import StringIO
-from typing import Any, Callable, Optional
+from typing import Any
PROCESS_KILLED = -9
PROCESS_NO_RESULT = -999
@@ -61,9 +61,9 @@ def __call__(self):
def run_in_spawned_child_process(
target: Callable[..., None],
*,
- args: Optional[Sequence[Any]] = None,
- kwargs: Optional[dict[str, Any]] = None,
- timeout: Optional[float] = None,
+ args: Sequence[Any] | None = None,
+ kwargs: dict[str, Any] | None = None,
+ timeout: float | None = None,
rethrow: bool = False,
) -> CompletedProcess:
"""Run `target` in a spawned child process, capturing stdout/stderr.
diff --git a/cuda_python/pyproject.toml b/cuda_python/pyproject.toml
index fd6cacaf2a..9048f5818b 100644
--- a/cuda_python/pyproject.toml
+++ b/cuda_python/pyproject.toml
@@ -22,16 +22,18 @@ classifiers = [
"Intended Audience :: Science/Research",
"Intended Audience :: End Users/Desktop",
"Programming Language :: Python :: 3 :: Only",
- "Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
+ "Programming Language :: Python :: 3.14",
"Programming Language :: Python :: Implementation :: CPython",
"Environment :: GPU :: NVIDIA CUDA",
"Environment :: GPU :: NVIDIA CUDA :: 12",
+ "Environment :: GPU :: NVIDIA CUDA :: 13",
]
dynamic = ["version", "dependencies", "optional-dependencies"]
+requires-python = ">=3.10"
[project.urls]
homepage = "https://nvidia.github.io/cuda-python/"
diff --git a/ruff.toml b/ruff.toml
index 79c66e862c..6312d3e9ef 100644
--- a/ruff.toml
+++ b/ruff.toml
@@ -3,7 +3,7 @@
# SPDX-License-Identifier: Apache-2.0
line-length = 120
respect-gitignore = true
-target-version = "py39"
+target-version = "py310"
[format]
docstring-code-format = true
@@ -40,6 +40,7 @@ ignore = [
"S101", # asserts
"S311", # allow use of the random.* even though many are not cryptographically secure
"S404", # allow importing the subprocess module
+ "B905", # preserve the default behavior of `zip` without the explicit `strict` argument
]
exclude = ["**/_version.py"]
diff --git a/toolshed/make_site_packages_libdirs.py b/toolshed/make_site_packages_libdirs.py
index d84d821700..00a495a095 100755
--- a/toolshed/make_site_packages_libdirs.py
+++ b/toolshed/make_site_packages_libdirs.py
@@ -8,7 +8,7 @@
import argparse
import os
import re
-from typing import Dict, Optional, Set
+from typing import Dict, Set
_SITE_PACKAGES_RE = re.compile(r"(?i)^.*?/site-packages/")
@@ -38,7 +38,7 @@ def parse_lines_linux(lines) -> Dict[str, Set[str]]:
return d
-def extract_libname_from_dll(fname: str) -> Optional[str]:
+def extract_libname_from_dll(fname: str) -> str | None:
"""Return base libname per the heuristic, or None if not a .dll."""
base = os.path.basename(fname)
if not base.lower().endswith(".dll"):
From 9969317306466ae6956c9c76affeab0842dfe657 Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Mon, 27 Oct 2025 15:12:51 -0400
Subject: [PATCH 2/9] docs: remove duplicate information
---
cuda_core/docs/source/install.rst | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/cuda_core/docs/source/install.rst b/cuda_core/docs/source/install.rst
index 7100dade3b..72ec710785 100644
--- a/cuda_core/docs/source/install.rst
+++ b/cuda_core/docs/source/install.rst
@@ -26,7 +26,7 @@ dependencies are as follows:
.. [#f1] Including ``cuda-python``.
-``cuda.core`` supports Python 3.10 - 3.14, on Linux (x86-64, arm64) and Windows (x86-64). **Experimental** free-threaded builds for Python 3.13 & 3.14 are also provided.
+``cuda.core`` supports Python 3.10 - 3.14, on Linux (x86-64, arm64) and Windows (x86-64). **Experimental** free-threaded builds for Python 3.14 are also provided.
Free-threading Build Support
@@ -42,8 +42,6 @@ As of cuda-core 0.4.0, **experimental** packages for the `free-threaded interpre
.. _built-in modules that are known to be thread-unsafe: https://github.com/python/cpython/issues/116738
.. _free-threaded interpreter: https://docs.python.org/3/howto/free-threading-python.html
-``cuda.core`` supports Python 3.10 - 3.14, on Linux (x86-64, arm64) and Windows (x86-64).
-
Installing from PyPI
--------------------
From 05faed3b84f7759059972354299db012b96a4cd7 Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Mon, 27 Oct 2025 15:17:57 -0400
Subject: [PATCH 3/9] test: remove B095 ruff rule and add `zip(...,
strict=True)` in test code
---
cuda_bindings/setup.py | 2 +-
cuda_bindings/tests/test_cuda.py | 4 ++--
cuda_bindings/tests/test_cufile.py | 2 +-
cuda_bindings/tests/test_nvjitlink.py | 2 +-
cuda_core/tests/test_module.py | 8 ++++----
cuda_core/tests/test_system.py | 2 +-
ruff.toml | 2 +-
7 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/cuda_bindings/setup.py b/cuda_bindings/setup.py
index d89d0bccd9..c4138c11d0 100644
--- a/cuda_bindings/setup.py
+++ b/cuda_bindings/setup.py
@@ -125,7 +125,7 @@ def discoverMembers(self, memberDict, prefix, seen=None):
next_seen = set(seen)
next_seen.add(self._name)
- for memberName, memberType in zip(self._member_names, self._member_types):
+ for memberName, memberType in zip(self._member_names, self._member_types, strict=True):
if memberName:
discovered.append(".".join([prefix, memberName]))
diff --git a/cuda_bindings/tests/test_cuda.py b/cuda_bindings/tests/test_cuda.py
index cd723941be..53bcc08f95 100644
--- a/cuda_bindings/tests/test_cuda.py
+++ b/cuda_bindings/tests/test_cuda.py
@@ -432,7 +432,7 @@ def test_cuda_pointer_attr():
# List version
err, attr_value_list_v2 = cuda.cuPointerGetAttributes(len(attr_type_list), attr_type_list, ptr)
assert err == cuda.CUresult.CUDA_SUCCESS
- for attr1, attr2 in zip(attr_value_list, attr_value_list_v2):
+ for attr1, attr2 in zip(attr_value_list, attr_value_list_v2, strict=True):
assert str(attr1) == str(attr2)
# Test setting values
@@ -512,7 +512,7 @@ def test_cuda_mem_range_attr():
attr_type_size_list, attr_type_list, len(attr_type_list), ptr, size
)
assert err == cuda.CUresult.CUDA_SUCCESS
- for attr1, attr2 in zip(attr_value_list, attr_value_list_v2):
+ for attr1, attr2 in zip(attr_value_list, attr_value_list_v2, strict=True):
assert str(attr1) == str(attr2)
(err,) = cuda.cuMemFree(ptr)
diff --git a/cuda_bindings/tests/test_cufile.py b/cuda_bindings/tests/test_cufile.py
index 3716e2bec5..446cfdc83d 100644
--- a/cuda_bindings/tests/test_cufile.py
+++ b/cuda_bindings/tests/test_cufile.py
@@ -318,7 +318,7 @@ def test_buf_register_multiple_buffers():
try:
# Register all buffers
flags = 0
- for buf_ptr, size in zip(buffers, buffer_sizes):
+ for buf_ptr, size in zip(buffers, buffer_sizes, strict=True):
buf_ptr_int = int(buf_ptr)
cufile.buf_register(buf_ptr_int, size, flags)
diff --git a/cuda_bindings/tests/test_nvjitlink.py b/cuda_bindings/tests/test_nvjitlink.py
index 3bfeb8d35a..85c6058010 100644
--- a/cuda_bindings/tests/test_nvjitlink.py
+++ b/cuda_bindings/tests/test_nvjitlink.py
@@ -34,7 +34,7 @@
def _build_arch_ptx_parametrized_callable():
- av = tuple(zip(ARCHITECTURES, PTX_VERSIONS))
+ av = tuple(zip(ARCHITECTURES, PTX_VERSIONS, strict=True))
return pytest.mark.parametrize(
("arch", "ptx_bytes"),
[(a, (PTX_HEADER.format(VERSION=v, ARCH=a) + PTX_KERNEL).encode("utf-8")) for a, v in av],
diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py
index 49df966c08..dffbc04209 100644
--- a/cuda_core/tests/test_module.py
+++ b/cuda_core/tests/test_module.py
@@ -248,9 +248,9 @@ class ExpectedStruct(ctypes.Structure):
sizes = [p.size for p in arg_info]
members = [getattr(ExpectedStruct, name) for name, _ in ExpectedStruct._fields_]
expected_offsets = tuple(m.offset for m in members)
- assert all(actual == expected for actual, expected in zip(offsets, expected_offsets))
+ assert all(actual == expected for actual, expected in zip(offsets, expected_offsets, strict=True))
expected_sizes = tuple(m.size for m in members)
- assert all(actual == expected for actual, expected in zip(sizes, expected_sizes))
+ assert all(actual == expected for actual, expected in zip(sizes, expected_sizes, strict=True))
@pytest.mark.parametrize("nargs", [0, 1, 2, 3, 16])
@@ -274,8 +274,8 @@ class ExpectedStruct(ctypes.Structure):
members = tuple(getattr(ExpectedStruct, f"arg_{i}") for i in range(nargs))
arg_info = krn.arguments_info
- assert all([actual.offset == expected.offset for actual, expected in zip(arg_info, members)])
- assert all([actual.size == expected.size for actual, expected in zip(arg_info, members)])
+ assert all([actual.offset == expected.offset for actual, expected in zip(arg_info, members, strict=True)])
+ assert all([actual.size == expected.size for actual, expected in zip(arg_info, members, strict=True)])
def test_num_args_error_handling(deinit_all_contexts_function, cuda12_4_prerequisite_check):
diff --git a/cuda_core/tests/test_system.py b/cuda_core/tests/test_system.py
index d5195ed872..13f82b98f6 100644
--- a/cuda_core/tests/test_system.py
+++ b/cuda_core/tests/test_system.py
@@ -35,5 +35,5 @@ def test_devices():
expected_num_devices = handle_return(runtime.cudaGetDeviceCount())
expected_devices = tuple(Device(device_id) for device_id in range(expected_num_devices))
assert len(devices) == len(expected_devices), "Number of devices does not match expected value"
- for device, expected_device in zip(devices, expected_devices):
+ for device, expected_device in zip(devices, expected_devices, strict=True):
assert device.device_id == expected_device.device_id, "Device ID does not match expected value"
diff --git a/ruff.toml b/ruff.toml
index 6312d3e9ef..f28ff3cb98 100644
--- a/ruff.toml
+++ b/ruff.toml
@@ -40,7 +40,6 @@ ignore = [
"S101", # asserts
"S311", # allow use of the random.* even though many are not cryptographically secure
"S404", # allow importing the subprocess module
- "B905", # preserve the default behavior of `zip` without the explicit `strict` argument
]
exclude = ["**/_version.py"]
@@ -52,6 +51,7 @@ exclude = ["**/_version.py"]
"cuda_bindings/examples/**" = [
"E722",
"E501", # line too long
+ "B905", # preserve the default behavior of `zip` without the explicit `strict` argument
]
"cuda_bindings/tests/**" = [
From 1c3e7e3e6c8d6632d323b90d43e3dbe57c5a5f4b Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Mon, 27 Oct 2025 15:23:49 -0400
Subject: [PATCH 4/9] chore: bump python in `cuda_python_test_helpers`
---
cuda_python_test_helpers/pyproject.toml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/cuda_python_test_helpers/pyproject.toml b/cuda_python_test_helpers/pyproject.toml
index 85652b61c5..4709c05a07 100644
--- a/cuda_python_test_helpers/pyproject.toml
+++ b/cuda_python_test_helpers/pyproject.toml
@@ -12,7 +12,7 @@ description = "Shared test helpers for CUDA Python projects"
readme = {file = "README.md", content-type = "text/markdown"}
authors = [{ name = "NVIDIA Corporation" }]
license = "Apache-2.0"
-requires-python = ">=3.9"
+requires-python = ">=3.10"
classifiers = [
"Programming Language :: Python :: 3 :: Only",
"Operating System :: POSIX :: Linux",
From 9e46f1f7d125060ab898a3b6256f7d637b7a531c Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Mon, 27 Oct 2025 15:45:42 -0400
Subject: [PATCH 5/9] refactor: modernize dict/set in toolshed
---
toolshed/make_site_packages_libdirs.py | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/toolshed/make_site_packages_libdirs.py b/toolshed/make_site_packages_libdirs.py
index 00a495a095..eba6c68234 100755
--- a/toolshed/make_site_packages_libdirs.py
+++ b/toolshed/make_site_packages_libdirs.py
@@ -8,7 +8,6 @@
import argparse
import os
import re
-from typing import Dict, Set
_SITE_PACKAGES_RE = re.compile(r"(?i)^.*?/site-packages/")
@@ -19,7 +18,7 @@ def strip_site_packages_prefix(p: str) -> str:
return _SITE_PACKAGES_RE.sub("", p)
-def parse_lines_linux(lines) -> Dict[str, Set[str]]:
+def parse_lines_linux(lines) -> dict[str, set[str]]:
d = {} # name -> set of dirs
for raw in lines:
line = raw.strip()
@@ -53,9 +52,9 @@ def extract_libname_from_dll(fname: str) -> str | None:
return name or None
-def parse_lines_windows(lines) -> Dict[str, Set[str]]:
+def parse_lines_windows(lines) -> dict[str, set[str]]:
"""Collect {libname: set(dirnames)} with deduped directories."""
- m: Dict[str, Set[str]] = {}
+ m: dict[str, set[str]] = {}
for raw in lines:
line = raw.strip()
if not line or line.startswith("#"):
@@ -69,7 +68,7 @@ def parse_lines_windows(lines) -> Dict[str, Set[str]]:
return m
-def dict_literal(d: Dict[str, Set[str]]) -> str:
+def dict_literal(d: dict[str, set[str]]) -> str:
"""Pretty, stable dict literal with tuple values (singletons keep trailing comma)."""
lines = ["{"]
for k in sorted(d):
From a7a1f3f30d014a6460d2a5c71fb91bbaa0b910c9 Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Mon, 27 Oct 2025 15:53:55 -0400
Subject: [PATCH 6/9] refactor: modernize unions
---
.../cuda/core/experimental/_launch_config.py | 13 +++---
cuda_core/cuda/core/experimental/_linker.py | 16 +++----
cuda_core/cuda/core/experimental/_module.py | 31 +++++++-------
cuda_core/cuda/core/experimental/_program.py | 42 +++++++++----------
.../cuda_python_test_helpers/__init__.py | 3 +-
5 files changed, 50 insertions(+), 55 deletions(-)
diff --git a/cuda_core/cuda/core/experimental/_launch_config.py b/cuda_core/cuda/core/experimental/_launch_config.py
index c1e08da58d..bd76dae286 100644
--- a/cuda_core/cuda/core/experimental/_launch_config.py
+++ b/cuda_core/cuda/core/experimental/_launch_config.py
@@ -3,7 +3,6 @@
# SPDX-License-Identifier: Apache-2.0
from dataclasses import dataclass
-from typing import Union
from cuda.core.experimental._device import Device
from cuda.core.experimental._utils.cuda_utils import (
@@ -45,15 +44,15 @@ class LaunchConfig:
Attributes
----------
- grid : Union[tuple, int]
+ grid : tuple | int
Collection of threads that will execute a kernel function. When cluster
is not specified, this represents the number of blocks, otherwise
this represents the number of clusters.
- cluster : Union[tuple, int]
+ cluster : tuple | int
Group of blocks (Thread Block Cluster) that will execute on the same
GPU Processing Cluster (GPC). Blocks within a cluster have access to
distributed shared memory and can be explicitly synchronized.
- block : Union[tuple, int]
+ block : tuple | int
Group of threads (Thread Block) that will execute on the same
streaming multiprocessor (SM). Threads within a thread blocks have
access to shared memory and can be explicitly synchronized.
@@ -65,9 +64,9 @@ class LaunchConfig:
"""
# TODO: expand LaunchConfig to include other attributes
- grid: Union[tuple, int] = None
- cluster: Union[tuple, int] = None
- block: Union[tuple, int] = None
+ grid: tuple | int = None
+ cluster: tuple | int = None
+ block: tuple | int = None
shmem_size: int | None = None
cooperative_launch: bool | None = False
diff --git a/cuda_core/cuda/core/experimental/_linker.py b/cuda_core/cuda/core/experimental/_linker.py
index 5c54a88c8c..04c59c9d2d 100644
--- a/cuda_core/cuda/core/experimental/_linker.py
+++ b/cuda_core/cuda/core/experimental/_linker.py
@@ -9,7 +9,7 @@
import weakref
from contextlib import contextmanager
from dataclasses import dataclass
-from typing import TYPE_CHECKING, Union
+from typing import TYPE_CHECKING
from warnings import warn
if TYPE_CHECKING:
@@ -154,14 +154,14 @@ class LinkerOptions:
fma : bool, optional
Use fast multiply-add.
Default: True.
- kernels_used : [Union[str, tuple[str], list[str]]], optional
+ kernels_used : str | tuple[str] | list[str], optional
Pass a kernel or sequence of kernels that are used; any not in the list can be removed.
- variables_used : [Union[str, tuple[str], list[str]]], optional
+ variables_used : str | tuple[str] | list[str], optional
Pass a variable or sequence of variables that are used; any not in the list can be removed.
optimize_unused_variables : bool, optional
Assume that if a variable is not referenced in device code, it can be removed.
Default: False.
- ptxas_options : [Union[str, tuple[str], list[str]]], optional
+ ptxas_options : str | tuple[str] | list[str], optional
Pass options to PTXAS.
split_compile : int, optional
Split compilation maximum thread count. Use 0 to use all available processors. Value of 1 disables split
@@ -191,10 +191,10 @@ class LinkerOptions:
prec_div: bool | None = None
prec_sqrt: bool | None = None
fma: bool | None = None
- kernels_used: Union[str, tuple[str], list[str]] | None = None
- variables_used: Union[str, tuple[str], list[str]] | None = None
+ kernels_used: str | tuple[str] | list[str] | None = None
+ variables_used: str | tuple[str] | list[str] | None = None
optimize_unused_variables: bool | None = None
- ptxas_options: Union[str, tuple[str], list[str]] | None = None
+ ptxas_options: str | tuple[str] | list[str] | None = None
split_compile: int | None = None
split_compile_extended: int | None = None
no_cache: bool | None = None
@@ -350,7 +350,7 @@ def _exception_manager(self):
nvJitLinkHandleT = int
-LinkerHandleT = Union[nvJitLinkHandleT, "cuda.bindings.driver.CUlinkState"]
+LinkerHandleT = nvJitLinkHandleT | cuda.bindings.driver.CUlinkState
class Linker:
diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py
index f8ce8f95d0..dcb5d06f5b 100644
--- a/cuda_core/cuda/core/experimental/_module.py
+++ b/cuda_core/cuda/core/experimental/_module.py
@@ -4,7 +4,6 @@
import weakref
from collections import namedtuple
-from typing import Union
from warnings import warn
from cuda.core.experimental._launch_config import LaunchConfig, _to_native_launch_config
@@ -239,7 +238,7 @@ def max_active_blocks_per_multiprocessor(self, block_size: int, dynamic_shared_m
)
def max_potential_block_size(
- self, dynamic_shared_memory_needed: Union[int, driver.CUoccupancyB2DSize], block_size_limit: int
+ self, dynamic_shared_memory_needed: int | driver.CUoccupancyB2DSize, block_size_limit: int
) -> MaxPotentialBlockSizeOccupancyResult:
"""MaxPotentialBlockSizeOccupancyResult: Suggested launch configuration for reasonable occupancy.
@@ -248,7 +247,7 @@ def max_potential_block_size(
Parameters
----------
- dynamic_shared_memory_needed: Union[int, driver.CUoccupancyB2DSize]
+ dynamic_shared_memory_needed: int | driver.CUoccupancyB2DSize
The amount of dynamic shared memory in bytes needed by block.
Use `0` if block does not need shared memory. Use C-callable
represented by :obj:`~driver.CUoccupancyB2DSize` to encode
@@ -437,7 +436,7 @@ def occupancy(self) -> KernelOccupancy:
# TODO: implement from_handle()
-CodeTypeT = Union[bytes, bytearray, str]
+CodeTypeT = bytes | bytearray | str
class ObjectCode:
@@ -496,12 +495,12 @@ def __reduce__(self):
return ObjectCode._reduce_helper, (self._module, self._code_type, self._name, self._sym_map)
@staticmethod
- def from_cubin(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
+ def from_cubin(module: bytes | str, *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
"""Create an :class:`ObjectCode` instance from an existing cubin.
Parameters
----------
- module : Union[bytes, str]
+ module : bytes | str
Either a bytes object containing the in-memory cubin to load, or
a file path string pointing to the on-disk cubin to load.
name : Optional[str]
@@ -514,12 +513,12 @@ def from_cubin(module: Union[bytes, str], *, name: str = "", symbol_mapping: dic
return ObjectCode._init(module, "cubin", name=name, symbol_mapping=symbol_mapping)
@staticmethod
- def from_ptx(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
+ def from_ptx(module: bytes | str, *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
"""Create an :class:`ObjectCode` instance from an existing PTX.
Parameters
----------
- module : Union[bytes, str]
+ module : bytes | str
Either a bytes object containing the in-memory ptx code to load, or
a file path string pointing to the on-disk ptx file to load.
name : Optional[str]
@@ -532,12 +531,12 @@ def from_ptx(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict
return ObjectCode._init(module, "ptx", name=name, symbol_mapping=symbol_mapping)
@staticmethod
- def from_ltoir(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
+ def from_ltoir(module: bytes | str, *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
"""Create an :class:`ObjectCode` instance from an existing LTOIR.
Parameters
----------
- module : Union[bytes, str]
+        module : bytes | str
Either a bytes object containing the in-memory ltoir code to load, or
a file path string pointing to the on-disk ltoir file to load.
name : Optional[str]
@@ -550,12 +549,12 @@ def from_ltoir(module: Union[bytes, str], *, name: str = "", symbol_mapping: dic
return ObjectCode._init(module, "ltoir", name=name, symbol_mapping=symbol_mapping)
@staticmethod
- def from_fatbin(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
+ def from_fatbin(module: bytes | str, *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
"""Create an :class:`ObjectCode` instance from an existing fatbin.
Parameters
----------
- module : Union[bytes, str]
+        module : bytes | str
Either a bytes object containing the in-memory fatbin to load, or
a file path string pointing to the on-disk fatbin to load.
name : Optional[str]
@@ -568,12 +567,12 @@ def from_fatbin(module: Union[bytes, str], *, name: str = "", symbol_mapping: di
return ObjectCode._init(module, "fatbin", name=name, symbol_mapping=symbol_mapping)
@staticmethod
- def from_object(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
+ def from_object(module: bytes | str, *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
"""Create an :class:`ObjectCode` instance from an existing object code.
Parameters
----------
- module : Union[bytes, str]
+ module : bytes | str
Either a bytes object containing the in-memory object code to load, or
a file path string pointing to the on-disk object code to load.
name : Optional[str]
@@ -586,12 +585,12 @@ def from_object(module: Union[bytes, str], *, name: str = "", symbol_mapping: di
return ObjectCode._init(module, "object", name=name, symbol_mapping=symbol_mapping)
@staticmethod
- def from_library(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
+ def from_library(module: bytes | str, *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
"""Create an :class:`ObjectCode` instance from an existing library.
Parameters
----------
- module : Union[bytes, str]
+ module : bytes | str
Either a bytes object containing the in-memory library to load, or
a file path string pointing to the on-disk library to load.
name : Optional[str]
diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py
index 1db453fed1..7ef24105b1 100644
--- a/cuda_core/cuda/core/experimental/_program.py
+++ b/cuda_core/cuda/core/experimental/_program.py
@@ -7,7 +7,7 @@
import weakref
from contextlib import contextmanager
from dataclasses import dataclass
-from typing import TYPE_CHECKING, Union
+from typing import TYPE_CHECKING
from warnings import warn
if TYPE_CHECKING:
@@ -115,7 +115,7 @@ def _process_define_macro_inner(formatted_options, macro):
def _process_define_macro(formatted_options, macro):
- union_type = "Union[str, tuple[str, str]]"
+ union_type = "str | tuple[str, str]"
if _process_define_macro_inner(formatted_options, macro):
return
if is_nested_sequence(macro):
@@ -154,7 +154,7 @@ class ProgramOptions:
Enable device code optimization. When specified along with ‘-G’, enables limited debug information generation
for optimized device code.
Default: None
- ptxas_options : Union[str, list[str]], optional
+ ptxas_options : str | list[str], optional
Specify one or more options directly to ptxas, the PTX optimizing assembler. Options should be strings.
For example ["-v", "-O2"].
Default: None
@@ -188,17 +188,17 @@ class ProgramOptions:
gen_opt_lto : bool, optional
Run the optimizer passes before generating the LTO IR.
Default: False
- define_macro : Union[str, tuple[str, str], list[Union[str, tuple[str, str]]]], optional
+ define_macro : str | tuple[str, str] | list[str | tuple[str, str]], optional
Predefine a macro. Can be either a string, in which case that macro will be set to 1, a 2 element tuple of
strings, in which case the first element is defined as the second, or a list of strings or tuples.
Default: None
- undefine_macro : Union[str, list[str]], optional
+ undefine_macro : str | list[str], optional
Cancel any previous definition of a macro, or list of macros.
Default: None
- include_path : Union[str, list[str]], optional
+ include_path : str | list[str], optional
Add the directory or directories to the list of directories to be searched for headers.
Default: None
- pre_include : Union[str, list[str]], optional
+ pre_include : str | list[str], optional
Preinclude one or more headers during preprocessing. Can be either a string or a list of strings.
Default: None
no_source_include : bool, optional
@@ -231,13 +231,13 @@ class ProgramOptions:
no_display_error_number : bool, optional
Disable the display of a diagnostic number for warning messages.
Default: False
- diag_error : Union[int, list[int]], optional
+ diag_error : int | list[int], optional
Emit error for a specified diagnostic message number or comma separated list of numbers.
Default: None
- diag_suppress : Union[int, list[int]], optional
+ diag_suppress : int | list[int], optional
Suppress a specified diagnostic message number or comma separated list of numbers.
Default: None
- diag_warn : Union[int, list[int]], optional
+ diag_warn : int | list[int], optional
Emit warning for a specified diagnostic message number or comma separated lis of numbers.
Default: None
brief_diagnostics : bool, optional
@@ -264,7 +264,7 @@ class ProgramOptions:
debug: bool | None = None
lineinfo: bool | None = None
device_code_optimize: bool | None = None
- ptxas_options: Union[str, list[str], tuple[str]] | None = None
+ ptxas_options: str | list[str] | tuple[str, ...] | None = None
max_register_count: int | None = None
ftz: bool | None = None
prec_sqrt: bool | None = None
@@ -274,12 +274,10 @@ class ProgramOptions:
extra_device_vectorization: bool | None = None
link_time_optimization: bool | None = None
gen_opt_lto: bool | None = None
- define_macro: (
- Union[str, tuple[str, str], list[Union[str, tuple[str, str]]], tuple[Union[str, tuple[str, str]]]] | None
- ) = None
- undefine_macro: Union[str, list[str], tuple[str]] | None = None
- include_path: Union[str, list[str], tuple[str]] | None = None
- pre_include: Union[str, list[str], tuple[str]] | None = None
+ define_macro: str | tuple[str, str] | list[str | tuple[str, str]] | tuple[str | tuple[str, str]] | None = None
+ undefine_macro: str | list[str] | tuple[str, ...] | None = None
+ include_path: str | list[str] | tuple[str, ...] | None = None
+ pre_include: str | list[str] | tuple[str, ...] | None = None
no_source_include: bool | None = None
std: str | None = None
builtin_move_forward: bool | None = None
@@ -290,9 +288,9 @@ class ProgramOptions:
device_int128: bool | None = None
optimization_info: str | None = None
no_display_error_number: bool | None = None
- diag_error: Union[int, list[int], tuple[int]] | None = None
- diag_suppress: Union[int, list[int], tuple[int]] | None = None
- diag_warn: Union[int, list[int], tuple[int]] | None = None
+ diag_error: int | list[int] | tuple[int] | None = None
+ diag_suppress: int | list[int] | tuple[int] | None = None
+ diag_warn: int | list[int] | tuple[int] | None = None
brief_diagnostics: bool | None = None
time: str | None = None
split_compile: int | None = None
@@ -428,7 +426,7 @@ def __repr__(self):
return str(self._formatted_options)
-ProgramHandleT = Union["cuda.bindings.nvrtc.nvrtcProgram", LinkerHandleT]
+ProgramHandleT = cuda.bindings.nvrtc.nvrtcProgram | LinkerHandleT
class Program:
@@ -574,7 +572,7 @@ def compile(self, target_type, name_expressions=(), logs=None):
target_type : Any
String of the targeted compilation type.
Supported options are "ptx", "cubin" and "ltoir".
- name_expressions : Union[list, tuple], optional
+ name_expressions : list | tuple, optional
List of explicit name expressions to become accessible.
(Default to no expressions)
logs : Any, optional
diff --git a/cuda_python_test_helpers/cuda_python_test_helpers/__init__.py b/cuda_python_test_helpers/cuda_python_test_helpers/__init__.py
index a661b4f1aa..e0b7261121 100644
--- a/cuda_python_test_helpers/cuda_python_test_helpers/__init__.py
+++ b/cuda_python_test_helpers/cuda_python_test_helpers/__init__.py
@@ -7,7 +7,6 @@
import platform
import sys
from contextlib import suppress
-from typing import Union
from cuda.core.experimental._utils.cuda_utils import handle_return
@@ -38,7 +37,7 @@ def _detect_wsl() -> bool:
@functools.cache
-def supports_ipc_mempool(device_id: Union[int, object]) -> bool:
+def supports_ipc_mempool(device_id: int | object) -> bool:
"""Return True if mempool IPC via POSIX file descriptor is supported.
Uses cuDeviceGetAttribute(CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES)
From 593f9a1904af1092e93e2f34917018f30bbe32c7 Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Mon, 27 Oct 2025 16:17:27 -0400
Subject: [PATCH 7/9] chore: address review-bot comments (docstring union syntax,
 variadic tuple[int, ...] annotations)
---
cuda_core/cuda/core/experimental/_module.py | 26 ++++++++++----------
cuda_core/cuda/core/experimental/_program.py | 6 ++---
2 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py
index dcb5d06f5b..9654cb97d3 100644
--- a/cuda_core/cuda/core/experimental/_module.py
+++ b/cuda_core/cuda/core/experimental/_module.py
@@ -503,9 +503,9 @@ def from_cubin(module: bytes | str, *, name: str = "", symbol_mapping: dict | No
module : bytes | str
Either a bytes object containing the in-memory cubin to load, or
a file path string pointing to the on-disk cubin to load.
- name : Optional[str]
+ name : str | None
A human-readable identifier representing this code object.
- symbol_mapping : Optional[dict]
+ symbol_mapping : dict | None
A dictionary specifying how the unmangled symbol names (as keys)
should be mapped to the mangled names before trying to retrieve
them (default to no mappings).
@@ -521,9 +521,9 @@ def from_ptx(module: bytes | str, *, name: str = "", symbol_mapping: dict | None
module : bytes | str
Either a bytes object containing the in-memory ptx code to load, or
a file path string pointing to the on-disk ptx file to load.
- name : Optional[str]
+ name : str | None
A human-readable identifier representing this code object.
- symbol_mapping : Optional[dict]
+ symbol_mapping : dict | None
A dictionary specifying how the unmangled symbol names (as keys)
should be mapped to the mangled names before trying to retrieve
them (default to no mappings).
@@ -539,9 +539,9 @@ def from_ltoir(module: bytes | str, *, name: str = "", symbol_mapping: dict | No
module : bytes, str
Either a bytes object containing the in-memory ltoir code to load, or
a file path string pointing to the on-disk ltoir file to load.
- name : Optional[str]
+ name : str | None
A human-readable identifier representing this code object.
- symbol_mapping : Optional[dict]
+ symbol_mapping : dict | None
A dictionary specifying how the unmangled symbol names (as keys)
should be mapped to the mangled names before trying to retrieve
them (default to no mappings).
@@ -554,12 +554,12 @@ def from_fatbin(module: bytes | str, *, name: str = "", symbol_mapping: dict | N
Parameters
----------
- module : bytes| str
+ module : bytes | str
Either a bytes object containing the in-memory fatbin to load, or
a file path string pointing to the on-disk fatbin to load.
- name : Optional[str]
+ name : str | None
A human-readable identifier representing this code object.
- symbol_mapping : Optional[dict]
+ symbol_mapping : dict | None
A dictionary specifying how the unmangled symbol names (as keys)
should be mapped to the mangled names before trying to retrieve
them (default to no mappings).
@@ -575,9 +575,9 @@ def from_object(module: bytes | str, *, name: str = "", symbol_mapping: dict | N
module : bytes | str
Either a bytes object containing the in-memory object code to load, or
a file path string pointing to the on-disk object code to load.
- name : Optional[str]
+ name : str | None
A human-readable identifier representing this code object.
- symbol_mapping : Optional[dict]
+ symbol_mapping : dict | None
A dictionary specifying how the unmangled symbol names (as keys)
should be mapped to the mangled names before trying to retrieve
them (default to no mappings).
@@ -593,9 +593,9 @@ def from_library(module: bytes | str, *, name: str = "", symbol_mapping: dict |
module : bytes | str
Either a bytes object containing the in-memory library to load, or
a file path string pointing to the on-disk library to load.
- name : Optional[str]
+ name : str | None
A human-readable identifier representing this code object.
- symbol_mapping : Optional[dict]
+ symbol_mapping : dict | None
A dictionary specifying how the unmangled symbol names (as keys)
should be mapped to the mangled names before trying to retrieve
them (default to no mappings).
diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py
index 7ef24105b1..28c282ffea 100644
--- a/cuda_core/cuda/core/experimental/_program.py
+++ b/cuda_core/cuda/core/experimental/_program.py
@@ -288,9 +288,9 @@ class ProgramOptions:
device_int128: bool | None = None
optimization_info: str | None = None
no_display_error_number: bool | None = None
- diag_error: int | list[int] | tuple[int] | None = None
- diag_suppress: int | list[int] | tuple[int] | None = None
- diag_warn: int | list[int] | tuple[int] | None = None
+ diag_error: int | list[int] | tuple[int, ...] | None = None
+ diag_suppress: int | list[int] | tuple[int, ...] | None = None
+ diag_warn: int | list[int] | tuple[int, ...] | None = None
brief_diagnostics: bool | None = None
time: str | None = None
split_compile: int | None = None
From 041d824a2ab7d437fa0da029b59267b033483ad5 Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Mon, 27 Oct 2025 16:21:24 -0400
Subject: [PATCH 8/9] chore: document `name` as `str` (it has a default and is
 never None)
---
cuda_core/cuda/core/experimental/_module.py | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py
index 9654cb97d3..18a9537ced 100644
--- a/cuda_core/cuda/core/experimental/_module.py
+++ b/cuda_core/cuda/core/experimental/_module.py
@@ -503,7 +503,7 @@ def from_cubin(module: bytes | str, *, name: str = "", symbol_mapping: dict | No
module : bytes | str
Either a bytes object containing the in-memory cubin to load, or
a file path string pointing to the on-disk cubin to load.
- name : str | None
+ name : str
A human-readable identifier representing this code object.
symbol_mapping : dict | None
A dictionary specifying how the unmangled symbol names (as keys)
@@ -521,7 +521,7 @@ def from_ptx(module: bytes | str, *, name: str = "", symbol_mapping: dict | None
module : bytes | str
Either a bytes object containing the in-memory ptx code to load, or
a file path string pointing to the on-disk ptx file to load.
- name : str | None
+ name : str
A human-readable identifier representing this code object.
symbol_mapping : dict | None
A dictionary specifying how the unmangled symbol names (as keys)
@@ -539,7 +539,7 @@ def from_ltoir(module: bytes | str, *, name: str = "", symbol_mapping: dict | No
module : bytes, str
Either a bytes object containing the in-memory ltoir code to load, or
a file path string pointing to the on-disk ltoir file to load.
- name : str | None
+ name : str
A human-readable identifier representing this code object.
symbol_mapping : dict | None
A dictionary specifying how the unmangled symbol names (as keys)
@@ -557,7 +557,7 @@ def from_fatbin(module: bytes | str, *, name: str = "", symbol_mapping: dict | N
module : bytes | str
Either a bytes object containing the in-memory fatbin to load, or
a file path string pointing to the on-disk fatbin to load.
- name : str | None
+ name : str
A human-readable identifier representing this code object.
symbol_mapping : dict | None
A dictionary specifying how the unmangled symbol names (as keys)
@@ -575,7 +575,7 @@ def from_object(module: bytes | str, *, name: str = "", symbol_mapping: dict | N
module : bytes | str
Either a bytes object containing the in-memory object code to load, or
a file path string pointing to the on-disk object code to load.
- name : str | None
+ name : str
A human-readable identifier representing this code object.
symbol_mapping : dict | None
A dictionary specifying how the unmangled symbol names (as keys)
@@ -593,7 +593,7 @@ def from_library(module: bytes | str, *, name: str = "", symbol_mapping: dict |
module : bytes | str
Either a bytes object containing the in-memory library to load, or
a file path string pointing to the on-disk library to load.
- name : str | None
+ name : str
A human-readable identifier representing this code object.
symbol_mapping : dict | None
A dictionary specifying how the unmangled symbol names (as keys)
From 77cc0fcde61a7c2bfece90c022c069f25cd59f0a Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Thu, 30 Oct 2025 13:40:25 -0400
Subject: [PATCH 9/9] chore: fix optionals
---
.../cuda/pathfinder/_headers/find_nvidia_headers.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py b/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py
index d770e99214..63f8a627fd 100644
--- a/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py
+++ b/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py
@@ -21,7 +21,7 @@ def _joined_isfile(dirpath: str, basename: str) -> bool:
return os.path.isfile(os.path.join(dirpath, basename))
-def _find_under_site_packages(sub_dir: str, h_basename: str) -> Optional[str]:
+def _find_under_site_packages(sub_dir: str, h_basename: str) -> str | None:
# Installed from a wheel
hdr_dir: str # help mypy
for hdr_dir in find_sub_dirs_all_sitepackages(tuple(sub_dir.split("/"))):
@@ -52,7 +52,7 @@ def _find_based_on_ctk_layout(libname: str, h_basename: str, anchor_point: str)
return None
-def _find_based_on_conda_layout(libname: str, h_basename: str, ctk_layout: bool) -> Optional[str]:
+def _find_based_on_conda_layout(libname: str, h_basename: str, ctk_layout: bool) -> str | None:
conda_prefix = os.environ.get("CONDA_PREFIX")
if not conda_prefix:
return None
@@ -134,7 +134,7 @@ def find_nvidia_header_directory(libname: str) -> str | None:
raise RuntimeError(f"UNKNOWN {libname=}")
candidate_dirs = supported_nvidia_headers.SUPPORTED_SITE_PACKAGE_HEADER_DIRS_NON_CTK.get(libname, [])
- hdr_dir: Optional[str] # help mypy
+ hdr_dir: str | None # help mypy
for cdir in candidate_dirs:
if hdr_dir := _find_under_site_packages(cdir, h_basename):
return _abs_norm(hdr_dir)