From 2b0fa02716143a431c7d9da9b192d94e37699e66 Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Wed, 8 Oct 2025 15:19:26 -0400
Subject: [PATCH] Revert "refactor(python): drop support for 3.9, document 3.14
 support (#1069)"

This reverts commit 9bce9767fa10944890dafb5d64a71f8084026321.
---
 .github/workflows/build-wheel.yml             |  1 +
 CONTRIBUTING.md                               |  2 +-
 ci/test-matrix.json                           | 14 ++++++++++
 cuda_bindings/docs/source/install.rst         |  2 +-
 cuda_bindings/docs/source/support.rst         |  2 +-
 cuda_bindings/pyproject.toml                  |  3 +--
 cuda_core/cuda/core/experimental/__init__.py  | 11 ++++++++
 cuda_core/cuda/core/experimental/_device.pyx  | 10 +++----
 .../cuda/core/experimental/_launch_config.py  |  6 ++---
 cuda_core/cuda/core/experimental/_linker.py   |  2 +-
 cuda_core/cuda/core/experimental/_module.py   | 26 +++++++++++-------
 cuda_core/cuda/core/experimental/_program.py  |  2 +-
 cuda_core/docs/source/install.rst             |  2 +-
 cuda_core/pyproject.toml                      |  4 +--
 .../_dynamic_libs/find_nvidia_dynamic_lib.py  | 27 ++++++++++---------
 .../_dynamic_libs/load_dl_common.py           |  4 +--
 .../pathfinder/_dynamic_libs/load_dl_linux.py | 10 +++----
 .../_dynamic_libs/load_dl_windows.py          |  5 ++--
 .../_headers/find_nvidia_headers.py           | 13 ++++-----
 .../cuda/pathfinder/_utils/env_vars.py        |  3 ++-
 .../_utils/find_site_packages_dll.py          |  7 +----
 .../_utils/find_site_packages_so.py           |  6 +----
 cuda_pathfinder/pyproject.toml                |  4 +--
 .../tests/spawned_process_runner.py           | 10 +++----
 cuda_python/pyproject.toml                    |  4 +--
 ruff.toml                                     |  3 +--
 toolshed/make_site_packages_libdirs.py        |  4 +--
 27 files changed, 105 insertions(+), 82 deletions(-)
diff --git a/.github/workflows/build-wheel.yml b/.github/workflows/build-wheel.yml
index 5a0dba3a61..283a0d462b 100644
--- a/.github/workflows/build-wheel.yml
+++ b/.github/workflows/build-wheel.yml
@@ -28,6 +28,7 @@ jobs:
       fail-fast: false
       matrix:
         python-version:
+          - "3.9"
           - "3.10"
           - "3.11"
           - "3.12"
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 67bd568d85..183d215865 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -113,7 +113,7 @@ flowchart TD
             B2["linux-aarch64<br/>(Self-hosted)"]
             B3["win-64<br/>(GitHub-hosted)"]
         end
-        BUILD_DETAILS["• Python versions: 3.10, 3.11, 3.12, 3.13, 3.14<br/>• CUDA version: 13.0.0 (build-time)<br/>• Components: cuda-core, cuda-bindings,<br/>  cuda-pathfinder, cuda-python"]
+        BUILD_DETAILS["• Python versions: 3.9, 3.10, 3.11, 3.12, 3.13<br/>• CUDA version: 13.0.0 (build-time)<br/>• Components: cuda-core, cuda-bindings,<br/>  cuda-pathfinder, cuda-python"]
     end
 
     %% Artifact Storage
diff --git a/ci/test-matrix.json b/ci/test-matrix.json
index feea4ebaef..10721659b8 100644
--- a/ci/test-matrix.json
+++ b/ci/test-matrix.json
@@ -4,6 +4,8 @@
   "_notes": "DRIVER: 'earliest' does not work with CUDA 12.9.1 and LOCAL_CTK: 0 does not work with CUDA 12.0.1",
   "linux": {
     "pull-request": [
+      { "ARCH": "amd64", "PY_VER": "3.9",  "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" },
+      { "ARCH": "amd64", "PY_VER": "3.9",  "CUDA_VER": "13.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
       { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
       { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "13.0.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" },
       { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" },
@@ -15,6 +17,8 @@
       { "ARCH": "amd64", "PY_VER": "3.13t", "CUDA_VER": "13.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
       { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "13.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
       { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "13.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
+      { "ARCH": "arm64", "PY_VER": "3.9",  "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" },
+      { "ARCH": "arm64", "PY_VER": "3.9",  "CUDA_VER": "13.0.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
       { "ARCH": "arm64", "PY_VER": "3.10", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
       { "ARCH": "arm64", "PY_VER": "3.10", "CUDA_VER": "13.0.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" },
       { "ARCH": "arm64", "PY_VER": "3.11", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" },
@@ -28,6 +32,11 @@
       { "ARCH": "arm64", "PY_VER": "3.14t", "CUDA_VER": "13.0.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }
     ],
     "nightly": [
+      { "ARCH": "amd64", "PY_VER": "3.9",  "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "earliest" },
+      { "ARCH": "amd64", "PY_VER": "3.9",  "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
+      { "ARCH": "amd64", "PY_VER": "3.9",  "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
+      { "ARCH": "amd64", "PY_VER": "3.9",  "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" },
+      { "ARCH": "amd64", "PY_VER": "3.9",  "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
       { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "earliest" },
       { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
       { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
@@ -48,6 +57,11 @@
       { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
       { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" },
       { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
+      { "ARCH": "arm64", "PY_VER": "3.9",  "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "earliest" },
+      { "ARCH": "arm64", "PY_VER": "3.9",  "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
+      { "ARCH": "arm64", "PY_VER": "3.9",  "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
+      { "ARCH": "arm64", "PY_VER": "3.9",  "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" },
+      { "ARCH": "arm64", "PY_VER": "3.9",  "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
       { "ARCH": "arm64", "PY_VER": "3.10", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "earliest" },
       { "ARCH": "arm64", "PY_VER": "3.10", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
       { "ARCH": "arm64", "PY_VER": "3.10", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
diff --git a/cuda_bindings/docs/source/install.rst b/cuda_bindings/docs/source/install.rst
index 02441fd518..b5181c6a35 100644
--- a/cuda_bindings/docs/source/install.rst
+++ b/cuda_bindings/docs/source/install.rst
@@ -10,7 +10,7 @@ Runtime Requirements
 ``cuda.bindings`` supports the same platforms as CUDA. Runtime dependencies are:
 
 * Linux (x86-64, arm64) and Windows (x86-64)
-* Python 3.10 - 3.14
+* Python 3.9 - 3.13
 * Driver: Linux (580.65.06 or later) Windows (580.88 or later)
 * Optionally, NVRTC, nvJitLink, NVVM, and cuFile from CUDA Toolkit 13.x
 
diff --git a/cuda_bindings/docs/source/support.rst b/cuda_bindings/docs/source/support.rst
index 4439d963c0..a34a5c49e2 100644
--- a/cuda_bindings/docs/source/support.rst
+++ b/cuda_bindings/docs/source/support.rst
@@ -19,7 +19,7 @@ The ``cuda.bindings`` module has the following support policy:
    depends on the underlying driver and the Toolkit versions, as described in the compatibility
    documentation.)
 4. The module supports all Python versions following the `CPython EOL schedule`_. As of writing
-   Python 3.10 - 3.14 are supported.
+   Python 3.9 - 3.13 are supported.
 5. The module exposes a Cython layer from which types and functions could be ``cimport``'d. While
    we strive to keep this layer stable, due to Cython limitations a new *minor* release of this
    module could require Cython layer users to rebuild their projects and update their pinning to
diff --git a/cuda_bindings/pyproject.toml b/cuda_bindings/pyproject.toml
index 74ed7b95d5..250f8e4076 100644
--- a/cuda_bindings/pyproject.toml
+++ b/cuda_bindings/pyproject.toml
@@ -9,17 +9,16 @@ name = "cuda-bindings"
 description = "Python bindings for CUDA"
 authors = [{name = "NVIDIA Corporation", email = "cuda-python-conduct@nvidia.com"},]
 license = "LicenseRef-NVIDIA-SOFTWARE-LICENSE"
-requires-python = ">=3.10"
 classifiers = [
     "Intended Audience :: Developers",
     "Topic :: Database",
     "Topic :: Scientific/Engineering",
     "Programming Language :: Python",
+    "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
     "Programming Language :: Python :: 3.13",
-    "Programming Language :: Python :: 3.14",
     "Environment :: GPU :: NVIDIA CUDA",
 ]
 dynamic = [
diff --git a/cuda_core/cuda/core/experimental/__init__.py b/cuda_core/cuda/core/experimental/__init__.py
index 8a60c031c5..94fb0aa083 100644
--- a/cuda_core/cuda/core/experimental/__init__.py
+++ b/cuda_core/cuda/core/experimental/__init__.py
@@ -26,6 +26,17 @@
 finally:
     del cuda.bindings, importlib, subdir, cuda_major, cuda_minor
 
+import sys  # noqa: E402
+import warnings  # noqa: E402
+
+if sys.version_info < (3, 10):
+    warnings.warn(
+        "support for Python 3.9 and below is deprecated and subject to future removal",
+        category=FutureWarning,
+        stacklevel=1,
+    )
+del sys, warnings
+
 from cuda.core.experimental import utils  # noqa: E402
 from cuda.core.experimental._device import Device  # noqa: E402
 from cuda.core.experimental._event import Event, EventOptions  # noqa: E402
diff --git a/cuda_core/cuda/core/experimental/_device.pyx b/cuda_core/cuda/core/experimental/_device.pyx
index e847083d89..bcba09f985 100644
--- a/cuda_core/cuda/core/experimental/_device.pyx
+++ b/cuda_core/cuda/core/experimental/_device.pyx
@@ -10,7 +10,7 @@ from cuda.bindings cimport cydriver
 from cuda.core.experimental._utils.cuda_utils cimport HANDLE_RETURN
 
 import threading
-from typing import Union
+from typing import Optional, Union
 
 from cuda.core.experimental._context import Context, ContextOptions
 from cuda.core.experimental._event import Event, EventOptions
@@ -950,7 +950,7 @@ class Device:
     """
     __slots__ = ("_id", "_mr", "_has_inited", "_properties")
 
-    def __new__(cls, device_id: int | None = None):
+    def __new__(cls, device_id: Optional[int] = None):
         global _is_cuInit
         if _is_cuInit is False:
             with _lock, nogil:
@@ -1222,7 +1222,7 @@ class Device:
         """
         raise NotImplementedError("WIP: https://github.com/NVIDIA/cuda-python/issues/189")
 
-    def create_stream(self, obj: IsStreamT | None = None, options: StreamOptions | None = None) -> Stream:
+    def create_stream(self, obj: Optional[IsStreamT] = None, options: Optional[StreamOptions] = None) -> Stream:
         """Create a Stream object.
 
         New stream objects can be created in two different ways:
@@ -1253,7 +1253,7 @@ class Device:
         self._check_context_initialized()
         return Stream._init(obj=obj, options=options, device_id=self._id)
 
-    def create_event(self, options: EventOptions | None = None) -> Event:
+    def create_event(self, options: Optional[EventOptions] = None) -> Event:
         """Create an Event object without recording it to a Stream.
 
         Note
@@ -1275,7 +1275,7 @@ class Device:
         ctx = self._get_current_context()
         return Event._init(self._id, ctx, options)
 
-    def allocate(self, size, stream: Stream | None = None) -> Buffer:
+    def allocate(self, size, stream: Optional[Stream] = None) -> Buffer:
         """Allocate device memory from a specified stream.
 
         Allocates device memory of `size` bytes on the specified `stream`
diff --git a/cuda_core/cuda/core/experimental/_launch_config.py b/cuda_core/cuda/core/experimental/_launch_config.py
index c1e08da58d..d82e0ec3a2 100644
--- a/cuda_core/cuda/core/experimental/_launch_config.py
+++ b/cuda_core/cuda/core/experimental/_launch_config.py
@@ -3,7 +3,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
 from dataclasses import dataclass
-from typing import Union
+from typing import Optional, Union
 
 from cuda.core.experimental._device import Device
 from cuda.core.experimental._utils.cuda_utils import (
@@ -68,8 +68,8 @@ class LaunchConfig:
     grid: Union[tuple, int] = None
     cluster: Union[tuple, int] = None
     block: Union[tuple, int] = None
-    shmem_size: int | None = None
-    cooperative_launch: bool | None = False
+    shmem_size: Optional[int] = None
+    cooperative_launch: Optional[bool] = False
 
     def __post_init__(self):
         _lazy_init()
diff --git a/cuda_core/cuda/core/experimental/_linker.py b/cuda_core/cuda/core/experimental/_linker.py
index 5c54a88c8c..a3fa4b3e48 100644
--- a/cuda_core/cuda/core/experimental/_linker.py
+++ b/cuda_core/cuda/core/experimental/_linker.py
@@ -343,7 +343,7 @@ def _exception_manager(self):
             # our constructor could raise, in which case there's no handle available
             error_log = self.get_error_log()
         # Starting Python 3.11 we could also use Exception.add_note() for the same purpose, but
-        # unfortunately we are still supporting Python 3.10...
+        # unfortunately we are still supporting Python 3.9/3.10...
         # Here we rely on both CUDAError and nvJitLinkError have the error string placed in .args[0].
         e.args = (e.args[0] + (f"\nLinker error log: {error_log}" if error_log else ""), *e.args[1:])
         raise e
diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py
index f8ce8f95d0..2c7ea3a156 100644
--- a/cuda_core/cuda/core/experimental/_module.py
+++ b/cuda_core/cuda/core/experimental/_module.py
@@ -4,7 +4,7 @@
 
 import weakref
 from collections import namedtuple
-from typing import Union
+from typing import Optional, Union
 from warnings import warn
 
 from cuda.core.experimental._launch_config import LaunchConfig, _to_native_launch_config
@@ -310,7 +310,7 @@ def available_dynamic_shared_memory_per_block(self, num_blocks_per_multiprocesso
             driver.cuOccupancyAvailableDynamicSMemPerBlock(self._handle, num_blocks_per_multiprocessor, block_size)
         )
 
-    def max_potential_cluster_size(self, config: LaunchConfig, stream: Stream | None = None) -> int:
+    def max_potential_cluster_size(self, config: LaunchConfig, stream: Optional[Stream] = None) -> int:
         """Maximum potential cluster size.
 
         The maximum potential cluster size for this kernel and given launch configuration.
@@ -332,7 +332,7 @@ def max_potential_cluster_size(self, config: LaunchConfig, stream: Stream | None
             drv_cfg.hStream = stream.handle
         return handle_return(driver.cuOccupancyMaxPotentialClusterSize(self._handle, drv_cfg))
 
-    def max_active_clusters(self, config: LaunchConfig, stream: Stream | None = None) -> int:
+    def max_active_clusters(self, config: LaunchConfig, stream: Optional[Stream] = None) -> int:
         """Maximum number of active clusters on the target device.
 
         The maximum number of clusters that could concurrently execute on the target device.
@@ -469,7 +469,7 @@ def __new__(self, *args, **kwargs):
         )
 
     @classmethod
-    def _init(cls, module, code_type, *, name: str = "", symbol_mapping: dict | None = None):
+    def _init(cls, module, code_type, *, name: str = "", symbol_mapping: Optional[dict] = None):
         self = super().__new__(cls)
         assert code_type in self._supported_code_type, f"{code_type=} is not supported"
         _lazy_init()
@@ -496,7 +496,7 @@ def __reduce__(self):
         return ObjectCode._reduce_helper, (self._module, self._code_type, self._name, self._sym_map)
 
     @staticmethod
-    def from_cubin(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
+    def from_cubin(module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None) -> "ObjectCode":
         """Create an :class:`ObjectCode` instance from an existing cubin.
 
         Parameters
@@ -514,7 +514,7 @@ def from_cubin(module: Union[bytes, str], *, name: str = "", symbol_mapping: dic
         return ObjectCode._init(module, "cubin", name=name, symbol_mapping=symbol_mapping)
 
     @staticmethod
-    def from_ptx(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
+    def from_ptx(module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None) -> "ObjectCode":
         """Create an :class:`ObjectCode` instance from an existing PTX.
 
         Parameters
@@ -532,7 +532,7 @@ def from_ptx(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict
         return ObjectCode._init(module, "ptx", name=name, symbol_mapping=symbol_mapping)
 
     @staticmethod
-    def from_ltoir(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
+    def from_ltoir(module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None) -> "ObjectCode":
         """Create an :class:`ObjectCode` instance from an existing LTOIR.
 
         Parameters
@@ -550,7 +550,9 @@ def from_ltoir(module: Union[bytes, str], *, name: str = "", symbol_mapping: dic
         return ObjectCode._init(module, "ltoir", name=name, symbol_mapping=symbol_mapping)
 
     @staticmethod
-    def from_fatbin(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
+    def from_fatbin(
+        module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None
+    ) -> "ObjectCode":
         """Create an :class:`ObjectCode` instance from an existing fatbin.
 
         Parameters
@@ -568,7 +570,9 @@ def from_fatbin(module: Union[bytes, str], *, name: str = "", symbol_mapping: di
         return ObjectCode._init(module, "fatbin", name=name, symbol_mapping=symbol_mapping)
 
     @staticmethod
-    def from_object(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
+    def from_object(
+        module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None
+    ) -> "ObjectCode":
         """Create an :class:`ObjectCode` instance from an existing object code.
 
         Parameters
@@ -586,7 +590,9 @@ def from_object(module: Union[bytes, str], *, name: str = "", symbol_mapping: di
         return ObjectCode._init(module, "object", name=name, symbol_mapping=symbol_mapping)
 
     @staticmethod
-    def from_library(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
+    def from_library(
+        module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None
+    ) -> "ObjectCode":
         """Create an :class:`ObjectCode` instance from an existing library.
 
         Parameters
diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py
index 1db453fed1..dee6f001e7 100644
--- a/cuda_core/cuda/core/experimental/_program.py
+++ b/cuda_core/cuda/core/experimental/_program.py
@@ -49,7 +49,7 @@ def _nvvm_exception_manager(self):
             except Exception:
                 error_log = ""
         # Starting Python 3.11 we could also use Exception.add_note() for the same purpose, but
-        # unfortunately we are still supporting Python 3.10...
+        # unfortunately we are still supporting Python 3.9/3.10...
         e.args = (e.args[0] + (f"\nNVVM program log: {error_log}" if error_log else ""), *e.args[1:])
         raise e
 
diff --git a/cuda_core/docs/source/install.rst b/cuda_core/docs/source/install.rst
index e3dbd5209c..68ee4176df 100644
--- a/cuda_core/docs/source/install.rst
+++ b/cuda_core/docs/source/install.rst
@@ -26,7 +26,7 @@ dependencies are as follows:
 .. [#f1] Including ``cuda-python``.
 
 
-``cuda.core`` supports Python 3.10 - 3.14, on Linux (x86-64, arm64) and Windows (x86-64).
+``cuda.core`` supports Python 3.9 - 3.13, on Linux (x86-64, arm64) and Windows (x86-64).
 
 Installing from PyPI
 --------------------
diff --git a/cuda_core/pyproject.toml b/cuda_core/pyproject.toml
index 31ceb5b1a5..5f12f6f120 100644
--- a/cuda_core/pyproject.toml
+++ b/cuda_core/pyproject.toml
@@ -14,7 +14,7 @@ dynamic = [
     "version",
     "readme",
 ]
-requires-python = '>=3.10'
+requires-python = '>=3.9'
 description = "cuda.core: (experimental) pythonic CUDA module"
 authors = [
     { name = "NVIDIA Corporation" }
@@ -32,11 +32,11 @@ classifiers = [
     "Topic :: Scientific/Engineering",
     "Topic :: Software Development :: Libraries",
     "Programming Language :: Python :: 3 :: Only",
+    "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
     "Programming Language :: Python :: 3.13",
-    "Programming Language :: Python :: 3.14",
     "Programming Language :: Python :: Implementation :: CPython",
     "Environment :: GPU :: NVIDIA CUDA",
     "Environment :: GPU :: NVIDIA CUDA :: 12",
diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py
index 65c9f4bf3c..75ebec3a85 100644
--- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py
+++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py
@@ -4,6 +4,7 @@
 import glob
 import os
 from collections.abc import Sequence
+from typing import Optional
 
 from cuda.pathfinder._dynamic_libs.load_dl_common import DynamicLibNotFoundError
 from cuda.pathfinder._dynamic_libs.supported_nvidia_libs import (
@@ -28,7 +29,7 @@ def _no_such_file_in_sub_dirs(
 
 def _find_so_using_nvidia_lib_dirs(
     libname: str, so_basename: str, error_messages: list[str], attachments: list[str]
-) -> str | None:
+) -> Optional[str]:
     rel_dirs = SITE_PACKAGES_LIBDIRS_LINUX.get(libname)
     if rel_dirs is not None:
         sub_dirs_searched = []
@@ -51,7 +52,7 @@ def _find_so_using_nvidia_lib_dirs(
     return None
 
 
-def _find_dll_under_dir(dirpath: str, file_wild: str) -> str | None:
+def _find_dll_under_dir(dirpath: str, file_wild: str) -> Optional[str]:
     for path in sorted(glob.glob(os.path.join(dirpath, file_wild))):
         if not os.path.isfile(path):
             continue
@@ -62,7 +63,7 @@ def _find_dll_under_dir(dirpath: str, file_wild: str) -> str | None:
 
 def _find_dll_using_nvidia_bin_dirs(
     libname: str, lib_searched_for: str, error_messages: list[str], attachments: list[str]
-) -> str | None:
+) -> Optional[str]:
     rel_dirs = SITE_PACKAGES_LIBDIRS_WINDOWS.get(libname)
     if rel_dirs is not None:
         sub_dirs_searched = []
@@ -78,7 +79,7 @@ def _find_dll_using_nvidia_bin_dirs(
     return None
 
 
-def _find_lib_dir_using_anchor_point(libname: str, anchor_point: str, linux_lib_dir: str) -> str | None:
+def _find_lib_dir_using_anchor_point(libname: str, anchor_point: str, linux_lib_dir: str) -> Optional[str]:
     # Resolve paths for the four cases:
     #    Windows/Linux x nvvm yes/no
     if IS_WINDOWS:
@@ -106,14 +107,14 @@ def _find_lib_dir_using_anchor_point(libname: str, anchor_point: str, linux_lib_
     return None
 
 
-def _find_lib_dir_using_cuda_home(libname: str) -> str | None:
+def _find_lib_dir_using_cuda_home(libname: str) -> Optional[str]:
     cuda_home = get_cuda_home_or_path()
     if cuda_home is None:
         return None
     return _find_lib_dir_using_anchor_point(libname, anchor_point=cuda_home, linux_lib_dir="lib64")
 
 
-def _find_lib_dir_using_conda_prefix(libname: str) -> str | None:
+def _find_lib_dir_using_conda_prefix(libname: str) -> Optional[str]:
     conda_prefix = os.environ.get("CONDA_PREFIX")
     if not conda_prefix:
         return None
@@ -124,7 +125,7 @@ def _find_lib_dir_using_conda_prefix(libname: str) -> str | None:
 
 def _find_so_using_lib_dir(
     lib_dir: str, so_basename: str, error_messages: list[str], attachments: list[str]
-) -> str | None:
+) -> Optional[str]:
     so_name = os.path.join(lib_dir, so_basename)
     if os.path.isfile(so_name):
         return so_name
@@ -140,7 +141,7 @@ def _find_so_using_lib_dir(
 
 def _find_dll_using_lib_dir(
     lib_dir: str, libname: str, error_messages: list[str], attachments: list[str]
-) -> str | None:
+) -> Optional[str]:
     file_wild = libname + "*.dll"
     dll_name = _find_dll_under_dir(lib_dir, file_wild)
     if dll_name is not None:
@@ -161,9 +162,9 @@ def __init__(self, libname: str):
             self.lib_searched_for = f"lib{libname}.so"
         self.error_messages: list[str] = []
         self.attachments: list[str] = []
-        self.abs_path: str | None = None
+        self.abs_path: Optional[str] = None
 
-    def try_site_packages(self) -> str | None:
+    def try_site_packages(self) -> Optional[str]:
         if IS_WINDOWS:
             return _find_dll_using_nvidia_bin_dirs(
                 self.libname,
@@ -179,13 +180,13 @@ def try_site_packages(self) -> str | None:
                 self.attachments,
             )
 
-    def try_with_conda_prefix(self) -> str | None:
+    def try_with_conda_prefix(self) -> Optional[str]:
         return self._find_using_lib_dir(_find_lib_dir_using_conda_prefix(self.libname))
 
-    def try_with_cuda_home(self) -> str | None:
+    def try_with_cuda_home(self) -> Optional[str]:
         return self._find_using_lib_dir(_find_lib_dir_using_cuda_home(self.libname))
 
-    def _find_using_lib_dir(self, lib_dir: str | None) -> str | None:
+    def _find_using_lib_dir(self, lib_dir: Optional[str]) -> Optional[str]:
         if lib_dir is None:
             return None
         if IS_WINDOWS:
diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_common.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_common.py
index 35b988ce93..416718f5a4 100644
--- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_common.py
+++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_common.py
@@ -1,8 +1,8 @@
 # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
-from collections.abc import Callable
 from dataclasses import dataclass
+from typing import Callable, Optional
 
 from cuda.pathfinder._dynamic_libs.supported_nvidia_libs import DIRECT_DEPENDENCIES
 
@@ -13,7 +13,7 @@ class DynamicLibNotFoundError(RuntimeError):
 
 @dataclass
 class LoadedDL:
-    abs_path: str | None
+    abs_path: Optional[str]
     was_already_loaded_from_elsewhere: bool
     _handle_uint: int  # Platform-agnostic unsigned pointer value
 
diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_linux.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_linux.py
index c3ac22dd5d..a7de858b73 100644
--- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_linux.py
+++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_linux.py
@@ -5,7 +5,7 @@
 import ctypes
 import ctypes.util
 import os
-from typing import cast
+from typing import Optional, cast
 
 from cuda.pathfinder._dynamic_libs.load_dl_common import LoadedDL
 from cuda.pathfinder._dynamic_libs.supported_nvidia_libs import (
@@ -76,8 +76,8 @@ class _LinkMapLNameView(ctypes.Structure):
 assert _LinkMapLNameView.l_name.offset == ctypes.sizeof(ctypes.c_void_p)
 
 
-def _dl_last_error() -> str | None:
-    msg_bytes = cast(bytes | None, LIBDL.dlerror())
+def _dl_last_error() -> Optional[str]:
+    msg_bytes = cast(Optional[bytes], LIBDL.dlerror())
     if not msg_bytes:
         return None  # no pending error
     # Never raises; undecodable bytes are mapped to U+DC80..U+DCFF
@@ -131,7 +131,7 @@ def get_candidate_sonames(libname: str) -> list[str]:
     return candidate_sonames
 
 
-def check_if_already_loaded_from_elsewhere(libname: str, _have_abs_path: bool) -> LoadedDL | None:
+def check_if_already_loaded_from_elsewhere(libname: str, _have_abs_path: bool) -> Optional[LoadedDL]:
     for soname in get_candidate_sonames(libname):
         try:
             handle = ctypes.CDLL(soname, mode=os.RTLD_NOLOAD)
@@ -149,7 +149,7 @@ def _load_lib(libname: str, filename: str) -> ctypes.CDLL:
     return ctypes.CDLL(filename, cdll_mode)
 
 
-def load_with_system_search(libname: str) -> LoadedDL | None:
+def load_with_system_search(libname: str) -> Optional[LoadedDL]:
     """Try to load a library using system search paths.
 
     Args:
diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_windows.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_windows.py
index d43d699071..5da6d9b84a 100644
--- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_windows.py
+++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_windows.py
@@ -5,6 +5,7 @@
 import ctypes.wintypes
 import os
 import struct
+from typing import Optional
 
 from cuda.pathfinder._dynamic_libs.load_dl_common import LoadedDL
 from cuda.pathfinder._dynamic_libs.supported_nvidia_libs import (
@@ -99,7 +100,7 @@ def abs_path_for_dynamic_library(libname: str, handle: ctypes.wintypes.HMODULE)
     return buffer.value
 
 
-def check_if_already_loaded_from_elsewhere(libname: str, have_abs_path: bool) -> LoadedDL | None:
+def check_if_already_loaded_from_elsewhere(libname: str, have_abs_path: bool) -> Optional[LoadedDL]:
     for dll_name in SUPPORTED_WINDOWS_DLLS.get(libname, ()):
         handle = kernel32.GetModuleHandleW(dll_name)
         if handle:
@@ -113,7 +114,7 @@ def check_if_already_loaded_from_elsewhere(libname: str, have_abs_path: bool) ->
     return None
 
 
-def load_with_system_search(libname: str) -> LoadedDL | None:
+def load_with_system_search(libname: str) -> Optional[LoadedDL]:
     """Try to load a DLL using system search paths.
 
     Args:
diff --git a/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py b/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py
index 03c4a4412c..535d4b8003 100644
--- a/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py
+++ b/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py
@@ -4,6 +4,7 @@
 import functools
 import glob
 import os
+from typing import Optional
 
 from cuda.pathfinder._headers import supported_nvidia_headers
 from cuda.pathfinder._utils.env_vars import get_cuda_home_or_path
@@ -11,7 +12,7 @@
 from cuda.pathfinder._utils.platform_aware import IS_WINDOWS
 
 
-def _abs_norm(path: str | None) -> str | None:
+def _abs_norm(path: Optional[str]) -> Optional[str]:
     if path:
         return os.path.normpath(os.path.abspath(path))
     return None
@@ -21,7 +22,7 @@ def _joined_isfile(dirpath: str, basename: str) -> bool:
     return os.path.isfile(os.path.join(dirpath, basename))
 
 
-def _find_nvshmem_header_directory() -> str | None:
+def _find_nvshmem_header_directory() -> Optional[str]:
     if IS_WINDOWS:
         # nvshmem has no Windows support.
         return None
@@ -46,7 +47,7 @@ def _find_nvshmem_header_directory() -> str | None:
     return None
 
 
-def _find_based_on_ctk_layout(libname: str, h_basename: str, anchor_point: str) -> str | None:
+def _find_based_on_ctk_layout(libname: str, h_basename: str, anchor_point: str) -> Optional[str]:
     parts = [anchor_point]
     if libname == "nvvm":
         parts.append(libname)
@@ -61,7 +62,7 @@ def _find_based_on_ctk_layout(libname: str, h_basename: str, anchor_point: str)
     return None
 
 
-def _find_based_on_conda_layout(libname: str, h_basename: str, conda_prefix: str) -> str | None:
+def _find_based_on_conda_layout(libname: str, h_basename: str, conda_prefix: str) -> Optional[str]:
     if IS_WINDOWS:
         anchor_point = os.path.join(conda_prefix, "Library")
         if not os.path.isdir(anchor_point):
@@ -78,7 +79,7 @@ def _find_based_on_conda_layout(libname: str, h_basename: str, conda_prefix: str
     return _find_based_on_ctk_layout(libname, h_basename, anchor_point)
 
 
-def _find_ctk_header_directory(libname: str) -> str | None:
+def _find_ctk_header_directory(libname: str) -> Optional[str]:
     h_basename = supported_nvidia_headers.SUPPORTED_HEADERS_CTK[libname]
     candidate_dirs = supported_nvidia_headers.SUPPORTED_SITE_PACKAGE_HEADER_DIRS_CTK[libname]
 
@@ -103,7 +104,7 @@ def _find_ctk_header_directory(libname: str) -> str | None:
 
 
 @functools.cache
-def find_nvidia_header_directory(libname: str) -> str | None:
+def find_nvidia_header_directory(libname: str) -> Optional[str]:
     """Locate the header directory for a supported NVIDIA library.
 
     Args:
diff --git a/cuda_pathfinder/cuda/pathfinder/_utils/env_vars.py b/cuda_pathfinder/cuda/pathfinder/_utils/env_vars.py
index cf78a627cb..3a7de992c0 100644
--- a/cuda_pathfinder/cuda/pathfinder/_utils/env_vars.py
+++ b/cuda_pathfinder/cuda/pathfinder/_utils/env_vars.py
@@ -3,6 +3,7 @@
 
 import os
 import warnings
+from typing import Optional
 
 
 def _paths_differ(a: str, b: str) -> bool:
@@ -32,7 +33,7 @@ def _paths_differ(a: str, b: str) -> bool:
     return True
 
 
-def get_cuda_home_or_path() -> str | None:
+def get_cuda_home_or_path() -> Optional[str]:
     cuda_home = os.environ.get("CUDA_HOME")
     cuda_path = os.environ.get("CUDA_PATH")
 
diff --git a/cuda_pathfinder/cuda/pathfinder/_utils/find_site_packages_dll.py b/cuda_pathfinder/cuda/pathfinder/_utils/find_site_packages_dll.py
index 507355727f..2f5695093c 100644
--- a/cuda_pathfinder/cuda/pathfinder/_utils/find_site_packages_dll.py
+++ b/cuda_pathfinder/cuda/pathfinder/_utils/find_site_packages_dll.py
@@ -11,12 +11,7 @@ def find_all_dll_files_via_metadata() -> dict[str, tuple[str, ...]]:
     results: collections.defaultdict[str, list[str]] = collections.defaultdict(list)
 
     # sort dists for deterministic output
-
-    for dist in sorted(
-        importlib.metadata.distributions(),
-        # `get` exists before 3.12, even though the hints only exist for Python >=3.12
-        key=lambda d: (d.metadata.get("Name", ""), d.version),  # type: ignore[attr-defined]
-    ):
+    for dist in sorted(importlib.metadata.distributions(), key=lambda d: (d.metadata.get("Name", ""), d.version)):
         files = dist.files
         if not files:
             continue
diff --git a/cuda_pathfinder/cuda/pathfinder/_utils/find_site_packages_so.py b/cuda_pathfinder/cuda/pathfinder/_utils/find_site_packages_so.py
index 33ee1f1bcf..69e7eea3ad 100644
--- a/cuda_pathfinder/cuda/pathfinder/_utils/find_site_packages_so.py
+++ b/cuda_pathfinder/cuda/pathfinder/_utils/find_site_packages_so.py
@@ -23,11 +23,7 @@ def find_all_so_files_via_metadata() -> dict[str, dict[str, tuple[str, ...]]]:
     )
 
     # sort dists for deterministic output
-    for dist in sorted(
-        importlib.metadata.distributions(),
-        # `get` exists before 3.12, even though the hints only exist for Python >=3.12
-        key=lambda d: (d.metadata.get("Name", ""), d.version),  # type: ignore[attr-defined]
-    ):
+    for dist in sorted(importlib.metadata.distributions(), key=lambda d: (d.metadata.get("Name", ""), d.version)):
         files = dist.files
         if not files:
             continue
diff --git a/cuda_pathfinder/pyproject.toml b/cuda_pathfinder/pyproject.toml
index ee0a4e794e..c2029a0220 100644
--- a/cuda_pathfinder/pyproject.toml
+++ b/cuda_pathfinder/pyproject.toml
@@ -6,7 +6,7 @@ name = "cuda-pathfinder"
 description = "Pathfinder for CUDA components"
 authors = [{ name = "NVIDIA Corporation", email = "cuda-python-conduct@nvidia.com" }]
 license = "Apache-2.0"
-requires-python = ">=3.10"
+requires-python = ">=3.9"
 dynamic = ["version", "readme"]
 dependencies = []
 
@@ -94,7 +94,7 @@ inline-quotes = "double"
 
 [tool.mypy]
 # Basic settings
-python_version = "3.10"
+python_version = "3.9"
 explicit_package_bases = true
 warn_return_any = true
 warn_unused_configs = true
diff --git a/cuda_pathfinder/tests/spawned_process_runner.py b/cuda_pathfinder/tests/spawned_process_runner.py
index f4440743f5..154178b2a2 100644
--- a/cuda_pathfinder/tests/spawned_process_runner.py
+++ b/cuda_pathfinder/tests/spawned_process_runner.py
@@ -5,10 +5,10 @@
 import queue  # for Empty
 import sys
 import traceback
-from collections.abc import Callable, Sequence
+from collections.abc import Sequence
 from dataclasses import dataclass
 from io import StringIO
-from typing import Any
+from typing import Any, Callable, Optional
 
 PROCESS_KILLED = -9
 PROCESS_NO_RESULT = -999
@@ -61,9 +61,9 @@ def __call__(self):
 def run_in_spawned_child_process(
     target: Callable[..., None],
     *,
-    args: Sequence[Any] | None = None,
-    kwargs: dict[str, Any] | None = None,
-    timeout: float | None = None,
+    args: Optional[Sequence[Any]] = None,
+    kwargs: Optional[dict[str, Any]] = None,
+    timeout: Optional[float] = None,
     rethrow: bool = False,
 ) -> CompletedProcess:
     """Run `target` in a spawned child process, capturing stdout/stderr.
diff --git a/cuda_python/pyproject.toml b/cuda_python/pyproject.toml
index 9048f5818b..fd6cacaf2a 100644
--- a/cuda_python/pyproject.toml
+++ b/cuda_python/pyproject.toml
@@ -22,18 +22,16 @@ classifiers = [
     "Intended Audience :: Science/Research",
     "Intended Audience :: End Users/Desktop",
     "Programming Language :: Python :: 3 :: Only",
+    "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
     "Programming Language :: Python :: 3.13",
-    "Programming Language :: Python :: 3.14",
     "Programming Language :: Python :: Implementation :: CPython",
     "Environment :: GPU :: NVIDIA CUDA",
     "Environment :: GPU :: NVIDIA CUDA :: 12",
-    "Environment :: GPU :: NVIDIA CUDA :: 13",
 ]
 dynamic = ["version", "dependencies", "optional-dependencies"]
-requires-python = ">=3.10"
 
 [project.urls]
 homepage = "https://nvidia.github.io/cuda-python/"
diff --git a/ruff.toml b/ruff.toml
index 6312d3e9ef..79c66e862c 100644
--- a/ruff.toml
+++ b/ruff.toml
@@ -3,7 +3,7 @@
 # SPDX-License-Identifier: Apache-2.0
 line-length = 120
 respect-gitignore = true
-target-version = "py310"
+target-version = "py39"
 
 [format]
 docstring-code-format = true
@@ -40,7 +40,6 @@ ignore = [
   "S101",   # asserts
   "S311",   # allow use of the random.* even though many are not cryptographically secure
   "S404",   # allow importing the subprocess module
-  "B905",   # preserve the default behavior of `zip` without the explicit `strict` argument
 ]
 
 exclude = ["**/_version.py"]
diff --git a/toolshed/make_site_packages_libdirs.py b/toolshed/make_site_packages_libdirs.py
index 00a495a095..d84d821700 100755
--- a/toolshed/make_site_packages_libdirs.py
+++ b/toolshed/make_site_packages_libdirs.py
@@ -8,7 +8,7 @@
 import argparse
 import os
 import re
-from typing import Dict, Set
+from typing import Dict, Optional, Set
 
 _SITE_PACKAGES_RE = re.compile(r"(?i)^.*?/site-packages/")
 
@@ -38,7 +38,7 @@ def parse_lines_linux(lines) -> Dict[str, Set[str]]:
     return d
 
 
-def extract_libname_from_dll(fname: str) -> str | None:
+def extract_libname_from_dll(fname: str) -> Optional[str]:
     """Return base libname per the heuristic, or None if not a .dll."""
     base = os.path.basename(fname)
     if not base.lower().endswith(".dll"):