Merged
1 change: 0 additions & 1 deletion .github/workflows/build-wheel.yml
@@ -28,7 +28,6 @@ jobs:
fail-fast: false
matrix:
python-version:
- "3.9"
- "3.10"
- "3.11"
- "3.12"
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
@@ -113,7 +113,7 @@ flowchart TD
B2["linux-aarch64<br/>(Self-hosted)"]
B3["win-64<br/>(GitHub-hosted)"]
end
BUILD_DETAILS["• Python versions: 3.9, 3.10, 3.11, 3.12, 3.13<br/>• CUDA version: 13.0.0 (build-time)<br/>• Components: cuda-core, cuda-bindings,<br/> cuda-pathfinder, cuda-python"]
BUILD_DETAILS["• Python versions: 3.10, 3.11, 3.12, 3.13, 3.14<br/>• CUDA version: 13.0.0 (build-time)<br/>• Components: cuda-core, cuda-bindings,<br/> cuda-pathfinder, cuda-python"]
end

%% Artifact Storage
14 changes: 0 additions & 14 deletions ci/test-matrix.json
@@ -4,8 +4,6 @@
"_notes": "DRIVER: 'earliest' does not work with CUDA 12.9.1 and LOCAL_CTK: 0 does not work with CUDA 12.0.1",
"linux": {
"pull-request": [
{ "ARCH": "amd64", "PY_VER": "3.9", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" },
{ "ARCH": "amd64", "PY_VER": "3.9", "CUDA_VER": "13.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
{ "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
{ "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "13.0.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" },
{ "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" },
@@ -17,8 +15,6 @@
{ "ARCH": "amd64", "PY_VER": "3.13t", "CUDA_VER": "13.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
{ "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "13.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
{ "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "13.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
{ "ARCH": "arm64", "PY_VER": "3.9", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" },
{ "ARCH": "arm64", "PY_VER": "3.9", "CUDA_VER": "13.0.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
{ "ARCH": "arm64", "PY_VER": "3.10", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
{ "ARCH": "arm64", "PY_VER": "3.10", "CUDA_VER": "13.0.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" },
{ "ARCH": "arm64", "PY_VER": "3.11", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" },
@@ -32,11 +28,6 @@
{ "ARCH": "arm64", "PY_VER": "3.14t", "CUDA_VER": "13.0.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }
],
"nightly": [
{ "ARCH": "amd64", "PY_VER": "3.9", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "earliest" },
{ "ARCH": "amd64", "PY_VER": "3.9", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
{ "ARCH": "amd64", "PY_VER": "3.9", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
{ "ARCH": "amd64", "PY_VER": "3.9", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" },
{ "ARCH": "amd64", "PY_VER": "3.9", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
{ "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "earliest" },
{ "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
{ "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
@@ -57,11 +48,6 @@
{ "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
{ "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" },
{ "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
{ "ARCH": "arm64", "PY_VER": "3.9", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "earliest" },
{ "ARCH": "arm64", "PY_VER": "3.9", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
{ "ARCH": "arm64", "PY_VER": "3.9", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
{ "ARCH": "arm64", "PY_VER": "3.9", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" },
{ "ARCH": "arm64", "PY_VER": "3.9", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
{ "ARCH": "arm64", "PY_VER": "3.10", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "earliest" },
{ "ARCH": "arm64", "PY_VER": "3.10", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
{ "ARCH": "arm64", "PY_VER": "3.10", "CUDA_VER": "12.0.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
2 changes: 1 addition & 1 deletion cuda_bindings/docs/source/install.rst
@@ -10,7 +10,7 @@ Runtime Requirements
``cuda.bindings`` supports the same platforms as CUDA. Runtime dependencies are:

* Linux (x86-64, arm64) and Windows (x86-64)
* Python 3.9 - 3.13
* Python 3.10 - 3.14
* Driver: Linux (580.65.06 or later) Windows (580.88 or later)
* Optionally, NVRTC, nvJitLink, NVVM, and cuFile from CUDA Toolkit 13.x

2 changes: 1 addition & 1 deletion cuda_bindings/docs/source/support.rst
@@ -19,7 +19,7 @@ The ``cuda.bindings`` module has the following support policy:
depends on the underlying driver and the Toolkit versions, as described in the compatibility
documentation.)
4. The module supports all Python versions following the `CPython EOL schedule`_. As of writing
Python 3.9 - 3.13 are supported.
Python 3.10 - 3.14 are supported.
5. The module exposes a Cython layer from which types and functions could be ``cimport``'d. While
we strive to keep this layer stable, due to Cython limitations a new *minor* release of this
module could require Cython layer users to rebuild their projects and update their pinning to
3 changes: 2 additions & 1 deletion cuda_bindings/pyproject.toml
@@ -9,16 +9,17 @@ name = "cuda-bindings"
description = "Python bindings for CUDA"
authors = [{name = "NVIDIA Corporation", email = "cuda-python-conduct@nvidia.com"},]
license = "LicenseRef-NVIDIA-SOFTWARE-LICENSE"
requires-python = ">=3.10"
classifiers = [
"Intended Audience :: Developers",
"Topic :: Database",
"Topic :: Scientific/Engineering",
"Programming Language :: Python",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Programming Language :: Python :: 3.14",
"Environment :: GPU :: NVIDIA CUDA",
]
dynamic = [
11 changes: 0 additions & 11 deletions cuda_core/cuda/core/experimental/__init__.py
@@ -26,17 +26,6 @@
finally:
del cuda.bindings, importlib, subdir, cuda_major, cuda_minor

import sys # noqa: E402
import warnings # noqa: E402

if sys.version_info < (3, 10):
warnings.warn(
"support for Python 3.9 and below is deprecated and subject to future removal",
category=FutureWarning,
stacklevel=1,
)
del sys, warnings

from cuda.core.experimental import utils # noqa: E402
from cuda.core.experimental._device import Device # noqa: E402
from cuda.core.experimental._event import Event, EventOptions # noqa: E402
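The block deleted from `cuda/core/experimental/__init__.py` above was a version-gated deprecation notice. A minimal sketch of that pattern — the `warn_if_unsupported` helper name is ours, not the package's:

```python
import sys
import warnings


def warn_if_unsupported(minimum=(3, 10)):
    # Emit a FutureWarning at import time when the running interpreter is
    # older than the minimum version we intend to keep supporting.
    if sys.version_info < minimum:
        warnings.warn(
            "support for this Python version is deprecated and subject to future removal",
            category=FutureWarning,
            stacklevel=2,
        )


warn_if_unsupported()  # silent on any interpreter >= 3.10
```

With 3.9 support gone in this PR, the gate could never fire, so the diff removes it outright rather than bumping the threshold.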
10 changes: 5 additions & 5 deletions cuda_core/cuda/core/experimental/_device.pyx
@@ -10,7 +10,7 @@ from cuda.bindings cimport cydriver
from cuda.core.experimental._utils.cuda_utils cimport HANDLE_RETURN

import threading
from typing import Optional, Union
from typing import Union

from cuda.core.experimental._context import Context, ContextOptions
from cuda.core.experimental._event import Event, EventOptions
@@ -949,7 +949,7 @@ class Device:
"""
__slots__ = ("_id", "_mr", "_has_inited", "_properties")

def __new__(cls, device_id: Optional[int] = None):
def __new__(cls, device_id: int | None = None):
global _is_cuInit
if _is_cuInit is False:
with _lock, nogil:
@@ -1221,7 +1221,7 @@ class Device:
"""
raise NotImplementedError("WIP: https://github.com/NVIDIA/cuda-python/issues/189")

def create_stream(self, obj: Optional[IsStreamT] = None, options: Optional[StreamOptions] = None) -> Stream:
def create_stream(self, obj: IsStreamT | None = None, options: StreamOptions | None = None) -> Stream:
"""Create a Stream object.

New stream objects can be created in two different ways:
@@ -1252,7 +1252,7 @@
self._check_context_initialized()
return Stream._init(obj=obj, options=options, device_id=self._id)

def create_event(self, options: Optional[EventOptions] = None) -> Event:
def create_event(self, options: EventOptions | None = None) -> Event:
"""Create an Event object without recording it to a Stream.

Note
@@ -1274,7 +1274,7 @@
ctx = self._get_current_context()
return Event._init(self._id, ctx, options)

def allocate(self, size, stream: Optional[Stream] = None) -> Buffer:
def allocate(self, size, stream: Stream | None = None) -> Buffer:
"""Allocate device memory from a specified stream.

Allocates device memory of `size` bytes on the specified `stream`
6 changes: 3 additions & 3 deletions cuda_core/cuda/core/experimental/_launch_config.py
@@ -3,7 +3,7 @@
# SPDX-License-Identifier: Apache-2.0

from dataclasses import dataclass
from typing import Optional, Union
from typing import Union

from cuda.core.experimental._device import Device
from cuda.core.experimental._utils.cuda_utils import (
@@ -68,8 +68,8 @@ class LaunchConfig:
grid: Union[tuple, int] = None
cluster: Union[tuple, int] = None
block: Union[tuple, int] = None
shmem_size: Optional[int] = None
cooperative_launch: Optional[bool] = False
shmem_size: int | None = None
cooperative_launch: bool | None = False

def __post_init__(self):
_lazy_init()
2 changes: 1 addition & 1 deletion cuda_core/cuda/core/experimental/_linker.py
@@ -343,7 +343,7 @@ def _exception_manager(self):
# our constructor could raise, in which case there's no handle available
error_log = self.get_error_log()
# Starting Python 3.11 we could also use Exception.add_note() for the same purpose, but
# unfortunately we are still supporting Python 3.9/3.10...
# unfortunately we are still supporting Python 3.10...
# Here we rely on both CUDAError and nvJitLinkError have the error string placed in .args[0].
e.args = (e.args[0] + (f"\nLinker error log: {error_log}" if error_log else ""), *e.args[1:])
raise e
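For context on the comment tweak in `_linker.py` (and the matching one in `_program.py` below): `Exception.add_note()` requires Python 3.11, and 3.10 is still supported, so both modules attach log output to an in-flight exception by rewriting `e.args`. A minimal sketch of that pattern, with a hypothetical `append_log` helper:

```python
def append_log(e: Exception, log: str) -> Exception:
    # Rewrite args[0] in place; this relies on the error string living in
    # args[0], as the comment in _linker.py notes for CUDAError/nvJitLinkError.
    if log:
        e.args = (e.args[0] + f"\nLinker error log: {log}", *e.args[1:])
    return e


try:
    raise ValueError("link failed")
except ValueError as err:
    append_log(err, "undefined symbol: foo")
    # str(err) now carries the original message plus the appended log line.
    assert str(err) == "link failed\nLinker error log: undefined symbol: foo"
```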
26 changes: 10 additions & 16 deletions cuda_core/cuda/core/experimental/_module.py
@@ -4,7 +4,7 @@

import weakref
from collections import namedtuple
from typing import Optional, Union
from typing import Union
from warnings import warn

from cuda.core.experimental._launch_config import LaunchConfig, _to_native_launch_config
@@ -310,7 +310,7 @@ def available_dynamic_shared_memory_per_block(self, num_blocks_per_multiprocesso
driver.cuOccupancyAvailableDynamicSMemPerBlock(self._handle, num_blocks_per_multiprocessor, block_size)
)

def max_potential_cluster_size(self, config: LaunchConfig, stream: Optional[Stream] = None) -> int:
def max_potential_cluster_size(self, config: LaunchConfig, stream: Stream | None = None) -> int:
"""Maximum potential cluster size.

The maximum potential cluster size for this kernel and given launch configuration.
@@ -332,7 +332,7 @@
drv_cfg.hStream = stream.handle
return handle_return(driver.cuOccupancyMaxPotentialClusterSize(self._handle, drv_cfg))

def max_active_clusters(self, config: LaunchConfig, stream: Optional[Stream] = None) -> int:
def max_active_clusters(self, config: LaunchConfig, stream: Stream | None = None) -> int:
"""Maximum number of active clusters on the target device.

The maximum number of clusters that could concurrently execute on the target device.
@@ -469,7 +469,7 @@ def __new__(self, *args, **kwargs):
)

@classmethod
def _init(cls, module, code_type, *, name: str = "", symbol_mapping: Optional[dict] = None):
def _init(cls, module, code_type, *, name: str = "", symbol_mapping: dict | None = None):
self = super().__new__(cls)
assert code_type in self._supported_code_type, f"{code_type=} is not supported"
_lazy_init()
@@ -496,7 +496,7 @@ def __reduce__(self):
return ObjectCode._reduce_helper, (self._module, self._code_type, self._name, self._sym_map)

@staticmethod
def from_cubin(module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None) -> "ObjectCode":
def from_cubin(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
"""Create an :class:`ObjectCode` instance from an existing cubin.

Parameters
@@ -514,7 +514,7 @@ def from_cubin(module: Union[bytes, str], *, name: str = "", symbol_mapping: Opt
return ObjectCode._init(module, "cubin", name=name, symbol_mapping=symbol_mapping)

@staticmethod
def from_ptx(module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None) -> "ObjectCode":
def from_ptx(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
"""Create an :class:`ObjectCode` instance from an existing PTX.

Parameters
@@ -532,7 +532,7 @@ def from_ptx(module: Union[bytes, str], *, name: str = "", symbol_mapping: Optio
return ObjectCode._init(module, "ptx", name=name, symbol_mapping=symbol_mapping)

@staticmethod
def from_ltoir(module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None) -> "ObjectCode":
def from_ltoir(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
"""Create an :class:`ObjectCode` instance from an existing LTOIR.

Parameters
@@ -550,9 +550,7 @@ def from_ltoir(module: Union[bytes, str], *, name: str = "", symbol_mapping: Opt
return ObjectCode._init(module, "ltoir", name=name, symbol_mapping=symbol_mapping)

@staticmethod
def from_fatbin(
module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None
) -> "ObjectCode":
def from_fatbin(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
"""Create an :class:`ObjectCode` instance from an existing fatbin.

Parameters
@@ -570,9 +568,7 @@ def from_fatbin(
return ObjectCode._init(module, "fatbin", name=name, symbol_mapping=symbol_mapping)

@staticmethod
def from_object(
module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None
) -> "ObjectCode":
def from_object(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
"""Create an :class:`ObjectCode` instance from an existing object code.

Parameters
@@ -590,9 +586,7 @@ def from_object(
return ObjectCode._init(module, "object", name=name, symbol_mapping=symbol_mapping)

@staticmethod
def from_library(
module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None
) -> "ObjectCode":
def from_library(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
"""Create an :class:`ObjectCode` instance from an existing library.

Parameters
2 changes: 1 addition & 1 deletion cuda_core/cuda/core/experimental/_program.py
@@ -49,7 +49,7 @@ def _nvvm_exception_manager(self):
except Exception:
error_log = ""
# Starting Python 3.11 we could also use Exception.add_note() for the same purpose, but
# unfortunately we are still supporting Python 3.9/3.10...
# unfortunately we are still supporting Python 3.10...
e.args = (e.args[0] + (f"\nNVVM program log: {error_log}" if error_log else ""), *e.args[1:])
raise e

2 changes: 1 addition & 1 deletion cuda_core/docs/source/install.rst
@@ -26,7 +26,7 @@ dependencies are as follows:
.. [#f1] Including ``cuda-python``.


``cuda.core`` supports Python 3.9 - 3.13, on Linux (x86-64, arm64) and Windows (x86-64).
``cuda.core`` supports Python 3.10 - 3.14, on Linux (x86-64, arm64) and Windows (x86-64).

Installing from PyPI
--------------------
4 changes: 2 additions & 2 deletions cuda_core/pyproject.toml
@@ -14,7 +14,7 @@ dynamic = [
"version",
"readme",
]
requires-python = '>=3.9'
requires-python = '>=3.10'
description = "cuda.core: (experimental) pythonic CUDA module"
authors = [
{ name = "NVIDIA Corporation" }
@@ -32,11 +28,6 @@
"Topic :: Scientific/Engineering",
"Topic :: Software Development :: Libraries",
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Programming Language :: Python :: 3.14",
"Programming Language :: Python :: Implementation :: CPython",
"Environment :: GPU :: NVIDIA CUDA",
"Environment :: GPU :: NVIDIA CUDA :: 12",