From 2d1d8fe9f54e672da788375ffd67d29e6d21032d Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Sun, 26 Oct 2025 22:49:16 -0700 Subject: [PATCH 01/21] Add support for cutensor. Still works for existing libnames, cutensor testing is INCOMPLETE. --- .../_dynamic_libs/supported_nvidia_libs.py | 10 +++ .../_headers/find_nvidia_headers.py | 70 ++++++++++--------- .../_headers/supported_nvidia_headers.py | 14 ++++ cuda_pathfinder/cuda/pathfinder/_version.py | 2 +- cuda_pathfinder/pyproject.toml | 2 + 5 files changed, 63 insertions(+), 35 deletions(-) diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/supported_nvidia_libs.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/supported_nvidia_libs.py index 6030dc5c85..a7b73dafda 100644 --- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/supported_nvidia_libs.py +++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/supported_nvidia_libs.py @@ -84,6 +84,8 @@ "mathdx": ("nvrtc",), "cufftMp": ("nvshmem_host",), "cudss": ("cublas", "cublasLt"), + "cutensor": ("cublasLt",), + "cutensorMg": ("cutensor", "cublasLt"), } # Based on these released files: @@ -241,6 +243,8 @@ "cufftMp": ("libcufftMp.so.11",), "mathdx": ("libmathdx.so.0",), "cudss": ("libcudss.so.0",), + "cutensor": ("libcutensor.so.2",), + "cutensorMg": ("libcutensorMg.so.2",), "nccl": ("libnccl.so.2",), "nvpl_fftw": ("libnvpl_fftw.so.0",), "nvshmem_host": ("libnvshmem_host.so.3",), @@ -402,6 +406,8 @@ SUPPORTED_WINDOWS_DLLS_OTHER = { "mathdx": ("mathdx64_0.dll",), "cudss": ("cudss64_0.dll",), + "cutensor": ("cutensor.dll",), + "cutensorMg": ("cutensorMg.dll",), } SUPPORTED_WINDOWS_DLLS = SUPPORTED_WINDOWS_DLLS_CTK | SUPPORTED_WINDOWS_DLLS_OTHER @@ -447,6 +453,8 @@ "cublasmp": ("nvidia/cublasmp/cu13/lib", "nvidia/cublasmp/cu12/lib"), "cudss": ("nvidia/cu13/lib", "nvidia/cu12/lib"), "cufftMp": ("nvidia/cufftmp/cu12/lib",), + "cutensor": ("cutensor/lib",), + "cutensorMg": ("cutensor/lib",), "mathdx": ("nvidia/cu13/lib", "nvidia/cu12/lib"), "nccl": 
("nvidia/nccl/lib",), "nvpl_fftw": ("nvpl/lib",), @@ -485,6 +493,8 @@ } SITE_PACKAGES_LIBDIRS_WINDOWS_OTHER = { "mathdx": ("nvidia/cu13/bin/x86_64", "nvidia/cu12/bin"), + "cutensor": ("nvidia/cutensor/bin",), + "cutensorMg": ("nvidia/cutensor/bin",), } SITE_PACKAGES_LIBDIRS_WINDOWS = SITE_PACKAGES_LIBDIRS_WINDOWS_CTK | SITE_PACKAGES_LIBDIRS_WINDOWS_OTHER diff --git a/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py b/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py index 535d4b8003..d1067243e9 100644 --- a/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py +++ b/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py @@ -22,28 +22,12 @@ def _joined_isfile(dirpath: str, basename: str) -> bool: return os.path.isfile(os.path.join(dirpath, basename)) -def _find_nvshmem_header_directory() -> Optional[str]: - if IS_WINDOWS: - # nvshmem has no Windows support. - return None - +def _find_under_site_packages(sub_dir: str, h_basename: str) -> Optional[str]: # Installed from a wheel - nvidia_sub_dirs = ("nvidia", "nvshmem", "include") hdr_dir: str # help mypy - for hdr_dir in find_sub_dirs_all_sitepackages(nvidia_sub_dirs): - if _joined_isfile(hdr_dir, "nvshmem.h"): - return hdr_dir - - conda_prefix = os.environ.get("CONDA_PREFIX") - if conda_prefix and os.path.isdir(conda_prefix): - hdr_dir = os.path.join(conda_prefix, "include") - if _joined_isfile(hdr_dir, "nvshmem.h"): - return hdr_dir - - for hdr_dir in sorted(glob.glob("/usr/include/nvshmem_*"), reverse=True): - if _joined_isfile(hdr_dir, "nvshmem.h"): + for hdr_dir in find_sub_dirs_all_sitepackages(tuple(sub_dir.split("/"))): + if _joined_isfile(hdr_dir, h_basename): return hdr_dir - return None @@ -62,7 +46,10 @@ def _find_based_on_ctk_layout(libname: str, h_basename: str, anchor_point: str) return None -def _find_based_on_conda_layout(libname: str, h_basename: str, conda_prefix: str) -> Optional[str]: +def _find_based_on_conda_layout(libname: str, h_basename: str) -> 
Optional[str]: + conda_prefix = os.environ.get("CONDA_PREFIX") + if not conda_prefix: + return None if IS_WINDOWS: anchor_point = os.path.join(conda_prefix, "Library") if not os.path.isdir(anchor_point): @@ -83,18 +70,16 @@ def _find_ctk_header_directory(libname: str) -> Optional[str]: h_basename = supported_nvidia_headers.SUPPORTED_HEADERS_CTK[libname] candidate_dirs = supported_nvidia_headers.SUPPORTED_SITE_PACKAGE_HEADER_DIRS_CTK[libname] - # Installed from a wheel for cdir in candidate_dirs: - hdr_dir: str # help mypy - for hdr_dir in find_sub_dirs_all_sitepackages(tuple(cdir.split("/"))): - if _joined_isfile(hdr_dir, h_basename): - return hdr_dir + if hdr_dir := _find_under_site_packages(cdir, h_basename): + return hdr_dir - conda_prefix = os.environ.get("CONDA_PREFIX") - if conda_prefix: # noqa: SIM102 - if result := _find_based_on_conda_layout(libname, h_basename, conda_prefix): + if result := _find_based_on_conda_layout(libname, h_basename): return result + if hdr_dir := _find_based_on_conda_layout(libname, h_basename): + return hdr_dir + cuda_home = get_cuda_home_or_path() if cuda_home: # noqa: SIM102 if result := _find_based_on_ctk_layout(libname, h_basename, cuda_home): @@ -135,16 +120,33 @@ def find_nvidia_header_directory(libname: str) -> Optional[str]: Notes: - The ``SUPPORTED_HEADERS_CTK`` dictionary maps each supported CUDA Toolkit - (CTK) library to the name of its canonical header (e.g., ``"cublas" → + (CTK) libname to the name of its canonical header (e.g., ``"cublas" → "cublas.h"``). This is used to verify that the located directory is valid. - - The only supported non-CTK library at present is ``nvshmem``. + Similarly, the ``SUPPORTED_HEADERS_NON_CTK`` dictionary maps non-CTK + libnames to the name of the corresponding canonical header. 
""" - if libname == "nvshmem": - return _abs_norm(_find_nvshmem_header_directory()) - if libname in supported_nvidia_headers.SUPPORTED_HEADERS_CTK: return _abs_norm(_find_ctk_header_directory(libname)) - raise RuntimeError(f"UNKNOWN {libname=}") + h_basename = supported_nvidia_headers.SUPPORTED_HEADERS_NON_CTK.get(libname) + if h_basename is None: + raise RuntimeError(f"UNKNOWN {libname=}") + + candidate_dirs = supported_nvidia_headers.SUPPORTED_SITE_PACKAGE_HEADER_DIRS_NON_CTK.get(libname) + hdr_dir: Optional[str] # help mypy + for cdir in candidate_dirs: + if hdr_dir := _find_under_site_packages(cdir, h_basename): + return hdr_dir + + if hdr_dir := _find_based_on_conda_layout(libname, h_basename): + return hdr_dir + + candidate_dirs = supported_nvidia_headers.SUPPORTED_INSTALL_DIRS_NON_CTK.get(libname) + for cdir in candidate_dirs: + for hdr_dir in sorted(glob.glob(cdir), reverse=True): + if _joined_isfile(hdr_dir, h_basename): + return hdr_dir + + return None diff --git a/cuda_pathfinder/cuda/pathfinder/_headers/supported_nvidia_headers.py b/cuda_pathfinder/cuda/pathfinder/_headers/supported_nvidia_headers.py index 1b2855c0c9..0d2d8c036c 100644 --- a/cuda_pathfinder/cuda/pathfinder/_headers/supported_nvidia_headers.py +++ b/cuda_pathfinder/cuda/pathfinder/_headers/supported_nvidia_headers.py @@ -57,3 +57,17 @@ "nvrtc": ("nvidia/cu13/include", "nvidia/cuda_nvrtc/include"), "nvvm": ("nvidia/cu13/include", "nvidia/cuda_nvcc/nvvm/include"), } + +SUPPORTED_HEADERS_NON_CTK = { + "cutensor": "cutensor.h", + "nvshmem": "nvshmem.h", +} + +SUPPORTED_SITE_PACKAGE_HEADER_DIRS_NON_CTK = { + "cutensor": ("cutensor/include",), + "nvshmem": ("nvidia/nvshmem/include",), +} + +SUPPORTED_INSTALL_DIRS_NON_CTK = { + "nvshmem": ("/usr/include/nvshmem_*",), +} diff --git a/cuda_pathfinder/cuda/pathfinder/_version.py b/cuda_pathfinder/cuda/pathfinder/_version.py index cb42a4a3c5..b5fa5ff85c 100644 --- a/cuda_pathfinder/cuda/pathfinder/_version.py +++ 
b/cuda_pathfinder/cuda/pathfinder/_version.py @@ -1,4 +1,4 @@ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 -__version__ = "1.3.1" +__version__ = "1.3.2" diff --git a/cuda_pathfinder/pyproject.toml b/cuda_pathfinder/pyproject.toml index 80aeb1a4b6..3db1aecbc8 100644 --- a/cuda_pathfinder/pyproject.toml +++ b/cuda_pathfinder/pyproject.toml @@ -18,6 +18,7 @@ test = [ cu12 = [ "cuda-toolkit[nvcc,cublas,nvrtc,cudart,cufft,curand,cusolver,cusparse,npp,nvfatbin,nvjitlink,nvjpeg,cccl]==12.*", "cuda-toolkit[cufile]==12.*; sys_platform != 'win32'", + "cutensor-cu12", "nvidia-cublasmp-cu12; sys_platform != 'win32'", "nvidia-cudss-cu12", "nvidia-cufftmp-cu12; sys_platform != 'win32'", @@ -28,6 +29,7 @@ cu12 = [ cu13 = [ "cuda-toolkit[nvcc,cublas,nvrtc,cudart,cufft,curand,cusolver,cusparse,npp,nvfatbin,nvjitlink,nvjpeg,cccl,nvvm]==13.*", "cuda-toolkit[cufile]==13.*; sys_platform != 'win32'", + "cutensor-cu13", "nvidia-cublasmp-cu13; sys_platform != 'win32'", "nvidia-cudss-cu13", "nvidia-nccl-cu13; sys_platform != 'win32'", From 986a83ff5be12d3e0298dcf6f57c7547f909a852 Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Mon, 27 Oct 2025 10:53:10 -0700 Subject: [PATCH 02/21] Generalize tests/test_find_nvidia_headers.py to also cover cutensor --- .../_headers/find_nvidia_headers.py | 4 +- .../tests/test_find_nvidia_headers.py | 58 ++++++++++++------- 2 files changed, 40 insertions(+), 22 deletions(-) diff --git a/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py b/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py index d1067243e9..ce3b6a6b2a 100644 --- a/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py +++ b/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py @@ -134,7 +134,7 @@ def find_nvidia_header_directory(libname: str) -> Optional[str]: if h_basename is None: raise RuntimeError(f"UNKNOWN {libname=}") - candidate_dirs = supported_nvidia_headers.SUPPORTED_SITE_PACKAGE_HEADER_DIRS_NON_CTK.get(libname) + candidate_dirs = supported_nvidia_headers.SUPPORTED_SITE_PACKAGE_HEADER_DIRS_NON_CTK.get(libname, []) hdr_dir: Optional[str] # help mypy for cdir in candidate_dirs: if hdr_dir := _find_under_site_packages(cdir, h_basename): @@ -143,7 +143,7 @@ def find_nvidia_header_directory(libname: str) -> Optional[str]: if hdr_dir := _find_based_on_conda_layout(libname, h_basename): return hdr_dir - candidate_dirs = supported_nvidia_headers.SUPPORTED_INSTALL_DIRS_NON_CTK.get(libname) + candidate_dirs = supported_nvidia_headers.SUPPORTED_INSTALL_DIRS_NON_CTK.get(libname, []) for cdir in candidate_dirs: for hdr_dir in sorted(glob.glob(cdir), reverse=True): if _joined_isfile(hdr_dir, h_basename): diff --git a/cuda_pathfinder/tests/test_find_nvidia_headers.py b/cuda_pathfinder/tests/test_find_nvidia_headers.py index cdea0cd28f..8ffb74e152 100644 --- a/cuda_pathfinder/tests/test_find_nvidia_headers.py +++ b/cuda_pathfinder/tests/test_find_nvidia_headers.py @@ -14,6 +14,7 @@ # sudo apt install libnvshmem3-cuda-13 libnvshmem3-dev-cuda-13 import functools +import glob import importlib.metadata import os import re @@ -24,20 
+25,18 @@ from cuda.pathfinder._headers.supported_nvidia_headers import ( SUPPORTED_HEADERS_CTK, SUPPORTED_HEADERS_CTK_ALL, + SUPPORTED_HEADERS_NON_CTK, + SUPPORTED_INSTALL_DIRS_NON_CTK, SUPPORTED_SITE_PACKAGE_HEADER_DIRS_CTK, ) -from cuda.pathfinder._utils.platform_aware import IS_WINDOWS STRICTNESS = os.environ.get("CUDA_PATHFINDER_TEST_FIND_NVIDIA_HEADERS_STRICTNESS", "see_what_works") assert STRICTNESS in ("see_what_works", "all_must_work") - -@functools.cache -def have_nvidia_nvshmem_package() -> bool: - pattern = re.compile(r"^nvidia-nvshmem-.*$") - return any( - pattern.match(dist.metadata["Name"]) for dist in importlib.metadata.distributions() if "Name" in dist.metadata - ) +NON_CTK_IMPORTLIB_METADATA_DISTRIBUTIONS_NAMES = { + "cutensor": r"^cutensor-.*$", + "nvshmem": r"^nvidia-nvshmem-.*$", +} def test_unknown_libname(): @@ -45,24 +44,43 @@ def test_unknown_libname(): find_nvidia_header_directory("unknown-libname") -def test_find_libname_nvshmem(info_summary_append): - hdr_dir = find_nvidia_header_directory("nvshmem") +def test_non_ctk_importlib_metadata_distributions_names(): + # Ensure the dict keys above stay in sync with supported_nvidia_headers + assert sorted(NON_CTK_IMPORTLIB_METADATA_DISTRIBUTIONS_NAMES) == sorted(SUPPORTED_HEADERS_NON_CTK) + + +@functools.cache +def have_distribution_for(libname: str) -> bool: + pattern = re.compile(NON_CTK_IMPORTLIB_METADATA_DISTRIBUTIONS_NAMES[libname]) + return any( + pattern.match(dist.metadata["Name"]) for dist in importlib.metadata.distributions() if "Name" in dist.metadata + ) + + +@pytest.mark.parametrize("libname", SUPPORTED_HEADERS_NON_CTK.keys()) +def test_find_non_ctk_headers(info_summary_append, libname): + hdr_dir = find_nvidia_header_directory(libname) info_summary_append(f"{hdr_dir=!r}") - if IS_WINDOWS: - assert hdr_dir is None - pytest.skip("nvshmem has no Windows support.") if hdr_dir: assert os.path.isdir(hdr_dir) - assert os.path.isfile(os.path.join(hdr_dir, "nvshmem.h")) - if STRICTNESS == 
"all_must_work" or have_nvidia_nvshmem_package(): + assert os.path.isfile(os.path.join(hdr_dir, SUPPORTED_HEADERS_NON_CTK[libname])) + if have_distribution_for(libname): + assert hdr_dir is not None + hdr_dir_parts = hdr_dir.split(os.path.sep) + assert "site-packages" in hdr_dir_parts + elif STRICTNESS == "all_must_work": assert hdr_dir is not None - if have_nvidia_nvshmem_package(): - hdr_dir_parts = hdr_dir.split(os.path.sep) - assert "site-packages" in hdr_dir_parts - elif conda_prefix := os.environ.get("CONDA_PREFIX"): + if conda_prefix := os.environ.get("CONDA_PREFIX"): assert hdr_dir.startswith(conda_prefix) else: - assert hdr_dir.startswith("/usr/include/nvshmem_") + inst_dirs = SUPPORTED_INSTALL_DIRS_NON_CTK.get(libname) + if inst_dirs is not None: + for inst_dir in inst_dirs: + globbed = glob.glob(inst_dir) + if hdr_dir in globbed: + break + else: + raise RuntimeError(f"{hdr_dir=} does not match any {inst_dirs=}") def test_supported_headers_site_packages_ctk_consistency(): From 0583393350af87fdf213fb1174516d9193a5b75a Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Mon, 27 Oct 2025 13:36:46 -0700 Subject: [PATCH 03/21] test_find_nvidia_headers.py conda testing and fix --- .../_headers/find_nvidia_headers.py | 29 ++++++++++--------- .../tests/test_find_nvidia_headers.py | 5 ++-- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py b/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py index ce3b6a6b2a..4cf0ba1b68 100644 --- a/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py +++ b/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py @@ -46,7 +46,7 @@ def _find_based_on_ctk_layout(libname: str, h_basename: str, anchor_point: str) return None -def _find_based_on_conda_layout(libname: str, h_basename: str) -> Optional[str]: +def _find_based_on_conda_layout(libname: str, h_basename: str, ctk_layout: bool) -> Optional[str]: conda_prefix = os.environ.get("CONDA_PREFIX") if not conda_prefix: return None @@ -55,14 +55,18 @@ def _find_based_on_conda_layout(libname: str, h_basename: str) -> Optional[str]: if not os.path.isdir(anchor_point): return None else: - targets_include_path = glob.glob(os.path.join(conda_prefix, "targets", "*", "include")) - if not targets_include_path: - return None - if len(targets_include_path) != 1: - # Conda does not support multiple architectures. - # QUESTION(PR#956): Do we want to issue a warning? - return None - anchor_point = os.path.dirname(targets_include_path[0]) + if ctk_layout: + targets_include_path = glob.glob(os.path.join(conda_prefix, "targets", "*", "include")) + if not targets_include_path: + return None + if len(targets_include_path) != 1: + # Conda does not support multiple architectures. + # QUESTION(PR#956): Do we want to issue a warning? 
+ return None + include_path = targets_include_path[0] + else: + include_path = os.path.join(conda_prefix, "include") + anchor_point = os.path.dirname(include_path) return _find_based_on_ctk_layout(libname, h_basename, anchor_point) @@ -74,10 +78,7 @@ def _find_ctk_header_directory(libname: str) -> Optional[str]: if hdr_dir := _find_under_site_packages(cdir, h_basename): return hdr_dir - if result := _find_based_on_conda_layout(libname, h_basename): - return result - - if hdr_dir := _find_based_on_conda_layout(libname, h_basename): + if hdr_dir := _find_based_on_conda_layout(libname, h_basename, True): return hdr_dir cuda_home = get_cuda_home_or_path() @@ -140,7 +141,7 @@ def find_nvidia_header_directory(libname: str) -> Optional[str]: if hdr_dir := _find_under_site_packages(cdir, h_basename): return hdr_dir - if hdr_dir := _find_based_on_conda_layout(libname, h_basename): + if hdr_dir := _find_based_on_conda_layout(libname, h_basename, False): return hdr_dir candidate_dirs = supported_nvidia_headers.SUPPORTED_INSTALL_DIRS_NON_CTK.get(libname, []) diff --git a/cuda_pathfinder/tests/test_find_nvidia_headers.py b/cuda_pathfinder/tests/test_find_nvidia_headers.py index 8ffb74e152..2e8efbebc9 100644 --- a/cuda_pathfinder/tests/test_find_nvidia_headers.py +++ b/cuda_pathfinder/tests/test_find_nvidia_headers.py @@ -3,8 +3,9 @@ # Currently these installations are only manually tested: -# conda create -y -n nvshmem python=3.12 -# conda activate nvshmem +# conda create --yes -n pathfinder_cu13 python=3.13 cuda-toolkit=13.0.2 +# conda activate pathfinder_cu13 +# conda install -y conda-forge::cutensor # conda install -y conda-forge::libnvshmem3 conda-forge::libnvshmem-dev # wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/cuda-keyring_1.1-1_all.deb From 1d1f5341660290e1d7f33e53fd482c7b05b81246 Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Mon, 27 Oct 2025 14:20:33 -0700 Subject: [PATCH 04/21] test_load_nvidia_dynamic_lib.py fix conda testing --- cuda_pathfinder/tests/local_helpers.py | 16 +++++++ .../tests/test_load_nvidia_dynamic_lib.py | 43 ++++++++----------- .../conda_create_for_pathfinder_testing.sh | 27 ++++++++++++ 3 files changed, 62 insertions(+), 24 deletions(-) create mode 100644 cuda_pathfinder/tests/local_helpers.py create mode 100755 toolshed/conda_create_for_pathfinder_testing.sh diff --git a/cuda_pathfinder/tests/local_helpers.py b/cuda_pathfinder/tests/local_helpers.py new file mode 100644 index 0000000000..7893ba8229 --- /dev/null +++ b/cuda_pathfinder/tests/local_helpers.py @@ -0,0 +1,16 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +import functools +import importlib.metadata +import re + + +@functools.cache +def have_distribution(name_pattern: str) -> bool: + re_name_pattern = re.compile(name_pattern) + return any( + re_name_pattern.match(dist.metadata["Name"]) + for dist in importlib.metadata.distributions() + if "Name" in dist.metadata + ) diff --git a/cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py b/cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py index b50c644a61..652c8e57c0 100644 --- a/cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py +++ b/cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py @@ -1,19 +1,18 @@ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 -import functools import json import os +import platform from unittest.mock import patch import pytest import spawned_process_runner from child_load_nvidia_dynamic_lib_helper import build_child_process_failed_for_libname_message, child_process_func +from local_helpers import have_distribution -from cuda.pathfinder import SUPPORTED_NVIDIA_LIBNAMES, load_nvidia_dynamic_lib +from cuda.pathfinder import load_nvidia_dynamic_lib from cuda.pathfinder._dynamic_libs import supported_nvidia_libs -from cuda.pathfinder._utils.find_site_packages_dll import find_all_dll_files_via_metadata -from cuda.pathfinder._utils.find_site_packages_so import find_all_so_files_via_metadata from cuda.pathfinder._utils.platform_aware import IS_WINDOWS, quote_for_shell STRICTNESS = os.environ.get("CUDA_PATHFINDER_TEST_LOAD_NVIDIA_DYNAMIC_LIB_STRICTNESS", "see_what_works") @@ -71,28 +70,24 @@ def test_runtime_error_on_non_64bit_python(): load_nvidia_dynamic_lib("not_used") -@functools.cache -def _get_libnames_for_test_load_nvidia_dynamic_lib(): - result = list(SUPPORTED_NVIDIA_LIBNAMES) - if IS_WINDOWS: - spld_other = supported_nvidia_libs.SITE_PACKAGES_LIBDIRS_WINDOWS_OTHER - all_dyn_libs = find_all_dll_files_via_metadata() - for libname in spld_other: - for dll_name in all_dyn_libs: - if dll_name.startswith(libname): - result.append(libname) - else: - spld_other = supported_nvidia_libs.SITE_PACKAGES_LIBDIRS_LINUX_OTHER - all_dyn_libs = find_all_so_files_via_metadata() - for libname in spld_other: - so_basename = f"lib{libname}.so" - if so_basename in all_dyn_libs: - result.append(libname) +IMPORTLIB_METADATA_DISTRIBUTIONS_NAMES = { + "mathdx": r"^nvidia-libmathdx-.*$", +} + - return tuple(result) +def _is_expected_load_nvidia_dynamic_lib_failure(libname): + if libname == "nvpl_fftw" and platform.machine().lower() != "aarch64": + return True + dist_name_pattern = IMPORTLIB_METADATA_DISTRIBUTIONS_NAMES.get(libname) + if dist_name_pattern is not None: + return 
not have_distribution(dist_name_pattern) + return False -@pytest.mark.parametrize("libname", _get_libnames_for_test_load_nvidia_dynamic_lib()) +@pytest.mark.parametrize( + "libname", + supported_nvidia_libs.SUPPORTED_WINDOWS_DLLS if IS_WINDOWS else supported_nvidia_libs.SUPPORTED_LINUX_SONAMES, +) def test_load_nvidia_dynamic_lib(info_summary_append, libname): # We intentionally run each dynamic library operation in a child process # to ensure isolation of global dynamic linking state (e.g., dlopen handles). @@ -108,7 +103,7 @@ def raise_child_process_failed(): raise_child_process_failed() assert not result.stderr if result.stdout.startswith("CHILD_LOAD_NVIDIA_DYNAMIC_LIB_HELPER_DYNAMIC_LIB_NOT_FOUND_ERROR:"): - if STRICTNESS == "all_must_work": + if STRICTNESS == "all_must_work" and not _is_expected_load_nvidia_dynamic_lib_failure(libname): raise_child_process_failed() info_summary_append(f"Not found: {libname=!r}") else: diff --git a/toolshed/conda_create_for_pathfinder_testing.sh b/toolshed/conda_create_for_pathfinder_testing.sh new file mode 100755 index 0000000000..2433787581 --- /dev/null +++ b/toolshed/conda_create_for_pathfinder_testing.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +if [[ $# -ne 1 ]]; then + echo "Usage: $(basename "$0") ctk-major-minor-patch" 1>&2 + exit 1 +fi + +eval "$(conda shell.bash hook)" + +conda create --yes -n "pathfinder_testing_cu$1" python=3.13 cuda-toolkit="$1" +conda activate "pathfinder_testing_cu$1" + +for cpkg in \ + cutensor \ + libcublasmp-dev \ + libcudss-dev \ + libcufftmp-dev \ + libmathdx-dev \ + libnvshmem3 \ + libnvshmem-dev \ + libnvpl-fft-dev; do + echo "CONDA INSTALL: $cpkg" + conda install -y -c conda-forge "$cpkg" +done From e0a4ca6e36f40f6104658e54eb476bfa21aad361 Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Mon, 27 Oct 2025 21:45:32 -0700 Subject: [PATCH 05/21] Add conda_create_for_pathfinder_testing.ps1 --- .../conda_create_for_pathfinder_testing.ps1 | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 toolshed/conda_create_for_pathfinder_testing.ps1 diff --git a/toolshed/conda_create_for_pathfinder_testing.ps1 b/toolshed/conda_create_for_pathfinder_testing.ps1 new file mode 100644 index 0000000000..0c550e4183 --- /dev/null +++ b/toolshed/conda_create_for_pathfinder_testing.ps1 @@ -0,0 +1,30 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION +# SPDX-License-Identifier: Apache-2.0 + +param( + [Parameter(Mandatory = $true)] + [string]$CudaVersion +) + +$ErrorActionPreference = "Stop" + +& "$env:CONDA_EXE" "shell.powershell" "hook" | Out-String | Invoke-Expression + +conda create --yes -n "pathfinder_testing_cu$CudaVersion" python=3.13 "cuda-toolkit=$CudaVersion" +conda activate "pathfinder_testing_cu$CudaVersion" + +$cpkgs = @( + "cutensor", + "libcublasmp-dev", + "libcudss-dev", + "libcufftmp-dev", + "libmathdx-dev", + "libnvshmem3", + "libnvshmem-dev" + "libnvpl-fft-dev" +) + +foreach ($cpkg in $cpkgs) { + Write-Host "CONDA INSTALL: $cpkg" + conda install -y -c conda-forge $cpkg +} From 25dd364aa0a3b33c65d99abd520a65efa7d2694c Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Mon, 27 Oct 2025 21:53:08 -0700 Subject: [PATCH 06/21] Bug fix: SITE_PACKAGES_LIBDIRS_WINDOWS_OTHER cutensor, cutensorMg paths --- .../cuda/pathfinder/_dynamic_libs/supported_nvidia_libs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/supported_nvidia_libs.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/supported_nvidia_libs.py index a7b73dafda..0652dd9127 100644 --- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/supported_nvidia_libs.py +++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/supported_nvidia_libs.py @@ -493,8 +493,8 @@ } SITE_PACKAGES_LIBDIRS_WINDOWS_OTHER = { "mathdx": ("nvidia/cu13/bin/x86_64", "nvidia/cu12/bin"), - "cutensor": ("nvidia/cutensor/bin",), - "cutensorMg": ("nvidia/cutensor/bin",), + "cutensor": ("cutensor/bin",), + "cutensorMg": ("cutensor/bin",), } SITE_PACKAGES_LIBDIRS_WINDOWS = SITE_PACKAGES_LIBDIRS_WINDOWS_CTK | SITE_PACKAGES_LIBDIRS_WINDOWS_OTHER From cdab9698e3bbebeaae54ff7af769039a1d8da261 Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Mon, 27 Oct 2025 22:00:51 -0700 Subject: [PATCH 07/21] Add cudss paths to SITE_PACKAGES_LIBDIRS_WINDOWS_OTHER --- .../cuda/pathfinder/_dynamic_libs/supported_nvidia_libs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/supported_nvidia_libs.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/supported_nvidia_libs.py index 0652dd9127..e0ab545925 100644 --- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/supported_nvidia_libs.py +++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/supported_nvidia_libs.py @@ -492,6 +492,7 @@ "nvvm": ("nvidia/cu13/bin/x86_64", "nvidia/cuda_nvcc/nvvm/bin"), } SITE_PACKAGES_LIBDIRS_WINDOWS_OTHER = { + "cudss": ("nvidia/cu13/bin", "nvidia/cu12/bin"), "mathdx": ("nvidia/cu13/bin/x86_64", "nvidia/cu12/bin"), "cutensor": ("cutensor/bin",), "cutensorMg": ("cutensor/bin",), From 6d490f59cb3bc9e2d44f1ecce9b8e44be35e0cf3 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 27 Oct 2025 22:21:55 -0700 Subject: [PATCH 08/21] Add SUPPORTED_HEADERS_NON_CTK_ALL to fix Windows site-packages tests --- .../pathfinder/_headers/supported_nvidia_headers.py | 10 +++++++++- cuda_pathfinder/tests/test_find_nvidia_headers.py | 3 ++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/cuda_pathfinder/cuda/pathfinder/_headers/supported_nvidia_headers.py b/cuda_pathfinder/cuda/pathfinder/_headers/supported_nvidia_headers.py index 0d2d8c036c..be41f72971 100644 --- a/cuda_pathfinder/cuda/pathfinder/_headers/supported_nvidia_headers.py +++ b/cuda_pathfinder/cuda/pathfinder/_headers/supported_nvidia_headers.py @@ -58,10 +58,18 @@ "nvvm": ("nvidia/cu13/include", "nvidia/cuda_nvcc/nvvm/include"), } -SUPPORTED_HEADERS_NON_CTK = { +SUPPORTED_HEADERS_NON_CTK_COMMON = { "cutensor": "cutensor.h", +} +SUPPORTED_HEADERS_NON_CTK_LINUX_ONLY = { "nvshmem": "nvshmem.h", } +SUPPORTED_HEADERS_NON_CTK_LINUX = SUPPORTED_HEADERS_NON_CTK_COMMON | SUPPORTED_HEADERS_NON_CTK_LINUX_ONLY 
+SUPPORTED_HEADERS_NON_CTK_WINDOWS = SUPPORTED_HEADERS_NON_CTK_COMMON +SUPPORTED_HEADERS_NON_CTK_ALL = SUPPORTED_HEADERS_NON_CTK_COMMON | SUPPORTED_HEADERS_NON_CTK_LINUX_ONLY +SUPPORTED_HEADERS_NON_CTK: Final[dict[str, str]] = ( + SUPPORTED_HEADERS_NON_CTK_WINDOWS if IS_WINDOWS else SUPPORTED_HEADERS_NON_CTK_LINUX +) SUPPORTED_SITE_PACKAGE_HEADER_DIRS_NON_CTK = { "cutensor": ("cutensor/include",), diff --git a/cuda_pathfinder/tests/test_find_nvidia_headers.py b/cuda_pathfinder/tests/test_find_nvidia_headers.py index 2e8efbebc9..9eedead024 100644 --- a/cuda_pathfinder/tests/test_find_nvidia_headers.py +++ b/cuda_pathfinder/tests/test_find_nvidia_headers.py @@ -27,6 +27,7 @@ SUPPORTED_HEADERS_CTK, SUPPORTED_HEADERS_CTK_ALL, SUPPORTED_HEADERS_NON_CTK, + SUPPORTED_HEADERS_NON_CTK_ALL, SUPPORTED_INSTALL_DIRS_NON_CTK, SUPPORTED_SITE_PACKAGE_HEADER_DIRS_CTK, ) @@ -47,7 +48,7 @@ def test_unknown_libname(): def test_non_ctk_importlib_metadata_distributions_names(): # Ensure the dict keys above stay in sync with supported_nvidia_headers - assert sorted(NON_CTK_IMPORTLIB_METADATA_DISTRIBUTIONS_NAMES) == sorted(SUPPORTED_HEADERS_NON_CTK) + assert sorted(NON_CTK_IMPORTLIB_METADATA_DISTRIBUTIONS_NAMES) == sorted(SUPPORTED_HEADERS_NON_CTK_ALL) @functools.cache From 28e720697b0e74de0c49337e952a57ba8e4665c5 Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Mon, 27 Oct 2025 22:48:09 -0700 Subject: [PATCH 09/21] Bug fix (existing code): conda cccl header directory --- .../cuda/pathfinder/_headers/find_nvidia_headers.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py b/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py index 4cf0ba1b68..b82cb54a1d 100644 --- a/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py +++ b/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py @@ -38,6 +38,14 @@ def _find_based_on_ctk_layout(libname: str, h_basename: str, anchor_point: str) parts.append("include") idir = os.path.join(*parts) if libname == "cccl": + if IS_WINDOWS: + cdir_ctk12 = os.path.join(idir, "targets", "x64") + cdir_ctk13 = os.path.join(cdir_ctk12, "cccl") + if _joined_isfile(cdir_ctk13, h_basename): + return cdir_ctk13 + if _joined_isfile(cdir_ctk12, h_basename): + return cdir_ctk12 + return None cdir = os.path.join(idir, "cccl") # CTK 13 if _joined_isfile(cdir, h_basename): return cdir From d36a62d82aef5ca155edf6dd677ff72d90ccb79e Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Mon, 27 Oct 2025 22:59:04 -0700 Subject: [PATCH 10/21] test_find_nvidia_headers.py: refer to toolshed/conda_create_for_pathfinder_testing.* --- cuda_pathfinder/tests/test_find_nvidia_headers.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/cuda_pathfinder/tests/test_find_nvidia_headers.py b/cuda_pathfinder/tests/test_find_nvidia_headers.py index 9eedead024..bbc920d765 100644 --- a/cuda_pathfinder/tests/test_find_nvidia_headers.py +++ b/cuda_pathfinder/tests/test_find_nvidia_headers.py @@ -3,10 +3,7 @@ # Currently these installations are only manually tested: -# conda create --yes -n pathfinder_cu13 python=3.13 cuda-toolkit=13.0.2 -# conda activate pathfinder_cu13 -# conda install -y conda-forge::cutensor -# conda install -y conda-forge::libnvshmem3 conda-forge::libnvshmem-dev +# ../toolshed/conda_create_for_pathfinder_testing.* # wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/cuda-keyring_1.1-1_all.deb # sudo dpkg -i cuda-keyring_1.1-1_all.deb From 0734ac7770d206f2abc23cc5cd318c9890776156 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Tue, 28 Oct 2025 09:09:59 -0700 Subject: [PATCH 11/21] nvidia-libmathdx-... only exists for cu12: tolerate abs_path=None in test_load_nvidia_dynamic_lib.py --- cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py b/cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py index 652c8e57c0..cff5b74290 100644 --- a/cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py +++ b/cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py @@ -71,6 +71,7 @@ def test_runtime_error_on_non_64bit_python(): IMPORTLIB_METADATA_DISTRIBUTIONS_NAMES = { + "cufftMp": r"^nvidia-cufftmp-.*$", "mathdx": r"^nvidia-libmathdx-.*$", } From 3b156b70eef6af0a844c234ae29a0f9152f97b81 Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Tue, 28 Oct 2025 09:36:49 -0700 Subject: [PATCH 12/21] find_nvidia_headers.py cccl IS_WINDOWS: fall-through after checking for conda anomaly, to restore proper functioning for standard CTK installations --- cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py b/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py index b82cb54a1d..3d7939df08 100644 --- a/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py +++ b/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py @@ -39,13 +39,13 @@ def _find_based_on_ctk_layout(libname: str, h_basename: str, anchor_point: str) idir = os.path.join(*parts) if libname == "cccl": if IS_WINDOWS: + # conda has this anomaly cdir_ctk12 = os.path.join(idir, "targets", "x64") cdir_ctk13 = os.path.join(cdir_ctk12, "cccl") if _joined_isfile(cdir_ctk13, h_basename): return cdir_ctk13 if _joined_isfile(cdir_ctk12, h_basename): return cdir_ctk12 - return None cdir = os.path.join(idir, "cccl") # CTK 13 if _joined_isfile(cdir, h_basename): return cdir From 7aa6679d49baa40f482049189248e03661d0d9b7 Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Tue, 28 Oct 2025 09:42:45 -0700 Subject: [PATCH 13/21] Add cublasmp DIRECT_DEPENDENCIES (closes #1116) --- .../cuda/pathfinder/_dynamic_libs/supported_nvidia_libs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/supported_nvidia_libs.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/supported_nvidia_libs.py index e0ab545925..a0e13ab397 100644 --- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/supported_nvidia_libs.py +++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/supported_nvidia_libs.py @@ -82,6 +82,7 @@ } DIRECT_DEPENDENCIES = DIRECT_DEPENDENCIES_CTK | { "mathdx": ("nvrtc",), + "cublasmp": ("cublas", "cublasLt", "nvshmem_host"), "cufftMp": ("nvshmem_host",), "cudss": ("cublas", "cublasLt"), "cutensor": ("cublasLt",), From 9bfef5fabc110c8a5a5ff846afb21e7ef65abc82 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Tue, 28 Oct 2025 10:57:12 -0700 Subject: [PATCH 14/21] Add SUPPORTED_HEADERS_NON_CTK to cuda_pathfinder/docs/source/api.rst --- cuda_pathfinder/cuda/pathfinder/__init__.py | 5 ++++- cuda_pathfinder/docs/source/api.rst | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/cuda_pathfinder/cuda/pathfinder/__init__.py b/cuda_pathfinder/cuda/pathfinder/__init__.py index d931a264cf..a7fa17af39 100644 --- a/cuda_pathfinder/cuda/pathfinder/__init__.py +++ b/cuda_pathfinder/cuda/pathfinder/__init__.py @@ -11,14 +11,17 @@ ) from cuda.pathfinder._headers.find_nvidia_headers import find_nvidia_header_directory as find_nvidia_header_directory from cuda.pathfinder._headers.supported_nvidia_headers import SUPPORTED_HEADERS_CTK as _SUPPORTED_HEADERS_CTK +from cuda.pathfinder._headers.supported_nvidia_headers import SUPPORTED_HEADERS_NON_CTK as _SUPPORTED_HEADERS_NON_CTK from cuda.pathfinder._version import __version__ as __version__ -# Indirection to help Sphinx find the docstring. +# Indirections to help Sphinx find the docstrings. 
#: Mapping from short CUDA Toolkit (CTK) library names to their canonical #: header basenames (used to validate a discovered include directory). #: Example: ``"cublas" → "cublas.h"``. The key set is platform-aware #: (e.g., ``"cufile"`` may be Linux-only). SUPPORTED_HEADERS_CTK = _SUPPORTED_HEADERS_CTK +#: Mapping from non-CTK library names to their canonical header basenames. +SUPPORTED_HEADERS_NON_CTK = _SUPPORTED_HEADERS_NON_CTK # Backward compatibility: _find_nvidia_header_directory was added in release 1.2.2. # It will be removed in release 1.2.4. diff --git a/cuda_pathfinder/docs/source/api.rst b/cuda_pathfinder/docs/source/api.rst index 3cae4b6f70..72e5e40724 100644 --- a/cuda_pathfinder/docs/source/api.rst +++ b/cuda_pathfinder/docs/source/api.rst @@ -18,4 +18,5 @@ and experimental APIs for locating NVIDIA C/C++ header directories. DynamicLibNotFoundError SUPPORTED_HEADERS_CTK + SUPPORTED_HEADERS_NON_CTK find_nvidia_header_directory From 3a44b6abbf3b5d512b04068ea88cad5f5628d7de Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Tue, 28 Oct 2025 10:57:39 -0700 Subject: [PATCH 15/21] Add 1.3.2-notes.rst --- cuda_pathfinder/docs/nv-versions.json | 4 ++++ .../docs/source/release/1.3.2-notes.rst | 15 +++++++++++++++ 2 files changed, 19 insertions(+) create mode 100644 cuda_pathfinder/docs/source/release/1.3.2-notes.rst diff --git a/cuda_pathfinder/docs/nv-versions.json b/cuda_pathfinder/docs/nv-versions.json index f25c50bc6d..2bec723d4b 100644 --- a/cuda_pathfinder/docs/nv-versions.json +++ b/cuda_pathfinder/docs/nv-versions.json @@ -3,6 +3,10 @@ "version": "latest", "url": "https://nvidia.github.io/cuda-python/cuda-pathfinder/latest/" }, + { + "version": "1.3.2", + "url": "https://nvidia.github.io/cuda-python/cuda-pathfinder/1.3.2/" + }, { "version": "1.3.1", "url": "https://nvidia.github.io/cuda-python/cuda-pathfinder/1.3.1/" diff --git a/cuda_pathfinder/docs/source/release/1.3.2-notes.rst b/cuda_pathfinder/docs/source/release/1.3.2-notes.rst new file mode 100644 index 0000000000..a0b3efa247 --- /dev/null +++ b/cuda_pathfinder/docs/source/release/1.3.2-notes.rst @@ -0,0 +1,15 @@ +.. SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +.. SPDX-License-Identifier: Apache-2.0 + +.. py:currentmodule:: cuda.pathfinder + +``cuda-pathfinder`` 1.3.2 Release notes +======================================= + +Released on Oct 29, 2025 + +Highlights +---------- + +* Add cuTENSOR support & bug fixes discovered while working on conda testing + (`PR #1194 <https://github.com/NVIDIA/cuda-python/pull/1194>`_) From 0d71a85fca055ede9f7ba27f61cee2ce8c66c3c9 Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Tue, 28 Oct 2025 12:07:58 -0700 Subject: [PATCH 16/21] Add nvidia-cufftmp-cu13 data in supported_nvidia_libs.py Manually tested with: pip install nvidia-cufftmp-cu13==12.1.3.1 That wheel was yanked, therefore not adding to pyproject.toml --- .../cuda/pathfinder/_dynamic_libs/supported_nvidia_libs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/supported_nvidia_libs.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/supported_nvidia_libs.py index a0e13ab397..c205b5eda4 100644 --- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/supported_nvidia_libs.py +++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/supported_nvidia_libs.py @@ -241,7 +241,7 @@ } SUPPORTED_LINUX_SONAMES_OTHER = { "cublasmp": ("libcublasmp.so.0",), - "cufftMp": ("libcufftMp.so.11",), + "cufftMp": ("libcufftMp.so.12", "libcufftMp.so.11"), "mathdx": ("libmathdx.so.0",), "cudss": ("libcudss.so.0",), "cutensor": ("libcutensor.so.2",), @@ -453,7 +453,7 @@ SITE_PACKAGES_LIBDIRS_LINUX_OTHER = { "cublasmp": ("nvidia/cublasmp/cu13/lib", "nvidia/cublasmp/cu12/lib"), "cudss": ("nvidia/cu13/lib", "nvidia/cu12/lib"), - "cufftMp": ("nvidia/cufftmp/cu12/lib",), + "cufftMp": ("nvidia/cufftmp/cu13/lib", "nvidia/cufftmp/cu12/lib"), "cutensor": ("cutensor/lib",), "cutensorMg": ("cutensor/lib",), "mathdx": ("nvidia/cu13/lib", "nvidia/cu12/lib"), From 90ed27a61016e3a87dc232743f59b4a48d03ff32 Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Tue, 28 Oct 2025 12:56:06 -0700 Subject: [PATCH 17/21] Add missing comma in toolshed/conda_create_for_pathfinder_testing.ps1 --- toolshed/conda_create_for_pathfinder_testing.ps1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/toolshed/conda_create_for_pathfinder_testing.ps1 b/toolshed/conda_create_for_pathfinder_testing.ps1 index 0c550e4183..a2c6c31ef0 100644 --- a/toolshed/conda_create_for_pathfinder_testing.ps1 +++ b/toolshed/conda_create_for_pathfinder_testing.ps1 @@ -20,7 +20,7 @@ $cpkgs = @( "libcufftmp-dev", "libmathdx-dev", "libnvshmem3", - "libnvshmem-dev" + "libnvshmem-dev", "libnvpl-fft-dev" ) From 9671b24a29105da6f1fe7dc78e7cf09e354af3e7 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Tue, 28 Oct 2025 13:05:45 -0700 Subject: [PATCH 18/21] Systematically add _abs_norm() in find_nvidia_header_directory() --- .../cuda/pathfinder/_headers/find_nvidia_headers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py b/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py index 3d7939df08..beb6519b6e 100644 --- a/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py +++ b/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py @@ -147,15 +147,15 @@ def find_nvidia_header_directory(libname: str) -> Optional[str]: hdr_dir: Optional[str] # help mypy for cdir in candidate_dirs: if hdr_dir := _find_under_site_packages(cdir, h_basename): - return hdr_dir + return _abs_norm(hdr_dir) if hdr_dir := _find_based_on_conda_layout(libname, h_basename, False): - return hdr_dir + return _abs_norm(hdr_dir) candidate_dirs = supported_nvidia_headers.SUPPORTED_INSTALL_DIRS_NON_CTK.get(libname, []) for cdir in candidate_dirs: for hdr_dir in sorted(glob.glob(cdir), reverse=True): if _joined_isfile(hdr_dir, h_basename): - return hdr_dir + return _abs_norm(hdr_dir) return None From 
4d5a41af4f8fc8bb8cd4ba39d58e462bce10e933 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Tue, 28 Oct 2025 17:29:03 -0700 Subject: [PATCH 19/21] Move "conda has this anomaly" comment to the end of the line --- .../cuda/pathfinder/_headers/find_nvidia_headers.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py b/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py index beb6519b6e..3d422e1287 100644 --- a/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py +++ b/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py @@ -39,8 +39,7 @@ def _find_based_on_ctk_layout(libname: str, h_basename: str, anchor_point: str) idir = os.path.join(*parts) if libname == "cccl": if IS_WINDOWS: - # conda has this anomaly - cdir_ctk12 = os.path.join(idir, "targets", "x64") + cdir_ctk12 = os.path.join(idir, "targets", "x64") # conda has this anomaly cdir_ctk13 = os.path.join(cdir_ctk12, "cccl") if _joined_isfile(cdir_ctk13, h_basename): return cdir_ctk13 From 03e10b17cda174fc93e36f2728d7b5b2fbebf5eb Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Tue, 28 Oct 2025 23:46:44 -0700 Subject: [PATCH 20/21] Add empty SUPPORTED_HEADERS_NON_CTK_WINDOWS_ONLY in supported_nvidia_headers.py (mirrors supported_nvidia_libs.py) --- .../cuda/pathfinder/_headers/supported_nvidia_headers.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/cuda_pathfinder/cuda/pathfinder/_headers/supported_nvidia_headers.py b/cuda_pathfinder/cuda/pathfinder/_headers/supported_nvidia_headers.py index be41f72971..7fdf4159ff 100644 --- a/cuda_pathfinder/cuda/pathfinder/_headers/supported_nvidia_headers.py +++ b/cuda_pathfinder/cuda/pathfinder/_headers/supported_nvidia_headers.py @@ -64,9 +64,12 @@ SUPPORTED_HEADERS_NON_CTK_LINUX_ONLY = { "nvshmem": "nvshmem.h", } +SUPPORTED_HEADERS_NON_CTK_WINDOWS_ONLY: Final[dict[str, str]] = {} SUPPORTED_HEADERS_NON_CTK_LINUX = SUPPORTED_HEADERS_NON_CTK_COMMON | SUPPORTED_HEADERS_NON_CTK_LINUX_ONLY -SUPPORTED_HEADERS_NON_CTK_WINDOWS = SUPPORTED_HEADERS_NON_CTK_COMMON -SUPPORTED_HEADERS_NON_CTK_ALL = SUPPORTED_HEADERS_NON_CTK_COMMON | SUPPORTED_HEADERS_NON_CTK_LINUX_ONLY +SUPPORTED_HEADERS_NON_CTK_WINDOWS = SUPPORTED_HEADERS_NON_CTK_COMMON | SUPPORTED_HEADERS_NON_CTK_WINDOWS_ONLY +SUPPORTED_HEADERS_NON_CTK_ALL = ( + SUPPORTED_HEADERS_NON_CTK_COMMON | SUPPORTED_HEADERS_NON_CTK_LINUX_ONLY | SUPPORTED_HEADERS_NON_CTK_WINDOWS_ONLY +) SUPPORTED_HEADERS_NON_CTK: Final[dict[str, str]] = ( SUPPORTED_HEADERS_NON_CTK_WINDOWS if IS_WINDOWS else SUPPORTED_HEADERS_NON_CTK_LINUX ) From 43e81b405168d22f4dc2790f9e607801d153e02b Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Wed, 29 Oct 2025 13:33:56 -0700 Subject: [PATCH 21/21] Remove SUPPORTED_HEADERS_NON_CTK from public API, as requested by Leo offline --- cuda_pathfinder/cuda/pathfinder/__init__.py | 3 --- .../cuda/pathfinder/_headers/find_nvidia_headers.py | 8 -------- 2 files changed, 11 deletions(-) diff --git a/cuda_pathfinder/cuda/pathfinder/__init__.py b/cuda_pathfinder/cuda/pathfinder/__init__.py index a7fa17af39..143c4b45cc 100644 --- a/cuda_pathfinder/cuda/pathfinder/__init__.py +++ b/cuda_pathfinder/cuda/pathfinder/__init__.py @@ -11,7 +11,6 @@ ) from cuda.pathfinder._headers.find_nvidia_headers import find_nvidia_header_directory as find_nvidia_header_directory from cuda.pathfinder._headers.supported_nvidia_headers import SUPPORTED_HEADERS_CTK as _SUPPORTED_HEADERS_CTK -from cuda.pathfinder._headers.supported_nvidia_headers import SUPPORTED_HEADERS_NON_CTK as _SUPPORTED_HEADERS_NON_CTK from cuda.pathfinder._version import __version__ as __version__ # Indirections to help Sphinx find the docstrings. @@ -20,8 +19,6 @@ #: Example: ``"cublas" → "cublas.h"``. The key set is platform-aware #: (e.g., ``"cufile"`` may be Linux-only). SUPPORTED_HEADERS_CTK = _SUPPORTED_HEADERS_CTK -#: Mapping from non-CTK library names to their canonical header basenames. -SUPPORTED_HEADERS_NON_CTK = _SUPPORTED_HEADERS_NON_CTK # Backward compatibility: _find_nvidia_header_directory was added in release 1.2.2. # It will be removed in release 1.2.4. diff --git a/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py b/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py index 3d422e1287..b141700ab7 100644 --- a/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py +++ b/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py @@ -125,14 +125,6 @@ def find_nvidia_header_directory(libname: str) -> Optional[str]: 3. **CUDA Toolkit environment variables** - Use ``CUDA_HOME`` or ``CUDA_PATH`` (in that order). 
- - Notes: - - The ``SUPPORTED_HEADERS_CTK`` dictionary maps each supported CUDA Toolkit - (CTK) libname to the name of its canonical header (e.g., ``"cublas" → - "cublas.h"``). This is used to verify that the located directory is valid. - - Similarly, the ``SUPPORTED_HEADERS_NON_CTK`` dictionary maps non-CTK - libnames to the name of the corresponding canonical header. """ if libname in supported_nvidia_headers.SUPPORTED_HEADERS_CTK: