Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
2d1d8fe
Add support for cutensor. Still works for existing libnames, cutensor…
rwgk Oct 27, 2025
986a83f
Generalize tests/test_find_nvidia_headers.py to also cover cutensor
rwgk Oct 27, 2025
0583393
test_find_nvidia_headers.py conda testing and fix
rwgk Oct 27, 2025
1d1f534
test_load_nvidia_dynamic_lib.py fix conda testing
rwgk Oct 27, 2025
e0a4ca6
Add conda_create_for_pathfinder_testing.ps1
rwgk Oct 28, 2025
25dd364
Bug fix: SITE_PACKAGES_LIBDIRS_WINDOWS_OTHER cutensor, cutensorMg paths
rwgk Oct 28, 2025
cdab969
Add cudss paths to SITE_PACKAGES_LIBDIRS_WINDOWS_OTHER
rwgk Oct 28, 2025
6d490f5
Add SUPPORTED_HEADERS_NON_CTK_ALL to fix Windows site-packages tests
rwgk Oct 28, 2025
28e7206
Bug fix (existing code): conda cccl header directory
rwgk Oct 28, 2025
d36a62d
test_find_nvidia_headers.py: refer to toolshed/conda_create_for_pathf…
rwgk Oct 28, 2025
0734ac7
nvidia-libmathdx-... only exists for cu12: tolerate abs_path=None in …
rwgk Oct 28, 2025
3b156b7
find_nvidia_headers.py cccl IS_WINDOWS: fall-through after checking f…
rwgk Oct 28, 2025
7aa6679
Add cublasmp DIRECT_DEPENDENCIES (closes #1116)
rwgk Oct 28, 2025
9bfef5f
Add SUPPORTED_HEADERS_NON_CTK to cuda_pathfinder/docs/source/api.rst
rwgk Oct 28, 2025
3a44b6a
Add 1.3.2-notes.rst
rwgk Oct 28, 2025
0d71a85
Add nvidia-cufftmp-cu13 data in supported_nvidia_libs.py
rwgk Oct 28, 2025
b079c84
Merge branch 'main' into cutensor_support
rwgk Oct 28, 2025
90ed27a
Add missing comma in toolshed/conda_create_for_pathfinder_testing.ps1
rwgk Oct 28, 2025
9671b24
Systematically add _abs_norm() in find_nvidia_header_directory()
rwgk Oct 28, 2025
4d5a41a
Move "conda has this anomaly" comment to the end of the line
rwgk Oct 29, 2025
75d8874
Merge branch 'main' into cutensor_support
rwgk Oct 29, 2025
03e10b1
Add empty SUPPORTED_HEADERS_NON_CTK_WINDOWS_ONLY in supported_nvidia_…
rwgk Oct 29, 2025
43e81b4
Remove SUPPORTED_HEADERS_NON_CTK from public API, as requested by Leo…
rwgk Oct 29, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cuda_pathfinder/cuda/pathfinder/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from cuda.pathfinder._headers.supported_nvidia_headers import SUPPORTED_HEADERS_CTK as _SUPPORTED_HEADERS_CTK
from cuda.pathfinder._version import __version__ as __version__

# Indirection to help Sphinx find the docstring.
# Indirections to help Sphinx find the docstrings.
#: Mapping from short CUDA Toolkit (CTK) library names to their canonical
#: header basenames (used to validate a discovered include directory).
#: Example: ``"cublas" → "cublas.h"``. The key set is platform-aware
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,11 @@
}
# Direct-dependency table for all supported libraries: the CTK table extended
# with entries for non-CTK libraries. Each key is a short library name and each
# value is a tuple of the short names of the libraries it depends on directly.
# NOTE(review): presumably consumed by the dynamic-library loader so that
# dependencies are loaded first — confirm against the loader code.
DIRECT_DEPENDENCIES = DIRECT_DEPENDENCIES_CTK | {
"mathdx": ("nvrtc",),
"cublasmp": ("cublas", "cublasLt", "nvshmem_host"),
"cufftMp": ("nvshmem_host",),
"cudss": ("cublas", "cublasLt"),
"cutensor": ("cublasLt",),
"cutensorMg": ("cutensor", "cublasLt"),
}

# Based on these released files:
Expand Down Expand Up @@ -238,9 +241,11 @@
}
SUPPORTED_LINUX_SONAMES_OTHER = {
"cublasmp": ("libcublasmp.so.0",),
"cufftMp": ("libcufftMp.so.11",),
"cufftMp": ("libcufftMp.so.12", "libcufftMp.so.11"),
"mathdx": ("libmathdx.so.0",),
"cudss": ("libcudss.so.0",),
"cutensor": ("libcutensor.so.2",),
"cutensorMg": ("libcutensorMg.so.2",),
"nccl": ("libnccl.so.2",),
"nvpl_fftw": ("libnvpl_fftw.so.0",),
"nvshmem_host": ("libnvshmem_host.so.3",),
Expand Down Expand Up @@ -402,6 +407,8 @@
# Windows DLL file names for libraries that are not in the CTK table, keyed by
# short library name. Each value is a tuple of accepted DLL basenames.
# Note that the cutensor entries carry no version suffix, unlike the
# "<name>64_<major>.dll" pattern used by the entries above them.
SUPPORTED_WINDOWS_DLLS_OTHER = {
"mathdx": ("mathdx64_0.dll",),
"cudss": ("cudss64_0.dll",),
"cutensor": ("cutensor.dll",),
"cutensorMg": ("cutensorMg.dll",),
}
# Complete Windows table: CTK entries plus the non-CTK entries defined above.
SUPPORTED_WINDOWS_DLLS = SUPPORTED_WINDOWS_DLLS_CTK | SUPPORTED_WINDOWS_DLLS_OTHER

Expand Down Expand Up @@ -446,7 +453,9 @@
SITE_PACKAGES_LIBDIRS_LINUX_OTHER = {
"cublasmp": ("nvidia/cublasmp/cu13/lib", "nvidia/cublasmp/cu12/lib"),
"cudss": ("nvidia/cu13/lib", "nvidia/cu12/lib"),
"cufftMp": ("nvidia/cufftmp/cu12/lib",),
"cufftMp": ("nvidia/cufftmp/cu13/lib", "nvidia/cufftmp/cu12/lib"),
"cutensor": ("cutensor/lib",),
"cutensorMg": ("cutensor/lib",),
"mathdx": ("nvidia/cu13/lib", "nvidia/cu12/lib"),
"nccl": ("nvidia/nccl/lib",),
"nvpl_fftw": ("nvpl/lib",),
Expand Down Expand Up @@ -484,7 +493,10 @@
"nvvm": ("nvidia/cu13/bin/x86_64", "nvidia/cuda_nvcc/nvvm/bin"),
}
# Directories that may hold the DLLs of non-CTK libraries on Windows, keyed by
# short library name. Paths are "/"-separated.
# NOTE(review): presumably relative to a site-packages root (per the constant
# name) and tried in tuple order, cu13 before cu12 — confirm against the
# code that consumes this table.
SITE_PACKAGES_LIBDIRS_WINDOWS_OTHER = {
"cudss": ("nvidia/cu13/bin", "nvidia/cu12/bin"),
"mathdx": ("nvidia/cu13/bin/x86_64", "nvidia/cu12/bin"),
"cutensor": ("cutensor/bin",),
"cutensorMg": ("cutensor/bin",),
}
# Complete Windows table: CTK entries plus the non-CTK entries defined above.
SITE_PACKAGES_LIBDIRS_WINDOWS = SITE_PACKAGES_LIBDIRS_WINDOWS_CTK | SITE_PACKAGES_LIBDIRS_WINDOWS_OTHER

Expand Down
98 changes: 50 additions & 48 deletions cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,28 +22,12 @@ def _joined_isfile(dirpath: str, basename: str) -> bool:
return os.path.isfile(os.path.join(dirpath, basename))


def _find_nvshmem_header_directory() -> Optional[str]:
if IS_WINDOWS:
# nvshmem has no Windows support.
return None

def _find_under_site_packages(sub_dir: str, h_basename: str) -> Optional[str]:
# Installed from a wheel
nvidia_sub_dirs = ("nvidia", "nvshmem", "include")
hdr_dir: str # help mypy
for hdr_dir in find_sub_dirs_all_sitepackages(nvidia_sub_dirs):
if _joined_isfile(hdr_dir, "nvshmem.h"):
return hdr_dir

conda_prefix = os.environ.get("CONDA_PREFIX")
if conda_prefix and os.path.isdir(conda_prefix):
hdr_dir = os.path.join(conda_prefix, "include")
if _joined_isfile(hdr_dir, "nvshmem.h"):
return hdr_dir

for hdr_dir in sorted(glob.glob("/usr/include/nvshmem_*"), reverse=True):
if _joined_isfile(hdr_dir, "nvshmem.h"):
for hdr_dir in find_sub_dirs_all_sitepackages(tuple(sub_dir.split("/"))):
if _joined_isfile(hdr_dir, h_basename):
return hdr_dir

return None


Expand All @@ -54,6 +38,13 @@ def _find_based_on_ctk_layout(libname: str, h_basename: str, anchor_point: str)
parts.append("include")
idir = os.path.join(*parts)
if libname == "cccl":
if IS_WINDOWS:
cdir_ctk12 = os.path.join(idir, "targets", "x64") # conda has this anomaly
cdir_ctk13 = os.path.join(cdir_ctk12, "cccl")
if _joined_isfile(cdir_ctk13, h_basename):
return cdir_ctk13
if _joined_isfile(cdir_ctk12, h_basename):
return cdir_ctk12
cdir = os.path.join(idir, "cccl") # CTK 13
if _joined_isfile(cdir, h_basename):
return cdir
Expand All @@ -62,38 +53,40 @@ def _find_based_on_ctk_layout(libname: str, h_basename: str, anchor_point: str)
return None


def _find_based_on_conda_layout(libname: str, h_basename: str, conda_prefix: str) -> Optional[str]:
def _find_based_on_conda_layout(libname: str, h_basename: str, ctk_layout: bool) -> Optional[str]:
conda_prefix = os.environ.get("CONDA_PREFIX")
if not conda_prefix:
return None
if IS_WINDOWS:
anchor_point = os.path.join(conda_prefix, "Library")
if not os.path.isdir(anchor_point):
return None
else:
targets_include_path = glob.glob(os.path.join(conda_prefix, "targets", "*", "include"))
if not targets_include_path:
return None
if len(targets_include_path) != 1:
# Conda does not support multiple architectures.
# QUESTION(PR#956): Do we want to issue a warning?
return None
anchor_point = os.path.dirname(targets_include_path[0])
if ctk_layout:
targets_include_path = glob.glob(os.path.join(conda_prefix, "targets", "*", "include"))
if not targets_include_path:
return None
if len(targets_include_path) != 1:
# Conda does not support multiple architectures.
# QUESTION(PR#956): Do we want to issue a warning?
return None
include_path = targets_include_path[0]
else:
include_path = os.path.join(conda_prefix, "include")
anchor_point = os.path.dirname(include_path)
return _find_based_on_ctk_layout(libname, h_basename, anchor_point)


def _find_ctk_header_directory(libname: str) -> Optional[str]:
h_basename = supported_nvidia_headers.SUPPORTED_HEADERS_CTK[libname]
candidate_dirs = supported_nvidia_headers.SUPPORTED_SITE_PACKAGE_HEADER_DIRS_CTK[libname]

# Installed from a wheel
for cdir in candidate_dirs:
hdr_dir: str # help mypy
for hdr_dir in find_sub_dirs_all_sitepackages(tuple(cdir.split("/"))):
if _joined_isfile(hdr_dir, h_basename):
return hdr_dir
if hdr_dir := _find_under_site_packages(cdir, h_basename):
return hdr_dir

conda_prefix = os.environ.get("CONDA_PREFIX")
if conda_prefix: # noqa: SIM102
if result := _find_based_on_conda_layout(libname, h_basename, conda_prefix):
return result
if hdr_dir := _find_based_on_conda_layout(libname, h_basename, True):
return hdr_dir

cuda_home = get_cuda_home_or_path()
if cuda_home: # noqa: SIM102
Expand Down Expand Up @@ -132,19 +125,28 @@ def find_nvidia_header_directory(libname: str) -> Optional[str]:
3. **CUDA Toolkit environment variables**

- Use ``CUDA_HOME`` or ``CUDA_PATH`` (in that order).

Notes:
- The ``SUPPORTED_HEADERS_CTK`` dictionary maps each supported CUDA Toolkit
(CTK) library to the name of its canonical header (e.g., ``"cublas" →
"cublas.h"``). This is used to verify that the located directory is valid.

- The only supported non-CTK library at present is ``nvshmem``.
"""

if libname == "nvshmem":
return _abs_norm(_find_nvshmem_header_directory())

if libname in supported_nvidia_headers.SUPPORTED_HEADERS_CTK:
return _abs_norm(_find_ctk_header_directory(libname))

raise RuntimeError(f"UNKNOWN {libname=}")
h_basename = supported_nvidia_headers.SUPPORTED_HEADERS_NON_CTK.get(libname)
if h_basename is None:
raise RuntimeError(f"UNKNOWN {libname=}")

candidate_dirs = supported_nvidia_headers.SUPPORTED_SITE_PACKAGE_HEADER_DIRS_NON_CTK.get(libname, [])
hdr_dir: Optional[str] # help mypy
for cdir in candidate_dirs:
if hdr_dir := _find_under_site_packages(cdir, h_basename):
return _abs_norm(hdr_dir)

if hdr_dir := _find_based_on_conda_layout(libname, h_basename, False):
return _abs_norm(hdr_dir)

candidate_dirs = supported_nvidia_headers.SUPPORTED_INSTALL_DIRS_NON_CTK.get(libname, [])
for cdir in candidate_dirs:
for hdr_dir in sorted(glob.glob(cdir), reverse=True):
if _joined_isfile(hdr_dir, h_basename):
return _abs_norm(hdr_dir)

return None
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,28 @@
"nvrtc": ("nvidia/cu13/include", "nvidia/cuda_nvrtc/include"),
"nvvm": ("nvidia/cu13/include", "nvidia/cuda_nvcc/nvvm/include"),
}

# Header tables for libraries that are not part of the CUDA Toolkit (CTK).
# Each table maps a short library name to the basename of one header file.
# find_nvidia_header_directory() looks the basename up by library name and only
# returns a candidate directory if that file exists in it.

# Non-CTK libraries listed for both Linux and Windows.
SUPPORTED_HEADERS_NON_CTK_COMMON = {
"cutensor": "cutensor.h",
}
# Non-CTK libraries listed only for non-Windows platforms.
SUPPORTED_HEADERS_NON_CTK_LINUX_ONLY = {
"nvshmem": "nvshmem.h",
}
# Currently empty; kept so the Windows side mirrors the Linux side.
# NOTE(review): the explicit annotation is presumably needed because an empty
# literal gives the type checker nothing to infer from — confirm.
SUPPORTED_HEADERS_NON_CTK_WINDOWS_ONLY: Final[dict[str, str]] = {}
# Per-platform views: the common entries plus the platform-specific ones.
SUPPORTED_HEADERS_NON_CTK_LINUX = SUPPORTED_HEADERS_NON_CTK_COMMON | SUPPORTED_HEADERS_NON_CTK_LINUX_ONLY
SUPPORTED_HEADERS_NON_CTK_WINDOWS = SUPPORTED_HEADERS_NON_CTK_COMMON | SUPPORTED_HEADERS_NON_CTK_WINDOWS_ONLY
# Union over all platforms; unlike SUPPORTED_HEADERS_NON_CTK below, this does
# not depend on IS_WINDOWS.
# NOTE(review): appears intended for tests that need every key regardless of
# the host platform — confirm against the test suite.
SUPPORTED_HEADERS_NON_CTK_ALL = (
SUPPORTED_HEADERS_NON_CTK_COMMON | SUPPORTED_HEADERS_NON_CTK_LINUX_ONLY | SUPPORTED_HEADERS_NON_CTK_WINDOWS_ONLY
)
# The table for the platform this process is running on.
SUPPORTED_HEADERS_NON_CTK: Final[dict[str, str]] = (
SUPPORTED_HEADERS_NON_CTK_WINDOWS if IS_WINDOWS else SUPPORTED_HEADERS_NON_CTK_LINUX
)

# Candidate header sub-directories for wheel installs, keyed by library name.
# Each path is "/"-separated; the lookup code splits it on "/" and searches for
# the resulting sub-directory under the site-packages directories. Candidates
# are tried in tuple order and the first one containing the header wins.
SUPPORTED_SITE_PACKAGE_HEADER_DIRS_NON_CTK = {
"cutensor": ("cutensor/include",),
"nvshmem": ("nvidia/nvshmem/include",),
}

# System-install fallbacks, keyed by library name. Each entry is a glob
# pattern; matches are visited in reverse-sorted order and the first directory
# containing the header is returned.
# NOTE(review): reverse sorting presumably prefers the highest version suffix
# (e.g. nvshmem_<N>) — confirm this holds for multi-digit versions.
SUPPORTED_INSTALL_DIRS_NON_CTK = {
"nvshmem": ("/usr/include/nvshmem_*",),
}
2 changes: 1 addition & 1 deletion cuda_pathfinder/cuda/pathfinder/_version.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

__version__ = "1.3.1"
__version__ = "1.3.2"
4 changes: 4 additions & 0 deletions cuda_pathfinder/docs/nv-versions.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
"version": "latest",
"url": "https://nvidia.github.io/cuda-python/cuda-pathfinder/latest/"
},
{
"version": "1.3.2",
"url": "https://nvidia.github.io/cuda-python/cuda-pathfinder/1.3.2/"
},
{
"version": "1.3.1",
"url": "https://nvidia.github.io/cuda-python/cuda-pathfinder/1.3.1/"
Expand Down
1 change: 1 addition & 0 deletions cuda_pathfinder/docs/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,5 @@ and experimental APIs for locating NVIDIA C/C++ header directories.
DynamicLibNotFoundError

SUPPORTED_HEADERS_CTK
SUPPORTED_HEADERS_NON_CTK
find_nvidia_header_directory
15 changes: 15 additions & 0 deletions cuda_pathfinder/docs/source/release/1.3.2-notes.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
.. SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
.. SPDX-License-Identifier: Apache-2.0
.. py:currentmodule:: cuda.pathfinder
``cuda-pathfinder`` 1.3.2 Release notes
=======================================

Released on Oct 29, 2025

Highlights
----------

* Add cuTENSOR support, plus bug fixes discovered while working on conda testing
(`PR #1194 <https://github.com/NVIDIA/cuda-python/pull/1194>`_)
2 changes: 2 additions & 0 deletions cuda_pathfinder/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ test = [
cu12 = [
"cuda-toolkit[nvcc,cublas,nvrtc,cudart,cufft,curand,cusolver,cusparse,npp,nvfatbin,nvjitlink,nvjpeg,cccl]==12.*",
"cuda-toolkit[cufile]==12.*; sys_platform != 'win32'",
"cutensor-cu12",
"nvidia-cublasmp-cu12; sys_platform != 'win32'",
"nvidia-cudss-cu12",
"nvidia-cufftmp-cu12; sys_platform != 'win32'",
Expand All @@ -28,6 +29,7 @@ cu12 = [
cu13 = [
"cuda-toolkit[nvcc,cublas,nvrtc,cudart,cufft,curand,cusolver,cusparse,npp,nvfatbin,nvjitlink,nvjpeg,cccl,nvvm]==13.*",
"cuda-toolkit[cufile]==13.*; sys_platform != 'win32'",
"cutensor-cu13",
"nvidia-cublasmp-cu13; sys_platform != 'win32'",
"nvidia-cudss-cu13",
"nvidia-nccl-cu13; sys_platform != 'win32'",
Expand Down
16 changes: 16 additions & 0 deletions cuda_pathfinder/tests/local_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import functools
import importlib.metadata
import re


@functools.cache
def have_distribution(name_pattern: str) -> bool:
    """Report whether an installed distribution's name matches *name_pattern*.

    The pattern is a regular expression applied with ``re.match`` semantics,
    i.e. it is anchored at the start of the distribution name but not at the
    end. Distributions whose metadata has no ``Name`` field are ignored.

    Results are memoized per pattern, so distributions installed or removed
    after the first call with a given pattern are not noticed.
    """
    matches_name = re.compile(name_pattern).match
    for installed in importlib.metadata.distributions():
        meta = installed.metadata
        if "Name" not in meta:
            continue
        if matches_name(meta["Name"]):
            return True
    return False
Loading
Loading