diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml index b775670a9..88c8626f5 100644 --- a/.github/workflows/test-wheel-linux.yml +++ b/.github/workflows/test-wheel-linux.yml @@ -321,16 +321,15 @@ jobs: pip install $(ls cuda_python*.whl)[all] fi - - name: Install cuda.pathfinder nvidia_wheels_cu13 - if: startsWith(matrix.CUDA_VER, '13.') + - name: Install cuda.pathfinder extra wheels for testing run: | + set -euo pipefail pushd cuda_pathfinder - pip install -v .[nvidia_wheels_cu13] - pip freeze + pip install --only-binary=:all: -v ".[nvidia_wheels_cu${TEST_CUDA_MAJOR},nvidia_wheels_host]" + pip list popd - name: Run cuda.pathfinder tests with all_must_work - if: startsWith(matrix.CUDA_VER, '13.') env: CUDA_PATHFINDER_TEST_LOAD_NVIDIA_DYNAMIC_LIB_STRICTNESS: all_must_work run: run-tests pathfinder diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 18ddbcb45..797e082bf 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -288,17 +288,15 @@ jobs: pip install "$((Get-ChildItem -Filter cuda_python*.whl).FullName)[all]" } - - name: Install cuda.pathfinder nvidia_wheels_cu13 - if: startsWith(matrix.CUDA_VER, '13.') + - name: Install cuda.pathfinder extra wheels for testing shell: bash --noprofile --norc -xeuo pipefail {0} run: | pushd cuda_pathfinder - pip install -v .[nvidia_wheels_cu13] - pip freeze + pip install --only-binary=:all: -v ".[nvidia_wheels_cu${TEST_CUDA_MAJOR},nvidia_wheels_host]" + pip list popd - name: Run cuda.pathfinder tests with all_must_work - if: startsWith(matrix.CUDA_VER, '13.') env: CUDA_PATHFINDER_TEST_LOAD_NVIDIA_DYNAMIC_LIB_STRICTNESS: all_must_work shell: bash --noprofile --norc -xeuo pipefail {0} diff --git a/ci/tools/env-vars b/ci/tools/env-vars index 3dcb81a4c..19126cd13 100755 --- a/ci/tools/env-vars +++ b/ci/tools/env-vars @@ -69,6 +69,7 @@ elif [[ "${1}" == "test" ]]; then echo 
"SETUP_SANITIZER=${SETUP_SANITIZER}" >> $GITHUB_ENV echo "SKIP_CUDA_BINDINGS_TEST=${SKIP_CUDA_BINDINGS_TEST}" >> $GITHUB_ENV echo "SKIP_CYTHON_TEST=${SKIP_CYTHON_TEST}" >> $GITHUB_ENV + echo "TEST_CUDA_MAJOR=${TEST_CUDA_MAJOR}" >> $GITHUB_ENV fi echo "CUDA_BINDINGS_ARTIFACT_BASENAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}" >> $GITHUB_ENV diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py index bb6c32b63..18708a2b3 100644 --- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py +++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py @@ -10,6 +10,8 @@ from cuda.pathfinder._dynamic_libs.load_dl_common import DynamicLibNotFoundError from cuda.pathfinder._dynamic_libs.supported_nvidia_libs import ( IS_WINDOWS, + SITE_PACKAGES_LIBDIRS_LINUX, + SITE_PACKAGES_LIBDIRS_WINDOWS, is_suppressed_dll_file, ) from cuda.pathfinder._utils.find_sub_dirs import find_sub_dirs, find_sub_dirs_all_sitepackages @@ -28,22 +30,25 @@ def _no_such_file_in_sub_dirs( def _find_so_using_nvidia_lib_dirs( libname: str, so_basename: str, error_messages: list[str], attachments: list[str] ) -> Optional[str]: - file_wild = so_basename + "*" - nvidia_sub_dirs_list: list[tuple[str, ...]] = [("nvidia", "*", "lib")] # works also for CTK 13 nvvm - if libname == "nvvm": - nvidia_sub_dirs_list.append(("nvidia", "*", "nvvm", "lib64")) # CTK 12 - for nvidia_sub_dirs in nvidia_sub_dirs_list: - for lib_dir in find_sub_dirs_all_sitepackages(nvidia_sub_dirs): - # First look for an exact match - so_name = os.path.join(lib_dir, so_basename) - if os.path.isfile(so_name): - return so_name - # Look for a versioned library - # Using sort here mainly to make the result deterministic. 
- for so_name in sorted(glob.glob(os.path.join(lib_dir, file_wild))): + rel_dirs = SITE_PACKAGES_LIBDIRS_LINUX.get(libname) + if rel_dirs is not None: + sub_dirs_searched = [] + file_wild = so_basename + "*" + for rel_dir in rel_dirs: + sub_dir = tuple(rel_dir.split(os.path.sep)) + for abs_dir in find_sub_dirs_all_sitepackages(sub_dir): + # First look for an exact match + so_name = os.path.join(abs_dir, so_basename) if os.path.isfile(so_name): return so_name - _no_such_file_in_sub_dirs(nvidia_sub_dirs, file_wild, error_messages, attachments) + # Look for a versioned library + # Using sort here mainly to make the result deterministic. + for so_name in sorted(glob.glob(os.path.join(abs_dir, file_wild))): + if os.path.isfile(so_name): + return so_name + sub_dirs_searched.append(sub_dir) + for sub_dir in sub_dirs_searched: + _no_such_file_in_sub_dirs(sub_dir, file_wild, error_messages, attachments) return None @@ -59,18 +64,18 @@ def _find_dll_under_dir(dirpath: str, file_wild: str) -> Optional[str]: def _find_dll_using_nvidia_bin_dirs( libname: str, lib_searched_for: str, error_messages: list[str], attachments: list[str] ) -> Optional[str]: - nvidia_sub_dirs_list: list[tuple[str, ...]] = [ - ("nvidia", "*", "bin"), # CTK 12 - ("nvidia", "*", "bin", "*"), # CTK 13, e.g. 
site-packages\nvidia\cu13\bin\x86_64\ - ] - if libname == "nvvm": - nvidia_sub_dirs_list.append(("nvidia", "*", "nvvm", "bin")) # Only for CTK 12 - for nvidia_sub_dirs in nvidia_sub_dirs_list: - for bin_dir in find_sub_dirs_all_sitepackages(nvidia_sub_dirs): - dll_name = _find_dll_under_dir(bin_dir, lib_searched_for) - if dll_name is not None: - return dll_name - _no_such_file_in_sub_dirs(nvidia_sub_dirs, lib_searched_for, error_messages, attachments) + rel_dirs = SITE_PACKAGES_LIBDIRS_WINDOWS.get(libname) + if rel_dirs is not None: + sub_dirs_searched = [] + for rel_dir in rel_dirs: + sub_dir = tuple(rel_dir.split(os.path.sep)) + for abs_dir in find_sub_dirs_all_sitepackages(sub_dir): + dll_name = _find_dll_under_dir(abs_dir, lib_searched_for) + if dll_name is not None: + return dll_name + sub_dirs_searched.append(sub_dir) + for sub_dir in sub_dirs_searched: + _no_such_file_in_sub_dirs(sub_dir, lib_searched_for, error_messages, attachments) return None diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_linux.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_linux.py index a0bcbbd73..ef7f078c9 100644 --- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_linux.py +++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_linux.py @@ -8,7 +8,10 @@ from typing import Optional, cast from cuda.pathfinder._dynamic_libs.load_dl_common import LoadedDL -from cuda.pathfinder._dynamic_libs.supported_nvidia_libs import SUPPORTED_LINUX_SONAMES +from cuda.pathfinder._dynamic_libs.supported_nvidia_libs import ( + LIBNAMES_REQUIRING_RTLD_DEEPBIND, + SUPPORTED_LINUX_SONAMES, +) CDLL_MODE = os.RTLD_NOW | os.RTLD_GLOBAL @@ -138,6 +141,13 @@ def check_if_already_loaded_from_elsewhere(libname: str, _have_abs_path: bool) - return None +def _load_lib(libname: str, filename: str) -> ctypes.CDLL: + cdll_mode = CDLL_MODE + if libname in LIBNAMES_REQUIRING_RTLD_DEEPBIND: + cdll_mode |= os.RTLD_DEEPBIND + return ctypes.CDLL(filename, cdll_mode) + + def 
load_with_system_search(libname: str) -> Optional[LoadedDL]: """Try to load a library using system search paths. @@ -152,13 +162,14 @@ def load_with_system_search(libname: str) -> Optional[LoadedDL]: """ for soname in get_candidate_sonames(libname): try: - handle = ctypes.CDLL(soname, CDLL_MODE) + handle = _load_lib(libname, soname) + except OSError: + pass + else: abs_path = abs_path_for_dynamic_library(libname, handle) if abs_path is None: raise RuntimeError(f"No expected symbol for {libname=!r}") return LoadedDL(abs_path, False, handle._handle) - except OSError: - pass return None @@ -196,7 +207,7 @@ def load_with_abs_path(libname: str, found_path: str) -> LoadedDL: """ _work_around_known_bugs(libname, found_path) try: - handle = ctypes.CDLL(found_path, CDLL_MODE) + handle = _load_lib(libname, found_path) except OSError as e: raise RuntimeError(f"Failed to dlopen {found_path}: {e}") from e return LoadedDL(found_path, False, handle._handle) diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/supported_nvidia_libs.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/supported_nvidia_libs.py index ee41a48b4..c2c0a4b3a 100644 --- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/supported_nvidia_libs.py +++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/supported_nvidia_libs.py @@ -63,7 +63,7 @@ SUPPORTED_LIBNAMES = SUPPORTED_LIBNAMES_WINDOWS if IS_WINDOWS else SUPPORTED_LIBNAMES_LINUX # Based on ldd output for Linux x86_64 nvidia-*-cu12 wheels (12.8.1) -DIRECT_DEPENDENCIES = { +DIRECT_DEPENDENCIES_CTK = { "cublas": ("cublasLt",), "cufftw": ("cufft",), # "cufile_rdma": ("cufile",), @@ -82,6 +82,10 @@ "npps": ("nppc",), "nvblas": ("cublas", "cublasLt"), } +DIRECT_DEPENDENCIES = DIRECT_DEPENDENCIES_CTK | { + "mathdx": ("nvrtc",), + "cufftMp": ("nvshmem_host",), +} # Based on these released files: # cuda_11.0.3_450.51.06_linux.run @@ -104,7 +108,7 @@ # cuda_12.9.1_575.57.08_linux.run # cuda_13.0.0_580.65.06_linux.run # Generated with 
toolshed/build_pathfinder_sonames.py -SUPPORTED_LINUX_SONAMES = { +SUPPORTED_LINUX_SONAMES_CTK = { "cublas": ( "libcublas.so.11", "libcublas.so.12", @@ -232,6 +236,13 @@ "libnvvm.so.4", ), } +SUPPORTED_LINUX_SONAMES_OTHER = { + "cufftMp": ("libcufftMp.so.11",), + "mathdx": ("libmathdx.so.0",), + "nvpl_fftw": ("libnvpl_fftw.so.0",), + "nvshmem_host": ("libnvshmem_host.so.3",), +} +SUPPORTED_LINUX_SONAMES = SUPPORTED_LINUX_SONAMES_CTK | SUPPORTED_LINUX_SONAMES_OTHER # Based on these released files: # cuda_11.0.3_451.82_win10.exe @@ -254,7 +265,7 @@ # cuda_12.9.1_576.57_windows.exe # cuda_13.0.0_windows.exe # Generated with toolshed/build_pathfinder_dlls.py -SUPPORTED_WINDOWS_DLLS = { +SUPPORTED_WINDOWS_DLLS_CTK = { "cublas": ( "cublas64_11.dll", "cublas64_12.dll", @@ -384,12 +395,91 @@ "nvvm70.dll", ), } +SUPPORTED_WINDOWS_DLLS_OTHER = { + "mathdx": ("mathdx64_0.dll",), +} +SUPPORTED_WINDOWS_DLLS = SUPPORTED_WINDOWS_DLLS_CTK | SUPPORTED_WINDOWS_DLLS_OTHER LIBNAMES_REQUIRING_OS_ADD_DLL_DIRECTORY = ( "cufft", "nvrtc", ) +LIBNAMES_REQUIRING_RTLD_DEEPBIND = ("cufftMp",) + +# Based on output of toolshed/make_site_packages_libdirs_linux.py +SITE_PACKAGES_LIBDIRS_LINUX_CTK = { + "cublas": ("nvidia/cu13/lib", "nvidia/cublas/lib"), + "cublasLt": ("nvidia/cu13/lib", "nvidia/cublas/lib"), + "cudart": ("nvidia/cu13/lib", "nvidia/cuda_runtime/lib"), + "cufft": ("nvidia/cu13/lib", "nvidia/cufft/lib"), + "cufftw": ("nvidia/cu13/lib", "nvidia/cufft/lib"), + "cufile": ("nvidia/cu13/lib", "nvidia/cufile/lib"), + # "cufile_rdma": ("nvidia/cu13/lib", "nvidia/cufile/lib"), + "curand": ("nvidia/cu13/lib", "nvidia/curand/lib"), + "cusolver": ("nvidia/cu13/lib", "nvidia/cusolver/lib"), + "cusolverMg": ("nvidia/cu13/lib", "nvidia/cusolver/lib"), + "cusparse": ("nvidia/cu13/lib", "nvidia/cusparse/lib"), + "nppc": ("nvidia/cu13/lib", "nvidia/npp/lib"), + "nppial": ("nvidia/cu13/lib", "nvidia/npp/lib"), + "nppicc": ("nvidia/cu13/lib", "nvidia/npp/lib"), + "nppidei": ("nvidia/cu13/lib", 
"nvidia/npp/lib"), + "nppif": ("nvidia/cu13/lib", "nvidia/npp/lib"), + "nppig": ("nvidia/cu13/lib", "nvidia/npp/lib"), + "nppim": ("nvidia/cu13/lib", "nvidia/npp/lib"), + "nppist": ("nvidia/cu13/lib", "nvidia/npp/lib"), + "nppisu": ("nvidia/cu13/lib", "nvidia/npp/lib"), + "nppitc": ("nvidia/cu13/lib", "nvidia/npp/lib"), + "npps": ("nvidia/cu13/lib", "nvidia/npp/lib"), + "nvJitLink": ("nvidia/cu13/lib", "nvidia/nvjitlink/lib"), + "nvblas": ("nvidia/cu13/lib", "nvidia/cublas/lib"), + "nvfatbin": ("nvidia/cu13/lib", "nvidia/nvfatbin/lib"), + "nvjpeg": ("nvidia/cu13/lib", "nvidia/nvjpeg/lib"), + "nvrtc": ("nvidia/cu13/lib", "nvidia/cuda_nvrtc/lib"), + "nvvm": ("nvidia/cu13/lib", "nvidia/cuda_nvcc/nvvm/lib64"), +} +SITE_PACKAGES_LIBDIRS_LINUX_OTHER = { + "cufftMp": ("nvidia/cufftmp/cu12/lib",), + "mathdx": ("nvidia/cu13/lib", "nvidia/cu12/lib"), + "nvpl_fftw": ("nvpl/lib",), + "nvshmem_host": ("nvidia/nvshmem/lib",), +} +SITE_PACKAGES_LIBDIRS_LINUX = SITE_PACKAGES_LIBDIRS_LINUX_CTK | SITE_PACKAGES_LIBDIRS_LINUX_OTHER + +# Based on output of toolshed/make_site_packages_libdirs_windows.py +SITE_PACKAGES_LIBDIRS_WINDOWS_CTK = { + "cublas": ("nvidia/cu13/bin/x86_64", "nvidia/cublas/bin"), + "cublasLt": ("nvidia/cu13/bin/x86_64", "nvidia/cublas/bin"), + "cudart": ("nvidia/cu13/bin/x86_64", "nvidia/cuda_runtime/bin"), + "cufft": ("nvidia/cu13/bin/x86_64", "nvidia/cufft/bin"), + "cufftw": ("nvidia/cu13/bin/x86_64", "nvidia/cufft/bin"), + "curand": ("nvidia/cu13/bin/x86_64", "nvidia/curand/bin"), + "cusolver": ("nvidia/cu13/bin/x86_64", "nvidia/cusolver/bin"), + "cusolverMg": ("nvidia/cu13/bin/x86_64", "nvidia/cusolver/bin"), + "cusparse": ("nvidia/cu13/bin/x86_64", "nvidia/cusparse/bin"), + "nppc": ("nvidia/cu13/bin/x86_64", "nvidia/npp/bin"), + "nppial": ("nvidia/cu13/bin/x86_64", "nvidia/npp/bin"), + "nppicc": ("nvidia/cu13/bin/x86_64", "nvidia/npp/bin"), + "nppidei": ("nvidia/cu13/bin/x86_64", "nvidia/npp/bin"), + "nppif": ("nvidia/cu13/bin/x86_64", "nvidia/npp/bin"), + 
"nppig": ("nvidia/cu13/bin/x86_64", "nvidia/npp/bin"), + "nppim": ("nvidia/cu13/bin/x86_64", "nvidia/npp/bin"), + "nppist": ("nvidia/cu13/bin/x86_64", "nvidia/npp/bin"), + "nppisu": ("nvidia/cu13/bin/x86_64", "nvidia/npp/bin"), + "nppitc": ("nvidia/cu13/bin/x86_64", "nvidia/npp/bin"), + "npps": ("nvidia/cu13/bin/x86_64", "nvidia/npp/bin"), + "nvJitLink": ("nvidia/cu13/bin/x86_64", "nvidia/nvjitlink/bin"), + "nvblas": ("nvidia/cu13/bin/x86_64", "nvidia/cublas/bin"), + "nvfatbin": ("nvidia/cu13/bin/x86_64", "nvidia/nvfatbin/bin"), + "nvjpeg": ("nvidia/cu13/bin/x86_64", "nvidia/nvjpeg/bin"), + "nvrtc": ("nvidia/cu13/bin/x86_64", "nvidia/cuda_nvrtc/bin"), + "nvvm": ("nvidia/cu13/bin/x86_64", "nvidia/cuda_nvcc/nvvm/bin"), +} +SITE_PACKAGES_LIBDIRS_WINDOWS_OTHER = { + "mathdx": ("nvidia/cu13/bin/x86_64", "nvidia/cu12/bin"), +} +SITE_PACKAGES_LIBDIRS_WINDOWS = SITE_PACKAGES_LIBDIRS_WINDOWS_CTK | SITE_PACKAGES_LIBDIRS_WINDOWS_OTHER + def is_suppressed_dll_file(path_basename: str) -> bool: if path_basename.startswith("nvrtc"): diff --git a/cuda_pathfinder/cuda/pathfinder/_utils/find_site_packages_dll.py b/cuda_pathfinder/cuda/pathfinder/_utils/find_site_packages_dll.py new file mode 100644 index 000000000..2f5695093 --- /dev/null +++ b/cuda_pathfinder/cuda/pathfinder/_utils/find_site_packages_dll.py @@ -0,0 +1,26 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0

import collections
import functools
import importlib.metadata


@functools.cache
def find_all_dll_files_via_metadata() -> dict[str, tuple[str, ...]]:
    """Index every ``.dll`` file recorded in installed distribution metadata.

    Returns:
        A mapping from the lower-cased DLL file name to a sorted tuple of
        the locations reported by ``dist.locate_file`` (converted to ``str``).

    The result is memoized by ``functools.cache``: the scan runs once per
    process and later calls return the very same dict object.
    """

    def _dist_order(dist):
        # Order distributions by (name, version) so the scan is deterministic.
        return (dist.metadata.get("Name", ""), dist.version)

    dll_paths_by_name: collections.defaultdict[str, list[str]] = collections.defaultdict(list)

    for dist in sorted(importlib.metadata.distributions(), key=_dist_order):
        recorded_files = dist.files
        if not recorded_files:
            # No file list recorded for this distribution (None or empty).
            continue
        for entry in sorted(recorded_files, key=str):  # deterministic
            lowered_name = entry.name.lower()
            if lowered_name.endswith(".dll"):
                dll_paths_by_name[lowered_name].append(str(dist.locate_file(entry)))

    # Hand back a plain dict; each path list is sorted and frozen into a tuple.
    return {name: tuple(sorted(paths)) for name, paths in dll_paths_by_name.items()}
# SPDX-License-Identifier: Apache-2.0

import collections
import functools
import importlib.metadata
import re

_SO_RE = re.compile(r"\.so(?:$|\.)")  # matches libfoo.so or libfoo.so.1.2.3


def split_so_version_suffix(so_filename: str) -> tuple[str, str]:
    """Split a shared-object file name at the end of its ``.so`` marker.

    Args:
        so_filename: A file name such as ``libfoo.so`` or ``libfoo.so.1.2.3``.

    Returns:
        ``(name_through_dot_so, version_suffix)``, for example
        ``("libfoo.so", ".1.2.3")``. The suffix is ``""`` when the name ends
        in ``.so``.

    Raises:
        ValueError: If the name contains no ``.so`` that is followed by ``.``
            or by the end of the string (the same boundary ``_SO_RE`` uses).
    """
    idx = so_filename.rfind(".so")
    while idx >= 0:
        end = idx + 3
        if end == len(so_filename) or so_filename[end] == ".":
            return (so_filename[:end], so_filename[end:])
        # This ".so" is only a prefix of something longer (e.g. ".sometag");
        # keep looking for a real boundary further to the left.
        idx = so_filename.rfind(".so", 0, idx + 2)
    # Raise instead of assert so the check survives `python -O`.
    raise ValueError(f"not a shared-object file name: {so_filename!r}")


@functools.cache
def find_all_so_files_via_metadata() -> dict[str, dict[str, tuple[str, ...]]]:
    """Index every shared-object file recorded in installed distribution metadata.

    Returns:
        A two-level mapping ``{so_basename: {version_suffix: paths}}`` where
        ``so_basename`` ends in ``.so``, ``version_suffix`` is the remainder of
        the file name (possibly ``""``), and ``paths`` is a sorted tuple of the
        locations reported by ``dist.locate_file`` (converted to ``str``).

    The result is memoized by ``functools.cache``, so the scan runs once per
    process.
    """
    results: collections.defaultdict[str, collections.defaultdict[str, list[str]]] = collections.defaultdict(
        lambda: collections.defaultdict(list)
    )

    # sort dists for deterministic output
    for dist in sorted(importlib.metadata.distributions(), key=lambda d: (d.metadata.get("Name", ""), d.version)):
        files = dist.files
        if not files:
            continue
        for relpath in sorted(files, key=str):  # deterministic
            relname = relpath.name
            if not _SO_RE.search(relname):
                continue
            # Cannot raise here: a _SO_RE match guarantees a ".so" boundary.
            so_basename, so_version_suffix = split_so_version_suffix(relname)
            abs_path = str(dist.locate_file(relpath))
            results[so_basename][so_version_suffix].append(abs_path)

    # plain dicts; sort inner lists for stability
    return {k: {kk: tuple(sorted(vv)) for kk, vv in v.items()} for k, v in results.items()}
# SPDX-License-Identifier: Apache-2.0 -__version__ = "1.1.1a2" +__version__ = "1.1.1a3" diff --git a/cuda_pathfinder/pyproject.toml b/cuda_pathfinder/pyproject.toml index 6545c4e51..fc5dc74d8 100644 --- a/cuda_pathfinder/pyproject.toml +++ b/cuda_pathfinder/pyproject.toml @@ -17,10 +17,17 @@ test = [ nvidia_wheels_cu12 = [ "cuda-toolkit[nvcc,cublas,nvrtc,cudart,cufft,curand,cusolver,cusparse,npp,nvfatbin,nvjitlink,nvjpeg]==12.*", "cuda-toolkit[cufile]==12.*; sys_platform != 'win32'", + "nvidia-libmathdx-cu12", + "nvidia-cufftmp-cu12; sys_platform != 'win32'", + "nvidia-nvshmem-cu12; sys_platform != 'win32'", ] nvidia_wheels_cu13 = [ "cuda-toolkit[nvcc,cublas,nvrtc,cudart,cufft,curand,cusolver,cusparse,npp,nvfatbin,nvjitlink,nvjpeg,nvvm]==13.*", "cuda-toolkit[cufile]==13.*; sys_platform != 'win32'", + "nvidia-nvshmem-cu13; sys_platform != 'win32'", +] +nvidia_wheels_host = [ + "nvpl-fft; platform_system == 'Linux' and platform_machine == 'aarch64'", ] [project.urls] diff --git a/cuda_pathfinder/tests/child_load_nvidia_dynamic_lib_helper.py b/cuda_pathfinder/tests/child_load_nvidia_dynamic_lib_helper.py new file mode 100644 index 000000000..4ca905989 --- /dev/null +++ b/cuda_pathfinder/tests/child_load_nvidia_dynamic_lib_helper.py @@ -0,0 +1,53 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# This helper is factored out so spawned child processes only import this +# lightweight module. That avoids re-importing the test module (and +# repeating its potentially expensive setup) in every child process. 
import os
import sys


def build_child_process_failed_for_libname_message(libname, result):
    """Format a failure report for a child process that was run for ``libname``.

    ``result`` must expose ``returncode``, ``stdout`` and ``stderr``.
    """
    sections = [
        f"Child process failed for {libname=!r} with exit code {result.returncode}\n",
        f"--- stdout-from-child-process ---\n{result.stdout}\n",
        f"--- stderr-from-child-process ---\n{result.stderr}\n",
    ]
    return "".join(sections)


def validate_abs_path(abs_path):
    """Assert that ``abs_path`` is a non-empty absolute path to an existing file."""
    assert abs_path, f"empty path: {abs_path=!r}"
    assert os.path.isabs(abs_path), f"not absolute: {abs_path=!r}"
    assert os.path.isfile(abs_path), f"not a file: {abs_path=!r}"


def child_process_func(libname):
    """Load ``libname`` three ways and cross-check the results.

    Raises RuntimeError when one of the consistency checks fails; on success
    the repr of the freshly loaded library's absolute path is written to
    stdout.
    """
    # Imported here so that importing this helper module stays lightweight.
    from cuda.pathfinder import load_nvidia_dynamic_lib
    from cuda.pathfinder._dynamic_libs.load_nvidia_dynamic_lib import _load_lib_no_cache
    from cuda.pathfinder._dynamic_libs.supported_nvidia_libs import (
        IS_WINDOWS,
        SUPPORTED_LINUX_SONAMES,
        SUPPORTED_WINDOWS_DLLS,
    )

    # 1. First load in this process: must not be reported as already loaded.
    loaded_dl_fresh = load_nvidia_dynamic_lib(libname)
    if loaded_dl_fresh.was_already_loaded_from_elsewhere:
        raise RuntimeError("loaded_dl_fresh.was_already_loaded_from_elsewhere")
    validate_abs_path(loaded_dl_fresh.abs_path)

    # 2. Second load: must return the very same object.
    loaded_dl_from_cache = load_nvidia_dynamic_lib(libname)
    if loaded_dl_from_cache is not loaded_dl_fresh:
        raise RuntimeError("loaded_dl_from_cache is not loaded_dl_fresh")

    # 3. Load without the cache and compare against the fresh load.
    loaded_dl_no_cache = _load_lib_no_cache(libname)
    # check_if_already_loaded_from_elsewhere relies on these:
    supported_libs = SUPPORTED_WINDOWS_DLLS if IS_WINDOWS else SUPPORTED_LINUX_SONAMES
    expect_already_loaded = libname in supported_libs
    if expect_already_loaded and not loaded_dl_no_cache.was_already_loaded_from_elsewhere:
        raise RuntimeError("not loaded_dl_no_cache.was_already_loaded_from_elsewhere")
    same_file = os.path.samefile(loaded_dl_no_cache.abs_path, loaded_dl_fresh.abs_path)
    if not same_file:
        raise RuntimeError(f"not os.path.samefile({loaded_dl_no_cache.abs_path=!r}, {loaded_dl_fresh.abs_path=!r})")
    validate_abs_path(loaded_dl_no_cache.abs_path)

    sys.stdout.write(f"{loaded_dl_fresh.abs_path!r}\n")
b/cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py index 6b8302c15..5f35d996d 100644 --- a/cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py +++ b/cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py @@ -1,15 +1,18 @@ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 +import functools import os -import sys from unittest.mock import patch import pytest import spawned_process_runner +from child_load_nvidia_dynamic_lib_helper import build_child_process_failed_for_libname_message, child_process_func from cuda.pathfinder import SUPPORTED_NVIDIA_LIBNAMES, load_nvidia_dynamic_lib from cuda.pathfinder._dynamic_libs import supported_nvidia_libs +from cuda.pathfinder._utils.find_site_packages_dll import find_all_dll_files_via_metadata +from cuda.pathfinder._utils.find_site_packages_so import find_all_so_files_via_metadata STRICTNESS = os.environ.get("CUDA_PATHFINDER_TEST_LOAD_NVIDIA_DYNAMIC_LIB_STRICTNESS", "see_what_works") assert STRICTNESS in ("see_what_works", "all_must_work") @@ -17,13 +20,25 @@ def test_supported_libnames_linux_sonames_consistency(): assert tuple(sorted(supported_nvidia_libs.SUPPORTED_LIBNAMES_LINUX)) == tuple( - sorted(supported_nvidia_libs.SUPPORTED_LINUX_SONAMES.keys()) + sorted(supported_nvidia_libs.SUPPORTED_LINUX_SONAMES_CTK.keys()) ) def test_supported_libnames_windows_dlls_consistency(): assert tuple(sorted(supported_nvidia_libs.SUPPORTED_LIBNAMES_WINDOWS)) == tuple( - sorted(supported_nvidia_libs.SUPPORTED_WINDOWS_DLLS.keys()) + sorted(supported_nvidia_libs.SUPPORTED_WINDOWS_DLLS_CTK.keys()) + ) + + +def test_supported_libnames_linux_site_packages_libdirs_ctk_consistency(): + assert tuple(sorted(supported_nvidia_libs.SUPPORTED_LIBNAMES_LINUX)) == tuple( + sorted(supported_nvidia_libs.SITE_PACKAGES_LIBDIRS_LINUX_CTK.keys()) + ) + + +def test_supported_libnames_windows_site_packages_libdirs_ctk_consistency(): + assert 
tuple(sorted(supported_nvidia_libs.SUPPORTED_LIBNAMES_WINDOWS)) == tuple( + sorted(supported_nvidia_libs.SITE_PACKAGES_LIBDIRS_WINDOWS_CTK.keys()) ) @@ -54,45 +69,28 @@ def test_runtime_error_on_non_64bit_python(): load_nvidia_dynamic_lib("not_used") -def build_child_process_failed_for_libname_message(libname, result): - return ( - f"Child process failed for {libname=!r} with exit code {result.returncode}\n" - f"--- stdout-from-child-process ---\n{result.stdout}\n" - f"--- stderr-from-child-process ---\n{result.stderr}\n" - ) - - -def validate_abs_path(abs_path): - assert abs_path, f"empty path: {abs_path=!r}" - assert os.path.isabs(abs_path), f"not absolute: {abs_path=!r}" - assert os.path.isfile(abs_path), f"not a file: {abs_path=!r}" - - -def child_process_func(libname): - import os - - from cuda.pathfinder._dynamic_libs.load_nvidia_dynamic_lib import _load_lib_no_cache - - loaded_dl_fresh = load_nvidia_dynamic_lib(libname) - if loaded_dl_fresh.was_already_loaded_from_elsewhere: - raise RuntimeError("loaded_dl_fresh.was_already_loaded_from_elsewhere") - validate_abs_path(loaded_dl_fresh.abs_path) - - loaded_dl_from_cache = load_nvidia_dynamic_lib(libname) - if loaded_dl_from_cache is not loaded_dl_fresh: - raise RuntimeError("loaded_dl_from_cache is not loaded_dl_fresh") - - loaded_dl_no_cache = _load_lib_no_cache(libname) - if not loaded_dl_no_cache.was_already_loaded_from_elsewhere: - raise RuntimeError("loaded_dl_no_cache.was_already_loaded_from_elsewhere") - if not os.path.samefile(loaded_dl_no_cache.abs_path, loaded_dl_fresh.abs_path): - raise RuntimeError(f"not os.path.samefile({loaded_dl_no_cache.abs_path=!r}, {loaded_dl_fresh.abs_path=!r})") - validate_abs_path(loaded_dl_no_cache.abs_path) +@functools.cache +def _get_libnames_for_test_load_nvidia_dynamic_lib(): + result = list(SUPPORTED_NVIDIA_LIBNAMES) + if supported_nvidia_libs.IS_WINDOWS: + spld_other = supported_nvidia_libs.SITE_PACKAGES_LIBDIRS_WINDOWS_OTHER + all_dyn_libs = 
find_all_dll_files_via_metadata() + for libname in spld_other: + for dll_name in all_dyn_libs: + if dll_name.startswith(libname): + result.append(libname) + else: + spld_other = supported_nvidia_libs.SITE_PACKAGES_LIBDIRS_LINUX_OTHER + all_dyn_libs = find_all_so_files_via_metadata() + for libname in spld_other: + so_basename = f"lib{libname}.so" + if so_basename in all_dyn_libs: + result.append(libname) - sys.stdout.write(f"{loaded_dl_fresh.abs_path!r}\n") + return tuple(result) -@pytest.mark.parametrize("libname", SUPPORTED_NVIDIA_LIBNAMES) +@pytest.mark.parametrize("libname", _get_libnames_for_test_load_nvidia_dynamic_lib()) def test_load_nvidia_dynamic_lib(info_summary_append, libname): # We intentionally run each dynamic library operation in a child process # to ensure isolation of global dynamic linking state (e.g., dlopen handles). diff --git a/toolshed/collect_site_packages_dll_files.ps1 b/toolshed/collect_site_packages_dll_files.ps1 new file mode 100644 index 000000000..9f1ccce93 --- /dev/null +++ b/toolshed/collect_site_packages_dll_files.ps1 @@ -0,0 +1,44 @@ +# collect_site_packages_dll_files.ps1 + +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Usage: +# cd cuda-python +# powershell -File toolshed\collect_site_packages_dll_files.ps1 +# python .\toolshed\make_site_packages_libdirs.py windows site_packages_dll.txt + +$ErrorActionPreference = 'Stop' + +function Fresh-Venv { + param( + [Parameter(Mandatory=$true)] + [string] $Path + ) + & python3 -m venv $Path + . 
(Join-Path $Path 'Scripts\Activate.ps1') + python -m pip install --upgrade pip +} + +Set-Location -Path 'cuda_pathfinder' + +Fresh-Venv -Path '..\TmpCp12Venv' +pip install --only-binary=:all: -e '.[test,nvidia_wheels_cu12,nvidia_wheels_host]' +deactivate + +Fresh-Venv -Path '..\TmpCp13Venv' +pip install --only-binary=:all: -e '.[test,nvidia_wheels_cu13,nvidia_wheels_host]' +deactivate + +Set-Location -Path '..' + +$venvs = @('TmpCp12Venv', 'TmpCp13Venv') + +$matches = + Get-ChildItem -Path $venvs -Recurse -File -Include '*.dll' | + Where-Object { $_.FullName -match '(?i)(nvidia|nvpl)' } | + Select-Object -ExpandProperty FullName | + Sort-Object -Unique + +$outFile = 'site_packages_dll.txt' +$matches | Set-Content -Path $outFile -Encoding utf8 diff --git a/toolshed/collect_site_packages_so_files.sh b/toolshed/collect_site_packages_so_files.sh new file mode 100755 index 000000000..000bdb64c --- /dev/null +++ b/toolshed/collect_site_packages_so_files.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Usage: +# cd cuda-python +# ./toolshed/collect_site_packages_so_files.sh +# ./toolshed/make_site_packages_libdirs.py linux site_packages_so.txt + +set -euo pipefail +fresh_venv() { + python3 -m venv "$1" + . "$1/bin/activate" + pip install --upgrade pip +} +cd cuda_pathfinder/ +fresh_venv ../TmpCp12Venv +set -x +pip install --only-binary=:all: -e .[test,nvidia_wheels_cu12,nvidia_wheels_host] +set +x +deactivate +fresh_venv ../TmpCp13Venv +set -x +pip install --only-binary=:all: -e .[test,nvidia_wheels_cu13,nvidia_wheels_host] +set +x +deactivate +cd .. 
#!/usr/bin/env python3

# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# For usage see top of collect_site_packages_*_files.*

import argparse
import os
import re
from typing import Callable, Dict, Iterable, Optional, Sequence, Set

_SITE_PACKAGES_RE = re.compile(r"(?i)^.*?/site-packages/")

# "lib<NAME>.so", optionally followed by a ".<version>" suffix.
_SO_FILENAME_RE = re.compile(r"^lib(?P<name>.+?)\.so(?:\.|$)")

# Indentation of each entry in the generated dict literal.
_ENTRY_INDENT = "    "


def strip_site_packages_prefix(p: str) -> str:
    r"""Remove any leading '.../site-packages/' (handles '\' or '/', case-insensitive).

    Backslashes are converted to forward slashes first, so the returned path
    always uses '/' as its separator.
    """
    p = p.replace("\\", "/")
    return _SITE_PACKAGES_RE.sub("", p)


def _extract_libname_from_so(fname: str) -> Optional[str]:
    """Return NAME for 'libNAME.so[.version]', or None for any other file name."""
    match = _SO_FILENAME_RE.match(fname)
    return match.group("name") if match else None


def extract_libname_from_dll(fname: str) -> Optional[str]:
    """Return base libname per the heuristic, or None if not a .dll.

    The heuristic keeps the leading characters of the file stem up to (not
    including) the first underscore or digit, e.g. 'cublas64_13.dll' gives
    'cublas'. None is also returned when nothing is left of the stem.
    """
    base = os.path.basename(fname)
    if not base.lower().endswith(".dll"):
        return None
    stem = base[:-4]  # drop ".dll"
    cut = next((i for i, ch in enumerate(stem) if ch == "_" or ch.isdigit()), len(stem))
    return stem[:cut] or None


def _collect_dirs(lines: Iterable[str], extract_libname: Callable[[str], Optional[str]]) -> Dict[str, Set[str]]:
    """Map each library name found in *lines* to the set of directories holding it.

    Blank lines and lines starting with '#' are ignored, as are paths whose
    file name *extract_libname* does not recognize.
    """
    dirs_by_name: Dict[str, Set[str]] = {}
    for raw in lines:
        line = raw.strip()
        if not line or line.startswith("#"):
            continue
        dirpath, fname = os.path.split(strip_site_packages_prefix(line))
        libname = extract_libname(fname)
        if libname:
            dirs_by_name.setdefault(libname, set()).add(dirpath)
    return dirs_by_name


def parse_lines_linux(lines: Iterable[str]) -> Dict[str, Set[str]]:
    """Collect {libname: set(dirnames)} from paths to libNAME.so[.version] files."""
    return _collect_dirs(lines, _extract_libname_from_so)


def parse_lines_windows(lines: Iterable[str]) -> Dict[str, Set[str]]:
    """Collect {libname: set(dirnames)} with deduped directories."""
    return _collect_dirs(lines, extract_libname_from_dll)


def dict_literal(d: Dict[str, Set[str]]) -> str:
    """Pretty, stable dict literal with tuple values (singletons keep trailing comma)."""
    lines = ["{"]
    for k in sorted(d):
        dirs = sorted(d[k])
        tup = "(" + ", ".join(repr(x) for x in dirs) + ("," if len(dirs) == 1 else "") + ")"
        lines.append(f"{_ENTRY_INDENT}{k!r}: {tup},")
    lines.append("}")
    return "\n".join(lines)


def main(argv: Optional[Sequence[str]] = None) -> None:
    """Parse the command line, read the path list, and print the dict literal.

    Args:
        argv: Command-line arguments without the program name. When None,
            argparse falls back to ``sys.argv[1:]``.
    """
    ap = argparse.ArgumentParser(description="Convert a list of site-packages library paths into {name: (dirs, ...)}")
    ap.add_argument("platform", choices=["linux", "windows"], help="Target platform to parse")
    ap.add_argument("path", help="Text file with one library path per line")
    args = ap.parse_args(argv)

    # "utf-8-sig" also accepts plain UTF-8. It drops a leading byte-order mark,
    # which the PowerShell collector may write via `Set-Content -Encoding utf8`.
    with open(args.path, encoding="utf-8-sig") as f:
        lines = f.read().splitlines()

    if args.platform == "linux":
        m = parse_lines_linux(lines)
    else:
        m = parse_lines_windows(lines)
    print(dict_literal(m))


if __name__ == "__main__":
    main()