From d4ab83f26410a0e2e21852665a3de6d04bb3f8a1 Mon Sep 17 00:00:00 2001 From: Vladislav Zhurba Date: Tue, 7 Jan 2025 16:15:09 -0800 Subject: [PATCH 1/5] Support discovery of nvrtc and nvjitlink libraries at run time CTK installations distribute their libraries using individual packages: - nvidia-nvjitlink-cuXX - nvidia-cuda-nvrtc-cuXX The path of these libraries relative to cuda-bindings is consistent, which allows us to use relative paths to discover them when loading at run time. --- cuda_bindings/setup.py | 48 +++++++++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 12 deletions(-) diff --git a/cuda_bindings/setup.py b/cuda_bindings/setup.py index 6217d88a8f..9a2ccff52e 100644 --- a/cuda_bindings/setup.py +++ b/cuda_bindings/setup.py @@ -20,15 +20,14 @@ from Cython.Build import cythonize from pyclibrary import CParser from setuptools import find_packages, setup +from setuptools.command.bdist_wheel import bdist_wheel from setuptools.command.build_ext import build_ext from setuptools.extension import Extension # ---------------------------------------------------------------------- # Fetch configuration options -CUDA_HOME = os.environ.get("CUDA_HOME") -if not CUDA_HOME: - CUDA_HOME = os.environ.get("CUDA_PATH") +CUDA_HOME = os.environ.get("CUDA_HOME", os.environ.get("CUDA_PATH", None)) if not CUDA_HOME: raise RuntimeError("Environment variable CUDA_HOME or CUDA_PATH is not set") @@ -283,24 +282,49 @@ def do_cythonize(extensions): extensions += prep_extensions(sources) # --------------------------------------------------------------------- -# Custom build_ext command -# Files are build in two steps: -# 1) Cythonized (in the do_cythonize() command) -# 2) Compiled to .o files as part of build_ext -# This class is solely for passing the value of nthreads to build_ext +# Custom cmdclass extensions + +building_wheel = False + + +class WheelsBuildExtensions(bdist_wheel): + def run(self): + global building_wheel + building_wheel = True + super().run() class
ParallelBuildExtensions(build_ext): def initialize_options(self): - build_ext.initialize_options(self) + super().initialize_options() if nthreads > 0: self.parallel = nthreads - def finalize_options(self): - build_ext.finalize_options(self) + def build_extension(self, ext): + if building_wheel: + # Strip binaries to remove debug symbols + extra_linker_flags = ["-Wl,--strip-all"] + + # Allow extensions to discover libraries at runtime + # relative their wheels installation. + ldflag = "-Wl,--disable-new-dtags" + if ext.name == "cuda.bindings._bindings.cynvrtc": + ldflag += f",-rpath,$ORIGIN/../../../nvidia/cuda_nvrtc/lib" + elif ext.name == "cuda.bindings._internal.nvjitlink": + ldflag += f",-rpath,$ORIGIN/../../../nvidia/nvjitlink/lib" + + extra_linker_flags.append(ldflag) + else: + extra_linker_flags = [] + + ext.extra_link_args += extra_linker_flags + super().build_extension(ext) -cmdclass = {"build_ext": ParallelBuildExtensions} +cmdclass = { + "bdist_wheel": WheelsBuildExtensions, + "build_ext": ParallelBuildExtensions, + } # ---------------------------------------------------------------------- # Setup From 400e4eaa8abcb58dc1e996977db6a3eba2b05a6e Mon Sep 17 00:00:00 2001 From: Vladislav Zhurba Date: Wed, 8 Jan 2025 11:13:41 -0800 Subject: [PATCH 2/5] Support wheels for Windows --- .../cuda/bindings/_bindings/cynvrtc.pyx.in | 36 +++++++++++++++++-- .../docs/source/release/12.x.y-notes.md | 20 ++++++++++- cuda_bindings/pyproject.toml | 6 ++++ cuda_bindings/setup.py | 12 ++++--- 4 files changed, 65 insertions(+), 9 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in b/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in index bb4b4cdb14..e74041d739 100644 --- a/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in +++ b/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in @@ -6,8 +6,10 @@ # this software and related documentation outside the terms of the EULA # is strictly prohibited. 
{{if 'Windows' == platform.system()}} -import win32api +import os +import site import struct +import win32api from pywintypes import error {{else}} cimport cuda.bindings._lib.dlfcn as dlfcn @@ -44,11 +46,39 @@ cdef int cuPythonInit() except -1 nogil: # Load library {{if 'Windows' == platform.system()}} - LOAD_LIBRARY_SAFE_CURRENT_DIRS = 0x00002000 + handle = NULL with gil: + # First check if the DLL has been loaded by 3rd parties try: - handle = win32api.LoadLibraryEx("nvrtc64_120_0.dll", 0, LOAD_LIBRARY_SAFE_CURRENT_DIRS) + handle = win32api.GetModuleHandle("nvrtc64_120_0.dll") except: + pass + + # Try default search + if handle == NULL: + LOAD_LIBRARY_SAFE_CURRENT_DIRS = 0x00002000 + try: + handle = win32api.LoadLibraryEx("nvrtc64_120_0.dll", 0, LOAD_LIBRARY_SAFE_CURRENT_DIRS) + except: + pass + + # Check if DLLs are found within pip installations + if handle == NULL: + site_packages = [site.getusersitepackages()] + site.getsitepackages() + for sp in site_packages: + mod_path = os.path.join(sp, "nvidia", "cuda_nvrtc", "bin") + if not os.path.isdir(mod_path): + continue + os.add_dll_directory(mod_path) + try: + handle = win32api.LoadLibraryEx( + # Note: LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR needs an abs path... + os.path.join(mod_path, "nvrtc64_120_0.dll"), + 0, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR) + except: + pass + + if handle == NULL: raise RuntimeError('Failed to LoadLibraryEx nvrtc64_120_0.dll') {{else}} handle = NULL diff --git a/cuda_bindings/docs/source/release/12.x.y-notes.md b/cuda_bindings/docs/source/release/12.x.y-notes.md index 5cc6039249..9eff5ac704 100644 --- a/cuda_bindings/docs/source/release/12.x.y-notes.md +++ b/cuda_bindings/docs/source/release/12.x.y-notes.md @@ -3,4 +3,22 @@ Released on MM DD, 20YY. ## Highlights -- Added bindings for nvJitLink. It requires nvJitLink from CUDA 12.3 or above. +- Add bindings for nvJitLink. It requires nvJitLink from CUDA 12.3 or above. 
+- Add optional dependencies to wheels for NVRTC and nvJitLink +- Enable discovery and loading of shared library dependencies from wheels + +## Wheels support for optional dependencies + +Optional dependencies are added for packages: + +- nvidia-nvjitlink-cuXX +- nvidia-cuda-nvrtc-cuXX + +Installing these dependencies with cuda-python can be done using: +```{code-block} shell +pip install cuda-python[all] +``` + +## Discovery and loading of shared library dependencies from wheels + +Shared library search paths for wheel builds are now extended to check site-packages. This allows users to seamlessly use their wheel installation of the CUDA Toolkit with cuda-python. diff --git a/cuda_bindings/pyproject.toml b/cuda_bindings/pyproject.toml index 12de0ae80b..7ea87f967a 100644 --- a/cuda_bindings/pyproject.toml +++ b/cuda_bindings/pyproject.toml @@ -32,6 +32,12 @@ dependencies = [ "pywin32; sys_platform == 'win32'", ] +[project.optional-dependencies] +all = [ + "nvidia-cuda-nvrtc-cu12", + "nvidia-nvjitlink-cu12>=12.3" +] + [project.urls] Repository = "https://github.com/NVIDIA/cuda-python" Documentation = "https://nvidia.github.io/cuda-python/" diff --git a/cuda_bindings/setup.py b/cuda_bindings/setup.py index 9a2ccff52e..7c8725ee84 100644 --- a/cuda_bindings/setup.py +++ b/cuda_bindings/setup.py @@ -301,19 +301,21 @@ def initialize_options(self): self.parallel = nthreads def build_extension(self, ext): - if building_wheel: + if building_wheel and sys.platform == "linux": # Strip binaries to remove debug symbols extra_linker_flags = ["-Wl,--strip-all"] # Allow extensions to discover libraries at runtime # relative their wheels installation. 
- ldflag = "-Wl,--disable-new-dtags" if ext.name == "cuda.bindings._bindings.cynvrtc": - ldflag += f",-rpath,$ORIGIN/../../../nvidia/cuda_nvrtc/lib" + ldflag = f"-Wl,--disable-new-dtags,-rpath,$ORIGIN/../../../nvidia/cuda_nvrtc/lib" elif ext.name == "cuda.bindings._internal.nvjitlink": - ldflag += f",-rpath,$ORIGIN/../../../nvidia/nvjitlink/lib" + ldflag = f"-Wl,--disable-new-dtags,-rpath,$ORIGIN/../../../nvidia/nvjitlink/lib" + else: + ldflag = None - extra_linker_flags.append(ldflag) + if ldflag: + extra_linker_flags.append(ldflag) else: extra_linker_flags = [] From 0e51e7d70be6c6562d15d722ec85c6dc24dba2ff Mon Sep 17 00:00:00 2001 From: Vladislav Zhurba Date: Wed, 8 Jan 2025 14:42:48 -0800 Subject: [PATCH 3/5] Cleanup Windows support after testing --- .../cuda/bindings/_bindings/cynvrtc.pyx.in | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in b/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in index e74041d739..c20be0abbd 100644 --- a/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in +++ b/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in @@ -46,16 +46,15 @@ cdef int cuPythonInit() except -1 nogil: # Load library {{if 'Windows' == platform.system()}} - handle = NULL with gil: # First check if the DLL has been loaded by 3rd parties try: handle = win32api.GetModuleHandle("nvrtc64_120_0.dll") except: - pass + handle = None # Try default search - if handle == NULL: + if not handle: LOAD_LIBRARY_SAFE_CURRENT_DIRS = 0x00002000 try: handle = win32api.LoadLibraryEx("nvrtc64_120_0.dll", 0, LOAD_LIBRARY_SAFE_CURRENT_DIRS) @@ -63,22 +62,29 @@ cdef int cuPythonInit() except -1 nogil: pass # Check if DLLs are found within pip installations - if handle == NULL: + if not handle: site_packages = [site.getusersitepackages()] + site.getsitepackages() for sp in site_packages: mod_path = os.path.join(sp, "nvidia", "cuda_nvrtc", "bin") if not os.path.isdir(mod_path): continue 
os.add_dll_directory(mod_path) + LOAD_LIBRARY_SEARCH_DEFAULT_DIRS = 0x00001000 + LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR = 0x00000100 try: handle = win32api.LoadLibraryEx( # Note: LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR needs an abs path... os.path.join(mod_path, "nvrtc64_120_0.dll"), 0, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR) + + # Note: nvrtc64_120_0.dll calls into nvrtc-builtins64_*.dll which is + # located in the same mod_path. + # Update PATH environ so that the two dlls can find each other + os.environ["PATH"] = os.pathsep.join((os.environ.get("PATH", ""), mod_path)) except: pass - if handle == NULL: + if not handle: raise RuntimeError('Failed to LoadLibraryEx nvrtc64_120_0.dll') {{else}} handle = NULL From 0f7c7778edd490f2b0d70b0109839140b206b4bd Mon Sep 17 00:00:00 2001 From: Vladislav Zhurba Date: Wed, 8 Jan 2025 14:45:24 -0800 Subject: [PATCH 4/5] Simplify dlopen call --- cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in b/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in index c20be0abbd..4b77aa8139 100644 --- a/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in +++ b/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in @@ -87,9 +87,7 @@ cdef int cuPythonInit() except -1 nogil: if not handle: raise RuntimeError('Failed to LoadLibraryEx nvrtc64_120_0.dll') {{else}} - handle = NULL - if handle == NULL: - handle = dlfcn.dlopen('libnvrtc.so.12', dlfcn.RTLD_NOW) + handle = dlfcn.dlopen('libnvrtc.so.12', dlfcn.RTLD_NOW) if handle == NULL: with gil: raise RuntimeError('Failed to dlopen libnvrtc.so.12') From c13613073f71d6fb8e0c335644f405a27c62e3f7 Mon Sep 17 00:00:00 2001 From: Vladislav Zhurba Date: Wed, 8 Jan 2025 14:48:52 -0800 Subject: [PATCH 5/5] Wording --- cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in b/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in index 4b77aa8139..5cc8219c08 100644 --- a/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in +++ b/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in @@ -53,7 +53,7 @@ cdef int cuPythonInit() except -1 nogil: except: handle = None - # Try default search + # Else try default search if not handle: LOAD_LIBRARY_SAFE_CURRENT_DIRS = 0x00002000 try: @@ -61,7 +61,7 @@ cdef int cuPythonInit() except -1 nogil: except: pass - # Check if DLLs are found within pip installations + # Final check if DLLs can be found within pip installations if not handle: site_packages = [site.getusersitepackages()] + site.getsitepackages() for sp in site_packages: