From b36d799f9b073b8be9349738da9ff7248a40dcb0 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Mon, 15 Sep 2025 10:59:45 -0400 Subject: [PATCH 1/3] Fix #962: Don't perform unnecessary version checks --- .../cuda/bindings/_internal/cufile_linux.pyx | 20 -------- .../bindings/_internal/nvjitlink_linux.pyx | 20 -------- .../bindings/_internal/nvjitlink_windows.pyx | 48 ------------------- .../cuda/bindings/_internal/nvvm_linux.pyx | 20 -------- .../cuda/bindings/_internal/nvvm_windows.pyx | 48 ------------------- 5 files changed, 156 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_internal/cufile_linux.pyx b/cuda_bindings/cuda/bindings/_internal/cufile_linux.pyx index 74079c2ef6..199a55d509 100644 --- a/cuda_bindings/cuda/bindings/_internal/cufile_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/cufile_linux.pyx @@ -32,24 +32,6 @@ cdef extern from "" nogil: const void* RTLD_DEFAULT 'RTLD_DEFAULT' -cdef int get_cuda_version(): - cdef void* handle = NULL - cdef int err, driver_ver = 0 - - # Load driver to check version - handle = dlopen('libcuda.so.1', RTLD_NOW | RTLD_GLOBAL) - if handle == NULL: - err_msg = dlerror() - raise NotSupportedError(f'CUDA driver is not found ({err_msg.decode()})') - cuDriverGetVersion = dlsym(handle, "cuDriverGetVersion") - if cuDriverGetVersion == NULL: - raise RuntimeError('something went wrong') - err = (cuDriverGetVersion)(&driver_ver) - if err != 0: - raise RuntimeError('something went wrong') - - return driver_ver - ############################################################################### # Wrapper init @@ -116,8 +98,6 @@ cdef int _check_or_init_cufile() except -1 nogil: cdef void* handle = NULL with gil, __symbol_lock: - driver_ver = get_cuda_version() - # Load function global __cuFileHandleRegister __cuFileHandleRegister = dlsym(RTLD_DEFAULT, 'cuFileHandleRegister') diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx index 
db68c647cc..a10b03bb28 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx @@ -30,24 +30,6 @@ cdef extern from "" nogil: const void* RTLD_DEFAULT 'RTLD_DEFAULT' -cdef int get_cuda_version(): - cdef void* handle = NULL - cdef int err, driver_ver = 0 - - # Load driver to check version - handle = dlopen('libcuda.so.1', RTLD_NOW | RTLD_GLOBAL) - if handle == NULL: - err_msg = dlerror() - raise NotSupportedError(f'CUDA driver is not found ({err_msg.decode()})') - cuDriverGetVersion = dlsym(handle, "cuDriverGetVersion") - if cuDriverGetVersion == NULL: - raise RuntimeError('something went wrong') - err = (cuDriverGetVersion)(&driver_ver) - if err != 0: - raise RuntimeError('something went wrong') - - return driver_ver - ############################################################################### # Wrapper init @@ -85,8 +67,6 @@ cdef int _check_or_init_nvjitlink() except -1 nogil: cdef void* handle = NULL with gil, __symbol_lock: - driver_ver = get_cuda_version() - # Load function global __nvJitLinkCreate __nvJitLinkCreate = dlsym(RTLD_DEFAULT, 'nvJitLinkCreate') diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx index efc15834a8..e52a8e31c1 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx @@ -11,64 +11,18 @@ from .utils import FunctionNotFoundError, NotSupportedError from cuda.pathfinder import load_nvidia_dynamic_lib -from libc.stddef cimport wchar_t from libc.stdint cimport uintptr_t -from cpython cimport PyUnicode_AsWideCharString, PyMem_Free - -from .utils import NotSupportedError cdef extern from "windows.h" nogil: ctypedef void* HMODULE - ctypedef void* HANDLE ctypedef void* FARPROC - ctypedef unsigned long DWORD - ctypedef const wchar_t *LPCWSTR ctypedef const char *LPCSTR - cdef DWORD 
LOAD_LIBRARY_SEARCH_SYSTEM32 = 0x00000800 - cdef DWORD LOAD_LIBRARY_SEARCH_DEFAULT_DIRS = 0x00001000 - cdef DWORD LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR = 0x00000100 - - HMODULE _LoadLibraryExW "LoadLibraryExW"( - LPCWSTR lpLibFileName, - HANDLE hFile, - DWORD dwFlags - ) - FARPROC _GetProcAddress "GetProcAddress"(HMODULE hModule, LPCSTR lpProcName) -cdef inline uintptr_t LoadLibraryExW(str path, HANDLE hFile, DWORD dwFlags): - cdef uintptr_t result - cdef wchar_t* wpath = PyUnicode_AsWideCharString(path, NULL) - with nogil: - result = _LoadLibraryExW( - wpath, - hFile, - dwFlags - ) - PyMem_Free(wpath) - return result - cdef inline void *GetProcAddress(uintptr_t hModule, const char* lpProcName) nogil: return _GetProcAddress(hModule, lpProcName) -cdef int get_cuda_version(): - cdef int err, driver_ver = 0 - - # Load driver to check version - handle = LoadLibraryExW("nvcuda.dll", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32) - if handle == 0: - raise NotSupportedError('CUDA driver is not found') - cuDriverGetVersion = GetProcAddress(handle, 'cuDriverGetVersion') - if cuDriverGetVersion == NULL: - raise RuntimeError('something went wrong') - err = (cuDriverGetVersion)(&driver_ver) - if err != 0: - raise RuntimeError('something went wrong') - - return driver_ver - - ############################################################################### # Wrapper init @@ -99,8 +53,6 @@ cdef int _check_or_init_nvjitlink() except -1 nogil: return 0 with gil, __symbol_lock: - driver_ver = get_cuda_version() - # Load library handle = load_nvidia_dynamic_lib("nvJitLink")._handle_uint diff --git a/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx index 2eaff11c31..5fd2979829 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx @@ -30,24 +30,6 @@ cdef extern from "" nogil: const void* RTLD_DEFAULT 'RTLD_DEFAULT' -cdef int get_cuda_version(): - cdef void* handle = NULL - 
cdef int err, driver_ver = 0 - - # Load driver to check version - handle = dlopen('libcuda.so.1', RTLD_NOW | RTLD_GLOBAL) - if handle == NULL: - err_msg = dlerror() - raise NotSupportedError(f'CUDA driver is not found ({err_msg.decode()})') - cuDriverGetVersion = dlsym(handle, "cuDriverGetVersion") - if cuDriverGetVersion == NULL: - raise RuntimeError('something went wrong') - err = (cuDriverGetVersion)(&driver_ver) - if err != 0: - raise RuntimeError('something went wrong') - - return driver_ver - ############################################################################### # Wrapper init @@ -84,8 +66,6 @@ cdef int _check_or_init_nvvm() except -1 nogil: cdef void* handle = NULL with gil, __symbol_lock: - driver_ver = get_cuda_version() - # Load function global __nvvmGetErrorString __nvvmGetErrorString = dlsym(RTLD_DEFAULT, 'nvvmGetErrorString') diff --git a/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx index d2f0e48c44..215efeca15 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx @@ -11,64 +11,18 @@ from .utils import FunctionNotFoundError, NotSupportedError from cuda.pathfinder import load_nvidia_dynamic_lib -from libc.stddef cimport wchar_t from libc.stdint cimport uintptr_t -from cpython cimport PyUnicode_AsWideCharString, PyMem_Free - -from .utils import NotSupportedError cdef extern from "windows.h" nogil: ctypedef void* HMODULE - ctypedef void* HANDLE ctypedef void* FARPROC - ctypedef unsigned long DWORD - ctypedef const wchar_t *LPCWSTR ctypedef const char *LPCSTR - cdef DWORD LOAD_LIBRARY_SEARCH_SYSTEM32 = 0x00000800 - cdef DWORD LOAD_LIBRARY_SEARCH_DEFAULT_DIRS = 0x00001000 - cdef DWORD LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR = 0x00000100 - - HMODULE _LoadLibraryExW "LoadLibraryExW"( - LPCWSTR lpLibFileName, - HANDLE hFile, - DWORD dwFlags - ) - FARPROC _GetProcAddress "GetProcAddress"(HMODULE hModule, LPCSTR 
lpProcName) -cdef inline uintptr_t LoadLibraryExW(str path, HANDLE hFile, DWORD dwFlags): - cdef uintptr_t result - cdef wchar_t* wpath = PyUnicode_AsWideCharString(path, NULL) - with nogil: - result = _LoadLibraryExW( - wpath, - hFile, - dwFlags - ) - PyMem_Free(wpath) - return result - cdef inline void *GetProcAddress(uintptr_t hModule, const char* lpProcName) nogil: return _GetProcAddress(hModule, lpProcName) -cdef int get_cuda_version(): - cdef int err, driver_ver = 0 - - # Load driver to check version - handle = LoadLibraryExW("nvcuda.dll", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32) - if handle == 0: - raise NotSupportedError('CUDA driver is not found') - cuDriverGetVersion = GetProcAddress(handle, 'cuDriverGetVersion') - if cuDriverGetVersion == NULL: - raise RuntimeError('something went wrong') - err = (cuDriverGetVersion)(&driver_ver) - if err != 0: - raise RuntimeError('something went wrong') - - return driver_ver - - ############################################################################### # Wrapper init @@ -98,8 +52,6 @@ cdef int _check_or_init_nvvm() except -1 nogil: return 0 with gil, __symbol_lock: - driver_ver = get_cuda_version() - # Load library handle = load_nvidia_dynamic_lib("nvvm")._handle_uint From bb84cfe152b061910a8877db295db0f752870091 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Mon, 15 Sep 2025 11:46:09 -0400 Subject: [PATCH 2/3] Fix code having no version checks on Linux --- .../cuda/bindings/_internal/cufile_linux.pyx | 88 +++++++++---------- .../bindings/_internal/nvjitlink_linux.pyx | 30 +++---- .../cuda/bindings/_internal/nvvm_linux.pyx | 28 +++--- 3 files changed, 73 insertions(+), 73 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_internal/cufile_linux.pyx b/cuda_bindings/cuda/bindings/_internal/cufile_linux.pyx index 199a55d509..339ea2b809 100644 --- a/cuda_bindings/cuda/bindings/_internal/cufile_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/cufile_linux.pyx @@ -85,7 +85,7 @@ cdef void* 
__cuFileSetParameterPosixPoolSlabArray = NULL cdef void* __cuFileGetParameterPosixPoolSlabArray = NULL -cdef void* load_library(const int driver_ver) except* with gil: +cdef void* load_library() except* with gil: cdef uintptr_t handle = load_nvidia_dynamic_lib("cufile")._handle_uint return handle @@ -103,301 +103,301 @@ cdef int _check_or_init_cufile() except -1 nogil: __cuFileHandleRegister = dlsym(RTLD_DEFAULT, 'cuFileHandleRegister') if __cuFileHandleRegister == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __cuFileHandleRegister = dlsym(handle, 'cuFileHandleRegister') global __cuFileHandleDeregister __cuFileHandleDeregister = dlsym(RTLD_DEFAULT, 'cuFileHandleDeregister') if __cuFileHandleDeregister == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __cuFileHandleDeregister = dlsym(handle, 'cuFileHandleDeregister') global __cuFileBufRegister __cuFileBufRegister = dlsym(RTLD_DEFAULT, 'cuFileBufRegister') if __cuFileBufRegister == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __cuFileBufRegister = dlsym(handle, 'cuFileBufRegister') global __cuFileBufDeregister __cuFileBufDeregister = dlsym(RTLD_DEFAULT, 'cuFileBufDeregister') if __cuFileBufDeregister == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __cuFileBufDeregister = dlsym(handle, 'cuFileBufDeregister') global __cuFileRead __cuFileRead = dlsym(RTLD_DEFAULT, 'cuFileRead') if __cuFileRead == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __cuFileRead = dlsym(handle, 'cuFileRead') global __cuFileWrite __cuFileWrite = dlsym(RTLD_DEFAULT, 'cuFileWrite') if __cuFileWrite == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __cuFileWrite = dlsym(handle, 'cuFileWrite') global __cuFileDriverOpen __cuFileDriverOpen = dlsym(RTLD_DEFAULT, 'cuFileDriverOpen') if __cuFileDriverOpen == 
NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __cuFileDriverOpen = dlsym(handle, 'cuFileDriverOpen') global __cuFileDriverClose_v2 __cuFileDriverClose_v2 = dlsym(RTLD_DEFAULT, 'cuFileDriverClose_v2') if __cuFileDriverClose_v2 == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __cuFileDriverClose_v2 = dlsym(handle, 'cuFileDriverClose_v2') global __cuFileUseCount __cuFileUseCount = dlsym(RTLD_DEFAULT, 'cuFileUseCount') if __cuFileUseCount == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __cuFileUseCount = dlsym(handle, 'cuFileUseCount') global __cuFileDriverGetProperties __cuFileDriverGetProperties = dlsym(RTLD_DEFAULT, 'cuFileDriverGetProperties') if __cuFileDriverGetProperties == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __cuFileDriverGetProperties = dlsym(handle, 'cuFileDriverGetProperties') global __cuFileDriverSetPollMode __cuFileDriverSetPollMode = dlsym(RTLD_DEFAULT, 'cuFileDriverSetPollMode') if __cuFileDriverSetPollMode == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __cuFileDriverSetPollMode = dlsym(handle, 'cuFileDriverSetPollMode') global __cuFileDriverSetMaxDirectIOSize __cuFileDriverSetMaxDirectIOSize = dlsym(RTLD_DEFAULT, 'cuFileDriverSetMaxDirectIOSize') if __cuFileDriverSetMaxDirectIOSize == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __cuFileDriverSetMaxDirectIOSize = dlsym(handle, 'cuFileDriverSetMaxDirectIOSize') global __cuFileDriverSetMaxCacheSize __cuFileDriverSetMaxCacheSize = dlsym(RTLD_DEFAULT, 'cuFileDriverSetMaxCacheSize') if __cuFileDriverSetMaxCacheSize == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __cuFileDriverSetMaxCacheSize = dlsym(handle, 'cuFileDriverSetMaxCacheSize') global __cuFileDriverSetMaxPinnedMemSize __cuFileDriverSetMaxPinnedMemSize 
= dlsym(RTLD_DEFAULT, 'cuFileDriverSetMaxPinnedMemSize') if __cuFileDriverSetMaxPinnedMemSize == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __cuFileDriverSetMaxPinnedMemSize = dlsym(handle, 'cuFileDriverSetMaxPinnedMemSize') global __cuFileBatchIOSetUp __cuFileBatchIOSetUp = dlsym(RTLD_DEFAULT, 'cuFileBatchIOSetUp') if __cuFileBatchIOSetUp == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __cuFileBatchIOSetUp = dlsym(handle, 'cuFileBatchIOSetUp') global __cuFileBatchIOSubmit __cuFileBatchIOSubmit = dlsym(RTLD_DEFAULT, 'cuFileBatchIOSubmit') if __cuFileBatchIOSubmit == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __cuFileBatchIOSubmit = dlsym(handle, 'cuFileBatchIOSubmit') global __cuFileBatchIOGetStatus __cuFileBatchIOGetStatus = dlsym(RTLD_DEFAULT, 'cuFileBatchIOGetStatus') if __cuFileBatchIOGetStatus == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __cuFileBatchIOGetStatus = dlsym(handle, 'cuFileBatchIOGetStatus') global __cuFileBatchIOCancel __cuFileBatchIOCancel = dlsym(RTLD_DEFAULT, 'cuFileBatchIOCancel') if __cuFileBatchIOCancel == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __cuFileBatchIOCancel = dlsym(handle, 'cuFileBatchIOCancel') global __cuFileBatchIODestroy __cuFileBatchIODestroy = dlsym(RTLD_DEFAULT, 'cuFileBatchIODestroy') if __cuFileBatchIODestroy == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __cuFileBatchIODestroy = dlsym(handle, 'cuFileBatchIODestroy') global __cuFileReadAsync __cuFileReadAsync = dlsym(RTLD_DEFAULT, 'cuFileReadAsync') if __cuFileReadAsync == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __cuFileReadAsync = dlsym(handle, 'cuFileReadAsync') global __cuFileWriteAsync __cuFileWriteAsync = dlsym(RTLD_DEFAULT, 'cuFileWriteAsync') if 
__cuFileWriteAsync == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __cuFileWriteAsync = dlsym(handle, 'cuFileWriteAsync') global __cuFileStreamRegister __cuFileStreamRegister = dlsym(RTLD_DEFAULT, 'cuFileStreamRegister') if __cuFileStreamRegister == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __cuFileStreamRegister = dlsym(handle, 'cuFileStreamRegister') global __cuFileStreamDeregister __cuFileStreamDeregister = dlsym(RTLD_DEFAULT, 'cuFileStreamDeregister') if __cuFileStreamDeregister == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __cuFileStreamDeregister = dlsym(handle, 'cuFileStreamDeregister') global __cuFileGetVersion __cuFileGetVersion = dlsym(RTLD_DEFAULT, 'cuFileGetVersion') if __cuFileGetVersion == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __cuFileGetVersion = dlsym(handle, 'cuFileGetVersion') global __cuFileGetParameterSizeT __cuFileGetParameterSizeT = dlsym(RTLD_DEFAULT, 'cuFileGetParameterSizeT') if __cuFileGetParameterSizeT == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __cuFileGetParameterSizeT = dlsym(handle, 'cuFileGetParameterSizeT') global __cuFileGetParameterBool __cuFileGetParameterBool = dlsym(RTLD_DEFAULT, 'cuFileGetParameterBool') if __cuFileGetParameterBool == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __cuFileGetParameterBool = dlsym(handle, 'cuFileGetParameterBool') global __cuFileGetParameterString __cuFileGetParameterString = dlsym(RTLD_DEFAULT, 'cuFileGetParameterString') if __cuFileGetParameterString == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __cuFileGetParameterString = dlsym(handle, 'cuFileGetParameterString') global __cuFileSetParameterSizeT __cuFileSetParameterSizeT = dlsym(RTLD_DEFAULT, 'cuFileSetParameterSizeT') if 
__cuFileSetParameterSizeT == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __cuFileSetParameterSizeT = dlsym(handle, 'cuFileSetParameterSizeT') global __cuFileSetParameterBool __cuFileSetParameterBool = dlsym(RTLD_DEFAULT, 'cuFileSetParameterBool') if __cuFileSetParameterBool == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __cuFileSetParameterBool = dlsym(handle, 'cuFileSetParameterBool') global __cuFileSetParameterString __cuFileSetParameterString = dlsym(RTLD_DEFAULT, 'cuFileSetParameterString') if __cuFileSetParameterString == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __cuFileSetParameterString = dlsym(handle, 'cuFileSetParameterString') global __cuFileDriverClose __cuFileDriverClose = dlsym(RTLD_DEFAULT, 'cuFileDriverClose') if __cuFileDriverClose == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __cuFileDriverClose = dlsym(handle, 'cuFileDriverClose') global __cuFileGetParameterMinMaxValue __cuFileGetParameterMinMaxValue = dlsym(RTLD_DEFAULT, 'cuFileGetParameterMinMaxValue') if __cuFileGetParameterMinMaxValue == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __cuFileGetParameterMinMaxValue = dlsym(handle, 'cuFileGetParameterMinMaxValue') global __cuFileSetStatsLevel __cuFileSetStatsLevel = dlsym(RTLD_DEFAULT, 'cuFileSetStatsLevel') if __cuFileSetStatsLevel == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __cuFileSetStatsLevel = dlsym(handle, 'cuFileSetStatsLevel') global __cuFileGetStatsLevel __cuFileGetStatsLevel = dlsym(RTLD_DEFAULT, 'cuFileGetStatsLevel') if __cuFileGetStatsLevel == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __cuFileGetStatsLevel = dlsym(handle, 'cuFileGetStatsLevel') global __cuFileStatsStart __cuFileStatsStart = dlsym(RTLD_DEFAULT, 'cuFileStatsStart') 
if __cuFileStatsStart == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __cuFileStatsStart = dlsym(handle, 'cuFileStatsStart') global __cuFileStatsStop __cuFileStatsStop = dlsym(RTLD_DEFAULT, 'cuFileStatsStop') if __cuFileStatsStop == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __cuFileStatsStop = dlsym(handle, 'cuFileStatsStop') global __cuFileStatsReset __cuFileStatsReset = dlsym(RTLD_DEFAULT, 'cuFileStatsReset') if __cuFileStatsReset == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __cuFileStatsReset = dlsym(handle, 'cuFileStatsReset') global __cuFileGetStatsL1 __cuFileGetStatsL1 = dlsym(RTLD_DEFAULT, 'cuFileGetStatsL1') if __cuFileGetStatsL1 == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __cuFileGetStatsL1 = dlsym(handle, 'cuFileGetStatsL1') global __cuFileGetStatsL2 __cuFileGetStatsL2 = dlsym(RTLD_DEFAULT, 'cuFileGetStatsL2') if __cuFileGetStatsL2 == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __cuFileGetStatsL2 = dlsym(handle, 'cuFileGetStatsL2') global __cuFileGetStatsL3 __cuFileGetStatsL3 = dlsym(RTLD_DEFAULT, 'cuFileGetStatsL3') if __cuFileGetStatsL3 == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __cuFileGetStatsL3 = dlsym(handle, 'cuFileGetStatsL3') global __cuFileGetBARSizeInKB __cuFileGetBARSizeInKB = dlsym(RTLD_DEFAULT, 'cuFileGetBARSizeInKB') if __cuFileGetBARSizeInKB == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __cuFileGetBARSizeInKB = dlsym(handle, 'cuFileGetBARSizeInKB') global __cuFileSetParameterPosixPoolSlabArray __cuFileSetParameterPosixPoolSlabArray = dlsym(RTLD_DEFAULT, 'cuFileSetParameterPosixPoolSlabArray') if __cuFileSetParameterPosixPoolSlabArray == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() 
__cuFileSetParameterPosixPoolSlabArray = dlsym(handle, 'cuFileSetParameterPosixPoolSlabArray') global __cuFileGetParameterPosixPoolSlabArray __cuFileGetParameterPosixPoolSlabArray = dlsym(RTLD_DEFAULT, 'cuFileGetParameterPosixPoolSlabArray') if __cuFileGetParameterPosixPoolSlabArray == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __cuFileGetParameterPosixPoolSlabArray = dlsym(handle, 'cuFileGetParameterPosixPoolSlabArray') __py_cufile_init = True diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx index a10b03bb28..9bdbdf6ea7 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx @@ -54,7 +54,7 @@ cdef void* __nvJitLinkGetInfoLog = NULL cdef void* __nvJitLinkVersion = NULL -cdef void* load_library(int driver_ver) except* with gil: +cdef void* load_library() except* with gil: cdef uintptr_t handle = load_nvidia_dynamic_lib("nvJitLink")._handle_uint return handle @@ -72,98 +72,98 @@ cdef int _check_or_init_nvjitlink() except -1 nogil: __nvJitLinkCreate = dlsym(RTLD_DEFAULT, 'nvJitLinkCreate') if __nvJitLinkCreate == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __nvJitLinkCreate = dlsym(handle, 'nvJitLinkCreate') global __nvJitLinkDestroy __nvJitLinkDestroy = dlsym(RTLD_DEFAULT, 'nvJitLinkDestroy') if __nvJitLinkDestroy == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __nvJitLinkDestroy = dlsym(handle, 'nvJitLinkDestroy') global __nvJitLinkAddData __nvJitLinkAddData = dlsym(RTLD_DEFAULT, 'nvJitLinkAddData') if __nvJitLinkAddData == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __nvJitLinkAddData = dlsym(handle, 'nvJitLinkAddData') global __nvJitLinkAddFile __nvJitLinkAddFile = dlsym(RTLD_DEFAULT, 'nvJitLinkAddFile') if __nvJitLinkAddFile == NULL: if 
handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __nvJitLinkAddFile = dlsym(handle, 'nvJitLinkAddFile') global __nvJitLinkComplete __nvJitLinkComplete = dlsym(RTLD_DEFAULT, 'nvJitLinkComplete') if __nvJitLinkComplete == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __nvJitLinkComplete = dlsym(handle, 'nvJitLinkComplete') global __nvJitLinkGetLinkedCubinSize __nvJitLinkGetLinkedCubinSize = dlsym(RTLD_DEFAULT, 'nvJitLinkGetLinkedCubinSize') if __nvJitLinkGetLinkedCubinSize == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __nvJitLinkGetLinkedCubinSize = dlsym(handle, 'nvJitLinkGetLinkedCubinSize') global __nvJitLinkGetLinkedCubin __nvJitLinkGetLinkedCubin = dlsym(RTLD_DEFAULT, 'nvJitLinkGetLinkedCubin') if __nvJitLinkGetLinkedCubin == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __nvJitLinkGetLinkedCubin = dlsym(handle, 'nvJitLinkGetLinkedCubin') global __nvJitLinkGetLinkedPtxSize __nvJitLinkGetLinkedPtxSize = dlsym(RTLD_DEFAULT, 'nvJitLinkGetLinkedPtxSize') if __nvJitLinkGetLinkedPtxSize == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __nvJitLinkGetLinkedPtxSize = dlsym(handle, 'nvJitLinkGetLinkedPtxSize') global __nvJitLinkGetLinkedPtx __nvJitLinkGetLinkedPtx = dlsym(RTLD_DEFAULT, 'nvJitLinkGetLinkedPtx') if __nvJitLinkGetLinkedPtx == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __nvJitLinkGetLinkedPtx = dlsym(handle, 'nvJitLinkGetLinkedPtx') global __nvJitLinkGetErrorLogSize __nvJitLinkGetErrorLogSize = dlsym(RTLD_DEFAULT, 'nvJitLinkGetErrorLogSize') if __nvJitLinkGetErrorLogSize == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __nvJitLinkGetErrorLogSize = dlsym(handle, 'nvJitLinkGetErrorLogSize') global __nvJitLinkGetErrorLog __nvJitLinkGetErrorLog = dlsym(RTLD_DEFAULT, 
'nvJitLinkGetErrorLog') if __nvJitLinkGetErrorLog == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __nvJitLinkGetErrorLog = dlsym(handle, 'nvJitLinkGetErrorLog') global __nvJitLinkGetInfoLogSize __nvJitLinkGetInfoLogSize = dlsym(RTLD_DEFAULT, 'nvJitLinkGetInfoLogSize') if __nvJitLinkGetInfoLogSize == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __nvJitLinkGetInfoLogSize = dlsym(handle, 'nvJitLinkGetInfoLogSize') global __nvJitLinkGetInfoLog __nvJitLinkGetInfoLog = dlsym(RTLD_DEFAULT, 'nvJitLinkGetInfoLog') if __nvJitLinkGetInfoLog == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __nvJitLinkGetInfoLog = dlsym(handle, 'nvJitLinkGetInfoLog') global __nvJitLinkVersion __nvJitLinkVersion = dlsym(RTLD_DEFAULT, 'nvJitLinkVersion') if __nvJitLinkVersion == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __nvJitLinkVersion = dlsym(handle, 'nvJitLinkVersion') __py_nvjitlink_init = True diff --git a/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx index 5fd2979829..0fb420d669 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx @@ -53,7 +53,7 @@ cdef void* __nvvmGetProgramLogSize = NULL cdef void* __nvvmGetProgramLog = NULL -cdef void* load_library(const int driver_ver) except* with gil: +cdef void* load_library() except* with gil: cdef uintptr_t handle = load_nvidia_dynamic_lib("nvvm")._handle_uint return handle @@ -71,91 +71,91 @@ cdef int _check_or_init_nvvm() except -1 nogil: __nvvmGetErrorString = dlsym(RTLD_DEFAULT, 'nvvmGetErrorString') if __nvvmGetErrorString == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __nvvmGetErrorString = dlsym(handle, 'nvvmGetErrorString') global __nvvmVersion __nvvmVersion = dlsym(RTLD_DEFAULT, 'nvvmVersion') if 
__nvvmVersion == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __nvvmVersion = dlsym(handle, 'nvvmVersion') global __nvvmIRVersion __nvvmIRVersion = dlsym(RTLD_DEFAULT, 'nvvmIRVersion') if __nvvmIRVersion == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __nvvmIRVersion = dlsym(handle, 'nvvmIRVersion') global __nvvmCreateProgram __nvvmCreateProgram = dlsym(RTLD_DEFAULT, 'nvvmCreateProgram') if __nvvmCreateProgram == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __nvvmCreateProgram = dlsym(handle, 'nvvmCreateProgram') global __nvvmDestroyProgram __nvvmDestroyProgram = dlsym(RTLD_DEFAULT, 'nvvmDestroyProgram') if __nvvmDestroyProgram == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __nvvmDestroyProgram = dlsym(handle, 'nvvmDestroyProgram') global __nvvmAddModuleToProgram __nvvmAddModuleToProgram = dlsym(RTLD_DEFAULT, 'nvvmAddModuleToProgram') if __nvvmAddModuleToProgram == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __nvvmAddModuleToProgram = dlsym(handle, 'nvvmAddModuleToProgram') global __nvvmLazyAddModuleToProgram __nvvmLazyAddModuleToProgram = dlsym(RTLD_DEFAULT, 'nvvmLazyAddModuleToProgram') if __nvvmLazyAddModuleToProgram == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __nvvmLazyAddModuleToProgram = dlsym(handle, 'nvvmLazyAddModuleToProgram') global __nvvmCompileProgram __nvvmCompileProgram = dlsym(RTLD_DEFAULT, 'nvvmCompileProgram') if __nvvmCompileProgram == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __nvvmCompileProgram = dlsym(handle, 'nvvmCompileProgram') global __nvvmVerifyProgram __nvvmVerifyProgram = dlsym(RTLD_DEFAULT, 'nvvmVerifyProgram') if __nvvmVerifyProgram == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() 
__nvvmVerifyProgram = dlsym(handle, 'nvvmVerifyProgram') global __nvvmGetCompiledResultSize __nvvmGetCompiledResultSize = dlsym(RTLD_DEFAULT, 'nvvmGetCompiledResultSize') if __nvvmGetCompiledResultSize == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __nvvmGetCompiledResultSize = dlsym(handle, 'nvvmGetCompiledResultSize') global __nvvmGetCompiledResult __nvvmGetCompiledResult = dlsym(RTLD_DEFAULT, 'nvvmGetCompiledResult') if __nvvmGetCompiledResult == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __nvvmGetCompiledResult = dlsym(handle, 'nvvmGetCompiledResult') global __nvvmGetProgramLogSize __nvvmGetProgramLogSize = dlsym(RTLD_DEFAULT, 'nvvmGetProgramLogSize') if __nvvmGetProgramLogSize == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __nvvmGetProgramLogSize = dlsym(handle, 'nvvmGetProgramLogSize') global __nvvmGetProgramLog __nvvmGetProgramLog = dlsym(RTLD_DEFAULT, 'nvvmGetProgramLog') if __nvvmGetProgramLog == NULL: if handle == NULL: - handle = load_library(driver_ver) + handle = load_library() __nvvmGetProgramLog = dlsym(handle, 'nvvmGetProgramLog') __py_nvvm_init = True From 9e5e4321ab5c014c4523a71645a0fb73392e6788 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Mon, 15 Sep 2025 13:04:35 -0400 Subject: [PATCH 3/3] Restore things removed from cybind --- .../cuda/bindings/_internal/cufile_linux.pyx | 18 ++++++++ .../bindings/_internal/nvjitlink_linux.pyx | 18 ++++++++ .../bindings/_internal/nvjitlink_windows.pyx | 46 +++++++++++++++++++ .../cuda/bindings/_internal/nvvm_linux.pyx | 18 ++++++++ .../cuda/bindings/_internal/nvvm_windows.pyx | 46 +++++++++++++++++++ 5 files changed, 146 insertions(+) diff --git a/cuda_bindings/cuda/bindings/_internal/cufile_linux.pyx b/cuda_bindings/cuda/bindings/_internal/cufile_linux.pyx index 339ea2b809..ffc92f2282 100644 --- a/cuda_bindings/cuda/bindings/_internal/cufile_linux.pyx +++ 
b/cuda_bindings/cuda/bindings/_internal/cufile_linux.pyx @@ -32,6 +32,24 @@ cdef extern from "" nogil: const void* RTLD_DEFAULT 'RTLD_DEFAULT' +cdef int get_cuda_version(): + cdef void* handle = NULL + cdef int err, driver_ver = 0 + + # Load driver to check version + handle = dlopen('libcuda.so.1', RTLD_NOW | RTLD_GLOBAL) + if handle == NULL: + err_msg = dlerror() + raise NotSupportedError(f'CUDA driver is not found ({err_msg.decode()})') + cuDriverGetVersion = dlsym(handle, "cuDriverGetVersion") + if cuDriverGetVersion == NULL: + raise RuntimeError('Did not find cuDriverGetVersion symbol in libcuda.so.1') + err = (cuDriverGetVersion)(&driver_ver) + if err != 0: + raise RuntimeError(f'cuDriverGetVersion returned error code {err}') + + return driver_ver + ############################################################################### # Wrapper init diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx index 9bdbdf6ea7..af060f318e 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx @@ -30,6 +30,24 @@ cdef extern from "" nogil: const void* RTLD_DEFAULT 'RTLD_DEFAULT' +cdef int get_cuda_version(): + cdef void* handle = NULL + cdef int err, driver_ver = 0 + + # Load driver to check version + handle = dlopen('libcuda.so.1', RTLD_NOW | RTLD_GLOBAL) + if handle == NULL: + err_msg = dlerror() + raise NotSupportedError(f'CUDA driver is not found ({err_msg.decode()})') + cuDriverGetVersion = dlsym(handle, "cuDriverGetVersion") + if cuDriverGetVersion == NULL: + raise RuntimeError('Did not find cuDriverGetVersion symbol in libcuda.so.1') + err = (cuDriverGetVersion)(&driver_ver) + if err != 0: + raise RuntimeError(f'cuDriverGetVersion returned error code {err}') + + return driver_ver + ############################################################################### # Wrapper init diff --git 
a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx index e52a8e31c1..730a415561 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx @@ -11,18 +11,64 @@ from .utils import FunctionNotFoundError, NotSupportedError from cuda.pathfinder import load_nvidia_dynamic_lib +from libc.stddef cimport wchar_t from libc.stdint cimport uintptr_t +from cpython cimport PyUnicode_AsWideCharString, PyMem_Free + +from .utils import NotSupportedError cdef extern from "windows.h" nogil: ctypedef void* HMODULE + ctypedef void* HANDLE ctypedef void* FARPROC + ctypedef unsigned long DWORD + ctypedef const wchar_t *LPCWSTR ctypedef const char *LPCSTR + cdef DWORD LOAD_LIBRARY_SEARCH_SYSTEM32 = 0x00000800 + cdef DWORD LOAD_LIBRARY_SEARCH_DEFAULT_DIRS = 0x00001000 + cdef DWORD LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR = 0x00000100 + + HMODULE _LoadLibraryExW "LoadLibraryExW"( + LPCWSTR lpLibFileName, + HANDLE hFile, + DWORD dwFlags + ) + FARPROC _GetProcAddress "GetProcAddress"(HMODULE hModule, LPCSTR lpProcName) +cdef inline uintptr_t LoadLibraryExW(str path, HANDLE hFile, DWORD dwFlags): + cdef uintptr_t result + cdef wchar_t* wpath = PyUnicode_AsWideCharString(path, NULL) + with nogil: + result = _LoadLibraryExW( + wpath, + hFile, + dwFlags + ) + PyMem_Free(wpath) + return result + cdef inline void *GetProcAddress(uintptr_t hModule, const char* lpProcName) nogil: return _GetProcAddress(hModule, lpProcName) +cdef int get_cuda_version(): + cdef int err, driver_ver = 0 + + # Load driver to check version + handle = LoadLibraryExW("nvcuda.dll", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32) + if handle == 0: + raise NotSupportedError('CUDA driver is not found') + cuDriverGetVersion = GetProcAddress(handle, 'cuDriverGetVersion') + if cuDriverGetVersion == NULL: + raise RuntimeError('Did not find cuDriverGetVersion symbol in nvcuda.dll') + err = (cuDriverGetVersion)(&driver_ver) + if err != 
0: + raise RuntimeError(f'cuDriverGetVersion returned error code {err}') + + return driver_ver + + ############################################################################### # Wrapper init diff --git a/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx index 0fb420d669..add0ccfb7d 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx @@ -30,6 +30,24 @@ cdef extern from "" nogil: const void* RTLD_DEFAULT 'RTLD_DEFAULT' +cdef int get_cuda_version(): + cdef void* handle = NULL + cdef int err, driver_ver = 0 + + # Load driver to check version + handle = dlopen('libcuda.so.1', RTLD_NOW | RTLD_GLOBAL) + if handle == NULL: + err_msg = dlerror() + raise NotSupportedError(f'CUDA driver is not found ({err_msg.decode()})') + cuDriverGetVersion = dlsym(handle, "cuDriverGetVersion") + if cuDriverGetVersion == NULL: + raise RuntimeError('Did not find cuDriverGetVersion symbol in libcuda.so.1') + err = (cuDriverGetVersion)(&driver_ver) + if err != 0: + raise RuntimeError(f'cuDriverGetVersion returned error code {err}') + + return driver_ver + ############################################################################### # Wrapper init diff --git a/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx index 215efeca15..3eb0daa9d7 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx @@ -11,18 +11,64 @@ from .utils import FunctionNotFoundError, NotSupportedError from cuda.pathfinder import load_nvidia_dynamic_lib +from libc.stddef cimport wchar_t from libc.stdint cimport uintptr_t +from cpython cimport PyUnicode_AsWideCharString, PyMem_Free + +from .utils import NotSupportedError cdef extern from "windows.h" nogil: ctypedef void* HMODULE + ctypedef void* HANDLE ctypedef void* FARPROC + ctypedef unsigned long DWORD + ctypedef const wchar_t 
*LPCWSTR ctypedef const char *LPCSTR + cdef DWORD LOAD_LIBRARY_SEARCH_SYSTEM32 = 0x00000800 + cdef DWORD LOAD_LIBRARY_SEARCH_DEFAULT_DIRS = 0x00001000 + cdef DWORD LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR = 0x00000100 + + HMODULE _LoadLibraryExW "LoadLibraryExW"( + LPCWSTR lpLibFileName, + HANDLE hFile, + DWORD dwFlags + ) + FARPROC _GetProcAddress "GetProcAddress"(HMODULE hModule, LPCSTR lpProcName) +cdef inline uintptr_t LoadLibraryExW(str path, HANDLE hFile, DWORD dwFlags): + cdef uintptr_t result + cdef wchar_t* wpath = PyUnicode_AsWideCharString(path, NULL) + with nogil: + result = _LoadLibraryExW( + wpath, + hFile, + dwFlags + ) + PyMem_Free(wpath) + return result + cdef inline void *GetProcAddress(uintptr_t hModule, const char* lpProcName) nogil: return _GetProcAddress(hModule, lpProcName) +cdef int get_cuda_version(): + cdef int err, driver_ver = 0 + + # Load driver to check version + handle = LoadLibraryExW("nvcuda.dll", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32) + if handle == 0: + raise NotSupportedError('CUDA driver is not found') + cuDriverGetVersion = GetProcAddress(handle, 'cuDriverGetVersion') + if cuDriverGetVersion == NULL: + raise RuntimeError('Did not find cuDriverGetVersion symbol in nvcuda.dll') + err = (cuDriverGetVersion)(&driver_ver) + if err != 0: + raise RuntimeError(f'cuDriverGetVersion returned error code {err}') + + return driver_ver + + ############################################################################### # Wrapper init