diff --git a/cuda_bindings/cuda/bindings/_internal/cufile.pxd b/cuda_bindings/cuda/bindings/_internal/cufile.pxd index 97b1b387f..585fefe3f 100644 --- a/cuda_bindings/cuda/bindings/_internal/cufile.pxd +++ b/cuda_bindings/cuda/bindings/_internal/cufile.pxd @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # -# This code was automatically generated across versions from 12.9.0 to 13.0.1. Do not modify it directly. +# This code was automatically generated with version 13.0.0. Do not modify it directly. from ..cycufile cimport * @@ -38,11 +38,10 @@ cdef CUfileError_t _cuFileGetVersion(int* version) except?CUFILE_ cdef CUfileError_t _cuFileGetParameterSizeT(CUFileSizeTConfigParameter_t param, size_t* value) except?CUFILE_LOADING_ERROR nogil cdef CUfileError_t _cuFileGetParameterBool(CUFileBoolConfigParameter_t param, cpp_bool* value) except?CUFILE_LOADING_ERROR nogil cdef CUfileError_t _cuFileGetParameterString(CUFileStringConfigParameter_t param, char* desc_str, int len) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t _cuFileGetParameterMinMaxValue(CUFileSizeTConfigParameter_t param, size_t* min_value, size_t* max_value) except?CUFILE_LOADING_ERROR nogil cdef CUfileError_t _cuFileSetParameterSizeT(CUFileSizeTConfigParameter_t param, size_t value) except?CUFILE_LOADING_ERROR nogil cdef CUfileError_t _cuFileSetParameterBool(CUFileBoolConfigParameter_t param, cpp_bool value) except?CUFILE_LOADING_ERROR nogil cdef CUfileError_t _cuFileSetParameterString(CUFileStringConfigParameter_t param, const char* desc_str) except?CUFILE_LOADING_ERROR nogil -cdef CUfileError_t _cuFileDriverClose() except?CUFILE_LOADING_ERROR nogil -cdef CUfileError_t _cuFileGetParameterMinMaxValue(CUFileSizeTConfigParameter_t param, size_t* min_value, size_t* max_value) except?CUFILE_LOADING_ERROR nogil cdef CUfileError_t _cuFileSetStatsLevel(int level) except?CUFILE_LOADING_ERROR nogil cdef CUfileError_t _cuFileGetStatsLevel(int* level) except?CUFILE_LOADING_ERROR 
nogil cdef CUfileError_t _cuFileStatsStart() except?CUFILE_LOADING_ERROR nogil diff --git a/cuda_bindings/cuda/bindings/_internal/cufile_linux.pyx b/cuda_bindings/cuda/bindings/_internal/cufile_linux.pyx index e333c5081..f53769575 100644 --- a/cuda_bindings/cuda/bindings/_internal/cufile_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/cufile_linux.pyx @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # -# This code was automatically generated across versions from 12.9.0 to 13.0.1. Do not modify it directly. +# This code was automatically generated with version 13.0.0. Do not modify it directly. from libc.stdint cimport intptr_t, uintptr_t import threading @@ -85,11 +85,10 @@ cdef void* __cuFileGetVersion = NULL cdef void* __cuFileGetParameterSizeT = NULL cdef void* __cuFileGetParameterBool = NULL cdef void* __cuFileGetParameterString = NULL +cdef void* __cuFileGetParameterMinMaxValue = NULL cdef void* __cuFileSetParameterSizeT = NULL cdef void* __cuFileSetParameterBool = NULL cdef void* __cuFileSetParameterString = NULL -cdef void* __cuFileDriverClose = NULL -cdef void* __cuFileGetParameterMinMaxValue = NULL cdef void* __cuFileSetStatsLevel = NULL cdef void* __cuFileGetStatsLevel = NULL cdef void* __cuFileStatsStart = NULL @@ -103,7 +102,7 @@ cdef void* __cuFileSetParameterPosixPoolSlabArray = NULL cdef void* __cuFileGetParameterPosixPoolSlabArray = NULL -cdef void* load_library() except* with gil: +cdef void* load_library(const int driver_ver) except* with gil: cdef uintptr_t handle = load_nvidia_dynamic_lib("cufile")._handle_uint return handle @@ -114,306 +113,301 @@ cdef int __check_or_init_cufile() except -1 nogil: cdef void* handle = NULL with gil, __symbol_lock: + driver_ver = get_cuda_version() + # Load function global __cuFileHandleRegister __cuFileHandleRegister = dlsym(RTLD_DEFAULT, 'cuFileHandleRegister') if __cuFileHandleRegister == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) 
__cuFileHandleRegister = dlsym(handle, 'cuFileHandleRegister') global __cuFileHandleDeregister __cuFileHandleDeregister = dlsym(RTLD_DEFAULT, 'cuFileHandleDeregister') if __cuFileHandleDeregister == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileHandleDeregister = dlsym(handle, 'cuFileHandleDeregister') global __cuFileBufRegister __cuFileBufRegister = dlsym(RTLD_DEFAULT, 'cuFileBufRegister') if __cuFileBufRegister == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileBufRegister = dlsym(handle, 'cuFileBufRegister') global __cuFileBufDeregister __cuFileBufDeregister = dlsym(RTLD_DEFAULT, 'cuFileBufDeregister') if __cuFileBufDeregister == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileBufDeregister = dlsym(handle, 'cuFileBufDeregister') global __cuFileRead __cuFileRead = dlsym(RTLD_DEFAULT, 'cuFileRead') if __cuFileRead == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileRead = dlsym(handle, 'cuFileRead') global __cuFileWrite __cuFileWrite = dlsym(RTLD_DEFAULT, 'cuFileWrite') if __cuFileWrite == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileWrite = dlsym(handle, 'cuFileWrite') global __cuFileDriverOpen __cuFileDriverOpen = dlsym(RTLD_DEFAULT, 'cuFileDriverOpen') if __cuFileDriverOpen == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileDriverOpen = dlsym(handle, 'cuFileDriverOpen') global __cuFileDriverClose_v2 __cuFileDriverClose_v2 = dlsym(RTLD_DEFAULT, 'cuFileDriverClose_v2') if __cuFileDriverClose_v2 == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileDriverClose_v2 = dlsym(handle, 'cuFileDriverClose_v2') global __cuFileUseCount __cuFileUseCount = dlsym(RTLD_DEFAULT, 'cuFileUseCount') if __cuFileUseCount == NULL: if handle == NULL: - handle = 
load_library() + handle = load_library(driver_ver) __cuFileUseCount = dlsym(handle, 'cuFileUseCount') global __cuFileDriverGetProperties __cuFileDriverGetProperties = dlsym(RTLD_DEFAULT, 'cuFileDriverGetProperties') if __cuFileDriverGetProperties == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileDriverGetProperties = dlsym(handle, 'cuFileDriverGetProperties') global __cuFileDriverSetPollMode __cuFileDriverSetPollMode = dlsym(RTLD_DEFAULT, 'cuFileDriverSetPollMode') if __cuFileDriverSetPollMode == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileDriverSetPollMode = dlsym(handle, 'cuFileDriverSetPollMode') global __cuFileDriverSetMaxDirectIOSize __cuFileDriverSetMaxDirectIOSize = dlsym(RTLD_DEFAULT, 'cuFileDriverSetMaxDirectIOSize') if __cuFileDriverSetMaxDirectIOSize == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileDriverSetMaxDirectIOSize = dlsym(handle, 'cuFileDriverSetMaxDirectIOSize') global __cuFileDriverSetMaxCacheSize __cuFileDriverSetMaxCacheSize = dlsym(RTLD_DEFAULT, 'cuFileDriverSetMaxCacheSize') if __cuFileDriverSetMaxCacheSize == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileDriverSetMaxCacheSize = dlsym(handle, 'cuFileDriverSetMaxCacheSize') global __cuFileDriverSetMaxPinnedMemSize __cuFileDriverSetMaxPinnedMemSize = dlsym(RTLD_DEFAULT, 'cuFileDriverSetMaxPinnedMemSize') if __cuFileDriverSetMaxPinnedMemSize == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileDriverSetMaxPinnedMemSize = dlsym(handle, 'cuFileDriverSetMaxPinnedMemSize') global __cuFileBatchIOSetUp __cuFileBatchIOSetUp = dlsym(RTLD_DEFAULT, 'cuFileBatchIOSetUp') if __cuFileBatchIOSetUp == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileBatchIOSetUp = dlsym(handle, 'cuFileBatchIOSetUp') global 
__cuFileBatchIOSubmit __cuFileBatchIOSubmit = dlsym(RTLD_DEFAULT, 'cuFileBatchIOSubmit') if __cuFileBatchIOSubmit == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileBatchIOSubmit = dlsym(handle, 'cuFileBatchIOSubmit') global __cuFileBatchIOGetStatus __cuFileBatchIOGetStatus = dlsym(RTLD_DEFAULT, 'cuFileBatchIOGetStatus') if __cuFileBatchIOGetStatus == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileBatchIOGetStatus = dlsym(handle, 'cuFileBatchIOGetStatus') global __cuFileBatchIOCancel __cuFileBatchIOCancel = dlsym(RTLD_DEFAULT, 'cuFileBatchIOCancel') if __cuFileBatchIOCancel == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileBatchIOCancel = dlsym(handle, 'cuFileBatchIOCancel') global __cuFileBatchIODestroy __cuFileBatchIODestroy = dlsym(RTLD_DEFAULT, 'cuFileBatchIODestroy') if __cuFileBatchIODestroy == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileBatchIODestroy = dlsym(handle, 'cuFileBatchIODestroy') global __cuFileReadAsync __cuFileReadAsync = dlsym(RTLD_DEFAULT, 'cuFileReadAsync') if __cuFileReadAsync == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileReadAsync = dlsym(handle, 'cuFileReadAsync') global __cuFileWriteAsync __cuFileWriteAsync = dlsym(RTLD_DEFAULT, 'cuFileWriteAsync') if __cuFileWriteAsync == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileWriteAsync = dlsym(handle, 'cuFileWriteAsync') global __cuFileStreamRegister __cuFileStreamRegister = dlsym(RTLD_DEFAULT, 'cuFileStreamRegister') if __cuFileStreamRegister == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileStreamRegister = dlsym(handle, 'cuFileStreamRegister') global __cuFileStreamDeregister __cuFileStreamDeregister = dlsym(RTLD_DEFAULT, 'cuFileStreamDeregister') if 
__cuFileStreamDeregister == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileStreamDeregister = dlsym(handle, 'cuFileStreamDeregister') global __cuFileGetVersion __cuFileGetVersion = dlsym(RTLD_DEFAULT, 'cuFileGetVersion') if __cuFileGetVersion == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileGetVersion = dlsym(handle, 'cuFileGetVersion') global __cuFileGetParameterSizeT __cuFileGetParameterSizeT = dlsym(RTLD_DEFAULT, 'cuFileGetParameterSizeT') if __cuFileGetParameterSizeT == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileGetParameterSizeT = dlsym(handle, 'cuFileGetParameterSizeT') global __cuFileGetParameterBool __cuFileGetParameterBool = dlsym(RTLD_DEFAULT, 'cuFileGetParameterBool') if __cuFileGetParameterBool == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileGetParameterBool = dlsym(handle, 'cuFileGetParameterBool') global __cuFileGetParameterString __cuFileGetParameterString = dlsym(RTLD_DEFAULT, 'cuFileGetParameterString') if __cuFileGetParameterString == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileGetParameterString = dlsym(handle, 'cuFileGetParameterString') + global __cuFileGetParameterMinMaxValue + __cuFileGetParameterMinMaxValue = dlsym(RTLD_DEFAULT, 'cuFileGetParameterMinMaxValue') + if __cuFileGetParameterMinMaxValue == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __cuFileGetParameterMinMaxValue = dlsym(handle, 'cuFileGetParameterMinMaxValue') + global __cuFileSetParameterSizeT __cuFileSetParameterSizeT = dlsym(RTLD_DEFAULT, 'cuFileSetParameterSizeT') if __cuFileSetParameterSizeT == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileSetParameterSizeT = dlsym(handle, 'cuFileSetParameterSizeT') global __cuFileSetParameterBool __cuFileSetParameterBool = 
dlsym(RTLD_DEFAULT, 'cuFileSetParameterBool') if __cuFileSetParameterBool == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileSetParameterBool = dlsym(handle, 'cuFileSetParameterBool') global __cuFileSetParameterString __cuFileSetParameterString = dlsym(RTLD_DEFAULT, 'cuFileSetParameterString') if __cuFileSetParameterString == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileSetParameterString = dlsym(handle, 'cuFileSetParameterString') - global __cuFileDriverClose - __cuFileDriverClose = dlsym(RTLD_DEFAULT, 'cuFileDriverClose') - if __cuFileDriverClose == NULL: - if handle == NULL: - handle = load_library() - __cuFileDriverClose = dlsym(handle, 'cuFileDriverClose') - - global __cuFileGetParameterMinMaxValue - __cuFileGetParameterMinMaxValue = dlsym(RTLD_DEFAULT, 'cuFileGetParameterMinMaxValue') - if __cuFileGetParameterMinMaxValue == NULL: - if handle == NULL: - handle = load_library() - __cuFileGetParameterMinMaxValue = dlsym(handle, 'cuFileGetParameterMinMaxValue') - global __cuFileSetStatsLevel __cuFileSetStatsLevel = dlsym(RTLD_DEFAULT, 'cuFileSetStatsLevel') if __cuFileSetStatsLevel == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileSetStatsLevel = dlsym(handle, 'cuFileSetStatsLevel') global __cuFileGetStatsLevel __cuFileGetStatsLevel = dlsym(RTLD_DEFAULT, 'cuFileGetStatsLevel') if __cuFileGetStatsLevel == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileGetStatsLevel = dlsym(handle, 'cuFileGetStatsLevel') global __cuFileStatsStart __cuFileStatsStart = dlsym(RTLD_DEFAULT, 'cuFileStatsStart') if __cuFileStatsStart == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileStatsStart = dlsym(handle, 'cuFileStatsStart') global __cuFileStatsStop __cuFileStatsStop = dlsym(RTLD_DEFAULT, 'cuFileStatsStop') if __cuFileStatsStop == NULL: if handle 
== NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileStatsStop = dlsym(handle, 'cuFileStatsStop') global __cuFileStatsReset __cuFileStatsReset = dlsym(RTLD_DEFAULT, 'cuFileStatsReset') if __cuFileStatsReset == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileStatsReset = dlsym(handle, 'cuFileStatsReset') global __cuFileGetStatsL1 __cuFileGetStatsL1 = dlsym(RTLD_DEFAULT, 'cuFileGetStatsL1') if __cuFileGetStatsL1 == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileGetStatsL1 = dlsym(handle, 'cuFileGetStatsL1') global __cuFileGetStatsL2 __cuFileGetStatsL2 = dlsym(RTLD_DEFAULT, 'cuFileGetStatsL2') if __cuFileGetStatsL2 == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileGetStatsL2 = dlsym(handle, 'cuFileGetStatsL2') global __cuFileGetStatsL3 __cuFileGetStatsL3 = dlsym(RTLD_DEFAULT, 'cuFileGetStatsL3') if __cuFileGetStatsL3 == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileGetStatsL3 = dlsym(handle, 'cuFileGetStatsL3') global __cuFileGetBARSizeInKB __cuFileGetBARSizeInKB = dlsym(RTLD_DEFAULT, 'cuFileGetBARSizeInKB') if __cuFileGetBARSizeInKB == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileGetBARSizeInKB = dlsym(handle, 'cuFileGetBARSizeInKB') global __cuFileSetParameterPosixPoolSlabArray __cuFileSetParameterPosixPoolSlabArray = dlsym(RTLD_DEFAULT, 'cuFileSetParameterPosixPoolSlabArray') if __cuFileSetParameterPosixPoolSlabArray == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileSetParameterPosixPoolSlabArray = dlsym(handle, 'cuFileSetParameterPosixPoolSlabArray') global __cuFileGetParameterPosixPoolSlabArray __cuFileGetParameterPosixPoolSlabArray = dlsym(RTLD_DEFAULT, 'cuFileGetParameterPosixPoolSlabArray') if __cuFileGetParameterPosixPoolSlabArray == NULL: if handle 
== NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileGetParameterPosixPoolSlabArray = dlsym(handle, 'cuFileGetParameterPosixPoolSlabArray') __py_cufile_init = True @@ -519,6 +513,9 @@ cpdef dict _inspect_function_pointers(): global __cuFileGetParameterString data["__cuFileGetParameterString"] = __cuFileGetParameterString + global __cuFileGetParameterMinMaxValue + data["__cuFileGetParameterMinMaxValue"] = __cuFileGetParameterMinMaxValue + global __cuFileSetParameterSizeT data["__cuFileSetParameterSizeT"] = __cuFileSetParameterSizeT @@ -528,12 +525,6 @@ cpdef dict _inspect_function_pointers(): global __cuFileSetParameterString data["__cuFileSetParameterString"] = __cuFileSetParameterString - global __cuFileDriverClose - data["__cuFileDriverClose"] = __cuFileDriverClose - - global __cuFileGetParameterMinMaxValue - data["__cuFileGetParameterMinMaxValue"] = __cuFileGetParameterMinMaxValue - global __cuFileSetStatsLevel data["__cuFileSetStatsLevel"] = __cuFileSetStatsLevel @@ -854,6 +845,16 @@ cdef CUfileError_t _cuFileGetParameterString(CUFileStringConfigParameter_t param param, desc_str, len) +cdef CUfileError_t _cuFileGetParameterMinMaxValue(CUFileSizeTConfigParameter_t param, size_t* min_value, size_t* max_value) except?CUFILE_LOADING_ERROR nogil: + global __cuFileGetParameterMinMaxValue + _check_or_init_cufile() + if __cuFileGetParameterMinMaxValue == NULL: + with gil: + raise FunctionNotFoundError("function cuFileGetParameterMinMaxValue is not found") + return (__cuFileGetParameterMinMaxValue)( + param, min_value, max_value) + + cdef CUfileError_t _cuFileSetParameterSizeT(CUFileSizeTConfigParameter_t param, size_t value) except?CUFILE_LOADING_ERROR nogil: global __cuFileSetParameterSizeT _check_or_init_cufile() @@ -884,26 +885,6 @@ cdef CUfileError_t _cuFileSetParameterString(CUFileStringConfigParameter_t param param, desc_str) -cdef CUfileError_t _cuFileDriverClose() except?CUFILE_LOADING_ERROR nogil: - global __cuFileDriverClose - 
_check_or_init_cufile() - if __cuFileDriverClose == NULL: - with gil: - raise FunctionNotFoundError("function cuFileDriverClose is not found") - return (__cuFileDriverClose)( - ) - - -cdef CUfileError_t _cuFileGetParameterMinMaxValue(CUFileSizeTConfigParameter_t param, size_t* min_value, size_t* max_value) except?CUFILE_LOADING_ERROR nogil: - global __cuFileGetParameterMinMaxValue - _check_or_init_cufile() - if __cuFileGetParameterMinMaxValue == NULL: - with gil: - raise FunctionNotFoundError("function cuFileGetParameterMinMaxValue is not found") - return (__cuFileGetParameterMinMaxValue)( - param, min_value, max_value) - - cdef CUfileError_t _cuFileSetStatsLevel(int level) except?CUFILE_LOADING_ERROR nogil: global __cuFileSetStatsLevel _check_or_init_cufile() diff --git a/cuda_bindings/cuda/bindings/_internal/cycufile.pxd b/cuda_bindings/cuda/bindings/_internal/cycufile.pxd new file mode 100644 index 000000000..11cf737f0 --- /dev/null +++ b/cuda_bindings/cuda/bindings/_internal/cycufile.pxd @@ -0,0 +1,370 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# +# This code was automatically generated with version 13.0.0. Do not modify it directly. + +from libc.stdint cimport uint32_t, uint64_t +from libc.time cimport time_t +from libcpp cimport bool as cpp_bool +from posix.types cimport off_t + +cimport cuda.bindings.cydriver +from cuda.bindings.cydriver cimport CUresult + + +############################################################################### +# Types (structs, enums, ...) 
+############################################################################### + +# TODO: switch to "from libc.time cimport timespec" once we can use recent +# Cython to build +cdef extern from "": + cdef struct timespec: + time_t tv_sec + long tv_nsec +cdef extern from "": + cdef struct sockaddr: + unsigned short sa_family + char sa_data[14] + ctypedef sockaddr sockaddr_t + + +cdef extern from '': + # enums + ctypedef enum CUfileOpError: + CU_FILE_SUCCESS + CU_FILE_DRIVER_NOT_INITIALIZED + CU_FILE_DRIVER_INVALID_PROPS + CU_FILE_DRIVER_UNSUPPORTED_LIMIT + CU_FILE_DRIVER_VERSION_MISMATCH + CU_FILE_DRIVER_VERSION_READ_ERROR + CU_FILE_DRIVER_CLOSING + CU_FILE_PLATFORM_NOT_SUPPORTED + CU_FILE_IO_NOT_SUPPORTED + CU_FILE_DEVICE_NOT_SUPPORTED + CU_FILE_NVFS_DRIVER_ERROR + CU_FILE_CUDA_DRIVER_ERROR + CU_FILE_CUDA_POINTER_INVALID + CU_FILE_CUDA_MEMORY_TYPE_INVALID + CU_FILE_CUDA_POINTER_RANGE_ERROR + CU_FILE_CUDA_CONTEXT_MISMATCH + CU_FILE_INVALID_MAPPING_SIZE + CU_FILE_INVALID_MAPPING_RANGE + CU_FILE_INVALID_FILE_TYPE + CU_FILE_INVALID_FILE_OPEN_FLAG + CU_FILE_DIO_NOT_SET + CU_FILE_INVALID_VALUE + CU_FILE_MEMORY_ALREADY_REGISTERED + CU_FILE_MEMORY_NOT_REGISTERED + CU_FILE_PERMISSION_DENIED + CU_FILE_DRIVER_ALREADY_OPEN + CU_FILE_HANDLE_NOT_REGISTERED + CU_FILE_HANDLE_ALREADY_REGISTERED + CU_FILE_DEVICE_NOT_FOUND + CU_FILE_INTERNAL_ERROR + CU_FILE_GETNEWFD_FAILED + CU_FILE_NVFS_SETUP_ERROR + CU_FILE_IO_DISABLED + CU_FILE_BATCH_SUBMIT_FAILED + CU_FILE_GPU_MEMORY_PINNING_FAILED + CU_FILE_BATCH_FULL + CU_FILE_ASYNC_NOT_SUPPORTED + CU_FILE_INTERNAL_BATCH_SETUP_ERROR + CU_FILE_INTERNAL_BATCH_SUBMIT_ERROR + CU_FILE_INTERNAL_BATCH_GETSTATUS_ERROR + CU_FILE_INTERNAL_BATCH_CANCEL_ERROR + CU_FILE_NOMEM_ERROR + CU_FILE_IO_ERROR + CU_FILE_INTERNAL_BUF_REGISTER_ERROR + CU_FILE_HASH_OPR_ERROR + CU_FILE_INVALID_CONTEXT_ERROR + CU_FILE_NVFS_INTERNAL_DRIVER_ERROR + CU_FILE_BATCH_NOCOMPAT_ERROR + CU_FILE_IO_MAX_ERROR + + ctypedef enum CUfileDriverStatusFlags_t: + CU_FILE_LUSTRE_SUPPORTED + 
CU_FILE_WEKAFS_SUPPORTED + CU_FILE_NFS_SUPPORTED + CU_FILE_GPFS_SUPPORTED + CU_FILE_NVME_SUPPORTED + CU_FILE_NVMEOF_SUPPORTED + CU_FILE_SCSI_SUPPORTED + CU_FILE_SCALEFLUX_CSD_SUPPORTED + CU_FILE_NVMESH_SUPPORTED + CU_FILE_BEEGFS_SUPPORTED + CU_FILE_NVME_P2P_SUPPORTED + CU_FILE_SCATEFS_SUPPORTED + + ctypedef enum CUfileDriverControlFlags_t: + CU_FILE_USE_POLL_MODE + CU_FILE_ALLOW_COMPAT_MODE + + ctypedef enum CUfileFeatureFlags_t: + CU_FILE_DYN_ROUTING_SUPPORTED + CU_FILE_BATCH_IO_SUPPORTED + CU_FILE_STREAMS_SUPPORTED + CU_FILE_PARALLEL_IO_SUPPORTED + + ctypedef enum CUfileFileHandleType: + CU_FILE_HANDLE_TYPE_OPAQUE_FD + CU_FILE_HANDLE_TYPE_OPAQUE_WIN32 + CU_FILE_HANDLE_TYPE_USERSPACE_FS + + ctypedef enum CUfileOpcode_t: + CUFILE_READ + CUFILE_WRITE + + ctypedef enum CUfileStatus_t: + CUFILE_WAITING + CUFILE_PENDING + CUFILE_INVALID + CUFILE_CANCELED + CUFILE_COMPLETE + CUFILE_TIMEOUT + CUFILE_FAILED + + ctypedef enum CUfileBatchMode_t: + CUFILE_BATCH + + ctypedef enum CUFileSizeTConfigParameter_t: + CUFILE_PARAM_PROFILE_STATS + CUFILE_PARAM_EXECUTION_MAX_IO_QUEUE_DEPTH + CUFILE_PARAM_EXECUTION_MAX_IO_THREADS + CUFILE_PARAM_EXECUTION_MIN_IO_THRESHOLD_SIZE_KB + CUFILE_PARAM_EXECUTION_MAX_REQUEST_PARALLELISM + CUFILE_PARAM_PROPERTIES_MAX_DIRECT_IO_SIZE_KB + CUFILE_PARAM_PROPERTIES_MAX_DEVICE_CACHE_SIZE_KB + CUFILE_PARAM_PROPERTIES_PER_BUFFER_CACHE_SIZE_KB + CUFILE_PARAM_PROPERTIES_MAX_DEVICE_PINNED_MEM_SIZE_KB + CUFILE_PARAM_PROPERTIES_IO_BATCHSIZE + CUFILE_PARAM_POLLTHRESHOLD_SIZE_KB + CUFILE_PARAM_PROPERTIES_BATCH_IO_TIMEOUT_MS + + ctypedef enum CUFileBoolConfigParameter_t: + CUFILE_PARAM_PROPERTIES_USE_POLL_MODE + CUFILE_PARAM_PROPERTIES_ALLOW_COMPAT_MODE + CUFILE_PARAM_FORCE_COMPAT_MODE + CUFILE_PARAM_FS_MISC_API_CHECK_AGGRESSIVE + CUFILE_PARAM_EXECUTION_PARALLEL_IO + CUFILE_PARAM_PROFILE_NVTX + CUFILE_PARAM_PROPERTIES_ALLOW_SYSTEM_MEMORY + CUFILE_PARAM_USE_PCIP2PDMA + CUFILE_PARAM_PREFER_IO_URING + CUFILE_PARAM_FORCE_ODIRECT_MODE + 
CUFILE_PARAM_SKIP_TOPOLOGY_DETECTION + CUFILE_PARAM_STREAM_MEMOPS_BYPASS + + ctypedef enum CUFileStringConfigParameter_t: + CUFILE_PARAM_LOGGING_LEVEL + CUFILE_PARAM_ENV_LOGFILE_PATH + CUFILE_PARAM_LOG_DIR + + ctypedef enum CUFileArrayConfigParameter_t: + CUFILE_PARAM_POSIX_POOL_SLAB_SIZE_KB + CUFILE_PARAM_POSIX_POOL_SLAB_COUNT + + # types + ctypedef void* CUfileHandle_t 'CUfileHandle_t' + ctypedef void* CUfileBatchHandle_t 'CUfileBatchHandle_t' + ctypedef struct CUfileError_t 'CUfileError_t': + CUfileOpError err + CUresult cu_err + cdef struct _anon_pod0 '_anon_pod0': + unsigned int major_version + unsigned int minor_version + size_t poll_thresh_size + size_t max_direct_io_size + unsigned int dstatusflags + unsigned int dcontrolflags + ctypedef struct cufileRDMAInfo_t 'cufileRDMAInfo_t': + int version + int desc_len + char* desc_str + ctypedef struct CUfileFSOps_t 'CUfileFSOps_t': + char* (*fs_type)(void*) + int (*getRDMADeviceList)(void*, sockaddr_t**) + int (*getRDMADevicePriority)(void*, char*, size_t, loff_t, sockaddr_t*) + ssize_t (*read)(void*, char*, size_t, loff_t, cufileRDMAInfo_t*) + ssize_t (*write)(void*, const char*, size_t, loff_t, cufileRDMAInfo_t*) + cdef union _anon_pod1 '_anon_pod1': + int fd + void* handle + cdef struct _anon_pod3 '_anon_pod3': + void* devPtr_base + off_t file_offset + off_t devPtr_offset + size_t size + ctypedef struct CUfileIOEvents_t 'CUfileIOEvents_t': + void* cookie + CUfileStatus_t status + size_t ret + ctypedef struct CUfileOpCounter_t 'CUfileOpCounter_t': + uint64_t ok + uint64_t err + ctypedef struct CUfilePerGpuStats_t 'CUfilePerGpuStats_t': + char uuid[16] + uint64_t read_bytes + uint64_t read_bw_bytes_per_sec + uint64_t read_utilization + uint64_t read_duration_us + uint64_t n_total_reads + uint64_t n_p2p_reads + uint64_t n_nvfs_reads + uint64_t n_posix_reads + uint64_t n_unaligned_reads + uint64_t n_dr_reads + uint64_t n_sparse_regions + uint64_t n_inline_regions + uint64_t n_reads_err + uint64_t writes_bytes + 
uint64_t write_bw_bytes_per_sec + uint64_t write_utilization + uint64_t write_duration_us + uint64_t n_total_writes + uint64_t n_p2p_writes + uint64_t n_nvfs_writes + uint64_t n_posix_writes + uint64_t n_unaligned_writes + uint64_t n_dr_writes + uint64_t n_writes_err + uint64_t n_mmap + uint64_t n_mmap_ok + uint64_t n_mmap_err + uint64_t n_mmap_free + uint64_t reg_bytes + ctypedef struct CUfileDrvProps_t 'CUfileDrvProps_t': + _anon_pod0 nvfs + unsigned int fflags + unsigned int max_device_cache_size + unsigned int per_buffer_cache_size + unsigned int max_device_pinned_mem_size + unsigned int max_batch_io_size + unsigned int max_batch_io_timeout_msecs + ctypedef struct CUfileDescr_t 'CUfileDescr_t': + CUfileFileHandleType type + _anon_pod1 handle + CUfileFSOps_t* fs_ops + cdef union _anon_pod2 '_anon_pod2': + _anon_pod3 batch + ctypedef struct CUfileStatsLevel1_t 'CUfileStatsLevel1_t': + CUfileOpCounter_t read_ops + CUfileOpCounter_t write_ops + CUfileOpCounter_t hdl_register_ops + CUfileOpCounter_t hdl_deregister_ops + CUfileOpCounter_t buf_register_ops + CUfileOpCounter_t buf_deregister_ops + uint64_t read_bytes + uint64_t write_bytes + uint64_t read_bw_bytes_per_sec + uint64_t write_bw_bytes_per_sec + uint64_t read_lat_avg_us + uint64_t write_lat_avg_us + uint64_t read_ops_per_sec + uint64_t write_ops_per_sec + uint64_t read_lat_sum_us + uint64_t write_lat_sum_us + CUfileOpCounter_t batch_submit_ops + CUfileOpCounter_t batch_complete_ops + CUfileOpCounter_t batch_setup_ops + CUfileOpCounter_t batch_cancel_ops + CUfileOpCounter_t batch_destroy_ops + CUfileOpCounter_t batch_enqueued_ops + CUfileOpCounter_t batch_posix_enqueued_ops + CUfileOpCounter_t batch_processed_ops + CUfileOpCounter_t batch_posix_processed_ops + CUfileOpCounter_t batch_nvfs_submit_ops + CUfileOpCounter_t batch_p2p_submit_ops + CUfileOpCounter_t batch_aio_submit_ops + CUfileOpCounter_t batch_iouring_submit_ops + CUfileOpCounter_t batch_mixed_io_submit_ops + CUfileOpCounter_t 
batch_total_submit_ops + uint64_t batch_read_bytes + uint64_t batch_write_bytes + uint64_t batch_read_bw_bytes + uint64_t batch_write_bw_bytes + uint64_t batch_submit_lat_avg_us + uint64_t batch_completion_lat_avg_us + uint64_t batch_submit_ops_per_sec + uint64_t batch_complete_ops_per_sec + uint64_t batch_submit_lat_sum_us + uint64_t batch_completion_lat_sum_us + uint64_t last_batch_read_bytes + uint64_t last_batch_write_bytes + ctypedef struct CUfileIOParams_t 'CUfileIOParams_t': + CUfileBatchMode_t mode + _anon_pod2 u + CUfileHandle_t fh + CUfileOpcode_t opcode + void* cookie + ctypedef struct CUfileStatsLevel2_t 'CUfileStatsLevel2_t': + CUfileStatsLevel1_t basic + uint64_t read_size_kb_hist[32] + uint64_t write_size_kb_hist[32] + ctypedef struct CUfileStatsLevel3_t 'CUfileStatsLevel3_t': + CUfileStatsLevel2_t detailed + uint32_t num_gpus + CUfilePerGpuStats_t per_gpu_stats[16] + + +cdef extern from *: + """ + // This is the missing piece we need to supply to help Cython & C++ compilers. 
+ inline bool operator==(const CUfileError_t& lhs, const CUfileError_t& rhs) { + return (lhs.err == rhs.err) && (lhs.cu_err == rhs.cu_err); + } + static CUfileError_t CUFILE_LOADING_ERROR{(CUfileOpError)-1, (CUresult)-1}; + """ + const CUfileError_t CUFILE_LOADING_ERROR + ctypedef void* CUstream "CUstream" + + const char* cufileop_status_error(CUfileOpError) + + +############################################################################### +# Functions +############################################################################### + +cdef CUfileError_t cuFileHandleRegister(CUfileHandle_t* fh, CUfileDescr_t* descr) except?CUFILE_LOADING_ERROR nogil +cdef void cuFileHandleDeregister(CUfileHandle_t fh) except* nogil +cdef CUfileError_t cuFileBufRegister(const void* bufPtr_base, size_t length, int flags) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileBufDeregister(const void* bufPtr_base) except?CUFILE_LOADING_ERROR nogil +cdef ssize_t cuFileRead(CUfileHandle_t fh, void* bufPtr_base, size_t size, off_t file_offset, off_t bufPtr_offset) except* nogil +cdef ssize_t cuFileWrite(CUfileHandle_t fh, const void* bufPtr_base, size_t size, off_t file_offset, off_t bufPtr_offset) except* nogil +cdef CUfileError_t cuFileDriverOpen() except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileDriverClose_v2() except?CUFILE_LOADING_ERROR nogil +cdef long cuFileUseCount() except* nogil +cdef CUfileError_t cuFileDriverGetProperties(CUfileDrvProps_t* props) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileDriverSetPollMode(cpp_bool poll, size_t poll_threshold_size) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileDriverSetMaxDirectIOSize(size_t max_direct_io_size) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileDriverSetMaxCacheSize(size_t max_cache_size) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileDriverSetMaxPinnedMemSize(size_t max_pinned_size) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t 
cuFileBatchIOSetUp(CUfileBatchHandle_t* batch_idp, unsigned nr) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileBatchIOSubmit(CUfileBatchHandle_t batch_idp, unsigned nr, CUfileIOParams_t* iocbp, unsigned int flags) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileBatchIOGetStatus(CUfileBatchHandle_t batch_idp, unsigned min_nr, unsigned* nr, CUfileIOEvents_t* iocbp, timespec* timeout) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileBatchIOCancel(CUfileBatchHandle_t batch_idp) except?CUFILE_LOADING_ERROR nogil +cdef void cuFileBatchIODestroy(CUfileBatchHandle_t batch_idp) except* nogil +cdef CUfileError_t cuFileReadAsync(CUfileHandle_t fh, void* bufPtr_base, size_t* size_p, off_t* file_offset_p, off_t* bufPtr_offset_p, ssize_t* bytes_read_p, CUstream stream) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileWriteAsync(CUfileHandle_t fh, void* bufPtr_base, size_t* size_p, off_t* file_offset_p, off_t* bufPtr_offset_p, ssize_t* bytes_written_p, CUstream stream) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileStreamRegister(CUstream stream, unsigned flags) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileStreamDeregister(CUstream stream) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileGetVersion(int* version) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileGetParameterSizeT(CUFileSizeTConfigParameter_t param, size_t* value) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileGetParameterBool(CUFileBoolConfigParameter_t param, cpp_bool* value) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileGetParameterString(CUFileStringConfigParameter_t param, char* desc_str, int len) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileGetParameterMinMaxValue(CUFileSizeTConfigParameter_t param, size_t* min_value, size_t* max_value) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileSetParameterSizeT(CUFileSizeTConfigParameter_t param, size_t value) 
except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileSetParameterBool(CUFileBoolConfigParameter_t param, cpp_bool value) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileSetParameterString(CUFileStringConfigParameter_t param, const char* desc_str) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileSetStatsLevel(int level) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileGetStatsLevel(int* level) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileStatsStart() except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileStatsStop() except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileStatsReset() except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileGetStatsL1(CUfileStatsLevel1_t* stats) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileGetStatsL2(CUfileStatsLevel2_t* stats) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileGetStatsL3(CUfileStatsLevel3_t* stats) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileGetBARSizeInKB(int gpuIndex, size_t* barSize) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileSetParameterPosixPoolSlabArray(const size_t* size_values, const size_t* count_values, int len) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileGetParameterPosixPoolSlabArray(size_t* size_values, size_t* count_values, int len) except?CUFILE_LOADING_ERROR nogil diff --git a/cuda_bindings/cuda/bindings/_internal/cycufile.pyx b/cuda_bindings/cuda/bindings/_internal/cycufile.pyx new file mode 100644 index 000000000..e23177137 --- /dev/null +++ b/cuda_bindings/cuda/bindings/_internal/cycufile.pyx @@ -0,0 +1,182 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# +# This code was automatically generated with version 13.0.0. Do not modify it directly. 
+ +from ._internal cimport cufile as _cufile + +import cython + +############################################################################### +# Wrapper functions +############################################################################### + +cdef CUfileError_t cuFileHandleRegister(CUfileHandle_t* fh, CUfileDescr_t* descr) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileHandleRegister(fh, descr) + + +@cython.show_performance_hints(False) +cdef void cuFileHandleDeregister(CUfileHandle_t fh) except* nogil: + _cufile._cuFileHandleDeregister(fh) + + +cdef CUfileError_t cuFileBufRegister(const void* bufPtr_base, size_t length, int flags) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileBufRegister(bufPtr_base, length, flags) + + +cdef CUfileError_t cuFileBufDeregister(const void* bufPtr_base) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileBufDeregister(bufPtr_base) + + +cdef ssize_t cuFileRead(CUfileHandle_t fh, void* bufPtr_base, size_t size, off_t file_offset, off_t bufPtr_offset) except* nogil: + return _cufile._cuFileRead(fh, bufPtr_base, size, file_offset, bufPtr_offset) + + +cdef ssize_t cuFileWrite(CUfileHandle_t fh, const void* bufPtr_base, size_t size, off_t file_offset, off_t bufPtr_offset) except* nogil: + return _cufile._cuFileWrite(fh, bufPtr_base, size, file_offset, bufPtr_offset) + + +cdef CUfileError_t cuFileDriverOpen() except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileDriverOpen() + + +cdef CUfileError_t cuFileDriverClose_v2() except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileDriverClose_v2() + + +cdef long cuFileUseCount() except* nogil: + return _cufile._cuFileUseCount() + + +cdef CUfileError_t cuFileDriverGetProperties(CUfileDrvProps_t* props) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileDriverGetProperties(props) + + +cdef CUfileError_t cuFileDriverSetPollMode(cpp_bool poll, size_t poll_threshold_size) except?CUFILE_LOADING_ERROR nogil: + return 
_cufile._cuFileDriverSetPollMode(poll, poll_threshold_size) + + +cdef CUfileError_t cuFileDriverSetMaxDirectIOSize(size_t max_direct_io_size) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileDriverSetMaxDirectIOSize(max_direct_io_size) + + +cdef CUfileError_t cuFileDriverSetMaxCacheSize(size_t max_cache_size) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileDriverSetMaxCacheSize(max_cache_size) + + +cdef CUfileError_t cuFileDriverSetMaxPinnedMemSize(size_t max_pinned_size) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileDriverSetMaxPinnedMemSize(max_pinned_size) + + +cdef CUfileError_t cuFileBatchIOSetUp(CUfileBatchHandle_t* batch_idp, unsigned nr) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileBatchIOSetUp(batch_idp, nr) + + +cdef CUfileError_t cuFileBatchIOSubmit(CUfileBatchHandle_t batch_idp, unsigned nr, CUfileIOParams_t* iocbp, unsigned int flags) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileBatchIOSubmit(batch_idp, nr, iocbp, flags) + + +cdef CUfileError_t cuFileBatchIOGetStatus(CUfileBatchHandle_t batch_idp, unsigned min_nr, unsigned* nr, CUfileIOEvents_t* iocbp, timespec* timeout) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileBatchIOGetStatus(batch_idp, min_nr, nr, iocbp, timeout) + + +cdef CUfileError_t cuFileBatchIOCancel(CUfileBatchHandle_t batch_idp) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileBatchIOCancel(batch_idp) + + +@cython.show_performance_hints(False) +cdef void cuFileBatchIODestroy(CUfileBatchHandle_t batch_idp) except* nogil: + _cufile._cuFileBatchIODestroy(batch_idp) + + +cdef CUfileError_t cuFileReadAsync(CUfileHandle_t fh, void* bufPtr_base, size_t* size_p, off_t* file_offset_p, off_t* bufPtr_offset_p, ssize_t* bytes_read_p, CUstream stream) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileReadAsync(fh, bufPtr_base, size_p, file_offset_p, bufPtr_offset_p, bytes_read_p, stream) + + +cdef CUfileError_t cuFileWriteAsync(CUfileHandle_t fh, void* 
bufPtr_base, size_t* size_p, off_t* file_offset_p, off_t* bufPtr_offset_p, ssize_t* bytes_written_p, CUstream stream) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileWriteAsync(fh, bufPtr_base, size_p, file_offset_p, bufPtr_offset_p, bytes_written_p, stream) + + +cdef CUfileError_t cuFileStreamRegister(CUstream stream, unsigned flags) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileStreamRegister(stream, flags) + + +cdef CUfileError_t cuFileStreamDeregister(CUstream stream) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileStreamDeregister(stream) + + +cdef CUfileError_t cuFileGetVersion(int* version) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileGetVersion(version) + + +cdef CUfileError_t cuFileGetParameterSizeT(CUFileSizeTConfigParameter_t param, size_t* value) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileGetParameterSizeT(param, value) + + +cdef CUfileError_t cuFileGetParameterBool(CUFileBoolConfigParameter_t param, cpp_bool* value) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileGetParameterBool(param, value) + + +cdef CUfileError_t cuFileGetParameterString(CUFileStringConfigParameter_t param, char* desc_str, int len) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileGetParameterString(param, desc_str, len) + + +cdef CUfileError_t cuFileGetParameterMinMaxValue(CUFileSizeTConfigParameter_t param, size_t* min_value, size_t* max_value) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileGetParameterMinMaxValue(param, min_value, max_value) + + +cdef CUfileError_t cuFileSetParameterSizeT(CUFileSizeTConfigParameter_t param, size_t value) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileSetParameterSizeT(param, value) + + +cdef CUfileError_t cuFileSetParameterBool(CUFileBoolConfigParameter_t param, cpp_bool value) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileSetParameterBool(param, value) + + +cdef CUfileError_t 
cuFileSetParameterString(CUFileStringConfigParameter_t param, const char* desc_str) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileSetParameterString(param, desc_str) + + +cdef CUfileError_t cuFileSetStatsLevel(int level) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileSetStatsLevel(level) + + +cdef CUfileError_t cuFileGetStatsLevel(int* level) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileGetStatsLevel(level) + + +cdef CUfileError_t cuFileStatsStart() except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileStatsStart() + + +cdef CUfileError_t cuFileStatsStop() except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileStatsStop() + + +cdef CUfileError_t cuFileStatsReset() except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileStatsReset() + + +cdef CUfileError_t cuFileGetStatsL1(CUfileStatsLevel1_t* stats) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileGetStatsL1(stats) + + +cdef CUfileError_t cuFileGetStatsL2(CUfileStatsLevel2_t* stats) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileGetStatsL2(stats) + + +cdef CUfileError_t cuFileGetStatsL3(CUfileStatsLevel3_t* stats) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileGetStatsL3(stats) + + +cdef CUfileError_t cuFileGetBARSizeInKB(int gpuIndex, size_t* barSize) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileGetBARSizeInKB(gpuIndex, barSize) + + +cdef CUfileError_t cuFileSetParameterPosixPoolSlabArray(const size_t* size_values, const size_t* count_values, int len) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileSetParameterPosixPoolSlabArray(size_values, count_values, len) + + +cdef CUfileError_t cuFileGetParameterPosixPoolSlabArray(size_t* size_values, size_t* count_values, int len) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileGetParameterPosixPoolSlabArray(size_values, count_values, len) diff --git a/cuda_bindings/cuda/bindings/cufile.pxd b/cuda_bindings/cuda/bindings/cufile.pxd index 
a343caa21..67f34e4b1 100644 --- a/cuda_bindings/cuda/bindings/cufile.pxd +++ b/cuda_bindings/cuda/bindings/cufile.pxd @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # -# This code was automatically generated across versions from 12.9.0 to 13.0.1. Do not modify it directly. +# This code was automatically generated with version 13.0.0. Do not modify it directly. from libc.stdint cimport intptr_t @@ -74,6 +74,18 @@ cpdef int get_version() except? 0 cpdef size_t get_parameter_size_t(int param) except? 0 cpdef bint get_parameter_bool(int param) except? 0 cpdef str get_parameter_string(int param, int len) +cpdef get_parameter_min_max_value(int param, intptr_t min_value, intptr_t max_value) cpdef set_parameter_size_t(int param, size_t value) cpdef set_parameter_bool(int param, bint value) cpdef set_parameter_string(int param, intptr_t desc_str) +cpdef set_stats_level(int level) +cpdef int get_stats_level() except? 0 +cpdef stats_start() +cpdef stats_stop() +cpdef stats_reset() +cpdef get_stats_l1(intptr_t stats) +cpdef get_stats_l2(intptr_t stats) +cpdef get_stats_l3(intptr_t stats) +cpdef size_t get_bar_size_in_kb(int gpu_ind_ex) except? 0 +cpdef set_parameter_posix_pool_slab_array(intptr_t size_values, intptr_t count_values, int len) +cpdef get_parameter_posix_pool_slab_array(intptr_t size_values, intptr_t count_values, int len) diff --git a/cuda_bindings/cuda/bindings/cufile.pyx b/cuda_bindings/cuda/bindings/cufile.pyx index 66b3aca2d..9fc5e787a 100644 --- a/cuda_bindings/cuda/bindings/cufile.pyx +++ b/cuda_bindings/cuda/bindings/cufile.pyx @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # -# This code was automatically generated across versions from 12.9.0 to 13.0.1. Do not modify it directly. +# This code was automatically generated with version 13.0.0. Do not modify it directly. 
cimport cython # NOQA from libc cimport errno @@ -1124,7 +1124,7 @@ cpdef driver_get_properties(intptr_t props): """Gets the Driver session properties. Args: - props (intptr_t): Properties to set. + props (intptr_t): Properties to set. .. seealso:: `cuFileDriverGetProperties` """ @@ -1273,6 +1273,21 @@ cpdef str get_parameter_string(int param, int len): return _desc_str_.decode() +cpdef get_parameter_min_max_value(int param, intptr_t min_value, intptr_t max_value): + """Get both the minimum and maximum settable values for a given size_t parameter in a single call. + + Args: + param (SizeTConfigParameter): CUfile SizeT configuration parameter. + min_value (intptr_t): Pointer to store the minimum value. + max_value (intptr_t): Pointer to store the maximum value. + + .. seealso:: `cuFileGetParameterMinMaxValue` + """ + with nogil: + status = cuFileGetParameterMinMaxValue(<_SizeTConfigParameter>param, min_value, max_value) + check_status(status) + + cpdef set_parameter_size_t(int param, size_t value): with nogil: status = cuFileSetParameterSizeT(<_SizeTConfigParameter>param, value) @@ -1291,6 +1306,141 @@ cpdef set_parameter_string(int param, intptr_t desc_str): check_status(status) +cpdef set_stats_level(int level): + """Set the level of statistics collection for cuFile operations. This will override the cufile.json settings for stats. + + Args: + level (int): Statistics level (0 = disabled, 1 = basic, 2 = detailed, 3 = verbose). + + .. seealso:: `cuFileSetStatsLevel` + """ + with nogil: + status = cuFileSetStatsLevel(level) + check_status(status) + + +cpdef int get_stats_level() except? 0: + """Get the current level of statistics collection for cuFile operations. + + Returns: + int: The current statistics level. + + .. seealso:: `cuFileGetStatsLevel` + """ + cdef int level + with nogil: + status = cuFileGetStatsLevel(&level) + check_status(status) + return level + + +cpdef stats_start(): + """Start collecting cuFile statistics. + + .. 
seealso:: `cuFileStatsStart` + """ + with nogil: + status = cuFileStatsStart() + check_status(status) + + +cpdef stats_stop(): + """Stop collecting cuFile statistics. + + .. seealso:: `cuFileStatsStop` + """ + with nogil: + status = cuFileStatsStop() + check_status(status) + + +cpdef stats_reset(): + """Reset all cuFile statistics counters. + + .. seealso:: `cuFileStatsReset` + """ + with nogil: + status = cuFileStatsReset() + check_status(status) + + +cpdef get_stats_l1(intptr_t stats): + """Get Level 1 cuFile statistics. + + Args: + stats (intptr_t): Pointer to CUfileStatsLevel1_t structure to be filled. + + .. seealso:: `cuFileGetStatsL1` + """ + with nogil: + status = cuFileGetStatsL1(stats) + check_status(status) + + +cpdef get_stats_l2(intptr_t stats): + """Get Level 2 cuFile statistics. + + Args: + stats (intptr_t): Pointer to CUfileStatsLevel2_t structure to be filled. + + .. seealso:: `cuFileGetStatsL2` + """ + with nogil: + status = cuFileGetStatsL2(stats) + check_status(status) + + +cpdef get_stats_l3(intptr_t stats): + """Get Level 3 cuFile statistics. + + Args: + stats (intptr_t): Pointer to CUfileStatsLevel3_t structure to be filled. + + .. seealso:: `cuFileGetStatsL3` + """ + with nogil: + status = cuFileGetStatsL3(stats) + check_status(status) + + +cpdef size_t get_bar_size_in_kb(int gpu_ind_ex) except? 0: + cdef size_t bar_size + with nogil: + status = cuFileGetBARSizeInKB(gpu_ind_ex, &bar_size) + check_status(status) + return bar_size + + +cpdef set_parameter_posix_pool_slab_array(intptr_t size_values, intptr_t count_values, int len): + """Set both POSIX pool slab size and count parameters as a pair. + + Args: + size_values (intptr_t): Array of slab sizes in KB. + count_values (intptr_t): Array of slab counts. + len (int): Length of both arrays (must be the same). + + .. 
seealso:: `cuFileSetParameterPosixPoolSlabArray` + """ + with nogil: + status = cuFileSetParameterPosixPoolSlabArray(size_values, count_values, len) + check_status(status) + + +cpdef get_parameter_posix_pool_slab_array(intptr_t size_values, intptr_t count_values, int len): + """Get both POSIX pool slab size and count parameters as a pair. + + Args: + size_values (intptr_t): Buffer to receive slab sizes in KB. + count_values (intptr_t): Buffer to receive slab counts. + len (int): Buffer size (must match the actual parameter length). + + .. seealso:: `cuFileGetParameterPosixPoolSlabArray` + """ + with nogil: + status = cuFileGetParameterPosixPoolSlabArray(size_values, count_values, len) + check_status(status) + + cpdef str op_status_error(int status): """cufileop status string. diff --git a/cuda_bindings/cuda/bindings/cycufile.pxd b/cuda_bindings/cuda/bindings/cycufile.pxd index 39142aa1f..11cf737f0 100644 --- a/cuda_bindings/cuda/bindings/cycufile.pxd +++ b/cuda_bindings/cuda/bindings/cycufile.pxd @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # -# This code was automatically generated across versions from 12.9.0 to 13.0.1. Do not modify it directly. +# This code was automatically generated with version 13.0.0. Do not modify it directly. 
from libc.stdint cimport uint32_t, uint64_t from libc.time cimport time_t @@ -353,11 +353,10 @@ cdef CUfileError_t cuFileGetVersion(int* version) except?CUFILE_L cdef CUfileError_t cuFileGetParameterSizeT(CUFileSizeTConfigParameter_t param, size_t* value) except?CUFILE_LOADING_ERROR nogil cdef CUfileError_t cuFileGetParameterBool(CUFileBoolConfigParameter_t param, cpp_bool* value) except?CUFILE_LOADING_ERROR nogil cdef CUfileError_t cuFileGetParameterString(CUFileStringConfigParameter_t param, char* desc_str, int len) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileGetParameterMinMaxValue(CUFileSizeTConfigParameter_t param, size_t* min_value, size_t* max_value) except?CUFILE_LOADING_ERROR nogil cdef CUfileError_t cuFileSetParameterSizeT(CUFileSizeTConfigParameter_t param, size_t value) except?CUFILE_LOADING_ERROR nogil cdef CUfileError_t cuFileSetParameterBool(CUFileBoolConfigParameter_t param, cpp_bool value) except?CUFILE_LOADING_ERROR nogil cdef CUfileError_t cuFileSetParameterString(CUFileStringConfigParameter_t param, const char* desc_str) except?CUFILE_LOADING_ERROR nogil -cdef CUfileError_t cuFileDriverClose() except?CUFILE_LOADING_ERROR nogil -cdef CUfileError_t cuFileGetParameterMinMaxValue(CUFileSizeTConfigParameter_t param, size_t* min_value, size_t* max_value) except?CUFILE_LOADING_ERROR nogil cdef CUfileError_t cuFileSetStatsLevel(int level) except?CUFILE_LOADING_ERROR nogil cdef CUfileError_t cuFileGetStatsLevel(int* level) except?CUFILE_LOADING_ERROR nogil cdef CUfileError_t cuFileStatsStart() except?CUFILE_LOADING_ERROR nogil diff --git a/cuda_bindings/cuda/bindings/cycufile.pyx b/cuda_bindings/cuda/bindings/cycufile.pyx index d6bbb2745..e23177137 100644 --- a/cuda_bindings/cuda/bindings/cycufile.pyx +++ b/cuda_bindings/cuda/bindings/cycufile.pyx @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # -# This code was automatically generated across versions from 12.9.0 to 13.0.1. Do not modify it directly. 
+# This code was automatically generated with version 13.0.0. Do not modify it directly. from ._internal cimport cufile as _cufile @@ -122,6 +122,10 @@ cdef CUfileError_t cuFileGetParameterString(CUFileStringConfigParameter_t param, return _cufile._cuFileGetParameterString(param, desc_str, len) +cdef CUfileError_t cuFileGetParameterMinMaxValue(CUFileSizeTConfigParameter_t param, size_t* min_value, size_t* max_value) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileGetParameterMinMaxValue(param, min_value, max_value) + + cdef CUfileError_t cuFileSetParameterSizeT(CUFileSizeTConfigParameter_t param, size_t value) except?CUFILE_LOADING_ERROR nogil: return _cufile._cuFileSetParameterSizeT(param, value) @@ -134,14 +138,6 @@ cdef CUfileError_t cuFileSetParameterString(CUFileStringConfigParameter_t param, return _cufile._cuFileSetParameterString(param, desc_str) -cdef CUfileError_t cuFileDriverClose() except?CUFILE_LOADING_ERROR nogil: - return _cufile._cuFileDriverClose() - - -cdef CUfileError_t cuFileGetParameterMinMaxValue(CUFileSizeTConfigParameter_t param, size_t* min_value, size_t* max_value) except?CUFILE_LOADING_ERROR nogil: - return _cufile._cuFileGetParameterMinMaxValue(param, min_value, max_value) - - cdef CUfileError_t cuFileSetStatsLevel(int level) except?CUFILE_LOADING_ERROR nogil: return _cufile._cuFileSetStatsLevel(level) diff --git a/cuda_bindings/tests/test_cufile.py b/cuda_bindings/tests/test_cufile.py index c9a910f5e..b37e639cf 100644 --- a/cuda_bindings/tests/test_cufile.py +++ b/cuda_bindings/tests/test_cufile.py @@ -10,9 +10,9 @@ import tempfile from contextlib import suppress from functools import cache - -import cuda.bindings.driver as cuda import pytest +import cuda.bindings.driver as cuda + # Configure logging to show INFO level and above logging.basicConfig( @@ -118,8 +118,6 @@ def isSupportedFilesystem(): # Global skip condition for all tests if cuFile library is not available pytestmark = pytest.mark.skipif(not 
cufileLibraryAvailable(), reason="cuFile library not available on this system") - - def safe_decode_string(raw_value): """Safely decode a string value from ctypes buffer.""" # Find null terminator if present @@ -1415,10 +1413,10 @@ def test_batch_io_cancel(): cufile.driver_close() cuda.cuDevicePrimaryCtxRelease(device) - @pytest.mark.skipif(not isSupportedFilesystem(), reason="cuFile handle_register requires ext4 or xfs filesystem") def test_batch_io_large_operations(): """Test batch IO with large buffer operations.""" + # Initialize CUDA (err,) = cuda.cuInit(0) assert err == cuda.CUresult.CUDA_SUCCESS @@ -1477,11 +1475,11 @@ def test_batch_io_large_operations(): handle = cufile.handle_register(descr.ptr) # Set up batch IO - batch_handle = cufile.batch_io_set_up(num_operations * 2) # 2 writes + 2 reads + batch_handle = cufile.batch_io_set_up(num_operations) # Only for writes # Create IOParams array for batch operations - io_params = cufile.IOParams(num_operations * 2) - io_events = cufile.IOEvents(num_operations * 2) + io_params = cufile.IOParams(num_operations) + io_events = cufile.IOEvents(num_operations) # Prepare test data test_strings = [ @@ -1498,7 +1496,7 @@ def test_batch_io_large_operations(): test_data = test_data[:buf_size] host_buf = ctypes.create_string_buffer(test_data, buf_size) cuda.cuMemcpyHtoDAsync(write_buffers[i], host_buf, buf_size, 0) - cuda.cuStreamSynchronize(0) + cuda.cuStreamSynchronize(0) # Set up write operations for i in range(num_operations): @@ -1511,47 +1509,70 @@ def test_batch_io_large_operations(): io_params[i].u.batch.dev_ptr_offset = 0 io_params[i].u.batch.size_ = buf_size - # Set up read operations - for i in range(num_operations): - idx = i + num_operations - io_params[idx].mode = cufile.BatchMode.BATCH # Batch mode - io_params[idx].fh = handle - io_params[idx].opcode = cufile.Opcode.READ # Read opcode - io_params[idx].cookie = i + 100 - io_params[idx].u.batch.dev_ptr_base = int(read_buffers[i]) - 
io_params[idx].u.batch.file_offset = i * buf_size - io_params[idx].u.batch.dev_ptr_offset = 0 - io_params[idx].u.batch.size_ = buf_size + # Submit writes + cufile.batch_io_submit(batch_handle, num_operations, io_params.ptr, 0) - # Submit batch operations - cufile.batch_io_submit(batch_handle, num_operations * 2, io_params.ptr, 0) + # Wait for writes to complete + nr_completed_writes = ctypes.c_uint(num_operations) + timeout = ctypes.c_int(10000) + cufile.batch_io_get_status( + batch_handle, num_operations, ctypes.addressof(nr_completed_writes), + io_events.ptr, ctypes.addressof(timeout) + ) - # Get batch status - min_nr = num_operations * 2 # Wait for all operations to complete - nr_completed = ctypes.c_uint(num_operations * 2) # Initialize to max operations posted - timeout = ctypes.c_int(10000) # 10 second timeout for large operations + # Verify writes succeeded + for i in range(nr_completed_writes.value): + assert io_events[i].status == cufile.Status.COMPLETE, ( + f"Write {i} failed with status {io_events[i].status}" + ) + + # Force file sync + os.fsync(fd) + # Clean up write batch + cufile.batch_io_destroy(batch_handle) + + # Now submit reads separately + read_batch_handle = cufile.batch_io_set_up(num_operations) + read_io_params = cufile.IOParams(num_operations) + read_io_events = cufile.IOEvents(num_operations) + + # Set up read operations + for i in range(num_operations): + read_io_params[i].mode = cufile.BatchMode.BATCH + read_io_params[i].fh = handle + read_io_params[i].opcode = cufile.Opcode.READ + read_io_params[i].cookie = i + 100 + read_io_params[i].u.batch.dev_ptr_base = int(read_buffers[i]) + read_io_params[i].u.batch.file_offset = i * buf_size + read_io_params[i].u.batch.dev_ptr_offset = 0 + read_io_params[i].u.batch.size_ = buf_size + + # Submit reads + cufile.batch_io_submit(read_batch_handle, num_operations, read_io_params.ptr, 0) + + # Wait for reads + nr_completed = ctypes.c_uint(num_operations) cufile.batch_io_get_status( - batch_handle, 
min_nr, ctypes.addressof(nr_completed), io_events.ptr, ctypes.addressof(timeout) + read_batch_handle, num_operations, ctypes.addressof(nr_completed), + read_io_events.ptr, ctypes.addressof(timeout) ) # Verify all operations completed successfully - assert nr_completed.value == num_operations * 2, ( - f"Expected {num_operations * 2} operations, got {nr_completed.value}" + assert nr_completed.value == num_operations, ( + f"Expected {num_operations} operations, got {nr_completed.value}" ) # Collect all returned cookies returned_cookies = set() - for i in range(num_operations * 2): - assert io_events[i].status == cufile.Status.COMPLETE, ( - f"Operation {i} failed with status {io_events[i].status}" + for i in range(num_operations): + assert read_io_events[i].status == cufile.Status.COMPLETE, ( + f"Operation {i} failed with status {read_io_events[i].status}" ) - returned_cookies.add(io_events[i].cookie) + returned_cookies.add(read_io_events[i].cookie) # Verify all expected cookies are present - expected_cookies = set(range(num_operations)) | set( - range(100, 100 + num_operations) - ) # write cookies 0,1 + read cookies 100,101 + expected_cookies = set(range(100, 100 + num_operations)) assert returned_cookies == expected_cookies, ( f"Cookie mismatch. 
Expected {expected_cookies}, got {returned_cookies}" ) @@ -1579,7 +1600,7 @@ def test_batch_io_large_operations(): ) # Clean up batch IO - cufile.batch_io_destroy(batch_handle) + cufile.batch_io_destroy(read_batch_handle) # Deregister file handle cufile.handle_deregister(handle) @@ -1604,8 +1625,7 @@ def test_batch_io_large_operations(): # Close cuFile driver cufile.driver_close() cuda.cuDevicePrimaryCtxRelease(device) - - + @pytest.mark.skipif( cufileVersionLessThan(1140), reason="cuFile parameter APIs require cuFile library version 1.14.0 or later" ) @@ -1719,6 +1739,7 @@ def test_set_get_parameter_size_t(): @pytest.mark.skipif( cufileVersionLessThan(1140), reason="cuFile parameter APIs require cuFile library version 1.14.0 or later" ) + def test_set_get_parameter_bool(): """Test setting and getting boolean parameters with cuFile validation.""" @@ -1800,10 +1821,6 @@ def test_set_get_parameter_bool(): finally: cuda.cuDevicePrimaryCtxRelease(device) - -@pytest.mark.skipif( - cufileVersionLessThan(1140), reason="cuFile parameter APIs require cuFile library version 1.14.0 or later" -) def test_set_get_parameter_string(): """Test setting and getting string parameters with cuFile validation.""" @@ -1885,3 +1902,659 @@ def test_set_get_parameter_string(): finally: cuda.cuDevicePrimaryCtxRelease(device) + +@pytest.mark.skipif( + cufileVersionLessThan(1140), reason="cuFile parameter APIs require cuFile library version 13.0 or later" +) +def test_set_stats_level(): + """Test cuFile statistics level configuration.""" + # Initialize CUDA + (err,) = cuda.cuInit(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, device = cuda.cuDeviceGet(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, ctx = cuda.cuDevicePrimaryCtxRetain(device) + assert err == cuda.CUresult.CUDA_SUCCESS + (err,) = cuda.cuCtxSetCurrent(ctx) + assert err == cuda.CUresult.CUDA_SUCCESS + + # Open cuFile driver + cufile.driver_open() + + try: + # Test setting different statistics levels + valid_levels = 
[0, 1, 2, 3] # 0=disabled, 1=basic, 2=detailed, 3=verbose + + for level in valid_levels: + cufile.set_stats_level(level) + + # Verify the level was set correctly + current_level = cufile.get_stats_level() + assert current_level == level, f"Expected stats level {level}, but got {current_level}" + + logging.info(f"Successfully set and verified stats level {level}") + + # Test invalid levels (each must raise); pytest.fail is used because the broad `except Exception` below would swallow an AssertionError + try: + cufile.set_stats_level(-1) # Invalid negative level + pytest.fail("Expected an error for invalid stats level -1") + except Exception as e: + logging.info(f"Correctly caught error for invalid stats level: {e}") + + try: + cufile.set_stats_level(4) # Invalid level > 3 + pytest.fail("Expected an error for invalid stats level 4") + except Exception as e: + logging.info(f"Correctly caught error for invalid stats level: {e}") + + # Reset to level 0 (disabled) for cleanup + cufile.set_stats_level(0) + + finally: + # Close cuFile driver + cufile.driver_close() + cuda.cuDevicePrimaryCtxRelease(device) + + +@pytest.mark.skipif( + cufileVersionLessThan(1150), reason="cuFile parameter APIs require cuFile library version 13.0 or later" +) +def test_stats_start(): + """Test cuFile statistics collection start.""" + # Initialize CUDA + (err,) = cuda.cuInit(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, device = cuda.cuDeviceGet(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, ctx = cuda.cuDevicePrimaryCtxRetain(device) + assert err == cuda.CUresult.CUDA_SUCCESS + (err,) = cuda.cuCtxSetCurrent(ctx) + assert err == cuda.CUresult.CUDA_SUCCESS + + # Open cuFile driver + cufile.driver_open() + + try: + # Set statistics level first (required before starting stats) + cufile.set_stats_level(1) # Level 1 = basic statistics + + # Start collecting cuFile statistics + cufile.stats_start() + + # Verify statistics collection is active + # Note: Additional verification would require stats_get() or similar functions + logging.info("cuFile statistics collection 
started successfully") + + finally: + # Close cuFile driver + cufile.driver_close() + cuda.cuDevicePrimaryCtxRelease(device) + +@pytest.mark.skipif( + cufileVersionLessThan(1150), reason="cuFile parameter APIs require cuFile library version 13.0 or later" +) +def test_stats_stop(): + """Test cuFile statistics collection stop.""" + # Initialize CUDA + (err,) = cuda.cuInit(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, device = cuda.cuDeviceGet(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, ctx = cuda.cuDevicePrimaryCtxRetain(device) + assert err == cuda.CUresult.CUDA_SUCCESS + (err,) = cuda.cuCtxSetCurrent(ctx) + assert err == cuda.CUresult.CUDA_SUCCESS + + # Open cuFile driver + cufile.driver_open() + + try: + # Set statistics level first (required before starting stats) + cufile.set_stats_level(1) # Level 1 = basic statistics + # Start collecting cuFile statistics first + cufile.stats_start() + + # Stop collecting cuFile statistics + cufile.stats_stop() + + # Verify statistics collection is stopped + logging.info("cuFile statistics collection stopped successfully") + + finally: + # Close cuFile driver + cufile.driver_close() + cuda.cuDevicePrimaryCtxRelease(device) + +@pytest.mark.skipif( + cufileVersionLessThan(1150), reason="cuFile parameter APIs require cuFile library version 13.0 or later" +) +def test_stats_reset(): + """Test cuFile statistics reset.""" + # Initialize CUDA + (err,) = cuda.cuInit(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, device = cuda.cuDeviceGet(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, ctx = cuda.cuDevicePrimaryCtxRetain(device) + assert err == cuda.CUresult.CUDA_SUCCESS + (err,) = cuda.cuCtxSetCurrent(ctx) + assert err == cuda.CUresult.CUDA_SUCCESS + + # Open cuFile driver + cufile.driver_open() + + try: + # Set statistics level first (required before starting stats) + cufile.set_stats_level(1) # Level 1 = basic statistics + # Start collecting cuFile statistics first + + cufile.stats_start() + + # 
Reset cuFile statistics to clear all counters + cufile.stats_reset() + + # Verify statistics reset completed successfully + logging.info("cuFile statistics reset successfully") + + # Stop statistics collection + cufile.stats_stop() + + finally: + # Close cuFile driver + cufile.driver_close() + cuda.cuDevicePrimaryCtxRelease(device) + +@pytest.mark.skipif( + cufileVersionLessThan(1150), reason="cuFile parameter APIs require cuFile library version 13.0 or later" +) +@pytest.mark.skipif(not isSupportedFilesystem(), reason="cuFile handle_register requires ext4 or xfs filesystem") +def test_get_stats_l1(): + """Test cuFile L1 statistics retrieval with file operations.""" + # Initialize CUDA + (err,) = cuda.cuInit(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, device = cuda.cuDeviceGet(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, ctx = cuda.cuDevicePrimaryCtxRetain(device) + assert err == cuda.CUresult.CUDA_SUCCESS + (err,) = cuda.cuCtxSetCurrent(ctx) + assert err == cuda.CUresult.CUDA_SUCCESS + + # Open cuFile driver + cufile.driver_open() + + # Create test file directly with O_DIRECT + file_path = "test_stats_l1.bin" + fd = os.open(file_path, os.O_CREAT | os.O_RDWR | os.O_DIRECT, 0o600) + + try: + cufile.set_stats_level(1) # L1 = basic operation counts + # Start collecting cuFile statistics + cufile.stats_start() + + # Create and initialize the descriptor + descr = cufile.Descr() + descr.type = cufile.FileHandleType.OPAQUE_FD + descr.handle.fd = fd + descr.fs_ops = 0 + + # Register the handle + handle = cufile.handle_register(descr.ptr) + + # Allocate CUDA memory + buffer_size = 4096 # 4KB, aligned to 4096 bytes + err, buf_ptr = cuda.cuMemAlloc(buffer_size) + assert err == cuda.CUresult.CUDA_SUCCESS + + # Register the buffer with cuFile + buf_ptr_int = int(buf_ptr) + cufile.buf_register(buf_ptr_int, buffer_size, 0) + + # Prepare test data and copy to GPU buffer + test_data = b"cuFile L1 stats test data" * 100 # Fill buffer + test_data = 
test_data[:buffer_size] + host_buf = ctypes.create_string_buffer(test_data, buffer_size) + cuda.cuMemcpyHtoD(buf_ptr, host_buf, len(test_data)) + + # Perform cuFile operations to generate L1 statistics + cufile.write(handle, buf_ptr_int, buffer_size, 0, 0) + cufile.read(handle, buf_ptr_int, buffer_size, 0, 0) + + # Allocate buffer for L1 statistics + stats_buffer = ctypes.create_string_buffer(1024) # Allocate sufficient space + stats_ptr = ctypes.addressof(stats_buffer) + + # Get L1 statistics (basic operation counts) + cufile.get_stats_l1(stats_ptr) + + # Verify that statistics data was written to the buffer + # Convert buffer to bytes and check that it's not all zeros + buffer_bytes = bytes(stats_buffer) + non_zero_bytes = sum(1 for b in buffer_bytes if b != 0) + assert non_zero_bytes > 0, f"Expected statistics data to be written to buffer, but got {non_zero_bytes} non-zero bytes" + + # Verify statistics retrieval completed successfully + logging.info("cuFile L1 statistics retrieved successfully after file operations") + + # Stop statistics collection + cufile.stats_stop() + + # Clean up cuFile resources + cufile.buf_deregister(buf_ptr_int) + cufile.handle_deregister(handle) + cuda.cuMemFree(buf_ptr) + + finally: + os.close(fd) + with suppress(OSError): + os.unlink(file_path) + cufile.driver_close() + cuda.cuDevicePrimaryCtxRelease(device) + +@pytest.mark.skipif( + cufileVersionLessThan(1150), reason="cuFile parameter APIs require cuFile library version 13.0 or later" +) +@pytest.mark.skipif(not isSupportedFilesystem(), reason="cuFile handle_register requires ext4 or xfs filesystem") +def test_get_stats_l2(): + """Test cuFile L2 statistics retrieval with file operations.""" + # Initialize CUDA + (err,) = cuda.cuInit(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, device = cuda.cuDeviceGet(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, ctx = cuda.cuDevicePrimaryCtxRetain(device) + assert err == cuda.CUresult.CUDA_SUCCESS + (err,) = 
cuda.cuCtxSetCurrent(ctx) + assert err == cuda.CUresult.CUDA_SUCCESS + + # Open cuFile driver + cufile.driver_open() + + # Create test file directly with O_DIRECT + file_path = "test_stats_l2.bin" + fd = os.open(file_path, os.O_CREAT | os.O_RDWR | os.O_DIRECT, 0o600) + + try: + cufile.set_stats_level(2) # L2 = detailed performance metrics + + # Start collecting cuFile statistics + cufile.stats_start() + + # Create and initialize the descriptor + descr = cufile.Descr() + descr.type = cufile.FileHandleType.OPAQUE_FD + descr.handle.fd = fd + descr.fs_ops = 0 + + # Register the handle + handle = cufile.handle_register(descr.ptr) + + # Allocate CUDA memory + buffer_size = 8192 # 8KB for more detailed stats + err, buf_ptr = cuda.cuMemAlloc(buffer_size) + assert err == cuda.CUresult.CUDA_SUCCESS + + # Register the buffer with cuFile + buf_ptr_int = int(buf_ptr) + cufile.buf_register(buf_ptr_int, buffer_size, 0) + + # Prepare test data and copy to GPU buffer + test_data = b"cuFile L2 detailed stats test data" * 150 # Fill buffer + test_data = test_data[:buffer_size] + host_buf = ctypes.create_string_buffer(test_data, buffer_size) + cuda.cuMemcpyHtoD(buf_ptr, host_buf, len(test_data)) + + # Perform multiple cuFile operations to generate detailed L2 statistics + cufile.write(handle, buf_ptr_int, buffer_size, 0, 0) + cufile.read(handle, buf_ptr_int, buffer_size, 0, 0) + cufile.write(handle, buf_ptr_int, buffer_size, buffer_size, 0) # Different offset + cufile.read(handle, buf_ptr_int, buffer_size, buffer_size, 0) + + # Allocate buffer for L2 statistics + stats_buffer = ctypes.create_string_buffer(2048) # Larger buffer for detailed stats + stats_ptr = ctypes.addressof(stats_buffer) + + # Get L2 statistics (detailed performance metrics) + cufile.get_stats_l2(stats_ptr) + + # Verify that statistics data was written to the buffer + buffer_bytes = bytes(stats_buffer) + non_zero_bytes = sum(1 for b in buffer_bytes if b != 0) + assert non_zero_bytes > 0, f"Expected statistics data 
to be written to buffer, but got {non_zero_bytes} non-zero bytes" + + # Verify statistics retrieval completed successfully + logging.info("cuFile L2 statistics retrieved successfully after file operations") + + # Stop statistics collection + cufile.stats_stop() + + # Clean up cuFile resources + cufile.buf_deregister(buf_ptr_int) + cufile.handle_deregister(handle) + cuda.cuMemFree(buf_ptr) + + finally: + os.close(fd) + with suppress(OSError): + os.unlink(file_path) + cufile.driver_close() + cuda.cuDevicePrimaryCtxRelease(device) + +@pytest.mark.skipif( + cufileVersionLessThan(1150), reason="cuFile parameter APIs require cuFile library version 13.0 or later" +) +@pytest.mark.skipif(not isSupportedFilesystem(), reason="cuFile handle_register requires ext4 or xfs filesystem") +def test_get_stats_l3(): + """Test cuFile L3 statistics retrieval with file operations.""" + # Initialize CUDA + (err,) = cuda.cuInit(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, device = cuda.cuDeviceGet(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, ctx = cuda.cuDevicePrimaryCtxRetain(device) + assert err == cuda.CUresult.CUDA_SUCCESS + (err,) = cuda.cuCtxSetCurrent(ctx) + assert err == cuda.CUresult.CUDA_SUCCESS + + # Open cuFile driver + cufile.driver_open() + + # Create test file directly with O_DIRECT + file_path = "test_stats_l3.bin" + fd = os.open(file_path, os.O_CREAT | os.O_RDWR | os.O_DIRECT, 0o600) + + try: + cufile.set_stats_level(3) # L3 = comprehensive diagnostic data + + # Start collecting cuFile statistics + cufile.stats_start() + + # Create and initialize the descriptor + descr = cufile.Descr() + descr.type = cufile.FileHandleType.OPAQUE_FD + descr.handle.fd = fd + descr.fs_ops = 0 + + # Register the handle + handle = cufile.handle_register(descr.ptr) + + # Allocate CUDA memory + buffer_size = 16384 # 16KB for comprehensive stats testing + err, buf_ptr = cuda.cuMemAlloc(buffer_size) + assert err == cuda.CUresult.CUDA_SUCCESS + + # Register the buffer with 
cuFile + buf_ptr_int = int(buf_ptr) + cufile.buf_register(buf_ptr_int, buffer_size, 0) + + # Prepare test data and copy to GPU buffer + test_data = b"cuFile L3 comprehensive stats test data" * 200 # Fill buffer + test_data = test_data[:buffer_size] + host_buf = ctypes.create_string_buffer(test_data, buffer_size) + cuda.cuMemcpyHtoD(buf_ptr, host_buf, len(test_data)) + + # Perform comprehensive cuFile operations to generate L3 statistics + # Multiple writes and reads at different offsets to generate rich stats + cufile.write(handle, buf_ptr_int, buffer_size, 0, 0) + cufile.read(handle, buf_ptr_int, buffer_size, 0, 0) + cufile.write(handle, buf_ptr_int, buffer_size, buffer_size, 0) # Different offset + cufile.read(handle, buf_ptr_int, buffer_size, buffer_size, 0) + cufile.write(handle, buf_ptr_int, buffer_size // 2, buffer_size * 2, 0) # Partial write + cufile.read(handle, buf_ptr_int, buffer_size // 2, buffer_size * 2, 0) # Partial read + + # Allocate buffer for L3 statistics + stats_buffer = ctypes.create_string_buffer(4096) # Largest buffer for comprehensive stats + stats_ptr = ctypes.addressof(stats_buffer) + + # Get L3 statistics (comprehensive diagnostic data) + cufile.get_stats_l3(stats_ptr) + + # Verify that statistics data was written to the buffer + buffer_bytes = bytes(stats_buffer) + non_zero_bytes = sum(1 for b in buffer_bytes if b != 0) + assert non_zero_bytes > 0, f"Expected statistics data to be written to buffer, but got {non_zero_bytes} non-zero bytes" + + # Verify statistics retrieval completed successfully + logging.info("cuFile L3 statistics retrieved successfully after file operations") + + # Stop statistics collection + cufile.stats_stop() + + # Clean up cuFile resources + cufile.buf_deregister(buf_ptr_int) + cufile.handle_deregister(handle) + cuda.cuMemFree(buf_ptr) + + finally: + os.close(fd) + with suppress(OSError): + os.unlink(file_path) + cufile.driver_close() + cuda.cuDevicePrimaryCtxRelease(device) + +@pytest.mark.skipif( + 
cufileVersionLessThan(1150), reason="cuFile parameter APIs require cuFile library version 13.0 or later" +) +def test_get_bar_size_in_kb(): + """Test cuFile BAR (Base Address Register) size retrieval.""" + # Initialize CUDA + (err,) = cuda.cuInit(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, device = cuda.cuDeviceGet(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, ctx = cuda.cuDevicePrimaryCtxRetain(device) + assert err == cuda.CUresult.CUDA_SUCCESS + (err,) = cuda.cuCtxSetCurrent(ctx) + assert err == cuda.CUresult.CUDA_SUCCESS + + # Open cuFile driver + cufile.driver_open() + + try: + # Get BAR size in kilobytes + bar_size_kb = cufile.get_bar_size_in_kb(0) + + # Verify BAR size is a reasonable value + assert isinstance(bar_size_kb, int), "BAR size should be an integer" + assert bar_size_kb > 0, "BAR size should be positive" + + logging.info(f"GPU BAR size: {bar_size_kb} KB ({bar_size_kb / 1024 / 1024:.2f} GB)") + + finally: + # Close cuFile driver + cufile.driver_close() + cuda.cuDevicePrimaryCtxRelease(device) + +@pytest.mark.skipif( + cufileVersionLessThan(1150), reason="cuFile parameter APIs require cuFile library version 13.0 or later" +) +def test_set_parameter_posix_pool_slab_array(): + """Test cuFile POSIX pool slab array configuration.""" + # Initialize CUDA + (err,) = cuda.cuInit(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, device = cuda.cuDeviceGet(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, ctx = cuda.cuDevicePrimaryCtxRetain(device) + assert err == cuda.CUresult.CUDA_SUCCESS + (err,) = cuda.cuCtxSetCurrent(ctx) + assert err == cuda.CUresult.CUDA_SUCCESS + + # Define slab sizes for POSIX I/O pool (common I/O buffer sizes) - BEFORE driver open + import ctypes + slab_sizes = [ + 4096, # 4KB - small files + 65536, # 64KB - medium files + 1048576, # 1MB - large files + 16777216, # 16MB - very large files + ] + + # Define counts for each slab size (number of buffers) + slab_counts = [ + 10, # 10 buffers of 4KB + 5, # 5 
buffers of 64KB + 3, # 3 buffers of 1MB + 2, # 2 buffers of 16MB + ] + + # Convert to ctypes arrays + size_array_type = ctypes.c_size_t * len(slab_sizes) + count_array_type = ctypes.c_size_t * len(slab_counts) + size_array = size_array_type(*slab_sizes) + count_array = count_array_type(*slab_counts) + + # Set POSIX pool slab array configuration BEFORE opening driver + cufile.set_parameter_posix_pool_slab_array(ctypes.addressof(size_array), ctypes.addressof(count_array), len(slab_sizes)) + + # Open cuFile driver AFTER setting parameters + cufile.driver_open() + + try: + # After setting parameters, retrieve them back to verify + retrieved_sizes = (ctypes.c_size_t * len(slab_sizes))() + retrieved_counts = (ctypes.c_size_t * len(slab_counts))() + + cufile.get_parameter_posix_pool_slab_array(ctypes.addressof(retrieved_sizes), ctypes.addressof(retrieved_counts), len(slab_sizes)) + + # Verify they match what we set + for i in range(len(slab_sizes)): + assert retrieved_sizes[i] == slab_sizes[i], f"Size mismatch at index {i}: expected {slab_sizes[i]}, got {retrieved_sizes[i]}" + assert retrieved_counts[i] == slab_counts[i], f"Count mismatch at index {i}: expected {slab_counts[i]}, got {retrieved_counts[i]}" + + # Verify configuration was accepted successfully + logging.info(f"POSIX pool slab array configured with {len(slab_sizes)} slab sizes") + logging.info(f"Slab sizes: {[f'{size//1024}KB' for size in slab_sizes]}") + logging.info("Round-trip verification successful: set and retrieved values match") + + finally: + # Close cuFile driver + cufile.driver_close() + cuda.cuDevicePrimaryCtxRelease(device) + + +@pytest.mark.skipif( + cufileVersionLessThan(1150), reason="cuFile parameter APIs require cuFile library version 1.14.0 or later" +) +def test_set_get_parameter_size_t(): + """Test setting and getting size_t parameters with cuFile validation.""" + + # Initialize CUDA + (err,) = cuda.cuInit(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, device = cuda.cuDeviceGet(0) 
+ assert err == cuda.CUresult.CUDA_SUCCESS + + err, ctx = cuda.cuDevicePrimaryCtxRetain(device) + assert err == cuda.CUresult.CUDA_SUCCESS + (err,) = cuda.cuCtxSetCurrent(ctx) + assert err == cuda.CUresult.CUDA_SUCCESS + + try: + # Test setting and getting various size_t parameters + + # Test poll threshold size (in KB) + poll_threshold_kb = 64 # 64KB threshold + cufile.set_parameter_size_t(cufile.SizeTConfigParameter.POLLTHRESHOLD_SIZE_KB, poll_threshold_kb) + retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.POLLTHRESHOLD_SIZE_KB) + assert retrieved_value == poll_threshold_kb, ( + f"Poll threshold mismatch: set {poll_threshold_kb}, got {retrieved_value}" + ) + + # Test max direct IO size (in KB) + max_direct_io_kb = 1024 # 1MB max direct IO size + cufile.set_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_MAX_DIRECT_IO_SIZE_KB, max_direct_io_kb) + retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_MAX_DIRECT_IO_SIZE_KB) + assert retrieved_value == max_direct_io_kb, ( + f"Max direct IO size mismatch: set {max_direct_io_kb}, got {retrieved_value}" + ) + + # Test max device cache size (in KB) + max_cache_kb = 512 # 512KB max cache size + cufile.set_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_MAX_DEVICE_CACHE_SIZE_KB, max_cache_kb) + retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_MAX_DEVICE_CACHE_SIZE_KB) + assert retrieved_value == max_cache_kb, f"Max cache size mismatch: set {max_cache_kb}, got {retrieved_value}" + + # Test per buffer cache size (in KB) + per_buffer_cache_kb = 128 # 128KB per buffer cache + cufile.set_parameter_size_t( + cufile.SizeTConfigParameter.PROPERTIES_PER_BUFFER_CACHE_SIZE_KB, per_buffer_cache_kb + ) + retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_PER_BUFFER_CACHE_SIZE_KB) + assert retrieved_value == per_buffer_cache_kb, ( + f"Per buffer cache size mismatch: set {per_buffer_cache_kb}, got 
{retrieved_value}" + ) + + # Test max device pinned memory size (in KB) + max_pinned_kb = 2048 # 2MB max pinned memory + cufile.set_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_MAX_DEVICE_PINNED_MEM_SIZE_KB, max_pinned_kb) + retrieved_value = cufile.get_parameter_size_t( + cufile.SizeTConfigParameter.PROPERTIES_MAX_DEVICE_PINNED_MEM_SIZE_KB + ) + assert retrieved_value == max_pinned_kb, ( + f"Max pinned memory size mismatch: set {max_pinned_kb}, got {retrieved_value}" + ) + + # Test IO batch size + batch_size = 16 # 16 operations per batch + cufile.set_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_IO_BATCHSIZE, batch_size) + retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_IO_BATCHSIZE) + assert retrieved_value == batch_size, f"IO batch size mismatch: set {batch_size}, got {retrieved_value}" + + # Test batch IO timeout (in milliseconds) + timeout_ms = 5000 # 5 second timeout + cufile.set_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_BATCH_IO_TIMEOUT_MS, timeout_ms) + retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_BATCH_IO_TIMEOUT_MS) + assert retrieved_value == timeout_ms, f"Batch IO timeout mismatch: set {timeout_ms}, got {retrieved_value}" + + # Test execution parameters + max_io_queue_depth = 32 # Max 32 operations in queue + cufile.set_parameter_size_t(cufile.SizeTConfigParameter.EXECUTION_MAX_IO_QUEUE_DEPTH, max_io_queue_depth) + retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.EXECUTION_MAX_IO_QUEUE_DEPTH) + assert retrieved_value == max_io_queue_depth, ( + f"Max IO queue depth mismatch: set {max_io_queue_depth}, got {retrieved_value}" + ) + + max_io_threads = 8 # Max 8 IO threads + cufile.set_parameter_size_t(cufile.SizeTConfigParameter.EXECUTION_MAX_IO_THREADS, max_io_threads) + retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.EXECUTION_MAX_IO_THREADS) + assert retrieved_value == max_io_threads, ( + f"Max 
IO threads mismatch: set {max_io_threads}, got {retrieved_value}" + ) + + min_io_threshold_kb = 4 # 4KB minimum IO threshold + cufile.set_parameter_size_t(cufile.SizeTConfigParameter.EXECUTION_MIN_IO_THRESHOLD_SIZE_KB, min_io_threshold_kb) + retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.EXECUTION_MIN_IO_THRESHOLD_SIZE_KB) + assert retrieved_value == min_io_threshold_kb, ( + f"Min IO threshold mismatch: set {min_io_threshold_kb}, got {retrieved_value}" + ) + + max_request_parallelism = 4 # Max 4 parallel requests + cufile.set_parameter_size_t( + cufile.SizeTConfigParameter.EXECUTION_MAX_REQUEST_PARALLELISM, max_request_parallelism + ) + retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.EXECUTION_MAX_REQUEST_PARALLELISM) + assert retrieved_value == max_request_parallelism, ( + f"Max request parallelism mismatch: set {max_request_parallelism}, got {retrieved_value}" + ) + + finally: + cuda.cuDevicePrimaryCtxRelease(device)