From 044f179998fc2ed232792a31cdb060234489087e Mon Sep 17 00:00:00 2001 From: Kamil Tokarski Date: Thu, 27 Nov 2025 14:27:39 +0100 Subject: [PATCH] Fix potential leaks for interrupted dlpack capsule creation Signed-off-by: Kamil Tokarski --- cuda_core/cuda/core/experimental/_dlpack.pyx | 127 +++++++++++++------ cuda_core/tests/test_memory.py | 13 ++ 2 files changed, 103 insertions(+), 37 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_dlpack.pyx b/cuda_core/cuda/core/experimental/_dlpack.pyx index 075462b067..c549c83228 100644 --- a/cuda_core/cuda/core/experimental/_dlpack.pyx +++ b/cuda_core/cuda/core/experimental/_dlpack.pyx @@ -26,49 +26,54 @@ cdef void pycapsule_deleter(object capsule) noexcept: cdef void deleter(DLManagedTensor* tensor) noexcept with gil: - stdlib.free(tensor.dl_tensor.shape) - if tensor.manager_ctx: - cpython.Py_DECREF(tensor.manager_ctx) - tensor.manager_ctx = NULL - stdlib.free(tensor) + if tensor: + if tensor.dl_tensor.shape: + stdlib.free(tensor.dl_tensor.shape) + if tensor.manager_ctx: + cpython.Py_DECREF(tensor.manager_ctx) + tensor.manager_ctx = NULL + stdlib.free(tensor) cdef void versioned_deleter(DLManagedTensorVersioned* tensor) noexcept with gil: - stdlib.free(tensor.dl_tensor.shape) - if tensor.manager_ctx: - cpython.Py_DECREF(tensor.manager_ctx) - tensor.manager_ctx = NULL - stdlib.free(tensor) - - -cpdef object make_py_capsule(object buf, bint versioned): - cdef DLManagedTensor* dlm_tensor - cdef DLManagedTensorVersioned* dlm_tensor_ver - cdef DLTensor* dl_tensor - cdef void* tensor_ptr - cdef const char* capsule_name - - if versioned: + if tensor: + if tensor.dl_tensor.shape: + stdlib.free(tensor.dl_tensor.shape) + if tensor.manager_ctx: + cpython.Py_DECREF(tensor.manager_ctx) + tensor.manager_ctx = NULL + stdlib.free(tensor) + + +cdef inline DLManagedTensorVersioned* allocate_dlm_tensor_versioned() except? NULL: + cdef DLManagedTensorVersioned* dlm_tensor_ver = NULL + try: dlm_tensor_ver = ( stdlib.malloc(sizeof(DLManagedTensorVersioned))) - dlm_tensor_ver.version.major = DLPACK_MAJOR_VERSION - dlm_tensor_ver.version.minor = DLPACK_MINOR_VERSION - dlm_tensor_ver.manager_ctx = buf - dlm_tensor_ver.deleter = versioned_deleter - dlm_tensor_ver.flags = 0 - dl_tensor = &dlm_tensor_ver.dl_tensor - tensor_ptr = dlm_tensor_ver - capsule_name = DLPACK_VERSIONED_TENSOR_UNUSED_NAME - else: + dlm_tensor_ver.dl_tensor.shape = NULL + dlm_tensor_ver.manager_ctx = NULL + return dlm_tensor_ver + except: + if dlm_tensor_ver: + stdlib.free(dlm_tensor_ver) + raise + + +cdef inline DLManagedTensor* allocate_dlm_tensor() except? NULL: + cdef DLManagedTensor* dlm_tensor = NULL + try: dlm_tensor = ( stdlib.malloc(sizeof(DLManagedTensor))) - dl_tensor = &dlm_tensor.dl_tensor - dlm_tensor.manager_ctx = buf - dlm_tensor.deleter = deleter - tensor_ptr = dlm_tensor - capsule_name = DLPACK_TENSOR_UNUSED_NAME + dlm_tensor.dl_tensor.shape = NULL + dlm_tensor.manager_ctx = NULL + return dlm_tensor + except: + if dlm_tensor: + stdlib.free(dlm_tensor) + raise - dl_tensor.data = (int(buf.handle)) + +cdef inline int setup_dl_tensor_layout(DLTensor* dl_tensor, object buf) except -1: dl_tensor.ndim = 1 cdef int64_t* shape_strides = \ stdlib.malloc(sizeof(int64_t) * 2) @@ -77,7 +82,10 @@ cpdef object make_py_capsule(object buf, bint versioned): dl_tensor.shape = shape_strides dl_tensor.strides = NULL dl_tensor.byte_offset = 0 + return 0 + +cdef inline int setup_dl_tensor_device(DLTensor* dl_tensor, object buf) except -1: cdef DLDevice* device = &dl_tensor.device # buf should be a Buffer instance if buf.is_device_accessible and not buf.is_host_accessible: @@ -91,14 +99,59 @@ cpdef object make_py_capsule(object buf, bint versioned): device.device_id = 0 else: # not buf.is_device_accessible and not buf.is_host_accessible raise BufferError("invalid buffer") + return 0 + +cdef inline int setup_dl_tensor_dtype(DLTensor* dl_tensor) except -1 nogil: cdef DLDataType* dtype = &dl_tensor.dtype dtype.code = kDLInt dtype.lanes = 1 dtype.bits = 8 + return 0 + - cpython.Py_INCREF(buf) - return cpython.PyCapsule_New(tensor_ptr, capsule_name, pycapsule_deleter) +cpdef object make_py_capsule(object buf, bint versioned): + cdef DLManagedTensor* dlm_tensor = NULL + cdef DLManagedTensorVersioned* dlm_tensor_ver = NULL + cdef DLTensor* dl_tensor + cdef void* tensor_ptr + cdef const char* capsule_name + cdef object ret = None + + try: + if versioned: + dlm_tensor_ver = allocate_dlm_tensor_versioned() + # Transfer the reference to manager_ctx + cpython.Py_INCREF(buf) + dlm_tensor_ver.manager_ctx = buf + dlm_tensor_ver.deleter = versioned_deleter + dlm_tensor_ver.version.major = DLPACK_MAJOR_VERSION + dlm_tensor_ver.version.minor = DLPACK_MINOR_VERSION + dlm_tensor_ver.flags = 0 + dl_tensor = &dlm_tensor_ver.dl_tensor + tensor_ptr = dlm_tensor_ver + capsule_name = DLPACK_VERSIONED_TENSOR_UNUSED_NAME + else: + dlm_tensor = allocate_dlm_tensor() + # Transfer the reference to manager_ctx + cpython.Py_INCREF(buf) + dlm_tensor.manager_ctx = buf + dlm_tensor.deleter = deleter + dl_tensor = &dlm_tensor.dl_tensor + tensor_ptr = dlm_tensor + capsule_name = DLPACK_TENSOR_UNUSED_NAME + + dl_tensor.data = (int(buf.handle)) + setup_dl_tensor_layout(dl_tensor, buf) + setup_dl_tensor_device(dl_tensor, buf) + setup_dl_tensor_dtype(dl_tensor) + ret = cpython.PyCapsule_New(tensor_ptr, capsule_name, pycapsule_deleter) + except: + if ret is None: + deleter(dlm_tensor) + versioned_deleter(dlm_tensor_ver) + raise + return ret class DLDeviceType(IntEnum): diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py index 74ec40eab5..796c12ea7d 100644 --- a/cuda_core/tests/test_memory.py +++ b/cuda_core/tests/test_memory.py @@ -278,6 +278,19 @@ def test_buffer_dunder_dlpack_device_failure(): buffer.__dlpack_device__() +def test_buffer_dlpack_failure_clean_up(): + dummy_mr = NullMemoryResource() + buffer = dummy_mr.allocate(size=1024) + before = sys.getrefcount(buffer) + with pytest.raises(BufferError, match="invalid buffer"): + buffer.__dlpack__() + after = sys.getrefcount(buffer) + # we use the buffer refcount as sentinel for proper clean-up here, + # hoping that malloc and frees did the right thing + # as they are handled by the same deleter + assert after == before + + @pytest.mark.parametrize("use_device_object", [True, False]) def test_device_memory_resource_initialization(use_device_object): """Test that DeviceMemoryResource can be initialized successfully.