diff --git a/cuda_core/cuda/core/experimental/_memoryview.pyx b/cuda_core/cuda/core/experimental/_memoryview.pyx index ea8fb01b67..6bec14def3 100644 --- a/cuda_core/cuda/core/experimental/_memoryview.pyx +++ b/cuda_core/cuda/core/experimental/_memoryview.pyx @@ -105,6 +105,23 @@ cdef class StridedMemoryView: else: pass + def __dealloc__(self): + if self.dl_tensor == NULL: + return + + if cpython.PyCapsule_IsValid( + self.metadata, DLPACK_VERSIONED_TENSOR_USED_NAME): + data = cpython.PyCapsule_GetPointer( + self.metadata, DLPACK_VERSIONED_TENSOR_USED_NAME) + dlm_tensor_ver = data + dlm_tensor_ver.deleter(dlm_tensor_ver) + elif cpython.PyCapsule_IsValid( + self.metadata, DLPACK_TENSOR_USED_NAME): + data = cpython.PyCapsule_GetPointer( + self.metadata, DLPACK_TENSOR_USED_NAME) + dlm_tensor = data + dlm_tensor.deleter(dlm_tensor) + @property def shape(self) -> tuple[int]: if self._shape is None and self.exporting_obj is not None: diff --git a/cuda_core/docs/source/release/0.X.Y-notes.rst b/cuda_core/docs/source/release/0.X.Y-notes.rst index 433e34353c..454a9d465d 100644 --- a/cuda_core/docs/source/release/0.X.Y-notes.rst +++ b/cuda_core/docs/source/release/0.X.Y-notes.rst @@ -48,3 +48,4 @@ Fixes and enhancements - Make :class:`Buffer` creation more performant. - Enabled :class:`MemoryResource` subclasses to accept :class:`Device` objects, in addition to previously supported device ordinals. - Fixed a bug in :class:`Stream` and other classes where object cleanup would error during interpreter shutdown. +- :class:`StridedMemoryView` of an underlying array using the DLPack protocol will no longer leak memory. diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py index 5872edc7e2..4ffa813d63 100644 --- a/cuda_core/tests/test_memory.py +++ b/cuda_core/tests/test_memory.py @@ -1,11 +1,16 @@ # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 +import sys + try: from cuda.bindings import driver except ImportError: from cuda import cuda as driver - +try: + import numpy as np +except ImportError: + np = None import ctypes import platform @@ -13,6 +18,7 @@ from cuda.core.experimental import Buffer, Device, DeviceMemoryResource, MemoryResource from cuda.core.experimental._memory import DLDeviceType, IPCBufferDescriptor from cuda.core.experimental._utils.cuda_utils import handle_return +from cuda.core.experimental.utils import StridedMemoryView POOL_SIZE = 2097152 # 2MB size @@ -437,3 +443,14 @@ def test_mempool_attributes_ownership(mempool_device): with pytest.raises(RuntimeError, match="DeviceMemoryResource is expired"): _ = attributes.used_mem_high mr._mempool_handle = old_handle + + +# Ensure that memory views deallocate their reference to dlpack tensors +@pytest.mark.skipif(np is None, reason="numpy is not installed") +def test_strided_memory_view_leak(): + arr = np.zeros(1048576, dtype=np.uint8) + before = sys.getrefcount(arr) + for idx in range(10): + StridedMemoryView(arr, stream_ptr=-1) + after = sys.getrefcount(arr) + assert before == after