diff --git a/cuda_bindings/cuda/bindings/utils/__init__.py b/cuda_bindings/cuda/bindings/utils/__init__.py index 5f9288b81..a2dfe7ce8 100644 --- a/cuda_bindings/cuda/bindings/utils/__init__.py +++ b/cuda_bindings/cuda/bindings/utils/__init__.py @@ -1,6 +1,5 @@ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE - from typing import Any, Callable from ._ptx_utils import get_minimal_required_cuda_ver_from_ptx_ver, get_ptx_ver diff --git a/cuda_core/cuda/core/experimental/_event.pyx b/cuda_core/cuda/core/experimental/_event.pyx index 10ac2f590..41c0b1ce6 100644 --- a/cuda_core/cuda/core/experimental/_event.pyx +++ b/cuda_core/cuda/core/experimental/_event.pyx @@ -18,7 +18,7 @@ from cuda.core.experimental._utils.cuda_utils import ( driver, handle_return, ) - +import sys if TYPE_CHECKING: import cuda.bindings from cuda.core.experimental._device import Device @@ -108,15 +108,20 @@ cdef class Event: self._ctx_handle = ctx_handle return self - cpdef close(self): - """Destroy the event.""" + cdef _shutdown_safe_close(self, is_shutting_down=sys.is_finalizing): + if is_shutting_down and is_shutting_down(): + return if self._handle is not None: err, = driver.cuEventDestroy(self._handle) self._handle = None raise_if_driver_error(err) + cpdef close(self): + """Destroy the event.""" + self._shutdown_safe_close(is_shutting_down=None) + def __del__(self): - self.close() + self._shutdown_safe_close() def __isub__(self, other): return NotImplemented diff --git a/cuda_core/cuda/core/experimental/_memory.pyx b/cuda_core/cuda/core/experimental/_memory.pyx index 41a506a58..ace146bdf 100644 --- a/cuda_core/cuda/core/experimental/_memory.pyx +++ b/cuda_core/cuda/core/experimental/_memory.pyx @@ -9,6 +9,7 @@ from cuda.core.experimental._utils.cuda_utils cimport ( _check_driver_error as raise_if_driver_error, check_or_create_options, ) +import sys from dataclasses import dataclass 
from typing import TypeVar, Union, TYPE_CHECKING @@ -69,7 +70,16 @@ cdef class Buffer: return self def __del__(self): - self.close() + self._shutdown_safe_close() + + cdef _shutdown_safe_close(self, stream: Stream = None, is_shutting_down=sys.is_finalizing): + if is_shutting_down and is_shutting_down(): + return + if self._ptr and self._mr is not None: + self._mr.deallocate(self._ptr, self._size, stream) + self._ptr = 0 + self._mr = None + self._ptr_obj = None cpdef close(self, stream: Stream = None): """Deallocate this buffer asynchronously on the given stream. @@ -83,11 +93,7 @@ cdef class Buffer: The stream object to use for asynchronous deallocation. If None, the behavior depends on the underlying memory resource. """ - if self._ptr and self._mr is not None: - self._mr.deallocate(self._ptr, self._size, stream) - self._ptr = 0 - self._mr = None - self._ptr_obj = None + self._shutdown_safe_close(stream, is_shutting_down=None) @property def handle(self) -> DevicePointerT: diff --git a/cuda_core/cuda/core/experimental/_stream.pyx b/cuda_core/cuda/core/experimental/_stream.pyx index 9d9271f65..a2c1a90b9 100644 --- a/cuda_core/cuda/core/experimental/_stream.pyx +++ b/cuda_core/cuda/core/experimental/_stream.pyx @@ -8,6 +8,7 @@ from cuda.core.experimental._utils.cuda_utils cimport ( _check_driver_error as raise_if_driver_error, check_or_create_options, ) +import sys import cython import os @@ -186,15 +187,12 @@ cdef class Stream: return self def __del__(self): - self.close() + self._shutdown_safe_close() - cpdef close(self): - """Destroy the stream. + cdef _shutdown_safe_close(self, is_shutting_down=sys.is_finalizing): + if is_shutting_down and is_shutting_down(): + return - Destroy the stream if we own it. Borrowed foreign stream - object will instead have their references released. 
- """ if self._owner is None: if self._handle and not self._builtin: handle_return(driver.cuStreamDestroy(self._handle)) @@ -202,6 +200,15 @@ cdef class Stream: self._owner = None self._handle = None + cpdef close(self): + """Destroy the stream. + + Destroy the stream if we own it. Borrowed foreign stream + object will instead have their references released. + + """ + self._shutdown_safe_close(is_shutting_down=None) + def __cuda_stream__(self) -> tuple[int, int]: """Return an instance of a __cuda_stream__ protocol.""" return (0, int(self.handle)) diff --git a/cuda_core/docs/source/release/0.X.Y-notes.rst b/cuda_core/docs/source/release/0.X.Y-notes.rst index 55ef4a241..433e34353 100644 --- a/cuda_core/docs/source/release/0.X.Y-notes.rst +++ b/cuda_core/docs/source/release/0.X.Y-notes.rst @@ -47,3 +47,4 @@ Fixes and enhancements - Fixed a bug in :class:`GraphBuilder.add_child` where dependencies extracted from capturing stream were passed inconsistently with num_dependencies parameter (addresses issue #843). - Make :class:`Buffer` creation more performant. - Enabled :class:`MemoryResource` subclasses to accept :class:`Device` objects, in addition to previously supported device ordinals. +- Fixed a bug where garbage-collecting :class:`Buffer`, :class:`Event`, and :class:`Stream` objects could raise errors during interpreter shutdown.