diff --git a/Cargo.toml b/Cargo.toml index 178aaddca14..815f322505f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -106,6 +106,9 @@ auto-initialize = [] # Allows use of the deprecated "GIL Refs" APIs. gil-refs = [] +# Enables `Clone`ing references to Python objects which aborts if the GIL is not held. +py-clone = [] + # Disables global reference pool which allows `Drop`ing references to Python objects without the GIL being held. disable-reference-pool = [] diff --git a/guide/src/migration.md b/guide/src/migration.md index d855f69d396..05a211ef49b 100644 --- a/guide/src/migration.md +++ b/guide/src/migration.md @@ -3,6 +3,17 @@ This guide can help you upgrade code through breaking changes from one PyO3 version to the next. For a detailed list of all changes, see the [CHANGELOG](changelog.md). +## from 0.21.* to 0.22 +### Introduction of the `py-clone` feature +
+Click to expand +If you rely on `impl Clone for Py` to fulfil trait requirements imposed by existing Rust code written without PyO3-based code in mind, the newly introduced feature `py-clone` must be enabled. + +However, take care to note that the behaviour is different from previous versions. If `Clone` was called without the GIL being held, we tried to delay the application of these reference count increments until PyO3-based code would re-acquire it. This turned out to be impossible to implement in a sound manner and hence was removed. Now, if `Clone` is called without the GIL being held, we panic instead for which calling code might not be prepared. + +Related to this, we also added a `disable-reference-pool` feature which removes the infrastructure necessary to apply delayed reference count decrements implied by `impl Drop for Py`. They do not appear to be a soundness hazard as they should lead to memory leaks in the worst case. However, the global synchronization adds significant overhead to cross the Python-Rust boundary. Enabling this feature will remove these costs and make the `Drop` implementation abort the process if called without the GIL being held instead. +
+ ## from 0.20.* to 0.21
Click to expand diff --git a/guide/src/performance.md b/guide/src/performance.md index 5d60ad69d08..d2092e04617 100644 --- a/guide/src/performance.md +++ b/guide/src/performance.md @@ -99,9 +99,9 @@ impl PartialEq for FooBound<'_> { ## Disable the global reference pool -PyO3 uses global mutable state to keep track of deferred reference count updates implied by `impl Clone for Py` and `impl Drop for Py` being called without the currently GIL being held. The necessary synchronization to obtain and apply these reference count updates when PyO3-based code next acquires the GIL is somewhat expensive and can become a significant part of the cost of crossing the Python-Rust boundary. +PyO3 uses global mutable state to keep track of deferred reference count updates implied by `impl Drop for Py` being called without the currently GIL being held. The necessary synchronization to obtain and apply these reference count updates when PyO3-based code next acquires the GIL is somewhat expensive and can become a significant part of the cost of crossing the Python-Rust boundary. -This functionality can be avoided by adding the `disable-reference-pool` feature. This removes the global reference pool and the associated costs completely. However, it does _not_ remove the `Clone` and `Drop` implementations for `Py` which are often necessary to fulfil trait requirements imposed by existing Rust code written without PyO3-based code in mind. To stay compatible with the wider Rust ecosystem in these cases, we keep the implementations but panic when `Clone` is called without the GIL being held and abort when `Drop` is called without the GIL being held. +This functionality can be avoided by adding the `disable-reference-pool` feature. This removes the global reference pool and the associated costs completely. However, it does _not_ remove the `Drop` implementation for `Py` which is necessary to interoperate with existing Rust code written without PyO3-based code in mind. To stay compatible with the wider Rust ecosystem in these cases, we keep the implementation but abort when `Drop` is called without the GIL being held. This limitation is important to keep in mind when this setting is used, especially when embedding Python code into a Rust application as it is quite easy to accidentally drop a `Py` returned from `Python::with_gil` without making sure to re-acquire the GIL beforehand. For example, the following code diff --git a/newsfragments/4095.changed.md b/newsfragments/4095.changed.md new file mode 100644 index 00000000000..7f155ae04ef --- /dev/null +++ b/newsfragments/4095.changed.md @@ -0,0 +1 @@ +`Clone`ing pointers into the Python heap has been moved behind the `py-clone` feature, as it must panic without the GIL being held as a soundness fix. diff --git a/src/err/err_state.rs b/src/err/err_state.rs index 9f85296f661..14345b275c9 100644 --- a/src/err/err_state.rs +++ b/src/err/err_state.rs @@ -5,7 +5,6 @@ use crate::{ Bound, IntoPy, Py, PyAny, PyObject, PyTypeInfo, Python, }; -#[derive(Clone)] pub(crate) struct PyErrStateNormalized { #[cfg(not(Py_3_12))] ptype: Py, @@ -63,6 +62,19 @@ impl PyErrStateNormalized { ptraceback: Py::from_owned_ptr_or_opt(py, ptraceback), } } + + pub fn clone_ref(&self, py: Python<'_>) -> Self { + Self { + #[cfg(not(Py_3_12))] + ptype: self.ptype.clone_ref(py), + pvalue: self.pvalue.clone_ref(py), + #[cfg(not(Py_3_12))] + ptraceback: self + .ptraceback + .as_ref() + .map(|ptraceback| ptraceback.clone_ref(py)), + } + } } pub(crate) struct PyErrStateLazyFnOutput { diff --git a/src/err/mod.rs b/src/err/mod.rs index a61c8c62d31..7ef57c4ea7b 100644 --- a/src/err/mod.rs +++ b/src/err/mod.rs @@ -856,7 +856,7 @@ impl PyErr { /// ``` #[inline] pub fn clone_ref(&self, py: Python<'_>) -> PyErr { - PyErr::from_state(PyErrState::Normalized(self.normalized(py).clone())) + PyErr::from_state(PyErrState::Normalized(self.normalized(py).clone_ref(py))) } /// Return the cause (either an exception instance, or None, set by `raise ... from ...`) diff --git a/src/gil.rs b/src/gil.rs index 36592defdec..13c15324207 100644 --- a/src/gil.rs +++ b/src/gil.rs @@ -253,41 +253,29 @@ type PyObjVec = Vec>; #[cfg(not(feature = "disable-reference-pool"))] /// Thread-safe storage for objects which were inc_ref / dec_ref while the GIL was not held. struct ReferencePool { - // .0 is INCREFs, .1 is DECREFs - pointer_ops: Mutex<(PyObjVec, PyObjVec)>, + pending_decrefs: Mutex, } #[cfg(not(feature = "disable-reference-pool"))] impl ReferencePool { const fn new() -> Self { Self { - pointer_ops: const_mutex((Vec::new(), Vec::new())), + pending_decrefs: const_mutex(Vec::new()), } } - fn register_incref(&self, obj: NonNull) { - self.pointer_ops.lock().0.push(obj); - } - fn register_decref(&self, obj: NonNull) { - self.pointer_ops.lock().1.push(obj); + self.pending_decrefs.lock().push(obj); } fn update_counts(&self, _py: Python<'_>) { - let mut ops = self.pointer_ops.lock(); - if ops.0.is_empty() && ops.1.is_empty() { + let mut pending_decrefs = self.pending_decrefs.lock(); + if pending_decrefs.is_empty() { return; } - let (increfs, decrefs) = mem::take(&mut *ops); - drop(ops); - - // Always increase reference counts first - as otherwise objects which have a - // nonzero total reference count might be incorrectly dropped by Python during - // this update. - for ptr in increfs { - unsafe { ffi::Py_INCREF(ptr.as_ptr()) }; - } + let decrefs = mem::take(&mut *pending_decrefs); + drop(pending_decrefs); for ptr in decrefs { unsafe { ffi::Py_DECREF(ptr.as_ptr()) }; @@ -457,14 +445,12 @@ impl Drop for GILPool { /// /// # Safety /// The object must be an owned Python reference. +#[cfg(feature = "py-clone")] #[track_caller] pub unsafe fn register_incref(obj: NonNull) { if gil_is_acquired() { ffi::Py_INCREF(obj.as_ptr()) } else { - #[cfg(not(feature = "disable-reference-pool"))] - POOL.register_incref(obj); - #[cfg(feature = "disable-reference-pool")] panic!("Cannot clone pointer into Python heap without the GIL being held."); } } @@ -562,29 +548,18 @@ mod tests { len } - #[cfg(not(feature = "disable-reference-pool"))] - fn pool_inc_refs_does_not_contain(obj: &PyObject) -> bool { - !POOL - .pointer_ops - .lock() - .0 - .contains(&unsafe { NonNull::new_unchecked(obj.as_ptr()) }) - } - #[cfg(not(feature = "disable-reference-pool"))] fn pool_dec_refs_does_not_contain(obj: &PyObject) -> bool { !POOL - .pointer_ops + .pending_decrefs .lock() - .1 .contains(&unsafe { NonNull::new_unchecked(obj.as_ptr()) }) } #[cfg(all(not(feature = "disable-reference-pool"), not(target_arch = "wasm32")))] fn pool_dec_refs_contains(obj: &PyObject) -> bool { - POOL.pointer_ops + POOL.pending_decrefs .lock() - .1 .contains(&unsafe { NonNull::new_unchecked(obj.as_ptr()) }) } @@ -658,8 +633,6 @@ mod tests { assert_eq!(obj.get_refcnt(py), 2); #[cfg(not(feature = "disable-reference-pool"))] - assert!(pool_inc_refs_does_not_contain(&obj)); - #[cfg(not(feature = "disable-reference-pool"))] assert!(pool_dec_refs_does_not_contain(&obj)); // With the GIL held, reference count will be decreased immediately. @@ -667,8 +640,6 @@ mod tests { assert_eq!(obj.get_refcnt(py), 1); #[cfg(not(feature = "disable-reference-pool"))] - assert!(pool_inc_refs_does_not_contain(&obj)); - #[cfg(not(feature = "disable-reference-pool"))] assert!(pool_dec_refs_does_not_contain(&obj)); }); } @@ -682,7 +653,6 @@ mod tests { let reference = obj.clone_ref(py); assert_eq!(obj.get_refcnt(py), 2); - assert!(pool_inc_refs_does_not_contain(&obj)); assert!(pool_dec_refs_does_not_contain(&obj)); // Drop reference in a separate thread which doesn't have the GIL. @@ -691,7 +661,6 @@ mod tests { // The reference count should not have changed (the GIL has always // been held by this thread), it is remembered to release later. assert_eq!(obj.get_refcnt(py), 2); - assert!(pool_inc_refs_does_not_contain(&obj)); assert!(pool_dec_refs_contains(&obj)); obj }); @@ -699,9 +668,7 @@ mod tests { // Next time the GIL is acquired, the reference is released Python::with_gil(|py| { assert_eq!(obj.get_refcnt(py), 1); - let non_null = unsafe { NonNull::new_unchecked(obj.as_ptr()) }; - assert!(!POOL.pointer_ops.lock().0.contains(&non_null)); - assert!(!POOL.pointer_ops.lock().1.contains(&non_null)); + assert!(pool_dec_refs_does_not_contain(&obj)); }); } @@ -758,19 +725,14 @@ mod tests { } #[test] - #[cfg_attr(feature = "disable-reference-pool", should_panic)] + #[should_panic] fn test_allow_threads_updates_refcounts() { Python::with_gil(|py| { // Make a simple object with 1 reference let obj = get_object(py); assert!(obj.get_refcnt(py) == 1); - // Clone the object without the GIL to use internal tracking - let escaped_ref = py.allow_threads(|| obj.clone()); - // But after the block the refcounts are updated - assert!(obj.get_refcnt(py) == 2); - drop(escaped_ref); - assert!(obj.get_refcnt(py) == 1); - drop(obj); + // Clone the object without the GIL which should panic + py.allow_threads(|| obj.clone()); }); } @@ -826,117 +788,6 @@ mod tests { } } - #[test] - #[cfg(all(not(feature = "disable-reference-pool"), not(target_arch = "wasm32")))] // We are building wasm Python with pthreads disabled - fn test_clone_without_gil() { - use crate::{Py, PyAny}; - use std::{sync::Arc, thread}; - - // Some events for synchronizing - static GIL_ACQUIRED: Event = Event::new(); - static OBJECT_CLONED: Event = Event::new(); - static REFCNT_CHECKED: Event = Event::new(); - - Python::with_gil(|py| { - let obj: Arc> = Arc::new(get_object(py)); - let thread_obj = Arc::clone(&obj); - - let count = obj.get_refcnt(py); - println!( - "1: The object has been created and its reference count is {}", - count - ); - - let handle = thread::spawn(move || { - Python::with_gil(move |py| { - println!("3. The GIL has been acquired on another thread."); - GIL_ACQUIRED.set(); - - // Wait while the main thread registers obj in POOL - OBJECT_CLONED.wait(); - println!("5. Checking refcnt"); - assert_eq!(thread_obj.get_refcnt(py), count); - - REFCNT_CHECKED.set(); - }) - }); - - let cloned = py.allow_threads(|| { - println!("2. The GIL has been released."); - - // Wait until the GIL has been acquired on the thread. - GIL_ACQUIRED.wait(); - - println!("4. The other thread is now hogging the GIL, we clone without it held"); - // Cloning without GIL should not update reference count - let cloned = Py::clone(&*obj); - OBJECT_CLONED.set(); - cloned - }); - - REFCNT_CHECKED.wait(); - - println!("6. The main thread has acquired the GIL again and processed the pool."); - - // Total reference count should be one higher - assert_eq!(obj.get_refcnt(py), count + 1); - - // Clone dropped - drop(cloned); - // Ensure refcount of the arc is 1 - handle.join().unwrap(); - - // Overall count is now back to the original, and should be no pending change - assert_eq!(Arc::try_unwrap(obj).unwrap().get_refcnt(py), count); - }); - } - - #[test] - #[cfg(all(not(feature = "disable-reference-pool"), not(target_arch = "wasm32")))] // We are building wasm Python with pthreads disabled - fn test_clone_in_other_thread() { - use crate::Py; - use std::{sync::Arc, thread}; - - // Some events for synchronizing - static OBJECT_CLONED: Event = Event::new(); - - let (obj, count, ptr) = Python::with_gil(|py| { - let obj = Arc::new(get_object(py)); - let count = obj.get_refcnt(py); - let thread_obj = Arc::clone(&obj); - - // Start a thread which does not have the GIL, and clone it - let t = thread::spawn(move || { - // Cloning without GIL should not update reference count - #[allow(clippy::redundant_clone)] - let _ = Py::clone(&*thread_obj); - OBJECT_CLONED.set(); - }); - - OBJECT_CLONED.wait(); - assert_eq!(count, obj.get_refcnt(py)); - - t.join().unwrap(); - let ptr = NonNull::new(obj.as_ptr()).unwrap(); - - // The pointer should appear once in the incref pool, and once in the - // decref pool (for the clone being created and also dropped) - assert!(POOL.pointer_ops.lock().0.contains(&ptr)); - assert!(POOL.pointer_ops.lock().1.contains(&ptr)); - - (obj, count, ptr) - }); - - Python::with_gil(|py| { - // Acquiring the gil clears the pool - assert!(!POOL.pointer_ops.lock().0.contains(&ptr)); - assert!(!POOL.pointer_ops.lock().1.contains(&ptr)); - - // Overall count is still unchanged - assert_eq!(count, obj.get_refcnt(py)); - }); - } - #[test] #[cfg(not(feature = "disable-reference-pool"))] fn test_update_counts_does_not_deadlock() { diff --git a/src/instance.rs b/src/instance.rs index 6d445130ab3..8140c8da618 100644 --- a/src/instance.rs +++ b/src/instance.rs @@ -1810,6 +1810,7 @@ where /// If the GIL is held this increments `self`'s reference count. /// Otherwise this registers the [`Py`]`` instance to have its reference count /// incremented the next time PyO3 acquires the GIL. +#[cfg(feature = "py-clone")] impl Clone for Py { #[track_caller] fn clone(&self) -> Self { diff --git a/src/pybacked.rs b/src/pybacked.rs index e0bacb86144..504e62e7866 100644 --- a/src/pybacked.rs +++ b/src/pybacked.rs @@ -13,7 +13,7 @@ use crate::{ /// A wrapper around `str` where the storage is owned by a Python `bytes` or `str` object. /// /// This type gives access to the underlying data via a `Deref` implementation. -#[derive(Clone)] +#[cfg_attr(feature = "py-clone", derive(Clone))] pub struct PyBackedStr { #[allow(dead_code)] // only held so that the storage is not dropped storage: Py, @@ -88,7 +88,7 @@ impl FromPyObject<'_> for PyBackedStr { /// A wrapper around `[u8]` where the storage is either owned by a Python `bytes` object, or a Rust `Box<[u8]>`. /// /// This type gives access to the underlying data via a `Deref` implementation. -#[derive(Clone)] +#[cfg_attr(feature = "py-clone", derive(Clone))] pub struct PyBackedBytes { #[allow(dead_code)] // only held so that the storage is not dropped storage: PyBackedBytesStorage, @@ -96,7 +96,7 @@ pub struct PyBackedBytes { } #[allow(dead_code)] -#[derive(Clone)] +#[cfg_attr(feature = "py-clone", derive(Clone))] enum PyBackedBytesStorage { Python(Py), Rust(Arc<[u8]>),