diff --git a/Cargo.toml b/Cargo.toml
index 178aaddca14..815f322505f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -106,6 +106,9 @@ auto-initialize = []
# Allows use of the deprecated "GIL Refs" APIs.
gil-refs = []
+# Enables `Clone`ing references to Python objects which aborts if the GIL is not held.
+py-clone = []
+
# Disables global reference pool which allows `Drop`ing references to Python objects without the GIL being held.
disable-reference-pool = []
diff --git a/guide/src/migration.md b/guide/src/migration.md
index d855f69d396..05a211ef49b 100644
--- a/guide/src/migration.md
+++ b/guide/src/migration.md
@@ -3,6 +3,17 @@
This guide can help you upgrade code through breaking changes from one PyO3 version to the next.
For a detailed list of all changes, see the [CHANGELOG](changelog.md).
+## from 0.21.* to 0.22
+### Introduction of the `py-clone` feature
+
+Click to expand
+If you rely on `impl Clone for Py` to fulfil trait requirements imposed by existing Rust code written without PyO3-based code in mind, the newly introduced feature `py-clone` must be enabled.
+
+However, take care to note that the behaviour is different from previous versions. If `Clone` was called without the GIL being held, we tried to delay the application of these reference count increments until PyO3-based code would re-acquire it. This turned out to be impossible to implement in a sound manner and hence was removed. Now, if `Clone` is called without the GIL being held, we panic instead for which calling code might not be prepared.
+
+Related to this, we also added a `disable-reference-pool` feature which removes the infrastructure necessary to apply delayed reference count decrements implied by `impl Drop for Py`. They do not appear to be a soundness hazard as they should lead to memory leaks in the worst case. However, the global synchronization adds significant overhead to cross the Python-Rust boundary. Enabling this feature will remove these costs and make the `Drop` implementation abort the process if called without the GIL being held instead.
+
+
## from 0.20.* to 0.21
Click to expand
diff --git a/guide/src/performance.md b/guide/src/performance.md
index 5d60ad69d08..d2092e04617 100644
--- a/guide/src/performance.md
+++ b/guide/src/performance.md
@@ -99,9 +99,9 @@ impl PartialEq for FooBound<'_> {
## Disable the global reference pool
-PyO3 uses global mutable state to keep track of deferred reference count updates implied by `impl Clone for Py` and `impl Drop for Py` being called without the currently GIL being held. The necessary synchronization to obtain and apply these reference count updates when PyO3-based code next acquires the GIL is somewhat expensive and can become a significant part of the cost of crossing the Python-Rust boundary.
+PyO3 uses global mutable state to keep track of deferred reference count updates implied by `impl Drop for Py` being called without the currently GIL being held. The necessary synchronization to obtain and apply these reference count updates when PyO3-based code next acquires the GIL is somewhat expensive and can become a significant part of the cost of crossing the Python-Rust boundary.
-This functionality can be avoided by adding the `disable-reference-pool` feature. This removes the global reference pool and the associated costs completely. However, it does _not_ remove the `Clone` and `Drop` implementations for `Py` which are often necessary to fulfil trait requirements imposed by existing Rust code written without PyO3-based code in mind. To stay compatible with the wider Rust ecosystem in these cases, we keep the implementations but panic when `Clone` is called without the GIL being held and abort when `Drop` is called without the GIL being held.
+This functionality can be avoided by adding the `disable-reference-pool` feature. This removes the global reference pool and the associated costs completely. However, it does _not_ remove the `Drop` implementation for `Py` which is necessary to interoperate with existing Rust code written without PyO3-based code in mind. To stay compatible with the wider Rust ecosystem in these cases, we keep the implementation but abort when `Drop` is called without the GIL being held.
This limitation is important to keep in mind when this setting is used, especially when embedding Python code into a Rust application as it is quite easy to accidentally drop a `Py` returned from `Python::with_gil` without making sure to re-acquire the GIL beforehand. For example, the following code
diff --git a/newsfragments/4095.changed.md b/newsfragments/4095.changed.md
new file mode 100644
index 00000000000..7f155ae04ef
--- /dev/null
+++ b/newsfragments/4095.changed.md
@@ -0,0 +1 @@
+`Clone`ing pointers into the Python heap has been moved behind the `py-clone` feature, as it must panic without the GIL being held as a soundness fix.
diff --git a/src/err/err_state.rs b/src/err/err_state.rs
index 9f85296f661..14345b275c9 100644
--- a/src/err/err_state.rs
+++ b/src/err/err_state.rs
@@ -5,7 +5,6 @@ use crate::{
Bound, IntoPy, Py, PyAny, PyObject, PyTypeInfo, Python,
};
-#[derive(Clone)]
pub(crate) struct PyErrStateNormalized {
#[cfg(not(Py_3_12))]
ptype: Py,
@@ -63,6 +62,19 @@ impl PyErrStateNormalized {
ptraceback: Py::from_owned_ptr_or_opt(py, ptraceback),
}
}
+
+ pub fn clone_ref(&self, py: Python<'_>) -> Self {
+ Self {
+ #[cfg(not(Py_3_12))]
+ ptype: self.ptype.clone_ref(py),
+ pvalue: self.pvalue.clone_ref(py),
+ #[cfg(not(Py_3_12))]
+ ptraceback: self
+ .ptraceback
+ .as_ref()
+ .map(|ptraceback| ptraceback.clone_ref(py)),
+ }
+ }
}
pub(crate) struct PyErrStateLazyFnOutput {
diff --git a/src/err/mod.rs b/src/err/mod.rs
index a61c8c62d31..7ef57c4ea7b 100644
--- a/src/err/mod.rs
+++ b/src/err/mod.rs
@@ -856,7 +856,7 @@ impl PyErr {
/// ```
#[inline]
pub fn clone_ref(&self, py: Python<'_>) -> PyErr {
- PyErr::from_state(PyErrState::Normalized(self.normalized(py).clone()))
+ PyErr::from_state(PyErrState::Normalized(self.normalized(py).clone_ref(py)))
}
/// Return the cause (either an exception instance, or None, set by `raise ... from ...`)
diff --git a/src/gil.rs b/src/gil.rs
index 36592defdec..13c15324207 100644
--- a/src/gil.rs
+++ b/src/gil.rs
@@ -253,41 +253,29 @@ type PyObjVec = Vec>;
#[cfg(not(feature = "disable-reference-pool"))]
/// Thread-safe storage for objects which were inc_ref / dec_ref while the GIL was not held.
struct ReferencePool {
- // .0 is INCREFs, .1 is DECREFs
- pointer_ops: Mutex<(PyObjVec, PyObjVec)>,
+ pending_decrefs: Mutex,
}
#[cfg(not(feature = "disable-reference-pool"))]
impl ReferencePool {
const fn new() -> Self {
Self {
- pointer_ops: const_mutex((Vec::new(), Vec::new())),
+ pending_decrefs: const_mutex(Vec::new()),
}
}
- fn register_incref(&self, obj: NonNull) {
- self.pointer_ops.lock().0.push(obj);
- }
-
fn register_decref(&self, obj: NonNull) {
- self.pointer_ops.lock().1.push(obj);
+ self.pending_decrefs.lock().push(obj);
}
fn update_counts(&self, _py: Python<'_>) {
- let mut ops = self.pointer_ops.lock();
- if ops.0.is_empty() && ops.1.is_empty() {
+ let mut pending_decrefs = self.pending_decrefs.lock();
+ if pending_decrefs.is_empty() {
return;
}
- let (increfs, decrefs) = mem::take(&mut *ops);
- drop(ops);
-
- // Always increase reference counts first - as otherwise objects which have a
- // nonzero total reference count might be incorrectly dropped by Python during
- // this update.
- for ptr in increfs {
- unsafe { ffi::Py_INCREF(ptr.as_ptr()) };
- }
+ let decrefs = mem::take(&mut *pending_decrefs);
+ drop(pending_decrefs);
for ptr in decrefs {
unsafe { ffi::Py_DECREF(ptr.as_ptr()) };
@@ -457,14 +445,12 @@ impl Drop for GILPool {
///
/// # Safety
/// The object must be an owned Python reference.
+#[cfg(feature = "py-clone")]
#[track_caller]
pub unsafe fn register_incref(obj: NonNull) {
if gil_is_acquired() {
ffi::Py_INCREF(obj.as_ptr())
} else {
- #[cfg(not(feature = "disable-reference-pool"))]
- POOL.register_incref(obj);
- #[cfg(feature = "disable-reference-pool")]
panic!("Cannot clone pointer into Python heap without the GIL being held.");
}
}
@@ -562,29 +548,18 @@ mod tests {
len
}
- #[cfg(not(feature = "disable-reference-pool"))]
- fn pool_inc_refs_does_not_contain(obj: &PyObject) -> bool {
- !POOL
- .pointer_ops
- .lock()
- .0
- .contains(&unsafe { NonNull::new_unchecked(obj.as_ptr()) })
- }
-
#[cfg(not(feature = "disable-reference-pool"))]
fn pool_dec_refs_does_not_contain(obj: &PyObject) -> bool {
!POOL
- .pointer_ops
+ .pending_decrefs
.lock()
- .1
.contains(&unsafe { NonNull::new_unchecked(obj.as_ptr()) })
}
#[cfg(all(not(feature = "disable-reference-pool"), not(target_arch = "wasm32")))]
fn pool_dec_refs_contains(obj: &PyObject) -> bool {
- POOL.pointer_ops
+ POOL.pending_decrefs
.lock()
- .1
.contains(&unsafe { NonNull::new_unchecked(obj.as_ptr()) })
}
@@ -658,8 +633,6 @@ mod tests {
assert_eq!(obj.get_refcnt(py), 2);
#[cfg(not(feature = "disable-reference-pool"))]
- assert!(pool_inc_refs_does_not_contain(&obj));
- #[cfg(not(feature = "disable-reference-pool"))]
assert!(pool_dec_refs_does_not_contain(&obj));
// With the GIL held, reference count will be decreased immediately.
@@ -667,8 +640,6 @@ mod tests {
assert_eq!(obj.get_refcnt(py), 1);
#[cfg(not(feature = "disable-reference-pool"))]
- assert!(pool_inc_refs_does_not_contain(&obj));
- #[cfg(not(feature = "disable-reference-pool"))]
assert!(pool_dec_refs_does_not_contain(&obj));
});
}
@@ -682,7 +653,6 @@ mod tests {
let reference = obj.clone_ref(py);
assert_eq!(obj.get_refcnt(py), 2);
- assert!(pool_inc_refs_does_not_contain(&obj));
assert!(pool_dec_refs_does_not_contain(&obj));
// Drop reference in a separate thread which doesn't have the GIL.
@@ -691,7 +661,6 @@ mod tests {
// The reference count should not have changed (the GIL has always
// been held by this thread), it is remembered to release later.
assert_eq!(obj.get_refcnt(py), 2);
- assert!(pool_inc_refs_does_not_contain(&obj));
assert!(pool_dec_refs_contains(&obj));
obj
});
@@ -699,9 +668,7 @@ mod tests {
// Next time the GIL is acquired, the reference is released
Python::with_gil(|py| {
assert_eq!(obj.get_refcnt(py), 1);
- let non_null = unsafe { NonNull::new_unchecked(obj.as_ptr()) };
- assert!(!POOL.pointer_ops.lock().0.contains(&non_null));
- assert!(!POOL.pointer_ops.lock().1.contains(&non_null));
+ assert!(pool_dec_refs_does_not_contain(&obj));
});
}
@@ -758,19 +725,14 @@ mod tests {
}
#[test]
- #[cfg_attr(feature = "disable-reference-pool", should_panic)]
+ #[should_panic]
fn test_allow_threads_updates_refcounts() {
Python::with_gil(|py| {
// Make a simple object with 1 reference
let obj = get_object(py);
assert!(obj.get_refcnt(py) == 1);
- // Clone the object without the GIL to use internal tracking
- let escaped_ref = py.allow_threads(|| obj.clone());
- // But after the block the refcounts are updated
- assert!(obj.get_refcnt(py) == 2);
- drop(escaped_ref);
- assert!(obj.get_refcnt(py) == 1);
- drop(obj);
+ // Clone the object without the GIL which should panic
+ py.allow_threads(|| obj.clone());
});
}
@@ -826,117 +788,6 @@ mod tests {
}
}
- #[test]
- #[cfg(all(not(feature = "disable-reference-pool"), not(target_arch = "wasm32")))] // We are building wasm Python with pthreads disabled
- fn test_clone_without_gil() {
- use crate::{Py, PyAny};
- use std::{sync::Arc, thread};
-
- // Some events for synchronizing
- static GIL_ACQUIRED: Event = Event::new();
- static OBJECT_CLONED: Event = Event::new();
- static REFCNT_CHECKED: Event = Event::new();
-
- Python::with_gil(|py| {
- let obj: Arc> = Arc::new(get_object(py));
- let thread_obj = Arc::clone(&obj);
-
- let count = obj.get_refcnt(py);
- println!(
- "1: The object has been created and its reference count is {}",
- count
- );
-
- let handle = thread::spawn(move || {
- Python::with_gil(move |py| {
- println!("3. The GIL has been acquired on another thread.");
- GIL_ACQUIRED.set();
-
- // Wait while the main thread registers obj in POOL
- OBJECT_CLONED.wait();
- println!("5. Checking refcnt");
- assert_eq!(thread_obj.get_refcnt(py), count);
-
- REFCNT_CHECKED.set();
- })
- });
-
- let cloned = py.allow_threads(|| {
- println!("2. The GIL has been released.");
-
- // Wait until the GIL has been acquired on the thread.
- GIL_ACQUIRED.wait();
-
- println!("4. The other thread is now hogging the GIL, we clone without it held");
- // Cloning without GIL should not update reference count
- let cloned = Py::clone(&*obj);
- OBJECT_CLONED.set();
- cloned
- });
-
- REFCNT_CHECKED.wait();
-
- println!("6. The main thread has acquired the GIL again and processed the pool.");
-
- // Total reference count should be one higher
- assert_eq!(obj.get_refcnt(py), count + 1);
-
- // Clone dropped
- drop(cloned);
- // Ensure refcount of the arc is 1
- handle.join().unwrap();
-
- // Overall count is now back to the original, and should be no pending change
- assert_eq!(Arc::try_unwrap(obj).unwrap().get_refcnt(py), count);
- });
- }
-
- #[test]
- #[cfg(all(not(feature = "disable-reference-pool"), not(target_arch = "wasm32")))] // We are building wasm Python with pthreads disabled
- fn test_clone_in_other_thread() {
- use crate::Py;
- use std::{sync::Arc, thread};
-
- // Some events for synchronizing
- static OBJECT_CLONED: Event = Event::new();
-
- let (obj, count, ptr) = Python::with_gil(|py| {
- let obj = Arc::new(get_object(py));
- let count = obj.get_refcnt(py);
- let thread_obj = Arc::clone(&obj);
-
- // Start a thread which does not have the GIL, and clone it
- let t = thread::spawn(move || {
- // Cloning without GIL should not update reference count
- #[allow(clippy::redundant_clone)]
- let _ = Py::clone(&*thread_obj);
- OBJECT_CLONED.set();
- });
-
- OBJECT_CLONED.wait();
- assert_eq!(count, obj.get_refcnt(py));
-
- t.join().unwrap();
- let ptr = NonNull::new(obj.as_ptr()).unwrap();
-
- // The pointer should appear once in the incref pool, and once in the
- // decref pool (for the clone being created and also dropped)
- assert!(POOL.pointer_ops.lock().0.contains(&ptr));
- assert!(POOL.pointer_ops.lock().1.contains(&ptr));
-
- (obj, count, ptr)
- });
-
- Python::with_gil(|py| {
- // Acquiring the gil clears the pool
- assert!(!POOL.pointer_ops.lock().0.contains(&ptr));
- assert!(!POOL.pointer_ops.lock().1.contains(&ptr));
-
- // Overall count is still unchanged
- assert_eq!(count, obj.get_refcnt(py));
- });
- }
-
#[test]
#[cfg(not(feature = "disable-reference-pool"))]
fn test_update_counts_does_not_deadlock() {
diff --git a/src/instance.rs b/src/instance.rs
index 6d445130ab3..8140c8da618 100644
--- a/src/instance.rs
+++ b/src/instance.rs
@@ -1810,6 +1810,7 @@ where
/// If the GIL is held this increments `self`'s reference count.
/// Otherwise this registers the [`Py`]`` instance to have its reference count
/// incremented the next time PyO3 acquires the GIL.
+#[cfg(feature = "py-clone")]
impl Clone for Py {
#[track_caller]
fn clone(&self) -> Self {
diff --git a/src/pybacked.rs b/src/pybacked.rs
index e0bacb86144..504e62e7866 100644
--- a/src/pybacked.rs
+++ b/src/pybacked.rs
@@ -13,7 +13,7 @@ use crate::{
/// A wrapper around `str` where the storage is owned by a Python `bytes` or `str` object.
///
/// This type gives access to the underlying data via a `Deref` implementation.
-#[derive(Clone)]
+#[cfg_attr(feature = "py-clone", derive(Clone))]
pub struct PyBackedStr {
#[allow(dead_code)] // only held so that the storage is not dropped
storage: Py,
@@ -88,7 +88,7 @@ impl FromPyObject<'_> for PyBackedStr {
/// A wrapper around `[u8]` where the storage is either owned by a Python `bytes` object, or a Rust `Box<[u8]>`.
///
/// This type gives access to the underlying data via a `Deref` implementation.
-#[derive(Clone)]
+#[cfg_attr(feature = "py-clone", derive(Clone))]
pub struct PyBackedBytes {
#[allow(dead_code)] // only held so that the storage is not dropped
storage: PyBackedBytesStorage,
@@ -96,7 +96,7 @@ pub struct PyBackedBytes {
}
#[allow(dead_code)]
-#[derive(Clone)]
+#[cfg_attr(feature = "py-clone", derive(Clone))]
enum PyBackedBytesStorage {
Python(Py),
Rust(Arc<[u8]>),