Skip to content

Commit

Permalink
Merge pull request #2269 from PyO3/intern-macro
Browse files Browse the repository at this point in the history
RFC: Add intern! macro which can be used to amortize the cost of creating Python objects by storing them inside a GILOnceCell.
  • Loading branch information
davidhewitt committed Apr 4, 2022
2 parents 981eb2d + 89577a2 commit d3ac71a
Show file tree
Hide file tree
Showing 6 changed files with 132 additions and 2 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Added methods on `InterpreterConfig` to run Python scripts using the configured executable. [#2092](https://github.com/PyO3/pyo3/pull/2092)
- Added FFI definitions for `PyType_FromModuleAndSpec`, `PyType_GetModule`, `PyType_GetModuleState` and `PyModule_AddType`. [#2250](https://github.com/PyO3/pyo3/pull/2250)
- Add `PyString::intern` to enable usage of the Python's built-in string interning. [#2268](https://github.com/PyO3/pyo3/pull/2268)
- Add `intern!` macro which can be used to amortize the cost of creating Python strings by storing them inside a `GILOnceCell`. [#2269](https://github.com/PyO3/pyo3/pull/2269)

### Changed

Expand Down
4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,10 @@ harness = false
name = "bench_tuple"
harness = false

[[bench]]
name = "bench_intern"
harness = false

[workspace]
members = [
"pyo3-ffi",
Expand Down
33 changes: 33 additions & 0 deletions benches/bench_intern.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
use criterion::{criterion_group, criterion_main, Bencher, Criterion};

use pyo3::prelude::*;

use pyo3::{intern, prepare_freethreaded_python};

fn getattr_direct(b: &mut Bencher<'_>) {
prepare_freethreaded_python();

Python::with_gil(|py| {
let sys = py.import("sys").unwrap();

b.iter(|| sys.getattr("version").unwrap());
});
}

fn getattr_intern(b: &mut Bencher<'_>) {
prepare_freethreaded_python();

Python::with_gil(|py| {
let sys = py.import("sys").unwrap();

b.iter(|| sys.getattr(intern!(py, "version")).unwrap());
});
}

fn criterion_benchmark(c: &mut Criterion) {
c.bench_function("getattr_direct", getattr_direct);
c.bench_function("getattr_intern", getattr_intern);
}

criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,7 @@ pub mod impl_;
mod instance;
pub mod marker;
pub mod marshal;
#[macro_use]
pub mod once_cell;
pub mod panic;
pub mod prelude;
Expand Down
89 changes: 87 additions & 2 deletions src/once_cell.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ impl<T> GILOnceCell<T> {
}

/// Get a reference to the contained value, or `None` if the cell has not yet been written.
#[inline]
pub fn get(&self, _py: Python<'_>) -> Option<&T> {
// Safe because if the cell has not yet been written, None is returned.
unsafe { &*self.0.get() }.as_ref()
Expand All @@ -61,15 +62,23 @@ impl<T> GILOnceCell<T> {
/// exactly once, even if multiple threads attempt to call `get_or_init`
/// 4) if f() can panic but still does not release the GIL, it may be called multiple times,
/// but it is guaranteed that f() will never be called concurrently
#[inline]
pub fn get_or_init<F>(&self, py: Python<'_>, f: F) -> &T
where
F: FnOnce() -> T,
{
let inner = unsafe { &*self.0.get() }.as_ref();
if let Some(value) = inner {
if let Some(value) = self.get(py) {
return value;
}

self.init(py, f)
}

#[cold]
fn init<F>(&self, py: Python<'_>, f: F) -> &T
where
F: FnOnce() -> T,
{
// Note that f() could temporarily release the GIL, so it's possible that another thread
// writes to this GILOnceCell before f() finishes. That's fine; we'll just have to discard
// the value computed here and accept a bit of wasted computation.
Expand Down Expand Up @@ -101,3 +110,79 @@ impl<T> GILOnceCell<T> {
Ok(())
}
}

/// Interns `text` as a Python string and stores a reference to it in static storage.
///
/// A reference to the same Python string is returned on each invocation.
///
/// # Example: Using `intern!` to avoid needlessly recreating the same Python string
///
/// ```
/// use pyo3::intern;
/// # use pyo3::{pyfunction, types::PyDict, wrap_pyfunction, PyResult, Python};
///
/// #[pyfunction]
/// fn create_dict(py: Python<'_>) -> PyResult<&PyDict> {
/// let dict = PyDict::new(py);
/// // 👇 A new `PyString` is created
/// // for every call of this function.
/// dict.set_item("foo", 42)?;
/// Ok(dict)
/// }
///
/// #[pyfunction]
/// fn create_dict_faster(py: Python<'_>) -> PyResult<&PyDict> {
/// let dict = PyDict::new(py);
/// // 👇 A `PyString` is created once and reused
/// // for the lifetime of the program.
/// dict.set_item(intern!(py, "foo"), 42)?;
/// Ok(dict)
/// }
/// #
/// # Python::with_gil(|py| {
/// # let fun = wrap_pyfunction!(create_dict_faster, py).unwrap();
/// # let dict = fun.call0().unwrap();
/// # assert!(dict.contains("foo").unwrap());
/// # });
/// ```
#[macro_export]
macro_rules! intern {
($py: expr, $text: expr) => {{
fn isolate_from_dyn_env(py: $crate::Python<'_>) -> &$crate::types::PyString {
static INTERNED: $crate::once_cell::GILOnceCell<$crate::Py<$crate::types::PyString>> =
$crate::once_cell::GILOnceCell::new();

INTERNED
.get_or_init(py, || {
$crate::conversion::IntoPy::into_py(
$crate::types::PyString::intern(py, $text),
py,
)
})
.as_ref(py)
}

isolate_from_dyn_env($py)
}};
}

#[cfg(test)]
mod tests {
use super::*;

use crate::types::PyDict;

#[test]
fn test_intern() {
Python::with_gil(|py| {
let foo1 = "foo";
let foo2 = intern!(py, "foo");
let foo3 = intern!(py, stringify!(foo));

let dict = PyDict::new(py);
dict.set_item(foo1, 42_usize).unwrap();
assert!(dict.contains(foo2).unwrap());
assert_eq!(dict.get_item(foo3).unwrap().extract::<usize>().unwrap(), 42);
});
}
}
6 changes: 6 additions & 0 deletions src/test_hygiene/misc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,9 @@ enum Derive4 {

crate::create_exception!(mymodule, CustomError, crate::exceptions::PyException);
crate::import_exception!(socket, gaierror);

#[allow(dead_code)]
fn intern(py: crate::Python<'_>) {
let _foo = crate::intern!(py, "foo");
let _bar = crate::intern!(py, stringify!(bar));
}

0 comments on commit d3ac71a

Please sign in to comment.