From a62a0a34a772e6fa899a2d516b4c439295e921ab Mon Sep 17 00:00:00 2001 From: Thomas Date: Mon, 11 Mar 2024 17:27:45 +0100 Subject: [PATCH 1/5] Draft: Allows to introspect Python modules from cdylib --- .github/workflows/ci.yml | 44 ++++ Cargo.toml | 3 +- newsfragments/3977.added.md | 1 + noxfile.py | 16 +- pyo3-introspection/Cargo.toml | 18 ++ pyo3-introspection/LICENSE-APACHE | 1 + pyo3-introspection/LICENSE-MIT | 1 + pyo3-introspection/src/introspection.rs | 259 +++++++++++++++++++++++ pyo3-introspection/src/lib.rs | 8 + pyo3-introspection/src/model.rs | 17 ++ pyo3-introspection/src/stubs.rs | 52 +++++ pyo3-introspection/tests/test.rs | 77 +++++++ pyo3-macros-backend/Cargo.toml | 1 + pyo3-macros-backend/src/introspection.rs | 123 +++++++++++ pyo3-macros-backend/src/lib.rs | 2 + pyo3-macros-backend/src/module.rs | 14 ++ pyo3-macros-backend/src/pyclass.rs | 21 +- pyo3-macros-backend/src/pyfunction.rs | 8 +- pyo3-macros/Cargo.toml | 1 + pytests/Cargo.toml | 2 +- pytests/README.md | 3 + pytests/src/lib.rs | 73 ++++--- pytests/src/pyclasses.rs | 11 +- pytests/stubs/__init__.pyi | 0 pytests/stubs/pyclasses.pyi | 4 + pytests/stubs/pyfunctions.pyi | 0 src/impl_.rs | 2 + src/impl_/concat.rs | 29 +++ src/types/mod.rs | 4 + 29 files changed, 746 insertions(+), 49 deletions(-) create mode 100644 newsfragments/3977.added.md create mode 100644 pyo3-introspection/Cargo.toml create mode 120000 pyo3-introspection/LICENSE-APACHE create mode 120000 pyo3-introspection/LICENSE-MIT create mode 100644 pyo3-introspection/src/introspection.rs create mode 100644 pyo3-introspection/src/lib.rs create mode 100644 pyo3-introspection/src/model.rs create mode 100644 pyo3-introspection/src/stubs.rs create mode 100644 pyo3-introspection/tests/test.rs create mode 100644 pyo3-macros-backend/src/introspection.rs create mode 100644 pytests/stubs/__init__.pyi create mode 100644 pytests/stubs/pyclasses.pyi create mode 100644 pytests/stubs/pyfunctions.pyi create mode 100644 src/impl_/concat.rs diff 
--git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8379232b7fb..e89340a10e1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -588,6 +588,49 @@ jobs: with: path: ~/.cache/cargo-xwin key: cargo-xwin-cache + + test-introspection: + needs: [fmt] + strategy: + matrix: + platform: [ + { + os: "macos-latest", + python-architecture: "arm64", + rust-target: "aarch64-apple-darwin", + }, + { + os: "ubuntu-latest", + python-architecture: "x64", + rust-target: "x86_64-unknown-linux-gnu", + }, + { + os: "windows-latest", + python-architecture: "x64", + rust-target: "x86_64-pc-windows-msvc", + }, + { + os: "windows-latest", + python-architecture: "x86", + rust-target: "i686-pc-windows-msvc", + }, + ] + runs-on: ${{ matrix.platform.os }} + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable + with: + targets: ${{ matrix.platform.rust-target }} + components: rust-src + - uses: actions/setup-python@v5 + with: + architecture: ${{ matrix.platform.python-architecture }} + - uses: Swatinem/rust-cache@v2 + with: + save-if: ${{ github.event_name != 'merge_group' }} + - run: python -m pip install --upgrade pip && pip install nox + - run: nox -s test-introspection + conclusion: needs: - fmt @@ -605,6 +648,7 @@ jobs: - check-feature-powerset - test-cross-compilation - test-cross-compilation-windows + - test-introspection if: always() runs-on: ubuntu-latest steps: diff --git a/Cargo.toml b/Cargo.toml index 5d3888dfda1..1c23f95e865 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -73,7 +73,7 @@ experimental-async = ["macros", "pyo3-macros/experimental-async"] # Enables pyo3::inspect module and additional type information on FromPyObject # and IntoPy traits -experimental-inspect = [] +experimental-inspect = ["pyo3-macros/experimental-inspect"] # Enables macros: #[pyclass], #[pymodule], #[pyfunction] etc. 
macros = ["pyo3-macros", "indoc", "unindent"] @@ -141,6 +141,7 @@ members = [ "pyo3-build-config", "pyo3-macros", "pyo3-macros-backend", + "pyo3-introspection", "pytests", "examples", ] diff --git a/newsfragments/3977.added.md b/newsfragments/3977.added.md new file mode 100644 index 00000000000..58f116cd837 --- /dev/null +++ b/newsfragments/3977.added.md @@ -0,0 +1 @@ +Basic introspection and stub generation based on metadata embedded in produced cdylib. \ No newline at end of file diff --git a/noxfile.py b/noxfile.py index 96bd587bee8..c72dccca9ea 100644 --- a/noxfile.py +++ b/noxfile.py @@ -732,6 +732,19 @@ def update_ui_tests(session: nox.Session): _run_cargo(session, *command, "--features=abi3,full", env=env) +@nox.session(name="test-introspection") +def test_introspection(session: nox.Session): + session.run_always("python", "-m", "pip", "install", "-v", "./pytests") + # We look for the built library + lib_file = None + for file in Path(session.virtualenv.location).rglob("pyo3_pytests.*"): + if file.is_file(): + lib_file = str(file.resolve()) + _run_cargo_test( + session, package="pyo3-introspection", env={"PYO3_PYTEST_LIB_PATH": lib_file} + ) + + def _build_docs_for_ffi_check(session: nox.Session) -> None: # pyo3-ffi-check needs to scrape docs of pyo3-ffi env = os.environ.copy() @@ -848,6 +861,7 @@ def _run_cargo_test( *, package: Optional[str] = None, features: Optional[str] = None, + env: Optional[Dict[str, str]] = None, ) -> None: command = ["cargo"] if "careful" in session.posargs: @@ -860,7 +874,7 @@ def _run_cargo_test( if features: command.append(f"--features={features}") - _run(session, *command, external=True) + _run(session, *command, external=True, env=env or {}) def _run_cargo_publish(session: nox.Session, *, package: str) -> None: diff --git a/pyo3-introspection/Cargo.toml b/pyo3-introspection/Cargo.toml new file mode 100644 index 00000000000..b2849a19245 --- /dev/null +++ b/pyo3-introspection/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = 
"pyo3-introspection" +version = "0.22.0-dev" +description = "Introspect dynamic libraries built with PyO3 to get metadata about the exported Python types" +authors = ["PyO3 Project and Contributors "] +homepage = "https://github.com/pyo3/pyo3" +repository = "https://github.com/pyo3/pyo3" +license = "MIT OR Apache-2.0" +edition = "2021" + +[dependencies] +anyhow = "1" +goblin = "0.8.0" +serde = { version = "1", features = ["derive"] } +serde_json = "1" + +[lints] +workspace = true diff --git a/pyo3-introspection/LICENSE-APACHE b/pyo3-introspection/LICENSE-APACHE new file mode 120000 index 00000000000..965b606f331 --- /dev/null +++ b/pyo3-introspection/LICENSE-APACHE @@ -0,0 +1 @@ +../LICENSE-APACHE \ No newline at end of file diff --git a/pyo3-introspection/LICENSE-MIT b/pyo3-introspection/LICENSE-MIT new file mode 120000 index 00000000000..76219eb72e8 --- /dev/null +++ b/pyo3-introspection/LICENSE-MIT @@ -0,0 +1 @@ +../LICENSE-MIT \ No newline at end of file diff --git a/pyo3-introspection/src/introspection.rs b/pyo3-introspection/src/introspection.rs new file mode 100644 index 00000000000..3097f273c77 --- /dev/null +++ b/pyo3-introspection/src/introspection.rs @@ -0,0 +1,259 @@ +use crate::model::{Class, Function, Module}; +use anyhow::{bail, Context, Result}; +use goblin::elf::Elf; +use goblin::mach::{Mach, MachO, SingleArch}; +use goblin::pe::PE; +use goblin::Object; +use serde::Deserialize; +use std::collections::HashMap; +use std::fs; +use std::path::Path; + +/// Introspects a cdylib built with PyO3 and returns the definition of a Python module. +/// +/// This function currently supports the ELF (most *nix including Linux), Mach-O (macOS) and PE (Windows) formats. 
+pub fn introspect_cdylib(library_path: impl AsRef, main_module_name: &str) -> Result { + let chunks = find_introspection_chunks_in_binary_object(library_path.as_ref())?; + parse_chunks(&chunks, main_module_name) +} + +/// Parses the introspection chunks found in the binary +fn parse_chunks(chunks: &[Chunk], main_module_name: &str) -> Result { + let chunks_by_id = chunks + .iter() + .map(|c| { + ( + match c { + Chunk::Module { id, .. } => id, + Chunk::Class { id, .. } => id, + Chunk::Function { id, .. } => id, + }, + c, + ) + }) + .collect::>(); + // We look for the root chunk + for chunk in chunks { + if let Chunk::Module { + name, + members, + id: _, + } = chunk + { + if name == main_module_name { + return parse_module(name, members, &chunks_by_id); + } + } + } + bail!("No module named {main_module_name} found") +} + +fn parse_module( + name: &str, + members: &[String], + chunks_by_id: &HashMap<&String, &Chunk>, +) -> Result { + let mut modules = Vec::new(); + let mut classes = Vec::new(); + let mut functions = Vec::new(); + for member in members { + if let Some(chunk) = chunks_by_id.get(member) { + match chunk { + Chunk::Module { + name, + members, + id: _, + } => { + modules.push(parse_module(name, members, chunks_by_id)?); + } + Chunk::Class { name, id: _ } => classes.push(Class { name: name.into() }), + Chunk::Function { name, id: _ } => functions.push(Function { name: name.into() }), + } + } + } + Ok(Module { + name: name.into(), + modules, + classes, + functions, + }) +} + +fn find_introspection_chunks_in_binary_object(path: &Path) -> Result> { + let library_content = + fs::read(path).with_context(|| format!("Failed to read {}", path.display()))?; + match Object::parse(&library_content) + .context("The built library is not valid or not supported by our binary parser")? 
+ { + Object::Elf(elf) => find_introspection_chunks_in_elf(&elf, &library_content), + Object::Mach(Mach::Binary(matcho)) => { + find_introspection_chunks_in_matcho(&matcho, &library_content) + } + Object::Mach(Mach::Fat(multi_arch)) => { + for arch in &multi_arch { + match arch? { + SingleArch::MachO(matcho) => { + return find_introspection_chunks_in_matcho(&matcho, &library_content) + } + SingleArch::Archive(_) => (), + } + } + bail!("No Match-o chunk found in the multi-arch Match-o container") + } + Object::PE(pe) => find_introspection_chunks_in_pe(&pe, &library_content), + _ => { + bail!("Only ELF, Match-o and PE containers can be introspected") + } + } +} + +fn find_introspection_chunks_in_elf(elf: &Elf<'_>, library_content: &[u8]) -> Result> { + let pyo3_data_section_header = elf + .section_headers + .iter() + .find(|section| elf.shdr_strtab.get_at(section.sh_name).unwrap_or_default() == ".pyo3i0") + .context("No .pyo3i0 section found")?; + let sh_offset = + usize::try_from(pyo3_data_section_header.sh_offset).context("Section offset overflow")?; + let sh_size = + usize::try_from(pyo3_data_section_header.sh_size).context("Section len overflow")?; + if elf.is_64 { + read_section_with_ptr_and_len_64bits( + &library_content[sh_offset..sh_offset + sh_size], + 0, + library_content, + ) + } else { + read_section_with_ptr_and_len_32bits( + &library_content[sh_offset..sh_offset + sh_size], + 0, + library_content, + ) + } +} + +fn find_introspection_chunks_in_matcho( + matcho: &MachO<'_>, + library_content: &[u8], +) -> Result> { + if !matcho.little_endian { + bail!("Only little endian Match-o binaries are supported"); + } + let text_segment = matcho + .segments + .iter() + .find(|s| s.segname == *b"__TEXT\0\0\0\0\0\0\0\0\0\0") + .context("No __TEXT segment found")?; + let (_, pyo3_data_section) = text_segment + .sections()? 
+ .into_iter() + .find(|s| s.0.sectname == *b"__pyo3i0\0\0\0\0\0\0\0\0") + .context("No __pyo3i0 section found")?; + if matcho.is_64 { + read_section_with_ptr_and_len_64bits(pyo3_data_section, 0, library_content) + } else { + read_section_with_ptr_and_len_32bits(pyo3_data_section, 0, library_content) + } +} + +fn find_introspection_chunks_in_pe(pe: &PE<'_>, library_content: &[u8]) -> Result> { + let rdata_data_section = pe + .sections + .iter() + .find(|section| section.name().unwrap_or_default() == ".rdata") + .context("No .rdata section found")?; + let rdata_shift = pe.image_base + + usize::try_from(rdata_data_section.virtual_address) + .context(".rdata virtual_address overflow")? + - usize::try_from(rdata_data_section.pointer_to_raw_data) + .context(".rdata pointer_to_raw_data overflow")?; + let pyo3_data_section = pe + .sections + .iter() + .find(|section| section.name().unwrap_or_default() == ".pyo3i0") + .context("No .pyo3i0 section found")?; + let pyo3_data = pyo3_data_section + .data(library_content)? 
+ .context("Not able to find the .pyo3i0 section content")?; + if pe.is_64 { + read_section_with_ptr_and_len_64bits(&pyo3_data, rdata_shift, library_content) + } else { + read_section_with_ptr_and_len_32bits(&pyo3_data, rdata_shift, library_content) + } +} + +fn read_section_with_ptr_and_len_32bits( + slice: &[u8], + shift: usize, + full_library_content: &[u8], +) -> Result> { + slice + .chunks_exact(8) + .filter_map(|element| { + let (ptr, len) = element.split_at(4); + let ptr = match usize::try_from(u32::from_le_bytes(ptr.try_into().unwrap())) { + Ok(ptr) => ptr, + Err(e) => return Some(Err(e).context("Pointer overflow")), + }; + let len = match usize::try_from(u32::from_le_bytes(len.try_into().unwrap())) { + Ok(ptr) => ptr, + Err(e) => return Some(Err(e).context("Length overflow")), + }; + if ptr == 0 || len == 0 { + // Workaround for PE + return None; + } + Some( + serde_json::from_slice(&full_library_content[ptr - shift..ptr - shift + len]) + .context("Failed to parse introspection chunk"), + ) + }) + .collect() +} + +fn read_section_with_ptr_and_len_64bits( + slice: &[u8], + shift: usize, + full_library_content: &[u8], +) -> Result> { + slice + .chunks_exact(16) + .filter_map(|element| { + let (ptr, len) = element.split_at(8); + let ptr = match usize::try_from(u64::from_le_bytes(ptr.try_into().unwrap())) { + Ok(ptr) => ptr, + Err(e) => return Some(Err(e).context("Pointer overflow")), + }; + let len = match usize::try_from(u64::from_le_bytes(len.try_into().unwrap())) { + Ok(ptr) => ptr, + Err(e) => return Some(Err(e).context("Length overflow")), + }; + if ptr == 0 || len == 0 { + // Workaround for PE + return None; + } + Some( + serde_json::from_slice(&full_library_content[ptr - shift..ptr - shift + len]) + .context("Failed to parse introspection chunk"), + ) + }) + .collect() +} + +#[derive(Deserialize)] +#[serde(tag = "type", rename_all = "lowercase")] +enum Chunk { + Module { + id: String, + name: String, + members: Vec, + }, + Class { + id: String, + name: 
String, + }, + Function { + id: String, + name: String, + }, +} diff --git a/pyo3-introspection/src/lib.rs b/pyo3-introspection/src/lib.rs new file mode 100644 index 00000000000..22aac933e85 --- /dev/null +++ b/pyo3-introspection/src/lib.rs @@ -0,0 +1,8 @@ +//! Utilities to introspect cdylib built using PyO3 and generate [type stubs](https://typing.readthedocs.io/en/latest/source/stubs.html). + +pub use crate::introspection::introspect_cdylib; +pub use crate::stubs::module_stub_files; + +mod introspection; +pub mod model; +mod stubs; diff --git a/pyo3-introspection/src/model.rs b/pyo3-introspection/src/model.rs new file mode 100644 index 00000000000..73a4c27d082 --- /dev/null +++ b/pyo3-introspection/src/model.rs @@ -0,0 +1,17 @@ +#[derive(Debug, Eq, PartialEq, Clone, Hash)] +pub struct Module { + pub name: String, + pub modules: Vec, + pub classes: Vec, + pub functions: Vec, +} + +#[derive(Debug, Eq, PartialEq, Clone, Hash)] +pub struct Class { + pub name: String, +} + +#[derive(Debug, Eq, PartialEq, Clone, Hash)] +pub struct Function { + pub name: String, +} diff --git a/pyo3-introspection/src/stubs.rs b/pyo3-introspection/src/stubs.rs new file mode 100644 index 00000000000..0705911032f --- /dev/null +++ b/pyo3-introspection/src/stubs.rs @@ -0,0 +1,52 @@ +use crate::model::{Class, Function, Module}; +use std::collections::HashMap; +use std::path::{Path, PathBuf}; + +/// Generates the [type stubs](https://typing.readthedocs.io/en/latest/source/stubs.html) of a given module. +/// It returns a map between the file name and the file content. +/// The root module stubs will be in the `__init__.pyi` file and the submodules directory +/// in files with a relevant name. 
+pub fn module_stub_files(module: &Module) -> HashMap { + let mut output_files = HashMap::new(); + add_module_stub_files(module, Path::new(""), &mut output_files); + output_files +} + +fn add_module_stub_files( + module: &Module, + module_path: &Path, + output_files: &mut HashMap, +) { + output_files.insert(module_path.join("__init__.pyi"), module_stubs(module)); + for submodule in &module.modules { + if submodule.modules.is_empty() { + output_files.insert( + module_path.join(format!("{}.pyi", submodule.name)), + module_stubs(submodule), + ); + } else { + add_module_stub_files(submodule, &module_path.join(&submodule.name), output_files); + } + } +} + +/// Generates the module stubs to a String, not including submodules +fn module_stubs(module: &Module) -> String { + let mut elements = Vec::new(); + for class in &module.classes { + elements.push(class_stubs(class)); + } + for function in &module.functions { + elements.push(function_stubs(function)); + } + elements.push(String::new()); // last line jump + elements.join("\n") +} + +fn class_stubs(class: &Class) -> String { + format!("class {}: ...", class.name) +} + +fn function_stubs(function: &Function) -> String { + format!("def {}(*args, **kwargs): ...", function.name) +} diff --git a/pyo3-introspection/tests/test.rs b/pyo3-introspection/tests/test.rs new file mode 100644 index 00000000000..37070a53a13 --- /dev/null +++ b/pyo3-introspection/tests/test.rs @@ -0,0 +1,77 @@ +use anyhow::Result; +use pyo3_introspection::{introspect_cdylib, module_stub_files}; +use std::collections::HashMap; +use std::path::{Path, PathBuf}; +use std::{env, fs}; + +#[test] +fn pytests_stubs() -> Result<()> { + // We run the introspection + let binary = env::var_os("PYO3_PYTEST_LIB_PATH") + .expect("The PYO3_PYTEST_LIB_PATH constant must be set and target the pyo3-pytests cdylib"); + let module = introspect_cdylib(binary, "pyo3_pytests")?; + let actual_stubs = module_stub_files(&module); + + // We read the expected stubs + let 
expected_subs_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .unwrap() + .join("pytests") + .join("stubs"); + let mut expected_subs = HashMap::new(); + add_dir_files( + &expected_subs_dir, + &expected_subs_dir.canonicalize()?, + &mut expected_subs, + )?; + + // We ensure we do not have extra generated files + for file_name in actual_stubs.keys() { + assert!( + expected_subs.contains_key(file_name), + "The generated file {} is not in the expected stubs directory pytests/stubs", + file_name.display() + ); + } + + // We ensure the expected files are generated properly + for (file_name, expected_file_content) in &expected_subs { + let actual_file_content = actual_stubs.get(file_name).unwrap_or_else(|| { + panic!( + "The expected stub file {} has not been generated", + file_name.display() + ) + }); + assert_eq!( + &expected_file_content.replace('\r', ""), // Windows compatibility + actual_file_content, + "The content of file {} is different", + file_name.display() + ) + } + + Ok(()) +} + +fn add_dir_files( + dir_path: &Path, + base_dir_path: &Path, + output: &mut HashMap, +) -> Result<()> { + for entry in fs::read_dir(dir_path)? { + let entry = entry?; + if entry.file_type()?.is_dir() { + add_dir_files(&entry.path(), base_dir_path, output)?; + } else { + output.insert( + entry + .path() + .canonicalize()? + .strip_prefix(base_dir_path)? + .into(), + fs::read_to_string(entry.path())?, + ); + } + } + Ok(()) +} diff --git a/pyo3-macros-backend/Cargo.toml b/pyo3-macros-backend/Cargo.toml index 264134d5249..0a2fc902124 100644 --- a/pyo3-macros-backend/Cargo.toml +++ b/pyo3-macros-backend/Cargo.toml @@ -32,4 +32,5 @@ workspace = true [features] experimental-async = [] +experimental-inspect = [] gil-refs = [] diff --git a/pyo3-macros-backend/src/introspection.rs b/pyo3-macros-backend/src/introspection.rs new file mode 100644 index 00000000000..5a9baaf6bfc --- /dev/null +++ b/pyo3-macros-backend/src/introspection.rs @@ -0,0 +1,123 @@ +//! 
Generates introspection data, i.e. JSON strings in the .pyo3i0 section. +//! +//! There is one JSON blob per PyO3 proc macro (pyclass, pymodule, pyfunction...). +//! +//! These JSON blobs can refer to each other via the _PYO3_INTROSPECTION_ID constants +//! providing unique ids for each element. + +use crate::utils::PyO3CratePath; +use proc_macro2::{Span, TokenStream}; +use quote::{format_ident, quote, ToTokens}; +use std::collections::hash_map::DefaultHasher; +use std::hash::{Hash, Hasher}; +use std::sync::atomic::{AtomicUsize, Ordering}; +use syn::Ident; + +static GLOBAL_COUNTER_FOR_UNIQUE_NAMES: AtomicUsize = AtomicUsize::new(0); + +pub fn module_introspection_code<'a>( + pyo3_crate_path: &PyO3CratePath, + name: &str, + members: impl IntoIterator, +) -> TokenStream { + let mut to_concat = Vec::new(); + to_concat.push(quote! { "{\"type\":\"module\",\"id\":\"" }); + to_concat.push(quote! { _PYO3_INTROSPECTION_ID }); + to_concat.push(quote! { "\",\"name\":\""}); + to_concat.push(quote! { #name }); + to_concat.push(quote! { "\",\"members\":["}); + let mut start = true; + for member in members { + if start { + start = false; + } else { + to_concat.push(quote! { "," }); + } + to_concat.push(quote! { "\"" }); + to_concat.push(quote! { + #member::_PYO3_INTROSPECTION_ID + }); + to_concat.push(quote! { "\"" }); + } + to_concat.push(quote! { "]}" }); + let stub = stub_section(quote! { + #pyo3_crate_path::impl_::concat::const_concat!(#(#to_concat , )*) + }); + let introspection_id = introspection_id_const(); + quote! { + #stub + #introspection_id + } +} + +pub fn class_introspection_code( + pyo3_crate_path: &PyO3CratePath, + ident: &Ident, + name: &str, +) -> TokenStream { + let mut to_concat = Vec::new(); + to_concat.push(quote! { "{\"type\":\"class\",\"id\":\"" }); + to_concat.push(quote! { #ident::_PYO3_INTROSPECTION_ID }); + to_concat.push(quote! { "\",\"name\":\""}); + to_concat.push(quote! { #name }); + to_concat.push(quote! { "\"}" }); + let stub = stub_section(quote! 
{ + #pyo3_crate_path::impl_::concat::const_concat!(#(#to_concat , )*) + }); + let introspection_id = introspection_id_const(); + quote! { + #stub + impl #ident { + #introspection_id + } + } +} + +pub fn function_introspection_code(pyo3_crate_path: &PyO3CratePath, name: &str) -> TokenStream { + let mut to_concat = Vec::new(); + to_concat.push(quote! { "{\"type\":\"function\",\"id\":\"" }); + to_concat.push(quote! { _PYO3_INTROSPECTION_ID }); + to_concat.push(quote! { "\",\"name\":\""}); + to_concat.push(quote! { #name }); + to_concat.push(quote! { "\"}" }); + let stub = stub_section(quote! { + #pyo3_crate_path::impl_::concat::const_concat!(#(#to_concat , )*) + }); + let introspection_id = introspection_id_const(); + quote! { + #stub + #introspection_id + } +} + +fn stub_section(content: impl ToTokens) -> TokenStream { + let static_name = format_ident!("PYO3_INTRS_{}", unique_element_id()); + // #[no_mangle] is required to make sure some linkers like Linux ones do not mangle the section name too. + quote! { + const _: () = { + #[used] + #[cfg(not(target_family = "wasm"))] + #[cfg_attr(any(target_os = "macos", target_os = "ios", target_os = "tvos", target_os = "watchos"), link_section = "__TEXT,__pyo3i0")] + #[cfg_attr(not(any(target_os = "macos", target_os = "ios", target_os = "tvos", target_os = "watchos")), link_section = ".pyo3i0")] + #[no_mangle] + static #static_name: &'static str = #content; + }; + } +} + +fn introspection_id_const() -> TokenStream { + let id = unique_element_id().to_string(); + quote! 
{ + #[doc(hidden)] + pub const _PYO3_INTROSPECTION_ID: &'static str = #id; + } +} + +fn unique_element_id() -> u64 { + let mut hasher = DefaultHasher::new(); + format!("{:?}", Span::call_site()).hash(&mut hasher); // Distinguishes between call sites + GLOBAL_COUNTER_FOR_UNIQUE_NAMES + .fetch_add(1, Ordering::Relaxed) + .hash(&mut hasher); // If there are multiple elements in the same call site + hasher.finish() +} diff --git a/pyo3-macros-backend/src/lib.rs b/pyo3-macros-backend/src/lib.rs index 5d7437a4295..615d37b30f0 100644 --- a/pyo3-macros-backend/src/lib.rs +++ b/pyo3-macros-backend/src/lib.rs @@ -11,6 +11,8 @@ mod utils; mod attributes; mod deprecations; mod frompyobject; +#[cfg(feature = "experimental-inspect")] +mod introspection; mod konst; mod method; mod module; diff --git a/pyo3-macros-backend/src/module.rs b/pyo3-macros-backend/src/module.rs index faa7032de80..95358625c50 100644 --- a/pyo3-macros-backend/src/module.rs +++ b/pyo3-macros-backend/src/module.rs @@ -1,5 +1,7 @@ //! Code generation for the function that initializes a python module and adds classes and function. +#[cfg(feature = "experimental-inspect")] +use crate::introspection::module_introspection_code; use crate::{ attributes::{ self, take_attributes, take_pyo3_options, CrateAttribute, ModuleAttribute, NameAttribute, @@ -303,6 +305,11 @@ pub fn pymodule_module_impl( } } + #[cfg(feature = "experimental-inspect")] + let introspection = module_introspection_code(pyo3_path, &name.to_string(), &module_items); + #[cfg(not(feature = "experimental-inspect"))] + let introspection = quote! {}; + let module_def = quote! 
{{ use #pyo3_path::impl_::pymodule as impl_; const INITIALIZER: impl_::ModuleInitializer = impl_::ModuleInitializer(__pyo3_pymodule); @@ -321,6 +328,7 @@ pub fn pymodule_module_impl( #(#items)* #initialization + #introspection fn __pyo3_pymodule(module: &#pyo3_path::Bound<'_, #pyo3_path::types::PyModule>) -> #pyo3_path::PyResult<()> { use #pyo3_path::impl_::pymodule::PyAddToModule; @@ -350,6 +358,11 @@ pub fn pymodule_function_impl(mut function: syn::ItemFn) -> Result let initialization = module_initialization(&name, ctx, quote! { MakeDef::make_def() }, false); + #[cfg(feature = "experimental-inspect")] + let introspection = module_introspection_code(pyo3_path, &name.to_string(), &[]); + #[cfg(not(feature = "experimental-inspect"))] + let introspection = quote! {}; + // Module function called with optional Python<'_> marker as first arg, followed by the module. let mut module_args = Vec::new(); if function.sig.inputs.len() == 2 { @@ -387,6 +400,7 @@ pub fn pymodule_function_impl(mut function: syn::ItemFn) -> Result #[doc(hidden)] #vis mod #ident { #initialization + #introspection } // Generate the definition inside an anonymous function in the same scope as the original function - diff --git a/pyo3-macros-backend/src/pyclass.rs b/pyo3-macros-backend/src/pyclass.rs index fd85cfa3bb6..5d5091b16f2 100644 --- a/pyo3-macros-backend/src/pyclass.rs +++ b/pyo3-macros-backend/src/pyclass.rs @@ -13,6 +13,8 @@ use crate::attributes::{ ModuleAttribute, NameAttribute, NameLitStr, RenameAllAttribute, }; use crate::deprecations::Deprecations; +#[cfg(feature = "experimental-inspect")] +use crate::introspection::class_introspection_code; use crate::konst::{ConstAttributes, ConstSpec}; use crate::method::{FnArg, FnSpec, PyArg, RegularArg}; use crate::pyfunction::ConstructorAttribute; @@ -948,6 +950,7 @@ fn impl_complex_enum( impl_builder.impl_pyclassimpl(ctx)?, impl_builder.impl_add_to_module(ctx), impl_builder.impl_freelist(ctx), + impl_builder.impl_introspection(ctx), ] 
.into_iter() .collect(); @@ -1932,17 +1935,17 @@ impl<'a> PyClassImplsBuilder<'a> { } fn impl_all(&self, ctx: &Ctx) -> Result { - let tokens = [ + Ok([ self.impl_pyclass(ctx), self.impl_extractext(ctx), self.impl_into_py(ctx), self.impl_pyclassimpl(ctx)?, self.impl_add_to_module(ctx), self.impl_freelist(ctx), + self.impl_introspection(ctx), ] .into_iter() - .collect(); - Ok(tokens) + .collect()) } fn impl_pyclass(&self, ctx: &Ctx) -> TokenStream { @@ -2247,6 +2250,18 @@ impl<'a> PyClassImplsBuilder<'a> { Vec::new() } } + + #[cfg(feature = "experimental-inspect")] + fn impl_introspection(&self, ctx: &Ctx) -> TokenStream { + let Ctx { pyo3_path } = ctx; + let name = get_class_python_name(self.cls, self.attr).to_string(); + class_introspection_code(pyo3_path, self.cls, &name) + } + + #[cfg(not(feature = "experimental-inspect"))] + fn impl_introspection(&self, _ctx: &Ctx) -> TokenStream { + quote! {} + } } fn define_inventory_class(inventory_class_name: &syn::Ident, ctx: &Ctx) -> TokenStream { diff --git a/pyo3-macros-backend/src/pyfunction.rs b/pyo3-macros-backend/src/pyfunction.rs index 25f0d5b37ae..ef03b5b610a 100644 --- a/pyo3-macros-backend/src/pyfunction.rs +++ b/pyo3-macros-backend/src/pyfunction.rs @@ -1,3 +1,5 @@ +#[cfg(feature = "experimental-inspect")] +use crate::introspection::function_introspection_code; use crate::utils::Ctx; use crate::{ attributes::{ @@ -261,15 +263,19 @@ pub fn impl_wrap_pyfunction( let wrapper_ident = format_ident!("__pyfunction_{}", spec.name); let wrapper = spec.get_wrapper_function(&wrapper_ident, None, ctx)?; let methoddef = spec.get_methoddef(wrapper_ident, &spec.get_doc(&func.attrs, ctx), ctx); + #[cfg(feature = "experimental-inspect")] + let introspection = function_introspection_code(pyo3_path, &name.to_string()); + #[cfg(not(feature = "experimental-inspect"))] + let introspection = quote! {}; let wrapped_pyfunction = quote! 
{ - // Create a module with the same name as the `#[pyfunction]` - this way `use ` // will actually bring both the module and the function into scope. #[doc(hidden)] #vis mod #name { pub(crate) struct MakeDef; pub const _PYO3_DEF: #pyo3_path::impl_::pymethods::PyMethodDef = MakeDef::_PYO3_DEF; + #introspection } // Generate the definition inside an anonymous function in the same scope as the original function - diff --git a/pyo3-macros/Cargo.toml b/pyo3-macros/Cargo.toml index 0dbbdd7cac9..4271c2913fc 100644 --- a/pyo3-macros/Cargo.toml +++ b/pyo3-macros/Cargo.toml @@ -16,6 +16,7 @@ proc-macro = true [features] multiple-pymethods = [] experimental-async = ["pyo3-macros-backend/experimental-async"] +experimental-inspect = ["pyo3-macros-backend/experimental-inspect"] gil-refs = ["pyo3-macros-backend/gil-refs"] [dependencies] diff --git a/pytests/Cargo.toml b/pytests/Cargo.toml index 255094a6c40..758764d8c1b 100644 --- a/pytests/Cargo.toml +++ b/pytests/Cargo.toml @@ -7,7 +7,7 @@ edition = "2021" publish = false [dependencies] -pyo3 = { path = "../", features = ["extension-module"] } +pyo3 = { path = "../", features = ["extension-module", "experimental-declarative-modules", "experimental-inspect"] } [build-dependencies] pyo3-build-config = { path = "../pyo3-build-config" } diff --git a/pytests/README.md b/pytests/README.md index 7ced072aa36..1016baa7209 100644 --- a/pytests/README.md +++ b/pytests/README.md @@ -2,6 +2,9 @@ An extension module built using PyO3, used to test and benchmark PyO3 from Python. +The `stubs` directory contains Python stubs used to test the automated stubs introspection. +To test them run `nox -s test-introspection`. + ## Testing This package is intended to be built using `maturin`. 
Once built, you can run the tests using `pytest`: diff --git a/pytests/src/lib.rs b/pytests/src/lib.rs index cbd65c8012c..3f3cdd3ed3a 100644 --- a/pytests/src/lib.rs +++ b/pytests/src/lib.rs @@ -18,43 +18,48 @@ pub mod sequence; pub mod subclassing; #[pymodule] -fn pyo3_pytests(py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> { - m.add_wrapped(wrap_pymodule!(awaitable::awaitable))?; - #[cfg(not(Py_LIMITED_API))] - m.add_wrapped(wrap_pymodule!(buf_and_str::buf_and_str))?; - m.add_wrapped(wrap_pymodule!(comparisons::comparisons))?; - #[cfg(not(Py_LIMITED_API))] - m.add_wrapped(wrap_pymodule!(datetime::datetime))?; - m.add_wrapped(wrap_pymodule!(dict_iter::dict_iter))?; - m.add_wrapped(wrap_pymodule!(enums::enums))?; - m.add_wrapped(wrap_pymodule!(misc::misc))?; - m.add_wrapped(wrap_pymodule!(objstore::objstore))?; - m.add_wrapped(wrap_pymodule!(othermod::othermod))?; - m.add_wrapped(wrap_pymodule!(path::path))?; - m.add_wrapped(wrap_pymodule!(pyclasses::pyclasses))?; - m.add_wrapped(wrap_pymodule!(pyfunctions::pyfunctions))?; - m.add_wrapped(wrap_pymodule!(sequence::sequence))?; - m.add_wrapped(wrap_pymodule!(subclassing::subclassing))?; +mod pyo3_pytests { + use super::*; + + #[pymodule_export] + use {pyclasses::pyclasses, pyfunctions::pyfunctions}; // Inserting to sys.modules allows importing submodules nicely from Python // e.g. 
import pyo3_pytests.buf_and_str as bas + #[pymodule_init] + fn init(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_wrapped(wrap_pymodule!(awaitable::awaitable))?; + #[cfg(not(Py_LIMITED_API))] + m.add_wrapped(wrap_pymodule!(buf_and_str::buf_and_str))?; + m.add_wrapped(wrap_pymodule!(comparisons::comparisons))?; + #[cfg(not(Py_LIMITED_API))] + m.add_wrapped(wrap_pymodule!(datetime::datetime))?; + m.add_wrapped(wrap_pymodule!(dict_iter::dict_iter))?; + m.add_wrapped(wrap_pymodule!(enums::enums))?; + m.add_wrapped(wrap_pymodule!(misc::misc))?; + m.add_wrapped(wrap_pymodule!(objstore::objstore))?; + m.add_wrapped(wrap_pymodule!(othermod::othermod))?; + m.add_wrapped(wrap_pymodule!(path::path))?; + m.add_wrapped(wrap_pymodule!(sequence::sequence))?; + m.add_wrapped(wrap_pymodule!(subclassing::subclassing))?; - let sys = PyModule::import_bound(py, "sys")?; - let sys_modules = sys.getattr("modules")?.downcast_into::()?; - sys_modules.set_item("pyo3_pytests.awaitable", m.getattr("awaitable")?)?; - sys_modules.set_item("pyo3_pytests.buf_and_str", m.getattr("buf_and_str")?)?; - sys_modules.set_item("pyo3_pytests.comparisons", m.getattr("comparisons")?)?; - sys_modules.set_item("pyo3_pytests.datetime", m.getattr("datetime")?)?; - sys_modules.set_item("pyo3_pytests.dict_iter", m.getattr("dict_iter")?)?; - sys_modules.set_item("pyo3_pytests.enums", m.getattr("enums")?)?; - sys_modules.set_item("pyo3_pytests.misc", m.getattr("misc")?)?; - sys_modules.set_item("pyo3_pytests.objstore", m.getattr("objstore")?)?; - sys_modules.set_item("pyo3_pytests.othermod", m.getattr("othermod")?)?; - sys_modules.set_item("pyo3_pytests.path", m.getattr("path")?)?; - sys_modules.set_item("pyo3_pytests.pyclasses", m.getattr("pyclasses")?)?; - sys_modules.set_item("pyo3_pytests.pyfunctions", m.getattr("pyfunctions")?)?; - sys_modules.set_item("pyo3_pytests.sequence", m.getattr("sequence")?)?; - sys_modules.set_item("pyo3_pytests.subclassing", m.getattr("subclassing")?)?; + let sys = 
PyModule::import_bound(m.py(), "sys")?; + let sys_modules = sys.getattr("modules")?.downcast_into::()?; + sys_modules.set_item("pyo3_pytests.awaitable", m.getattr("awaitable")?)?; + sys_modules.set_item("pyo3_pytests.buf_and_str", m.getattr("buf_and_str")?)?; + sys_modules.set_item("pyo3_pytests.comparisons", m.getattr("comparisons")?)?; + sys_modules.set_item("pyo3_pytests.datetime", m.getattr("datetime")?)?; + sys_modules.set_item("pyo3_pytests.dict_iter", m.getattr("dict_iter")?)?; + sys_modules.set_item("pyo3_pytests.enums", m.getattr("enums")?)?; + sys_modules.set_item("pyo3_pytests.misc", m.getattr("misc")?)?; + sys_modules.set_item("pyo3_pytests.objstore", m.getattr("objstore")?)?; + sys_modules.set_item("pyo3_pytests.othermod", m.getattr("othermod")?)?; + sys_modules.set_item("pyo3_pytests.path", m.getattr("path")?)?; + sys_modules.set_item("pyo3_pytests.pyclasses", m.getattr("pyclasses")?)?; + sys_modules.set_item("pyo3_pytests.pyfunctions", m.getattr("pyfunctions")?)?; + sys_modules.set_item("pyo3_pytests.sequence", m.getattr("sequence")?)?; + sys_modules.set_item("pyo3_pytests.subclassing", m.getattr("subclassing")?)?; - Ok(()) + Ok(()) + } } diff --git a/pytests/src/pyclasses.rs b/pytests/src/pyclasses.rs index f7e4681af70..a6973feab2e 100644 --- a/pytests/src/pyclasses.rs +++ b/pytests/src/pyclasses.rs @@ -78,12 +78,7 @@ impl ClassWithDict { } #[pymodule] -pub fn pyclasses(m: &Bound<'_, PyModule>) -> PyResult<()> { - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - - Ok(()) +pub mod pyclasses { + #[pymodule_export] + use super::{AssertingBaseClass, ClassWithoutConstructor, EmptyClass, PyClassIter, ClassWithDict}; } diff --git a/pytests/stubs/__init__.pyi b/pytests/stubs/__init__.pyi new file mode 100644 index 00000000000..e69de29bb2d diff --git a/pytests/stubs/pyclasses.pyi b/pytests/stubs/pyclasses.pyi new file mode 100644 index 00000000000..db688c368ef --- /dev/null +++ b/pytests/stubs/pyclasses.pyi 
@@ -0,0 +1,4 @@ +class AssertingBaseClass: ... +class ClassWithoutConstructor: ... +class EmptyClass: ... +class PyClassIter: ... diff --git a/pytests/stubs/pyfunctions.pyi b/pytests/stubs/pyfunctions.pyi new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/impl_.rs b/src/impl_.rs index 71ba397cb94..890819a89e1 100644 --- a/src/impl_.rs +++ b/src/impl_.rs @@ -6,6 +6,8 @@ //! APIs may may change at any time without documentation in the CHANGELOG and without //! breaking semver guarantees. +#[cfg(feature = "experimental-inspect")] +pub mod concat; #[cfg(feature = "experimental-async")] pub mod coroutine; pub mod deprecations; diff --git a/src/impl_/concat.rs b/src/impl_/concat.rs new file mode 100644 index 00000000000..0a31300aabc --- /dev/null +++ b/src/impl_/concat.rs @@ -0,0 +1,29 @@ +/// `concat!` but working with constants +#[macro_export] +#[doc(hidden)] +macro_rules! const_concat { + ($e:expr) => {{ + $e + }}; + ($l:expr, $($r:expr),+ $(,)?) => {{ + const L: &'static str = $l; + const R: &'static str = $crate::impl_::concat::const_concat!($($r),*); + const LEN: usize = L.len() + R.len(); + const fn combine(l: &'static [u8], r: &'static [u8]) -> [u8; LEN] { + let mut out = [0u8; LEN]; + let mut i = 0; + while i < l.len() { + out[i] = l[i]; + i += 1; + } + while i < LEN { + out[i] = r[i - l.len()]; + i += 1; + } + out + } + unsafe { ::std::str::from_utf8_unchecked(&combine(L.as_bytes(), R.as_bytes())) } + }} +} + +pub use const_concat; diff --git a/src/types/mod.rs b/src/types/mod.rs index d74c7bc234c..571b8794e40 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -268,6 +268,10 @@ macro_rules! 
pyobject_native_type_info( impl $name { #[doc(hidden)] pub const _PYO3_DEF: $crate::impl_::pymodule::AddTypeToModule = $crate::impl_::pymodule::AddTypeToModule::new(); + + #[allow(dead_code)] + #[doc(hidden)] + pub const _PYO3_INTROSPECTION_ID: &'static str = concat!(stringify!($module), stringify!($name)); } }; ); From 0d78198ecde1a8fa548851a3c4c623e7cb2b1592 Mon Sep 17 00:00:00 2001 From: Thomas Tanon Date: Tue, 11 Jun 2024 12:34:19 +0200 Subject: [PATCH 2/5] Do not use a custom link_section to get WASM compatibility --- guide/src/features.md | 4 +- pyo3-introspection/src/introspection.rs | 200 +++++++++++------------ pyo3-macros-backend/src/introspection.rs | 174 ++++++++++++++------ pyo3-macros-backend/src/pyclass.rs | 2 +- pytests/Cargo.toml | 2 +- pytests/src/pyclasses.rs | 4 +- pytests/stubs/pyclasses.pyi | 1 + 7 files changed, 221 insertions(+), 166 deletions(-) diff --git a/guide/src/features.md b/guide/src/features.md index d801e2dd1e4..48beb5d016f 100644 --- a/guide/src/features.md +++ b/guide/src/features.md @@ -59,7 +59,9 @@ The feature has some unfinished refinements and performance improvements. To hel ### `experimental-inspect` -This feature adds the `pyo3::inspect` module, as well as `IntoPy::type_output` and `FromPyObject::type_input` APIs to produce Python type "annotations" for Rust types. +This feature adds to the built binaries introspection data that can be then retrieved using the `pyo3-introspection` crate to generate [type stubs](https://typing.readthedocs.io/en/latest/source/stubs.html). + +Also, this feature adds the `pyo3::inspect` module, as well as `IntoPy::type_output` and `FromPyObject::type_input` APIs to produce Python type "annotations" for Rust types. This is a first step towards adding first-class support for generating type annotations automatically in PyO3, however work is needed to finish this off. All feedback and offers of help welcome on [issue #2454](https://github.com/PyO3/pyo3/issues/2454). 
diff --git a/pyo3-introspection/src/introspection.rs b/pyo3-introspection/src/introspection.rs index 3097f273c77..208a3fd87b9 100644 --- a/pyo3-introspection/src/introspection.rs +++ b/pyo3-introspection/src/introspection.rs @@ -86,74 +86,72 @@ fn find_introspection_chunks_in_binary_object(path: &Path) -> Result> .context("The built library is not valid or not supported by our binary parser")? { Object::Elf(elf) => find_introspection_chunks_in_elf(&elf, &library_content), - Object::Mach(Mach::Binary(matcho)) => { - find_introspection_chunks_in_matcho(&matcho, &library_content) + Object::Mach(Mach::Binary(macho)) => { + find_introspection_chunks_in_macho(&macho, &library_content) } Object::Mach(Mach::Fat(multi_arch)) => { for arch in &multi_arch { match arch? { - SingleArch::MachO(matcho) => { - return find_introspection_chunks_in_matcho(&matcho, &library_content) + SingleArch::MachO(macho) => { + return find_introspection_chunks_in_macho(&macho, &library_content) } SingleArch::Archive(_) => (), } } - bail!("No Match-o chunk found in the multi-arch Match-o container") + bail!("No Mach-o chunk found in the multi-arch Mach-o container") } Object::PE(pe) => find_introspection_chunks_in_pe(&pe, &library_content), _ => { - bail!("Only ELF, Match-o and PE containers can be introspected") + bail!("Only ELF, Mach-o and PE containers can be introspected") } } } fn find_introspection_chunks_in_elf(elf: &Elf<'_>, library_content: &[u8]) -> Result> { - let pyo3_data_section_header = elf - .section_headers - .iter() - .find(|section| elf.shdr_strtab.get_at(section.sh_name).unwrap_or_default() == ".pyo3i0") - .context("No .pyo3i0 section found")?; - let sh_offset = - usize::try_from(pyo3_data_section_header.sh_offset).context("Section offset overflow")?; - let sh_size = - usize::try_from(pyo3_data_section_header.sh_size).context("Section len overflow")?; - if elf.is_64 { - read_section_with_ptr_and_len_64bits( - &library_content[sh_offset..sh_offset + sh_size], - 0, - 
library_content, - ) - } else { - read_section_with_ptr_and_len_32bits( - &library_content[sh_offset..sh_offset + sh_size], - 0, - library_content, - ) + let mut chunks = Vec::new(); + for sym in &elf.syms { + if is_introspection_symbol(elf.strtab.get_at(sym.st_name).unwrap_or_default()) { + let section_header = &elf.section_headers[sym.st_shndx]; + let data_offset = sym.st_value + section_header.sh_offset - section_header.sh_addr; + chunks.push(read_symbol_value_with_ptr_and_len( + &library_content[usize::try_from(data_offset).context("File offset overflow")?..], + 0, + library_content, + elf.is_64, + )?); + } } + Ok(chunks) } -fn find_introspection_chunks_in_matcho( - matcho: &MachO<'_>, +fn find_introspection_chunks_in_macho( + macho: &MachO<'_>, library_content: &[u8], ) -> Result> { - if !matcho.little_endian { - bail!("Only little endian Match-o binaries are supported"); + if !macho.little_endian { + bail!("Only little endian Mach-o binaries are supported"); } - let text_segment = matcho + + let sections = macho .segments - .iter() - .find(|s| s.segname == *b"__TEXT\0\0\0\0\0\0\0\0\0\0") - .context("No __TEXT segment found")?; - let (_, pyo3_data_section) = text_segment - .sections()? 
- .into_iter() - .find(|s| s.0.sectname == *b"__pyo3i0\0\0\0\0\0\0\0\0") - .context("No __pyo3i0 section found")?; - if matcho.is_64 { - read_section_with_ptr_and_len_64bits(pyo3_data_section, 0, library_content) - } else { - read_section_with_ptr_and_len_32bits(pyo3_data_section, 0, library_content) + .sections() + .flatten() + .map(|t| t.map(|s| s.0)) + .collect::<Result<Vec<_>, _>>()?; + let mut chunks = Vec::new(); + for (name, nlist) in macho.symbols().flatten() { + if is_introspection_symbol(name) { + let section = &sections[nlist.n_sect]; + let data_offset = nlist.n_value + u64::from(section.offset) - section.addr; + chunks.push(read_symbol_value_with_ptr_and_len( + &library_content[usize::try_from(data_offset).context("File offset overflow")?..], + 0, + library_content, + macho.is_64, + )?); + } } + Ok(chunks) } fn find_introspection_chunks_in_pe(pe: &PE<'_>, library_content: &[u8]) -> Result> { @@ -167,77 +165,63 @@ fn find_introspection_chunks_in_pe(pe: &PE<'_>, library_content: &[u8]) -> Resul .context(".rdata virtual_address overflow")? - usize::try_from(rdata_data_section.pointer_to_raw_data) .context(".rdata pointer_to_raw_data overflow")?; - let pyo3_data_section = pe - .sections - .iter() - .find(|section| section.name().unwrap_or_default() == ".pyo3i0") - .context("No .pyo3i0 section found")?; - let pyo3_data = pyo3_data_section - .data(library_content)?
- .context("Not able to find the .pyo3i0 section content")?; - if pe.is_64 { - read_section_with_ptr_and_len_64bits(&pyo3_data, rdata_shift, library_content) - } else { - read_section_with_ptr_and_len_32bits(&pyo3_data, rdata_shift, library_content) + + let mut chunks = Vec::new(); + for export in &pe.exports { + if is_introspection_symbol(export.name.unwrap_or_default()) { + chunks.push(read_symbol_value_with_ptr_and_len( + &library_content[export.offset.context("No symbol offset")?..], + rdata_shift, + library_content, + pe.is_64, + )?); + } } + Ok(chunks) } -fn read_section_with_ptr_and_len_32bits( - slice: &[u8], +fn read_symbol_value_with_ptr_and_len( + value_slice: &[u8], shift: usize, full_library_content: &[u8], -) -> Result> { - slice - .chunks_exact(8) - .filter_map(|element| { - let (ptr, len) = element.split_at(4); - let ptr = match usize::try_from(u32::from_le_bytes(ptr.try_into().unwrap())) { - Ok(ptr) => ptr, - Err(e) => return Some(Err(e).context("Pointer overflow")), - }; - let len = match usize::try_from(u32::from_le_bytes(len.try_into().unwrap())) { - Ok(ptr) => ptr, - Err(e) => return Some(Err(e).context("Length overflow")), - }; - if ptr == 0 || len == 0 { - // Workaround for PE - return None; - } - Some( - serde_json::from_slice(&full_library_content[ptr - shift..ptr - shift + len]) - .context("Failed to parse introspection chunk"), - ) - }) - .collect() + is_64: bool, +) -> Result { + let (ptr, len) = if is_64 { + let (ptr, len) = value_slice[..16].split_at(8); + let ptr = usize::try_from(u64::from_le_bytes( + ptr.try_into().context("Too short symbol value")?, + )) + .context("Pointer overflow")?; + let len = usize::try_from(u64::from_le_bytes( + len.try_into().context("Too short symbol value")?, + )) + .context("Length overflow")?; + (ptr, len) + } else { + let (ptr, len) = value_slice[..8].split_at(4); + let ptr = usize::try_from(u32::from_le_bytes( + ptr.try_into().context("Too short symbol value")?, + )) + .context("Pointer overflow")?; + 
let len = usize::try_from(u32::from_le_bytes( + len.try_into().context("Too short symbol value")?, + )) + .context("Length overflow")?; + (ptr, len) + }; + let chunk = &full_library_content[ptr - shift..ptr - shift + len]; + serde_json::from_slice(chunk).with_context(|| { + format!( + "Failed to parse introspection chunk: '{}'", + String::from_utf8_lossy(chunk) + ) + }) } -fn read_section_with_ptr_and_len_64bits( - slice: &[u8], - shift: usize, - full_library_content: &[u8], -) -> Result> { - slice - .chunks_exact(16) - .filter_map(|element| { - let (ptr, len) = element.split_at(8); - let ptr = match usize::try_from(u64::from_le_bytes(ptr.try_into().unwrap())) { - Ok(ptr) => ptr, - Err(e) => return Some(Err(e).context("Pointer overflow")), - }; - let len = match usize::try_from(u64::from_le_bytes(len.try_into().unwrap())) { - Ok(ptr) => ptr, - Err(e) => return Some(Err(e).context("Length overflow")), - }; - if ptr == 0 || len == 0 { - // Workaround for PE - return None; - } - Some( - serde_json::from_slice(&full_library_content[ptr - shift..ptr - shift + len]) - .context("Failed to parse introspection chunk"), - ) - }) - .collect() +fn is_introspection_symbol(name: &str) -> bool { + name.strip_prefix('_') + .unwrap_or(name) + .starts_with("PYO3_INTROSPECTION_0_") } #[derive(Deserialize)] diff --git a/pyo3-macros-backend/src/introspection.rs b/pyo3-macros-backend/src/introspection.rs index 5a9baaf6bfc..f2f2492b39f 100644 --- a/pyo3-macros-backend/src/introspection.rs +++ b/pyo3-macros-backend/src/introspection.rs @@ -7,8 +7,9 @@ use crate::utils::PyO3CratePath; use proc_macro2::{Span, TokenStream}; -use quote::{format_ident, quote, ToTokens}; +use quote::{format_ident, quote}; use std::collections::hash_map::DefaultHasher; +use std::collections::HashMap; use std::hash::{Hash, Hasher}; use std::sync::atomic::{AtomicUsize, Ordering}; use syn::Ident; @@ -20,29 +21,24 @@ pub fn module_introspection_code<'a>( name: &str, members: impl IntoIterator, ) -> TokenStream { - 
let mut to_concat = Vec::new(); - to_concat.push(quote! { "{\"type\":\"module\",\"id\":\"" }); - to_concat.push(quote! { _PYO3_INTROSPECTION_ID }); - to_concat.push(quote! { "\",\"name\":\""}); - to_concat.push(quote! { #name }); - to_concat.push(quote! { "\",\"members\":["}); - let mut start = true; - for member in members { - if start { - start = false; - } else { - to_concat.push(quote! { "," }); - } - to_concat.push(quote! { "\"" }); - to_concat.push(quote! { - #member::_PYO3_INTROSPECTION_ID - }); - to_concat.push(quote! { "\"" }); - } - to_concat.push(quote! { "]}" }); - let stub = stub_section(quote! { - #pyo3_crate_path::impl_::concat::const_concat!(#(#to_concat , )*) - }); + let stub = IntrospectionNode::Map( + [ + ("type", IntrospectionNode::String("module")), + ("id", IntrospectionNode::IntrospectionId(None)), + ("name", IntrospectionNode::String(name)), + ( + "members", + IntrospectionNode::List( + members + .into_iter() + .map(|member| IntrospectionNode::IntrospectionId(Some(member))) + .collect(), + ), + ), + ] + .into(), + ) + .emit(pyo3_crate_path); let introspection_id = introspection_id_const(); quote! { #stub @@ -55,15 +51,15 @@ pub fn class_introspection_code( ident: &Ident, name: &str, ) -> TokenStream { - let mut to_concat = Vec::new(); - to_concat.push(quote! { "{\"type\":\"class\",\"id\":\"" }); - to_concat.push(quote! { #ident::_PYO3_INTROSPECTION_ID }); - to_concat.push(quote! { "\",\"name\":\""}); - to_concat.push(quote! { #name }); - to_concat.push(quote! { "\"}" }); - let stub = stub_section(quote! { - #pyo3_crate_path::impl_::concat::const_concat!(#(#to_concat , )*) - }); + let stub = IntrospectionNode::Map( + [ + ("type", IntrospectionNode::String("class")), + ("id", IntrospectionNode::IntrospectionId(Some(ident))), + ("name", IntrospectionNode::String(name)), + ] + .into(), + ) + .emit(pyo3_crate_path); let introspection_id = introspection_id_const(); quote! 
{ #stub @@ -74,15 +70,15 @@ pub fn class_introspection_code( } pub fn function_introspection_code(pyo3_crate_path: &PyO3CratePath, name: &str) -> TokenStream { - let mut to_concat = Vec::new(); - to_concat.push(quote! { "{\"type\":\"function\",\"id\":\"" }); - to_concat.push(quote! { _PYO3_INTROSPECTION_ID }); - to_concat.push(quote! { "\",\"name\":\""}); - to_concat.push(quote! { #name }); - to_concat.push(quote! { "\"}" }); - let stub = stub_section(quote! { - #pyo3_crate_path::impl_::concat::const_concat!(#(#to_concat , )*) - }); + let stub = IntrospectionNode::Map( + [ + ("type", IntrospectionNode::String("function")), + ("id", IntrospectionNode::IntrospectionId(None)), + ("name", IntrospectionNode::String(name)), + ] + .into(), + ) + .emit(pyo3_crate_path); let introspection_id = introspection_id_const(); quote! { #stub @@ -90,19 +86,89 @@ pub fn function_introspection_code(pyo3_crate_path: &PyO3CratePath, name: &str) } } -fn stub_section(content: impl ToTokens) -> TokenStream { - let static_name = format_ident!("PYO3_INTRS_{}", unique_element_id()); - // #[no_mangle] is required to make sure some linkers like Linux ones do not mangle the section name too. - quote! 
{ - const _: () = { - #[used] - #[cfg(not(target_family = "wasm"))] - #[cfg_attr(any(target_os = "macos", target_os = "ios", target_os = "tvos", target_os = "watchos"), link_section = "__TEXT,__pyo3i0")] - #[cfg_attr(not(any(target_os = "macos", target_os = "ios", target_os = "tvos", target_os = "watchos")), link_section = ".pyo3i0")] - #[no_mangle] - static #static_name: &'static str = #content; - }; +enum IntrospectionNode<'a> { + String(&'a str), + IntrospectionId(Option<&'a Ident>), + Map(HashMap<&'static str, IntrospectionNode<'a>>), + List(Vec>), +} + +impl IntrospectionNode<'_> { + fn emit(&self, pyo3_crate_path: &PyO3CratePath) -> TokenStream { + let mut content = Vec::new(); + self.add_to_serialization(&mut content); + + let static_name = format_ident!("PYO3_INTROSPECTION_0_{}", unique_element_id()); + // #[no_mangle] is required to make sure some linkers like Linux ones do not mangle the section name too. + quote! { + const _: () = { + #[used] + #[no_mangle] + static #static_name: &'static str = #pyo3_crate_path::impl_::concat::const_concat!(#(#content , )*); + }; + } + } + + fn add_to_serialization(&self, content: &mut Vec) { + match self { + Self::String(string) => { + let string = escape_json_string(string); + content.push(quote! { #string }); + } + Self::IntrospectionId(ident) => { + content.push(quote! { "\"" }); + content.push(if let Some(ident) = ident { + quote! { #ident::_PYO3_INTROSPECTION_ID} + } else { + quote! { _PYO3_INTROSPECTION_ID } + }); + content.push(quote! { "\"" }); + } + Self::Map(map) => { + content.push(quote! { "{" }); + for (i, (key, value)) in map.iter().enumerate() { + if i > 0 { + content.push(quote! { "," }); + } + let key = escape_json_string(key); + content.push(quote! { #key }); + content.push(quote! { ":" }); + value.add_to_serialization(content); + } + content.push(quote! { "}" }); + } + Self::List(list) => { + content.push(quote! { "[" }); + for (i, value) in list.iter().enumerate() { + if i > 0 { + content.push(quote! 
{ "," }); + } + value.add_to_serialization(content); + } + content.push(quote! { "]" }); + } + } + } +} + +fn escape_json_string(s: &str) -> String { + let mut buffer = String::with_capacity(s.len() + 2); + buffer.push('"'); + for c in s.chars() { + match c { + '\\' => buffer.push_str("\\\\"), + '"' => buffer.push_str("\\\""), + c => { + if c < char::from(32) { + panic!("ASCII chars below 32 are not allowed") + } else { + buffer.push(c) + } + } + } } + buffer.push('"'); + buffer } fn introspection_id_const() -> TokenStream { diff --git a/pyo3-macros-backend/src/pyclass.rs b/pyo3-macros-backend/src/pyclass.rs index 5d5091b16f2..9000eff93a1 100644 --- a/pyo3-macros-backend/src/pyclass.rs +++ b/pyo3-macros-backend/src/pyclass.rs @@ -2253,7 +2253,7 @@ impl<'a> PyClassImplsBuilder<'a> { #[cfg(feature = "experimental-inspect")] fn impl_introspection(&self, ctx: &Ctx) -> TokenStream { - let Ctx { pyo3_path } = ctx; + let Ctx { pyo3_path, .. } = ctx; let name = get_class_python_name(self.cls, self.attr).to_string(); class_introspection_code(pyo3_path, self.cls, &name) } diff --git a/pytests/Cargo.toml b/pytests/Cargo.toml index 758764d8c1b..3ad3e7ab5ce 100644 --- a/pytests/Cargo.toml +++ b/pytests/Cargo.toml @@ -7,7 +7,7 @@ edition = "2021" publish = false [dependencies] -pyo3 = { path = "../", features = ["extension-module", "experimental-declarative-modules", "experimental-inspect"] } +pyo3 = { path = "../", features = ["extension-module", "experimental-inspect"] } [build-dependencies] pyo3-build-config = { path = "../pyo3-build-config" } diff --git a/pytests/src/pyclasses.rs b/pytests/src/pyclasses.rs index a6973feab2e..bfc0d8ec139 100644 --- a/pytests/src/pyclasses.rs +++ b/pytests/src/pyclasses.rs @@ -80,5 +80,7 @@ impl ClassWithDict { #[pymodule] pub mod pyclasses { #[pymodule_export] - use super::{AssertingBaseClass, ClassWithoutConstructor, EmptyClass, PyClassIter, ClassWithDict}; + use super::{ + AssertingBaseClass, ClassWithDict, ClassWithoutConstructor, 
EmptyClass, PyClassIter, + }; } diff --git a/pytests/stubs/pyclasses.pyi b/pytests/stubs/pyclasses.pyi index db688c368ef..86ae67e21f8 100644 --- a/pytests/stubs/pyclasses.pyi +++ b/pytests/stubs/pyclasses.pyi @@ -1,4 +1,5 @@ class AssertingBaseClass: ... +class ClassWithDict: ... class ClassWithoutConstructor: ... class EmptyClass: ... class PyClassIter: ... From afe23f23d8c3fcb2706f7a802db4e158e6a0ebe5 Mon Sep 17 00:00:00 2001 From: Thomas Tanon Date: Fri, 5 Jul 2024 15:14:27 +0200 Subject: [PATCH 3/5] Introspection macro: generate less concatenations --- pyo3-macros-backend/src/introspection.rs | 92 ++++++++++++++++-------- 1 file changed, 61 insertions(+), 31 deletions(-) diff --git a/pyo3-macros-backend/src/introspection.rs b/pyo3-macros-backend/src/introspection.rs index f2f2492b39f..6cb6819a061 100644 --- a/pyo3-macros-backend/src/introspection.rs +++ b/pyo3-macros-backend/src/introspection.rs @@ -11,6 +11,7 @@ use quote::{format_ident, quote}; use std::collections::hash_map::DefaultHasher; use std::collections::HashMap; use std::hash::{Hash, Hasher}; +use std::mem::take; use std::sync::atomic::{AtomicUsize, Ordering}; use syn::Ident; @@ -95,8 +96,9 @@ enum IntrospectionNode<'a> { impl IntrospectionNode<'_> { fn emit(&self, pyo3_crate_path: &PyO3CratePath) -> TokenStream { - let mut content = Vec::new(); + let mut content = ConcatenationBuilder::default(); self.add_to_serialization(&mut content); + let content = content.into_token_stream(pyo3_crate_path); let static_name = format_ident!("PYO3_INTROSPECTION_0_{}", unique_element_id()); // #[no_mangle] is required to make sure some linkers like Linux ones do not mangle the section name too. 
@@ -104,71 +106,99 @@ impl IntrospectionNode<'_> { const _: () = { #[used] #[no_mangle] - static #static_name: &'static str = #pyo3_crate_path::impl_::concat::const_concat!(#(#content , )*); + static #static_name: &'static str = #content; }; } } - fn add_to_serialization(&self, content: &mut Vec) { + fn add_to_serialization(&self, content: &mut ConcatenationBuilder) { match self { Self::String(string) => { - let string = escape_json_string(string); - content.push(quote! { #string }); + content.push_str_to_escape(string); } Self::IntrospectionId(ident) => { - content.push(quote! { "\"" }); - content.push(if let Some(ident) = ident { + content.push_str("\""); + content.push_token(if let Some(ident) = ident { quote! { #ident::_PYO3_INTROSPECTION_ID} } else { quote! { _PYO3_INTROSPECTION_ID } }); - content.push(quote! { "\"" }); + content.push_str("\""); } Self::Map(map) => { - content.push(quote! { "{" }); + content.push_str("{"); for (i, (key, value)) in map.iter().enumerate() { if i > 0 { - content.push(quote! { "," }); + content.push_str(","); } - let key = escape_json_string(key); - content.push(quote! { #key }); - content.push(quote! { ":" }); + content.push_str_to_escape(key); + content.push_str(":"); value.add_to_serialization(content); } - content.push(quote! { "}" }); + content.push_str("}"); } Self::List(list) => { - content.push(quote! { "[" }); + content.push_str("["); for (i, value) in list.iter().enumerate() { if i > 0 { - content.push(quote! { "," }); + content.push_str(","); } value.add_to_serialization(content); } - content.push(quote! 
{ "]" }); + content.push_str("]"); } } } } -fn escape_json_string(s: &str) -> String { - let mut buffer = String::with_capacity(s.len() + 2); - buffer.push('"'); - for c in s.chars() { - match c { - '\\' => buffer.push_str("\\\\"), - '"' => buffer.push_str("\\\""), - c => { - if c < char::from(32) { - panic!("ASCII chars below 32 are not allowed") - } else { - buffer.push(c) +#[derive(Default)] +struct ConcatenationBuilder { + elements: Vec, + current_string: String, +} + +impl ConcatenationBuilder { + fn push_token(&mut self, token: TokenStream) { + if !self.current_string.is_empty() { + let str = take(&mut self.current_string); + self.elements.push(quote! { #str }); + } + self.elements.push(token); + } + + fn push_str(&mut self, value: &str) { + self.current_string.push_str(value); + } + + fn push_str_to_escape(&mut self, value: &str) { + self.current_string.push('"'); + for c in value.chars() { + match c { + '\\' => self.current_string.push_str("\\\\"), + '"' => self.current_string.push_str("\\\""), + c => { + if c < char::from(32) { + panic!("ASCII chars below 32 are not allowed") + } else { + self.current_string.push(c); + } } } } + self.current_string.push('"'); + } + + fn into_token_stream(self, pyo3_crate_path: &PyO3CratePath) -> TokenStream { + let mut elements = self.elements; + if !self.current_string.is_empty() { + let str = self.current_string; + elements.push(quote! { #str }); + } + + quote! 
{ + #pyo3_crate_path::impl_::concat::const_concat!(#(#elements , )*) + } } - buffer.push('"'); - buffer } fn introspection_id_const() -> TokenStream { From 77e1b4d63b0923a23c0b98c77a7796a164e1eaa8 Mon Sep 17 00:00:00 2001 From: Thomas Tanon Date: Mon, 19 Aug 2024 11:58:27 +0200 Subject: [PATCH 4/5] Fixes dev mach-o artifacts introspection --- noxfile.py | 19 ++++++++++--------- pyo3-introspection/src/introspection.rs | 3 ++- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/noxfile.py b/noxfile.py index c72dccca9ea..3fdf86ebdda 100644 --- a/noxfile.py +++ b/noxfile.py @@ -734,15 +734,16 @@ def update_ui_tests(session: nox.Session): @nox.session(name="test-introspection") def test_introspection(session: nox.Session): - session.run_always("python", "-m", "pip", "install", "-v", "./pytests") - # We look for the built library - lib_file = None - for file in Path(session.virtualenv.location).rglob("pyo3_pytests.*"): - if file.is_file(): - lib_file = str(file.resolve()) - _run_cargo_test( - session, package="pyo3-introspection", env={"PYO3_PYTEST_LIB_PATH": lib_file} - ) + for options in ((), ("--release",), ("--strip",)): + session.run_always("maturin", "develop", "-m", "./pytests/Cargo.toml", *options) + # We look for the built library + lib_file = None + for file in Path(session.virtualenv.location).rglob("pyo3_pytests.*"): + if file.is_file(): + lib_file = str(file.resolve()) + _run_cargo_test( + session, package="pyo3-introspection", env={"PYO3_PYTEST_LIB_PATH": lib_file} + ) def _build_docs_for_ffi_check(session: nox.Session) -> None: diff --git a/pyo3-introspection/src/introspection.rs b/pyo3-introspection/src/introspection.rs index 208a3fd87b9..b96daccbe4b 100644 --- a/pyo3-introspection/src/introspection.rs +++ b/pyo3-introspection/src/introspection.rs @@ -1,6 +1,7 @@ use crate::model::{Class, Function, Module}; use anyhow::{bail, Context, Result}; use goblin::elf::Elf; +use goblin::mach::symbols::N_SECT; use goblin::mach::{Mach, MachO, 
SingleArch}; use goblin::pe::PE; use goblin::Object; @@ -140,7 +141,7 @@ fn find_introspection_chunks_in_macho( .collect::<Result<Vec<_>, _>>()?; let mut chunks = Vec::new(); for (name, nlist) in macho.symbols().flatten() { - if is_introspection_symbol(name) { + if nlist.is_global() && nlist.get_type() == N_SECT && is_introspection_symbol(name) { let section = &sections[nlist.n_sect]; let data_offset = nlist.n_value + u64::from(section.offset) - section.addr; chunks.push(read_symbol_value_with_ptr_and_len( From 90f8e90241f3e0b2c7559bfa5788bacced67dd9d Mon Sep 17 00:00:00 2001 From: Thomas Tanon Date: Mon, 19 Aug 2024 12:16:16 +0200 Subject: [PATCH 5/5] Fixes test-introspection --- .github/workflows/ci.yml | 2 ++ noxfile.py | 6 +++++- src/impl_/concat.rs | 1 + tests/test_compile_error.rs | 1 + 4 files changed, 9 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3e9b9699de8..940bddc4130 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -685,6 +685,8 @@ jobs: save-if: ${{ github.event_name != 'merge_group' }} - run: python -m pip install --upgrade pip && pip install nox - run: nox -s test-introspection + env: + CARGO_BUILD_TARGET: ${{ matrix.platform.rust-target }} conclusion: needs: diff --git a/noxfile.py b/noxfile.py index 9680a43a368..1ad274744f5 100644 --- a/noxfile.py +++ b/noxfile.py @@ -752,7 +752,11 @@ def update_ui_tests(session: nox.Session): @nox.session(name="test-introspection") def test_introspection(session: nox.Session): - for options in ((), ("--release",), ("--strip",)): + session.install("maturin") + target = os.environ.get("CARGO_BUILD_TARGET") + for options in ([], ["--release"]): + if target is not None: + options += ("--target", target) session.run_always("maturin", "develop", "-m", "./pytests/Cargo.toml", *options) # We look for the built library lib_file = None diff --git a/src/impl_/concat.rs b/src/impl_/concat.rs index 0a31300aabc..4993eb45bc5 100644 --- a/src/impl_/concat.rs +++
b/src/impl_/concat.rs @@ -22,6 +22,7 @@ macro_rules! const_concat { } out } + #[allow(unsafe_code)] unsafe { ::std::str::from_utf8_unchecked(&combine(L.as_bytes(), R.as_bytes())) } }} } diff --git a/tests/test_compile_error.rs b/tests/test_compile_error.rs index b1fcdc09fb7..b4e34eb8a2e 100644 --- a/tests/test_compile_error.rs +++ b/tests/test_compile_error.rs @@ -61,5 +61,6 @@ fn test_compile_errors() { t.compile_fail("tests/ui/abi3_weakref.rs"); #[cfg(all(Py_LIMITED_API, not(Py_3_9)))] t.compile_fail("tests/ui/abi3_dict.rs"); + #[cfg(not(feature = "experimental-inspect"))] t.compile_fail("tests/ui/duplicate_pymodule_submodule.rs"); }