Allow introspecting Python modules from a cdylib: first step #3977

Open: wants to merge 2 commits into `main`
44 changes: 44 additions & 0 deletions .github/workflows/ci.yml
@@ -598,6 +598,49 @@ jobs:
        with:
          path: ~/.cache/cargo-xwin
          key: cargo-xwin-cache

  test-introspection:
    needs: [fmt]
    strategy:
      matrix:
        platform: [
          {
            os: "macos-latest",
            python-architecture: "arm64",
            rust-target: "aarch64-apple-darwin",
          },
          {
            os: "ubuntu-latest",
            python-architecture: "x64",
            rust-target: "x86_64-unknown-linux-gnu",
          },
          {
            os: "windows-latest",
            python-architecture: "x64",
            rust-target: "x86_64-pc-windows-msvc",
          },
          {
            os: "windows-latest",
            python-architecture: "x86",
            rust-target: "i686-pc-windows-msvc",
          },
        ]
    runs-on: ${{ matrix.platform.os }}
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
        with:
          targets: ${{ matrix.platform.rust-target }}
          components: rust-src
      - uses: actions/setup-python@v5
        with:
          architecture: ${{ matrix.platform.python-architecture }}
      - uses: Swatinem/rust-cache@v2
        with:
          save-if: ${{ github.event_name != 'merge_group' }}
      - run: python -m pip install --upgrade pip && pip install nox
      - run: nox -s test-introspection

  conclusion:
    needs:
      - fmt
@@ -615,6 +658,7 @@ jobs:
      - check-feature-powerset
      - test-cross-compilation
      - test-cross-compilation-windows
      - test-introspection
    if: always()
    runs-on: ubuntu-latest
    steps:
3 changes: 2 additions & 1 deletion Cargo.toml
@@ -73,7 +73,7 @@ experimental-async = ["macros", "pyo3-macros/experimental-async"]

# Enables pyo3::inspect module and additional type information on FromPyObject
# and IntoPy traits
experimental-inspect = []
experimental-inspect = ["pyo3-macros/experimental-inspect"]

# Enables macros: #[pyclass], #[pymodule], #[pyfunction] etc.
macros = ["pyo3-macros", "indoc", "unindent"]
@@ -141,6 +141,7 @@ members = [
"pyo3-build-config",
"pyo3-macros",
"pyo3-macros-backend",
"pyo3-introspection",
"pytests",
"examples",
]
4 changes: 3 additions & 1 deletion guide/src/features.md
@@ -59,7 +59,9 @@ The feature has some unfinished refinements and performance improvements. To hel

### `experimental-inspect`

This feature adds the `pyo3::inspect` module, as well as `IntoPy::type_output` and `FromPyObject::type_input` APIs to produce Python type "annotations" for Rust types.
This feature embeds introspection data in the built binaries; this data can then be retrieved using the `pyo3-introspection` crate to generate [type stubs](https://typing.readthedocs.io/en/latest/source/stubs.html).

Also, this feature adds the `pyo3::inspect` module, as well as `IntoPy::type_output` and `FromPyObject::type_input` APIs to produce Python type "annotations" for Rust types.

This is a first step towards adding first-class support for generating type annotations automatically in PyO3; however, further work is needed to finish this off. All feedback and offers of help are welcome on [issue #2454](https://github.com/PyO3/pyo3/issues/2454).

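As a sketch of the intended workflow: the crate reads the embedded metadata back out of a compiled extension module and returns a `Module` tree. The example below assumes `introspect_cdylib` is re-exported from the crate root and that `Module`, `Class` and `Function` expose public `name`/`modules`/`classes`/`functions` fields (as the `parse_module` code later in this diff suggests); the library path is a placeholder.

```rust
use pyo3_introspection::introspect_cdylib;

fn main() -> anyhow::Result<()> {
    // Placeholder path: point this at a cdylib built with `experimental-inspect` enabled.
    let module = introspect_cdylib("target/debug/libpyo3_pytests.so", "pyo3_pytests")?;
    println!("module {}", module.name);
    for class in &module.classes {
        println!("  class {}", class.name);
    }
    for function in &module.functions {
        println!("  def {}(...): ...", function.name);
    }
    for submodule in &module.modules {
        println!("  submodule {}", submodule.name);
    }
    Ok(())
}
```

A stub generator would walk the same tree and emit a `.pyi` file instead of printing.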
1 change: 1 addition & 0 deletions newsfragments/3977.added.md
@@ -0,0 +1 @@
Basic introspection and stub generation based on metadata embedded in the produced cdylib.
16 changes: 15 additions & 1 deletion noxfile.py
@@ -732,6 +732,19 @@ def update_ui_tests(session: nox.Session):
    _run_cargo(session, *command, "--features=abi3,full", env=env)


@nox.session(name="test-introspection")
def test_introspection(session: nox.Session):
    session.run_always("python", "-m", "pip", "install", "-v", "./pytests")
    # We look for the built library
    lib_file = None
    for file in Path(session.virtualenv.location).rglob("pyo3_pytests.*"):
        if file.is_file():
            lib_file = str(file.resolve())
    _run_cargo_test(
        session, package="pyo3-introspection", env={"PYO3_PYTEST_LIB_PATH": lib_file}
    )


def _build_docs_for_ffi_check(session: nox.Session) -> None:
    # pyo3-ffi-check needs to scrape docs of pyo3-ffi
    env = os.environ.copy()
@@ -849,6 +862,7 @@ def _run_cargo_test(
    *,
    package: Optional[str] = None,
    features: Optional[str] = None,
    env: Optional[Dict[str, str]] = None,
) -> None:
    command = ["cargo"]
    if "careful" in session.posargs:
@@ -861,7 +875,7 @@
    if features:
        command.append(f"--features={features}")

    _run(session, *command, external=True)
    _run(session, *command, external=True, env=env or {})


def _run_cargo_publish(session: nox.Session, *, package: str) -> None:
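The `test-introspection` nox session above builds and installs the `pyo3_pytests` extension, then hands its path to the `pyo3-introspection` tests through the `PYO3_PYTEST_LIB_PATH` environment variable. A minimal sketch of how such a test could consume that variable (the test itself is not shown in this diff, so the test name and the public `name` field are assumptions):

```rust
use pyo3_introspection::introspect_cdylib;

#[test]
fn introspect_pyo3_pytests() -> anyhow::Result<()> {
    // Set by `nox -s test-introspection` (see noxfile.py above).
    let lib_path = std::env::var("PYO3_PYTEST_LIB_PATH")?;
    // `pyo3_pytests` is the name of the extension module built from ./pytests.
    let module = introspect_cdylib(&lib_path, "pyo3_pytests")?;
    assert_eq!(module.name, "pyo3_pytests");
    Ok(())
}
```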
18 changes: 18 additions & 0 deletions pyo3-introspection/Cargo.toml
@@ -0,0 +1,18 @@
[package]
name = "pyo3-introspection"
version = "0.22.0-dev"
description = "Introspect dynamic libraries built with PyO3 to get metadata about the exported Python types"
authors = ["PyO3 Project and Contributors <https://github.com/PyO3>"]
homepage = "https://github.com/pyo3/pyo3"
repository = "https://github.com/pyo3/pyo3"
license = "MIT OR Apache-2.0"
edition = "2021"

[dependencies]
anyhow = "1"
goblin = "0.8.0"
serde = { version = "1", features = ["derive"] }
serde_json = "1"

[lints]
workspace = true
1 change: 1 addition & 0 deletions pyo3-introspection/LICENSE-APACHE
1 change: 1 addition & 0 deletions pyo3-introspection/LICENSE-MIT
243 changes: 243 additions & 0 deletions pyo3-introspection/src/introspection.rs
@@ -0,0 +1,243 @@
use crate::model::{Class, Function, Module};
use anyhow::{bail, Context, Result};
use goblin::elf::Elf;
use goblin::mach::{Mach, MachO, SingleArch};
use goblin::pe::PE;
use goblin::Object;
use serde::Deserialize;
use std::collections::HashMap;
use std::fs;
use std::path::Path;

/// Introspects a cdylib built with PyO3 and returns the definition of a Python module.
///
/// This function currently supports the ELF (most *nix including Linux), Mach-O (macOS) and PE (Windows) formats.
pub fn introspect_cdylib(library_path: impl AsRef<Path>, main_module_name: &str) -> Result<Module> {
let chunks = find_introspection_chunks_in_binary_object(library_path.as_ref())?;
parse_chunks(&chunks, main_module_name)
}

/// Parses the introspection chunks found in the binary
fn parse_chunks(chunks: &[Chunk], main_module_name: &str) -> Result<Module> {
let chunks_by_id = chunks
.iter()
.map(|c| {
(
match c {
Chunk::Module { id, .. } => id,
Chunk::Class { id, .. } => id,
Chunk::Function { id, .. } => id,
},
c,
)
})
.collect::<HashMap<_, _>>();
// We look for the root chunk
for chunk in chunks {
if let Chunk::Module {
name,
members,
id: _,
} = chunk
{
if name == main_module_name {
return parse_module(name, members, &chunks_by_id);
}
}
}
bail!("No module named {main_module_name} found")
}

fn parse_module(
name: &str,
members: &[String],
chunks_by_id: &HashMap<&String, &Chunk>,
) -> Result<Module> {
let mut modules = Vec::new();
let mut classes = Vec::new();
let mut functions = Vec::new();
for member in members {
if let Some(chunk) = chunks_by_id.get(member) {
match chunk {
Chunk::Module {
name,
members,
id: _,
} => {
modules.push(parse_module(name, members, chunks_by_id)?);
}
Chunk::Class { name, id: _ } => classes.push(Class { name: name.into() }),
Chunk::Function { name, id: _ } => functions.push(Function { name: name.into() }),
}
}
}
Ok(Module {
name: name.into(),
modules,
classes,
functions,
})
}

fn find_introspection_chunks_in_binary_object(path: &Path) -> Result<Vec<Chunk>> {
let library_content =
fs::read(path).with_context(|| format!("Failed to read {}", path.display()))?;
match Object::parse(&library_content)
.context("The built library is not valid or not supported by our binary parser")?
{
Object::Elf(elf) => find_introspection_chunks_in_elf(&elf, &library_content),
Object::Mach(Mach::Binary(macho)) => {
find_introspection_chunks_in_macho(&macho, &library_content)
}
Object::Mach(Mach::Fat(multi_arch)) => {
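// Introspect the first Mach-O slice found in the fat (multi-architecture) binary.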
for arch in &multi_arch {
match arch? {
SingleArch::MachO(macho) => {
return find_introspection_chunks_in_macho(&macho, &library_content)
}
SingleArch::Archive(_) => (),
}
}
bail!("No Mach-o chunk found in the multi-arch Mach-o container")
}
Object::PE(pe) => find_introspection_chunks_in_pe(&pe, &library_content),
_ => {
bail!("Only ELF, Mach-o and PE containers can be introspected")
}
}
}

fn find_introspection_chunks_in_elf(elf: &Elf<'_>, library_content: &[u8]) -> Result<Vec<Chunk>> {
let mut chunks = Vec::new();
for sym in &elf.syms {
if is_introspection_symbol(elf.strtab.get_at(sym.st_name).unwrap_or_default()) {
let section_header = &elf.section_headers[sym.st_shndx];
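// Converts the symbol's virtual address (st_value) to a file offset using its section's address/offset pair.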
let data_offset = sym.st_value + section_header.sh_offset - section_header.sh_addr;
chunks.push(read_symbol_value_with_ptr_and_len(
&library_content[usize::try_from(data_offset).context("File offset overflow")?..],
0,
library_content,
elf.is_64,
)?);
}
}
Ok(chunks)
}

fn find_introspection_chunks_in_macho(
macho: &MachO<'_>,
library_content: &[u8],
) -> Result<Vec<Chunk>> {
if !macho.little_endian {
bail!("Only little endian Mach-o binaries are supported");
}

let sections = macho
.segments
.sections()
.flatten()
.map(|t| t.map(|s| s.0))
.collect::<Result<Vec<_>, _>>()?;
let mut chunks = Vec::new();
for (name, nlist) in macho.symbols().flatten() {
if is_introspection_symbol(name) {
let section = &sections[nlist.n_sect];
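// Converts the symbol's virtual address (n_value) to a file offset using its section's address/offset pair.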
let data_offset = nlist.n_value + u64::from(section.offset) - section.addr;
chunks.push(read_symbol_value_with_ptr_and_len(
&library_content[usize::try_from(data_offset).context("File offset overflow")?..],
0,
library_content,
macho.is_64,
)?);
}
}
Ok(chunks)
}

fn find_introspection_chunks_in_pe(pe: &PE<'_>, library_content: &[u8]) -> Result<Vec<Chunk>> {
let rdata_data_section = pe
.sections
.iter()
.find(|section| section.name().unwrap_or_default() == ".rdata")
.context("No .rdata section found")?;
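// Difference between the virtual addresses stored in the binary and the file offsets of the .rdata section.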
let rdata_shift = pe.image_base
+ usize::try_from(rdata_data_section.virtual_address)
.context(".rdata virtual_address overflow")?
- usize::try_from(rdata_data_section.pointer_to_raw_data)
.context(".rdata pointer_to_raw_data overflow")?;

let mut chunks = Vec::new();
for export in &pe.exports {
if is_introspection_symbol(export.name.unwrap_or_default()) {
chunks.push(read_symbol_value_with_ptr_and_len(
&library_content[export.offset.context("No symbol offset")?..],
rdata_shift,
library_content,
pe.is_64,
)?);
}
}
Ok(chunks)
}

fn read_symbol_value_with_ptr_and_len(
value_slice: &[u8],
shift: usize,
full_library_content: &[u8],
is_64: bool,
) -> Result<Chunk> {
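// The symbol value is a little-endian (pointer, length) pair pointing at a JSON introspection chunk elsewhere in the binary.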
let (ptr, len) = if is_64 {
let (ptr, len) = value_slice[..16].split_at(8);
let ptr = usize::try_from(u64::from_le_bytes(
ptr.try_into().context("Too short symbol value")?,
))
.context("Pointer overflow")?;
let len = usize::try_from(u64::from_le_bytes(
len.try_into().context("Too short symbol value")?,
))
.context("Length overflow")?;
(ptr, len)
} else {
let (ptr, len) = value_slice[..8].split_at(4);
let ptr = usize::try_from(u32::from_le_bytes(
ptr.try_into().context("Too short symbol value")?,
))
.context("Pointer overflow")?;
let len = usize::try_from(u32::from_le_bytes(
len.try_into().context("Too short symbol value")?,
))
.context("Length overflow")?;
(ptr, len)
};
let chunk = &full_library_content[ptr - shift..ptr - shift + len];
serde_json::from_slice(chunk).with_context(|| {
format!(
"Failed to parse introspection chunk: '{}'",
String::from_utf8_lossy(chunk)
)
})
}

fn is_introspection_symbol(name: &str) -> bool {
name.strip_prefix('_')
.unwrap_or(name)
.starts_with("PYO3_INTROSPECTION_0_")
}

#[derive(Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
enum Chunk {
Module {
id: String,
name: String,
members: Vec<String>,
},
Class {
id: String,
name: String,
},
Function {
id: String,
name: String,
},
}
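For reference, the serde attributes on `Chunk` imply that each introspection chunk is a JSON object tagged with a lowercase `type` field, with modules referring to their members by `id`. A hand-written sketch of the expected layout (the ids and names are illustrative, not taken from a real build):

```rust
// Illustrative chunk payloads matching the `Chunk` enum above;
// real ids and names are generated by the pyo3 macros.
const MODULE_CHUNK: &str =
    r#"{"type": "module", "id": "1", "name": "pyo3_pytests", "members": ["2", "3"]}"#;
const CLASS_CHUNK: &str = r#"{"type": "class", "id": "2", "name": "MyClass"}"#;
const FUNCTION_CHUNK: &str = r#"{"type": "function", "id": "3", "name": "my_function"}"#;
```

`parse_chunks` resolves the main module's `members` ids against the `id` fields of the other chunks to rebuild the module tree.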