Skip to content

Commit

Permalink
Draft: Allows to introspect Python modules from cdylib
Browse files Browse the repository at this point in the history
  • Loading branch information
Tpt committed Jun 10, 2024
1 parent f66124a commit 82e672f
Show file tree
Hide file tree
Showing 29 changed files with 714 additions and 48 deletions.
44 changes: 44 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -598,6 +598,49 @@ jobs:
with:
path: ~/.cache/cargo-xwin
key: cargo-xwin-cache

test-introspection:
needs: [fmt]
strategy:
matrix:
platform: [
{
os: "macos-latest",
python-architecture: "arm64",
rust-target: "aarch64-apple-darwin",
},
{
os: "ubuntu-latest",
python-architecture: "x64",
rust-target: "x86_64-unknown-linux-gnu",
},
{
os: "windows-latest",
python-architecture: "x64",
rust-target: "x86_64-pc-windows-msvc",
},
{
os: "windows-latest",
python-architecture: "x86",
rust-target: "i686-pc-windows-msvc",
},
]
runs-on: ${{ matrix.platform.os }}
steps:
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@stable
with:
targets: ${{ matrix.platform.rust-target }}
components: rust-src
- uses: actions/setup-python@v5
with:
architecture: ${{ matrix.platform.python-architecture }}
- uses: Swatinem/rust-cache@v2
with:
save-if: ${{ github.event_name != 'merge_group' }}
- run: python -m pip install --upgrade pip && pip install nox
- run: nox -s test-introspection

conclusion:
needs:
- fmt
Expand All @@ -615,6 +658,7 @@ jobs:
- check-feature-powerset
- test-cross-compilation
- test-cross-compilation-windows
- test-introspection
if: always()
runs-on: ubuntu-latest
steps:
Expand Down
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ experimental-async = ["macros", "pyo3-macros/experimental-async"]

# Enables pyo3::inspect module and additional type information on FromPyObject
# and IntoPy traits
experimental-inspect = []
experimental-inspect = ["pyo3-macros/experimental-inspect"]

# Enables annotating Rust inline modules with #[pymodule] to build Python modules declaratively
experimental-declarative-modules = ["pyo3-macros/experimental-declarative-modules", "macros"]
Expand Down Expand Up @@ -145,6 +145,7 @@ members = [
"pyo3-build-config",
"pyo3-macros",
"pyo3-macros-backend",
"pyo3-introspection",
"pytests",
"examples",
]
Expand Down
1 change: 1 addition & 0 deletions newsfragments/3977.added.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Basic introspection and stub generation based on metadata embedded in produced cdylib.
16 changes: 15 additions & 1 deletion noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -732,6 +732,19 @@ def update_ui_tests(session: nox.Session):
_run_cargo(session, *command, "--features=abi3,full", env=env)


@nox.session(name="test-introspection")
def test_introspection(session: nox.Session):
    """Install the `pytests` package and run the `pyo3-introspection` tests against it.

    The path of the built `pyo3_pytests` library is handed to the Rust tests
    through the `PYO3_PYTEST_LIB_PATH` environment variable.
    """
    session.run_always("python", "-m", "pip", "install", "-v", "./pytests")
    # We look for the built library inside the session's virtualenv
    lib_file = None
    for file in Path(session.virtualenv.location).rglob("pyo3_pytests.*"):
        if file.is_file():
            lib_file = str(file.resolve())
    if lib_file is None:
        # Fail early with a clear message instead of passing `None` as an
        # environment variable value to the cargo invocation.
        session.error(
            "Could not find the built pyo3_pytests library in the session virtualenv"
        )
    _run_cargo_test(
        session, package="pyo3-introspection", env={"PYO3_PYTEST_LIB_PATH": lib_file}
    )


def _build_docs_for_ffi_check(session: nox.Session) -> None:
# pyo3-ffi-check needs to scrape docs of pyo3-ffi
env = os.environ.copy()
Expand Down Expand Up @@ -849,6 +862,7 @@ def _run_cargo_test(
*,
package: Optional[str] = None,
features: Optional[str] = None,
env: Optional[Dict[str, str]] = None,
) -> None:
command = ["cargo"]
if "careful" in session.posargs:
Expand All @@ -861,7 +875,7 @@ def _run_cargo_test(
if features:
command.append(f"--features={features}")

_run(session, *command, external=True)
_run(session, *command, external=True, env=env or {})


def _run_cargo_publish(session: nox.Session, *, package: str) -> None:
Expand Down
18 changes: 18 additions & 0 deletions pyo3-introspection/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
[package]
name = "pyo3-introspection"
version = "0.22.0-dev"
description = "Introspect dynamic libraries built with PyO3 to get metadata about the exported Python types"
authors = ["PyO3 Project and Contributors <https://github.com/PyO3>"]
homepage = "https://github.com/pyo3/pyo3"
repository = "https://github.com/pyo3/pyo3"
license = "MIT OR Apache-2.0"
edition = "2021"

[dependencies]
anyhow = "1"
goblin = "0.8.0"
serde = { version = "1", features = ["derive"] }
serde_json = "1"

[lints]
workspace = true
1 change: 1 addition & 0 deletions pyo3-introspection/LICENSE-APACHE
1 change: 1 addition & 0 deletions pyo3-introspection/LICENSE-MIT
224 changes: 224 additions & 0 deletions pyo3-introspection/src/introspection.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,224 @@
use crate::model::{Class, Function, Module};
use anyhow::{bail, Context, Result};
use goblin::elf::Elf;
use goblin::mach::{Mach, MachO, SingleArch};
use goblin::pe::PE;
use goblin::Object;
use serde::Deserialize;
use std::collections::HashMap;
use std::fs;
use std::path::Path;

/// Introspects a cdylib built with PyO3 and returns the definition of the Python module
/// named `main_module_name`.
///
/// The ELF (most *nix including Linux), Mach-O (macOS) and PE (Windows) formats are
/// currently supported.
///
/// # Errors
///
/// Fails if the library cannot be read or parsed, or if no module chunk named
/// `main_module_name` is found in it.
pub fn introspect_cdylib(library_path: impl AsRef<Path>, main_module_name: &str) -> Result<Module> {
    find_introspection_chunks_in_binary_object(library_path.as_ref())
        .and_then(|found_chunks| parse_chunks(&found_chunks, main_module_name))
}

/// Builds the [`Module`] tree rooted at the module chunk named `main_module_name`.
///
/// All chunks are first indexed by their id so that module members can be resolved,
/// then the first module chunk carrying the requested name is used as the root.
fn parse_chunks(chunks: &[Chunk], main_module_name: &str) -> Result<Module> {
    // Index every chunk by its id; a later chunk replaces an earlier one with the same id
    let mut chunks_by_id: HashMap<&String, &Chunk> = HashMap::with_capacity(chunks.len());
    for chunk in chunks {
        let id = match chunk {
            Chunk::Module { id, .. } | Chunk::Class { id, .. } | Chunk::Function { id, .. } => id,
        };
        chunks_by_id.insert(id, chunk);
    }
    // We look for the root chunk
    let root = chunks.iter().find_map(|chunk| match chunk {
        Chunk::Module { name, members, .. } if name == main_module_name => Some((name, members)),
        _ => None,
    });
    match root {
        Some((name, members)) => parse_module(name, members, &chunks_by_id),
        None => bail!("No module named {main_module_name} found"),
    }
}

/// Builds the [`Module`] called `name` from the ids of its members.
///
/// Each member id is resolved through `chunks_by_id`: module members are parsed
/// recursively, class and function members are copied by name. Ids without a
/// matching chunk are skipped.
fn parse_module(
    name: &str,
    members: &[String],
    chunks_by_id: &HashMap<&String, &Chunk>,
) -> Result<Module> {
    let mut modules = Vec::new();
    let mut classes = Vec::new();
    let mut functions = Vec::new();
    for member in members {
        match chunks_by_id.get(member) {
            Some(Chunk::Module {
                name: child_name,
                members: child_members,
                ..
            }) => {
                let child = parse_module(child_name, child_members, chunks_by_id)?;
                modules.push(child);
            }
            Some(Chunk::Class {
                name: class_name, ..
            }) => classes.push(Class {
                name: class_name.clone(),
            }),
            Some(Chunk::Function {
                name: function_name,
                ..
            }) => functions.push(Function {
                name: function_name.clone(),
            }),
            // Unknown member ids are ignored
            None => {}
        }
    }
    Ok(Module {
        name: name.to_owned(),
        modules,
        classes,
        functions,
    })
}

/// Reads the library at `path`, detects its container format and extracts the
/// introspection chunks embedded in it.
///
/// ELF, Mach-O (single image or multi-arch container) and PE files are dispatched to
/// their dedicated readers. For a multi-arch Mach-O container only the first Mach-O
/// image found is introspected.
///
/// # Errors
///
/// Fails if the file cannot be read, if it cannot be parsed, if its format is not one
/// of the supported ones or if the format-specific reader fails.
fn find_introspection_chunks_in_binary_object(path: &Path) -> Result<Vec<Chunk>> {
    let library_content =
        fs::read(path).with_context(|| format!("Failed to read {}", path.display()))?;
    match Object::parse(&library_content)
        .context("The built library is not valid or not supported by our binary parser")?
    {
        Object::Elf(elf) => find_introspection_chunks_in_elf(&elf, &library_content),
        Object::Mach(Mach::Binary(matcho)) => {
            find_introspection_chunks_in_matcho(&matcho, &library_content)
        }
        Object::Mach(Mach::Fat(multi_arch)) => {
            for arch in &multi_arch {
                match arch? {
                    SingleArch::MachO(matcho) => {
                        return find_introspection_chunks_in_matcho(&matcho, &library_content)
                    }
                    // Static archives do not carry introspection data we can read
                    SingleArch::Archive(_) => (),
                }
            }
            bail!("No Mach-O chunk found in the multi-arch Mach-O container")
        }
        Object::PE(pe) => find_introspection_chunks_in_pe(&pe, &library_content),
        _ => {
            bail!("Only ELF, Mach-O and PE containers can be introspected")
        }
    }
}

fn find_introspection_chunks_in_elf(elf: &Elf<'_>, library_content: &[u8]) -> Result<Vec<Chunk>> {
let alignment = if elf.is_64 { 8 } else { 4 };
let (pyo3_data_section_index, pyo3_data_section_header) = elf
.section_headers
.iter()
.enumerate()
.find(|(_, section)| {
elf.shdr_strtab.get_at(section.sh_name).unwrap_or_default() == ".pyo3i0"
})
.context("No .pyo3i0 section found")?;
elf.syms
.iter()
.filter(|symbol| symbol.st_shndx == pyo3_data_section_index)
.map(|symbol| {
let symbol_offset = symbol.st_value - pyo3_data_section_header.sh_addr
+ pyo3_data_section_header.sh_offset;
let content_start = &library_content[symbol_offset.try_into().unwrap()..];
let ptr = usize::from_le_bytes(content_start[..alignment].try_into().unwrap());
let len =
usize::from_le_bytes(content_start[alignment..2 * alignment].try_into().unwrap());
Ok(serde_json::from_slice(&library_content[ptr..ptr + len])?)
})
.collect()
}

fn find_introspection_chunks_in_matcho(
matcho: &MachO<'_>,
library_content: &[u8],
) -> Result<Vec<Chunk>> {
if !matcho.is_64 {
bail!("Only 64 bits Match-o binaries are supported");
}
if !matcho.little_endian {
bail!("Only little endian Match-o binaries are supported");
}
let text_segment = matcho
.segments
.iter()
.find(|s| s.segname == *b"__TEXT\0\0\0\0\0\0\0\0\0\0")
.context("No __TEXT segment found")?;
let (_, pyo3_data_section) = text_segment
.sections()?
.into_iter()
.find(|s| s.0.sectname == *b"__pyo3i0\0\0\0\0\0\0\0\0")
.context("No __pyo3i0 section found")?;
pyo3_data_section
.chunks(16)
.map(|element| {
let ptr = usize::from_le_bytes(element[..8].try_into().unwrap());
let len = usize::from_le_bytes(element[8..].try_into().unwrap());
Ok(serde_json::from_slice(&library_content[ptr..ptr + len])?)
})
.collect()
}

fn find_introspection_chunks_in_pe(pe: &PE<'_>, library_content: &[u8]) -> Result<Vec<Chunk>> {
let alignment = if pe.is_64 { 8 } else { 4 };
let pyo3_data_section = pe
.sections
.iter()
.find(|section| section.name().unwrap_or_default() == ".pyo3i0")
.context("No .pyo3i0 section found")?;
pe.exports
.iter()
.filter(|export| {
export.offset.map_or(false, |offset| {
// We check we are in the correct section
pyo3_data_section.pointer_to_raw_data <= u32::try_from(offset).unwrap()
&& u32::try_from(offset).unwrap()
<= pyo3_data_section.pointer_to_raw_data
+ pyo3_data_section.size_of_raw_data
})
})
.map(|export| {
let offset = export.offset.unwrap();
let content_start = &library_content[offset..];
let ptr = usize::from_le_bytes(content_start[..alignment].try_into().unwrap());
let len =
usize::from_le_bytes(content_start[alignment..2 * alignment].try_into().unwrap());
dbg!(
ptr,
len,
export.rva,
export.size,
pyo3_data_section.pointer_to_raw_data,
pyo3_data_section.size_of_raw_data,
pyo3_data_section.virtual_address,
pyo3_data_section.virtual_size,
);
let resolved_ptr = ptr - usize::try_from(pyo3_data_section.virtual_address).unwrap()
+ usize::try_from(pyo3_data_section.pointer_to_raw_data).unwrap();
Ok(serde_json::from_slice(
&library_content[resolved_ptr..resolved_ptr + len],
)?)
})
.collect()
}

/// A single piece of introspection data, deserialized from the JSON embedded in the library.
///
/// The variant is selected by the `type` field of the JSON object, using the
/// lowercase variant name (`module`, `class` or `function`).
#[derive(Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
enum Chunk {
/// A Python module.
Module {
/// Identifier used to reference this chunk from the `members` of a module.
id: String,
/// Name of the module.
name: String,
/// Ids of the chunks (modules, classes and functions) contained in this module.
members: Vec<String>,
},
/// A Python class.
Class {
/// Identifier used to reference this chunk from the `members` of a module.
id: String,
/// Name of the class.
name: String,
},
/// A Python function.
Function {
/// Identifier used to reference this chunk from the `members` of a module.
id: String,
/// Name of the function.
name: String,
},
}
8 changes: 8 additions & 0 deletions pyo3-introspection/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
//! Utilities to introspect cdylib built using PyO3 and generate [type stubs](https://typing.readthedocs.io/en/latest/source/stubs.html).

pub use crate::introspection::introspect_cdylib;
pub use crate::stubs::module_stub_files;

mod introspection;
pub mod model;
mod stubs;
17 changes: 17 additions & 0 deletions pyo3-introspection/src/model.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
/// Description of a Python module and of the elements it contains.
#[derive(Debug, Eq, PartialEq, Clone, Hash)]
pub struct Module {
/// Name of the module.
pub name: String,
/// Modules nested in this module.
pub modules: Vec<Module>,
/// Classes listed in this module.
pub classes: Vec<Class>,
/// Functions listed in this module.
pub functions: Vec<Function>,
}

/// Description of a Python class.
#[derive(Debug, Eq, PartialEq, Clone, Hash)]
pub struct Class {
/// Name of the class.
pub name: String,
}

/// Description of a Python function.
#[derive(Debug, Eq, PartialEq, Clone, Hash)]
pub struct Function {
/// Name of the function.
pub name: String,
}
Loading

0 comments on commit 82e672f

Please sign in to comment.