Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
testdata/perf_map/cpp_my_benchmark.bin filter=lfs diff=lfs merge=lfs -text
testdata/perf_map/go_fib.bin filter=lfs diff=lfs merge=lfs -text
testdata/perf_map/divan_sleep_benches.bin filter=lfs diff=lfs merge=lfs -text
2 changes: 2 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
lfs: true
- uses: moonrepo/setup-rust@v1
- run: cargo test --all
61 changes: 33 additions & 28 deletions src/run/runner/wall_time/perf/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -239,36 +239,41 @@ impl PerfRunner {
_ => None,
};

if let Some(path) = &path {
symbols_by_pid
.entry(pid)
.or_insert(ProcessSymbols::new(pid))
.add_mapping(pid, path, base_addr, end_addr);
debug!("Added mapping for module {path:?}");

let Some(path) = &path else {
if map.perms.contains(MMPermissions::EXECUTE) {
match UnwindData::new(
path.to_string_lossy().as_bytes(),
page_offset,
base_addr,
end_addr - base_addr,
None,
) {
Ok(unwind_data) => {
unwind_data_by_pid.entry(pid).or_default().push(unwind_data);
debug!("Added unwind data for {path:?} ({base_addr:x} - {end_addr:x})");
}
Err(error) => {
debug!(
"Failed to create unwind data for module {}: {}",
path.display(),
error
);
}
}
debug!("Found executable mapping without path: {base_addr:x} - {end_addr:x}");
}
continue;
};

if !map.perms.contains(MMPermissions::EXECUTE) {
continue;
}

symbols_by_pid
.entry(pid)
.or_insert(ProcessSymbols::new(pid))
.add_mapping(pid, path, base_addr, end_addr, map.offset);
debug!("Added mapping for module {path:?}");

match UnwindData::new(
path.to_string_lossy().as_bytes(),
page_offset,
base_addr,
end_addr - base_addr,
None,
) {
Ok(unwind_data) => {
unwind_data_by_pid.entry(pid).or_default().push(unwind_data);
debug!("Added unwind data for {path:?} ({base_addr:x} - {end_addr:x})");
}
Err(error) => {
debug!(
"Failed to create unwind data for module {}: {}",
path.display(),
error
);
}
} else if map.perms.contains(MMPermissions::EXECUTE) {
debug!("Found executable mapping without path: {base_addr:x} - {end_addr:x}");
}
}

Expand Down
193 changes: 147 additions & 46 deletions src/run/runner/wall_time/perf/perf_map.rs
Original file line number Diff line number Diff line change
@@ -1,26 +1,40 @@
use crate::prelude::*;
use object::{Object, ObjectSymbol, ObjectSymbolTable};
use object::{Object, ObjectSegment, ObjectSymbol, ObjectSymbolTable};
use std::{
collections::HashMap,
fmt::Debug,
io::Write,
path::{Path, PathBuf},
};

#[derive(Debug, Hash, PartialEq, Eq, Clone)]
#[derive(Hash, PartialEq, Eq, Clone)]
struct Symbol {
offset: u64,
addr: u64,
size: u64,
name: String,
}

/// Compact `Debug` rendering of a symbol: the two numeric values are printed in lowercase
/// hexadecimal (no `0x` prefix) followed by the symbol name.
///
/// NOTE(review): the first label is emitted as `offset` although the value is read from the
/// `addr` field. The tests in this file snapshot this output through
/// `insta::assert_debug_snapshot!`, so renaming the label presumably requires regenerating
/// the stored snapshots - confirm before changing it.
impl Debug for Symbol {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Borrow the fields once; `..` keeps this robust to fields that are not printed.
        let Self {
            addr, size, name, ..
        } = self;
        write!(
            f,
            "Symbol {{ offset: {:x}, size: {:x}, name: {} }}",
            addr, size, name
        )
    }
}

#[derive(Debug, Clone)]
pub struct ModuleSymbols {
path: PathBuf,
symbols: Vec<Symbol>,
}

impl ModuleSymbols {
pub fn new<P: AsRef<Path>>(path: P) -> anyhow::Result<Self> {
pub fn new<P: AsRef<Path>>(
path: P,
runtime_start_addr: u64,
runtime_offset: u64,
) -> anyhow::Result<Self> {
let content = std::fs::read(path.as_ref())?;
let object = object::File::parse(&*content)?;

Expand All @@ -29,7 +43,7 @@ impl ModuleSymbols {
if let Some(symbol_table) = object.symbol_table() {
symbols.extend(symbol_table.symbols().filter_map(|symbol| {
Some(Symbol {
offset: symbol.address(),
addr: symbol.address(),
size: symbol.size(),
name: symbol.name().ok()?.to_string(),
})
Expand All @@ -39,52 +53,110 @@ impl ModuleSymbols {
if let Some(symbol_table) = object.dynamic_symbol_table() {
symbols.extend(symbol_table.symbols().filter_map(|symbol| {
Some(Symbol {
offset: symbol.address(),
addr: symbol.address(),
size: symbol.size(),
name: symbol.name().ok()?.to_string(),
})
}));
}

symbols.retain(|symbol| symbol.offset > 0 && symbol.size > 0);
symbols.retain(|symbol| symbol.addr > 0 && symbol.size > 0);
if symbols.is_empty() {
return Err(anyhow::anyhow!("No symbols found"));
}

// The base_addr from the mapping is where the module is actually loaded in memory (See ProcessSymbols::add_mapping),
// but the symbol addresses from the ELF file assume the module is loaded at its preferred virtual address. We need to:
// 1. Find the module's preferred base address from the ELF file or symbols
// 2. Calculate the offset: actual_base - preferred_base
// 3. Apply this offset to the symbol addresses

// Find the preferred base address from the minimum symbol address
let preferred_base = symbols.iter().map(|s| s.offset).min().unwrap_or(0) & !0xfff; // Align to page boundary

// Convert absolute addresses to relative offsets
let load_bias = Self::compute_load_bias(runtime_start_addr, runtime_offset, &object)?;
for symbol in &mut symbols {
symbol.offset = symbol.offset.saturating_sub(preferred_base);
symbol.addr = symbol.addr.wrapping_add(load_bias);
}

Ok(Self {
path: path.as_ref().to_path_buf(),
symbols,
})
Ok(Self { symbols })
}

fn append_to_file<P: AsRef<Path>>(&self, path: P, base_addr: u64) -> anyhow::Result<()> {
/// Computes the "load bias" of a module: the value that must be added (with wrapping) to an
/// address read from the ELF file on disk to obtain the address used by the running process.
///
/// Symbol addresses stored in an ELF file are not their final runtime addresses, because of
/// Address Space Layout Randomization (ASLR) and the way the OS loader maps file segments into
/// virtual memory.
///
/// * `runtime_start_addr` - start address of the executable mapping, as listed in
///   `/proc/<pid>/maps`.
/// * `runtime_offset` - file offset of that same mapping, as listed in `/proc/<pid>/maps`.
/// * `object` - the parsed object file that backs the mapping.
///
/// # Errors
///
/// Returns an error when no segment of `object` has a file range containing `runtime_offset`.
fn compute_load_bias(
    runtime_start_addr: u64,
    runtime_offset: u64,
    object: &object::File,
) -> anyhow::Result<u64> {
    // Step 1: find the `PT_LOAD` segment that corresponds to the executable memory region found
    // in `/proc/<pid>/maps`, by comparing `runtime_offset` against each segment's range in the
    // file.
    //
    // Example `/proc/<pid>/maps` output:
    // ```
    // 00400000-00402000 r--p 00000000 fe:01 114429641 /runner/testdata/perf_map/go_fib.bin
    // 00402000-0050f000 r-xp 00002000 fe:01 114429641 /runner/testdata/perf_map/go_fib.bin <-- the mapping we were given
    // 0050f000-0064b000 r--p 0010f000 fe:01 114429641 /runner/testdata/perf_map/go_fib.bin
    // 0064b000-0064c000 r--p 0024a000 fe:01 114429641 /runner/testdata/perf_map/go_fib.bin
    // 0064c000-0065e000 rw-p 0024b000 fe:01 114429641 /runner/testdata/perf_map/go_fib.bin
    // 0065e000-00684000 rw-p 00000000 00:00 0
    // ```
    //
    // The `PT_LOAD` segment with the same offset (0x2000) is the one selected:
    // ```
    // $ readelf -l testdata/perf_map/go_fib.bin
    // Elf file type is EXEC (Executable file)
    // Entry point 0x402490
    // There are 15 program headers, starting at offset 64
    //
    // Program Headers:
    //   Type    Offset             VirtAddr           PhysAddr
    //   PHDR    0x0000000000000040 0x0000000000400040 0x0000000000400040
    //           0x0000000000000348 0x0000000000000348 R   0x8
    //   INTERP  0x0000000000000430 0x0000000000400430 0x0000000000400430
    //           0x0000000000000053 0x0000000000000053 R   0x1
    //   LOAD    0x0000000000000000 0x0000000000400000 0x0000000000400000
    //           0x0000000000001640 0x0000000000001640 R   0x1000
    //   LOAD    0x0000000000002000 0x0000000000402000 0x0000000000402000 <-- selected segment
    //           0x000000000010ceb1 0x000000000010ceb1 R E 0x1000
    // ```
    let matching_segment = object
        .segments()
        .find(|candidate| {
            // The kernel maps whole pages (usually 4KB aligned), not the exact segment
            // boundaries, so the offset reported in `/proc/<pid>/maps` is rounded down to a
            // page boundary.
            //
            // Example (values from the `test_rust_divan_symbols` test):
            // - LOAD segment 1: file offset 0x0     - 0x4d26a  (data/code)
            // - LOAD segment 2: file offset 0x4d26c - 0x13c4b6 (executable code)
            // - The kernel maps pages starting at offset 0x0 (covers segment 1) and pages
            //   starting at offset 0x4d000 (page-aligned, covers segment 2).
            //
            // NOTE(review): with these example values `runtime_offset` = 0x4d000 lies inside
            // segment 1's file range and below segment 2's file offset, so - assuming segments
            // are yielded in program-header order - this predicate selects segment 1, not the
            // executable segment 2. Confirm that the resulting bias is the intended one.
            let (file_start, file_len) = candidate.file_range();
            let file_range = file_start..file_start + file_len;
            file_range.contains(&runtime_offset)
        })
        .context("Failed to find a matching PT_LOAD segment")?;

    // Step 2: the bias is the difference between where the segment *actually* is in memory and
    // where the ELF file *preferred* it to be:
    //
    //   load_bias = runtime_start_addr - segment_preferred_vaddr
    //
    // - `runtime_start_addr`: the actual base address of the mapping (from `/proc/maps`).
    // - `preferred_vaddr`: the preferred virtual address (`p_vaddr`) from the ELF file itself.
    //
    // The same formula covers both PIE/shared objects and non-PIE executables:
    // - PIE/.so files: `0x7f... (random) - 0x... (small) = <large_bias>`
    // - non-PIE files: `0x402000 (fixed) - 0x402000 (fixed) = 0`
    //
    // The subtraction wraps instead of panicking; callers add the bias back with wrapping
    // arithmetic as well.
    let preferred_vaddr = matching_segment.address();
    Ok(runtime_start_addr.wrapping_sub(preferred_vaddr))
}

fn append_to_file<P: AsRef<Path>>(&self, path: P) -> anyhow::Result<()> {
let mut file = std::fs::OpenOptions::new()
.create(true)
.append(true)
.open(path)?;

for symbol in &self.symbols {
writeln!(
file,
"{:x} {:x} {}",
base_addr + symbol.offset,
symbol.size,
symbol.name
)?;
writeln!(file, "{:x} {:x} {}", symbol.addr, symbol.size, symbol.name)?;
}

Ok(())
Expand Down Expand Up @@ -113,23 +185,21 @@ impl ProcessSymbols {
module_path: P,
start_addr: u64,
end_addr: u64,
file_offset: u64,
) {
if self.pid != pid {
warn!("pid mismatch: {} != {}", self.pid, pid);
return;
}

debug!("Loading module symbols at {start_addr:x}-{end_addr:x} (offset: {file_offset:x})");
let path = module_path.as_ref().to_path_buf();
match ModuleSymbols::new(module_path) {
match ModuleSymbols::new(module_path, start_addr, file_offset) {
Ok(symbol) => {
self.modules.entry(path.clone()).or_insert(symbol);
}
Err(error) => {
debug!(
"Failed to load symbols for module {}: {}",
path.display(),
error
);
debug!("Failed to load symbols for module {path:?}: {error}");
}
}

Expand All @@ -155,17 +225,48 @@ impl ProcessSymbols {

let symbols_path = folder.as_ref().join(format!("perf-{}.map", self.pid));
for module in self.modules.values() {
let Some((base_addr, _)) = self
.module_mappings
.get(&module.path)
.and_then(|bounds| bounds.iter().min_by_key(|(start, _)| start))
else {
warn!("No bounds found for module: {}", module.path.display());
continue;
};
module.append_to_file(&symbols_path, *base_addr)?;
module.append_to_file(&symbols_path)?;
}

Ok(())
}
}

// Snapshot tests for `ModuleSymbols::new`: each test loads a prebuilt binary from
// `testdata/perf_map/`, applies the load bias for a given mapping (start address, file offset)
// and compares the resulting symbol list against a stored `insta` debug snapshot.
//
// The three binaries are tracked with Git LFS (see `.gitattributes`), so they have to be
// fetched before these tests can read them.
#[cfg(test)]
mod tests {
use super::*;

// Go executable. The arguments are the start address (0x402000) and file offset (0x2000) of
// its `r-xp` mapping, i.e. the values used in the worked example inside `compute_load_bias`.
#[test]
fn test_golang_symbols() {
let module_symbols =
ModuleSymbols::new("testdata/perf_map/go_fib.bin", 0x00402000, 0x00002000).unwrap();
insta::assert_debug_snapshot!(module_symbols.symbols);
}

// C++ benchmark binary, loaded as if its mapping started at 0x400000 with file offset 0.
#[test]
fn test_cpp_symbols() {
const MODULE_PATH: &str = "testdata/perf_map/cpp_my_benchmark.bin";
let module_symbols = ModuleSymbols::new(MODULE_PATH, 0x00400000, 0x00000000).unwrap();
insta::assert_debug_snapshot!(module_symbols.symbols);
}

// Rust (divan) benchmark binary, where the mapping's file offset (0x4d000) is page-aligned and
// therefore does not coincide with the start of a segment in the file.
#[test]
fn test_rust_divan_symbols() {
const MODULE_PATH: &str = "testdata/perf_map/divan_sleep_benches.bin";

// Segments in the file:
// Segment: Segment { address: 0, size: 4d26a }
// Segment: Segment { address: 4e26c, size: ef24a }
// Segment: Segment { address: 13e4b8, size: ab48 }
// Segment: Segment { address: 1499b0, size: 11a5 }
//
// Segments in memory:
// 0x0000555555554000 0x00005555555a2000 0x4e000 0x0 r--p
// 0x00005555555a2000 0x0000555555692000 0xf0000 0x4d000 r-xp <--
// 0x0000555555692000 0x000055555569d000 0xb000 0x13c000 r--p
// 0x000055555569d000 0x000055555569f000 0x2000 0x146000 rw-p
//
// The arguments below are the start address and file offset of the `r-xp` row marked `<--`.
let module_symbols = ModuleSymbols::new(MODULE_PATH, 0x00005555555a2000, 0x4d000).unwrap();
insta::assert_debug_snapshot!(module_symbols.symbols);
}
}
Loading