diff --git a/Cargo.lock b/Cargo.lock index 07324d99..779d8b14 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -178,6 +178,17 @@ dependencies = [ "generic-array", ] +[[package]] +name = "bstr" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4" +dependencies = [ + "memchr", + "regex-automata", + "serde", +] + [[package]] name = "bumpalo" version = "3.16.0" @@ -309,6 +320,7 @@ dependencies = [ "serde_json", "serde_yaml", "sha256", + "shell-quote", "simplelog", "sysinfo", "temp-env", @@ -2050,6 +2062,15 @@ dependencies = [ "tokio", ] +[[package]] +name = "shell-quote" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb502615975ae2365825521fa1529ca7648fd03ce0b0746604e0683856ecd7e4" +dependencies = [ + "bstr", +] + [[package]] name = "shlex" version = "1.3.0" diff --git a/Cargo.toml b/Cargo.toml index 2f7035d6..f9b3ac7f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -54,6 +54,7 @@ debugid = "0.8.0" memmap2 = "0.9.5" nix = { version = "0.29.0", features = ["fs", "user"] } futures = "0.3.31" +shell-quote = "0.7.2" [target.'cfg(target_os = "linux")'.dependencies] procfs = "0.17.0" diff --git a/src/run/runner/tests.rs b/src/run/runner/tests.rs index ab668383..26570a27 100644 --- a/src/run/runner/tests.rs +++ b/src/run/runner/tests.rs @@ -4,6 +4,7 @@ use crate::run::runner::executor::Executor; use crate::run::runner::interfaces::RunData; use crate::run::runner::valgrind::executor::ValgrindExecutor; use crate::run::{RunnerMode, runner::wall_time::executor::WallTimeExecutor}; +use shell_quote::{Bash, QuoteRefExt}; use tempfile::TempDir; use tokio::sync::{OnceCell, Semaphore, SemaphorePermit}; @@ -19,19 +20,66 @@ const DIRECTORY_CHECK_SCRIPT: &str = "cd /tmp if [ $(basename $(pwd)) != \"tmp\" ]; then exit 1 fi"; -const ENV_VAR_VALIDATION_SCRIPT: &str = " -output=$(echo \"$MY_ENV_VAR\") -if [ \"$output\" != \"Hello\" ]; then - echo \"Assertion failed: Expected 'Hello' but got '$output'\" +const QUOTE_ESCAPE_SCRIPT: &str = "#!/bin/bash +VALUE=\"He said \\\"Hello 'world'\\\" & echo \\$HOME\" +if [ \"$VALUE\" = \"He said \\\"Hello 'world'\\\" & echo \\$HOME\" ]; then + echo \"Quote test passed\" +else + echo \"ERROR: Quote handling failed\" + exit 1 +fi"; +const COMMAND_SUBSTITUTION_SCRIPT: &str = "#!/bin/bash +RESULT=$(echo \"test 'nested' \\\"quotes\\\" here\") +COUNT=$(echo \"$RESULT\" | wc -w) +if [ \"$COUNT\" -eq \"4\" ]; then + echo \"Command substitution test passed\" +else + echo \"ERROR: Expected 4 words, got $COUNT\" exit 1 fi"; -const TESTS: [&str; 5] = [ +const TESTS: [&str; 6] = [ SIMPLE_ECHO_SCRIPT, MULTILINE_ECHO_SCRIPT, MULTILINE_ECHO_WITH_SEMICOLONS, DIRECTORY_CHECK_SCRIPT, - ENV_VAR_VALIDATION_SCRIPT, + QUOTE_ESCAPE_SCRIPT, + COMMAND_SUBSTITUTION_SCRIPT, +]; + +fn env_var_validation_script(env: &str, expected: &str) -> String { + let expected: String = expected.quoted(Bash); + format!( + r#" +if [ "${env}" != {expected} ]; then + echo "FAIL: Environment variable not set correctly" + echo "Got: '${env}'" + exit 1 +fi +"# + ) +} + +const QUOTES_AND_ESCAPES: &str = r#""'He said "Hello 'world' `date`" & echo "done" with \\n\\t\\"#; +const MULTILINE_AND_WHITESPACE: &str = "Line 1\nLine 2\tTabbed\n \t "; +const SHELL_METACHARACTERS: &str = + r#"*.txt | grep "test" && echo "found" || echo "error" ; ls > /tmp/out"#; +const VARIABLES_AND_COMMANDS: &str = + r#"$HOME ${PATH} $((1+1)) $(echo "embedded") VAR="value with spaces""#; +const UNICODE_AND_SPECIAL: &str = "🚀 café naïve\u{200b}hidden\x1b[31mRed\x1b[0m\x01\x02"; +const COMPLEX_MIXED: &str = r#"start'single'middle"double"end $VAR | cmd && echo "done" || fail"#; +const EMPTY: &str = ""; +const SPACE_ONLY: &str = " "; + +const ENV_TESTS: [(&str, &str); 8] = [ + ("quotes_and_escapes", QUOTES_AND_ESCAPES), + ("multiline_and_whitespace", MULTILINE_AND_WHITESPACE), + ("shell_metacharacters", SHELL_METACHARACTERS), + ("variables_and_commands", VARIABLES_AND_COMMANDS), + ("unicode_and_special", UNICODE_AND_SPECIAL), + ("complex_mixed", COMPLEX_MIXED), + ("empty", EMPTY), + ("space_only", SPACE_ONLY), ]; async fn create_test_setup() -> (SystemInfo, RunData, TempDir) { @@ -73,6 +121,8 @@ mod valgrind { #[case(TESTS[1])] #[case(TESTS[2])] #[case(TESTS[3])] + #[case(TESTS[4])] + #[case(TESTS[5])] #[tokio::test] async fn test_valgrind_executor(#[case] cmd: &str) { let (system_info, run_data, _temp_dir) = create_test_setup().await; @@ -86,18 +136,23 @@ mod valgrind { } #[rstest::rstest] - #[case("MY_ENV_VAR", "Hello", ENV_VAR_VALIDATION_SCRIPT)] + #[case(ENV_TESTS[0])] + #[case(ENV_TESTS[1])] + #[case(ENV_TESTS[2])] + #[case(ENV_TESTS[3])] + #[case(ENV_TESTS[4])] + #[case(ENV_TESTS[5])] + #[case(ENV_TESTS[6])] + #[case(ENV_TESTS[7])] #[tokio::test] - async fn test_valgrind_executor_with_env( - #[case] env_var: &str, - #[case] env_value: &str, - #[case] cmd: &str, - ) { + async fn test_valgrind_executor_with_env(#[case] env_case: (&str, &str)) { let (system_info, run_data, _temp_dir) = create_test_setup().await; let executor = get_valgrind_executor().await; + let (env_var, env_value) = env_case; temp_env::async_with_vars(&[(env_var, Some(env_value))], async { - let config = valgrind_config(cmd); + let cmd = env_var_validation_script(env_var, env_value); + let config = valgrind_config(&cmd); executor .run(&config, &system_info, &run_data, &None) .await @@ -150,6 +205,10 @@ mod walltime { #[case(TESTS[2], true)] #[case(TESTS[3], false)] #[case(TESTS[3], true)] + #[case(TESTS[4], false)] + #[case(TESTS[4], true)] + #[case(TESTS[5], false)] + #[case(TESTS[5], true)] #[tokio::test] async fn test_walltime_executor(#[case] cmd: &str, #[case] enable_perf: bool) { let (system_info, run_data, _temp_dir) = create_test_setup().await; @@ -163,20 +222,34 @@ mod walltime { } #[rstest::rstest] - #[case("MY_ENV_VAR", "Hello", ENV_VAR_VALIDATION_SCRIPT, false)] - #[case("MY_ENV_VAR", "Hello", ENV_VAR_VALIDATION_SCRIPT, true)] + #[case(ENV_TESTS[0], false)] + #[case(ENV_TESTS[0], true)] + #[case(ENV_TESTS[1], false)] + #[case(ENV_TESTS[1], true)] + #[case(ENV_TESTS[2], false)] + #[case(ENV_TESTS[2], true)] + #[case(ENV_TESTS[3], false)] + #[case(ENV_TESTS[3], true)] + #[case(ENV_TESTS[4], false)] + #[case(ENV_TESTS[4], true)] + #[case(ENV_TESTS[5], false)] + #[case(ENV_TESTS[5], true)] + #[case(ENV_TESTS[6], false)] + #[case(ENV_TESTS[6], true)] + #[case(ENV_TESTS[7], false)] + #[case(ENV_TESTS[7], true)] #[tokio::test] async fn test_walltime_executor_with_env( - #[case] env_var: &str, - #[case] env_value: &str, - #[case] cmd: &str, + #[case] env_case: (&str, &str), #[case] enable_perf: bool, ) { let (system_info, run_data, _temp_dir) = create_test_setup().await; let (_permit, executor) = get_walltime_executor().await; + let (env_var, env_value) = env_case; temp_env::async_with_vars(&[(env_var, Some(env_value))], async { - let config = walltime_config(cmd, enable_perf); + let cmd = env_var_validation_script(env_var, env_value); + let config = walltime_config(&cmd, enable_perf); executor .run(&config, &system_info, &run_data, &None) .await diff --git a/src/run/runner/valgrind/measure.rs b/src/run/runner/valgrind/measure.rs index 4f39955b..d35c45ec 100644 --- a/src/run/runner/valgrind/measure.rs +++ b/src/run/runner/valgrind/measure.rs @@ -55,11 +55,11 @@ fn create_run_script() -> anyhow::Result { // Args: // 1. The command to execute // 2. The path to the file where the exit code will be written - const WRAPPER_SCRIPT: &str = r#"#!/bin/sh - sh -c "$1" - status=$? - echo -n "$status" > "$2" - "#; + const WRAPPER_SCRIPT: &str = r#"#!/usr/bin/env bash +bash -c "$1" +status=$? +echo -n "$status" > "$2" +"#; let rwx = std::fs::Permissions::from_mode(0o777); let mut script_file = tempfile::Builder::new() diff --git a/src/run/runner/wall_time/executor.rs b/src/run/runner/wall_time/executor.rs index ae43199b..1b29e81c 100644 --- a/src/run/runner/wall_time/executor.rs +++ b/src/run/runner/wall_time/executor.rs @@ -63,6 +63,25 @@ impl Drop for HookScriptsGuard { } } +/// Returns a list of exported environment variables which can be loaded with `source` in a shell. +/// +/// Example: `declare -x outputs="out"` +fn get_exported_system_env() -> Result { + let output = Command::new("bash") + .arg("-c") + .arg("export") + .output() + .context("Failed to run `export`")?; + if !output.status.success() { + bail!( + "Failed to get system environment variables: {}", + String::from_utf8_lossy(&output.stderr) + ); + } + + String::from_utf8(output.stdout).context("Failed to parse export output as UTF-8") +} + pub struct WallTimeExecutor { perf: Option, } @@ -74,36 +93,51 @@ impl WallTimeExecutor { } } - fn walltime_bench_cmd(config: &Config, run_data: &RunData) -> Result<(NamedTempFile, String)> { + fn walltime_bench_cmd( + config: &Config, + run_data: &RunData, + ) -> Result<(NamedTempFile, NamedTempFile, String)> { let bench_cmd = get_bench_command(config)?; - let combined_env = std::env::vars() - .chain( - get_base_injected_env(RunnerMode::Walltime, &run_data.profile_folder) - .into_iter() - .map(|(k, v)| (k.into(), v)), - ) - .map(|(env, value)| format!("{env}={value}")) - .collect::>() - .join("\n"); + let system_env = get_exported_system_env()?; + let base_injected_env = + get_base_injected_env(RunnerMode::Walltime, &run_data.profile_folder) + .into_iter() + .map(|(k, v)| format!("export {k}={v}",)) + .collect::>() + .join("\n"); + let combined_env = format!("{system_env}\n{base_injected_env}"); let mut env_file = NamedTempFile::new()?; env_file.write_all(combined_env.as_bytes())?; + let script_file = { + let mut file = NamedTempFile::new()?; + let bash_command = format!("source {} && {}", env_file.path().display(), bench_cmd); + debug!("Bash command: {bash_command}"); + file.write_all(bash_command.as_bytes())?; + file + }; + let quiet_flag = if is_codspeed_debug_enabled() { "--quiet" } else { "" }; + // Remarks: + // - We're using --scope so that perf is able to capture the events of the benchmark process. + // - We can't user `--user` here because we need to run in `codspeed.slice`, otherwise we'd run in + // user.slice` (which is isolated). We can use `--gid` and `--uid` to run the command as the current user. + // - We must use `bash` here instead of `sh` since `source` isn't available when symlinked to `dash`. + // - We have to pass the environment variables because `--scope` only inherits the system and not the user environment variables. let uid = nix::unistd::Uid::current().as_raw(); let gid = nix::unistd::Gid::current().as_raw(); let cmd = format!( - "systemd-run {quiet_flag} --pipe --collect --wait --slice=codspeed.slice --same-dir --uid={uid} --gid={gid} --property=EnvironmentFile={} -- sh -c '{}'", - env_file.path().display(), - bench_cmd.replace("'", "\"") + "systemd-run {quiet_flag} --scope --slice=codspeed.slice --same-dir --uid={uid} --gid={gid} -- bash {}", + script_file.path().display() ); - Ok((env_file, cmd)) + Ok((env_file, script_file, cmd)) } } @@ -138,7 +172,7 @@ impl Executor for WallTimeExecutor { let status = { let _guard = HookScriptsGuard::setup(); - let (_env_file, bench_cmd) = Self::walltime_bench_cmd(config, run_data)?; + let (_env_file, _script_file, bench_cmd) = Self::walltime_bench_cmd(config, run_data)?; if let Some(perf) = &self.perf && config.enable_perf { diff --git a/src/run/runner/wall_time/perf/helpers.rs b/src/run/runner/wall_time/perf/helpers.rs index 419bdf4c..2eb8777f 100644 --- a/src/run/runner/wall_time/perf/helpers.rs +++ b/src/run/runner/wall_time/perf/helpers.rs @@ -1,7 +1,6 @@ -use std::collections::HashMap; - -use anyhow::{anyhow, bail}; +use crate::prelude::*; use linux_perf_data::{PerfFileReader, PerfFileRecord, linux_perf_event_reader::EventRecord}; +use std::collections::HashMap; /// Tries to find the pid of the sampled process within a perf.data file. pub fn find_pid>(perf_file: P) -> anyhow::Result { @@ -48,6 +47,7 @@ pub fn find_pid>(perf_file: P) -> anyhow::Result } } } + debug!("Pid frequency: {pid_freq:?}"); // Choose the pid with the highest frequency. However, we can only use a pid if more than N% of the // events are from that pid. diff --git a/src/run/runner/wall_time/perf/mod.rs b/src/run/runner/wall_time/perf/mod.rs index 48e2de08..33a71aa8 100644 --- a/src/run/runner/wall_time/perf/mod.rs +++ b/src/run/runner/wall_time/perf/mod.rs @@ -198,7 +198,10 @@ impl PerfRunner { let perf_map_pids = futures::future::try_join_all(copy_tasks) .await? .into_iter() - .filter_map(Result::ok) + .filter_map(|result| { + debug!("Copy task result: {result:?}"); + result.ok() + }) .collect::>(); harvest_perf_maps_for_pids(profile_folder, &perf_map_pids).await?; @@ -210,6 +213,62 @@ impl PerfRunner { Ok(()) } + #[cfg(target_os = "linux")] + fn process_memory_mappings( + pid: u32, + symbols_by_pid: &mut HashMap, + unwind_data_by_pid: &mut HashMap>, + ) -> anyhow::Result<()> { + use procfs::process::MMPermissions; + + let bench_proc = + procfs::process::Process::new(pid as _).expect("Failed to find benchmark process"); + let exe_maps = bench_proc.maps().expect("Failed to read /proc/{pid}/maps"); + + for map in &exe_maps { + let page_offset = map.offset; + let (base_addr, end_addr) = map.address; + let path = match &map.pathname { + procfs::process::MMapPath::Path(path) => Some(path.clone()), + _ => None, + }; + + if let Some(path) = &path { + symbols_by_pid + .entry(pid) + .or_insert(ProcessSymbols::new(pid)) + .add_mapping(pid, path, base_addr, end_addr); + debug!("Added mapping for module {path:?}"); + + if map.perms.contains(MMPermissions::EXECUTE) { + match UnwindData::new( + path.to_string_lossy().as_bytes(), + page_offset, + base_addr, + end_addr - base_addr, + None, + ) { + Ok(unwind_data) => { + unwind_data_by_pid.entry(pid).or_default().push(unwind_data); + debug!("Added unwind data for {path:?} ({base_addr:x} - {end_addr:x})"); + } + Err(error) => { + debug!( + "Failed to create unwind data for module {}: {}", + path.display(), + error + ); + } + } + } + } else if map.perms.contains(MMPermissions::EXECUTE) { + debug!("Found executable mapping without path: {base_addr:x} - {end_addr:x}"); + } + } + + Ok(()) + } + async fn handle_fifo( mut runner_fifo: RunnerFifo, mut perf_fifo: PerfFifo, @@ -239,59 +298,11 @@ impl PerfRunner { #[cfg(target_os = "linux")] if !symbols_by_pid.contains_key(&pid) && !unwind_data_by_pid.contains_key(&pid) { - use procfs::process::MMPermissions; - - let bench_proc = procfs::process::Process::new(pid as _) - .expect("Failed to find benchmark process"); - let exe_maps = bench_proc.maps().expect("Failed to read /proc/{pid}/maps"); - - for map in &exe_maps { - let page_offset = map.offset; - let (base_addr, end_addr) = map.address; - let path = match &map.pathname { - procfs::process::MMapPath::Path(path) => Some(path.clone()), - _ => None, - }; - - if let Some(path) = &path { - symbols_by_pid - .entry(pid) - .or_insert(ProcessSymbols::new(pid)) - .add_mapping(pid, path, base_addr, end_addr); - debug!("Added mapping for module {path:?}"); - - if map.perms.contains(MMPermissions::EXECUTE) { - match UnwindData::new( - path.to_string_lossy().as_bytes(), - page_offset, - base_addr, - end_addr - base_addr, - None, - ) { - Ok(unwind_data) => { - unwind_data_by_pid - .entry(pid) - .or_default() - .push(unwind_data); - debug!( - "Added unwind data for {path:?} ({base_addr:x} - {end_addr:x})" - ); - } - Err(error) => { - debug!( - "Failed to create unwind data for module {}: {}", - path.display(), - error - ); - } - } - } - } else if map.perms.contains(MMPermissions::EXECUTE) { - debug!( - "Found executable mapping without path: {base_addr:x} - {end_addr:x}" - ); - } - } + Self::process_memory_mappings( + pid, + &mut symbols_by_pid, + &mut unwind_data_by_pid, + )?; } runner_fifo.send_cmd(FifoCommand::Ack).await?; diff --git a/src/run/runner/wall_time/perf/perf_map.rs b/src/run/runner/wall_time/perf/perf_map.rs index 3bc2ba65..767e2925 100644 --- a/src/run/runner/wall_time/perf/perf_map.rs +++ b/src/run/runner/wall_time/perf/perf_map.rs @@ -51,6 +51,20 @@ impl ModuleSymbols { return Err(anyhow::anyhow!("No symbols found")); } + // The base_addr from the mapping is where the module is actually loaded in memory (See ProcessSymbols::add_mapping), + // but the symbol addresses from the ELF file assume the module is loaded at its preferred virtual address. We need to: + // 1. Find the module's preferred base address from the ELF file or symbols + // 2. Calculate the offset: actual_base - preferred_base + // 3. Apply this offset to the symbol addresses + + // Find the preferred base address from the minimum symbol address + let preferred_base = symbols.iter().map(|s| s.offset).min().unwrap_or(0) & !0xfff; // Align to page boundary + + // Convert absolute addresses to relative offsets + for symbol in &mut symbols { + symbol.offset = symbol.offset.saturating_sub(preferred_base); + } + Ok(Self { path: path.as_ref().to_path_buf(), symbols,