Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ rusqlite = { version = "0.31", features = ["bundled"] }
shell-words = "1"
bincode = "1"
zccache-artifact = "1.4.0"
rayon = "1"
tracing-test = "0.2"

# Process containment: all subprocess spawns the daemon performs (compilers,
# esptool, qemu, simavr, node, npm, …) and any grandchildren they fork must
Expand Down
2 changes: 2 additions & 0 deletions crates/fbuild-header-scan/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ rust-version.workspace = true
license.workspace = true

[dependencies]
rayon = { workspace = true }
tracing = { workspace = true }

[dev-dependencies]
criterion = { workspace = true }
Expand Down
2 changes: 1 addition & 1 deletion crates/fbuild-header-scan/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ mod scanner;
mod walker;

pub use scanner::{scan, IncludeKind, IncludeRef, Span};
pub use walker::{walk, WalkResult};
pub use walker::{walk, walk_with_state, WalkResult, WalkState};

/// Bumped whenever the scanner output shape changes. Mixed into cache keys so a
/// scanner change invalidates memoized library-selection results.
Expand Down
153 changes: 133 additions & 20 deletions crates/fbuild-header-scan/src/walker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,56 +5,169 @@
//! plus the set of include strings that could not be resolved. The walker is
//! BFS over a visited set so cycles, diamonds, and arbitrary depth all
//! terminate correctly.
//!
//! Two public entry points:
//! * [`walk`] -- one-shot convenience wrapper that allocates a fresh
//! [`WalkState`] internally. `WalkResult::reached` is the full set of files
//! reached from `seeds`.
//! * [`walk_with_state`] -- accepts a caller-owned [`WalkState`] so multiple
//! walks can share a scan cache and a `visited` set across calls (used by
//! `fbuild-library-select` to avoid re-reading files between LDF passes).
//! `WalkResult::reached` is the *delta* of canonical paths newly discovered
//! in this call; the union of deltas across calls equals the full set.

use std::collections::{BTreeSet, HashSet, VecDeque};
use std::collections::{BTreeSet, HashMap, HashSet, VecDeque};
use std::path::{Path, PathBuf};

use rayon::prelude::*;

use crate::scanner::{scan, IncludeKind, IncludeRef};

/// Result of a walk. `reached` and `unresolved` are sorted for deterministic
/// cache keys.
///
/// For [`walk`] (fresh-state wrapper) `reached` is the full set of files
/// transitively reached from the seeds. For [`walk_with_state`] the same
/// fields contain only the *delta* added in this call -- files already
/// present in the shared `WalkState::visited` set are not re-emitted.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct WalkResult {
/// Canonical paths recorded by the walk: each seed that was not already
/// visited, plus every include that resolved to a not-yet-visited file.
/// A path appears at most once.
pub reached: Vec<PathBuf>,
/// The `path` of every scanned include for which resolution returned
/// `None`, i.e. it was not found relative to the including file nor in
/// any search path. Duplicates are collapsed.
pub unresolved: Vec<String>,
}

/// State that can be shared across multiple [`walk_with_state`] calls so the
/// include-scan results are memoized and each on-disk file is read at most
/// once for the lifetime of the state.
///
/// Used by `fbuild-library-select::resolve_with_stats` to share scan results
/// across LDF passes -- pass 1 reads every file once, pass 2 re-seeds with
/// library `.cpp` files but reuses the cached scans for everything already
/// reached.
///
/// All fields are private; callers obtain a state via [`WalkState::new`] (or
/// `Default`) and can only observe it through [`WalkState::files_read`].
#[derive(Debug, Default)]
pub struct WalkState {
/// Canonical paths the walker has already enqueued/visited.
///
/// A path is inserted at the moment it is enqueued, before any read is
/// attempted, so a file whose read fails stays in this set and is not
/// retried by later walks that share the state.
visited: HashSet<PathBuf>,
/// Canonical path -> parsed include list. Populated lazily on first read.
/// Missing entries mean either "not yet read" or "read failed" -- they are
/// indistinguishable here, matching the existing `let Ok(...) else
/// { continue }` semantics of the original walker.
scan_cache: HashMap<PathBuf, Vec<IncludeRef>>,
/// Number of successful `std::fs::read_to_string` invocations across the
/// lifetime of this state. Each unique file is counted exactly once
/// because subsequent walks hit `scan_cache` instead. Failed reads do not
/// increment the counter.
files_read: usize,
}

impl WalkState {
    /// Build a pristine state: empty visited set, empty scan cache, and a
    /// read counter of zero.
    pub fn new() -> Self {
        Default::default()
    }

    /// Report how many files have been physically read from disk through
    /// this state so far. `resolve_with_stats` relies on this in tests to
    /// assert the no-re-read contract.
    pub fn files_read(&self) -> usize {
        let Self {
            files_read: count, ..
        } = self;
        *count
    }
}

/// One-shot traversal of the include graph rooted at `seeds`.
///
/// `search_paths` is tried in order for `<...>` includes; for `"..."`
/// includes it is the fallback after the including file's own directory.
/// Each file lands in `reached` exactly once. A file that lives outside
/// `search_paths` is still reached when it is a seed or when a `"..."`
/// include resolves to it relative to a seed/visited file.
///
/// The traversal runs against a brand-new [`WalkState`], so nothing is
/// pre-visited and `WalkResult::reached` holds every file transitively
/// reached from `seeds`.
pub fn walk(seeds: &[PathBuf], search_paths: &[PathBuf]) -> WalkResult {
    // The state is a throwaway: it only lives for this single call.
    walk_with_state(seeds, search_paths, &mut WalkState::new())
}

/// Walk the include graph using a caller-owned [`WalkState`] so the scan cache
/// and visited set persist across calls.
///
/// `WalkResult::reached` contains only the *delta* of canonical paths newly
/// reached in this call. Files already in `state.visited` from a previous
/// call are not re-emitted (and not re-read).
///
/// The BFS proceeds in waves: each wave reads all not-yet-cached files in
/// parallel via rayon, then resolves every `#include` in every cached scan
/// result to enqueue the next wave.
#[tracing::instrument(
name = "ldf_walk",
skip_all,
fields(seeds = seeds.len(), search_paths = search_paths.len())
)]
pub fn walk_with_state(
seeds: &[PathBuf],
search_paths: &[PathBuf],
state: &mut WalkState,
) -> WalkResult {
tracing::debug!(
seeds = seeds.len(),
search_paths = search_paths.len(),
"ldf_walk"
);
let mut reached: BTreeSet<PathBuf> = BTreeSet::new();
let mut unresolved: BTreeSet<String> = BTreeSet::new();
let mut visited: HashSet<PathBuf> = HashSet::new();
let mut queue: VecDeque<PathBuf> = VecDeque::new();
let mut frontier: VecDeque<PathBuf> = VecDeque::new();

for seed in seeds {
let canon = canon(seed);
if visited.insert(canon.clone()) {
queue.push_back(canon.clone());
if state.visited.insert(canon.clone()) {
frontier.push_back(canon.clone());
reached.insert(canon);
}
}

while let Some(file) = queue.pop_front() {
let Ok(text) = std::fs::read_to_string(&file) else {
continue;
};
for inc in scan(&text) {
match resolve(&inc, &file, search_paths) {
Some(resolved) => {
let canon = canon(&resolved);
if visited.insert(canon.clone()) {
reached.insert(canon.clone());
queue.push_back(canon);
while !frontier.is_empty() {
// Read all not-yet-cached files in the current wave in parallel.
let to_read: Vec<PathBuf> = frontier
.iter()
.filter(|p| !state.scan_cache.contains_key(*p))
.cloned()
.collect();

if !to_read.is_empty() {
let scanned: Vec<(PathBuf, Vec<IncludeRef>)> = to_read
.par_iter()
.filter_map(|p| {
let text = std::fs::read_to_string(p).ok()?;
Some((p.clone(), scan(&text)))
})
.collect();

for (path, includes) in scanned {
state.scan_cache.insert(path, includes);
state.files_read += 1;
}
}

// Resolve includes for every file in the frontier and build the next
// wave from any newly discovered canonical paths.
let current: Vec<PathBuf> = frontier.drain(..).collect();
for file in &current {
let Some(includes) = state.scan_cache.get(file).cloned() else {
// Read failed (file is a directory, permission denied, etc.).
// Match the existing behavior: silently skip.
continue;
};
for inc in &includes {
match resolve_include(inc, file, search_paths) {
Some(resolved) => {
let canon = canon(&resolved);
if state.visited.insert(canon.clone()) {
reached.insert(canon.clone());
frontier.push_back(canon);
}
}
None => {
unresolved.insert(inc.path.clone());
}
}
None => {
unresolved.insert(inc.path.clone());
}
}
}
Expand All @@ -66,7 +179,7 @@ pub fn walk(seeds: &[PathBuf], search_paths: &[PathBuf]) -> WalkResult {
}
}

fn resolve(inc: &IncludeRef, from: &Path, search_paths: &[PathBuf]) -> Option<PathBuf> {
fn resolve_include(inc: &IncludeRef, from: &Path, search_paths: &[PathBuf]) -> Option<PathBuf> {
if inc.kind == IncludeKind::Quoted {
if let Some(parent) = from.parent() {
let candidate = parent.join(&inc.path);
Expand Down
1 change: 1 addition & 0 deletions crates/fbuild-library-select/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ zccache-artifact = { workspace = true }
tempfile = { workspace = true }
criterion = { workspace = true }
fbuild-test-support = { path = "../fbuild-test-support" }
tracing-test = { workspace = true, features = ["no-env-filter"] }

[[bench]]
name = "resolve_cold"
Expand Down
Loading
Loading