From 227a56d0654adeab18a28f0ea4c1ab6957e5a7c0 Mon Sep 17 00:00:00 2001 From: Sewer56 Date: Fri, 10 Apr 2026 21:33:02 +0100 Subject: [PATCH 1/4] Changed: Rework path resolvers to use workspace-rooted semantics (#98) The multi-base + external_permission fallback model introduced complexity (canonicalization edge cases, permission bypass surface, cross-platform path comparison issues) without clear benefit over workspace-rooted globs. - Add resolve_workspace_root() (git root > cwd) in new workspace module - Rework AllowedGlobResolver from multi-base-directory to single workspace root - Remove external_permission fallback from AllowedPathResolver and AllowedGlobResolver - Add GlobPolicy::builder_with_base() for workspace-rooted pattern resolution - Update benchmarks and documentation for new workspace-rooted semantics --- src/llm-coding-tools-agents/README.md | 3 + src/llm-coding-tools-core/README.md | 21 +- .../benches/path_resolvers.rs | 134 +-- src/llm-coding-tools-core/src/lib.rs | 2 + src/llm-coding-tools-core/src/path/allowed.rs | 206 +--- .../src/path/allowed_glob/mod.rs | 883 ++++++------------ .../src/path/allowed_glob/policy.rs | 347 +++++-- src/llm-coding-tools-core/src/path/mod.rs | 17 - src/llm-coding-tools-core/src/workspace.rs | 99 ++ 9 files changed, 719 insertions(+), 993 deletions(-) create mode 100644 src/llm-coding-tools-core/src/workspace.rs diff --git a/src/llm-coding-tools-agents/README.md b/src/llm-coding-tools-agents/README.md index 588147f..d3b0da4 100644 --- a/src/llm-coding-tools-agents/README.md +++ b/src/llm-coding-tools-agents/README.md @@ -110,6 +110,9 @@ permission: task: allow # Required to delegate to subagents ``` +**Glob patterns in permissions:** Permission values use globs (`*` = one component, `**` = any depth). +Bare `allow` equals `**`. Patterns are workspace-relative. + **Note:** `task` is special - when omitted, it allows delegation to all callable subagents for OpenCode compatibility. To disable delegation, explicitly set `task: deny`. diff --git a/src/llm-coding-tools-core/README.md b/src/llm-coding-tools-core/README.md index 7d15dab..0c55369 100644 --- a/src/llm-coding-tools-core/README.md +++ b/src/llm-coding-tools-core/README.md @@ -69,13 +69,13 @@ Path-based tools are generic over [`PathResolver`], so wrappers can choose unres - [`AbsolutePathResolver`] enforces absolute-path inputs (unrestricted mode). - [`AllowedPathResolver`] constrains operations to configured directories (sandbox mode). -- [`AllowedGlobResolver`] constrains to directories with glob pattern filtering (fine-grained sandbox mode). +- [`AllowedGlobResolver`] constrains to a workspace root with glob pattern filtering (fine-grained sandbox mode). - Failed resolution rejects traversal and out-of-sandbox paths before tool execution. ```rust,no_run use llm_coding_tools_core::{ path::{AllowedGlobResolver, GlobPolicy, RuleAction}, - AbsolutePathResolver, AllowedPathResolver, PathResolver, ToolResult, + resolve_workspace_root, AbsolutePathResolver, AllowedPathResolver, PathResolver, ToolResult, }; fn demo() -> ToolResult<()> { @@ -83,12 +83,13 @@ fn demo() -> ToolResult<()> { let any_path = AbsolutePathResolver; let _hosts = any_path.resolve("/etc/hosts")?; - // Sandboxed mode: only configured directories are allowed. + // Sandboxed mode: multiple allowed directories. let sandbox = AllowedPathResolver::new(["/workspace/project", "/tmp"])?; let _lib = sandbox.resolve("src/lib.rs")?; - // Fine-grained sandbox (last-match-wins). - let glob = AllowedGlobResolver::new(["/workspace/project"])? + // Fine-grained sandbox with glob policy (workspace-relative patterns). + let root = resolve_workspace_root()?; + let glob = AllowedGlobResolver::new(&root)? .with_policy( GlobPolicy::builder() .add("src/**", RuleAction::Allow)? // Matches src/lib.rs @@ -103,6 +104,16 @@ fn demo() -> ToolResult<()> { } ``` +#### Permission glob semantics + +- `*` matches any characters within a single path component (e.g., `*.rs` matches `lib.rs` but not `src/lib.rs`). +- `**` matches any number of path components (e.g., `src/**/*.rs` matches `src/deep/nested/mod.rs`). +- Bare `allow` maps to `**` (all files under the workspace root). +- Relative patterns are implicitly joined with the workspace root at construction time. +- Absolute patterns (leading `/` or drive-root like `C:/`) are treated as-is. + +Last-match-wins: both deny-then-allow and allow-then-deny orders work depending on whether you want a default-deny or default-allow posture. + #### Linux shell sandboxing Enable the `linux-bubblewrap` feature flag to sandbox [`bash`] ([`execute_command`]) diff --git a/src/llm-coding-tools-core/benches/path_resolvers.rs b/src/llm-coding-tools-core/benches/path_resolvers.rs index 4264121..7ce7220 100644 --- a/src/llm-coding-tools-core/benches/path_resolvers.rs +++ b/src/llm-coding-tools-core/benches/path_resolvers.rs @@ -7,7 +7,6 @@ //! - `resolvers`: Compares [`AllowedPathResolver`] and [`AllowedGlobResolver`] on the same paths //! - `multiple_bases`: Tests [`AllowedPathResolver`] with multiple base directories //! - `canonicalize`: Isolates `canonicalize` vs `soft_canonicalize` performance -//! - `external_directory`: Tests external directory permission fallback for absolute paths //! //! # Test Cases (resolvers) //! @@ -55,18 +54,6 @@ //! canonicalize/existing_file_soft_canonicalize ~5.3 µs (2.7x slower than canonicalize) //! canonicalize/new_file_shallow_soft_canonicalize ~7.2 µs //! canonicalize/new_file_deep_soft_canonicalize ~8.4 µs -//! -//! external_directory/AllowedPathResolver/external_existing_file ~548 ns (canonicalize + permission) -//! external_directory/AllowedPathResolver/external_new_file ~3.3 µs (soft_canonicalize + permission) -//! external_directory/AllowedPathResolver/external_rejected ~2.4 µs (canonicalize + deny) -//! external_directory/AllowedPathResolver/external_no_ruleset ~2.3 µs (canonicalize, no permission) -//! external_directory/AllowedPathResolver/relative_still_fails ~9.8 µs (soft_canonicalize, not external) -//! -//! external_directory/AllowedGlobResolver/external_existing_file ~535 ns (canonicalize + permission) -//! external_directory/AllowedGlobResolver/external_new_file ~3.3 µs (soft_canonicalize + permission) -//! external_directory/AllowedGlobResolver/external_rejected ~2.3 µs (canonicalize + deny) -//! external_directory/AllowedGlobResolver/external_no_ruleset ~2.3 µs (canonicalize, no permission) -//! external_directory/AllowedGlobResolver/relative_still_fails ~9.8 µs (soft_canonicalize, not external) //! ``` //! //! # Platform Differences @@ -93,10 +80,8 @@ use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Through use llm_coding_tools_core::path::{ AllowedGlobResolver, AllowedPathResolver, GlobPolicy, GlobPolicyBuilder, PathResolver, }; -use llm_coding_tools_core::permissions::{PermissionAction, Rule, Ruleset}; use soft_canonicalize::soft_canonicalize; use std::fs; -use std::sync::Arc; use tempfile::TempDir; const EXISTING_FILE: &str = "src/lib.rs"; @@ -112,7 +97,8 @@ fn build_policy(f: F) -> llm_coding_tools_core::error::ToolResult where F: FnOnce(GlobPolicyBuilder) -> llm_coding_tools_core::error::ToolResult, { - f(GlobPolicy::builder()).and_then(|b| b.build()) + let base = soft_canonicalize(std::env::current_dir().unwrap()).unwrap(); + f(GlobPolicy::builder_with_base(&base)?).and_then(|b| b.build()) } /// Benchmarks [`AllowedPathResolver`] and [`AllowedGlobResolver`] on the same paths. @@ -149,13 +135,11 @@ fn bench_resolvers_same_paths(c: &mut Criterion) { let allowed = AllowedPathResolver::new(vec![current_dir.clone()]).unwrap(); // Simple policy: single glob pattern (src/**/*.rs) - // This tests minimal glob matching overhead. let simple_policy = build_policy(|b| b.allow("src/**/*.rs")).unwrap(); // Complex policy: 10 rules simulating a realistic project configuration. - // Tests last-match-wins semantics and rule iteration overhead. let complex_policy = build_policy(|b| { - b.allow("src/**/*.rs")? + b.allow("src/**")? .deny("target/**")? .allow("*.toml")? .deny("*.log")? @@ -167,10 +151,10 @@ fn bench_resolvers_same_paths(c: &mut Criterion) { }) .unwrap(); - let glob_simple = AllowedGlobResolver::new(vec![current_dir.clone()]) + let glob_simple = AllowedGlobResolver::new(¤t_dir) .unwrap() .with_policy(simple_policy); - let glob_complex = AllowedGlobResolver::new(vec![current_dir.clone()]) + let glob_complex = AllowedGlobResolver::new(¤t_dir) .unwrap() .with_policy(complex_policy); @@ -306,117 +290,11 @@ fn bench_canonicalize_vs_soft(c: &mut Criterion) { group.finish(); } -/// Benchmarks the external directory permission fallback path. -/// -/// Tests the performance of resolving paths that are outside all base -/// directories but may be allowed via an `"external_directory"` permission rule. -/// -/// # Test Cases -/// -/// ```text -/// | Case | Description | -/// |-------------------------|----------------------------------------------------------| -/// | external_existing_file | Absolute path to file in allowed temp dir (fast path) | -/// | external_new_file | Absolute path to new file in allowed temp dir (soft_can) | -/// | external_rejected | Absolute path denied by permission ruleset (early exit) | -/// | external_no_ruleset | Absolute path with no external_permission configured | -/// | relative_still_fails | Relative path even when external_permission is set | -/// ``` -fn bench_external_directory(c: &mut Criterion) { - let mut group = c.benchmark_group("external_directory"); - - let current_dir = std::env::current_dir().unwrap(); - let external_dir = TempDir::new().unwrap(); - let existing_file = external_dir.path().join("existing.txt"); - fs::write(&existing_file, "content").unwrap(); - let new_file = external_dir.path().join("subdir/new_file.txt"); - - let canon_external = soft_canonicalize(external_dir.path()).unwrap(); - let allow_pattern = canon_external.join("*").to_str().unwrap().to_owned(); - - let mut allow_ruleset = Ruleset::new(); - allow_ruleset.push( - Rule::new( - "external_directory", - allow_pattern.as_str(), - PermissionAction::Allow, - ) - .unwrap(), - ); - - let mut deny_ruleset = Ruleset::new(); - deny_ruleset.push(Rule::new("external_directory", "*", PermissionAction::Deny).unwrap()); - - let existing_file_str = existing_file.to_str().unwrap().to_owned(); - let new_file_str = new_file.to_str().unwrap().to_owned(); - let rejected_path = std::env::temp_dir() - .join("rejected_external") - .join("path.txt") - .to_str() - .unwrap() - .to_owned(); - - let allowed_path_resolver = AllowedPathResolver::new(vec![current_dir.clone()]) - .unwrap() - .with_external_permission(Arc::new(allow_ruleset.clone())); - - let denied_path_resolver = AllowedPathResolver::new(vec![current_dir.clone()]) - .unwrap() - .with_external_permission(Arc::new(deny_ruleset.clone())); - - let no_permission_resolver = AllowedPathResolver::new(vec![current_dir.clone()]).unwrap(); - - let allowed_glob_resolver = AllowedGlobResolver::new(vec![current_dir.clone()]) - .unwrap() - .with_external_permission(Arc::new(allow_ruleset)); - - let denied_glob_resolver = AllowedGlobResolver::new(vec![current_dir.clone()]) - .unwrap() - .with_external_permission(Arc::new(deny_ruleset)); - - let no_permission_glob_resolver = AllowedGlobResolver::new(vec![current_dir.clone()]).unwrap(); - - group.throughput(Throughput::Elements(1)); - - for (case_name, path_input) in [ - ("external_existing_file", existing_file_str.as_str()), - ("external_new_file", new_file_str.as_str()), - ("external_rejected", rejected_path.as_str()), - ("external_no_ruleset", rejected_path.as_str()), - ("relative_still_fails", "relative/path.txt"), - ] { - let (path_resolver, glob_resolver) = match case_name { - "external_existing_file" | "external_new_file" => { - (&allowed_path_resolver, &allowed_glob_resolver) - } - "external_rejected" => (&denied_path_resolver, &denied_glob_resolver), - "external_no_ruleset" => (&no_permission_resolver, &no_permission_glob_resolver), - "relative_still_fails" => (&allowed_path_resolver, &allowed_glob_resolver), - _ => unreachable!(), - }; - - group.bench_with_input( - BenchmarkId::new("AllowedPathResolver", case_name), - path_resolver, - |b, resolver| b.iter(|| resolver.resolve(black_box(path_input))), - ); - - group.bench_with_input( - BenchmarkId::new("AllowedGlobResolver", case_name), - glob_resolver, - |b, resolver| b.iter(|| resolver.resolve(black_box(path_input))), - ); - } - - group.finish(); -} - criterion_group!( benches, bench_resolvers_same_paths, bench_multiple_bases, - bench_canonicalize_vs_soft, - bench_external_directory + bench_canonicalize_vs_soft ); criterion_main!(benches); diff --git a/src/llm-coding-tools-core/src/lib.rs b/src/llm-coding-tools-core/src/lib.rs index 8371d24..1aa1ed0 100644 --- a/src/llm-coding-tools-core/src/lib.rs +++ b/src/llm-coding-tools-core/src/lib.rs @@ -20,6 +20,7 @@ pub mod system_prompt; pub mod tool_metadata; pub mod tools; pub mod util; +pub mod workspace; mod internal; @@ -29,6 +30,7 @@ pub use error::{ToolError, ToolResult}; pub use output::ToolOutput; pub use path::{AbsolutePathResolver, AllowedGlobResolver, AllowedPathResolver, PathResolver}; pub use system_prompt::SystemPromptBuilder; +pub use workspace::resolve_workspace_root; // Re-export tools (always available, sync or async based on runtime feature) pub use tools::{ diff --git a/src/llm-coding-tools-core/src/path/allowed.rs b/src/llm-coding-tools-core/src/path/allowed.rs index cdd707d..af947b5 100644 --- a/src/llm-coding-tools-core/src/path/allowed.rs +++ b/src/llm-coding-tools-core/src/path/allowed.rs @@ -25,13 +25,11 @@ //! absolute path itself, we canonicalize once and check all bases - no //! per-base filesystem calls. //! -//! If no base accepts, fall through to [`try_external`] which checks the -//! optional external-directory permission ruleset as a last resort. +//! If no base accepts, reject with "not within allowed directories". use super::{relative_path_escapes_base, resolve_new_file_fast, PathResolver}; use crate::context::PathMode; use crate::error::{ToolError, ToolResult}; -use crate::permissions::{PermissionAction, Ruleset}; use soft_canonicalize::soft_canonicalize; use std::path::{Path, PathBuf}; use std::sync::Arc; @@ -62,11 +60,6 @@ use std::sync::Arc; pub struct AllowedPathResolver { /// Canonicalized allowed base directories. allowed_paths: Arc<[PathBuf]>, - /// Optional permission ruleset for paths outside allowed bases. - /// - /// If a path doesn't resolve within any base, the `"external_directory"` key - /// is checked against the canonicalized path. Only absolute paths are eligible. - external_permission: Option>, } impl AllowedPathResolver { @@ -99,7 +92,6 @@ impl AllowedPathResolver { Ok(Self { allowed_paths: canonicalized?, - external_permission: None, }) } @@ -123,7 +115,6 @@ impl AllowedPathResolver { .into_iter() .map(|p| p.as_ref().to_path_buf()) .collect(), - external_permission: None, } } @@ -131,24 +122,6 @@ impl AllowedPathResolver { pub fn allowed_paths(&self) -> &[PathBuf] { &self.allowed_paths } - - /// Allows access to paths outside allowed directories via a permission ruleset. - /// - /// Paths that don't resolve within any base are checked against the - /// `"external_directory"` permission key. Only absolute paths are eligible; - /// relative paths always fail. - /// - /// # Arguments - /// - `permission` - [`Ruleset`] controlling external directory access. - /// - /// # Returns - /// The modified resolver for chaining. This method always returns `Self` and - /// is infallible. - #[must_use] - pub fn with_external_permission(mut self, permission: Arc) -> Self { - self.external_permission = Some(permission); - self - } } impl PathResolver for AllowedPathResolver { @@ -158,19 +131,11 @@ impl PathResolver for AllowedPathResolver { let input_path = Path::new(path); if relative_path_escapes_base(input_path) { - return Err(ToolError::InvalidPath(format!( - "path '{}' is not within allowed directories", - path - ))); + return not_allowed(path); } if input_path.is_absolute() { - return resolve_absolute( - &self.allowed_paths, - self.external_permission.as_deref(), - path, - input_path, - ); + return resolve_absolute(&self.allowed_paths, path, input_path); } resolve_relative(&self.allowed_paths, path, input_path) @@ -218,10 +183,7 @@ fn resolve_relative( } } - Err(ToolError::InvalidPath(format!( - "path '{}' is not within allowed directories", - path - ))) + not_allowed(path) } /// For absolute paths, `base.join(input) == input` regardless of base. @@ -234,10 +196,9 @@ fn resolve_relative( /// 3. Fall back to `soft_canonicalize()` for paths with missing parent dirs. /// /// If the resolved path lands inside any allowed base, accept it. -/// Otherwise, hand off to [`try_external`] as a last resort. +/// Otherwise, reject with "not within allowed directories". fn resolve_absolute( allowed_paths: &[PathBuf], - external_permission: Option<&Ruleset>, path: &str, input_path: &Path, ) -> ToolResult { @@ -246,7 +207,7 @@ fn resolve_absolute( if allowed_paths.iter().any(|base| canonical.starts_with(base)) { return Ok(canonical); } - return try_external(external_permission, path, canonical); + return not_allowed(path); } // Step 2: fast path for new files in existing directories. @@ -254,7 +215,7 @@ fn resolve_absolute( if allowed_paths.iter().any(|base| resolved.starts_with(base)) { return Ok(resolved); } - return try_external(external_permission, path, resolved); + return not_allowed(path); } // Step 3: fallback for paths with missing parent dirs. @@ -262,52 +223,14 @@ fn resolve_absolute( if allowed_paths.iter().any(|base| resolved.starts_with(base)) { return Ok(resolved); } - return try_external(external_permission, path, resolved); + return not_allowed(path); } - // Nothing resolved the path - still try external in case the ruleset - // permits the raw input via soft_canonicalize inside try_external. - try_external(external_permission, path, None) + not_allowed(path) } -/// Last-resort check for paths that didn't land inside any allowed base. -/// -/// Steps: -/// 1. If no `external_permission` ruleset is configured, reject immediately. -/// 2. Use the caller's cached canonical form if available, otherwise -/// `soft_canonicalize` the raw input. -/// 3. Evaluate the canonicalized path against the `"external_directory"` key -/// in the permission ruleset. Return `Ok` if allowed, otherwise reject. #[inline] -fn try_external( - external_permission: Option<&Ruleset>, - path: &str, - cached_canonical: impl Into>, -) -> ToolResult { - // Step 1: no ruleset or empty ruleset - reject immediately. - let perm = match external_permission { - Some(p) if !p.is_empty() => p, - _ => { - return Err(ToolError::InvalidPath(format!( - "path '{}' is not within allowed directories", - path - ))); - } - }; - - // Step 2: use cached canonical form, or soft_canonicalize now. - let canon = match cached_canonical.into() { - Some(c) => c, - None => soft_canonicalize(Path::new(path)).map_err(|_| { - ToolError::InvalidPath(format!("path '{}' is not within allowed directories", path)) - })?, - }; - - // Step 3: evaluate the canonicalized path against the external_directory ruleset. - if perm.evaluate("external_directory", super::path_as_str(&canon)) == PermissionAction::Allow { - return Ok(canon); - } - +fn not_allowed(path: &str) -> ToolResult { Err(ToolError::InvalidPath(format!( "path '{}' is not within allowed directories", path @@ -317,11 +240,8 @@ fn try_external( #[cfg(test)] mod tests { use super::*; - use crate::permissions::{PermissionAction, Rule, Ruleset}; use rstest::rstest; - use soft_canonicalize::soft_canonicalize; use std::fs; - use std::sync::Arc; use tempfile::TempDir; fn setup_test_dir() -> TempDir { @@ -332,18 +252,6 @@ mod tests { dir } - fn resolver_with_external_rule( - dir: &TempDir, - pattern: &str, - action: PermissionAction, - ) -> AllowedPathResolver { - let mut ruleset = Ruleset::new(); - ruleset.push(Rule::new("external_directory", pattern, action).unwrap()); - AllowedPathResolver::new(vec![dir.path().to_path_buf()]) - .unwrap() - .with_external_permission(Arc::new(ruleset)) - } - /// Verifies that valid paths resolve successfully, including both existing /// files and new files that don't exist yet (important for write operations). #[rstest] @@ -423,43 +331,10 @@ mod tests { ); } - // --- external directory permission --- - #[test] - fn resolves_external_path_when_permission_allows() { + fn rejects_absolute_path_outside_all_bases() { let dir = setup_test_dir(); - let external_dir = TempDir::new().unwrap(); - let external_file = external_dir.path().join("external.txt"); - fs::write(&external_file, "content").unwrap(); - - // Grant access to anything under external_dir. - // Use soft_canonicalize to match the resolver's internal canonicalization, - // ensuring consistent path format across platforms (macOS symlinks, Windows UNC). - let canon_external = soft_canonicalize(external_dir.path()).unwrap(); - let pattern = canon_external.join("*").to_str().unwrap().to_owned(); - let resolver = resolver_with_external_rule(&dir, &pattern, PermissionAction::Allow); - - let result = resolver.resolve(external_file.to_str().unwrap()); - let resolved = result.expect("external path allowed by permission should resolve"); - assert!(resolved.is_absolute(), "resolved path must be absolute"); - assert_eq!( - resolved, - soft_canonicalize(&external_file).unwrap(), - "resolved path must be canonical" - ); - } - - /// External paths are rejected whether explicitly denied or no ruleset is configured. - #[rstest] - #[case::deny_rule(true)] - #[case::no_ruleset(false)] - fn rejects_external_path_without_allow(#[case] with_deny_ruleset: bool) { - let dir = setup_test_dir(); - let resolver = if with_deny_ruleset { - resolver_with_external_rule(&dir, "*", PermissionAction::Deny) - } else { - AllowedPathResolver::new(vec![dir.path().to_path_buf()]).unwrap() - }; + let resolver = AllowedPathResolver::new(vec![dir.path().to_path_buf()]).unwrap(); let external_path = std::env::temp_dir() .join("some") .join("external") @@ -468,61 +343,4 @@ mod tests { let err = result.expect_err("external path should be rejected"); assert!(err.to_string().contains("not within allowed")); } - - #[test] - fn rejects_relative_path_even_with_external_permission() { - let mut ruleset = Ruleset::new(); - ruleset.push(Rule::new("external_directory", "*", PermissionAction::Allow).unwrap()); - - // No base directories - external permission allows everything, but only - // absolute paths. Relative paths must still be rejected. - let resolver = AllowedPathResolver::from_canonical(Vec::::new()) - .with_external_permission(Arc::new(ruleset)); - - let result = resolver.resolve("relative/path.txt"); - assert!( - result.is_err(), - "relative paths must not be resolved externally" - ); - let err = result.unwrap_err(); - assert!(err.to_string().contains("not within allowed")); - } - - /// Permission checks the canonicalized path, not the raw input. - /// Input like `{tmp}/allowed/../allowed/secret.txt` canonicalizes to - /// `{tmp}/allowed/secret.txt` which matches the exact pattern. - #[test] - fn canonicalizes_path_before_permission_check() { - let dir = setup_test_dir(); - let tmp = TempDir::new().unwrap(); - let subdir = tmp.path().join("allowed"); - fs::create_dir_all(&subdir).unwrap(); - fs::write(subdir.join("secret.txt"), "content").unwrap(); - - let canon_tmp = soft_canonicalize(tmp.path()).unwrap(); - let pattern = canon_tmp - .join("allowed") - .join("secret.txt") - .to_str() - .unwrap() - .to_owned(); - let resolver = resolver_with_external_rule(&dir, &pattern, PermissionAction::Allow); - - let input = canon_tmp - .join("allowed") - .join("..") - .join("allowed") - .join("secret.txt"); - let result = resolver.resolve(&input.to_string_lossy()); - assert!( - result.is_ok(), - "canonicalized path must match exact pattern" - ); - let resolved = result.unwrap(); - assert!( - resolved.ends_with(Path::new("allowed").join("secret.txt")), - "resolved path should be canonical: {:?}", - resolved - ); - } } diff --git a/src/llm-coding-tools-core/src/path/allowed_glob/mod.rs b/src/llm-coding-tools-core/src/path/allowed_glob/mod.rs index 5666fbe..9588dcc 100644 --- a/src/llm-coding-tools-core/src/path/allowed_glob/mod.rs +++ b/src/llm-coding-tools-core/src/path/allowed_glob/mod.rs @@ -1,41 +1,32 @@ -//! Glob-aware allowed directory path resolver implementation. +//! Glob-aware workspace-rooted path resolver implementation. //! -//! Provides [`AllowedGlobResolver`] which restricts path access to allowed -//! directories with glob pattern filtering. +//! Provides [`AllowedGlobResolver`] which restricts path access to a workspace +//! root directory with glob pattern filtering. //! //! # Resolution algorithm //! //! The entry point (`PathResolver::resolve`) rejects paths that lexically -//! escape the base directory, then dispatches to one of two branches: +//! escape the workspace root, then joins relative paths with the workspace root +//! and dispatches to [`resolve_candidate`]. //! -//! ## Relative paths - `resolve_relative` +//! ## `resolve_candidate` //! -//! For each configured base directory, try in order: +//! Three resolution tiers, cheapest first: //! //! 1. **Canonicalize** - if the file exists, resolve symlinks and normalize. -//! Accept if the result stays inside the base and passes policy. +//! Accept if the result stays inside the workspace root and passes policy. //! 2. **New-file fast path** - if only the file is new but its parent //! directory exists, canonicalize the parent and join the filename. -//! Accept if the result stays inside the base and passes policy. +//! Accept if the result stays inside the workspace root and passes policy. //! 3. **Soft canonicalize** - for paths where even the parent doesn't exist, //! resolve as far as possible. Accept if the result stays inside the -//! base and passes policy. +//! workspace root and passes policy. //! -//! After each resolution tier, re-check glob policy if normalization or -//! symlinks changed the relative path the policy would see. +//! After each resolution tier, check glob policy against the full canonicalized +//! absolute path. If policy denies, reject immediately. //! -//! If no base directory accepts the path, reject. -//! -//! ## Absolute paths - `resolve_absolute` -//! -//! Same three resolution tiers (canonicalize, new-file-fast, soft_canonicalize). -//! Since `base.join(absolute)` equals the absolute path itself, we canonicalize -//! once and check all bases - no per-base filesystem calls. -//! -//! For each candidate we also verify glob policy. -//! -//! If no base accepts, fall through to [`try_external`] which checks the -//! optional external-directory permission ruleset as a last resort. +//! If no tier succeeds or policy denies the path, reject with +//! "not within allowed directories". pub(crate) mod normalize; mod policy; @@ -43,152 +34,113 @@ mod policy; use super::{path_analysis, resolve_new_file_fast, PathResolver}; use crate::context::PathMode; use crate::error::{ToolError, ToolResult}; -use crate::permissions::{PermissionAction, Ruleset}; -use normalize::expand_shell; +use normalize::{expand_shell, normalize_path}; use soft_canonicalize::soft_canonicalize; use std::path::{Path, PathBuf}; use std::sync::Arc; pub use policy::{GlobPolicy, GlobPolicyBuilder, RuleAction}; -/// Path resolver that restricts access to allowed directories with glob pattern filtering. +/// Path resolver that restricts access to a workspace root with glob pattern filtering. /// -/// Resolves paths relative to configured base directories, validating they stay within -/// allowed boundaries. Prevents path traversal attacks and applies glob policy filtering. +/// This is the agent-facing resolver, used when file permissions are configured +/// via frontmatter. For code-level multi-directory containment without glob +/// filtering, use [`super::AllowedPathResolver`]. /// /// # Path Semantics /// -/// - **Slash normalization**: Paths normalized to `/` for consistent cross-platform matching -/// - **Shell expansion (directories only)**: Base directories support `~/`, -/// `$HOME/`, and other shell patterns. Patterns are always relative to base -/// directory without shell expansion. -/// - **Relative matching**: Patterns match relative paths within base -/// directories (e.g., `src/lib.rs`) -/// - **Last-match-wins**: Last matching rule wins, enabling override patterns via reverse -/// iteration for O(k) short-circuit. -/// -/// # Security -/// -/// Path traversal is prevented by resolving symlinks and normalizing the -/// resolved path, verifying it stays within allowed base directories, and -/// applying glob policy. Patterns match normalized relative paths with -/// last-match-wins semantics. Unmatched paths are denied. +/// - **Absolute paths**: Canonicalized and checked against the policy directly. +/// - **Relative paths**: Joined with the workspace root before canonicalization. +/// - **Policy matching**: Last-match-wins against the full canonicalized absolute path. +/// - **Unmatched paths**: Denied. #[derive(Debug, Clone)] pub struct AllowedGlobResolver { - /// Allowed base directories. - base_directories: Arc<[Arc]>, - /// Optional glob policy for file filtering. + /// Canonicalized workspace root directory. + workspace_root: Arc, + /// Optional glob policy matching against absolute paths. policy: Option>, - /// Optional permission ruleset for paths outside allowed bases. - /// - /// If a path doesn't resolve within any base, the `"external_directory"` key - /// is checked against the canonicalized path. Only absolute paths are eligible. - external_permission: Option>, } impl AllowedGlobResolver { - /// Creates a new resolver with the given allowed directories. + /// Creates a new resolver with the given workspace root directory. + /// + /// The directory is canonicalized during construction. Shell patterns (`~/`, + /// `$HOME/`, etc.) are expanded before resolution. + /// + /// # Arguments /// - /// Directories are resolved (symlinks expanded, made absolute, and - /// normalized) during construction. Shell patterns (`~/`, `$HOME/`, `$VAR`, - /// etc.) are expanded before resolution. + /// - `workspace_root`: Path to the workspace root directory. /// - /// Returns `ToolError::InvalidPath` if any directory doesn't exist or cannot - /// be resolved. + /// # Returns /// - /// # Example + /// A new [`AllowedGlobResolver`] with no glob policy attached. /// - /// ``` - /// use llm_coding_tools_core::path::AllowedGlobResolver; - /// use std::path::PathBuf; + /// # Errors /// - /// # fn example() -> Result<(), Box> { - /// let directories = vec![PathBuf::from("/home/user/project")]; - /// let resolver = AllowedGlobResolver::new(directories)?; - /// # Ok(()) - /// # } - /// ``` - pub fn new(directories: impl IntoIterator>) -> ToolResult { - let resolved: Result]>, _> = directories - .into_iter() - .map(|p| { - let path = p.as_ref(); - let expanded = expand_shell(&path.to_string_lossy())?; - if !expanded.is_dir() { - return Err(ToolError::InvalidPath(format!( - "failed to resolve base directory '{}': path is not an existing directory", - path.display() - ))); - } - - soft_canonicalize(&expanded) - .map(|pb| Arc::from(pb.into_boxed_path())) - .map_err(|e| { - ToolError::InvalidPath(format!( - "failed to resolve base directory '{}': {}", - path.display(), - e - )) - }) - }) - .collect(); - + /// Returns [`ToolError::InvalidPath`] if: + /// - The expanded path is not an existing directory. + /// - The path cannot be canonicalized. + pub fn new(workspace_root: impl AsRef) -> ToolResult { + let path = workspace_root.as_ref(); + // Expand shell patterns like ~/, $HOME/, etc. + let expanded = expand_shell(&path.to_string_lossy())?; + // Verify the path points to an existing directory. + if !expanded.is_dir() { + return Err(ToolError::InvalidPath(format!( + "failed to resolve workspace root '{}': path is not an existing directory", + path.display() + ))); + } + // Canonicalize to resolve symlinks and normalize the path. + let canonicalized = soft_canonicalize(&expanded).map_err(|e| { + ToolError::InvalidPath(format!( + "failed to resolve workspace root '{}': {}", + path.display(), + e + )) + })?; Ok(Self { - base_directories: resolved?, + workspace_root: Arc::from(canonicalized.into_boxed_path()), policy: None, - external_permission: None, }) } - /// Creates a resolver from already-resolved paths, skipping filesystem - /// validation. + /// Creates a resolver from an already-canonicalized workspace root. + /// + /// Skips shell expansion and canonicalization — use only when the path + /// is already fully resolved. /// - /// A resolved path is absolute, with all symlinks expanded and all `.` and - /// `..` components normalized. Use [`std::fs::canonicalize`] or - /// [`std::path::Path::canonicalize`] to resolve paths. + /// # Arguments + /// + /// - `workspace_root`: An already-canonicalized absolute path to the workspace root. + /// + /// # Returns /// - /// Caller must ensure paths are resolved. Using non-resolved paths may - /// allow path traversal attacks. - pub fn from_canonical(directories: impl IntoIterator>) -> Self { + /// A new [`AllowedGlobResolver`] with no glob policy attached. + pub fn from_canonical(workspace_root: impl AsRef) -> Self { Self { - base_directories: directories - .into_iter() - .map(|p| Arc::from(p.as_ref())) - .collect(), + workspace_root: Arc::from(workspace_root.as_ref()), policy: None, - external_permission: None, } } /// Sets the glob policy for this resolver. /// - /// Returns self for method chaining. - pub fn with_policy(mut self, policy: GlobPolicy) -> Self { - self.policy = Some(Arc::new(policy)); - self - } - - /// Allows access to paths outside base directories via a permission ruleset. - /// - /// Paths that don't resolve within any base are checked against the - /// `"external_directory"` permission key. Only absolute paths are eligible; - /// relative paths always fail. - /// /// # Arguments - /// - `permission` - [`Ruleset`] controlling external directory access. + /// + /// - `policy`: The [`GlobPolicy`] to apply during path resolution. /// /// # Returns - /// The modified resolver for chaining. This method always returns `Self` and - /// is infallible. - #[must_use] - pub fn with_external_permission(mut self, permission: Arc) -> Self { - self.external_permission = Some(permission); + /// + /// `self` with the policy attached, for method chaining. + pub fn with_policy(mut self, policy: GlobPolicy) -> Self { + self.policy = Some(Arc::new(policy)); self } - /// Returns the allowed base directories. - pub fn base_directories(&self) -> &[Arc] { - &self.base_directories + /// Returns the canonicalized workspace root directory. + pub fn workspace_root(&self) -> &Path { + &self.workspace_root } /// Returns the current glob policy, if any. @@ -200,30 +152,42 @@ impl AllowedGlobResolver { impl PathResolver for AllowedGlobResolver { const PATH_MODE: PathMode = PathMode::Allowed; + /// Resolves a path against the workspace root and checks glob policy. + /// + /// Rejects paths that lexically escape the workspace root, then resolves + /// the candidate through canonicalization tiers and policy checks. + /// + /// # Arguments + /// + /// - `path`: A relative or absolute path string to resolve. + /// + /// # Returns + /// + /// The canonicalized absolute [`PathBuf`] if the path is within the + /// workspace root and allowed by policy. + /// + /// # Errors + /// + /// Returns [`ToolError::InvalidPath`] if: + /// - The path lexically escapes the workspace root. + /// - The resolved path falls outside the workspace root. + /// - The glob policy denies the resolved path. + /// - No resolution tier succeeds. fn resolve(&self, path: &str) -> ToolResult { let input_path = Path::new(path); let policy = self.policy.as_deref(); let analysis = path_analysis(input_path); if analysis.escapes { - return Err(not_allowed_error(path)); + return Err(reject(path)); } - if input_path.is_absolute() { - return resolve_absolute( - &self.base_directories, - self.external_permission.as_deref(), - policy, - path, - input_path, - ); - } - - resolve_relative(&self.base_directories, policy, path, input_path) + let candidate = self.workspace_root.join(input_path); + resolve_candidate(&self.workspace_root, policy, path, &candidate) } } -/// For each configured base directory, try to resolve the relative input. +/// Resolve a candidate path against the workspace root with glob policy filtering. /// /// Three resolution tiers, cheapest first: /// @@ -231,437 +195,250 @@ impl PathResolver for AllowedGlobResolver { /// 2. `resolve_new_file_fast()` for new files in existing dirs. /// 3. `soft_canonicalize()` fallback for missing parent dirs. /// -/// After each tier, re-check glob policy if the resolved relative path -/// differs from the raw input. Accept the first base that passes both -/// the containment check and policy. -fn resolve_relative( - base_directories: &[Arc], +/// After each tier, re-check glob policy against the normalized absolute path. +/// Accept the first tier that passes both the containment check and policy. +fn resolve_candidate( + workspace_root: &Path, policy: Option<&GlobPolicy>, path: &str, - input_path: &Path, + candidate: &Path, ) -> ToolResult { - for base_dir in base_directories.iter() { - let candidate = base_dir.join(input_path); - - // Step 1: canonicalize for existing files - resolves symlinks and normalizes. - if let Ok(resolved) = candidate.canonicalize() { - if !resolved.starts_with(base_dir) { - continue; - } - - // Re-check policy if resolution changed the relative path. - if let Some(policy) = policy { - let relative_path = resolved.strip_prefix(base_dir).unwrap_or(Path::new("")); - let normalized_relative = normalize::normalize_path(relative_path); - if !policy.is_allowed(&normalized_relative) { - continue; - } - } - - return Ok(resolved); - } - - // Step 2: fast path for new files in existing directories. - if let Some(resolved) = resolve_new_file_fast(&candidate) { - if !resolved.starts_with(base_dir) { - continue; - } - - // Re-check policy if resolution changed the relative path. - if let Some(policy) = policy { - let relative_path = resolved.strip_prefix(base_dir).unwrap_or(Path::new("")); - let normalized_relative = normalize::normalize_path(relative_path); - if !policy.is_allowed(&normalized_relative) { - continue; - } - } - - return Ok(resolved); - } - - // Step 3: fallback for paths with missing parent dirs. - if let Ok(target_path) = soft_canonicalize(&candidate) { - if !target_path.starts_with(base_dir) { - continue; - } - - // Re-check policy if resolution changed the relative path. - if let Some(policy) = policy { - let relative_path = target_path.strip_prefix(base_dir).unwrap_or(Path::new("")); - let normalized_relative = normalize::normalize_path(relative_path); - if !policy.is_allowed(&normalized_relative) { - continue; - } - } - - return Ok(target_path); + // Step 1: canonicalize for existing files - resolves symlinks and normalizes. + if let Ok(resolved) = candidate.canonicalize() { + if !resolved.starts_with(workspace_root) { + return Err(reject(path)); } - } - - Err(not_allowed_error(path)) -} - -/// Absolute-path branch - same three resolution tiers as [`resolve_relative`] -/// but canonicalize once and check all bases (no per-base FS calls). -/// -/// For each candidate, verify glob policy. Every tier calls -/// [`GlobPolicy::is_allowed`] with the normalized relative form. -/// -/// If no base accepts, fall through to [`try_external`]. -fn resolve_absolute( - base_directories: &[Arc], - external_permission: Option<&Ruleset>, - policy: Option<&GlobPolicy>, - path: &str, - input_path: &Path, -) -> ToolResult { - // Step 1: canonicalize for existing files. - if let Ok(resolved) = input_path.canonicalize() { - let mut inside_any_base = false; - let accepted = base_directories.iter().any(|base_dir| { - if !resolved.starts_with(base_dir) { - return false; + if let Some(policy) = policy { + let abs_str = normalize_path(&resolved); + if !policy.is_allowed(abs_str.as_ref()) { + return Err(reject(path)); } - inside_any_base = true; - if let Some(policy) = policy { - let relative_path = resolved.strip_prefix(base_dir).unwrap_or(Path::new("")); - let normalized_relative = normalize::normalize_path(relative_path); - if !policy.is_allowed(&normalized_relative) { - return false; - } - } - true - }); - if accepted { - return Ok(resolved); - } - if inside_any_base { - // in base directory but denied by glob policy; must NOT be approved via external_permission - return Err(not_allowed_error(path)); } - return try_external(external_permission, path, resolved); + return Ok(resolved); } // Step 2: fast path for new files in existing directories. - if let Some(resolved) = resolve_new_file_fast(input_path) { - let mut inside_any_base = false; - let accepted = base_directories.iter().any(|base_dir| { - if !resolved.starts_with(base_dir) { - return false; - } - inside_any_base = true; - if let Some(policy) = policy { - let relative_path = resolved.strip_prefix(base_dir).unwrap_or(Path::new("")); - let normalized_relative = normalize::normalize_path(relative_path); - if !policy.is_allowed(&normalized_relative) { - return false; - } - } - true - }); - if accepted { - return Ok(resolved); + if let Some(resolved) = resolve_new_file_fast(candidate) { + if !resolved.starts_with(workspace_root) { + return Err(reject(path)); } - if inside_any_base { - // in base directory but denied by glob policy; must NOT be approved via external_permission - return Err(not_allowed_error(path)); + if let Some(policy) = policy { + let abs_str = normalize_path(&resolved); + if !policy.is_allowed(abs_str.as_ref()) { + return Err(reject(path)); + } } - return try_external(external_permission, path, resolved); + return Ok(resolved); } // Step 3: fallback for paths with missing parent dirs. - if let Ok(resolved) = soft_canonicalize(input_path) { - let mut inside_any_base = false; - let accepted = base_directories.iter().any(|base_dir| { - if !resolved.starts_with(base_dir) { - return false; - } - inside_any_base = true; - if let Some(policy) = policy { - let relative_path = resolved.strip_prefix(base_dir).unwrap_or(Path::new("")); - let normalized_relative = normalize::normalize_path(relative_path); - if !policy.is_allowed(&normalized_relative) { - return false; - } - } - true - }); - if accepted { - return Ok(resolved); - } - if inside_any_base { - // in base directory but denied by glob policy; must NOT be approved via external_permission - return Err(not_allowed_error(path)); + if let Ok(resolved) = soft_canonicalize(candidate) { + if !resolved.starts_with(workspace_root) { + return Err(reject(path)); } - return try_external(external_permission, path, resolved); - } - - try_external(external_permission, path, None) -} - -/// Last-resort check for paths that didn't land inside any allowed base. -/// -/// Steps: -/// 1. If no `external_permission` ruleset is configured, reject immediately. -/// 2. Use the caller's cached canonical form if available, otherwise -/// `soft_canonicalize` the raw input. -/// 3. Evaluate the canonicalized path against the `"external_directory"` key -/// in the permission ruleset. Return `Ok` if allowed, otherwise reject. -#[inline] -fn try_external( - external_permission: Option<&Ruleset>, - path: &str, - cached_canonical: impl Into>, -) -> ToolResult { - // Step 1: no ruleset or empty ruleset - reject immediately. - let perm = match external_permission { - Some(p) if !p.is_empty() => p, - _ => { - return Err(not_allowed_error(path)); + if let Some(policy) = policy { + let abs_str = normalize_path(&resolved); + if !policy.is_allowed(abs_str.as_ref()) { + return Err(reject(path)); + } } - }; - - // Step 2: use cached canonical form, or soft_canonicalize now. - let canon = match cached_canonical.into() { - Some(c) => c, - None => soft_canonicalize(Path::new(path)).map_err(|_| not_allowed_error(path))?, - }; - - // Step 3: evaluate the canonicalized path against the external_directory ruleset. - if perm.evaluate("external_directory", super::path_as_str(&canon)) == PermissionAction::Allow { - return Ok(canon); + return Ok(resolved); } - Err(not_allowed_error(path)) + Err(reject(path)) } -/// Creates a standard "path not within allowed directories" error. #[inline] -fn not_allowed_error(path: &str) -> ToolError { +fn reject(path: &str) -> ToolError { ToolError::InvalidPath(format!("path '{}' is not within allowed directories", path)) } #[cfg(test)] mod tests { use super::*; - use crate::permissions::{PermissionAction, Rule, Ruleset}; use rstest::rstest; use soft_canonicalize::soft_canonicalize; + use std::error::Error; use std::fs; - use std::sync::Arc; use tempfile::TempDir; - fn setup_test_dir() -> TempDir { - let dir = TempDir::new().unwrap(); - fs::create_dir_all(dir.path().join("src")).unwrap(); - fs::create_dir_all(dir.path().join("target/debug")).unwrap(); - fs::write(dir.path().join("src/lib.rs"), "content").unwrap(); - fs::write(dir.path().join("src/main.rs"), "content").unwrap(); - fs::write(dir.path().join("Cargo.toml"), "content").unwrap(); - fs::write(dir.path().join("target/debug/app"), "binary").unwrap(); - dir + fn setup_test_dir() -> Result> { + let dir = TempDir::new()?; + fs::create_dir_all(dir.path().join("src"))?; + fs::create_dir_all(dir.path().join("target/debug"))?; + fs::write(dir.path().join("src/lib.rs"), "content")?; + fs::write(dir.path().join("src/main.rs"), "content")?; + fs::write(dir.path().join("Cargo.toml"), "content")?; + fs::write(dir.path().join("target/debug/app"), "binary")?; + Ok(dir) } - // Keeps policy-focused tests small and readable. - fn resolver_with_policy(dir: &TempDir, pattern: &str) -> AllowedGlobResolver { - let policy = GlobPolicy::builder() - .allow(pattern) - .unwrap() - .build() - .unwrap(); - - AllowedGlobResolver::new(vec![dir.path().to_path_buf()]) - .unwrap() - .with_policy(policy) - } - - fn resolver_with_external_rule( + fn resolver_with_policy( dir: &TempDir, pattern: &str, - action: PermissionAction, - ) -> AllowedGlobResolver { - let mut ruleset = Ruleset::new(); - ruleset.push(Rule::new("external_directory", pattern, action).unwrap()); - AllowedGlobResolver::new(vec![dir.path().to_path_buf()]) - .unwrap() - .with_external_permission(Arc::new(ruleset)) - } - - // Builds a deeper tree for globstar matching cases. - fn setup_src_globstar_dir() -> TempDir { - let dir = TempDir::new().unwrap(); - fs::create_dir_all(dir.path().join("src/deep/nested")).unwrap(); - fs::write(dir.path().join("src/deep/nested/mod.rs"), "content").unwrap(); - fs::write(dir.path().join("src/other.rs"), "content").unwrap(); - fs::write(dir.path().join("src/main.rs"), "content").unwrap(); - fs::write(dir.path().join("src/lib.rs"), "content").unwrap(); - dir + ) -> Result> { + let root = soft_canonicalize(dir.path())?; + let policy = GlobPolicy::builder_with_base(&root)? + .allow(pattern)? + .build()?; + Ok(AllowedGlobResolver::new(dir.path())?.with_policy(policy)) } - #[test] - fn constructs_with_valid_directories() { - let dir = setup_test_dir(); - let resolver = AllowedGlobResolver::new(vec![dir.path().to_path_buf()]); - assert!(resolver.is_ok()); + fn setup_src_globstar_dir() -> Result> { + let dir = TempDir::new()?; + fs::create_dir_all(dir.path().join("src/deep/nested"))?; + fs::write(dir.path().join("src/deep/nested/mod.rs"), "content")?; + fs::write(dir.path().join("src/other.rs"), "content")?; + fs::write(dir.path().join("src/main.rs"), "content")?; + fs::write(dir.path().join("src/lib.rs"), "content")?; + Ok(dir) } #[test] - fn constructs_with_non_resolved_directory_and_stores_resolved() { - let dir = setup_test_dir(); - let resolved = dir.path().canonicalize().unwrap(); - - fs::create_dir_all(dir.path().join("subdir")).unwrap(); - - // Build a path that resolves back to the temp dir. - let non_resolved = dir.path().join("subdir").join(".."); - - // Construction should canonicalize before storing the base directory. - let resolver = AllowedGlobResolver::new(vec![&non_resolved]).unwrap(); - - let stored = resolver.base_directories()[0].as_ref(); - assert_eq!( - stored, - resolved.as_path(), - "stored directory should be resolved" - ); - - // The stored path should not retain unresolved components. - assert!( - !stored.to_string_lossy().contains("/subdir/../"), - "stored path should not contain unresolved components" - ); + fn constructs_with_valid_directory_and_stores_resolved() -> Result<(), Box> { + let dir = setup_test_dir()?; + let resolved = soft_canonicalize(dir.path())?; + let resolver = AllowedGlobResolver::new(dir.path())?; + assert_eq!(resolver.workspace_root(), resolved); + Ok(()) } #[test] fn fails_to_construct_with_nonexistent_directory() { - let result = AllowedGlobResolver::new(vec![PathBuf::from("/nonexistent/path/xyz")]); + let result = AllowedGlobResolver::new(PathBuf::from("/nonexistent/path/xyz")); assert!(result.is_err()); let err = result.unwrap_err(); - assert!(err.to_string().contains("failed to resolve base directory")); + assert!(err.to_string().contains("failed to resolve workspace root")); } #[test] fn from_canonical_skips_validation() { - let resolver = AllowedGlobResolver::from_canonical(vec![PathBuf::from("/some/path")]); - assert_eq!(resolver.base_directories().len(), 1); + let resolver = AllowedGlobResolver::from_canonical(PathBuf::from("/nonexistent/path")); + assert_eq!(resolver.workspace_root(), Path::new("/nonexistent/path")); } #[test] - fn resolves_existing_file_within_base_directory() { - let dir = setup_test_dir(); - let resolver = AllowedGlobResolver::new(vec![dir.path().to_path_buf()]).unwrap(); + fn resolves_existing_file_within_workspace_root() -> Result<(), Box> { + let dir = setup_test_dir()?; + let resolver = AllowedGlobResolver::new(dir.path())?; // lib.rs exists from setup_test_dir call, should pass. let result = resolver.resolve("src/lib.rs"); assert!(result.is_ok()); - assert!(result.unwrap().ends_with("lib.rs")); + assert!(result?.ends_with("lib.rs")); + Ok(()) } #[test] - fn resolves_new_file_within_base_directory() { - let dir = setup_test_dir(); - let resolver = AllowedGlobResolver::new(vec![dir.path().to_path_buf()]).unwrap(); + fn resolves_new_file_within_workspace_root() -> Result<(), Box> { + let dir = setup_test_dir()?; + let resolver = AllowedGlobResolver::new(dir.path())?; // new_file.rs does not exist initially but is in allowed directory, should pass. let result = resolver.resolve("src/new_file.rs"); assert!(result.is_ok()); - assert!(result.unwrap().ends_with("new_file.rs")); + assert!(result?.ends_with("new_file.rs")); + Ok(()) } #[test] - fn resolves_new_file_when_intermediate_directories_do_not_exist() { - let dir = setup_test_dir(); - let resolver = AllowedGlobResolver::new(vec![dir.path().to_path_buf()]).unwrap(); + fn resolves_new_file_when_intermediate_directories_do_not_exist() -> Result<(), Box> + { + let dir = setup_test_dir()?; + let resolver = AllowedGlobResolver::new(dir.path())?; // write targets should still resolve when some parent directories do not exist yet. let result = resolver.resolve("src/new_dir/nested/new_file.rs"); assert!(result.is_ok()); - assert!(result.unwrap().ends_with("src/new_dir/nested/new_file.rs")); + assert!(result?.ends_with("src/new_dir/nested/new_file.rs")); + Ok(()) } #[test] - fn resolves_new_file_with_missing_directories_under_matching_policy() { - let dir = setup_test_dir(); - let resolver = resolver_with_policy(&dir, "src/**/*.rs"); + fn resolves_new_file_with_missing_directories_under_matching_policy( + ) -> Result<(), Box> { + let dir = setup_test_dir()?; + let resolver = resolver_with_policy(&dir, "src/**/*.rs")?; let result = resolver.resolve("src/new_dir/nested/new_file.rs"); assert!(result.is_ok()); - assert!(result.unwrap().ends_with("src/new_dir/nested/new_file.rs")); + assert!(result?.ends_with("src/new_dir/nested/new_file.rs")); + Ok(()) } #[test] - fn resolves_existing_file_through_missing_directory_parent_traversal() { - let dir = setup_test_dir(); - let resolver = AllowedGlobResolver::new(vec![dir.path().to_path_buf()]).unwrap(); + fn resolves_existing_file_through_missing_directory_parent_traversal( + ) -> Result<(), Box> { + let dir = setup_test_dir()?; + let resolver = AllowedGlobResolver::new(dir.path())?; let result = resolver.resolve("src/new_dir/../../Cargo.toml"); assert!(result.is_ok()); - assert!(result.unwrap().ends_with("Cargo.toml")); + assert!(result?.ends_with("Cargo.toml")); + Ok(()) } #[rstest] #[case::parent_traversal("../../../etc/passwd")] #[case::nested_traversal("src/../../../etc/passwd")] #[case::simple_parent("../Cargo.toml")] - fn rejects_path_traversal_attempts(#[case] input: &str) { - let dir = setup_test_dir(); - let resolver = AllowedGlobResolver::new(vec![dir.path().to_path_buf()]).unwrap(); + fn rejects_path_traversal_attempts(#[case] input: &str) -> Result<(), Box> { + let dir = setup_test_dir()?; + let resolver = AllowedGlobResolver::new(dir.path())?; // Different traversal shapes should all be rejected the same way. let result = resolver.resolve(input); assert!(result.is_err()); let err = result.unwrap_err(); assert!(err.to_string().contains("not within allowed")); + Ok(()) } #[test] #[cfg(unix)] - fn rejects_symlink_escape_attempt() { + fn rejects_symlink_escape_attempt() -> Result<(), Box> { use std::os::unix::fs::symlink; // We don't allow escapes via symlinks. // The resolver should catch/reject the path. - let dir = setup_test_dir(); - let escape_target = TempDir::new().unwrap(); - fs::write(escape_target.path().join("secret.txt"), "secret").unwrap(); + let dir = setup_test_dir()?; + let escape_target = TempDir::new()?; + fs::write(escape_target.path().join("secret.txt"), "secret")?; let symlink_path = dir.path().join("escape_link"); - symlink(escape_target.path(), &symlink_path).unwrap(); + symlink(escape_target.path(), &symlink_path)?; - let resolver = AllowedGlobResolver::new(vec![dir.path().to_path_buf()]).unwrap(); + let resolver = AllowedGlobResolver::new(dir.path())?; let result = resolver.resolve("escape_link/secret.txt"); assert!(result.is_err(), "symlink escape should be blocked"); let err = result.unwrap_err(); assert!(err.to_string().contains("not within allowed")); + Ok(()) } /// Regression test: symlink under allowed dir pointing to denied dir /// must be blocked after canonicalization re-checks policy. #[test] #[cfg(unix)] - fn rejects_symlink_policy_bypass_attempt() { + fn rejects_symlink_policy_bypass_attempt() -> Result<(), Box> { use std::os::unix::fs::symlink; - let dir = setup_test_dir(); - fs::create_dir_all(dir.path().join("src")).unwrap(); - fs::create_dir_all(dir.path().join("target")).unwrap(); - fs::write(dir.path().join("target/app"), "binary").unwrap(); + let dir = setup_test_dir()?; + fs::create_dir_all(dir.path().join("src"))?; + fs::create_dir_all(dir.path().join("target"))?; + fs::write(dir.path().join("target/app"), "binary")?; let symlink_path = dir.path().join("src/link"); - symlink(dir.path().join("target"), &symlink_path).unwrap(); + symlink(dir.path().join("target"), &symlink_path)?; - let policy = GlobPolicy::builder() - .allow("src/**") - .unwrap() - .deny("target/**") - .unwrap() - .build() - .unwrap(); + let root = soft_canonicalize(dir.path())?; + let policy = GlobPolicy::builder_with_base(&root)? + .allow("src/**")? + .deny("target/**")? + .build()?; - let resolver = AllowedGlobResolver::new(vec![dir.path().to_path_buf()]) - .unwrap() - .with_policy(policy); + let resolver = AllowedGlobResolver::new(dir.path())?.with_policy(policy); let result = resolver.resolve("src/link/app"); assert!( @@ -670,22 +447,37 @@ mod tests { ); let err = result.unwrap_err(); assert!(err.to_string().contains("not within allowed")); + Ok(()) } - // When multiple base directories are present; we should test all of them - // in a specified order. #[test] - fn tries_multiple_base_directories() { - let dir1 = setup_test_dir(); - let dir2 = setup_test_dir(); - fs::write(dir2.path().join("only_in_dir2.txt"), "content").unwrap(); - - let resolver = - AllowedGlobResolver::new(vec![dir1.path().to_path_buf(), dir2.path().to_path_buf()]) - .unwrap(); + fn rejects_absolute_path_outside_workspace_root() -> Result<(), Box> { + let dir = setup_test_dir()?; + let resolver = AllowedGlobResolver::new(dir.path())?; + let external = std::env::temp_dir().join("some-external-path.txt"); + let result = resolver.resolve(external.to_str().ok_or("invalid path")?); + let err = result.expect_err("external path should be rejected"); + assert!(err.to_string().contains("not within allowed")); + Ok(()) + } - let result = resolver.resolve("only_in_dir2.txt"); - assert!(result.is_ok()); + /// In-base paths denied by absolute policy are rejected. + #[rstest] + #[case::cargo_toml("Cargo.toml")] + #[case::target_binary("target/debug/app")] + fn rejects_in_base_path_denied_by_absolute_policy( + #[case] path: &str, + ) -> Result<(), Box> { + let dir = setup_test_dir()?; + let resolver = resolver_with_policy(&dir, "src/**")?; + let full_path = dir.path().join(path); + let result = resolver.resolve(full_path.to_str().ok_or("invalid path")?); + assert!( + result.is_err(), + "'{}' is inside workspace root but denied by absolute src/** policy", + path + ); + Ok(()) } #[rstest] @@ -695,25 +487,29 @@ mod tests { fn resolver_with_src_policy_should_allow_only_matching_relative_paths( #[case] input: &str, #[case] expected_ok: bool, - ) { - let dir = setup_test_dir(); - let resolver = resolver_with_policy(&dir, "src/**"); + ) -> Result<(), Box> { + let dir = setup_test_dir()?; + let resolver = resolver_with_policy(&dir, "src/**")?; // One policy, multiple paths: only matching relative paths should resolve. let result = resolver.resolve(input); assert!( result.is_ok() == expected_ok, - "path '{input}' should {}match 'src/**'", + "path '{input}' should {}match absolute src/**", if expected_ok { "" } else { "not " } ); + Ok(()) } #[rstest] #[case::new_rs_file_should_be_allowed("new_file.rs", true)] #[case::new_txt_file_should_be_denied("new_file.txt", false)] - fn resolver_should_check_policy_for_new_paths(#[case] input: &str, #[case] expected_ok: bool) { - let dir = setup_test_dir(); - let resolver = resolver_with_policy(&dir, "*.rs"); + fn resolver_should_check_policy_for_new_paths( + #[case] input: &str, + #[case] expected_ok: bool, + ) -> Result<(), Box> { + let dir = setup_test_dir()?; + let resolver = resolver_with_policy(&dir, "*.rs")?; // New paths are checked against policy before the file exists. let result = resolver.resolve(input); @@ -722,18 +518,20 @@ mod tests { "path '{input}' should {}match '*.rs'", if expected_ok { "" } else { "not " } ); + Ok(()) } #[test] - fn returns_resolved_path_without_dotdots() { - let dir = setup_test_dir(); - let resolver = AllowedGlobResolver::new(vec![dir.path().to_path_buf()]).unwrap(); + fn returns_resolved_path_without_dotdots() -> Result<(), Box> { + let dir = setup_test_dir()?; + let resolver = AllowedGlobResolver::new(dir.path())?; - let resolved = resolver.resolve("src/../Cargo.toml").unwrap(); + let resolved = resolver.resolve("src/../Cargo.toml")?; assert!( !resolved.to_string_lossy().contains(".."), "resolved path should not contain '..'" ); + Ok(()) } #[rstest] @@ -746,9 +544,9 @@ mod tests { fn resolver_with_src_globstar_policy_should_match_expected_paths( #[case] input: &str, #[case] expected_ok: bool, - ) { - let dir = setup_src_globstar_dir(); - let resolver = resolver_with_policy(&dir, "src/**/*.rs"); + ) -> Result<(), Box> { + let dir = setup_src_globstar_dir()?; + let resolver = resolver_with_policy(&dir, "src/**/*.rs")?; // Verify globstar behavior across shallow, deep, and denied paths. let result = resolver.resolve(input); @@ -757,136 +555,27 @@ mod tests { "path '{input}' should {}match 'src/**/*.rs'", if expected_ok { "" } else { "not " } ); - } - - // --- external directory permission --- - - #[test] - fn resolves_external_path_when_permission_allows() { - let dir = setup_test_dir(); - let external_dir = TempDir::new().unwrap(); - let external_file = external_dir.path().join("external.txt"); - fs::write(&external_file, "content").unwrap(); - - // Grant access to anything under external_dir. - // Use soft_canonicalize to match the resolver's internal canonicalization, - // ensuring consistent path format across platforms (macOS symlinks, Windows UNC). - let canon_external = soft_canonicalize(external_dir.path()).unwrap(); - let pattern = canon_external.join("*").to_str().unwrap().to_owned(); - let resolver = resolver_with_external_rule(&dir, &pattern, PermissionAction::Allow); - - let result = resolver.resolve(external_file.to_str().unwrap()); - let resolved = result.expect("external path allowed by permission should resolve"); - assert!(resolved.is_absolute(), "resolved path must be absolute"); - assert_eq!( - resolved, - soft_canonicalize(&external_file).unwrap(), - "resolved path must be canonical" - ); - } - - /// External paths are rejected whether explicitly denied or no ruleset is configured. - #[rstest] - #[case::deny_rule(true)] - #[case::no_ruleset(false)] - fn rejects_external_path_without_allow(#[case] with_deny_ruleset: bool) { - let dir = setup_test_dir(); - let resolver = if with_deny_ruleset { - resolver_with_external_rule(&dir, "*", PermissionAction::Deny) - } else { - AllowedGlobResolver::new(vec![dir.path().to_path_buf()]).unwrap() - }; - let external_path = std::env::temp_dir().join("some-external-path.txt"); - let result = resolver.resolve(external_path.to_str().unwrap()); - let err = result.expect_err("external path should be rejected"); - assert!(err.to_string().contains("not within allowed")); + Ok(()) } #[test] - fn rejects_relative_path_even_with_external_permission() { - let mut ruleset = Ruleset::new(); - ruleset.push(Rule::new("external_directory", "*", PermissionAction::Allow).unwrap()); - - // No base directories - external permission allows everything, but only - // absolute paths. Relative paths must still be rejected. - let resolver = AllowedGlobResolver::from_canonical(Vec::::new()) - .with_external_permission(Arc::new(ruleset)); - - let result = resolver.resolve("relative/path.txt"); - assert!( - result.is_err(), - "relative paths must not be resolved externally" - ); - let err = result.unwrap_err(); - assert!(err.to_string().contains("not within allowed")); - } - - /// A path inside a base directory that is denied by the glob - /// policy must NOT be approved via `external_permission`. - #[test] - fn rejects_in_base_path_denied_by_policy_even_with_external_permission() { - let dir = setup_test_dir(); - - let policy = GlobPolicy::builder() - .allow("src/**") - .unwrap() - .build() - .unwrap(); - - let mut ruleset = Ruleset::new(); - ruleset.push(Rule::new("external_directory", "*", PermissionAction::Allow).unwrap()); - - let resolver = AllowedGlobResolver::new(vec![dir.path().to_path_buf()]) - .unwrap() - .with_policy(policy) - .with_external_permission(Arc::new(ruleset)); - - let cargo_path = dir.path().join("Cargo.toml"); - let result = resolver.resolve(cargo_path.to_str().unwrap()); - assert!( - result.is_err(), - "Cargo.toml is inside base but denied by 'src/**' policy; \ - external_permission must not override that" - ); - let err = result.unwrap_err(); - assert!(err.to_string().contains("not within allowed")); - } - - /// Permission checks the canonicalized path, not the raw input. - /// Input like `{tmp}/allowed/../allowed/secret.txt` canonicalizes to - /// `{tmp}/allowed/secret.txt` which matches the exact pattern. - #[test] - fn canonicalizes_path_before_permission_check() { - let dir = setup_test_dir(); - let tmp = TempDir::new().unwrap(); - let subdir = tmp.path().join("allowed"); - fs::create_dir_all(&subdir).unwrap(); - fs::write(subdir.join("secret.txt"), "content").unwrap(); - - let canon_tmp = soft_canonicalize(tmp.path()).unwrap(); - let pattern = canon_tmp - .join("allowed") - .join("secret.txt") - .to_str() - .unwrap() - .to_owned(); - let resolver = resolver_with_external_rule(&dir, &pattern, PermissionAction::Allow); - - let input = canon_tmp - .join("allowed") + fn canonicalizes_path_before_policy_check() -> Result<(), Box> { + let dir = setup_test_dir()?; + let resolver = resolver_with_policy(&dir, "src/**")?; + + let input = dir + .path() + .join("src") + .join("new_dir") .join("..") - .join("allowed") - .join("secret.txt"); + .join("lib.rs"); let result = resolver.resolve(&input.to_string_lossy()); assert!( result.is_ok(), - "canonicalized path must match exact pattern" - ); - let resolved = result.unwrap(); - assert!( - resolved.ends_with(Path::new("allowed").join("secret.txt")), - "resolved path should be canonical: {:?}", - resolved + "canonicalized path should match absolute policy" ); + let resolved = result?; + assert!(!resolved.to_string_lossy().contains("..")); + Ok(()) } } diff --git a/src/llm-coding-tools-core/src/path/allowed_glob/policy.rs b/src/llm-coding-tools-core/src/path/allowed_glob/policy.rs index c7e33e4..a25e39c 100644 --- a/src/llm-coding-tools-core/src/path/allowed_glob/policy.rs +++ b/src/llm-coding-tools-core/src/path/allowed_glob/policy.rs @@ -4,14 +4,30 @@ //! **last-match-wins** precedence using reverse iteration for efficiency. //! If no patterns match, access is denied. //! -//! Patterns are always relative to the base directory (root-relative) and -//! support: +//! When built via [`GlobPolicy::builder_with_base`], patterns may be relative +//! (joined with the base path), absolute (used as-is), or contain shell +//! variables (`~`, `$HOME`). When built via [`GlobPolicy::builder`], patterns +//! are used verbatim. Pattern syntax supports: //! - `*` to match any number of characters within a path component //! - `?` to match a single character //! - `**` to match any number of path components (including `/`) +//! +//! ```rust +//! use llm_coding_tools_core::path::{GlobPolicy, GlobPolicyBuilder, RuleAction}; +//! +//! # fn example() -> Result<(), Box> { +//! let policy = GlobPolicy::builder_with_base("/workspace")? +//! .add("src/**/*.rs", RuleAction::Allow)? +//! .add("target/**", RuleAction::Deny)? +//! .build()?; +//! # Ok(()) +//! # } +//! ``` +use super::normalize::{expand_shell, normalize_path}; use crate::error::{ToolError, ToolResult}; use globset::{Glob, GlobMatcher, GlobSet, GlobSetBuilder}; +use std::path::{Path, PathBuf}; /// Action to take when a glob pattern matches. #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -57,11 +73,42 @@ impl std::fmt::Debug for GlobPolicy { } impl GlobPolicy { - /// Creates a new policy builder. + /// Creates a new policy builder with no base path. + /// + /// Patterns added via [`GlobPolicyBuilder::add()`] are used verbatim: + /// no shell expansion or base-path resolution is performed. For workspace-rooted + /// resolution, use [`GlobPolicy::builder_with_base()`] instead. pub fn builder() -> GlobPolicyBuilder { GlobPolicyBuilder::new() } + /// Creates a builder with a workspace root for resolving relative patterns. + /// + /// The base path is shell-expanded (`~`, `$HOME`, `$VAR`). Relative patterns + /// added via [`GlobPolicyBuilder::add()`] are joined with this base path + /// and normalized to forward slashes before compilation. Absolute patterns + /// are used as-is. + /// + /// # Arguments + /// + /// - `base`: Workspace root path. Shell-expanded before use. + /// + /// # Returns + /// + /// `Ok(GlobPolicyBuilder)` with `base_path` set to the expanded absolute path. + /// + /// # Errors + /// + /// Returns [`ToolError::InvalidPath`] if shell expansion fails (e.g., unset + /// environment variable). + pub fn builder_with_base(base: impl AsRef) -> ToolResult { + let expanded = expand_shell(&base.as_ref().to_string_lossy())?; + Ok(GlobPolicyBuilder { + base_path: Some(expanded), + rules: Vec::new(), + }) + } + /// Checks if a normalized path string is allowed by this policy. /// /// The path must already be normalized to forward slashes. Patterns are @@ -103,23 +150,73 @@ impl GlobPolicy { } /// Builder for constructing [`GlobPolicy`] instances. -#[derive(Debug, Default)] +#[derive(Debug)] pub struct GlobPolicyBuilder { + /// Optional workspace root. When set, relative patterns are joined with + /// this path before compilation. + base_path: Option, rules: Vec<(Glob, RuleAction)>, } +#[allow(clippy::derivable_impls)] // Explicit impl for clarity; base_path=None is required by spec +impl Default for GlobPolicyBuilder { + fn default() -> Self { + Self { + base_path: None, + rules: Vec::new(), + } + } +} + impl GlobPolicyBuilder { /// Creates a new empty policy builder. pub fn new() -> Self { Self::default() } + /// Resolves a glob pattern against the base path. + /// + /// 1. Shell-expand the pattern (`$HOME`, `~/`, `$VAR`, `${VAR:-default}`). + /// 2. If `base_path` is set AND the expanded path is relative, join with base. + /// 3. Normalize to forward slashes via [`normalize_path`]. + /// + /// # Arguments + /// + /// - `pattern`: Glob pattern string, which may contain shell variables (`$HOME`, + /// `~/`, `$VAR`, `${VAR:-default}`) and be relative or absolute. + /// + /// # Returns + /// + /// A normalized absolute or relative path string with forward slashes, ready + /// for [`Glob::new`]. + /// + /// # Errors + /// + /// Returns [`ToolError::InvalidPath`] if shell expansion fails (e.g., unset + /// environment variable in pattern). + fn resolve_pattern(&self, pattern: &str) -> ToolResult { + let expanded = expand_shell(pattern)?; + let path = Path::new(&expanded); + + let resolved = if let Some(ref base) = self.base_path { + if path.is_absolute() { + path.to_path_buf() + } else { + base.join(path) + } + } else { + path.to_path_buf() + }; + + Ok(normalize_path(&resolved).into_owned()) + } + /// Adds a pattern with the specified action. /// /// Patterns are evaluated in the order they are added with last-match-wins - /// semantics. Patterns are always relative to the base directory and do - /// NOT support `~/` or `$HOME/` prefixes (use home expansion in base - /// directory paths instead). + /// semantics. When the builder was created via [`GlobPolicy::builder_with_base()`], + /// the pattern is shell-expanded (`~`, `$HOME`, `$VAR`) and, if relative, joined + /// with the base path before compilation. Absolute patterns are used as-is. /// /// Pattern syntax: /// - `*` matches any number of characters @@ -135,9 +232,9 @@ impl GlobPolicyBuilder { /// /// # Arguments /// - /// * `pattern` - The glob pattern string (relative to base directory, no - /// home expansion) - /// * `action` - The rule action (`Allow` or `Deny`) + /// - `pattern`: The glob pattern string. Shell-expanded and resolved against + /// the base path when [`GlobPolicy::builder_with_base()`] was used. + /// - `action`: The rule action (`Allow` or `Deny`). /// /// # Returns /// @@ -145,9 +242,11 @@ impl GlobPolicyBuilder { /// /// # Errors /// - /// Returns `ToolError::InvalidPattern` if the pattern syntax is invalid. + /// Returns [`ToolError::InvalidPath`] if shell expansion fails. + /// Returns [`ToolError::InvalidPattern`] if the pattern syntax is invalid. pub fn add(mut self, pattern: &str, action: RuleAction) -> ToolResult { - let glob = Glob::new(pattern).map_err(|e| { + let resolved = self.resolve_pattern(pattern)?; + let glob = Glob::new(&resolved).map_err(|e| { ToolError::InvalidPattern(format!("invalid glob pattern '{}': {}", pattern, e)) })?; self.rules.push((glob, action)); @@ -156,31 +255,61 @@ impl GlobPolicyBuilder { /// Adds an allow pattern. /// - /// This is a convenience wrapper around `add()` with `RuleAction::Allow`. - /// See `add()` for pattern syntax and behavior details. + /// Convenience wrapper around [`Self::add()`] with [`RuleAction::Allow`]. + /// The pattern is shell-expanded and resolved against the base path when + /// [`GlobPolicy::builder_with_base()`] was used. See [`Self::add()`] for + /// full pattern-syntax details. /// - /// Patterns are relative to base directory and do NOT support `~/` or - /// `$HOME/` prefixes. + /// # Arguments + /// + /// - `pattern`: The glob pattern string. Shell-expanded and resolved against + /// the base path when [`GlobPolicy::builder_with_base()`] was used. + /// + /// # Returns + /// + /// `Ok(Self)` on success, allowing method chaining. + /// + /// # Errors + /// + /// Returns [`ToolError::InvalidPath`] if shell expansion fails. + /// Returns [`ToolError::InvalidPattern`] if the pattern syntax is invalid. pub fn allow(self, pattern: &str) -> ToolResult { self.add(pattern, RuleAction::Allow) } /// Adds a deny pattern. /// - /// This is a convenience wrapper around `add()` with `RuleAction::Deny`. - /// See `add()` for pattern syntax and behavior details. + /// Convenience wrapper around [`Self::add()`] with [`RuleAction::Deny`]. + /// The pattern is shell-expanded and resolved against the base path when + /// [`GlobPolicy::builder_with_base()`] was used. See [`Self::add()`] for + /// full pattern-syntax details. + /// + /// # Arguments + /// + /// - `pattern`: The glob pattern string. Shell-expanded and resolved against + /// the base path when [`GlobPolicy::builder_with_base()`] was used. /// - /// Patterns are relative to base directory and do NOT support `~/` or - /// `$HOME/` prefixes. + /// # Returns + /// + /// `Ok(Self)` on success, allowing method chaining. + /// + /// # Errors + /// + /// Returns [`ToolError::InvalidPath`] if shell expansion fails. + /// Returns [`ToolError::InvalidPattern`] if the pattern syntax is invalid. pub fn deny(self, pattern: &str) -> ToolResult { self.add(pattern, RuleAction::Deny) } - /// Builds the policy, compiling all patterns. + /// Builds the policy, compiling all patterns into a [`GlobSet`]. + /// + /// # Returns + /// + /// `Ok(GlobPolicy)` with all patterns compiled for matching. /// /// # Errors /// - /// Returns `ToolError::InvalidPattern` if any pattern fails to compile. + /// Returns [`ToolError::InvalidPattern`] if any pattern fails to compile. pub fn build(self) -> ToolResult { let mut builder = GlobSetBuilder::new(); let mut rules = Vec::with_capacity(self.rules.len()); @@ -202,6 +331,10 @@ impl GlobPolicyBuilder { mod tests { use super::*; use rstest::rstest; + use std::error::Error; + use std::fs; + use temp_env; + use tempfile::TempDir; #[rstest] #[case::src_lib_rs_allowed("src/lib.rs", true)] @@ -211,41 +344,34 @@ mod tests { fn builder_should_apply_allow_and_deny_rules( #[case] path: &str, #[case] expected_allowed: bool, - ) { + ) -> Result<(), Box> { let policy = GlobPolicy::builder() - .allow("*.rs") - .unwrap() - .deny("target/**") - .unwrap() - .build() - .unwrap(); + .allow("*.rs")? + .deny("target/**")? + .build()?; assert_eq!(policy.is_allowed(path), expected_allowed); + Ok(()) } #[test] - fn glob_policy_last_match_wins() { + fn glob_policy_last_match_wins() -> Result<(), Box> { let policy = GlobPolicy::builder() - .deny("target/**") - .unwrap() - .allow("target/debug/app") - .unwrap() - .build() - .unwrap(); + .deny("target/**")? + .allow("target/debug/app")? + .build()?; assert!(policy.is_allowed("target/debug/app")); assert!(!policy.is_allowed("target/release/app")); assert!(!policy.is_allowed("target/anything.txt")); let policy2 = GlobPolicy::builder() - .allow("target/debug/app") - .unwrap() - .deny("target/**") - .unwrap() - .build() - .unwrap(); + .allow("target/debug/app")? + .deny("target/**")? + .build()?; assert!(!policy2.is_allowed("target/debug/app")); + Ok(()) } #[test] @@ -257,19 +383,16 @@ mod tests { #[rstest] #[case::anything_txt_denied("anything.txt")] #[case::src_lib_rs_denied("src/lib.rs")] - fn empty_policy_should_deny_any_path(#[case] path: &str) { - let policy = GlobPolicy::builder().build().unwrap(); + fn empty_policy_should_deny_any_path(#[case] path: &str) -> Result<(), Box> { + let policy = GlobPolicy::builder().build()?; assert!(!policy.is_allowed(path)); + Ok(()) } // Keep policy construction out of the case table so each case reads as // just input path + expected decision. - fn src_rs_policy() -> GlobPolicy { - GlobPolicy::builder() - .allow("src/**/*.rs") - .unwrap() - .build() - .unwrap() + fn src_rs_policy() -> Result> { + Ok(GlobPolicy::builder().allow("src/**/*.rs")?.build()?) } #[rstest] @@ -288,8 +411,8 @@ mod tests { fn src_globstar_rs_policy_should_match_only_normalized_src_rs_paths( #[case] path: &str, #[case] expected_allowed: bool, - ) { - let policy = src_rs_policy(); + ) -> Result<(), Box> { + let policy = src_rs_policy()?; let result = policy.is_allowed(path); assert_eq!( @@ -299,5 +422,125 @@ mod tests { path, if expected_allowed { "" } else { "not " } ); + Ok(()) + } + + // --- builder (no base path) --- + + #[test] + fn default_builder_has_no_base_path() -> Result<(), Box> { + // Default builder uses patterns verbatim (no base-path resolution). + let policy = GlobPolicyBuilder::default().allow("src/**/*.rs")?.build()?; + // If base_path were Some(...), this relative path would not match. + assert!(policy.is_allowed("src/lib.rs")); + Ok(()) + } + + // Even without a base path, patterns still undergo shell expansion (~, + // $HOME) so that "~/src/**" resolves to the absolute home directory. + #[test] + fn shell_expansion_in_pattern_without_base() -> Result<(), Box> { + let temp = TempDir::new()?; + let temp_home = temp.path().canonicalize()?; + + temp_env::with_var("HOME", Some(&temp_home), || { + let policy = GlobPolicy::builder().allow("~/src/**")?.build()?; + let abs_path = format!( + "{}/src/lib.rs", + temp_home.to_str().unwrap().trim_end_matches('/') + ); + assert!(policy.is_allowed(&abs_path)); + Ok(()) + }) + } + + // --- builder_with_base --- + + // A relative pattern like "src/**" is joined with the base path, so the + // resulting compiled glob matches the absolute path under that base. + #[test] + fn relative_pattern_with_base_matches_absolute_path() -> Result<(), Box> { + let dir = TempDir::new()?; + let base = dir.path().canonicalize()?; + // Create a real file so the canonicalized base is a valid directory tree. + fs::create_dir_all(dir.path().join("src"))?; + fs::write(dir.path().join("src/lib.rs"), "content")?; + + // "src/**" is relative, so builder_with_base joins it with `base` + // to produce an absolute glob like "/tmp/.tmpXXX/src/**". + let policy = GlobPolicy::builder_with_base(&base)? + .allow("src/**")? + .build()?; + + // Reconstruct the absolute path the policy should match against. + let abs_path = format!( + "{}/src/lib.rs", + base.to_str().unwrap().trim_end_matches('/') + ); + assert!( + policy.is_allowed(&abs_path), + "relative pattern 'src/**' should match absolute path '{}'", + abs_path + ); + Ok(()) + } + + // Absolute patterns bypass base-path joining entirely - the leading `/` + // means `resolve_pattern` uses the pattern verbatim regardless of base. + #[test] + fn absolute_pattern_with_base_used_as_is() -> Result<(), Box> { + let dir = TempDir::new()?; + let base = dir.path().canonicalize()?; + + let abs_pattern = "/some/absolute/path/**/*.rs"; + // base is set but ignored because the pattern starts with `/`. + let policy = GlobPolicy::builder_with_base(&base)? + .allow(abs_pattern)? + .build()?; + + assert!(policy.is_allowed("/some/absolute/path/file.rs")); + assert!(!policy.is_allowed("/other/path/file.rs")); + Ok(()) + } + + // Shell expansion applies to the base path itself, not just patterns. + // Here `$HOME/project` is expanded before becoming the base, then the + // relative "src/**" pattern is joined against that resolved base. + #[test] + fn shell_expansion_in_pattern_with_base() -> Result<(), Box> { + let temp = TempDir::new()?; + let temp_home = temp.path().canonicalize()?; + let project_dir = temp.path().join("project"); + fs::create_dir_all(project_dir.join("src"))?; + + temp_env::with_var("HOME", Some(&temp_home), || { + let policy = GlobPolicy::builder_with_base("$HOME/project")? + .allow("src/**")? + .build()?; + + // The final glob matches $HOME/project/src/**. + let abs_path = format!( + "{}/project/src/lib.rs", + temp_home.to_str().unwrap().trim_end_matches('/') + ); + assert!(policy.is_allowed(&abs_path)); + Ok(()) + }) + } + + #[test] + fn builder_with_base_fails_on_invalid_shell_expansion() { + temp_env::with_var("NONEXISTENT_VAR_99999", None::<&str>, || { + let result = GlobPolicy::builder_with_base("$NONEXISTENT_VAR_99999/path"); + assert!(result.is_err()); + }); + } + + #[test] + fn add_fails_on_invalid_shell_expansion_in_pattern() { + temp_env::with_var("NONEXISTENT_VAR_99999", None::<&str>, || { + let result = GlobPolicy::builder().allow("$NONEXISTENT_VAR_99999/**"); + assert!(result.is_err()); + }); } } diff --git a/src/llm-coding-tools-core/src/path/mod.rs b/src/llm-coding-tools-core/src/path/mod.rs index e76a439..67e8620 100644 --- a/src/llm-coding-tools-core/src/path/mod.rs +++ b/src/llm-coding-tools-core/src/path/mod.rs @@ -127,20 +127,3 @@ pub(crate) fn resolve_new_file_fast(candidate: &Path) -> Option { None } } - -#[cfg(unix)] -#[inline] -pub(crate) fn path_as_str(path: &Path) -> &str { - use std::os::unix::ffi::OsStrExt; - let os_str = path.as_os_str(); - match std::str::from_utf8(os_str.as_bytes()) { - Ok(s) => s, - Err(_) => path.to_str().unwrap_or(""), - } -} - -#[cfg(not(unix))] -#[inline] -pub(crate) fn path_as_str(path: &Path) -> &str { - path.to_str().unwrap_or("") -} diff --git a/src/llm-coding-tools-core/src/workspace.rs b/src/llm-coding-tools-core/src/workspace.rs new file mode 100644 index 0000000..ecb6983 --- /dev/null +++ b/src/llm-coding-tools-core/src/workspace.rs @@ -0,0 +1,99 @@ +//! Workspace root resolution. +//! +//! Provides [`resolve_workspace_root`] for determining the workspace root +//! directory. Re-exported at the crate root for convenience. +//! +//! Call once at construction time and cache the result. + +use std::path::PathBuf; + +/// Resolves the workspace root directory. +/// +/// Prefers the git repository root (closest ancestor containing `.git`). +/// Falls back to the current working directory when not inside a git repo. +/// +/// Call once at construction time and cache the result. Do not call per-request. +/// +/// # Errors +/// +/// Returns [`std::io::Error`] if the current working directory cannot be +/// determined (e.g. it has been deleted or permissions prevent access). +/// +/// # Returns +/// +/// `Ok(absolute_path)` on success - an absolute [`PathBuf`] pointing at the +/// workspace root. +pub fn resolve_workspace_root() -> Result { + let cwd = std::env::current_dir()?; + let mut candidate = cwd.as_path(); + loop { + if candidate.join(".git").exists() { + return Ok(candidate.to_path_buf()); + } + match candidate.parent() { + Some(parent) => candidate = parent, + None => break, + } + } + Ok(cwd) +} + +#[cfg(test)] +mod tests { + use super::*; + use serial_test::serial; + use soft_canonicalize::soft_canonicalize; + use tempfile::TempDir; + + #[test] + fn resolve_workspace_root_should_return_absolute_path() { + let root = resolve_workspace_root().unwrap(); + assert!( + root.is_absolute(), + "workspace root must be absolute: {:?}", + root + ); + } + + #[test] + fn resolve_workspace_root_should_return_existing_directory() { + let root = resolve_workspace_root().unwrap(); + assert!(root.is_dir(), "workspace root must exist: {:?}", root); + } + + #[test] + fn resolve_workspace_root_should_prefer_git_root_when_in_repo() { + let root = resolve_workspace_root().unwrap(); + let in_repo = std::env::current_dir() + .unwrap() + .ancestors() + .any(|p| p.join(".git").exists()); + assert!( + !in_repo || root.join(".git").exists(), + "inside a git repo, workspace root should contain .git: {:?}", + root + ); + } + + #[test] + #[serial] + fn resolve_workspace_root_should_fall_back_to_cwd_when_not_in_repo() { + let temp = TempDir::new().unwrap(); + + if temp.path().ancestors().any(|p| p.join(".git").exists()) { + return; + } + + let original = std::env::current_dir().unwrap(); + std::env::set_current_dir(temp.path()).unwrap(); + + let root = resolve_workspace_root().unwrap(); + assert_eq!( + soft_canonicalize(&root).unwrap(), + soft_canonicalize(temp.path()).unwrap(), + "outside git repo, workspace root should equal cwd" + ); + + std::env::set_current_dir(original).unwrap(); + } +} From a6054fd6146fe53a592da0db3f50c0b6a9fa99c2 Mon Sep 17 00:00:00 2001 From: Sewer56 Date: Fri, 10 Apr 2026 22:52:56 +0100 Subject: [PATCH 2/4] Changed: Extract validate_resolved helper to eliminate code duplication Consolidate identical containment and policy checks from three resolution branches (canonicalize, resolve_new_file_fast, soft_canonicalize) into a single validate_resolved helper function. --- .../src/path/allowed_glob/mod.rs | 58 +++++++++---------- 1 file changed, 28 insertions(+), 30 deletions(-) diff --git a/src/llm-coding-tools-core/src/path/allowed_glob/mod.rs b/src/llm-coding-tools-core/src/path/allowed_glob/mod.rs index 9588dcc..0719212 100644 --- a/src/llm-coding-tools-core/src/path/allowed_glob/mod.rs +++ b/src/llm-coding-tools-core/src/path/allowed_glob/mod.rs @@ -205,44 +205,17 @@ fn resolve_candidate( ) -> ToolResult { // Step 1: canonicalize for existing files - resolves symlinks and normalizes. if let Ok(resolved) = candidate.canonicalize() { - if !resolved.starts_with(workspace_root) { - return Err(reject(path)); - } - if let Some(policy) = policy { - let abs_str = normalize_path(&resolved); - if !policy.is_allowed(abs_str.as_ref()) { - return Err(reject(path)); - } - } - return Ok(resolved); + return validate_resolved(resolved, path, workspace_root, policy); } // Step 2: fast path for new files in existing directories. if let Some(resolved) = resolve_new_file_fast(candidate) { - if !resolved.starts_with(workspace_root) { - return Err(reject(path)); - } - if let Some(policy) = policy { - let abs_str = normalize_path(&resolved); - if !policy.is_allowed(abs_str.as_ref()) { - return Err(reject(path)); - } - } - return Ok(resolved); + return validate_resolved(resolved, path, workspace_root, policy); } // Step 3: fallback for paths with missing parent dirs. if let Ok(resolved) = soft_canonicalize(candidate) { - if !resolved.starts_with(workspace_root) { - return Err(reject(path)); - } - if let Some(policy) = policy { - let abs_str = normalize_path(&resolved); - if !policy.is_allowed(abs_str.as_ref()) { - return Err(reject(path)); - } - } - return Ok(resolved); + return validate_resolved(resolved, path, workspace_root, policy); } Err(reject(path)) @@ -253,6 +226,31 @@ fn reject(path: &str) -> ToolError { ToolError::InvalidPath(format!("path '{}' is not within allowed directories", path)) } +/// Validates a resolved path against workspace containment and policy. +/// +/// Checks that the resolved path: +/// 1. Is within the workspace root directory +/// 2. Passes the glob policy (if one is configured) +/// +/// Returns the resolved path if valid, or a rejection error otherwise. +fn validate_resolved( + resolved: PathBuf, + path: &str, + workspace_root: &Path, + policy: Option<&GlobPolicy>, +) -> ToolResult { + if !resolved.starts_with(workspace_root) { + return Err(reject(path)); + } + if let Some(policy) = policy { + let abs_str = normalize_path(&resolved); + if !policy.is_allowed(abs_str.as_ref()) { + return Err(reject(path)); + } + } + Ok(resolved) +} + #[cfg(test)] mod tests { use super::*; From 4a5915c2e0d813c4d0e9233acbffbdc152f02a4a Mon Sep 17 00:00:00 2001 From: Sewer56 Date: Fri, 10 Apr 2026 22:54:02 +0100 Subject: [PATCH 3/4] Added: Debug assertion in AllowedGlob::from_canonical --- src/llm-coding-tools-core/src/path/allowed_glob/mod.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/llm-coding-tools-core/src/path/allowed_glob/mod.rs b/src/llm-coding-tools-core/src/path/allowed_glob/mod.rs index 0719212..0a2e42e 100644 --- a/src/llm-coding-tools-core/src/path/allowed_glob/mod.rs +++ b/src/llm-coding-tools-core/src/path/allowed_glob/mod.rs @@ -118,6 +118,10 @@ impl AllowedGlobResolver { /// /// A new [`AllowedGlobResolver`] with no glob policy attached. pub fn from_canonical(workspace_root: impl AsRef) -> Self { + debug_assert!( + workspace_root.as_ref().is_absolute(), + "from_canonical expects an absolute workspace_root" + ); Self { workspace_root: Arc::from(workspace_root.as_ref()), policy: None, From 1e9e0c6442f83b7439966fbda614e41c1fd7851c Mon Sep 17 00:00:00 2001 From: Sewer56 Date: Fri, 10 Apr 2026 23:06:16 +0100 Subject: [PATCH 4/4] Fixed: Use platform-aware paths in glob policy tests on Windows Unix-style /some/path is not absolute on Windows (no drive letter), causing three tests to fail. Use cfg!(windows) to pick appropriate paths and normalize test paths through the same pipeline as production code. --- .../src/path/allowed_glob/mod.rs | 9 +++++-- .../src/path/allowed_glob/policy.rs | 27 ++++++++++++------- 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/src/llm-coding-tools-core/src/path/allowed_glob/mod.rs b/src/llm-coding-tools-core/src/path/allowed_glob/mod.rs index 0a2e42e..578e513 100644 --- a/src/llm-coding-tools-core/src/path/allowed_glob/mod.rs +++ b/src/llm-coding-tools-core/src/path/allowed_glob/mod.rs @@ -315,8 +315,13 @@ mod tests { #[test] fn from_canonical_skips_validation() { - let resolver = AllowedGlobResolver::from_canonical(PathBuf::from("/nonexistent/path")); - assert_eq!(resolver.workspace_root(), Path::new("/nonexistent/path")); + let fake_abs = if cfg!(windows) { + PathBuf::from("C:\\nonexistent\\path") + } else { + PathBuf::from("/nonexistent/path") + }; + let resolver = AllowedGlobResolver::from_canonical(&fake_abs); + assert_eq!(resolver.workspace_root(), fake_abs); } #[test] diff --git a/src/llm-coding-tools-core/src/path/allowed_glob/policy.rs b/src/llm-coding-tools-core/src/path/allowed_glob/policy.rs index a25e39c..2aa2a98 100644 --- a/src/llm-coding-tools-core/src/path/allowed_glob/policy.rs +++ b/src/llm-coding-tools-core/src/path/allowed_glob/policy.rs @@ -444,11 +444,9 @@ mod tests { let temp_home = temp.path().canonicalize()?; temp_env::with_var("HOME", Some(&temp_home), || { - let policy = GlobPolicy::builder().allow("~/src/**")?.build()?; - let abs_path = format!( - "{}/src/lib.rs", - temp_home.to_str().unwrap().trim_end_matches('/') - ); + let policy = GlobPolicy::builder().allow("$HOME/src/**")?.build()?; + let abs_path = temp_home.join("src").join("lib.rs"); + let abs_path = normalize_path(&abs_path); assert!(policy.is_allowed(&abs_path)); Ok(()) }) @@ -492,14 +490,25 @@ mod tests { let dir = TempDir::new()?; let base = dir.path().canonicalize()?; - let abs_pattern = "/some/absolute/path/**/*.rs"; - // base is set but ignored because the pattern starts with `/`. + let (abs_pattern, file_path, other_path) = if cfg!(windows) { + ( + "C:/some/absolute/path/**/*.rs", + "C:/some/absolute/path/file.rs", + "D:/other/path/file.rs", + ) + } else { + ( + "/some/absolute/path/**/*.rs", + "/some/absolute/path/file.rs", + "/other/path/file.rs", + ) + }; let policy = GlobPolicy::builder_with_base(&base)? .allow(abs_pattern)? .build()?; - assert!(policy.is_allowed("/some/absolute/path/file.rs")); - assert!(!policy.is_allowed("/other/path/file.rs")); + assert!(policy.is_allowed(file_path)); + assert!(!policy.is_allowed(other_path)); Ok(()) }