From 96a1caa4a140ae8744b117f786b7e811cd9bee18 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Fri, 22 May 2026 10:37:09 +0200 Subject: [PATCH] fix(sandbox): restore GPU proc baseline Signed-off-by: Evan Lezar --- architecture/security-policy.md | 9 + crates/openshell-sandbox/src/lib.rs | 446 +++++++++++++++++++++------- docs/sandboxes/manage-sandboxes.mdx | 5 + docs/sandboxes/policies.mdx | 2 + 4 files changed, 357 insertions(+), 105 deletions(-) diff --git a/architecture/security-policy.md b/architecture/security-policy.md index b0d56e3a2..ad9de7afa 100644 --- a/architecture/security-policy.md +++ b/architecture/security-policy.md @@ -21,6 +21,15 @@ For the field-by-field YAML reference, use Filesystem and process policy are startup-time controls. Network policy is dynamic and can be hot-reloaded when the new policy validates successfully. +The sandbox supervisor also injects runtime baseline filesystem paths before +the child process starts. Proxy mode adds the standard read-only system paths +and writable work paths needed by the proxy and shell environment. GPU runtimes +add the NVIDIA or WSL2 device nodes exposed inside the sandbox and promote +`/proc` to read-write for default-like policies because CUDA initialization +writes `/proc//task//comm`. Custom policies that explicitly keep a +GPU-required path read-only fail at startup with an actionable diagnostic +instead of being silently widened. + ## Network Decisions Ordinary network traffic follows this order: diff --git a/crates/openshell-sandbox/src/lib.rs b/crates/openshell-sandbox/src/lib.rs index 576919444..d49d4903e 100644 --- a/crates/openshell-sandbox/src/lib.rs +++ b/crates/openshell-sandbox/src/lib.rs @@ -1450,6 +1450,29 @@ fn enumerate_gpu_device_nodes() -> Vec { paths } +#[derive(Debug, Clone)] +struct BaselineEnrichmentPaths { + read_only: Vec, + read_write: Vec, + gpu_read_write: Vec, +} + +impl BaselineEnrichmentPaths { + fn is_empty(&self) -> bool { + self.read_only.is_empty() && self.read_write.is_empty() + } + + fn is_gpu_read_write(&self, path: &str) -> bool { + self.gpu_read_write.iter().any(|p| p == path) + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum BaselinePolicySource { + DefaultLike, + Custom, +} + fn push_unique(paths: &mut Vec, path: String) { if !paths.iter().any(|p| p == &path) { paths.push(path); @@ -1460,40 +1483,47 @@ fn collect_baseline_enrichment_paths( include_proxy: bool, include_gpu: bool, gpu_device_nodes: Vec, -) -> (Vec, Vec) { - let mut ro = Vec::new(); - let mut rw = Vec::new(); +) -> BaselineEnrichmentPaths { + let mut read_only = Vec::new(); + let mut read_write = Vec::new(); + let mut gpu_read_write = Vec::new(); if include_proxy { for &path in PROXY_BASELINE_READ_ONLY { - push_unique(&mut ro, path.to_string()); + push_unique(&mut read_only, path.to_string()); } for &path in PROXY_BASELINE_READ_WRITE { - push_unique(&mut rw, path.to_string()); + push_unique(&mut read_write, path.to_string()); } } if include_gpu { for &path in GPU_BASELINE_READ_ONLY { - push_unique(&mut ro, path.to_string()); + push_unique(&mut read_only, path.to_string()); } for &path in GPU_BASELINE_READ_WRITE { - push_unique(&mut rw, path.to_string()); + push_unique(&mut read_write, path.to_string()); + push_unique(&mut gpu_read_write, path.to_string()); } for path in gpu_device_nodes { - push_unique(&mut rw, path); + push_unique(&mut read_write, path.clone()); + push_unique(&mut gpu_read_write, path); } } // A path promoted to read_write (e.g. /proc for GPU) should not also - // appear in read_only — Landlock handles the overlap correctly but the - // duplicate is confusing when inspecting the effective policy. - ro.retain(|p| !rw.contains(p)); + // appear in read_only. Landlock handles the overlap correctly, but the + // duplicate obscures the effective policy when operators inspect it. + read_only.retain(|p| !read_write.contains(p)); - (ro, rw) + BaselineEnrichmentPaths { + read_only, + read_write, + gpu_read_write, + } } -fn active_baseline_enrichment_paths(include_proxy: bool) -> (Vec, Vec) { +fn active_baseline_enrichment_paths(include_proxy: bool) -> BaselineEnrichmentPaths { let include_gpu = has_gpu_devices(); let gpu_device_nodes = if include_gpu { enumerate_gpu_device_nodes() @@ -1507,20 +1537,49 @@ fn active_baseline_enrichment_paths(include_proxy: bool) -> (Vec, Vec (Vec, Vec) { - active_baseline_enrichment_paths(true) + let paths = active_baseline_enrichment_paths(true); + (paths.read_only, paths.read_write) +} + +fn proto_policy_source(proto: &openshell_core::proto::SandboxPolicy) -> BaselinePolicySource { + if proto == &openshell_policy::restrictive_default_policy() { + BaselinePolicySource::DefaultLike + } else { + BaselinePolicySource::Custom + } +} + +fn gpu_read_write_conflict(path: &str) -> miette::Report { + miette::miette!( + "GPU sandbox policy keeps required path '{path}' read-only. \ + CUDA workloads require this path to be read-write under the current Landlock runtime. \ + Move '{path}' from filesystem_policy.read_only to filesystem_policy.read_write, \ + or omit the custom filesystem policy to use OpenShell's GPU default." + ) +} + +fn emit_baseline_enriched_event() { + ocsf_emit!( + ConfigStateChangeBuilder::new(ocsf_ctx()) + .severity(SeverityId::Informational) + .status(StatusId::Success) + .state(StateId::Enabled, "enriched") + .message("Enriched policy with baseline filesystem paths") + .build() + ); } fn enrich_proto_baseline_paths_with( proto: &mut openshell_core::proto::SandboxPolicy, - ro: &[String], - rw: &[String], + paths: &BaselineEnrichmentPaths, + source: BaselinePolicySource, path_exists: F, -) -> bool +) -> Result where F: Fn(&str) -> bool, { - if ro.is_empty() && rw.is_empty() { - return false; + if paths.is_empty() { + return Ok(false); } let fs = proto @@ -1530,24 +1589,26 @@ where ..Default::default() }); + // Baseline paths are system-injected. Skip paths that do not exist in + // this container image to avoid noisy warnings from Landlock and, more + // critically, to prevent a single missing baseline path from abandoning + // the entire Landlock ruleset under best-effort mode (see issue #664). let mut modified = false; - for path in ro { - if !fs.read_only.iter().any(|p| p == path) && !fs.read_write.iter().any(|p| p == path) { - if !path_exists(path) { - debug!( - path, - "Baseline read-only path does not exist, skipping enrichment" - ); - continue; - } - fs.read_only.push(path.clone()); - modified = true; - } - } - for path in rw { + for path in &paths.read_only { if fs.read_only.iter().any(|p| p == path) || fs.read_write.iter().any(|p| p == path) { continue; } + if !path_exists(path) { + debug!( + path, + "Baseline read-only path does not exist, skipping enrichment" + ); + continue; + } + fs.read_only.push(path.clone()); + modified = true; + } + for path in &paths.read_write { if !path_exists(path) { debug!( path, @@ -1555,95 +1616,143 @@ where ); continue; } + + if fs.read_write.iter().any(|p| p == path) { + if source == BaselinePolicySource::DefaultLike && paths.is_gpu_read_write(path) { + let before = fs.read_only.len(); + fs.read_only.retain(|p| p != path); + modified |= fs.read_only.len() != before; + } + continue; + } + + if fs.read_only.iter().any(|p| p == path) { + if paths.is_gpu_read_write(path) { + if source == BaselinePolicySource::Custom { + return Err(gpu_read_write_conflict(path)); + } + fs.read_only.retain(|p| p != path); + fs.read_write.push(path.clone()); + modified = true; + } + continue; + } + fs.read_write.push(path.clone()); modified = true; } - modified + if modified { + emit_baseline_enriched_event(); + } + + Ok(modified) } -/// Ensure a proto `SandboxPolicy` includes the baseline filesystem paths -/// required by proxy-mode sandboxes and GPU runtimes. Paths are only added if -/// missing; user-specified paths are never removed. +/// Ensure a proto `SandboxPolicy` includes the active baseline filesystem +/// paths required by proxy mode and GPU runtimes. /// /// Returns `true` if the policy was modified (caller may want to sync back). -fn enrich_proto_baseline_paths(proto: &mut openshell_core::proto::SandboxPolicy) -> bool { - let (ro, rw) = active_baseline_enrichment_paths(!proto.network_policies.is_empty()); - - // Baseline paths are system-injected, not user-specified. Skip paths - // that do not exist in this container image to avoid noisy warnings from - // Landlock and, more critically, to prevent a single missing baseline - // path from abandoning the entire Landlock ruleset under best-effort - // mode (see issue #664). - let modified = enrich_proto_baseline_paths_with(proto, &ro, &rw, |path| { +fn enrich_proto_baseline_paths_from_source( + proto: &mut openshell_core::proto::SandboxPolicy, + source: BaselinePolicySource, +) -> Result { + let include_proxy = !proto.network_policies.is_empty(); + let paths = active_baseline_enrichment_paths(include_proxy); + enrich_proto_baseline_paths_with(proto, &paths, source, |path| { std::path::Path::new(path).exists() - }); - - if modified { - ocsf_emit!( - ConfigStateChangeBuilder::new(ocsf_ctx()) - .severity(SeverityId::Informational) - .status(StatusId::Success) - .state(StateId::Enabled, "enriched") - .message("Enriched policy with baseline filesystem paths for proxy mode") - .build() - ); - } + }) +} - modified +fn enrich_proto_baseline_paths(proto: &mut openshell_core::proto::SandboxPolicy) -> Result { + enrich_proto_baseline_paths_from_source(proto, proto_policy_source(proto)) } -/// Ensure a `SandboxPolicy` (Rust type) includes the baseline filesystem -/// paths required by proxy-mode sandboxes and GPU runtimes. Used for the -/// local-file code path where no proto is available. -fn enrich_sandbox_baseline_paths(policy: &mut SandboxPolicy) { - let (ro, rw) = - active_baseline_enrichment_paths(matches!(policy.network.mode, NetworkMode::Proxy)); - if ro.is_empty() && rw.is_empty() { - return; +fn enrich_sandbox_baseline_paths_with( + policy: &mut SandboxPolicy, + paths: &BaselineEnrichmentPaths, + source: BaselinePolicySource, + path_exists: F, +) -> Result +where + F: Fn(&str) -> bool, +{ + if paths.is_empty() { + return Ok(false); } let mut modified = false; - for path in &ro { - let p = std::path::PathBuf::from(path); - if !policy.filesystem.read_only.contains(&p) && !policy.filesystem.read_write.contains(&p) { - if !p.exists() { - debug!( - path, - "Baseline read-only path does not exist, skipping enrichment" - ); - continue; - } - policy.filesystem.read_only.push(p); - modified = true; - } - } - for path in &rw { + for path in &paths.read_only { let p = std::path::PathBuf::from(path); if policy.filesystem.read_only.contains(&p) || policy.filesystem.read_write.contains(&p) { continue; } - if !p.exists() { + if !path_exists(path) { + debug!( + path, + "Baseline read-only path does not exist, skipping enrichment" + ); + continue; + } + policy.filesystem.read_only.push(p); + modified = true; + } + for path in &paths.read_write { + let p = std::path::PathBuf::from(path); + if !path_exists(path) { debug!( path, "Baseline read-write path does not exist, skipping enrichment" ); continue; } + + if policy.filesystem.read_write.contains(&p) { + if source == BaselinePolicySource::DefaultLike && paths.is_gpu_read_write(path) { + let before = policy.filesystem.read_only.len(); + policy + .filesystem + .read_only + .retain(|existing| existing != &p); + modified |= policy.filesystem.read_only.len() != before; + } + continue; + } + + if policy.filesystem.read_only.contains(&p) { + if paths.is_gpu_read_write(path) { + if source == BaselinePolicySource::Custom { + return Err(gpu_read_write_conflict(path)); + } + policy + .filesystem + .read_only + .retain(|existing| existing != &p); + policy.filesystem.read_write.push(p); + modified = true; + } + continue; + } + policy.filesystem.read_write.push(p); modified = true; } if modified { - ocsf_emit!( - ConfigStateChangeBuilder::new(ocsf_ctx()) - .severity(SeverityId::Informational) - .status(StatusId::Success) - .state(StateId::Enabled, "enriched") - .message("Enriched policy with baseline filesystem paths for proxy mode") - .build() - ); + emit_baseline_enriched_event(); } + + Ok(modified) +} + +/// Ensure a `SandboxPolicy` (Rust type) includes active baseline filesystem +/// paths. Used for the local-file code path where no proto is available. +fn enrich_sandbox_baseline_paths(policy: &mut SandboxPolicy) -> Result { + let include_proxy = matches!(policy.network.mode, NetworkMode::Proxy); + let paths = active_baseline_enrichment_paths(include_proxy); + enrich_sandbox_baseline_paths_with(policy, &paths, BaselinePolicySource::Custom, |path| { + std::path::Path::new(path).exists() + }) } #[cfg(test)] @@ -1744,7 +1853,7 @@ mod baseline_tests { }, ); - enrich_proto_baseline_paths(&mut policy); + enrich_proto_baseline_paths(&mut policy).expect("baseline enrichment should succeed"); let filesystem = policy.filesystem.expect("filesystem policy"); assert!( @@ -1758,30 +1867,120 @@ mod baseline_tests { } #[test] - fn proto_gpu_enrichment_adds_devices_without_network_policy() { + fn proto_gpu_enrichment_promotes_default_proc_without_network_policy() { let mut policy = openshell_policy::restrictive_default_policy(); assert!( policy.network_policies.is_empty(), "regression setup must exercise the no-network default path" ); - let (ro, rw) = - collect_baseline_enrichment_paths(false, true, vec!["/dev/nvidia0".to_string()]); - let enriched = enrich_proto_baseline_paths_with(&mut policy, &ro, &rw, |path| { - matches!(path, "/proc" | "/dev/nvidia0") - }); + let paths = + collect_baseline_enrichment_paths(false, true, vec!["/dev/nvidia0".to_string()]); + let enriched = enrich_proto_baseline_paths_with( + &mut policy, + &paths, + BaselinePolicySource::DefaultLike, + |_| true, + ) + .expect("default GPU enrichment should succeed"); let filesystem = policy.filesystem.expect("filesystem policy"); + assert!(enriched, "default GPU enrichment should modify policy"); assert!( - enriched, - "GPU enrichment should not require network policies" + !filesystem.read_only.contains(&"/proc".to_string()), + "default GPU enrichment should remove /proc from read_only" + ); + assert!( + filesystem.read_write.contains(&"/proc".to_string()), + "default GPU enrichment should promote /proc to read_write" ); assert!( filesystem.read_write.contains(&"/dev/nvidia0".to_string()), - "GPU enrichment should add enumerated device nodes without network policies" + "default GPU enrichment should add enumerated GPU devices" ); } + #[test] + fn proto_gpu_enrichment_promotes_discovered_image_policy_proc() { + let mut policy = openshell_policy::restrictive_default_policy(); + policy.network_policies.insert( + "image-policy".into(), + openshell_core::proto::NetworkPolicyRule { + name: "image-policy".into(), + endpoints: vec![openshell_core::proto::NetworkEndpoint { + host: "example.com".into(), + port: 443, + ..Default::default() + }], + ..Default::default() + }, + ); + + let paths = collect_baseline_enrichment_paths(true, true, vec!["/dev/nvidia0".to_string()]); + let enriched = enrich_proto_baseline_paths_with( + &mut policy, + &paths, + BaselinePolicySource::DefaultLike, + |_| true, + ) + .expect("discovered image policy GPU enrichment should succeed"); + + let filesystem = policy.filesystem.expect("filesystem policy"); + assert!(enriched, "discovered image policy should be enriched"); + assert!( + !filesystem.read_only.contains(&"/proc".to_string()), + "image-discovered GPU policy should remove /proc from read_only" + ); + assert!( + filesystem.read_write.contains(&"/proc".to_string()), + "image-discovered GPU policy should promote /proc to read_write" + ); + } + + #[test] + fn proto_gpu_enrichment_adds_missing_custom_gpu_paths() { + let mut policy = openshell_policy::restrictive_default_policy(); + policy.filesystem = Some(openshell_core::proto::FilesystemPolicy { + read_only: vec![], + read_write: vec![], + include_workdir: true, + }); + + let paths = + collect_baseline_enrichment_paths(false, true, vec!["/dev/nvidia0".to_string()]); + let enriched = enrich_proto_baseline_paths_with( + &mut policy, + &paths, + BaselinePolicySource::Custom, + |path| matches!(path, "/proc" | "/dev/nvidia0"), + ) + .expect("custom GPU enrichment should add omitted baseline paths"); + + let filesystem = policy.filesystem.expect("filesystem policy"); + assert!(enriched, "custom GPU enrichment should modify policy"); + assert!(filesystem.read_write.contains(&"/proc".to_string())); + assert!(filesystem.read_write.contains(&"/dev/nvidia0".to_string())); + } + + #[test] + fn proto_gpu_enrichment_rejects_custom_read_only_proc() { + let mut policy = openshell_policy::restrictive_default_policy(); + let paths = + collect_baseline_enrichment_paths(false, true, vec!["/dev/nvidia0".to_string()]); + + let error = enrich_proto_baseline_paths_with( + &mut policy, + &paths, + BaselinePolicySource::Custom, + |path| path == "/proc", + ) + .expect_err("custom read-only /proc should conflict with GPU baseline"); + + let message = error.to_string(); + assert!(message.contains("GPU sandbox policy keeps required path '/proc' read-only")); + assert!(message.contains("filesystem_policy.read_write")); + } + #[test] fn gpu_baseline_read_write_contains_dxg() { // /dev/dxg must be present so WSL2 sandboxes get the Landlock @@ -1810,7 +2009,7 @@ mod baseline_tests { process: ProcessPolicy::default(), }; - enrich_sandbox_baseline_paths(&mut policy); + enrich_sandbox_baseline_paths(&mut policy).expect("baseline enrichment should succeed"); assert!( policy @@ -1828,6 +2027,40 @@ mod baseline_tests { ); } + #[test] + fn local_gpu_enrichment_rejects_custom_read_only_proc() { + let mut policy = SandboxPolicy { + version: 1, + filesystem: FilesystemPolicy { + read_only: vec![std::path::PathBuf::from("/proc")], + read_write: vec![], + include_workdir: false, + }, + network: NetworkPolicy { + mode: NetworkMode::Block, + proxy: None, + }, + landlock: LandlockPolicy::default(), + process: ProcessPolicy::default(), + }; + let paths = + collect_baseline_enrichment_paths(false, true, vec!["/dev/nvidia0".to_string()]); + + let error = enrich_sandbox_baseline_paths_with( + &mut policy, + &paths, + BaselinePolicySource::Custom, + |path| path == "/proc", + ) + .expect_err("custom read-only /proc should conflict with GPU baseline"); + + assert!( + error + .to_string() + .contains("GPU sandbox policy keeps required path '/proc' read-only") + ); + } + #[test] fn gpu_baseline_read_only_contains_usr_lib_wsl() { // /usr/lib/wsl must be present so CDI-injected WSL2 GPU library @@ -1963,7 +2196,7 @@ async fn load_policy( landlock: config.landlock, process: config.process, }; - enrich_sandbox_baseline_paths(&mut policy); + enrich_sandbox_baseline_paths(&mut policy)?; return Ok((policy, Some(Arc::new(engine)), None)); } @@ -1994,7 +2227,10 @@ async fn load_policy( let mut discovered = discover_policy_from_disk_or_default(); // Enrich before syncing so the gateway baseline includes // baseline paths from the start. - enrich_proto_baseline_paths(&mut discovered); + enrich_proto_baseline_paths_from_source( + &mut discovered, + BaselinePolicySource::DefaultLike, + )?; let sandbox = sandbox.as_deref().ok_or_else(|| { miette::miette!( "Cannot sync discovered policy: sandbox not available.\n\ @@ -2013,7 +2249,7 @@ async fn load_policy( // Ensure baseline filesystem paths are present for proxy-mode // sandboxes. If the policy was enriched, sync the updated version // back to the gateway so users can see the effective policy. - let enriched = enrich_proto_baseline_paths(&mut proto_policy); + let enriched = enrich_proto_baseline_paths(&mut proto_policy)?; if enriched && let Some(sandbox_name) = sandbox.as_deref() && let Err(e) = grpc_client::sync_policy(endpoint, sandbox_name, &proto_policy).await diff --git a/docs/sandboxes/manage-sandboxes.mdx b/docs/sandboxes/manage-sandboxes.mdx index a62f57bf5..083e620c5 100644 --- a/docs/sandboxes/manage-sandboxes.mdx +++ b/docs/sandboxes/manage-sandboxes.mdx @@ -55,6 +55,11 @@ For Docker-backed sandboxes, GPU injection uses Docker CDI. If you enable Docker CDI after the gateway starts, restart the gateway so OpenShell can detect the updated Docker daemon capability. +The built-in default policy includes the filesystem access CUDA needs after GPU +devices are injected. If you use a custom policy with `--gpu`, keep the injected +GPU device nodes and `/proc` in `filesystem_policy.read_write`; CUDA +initialization may fail when `/proc` remains read-only. + ### Custom Containers Use `--from` to create a sandbox from the base image, another pre-built sandbox name, a local directory, or a container image: diff --git a/docs/sandboxes/policies.mdx b/docs/sandboxes/policies.mdx index 4ead66a5e..b3c64ccfc 100644 --- a/docs/sandboxes/policies.mdx +++ b/docs/sandboxes/policies.mdx @@ -62,6 +62,8 @@ Raw streams are connection-scoped and outside L7 live-reload guarantees. This in When a sandbox runs in proxy mode (the default), OpenShell automatically adds baseline filesystem paths required for the sandbox child process to function: `/usr`, `/lib`, `/etc`, `/var/log` (read-only) and `/sandbox`, `/tmp` (read-write). Paths like `/app` are included in the baseline set but are only added if they exist in the container image. +When GPU devices are present in the sandbox container, OpenShell also adds the GPU filesystem baseline. This includes the injected NVIDIA or WSL2 device nodes as read-write paths. For the built-in default policy, OpenShell promotes `/proc` from read-only to read-write because CUDA initialization writes `/proc//task//comm`. If a custom policy explicitly keeps a GPU-required path such as `/proc` in `read_only`, sandbox startup fails and tells you to move that path to `read_write`. + This filtering prevents a missing baseline path from degrading Landlock enforcement. Without it, a single missing path could cause the entire Landlock ruleset to fail, leaving the sandbox with no filesystem restrictions at all. User-specified paths in your policy YAML are not pre-filtered. If you list a path that does not exist: