diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index a44a2b35..6d8e1ee5 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -77,8 +77,8 @@ body: - type: input id: version attributes: - label: "rig-coding-tools version" - description: The version of rig-coding-tools you're using + label: "llm-coding-tools version" + description: The version of llm-coding-tools you're using placeholder: e.g. 0.1.0 validations: required: true diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 08e2d585..d0737b29 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -2,6 +2,5 @@ diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index fd02cc7b..a9a7ac07 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -2,15 +2,13 @@ name: Rust on: push: - branches: [ main ] + branches: [main] tags: - - '*' + - "*" pull_request: - branches: [ main ] + branches: [main] workflow_dispatch: - - jobs: build-and-test: strategy: @@ -19,13 +17,19 @@ jobs: - os: ubuntu-latest target: x86_64-unknown-linux-gnu use-cross: false + - os: windows-latest + target: x86_64-pc-windows-msvc + use-cross: false + - os: macos-latest + target: aarch64-apple-darwin + use-cross: false runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v6 - - name: Run Tests and Upload Coverage + - name: Run Tests (async mode) and Upload Coverage uses: Reloaded-Project/devops-rust-test-and-coverage@v1 with: rust-project-path: ./src @@ -33,6 +37,17 @@ jobs: codecov-token: ${{ secrets.CODECOV_TOKEN }} target: ${{ matrix.target }} use-cross: ${{ matrix.use-cross }} + + - name: Run Tests (blocking mode) and Upload Coverage + uses: Reloaded-Project/devops-rust-test-and-coverage@v1 + with: + rust-project-path: ./src + upload-coverage: true + codecov-token: ${{ secrets.CODECOV_TOKEN }} + target: ${{ matrix.target }} + use-cross: ${{ 
matrix.use-cross }} + cargo-test-args: "-p llm-coding-tools-core --no-default-features --features blocking" + # Note: The GitHub Runner Images will contain an up to date Rust Stable Toolchain # thus as per recommendation of cargo-semver-checks, we're using stable here. # @@ -42,29 +57,37 @@ jobs: working-directory: src shell: bash run: | - SEARCH_RESULT=$(cargo search "^rig-coding-tools$" --limit 1) + # Note: binstall is available after devops-rust-test-and-coverage@v1 call + cargo +stable binstall --no-confirm cargo-semver-checks --force + rustup +stable target add ${{ matrix.target }} - if echo "$SEARCH_RESULT" | grep -q "^rig-coding-tools "; then - # Run semver checks on stable, because nightly sometimes gets borked in cargo-semver-checks. - rustup +stable target add ${{ matrix.target }} - # Note: binstall is available after devops-rust-test-and-coverage@v1 call - cargo +stable binstall --no-confirm cargo-semver-checks --force - cargo +stable semver-checks --target ${{ matrix.target }} - else - echo "No previous version found on crates.io. Skipping semver checks." - fi + for CRATE in "llm-coding-tools-core" "llm-coding-tools-rig"; do + SEARCH_RESULT=$(cargo search "^${CRATE}$" --limit 1) + if echo "$SEARCH_RESULT" | grep -q "^${CRATE} "; then + echo "Running semver checks for ${CRATE}..." + cargo +stable semver-checks -p "${CRATE}" --target ${{ matrix.target }} + else + echo "No previous version of ${CRATE} found on crates.io. Skipping semver checks." 
+ fi + done - name: Check documentation is valid if: github.event_name == 'pull_request' || startsWith(github.ref, 'refs/tags/') working-directory: src env: RUSTDOCFLAGS: "-D warnings" - run: cargo doc --workspace --all-features --document-private-items --target ${{ matrix.target }} + # Note: Can't use --all-features at workspace level because tokio/blocking are mutually exclusive + run: | + cargo doc -p llm-coding-tools-core --features tokio --document-private-items --target ${{ matrix.target }} + cargo doc -p llm-coding-tools-rig --document-private-items --target ${{ matrix.target }} - name: Run linter if: github.event_name == 'pull_request' || startsWith(github.ref, 'refs/tags/') working-directory: src - run: cargo clippy --workspace --all-features --target ${{ matrix.target }} -- -D warnings + # Note: Can't use --all-features at workspace level because tokio/blocking are mutually exclusive + run: | + cargo clippy -p llm-coding-tools-core --features tokio --target ${{ matrix.target }} -- -D warnings + cargo clippy -p llm-coding-tools-rig --target ${{ matrix.target }} -- -D warnings - name: Run formatter check uses: actions-rust-lang/rustfmt@v1 @@ -85,12 +108,14 @@ jobs: uses: Reloaded-Project/devops-publish-action@v3 with: rust-crates-io-token: ${{ secrets.CRATES_IO_TOKEN }} - rust-cargo-project-paths: src/rig-coding-tools + rust-cargo-project-paths: | + src/llm-coding-tools-core + src/llm-coding-tools-rig compression-tool: 7z artifact-groups-file: .github/artifact-groups.yml - changelog-enabled: 'true' + changelog-enabled: "true" changelog-template: .github/changelog.hbs changelog-is-release: ${{ startsWith(github.ref, 'refs/tags/') }} changelog-release-tag: ${{ github.ref_name }} - changelog-override-starting-version: 'true' - changelog-hide-credit: 'true' \ No newline at end of file + changelog-override-starting-version: "true" + changelog-hide-credit: "true" diff --git a/README.MD b/README.MD index 9cc77d4f..c2f926b4 100644 --- a/README.MD +++ b/README.MD @@ 
-1,49 +1,33 @@ -# rig-coding-tools +# llm-coding-tools -## Additional Setup Required!! +[![Crates.io - llm-coding-tools-core](https://img.shields.io/crates/v/llm-coding-tools-core.svg)](https://crates.io/crates/llm-coding-tools-core) +[![Crates.io - llm-coding-tools-rig](https://img.shields.io/crates/v/llm-coding-tools-rig.svg)](https://crates.io/crates/llm-coding-tools-rig) +[![Docs.rs](https://docs.rs/llm-coding-tools-rig/badge.svg)](https://docs.rs/llm-coding-tools-rig) +[![CI](https://github.com/Sewer56/llm-coding-tools/actions/workflows/rust.yml/badge.svg)](https://github.com/Sewer56/llm-coding-tools/actions) - +Lightweight, high-performance coding tool implementations for LLM-powered development agents. Plug and play into your favourite frameworks like [Rig](https://github.com/0xPlaygrounds/rig). -Additional setup might be required. -Once you do the stuff under this text, delete this section. +## About This Workspace -### Code Coverage -To setup code coverage for this project: +This workspace contains multiple Rust crates for integrating coding tools with LLM agents: -1. Go to [Codecov][codecov], add your project. -2. Go to [Settings -> Secrets and variables -> Actions][gh-actions-secrets] in your repo and add a repository secret named `CODECOV_TOKEN`. -3. Do the same in [Settings -> Secrets and variables -> Dependabot][gh-actions-dependabot]. +- **[llm-coding-tools-core](./src/llm-coding-tools-core/)**: Framework-agnostic core operations and utilities +- **[llm-coding-tools-rig](./src/llm-coding-tools-rig/)**: Rig framework-specific Tool implementations -The instructions on the Codecov page will provide the token to use. 
+## Features +- **File Operations**: Read, write, edit files with line-numbered output +- **Search**: Glob pattern matching and regex content search +- **Shell**: Cross-platform command execution with timeout +- **Web**: URL fetching with HTML-to-markdown conversion +- **Task Delegation**: Sub-agent spawning for complex workflows +- **Path Security**: Choose between unrestricted or sandboxed file access +- **Context Strings**: Embedded LLM guidance for tool usage -### Setup API Key (crates.io) +## Feature Flags (llm-coding-tools-core) -You'll need to set up an API key to publish to `crates.io` on tag. - -- Generate your API key in [crates.io (Account Settings -> API Tokens)][crates-io-key]. -- Go to [Settings -> Secrets and variables -> Actions][gh-actions-secrets] in your repo and add environment secret named `CRATES_IO_TOKEN`. -- Paste your API key. - - - - -### Fill in README Documentation - -This repository uses a two-level README structure: - -| File | Purpose | What to Write | -|------|---------|---------------| -| `README.MD` (this file) | GitHub landing page | Project overview, quick start example, badges | -| [`src/rig-coding-tools/README.MD`](./src/rig-coding-tools/README.MD) | crates.io page | Detailed features, installation, comprehensive usage examples | - ------------------------ - -[![Crates.io](https://img.shields.io/crates/v/rig-coding-tools.svg)](https://crates.io/crates/rig-coding-tools) -[![Docs.rs](https://docs.rs/rig-coding-tools/badge.svg)](https://docs.rs/rig-coding-tools) -[![CI](https://github.com/Sewer56/rig-coding-tools/actions/workflows/rust.yml/badge.svg)](https://github.com/Sewer56/rig-coding-tools/actions) - -Basic coding tools for rig based LLM agents +- `tokio` (default): Async mode with tokio runtime +- `blocking`: Sync/blocking mode, mutually exclusive with `async` ## Quick Start @@ -51,33 +35,60 @@ Add to your `Cargo.toml`: ```toml [dependencies] -rig-coding-tools = "0.1.0" +llm-coding-tools-rig = "0.1" ``` ```rust -// TODO: Add 
a minimal example showing the most common use case +use llm_coding_tools_rig::absolute::{ReadTool, WriteTool, GlobTool}; +use llm_coding_tools_rig::{BashTool, PreambleBuilder, TodoTools}; +use rig::tool::ToolSet; + +// Track tools and generate LLM guidance +let mut pb = PreambleBuilder::new(); +let todos = TodoTools::new(); + +let toolset = ToolSet::builder() + .static_tool(pb.track(ReadTool::::new())) + .static_tool(pb.track(WriteTool::new())) + .static_tool(pb.track(GlobTool::new())) + .static_tool(pb.track(BashTool::new())) + .static_tool(pb.track(todos.read)) + .static_tool(pb.track(todos.write)) + .build(); + +// Generate preamble for agent system prompt +let preamble = pb.build(); + +// Use with rig agent: +// let agent = client.agent("gpt-4o") +// .preamble(&preamble) +// .tools(toolset) +// .build(); ``` -See the [rig-coding-tools crate documentation](./src/rig-coding-tools/README.MD) for detailed usage. -## Crates +## Examples -- [rig-coding-tools](./src/rig-coding-tools/README.MD): Core library +```bash +# Basic toolset setup +cargo run --example basic -p llm-coding-tools-rig -## Developer Manual +# Complete agent configuration (recommended starting point) +cargo run --example full_agent -p llm-coding-tools-rig -For step-by-step development guidance, see the [Developer Manual](https://reloaded-project.github.io/reloaded-templates-rust/manual/). +# Sandboxed file access +cargo run --example sandboxed -p llm-coding-tools-rig +``` + +## Documentation + +- [llm-coding-tools-core README](./src/llm-coding-tools-core/README.md) +- [llm-coding-tools-rig README](./src/llm-coding-tools-rig/README.md) +- [Developer Guidelines](./src/AGENTS.md) ## Contributing -We welcome contributions! See the [Contributing Guide](https://reloaded-project.github.io/reloaded-templates-rust/manual/#contributing) for details. +Contributions are welcome! Please ensure all tests pass and the code follows our guidelines. ## License Licensed under [Apache 2.0](./LICENSE). 
- -[codecov]: https://app.codecov.io -[gh-actions-secrets]: https://github.com/Sewer56/rig-coding-tools/settings/secrets/actions -[gh-actions-dependabot]: https://github.com/Sewer56/rig-coding-tools/settings/secrets/dependabot -[gh-pages]: https://github.com/Sewer56/rig-coding-tools/settings/pages -[crates-io-key]: https://crates.io/settings/tokens -[nuget-key]: https://www.nuget.org/account/apikeys diff --git a/README.md b/README.md deleted file mode 100644 index 41babafd..00000000 --- a/README.md +++ /dev/null @@ -1 +0,0 @@ -# rig-coding-tools diff --git a/src/.vscode/tasks.json b/src/.vscode/tasks.json index e8a8a80f..76f58d74 100644 --- a/src/.vscode/tasks.json +++ b/src/.vscode/tasks.json @@ -4,7 +4,7 @@ { "label": "Auto Test on Save", "type": "shell", - "command": "cargo install cargo-watch --quiet && cargo watch -x \"test\" -w rig-coding-tools/src", + "command": "cargo install cargo-watch --quiet && cargo watch -x \"test\" -w llm-coding-tools-core/src -w llm-coding-tools-rig/src", "group": "test", "presentation": { "reveal": "always" @@ -14,7 +14,7 @@ { "label": "Auto Coverage on Save", "type": "shell", - "command": "cargo install cargo-watch --quiet && cargo install cargo-tarpaulin --quiet && cargo watch -x \"tarpaulin --skip-clean --out Xml --out Html --engine llvm --target-dir target/coverage-build\" -w rig-coding-tools/src", + "command": "cargo install cargo-watch --quiet && cargo install cargo-tarpaulin --quiet && cargo watch -x \"tarpaulin --skip-clean --out Xml --out Html --engine llvm --target-dir target/coverage-build\" -w llm-coding-tools-core/src -w llm-coding-tools-rig/src", "group": "test", "presentation": { "reveal": "always" diff --git a/src/AGENTS.md b/src/AGENTS.md index c3a6b5ed..204d5671 100644 --- a/src/AGENTS.md +++ b/src/AGENTS.md @@ -1,18 +1,62 @@ -# rig-coding-tools +Basic coding oriented tools for LLM agents -Basic coding tools for rig based LLM agents +# Feature Flags (llm-coding-tools-core) + +- `tokio` (default): Async mode with 
tokio runtime. Enables async function signatures. +- `blocking`: Sync/blocking mode. Mutually exclusive with `tokio`/`async`. +- `async`: Base async signatures (internal use). Do not enable directly; use `tokio`. + +The `async` and `blocking` features are mutually exclusive - enabling both causes a compile error. # Project Structure -- `rig-coding-tools/` - Main library crate - - `src/` - Library source code +- `llm-coding-tools-core/` - Framework-agnostic core library + - `src/operations/` - Core operation implementations (read, write, edit, glob, grep, bash, etc.) + - `src/path/` - Path resolution (absolute and allowed) + - `src/error.rs` - Unified error types + - `src/output.rs` - Tool output formatting + - `src/util.rs` - Shared utilities +- `llm-coding-tools-rig/` - Rig framework Tool implementations + - `src/absolute/` - Unrestricted file system tools + - `src/allowed/` - Sandboxed file system tools + - `src/bash.rs`, `src/task.rs`, etc. - Standalone tools + +# Code & Performance Guidelines + +This is a high-performance library. Optimize aggressively. 
+ +## Memory & Allocation + +- Preallocate collections when size is known or estimable: + - `String::with_capacity(estimated_len)` + - `Vec::with_capacity(count)` + - `BufReader::with_capacity(size, reader)` +- Use power-of-two sizes for allocator efficiency: `.next_power_of_two()` +- Prefer `&str` / `&[T]` returns over owned types when lifetime allows +- Use `Cow<'_, str>` for conditional ownership (e.g., `String::from_utf8_lossy`) +- Use `&'static str` for compile-time constant strings +- Reuse buffers: `.clear()` and reuse `Vec`/`String` instead of reallocating + +## Zero-Cost Abstractions + +- Use const generics for compile-time branching (e.g., ``) +- Use `#[inline]` on small, hot-path functions +- Prefer `core` over `std` where possible (`core::mem` over `std::mem`) + +## I/O Efficiency -# Code Guidelines +- Stream data instead of loading entire files when possible +- Use `memchr` for fast byte searching over manual iteration -- Optimize for performance; use zero-cost abstractions, avoid allocations. -- Keep modules under 500 lines (excluding tests); split if larger. -- Place `use` inside functions only for `#[cfg]` conditional compilation. -- Prefer `core` over `std` where possible (`core::mem` over `std::mem`). 
+## Dependencies + +- Prefer performance-oriented crates: `parking_lot` over `std::sync`, `memchr` for byte search +- Keep dependency footprint minimal + +## General + +- Keep modules under 500 lines (excluding tests); split if larger +- Place `use` inside functions only for `#[cfg]` conditional compilation # Documentation Standards @@ -27,10 +71,23 @@ Basic coding tools for rig based LLM agents All must pass without warnings: ```bash -cargo build --workspace --all-features --all-targets --quiet -cargo test --workspace --all-features --quiet -cargo clippy --workspace --all-features --quiet -- -D warnings -cargo doc --workspace --all-features --quiet +# Test async mode (default) +cargo build -p llm-coding-tools-core && cargo build -p llm-coding-tools-rig --quiet +cargo test -p llm-coding-tools-core && cargo test -p llm-coding-tools-rig --quiet +cargo clippy -p llm-coding-tools-core && cargo clippy -p llm-coding-tools-rig --quiet -- -D warnings + +# Test blocking mode (llm-coding-tools-core only, rig is inherently async) +cargo test -p llm-coding-tools-core --no-default-features --features blocking --quiet + +cargo doc --workspace --no-deps --quiet cargo fmt --all --quiet -cargo publish --dry-run --quiet +``` + +Note: `llm-coding-tools-rig` is async-only (implements rig's async `Tool` trait). +The `blocking` feature only applies to `llm-coding-tools-core`. 
+ +For individual crates: +```bash +cargo publish --dry-run -p llm-coding-tools-core --quiet +cargo publish --dry-run -p llm-coding-tools-rig --quiet ``` diff --git a/src/Cargo.lock b/src/Cargo.lock index 63edc89d..8eebfa82 100644 --- a/src/Cargo.lock +++ b/src/Cargo.lock @@ -3,10 +3,19 @@ version = 4 [[package]] -name = "anyhow" -version = "1.0.100" +name = "aho-corasick" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" [[package]] name = "as-any" @@ -14,6 +23,25 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b0f477b951e452a0b6b4a10b53ccd569042d1d01729b519e02074a9c0958a063" +[[package]] +name = "assert-json-diff" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47e4f2b81832e72834d7518d8487a0396a28cc408186a2e8854c0f98011faf12" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] +name = "astral-tl" +version = "0.7.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d90933ffb0f97e2fc2e0de21da9d3f20597b804012d199843a6fe7c2810d28f3" +dependencies = [ + "memchr", +] + [[package]] name = "async-stream" version = "0.3.6" @@ -36,6 +64,17 @@ dependencies = [ "syn", ] +[[package]] +name = "async-trait" +version = "0.1.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "atomic-waker" version = "1.1.2" @@ -48,6 +87,28 @@ version = "1.5.0" 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" +[[package]] +name = "aws-lc-rs" +version = "1.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a88aab2464f1f25453baa7a07c84c5b7684e274054ba06817f382357f77a288" +dependencies = [ + "aws-lc-sys", + "zeroize", +] + +[[package]] +name = "aws-lc-sys" +version = "0.35.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b45afffdee1e7c9126814751f88dddc747f41d91da16c9551a0f1e8a11e788a1" +dependencies = [ + "cc", + "cmake", + "dunce", + "fs_extra", +] + [[package]] name = "base64" version = "0.22.1" @@ -60,6 +121,17 @@ version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" +[[package]] +name = "bstr" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63044e1ae8e69f3b5a92c736ca6269b8d12fa7efe39bf34ddb06d102cf0e2cab" +dependencies = [ + "memchr", + "regex-automata", + "serde", +] + [[package]] name = "bumpalo" version = "3.19.1" @@ -79,9 +151,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a0aeaff4ff1a90589618835a598e545176939b97874f7abc7851caa0618f203" dependencies = [ "find-msvc-tools", + "jobserver", + "libc", "shlex", ] +[[package]] +name = "cesu8" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c" + [[package]] name = "cfg-if" version = "1.0.4" @@ -94,6 +174,25 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" +[[package]] +name = "cmake" +version = "0.1.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"75443c44cd6b379beb8c5b45d85d0773baf31cce901fe7bb252f4eff3008ef7d" +dependencies = [ + "cc", +] + +[[package]] +name = "combine" +version = "4.6.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba5a308b75df32fe02788e748662718f03fde005016435c444eea572398219fd" +dependencies = [ + "bytes", + "memchr", +] + [[package]] name = "core-foundation" version = "0.9.4" @@ -104,12 +203,65 @@ dependencies = [ "libc", ] +[[package]] +name = "core-foundation" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "core-foundation-sys" version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "deadpool" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0be2b1d1d6ec8d846f05e137292d0b89133caf95ef33695424c09568bdd39b1b" +dependencies = [ + "deadpool-runtime", + "lazy_static", + "num_cpus", + "tokio", +] + +[[package]] +name = "deadpool-runtime" +version = "0.1.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "092966b41edc516079bdf31ec78a2e0588d1d0c08f78b91d8307215928642b2b" + [[package]] name = "displaydoc" version = "0.2.5" @@ -121,6 +273,12 @@ dependencies = [ "syn", ] +[[package]] +name = "dunce" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" + [[package]] name = "dyn-clone" version = "1.0.20" @@ -136,6 +294,15 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "encoding_rs_io" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cc3c5651fb62ab8aa3103998dade57efdd028544bd300516baa31840c252a83" +dependencies = [ + "encoding_rs", +] + [[package]] name = "equivalent" version = "1.0.2" @@ -181,6 +348,12 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "foldhash" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" + [[package]] name = "form_urlencoded" version = "1.2.2" @@ -190,6 +363,22 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fs_extra" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" + +[[package]] +name = "futf" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843" +dependencies = [ + "mac", + "new_debug_unreachable", +] + [[package]] name = "futures" version = "0.3.31" @@ -318,11 +507,61 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" 
+[[package]] +name = "globset" +version = "0.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52dfc19153a48bde0cbd630453615c8151bce3a5adfac7a0aebfbf0a1e1f57e3" +dependencies = [ + "aho-corasick", + "bstr", + "log", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "grep-matcher" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36d7b71093325ab22d780b40d7df3066ae4aebb518ba719d38c697a8228a8023" +dependencies = [ + "memchr", +] + +[[package]] +name = "grep-regex" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce0c256c3ad82bcc07b812c15a45ec1d398122e8e15124f96695234db7112ef" +dependencies = [ + "bstr", + "grep-matcher", + "log", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "grep-searcher" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac63295322dc48ebb20a25348147905d816318888e64f531bfc2a2bc0577dc34" +dependencies = [ + "bstr", + "encoding_rs", + "encoding_rs_io", + "grep-matcher", + "log", + "memchr", + "memmap2", +] + [[package]] name = "h2" -version = "0.4.12" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3c0b69cfcb4e1b9f1bf2f53f95f766e4661169728ec61cd3fe5a0166f2d1386" +checksum = "2f44da3a8150a6703ed5d34e164b875fd14c2cdab9af1252a9a1020bde2bdc54" dependencies = [ "atomic-waker", "bytes", @@ -342,6 +581,55 @@ name = "hashbrown" version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash", +] + +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + +[[package]] +name = "html-escape" +version = 
"0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d1ad449764d627e22bfd7cd5e8868264fc9236e07c752972b4080cd351cb476" +dependencies = [ + "utf8-width", +] + +[[package]] +name = "html-to-markdown-rs" +version = "2.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b44ff13ff909885d418b0c63d9a485382cdc1b3a3e016a100f8e79e5df934d21" +dependencies = [ + "astral-tl", + "base64", + "html-escape", + "html5ever", + "lru", + "markup5ever_rcdom", + "once_cell", + "regex", + "serde", + "serde_json", + "thiserror 2.0.17", +] + +[[package]] +name = "html5ever" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6452c4751a24e1b99c3260d505eaeee76a050573e61f30ac2c924ddc7236f01e" +dependencies = [ + "log", + "markup5ever", +] [[package]] name = "http" @@ -382,6 +670,12 @@ version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + [[package]] name = "hyper" version = "1.8.1" @@ -396,6 +690,7 @@ dependencies = [ "http", "http-body", "httparse", + "httpdate", "itoa", "pin-project-lite", "pin-utils", @@ -549,11 +844,27 @@ dependencies = [ "icu_properties", ] +[[package]] +name = "ignore" +version = "0.4.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3d782a365a015e0f5c04902246139249abf769125006fbe7649e2ee88169b4a" +dependencies = [ + "crossbeam-deque", + "globset", + "log", + "memchr", + "regex-automata", + "same-file", + "walkdir", + "winapi-util", +] + [[package]] name = "indexmap" -version = "2.12.1" +version = "2.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" dependencies = [ "equivalent", "hashbrown", @@ -567,9 +878,9 @@ checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" [[package]] name = "iri-string" -version = "0.7.9" +version = "0.7.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f867b9d1d896b67beb18518eda36fdb77a32ea590de864f1325b294a6d14397" +checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a" dependencies = [ "memchr", "serde", @@ -577,9 +888,41 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.16" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ee5b5339afb4c41626dde77b7a611bd4f2c202b897852b4bcf5d03eddc61010" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" + +[[package]] +name = "jni" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a87aa2bb7d2af34197c04845522473242e1aa17c12f4935d5856491a7fb8c97" +dependencies = [ + "cesu8", + "cfg-if", + "combine", + "jni-sys", + "log", + "thiserror 1.0.69", + "walkdir", + "windows-sys 0.45.0", +] + +[[package]] +name = "jni-sys" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130" + +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom 0.3.4", + "libc", +] [[package]] name = "js-sys" @@ -591,11 +934,23 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + [[package]] name = "libc" -version = "0.2.178" +version = "0.2.179" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091" +checksum = "c5a2d376baa530d1238d133232d15e239abad80d05838b4b59354e5268af431f" + +[[package]] +name = "linux-raw-sys" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" [[package]] name = "litemap" @@ -603,6 +958,43 @@ version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" +[[package]] +name = "llm-coding-tools-core" +version = "0.1.0" +dependencies = [ + "async-trait", + "globset", + "grep-regex", + "grep-searcher", + "html-to-markdown-rs", + "ignore", + "maybe-async", + "memchr", + "parking_lot", + "reqwest 0.13.1", + "schemars", + "serde", + "serde_json", + "tempfile", + "thiserror 2.0.17", + "tokio", + "wiremock", +] + +[[package]] +name = "llm-coding-tools-rig" +version = "0.1.0" +dependencies = [ + "llm-coding-tools-core", + "reqwest 0.13.1", + "rig-core", + "schemars", + "serde", + "serde_json", + "tempfile", + "tokio", +] + [[package]] name = "lock_api" version = "0.4.14" @@ -618,18 +1010,76 @@ version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" +[[package]] +name = "lru" +version = "0.16.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96051b46fc183dc9cd4a223960ef37b9af631b55191852a8274bfef064cda20f" +dependencies = [ + "hashbrown", +] + [[package]] name = "lru-slab" version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" 
+[[package]] +name = "mac" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" + +[[package]] +name = "markup5ever" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c3294c4d74d0742910f8c7b466f44dda9eb2d5742c1e430138df290a1e8451c" +dependencies = [ + "log", + "tendril", + "web_atoms", +] + +[[package]] +name = "markup5ever_rcdom" +version = "0.36.0+unofficial" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e5fc8802e8797c0dfdd2ce5c21aa0aee21abbc7b3b18559100651b3352a7b63" +dependencies = [ + "html5ever", + "markup5ever", + "tendril", + "xml5ever", +] + +[[package]] +name = "maybe-async" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cf92c10c7e361d6b99666ec1c6f9805b0bea2c3bd8c78dc6fe98ac5bd78db11" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "memchr" version = "2.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" +[[package]] +name = "memmap2" +version = "0.9.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "744133e4a0e0a658e1374cf3bf8e415c4052a15a111acd372764c55b4177d490" +dependencies = [ + "libc", +] + [[package]] name = "mime" version = "0.3.17" @@ -663,6 +1113,12 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "new_debug_unreachable" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" + [[package]] name = "nom" version = "7.1.3" @@ -682,12 +1138,28 @@ dependencies = [ "autocfg", ] +[[package]] +name = "num_cpus" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" +dependencies = [ + "hermit-abi", + "libc", +] + [[package]] name = "once_cell" version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +[[package]] +name = "openssl-probe" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f50d9b3dabb09ecd771ad0aa242ca6894994c130308ca3d7684634df8037391" + [[package]] name = "ordered-float" version = "5.1.0" @@ -724,7 +1196,46 @@ dependencies = [ name = "percent-encoding" version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" + +[[package]] +name = "phf" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf" +dependencies = [ + "phf_shared", + "serde", +] + +[[package]] +name = "phf_codegen" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49aa7f9d80421bca176ca8dbfebe668cc7a2684708594ec9f3c0db0805d5d6e1" +dependencies = [ + "phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "135ace3a761e564ec88c03a77317a7c6b80bb7f7135ef2544dbe054243b89737" +dependencies = [ + "fastrand", + "phf_shared", +] + +[[package]] +name = "phf_shared" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e57fef6bc5981e38c2ce2d63bfa546861309f875b8a75f092d1d54ae2d64f266" +dependencies = [ + "siphasher", +] [[package]] name = "pin-project" @@ -776,11 +1287,17 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "precomputed-hash" +version 
= "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + [[package]] name = "proc-macro2" -version = "1.0.103" +version = "1.0.105" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" +checksum = "535d180e0ecab6268a3e718bb9fd44db66bbbc256257165fc699dadf70d16fe7" dependencies = [ "unicode-ident", ] @@ -799,7 +1316,7 @@ dependencies = [ "rustc-hash", "rustls", "socket2", - "thiserror", + "thiserror 2.0.17", "tokio", "tracing", "web-time", @@ -811,6 +1328,7 @@ version = "0.11.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f1906b49b0c3bc04b5fe5d86a77925ae6524a19b816ae38ce1e426255f1d8a31" dependencies = [ + "aws-lc-rs", "bytes", "getrandom 0.3.4", "lru-slab", @@ -820,7 +1338,7 @@ dependencies = [ "rustls", "rustls-pki-types", "slab", - "thiserror", + "thiserror 2.0.17", "tinyvec", "tracing", "web-time", @@ -837,14 +1355,14 @@ dependencies = [ "once_cell", "socket2", "tracing", - "windows-sys 0.52.0", + "windows-sys 0.60.2", ] [[package]] name = "quote" -version = "1.0.42" +version = "1.0.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" +checksum = "dc74d9a594b72ae6656596548f56f667211f8a97b3d4c3d467150794690dc40a" dependencies = [ "proc-macro2", ] @@ -913,6 +1431,35 @@ dependencies = [ "syn", ] +[[package]] +name = "regex" +version = "1.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" + [[package]] name = "reqwest" version = "0.12.28" @@ -959,20 +1506,48 @@ dependencies = [ ] [[package]] -name = "rig-coding-tools" -version = "0.1.0" +name = "reqwest" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04e9018c9d814e5f30cc16a0f03271aeab3571e609612d9fe78c1aa8d11c2f62" dependencies = [ - "anyhow", - "reqwest", - "rig-core", + "base64", + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-rustls", + "hyper-util", + "js-sys", + "log", + "percent-encoding", + "pin-project-lite", + "quinn", + "rustls", + "rustls-native-certs", + "rustls-pki-types", + "rustls-platform-verifier", + "sync_wrapper", "tokio", + "tokio-rustls", + "tower", + "tower-http", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", ] [[package]] name = "rig-core" -version = "0.27.0" +version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3799afd8ba38d90d9886be5bf596b0159043f88598b40e1f5aa08aad488f2223" +checksum = "5b1a48121c1ecd6f6ce59d64ec353c791aac6fc07bf4aa353380e8185659e6eb" dependencies = [ "as-any", "async-stream", @@ -988,11 +1563,11 @@ dependencies = [ "mime_guess", "ordered-float", "pin-project-lite", - "reqwest", + "reqwest 0.12.28", "schemars", "serde", "serde_json", - "thiserror", + "thiserror 2.0.17", "tokio", "tracing", "tracing-futures", @@ -1019,12 +1594,26 @@ version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" +[[package]] +name = 
"rustix" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.61.2", +] + [[package]] name = "rustls" -version = "0.23.35" +version = "0.23.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "533f54bc6a7d4f647e46ad909549eda97bf5afc1585190ef692b4286b198bd8f" +checksum = "c665f33d38cea657d9614f766881e4d510e0eda4239891eea56b4cadcf01801b" dependencies = [ + "aws-lc-rs", "once_cell", "ring", "rustls-pki-types", @@ -1033,6 +1622,18 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rustls-native-certs" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63" +dependencies = [ + "openssl-probe", + "rustls-pki-types", + "schannel", + "security-framework", +] + [[package]] name = "rustls-pki-types" version = "1.13.2" @@ -1043,12 +1644,40 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rustls-platform-verifier" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d99feebc72bae7ab76ba994bb5e121b8d83d910ca40b36e0921f53becc41784" +dependencies = [ + "core-foundation 0.10.1", + "core-foundation-sys", + "jni", + "log", + "once_cell", + "rustls", + "rustls-native-certs", + "rustls-platform-verifier-android", + "rustls-webpki", + "security-framework", + "security-framework-sys", + "webpki-root-certs", + "windows-sys 0.61.2", +] + +[[package]] +name = "rustls-platform-verifier-android" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f87165f0995f63a9fbeea62b64d10b4d9d8e78ec6d7d51fb2125fda7bb36788f" + [[package]] name = "rustls-webpki" version = "0.103.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"2ffdfa2f5286e2247234e03f680868ac2815974dc39e00ea15adc445d0aafe52" dependencies = [ + "aws-lc-rs", "ring", "rustls-pki-types", "untrusted", @@ -1062,9 +1691,27 @@ checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" [[package]] name = "ryu" -version = "1.0.21" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a50f4cf475b65d88e057964e0e9bb1f0aa9bbb2036dc65c64596b42932536984" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "schannel" +version = "0.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62049b2877bf12821e8f9ad256ee38fdc31db7387ec2d3b3f403024de2034aea" +checksum = "891d81b926048e76efe18581bf793546b4c0eaf8448d72be8de2bbee5fd166e1" +dependencies = [ + "windows-sys 0.61.2", +] [[package]] name = "schemars" @@ -1097,6 +1744,29 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "security-framework" +version = "3.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3297343eaf830f66ede390ea39da1d462b6b0c1b000f420d0a83f898bbbe6ef" +dependencies = [ + "bitflags", + "core-foundation 0.10.1", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc1f0cbffaac4852523ce30d8bd3c5cdc873501d96ff467ca09b6767bb8cd5c0" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "serde" version = "1.0.228" @@ -1140,9 +1810,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.147" +version = "1.0.149" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "6af14725505314343e673e9ecb7cd7e8a36aa9791eb936235a3567cc31447ae4" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" dependencies = [ "itoa", "memchr", @@ -1179,6 +1849,12 @@ dependencies = [ "libc", ] +[[package]] +name = "siphasher" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" + [[package]] name = "slab" version = "0.4.11" @@ -1207,6 +1883,31 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" +[[package]] +name = "string_cache" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a18596f8c785a729f2819c0f6a7eae6ebeebdfffbfe4214ae6b087f690e31901" +dependencies = [ + "new_debug_unreachable", + "parking_lot", + "phf_shared", + "precomputed-hash", + "serde", +] + +[[package]] +name = "string_cache_codegen" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "585635e46db231059f76c5849798146164652513eb9e8ab2685939dd90f29b69" +dependencies = [ + "phf_generator", + "phf_shared", + "proc-macro2", + "quote", +] + [[package]] name = "subtle" version = "2.6.1" @@ -1215,9 +1916,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" -version = "2.0.111" +version = "2.0.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87" +checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" dependencies = [ "proc-macro2", "quote", @@ -1251,7 +1952,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" dependencies = [ 
"bitflags", - "core-foundation", + "core-foundation 0.9.4", "system-configuration-sys", ] @@ -1265,13 +1966,57 @@ dependencies = [ "libc", ] +[[package]] +name = "tempfile" +version = "3.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "655da9c7eb6305c55742045d5a8d2037996d61d8de95806335c7c86ce0f82e9c" +dependencies = [ + "fastrand", + "getrandom 0.3.4", + "once_cell", + "rustix", + "windows-sys 0.61.2", +] + +[[package]] +name = "tendril" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0" +dependencies = [ + "futf", + "mac", + "utf-8", +] + +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl 1.0.69", +] + [[package]] name = "thiserror" version = "2.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" dependencies = [ - "thiserror-impl", + "thiserror-impl 2.0.17", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -1312,14 +2057,13 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.48.0" +version = "1.49.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff360e02eab121e0bc37a2d3b4d4dc622e6eda3a8e5253d5435ecf5bd4c68408" +checksum = "72a2903cd7736441aac9df9d7688bd0ce48edccaadf181c3b90be801e81d3d86" dependencies = [ "bytes", "libc", "mio", - "parking_lot", "pin-project-lite", "signal-hook-registry", "socket2", @@ -1350,9 
+2094,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.17" +version = "0.7.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2efa149fe76073d6e8fd97ef4f4eca7b67f599660115591483572e406e165594" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" dependencies = [ "bytes", "futures-core", @@ -1457,9 +2201,9 @@ checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" [[package]] name = "unicase" -version = "2.8.1" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539" +checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142" [[package]] name = "unicode-ident" @@ -1475,9 +2219,9 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "url" -version = "2.5.7" +version = "2.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08bc136a29a3d1758e07a9cca267be308aeebf5cfd5a10f3f67ab2097683ef5b" +checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" dependencies = [ "form_urlencoded", "idna", @@ -1485,12 +2229,34 @@ dependencies = [ "serde", ] +[[package]] +name = "utf-8" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" + +[[package]] +name = "utf8-width" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1292c0d970b54115d14f2492fe0170adf21d68a1de108eebc51c1df4f346a091" + [[package]] name = "utf8_iter" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "want" version = "0.3.1" @@ -1606,15 +2372,45 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "web_atoms" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acd0c322f146d0f8aad130ce6c187953889359584497dac6561204c8e17bb43d" +dependencies = [ + "phf", + "phf_codegen", + "string_cache", + "string_cache_codegen", +] + +[[package]] +name = "webpki-root-certs" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36a29fc0408b113f68cf32637857ab740edfafdf460c326cd2afaa2d84cc05dc" +dependencies = [ + "rustls-pki-types", +] + [[package]] name = "webpki-roots" -version = "1.0.4" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2878ef029c47c6e8cf779119f20fcf52bde7ad42a731b2a304bc221df17571e" +checksum = "12bed680863276c63889429bfd6cab3b99943659923822de1c8a39c49e4d722c" dependencies = [ "rustls-pki-types", ] +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys 0.61.2", +] + [[package]] name = "windows-link" version = "0.2.1" @@ -1650,6 +2446,15 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows-sys" +version = "0.45.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +dependencies = [ + "windows-targets 0.42.2", +] + [[package]] name = "windows-sys" version = "0.52.0" @@ -1677,6 +2482,21 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows-targets" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" +dependencies = [ + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", +] + [[package]] name = "windows-targets" version = "0.52.6" @@ -1710,6 +2530,12 @@ dependencies = [ "windows_x86_64_msvc 0.53.1", ] +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" + [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" @@ -1722,6 +2548,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" + [[package]] name = "windows_aarch64_msvc" version = "0.52.6" @@ -1734,6 +2566,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" +[[package]] +name = "windows_i686_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" + [[package]] name = "windows_i686_gnu" version = "0.52.6" @@ -1758,6 +2596,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" +[[package]] +name = "windows_i686_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" + [[package]] name = "windows_i686_msvc" version = "0.52.6" @@ -1770,6 +2614,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" + [[package]] name = "windows_x86_64_gnu" version = "0.52.6" @@ -1782,6 +2632,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" + [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" @@ -1794,6 +2650,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" + [[package]] name = "windows_x86_64_msvc" version = "0.52.6" @@ -1806,6 +2668,29 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" +[[package]] +name = "wiremock" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08db1edfb05d9b3c1542e521aea074442088292f00b5f28e435c714a98f85031" +dependencies = [ + "assert-json-diff", + "base64", + "deadpool", + "futures", + "http", + "http-body-util", + "hyper", + 
"hyper-util", + "log", + "once_cell", + "regex", + "serde", + "serde_json", + "tokio", + "url", +] + [[package]] name = "wit-bindgen" version = "0.46.0" @@ -1818,6 +2703,16 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" +[[package]] +name = "xml5ever" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f57dd51b88a4b9f99f9b55b136abb86210629d61c48117ddb87f567e51e66be7" +dependencies = [ + "log", + "markup5ever", +] + [[package]] name = "yoke" version = "0.8.1" @@ -1843,18 +2738,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.31" +version = "0.8.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd74ec98b9250adb3ca554bdde269adf631549f51d8a8f8f0a10b50f1cb298c3" +checksum = "668f5168d10b9ee831de31933dc111a459c97ec93225beb307aed970d1372dfd" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.31" +version = "0.8.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a" +checksum = "2c7962b26b0a8685668b671ee4b54d007a67d4eaf05fda79ac0ecf41e32270f1" dependencies = [ "proc-macro2", "quote", @@ -1923,6 +2818,6 @@ dependencies = [ [[package]] name = "zmij" -version = "0.1.10" +version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4af59da1029247450b54ba43e0b62c8e376582464bbe5504dd525fe521e7e8fd" +checksum = "2fc5a66a20078bf1251bde995aa2fdcc4b800c70b5d92dd2c62abc5c60f679f8" diff --git a/src/Cargo.toml b/src/Cargo.toml index ba30e887..6584a8a7 100644 --- a/src/Cargo.toml +++ b/src/Cargo.toml @@ -1,7 +1,7 @@ [workspace] resolver = "2" -members = ["rig-coding-tools"] +members = ["llm-coding-tools-core", "llm-coding-tools-rig"] # Profile Build [profile.profile] diff --git 
a/src/llm-coding-tools-core/Cargo.toml b/src/llm-coding-tools-core/Cargo.toml new file mode 100644 index 00000000..b80e4dcd --- /dev/null +++ b/src/llm-coding-tools-core/Cargo.toml @@ -0,0 +1,61 @@ +[package] +name = "llm-coding-tools-core" +version = "0.1.0" +edition = "2021" +description = "Lightweight, high-performance core types and utilities for coding tools - framework agnostic" +repository = "https://github.com/Sewer56/llm-coding-tools" +license = "Apache-2.0" +include = ["src/**/*", "README.md"] +readme = "README.md" + +[features] +default = ["tokio"] +# Base async signatures - requires a runtime, do not enable directly +async = ["dep:async-trait"] +# Async with tokio runtime (default) +tokio = ["async", "dep:tokio", "dep:reqwest"] +# Blocking/sync mode - mutually exclusive with async +blocking = ["maybe-async/is_sync", "dep:reqwest", "reqwest?/blocking"] + +[dependencies] +# Tool outputs (BashOutput, GrepOutput, etc.) serialize to JSON for LLM consumption +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" + +# ToolError type uses thiserror for ergonomic error definitions +thiserror = "2.0" + +# Todo types derive JsonSchema for LLM tool parameter validation +schemars = "1.2" + +# Sync RwLock for TodoState (no tokio dependency) +parking_lot = "0.12" + +# Glob and grep tool implementations (aligned with ripgrep) +globset = "0.4.18" # Glob matching with ripgrep-optimized engine +grep-regex = "0.1.14" # Regex matcher for grep_search +grep-searcher = "0.1.16" # File content searching for grep_search +ignore = "0.4.25" # Respects .gitignore when walking directories +memchr = "2.6.3" # Fast newline detection in read_file + +# Webfetch tool converts HTML to markdown for LLM-friendly output +html-to-markdown-rs = "2.20" +reqwest = { version = "0.13", default-features = false, features = [ + "rustls", + "rustls-native-certs", +], optional = true } + +# Unifies async/sync code via procedural macros +maybe-async = "0.2" + +# TaskExecutor trait 
requires async methods +async-trait = { version = "0.1", optional = true } + +# Async file I/O, process execution, and timeouts +tokio = { version = "1.49", features = ["fs", "io-util", "process", "time"], optional = true } + +[dev-dependencies] +tempfile = "3.24" +# For async tests (when async feature enabled) +tokio = { version = "1.49", features = ["rt", "macros"] } +wiremock = "0.6" diff --git a/src/llm-coding-tools-core/README.md b/src/llm-coding-tools-core/README.md new file mode 100644 index 00000000..85d76062 --- /dev/null +++ b/src/llm-coding-tools-core/README.md @@ -0,0 +1,65 @@ +# llm-coding-tools-core + +Lightweight, high-performance core types and utilities for coding tools - framework agnostic. + +## Overview + +This crate provides the foundational building blocks for coding tool implementations: + +- `ToolError` - Unified error type for all tool operations +- `ToolResult` - Result type alias using ToolError +- `ToolOutput` - Wrapper for tool responses with truncation metadata +- Utility functions for text processing and formatting +- `context` module - LLM guidance strings for tool usage + +## Features + +- `tokio` (default): Async mode with tokio runtime. Enables async function signatures. +- `blocking`: Sync/blocking mode. Mutually exclusive with `tokio`/`async`. +- `async`: Base async signatures (internal). Requires a runtime; use `tokio` instead. + +The `async` and `blocking` features are mutually exclusive - enabling both causes a compile error. + +Future runtimes (smol, async-std) can be added following the same pattern as `tokio`. + +## Usage + +```rust +use llm_coding_tools_core::{ToolError, ToolResult, ToolOutput}; +use llm_coding_tools_core::util::{truncate_text, format_numbered_line}; +``` + +## Context Module + +The `context` module provides embedded strings containing usage guidance for LLM agents. +These can be appended to tool descriptions or system prompts. 
+ +Path-based tools have two variants: +- `*_ABSOLUTE`: For unrestricted filesystem access (absolute paths required) +- `*_ALLOWED`: For sandboxed access (paths relative to allowed directories) + +```rust +use llm_coding_tools_core::context::{BASH, READ_ABSOLUTE, READ_ALLOWED}; + +// Non-path tools have a single variant +println!("{}", BASH); + +// Path-based tools have absolute and allowed variants +println!("{}", READ_ABSOLUTE); +println!("{}", READ_ALLOWED); +``` + +Available context strings: +- `BASH`, `TASK`, `TODO_READ`, `TODO_WRITE`, `WEBFETCH` - standalone tools +- `READ_ABSOLUTE`, `READ_ALLOWED` - file reading +- `WRITE_ABSOLUTE`, `WRITE_ALLOWED` - file writing +- `EDIT_ABSOLUTE`, `EDIT_ALLOWED` - file editing +- `GLOB_ABSOLUTE`, `GLOB_ALLOWED` - pattern matching +- `GREP_ABSOLUTE`, `GREP_ALLOWED` - content search + +## Design Principles + +- No framework-specific dependencies, plug and play into any LLM framework/library + - See [llm-coding-tools-rig](https://crates.io/crates/llm-coding-tools-rig) for an integration example with [rig](https://crates.io/crates/rig) +- Minimal dependency footprint +- Performance-oriented (optimized) with zero-cost abstractions diff --git a/src/llm-coding-tools-core/src/context/bash.txt b/src/llm-coding-tools-core/src/context/bash.txt new file mode 100644 index 00000000..4102ccbd --- /dev/null +++ b/src/llm-coding-tools-core/src/context/bash.txt @@ -0,0 +1,120 @@ +Executes a given bash command in a persistent shell session with optional timeout, ensuring proper handling and security measures. + +All commands run in the working directory shown in the Environment section above. Use the `workdir` parameter if you need to run a command in a different directory. AVOID using `cd && ` patterns - use `workdir` instead. + +IMPORTANT: This tool is for terminal operations like git, npm, docker, etc. DO NOT use it for file operations (reading, writing, editing, searching, finding files) - use the specialized tools for this instead. 
+ +Before executing the command, please follow these steps: + +1. Directory Verification: + - If the command will create new directories or files, first use `ls` to verify the parent directory exists and is the correct location + - For example, before running "mkdir foo/bar", first use `ls foo` to check that "foo" exists and is the intended parent directory + +2. Command Execution: + - Always quote file paths that contain spaces with double quotes (e.g., rm "path with spaces/file.txt") + - Examples of proper quoting: + - mkdir "/Users/name/My Documents" (correct) + - mkdir /Users/name/My Documents (incorrect - will fail) + - python "/path/with spaces/script.py" (correct) + - python /path/with spaces/script.py (incorrect - will fail) + - After ensuring proper quoting, execute the command. + - Capture the output of the command. + +Usage notes: + - The `command` argument is required. + - You can specify an optional `timeout_ms` in milliseconds (up to 600000ms / 10 minutes). If not specified, commands will timeout after 120000ms (2 minutes). + - It is very helpful if you write a clear, concise description of what this command does in 5-10 words. + - If the output exceeds 30000 characters, output will be truncated before being returned to you. + + - Avoid using Bash with the `find`, `grep`, `cat`, `head`, `tail`, `sed`, `awk`, or `echo` commands, unless explicitly instructed or when these commands are truly necessary for the task. Instead, always prefer using the dedicated tools for these commands: + - File search: Use Glob (NOT find or ls) + - Content search: Use Grep (NOT grep) + - Read files: Use Read (NOT cat/head/tail) + - Edit files: Use Edit (NOT sed/awk) + - Write files: Use Write (NOT echo >/cat < && `. Use the `workdir` parameter to change directories instead. + + Use workdir="/foo/bar" with command: pytest tests + + + cd /foo/bar && pytest tests + + +# Committing changes with git + +Only create commits when requested by the user. If unclear, ask first. 
When the user asks you to create a new git commit, follow these steps carefully:
+
+Git Safety Protocol:
+- NEVER update the git config
+- NEVER run destructive/irreversible git commands (like push --force, hard reset, etc) unless the user explicitly requests them
+- NEVER skip hooks (--no-verify, --no-gpg-sign, etc) unless the user explicitly requests it
+- NEVER run force push to main/master, warn the user if they request it
+- Avoid git commit --amend. ONLY use --amend when ALL conditions are met:
+  (1) User explicitly requested amend, OR commit SUCCEEDED but pre-commit hook auto-modified files that need including
+  (2) HEAD commit was created by you in this conversation (verify: git log -1 --format='%an %ae')
+  (3) Commit has NOT been pushed to remote (verify: git status shows "Your branch is ahead")
+- CRITICAL: If commit FAILED or was REJECTED by hook, NEVER amend - fix the issue and create a NEW commit
+- CRITICAL: If you already pushed to remote, NEVER amend unless user explicitly requests it (requires force push)
+- NEVER commit changes unless the user explicitly asks you to. It is VERY IMPORTANT to only commit when explicitly asked, otherwise the user will feel that you are being too proactive.
+
+1. You can call multiple tools in a single response. When multiple independent pieces of information are requested and all commands are likely to succeed, run multiple tool calls in parallel for optimal performance. Run the following bash commands in parallel, each using the Bash tool:
+   - Run a git status command to see all untracked files.
+   - Run a git diff command to see both staged and unstaged changes that will be committed.
+   - Run a git log command to see recent commit messages, so that you can follow this repository's commit message style.
+2. Analyze all staged changes (both previously staged and newly added) and draft a commit message:
+  - Summarize the nature of the changes (e.g. 
new feature, enhancement to an existing feature, bug fix, refactoring, test, docs, etc.). Ensure the message accurately reflects the changes and their purpose (i.e. "add" means a wholly new feature, "update" means an enhancement to an existing feature, "fix" means a bug fix, etc.).
+  - Do not commit files that likely contain secrets (.env, credentials.json, etc.). Warn the user if they specifically request to commit those files
+  - Draft a concise (1-2 sentences) commit message that focuses on the "why" rather than the "what"
+  - Ensure it accurately reflects the changes and their purpose
+3. You can call multiple tools in a single response. When multiple independent pieces of information are requested and all commands are likely to succeed, run multiple tool calls in parallel for optimal performance. Run the following commands:
+   - Add relevant untracked files to the staging area.
+   - Create the commit with a message
+   - Run git status after the commit completes to verify success.
+   Note: git status depends on the commit completing, so run it sequentially after the commit.
+4. If the commit fails due to pre-commit hook, fix the issue and create a NEW commit (see amend rules above)
+
+Important notes:
+- NEVER run additional commands to read or explore code, besides git bash commands
+- NEVER use the TodoWrite or Task tools
+- DO NOT push to the remote repository unless the user explicitly asks you to do so
+- IMPORTANT: Never use git commands with the -i flag (like git rebase -i or git add -i) since they require interactive input which is not supported.
+- If there are no changes to commit (i.e., no untracked files and no modifications), do not create an empty commit
+
+# Creating pull requests
+Use the gh command via the Bash tool for ALL GitHub-related tasks including working with issues, pull requests, checks, and releases. If given a GitHub URL use the gh command to get the information needed. 
+
+IMPORTANT: When the user asks you to create a pull request, follow these steps carefully:
+
+1. You can call multiple tools in a single response. When multiple independent pieces of information are requested and all commands are likely to succeed, run multiple tool calls in parallel for optimal performance. Run the following bash commands in parallel using the Bash tool, in order to understand the current state of the branch since it diverged from the main branch:
+   - Run a git status command to see all untracked files
+   - Run a git diff command to see both staged and unstaged changes that will be committed
+   - Check if the current branch tracks a remote branch and is up to date with the remote, so you know if you need to push to the remote
+   - Run a git log command and `git diff [base-branch]...HEAD` to understand the full commit history for the current branch (from the time it diverged from the base branch)
+2. Analyze all changes that will be included in the pull request, making sure to look at all relevant commits (NOT just the latest commit, but ALL commits that will be included in the pull request!!!), and draft a pull request summary
+3. You can call multiple tools in a single response. When multiple independent pieces of information are requested and all commands are likely to succeed, run multiple tool calls in parallel for optimal performance. Run the following commands in parallel:
+   - Create new branch if needed
+   - Push to remote with -u flag if needed
+   - Create PR using gh pr create with the format below. Use a HEREDOC to pass the body to ensure correct formatting.
+
+gh pr create --title "the pr title" --body "$(cat <<'EOF'
+## Summary
+<1-3 bullet points>
+
+## Test plan
+[Bulleted markdown checklist of TODOs for testing the pull request...]
+EOF +)" + + +Important: +- DO NOT use the TodoWrite or Task tools +- Return the PR URL when you're done, so the user can see it + +# Other common operations +- View comments on a Github PR: gh api repos/foo/bar/pulls/123/comments diff --git a/src/llm-coding-tools-core/src/context/edit_absolute.txt b/src/llm-coding-tools-core/src/context/edit_absolute.txt new file mode 100644 index 00000000..9b54d7d2 --- /dev/null +++ b/src/llm-coding-tools-core/src/context/edit_absolute.txt @@ -0,0 +1,74 @@ +Performs exact string replacements in files. + +Usage: +- You must use your Read tool at least once in the conversation before editing. This tool will error if you attempt an edit without reading the file. +- When editing text from Read tool output, ensure you preserve the exact indentation (tabs/spaces) as it appears AFTER the line number prefix. The line number prefix format is "L{n}: ". Everything after that prefix is the actual file content to match. Never include any part of the line number prefix in `old_string` or `new_string`. +- ALWAYS prefer editing existing files in the codebase. NEVER write new files unless explicitly required. +- Only use emojis if the user explicitly requests it. Avoid adding emojis to files unless asked. + +## Parameters + +- `file_path`: Absolute path to the file to modify (required) +- `old_string`: Exact text to find and replace (required) +- `new_string`: Replacement text (required) +- `replace_all`: Replace all occurrences when true, default false (optional) + +## Error Behavior + +- The edit will FAIL if `old_string` is not found in the file with an error "oldString not found in content". +- The edit will FAIL if `old_string` is found multiple times in the file with an error "oldString found multiple times and requires more code context to uniquely identify the intended match". Either provide a larger string with more surrounding context to make it unique or use `replace_all` to change every instance of `old_string`. 
+ +## When to Use This Tool + +- Making targeted changes to existing files +- Fixing bugs in specific code sections +- Updating function implementations +- Renaming variables across a file (with `replace_all: true`) +- Adding new code to existing files + +## When NOT to Use This Tool + +- Creating new files - use Write tool instead +- When most of a file needs to change - use Write tool instead +- When you haven't read the file yet - read it first! + +## Examples + +Replacing a single occurrence: +``` +file_path: "/home/user/project/src/main.rs" +old_string: "fn old_name() {" +new_string: "fn new_name() {" +``` + +Renaming a variable everywhere in a file: +``` +file_path: "/home/user/project/src/main.rs" +old_string: "old_var" +new_string: "new_var" +replace_all: true +``` + +Adding code after an existing line: +``` +file_path: "/home/user/project/src/main.rs" +old_string: "use std::io;" +new_string: "use std::io;\nuse std::fs;" +``` + +## Best Practices + +1. Always read the file first using the Read tool +2. Copy the exact text from the Read output, preserving whitespace and indentation +3. Include enough context in `old_string` to make it unique +4. When adding new code, include the line before/after in `old_string` for context +5. Use `replace_all: true` when renaming variables or making consistent changes +6. 
Don't include line number prefixes (like "L42: ") in your old_string or new_string + +## Common Mistakes to Avoid + +- Forgetting to read the file first +- Including line number prefixes in old_string +- Not including enough context (causes "found multiple times" error) +- Changing indentation unintentionally +- Forgetting that old_string must match EXACTLY (including whitespace) diff --git a/src/llm-coding-tools-core/src/context/edit_allowed.txt b/src/llm-coding-tools-core/src/context/edit_allowed.txt new file mode 100644 index 00000000..615c42a0 --- /dev/null +++ b/src/llm-coding-tools-core/src/context/edit_allowed.txt @@ -0,0 +1,77 @@ +Performs exact string replacements in files within allowed directories. + +Usage: +- You must use your Read tool at least once in the conversation before editing. This tool will error if you attempt an edit without reading the file. +- Paths can be relative to configured allowed directories, or absolute paths within allowed directories +- Paths outside allowed directories will be rejected +- When editing text from Read tool output, ensure you preserve the exact indentation (tabs/spaces) as it appears AFTER the line number prefix. The line number prefix format is "L{n}: ". Everything after that prefix is the actual file content to match. Never include any part of the line number prefix in `old_string` or `new_string`. +- ALWAYS prefer editing existing files in the codebase. NEVER write new files unless explicitly required. +- Only use emojis if the user explicitly requests it. Avoid adding emojis to files unless asked. 
+ +## Parameters + +- `file_path`: Path to the file to modify - can be relative or absolute within allowed directories (required) +- `old_string`: Exact text to find and replace (required) +- `new_string`: Replacement text (required) +- `replace_all`: Replace all occurrences when true, default false (optional) + +## Error Behavior + +- The edit will FAIL if `old_string` is not found in the file with an error "oldString not found in content". +- The edit will FAIL if `old_string` is found multiple times in the file with an error "oldString found multiple times and requires more code context to uniquely identify the intended match". Either provide a larger string with more surrounding context to make it unique or use `replace_all` to change every instance of `old_string`. + +## When to Use This Tool + +- Making targeted changes to existing files +- Fixing bugs in specific code sections +- Updating function implementations +- Renaming variables across a file (with `replace_all: true`) +- Adding new code to existing files + +## When NOT to Use This Tool + +- Creating new files - use Write tool instead +- When most of a file needs to change - use Write tool instead +- When you haven't read the file yet - read it first! + +## Examples + +Replacing a single occurrence: +``` +file_path: "src/main.rs" +old_string: "fn old_name() {" +new_string: "fn new_name() {" +``` + +Renaming a variable everywhere in a file: +``` +file_path: "src/main.rs" +old_string: "old_var" +new_string: "new_var" +replace_all: true +``` + +Adding code after an existing line: +``` +file_path: "src/main.rs" +old_string: "use std::io;" +new_string: "use std::io;\nuse std::fs;" +``` + +## Best Practices + +1. Always read the file first using the Read tool +2. Copy the exact text from the Read output, preserving whitespace and indentation +3. Include enough context in `old_string` to make it unique +4. When adding new code, include the line before/after in `old_string` for context +5. 
Use `replace_all: true` when renaming variables or making consistent changes +6. Don't include line number prefixes (like "L42: ") in your old_string or new_string +7. Relative paths are resolved against allowed directories + +## Common Mistakes to Avoid + +- Forgetting to read the file first +- Including line number prefixes in old_string +- Not including enough context (causes "found multiple times" error) +- Changing indentation unintentionally +- Forgetting that old_string must match EXACTLY (including whitespace) diff --git a/src/llm-coding-tools-core/src/context/glob_absolute.txt b/src/llm-coding-tools-core/src/context/glob_absolute.txt new file mode 100644 index 00000000..b8f9f3bc --- /dev/null +++ b/src/llm-coding-tools-core/src/context/glob_absolute.txt @@ -0,0 +1,64 @@ +Fast file pattern matching tool that works with any codebase size. + +- Supports glob patterns like "**/*.js" or "src/**/*.ts" +- Returns matching file paths sorted by modification time (newest first) +- Respects .gitignore rules +- Use this tool when you need to find files by name patterns +- When you are doing an open-ended search that may require multiple rounds of globbing and grepping, use the Task tool instead + +## Parameters + +- `pattern`: Glob pattern to match files against (required) + - `*` matches any characters except path separators + - `**` matches any characters including path separators (recursive) + - `?` matches a single character + - `[abc]` matches any character in the brackets + - `{a,b}` matches either pattern +- `path`: Absolute directory path to search in (required) + +## When to Use This Tool + +- Finding files by extension: `**/*.rs`, `**/*.tsx` +- Finding files by name pattern: `**/test_*.py`, `**/*_spec.js` +- Locating configuration files: `**/Cargo.toml`, `**/package.json` +- Finding files in specific directories: `src/**/*.rs` + +## When NOT to Use This Tool + +- Searching for content inside files - use Grep instead +- Reading file contents - use Read 
instead +- Complex multi-step searches - use Task tool instead + +## Examples + +Find all Rust files: +``` +pattern: "**/*.rs" +path: "/home/user/project" +``` + +Find all test files: +``` +pattern: "**/test_*.py" +path: "/home/user/project" +``` + +Find TypeScript and TSX files: +``` +pattern: "**/*.{ts,tsx}" +path: "/home/user/project/src" +``` + +Find Cargo.toml files anywhere: +``` +pattern: "**/Cargo.toml" +path: "/home/user/project" +``` + +## Best Practices + +1. You can call multiple tools in a single response. It is always better to speculatively perform multiple searches as a batch that are potentially useful. +2. Start with broader patterns and narrow down if needed +3. Use `**` for recursive searches across all subdirectories +4. Combine with Read tool to examine found files +5. Results are sorted by modification time - most recently changed files appear first diff --git a/src/llm-coding-tools-core/src/context/glob_allowed.txt b/src/llm-coding-tools-core/src/context/glob_allowed.txt new file mode 100644 index 00000000..5fef3589 --- /dev/null +++ b/src/llm-coding-tools-core/src/context/glob_allowed.txt @@ -0,0 +1,67 @@ +Fast file pattern matching tool that works with any codebase size. 
+ +- Searches within configured allowed directories only +- Paths can be relative to allowed directories; paths outside will be rejected +- Supports glob patterns like "**/*.js" or "src/**/*.ts" +- Returns matching file paths sorted by modification time (newest first) +- Respects .gitignore rules +- Use this tool when you need to find files by name patterns +- When you are doing an open-ended search that may require multiple rounds of globbing and grepping, use the Task tool instead + +## Parameters + +- `pattern`: Glob pattern to match files against (required) + - `*` matches any characters except path separators + - `**` matches any characters including path separators (recursive) + - `?` matches a single character + - `[abc]` matches any character in the brackets + - `{a,b}` matches either pattern +- `path`: Directory path to search in - can be relative or absolute within allowed directories (required) + +## When to Use This Tool + +- Finding files by extension: `**/*.rs`, `**/*.tsx` +- Finding files by name pattern: `**/test_*.py`, `**/*_spec.js` +- Locating configuration files: `**/Cargo.toml`, `**/package.json` +- Finding files in specific directories: `src/**/*.rs` + +## When NOT to Use This Tool + +- Searching for content inside files - use Grep instead +- Reading file contents - use Read instead +- Complex multi-step searches - use Task tool instead + +## Examples + +Find all Rust files: +``` +pattern: "**/*.rs" +path: "." +``` + +Find all test files: +``` +pattern: "**/test_*.py" +path: "." +``` + +Find TypeScript and TSX files: +``` +pattern: "**/*.{ts,tsx}" +path: "src" +``` + +Find Cargo.toml files anywhere: +``` +pattern: "**/Cargo.toml" +path: "." +``` + +## Best Practices + +1. You can call multiple tools in a single response. It is always better to speculatively perform multiple searches as a batch that are potentially useful. +2. Start with broader patterns and narrow down if needed +3. Use `**` for recursive searches across all subdirectories +4. 
Combine with Read tool to examine found files +5. Results are sorted by modification time - most recently changed files appear first +6. Paths outside allowed directories will be rejected diff --git a/src/llm-coding-tools-core/src/context/grep_absolute.txt b/src/llm-coding-tools-core/src/context/grep_absolute.txt new file mode 100644 index 00000000..c9684db8 --- /dev/null +++ b/src/llm-coding-tools-core/src/context/grep_absolute.txt @@ -0,0 +1,78 @@ +Fast content search tool built on ripgrep. Works with any codebase size. + +- Searches file contents using regular expressions +- Supports full regex syntax (e.g., "log.*Error", "function\\s+\\w+") +- Filter files by pattern with the `include` parameter (e.g., "*.rs", "*.{ts,tsx}") +- Returns file paths, line numbers, and matching content sorted by modification time (newest first) +- Use this tool when you need to find files containing specific patterns +- When you are doing an open-ended search that may require multiple rounds of globbing and grepping, use the Task tool instead + +IMPORTANT: ALWAYS use Grep for search tasks. NEVER invoke `grep` or `rg` as a Bash command. The Grep tool has been optimized for correct permissions and access. 
+ +## Parameters + +- `pattern`: Regex pattern to search for in file contents (required) +- `path`: Absolute directory path to search in (required) +- `include`: Optional file glob filter (e.g., "*.rs", "*.{ts,tsx}") +- `limit`: Maximum number of matches to return (default: 100, max: 2000) + +## Pattern Syntax Notes (ripgrep-based) + +- Literal braces need escaping: use `interface\\{\\}` to find `interface{}` in Go code +- Use `\\b` for word boundaries: `\\bfoo\\b` matches "foo" but not "foobar" +- Use `\\s` for whitespace, `\\w` for word characters +- Use `.*` for any characters: `error.*failed` matches "error: connection failed" +- Use `|` for alternation: `TODO|FIXME` matches either +- Patterns match within single lines only; multiline patterns are not supported + +## When to Use This Tool + +- Finding function definitions: `fn\\s+process_` +- Finding usages of a variable or function: `\\bmy_function\\(` +- Finding TODO comments: `TODO|FIXME|HACK` +- Finding error messages: `error.*failed` +- Finding imports: `^use\\s+` + +## When NOT to Use This Tool + +- Finding files by name - use Glob instead +- Reading entire file contents - use Read instead +- Complex multi-step research - use Task tool instead + +## Examples + +Find all function definitions: +``` +pattern: "fn\\s+\\w+" +path: "/home/user/project" +include: "*.rs" +``` + +Find TODO comments: +``` +pattern: "TODO|FIXME" +path: "/home/user/project" +``` + +Find usage of a specific function: +``` +pattern: "\\bprocess_request\\(" +path: "/home/user/project/src" +``` + +Find error handling patterns: +``` +pattern: "Err\\(|Error::" +path: "/home/user/project" +include: "*.rs" +limit: 50 +``` + +## Best Practices + +1. You can call multiple tools in a single response. It is always better to speculatively perform multiple searches in parallel if they are potentially useful. +2. Use the `include` parameter to narrow searches to relevant file types +3. Use word boundaries (`\\b`) to avoid partial matches +4. 
Escape special regex characters when searching for literal text +5. Start with broader patterns and refine based on results +6. Use `limit` parameter if you expect many matches but only need a sample diff --git a/src/llm-coding-tools-core/src/context/grep_allowed.txt b/src/llm-coding-tools-core/src/context/grep_allowed.txt new file mode 100644 index 00000000..9fe4edc4 --- /dev/null +++ b/src/llm-coding-tools-core/src/context/grep_allowed.txt @@ -0,0 +1,81 @@ +Fast content search tool built on ripgrep. Works with any codebase size. + +- Searches within configured allowed directories only +- Paths can be relative to allowed directories; paths outside will be rejected +- Searches file contents using regular expressions +- Supports full regex syntax (e.g., "log.*Error", "function\\s+\\w+") +- Filter files by pattern with the `include` parameter (e.g., "*.rs", "*.{ts,tsx}") +- Returns file paths, line numbers, and matching content sorted by modification time (newest first) +- Use this tool when you need to find files containing specific patterns +- When you are doing an open-ended search that may require multiple rounds of globbing and grepping, use the Task tool instead + +IMPORTANT: ALWAYS use Grep for search tasks. NEVER invoke `grep` or `rg` as a Bash command. The Grep tool has been optimized for correct permissions and access. 
+ +## Parameters + +- `pattern`: Regex pattern to search for in file contents (required) +- `path`: Directory path to search in - can be relative or absolute within allowed directories (required) +- `include`: Optional file glob filter (e.g., "*.rs", "*.{ts,tsx}") +- `limit`: Maximum number of matches to return (default: 100, max: 2000) + +## Pattern Syntax Notes (ripgrep-based) + +- Literal braces need escaping: use `interface\\{\\}` to find `interface{}` in Go code +- Use `\\b` for word boundaries: `\\bfoo\\b` matches "foo" but not "foobar" +- Use `\\s` for whitespace, `\\w` for word characters +- Use `.*` for any characters: `error.*failed` matches "error: connection failed" +- Use `|` for alternation: `TODO|FIXME` matches either +- Patterns match within single lines only; multiline patterns are not supported + +## When to Use This Tool + +- Finding function definitions: `fn\\s+process_` +- Finding usages of a variable or function: `\\bmy_function\\(` +- Finding TODO comments: `TODO|FIXME|HACK` +- Finding error messages: `error.*failed` +- Finding imports: `^use\\s+` + +## When NOT to Use This Tool + +- Finding files by name - use Glob instead +- Reading entire file contents - use Read instead +- Complex multi-step research - use Task tool instead + +## Examples + +Find all function definitions: +``` +pattern: "fn\\s+\\w+" +path: "." +include: "*.rs" +``` + +Find TODO comments: +``` +pattern: "TODO|FIXME" +path: "." +``` + +Find usage of a specific function: +``` +pattern: "\\bprocess_request\\(" +path: "src" +``` + +Find error handling patterns: +``` +pattern: "Err\\(|Error::" +path: "." +include: "*.rs" +limit: 50 +``` + +## Best Practices + +1. You can call multiple tools in a single response. It is always better to speculatively perform multiple searches in parallel if they are potentially useful. +2. Use the `include` parameter to narrow searches to relevant file types +3. Use word boundaries (`\\b`) to avoid partial matches +4. 
Escape special regex characters when searching for literal text +5. Start with broader patterns and refine based on results +6. Use `limit` parameter if you expect many matches but only need a sample +7. Paths outside allowed directories will be rejected diff --git a/src/llm-coding-tools-core/src/context/mod.rs b/src/llm-coding-tools-core/src/context/mod.rs new file mode 100644 index 00000000..97339a79 --- /dev/null +++ b/src/llm-coding-tools-core/src/context/mod.rs @@ -0,0 +1,199 @@ +//! Tool context strings for LLM agents. +//! +//! These provide usage guidance, best practices, and behavioral instructions +//! for LLM agents when using coding tools. Context strings are sourced from +//! OpenCode's tool documentation. +//! +//! # Path-based Tools +//! +//! Tools operating on file paths have two variants: +//! - `*_ABSOLUTE`: For unrestricted filesystem access (absolute paths required) +//! - `*_ALLOWED`: For sandboxed access (paths relative to allowed directories) +//! +//! # Example +//! +//! ```rust +//! use llm_coding_tools_core::context::{BASH, READ_ABSOLUTE, READ_ALLOWED}; +//! +//! // Use BASH context for bash tool +//! println!("Bash guidance: {}", BASH); +//! +//! // Use appropriate read context based on path resolver +//! let sandboxed = true; +//! let read_context = if sandboxed { READ_ALLOWED } else { READ_ABSOLUTE }; +//! ``` + +/// Bash tool context - shell command execution guidance. +pub const BASH: &str = include_str!("bash.txt"); + +/// Task tool context - agent delegation guidance. +pub const TASK: &str = include_str!("task.txt"); + +/// Todo read tool context - reading task lists. +pub const TODO_READ: &str = include_str!("todoread.txt"); + +/// Todo write tool context - managing task lists. +pub const TODO_WRITE: &str = include_str!("todowrite.txt"); + +/// Webfetch tool context - URL content retrieval. +pub const WEBFETCH: &str = include_str!("webfetch.txt"); + +/// Read tool context for absolute path mode. 
+pub const READ_ABSOLUTE: &str = include_str!("read_absolute.txt"); + +/// Read tool context for allowed/sandboxed path mode. +pub const READ_ALLOWED: &str = include_str!("read_allowed.txt"); + +/// Write tool context for absolute path mode. +pub const WRITE_ABSOLUTE: &str = include_str!("write_absolute.txt"); + +/// Write tool context for allowed/sandboxed path mode. +pub const WRITE_ALLOWED: &str = include_str!("write_allowed.txt"); + +/// Edit tool context for absolute path mode. +pub const EDIT_ABSOLUTE: &str = include_str!("edit_absolute.txt"); + +/// Edit tool context for allowed/sandboxed path mode. +pub const EDIT_ALLOWED: &str = include_str!("edit_allowed.txt"); + +/// Glob tool context for absolute path mode. +pub const GLOB_ABSOLUTE: &str = include_str!("glob_absolute.txt"); + +/// Glob tool context for allowed/sandboxed path mode. +pub const GLOB_ALLOWED: &str = include_str!("glob_allowed.txt"); + +/// Grep tool context for absolute path mode. +pub const GREP_ABSOLUTE: &str = include_str!("grep_absolute.txt"); + +/// Grep tool context for allowed/sandboxed path mode. +pub const GREP_ALLOWED: &str = include_str!("grep_allowed.txt"); + +/// Trait for tools that provide usage context for LLM preambles. +/// +/// Implement this trait on tool types (for frameworks like rig) to enable automatic preamble +/// generation via [`PreambleBuilder`](crate::PreambleBuilder). +/// +/// # Example +/// +/// ```rust +/// use llm_coding_tools_core::context::ToolContext; +/// +/// struct MyTool; +/// +/// impl ToolContext for MyTool { +/// const NAME: &'static str = "mytool"; +/// +/// fn context(&self) -> &'static str { +/// "Instructions for using MyTool..." +/// } +/// } +/// ``` +pub trait ToolContext { + /// Tool name used for section headers in generated preamble. + /// + /// Should be lowercase (e.g., "read", "bash", "glob"). + /// PreambleBuilder capitalizes this for display. 
+ const NAME: &'static str; + + /// Returns the tool's context string for preamble generation. + /// + /// This should return one of the context constants from this module. + fn context(&self) -> &'static str; +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn context_strings_are_not_empty() { + // Non-path tools + assert!(!BASH.is_empty(), "BASH context should not be empty"); + assert!(!TASK.is_empty(), "TASK context should not be empty"); + assert!( + !TODO_READ.is_empty(), + "TODO_READ context should not be empty" + ); + assert!( + !TODO_WRITE.is_empty(), + "TODO_WRITE context should not be empty" + ); + assert!(!WEBFETCH.is_empty(), "WEBFETCH context should not be empty"); + + // Path-based tools (absolute variants) + assert!( + !READ_ABSOLUTE.is_empty(), + "READ_ABSOLUTE context should not be empty" + ); + assert!( + !WRITE_ABSOLUTE.is_empty(), + "WRITE_ABSOLUTE context should not be empty" + ); + assert!( + !EDIT_ABSOLUTE.is_empty(), + "EDIT_ABSOLUTE context should not be empty" + ); + assert!( + !GLOB_ABSOLUTE.is_empty(), + "GLOB_ABSOLUTE context should not be empty" + ); + assert!( + !GREP_ABSOLUTE.is_empty(), + "GREP_ABSOLUTE context should not be empty" + ); + + // Path-based tools (allowed variants) + assert!( + !READ_ALLOWED.is_empty(), + "READ_ALLOWED context should not be empty" + ); + assert!( + !WRITE_ALLOWED.is_empty(), + "WRITE_ALLOWED context should not be empty" + ); + assert!( + !EDIT_ALLOWED.is_empty(), + "EDIT_ALLOWED context should not be empty" + ); + assert!( + !GLOB_ALLOWED.is_empty(), + "GLOB_ALLOWED context should not be empty" + ); + assert!( + !GREP_ALLOWED.is_empty(), + "GREP_ALLOWED context should not be empty" + ); + } + + #[test] + fn absolute_variants_mention_absolute_path() { + assert!( + READ_ABSOLUTE.contains("absolute path"), + "READ_ABSOLUTE should mention absolute path" + ); + } + + #[test] + fn allowed_variants_mention_allowed_directories() { + assert!( + READ_ALLOWED.contains("allowed directories"), + 
"READ_ALLOWED should mention allowed directories" + ); + assert!( + WRITE_ALLOWED.contains("allowed directories"), + "WRITE_ALLOWED should mention allowed directories" + ); + assert!( + EDIT_ALLOWED.contains("allowed directories"), + "EDIT_ALLOWED should mention allowed directories" + ); + assert!( + GLOB_ALLOWED.contains("allowed directories"), + "GLOB_ALLOWED should mention allowed directories" + ); + assert!( + GREP_ALLOWED.contains("allowed directories"), + "GREP_ALLOWED should mention allowed directories" + ); + } +} diff --git a/src/llm-coding-tools-core/src/context/read_absolute.txt b/src/llm-coding-tools-core/src/context/read_absolute.txt new file mode 100644 index 00000000..b6fe01da --- /dev/null +++ b/src/llm-coding-tools-core/src/context/read_absolute.txt @@ -0,0 +1,49 @@ +Reads a file from the local filesystem. You can access any file directly by using this tool. +Assume this tool is able to read all files on the machine. If the User provides a path to a file assume that path is valid. It is okay to read a file that does not exist; an error will be returned. + +Usage: +- The `file_path` parameter must be an absolute path, not a relative path +- By default, it reads up to 2000 lines starting from line 1 +- You can optionally specify `offset` (1-indexed starting line) and `limit` (max lines), but it's recommended to read the whole file by not providing these parameters +- Any lines longer than 2000 characters will be truncated +- Results are returned with line numbers prefixed in "L{n}: content" format (e.g., "L1: first line") +- This tool can read image files (eg PNG, JPG, etc). When reading an image file the contents are presented visually. +- This tool can only read files, not directories. To list directory contents, use bash with `ls`. +- If you read a file that exists but has empty contents you will receive a system reminder warning in place of file contents. + +You can call multiple tools in a single response. 
It is always better to speculatively read multiple files as a batch that are potentially useful. + +## When to Use This Tool + +- Reading source code files to understand implementation +- Viewing configuration files +- Checking file contents before making edits +- Reading log files for debugging +- Viewing images and screenshots provided by the user + +## When NOT to Use This Tool + +- To list directory contents - use bash with `ls` instead +- To search for patterns across files - use Grep instead +- To find files by name - use Glob instead + +## Examples + +Reading a full file: +``` +file_path: "/home/user/project/src/main.rs" +``` + +Reading specific lines (lines 100-200): +``` +file_path: "/home/user/project/src/main.rs" +offset: 100 +limit: 100 +``` + +## Best Practices + +1. Read files before editing them - the Edit tool requires you to have read the file first +2. When exploring a codebase, read multiple related files in parallel to save time +3. For large files, consider reading specific sections using offset/limit if you know what you're looking for +4. Always use absolute paths - relative paths will be rejected diff --git a/src/llm-coding-tools-core/src/context/read_allowed.txt b/src/llm-coding-tools-core/src/context/read_allowed.txt new file mode 100644 index 00000000..ba55cba5 --- /dev/null +++ b/src/llm-coding-tools-core/src/context/read_allowed.txt @@ -0,0 +1,50 @@ +Reads a file from the local filesystem within allowed directories. +Assume this tool is able to read files within the configured allowed directories. If the User provides a path to a file assume that path is valid. It is okay to read a file that does not exist; an error will be returned. 
+ +Usage: +- Paths can be relative to configured allowed directories, or absolute paths within allowed directories +- Paths outside allowed directories will be rejected +- By default, it reads up to 2000 lines starting from line 1 +- You can optionally specify `offset` (1-indexed starting line) and `limit` (max lines), but it's recommended to read the whole file by not providing these parameters +- Any lines longer than 2000 characters will be truncated +- Results are returned with line numbers prefixed in "L{n}: content" format (e.g., "L1: first line") +- This tool can read image files (eg PNG, JPG, etc). When reading an image file the contents are presented visually. +- This tool can only read files, not directories. To list directory contents, use bash with `ls`. +- If you read a file that exists but has empty contents you will receive a system reminder warning in place of file contents. + +You can call multiple tools in a single response. It is always better to speculatively read multiple files as a batch that are potentially useful. + +## When to Use This Tool + +- Reading source code files to understand implementation +- Viewing configuration files +- Checking file contents before making edits +- Reading log files for debugging +- Viewing images and screenshots provided by the user + +## When NOT to Use This Tool + +- To list directory contents - use bash with `ls` instead +- To search for patterns across files - use Grep instead +- To find files by name - use Glob instead + +## Examples + +Reading a full file: +``` +file_path: "src/main.rs" +``` + +Reading specific lines (lines 100-200): +``` +file_path: "src/main.rs" +offset: 100 +limit: 100 +``` + +## Best Practices + +1. Read files before editing them - the Edit tool requires you to have read the file first +2. When exploring a codebase, read multiple related files in parallel to save time +3. For large files, consider reading specific sections using offset/limit if you know what you're looking for +4. 
Relative paths are resolved against allowed directories diff --git a/src/llm-coding-tools-core/src/context/task.txt b/src/llm-coding-tools-core/src/context/task.txt new file mode 100644 index 00000000..9c68820a --- /dev/null +++ b/src/llm-coding-tools-core/src/context/task.txt @@ -0,0 +1,53 @@ +Launch a new agent to handle complex, multistep tasks autonomously. + +The Task tool launches specialized agents (subprocesses) that autonomously handle complex tasks. Each agent type has specific capabilities and tools available to it. + +Available agent types and the tools they have access to: +{agents} + +When using the Task tool, you must specify a `subagent_type` parameter to select which agent type to use. + +## When NOT to Use the Task Tool + +- If you want to read a specific file path, use the Read or Glob tool instead, to find the match more quickly +- If you are searching for a specific class definition like "class Foo", use the Glob tool instead, to find the match more quickly +- If you are searching for code within a specific file or set of 2-3 files, use the Read tool instead, to find the match more quickly +- Other tasks that are not related to the agent descriptions above + +## Usage Notes + +- Always include a short description (3-5 words) summarizing what the agent will do +- Launch multiple agents concurrently whenever possible, to maximize performance; to do that, use a single message with multiple tool uses +- When the agent is done, it will return a single message back to you. The result returned by the agent is NOT visible to the user. To show the user the result, you must send a text message with a concise summary of the result. +- Each agent invocation is stateless unless you provide a `session_id`. Your prompt should contain a highly detailed task description for the agent to perform autonomously, and you should specify exactly what information the agent should return back to you. 
+- The agent's outputs should generally be trusted +- Clearly tell the agent whether you expect it to write code or just to do research (search, file reads, web fetches, etc.), since it is not aware of the user's intent +- If the agent description mentions that it should be used proactively, then you should try your best to use it without the user having to ask for it first. Use your judgement. +- Provide clear, detailed prompts so the agent can work autonomously and return exactly the information you need. + +## Agent Guidelines + +When agents are running, they follow these principles: +- For file searches: Use Grep or Glob when searching broadly. Use Read when the specific file path is known. +- For analysis: Start broad and narrow down. Use multiple search strategies if the first doesn't yield results. +- Be thorough: Check multiple locations, consider different naming conventions, look for related files. +- NEVER create files unless absolutely necessary for achieving the goal. ALWAYS prefer editing existing files. +- NEVER proactively create documentation files (*.md) or README files. +- Agent threads always have their cwd reset between bash calls - use absolute file paths. +- In final responses, share relevant file names and code snippets. All file paths must be absolute. + +## Examples + + +User: "Please write a function that checks if a number is prime" +Assistant: Sure, let me write that function. +*Writes the function using Edit/Write tools* +Since significant code was written, now use a test-runner agent to verify: +*Uses Task tool with: subagent_type="test-runner", prompt="Run tests for the prime checking function"* + + + +User: "Find all places where we handle authentication errors" +*For a broad search across the codebase:* +*Uses Task tool with: subagent_type="researcher", prompt="Search for all authentication error handling in the codebase. Look for patterns like AuthError, authentication failed, login error, etc. 
Return file paths and relevant code snippets."* + diff --git a/src/llm-coding-tools-core/src/context/todoread.txt b/src/llm-coding-tools-core/src/context/todoread.txt new file mode 100644 index 00000000..9ef8d913 --- /dev/null +++ b/src/llm-coding-tools-core/src/context/todoread.txt @@ -0,0 +1,14 @@ +Use this tool to read the current to-do list for the session. This tool should be used proactively and frequently to ensure that you are aware of +the status of the current task list. You should make use of this tool as often as possible, especially in the following situations: +- At the beginning of conversations to see what's pending +- Before starting new tasks to prioritize work +- When the user asks about previous tasks or plans +- Whenever you're uncertain about what to do next +- After completing tasks to update your understanding of remaining work +- After every few messages to ensure you're on track + +Usage: +- This tool takes in no parameters. So leave the input blank or empty. DO NOT include a dummy object, placeholder string or a key like "input" or "empty". LEAVE IT BLANK. +- Returns a list of todo items with their status, priority, and content +- Use this information to track progress and plan next steps +- If no todos exist yet, an empty list will be returned diff --git a/src/llm-coding-tools-core/src/context/todowrite.txt b/src/llm-coding-tools-core/src/context/todowrite.txt new file mode 100644 index 00000000..9204ec48 --- /dev/null +++ b/src/llm-coding-tools-core/src/context/todowrite.txt @@ -0,0 +1,124 @@ +Use this tool to create and manage a structured task list for your current coding session. This helps you track progress, organize complex tasks, and demonstrate thoroughness to the user. +It also helps the user understand the progress of the task and overall progress of their requests. + +## When to Use This Tool + +Use this tool proactively in these scenarios: + +1. 
Complex multi-step tasks - When a task requires 3 or more distinct steps or actions +2. Non-trivial and complex tasks - Tasks that require careful planning or multiple operations +3. User explicitly requests todo list - When the user directly asks you to use the todo list +4. User provides multiple tasks - When users provide a list of things to be done (numbered or comma-separated) +5. After receiving new instructions - Immediately capture user requirements as todos +6. When you start working on a task - Mark it as in_progress BEFORE beginning work. Ideally you should only have one todo as in_progress at a time +7. After completing a task - Mark it as completed and add any new follow-up tasks discovered during implementation + +## When NOT to Use This Tool + +Skip using this tool when: +1. There is only a single, straightforward task +2. The task is trivial and tracking it provides no organizational benefit +3. The task can be completed in less than 3 trivial steps +4. The task is purely conversational or informational + +NOTE that you should not use this tool if there is only one trivial task to do. In this case you are better off just doing the task directly. + +## Examples of When to Use the Todo List + + +User: I want to add a dark mode toggle to the application settings. Make sure you run the tests and build when you're done! +Assistant: I'll help add a dark mode toggle to your application settings. Let me create a todo list to track this implementation. +*Creates todo list with the following items:* +1. Creating dark mode toggle component in Settings page +2. Adding dark mode state management (context/store) +3. Implementing CSS-in-JS styles for dark theme +4. Updating existing components to support theme switching +5. Running tests and build process, addressing any failures or errors that occur +*Begins working on the first task* + + +The assistant used the todo list because: +1. 
Adding dark mode is a multi-step feature requiring UI, state management, and styling changes +2. The user explicitly requested tests and build be run afterward +3. The assistant inferred that tests and build need to pass by adding "Running tests and build process, addressing any failures or errors that occur" as the final task + + + +User: Help me rename the function getCwd to getCurrentWorkingDirectory across my project +Assistant: Let me first search through your codebase to find all occurrences of 'getCwd'. +*Uses grep or search tools to locate all instances of getCwd in the codebase* +Assistant: I've found 15 instances of 'getCwd' across 8 different files. Let me create a todo list to track these changes. +*Creates todo list with specific items for each file that needs updating* + +The assistant used the todo list because: +1. First, the assistant searched to understand the scope of the task +2. Upon finding multiple occurrences across different files, it determined this was a complex task with multiple steps +3. The todo list helps ensure every instance is tracked and updated systematically +4. This approach prevents missing any occurrences and maintains code consistency + + + +## Examples of When NOT to Use the Todo List + + +User: How do I print 'Hello World' in Python? +Assistant: In Python, you can print "Hello World" with this simple code: + +```python +print("Hello World") +``` + +This will output the text "Hello World" to the console when executed. + + +The assistant did not use the todo list because this is a single, trivial task that can be completed in one step. + + + + +User: Can you add a comment to the calculateTotal function to explain what it does? +Assistant: Sure, let me add a comment to the calculateTotal function. +*Uses the Edit tool to add a comment* + +The assistant did not use the todo list because this is a single, straightforward task confined to one location in the code. + + + +## Task States and Management + +1.
**Task States**: Use these states to track progress: + - pending: Task not yet started + - in_progress: Currently working on (limit to ONE task at a time) + - completed: Task finished successfully + + **IMPORTANT**: Task descriptions should use imperative form describing what needs to be done: + - "Run tests" (not "Running tests") + - "Build the project" (not "Building the project") + - "Fix authentication bug" (not "Fixing authentication bug") + +2. **Task Management**: + - Update task status in real-time as you work + - Mark tasks complete IMMEDIATELY after finishing (don't batch completions) + - Exactly ONE task must be in_progress at any time (not less, not more) + - Complete current tasks before starting new ones + - Remove tasks that are no longer relevant from the list entirely + +3. **Task Completion Requirements**: + - ONLY mark a task as completed when you have FULLY accomplished it + - If you encounter errors, blockers, or cannot finish, keep the task as in_progress + - When blocked, create a new task describing what needs to be resolved + - Never mark a task as completed if: + - Tests are failing + - Implementation is partial + - You encountered unresolved errors + - You couldn't find necessary files or dependencies + +4. **Task Breakdown**: + - Create specific, actionable items + - Break complex tasks into smaller, manageable steps + - Use clear, descriptive task names + +When in doubt, use this tool. Being proactive with task management demonstrates attentiveness and ensures you complete all requirements successfully. diff --git a/src/llm-coding-tools-core/src/context/webfetch.txt b/src/llm-coding-tools-core/src/context/webfetch.txt new file mode 100644 index 00000000..3375178f --- /dev/null +++ b/src/llm-coding-tools-core/src/context/webfetch.txt @@ -0,0 +1,56 @@ +Fetches content from a specified URL and processes it for analysis. 
+ +- Takes a URL and optional `timeout_ms` as input +- HTML content is automatically converted to markdown for easier reading +- JSON content is automatically prettified +- Other content types are returned as-is +- Use this tool when you need to retrieve and analyze web content + +## Parameters + +- `url`: The URL to fetch content from (required) + - Must be a fully-formed valid URL + - HTTP URLs will be automatically upgraded to HTTPS +- `timeout_ms`: Optional timeout in milliseconds (default varies by implementation) + +## Usage Notes + +- IMPORTANT: If another tool is present that offers better web fetching capabilities, is more targeted to the task, or has fewer restrictions, prefer using that tool instead of this one. +- The URL must be a fully-formed valid URL (e.g., "https://example.com/page") +- HTTP URLs will be automatically upgraded to HTTPS for security +- Redirects are followed automatically +- This tool is read-only and does not modify any files +- Results may be summarized if the content is very large + +## When to Use This Tool + +- Fetching documentation from the web +- Reading API references or library documentation +- Retrieving content from URLs provided by the user +- Checking website content for analysis + +## When NOT to Use This Tool + +- For local file operations - use Read tool instead +- For searching the web - this only fetches specific URLs +- When another MCP tool offers better web capabilities + +## Examples + +Fetching a documentation page: +``` +url: "https://docs.rust-lang.org/book/ch01-00-getting-started.html" +``` + +Fetching with custom timeout: +``` +url: "https://api.example.com/large-response" +timeout_ms: 30000 +``` + +## Best Practices + +1. Provide complete URLs including the protocol (https://) +2. Use this tool for specific URLs, not for web searching +3. If content is very large, results may be summarized - ask for specific sections if needed +4. 
Consider timeout settings for slow-loading pages diff --git a/src/llm-coding-tools-core/src/context/write_absolute.txt b/src/llm-coding-tools-core/src/context/write_absolute.txt new file mode 100644 index 00000000..547d1e16 --- /dev/null +++ b/src/llm-coding-tools-core/src/context/write_absolute.txt @@ -0,0 +1,48 @@ +Writes a file to the local filesystem. Creates parent directories if they don't exist. + +Usage: +- This tool will overwrite the existing file if there is one at the provided path. +- If this is an existing file, you MUST use the Read tool first to read the file's contents. This tool will fail if you did not read the file first. +- ALWAYS prefer editing existing files in the codebase using the Edit tool. NEVER write new files unless explicitly required. +- NEVER proactively create documentation files (*.md) or README files. Only create documentation files if explicitly requested by the User. +- Only use emojis if the user explicitly requests it. Avoid writing emojis to files unless asked. + +## Parameters + +- `file_path`: Absolute path for the file to write (required) +- `content`: Content to write to the file (required) + +## When to Use This Tool + +- Creating new files that don't exist yet +- Completely rewriting a file when most content changes +- Writing generated output (build artifacts, reports, etc.) +- Creating new source files when explicitly requested + +## When NOT to Use This Tool + +- Modifying existing files - use Edit tool instead (more precise, less error-prone) +- Creating documentation unless explicitly requested +- Writing files you haven't read first (if they exist) + +## Examples + +Creating a new file: +``` +file_path: "/home/user/project/src/new_module.rs" +content: "//! New module\n\npub fn hello() {\n println!(\"Hello!\");\n}\n" +``` + +## Best Practices + +1. ALWAYS read existing files with Read tool before overwriting them +2. Prefer Edit tool for making changes to existing files - it's safer and more precise +3. 
When creating new files, ensure the content is complete and correct +4. Don't create files proactively - wait for explicit user requests +5. Use absolute paths only - relative paths will be rejected + +## Error Handling + +- If you try to overwrite a file you haven't read, the operation will fail +- Permission errors will be returned if you can't write to the location +- Parent directories are created automatically if they don't exist diff --git a/src/llm-coding-tools-core/src/context/write_allowed.txt b/src/llm-coding-tools-core/src/context/write_allowed.txt new file mode 100644 index 00000000..41d9b5c3 --- /dev/null +++ b/src/llm-coding-tools-core/src/context/write_allowed.txt @@ -0,0 +1,51 @@ +Writes a file to the local filesystem within allowed directories. Creates parent directories if they don't exist. + +Usage: +- This tool will overwrite the existing file if there is one at the provided path. +- Paths can be relative to configured allowed directories, or absolute paths within allowed directories +- Paths outside allowed directories will be rejected +- If this is an existing file, you MUST use the Read tool first to read the file's contents. This tool will fail if you did not read the file first. +- ALWAYS prefer editing existing files in the codebase using the Edit tool. NEVER write new files unless explicitly required. +- NEVER proactively create documentation files (*.md) or README files. Only create documentation files if explicitly requested by the User. +- Only use emojis if the user explicitly requests it. Avoid writing emojis to files unless asked. + +## Parameters + +- `file_path`: Path for the file to write - can be relative or absolute within allowed directories (required) +- `content`: Content to write to the file (required) + +## When to Use This Tool + +- Creating new files that don't exist yet +- Completely rewriting a file when most content changes +- Writing generated output (build artifacts, reports, etc.) 
+- Creating new source files when explicitly requested + +## When NOT to Use This Tool + +- Modifying existing files - use Edit tool instead (more precise, less error-prone) +- Creating documentation unless explicitly requested +- Writing files you haven't read first (if they exist) + +## Examples + +Creating a new file: +``` +file_path: "src/new_module.rs" +content: "//! New module\n\npub fn hello() {\n println!(\"Hello!\");\n}\n" +``` + +## Best Practices + +1. ALWAYS read existing files with Read tool before overwriting them +2. Prefer Edit tool for making changes to existing files - it's safer and more precise +3. When creating new files, ensure the content is complete and correct +4. Don't create files proactively - wait for explicit user requests +5. Relative paths are resolved against allowed directories + +## Error Handling + +- If you try to overwrite a file you haven't read, the operation will fail +- Paths outside allowed directories will be rejected +- Permission errors will be returned if you can't write to the location +- Parent directories are created automatically if they don't exist diff --git a/src/llm-coding-tools-core/src/error.rs b/src/llm-coding-tools-core/src/error.rs new file mode 100644 index 00000000..ebf25292 --- /dev/null +++ b/src/llm-coding-tools-core/src/error.rs @@ -0,0 +1,77 @@ +//! Common error types for coding tools. + +use thiserror::Error; + +/// Unified error type for all tool operations. +#[derive(Debug, Error)] +pub enum ToolError { + /// File I/O operation failed. + #[error("I/O error: {0}")] + Io(#[from] std::io::Error), + + /// Path validation failed (not absolute, doesn't exist, etc.). + #[error("invalid path: {0}")] + InvalidPath(String), + + /// Requested offset/limit exceeds file bounds. + #[error("out of bounds: {0}")] + OutOfBounds(String), + + /// Glob/regex pattern is invalid. + #[error("invalid pattern: {0}")] + InvalidPattern(String), + + /// HTTP request failed. 
+ #[error("HTTP error: {0}")] + Http(String), + + /// Command execution failed. + #[error("execution error: {0}")] + Execution(String), + + /// Timeout exceeded. + #[error("timeout: {0}")] + Timeout(String), + + /// Validation failed. + #[error("validation error: {0}")] + Validation(String), + + /// JSON serialization/deserialization failed. + #[error("JSON error: {0}")] + Json(#[from] serde_json::Error), +} + +/// Result type alias for tool operations. +pub type ToolResult<T> = Result<T, ToolError>; + +impl From<globset::Error> for ToolError { + fn from(e: globset::Error) -> Self { + ToolError::InvalidPattern(e.to_string()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn tool_error_displays_io_error() { + let io_err = std::io::Error::new(std::io::ErrorKind::NotFound, "file not found"); + let err: ToolError = io_err.into(); + assert!(err.to_string().contains("I/O error")); + } + + #[test] + fn tool_error_displays_invalid_path() { + let err = ToolError::InvalidPath("not absolute".into()); + assert!(err.to_string().contains("invalid path")); + } + + #[test] + fn tool_error_from_glob_pattern_error() { + let glob_err = globset::Glob::new("[invalid").unwrap_err(); + let err: ToolError = glob_err.into(); + assert!(matches!(err, ToolError::InvalidPattern(_))); + } +} diff --git a/src/llm-coding-tools-core/src/fs.rs b/src/llm-coding-tools-core/src/fs.rs new file mode 100644 index 00000000..4f73a03d --- /dev/null +++ b/src/llm-coding-tools-core/src/fs.rs @@ -0,0 +1,95 @@ +//! Filesystem abstraction layer. +//! +//! Provides unified APIs that work with both sync and async runtimes. +//! When the `blocking` feature is disabled (default), async operations use tokio. +//! When `blocking` is enabled, all operations are synchronous.
+ +use crate::error::ToolResult; +use std::path::Path; + +// ============================================================================ +// Async implementations (blocking feature disabled) +// ============================================================================ + +/// Reads a file to string. +#[cfg(not(feature = "blocking"))] +pub async fn read_to_string(path: impl AsRef<Path>) -> ToolResult<String> { + Ok(tokio::fs::read_to_string(path).await?) +} + +/// Writes content to a file. +#[cfg(not(feature = "blocking"))] +pub async fn write(path: impl AsRef<Path>, contents: impl AsRef<[u8]>) -> ToolResult<()> { + Ok(tokio::fs::write(path, contents).await?) +} + +/// Creates a directory and all parent directories. +#[cfg(not(feature = "blocking"))] +pub async fn create_dir_all(path: impl AsRef<Path>) -> ToolResult<()> { + Ok(tokio::fs::create_dir_all(path).await?) +} + +/// Opens a file for buffered reading. +#[cfg(not(feature = "blocking"))] +pub async fn open_buffered( + path: impl AsRef<Path>, + capacity: usize, +) -> ToolResult<tokio::io::BufReader<tokio::fs::File>> { + let file = tokio::fs::File::open(path).await?; + Ok(tokio::io::BufReader::with_capacity(capacity, file)) +} + +// ============================================================================ +// Sync implementations (blocking feature enabled) +// ============================================================================ + +/// Reads a file to string. +#[cfg(feature = "blocking")] +pub fn read_to_string(path: impl AsRef<Path>) -> ToolResult<String> { + Ok(std::fs::read_to_string(path)?) +} + +/// Writes content to a file. +#[cfg(feature = "blocking")] +pub fn write(path: impl AsRef<Path>, contents: impl AsRef<[u8]>) -> ToolResult<()> { + Ok(std::fs::write(path, contents)?) +} + +/// Creates a directory and all parent directories. +#[cfg(feature = "blocking")] +pub fn create_dir_all(path: impl AsRef<Path>) -> ToolResult<()> { + Ok(std::fs::create_dir_all(path)?) +} + +/// Opens a file for buffered reading.
+#[cfg(feature = "blocking")] +pub fn open_buffered( + path: impl AsRef<Path>, + capacity: usize, +) -> ToolResult<std::io::BufReader<std::fs::File>> { + let file = std::fs::File::open(path)?; + Ok(std::io::BufReader::with_capacity(capacity, file)) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Write as _; + use tempfile::NamedTempFile; + + #[maybe_async::test(feature = "blocking", async(not(feature = "blocking"), tokio::test))] + async fn read_to_string_works() { + let mut file = NamedTempFile::new().unwrap(); + file.write_all(b"hello world").unwrap(); + let content = read_to_string(file.path()).await.unwrap(); + assert_eq!(content, "hello world"); + } + + #[maybe_async::test(feature = "blocking", async(not(feature = "blocking"), tokio::test))] + async fn write_works() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("test.txt"); + write(&path, b"hello").await.unwrap(); + assert_eq!(std::fs::read_to_string(&path).unwrap(), "hello"); + } +} diff --git a/src/llm-coding-tools-core/src/lib.rs b/src/llm-coding-tools-core/src/lib.rs new file mode 100644 index 00000000..1cae7760 --- /dev/null +++ b/src/llm-coding-tools-core/src/lib.rs @@ -0,0 +1,50 @@ +#![warn(missing_docs)] +//! Core types and utilities for coding tools. +//! +//! This crate provides framework-agnostic building blocks: +//! - [`ToolError`] and [`ToolResult`] for error handling +//! - [`ToolOutput`] for tool responses with truncation metadata +//! - Utility functions for text processing +//! +//! # Features +//! +//! - `async`: Enables async function signatures and async-only modules. +//! - `tokio` (default): Enables async via tokio runtime (implies `async`). +//! When disabled, all operations are synchronous. + +// Validate feature combinations at compile time +#[cfg(all(feature = "async", not(feature = "tokio")))] +compile_error!("Feature `async` requires a runtime.
Enable `tokio` feature instead."); + +#[cfg(all(feature = "async", feature = "blocking"))] +compile_error!("Features `async` and `blocking` are mutually exclusive."); + +pub mod context; +pub mod error; +pub mod fs; +pub mod operations; +pub mod output; +pub mod path; +pub mod preamble; +pub mod util; + +pub use context::ToolContext; +pub use error::{ToolError, ToolResult}; +pub use output::ToolOutput; +pub use path::{AbsolutePathResolver, AllowedPathResolver, PathResolver}; +pub use preamble::{PreambleBuilder, Substitute}; + +// Re-export operations (always available, sync or async based on runtime feature) +pub use operations::{ + edit_file, execute_command, glob_files, grep_search, read_file, read_todos, write_file, + write_todos, BashOutput, EditError, GlobOutput, GrepFileMatches, GrepLineMatch, GrepOutput, + Todo, TodoPriority, TodoState, TodoStatus, +}; + +// Re-export webfetch operations (requires async or blocking feature) +#[cfg(any(feature = "async", feature = "blocking"))] +pub use operations::{fetch_url, format_json, html_to_markdown, WebFetchOutput}; + +// Re-export async-only operations (requires async feature) +#[cfg(feature = "async")] +pub use operations::{MockTaskExecutor, TaskArgs, TaskExecutor, TaskResult}; diff --git a/src/llm-coding-tools-core/src/operations/bash/async_impl.rs b/src/llm-coding-tools-core/src/operations/bash/async_impl.rs new file mode 100644 index 00000000..283e5107 --- /dev/null +++ b/src/llm-coding-tools-core/src/operations/bash/async_impl.rs @@ -0,0 +1,139 @@ +//! Async shell command execution. + +use super::BashOutput; +use crate::error::{ToolError, ToolResult}; +use std::path::Path; +use std::process::Stdio; +use std::time::Duration; +use tokio::process::Command; + +/// Executes a shell command with optional working directory and timeout. +/// +/// Uses bash on Unix, cmd on Windows. 
+pub async fn execute_command( + command: &str, + workdir: Option<&Path>, + timeout: Duration, +) -> ToolResult<BashOutput> { + if let Some(dir) = workdir { + if !dir.is_absolute() { + return Err(ToolError::InvalidPath(format!( + "working directory must be an absolute path: {}", + dir.display() + ))); + } + if !dir.is_dir() { + return Err(ToolError::InvalidPath(format!( + "working directory does not exist: {}", + dir.display() + ))); + } + } + + let mut cmd = if cfg!(target_os = "windows") { + let mut c = Command::new("cmd"); + c.args(["/C", command]); + c + } else { + let mut c = Command::new("bash"); + c.args(["-c", command]); + c + }; + + if let Some(dir) = workdir { + cmd.current_dir(dir); + } + + cmd.stdin(Stdio::null()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .kill_on_drop(true); + + let result = tokio::time::timeout(timeout, cmd.output()).await; + + match result { + Ok(Ok(output)) => Ok(BashOutput { + exit_code: output.status.code(), + stdout: String::from_utf8_lossy(&output.stdout).into_owned(), + stderr: String::from_utf8_lossy(&output.stderr).into_owned(), + }), + Ok(Err(e)) => Err(ToolError::Execution(e.to_string())), + Err(_) => Err(ToolError::Timeout(format!( + "command timed out after {}ms", + timeout.as_millis() + ))), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + #[tokio::test] + async fn execute_echo_returns_output() { + let result = execute_command("echo hello", None, Duration::from_secs(5)) + .await + .unwrap(); + + assert_eq!(result.exit_code, Some(0)); + assert!(result.stdout.contains("hello")); + } + + #[tokio::test] + async fn respects_working_directory() { + let temp = TempDir::new().unwrap(); + let cmd = if cfg!(target_os = "windows") { + "cd" + } else { + "pwd" + }; + + let result = execute_command(cmd, Some(temp.path()), Duration::from_secs(5)) + .await + .unwrap(); + + assert_eq!(result.exit_code, Some(0)); + let temp_path = temp.path().to_string_lossy(); + 
assert!(result.stdout.contains(temp_path.as_ref())); + } + + #[tokio::test] + async fn timeout_returns_error() { + let cmd = if cfg!(target_os = "windows") { + "ping -n 10 127.0.0.1" + } else { + "sleep 10" + }; + + let result = execute_command(cmd, None, Duration::from_millis(100)).await; + assert!(matches!(result, Err(ToolError::Timeout(_)))); + } + + #[tokio::test] + async fn invalid_workdir_returns_error() { + let result = execute_command( + "echo hello", + Some(Path::new("/nonexistent/path")), + Duration::from_secs(5), + ) + .await; + + assert!(matches!(result, Err(ToolError::InvalidPath(_)))); + } + + #[tokio::test] + async fn captures_exit_code() { + let cmd = if cfg!(target_os = "windows") { + "exit /b 42" + } else { + "exit 42" + }; + + let result = execute_command(cmd, None, Duration::from_secs(5)) + .await + .unwrap(); + + assert_eq!(result.exit_code, Some(42)); + } +} diff --git a/src/llm-coding-tools-core/src/operations/bash/blocking_impl.rs b/src/llm-coding-tools-core/src/operations/bash/blocking_impl.rs new file mode 100644 index 00000000..a1da5ab9 --- /dev/null +++ b/src/llm-coding-tools-core/src/operations/bash/blocking_impl.rs @@ -0,0 +1,148 @@ +//! Blocking shell command execution. + +use super::BashOutput; +use crate::error::{ToolError, ToolResult}; +use std::path::Path; +use std::process::{Command, Stdio}; +use std::thread; +use std::time::{Duration, Instant}; + +/// Executes a shell command with optional working directory and timeout. +/// +/// Uses bash on Unix, cmd on Windows. 
+pub fn execute_command( + command: &str, + workdir: Option<&Path>, + timeout: Duration, +) -> ToolResult { + if let Some(dir) = workdir { + if !dir.is_absolute() { + return Err(ToolError::InvalidPath(format!( + "working directory must be an absolute path: {}", + dir.display() + ))); + } + if !dir.is_dir() { + return Err(ToolError::InvalidPath(format!( + "working directory does not exist: {}", + dir.display() + ))); + } + } + + let mut cmd = if cfg!(target_os = "windows") { + let mut c = Command::new("cmd"); + c.args(["/C", command]); + c + } else { + let mut c = Command::new("bash"); + c.args(["-c", command]); + c + }; + + if let Some(dir) = workdir { + cmd.current_dir(dir); + } + + cmd.stdin(Stdio::null()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()); + + let start = Instant::now(); + let mut child = cmd + .spawn() + .map_err(|e| ToolError::Execution(e.to_string()))?; + + // Poll for completion with timeout + loop { + match child.try_wait() { + Ok(Some(status)) => { + let output = child + .wait_with_output() + .map_err(|e| ToolError::Execution(e.to_string()))?; + return Ok(BashOutput { + exit_code: status.code(), + stdout: String::from_utf8_lossy(&output.stdout).into_owned(), + stderr: String::from_utf8_lossy(&output.stderr).into_owned(), + }); + } + Ok(None) => { + if start.elapsed() >= timeout { + let _ = child.kill(); + return Err(ToolError::Timeout(format!( + "command timed out after {}ms", + timeout.as_millis() + ))); + } + thread::sleep(Duration::from_millis(10)); + } + Err(e) => return Err(ToolError::Execution(e.to_string())), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + #[test] + fn execute_echo_returns_output() { + let result = execute_command("echo hello", None, Duration::from_secs(5)).unwrap(); + + assert_eq!(result.exit_code, Some(0)); + assert!(result.stdout.contains("hello")); + } + + #[test] + fn respects_working_directory() { + let temp = TempDir::new().unwrap(); + let cmd = if cfg!(target_os = 
"windows") { + "cd" + } else { + "pwd" + }; + + let result = execute_command(cmd, Some(temp.path()), Duration::from_secs(5)).unwrap(); + + assert_eq!(result.exit_code, Some(0)); + let temp_path = temp.path().to_string_lossy(); + assert!(result.stdout.contains(temp_path.as_ref())); + } + + #[test] + fn timeout_returns_error() { + let cmd = if cfg!(target_os = "windows") { + "ping -n 10 127.0.0.1" + } else { + "sleep 10" + }; + + let result = execute_command(cmd, None, Duration::from_millis(100)); + assert!(matches!(result, Err(ToolError::Timeout(_)))); + } + + #[test] + fn invalid_workdir_returns_error() { + let result = execute_command( + "echo hello", + Some(Path::new("/nonexistent/path")), + Duration::from_secs(5), + ); + + assert!(matches!(result, Err(ToolError::InvalidPath(_)))); + } + + #[test] + fn captures_exit_code() { + let cmd = if cfg!(target_os = "windows") { + "exit /b 42" + } else { + "exit 42" + }; + + let result = execute_command(cmd, None, Duration::from_secs(5)).unwrap(); + + assert_eq!(result.exit_code, Some(42)); + } +} diff --git a/src/llm-coding-tools-core/src/operations/bash/mod.rs b/src/llm-coding-tools-core/src/operations/bash/mod.rs new file mode 100644 index 00000000..fb550bd2 --- /dev/null +++ b/src/llm-coding-tools-core/src/operations/bash/mod.rs @@ -0,0 +1,24 @@ +//! Shell command execution operation. + +use serde::Serialize; + +/// Result of shell command execution. +#[derive(Debug, Clone, Serialize)] +pub struct BashOutput { + /// Exit code from the command (None if killed by timeout). + pub exit_code: Option, + /// Standard output from the command. + pub stdout: String, + /// Standard error output from the command. 
+ pub stderr: String, +} + +#[cfg(not(feature = "blocking"))] +mod async_impl; +#[cfg(not(feature = "blocking"))] +pub use async_impl::execute_command; + +#[cfg(feature = "blocking")] +mod blocking_impl; +#[cfg(feature = "blocking")] +pub use blocking_impl::execute_command; diff --git a/src/llm-coding-tools-core/src/operations/edit.rs b/src/llm-coding-tools-core/src/operations/edit.rs new file mode 100644 index 00000000..08464115 --- /dev/null +++ b/src/llm-coding-tools-core/src/operations/edit.rs @@ -0,0 +1,126 @@ +//! File editing operation with exact string replacement. + +use crate::error::ToolError; +use crate::fs; +use crate::path::PathResolver; +use thiserror::Error; + +/// Errors specific to edit operations. +#[derive(Debug, Error)] +pub enum EditError { + /// I/O or path validation error. + #[error(transparent)] + Tool(#[from] ToolError), + /// The old_string parameter was empty. + #[error("old_string must not be empty")] + EmptyOldString, + /// The old_string and new_string are identical. + #[error("old_string and new_string must be different")] + IdenticalStrings, + /// The old_string was not found in the file. + #[error("old_string not found in file content")] + NotFound, + /// Multiple matches found when replace_all is false. + #[error("oldString found {0} times and requires more code context to uniquely identify the intended match")] + AmbiguousMatch(usize), +} + +impl From for EditError { + fn from(e: std::io::Error) -> Self { + EditError::Tool(ToolError::from(e)) + } +} + +/// Performs exact string replacement in a file. +/// +/// Returns success message with replacement count. 
+#[maybe_async::maybe_async] +pub async fn edit_file( + resolver: &R, + file_path: &str, + old_string: &str, + new_string: &str, + replace_all: bool, +) -> Result { + if old_string.is_empty() { + return Err(EditError::EmptyOldString); + } + if old_string == new_string { + return Err(EditError::IdenticalStrings); + } + + let path = resolver.resolve(file_path)?; + let content = fs::read_to_string(&path).await?; + + let count = content.matches(old_string).count(); + + if count == 0 { + return Err(EditError::NotFound); + } + + if !replace_all && count > 1 { + return Err(EditError::AmbiguousMatch(count)); + } + + let new_content = if replace_all { + content.replace(old_string, new_string) + } else { + content.replacen(old_string, new_string, 1) + }; + + fs::write(&path, &new_content).await?; + + Ok(format!("Successfully replaced {} occurrence(s)", count)) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::path::AbsolutePathResolver; + use std::io::Write; + use tempfile::NamedTempFile; + + fn create_temp_file(content: &str) -> NamedTempFile { + let mut file = NamedTempFile::new().unwrap(); + file.write_all(content.as_bytes()).unwrap(); + file.flush().unwrap(); + file + } + + #[maybe_async::test(feature = "blocking", async(not(feature = "blocking"), tokio::test))] + async fn single_replacement_succeeds() { + let file = create_temp_file("hello world"); + let resolver = AbsolutePathResolver; + + let result = edit_file( + &resolver, + file.path().to_str().unwrap(), + "world", + "rust", + false, + ) + .await + .unwrap(); + + assert!(result.contains("1 occurrence")); + let content = std::fs::read_to_string(file.path()).unwrap(); + assert_eq!(content, "hello rust"); + } + + #[maybe_async::test(feature = "blocking", async(not(feature = "blocking"), tokio::test))] + async fn not_found_returns_error() { + let file = create_temp_file("hello world"); + let resolver = AbsolutePathResolver; + + let err = edit_file( + &resolver, + file.path().to_str().unwrap(), + "missing", + 
"x", + false, + ) + .await + .unwrap_err(); + assert!(matches!(err, EditError::NotFound)); + } +} diff --git a/src/llm-coding-tools-core/src/operations/glob.rs b/src/llm-coding-tools-core/src/operations/glob.rs new file mode 100644 index 00000000..25d331ef --- /dev/null +++ b/src/llm-coding-tools-core/src/operations/glob.rs @@ -0,0 +1,199 @@ +//! Glob pattern file matching operation. + +use crate::error::{ToolError, ToolResult}; +use crate::path::PathResolver; +use globset::Glob; +use ignore::WalkBuilder; +use serde::Serialize; +use std::time::SystemTime; + +const MAX_RESULTS: usize = 1000; + +/// Output from glob file matching. +#[derive(Debug, Serialize)] +pub struct GlobOutput { + /// Matched file paths relative to search directory, sorted by mtime (newest first). + pub files: Vec, + /// Whether results were truncated due to limit. + #[serde(skip_serializing_if = "std::ops::Not::not")] + pub truncated: bool, +} + +/// Finds files matching a glob pattern in the given directory. +/// +/// Results are sorted by modification time (newest first) and respect `.gitignore`. 
+pub fn glob_files( + resolver: &R, + pattern: &str, + search_path: &str, +) -> ToolResult { + let path = resolver.resolve(search_path)?; + + if !path.is_dir() { + return Err(ToolError::InvalidPath(format!( + "path is not a directory: {}", + path.display() + ))); + } + + let matcher = Glob::new(pattern)?.compile_matcher(); + + let mut files_with_mtime: Vec<(String, SystemTime)> = Vec::new(); + + let walker = WalkBuilder::new(&path) + .hidden(false) + .git_ignore(true) + .git_global(true) + .git_exclude(true) + .build(); + + for entry_result in walker { + let entry = match entry_result { + Ok(e) => e, + Err(_) => continue, + }; + + if let Some(ft) = entry.file_type() { + if ft.is_dir() { + continue; + } + } else { + continue; + } + + let rel_path = match entry.path().strip_prefix(&path) { + Ok(p) => p.to_string_lossy().into_owned(), + Err(_) => continue, + }; + + // Normalize Windows backslashes to forward slashes for glob pattern matching + #[cfg(windows)] + let rel_path = rel_path.replace('\\', "/"); + + if rel_path.is_empty() { + continue; + } + + if !matcher.is_match(&rel_path) { + continue; + } + + let mtime = entry + .metadata() + .ok() + .and_then(|m| m.modified().ok()) + .unwrap_or(SystemTime::UNIX_EPOCH); + + files_with_mtime.push((rel_path, mtime)); + } + + files_with_mtime.sort_by(|a, b| b.1.cmp(&a.1)); + + let truncated = files_with_mtime.len() > MAX_RESULTS; + + let files: Vec = files_with_mtime + .into_iter() + .take(MAX_RESULTS) + .map(|(path, _)| path) + .collect(); + + Ok(GlobOutput { files, truncated }) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::path::AbsolutePathResolver; + use std::fs::{self, File, FileTimes}; + use std::io::Write; + use std::time::{Duration, SystemTime}; + use tempfile::TempDir; + + fn create_test_tree() -> TempDir { + let dir = TempDir::new().unwrap(); + let base = dir.path(); + fs::create_dir_all(base.join(".git")).unwrap(); + fs::create_dir_all(base.join("src")).unwrap(); + 
File::create(base.join("src/lib.rs")).unwrap(); + File::create(base.join("Cargo.toml")).unwrap(); + fs::create_dir_all(base.join("target")).unwrap(); + File::create(base.join("target/binary")).unwrap(); + let mut gitignore = File::create(base.join(".gitignore")).unwrap(); + writeln!(gitignore, "target/").unwrap(); + dir + } + + #[test] + fn glob_matches_pattern() { + let dir = create_test_tree(); + let resolver = AbsolutePathResolver; + let result = glob_files(&resolver, "**/*.rs", dir.path().to_str().unwrap()).unwrap(); + assert!(result.files.iter().any(|f| f.ends_with("lib.rs"))); + } + + #[test] + fn glob_respects_gitignore() { + let dir = create_test_tree(); + let resolver = AbsolutePathResolver; + let result = glob_files(&resolver, "**/*", dir.path().to_str().unwrap()).unwrap(); + assert!(!result.files.iter().any(|f| f.contains("target"))); + } + + #[test] + fn glob_sorts_by_mtime_desc() { + let dir = TempDir::new().unwrap(); + let base = dir.path(); + let resolver = AbsolutePathResolver; + + let older_path = base.join("older.txt"); + let newer_path = base.join("newer.txt"); + let older_time = SystemTime::UNIX_EPOCH + Duration::from_secs(1); + let newer_time = SystemTime::UNIX_EPOCH + Duration::from_secs(2); + + let older_file = File::create(&older_path).unwrap(); + older_file + .set_times(FileTimes::new().set_modified(older_time)) + .unwrap(); + let newer_file = File::create(&newer_path).unwrap(); + newer_file + .set_times(FileTimes::new().set_modified(newer_time)) + .unwrap(); + + let result = glob_files(&resolver, "**/*.txt", base.to_str().unwrap()).unwrap(); + + let newer_index = result + .files + .iter() + .position(|path| path.ends_with("newer.txt")) + .unwrap(); + let older_index = result + .files + .iter() + .position(|path| path.ends_with("older.txt")) + .unwrap(); + + assert!( + newer_index < older_index, + "expected newer file before older: {:?}", + result.files + ); + } + + #[test] + fn glob_returns_forward_slash_paths() { + // Patterns and 
returned paths use forward slashes on all platforms + let dir = create_test_tree(); + let resolver = AbsolutePathResolver; + let result = glob_files(&resolver, "**/*.rs", dir.path().to_str().unwrap()).unwrap(); + + // Verify matching works with forward-slash patterns + assert_eq!(result.files.len(), 1); + assert!(result.files[0].ends_with("lib.rs")); + + // Verify returned paths use forward slashes (critical for Windows) + for path in &result.files { + assert!(!path.contains('\\'), "expected forward slashes: {path}"); + } + assert!(result.files.iter().any(|f| f.contains('/'))); + } +} diff --git a/src/llm-coding-tools-core/src/operations/grep.rs b/src/llm-coding-tools-core/src/operations/grep.rs new file mode 100644 index 00000000..052c2466 --- /dev/null +++ b/src/llm-coding-tools-core/src/operations/grep.rs @@ -0,0 +1,212 @@ +//! Grep content search operation. + +use crate::error::{ToolError, ToolResult}; +use crate::path::PathResolver; +use globset::Glob; +use grep_regex::RegexMatcher; +use grep_searcher::sinks::UTF8; +use grep_searcher::{BinaryDetection, Searcher, SearcherBuilder}; +use ignore::WalkBuilder; +use serde::Serialize; +use std::path::Path; +use std::time::SystemTime; + +/// A single line match within a file. +#[derive(Debug, Clone, Serialize)] +pub struct GrepLineMatch { + /// 1-indexed line number. + pub line_num: u64, + /// Content of the matched line. + pub line_text: String, +} + +/// All matches within a single file. +#[derive(Debug, Clone, Serialize)] +pub struct GrepFileMatches { + /// File path. + pub path: String, + /// Matches in this file, in line order. + pub matches: Vec, + #[serde(skip)] + pub(crate) mtime: SystemTime, +} + +/// Output from grep search. +#[derive(Debug, Serialize)] +pub struct GrepOutput { + /// Files with matches, sorted by modification time (newest first). + pub files: Vec, + /// Total match count across all files. + pub match_count: usize, + /// Whether results were truncated due to limit. 
+ pub truncated: bool, +} + +/// Searches for content matching a regex pattern. +/// +/// Results are sorted by modification time (newest first). +/// Binary files are automatically skipped. +pub fn grep_search( + resolver: &R, + pattern: &str, + include: Option<&str>, + search_path: &str, + limit: usize, +) -> ToolResult { + let path = resolver.resolve(search_path)?; + + let matcher = + RegexMatcher::new(pattern).map_err(|e| ToolError::InvalidPattern(e.to_string()))?; + + // Optional filename filter via glob. + let glob_matcher = include + .map(|pattern| Glob::new(pattern).map(|glob| glob.compile_matcher())) + .transpose()?; + + let mut searcher = SearcherBuilder::new() + .binary_detection(BinaryDetection::quit(0)) + .build(); + + let mut files: Vec = Vec::with_capacity(64); + + let walker = WalkBuilder::new(&path) + .hidden(false) + .git_ignore(true) + .git_global(true) + .git_exclude(true) + .build(); + + for entry_result in walker { + let entry = match entry_result { + Ok(e) => e, + Err(_) => continue, + }; + + // Skip directories and non-regular files. + match entry.file_type() { + Some(ft) if ft.is_file() => {} + _ => continue, + } + + let entry_path = entry.path(); + + // Apply include glob to basename when requested. + if let Some(ref matcher) = glob_matcher { + let file_name = match entry_path.file_name().and_then(|n| n.to_str()) { + Some(name) => name, + None => continue, + }; + if !matcher.is_match(file_name) { + continue; + } + } + + let matches = collect_file_matches(&matcher, &mut searcher, entry_path); + if matches.is_empty() { + continue; + } + + let mtime = entry + .metadata() + .ok() + .and_then(|m| m.modified().ok()) + .unwrap_or(SystemTime::UNIX_EPOCH); + + files.push(GrepFileMatches { + path: entry_path.to_string_lossy().into_owned(), + matches, + mtime, + }); + } + + // Sort newest files first. 
+ files.sort_by(|a, b| b.mtime.cmp(&a.mtime)); + + let mut match_count = 0; + let mut truncate_at = files.len(); + let mut truncated = false; + + // Enforce overall match limit across files. + for (x, file) in files.iter_mut().enumerate() { + let remaining = limit - match_count; + if file.matches.len() > remaining { + file.matches.truncate(remaining); + match_count += remaining; + truncate_at = x + 1; + truncated = true; + break; + } + match_count += file.matches.len(); + } + + files.truncate(truncate_at); + + Ok(GrepOutput { + files, + match_count, + truncated, + }) +} + +#[inline] +fn collect_file_matches( + matcher: &RegexMatcher, + searcher: &mut Searcher, + path: &Path, +) -> Vec { + let mut matches = Vec::new(); + + let _ = searcher.search_path( + matcher, + path, + UTF8(|line_num, line| { + matches.push(GrepLineMatch { + line_num, + line_text: line.trim_end().to_string(), + }); + Ok(true) + }), + ); + + matches +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::path::AbsolutePathResolver; + use tempfile::tempdir; + + #[test] + fn grep_finds_matches() { + let temp = tempdir().unwrap(); + std::fs::write(temp.path().join("match.txt"), "hello world").unwrap(); + let resolver = AbsolutePathResolver; + + let result = + grep_search(&resolver, "hello", None, temp.path().to_str().unwrap(), 10).unwrap(); + + assert_eq!(result.files.len(), 1); + assert_eq!(result.match_count, 1); + } + + #[test] + fn grep_respects_glob_filter() { + let temp = tempdir().unwrap(); + std::fs::write(temp.path().join("match.rs"), "hello").unwrap(); + std::fs::write(temp.path().join("match.txt"), "hello").unwrap(); + let resolver = AbsolutePathResolver; + + let result = grep_search( + &resolver, + "hello", + Some("*.rs"), + temp.path().to_str().unwrap(), + 10, + ) + .unwrap(); + + assert_eq!(result.files.len(), 1); + assert!(result.files[0].path.ends_with(".rs")); + } +} diff --git a/src/llm-coding-tools-core/src/operations/mod.rs b/src/llm-coding-tools-core/src/operations/mod.rs 
new file mode 100644 index 00000000..ace8e5d7 --- /dev/null +++ b/src/llm-coding-tools-core/src/operations/mod.rs @@ -0,0 +1,37 @@ +//! Core operations for file systems and utilities. +//! +//! This module contains framework-agnostic implementations of: +//! - File operations (read, write, edit, glob, grep, bash, todo) - always available +//! - Web fetching (fetch_url) - requires `async` or `blocking` feature +//! - Task delegation (task execution and mocking) - requires `async` feature + +// Always available (sync or async based on runtime feature) +pub mod bash; +pub mod edit; +pub mod glob; +pub mod grep; +pub mod read; +pub mod todo; +pub mod write; + +pub use bash::{execute_command, BashOutput}; +pub use edit::{edit_file, EditError}; +pub use glob::{glob_files, GlobOutput}; +pub use grep::{grep_search, GrepFileMatches, GrepLineMatch, GrepOutput}; +pub use read::read_file; +pub use todo::{read_todos, write_todos, Todo, TodoPriority, TodoState, TodoStatus}; +pub use write::write_file; + +// Webfetch available in both async and blocking modes +#[cfg(any(feature = "async", feature = "blocking"))] +pub mod webfetch; + +#[cfg(any(feature = "async", feature = "blocking"))] +pub use webfetch::{fetch_url, format_json, html_to_markdown, WebFetchOutput}; + +// Task module requires async (trait uses async_trait) +#[cfg(feature = "async")] +pub mod task; + +#[cfg(feature = "async")] +pub use task::{MockTaskExecutor, TaskArgs, TaskExecutor, TaskResult}; diff --git a/src/llm-coding-tools-core/src/operations/read.rs b/src/llm-coding-tools-core/src/operations/read.rs new file mode 100644 index 00000000..14b95eae --- /dev/null +++ b/src/llm-coding-tools-core/src/operations/read.rs @@ -0,0 +1,201 @@ +//! File reading operation. 
+ +use crate::error::{ToolError, ToolResult}; +use crate::fs; +use crate::output::ToolOutput; +use crate::path::PathResolver; +use crate::util::{truncate_line, ESTIMATED_CHARS_PER_LINE}; +use memchr::memchr; +use std::borrow::Cow; +use std::fmt::Write; + +const MAX_LINE_LENGTH: usize = 2000; + +/// Strips trailing CR from a line (for CRLF handling). +#[inline] +fn strip_cr(line: &[u8]) -> &[u8] { + line.strip_suffix(b"\r").unwrap_or(line) +} + +/// Processes a single line, appending it to output with optional line numbers. +#[inline] +fn process_line( + line_bytes: &[u8], + line_number: usize, + output: &mut String, + lines_output: &mut usize, +) { + let line_bytes = strip_cr(line_bytes); + let content: Cow<'_, str> = String::from_utf8_lossy(line_bytes); + let (truncated_content, _) = truncate_line(&content, MAX_LINE_LENGTH); + + if *lines_output > 0 { + output.push('\n'); + } + + if LINE_NUMBERS { + let _ = write!(output, "L{}: {}", line_number, truncated_content); + } else { + output.push_str(truncated_content); + } + + *lines_output += 1; +} + +/// Reads a file and returns formatted content, optionally with line numbers. +/// +/// When `LINE_NUMBERS` is `true`, each line is prefixed with `L{number}: `. +/// When `false`, raw content is returned without prefixes. 
+#[maybe_async::maybe_async] +pub async fn read_file( + resolver: &R, + file_path: &str, + offset: usize, + limit: usize, +) -> ToolResult { + // Conditional trait import for consume() method + #[cfg(feature = "blocking")] + use std::io::BufRead as _; + #[cfg(not(feature = "blocking"))] + use tokio::io::AsyncBufReadExt as _; + + if offset == 0 { + return Err(ToolError::OutOfBounds( + "offset must be >= 1 (1-indexed)".into(), + )); + } + if limit == 0 { + return Err(ToolError::OutOfBounds("limit must be >= 1".into())); + } + + let path = resolver.resolve(file_path)?; + let buf_capacity = (limit * ESTIMATED_CHARS_PER_LINE).next_power_of_two(); + let mut reader = fs::open_buffered(&path, buf_capacity).await?; + + let estimated_capacity = limit * ESTIMATED_CHARS_PER_LINE; + let mut output = String::with_capacity(estimated_capacity); + // Holds a partial line that spans multiple buffers. + let mut overflow: Vec = Vec::new(); + let mut line_number = 0usize; + let mut lines_output = 0usize; + + // Stream buffered chunks, splitting into lines as we go. + loop { + let buf = reader.fill_buf().await?; + // Flush any trailing partial line at EOF. + if buf.is_empty() { + if !overflow.is_empty() { + line_number += 1; + if line_number >= offset && lines_output < limit { + process_line::( + &overflow, + line_number, + &mut output, + &mut lines_output, + ); + } + } + break; + } + + let mut pos = 0; + while pos < buf.len() { + // Fast newline search to delimit lines. + if let Some(newline_offset) = memchr(b'\n', &buf[pos..]) { + let newline_pos = pos + newline_offset; + line_number += 1; + + // Only emit lines within the requested window. + if line_number >= offset && lines_output < limit { + if overflow.is_empty() { + // Fast path: line is fully in this buffer. + process_line::( + &buf[pos..newline_pos], + line_number, + &mut output, + &mut lines_output, + ); + } else { + // Slow path: prepend buffered fragment. 
+ overflow.extend_from_slice(&buf[pos..newline_pos]); + process_line::( + &overflow, + line_number, + &mut output, + &mut lines_output, + ); + overflow.clear(); + } + } else if !overflow.is_empty() { + overflow.clear(); + } + + pos = newline_pos + 1; + + if lines_output >= limit { + break; + } + } else { + overflow.extend_from_slice(&buf[pos..]); + pos = buf.len(); + } + } + + reader.consume(pos); + + if lines_output >= limit { + break; + } + } + + if line_number < offset { + return Err(ToolError::OutOfBounds(format!( + "offset {} exceeds file length of {} lines", + offset, line_number + ))); + } + + Ok(ToolOutput::new(output)) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::path::AbsolutePathResolver; + use std::io::Write as _; + use tempfile::NamedTempFile; + + #[maybe_async::maybe_async] + async fn read_temp_file( + content: &[u8], + offset: usize, + limit: usize, + ) -> ToolResult { + let mut temp = NamedTempFile::new().unwrap(); + temp.write_all(content).unwrap(); + let resolver = AbsolutePathResolver; + read_file::<_, LINE_NUMBERS>(&resolver, temp.path().to_str().unwrap(), offset, limit).await + } + + #[maybe_async::test(feature = "blocking", async(not(feature = "blocking"), tokio::test))] + async fn reads_basic_file_with_line_numbers() { + let result = read_temp_file::(b"hello\nworld\n", 1, 2000) + .await + .unwrap(); + assert_eq!(result.content, "L1: hello\nL2: world"); + } + + #[maybe_async::test(feature = "blocking", async(not(feature = "blocking"), tokio::test))] + async fn reads_basic_file_without_line_numbers() { + let result = read_temp_file::(b"hello\nworld\n", 1, 2000) + .await + .unwrap(); + assert_eq!(result.content, "hello\nworld"); + } + + #[maybe_async::test(feature = "blocking", async(not(feature = "blocking"), tokio::test))] + async fn errors_on_offset_zero() { + let err = read_temp_file::(b"test\n", 0, 10).await.unwrap_err(); + assert!(matches!(err, ToolError::OutOfBounds(_))); + } +} diff --git 
a/src/llm-coding-tools-core/src/operations/task.rs b/src/llm-coding-tools-core/src/operations/task.rs new file mode 100644 index 00000000..9ddebacc --- /dev/null +++ b/src/llm-coding-tools-core/src/operations/task.rs @@ -0,0 +1,198 @@ +//! Task execution types and mock executor. + +use crate::error::ToolResult; +use async_trait::async_trait; +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::sync::RwLock; + +/// Input arguments for task execution. +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct TaskArgs { + /// Short 3-5 word task description. + pub description: String, + /// Detailed instructions for the sub-agent. + pub prompt: String, + /// Type of agent to use (e.g., "general", "coder"). + pub subagent_type: String, + /// Existing session to continue. + #[serde(default)] + pub session_id: Option, +} + +/// Result from task execution. +#[derive(Debug, Clone, Serialize)] +pub struct TaskResult { + /// The task description. + pub description: String, + /// The agent type used. + pub subagent_type: String, + /// Session ID (new or continued). + pub session_id: String, + /// Result message from the agent. + pub result: String, +} + +impl TaskResult { + /// Formats the result for display. + pub fn format(&self) -> String { + format!( + "Task: {}\nAgent: {}\nSession: {}\nStatus: completed\n\nResult: {}", + self.description, self.subagent_type, self.session_id, self.result + ) + } +} + +/// Trait for executing tasks. +/// +/// Implement this to provide custom execution logic (e.g., real LLM agent). +#[async_trait] +pub trait TaskExecutor: Send + Sync { + /// Execute a task with the given arguments. + async fn execute(&self, args: &TaskArgs) -> ToolResult; +} + +/// Mock task executor for testing. +/// +/// Returns predefined responses without LLM calls. 
+#[derive(Debug, Default)] +pub struct MockTaskExecutor { + responses: RwLock>, + session_counter: AtomicU64, +} + +impl MockTaskExecutor { + /// Creates a new mock executor. + pub fn new() -> Self { + Self::default() + } + + /// Sets a custom response for a specific description. + pub fn set_response(&self, description: impl Into, response: impl Into) { + self.responses + .write() + .expect("lock poisoned") + .insert(description.into(), response.into()); + } + + fn next_session_id(&self) -> String { + let id = self.session_counter.fetch_add(1, Ordering::Relaxed); + format!("mock-session-{id}") + } +} + +#[async_trait] +impl TaskExecutor for MockTaskExecutor { + async fn execute(&self, args: &TaskArgs) -> ToolResult { + let session_id = args + .session_id + .clone() + .unwrap_or_else(|| self.next_session_id()); + + let result = self + .responses + .read() + .expect("lock poisoned") + .get(&args.description) + .cloned() + .unwrap_or_else(|| { + format!( + "Task '{}' completed successfully by {} agent.", + args.description, args.subagent_type + ) + }); + + Ok(TaskResult { + description: args.description.clone(), + subagent_type: args.subagent_type.clone(), + session_id, + result, + }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn mock_executor_returns_default_response() { + let executor = MockTaskExecutor::new(); + let args = TaskArgs { + description: "test task".into(), + prompt: "do something".into(), + subagent_type: "general".into(), + session_id: None, + }; + + let result = executor.execute(&args).await.unwrap(); + + assert_eq!(result.description, "test task"); + assert!(result.session_id.starts_with("mock-session-")); + assert!(result.result.contains("test task")); + } + + #[tokio::test] + async fn mock_executor_uses_custom_response() { + let executor = MockTaskExecutor::new(); + executor.set_response("custom task", "Custom result!"); + + let args = TaskArgs { + description: "custom task".into(), + prompt: "details".into(), + 
subagent_type: "coder".into(), + session_id: None, + }; + + let result = executor.execute(&args).await.unwrap(); + assert_eq!(result.result, "Custom result!"); + } + + #[tokio::test] + async fn mock_executor_continues_session() { + let executor = MockTaskExecutor::new(); + let args = TaskArgs { + description: "task".into(), + prompt: "prompt".into(), + subagent_type: "general".into(), + session_id: Some("existing-session".into()), + }; + + let result = executor.execute(&args).await.unwrap(); + assert_eq!(result.session_id, "existing-session"); + } + + #[tokio::test] + async fn session_ids_increment() { + let executor = MockTaskExecutor::new(); + let args = TaskArgs { + description: "task".into(), + prompt: "prompt".into(), + subagent_type: "general".into(), + session_id: None, + }; + + let r1 = executor.execute(&args).await.unwrap(); + let r2 = executor.execute(&args).await.unwrap(); + + assert_eq!(r1.session_id, "mock-session-0"); + assert_eq!(r2.session_id, "mock-session-1"); + } + + #[test] + fn task_result_formats_correctly() { + let result = TaskResult { + description: "my task".into(), + subagent_type: "coder".into(), + session_id: "sess-1".into(), + result: "Done!".into(), + }; + + let formatted = result.format(); + assert!(formatted.contains("Task: my task")); + assert!(formatted.contains("Agent: coder")); + assert!(formatted.contains("Session: sess-1")); + assert!(formatted.contains("Result: Done!")); + } +} diff --git a/src/llm-coding-tools-core/src/operations/todo.rs b/src/llm-coding-tools-core/src/operations/todo.rs new file mode 100644 index 00000000..c53012ea --- /dev/null +++ b/src/llm-coding-tools-core/src/operations/todo.rs @@ -0,0 +1,212 @@ +//! Todo list management operation. +//! +//! This module is only available with the `async` feature. + +use crate::error::{ToolError, ToolResult}; +use parking_lot::RwLock; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use std::fmt::Write; +use std::sync::Arc; + +/// Task status. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema)] +#[serde(rename_all = "snake_case")] +pub enum TodoStatus { + /// Not yet started. + Pending, + /// Currently being worked on. + InProgress, + /// Successfully finished. + Completed, + /// Abandoned or no longer relevant. + Cancelled, +} + +impl TodoStatus { + /// Returns the status indicator icon. + #[inline] + pub const fn icon(self) -> &'static str { + match self { + Self::Pending => "[ ]", + Self::InProgress => "[>]", + Self::Completed => "[x]", + Self::Cancelled => "[-]", + } + } +} + +/// Task priority level. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema)] +#[serde(rename_all = "snake_case")] +pub enum TodoPriority { + /// Urgent, should be addressed first. + High, + /// Normal priority. + Medium, + /// Can be deferred. + Low, +} + +/// A single task item. +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +pub struct Todo { + /// Unique identifier for the task. + pub id: String, + /// Task description. + pub content: String, + /// Current status. + pub status: TodoStatus, + /// Priority level. + pub priority: TodoPriority, +} + +/// Thread-safe shared state for todo list. +#[derive(Debug, Clone, Default)] +pub struct TodoState { + todos: Arc>>, +} + +impl TodoState { + /// Creates a new empty todo state. + #[inline] + pub fn new() -> Self { + Self::default() + } +} + +/// Writes/replaces the todo list with new items. +/// +/// Validates that all todos have non-empty id and content. 
+pub fn write_todos(state: &TodoState, todos: Vec) -> ToolResult { + for todo in &todos { + if todo.id.trim().is_empty() { + return Err(ToolError::Validation("todo id cannot be empty".into())); + } + if todo.content.trim().is_empty() { + return Err(ToolError::Validation("todo content cannot be empty".into())); + } + } + + let count = todos.len(); + *state.todos.write() = todos; + Ok(format!("Updated todo list with {count} task(s).")) +} + +/// Reads and formats the current todo list. +pub fn read_todos(state: &TodoState) -> String { + let todos = state.todos.read(); + + if todos.is_empty() { + return "No tasks.".to_string(); + } + + let mut output = format!("Tasks ({} total):\n", todos.len()); + for todo in todos.iter() { + let _ = writeln!( + output, + "{} ({:?}) {}: {}", + todo.status.icon(), + todo.priority, + todo.id, + todo.content + ); + } + + output.truncate(output.trim_end().len()); + output +} + +#[cfg(test)] +mod tests { + use super::*; + + fn make_todo(id: &str, status: TodoStatus) -> Todo { + Todo { + id: id.to_string(), + content: format!("Task {id}"), + status, + priority: TodoPriority::Medium, + } + } + + #[test] + fn write_and_read_todos() { + let state = TodoState::new(); + + let todos = vec![ + make_todo("1", TodoStatus::Completed), + make_todo("2", TodoStatus::InProgress), + make_todo("3", TodoStatus::Pending), + ]; + + let result = write_todos(&state, todos).unwrap(); + assert!(result.contains("3 task(s)")); + + let output = read_todos(&state); + assert!(output.contains("[x]")); + assert!(output.contains("[>]")); + assert!(output.contains("[ ]")); + } + + #[test] + fn read_empty_list() { + let state = TodoState::new(); + let output = read_todos(&state); + assert_eq!(output, "No tasks."); + } + + #[test] + fn write_replaces_existing() { + let state = TodoState::new(); + + write_todos(&state, vec![make_todo("a", TodoStatus::Pending)]).unwrap(); + write_todos(&state, vec![make_todo("b", TodoStatus::Completed)]).unwrap(); + + let output = 
read_todos(&state); + assert!(!output.contains("Task a")); + assert!(output.contains("Task b")); + } + + #[test] + fn write_validates_empty_id() { + let state = TodoState::new(); + let todo = Todo { + id: "".to_string(), + content: "Task".to_string(), + status: TodoStatus::Pending, + priority: TodoPriority::Low, + }; + let result = write_todos(&state, vec![todo]); + assert!(matches!(result, Err(ToolError::Validation(_)))); + } + + #[test] + fn write_validates_empty_content() { + let state = TodoState::new(); + let todo = Todo { + id: "1".to_string(), + content: " ".to_string(), + status: TodoStatus::Pending, + priority: TodoPriority::Low, + }; + let result = write_todos(&state, vec![todo]); + assert!(matches!(result, Err(ToolError::Validation(_)))); + } + + #[test] + fn status_icons_are_correct() { + assert_eq!(TodoStatus::Pending.icon(), "[ ]"); + assert_eq!(TodoStatus::InProgress.icon(), "[>]"); + assert_eq!(TodoStatus::Completed.icon(), "[x]"); + assert_eq!(TodoStatus::Cancelled.icon(), "[-]"); + } + + #[test] + fn status_serde_roundtrip() { + let json = serde_json::to_string(&TodoStatus::InProgress).unwrap(); + assert_eq!(json, "\"in_progress\""); + let parsed: TodoStatus = serde_json::from_str(&json).unwrap(); + assert_eq!(parsed, TodoStatus::InProgress); + } +} diff --git a/src/llm-coding-tools-core/src/operations/webfetch/async_impl.rs b/src/llm-coding-tools-core/src/operations/webfetch/async_impl.rs new file mode 100644 index 00000000..e2b3d76f --- /dev/null +++ b/src/llm-coding-tools-core/src/operations/webfetch/async_impl.rs @@ -0,0 +1,173 @@ +//! Async web content fetching. + +use super::{categorize_reqwest_error, check_size, process_content, WebFetchOutput}; +use crate::error::{ToolError, ToolResult}; +use std::time::Duration; + +/// Fetches content from a URL and returns processed content. 
+/// +/// - HTML is converted to markdown +/// - JSON is pretty-printed +/// - Other content types returned as-is +pub async fn fetch_url( + client: &reqwest::Client, + url: &str, + timeout: Duration, +) -> ToolResult { + let mut response = client + .get(url) + .timeout(timeout) + .send() + .await + .map_err(|e| categorize_reqwest_error(e, url))?; + + let status = response.status(); + if !status.is_success() { + return Err(ToolError::Http(format!("HTTP {} for {}", status, url))); + } + + let content_type = response + .headers() + .get(reqwest::header::CONTENT_TYPE) + .and_then(|v| v.to_str().ok()) + .unwrap_or("text/plain") + .to_string(); + + // Check Content-Length header if available for early rejection and preallocation + let content_length = response.content_length().map(|len| len as usize); + if let Some(len) = content_length { + check_size(len, url)?; + } + + // Stream response body with incremental size checks to avoid memory exhaustion + let mut bytes = content_length.map_or_else(Vec::new, Vec::with_capacity); + let mut total_len: usize = 0; + + while let Some(chunk) = response + .chunk() + .await + .map_err(|e| ToolError::Http(e.to_string()))? 
+ { + total_len += chunk.len(); + check_size(total_len, url)?; + bytes.extend_from_slice(&chunk); + } + + let byte_length = total_len; + let raw_content = String::from_utf8_lossy(&bytes); + let content = process_content(&raw_content, &content_type); + + Ok(WebFetchOutput { + content, + content_type, + byte_length, + }) +} + +#[cfg(test)] +mod tests { + use super::*; + use wiremock::matchers::{method, path}; + use wiremock::{Mock, MockServer, ResponseTemplate}; + + fn test_client() -> reqwest::Client { + reqwest::Client::builder() + .build() + .expect("client build failed") + } + + #[tokio::test] + async fn fetches_plain_text() { + let server = MockServer::start().await; + Mock::given(method("GET")) + .and(path("/text")) + .respond_with( + ResponseTemplate::new(200) + .set_body_bytes("Hello, world!") + .insert_header("content-type", "text/plain"), + ) + .mount(&server) + .await; + + let client = test_client(); + let result = fetch_url( + &client, + &format!("{}/text", server.uri()), + Duration::from_secs(5), + ) + .await + .unwrap(); + + assert!(result.content.contains("Hello, world!")); + assert!(result.content_type.contains("text/plain")); + } + + #[tokio::test] + async fn converts_html_to_markdown() { + let server = MockServer::start().await; + Mock::given(method("GET")) + .and(path("/html")) + .respond_with( + ResponseTemplate::new(200) + .set_body_bytes("

Hello

World

") + .insert_header("content-type", "text/html"), + ) + .mount(&server) + .await; + + let client = test_client(); + let result = fetch_url( + &client, + &format!("{}/html", server.uri()), + Duration::from_secs(5), + ) + .await + .unwrap(); + + assert!(result.content.contains("Hello")); + assert!(!result.content.contains("

")); + } + + #[tokio::test] + async fn formats_json() { + let server = MockServer::start().await; + Mock::given(method("GET")) + .and(path("/json")) + .respond_with( + ResponseTemplate::new(200).set_body_json(serde_json::json!({"key":"value"})), + ) + .mount(&server) + .await; + + let client = test_client(); + let result = fetch_url( + &client, + &format!("{}/json", server.uri()), + Duration::from_secs(5), + ) + .await + .unwrap(); + + assert!(result.content.contains("\"key\"")); + } + + #[tokio::test] + async fn handles_404() { + let server = MockServer::start().await; + Mock::given(method("GET")) + .and(path("/notfound")) + .respond_with(ResponseTemplate::new(404)) + .mount(&server) + .await; + + let client = test_client(); + let result = fetch_url( + &client, + &format!("{}/notfound", server.uri()), + Duration::from_secs(5), + ) + .await; + + assert!(matches!(result, Err(ToolError::Http(_)))); + } +} diff --git a/src/llm-coding-tools-core/src/operations/webfetch/blocking_impl.rs b/src/llm-coding-tools-core/src/operations/webfetch/blocking_impl.rs new file mode 100644 index 00000000..e2662963 --- /dev/null +++ b/src/llm-coding-tools-core/src/operations/webfetch/blocking_impl.rs @@ -0,0 +1,119 @@ +//! Blocking web content fetching. + +use super::{categorize_reqwest_error, check_size, process_content, WebFetchOutput}; +use crate::error::{ToolError, ToolResult}; +use std::io::Read; +use std::mem::MaybeUninit; +use std::time::Duration; + +/// Fetches content from a URL and returns processed content. 
+/// +/// - HTML is converted to markdown +/// - JSON is pretty-printed +/// - Other content types returned as-is +pub fn fetch_url( + client: &reqwest::blocking::Client, + url: &str, + timeout: Duration, +) -> ToolResult { + let mut response = client + .get(url) + .timeout(timeout) + .send() + .map_err(|e| categorize_reqwest_error(e, url))?; + + let status = response.status(); + if !status.is_success() { + return Err(ToolError::Http(format!("HTTP {} for {}", status, url))); + } + + let content_type = response + .headers() + .get(reqwest::header::CONTENT_TYPE) + .and_then(|v| v.to_str().ok()) + .unwrap_or("text/plain") + .to_string(); + + // Check Content-Length header if available for early rejection and preallocation + let content_length = response.content_length().map(|len| len as usize); + if let Some(len) = content_length { + check_size(len, url)?; + } + + // Stream response body with incremental size checks to avoid memory exhaustion + let mut bytes = content_length.map_or_else(Vec::new, Vec::with_capacity); + let mut total_len: usize = 0; + let mut buffer = [MaybeUninit::::uninit(); 8192]; + let buffer_ptr = buffer.as_mut_ptr() as *mut u8; + let buffer_len = buffer.len(); + + loop { + let n = { + let buf = unsafe { std::slice::from_raw_parts_mut(buffer_ptr, buffer_len) }; + response + .read(buf) + .map_err(|e| ToolError::Http(e.to_string()))? 
+ }; + if n == 0 { + break; + } + total_len += n; + check_size(total_len, url)?; + let initialized = unsafe { std::slice::from_raw_parts(buffer_ptr, n) }; + bytes.extend_from_slice(initialized); + } + + let byte_length = total_len; + let raw_content = String::from_utf8_lossy(&bytes); + let content = process_content(&raw_content, &content_type); + + Ok(WebFetchOutput { + content, + content_type, + byte_length, + }) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn test_client() -> reqwest::blocking::Client { + reqwest::blocking::Client::builder() + .build() + .expect("client build failed") + } + + #[test] + fn fetches_plain_text() { + // Use httpbin.org for blocking tests since wiremock is async-only + let client = test_client(); + let result = fetch_url( + &client, + "https://httpbin.org/robots.txt", + Duration::from_secs(10), + ); + + // This test requires network access, so we just check it doesn't panic + // In CI, this might fail due to network restrictions + if let Ok(output) = result { + assert!(!output.content.is_empty()); + assert!(!output.content_type.is_empty()); + } + } + + #[test] + fn handles_404() { + let client = test_client(); + let result = fetch_url( + &client, + "https://httpbin.org/status/404", + Duration::from_secs(10), + ); + + // In case of network issues, just verify we get some result + if let Err(e) = result { + assert!(matches!(e, ToolError::Http(_))); + } + } +} diff --git a/src/llm-coding-tools-core/src/operations/webfetch/mod.rs b/src/llm-coding-tools-core/src/operations/webfetch/mod.rs new file mode 100644 index 00000000..ccbd8873 --- /dev/null +++ b/src/llm-coding-tools-core/src/operations/webfetch/mod.rs @@ -0,0 +1,129 @@ +//! Web content fetching operation. + +use crate::error::{ToolError, ToolResult}; +use html_to_markdown_rs::{convert, ConversionOptions, PreprocessingOptions, PreprocessingPreset}; + +/// Maximum response size to accept (5MB). 
+pub(crate) const MAX_RESPONSE_SIZE: usize = 5 * 1_024 * 1_024; + +/// Result from URL fetch operation. +#[derive(Debug, Clone)] +pub struct WebFetchOutput { + /// The processed content (HTML converted to markdown, JSON prettified). + pub content: String, + /// The Content-Type header value. + pub content_type: String, + /// Original byte length before processing. + pub byte_length: usize, +} + +/// Processes raw response content based on content type. +pub(crate) fn process_content(raw_content: &str, content_type: &str) -> String { + if content_type.contains("text/html") { + html_to_markdown(raw_content) + } else if content_type.contains("application/json") { + format_json(raw_content) + } else { + raw_content.to_owned() + } +} + +/// Categorizes reqwest errors into appropriate [`ToolError`] variants. +pub(crate) fn categorize_reqwest_error(e: reqwest::Error, url: &str) -> ToolError { + if e.is_timeout() { + ToolError::Timeout(format!("Request timed out for {}", url)) + } else if e.is_connect() { + ToolError::Http(format!("Connection failed for {}: {}", url, e)) + } else if e.is_redirect() { + ToolError::Http(format!("Too many redirects for {}", url)) + } else { + ToolError::Http(e.to_string()) + } +} + +/// Returns an error if the response size exceeds the maximum. +#[inline] +pub(crate) fn check_size(len: usize, url: &str) -> ToolResult<()> { + if len > MAX_RESPONSE_SIZE { + return Err(ToolError::Http(format!( + "Response too large: {} bytes (max {}) for {}", + len, MAX_RESPONSE_SIZE, url + ))); + } + Ok(()) +} + +/// Converts HTML to markdown for LLM-friendly output. 
+pub fn html_to_markdown(html: &str) -> String { + let options = ConversionOptions { + preprocessing: PreprocessingOptions { + enabled: true, + preset: PreprocessingPreset::Aggressive, + remove_navigation: true, + remove_forms: true, + }, + strip_tags: vec![ + "img".into(), + "svg".into(), + "script".into(), + "style".into(), + "noscript".into(), + ], + ..Default::default() + }; + + convert(html, Some(options)).unwrap_or_else(|_| html.to_string()) +} + +/// Formats JSON content for readability. +pub fn format_json(json_str: &str) -> String { + match serde_json::from_str::(json_str) { + Ok(value) => serde_json::to_string_pretty(&value).unwrap_or_else(|_| json_str.to_string()), + Err(_) => json_str.to_string(), + } +} + +#[cfg(not(feature = "blocking"))] +mod async_impl; +#[cfg(not(feature = "blocking"))] +pub use async_impl::fetch_url; + +#[cfg(feature = "blocking")] +mod blocking_impl; +#[cfg(feature = "blocking")] +pub use blocking_impl::fetch_url; + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn html_to_markdown_strips_scripts() { + let html = "

Before

After

"; + let result = html_to_markdown(html); + assert!(!result.contains("alert")); + } + + #[test] + fn format_json_prettifies() { + let json = r#"{"a":1}"#; + let result = format_json(json); + assert!(result.contains("\"a\": 1")); + } + + #[test] + fn format_json_returns_original_on_invalid() { + let invalid = "not json"; + assert_eq!(format_json(invalid), "not json"); + } + + #[test] + fn check_size_ok_for_small_content() { + assert!(check_size(1000, "http://example.com").is_ok()); + } + + #[test] + fn check_size_fails_for_large_content() { + assert!(check_size(MAX_RESPONSE_SIZE + 1, "http://example.com").is_err()); + } +} diff --git a/src/llm-coding-tools-core/src/operations/write.rs b/src/llm-coding-tools-core/src/operations/write.rs new file mode 100644 index 00000000..6b258bda --- /dev/null +++ b/src/llm-coding-tools-core/src/operations/write.rs @@ -0,0 +1,67 @@ +//! File writing operation. + +use crate::error::ToolResult; +use crate::fs; +use crate::path::PathResolver; + +/// Writes content to a file, creating parent directories if needed. +/// +/// Overwrites existing files. Returns a success message with byte count. 
+#[maybe_async::maybe_async] +pub async fn write_file( + resolver: &R, + file_path: &str, + content: &str, +) -> ToolResult { + let path = resolver.resolve(file_path)?; + + // Create parent directories if they don't exist + if let Some(parent) = path.parent() { + if !parent.as_os_str().is_empty() { + fs::create_dir_all(parent).await?; + } + } + + let bytes = content.as_bytes(); + fs::write(&path, bytes).await?; + + Ok(format!( + "Successfully wrote {} bytes to {}", + bytes.len(), + path.display() + )) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::path::AbsolutePathResolver; + use tempfile::TempDir; + + #[maybe_async::test(feature = "blocking", async(not(feature = "blocking"), tokio::test))] + async fn write_creates_new_file() { + let temp = TempDir::new().unwrap(); + let file_path = temp.path().join("new_file.txt"); + let resolver = AbsolutePathResolver; + + let result = write_file(&resolver, file_path.to_str().unwrap(), "hello world") + .await + .unwrap(); + + assert!(result.contains("11 bytes")); + assert_eq!(std::fs::read_to_string(&file_path).unwrap(), "hello world"); + } + + #[maybe_async::test(feature = "blocking", async(not(feature = "blocking"), tokio::test))] + async fn write_creates_parent_directories() { + let temp = TempDir::new().unwrap(); + let file_path = temp.path().join("a/b/c/deep.txt"); + let resolver = AbsolutePathResolver; + + write_file(&resolver, file_path.to_str().unwrap(), "nested") + .await + .unwrap(); + + assert!(file_path.exists()); + } +} diff --git a/src/llm-coding-tools-core/src/output.rs b/src/llm-coding-tools-core/src/output.rs new file mode 100644 index 00000000..a1c3c067 --- /dev/null +++ b/src/llm-coding-tools-core/src/output.rs @@ -0,0 +1,83 @@ +//! Common output types for tool responses. + +use serde::Serialize; + +/// Wrapper for tool output with truncation metadata. +#[derive(Debug, Clone, Serialize)] +pub struct ToolOutput { + /// The main content returned by the tool. 
+ pub content: String, + /// Whether the output was truncated due to size limits. + #[serde(skip_serializing_if = "std::ops::Not::not")] + pub truncated: bool, +} + +impl ToolOutput { + /// Creates a new output with the given content. + #[inline] + pub fn new(content: impl Into) -> Self { + Self { + content: content.into(), + truncated: false, + } + } + + /// Creates a truncated output. + #[inline] + pub fn truncated(content: impl Into) -> Self { + Self { + content: content.into(), + truncated: true, + } + } +} + +impl From for ToolOutput { + fn from(content: String) -> Self { + Self::new(content) + } +} + +impl From<&str> for ToolOutput { + fn from(content: &str) -> Self { + Self::new(content) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn tool_output_new_creates_non_truncated() { + let output = ToolOutput::new("content"); + assert_eq!(output.content, "content"); + assert!(!output.truncated); + } + + #[test] + fn tool_output_truncated_marks_truncated() { + let output = ToolOutput::truncated("partial"); + assert!(output.truncated); + } + + #[test] + fn tool_output_from_string() { + let output: ToolOutput = "hello".into(); + assert_eq!(output.content, "hello"); + } + + #[test] + fn tool_output_serializes_without_truncated_when_false() { + let output = ToolOutput::new("content"); + let json = serde_json::to_string(&output).unwrap(); + assert!(!json.contains("truncated")); + } + + #[test] + fn tool_output_serializes_with_truncated_when_true() { + let output = ToolOutput::truncated("content"); + let json = serde_json::to_string(&output).unwrap(); + assert!(json.contains("truncated")); + } +} diff --git a/src/llm-coding-tools-core/src/path/absolute.rs b/src/llm-coding-tools-core/src/path/absolute.rs new file mode 100644 index 00000000..d957598f --- /dev/null +++ b/src/llm-coding-tools-core/src/path/absolute.rs @@ -0,0 +1,80 @@ +//! Absolute path resolver implementation. 
+ +use super::PathResolver; +use crate::error::{ToolError, ToolResult}; +use std::path::PathBuf; + +/// Path resolver that requires absolute paths. +/// +/// This is the simplest resolver - it validates that paths are absolute +/// and returns them as-is. No directory restrictions are applied. +/// +/// # Example +/// +/// ``` +/// use llm_coding_tools_core::path::{PathResolver, AbsolutePathResolver}; +/// +/// let resolver = AbsolutePathResolver; +/// #[cfg(windows)] +/// assert!(resolver.resolve("C:\\Users\\user\\file.txt").is_ok()); +/// #[cfg(not(windows))] +/// assert!(resolver.resolve("/home/user/file.txt").is_ok()); +/// assert!(resolver.resolve("relative/path.txt").is_err()); +/// ``` +#[derive(Debug, Clone, Copy, Default)] +pub struct AbsolutePathResolver; + +impl PathResolver for AbsolutePathResolver { + fn resolve(&self, path: &str) -> ToolResult { + let path = PathBuf::from(path); + if !path.is_absolute() { + return Err(ToolError::InvalidPath(format!( + "path must be absolute: {}", + path.display() + ))); + } + Ok(path) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn accepts_absolute_path() { + let resolver = AbsolutePathResolver; + #[cfg(windows)] + let path = "C:\\Users\\user\\file.txt"; + #[cfg(not(windows))] + let path = "/home/user/file.txt"; + + let result = resolver.resolve(path); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), PathBuf::from(path)); + } + + #[test] + fn rejects_relative_path() { + let resolver = AbsolutePathResolver; + let result = resolver.resolve("relative/path.txt"); + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(matches!(err, ToolError::InvalidPath(_))); + assert!(err.to_string().contains("must be absolute")); + } + + #[test] + fn rejects_dot_relative_path() { + let resolver = AbsolutePathResolver; + assert!(resolver.resolve("./file.txt").is_err()); + assert!(resolver.resolve("../file.txt").is_err()); + } + + #[cfg(windows)] + #[test] + fn accepts_windows_absolute_path() { 
+ let resolver = AbsolutePathResolver; + assert!(resolver.resolve("C:\\Users\\file.txt").is_ok()); + } +} diff --git a/src/llm-coding-tools-core/src/path/allowed.rs b/src/llm-coding-tools-core/src/path/allowed.rs new file mode 100644 index 00000000..8b81898e --- /dev/null +++ b/src/llm-coding-tools-core/src/path/allowed.rs @@ -0,0 +1,221 @@ +//! Allowed directory path resolver implementation. + +use super::PathResolver; +use crate::error::{ToolError, ToolResult}; +use std::path::PathBuf; + +/// Path resolver that restricts access to allowed directories. +/// +/// Paths are resolved relative to configured base directories. +/// Prevents path traversal attacks by validating resolved paths +/// stay within allowed boundaries. +/// +/// # Security +/// +/// This resolver protects against path traversal by: +/// 1. Canonicalizing the resolved path to eliminate `..` and symlinks +/// 2. Verifying the result starts with an allowed base directory +/// +/// ## Bash Tool Bypasses Path Restrictions +/// +/// **When the bash/shell tool is enabled, this resolver's protections are effectively +/// advisory.** The bash tool permits arbitrary shell commands, meaning an LLM can +/// directly read, write, or delete any file the process has OS-level permissions for +/// (e.g., `cat /etc/passwd`, `rm -rf /`, `curl ... | sh`). +/// +/// This resolver only restricts the structured file operations (`read`, `write`, `edit`, +/// `glob`, `grep`). If your threat model requires actual filesystem sandboxing, you must +/// either: +/// +/// - Disable the bash tool entirely, or +/// - Run the process in an OS-level sandbox (containers, seccomp, landlock, etc.) +#[derive(Debug, Clone)] +pub struct AllowedPathResolver { + /// Canonicalized allowed base directories. + allowed_paths: Vec, +} + +impl AllowedPathResolver { + /// Creates a new resolver with the given allowed directories. + /// + /// Each directory is canonicalized during construction to ensure + /// consistent path comparison. 
Returns an error if any directory + /// doesn't exist or can't be canonicalized. + pub fn new(allowed_paths: Vec) -> ToolResult { + let canonicalized: Result, _> = allowed_paths + .into_iter() + .map(|p| { + p.canonicalize().map_err(|e| { + ToolError::InvalidPath(format!( + "failed to canonicalize allowed path '{}': {}", + p.display(), + e + )) + }) + }) + .collect(); + + Ok(Self { + allowed_paths: canonicalized?, + }) + } + + /// Creates a resolver from already-canonicalized paths. + /// + /// Use this when paths are known to be valid and canonicalized, + /// skipping the filesystem check. + /// + /// # Safety + /// + /// Caller must ensure paths are actually canonical. Using non-canonical + /// paths may allow path traversal attacks. + pub fn from_canonical(allowed_paths: Vec) -> Self { + Self { allowed_paths } + } + + /// Returns the allowed base directories. + pub fn allowed_paths(&self) -> &[PathBuf] { + &self.allowed_paths + } +} + +impl PathResolver for AllowedPathResolver { + fn resolve(&self, path: &str) -> ToolResult { + let input_path = PathBuf::from(path); + + // Try each allowed base directory in order + for base in &self.allowed_paths { + let candidate = base.join(&input_path); + + // Try to canonicalize for existing paths + if let Ok(canonical) = candidate.canonicalize() { + // Security check: resolved path must stay within allowed base + if canonical.starts_with(base) { + return Ok(canonical); + } + // Path escaped allowed directory - try next base + continue; + } + + // For non-existent paths (write operations), validate parent + if let Some(parent) = candidate.parent() { + if let Ok(canonical_parent) = parent.canonicalize() { + if canonical_parent.starts_with(base) { + // Parent is valid, construct the final path + let file_name = candidate.file_name().ok_or_else(|| { + ToolError::InvalidPath("path has no file name".into()) + })?; + return Ok(canonical_parent.join(file_name)); + } + } + } + } + + Err(ToolError::InvalidPath(format!( + "path '{}' is 
not within allowed directories", + path + ))) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + use tempfile::TempDir; + + fn setup_test_dir() -> TempDir { + let dir = TempDir::new().unwrap(); + fs::create_dir_all(dir.path().join("subdir")).unwrap(); + fs::write(dir.path().join("file.txt"), "content").unwrap(); + fs::write(dir.path().join("subdir/nested.txt"), "nested").unwrap(); + dir + } + + #[test] + fn resolves_relative_path_in_allowed_dir() { + let dir = setup_test_dir(); + let resolver = AllowedPathResolver::new(vec![dir.path().to_path_buf()]).unwrap(); + + let result = resolver.resolve("file.txt"); + assert!(result.is_ok()); + assert!(result.unwrap().ends_with("file.txt")); + } + + #[test] + fn resolves_nested_path() { + let dir = setup_test_dir(); + let resolver = AllowedPathResolver::new(vec![dir.path().to_path_buf()]).unwrap(); + + let result = resolver.resolve("subdir/nested.txt"); + assert!(result.is_ok()); + } + + #[test] + fn rejects_path_traversal() { + let dir = setup_test_dir(); + let resolver = AllowedPathResolver::new(vec![dir.path().to_path_buf()]).unwrap(); + + let result = resolver.resolve("../../../etc/passwd"); + assert!(result.is_err()); + assert!(result + .unwrap_err() + .to_string() + .contains("not within allowed")); + } + + #[test] + fn allows_non_existent_path_for_write() { + let dir = setup_test_dir(); + let resolver = AllowedPathResolver::new(vec![dir.path().to_path_buf()]).unwrap(); + + let result = resolver.resolve("new_file.txt"); + assert!(result.is_ok()); + } + + #[test] + fn allows_nested_non_existent_path() { + let dir = setup_test_dir(); + let resolver = AllowedPathResolver::new(vec![dir.path().to_path_buf()]).unwrap(); + + let result = resolver.resolve("subdir/new_file.txt"); + assert!(result.is_ok()); + } + + #[test] + fn rejects_non_existent_path_outside_allowed() { + let dir = setup_test_dir(); + let resolver = AllowedPathResolver::new(vec![dir.path().to_path_buf()]).unwrap(); + + // Parent traversal in 
non-existent path + let result = resolver.resolve("subdir/../../../new_file.txt"); + assert!(result.is_err()); + } + + #[test] + fn tries_multiple_allowed_paths() { + let dir1 = setup_test_dir(); + let dir2 = setup_test_dir(); + fs::write(dir2.path().join("only_in_dir2.txt"), "content").unwrap(); + + let resolver = + AllowedPathResolver::new(vec![dir1.path().to_path_buf(), dir2.path().to_path_buf()]) + .unwrap(); + + // File only exists in dir2 + let result = resolver.resolve("only_in_dir2.txt"); + assert!(result.is_ok()); + } + + #[test] + fn returns_canonical_path() { + let dir = setup_test_dir(); + let resolver = AllowedPathResolver::new(vec![dir.path().to_path_buf()]).unwrap(); + + let result = resolver.resolve("subdir/../file.txt"); + assert!(result.is_ok()); + // Should resolve to the canonical path without ../ + let resolved = result.unwrap(); + assert!(!resolved.to_string_lossy().contains("..")); + } +} diff --git a/src/llm-coding-tools-core/src/path/mod.rs b/src/llm-coding-tools-core/src/path/mod.rs new file mode 100644 index 00000000..d9111c4b --- /dev/null +++ b/src/llm-coding-tools-core/src/path/mod.rs @@ -0,0 +1,26 @@ +//! Path resolution strategies for tool security. +//! +//! This module provides [`PathResolver`] trait and implementations: +//! - [`AbsolutePathResolver`] - Requires absolute paths only +//! - [`AllowedPathResolver`] - Restricts to allowed directories + +mod absolute; +mod allowed; + +pub use absolute::AbsolutePathResolver; +pub use allowed::AllowedPathResolver; + +use crate::error::ToolResult; +use std::path::PathBuf; + +/// Strategy for resolving and validating file paths. +/// +/// Implementations control whether paths must be absolute, relative to +/// allowed directories, or follow other constraints. +pub trait PathResolver: Send + Sync { + /// Resolves and validates a path string. + /// + /// Returns an absolute path (may or may not be canonical) if valid, + /// or an error describing the issue. 
+ fn resolve(&self, path: &str) -> ToolResult; +} diff --git a/src/llm-coding-tools-core/src/preamble.rs b/src/llm-coding-tools-core/src/preamble.rs new file mode 100644 index 00000000..3b761514 --- /dev/null +++ b/src/llm-coding-tools-core/src/preamble.rs @@ -0,0 +1,502 @@ +//! Preamble generation for LLM agents. +//! +//! Provides [`PreambleBuilder`] for tracking tools and generating formatted +//! preambles containing tool usage context. + +use crate::context::ToolContext; + +/// Entry storing tool name and context string. +struct ContextEntry { + name: &'static str, + context: &'static str, +} + +/// Builder that tracks tools and generates formatted preambles. +/// +/// # Generic Parameters +/// +/// - `ENV`: When `true`, includes an environment section with working directory +/// before tool listings. Defaults to `false` for backwards compatibility. +/// +/// # Example +/// +/// ```no_run +/// use llm_coding_tools_core::context::{ToolContext, READ_ABSOLUTE}; +/// use llm_coding_tools_core::PreambleBuilder; +/// +/// struct ReadTool; +/// +/// impl ToolContext for ReadTool { +/// const NAME: &'static str = "read"; +/// +/// fn context(&self) -> &'static str { +/// READ_ABSOLUTE +/// } +/// } +/// +/// // Without environment section (default) +/// let mut pb = PreambleBuilder::::new(); +/// let _preamble = pb.build(); +/// +/// // With environment section +/// let mut pb = PreambleBuilder::::new() +/// .working_directory(std::env::current_dir().unwrap().display().to_string()); +/// +/// pb.track(ReadTool); +/// +/// let _preamble = pb.build(); +/// ``` +/// +/// # Output +/// +/// The generated preamble is Markdown. For example, with two tools: +/// +/// ```text +/// # Tool Usage Guidelines +/// +/// ## Read Tool +/// +/// Reads files from disk. +/// +/// ## Bash Tool +/// +/// Executes shell commands. 
+/// ``` +/// +/// When the environment section is enabled and a working directory is provided: +/// +/// ```text +/// # Environment +/// +/// Working directory: /home/user/project +/// +/// # Tool Usage Guidelines +/// +/// ## Read Tool +/// +/// Reads files from disk. +/// ``` +pub struct PreambleBuilder { + entries: Vec, + working_directory: Option, +} + +impl Default for PreambleBuilder { + fn default() -> Self { + Self { + entries: Vec::new(), + working_directory: None, + } + } +} + +impl PreambleBuilder { + /// Creates a new preamble builder. + #[inline] + pub fn new() -> Self { + Self::default() + } + + /// Records context and returns tool unchanged. + /// + /// Use this to wrap tools before registering them with your tool collection: + /// ```no_run + /// use llm_coding_tools_core::context::{ToolContext, READ_ABSOLUTE}; + /// use llm_coding_tools_core::PreambleBuilder; + /// + /// struct MyTool; + /// + /// impl ToolContext for MyTool { + /// const NAME: &'static str = "read"; + /// + /// fn context(&self) -> &'static str { + /// READ_ABSOLUTE + /// } + /// } + /// + /// let mut pb = PreambleBuilder::::new(); + /// let _my_tool = pb.track(MyTool); + /// // register _my_tool with your tool collection + /// ``` + /// + /// For example, if working with rig's ToolSet builder: + /// ```ignore + /// let mut pb = PreambleBuilder::new(); + /// let toolset = ToolSet::builder() + /// .static_tool(pb.track(ReadTool::new())) + /// .build(); + /// ``` + pub fn track(&mut self, tool: T) -> T { + self.entries.push(ContextEntry { + name: T::NAME, + context: tool.context(), + }); + tool + } +} + +impl PreambleBuilder { + /// Sets the working directory to display in the environment section. + /// + /// Accepts any type that can be converted to String, including: + /// - `&str` + /// - `String` + /// - `PathBuf` or `&Path` (via `.display().to_string()`) + /// + /// Only available when environment section is enabled (`PreambleBuilder`). 
+ /// + /// # Example + /// + /// ```no_run + /// use llm_coding_tools_core::PreambleBuilder; + /// + /// let _pb = PreambleBuilder::::new() + /// .working_directory("/home/user/project"); + /// + /// // With runtime-computed path + /// let _pb = PreambleBuilder::::new() + /// .working_directory(std::env::current_dir().unwrap().display().to_string()); + /// ``` + #[inline] + pub fn working_directory(mut self, path: impl Into) -> Self { + self.working_directory = Some(path.into()); + self + } +} + +impl PreambleBuilder { + /// Generates the preamble string without environment section. + pub fn build(self) -> String { + if self.entries.is_empty() { + return String::new(); + } + + let tools_size: usize = self + .entries + .iter() + .map(|e| e.context.len() + e.name.len() + 20) + .sum(); + + let mut output = String::with_capacity(tools_size + 30); + + output.push_str("# Tool Usage Guidelines\n\n"); + + for entry in self.entries { + output.push_str("## "); + let mut chars = entry.name.chars(); + if let Some(first) = chars.next() { + output.push(first.to_ascii_uppercase()); + output.push_str(chars.as_str()); + } + output.push_str(" Tool\n\n"); + output.push_str(entry.context); + output.push_str("\n\n"); + } + + output.truncate(output.trim_end().len()); + output + } +} + +impl PreambleBuilder { + /// Generates the preamble string with environment section. 
+ pub fn build(self) -> String { + // Environment section size: ~50 bytes header + path length + // "# Environment\n\nWorking directory: \n\n" = ~38 bytes + const ENV_HEADER_SIZE: usize = 50; + + let env_size = self + .working_directory + .as_ref() + .map_or(0, |d| d.len() + ENV_HEADER_SIZE); + + let tools_size: usize = self + .entries + .iter() + .map(|e| e.context.len() + e.name.len() + 20) + .sum(); + + let has_tools = !self.entries.is_empty(); + let has_env = self.working_directory.is_some(); + + // Return empty if nothing to output + if !has_tools && !has_env { + return String::new(); + } + + let total_size = env_size + tools_size + if has_tools { 30 } else { 0 }; + let mut output = String::with_capacity(total_size); + + // Environment section + if let Some(ref dir) = self.working_directory { + output.push_str("# Environment\n\n"); + output.push_str("Working directory: "); + output.push_str(dir); + output.push_str("\n\n"); + } + + // Tool section + if has_tools { + output.push_str("# Tool Usage Guidelines\n\n"); + + for entry in self.entries { + output.push_str("## "); + let mut chars = entry.name.chars(); + if let Some(first) = chars.next() { + output.push(first.to_ascii_uppercase()); + output.push_str(chars.as_str()); + } + output.push_str(" Tool\n\n"); + output.push_str(entry.context); + output.push_str("\n\n"); + } + } + + output.truncate(output.trim_end().len()); + output + } +} + +/// Extension trait for placeholder substitution on preamble strings. +/// +/// Provides simple `{key}` placeholder replacement after building a preamble. +/// Unmatched placeholders are left as-is. 
+/// +/// # Example +/// +/// ```rust +/// use llm_coding_tools_core::preamble::Substitute; +/// +/// let preamble = "Available agents: {agents}".to_string(); +/// let result = preamble +/// .substitute("agents", "code-review, research") +/// .substitute("missing", "ignored"); +/// +/// assert_eq!(result, "Available agents: code-review, research"); +/// ``` +pub trait Substitute { + /// Replaces `{key}` placeholder with the given value. + /// + /// Returns a new String with the substitution applied. + /// If the placeholder is not found, returns the string unchanged. + fn substitute(self, key: &str, value: &str) -> String; + + /// Replaces multiple `{key}` placeholders with their values. + /// + /// Accepts an iterator of (key, value) pairs. + fn substitute_all<'a>( + self, + substitutions: impl IntoIterator, + ) -> String; +} + +impl Substitute for String { + #[inline] + fn substitute(self, key: &str, value: &str) -> String { + let placeholder = format!("{{{}}}", key); + self.replace(&placeholder, value) + } + + fn substitute_all<'a>( + mut self, + substitutions: impl IntoIterator, + ) -> String { + for (key, value) in substitutions { + let placeholder = format!("{{{}}}", key); + self = self.replace(&placeholder, value); + } + self + } +} + +#[cfg(test)] +mod tests { + use super::*; + + struct MockTool { + id: u32, + } + + impl ToolContext for MockTool { + const NAME: &'static str = "mock"; + fn context(&self) -> &'static str { + "Mock tool context." 
+ } + } + + #[test] + fn empty_builder_returns_empty_string() { + let preamble = PreambleBuilder::::new().build(); + assert!(preamble.is_empty()); + } + + #[test] + fn track_returns_tool_unchanged() { + let mut pb = PreambleBuilder::::new(); + let tool = MockTool { id: 42 }; + let returned = pb.track(tool); + assert_eq!(returned.id, 42); + } + + #[test] + fn single_tool_formats_correctly() { + let mut pb = PreambleBuilder::::new(); + let _ = pb.track(MockTool { id: 1 }); + let preamble = pb.build(); + + assert!(preamble.contains("# Tool Usage Guidelines")); + assert!(preamble.contains("## Mock Tool")); + assert!(preamble.contains("Mock tool context.")); + } + + #[test] + fn multiple_tools_preserve_order() { + struct OtherTool; + impl ToolContext for OtherTool { + const NAME: &'static str = "other"; + fn context(&self) -> &'static str { + "Other context." + } + } + + let mut pb = PreambleBuilder::::new(); + let _ = pb.track(MockTool { id: 1 }); + let _ = pb.track(OtherTool); + let preamble = pb.build(); + + let mock_pos = preamble.find("## Mock Tool").unwrap(); + let other_pos = preamble.find("## Other Tool").unwrap(); + assert!( + mock_pos < other_pos, + "Tools should appear in insertion order" + ); + } + + #[test] + fn builder_without_env_omits_environment_section() { + let mut pb = PreambleBuilder::::new(); + let _ = pb.track(MockTool { id: 1 }); + let preamble = pb.build(); + + assert!(!preamble.contains("# Environment")); + assert!(!preamble.contains("Working directory")); + assert!(preamble.contains("# Tool Usage Guidelines")); + } + + #[test] + fn builder_with_env_includes_environment_section() { + let mut pb = PreambleBuilder::::new().working_directory("/home/user/project"); + let _ = pb.track(MockTool { id: 1 }); + let preamble = pb.build(); + + assert!(preamble.contains("# Environment")); + assert!(preamble.contains("Working directory: /home/user/project")); + // Environment should come before tools + let env_pos = preamble.find("# Environment").unwrap(); 
+ let tools_pos = preamble.find("# Tool Usage Guidelines").unwrap(); + assert!(env_pos < tools_pos); + } + + #[test] + fn builder_with_env_no_working_dir_no_tools_returns_empty() { + let pb = PreambleBuilder::::new(); + let preamble = pb.build(); + assert!(preamble.is_empty()); + } + + #[test] + fn builder_with_env_and_working_dir_but_no_tools() { + // Environment section should render even without tools tracked + let pb = PreambleBuilder::::new().working_directory("/home/user/project"); + let preamble = pb.build(); + + assert!(preamble.contains("# Environment")); + assert!(preamble.contains("Working directory: /home/user/project")); + assert!(!preamble.contains("# Tool Usage Guidelines")); + } + + #[test] + fn working_directory_accepts_runtime_string() { + // Simulates std::env::current_dir().unwrap().display().to_string() + let runtime_path = String::from("/runtime/computed/path"); + let pb = PreambleBuilder::::new().working_directory(runtime_path); + let preamble = pb.build(); + + assert!(preamble.contains("Working directory: /runtime/computed/path")); + } + + #[test] + fn working_directory_accepts_str() { + let pb = PreambleBuilder::::new().working_directory("/static/path"); + let preamble = pb.build(); + + assert!(preamble.contains("Working directory: /static/path")); + } + + #[test] + fn substitute_replaces_single_placeholder() { + use super::Substitute; + + let text = "Hello {name}!".to_string(); + let result = text.substitute("name", "World"); + assert_eq!(result, "Hello World!"); + } + + #[test] + fn substitute_leaves_unmatched_placeholders() { + use super::Substitute; + + let text = "Hello {name}, welcome to {place}!".to_string(); + let result = text.substitute("name", "Alice"); + assert_eq!(result, "Hello Alice, welcome to {place}!"); + } + + #[test] + fn substitute_handles_empty_value() { + use super::Substitute; + + let text = "Prefix{middle}Suffix".to_string(); + let result = text.substitute("middle", ""); + assert_eq!(result, "PrefixSuffix"); + } + + 
#[test] + fn substitute_all_replaces_multiple() { + use super::Substitute; + + let text = "Hello {name}, welcome to {place}!".to_string(); + let result = text.substitute_all([("name", "Alice"), ("place", "Wonderland")]); + assert_eq!(result, "Hello Alice, welcome to Wonderland!"); + } + + #[test] + fn substitute_no_placeholder_returns_unchanged() { + use super::Substitute; + + let text = "No placeholders here".to_string(); + let result = text.substitute("missing", "value"); + assert_eq!(result, "No placeholders here"); + } + + #[test] + fn generic_flag_is_compile_time() { + // This test verifies the generic works at compile time + // If it compiles, the generic system works + let _pb_no_env: PreambleBuilder = PreambleBuilder::new(); + let _pb_with_env: PreambleBuilder = PreambleBuilder::new(); + + // Type inference defaults to false + let _pb_default: PreambleBuilder = PreambleBuilder::new(); + } + + #[test] + fn backwards_compatibility_existing_api() { + // Existing code should work unchanged + let mut pb = PreambleBuilder::::new(); + let _ = pb.track(MockTool { id: 1 }); + let preamble = pb.build(); + + assert!(preamble.contains("# Tool Usage Guidelines")); + assert!(preamble.contains("## Mock Tool")); + } +} diff --git a/src/llm-coding-tools-core/src/util.rs b/src/llm-coding-tools-core/src/util.rs new file mode 100644 index 00000000..efe9f37e --- /dev/null +++ b/src/llm-coding-tools-core/src/util.rs @@ -0,0 +1,99 @@ +//! Shared utilities for tool implementations. + +/// Generous estimate of average characters per line for buffer pre-allocation. +pub const ESTIMATED_CHARS_PER_LINE: usize = 64; + +/// A number of characters per line that's likely to not be exceeded in most files. +pub const LIKELY_CHARS_PER_LINE_MAX: usize = ESTIMATED_CHARS_PER_LINE * 4; + +/// Formats a line with its line number for output. +/// +/// Uses the format: `{spaces}{line_number}\t{content}` where spaces +/// pad the line number to align with the widest number in the range. 
+#[inline] +pub fn format_numbered_line(line_number: usize, content: &str, max_line_number: usize) -> String { + let width = max_line_number.checked_ilog10().unwrap_or(0) as usize + 1; + format!("{:>width$}\t{}", line_number, content) +} + +/// Truncates text to a maximum byte length, appending a truncation notice. +/// +/// Returns `(truncated_text, was_truncated)`. +pub fn truncate_text(text: &str, max_bytes: usize) -> (&str, bool) { + if text.len() <= max_bytes { + return (text, false); + } + + // Find a valid UTF-8 boundary before max_bytes + let mut end = max_bytes; + while end > 0 && !text.is_char_boundary(end) { + end -= 1; + } + + (&text[..end], true) +} + +/// Truncates a single line to a maximum character count. +pub fn truncate_line(line: &str, max_chars: usize) -> (&str, bool) { + // Fast path: UTF-8 guarantees byte_count >= char_count, + // so if byte length fits, no truncation needed. + if line.len() <= max_chars { + return (line, false); + } + + // Find byte position at max_chars character boundary + let Some((byte_pos, _)) = line.char_indices().nth(max_chars) else { + // Fewer than max_chars characters exist + return (line, false); + }; + + (&line[..byte_pos], true) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn format_numbered_line_pads_correctly() { + assert_eq!(format_numbered_line(1, "hello", 9), "1\thello"); + assert_eq!(format_numbered_line(1, "hello", 10), " 1\thello"); + assert_eq!(format_numbered_line(1, "hello", 100), " 1\thello"); + } + + #[test] + fn truncate_text_preserves_short_text() { + let (text, truncated) = truncate_text("hello", 10); + assert_eq!(text, "hello"); + assert!(!truncated); + } + + #[test] + fn truncate_text_truncates_long_text() { + let (text, truncated) = truncate_text("hello world", 5); + assert_eq!(text, "hello"); + assert!(truncated); + } + + #[test] + fn truncate_text_respects_utf8_boundaries() { + // "héllo" has é which is 2 bytes + let (text, truncated) = truncate_text("héllo", 2); + 
assert_eq!(text, "h"); + assert!(truncated); + } + + #[test] + fn truncate_line_preserves_short_line() { + let (line, truncated) = truncate_line("hello", 10); + assert_eq!(line, "hello"); + assert!(!truncated); + } + + #[test] + fn truncate_line_truncates_by_char_count() { + let (line, truncated) = truncate_line("héllo", 3); + assert_eq!(line, "hél"); + assert!(truncated); + } +} diff --git a/src/llm-coding-tools-rig/Cargo.toml b/src/llm-coding-tools-rig/Cargo.toml new file mode 100644 index 00000000..7782b23d --- /dev/null +++ b/src/llm-coding-tools-rig/Cargo.toml @@ -0,0 +1,33 @@ +[package] +name = "llm-coding-tools-rig" +version = "0.1.0" +edition = "2021" +description = "Lightweight, high-performance Rig framework Tool implementations for coding tools" +repository = "https://github.com/Sewer56/llm-coding-tools" +license = "Apache-2.0" +include = ["src/**/*"] +readme = "README.md" + +[dependencies] +# Core tool operations (file read/write/edit, glob, grep, bash, etc.) +llm-coding-tools-core = { version = "0.1.0", path = "../llm-coding-tools-core", features = [ + "tokio", +] } + +# Implements rig_core::tool::Tool trait for each tool +rig-core = { version = "0.28", default-features = false, features = ["reqwest-rustls"] } + +# WebFetchTool needs its own client instance +reqwest = { version = "0.13", default-features = false, features = [ + "rustls", + "rustls-native-certs", +] } + +# Tool::definition() returns JSON Schema for LLM parameter validation +schemars = "1.2" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" + +[dev-dependencies] +tempfile = "3.24" +tokio = { version = "1.49", features = ["rt-multi-thread", "macros"] } diff --git a/src/llm-coding-tools-rig/README.md b/src/llm-coding-tools-rig/README.md new file mode 100644 index 00000000..89c7ac3c --- /dev/null +++ b/src/llm-coding-tools-rig/README.md @@ -0,0 +1,125 @@ +# llm-coding-tools-rig + 
+[![Crates.io](https://img.shields.io/crates/v/llm-coding-tools-rig.svg)](https://crates.io/crates/llm-coding-tools-rig) +[![Docs.rs](https://docs.rs/llm-coding-tools-rig/badge.svg)](https://docs.rs/llm-coding-tools-rig) + +Lightweight, high-performance Rig framework Tool implementations for coding tools. + +## Features + +- **File operations** - Read, write, edit, glob, grep with two access modes: + - `absolute::*` - Unrestricted filesystem access + - `allowed::*` - Sandboxed to configured directories +- **Shell execution** - Cross-platform command execution with timeout +- **Web fetching** - URL content retrieval with format conversion +- **Task delegation** - Sub-agent spawning for complex tasks +- **Todo management** - Shared-state todo list tracking +- **Context strings** - LLM guidance text for tool usage (re-exported from core) + +## Installation + +Add to your `Cargo.toml`: + +```toml +[dependencies] +llm-coding-tools-rig = "0.1" +``` + +## Quick Start + +Minimal runnable agent (requires `OPENAI_API_KEY`): + +```rust +use llm_coding_tools_rig::absolute::{GlobTool, GrepTool, ReadTool}; +use llm_coding_tools_rig::{BashTool, PreambleBuilder, TodoTools}; +use rig::providers::openai; +use rig::tool::ToolSet; + +#[tokio::main] +async fn main() -> Result<(), Box> { + let todos = TodoTools::new(); + let mut pb = PreambleBuilder::::new(); + + let toolset = ToolSet::builder() + .static_tool(pb.track(ReadTool::::new())) + .static_tool(pb.track(GlobTool::new())) + .static_tool(pb.track(GrepTool::::new())) + .static_tool(pb.track(BashTool::new())) + .static_tool(pb.track(todos.read)) + .static_tool(pb.track(todos.write)) + .build(); + + let preamble = pb.build(); + + let client = openai::Client::from_env(); + let agent = client + .agent("gpt-4o") + .preamble(&preamble) + .tools(toolset) + .build(); + + let response = agent + .prompt("Search for TODO comments in src/") + .await?; + println!("{response}"); + + Ok(()) +} +``` + +Example preamble output (truncated): + 
+```text +# Tool Usage Guidelines + +## Read Tool + +Reads files from disk. + +## Bash Tool + +Executes shell commands. +``` + +Run the full example app: + +```bash +OPENAI_API_KEY=... cargo run --example full_agent -p llm-coding-tools-rig +``` + +## Usage + +File tools come in `absolute::*` (unrestricted) and `allowed::*` (sandboxed) variants: + +```rust +use llm_coding_tools_rig::absolute::{ReadTool, WriteTool}; +use llm_coding_tools_rig::allowed::{ReadTool as AllowedReadTool, WriteTool as AllowedWriteTool}; +use llm_coding_tools_rig::AllowedPathResolver; +use std::path::PathBuf; + +let read = ReadTool::::new(); +let resolver = AllowedPathResolver::new([PathBuf::from("/home/user/project")]).unwrap(); +let sandboxed_read: AllowedReadTool = AllowedReadTool::with_resolver(resolver.clone()); +let sandboxed_write = AllowedWriteTool::with_resolver(resolver); +``` + +Other tools: `BashTool`, `WebFetchTool`, `TaskTool`, `TodoTools`. +Use `PreambleBuilder` to register tools and pass `pb.build()` to `.preamble()`. +Context strings are re-exported in `llm_coding_tools_rig::context` (e.g., `BASH`, `READ_ABSOLUTE`). + +## Examples + +```bash +# Basic toolset setup with PreambleBuilder +cargo run --example basic -p llm-coding-tools-rig + +# Complete agent configuration (recommended starting point) +cargo run --example full_agent -p llm-coding-tools-rig + +# Sandboxed file access with allowed::* tools +cargo run --example sandboxed -p llm-coding-tools-rig +``` + +## License + +Apache 2.0 diff --git a/src/llm-coding-tools-rig/examples/basic.rs b/src/llm-coding-tools-rig/examples/basic.rs new file mode 100644 index 00000000..592ff633 --- /dev/null +++ b/src/llm-coding-tools-rig/examples/basic.rs @@ -0,0 +1,65 @@ +//! PreambleBuilder example - pass-through tracking for ToolSet. +//! +//! Demonstrates: +//! - Using PreambleBuilder alongside ToolSet::builder() +//! - Full access to Rig's API (no wrapper limitations) +//! - TodoTools with shared state +//! 
- Generating and using the preamble string +//! +//! Run: cargo run --example basic -p llm-coding-tools-rig +//! +//! For a complete agent setup, see: cargo run --example full_agent -p llm-coding-tools-rig + +use llm_coding_tools_rig::absolute::{GlobTool, GrepTool, ReadTool}; +use llm_coding_tools_rig::{BashTool, PreambleBuilder, TodoTools}; +use rig::tool::ToolSet; + +#[tokio::main] +async fn main() { + // === Create shared state for todos === + let todos = TodoTools::new(); + + // === Create preamble builder to track tools === + let mut pb = PreambleBuilder::::new(); + + // === Use ToolSet::builder() directly - full Rig API! === + let toolset = ToolSet::builder() + .static_tool(pb.track(ReadTool::::new())) + .static_tool(pb.track(GlobTool::new())) + .static_tool(pb.track(GrepTool::::new())) + .static_tool(pb.track(BashTool::new())) + // Todo tools share state for read/write coordination + .static_tool(pb.track(todos.read)) + .static_tool(pb.track(todos.write)) + // Can use any ToolSet method here - dynamic_tool, etc. + .build(); + + // === Generate preamble string === + let preamble = pb.build(); + + // === Print tool definitions from ToolSet === + println!("=== Tools in ToolSet ==="); + for def in toolset.get_tool_definitions().await.unwrap() { + let truncated_desc: String = def.description.chars().take(60).collect(); + println!(" - {}: {}", def.name, truncated_desc); + } + + // === Print generated preamble === + println!("\n=== Generated Preamble ({} chars) ===\n", preamble.len()); + let truncated_preamble: String = preamble.chars().take(1000).collect(); + println!("{}", truncated_preamble); + if preamble.len() > 1000 { + println!("\n... ({} more chars)", preamble.len() - 1000); + } + + // === Integration with Rig agent === + // IMPORTANT: You must call .preamble() to actually use the generated string! 
+ // + // let agent = openai::Client::from_env() + // .agent("gpt-4o") + // .preamble(&preamble) // <-- Pass preamble to Rig + // .tools(toolset) + // .build(); + // + // let response = agent.prompt("Read main.rs").await?; +} diff --git a/src/llm-coding-tools-rig/examples/full_agent.rs b/src/llm-coding-tools-rig/examples/full_agent.rs new file mode 100644 index 00000000..dc1a12fc --- /dev/null +++ b/src/llm-coding-tools-rig/examples/full_agent.rs @@ -0,0 +1,105 @@ +//! Complete agent example - demonstrates full integration pattern. +//! +//! This example shows the recommended way to build an LLM coding agent +//! with all available tools. Agent execution is commented out as it +//! requires API credentials. +//! +//! Run: cargo run --example full_agent -p llm-coding-tools-rig + +use llm_coding_tools_rig::absolute::{EditTool, GlobTool, GrepTool, ReadTool, WriteTool}; +use llm_coding_tools_rig::{BashTool, PreambleBuilder, TodoTools, WebFetchTool}; +use rig::tool::ToolSet; + +#[tokio::main] +async fn main() -> Result<(), Box> { + // === 1. Create shared state for todos === + // + // TodoTools provides paired read/write tools that share state. + // This allows the LLM to maintain a task list across the conversation. + let todos = TodoTools::new(); + + // === 2. Create preamble builder === + // + // PreambleBuilder tracks which tools are registered and generates + // a combined context string for the system prompt. This gives the + // LLM detailed guidance on how to use each tool effectively. + let mut pb = PreambleBuilder::::new(); + + // === 3. Build toolset with all tools === + // + // Use pb.track() to wrap each tool - this registers it with the + // preamble builder while passing it through unchanged to the toolset. 
+ let toolset = ToolSet::builder() + // File operations (with line numbers enabled) + .static_tool(pb.track(ReadTool::::new())) + .static_tool(pb.track(WriteTool::new())) + .static_tool(pb.track(EditTool::new())) + .static_tool(pb.track(GlobTool::new())) + .static_tool(pb.track(GrepTool::::new())) + // Shell execution + .static_tool(pb.track(BashTool::new())) + // Web content fetching + .static_tool(pb.track(WebFetchTool::new())) + // Todo management (shared state between read and write) + .static_tool(pb.track(todos.read)) + .static_tool(pb.track(todos.write)) + .build(); + + // === 4. Generate preamble === + // + // The preamble contains usage instructions for all tracked tools. + // Pass this to the agent's .preamble() method so the LLM knows + // how to use the tools correctly. + let preamble = pb.build(); + + // === 5. Agent integration (requires API key) === + // + // Uncomment and configure with your preferred LLM provider: + // + // ``` + // use rig::providers::openai; + // + // let client = openai::Client::from_env(); + // let agent = client + // .agent("gpt-4o") + // .preamble(&preamble) + // .tools(toolset) + // .build(); + // + // // Example prompts this agent can handle: + // let response = agent.prompt("Find all Rust files in src/").await?; + // let response = agent.prompt("Read Cargo.toml and summarize dependencies").await?; + // let response = agent.prompt("Search for TODO comments in the codebase").await?; + // let response = agent.prompt("Run 'cargo test' and report results").await?; + // let response = agent.prompt("Fetch https://example.com and summarize").await?; + // ``` + + // === Demo output === + let tool_count = toolset.get_tool_definitions().await?.len(); + + println!("=== Full Agent Configuration ===\n"); + println!("Tools registered: {}", tool_count); + println!("Preamble size: {} chars\n", preamble.len()); + + println!("=== Registered Tools ==="); + for def in toolset.get_tool_definitions().await? 
{ + // Show first 60 chars of description + let desc = &def.description[..60.min(def.description.len())]; + println!(" {}: {}...", def.name, desc); + } + + println!("\n=== Example Prompts ==="); + println!(" - \"Find all Rust files in src/\""); + println!(" - \"Read Cargo.toml and list dependencies\""); + println!(" - \"Search for TODO comments\""); + println!(" - \"Run 'cargo test' and report results\""); + println!(" - \"Create a todo list for implementing feature X\""); + + println!("\n=== Preamble Preview (first 500 chars) ===\n"); + println!("{}", &preamble[..500.min(preamble.len())]); + if preamble.len() > 500 { + println!("\n... ({} more chars)", preamble.len() - 500); + } + + Ok(()) +} diff --git a/src/llm-coding-tools-rig/examples/sandboxed.rs b/src/llm-coding-tools-rig/examples/sandboxed.rs new file mode 100644 index 00000000..8b980a9c --- /dev/null +++ b/src/llm-coding-tools-rig/examples/sandboxed.rs @@ -0,0 +1,91 @@ +//! Sandboxed tools example - restricted file access. +//! +//! Demonstrates using `allowed::*` tools that restrict file operations +//! to specific directories only. This is useful for: +//! +//! - Multi-tenant environments where agents should only access their workspace +//! - Security-conscious deployments limiting filesystem exposure +//! - Project-scoped agents that shouldn't touch system files +//! +//! Run: cargo run --example sandboxed -p llm-coding-tools-rig + +use llm_coding_tools_rig::allowed::{EditTool, GlobTool, GrepTool, ReadTool, WriteTool}; +use llm_coding_tools_rig::{AllowedPathResolver, PreambleBuilder}; +use rig::tool::ToolSet; +use std::path::PathBuf; + +#[tokio::main] +async fn main() -> Result<(), Box> { + // === Define allowed directories === + // + // Only these directories (and their subdirectories) will be accessible. + // Attempts to read/write outside these paths will fail with an error. + // + // NOTE: Paths must exist - AllowedPathResolver canonicalizes them. 
+ // Using current directory and /tmp as they exist on most systems. + let current_dir = std::env::current_dir()?; + let allowed_paths = vec![ + current_dir.clone(), // Current working directory + PathBuf::from("/tmp"), // Temp directory + ]; + + println!("=== Sandboxed Agent Configuration ===\n"); + println!("Allowed directories:"); + for path in &allowed_paths { + println!(" - {}", path.display()); + } + + // === Option 1: Create tools individually === + // + // Each tool gets its own copy of the allowed paths. + // Simple but duplicates the path list. + let _read: ReadTool = ReadTool::new(allowed_paths.clone())?; + let _write = WriteTool::new(allowed_paths.clone())?; + + // === Option 2: Share a resolver (recommended) === + // + // Create one resolver and share it across tools. + // More efficient and ensures consistency. + let resolver = AllowedPathResolver::new(allowed_paths)?; + + let read: ReadTool = ReadTool::with_resolver(resolver.clone()); + let write = WriteTool::with_resolver(resolver.clone()); + let edit = EditTool::with_resolver(resolver.clone()); + let glob = GlobTool::with_resolver(resolver.clone()); + let grep: GrepTool = GrepTool::with_resolver(resolver); + + // === Build toolset === + let mut pb = PreambleBuilder::::new(); + let toolset = ToolSet::builder() + .static_tool(pb.track(read)) + .static_tool(pb.track(write)) + .static_tool(pb.track(edit)) + .static_tool(pb.track(glob)) + .static_tool(pb.track(grep)) + .build(); + + let preamble = pb.build(); + + // === Demo output === + println!( + "\nTools registered: {}", + toolset.get_tool_definitions().await?.len() + ); + println!("Preamble size: {} chars", preamble.len()); + + println!("\n=== Security Behavior ==="); + println!(" Allowed: read(\"{}/Cargo.toml\")", current_dir.display()); + println!(" Allowed: glob(\"/tmp/**/*.txt\")"); + println!(" BLOCKED: read(\"/etc/passwd\")"); + println!(" BLOCKED: write(\"/home/user/.ssh/config\")"); + + println!("\n=== Error Handling ==="); + println!(" 
When a path is outside allowed directories, tools return:"); + println!(" ToolError::InvalidPath(\"path not within allowed directories\")"); + + println!("\n=== Agent Integration ==="); + println!(" The preamble automatically includes 'allowed path' context,"); + println!(" informing the LLM that paths are relative to allowed directories."); + + Ok(()) +} diff --git a/src/llm-coding-tools-rig/src/absolute/edit.rs b/src/llm-coding-tools-rig/src/absolute/edit.rs new file mode 100644 index 00000000..fcd9e803 --- /dev/null +++ b/src/llm-coding-tools-rig/src/absolute/edit.rs @@ -0,0 +1,118 @@ +//! Edit file tool using [`AbsolutePathResolver`]. + +use llm_coding_tools_core::operations::edit_file; +use llm_coding_tools_core::path::AbsolutePathResolver; +pub use llm_coding_tools_core::EditError; +use llm_coding_tools_core::ToolContext; +use rig::completion::ToolDefinition; +use rig::tool::Tool; +use schemars::{schema_for, JsonSchema}; +use serde::Deserialize; + +/// Arguments for file editing. +#[derive(Debug, Clone, Deserialize, JsonSchema)] +pub struct EditArgs { + /// Absolute path to the file to modify. + pub file_path: String, + /// Exact text to find and replace. + pub old_string: String, + /// Replacement text. + pub new_string: String, + /// Replace all occurrences (default false). + #[serde(default)] + pub replace_all: bool, +} + +/// Tool for making exact string replacements in files. +#[derive(Debug, Clone, Default)] +pub struct EditTool; + +impl EditTool { + /// Creates a new edit tool instance. + #[inline] + pub fn new() -> Self { + Self + } +} + +impl Tool for EditTool { + const NAME: &'static str = "edit"; + + type Error = EditError; + type Args = EditArgs; + type Output = String; + + async fn definition(&self, _prompt: String) -> ToolDefinition { + ToolDefinition { + name: ::NAME.to_string(), + description: "Makes exact string replacements in files. Use replace_all=true to \ + replace all occurrences." 
+ .to_string(), + parameters: serde_json::to_value(schema_for!(EditArgs)) + .expect("EditArgs schema generation should not fail"), + } + } + + async fn call(&self, args: Self::Args) -> Result { + let resolver = AbsolutePathResolver; + edit_file( + &resolver, + &args.file_path, + &args.old_string, + &args.new_string, + args.replace_all, + ) + .await + } +} + +impl ToolContext for EditTool { + const NAME: &'static str = "edit"; + + fn context(&self) -> &'static str { + llm_coding_tools_core::context::EDIT_ABSOLUTE + } +} + +#[cfg(test)] +mod tests { + use super::*; + use llm_coding_tools_core::ToolError; + use std::io::Write as _; + use tempfile::NamedTempFile; + + #[tokio::test] + async fn replaces_single_occurrence() { + let mut file = NamedTempFile::new().unwrap(); + file.write_all(b"hello world").unwrap(); + file.flush().unwrap(); + let tool = EditTool::new(); + let result = tool + .call(EditArgs { + file_path: file.path().to_string_lossy().to_string(), + old_string: "world".to_string(), + new_string: "rust".to_string(), + replace_all: false, + }) + .await + .unwrap(); + assert!(result.contains("1 occurrence")); + } + + #[tokio::test] + async fn rejects_relative_path() { + let tool = EditTool::new(); + let result = tool + .call(EditArgs { + file_path: "relative/path.txt".to_string(), + old_string: "old".to_string(), + new_string: "new".to_string(), + replace_all: false, + }) + .await; + assert!(matches!( + result, + Err(EditError::Tool(ToolError::InvalidPath(_))) + )); + } +} diff --git a/src/llm-coding-tools-rig/src/absolute/glob.rs b/src/llm-coding-tools-rig/src/absolute/glob.rs new file mode 100644 index 00000000..b3b34fe8 --- /dev/null +++ b/src/llm-coding-tools-rig/src/absolute/glob.rs @@ -0,0 +1,97 @@ +//! Glob pattern file finding tool using [`AbsolutePathResolver`]. 
+ +use llm_coding_tools_core::operations::glob_files; +use llm_coding_tools_core::path::AbsolutePathResolver; +use llm_coding_tools_core::{GlobOutput, ToolContext, ToolError}; +use rig::completion::ToolDefinition; +use rig::tool::Tool; +use schemars::{schema_for, JsonSchema}; +use serde::Deserialize; + +/// Arguments for the glob tool. +#[derive(Debug, Deserialize, JsonSchema)] +pub struct GlobArgs { + /// Glob pattern to match files against (e.g., "**/*.rs", "src/**/*.ts"). + pub pattern: String, + /// Absolute directory path to search in. + pub path: String, +} + +/// Tool for finding files matching glob patterns. +#[derive(Debug, Default, Clone, Copy)] +pub struct GlobTool; + +impl GlobTool { + /// Creates a new glob tool instance. + #[inline] + pub fn new() -> Self { + Self + } +} + +impl Tool for GlobTool { + const NAME: &'static str = "glob"; + + type Error = ToolError; + type Args = GlobArgs; + type Output = GlobOutput; + + async fn definition(&self, _prompt: String) -> ToolDefinition { + ToolDefinition { + name: ::NAME.to_string(), + description: "Find files matching a glob pattern. Respects .gitignore and \ + returns paths sorted by modification time (newest first)." 
+ .to_string(), + parameters: serde_json::to_value(schema_for!(GlobArgs)) + .expect("schema serialization should not fail"), + } + } + + async fn call(&self, args: Self::Args) -> Result { + let resolver = AbsolutePathResolver; + glob_files(&resolver, &args.pattern, &args.path) + } +} + +impl ToolContext for GlobTool { + const NAME: &'static str = "glob"; + + fn context(&self) -> &'static str { + llm_coding_tools_core::context::GLOB_ABSOLUTE + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs::{self, File}; + use tempfile::TempDir; + + #[tokio::test] + async fn finds_matching_files() { + let dir = TempDir::new().unwrap(); + fs::create_dir_all(dir.path().join("src")).unwrap(); + File::create(dir.path().join("src/lib.rs")).unwrap(); + let tool = GlobTool::new(); + let result = tool + .call(GlobArgs { + pattern: "**/*.rs".to_string(), + path: dir.path().to_string_lossy().to_string(), + }) + .await + .unwrap(); + assert!(result.files.iter().any(|f| f.ends_with("lib.rs"))); + } + + #[tokio::test] + async fn rejects_relative_path() { + let tool = GlobTool::new(); + let result = tool + .call(GlobArgs { + pattern: "*.rs".to_string(), + path: "relative/path".to_string(), + }) + .await; + assert!(matches!(result, Err(ToolError::InvalidPath(_)))); + } +} diff --git a/src/llm-coding-tools-rig/src/absolute/grep.rs b/src/llm-coding-tools-rig/src/absolute/grep.rs new file mode 100644 index 00000000..b273164a --- /dev/null +++ b/src/llm-coding-tools-rig/src/absolute/grep.rs @@ -0,0 +1,224 @@ +//! Grep content search tool using [`AbsolutePathResolver`]. 
+ +use llm_coding_tools_core::operations::grep_search; +use llm_coding_tools_core::path::AbsolutePathResolver; +use llm_coding_tools_core::{ToolContext, ToolError, ToolOutput}; +use rig::completion::ToolDefinition; +use rig::tool::Tool; +use schemars::{schema_for, JsonSchema}; +use serde::Deserialize; +use std::fmt::Write; + +const DEFAULT_LIMIT: usize = 100; +const MAX_LIMIT: usize = 2000; +const MAX_LINE_LENGTH: usize = 2000; + +fn default_limit() -> Option { + Some(DEFAULT_LIMIT) +} + +/// Arguments for the grep tool. +#[derive(Debug, Deserialize, JsonSchema)] +pub struct GrepArgs { + /// Regex pattern to search for in file contents. + pub pattern: String, + /// Absolute directory path to search in. + pub path: String, + /// Optional file glob filter (e.g., "*.rs", "*.{ts,tsx}"). + #[serde(default)] + pub include: Option, + /// Maximum number of matches to return (default: 100, max: 2000). + #[serde(default = "default_limit")] + pub limit: Option, +} + +/// Tool for searching file contents using regex patterns. +#[derive(Debug, Clone, Default)] +pub struct GrepTool; + +impl GrepTool { + /// Creates a new grep tool instance. + #[inline] + pub fn new() -> Self { + Self + } +} + +impl Tool for GrepTool { + const NAME: &'static str = "grep"; + + type Error = ToolError; + type Args = GrepArgs; + type Output = ToolOutput; + + async fn definition(&self, _prompt: String) -> ToolDefinition { + let description = if LINE_NUMBERS { + "Search file contents using regex patterns. Returns matches with file paths, \ + line numbers, and content, sorted by file modification time." + } else { + "Search file contents using regex patterns. Returns matches with file paths \ + and content, sorted by file modification time." 
+ }; + ToolDefinition { + name: ::NAME.to_string(), + description: description.to_string(), + parameters: serde_json::to_value(schema_for!(GrepArgs)) + .expect("schema serialization should not fail"), + } + } + + async fn call(&self, args: Self::Args) -> Result { + let pattern = args.pattern.trim(); + if pattern.is_empty() { + return Err(ToolError::InvalidPattern( + "pattern must not be empty".into(), + )); + } + + let limit = args.limit.unwrap_or(DEFAULT_LIMIT).min(MAX_LIMIT); + if limit == 0 { + return Err(ToolError::Validation( + "limit must be greater than zero".into(), + )); + } + + let include = args.include.as_deref().and_then(|s| { + let trimmed = s.trim(); + if trimmed.is_empty() { + None + } else { + Some(trimmed) + } + }); + + let resolver = AbsolutePathResolver; + let result = grep_search(&resolver, pattern, include, &args.path, limit)?; + + if result.files.is_empty() { + return Ok(ToolOutput::new("No matches found.")); + } + + // Format output grouped by file + let mut output = String::with_capacity(4096); + let _ = writeln!(&mut output, "Found {} matches", result.match_count); + + for file in &result.files { + let _ = writeln!(&mut output, "\n{}:", file.path); + for m in &file.matches { + // Use floor_char_boundary to avoid panicking on UTF-8 multibyte boundaries + let truncated_text = if m.line_text.len() > MAX_LINE_LENGTH { + &m.line_text[..m.line_text.floor_char_boundary(MAX_LINE_LENGTH)] + } else { + &m.line_text + }; + if LINE_NUMBERS { + let _ = writeln!(&mut output, " L{}: {}", m.line_num, truncated_text); + } else { + let _ = writeln!(&mut output, " {}", truncated_text); + } + } + } + + if result.truncated { + let _ = write!(&mut output, "\n(Results truncated at {} matches)", limit); + } + + Ok(if result.truncated { + ToolOutput::truncated(output) + } else { + ToolOutput::new(output) + }) + } +} + +impl ToolContext for GrepTool { + const NAME: &'static str = "grep"; + + fn context(&self) -> &'static str { + 
llm_coding_tools_core::context::GREP_ABSOLUTE + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + #[tokio::test] + async fn finds_matching_content() { + let dir = TempDir::new().unwrap(); + std::fs::write(dir.path().join("test.txt"), "hello world").unwrap(); + let tool: GrepTool = GrepTool::new(); + let result = tool + .call(GrepArgs { + pattern: "hello".to_string(), + path: dir.path().to_string_lossy().to_string(), + include: None, + limit: None, + }) + .await + .unwrap(); + assert!(result.content.contains("Found 1 matches")); + assert!(result.content.contains("L1: hello world")); + } + + #[tokio::test] + async fn rejects_relative_path() { + let tool: GrepTool = GrepTool::new(); + let result = tool + .call(GrepArgs { + pattern: "test".to_string(), + path: "relative/path".to_string(), + include: None, + limit: None, + }) + .await; + assert!(matches!(result, Err(ToolError::InvalidPath(_)))); + } + + #[tokio::test] + async fn rejects_empty_pattern() { + let tool: GrepTool = GrepTool::new(); + let result = tool + .call(GrepArgs { + pattern: " ".to_string(), + path: "/tmp".to_string(), + include: None, + limit: None, + }) + .await; + assert!(matches!(result, Err(ToolError::InvalidPattern(_)))); + } + + #[tokio::test] + async fn truncates_long_lines_at_utf8_boundary() { + let dir = TempDir::new().unwrap(); + + // Create a line that's > MAX_LINE_LENGTH (2000) bytes with multibyte chars at the boundary. + // Use 1998 ASCII chars + "日本語" (9 bytes for 3 chars) = 2007 bytes total. + // Truncating at byte 2000 would land inside the multibyte sequence without floor_char_boundary. 
+ let long_line = format!("match_me {}{}", "a".repeat(1989), "日本語"); + assert!( + long_line.len() > 2000, + "test setup: line must exceed MAX_LINE_LENGTH" + ); + + std::fs::write(dir.path().join("utf8_test.txt"), &long_line).unwrap(); + + let tool: GrepTool = GrepTool::new(); + let result = tool + .call(GrepArgs { + pattern: "match_me".to_string(), + path: dir.path().to_string_lossy().to_string(), + include: None, + limit: None, + }) + .await + .unwrap(); + + // Should not panic and output should be valid UTF-8 + assert!(result.content.contains("Found 1 matches")); + assert!(result.content.contains("L1:")); + // The output should be valid UTF-8 (this is implicitly tested by using .contains on a String) + } +} diff --git a/src/llm-coding-tools-rig/src/absolute/mod.rs b/src/llm-coding-tools-rig/src/absolute/mod.rs new file mode 100644 index 00000000..540c67de --- /dev/null +++ b/src/llm-coding-tools-rig/src/absolute/mod.rs @@ -0,0 +1,24 @@ +//! Tools using [`llm_coding_tools_core::path::AbsolutePathResolver`]. +//! +//! These tools require absolute paths and perform no directory restriction. +//! Use for unrestricted file system access. +//! +//! # Available Tools +//! +//! - [`ReadTool`] - Read file contents with optional line numbers +//! - [`WriteTool`] - Write content to files +//! - [`EditTool`] - Make exact string replacements +//! - [`GlobTool`] - Find files by glob pattern +//! - [`GrepTool`] - Search file contents by regex + +mod edit; +mod glob; +mod grep; +mod read; +mod write; + +pub use edit::{EditArgs, EditError, EditTool}; +pub use glob::{GlobArgs, GlobTool}; +pub use grep::{GrepArgs, GrepTool}; +pub use read::{ReadArgs, ReadTool}; +pub use write::{WriteTool, WriteToolArgs}; diff --git a/src/llm-coding-tools-rig/src/absolute/read.rs b/src/llm-coding-tools-rig/src/absolute/read.rs new file mode 100644 index 00000000..b11aadc3 --- /dev/null +++ b/src/llm-coding-tools-rig/src/absolute/read.rs @@ -0,0 +1,113 @@ +//! 
Read file tool using [`AbsolutePathResolver`]. + +use llm_coding_tools_core::operations::read_file; +use llm_coding_tools_core::path::AbsolutePathResolver; +use llm_coding_tools_core::{ToolContext, ToolError, ToolOutput}; +use rig::completion::ToolDefinition; +use rig::tool::Tool; +use schemars::{schema_for, JsonSchema}; +use serde::Deserialize; + +const DEFAULT_OFFSET: usize = 1; +const DEFAULT_LIMIT: usize = 2000; + +fn default_offset() -> usize { + DEFAULT_OFFSET +} + +fn default_limit() -> usize { + DEFAULT_LIMIT +} + +/// Arguments for the read file tool. +#[derive(Debug, Clone, Deserialize, JsonSchema)] +pub struct ReadArgs { + /// Absolute path to the file to read. + pub file_path: String, + /// 1-indexed line number to start reading from (default: 1). + #[serde(default = "default_offset")] + pub offset: usize, + /// Maximum number of lines to return (default: 2000). + #[serde(default = "default_limit")] + pub limit: usize, +} + +/// Tool for reading file contents with optional line numbers. +#[derive(Debug, Clone, Default)] +pub struct ReadTool; + +impl ReadTool { + /// Creates a new read tool instance. + #[inline] + pub fn new() -> Self { + Self + } +} + +impl Tool for ReadTool { + const NAME: &'static str = "read"; + + type Error = ToolError; + type Args = ReadArgs; + type Output = ToolOutput; + + async fn definition(&self, _prompt: String) -> ToolDefinition { + let description = if LINE_NUMBERS { + "Read file contents with line numbers. Returns lines prefixed with L{number}: format." + } else { + "Read file contents. Returns raw file content without line number prefixes." 
+ }; + ToolDefinition { + name: ::NAME.to_string(), + description: description.to_string(), + parameters: serde_json::to_value(schema_for!(ReadArgs)) + .expect("schema serialization should never fail"), + } + } + + async fn call(&self, args: Self::Args) -> Result { + let resolver = AbsolutePathResolver; + read_file::<_, LINE_NUMBERS>(&resolver, &args.file_path, args.offset, args.limit).await + } +} + +impl ToolContext for ReadTool { + const NAME: &'static str = "read"; + + fn context(&self) -> &'static str { + llm_coding_tools_core::context::READ_ABSOLUTE + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Write as _; + use tempfile::NamedTempFile; + + #[tokio::test] + async fn reads_file_with_line_numbers() { + let mut temp = NamedTempFile::new().unwrap(); + temp.write_all(b"hello\nworld\n").unwrap(); + let tool: ReadTool = ReadTool::new(); + let args = ReadArgs { + file_path: temp.path().to_string_lossy().to_string(), + offset: 1, + limit: 2000, + }; + let result = tool.call(args).await.unwrap(); + assert_eq!(result.content, "L1: hello\nL2: world"); + } + + #[tokio::test] + async fn rejects_relative_path() { + let tool: ReadTool = ReadTool::new(); + let args = ReadArgs { + file_path: "relative/path.txt".to_string(), + offset: 1, + limit: 100, + }; + let result = tool.call(args).await; + assert!(matches!(result, Err(ToolError::InvalidPath(_)))); + } +} diff --git a/src/llm-coding-tools-rig/src/absolute/write.rs b/src/llm-coding-tools-rig/src/absolute/write.rs new file mode 100644 index 00000000..93cdafa7 --- /dev/null +++ b/src/llm-coding-tools-rig/src/absolute/write.rs @@ -0,0 +1,95 @@ +//! Write file tool using [`AbsolutePathResolver`]. 
+ +use llm_coding_tools_core::operations::write_file; +use llm_coding_tools_core::path::AbsolutePathResolver; +use llm_coding_tools_core::{ToolContext, ToolError}; +use rig::completion::ToolDefinition; +use rig::tool::Tool; +use schemars::{schema_for, JsonSchema}; +use serde::Deserialize; + +/// Arguments for the write tool. +#[derive(Debug, Clone, Deserialize, JsonSchema)] +pub struct WriteToolArgs { + /// Absolute path for the file to write. + pub file_path: String, + /// Content to write to the file. + pub content: String, +} + +/// Tool for writing content to files. +#[derive(Debug, Clone, Default)] +pub struct WriteTool; + +impl WriteTool { + /// Creates a new write tool instance. + #[inline] + pub fn new() -> Self { + Self + } +} + +impl Tool for WriteTool { + const NAME: &'static str = "write"; + + type Error = ToolError; + type Args = WriteToolArgs; + type Output = String; + + async fn definition(&self, _prompt: String) -> ToolDefinition { + ToolDefinition { + name: ::NAME.to_string(), + description: "Write content to a file, creating parent directories if needed. \ + Overwrites existing files." 
+ .to_string(), + parameters: serde_json::to_value(schema_for!(WriteToolArgs)) + .expect("schema generation should not fail"), + } + } + + async fn call(&self, args: Self::Args) -> Result { + let resolver = AbsolutePathResolver; + write_file(&resolver, &args.file_path, &args.content).await + } +} + +impl ToolContext for WriteTool { + const NAME: &'static str = "write"; + + fn context(&self) -> &'static str { + llm_coding_tools_core::context::WRITE_ABSOLUTE + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + #[tokio::test] + async fn writes_new_file() { + let temp = TempDir::new().unwrap(); + let file_path = temp.path().join("new.txt"); + let tool = WriteTool::new(); + let result = tool + .call(WriteToolArgs { + file_path: file_path.to_string_lossy().to_string(), + content: "hello".to_string(), + }) + .await + .unwrap(); + assert!(result.contains("5 bytes")); + } + + #[tokio::test] + async fn rejects_relative_path() { + let tool = WriteTool::new(); + let result = tool + .call(WriteToolArgs { + file_path: "relative/path.txt".to_string(), + content: "content".to_string(), + }) + .await; + assert!(matches!(result, Err(ToolError::InvalidPath(_)))); + } +} diff --git a/src/llm-coding-tools-rig/src/allowed/edit.rs b/src/llm-coding-tools-rig/src/allowed/edit.rs new file mode 100644 index 00000000..0e7933e7 --- /dev/null +++ b/src/llm-coding-tools-rig/src/allowed/edit.rs @@ -0,0 +1,130 @@ +//! Edit file tool using [`AllowedPathResolver`]. + +use llm_coding_tools_core::operations::edit_file; +use llm_coding_tools_core::path::AllowedPathResolver; +pub use llm_coding_tools_core::EditError; +use llm_coding_tools_core::{ToolContext, ToolResult}; +use rig::completion::ToolDefinition; +use rig::tool::Tool; +use schemars::{schema_for, JsonSchema}; +use serde::Deserialize; +use std::path::{Path, PathBuf}; + +/// Arguments for file editing. 
+#[derive(Debug, Clone, Deserialize, JsonSchema)] +pub struct EditArgs { + /// Relative path to the file to modify (within allowed directories). + pub file_path: String, + /// Exact text to find and replace. + pub old_string: String, + /// Replacement text. + pub new_string: String, + /// Replace all occurrences (default false). + #[serde(default)] + pub replace_all: bool, +} + +/// Tool for making exact string replacements in files within allowed directories. +#[derive(Debug, Clone)] +pub struct EditTool { + resolver: AllowedPathResolver, +} + +impl EditTool { + /// Creates a new edit tool restricted to the given directories. + pub fn new(allowed_paths: impl IntoIterator>) -> ToolResult { + let paths: Vec = allowed_paths + .into_iter() + .map(|p| p.as_ref().to_path_buf()) + .collect(); + Ok(Self { + resolver: AllowedPathResolver::new(paths)?, + }) + } + + /// Creates a new edit tool with an existing resolver. + pub fn with_resolver(resolver: AllowedPathResolver) -> Self { + Self { resolver } + } +} + +impl Tool for EditTool { + const NAME: &'static str = "edit"; + + type Error = EditError; + type Args = EditArgs; + type Output = String; + + async fn definition(&self, _prompt: String) -> ToolDefinition { + ToolDefinition { + name: ::NAME.to_string(), + description: "Make exact string replacements in files within allowed directories. \ + Paths are relative to configured base directories." 
+ .to_string(), + parameters: serde_json::to_value(schema_for!(EditArgs)) + .expect("EditArgs schema generation should not fail"), + } + } + + async fn call(&self, args: Self::Args) -> Result { + edit_file( + &self.resolver, + &args.file_path, + &args.old_string, + &args.new_string, + args.replace_all, + ) + .await + } +} + +impl ToolContext for EditTool { + const NAME: &'static str = "edit"; + + fn context(&self) -> &'static str { + llm_coding_tools_core::context::EDIT_ALLOWED + } +} + +#[cfg(test)] +mod tests { + use super::*; + use llm_coding_tools_core::ToolError; + use tempfile::TempDir; + + #[tokio::test] + async fn replaces_single_occurrence() { + let dir = TempDir::new().unwrap(); + std::fs::write(dir.path().join("test.txt"), "hello world").unwrap(); + + let tool = EditTool::new([dir.path()]).unwrap(); + let result = tool + .call(EditArgs { + file_path: "test.txt".to_string(), + old_string: "world".to_string(), + new_string: "rust".to_string(), + replace_all: false, + }) + .await + .unwrap(); + assert!(result.contains("1 occurrence")); + } + + #[tokio::test] + async fn rejects_path_traversal() { + let dir = TempDir::new().unwrap(); + let tool = EditTool::new([dir.path()]).unwrap(); + let result = tool + .call(EditArgs { + file_path: "../../../etc/passwd".to_string(), + old_string: "old".to_string(), + new_string: "new".to_string(), + replace_all: false, + }) + .await; + assert!(matches!( + result, + Err(EditError::Tool(ToolError::InvalidPath(_))) + )); + } +} diff --git a/src/llm-coding-tools-rig/src/allowed/glob.rs b/src/llm-coding-tools-rig/src/allowed/glob.rs new file mode 100644 index 00000000..bea6019d --- /dev/null +++ b/src/llm-coding-tools-rig/src/allowed/glob.rs @@ -0,0 +1,111 @@ +//! Glob pattern file finding tool using [`AllowedPathResolver`]. 
+ +use llm_coding_tools_core::operations::glob_files; +use llm_coding_tools_core::path::AllowedPathResolver; +use llm_coding_tools_core::{GlobOutput, ToolContext, ToolError, ToolResult}; +use rig::completion::ToolDefinition; +use rig::tool::Tool; +use schemars::{schema_for, JsonSchema}; +use serde::Deserialize; +use std::path::{Path, PathBuf}; + +/// Arguments for the glob tool. +#[derive(Debug, Deserialize, JsonSchema)] +pub struct GlobArgs { + /// Glob pattern to match files against (e.g., "**/*.rs", "src/**/*.ts"). + pub pattern: String, + /// Relative directory path to search in (within allowed directories). + pub path: String, +} + +/// Tool for finding files matching glob patterns within allowed directories. +#[derive(Debug, Clone)] +pub struct GlobTool { + resolver: AllowedPathResolver, +} + +impl GlobTool { + /// Creates a new glob tool restricted to the given directories. + pub fn new(allowed_paths: impl IntoIterator>) -> ToolResult { + let paths: Vec = allowed_paths + .into_iter() + .map(|p| p.as_ref().to_path_buf()) + .collect(); + Ok(Self { + resolver: AllowedPathResolver::new(paths)?, + }) + } + + /// Creates a new glob tool with an existing resolver. + pub fn with_resolver(resolver: AllowedPathResolver) -> Self { + Self { resolver } + } +} + +impl Tool for GlobTool { + const NAME: &'static str = "glob"; + + type Error = ToolError; + type Args = GlobArgs; + type Output = GlobOutput; + + async fn definition(&self, _prompt: String) -> ToolDefinition { + ToolDefinition { + name: ::NAME.to_string(), + description: "Find files matching a glob pattern within allowed directories. \ + Paths are relative to configured base directories." 
+ .to_string(), + parameters: serde_json::to_value(schema_for!(GlobArgs)) + .expect("schema serialization should not fail"), + } + } + + async fn call(&self, args: Self::Args) -> Result { + glob_files(&self.resolver, &args.pattern, &args.path) + } +} + +impl ToolContext for GlobTool { + const NAME: &'static str = "glob"; + + fn context(&self) -> &'static str { + llm_coding_tools_core::context::GLOB_ALLOWED + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs::{self, File}; + use tempfile::TempDir; + + #[tokio::test] + async fn finds_matching_files() { + let dir = TempDir::new().unwrap(); + fs::create_dir_all(dir.path().join("src")).unwrap(); + File::create(dir.path().join("src/lib.rs")).unwrap(); + + let tool = GlobTool::new([dir.path()]).unwrap(); + let result = tool + .call(GlobArgs { + pattern: "**/*.rs".to_string(), + path: ".".to_string(), + }) + .await + .unwrap(); + assert!(result.files.iter().any(|f| f.ends_with("lib.rs"))); + } + + #[tokio::test] + async fn rejects_path_traversal() { + let dir = TempDir::new().unwrap(); + let tool = GlobTool::new([dir.path()]).unwrap(); + let result = tool + .call(GlobArgs { + pattern: "*.rs".to_string(), + path: "../../../etc".to_string(), + }) + .await; + assert!(matches!(result, Err(ToolError::InvalidPath(_)))); + } +} diff --git a/src/llm-coding-tools-rig/src/allowed/grep.rs b/src/llm-coding-tools-rig/src/allowed/grep.rs new file mode 100644 index 00000000..0952b67b --- /dev/null +++ b/src/llm-coding-tools-rig/src/allowed/grep.rs @@ -0,0 +1,234 @@ +//! Grep content search tool using [`AllowedPathResolver`]. 
+ +use llm_coding_tools_core::operations::grep_search; +use llm_coding_tools_core::path::AllowedPathResolver; +use llm_coding_tools_core::{ToolContext, ToolError, ToolOutput, ToolResult}; +use rig::completion::ToolDefinition; +use rig::tool::Tool; +use schemars::{schema_for, JsonSchema}; +use serde::Deserialize; +use std::fmt::Write; +use std::path::{Path, PathBuf}; + +const DEFAULT_LIMIT: usize = 100; +const MAX_LIMIT: usize = 2000; +const MAX_LINE_LENGTH: usize = 2000; + +fn default_limit() -> Option { + Some(DEFAULT_LIMIT) +} + +/// Arguments for the grep tool. +#[derive(Debug, Deserialize, JsonSchema)] +pub struct GrepArgs { + /// Regex pattern to search for in file contents. + pub pattern: String, + /// Relative directory path to search in (within allowed directories). + pub path: String, + /// Optional file glob filter (e.g., "*.rs", "*.{ts,tsx}"). + #[serde(default)] + pub include: Option, + /// Maximum number of matches to return (default: 100, max: 2000). + #[serde(default = "default_limit")] + pub limit: Option, +} + +/// Tool for searching file contents within allowed directories. +#[derive(Debug, Clone)] +pub struct GrepTool { + resolver: AllowedPathResolver, +} + +impl GrepTool { + /// Creates a new grep tool restricted to the given directories. + pub fn new(allowed_paths: impl IntoIterator>) -> ToolResult { + let paths: Vec = allowed_paths + .into_iter() + .map(|p| p.as_ref().to_path_buf()) + .collect(); + Ok(Self { + resolver: AllowedPathResolver::new(paths)?, + }) + } + + /// Creates a new grep tool with an existing resolver. 
+ pub fn with_resolver(resolver: AllowedPathResolver) -> Self { + Self { resolver } + } +} + +impl Tool for GrepTool { + const NAME: &'static str = "grep"; + + type Error = ToolError; + type Args = GrepArgs; + type Output = ToolOutput; + + async fn definition(&self, _prompt: String) -> ToolDefinition { + ToolDefinition { + name: ::NAME.to_string(), + description: "Search file contents using regex patterns within allowed directories. \ + Paths are relative to configured base directories." + .to_string(), + parameters: serde_json::to_value(schema_for!(GrepArgs)) + .expect("schema serialization should not fail"), + } + } + + async fn call(&self, args: Self::Args) -> Result { + let pattern = args.pattern.trim(); + if pattern.is_empty() { + return Err(ToolError::InvalidPattern( + "pattern must not be empty".into(), + )); + } + + let limit = args.limit.unwrap_or(DEFAULT_LIMIT).min(MAX_LIMIT); + if limit == 0 { + return Err(ToolError::Validation( + "limit must be greater than zero".into(), + )); + } + + let include = args.include.as_deref().and_then(|s| { + let trimmed = s.trim(); + if trimmed.is_empty() { + None + } else { + Some(trimmed) + } + }); + + let result = grep_search(&self.resolver, pattern, include, &args.path, limit)?; + + if result.files.is_empty() { + return Ok(ToolOutput::new("No matches found.")); + } + + // Format output grouped by file + let mut output = String::with_capacity(4096); + let _ = writeln!(&mut output, "Found {} matches", result.match_count); + + for file in &result.files { + let _ = writeln!(&mut output, "\n{}:", file.path); + for m in &file.matches { + // Use floor_char_boundary to avoid panicking on UTF-8 multibyte boundaries + let truncated_text = if m.line_text.len() > MAX_LINE_LENGTH { + &m.line_text[..m.line_text.floor_char_boundary(MAX_LINE_LENGTH)] + } else { + &m.line_text + }; + if LINE_NUMBERS { + let _ = writeln!(&mut output, " L{}: {}", m.line_num, truncated_text); + } else { + let _ = writeln!(&mut output, " {}", 
truncated_text); + } + } + } + + if result.truncated { + let _ = write!(&mut output, "\n(Results truncated at {} matches)", limit); + } + + Ok(if result.truncated { + ToolOutput::truncated(output) + } else { + ToolOutput::new(output) + }) + } +} + +impl ToolContext for GrepTool { + const NAME: &'static str = "grep"; + + fn context(&self) -> &'static str { + llm_coding_tools_core::context::GREP_ALLOWED + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + #[tokio::test] + async fn finds_matching_content() { + let dir = TempDir::new().unwrap(); + std::fs::write(dir.path().join("test.txt"), "hello world").unwrap(); + + let tool: GrepTool = GrepTool::new([dir.path()]).unwrap(); + let result = tool + .call(GrepArgs { + pattern: "hello".to_string(), + path: ".".to_string(), + include: None, + limit: None, + }) + .await + .unwrap(); + assert!(result.content.contains("Found 1 matches")); + assert!(result.content.contains("L1: hello world")); + } + + #[tokio::test] + async fn rejects_path_traversal() { + let dir = TempDir::new().unwrap(); + let tool: GrepTool = GrepTool::new([dir.path()]).unwrap(); + let result = tool + .call(GrepArgs { + pattern: "test".to_string(), + path: "../../../etc".to_string(), + include: None, + limit: None, + }) + .await; + assert!(matches!(result, Err(ToolError::InvalidPath(_)))); + } + + #[tokio::test] + async fn rejects_empty_pattern() { + let dir = TempDir::new().unwrap(); + let tool: GrepTool = GrepTool::new([dir.path()]).unwrap(); + let result = tool + .call(GrepArgs { + pattern: " ".to_string(), + path: ".".to_string(), + include: None, + limit: None, + }) + .await; + assert!(matches!(result, Err(ToolError::InvalidPattern(_)))); + } + + #[tokio::test] + async fn truncates_long_lines_at_utf8_boundary() { + let dir = TempDir::new().unwrap(); + + // Create a line that's > MAX_LINE_LENGTH (2000) bytes with multibyte chars at the boundary. + // Use 1998 ASCII chars + "日本語" (9 bytes for 3 chars) = 2007 bytes total. 
+ // Truncating at byte 2000 would land inside the multibyte sequence without floor_char_boundary. + let long_line = format!("match_me {}{}", "a".repeat(1989), "日本語"); + assert!( + long_line.len() > 2000, + "test setup: line must exceed MAX_LINE_LENGTH" + ); + + std::fs::write(dir.path().join("utf8_test.txt"), &long_line).unwrap(); + + let tool: GrepTool = GrepTool::new([dir.path()]).unwrap(); + let result = tool + .call(GrepArgs { + pattern: "match_me".to_string(), + path: ".".to_string(), + include: None, + limit: None, + }) + .await + .unwrap(); + + // Should not panic and output should be valid UTF-8 + assert!(result.content.contains("Found 1 matches")); + assert!(result.content.contains("L1:")); + // The output should be valid UTF-8 (this is implicitly tested by using .contains on a String) + } +} diff --git a/src/llm-coding-tools-rig/src/allowed/mod.rs b/src/llm-coding-tools-rig/src/allowed/mod.rs new file mode 100644 index 00000000..76b56336 --- /dev/null +++ b/src/llm-coding-tools-rig/src/allowed/mod.rs @@ -0,0 +1,24 @@ +//! Tools using [`llm_coding_tools_core::path::AllowedPathResolver`]. +//! +//! These tools restrict file access to configured allowed directories. +//! Use for sandboxed file system access. +//! +//! # Available Tools +//! +//! - [`ReadTool`] - Read file contents within allowed paths +//! - [`WriteTool`] - Write file contents within allowed paths +//! - [`EditTool`] - Edit file with search/replace within allowed paths +//! - [`GlobTool`] - Find files by pattern within allowed paths +//! 
- [`GrepTool`] - Search file contents within allowed paths + +mod edit; +mod glob; +mod grep; +mod read; +mod write; + +pub use edit::{EditArgs, EditError, EditTool}; +pub use glob::{GlobArgs, GlobTool}; +pub use grep::{GrepArgs, GrepTool}; +pub use read::{ReadArgs, ReadTool}; +pub use write::{WriteTool, WriteToolArgs}; diff --git a/src/llm-coding-tools-rig/src/allowed/read.rs b/src/llm-coding-tools-rig/src/allowed/read.rs new file mode 100644 index 00000000..6ae3bf16 --- /dev/null +++ b/src/llm-coding-tools-rig/src/allowed/read.rs @@ -0,0 +1,131 @@ +//! Read file tool using [`AllowedPathResolver`]. + +use llm_coding_tools_core::operations::read_file; +use llm_coding_tools_core::path::AllowedPathResolver; +use llm_coding_tools_core::{ToolContext, ToolError, ToolOutput, ToolResult}; +use rig::completion::ToolDefinition; +use rig::tool::Tool; +use schemars::{schema_for, JsonSchema}; +use serde::Deserialize; +use std::path::{Path, PathBuf}; + +const DEFAULT_OFFSET: usize = 1; +const DEFAULT_LIMIT: usize = 2000; + +fn default_offset() -> usize { + DEFAULT_OFFSET +} + +fn default_limit() -> usize { + DEFAULT_LIMIT +} + +/// Arguments for the read file tool. +#[derive(Debug, Clone, Deserialize, JsonSchema)] +pub struct ReadArgs { + /// Relative path to the file to read (within allowed directories). + pub file_path: String, + /// 1-indexed line number to start reading from (default: 1). + #[serde(default = "default_offset")] + pub offset: usize, + /// Maximum number of lines to return (default: 2000). + #[serde(default = "default_limit")] + pub limit: usize, +} + +/// Tool for reading file contents with optional line numbers. +/// +/// Restricts access to configured allowed directories. +#[derive(Debug, Clone)] +pub struct ReadTool { + resolver: AllowedPathResolver, +} + +impl ReadTool { + /// Creates a new read tool restricted to the given directories. 
+ pub fn new(allowed_paths: impl IntoIterator>) -> ToolResult { + let paths: Vec = allowed_paths + .into_iter() + .map(|p| p.as_ref().to_path_buf()) + .collect(); + Ok(Self { + resolver: AllowedPathResolver::new(paths)?, + }) + } + + /// Creates a new read tool with an existing resolver. + pub fn with_resolver(resolver: AllowedPathResolver) -> Self { + Self { resolver } + } +} + +impl Tool for ReadTool { + const NAME: &'static str = "read"; + + type Error = ToolError; + type Args = ReadArgs; + type Output = ToolOutput; + + async fn definition(&self, _prompt: String) -> ToolDefinition { + let description = if LINE_NUMBERS { + "Read file contents with line numbers from allowed directories. \ + Paths are relative to configured base directories." + } else { + "Read file contents from allowed directories. \ + Paths are relative to configured base directories." + }; + ToolDefinition { + name: ::NAME.to_string(), + description: description.to_string(), + parameters: serde_json::to_value(schema_for!(ReadArgs)) + .expect("schema serialization should never fail"), + } + } + + async fn call(&self, args: Self::Args) -> Result { + read_file::<_, LINE_NUMBERS>(&self.resolver, &args.file_path, args.offset, args.limit).await + } +} + +impl ToolContext for ReadTool { + const NAME: &'static str = "read"; + + fn context(&self) -> &'static str { + llm_coding_tools_core::context::READ_ALLOWED + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + #[tokio::test] + async fn reads_file_with_line_numbers() { + let dir = TempDir::new().unwrap(); + let file_path = dir.path().join("test.txt"); + std::fs::write(&file_path, "hello\nworld\n").unwrap(); + + let tool: ReadTool = ReadTool::new([dir.path()]).unwrap(); + let args = ReadArgs { + file_path: "test.txt".to_string(), + offset: 1, + limit: 2000, + }; + let result = tool.call(args).await.unwrap(); + assert_eq!(result.content, "L1: hello\nL2: world"); + } + + #[tokio::test] + async fn rejects_path_traversal() { + 
let dir = TempDir::new().unwrap(); + let tool: ReadTool = ReadTool::new([dir.path()]).unwrap(); + let args = ReadArgs { + file_path: "../../../etc/passwd".to_string(), + offset: 1, + limit: 100, + }; + let result = tool.call(args).await; + assert!(matches!(result, Err(ToolError::InvalidPath(_)))); + } +} diff --git a/src/llm-coding-tools-rig/src/allowed/write.rs b/src/llm-coding-tools-rig/src/allowed/write.rs new file mode 100644 index 00000000..a3f3fd8f --- /dev/null +++ b/src/llm-coding-tools-rig/src/allowed/write.rs @@ -0,0 +1,108 @@ +//! Write file tool using [`AllowedPathResolver`]. + +use llm_coding_tools_core::operations::write_file; +use llm_coding_tools_core::path::AllowedPathResolver; +use llm_coding_tools_core::{ToolContext, ToolError, ToolResult}; +use rig::completion::ToolDefinition; +use rig::tool::Tool; +use schemars::{schema_for, JsonSchema}; +use serde::Deserialize; +use std::path::{Path, PathBuf}; + +/// Arguments for the write tool. +#[derive(Debug, Clone, Deserialize, JsonSchema)] +pub struct WriteToolArgs { + /// Relative path for the file to write (within allowed directories). + pub file_path: String, + /// Content to write to the file. + pub content: String, +} + +/// Tool for writing content to files within allowed directories. +#[derive(Debug, Clone)] +pub struct WriteTool { + resolver: AllowedPathResolver, +} + +impl WriteTool { + /// Creates a new write tool restricted to the given directories. + pub fn new(allowed_paths: impl IntoIterator>) -> ToolResult { + let paths: Vec = allowed_paths + .into_iter() + .map(|p| p.as_ref().to_path_buf()) + .collect(); + Ok(Self { + resolver: AllowedPathResolver::new(paths)?, + }) + } + + /// Creates a new write tool with an existing resolver. 
+ pub fn with_resolver(resolver: AllowedPathResolver) -> Self { + Self { resolver } + } +} + +impl Tool for WriteTool { + const NAME: &'static str = "write"; + + type Error = ToolError; + type Args = WriteToolArgs; + type Output = String; + + async fn definition(&self, _prompt: String) -> ToolDefinition { + ToolDefinition { + name: ::NAME.to_string(), + description: "Write content to a file within allowed directories. \ + Paths are relative to configured base directories." + .to_string(), + parameters: serde_json::to_value(schema_for!(WriteToolArgs)) + .expect("schema generation should not fail"), + } + } + + async fn call(&self, args: Self::Args) -> Result { + write_file(&self.resolver, &args.file_path, &args.content).await + } +} + +impl ToolContext for WriteTool { + const NAME: &'static str = "write"; + + fn context(&self) -> &'static str { + llm_coding_tools_core::context::WRITE_ALLOWED + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + #[tokio::test] + async fn writes_new_file() { + let dir = TempDir::new().unwrap(); + let tool = WriteTool::new([dir.path()]).unwrap(); + let result = tool + .call(WriteToolArgs { + file_path: "new.txt".to_string(), + content: "hello".to_string(), + }) + .await + .unwrap(); + assert!(result.contains("5 bytes")); + assert!(dir.path().join("new.txt").exists()); + } + + #[tokio::test] + async fn rejects_path_traversal() { + let dir = TempDir::new().unwrap(); + let tool = WriteTool::new([dir.path()]).unwrap(); + let result = tool + .call(WriteToolArgs { + file_path: "../../../tmp/escape.txt".to_string(), + content: "content".to_string(), + }) + .await; + assert!(matches!(result, Err(ToolError::InvalidPath(_)))); + } +} diff --git a/src/llm-coding-tools-rig/src/bash.rs b/src/llm-coding-tools-rig/src/bash.rs new file mode 100644 index 00000000..89671eaa --- /dev/null +++ b/src/llm-coding-tools-rig/src/bash.rs @@ -0,0 +1,139 @@ +//! Shell command execution tool. +//! +//! 
Provides cross-platform shell command execution with timeout support. + +use llm_coding_tools_core::operations::execute_command; +use llm_coding_tools_core::{BashOutput, ToolContext, ToolError, ToolOutput}; +use rig::completion::ToolDefinition; +use rig::tool::Tool; +use schemars::{schema_for, JsonSchema}; +use serde::Deserialize; +use std::path::Path; +use std::time::Duration; + +/// Default timeout: 2 minutes. +const DEFAULT_TIMEOUT_MS: u64 = 120_000; + +fn default_timeout_ms() -> u64 { + DEFAULT_TIMEOUT_MS +} + +/// Arguments for the bash tool. +#[derive(Debug, Clone, Deserialize, JsonSchema)] +pub struct BashArgs { + /// The shell command to execute. + pub command: String, + /// Optional working directory (must be absolute path). + pub workdir: Option, + /// Timeout in milliseconds (default: 120000). + #[serde(default = "default_timeout_ms")] + pub timeout_ms: u64, +} + +/// Tool for executing shell commands. +/// +/// Uses bash on Unix, cmd on Windows. +#[derive(Debug, Clone, Copy, Default)] +pub struct BashTool; + +impl BashTool { + /// Creates a new bash tool instance. + #[inline] + pub fn new() -> Self { + Self + } +} + +impl Tool for BashTool { + const NAME: &'static str = "bash"; + + type Error = ToolError; + type Args = BashArgs; + type Output = ToolOutput; + + async fn definition(&self, _prompt: String) -> ToolDefinition { + ToolDefinition { + name: ::NAME.to_string(), + description: "Execute a shell command with optional working directory and timeout." 
+ .to_string(), + parameters: serde_json::to_value(schema_for!(BashArgs)) + .expect("schema serialization should never fail"), + } + } + + async fn call(&self, args: Self::Args) -> Result { + let workdir = args.workdir.as_ref().map(Path::new); + let timeout = Duration::from_millis(args.timeout_ms); + + let result = execute_command(&args.command, workdir, timeout).await?; + Ok(format_bash_output(&result)) + } +} + +impl ToolContext for BashTool { + const NAME: &'static str = "bash"; + + fn context(&self) -> &'static str { + llm_coding_tools_core::context::BASH + } +} + +fn format_bash_output(output: &BashOutput) -> ToolOutput { + let mut content = String::new(); + + if !output.stdout.is_empty() { + content.push_str(&output.stdout); + } + if !output.stderr.is_empty() { + if !content.is_empty() { + content.push('\n'); + } + content.push_str("[stderr]\n"); + content.push_str(&output.stderr); + } + + if let Some(code) = output.exit_code { + if code != 0 { + if !content.is_empty() { + content.push('\n'); + } + content.push_str(&format!("[exit code: {}]", code)); + } + } + + ToolOutput::new(content) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn executes_echo() { + let tool = BashTool::new(); + let args = BashArgs { + command: "echo hello".to_string(), + workdir: None, + timeout_ms: 5000, + }; + let result = tool.call(args).await.unwrap(); + assert!(result.content.contains("hello")); + } + + #[tokio::test] + async fn timeout_returns_error() { + let tool = BashTool::new(); + let cmd = if cfg!(target_os = "windows") { + "ping -n 10 127.0.0.1" + } else { + "sleep 10" + }; + let args = BashArgs { + command: cmd.to_string(), + workdir: None, + timeout_ms: 100, + }; + let result = tool.call(args).await; + assert!(matches!(result, Err(ToolError::Timeout(_)))); + } +} diff --git a/src/llm-coding-tools-rig/src/lib.rs b/src/llm-coding-tools-rig/src/lib.rs new file mode 100644 index 00000000..ea017183 --- /dev/null +++ 
b/src/llm-coding-tools-rig/src/lib.rs @@ -0,0 +1,89 @@ +//! Rig framework Tool implementations for coding tools. +//! +//! This crate provides `rig_core::tool::Tool` implementations wrapping +//! the core operations from [`llm_coding_tools_core`]. +//! +//! # Module Organization +//! +//! - [`absolute`] - Tools requiring absolute paths (no path restriction) +//! - [`allowed`] - Tools restricted to allowed directories +//! - Standalone tools (bash, task, todo, webfetch) at crate root +//! +//! # Example +//! +//! ```ignore +//! use llm_coding_tools_rig::absolute::ReadTool; +//! use llm_coding_tools_rig::BashTool; +//! ``` + +#![warn(missing_docs)] + +pub mod absolute; +pub mod allowed; +pub mod bash; +pub mod task; +pub mod todo; +pub mod webfetch; + +// Re-export core types for convenience +pub use llm_coding_tools_core::{ToolError, ToolOutput, ToolResult}; + +// Re-export context module and ToolContext trait for convenience +pub use llm_coding_tools_core::context; +pub use llm_coding_tools_core::ToolContext; + +// Re-export PreambleBuilder and Substitute from core +pub use llm_coding_tools_core::{PreambleBuilder, Substitute}; + +// Re-export path resolvers +pub use llm_coding_tools_core::path::{AbsolutePathResolver, AllowedPathResolver, PathResolver}; + +// Re-export core operation types used by tools +pub use llm_coding_tools_core::{ + BashOutput, EditError, GlobOutput, GrepFileMatches, GrepLineMatch, GrepOutput, + MockTaskExecutor, TaskExecutor, TaskResult, Todo, TodoPriority, TodoState, TodoStatus, + WebFetchOutput, +}; + +// Re-export absolute module tool types +pub use absolute::{ + EditArgs, EditTool, GlobArgs, GlobTool, GrepArgs, GrepTool, ReadArgs, ReadTool, WriteTool, + WriteToolArgs, +}; + +/// Re-export allowed module tool types (namespaced to avoid conflicts) +pub mod allowed_tools { + pub use crate::allowed::{ + EditArgs, EditError, EditTool, GlobArgs, GlobTool, GrepArgs, GrepTool, ReadArgs, ReadTool, + WriteTool, WriteToolArgs, + }; +} + +// 
Re-export standalone tools +pub use bash::{BashArgs, BashTool}; +pub use task::{TaskArgs, TaskTool}; +pub use todo::{TodoReadArgs, TodoReadTool, TodoTools, TodoWriteArgs, TodoWriteTool}; +pub use webfetch::{WebFetchArgs, WebFetchTool}; + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn preamble_builder_with_real_tools() { + let mut pb = PreambleBuilder::::new(); + let read: absolute::ReadTool = pb.track(absolute::ReadTool::new()); + let bash = pb.track(BashTool::new()); + + let preamble = pb.build(); + + assert!(preamble.contains("## Read Tool")); + assert!(preamble.contains("## Bash Tool")); + assert!(preamble.contains("absolute path")); // From READ_ABSOLUTE + + // Tools are returned unchanged + assert_eq!( as rig::tool::Tool>::NAME, "read"); + let _ = read; + let _ = bash; + } +} diff --git a/src/llm-coding-tools-rig/src/task.rs b/src/llm-coding-tools-rig/src/task.rs new file mode 100644 index 00000000..f2932cfa --- /dev/null +++ b/src/llm-coding-tools-rig/src/task.rs @@ -0,0 +1,128 @@ +//! Task tool for launching autonomous sub-agents. +//! +//! Provides [`TaskTool`] for spawning sub-agents to handle complex tasks. + +use llm_coding_tools_core::{ToolContext, ToolError, ToolOutput}; +use rig::completion::ToolDefinition; +use rig::tool::Tool; +use schemars::{schema_for, JsonSchema}; +use serde::Deserialize; +use std::sync::Arc; + +// Re-export core types +pub use llm_coding_tools_core::{ + MockTaskExecutor, TaskArgs as CoreTaskArgs, TaskExecutor, TaskResult, +}; + +/// Arguments for the task tool (with JsonSchema for rig). +#[derive(Debug, Clone, Deserialize, JsonSchema)] +pub struct TaskArgs { + /// Short 3-5 word task description. + pub description: String, + /// Detailed instructions for the sub-agent. + pub prompt: String, + /// Type of agent to use (e.g., "general", "coder"). + pub subagent_type: String, + /// Existing session to continue. 
+ #[serde(default)] + pub session_id: Option, +} + +impl From for CoreTaskArgs { + fn from(args: TaskArgs) -> Self { + CoreTaskArgs { + description: args.description, + prompt: args.prompt, + subagent_type: args.subagent_type, + session_id: args.session_id, + } + } +} + +/// Tool for delegating tasks to sub-agents. +/// +/// Generic over the executor implementation. +#[derive(Debug, Clone)] +pub struct TaskTool { + executor: Arc, +} + +impl TaskTool { + /// Creates a new task tool with the given executor. + pub fn new(executor: Arc) -> Self { + Self { executor } + } +} + +impl TaskTool { + /// Creates a task tool with mock executor for testing. + pub fn with_mock() -> (Self, Arc) { + let executor = Arc::new(MockTaskExecutor::new()); + (Self::new(executor.clone()), executor) + } +} + +impl Tool for TaskTool { + const NAME: &'static str = "task"; + + type Error = ToolError; + type Args = TaskArgs; + type Output = ToolOutput; + + async fn definition(&self, _prompt: String) -> ToolDefinition { + ToolDefinition { + name: ::NAME.to_string(), + description: "Delegate a task to a specialized sub-agent.".to_string(), + parameters: serde_json::to_value(schema_for!(TaskArgs)) + .expect("schema serialization should never fail"), + } + } + + async fn call(&self, args: Self::Args) -> Result { + let core_args = CoreTaskArgs::from(args); + let result = self.executor.execute(&core_args).await?; + Ok(ToolOutput::new(result.format())) + } +} + +impl ToolContext for TaskTool { + const NAME: &'static str = "task"; + + fn context(&self) -> &'static str { + llm_coding_tools_core::context::TASK + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn mock_executor_works() { + let (tool, _executor) = TaskTool::with_mock(); + let args = TaskArgs { + description: "test task".to_string(), + prompt: "do something".to_string(), + subagent_type: "general".to_string(), + session_id: None, + }; + let result = tool.call(args).await.unwrap(); + 
assert!(result.content.contains("test task")); + assert!(result.content.contains("completed")); + } + + #[tokio::test] + async fn custom_mock_response() { + let (tool, executor) = TaskTool::with_mock(); + executor.set_response("custom", "Custom result!"); + + let args = TaskArgs { + description: "custom".to_string(), + prompt: "details".to_string(), + subagent_type: "coder".to_string(), + session_id: None, + }; + let result = tool.call(args).await.unwrap(); + assert!(result.content.contains("Custom result!")); + } +} diff --git a/src/llm-coding-tools-rig/src/todo.rs b/src/llm-coding-tools-rig/src/todo.rs new file mode 100644 index 00000000..e3e66b14 --- /dev/null +++ b/src/llm-coding-tools-rig/src/todo.rs @@ -0,0 +1,197 @@ +//! Todo list management tools. +//! +//! Provides tools for reading and writing todo items. + +use llm_coding_tools_core::operations::{read_todos, write_todos}; +use llm_coding_tools_core::{ToolContext, ToolError, ToolOutput}; +use rig::completion::ToolDefinition; +use rig::tool::Tool; +use schemars::{schema_for, JsonSchema}; +use serde::Deserialize; + +// Re-export core types +pub use llm_coding_tools_core::{Todo, TodoPriority, TodoState, TodoStatus}; + +/// Arguments for writing todos. +#[derive(Debug, Clone, Deserialize, JsonSchema)] +pub struct TodoWriteArgs { + /// The complete list of todos to set. + pub todos: Vec, +} + +/// Arguments for reading todos (empty). +#[derive(Debug, Clone, Deserialize, JsonSchema)] +pub struct TodoReadArgs {} + +/// Tool for writing/replacing the todo list. +#[derive(Debug, Clone)] +pub struct TodoWriteTool { + state: TodoState, +} + +impl TodoWriteTool { + /// Creates a new todo write tool with the given state. 
+ pub fn new(state: TodoState) -> Self { + Self { state } + } +} + +impl Tool for TodoWriteTool { + const NAME: &'static str = "todowrite"; + + type Error = ToolError; + type Args = TodoWriteArgs; + type Output = ToolOutput; + + async fn definition(&self, _prompt: String) -> ToolDefinition { + ToolDefinition { + name: ::NAME.to_string(), + description: "Replace the todo list with new items.".to_string(), + parameters: serde_json::to_value(schema_for!(TodoWriteArgs)) + .expect("schema serialization should never fail"), + } + } + + async fn call(&self, args: Self::Args) -> Result { + let message = write_todos(&self.state, args.todos)?; + Ok(ToolOutput::new(message)) + } +} + +/// Tool for reading the current todo list. +#[derive(Debug, Clone)] +pub struct TodoReadTool { + state: TodoState, +} + +impl TodoReadTool { + /// Creates a new todo read tool with the given state. + pub fn new(state: TodoState) -> Self { + Self { state } + } +} + +impl Tool for TodoReadTool { + const NAME: &'static str = "todoread"; + + type Error = ToolError; + type Args = TodoReadArgs; + type Output = ToolOutput; + + async fn definition(&self, _prompt: String) -> ToolDefinition { + ToolDefinition { + name: ::NAME.to_string(), + description: "Read the current todo list.".to_string(), + parameters: serde_json::to_value(schema_for!(TodoReadArgs)) + .expect("schema serialization should never fail"), + } + } + + async fn call(&self, _args: Self::Args) -> Result { + let content = read_todos(&self.state); + Ok(ToolOutput::new(content)) + } +} + +impl ToolContext for TodoWriteTool { + const NAME: &'static str = "todowrite"; + + fn context(&self) -> &'static str { + llm_coding_tools_core::context::TODO_WRITE + } +} + +impl ToolContext for TodoReadTool { + const NAME: &'static str = "todoread"; + + fn context(&self) -> &'static str { + llm_coding_tools_core::context::TODO_READ + } +} + +/// Helper for creating paired todo tools with shared state. +pub struct TodoTools { + /// Tool for writing todos. 
+ pub write: TodoWriteTool, + /// Tool for reading todos. + pub read: TodoReadTool, +} + +impl TodoTools { + /// Creates new todo tools with shared state. + pub fn new() -> Self { + let state = TodoState::new(); + Self { + write: TodoWriteTool::new(state.clone()), + read: TodoReadTool::new(state), + } + } + + /// Creates todo tools with existing state. + pub fn with_state(state: TodoState) -> Self { + Self { + write: TodoWriteTool::new(state.clone()), + read: TodoReadTool::new(state), + } + } +} + +impl Default for TodoTools { + fn default() -> Self { + Self::new() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn make_todo(id: &str, status: TodoStatus) -> Todo { + Todo { + id: id.to_string(), + content: format!("Task {id}"), + status, + priority: TodoPriority::Medium, + } + } + + #[tokio::test] + async fn write_and_read_todos() { + let tools = TodoTools::new(); + + let write_args = TodoWriteArgs { + todos: vec![ + make_todo("1", TodoStatus::Pending), + make_todo("2", TodoStatus::Completed), + ], + }; + let write_result = tools.write.call(write_args).await.unwrap(); + assert!(write_result.content.contains("2 task(s)")); + + let read_result = tools.read.call(TodoReadArgs {}).await.unwrap(); + assert!(read_result.content.contains("Task 1")); + assert!(read_result.content.contains("Task 2")); + } + + #[tokio::test] + async fn shared_state_works() { + let state = TodoState::new(); + let write_tool = TodoWriteTool::new(state.clone()); + let read_tool = TodoReadTool::new(state); + + let write_args = TodoWriteArgs { + todos: vec![make_todo("shared", TodoStatus::InProgress)], + }; + write_tool.call(write_args).await.unwrap(); + + let read_result = read_tool.call(TodoReadArgs {}).await.unwrap(); + assert!(read_result.content.contains("shared")); + } + + #[tokio::test] + async fn empty_list_returns_no_tasks() { + let tools = TodoTools::new(); + let result = tools.read.call(TodoReadArgs {}).await.unwrap(); + assert_eq!(result.content, "No tasks."); + } +} diff --git 
a/src/llm-coding-tools-rig/src/webfetch.rs b/src/llm-coding-tools-rig/src/webfetch.rs new file mode 100644 index 00000000..824fc7e2 --- /dev/null +++ b/src/llm-coding-tools-rig/src/webfetch.rs @@ -0,0 +1,115 @@ +//! Web content fetching tool. +//! +//! Provides URL fetching with format conversion support. + +use llm_coding_tools_core::operations::fetch_url; +use llm_coding_tools_core::{ToolContext, ToolError, ToolOutput}; +use rig::completion::ToolDefinition; +use rig::tool::Tool; +use schemars::{schema_for, JsonSchema}; +use serde::Deserialize; +use std::time::Duration; + +/// Default timeout: 30 seconds. +const DEFAULT_TIMEOUT_MS: u64 = 30_000; + +fn default_timeout_ms() -> u64 { + DEFAULT_TIMEOUT_MS +} + +/// Arguments for the webfetch tool. +#[derive(Debug, Clone, Deserialize, JsonSchema)] +pub struct WebFetchArgs { + /// The URL to fetch. + pub url: String, + /// Timeout in milliseconds (default: 30000). + #[serde(default = "default_timeout_ms")] + pub timeout_ms: u64, +} + +/// Tool for fetching web content. +/// +/// - HTML is converted to markdown +/// - JSON is pretty-printed +/// - Other content returned as-is +#[derive(Debug, Clone)] +pub struct WebFetchTool { + client: reqwest::Client, +} + +impl Default for WebFetchTool { + fn default() -> Self { + Self::new() + } +} + +impl WebFetchTool { + /// Creates a new webfetch tool with default client. + pub fn new() -> Self { + Self { + client: reqwest::Client::new(), + } + } + + /// Creates a webfetch tool with a custom client. + pub fn with_client(client: reqwest::Client) -> Self { + Self { client } + } +} + +impl Tool for WebFetchTool { + const NAME: &'static str = "webfetch"; + + type Error = ToolError; + type Args = WebFetchArgs; + type Output = ToolOutput; + + async fn definition(&self, _prompt: String) -> ToolDefinition { + ToolDefinition { + name: ::NAME.to_string(), + description: + "Fetch content from a URL. HTML is converted to markdown, JSON is prettified." 
+ .to_string(), + parameters: serde_json::to_value(schema_for!(WebFetchArgs)) + .expect("schema serialization should never fail"), + } + } + + async fn call(&self, args: Self::Args) -> Result { + let timeout = Duration::from_millis(args.timeout_ms); + let result = fetch_url(&self.client, &args.url, timeout).await?; + + let content = format!( + "[{} - {} bytes]\n\n{}", + result.content_type, result.byte_length, result.content + ); + Ok(ToolOutput::new(content)) + } +} + +impl ToolContext for WebFetchTool { + const NAME: &'static str = "webfetch"; + + fn context(&self) -> &'static str { + llm_coding_tools_core::context::WEBFETCH + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn creates_with_default_client() { + let _tool = WebFetchTool::new(); + } + + #[test] + fn creates_with_custom_client() { + let client = reqwest::Client::builder() + .user_agent("test") + .build() + .unwrap(); + let _tool = WebFetchTool::with_client(client); + } +} diff --git a/src/rig-coding-tools/Cargo.toml b/src/rig-coding-tools/Cargo.toml deleted file mode 100644 index 401a3ab6..00000000 --- a/src/rig-coding-tools/Cargo.toml +++ /dev/null @@ -1,19 +0,0 @@ -[package] -name = "rig-coding-tools" -version = "0.1.0" -edition = "2021" -description = "Basic coding tools for rig based LLM agents" -repository = "https://github.com/Sewer56/rig-coding-tools" -license = "MIT" -include = ["src/**/*"] -readme = "README.MD" - -[dependencies] -rig-core = { version = "0.27", default-features = false, features = ["reqwest-rustls"] } - -[dev-dependencies] -tokio = { version = "1", features = ["full"] } -reqwest = { version = "0.12", default-features = false, features = ["rustls-tls"] } -anyhow = "1.0" - - diff --git a/src/rig-coding-tools/README.MD b/src/rig-coding-tools/README.MD deleted file mode 100644 index 51402cbd..00000000 --- a/src/rig-coding-tools/README.MD +++ /dev/null @@ -1,54 +0,0 @@ -# rig-coding-tools - 
-[![Crates.io](https://img.shields.io/crates/v/rig-coding-tools.svg)](https://crates.io/crates/rig-coding-tools) -[![Docs.rs](https://docs.rs/rig-coding-tools/badge.svg)](https://docs.rs/rig-coding-tools) -[![CI](https://github.com/Sewer56/rig-coding-tools/actions/workflows/rust.yml/badge.svg)](https://github.com/Sewer56/rig-coding-tools/actions) - - - -Basic coding tools for rig based LLM agents - -## Features - - - -- Feature 1 -- Feature 2 -- Feature 3 - -## Installation - -Add this to your `Cargo.toml`: - -```toml -[dependencies] -rig-coding-tools = "0.1.0" -``` -### Feature Flags - -| Feature | Description | -| ------- | ----------- | -| `std` | Enable standard library support (disabled by default for `no_std` compatibility) | - -## Usage - - - -### Basic Example - -```rust -// TODO: Add your basic example here -``` - -### Advanced Example - -```rust -// TODO: Add a more advanced example here -``` - -## License - -Licensed under Apache 2.0 diff --git a/src/rig-coding-tools/src/lib.rs b/src/rig-coding-tools/src/lib.rs deleted file mode 100644 index abc68bed..00000000 --- a/src/rig-coding-tools/src/lib.rs +++ /dev/null @@ -1 +0,0 @@ -#![doc = include_str!(concat!("../", env!("CARGO_PKG_README")))] diff --git a/src/test/codex b/src/test/codex deleted file mode 160000 index 810ebe0d..00000000 --- a/src/test/codex +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 810ebe0d2b23cdf29f65e6ca50ee46fa1c24a877