From f9762d8e4adb7c6e4dda75cfa60dc67faeed1bcb Mon Sep 17 00:00:00 2001 From: brovatten Date: Wed, 3 Jun 2026 15:24:38 +0200 Subject: [PATCH 01/27] feat: Mermaid PR architecture-diff action (level-1 default, nested opt-in) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the webview/Playwright PNG approach with an inline Mermaid diagram that GitHub renders natively in the PR comment — no image, no orphan branch, no contents:write, and fork-friendly. How it works: - Resolve a base ("before") analysis: use the committed .codeboarding/analysis.json at the PR base if present, else generate one via a full engine run on the base commit. - Analyze the PR head incrementally, seeded from the base (stable component ids), falling back to a full run on cache miss. - scripts/diff_to_mermaid.py diffs the two analyses (name-based matching; relation label change => modified) and emits a graph LR with nodes colored via classDef/class and arrows via positional linkStyle: green added, yellow modified, red dashed deleted. Escaping, deleted-namespace keying, and a size guard (GitHub's ~500-edge / 50k-char cap -> changed-only or text fallback). Rendering: - Level 1 (flat, top-level) is the default — readable inline, never trips the size cap. - nested: true draws depth>1 sub-components as subgraphs (leaf nodes filled, parent containers outlined). Optional --font-size/--node-padding/spacing emit an %%{init}%% directive to enlarge nodes. scripts/run_local.sh mirrors the action for local iteration (fast diff-only or full pipeline) and writes a browser HTML preview rendered with mermaid.js. 
--- .github/workflows/example-usage.yml | 132 +---- .gitignore | 3 + README.md | 173 ++++--- action.yml | 758 +++++++++++++--------------- scripts/diff_to_mermaid.py | 449 ++++++++++++++++ scripts/run_local.sh | 169 +++++++ 6 files changed, 1079 insertions(+), 605 deletions(-) create mode 100644 scripts/diff_to_mermaid.py create mode 100755 scripts/run_local.sh diff --git a/.github/workflows/example-usage.yml b/.github/workflows/example-usage.yml index 1f75c83..6fb9f78 100644 --- a/.github/workflows/example-usage.yml +++ b/.github/workflows/example-usage.yml @@ -1,126 +1,22 @@ -name: Example Usage of CodeBoarding Action +name: Architecture diff on: - workflow_dispatch: - inputs: - repository_url: - description: 'Repository URL to test with' - required: false - default: 'https://github.com/microsoft/markitdown' - type: string - source_branch: - description: 'Source branch for comparison' - required: false - default: 'main' - type: string - target_branch: - description: 'Target branch for comparison' - required: false - default: 'develop' - type: string - output_format: - description: 'Output format for documentation' - required: false - default: '.md' - type: choice - options: - - '.md' - - '.rst' - pull_request: - branches: [ main, master ] - types: [opened, synchronize, reopened] - - schedule: - # Run daily at 2 AM UTC - - cron: '0 2 * * *' + types: [opened, synchronize, reopened, ready_for_review] + +# Only a PR comment is posted โ€” no image is pushed โ€” so contents:write is not needed. 
+permissions: + pull-requests: write jobs: - update-docs-action-usage: + architecture-diff: runs-on: ubuntu-latest - permissions: - contents: write - pull-requests: write - + if: github.event.pull_request.draft == false + timeout-minutes: 60 steps: - - name: Checkout repository - uses: actions/checkout@v4 - with: - token: ${{ secrets.GITHUB_TOKEN }} - fetch-depth: 0 # Required to access branch history - - # Determine branches based on context - - name: Set branch variables - id: set-branches - run: | - if [ "${{ github.event_name }}" = "pull_request" ]; then - echo "source_branch=${{ github.head_ref }}" >> $GITHUB_OUTPUT - echo "target_branch=${{ github.base_ref }}" >> $GITHUB_OUTPUT - elif [ "${{ github.event.inputs.source_branch }}" != "" ] && [ "${{ github.event.inputs.target_branch }}" != "" ]; then - echo "source_branch=${{ github.event.inputs.source_branch }}" >> $GITHUB_OUTPUT - echo "target_branch=${{ github.event.inputs.target_branch }}" >> $GITHUB_OUTPUT - else - # Default to current branch and main - echo "source_branch=${{ github.ref_name }}" >> $GITHUB_OUTPUT - echo "target_branch=main" >> $GITHUB_OUTPUT - fi - - - name: Fetch CodeBoarding Documentation - id: codeboarding - uses: ./ - with: - repository_url: ${{ github.event.inputs.repository_url }} - source_branch: ${{ steps.set-branches.outputs.source_branch }} - target_branch: ${{ steps.set-branches.outputs.target_branch }} - output_directory: 'docs' - output_format: ${{ github.event.inputs.output_format || '.md' }} - - - name: Display Action Results - run: | - echo "Documentation files created: ${{ steps.codeboarding.outputs.markdown_files_created }}" - echo "JSON files created: ${{ steps.codeboarding.outputs.json_files_created }}" - echo "Documentation directory: ${{ steps.codeboarding.outputs.output_directory }}" - echo "JSON directory: ${{ steps.codeboarding.outputs.json_directory }}" - echo "Has changes: ${{ steps.codeboarding.outputs.has_changes }}" - - # Check if we have any changes to commit 
- - name: Check for changes - id: git-changes - run: | - if [ -n "$(git status --porcelain)" ]; then - echo "has_git_changes=true" >> $GITHUB_OUTPUT - else - echo "has_git_changes=false" >> $GITHUB_OUTPUT - fi - - - name: Create Pull Request - if: steps.git-changes.outputs.has_git_changes == 'true' && steps.codeboarding.outputs.has_changes == 'true' - uses: peter-evans/create-pull-request@v5 + - uses: codeboarding/codeboarding-action@v1 with: - token: ${{ secrets.GITHUB_TOKEN }} - commit-message: "docs: update codeboarding documentation" - title: "๐Ÿ“š CodeBoarding Documentation Update" - body: | - ## ๐Ÿ“š Documentation Update - - This PR contains updated documentation files fetched from the CodeBoarding service. - - ### ๐Ÿ“Š Summary - - **Documentation files created/updated**: ${{ steps.codeboarding.outputs.markdown_files_created }} - - **JSON files created/updated**: ${{ steps.codeboarding.outputs.json_files_created }} - - **Documentation directory**: `${{ steps.codeboarding.outputs.output_directory }}/` - - **JSON directory**: `${{ steps.codeboarding.outputs.json_directory }}/` - - **Source branch**: `${{ steps.set-branches.outputs.source_branch }}` - - **Target branch**: `${{ steps.set-branches.outputs.target_branch }}` - - **Output format**: `${{ github.event.inputs.output_format || '.md' }}` - - **Repository analyzed**: `${{ steps.codeboarding.outputs.repo_url }}` - - ### ๐Ÿ” Changes - Files have been updated with fresh documentation content based on code changes between branches. - - --- - - ๐Ÿค– This PR was automatically generated by the CodeBoarding documentation update workflow. 
- branch: docs/codeboarding-update - base: ${{ steps.set-branches.outputs.target_branch }} - delete-branch: true + llm_api_key: ${{ secrets.OPENROUTER_API_KEY }} + # depth_level: '1' # 1-3, higher = more detail + # diagram_direction: 'LR' # LR | TD | TB | RL | BT + # changed_only: 'false' # 'true' to draw only changed components diff --git a/.gitignore b/.gitignore index 865fddd..ea8864f 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,9 @@ test_response.json test_codeboarding/ +# Local test harness output (scripts/run_local.sh) +.cb-local/ + # Environment files .env diff --git a/README.md b/README.md index 043c743..3ba9387 100644 --- a/README.md +++ b/README.md @@ -1,111 +1,134 @@
CodeBoarding Logo - - # CodeBoarding [Diagram-First Documentation] - - [![GitHub Action](https://img.shields.io/badge/GitHub-Action-blue?logo=github-actions)](https://github.com/marketplace/actions/codeboarding-diagram-first-documentation) + + # CodeBoarding Architecture Diff (Mermaid) + + Posts a PR comment with a **Mermaid** architecture diagram showing which components changed — **green** added, **yellow** modified, **red** deleted — for both nodes and arrows.
-Generates diagram-first visualizations of your codebase using static analysis and large language models. +## What it does + +On every pull request, this action: + +1. Resolves a **base ("before") analysis**: it reads the `.codeboarding/analysis.json` committed at the PR base commit if one exists; otherwise it runs a full CodeBoarding analysis on the base commit to produce one. +2. Runs an **incremental analysis on the PR head**, seeded from the base analysis — only LLM-calling the components whose code actually changed, so a typical PR costs a handful of LLM calls. +3. **Diffs the two analyses** and renders the architecture graph as a Mermaid block with changed components and relations colored: + - **green** — added + - **yellow** — modified + - **red** (dashed) — deleted +4. Posts a sticky PR comment containing the Mermaid block. **GitHub renders the diagram inline** — no image, no Playwright, no extra branch. ## Usage ```yaml -name: Generate Documentation +name: Architecture diff on: - push: - branches: [ main ] pull_request: - branches: [ main ] - types: [opened, synchronize, reopened] + types: [opened, synchronize, reopened, ready_for_review] + +permissions: + pull-requests: write # the only permission needed — nothing is pushed jobs: - documentation: + diagram: runs-on: ubuntu-latest + if: github.event.pull_request.draft == false + timeout-minutes: 60 steps: - - name: Checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 # Required to access branch history - - - name: Generate Documentation - uses: codeboarding/codeboarding-ghaction@v1 + - uses: codeboarding/codeboarding-action@v1 with: - repository_url: ${{ github.server_url }}/${{ github.repository }} - source_branch: ${{ github.head_ref || github.ref_name }} - target_branch: ${{ github.base_ref || 'main' }} - output_directory: 'docs' - output_format: '.md' - - - name: Upload Documentation - uses: actions/upload-artifact@v4 - with: - name: documentation - path: | - docs/ - .codeboarding/ 
+ llm_api_key: ${{ secrets.OPENROUTER_API_KEY }} ``` +You need **one secret**: an LLM API key. OpenRouter is the default; pass your own model via the `agent_model` / `parsing_model` inputs if you prefer. + ## Inputs -| Input | Description | Required | Default | -|-------|-------------|----------|---------| -| `repository_url` | Repository URL for which documentation will be generated | Yes | - | -| `source_branch` | Source branch for comparison (typically the PR branch) | Yes | - | -| `target_branch` | Target branch for comparison (typically the base branch) | Yes | - | -| `output_directory` | Directory where documentation files will be saved | No | `docs` | -| `output_format` | Format for documentation files (either `.md` or `.rst`) | No | `.md` | +| Input | Default | Description | +|---|---|---| +| `llm_api_key` | (required) | LLM API key. Currently OpenRouter (`OPENROUTER_API_KEY`). | +| `github_token` | `${{ github.token }}` | Token used to post the comment. | +| `engine_ref` | `main` | Git ref of `CodeBoarding/CodeBoarding`. Pin in production. | +| `depth_level` | `1` | Diagram depth (1–3). Higher = slower + more detail. | +| `agent_model` | `openrouter/anthropic/claude-sonnet-4` | LLM for analysis. | +| `parsing_model` | `openrouter/anthropic/claude-sonnet-4` | LLM for parsing. | +| `comment_header` | `Architecture review` | Header line of the PR comment. | +| `diagram_direction` | `LR` | Mermaid layout direction: `LR`, `TD`, `TB`, `RL`, or `BT`. | +| `changed_only` | `false` | Draw only changed components and their incident edges. | +| `nested` | `false` | Draw depth>1 sub-components as nested subgraphs (pair with `depth_level >= 2`). 
| ## Outputs | Output | Description | -|--------|-------------| -| `markdown_files_created` | Number of documentation files created | -| `json_files_created` | Number of JSON files created | -| `output_directory` | Directory where documentation files were saved | -| `json_directory` | Directory where JSON files were saved (always `.codeboarding`) | -| `has_changes` | Whether any files were created or changed | +|---|---| +| `diagram_md` | Path to the rendered ```` ```mermaid ```` block in the runner workspace. | +| `n_changed` | Number of top-level components added/modified/deleted. | +| `truncated` | `true` if the diagram was reduced to changed-only to fit GitHub's Mermaid limit. | + +## How the diff is colored + +Nodes are styled with Mermaid `classDef` / `class`; arrows are styled with positional `linkStyle`. A relation counts as **modified** when its endpoints are unchanged but its label text changed. Example of the emitted block: + +```mermaid +graph LR + Api["API Gateway"] + Auth["Auth Service"] + Cache["Cache"] + Api -- "routes to" --> Auth + Auth -- "reads/writes" --> Cache + classDef added fill:#1f883d,stroke:#0b5d23,color:#ffffff; + classDef modified fill:#bf8700,stroke:#7d4e00,color:#ffffff; + classDef deleted fill:#cf222e,stroke:#82071e,color:#ffffff,stroke-dasharray:5 3; + class Cache added; + class Auth modified; + class Api deleted; + linkStyle 0 stroke:#cf222e,stroke-width:2px,stroke-dasharray:5 3; + linkStyle 1 stroke:#1f883d,stroke-width:2px; +``` -## How It Works +## No baseline required -The action works by: +If `.codeboarding/analysis.json` isn't committed at the PR base commit, the action **generates the baseline itself** by running a full analysis on the base commit, then diffs the head against it. Committing a baseline on your default branch makes runs cheaper (the base run is skipped) and the diff more stable, but it is not required. -1. 
Analyzing the differences introduced in the source branch and putting the results in the target branch -2. Generating documentation files based on the latest version of the source branch -3. Outputting two types of files: - - Documentation files (Markdown or RST) in the specified output directory - - Metadata files in the `.codeboarding` directory +## Fork PRs -## License +Because nothing is pushed (the diagram is inline Mermaid), there is no image step to skip on forks. The one caveat is GitHub's own policy: **secrets are withheld from `pull_request`-triggered runs on forks**, so the LLM key is unavailable and the run fails early with a clear message. A maintainer can re-run from the Actions tab, or use `pull_request_target` if you understand its security implications. -MIT License - see [LICENSE](LICENSE) file for details. +## Limitations -# CodeBoarding GitHub Action +- **GitHub Mermaid caps.** Inline Mermaid in comments is capped (≈500 edges / 50 000 chars). The action stays under this by auto-falling-back to a changed-only graph; if even that overflows it posts a text summary instead of a broken diagram. +- **Nesting.** By default only the top-level component graph is drawn (matching the engine's default `graph LR`). Set `nested: true` with `depth_level >= 2` to draw sub-components as nested subgraphs — leaf nodes filled, parent containers outlined, both colored by status. Large nested graphs are more likely to hit GitHub's Mermaid caps (above), in which case the action degrades to changed-only or a text summary. +- **Renames show as remove + add.** Components are matched across the two analyses by name (the stable join), so a renamed component appears as a red removal plus a green addition rather than a single yellow change. +- **No click-through.** GitHub renders Mermaid in strict security mode, so node hyperlinks are disabled. -## Important: Timeout Configuration +## Local testing -For large repositories, the analysis can take 15-45 minutes. 
Make sure to configure appropriate timeouts in your workflow: +A GitHub run is slow (engine install + two analyses). To iterate locally, use `scripts/run_local.sh`. It mirrors `action.yml` and writes `.cb-local/diagram.md` plus a `.cb-local/preview.html` you open in a browser (rendered with mermaid.js in GitHub's strict mode, so it looks like the comment will). -```yaml -jobs: - generate-docs: - runs-on: ubuntu-latest - timeout-minutes: 60 # Set to 60+ minutes for large repositories - steps: - - uses: actions/checkout@v4 - - uses: your-username/codeboarding-ghaction@v1 - with: - # your inputs here +**Fast — no LLM, instant.** Diff two existing `analysis.json` files. Great for iterating on colors/layout. For a realistic pair, pull two revisions of a committed analysis: + +```bash +git show <base-ref>:.codeboarding/analysis.json > /tmp/base.json +git show <head-ref>:.codeboarding/analysis.json > /tmp/head.json +scripts/run_local.sh --base-json /tmp/base.json --head-json /tmp/head.json ``` -## Timeout Guidelines +**Full pipeline — needs an LLM key.** Runs the engine on two refs of a local repo exactly like the action (committed-or-generated base, then incremental head): + +```bash +export OPENROUTER_API_KEY=sk-or-... +scripts/run_local.sh --repo /path/to/repo --base <base-ref> --head <head-ref> \ + --engine /path/to/CodeBoarding # defaults to ../CodeBoarding +``` -- **Small repositories** (<1k files): 10-15 minutes -- **Medium repositories** (1k-5k files): 20-30 minutes -- **Large repositories** (5k+ files): 30-60 minutes -- **Very large repositories** (10k+ files): 45-90 minutes +Flags: `--depth N`, `--direction LR|TD|…`, `--nested`, `--changed-only`, `--no-edge-labels`, `--out DIR`, `--no-open`. + +The diagram step alone is also directly runnable: + +```bash +python3 scripts/diff_to_mermaid.py --base base/analysis.json --head head/analysis.json --out diagram.md +``` + +## License -If your workflow consistently times out, consider: -1. Increasing `timeout-minutes` to 90 or higher -2. 
Running the action on a schedule during off-peak hours -3. Analyzing specific branches with smaller diffs +MIT โ€” see [LICENSE](LICENSE). diff --git a/action.yml b/action.yml index 6f17d1d..3c3c031 100644 --- a/action.yml +++ b/action.yml @@ -1,447 +1,381 @@ -name: 'CodeBoarding [Diagram-First Documentation]' -description: 'Generates diagram-first visualizations of your codebase using static analysis and large language models.' +name: 'CodeBoarding Architecture Diff (Mermaid)' +description: 'Posts a PR comment with a Mermaid architecture diagram showing which components changed (green added / yellow modified / red deleted) โ€” nodes and arrows.' author: 'CodeBoarding' branding: - icon: 'book-open' # or 'layers', 'git-branch', 'book-open', 'target' + icon: 'git-pull-request' color: 'blue' inputs: - output_directory: - description: 'Directory where documentation files will be saved' - required: false - default: 'docs' - repository_url: - description: 'Repository URL to fetch documentation for (defaults to current repository)' - required: true - source_branch: - description: 'Source branch for comparison' - required: true - target_branch: - description: 'Target branch for comparison' + llm_api_key: + description: 'LLM API key (OpenRouter by default). Required.' required: true - output_format: - description: 'Output format for documentation files (.md, .mdx, .rst, or .html)' + github_token: + description: 'GITHUB_TOKEN used to post the PR comment. Defaults to the workflow token.' + required: false + default: ${{ github.token }} + engine_ref: + description: 'Git ref of CodeBoarding/CodeBoarding to use as the analysis engine.' + required: false + default: 'main' + depth_level: + description: 'Diagram depth (1-3). Higher is slower and more detailed.' + required: false + default: '1' + agent_model: + description: 'LLM model identifier used for analysis (AGENT_MODEL env var).' 
+ required: false + default: 'openrouter/anthropic/claude-sonnet-4' + parsing_model: + description: 'LLM model identifier used for parsing (PARSING_MODEL env var).' + required: false + default: 'openrouter/anthropic/claude-sonnet-4' + comment_header: + description: 'Header line used inside the sticky PR comment.' + required: false + default: 'Architecture review' + diagram_direction: + description: 'Mermaid layout direction: LR, TD, TB, RL, or BT.' required: false - default: '.md' + default: 'LR' + changed_only: + description: 'Render only changed components and their incident edges (also auto-applied when the full graph exceeds GitHub''s Mermaid limit).' + required: false + default: 'false' + nested: + description: 'Draw depth>1 sub-components as nested subgraphs (pair with depth_level >= 2).' + required: false + default: 'false' outputs: - markdown_files_created: - description: 'Number of Markdown files created' - value: ${{ steps.process-docs.outputs.markdown_files_created }} - json_files_created: - description: 'Number of JSON files created' - value: ${{ steps.process-docs.outputs.json_files_created }} - output_directory: - description: 'Directory where Markdown files were saved' - value: ${{ steps.process-docs.outputs.output_directory }} - json_directory: - description: 'Directory where JSON files were saved (.codeboarding)' - value: ${{ steps.process-docs.outputs.json_directory }} - has_changes: - description: 'Whether any files were created or changed' - value: ${{ steps.process-docs.outputs.has_changes }} - repo_url: - description: 'Repository URL that was analyzed' - value: ${{ steps.repo-url.outputs.repo_url }} + diagram_md: + description: 'Path to the rendered ```mermaid block (in the runner workspace).' + value: ${{ steps.diagram.outputs.diagram_md }} + n_changed: + description: 'Number of top-level components added/modified/deleted.' 
+ value: ${{ steps.diagram.outputs.n_changed }} + truncated: + description: 'True if the diagram was reduced to changed-only to fit GitHub''s Mermaid limit.' + value: ${{ steps.diagram.outputs.truncated }} runs: using: 'composite' steps: - - name: Determine repository URL - id: repo-url + - name: Guard โ€” PR event only + id: guard shell: bash run: | - # Use the provided repository URL if it's not empty - if [ -n "${{ inputs.repository_url }}" ]; then - REPO_URL="${{ inputs.repository_url }}" - echo "Using provided repository URL: $REPO_URL" - # Otherwise try to determine from git if we're in a git repository - elif git config --get remote.origin.url > /dev/null 2>&1; then - REPO_URL=$(git config --get remote.origin.url) - # Convert SSH URL to HTTPS if needed - if [[ $REPO_URL == git@* ]]; then - REPO_URL=$(echo $REPO_URL | sed 's|git@github.com:|https://github.com/|') - fi - echo "Using git remote URL: $REPO_URL" + if [ -z "${{ github.event.pull_request.number }}" ]; then + echo "::warning::CodeBoarding Architecture Diff only runs on pull_request events. Skipping." 
+ echo "skip=true" >> $GITHUB_OUTPUT else - REPO_URL="${{ github.server_url }}/${{ github.repository }}" - echo "Using GitHub context URL: $REPO_URL" + echo "skip=false" >> $GITHUB_OUTPUT + echo "base_sha=${{ github.event.pull_request.base.sha }}" >> $GITHUB_OUTPUT + echo "head_sha=${{ github.event.pull_request.head.sha }}" >> $GITHUB_OUTPUT + echo "pr_number=${{ github.event.pull_request.number }}" >> $GITHUB_OUTPUT fi - echo "repo_url=$REPO_URL" >> $GITHUB_OUTPUT - - name: Create and poll documentation job - id: fetch-docs + - name: Checkout CodeBoarding engine + if: steps.guard.outputs.skip != 'true' + uses: actions/checkout@v4 + with: + repository: CodeBoarding/CodeBoarding + ref: ${{ inputs.engine_ref }} + path: codeboarding-engine + + - name: Checkout target repository (PR head) + if: steps.guard.outputs.skip != 'true' + uses: actions/checkout@v4 + with: + path: target-repo + fetch-depth: 0 + ref: ${{ steps.guard.outputs.head_sha }} + + - name: Ensure PR base commit is fetched + if: steps.guard.outputs.skip != 'true' shell: bash + working-directory: target-repo run: | - CREATE_JOB_URL="https://server.codeboarding.org/github_action/jobs" - REPO_URL="${{ steps.repo-url.outputs.repo_url }}" - SOURCE_BRANCH="${{ inputs.source_branch }}" - TARGET_BRANCH="${{ inputs.target_branch }}" - OUTPUT_DIRECTORY="${{ inputs.output_directory }}" - OUTPUT_FORMAT="${{ inputs.output_format }}" - - echo "๐Ÿš€ Creating CodeBoarding analysis job...$CREATE_JOB_URL" - echo "๐Ÿ“Š Repository: $REPO_URL" - echo "๐ŸŒฟ Source branch: $SOURCE_BRANCH" - echo "๐ŸŽฏ Target branch: $TARGET_BRANCH" - echo "๐Ÿ“„ Output format: $OUTPUT_FORMAT" - - # Create JSON payload - JSON_PAYLOAD=$(jq -n \ - --arg url "$REPO_URL" \ - --arg source_branch "$SOURCE_BRANCH" \ - --arg target_branch "$TARGET_BRANCH" \ - --arg output_directory "$OUTPUT_DIRECTORY" \ - --arg extension "$OUTPUT_FORMAT" \ - '{ - url: $url, - source_branch: $source_branch, - target_branch: $target_branch, - output_directory: 
$output_directory, - extension: $extension - }') - - echo "๐Ÿ“‹ Request payload:" - echo "$JSON_PAYLOAD" - - # Create temporary file for response - TEMP_FILE=$(mktemp) - - echo "๐ŸŒ Making API request to create job..." - - # Make the API call to create job - response=$(curl -s -w "%{http_code}" -o "$TEMP_FILE" \ - -X POST \ - -H "Content-Type: application/json" \ - -d "$JSON_PAYLOAD" \ - --max-time 60 \ - --connect-timeout 30 \ - "$CREATE_JOB_URL") - curl_exit_code=$? - - http_code=${response: -3} - - echo "โœ… Job creation request completed!" - echo "๐Ÿ“‹ Response status code: $http_code" - echo "๐Ÿ”ง Curl exit code: $curl_exit_code" - - # Handle curl errors - if [ $curl_exit_code -ne 0 ]; then - echo "โŒ Error: Curl failed with exit code $curl_exit_code" - case $curl_exit_code in - 6) echo "๐ŸŒ Couldn't resolve host - check network connectivity" ;; - 7) echo "๐Ÿ”Œ Failed to connect to host - server might be down" ;; - 28) echo "โฐ Request timed out - server might be busy" ;; - *) echo "โ“ Unknown curl error - check network and server status" ;; - esac - rm -f "$TEMP_FILE" + git fetch origin "${{ steps.guard.outputs.base_sha }}" --depth=1 || true + git cat-file -e "${{ steps.guard.outputs.base_sha }}" && echo "Base commit reachable." || \ + (echo "::error::Base commit ${{ steps.guard.outputs.base_sha }} is not reachable." 
&& exit 1) + + - name: Set up Python 3.13 + if: steps.guard.outputs.skip != 'true' + uses: actions/setup-python@v5 + with: + python-version: '3.13' + + - name: Set up Node.js 20 + if: steps.guard.outputs.skip != 'true' + uses: actions/setup-node@v4 + with: + node-version: '20' + + - name: Install uv + if: steps.guard.outputs.skip != 'true' + uses: astral-sh/setup-uv@v4 + + - name: Cache uv venv (engine) + if: steps.guard.outputs.skip != 'true' + uses: actions/cache@v4 + with: + path: codeboarding-engine/.venv + key: cb-uv-${{ runner.os }}-${{ hashFiles('codeboarding-engine/pyproject.toml', 'codeboarding-engine/uv.lock') }} + restore-keys: | + cb-uv-${{ runner.os }}- + + - name: Cache LSP servers + if: steps.guard.outputs.skip != 'true' + uses: actions/cache@v4 + with: + path: | + codeboarding-engine/static_analyzer/servers/node_modules + codeboarding-engine/static_analyzer/servers/bin + key: cb-lsp-${{ runner.os }}-v1 + restore-keys: | + cb-lsp-${{ runner.os }}- + + - name: Install Python dependencies + if: steps.guard.outputs.skip != 'true' + shell: bash + working-directory: codeboarding-engine + run: | + uv venv --clear + uv pip install -e . + + - name: Install LSP servers + if: steps.guard.outputs.skip != 'true' + shell: bash + working-directory: codeboarding-engine + run: | + uv run python install.py --auto-install-npm + + - name: Prepare & verify LLM key + if: steps.guard.outputs.skip != 'true' + shell: bash + env: + RAW_KEY: ${{ inputs.llm_api_key }} + RAW_AGENT_MODEL: ${{ inputs.agent_model }} + RAW_PARSING_MODEL: ${{ inputs.parsing_model }} + run: | + if [ -z "$RAW_KEY" ]; then + echo "::error::llm_api_key is empty. On fork PRs, repo secrets are withheld by GitHub โ€” a maintainer must re-run, or use pull_request_target." 
exit 1 fi - - if [ "$http_code" != "202" ]; then - echo "โŒ Error: Job creation failed with status code $http_code" - echo "๐Ÿ“„ Response content:" - cat "$TEMP_FILE" - - # Try to parse as JSON for better error message - if jq -e '.detail' "$TEMP_FILE" > /dev/null 2>&1; then - echo "๐Ÿ” Error details: $(jq -r '.detail' "$TEMP_FILE")" - fi - - rm -f "$TEMP_FILE" + # Pasting a key into the secret UI often picks up trailing newlines, + # wrapping quotes, or a whole `KEY=value` line. Normalize all of that. + _strip() { printf '%s' "$1" | tr -d '[:space:]' | sed -e 's/^"//;s/"$//' -e "s/^'//;s/'\$//"; } + KEY="$(_strip "$RAW_KEY")" + case "$KEY" in + OPENROUTER_API_KEY=*) KEY="${KEY#OPENROUTER_API_KEY=}";; + openrouter_api_key=*) KEY="${KEY#openrouter_api_key=}";; + esac + KEY="$(_strip "$KEY")" + AGENT_MODEL="$(_strip "$RAW_AGENT_MODEL")" + PARSING_MODEL="$(_strip "$RAW_PARSING_MODEL")" + + # Mask the cleaned value (it may differ from the registered secret). + echo "::add-mask::$KEY" + + case "$KEY" in sk-or-v1-*) PFX=1 ;; *) PFX=0 ;; esac + echo "OPENROUTER_API_KEY length: ${#KEY}; looks-like-OpenRouter: $PFX" + STATUS=$(curl -sS -o /tmp/openrouter-auth.json -w "%{http_code}" \ + -H "Authorization: Bearer $KEY" --max-time 10 \ + https://openrouter.ai/api/v1/auth/key || echo "curl-fail") + echo "OpenRouter /auth/key response: HTTP $STATUS" + if [ "$STATUS" != "200" ]; then + echo "::error::OpenRouter rejected the API key (HTTP $STATUS). Verify the OPENROUTER_API_KEY secret." + cat /tmp/openrouter-auth.json 2>/dev/null || true exit 1 fi - - # Check if response is valid JSON - if ! jq empty "$TEMP_FILE" 2>/dev/null; then - echo "โŒ Error: Invalid JSON response" - echo "๐Ÿ“„ Response content:" - cat "$TEMP_FILE" - rm -f "$TEMP_FILE" - exit 1 + + # Export (masked) for the analysis steps. 
+ { + echo "OPENROUTER_API_KEY=$KEY" + echo "AGENT_MODEL=$AGENT_MODEL" + echo "PARSING_MODEL=$PARSING_MODEL" + } >> "$GITHUB_ENV" + + - name: Resolve base analysis (committed baseline) + if: steps.guard.outputs.skip != 'true' + id: base + shell: bash + working-directory: target-repo + run: | + BASE_SHA="${{ steps.guard.outputs.base_sha }}" + BASE_DIR="${RUNNER_TEMP}/cb-base" + HEAD_DIR="${RUNNER_TEMP}/cb-head" + mkdir -p "$BASE_DIR" "$HEAD_DIR" + echo "base_dir=$BASE_DIR" >> $GITHUB_OUTPUT + echo "head_dir=$HEAD_DIR" >> $GITHUB_OUTPUT + if git show "${BASE_SHA}:.codeboarding/analysis.json" > "${BASE_DIR}/analysis.json" 2>/dev/null; then + git show "${BASE_SHA}:.codeboarding/static_analysis.pkl" > "${BASE_DIR}/static_analysis.pkl" 2>/dev/null \ + && echo "Seeded committed static_analysis.pkl from base." \ + || rm -f "${BASE_DIR}/static_analysis.pkl" + echo "committed=true" >> $GITHUB_OUTPUT + echo "Using committed .codeboarding/analysis.json at ${BASE_SHA}." + else + rm -f "${BASE_DIR}/analysis.json" + echo "committed=false" >> $GITHUB_OUTPUT + echo "No committed baseline at ${BASE_SHA}; will generate one via a full analysis on the base commit." 
fi - - # Extract job_id from response - JOB_ID=$(jq -r '.job_id' "$TEMP_FILE") - - if [ "$JOB_ID" = "null" ] || [ -z "$JOB_ID" ]; then - echo "โŒ Error: No job_id found in response" - echo "๐Ÿ“„ Response content:" - cat "$TEMP_FILE" - rm -f "$TEMP_FILE" + + - name: Generate base analysis (no committed baseline) + if: steps.guard.outputs.skip != 'true' && steps.base.outputs.committed == 'false' + shell: bash + working-directory: codeboarding-engine + env: + STATIC_ANALYSIS_CONFIG: ${{ github.workspace }}/codeboarding-engine/static_analysis_config.yml + PROJECT_ROOT: ${{ github.workspace }}/codeboarding-engine + DIAGRAM_DEPTH_LEVEL: ${{ inputs.depth_level }} + CACHING_DOCUMENTATION: 'false' + ENABLE_MONITORING: 'false' + run: | + BASE_SRC="${RUNNER_TEMP}/base-src" + rm -rf "$BASE_SRC" + git -C "${{ github.workspace }}/target-repo" worktree add --detach "$BASE_SRC" "${{ steps.guard.outputs.base_sha }}" + uv run python -c " + from pathlib import Path + from codeboarding_workflows.analysis import run_full + res = run_full( + repo_name='${{ github.event.repository.name }}', + repo_path=Path('$BASE_SRC'), + output_dir=Path('${{ steps.base.outputs.base_dir }}'), + run_id='${{ github.run_id }}-${{ github.run_attempt }}-base', + log_path='/tmp/cb-base.log', + depth_level=int('${{ inputs.depth_level }}'), + source_sha='${{ steps.guard.outputs.base_sha }}', + ) + print(f'Base analysis written: {res}') + " + if [ ! -f "${{ steps.base.outputs.base_dir }}/analysis.json" ]; then + echo "::error::Base full analysis ran but analysis.json is missing." exit 1 fi - - echo "โœ… Job created successfully!" - echo "๐Ÿ†” Job ID: $JOB_ID" - - # Start polling job status - STATUS_URL="https://server.codeboarding.org/github_action/jobs/$JOB_ID" - - echo "๐Ÿ“Š Starting job status polling..." - echo "โฐ This may take 15-45 minutes for large repositories..." 
- echo "๐Ÿ’ก If your workflow times out, increase 'timeout-minutes' in your job configuration" - - # Polling loop - POLL_COUNT=0 - MAX_POLLS=90 # 90 minutes max (90 * 1 minute intervals) - - while [ $POLL_COUNT -lt $MAX_POLLS ]; do - POLL_COUNT=$((POLL_COUNT + 1)) - - echo "๐Ÿ” Polling attempt $POLL_COUNT of $MAX_POLLS ($(date '+%H:%M:%S'))" - - # Make status check API call - response=$(curl -s -w "%{http_code}" -o "$TEMP_FILE" \ - --max-time 30 \ - --connect-timeout 10 \ - "$STATUS_URL") - - curl_exit_code=$? - http_code=${response: -3} - - # Handle curl errors - if [ $curl_exit_code -ne 0 ]; then - echo "โš ๏ธ Warning: Status check failed with curl exit code $curl_exit_code" - echo "๐Ÿ”„ Retrying in 30 seconds..." - sleep 30 - continue - fi - - if [ "$http_code" != "200" ]; then - echo "โš ๏ธ Warning: Status check failed with HTTP code $http_code" - echo "๐Ÿ“„ Response content:" - cat "$TEMP_FILE" - echo "๐Ÿ”„ Retrying in 30 seconds..." - sleep 30 - continue - fi - - # Check if response is valid JSON - if ! jq empty "$TEMP_FILE" 2>/dev/null; then - echo "โš ๏ธ Warning: Invalid JSON response" - echo "๐Ÿ“„ Response content:" - cat "$TEMP_FILE" - echo "๐Ÿ”„ Retrying in 30 seconds..." - sleep 30 - continue - fi - - # Extract status from response - STATUS=$(jq -r '.status' "$TEMP_FILE") - - echo "๐Ÿ“Š Current job status: $STATUS" - - if [ "$STATUS" = "COMPLETED" ]; then - echo "โœ… Job completed successfully!" - - # Check if result field exists and contains files - if jq -e '.result' "$TEMP_FILE" > /dev/null; then - echo "๐Ÿ“ฆ Result field found, preparing output..." - - # Check if result is a JSON string or already a JSON object - RESULT_TYPE=$(jq -r '.result | type' "$TEMP_FILE") - - if [ "$RESULT_TYPE" = "string" ]; then - echo "๐Ÿ”ง Result is a JSON string, parsing it..." - # Parse the JSON string in the result field - jq -r '.result' "$TEMP_FILE" | jq '.' > "${TEMP_FILE}_result" - else - echo "๐Ÿ”ง Result is already a JSON object, extracting it..." 
- # Extract the result object directly - jq '.result' "$TEMP_FILE" > "${TEMP_FILE}_result" - fi - - # Verify the extracted result - if jq -e '.files' "${TEMP_FILE}_result" > /dev/null; then - echo "โœ… Files extracted successfully" - mv "${TEMP_FILE}_result" "$TEMP_FILE" - echo "response_file=$TEMP_FILE" >> $GITHUB_OUTPUT - exit 0 # Successfully extracted files, exit with success - else - echo "โŒ Error: Extracted result is missing files structure" - echo "๐Ÿ“„ Extracted content:" - cat "${TEMP_FILE}_result" - rm -f "${TEMP_FILE}_result" "$TEMP_FILE" - exit 1 - fi - else - echo "โŒ Error: Job completed but no result or result.files found in response" - echo "๐Ÿ“„ Response structure:" - jq '.' "$TEMP_FILE" - - # If result exists, show what it contains - if jq -e '.result' "$TEMP_FILE" > /dev/null; then - echo "๐Ÿ“„ Result field content:" - RESULT_TYPE=$(jq -r '.result | type' "$TEMP_FILE") - echo "Result type: $RESULT_TYPE" - - if [ "$RESULT_TYPE" = "string" ]; then - echo "Result string content:" - jq -r '.result' "$TEMP_FILE" - else - echo "Result object content:" - jq '.result' "$TEMP_FILE" - fi - fi - - rm -f "$TEMP_FILE" - exit 1 - fi - elif [ "$STATUS" = "FAILED" ] || [ "$STATUS" = "ERROR" ]; then - echo "โŒ Job failed with status: $STATUS" - echo "๐Ÿ“„ Response content:" - cat "$TEMP_FILE" - rm -f "$TEMP_FILE" - exit 1 - else - # Job still in progress - echo "โณ Job in progress (status: $STATUS)..." - - # Show additional progress information if available - if jq -e '.updated_at' "$TEMP_FILE" > /dev/null; then - UPDATED_AT=$(jq -r '.updated_at' "$TEMP_FILE") - echo "๐Ÿ• Last updated: $UPDATED_AT" - fi - - echo "๐Ÿ’ค Waiting 15 seconds before next check..." - sleep 15 - fi - done - - # Only reach here if we've exceeded max polls without completion - echo "โŒ Error: Job polling timed out after $MAX_POLLS attempts" - echo "๐Ÿ—๏ธ The repository analysis is taking longer than expected." 
- echo "๐Ÿ“Š This might be due to:" - echo " โ€ข Very large repository size (>10k files)" - echo " โ€ข Complex codebase requiring extensive analysis" - echo " โ€ข Server load or processing delays" - echo "" - echo "๐Ÿ’ก Suggestions:" - echo " โ€ข Try again later when server load might be lower" - echo " โ€ข Consider analyzing smaller branches or specific directories" - echo " โ€ข Increase your GitHub Actions job timeout-minutes to 120+" - echo " โ€ข Contact support if the issue persists" - - rm -f "$TEMP_FILE" - exit 1 - - name: Process documentation files - id: process-docs + - name: Analyze PR head (incremental from base) + if: steps.guard.outputs.skip != 'true' + id: analyze shell: bash + working-directory: codeboarding-engine + env: + STATIC_ANALYSIS_CONFIG: ${{ github.workspace }}/codeboarding-engine/static_analysis_config.yml + PROJECT_ROOT: ${{ github.workspace }}/codeboarding-engine + DIAGRAM_DEPTH_LEVEL: ${{ inputs.depth_level }} + CACHING_DOCUMENTATION: 'false' + ENABLE_MONITORING: 'false' run: | - RESPONSE_FILE="${{ steps.fetch-docs.outputs.response_file }}" - MD_OUTPUT_DIR="${{ inputs.output_directory }}" - JSON_OUTPUT_DIR=".codeboarding" - OUTPUT_FORMAT="${{ inputs.output_format }}" - - # Validate output format - if [[ "$OUTPUT_FORMAT" != ".md" && "$OUTPUT_FORMAT" != ".mdx" && "$OUTPUT_FORMAT" != ".rst" && "$OUTPUT_FORMAT" != ".html" ]]; then - echo "Error: Invalid output format '$OUTPUT_FORMAT'. Must be either '.md', '.mdx', '.rst', or '.html'" + BASE_DIR="${{ steps.base.outputs.base_dir }}" + HEAD_DIR="${{ steps.base.outputs.head_dir }}" + # Seed the head dir from the base analysis so incremental stitches + # component ids from the baseline (stable diff). Base dir is left + # untouched as the "before" snapshot for the diff. + cp -a "$BASE_DIR"/. 
"$HEAD_DIR"/ 2>/dev/null || true + uv run python -c " + from pathlib import Path + from codeboarding_workflows.analysis import run_incremental, run_full, BaselineUnavailableError + from diagram_analysis.exceptions import IncrementalCacheMissingError + base_sha='${{ steps.guard.outputs.base_sha }}' + head_sha='${{ steps.guard.outputs.head_sha }}' + repo=Path('${{ github.workspace }}/target-repo') + out=Path('$HEAD_DIR') + name='${{ github.event.repository.name }}' + rid='${{ github.run_id }}-${{ github.run_attempt }}-head' + try: + res = run_incremental( + repo_path=repo, output_dir=out, project_name=name, run_id=rid, + log_path='/tmp/cb-head.log', base_ref=base_sha, target_ref=head_sha, + source_sha=head_sha, + ) + except (IncrementalCacheMissingError, BaselineUnavailableError) as exc: + print(f'Incremental unavailable ({exc}); running full analysis on head.') + for p in out.glob('*'): + if p.is_file(): + p.unlink() + res = run_full( + repo_name=name, repo_path=repo, output_dir=out, run_id=rid, + log_path='/tmp/cb-head.log', depth_level=int('${{ inputs.depth_level }}'), + source_sha=head_sha, + ) + print(f'Head analysis written: {res}') + " + if [ ! -f "$HEAD_DIR/analysis.json" ]; then + echo "::error::Head analysis ran but analysis.json is missing." exit 1 fi - - # Clean and create the output directories - mkdir -p "$MD_OUTPUT_DIR" - - # Remove existing .codeboarding files before adding new ones - if [ -d "$JSON_OUTPUT_DIR" ]; then - echo "Cleaning existing JSON files from $JSON_OUTPUT_DIR" - rm -rf "$JSON_OUTPUT_DIR" - fi - mkdir -p "$JSON_OUTPUT_DIR" - - # Initialize counters - MARKDOWN_FILES_CREATED=0 - JSON_FILES_CREATED=0 - - echo "=== Processing Documentation Files ===" - echo "Response JSON structure:" - jq . "$RESPONSE_FILE" - echo "Using output format: $OUTPUT_FORMAT" - # Parse JSON response and create files using keys as filenames - if jq -e '.files' "$RESPONSE_FILE" > /dev/null; then - echo "Files key found, proceeding to create files..." 
- - # Check if files object is empty - FILES_COUNT=$(jq '.files | length' "$RESPONSE_FILE") - if [ "$FILES_COUNT" -eq 0 ]; then - echo "โ„น๏ธ No documentation files were generated for this repository/branch combination." - echo "๐Ÿ“ This might be because:" - echo " โ€ข No changes were detected between the source and target branches" - echo " โ€ข The repository or branches don't exist or are not accessible" - echo " โ€ข No analyzable code files were found" - echo " โ€ข The branches are identical (no diff to analyze)" + echo "base_analysis=$BASE_DIR/analysis.json" >> $GITHUB_OUTPUT + echo "head_analysis=$HEAD_DIR/analysis.json" >> $GITHUB_OUTPUT + + - name: Diff analyses โ†’ Mermaid + if: steps.guard.outputs.skip != 'true' + id: diagram + shell: bash + run: | + FLAG="" + [ "${{ inputs.changed_only }}" = "true" ] && FLAG="$FLAG --changed-only" + [ "${{ inputs.nested }}" = "true" ] && FLAG="$FLAG --nested" + META=$(python3 ${{ github.action_path }}/scripts/diff_to_mermaid.py \ + --base "${{ steps.analyze.outputs.base_analysis }}" \ + --head "${{ steps.analyze.outputs.head_analysis }}" \ + --out "${RUNNER_TEMP}/diagram.md" \ + --direction "${{ inputs.diagram_direction }}" $FLAG) + echo "$META" > "${RUNNER_TEMP}/diagram_meta.json" + echo "diff meta: $META" + read N RENDERED TRUNC < <(python3 -c "import json;d=json.load(open('${RUNNER_TEMP}/diagram_meta.json'));print(d['n_changed'], str(d['rendered']).lower(), str(d['truncated']).lower())") + echo "n_changed=$N" >> $GITHUB_OUTPUT + echo "rendered=$RENDERED" >> $GITHUB_OUTPUT + echo "truncated=$TRUNC" >> $GITHUB_OUTPUT + echo "diagram_md=${RUNNER_TEMP}/diagram.md" >> $GITHUB_OUTPUT + + - name: Build PR comment body + if: steps.guard.outputs.skip != 'true' + id: body + shell: bash + run: | + HEADER="${{ inputs.comment_header }}" + BASE_REF="${{ github.event.pull_request.base.ref }}" + N="${{ steps.diagram.outputs.n_changed }}" + RENDERED="${{ steps.diagram.outputs.rendered }}" + TRUNC="${{ steps.diagram.outputs.truncated 
}}" + BODY_FILE=$(mktemp) + + headline() { + if [ "$1" = "0" ]; then echo "no architectural changes"; + elif [ "$1" = "1" ]; then echo "1 component changed"; + else echo "$1 components changed"; fi + } + + { + echo "### ${HEADER} ยท $(headline "$N")" + echo "" + if [ "$N" = "0" ]; then + echo "No architectural changes detected versus \`${BASE_REF}\`." + elif [ "$RENDERED" = "true" ]; then + cat "${{ steps.diagram.outputs.diagram_md }}" + echo "" + echo "" + echo "๐ŸŸฉ added ยท ๐ŸŸจ modified ยท ๐ŸŸฅ deleted โ€” compared against \`${BASE_REF}\`." + if [ "$TRUNC" = "true" ]; then + echo "" + echo "Showing changed components only โ€” the full graph exceeds GitHub's inline Mermaid limit." + fi else - # Get each key from files object and create a file with that name - while IFS= read -r filename; do - echo "Processing file: $filename" - - # Get the content for this filename - content=$(jq -r ".files[\"$filename\"]" "$RESPONSE_FILE") - - # Determine file type and destination - if [[ "$filename" == *.json ]]; then - # JSON file - output_dir="$JSON_OUTPUT_DIR" - output_filename="$filename" - echo "$content" > "$output_dir/$output_filename" - echo "Created JSON file: $output_dir/$output_filename" - JSON_FILES_CREATED=$((JSON_FILES_CREATED + 1)) - else - # Documentation file - add appropriate extension if not present - output_dir="$MD_OUTPUT_DIR" - - # Check if filename has an extension - if [[ "$filename" == *.* ]]; then - # Extract basename without extension - basename="${filename%.*}" - else - basename="$filename" - fi - - # Add the selected output format extension - output_filename="${basename}${OUTPUT_FORMAT}" - - echo "$content" > "$output_dir/$output_filename" - echo "Created documentation file: $output_dir/$output_filename" - MARKDOWN_FILES_CREATED=$((MARKDOWN_FILES_CREATED + 1)) - fi - done < <(jq -r '.files | keys[]' "$RESPONSE_FILE") + echo "**$(headline "$N")** versus \`${BASE_REF}\`, but the diagram is too large to render inline (GitHub caps inline Mermaid at 500 
edges)." fi - else - echo "No 'files' key found in response JSON - checking if job completed with no results" - fi - - # Clean up temporary file - rm -f "$RESPONSE_FILE" - - # Check if any files were created - TOTAL_FILES=$((MARKDOWN_FILES_CREATED + JSON_FILES_CREATED)) - if [ "$TOTAL_FILES" -gt 0 ]; then - HAS_CHANGES="true" - echo "Created $MARKDOWN_FILES_CREATED Markdown files in $MD_OUTPUT_DIR" - echo "Created $JSON_FILES_CREATED JSON files in $JSON_OUTPUT_DIR" - - # List created files - if [ "$MARKDOWN_FILES_CREATED" -gt 0 ]; then - echo "Markdown files created:" - ls -la "$MD_OUTPUT_DIR" - fi - - if [ "$JSON_FILES_CREATED" -gt 0 ]; then - echo "JSON files created:" - ls -la "$JSON_OUTPUT_DIR" - fi - else - HAS_CHANGES="false" - echo "No files were created" - fi - - # Set outputs - echo "markdown_files_created=$MARKDOWN_FILES_CREATED" >> $GITHUB_OUTPUT - echo "json_files_created=$JSON_FILES_CREATED" >> $GITHUB_OUTPUT - echo "output_directory=$MD_OUTPUT_DIR" >> $GITHUB_OUTPUT - echo "json_directory=$JSON_OUTPUT_DIR" >> $GITHUB_OUTPUT - echo "has_changes=$HAS_CHANGES" >> $GITHUB_OUTPUT \ No newline at end of file + echo "" + echo "codeboarding-action ยท run ${{ github.run_id }}" + } > "$BODY_FILE" + + echo "body_file=$BODY_FILE" >> $GITHUB_OUTPUT + echo "--- comment preview ---" + cat "$BODY_FILE" + echo "--- end preview ---" + + - name: Post sticky PR comment + if: steps.guard.outputs.skip != 'true' + uses: marocchino/sticky-pull-request-comment@v2 + with: + header: codeboarding-architecture-diff + path: ${{ steps.body.outputs.body_file }} + GITHUB_TOKEN: ${{ inputs.github_token }} diff --git a/scripts/diff_to_mermaid.py b/scripts/diff_to_mermaid.py new file mode 100644 index 0000000..6d9f6fd --- /dev/null +++ b/scripts/diff_to_mermaid.py @@ -0,0 +1,449 @@ +"""Diff two CodeBoarding analysis.json files and render the delta as a colored Mermaid graph. 
+ +Reads a *base* (before) and *head* (after) ``analysis.json`` โ€” both already +materialized on disk by the engine โ€” computes a component/relation diff, and +emits a GitHub-renderable ```mermaid block where: + + * nodes are colored green=added / yellow=modified / red=deleted (deleted dashed) + * arrows are colored the same way (red dashed for deleted) + +GitHub renders ```mermaid fenced blocks natively inside PR/issue comments, so the +output goes straight into the sticky comment โ€” no image, no Playwright. + +The diff set-arithmetic is a port of the action's ``compute_diff.py``, with two +differences for this use case: both sides are read from plain file paths (not +``git show``), and a relation whose ``(src, dst)`` is unchanged but whose label +text changed is reported as ``modified`` (the original only did added/deleted). + +Self-contained stdlib. +""" + +from __future__ import annotations + +import argparse +import json +import re +import sys +from pathlib import Path + +# GitHub's mermaid config caps (config.schema.yaml defaults; NOT raisable on +# GitHub). Exceeding either renders a red error box with no diagram, so we stay +# comfortably under and degrade to a changed-only / text fallback instead. +MAX_EDGES = 480 # hard cap 500 +MAX_TEXT = 45_000 # hard cap 50000 chars + +# Primer-ish fills that read on both light and dark GitHub backgrounds. White +# label text is set explicitly so it survives dark mode. 
COLORS = {
    "added": {"fill": "#1f883d", "stroke": "#0b5d23"},
    "modified": {"fill": "#bf8700", "stroke": "#7d4e00"},
    "deleted": {"fill": "#cf222e", "stroke": "#82071e"},
}
# Statuses that count as "changed"; anything else (i.e. "unchanged") is drawn unstyled.
CHANGED = ("added", "modified", "deleted")
# Maximum number of characters kept from a relation label (ellipsis included).
_EDGE_LABEL_MAX = 48


# --------------------------------------------------------------------------- #
# load
# --------------------------------------------------------------------------- #
def load_analysis(path: Path) -> dict:
    """Read one ``analysis.json`` from disk and return it as a dict.

    The file is always decoded as UTF-8 (the same encoding ``main`` uses when
    writing the diagram), independent of the runner's locale.

    Args:
        path: Location of the analysis file.

    Returns:
        The parsed top-level JSON object.

    Raises:
        SystemExit: with a ``::error::`` message when the file cannot be read,
            is not valid UTF-8, is not valid JSON, or is not a JSON object.
    """
    try:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        data = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        sys.exit(f"::error::Could not read analysis JSON at {path}: {exc}")
    if not isinstance(data, dict):
        # Every consumer below calls .get() on the result; fail here with a
        # readable message instead of an AttributeError deep in the diff.
        sys.exit(
            f"::error::Could not read analysis JSON at {path}: "
            f"expected a JSON object, got {type(data).__name__}"
        )
    return data


# --------------------------------------------------------------------------- #
# diff (ported from compute_diff.py; relation diff extended with 'modified')
# --------------------------------------------------------------------------- #
def _comp_id(c: dict) -> str:
    """Return the component's ``component_id``, falling back to its ``name``."""
    return c.get("component_id") or c.get("name", "")


def _comp_name(c: dict) -> str:
    """Return the component's ``name`` (empty string when absent)."""
    return c.get("name", "")


def _file_methods(c: dict) -> list:
    """Return the component's ``file_methods`` list (empty list when absent/None)."""
    return c.get("file_methods") or []


def _methods_by_file(c: dict) -> dict:
    """Map ``file_path`` -> set of method names for one component.

    Only entries of ``methods`` that are plain strings are considered; files
    that contribute no names are left out of the mapping.
    """
    by_file: dict = {}
    for fm in _file_methods(c):
        fp = fm.get("file_path") or ""
        names = {m for m in (fm.get("methods") or []) if isinstance(m, str)}
        if names:
            by_file.setdefault(fp, set()).update(names)
    return by_file


def _has_structural_changes(base: dict, current: dict) -> bool:
    """True when the set of files or the number of direct sub-components differs."""
    base_files = {fm.get("file_path", "") for fm in _file_methods(base)}
    current_files = {fm.get("file_path", "") for fm in _file_methods(current)}
    if base_files != current_files:
        return True
    if len(base.get("components") or []) != len(current.get("components") or []):
        return True
    return False


def _diff_methods(base: dict, current: dict) -> dict:
    """Return ``{"added": {file: [names]}, "removed": {file: [names]}}`` for one component.

    Name lists are sorted; files with no additions/removals are omitted.
    """
    base_by_file = _methods_by_file(base)
    current_by_file = _methods_by_file(current)
    added: dict = {}
    removed: dict = {}
    for file_path in set(base_by_file) | set(current_by_file):
        a = sorted(current_by_file.get(file_path, set()) - base_by_file.get(file_path, set()))
        r = sorted(base_by_file.get(file_path, set()) - current_by_file.get(file_path, set()))
        if a:
            added[file_path] = a
        if r:
            removed[file_path] = r
    return {"added": added, "removed": removed}


def _rel_key(r: dict) -> tuple:
    """Return the ``(source, destination)`` join key of a relation."""
    # Name is the stable join across two independent analyses; component ids are
    # positional and can be reshuffled on a full re-run, so prefer names.
    return (r.get("src_name") or r.get("src_id") or "", r.get("dst_name") or r.get("dst_id") or "")


def _diff_relations(base_rels: list, current_rels: list) -> list:
    """Annotate relations with ``diff_status``.

    Relations present in ``current_rels`` come first (``added`` / ``modified``
    when the ``relation`` label text differs / ``unchanged``), followed by the
    relations only present in ``base_rels`` (``deleted``). Relations are keyed
    by ``_rel_key``, so several relations sharing one key collapse to the last.
    """
    base_edges = {_rel_key(r): r for r in (base_rels or [])}
    current_edges = {_rel_key(r): r for r in (current_rels or [])}
    result: list = []
    for key, rel in current_edges.items():
        if key not in base_edges:
            status = "added"
        elif (base_edges[key].get("relation") or "") != (rel.get("relation") or ""):
            status = "modified"
        else:
            status = "unchanged"
        result.append({**rel, "diff_status": status})
    for key, rel in base_edges.items():
        if key not in current_edges:
            result.append({**rel, "diff_status": "deleted"})
    return result


def _diff_components(base_components: list, current_components: list) -> list:
    """Annotate components (recursively) with ``diff_status`` and ``method_diff``.

    Components are matched by name. Matched components recurse into their
    ``components`` / ``components_relations``; components only in the base are
    appended as ``deleted`` ghosts with their subtree stripped.
    """
    base = base_components or []
    current = current_components or []
    base_by_name = {_comp_name(c): c for c in base}  # name is the stable cross-analysis join
    matched_names: set = set()
    result: list = []

    for comp in current:
        base_match = base_by_name.get(_comp_name(comp))
        if base_match is None:
            result.append({**comp, "diff_status": "added"})
            continue
        matched_names.add(_comp_name(base_match))
        structural = _has_structural_changes(base_match, comp)
        method_diff = _diff_methods(base_match, comp)
        has_method_changes = bool(method_diff["added"] or method_diff["removed"])
        diff_status = "modified" if (structural or has_method_changes) else "unchanged"

        annotated = {**comp, "diff_status": diff_status, "method_diff": method_diff}

        base_subs = base_match.get("components") or []
        current_subs = comp.get("components") or []
        if base_subs or current_subs:
            annotated["components"] = _diff_components(base_subs, current_subs)

        base_sub_rels = base_match.get("components_relations") or []
        current_sub_rels = comp.get("components_relations") or []
        if base_sub_rels or current_sub_rels:
            annotated["components_relations"] = _diff_relations(base_sub_rels, current_sub_rels)

        result.append(annotated)

    for comp in base:
        if _comp_name(comp) not in matched_names:
            ghost = {k: v for k, v in comp.items() if k not in ("components", "components_relations", "can_expand")}
            ghost["diff_status"] = "deleted"
            result.append(ghost)

    return result


def build_diff(base: dict, head: dict) -> dict:
    """Diff two parsed analyses; returns annotated ``components`` and ``components_relations``."""
    return {
        "components": _diff_components(base.get("components") or [], head.get("components") or []),
        "components_relations": _diff_relations(
            base.get("components_relations") or [],
            head.get("components_relations") or [],
        ),
    }


# --------------------------------------------------------------------------- #
# mermaid emit
# --------------------------------------------------------------------------- #
def _sanitize(name: str) -> str:
    """Match the engine's node-id sanitization (utils.sanitize)."""
    # Every run of non-word characters becomes a single underscore.
    return re.sub(r"\W+", "_", name or "")


def _esc(text: str) -> str:
    """Escape arbitrary text for a mermaid label under GitHub's strict security.

    ``#`` first (so the entities we inject are not re-escaped), then ``"``.
    Newlines / carriage returns are flattened to spaces.
    """
    out = (text or "").replace("\n", " ").replace("\r", " ").strip()
    out = out.replace("#", "#35;").replace('"', "#quot;")
    return out


def _truncate(text: str, limit: int = _EDGE_LABEL_MAX) -> str:
    """Shorten ``text`` to at most ``limit`` characters, ending in a one-character ellipsis.

    Text that already fits is returned stripped but otherwise untouched.
    """
    text = (text or "").strip()
    if len(text) <= limit:
        return text
    # max(..., 0): a limit <= 0 must not turn into a negative slice bound.
    # "\u2026" is the single-character ellipsis, spelled as an escape so it
    # cannot be mangled by a wrong file encoding.
    return text[: max(limit - 1, 0)].rstrip() + "\u2026"


class _Scope:
    """Per-level name/id -> mermaid key resolver for one nesting level.

    Deleted ghosts get a separate ``del_`` key namespace from present nodes so a
    reused id/name can't merge an added node onto a deleted one. Keys are made
    globally unique via the shared ``used`` set. Resolution is name-first (the
    stable cross-analysis join); present edges resolve head-first, deleted edges
    ghost-first. ``force`` overrides the per-component diff_status (used when a
    wholly-added/deleted parent colors its whole subtree).
    """

    def __init__(self, components: list, used: set, force: str | None = None):
        self.entries: list = []  # (key, label, status, component)
        self.head_by_id: dict = {}
        self.head_by_name: dict = {}
        self.del_by_id: dict = {}
        self.del_by_name: dict = {}
        for comp in components:
            status = force or comp.get("diff_status", "unchanged")
            present = status != "deleted"
            cid, cname = _comp_id(comp), _comp_name(comp)
            base = ("n_" if present else "del_") + _sanitize(cname or cid or "node")
            # Suffix _2, _3, ... until the key is unique across the whole graph.
            key, n = base, 1
            while key in used:
                n += 1
                key = f"{base}_{n}"
            used.add(key)
            self.entries.append((key, cname or cid or "(unnamed)", status, comp))
            by_id = self.head_by_id if present else self.del_by_id
            by_name = self.head_by_name if present else self.del_by_name
            if cname:
                by_name[cname] = key
            if cid:
                by_id[cid] = key

    def resolve(self, rid: str, rname: str, present: bool) -> str | None:
        """Return the mermaid key for an edge endpoint, or None if it is not drawn at this level."""
        maps = [(self.head_by_id, self.head_by_name), (self.del_by_id, self.del_by_name)]
        if not present:
            maps.reverse()
        for by_id, by_name in maps:
            if rname and rname in by_name:  # name-first: stable cross-analysis join
                return by_name[rname]
            if rid and rid in by_id:
                return by_id[rid]
        return None


def _filter_changed(components: list, relations: list) -> tuple:
    """Keep changed components, the endpoints of changed edges, and edges among the kept -- the size fallback.

    Returns ``(kept_components, kept_relations)``. Missing ids/names on a
    relation (empty strings) never match a component.
    """
    changed_rels = [r for r in relations if r.get("diff_status") in CHANGED]
    keep_ids: set = set()
    keep_names: set = set()
    for c in components:
        if c.get("diff_status") in CHANGED:
            keep_ids.add(_comp_id(c))
            keep_names.add(_comp_name(c))
    for r in changed_rels:  # so a changed edge between two unchanged nodes still draws its endpoints
        keep_ids.update((r.get("src_id", ""), r.get("dst_id", "")))
        keep_names.update((r.get("src_name", ""), r.get("dst_name", "")))
    # A relation without src_id/dst_id contributes "" above; left in place it
    # would drag every unnamed/id-less unchanged component into the result.
    keep_ids.discard("")
    keep_names.discard("")

    kept = [
        c
        for c in components
        if c.get("diff_status") in CHANGED or _comp_id(c) in keep_ids or _comp_name(c) in keep_names
    ]
    kept_ids = {_comp_id(c) for c in kept} - {""}
    kept_names = {_comp_name(c) for c in kept} - {""}

    def touches(r: dict, side_id: str, side_name: str) -> bool:
        return r.get(side_id, "") in kept_ids or r.get(side_name, "") in kept_names

    rels = [
        r
        for r in relations
        if r.get("diff_status") in CHANGED
        or (touches(r, "src_id", "src_name") and touches(r, "dst_id", "dst_name"))
    ]
    return kept, rels


def _init_directive(font_size, node_padding, node_spacing, rank_spacing) -> str | None:
    """Build a Mermaid ``%%{init}%%`` directive to enlarge nodes / spacing.

    Nodes auto-size to their label, so the effective levers are font size and
    interior padding (bigger nodes) plus node/rank spacing (less cramped). These
    config keys are honored by GitHub's strict renderer.

    Returns None when every argument is None (no directive needed).
    """
    flowchart: dict = {}
    if node_padding is not None:
        flowchart["padding"] = node_padding
    if node_spacing is not None:
        flowchart["nodeSpacing"] = node_spacing
    if rank_spacing is not None:
        flowchart["rankSpacing"] = rank_spacing
    cfg: dict = {}
    if flowchart:
        cfg["flowchart"] = flowchart
    if font_size is not None:
        cfg["themeVariables"] = {"fontSize": f"{font_size}px"}
    return "%%{init: " + json.dumps(cfg) + "}%%" if cfg else None


def render_mermaid(
    diff: dict,
    direction: str = "LR",
    changed_only: bool = False,
    edge_labels: bool = True,
    nested: bool = False,
    font_size: int | None = None,
    node_padding: int | None = None,
    node_spacing: int | None = None,
    rank_spacing: int | None = None,
) -> tuple:
    """Return (mermaid_text, meta). ``mermaid_text`` is None when there's nothing to draw.

    With ``nested`` the depth>1 sub-components are drawn as Mermaid subgraphs --
    leaf nodes get a filled class, parent containers a stroke-only ``*Box``
    class. A wholly-added parent forces ``added`` onto its subtree (the engine
    only diff-annotates surviving branches; an added subtree arrives raw).

    ``meta`` carries ``n_changed`` (changed top-level components), ``n_nodes``,
    ``n_edges`` and ``truncated``. ``mermaid_text`` is also None (with
    ``truncated`` set) when the rendered graph would exceed ``MAX_TEXT``
    characters or ``MAX_EDGES`` edges.
    """
    components = diff.get("components") or []
    relations = diff.get("components_relations") or []
    n_changed = sum(1 for c in components if c.get("diff_status") in CHANGED)

    if changed_only or len(relations) > MAX_EDGES:
        components, relations = _filter_changed(components, relations)

    used: set = set()
    body: list = []
    node_classes: dict = {"added": [], "modified": [], "deleted": []}
    box_classes: dict = {"added": [], "modified": [], "deleted": []}
    edge_styles: dict = {"added": [], "modified": [], "deleted": []}
    # Mutable counters shared by the nested closures below.
    counters = {"edges": 0, "nodes": 0}

    def emit_edges(rels: list, scope: _Scope, pad: str, force: str | None) -> None:
        for rel in rels:
            status = force or rel.get("diff_status", "unchanged")
            present = status != "deleted"
            src = scope.resolve(rel.get("src_id", ""), rel.get("src_name", ""), present)
            dst = scope.resolve(rel.get("dst_id", ""), rel.get("dst_name", ""), present)
            if src is None or dst is None:
                continue  # endpoint not drawn -- skip, don't consume an edge index
            label = _esc(_truncate(rel.get("relation", ""))) if edge_labels else ""
            body.append(f'{pad}{src} -- "{label}" --> {dst}' if label else f"{pad}{src} --> {dst}")
            if status in edge_styles:
                # linkStyle addresses edges by their emission order, so record the index.
                edge_styles[status].append(counters["edges"])
            counters["edges"] += 1

    def emit_level(comps: list, rels: list, indent: int, force: str | None) -> None:
        pad = " " * indent
        scope = _Scope(comps, used, force)
        for key, label, status, comp in scope.entries:
            children = comp.get("components") if nested else None
            if children:
                body.append(f'{pad}subgraph {key}["{_esc(label)}"]')
                if status in box_classes:
                    box_classes[status].append(key)
                child_force = force or (status if status == "added" else None)
                emit_level(children, comp.get("components_relations") or [], indent + 1, child_force)
                body.append(f"{pad}end")
            else:
                body.append(f'{pad}{key}["{_esc(label)}"]')
                if status in node_classes:
                    node_classes[status].append(key)
                counters["nodes"] += 1
        emit_edges(rels, scope, pad, force)

    emit_level(components, relations, 1, None)
    if counters["nodes"] == 0:
        return None, {"n_changed": n_changed, "n_nodes": 0, "n_edges": 0, "truncated": False}

    style: list = [
        f' classDef added fill:{COLORS["added"]["fill"]},stroke:{COLORS["added"]["stroke"]},color:#ffffff;',
        f' classDef modified fill:{COLORS["modified"]["fill"]},stroke:{COLORS["modified"]["stroke"]},color:#ffffff;',
        f' classDef deleted fill:{COLORS["deleted"]["fill"]},stroke:{COLORS["deleted"]["stroke"]},'
        f"color:#ffffff,stroke-dasharray:5 3;",
    ]
    if any(box_classes.values()):  # stroke-only containers so big parents aren't solid blocks
        for st in CHANGED:
            dash = ",stroke-dasharray:5 3" if st == "deleted" else ""
            style.append(f' classDef {st}Box stroke:{COLORS[st]["stroke"]},stroke-width:2px,fill:none{dash};')
    for status in CHANGED:
        if node_classes[status]:
            style.append(f' class {",".join(node_classes[status])} {status};')
        if box_classes[status]:
            style.append(f' class {",".join(box_classes[status])} {status}Box;')
    for status in CHANGED:
        idxs = edge_styles[status]
        if not idxs:
            continue
        s = f'stroke:{COLORS[status]["stroke"]},stroke-width:2px'
        if status == "deleted":
            s += ",stroke-dasharray:5 3"
        style.append(f' linkStyle {",".join(str(i) for i in idxs)} {s};')

    directive = _init_directive(font_size, node_padding, node_spacing, rank_spacing)
    head = ["```mermaid"] + ([directive] if directive else []) + [f"graph {direction}"]
    text = "\n".join(head + body + style + ["```"])
    meta = {
        "n_changed": n_changed,
        "n_nodes": counters["nodes"],
        "n_edges": counters["edges"],
        "truncated": bool(changed_only or len(diff.get("components_relations") or []) > MAX_EDGES),
    }
    if len(text) > MAX_TEXT or counters["edges"] > MAX_EDGES:  # never trip GitHub's red error box
        meta["truncated"] = True
        return None, meta
    return text, meta


# --------------------------------------------------------------------------- #
+# cli +# --------------------------------------------------------------------------- # +def main() -> int: + p = argparse.ArgumentParser(description=__doc__) + p.add_argument("--base", required=True, type=Path, help="Path to the base (before) analysis.json") + p.add_argument("--head", required=True, type=Path, help="Path to the head (after) analysis.json") + p.add_argument("--out", required=True, type=Path, help="Where to write the ```mermaid block") + p.add_argument("--direction", default="LR", choices=["LR", "TD", "TB", "RL", "BT"]) + p.add_argument("--changed-only", action="store_true", help="Render only changed components + incident edges") + p.add_argument("--no-edge-labels", dest="edge_labels", action="store_false", help="Draw arrows without relation labels") + p.add_argument("--nested", action="store_true", help="Draw depth>1 sub-components as subgraphs") + p.add_argument("--font-size", type=int, default=None, help="Node label font size in px (bigger label โ‡’ bigger node)") + p.add_argument("--node-padding", type=int, default=None, help="Interior padding around each node label") + p.add_argument("--node-spacing", type=int, default=None, help="Space between nodes in the same rank") + p.add_argument("--rank-spacing", type=int, default=None, help="Space between ranks") + args = p.parse_args() + + diff = build_diff(load_analysis(args.base), load_analysis(args.head)) + mermaid, meta = render_mermaid( + diff, + direction=args.direction, + changed_only=args.changed_only, + edge_labels=args.edge_labels, + nested=args.nested, + font_size=args.font_size, + node_padding=args.node_padding, + node_spacing=args.node_spacing, + rank_spacing=args.rank_spacing, + ) + + args.out.write_text(mermaid if mermaid is not None else "", encoding="utf-8") + meta["rendered"] = mermaid is not None + # Machine-readable summary on stdout for the action to consume. 
+ print(json.dumps(meta)) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/run_local.sh b/scripts/run_local.sh new file mode 100755 index 0000000..8a6d7d7 --- /dev/null +++ b/scripts/run_local.sh @@ -0,0 +1,169 @@ +#!/usr/bin/env bash +# +# Local test harness for the CodeBoarding Mermaid architecture-diff action. +# Mirrors action.yml so you can iterate without waiting on a GitHub runner. +# +# Two modes: +# +# FAST (no LLM, instant) โ€” diff two existing analysis.json files and preview: +# scripts/run_local.sh --base-json BASE.json --head-json HEAD.json +# +# FULL pipeline (needs OPENROUTER_API_KEY) โ€” run the engine on two refs of a +# local repo, exactly like the action (committed-or-generated base, then +# incremental head), then diff + preview: +# export OPENROUTER_API_KEY=sk-or-... +# scripts/run_local.sh --repo /path/to/repo --base --head +# +# Outputs (default ./.cb-local): +# diagram.md the ```mermaid block (what the action posts) +# preview.html opens in a browser and renders the colored diagram via mermaid.js +# +set -euo pipefail + +ACTION_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" +ENGINE="${ENGINE:-$ACTION_DIR/../CodeBoarding}" +OUT="$ACTION_DIR/.cb-local" +DEPTH="1" +DIRECTION="LR" +CHANGED_ONLY="" +NO_EDGE_LABELS="" +NESTED="" +EXTRA="" +OPEN="auto" +REPO="" BASE_REF="" HEAD_REF="" BASE_JSON="" HEAD_JSON="" +AGENT_MODEL="${AGENT_MODEL:-openrouter/anthropic/claude-sonnet-4}" +PARSING_MODEL="${PARSING_MODEL:-openrouter/anthropic/claude-sonnet-4}" + +while [ $# -gt 0 ]; do + case "$1" in + --repo) REPO="$2"; shift 2;; + --base) BASE_REF="$2"; shift 2;; + --head) HEAD_REF="$2"; shift 2;; + --base-json) BASE_JSON="$2"; shift 2;; + --head-json) HEAD_JSON="$2"; shift 2;; + --engine) ENGINE="$2"; shift 2;; + --out) OUT="$2"; shift 2;; + --depth) DEPTH="$2"; shift 2;; + --direction) DIRECTION="$2"; shift 2;; + --changed-only) CHANGED_ONLY="--changed-only"; shift;; + --no-edge-labels) NO_EDGE_LABELS="--no-edge-labels"; shift;; + --nested) NESTED="--nested"; shift;; + --extra) EXTRA="$2"; shift 2;; # raw args forwarded to diff_to_mermaid.py, e.g. --extra "--font-size 20 --node-padding 16" + --no-open) OPEN="no"; shift;; + -h|--help) sed -n '2,30p' "${BASH_SOURCE[0]}"; exit 0;; + *) echo "Unknown arg: $1" >&2; exit 2;; + esac +done + +mkdir -p "$OUT" + +run_engine() { # $1 = uv-runnable python source + ( cd "$ENGINE" && \ + STATIC_ANALYSIS_CONFIG="$ENGINE/static_analysis_config.yml" \ + PROJECT_ROOT="$ENGINE" \ + DIAGRAM_DEPTH_LEVEL="$DEPTH" \ + CACHING_DOCUMENTATION="false" \ + ENABLE_MONITORING="false" \ + OPENROUTER_API_KEY="${OPENROUTER_API_KEY:-}" \ + AGENT_MODEL="$AGENT_MODEL" \ + PARSING_MODEL="$PARSING_MODEL" \ + uv run python -c "$1" ) +} + +if [ -n "$BASE_JSON" ] && [ -n "$HEAD_JSON" ]; then + echo "== Fast mode: diffing existing analyses (no engine run) ==" + BASE_ANALYSIS="$BASE_JSON" + HEAD_ANALYSIS="$HEAD_JSON" +else + [ -n "$REPO" ] && [ -n "$BASE_REF" ] && [ -n "$HEAD_REF" ] || { + echo "Need either --base-json/--head-json, or --repo/--base/--head." 
>&2; exit 2; } + [ -d "$ENGINE" ] || { echo "Engine not found at $ENGINE (set --engine or \$ENGINE)." >&2; exit 2; } + [ -n "${OPENROUTER_API_KEY:-}" ] || { echo "Export OPENROUTER_API_KEY for the full pipeline." >&2; exit 2; } + REPO="$(cd "$REPO" && pwd)" + BASE_DIR="$OUT/base"; HEAD_DIR="$OUT/head" + rm -rf "$BASE_DIR" "$HEAD_DIR"; mkdir -p "$BASE_DIR" "$HEAD_DIR" + + echo "== Resolving base analysis at $BASE_REF ==" + if git -C "$REPO" show "$BASE_REF:.codeboarding/analysis.json" > "$BASE_DIR/analysis.json" 2>/dev/null; then + git -C "$REPO" show "$BASE_REF:.codeboarding/static_analysis.pkl" > "$BASE_DIR/static_analysis.pkl" 2>/dev/null \ + && echo " using committed baseline (+ static_analysis.pkl)" || { rm -f "$BASE_DIR/static_analysis.pkl"; echo " using committed baseline"; } + else + rm -f "$BASE_DIR/analysis.json" + echo " no committed baseline; running FULL analysis on base (LLM)..." + BASE_SRC="$OUT/base-src"; rm -rf "$BASE_SRC" + git -C "$REPO" worktree add --detach "$BASE_SRC" "$BASE_REF" >/dev/null + run_engine " +from pathlib import Path +from codeboarding_workflows.analysis import run_full +print(run_full(repo_name='$(basename "$REPO")', repo_path=Path('$BASE_SRC'), output_dir=Path('$BASE_DIR'), + run_id='local-base', log_path='/tmp/cb-local-base.log', depth_level=int('$DEPTH'), source_sha='$BASE_REF')) +" + git -C "$REPO" worktree remove --force "$BASE_SRC" >/dev/null 2>&1 || true + fi + + echo "== Analyzing head at $HEAD_REF (incremental from base) ==" + cp -a "$BASE_DIR"/. 
"$HEAD_DIR"/ 2>/dev/null || true + run_engine " +from pathlib import Path +from codeboarding_workflows.analysis import run_incremental, run_full, BaselineUnavailableError +from diagram_analysis.exceptions import IncrementalCacheMissingError +repo=Path('$REPO'); out=Path('$HEAD_DIR'); name='$(basename "$REPO")' +try: + print(run_incremental(repo_path=repo, output_dir=out, project_name=name, run_id='local-head', + log_path='/tmp/cb-local-head.log', base_ref='$BASE_REF', target_ref='$HEAD_REF', source_sha='$HEAD_REF')) +except (IncrementalCacheMissingError, BaselineUnavailableError) as exc: + print(f'Incremental unavailable ({exc}); full analysis on head.') + for p in out.glob('*'): + if p.is_file(): p.unlink() + print(run_full(repo_name=name, repo_path=repo, output_dir=out, run_id='local-head', + log_path='/tmp/cb-local-head.log', depth_level=int('$DEPTH'), source_sha='$HEAD_REF')) +" + BASE_ANALYSIS="$BASE_DIR/analysis.json" + HEAD_ANALYSIS="$HEAD_DIR/analysis.json" +fi + +echo "== Diff -> Mermaid ==" +META="$(python3 "$ACTION_DIR/scripts/diff_to_mermaid.py" \ + --base "$BASE_ANALYSIS" --head "$HEAD_ANALYSIS" \ + --out "$OUT/diagram.md" --direction "$DIRECTION" $CHANGED_ONLY $NO_EDGE_LABELS $NESTED $EXTRA)" +echo " $META" + +# Browser preview: render the (fence-stripped) mermaid via mermaid.js, strict mode +# to match GitHub. HTML-escape the body so labels with < > & stay valid. +python3 - "$OUT/diagram.md" "$OUT/preview.html" <<'PY' +import html, sys +src, dst = sys.argv[1], sys.argv[2] +body = open(src, encoding="utf-8").read().strip() +lines = body.splitlines() +if lines and lines[0].startswith("```"): lines = lines[1:] +if lines and lines[-1].startswith("```"): lines = lines[:-1] +graph = html.escape("\n".join(lines)) +open(dst, "w", encoding="utf-8").write(f""" +CodeBoarding architecture diff + +

Architecture diff preview

+
+ ■ added + ■ modified + ■ deleted +
+
+{graph}
+
+""") +print(f" wrote {dst}") +PY + +echo +echo "diagram : $OUT/diagram.md" +echo "preview : $OUT/preview.html" +if [ "$OPEN" != "no" ]; then + if command -v open >/dev/null 2>&1; then open "$OUT/preview.html"; + elif command -v xdg-open >/dev/null 2>&1; then xdg-open "$OUT/preview.html"; + else echo "(open $OUT/preview.html in your browser)"; fi +fi From e00323a498b45b8c71f440b8776f0679dd73abfe Mon Sep 17 00:00:00 2001 From: brovatten Date: Wed, 3 Jun 2026 15:36:43 +0200 Subject: [PATCH 02/27] test: self-test workflow to run the action on a PR (remove before merge) --- .github/workflows/test-self.yml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 .github/workflows/test-self.yml diff --git a/.github/workflows/test-self.yml b/.github/workflows/test-self.yml new file mode 100644 index 0000000..5440157 --- /dev/null +++ b/.github/workflows/test-self.yml @@ -0,0 +1,23 @@ +name: Self-test architecture diff + +# Exercises THIS branch's action (uses: ./) against the action repo itself, so a +# PR posts a Mermaid architecture-diff comment. Remove before merge. 
+ +on: + pull_request: + types: [opened, synchronize, reopened] + +permissions: + pull-requests: write + +jobs: + diagram: + runs-on: ubuntu-latest + timeout-minutes: 60 + steps: + - uses: actions/checkout@v4 + - uses: ./ + with: + llm_api_key: ${{ secrets.OPENROUTER_API_KEY }} + agent_model: ${{ secrets.AGENT_MODEL }} + parsing_model: ${{ secrets.PARSING_MODEL }} From 9802ce93374cd9e5de8a7e39fac18d38ea23c8dc Mon Sep 17 00:00:00 2001 From: brovatten Date: Wed, 3 Jun 2026 16:37:44 +0200 Subject: [PATCH 03/27] feat(comment): clarify color legend (file-level changes) + add workspace/extension CTA via click proxy --- .github/workflows/example-usage.yml | 5 +++-- .github/workflows/test-self.yml | 1 + action.yml | 25 ++++++++++++++++++++++++- 3 files changed, 28 insertions(+), 3 deletions(-) diff --git a/.github/workflows/example-usage.yml b/.github/workflows/example-usage.yml index 6fb9f78..1e8913c 100644 --- a/.github/workflows/example-usage.yml +++ b/.github/workflows/example-usage.yml @@ -1,8 +1,9 @@ name: Architecture diff +# Reference example only (the README shows the real pull_request usage). Manual +# trigger so it doesn't run the published @v1 against this repo's own PRs. on: - pull_request: - types: [opened, synchronize, reopened, ready_for_review] + workflow_dispatch: # Only a PR comment is posted — no image is pushed — so contents:write is not needed. 
permissions: diff --git a/.github/workflows/test-self.yml b/.github/workflows/test-self.yml index 5440157..302e014 100644 --- a/.github/workflows/test-self.yml +++ b/.github/workflows/test-self.yml @@ -21,3 +21,4 @@ jobs: llm_api_key: ${{ secrets.OPENROUTER_API_KEY }} agent_model: ${{ secrets.AGENT_MODEL }} parsing_model: ${{ secrets.PARSING_MODEL }} + cta_base_url: https://codeboarding.pontux-inc.workers.dev diff --git a/action.yml b/action.yml index 3c3c031..5ce42aa 100644 --- a/action.yml +++ b/action.yml @@ -46,6 +46,10 @@ inputs: description: 'Draw depth>1 sub-components as nested subgraphs (pair with depth_level >= 2).' required: false default: 'false' + cta_base_url: + description: 'Base URL of the click proxy (e.g. https://go.codeboarding.org). When set, the comment adds "open in workspace" / "get the extension" links with owner/repo/pr appended. Empty disables the CTA.' + required: false + default: '' outputs: diagram_md: @@ -340,12 +344,30 @@ runs: TRUNC="${{ steps.diagram.outputs.truncated }}" BODY_FILE=$(mktemp) + OWNER_REPO="${{ github.repository }}" + OWNER="${OWNER_REPO%%/*}"; REPO="${OWNER_REPO##*/}" + PR="${{ steps.guard.outputs.pr_number }}" + CTA_BASE="${{ inputs.cta_base_url }}" + headline() { if [ "$1" = "0" ]; then echo "no architectural changes"; elif [ "$1" = "1" ]; then echo "1 component changed"; else echo "$1 components changed"; fi } + # Call-to-action: links open the live workspace (github.dev-equivalent) and + # the extension via the click proxy, with owner/repo/pr appended for tracking. + cta() { + [ -z "$CTA_BASE" ] && return + local ws="${CTA_BASE}/use-workspace?owner=${OWNER}&repo=${REPO}&pr=${PR}" + local mp="${CTA_BASE}/use-marketplace?owner=${OWNER}&repo=${REPO}&pr=${PR}" + echo "" + echo "---" + echo "๐Ÿ” **This is the flattened map.** [**Explore it live in your browser โ†’**](${ws}) โ€” expand each component, follow every dependency, and click straight through to the code that changed. 
No install; it opens right here on this PR." + echo "" + echo "๐Ÿ’ก Want this on every PR? [**Add the CodeBoarding extension โ†’**](${mp})" + } + { echo "### ${HEADER} ยท $(headline "$N")" echo "" @@ -355,7 +377,7 @@ runs: cat "${{ steps.diagram.outputs.diagram_md }}" echo "" echo "" - echo "๐ŸŸฉ added ยท ๐ŸŸจ modified ยท ๐ŸŸฅ deleted โ€” compared against \`${BASE_REF}\`." + echo "Components are tinted by the files that changed inside them โ€” ๐ŸŸฉ added ยท ๐ŸŸจ modified ยท ๐ŸŸฅ removed โ€” versus \`${BASE_REF}\` (not whole subsystems being added or dropped)." if [ "$TRUNC" = "true" ]; then echo "" echo "Showing changed components only โ€” the full graph exceeds GitHub's inline Mermaid limit." @@ -363,6 +385,7 @@ runs: else echo "**$(headline "$N")** versus \`${BASE_REF}\`, but the diagram is too large to render inline (GitHub caps inline Mermaid at 500 edges)." fi + cta echo "" echo "codeboarding-action ยท run ${{ github.run_id }}" } > "$BODY_FILE" From f5d3bbac23c77145e99295a5237b51a5e0b0117c Mon Sep 17 00:00:00 2001 From: brovatten Date: Wed, 3 Jun 2026 16:50:35 +0200 Subject: [PATCH 04/27] fix(comment): extension CTA = explore diffs in VS Code (not 'on every PR') --- action.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/action.yml b/action.yml index 5ce42aa..6ac9456 100644 --- a/action.yml +++ b/action.yml @@ -363,9 +363,9 @@ runs: local mp="${CTA_BASE}/use-marketplace?owner=${OWNER}&repo=${REPO}&pr=${PR}" echo "" echo "---" - echo "๐Ÿ” **This is the flattened map.** [**Explore it live in your browser โ†’**](${ws}) โ€” expand each component, follow every dependency, and click straight through to the code that changed. No install; it opens right here on this PR." + echo "๐Ÿ” **This is the flattened map.** [**Explore this diff live in your browser โ†’**](${ws}) โ€” expand every component, follow each dependency, and click straight through to the changed code. Opens in your browser, no install." 
echo "" - echo "💡 Want this on every PR? [**Add the CodeBoarding extension →**](${mp})" + echo "💡 Prefer your editor? [**Get the CodeBoarding VS Code extension →**](${mp}) to explore these architecture diffs right inside VS Code." } { From 951a729f9d77a2777ce0fb0f00497c01f845c00c Mon Sep 17 00:00:00 2001 From: brovatten Date: Wed, 3 Jun 2026 17:10:56 +0200 Subject: [PATCH 05/27] feat(comment): real health-check warning hook + tighter legend/CTA copy --- action.yml | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 51 insertions(+), 4 deletions(-) diff --git a/action.yml b/action.yml index 6ac9456..e37c80e 100644 --- a/action.yml +++ b/action.yml @@ -311,6 +311,46 @@ runs: echo "base_analysis=$BASE_DIR/analysis.json" >> $GITHUB_OUTPUT echo "head_analysis=$HEAD_DIR/analysis.json" >> $GITHUB_OUTPUT + - name: Architecture health check (best-effort) + if: steps.guard.outputs.skip != 'true' + id: health + continue-on-error: true + shell: bash + working-directory: codeboarding-engine + env: + STATIC_ANALYSIS_CONFIG: ${{ github.workspace }}/codeboarding-engine/static_analysis_config.yml + PROJECT_ROOT: ${{ github.workspace }}/codeboarding-engine + run: | + HEAD_DIR="${{ steps.base.outputs.head_dir }}" + TARGET="${{ github.workspace }}/target-repo" + # Count real WARNING/CRITICAL findings from the head analysis. Never fails + # the run — a missing/old health module just yields 0 issues (no banner). 
+ uv run python -c " + import json + from pathlib import Path + issues = 0 + try: + from static_analyzer.analysis_cache import StaticAnalysisCache + from health.runner import run_health_checks + from health.models import Severity + cache = StaticAnalysisCache(artifact_dir=Path('$HEAD_DIR'), repo_root=Path('$TARGET')) + sa = cache.get() + if sa is not None: + report = run_health_checks(sa, repo_name='${{ github.event.repository.name }}', repo_path=Path('$TARGET')) + if report is not None: + for cs in report.check_summaries: + for fg in getattr(cs, 'finding_groups', []): + if getattr(fg, 'severity', None) in (Severity.WARNING, Severity.CRITICAL): + issues += len(fg.entities) + print(f'Architecture issues found: {issues}') + except Exception as exc: + print(f'Health check skipped ({exc}).') + Path('/tmp/cb-issues.txt').write_text(str(issues)) + " + N=$(cat /tmp/cb-issues.txt 2>/dev/null || echo 0) + echo "issues=$N" >> $GITHUB_OUTPUT + echo "Architecture issues: $N" + - name: Diff analyses → Mermaid if: steps.guard.outputs.skip != 'true' id: diagram @@ -348,6 +388,7 @@ runs: OWNER="${OWNER_REPO%%/*}"; REPO="${OWNER_REPO##*/}" PR="${{ steps.guard.outputs.pr_number }}" CTA_BASE="${{ inputs.cta_base_url }}" + ISSUES="${{ steps.health.outputs.issues }}" headline() { if [ "$1" = "0" ]; then echo "no architectural changes"; @@ -356,16 +397,22 @@ runs: } # Call-to-action: links open the live workspace (github.dev-equivalent) and - # the extension via the click proxy, with owner/repo/pr appended for tracking. + # the VS Code extension via the click proxy, with owner/repo/pr for tracking. + # The warning banner is shown only when real health findings exist. 
cta() { [ -z "$CTA_BASE" ] && return local ws="${CTA_BASE}/use-workspace?owner=${OWNER}&repo=${REPO}&pr=${PR}" local mp="${CTA_BASE}/use-marketplace?owner=${OWNER}&repo=${REPO}&pr=${PR}" echo "" echo "---" - echo "๐Ÿ” **This is the flattened map.** [**Explore this diff live in your browser โ†’**](${ws}) โ€” expand every component, follow each dependency, and click straight through to the changed code. Opens in your browser, no install." + if [ -n "$ISSUES" ] && [ "$ISSUES" != "0" ]; then + local noun="issue"; [ "$ISSUES" != "1" ] && noun="issues" + echo "โš ๏ธ **${ISSUES} architecture ${noun} found.** [**See live in your browser โ†’**](${ws})" + echo "" + fi + echo "๐Ÿ” This is the flattened map. [**Explore this diff live in your browser โ†’**](${ws})" echo "" - echo "๐Ÿ’ก Prefer your editor? [**Get the CodeBoarding VS Code extension โ†’**](${mp}) to explore these architecture diffs right inside VS Code." + echo "๐Ÿ’ก Prefer your editor? [**Get the CodeBoarding VS Code extension โ†’**](${mp})" } { @@ -377,7 +424,7 @@ runs: cat "${{ steps.diagram.outputs.diagram_md }}" echo "" echo "" - echo "Components are tinted by the files that changed inside them โ€” ๐ŸŸฉ added ยท ๐ŸŸจ modified ยท ๐ŸŸฅ removed โ€” versus \`${BASE_REF}\` (not whole subsystems being added or dropped)." + echo "Colours indicate components that have been ๐ŸŸฉ added ยท ๐ŸŸจ modified ยท ๐ŸŸฅ removed โ€” versus \`${BASE_REF}\`." if [ "$TRUNC" = "true" ]; then echo "" echo "Showing changed components only โ€” the full graph exceeds GitHub's inline Mermaid limit." From e639920b30a452b7ea29213fc9efd98dbec750fd Mon Sep 17 00:00:00 2001 From: brovatten Date: Wed, 3 Jun 2026 18:40:18 +0200 Subject: [PATCH 06/27] feat(comment): auto-detect .vscode/.cursor -> editor-specific 'open in editor' CTA Move CTA building into scripts/build_cta.py (testable; docstring cites the 2025 SO survey: VS Code 75.9% + Cursor 17.9% ~= 94% justifies VS Code+Cursor only). 
.vscode dir -> VS Code, .cursor -> Cursor, both -> both, neither -> VS Code. --- action.yml | 22 ++++------- scripts/build_cta.py | 91 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+), 15 deletions(-) create mode 100644 scripts/build_cta.py diff --git a/action.yml b/action.yml index e37c80e..aadf7f5 100644 --- a/action.yml +++ b/action.yml @@ -396,23 +396,15 @@ runs: else echo "$1 components changed"; fi } - # Call-to-action: links open the live workspace (github.dev-equivalent) and - # the VS Code extension via the click proxy, with owner/repo/pr for tracking. - # The warning banner is shown only when real health findings exist. + # Call-to-action footer (workspace + editor + extension links via the click + # proxy, plus a warning banner when real health findings exist). Editor + # links auto-select VS Code / Cursor from the repo's .vscode/.cursor dirs. + # See scripts/build_cta.py for the copy and the market-share rationale. cta() { [ -z "$CTA_BASE" ] && return - local ws="${CTA_BASE}/use-workspace?owner=${OWNER}&repo=${REPO}&pr=${PR}" - local mp="${CTA_BASE}/use-marketplace?owner=${OWNER}&repo=${REPO}&pr=${PR}" - echo "" - echo "---" - if [ -n "$ISSUES" ] && [ "$ISSUES" != "0" ]; then - local noun="issue"; [ "$ISSUES" != "1" ] && noun="issues" - echo "โš ๏ธ **${ISSUES} architecture ${noun} found.** [**See live in your browser โ†’**](${ws})" - echo "" - fi - echo "๐Ÿ” This is the flattened map. [**Explore this diff live in your browser โ†’**](${ws})" - echo "" - echo "๐Ÿ’ก Prefer your editor? 
[**Get the CodeBoarding VS Code extension →**](${mp})" + python3 "${{ github.action_path }}/scripts/build_cta.py" \ + --cta-base "$CTA_BASE" --owner "$OWNER" --repo "$REPO" --pr "$PR" \ + --repo-path "${{ github.workspace }}/target-repo" --issues "${ISSUES:-0}" } { diff --git a/scripts/build_cta.py b/scripts/build_cta.py new file mode 100644 index 0000000..4c8f965 --- /dev/null +++ b/scripts/build_cta.py @@ -0,0 +1,91 @@ +"""Build the call-to-action footer appended to the architecture-diff PR comment. + +The footer links into CodeBoarding's click proxy (so owner/repo/pr are tracked): +a "see issues live" banner (only when real health findings exist), an +"explore in browser" workspace link, an editor-specific "open in your editor" +link, and an "install the extension" link. + +Editor coverage is deliberately limited to **VS Code and Cursor**. Per the 2025 +Stack Overflow Developer Survey (https://survey.stackoverflow.co/2025/technology/), +editor usage is VS Code 75.9%, Cursor 17.9%, VSCodium 6.2%, Windsurf 4.9%, +Trae 0.8% — so VS Code + Cursor alone cover ~94% of developers. The long-tail +forks each carry their own URL scheme and extension registry, and don't justify +that upkeep for <7% reach apiece. + +Which editor link(s) appear is inferred from the analyzed repo's own signals: +a ``.vscode`` directory -> VS Code, a ``.cursor`` directory -> Cursor, both -> +both, neither -> VS Code (the safe majority default). + +Self-contained stdlib. +""" + +from __future__ import annotations + +import argparse +from pathlib import Path +from urllib.parse import urlencode + + +def detect_editors(repo_path: Path) -> list[str]: + """Return the editor link(s) to offer, from the repo's ``.vscode``/``.cursor`` dirs. + + ``.vscode`` -> ['vscode'], ``.cursor`` -> ['cursor'], both -> both (VS Code + first), neither -> ['vscode']. Only VS Code and Cursor are considered (see + module docstring for the market-share rationale). 
+ """ + editors: list[str] = [] + if (repo_path / ".vscode").is_dir(): + editors.append("vscode") + if (repo_path / ".cursor").is_dir(): + editors.append("cursor") + return editors or ["vscode"] + + +_EDITOR_LABEL = {"vscode": "VS Code", "cursor": "Cursor"} + + +def build_cta(cta_base: str, owner: str, repo: str, pr: str, repo_path: Path, issues: int = 0) -> str: + """Return the markdown CTA footer, or '' when ``cta_base`` is unset.""" + if not cta_base: + return "" + base = cta_base.rstrip("/") + + def link(path: str, **extra: str) -> str: + return f"{base}/{path}?" + urlencode({"owner": owner, "repo": repo, "pr": pr, **extra}) + + lines = ["", "---"] + if issues > 0: + noun = "issue" if issues == 1 else "issues" + lines += [f"โš ๏ธ **{issues} architecture {noun} found.** [**See live in your browser โ†’**]({link('use-workspace')})", ""] + + lines += [f"๐Ÿ” This is the flattened map. [**Explore this diff live in your browser โ†’**]({link('use-workspace')})", ""] + + editor_links = [ + f"[**Open in {_EDITOR_LABEL[e]} โ†’**]({link('open-in-editor', editor=e)})" for e in detect_editors(repo_path) + ] + lines += [f"๐Ÿ› ๏ธ Already have CodeBoarding? {' ยท '.join(editor_links)}", ""] + + lines += [f"๐Ÿ’ก New to CodeBoarding? 
[**Get the extension →**]({link('use-marketplace')})"] + return "\n".join(lines) + + +def main() -> int: + p = argparse.ArgumentParser(description="Build the architecture-diff PR-comment CTA footer.") + p.add_argument("--cta-base", required=True, help="Click-proxy base URL (empty -> no footer)") + p.add_argument("--owner", required=True) + p.add_argument("--repo", required=True) + p.add_argument("--pr", required=True) + p.add_argument("--repo-path", required=True, type=Path, help="Path to the analyzed repo checkout") + p.add_argument("--issues", default="0", help="Real architecture-issue count (0 -> no warning banner)") + args = p.parse_args() + + try: + issues = int(args.issues or 0) + except ValueError: + issues = 0 + print(build_cta(args.cta_base, args.owner, args.repo, args.pr, args.repo_path, issues)) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) From c25b56d274508e81d59556025cb4651b4df210b8 Mon Sep 17 00:00:00 2001 From: brovatten Date: Wed, 3 Jun 2026 20:26:19 +0200 Subject: [PATCH 07/27] ci: auto-move major version tag (vN) on each release for stable @vN pinning --- .github/workflows/release-major-tag.yml | 41 +++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 .github/workflows/release-major-tag.yml diff --git a/.github/workflows/release-major-tag.yml b/.github/workflows/release-major-tag.yml new file mode 100644 index 0000000..159e1ca --- /dev/null +++ b/.github/workflows/release-major-tag.yml @@ -0,0 +1,41 @@ +name: Move major version tag + +# Marketplace actions are pinned by consumers as `@v1`, a *moving* major tag that +# should always point at the newest v1.x.x release. This re-points it on every +# published (non-pre) release, e.g. publishing v1.4.2 moves `v1` -> v1.4.2. +# +# First release is still manual (cut a `vX.Y.Z` release once); after that `vX` +# is maintained here automatically. 
+ +on: + release: + types: [published] + +permissions: + contents: write + +jobs: + major-tag: + if: github.event.release.prerelease == false + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ github.event.release.tag_name }} + + - name: Re-point major tag at ${{ github.event.release.tag_name }} + env: + TAG: ${{ github.event.release.tag_name }} + run: | + set -euo pipefail + ver="${TAG#v}" + if ! printf '%s' "$ver" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+'; then + echo "::notice::Tag '$TAG' is not vMAJOR.MINOR.PATCH semver; skipping major-tag move." + exit 0 + fi + major="v${ver%%.*}" + git config user.name "github-actions[bot]" + git config user.email "41898282+github-actions[bot]@users.noreply.github.com" + git tag -fa "$major" -m "Update ${major} to ${TAG}" + git push origin "refs/tags/${major}" --force + echo "::notice::${major} now points at ${TAG}" From 2995d2859c5975bdb25aabcacc2414df255368de Mon Sep 17 00:00:00 2001 From: brovatten Date: Wed, 3 Jun 2026 20:26:19 +0200 Subject: [PATCH 08/27] feat: decouple analysis depth from display depth via render_depth depth_level drives the engine (deep, rich data); new render_depth controls how many component levels the PR Mermaid draws (default 1 = top-level flat). Replaces the nested boolean. Self-test now runs depth_level=2 + render_depth=1. 
--- .github/workflows/test-self.yml | 2 ++ README.md | 6 +++--- action.yml | 9 ++++----- scripts/diff_to_mermaid.py | 25 ++++++++++++++----------- scripts/run_local.sh | 6 +++--- 5 files changed, 26 insertions(+), 22 deletions(-) diff --git a/.github/workflows/test-self.yml b/.github/workflows/test-self.yml index 302e014..493d3aa 100644 --- a/.github/workflows/test-self.yml +++ b/.github/workflows/test-self.yml @@ -22,3 +22,5 @@ jobs: agent_model: ${{ secrets.AGENT_MODEL }} parsing_model: ${{ secrets.PARSING_MODEL }} cta_base_url: https://codeboarding.pontux-inc.workers.dev + depth_level: '2' # analyze deep (rich nested data for the workspace/extension) + render_depth: '1' # but draw a clean top-level diagram in the PR diff --git a/README.md b/README.md index 3ba9387..3507e6f 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,7 @@ You need **one secret**: an LLM API key. OpenRouter is the default; pass your ow | `comment_header` | `Architecture review` | Header line of the PR comment. | | `diagram_direction` | `LR` | Mermaid layout direction: `LR`, `TD`, `TB`, `RL`, or `BT`. | | `changed_only` | `false` | Draw only changed components and their incident edges. | -| `nested` | `false` | Draw depth>1 sub-components as nested subgraphs (pair with `depth_level >= 2`). | +| `render_depth` | `1` | Component levels to **draw** in the PR diagram, independent of `depth_level`: `1` = top-level flat, `2` = +one nesting level as subgraphs. Analyze deep, display shallow. | ## Outputs @@ -97,7 +97,7 @@ Because nothing is pushed (the diagram is inline Mermaid), there is no image ste ## Limitations - **GitHub Mermaid caps.** Inline Mermaid in comments is capped (≈500 edges / 50 000 chars). The action stays under this by auto-falling-back to a changed-only graph; if even that overflows it posts a text summary instead of a broken diagram. -- **Nesting.** By default only the top-level component graph is drawn (matching the engine's default `graph LR`). 
Set `nested: true` with `depth_level >= 2` to draw sub-components as nested subgraphs — leaf nodes filled, parent containers outlined, both colored by status. Large nested graphs are more likely to hit GitHub's Mermaid caps (above), in which case the action degrades to changed-only or a text summary. +- **Analysis depth vs. display depth.** `depth_level` controls how deep the engine *analyzes* (so the workspace/extension get rich nested data); `render_depth` controls how many levels the PR Mermaid *draws*. Keep `render_depth: 1` (default) for a clean top-level PR diagram even when `depth_level: 2`. Set `render_depth: 2` to draw one level of sub-components as subgraphs (leaf nodes filled, parent containers outlined). Large nested graphs are more likely to hit GitHub's Mermaid caps (above), in which case the action degrades to changed-only or a text summary. - **Renames show as remove + add.** Components are matched across the two analyses by name (the stable join), so a renamed component appears as a red removal plus a green addition rather than a single yellow change. - **No click-through.** GitHub renders Mermaid in strict security mode, so node hyperlinks are disabled. @@ -121,7 +121,7 @@ scripts/run_local.sh --repo /path/to/repo --base --head \ --engine /path/to/CodeBoarding # defaults to ../CodeBoarding ``` -Flags: `--depth N`, `--direction LR|TD|…`, `--nested`, `--changed-only`, `--no-edge-labels`, `--out DIR`, `--no-open`. +Flags: `--depth N`, `--direction LR|TD|…`, `--render-depth N`, `--changed-only`, `--no-edge-labels`, `--out DIR`, `--no-open`. The diagram step alone is also directly runnable: diff --git a/action.yml b/action.yml index aadf7f5..2513de4 100644 --- a/action.yml +++ b/action.yml @@ -42,10 +42,10 @@ inputs: description: 'Render only changed components and their incident edges (also auto-applied when the full graph exceeds GitHub''s Mermaid limit).' 
required: false default: 'false' - nested: - description: 'Draw depth>1 sub-components as nested subgraphs (pair with depth_level >= 2).' + render_depth: + description: 'Component levels to DRAW in the PR Mermaid (independent of depth_level): 1 = top-level flat (default), 2 = +one nesting level as subgraphs, etc. Lets you analyze deep (depth_level=2) but display a clean level-1 diagram.' required: false - default: 'false' + default: '1' cta_base_url: description: 'Base URL of the click proxy (e.g. https://go.codeboarding.org). When set, the comment adds "open in workspace" / "get the extension" links with owner/repo/pr appended. Empty disables the CTA.' required: false @@ -356,9 +356,8 @@ runs: id: diagram shell: bash run: | - FLAG="" + FLAG="--render-depth ${{ inputs.render_depth }}" [ "${{ inputs.changed_only }}" = "true" ] && FLAG="$FLAG --changed-only" - [ "${{ inputs.nested }}" = "true" ] && FLAG="$FLAG --nested" META=$(python3 ${{ github.action_path }}/scripts/diff_to_mermaid.py \ --base "${{ steps.analyze.outputs.base_analysis }}" \ --head "${{ steps.analyze.outputs.head_analysis }}" \ diff --git a/scripts/diff_to_mermaid.py b/scripts/diff_to_mermaid.py index 6d9f6fd..b7196c8 100644 --- a/scripts/diff_to_mermaid.py +++ b/scripts/diff_to_mermaid.py @@ -304,7 +304,7 @@ def render_mermaid( direction: str = "LR", changed_only: bool = False, edge_labels: bool = True, - nested: bool = False, + render_depth: int = 1, font_size: int | None = None, node_padding: int | None = None, node_spacing: int | None = None, @@ -312,10 +312,13 @@ def render_mermaid( ) -> tuple: """Return (mermaid_text, meta). ``mermaid_text`` is None when there's nothing to draw. - With ``nested`` the depth>1 sub-components are drawn as Mermaid subgraphs — - leaf nodes get a filled class, parent containers a stroke-only ``*Box`` - class. A wholly-added parent forces ``added`` onto its subtree (the engine - only diff-annotates surviving branches; an added subtree arrives raw). 
+ ``render_depth`` controls how many component levels are drawn, independent of + the engine's analysis depth: 1 = top-level flat (default), 2 = top-level plus + one level of sub-components as subgraphs, etc. So you can analyze deep + (depth_level=2) yet render a clean level-1 PR diagram. At each drawn nesting + level, parent containers get a stroke-only ``*Box`` class and leaf nodes a + filled class. A wholly-added parent forces ``added`` onto its subtree (the + engine only diff-annotates surviving branches; an added subtree arrives raw). """ components = diff.get("components") or [] relations = diff.get("components_relations") or [] @@ -345,17 +348,17 @@ def emit_edges(rels: list, scope: _Scope, pad: str, force: str | None) -> None: edge_styles[status].append(counters["edges"]) counters["edges"] += 1 - def emit_level(comps: list, rels: list, indent: int, force: str | None) -> None: + def emit_level(comps: list, rels: list, indent: int, force: str | None, level: int) -> None: pad = " " * indent scope = _Scope(comps, used, force) for key, label, status, comp in scope.entries: - children = comp.get("components") if nested else None + children = comp.get("components") if level < render_depth else None # cap drawn nesting if children: body.append(f'{pad}subgraph {key}["{_esc(label)}"]') if status in box_classes: box_classes[status].append(key) child_force = force or (status if status == "added" else None) - emit_level(children, comp.get("components_relations") or [], indent + 1, child_force) + emit_level(children, comp.get("components_relations") or [], indent + 1, child_force, level + 1) body.append(f"{pad}end") else: body.append(f'{pad}{key}["{_esc(label)}"]') @@ -364,7 +367,7 @@ def emit_level(comps: list, rels: list, indent: int, force: str | None) -> None: counters["nodes"] += 1 emit_edges(rels, scope, pad, force) - emit_level(components, relations, 1, None) + emit_level(components, relations, 1, None, 1) if counters["nodes"] == 0: return None, {"n_changed": 
n_changed, "n_nodes": 0, "n_edges": 0, "truncated": False} @@ -418,7 +421,7 @@ def main() -> int: p.add_argument("--direction", default="LR", choices=["LR", "TD", "TB", "RL", "BT"]) p.add_argument("--changed-only", action="store_true", help="Render only changed components + incident edges") p.add_argument("--no-edge-labels", dest="edge_labels", action="store_false", help="Draw arrows without relation labels") - p.add_argument("--nested", action="store_true", help="Draw depth>1 sub-components as subgraphs") + p.add_argument("--render-depth", type=int, default=1, help="Component levels to draw: 1=top-level flat, 2=+one nesting level, ...") p.add_argument("--font-size", type=int, default=None, help="Node label font size in px (bigger label โ‡’ bigger node)") p.add_argument("--node-padding", type=int, default=None, help="Interior padding around each node label") p.add_argument("--node-spacing", type=int, default=None, help="Space between nodes in the same rank") @@ -431,7 +434,7 @@ def main() -> int: direction=args.direction, changed_only=args.changed_only, edge_labels=args.edge_labels, - nested=args.nested, + render_depth=args.render_depth, font_size=args.font_size, node_padding=args.node_padding, node_spacing=args.node_spacing, diff --git a/scripts/run_local.sh b/scripts/run_local.sh index 8a6d7d7..ecb66f7 100755 --- a/scripts/run_local.sh +++ b/scripts/run_local.sh @@ -27,7 +27,7 @@ DEPTH="1" DIRECTION="LR" CHANGED_ONLY="" NO_EDGE_LABELS="" -NESTED="" +RENDER_DEPTH="" EXTRA="" OPEN="auto" REPO="" BASE_REF="" HEAD_REF="" BASE_JSON="" HEAD_JSON="" @@ -47,7 +47,7 @@ while [ $# -gt 0 ]; do --direction) DIRECTION="$2"; shift 2;; --changed-only) CHANGED_ONLY="--changed-only"; shift;; --no-edge-labels) NO_EDGE_LABELS="--no-edge-labels"; shift;; - --nested) NESTED="--nested"; shift;; + --render-depth) RENDER_DEPTH="--render-depth $2"; shift 2;; --extra) EXTRA="$2"; shift 2;; # raw args forwarded to diff_to_mermaid.py, e.g. 
--extra "--font-size 20 --node-padding 16" --no-open) OPEN="no"; shift;; -h|--help) sed -n '2,30p' "${BASH_SOURCE[0]}"; exit 0;; @@ -125,7 +125,7 @@ fi echo "== Diff -> Mermaid ==" META="$(python3 "$ACTION_DIR/scripts/diff_to_mermaid.py" \ --base "$BASE_ANALYSIS" --head "$HEAD_ANALYSIS" \ - --out "$OUT/diagram.md" --direction "$DIRECTION" $CHANGED_ONLY $NO_EDGE_LABELS $NESTED $EXTRA)" + --out "$OUT/diagram.md" --direction "$DIRECTION" $CHANGED_ONLY $NO_EDGE_LABELS $RENDER_DEPTH $EXTRA)" echo " $META" # Browser preview: render the (fence-stripped) mermaid via mermaid.js, strict mode From 04223e0a104cfa49027530378f6a99e3d2531e27 Mon Sep 17 00:00:00 2001 From: brovatten Date: Wed, 3 Jun 2026 20:34:06 +0200 Subject: [PATCH 09/27] chore(publish-prep): pin engine to v0.12.0, README inputs (cta_base_url), unit tests + CI - engine_ref default main -> v0.12.0 (verified to have the run_full/run_incremental/ health/StaticAnalysisCache API); reproducible default, overridable. - README: engine_ref + cta_base_url rows; depth_level vs render_depth clarified. - tests/: 21 stdlib unittest cases for diff_to_mermaid + build_cta. - .github/workflows/test.yml runs them on push/PR (no LLM). 
--- .github/workflows/test.yml | 17 +++++ README.md | 5 +- action.yml | 4 +- tests/test_build_cta.py | 65 ++++++++++++++++++ tests/test_diff_to_mermaid.py | 122 ++++++++++++++++++++++++++++++++++ 5 files changed, 209 insertions(+), 4 deletions(-) create mode 100644 .github/workflows/test.yml create mode 100644 tests/test_build_cta.py create mode 100644 tests/test_diff_to_mermaid.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..e16776b --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,17 @@ +name: Tests + +on: + push: + branches: [main] + pull_request: + +jobs: + unittest: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.13' + - name: Run unit tests (stdlib only) + run: python -m unittest discover -s tests -v diff --git a/README.md b/README.md index 3507e6f..314f9d5 100644 --- a/README.md +++ b/README.md @@ -48,14 +48,15 @@ You need **one secret**: an LLM API key. OpenRouter is the default; pass your ow |---|---|---| | `llm_api_key` | (required) | LLM API key. Currently OpenRouter (`OPENROUTER_API_KEY`). | | `github_token` | `${{ github.token }}` | Token used to post the comment. | -| `engine_ref` | `main` | Git ref of `CodeBoarding/CodeBoarding`. Pin in production. | -| `depth_level` | `1` | Diagram depth (1โ€“3). Higher = slower + more detail. | +| `engine_ref` | `v0.12.0` | Git ref of `CodeBoarding/CodeBoarding` (pinned to a release). Override to track a newer ref. | +| `depth_level` | `1` | Engine **analysis** depth (1โ€“3). Higher = slower + richer data. See `render_depth` for the diagram. | | `agent_model` | `openrouter/anthropic/claude-sonnet-4` | LLM for analysis. | | `parsing_model` | `openrouter/anthropic/claude-sonnet-4` | LLM for parsing. | | `comment_header` | `Architecture review` | Header line of the PR comment. | | `diagram_direction` | `LR` | Mermaid layout direction: `LR`, `TD`, `TB`, `RL`, or `BT`. 
| | `changed_only` | `false` | Draw only changed components and their incident edges. | | `render_depth` | `1` | Component levels to **draw** in the PR diagram, independent of `depth_level`: `1` = top-level flat, `2` = +one nesting level as subgraphs. Analyze deep, display shallow. | +| `cta_base_url` | `''` | Base URL of a click proxy. When set, the comment adds "explore in browser" / "open in VS Code or Cursor" / "get the extension" links (with `owner`/`repo`/`pr` appended). Empty disables the CTA. | ## Outputs diff --git a/action.yml b/action.yml index 2513de4..1bf6382 100644 --- a/action.yml +++ b/action.yml @@ -15,9 +15,9 @@ inputs: required: false default: ${{ github.token }} engine_ref: - description: 'Git ref of CodeBoarding/CodeBoarding to use as the analysis engine.' + description: 'Git ref (tag/branch/SHA) of CodeBoarding/CodeBoarding used as the analysis engine. Pinned to a release for reproducibility; override to track a newer ref.' required: false - default: 'main' + default: 'v0.12.0' depth_level: description: 'Diagram depth (1-3). Higher is slower and more detailed.' 
required: false diff --git a/tests/test_build_cta.py b/tests/test_build_cta.py new file mode 100644 index 0000000..ca3a2a3 --- /dev/null +++ b/tests/test_build_cta.py @@ -0,0 +1,65 @@ +"""Unit tests for scripts/build_cta.py โ€” editor detection + CTA footer.""" + +import sys +import tempfile +import unittest +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "scripts")) +import build_cta as bc # noqa: E402 + + +def repo_with(*dirs): + d = Path(tempfile.mkdtemp()) + for x in dirs: + (d / x).mkdir() + return d + + +class TestDetectEditors(unittest.TestCase): + def test_neither_defaults_to_vscode(self): + self.assertEqual(bc.detect_editors(repo_with()), ["vscode"]) + + def test_vscode_only(self): + self.assertEqual(bc.detect_editors(repo_with(".vscode")), ["vscode"]) + + def test_cursor_only(self): + self.assertEqual(bc.detect_editors(repo_with(".cursor")), ["cursor"]) + + def test_both_vscode_first(self): + self.assertEqual(bc.detect_editors(repo_with(".vscode", ".cursor")), ["vscode", "cursor"]) + + +class TestBuildCta(unittest.TestCase): + def test_empty_base_yields_no_footer(self): + self.assertEqual(bc.build_cta("", "o", "r", "1", repo_with()), "") + + def test_links_banner_and_cursor_only(self): + out = bc.build_cta("https://x.dev/", "Org", "Repo", "9", repo_with(".cursor"), issues=2) + self.assertIn("2 architecture issues found", out) + self.assertIn("use-workspace?owner=Org&repo=Repo&pr=9", out) + self.assertIn("open-in-editor?owner=Org&repo=Repo&pr=9&editor=cursor", out) + self.assertIn("use-marketplace?owner=Org&repo=Repo&pr=9", out) + self.assertNotIn("Open in VS Code", out) # cursor-only repo + + def test_no_banner_when_zero_issues_and_default_vscode(self): + out = bc.build_cta("https://x.dev", "o", "r", "1", repo_with(), issues=0) + self.assertNotIn("architecture issue", out) + self.assertIn("Open in VS Code", out) + self.assertNotIn("Open in Cursor", out) + + def test_both_editors_singular_issue(self): + out = 
bc.build_cta("https://x.dev", "o", "r", "1", repo_with(".vscode", ".cursor"), issues=1) + self.assertIn("1 architecture issue found", out) # singular + self.assertIn("Open in VS Code", out) + self.assertIn("Open in Cursor", out) + + def test_trailing_slash_in_base_is_normalized(self): + a = bc.build_cta("https://x.dev/", "o", "r", "1", repo_with()) + b = bc.build_cta("https://x.dev", "o", "r", "1", repo_with()) + self.assertNotIn("x.dev//", a) + self.assertEqual(a, b) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_diff_to_mermaid.py b/tests/test_diff_to_mermaid.py new file mode 100644 index 0000000..10aa0a8 --- /dev/null +++ b/tests/test_diff_to_mermaid.py @@ -0,0 +1,122 @@ +"""Unit tests for scripts/diff_to_mermaid.py โ€” diff logic + Mermaid rendering.""" + +import re +import sys +import unittest +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "scripts")) +import diff_to_mermaid as dm # noqa: E402 + + +def comp(name, files=None, cid=None, subs=None, subrels=None): + c = { + "name": name, + "component_id": cid or name, + "file_methods": [{"file_path": f, "methods": m} for f, m in (files or {}).items()], + } + if subs is not None: + c["components"] = subs + if subrels is not None: + c["components_relations"] = subrels + return c + + +def rel(src, dst, label="calls"): + return {"src_name": src, "dst_name": dst, "src_id": src, "dst_id": dst, "relation": label} + + +def linkstyle_indices_in_range(text): + n_edges = text.count("-->") + idxs = [int(x) for m in re.finditer(r"linkStyle ([\d,]+)", text) for x in m.group(1).split(",")] + return all(i < n_edges for i in idxs) + + +class TestDiff(unittest.TestCase): + def test_added_modified_deleted_unchanged(self): + base = {"components": [comp("A", {"a.py": ["f"]}), comp("B"), comp("D")], "components_relations": []} + head = {"components": [comp("A", {"a.py": ["f", "g"]}), comp("B"), comp("C")], "components_relations": []} + status = {c["name"]: 
c["diff_status"] for c in dm.build_diff(base, head)["components"]} + self.assertEqual(status["A"], "modified") # method added inside the component + self.assertEqual(status["B"], "unchanged") + self.assertEqual(status["C"], "added") + self.assertEqual(status["D"], "deleted") + + def test_structural_change_is_modified(self): + base = {"components": [comp("A", {"a.py": ["f"]})], "components_relations": []} + head = {"components": [comp("A", {"a.py": ["f"], "b.py": ["h"]})], "components_relations": []} + self.assertEqual(dm.build_diff(base, head)["components"][0]["diff_status"], "modified") + + def test_rename_is_add_plus_delete(self): + base = {"components": [comp("Old")], "components_relations": []} + head = {"components": [comp("New")], "components_relations": []} + status = {c["name"]: c["diff_status"] for c in dm.build_diff(base, head)["components"]} + self.assertEqual(status, {"New": "added", "Old": "deleted"}) + + def test_relation_modified_on_label_change(self): + base = {"components": [comp("A"), comp("B")], "components_relations": [rel("A", "B", "uses")]} + head = {"components": [comp("A"), comp("B")], "components_relations": [rel("A", "B", "calls")]} + self.assertEqual(dm.build_diff(base, head)["components_relations"][0]["diff_status"], "modified") + + def test_relation_added_and_deleted(self): + base = {"components": [comp("A"), comp("B")], "components_relations": [rel("A", "B")]} + head = {"components": [comp("A"), comp("B")], "components_relations": [rel("B", "A")]} + statuses = sorted(r["diff_status"] for r in dm.build_diff(base, head)["components_relations"]) + self.assertEqual(statuses, ["added", "deleted"]) + + +class TestRender(unittest.TestCase): + def _diff(self): + base = {"components": [comp("A"), comp("B"), comp("Gone")], "components_relations": [rel("A", "B"), rel("A", "Gone")]} + head = {"components": [comp("A", {"x.py": ["f"]}), comp("B"), comp("New")], "components_relations": [rel("A", "B"), rel("A", "New")]} + return dm.build_diff(base, 
head) + + def test_flat_default_has_no_subgraphs(self): + text, _ = dm.render_mermaid(self._diff(), render_depth=1) + self.assertNotIn("subgraph", text) + for cls in ("added", "modified", "deleted"): + self.assertIn(f"classDef {cls}", text) + self.assertTrue(linkstyle_indices_in_range(text)) + + def test_nested_subgraphs_balanced_and_valid(self): + base = {"components": [comp("P", subs=[comp("c1"), comp("c2")], subrels=[rel("c1", "c2")])], "components_relations": []} + head = {"components": [comp("P", subs=[comp("c1"), comp("c3")], subrels=[rel("c1", "c3")])], "components_relations": []} + text, _ = dm.render_mermaid(dm.build_diff(base, head), render_depth=2) + sg = sum(1 for line in text.splitlines() if line.strip().startswith("subgraph ")) + en = sum(1 for line in text.splitlines() if line.strip() == "end") + self.assertGreater(sg, 0) + self.assertEqual(sg, en) + self.assertTrue(linkstyle_indices_in_range(text)) + + def test_render_depth_caps_at_data_depth(self): + base = {"components": [comp("P", subs=[comp("c1")], subrels=[])], "components_relations": []} + head = {"components": [comp("P", subs=[comp("c1"), comp("c2")], subrels=[])], "components_relations": []} + diff = dm.build_diff(base, head) + deep = dm.render_mermaid(diff, render_depth=5)[0] + two = dm.render_mermaid(diff, render_depth=2)[0] + self.assertEqual(deep, two) # no level-3 data, so depth 5 == depth 2 + + def test_label_escaping(self): + head = {"components": [comp('A "q" #h'), comp("B")], "components_relations": []} + base = {"components": [comp("B")], "components_relations": []} + text, _ = dm.render_mermaid(dm.build_diff(base, head), render_depth=1) + self.assertIn("#quot;", text) + self.assertIn("#35;", text) + + def test_changed_only_truncates(self): + text, meta = dm.render_mermaid(self._diff(), render_depth=1, changed_only=True) + self.assertIsNotNone(text) + self.assertTrue(meta["truncated"]) + + def test_empty_returns_none(self): + text, meta = dm.render_mermaid({"components": [], 
"components_relations": []}) + self.assertIsNone(text) + self.assertEqual(meta["n_nodes"], 0) + + def test_no_edge_labels(self): + text, _ = dm.render_mermaid(self._diff(), render_depth=1, edge_labels=False) + self.assertNotIn(' -- "', text) + + +if __name__ == "__main__": + unittest.main() From 6de6045a8c9f1dc2d8844695f319e1d92e9f3301 Mon Sep 17 00:00:00 2001 From: brovatten Date: Wed, 3 Jun 2026 21:00:43 +0200 Subject: [PATCH 10/27] perf(ci): reuse cached venv (drop uv venv --clear) + enable setup-uv cache The manual .venv cache was restored then immediately wiped by 'uv venv --clear', so it never helped. Reuse it when present and enable uv's download cache for cold installs. --- action.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/action.yml b/action.yml index 1bf6382..4cd5249 100644 --- a/action.yml +++ b/action.yml @@ -119,6 +119,8 @@ runs: - name: Install uv if: steps.guard.outputs.skip != 'true' uses: astral-sh/setup-uv@v4 + with: + enable-cache: true # cache ~/.cache/uv (wheels/builds) for fast cold installs - name: Cache uv venv (engine) if: steps.guard.outputs.skip != 'true' @@ -145,7 +147,7 @@ runs: shell: bash working-directory: codeboarding-engine run: | - uv venv --clear + test -d .venv || uv venv # reuse the cached venv instead of wiping it (--clear defeated the cache) uv pip install -e . - name: Install LSP servers From 76a3c9eb2a1d6a5ea287b2712193449530552b1f Mon Sep 17 00:00:00 2001 From: brovatten Date: Wed, 3 Jun 2026 22:07:19 +0200 Subject: [PATCH 11/27] feat: /codeboarding comment command to trigger on-demand (issue_comment) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Guard now resolves the PR from either a pull_request event or an issue_comment '/codeboarding' command (comment body read from env, never interpolated -> no injection; SHAs fetched via gh api). Reacts ๐Ÿ‘€ to acknowledge. Configurable via trigger_command. 
Sticky comment + base_ref now use the resolved PR explicitly. --- README.md | 18 ++++++++++++++-- action.yml | 62 ++++++++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 69 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 314f9d5..19dc842 100644 --- a/README.md +++ b/README.md @@ -25,14 +25,21 @@ name: Architecture diff on: pull_request: types: [opened, synchronize, reopened, ready_for_review] + issue_comment: # enables the /codeboarding command on PRs + types: [created] permissions: - pull-requests: write # the only permission needed โ€” nothing is pushed + pull-requests: write # the only permission needed โ€” nothing is pushed jobs: diagram: runs-on: ubuntu-latest - if: github.event.pull_request.draft == false + # Run on (non-draft) PR events, OR when someone comments "/codeboarding" on a PR. + # The if-gate is important: without it a runner spins up for every comment. + if: > + (github.event_name == 'pull_request' && github.event.pull_request.draft == false) || + (github.event_name == 'issue_comment' && github.event.issue.pull_request != null && + startsWith(github.event.comment.body, '/codeboarding')) timeout-minutes: 60 steps: - uses: codeboarding/codeboarding-action@v1 @@ -42,6 +49,12 @@ jobs: You need **one secret**: an LLM API key. OpenRouter is the default; pass your own model via the `agent_model` / `parsing_model` inputs if you prefer. +### On-demand: the `/codeboarding` command + +Comment **`/codeboarding`** on any pull request to (re)run the diagram on demand โ€” handy after the engine/baseline changes, or on draft PRs you don't auto-review. The action reacts with ๐Ÿ‘€ to acknowledge. Change the word via the `trigger_command` input. + +> **Note:** GitHub runs `issue_comment` workflows from the **default branch's** copy of the workflow file. So the command only works once this workflow is merged to your default branch โ€” a workflow that exists only on a feature branch won't respond to comments. 
+ ## Inputs | Input | Default | Description | @@ -57,6 +70,7 @@ You need **one secret**: an LLM API key. OpenRouter is the default; pass your ow | `changed_only` | `false` | Draw only changed components and their incident edges. | | `render_depth` | `1` | Component levels to **draw** in the PR diagram, independent of `depth_level`: `1` = top-level flat, `2` = +one nesting level as subgraphs. Analyze deep, display shallow. | | `cta_base_url` | `''` | Base URL of a click proxy. When set, the comment adds "explore in browser" / "open in VS Code or Cursor" / "get the extension" links (with `owner`/`repo`/`pr` appended). Empty disables the CTA. | +| `trigger_command` | `/codeboarding` | PR-comment slash-command that triggers an on-demand run (requires the `issue_comment` trigger in your workflow). | ## Outputs diff --git a/action.yml b/action.yml index 4cd5249..a479545 100644 --- a/action.yml +++ b/action.yml @@ -50,6 +50,10 @@ inputs: description: 'Base URL of the click proxy (e.g. https://go.codeboarding.org). When set, the comment adds "open in workspace" / "get the extension" links with owner/repo/pr appended. Empty disables the CTA.' required: false default: '' + trigger_command: + description: 'Slash-command that triggers the action from a PR comment (issue_comment event). A comment whose first word is this runs the diagram on-demand.' + required: false + default: '/codeboarding' outputs: diagram_md: @@ -65,20 +69,59 @@ outputs: runs: using: 'composite' steps: - - name: Guard โ€” PR event only + - name: Guard โ€” resolve the target PR id: guard shell: bash + env: + GH_TOKEN: ${{ inputs.github_token }} + # Read from env, NEVER interpolated into the script โ€” a comment body is + # untrusted input and must not reach the shell as code (injection). 
+ COMMENT_BODY: ${{ github.event.comment.body }} + TRIGGER: ${{ inputs.trigger_command }} + EVENT: ${{ github.event_name }} run: | - if [ -z "${{ github.event.pull_request.number }}" ]; then - echo "::warning::CodeBoarding Architecture Diff only runs on pull_request events. Skipping." - echo "skip=true" >> $GITHUB_OUTPUT + skip() { echo "::notice::$1 Skipping."; echo "skip=true" >> $GITHUB_OUTPUT; exit 0; } + + if [ "$EVENT" = "pull_request" ] || [ "$EVENT" = "pull_request_target" ]; then + PR_NUMBER="${{ github.event.pull_request.number }}" + BASE_SHA="${{ github.event.pull_request.base.sha }}" + HEAD_SHA="${{ github.event.pull_request.head.sha }}" + BASE_REF="${{ github.event.pull_request.base.ref }}" + elif [ "$EVENT" = "issue_comment" ]; then + # On-demand "/codeboarding" command. Must be a PR comment whose first + # word is the trigger; the payload lacks SHAs so we query the API. + [ -n "${{ github.event.issue.pull_request.url }}" ] || skip "Comment is on a plain issue, not a PR." + FIRST_WORD="$(printf '%s' "$COMMENT_BODY" | tr -d '\r' | awk 'NR==1{print $1; exit}')" + [ "$FIRST_WORD" = "$TRIGGER" ] || skip "Comment does not start with '$TRIGGER'." + PR_NUMBER="${{ github.event.issue.number }}" + PR_JSON="$(gh api "repos/${{ github.repository }}/pulls/${PR_NUMBER}")" + BASE_SHA="$(printf '%s' "$PR_JSON" | python3 -c 'import json,sys;print(json.load(sys.stdin)["base"]["sha"])')" + HEAD_SHA="$(printf '%s' "$PR_JSON" | python3 -c 'import json,sys;print(json.load(sys.stdin)["head"]["sha"])')" + BASE_REF="$(printf '%s' "$PR_JSON" | python3 -c 'import json,sys;print(json.load(sys.stdin)["base"]["ref"])')" else - echo "skip=false" >> $GITHUB_OUTPUT - echo "base_sha=${{ github.event.pull_request.base.sha }}" >> $GITHUB_OUTPUT - echo "head_sha=${{ github.event.pull_request.head.sha }}" >> $GITHUB_OUTPUT - echo "pr_number=${{ github.event.pull_request.number }}" >> $GITHUB_OUTPUT + skip "Unsupported event '$EVENT' (use pull_request or issue_comment)." 
fi + [ -n "$PR_NUMBER" ] || skip "No pull request in context." + { + echo "skip=false" + echo "pr_number=$PR_NUMBER" + echo "base_sha=$BASE_SHA" + echo "head_sha=$HEAD_SHA" + echo "base_ref=$BASE_REF" + } >> $GITHUB_OUTPUT + echo "Resolved PR #$PR_NUMBER (base=$BASE_SHA head=$HEAD_SHA) via $EVENT" + + - name: Acknowledge command + if: steps.guard.outputs.skip != 'true' && github.event_name == 'issue_comment' + shell: bash + env: + GH_TOKEN: ${{ inputs.github_token }} + run: | + # ๐Ÿ‘€ react to the triggering comment so the user knows it was picked up. + gh api -X POST "repos/${{ github.repository }}/issues/comments/${{ github.event.comment.id }}/reactions" \ + -f content=eyes >/dev/null 2>&1 || true + - name: Checkout CodeBoarding engine if: steps.guard.outputs.skip != 'true' uses: actions/checkout@v4 @@ -379,7 +422,7 @@ runs: shell: bash run: | HEADER="${{ inputs.comment_header }}" - BASE_REF="${{ github.event.pull_request.base.ref }}" + BASE_REF="${{ steps.guard.outputs.base_ref }}" N="${{ steps.diagram.outputs.n_changed }}" RENDERED="${{ steps.diagram.outputs.rendered }}" TRUNC="${{ steps.diagram.outputs.truncated }}" @@ -440,5 +483,6 @@ runs: uses: marocchino/sticky-pull-request-comment@v2 with: header: codeboarding-architecture-diff + number: ${{ steps.guard.outputs.pr_number }} path: ${{ steps.body.outputs.body_file }} GITHUB_TOKEN: ${{ inputs.github_token }} From 6e183ea30c1c1f174db31e1f618c1de40b2ffeba Mon Sep 17 00:00:00 2001 From: brovatten Date: Thu, 4 Jun 2026 01:07:34 +0200 Subject: [PATCH 12/27] strategy: document commit/no-commit (analysis+health commit, pkl cache); CTA -> extension-direct - docs/COMMIT_STRATEGY.md: commit analysis.json + health_report.json (text, display-critical); do NOT commit static_analysis.pkl (binary -> actions/cache). Forward-compatible with a future hosted webview. - build_cta: drop the 'explore in browser' webview tier (deferred); footer now drives straight to the extension (open in VS Code/Cursor + get the extension). 
--- README.md | 2 +- docs/COMMIT_STRATEGY.md | 52 +++++++++++++++++++++++++++++++++++++++++ scripts/build_cta.py | 23 +++++++++--------- tests/test_build_cta.py | 2 +- 4 files changed, 66 insertions(+), 13 deletions(-) create mode 100644 docs/COMMIT_STRATEGY.md diff --git a/README.md b/README.md index 19dc842..2d17818 100644 --- a/README.md +++ b/README.md @@ -69,7 +69,7 @@ Comment **`/codeboarding`** on any pull request to (re)run the diagram on demand | `diagram_direction` | `LR` | Mermaid layout direction: `LR`, `TD`, `TB`, `RL`, or `BT`. | | `changed_only` | `false` | Draw only changed components and their incident edges. | | `render_depth` | `1` | Component levels to **draw** in the PR diagram, independent of `depth_level`: `1` = top-level flat, `2` = +one nesting level as subgraphs. Analyze deep, display shallow. | -| `cta_base_url` | `''` | Base URL of a click proxy. When set, the comment adds "explore in browser" / "open in VS Code or Cursor" / "get the extension" links (with `owner`/`repo`/`pr` appended). Empty disables the CTA. | +| `cta_base_url` | `''` | Base URL of a click proxy. When set, the comment adds "open in VS Code / Cursor" + "get the extension" links (with `owner`/`repo`/`pr` appended) that drive straight to the extension. Empty disables the CTA. | | `trigger_command` | `/codeboarding` | PR-comment slash-command that triggers an on-demand run (requires the `issue_comment` trigger in your workflow). | ## Outputs diff --git a/docs/COMMIT_STRATEGY.md b/docs/COMMIT_STRATEGY.md new file mode 100644 index 0000000..3d16fa8 --- /dev/null +++ b/docs/COMMIT_STRATEGY.md @@ -0,0 +1,52 @@ +# Baseline & artifact commit strategy + +What CodeBoarding writes into a repo, what we commit vs. cache, where, and how +today's choice keeps a future hosted-webview viewer possible without rework. 
+ +## The artifacts + +The engine writes these under `.codeboarding/`: + +| File | Type | Size | Purpose | +|---|---|---|---| +| `analysis.json` | JSON (text) | KB–low MB | The component graph — **the diagram source** | +| `health/health_report.json` | JSON (text) | KB | Health findings → **the warnings** | +| `static_analysis.pkl` | binary pickle | MB-scale | LSP/CFG cache → **warm-start** (re-LSP only changed files) | +| `static_analysis.sha` | text (1 line) | bytes | Tag recording the pkl's commit → the warm-start gate | + +## Decision + +**Commit (text, small, display-critical):** +- ✅ `analysis.json` — required for the extension (and later the webview) to **show the diagram instantly without regenerating** — i.e. without spending the user's API key. It's text and diffs meaningfully. +- ✅ `health/health_report.json` — required for warnings in the extension/webview. Small text. + +**Do NOT commit (binary, bloat):** +- ❌ `static_analysis.pkl` — binary, MB-scale, noisy diffs, repo bloat. It is a *rebuildable speed cache*, not display data. Keep it in **`actions/cache` keyed by the base SHA** (or a backend). A cache miss just falls back to a cold (full) LSP pass — slower but correct, and the committed `analysis.json` still drives the diagram. +- `static_analysis.sha` — commit **only** if the pkl is kept reachable (cache/backend); on its own it's harmless but unused. + +> **Principle:** version-control the *source-of-truth display data* (text, small); *cache* the *rebuildable speed artifacts* (binary, large). This is exactly what keeps the repo clean — the thing that bloats (`.pkl`) never enters git. + +## Where to commit — two separate workflows + +1. **CI/CD on `main` (the baseline keeper).** On push to `main`, regenerate and commit `analysis.json` + `health/health_report.json` to `main`. Keeps the baseline current so PRs diff against an accurate, up-to-date snapshot and the extension shows a real diagram on the default branch. 
+ +2. **The review action (PR).** **Comment-only by default** — no commits to contributors' branches (no churn, and it still works on fork PRs where the token is read-only). The PR comment leads users to the extension. + - *Optional later:* commit the head `analysis.json` to the PR branch so opening the extension on that PR shows the exact head diagram. Deferred — it pushes a bot commit to the contributor's branch and can't run on fork PRs. + +## Now vs. later + +- **Now — extension-direct.** Committing `analysis.json` + `health_report.json` on `main` means a user who installs the extension and opens the repo sees the committed diagram + warnings **instantly, with no API key**. The PR comment's CTA points straight at the extension (install / open in editor). +- **Later — hosted webview.** The webview needs the **same** committed `analysis.json` (+ a diff + health). So committing now is **forward-compatible**: when the viewer is built, the data already exists at each commit — no migration, just a host layer that reads it. (See `scripts/render_diagram.mjs` — it's the headless prototype of that viewer.) + +## Warm-start tradeoff (the `.pkl`) + +The committed-baseline warm-start needs the pkl **and** its `.sha`. By caching the pkl (not committing) keyed by base SHA, PR runs restore the base-branch cache → warm-start; on 7-day eviction → cold LSP. This keeps the repo clean and still gets the speedup most of the time. + +## Summary + +| Artifact | Commit? 
| Where | Why | +|---|---|---|---| +| `analysis.json` | ✅ | `main` (CI/CD); PR branch optional/later | diagram source; powers extension now + webview later | +| `health_report.json` | ✅ | with `analysis.json` | warnings | +| `static_analysis.pkl` | ❌ | `actions/cache` (or backend), key = base SHA | binary speed cache; never bloat git | +| `static_analysis.sha` | ⚠️ optional | with the cached pkl | warm-start gate; useless without the pkl | diff --git a/scripts/build_cta.py b/scripts/build_cta.py index 4c8f965..a505fce 100644 --- a/scripts/build_cta.py +++ b/scripts/build_cta.py @@ -1,9 +1,11 @@ """Build the call-to-action footer appended to the architecture-diff PR comment. -The footer links into CodeBoarding's click proxy (so owner/repo/pr are tracked): -a "see issues live" banner (only when real health findings exist), an -"explore in browser" workspace link, an editor-specific "open in your editor" -link, and an "install the extension" link. +The footer links into CodeBoarding's click proxy (so owner/repo/pr are tracked) +and currently drives straight to the VS Code/Cursor **extension**: an "open this +architecture in your editor" link (editor-specific) plus an "install the +extension" link, and a warning banner when real health findings exist. A +no-install hosted-webview ("explore in browser") tier is intentionally deferred +(see docs/COMMIT_STRATEGY.md) — the committed analysis already supports it later. Editor coverage is deliberately limited to **VS Code and Cursor**. Per the 2025 Stack Overflow Developer Survey (https://survey.stackoverflow.co/2025/technology/), @@ -53,17 +55,16 @@ def build_cta(cta_base: str, owner: str, repo: str, pr: str, repo_path: Path, is def link(path: str, **extra: str) -> str: return f"{base}/{path}?" 
+ urlencode({"owner": owner, "repo": repo, "pr": pr, **extra}) + editor_links = " ยท ".join( + f"[**Open in {_EDITOR_LABEL[e]} โ†’**]({link('open-in-editor', editor=e)})" for e in detect_editors(repo_path) + ) + lines = ["", "---"] if issues > 0: noun = "issue" if issues == 1 else "issues" - lines += [f"โš ๏ธ **{issues} architecture {noun} found.** [**See live in your browser โ†’**]({link('use-workspace')})", ""] + lines += [f"โš ๏ธ **{issues} architecture {noun} found** โ€” open CodeBoarding to explore them.", ""] - lines += [f"๐Ÿ” This is the flattened map. [**Explore this diff live in your browser โ†’**]({link('use-workspace')})", ""] - - editor_links = [ - f"[**Open in {_EDITOR_LABEL[e]} โ†’**]({link('open-in-editor', editor=e)})" for e in detect_editors(repo_path) - ] - lines += [f"๐Ÿ› ๏ธ Already have CodeBoarding? {' ยท '.join(editor_links)}", ""] + lines += [f"๐Ÿงญ See this architecture in your editor: {editor_links}", ""] lines += [f"๐Ÿ’ก New to CodeBoarding? [**Get the extension โ†’**]({link('use-marketplace')})"] return "\n".join(lines) diff --git a/tests/test_build_cta.py b/tests/test_build_cta.py index ca3a2a3..08fb367 100644 --- a/tests/test_build_cta.py +++ b/tests/test_build_cta.py @@ -37,7 +37,7 @@ def test_empty_base_yields_no_footer(self): def test_links_banner_and_cursor_only(self): out = bc.build_cta("https://x.dev/", "Org", "Repo", "9", repo_with(".cursor"), issues=2) self.assertIn("2 architecture issues found", out) - self.assertIn("use-workspace?owner=Org&repo=Repo&pr=9", out) + self.assertNotIn("use-workspace", out) # webview/browser tier deferred โ€” extension-direct self.assertIn("open-in-editor?owner=Org&repo=Repo&pr=9&editor=cursor", out) self.assertIn("use-marketplace?owner=Org&repo=Repo&pr=9", out) self.assertNotIn("Open in VS Code", out) # cursor-only repo From 715e0e9cecfb76b4b060ea6c7110997b4eb48601 Mon Sep 17 00:00:00 2001 From: brovatten Date: Thu, 4 Jun 2026 11:13:33 +0200 Subject: [PATCH 13/27] fix: address all 23 confirmed 
review findings (security, bugs, hardening) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Security - CRITICAL: gate the /codeboarding (issue_comment) path on author_association (OWNER/MEMBER/COLLABORATOR) before any checkout/analysis — closes the pwn-request/secret-exfiltration hole; README example + security note updated. - Pass base_ref/header/cta_base via env (not interpolated into bash); extract engine python -c into scripts/cb_engine.py (no ${{ }}->python-source interp); preflight prints only the error message, not the whole auth body. Bugs (diff_to_mermaid.py) - Escape Mermaid shape metacharacters (] [ ( ) { } | < > &) — a bare ] no longer breaks GitHub rendering. - n_changed is now recursive + a `changed` flag covers relation/nested-only changes; the action shows the diagram on `rendered` instead of suppressing it. - Size guard counts drawn edges and RETRIES changed-only before giving up. - _filter_changed ignores empty id/name strings. - Deleted parent keeps its subtree (renders as a deleted subgraph, symmetric with added). action.yml hardening - Guard: set -uo pipefail + graceful gh-api failure + assert base/head SHAs. - Generate-base: worktree remove/prune (not just rm -rf); cache the generated base keyed by base SHA (pay the cold full-analysis once). - Seed static_analysis.sha with the pkl (enables the committed-baseline warm-start that was previously unreachable). - Health step writes 0 first (no stale count on exception). - Post a failure comment (if: failure()) so a failed run isn't silent. Workflows - release-major-tag: anchor the semver regex ($) so prereleases don't move vN. - test-self + README example: concurrency cancel-in-progress. Tests: +cb_engine smoke tests, escaping/changed-flag cases (31 total, all green). 
--- .github/workflows/release-major-tag.yml | 4 +- .github/workflows/test-self.yml | 4 + README.md | 16 +- action.yml | 228 ++++++++-------- scripts/__pycache__/build_cta.cpython-310.pyc | Bin 0 -> 4655 bytes scripts/__pycache__/cb_engine.cpython-310.pyc | Bin 0 -> 4672 bytes .../diff_to_mermaid.cpython-310.pyc | Bin 0 -> 18389 bytes scripts/build_cta.py | 38 +-- scripts/cb_engine.py | 129 +++++++++ scripts/diff_to_mermaid.py | 245 +++++++++++------- .../test_build_cta.cpython-310.pyc | Bin 0 -> 3871 bytes .../test_cb_engine.cpython-310.pyc | Bin 0 -> 6716 bytes .../test_diff_to_mermaid.cpython-310.pyc | Bin 0 -> 9098 bytes tests/test_build_cta.py | 5 + tests/test_cb_engine.py | 140 ++++++++++ tests/test_diff_to_mermaid.py | 33 +++ 16 files changed, 613 insertions(+), 229 deletions(-) create mode 100644 scripts/__pycache__/build_cta.cpython-310.pyc create mode 100644 scripts/__pycache__/cb_engine.cpython-310.pyc create mode 100644 scripts/__pycache__/diff_to_mermaid.cpython-310.pyc create mode 100644 scripts/cb_engine.py create mode 100644 tests/__pycache__/test_build_cta.cpython-310.pyc create mode 100644 tests/__pycache__/test_cb_engine.cpython-310.pyc create mode 100644 tests/__pycache__/test_diff_to_mermaid.cpython-310.pyc create mode 100644 tests/test_cb_engine.py diff --git a/.github/workflows/release-major-tag.yml b/.github/workflows/release-major-tag.yml index 159e1ca..4b3c08b 100644 --- a/.github/workflows/release-major-tag.yml +++ b/.github/workflows/release-major-tag.yml @@ -29,8 +29,8 @@ jobs: run: | set -euo pipefail ver="${TAG#v}" - if ! printf '%s' "$ver" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+'; then - echo "::notice::Tag '$TAG' is not vMAJOR.MINOR.PATCH semver; skipping major-tag move." + if ! printf '%s' "$ver" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+$'; then + echo "::notice::Tag '$TAG' is not a clean vMAJOR.MINOR.PATCH release (prerelease/suffix); skipping major-tag move." 
exit 0 fi major="v${ver%%.*}" diff --git a/.github/workflows/test-self.yml b/.github/workflows/test-self.yml index 493d3aa..040389d 100644 --- a/.github/workflows/test-self.yml +++ b/.github/workflows/test-self.yml @@ -10,6 +10,10 @@ on: permissions: pull-requests: write +concurrency: + group: self-test-${{ github.event.pull_request.number }} + cancel-in-progress: true + jobs: diagram: runs-on: ubuntu-latest diff --git a/README.md b/README.md index 2d17818..90678f1 100644 --- a/README.md +++ b/README.md @@ -31,15 +31,23 @@ on: permissions: pull-requests: write # the only permission needed โ€” nothing is pushed +# Cancel a superseded run when new commits land on the same PR (avoid stacking +# multi-minute LLM jobs). +concurrency: + group: codeboarding-${{ github.event.pull_request.number || github.event.issue.number }} + cancel-in-progress: true + jobs: diagram: runs-on: ubuntu-latest - # Run on (non-draft) PR events, OR when someone comments "/codeboarding" on a PR. - # The if-gate is important: without it a runner spins up for every comment. + # Run on (non-draft) PR events, OR when a TRUSTED collaborator comments exactly + # "/codeboarding" on a PR. The if-gate matters: (1) without it a runner spins up + # for every comment; (2) the author_association check is a SECURITY gate โ€” see below. 
if: > (github.event_name == 'pull_request' && github.event.pull_request.draft == false) || (github.event_name == 'issue_comment' && github.event.issue.pull_request != null && - startsWith(github.event.comment.body, '/codeboarding')) + (github.event.comment.body == '/codeboarding' || startsWith(github.event.comment.body, '/codeboarding ')) && + contains(fromJSON('["OWNER","MEMBER","COLLABORATOR"]'), github.event.comment.author_association)) timeout-minutes: 60 steps: - uses: codeboarding/codeboarding-action@v1 @@ -47,6 +55,8 @@ jobs: llm_api_key: ${{ secrets.OPENROUTER_API_KEY }} ``` +> โš ๏ธ **Security โ€” the `author_association` gate is required.** `issue_comment` workflows run from your default branch **with full repository secrets, for any commenter**. Without the `OWNER`/`MEMBER`/`COLLABORATOR` check, anyone could comment `/codeboarding` on a fork PR and have the action check out and run the engine over their PR-head code with your `OPENROUTER_API_KEY` present (a "pwn request"). The action's guard enforces this too, but gate it at the workflow level so a runner never even starts for an untrusted commenter. + You need **one secret**: an LLM API key. OpenRouter is the default; pass your own model via the `agent_model` / `parsing_model` inputs if you prefer. ### On-demand: the `/codeboarding` command diff --git a/action.yml b/action.yml index a479545..19aab95 100644 --- a/action.yml +++ b/action.yml @@ -77,10 +77,12 @@ runs: # Read from env, NEVER interpolated into the script โ€” a comment body is # untrusted input and must not reach the shell as code (injection). 
COMMENT_BODY: ${{ github.event.comment.body }} + AUTHOR_ASSOC: ${{ github.event.comment.author_association }} TRIGGER: ${{ inputs.trigger_command }} EVENT: ${{ github.event_name }} run: | - skip() { echo "::notice::$1 Skipping."; echo "skip=true" >> $GITHUB_OUTPUT; exit 0; } + set -uo pipefail + skip() { echo "::notice::$1 Skipping."; echo "skip=true" >> "$GITHUB_OUTPUT"; exit 0; } if [ "$EVENT" = "pull_request" ] || [ "$EVENT" = "pull_request_target" ]; then PR_NUMBER="${{ github.event.pull_request.number }}" @@ -93,23 +95,30 @@ runs: [ -n "${{ github.event.issue.pull_request.url }}" ] || skip "Comment is on a plain issue, not a PR." FIRST_WORD="$(printf '%s' "$COMMENT_BODY" | tr -d '\r' | awk 'NR==1{print $1; exit}')" [ "$FIRST_WORD" = "$TRIGGER" ] || skip "Comment does not start with '$TRIGGER'." + # SECURITY (pwn-request guard): issue_comment runs in the base repo WITH + # secrets for ANY commenter. Only a trusted collaborator may trigger an + # analysis that checks out + runs over PR-head code with the LLM key present. + case "$AUTHOR_ASSOC" in + OWNER|MEMBER|COLLABORATOR) : ;; + *) skip "Commenter is '$AUTHOR_ASSOC' (not OWNER/MEMBER/COLLABORATOR)." ;; + esac PR_NUMBER="${{ github.event.issue.number }}" - PR_JSON="$(gh api "repos/${{ github.repository }}/pulls/${PR_NUMBER}")" - BASE_SHA="$(printf '%s' "$PR_JSON" | python3 -c 'import json,sys;print(json.load(sys.stdin)["base"]["sha"])')" - HEAD_SHA="$(printf '%s' "$PR_JSON" | python3 -c 'import json,sys;print(json.load(sys.stdin)["head"]["sha"])')" - BASE_REF="$(printf '%s' "$PR_JSON" | python3 -c 'import json,sys;print(json.load(sys.stdin)["base"]["ref"])')" + PR_JSON="$(gh api "repos/${{ github.repository }}/pulls/${PR_NUMBER}" 2>/dev/null)" || skip "Could not fetch PR #$PR_NUMBER from the API." + BASE_SHA="$(printf '%s' "$PR_JSON" | python3 -c 'import json,sys;print(json.load(sys.stdin)["base"]["sha"])' 2>/dev/null)" || skip "Could not parse base SHA from the PR API." 
+ HEAD_SHA="$(printf '%s' "$PR_JSON" | python3 -c 'import json,sys;print(json.load(sys.stdin)["head"]["sha"])' 2>/dev/null)" || skip "Could not parse head SHA from the PR API." + BASE_REF="$(printf '%s' "$PR_JSON" | python3 -c 'import json,sys;print(json.load(sys.stdin)["base"]["ref"])' 2>/dev/null)" || BASE_REF="" else skip "Unsupported event '$EVENT' (use pull_request or issue_comment)." fi - [ -n "$PR_NUMBER" ] || skip "No pull request in context." + { [ -n "$PR_NUMBER" ] && [ -n "$BASE_SHA" ] && [ -n "$HEAD_SHA" ]; } || skip "Could not resolve PR/base/head SHAs." { echo "skip=false" echo "pr_number=$PR_NUMBER" echo "base_sha=$BASE_SHA" echo "head_sha=$HEAD_SHA" echo "base_ref=$BASE_REF" - } >> $GITHUB_OUTPUT + } >> "$GITHUB_OUTPUT" echo "Resolved PR #$PR_NUMBER (base=$BASE_SHA head=$HEAD_SHA) via $EVENT" - name: Acknowledge command @@ -234,8 +243,10 @@ runs: https://openrouter.ai/api/v1/auth/key || echo "curl-fail") echo "OpenRouter /auth/key response: HTTP $STATUS" if [ "$STATUS" != "200" ]; then - echo "::error::OpenRouter rejected the API key (HTTP $STATUS). Verify the OPENROUTER_API_KEY secret." - cat /tmp/openrouter-auth.json 2>/dev/null || true + # Surface the upstream error MESSAGE only โ€” never the whole auth body (avoid leaking). + MSG="$(python3 -c 'import json,sys;print(json.load(open("/tmp/openrouter-auth.json")).get("error",{}).get("message",""))' 2>/dev/null || true)" + echo "::error::OpenRouter rejected the API key (HTTP $STATUS). ${MSG:-Verify the OPENROUTER_API_KEY secret.}" + rm -f /tmp/openrouter-auth.json exit 1 fi @@ -259,9 +270,14 @@ runs: echo "base_dir=$BASE_DIR" >> $GITHUB_OUTPUT echo "head_dir=$HEAD_DIR" >> $GITHUB_OUTPUT if git show "${BASE_SHA}:.codeboarding/analysis.json" > "${BASE_DIR}/analysis.json" 2>/dev/null; then - git show "${BASE_SHA}:.codeboarding/static_analysis.pkl" > "${BASE_DIR}/static_analysis.pkl" 2>/dev/null \ - && echo "Seeded committed static_analysis.pkl from base." 
\ - || rm -f "${BASE_DIR}/static_analysis.pkl" + # Warm-start needs BOTH the pkl AND its .sha tag; seed them as a pair or not at all. + if git show "${BASE_SHA}:.codeboarding/static_analysis.pkl" > "${BASE_DIR}/static_analysis.pkl" 2>/dev/null \ + && git show "${BASE_SHA}:.codeboarding/static_analysis.sha" > "${BASE_DIR}/static_analysis.sha" 2>/dev/null; then + echo "Seeded committed static_analysis.pkl + .sha (head warm-start enabled)." + else + rm -f "${BASE_DIR}/static_analysis.pkl" "${BASE_DIR}/static_analysis.sha" + echo "No committed pkl+sha pair; head will run a full (cold) static analysis." + fi echo "committed=true" >> $GITHUB_OUTPUT echo "Using committed .codeboarding/analysis.json at ${BASE_SHA}." else @@ -270,8 +286,16 @@ runs: echo "No committed baseline at ${BASE_SHA}; will generate one via a full analysis on the base commit." fi - - name: Generate base analysis (no committed baseline) + - name: Cache generated base analysis (keyed by base SHA) if: steps.guard.outputs.skip != 'true' && steps.base.outputs.committed == 'false' + id: basecache + uses: actions/cache@v4 + with: + path: ${{ runner.temp }}/cb-base + key: cb-base-${{ runner.os }}-${{ steps.guard.outputs.base_sha }}-d${{ inputs.depth_level }}-${{ inputs.engine_ref }} + + - name: Generate base analysis (no committed baseline) + if: steps.guard.outputs.skip != 'true' && steps.base.outputs.committed == 'false' && steps.basecache.outputs.cache-hit != 'true' shell: bash working-directory: codeboarding-engine env: @@ -282,22 +306,21 @@ runs: ENABLE_MONITORING: 'false' run: | BASE_SRC="${RUNNER_TEMP}/base-src" + TARGET="${{ github.workspace }}/target-repo" + # Clean up any stale registration before re-adding (rm -rf alone leaves a + # dangling worktree entry that makes a retry's `worktree add` fail). 
+ git -C "$TARGET" worktree remove --force "$BASE_SRC" 2>/dev/null || true + git -C "$TARGET" worktree prune rm -rf "$BASE_SRC" - git -C "${{ github.workspace }}/target-repo" worktree add --detach "$BASE_SRC" "${{ steps.guard.outputs.base_sha }}" - uv run python -c " - from pathlib import Path - from codeboarding_workflows.analysis import run_full - res = run_full( - repo_name='${{ github.event.repository.name }}', - repo_path=Path('$BASE_SRC'), - output_dir=Path('${{ steps.base.outputs.base_dir }}'), - run_id='${{ github.run_id }}-${{ github.run_attempt }}-base', - log_path='/tmp/cb-base.log', - depth_level=int('${{ inputs.depth_level }}'), - source_sha='${{ steps.guard.outputs.base_sha }}', - ) - print(f'Base analysis written: {res}') - " + git -C "$TARGET" worktree add --detach "$BASE_SRC" "${{ steps.guard.outputs.base_sha }}" + uv run python "${{ github.action_path }}/scripts/cb_engine.py" base \ + --repo "$BASE_SRC" \ + --out "${{ steps.base.outputs.base_dir }}" \ + --name "${{ github.event.repository.name }}" \ + --run-id "${{ github.run_id }}-${{ github.run_attempt }}-base" \ + --depth "${{ inputs.depth_level }}" \ + --source-sha "${{ steps.guard.outputs.base_sha }}" + git -C "$TARGET" worktree remove --force "$BASE_SRC" 2>/dev/null || true if [ ! -f "${{ steps.base.outputs.base_dir }}/analysis.json" ]; then echo "::error::Base full analysis ran but analysis.json is missing." exit 1 @@ -321,34 +344,15 @@ runs: # component ids from the baseline (stable diff). Base dir is left # untouched as the "before" snapshot for the diff. cp -a "$BASE_DIR"/. 
"$HEAD_DIR"/ 2>/dev/null || true - uv run python -c " - from pathlib import Path - from codeboarding_workflows.analysis import run_incremental, run_full, BaselineUnavailableError - from diagram_analysis.exceptions import IncrementalCacheMissingError - base_sha='${{ steps.guard.outputs.base_sha }}' - head_sha='${{ steps.guard.outputs.head_sha }}' - repo=Path('${{ github.workspace }}/target-repo') - out=Path('$HEAD_DIR') - name='${{ github.event.repository.name }}' - rid='${{ github.run_id }}-${{ github.run_attempt }}-head' - try: - res = run_incremental( - repo_path=repo, output_dir=out, project_name=name, run_id=rid, - log_path='/tmp/cb-head.log', base_ref=base_sha, target_ref=head_sha, - source_sha=head_sha, - ) - except (IncrementalCacheMissingError, BaselineUnavailableError) as exc: - print(f'Incremental unavailable ({exc}); running full analysis on head.') - for p in out.glob('*'): - if p.is_file(): - p.unlink() - res = run_full( - repo_name=name, repo_path=repo, output_dir=out, run_id=rid, - log_path='/tmp/cb-head.log', depth_level=int('${{ inputs.depth_level }}'), - source_sha=head_sha, - ) - print(f'Head analysis written: {res}') - " + uv run python "${{ github.action_path }}/scripts/cb_engine.py" head \ + --repo "${{ github.workspace }}/target-repo" \ + --out "$HEAD_DIR" \ + --name "${{ github.event.repository.name }}" \ + --run-id "${{ github.run_id }}-${{ github.run_attempt }}-head" \ + --depth "${{ inputs.depth_level }}" \ + --base-ref "${{ steps.guard.outputs.base_sha }}" \ + --target-ref "${{ steps.guard.outputs.head_sha }}" \ + --source-sha "${{ steps.guard.outputs.head_sha }}" if [ ! -f "$HEAD_DIR/analysis.json" ]; then echo "::error::Head analysis ran but analysis.json is missing." 
exit 1 @@ -366,32 +370,13 @@ runs: STATIC_ANALYSIS_CONFIG: ${{ github.workspace }}/codeboarding-engine/static_analysis_config.yml PROJECT_ROOT: ${{ github.workspace }}/codeboarding-engine run: | - HEAD_DIR="${{ steps.base.outputs.head_dir }}" - TARGET="${{ github.workspace }}/target-repo" - # Count real WARNING/CRITICAL findings from the head analysis. Never fails - # the run โ€” a missing/old health module just yields 0 issues (no banner). - uv run python -c " - import json - from pathlib import Path - issues = 0 - try: - from static_analyzer.analysis_cache import StaticAnalysisCache - from health.runner import run_health_checks - from health.models import Severity - cache = StaticAnalysisCache(artifact_dir=Path('$HEAD_DIR'), repo_root=Path('$TARGET')) - sa = cache.get() - if sa is not None: - report = run_health_checks(sa, repo_name='${{ github.event.repository.name }}', repo_path=Path('$TARGET')) - if report is not None: - for cs in report.check_summaries: - for fg in getattr(cs, 'finding_groups', []): - if getattr(fg, 'severity', None) in (Severity.WARNING, Severity.CRITICAL): - issues += len(fg.entities) - print(f'Architecture issues found: {issues}') - except Exception as exc: - print(f'Health check skipped ({exc}).') - Path('/tmp/cb-issues.txt').write_text(str(issues)) - " + rm -f /tmp/cb-issues.txt + # cb_engine writes the WARNING/CRITICAL count (0 on any failure โ€” best-effort). 
+ uv run python "${{ github.action_path }}/scripts/cb_engine.py" health \ + --artifact-dir "${{ steps.base.outputs.head_dir }}" \ + --repo "${{ github.workspace }}/target-repo" \ + --name "${{ github.event.repository.name }}" \ + --issues-out /tmp/cb-issues.txt || true N=$(cat /tmp/cb-issues.txt 2>/dev/null || echo 0) echo "issues=$N" >> $GITHUB_OUTPUT echo "Architecture issues: $N" @@ -410,8 +395,9 @@ runs: --direction "${{ inputs.diagram_direction }}" $FLAG) echo "$META" > "${RUNNER_TEMP}/diagram_meta.json" echo "diff meta: $META" - read N RENDERED TRUNC < <(python3 -c "import json;d=json.load(open('${RUNNER_TEMP}/diagram_meta.json'));print(d['n_changed'], str(d['rendered']).lower(), str(d['truncated']).lower())") + read N CHANGED RENDERED TRUNC < <(python3 -c "import json;d=json.load(open('${RUNNER_TEMP}/diagram_meta.json'));print(d['n_changed'], str(d.get('changed', d['n_changed']>0)).lower(), str(d['rendered']).lower(), str(d['truncated']).lower())") echo "n_changed=$N" >> $GITHUB_OUTPUT + echo "changed=$CHANGED" >> $GITHUB_OUTPUT echo "rendered=$RENDERED" >> $GITHUB_OUTPUT echo "truncated=$TRUNC" >> $GITHUB_OUTPUT echo "diagram_md=${RUNNER_TEMP}/diagram.md" >> $GITHUB_OUTPUT @@ -420,44 +406,46 @@ runs: if: steps.guard.outputs.skip != 'true' id: body shell: bash + env: + # Pass event/input-derived strings as DATA (not interpolated into the script). 
+ HEADER: ${{ inputs.comment_header }} + BASE_REF: ${{ steps.guard.outputs.base_ref }} + CTA_BASE: ${{ inputs.cta_base_url }} + OWNER_REPO: ${{ github.repository }} + ACTION_PATH: ${{ github.action_path }} + TARGET_REPO: ${{ github.workspace }}/target-repo + DIAGRAM_MD: ${{ steps.diagram.outputs.diagram_md }} + RUN_ID: ${{ github.run_id }} + N: ${{ steps.diagram.outputs.n_changed }} + CHANGED: ${{ steps.diagram.outputs.changed }} + RENDERED: ${{ steps.diagram.outputs.rendered }} + TRUNC: ${{ steps.diagram.outputs.truncated }} + PR: ${{ steps.guard.outputs.pr_number }} + ISSUES: ${{ steps.health.outputs.issues }} run: | - HEADER="${{ inputs.comment_header }}" - BASE_REF="${{ steps.guard.outputs.base_ref }}" - N="${{ steps.diagram.outputs.n_changed }}" - RENDERED="${{ steps.diagram.outputs.rendered }}" - TRUNC="${{ steps.diagram.outputs.truncated }}" BODY_FILE=$(mktemp) - - OWNER_REPO="${{ github.repository }}" OWNER="${OWNER_REPO%%/*}"; REPO="${OWNER_REPO##*/}" - PR="${{ steps.guard.outputs.pr_number }}" - CTA_BASE="${{ inputs.cta_base_url }}" - ISSUES="${{ steps.health.outputs.issues }}" headline() { - if [ "$1" = "0" ]; then echo "no architectural changes"; - elif [ "$1" = "1" ]; then echo "1 component changed"; - else echo "$1 components changed"; fi + if [ "$CHANGED" != "true" ]; then echo "no architectural changes"; + elif [ "$N" = "1" ]; then echo "1 component changed"; + elif [ "$N" = "0" ]; then echo "architecture updated"; + else echo "$N components changed"; fi } - # Call-to-action footer (workspace + editor + extension links via the click - # proxy, plus a warning banner when real health findings exist). Editor - # links auto-select VS Code / Cursor from the repo's .vscode/.cursor dirs. - # See scripts/build_cta.py for the copy and the market-share rationale. + # CTA footer (editor + extension links via the click proxy, warning banner + # on real health findings). build_cta also emits the โš ๏ธ banner with no proxy. 
cta() { - [ -z "$CTA_BASE" ] && return - python3 "${{ github.action_path }}/scripts/build_cta.py" \ + python3 "$ACTION_PATH/scripts/build_cta.py" \ --cta-base "$CTA_BASE" --owner "$OWNER" --repo "$REPO" --pr "$PR" \ - --repo-path "${{ github.workspace }}/target-repo" --issues "${ISSUES:-0}" + --repo-path "$TARGET_REPO" --issues "${ISSUES:-0}" } { - echo "### ${HEADER} ยท $(headline "$N")" + echo "### ${HEADER} ยท $(headline)" echo "" - if [ "$N" = "0" ]; then - echo "No architectural changes detected versus \`${BASE_REF}\`." - elif [ "$RENDERED" = "true" ]; then - cat "${{ steps.diagram.outputs.diagram_md }}" + if [ "$RENDERED" = "true" ]; then + cat "$DIAGRAM_MD" echo "" echo "" echo "Colours indicate components that have been ๐ŸŸฉ added ยท ๐ŸŸจ modified ยท ๐ŸŸฅ removed โ€” versus \`${BASE_REF}\`." @@ -465,15 +453,17 @@ runs: echo "" echo "Showing changed components only โ€” the full graph exceeds GitHub's inline Mermaid limit." fi + elif [ "$CHANGED" = "true" ]; then + echo "Architecture changed versus \`${BASE_REF}\`, but the diagram is too large to render inline (GitHub caps inline Mermaid at ~500 edges)." else - echo "**$(headline "$N")** versus \`${BASE_REF}\`, but the diagram is too large to render inline (GitHub caps inline Mermaid at 500 edges)." + echo "No architectural changes detected versus \`${BASE_REF}\`." fi cta echo "" - echo "codeboarding-action ยท run ${{ github.run_id }}" + echo "codeboarding-action ยท run ${RUN_ID}" } > "$BODY_FILE" - echo "body_file=$BODY_FILE" >> $GITHUB_OUTPUT + echo "body_file=$BODY_FILE" >> "$GITHUB_OUTPUT" echo "--- comment preview ---" cat "$BODY_FILE" echo "--- end preview ---" @@ -486,3 +476,19 @@ runs: number: ${{ steps.guard.outputs.pr_number }} path: ${{ steps.body.outputs.body_file }} GITHUB_TOKEN: ${{ inputs.github_token }} + + # If any analysis step failed, replace the sticky comment with a short failure + # note (same header) instead of leaving the PR with nothing / a stale diagram. 
+ - name: Post failure comment + if: failure() && steps.guard.outputs.skip != 'true' + uses: marocchino/sticky-pull-request-comment@v2 + with: + header: codeboarding-architecture-diff + number: ${{ steps.guard.outputs.pr_number }} + message: | + ### ${{ inputs.comment_header }} ยท failed + + The architecture diff couldn't be generated for this run. See the [workflow logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for details. + + codeboarding-action ยท run ${{ github.run_id }} + GITHUB_TOKEN: ${{ inputs.github_token }} diff --git a/scripts/__pycache__/build_cta.cpython-310.pyc b/scripts/__pycache__/build_cta.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..305f3b3bce8fcf7b0447d94640b7394511a4ef1d GIT binary patch literal 4655 zcmZ`-&u<&Y72a7ce`qDkj*{4p?IcqphDAn8I&NYouA#)g5Cm{+*HRpWfkCf0Lvp3% zE;F-hSp-$2l~Fqfqd<^T6QlN_D01kzJ@ruZ(7$1i=_#l7?2G%oS<;f6mcs7N%+9`< z_uluu_eSxaJ+6h{mBpW}9^GeI|Dw*$pM%Z^mc{e{4PiAc!Gs;MzTLDLwF_aPS-{f? 
zoqn-dG-JiE)Gs&7Xdf1?n7->YD`H0M5wqXf%_(t2yejsK19yu}SG*<;idXP{TF!_= zYqmNi_sHojYm5B`l9#ON>*+tRf%9n)3ZC>NZ~I~BC9&tXlOT?GCyo=TxIY-kNJvbH z%`9KFdqE=GNvfnLf=-8DyUyEjzb~Vt?z+nmH2yLSqBYHf2p?XEg*+eoN(52&xaRFJ zXs_{sitlXjc^&h3J(8-SBf>-p>ZKqI##?^lXnssX=qWao#B4OkH$~}+Yhw6bN|>J--uJir3jK(9p$uTKA-nI zJ&^676SS**5T@FQ2_l`qN@NK4*&6w03z#6d&)0nwK}){kM^K8d_pq>%e#m>cB|Y8= zBC>+!@=l*tm)efU|uFRJ{9FTcsX4VMK8R>Qvi za6?0$A7Z0ogX?rKh*g3G5+3>h0sMN&SO|{3EtTKJhBOuogBA1=iPggZ(sGp4YGZ#G zi{w73)$05jc1<`tdE(?d?ot8=^Q(~73FCEsQQnpzoTT_ts%}fbqL(BCeWuaSreBBM z+iSEqO|Q4(21#iEhrK8chQPet-Gvz$xqbZJ!b3$h)!g^IT7QBafFzmE%9Hy_x3R!cVLmR7P`(U z5HO|dC4LaPome3jq~Go#%#_+7Lj;O|&p)|-nd^2>_NB4cGgtvNT_8kl7WaA=rgX*Z{4cj)@=mxty^3K3JefK4{vc|8f=s1AXpKD&(Fzi zzra^wgn+l`(sO}FGJqY_4k3J=5TgB#&Fs=&tC@U!mCX1d-g|PMK zF!#+iBWdHGjVH~!KrDR*jjpxHM&Rbd)+6h&rS^>2BXIU(#w?5$#-j(#sAERi;E1rh z418Pk7a;n&S=@tlb@titizF@?5fXA$qXR4~L zO7K&&oLg6a4=17jWTo*5*i$z^qqlwVUevf4w^Pbmy3qv;)0I{y4zcaV&SZrQ_uN39 z9UHo>f&o5Vq4e2mCw_gfkkJ(q2ICRgtXWuK_ zb2<)H8Fy#CVq)(j`yT5su}{2kw~`b_&KA3fy5ce0tQe_tqsq6|!#pZzl^5?b@zNH< z8ZSRAm^DiJn%KW&QLWOoH>XCWi`LD`raPJv`$ulqK5cDIeQ`q^d}NQN9#iZbPT$-e zJA@lS40U1o+@#wRGNB{oo?mt^g(;Pk{2~Rh;-d)w5YwEQZLf&CAv}VjHr2q+J~s^WDdnlKF>@slPUPsEN+=YR{j7jO|1R~8y?sW z-m_th@>eg`X--PA`QQJ-f1*MKFMduc)Y14gDF5c%CKFa-jo2$jq5Dv0g!1}<4aJ;? z#Typ0zOY3R%9MuFpVw+vsSrgqmy#&{!*9Q?)o#uYXUWj00==9$s;pVcTTfV(64{>C zrHdaeU%lSCeD3_E%OE|wej;PC%G9e6V+bzSaq4Ur$h$L8ix==h(<1hlj`bRIpAL`w z4|ShSPv6AU|NP-kf96Z59;s5^jg8%v@)@3NUv9tYN|Pyo|Y5s-@hmgec=%Wm*I()2cd(SQWV&^cV~>XEhEY!>iS4 zg>#nmJXQ#K2LD-c;G?i@IuStLoXKt6GSQb6@c^l^fiY*bsE*KA#=Zu6+mJG7ntEo< zw=mCO+9_AqK6a2fw##0!XA31(dAhH#AAN^a@Hb2JU&HJ+u4lgjTI447$S(E~^=Tz5}E7z#2vKe297T%h%yq0y% zOd8Z`KX#Tw-o4?6sl23Aj8bBN;wv#2{J(($4XZ{&BZsm^&M!NNw2$)pmw2HmaA$z| zS@SoCR)yL297-KLpK{<{y-TtxtrmULY9Vw@FcRoZmQVzJxP~%SbvECWiFOlnre$c{ zv`h$^*0vC{L8I+9KiIiw8m-P|@EvYSv7Kqy@mpe;orR6R? 
zGb=^JGDxeu6oudVP#_3kKjt+*rVsnrKJ_0oZjtsov!pJD9iUlhhI2i0X6F0OIg_?p zj)w0~@4d10hf|vN52_sfEL7gpG+o3fFs-XGof)w{G`faPWivLrCVs8h8dka$v{kxQ zR_)f<>+B?(|HA6p>@qvU&Z1OjSJ-)W0VRjI>E6O`;zQmV^<>Fn#N0d7$ik7_ zbU#F?l(Hx`eRuu#3Pf#&KKo&!NC~VzaBo1agz}+O)rYTGCWZ>=^0|x#IQvBwai#IW z77kR8gvdgnw)Fm;yQ_}V>(OGk6Ko3S(22L#4c5#YoW|~0aox@=_eca|ftcsDVoTt3kTkiF4U`c|-qr~6wqnPe;+Bsbj z8^W5qJi=8Iz?_d(9<1G2`(XLzgF6rJ++4Zq4x)t7>Oq<%l9uoF78eh9huI5+m-5O7 zSVYO8U3LJQhrn!D0(zm>3sM&LQ=gMSZ#U)JgE-w45E05?sY8&AhNF}dUSfY7#e0B_ zh!{j*cg`S7O7sK3aub7xDa&FYPt6hTZUo>1Ld28lnmb{# zJD30zmU3_@*oQAf$fdg*x&fj5h%6GX>O9)ml!Ww*Zb&RS|0MI#(Tj$kB&k${6>F$l z_ra0ziDfw)EeHKYy8op(-8gKaE2oyiA^;U8=w6uELSYe(iYgA#i&#-n*e^_QsBlX3 zdq6+>Lz(fUu-4Kf463BX^t16>N4iON>^&5e4bz7Dq^fIqH8-S@!|ov0_x0SArnGVk zr3y2j)Fw4qVOC!IQv1r-*J0l@*rv)F*C7YHvPmn#;McXHPI`Mv*|JDhp>VK;Q6@dG zgOetOjXip*tD&~y#V`;tZ=i2{jxc&a1O-JICU3i)YH{V|cv+gUhPNSr!(?hj-Mg`} zzUtlm-3Oekdr=v2oL0x?WX(Ap4X;r~>To(4wYU;cn83%^8oz+n=l$hJxPP(S=jo0w zp~&*BG$`+Rc>@5*`raUoS;&`f0?Rk1SSCAgyrHzy%Oc>>NDAD(S0Zw0v{xP=f#Ef* zD9$2l>V{$HRs0$FqrCdqcBN&`jZfc7!_=1^!NsmuNw3I~^M4~y&NGd#;tjG{()z5GRLb97eOJR;^}Klug3c6zW^tY{^D?_uUg(_S&7o}sXkYt2Qwf-y z#If;tWZmY7r(0nl6{<_TAWG0UT^4_xG6mZUM+CxI9++2mVf;qtI+?$OtmZK*@4_uB z$`|@uoNjG=?l!ru8J>@Tzs1kcQkN-HD8GVI_Y#Zz4ek%U*^H&|(*S-E5rW_@jh&L= z_Ug(lMTT=|EUb+uiXz+6wtMTarBA8PG7$AeC$|`!BAbJDTFIj-{-ZEeg>o}g1HNd=j2y+F{ z+NAnfmFepmQze4v_+DO-wxW?uL;#$PNiDBEtv}NyHq$5d+-9{*=Q6i)8#E|K)t`Wn zxijr?wj5CB@}wafd4o9{+P;bUnMo7%CTp$06Ux90ZsUul#2 z<4+zxg1*hXKBG!r{|NS4?dY%n?~XdH@oyg}pQ0Sy_s0U)-JcSm{bUajN)%^2bQk(? 
zt&8CR(W>k$9XO123I`*(2OgdzY6299J3JMB3N)TtC#M;#OoJg6%8bu~8We7+QLfmI zMk9m;3uB|RwAL|r2RA)+KRit(T27!v@wjMDV-|0Nr`bppwwNj~zO;g<8nI3Qzlp#> z#b}7FvxLFnoanYn&6W@lgt6dnLE=FhF*@X&z}&rzuow!LG#rQgz$1A9g<<@fQf_s| zO-?Kn8O((5 zF(yWCJhfQmnLaUD6;!BCEP%g~Tj;Mn!4vH9X*I&ehzhGuM^uiD_>mgnU__NQrX#Ay zM*K3@pTa9)1PHqY!s9vidl0m?RY!7=8c0o~7E*g3hJiKLHI(7CFgBBOD8Fp%3ACS7 zbL=B?9L)K%oJad9c+AuCj693-IV6(N*oQxTMxhqg=(H$yM4(g#53ucp84TIOj!{@> zlrXH}<1FGKD=dl)I@RaZ@)7mCqTWzcrI75oJ$petuT9@i&znb|PaU1F;9d%gtTdb; zET_hAQMO9i`;>ivZ0szm;FX?6V@NoBDDm6qE~+JMI@Y*-{B2Z9Bw;k|@X=6OhVzAu zAbR9;5f<&0X*hkKsvv^ru{7|!qU|#Vru5Y?E((Y0Wd*(f`;kLG(`r(JGK zX{2IXRZ`|am56PnzADruuUN*rs+VF#PTsOJ{{!Txs6fe4!H_DcvwT6eT^cIYs)BG; zI@D2VL7H`Rvge1@Hou3~!twCPAT#ZG+#rp~E)_L;MZ{5GSyN82Grvlix@!s$${U)U z7tsJ9a@O~R42P?qM)Js&P{7a2Q03R<6Yw1}S%R7@tvPSQ{#k~lJEk+|xXnxUMMt;( F`#+k$@Eo207#JpAW|Z^qydPehD6QI&gshRB$1OiagM6QIjZ9JA4!x`IbFrLoI)Zo zzu)(IW@i?lN|~*B^ZMP#_rBxzzTVc@SRsYaA3XEGg}ra2QvZ$*gP&16JeNvY!OvK! zluDIS%2Ia2TCz*F#kkXON{;-xC0BmarL_EJN*VmRjqFmcltbBcDX%i6g379#%70+1 zf*O(EQ8gyN;~zMs5j9>KRhvpVD-RHmNSD{pvx)wkWM0QU?&*sseRLJ&f2rDpb>I2C;3bqmHUa5!bN?go>9-L!r^Xdh4Qk_z#KNuLrxg?T^(gwX9xNue_Ho z?Rhb!URAU2rPS=IUAk9w)laB1D0QFuk~*ipfY|*iQg5g?5%biFdP{u~u?O;{{M-3= z^5^r8`BOiseo8H>I$GSTuBi*^B1-M^_p8Q5TmO!qx%r^KZ#8wx!b&=4dghO(yTO=gL*K1A!s$dTD{>1NK>Bno66T-xafPzZ-f=E zf^@IiYP7VkyjOj_RH-X(L08&~(}ltrzoM9bXucBohrEOHehuY{($b+tWFoCn3w>Sm z=H`aFoSXB$ckSoB`Bu2-RT>&amkUc3l&n`8^)9-?s8l_;=*=(7VEpDnz3EGxYHO+8 z2{~SjYpLC8`ps}g`;AIimrAwTVGf|+FV#a9da)k9)R~_$Gpo!ue2isoDVfQ9qgB1= zU0L+Cf2>gOyhC0S%Y@Dj|6VemGUp^>A^?M}4o(WpB51 z=(Rw4C{(F(Zw_$$$?la|Z@RraH+R^(f{LWj0@JV&m{%an5H6ynPJl^(yp9=S;q1&x zl4!h|ZY_E3Mx}0u*lSn9#lSn*Y=wonxrKV@1&ggKb8|%@35=>wCs!6*Xn1b!V4$mq zJr#sSkR-cnRu?PH1+FYs$&_kT=KV$?^sk0qFB2uT&ncoHjtB@UAqTW_kW+49k?J^P zAY|-{!l4oRndxmUmhY_Js7+N{%@CtUGeM{t_4(<-_sL(2RyZTQ?ZrZoNl-^98O}VMi2~!u`n!Q}KQY&Co-N`V06G9+LeIupp z$ofX=LzfKf&D0kmN>;3&v=r)@*cIzas_4vii^q=nI86H3v6HP%LxChiYy=3Q$6dV7 zpM7oCtAt+HKIRp(u}cOQryDJBsyNT0Wo~4geeLXNHdNZg3a9)kj=dzrPOu!ru79;2 
z7HvI(xx|itwfa#?--Gz=#hEVzz7A&Qb?b5^4E^TJsaCZkM{EZ4w%D03*IEtm(3wGg zJEiIy63mg9397o@4uctzemQKF6TXC#5sz`q<=(s=LFWObt!+(MTdn<8?wb9H-p=7! zRg5{*LmR@)aq*Q6*@g(V0`mBl)hn@HhlxRi-33_UX&=VeH89Jy#igWw);5NCM+cOkNf0i4gv zK&;SOwKWdd&)S!;=d!xs6@_{LVG&lqZuL&oK$1OuB(|0|?m_+p_&YcJ*_TmFwqRGP zyRWy1<@se~z0;oJ|(Gn3i?Z_o0Effaa86t8^M62-gqeOsB0N`%#F* zR>~#~#&&H9-*%kl_6Hk>HL-C3FLHDf2wZE_8n<>>E)I%)&9yWSPwY}jiL=#?hKLFA z*lIPZT({nb=cT`q@8ALYsr_E6x0To=oSG!X;^rd7iLhZ89lb-jXH%gtJjHFje*w98 z#)iuC0Y0PLVU|-FY-o1&jt$J9Pj2AHTn5>|+zOij4+Q<#X8S&}^@GS>KQ~A?z=UZA z!^cM#k#+{(k2%jBsqUfjVkIbpS#+x4s`Ua+>-p)G zOo(m2=|mYEEu2-H=6-w!$s&vVfwaN0lAnvR`b3nC+$eJ*73K5`Q7%d|erYSxU?6=! z=4{VG0&$3OR{Kk>%g_j2p=EuTiJ3t353`8*fkhmZMNCdnX;W_*lLTo|pFj&jNU>Fk zExmDd$3#Kvov^o2Shz|~R+Q(;!=EcpIh?VArJrMiGuUr`# zVELlId;w*Il^t>PXF2M;EE;kHNJPUUST-1ob@>6sfUDNyJ>2(=OIpQu7xgiutimKV$2}jMGGows)HDqH+sWe+6ZPs=NCQ<*e`YfB^5G zBKpdLk* zN`YVDH?j@0C|adLwbCs6SKGj3qQEXq1* zsx+|!yH%vReh!cNF{VvyTo=u=qNx)Ux9GPR|008Rg`+PkDOgHlz@VytZA6{~nkr1} za!r!+F4?8Rx&o!CCzd9uRV9m=XjHPorF>$;`>JH*OVF7QN`3hvPh)bB5fyP$n&=(l zeoA@)SVg#TrRLG`m+=jrLqNGaPDWwn>~X}wDDYgs-&Qb=NqbyU$F1J)sGYM%tw}eB z63~!s&DLU zMB{RZa;by#)%!_25TPK?10T%790qJ!bh}T#N(vaj9D!O95}X2p6I7b@u--L5)G4Z+qbRrx2=P>tzv9_DYjN_TT$#13>jJ`9`61!QOHtEC<{i2UQL6u=B%=rWJi5%Zv+(1^`;KX*E)R@y%B)%iNWM-6nBuo^+nir0ge z1#bfqM*JK(~UZ? z<=6pgddF$u@`-jxe$-n!ER`6CRH80gdOU)T6~U?lzqEIxzG6f715T!#?MMeCBUAf6 z>9s?4`@L)52G9HwV)_^Y=od0C91%`Lzsukb;)03Stiq`N7sw4bs8Q>nr71zG_zr!T z`y9;9;2}i(R0&MHWGf5zHd}DZl2--XIk@T`wOLK#&LOR+j&F-I<+IgR+plZ}k9ghJ zQw<;ZGjQl7e+EZo>RGP`u|o^Ot_7{eWo#!m8i>8u^no7yt&z>3-bRIQGU6LeUbgHFiHh^lS{!Bh|C^DeaN&Ej#doyNeMGPiV2fFjnQDKt3HMUyHRUF&5295u~mH`FX%ns04k^6L0K$Z zgs%%A^**%Psji$PF{VKOkRqwFm?340dPbSHPpV9PGgWpYKrrVUw5{5^VCV2! 
z5zyjF8ul|F8QAEV$c5bwn`zau?UgKa)(im{XqIVch`IKoQMUc4%B))3Q!Du>ud*mr zfDT%KZI;r|Lsj<46m?SI)46B___VW9FuB2JU~%Om=#du2av^n->AChhXz?9bX6eYS zxssRBHRM}ONSO=H$|y$AM9h|Fm)fkpf|~e+?wHjph>u2D-~gM6eHRO}Mv0@=Cc03` zZkBNvaT0*kLM-W3=F|{$^9MUk(wQn2N8&7*THO!g0%?X29kAWeUYy0^JXv+T32yAb_3j77}0&5$!iR%4BkKxXA{zmooZc`(p70wWcfK}TL9xt zr-b<=a|7?nmCJR=*>d^cqkO=c8E7`0b3=rmw(^`7{I^v=@D)1b+H!}vi#6_|>}1Veut|BrBq-0l z1!9d{P~)D&`nL6ch`tP{Xn&N6z~60@0MEDd5$MKgq#pr|SiSU1Te;gGS+jsZe;WPi z_Zj>QgFlIY5)Fqq@QYdfH75RP2EWLFvxwcLN;}X$&*xuYuwg+_Te$E4MP9HK0g=(1 zs7q9tpfp``t?qcTVo72BXHc-1G5gGOtYxzaJeeK8!Tg*V3fI+)M`FWEtEFTHVpz7j zT#gInGA^4sz~RgBXt{i;Q)zHZ@RTi=RjXPq>p#U7KVYyv8Ho{>&|gLH`}hVlX>uvx zy@l}|k}BinJx|YefpqGBxUYG9vA0&R4g4jsc)>?t!vXhOP2GT2h!8+7c5^$F7za9{ za|_B1_Uxgx)J-SEt{aMT$IW{NlL>)bWuB_vOowK%Q03H`f7UNXmil43@i8M2lIZnL0`Adg?mpQH{b9&2hAtK^m?S~a$ z{&B_IyA%~sWNuoG+^Q_KAmds7qTdeXHkl6rZuq?#MCp8`dU1M4`{Z^}`y>V><|ReI z`H>zBj*YOe5il^nE?5+FYg)1&*#@Y|o_y)~*%wcrx*IMO7f#515%H)3_Fm7SBG4#Y zs=)0a0m9XQ9o;W2UEDEBjx@k11+0IUqXbfh1CFu1fx~{u=8=|DBqWR>9zc!Qx;jA7 zB<47OV!?01KGx4pVptzjAllael>#A027u<2@)SS15)4=n`Om z@D|K6BVgwnwneT^c5p+u(R+U6cJ!{t?z&bSjsA=M4oZxDT7Nsa{(p%j1W+AAEY8y7 zfv7fFYn+!S5nvQsBEYJYRlN@n55HTuMNc3?Bpr4J?vEq}#r~EJJ;zybu`!O`8adpq z_Rh#{;md7WrKx5cD+GH)KvMdFaFSus=+^K+7?49|=h{TyW#7;u3J#&nxJ_M)coP5YYdIK>hcHuMNtp4tHDMQA+s^zj zm(ky0lN9em>JxW}8;s?(f~$z|B8?>2v2+3IkE?7byC6h7PeM%so`7dy40zrZIe2zY zunv@Hy;rWo(#VIfM*a*gN9Zn+{m`yMky2Ef)B2doi@OOfjJBs#fo8NK5sy^%VpcB@ zF7nplg}J#0AN&$k%#{ZpoSTCUq<#1&UG_Z~fqt`5p?zq)z-GJvMkegk#6q2g#toye z6Sk&Ah8d@+y7mOI=($*VKHQj0i!^VwF(Yd*GL7V;bDD<_#3%gSU=X)qV$m2rIA32_ zfRQYn7va_ftBmm(U1?tI4aGYMU%dckBQPT9`qSPy94cd=!k4I4U!Z|(x?F5E#cdBh zbe{^N7;_C;yBR}igdLtWvo4LW_35yb<7Cc4ikIy6)zUa*`jY_@uSV(tiWlLyW~i0?AfLmK;G@U@ECL?mwUh z?hqdi0APvwu?58Jz%qn@lY3r3VhPS^F*WhnNOwPX#s`=i7{|P7t0Q)E;;|@pGd$1+ zuAanij!uR%Jy)!m#K@o4m{iIztyRXz4}b~v`A57Hellpkr^blajCe){C??$F=U8tC z1L0`vZ0&C%?O84w*^>*XivTiB1kIcwfNN-(sCb1~D7gv|IGAcDB>osZP_cLuvUppe zSmQbXeh^%wUEtV^^J>>4FsvUm9;+~T#Vexw^1z}UlO<^qKT6y?55lvnf`lRb2o9%r 
zUAE5*?J?yz*-}8Z#FZ1J4eEjMurPa@sBR(wWxZ$DCBnvqqsH`kE*x96uR%vBici7- zhc*`|14?C2b311?g*Z4oB5;?{T#mQhy|(OfZx?T&!d-XqyU2KngW*}*X43`cKCp57 z6M*~7i|tp=eD<8c1@zym)nNY{#s~MfzEGjFVmw0Sy(}yZcj3lwwyYpt7WJ>ArP~EC zt^~f(zkvk(n+$#nL6z23??X#7v81iQ!_cUI$jt%n%5Rt_-VfmJ;0DzCHCix4VVsL@ zdshg-y#}|Xl|tyyE`hH^Zq>cvs8QNIv}CTk*VDBOt*iG&fzgc6N8=qJEGuKU6|`{| zII40G81F>mp?lE{PQy|g1sasO=pxMy-T}Ikml_i*n^z{IiKH}c0dbR;uetBIrY*Dt zoF{2bMPopZoaE_ZG|nd=Oq<$T(l7^A_#nNq#XJS?CB45l(ECpVAKIcuP7qVtLVF8{ zkNg(LD&Gc?o)ITY?_h$Tio4=rz?NYJ%)alrEXj~w02hg)(S^B5OH2E5VF6E3T3#(r z>B^Nvdd{bLq;kxA#Crw1#1D9V zOzz)hmgMh{9HZRb8r710SnuclwtcJHaJd~-ttn)9CP4Or@=`Fo6OK{}- zA6}#IE1LUTpz;Ve(_v~Yx0YYEn+|_#wz)*6OVtfrF5ZB3$v-$lIXA#E`f+O@4u3rS z;d1eY4aQ(!9|gn#{Slx_4iJZ76<`$CGuOwiXKOiO7r7hhHGpr>Kyz@c%dMsX#n>vp z69OLZQoN=L&>lxZKr2WY+evM6JbBtAPZJ-e)n=qj4(0P{J%0=Gw+`j=$>iVjVS2$? zftx1)AomtnhW_Ockj?pfp(VR&E5`73wGE7AJ2YSi5Ydjv#Ta%%)qt}^(#EbRjkMh# z3Mh2xhSWd2l39c6(9V?puGHJ3?!B2cDYxKUt?r95QU|h^;FFa6mjfjwWuWB!A4&Jn;AS?;uOU6cMVUIt)LWL> zIfTWXsT!|v56x<7azd3bOVffx!G#oxW-w%?kQ^zmge3n5B1F;7Yq?Jt0bVDn&ES0k zZ@VQOn}w1H?U+_rh?o-j6_(n4>QeNKYdhEpjZKaPOmwD#v0fp`jWwtC-IuC zR%7WDHfsDyK%cl<5NN8nY2)3(0GdPiH1L z1g@V}^0+>7*D@D@*+m6t!BBs&;k;rR(Pe_ma;hEKau+}|oTsq)=?^oq!Fj>Zj^pHl zSD~z&eO!K>g3`pRhLcfVS{q$+p$@TqlxV+7ja%h#YDU3b@GJbT*H`{SjKNAZclCO| zqi^8>ICs3XSO(u5=o_c5AcU1m2x|<)2QC7I%~^G!*TW8nlVR8O@DYL0&G#fY-p_b@ zZ`a;?9_AsnM7iA(Ul60C;#OYie~1jw)mNGSCm0BYk)-rB1jVs9P3asM^2$&{fkv~2 zEKm^dqxC;#SzhGpa}53o10lF7VxX%IUvifzJF>=8nv_u9mBfN)Uj| zevdW(Ig<^UN%M)t+AH(-Smru|b?Yj|<8@M_hX^lSCgGgpS{~?d9L_m=@qC}nIDP*9 z|7g<8kY~><6yB&nM~3rtcW}*RV5j_Aa@P&0yK&5{Hcggp?l3N5@uJW%O~USOIxO$` z9otuv&++c=p*DI(Dc=@EGdWh-HB=(8R+9oe9NqoHZ6+N~@m)8(6jxb3_P7VPm3!`* zE^ZLrv7s)yBQLbB(ieoo?(Xi@yHZ!-;<2OctA}N9O=vZ@t>azMRAdq0BXU?@;fP!` zaJ-xCHL>}QCc5M7u~7%|K@Q_pxv0GljYtPyF(PSA_mF>sd~ZPhfg$ch8IZWa{_A%$ zA%6|+3+UcG@?&zO{)IsoFS+$zW$b2*hNmy1$F746;msiC?;ye4zh_Rq_R4E#&g#E~ zlsI#_(&&J)zsYQ#DZDF1=YxX&U8V?Va0{^#+-R^4u^oiCdB(<>dtSG$t_HCcXoWH# zWlG~vDFRKVTrIY8zs> 
zJtmyj#G(bC$MaS@^GU{`xy!!-eBE}9Piz_G5pz5oGxrCh8QvNn#DUAV<6BDP;v^fAVB zhKCVa(cfi2EGo{w`utnv)2CiMeb%`6c7jNzj|VkZ?MAk ze`1mU%-}yV=rH&`ga5*SLyGP2Xvt|d8TdzWfeh=?(B^+&5n1T*;pI5P6(?#vzKsCU zBv{rCd;DdrGf?3iR?>FpbI8BU|JjpZZ~6~Wp^9(+Lryb-?@6LCZ=p@R69^PdS?^mb z5S$mN(BqDkD*XEpA(E1j6zEm`l$@kMYwD-qb#a&qqrU{L=Y1!sH;x<73AMTZJSh%M z_o!|CTHC`kUmsVy`YF4Sl2UsHQtp+M`vy|(mlSUxtfAg_ntS0Vsp~ z7NFn3F)jpBfz(la$M7A;cN4x7_-@8`65lOq-}`wOL%5^C)7A_3031&N%lO5}fq=(r z?CPQS@p`>Ug-}0mA{A~6x8H=Ja0@yl1n)rz-XdQbg=dF(#So52-OSV!eNE4G5Aan| zntwFlnKbz7_P~qbK&Ie2f2jkY2yX_4%TV6nBid-QCHj(d9~x|kDgF1oduV{XVBSqn zO~Ku+yYEc`6zmx95W}a7-LG>n^Dc9DWQuP~52ezpaJlwWS={Q?+YdS%qu1P zg>z8u@usSIjSPpeRmW??-SMfZv;Gc)yjg7V`pd*dLcq@{YKh?u50er*Kqqc<~%BaW@x8^CT2_t${zK ziaX;R?u6j@hSvwjpVtfWw(skF)cPh0#G`&S$7vf8mLy-{40@babny9WI~*^j8C5(syP&rrSj zdzfEv47pTG`6u4uNu1B{B!^?Q4Ouz-SNP*Sqqryl%&dW)@P*(q+bfsxf*QHISp;4v zXxd3gm752~#ofxNMR%m`y=z%%A!F7}T6nbAu4zl;>W|1zM$o@fH?^K;Um5d85BrtZ zdbGr3+%lGod1pXUzsN#&WY+hfA=nA#bh?EhM_hZ=+apOE8O1F;P5Z)#X&lm_-F)4Woxh zOl%(+@vk%E?=$#~eoSr%B#qYuCRf&DaO5(^hs=?Ji&(9LS5)B?rfpXF0R!G`=miFo z2zY53F5-VC7;5`(O#kl;{xySdA}DPVrKpU$qVz^WBvmS_A_{d!Epx9 zG59QlHyO+`@EOz@L=3Jm_zHusGWZSys&H~yroYRW9BUp6{UIN>Fjzl*>qjtm!g#^) z@+}qA5V)z)tvOhO{DTUc#+7B7@yrhxFJuaZf-8TB6gJ^`vT(RCmMic*vCIz&+41~% dx-e7l3fl?=dz1%v{crS-3qO_H2RF%2{vS$4-#q{T literal 0 HcmV?d00001 diff --git a/scripts/build_cta.py b/scripts/build_cta.py index a505fce..d3af96c 100644 --- a/scripts/build_cta.py +++ b/scripts/build_cta.py @@ -47,26 +47,34 @@ def detect_editors(repo_path: Path) -> list[str]: def build_cta(cta_base: str, owner: str, repo: str, pr: str, repo_path: Path, issues: int = 0) -> str: - """Return the markdown CTA footer, or '' when ``cta_base`` is unset.""" - if not cta_base: - return "" - base = cta_base.rstrip("/") - - def link(path: str, **extra: str) -> str: - return f"{base}/{path}?" 
+ urlencode({"owner": owner, "repo": repo, "pr": pr, **extra}) - - editor_links = " ยท ".join( - f"[**Open in {_EDITOR_LABEL[e]} โ†’**]({link('open-in-editor', editor=e)})" for e in detect_editors(repo_path) - ) + """Return the markdown CTA footer (the warning banner shows even without a proxy URL). - lines = ["", "---"] + The โš ๏ธ health banner is informational and needs no proxy, so it renders + whenever ``issues > 0``; the editor/marketplace links require ``cta_base``. + Returns '' only when there's nothing to show. + """ + parts: list[str] = [] if issues > 0: noun = "issue" if issues == 1 else "issues" - lines += [f"โš ๏ธ **{issues} architecture {noun} found** โ€” open CodeBoarding to explore them.", ""] + parts.append(f"โš ๏ธ **{issues} architecture {noun} found** โ€” open CodeBoarding to explore them.") - lines += [f"๐Ÿงญ See this architecture in your editor: {editor_links}", ""] + if cta_base: + base = cta_base.rstrip("/") - lines += [f"๐Ÿ’ก New to CodeBoarding? [**Get the extension โ†’**]({link('use-marketplace')})"] + def link(path: str, **extra: str) -> str: + return f"{base}/{path}?" + urlencode({"owner": owner, "repo": repo, "pr": pr, **extra}) + + editor_links = " ยท ".join( + f"[**Open in {_EDITOR_LABEL[e]} โ†’**]({link('open-in-editor', editor=e)})" for e in detect_editors(repo_path) + ) + parts.append(f"๐Ÿงญ See this architecture in your editor: {editor_links}") + parts.append(f"๐Ÿ’ก New to CodeBoarding? [**Get the extension โ†’**]({link('use-marketplace')})") + + if not parts: + return "" + lines = ["", "---"] + for p in parts: + lines += ["", p] return "\n".join(lines) diff --git a/scripts/cb_engine.py b/scripts/cb_engine.py new file mode 100644 index 0000000..37c45f8 --- /dev/null +++ b/scripts/cb_engine.py @@ -0,0 +1,129 @@ +"""Engine orchestration for the action โ€” extracted from inline ``python -c`` blocks +in action.yml so it is checked in, reviewable, and unit-testable. 
+ +Subcommands (all paths/refs come in as argv, never interpolated into source): + + base --repo P --out D --name N --run-id ID --depth K --source-sha SHA + head --repo P --out D --name N --run-id ID --depth K --base-ref B --target-ref T --source-sha SHA + health --artifact-dir D --repo P --name N --issues-out FILE + +``base`` runs a full analysis; ``head`` runs incremental, falling back to full on +``IncrementalCacheMissingError``/``BaselineUnavailableError``; ``health`` writes the +WARNING/CRITICAL finding count to ``--issues-out`` (and never fails the run). + +The engine (``codeboarding_workflows`` etc.) is imported lazily inside each +function so this module imports without the engine venv present โ€” the tests stub +those modules and assert we call the engine with the right arguments. +""" + +from __future__ import annotations + +import argparse +from pathlib import Path + +_BASE_LOG = "/tmp/cb-base.log" +_HEAD_LOG = "/tmp/cb-head.log" + + +def run_base(repo: str, out: str, name: str, run_id: str, depth: int, source_sha: str) -> None: + from codeboarding_workflows.analysis import run_full + + res = run_full( + repo_name=name, + repo_path=Path(repo), + output_dir=Path(out), + run_id=run_id, + log_path=_BASE_LOG, + depth_level=int(depth), + source_sha=source_sha, + ) + print(f"Base analysis written: {res}") + + +def run_head(repo: str, out: str, name: str, run_id: str, depth: int, base_ref: str, target_ref: str, source_sha: str) -> None: + from codeboarding_workflows.analysis import BaselineUnavailableError, run_full, run_incremental + from diagram_analysis.exceptions import IncrementalCacheMissingError + + try: + res = run_incremental( + repo_path=Path(repo), + output_dir=Path(out), + project_name=name, + run_id=run_id, + log_path=_HEAD_LOG, + base_ref=base_ref, + target_ref=target_ref, + source_sha=source_sha, + ) + except (IncrementalCacheMissingError, BaselineUnavailableError) as exc: + print(f"Incremental unavailable ({exc}); running full analysis on 
head.") + for p in Path(out).glob("*"): + if p.is_file(): + p.unlink() + res = run_full( + repo_name=name, + repo_path=Path(repo), + output_dir=Path(out), + run_id=run_id, + log_path=_HEAD_LOG, + depth_level=int(depth), + source_sha=source_sha, + ) + print(f"Head analysis written: {res}") + + +def run_health(artifact_dir: str, repo: str, name: str) -> int: + """Return the WARNING/CRITICAL finding count; 0 on any failure (best-effort).""" + try: + from health.models import Severity + from health.runner import run_health_checks + from static_analyzer.analysis_cache import StaticAnalysisCache + except Exception as exc: # engine without the health module + print(f"Health check skipped ({exc}).") + return 0 + try: + cache = StaticAnalysisCache(artifact_dir=Path(artifact_dir), repo_root=Path(repo)) + sa = cache.get() + issues = 0 + if sa is not None: + report = run_health_checks(sa, repo_name=name, repo_path=Path(repo)) + if report is not None: + for cs in report.check_summaries: + for fg in getattr(cs, "finding_groups", []): + if getattr(fg, "severity", None) in (Severity.WARNING, Severity.CRITICAL): + issues += len(fg.entities) + print(f"Architecture issues found: {issues}") + return issues + except Exception as exc: + print(f"Health check skipped ({exc}).") + return 0 + + +def main(argv=None) -> int: + p = argparse.ArgumentParser(description=__doc__) + sub = p.add_subparsers(dest="cmd", required=True) + + b = sub.add_parser("base") + for a in ("--repo", "--out", "--name", "--run-id", "--depth", "--source-sha"): + b.add_argument(a, required=True) + + h = sub.add_parser("head") + for a in ("--repo", "--out", "--name", "--run-id", "--depth", "--base-ref", "--target-ref", "--source-sha"): + h.add_argument(a, required=True) + + hc = sub.add_parser("health") + for a in ("--artifact-dir", "--repo", "--name", "--issues-out"): + hc.add_argument(a, required=True) + + args = p.parse_args(argv) + if args.cmd == "base": + run_base(args.repo, args.out, args.name, args.run_id, 
args.depth, args.source_sha) + elif args.cmd == "head": + run_head(args.repo, args.out, args.name, args.run_id, args.depth, args.base_ref, args.target_ref, args.source_sha) + elif args.cmd == "health": + Path(args.issues_out).write_text(str(run_health(args.artifact_dir, args.repo, args.name))) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/diff_to_mermaid.py b/scripts/diff_to_mermaid.py index b7196c8..fb4b354 100644 --- a/scripts/diff_to_mermaid.py +++ b/scripts/diff_to_mermaid.py @@ -161,7 +161,10 @@ def _diff_components(base_components: list, current_components: list) -> list: for comp in base: if _comp_name(comp) not in matched_names: - ghost = {k: v for k, v in comp.items() if k not in ("components", "components_relations", "can_expand")} + # Keep the subtree: a deleted parent's children/relations render as a + # deleted subgraph (the renderer forces 'deleted' down), mirroring how + # an added parent renders its whole subtree. + ghost = {k: v for k, v in comp.items() if k != "can_expand"} ghost["diff_status"] = "deleted" result.append(ghost) @@ -186,13 +189,23 @@ def _sanitize(name: str) -> str: return re.sub(r"\W+", "_", name or "") -def _esc(text: str) -> str: - """Escape arbitrary text for a mermaid label under GitHub's strict security. +# Mermaid label metacharacters โ†’ numeric/named char-refs (the ``#NNN;`` form +# GitHub's strict renderer accepts). A bare ``]`` / ``)`` / ``}`` terminates a +# node label and breaks the whole diagram, so escape the shape chars too โ€” not +# just ``#`` and ``"``. +_ESC_MAP = { + "&": "#amp;", '"': "#quot;", "<": "#lt;", ">": "#gt;", + "[": "#91;", "]": "#93;", "(": "#40;", ")": "#41;", + "{": "#123;", "}": "#125;", "|": "#124;", +} - ``#`` first (so the entities we inject are not re-escaped), then ``"``. 
- """ + +def _esc(text: str) -> str: + """Escape arbitrary text for a Mermaid label under GitHub's strict renderer.""" out = (text or "").replace("\n", " ").replace("\r", " ").strip() - out = out.replace("#", "#35;").replace('"', "#quot;") + out = out.replace("#", "#35;") # first: literal '#'; the entities below add their own '#' + for ch, ent in _ESC_MAP.items(): + out = out.replace(ch, ent) return out @@ -258,15 +271,18 @@ def _filter_changed(components: list, relations: list) -> tuple: keep_ids.add(_comp_id(c)) keep_names.add(_comp_name(c)) for r in changed_rels: # so a changed edge between two unchanged nodes still draws its endpoints - keep_ids.update((r.get("src_id", ""), r.get("dst_id", ""))) - keep_names.update((r.get("src_name", ""), r.get("dst_name", ""))) + keep_ids.update(x for x in (r.get("src_id", ""), r.get("dst_id", "")) if x) + keep_names.update(x for x in (r.get("src_name", ""), r.get("dst_name", "")) if x) + keep_ids.discard("") + keep_names.discard("") - kept = [c for c in components if _comp_id(c) in keep_ids or _comp_name(c) in keep_names] - kept_ids = {_comp_id(c) for c in kept} - kept_names = {_comp_name(c) for c in kept} + kept = [c for c in components if (_comp_id(c) and _comp_id(c) in keep_ids) or (_comp_name(c) and _comp_name(c) in keep_names)] + kept_ids = {_comp_id(c) for c in kept if _comp_id(c)} + kept_names = {_comp_name(c) for c in kept if _comp_name(c)} def touches(r: dict, side_id: str, side_name: str) -> bool: - return r.get(side_id, "") in kept_ids or r.get(side_name, "") in kept_names + rid, rname = r.get(side_id, ""), r.get(side_name, "") + return bool((rid and rid in kept_ids) or (rname and rname in kept_names)) rels = [ r @@ -299,6 +315,26 @@ def _init_directive(font_size, node_padding, node_spacing, rank_spacing) -> str return "%%{init: " + json.dumps(cfg) + "}%%" if cfg else None +def _count_changed_components(components: list) -> int: + """Recursively count components whose diff_status is added/modified/deleted.""" + n = 
0 + for c in components or []: + if c.get("diff_status") in CHANGED: + n += 1 + n += _count_changed_components(c.get("components") or []) + return n + + +def _has_changed_relations(components: list, relations: list) -> bool: + """Recursively: is any relation (at any nesting level) added/modified/deleted?""" + if any(r.get("diff_status") in CHANGED for r in relations or []): + return True + for c in components or []: + if _has_changed_relations(c.get("components") or [], c.get("components_relations") or []): + return True + return False + + def render_mermaid( diff: dict, direction: str = "LR", @@ -314,97 +350,110 @@ def render_mermaid( ``render_depth`` controls how many component levels are drawn, independent of the engine's analysis depth: 1 = top-level flat (default), 2 = top-level plus - one level of sub-components as subgraphs, etc. So you can analyze deep - (depth_level=2) yet render a clean level-1 PR diagram. At each drawn nesting - level, parent containers get a stroke-only ``*Box`` class and leaf nodes a - filled class. A wholly-added parent forces ``added`` onto its subtree (the - engine only diff-annotates surviving branches; an added subtree arrives raw). + one level of sub-components as subgraphs, etc. ``meta`` reports ``n_changed`` + (recursive changed-component count) and ``changed`` (any changed component OR + relation at any level) so the caller never mistakes a relation/nested change + for "no changes". On overflow of GitHub's Mermaid caps the full graph degrades + to a changed-only graph (and finally to None) rather than emitting an + unrenderable blob. 
""" - components = diff.get("components") or [] - relations = diff.get("components_relations") or [] - n_changed = sum(1 for c in components if c.get("diff_status") in CHANGED) - - if changed_only or len(relations) > MAX_EDGES: - components, relations = _filter_changed(components, relations) - - used: set = set() - body: list = [] - node_classes: dict = {"added": [], "modified": [], "deleted": []} - box_classes: dict = {"added": [], "modified": [], "deleted": []} - edge_styles: dict = {"added": [], "modified": [], "deleted": []} - counters = {"edges": 0, "nodes": 0} - - def emit_edges(rels: list, scope: _Scope, pad: str, force: str | None) -> None: - for rel in rels: - status = force or rel.get("diff_status", "unchanged") - present = status != "deleted" - src = scope.resolve(rel.get("src_id", ""), rel.get("src_name", ""), present) - dst = scope.resolve(rel.get("dst_id", ""), rel.get("dst_name", ""), present) - if src is None or dst is None: - continue # endpoint not drawn โ€” skip, don't consume an edge index - label = _esc(_truncate(rel.get("relation", ""))) if edge_labels else "" - body.append(f'{pad}{src} -- "{label}" --> {dst}' if label else f"{pad}{src} --> {dst}") - if status in edge_styles: - edge_styles[status].append(counters["edges"]) - counters["edges"] += 1 - - def emit_level(comps: list, rels: list, indent: int, force: str | None, level: int) -> None: - pad = " " * indent - scope = _Scope(comps, used, force) - for key, label, status, comp in scope.entries: - children = comp.get("components") if level < render_depth else None # cap drawn nesting - if children: - body.append(f'{pad}subgraph {key}["{_esc(label)}"]') - if status in box_classes: - box_classes[status].append(key) - child_force = force or (status if status == "added" else None) - emit_level(children, comp.get("components_relations") or [], indent + 1, child_force, level + 1) - body.append(f"{pad}end") - else: - body.append(f'{pad}{key}["{_esc(label)}"]') - if status in node_classes: - 
node_classes[status].append(key) - counters["nodes"] += 1 - emit_edges(rels, scope, pad, force) - - emit_level(components, relations, 1, None, 1) - if counters["nodes"] == 0: - return None, {"n_changed": n_changed, "n_nodes": 0, "n_edges": 0, "truncated": False} - - style: list = [ - f' classDef added fill:{COLORS["added"]["fill"]},stroke:{COLORS["added"]["stroke"]},color:#ffffff;', - f' classDef modified fill:{COLORS["modified"]["fill"]},stroke:{COLORS["modified"]["stroke"]},color:#ffffff;', - f' classDef deleted fill:{COLORS["deleted"]["fill"]},stroke:{COLORS["deleted"]["stroke"]},' - f"color:#ffffff,stroke-dasharray:5 3;", - ] - if any(box_classes.values()): # stroke-only containers so big parents aren't solid blocks - for st in CHANGED: - dash = ",stroke-dasharray:5 3" if st == "deleted" else "" - style.append(f' classDef {st}Box stroke:{COLORS[st]["stroke"]},stroke-width:2px,fill:none{dash};') - for status in CHANGED: - if node_classes[status]: - style.append(f' class {",".join(node_classes[status])} {status};') - if box_classes[status]: - style.append(f' class {",".join(box_classes[status])} {status}Box;') - for status in CHANGED: - idxs = edge_styles[status] - if not idxs: - continue - s = f'stroke:{COLORS[status]["stroke"]},stroke-width:2px' - if status == "deleted": - s += ",stroke-dasharray:5 3" - style.append(f' linkStyle {",".join(str(i) for i in idxs)} {s};') - + all_components = diff.get("components") or [] + all_relations = diff.get("components_relations") or [] + n_changed = _count_changed_components(all_components) + changed = n_changed > 0 or _has_changed_relations(all_components, all_relations) directive = _init_directive(font_size, node_padding, node_spacing, rank_spacing) - head = ["```mermaid"] + ([directive] if directive else []) + [f"graph {direction}"] - text = "\n".join(head + body + style + ["```"]) + + def build(only_changed: bool): + components, relations = ( + _filter_changed(all_components, all_relations) if only_changed else 
(all_components, all_relations) + ) + used: set = set() + body: list = [] + node_classes: dict = {"added": [], "modified": [], "deleted": []} + box_classes: dict = {"added": [], "modified": [], "deleted": []} + edge_styles: dict = {"added": [], "modified": [], "deleted": []} + counters = {"edges": 0, "nodes": 0} + + def emit_edges(rels, scope, pad, force): + for rel in rels: + status = force or rel.get("diff_status", "unchanged") + present = status != "deleted" + src = scope.resolve(rel.get("src_id", ""), rel.get("src_name", ""), present) + dst = scope.resolve(rel.get("dst_id", ""), rel.get("dst_name", ""), present) + if src is None or dst is None: + continue # endpoint not drawn โ€” skip, don't consume an edge index + label = _esc(_truncate(rel.get("relation", ""))) if edge_labels else "" + body.append(f'{pad}{src} -- "{label}" --> {dst}' if label else f"{pad}{src} --> {dst}") + if status in edge_styles: + edge_styles[status].append(counters["edges"]) + counters["edges"] += 1 + + def emit_level(comps, rels, indent, force, level): + pad = " " * indent + scope = _Scope(comps, used, force) + for key, label, status, comp in scope.entries: + children = comp.get("components") if level < render_depth else None # cap drawn nesting + if children: + body.append(f'{pad}subgraph {key}["{_esc(label)}"]') + if status in box_classes: + box_classes[status].append(key) + child_force = force or (status if status in ("added", "deleted") else None) + emit_level(children, comp.get("components_relations") or [], indent + 1, child_force, level + 1) + body.append(f"{pad}end") + else: + body.append(f'{pad}{key}["{_esc(label)}"]') + if status in node_classes: + node_classes[status].append(key) + counters["nodes"] += 1 + emit_edges(rels, scope, pad, force) + + emit_level(components, relations, 1, None, 1) + if counters["nodes"] == 0: + return None, 0, 0 + + style: list = [ + f' classDef added fill:{COLORS["added"]["fill"]},stroke:{COLORS["added"]["stroke"]},color:#ffffff;', + f' classDef 
modified fill:{COLORS["modified"]["fill"]},stroke:{COLORS["modified"]["stroke"]},color:#ffffff;', + f' classDef deleted fill:{COLORS["deleted"]["fill"]},stroke:{COLORS["deleted"]["stroke"]},' + f"color:#ffffff,stroke-dasharray:5 3;", + ] + if any(box_classes.values()): # stroke-only containers so big parents aren't solid blocks + for st in CHANGED: + dash = ",stroke-dasharray:5 3" if st == "deleted" else "" + style.append(f' classDef {st}Box stroke:{COLORS[st]["stroke"]},stroke-width:2px,fill:none{dash};') + for status in CHANGED: + if node_classes[status]: + style.append(f' class {",".join(node_classes[status])} {status};') + if box_classes[status]: + style.append(f' class {",".join(box_classes[status])} {status}Box;') + for status in CHANGED: + idxs = edge_styles[status] + if not idxs: + continue + s = f'stroke:{COLORS[status]["stroke"]},stroke-width:2px' + if status == "deleted": + s += ",stroke-dasharray:5 3" + style.append(f' linkStyle {",".join(str(i) for i in idxs)} {s};') + + head = ["```mermaid"] + ([directive] if directive else []) + [f"graph {direction}"] + return "\n".join(head + body + style + ["```"]), counters["nodes"], counters["edges"] + + text, n_nodes, n_edges = build(changed_only) + truncated = changed_only + # Degrade an oversized full graph to changed-only before giving up (GitHub caps). 
+ if text is not None and (n_edges > MAX_EDGES or len(text) > MAX_TEXT) and not changed_only: + t2, nn2, ne2 = build(True) + if t2 is not None: + text, n_nodes, n_edges, truncated = t2, nn2, ne2, True + meta = { "n_changed": n_changed, - "n_nodes": counters["nodes"], - "n_edges": counters["edges"], - "truncated": bool(changed_only or len(diff.get("components_relations") or []) > MAX_EDGES), + "changed": changed, + "n_nodes": n_nodes if text is not None else 0, + "n_edges": n_edges if text is not None else 0, + "truncated": bool(truncated or text is None), } - if len(text) > MAX_TEXT or counters["edges"] > MAX_EDGES: # never trip GitHub's red error box + if text is None or n_edges > MAX_EDGES or len(text) > MAX_TEXT: # never trip GitHub's red error box meta["truncated"] = True return None, meta return text, meta diff --git a/tests/__pycache__/test_build_cta.cpython-310.pyc b/tests/__pycache__/test_build_cta.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4dfa007a3d2283c97439c8cff78fb84c457c1de2 GIT binary patch literal 3871 zcmb7H&r{n*6xK?zEgM4!p=kv>Tl?FL)!N8=$a=p5d#D#3LibB?7ag2L!mb(3gjB$ge~H>n8RE+yX_EXOo06?%L5dJ^y~`=)q)PfV;vD$BcSErhvN(!^Ki%s5L?!c zMp-%A)-Ghnt4l)8o z0%g|aqEp?^%@`X521g8|7)60$>T+RJ8(|)$NgQo{4ENuo%@;}Q$^e_Y9v^6=I0rh) z+K^gP$V8-Bb3V-$^OUv)11Zjf7-&h?!+n35)8WGV)nKTfI-47+nQZ4D>7Aq z;S%V-!R!K#1_F0p1Gku{m3FY{5)`8P6c%atN~Cpt0DOIms^whjQ5pwzPH9@8w4ShL zgySNmn3?8|7(_Si5$IFc68<3~uL^D5{I2-?;c5Nrq-3SFof)+y6WHMLTbq(r1-7MO4ZR*N$r z()=L}>FPW-uQjz(U`n+~7X!)ZX2>HZX`BG}A$4Ctb(0`k)7?g!qC8<;k5^bH2)p*` z8yF~SRku3rk-cppoBf_en%{9)ZcBRyW0wKNAEsWuDoU_cTt=box(cIr4v0PCXG1BQ zT(%}}l>$*)39%0~1uvOtXhhAJ`REY`B2qGo#Rw}P&7+yFWuKOUWHw z{qHcU;tLS1$(w^QZw)skpWaBsnrs9$-nyMMcsw1()24GgNH$_F=0N)t_Q^SX!&F1e z-I+4?=L~hVuA`QEAlA5QM4)Hjy8ScH;02{HF8%VngpFHVM>}tz=#YT!OP6&JqcC2R z5U?2Rpg|0~8qLF+S-*r;5<9~LgwXsUkTd;(JW0vmFg^s$yXB#{qd$0|}hbMsZ_>$F#*oLQVB) 
zSlfNS-3D%gF1;7R;?U*spdZ!$rExIK3-GR9n2~R`ZQMH@*2^h1qMXH!myBrTp$esT_aO>m}oG>hNyK*Y;U=!1nOxfye?z_-&w*t?lGL|HJGX}2XHahS7pc4j-Yg8TJD0?$>&47z&+K7_QDGdxon zkb2nF%8ub<5~O7DXf2m@3MvtVP>R6FQ6N`n7@MXRVnY-4APQTY-3L_~sBtoGGVj~| z%=-{U)pB0o7E|D6Mtp_>DPK&Z!2A}sQGAEuE{YEQ=#k!oT!I^jheQwCdytIhdD&uC z;62>%)B|vYmdDc^r7Wqze#eoUQecpLuL&P$=)0e5z^7FdE}Cy47q8N67~{4bo>u}j z;l~lBeokMN^LRBlYf?&Deb`0sU#= YTAmGCDuDP3o}IDW*ob$nNZfz_1H10Np8x;= literal 0 HcmV?d00001 diff --git a/tests/__pycache__/test_cb_engine.cpython-310.pyc b/tests/__pycache__/test_cb_engine.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..35ad851ec8fddbab05030bf511192b24cacb4320 GIT binary patch literal 6716 zcmb7I&2!tv6~_V~2vQU!D~jXTY0RW)J9HAMiQA;pdR*D^N8Bb%YT0cE;}nI6r34Bj z=v`2@6l&5bGj-CL+;Zwk>dpT~|BOAhC!cfEbf*2i1wTYePHb@Sh~335-oD@a++=pP zXyE$%%4^NrFBryusB!di&{)AO{{<2no)NOpjG4#uzv-F!-|{T|Z+kZWtvJ_mJck*a zHS_y!$asM}yx3>^GY3Yq)GUSeIit@GOqQ)S>&k!(OV2>&^yEJ=Bd8rEug0q&Z1|QFQEMb+U0N#?K$0Emur;a=E_l^Y%v6=1ZNO%D?~eO=X*lsIgOt(n>vu zV_8Wzc_n+Ja^;g-m3krsucz_OTg9%7+8Y&_cGoMdB<#jqR%B9XCl$eicsOU9x3?>i zth6}~dARfhtQfT_Q}!o8y6NQyY*Y*CbUg|AdJ>2btNITTvDJu^2lDXz$^A<~JBW8= zB-MNv1sfu0`NN*WbJIOb{9&DU(kN+5g^a-VURwj-XD`UDo=gsee?ll@3W_E$)uX+e%=&9m2&q*b-W9A+pd=yZ5H6sOP-^B`4A zEYJW86x-4Q$Ov&oJd*f)|14V41t~Fy6>*RF%*Lyxn8j$Vnxi@Gpv8S(6@5Q50z8*| z|9&@!vk~zU-WBJFki?G=`l={_P`@O%;~GU)vUT5tFmf29`?vJsdmt%^!Z=`iW{>Tg z2gWty)8ajI&)Tz7t7rDCp53>YalgN{ z)^Sf@k7iIZ-6MPlXL)PrV{Pxy|^h4#I@u>`vTt0;K6?E|LvAF zOH0Gy8q|%r5I-P7<}Q|roP_q97$8x6OD5n#!36aj6O4t<72I+OWF(kkXoi+{9(I`1 z&O-+dFEXY0F9lJhCO2 zc;WgmNNQ`3zQY;7nD6OvYdG~Z<&_KT>MPt{MU6HR>CuR-`y8a6zqgUpdFFFUZn;94g%dZ_9!YaI0N=I zB1Q_wK;{#|bSU3CK(2=PVW-gUrqhS+lFOjyu<)`0w~Dcz85UoLCFl`)H;3xq+LckPi>0J)j1?PT2P0%Wx+eip}m}RQ_pGbP;1u}QTX;c)L5~1JttVN zzw_?JCu9ZbfJII>UOo(J=(~tQ!kDrh?^ola+iaPh@W>9rCa^kX{KZ?Sh zRtx>5nLr}ur`IV)DJ$mfELvJJ6~h2&iiURa)7YI!;aYrJMaN3%4|VMtz9z|$5^{@( z?d1Cx^}YL}2?U?E!ZAcCf@nm)!~-Epw`uNC&?96K`hYFuJdG{fp?ly?;aeCY1jJ2^ 
zcyZXHQ75Evwml2N70GtJOnw@34KRFJ}iSs9*A_<{xN4SjK^I&mv&<5C&j+HXJ_l z%@iP!`Qh10?fu+=fuMnKK?Zcr*hex<$z1C07eWMy%rGgIg@plp#dM};H%oYb2v(8W z8mOaYH+y8&@&WRqf$^2e_$ay9EmGv*OHG-ycg@fCgvRY7jEEYDVi(0H)c75d5oF>z znor2PROhrkWmt{SS4a_BokkRMq)J;MH3XTXDn1a%r~H&ZOf{(DHmy5}ry7{y4MZ}o z9R4suX;yVI&#Jv>9=@N#gkru%Tv_lO)39F{!``5J4aQmD^3y~|06%GG<%!?II)4Nc zlB7|FXU#n9f>sBu43n|;*;hr@U4s@+WkBS8#D_%IhO3-9>k% zRM7o(^iNzQ+ZEjM4UloFFw7SzZ$U+sRaUt$5AW-$BI)=Cy5y_V{NY8xk^�EAN4f zGoik@XO8nAw5$}hI#k)w-UdG6k1KR!$p_3JnZT0uUQ|u#!?{CsDV<{M}6vLu~Q_;w9V*>Y>Sp6g|OM~#b;;h z^px~J@5UVto(t*d=IHfOQ zApb1+yA%hl^)R?BXb~-ca21z1C$gdb24pnz3zxw7^a7K@eN33B|x(X+ggA>x#Z4c3PgTWDHNf4Y% zmfj);>j;YjCb*>oD~p#pVVS83+Eg9)#6_}S!7VO;RPC(R${Zz1TV#3lq>ED^f8bJZ z5*GSHuQGN%H*OwN5%R3sQ0a`I$5MmRfIjOpePTtYR@TUTQQ+G#0-wr>puNF8R}P$+ z=b}`C3!~#Jjh1I&NGuVcqT;qb1o1>O9Mk zZqiV3Y~lk2(4Wa{R*Szw%V>u5srGlN>qERM>3E^U%I2ayZDe{0(z5r8g%c>-S7MK@T(}U1DK-l8=$1i$7t2LW({ZrI6t+l*+o74nbQ% z)b{=AOE`4)!=&!}*(uxaX*CVC+r{xOF{biV!p6~hmMPo1pmlH}fQnT72Jh2ylv3jy zH7~eK;%#Kdoj|}JWVtliQI?34GgmIbxQdEYAEoKUQM09WUD?Fh5y6f&q})rHL@yH{ i`h?jwT#Jq%$g#g};@>R0ZfU`tcjn)7e_X=R-~RzHp&euZ literal 0 HcmV?d00001 diff --git a/tests/__pycache__/test_diff_to_mermaid.cpython-310.pyc b/tests/__pycache__/test_diff_to_mermaid.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..65cf958a2da82046f062d1a34b30710da7a565d9 GIT binary patch literal 9098 zcmbVS-;*21ao*VX_<5qhYZ7CrtG*3%O)i|FzNLkW)FuW z4ghBtB8lKqa*kDYDkYa)iJ#0=-MxkRAM%#JVji6*J?l+XF8k}A-31nRM^VAm&hE_4 z&dz+@{dM>34Hg%x7JkRCy|M8>r!4FLs51L2p>iEZ{NE^~)v~0Jc2BefpY4{-XQ$=h z?DX7CujL6VvNuXQWho**@}kPN*s1PX8w(o?(*26HEp}~TMya(HBj@X>$Nskz=8 zbh_>OTlMdodvz7{Wu&_O&V^4fpw(;$?Y#cG+THB-c^ha<}{T7b+4vL92v4`%2 z7yQhch+(yB?O40kZ2%#i=gyWlaild8>cqsEc+$OUOz7%Q1|XyjZ(8MFk1F= za6zxRiz7aXqJtMbsNA==#Fjk~6I)u(?cdsufJQ?!>{ey1+lzuBrmf4HQL;Xe@e8Zr zX#2v~wOG?)Q@I#~i_acjd_RsR-G)Xpc6dK7i|Vll9SRur}xclZ&?o zGP*ekl>|EHWp}P2Y$x48|01K$Wv~}^UA@}t#z}jyIlOilGx*d-anc$erdxLh^ zi?87#>x4PG{ctmC)u_jyk68s>*{bpS(v|6IelONXr`LgsdLhjzh77%K5^ctfl3KtU 
zwMWm#EvMa;+Kor6u`Z)T|5%Im6dUDJXP1Vo;VNE6VcR8fRFwYcd%mDQSEwUAB&Obr z+a#&%N7tkvtQ@plD2Lz!R`i_2Y&RQT%a2t%;6?f}PRvz_+OW_PwczY$YvIWO6$c%( zgS*=6g{x6-e$oyeIDuEjgz#aZj^k8NDQYK`qDjVog<=9dm10-?D-23H&#Xl3*bwvCsm|6S@|Kl{`B^6dw2H3|zO&}ea& z=p6*0L&* zPagc9zd)y$)VC^*;ZHSi*~U@$5g^xb#FtS-7TknoTm%T+k&bk6c3EevmM6>7N4+E~ zvWjzAE&!;S_V0nPZ-a{qP!kj+>RUAWf1ya2Q>D`pU9oMW=57m=-nQsC9ZQy;gAB7i zvfQx?z5JP8ws}A4H&N%VEf-EOKW{m(0SEjnPQVqB+e+&$>WedVAN8e~x`+B9c5@{9 z!%Z!2eCCF5Gm7xj;-(h2U)YK`L@jblM~hp9l;k;zCBf zD8YHuZ?BWZ$QhH#2jfL)KYOTE$$M*$$J6C>oENSkN?UH~>guT5!y6!Uy%5ImKgqWr zjl$jw7jzu;*0j4C#*uc{qfqM7%o1lz=PzGr&S$XBcnQTB@uoN_4)fnpF~0D{mR^t* zF+k4iKS3o^JENK%0d;h9b%er74+j)<+s^OVP%V+zP&cE3WvCFTRRu>iSEYrfdJE&L zx2d34s*5PJSZ~=yhYE6-uk2n@**t4;0vpeGGzPBVI3c>k=)sc#StMC%5OZ{}AF z5OK94C&}$>J6DR!M%aijg>Zt_2gBYdPVMJFqfviDgMJGIg!vu1ewT{BrQ#YDbD}xL zqB-zAw1AlANh*F4ggyS+E6%V0No^nf?0ql4PkL@A z&W<}3nap*4Mo>o2Nw>5+is2(k(bZ8Zh%wcic6-bv9M#OJxcWAApTq9W6gv|S=BgYF z`T<8zshX2SShee-PO5!z9>LFfzJMf6Ay@A{D*GWxjDWNtbzKD<5hLbz5hSI`UqI3X zVeGajt$_M>rpjMQxq~oeGz+sM=m<6i?xAlLw~iX z2z%y3bjen9O3)u1#n}UNV9%q?hOhmA!}lQ2)qu2YgupY43TV_FD!xm_ z_o!%6G1b|8zKVk9(>P+Hc;7<8Q9_^XLek^pes9&T?0zuCCNnly4lV0LTasFwumVI1@aK{m_yAMkig4)z#O z^Xp7O+;g#Q;6Qa&6;jY#!K<&f_()w&;WN@_RzXO8Bn3M1fiC(1C-?*{=}P|<2&;*?L)9gHnLa6!D`qG`)xFlyZA8dVc%}LL6kX*J*y+O;nme~!K+)y zRwpg&9L}vxJ0PrdRYDP;s)!2KSuXPVe}MLM{-;bl(pZ?fCPNxOre`V8#{SF*X%25( z5FoK$H1_hYibAC3CTM98F8x1bPyn1PE2p)8MrCSUY)4LNPdSz6_C#5`4NIHt0ZYs>@+~-o4a&qz zviz}4i4kncv{dpI50>H5uIX$T`7_Ky1yqDoa1{A-Tzx|GWP>oFXjABgOKg z7yuiEMS*CSlT}wn(Xiztj|cPeIK%Ro<=&tj4v`Lk>%t^7bW8$eSN^(KVHqqVST|v& z@th_R|C~_@GN5(kGPsWl$zU2t62>W;QGibDMs;AlL3le8b}AHHW}HQ+%F@J1xyO}d zbIlo*-yHil>MM^{>ZjLL9$B&hF}UN>=|`hMa%JqEzVyv21%PwgQ2&TozVHmdH*l@4 zQ;}=@JhDd^Srey6F+j!kGg)ftCMesf5sati)p_m_foHNGA(s1vU!#Imn;lQEtmGg% zZ*kD#rf8Tx9$J{269nDB+L5%sMEZ>ElXgnEPFPc&aEA8ocn0fTao^s8Z@~B^30_Y} z#%|aQ&b8)v;X7gc;HC=O52Iwfgv#k}UAmIh-LC0cg^5pd8cdm@R8}&Eq zH^-G5_0!?z@Cq&$e;jp{e9%N*iq2DMjXiFIh%jKopXN_3Hvqnd)27n;3D9AYsYi5G 
zWX6<+ibw~mX4c?vu-!vSu2j5%hna?!?PZY9F-VMv2y)v)L$5H}9uvrg`7d}+(FDGD z&PoyF)S=G^%)`eTY&yttDzp;nASZIn5GNT*7LOIOFA8c56;VQ~EB!Rzl*~oCobInd z7DuS(Gzgtaf%kF9U+6<=2o@tEYa*V7krknM!s0Ydfwd+WodCs=Gy8u2cA7rz@ySy!kZwXw zBUiu--#XonWHGq3Q=Y4Mt*Q3&ws#3I61yTFYg{hNgYpx_@<>y9_PhJ2_+E`ZP!jtn zG=B&|Qo-@$GrW`i#Ov74q3vG?;pOUFc)WmqA(6d|ffH*siKAEs%uXL<=_>+9>x?2& z*%{P4lc?Rpz6zNHK5UxJb+*4&1v{_(HX=j`5T^LtBj`S)f+!%{#buAVpMHLYJ?8Ad zK{jsuZx|rmzO51(ZyuzQ#0S&I4n~4D9uuSv9em}(XH8?c1_g>O3^gLokTy)1y&W5^ z+ce1vK8p46T{&iRh4JN!*0m|oA){ad+#(3(jPc97*l2T@JO!Cy*Sb#`Wk32&w$P)Z zDzK5WGQZHhj~u+nv%JWI#f=Z8CTpi{=TyXZG7&G(S3k4S7>~XB`SbOa*@beNt;o$H zM`tSkoKOh)^2~_8qjeG{$H|%Q34)59n~g*22k5Kbqk>Wc^Y#aHMVm8fNQI(;(kS(W zihrVl_(l<>sgJ1m1r<|K@D=AuKf;aMIQah*c+1N3%YN1OQ7&Nns)VzL4J_(M8)o>A zjD8R_+$Mev!CStKW4as!a?lO}b%we*@zYrQBm8#<30s%xKO1`8Rb|snm;cY9)a}z} zBy>#_4a@%_5a_as;z94jNSB5o!s-P7Zc_Yb5TkHY|3K-O`6PJ@pP+b~ji<`6!K-4n r`z>4hSIvIkH3~E1Qz%@^x9L9wYGMWFs#9I`-R08q8~z71;r{8*@76tr literal 0 HcmV?d00001 diff --git a/tests/test_build_cta.py b/tests/test_build_cta.py index 08fb367..acb5580 100644 --- a/tests/test_build_cta.py +++ b/tests/test_build_cta.py @@ -34,6 +34,11 @@ class TestBuildCta(unittest.TestCase): def test_empty_base_yields_no_footer(self): self.assertEqual(bc.build_cta("", "o", "r", "1", repo_with()), "") + def test_warning_shows_without_cta_base(self): + out = bc.build_cta("", "o", "r", "1", repo_with(), issues=3) + self.assertIn("3 architecture issues found", out) + self.assertNotIn("http", out) # no links without a proxy base + def test_links_banner_and_cursor_only(self): out = bc.build_cta("https://x.dev/", "Org", "Repo", "9", repo_with(".cursor"), issues=2) self.assertIn("2 architecture issues found", out) diff --git a/tests/test_cb_engine.py b/tests/test_cb_engine.py new file mode 100644 index 0000000..8096ddd --- /dev/null +++ b/tests/test_cb_engine.py @@ -0,0 +1,140 @@ +"""Smoke tests for scripts/cb_engine.py โ€” verify it calls the engine API correctly, +using stub modules so no real engine venv is 
needed.""" + +import sys +import tempfile +import types +import unittest +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "scripts")) +import cb_engine # noqa: E402 + +_STUBBED = [ + "codeboarding_workflows", "codeboarding_workflows.analysis", + "diagram_analysis", "diagram_analysis.exceptions", + "health", "health.models", "health.runner", + "static_analyzer", "static_analyzer.analysis_cache", +] + + +class _Rec: + def __init__(self, ret="OUT", raises=None): + self.calls = [] + self._ret, self._raises = ret, raises + + def __call__(self, *a, **k): + self.calls.append(k) + if self._raises: + raise self._raises("boom") + return self._ret + + +def _mod(name, **attrs): + m = types.ModuleType(name) + for k, v in attrs.items(): + setattr(m, k, v) + sys.modules[name] = m + return m + + +class _Base(unittest.TestCase): + def tearDown(self): + for n in _STUBBED: + sys.modules.pop(n, None) + + +class TestAnalysis(_Base): + def _install(self, run_full=None, run_incremental=None): + class BaselineUnavailableError(Exception): + pass + + class IncrementalCacheMissingError(Exception): + pass + + analysis = _mod( + "codeboarding_workflows.analysis", + run_full=run_full or _Rec(), + run_incremental=run_incremental or _Rec(), + BaselineUnavailableError=BaselineUnavailableError, + ) + pkg = _mod("codeboarding_workflows") + pkg.analysis = analysis + exc = _mod("diagram_analysis.exceptions", IncrementalCacheMissingError=IncrementalCacheMissingError) + da = _mod("diagram_analysis") + da.exceptions = exc + return analysis, IncrementalCacheMissingError, BaselineUnavailableError + + def test_base_calls_run_full(self): + rf = _Rec() + self._install(run_full=rf) + cb_engine.run_base("/repo", "/out", "myrepo", "rid-base", "2", "abc123") + self.assertEqual(len(rf.calls), 1) + k = rf.calls[0] + self.assertEqual(k["repo_name"], "myrepo") + self.assertEqual(str(k["repo_path"]), "/repo") + self.assertEqual(k["depth_level"], 2) # coerced to int + 
self.assertEqual(k["source_sha"], "abc123") + + def test_head_uses_incremental(self): + ri, rf = _Rec(), _Rec() + self._install(run_full=rf, run_incremental=ri) + cb_engine.run_head("/repo", "/out", "r", "rid", "1", "base", "head", "head") + self.assertEqual(len(ri.calls), 1) + self.assertEqual(len(rf.calls), 0) # no fallback + self.assertEqual(ri.calls[0]["base_ref"], "base") + self.assertEqual(ri.calls[0]["target_ref"], "head") + + def test_head_falls_back_to_full_on_cache_miss(self): + analysis, IncMiss, _ = self._install() # install once so the exception class identity matches + rf = _Rec() + analysis.run_full = rf + analysis.run_incremental = _Rec(raises=IncMiss) + out = tempfile.mkdtemp() + (Path(out) / "stale.json").write_text("{}") # must be wiped before the full run + cb_engine.run_head("/repo", out, "r", "rid", "3", "base", "head", "head") + self.assertEqual(len(rf.calls), 1) # fell back to full + self.assertEqual(rf.calls[0]["depth_level"], 3) + self.assertFalse((Path(out) / "stale.json").exists()) # head dir wiped before full + + +class TestHealth(_Base): + def _install_health(self, report): + class Severity: + WARNING, CRITICAL = "warning", "critical" + + class _Cache: + def __init__(self, artifact_dir, repo_root): + pass + + def get(self): + return object() # non-None static analysis + + _mod("health.models", Severity=Severity) + _mod("health.runner", run_health_checks=lambda sa, repo_name, repo_path: report) + _mod("health", ) + _mod("static_analyzer.analysis_cache", StaticAnalysisCache=_Cache) + _mod("static_analyzer", ) + return Severity + + def test_counts_warning_and_critical(self): + Sev = self._install_health(report=None) + + class FG: + def __init__(self, sev, n): + self.severity, self.entities = sev, list(range(n)) + + class CS: + finding_groups = [FG(Sev.WARNING, 2), FG(Sev.CRITICAL, 1), FG("info", 5)] + + report = types.SimpleNamespace(check_summaries=[CS()]) + self._install_health(report=report) + 
self.assertEqual(cb_engine.run_health("/art", "/repo", "r"), 3) # 2 warnings + 1 critical, info ignored + + def test_missing_module_yields_zero(self): + # No health.* modules installed -> import fails -> 0, never raises. + self.assertEqual(cb_engine.run_health("/art", "/repo", "r"), 0) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_diff_to_mermaid.py b/tests/test_diff_to_mermaid.py index 10aa0a8..f9916e3 100644 --- a/tests/test_diff_to_mermaid.py +++ b/tests/test_diff_to_mermaid.py @@ -103,6 +103,39 @@ def test_label_escaping(self): self.assertIn("#quot;", text) self.assertIn("#35;", text) + def test_label_escaping_brackets_break_chars(self): + # `]` / `(` / `&` would break GitHub's renderer if left raw. + self.assertEqual(dm._esc("Has]Bracket"), "Has#93;Bracket") + self.assertEqual(dm._esc("f(x)"), "f#40;x#41;") + self.assertEqual(dm._esc("A & B"), "A #amp; B") + head = {"components": [comp("Weird]Name(x)"), comp("B")], "components_relations": []} + base = {"components": [comp("B")], "components_relations": []} + text, _ = dm.render_mermaid(dm.build_diff(base, head)) + self.assertNotIn("]Name", text) # no raw ] inside a label + self.assertIn("#93;", text) + + def test_changed_flag_relation_only(self): + # A label-only relation change leaves n_changed=0 but must report changed=True. 
+ base = {"components": [comp("A"), comp("B")], "components_relations": [rel("A", "B", "uses")]} + head = {"components": [comp("A"), comp("B")], "components_relations": [rel("A", "B", "calls")]} + text, meta = dm.render_mermaid(dm.build_diff(base, head)) + self.assertEqual(meta["n_changed"], 0) + self.assertTrue(meta["changed"]) + self.assertIsNotNone(text) + + def test_changed_flag_false_when_identical(self): + d = {"components": [comp("A"), comp("B")], "components_relations": [rel("A", "B")]} + _, meta = dm.render_mermaid(dm.build_diff(d, d)) + self.assertEqual(meta["n_changed"], 0) + self.assertFalse(meta["changed"]) + + def test_changed_flag_counts_nested(self): + base = {"components": [comp("P", subs=[comp("c1")], subrels=[])], "components_relations": []} + head = {"components": [comp("P", subs=[comp("c1", {"x.py": ["f"]})], subrels=[])], "components_relations": []} + _, meta = dm.render_mermaid(dm.build_diff(base, head), render_depth=2) + self.assertEqual(meta["n_changed"], 1) # the nested child counts + self.assertTrue(meta["changed"]) + def test_changed_only_truncates(self): text, meta = dm.render_mermaid(self._diff(), render_depth=1, changed_only=True) self.assertIsNotNone(text) From a616ee98f9da70f3bcde4066ea3faafbdc1d829d Mon Sep 17 00:00:00 2001 From: brovatten Date: Sat, 6 Jun 2026 12:48:00 +0200 Subject: [PATCH 14/27] comment: reword color legend + drop compass emoji from editor CTA --- action.yml | 2 +- scripts/build_cta.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/action.yml b/action.yml index 19aab95..0f83a0c 100644 --- a/action.yml +++ b/action.yml @@ -448,7 +448,7 @@ runs: cat "$DIAGRAM_MD" echo "" echo "" - echo "Colours indicate components that have been ๐ŸŸฉ added ยท ๐ŸŸจ modified ยท ๐ŸŸฅ removed โ€” versus \`${BASE_REF}\`." 
+ echo "Colors indicate component changes compared to \`${BASE_REF}\`: ๐ŸŸฉ Added ยท ๐ŸŸจ Modified ยท ๐ŸŸฅ Removed" if [ "$TRUNC" = "true" ]; then echo "" echo "Showing changed components only โ€” the full graph exceeds GitHub's inline Mermaid limit." diff --git a/scripts/build_cta.py b/scripts/build_cta.py index d3af96c..553c06b 100644 --- a/scripts/build_cta.py +++ b/scripts/build_cta.py @@ -67,7 +67,7 @@ def link(path: str, **extra: str) -> str: editor_links = " ยท ".join( f"[**Open in {_EDITOR_LABEL[e]} โ†’**]({link('open-in-editor', editor=e)})" for e in detect_editors(repo_path) ) - parts.append(f"๐Ÿงญ See this architecture in your editor: {editor_links}") + parts.append(f"See this architecture in your editor: {editor_links}") parts.append(f"๐Ÿ’ก New to CodeBoarding? [**Get the extension โ†’**]({link('use-marketplace')})") if not parts: From 1be068816b051039b6959d3dd5888f8f116bd349 Mon Sep 17 00:00:00 2001 From: brovatten Date: Sat, 6 Jun 2026 13:52:27 +0200 Subject: [PATCH 15/27] fix: harden action security and mermaid diff --- .codeboarding/static_analysis.pkl | Bin 108 -> 0 bytes .github/workflows/example-usage.yml | 23 -- .github/workflows/test-self.yml | 30 --- .github/workflows/test.yml | 23 ++ .gitignore | 15 ++ README.md | 16 +- action.yml | 224 ++++++++++++------ docs/COMMIT_STRATEGY.md | 4 +- scripts/__pycache__/build_cta.cpython-310.pyc | Bin 4655 -> 0 bytes scripts/__pycache__/cb_engine.cpython-310.pyc | Bin 4672 -> 0 bytes .../diff_to_mermaid.cpython-310.pyc | Bin 18389 -> 0 bytes scripts/cb_engine.py | 68 +++++- scripts/diff_to_mermaid.py | 111 +++++++-- scripts/run_local.sh | 70 +++--- .../test_build_cta.cpython-310.pyc | Bin 3871 -> 0 bytes .../test_cb_engine.cpython-310.pyc | Bin 6716 -> 0 bytes .../test_diff_to_mermaid.cpython-310.pyc | Bin 9098 -> 0 bytes tests/test_cb_engine.py | 69 +++++- tests/test_diff_to_mermaid.py | 69 +++++- 19 files changed, 513 insertions(+), 209 deletions(-) delete mode 100644 .codeboarding/static_analysis.pkl 
delete mode 100644 .github/workflows/example-usage.yml delete mode 100644 .github/workflows/test-self.yml delete mode 100644 scripts/__pycache__/build_cta.cpython-310.pyc delete mode 100644 scripts/__pycache__/cb_engine.cpython-310.pyc delete mode 100644 scripts/__pycache__/diff_to_mermaid.cpython-310.pyc delete mode 100644 tests/__pycache__/test_build_cta.cpython-310.pyc delete mode 100644 tests/__pycache__/test_cb_engine.cpython-310.pyc delete mode 100644 tests/__pycache__/test_diff_to_mermaid.cpython-310.pyc diff --git a/.codeboarding/static_analysis.pkl b/.codeboarding/static_analysis.pkl deleted file mode 100644 index 62c36b1e828bce40a2ef2d8da21bbc5e4b0200ac..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 108 zcmZo*nVQG|0X_1?C5a`O$?=JKi8+;3sYQBVMsa3wd{Js~X->(M9?@X13P+Fvpm-2i mxOmFsDVmK_YNu%QutO9U135k1DVd4sdHKaa6G8maBs~C3$|@@W diff --git a/.github/workflows/example-usage.yml b/.github/workflows/example-usage.yml deleted file mode 100644 index 1e8913c..0000000 --- a/.github/workflows/example-usage.yml +++ /dev/null @@ -1,23 +0,0 @@ -name: Architecture diff - -# Reference example only (the README shows the real pull_request usage). Manual -# trigger so it doesn't run the published @v1 against this repo's own PRs. -on: - workflow_dispatch: - -# Only a PR comment is posted โ€” no image is pushed โ€” so contents:write is not needed. 
-permissions: - pull-requests: write - -jobs: - architecture-diff: - runs-on: ubuntu-latest - if: github.event.pull_request.draft == false - timeout-minutes: 60 - steps: - - uses: codeboarding/codeboarding-action@v1 - with: - llm_api_key: ${{ secrets.OPENROUTER_API_KEY }} - # depth_level: '1' # 1-3, higher = more detail - # diagram_direction: 'LR' # LR | TD | TB | RL | BT - # changed_only: 'false' # 'true' to draw only changed components diff --git a/.github/workflows/test-self.yml b/.github/workflows/test-self.yml deleted file mode 100644 index 040389d..0000000 --- a/.github/workflows/test-self.yml +++ /dev/null @@ -1,30 +0,0 @@ -name: Self-test architecture diff - -# Exercises THIS branch's action (uses: ./) against the action repo itself, so a -# PR posts a Mermaid architecture-diff comment. Remove before merge. - -on: - pull_request: - types: [opened, synchronize, reopened] - -permissions: - pull-requests: write - -concurrency: - group: self-test-${{ github.event.pull_request.number }} - cancel-in-progress: true - -jobs: - diagram: - runs-on: ubuntu-latest - timeout-minutes: 60 - steps: - - uses: actions/checkout@v4 - - uses: ./ - with: - llm_api_key: ${{ secrets.OPENROUTER_API_KEY }} - agent_model: ${{ secrets.AGENT_MODEL }} - parsing_model: ${{ secrets.PARSING_MODEL }} - cta_base_url: https://codeboarding.pontux-inc.workers.dev - depth_level: '2' # analyze deep (rich nested data for the workspace/extension) - render_depth: '1' # but draw a clean top-level diagram in the PR diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e16776b..ea9ddb1 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -5,13 +5,36 @@ on: branches: [main] pull_request: +permissions: + contents: read + jobs: unittest: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - uses: actions/setup-python@v5 with: python-version: '3.13' - name: Run unit tests (stdlib only) run: python -m unittest discover -s 
tests -v + + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + - uses: actions/setup-go@v5 + with: + go-version: '1.22' + - name: Install shellcheck + run: sudo apt-get update && sudo apt-get install -y shellcheck + - name: Install actionlint + run: go install github.com/rhysd/actionlint/cmd/actionlint@v1.7.7 + - name: Run actionlint + run: actionlint + - name: Run shellcheck + run: shellcheck scripts/run_local.sh diff --git a/.gitignore b/.gitignore index ea8864f..5fc1409 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,21 @@ test_codeboarding/ # Local test harness output (scripts/run_local.sh) .cb-local/ +# Dependencies +node_modules/ + +# Python generated files +__pycache__/ +*.py[cod] + +# CodeBoarding generated cache/log artifacts +.codeboarding/static_analysis.pkl +.codeboarding/static_analysis.sha +.codeboarding/logs/ +.codeboarding/health/* +!.codeboarding/health/ +!.codeboarding/health/health_report.json + # Environment files .env diff --git a/README.md b/README.md index 90678f1..aea4eea 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,9 @@ on: types: [created] permissions: - pull-requests: write # the only permission needed — nothing is pushed + contents: read # checkout + fetch PR/base commits + pull-requests: write # post/update the PR comment + issues: write # issue_comment command reactions/comments # Cancel a superseded run when new commits land on the same PR (avoid stacking # multi-minute LLM jobs). @@ -40,13 +42,11 @@ concurrency: jobs: diagram: runs-on: ubuntu-latest - # Run on (non-draft) PR events, OR when a TRUSTED collaborator comments exactly - # "/codeboarding" on a PR. The if-gate matters: (1) without it a runner spins up - # for every comment; (2) the author_association check is a SECURITY gate — see below. + # Run on (non-draft) PR events, OR when a TRUSTED collaborator comments on a PR. + # The action itself checks whether the first word matches `trigger_command`. 
if: > (github.event_name == 'pull_request' && github.event.pull_request.draft == false) || (github.event_name == 'issue_comment' && github.event.issue.pull_request != null && - (github.event.comment.body == '/codeboarding' || startsWith(github.event.comment.body, '/codeboarding ')) && contains(fromJSON('["OWNER","MEMBER","COLLABORATOR"]'), github.event.comment.author_association)) timeout-minutes: 60 steps: @@ -61,7 +61,7 @@ You need **one secret**: an LLM API key. OpenRouter is the default; pass your ow ### On-demand: the `/codeboarding` command -Comment **`/codeboarding`** on any pull request to (re)run the diagram on demand — handy after the engine/baseline changes, or on draft PRs you don't auto-review. The action reacts with 👀 to acknowledge. Change the word via the `trigger_command` input. +Comment **`/codeboarding`** on any same-repository pull request to (re)run the diagram on demand — handy after the engine/baseline changes, or on draft PRs you don't auto-review. The action reacts with 👀 to acknowledge. Change the word via the `trigger_command` input. > **Note:** GitHub runs `issue_comment` workflows from the **default branch's** copy of the workflow file. So the command only works once this workflow is merged to your default branch — a workflow that exists only on a feature branch won't respond to comments. @@ -87,7 +87,7 @@ Comment **`/codeboarding`** on any pull request to (re)run the diagram on demand | Output | Description | |---|---| | `diagram_md` | Path to the rendered ```` ```mermaid ```` block in the runner workspace. | -| `n_changed` | Number of top-level components added/modified/deleted. | +| `n_changed` | Number of components added/modified/deleted, counted recursively. | | `truncated` | `true` if the diagram was reduced to changed-only to fit GitHub's Mermaid limit. 
| ## How the diff is colored @@ -117,7 +117,7 @@ If `.codeboarding/analysis.json` isn't committed at the PR base commit, the acti ## Fork PRs -Because nothing is pushed (the diagram is inline Mermaid), there is no image step to skip on forks. The one caveat is GitHub's own policy: **secrets are withheld from `pull_request`-triggered runs on forks**, so the LLM key is unavailable and the run fails early with a clear message. A maintainer can re-run from the Actions tab, or use `pull_request_target` if you understand its security implications. +Because nothing is pushed (the diagram is inline Mermaid), there is no image step to skip on forks. The one caveat is GitHub's own policy: **secrets are withheld from `pull_request`-triggered runs on forks**, so the LLM key is unavailable and the run fails early with a clear message. Do not use `pull_request_target` for this action; it would analyze PR-head code while secrets are available. The trusted `/codeboarding` `issue_comment` path is intentionally limited to same-repository PRs, so fork code is not analyzed with repository secrets present. ## Limitations diff --git a/action.yml b/action.yml index 0f83a0c..f9c29e3 100644 --- a/action.yml +++ b/action.yml @@ -47,7 +47,7 @@ inputs: required: false default: '1' cta_base_url: - description: 'Base URL of the click proxy (e.g. https://go.codeboarding.org). When set, the comment adds "open in workspace" / "get the extension" links with owner/repo/pr appended. Empty disables the CTA.' + description: 'Base URL of the click proxy (e.g. https://go.codeboarding.org). When set, the comment adds "open in VS Code/Cursor" / "get the extension" links with owner/repo/pr appended. Empty disables the CTA.' required: false default: '' trigger_command: @@ -60,7 +60,7 @@ outputs: description: 'Path to the rendered ```mermaid block (in the runner workspace).' value: ${{ steps.diagram.outputs.diagram_md }} n_changed: - description: 'Number of top-level components added/modified/deleted.' 
+ description: 'Number of components added/modified/deleted, counted recursively.' value: ${{ steps.diagram.outputs.n_changed }} truncated: description: 'True if the diagram was reduced to changed-only to fit GitHub''s Mermaid limit.' @@ -80,19 +80,32 @@ runs: AUTHOR_ASSOC: ${{ github.event.comment.author_association }} TRIGGER: ${{ inputs.trigger_command }} EVENT: ${{ github.event_name }} + REPOSITORY: ${{ github.repository }} + PR_NUMBER_PULL: ${{ github.event.pull_request.number }} + PULL_BASE_SHA: ${{ github.event.pull_request.base.sha }} + PULL_HEAD_SHA: ${{ github.event.pull_request.head.sha }} + PULL_BASE_REF: ${{ github.event.pull_request.base.ref }} + PULL_BASE_REPO: ${{ github.event.pull_request.base.repo.full_name }} + PULL_HEAD_REPO: ${{ github.event.pull_request.head.repo.full_name }} + ISSUE_NUMBER: ${{ github.event.issue.number }} + ISSUE_PR_URL: ${{ github.event.issue.pull_request.url }} run: | set -uo pipefail skip() { echo "::notice::$1 Skipping."; echo "skip=true" >> "$GITHUB_OUTPUT"; exit 0; } - if [ "$EVENT" = "pull_request" ] || [ "$EVENT" = "pull_request_target" ]; then - PR_NUMBER="${{ github.event.pull_request.number }}" - BASE_SHA="${{ github.event.pull_request.base.sha }}" - HEAD_SHA="${{ github.event.pull_request.head.sha }}" - BASE_REF="${{ github.event.pull_request.base.ref }}" + if [ "$EVENT" = "pull_request" ]; then + PR_NUMBER="$PR_NUMBER_PULL" + BASE_SHA="$PULL_BASE_SHA" + HEAD_SHA="$PULL_HEAD_SHA" + BASE_REF="$PULL_BASE_REF" + BASE_REPO="$PULL_BASE_REPO" + HEAD_REPO="$PULL_HEAD_REPO" + elif [ "$EVENT" = "pull_request_target" ]; then + skip "pull_request_target is not supported because it can expose secrets to PR-head code; use pull_request or trusted issue_comment." elif [ "$EVENT" = "issue_comment" ]; then # On-demand "/codeboarding" command. Must be a PR comment whose first # word is the trigger; the payload lacks SHAs so we query the API. 
- [ -n "${{ github.event.issue.pull_request.url }}" ] || skip "Comment is on a plain issue, not a PR." + [ -n "$ISSUE_PR_URL" ] || skip "Comment is on a plain issue, not a PR." FIRST_WORD="$(printf '%s' "$COMMENT_BODY" | tr -d '\r' | awk 'NR==1{print $1; exit}')" [ "$FIRST_WORD" = "$TRIGGER" ] || skip "Comment does not start with '$TRIGGER'." # SECURITY (pwn-request guard): issue_comment runs in the base repo WITH @@ -102,33 +115,40 @@ runs: OWNER|MEMBER|COLLABORATOR) : ;; *) skip "Commenter is '$AUTHOR_ASSOC' (not OWNER/MEMBER/COLLABORATOR)." ;; esac - PR_NUMBER="${{ github.event.issue.number }}" - PR_JSON="$(gh api "repos/${{ github.repository }}/pulls/${PR_NUMBER}" 2>/dev/null)" || skip "Could not fetch PR #$PR_NUMBER from the API." + PR_NUMBER="$ISSUE_NUMBER" + PR_JSON="$(gh api "repos/${REPOSITORY}/pulls/${PR_NUMBER}" 2>/dev/null)" || skip "Could not fetch PR #$PR_NUMBER from the API." BASE_SHA="$(printf '%s' "$PR_JSON" | python3 -c 'import json,sys;print(json.load(sys.stdin)["base"]["sha"])' 2>/dev/null)" || skip "Could not parse base SHA from the PR API." HEAD_SHA="$(printf '%s' "$PR_JSON" | python3 -c 'import json,sys;print(json.load(sys.stdin)["head"]["sha"])' 2>/dev/null)" || skip "Could not parse head SHA from the PR API." BASE_REF="$(printf '%s' "$PR_JSON" | python3 -c 'import json,sys;print(json.load(sys.stdin)["base"]["ref"])' 2>/dev/null)" || BASE_REF="" + BASE_REPO="$(printf '%s' "$PR_JSON" | python3 -c 'import json,sys;print(json.load(sys.stdin)["base"]["repo"]["full_name"])' 2>/dev/null)" || skip "Could not parse base repo from the PR API." + HEAD_REPO="$(printf '%s' "$PR_JSON" | python3 -c 'import json,sys;print(json.load(sys.stdin)["head"]["repo"]["full_name"])' 2>/dev/null)" || skip "Could not parse head repo from the PR API." + [ "$HEAD_REPO" = "$REPOSITORY" ] || skip "On-demand runs with secrets are disabled for fork PRs." else skip "Unsupported event '$EVENT' (use pull_request or issue_comment)." 
fi - { [ -n "$PR_NUMBER" ] && [ -n "$BASE_SHA" ] && [ -n "$HEAD_SHA" ]; } || skip "Could not resolve PR/base/head SHAs." + { [ -n "$PR_NUMBER" ] && [ -n "$BASE_SHA" ] && [ -n "$HEAD_SHA" ] && [ -n "$BASE_REPO" ] && [ -n "$HEAD_REPO" ]; } || skip "Could not resolve PR/base/head SHAs/repos." { echo "skip=false" echo "pr_number=$PR_NUMBER" echo "base_sha=$BASE_SHA" echo "head_sha=$HEAD_SHA" echo "base_ref=$BASE_REF" + echo "base_repo=$BASE_REPO" + echo "head_repo=$HEAD_REPO" } >> "$GITHUB_OUTPUT" - echo "Resolved PR #$PR_NUMBER (base=$BASE_SHA head=$HEAD_SHA) via $EVENT" + echo "Resolved PR #$PR_NUMBER (base=$BASE_REPO@$BASE_SHA head=$HEAD_REPO@$HEAD_SHA) via $EVENT" - name: Acknowledge command if: steps.guard.outputs.skip != 'true' && github.event_name == 'issue_comment' shell: bash env: GH_TOKEN: ${{ inputs.github_token }} + REPOSITORY: ${{ github.repository }} + COMMENT_ID: ${{ github.event.comment.id }} run: | # 👀 react to the triggering comment so the user knows it was picked up. - gh api -X POST "repos/${{ github.repository }}/issues/comments/${{ github.event.comment.id }}/reactions" \ + gh api -X POST "repos/${REPOSITORY}/issues/comments/${COMMENT_ID}/reactions" \ -f content=eyes >/dev/null 2>&1 || true - name: Checkout CodeBoarding engine @@ -138,23 +158,33 @@ runs: repository: CodeBoarding/CodeBoarding ref: ${{ inputs.engine_ref }} path: codeboarding-engine + persist-credentials: false - name: Checkout target repository (PR head) if: steps.guard.outputs.skip != 'true' uses: actions/checkout@v4 with: + repository: ${{ steps.guard.outputs.head_repo }} path: target-repo fetch-depth: 0 ref: ${{ steps.guard.outputs.head_sha }} + persist-credentials: false - name: Ensure PR base commit is fetched if: steps.guard.outputs.skip != 'true' shell: bash working-directory: target-repo + env: + BASE_SHA: ${{ steps.guard.outputs.base_sha }} + BASE_REPO: ${{ steps.guard.outputs.base_repo }} run: | - git fetch origin "${{ steps.guard.outputs.base_sha }}" --depth=1 || true - 
git cat-file -e "${{ steps.guard.outputs.base_sha }}" && echo "Base commit reachable." || \ - (echo "::error::Base commit ${{ steps.guard.outputs.base_sha }} is not reachable." && exit 1) + git fetch origin "$BASE_SHA" --depth=1 || true + if ! git cat-file -e "$BASE_SHA" 2>/dev/null; then + git remote add base "https://github.com/${BASE_REPO}.git" 2>/dev/null || git remote set-url base "https://github.com/${BASE_REPO}.git" + git fetch base "$BASE_SHA" --depth=1 || true + fi + git cat-file -e "$BASE_SHA" && echo "Base commit reachable." || \ + (echo "::error::Base commit $BASE_SHA is not reachable." && exit 1) - name: Set up Python 3.13 if: steps.guard.outputs.skip != 'true' @@ -217,8 +247,10 @@ runs: RAW_AGENT_MODEL: ${{ inputs.agent_model }} RAW_PARSING_MODEL: ${{ inputs.parsing_model }} run: | + AUTH_FILE="${RUNNER_TEMP}/openrouter-auth.json" + trap 'rm -f "$AUTH_FILE"' EXIT if [ -z "$RAW_KEY" ]; then - echo "::error::llm_api_key is empty. On fork PRs, repo secrets are withheld by GitHub — a maintainer must re-run, or use pull_request_target." + echo "::error::llm_api_key is empty. On fork PRs, repo secrets are withheld by GitHub." exit 1 fi # Pasting a key into the secret UI often picks up trailing newlines, @@ -238,46 +270,38 @@ runs: case "$KEY" in sk-or-v1-*) PFX=1 ;; *) PFX=0 ;; esac echo "OPENROUTER_API_KEY length: ${#KEY}; looks-like-OpenRouter: $PFX" - STATUS=$(curl -sS -o /tmp/openrouter-auth.json -w "%{http_code}" \ + STATUS=$(curl -sS -o "$AUTH_FILE" -w "%{http_code}" \ -H "Authorization: Bearer $KEY" --max-time 10 \ https://openrouter.ai/api/v1/auth/key || echo "curl-fail") echo "OpenRouter /auth/key response: HTTP $STATUS" if [ "$STATUS" != "200" ]; then # Surface the upstream error MESSAGE only — never the whole auth body (avoid leaking). 
- MSG="$(python3 -c 'import json,sys;print(json.load(open("/tmp/openrouter-auth.json")).get("error",{}).get("message",""))' 2>/dev/null || true)" + MSG="$(AUTH_FILE="$AUTH_FILE" python3 -c 'import json,os;print(json.load(open(os.environ["AUTH_FILE"])).get("error",{}).get("message",""))' 2>/dev/null || true)" echo "::error::OpenRouter rejected the API key (HTTP $STATUS). ${MSG:-Verify the OPENROUTER_API_KEY secret.}" - rm -f /tmp/openrouter-auth.json exit 1 fi - # Export (masked) for the analysis steps. - { - echo "OPENROUTER_API_KEY=$KEY" - echo "AGENT_MODEL=$AGENT_MODEL" - echo "PARSING_MODEL=$PARSING_MODEL" - } >> "$GITHUB_ENV" + # Store key material in runner-temp files. Later shell steps read these + # explicitly; third-party post-comment actions do not inherit the LLM key. + umask 077 + printf '%s' "$KEY" > "${RUNNER_TEMP}/cb-openrouter-key" + printf '%s' "$AGENT_MODEL" > "${RUNNER_TEMP}/cb-agent-model" + printf '%s' "$PARSING_MODEL" > "${RUNNER_TEMP}/cb-parsing-model" - name: Resolve base analysis (committed baseline) if: steps.guard.outputs.skip != 'true' id: base shell: bash working-directory: target-repo + env: + BASE_SHA: ${{ steps.guard.outputs.base_sha }} run: | - BASE_SHA="${{ steps.guard.outputs.base_sha }}" BASE_DIR="${RUNNER_TEMP}/cb-base" HEAD_DIR="${RUNNER_TEMP}/cb-head" mkdir -p "$BASE_DIR" "$HEAD_DIR" echo "base_dir=$BASE_DIR" >> $GITHUB_OUTPUT echo "head_dir=$HEAD_DIR" >> $GITHUB_OUTPUT if git show "${BASE_SHA}:.codeboarding/analysis.json" > "${BASE_DIR}/analysis.json" 2>/dev/null; then - # Warm-start needs BOTH the pkl AND its .sha tag; seed them as a pair or not at all. - if git show "${BASE_SHA}:.codeboarding/static_analysis.pkl" > "${BASE_DIR}/static_analysis.pkl" 2>/dev/null \ - && git show "${BASE_SHA}:.codeboarding/static_analysis.sha" > "${BASE_DIR}/static_analysis.sha" 2>/dev/null; then - echo "Seeded committed static_analysis.pkl + .sha (head warm-start enabled)." 
- else - rm -f "${BASE_DIR}/static_analysis.pkl" "${BASE_DIR}/static_analysis.sha" - echo "No committed pkl+sha pair; head will run a full (cold) static analysis." - fi echo "committed=true" >> $GITHUB_OUTPUT echo "Using committed .codeboarding/analysis.json at ${BASE_SHA}." else @@ -286,10 +310,10 @@ runs: echo "No committed baseline at ${BASE_SHA}; will generate one via a full analysis on the base commit." fi - - name: Cache generated base analysis (keyed by base SHA) - if: steps.guard.outputs.skip != 'true' && steps.base.outputs.committed == 'false' + - name: Restore base artifacts (keyed by base SHA) + if: steps.guard.outputs.skip != 'true' id: basecache - uses: actions/cache@v4 + uses: actions/cache/restore@v4 with: path: ${{ runner.temp }}/cb-base key: cb-base-${{ runner.os }}-${{ steps.guard.outputs.base_sha }}-d${{ inputs.depth_level }}-${{ inputs.engine_ref }} @@ -304,28 +328,46 @@ runs: DIAGRAM_DEPTH_LEVEL: ${{ inputs.depth_level }} CACHING_DOCUMENTATION: 'false' ENABLE_MONITORING: 'false' + ACTION_PATH: ${{ github.action_path }} + TARGET: ${{ github.workspace }}/target-repo + BASE_DIR: ${{ steps.base.outputs.base_dir }} + REPO_NAME: ${{ github.event.repository.name }} + RUN_ID_BASE: ${{ github.run_id }}-${{ github.run_attempt }}-base + DEPTH: ${{ inputs.depth_level }} + BASE_SHA: ${{ steps.guard.outputs.base_sha }} run: | + OPENROUTER_API_KEY="$(cat "${RUNNER_TEMP}/cb-openrouter-key")" + AGENT_MODEL="$(cat "${RUNNER_TEMP}/cb-agent-model")" + PARSING_MODEL="$(cat "${RUNNER_TEMP}/cb-parsing-model")" + export OPENROUTER_API_KEY AGENT_MODEL PARSING_MODEL + BASE_SRC="${RUNNER_TEMP}/base-src" - TARGET="${{ github.workspace }}/target-repo" # Clean up any stale registration before re-adding (rm -rf alone leaves a # dangling worktree entry that makes a retry's `worktree add` fail). 
git -C "$TARGET" worktree remove --force "$BASE_SRC" 2>/dev/null || true git -C "$TARGET" worktree prune rm -rf "$BASE_SRC" - git -C "$TARGET" worktree add --detach "$BASE_SRC" "${{ steps.guard.outputs.base_sha }}" - uv run python "${{ github.action_path }}/scripts/cb_engine.py" base \ + git -C "$TARGET" worktree add --detach "$BASE_SRC" "$BASE_SHA" + uv run python "$ACTION_PATH/scripts/cb_engine.py" base \ --repo "$BASE_SRC" \ - --out "${{ steps.base.outputs.base_dir }}" \ - --name "${{ github.event.repository.name }}" \ - --run-id "${{ github.run_id }}-${{ github.run_attempt }}-base" \ - --depth "${{ inputs.depth_level }}" \ - --source-sha "${{ steps.guard.outputs.base_sha }}" + --out "$BASE_DIR" \ + --name "$REPO_NAME" \ + --run-id "$RUN_ID_BASE" \ + --depth "$DEPTH" \ + --source-sha "$BASE_SHA" git -C "$TARGET" worktree remove --force "$BASE_SRC" 2>/dev/null || true - if [ ! -f "${{ steps.base.outputs.base_dir }}/analysis.json" ]; then + if [ ! -f "$BASE_DIR/analysis.json" ]; then echo "::error::Base full analysis ran but analysis.json is missing." 
exit 1 fi + - name: Save generated base artifacts + if: steps.guard.outputs.skip != 'true' && steps.base.outputs.committed == 'false' && steps.basecache.outputs.cache-hit != 'true' + uses: actions/cache/save@v4 + with: + path: ${{ runner.temp }}/cb-base + key: cb-base-${{ runner.os }}-${{ steps.guard.outputs.base_sha }}-d${{ inputs.depth_level }}-${{ inputs.engine_ref }} + - name: Analyze PR head (incremental from base) if: steps.guard.outputs.skip != 'true' id: analyze @@ -337,22 +379,35 @@ runs: DIAGRAM_DEPTH_LEVEL: ${{ inputs.depth_level }} CACHING_DOCUMENTATION: 'false' ENABLE_MONITORING: 'false' + ACTION_PATH: ${{ github.action_path }} + TARGET_REPO: ${{ github.workspace }}/target-repo + BASE_DIR: ${{ steps.base.outputs.base_dir }} + HEAD_DIR: ${{ steps.base.outputs.head_dir }} + REPO_NAME: ${{ github.event.repository.name }} + RUN_ID_HEAD: ${{ github.run_id }}-${{ github.run_attempt }}-head + DEPTH: ${{ inputs.depth_level }} + BASE_SHA: ${{ steps.guard.outputs.base_sha }} + HEAD_SHA: ${{ steps.guard.outputs.head_sha }} run: | - BASE_DIR="${{ steps.base.outputs.base_dir }}" - HEAD_DIR="${{ steps.base.outputs.head_dir }}" + OPENROUTER_API_KEY="$(cat "${RUNNER_TEMP}/cb-openrouter-key")" + AGENT_MODEL="$(cat "${RUNNER_TEMP}/cb-agent-model")" + PARSING_MODEL="$(cat "${RUNNER_TEMP}/cb-parsing-model")" + export OPENROUTER_API_KEY AGENT_MODEL PARSING_MODEL + # Seed the head dir from the base analysis so incremental stitches # component ids from the baseline (stable diff). Base dir is left # untouched as the "before" snapshot for the diff. cp -a "$BASE_DIR"/. 
"$HEAD_DIR"/ 2>/dev/null || true - uv run python "${{ github.action_path }}/scripts/cb_engine.py" head \ - --repo "${{ github.workspace }}/target-repo" \ + rm -rf "$HEAD_DIR/health" + uv run python "$ACTION_PATH/scripts/cb_engine.py" head \ + --repo "$TARGET_REPO" \ --out "$HEAD_DIR" \ - --name "${{ github.event.repository.name }}" \ - --run-id "${{ github.run_id }}-${{ github.run_attempt }}-head" \ - --depth "${{ inputs.depth_level }}" \ - --base-ref "${{ steps.guard.outputs.base_sha }}" \ - --target-ref "${{ steps.guard.outputs.head_sha }}" \ - --source-sha "${{ steps.guard.outputs.head_sha }}" + --name "$REPO_NAME" \ + --run-id "$RUN_ID_HEAD" \ + --depth "$DEPTH" \ + --base-ref "$BASE_SHA" \ + --target-ref "$HEAD_SHA" \ + --source-sha "$HEAD_SHA" if [ ! -f "$HEAD_DIR/analysis.json" ]; then echo "::error::Head analysis ran but analysis.json is missing." exit 1 @@ -369,30 +424,59 @@ runs: env: STATIC_ANALYSIS_CONFIG: ${{ github.workspace }}/codeboarding-engine/static_analysis_config.yml PROJECT_ROOT: ${{ github.workspace }}/codeboarding-engine + ACTION_PATH: ${{ github.action_path }} + ARTIFACT_DIR: ${{ steps.base.outputs.head_dir }} + TARGET_REPO: ${{ github.workspace }}/target-repo + REPO_NAME: ${{ github.event.repository.name }} run: | rm -f /tmp/cb-issues.txt # cb_engine writes the WARNING/CRITICAL count (0 on any failure โ€” best-effort). 
- uv run python "${{ github.action_path }}/scripts/cb_engine.py" health \ - --artifact-dir "${{ steps.base.outputs.head_dir }}" \ - --repo "${{ github.workspace }}/target-repo" \ - --name "${{ github.event.repository.name }}" \ + uv run python "$ACTION_PATH/scripts/cb_engine.py" health \ + --artifact-dir "$ARTIFACT_DIR" \ + --repo "$TARGET_REPO" \ + --name "$REPO_NAME" \ --issues-out /tmp/cb-issues.txt || true N=$(cat /tmp/cb-issues.txt 2>/dev/null || echo 0) echo "issues=$N" >> $GITHUB_OUTPUT echo "Architecture issues: $N" + - name: Drop LLM key material + if: always() && steps.guard.outputs.skip != 'true' + shell: bash + run: | + rm -f "${RUNNER_TEMP}/cb-openrouter-key" \ + "${RUNNER_TEMP}/cb-agent-model" \ + "${RUNNER_TEMP}/cb-parsing-model" + - name: Diff analyses โ†’ Mermaid if: steps.guard.outputs.skip != 'true' id: diagram shell: bash + env: + ACTION_PATH: ${{ github.action_path }} + BASE_ANALYSIS: ${{ steps.analyze.outputs.base_analysis }} + HEAD_ANALYSIS: ${{ steps.analyze.outputs.head_analysis }} + DIRECTION: ${{ inputs.diagram_direction }} + RENDER_DEPTH: ${{ inputs.render_depth }} + CHANGED_ONLY: ${{ inputs.changed_only }} run: | - FLAG="--render-depth ${{ inputs.render_depth }}" - [ "${{ inputs.changed_only }}" = "true" ] && FLAG="$FLAG --changed-only" - META=$(python3 ${{ github.action_path }}/scripts/diff_to_mermaid.py \ - --base "${{ steps.analyze.outputs.base_analysis }}" \ - --head "${{ steps.analyze.outputs.head_analysis }}" \ - --out "${RUNNER_TEMP}/diagram.md" \ - --direction "${{ inputs.diagram_direction }}" $FLAG) + case "$CHANGED_ONLY" in + true|false) ;; + *) echo "::error::changed_only must be 'true' or 'false'."; exit 1 ;; + esac + case "$RENDER_DEPTH" in + ''|*[!0-9]*) echo "::error::render_depth must be a positive integer."; exit 1 ;; + esac + + args=( + --base "$BASE_ANALYSIS" + --head "$HEAD_ANALYSIS" + --out "${RUNNER_TEMP}/diagram.md" + --direction "$DIRECTION" + --render-depth "$RENDER_DEPTH" + ) + [ "$CHANGED_ONLY" = "true" ] && 
args+=(--changed-only) + META=$(python3 "$ACTION_PATH/scripts/diff_to_mermaid.py" "${args[@]}") echo "$META" > "${RUNNER_TEMP}/diagram_meta.json" echo "diff meta: $META" read N CHANGED RENDERED TRUNC < <(python3 -c "import json;d=json.load(open('${RUNNER_TEMP}/diagram_meta.json'));print(d['n_changed'], str(d.get('changed', d['n_changed']>0)).lower(), str(d['rendered']).lower(), str(d['truncated']).lower())") @@ -464,9 +548,6 @@ runs: } > "$BODY_FILE" echo "body_file=$BODY_FILE" >> "$GITHUB_OUTPUT" - echo "--- comment preview ---" - cat "$BODY_FILE" - echo "--- end preview ---" - name: Post sticky PR comment if: steps.guard.outputs.skip != 'true' @@ -481,6 +562,7 @@ runs: # note (same header) instead of leaving the PR with nothing / a stale diagram. - name: Post failure comment if: failure() && steps.guard.outputs.skip != 'true' + continue-on-error: true uses: marocchino/sticky-pull-request-comment@v2 with: header: codeboarding-architecture-diff diff --git a/docs/COMMIT_STRATEGY.md b/docs/COMMIT_STRATEGY.md index 3d16fa8..8acaea2 100644 --- a/docs/COMMIT_STRATEGY.md +++ b/docs/COMMIT_STRATEGY.md @@ -36,11 +36,11 @@ The engine writes these under `.codeboarding/`: ## Now vs. later - **Now โ€” extension-direct.** Committing `analysis.json` + `health_report.json` on `main` means a user who installs the extension and opens the repo sees the committed diagram + warnings **instantly, with no API key**. The PR comment's CTA points straight at the extension (install / open in editor). -- **Later โ€” hosted webview.** The webview needs the **same** committed `analysis.json` (+ a diff + health). So committing now is **forward-compatible**: when the viewer is built, the data already exists at each commit โ€” no migration, just a host layer that reads it. (See `scripts/render_diagram.mjs` โ€” it's the headless prototype of that viewer.) +- **Later โ€” hosted webview.** The webview needs the **same** committed `analysis.json` (+ a diff + health). 
So committing now is **forward-compatible**: when the viewer is built, the data already exists at each commit โ€” no migration, just a host layer that reads it. ## Warm-start tradeoff (the `.pkl`) -The committed-baseline warm-start needs the pkl **and** its `.sha`. By caching the pkl (not committing) keyed by base SHA, PR runs restore the base-branch cache โ†’ warm-start; on 7-day eviction โ†’ cold LSP. This keeps the repo clean and still gets the speedup most of the time. +The warm-start needs the pkl **and** its `.sha`. When the review action has to generate a base analysis, it saves that generated base artifact directory in `actions/cache` keyed by base SHA / depth / engine ref, then seeds the head analysis from that directory. When a committed `analysis.json` already exists but no matching cache exists, the PR still diffs correctly but may run a cold LSP pass. This keeps the repo clean; the cache improves speed but is not required for correctness. ## Summary diff --git a/scripts/__pycache__/build_cta.cpython-310.pyc b/scripts/__pycache__/build_cta.cpython-310.pyc deleted file mode 100644 index 305f3b3bce8fcf7b0447d94640b7394511a4ef1d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4655 zcmZ`-&u<&Y72a7ce`qDkj*{4p?IcqphDAn8I&NYouA#)g5Cm{+*HRpWfkCf0Lvp3% zE;F-hSp-$2l~Fqfqd<^T6QlN_D01kzJ@ruZ(7$1i=_#l7?2G%oS<;f6mcs7N%+9`< z_uluu_eSxaJ+6h{mBpW}9^GeI|Dw*$pM%Z^mc{e{4PiAc!Gs;MzTLDLwF_aPS-{f? 
zoqn-dG-JiE)Gs&7Xdf1?n7->YD`H0M5wqXf%_(t2yejsK19yu}SG*<;idXP{TF!_= zYqmNi_sHojYm5B`l9#ON>*+tRf%9n)3ZC>NZ~I~BC9&tXlOT?GCyo=TxIY-kNJvbH z%`9KFdqE=GNvfnLf=-8DyUyEjzb~Vt?z+nmH2yLSqBYHf2p?XEg*+eoN(52&xaRFJ zXs_{sitlXjc^&h3J(8-SBf>-p>ZKqI##?^lXnssX=qWao#B4OkH$~}+Yhw6bN|>J--uJir3jK(9p$uTKA-nI zJ&^676SS**5T@FQ2_l`qN@NK4*&6w03z#6d&)0nwK}){kM^K8d_pq>%e#m>cB|Y8= zBC>+!@=l*tm)efU|uFRJ{9FTcsX4VMK8R>Qvi za6?0$A7Z0ogX?rKh*g3G5+3>h0sMN&SO|{3EtTKJhBOuogBA1=iPggZ(sGp4YGZ#G zi{w73)$05jc1<`tdE(?d?ot8=^Q(~73FCEsQQnpzoTT_ts%}fbqL(BCeWuaSreBBM z+iSEqO|Q4(21#iEhrK8chQPet-Gvz$xqbZJ!b3$h)!g^IT7QBafFzmE%9Hy_x3R!cVLmR7P`(U z5HO|dC4LaPome3jq~Go#%#_+7Lj;O|&p)|-nd^2>_NB4cGgtvNT_8kl7WaA=rgX*Z{4cj)@=mxty^3K3JefK4{vc|8f=s1AXpKD&(Fzi zzra^wgn+l`(sO}FGJqY_4k3J=5TgB#&Fs=&tC@U!mCX1d-g|PMK zF!#+iBWdHGjVH~!KrDR*jjpxHM&Rbd)+6h&rS^>2BXIU(#w?5$#-j(#sAERi;E1rh z418Pk7a;n&S=@tlb@titizF@?5fXA$qXR4~L zO7K&&oLg6a4=17jWTo*5*i$z^qqlwVUevf4w^Pbmy3qv;)0I{y4zcaV&SZrQ_uN39 z9UHo>f&o5Vq4e2mCw_gfkkJ(q2ICRgtXWuK_ zb2<)H8Fy#CVq)(j`yT5su}{2kw~`b_&KA3fy5ce0tQe_tqsq6|!#pZzl^5?b@zNH< z8ZSRAm^DiJn%KW&QLWOoH>XCWi`LD`raPJv`$ulqK5cDIeQ`q^d}NQN9#iZbPT$-e zJA@lS40U1o+@#wRGNB{oo?mt^g(;Pk{2~Rh;-d)w5YwEQZLf&CAv}VjHr2q+J~s^WDdnlKF>@slPUPsEN+=YR{j7jO|1R~8y?sW z-m_th@>eg`X--PA`QQJ-f1*MKFMduc)Y14gDF5c%CKFa-jo2$jq5Dv0g!1}<4aJ;? z#Typ0zOY3R%9MuFpVw+vsSrgqmy#&{!*9Q?)o#uYXUWj00==9$s;pVcTTfV(64{>C zrHdaeU%lSCeD3_E%OE|wej;PC%G9e6V+bzSaq4Ur$h$L8ix==h(<1hlj`bRIpAL`w z4|ShSPv6AU|NP-kf96Z59;s5^jg8%v@)@3NUv9tYN|Pyo|Y5s-@hmgec=%Wm*I()2cd(SQWV&^cV~>XEhEY!>iS4 zg>#nmJXQ#K2LD-c;G?i@IuStLoXKt6GSQb6@c^l^fiY*bsE*KA#=Zu6+mJG7ntEo< zw=mCO+9_AqK6a2fw##0!XA31(dAhH#AAN^a@Hb2JU&HJ+u4lgjTI447$S(E~^=Tz5}E7z#2vKe297T%h%yq0y% zOd8Z`KX#Tw-o4?6sl23Aj8bBN;wv#2{J(($4XZ{&BZsm^&M!NNw2$)pmw2HmaA$z| zS@SoCR)yL297-KLpK{<{y-TtxtrmULY9Vw@FcRoZmQVzJxP~%SbvECWiFOlnre$c{ zv`h$^*0vC{L8I+9KiIiw8m-P|@EvYSv7Kqy@mpe;orR6R? 
zGb=^JGDxeu6oudVP#_3kKjt+*rVsnrKJ_0oZjtsov!pJD9iUlhhI2i0X6F0OIg_?p zj)w0~@4d10hf|vN52_sfEL7gpG+o3fFs-XGof)w{G`faPWivLrCVs8h8dka$v{kxQ zR_)f<>+B?(|HA6p>@qvU&Z1OjSJ-)W0VRjI>E6O`;zQmV^<>Fn#N0d7$ik7_ zbU#F?l(Hx`eRuu#3Pf#&KKo&!NC~VzaBo1agz}+O)rYTGCWZ>=^0|x#IQvBwai#IW z77kR8gvdgnw)Fm;yQ_}V>(OGk6Ko3S(22L#4c5#YoW|~0aox@=_eca|ftcsDVoTt3kTkiF4U`c|-qr~6wqnPe;+Bsbj z8^W5qJi=8Iz?_d(9<1G2`(XLzgF6rJ++4Zq4x)t7>Oq<%l9uoF78eh9huI5+m-5O7 zSVYO8U3LJQhrn!D0(zm>3sM&LQ=gMSZ#U)JgE-w45E05?sY8&AhNF}dUSfY7#e0B_ zh!{j*cg`S7O7sK3aub7xDa&FYPt6hTZUo>1Ld28lnmb{# zJD30zmU3_@*oQAf$fdg*x&fj5h%6GX>O9)ml!Ww*Zb&RS|0MI#(Tj$kB&k${6>F$l z_ra0ziDfw)EeHKYy8op(-8gKaE2oyiA^;U8=w6uELSYe(iYgA#i&#-n*e^_QsBlX3 zdq6+>Lz(fUu-4Kf463BX^t16>N4iON>^&5e4bz7Dq^fIqH8-S@!|ov0_x0SArnGVk zr3y2j)Fw4qVOC!IQv1r-*J0l@*rv)F*C7YHvPmn#;McXHPI`Mv*|JDhp>VK;Q6@dG zgOetOjXip*tD&~y#V`;tZ=i2{jxc&a1O-JICU3i)YH{V|cv+gUhPNSr!(?hj-Mg`} zzUtlm-3Oekdr=v2oL0x?WX(Ap4X;r~>To(4wYU;cn83%^8oz+n=l$hJxPP(S=jo0w zp~&*BG$`+Rc>@5*`raUoS;&`f0?Rk1SSCAgyrHzy%Oc>>NDAD(S0Zw0v{xP=f#Ef* zD9$2l>V{$HRs0$FqrCdqcBN&`jZfc7!_=1^!NsmuNw3I~^M4~y&NGd#;tjG{()z5GRLb97eOJR;^}Klug3c6zW^tY{^D?_uUg(_S&7o}sXkYt2Qwf-y z#If;tWZmY7r(0nl6{<_TAWG0UT^4_xG6mZUM+CxI9++2mVf;qtI+?$OtmZK*@4_uB z$`|@uoNjG=?l!ru8J>@Tzs1kcQkN-HD8GVI_Y#Zz4ek%U*^H&|(*S-E5rW_@jh&L= z_Ug(lMTT=|EUb+uiXz+6wtMTarBA8PG7$AeC$|`!BAbJDTFIj-{-ZEeg>o}g1HNd=j2y+F{ z+NAnfmFepmQze4v_+DO-wxW?uL;#$PNiDBEtv}NyHq$5d+-9{*=Q6i)8#E|K)t`Wn zxijr?wj5CB@}wafd4o9{+P;bUnMo7%CTp$06Ux90ZsUul#2 z<4+zxg1*hXKBG!r{|NS4?dY%n?~XdH@oyg}pQ0Sy_s0U)-JcSm{bUajN)%^2bQk(? 
zt&8CR(W>k$9XO123I`*(2OgdzY6299J3JMB3N)TtC#M;#OoJg6%8bu~8We7+QLfmI zMk9m;3uB|RwAL|r2RA)+KRit(T27!v@wjMDV-|0Nr`bppwwNj~zO;g<8nI3Qzlp#> z#b}7FvxLFnoanYn&6W@lgt6dnLE=FhF*@X&z}&rzuow!LG#rQgz$1A9g<<@fQf_s| zO-?Kn8O((5 zF(yWCJhfQmnLaUD6;!BCEP%g~Tj;Mn!4vH9X*I&ehzhGuM^uiD_>mgnU__NQrX#Ay zM*K3@pTa9)1PHqY!s9vidl0m?RY!7=8c0o~7E*g3hJiKLHI(7CFgBBOD8Fp%3ACS7 zbL=B?9L)K%oJad9c+AuCj693-IV6(N*oQxTMxhqg=(H$yM4(g#53ucp84TIOj!{@> zlrXH}<1FGKD=dl)I@RaZ@)7mCqTWzcrI75oJ$petuT9@i&znb|PaU1F;9d%gtTdb; zET_hAQMO9i`;>ivZ0szm;FX?6V@NoBDDm6qE~+JMI@Y*-{B2Z9Bw;k|@X=6OhVzAu zAbR9;5f<&0X*hkKsvv^ru{7|!qU|#Vru5Y?E((Y0Wd*(f`;kLG(`r(JGK zX{2IXRZ`|am56PnzADruuUN*rs+VF#PTsOJ{{!Txs6fe4!H_DcvwT6eT^cIYs)BG; zI@D2VL7H`Rvge1@Hou3~!twCPAT#ZG+#rp~E)_L;MZ{5GSyN82Grvlix@!s$${U)U z7tsJ9a@O~R42P?qM)Js&P{7a2Q03R<6Yw1}S%R7@tvPSQ{#k~lJEk+|xXnxUMMt;( F`#+k$@Eo207#JpAW|Z^qydPehD6QI&gshRB$1OiagM6QIjZ9JA4!x`IbFrLoI)Zo zzu)(IW@i?lN|~*B^ZMP#_rBxzzTVc@SRsYaA3XEGg}ra2QvZ$*gP&16JeNvY!OvK! zluDIS%2Ia2TCz*F#kkXON{;-xC0BmarL_EJN*VmRjqFmcltbBcDX%i6g379#%70+1 zf*O(EQ8gyN;~zMs5j9>KRhvpVD-RHmNSD{pvx)wkWM0QU?&*sseRLJ&f2rDpb>I2C;3bqmHUa5!bN?go>9-L!r^Xdh4Qk_z#KNuLrxg?T^(gwX9xNue_Ho z?Rhb!URAU2rPS=IUAk9w)laB1D0QFuk~*ipfY|*iQg5g?5%biFdP{u~u?O;{{M-3= z^5^r8`BOiseo8H>I$GSTuBi*^B1-M^_p8Q5TmO!qx%r^KZ#8wx!b&=4dghO(yTO=gL*K1A!s$dTD{>1NK>Bno66T-xafPzZ-f=E zf^@IiYP7VkyjOj_RH-X(L08&~(}ltrzoM9bXucBohrEOHehuY{($b+tWFoCn3w>Sm z=H`aFoSXB$ckSoB`Bu2-RT>&amkUc3l&n`8^)9-?s8l_;=*=(7VEpDnz3EGxYHO+8 z2{~SjYpLC8`ps}g`;AIimrAwTVGf|+FV#a9da)k9)R~_$Gpo!ue2isoDVfQ9qgB1= zU0L+Cf2>gOyhC0S%Y@Dj|6VemGUp^>A^?M}4o(WpB51 z=(Rw4C{(F(Zw_$$$?la|Z@RraH+R^(f{LWj0@JV&m{%an5H6ynPJl^(yp9=S;q1&x zl4!h|ZY_E3Mx}0u*lSn9#lSn*Y=wonxrKV@1&ggKb8|%@35=>wCs!6*Xn1b!V4$mq zJr#sSkR-cnRu?PH1+FYs$&_kT=KV$?^sk0qFB2uT&ncoHjtB@UAqTW_kW+49k?J^P zAY|-{!l4oRndxmUmhY_Js7+N{%@CtUGeM{t_4(<-_sL(2RyZTQ?ZrZoNl-^98O}VMi2~!u`n!Q}KQY&Co-N`V06G9+LeIupp z$ofX=LzfKf&D0kmN>;3&v=r)@*cIzas_4vii^q=nI86H3v6HP%LxChiYy=3Q$6dV7 zpM7oCtAt+HKIRp(u}cOQryDJBsyNT0Wo~4geeLXNHdNZg3a9)kj=dzrPOu!ru79;2 
z7HvI(xx|itwfa#?--Gz=#hEVzz7A&Qb?b5^4E^TJsaCZkM{EZ4w%D03*IEtm(3wGg zJEiIy63mg9397o@4uctzemQKF6TXC#5sz`q<=(s=LFWObt!+(MTdn<8?wb9H-p=7! zRg5{*LmR@)aq*Q6*@g(V0`mBl)hn@HhlxRi-33_UX&=VeH89Jy#igWw);5NCM+cOkNf0i4gv zK&;SOwKWdd&)S!;=d!xs6@_{LVG&lqZuL&oK$1OuB(|0|?m_+p_&YcJ*_TmFwqRGP zyRWy1<@se~z0;oJ|(Gn3i?Z_o0Effaa86t8^M62-gqeOsB0N`%#F* zR>~#~#&&H9-*%kl_6Hk>HL-C3FLHDf2wZE_8n<>>E)I%)&9yWSPwY}jiL=#?hKLFA z*lIPZT({nb=cT`q@8ALYsr_E6x0To=oSG!X;^rd7iLhZ89lb-jXH%gtJjHFje*w98 z#)iuC0Y0PLVU|-FY-o1&jt$J9Pj2AHTn5>|+zOij4+Q<#X8S&}^@GS>KQ~A?z=UZA z!^cM#k#+{(k2%jBsqUfjVkIbpS#+x4s`Ua+>-p)G zOo(m2=|mYEEu2-H=6-w!$s&vVfwaN0lAnvR`b3nC+$eJ*73K5`Q7%d|erYSxU?6=! z=4{VG0&$3OR{Kk>%g_j2p=EuTiJ3t353`8*fkhmZMNCdnX;W_*lLTo|pFj&jNU>Fk zExmDd$3#Kvov^o2Shz|~R+Q(;!=EcpIh?VArJrMiGuUr`# zVELlId;w*Il^t>PXF2M;EE;kHNJPUUST-1ob@>6sfUDNyJ>2(=OIpQu7xgiutimKV$2}jMGGows)HDqH+sWe+6ZPs=NCQ<*e`YfB^5G zBKpdLk* zN`YVDH?j@0C|adLwbCs6SKGj3qQEXq1* zsx+|!yH%vReh!cNF{VvyTo=u=qNx)Ux9GPR|008Rg`+PkDOgHlz@VytZA6{~nkr1} za!r!+F4?8Rx&o!CCzd9uRV9m=XjHPorF>$;`>JH*OVF7QN`3hvPh)bB5fyP$n&=(l zeoA@)SVg#TrRLG`m+=jrLqNGaPDWwn>~X}wDDYgs-&Qb=NqbyU$F1J)sGYM%tw}eB z63~!s&DLU zMB{RZa;by#)%!_25TPK?10T%790qJ!bh}T#N(vaj9D!O95}X2p6I7b@u--L5)G4Z+qbRrx2=P>tzv9_DYjN_TT$#13>jJ`9`61!QOHtEC<{i2UQL6u=B%=rWJi5%Zv+(1^`;KX*E)R@y%B)%iNWM-6nBuo^+nir0ge z1#bfqM*JK(~UZ? z<=6pgddF$u@`-jxe$-n!ER`6CRH80gdOU)T6~U?lzqEIxzG6f715T!#?MMeCBUAf6 z>9s?4`@L)52G9HwV)_^Y=od0C91%`Lzsukb;)03Stiq`N7sw4bs8Q>nr71zG_zr!T z`y9;9;2}i(R0&MHWGf5zHd}DZl2--XIk@T`wOLK#&LOR+j&F-I<+IgR+plZ}k9ghJ zQw<;ZGjQl7e+EZo>RGP`u|o^Ot_7{eWo#!m8i>8u^no7yt&z>3-bRIQGU6LeUbgHFiHh^lS{!Bh|C^DeaN&Ej#doyNeMGPiV2fFjnQDKt3HMUyHRUF&5295u~mH`FX%ns04k^6L0K$Z zgs%%A^**%Psji$PF{VKOkRqwFm?340dPbSHPpV9PGgWpYKrrVUw5{5^VCV2! 
z5zyjF8ul|F8QAEV$c5bwn`zau?UgKa)(im{XqIVch`IKoQMUc4%B))3Q!Du>ud*mr zfDT%KZI;r|Lsj<46m?SI)46B___VW9FuB2JU~%Om=#du2av^n->AChhXz?9bX6eYS zxssRBHRM}ONSO=H$|y$AM9h|Fm)fkpf|~e+?wHjph>u2D-~gM6eHRO}Mv0@=Cc03` zZkBNvaT0*kLM-W3=F|{$^9MUk(wQn2N8&7*THO!g0%?X29kAWeUYy0^JXv+T32yAb_3j77}0&5$!iR%4BkKxXA{zmooZc`(p70wWcfK}TL9xt zr-b<=a|7?nmCJR=*>d^cqkO=c8E7`0b3=rmw(^`7{I^v=@D)1b+H!}vi#6_|>}1Veut|BrBq-0l z1!9d{P~)D&`nL6ch`tP{Xn&N6z~60@0MEDd5$MKgq#pr|SiSU1Te;gGS+jsZe;WPi z_Zj>QgFlIY5)Fqq@QYdfH75RP2EWLFvxwcLN;}X$&*xuYuwg+_Te$E4MP9HK0g=(1 zs7q9tpfp``t?qcTVo72BXHc-1G5gGOtYxzaJeeK8!Tg*V3fI+)M`FWEtEFTHVpz7j zT#gInGA^4sz~RgBXt{i;Q)zHZ@RTi=RjXPq>p#U7KVYyv8Ho{>&|gLH`}hVlX>uvx zy@l}|k}BinJx|YefpqGBxUYG9vA0&R4g4jsc)>?t!vXhOP2GT2h!8+7c5^$F7za9{ za|_B1_Uxgx)J-SEt{aMT$IW{NlL>)bWuB_vOowK%Q03H`f7UNXmil43@i8M2lIZnL0`Adg?mpQH{b9&2hAtK^m?S~a$ z{&B_IyA%~sWNuoG+^Q_KAmds7qTdeXHkl6rZuq?#MCp8`dU1M4`{Z^}`y>V><|ReI z`H>zBj*YOe5il^nE?5+FYg)1&*#@Y|o_y)~*%wcrx*IMO7f#515%H)3_Fm7SBG4#Y zs=)0a0m9XQ9o;W2UEDEBjx@k11+0IUqXbfh1CFu1fx~{u=8=|DBqWR>9zc!Qx;jA7 zB<47OV!?01KGx4pVptzjAllael>#A027u<2@)SS15)4=n`Om z@D|K6BVgwnwneT^c5p+u(R+U6cJ!{t?z&bSjsA=M4oZxDT7Nsa{(p%j1W+AAEY8y7 zfv7fFYn+!S5nvQsBEYJYRlN@n55HTuMNc3?Bpr4J?vEq}#r~EJJ;zybu`!O`8adpq z_Rh#{;md7WrKx5cD+GH)KvMdFaFSus=+^K+7?49|=h{TyW#7;u3J#&nxJ_M)coP5YYdIK>hcHuMNtp4tHDMQA+s^zj zm(ky0lN9em>JxW}8;s?(f~$z|B8?>2v2+3IkE?7byC6h7PeM%so`7dy40zrZIe2zY zunv@Hy;rWo(#VIfM*a*gN9Zn+{m`yMky2Ef)B2doi@OOfjJBs#fo8NK5sy^%VpcB@ zF7nplg}J#0AN&$k%#{ZpoSTCUq<#1&UG_Z~fqt`5p?zq)z-GJvMkegk#6q2g#toye z6Sk&Ah8d@+y7mOI=($*VKHQj0i!^VwF(Yd*GL7V;bDD<_#3%gSU=X)qV$m2rIA32_ zfRQYn7va_ftBmm(U1?tI4aGYMU%dckBQPT9`qSPy94cd=!k4I4U!Z|(x?F5E#cdBh zbe{^N7;_C;yBR}igdLtWvo4LW_35yb<7Cc4ikIy6)zUa*`jY_@uSV(tiWlLyW~i0?AfLmK;G@U@ECL?mwUh z?hqdi0APvwu?58Jz%qn@lY3r3VhPS^F*WhnNOwPX#s`=i7{|P7t0Q)E;;|@pGd$1+ zuAanij!uR%Jy)!m#K@o4m{iIztyRXz4}b~v`A57Hellpkr^blajCe){C??$F=U8tC z1L0`vZ0&C%?O84w*^>*XivTiB1kIcwfNN-(sCb1~D7gv|IGAcDB>osZP_cLuvUppe zSmQbXeh^%wUEtV^^J>>4FsvUm9;+~T#Vexw^1z}UlO<^qKT6y?55lvnf`lRb2o9%r 
zUAE5*?J?yz*-}8Z#FZ1J4eEjMurPa@sBR(wWxZ$DCBnvqqsH`kE*x96uR%vBici7- zhc*`|14?C2b311?g*Z4oB5;?{T#mQhy|(OfZx?T&!d-XqyU2KngW*}*X43`cKCp57 z6M*~7i|tp=eD<8c1@zym)nNY{#s~MfzEGjFVmw0Sy(}yZcj3lwwyYpt7WJ>ArP~EC zt^~f(zkvk(n+$#nL6z23??X#7v81iQ!_cUI$jt%n%5Rt_-VfmJ;0DzCHCix4VVsL@ zdshg-y#}|Xl|tyyE`hH^Zq>cvs8QNIv}CTk*VDBOt*iG&fzgc6N8=qJEGuKU6|`{| zII40G81F>mp?lE{PQy|g1sasO=pxMy-T}Ikml_i*n^z{IiKH}c0dbR;uetBIrY*Dt zoF{2bMPopZoaE_ZG|nd=Oq<$T(l7^A_#nNq#XJS?CB45l(ECpVAKIcuP7qVtLVF8{ zkNg(LD&Gc?o)ITY?_h$Tio4=rz?NYJ%)alrEXj~w02hg)(S^B5OH2E5VF6E3T3#(r z>B^Nvdd{bLq;kxA#Crw1#1D9V zOzz)hmgMh{9HZRb8r710SnuclwtcJHaJd~-ttn)9CP4Or@=`Fo6OK{}- zA6}#IE1LUTpz;Ve(_v~Yx0YYEn+|_#wz)*6OVtfrF5ZB3$v-$lIXA#E`f+O@4u3rS z;d1eY4aQ(!9|gn#{Slx_4iJZ76<`$CGuOwiXKOiO7r7hhHGpr>Kyz@c%dMsX#n>vp z69OLZQoN=L&>lxZKr2WY+evM6JbBtAPZJ-e)n=qj4(0P{J%0=Gw+`j=$>iVjVS2$? zftx1)AomtnhW_Ockj?pfp(VR&E5`73wGE7AJ2YSi5Ydjv#Ta%%)qt}^(#EbRjkMh# z3Mh2xhSWd2l39c6(9V?puGHJ3?!B2cDYxKUt?r95QU|h^;FFa6mjfjwWuWB!A4&Jn;AS?;uOU6cMVUIt)LWL> zIfTWXsT!|v56x<7azd3bOVffx!G#oxW-w%?kQ^zmge3n5B1F;7Yq?Jt0bVDn&ES0k zZ@VQOn}w1H?U+_rh?o-j6_(n4>QeNKYdhEpjZKaPOmwD#v0fp`jWwtC-IuC zR%7WDHfsDyK%cl<5NN8nY2)3(0GdPiH1L z1g@V}^0+>7*D@D@*+m6t!BBs&;k;rR(Pe_ma;hEKau+}|oTsq)=?^oq!Fj>Zj^pHl zSD~z&eO!K>g3`pRhLcfVS{q$+p$@TqlxV+7ja%h#YDU3b@GJbT*H`{SjKNAZclCO| zqi^8>ICs3XSO(u5=o_c5AcU1m2x|<)2QC7I%~^G!*TW8nlVR8O@DYL0&G#fY-p_b@ zZ`a;?9_AsnM7iA(Ul60C;#OYie~1jw)mNGSCm0BYk)-rB1jVs9P3asM^2$&{fkv~2 zEKm^dqxC;#SzhGpa}53o10lF7VxX%IUvifzJF>=8nv_u9mBfN)Uj| zevdW(Ig<^UN%M)t+AH(-Smru|b?Yj|<8@M_hX^lSCgGgpS{~?d9L_m=@qC}nIDP*9 z|7g<8kY~><6yB&nM~3rtcW}*RV5j_Aa@P&0yK&5{Hcggp?l3N5@uJW%O~USOIxO$` z9otuv&++c=p*DI(Dc=@EGdWh-HB=(8R+9oe9NqoHZ6+N~@m)8(6jxb3_P7VPm3!`* zE^ZLrv7s)yBQLbB(ieoo?(Xi@yHZ!-;<2OctA}N9O=vZ@t>azMRAdq0BXU?@;fP!` zaJ-xCHL>}QCc5M7u~7%|K@Q_pxv0GljYtPyF(PSA_mF>sd~ZPhfg$ch8IZWa{_A%$ zA%6|+3+UcG@?&zO{)IsoFS+$zW$b2*hNmy1$F746;msiC?;ye4zh_Rq_R4E#&g#E~ zlsI#_(&&J)zsYQ#DZDF1=YxX&U8V?Va0{^#+-R^4u^oiCdB(<>dtSG$t_HCcXoWH# zWlG~vDFRKVTrIY8zs> 
zJtmyj#G(bC$MaS@^GU{`xy!!-eBE}9Piz_G5pz5oGxrCh8QvNn#DUAV<6BDP;v^fAVB zhKCVa(cfi2EGo{w`utnv)2CiMeb%`6c7jNzj|VkZ?MAk ze`1mU%-}yV=rH&`ga5*SLyGP2Xvt|d8TdzWfeh=?(B^+&5n1T*;pI5P6(?#vzKsCU zBv{rCd;DdrGf?3iR?>FpbI8BU|JjpZZ~6~Wp^9(+Lryb-?@6LCZ=p@R69^PdS?^mb z5S$mN(BqDkD*XEpA(E1j6zEm`l$@kMYwD-qb#a&qqrU{L=Y1!sH;x<73AMTZJSh%M z_o!|CTHC`kUmsVy`YF4Sl2UsHQtp+M`vy|(mlSUxtfAg_ntS0Vsp~ z7NFn3F)jpBfz(la$M7A;cN4x7_-@8`65lOq-}`wOL%5^C)7A_3031&N%lO5}fq=(r z?CPQS@p`>Ug-}0mA{A~6x8H=Ja0@yl1n)rz-XdQbg=dF(#So52-OSV!eNE4G5Aan| zntwFlnKbz7_P~qbK&Ie2f2jkY2yX_4%TV6nBid-QCHj(d9~x|kDgF1oduV{XVBSqn zO~Ku+yYEc`6zmx95W}a7-LG>n^Dc9DWQuP~52ezpaJlwWS={Q?+YdS%qu1P zg>z8u@usSIjSPpeRmW??-SMfZv;Gc)yjg7V`pd*dLcq@{YKh?u50er*Kqqc<~%BaW@x8^CT2_t${zK ziaX;R?u6j@hSvwjpVtfWw(skF)cPh0#G`&S$7vf8mLy-{40@babny9WI~*^j8C5(syP&rrSj zdzfEv47pTG`6u4uNu1B{B!^?Q4Ouz-SNP*Sqqryl%&dW)@P*(q+bfsxf*QHISp;4v zXxd3gm752~#ofxNMR%m`y=z%%A!F7}T6nbAu4zl;>W|1zM$o@fH?^K;Um5d85BrtZ zdbGr3+%lGod1pXUzsN#&WY+hfA=nA#bh?EhM_hZ=+apOE8O1F;P5Z)#X&lm_-F)4Woxh zOl%(+@vk%E?=$#~eoSr%B#qYuCRf&DaO5(^hs=?Ji&(9LS5)B?rfpXF0R!G`=miFo z2zY53F5-VC7;5`(O#kl;{xySdA}DPVrKpU$qVz^WBvmS_A_{d!Epx9 zG59QlHyO+`@EOz@L=3Jm_zHusGWZSys&H~yroYRW9BUp6{UIN>Fjzl*>qjtm!g#^) z@+}qA5V)z)tvOhO{DTUc#+7B7@yrhxFJuaZf-8TB6gJ^`vT(RCmMic*vCIz&+41~% dx-e7l3fl?=dz1%v{crS-3qO_H2RF%2{vS$4-#q{T diff --git a/scripts/cb_engine.py b/scripts/cb_engine.py index 37c45f8..842fb69 100644 --- a/scripts/cb_engine.py +++ b/scripts/cb_engine.py @@ -19,10 +19,22 @@ from __future__ import annotations import argparse +import json +import shutil from pathlib import Path -_BASE_LOG = "/tmp/cb-base.log" -_HEAD_LOG = "/tmp/cb-head.log" + +def _log_path(out: str, name: str) -> str: + return str(Path(out) / name) + + +def _clear_dir(path: Path) -> None: + path.mkdir(parents=True, exist_ok=True) + for child in path.iterdir(): + if child.is_dir() and not child.is_symlink(): + shutil.rmtree(child) + else: + child.unlink() def run_base(repo: str, out: str, name: str, run_id: str, depth: int, source_sha: str) -> 
None: @@ -33,8 +45,8 @@ def run_base(repo: str, out: str, name: str, run_id: str, depth: int, source_sha repo_path=Path(repo), output_dir=Path(out), run_id=run_id, - log_path=_BASE_LOG, - depth_level=int(depth), + log_path=_log_path(out, "cb-base.log"), + depth_level=depth, source_sha=source_sha, ) print(f"Base analysis written: {res}") @@ -50,30 +62,60 @@ def run_head(repo: str, out: str, name: str, run_id: str, depth: int, base_ref: output_dir=Path(out), project_name=name, run_id=run_id, - log_path=_HEAD_LOG, + log_path=_log_path(out, "cb-head.log"), base_ref=base_ref, target_ref=target_ref, source_sha=source_sha, ) except (IncrementalCacheMissingError, BaselineUnavailableError) as exc: print(f"Incremental unavailable ({exc}); running full analysis on head.") - for p in Path(out).glob("*"): - if p.is_file(): - p.unlink() + _clear_dir(Path(out)) res = run_full( repo_name=name, repo_path=Path(repo), output_dir=Path(out), run_id=run_id, - log_path=_HEAD_LOG, - depth_level=int(depth), + log_path=_log_path(out, "cb-head.log"), + depth_level=depth, source_sha=source_sha, ) print(f"Head analysis written: {res}") +def _count_report_issues(report: dict) -> int: + issues = 0 + if not isinstance(report, dict): + raise ValueError("health report root is not an object") + for cs in report.get("check_summaries") or []: + if not isinstance(cs, dict): + continue + for fg in cs.get("finding_groups") or []: + if not isinstance(fg, dict): + continue + if fg.get("severity") in ("warning", "critical"): + entities = fg.get("entities") or [] + issues += len(entities) if isinstance(entities, list) else 0 + return issues + + +def _count_health_report(artifact_dir: str) -> int | None: + report_path = Path(artifact_dir) / "health" / "health_report.json" + if not report_path.is_file(): + return None + try: + return _count_report_issues(json.loads(report_path.read_text(encoding="utf-8"))) + except (OSError, json.JSONDecodeError, ValueError) as exc: + print(f"Health report unreadable ({exc}); 
falling back to health runner.") + return None + + def run_health(artifact_dir: str, repo: str, name: str) -> int: """Return the WARNING/CRITICAL finding count; 0 on any failure (best-effort).""" + report_count = _count_health_report(artifact_dir) + if report_count is not None: + print(f"Architecture issues found in health report: {report_count}") + return report_count + try: from health.models import Severity from health.runner import run_health_checks @@ -104,12 +146,14 @@ def main(argv=None) -> int: sub = p.add_subparsers(dest="cmd", required=True) b = sub.add_parser("base") - for a in ("--repo", "--out", "--name", "--run-id", "--depth", "--source-sha"): + for a in ("--repo", "--out", "--name", "--run-id", "--source-sha"): b.add_argument(a, required=True) + b.add_argument("--depth", required=True, type=int, choices=range(1, 4)) h = sub.add_parser("head") - for a in ("--repo", "--out", "--name", "--run-id", "--depth", "--base-ref", "--target-ref", "--source-sha"): + for a in ("--repo", "--out", "--name", "--run-id", "--base-ref", "--target-ref", "--source-sha"): h.add_argument(a, required=True) + h.add_argument("--depth", required=True, type=int, choices=range(1, 4)) hc = sub.add_parser("health") for a in ("--artifact-dir", "--repo", "--name", "--issues-out"): diff --git a/scripts/diff_to_mermaid.py b/scripts/diff_to_mermaid.py index fb4b354..87838b5 100644 --- a/scripts/diff_to_mermaid.py +++ b/scripts/diff_to_mermaid.py @@ -21,6 +21,7 @@ from __future__ import annotations import argparse +from collections import defaultdict import json import re import sys @@ -110,23 +111,61 @@ def _rel_key(r: dict) -> tuple: def _diff_relations(base_rels: list, current_rels: list) -> list: - base_edges = {_rel_key(r): r for r in (base_rels or [])} - current_edges = {_rel_key(r): r for r in (current_rels or [])} + base_by_endpoint: dict = defaultdict(list) + current_by_endpoint: dict = defaultdict(list) + for rel in base_rels or []: + base_by_endpoint[_rel_key(rel)].append(rel) 
+ for rel in current_rels or []: + current_by_endpoint[_rel_key(rel)].append(rel) + result: list = [] - for key, rel in current_edges.items(): - if key not in base_edges: - status = "added" - elif (base_edges[key].get("relation") or "") != (rel.get("relation") or ""): - status = "modified" + keys = list(current_by_endpoint) + keys.extend(k for k in base_by_endpoint if k not in current_by_endpoint) + for key in keys: + base_group = base_by_endpoint.get(key, []) + current_group = current_by_endpoint.get(key, []) + if not base_group: + result.extend({**rel, "diff_status": "added"} for rel in current_group) + continue + if not current_group: + result.extend({**rel, "diff_status": "deleted"} for rel in base_group) + continue + + if len(base_group) == 1 and len(current_group) == 1: + status = "unchanged" if (base_group[0].get("relation") or "") == (current_group[0].get("relation") or "") else "modified" + result.append({**current_group[0], "diff_status": status}) + continue + + unmatched_base = list(base_group) + unmatched_current = [] + for rel in current_group: + label = rel.get("relation") or "" + match_idx = next((i for i, b in enumerate(unmatched_base) if (b.get("relation") or "") == label), None) + if match_idx is None: + unmatched_current.append(rel) + else: + unmatched_base.pop(match_idx) + result.append({**rel, "diff_status": "unchanged"}) + + if len(unmatched_base) == 1 and len(unmatched_current) == 1: + result.append({**unmatched_current[0], "diff_status": "modified"}) else: - status = "unchanged" - result.append({**rel, "diff_status": status}) - for key, rel in base_edges.items(): - if key not in current_edges: - result.append({**rel, "diff_status": "deleted"}) + result.extend({**rel, "diff_status": "added"} for rel in unmatched_current) + result.extend({**rel, "diff_status": "deleted"} for rel in unmatched_base) return result +def _has_changes(components: list, relations: list) -> bool: + if any(r.get("diff_status") in CHANGED for r in relations or []): + 
return True + for comp in components or []: + if comp.get("diff_status") in CHANGED: + return True + if _has_changes(comp.get("components") or [], comp.get("components_relations") or []): + return True + return False + + def _diff_components(base_components: list, current_components: list) -> list: base = base_components or [] current = current_components or [] @@ -157,6 +196,12 @@ def _diff_components(base_components: list, current_components: list) -> list: if base_sub_rels or current_sub_rels: annotated["components_relations"] = _diff_relations(base_sub_rels, current_sub_rels) + if diff_status == "unchanged" and _has_changes( + annotated.get("components") or [], + annotated.get("components_relations") or [], + ): + annotated["display_status"] = "modified" + result.append(annotated) for comp in base: @@ -214,6 +259,10 @@ def _truncate(text: str, limit: int = _EDGE_LABEL_MAX) -> str: return text if len(text) <= limit else text[: limit - 1].rstrip() + "โ€ฆ" +def _display_status(comp: dict, force: str | None = None) -> str: + return force or comp.get("display_status") or comp.get("diff_status", "unchanged") + + class _Scope: """Per-level name/id -> mermaid key resolver for one nesting level. 
@@ -232,7 +281,7 @@ def __init__(self, components: list, used: set, force: str | None = None): self.del_by_id: dict = {} self.del_by_name: dict = {} for comp in components: - status = force or comp.get("diff_status", "unchanged") + status = _display_status(comp, force) present = status != "deleted" cid, cname = _comp_id(comp), _comp_name(comp) base = ("n_" if present else "del_") + _sanitize(cname or cid or "node") @@ -262,12 +311,18 @@ def resolve(self, rid: str, rname: str, present: bool) -> str | None: def _filter_changed(components: list, relations: list) -> tuple: - """Keep changed components, the endpoints of changed edges, and edges among the kept โ€” the size fallback.""" + """Keep changed components, changed-edge endpoints, ancestors, and edges among the kept.""" changed_rels = [r for r in relations if r.get("diff_status") in CHANGED] keep_ids: set = set() keep_names: set = set() + filtered_children: dict[int, tuple] = {} for c in components: - if c.get("diff_status") in CHANGED: + child_components, child_relations = _filter_changed( + c.get("components") or [], + c.get("components_relations") or [], + ) + filtered_children[id(c)] = (child_components, child_relations) + if _display_status(c) in CHANGED or child_components or child_relations: keep_ids.add(_comp_id(c)) keep_names.add(_comp_name(c)) for r in changed_rels: # so a changed edge between two unchanged nodes still draws its endpoints @@ -276,7 +331,15 @@ def _filter_changed(components: list, relations: list) -> tuple: keep_ids.discard("") keep_names.discard("") - kept = [c for c in components if (_comp_id(c) and _comp_id(c) in keep_ids) or (_comp_name(c) and _comp_name(c) in keep_names)] + kept = [] + for c in components: + if not ((_comp_id(c) and _comp_id(c) in keep_ids) or (_comp_name(c) and _comp_name(c) in keep_names)): + continue + child_components, child_relations = filtered_children[id(c)] + status = _display_status(c) + if child_components or child_relations or status == "modified": + c = 
{**c, "components": child_components, "components_relations": child_relations} + kept.append(c) kept_ids = {_comp_id(c) for c in kept if _comp_id(c)} kept_names = {_comp_name(c) for c in kept if _comp_name(c)} @@ -327,12 +390,10 @@ def _count_changed_components(components: list) -> int: def _has_changed_relations(components: list, relations: list) -> bool: """Recursively: is any relation (at any nesting level) added/modified/deleted?""" - if any(r.get("diff_status") in CHANGED for r in relations or []): - return True - for c in components or []: - if _has_changed_relations(c.get("components") or [], c.get("components_relations") or []): - return True - return False + return _has_changes([], relations) or any( + _has_changed_relations(c.get("components") or [], c.get("components_relations") or []) + for c in components or [] + ) def render_mermaid( @@ -439,12 +500,14 @@ def emit_level(comps, rels, indent, force, level): return "\n".join(head + body + style + ["```"]), counters["nodes"], counters["edges"] text, n_nodes, n_edges = build(changed_only) - truncated = changed_only + rendered_changed_only = changed_only + truncated = False # Degrade an oversized full graph to changed-only before giving up (GitHub caps). 
if text is not None and (n_edges > MAX_EDGES or len(text) > MAX_TEXT) and not changed_only: t2, nn2, ne2 = build(True) if t2 is not None: text, n_nodes, n_edges, truncated = t2, nn2, ne2, True + rendered_changed_only = True meta = { "n_changed": n_changed, @@ -452,6 +515,8 @@ def emit_level(comps, rels, indent, force, level): "n_nodes": n_nodes if text is not None else 0, "n_edges": n_edges if text is not None else 0, "truncated": bool(truncated or text is None), + "changed_only": bool(rendered_changed_only), + "requested_changed_only": bool(changed_only), } if text is None or n_edges > MAX_EDGES or len(text) > MAX_TEXT: # never trip GitHub's red error box meta["truncated"] = True diff --git a/scripts/run_local.sh b/scripts/run_local.sh index ecb66f7..234fde3 100755 --- a/scripts/run_local.sh +++ b/scripts/run_local.sh @@ -25,10 +25,10 @@ ENGINE="${ENGINE:-$ACTION_DIR/../CodeBoarding}" OUT="$ACTION_DIR/.cb-local" DEPTH="1" DIRECTION="LR" -CHANGED_ONLY="" -NO_EDGE_LABELS="" -RENDER_DEPTH="" -EXTRA="" +CHANGED_ONLY=() +NO_EDGE_LABELS=() +RENDER_DEPTH=() +EXTRA=() OPEN="auto" REPO="" BASE_REF="" HEAD_REF="" BASE_JSON="" HEAD_JSON="" AGENT_MODEL="${AGENT_MODEL:-openrouter/anthropic/claude-sonnet-4}" @@ -45,10 +45,10 @@ while [ $# -gt 0 ]; do --out) OUT="$2"; shift 2;; --depth) DEPTH="$2"; shift 2;; --direction) DIRECTION="$2"; shift 2;; - --changed-only) CHANGED_ONLY="--changed-only"; shift;; - --no-edge-labels) NO_EDGE_LABELS="--no-edge-labels"; shift;; - --render-depth) RENDER_DEPTH="--render-depth $2"; shift 2;; - --extra) EXTRA="$2"; shift 2;; # raw args forwarded to diff_to_mermaid.py, e.g. --extra "--font-size 20 --node-padding 16" + --changed-only) CHANGED_ONLY=(--changed-only); shift;; + --no-edge-labels) NO_EDGE_LABELS=(--no-edge-labels); shift;; + --render-depth) RENDER_DEPTH=(--render-depth "$2"); shift 2;; + --extra) read -r -a EXTRA <<< "$2"; shift 2;; # raw args forwarded to diff_to_mermaid.py, e.g. 
--extra "--font-size 20 --node-padding 16" --no-open) OPEN="no"; shift;; -h|--help) sed -n '2,30p' "${BASH_SOURCE[0]}"; exit 0;; *) echo "Unknown arg: $1" >&2; exit 2;; @@ -57,7 +57,7 @@ done mkdir -p "$OUT" -run_engine() { # $1 = uv-runnable python source +run_engine() { ( cd "$ENGINE" && \ STATIC_ANALYSIS_CONFIG="$ENGINE/static_analysis_config.yml" \ PROJECT_ROOT="$ENGINE" \ @@ -67,7 +67,7 @@ run_engine() { # $1 = uv-runnable python source OPENROUTER_API_KEY="${OPENROUTER_API_KEY:-}" \ AGENT_MODEL="$AGENT_MODEL" \ PARSING_MODEL="$PARSING_MODEL" \ - uv run python -c "$1" ) + uv run python "$ACTION_DIR/scripts/cb_engine.py" "$@" ) } if [ -n "$BASE_JSON" ] && [ -n "$HEAD_JSON" ]; then @@ -85,39 +85,38 @@ else echo "== Resolving base analysis at $BASE_REF ==" if git -C "$REPO" show "$BASE_REF:.codeboarding/analysis.json" > "$BASE_DIR/analysis.json" 2>/dev/null; then - git -C "$REPO" show "$BASE_REF:.codeboarding/static_analysis.pkl" > "$BASE_DIR/static_analysis.pkl" 2>/dev/null \ - && echo " using committed baseline (+ static_analysis.pkl)" || { rm -f "$BASE_DIR/static_analysis.pkl"; echo " using committed baseline"; } + echo " using committed baseline" else rm -f "$BASE_DIR/analysis.json" echo " no committed baseline; running FULL analysis on base (LLM)..." 
- BASE_SRC="$OUT/base-src"; rm -rf "$BASE_SRC" + BASE_SRC="$OUT/base-src" + git -C "$REPO" worktree remove --force "$BASE_SRC" 2>/dev/null || true + git -C "$REPO" worktree prune + rm -rf "$BASE_SRC" git -C "$REPO" worktree add --detach "$BASE_SRC" "$BASE_REF" >/dev/null - run_engine " -from pathlib import Path -from codeboarding_workflows.analysis import run_full -print(run_full(repo_name='$(basename "$REPO")', repo_path=Path('$BASE_SRC'), output_dir=Path('$BASE_DIR'), - run_id='local-base', log_path='/tmp/cb-local-base.log', depth_level=int('$DEPTH'), source_sha='$BASE_REF')) -" + run_engine base \ + --repo "$BASE_SRC" \ + --out "$BASE_DIR" \ + --name "$(basename "$REPO")" \ + --run-id local-base \ + --depth "$DEPTH" \ + --source-sha "$BASE_REF" git -C "$REPO" worktree remove --force "$BASE_SRC" >/dev/null 2>&1 || true + [ -f "$BASE_DIR/analysis.json" ] || { echo "Base full analysis ran but analysis.json is missing." >&2; exit 1; } fi echo "== Analyzing head at $HEAD_REF (incremental from base) ==" cp -a "$BASE_DIR"/. 
"$HEAD_DIR"/ 2>/dev/null || true - run_engine " -from pathlib import Path -from codeboarding_workflows.analysis import run_incremental, run_full, BaselineUnavailableError -from diagram_analysis.exceptions import IncrementalCacheMissingError -repo=Path('$REPO'); out=Path('$HEAD_DIR'); name='$(basename "$REPO")' -try: - print(run_incremental(repo_path=repo, output_dir=out, project_name=name, run_id='local-head', - log_path='/tmp/cb-local-head.log', base_ref='$BASE_REF', target_ref='$HEAD_REF', source_sha='$HEAD_REF')) -except (IncrementalCacheMissingError, BaselineUnavailableError) as exc: - print(f'Incremental unavailable ({exc}); full analysis on head.') - for p in out.glob('*'): - if p.is_file(): p.unlink() - print(run_full(repo_name=name, repo_path=repo, output_dir=out, run_id='local-head', - log_path='/tmp/cb-local-head.log', depth_level=int('$DEPTH'), source_sha='$HEAD_REF')) -" + run_engine head \ + --repo "$REPO" \ + --out "$HEAD_DIR" \ + --name "$(basename "$REPO")" \ + --run-id local-head \ + --depth "$DEPTH" \ + --base-ref "$BASE_REF" \ + --target-ref "$HEAD_REF" \ + --source-sha "$HEAD_REF" + [ -f "$HEAD_DIR/analysis.json" ] || { echo "Head analysis ran but analysis.json is missing." 
>&2; exit 1; } BASE_ANALYSIS="$BASE_DIR/analysis.json" HEAD_ANALYSIS="$HEAD_DIR/analysis.json" fi @@ -125,7 +124,8 @@ fi echo "== Diff -> Mermaid ==" META="$(python3 "$ACTION_DIR/scripts/diff_to_mermaid.py" \ --base "$BASE_ANALYSIS" --head "$HEAD_ANALYSIS" \ - --out "$OUT/diagram.md" --direction "$DIRECTION" $CHANGED_ONLY $NO_EDGE_LABELS $RENDER_DEPTH $EXTRA)" + --out "$OUT/diagram.md" --direction "$DIRECTION" \ + "${CHANGED_ONLY[@]}" "${NO_EDGE_LABELS[@]}" "${RENDER_DEPTH[@]}" "${EXTRA[@]}")" echo " $META" # Browser preview: render the (fence-stripped) mermaid via mermaid.js, strict mode diff --git a/tests/__pycache__/test_build_cta.cpython-310.pyc b/tests/__pycache__/test_build_cta.cpython-310.pyc deleted file mode 100644 index 4dfa007a3d2283c97439c8cff78fb84c457c1de2..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3871 zcmb7H&r{n*6xK?zEgM4!p=kv>Tl?FL)!N8=$a=p5d#D#3LibB?7ag2L!mb(3gjB$ge~H>n8RE+yX_EXOo06?%L5dJ^y~`=)q)PfV;vD$BcSErhvN(!^Ki%s5L?!c zMp-%A)-Ghnt4l)8o z0%g|aqEp?^%@`X521g8|7)60$>T+RJ8(|)$NgQo{4ENuo%@;}Q$^e_Y9v^6=I0rh) z+K^gP$V8-Bb3V-$^OUv)11Zjf7-&h?!+n35)8WGV)nKTfI-47+nQZ4D>7Aq z;S%V-!R!K#1_F0p1Gku{m3FY{5)`8P6c%atN~Cpt0DOIms^whjQ5pwzPH9@8w4ShL zgySNmn3?8|7(_Si5$IFc68<3~uL^D5{I2-?;c5Nrq-3SFof)+y6WHMLTbq(r1-7MO4ZR*N$r z()=L}>FPW-uQjz(U`n+~7X!)ZX2>HZX`BG}A$4Ctb(0`k)7?g!qC8<;k5^bH2)p*` z8yF~SRku3rk-cppoBf_en%{9)ZcBRyW0wKNAEsWuDoU_cTt=box(cIr4v0PCXG1BQ zT(%}}l>$*)39%0~1uvOtXhhAJ`REY`B2qGo#Rw}P&7+yFWuKOUWHw z{qHcU;tLS1$(w^QZw)skpWaBsnrs9$-nyMMcsw1()24GgNH$_F=0N)t_Q^SX!&F1e z-I+4?=L~hVuA`QEAlA5QM4)Hjy8ScH;02{HF8%VngpFHVM>}tz=#YT!OP6&JqcC2R z5U?2Rpg|0~8qLF+S-*r;5<9~LgwXsUkTd;(JW0vmFg^s$yXB#{qd$0|}hbMsZ_>$F#*oLQVB) zSlfNS-3D%gF1;7R;?U*spdZ!$rExIK3-GR9n2~R`ZQMH@*2^h1qMXH!myBrTp$esT_aO>m}oG>hNyK*Y;U=!1nOxfye?z_-&w*t?lGL|HJGX}2XHahS7pc4j-Yg8TJD0?$>&47z&+K7_QDGdxon zkb2nF%8ub<5~O7DXf2m@3MvtVP>R6FQ6N`n7@MXRVnY-4APQTY-3L_~sBtoGGVj~| z%=-{U)pB0o7E|D6Mtp_>DPK&Z!2A}sQGAEuE{YEQ=#k!oT!I^jheQwCdytIhdD&uC 
z;62>%)B|vYmdDc^r7Wqze#eoUQecpLuL&P$=)0e5z^7FdE}Cy47q8N67~{4bo>u}j z;l~lBeokMN^LRBlYf?&Deb`0sU#= YTAmGCDuDP3o}IDW*ob$nNZfz_1H10Np8x;= diff --git a/tests/__pycache__/test_cb_engine.cpython-310.pyc b/tests/__pycache__/test_cb_engine.cpython-310.pyc deleted file mode 100644 index 35ad851ec8fddbab05030bf511192b24cacb4320..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6716 zcmb7I&2!tv6~_V~2vQU!D~jXTY0RW)J9HAMiQA;pdR*D^N8Bb%YT0cE;}nI6r34Bj z=v`2@6l&5bGj-CL+;Zwk>dpT~|BOAhC!cfEbf*2i1wTYePHb@Sh~335-oD@a++=pP zXyE$%%4^NrFBryusB!di&{)AO{{<2no)NOpjG4#uzv-F!-|{T|Z+kZWtvJ_mJck*a zHS_y!$asM}yx3>^GY3Yq)GUSeIit@GOqQ)S>&k!(OV2>&^yEJ=Bd8rEug0q&Z1|QFQEMb+U0N#?K$0Emur;a=E_l^Y%v6=1ZNO%D?~eO=X*lsIgOt(n>vu zV_8Wzc_n+Ja^;g-m3krsucz_OTg9%7+8Y&_cGoMdB<#jqR%B9XCl$eicsOU9x3?>i zth6}~dARfhtQfT_Q}!o8y6NQyY*Y*CbUg|AdJ>2btNITTvDJu^2lDXz$^A<~JBW8= zB-MNv1sfu0`NN*WbJIOb{9&DU(kN+5g^a-VURwj-XD`UDo=gsee?ll@3W_E$)uX+e%=&9m2&q*b-W9A+pd=yZ5H6sOP-^B`4A zEYJW86x-4Q$Ov&oJd*f)|14V41t~Fy6>*RF%*Lyxn8j$Vnxi@Gpv8S(6@5Q50z8*| z|9&@!vk~zU-WBJFki?G=`l={_P`@O%;~GU)vUT5tFmf29`?vJsdmt%^!Z=`iW{>Tg z2gWty)8ajI&)Tz7t7rDCp53>YalgN{ z)^Sf@k7iIZ-6MPlXL)PrV{Pxy|^h4#I@u>`vTt0;K6?E|LvAF zOH0Gy8q|%r5I-P7<}Q|roP_q97$8x6OD5n#!36aj6O4t<72I+OWF(kkXoi+{9(I`1 z&O-+dFEXY0F9lJhCO2 zc;WgmNNQ`3zQY;7nD6OvYdG~Z<&_KT>MPt{MU6HR>CuR-`y8a6zqgUpdFFFUZn;94g%dZ_9!YaI0N=I zB1Q_wK;{#|bSU3CK(2=PVW-gUrqhS+lFOjyu<)`0w~Dcz85UoLCFl`)H;3xq+LckPi>0J)j1?PT2P0%Wx+eip}m}RQ_pGbP;1u}QTX;c)L5~1JttVN zzw_?JCu9ZbfJII>UOo(J=(~tQ!kDrh?^ola+iaPh@W>9rCa^kX{KZ?Sh zRtx>5nLr}ur`IV)DJ$mfELvJJ6~h2&iiURa)7YI!;aYrJMaN3%4|VMtz9z|$5^{@( z?d1Cx^}YL}2?U?E!ZAcCf@nm)!~-Epw`uNC&?96K`hYFuJdG{fp?ly?;aeCY1jJ2^ zcyZXHQ75Evwml2N70GtJOnw@34KRFJ}iSs9*A_<{xN4SjK^I&mv&<5C&j+HXJ_l z%@iP!`Qh10?fu+=fuMnKK?Zcr*hex<$z1C07eWMy%rGgIg@plp#dM};H%oYb2v(8W z8mOaYH+y8&@&WRqf$^2e_$ay9EmGv*OHG-ycg@fCgvRY7jEEYDVi(0H)c75d5oF>z znor2PROhrkWmt{SS4a_BokkRMq)J;MH3XTXDn1a%r~H&ZOf{(DHmy5}ry7{y4MZ}o z9R4suX;yVI&#Jv>9=@N#gkru%Tv_lO)39F{!``5J4aQmD^3y~|06%GG<%!?II)4Nc 
zlB7|FXU#n9f>sBu43n|;*;hr@U4s@+WkBS8#D_%IhO3-9>k% zRM7o(^iNzQ+ZEjM4UloFFw7SzZ$U+sRaUt$5AW-$BI)=Cy5y_V{NY8xk^�EAN4f zGoik@XO8nAw5$}hI#k)w-UdG6k1KR!$p_3JnZT0uUQ|u#!?{CsDV<{M}6vLu~Q_;w9V*>Y>Sp6g|OM~#b;;h z^px~J@5UVto(t*d=IHfOQ zApb1+yA%hl^)R?BXb~-ca21z1C$gdb24pnz3zxw7^a7K@eN33B|x(X+ggA>x#Z4c3PgTWDHNf4Y% zmfj);>j;YjCb*>oD~p#pVVS83+Eg9)#6_}S!7VO;RPC(R${Zz1TV#3lq>ED^f8bJZ z5*GSHuQGN%H*OwN5%R3sQ0a`I$5MmRfIjOpePTtYR@TUTQQ+G#0-wr>puNF8R}P$+ z=b}`C3!~#Jjh1I&NGuVcqT;qb1o1>O9Mk zZqiV3Y~lk2(4Wa{R*Szw%V>u5srGlN>qERM>3E^U%I2ayZDe{0(z5r8g%c>-S7MK@T(}U1DK-l8=$1i$7t2LW({ZrI6t+l*+o74nbQ% z)b{=AOE`4)!=&!}*(uxaX*CVC+r{xOF{biV!p6~hmMPo1pmlH}fQnT72Jh2ylv3jy zH7~eK;%#Kdoj|}JWVtliQI?34GgmIbxQdEYAEoKUQM09WUD?Fh5y6f&q})rHL@yH{ i`h?jwT#Jq%$g#g};@>R0ZfU`tcjn)7e_X=R-~RzHp&euZ diff --git a/tests/__pycache__/test_diff_to_mermaid.cpython-310.pyc b/tests/__pycache__/test_diff_to_mermaid.cpython-310.pyc deleted file mode 100644 index 65cf958a2da82046f062d1a34b30710da7a565d9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 9098 zcmbVS-;*21ao*VX_<5qhYZ7CrtG*3%O)i|FzNLkW)FuW z4ghBtB8lKqa*kDYDkYa)iJ#0=-MxkRAM%#JVji6*J?l+XF8k}A-31nRM^VAm&hE_4 z&dz+@{dM>34Hg%x7JkRCy|M8>r!4FLs51L2p>iEZ{NE^~)v~0Jc2BefpY4{-XQ$=h z?DX7CujL6VvNuXQWho**@}kPN*s1PX8w(o?(*26HEp}~TMya(HBj@X>$Nskz=8 zbh_>OTlMdodvz7{Wu&_O&V^4fpw(;$?Y#cG+THB-c^ha<}{T7b+4vL92v4`%2 z7yQhch+(yB?O40kZ2%#i=gyWlaild8>cqsEc+$OUOz7%Q1|XyjZ(8MFk1F= za6zxRiz7aXqJtMbsNA==#Fjk~6I)u(?cdsufJQ?!>{ey1+lzuBrmf4HQL;Xe@e8Zr zX#2v~wOG?)Q@I#~i_acjd_RsR-G)Xpc6dK7i|Vll9SRur}xclZ&?o zGP*ekl>|EHWp}P2Y$x48|01K$Wv~}^UA@}t#z}jyIlOilGx*d-anc$erdxLh^ zi?87#>x4PG{ctmC)u_jyk68s>*{bpS(v|6IelONXr`LgsdLhjzh77%K5^ctfl3KtU zwMWm#EvMa;+Kor6u`Z)T|5%Im6dUDJXP1Vo;VNE6VcR8fRFwYcd%mDQSEwUAB&Obr z+a#&%N7tkvtQ@plD2Lz!R`i_2Y&RQT%a2t%;6?f}PRvz_+OW_PwczY$YvIWO6$c%( zgS*=6g{x6-e$oyeIDuEjgz#aZj^k8NDQYK`qDjVog<=9dm10-?D-23H&#Xl3*bwvCsm|6S@|Kl{`B^6dw2H3|zO&}ea& z=p6*0L&* zPagc9zd)y$)VC^*;ZHSi*~U@$5g^xb#FtS-7TknoTm%T+k&bk6c3EevmM6>7N4+E~ 
zvWjzAE&!;S_V0nPZ-a{qP!kj+>RUAWf1ya2Q>D`pU9oMW=57m=-nQsC9ZQy;gAB7i zvfQx?z5JP8ws}A4H&N%VEf-EOKW{m(0SEjnPQVqB+e+&$>WedVAN8e~x`+B9c5@{9 z!%Z!2eCCF5Gm7xj;-(h2U)YK`L@jblM~hp9l;k;zCBf zD8YHuZ?BWZ$QhH#2jfL)KYOTE$$M*$$J6C>oENSkN?UH~>guT5!y6!Uy%5ImKgqWr zjl$jw7jzu;*0j4C#*uc{qfqM7%o1lz=PzGr&S$XBcnQTB@uoN_4)fnpF~0D{mR^t* zF+k4iKS3o^JENK%0d;h9b%er74+j)<+s^OVP%V+zP&cE3WvCFTRRu>iSEYrfdJE&L zx2d34s*5PJSZ~=yhYE6-uk2n@**t4;0vpeGGzPBVI3c>k=)sc#StMC%5OZ{}AF z5OK94C&}$>J6DR!M%aijg>Zt_2gBYdPVMJFqfviDgMJGIg!vu1ewT{BrQ#YDbD}xL zqB-zAw1AlANh*F4ggyS+E6%V0No^nf?0ql4PkL@A z&W<}3nap*4Mo>o2Nw>5+is2(k(bZ8Zh%wcic6-bv9M#OJxcWAApTq9W6gv|S=BgYF z`T<8zshX2SShee-PO5!z9>LFfzJMf6Ay@A{D*GWxjDWNtbzKD<5hLbz5hSI`UqI3X zVeGajt$_M>rpjMQxq~oeGz+sM=m<6i?xAlLw~iX z2z%y3bjen9O3)u1#n}UNV9%q?hOhmA!}lQ2)qu2YgupY43TV_FD!xm_ z_o!%6G1b|8zKVk9(>P+Hc;7<8Q9_^XLek^pes9&T?0zuCCNnly4lV0LTasFwumVI1@aK{m_yAMkig4)z#O z^Xp7O+;g#Q;6Qa&6;jY#!K<&f_()w&;WN@_RzXO8Bn3M1fiC(1C-?*{=}P|<2&;*?L)9gHnLa6!D`qG`)xFlyZA8dVc%}LL6kX*J*y+O;nme~!K+)y zRwpg&9L}vxJ0PrdRYDP;s)!2KSuXPVe}MLM{-;bl(pZ?fCPNxOre`V8#{SF*X%25( z5FoK$H1_hYibAC3CTM98F8x1bPyn1PE2p)8MrCSUY)4LNPdSz6_C#5`4NIHt0ZYs>@+~-o4a&qz zviz}4i4kncv{dpI50>H5uIX$T`7_Ky1yqDoa1{A-Tzx|GWP>oFXjABgOKg z7yuiEMS*CSlT}wn(Xiztj|cPeIK%Ro<=&tj4v`Lk>%t^7bW8$eSN^(KVHqqVST|v& z@th_R|C~_@GN5(kGPsWl$zU2t62>W;QGibDMs;AlL3le8b}AHHW}HQ+%F@J1xyO}d zbIlo*-yHil>MM^{>ZjLL9$B&hF}UN>=|`hMa%JqEzVyv21%PwgQ2&TozVHmdH*l@4 zQ;}=@JhDd^Srey6F+j!kGg)ftCMesf5sati)p_m_foHNGA(s1vU!#Imn;lQEtmGg% zZ*kD#rf8Tx9$J{269nDB+L5%sMEZ>ElXgnEPFPc&aEA8ocn0fTao^s8Z@~B^30_Y} z#%|aQ&b8)v;X7gc;HC=O52Iwfgv#k}UAmIh-LC0cg^5pd8cdm@R8}&Eq zH^-G5_0!?z@Cq&$e;jp{e9%N*iq2DMjXiFIh%jKopXN_3Hvqnd)27n;3D9AYsYi5G zWX6<+ibw~mX4c?vu-!vSu2j5%hna?!?PZY9F-VMv2y)v)L$5H}9uvrg`7d}+(FDGD z&PoyF)S=G^%)`eTY&yttDzp;nASZIn5GNT*7LOIOFA8c56;VQ~EB!Rzl*~oCobInd z7DuS(Gzgtaf%kF9U+6<=2o@tEYa*V7krknM!s0Ydfwd+WodCs=Gy8u2cA7rz@ySy!kZwXw zBUiu--#XonWHGq3Q=Y4Mt*Q3&ws#3I61yTFYg{hNgYpx_@<>y9_PhJ2_+E`ZP!jtn zG=B&|Qo-@$GrW`i#Ov74q3vG?;pOUFc)WmqA(6d|ffH*siKAEs%uXL<=_>+9>x?2& 
z*%{P4lc?Rpz6zNHK5UxJb+*4&1v{_(HX=j`5T^LtBj`S)f+!%{#buAVpMHLYJ?8Ad zK{jsuZx|rmzO51(ZyuzQ#0S&I4n~4D9uuSv9em}(XH8?c1_g>O3^gLokTy)1y&W5^ z+ce1vK8p46T{&iRh4JN!*0m|oA){ad+#(3(jPc97*l2T@JO!Cy*Sb#`Wk32&w$P)Z zDzK5WGQZHhj~u+nv%JWI#f=Z8CTpi{=TyXZG7&G(S3k4S7>~XB`SbOa*@beNt;o$H zM`tSkoKOh)^2~_8qjeG{$H|%Q34)59n~g*22k5Kbqk>Wc^Y#aHMVm8fNQI(;(kS(W zihrVl_(l<>sgJ1m1r<|K@D=AuKf;aMIQah*c+1N3%YN1OQ7&Nns)VzL4J_(M8)o>A zjD8R_+$Mev!CStKW4as!a?lO}b%we*@zYrQBm8#<30s%xKO1`8Rb|snm;cY9)a}z} zBy>#_4a@%_5a_as;z94jNSB5o!s-P7Zc_Yb5TkHY|3K-O`6PJ@pP+b~ji<`6!K-4n r`z>4hSIvIkH3~E1Qz%@^x9L9wYGMWFs#9I`-R08q8~z71;r{8*@76tr diff --git a/tests/test_cb_engine.py b/tests/test_cb_engine.py index 8096ddd..89d3a38 100644 --- a/tests/test_cb_engine.py +++ b/tests/test_cb_engine.py @@ -5,6 +5,8 @@ import tempfile import types import unittest +from contextlib import redirect_stderr +from io import StringIO from pathlib import Path sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "scripts")) @@ -68,18 +70,47 @@ class IncrementalCacheMissingError(Exception): def test_base_calls_run_full(self): rf = _Rec() self._install(run_full=rf) - cb_engine.run_base("/repo", "/out", "myrepo", "rid-base", "2", "abc123") + cb_engine.run_base("/repo", "/out", "myrepo", "rid-base", 2, "abc123") self.assertEqual(len(rf.calls), 1) k = rf.calls[0] self.assertEqual(k["repo_name"], "myrepo") self.assertEqual(str(k["repo_path"]), "/repo") - self.assertEqual(k["depth_level"], 2) # coerced to int + self.assertEqual(k["depth_level"], 2) self.assertEqual(k["source_sha"], "abc123") + def test_main_parses_depth_as_int(self): + rf = _Rec() + self._install(run_full=rf) + cb_engine.main([ + "base", + "--repo", "/repo", + "--out", "/out", + "--name", "myrepo", + "--run-id", "rid-base", + "--depth", "2", + "--source-sha", "abc123", + ]) + self.assertEqual(rf.calls[0]["depth_level"], 2) + + def test_main_rejects_invalid_depth(self): + for depth in ("0", "4", "x"): + with self.subTest(depth=depth): + with redirect_stderr(StringIO()): + 
with self.assertRaises(SystemExit): + cb_engine.main([ + "base", + "--repo", "/repo", + "--out", "/out", + "--name", "myrepo", + "--run-id", "rid-base", + "--depth", depth, + "--source-sha", "abc123", + ]) + def test_head_uses_incremental(self): ri, rf = _Rec(), _Rec() self._install(run_full=rf, run_incremental=ri) - cb_engine.run_head("/repo", "/out", "r", "rid", "1", "base", "head", "head") + cb_engine.run_head("/repo", "/out", "r", "rid", 1, "base", "head", "head") self.assertEqual(len(ri.calls), 1) self.assertEqual(len(rf.calls), 0) # no fallback self.assertEqual(ri.calls[0]["base_ref"], "base") @@ -92,10 +123,13 @@ def test_head_falls_back_to_full_on_cache_miss(self): analysis.run_incremental = _Rec(raises=IncMiss) out = tempfile.mkdtemp() (Path(out) / "stale.json").write_text("{}") # must be wiped before the full run - cb_engine.run_head("/repo", out, "r", "rid", "3", "base", "head", "head") + (Path(out) / "health").mkdir() + (Path(out) / "health" / "stale.json").write_text("{}") + cb_engine.run_head("/repo", out, "r", "rid", 3, "base", "head", "head") self.assertEqual(len(rf.calls), 1) # fell back to full self.assertEqual(rf.calls[0]["depth_level"], 3) self.assertFalse((Path(out) / "stale.json").exists()) # head dir wiped before full + self.assertFalse((Path(out) / "health").exists()) # nested stale artifacts wiped too class TestHealth(_Base): @@ -131,6 +165,33 @@ class CS: self._install_health(report=report) self.assertEqual(cb_engine.run_health("/art", "/repo", "r"), 3) # 2 warnings + 1 critical, info ignored + def test_prefers_written_health_report(self): + artifact_dir = Path(tempfile.mkdtemp()) + report_dir = artifact_dir / "health" + report_dir.mkdir() + (report_dir / "health_report.json").write_text( + """ + { + "check_summaries": [ + {"finding_groups": [ + {"severity": "warning", "entities": [{}, {}]}, + {"severity": "critical", "entities": [{}]}, + {"severity": "info", "entities": [{}, {}, {}, {}, {}]} + ]} + ] + } + """, + encoding="utf-8", + ) + 
self.assertEqual(cb_engine.run_health(str(artifact_dir), "/repo", "r"), 3) + + def test_malformed_health_report_falls_back(self): + artifact_dir = Path(tempfile.mkdtemp()) + report_dir = artifact_dir / "health" + report_dir.mkdir() + (report_dir / "health_report.json").write_text("[]", encoding="utf-8") + self.assertEqual(cb_engine.run_health(str(artifact_dir), "/repo", "r"), 0) + def test_missing_module_yields_zero(self): # No health.* modules installed -> import fails -> 0, never raises. self.assertEqual(cb_engine.run_health("/art", "/repo", "r"), 0) diff --git a/tests/test_diff_to_mermaid.py b/tests/test_diff_to_mermaid.py index f9916e3..a28030e 100644 --- a/tests/test_diff_to_mermaid.py +++ b/tests/test_diff_to_mermaid.py @@ -64,6 +64,15 @@ def test_relation_added_and_deleted(self): statuses = sorted(r["diff_status"] for r in dm.build_diff(base, head)["components_relations"]) self.assertEqual(statuses, ["added", "deleted"]) + def test_parallel_relation_deletion_is_not_label_modification(self): + base = { + "components": [comp("A"), comp("B")], + "components_relations": [rel("A", "B", "uses"), rel("A", "B", "publishes")], + } + head = {"components": [comp("A"), comp("B")], "components_relations": [rel("A", "B", "uses")]} + statuses = sorted(r["diff_status"] for r in dm.build_diff(base, head)["components_relations"]) + self.assertEqual(statuses, ["deleted", "unchanged"]) + class TestRender(unittest.TestCase): def _diff(self): @@ -136,10 +145,68 @@ def test_changed_flag_counts_nested(self): self.assertEqual(meta["n_changed"], 1) # the nested child counts self.assertTrue(meta["changed"]) - def test_changed_only_truncates(self): + def test_nested_method_change_highlights_collapsed_parent(self): + base = {"components": [comp("P", subs=[comp("c1")], subrels=[])], "components_relations": []} + head = {"components": [comp("P", subs=[comp("c1", {"x.py": ["f"]})], subrels=[])], "components_relations": []} + text, meta = dm.render_mermaid(dm.build_diff(base, head), 
render_depth=1) + self.assertEqual(meta["n_changed"], 1) + self.assertIn("class n_P modified;", text) + + def test_nested_relation_change_highlights_collapsed_parent(self): + base = {"components": [comp("P", subs=[comp("c1"), comp("c2")], subrels=[rel("c1", "c2", "uses")])], "components_relations": []} + head = {"components": [comp("P", subs=[comp("c1"), comp("c2")], subrels=[rel("c1", "c2", "calls")])], "components_relations": []} + text, meta = dm.render_mermaid(dm.build_diff(base, head), render_depth=1) + self.assertEqual(meta["n_changed"], 0) + self.assertTrue(meta["changed"]) + self.assertIn("class n_P modified;", text) + + def test_changed_only_keeps_nested_change(self): + base = {"components": [comp("P", subs=[comp("c1"), comp("c2")], subrels=[])], "components_relations": []} + head = {"components": [comp("P", subs=[comp("c1", {"x.py": ["f"]}), comp("c2")], subrels=[])], "components_relations": []} + text, meta = dm.render_mermaid(dm.build_diff(base, head), render_depth=2, changed_only=True) + self.assertIsNotNone(text) + self.assertTrue(meta["changed"]) + self.assertFalse(meta["truncated"]) + self.assertIn("subgraph n_P", text) + self.assertIn("class n_c1 modified;", text) + self.assertNotIn('n_c2["c2"]', text) + + def test_changed_only_prunes_unchanged_children_of_modified_parent(self): + base = {"components": [comp("P", {"p.py": ["old"]}, subs=[comp("c1"), comp("c2")], subrels=[])], "components_relations": []} + head = {"components": [comp("P", {"p.py": ["old", "new"]}, subs=[comp("c1"), comp("c2")], subrels=[])], "components_relations": []} + text, meta = dm.render_mermaid(dm.build_diff(base, head), render_depth=2, changed_only=True) + self.assertIsNotNone(text) + self.assertTrue(meta["changed"]) + self.assertIn('n_P["P"]', text) + self.assertNotIn('n_c1["c1"]', text) + self.assertNotIn('n_c2["c2"]', text) + + def test_changed_only_is_not_auto_truncated(self): text, meta = dm.render_mermaid(self._diff(), render_depth=1, changed_only=True) 
self.assertIsNotNone(text) + self.assertFalse(meta["truncated"]) + self.assertTrue(meta["changed_only"]) + self.assertTrue(meta["requested_changed_only"]) + + def test_auto_truncation_reports_rendered_changed_only(self): + base = { + "components": [comp("A"), comp("B"), comp("C")], + "components_relations": [rel("B", "C", "uses"), rel("C", "B", "uses")], + } + head = { + "components": [comp("A", {"a.py": ["f"]}), comp("B"), comp("C")], + "components_relations": [rel("B", "C", "uses"), rel("C", "B", "uses")], + } + old = dm.MAX_EDGES + try: + dm.MAX_EDGES = 1 + text, meta = dm.render_mermaid(dm.build_diff(base, head), render_depth=1) + finally: + dm.MAX_EDGES = old + self.assertIsNotNone(text) self.assertTrue(meta["truncated"]) + self.assertTrue(meta["changed_only"]) + self.assertFalse(meta["requested_changed_only"]) def test_empty_returns_none(self): text, meta = dm.render_mermaid({"components": [], "components_relations": []}) From 3f47159d9d4c4675cc99a0902bf49662a752af4f Mon Sep 17 00:00:00 2001 From: brovatten Date: Sat, 6 Jun 2026 14:10:49 +0200 Subject: [PATCH 16/27] ci: key base-analysis cache on LLM models; drop stale venv restore-key The cb-base cache stores a full LLM analysis whose output depends on agent_model/parsing_model, but the key omitted them: re-running on the same base SHA with a different model served a stale-model 'before' snapshot, producing phantom added/modified/deleted components. Add both models to the restore+save keys. Drop the cb-uv restore-keys fallback: on a lockfile change it restored a venv built from a different uv.lock, and 'uv pip install -e .' does not reconcile already-installed transitive deps back to the lock, silently running mismatched versions. Exact-key hits (unchanged lock) still warm-start. 
--- action.yml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/action.yml b/action.yml index f9c29e3..fb2c51a 100644 --- a/action.yml +++ b/action.yml @@ -209,9 +209,10 @@ runs: uses: actions/cache@v4 with: path: codeboarding-engine/.venv + # No restore-keys: a lockfile change must force a clean cold venv, not + # restore a venv built from a different lock (`uv pip install -e .` won't + # downgrade/sync already-installed transitive deps back to this uv.lock). key: cb-uv-${{ runner.os }}-${{ hashFiles('codeboarding-engine/pyproject.toml', 'codeboarding-engine/uv.lock') }} - restore-keys: | - cb-uv-${{ runner.os }}- - name: Cache LSP servers if: steps.guard.outputs.skip != 'true' @@ -316,7 +317,7 @@ runs: uses: actions/cache/restore@v4 with: path: ${{ runner.temp }}/cb-base - key: cb-base-${{ runner.os }}-${{ steps.guard.outputs.base_sha }}-d${{ inputs.depth_level }}-${{ inputs.engine_ref }} + key: cb-base-${{ runner.os }}-${{ steps.guard.outputs.base_sha }}-d${{ inputs.depth_level }}-${{ inputs.engine_ref }}-${{ inputs.agent_model }}-${{ inputs.parsing_model }} - name: Generate base analysis (no committed baseline) if: steps.guard.outputs.skip != 'true' && steps.base.outputs.committed == 'false' && steps.basecache.outputs.cache-hit != 'true' @@ -366,7 +367,7 @@ runs: uses: actions/cache/save@v4 with: path: ${{ runner.temp }}/cb-base - key: cb-base-${{ runner.os }}-${{ steps.guard.outputs.base_sha }}-d${{ inputs.depth_level }}-${{ inputs.engine_ref }} + key: cb-base-${{ runner.os }}-${{ steps.guard.outputs.base_sha }}-d${{ inputs.depth_level }}-${{ inputs.engine_ref }}-${{ inputs.agent_model }}-${{ inputs.parsing_model }} - name: Analyze PR head (incremental from base) if: steps.guard.outputs.skip != 'true' From 631db959125427f82c84d65070a1265ae3c9f06f Mon Sep 17 00:00:00 2001 From: brovatten Date: Sat, 6 Jun 2026 14:11:48 +0200 Subject: [PATCH 17/27] refactor(diff): drop unused method_diff payload; collapse structural check MIME-Version: 
1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _diff_methods built full per-file sorted added/removed dicts, but only the boolean 'any change?' answer was used โ€” the method_diff payload was attached to every component and never read by the renderer or any test. Replace it with a short-circuiting _has_method_changes() (no sorting, no payload), and fold _has_structural_changes' if/return ladder into one boolean return. Behaviour-identical; 41 tests green. --- scripts/diff_to_mermaid.py | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/scripts/diff_to_mermaid.py b/scripts/diff_to_mermaid.py index 87838b5..0696d3f 100644 --- a/scripts/diff_to_mermaid.py +++ b/scripts/diff_to_mermaid.py @@ -82,26 +82,16 @@ def _methods_by_file(c: dict) -> dict: def _has_structural_changes(base: dict, current: dict) -> bool: base_files = {fm.get("file_path", "") for fm in _file_methods(base)} current_files = {fm.get("file_path", "") for fm in _file_methods(current)} - if base_files != current_files: - return True - if len(base.get("components") or []) != len(current.get("components") or []): - return True - return False + return base_files != current_files or len(base.get("components") or []) != len(current.get("components") or []) -def _diff_methods(base: dict, current: dict) -> dict: +def _has_method_changes(base: dict, current: dict) -> bool: base_by_file = _methods_by_file(base) current_by_file = _methods_by_file(current) - added: dict = {} - removed: dict = {} - for file_path in set(base_by_file) | set(current_by_file): - a = sorted(current_by_file.get(file_path, set()) - base_by_file.get(file_path, set())) - r = sorted(base_by_file.get(file_path, set()) - current_by_file.get(file_path, set())) - if a: - added[file_path] = a - if r: - removed[file_path] = r - return {"added": added, "removed": removed} + return any( + base_by_file.get(fp, set()) != current_by_file.get(fp, set()) + for fp in 
set(base_by_file) | set(current_by_file) + ) def _rel_key(r: dict) -> tuple: @@ -180,11 +170,9 @@ def _diff_components(base_components: list, current_components: list) -> list: continue matched_names.add(_comp_name(base_match)) structural = _has_structural_changes(base_match, comp) - method_diff = _diff_methods(base_match, comp) - has_method_changes = bool(method_diff["added"] or method_diff["removed"]) - diff_status = "modified" if (structural or has_method_changes) else "unchanged" + diff_status = "modified" if (structural or _has_method_changes(base_match, comp)) else "unchanged" - annotated = {**comp, "diff_status": diff_status, "method_diff": method_diff} + annotated = {**comp, "diff_status": diff_status} base_subs = base_match.get("components") or [] current_subs = comp.get("components") or [] From 897dabba0d7e5e35d2bfc3780942d5abd3b4481e Mon Sep 17 00:00:00 2001 From: brovatten Date: Sat, 6 Jun 2026 14:12:58 +0200 Subject: [PATCH 18/27] test: cover edge-label truncation, newline escaping, baseline-unavailable fallback Lock down three untested renderer/engine paths: _truncate's 48-char ellipsis boundary, _esc stripping raw newlines/CR (a raw newline breaks the whole Mermaid block), and run_head falling back to a full analysis on BaselineUnavailableError (only IncrementalCacheMissingError was covered). 
--- tests/test_cb_engine.py | 8 ++++++++ tests/test_diff_to_mermaid.py | 11 +++++++++++ 2 files changed, 19 insertions(+) diff --git a/tests/test_cb_engine.py b/tests/test_cb_engine.py index 89d3a38..523db5d 100644 --- a/tests/test_cb_engine.py +++ b/tests/test_cb_engine.py @@ -131,6 +131,14 @@ def test_head_falls_back_to_full_on_cache_miss(self): self.assertFalse((Path(out) / "stale.json").exists()) # head dir wiped before full self.assertFalse((Path(out) / "health").exists()) # nested stale artifacts wiped too + def test_head_falls_back_to_full_on_baseline_unavailable(self): + analysis, _, BaseUnavail = self._install() # the other warm-start failure must also fall back + rf = _Rec() + analysis.run_full = rf + analysis.run_incremental = _Rec(raises=BaseUnavail) + cb_engine.run_head("/repo", tempfile.mkdtemp(), "r", "rid", 1, "base", "head", "head") + self.assertEqual(len(rf.calls), 1) # BaselineUnavailableError also triggers the full re-run + class TestHealth(_Base): def _install_health(self, report): diff --git a/tests/test_diff_to_mermaid.py b/tests/test_diff_to_mermaid.py index a28030e..1b14daf 100644 --- a/tests/test_diff_to_mermaid.py +++ b/tests/test_diff_to_mermaid.py @@ -123,6 +123,17 @@ def test_label_escaping_brackets_break_chars(self): self.assertNotIn("]Name", text) # no raw ] inside a label self.assertIn("#93;", text) + def test_esc_strips_newlines(self): + # A raw newline/CR in a label breaks the whole Mermaid block. + self.assertNotIn("\n", dm._esc("line1\nline2")) + self.assertNotIn("\r", dm._esc("a\r\nb")) + + def test_truncate_caps_long_edge_label_with_ellipsis(self): + out = dm._truncate("x" * 60) + self.assertLessEqual(len(out), dm._EDGE_LABEL_MAX) + self.assertTrue(out.endswith("โ€ฆ")) + self.assertEqual(dm._truncate("short"), "short") # under the cap: unchanged + def test_changed_flag_relation_only(self): # A label-only relation change leaves n_changed=0 but must report changed=True. 
base = {"components": [comp("A"), comp("B")], "components_relations": [rel("A", "B", "uses")]} From 3ea177e672c1c9e49325c6c14decf2263b2162b3 Mon Sep 17 00:00:00 2001 From: brovatten Date: Sat, 6 Jun 2026 14:34:40 +0200 Subject: [PATCH 19/27] docs: tighten README for developer use Lead with a rendered sample Mermaid diagram so readers see the actual colored output (a visual tool should show, not tell). Drop the marketing lead line, and fold the overlapping 'when it runs', 'on-demand', and trailing security paragraphs into one section that states each fact (auto-once vs /codeboarding, default-branch rule, fork refusal) exactly once. --- README.md | 175 +++++++++++++++++++++++++----------------------------- 1 file changed, 82 insertions(+), 93 deletions(-) diff --git a/README.md b/README.md index aea4eea..ae0933f 100644 --- a/README.md +++ b/README.md @@ -1,159 +1,148 @@
- CodeBoarding Logo + CodeBoarding Logo - # CodeBoarding Architecture Diff (Mermaid) + # CodeBoarding Visual Architecture Review - Posts a PR comment with a **Mermaid** architecture diagram showing which components changed — **green** added, **yellow** modified, **red** deleted — for both nodes and arrows. + Visual system-design review for pull requests. CodeBoarding analyzes the architecture before and after a change, then comments on the PR with an inline Mermaid diagram showing what changed.
-## What it does +## What It Does -On every pull request, this action: +- Builds or reuses a baseline architecture analysis for the PR base. +- Runs incremental analysis on the PR head, then diffs components and relationships. +- Posts a sticky PR comment with an inline Mermaid map — 🟩 added · 🟨 modified · 🟥 deleted (dashed), for both nodes and edges. -1. Resolves a **base ("before") analysis**: it reads the `.codeboarding/analysis.json` committed at the PR base commit if one exists; otherwise it runs a full CodeBoarding analysis on the base commit to produce one. -2. Runs an **incremental analysis on the PR head**, seeded from the base analysis — only LLM-calling the components whose code actually changed, so a typical PR costs a handful of LLM calls. -3. **Diffs the two analyses** and renders the architecture graph as a Mermaid block with changed components and relations colored: - - **green** — added - - **yellow** — modified - - **red** (dashed) — deleted -4. Posts a sticky PR comment containing the Mermaid block. **GitHub renders the diagram inline** — no image, no Playwright, no extra branch.
+A PR comment looks like this: + +```mermaid +graph LR + Gateway["API Gateway"] + Auth["Auth Service"] + Cache["Cache"] + Gateway -- "routes to" --> Auth + Auth -- "reads/writes" --> Cache + classDef added fill:#1f883d,stroke:#0b5d23,color:#fff; + classDef modified fill:#bf8700,stroke:#7d4e00,color:#fff; + classDef deleted fill:#cf222e,stroke:#82071e,color:#fff,stroke-dasharray:5 3; + class Cache added; + class Auth modified; + class Gateway deleted; + linkStyle 0 stroke:#cf222e,stroke-width:2px,stroke-dasharray:5 3; + linkStyle 1 stroke:#1f883d,stroke-width:2px; +``` ## Usage +Create `.github/workflows/codeboarding.yml`: + ```yaml -name: Architecture diff +name: CodeBoarding review + on: pull_request: - types: [opened, synchronize, reopened, ready_for_review] - issue_comment: # enables the /codeboarding command on PRs + # Generate ONCE, when the PR becomes reviewable โ€” not on every push, so you + # don't spend an LLM job per commit. Use [opened] for strictly creation-only, + # or add `synchronize` to re-run on each push. Refresh anytime with /codeboarding. + types: [opened, reopened, ready_for_review] + issue_comment: types: [created] permissions: - contents: read # checkout + fetch PR/base commits - pull-requests: write # post/update the PR comment - issues: write # issue_comment command reactions/comments + contents: read + pull-requests: write + issues: write -# Cancel a superseded run when new commits land on the same PR (avoid stacking -# multi-minute LLM jobs). concurrency: group: codeboarding-${{ github.event.pull_request.number || github.event.issue.number }} cancel-in-progress: true jobs: - diagram: + review: runs-on: ubuntu-latest - # Run on (non-draft) PR events, OR when a TRUSTED collaborator comments on a PR. - # The action itself checks whether the first word matches `trigger_command`. 
+ timeout-minutes: 60 if: > (github.event_name == 'pull_request' && github.event.pull_request.draft == false) || (github.event_name == 'issue_comment' && github.event.issue.pull_request != null && contains(fromJSON('["OWNER","MEMBER","COLLABORATOR"]'), github.event.comment.author_association)) - timeout-minutes: 60 steps: - - uses: codeboarding/codeboarding-action@v1 + - uses: CodeBoarding/CodeBoarding-action@v1 with: llm_api_key: ${{ secrets.OPENROUTER_API_KEY }} ``` -> โš ๏ธ **Security โ€” the `author_association` gate is required.** `issue_comment` workflows run from your default branch **with full repository secrets, for any commenter**. Without the `OWNER`/`MEMBER`/`COLLABORATOR` check, anyone could comment `/codeboarding` on a fork PR and have the action check out and run the engine over their PR-head code with your `OPENROUTER_API_KEY` present (a "pwn request"). The action's guard enforces this too, but gate it at the workflow level so a runner never even starts for an untrusted commenter. +Add one repository secret: -You need **one secret**: an LLM API key. OpenRouter is the default; pass your own model via the `agent_model` / `parsing_model` inputs if you prefer. +```text +OPENROUTER_API_KEY=sk-or-... +``` -### On-demand: the `/codeboarding` command +## When it runs -Comment **`/codeboarding`** on any same-repository pull request to (re)run the diagram on demand โ€” handy after the engine/baseline changes, or on draft PRs you don't auto-review. The action reacts with ๐Ÿ‘€ to acknowledge. Change the word via the `trigger_command` input. +- **PR opened / reopened / marked ready** โ€” generated once (per the `on:` triggers above). It does **not** re-run on every push, so you never spend an LLM job per commit; the comment reflects that point until refreshed. +- **`/codeboarding` comment** โ€” a trusted collaborator (`OWNER`/`MEMBER`/`COLLABORATOR`) regenerates the diagram against the **current** PR head, even if one already exists. 
It re-runs and updates the same comment in place (the action reacts with ๐Ÿ‘€). Change the keyword via `trigger_command`. -> **Note:** GitHub runs `issue_comment` workflows from the **default branch's** copy of the workflow file. So the command only works once this workflow is merged to your default branch โ€” a workflow that exists only on a feature branch won't respond to comments. +The command needs the `issue_comment` trigger and runs from your **default branch** (GitHub's rule), so it only works once the workflow is merged there. On-demand runs on fork PRs are refused, so fork code is never analyzed with your secrets. ## Inputs | Input | Default | Description | |---|---|---| -| `llm_api_key` | (required) | LLM API key. Currently OpenRouter (`OPENROUTER_API_KEY`). | -| `github_token` | `${{ github.token }}` | Token used to post the comment. | -| `engine_ref` | `v0.12.0` | Git ref of `CodeBoarding/CodeBoarding` (pinned to a release). Override to track a newer ref. | -| `depth_level` | `1` | Engine **analysis** depth (1โ€“3). Higher = slower + richer data. See `render_depth` for the diagram. | -| `agent_model` | `openrouter/anthropic/claude-sonnet-4` | LLM for analysis. | -| `parsing_model` | `openrouter/anthropic/claude-sonnet-4` | LLM for parsing. | -| `comment_header` | `Architecture review` | Header line of the PR comment. | -| `diagram_direction` | `LR` | Mermaid layout direction: `LR`, `TD`, `TB`, `RL`, or `BT`. | -| `changed_only` | `false` | Draw only changed components and their incident edges. | -| `render_depth` | `1` | Component levels to **draw** in the PR diagram, independent of `depth_level`: `1` = top-level flat, `2` = +one nesting level as subgraphs. Analyze deep, display shallow. | -| `cta_base_url` | `''` | Base URL of a click proxy. When set, the comment adds "open in VS Code / Cursor" + "get the extension" links (with `owner`/`repo`/`pr` appended) that drive straight to the extension. Empty disables the CTA. 
| -| `trigger_command` | `/codeboarding` | PR-comment slash-command that triggers an on-demand run (requires the `issue_comment` trigger in your workflow). | +| `llm_api_key` | required | LLM API key. OpenRouter is the default provider. | +| `github_token` | `${{ github.token }}` | Token used to post/update the PR comment. | +| `engine_ref` | `v0.12.0` | CodeBoarding engine ref. Pin for reproducibility. | +| `depth_level` | `1` | Analysis depth, 1 to 3. Higher is slower and richer. | +| `render_depth` | `1` | Display depth for the PR diagram. Keep `1` for a clean top-level view. | +| `diagram_direction` | `LR` | Mermaid direction: `LR`, `TD`, `TB`, `RL`, or `BT`. | +| `changed_only` | `false` | Render only changed components and incident edges. | +| `agent_model` | `openrouter/anthropic/claude-sonnet-4` | Model used for analysis. | +| `parsing_model` | `openrouter/anthropic/claude-sonnet-4` | Model used for parsing. | +| `comment_header` | `Architecture review` | Heading for the PR comment. | +| `trigger_command` | `/codeboarding` | Slash command for trusted on-demand runs. | +| `cta_base_url` | empty | Optional click-proxy base URL for editor and extension links. | ## Outputs | Output | Description | |---|---| -| `diagram_md` | Path to the rendered ```` ```mermaid ```` block in the runner workspace. | -| `n_changed` | Number of components added/modified/deleted, counted recursively. | -| `truncated` | `true` if the diagram was reduced to changed-only to fit GitHub's Mermaid limit. | - -## How the diff is colored - -Nodes are styled with Mermaid `classDef` / `class`; arrows are styled with positional `linkStyle`. A relation counts as **modified** when its endpoints are unchanged but its label text changed. Example of the emitted block: +| `diagram_md` | Path to the generated Mermaid markdown block on the runner. | +| `n_changed` | Number of changed components, counted recursively. | +| `truncated` | `true` when the graph was reduced to fit GitHub Mermaid limits. 
| -```mermaid -graph LR - Api["API Gateway"] - Auth["Auth Service"] - Cache["Cache"] - Api -- "routes to" --> Auth - Auth -- "reads/writes" --> Cache - classDef added fill:#1f883d,stroke:#0b5d23,color:#ffffff; - classDef modified fill:#bf8700,stroke:#7d4e00,color:#ffffff; - classDef deleted fill:#cf222e,stroke:#82071e,color:#ffffff,stroke-dasharray:5 3; - class Cache added; - class Auth modified; - class Api deleted; - linkStyle 0 stroke:#cf222e,stroke-width:2px,stroke-dasharray:5 3; - linkStyle 1 stroke:#1f883d,stroke-width:2px; -``` - -## No baseline required - -If `.codeboarding/analysis.json` isn't committed at the PR base commit, the action **generates the baseline itself** by running a full analysis on the base commit, then diffs the head against it. Committing a baseline on your default branch makes runs cheaper (the base run is skipped) and the diff more stable, but it is not required. - -## Fork PRs - -Because nothing is pushed (the diagram is inline Mermaid), there is no image step to skip on forks. The one caveat is GitHub's own policy: **secrets are withheld from `pull_request`-triggered runs on forks**, so the LLM key is unavailable and the run fails early with a clear message. Do not use `pull_request_target` for this action; it would analyze PR-head code while secrets are available. The trusted `/codeboarding` `issue_comment` path is intentionally limited to same-repository PRs, so fork code is not analyzed with repository secrets present. +## Notes -## Limitations +- No checkout step is required in your workflow. This action checks out the target PR and the CodeBoarding engine internally. +- GitHub withholds secrets from fork PRs on `pull_request`, so fork runs fail early if an LLM key is unavailable. +- Do not use `pull_request_target` for this action. It can expose secrets to PR-head code. +- GitHub renders Mermaid in strict mode, so node click-through links are not supported in the PR diagram. 
-- **GitHub Mermaid caps.** Inline Mermaid in comments is capped (≈500 edges / 50 000 chars). The action stays under this by auto-falling-back to a changed-only graph; if even that overflows it posts a text summary instead of a broken diagram. -- **Analysis depth vs. display depth.** `depth_level` controls how deep the engine *analyzes* (so the workspace/extension get rich nested data); `render_depth` controls how many levels the PR Mermaid *draws*. Keep `render_depth: 1` (default) for a clean top-level PR diagram even when `depth_level: 2`. Set `render_depth: 2` to draw one level of sub-components as subgraphs (leaf nodes filled, parent containers outlined). Large nested graphs are more likely to hit GitHub's Mermaid caps (above), in which case the action degrades to changed-only or a text summary. -- **Renames show as remove + add.** Components are matched across the two analyses by name (the stable join), so a renamed component appears as a red removal plus a green addition rather than a single yellow change. -- **No click-through.** GitHub renders Mermaid in strict security mode, so node hyperlinks are disabled. +## Local Testing -## Local testing - -A GitHub run is slow (engine install + two analyses). To iterate locally, use `scripts/run_local.sh`. It mirrors `action.yml` and writes `.cb-local/diagram.md` plus a `.cb-local/preview.html` you open in a browser (rendered with mermaid.js in GitHub's strict mode, so it looks like the comment will). - -**Fast — no LLM, instant.** Diff two existing `analysis.json` files. Great for iterating on colors/layout.
For a realistic pair, pull two revisions of a committed analysis: +Fast path, no LLM calls: ```bash -git show :.codeboarding/analysis.json > /tmp/base.json -git show :.codeboarding/analysis.json > /tmp/head.json scripts/run_local.sh --base-json /tmp/base.json --head-json /tmp/head.json ``` -**Full pipeline โ€” needs an LLM key.** Runs the engine on two refs of a local repo exactly like the action (committed-or-generated base, then incremental head): +Full local pipeline: ```bash export OPENROUTER_API_KEY=sk-or-... scripts/run_local.sh --repo /path/to/repo --base --head \ - --engine /path/to/CodeBoarding # defaults to ../CodeBoarding + --engine /path/to/CodeBoarding ``` -Flags: `--depth N`, `--direction LR|TD|โ€ฆ`, `--render-depth N`, `--changed-only`, `--no-edge-labels`, `--out DIR`, `--no-open`. +Useful flags: -The diagram step alone is also directly runnable: - -```bash -python3 scripts/diff_to_mermaid.py --base base/analysis.json --head head/analysis.json --out diagram.md +```text +--depth N +--render-depth N +--direction LR|TD|TB|RL|BT +--changed-only +--no-edge-labels +--out DIR +--no-open ``` ## License -MIT โ€” see [LICENSE](LICENSE). +MIT. See [LICENSE](LICENSE). From b84ceac17a899bbc3ec8cf3b3556a37988a3866e Mon Sep 17 00:00:00 2001 From: brovatten Date: Sat, 6 Jun 2026 14:57:57 +0200 Subject: [PATCH 20/27] fix(action): default models to the engine's per-provider default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The agent_model/parsing_model defaults were a hardcoded 'openrouter/anthropic/ claude-sonnet-4' that OpenRouter rejects (400 invalid model id โ€” the engine sends the slug verbatim, so it must be a bare OpenRouter id like anthropic/claude-sonnet-4, no 'openrouter/' prefix). Default both to empty and export AGENT_MODEL/PARSING_MODEL only when set, so an unset model defers to the engine's own valid per-provider default instead of a string that rots over time. 
Users override via the inputs (optionally wired to repo secrets). --- action.yml | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/action.yml b/action.yml index fb2c51a..21297af 100644 --- a/action.yml +++ b/action.yml @@ -23,13 +23,13 @@ inputs: required: false default: '1' agent_model: - description: 'LLM model identifier used for analysis (AGENT_MODEL env var).' + description: 'Analysis model (AGENT_MODEL env var). A bare OpenRouter slug, e.g. anthropic/claude-sonnet-4. Empty (default) uses the engine''s own per-provider default.' required: false - default: 'openrouter/anthropic/claude-sonnet-4' + default: '' parsing_model: - description: 'LLM model identifier used for parsing (PARSING_MODEL env var).' + description: 'Parsing model (PARSING_MODEL env var). A bare OpenRouter slug. Empty (default) uses the engine''s own per-provider default.' required: false - default: 'openrouter/anthropic/claude-sonnet-4' + default: '' comment_header: description: 'Header line used inside the sticky PR comment.' required: false @@ -338,9 +338,13 @@ runs: BASE_SHA: ${{ steps.guard.outputs.base_sha }} run: | OPENROUTER_API_KEY="$(cat "${RUNNER_TEMP}/cb-openrouter-key")" + export OPENROUTER_API_KEY + # Export the model env only when the user set it; empty -> the engine uses + # its own valid per-provider default (no stale hardcoded model id to rot). 
AGENT_MODEL="$(cat "${RUNNER_TEMP}/cb-agent-model")" PARSING_MODEL="$(cat "${RUNNER_TEMP}/cb-parsing-model")" - export OPENROUTER_API_KEY AGENT_MODEL PARSING_MODEL + if [ -n "$AGENT_MODEL" ]; then export AGENT_MODEL; fi + if [ -n "$PARSING_MODEL" ]; then export PARSING_MODEL; fi BASE_SRC="${RUNNER_TEMP}/base-src" # Clean up any stale registration before re-adding (rm -rf alone leaves a @@ -391,9 +395,13 @@ runs: HEAD_SHA: ${{ steps.guard.outputs.head_sha }} run: | OPENROUTER_API_KEY="$(cat "${RUNNER_TEMP}/cb-openrouter-key")" + export OPENROUTER_API_KEY + # Export the model env only when the user set it; empty -> the engine uses + # its own valid per-provider default (no stale hardcoded model id to rot). AGENT_MODEL="$(cat "${RUNNER_TEMP}/cb-agent-model")" PARSING_MODEL="$(cat "${RUNNER_TEMP}/cb-parsing-model")" - export OPENROUTER_API_KEY AGENT_MODEL PARSING_MODEL + if [ -n "$AGENT_MODEL" ]; then export AGENT_MODEL; fi + if [ -n "$PARSING_MODEL" ]; then export PARSING_MODEL; fi # Seed the head dir from the base analysis so incremental stitches # component ids from the baseline (stable diff). Base dir is left From ecd98cb49a691abd9718cf6fb39648d92b7f0f39 Mon Sep 17 00:00:00 2001 From: brovatten Date: Sat, 6 Jun 2026 14:58:58 +0200 Subject: [PATCH 21/27] docs: document LLM key setup and optional models concisely MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit State where the OPENROUTER_API_KEY secret goes (and the env-var path for local runs), and that agent_model/parsing_model are optional โ€” omit for the engine's per-provider default, or pin to a bare OpenRouter slug (inline or via secret). 
--- README.md | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index ae0933f..7bab901 100644 --- a/README.md +++ b/README.md @@ -70,10 +70,21 @@ jobs: llm_api_key: ${{ secrets.OPENROUTER_API_KEY }} ``` -Add one repository secret: +Add the API key as a repository secret (**Settings โ†’ Secrets and variables โ†’ Actions**): ```text -OPENROUTER_API_KEY=sk-or-... +OPENROUTER_API_KEY = sk-or-... +``` + +That's the only required setup โ€” it's passed via `llm_api_key` above. (For local runs with `scripts/run_local.sh`, export `OPENROUTER_API_KEY` as an env var instead.) + +**Models are optional.** Omit `agent_model` / `parsing_model` to use the engine's default for your provider, or pin them to a **bare OpenRouter slug** (no `openrouter/` prefix) โ€” inline or from a secret: + +```yaml + with: + llm_api_key: ${{ secrets.OPENROUTER_API_KEY }} + agent_model: anthropic/claude-sonnet-4 # optional; or ${{ secrets.AGENT_MODEL }} + parsing_model: google/gemini-3-flash-preview # optional ``` ## When it runs @@ -94,8 +105,8 @@ The command needs the `issue_comment` trigger and runs from your **default branc | `render_depth` | `1` | Display depth for the PR diagram. Keep `1` for a clean top-level view. | | `diagram_direction` | `LR` | Mermaid direction: `LR`, `TD`, `TB`, `RL`, or `BT`. | | `changed_only` | `false` | Render only changed components and incident edges. | -| `agent_model` | `openrouter/anthropic/claude-sonnet-4` | Model used for analysis. | -| `parsing_model` | `openrouter/anthropic/claude-sonnet-4` | Model used for parsing. | +| `agent_model` | engine default | Analysis model. Bare OpenRouter slug (e.g. `anthropic/claude-sonnet-4`); empty = engine's per-provider default. | +| `parsing_model` | engine default | Parsing model. Bare OpenRouter slug; empty = engine's per-provider default. | | `comment_header` | `Architecture review` | Heading for the PR comment. 
| | `trigger_command` | `/codeboarding` | Slash command for trusted on-demand runs. | | `cta_base_url` | empty | Optional click-proxy base URL for editor and extension links. | From 1fe4e6a0401d17303eacaebe19898d807fe24816 Mon Sep 17 00:00:00 2001 From: brovatten Date: Sat, 6 Jun 2026 15:14:16 +0200 Subject: [PATCH 22/27] fix(run_local): stop defaulting models to the invalid 'openrouter/...' slug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit run_local.sh still defaulted AGENT_MODEL/PARSING_MODEL to openrouter/anthropic/ claude-sonnet-4 โ€” the litellm-prefixed string that 400s against the engine's OpenRouter ChatOpenAI path (the bug just fixed in action.yml). Default both to empty and export only when set, so the local harness mirrors the action and an unset model falls through to the engine's own valid per-provider default. --- scripts/run_local.sh | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/scripts/run_local.sh b/scripts/run_local.sh index 234fde3..9d855ea 100755 --- a/scripts/run_local.sh +++ b/scripts/run_local.sh @@ -31,8 +31,10 @@ RENDER_DEPTH=() EXTRA=() OPEN="auto" REPO="" BASE_REF="" HEAD_REF="" BASE_JSON="" HEAD_JSON="" -AGENT_MODEL="${AGENT_MODEL:-openrouter/anthropic/claude-sonnet-4}" -PARSING_MODEL="${PARSING_MODEL:-openrouter/anthropic/claude-sonnet-4}" +# Empty by default: the engine then uses its own valid per-provider default. +# Override with a bare OpenRouter slug, e.g. 
AGENT_MODEL=anthropic/claude-sonnet-4 +AGENT_MODEL="${AGENT_MODEL:-}" +PARSING_MODEL="${PARSING_MODEL:-}" while [ $# -gt 0 ]; do case "$1" in @@ -58,15 +60,16 @@ done mkdir -p "$OUT" run_engine() { - ( cd "$ENGINE" && \ - STATIC_ANALYSIS_CONFIG="$ENGINE/static_analysis_config.yml" \ - PROJECT_ROOT="$ENGINE" \ - DIAGRAM_DEPTH_LEVEL="$DEPTH" \ - CACHING_DOCUMENTATION="false" \ - ENABLE_MONITORING="false" \ - OPENROUTER_API_KEY="${OPENROUTER_API_KEY:-}" \ - AGENT_MODEL="$AGENT_MODEL" \ - PARSING_MODEL="$PARSING_MODEL" \ + ( cd "$ENGINE" + export STATIC_ANALYSIS_CONFIG="$ENGINE/static_analysis_config.yml" \ + PROJECT_ROOT="$ENGINE" \ + DIAGRAM_DEPTH_LEVEL="$DEPTH" \ + CACHING_DOCUMENTATION="false" \ + ENABLE_MONITORING="false" \ + OPENROUTER_API_KEY="${OPENROUTER_API_KEY:-}" + # Pass the model only when set; empty -> engine's own valid per-provider default. + if [ -n "$AGENT_MODEL" ]; then export AGENT_MODEL; fi + if [ -n "$PARSING_MODEL" ]; then export PARSING_MODEL; fi uv run python "$ACTION_DIR/scripts/cb_engine.py" "$@" ) } From 016df69c39f7b7ba406b6240255b07c8daf4633f Mon Sep 17 00:00:00 2001 From: brovatten Date: Sat, 6 Jun 2026 15:18:00 +0200 Subject: [PATCH 23/27] docs: use vars (not secrets) for model overrides; state the slug format rule A model name is non-sensitive config, so steer overrides to a repo variable (vars.AGENT_MODEL) and keep secrets. for the key. State the format rule once: bare OpenRouter slug, exactly one '/', no 'openrouter/' prefix. --- README.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 7bab901..37a5cab 100644 --- a/README.md +++ b/README.md @@ -78,15 +78,17 @@ OPENROUTER_API_KEY = sk-or-... That's the only required setup โ€” it's passed via `llm_api_key` above. (For local runs with `scripts/run_local.sh`, export `OPENROUTER_API_KEY` as an env var instead.) 
-**Models are optional.** Omit `agent_model` / `parsing_model` to use the engine's default for your provider, or pin them to a **bare OpenRouter slug** (no `openrouter/` prefix) โ€” inline or from a secret: +**Models are optional.** Omit `agent_model` / `parsing_model` to use the engine's default for your provider, or pin them โ€” inline or from a repository **variable** (a model name isn't a secret, so use `vars.`, not `secrets.`): ```yaml with: - llm_api_key: ${{ secrets.OPENROUTER_API_KEY }} - agent_model: anthropic/claude-sonnet-4 # optional; or ${{ secrets.AGENT_MODEL }} - parsing_model: google/gemini-3-flash-preview # optional + llm_api_key: ${{ secrets.OPENROUTER_API_KEY }} # secret + agent_model: anthropic/claude-sonnet-4 # optional; or ${{ vars.AGENT_MODEL }} + parsing_model: google/gemini-3-flash-preview # optional ``` +**Model format:** a bare OpenRouter slug (e.g. `anthropic/claude-sonnet-4`) โ€” exactly one `/`, **no `openrouter/` prefix** (that's the LiteLLM form; the action rejects it early). + ## When it runs - **PR opened / reopened / marked ready** โ€” generated once (per the `on:` triggers above). It does **not** re-run on every push, so you never spend an LLM job per commit; the comment reflects that point until refreshed. From 0ec2e0f243f16e73c133e03068b6d1313f819430 Mon Sep 17 00:00:00 2001 From: brovatten Date: Sat, 6 Jun 2026 15:18:15 +0200 Subject: [PATCH 24/27] feat(action): reject the litellm 'openrouter/' model prefix early The engine calls OpenRouter natively (langchain ChatOpenAI), so the model must be a bare slug; the litellm 'openrouter/...' form 400s deep inside the engine. Fail fast in the verify step with a one-line actionable error instead, the same early-validation the API key already gets. 
--- action.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/action.yml b/action.yml index 21297af..84f05f5 100644 --- a/action.yml +++ b/action.yml @@ -266,6 +266,17 @@ runs: AGENT_MODEL="$(_strip "$RAW_AGENT_MODEL")" PARSING_MODEL="$(_strip "$RAW_PARSING_MODEL")" + # Catch the most common model-id mistake early: the engine calls OpenRouter + # natively (langchain ChatOpenAI), so a model must be a BARE slug like + # anthropic/claude-sonnet-4 โ€” NOT the litellm 'openrouter/...' form. + for M in "$AGENT_MODEL" "$PARSING_MODEL"; do + case "$M" in + openrouter/*) + echo "::error::Invalid model '$M': drop the 'openrouter/' prefix and use a bare OpenRouter slug, e.g. anthropic/claude-sonnet-4." + exit 1 ;; + esac + done + # Mask the cleaned value (it may differ from the registered secret). echo "::add-mask::$KEY" From 2010200b0d1399b3245d354ac01e454af54f27cd Mon Sep 17 00:00:00 2001 From: brovatten Date: Sat, 6 Jun 2026 15:32:13 +0200 Subject: [PATCH 25/27] fix(run_local): bash 3.2-safe optional-flag array expansion The diff step expanded "${CHANGED_ONLY[@]}" etc. directly; on macOS's bash 3.2 an empty array under 'set -u' errors as 'unbound variable', so the full local pipeline crashed at diff->mermaid whenever the optional flags weren't passed. Use the ${arr[@]+"${arr[@]}"} idiom (elements when set, nothing when empty) so it works on bash 3.2 and 4+. 
--- scripts/run_local.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/run_local.sh b/scripts/run_local.sh index 9d855ea..b575c02 100755 --- a/scripts/run_local.sh +++ b/scripts/run_local.sh @@ -128,7 +128,7 @@ echo "== Diff -> Mermaid ==" META="$(python3 "$ACTION_DIR/scripts/diff_to_mermaid.py" \ --base "$BASE_ANALYSIS" --head "$HEAD_ANALYSIS" \ --out "$OUT/diagram.md" --direction "$DIRECTION" \ - "${CHANGED_ONLY[@]}" "${NO_EDGE_LABELS[@]}" "${RENDER_DEPTH[@]}" "${EXTRA[@]}")" + ${CHANGED_ONLY[@]+"${CHANGED_ONLY[@]}"} ${NO_EDGE_LABELS[@]+"${NO_EDGE_LABELS[@]}"} ${RENDER_DEPTH[@]+"${RENDER_DEPTH[@]}"} ${EXTRA[@]+"${EXTRA[@]}"})" echo " $META" # Browser preview: render the (fence-stripped) mermaid via mermaid.js, strict mode From 11b39393ae8b20e9dd663157cbe1711922a8599e Mon Sep 17 00:00:00 2001 From: brovatten Date: Sat, 6 Jun 2026 16:18:30 +0200 Subject: [PATCH 26/27] docs: document multi-provider keys (llm_provider) cleanly Add a 'Bring your own LLM provider' section: lead with the _API_KEY convention + one example, collapse the full provider table, and point at the engine's registry (agents/llm_config.py) as the source of truth so the README can't rot. Scope the 'bare OpenRouter slug' model-format note to OpenRouter. --- README.md | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 37a5cab..e2cb55e 100644 --- a/README.md +++ b/README.md @@ -87,7 +87,38 @@ That's the only required setup โ€” it's passed via `llm_api_key` above. (For loc parsing_model: google/gemini-3-flash-preview # optional ``` -**Model format:** a bare OpenRouter slug (e.g. `anthropic/claude-sonnet-4`) โ€” exactly one `/`, **no `openrouter/` prefix** (that's the LiteLLM form; the action rejects it early). +**Model format (OpenRouter):** a bare OpenRouter slug (e.g. 
`anthropic/claude-sonnet-4`) — exactly one `/`, **no `openrouter/` prefix** (that's the LiteLLM form; the action rejects it early). Other providers use their own native model ids. + +## Bring your own LLM provider + +OpenRouter is the default, but you can use any provider the engine supports — set `llm_provider` and pass that provider's key: + +```yaml + with: + llm_provider: anthropic # omit for OpenRouter (default) + llm_api_key: ${{ secrets.ANTHROPIC_API_KEY }} +``` + +`llm_provider: <provider>` hands your key to the engine as `<PROVIDER>_API_KEY`, and the engine auto-selects that provider. Set **exactly one** key per run. + +
Supported providers + +| `llm_provider` | env var the engine reads | +|---|---| +| `openrouter` *(default)* | `OPENROUTER_API_KEY` | +| `openai` | `OPENAI_API_KEY` | +| `anthropic` | `ANTHROPIC_API_KEY` | +| `google` | `GOOGLE_API_KEY` | +| `vercel` | `VERCEL_API_KEY` | +| `deepseek` | `DEEPSEEK_API_KEY` | +| `cerebras` | `CEREBRAS_API_KEY` | +| `glm` / `kimi` | `GLM_API_KEY` / `KIMI_API_KEY` | +| `aws_bedrock` | `AWS_BEARER_TOKEN_BEDROCK` | +| `ollama` | `OLLAMA_BASE_URL` | + +This table mirrors the engine and may lag it โ€” the source of truth is the engine's provider registry ([`agents/llm_config.py`](https://github.com/CodeBoarding/CodeBoarding/blob/main/agents/llm_config.py)). Any provider it adds that follows the `_API_KEY` convention works here with no action change. + +
## When it runs @@ -100,7 +131,8 @@ The command needs the `issue_comment` trigger and runs from your **default branc | Input | Default | Description | |---|---|---| -| `llm_api_key` | required | LLM API key. OpenRouter is the default provider. | +| `llm_api_key` | required | Your LLM provider API key (see `llm_provider`). | +| `llm_provider` | `openrouter` | Provider for the key โ€” mapped to `_API_KEY` (e.g. `anthropic`, `openai`, `google`). | | `github_token` | `${{ github.token }}` | Token used to post/update the PR comment. | | `engine_ref` | `v0.12.0` | CodeBoarding engine ref. Pin for reproducibility. | | `depth_level` | `1` | Analysis depth, 1 to 3. Higher is slower and richer. | From 934369a6986be08d8a2f6740f01e7354d3ae23eb Mon Sep 17 00:00:00 2001 From: brovatten Date: Sat, 6 Jun 2026 16:18:45 +0200 Subject: [PATCH 27/27] feat(action): support any LLM provider via llm_provider input The action hardcoded OPENROUTER_API_KEY + an OpenRouter-only preflight, so a user with an OpenAI/Anthropic/etc. key couldn't use it. Add an optional 'llm_provider' input that maps the key to the engine's env var by the _API_KEY convention (with aws_bedrock/ollama exceptions); the engine then auto-selects the provider. Gate the OpenRouter preflight and the 'openrouter/' model-prefix guard behind provider==openrouter, export only the selected env var in both engine steps, and add llm_provider to the base-cache key. Default stays openrouter, so existing workflows are unchanged. No engine change. --- action.yml | 97 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 57 insertions(+), 40 deletions(-) diff --git a/action.yml b/action.yml index 84f05f5..24bcf5f 100644 --- a/action.yml +++ b/action.yml @@ -8,8 +8,12 @@ branding: inputs: llm_api_key: - description: 'LLM API key (OpenRouter by default). Required.' + description: 'Your LLM provider API key (see llm_provider). Required.' required: true + llm_provider: + description: 'Provider for llm_api_key. 
The key is handed to the engine as that provider''s env var (anthropic -> ANTHROPIC_API_KEY, openai -> OPENAI_API_KEY, ...; aws_bedrock -> AWS_BEARER_TOKEN_BEDROCK, ollama -> OLLAMA_BASE_URL) and the engine auto-selects it. Default openrouter.' + required: false + default: 'openrouter' github_token: description: 'GITHUB_TOKEN used to post the PR comment. Defaults to the workflow token.' required: false @@ -245,6 +249,7 @@ runs: shell: bash env: RAW_KEY: ${{ inputs.llm_api_key }} + RAW_PROVIDER: ${{ inputs.llm_provider }} RAW_AGENT_MODEL: ${{ inputs.agent_model }} RAW_PARSING_MODEL: ${{ inputs.parsing_model }} run: | @@ -254,49 +259,56 @@ runs: echo "::error::llm_api_key is empty. On fork PRs, repo secrets are withheld by GitHub." exit 1 fi - # Pasting a key into the secret UI often picks up trailing newlines, - # wrapping quotes, or a whole `KEY=value` line. Normalize all of that. + # Resolve the provider -> the env var the engine reads. Convention is + # <PROVIDER>_API_KEY; two providers don't follow it. The engine is the source + # of truth: an unknown provider just yields an env var it won't recognize, + # and the engine errors with the list of valid keys. + PROVIDER="$(printf '%s' "$RAW_PROVIDER" | tr '[:upper:]' '[:lower:]' | tr -cd 'a-z0-9_')" + PROVIDER="${PROVIDER:-openrouter}" + case "$PROVIDER" in + aws_bedrock) PROVIDER_ENV="AWS_BEARER_TOKEN_BEDROCK" ;; + ollama) PROVIDER_ENV="OLLAMA_BASE_URL" ;; + *) PROVIDER_ENV="$(printf '%s' "$PROVIDER" | tr '[:lower:]' '[:upper:]')_API_KEY" ;; + esac + + # Normalize a pasted key: strip whitespace/quotes and a leading `<PROVIDER_ENV>=`. 
_strip() { printf '%s' "$1" | tr -d '[:space:]' | sed -e 's/^"//;s/"$//' -e "s/^'//;s/'\$//"; } KEY="$(_strip "$RAW_KEY")" - case "$KEY" in - OPENROUTER_API_KEY=*) KEY="${KEY#OPENROUTER_API_KEY=}";; - openrouter_api_key=*) KEY="${KEY#openrouter_api_key=}";; - esac + case "$KEY" in "${PROVIDER_ENV}="*) KEY="${KEY#${PROVIDER_ENV}=}";; esac KEY="$(_strip "$KEY")" AGENT_MODEL="$(_strip "$RAW_AGENT_MODEL")" PARSING_MODEL="$(_strip "$RAW_PARSING_MODEL")" - - # Catch the most common model-id mistake early: the engine calls OpenRouter - # natively (langchain ChatOpenAI), so a model must be a BARE slug like - # anthropic/claude-sonnet-4 — NOT the litellm 'openrouter/...' form. - for M in "$AGENT_MODEL" "$PARSING_MODEL"; do - case "$M" in - openrouter/*) - echo "::error::Invalid model '$M': drop the 'openrouter/' prefix and use a bare OpenRouter slug, e.g. anthropic/claude-sonnet-4." - exit 1 ;; - esac - done - - # Mask the cleaned value (it may differ from the registered secret). echo "::add-mask::$KEY" - - case "$KEY" in sk-or-v1-*) PFX=1 ;; *) PFX=0 ;; esac - echo "OPENROUTER_API_KEY length: ${#KEY}; looks-like-OpenRouter: $PFX" - STATUS=$(curl -sS -o "$AUTH_FILE" -w "%{http_code}" \ - -H "Authorization: Bearer $KEY" --max-time 10 \ - https://openrouter.ai/api/v1/auth/key || echo "curl-fail") - echo "OpenRouter /auth/key response: HTTP $STATUS" - if [ "$STATUS" != "200" ]; then - # Surface the upstream error MESSAGE only — never the whole auth body (avoid leaking). - MSG="$(AUTH_FILE="$AUTH_FILE" python3 -c 'import json,os;print(json.load(open(os.environ["AUTH_FILE"])).get("error",{}).get("message",""))' 2>/dev/null || true)" - echo "::error::OpenRouter rejected the API key (HTTP $STATUS). ${MSG:-Verify the OPENROUTER_API_KEY secret.}" - exit 1 + echo "Provider: $PROVIDER -> $PROVIDER_ENV; key length: ${#KEY}" + + if [ "$PROVIDER" = "openrouter" ]; then + # OpenRouter-only checks. The litellm 'openrouter/...' 
model prefix 400s + # the engine's native OpenRouter call; other providers use native ids. + for M in "$AGENT_MODEL" "$PARSING_MODEL"; do + case "$M" in + openrouter/*) + echo "::error::Invalid model '$M': drop the 'openrouter/' prefix and use a bare OpenRouter slug, e.g. anthropic/claude-sonnet-4." + exit 1 ;; + esac + done + # Cheap preflight; other providers are validated by the engine at run time. + STATUS=$(curl -sS -o "$AUTH_FILE" -w "%{http_code}" \ + -H "Authorization: Bearer $KEY" --max-time 10 \ + https://openrouter.ai/api/v1/auth/key || echo "curl-fail") + echo "OpenRouter /auth/key response: HTTP $STATUS" + if [ "$STATUS" != "200" ]; then + # Surface the upstream error MESSAGE only — never the whole auth body (avoid leaking). + MSG="$(AUTH_FILE="$AUTH_FILE" python3 -c 'import json,os;print(json.load(open(os.environ["AUTH_FILE"])).get("error",{}).get("message",""))' 2>/dev/null || true)" + echo "::error::OpenRouter rejected the API key (HTTP $STATUS). ${MSG:-Verify the OPENROUTER_API_KEY secret.}" + exit 1 + fi fi # Store key material in runner-temp files. Later shell steps read these # explicitly; third-party post-comment actions do not inherit the LLM key. 
umask 077 - printf '%s' "$KEY" > "${RUNNER_TEMP}/cb-openrouter-key" + printf '%s' "$KEY" > "${RUNNER_TEMP}/cb-llm-key" + printf '%s' "$PROVIDER_ENV" > "${RUNNER_TEMP}/cb-provider-env" printf '%s' "$AGENT_MODEL" > "${RUNNER_TEMP}/cb-agent-model" printf '%s' "$PARSING_MODEL" > "${RUNNER_TEMP}/cb-parsing-model" @@ -328,7 +340,7 @@ runs: uses: actions/cache/restore@v4 with: path: ${{ runner.temp }}/cb-base - key: cb-base-${{ runner.os }}-${{ steps.guard.outputs.base_sha }}-d${{ inputs.depth_level }}-${{ inputs.engine_ref }}-${{ inputs.agent_model }}-${{ inputs.parsing_model }} + key: cb-base-${{ runner.os }}-${{ steps.guard.outputs.base_sha }}-d${{ inputs.depth_level }}-${{ inputs.engine_ref }}-${{ inputs.llm_provider }}-${{ inputs.agent_model }}-${{ inputs.parsing_model }} - name: Generate base analysis (no committed baseline) if: steps.guard.outputs.skip != 'true' && steps.base.outputs.committed == 'false' && steps.basecache.outputs.cache-hit != 'true' @@ -348,8 +360,10 @@ runs: DEPTH: ${{ inputs.depth_level }} BASE_SHA: ${{ steps.guard.outputs.base_sha }} run: | - OPENROUTER_API_KEY="$(cat "${RUNNER_TEMP}/cb-openrouter-key")" - export OPENROUTER_API_KEY + # Export the key under the selected provider's env var (only this one), + # so the engine auto-selects that provider. + PROVIDER_ENV="$(cat "${RUNNER_TEMP}/cb-provider-env")" + export "$PROVIDER_ENV"="$(cat "${RUNNER_TEMP}/cb-llm-key")" # Export the model env only when the user set it; empty -> the engine uses # its own valid per-provider default (no stale hardcoded model id to rot). 
AGENT_MODEL="$(cat "${RUNNER_TEMP}/cb-agent-model")" @@ -382,7 +396,7 @@ runs: uses: actions/cache/save@v4 with: path: ${{ runner.temp }}/cb-base - key: cb-base-${{ runner.os }}-${{ steps.guard.outputs.base_sha }}-d${{ inputs.depth_level }}-${{ inputs.engine_ref }}-${{ inputs.agent_model }}-${{ inputs.parsing_model }} + key: cb-base-${{ runner.os }}-${{ steps.guard.outputs.base_sha }}-d${{ inputs.depth_level }}-${{ inputs.engine_ref }}-${{ inputs.llm_provider }}-${{ inputs.agent_model }}-${{ inputs.parsing_model }} - name: Analyze PR head (incremental from base) if: steps.guard.outputs.skip != 'true' @@ -405,8 +419,10 @@ runs: BASE_SHA: ${{ steps.guard.outputs.base_sha }} HEAD_SHA: ${{ steps.guard.outputs.head_sha }} run: | - OPENROUTER_API_KEY="$(cat "${RUNNER_TEMP}/cb-openrouter-key")" - export OPENROUTER_API_KEY + # Export the key under the selected provider's env var (only this one), + # so the engine auto-selects that provider. + PROVIDER_ENV="$(cat "${RUNNER_TEMP}/cb-provider-env")" + export "$PROVIDER_ENV"="$(cat "${RUNNER_TEMP}/cb-llm-key")" # Export the model env only when the user set it; empty -> the engine uses # its own valid per-provider default (no stale hardcoded model id to rot). AGENT_MODEL="$(cat "${RUNNER_TEMP}/cb-agent-model")" @@ -464,7 +480,8 @@ runs: if: always() && steps.guard.outputs.skip != 'true' shell: bash run: | - rm -f "${RUNNER_TEMP}/cb-openrouter-key" \ + rm -f "${RUNNER_TEMP}/cb-llm-key" \ + "${RUNNER_TEMP}/cb-provider-env" \ "${RUNNER_TEMP}/cb-agent-model" \ "${RUNNER_TEMP}/cb-parsing-model"