From ccde6fd20759b0fa7e751a56a9b60d64f303e9d3 Mon Sep 17 00:00:00 2001
From: Vader Yang <vader.yang@netis.com>
Date: Wed, 20 May 2026 15:50:51 +0800
Subject: [PATCH 1/2] =?UTF-8?q?feat(ci):=20headless=20PR=20review=20agent?=
 =?UTF-8?q?=20=E2=80=94=20phase=201?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A code-review agent that fires after `ci` passes on a PR and posts
a single PR review (APPROVE / COMMENT / REQUEST_CHANGES) before a
human reviewer picks it up. The intent: triage the obvious-in-
hindsight stuff (schema mirror drift, body-column scans, missing
queryKey deps, classifier rules sensitive to window width, …) so the
human reviewer arrives at a PR with the easy 80% already surfaced.

## Pieces

* `.github/workflows/pr-review.yml`
  Trigger = `workflow_run` on `ci` completion, gated on
  `conclusion == 'success'` and `event == 'pull_request'`. Manual
  `workflow_dispatch` (with pr_number input) is kept as a re-run
  hatch. Concurrency-keyed per PR so a re-trigger cancels the
  in-flight job.

* `scripts/pr-review/run_review.sh`
  Substitutes PR_NUMBER / HEAD_SHA / BASE_REF into the prompt
  template, pre-flights LiteLLM with a 5 s curl, runs `claude -p`
  in print mode with a read-only tool allowlist + 1800 s outer
  timeout, drops the model's stdout into
  `/tmp/pr-review-${N}-out.md`.

* `scripts/pr-review/prompt.md`
  Repo-specific reviewer prompt. Carries the crate map, the schema-
  mirror rules (Rust serde ↔ console TS types), and an explicit
  "things this repo has been bitten by" section that the agent must
  actively look for (body-column scans like commit bf4887f, window-
  width-sensitive classifier heuristics like fea1d83, etc).
  Output format is strict — empty sections must be omitted, every
  Blocking/Suggestion item must cite file:line.

* `scripts/pr-review/allowed_tools.txt`
  Read / Grep / Glob / Bash(gh pr diff:*) / Bash(rg:*) / …
  No Edit, no Write, no unrestricted Bash(*) — the agent is
  read-only.

* `scripts/pr-review/post_review.py`
  Parses the agent's markdown for which sections are populated,
  picks the gh-review event accordingly (Blocking →
  REQUEST_CHANGES, Suggestions/Questions only → COMMENT, none →
  APPROVE), posts via `gh pr review`. Falls back to a plain
  `gh pr comment` when the bot can't review its own PRs.

* `docs/pr-review-agent.md`
  Architecture + ops notes — runner setup, cost/latency budget,
  failure modes, phasing.

## Model routing

Agent → LiteLLM (172.16.103.81:4200) → GLM-5 SGLang (172.16.103.81
:9000). LiteLLM rewrites the Anthropic-shaped
`claude-3-5-sonnet-20241022` calls onto GLM-5, which has
sglang's `glm47` tool-call parser configured. No new infrastructure
needed.

## Phasing

Phase 1 (this PR): manual + post-CI auto-trigger. Test on a few
real PRs to calibrate the prompt. Phase 2: tune prompt against a
labeled set of past PRs + reviewer comments. Phase 3: structured
inline review comments once line numbers prove reliable.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .github/workflows/pr-review.yml     | 107 ++++++++++++++++++++
 docs/pr-review-agent.md             | 152 ++++++++++++++++++++++++++++
 scripts/pr-review/allowed_tools.txt |  17 ++++
 scripts/pr-review/post_review.py    | 140 +++++++++++++++++++++++++
 scripts/pr-review/prompt.md         | 138 +++++++++++++++++++++++++
 scripts/pr-review/run_review.sh     |  75 ++++++++++++++
 6 files changed, 629 insertions(+)
 create mode 100644 .github/workflows/pr-review.yml
 create mode 100644 docs/pr-review-agent.md
 create mode 100644 scripts/pr-review/allowed_tools.txt
 create mode 100755 scripts/pr-review/post_review.py
 create mode 100644 scripts/pr-review/prompt.md
 create mode 100755 scripts/pr-review/run_review.sh

diff --git a/.github/workflows/pr-review.yml b/.github/workflows/pr-review.yml
new file mode 100644
index 00000000..d24a927f
--- /dev/null
+++ b/.github/workflows/pr-review.yml
@@ -0,0 +1,107 @@
+name: pr-review
+
+# Headless code-review agent. Fires AFTER the `ci` workflow completes
+# successfully on a PR — that gate keeps the agent from wasting GLM-5
+# cycles on PRs that don't even compile, and ensures the review the
+# author sees is "tests are green, here's what to look at next".
+#
+# Manual `workflow_dispatch` is kept as a fallback for re-runs.
+#
+# Runs on the wuneng self-hosted runner. Reaches LiteLLM at
+# 172.16.103.81:4200 over the internal libvirt bridge; LiteLLM
+# rewrites the Anthropic-shaped request onto the GLM-5 backend
+# (SGLang :9000). See docs/pr-review-agent.md.
+
+on:
+  workflow_run:
+    workflows: ["ci"]
+    types: [completed]
+  workflow_dispatch:
+    inputs:
+      pr_number:
+        description: PR number to review
+        required: true
+        type: number
+
+concurrency:
+  # One review per PR. A re-trigger cancels an in-flight review of
+  # the same PR so we don't post conflicting comments.
+  group: pr-review-${{ github.event.workflow_run.pull_requests[0].number || inputs.pr_number }}
+  cancel-in-progress: true
+
+jobs:
+  review:
+    # Gate the workflow_run path on:
+    #   * CI actually succeeded (we never review red builds)
+    #   * the upstream run was triggered by a PR (push events have no
+    #     PR to comment on)
+    #   * pull_requests[] is populated (empty for fork PRs — by design
+    #     we only review same-repo PRs from this internal repo)
+    if: >-
+      github.event_name == 'workflow_dispatch' ||
+      (
+        github.event.workflow_run.conclusion == 'success' &&
+        github.event.workflow_run.event == 'pull_request' &&
+        github.event.workflow_run.pull_requests[0] != null
+      )
+    runs-on: [self-hosted, tokenscope]
+    timeout-minutes: 30
+    env:
+      # Agent → LiteLLM → GLM-5. LiteLLM is configured to rewrite
+      # claude-3-5-sonnet-20241022 onto the GLM-5 backend.
+      ANTHROPIC_BASE_URL: http://172.16.103.81:4200
+      ANTHROPIC_API_KEY: dummy
+      ANTHROPIC_MODEL: claude-3-5-sonnet-20241022
+    steps:
+      - name: Resolve PR + head SHA
+        id: pr
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
+            PR_NUMBER="${{ inputs.pr_number }}"
+          else
+            PR_NUMBER="${{ github.event.workflow_run.pull_requests[0].number }}"
+          fi
+          if [ -z "$PR_NUMBER" ] || [ "$PR_NUMBER" = "null" ]; then
+            echo "::error::could not resolve PR number"
+            exit 1
+          fi
+          eval "$(gh pr view "$PR_NUMBER" \
+            --repo "$GITHUB_REPOSITORY" \
+            --json headRefOid,baseRefName \
+            --jq '"HEAD_SHA=\(.headRefOid)\nBASE_REF=\(.baseRefName)"')"
+          {
+            echo "pr_number=$PR_NUMBER"
+            echo "head_sha=$HEAD_SHA"
+            echo "base_ref=$BASE_REF"
+          } >> "$GITHUB_OUTPUT"
+
+      - name: Checkout PR head
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ steps.pr.outputs.head_sha }}
+          fetch-depth: 0
+
+      - name: Fetch base for diff
+        run: git fetch origin "${{ steps.pr.outputs.base_ref }}" --depth 200
+
+      - name: Run review agent
+        id: review
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          PR_NUMBER: ${{ steps.pr.outputs.pr_number }}
+        run: bash scripts/pr-review/run_review.sh "$PR_NUMBER"
+
+      - name: Post review
+        if: always()
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          PR_NUMBER: ${{ steps.pr.outputs.pr_number }}
+          RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+          AGENT_EXIT: ${{ steps.review.outcome }}
+        run: python3 scripts/pr-review/post_review.py "$PR_NUMBER"
+
+      - name: Cleanup transient files
+        if: always()
+        run: rm -f /tmp/pr-review-*.md /tmp/pr-review-*.log /tmp/pr-review-*.json || true
diff --git a/docs/pr-review-agent.md b/docs/pr-review-agent.md
new file mode 100644
index 00000000..e543f35b
--- /dev/null
+++ b/docs/pr-review-agent.md
@@ -0,0 +1,152 @@
+# PR review agent
+
+A headless code-review agent that fires after CI passes and before
+a human picks up the PR. The goal isn't to replace human review — it's
+to surface the obvious-in-hindsight stuff (schema mirror drift,
+body-column scans, missing queryKey deps, classifier rules sensitive
+to window width, etc.) so the human reviewer arrives at a PR with the
+"easy 80%" already triaged.
+
+## Architecture
+
+```
+GitHub                                  wuneng VM tokenscope-ci
+┌──────────────┐  ci passes (workflow_run)  ┌─────────────────────────┐
+│  PR opened   │ ─────────────────────────► │ self-hosted GH runner   │
+│  PR sync     │                            │  ┌───────────────────┐  │
+│              │                            │  │ pr-review.yml     │  │
+│              │                            │  │  ↓                │  │
+│              │                            │  │ run_review.sh ────┼──┼─► LiteLLM :4200
+│              │                            │  │   claude -p       │  │     ↓
+│              │                            │  │   read-only tools │  │   SGLang GLM-5 :9000
+│              │                            │  │  ↓                │  │
+│              │  gh pr review              │  │ post_review.py    │  │
+│              │ ◄──────────────────────────┤  └───────────────────┘  │
+└──────────────┘                            └─────────────────────────┘
+```
+
+## Components
+
+* `.github/workflows/pr-review.yml`
+  Trigger: `workflow_run` on the `ci` workflow's `completed` event,
+  gated on `conclusion == 'success'` and `event == 'pull_request'`.
+  Also accepts `workflow_dispatch` for manual re-runs (`gh workflow
+  run pr-review.yml -f pr_number=27`).
+* `scripts/pr-review/run_review.sh`
+  Substitutes `PR_NUMBER` / `HEAD_SHA` / `BASE_REF` into the prompt
+  template, pre-flights LiteLLM, runs `claude -p` with the read-only
+  tool allowlist + 1800 s outer timeout, writes the model's stdout to
+  `/tmp/pr-review-${N}-out.md`.
+* `scripts/pr-review/prompt.md`
+  Instructional prompt. Encodes repo facts the agent has to know
+  before reading the diff (crate map, schema mirror rules, repo's
+  history of footguns) and the strict output format the parser
+  expects.
+* `scripts/pr-review/allowed_tools.txt`
+  Explicit allowlist — `Read`, `Grep`, `Glob`, and a few inspection
+  Bash patterns. No `Edit`, no `Write`, no unrestricted `Bash(*)`.
+* `scripts/pr-review/post_review.py`
+  Reads the agent's output, picks the review event from the section
+  population (`Blocking` → REQUEST_CHANGES, `Suggestions`/`Questions`
+  → COMMENT, none → APPROVE), and hands it to `gh pr review`. Falls
+  back to a plain `gh pr comment` if the bot can't review the PR
+  (e.g. it authored it).
+
+## Trigger sequence
+
+```
+1. PR opened / synchronize
+2. `ci` workflow runs (cargo test, console build, …)
+3. `ci` completes with success
+4. `workflow_run` fires `pr-review` (this workflow)
+5. pr-review checks out the PR head, runs the agent
+6. agent posts a single PR review (APPROVE / COMMENT / REQUEST_CHANGES)
+```
+
+If CI fails, the review agent never runs. That's intentional — there's
+no value paying for a review on a PR that won't build.
+
+## Manual re-run
+
+```
+gh workflow run pr-review.yml -f pr_number=27
+```
+
+The `concurrency` block ensures a manual re-trigger cancels any
+in-flight review of the same PR — no duplicate comments.
+
+## Self-hosted runner expectations
+
+The `tokenscope` self-hosted runner on wuneng's `tokenscope-ci` VM
+needs:
+
+1. **Claude Code CLI** installed and on `$PATH`:
+   ```
+   npm i -g @anthropic-ai/claude-code
+   ```
+2. **GitHub CLI** authenticated:
+   ```
+   gh auth login         # one-time, as the `tokenscope-review-bot` account
+   ```
+3. **Python 3** (default `python3` is fine — `post_review.py` uses
+   stdlib only).
+4. **Network path** to LiteLLM at `172.16.103.81:4200` (the VM is on
+   wuneng's libvirt bridge, so this works out of the box).
+
+The workflow exports `ANTHROPIC_BASE_URL` / `ANTHROPIC_API_KEY` /
+`ANTHROPIC_MODEL` per-job. LiteLLM rewrites
+`claude-3-5-sonnet-20241022` onto GLM-5.
+
+## Cost / latency
+
+GLM-5 runs on-prem (GPUs 4-7 of wuneng, served by SGLang). No
+per-request cost; the constraint is GPU minutes.
+
+| PR size | Files | Input tokens | Output tokens | Wall clock |
+|---|---|---|---|---|
+| Small | 1–2 | 20–40 K | 3–6 K | 2–3 min |
+| Medium | 5–10 | 60–150 K | 8–15 K | 5–8 min |
+| Large | 30+ | 250–500 K | 15–25 K | 15–25 min |
+
+Concurrency cap: GH Actions `concurrency` is per-PR, but the runner
+itself is single-tenant (one job at a time). Multiple PRs serialize
+naturally. If we hit a "many PRs at once" pattern we can raise the
+runner's job-slot count, but two concurrent reviews is the ceiling
+before we start crowding training jobs on the same box.
+
+## Tuning the prompt
+
+The "Things this repo has been bitten by" section in `prompt.md` is
+the most valuable knob. Every time the agent misses a class of bug
+the human reviewer catches, add a one-line entry. Every time the
+agent flags a non-issue often enough to be annoying, refine or
+remove the corresponding entry.
+
+The prompt is intentionally repo-specific. A vanilla "review this
+diff" prompt produces generic style notes; the value of a per-repo
+agent is encoded prior knowledge about the repo's own historic
+footguns.
+
+## Failure modes
+
+| Failure | What happens | Mitigation |
+|---|---|---|
+| LiteLLM down | Pre-flight curl fails, `run_review.sh` exits 2 | `post_review.py` posts a brief "agent failed" comment with link to workflow log; PR is not blocked |
+| Agent loops | `timeout 1800` kills the agent | Same: failure comment, no block |
+| GLM-5 returns garbage / no `### Summary` | `run_review.sh` appends a warning to the output | `post_review.py` still posts it as COMMENT — the agent's broken output is visible, which is signal |
+| Bot can't `--approve` its own PR | `gh pr review` rc != 0 | `post_review.py` falls back to `gh pr comment` |
+| Schema mirror miss inside the agent | Agent under-reports | Add the missed signature to `prompt.md` § "Things this repo has been bitten by" — encode the lesson |
+
+## Phasing
+
+* **Phase 1 (this PR)**: scaffolding. `workflow_run` trigger gated on
+  CI success. Manual `workflow_dispatch` for re-runs. Test on a few
+  real PRs.
+* **Phase 2**: collect a calibration set of past PRs + human review
+  comments. Tune the prompt to converge with reviewer judgment. Add
+  a nightly self-check workflow that re-runs against a canonical
+  test PR and alerts on schema drift.
+* **Phase 3**: structured inline comments (`gh pr review
+  --comment line=...`) once we trust the line numbers in the agent's
+  output. Today the agent emits `file:line` references in markdown
+  and reviewers click through manually — fine for v1.
diff --git a/scripts/pr-review/allowed_tools.txt b/scripts/pr-review/allowed_tools.txt
new file mode 100644
index 00000000..89f62bce
--- /dev/null
+++ b/scripts/pr-review/allowed_tools.txt
@@ -0,0 +1,17 @@
+Read
+Grep
+Glob
+Bash(gh pr diff:*)
+Bash(gh pr view:*)
+Bash(gh pr checks:*)
+Bash(git diff:*)
+Bash(git log:*)
+Bash(git show:*)
+Bash(git blame:*)
+Bash(rg:*)
+Bash(find:*)
+Bash(wc:*)
+Bash(head:*)
+Bash(tail:*)
+Bash(cat:*)
+Bash(ls:*)
diff --git a/scripts/pr-review/post_review.py b/scripts/pr-review/post_review.py
new file mode 100755
index 00000000..7e09c0f4
--- /dev/null
+++ b/scripts/pr-review/post_review.py
@@ -0,0 +1,140 @@
+#!/usr/bin/env python3
+"""Post the agent's review markdown to the PR.
+
+Reads /tmp/pr-review-${PR_NUMBER}-out.md, parses for sections, picks
+the review event (`APPROVE` / `COMMENT` / `REQUEST_CHANGES`), and
+hands it to `gh pr review`. Falls back to a plain comment if `gh pr
+review` is unavailable (e.g. the bot account lacks review rights on
+its own PRs).
+
+Always exits 0 — failing to post a review shouldn't fail the
+workflow run; the workflow logs already capture the agent output.
+"""
+
+from __future__ import annotations
+
+import os
+import re
+import subprocess
+import sys
+from pathlib import Path
+
+PR_NUMBER = sys.argv[1] if len(sys.argv) > 1 else os.environ.get("PR_NUMBER")
+if not PR_NUMBER:
+    print("ERROR: PR_NUMBER missing", file=sys.stderr)
+    sys.exit(0)
+
+OUT_PATH = Path(f"/tmp/pr-review-{PR_NUMBER}-out.md")
+RUN_URL = os.environ.get("RUN_URL", "")
+AGENT_EXIT = os.environ.get("AGENT_EXIT", "")  # "success" / "failure" / ""
+
+
+def read_review() -> str:
+    if not OUT_PATH.exists():
+        return ""
+    return OUT_PATH.read_text(errors="replace").strip()
+
+
+def section_nonempty(body: str, heading: str) -> bool:
+    """True if `### <heading>` exists and has at least one non-blank
+    line of content before the next `### ` or end-of-document."""
+    pat = re.compile(
+        rf"^###\s+{re.escape(heading)}\s*\n(.*?)(?=^###\s+|\Z)",
+        re.MULTILINE | re.DOTALL,
+    )
+    m = pat.search(body)
+    if not m:
+        return False
+    inner = m.group(1).strip()
+    return bool(inner)
+
+
+def pick_event(body: str) -> str:
+    """Choose the gh-review event from section presence.
+
+    Priority:
+      * agent explicitly said "REQUEST_CHANGES" / "APPROVE" / "COMMENT"
+        in the Summary → trust the agent
+      * else: Blocking → REQUEST_CHANGES; Suggestions/Questions only →
+        COMMENT; nothing → APPROVE.
+    """
+    summary_pat = re.compile(
+        r"^###\s+Summary\s*\n(.*?)(?=^###\s+|\Z)",
+        re.MULTILINE | re.DOTALL,
+    )
+    m = summary_pat.search(body)
+    summary = (m.group(1) if m else "").upper()
+    for token in ("REQUEST_CHANGES", "APPROVE", "COMMENT"):
+        if token in summary:
+            return token
+    if section_nonempty(body, "Blocking"):
+        return "REQUEST_CHANGES"
+    if section_nonempty(body, "Suggestions") or section_nonempty(body, "Questions"):
+        return "COMMENT"
+    return "APPROVE"
+
+
+EVENT_FLAG = {
+    "APPROVE": "--approve",
+    "COMMENT": "--comment",
+    "REQUEST_CHANGES": "--request-changes",
+}
+
+
+def post_via_gh_review(number: str, event: str, body: str) -> int:
+    cmd = [
+        "gh", "pr", "review", number,
+        EVENT_FLAG[event],
+        "--body", body,
+    ]
+    proc = subprocess.run(cmd, capture_output=True, text=True)
+    if proc.returncode != 0:
+        sys.stderr.write(
+            f"gh pr review failed (event={event}): {proc.stderr}\n"
+        )
+    return proc.returncode
+
+
+def post_via_comment(number: str, body: str) -> int:
+    cmd = ["gh", "pr", "comment", number, "--body", body]
+    proc = subprocess.run(cmd, capture_output=True, text=True)
+    if proc.returncode != 0:
+        sys.stderr.write(f"gh pr comment failed: {proc.stderr}\n")
+    return proc.returncode
+
+
+def main() -> int:
+    body = read_review()
+    if not body:
+        if AGENT_EXIT == "failure":
+            body = (
+                "### Summary\n"
+                "Agent run failed before producing output. "
+                f"See [workflow run]({RUN_URL}) for the agent log."
+            )
+        else:
+            print("no review body — skipping post")
+            return 0
+
+    footer = (
+        "\n\n---\n"
+        "🤖 Reviewed by **glm-5** via LiteLLM • "
+        f"[workflow run]({RUN_URL})"
+    )
+    full = body + footer
+
+    event = pick_event(body)
+    print(f"posting review event={event} ({len(full)} bytes)")
+
+    # gh pr review refuses to let the same user `--approve` their own
+    # PR. Fall back to a plain comment in that case.
+    rc = post_via_gh_review(PR_NUMBER, event, full)
+    if rc != 0:
+        sys.stderr.write("falling back to plain comment\n")
+        post_via_comment(PR_NUMBER, full)
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/scripts/pr-review/prompt.md b/scripts/pr-review/prompt.md
new file mode 100644
index 00000000..c35fc71d
--- /dev/null
+++ b/scripts/pr-review/prompt.md
@@ -0,0 +1,138 @@
+You are a code review agent for the **TokenScope** repository (Rust
+workspace + React console).
+
+You are reviewing PR **#${PR_NUMBER}** (head `${HEAD_SHA}`) against
+base branch `${BASE_REF}`. CI has already passed — your job is NOT
+to verify the build, it's to read the diff like a careful human
+reviewer would and surface the issues a reviewer should look at
+before clicking Approve.
+
+## Repo facts you need
+
+- **Rust workspace**: `server/Cargo.toml`. Crates worth knowing:
+  - `ts-storage` (trait + types) + `ts-storage-duckdb` (impl). All
+    persisted data lives in DuckDB.
+  - `ts-api` (axum HTTP).
+  - `ts-llm` (wire-api detection + LlmCall extraction).
+  - `ts-protocol` (TCP/HTTP joiner — capture pipeline).
+  - `ts-turn` (agent-turn assembly + pair sweeper).
+  - `ts-proxy` (built-in MITM forward proxy).
+  - `server/app/tokenscope` (binary entry point).
+- **Console**: `console/src/`. React 19 + TypeScript + tanstack-query
+  + recharts + tailwind. `app.tsx` registers routes.
+- **Schemas**: `server/ts-storage-duckdb/src/schema.rs` for tables.
+  `console/src/types/api.ts` for TS mirrors.
+- **Build natively**: cross-platform `.next` / `dist` builds break in
+  production — verify any deploy-relevant change builds on Linux.
+
+## Things this repo has been bitten by — actively look for these
+
+When you see code that smells like one of these, call it out as
+**Blocking** or at minimum **Suggestion**:
+
+- **Body-column scan over a wide window.** `arg_max(body,
+  LENGTH(body))` / `MAX(body)` over `llm_calls` body columns on a
+  7-day window materialises 5+ GB. See commit `bf4887f` for the
+  canonical fix (top-N row_number + body-shape filter in Rust).
+- **Stale prompt-cache key.** TanStack queryKey must include every
+  filter that changes results; missing key entries silently serve
+  prior data.
+- **Cross-platform console build.** Anything that touches
+  `console/package.json`, `vite.config.ts`, or build scripts: confirm
+  it still works on the Linux runner.
+- **Schema drift between Rust + TS.** A new field in `ServiceRow`
+  (Rust serde) must mirror in `console/src/types/api.ts`; otherwise
+  the UI silently sees undefined.
+- **Window-width-sensitive heuristics.** Classifiers that aggregate
+  over the user's selected window can flip output between short and
+  long windows — see `apps.rs` removing the ≥3-models rule.
+- **Capture-pipeline regressions.** Changes to `ts-protocol/tcp.rs`
+  or `joiner.rs` can drop legitimate exchanges silently. Look for
+  changed predicates / early returns.
+- **Mode-switch on shared mutable state without a fence.** If
+  pair_sweeper / proxy_sweeper assignment changes, search for any
+  code that reads `metadata.proxy.*` and may now see a different
+  shape.
+
+## Method — do exactly this, in this order
+
+1. **Get the diff**: `gh pr diff ${PR_NUMBER} --patch`. Skim once
+   for shape (size, languages touched).
+
+2. **For each changed file** (cap at 25 if there are more — list the
+   skipped in your output):
+   - `Read` the full current file. Don't review on diff context
+     alone — surrounding code often reveals subtler bugs.
+   - For changed public functions/types, `Grep` for callers /
+     consumers (use the symbol name). Verify the change doesn't
+     silently break a downstream invariant.
+
+3. **For SQL touched in `*-duckdb` crates**: trace the join + filter
+   shape against `schema.rs`. If the query uses `LENGTH(body)` or
+   `MAX(body)` or `arg_max(body, ...)`, that's a body-scan smell —
+   flag it.
+
+4. **For console changes**: confirm
+   - any new route is registered in `console/src/app.tsx`
+   - any new API call in a hook has its `queryKey` include all
+     varying inputs
+   - the Rust serializer field names match the TS interface
+
+5. **For new public Rust types/fns**: confirm they're added to the
+   crate's `pub use` re-exports if they cross a crate boundary.
+
+6. **Look at the commit messages.** The author's commit messages are
+   evidence about intent — does the diff match what the commit
+   claims? Mismatches go in `### Questions`.
+
+## Output format — strict
+
+A single markdown document. Sections in this order. **Omit a section
+entirely if it would be empty** — never write "no issues found" or
+"N/A".
+
+```
+### Summary
+2–3 sentences. What the PR does and your overall take. End with an
+explicit recommendation: APPROVE / COMMENT / REQUEST_CHANGES.
+
+### Blocking
+- [ ] **path/to/file.rs:42** — One-line issue. Why it must be fixed
+  before merge: …
+- [ ] **path/to/other.ts:88** — …
+
+### Suggestions
+- **path/to/file.rs:120** — Non-blocking improvement. …
+- (cap at 8 items; pick the highest-value ones)
+
+### Questions
+- Why does X happen at file:line — is it intentional?
+- …
+
+### Verified
+- Schema mirror: `ServiceRow` Rust↔TS matches.
+- Caller compatibility: searched `query_services` callers — only
+  consumer is the API route, signature still fits.
+- (List the specific checks you actually ran. Don't pad.)
+```
+
+## Hard rules
+
+- **Cite specific `file:line`** for every Blocking / Suggestion item.
+  No vague "this file has issues". A reviewer must be able to click
+  through.
+- **Don't invent.** If you can't find a concrete problem in a section,
+  leave the section out. Empty `### Suggestions` is fine and
+  expected for clean PRs.
+- **Don't summarize what each file does.** The diff already shows
+  that. Spend tokens on the gotchas.
+- **Don't propose new features.** This is a review of the proposed
+  change, not a redesign opportunity.
+- **Don't run modifying commands.** You have read-only tools. Never
+  attempt `git commit`, `cargo build` writes, file edits, or anything
+  that touches state.
+- **Cap your investigation**: ≤ 60 turns. If you hit the cap, dump
+  what you have — partial reviews are still useful.
+
+Now start. Your first tool call should be `Bash: gh pr diff
+${PR_NUMBER} --patch`.
diff --git a/scripts/pr-review/run_review.sh b/scripts/pr-review/run_review.sh
new file mode 100755
index 00000000..1497ff56
--- /dev/null
+++ b/scripts/pr-review/run_review.sh
@@ -0,0 +1,75 @@
+#!/usr/bin/env bash
+# Orchestrate one PR review:
+#   1) export PR_NUMBER / HEAD_SHA / BASE_REF for the prompt template
+#   2) substitute them into prompt.md
+#   3) run `claude -p` in print mode with the read-only tool allowlist
+#   4) drop the model's stdout into /tmp/pr-review-${N}-out.md for
+#      post_review.py to consume
+#
+# Exits non-zero only on infrastructure failure (LiteLLM unreachable,
+# claude binary missing, etc). The post-review step inspects the
+# output file content to decide review verdict.
+
+set -euo pipefail
+
+PR_NUMBER="${1:?usage: $0 <pr_number>}"
+WORKDIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+OUT="/tmp/pr-review-${PR_NUMBER}-out.md"
+LOG="/tmp/pr-review-${PR_NUMBER}-agent.log"
+PROMPT="/tmp/pr-review-${PR_NUMBER}-prompt.md"
+
+# Resolve PR metadata for the prompt. The workflow has already
+# checked out the head SHA — pull base_ref + head_sha back out of
+# git so this script is also runnable locally (`bash run_review.sh
+# 27` from a checkout) for development / debugging.
+HEAD_SHA="$(git rev-parse HEAD)"
+BASE_REF="$(gh pr view "$PR_NUMBER" --json baseRefName --jq .baseRefName)"
+
+export PR_NUMBER HEAD_SHA BASE_REF
+envsubst < "$WORKDIR/prompt.md" > "$PROMPT"
+
+# Pre-flight: verify LiteLLM is reachable. A fast curl is cheaper
+# than waiting for the agent to time out per-turn.
+if ! curl -fsS --max-time 5 "${ANTHROPIC_BASE_URL:-}/v1/models" >/dev/null 2>&1; then
+  echo "ERROR: LiteLLM proxy unreachable at ${ANTHROPIC_BASE_URL:-<unset>}" \
+    | tee "$OUT" >&2
+  exit 2
+fi
+
+# Build the tool allowlist as a single comma-separated string (the
+# format `claude --allowed-tools` accepts).
+ALLOWED_TOOLS="$(grep -v '^#' "$WORKDIR/allowed_tools.txt" \
+  | grep -v '^[[:space:]]*$' \
+  | paste -sd, -)"
+
+# Headless agent run. The 1800 s outer cap is a hard fence — if the
+# model loops we'd rather post a "review timed out" than wedge the
+# workflow.
+timeout 1800 claude \
+  --print \
+  --model "${ANTHROPIC_MODEL:-claude-3-5-sonnet-20241022}" \
+  --max-turns 60 \
+  --output-format text \
+  --permission-mode acceptEdits \
+  --allowed-tools "$ALLOWED_TOOLS" \
+  "$(cat "$PROMPT")" \
+  > "$OUT" \
+  2> "$LOG" \
+  || {
+    rc=$?
+    echo "ERROR: agent exited with code $rc" >> "$LOG"
+    if [ ! -s "$OUT" ]; then
+      printf '### Summary\nAgent run failed (exit %d). See workflow logs.\n' \
+        "$rc" > "$OUT"
+    fi
+    exit "$rc"
+  }
+
+# Sanity: non-empty markdown with at least a `### Summary` heading.
+if ! grep -q '^### Summary' "$OUT"; then
+  echo "WARN: agent output missing ### Summary heading" >> "$LOG"
+  printf '\n\n---\n_Agent output was missing required heading._\n' >> "$OUT"
+fi
+
+echo "review written to $OUT ($(wc -c < "$OUT") bytes)"

From b24e6cb67d77eb36ced0b408e94ea8e0dfe9a830 Mon Sep 17 00:00:00 2001
From: Vader Yang <vader.yang@netis.com>
Date: Wed, 20 May 2026 16:02:54 +0800
Subject: [PATCH 2/2] ci: add pr-review-probe workflow for runner prereq check
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

One-off probe runnable via workflow_dispatch. Verifies the
self-hosted runner has:
  * claude CLI on PATH (Claude Code)
  * gh CLI installed + authenticated
  * python3 + envsubst
  * network path to LiteLLM at 172.16.103.81:4200
  * round-trip claude → LiteLLM → GLM-5 returns expected token

Run before merging pr-review.yml or after re-imaging the runner so
prereq failures surface as clearly-labeled step failures instead of
opaque agent crashes in the real review path.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .github/workflows/pr-review-probe.yml | 72 +++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)
 create mode 100644 .github/workflows/pr-review-probe.yml

diff --git a/.github/workflows/pr-review-probe.yml b/.github/workflows/pr-review-probe.yml
new file mode 100644
index 00000000..da62bc97
--- /dev/null
+++ b/.github/workflows/pr-review-probe.yml
@@ -0,0 +1,72 @@
+name: pr-review-probe
+
+# One-off probe that verifies the self-hosted runner has the
+# prerequisites the pr-review workflow needs. Run manually before
+# merging the pr-review feature, or any time the runner is reimaged.
+# Reports each check as a step so failure points are obvious in the
+# workflow log.
+
+on:
+  workflow_dispatch:
+
+jobs:
+  probe:
+    runs-on: [self-hosted, tokenscope]
+    timeout-minutes: 5
+    env:
+      ANTHROPIC_BASE_URL: http://172.16.103.81:4200
+      ANTHROPIC_API_KEY: dummy
+      ANTHROPIC_MODEL: claude-3-5-sonnet-20241022
+    steps:
+      - name: Runner host
+        run: |
+          echo "hostname: $(hostname)"
+          echo "user: $(whoami)"
+          echo "pwd: $(pwd)"
+          echo "uname: $(uname -a)"
+
+      - name: PATH
+        run: echo "$PATH" | tr ':' '\n'
+
+      - name: Claude Code CLI present
+        run: |
+          if command -v claude >/dev/null 2>&1; then
+            echo "claude: $(which claude)"
+            claude --version || echo "(no --version)"
+          else
+            echo "::error::claude CLI not found on PATH"
+            echo "install with: npm i -g @anthropic-ai/claude-code"
+            exit 1
+          fi
+
+      - name: gh CLI authenticated
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          if ! command -v gh >/dev/null 2>&1; then
+            echo "::error::gh CLI not found on PATH"
+            exit 1
+          fi
+          gh auth status || true
+          gh api repos/${{ github.repository }} --jq .full_name
+
+      - name: Python 3 + envsubst
+        run: |
+          python3 --version
+          command -v envsubst || { echo "::error::envsubst missing (apt install gettext-base)"; exit 1; }
+
+      - name: LiteLLM reachable
+        run: |
+          if ! curl -fsS --max-time 5 "$ANTHROPIC_BASE_URL/v1/models" >/dev/null; then
+            echo "::error::LiteLLM unreachable at $ANTHROPIC_BASE_URL"
+            curl -v --max-time 5 "$ANTHROPIC_BASE_URL/v1/models" || true
+            exit 1
+          fi
+          curl -s "$ANTHROPIC_BASE_URL/v1/models" | head -c 400
+          echo
+
+      - name: Round-trip claude → LiteLLM → GLM-5
+        run: |
+          OUT="$(timeout 60 claude --print --model "$ANTHROPIC_MODEL" --max-turns 1 --output-format text "say PONG, nothing else" 2>&1)"
+          echo "agent response: $OUT"
+          echo "$OUT" | grep -qi PONG || { echo "::error::no PONG in agent output"; exit 1; }