From 075503a038c845eb31ddc464be774cffdc397c82 Mon Sep 17 00:00:00 2001 From: Khaliq Date: Mon, 18 May 2026 12:26:50 +0200 Subject: [PATCH 1/4] feat: add /review-loop dual-reviewer slash command Adds the review-loop slash command: Claude and Codex each review the working-tree diff independently while fanning out into their own subagents, a Codex fixer repairs every valid finding from both reviews and hardens it with tests, and the loop repeats with fresh agent context until both reviewers independently sign off on the same post-fix state. - commands/review-loop.md: harness-agnostic command spec - prpm.json: register review-loop as a slash-command package Pairs with the workflow at AgentWorkforce/workflows repeatable/review-loop/workflow.ts. Co-Authored-By: Claude Opus 4.7 (1M context) --- commands/review-loop.md | 79 +++++++++++++++++++++++++++++++++++++++++ prpm.json | 19 ++++++++++ 2 files changed, 98 insertions(+) create mode 100644 commands/review-loop.md diff --git a/commands/review-loop.md b/commands/review-loop.md new file mode 100644 index 0000000..4a42b76 --- /dev/null +++ b/commands/review-loop.md @@ -0,0 +1,79 @@ +--- +description: Run the dual-reviewer (Claude + Codex) subagent-fanned review/fix loop until both reviewers independently sign off +argument-hint: [base-ref] [--max N] [--pr N] +allowed-tools: Read, Write, Edit, Bash, Glob, Grep +--- + +# Review Loop + +Run an extensive code-review loop on the current working-tree changes. Claude +and Codex each review independently — fanning out into their own subagents — +a Codex fixer repairs every valid finding from both reviews, and the loop +repeats with fresh agent context until **both** reviewers sign off. Harness- +agnostic: works under Claude Code, Codex, OpenCode, Droid, Gemini, or any +harness that supports markdown slash commands. + +**Args:** $ARGUMENTS + +## Instructions + +1. **Load the skills.** Read `writing-agent-relay-workflows` and + `review-fix-signoff-loop`. 
After `npx prpm install + @agent-relay/writing-agent-relay-workflows @agent-relay/review-fix-signoff-loop`, + look in `.claude/skills/<name>/SKILL.md` or `.agents/skills/<name>/SKILL.md`; + when developing inside the skills repo, read `skills/<name>/SKILL.md`. These + document the SDK surface, the fresh-context iteration pattern, the strict + dual-reviewer verdict contract, and blocked-state handling that this loop + depends on. Do not improvise the loop semantics. + +2. **Parse arguments from `$ARGUMENTS`:** + - Optional positional `[base-ref]` → exported as `REVIEW_BASE`. If omitted, + the workflow auto-detects (merge-base with `origin/main`/`main`, else + `HEAD~1`). + - Optional `--max N` → exported as `MAX_REVIEW_ITERATIONS` (default 5). + - Optional `--pr N` → exported as `REVIEW_PR_NUMBER` for the printed + signoff-comment command. + +3. **Locate the workflow.** It ships in the `workflows` repo at + `repeatable/review-loop/workflow.ts`. If the current repo does not contain + it, fetch it from `https://github.com/AgentWorkforce/workflows` + (`repeatable/review-loop/`) or install the published package, then run it + from there. Never paraphrase the workflow into ad-hoc agent calls — run the + actual file so the deterministic gates and fresh-context loop execute. + +4. **Dry-run first.** `agent-relay run --dry-run + repeatable/review-loop/workflow.ts` and confirm `Validation: PASS` before + the real run. Surface any validation error verbatim and stop. + +5. **Run the loop:** + ``` + REVIEW_BASE=<base-ref> MAX_REVIEW_ITERATIONS=<max> REVIEW_PR_NUMBER=<pr> \ + agent-relay run repeatable/review-loop/workflow.ts + ``` + Only set the env vars the user actually supplied. + +6. **Report back.** Read `.workflow-artifacts/review-loop/SIGNOFF.md` and + report: final status (`SIGNED_OFF` / `BLOCKED` / not-signed-off), iteration + count, and the path to per-iteration `claude-review.md` / `codex-review.md` + / `review-fix-report.md`. 
If `BLOCKED_NO_COMMIT.md` exists, surface its + exact evidence and the safe retry command — do not claim success. + +## Output Contract + +- One-line result: final status + iterations run + dual-signoff yes/no. +- Path to `SIGNOFF.md` and the latest iteration's review/fix artifacts. +- If a PR number was supplied and the loop signed off, the exact + `gh pr comment <N> --body-file .workflow-artifacts/review-loop/SIGNOFF.md` + command. + +## Constraints + +- Run the real workflow file — do not reimplement the loop inline. +- The loop reviews and repairs the working tree only. It does **not** commit, + push, or open a PR; do not add that yourself unless the user asks. +- Signoff requires **both** Claude and Codex at + `VERDICT: COMPREHENSIVELY_SATISFIED`. A single reviewer's pass is not signoff. +- Both reviewers must fan out into subagents — the workflow already mandates + this in its task prompts; do not weaken those prompts. +- A blocked or budget-exhausted run is a non-zero outcome. Report it honestly; + never fabricate a signoff. 
diff --git a/prpm.json b/prpm.json index 723dd07..b08d3e0 100644 --- a/prpm.json +++ b/prpm.json @@ -178,6 +178,25 @@ "files": [ "commands/spawn.md" ] + }, + { + "name": "review-loop", + "version": "1.0.0", + "description": "Slash command that runs a robust dual-reviewer code-review loop - Claude and Codex each review the working-tree diff independently while fanning out into their own subagents, a Codex fixer repairs every valid finding from both reviews and hardens it with tests, and the loop repeats with fresh agent context until both reviewers independently sign off on the same post-fix state", + "format": "claude", + "subtype": "slash-command", + "tags": [ + "slash-command", + "code-review", + "review-loop", + "agent-relay", + "multi-agent", + "claude-codex", + "subagents" + ], + "files": [ + "commands/review-loop.md" + ] } ], "collections": [ From aadeff53790bfe57b0cf10242fdbdef188465066 Mon Sep 17 00:00:00 2001 From: Khaliq Date: Mon, 18 May 2026 15:28:18 +0200 Subject: [PATCH 2/4] docs: address CodeRabbit feedback on /review-loop command - Require pinning the fetched workflow to an immutable ref (published package version, git tag/release, or commit SHA); never fall back to the default branch silently. (CodeRabbit: deterministic-run concern) - Add `bash` language to the run-loop fenced block (markdownlint MD040). Co-Authored-By: Claude Opus 4.7 (1M context) --- commands/review-loop.md | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/commands/review-loop.md b/commands/review-loop.md index 4a42b76..1ca7f35 100644 --- a/commands/review-loop.md +++ b/commands/review-loop.md @@ -34,19 +34,29 @@ harness that supports markdown slash commands. - Optional `--pr N` → exported as `REVIEW_PR_NUMBER` for the printed signoff-comment command. -3. **Locate the workflow.** It ships in the `workflows` repo at - `repeatable/review-loop/workflow.ts`. 
If the current repo does not contain - it, fetch it from `https://github.com/AgentWorkforce/workflows` - (`repeatable/review-loop/`) or install the published package, then run it - from there. Never paraphrase the workflow into ad-hoc agent calls — run the - actual file so the deterministic gates and fresh-context loop execute. +3. **Locate the workflow — pin to an immutable ref.** It ships in the + `workflows` repo at `repeatable/review-loop/workflow.ts`. If the current + repo does not contain it, do **not** fetch from a moving branch: pull it + from an immutable ref so the run is deterministic and not subject to + unreviewed behavior changes. In order of preference: + 1. an installed published package at a pinned version (e.g. + `npx prpm install @agent-relay/review-loop@<version>`); + 2. a Git **tag** or **release**: + `https://raw.githubusercontent.com/AgentWorkforce/workflows/<tag>/repeatable/review-loop/workflow.ts`; + 3. a specific **commit SHA** (never `main`/`HEAD`): + `…/AgentWorkforce/workflows/<sha>/repeatable/review-loop/workflow.ts`. + If no pinned ref/version is available, ask the user which tag or commit to + use and stop — do not silently fall back to the default branch. Record the + exact ref used in the run summary. Never paraphrase the workflow into + ad-hoc agent calls — run the actual file so the deterministic gates and + fresh-context loop execute. 4. **Dry-run first.** `agent-relay run --dry-run repeatable/review-loop/workflow.ts` and confirm `Validation: PASS` before the real run. Surface any validation error verbatim and stop. 5. 
**Run the loop:** - ``` + ```bash REVIEW_BASE=<base-ref> MAX_REVIEW_ITERATIONS=<max> REVIEW_PR_NUMBER=<pr> \ agent-relay run repeatable/review-loop/workflow.ts ``` From aaac09e00ffc14e0bfaa3730587e0c5aed4f3d8a Mon Sep 17 00:00:00 2001 From: Khaliq Date: Tue, 19 May 2026 12:15:37 +0200 Subject: [PATCH 3/4] fix: agent-relay CLI precheck + skill review feedback - commands/review-loop.md: add step verifying the agent-relay CLI is available before dry-run/launch; use full raw.githubusercontent URL for the commit-SHA example (cubic PR #49 P3) - commands/create-workflow.md: same agent-relay CLI verification step - writing-agent-relay-workflows: fix implementation-reconcile race by depending on the impl agent steps (failOnError:false) instead of a shared 'context' dep; restore pr_url to the valid verification-types row in the Common Mistakes table (relay PR #912 feedback) Co-Authored-By: Claude Opus 4.7 (1M context) --- commands/create-workflow.md | 8 +++++++- commands/review-loop.md | 17 +++++++++++++---- skills/writing-agent-relay-workflows/SKILL.md | 18 ++++++++++++++---- 3 files changed, 34 insertions(+), 9 deletions(-) diff --git a/commands/create-workflow.md b/commands/create-workflow.md index 39e592a..b5745d0 100644 --- a/commands/create-workflow.md +++ b/commands/create-workflow.md @@ -27,7 +27,13 @@ Scaffold a new multi-agent workflow that runs on the agent-relay broker. This co 5. **Provide a runnable example.** Output one minimal end-to-end example that demonstrates feeding `$ARGUMENTS` into the workflow and shows the expected `{{steps.*.output}}` shape at each stage. -6. **Integration notes.** End with a short checklist for wiring into the existing workload-router (where the workflow file lives, how it's registered, how to dry-run, how to launch via `agent-relay`). +6. 
**Verify the `agent-relay` CLI is available.** Before recommending any dry-run or launch command, confirm the runtime that will execute the workflow is installed: + ```bash + command -v agent-relay || npx agent-relay --version + ``` + If neither resolves, note that it must be installed (`npm install -g agent-relay`, or invoke via `npx agent-relay …`) and surface that in the integration notes rather than assuming it is present. + +7. **Integration notes.** End with a short checklist for wiring into the existing workload-router (where the workflow file lives, how it's registered, how to dry-run, how to launch via `agent-relay`). ## Output Contract diff --git a/commands/review-loop.md b/commands/review-loop.md index 1ca7f35..2d8be5f 100644 --- a/commands/review-loop.md +++ b/commands/review-loop.md @@ -44,25 +44,34 @@ harness that supports markdown slash commands. 2. a Git **tag** or **release**: `https://raw.githubusercontent.com/AgentWorkforce/workflows/<tag>/repeatable/review-loop/workflow.ts`; 3. a specific **commit SHA** (never `main`/`HEAD`): - `…/AgentWorkforce/workflows/<sha>/repeatable/review-loop/workflow.ts`. + `https://raw.githubusercontent.com/AgentWorkforce/workflows/<sha>/repeatable/review-loop/workflow.ts`. If no pinned ref/version is available, ask the user which tag or commit to use and stop — do not silently fall back to the default branch. Record the exact ref used in the run summary. Never paraphrase the workflow into ad-hoc agent calls — run the actual file so the deterministic gates and fresh-context loop execute. -4. **Dry-run first.** `agent-relay run --dry-run +4. **Verify the `agent-relay` CLI is available.** This loop is executed by the + `agent-relay` runtime, so confirm it before any run: + ```bash + command -v agent-relay || npx agent-relay --version + ``` + If neither resolves, install it (`npm install -g agent-relay`, or use + `npx agent-relay …` for the commands below) and re-verify before + proceeding. 
Do not attempt the dry-run or the loop without a working CLI. + +5. **Dry-run first.** `agent-relay run --dry-run repeatable/review-loop/workflow.ts` and confirm `Validation: PASS` before the real run. Surface any validation error verbatim and stop. -5. **Run the loop:** +6. **Run the loop:** ```bash REVIEW_BASE=<base-ref> MAX_REVIEW_ITERATIONS=<max> REVIEW_PR_NUMBER=<pr> \ agent-relay run repeatable/review-loop/workflow.ts ``` Only set the env vars the user actually supplied. -6. **Report back.** Read `.workflow-artifacts/review-loop/SIGNOFF.md` and +7. **Report back.** Read `.workflow-artifacts/review-loop/SIGNOFF.md` and report: final status (`SIGNED_OFF` / `BLOCKED` / not-signed-off), iteration count, and the path to per-iteration `claude-review.md` / `codex-review.md` / `review-fix-report.md`. If `BLOCKED_NO_COMMIT.md` exists, surface its diff --git a/skills/writing-agent-relay-workflows/SKILL.md b/skills/writing-agent-relay-workflows/SKILL.md index 8125656..74dc3b0 100644 --- a/skills/writing-agent-relay-workflows/SKILL.md +++ b/skills/writing-agent-relay-workflows/SKILL.md @@ -1827,15 +1827,20 @@ For long rollouts, keep the critical path evidence-based: agent: 'impl-runtime', dependsOn: ['context'], task: 'Implement the runtime slice and write .workflow-artifacts/runtime.md', + failOnError: false, // transport failure is advisory, not a hard gate }) .step('adapter-implementation', { agent: 'impl-adapters', dependsOn: ['context'], task: 'Implement adapter wiring and write .workflow-artifacts/adapters.md', + failOnError: false, // transport failure is advisory, not a hard gate }) .step('implementation-reconcile', { type: 'deterministic', - dependsOn: ['context'], + // Depend on the agent steps so reconcile runs AFTER they finish (not in + // parallel via a shared 'context' dep). They are failOnError:false above, + // so a transport failure stays advisory while ordering is preserved. 
+ dependsOn: ['runtime-implementation', 'adapter-implementation'], command: `git status --short -- packages/core packages/*/src/writeback.ts scripts tests .workflow-artifacts test -f scripts/verify-e2e.mjs || echo "MISSING_E2E" test -f packages/core/src/runtime/router.ts || echo "MISSING_ROUTER"`, @@ -1857,8 +1862,13 @@ test -f packages/core/src/runtime/router.ts || echo "MISSING_ROUTER"`, }) ``` -Implementation agents may still run and coordinate on a channel, but tests -depend on the reconcile/repair path. That makes transport failures advisory. +The reconcile step still waits for the implementation agents (it depends on +them, so its `git status`/`test -f` checks see their output), but those agent +steps are `failOnError: false`, so a transport failure does not block the +reconcile/repair path. Keep the ordering constraint — do not drop the agent +deps to `['context']`, or reconcile races the agents and always reports files +missing. Tests then depend on the reconcile/repair path, making transport +failures advisory. If final deterministic evidence is still red after repair, write a blocked artifact and skip commit/PR creation rather than failing the workflow. @@ -1976,7 +1986,7 @@ When you set `.pattern('supervisor')` (or `hub-spoke`, `fan-out`), the runner au | Thinking `agent-relay run` inspects exports | It executes the file as a subprocess. Only `.run()` invocations trigger steps | | `pattern('single')` on cloud runner | Not supported — use `dag` | | `pattern('supervisor')` with one agent | Same agent is owner + specialist. Use `dag` | -| Invalid verification type (`type: 'deterministic'`) | Only `exit_code`, `output_contains`, `file_exists`, `custom` are valid | +| Invalid verification type (`type: 'deterministic'`) | Only `exit_code`, `output_contains`, `file_exists`, `custom`, `pr_url` are valid | | Chaining `{{steps.X.output}}` from interactive agents | PTY output is garbled. 
Use deterministic steps or `preset: 'worker'` | | Single step editing 4+ files | Agents modify 1-2 then exit. Split to one file per step with verify gates | | Relying on agents to `git commit` | Agents emit markers without running git. Use deterministic commit step | From cfcb1248504d1c8667ec469f665b799c4c51a106 Mon Sep 17 00:00:00 2001 From: Khaliq Date: Tue, 19 May 2026 12:19:09 +0200 Subject: [PATCH 4/4] chore: bump prpm versions for changed packages - writing-agent-relay-workflows 1.6.12 -> 1.6.13 (reconcile race + pr_url table fix) - create-workflow 1.0.2 -> 1.0.3 (agent-relay CLI precheck step) - review-loop 1.0.0 -> 1.0.1 (agent-relay CLI precheck step + full raw.githubusercontent URL; also covers the un-bumped aadeff5 docs edit) Co-Authored-By: Claude Opus 4.7 (1M context) --- prpm.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/prpm.json b/prpm.json index b08d3e0..b6d2b40 100644 --- a/prpm.json +++ b/prpm.json @@ -28,7 +28,7 @@ }, { "name": "writing-agent-relay-workflows", - "version": "1.6.12", + "version": "1.6.13", "description": "Use when building multi-agent workflows with relay broker-sdk. 
Covers conversation vs pipeline coordination, WorkflowBuilder/DAG steps, agents, {{steps.X.output}} chaining, repairable verification gates, evidence-based completion, mandatory Claude-then-Codex fresh-eyes review/fix loops with test hardening, channels, chat-native recipes, error handling, event listeners, step sizing, lead+workers teams, and parallel waves.", "format": "claude", "subtype": "skill", @@ -145,7 +145,7 @@ }, { "name": "create-workflow", - "version": "1.0.2", + "version": "1.0.3", "description": "Slash command to scaffold a new agent-relay multi-agent workflow using the writing-agent-relay-workflows skill - harness-agnostic, model-agnostic prompts, picks the right swarm pattern, and emits a runnable WorkflowBuilder file with verify gates, mandatory Claude-then-Codex review/fix loops with test hardening, and integration notes", "format": "claude", "subtype": "slash-command", @@ -181,7 +181,7 @@ }, { "name": "review-loop", - "version": "1.0.0", + "version": "1.0.1", "description": "Slash command that runs a robust dual-reviewer code-review loop - Claude and Codex each review the working-tree diff independently while fanning out into their own subagents, a Codex fixer repairs every valid finding from both reviews and hardens it with tests, and the loop repeats with fresh agent context until both reviewers independently sign off on the same post-fix state", "format": "claude", "subtype": "slash-command",