diff --git a/.agents/skills/choosing-swarm-patterns b/.agents/skills/choosing-swarm-patterns deleted file mode 120000 index 93ca845fd..000000000 --- a/.agents/skills/choosing-swarm-patterns +++ /dev/null @@ -1 +0,0 @@ -../../skills/choosing-swarm-patterns \ No newline at end of file diff --git a/.agents/skills/choosing-swarm-patterns/SKILL.md b/.agents/skills/choosing-swarm-patterns/SKILL.md new file mode 100644 index 000000000..86878c3ba --- /dev/null +++ b/.agents/skills/choosing-swarm-patterns/SKILL.md @@ -0,0 +1,261 @@ +--- +name: choosing-swarm-patterns +description: Use when coordinating multiple AI agents and need to pick the right orchestration pattern - covers 10 patterns (fan-out, pipeline, hub-spoke, consensus, mesh, handoff, cascade, dag, debate, hierarchical) with decision framework and reflection protocol +--- + +### Overview + +10 orchestration patterns for multi-agent workflows. Pick the simplest pattern that solves the problem — add complexity only when the system proves it's insufficient. + +### Quick Decision Framework + +#### ``` + +``` +Is the task independent per agent? + YES → fan-out (parallel workers) + +Does each step need the previous step's output? + YES → Is it strictly linear? + YES → pipeline + NO → dag (parallel where possible) + +Does a coordinator need to stay alive and adapt? + YES → Is there one level of management? + YES → hub-spoke + NO → hierarchical (multi-level) + +Is the task about making a decision? + YES → Do agents need to argue opposing sides? + YES → debate (adversarial) + NO → consensus (cooperative voting) + +Does the right specialist emerge during processing? + YES → handoff (dynamic routing) + +Do all agents need to freely collaborate? + YES → mesh (peer-to-peer) + +Is cost the primary concern? 
+ YES → cascade (cheap model first, escalate if needed) +``` + + +### Pattern Reference + +| # | Pattern | Topology | Agents | Best For | +|---|---------|----------|--------|----------| +| 1 | **fan-out** | Star (SDK center) | N parallel | Independent subtasks (reviews, research, tests) | +| 2 | **pipeline** | Linear chain | Sequential | Ordered stages (design → implement → test) | +| 3 | **hub-spoke** | Star (live hub) | 1 lead + N workers | Dynamic coordination, lead reviews/adjusts | +| 4 | **consensus** | Broadcast + vote | N voters | Architecture decisions, approval gates | +| 5 | **mesh** | Fully connected | N peers | Brainstorming, collaborative debugging | +| 6 | **handoff** | Routing chain | 1 active at a time | Triage, specialist routing, support flows | +| 7 | **cascade** | Tiered escalation | Cheapest → most capable | Cost optimization, production workloads | +| 8 | **dag** | Dependency graph | Parallel + joins | Complex projects with mixed dependencies | +| 9 | **debate** | Adversarial rounds | 2+ debaters + judge | Rigorous evaluation, architecture trade-offs | +| 10 | **hierarchical** | Tree (multi-level) | Lead → coordinators → workers | Large teams, domain separation | + +### Pattern Details + +#### 1. fan-out — Parallel Workers + +```ts +fanOut([ + { task: "Review auth.ts", name: "AuthReviewer" }, + { task: "Review db.ts", name: "DbReviewer" }, +], { cli: "claude" }); +``` + +#### 2. pipeline — Sequential Stages + +```ts +pipeline([ + { task: "Design the API schema", name: "Designer" }, + { task: "Implement the endpoints", name: "Implementer" }, + { task: "Write integration tests", name: "Tester" }, +]); +``` + +#### 3. hub-spoke — Persistent Coordinator + +```ts +hubAndSpoke({ + hub: { task: "Coordinate building a REST API", name: "Lead" }, + workers: [ + { task: "Build database models", name: "DbWorker" }, + { task: "Build route handlers", name: "ApiWorker" }, + ], +}); +``` + +#### 4. 
consensus — Cooperative Voting + +```ts +consensus({ + proposal: "Should we migrate to Fastify?", + voters: [ + { task: "Evaluate performance", name: "PerfExpert" }, + { task: "Evaluate DX", name: "DxExpert" }, + ], + consensusType: "majority", +}); +``` + +#### 5. mesh — Peer Collaboration + +```ts +mesh({ + goal: "Debug the auth flow returning 500", + agents: [ + { task: "Check server logs", name: "LogAnalyst" }, + { task: "Review auth code", name: "CodeReviewer" }, + { task: "Write repro test", name: "Tester" }, + ], +}); +``` + +#### 6. handoff — Dynamic Routing + +```ts +handoff({ + entryPoint: { task: "Triage the request", name: "Triage" }, + routes: [ + { agent: { task: "Handle billing", name: "Billing" }, condition: "billing, payment" }, + { agent: { task: "Handle tech issues", name: "TechSupport" }, condition: "error, bug" }, + ], + maxHandoffs: 3, +}); +``` + +#### 7. cascade — Cost-Aware Escalation + +```ts +cascade({ + tiers: [ + { agent: { task: "Answer this", cli: "claude" }, confidenceThreshold: 0.7, costWeight: 1 }, + { agent: { task: "Answer this", cli: "claude" }, confidenceThreshold: 0.85, costWeight: 5 }, + { agent: { task: "Answer this", cli: "claude" }, costWeight: 20 }, + ], +}); +``` + +#### 8. dag — Directed Acyclic Graph + +```ts +dag({ + nodes: [ + { id: "scaffold", task: "Create project scaffold" }, + { id: "frontend", task: "Build React UI", dependsOn: ["scaffold"] }, + { id: "backend", task: "Build API", dependsOn: ["scaffold"] }, + { id: "integrate", task: "Wire together", dependsOn: ["frontend", "backend"] }, + ], + maxConcurrency: 3, +}); +``` + +#### 9. debate — Adversarial Refinement + +```ts +debate({ + topic: "Monorepo vs polyrepo for the new platform?", + debaters: [ + { task: "Argue for monorepo", position: "monorepo" }, + { task: "Argue for polyrepo", position: "polyrepo" }, + ], + judge: { task: "Judge and decide", name: "ArchJudge" }, + maxRounds: 3, +}); +``` + +#### 10. 
hierarchical — Multi-Level Delegation + +```ts +hierarchical({ + agents: [ + { id: "lead", task: "Coordinate full-stack app", role: "lead" }, + { id: "fe-coord", task: "Manage frontend", role: "coordinator", reportsTo: "lead" }, + { id: "be-coord", task: "Manage backend", role: "coordinator", reportsTo: "lead" }, + { id: "fe-dev", task: "Build components", role: "worker", reportsTo: "fe-coord" }, + { id: "be-dev", task: "Build API", role: "worker", reportsTo: "be-coord" }, + ], +}); +``` + + +### Reflection Protocol + +#### All patterns support reflection — periodic synthesis that enables course correction. Enabled via `reflectionThreshold` on WorkflowOptions. + +```ts +{ + reflectionThreshold: 10, // trigger after 10 agent messages + onReflect: async (ctx) => { + // Examine ctx.recentMessages, ctx.agentStatuses + // Return adjustments or null + }, +} +``` + + +### Common Mistakes + +| Mistake | Why It Fails | Fix | +|---------|-------------|-----| +| Using mesh for everything | O(n^2) communication, debugging nightmare | Use hub-spoke for most tasks | +| Pipeline for independent work | Sequential bottleneck | Use fan-out or dag | +| Hub-spoke for simple parallel tasks | Hub is unnecessary overhead | Use fan-out | +| Consensus for non-decisions | Voting on implementation tasks wastes time | Use hub-spoke, let lead decide | +| No circuit breaker on handoff | Infinite routing loops | Always set maxHandoffs | +| Cascade without confidence parsing | Agents don't report confidence | Convention injection handles this | +| Hierarchical for 3 agents | Management overhead exceeds benefit | Use hub-spoke for small teams | + +### DAG Executor — Proven Pattern + +#### Agent Completion: Detect → Release → Collect + +``` +Agent writes summary file → Orchestrator polls (5s) → Detects new mtime → + Reads summary → Calls client.release(agent) → agent_exited fires → Node marked complete +``` + +#### State & Resume + +```ts +saveState(completed, depsOutput, results, startTime); +// 
Restart with --resume to skip completed nodes +``` + + +### YAML Workflow Definition + +#### Any pattern can be defined in YAML for portability: + +```yaml +version: "1.0" +name: feature-dev +pattern: hub-spoke +agents: + - id: lead + role: lead + cli: claude + - id: developer + role: worker + cli: codex + reportsTo: lead +steps: + - id: plan + agent: lead + prompt: "Create a development plan for: {{task}}" + expects: "PLAN_COMPLETE" + - id: implement + agent: developer + dependsOn: [plan] + prompt: "Implement: {{steps.plan.output}}" + expects: "DONE" +reflection: + enabled: true + threshold: 10 +trajectory: + enabled: true +``` diff --git a/.agents/skills/running-headless-orchestrator b/.agents/skills/running-headless-orchestrator deleted file mode 120000 index 55d0eaa16..000000000 --- a/.agents/skills/running-headless-orchestrator +++ /dev/null @@ -1 +0,0 @@ -../../skills/running-headless-orchestrator \ No newline at end of file diff --git a/.agents/skills/running-headless-orchestrator/SKILL.md b/.agents/skills/running-headless-orchestrator/SKILL.md new file mode 100644 index 000000000..b79b3e560 --- /dev/null +++ b/.agents/skills/running-headless-orchestrator/SKILL.md @@ -0,0 +1,213 @@ +--- +name: running-headless-orchestrator +description: Use when an agent needs to self-bootstrap agent-relay and autonomously manage a team of workers - covers infrastructure startup, agent spawning, lifecycle monitoring, and team coordination without human intervention +--- + +### Overview + +A headless orchestrator is an agent that: +1. Starts the relay infrastructure itself (`agent-relay up`) +2. Spawns and manages worker agents +3. Monitors agent lifecycle events +4. 
Coordinates work without human intervention + +### When to Use + +- Agent needs full control over its worker team +- No human available to run `agent-relay up` manually +- Agent should manage agent lifecycle autonomously +- Building self-contained multi-agent systems + +### Quick Reference + +| Step | Command/Tool | +|------|--------------| +| Verify installation | `which agent-relay` or `npx agent-relay --version` | +| Start infrastructure | `agent-relay up --no-dashboard --verbose` | +| Check status | `agent-relay status` | +| Spawn worker | `agent-relay spawn Worker1 claude "task"` | +| List workers | `agent-relay who` | +| View worker logs | `agent-relay agents:logs Worker1` | +| Send message | `agent-relay send Worker1 "message"` | +| Release worker | `agent-relay release Worker1` | +| Stop infrastructure | `agent-relay down` | + +### Bootstrap Flow + +#### Step 0: Verify Installation + +```bash +# Check if agent-relay is installed +which agent-relay || npx agent-relay --version + +# If not installed, install globally +npm install -g agent-relay + +# Or use npx (no install needed) +npx agent-relay --version +``` + +#### Step 1: Start Infrastructure + +```bash +# Preferred: run broker in foreground/stdin mode and keep the session open +agent-relay up --no-dashboard --verbose +``` + +#### Step 2: Spawn Workers via MCP + +``` +mcp__relaycast__agent_add( + name: "Worker1", + cli: "claude", + task: "Implement the authentication module following the existing patterns" +) +``` + +#### Step 3: Monitor and Coordinate + +``` +# Check for worker messages +mcp__relaycast__message_inbox_check() + +# Send follow-up instructions +mcp__relaycast__message_dm_send(to: "Worker1", text: "Also add unit tests") + +# List active workers +mcp__relaycast__agent_list() +``` + +#### Step 4: Release Workers + +``` +mcp__relaycast__agent_remove(name: "Worker1") +``` + +#### Step 5: Shutdown (optional) + +```bash +agent-relay down +``` + + +### CLI Commands for Orchestration + +#### 
Spawning and Messaging + +```bash +# Spawn a worker +agent-relay spawn Worker1 claude "Implement auth module" + +# Send message to worker +agent-relay send Worker1 "Add unit tests too" + +# Release when done +agent-relay release Worker1 +``` + +#### Monitoring Workers (Essential) + +```bash +# Show currently active agents +agent-relay who + +# View real-time output from a worker (critical for debugging) +agent-relay agents:logs Worker1 + +# View recent message history +agent-relay history + +# Check overall system status +agent-relay status +``` + +#### Troubleshooting + +```bash +# Kill unresponsive worker +agent-relay agents:kill Worker1 + +# Check system health +agent-relay health + +# View metrics +agent-relay metrics +``` + + +### Orchestrator Instructions Template + +#### Give your lead agent these instructions: + +``` +You are an autonomous orchestrator. Bootstrap the relay infrastructure and manage a team of workers. + +## Step 1: Verify Installation +Run: which agent-relay || npx agent-relay --version +If not found: npm install -g agent-relay + +## Step 2: Start Infrastructure +Run: agent-relay up --no-dashboard --verbose +Verify: agent-relay status (should show "running") + +## Step 3: Manage Your Team + +Spawn workers: + agent-relay spawn Worker1 claude "Task description" + +Monitor workers (do this frequently): + agent-relay who # List active workers + agent-relay agents:logs Worker1 # View worker output/progress + +Send instructions: + agent-relay send Worker1 "Additional instructions" + +Release when done: + agent-relay release Worker1 + +## Protocol +- Workers will ACK when they receive tasks +- Workers will send DONE when complete +- Use `agent-relay agents:logs <name>` to monitor progress +- Use `agent-relay history` to see message flow +``` + + +### Lifecycle Events + +The broker emits these events (available via SDK subscriptions): + +| Event | When | +|-------|------| +| `agent_spawned` | Worker process started | +| `worker_ready` | Worker connected 
to relay | +| `agent_idle` | Worker waiting for messages | +| `agent_exited` | Worker process ended | +| `agent_permanently_dead` | Worker failed after retries | + +### Common Mistakes + +| Mistake | Fix | +|---------|-----| +| `agent-relay: command not found` | Install with `npm i -g agent-relay` or use `npx agent-relay` | +| "Nested session" error | Broker handles this automatically; if running manually, unset `CLAUDECODE` env var | +| Broker not starting | Try `agent-relay down` first, then use foreground `agent-relay up --no-dashboard --verbose` to see readiness logs | +| Background broker says started but status is STOPPED | Prefer foreground mode for that project/session; background mode may have detached incorrectly | +| Spawn fails with `internal reply dropped` | The broker is likely not fully ready yet; wait for readiness, then spawn one worker first | +| Workers not connecting | Ensure broker started; check `agent-relay who` and worker logs | +| Not monitoring workers | Use `agent-relay agents:logs <name>` frequently to track progress | +| Workers seem stuck | Check logs with `agent-relay agents:logs <name>` for errors | +| Messages not delivered | Check `agent-relay history` to verify message flow | + +### Overview + +Self-bootstrap agent-relay infrastructure and manage a team of agents autonomously. + +### Prerequisites + +#### 1. 
**agent-relay CLI installed** (required) + +```bash +npm install -g agent-relay + # Or use npx without installing: npx agent-relay +``` diff --git a/.agents/skills/using-agent-relay b/.agents/skills/using-agent-relay deleted file mode 120000 index b2e02cab0..000000000 --- a/.agents/skills/using-agent-relay +++ /dev/null @@ -1 +0,0 @@ -../../skills/using-agent-relay \ No newline at end of file diff --git a/.agents/skills/using-agent-relay/SKILL.md b/.agents/skills/using-agent-relay/SKILL.md new file mode 100644 index 000000000..a5079aa2c --- /dev/null +++ b/.agents/skills/using-agent-relay/SKILL.md @@ -0,0 +1,217 @@ +--- +name: using-agent-relay +description: Use when coordinating multiple AI agents in real-time - provides inter-agent messaging via MCP tools +--- + +### MCP Tools Overview + +All tools use a dot-notation hierarchy. Claude uses `mcp__relaycast__<category>_<action>`, other CLIs use `relaycast.<category>.<action>`. + +### Messaging + +| Tool (Claude / Other CLIs) | Description | +| ------------------------------------------------- | ---------------------------------------- | +| `mcp__relaycast__message_dm_send` / `relaycast.message.dm.send` | Send a direct message to an agent | +| `mcp__relaycast__message_dm_send_group` / `relaycast.message.dm.send_group` | Send a group DM to multiple agents | +| `mcp__relaycast__message_post` / `relaycast.message.post` | Post a message to a channel | +| `mcp__relaycast__message_reply` / `relaycast.message.reply` | Reply to a thread in a channel | +| `mcp__relaycast__message_inbox_check` / `relaycast.message.inbox.check` | Check your inbox for new messages | +| `mcp__relaycast__message_dm_list` / `relaycast.message.dm.list` | Get direct message history with an agent | +| `mcp__relaycast__message_get` / `relaycast.message.get` | Get messages from a channel | +| `mcp__relaycast__thread_get` / `relaycast.thread.get` | Get a thread's messages | +| `mcp__relaycast__message_search` / `relaycast.message.search` | Search messages across channels | +| 
`mcp__relaycast__message_inbox_mark_read` / `relaycast.message.inbox.mark_read` | Mark messages as read | + +### Agents + +| Tool (Claude / Other CLIs) | Description | +| ------------------------------------------------- | ---------------------------------------- | +| `mcp__relaycast__agent_add` / `relaycast.agent.add` | Spawn/add a new agent | +| `mcp__relaycast__agent_remove` / `relaycast.agent.remove` | Release/remove an agent | +| `mcp__relaycast__agent_list` / `relaycast.agent.list` | List all online agents | +| `mcp__relaycast__agent_register` / `relaycast.agent.register` | Register yourself as an agent | + +### Channels + +| Tool (Claude / Other CLIs) | Description | +| ------------------------------------------------- | ---------------------------------------- | +| `mcp__relaycast__channel_create` / `relaycast.channel.create` | Create a new channel | +| `mcp__relaycast__channel_archive` / `relaycast.channel.archive` | Archive a channel | +| `mcp__relaycast__channel_list` / `relaycast.channel.list` | List all channels | +| `mcp__relaycast__channel_join` / `relaycast.channel.join` | Join a channel | +| `mcp__relaycast__channel_leave` / `relaycast.channel.leave` | Leave a channel | +| `mcp__relaycast__channel_invite` / `relaycast.channel.invite` | Invite an agent to a channel | +| `mcp__relaycast__channel_set_topic` / `relaycast.channel.set_topic` | Set a channel's topic | + +### Reactions + +| Tool (Claude / Other CLIs) | Description | +| ------------------------------------------------- | ---------------------------------------- | +| `mcp__relaycast__message_reaction_add` / `relaycast.message.reaction.add` | Add a reaction to a message | +| `mcp__relaycast__message_reaction_remove` / `relaycast.message.reaction.remove` | Remove a reaction from a message | + +### Webhooks & Subscriptions + +| Tool (Claude / Other CLIs) | Description | +| ------------------------------------------------- | ---------------------------------------- | +| 
`mcp__relaycast__webhook_create` / `relaycast.webhook.create` | Create a webhook | +| `mcp__relaycast__webhook_delete` / `relaycast.webhook.delete` | Delete a webhook | +| `mcp__relaycast__webhook_list` / `relaycast.webhook.list` | List webhooks | +| `mcp__relaycast__webhook_trigger` / `relaycast.webhook.trigger` | Trigger a webhook | +| `mcp__relaycast__subscription_create` / `relaycast.subscription.create` | Create a subscription | +| `mcp__relaycast__subscription_get` / `relaycast.subscription.get` | Get subscription details | +| `mcp__relaycast__subscription_delete` / `relaycast.subscription.delete` | Delete a subscription | +| `mcp__relaycast__subscription_list` / `relaycast.subscription.list` | List subscriptions | + +### Commands & Workspace + +| Tool (Claude / Other CLIs) | Description | +| ------------------------------------------------- | ---------------------------------------- | +| `mcp__relaycast__command_register` / `relaycast.command.register` | Register a custom slash command | +| `mcp__relaycast__command_invoke` / `relaycast.command.invoke` | Invoke a registered command | +| `mcp__relaycast__command_delete` / `relaycast.command.delete` | Delete a command | +| `mcp__relaycast__command_list` / `relaycast.command.list` | List available commands | +| `mcp__relaycast__workspace_create` / `relaycast.workspace.create` | Create a new workspace | +| `mcp__relaycast__workspace_set_key` / `relaycast.workspace.set_key` | Set the workspace API key | + +### Files + +| Tool (Claude / Other CLIs) | Description | +| ------------------------------------------------- | ---------------------------------------- | +| `mcp__relaycast__file_upload` / `relaycast.file.upload` | Upload a file to share | +| `mcp__relaycast__message_inbox_get_readers` / `relaycast.message.inbox.get_readers` | See who has read a message | + +### Sending Messages + +#### Direct Messages + +``` +mcp__relaycast__message_dm_send(to: "Bob", text: "Can you review my code changes?") +``` + +#### 
Group DMs + +``` +mcp__relaycast__message_dm_send_group(participants: ["Alice", "Bob"], text: "Sync on auth module") +``` + +#### Channel Messages + +``` +mcp__relaycast__message_post(channel: "general", text: "The API endpoints are ready") +``` + +#### Thread Replies + +``` +mcp__relaycast__message_reply(channel: "general", thread_id: "abc123", text: "Done!") +``` + + +### Communication Protocol + +#### **ACK immediately** - When you receive a task, acknowledge before starting work: + +``` +mcp__relaycast__message_dm_send(to: "Lead", text: "ACK: Brief description of task received") +``` + + +### Receiving Messages + +#### Messages appear as: + +``` +Relay message from Alice [abc123]: Content here +``` + + +### Spawning & Releasing Agents + +#### Spawn a Worker + +``` +mcp__relaycast__agent_add(name: "WorkerName", cli: "claude", task: "Task description here") +``` + +#### Release a Worker + +``` +mcp__relaycast__agent_remove(name: "WorkerName") +``` + + +### Channels + +#### Create and Join + +``` +mcp__relaycast__channel_create(name: "frontend", topic: "Frontend work") +mcp__relaycast__channel_join(channel: "frontend") +mcp__relaycast__channel_invite(channel: "frontend", agent: "Bob") +``` + +#### List and Read + +``` +mcp__relaycast__channel_list() +mcp__relaycast__message_get(channel: "general") +``` + + +### Reactions + + + +``` +mcp__relaycast__message_reaction_add(message_id: "abc123", emoji: "thumbsup") +mcp__relaycast__message_reaction_remove(message_id: "abc123", emoji: "thumbsup") +``` + + +### Search + + + +``` +mcp__relaycast__message_search(query: "auth module", channel: "general") +``` + + +### Checking Status + + + +``` +mcp__relaycast__agent_list() # List online agents +mcp__relaycast__message_inbox_check() # Check for unread messages +``` + + +### CLI Commands + + + +```bash +agent-relay status # Check daemon status +agent-relay agents # List active agents +agent-relay agents:logs <name> # View agent output +agent-relay 
agents:kill <name> # Kill a spawned agent +agent-relay read <id> # Read truncated message +agent-relay history # Show recent message history +``` + + +### Overview + +Real-time agent-to-agent messaging via Relaycast MCP tools. + +### Common Mistakes + +| Mistake | Fix | +| ------------------------- | ---------------------------------------------------------------- | +| Messages not sending | Use `message.inbox.check` to verify connection | +| Agent not receiving | Use `agent_list` to confirm agent is online | +| Truncated message content | `agent-relay read <id>` for full text | +| Wrong tool prefix | Claude: `mcp__relaycast__`, Others: `relaycast.` | +| DM vs channel confusion | Use `message.dm.send` for agents, `message.post` for channels | diff --git a/.agents/skills/writing-agent-relay-workflows b/.agents/skills/writing-agent-relay-workflows deleted file mode 120000 index 2286b4ac0..000000000 --- a/.agents/skills/writing-agent-relay-workflows +++ /dev/null @@ -1 +0,0 @@ -../../skills/writing-agent-relay-workflows \ No newline at end of file diff --git a/.agents/skills/writing-agent-relay-workflows/SKILL.md b/.agents/skills/writing-agent-relay-workflows/SKILL.md new file mode 100644 index 000000000..465cd479f --- /dev/null +++ b/.agents/skills/writing-agent-relay-workflows/SKILL.md @@ -0,0 +1,449 @@ +--- +name: writing-agent-relay-workflows +description: Use when building multi-agent workflows with the relay broker-sdk - covers the WorkflowBuilder API, DAG step dependencies, agent definitions, step output chaining via {{steps.X.output}}, verification gates, evidence-based completion, owner decisions, dedicated channels, dynamic channel management (subscribe/unsubscribe/mute/unmute), swarm patterns, error handling, event listeners, step sizing rules, authoring best practices, and the lead+workers team pattern for complex steps +--- + +### Overview + +The relay broker-sdk workflow system orchestrates multiple AI agents (Claude, Codex, Gemini, Aider, Goose) through typed DAG-based 
workflows. Workflows can be written in **TypeScript** (preferred), **Python**, or **YAML**. + +**Language preference:** TypeScript > Python > YAML. Use TypeScript unless the project is Python-only or a simple config-driven workflow suits YAML. + +### When to Use + +- Building multi-agent workflows with step dependencies +- Orchestrating different AI CLIs (claude, codex, gemini, aider, goose) +- Creating DAG, pipeline, fan-out, or other swarm patterns +- Needing verification gates, retries, or step output chaining +- Dynamic channel management: agents joining/leaving/muting channels mid-workflow + +### Quick Reference + +#### ```typescript + +```typescript +const { workflow } = require('@agent-relay/sdk/workflows'); + +async function main() { +const result = await workflow('my-workflow') + .description('What this workflow does') + .pattern('dag') // or 'pipeline', 'fan-out', etc. + .channel('wf-my-workflow') // dedicated channel (auto-generated if omitted) + .maxConcurrency(3) + .timeout(3_600_000) // global timeout (ms) + + .agent('lead', { cli: 'claude', role: 'Architect', retries: 2 }) + .agent('worker', { cli: 'codex', role: 'Implementer', retries: 2 }) + + .step('plan', { + agent: 'lead', + task: `Analyze the codebase and produce a plan.`, + retries: 2, + verification: { type: 'output_contains', value: 'PLAN_COMPLETE' }, + }) + .step('implement', { + agent: 'worker', + task: `Implement based on this plan:\n{{steps.plan.output}}`, + dependsOn: ['plan'], + verification: { type: 'exit_code' }, + }) + + .onError('retry', { maxRetries: 2, retryDelayMs: 10_000 }) + .run({ cwd: process.cwd() }); + + console.log('Result:', result.status); +} + +main().catch(console.error); +``` + + +### ⚡ Parallelism — Design for Speed + +#### Cross-Workflow Parallelism: Wave Planning + +```bash +# BAD — sequential (14 hours for 27 workflows at ~30 min each) +agent-relay run workflows/34-sst-wiring.ts +agent-relay run workflows/35-env-config.ts +agent-relay run 
workflows/36-loading-states.ts +# ... one at a time + +# GOOD — parallel waves (3-4 hours for 27 workflows) +# Wave 1: independent infra (parallel) +agent-relay run workflows/34-sst-wiring.ts & +agent-relay run workflows/35-env-config.ts & +agent-relay run workflows/36-loading-states.ts & +agent-relay run workflows/37-responsive.ts & +wait +git add -A && git commit -m "Wave 1" + +# Wave 2: testing (parallel — independent test suites) +agent-relay run workflows/40-unit-tests.ts & +agent-relay run workflows/41-integration-tests.ts & +agent-relay run workflows/42-e2e-tests.ts & +wait +git add -A && git commit -m "Wave 2" +``` + +#### Declare File Scope for Planning + +```typescript +workflow('48-comparison-mode') + .packages(['web', 'core']) // monorepo packages touched + .isolatedFrom(['49-feedback-system']) // explicitly safe to parallelize + .requiresBefore(['46-admin-dashboard']) // explicit ordering constraint +``` + +#### Within-Workflow Parallelism + +```typescript +// BAD — unnecessary sequential chain +.step('fix-component-a', { agent: 'worker', dependsOn: ['review'] }) +.step('fix-component-b', { agent: 'worker', dependsOn: ['fix-component-a'] }) // why wait? + +// GOOD — parallel fan-out, merge at the end +.step('fix-component-a', { agent: 'impl-1', dependsOn: ['review'] }) +.step('fix-component-b', { agent: 'impl-2', dependsOn: ['review'] }) // same dep = parallel +.step('verify-all', { agent: 'reviewer', dependsOn: ['fix-component-a', 'fix-component-b'] }) +``` + + +### Key Concepts + +#### Verification Gates + +```typescript +verification: { type: 'exit_code' } // preferred for code-editing steps +verification: { type: 'output_contains', value: 'DONE' } // optional accelerator +verification: { type: 'file_exists', value: 'src/out.ts' } // deterministic file check +``` + +#### DAG Dependencies + +```typescript +.step('fix-types', { agent: 'worker', dependsOn: ['review'], ... }) +.step('fix-tests', { agent: 'worker', dependsOn: ['review'], ... 
}) +.step('final', { agent: 'lead', dependsOn: ['fix-types', 'fix-tests'], ... }) +``` + +#### SDK API + +```typescript +// Subscribe an agent to additional channels post-spawn +relay.subscribe({ agent: 'security-auditor', channels: ['review-pr-456'] }); + +// Unsubscribe — agent leaves the channel entirely +relay.unsubscribe({ agent: 'security-auditor', channels: ['general'] }); + +// Mute — agent stays subscribed (history access) but messages are NOT injected into PTY +relay.mute({ agent: 'security-auditor', channel: 'review-pr-123' }); + +// Unmute — resume PTY injection +relay.unmute({ agent: 'security-auditor', channel: 'review-pr-123' }); +``` + +#### Events + +```typescript +relay.onChannelSubscribed = (agent, channels) => { /* ... */ }; +relay.onChannelUnsubscribed = (agent, channels) => { /* ... */ }; +relay.onChannelMuted = (agent, channel) => { /* ... */ }; +relay.onChannelUnmuted = (agent, channel) => { /* ... */ }; +``` + + +### Agent Definition + +#### ```typescript + +```typescript +.agent('name', { + cli: 'claude' | 'codex' | 'gemini' | 'aider' | 'goose' | 'opencode' | 'droid', + role?: string, + preset?: 'lead' | 'worker' | 'reviewer' | 'analyst', + retries?: number, + model?: string, + interactive?: boolean, // default: true +}) +``` + + +### Step Definition + +#### Agent Steps + +```typescript +.step('name', { + agent: string, + task: string, // supports {{var}} and {{steps.NAME.output}} + dependsOn?: string[], + verification?: VerificationCheck, + retries?: number, +}) +``` + +#### Deterministic Steps (Shell Commands) + +```typescript +.step('verify-files', { + type: 'deterministic', + command: 'test -f src/auth.ts && echo "FILE_EXISTS"', + dependsOn: ['implement'], + captureOutput: true, + failOnError: true, +}) +``` + + +### Common Patterns + +#### Pipeline (sequential handoff) + +```typescript +.pattern('pipeline') +.step('analyze', { agent: 'analyst', task: '...' 
}) +.step('implement', { agent: 'dev', task: '{{steps.analyze.output}}', dependsOn: ['analyze'] }) +.step('test', { agent: 'tester', task: '{{steps.implement.output}}', dependsOn: ['implement'] }) +``` + +#### Error Handling + +```typescript +.onError('fail-fast') // stop on first failure (default) +.onError('continue') // skip failed branches, continue others +.onError('retry', { maxRetries: 3, retryDelayMs: 5000 }) +``` + + +### Multi-File Edit Pattern + +#### When a workflow needs to modify multiple existing files, **use one agent step per file** with a deterministic verify gate after each. Agents reliably edit 1-2 files per step but fail on 4+. + +```yaml +steps: + - name: read-types + type: deterministic + command: cat src/types.ts + captureOutput: true + + - name: edit-types + agent: dev + dependsOn: [read-types] + task: | + Edit src/types.ts. Current contents: + {{steps.read-types.output}} + Add 'pending' to the Status union type. + Only edit this one file. + verification: + type: exit_code + + - name: verify-types + type: deterministic + dependsOn: [edit-types] + command: 'if git diff --quiet src/types.ts; then echo "NOT MODIFIED"; exit 1; fi; echo "OK"' + failOnError: true + + - name: read-service + type: deterministic + dependsOn: [verify-types] + command: cat src/service.ts + captureOutput: true + + - name: edit-service + agent: dev + dependsOn: [read-service] + task: | + Edit src/service.ts. Current contents: + {{steps.read-service.output}} + Add a handlePending() method. + Only edit this one file. 
+ verification: + type: exit_code + + - name: verify-service + type: deterministic + dependsOn: [edit-service] + command: 'if git diff --quiet src/service.ts; then echo "NOT MODIFIED"; exit 1; fi; echo "OK"' + failOnError: true + + # Deterministic commit — never rely on agents to commit + - name: commit + type: deterministic + dependsOn: [verify-service] + command: git add src/types.ts src/service.ts && git commit -m "feat: add pending status" + failOnError: true +``` + + +### File Materialization: Verify Before Proceeding + +After any step that creates files, add a deterministic `file_exists` check before proceeding. Non-interactive agents may exit 0 without writing anything (wrong cwd, stdout instead of disk). + +```yaml +- name: verify-files + type: deterministic + dependsOn: [impl-auth, impl-storage] + command: | + missing=0 + for f in src/auth/credentials.ts src/storage/client.ts; do + if [ ! -f "$f" ]; then echo "MISSING: $f"; missing=$((missing+1)); fi + done + if [ $missing -gt 0 ]; then echo "$missing files missing"; exit 1; fi + echo "All files present" + failOnError: true +``` + + +### DAG Deadlock Anti-Pattern + +```yaml +# WRONG — deadlock: coordinate depends on context, work-a depends on coordinate +steps: + - name: coordinate + dependsOn: [context] # lead waits for WORKER_DONE... + - name: work-a + dependsOn: [coordinate] # ...but work-a can't start until coordinate finishes + +# RIGHT — workers and lead start in parallel +steps: + - name: context + type: deterministic + - name: work-a + dependsOn: [context] # starts with lead + - name: coordinate + dependsOn: [context] # starts with workers + - name: merge + dependsOn: [work-a, coordinate] +``` + + +### Step Sizing + +**One agent, one deliverable.** A step's task prompt should be 10-20 lines max. 
+ +```yaml +# Team pattern: lead + workers on a shared channel +steps: + - name: track-lead-coord + agent: track-lead + dependsOn: [prior-step] + task: | + Lead the track on #my-track. Workers: track-worker-1, track-worker-2. + Post assignments to the channel. Review worker output. + + - name: track-worker-1-impl + agent: track-worker-1 + dependsOn: [prior-step] # same dep as lead — starts concurrently + task: | + Join #my-track. track-lead will post your assignment. + Implement the file as directed. + verification: + type: exit_code + + - name: next-step + dependsOn: [track-lead-coord] # downstream depends on lead, not workers +``` + + +### Supervisor Pattern + +When you set `.pattern('supervisor')` (or `hub-spoke`, `fan-out`), the runner auto-assigns a supervisor agent as owner for worker steps. The supervisor monitors progress, nudges idle workers, and issues `OWNER_DECISION`. + +**Auto-hardening only activates for hub patterns** — not `pipeline` or `dag`. + +| Use case | Pattern | Why | +|----------|---------|-----| +| Sequential, no monitoring | `pipeline` | Simple, no overhead | +| Workers need oversight | `supervisor` | Auto-owner monitors | +| Local/small models | `supervisor` | Supervisor catches stuck workers | +| All non-interactive | `pipeline` or `dag` | No PTY = no supervision needed | + +### Concurrency + +**Cap `maxConcurrency` at 4-6.** Spawning 10+ agents simultaneously causes broker timeouts. 
+ +| Parallel agents | `maxConcurrency` | +|-----------------|-------------------| +| 2-4 | 4 (default safe) | +| 5-10 | 5 | +| 10+ | 6-8 max | + +### Common Mistakes + +| Mistake | Fix | +|---------|-----| +| All workflows run sequentially | Group independent workflows into parallel waves (4-7x speedup) | +| Every step depends on the previous one | Only add `dependsOn` when there's a real data dependency | +| Self-review step with no timeout | Set `timeout: 300_000` (5 min) — Codex hangs in non-interactive review | +| One giant workflow per feature | Split into smaller workflows that can run in parallel waves | +| Adding exit instructions to tasks | Runner handles self-termination automatically | +| Setting `timeoutMs` on agents/steps | Use global `.timeout()` only | +| Using `general` channel | Set `.channel('wf-name')` for isolation | +| `{{steps.X.output}}` without `dependsOn: ['X']` | Output won't be available yet | +| Requiring exact sentinel as only completion gate | Use `exit_code` or `file_exists` verification | +| Writing 100-line task prompts | Split into lead + workers on a channel | +| `maxConcurrency: 16` with many parallel steps | Cap at 5-6 | +| Non-interactive agent reading large files via tools | Pre-read in deterministic step, inject via `{{steps.X.output}}` | +| Workers depending on lead step (deadlock) | Both depend on shared context step | +| `fan-out`/`hub-spoke` for simple parallel workers | Use `dag` instead | +| `pipeline` but expecting auto-supervisor | Only hub patterns auto-harden. 
Use `.pattern('supervisor')` | +| Workers without `preset: 'worker'` in lead+worker flows | Add preset for clean stdout | +| Using `_` in YAML numbers (`timeoutMs: 1_200_000`) | YAML doesn't support `_` separators | +| Workflow timeout under 30 min for complex workflows | Use `3600000` (1 hour) as default | +| `import { workflow }` (ESM) in TypeScript workflows | Use `require('@agent-relay/sdk/workflows')` — most repos are CJS | +| Top-level `await` in TypeScript | Wrap in `async function main() { ... } main().catch(console.error)` | +| Using `createWorkflowRenderer` | Does not exist. Use `.run({ cwd: process.cwd() })` | +| `export default workflow(...)...build()` | No `.build()`. Chain ends with `.run()` inside async main | +| Relative import `'../workflows/builder.js'` | Use `require('@agent-relay/sdk/workflows')` | +| `pattern('single')` on cloud runner | Not supported — use `dag` | +| `pattern('supervisor')` with one agent | Same agent is owner + specialist. Use `dag` | +| Invalid verification type (`type: 'deterministic'`) | Only `exit_code`, `output_contains`, `file_exists`, `custom` are valid | +| Chaining `{{steps.X.output}}` from interactive agents | PTY output is garbled. Use deterministic steps or `preset: 'worker'` | +| Single step editing 4+ files | Agents modify 1-2 then exit. Split to one file per step with verify gates | +| Relying on agents to `git commit` | Agents emit markers without running git. 
Use deterministic commit step | +| File-writing steps without `file_exists` verification | `exit_code` auto-passes even if no file written | +| Manual peer fanout in `handleChannelMessage()` | Use broker-managed channel subscriptions — broker fans out to all subscribers automatically | +| Client-side `personaNames.has(from)` filtering | Use `relay.subscribe()`/`relay.unsubscribe()` — only subscribed agents receive messages | +| Agents receiving noisy cross-channel messages during focused work | Use `relay.mute({ agent, channel })` to silence non-primary channels without leaving them | +| Hardcoding all channels at spawn time | Use `agent.subscribe()` / `agent.unsubscribe()` for dynamic channel membership post-spawn | + +### YAML Alternative + +```yaml +version: '1.0' +name: my-workflow +swarm: + pattern: dag + channel: wf-my-workflow +agents: + - name: lead + cli: claude + role: Architect + - name: worker + cli: codex + role: Implementer +workflows: + - name: default + steps: + - name: plan + agent: lead + task: 'Produce a detailed implementation plan.' + - name: implement + agent: worker + task: 'Implement: {{steps.plan.output}}' + dependsOn: [plan] + verification: + type: exit_code +``` + + +### Available Swarm Patterns + +`dag` (default), `fan-out`, `pipeline`, `hub-spoke`, `consensus`, `mesh`, `handoff`, `cascade`, `debate`, `hierarchical`, `map-reduce`, `scatter-gather`, `supervisor`, `reflection`, `red-team`, `verifier`, `auction`, `escalation`, `saga`, `circuit-breaker`, `blackboard`, `swarm` + +See skill `choosing-swarm-patterns` for pattern selection guidance. 
diff --git a/.claude/skills/choosing-swarm-patterns b/.claude/skills/choosing-swarm-patterns deleted file mode 120000 index 93ca845fd..000000000 --- a/.claude/skills/choosing-swarm-patterns +++ /dev/null @@ -1 +0,0 @@ -../../skills/choosing-swarm-patterns \ No newline at end of file diff --git a/skills/choosing-swarm-patterns/SKILL.md b/.claude/skills/choosing-swarm-patterns/SKILL.md similarity index 100% rename from skills/choosing-swarm-patterns/SKILL.md rename to .claude/skills/choosing-swarm-patterns/SKILL.md diff --git a/.claude/skills/running-headless-orchestrator b/.claude/skills/running-headless-orchestrator deleted file mode 120000 index 55d0eaa16..000000000 --- a/.claude/skills/running-headless-orchestrator +++ /dev/null @@ -1 +0,0 @@ -../../skills/running-headless-orchestrator \ No newline at end of file diff --git a/skills/running-headless-orchestrator/SKILL.md b/.claude/skills/running-headless-orchestrator/SKILL.md similarity index 81% rename from skills/running-headless-orchestrator/SKILL.md rename to .claude/skills/running-headless-orchestrator/SKILL.md index 9f3274e5e..fd66c97e1 100644 --- a/skills/running-headless-orchestrator/SKILL.md +++ b/.claude/skills/running-headless-orchestrator/SKILL.md @@ -27,10 +27,10 @@ A headless orchestrator is an agent that: | Step | Command/Tool | |------|--------------| | Verify installation | `which agent-relay` or `npx agent-relay --version` | -| Start infrastructure | `agent-relay up --background --no-dashboard` | +| Start infrastructure | `agent-relay up --no-dashboard --verbose` | | Check status | `agent-relay status` | | Spawn worker | `agent-relay spawn Worker1 claude "task"` | -| List workers | `agent-relay agents` | +| List workers | `agent-relay who` | | View worker logs | `agent-relay agents:logs Worker1` | | Send message | `agent-relay send Worker1 "message"` | | Release worker | `agent-relay release Worker1` | @@ -53,11 +53,17 @@ npx agent-relay --version ### Step 1: Start Infrastructure +Prefer a 
**foreground stdio broker** first. Background mode can be flaky in some environments and may report "started" while `agent-relay status` still shows `STOPPED`. + ```bash -# Start broker in background (no dashboard needed for headless) -agent-relay up --background --no-dashboard +# Preferred: run broker in foreground/stdin mode and keep the session open +agent-relay up --no-dashboard --verbose +``` -# Verify it's running +Verify broker readiness before spawning any workers: + +```bash +# Must show "running" before you spawn workers agent-relay status ``` @@ -76,6 +82,12 @@ mcp__relaycast__agent_add( ) ``` +CLI equivalent: + +```bash +agent-relay spawn Worker1 claude "Implement the authentication module following the existing patterns" +``` + ### Step 3: Monitor and Coordinate ``` @@ -121,8 +133,8 @@ agent-relay release Worker1 ### Monitoring Workers (Essential) ```bash -# List all active agents with status -agent-relay agents +# Show currently active agents +agent-relay who # View real-time output from a worker (critical for debugging) agent-relay agents:logs Worker1 @@ -161,7 +173,7 @@ Run: which agent-relay || npx agent-relay --version If not found: npm install -g agent-relay ## Step 2: Start Infrastructure -Run: agent-relay up --background --no-dashboard +Run: agent-relay up --no-dashboard --verbose Verify: agent-relay status (should show "running") ## Step 3: Manage Your Team @@ -170,7 +182,7 @@ Spawn workers: agent-relay spawn Worker1 claude "Task description" Monitor workers (do this frequently): - agent-relay agents # List active workers + agent-relay who # List active workers agent-relay agents:logs Worker1 # View worker output/progress Send instructions: @@ -204,8 +216,10 @@ The broker emits these events (available via SDK subscriptions): |---------|-----| | `agent-relay: command not found` | Install with `npm i -g agent-relay` or use `npx agent-relay` | | "Nested session" error | Broker handles this automatically; if running manually, unset `CLAUDECODE` 
env var | -| Broker not starting | Check `agent-relay status`; may need `agent-relay down` first | -| Workers not connecting | Ensure broker started; check `agent-relay agents` | +| Broker not starting | Try `agent-relay down` first, then use foreground `agent-relay up --no-dashboard --verbose` to see readiness logs | +| Background broker says started but status is STOPPED | Prefer foreground mode for that project/session; background mode may have detached incorrectly | +| Spawn fails with `internal reply dropped` | The broker is likely not fully ready yet; wait for readiness, then spawn one worker first | +| Workers not connecting | Ensure broker started; check `agent-relay who` and worker logs | | Not monitoring workers | Use `agent-relay agents:logs <name>` frequently to track progress | | Workers seem stuck | Check logs with `agent-relay agents:logs <name>` for errors | | Messages not delivered | Check `agent-relay history` to verify message flow | diff --git a/.claude/skills/using-agent-relay b/.claude/skills/using-agent-relay deleted file mode 120000 index b2e02cab0..000000000 --- a/.claude/skills/using-agent-relay +++ /dev/null @@ -1 +0,0 @@ -../../skills/using-agent-relay \ No newline at end of file diff --git a/skills/using-agent-relay/SKILL.md b/.claude/skills/using-agent-relay/SKILL.md similarity index 100% rename from skills/using-agent-relay/SKILL.md rename to .claude/skills/using-agent-relay/SKILL.md diff --git a/.claude/skills/writing-agent-relay-workflows b/.claude/skills/writing-agent-relay-workflows deleted file mode 120000 index 2286b4ac0..000000000 --- a/.claude/skills/writing-agent-relay-workflows +++ /dev/null @@ -1 +0,0 @@ -../../skills/writing-agent-relay-workflows \ No newline at end of file diff --git a/.claude/skills/writing-agent-relay-workflows/SKILL.md b/.claude/skills/writing-agent-relay-workflows/SKILL.md new file mode 100644 index 000000000..96dd8d8a9 --- /dev/null +++ b/.claude/skills/writing-agent-relay-workflows/SKILL.md @@ -0,0 +1,591 @@ +--- 
+name: writing-agent-relay-workflows +description: Use when building multi-agent workflows with the relay broker-sdk - covers the WorkflowBuilder API, DAG step dependencies, agent definitions, step output chaining via {{steps.X.output}}, verification gates, evidence-based completion, owner decisions, dedicated channels, dynamic channel management (subscribe/unsubscribe/mute/unmute), swarm patterns, error handling, event listeners, step sizing rules, authoring best practices, and the lead+workers team pattern for complex steps +--- + +# Writing Agent Relay Workflows + +## Overview + +The relay broker-sdk workflow system orchestrates multiple AI agents (Claude, Codex, Gemini, Aider, Goose) through typed DAG-based workflows. Workflows can be written in **TypeScript** (preferred), **Python**, or **YAML**. + +**Language preference:** TypeScript > Python > YAML. Use TypeScript unless the project is Python-only or a simple config-driven workflow suits YAML. + +## When to Use + +- Building multi-agent workflows with step dependencies +- Orchestrating different AI CLIs (claude, codex, gemini, aider, goose) +- Creating DAG, pipeline, fan-out, or other swarm patterns +- Needing verification gates, retries, or step output chaining +- Dynamic channel management: agents joining/leaving/muting channels mid-workflow + +## Quick Reference + +```typescript +const { workflow } = require('@agent-relay/sdk/workflows'); + +async function main() { +const result = await workflow('my-workflow') + .description('What this workflow does') + .pattern('dag') // or 'pipeline', 'fan-out', etc. 
+ .channel('wf-my-workflow') // dedicated channel (auto-generated if omitted) + .maxConcurrency(3) + .timeout(3_600_000) // global timeout (ms) + + .agent('lead', { cli: 'claude', role: 'Architect', retries: 2 }) + .agent('worker', { cli: 'codex', role: 'Implementer', retries: 2 }) + + .step('plan', { + agent: 'lead', + task: `Analyze the codebase and produce a plan.`, + retries: 2, + verification: { type: 'output_contains', value: 'PLAN_COMPLETE' }, + }) + .step('implement', { + agent: 'worker', + task: `Implement based on this plan:\n{{steps.plan.output}}`, + dependsOn: ['plan'], + verification: { type: 'exit_code' }, + }) + + .onError('retry', { maxRetries: 2, retryDelayMs: 10_000 }) + .run({ cwd: process.cwd() }); + + console.log('Result:', result.status); +} + +main().catch(console.error); +``` + +**Critical TypeScript rules:** +1. Use `require()`, not `import` — most projects default to CJS +2. Wrap in `async function main()` — CJS does not support top-level await +3. Use `.run({ cwd: process.cwd() })` — `createWorkflowRenderer` does not exist +4. Validate with `--dry-run` before running: `agent-relay run --dry-run workflow.ts` + +## ⚡ Parallelism — Design for Speed + +**This is the most important design consideration.** Sequential workflows waste hours. Always design for maximum parallelism. + +### Cross-Workflow Parallelism: Wave Planning + +When a project has multiple workflows, group independent ones into parallel waves: + +```bash +# BAD — sequential (14 hours for 27 workflows at ~30 min each) +agent-relay run workflows/34-sst-wiring.ts +agent-relay run workflows/35-env-config.ts +agent-relay run workflows/36-loading-states.ts +# ... 
one at a time + +# GOOD — parallel waves (3-4 hours for 27 workflows) +# Wave 1: independent infra (parallel) +agent-relay run workflows/34-sst-wiring.ts & +agent-relay run workflows/35-env-config.ts & +agent-relay run workflows/36-loading-states.ts & +agent-relay run workflows/37-responsive.ts & +wait +git add -A && git commit -m "Wave 1" + +# Wave 2: testing (parallel — independent test suites) +agent-relay run workflows/40-unit-tests.ts & +agent-relay run workflows/41-integration-tests.ts & +agent-relay run workflows/42-e2e-tests.ts & +wait +git add -A && git commit -m "Wave 2" +``` + +### Wave Planning Heuristics + +Two workflows can run in parallel if they don't have write-write or write-read file conflicts: + +| Touch Zone | Can Parallelize? | +|---|---| +| Different `packages/*/src/` dirs | ✅ Yes | +| Different `app/` routes | ✅ Yes | +| Same package, different subdirs | ⚠️ Usually yes | +| Same files (shared config, root package.json) | ❌ No — sequential or same wave with merge | +| Explicit dependency | ❌ No — ordered waves | + +### Declare File Scope for Planning + +Help wave planners (human or automated) understand what each workflow touches: + +```typescript +workflow('48-comparison-mode') + .packages(['web', 'core']) // monorepo packages touched + .isolatedFrom(['49-feedback-system']) // explicitly safe to parallelize + .requiresBefore(['46-admin-dashboard']) // explicit ordering constraint +``` + +### Within-Workflow Parallelism + +Use shared `dependsOn` to fan out independent sub-tasks: + +```typescript +// BAD — unnecessary sequential chain +.step('fix-component-a', { agent: 'worker', dependsOn: ['review'] }) +.step('fix-component-b', { agent: 'worker', dependsOn: ['fix-component-a'] }) // why wait? 
+ +// GOOD — parallel fan-out, merge at the end +.step('fix-component-a', { agent: 'impl-1', dependsOn: ['review'] }) +.step('fix-component-b', { agent: 'impl-2', dependsOn: ['review'] }) // same dep = parallel +.step('verify-all', { agent: 'reviewer', dependsOn: ['fix-component-a', 'fix-component-b'] }) +``` + +### Impact + +Real-world example (Relayed — 60 workflows): +- **Sequential**: ~30 min × 60 = **30 hours** +- **Parallel waves (4-6 per wave)**: ~12 waves × 35 min = **~7 hours** (4x faster) +- **Aggressive parallelism (8-way)**: **~4 hours** (7.5x faster) + +--- + +## Key Concepts + +### Step Output Chaining + +Use `{{steps.STEP_NAME.output}}` in a downstream step's task to inject the prior step's terminal output. + +**Only chain output from clean sources:** +- Deterministic steps (shell commands — always clean) +- Non-interactive agents (`preset: 'worker'` — clean stdout) + +**Never chain from interactive agents** (`cli: 'claude'` without preset) — PTY output includes spinners, ANSI codes, and TUI chrome. Instead, have the agent write to a file, then read it in a deterministic step. + +### Verification Gates + +```typescript +verification: { type: 'exit_code' } // preferred for code-editing steps +verification: { type: 'output_contains', value: 'DONE' } // optional accelerator +verification: { type: 'file_exists', value: 'src/out.ts' } // deterministic file check +``` + +Only these four types are valid: `exit_code`, `output_contains`, `file_exists`, `custom`. Invalid types are silently ignored and fall through to process-exit auto-pass. + +**Verification token gotcha:** If the token appears in the task text, the runner requires it **twice** in output (once from task echo, once from agent). Prefer `exit_code` for code-editing steps to avoid this. + +### DAG Dependencies + +Steps with `dependsOn` wait for all listed steps. Steps with no dependencies start immediately. 
Steps sharing the same `dependsOn` run in parallel: + +```typescript +.step('fix-types', { agent: 'worker', dependsOn: ['review'], ... }) +.step('fix-tests', { agent: 'worker', dependsOn: ['review'], ... }) +.step('final', { agent: 'lead', dependsOn: ['fix-types', 'fix-tests'], ... }) +``` + +### Self-Termination + +Do NOT add exit instructions to task strings. The runner handles this automatically. + +### Step Completion Model + +Steps complete through a multi-signal pipeline (highest priority first): + +1. **Deterministic verification** — `exit_code`, `file_exists`, `output_contains` pass → immediate completion +2. **Owner decision** — `OWNER_DECISION: COMPLETE|INCOMPLETE_RETRY|INCOMPLETE_FAIL` +3. **Evidence-based** — channel signals, file artifacts, clean exit code +4. **Marker fast-path** — `STEP_COMPLETE:` (optional accelerator) +5. **Process-exit fallback** — agent exits 0 with no signals → completes after grace period + +**Key principle:** No single signal is mandatory. Describe the deliverable, not what to print. + +### Dynamic Channel Management + +Agents can dynamically subscribe, unsubscribe, mute, and unmute channels **after spawn**. This eliminates the need for client-side channel filtering and manual peer fanout. 
+ +#### SDK API + +```typescript +// Subscribe an agent to additional channels post-spawn +relay.subscribe({ agent: 'security-auditor', channels: ['review-pr-456'] }); + +// Unsubscribe — agent leaves the channel entirely +relay.unsubscribe({ agent: 'security-auditor', channels: ['general'] }); + +// Mute — agent stays subscribed (history access) but messages are NOT injected into PTY +relay.mute({ agent: 'security-auditor', channel: 'review-pr-123' }); + +// Unmute — resume PTY injection +relay.unmute({ agent: 'security-auditor', channel: 'review-pr-123' }); +``` + +Agent-level methods are also available: + +```typescript +const agent = await relay.claude.spawn({ name: 'auditor', channels: ['ch-a'] }); +await agent.subscribe(['ch-b']); // now subscribed to ch-a and ch-b +await agent.mute('ch-a'); // ch-a messages silenced (still in history) +await agent.unmute('ch-a'); // ch-a messages resume +await agent.unsubscribe(['ch-b']); // leaves ch-b +console.log(agent.channels); // ['ch-a'] +console.log(agent.mutedChannels); // [] +``` + +#### Semantics + +| Operation | Channel membership | PTY injection | History access | +|---------------|-------------------|---------------|----------------| +| `subscribe` | Yes | Yes | Yes | +| `unsubscribe` | No | No | No (leaves) | +| `mute` | Yes (stays) | No (silenced) | Yes (can query)| +| `unmute` | Yes | Yes (resumes) | Yes | + +#### Events + +```typescript +relay.onChannelSubscribed = (agent, channels) => { /* ... */ }; +relay.onChannelUnsubscribed = (agent, channels) => { /* ... */ }; +relay.onChannelMuted = (agent, channel) => { /* ... */ }; +relay.onChannelUnmuted = (agent, channel) => { /* ... 
*/ }; +``` + +#### When to Use in Workflows + +- **Multi-PR chat sessions**: Agents focused on one PR can mute other PR channels to reduce noise +- **Phase transitions**: Subscribe agents to new channels as work progresses between phases +- **Team isolation**: Workers mute the main coordination channel during focused work, unmute for review +- **Dynamic fanout**: A lead subscribes workers to sub-channels at runtime based on task decomposition + +#### What This Eliminates + +With broker-managed subscriptions, you no longer need: +1. Client-side persona filtering (`personaNames.has(from)` checks) +2. Channel prefix regex for message routing +3. Manual peer fanout (iterating agents to forward messages) +4. Dedup caches for dual-path delivery + +## Agent Definition + +```typescript +.agent('name', { + cli: 'claude' | 'codex' | 'gemini' | 'aider' | 'goose' | 'opencode' | 'droid', + role?: string, + preset?: 'lead' | 'worker' | 'reviewer' | 'analyst', + retries?: number, + model?: string, + interactive?: boolean, // default: true +}) +``` + +**Post-spawn channel operations** (available on Agent instances and AgentRelay facade): + +```typescript +// Agent instance methods +agent.subscribe(channels: string[]): Promise +agent.unsubscribe(channels: string[]): Promise +agent.mute(channel: string): Promise +agent.unmute(channel: string): Promise +agent.channels: string[] // current subscribed channels +agent.mutedChannels: string[] // currently muted channels + +// AgentRelay facade methods (by agent name) +relay.subscribe({ agent: string, channels: string[] }): Promise +relay.unsubscribe({ agent: string, channels: string[] }): Promise +relay.mute({ agent: string, channel: string }): Promise +relay.unmute({ agent: string, channel: string }): Promise +``` + +| Preset | Interactive | Relay access | Use for | +| ---------- | ------------- | ------------ | ---------------------------------------------------- | +| `lead` | yes (PTY) | yes | Coordination, monitoring channels | +| 
`worker` | no (subprocess) | no | Bounded tasks, structured stdout | +| `reviewer` | no (subprocess) | no | Reading artifacts, producing verdicts | +| `analyst` | no (subprocess) | no | Reading code/files, writing findings | + +Non-interactive presets run via one-shot mode (`claude -p`, `codex exec`). Output is clean and available via `{{steps.X.output}}`. + +**Critical rule:** Pre-inject content into non-interactive agents. Don't ask them to read large files — pre-read in a deterministic step and inject via `{{steps.X.output}}`. + +## Step Definition + +### Agent Steps + +```typescript +.step('name', { + agent: string, + task: string, // supports {{var}} and {{steps.NAME.output}} + dependsOn?: string[], + verification?: VerificationCheck, + retries?: number, +}) +``` + +### Deterministic Steps (Shell Commands) + +```typescript +.step('verify-files', { + type: 'deterministic', + command: 'test -f src/auth.ts && echo "FILE_EXISTS"', + dependsOn: ['implement'], + captureOutput: true, + failOnError: true, +}) +``` + +Use for: file checks, reading files for injection, build/test gates, git operations. + +## Common Patterns + +### Pipeline (sequential handoff) + +```typescript +.pattern('pipeline') +.step('analyze', { agent: 'analyst', task: '...' }) +.step('implement', { agent: 'dev', task: '{{steps.analyze.output}}', dependsOn: ['analyze'] }) +.step('test', { agent: 'tester', task: '{{steps.implement.output}}', dependsOn: ['implement'] }) +``` + +### Error Handling + +```typescript +.onError('fail-fast') // stop on first failure (default) +.onError('continue') // skip failed branches, continue others +.onError('retry', { maxRetries: 3, retryDelayMs: 5000 }) +``` + +## Multi-File Edit Pattern + +When a workflow needs to modify multiple existing files, **use one agent step per file** with a deterministic verify gate after each. Agents reliably edit 1-2 files per step but fail on 4+. 
+ +```yaml +steps: + - name: read-types + type: deterministic + command: cat src/types.ts + captureOutput: true + + - name: edit-types + agent: dev + dependsOn: [read-types] + task: | + Edit src/types.ts. Current contents: + {{steps.read-types.output}} + Add 'pending' to the Status union type. + Only edit this one file. + verification: + type: exit_code + + - name: verify-types + type: deterministic + dependsOn: [edit-types] + command: 'if git diff --quiet src/types.ts; then echo "NOT MODIFIED"; exit 1; fi; echo "OK"' + failOnError: true + + - name: read-service + type: deterministic + dependsOn: [verify-types] + command: cat src/service.ts + captureOutput: true + + - name: edit-service + agent: dev + dependsOn: [read-service] + task: | + Edit src/service.ts. Current contents: + {{steps.read-service.output}} + Add a handlePending() method. + Only edit this one file. + verification: + type: exit_code + + - name: verify-service + type: deterministic + dependsOn: [edit-service] + command: 'if git diff --quiet src/service.ts; then echo "NOT MODIFIED"; exit 1; fi; echo "OK"' + failOnError: true + + # Deterministic commit — never rely on agents to commit + - name: commit + type: deterministic + dependsOn: [verify-service] + command: git add src/types.ts src/service.ts && git commit -m "feat: add pending status" + failOnError: true +``` + +**Key rules:** +- Read the file in a deterministic step right before the edit (not all files upfront) +- Tell the agent "Only edit this one file" to prevent it touching other files +- Verify with `git diff --quiet` after each edit — fail fast if the agent didn't write +- Always commit with a deterministic step, never an agent step + +## File Materialization: Verify Before Proceeding + +After any step that creates files, add a deterministic `file_exists` check before proceeding. Non-interactive agents may exit 0 without writing anything (wrong cwd, stdout instead of disk). 
+ +```yaml +- name: verify-files + type: deterministic + dependsOn: [impl-auth, impl-storage] + command: | + missing=0 + for f in src/auth/credentials.ts src/storage/client.ts; do + if [ ! -f "$f" ]; then echo "MISSING: $f"; missing=$((missing+1)); fi + done + if [ $missing -gt 0 ]; then echo "$missing files missing"; exit 1; fi + echo "All files present" + failOnError: true +``` + +**Rules for file-writing tasks:** +1. Use full paths from project root — say `src/auth/credentials.ts`, not `credentials.ts` +2. Add `IMPORTANT: Write the file to disk. Do NOT output to stdout.` +3. Use `file_exists` verification for creation steps (not just `exit_code`) +4. Gate all downstream steps on the verify step + +## DAG Deadlock Anti-Pattern + +```yaml +# WRONG — deadlock: coordinate depends on context, work-a depends on coordinate +steps: + - name: coordinate + dependsOn: [context] # lead waits for WORKER_DONE... + - name: work-a + dependsOn: [coordinate] # ...but work-a can't start until coordinate finishes + +# RIGHT — workers and lead start in parallel +steps: + - name: context + type: deterministic + - name: work-a + dependsOn: [context] # starts with lead + - name: coordinate + dependsOn: [context] # starts with workers + - name: merge + dependsOn: [work-a, coordinate] +``` + +**Rule:** if a lead step's task mentions downstream step names alongside waiting keywords, that's a deadlock. + +## Step Sizing + +**One agent, one deliverable.** A step's task prompt should be 10-20 lines max. + +Split into a **lead + workers team** when: +- The task requires a 50+ line prompt +- The deliverable is multiple files that must be consistent +- You need one agent to verify another's output + +```yaml +# Team pattern: lead + workers on a shared channel +steps: + - name: track-lead-coord + agent: track-lead + dependsOn: [prior-step] + task: | + Lead the track on #my-track. Workers: track-worker-1, track-worker-2. + Post assignments to the channel. Review worker output. 
+ + - name: track-worker-1-impl + agent: track-worker-1 + dependsOn: [prior-step] # same dep as lead — starts concurrently + task: | + Join #my-track. track-lead will post your assignment. + Implement the file as directed. + verification: + type: exit_code + + - name: next-step + dependsOn: [track-lead-coord] # downstream depends on lead, not workers +``` + +## Supervisor Pattern + +When you set `.pattern('supervisor')` (or `hub-spoke`, `fan-out`), the runner auto-assigns a supervisor agent as owner for worker steps. The supervisor monitors progress, nudges idle workers, and issues `OWNER_DECISION`. + +**Auto-hardening only activates for hub patterns** — not `pipeline` or `dag`. + +| Use case | Pattern | Why | +|----------|---------|-----| +| Sequential, no monitoring | `pipeline` | Simple, no overhead | +| Workers need oversight | `supervisor` | Auto-owner monitors | +| Local/small models | `supervisor` | Supervisor catches stuck workers | +| All non-interactive | `pipeline` or `dag` | No PTY = no supervision needed | + +## Concurrency + +**Cap `maxConcurrency` at 4-6.** Spawning 10+ agents simultaneously causes broker timeouts. 
+ +| Parallel agents | `maxConcurrency` | +|-----------------|-------------------| +| 2-4 | 4 (default safe) | +| 5-10 | 5 | +| 11+ | 6-8 max | + +## Common Mistakes + +| Mistake | Fix | +|---------|-----| +| All workflows run sequentially | Group independent workflows into parallel waves (4-7x speedup) | +| Every step depends on the previous one | Only add `dependsOn` when there's a real data dependency | +| Self-review step with no timeout | Set `timeout: 300_000` (5 min) — Codex hangs in non-interactive review | +| One giant workflow per feature | Split into smaller workflows that can run in parallel waves | +| Adding exit instructions to tasks | Runner handles self-termination automatically | +| Setting `timeoutMs` on agents/steps | Use global `.timeout()` only | +| Using `general` channel | Set `.channel('wf-name')` for isolation | +| `{{steps.X.output}}` without `dependsOn: ['X']` | Output won't be available yet | +| Requiring exact sentinel as only completion gate | Use `exit_code` or `file_exists` verification | +| Writing 100-line task prompts | Split into lead + workers on a channel | +| `maxConcurrency: 16` with many parallel steps | Cap at 5-6 | +| Non-interactive agent reading large files via tools | Pre-read in deterministic step, inject via `{{steps.X.output}}` | +| Workers depending on lead step (deadlock) | Both depend on shared context step | +| `fan-out`/`hub-spoke` for simple parallel workers | Use `dag` instead | +| `pipeline` but expecting auto-supervisor | Only hub patterns auto-harden. 
Use `.pattern('supervisor')` | +| Workers without `preset: 'worker'` in lead+worker flows | Add preset for clean stdout | +| Using `_` in YAML numbers (`timeoutMs: 1_200_000`) | YAML doesn't support `_` separators | +| Workflow timeout under 30 min for complex workflows | Use `3600000` (1 hour) as default | +| `import { workflow }` (ESM) in TypeScript workflows | Use `require('@agent-relay/sdk/workflows')` — most repos are CJS | +| Top-level `await` in TypeScript | Wrap in `async function main() { ... } main().catch(console.error)` | +| Using `createWorkflowRenderer` | Does not exist. Use `.run({ cwd: process.cwd() })` | +| `export default workflow(...)...build()` | No `.build()`. Chain ends with `.run()` inside async main | +| Relative import `'../workflows/builder.js'` | Use `require('@agent-relay/sdk/workflows')` | +| `pattern('single')` on cloud runner | Not supported — use `dag` | +| `pattern('supervisor')` with one agent | Same agent is owner + specialist. Use `dag` | +| Invalid verification type (`type: 'deterministic'`) | Only `exit_code`, `output_contains`, `file_exists`, `custom` are valid | +| Chaining `{{steps.X.output}}` from interactive agents | PTY output is garbled. Use deterministic steps or `preset: 'worker'` | +| Single step editing 4+ files | Agents modify 1-2 then exit. Split to one file per step with verify gates | +| Relying on agents to `git commit` | Agents emit markers without running git. 
Use deterministic commit step | +| File-writing steps without `file_exists` verification | `exit_code` auto-passes even if no file written | +| Manual peer fanout in `handleChannelMessage()` | Use broker-managed channel subscriptions — broker fans out to all subscribers automatically | +| Client-side `personaNames.has(from)` filtering | Use `relay.subscribe()`/`relay.unsubscribe()` — only subscribed agents receive messages | +| Agents receiving noisy cross-channel messages during focused work | Use `relay.mute({ agent, channel })` to silence non-primary channels without leaving them | +| Hardcoding all channels at spawn time | Use `agent.subscribe()` / `agent.unsubscribe()` for dynamic channel membership post-spawn | + +## YAML Alternative + +```yaml +version: '1.0' +name: my-workflow +swarm: + pattern: dag + channel: wf-my-workflow +agents: + - name: lead + cli: claude + role: Architect + - name: worker + cli: codex + role: Implementer +workflows: + - name: default + steps: + - name: plan + agent: lead + task: 'Produce a detailed implementation plan.' + - name: implement + agent: worker + task: 'Implement: {{steps.plan.output}}' + dependsOn: [plan] + verification: + type: exit_code +``` + +Run with: `agent-relay run path/to/workflow.yaml` + +## Available Swarm Patterns + +`dag` (default), `fan-out`, `pipeline`, `hub-spoke`, `consensus`, `mesh`, `handoff`, `cascade`, `debate`, `hierarchical`, `map-reduce`, `scatter-gather`, `supervisor`, `reflection`, `red-team`, `verifier`, `auction`, `escalation`, `saga`, `circuit-breaker`, `blackboard`, `swarm` + +See skill `choosing-swarm-patterns` for pattern selection guidance. 
diff --git a/prpm.lock b/prpm.lock index 1a8fbdd02..ef6b4ad3f 100644 --- a/prpm.lock +++ b/prpm.lock @@ -142,7 +142,87 @@ "sourceFormat": "claude", "sourceSubtype": "skill", "installedPath": ".agents/skills/creating-agent-skills-skill/SKILL.md" + }, + "@agent-relay/choosing-swarm-patterns#claude": { + "version": "1.0.0", + "resolved": "https://registry.prpm.dev/api/v1/packages/%40agent-relay%2Fchoosing-swarm-patterns/1.0.0.tar.gz", + "integrity": "sha256-2b28661abb540c56b46ad980b238589c6dcf59faaa3e66c80c72f72c01407f38", + "format": "claude", + "subtype": "skill", + "sourceFormat": "claude", + "sourceSubtype": "skill", + "installedPath": ".claude/skills/choosing-swarm-patterns/SKILL.md" + }, + "@agent-relay/writing-agent-relay-workflows#claude": { + "version": "1.2.0", + "resolved": "https://registry.prpm.dev/api/v1/packages/%40agent-relay%2Fwriting-agent-relay-workflows/1.2.0.tar.gz", + "integrity": "sha256-426e8353842261c32a93fad228cb6aab6c27a66923e21585e51d6f497511095b", + "format": "claude", + "subtype": "skill", + "sourceFormat": "claude", + "sourceSubtype": "skill", + "installedPath": ".claude/skills/writing-agent-relay-workflows/SKILL.md" + }, + "@agent-relay/running-headless-orchestrator#claude": { + "version": "1.0.1", + "resolved": "https://registry.prpm.dev/api/v1/packages/%40agent-relay%2Frunning-headless-orchestrator/1.0.1.tar.gz", + "integrity": "sha256-afb7cdb67ffb22a648de756cffcac881126ec5d0bad77a524345cf083bd0d6d2", + "format": "claude", + "subtype": "skill", + "sourceFormat": "claude", + "sourceSubtype": "skill", + "installedPath": ".claude/skills/running-headless-orchestrator/SKILL.md" + }, + "@agent-relay/using-agent-relay#claude": { + "version": "1.2.0", + "resolved": "https://registry.prpm.dev/api/v1/packages/%40agent-relay%2Fusing-agent-relay/1.2.0.tar.gz", + "integrity": "sha256-bb68bcd7bf1af535b9e435033ba7e8efccc29210aad53111a0f84838a95667f8", + "format": "claude", + "subtype": "skill", + "sourceFormat": "claude", + "sourceSubtype": "skill", 
+ "installedPath": ".claude/skills/using-agent-relay/SKILL.md" + }, + "@agent-relay/choosing-swarm-patterns#codex": { + "version": "1.0.0", + "resolved": "https://registry.prpm.dev/api/v1/packages/%40agent-relay%2Fchoosing-swarm-patterns/1.0.0.tar.gz", + "integrity": "sha256-2b28661abb540c56b46ad980b238589c6dcf59faaa3e66c80c72f72c01407f38", + "format": "codex", + "subtype": "skill", + "sourceFormat": "claude", + "sourceSubtype": "skill", + "installedPath": ".agents/skills/choosing-swarm-patterns/SKILL.md" + }, + "@agent-relay/writing-agent-relay-workflows#codex": { + "version": "1.2.0", + "resolved": "https://registry.prpm.dev/api/v1/packages/%40agent-relay%2Fwriting-agent-relay-workflows/1.2.0.tar.gz", + "integrity": "sha256-426e8353842261c32a93fad228cb6aab6c27a66923e21585e51d6f497511095b", + "format": "codex", + "subtype": "skill", + "sourceFormat": "claude", + "sourceSubtype": "skill", + "installedPath": ".agents/skills/writing-agent-relay-workflows/SKILL.md" + }, + "@agent-relay/using-agent-relay#codex": { + "version": "1.2.0", + "resolved": "https://registry.prpm.dev/api/v1/packages/%40agent-relay%2Fusing-agent-relay/1.2.0.tar.gz", + "integrity": "sha256-bb68bcd7bf1af535b9e435033ba7e8efccc29210aad53111a0f84838a95667f8", + "format": "codex", + "subtype": "skill", + "sourceFormat": "claude", + "sourceSubtype": "skill", + "installedPath": ".agents/skills/using-agent-relay/SKILL.md" + }, + "@agent-relay/running-headless-orchestrator#codex": { + "version": "1.0.1", + "resolved": "https://registry.prpm.dev/api/v1/packages/%40agent-relay%2Frunning-headless-orchestrator/1.0.1.tar.gz", + "integrity": "sha256-afb7cdb67ffb22a648de756cffcac881126ec5d0bad77a524345cf083bd0d6d2", + "format": "codex", + "subtype": "skill", + "sourceFormat": "claude", + "sourceSubtype": "skill", + "installedPath": ".agents/skills/running-headless-orchestrator/SKILL.md" } }, - "generated": "2026-03-13T09:55:30.873Z" + "generated": "2026-03-31T10:30:25.962Z" } \ No newline at end of file diff 
--git a/skills/writing-agent-relay-workflows/SKILL.md b/skills/writing-agent-relay-workflows/SKILL.md deleted file mode 100644 index 901fa6c04..000000000 --- a/skills/writing-agent-relay-workflows/SKILL.md +++ /dev/null @@ -1,827 +0,0 @@ ---- -name: writing-agent-relay-workflows -description: Use when building multi-agent workflows with the relay broker-sdk - covers the WorkflowBuilder API, DAG step dependencies, agent definitions, step output chaining via {{steps.X.output}}, verification gates, evidence-based completion, owner decisions, dedicated channels, swarm patterns, error handling, event listeners, step sizing rules, authoring best practices, and the lead+workers team pattern for complex steps ---- - -# Writing Agent Relay Workflows - -## Overview - -The relay broker-sdk workflow system orchestrates multiple AI agents (Claude, Codex, Gemini, Aider, Goose) through typed DAG-based workflows. Workflows are defined via a fluent builder API or YAML files. - -## When to Use - -- Building multi-agent workflows with step dependencies -- Orchestrating different AI CLIs (claude, codex, gemini, aider, goose) -- Creating DAG, pipeline, fan-out, or other swarm patterns -- Needing verification gates, retries, or step output chaining - -## Quick Reference - -```typescript -const { workflow } = require('@agent-relay/sdk/workflows'); - -async function main() { -const result = await workflow('my-workflow') - .description('What this workflow does') - .pattern('dag') // or 'pipeline', 'fan-out', etc. 
- .channel('wf-my-workflow') // dedicated channel (auto-generated if omitted) - .maxConcurrency(3) - .timeout(3_600_000) // global timeout (ms) - - .agent('lead', { cli: 'claude', role: 'Architect', retries: 2 }) - .agent('worker', { cli: 'codex', role: 'Implementer', retries: 2 }) - - .step('plan', { - agent: 'lead', - task: `Analyze the codebase and produce a plan.`, - retries: 2, - verification: { type: 'output_contains', value: 'PLAN_COMPLETE' }, // optional accelerator - }) - .step('implement', { - agent: 'worker', - task: `Implement based on this plan:\n{{steps.plan.output}}`, - dependsOn: ['plan'], - verification: { type: 'exit_code' }, - }) - - .onError('retry', { maxRetries: 2, retryDelayMs: 10_000 }) - .run({ onEvent: (e) => console.log(e.type), vars: { task: 'Add auth' } }); -} - -main().catch(console.error); -``` - -## Key Concepts - -### Step Output Chaining - -Use `{{steps.STEP_NAME.output}}` in a downstream step's task to inject the prior step's terminal output. The runner captures PTY output automatically. - -### Verification Gates - -Steps can include verification checks. These are **one input** to the completion decision — not the only one. The runner uses a multi-signal pipeline: deterministic verification, owner judgment, and evidence collection. - -```typescript -verification: { type: 'exit_code' } // preferred for code-editing steps -verification: { type: 'output_contains', value: 'DONE' } // optional accelerator, not mandatory -verification: { type: 'file_exists', value: 'src/out.ts' } // deterministic file check -``` - -Types: `exit_code` (preferred for implementations), `output_contains`, `file_exists`, `custom`. - -**Key principle:** Verification passing is sufficient for step completion — even if no sentinel marker is present. The runner completes steps through evidence, not ceremony. - -### DAG Dependencies - -Steps with `dependsOn` wait for all listed steps to complete. Steps with no dependencies start immediately. 
Steps sharing the same `dependsOn` run in parallel: - -```typescript -// These two run in parallel after 'review' completes: -.step('fix-types', { agent: 'worker', dependsOn: ['review'], ... }) -.step('fix-tests', { agent: 'worker', dependsOn: ['review'], ... }) -// This waits for BOTH to finish: -.step('final', { agent: 'lead', dependsOn: ['fix-types', 'fix-tests'], ... }) -``` - -### Dedicated Channels - -Always set `.channel('wf-my-workflow-name')` for workflow isolation. If omitted, the runner auto-generates `wf-{name}-{id}`. Never rely on `general`. - -### Self-Termination - -Do NOT add exit instructions to task strings. The runner automatically appends self-termination instructions with the agent's runtime name in `spawnAndWait()`. - -### Step Completion Model - -Steps complete through a **multi-signal decision pipeline**, not a single sentinel marker: - -1. **Deterministic verification** (highest priority) — if `verification` passes (exit_code, file_exists, output_contains), the step completes immediately -2. **Owner decision** — the step owner (lead or step agent) can issue a structured decision: `OWNER_DECISION: COMPLETE|INCOMPLETE_RETRY|INCOMPLETE_FAIL` -3. **Evidence-based completion** — channel messages (WORKER_DONE signals), file artifacts, and process exit codes are collected as evidence -4. 
**Marker fast-path** — `STEP_COMPLETE:` still works as an accelerator but is never required - -**Completion states:** - -| State | Meaning | -| --- | --- | -| `completed_verified` | Deterministic verification passed | -| `completed_by_owner_decision` | Owner approved the step | -| `completed_by_evidence` | Evidence-based completion (channel signals, files, exit code) | -| `retry_requested_by_owner` | Owner requested retry via OWNER_DECISION | -| `failed_verification` | Verification explicitly failed | -| `failed_owner_decision` | Owner rejected the step | -| `failed_no_evidence` | No verification, no owner decision, no evidence — hard fail | - -**Review parsing is tolerant:** The runner accepts semantically equivalent outputs like "Approved", "Complete — task done", "LGTM", not just exact `REVIEW_DECISION: APPROVE` strings. - -### No Per-Agent Timeouts - -Avoid `timeoutMs` on agents/steps unless you have a specific reason. The global `.timeout()` is the safety net. Per-agent timeouts cause premature kills on steps that legitimately need more time. - -## Agent Definition - -```typescript -.agent('name', { - cli: 'claude' | 'codex' | 'gemini' | 'aider' | 'goose' | 'opencode' | 'droid', - role?: string, // describes agent's purpose (used by pattern auto-selection) - preset?: 'lead' | 'worker' | 'reviewer' | 'analyst', // sets interactive mode + task guardrails - retries?: number, // default retry count for steps using this agent - model?: string, // model override - interactive?: boolean, // default: true. 
Set false for non-interactive subprocess mode -}) -``` - -## Step Definition - -### Agent Steps - -```typescript -.step('name', { - agent: string, // must match an .agent() name - task: string, // supports {{var}} and {{steps.NAME.output}} - dependsOn?: string[], // DAG edges - verification?: VerificationCheck, - retries?: number, // overrides agent-level retries -}) -``` - -### Deterministic Steps (Shell Commands) - -```typescript -.step('verify-files', { - type: 'deterministic', - command: 'test -f src/auth.ts && echo "FILE_EXISTS"', - dependsOn: ['implement'], - captureOutput: true, // capture stdout for {{steps.verify-files.output}} - failOnError: true, // fail workflow if exit code != 0 -}) -``` - -Deterministic steps run shell commands without spawning an agent. Use them for: -- File existence checks after implementation waves -- Reading file contents to inject into downstream agent steps via `{{steps.X.output}}` -- Running build/test commands as workflow gates -- Gathering system info or context before agent steps - -## Event Listener - -```typescript -.run({ - onEvent: (event) => { - // event.type is one of: - // 'run:started' | 'run:completed' | 'run:failed' | 'run:cancelled' - // 'step:started' | 'step:completed' | 'step:failed' | 'step:skipped' | 'step:retrying' - }, - vars: { key: 'value' }, // template variables for {{key}} -}) -``` - -## Common Patterns - -### Parallel Review (lead + reviewer run simultaneously) - -```typescript -.step('lead-review', { agent: 'lead', dependsOn: ['implement'], ... }) -.step('code-review', { agent: 'reviewer', dependsOn: ['implement'], ... }) -.step('next-phase', { agent: 'worker', dependsOn: ['lead-review', 'code-review'], ... }) -``` - -### Pipeline (sequential handoff) - -```typescript -.pattern('pipeline') -.step('analyze', { agent: 'analyst', task: '...' 
}) -.step('implement', { agent: 'dev', task: '{{steps.analyze.output}}', dependsOn: ['analyze'] }) -.step('test', { agent: 'tester', task: '{{steps.implement.output}}', dependsOn: ['implement'] }) -``` - -### Error Handling Strategies - -```typescript -.onError('fail-fast') // stop on first failure (default) -.onError('continue') // skip failed branches, continue others -.onError('retry', { maxRetries: 3, retryDelayMs: 5000 }) -``` - -## Non-Interactive Agents (preset: worker / reviewer / analyst) - -Use presets instead of manually setting `interactive: false`. Presets configure interactive mode and inject guardrails automatically: - -```typescript -.agent('worker', { cli: 'claude', preset: 'worker', model: 'sonnet' }) -// Equivalent to interactive: false + "Do NOT use relay tools" prefix injected -``` - -| Preset | Interactive | Relay access | Use for | -| ---------- | ------------- | ------------ | ---------------------------------------------------- | -| `lead` | ✅ PTY | ✅ Full | Coordination, spawning workers, monitoring channels | -| `worker` | ❌ subprocess | ❌ None | Executing bounded tasks, producing structured stdout | -| `reviewer` | ❌ subprocess | ❌ None | Reading artifacts, producing verdicts | -| `analyst` | ❌ subprocess | ❌ None | Reading code/files, writing findings | - -**What changes with non-interactive presets:** - -- Agent runs via CLI one-shot mode (`claude -p`, `codex exec`, `gemini -p`) -- stdin is `/dev/null` — the process never blocks waiting for terminal input -- No PTY, no relay messaging, no `/exit` self-termination -- Output captured from stdout, available via `{{steps.X.output}}` - -**Critical rule — pre-inject content, never ask non-interactive agents to discover it:** - -```yaml -# WRONG — claude -p will try to read the file via tools, may time out on large files -- name: analyze - agent: analyst - task: 'Read src/runner.ts and summarize the scrubForChannel method.' 
- -# RIGHT — deterministic step reads the file, injects content directly -- name: read-method - type: deterministic - command: sed -n '/scrubForChannel/,/^ \}/p' src/runner.ts - captureOutput: true - -- name: analyze - agent: analyst - dependsOn: [read-method] - task: | - Summarize this method: - {{steps.read-method.output}} -``` - -Non-interactive agents can use tools but it's slow and unreliable on large files. -Deterministic steps are instant. Always pre-read, then inject. - -## DAG Deadlock Anti-Pattern - -**The lead↔worker deadlock** is the most common DAG mistake. It causes the lead to wait indefinitely for workers that can never start. - -```yaml -# WRONG — deadlock: coordinate waits for WORKER_DONE from work-a, -# but work-a can't start until coordinate finishes -steps: - - name: coordinate # lead, waits for WORKER_A_DONE signal - dependsOn: [context] - - name: work-a # can't start — blocked by coordinate - dependsOn: [coordinate] - -# RIGHT — workers and lead start in parallel, merge step gates on all three -steps: - - name: context - type: deterministic - - name: work-a # starts with lead - dependsOn: [context] - - name: work-b # starts with lead - dependsOn: [context] - - name: coordinate # lead monitors channel for worker signals - dependsOn: [context] - - name: merge # gates on everything - dependsOn: [work-a, work-b, coordinate] -``` - -The runner will catch obvious cases of this at parse time and throw an error. - -**Rule:** if a lead step's task mentions downstream step names alongside waiting keywords (wait, DONE, monitor, check inbox), that's a deadlock. - -## Step Sizing: Keep Tasks Focused - -**A step's task prompt should be 10–20 lines maximum.** If you find yourself writing a 100-line task prompt, the step is too large for one agent — split it into a team. - -### The Rule - -One agent, one deliverable. A step should instruct an agent to produce **one specific artifact** (one file, one plan, one review pass). 
If the step requires reading the whole codebase, coordinating sub-tasks, _and_ reviewing output, it will fail or produce poor results. - -### When to Use a Team Instead - -Decompose a large step into a **lead + workers** team when: - -- The task would require a 50+ line prompt to fully specify -- The deliverable is multiple files that must be consistent with each other -- The work benefits from back-and-forth (questions, corrections, reviews) -- You need one agent to verify another's output before signaling completion - -### Team Pattern - -All team members run as concurrent steps sharing a dedicated channel. The lead coordinates dynamically via messages; workers receive assignments at runtime, not in their task prompt. - -```yaml -agents: - - name: track-lead - cli: claude - channels: [my-track, main-channel] - role: 'Leads the track. Assigns files to workers, reviews output.' - constraints: - model: sonnet - - - name: track-worker-1 - cli: codex - channels: [my-track] - role: 'Writes file-a.ts as assigned by track-lead.' - constraints: - model: gpt-5.3-codex - - - name: track-worker-2 - cli: codex - channels: [my-track] - role: 'Writes file-b.ts as assigned by track-lead.' - constraints: - model: gpt-5.3-codex-spark - -steps: - # All three start in the same wave (same dependsOn). - # Lead posts assignments to #my-track; workers read and implement. - - name: track-lead-coord - agent: track-lead - dependsOn: [prior-step] - task: | - Lead the track on #my-track. Workers: track-worker-1, track-worker-2. - Post assignments to the channel. Review worker output. - When all workers are done and output is satisfactory, summarize results. - # Lead uses OWNER_DECISION or the runner detects completion via evidence - - - name: track-worker-1-impl - agent: track-worker-1 - dependsOn: [prior-step] # same dep as lead — starts concurrently - task: | - Join #my-track. track-lead will post your assignment. - Implement the file as directed. Post a summary when complete. 
- verification: - type: exit_code # preferred for code-editing workers - - - name: track-worker-2-impl - agent: track-worker-2 - dependsOn: [prior-step] - task: | - Join #my-track. track-lead will post your assignment. - Implement the file as directed. Post a summary when complete. - verification: - type: exit_code - - # Next step depends only on the lead — lead reviews workers via channel - # evidence and issues OWNER_DECISION or STEP_COMPLETE when satisfied. - - name: next-step - agent: ... - dependsOn: [track-lead-coord] -``` - -### Key Points - -- **Lead task prompt**: who your workers are, which channel to use, what to assign, what "done" looks like. ~15 lines. Describe the work contract, not output ceremony. -- **Worker task prompt**: which channel to join, that the lead will post their assignment. ~5 lines. Workers post summaries, not mandatory sentinel strings. -- **Workers don't need the full spec in their prompt** — they get it from the lead at runtime via the channel. -- **Downstream steps depend on the lead**, not the workers — the lead reviews worker output via channel evidence and issues completion. -- **Separate channels per team** prevent cross-talk: `#harness-track`, `#review-track`, etc. -- **Channel evidence is first-class** — worker summaries, DONE signals, and file creation events posted to the channel are collected as completion evidence by the runner. - -## Concurrency: Don't Over-Parallelize - -**Set `maxConcurrency` to 4–6 for most workflows.** Each agent spawn requires a PTY startup plus a Relaycast registration. Spawning 10+ agents simultaneously overwhelms the broker and causes spawn timeouts. - -```yaml -swarm: - pattern: dag - maxConcurrency: 5 # good: staggers spawns within each wave -``` - -Even if a wave has 10 ready steps, the runner will only start 5 at a time and pick up the next as each finishes. 
This keeps the broker healthy and prevents the `request timed out after 10000ms (type='spawn_agent')` error that occurs when too many agents register with Relaycast concurrently. - -**Rule of thumb by workflow size:** - -| Parallel agents needed | `maxConcurrency` | -| ---------------------- | ---------------- | -| 2–4 | 4 (default safe) | -| 5–10 | 5 | -| 10+ | 6–8 max | - -## Phase Count: Keep Workflows Compact - -**Limit workflows to 3–4 phases.** Each phase is a sequential barrier — the next phase can't start until the previous one finishes. More phases means more serialization, more wall-clock time, and more chances for context drift between agents. - -| Phases | Verdict | Notes | -| ------ | -------- | ----------------------------------------------------------- | -| 2–3 | Ideal | Tight feedback loops, agents see recent context | -| 4 | Okay | Acceptable for large projects with clear module boundaries | -| 5+ | Too many | Agents lose context, reviews find "FILE NOT FOUND" errors | -| 8+ | Never | Each agent works blind — integration issues multiply | - -**Why fewer phases work better:** - -- Non-interactive agents can't see each other's output. Each phase boundary is a hard wall. -- Reflection/review steps only add value if the files actually exist on disk. With many phases, early agents write files that later agents can't find (wrong cwd, wrong paths). -- Consolidating related work into one phase lets parallel workers share a lead who can coordinate and verify. - -**How to consolidate:** - -Instead of Phase 1 (auth) → Phase 2 (volumes) → Phase 3 (storage) → Phase 4 (executor), group by integration surface: - -```yaml -# Phase 1: Foundation (auth + volumes + storage — independent modules) -# Phase 2: Orchestration (executor + bootstrap — depend on Phase 1) -# Phase 3: API + Integration (web routes + reporter + barrel exports) -``` - -Within each phase, use parallel workers with a shared lead for coordination. 
- -## File Materialization: Verify Before Proceeding - -**Always add a deterministic file-check step after implementation waves.** Non-interactive agents (codex, claude -p) may fail silently — the process exits 0 but files weren't written because of a wrong cwd, permission issue, or the agent output code to stdout instead of writing files. - -### The pattern - -```yaml -# Workers write files in parallel -- name: impl-auth - agent: worker-1 - task: | - Create the file src/auth/credentials.ts with the following implementation... - IMPORTANT: Write the file to disk using your file-writing tools. - Do NOT just output the code to stdout — the file must exist at src/auth/credentials.ts when you finish. - -- name: impl-storage - agent: worker-2 - task: | - Create the file src/storage/client.ts with the following implementation... - IMPORTANT: Write the file to disk. The file must exist at src/storage/client.ts when you finish. - -# Deterministic gate: verify all expected files exist before any review/next-phase step -- name: verify-files - type: deterministic - dependsOn: [impl-auth, impl-storage] - command: | - missing=0 - for f in src/auth/credentials.ts src/storage/client.ts; do - if [ ! -f "$f" ]; then echo "MISSING: $f"; missing=$((missing+1)); fi - done - if [ $missing -gt 0 ]; then echo "$missing files missing"; exit 1; fi - echo "All files present" - failOnError: true - captureOutput: true - -# Reviews and next-phase steps depend on verify-files, not directly on workers -- name: review - agent: reviewer - dependsOn: [verify-files] - task: ... -``` - -### Rules for non-interactive file-writing tasks - -1. **Use absolute or explicit relative paths** — always include the full path from the project root in the task prompt. Don't say "implement credentials.ts", say "create the file at `src/auth/credentials.ts`". -2. **Tell the agent to write the file, not output it** — add `IMPORTANT: Write the file to disk using your file-writing tools. 
Do NOT just output the code to stdout.` Non-interactive agents sometimes default to printing code instead of writing files. -3. **Gate downstream steps on file verification** — never let a review or next-phase step run without first confirming the expected files exist via a deterministic `[ -f ]` check. -4. **Fail fast on missing files** — set `failOnError: true` on the verification step. A missing file early is much cheaper to debug than 30 minutes of "FILE NOT FOUND" reviews. - -### Reading files for context injection - -When the next phase needs to read files produced by the current phase, use a deterministic step: - -```yaml -- name: read-phase1-output - type: deterministic - dependsOn: [verify-phase1-files] - command: | - echo "=== src/auth/credentials.ts ===" - cat src/auth/credentials.ts - echo "=== src/storage/client.ts ===" - cat src/storage/client.ts - captureOutput: true - -- name: phase2-implement - agent: worker - dependsOn: [read-phase1-output] - task: | - Here are the files from Phase 1: - {{steps.read-phase1-output.output}} - - Now implement the executor that uses these modules... -``` - -## Completion Signals: Required vs Optional - -The runner uses a multi-tier completion resolution system. **No single signal is mandatory** — the runner resolves completion from whatever evidence is available. - -### Tier 1: Explicit owner decision (strongest) - -``` -OWNER_DECISION: COMPLETE -REASON: All files written and tests pass -``` - -The structured `OWNER_DECISION` format is preferred for owner/lead agents. It gives the runner an unambiguous completion signal. - -### Tier 2: Legacy completion marker - -``` -STEP_COMPLETE:step-name -``` - -Still supported but optional. The runner treats it as equivalent to `OWNER_DECISION: COMPLETE`. - -### Tier 3: Verification gate - -If `verification` is configured on the step, the runner checks it automatically. A passing verification gate completes the step even without an explicit owner decision. 
- -### Tier 4: Evidence-based completion - -When no explicit signal is found, the runner checks collected evidence: -- Coordination signals in output (`WORKER_DONE`, `LEAD_DONE`) -- Process exit code 0 (clean exit) -- Tool side-effects (git diff checks, file inspections) -- Positive-conclusion language in owner output - -If both a positive conclusion **and** at least one evidence signal are present, the step completes. - -### Tier 5: Process-exit fallback - -When the agent exits with code 0 but posts **no** coordination signal at all: -- The runner waits a configurable grace period (`completionGracePeriodMs`, default 5s) -- If verification is configured and passes, the step completes with reason `completed_by_process_exit` -- If no verification is configured, the step completes based on the clean exit alone - -This tier is the key mechanism for reducing dependence on exact agent behavior. - -### What this means for workflow authors - -- **Don't require exact text output** as the only completion signal. Always configure a verification gate (`exit_code`, `file_exists`, or `output_contains`) as a backup. -- **Describe the deliverable, not the ceremony.** Say "implement the auth module" not "implement the auth module and then output IMPL_DONE". -- **Prefer `exit_code` verification** for code-editing workers — it's the most reliable signal because it doesn't depend on the agent printing specific text. -- **Use `completionGracePeriodMs: 0`** in the swarm config to disable the process-exit fallback if you need strict signal compliance. - -### Configuring the grace period - -```yaml -swarm: - pattern: dag - completionGracePeriodMs: 5000 # default: 5s. Set to 0 to disable. -``` - -## Robust Coordination Best Practices - -### Design for agent non-compliance - -Agents may not follow instructions perfectly. The runner is designed to handle this gracefully: - -1. 
**Always configure verification gates** — they're the most reliable completion mechanism because they don't depend on agent behavior at all. -2. **Use deterministic steps for critical checks** — `file_exists` checks, test runs, and type checks are deterministic and infallible. -3. **Don't rely on agents posting exact signal text** — use `exit_code` verification instead of `output_contains` when possible. -4. **Let the runner handle self-termination** — it appends `/exit` instructions automatically and detects idle agents. - -### Completion strategy by step type - -| Step type | Recommended verification | Why | -|---|---|---| -| Code editing (codex worker) | `exit_code` | Agent may not print tokens reliably | -| Analysis/review (claude) | `output_contains` with unique token | Structured output is the deliverable | -| File creation (any worker) | `file_exists` | Deterministic check, zero agent dependency | -| Lead coordination | None (owner decision or evidence) | Lead agents are interactive and monitored | - -### Owner steps: structured decisions preferred - -For supervised steps with a dedicated owner, the `OWNER_DECISION` format is preferred over legacy `STEP_COMPLETE:` markers because: -- It supports negative outcomes (`INCOMPLETE_RETRY`, `INCOMPLETE_FAIL`) not just success -- It includes a `REASON` field for observability -- The runner can distinguish owner intent from echoed prompt text more reliably - -But if the owner doesn't post either format, the runner still resolves completion from evidence. 
- -## Common Mistakes - -| Mistake | Fix | -| ----------------------------------------------------------- | ----------------------------------------------------------------- | -| Adding `withExit()` or exit instructions to tasks | Runner handles this automatically | -| Setting tight `timeoutMs` on agents | Use global `.timeout()` only | -| Using `general` channel | Set `.channel('wf-name')` for isolation | -| Referencing `{{steps.X.output}}` without `dependsOn: ['X']` | Output won't be available yet | -| Making review steps serial when they could be parallel | Both reviewers can depend on the same upstream step | -| Requiring exact sentinel strings as the only completion gate | Use deterministic verification (`exit_code`, `file_exists`) or owner judgment | -| Writing 100-line task prompts | Split into lead + workers communicating on a channel | -| Putting the full spec in every worker's task | Lead posts the spec to the channel at runtime | -| `maxConcurrency: 16` with many parallel steps | Cap at 5–6; broker times out spawning 10+ agents at once | -| Asking non-interactive agent to read a large file via tools | Pre-read in a deterministic step, inject via `{{steps.X.output}}` | -| Workers depending on the lead step (deadlock) | Workers and lead both depend on a shared context step | -| Omitting `agents` field for deterministic-only workflows | Field is now optional — pure shell pipelines work without it | -| Designing prompts around output ceremony instead of work | Describe the deliverable and acceptance criteria, not what to print | -| Treating markers as mandatory truth | Markers are optional accelerators; verification and evidence decide completion | -| Using `fan-out`/`hub-spoke` for simple parallel workers | Use `dag` — hub patterns trigger auto owner/supervisor/reviewer pipeline | -| Workers without `preset: 'worker'` in lead+worker workflows | Add `preset: 'worker'` — it auto-sets `interactive: false` and produces clean stdout for `{{steps.X.output}}` 
injection | -| Lead running concurrently with workers, monitoring channel | Make lead `dependsOn` workers — use `{{steps.X.output}}` injection instead of real-time channel monitoring | -| Using `_` in YAML numbers (e.g., `timeoutMs: 1_200_000`) | YAML doesn't support `_` as a numeric separator — use `1200000`. TypeScript separators don't work in YAML | -| Setting workflow timeout under 30 minutes for complex workflows | Claude leads reading large codebases take 5-15 min per step. Use `3600000` (1 hour) as a safe default | -| Passing too much context in `read-context` deterministic steps | Trim to only the relevant code. Use `grep`, `sed -n`, `head` instead of full `cat`. Large context slows lead design | -| Using `import { workflow }` (ESM) in TypeScript workflows | Use `const { workflow } = require('@agent-relay/sdk/workflows')` — most projects default to CJS and `tsx` will fail with top-level await or ESM-only imports | -| Top-level `await` in TypeScript workflow files | Wrap in `async function main() { ... } main().catch(console.error)` — CJS mode does not support top-level await | -| Using `import` path `'../workflows/builder.js'` (relative) | Use `require('@agent-relay/sdk/workflows')` — the package export, not internal file paths | -| Not validating with `--dry-run` before running | Always run `agent-relay run --dry-run workflow.ts` first to catch import errors, deadlocks, and missing deps | - -## Verification Tokens with Non-Interactive Workers - -### The double-occurrence rule - -When the verification token appears in the task text, the runner requires it to appear -**twice** in the captured output — once from the task injection echo, once from the agent's -actual response. A single occurrence is treated as the task echo and fails verification. - -This means if your task says `Output: DONE` or `REQUIRED: print DONE`, the token `DONE` -is in the task text. The agent must print it a second time, explicitly. 
- -### Preferred: use `exit_code` for code-editing workers - -For steps where the real quality gate is downstream (type-check, tests), `exit_code` -verification is simpler and more reliable than `output_contains`: - -```yaml -# WRONG for codex code editors — token in task causes double-occurrence requirement -- name: implement - agent: implementer # codex, preset: worker - task: | - Make these changes to foo.ts... - Output: IMPL_DONE # token now in task text → requires 2 occurrences - verification: - type: output_contains - value: IMPL_DONE - -# RIGHT — exit 0 means success; tests catch any mistakes -- name: implement - agent: implementer - task: | - Make these changes to foo.ts... - verification: - type: exit_code -``` - -### When you need `output_contains` with a codex worker - -Use a token that does **not** appear verbatim anywhere in the task text. A unique sentinel -works well: - -```yaml -task: | - Analyze foo.ts and write a summary report. - Signal completion by printing: ANALYSIS_DONE -verification: - type: output_contains - value: ANALYSIS_DONE # "ANALYSIS_DONE" does not appear verbatim above → single occurrence is enough -``` - -If the token must appear in the instructions, instruct the agent to run it as a shell -command so the execution (not the description) produces the second occurrence: - -```yaml -task: | - Make changes to foo.ts... - When done, run: echo "IMPL_DONE" -verification: - type: output_contains - value: IMPL_DONE -``` - -**Rule of thumb:** Code-editing steps → `exit_code`. Analysis/review steps that produce -structured output → `output_contains` with a token not mentioned verbatim in the task. 
- -## YAML Alternative - -Workflows can also be defined as `.yaml` files: - -```yaml -version: '1.0' -name: my-workflow -swarm: - pattern: dag - channel: wf-my-workflow -agents: - - name: lead - cli: claude - role: Architect - - name: worker - cli: codex - role: Implementer -workflows: - - name: default - steps: - - name: plan - agent: lead - task: 'Produce a detailed implementation plan.' - # No sentinel required — owner judgment + evidence complete the step - - name: implement - agent: worker - task: 'Implement: {{steps.plan.output}}' - dependsOn: [plan] - verification: - type: exit_code # deterministic: exit 0 = success -``` - -Run with: `agent-relay run path/to/workflow.yaml` - -## TypeScript Workflow Setup - -TypeScript workflows use the fluent builder API via `@agent-relay/sdk/workflows`. - -**Critical rules for TypeScript workflows:** - -1. **Use `require()`, not `import`** — most projects default to CJS (`"type"` is not `"module"` in package.json), and `tsx` will fail with ESM imports -2. **Wrap in `async function main()`** — CJS does not support top-level `await` -3. 
**Validate with `--dry-run`** before running: `agent-relay run --dry-run workflow.ts` - -**Template:** -```typescript -const { workflow } = require('@agent-relay/sdk/workflows'); - -async function main() { - const result = await workflow('my-workflow') - .description('What this workflow does') - .pattern('dag') - .channel('wf-my-workflow') - .maxConcurrency(4) - .timeout(3_600_000) - - .agent('lead', { cli: 'claude', role: 'Architect' }) - .agent('worker', { cli: 'claude', preset: 'worker', role: 'Implementer' }) - - .step('plan', { - agent: 'lead', - task: 'Produce a plan.', - verification: { type: 'output_contains', value: 'PLAN_COMPLETE' }, - }) - .step('implement', { - agent: 'worker', - dependsOn: ['plan'], - task: 'Implement: {{steps.plan.output}}', - verification: { type: 'exit_code' }, - }) - - .onError('retry', { maxRetries: 2, retryDelayMs: 10_000 }) - .run({ onEvent: (e) => console.log(`[${e.type}] ${e.step ?? ''}`) }); - - console.log('Result:', result.status); -} - -main().catch(console.error); -``` - -Run with: `agent-relay run path/to/workflow.ts` - -## Workflow Authoring Rules - -Follow these principles when designing workflow step prompts: - -### 1. Prefer verification over sentinel-only prompts - -Use deterministic checks (`exit_code`, `file_exists`) as the primary completion signal. Don't rely solely on agents printing magic strings. - -```yaml -# GOOD — deterministic verification -verification: - type: exit_code # or file_exists: src/auth.ts - -# OKAY — sentinel as optional accelerator alongside verification -verification: - type: output_contains - value: PLAN_COMPLETE - -# BAD — no verification, relying only on agent printing a string -task: "Do X. You MUST print STEP_COMPLETE when done." -``` - -### 2. Use owners/reviewers to interpret ambiguous outputs - -The step owner (lead or step agent) can approve or reject a step via `OWNER_DECISION`. 
This is useful when automated verification isn't sufficient — the owner reads evidence and makes a judgment call. - -```yaml -# Owner reviews worker output and decides -task: | - Review worker output on #my-track. - If satisfactory, approve. If not, request retry. - # Runner accepts: OWNER_DECISION: COMPLETE, or tolerant variants like "Approved", "LGTM" -``` - -### 3. For channel workflows, define required channel events explicitly - -When coordination happens via channel messages, tell agents what to post and what the lead should observe: - -```yaml -# Worker prompt — describe what to communicate -task: | - Implement auth module. Post a summary of changes to #my-track when done. - -# Lead prompt — describe what to observe -task: | - Monitor #my-track for worker summaries. When all workers have posted summaries, - review the changes and approve the step. -``` - -### 4. Treat exact completion strings as optional accelerators only - -`STEP_COMPLETE:` and `REVIEW_DECISION: APPROVE` still work as fast-paths but are never required. The runner's completion pipeline will find evidence even without them. - -### 5. Ensure prompts describe work contract, not output ceremony - -**Bad:** "You MUST end your response with exactly: IMPLEMENTATION_DONE" -**Good:** "Implement the auth module. Write the file to src/auth.ts. The step is complete when the file exists and compiles." - -The prompt should describe what the agent should deliver, not what it should print. - -## Available Swarm Patterns - -`dag` (default), `fan-out`, `pipeline`, `hub-spoke`, `consensus`, `mesh`, `handoff`, `cascade`, `debate`, `hierarchical`, `map-reduce`, `scatter-gather`, `supervisor`, `reflection`, `red-team`, `verifier`, `auction`, `escalation`, `saga`, `circuit-breaker`, `blackboard`, `swarm` - -See skill `choosing-swarm-patterns` for pattern selection guidance. 
diff --git a/src/cli/commands/on.ts b/src/cli/commands/on.ts index b2b462e23..099040eac 100644 --- a/src/cli/commands/on.ts +++ b/src/cli/commands/on.ts @@ -37,8 +37,8 @@ export function registerOnCommands(program: Command, overrides: Partial', 'Join an existing relay workspace') .option('--scan', 'Preview what the agent will see without launching') .option('--doctor', 'Check prerequisites and exit') - .option('--port-auth ', 'Relayauth port', '8787') - .option('--port-file ', 'Relayfile port', '8080') + .option('--port-auth ', 'Auth service URL or local port', process.env.RELAY_AUTH_URL ?? 'https://agentrelay.dev') + .option('--port-file ', 'Relayfile service URL or local port', process.env.RELAY_FILE_URL ?? 'https://api.relayfile.dev') .allowUnknownOption(true) // pass extra args to agent CLI .action(async (cli: string | undefined, options: any, command: Command) => { if (options.doctor) { diff --git a/src/cli/commands/on/start.test.ts b/src/cli/commands/on/start.test.ts index a2dd957ca..7e6b8065c 100644 --- a/src/cli/commands/on/start.test.ts +++ b/src/cli/commands/on/start.test.ts @@ -3,6 +3,10 @@ import { tmpdir } from 'node:os'; import path from 'node:path'; import { describe, expect, it, vi } from 'vitest'; +vi.mock('@agent-relay/cloud', () => ({ + ensureAuthenticated: vi.fn().mockResolvedValue({ accessToken: 'test-token' }), +})); + import { requestWorkspaceSession } from './start.js'; function jsonResponse(payload: unknown, status = 200): Response { diff --git a/src/cli/commands/on/start.ts b/src/cli/commands/on/start.ts index f60a8b5be..75a61435f 100644 --- a/src/cli/commands/on/start.ts +++ b/src/cli/commands/on/start.ts @@ -17,6 +17,7 @@ import path from 'node:path'; import { parse as parseYaml } from 'yaml'; import { mintToken } from './token.js'; import { seedWorkspace as seedWorkspaceFiles } from './workspace.js'; +import { ensureAuthenticated } from '@agent-relay/cloud'; interface OnOptions { agent?: string; @@ -272,12 +273,21 @@ async function 
postWorkspaceApi( url: string, body: Record ): Promise { + const headers: Record<string, string> = { + 'Content-Type': 'application/json', + 'X-Correlation-Id': `agent-relay-on-${Date.now()}`, + }; + + // Attach cloud auth token for remote endpoints + if (!isLocalBaseUrl(url)) { + const parsed = new URL(url); + const auth = await ensureAuthenticated(`${parsed.protocol}//${parsed.host}`); + headers['Authorization'] = `Bearer ${auth.accessToken}`; + } + const response = await fetchFn(url, { method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'X-Correlation-Id': `agent-relay-on-${Date.now()}`, - }, + headers, body: JSON.stringify(body), });