diff --git a/docs/v2/ANVIL-4-REVIEW.md b/docs/v2/ANVIL-4-REVIEW.md new file mode 100644 index 0000000..705e3a4 --- /dev/null +++ b/docs/v2/ANVIL-4-REVIEW.md @@ -0,0 +1,304 @@ +# ANVIL-4-REVIEW: cloud-brain readiness + +## Summary +- Recommendation: **GO** / **GO-WITH-PATCHES** / **HOLD** + +## Findings (P0 / P1 / P2 with file + line + issue + suggested fix) + +### Path correctness +... + +### Dedup key validity +... + +### Transparency contract (ADR-006) +... + +### chat.history under cloud-brain +... + +### Smoke script soundness +... + +## Single-test-proves-transparency answer +... + +## Out-of-scope notes (optional) +``` + +Be terse. Skip nitpicks. Focus on what would actually break under cloud-brain. + +# Attached: scripts/cloud-brain-smoke.mjs + +#!/usr/bin/env node +// cloud-brain-smoke.mjs — Phase B validation per +// `~/.openclaw/wiki/main/_boards/runbooks/platform/benchagi-v2-cloud-brain-pickup.md` §"Validation script". +// +// Activates AFTER cloud-brain Phase 1B PRs merge (BenchAGI #872 W1, #874 W4, +// #878 W2, #988 relay, openclaw#24 W3) AND a developer's agentDeployment is +// flipped to runtime: 'remote-brain' per the operator-side smoke runbook. +// +// What it does: +// +// 1. Lists all known agents from the local openclaw gateway. +// 2. For each agent, queries Firestore (admin REST + gcloud token per +// ~/.claude/.../memory/reference_firebase_admin_rest_recipe.md) for +// `agentDeployments/{instanceId}_{agentId}` and reads the `runtime` field. +// 3. If `runtime === 'remote-brain'`, spawns `benchagi --agent +// --liveness off "respond: smoke-ok"` with stdout/stderr captured and +// a 60s wall-clock timeout. +// 4. Asserts (per runbook §"Validation script" point 4): +// - chat output is non-empty (proves cloud-brain dispatched the LLM turn) +// - the run terminated cleanly (proves orchestrator returned) +// - no error markers in output +// - latency < 60s +// 5. Emits a JSON summary; exits 0 on all-green, 1 if any agent failed. 
+// +// Required env: +// - INSTANCE_ID — Firestore instance id (e.g. cory's primary instance) +// - GCP_PROJECT — Firebase project id (default: benchagi-8ea90) +// +// Optional env: +// - SMOKE_AGENT_FILTER — regex; if set, only test agents matching this +// - SMOKE_PROMPT — override default prompt (default: "respond: smoke-ok") +// - SMOKE_TIMEOUT_MS — override 60s default +// - DEBUG_RAW_FRAMES — if "1", tee raw WS frames to ./smoke-frames-.jsonl +// (requires bench-cli to honor BENCHAGI_DEBUG_TRACE_FILE, +// which is a V1.1 follow-up — for now this is a no-op) +// +// This script is GATED on cloud-brain Phase 1B merging. If the schemas don't +// support `runtime` field yet, every agent will appear as `runtime: undefined` +// and the script will report "no remote-brain agents found — gated". + +import { spawn } from "node:child_process"; +import { execFileSync } from "node:child_process"; +import { performance } from "node:perf_hooks"; +import { readFileSync } from "node:fs"; +import { homedir } from "node:os"; +import { join } from "node:path"; + +const INSTANCE_ID = process.env.INSTANCE_ID; +const GCP_PROJECT = process.env.GCP_PROJECT ?? "benchagi-8ea90"; +const PROMPT = process.env.SMOKE_PROMPT ?? "respond: smoke-ok"; +const TIMEOUT_MS = parseInt(process.env.SMOKE_TIMEOUT_MS ?? "60000", 10); +const FILTER = process.env.SMOKE_AGENT_FILTER ? 
new RegExp(process.env.SMOKE_AGENT_FILTER) : null; + +if (!INSTANCE_ID) { + console.error("ERROR: INSTANCE_ID env var required"); + console.error("Usage: INSTANCE_ID= node scripts/cloud-brain-smoke.mjs"); + process.exit(2); +} + +// --- Firestore admin REST via gcloud user token --- + +function gcloudAccessToken() { + try { + return execFileSync("gcloud", ["auth", "print-access-token"], { + encoding: "utf-8", + stdio: ["ignore", "pipe", "pipe"], + }).trim(); + } catch (err) { + console.error("ERROR: gcloud auth print-access-token failed:", err.message); + console.error("Run `gcloud auth login` first."); + process.exit(2); + } +} + +async function fetchAgentDeployment(instanceId, agentId, token) { + const docPath = `instances/${instanceId}/agentDeployments/${instanceId}_${agentId}`; + const url = `https://firestore.googleapis.com/v1/projects/${GCP_PROJECT}/databases/(default)/documents/${docPath}`; + const resp = await fetch(url, { + headers: { + Authorization: `Bearer ${token}`, + "X-Goog-User-Project": GCP_PROJECT, + }, + }); + if (resp.status === 404) return null; // No deployment for this agent + if (!resp.ok) { + throw new Error(`Firestore GET ${docPath} → ${resp.status} ${await resp.text()}`); + } + const doc = await resp.json(); + // Firestore REST returns fields wrapped in type tags. Extract `runtime` (string). + const runtime = doc?.fields?.runtime?.stringValue ?? null; + const tier = doc?.fields?.tier?.stringValue ?? 
null; + return { runtime, tier, raw: doc }; +} + +// --- benchagi spawn with stdout capture + timeout --- + +function runBenchagi(agentId, prompt, timeoutMs) { + return new Promise((resolve) => { + const t0 = performance.now(); + const child = spawn( + "node", + ["bin/benchagi.mjs", "--agent", agentId, "--liveness", "off", "--no-thinking", prompt], + { + cwd: process.cwd(), + stdio: ["ignore", "pipe", "pipe"], + env: { ...process.env, NO_COLOR: "1" }, + }, + ); + + let stdout = ""; + let stderr = ""; + child.stdout.on("data", (d) => { stdout += d.toString(); }); + child.stderr.on("data", (d) => { stderr += d.toString(); }); + + const timer = setTimeout(() => { + try { child.kill("SIGINT"); } catch { /* ignore */ } + setTimeout(() => { try { child.kill("SIGKILL"); } catch { /* ignore */ } }, 1000); + }, timeoutMs); + + child.on("close", (code) => { + clearTimeout(timer); + const dtMs = performance.now() - t0; + resolve({ exitCode: code, stdout, stderr, durationMs: Math.round(dtMs) }); + }); + }); +} + +// --- Assertions --- + +function assertSmokePassed(result, prompt, timeoutMs) { + const issues = []; + + if (result.exitCode !== 0) { + issues.push(`exit code ${result.exitCode}`); + } + if (result.durationMs >= timeoutMs) { + issues.push(`timed out after ${timeoutMs}ms`); + } + if (result.durationMs >= 60_000) { + issues.push(`latency ${result.durationMs}ms exceeds 60s budget`); + } + if (!result.stdout || result.stdout.trim().length === 0) { + issues.push("empty stdout — no chat output captured"); + } + // Heuristic: look for a few error indicators in the rendered output. + if (/error: |chat\.send failed|connection closed|history replay failed/i.test(result.stdout)) { + issues.push("error marker in stdout"); + } + // The prompt asks the agent to "respond: smoke-ok" — don't strictly require + // this in output (model might paraphrase), but flag if completely absent. 
+ if (!/smoke-ok|ok|hello|hi/i.test(result.stdout)) { + issues.push("response doesn't contain expected acknowledgement (lenient check failed)"); + } + + return issues; +} + +// --- Discover known agents from the local gateway --- + +function listAgents() { + try { + const out = execFileSync("node", ["bin/benchagi.mjs", "agents", "list"], { + cwd: process.cwd(), + encoding: "utf-8", + stdio: ["ignore", "pipe", "pipe"], + }); + // Parse lines like " kestrel-aurelius pi/aurelius-default"; split on whitespace. + const agents = []; + for (const line of out.split("\n")) { +-- +# ANVIL-4-REVIEW: cloud-brain readiness + +## Summary +- Recommendation: **HOLD** +- Main blocker: the proposed smoke can pass without exercising cloud-brain. Local `benchagi` still talks to local gateway `chat.send`; W3 adds `/v1/llm_turn` for the relay to call, but I found no local gateway path that routes `chat.send` through cloud-brain based on Firestore `runtime`. + +## Findings + +### Path correctness +**P0** — `scripts/cloud-brain-smoke.mjs:97-103` runs `node bin/benchagi.mjs ...`, which sends local gateway `chat.send` via `src/v2/chat-runner.ts:359-364`. The W3 path is a separate HTTP endpoint, `/v1/llm_turn`, registered in `openclaw-w3/src/gateway/server-http.ts:970-984` and handled in `openclaw-w3/src/gateway/llm-turn-http.ts:315-375`. + +Issue: selecting agents by Firestore `runtime: remote-brain` does not make local gateway `chat.send` use cloud-brain. Unless one of the unavailable PR diffs adds a local gateway dispatch bridge, this smoke tests local OpenClaw, not cloud-brain. + +Suggested fix: either add/verify a gateway `chat.send` remote-brain dispatch path that writes/claims `LlmTurnDirective` and re-emits normal `chat`/`agent` events, or change the smoke to assert cloud artifacts: directive created, claimed, completed, and correlated to the CLI run. 
+ +### Dedup key validity +**OK, conditional.** The CLI key `(prefix, runId, seq, stream, sub)` is valid if forwarded remote-brain events are emitted as normal `agent` frames with stable `runId` and monotonic per-run `seq`. The directive schema carries `runId` per spec lines 611-619, and OpenClaw agent events generate monotonic per-run seq in `openclaw-w3/src/infra/agent-events.ts:200-217`. + +Concern is not the key. The gap is proving the gateway actually forwards remote-brain completion as those same event frames. + +### Transparency contract (ADR-006) +**OK in bench-cli code; P0 in integration contract.** `ChatRunner` does not branch on `runtime`; lifecycle handling keys off `chat` final states and `agent.lifecycle` phases only (`src/v2/chat-runner.ts:118-154`). That is the right transparency shape. + +But transparency is currently asserted, not proven. The local gateway path must be the component that hides cloud-brain. I did not find that bridge in the readable local code. + +### chat.history under cloud-brain +**P1** — bench-cli calls `chat.history { sessionKey, sinceSeq }` and expects `events`/`frames` for replay (`src/v2/chat-runner.ts:222-245`, `491-505`). The readable OpenClaw `chat.history` handler accepts `sessionKey`, `limit`, `maxChars`, then returns transcript `messages`, not event frames, and does not consume `sinceSeq` (`openclaw-w3/src/gateway/server-methods/chat.ts:1607-1675`). + +Suggested fix: gateway needs a real event-frame history contract: `chat.history({ sessionKey, sinceSeq }) -> { events: EventFrame[] }`, including forwarded remote-brain frames. Otherwise reconnect replay is best-effort no-op for both local and cloud-brain runs. + +### Smoke script soundness +**P0** — false-positive risk: non-empty stdout + exit 0 can be produced by local execution. No assertion proves `relayDirectives` had an `llm_turn` for this run, that `/v1/llm_turn` was called, or that a directive completed. 
+ +**P1** — Firestore lookup assumes doc id `instances/${instanceId}/agentDeployments/${instanceId}_${agentId}` at `scripts/cloud-brain-smoke.mjs:75-77`. The runbook says this, but the broader spec examples use arbitrary `deploymentId`. If W1 backfill preserves noncanonical deployment ids, the smoke skips real remote-brain agents. + +**P2** — Firestore failures are recorded as skips (`scripts/cloud-brain-smoke.mjs:193-200`), so an auth/project outage can produce exit 0 if no agents are tested. For readiness, infra lookup failure should be fatal unless explicitly `--allow-skips`. + +**P2** — script assumes repo-root cwd for `bin/benchagi.mjs` (`scripts/cloud-brain-smoke.mjs:100-104`). Use `import.meta.url` to resolve repo root. + +## Single-test-proves-transparency answer +The current smoke is not sufficient. A stronger single test: + +1. Start `benchagi --agent "respond: smoke-ok"`. +2. Capture raw WS frames from the CLI. +3. Watch Firestore for a new `relayDirectives` doc with `directiveType: "llm_turn"`, matching `instanceId`, `agentId`, and a correlated `runId`. +4. Assert directive reaches `completed`. +5. Assert the CLI saw normal `chat` final and/or `agent.lifecycle end` frames with the same run identity and no runtime-specific branch. + +That proves both halves: cloud-brain actually ran, and benchagi consumed it transparently. +tokens used +164,060 +# ANVIL-4-REVIEW: cloud-brain readiness + +## Summary +- Recommendation: **HOLD** +- Main blocker: the proposed smoke can pass without exercising cloud-brain. Local `benchagi` still talks to local gateway `chat.send`; W3 adds `/v1/llm_turn` for the relay to call, but I found no local gateway path that routes `chat.send` through cloud-brain based on Firestore `runtime`. + +## Findings + +### Path correctness +**P0** — `scripts/cloud-brain-smoke.mjs:97-103` runs `node bin/benchagi.mjs ...`, which sends local gateway `chat.send` via `src/v2/chat-runner.ts:359-364`. 
The W3 path is a separate HTTP endpoint, `/v1/llm_turn`, registered in `openclaw-w3/src/gateway/server-http.ts:970-984` and handled in `openclaw-w3/src/gateway/llm-turn-http.ts:315-375`. + +Issue: selecting agents by Firestore `runtime: remote-brain` does not make local gateway `chat.send` use cloud-brain. Unless one of the unavailable PR diffs adds a local gateway dispatch bridge, this smoke tests local OpenClaw, not cloud-brain. + +Suggested fix: either add/verify a gateway `chat.send` remote-brain dispatch path that writes/claims `LlmTurnDirective` and re-emits normal `chat`/`agent` events, or change the smoke to assert cloud artifacts: directive created, claimed, completed, and correlated to the CLI run. + +### Dedup key validity +**OK, conditional.** The CLI key `(prefix, runId, seq, stream, sub)` is valid if forwarded remote-brain events are emitted as normal `agent` frames with stable `runId` and monotonic per-run `seq`. The directive schema carries `runId` per spec lines 611-619, and OpenClaw agent events generate monotonic per-run seq in `openclaw-w3/src/infra/agent-events.ts:200-217`. + +Concern is not the key. The gap is proving the gateway actually forwards remote-brain completion as those same event frames. + +### Transparency contract (ADR-006) +**OK in bench-cli code; P0 in integration contract.** `ChatRunner` does not branch on `runtime`; lifecycle handling keys off `chat` final states and `agent.lifecycle` phases only (`src/v2/chat-runner.ts:118-154`). That is the right transparency shape. + +But transparency is currently asserted, not proven. The local gateway path must be the component that hides cloud-brain. I did not find that bridge in the readable local code. + +### chat.history under cloud-brain +**P1** — bench-cli calls `chat.history { sessionKey, sinceSeq }` and expects `events`/`frames` for replay (`src/v2/chat-runner.ts:222-245`, `491-505`). 
The readable OpenClaw `chat.history` handler accepts `sessionKey`, `limit`, `maxChars`, then returns transcript `messages`, not event frames, and does not consume `sinceSeq` (`openclaw-w3/src/gateway/server-methods/chat.ts:1607-1675`). + +Suggested fix: gateway needs a real event-frame history contract: `chat.history({ sessionKey, sinceSeq }) -> { events: EventFrame[] }`, including forwarded remote-brain frames. Otherwise reconnect replay is best-effort no-op for both local and cloud-brain runs. + +### Smoke script soundness +**P0** — false-positive risk: non-empty stdout + exit 0 can be produced by local execution. No assertion proves `relayDirectives` had an `llm_turn` for this run, that `/v1/llm_turn` was called, or that a directive completed. + +**P1** — Firestore lookup assumes doc id `instances/${instanceId}/agentDeployments/${instanceId}_${agentId}` at `scripts/cloud-brain-smoke.mjs:75-77`. The runbook says this, but the broader spec examples use arbitrary `deploymentId`. If W1 backfill preserves noncanonical deployment ids, the smoke skips real remote-brain agents. + +**P2** — Firestore failures are recorded as skips (`scripts/cloud-brain-smoke.mjs:193-200`), so an auth/project outage can produce exit 0 if no agents are tested. For readiness, infra lookup failure should be fatal unless explicitly `--allow-skips`. + +**P2** — script assumes repo-root cwd for `bin/benchagi.mjs` (`scripts/cloud-brain-smoke.mjs:100-104`). Use `import.meta.url` to resolve repo root. + +## Single-test-proves-transparency answer +The current smoke is not sufficient. A stronger single test: + +1. Start `benchagi --agent "respond: smoke-ok"`. +2. Capture raw WS frames from the CLI. +3. Watch Firestore for a new `relayDirectives` doc with `directiveType: "llm_turn"`, matching `instanceId`, `agentId`, and a correlated `runId`. +4. Assert directive reaches `completed`. +5. 
Assert the CLI saw normal `chat` final and/or `agent.lifecycle end` frames with the same run identity and no runtime-specific branch. + +That proves both halves: cloud-brain actually ran, and benchagi consumed it transparently. +=== EXIT 0 === +DONE: Wed May 6 13:10:35 MDT 2026 diff --git a/scripts/cloud-brain-smoke.mjs b/scripts/cloud-brain-smoke.mjs new file mode 100755 index 0000000..cff7a38 --- /dev/null +++ b/scripts/cloud-brain-smoke.mjs @@ -0,0 +1,251 @@ +#!/usr/bin/env node +// cloud-brain-smoke.mjs — Phase B validation per +// `~/.openclaw/wiki/main/_boards/runbooks/platform/benchagi-v2-cloud-brain-pickup.md` §"Validation script". +// +// Activates AFTER cloud-brain Phase 1B PRs merge (BenchAGI #872 W1, #874 W4, +// #878 W2, #988 relay, openclaw#24 W3) AND a developer's agentDeployment is +// flipped to runtime: 'remote-brain' per the operator-side smoke runbook. +// +// What it does: +// +// 1. Lists all known agents from the local openclaw gateway. +// 2. For each agent, queries Firestore (admin REST + gcloud token per +// ~/.claude/.../memory/reference_firebase_admin_rest_recipe.md) for +// `agentDeployments/{instanceId}_{agentId}` and reads the `runtime` field. +// 3. If `runtime === 'remote-brain'`, spawns `benchagi --agent +// --liveness off "respond: smoke-ok"` with stdout/stderr captured and +// a 60s wall-clock timeout. +// 4. Asserts (per runbook §"Validation script" point 4): +// - chat output is non-empty (proves cloud-brain dispatched the LLM turn) +// - the run terminated cleanly (proves orchestrator returned) +// - no error markers in output +// - latency < 60s +// 5. Emits a JSON summary; exits 0 on all-green, 1 if any agent failed. +// +// Required env: +// - INSTANCE_ID — Firestore instance id (e.g. 
cory's primary instance)
+// - GCP_PROJECT — Firebase project id (default: benchagi-8ea90)
+//
+// Optional env:
+// - SMOKE_AGENT_FILTER — regex; if set, only test agents matching this
+// - SMOKE_PROMPT — override default prompt (default: "respond: smoke-ok")
+// - SMOKE_TIMEOUT_MS — override 60s default (must be a positive integer)
+// - DEBUG_RAW_FRAMES — if "1", tee raw WS frames to ./smoke-frames-.jsonl
+//   (requires bench-cli to honor BENCHAGI_DEBUG_TRACE_FILE,
+//   which is a V1.1 follow-up — for now this is a no-op)
+//
+// This script is GATED on cloud-brain Phase 1B merging. If the schemas don't
+// support `runtime` field yet, every agent will appear as `runtime: null`
+// and the script will report "no remote-brain agents found — Phase 1B may
+// not be merged + flipped yet".
+//
+// Exit codes:
+//   0 — every tested agent passed, or no remote-brain agent was found
+//   1 — at least one tested agent failed its assertions
+//   2 — environment/infra problem (missing or invalid env, gcloud failure,
+//       agent listing failure, or a Firestore lookup that errored)
+
+import { execFileSync, spawn } from "node:child_process";
+import { performance } from "node:perf_hooks";
+import { readFileSync } from "node:fs";
+import { homedir } from "node:os";
+import { join } from "node:path";
+
+const INSTANCE_ID = process.env.INSTANCE_ID;
+const GCP_PROJECT = process.env.GCP_PROJECT ?? "benchagi-8ea90";
+const PROMPT = process.env.SMOKE_PROMPT ?? "respond: smoke-ok";
+const TIMEOUT_MS = Number.parseInt(process.env.SMOKE_TIMEOUT_MS ?? "60000", 10);
+const FILTER = process.env.SMOKE_AGENT_FILTER ? new RegExp(process.env.SMOKE_AGENT_FILTER) : null;
+
+// Repo root, resolved from this file's location (scripts/ → ..) so the script
+// works regardless of the caller's working directory.
+const REPO_ROOT = new URL("../", import.meta.url);
+// Upper bound for a single Firestore REST lookup; a hung request must not
+// stall the whole smoke run.
+const FIRESTORE_TIMEOUT_MS = 15_000;
+// Grace period between SIGINT and SIGKILL when a run exceeds its timeout.
+const KILL_GRACE_MS = 1000;
+// Lenient acknowledgement: whole words only, so that "hi" inside "this" or
+// "ok" inside "token" does not count as an acknowledgement.
+const ACK_PATTERN = /\b(?:smoke-ok|ok|okay|hello|hi)\b/i;
+
+if (!INSTANCE_ID) {
+  console.error("ERROR: INSTANCE_ID env var required");
+  console.error("Usage: INSTANCE_ID=<instance-id> node scripts/cloud-brain-smoke.mjs");
+  process.exit(2);
+}
+
+// parseInt yields NaN for non-numeric input, and setTimeout(NaN) fires almost
+// immediately — which would kill every child before it can answer.
+if (!Number.isFinite(TIMEOUT_MS) || TIMEOUT_MS <= 0) {
+  console.error(
+    `ERROR: SMOKE_TIMEOUT_MS must be a positive integer (got ${JSON.stringify(process.env.SMOKE_TIMEOUT_MS)})`,
+  );
+  process.exit(2);
+}
+
+// --- Firestore admin REST via gcloud user token ---
+
+// Returns the caller's gcloud access token (trimmed). Exits the process with
+// code 2 if gcloud fails or prints nothing.
+function gcloudAccessToken() {
+  let token;
+  try {
+    token = execFileSync("gcloud", ["auth", "print-access-token"], {
+      encoding: "utf-8",
+      stdio: ["ignore", "pipe", "pipe"],
+    }).trim();
+  } catch (err) {
+    console.error("ERROR: gcloud auth print-access-token failed:", err.message);
+    console.error("Run `gcloud auth login` first.");
+    process.exit(2);
+  }
+  if (!token) {
+    console.error("ERROR: gcloud auth print-access-token returned an empty token");
+    console.error("Run `gcloud auth login` first.");
+    process.exit(2);
+  }
+  return token;
+}
+
+// Reads `instances/{instanceId}/agentDeployments/{instanceId}_{agentId}` over
+// the Firestore REST API.
+//
+// Returns null when the document does not exist (HTTP 404), otherwise
+// `{ runtime, tier, raw }` where `runtime` / `tier` are the string values of
+// those fields (null when absent or not a string) and `raw` is the REST
+// document. Throws on any other non-2xx status, on network failure, or when
+// the request exceeds FIRESTORE_TIMEOUT_MS.
+async function fetchAgentDeployment(instanceId, agentId, token) {
+  // Ids come from env / parsed CLI output; encode them so an unexpected
+  // character cannot change the request path.
+  const docId = encodeURIComponent(`${instanceId}_${agentId}`);
+  const docPath = `instances/${encodeURIComponent(instanceId)}/agentDeployments/${docId}`;
+  const url = `https://firestore.googleapis.com/v1/projects/${encodeURIComponent(GCP_PROJECT)}/databases/(default)/documents/${docPath}`;
+  const resp = await fetch(url, {
+    headers: {
+      Authorization: `Bearer ${token}`,
+      "X-Goog-User-Project": GCP_PROJECT,
+    },
+    signal: AbortSignal.timeout(FIRESTORE_TIMEOUT_MS),
+  });
+  if (resp.status === 404) return null; // No deployment for this agent
+  if (!resp.ok) {
+    throw new Error(`Firestore GET ${docPath} → ${resp.status} ${await resp.text()}`);
+  }
+  const doc = await resp.json();
+  // Firestore REST returns fields wrapped in type tags. Extract `runtime` (string).
+  const runtime = doc?.fields?.runtime?.stringValue ?? null;
+  const tier = doc?.fields?.tier?.stringValue ?? null;
+  return { runtime, tier, raw: doc };
+}
+
+// --- benchagi spawn with stdout capture + timeout ---
+
+// Runs `node bin/benchagi.mjs --agent <agentId> --liveness off --no-thinking
+// <prompt>` from the repo root and captures its output.
+//
+// Always resolves (never rejects) with
+// `{ exitCode, signal, timedOut, processError, stdout, stderr, durationMs }`:
+//   - exitCode / signal: as reported by the child's 'close' event (both null
+//     when the process errored before closing)
+//   - timedOut: true when the timeout fired and the child was signalled
+//   - processError: message of a child-process 'error' event, else null
+function runBenchagi(agentId, prompt, timeoutMs) {
+  return new Promise((resolve) => {
+    const t0 = performance.now();
+    const child = spawn(
+      "node",
+      ["bin/benchagi.mjs", "--agent", agentId, "--liveness", "off", "--no-thinking", prompt],
+      {
+        cwd: REPO_ROOT,
+        stdio: ["ignore", "pipe", "pipe"],
+        env: { ...process.env, NO_COLOR: "1" },
+      },
+    );
+
+    let stdout = "";
+    let stderr = "";
+    let timedOut = false;
+    let settled = false;
+    let killTimer = null;
+    child.stdout.on("data", (d) => { stdout += d.toString(); });
+    child.stderr.on("data", (d) => { stderr += d.toString(); });
+
+    const timer = setTimeout(() => {
+      timedOut = true;
+      try { child.kill("SIGINT"); } catch { /* ignore */ }
+      // Escalate if the child ignores SIGINT.
+      killTimer = setTimeout(() => { try { child.kill("SIGKILL"); } catch { /* ignore */ } }, KILL_GRACE_MS);
+    }, timeoutMs);
+
+    const finish = (exitCode, signal, processError) => {
+      if (settled) return; // 'error' and 'close' can both fire
+      settled = true;
+      clearTimeout(timer);
+      clearTimeout(killTimer);
+      const dtMs = performance.now() - t0;
+      resolve({ exitCode, signal, timedOut, processError, stdout, stderr, durationMs: Math.round(dtMs) });
+    };
+
+    // Without this listener a spawn failure (e.g. `node` missing from PATH)
+    // is an unhandled 'error' event and takes down the whole smoke run.
+    child.on("error", (err) => finish(null, null, err.message));
+    child.on("close", (code, signal) => finish(code, signal, null));
+  });
+}
+
+// --- Assertions ---
+
+// Checks one runBenchagi() result and returns a list of human-readable issue
+// strings; an empty list means the run passed.
+//   - result: object returned by runBenchagi()
+//   - prompt: the prompt that was sent; the acknowledgement check only
+//     applies when it asks for "smoke-ok"
+//   - timeoutMs: the wall-clock timeout the run was given
+function assertSmokePassed(result, prompt, timeoutMs) {
+  const issues = [];
+
+  if (result.processError) {
+    issues.push(`process error: ${result.processError}`);
+  }
+  if (result.exitCode !== 0) {
+    issues.push(result.signal ? `exit code ${result.exitCode} (signal ${result.signal})` : `exit code ${result.exitCode}`);
+  }
+  // Prefer the explicit flag; fall back to duration for results without it.
+  const timedOut = result.timedOut ?? (result.durationMs >= timeoutMs);
+  if (timedOut) {
+    issues.push(`timed out after ${timeoutMs}ms`);
+  }
+  if (result.durationMs >= 60_000) {
+    issues.push(`latency ${result.durationMs}ms exceeds 60s budget`);
+  }
+  if (!result.stdout || result.stdout.trim().length === 0) {
+    issues.push("empty stdout — no chat output captured");
+  }
+  // Heuristic: look for a few error indicators in the rendered output.
+  if (/error: |chat\.send failed|connection closed|history replay failed/i.test(result.stdout)) {
+    issues.push("error marker in stdout");
+  }
+  // The default prompt asks the agent to "respond: smoke-ok" — don't strictly
+  // require that text (model might paraphrase), but flag if no acknowledgement
+  // word is present at all. Skipped for custom prompts, where these words are
+  // not expected.
+  // NOTE(review): if benchagi echoes the prompt to stdout this check passes
+  // trivially — confirm against the CLI's rendering.
+  if (/smoke-ok/i.test(String(prompt ?? "")) && !ACK_PATTERN.test(result.stdout ?? "")) {
+    issues.push("response doesn't contain expected acknowledgement (lenient check failed)");
+  }
+
+  return issues;
+}
+
+// --- Discover known agents from the local gateway ---
+
+// Runs `benchagi agents list` from the repo root and returns `[{ id, model }]`
+// parsed from its indented two-column output. Exits the process with code 2
+// if the command fails.
+function listAgents() {
+  try {
+    const out = execFileSync("node", ["bin/benchagi.mjs", "agents", "list"], {
+      cwd: REPO_ROOT,
+      encoding: "utf-8",
+      stdio: ["ignore", "pipe", "pipe"],
+    });
+    // Parse lines like " kestrel-aurelius pi/aurelius-default"; split on whitespace.
+    const agents = [];
+    for (const line of out.split("\n")) {
+      const m = /^\s+(\S+)\s+(\S+)/.exec(line);
+      if (m) agents.push({ id: m[1], model: m[2] });
+    }
+    return agents;
+  } catch (err) {
+    console.error("ERROR: `benchagi agents list` failed:", err.message);
+    process.exit(2);
+  }
+}
+
+// --- Main ---
+
+// Looks up every (filtered) agent's deployment, smoke-tests the remote-brain
+// ones sequentially, prints a JSON summary, and exits with the code described
+// in the header.
+async function main() {
+  console.log(`[smoke] instance=${INSTANCE_ID} project=${GCP_PROJECT} timeout=${TIMEOUT_MS}ms`);
+
+  const token = gcloudAccessToken();
+  const agents = listAgents();
+  console.log(`[smoke] discovered ${agents.length} agent(s) from local gateway`);
+
+  const results = [];
+  for (const agent of agents) {
+    if (FILTER && !FILTER.test(agent.id)) continue;
+
+    let deployment;
+    try {
+      deployment = await fetchAgentDeployment(INSTANCE_ID, agent.id, token);
+    } catch (err) {
+      console.log(`[smoke] ${agent.id}: SKIP — Firestore lookup failed: ${err.message}`);
+      results.push({ agent: agent.id, skipped: true, reason: "firestore-lookup-failed", error: err.message });
+      continue;
+    }
+
+    if (!deployment) {
+      console.log(`[smoke] ${agent.id}: SKIP — no agentDeployment doc`);
+      results.push({ agent: agent.id, skipped: true, reason: "no-deployment-doc" });
+      continue;
+    }
+
+    if (deployment.runtime !== "remote-brain") {
+      console.log(`[smoke] ${agent.id}: SKIP — runtime=${deployment.runtime ?? ""}, not remote-brain`);
+      results.push({ agent: agent.id, skipped: true, reason: "not-remote-brain", runtime: deployment.runtime });
+      continue;
+    }
+
+    console.log(`[smoke] ${agent.id}: RUN — runtime=remote-brain, tier=${deployment.tier ?? ""}`);
+    const run = await runBenchagi(agent.id, PROMPT, TIMEOUT_MS);
+    const issues = assertSmokePassed(run, PROMPT, TIMEOUT_MS);
+
+    if (issues.length === 0) {
+      console.log(`[smoke] ${agent.id}: PASS (${run.durationMs}ms)`);
+      results.push({ agent: agent.id, pass: true, durationMs: run.durationMs });
+    } else {
+      console.log(`[smoke] ${agent.id}: FAIL — ${issues.join(", ")}`);
+      console.log(`[smoke] --- stdout ---\n${run.stdout}\n[smoke] --- stderr ---\n${run.stderr}`);
+      results.push({ agent: agent.id, pass: false, durationMs: run.durationMs, issues, stdout: run.stdout, stderr: run.stderr });
+    }
+  }
+
+  const tested = results.filter((r) => !r.skipped);
+  const failed = tested.filter((r) => !r.pass);
+  const lookupErrors = results.filter((r) => r.reason === "firestore-lookup-failed");
+
+  console.log("\n[smoke] --- summary ---");
+  console.log(JSON.stringify({
+    totalAgents: results.length,
+    tested: tested.length,
+    passed: tested.length - failed.length,
+    failed: failed.length,
+    lookupErrors: lookupErrors.length,
+    results,
+  }, null, 2));
+
+  if (failed.length > 0) {
+    process.exit(1);
+  }
+
+  // A lookup that errored tells us nothing about that agent's runtime, so the
+  // run cannot be reported green: an auth/project outage would otherwise look
+  // like "nothing to test" and exit 0.
+  if (lookupErrors.length > 0) {
+    console.error(`[smoke] ${lookupErrors.length} Firestore lookup(s) failed — result is inconclusive`);
+    process.exit(2);
+  }
+
+  if (tested.length === 0) {
+    console.log("[smoke] no remote-brain agents found — Phase 1B may not be merged + flipped yet");
+  }
+
+  process.exit(0);
+}
+
+main().catch((err) => {
+  console.error("[smoke] fatal:", err);
+  process.exit(2);
+});