diff --git a/docs/product/ricky-skill-embedding-boundary.md b/docs/product/ricky-skill-embedding-boundary.md index da2f84c5..2309a2d6 100644 --- a/docs/product/ricky-skill-embedding-boundary.md +++ b/docs/product/ricky-skill-embedding-boundary.md @@ -12,7 +12,7 @@ For strict TypeScript or proof-oriented workflow generation, the expected loaded `writing-agent-relay-workflows` affects the generated workflow contract by shaping the dedicated channel, explicit agents, step dependencies, review stages, and final signoff. `relay-80-100-workflow` affects validation by shaping soft validation, review/fix/final-review flow, final hard validation, git diff, and regression gates. These are generation-time effects because they are materialized into the workflow text and deterministic metadata before any workflow runner launches agents. -The generated workflow also includes a deterministic `skill-boundary-metadata-gate`. This gate checks that the generated boundary metadata exists, records `generation_time_only`, names the loaded skills, includes the `generation_selection`, `generation_loading`, and applicable `generation_rendering` stages, and records effects such as `workflow_contract` and `validation_gates`. The gate proves the artifact carries the skill boundary forward as metadata; it does not prove runtime agents load skills. +The generated workflow materializes this boundary as context metadata, including `loaded-skills.txt`, `skill-matches.json`, and `skill-application-boundary.json`. Ricky verifies that metadata in generation tests rather than re-checking its own serialized files with runtime shell text matches. Runtime gates should focus on agent-produced artifacts, validation commands, scoped diff evidence, and blocker files. 
## Runtime Boundary diff --git a/evals/suites/workflow-authoring/cases.jsonl b/evals/suites/workflow-authoring/cases.jsonl index f26eb084..9d4b3c70 100644 --- a/evals/suites/workflow-authoring/cases.jsonl +++ b/evals/suites/workflow-authoring/cases.jsonl @@ -4,7 +4,7 @@ {"id":"workflow-authoring.distinct-reviewer","suite":"workflow-authoring","executor":"manual","kind":"regression","input":{"message":"Write a workflow that has Codex generate a convention update and then review it."},"expected":{"maxToolCalls":0,"must":["Assign a reviewer agent distinct from the writer when possible.","Persist significant review artifacts under `.workflow-artifacts/`.","Keep convention-only edits scoped to the declared convention files."],"mustNot":["Let the same agent both write and rubber-stamp the change without an explicit reason.","Skip deterministic file-existence, grep, symlink, or scoped change-detection checks.","Edit unrelated package metadata or generated workflows for a convention-only request."],"humanReviewRequired":true},"tags":["workflow-authoring","review"]} {"id":"workflow-authoring.fresh-eyes-loop-simple-test","suite":"workflow-authoring","executor":"ricky-cli","kind":"regression","input":{"message":"Generate a small Agent Relay workflow that adds one missing Vitest unit test for a TypeScript helper and proves the test passes."},"expected":{"ok":true,"contentIncludes":["stage\": \"generate","status\": \"ok","--- GENERATED ARTIFACT:",".agent(\"reviewer-claude\"",".agent(\"validator-claude\"",".agent(\"reviewer-codex\"",".agent(\"validator-codex\"","verdict: FINDINGS | NO_ISSUES_FOUND | BLOCKED","add or update appropriate tests, fixtures, assertions, or deterministic proofs","dependsOn: 
[\"final-fix-codex\"]"],"contentMatches":["\\.step\\(\"review-claude\"[\\s\\S]*\\.step\\(\"fix-loop\"[\\s\\S]*\\.step\\(\"final-review-claude\"[\\s\\S]*\\.step\\(\"final-fix-claude\"[\\s\\S]*\\.step\\(\"review-codex\"[\\s\\S]*\\.step\\(\"fix-loop-codex\"[\\s\\S]*\\.step\\(\"final-review-codex\"[\\s\\S]*\\.step\\(\"final-fix-codex\"[\\s\\S]*\\.step\\(\"final-review-pass-gate\"[\\s\\S]*\\.step\\(\"final-hard-validation\""],"forbidPhrases":["TypeError","ReferenceError","needs_clarification"],"maxToolCalls":1,"must":["Include the mandatory fresh-eyes review/fix loop even though the workflow is small.","Run the loop in this order: Claude review, Claude fix, Claude final review, Claude final fix, then Codex review, Codex fix, Codex final review, Codex final fix.","Require review output to use a structured verdict such as `FINDINGS`, `NO_ISSUES_FOUND`, or `BLOCKED`.","Require fix steps to add or update tests, fixtures, assertions, or deterministic proof for testable findings.","Put final deterministic acceptance after the Codex final fix."],"mustNot":["Treat the first passing test run as a substitute for fresh-eyes review.","Run Claude and Codex reviews in parallel before fixing.","Collapse all findings into one generic fix step with no final re-review.","Commit, open a PR, or hand off before the Codex loop finishes."],"humanReviewRequired":false},"tags":["workflow-authoring","review","tests","fresh-eyes"],"mock":{"cwd":"temp","specFileContent":"Generate a small Agent Relay workflow that adds one missing Vitest unit test for a TypeScript helper and proves the test passes.","argv":"--mode local --spec-file {{specFile}} --no-run --json --no-workforce-persona","includeGeneratedArtifacts":true}} {"id":"workflow-authoring.fresh-eyes-loop-medium-source-and-test","suite":"workflow-authoring","executor":"ricky-cli","kind":"regression","input":{"message":"Generate a Ricky workflow that changes one source file and one test file for a CLI parsing bug, with scoped diff evidence and a 
targeted Vitest command."},"expected":{"ok":true,"contentIncludes":["stage\": \"generate","status\": \"ok","src/surfaces/cli/flows/power-user-parser.ts","src/surfaces/cli/flows/power-user-parser.test.ts","npx vitest run src/surfaces/cli/flows/power-user-parser.test.ts","git diff --name-only","git ls-files --others --exclude-standard","review-claude.md","final-review-codex.md","codex-final-fix.md","dependsOn: [\"final-fix-codex\"]"],"contentMatches":["\\.step\\(\"review-claude\"[\\s\\S]*\\.step\\(\"fix-loop\"[\\s\\S]*\\.step\\(\"final-review-claude\"[\\s\\S]*\\.step\\(\"final-fix-claude\"[\\s\\S]*\\.step\\(\"review-codex\"[\\s\\S]*\\.step\\(\"fix-loop-codex\"[\\s\\S]*\\.step\\(\"final-review-codex\"[\\s\\S]*\\.step\\(\"final-fix-codex\"[\\s\\S]*\\.step\\(\"final-review-pass-gate\"[\\s\\S]*\\.step\\(\"final-hard-validation\""],"forbidPhrases":["TypeError","ReferenceError","needs_clarification"],"maxToolCalls":1,"must":["Preserve the Claude-then-Codex review/fix/final-review/final-fix order before final acceptance.","Keep deterministic file gates and scoped `git diff --name-only` / untracked-file checks limited to the declared source and test targets.","Feed review findings into fix steps and require fixers to harden tests when findings are testable.","Write review, fix, final-review, final-fix, validation, and signoff artifacts under `.workflow-artifacts/`."],"mustNot":["Use broad repo-wide change detection as the only proof.","Allow a single reviewer to rubber-stamp its own work without a distinct fresh-eyes pass.","Skip the Codex final review/fix loop because Claude already reviewed.","Move final hard validation before the Codex final fix."],"humanReviewRequired":false},"tags":["workflow-authoring","review","generation","fresh-eyes"],"mock":{"cwd":"temp","specFileContent":"Generate a Ricky workflow that changes one source file and one test file for a CLI parsing bug.\\n\\n## Target Files\\n\\n- src/surfaces/cli/flows/power-user-parser.ts\\n- 
src/surfaces/cli/flows/power-user-parser.test.ts\\n\\n## Acceptance\\n\\nRun `npx vitest run src/surfaces/cli/flows/power-user-parser.test.ts`.","argv":"--mode local --spec-file {{specFile}} --no-run --json --no-workforce-persona","includeGeneratedArtifacts":true}} -{"id":"workflow-authoring.fresh-eyes-loop-complex-multitrack","suite":"workflow-authoring","executor":"ricky-cli","kind":"capability","input":{"message":"Generate a serious multi-track master executor workflow for three independent product slices: runtime evidence, CLI status copy, and generation validation. Each track owns separate files and the final workflow may create a PR."},"expected":{"ok":true,"contentIncludes":["stage\": \"generate","status\": \"ok","RICKY_MASTER_EXECUTOR_WORKFLOW","Master plan:","RICKY_CHILD_WORKFLOW_COMPLETE","review-claude","final-fix-codex","RICKY_CHILD_FRESH_EYES_LOOP_READY","BLOCKED_NO_COMMIT"],"contentMatches":["review-claude[\\s\\S]*fix-loop[\\s\\S]*final-review-claude[\\s\\S]*final-fix-claude[\\s\\S]*review-codex[\\s\\S]*fix-loop-codex[\\s\\S]*final-review-codex[\\s\\S]*final-fix-codex[\\s\\S]*final-review-pass-gate[\\s\\S]*final-hard-validation"],"forbidPhrases":["TypeError","ReferenceError","needs_clarification"],"maxToolCalls":1,"must":["Use a coordination shape that fits independent tracks while preserving deterministic gates after every editing step.","Ensure each implementation track or child workflow has the mandatory Claude-then-Codex fresh-eyes review/fix loop before track signoff.","Run final deterministic acceptance only after all Codex final fixes and post-fix reviews have completed.","Use `BLOCKED_NO_COMMIT` with evidence when a finding cannot be fixed, and skip commit or PR creation in that state.","Use the GitHub primitive for PR creation when shipping is in scope."],"mustNot":["Put one global review at the end and call that sufficient for all tracks.","Serialize independent tracks without a stated dependency reason.","Let PR creation, commit, or handoff 
race ahead of unresolved review findings.","Present tests, typecheck, or dry-run alone as the complete proof bar."],"humanReviewRequired":false},"tags":["workflow-authoring","review","multitrack","fresh-eyes"],"mock":{"cwd":"temp","specFileContent":"Generate a serious multi-track workflow for three independent product slices as smaller workflows run by a master executor: runtime evidence, CLI status copy, and generation validation. Each track owns separate files and the final workflow may create a PR.\\n\\nUse independent child workflows with deterministic validation, fresh-eyes review/fix loops, and GitHub primitive PR creation when shipping is in scope.","argv":"--mode local --spec-file {{specFile}} --no-run --json --no-workforce-persona","includeGeneratedArtifacts":true}} +{"id":"workflow-authoring.fresh-eyes-loop-complex-multitrack","suite":"workflow-authoring","executor":"ricky-cli","kind":"capability","input":{"message":"Generate a serious multi-track master executor workflow for three independent product slices: runtime evidence, CLI status copy, and generation validation. 
Each track owns separate files and the final workflow may create a PR."},"expected":{"ok":true,"contentIncludes":["stage\": \"generate","status\": \"ok","RICKY_MASTER_EXECUTOR_WORKFLOW","Master plan:","final-signoff","review-claude","final-fix-codex","RICKY_CHILD_FINAL_REVIEW_FILES_READY","BLOCKED_NO_COMMIT"],"contentMatches":["review-claude[\\s\\S]*fix-loop[\\s\\S]*final-review-claude[\\s\\S]*final-fix-claude[\\s\\S]*review-codex[\\s\\S]*fix-loop-codex[\\s\\S]*final-review-codex[\\s\\S]*final-fix-codex[\\s\\S]*final-review-pass-gate[\\s\\S]*final-hard-validation"],"forbidPhrases":["TypeError","ReferenceError","needs_clarification"],"maxToolCalls":1,"must":["Use a coordination shape that fits independent tracks while preserving deterministic gates after every editing step.","Ensure each implementation track or child workflow has the mandatory Claude-then-Codex fresh-eyes review/fix loop before track signoff.","Run final deterministic acceptance only after all Codex final fixes and post-fix reviews have completed.","Use `BLOCKED_NO_COMMIT` with evidence when a finding cannot be fixed, and skip commit or PR creation in that state.","Use the GitHub primitive for PR creation when shipping is in scope."],"mustNot":["Put one global review at the end and call that sufficient for all tracks.","Serialize independent tracks without a stated dependency reason.","Let PR creation, commit, or handoff race ahead of unresolved review findings.","Present tests, typecheck, or dry-run alone as the complete proof bar."],"humanReviewRequired":false},"tags":["workflow-authoring","review","multitrack","fresh-eyes"],"mock":{"cwd":"temp","specFileContent":"Generate a serious multi-track workflow for three independent product slices as smaller workflows run by a master executor: runtime evidence, CLI status copy, and generation validation. 
Each track owns separate files and the final workflow may create a PR.\\n\\nUse independent child workflows with deterministic validation, fresh-eyes review/fix loops, and GitHub primitive PR creation when shipping is in scope.","argv":"--mode local --spec-file {{specFile}} --no-run --json --no-workforce-persona","includeGeneratedArtifacts":true}} {"id":"workflow-authoring.no-silent-mode-fallback","suite":"workflow-authoring","executor":"manual","kind":"regression","input":{"message":"Create a workflow for a user who asked to run in Cloud, but Cloud auth is missing."},"expected":{"maxToolCalls":0,"must":["Detect and report the missing Cloud readiness or auth condition before any expensive generation or run step.","Ask for an explicit user decision before switching to local/BYOH execution.","Preserve the requested execution mode in the workflow context and user-facing summary."],"mustNot":["Silently fall back from Cloud to local.","Claim a provider, account, credential, or integration is connected without a deterministic check.","Hide mode changes inside generic \"auto\" wording."],"humanReviewRequired":true},"tags":["workflow-authoring","local","cloud"]} {"id":"workflow-authoring.agent-assistant-boundary","suite":"workflow-authoring","executor":"manual","kind":"capability","input":{"message":"Update Ricky to reuse a new Agent Assistant primitive while preserving Ricky-owned local execution behavior."},"expected":{"maxToolCalls":0,"must":["Reuse the shared Agent Assistant package for neutral assistant/runtime mechanics where appropriate.","State the Ricky-owned behavior that must remain local, including workflow generation, LocalResponse, blocker taxonomy, recovery wording, and evidence semantics.","Add proof that the shared primitive is exercised in a real Ricky path, not only imported or documented."],"mustNot":["Move product-specific Ricky execution contracts into Agent Assistant without an explicit proof boundary.","Overclaim broad Agent Assistant adoption from 
a narrow adapter change.","Replace Ricky's local blocker and recovery contract with generic assistant output."],"humanReviewRequired":true},"tags":["workflow-authoring","agent-assistant","boundary"]} {"id":"workflow-authoring.evidence-trail","suite":"workflow-authoring","executor":"manual","kind":"capability","input":{"message":"Design a workflow that watches a long-running workflow, diagnoses a failure, attempts a safe repair, and reports the outcome."},"expected":{"maxToolCalls":0,"must":["Preserve an evidence trail that names commands, artifacts, failed steps, log locations, assertions, and side effects.","Distinguish successful repair, actionable blocker, unsupported condition, and unrecoverable error.","Include resumability guidance such as failed step, previous run id, or exact rerun command when available."],"mustNot":["Claim the workflow succeeded when a blocker or missing dependency stopped execution.","Drop log paths or side-effect summaries from the final outcome.","Retry destructive or credentialed actions without explicit authorization."],"humanReviewRequired":true},"tags":["workflow-authoring","evidence"]} diff --git a/evals/suites/workflow-authoring/cases.md b/evals/suites/workflow-authoring/cases.md index b324d0fa..83aaa596 100644 --- a/evals/suites/workflow-authoring/cases.md +++ b/evals/suites/workflow-authoring/cases.md @@ -168,10 +168,10 @@ contentIncludes: - status": "ok - RICKY_MASTER_EXECUTOR_WORKFLOW - Master plan: -- RICKY_CHILD_WORKFLOW_COMPLETE +- final-signoff - review-claude - final-fix-codex -- RICKY_CHILD_FRESH_EYES_LOOP_READY +- RICKY_CHILD_FINAL_REVIEW_FILES_READY - BLOCKED_NO_COMMIT contentMatches: - review-claude[\s\S]*fix-loop[\s\S]*final-review-claude[\s\S]*final-fix-claude[\s\S]*review-codex[\s\S]*fix-loop-codex[\s\S]*final-review-codex[\s\S]*final-fix-codex[\s\S]*final-review-pass-gate[\s\S]*final-hard-validation diff --git a/scripts/evals/ci-review-comment.mjs b/scripts/evals/ci-review-comment.mjs index 9b13ff90..0ce18545 
100644 --- a/scripts/evals/ci-review-comment.mjs +++ b/scripts/evals/ci-review-comment.mjs @@ -39,6 +39,8 @@ if (process.env.GITHUB_TOKEN && process.env.GITHUB_REPOSITORY && process.env.PR_ function renderComment({ result, runDir }) { const failed = result.tests.filter((test) => test.status === 'failed'); const skipped = result.tests.filter((test) => test.status === 'skipped'); + const providerInfraSkipped = skipped.filter(isProviderInfrastructureSkip); + const blockingSkipped = skipped.filter((test) => !isProviderInfrastructureSkip(test)); const needsHuman = result.tests.filter((test) => test.status === 'needs-human'); const reviewableNeedsHuman = needsHuman.filter(hasCapturedOutput); const missingOutputNeedsHuman = needsHuman.filter((test) => !hasCapturedOutput(test)); @@ -50,13 +52,25 @@ function renderComment({ result, runDir }) { `Mode: \`${result.mode}\``, `Git SHA: \`${result.git_sha}\``, '', - `**Passed:** ${result.passed} | **Needs human:** ${result.needs_human} | **Reviewable:** ${reviewableNeedsHuman.length} | **Missing output:** ${missingOutputNeedsHuman.length} | **Failed:** ${result.failed} | **Skipped:** ${result.skipped}`, + `**Passed:** ${result.passed} | **Needs human:** ${result.needs_human} | **Reviewable:** ${reviewableNeedsHuman.length} | **Missing output:** ${missingOutputNeedsHuman.length} | **Failed:** ${result.failed} | **Skipped:** ${result.skipped} | **Provider infra skipped:** ${providerInfraSkipped.length}`, '', ]; - if (failed.length > 0 || skipped.length > 0) { + if (failed.length > 0 || blockingSkipped.length > 0) { lines.push('## Blocking Cases', ''); - for (const test of [...failed, ...skipped]) { + for (const test of [...failed, ...blockingSkipped]) { + appendCaseDetails(lines, test, { forceOpen: true }); + } + } + + if (providerInfraSkipped.length > 0) { + lines.push( + '## Provider Infrastructure Skips', + '', + 'These provider-backed cases were skipped after retryable provider outages. 
They are not treated as Ricky product regressions.', + '', + ); + for (const test of providerInfraSkipped) { appendCaseDetails(lines, test, { forceOpen: true }); } } @@ -128,6 +142,11 @@ function appendCaseDetails(lines, test, { forceOpen }) { lines.push('', ''); } +function isProviderInfrastructureSkip(test) { + if (test.status !== 'skipped') return false; + return String(test.error ?? '').startsWith('openrouter executor skipped; transient provider infrastructure unavailable'); +} + function appendRickyOutput(lines, test) { const actualContent = getCapturedOutput(test).trim(); lines.push('**Ricky output**', ''); diff --git a/scripts/evals/ci-summary.mjs b/scripts/evals/ci-summary.mjs index 0cf6d236..0ae32320 100644 --- a/scripts/evals/ci-summary.mjs +++ b/scripts/evals/ci-summary.mjs @@ -23,6 +23,8 @@ const result = readResultJson(resultPath); const failed = result.tests.filter((test) => test.status === 'failed'); const skipped = result.tests.filter((test) => test.status === 'skipped'); +const providerInfraSkipped = skipped.filter(isProviderInfrastructureSkip); +const blockingSkipped = skipped.filter((test) => !isProviderInfrastructureSkip(test)); const needsHuman = result.tests.filter((test) => test.status === 'needs-human'); const reviewableNeedsHuman = needsHuman.filter(hasCapturedOutput); const missingOutputNeedsHuman = needsHuman.filter((test) => !hasCapturedOutput(test)); @@ -39,11 +41,14 @@ const lines = [ `- Human cases missing Ricky output: ${missingOutputNeedsHuman.length}`, `- Failed: ${result.failed}`, `- Skipped: ${result.skipped}`, + `- Provider infrastructure skipped: ${providerInfraSkipped.length}`, + `- Blocking skipped: ${blockingSkipped.length}`, '', ]; appendStatusSection(lines, 'Failed', failed); -appendStatusSection(lines, 'Skipped', skipped); +appendStatusSection(lines, 'Skipped', blockingSkipped); +appendStatusSection(lines, 'Provider Infrastructure Skips', providerInfraSkipped); appendHumanReviewSection(lines, reviewableNeedsHuman, 
missingOutputNeedsHuman); const summary = `${lines.join('\n')}\n`; @@ -53,7 +58,7 @@ if (process.env.GITHUB_STEP_SUMMARY) { writeFileSync(process.env.GITHUB_STEP_SUMMARY, summary, { flag: 'a' }); } -if (failed.length > 0 || skipped.length > 0 || missingOutputNeedsHuman.length > 0) { +if (failed.length > 0 || blockingSkipped.length > 0 || missingOutputNeedsHuman.length > 0) { process.exitCode = 1; } @@ -129,6 +134,11 @@ function getCapturedOutput(test) { ); } +function isProviderInfrastructureSkip(test) { + if (test.status !== 'skipped') return false; + return String(test.error ?? '').startsWith('openrouter executor skipped; transient provider infrastructure unavailable'); +} + function findLatestRunDir() { if (!existsSync(RUNS_DIR)) return null; const runs = readdirSync(RUNS_DIR) diff --git a/scripts/evals/run-ricky-evals.mjs b/scripts/evals/run-ricky-evals.mjs index 86bbe661..7e6aa8e3 100644 --- a/scripts/evals/run-ricky-evals.mjs +++ b/scripts/evals/run-ricky-evals.mjs @@ -17,6 +17,7 @@ const ROOT = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '../..') const DEFAULT_OPENCODE_MODEL = 'opencode/minimax-m2.5-free'; const DEFAULT_OPENROUTER_MODEL = 'openai/gpt-oss-120b:free'; const OPENROUTER_CHAT_COMPLETIONS_ENDPOINT = 'https://openrouter.ai/api/v1/chat/completions'; +const OPENROUTER_PROVIDER_INFRA_SKIP_PREFIX = 'openrouter executor skipped; transient provider infrastructure unavailable'; const { argv: evalArgv, executorOverride } = parseRickyEvalArgs(process.argv.slice(2)); const defaultExecutors = createDefaultHumanEvalExecutors(ROOT); @@ -93,6 +94,12 @@ async function executeOpenRouter(testCase, context) { emptyAttempts.push(`attempt ${attempt}: ${note || 'empty content'}`); } catch (error) { if (attempt >= maxAttempts || !isRetryableOpenRouterError(error)) { + if (isRetryableOpenRouterError(error)) { + const message = error instanceof Error ? 
error.message : String(error); + throw createSkippedEvalError( + `${OPENROUTER_PROVIDER_INFRA_SKIP_PREFIX} after ${maxAttempts} attempts for ${testCase.id}: ${message}`, + ); + } throw error; } emptyAttempts.push(`attempt ${attempt}: ${error instanceof Error ? error.message : String(error)}`); diff --git a/src/local/auto-fix-loop.test.ts b/src/local/auto-fix-loop.test.ts index 85b705df..9991e5c3 100644 --- a/src/local/auto-fix-loop.test.ts +++ b/src/local/auto-fix-loop.test.ts @@ -1717,7 +1717,7 @@ function legacyChildWorkflowContent(): string { ' .step("final-signoff", {', ' type: "deterministic",', ' dependsOn: ["final-hard-validation"],', - ' command: "echo RICKY_CHILD_WORKFLOW_COMPLETE",', + ' command: "test -s .workflow-artifacts/generated/child/signoff.md",', ' captureOutput: true,', ' failOnError: true,', ' })', diff --git a/src/product/generation/final-review-gate.test.ts b/src/product/generation/final-review-gate.test.ts index d44191d4..bd1889be 100644 --- a/src/product/generation/final-review-gate.test.ts +++ b/src/product/generation/final-review-gate.test.ts @@ -1,38 +1,38 @@ import { describe, expect, it } from 'vitest'; +import { execFileSync } from 'node:child_process'; +import { mkdirSync, writeFileSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; import { buildFinalReviewPassGateCommand, GATE_BLOCKED_MARKER, - GATE_MISSING_MARKER_PREFIX, + GATE_MISSING_ARTIFACT_PREFIX, } from './final-review-gate.js'; const ARTIFACTS = '.workflow-artifacts/generated/demo/update-last-week'; function gate(overrides: { successMarker?: string } = {}): string { + void overrides; return buildFinalReviewPassGateCommand({ artifactsDir: ARTIFACTS, - checks: [ - { - presenceTest: `grep -qF RICKY_CHILD_CLAUDE_FINAL_FIX_READY '${ARTIFACTS}/claude-final-fix.md'`, - missingDetail: `${ARTIFACTS}/claude-final-fix.md is missing RICKY_CHILD_CLAUDE_FINAL_FIX_READY`, - }, - { - presenceTest: `grep -qF RICKY_CHILD_CODEX_FINAL_FIX_READY 
'${ARTIFACTS}/codex-final-fix.md'`, - missingDetail: `${ARTIFACTS}/codex-final-fix.md is missing RICKY_CHILD_CODEX_FINAL_FIX_READY`, - }, + requiredFiles: [ + `${ARTIFACTS}/claude-final-fix.md`, + `${ARTIFACTS}/codex-final-fix.md`, + `${ARTIFACTS}/claude-final-fix-status.json`, + `${ARTIFACTS}/codex-final-fix-status.json`, ], - successMarker: overrides.successMarker ?? 'RICKY_CHILD_FRESH_EYES_LOOP_READY', }); } describe('buildFinalReviewPassGateCommand', () => { - it('checks the BLOCKED sentinel before any marker grep', () => { + it('checks the BLOCKED sentinel before artifact file checks', () => { const command = gate(); const blockedIdx = command.indexOf('BLOCKED_NO_COMMIT.md'); - const firstGrepIdx = command.indexOf('grep -qF RICKY_CHILD_CLAUDE_FINAL_FIX_READY'); + const firstFileIdx = command.indexOf(`${ARTIFACTS}/claude-final-fix.md`); expect(blockedIdx).toBeGreaterThan(-1); - expect(firstGrepIdx).toBeGreaterThan(-1); - expect(blockedIdx).toBeLessThan(firstGrepIdx); + expect(firstFileIdx).toBeGreaterThan(-1); + expect(blockedIdx).toBeLessThan(firstFileIdx); }); it('emits a distinct, greppable marker plus the agent evidence when blocked', () => { @@ -43,30 +43,41 @@ describe('buildFinalReviewPassGateCommand', () => { expect(command).toContain('exit 3'); }); - it('makes each marker check quiet with an explicit missing-marker diagnostic', () => { + it('checks non-empty expected artifacts with explicit diagnostics', () => { const command = gate(); - // Quiet grep — matched lines must not leak into captured output and look - // like success while the command actually failed. 
- expect(command).toContain('grep -qF RICKY_CHILD_CLAUDE_FINAL_FIX_READY'); - expect(command).not.toMatch(/grep -F RICKY_CHILD_CLAUDE_FINAL_FIX_READY(?!\w)/); - expect(command).toContain( - `${GATE_MISSING_MARKER_PREFIX}: ${ARTIFACTS}/claude-final-fix.md is missing RICKY_CHILD_CLAUDE_FINAL_FIX_READY`, - ); - expect(command).toContain( - `${GATE_MISSING_MARKER_PREFIX}: ${ARTIFACTS}/codex-final-fix.md is missing RICKY_CHILD_CODEX_FINAL_FIX_READY`, - ); + expect(command).toContain(`if [ ! -s '${ARTIFACTS}/claude-final-fix.md' ]; then`); + expect(command).toContain(`if [ ! -s '${ARTIFACTS}/codex-final-fix.md' ]; then`); + expect(command).toContain(`${GATE_MISSING_ARTIFACT_PREFIX}: ${ARTIFACTS}/claude-final-fix.md`); + expect(command).toContain(`${GATE_MISSING_ARTIFACT_PREFIX}: ${ARTIFACTS}/codex-final-fix.md`); + expect(command).not.toContain('grep'); }); - it('still echoes the success marker last so the gate can pass', () => { + it('parses final fix status JSON and rejects blocked statuses', () => { const command = gate(); - // shellQuote wraps the marker in single quotes for safe shell embedding. 
- expect(command.trimEnd().endsWith("echo 'RICKY_CHILD_FRESH_EYES_LOOP_READY'")).toBe(true); + expect(command).toContain(`${ARTIFACTS}/claude-final-fix-status.json`); + expect(command).toContain(`${ARTIFACTS}/codex-final-fix-status.json`); + expect(command).toContain('includes(parsed.status)'); + expect(command).toContain('parsed.summary'); }); - it('shell-quotes the success marker (defends against future callers passing metacharacters)', () => { - const command = gate({ successMarker: 'DONE $(touch /tmp/pwned)' }); - expect(command).toContain("echo 'DONE $(touch /tmp/pwned)'"); - expect(command).not.toContain('echo DONE $(touch'); + it('embeds status paths as JSON string literals inside node assertions', () => { + const base = join(tmpdir(), "ricky-final-review-gate-quote's"); + mkdirSync(base, { recursive: true }); + const quoted = join(base, 'claude-final-fix-status.json'); + writeFileSync(quoted, '{"status":"fixed","summary":"quoted path passed"}\n'); + + const command = buildFinalReviewPassGateCommand({ + artifactsDir: base, + requiredFiles: [quoted], + }); + + expect(execFileSync('bash', ['-lc', command], { encoding: 'utf8' })).toContain('RICKY_CHILD_FINAL_REVIEW_FILES_READY'); + expect(command).not.toContain(`throw new Error('${quoted}`); + }); + + it('echoes a structural success marker last', () => { + const command = gate(); + expect(command.trimEnd().endsWith("echo 'RICKY_CHILD_FINAL_REVIEW_FILES_READY'")).toBe(true); }); it('guards the blocked-evidence cat so a failing read does not short-circuit exit 3', () => { diff --git a/src/product/generation/final-review-gate.ts b/src/product/generation/final-review-gate.ts index ec81a9c4..70e19437 100644 --- a/src/product/generation/final-review-gate.ts +++ b/src/product/generation/final-review-gate.ts @@ -1,60 +1,24 @@ -// Shared builder for the generated child-workflow `final-review-pass-gate` -// command. -// -// Why this exists: the original gate joined marker greps and a -// `test ! 
-f BLOCKED_NO_COMMIT.md` clause under `set -e` / `&&`. When an -// agent deliberately wrote BLOCKED_NO_COMMIT.md (the "I cannot proceed -// safely, do not commit" protocol), the gate aborted with a bare exit 1 and -// the captured output showed only the *successful* marker greps — masking -// the real cause. Operators saw "grep ... Command failed (exit code 1)" -// with the success markers in stdout and assumed a grep-target mismatch, -// and the auto-fix loop treated a deliberate "needs a human" signal as a -// retryable INVALID_ARTIFACT, looping for hours. -// -// This builder produces a gate command that: -// 1. Checks the BLOCKED sentinel FIRST and, if present, prints a distinct -// `RICKY_CHILD_BLOCKED_NO_COMMIT` marker plus the agent's evidence to -// stderr and exits with a dedicated code — so the failure is -// attributable and can be routed to escalation rather than retry. -// 2. Runs each marker presence check quietly and, on failure, prints an -// explicit `RICKY_CHILD_GATE_MISSING_MARKER: ` line — so a -// genuinely missing marker is diagnosable and never hidden behind a -// previous check's matched-line output. +// Shared builder for the generated child-workflow `final-review-pass-gate`. +// It verifies structural completion evidence only: expected artifact files +// exist and no agent raised BLOCKED_NO_COMMIT.md. Behavioral proof remains in +// the following hard validation gate. function shellQuote(value: string): string { return `'${value.replace(/'/g, `'\\''`)}'`; } -export interface GateMarkerCheck { - /** - * A shell expression that exits 0 (and produces no stdout) when the marker - * is present. Keep it quiet — matched lines must not leak into the gate's - * captured output. - */ - presenceTest: string; - /** - * Human/diagnostic detail appended after `RICKY_CHILD_GATE_MISSING_MARKER: ` - * when `presenceTest` fails. 
- */ - missingDetail: string; -} - export interface FinalReviewPassGateOptions { /** Directory holding the child workflow's review/fix artifacts. */ artifactsDir: string; - /** Ordered marker presence checks (claude first, then codex, etc.). */ - checks: GateMarkerCheck[]; - /** Token echoed on success to satisfy the gate's output assertion. */ - successMarker: string; + /** Expected non-empty final fix artifacts. */ + requiredFiles: string[]; } export const GATE_BLOCKED_MARKER = 'RICKY_CHILD_BLOCKED_NO_COMMIT'; -export const GATE_MISSING_MARKER_PREFIX = 'RICKY_CHILD_GATE_MISSING_MARKER'; +export const GATE_MISSING_ARTIFACT_PREFIX = 'RICKY_CHILD_GATE_MISSING_ARTIFACT'; /** - * Build the multi-line shell script for `final-review-pass-gate`. Returned as - * a single string so it can be embedded as a step `command` by either - * renderer regardless of how that renderer assembles its command. + * Build the multi-line shell script for `final-review-pass-gate`. */ export function buildFinalReviewPassGateCommand(options: FinalReviewPassGateOptions): string { const blockedPath = `${options.artifactsDir}/BLOCKED_NO_COMMIT.md`; @@ -78,20 +42,20 @@ export function buildFinalReviewPassGateCommand(options: FinalReviewPassGateOpti ' exit 3', 'fi', ]; - for (const check of options.checks) { + for (const file of options.requiredFiles) { lines.push( - `if ! { ${check.presenceTest}; }; then`, - ` echo ${shellQuote(`${GATE_MISSING_MARKER_PREFIX}: ${check.missingDetail}`)} >&2`, + `if [ ! -s ${shellQuote(file)} ]; then`, + ` echo ${shellQuote(`${GATE_MISSING_ARTIFACT_PREFIX}: ${file}`)} >&2`, ' exit 1', 'fi', ); } - // Shell-quote the success marker for consistency with every other - // dynamic value emitted in this script. Current callers pass the safe - // constant `'RICKY_CHILD_FRESH_EYES_LOOP_READY'`, but this is an exported - // shared builder; preserving the quoting discipline prevents future - // callers from accidentally injecting shell metacharacters via the - // option. 
- lines.push(`echo ${shellQuote(options.successMarker)}`); + for (const file of options.requiredFiles.filter((candidate) => candidate.endsWith('-status.json'))) { + const fileJson = JSON.stringify(file); + lines.push( + `node -e ${shellQuote(`const fs=require('node:fs'); const parsed=JSON.parse(fs.readFileSync(${fileJson}, 'utf8')); if (!['fixed','no_issues_found'].includes(parsed.status)) throw new Error(${fileJson} + ' must declare status fixed or no_issues_found'); if (typeof parsed.summary !== 'string' || parsed.summary.trim().length === 0) throw new Error(${fileJson} + ' must include a non-empty summary');`)}`, + ); + } + lines.push("echo 'RICKY_CHILD_FINAL_REVIEW_FILES_READY'"); return lines.join('\n'); } diff --git a/src/product/generation/master-workflow-renderer.ts b/src/product/generation/master-workflow-renderer.ts index a284225c..b68fad56 100644 --- a/src/product/generation/master-workflow-renderer.ts +++ b/src/product/generation/master-workflow-renderer.ts @@ -174,7 +174,6 @@ export function renderMasterExecutionWorkflow(input: RenderMasterWorkflowInput): desiredSlices: desiredSlicesFor(input.spec), constraints: { maxChildren: 12, - requiredGateMarkers: ['RICKY_CHILD_WORKFLOW_COMPLETE'], }, }); const channel = `wf-ricky-${slug}`; @@ -301,10 +300,8 @@ function renderMasterSource(input: { '', '## Non-goals and gates', '', - '- Each child workflow is independently 80-to-100 gated; the master executor only checks signoff markers afterward.', + '- Each child workflow is independently 80-to-100 gated; the master executor only checks signoff artifacts afterward.', '- Source edits happen inside child workflows, not in this lead-plan step.', - '', - 'RICKY_MASTER_LEAD_PLAN_READY', ]; const leadPlanCommand = [ 'set -e', @@ -312,16 +309,80 @@ function renderMasterSource(input: { `cat > ${shellQuote(`${input.artifactsDir}/lead-plan.md`)} <<'EOF'`, ...leadPlanSummaryLines, 'EOF', - // Re-verify the marker landed on disk so the deterministic guarantee - // is 
self-checked and any future template edit that drops it fails - // loudly here instead of silently passing through. - `grep -F RICKY_MASTER_LEAD_PLAN_READY ${shellQuote(`${input.artifactsDir}/lead-plan.md`)} >/dev/null`, - 'echo RICKY_MASTER_LEAD_PLAN_VERIFIED', + `test -s ${shellQuote(`${input.artifactsDir}/lead-plan.md`)}`, + 'echo RICKY_MASTER_LEAD_PLAN_WRITTEN', ].join('\n'); const verifyChildrenCommand = [ 'set -e', ...input.plan.children.map((child) => `test -f ${shellQuote(child.workflowFilePath)}`), - 'if command -v rg >/dev/null 2>&1; then rg "RICKY_CHILD_WORKFLOW_COMPLETE" workflows/generated >/dev/null; else grep -R "RICKY_CHILD_WORKFLOW_COMPLETE" workflows/generated >/dev/null; fi', + `node <<'NODE' +const fs = require('node:fs'); +const ts = require('typescript'); +const children = ${JSON.stringify(input.plan.children.map((child) => ({ + workflowFilePath: child.workflowFilePath, + signoffArtifactPath: child.signoffArtifactPath, +})))}; +function propertyNameText(name) { + return ts.isIdentifier(name) || ts.isStringLiteral(name) || ts.isNumericLiteral(name) ? name.text : undefined; +} +function propertyValue(objectLiteral, propertyName) { + for (const property of objectLiteral.properties) { + if (ts.isPropertyAssignment(property) && propertyNameText(property.name) === propertyName) return property.initializer; + } + return undefined; +} +function stringValue(node) { + if (!node) return undefined; + return ts.isStringLiteral(node) || ts.isNoSubstitutionTemplateLiteral(node) ? 
node.text : undefined; +} +function isCallNamed(node, methodName) { + return ts.isCallExpression(node) && ts.isPropertyAccessExpression(node.expression) && node.expression.name.text === methodName; +} +function hasFinalSignoffStep(sourceFile, signoffArtifactPath) { + let found = false; + const visit = (node) => { + if (isCallNamed(node, 'step')) { + const [nameArg, configArg] = node.arguments; + if (stringValue(nameArg) === 'final-signoff' && configArg && ts.isObjectLiteralExpression(configArg)) { + const command = propertyValue(configArg, 'command'); + found = found || stringValue(command)?.includes(signoffArtifactPath) === true; + } + } + ts.forEachChild(node, visit); + }; + visit(sourceFile); + return found; +} +function hasRunWithProcessCwd(sourceFile) { + let found = false; + const visit = (node) => { + if (isCallNamed(node, 'run')) { + const [configArg] = node.arguments; + if (configArg && ts.isObjectLiteralExpression(configArg)) { + const cwd = propertyValue(configArg, 'cwd'); + found = found || Boolean(cwd && ts.isCallExpression(cwd) + && ts.isPropertyAccessExpression(cwd.expression) + && ts.isIdentifier(cwd.expression.expression) + && cwd.expression.expression.text === 'process' + && cwd.expression.name.text === 'cwd'); + } + } + ts.forEachChild(node, visit); + }; + visit(sourceFile); + return found; +} +for (const child of children) { + const body = fs.readFileSync(child.workflowFilePath, 'utf8'); + const sourceFile = ts.createSourceFile(child.workflowFilePath, body, ts.ScriptTarget.Latest, true, ts.ScriptKind.TS); + if (!hasFinalSignoffStep(sourceFile, child.signoffArtifactPath)) { + throw new Error(\`child workflow missing final-signoff step for signoff artifact path: \${child.workflowFilePath}\`); + } + if (!hasRunWithProcessCwd(sourceFile)) { + throw new Error(\`child workflow missing explicit cwd run call: \${child.workflowFilePath}\`); + } +} +NODE`, 'echo RICKY_MASTER_CHILD_WORKFLOWS_READY', ].join('\n'); const finalSignoffCommand = [ @@ -331,7 
+392,7 @@ function renderMasterSource(input: { '# Ricky master executor signoff', '', `Master plan: ${input.plan.children.length} child workflows across ${waveCount(input.plan)} waves.`, - 'The master executor ran child workflows through ricky run and checked deterministic signoff markers.', + 'The master executor ran child workflows through ricky run and checked deterministic signoff artifacts.', 'Source changes, code changes, tests, git diff evidence, and PR URL or explicit result reporting are required from child workflows.', '', 'MASTER_EXECUTOR_RESULT_READY', @@ -346,7 +407,6 @@ function renderMasterSource(input: { ? listedValidationCommands : [TYPECHECK_COMMAND, deriveTestCommand(input.spec)]), 'git diff --name-only', - `grep -F RICKY_MASTER_REVIEW_READY ${shellQuote(`${input.artifactsDir}/review-codex.md`)}`, 'echo RICKY_MASTER_FINAL_VALIDATION_READY', ]; @@ -440,14 +500,23 @@ function renderMasterSource(input: { ` task: ${templateLiteral([ 'Review child workflow signoffs and deterministic gates for the master executor run.', `Read ${input.artifactsDir}/master-plan.json and each child signoff path.`, - `Write ${input.artifactsDir}/review-codex.md ending with RICKY_MASTER_REVIEW_READY.`, + `Write ${input.artifactsDir}/review-codex.md with your review findings.`, + `Also write ${input.artifactsDir}/review-codex-status.json as JSON with shape {"status":"approved"|"blocked","summary":"..."}.`, ].join('\n'))},`, ` verification: { type: "file_exists", value: ${literal(`${input.artifactsDir}/review-codex.md`)} },`, ' })', '', - ' .step("final-hard-validation", {', + ' .step("final-review-pass-gate", {', ' type: "deterministic",', ' dependsOn: ["review-child-evidence"],', + ` command: ${literal(buildMasterReviewPassGateCommand(input.artifactsDir))},`, + ' captureOutput: true,', + ' failOnError: true,', + ' })', + '', + ' .step("final-hard-validation", {', + ' type: "deterministic",', + ' dependsOn: ["final-review-pass-gate"],', ` command: 
${literal(finalHardValidationCommands.join('\n'))},`, ' captureOutput: true,', ' failOnError: true,', @@ -480,8 +549,7 @@ function renderChildRunStep(child: ChildWorkflowPlan): string[] { const command = [ 'set -e', `ricky run ${shellQuote(child.workflowFilePath)} --foreground`, - `test -f ${shellQuote(child.signoffArtifactPath)}`, - `grep -F ${shellQuote(child.signoffMarker)} ${shellQuote(child.signoffArtifactPath)}`, + `test -s ${shellQuote(child.signoffArtifactPath)}`, 'echo RICKY_MASTER_CHILD_RUN_VERIFIED', ].join('\n'); @@ -501,7 +569,6 @@ export function childWorkflowSource(child: ChildWorkflowPlan, spec: NormalizedWo const artifactsDir = child.signoffArtifactPath.replace(/\/signoff\.md$/, ''); const validationCommand = child.validationCommands[0] ?? 'npm run typecheck'; const targetScope = child.targetFiles.length > 0 ? child.targetFiles.join(' ') : 'NO_TARGET_FILES_DECLARED'; - const marker = child.signoffMarker; // Injected into every review/fix task. The master executor runs all child // slices in one shared checkout, so by the time a later child is reviewed // the worktree already contains earlier siblings' dirty files. Reviewers @@ -656,7 +723,8 @@ export function childWorkflowSource(child: ChildWorkflowPlan, spec: NormalizedWo 'If it says NO_ISSUES_FOUND, record that no fix was needed. Otherwise fix every valid finding and harden tests/proofs.', sharedWorktreeScopeRule, `If blocked, write ${artifactsDir}/BLOCKED_NO_COMMIT.md with exact evidence.`, - `Write ${artifactsDir}/claude-final-fix.md ending with RICKY_CHILD_CLAUDE_FINAL_FIX_READY.`, + `Write ${artifactsDir}/claude-final-fix.md and ${artifactsDir}/claude-final-fix-status.json.`, + 'The status JSON must have shape {"status":"fixed"|"no_issues_found"|"blocked","summary":"..."}. 
Use "blocked" only when BLOCKED_NO_COMMIT.md exists.', ].join('\n'))},`, ` verification: { type: "file_exists", value: ${literal(`${artifactsDir}/claude-final-fix.md`)} },`, ' })', @@ -667,7 +735,7 @@ export function childWorkflowSource(child: ChildWorkflowPlan, spec: NormalizedWo 'Second-pass fresh-eyes review after the Claude loop. Read the actual files, diff, review artifacts, and validation evidence.', sharedWorktreeScopeRule, 'Use verdict: FINDINGS | NO_ISSUES_FOUND | BLOCKED and include fix_required plus test_required for each finding.', - `Write ${artifactsDir}/review-codex.md ending with RICKY_CHILD_CODEX_REVIEW_READY.`, + `Write ${artifactsDir}/review-codex.md.`, ].join('\n'))},`, ` verification: { type: "file_exists", value: ${literal(`${artifactsDir}/review-codex.md`)} },`, ' })', @@ -697,7 +765,7 @@ export function childWorkflowSource(child: ChildWorkflowPlan, spec: NormalizedWo 'Final Codex fresh-eyes review after Codex fixes.', sharedWorktreeScopeRule, 'Use verdict: FINDINGS | NO_ISSUES_FOUND | BLOCKED and include fix_required plus test_required for each finding.', - `Write ${artifactsDir}/final-review-codex.md ending with RICKY_CHILD_CODEX_FINAL_REVIEW_READY.`, + `Write ${artifactsDir}/final-review-codex.md.`, ].join('\n'))},`, ` verification: { type: "file_exists", value: ${literal(`${artifactsDir}/final-review-codex.md`)} },`, ' })', @@ -709,7 +777,8 @@ export function childWorkflowSource(child: ChildWorkflowPlan, spec: NormalizedWo 'If it says NO_ISSUES_FOUND, record that no fix was needed. Otherwise fix every valid finding and harden tests/proofs.', sharedWorktreeScopeRule, `If blocked, write ${artifactsDir}/BLOCKED_NO_COMMIT.md with exact evidence.`, - `Write ${artifactsDir}/codex-final-fix.md ending with RICKY_CHILD_CODEX_FINAL_FIX_READY.`, + `Write ${artifactsDir}/codex-final-fix.md and ${artifactsDir}/codex-final-fix-status.json.`, + 'The status JSON must have shape {"status":"fixed"|"no_issues_found"|"blocked","summary":"..."}. 
Use "blocked" only when BLOCKED_NO_COMMIT.md exists.', ].join('\n'))},`, ` verification: { type: "file_exists", value: ${literal(`${artifactsDir}/codex-final-fix.md`)} },`, ' })', @@ -718,17 +787,7 @@ export function childWorkflowSource(child: ChildWorkflowPlan, spec: NormalizedWo ' dependsOn: ["final-fix-codex"],', ` command: ${literal(buildFinalReviewPassGateCommand({ artifactsDir, - checks: [ - { - presenceTest: `grep -qF RICKY_CHILD_CLAUDE_FINAL_FIX_READY ${shellQuote(`${artifactsDir}/claude-final-fix.md`)}`, - missingDetail: `${artifactsDir}/claude-final-fix.md is missing RICKY_CHILD_CLAUDE_FINAL_FIX_READY`, - }, - { - presenceTest: `grep -qF RICKY_CHILD_CODEX_FINAL_FIX_READY ${shellQuote(`${artifactsDir}/codex-final-fix.md`)}`, - missingDetail: `${artifactsDir}/codex-final-fix.md is missing RICKY_CHILD_CODEX_FINAL_FIX_READY`, - }, - ], - successMarker: 'RICKY_CHILD_FRESH_EYES_LOOP_READY', + requiredFiles: [`${artifactsDir}/claude-final-fix.md`, `${artifactsDir}/codex-final-fix.md`, `${artifactsDir}/claude-final-fix-status.json`, `${artifactsDir}/codex-final-fix-status.json`], }))},`, ' captureOutput: true,', ' failOnError: true,', @@ -740,10 +799,6 @@ export function childWorkflowSource(child: ChildWorkflowPlan, spec: NormalizedWo 'set -e', validationCommand, 'git diff --name-only', - // Quiet greps with explicit diagnostics — a missing marker here must - // not be hidden behind the previous grep's matched-line output. - `if ! grep -qF RICKY_CHILD_CLAUDE_FINAL_FIX_READY ${shellQuote(`${artifactsDir}/claude-final-fix.md`)}; then echo ${shellQuote(`RICKY_CHILD_GATE_MISSING_MARKER: ${artifactsDir}/claude-final-fix.md is missing RICKY_CHILD_CLAUDE_FINAL_FIX_READY`)} >&2; exit 1; fi`, - `if ! 
grep -qF RICKY_CHILD_CODEX_FINAL_FIX_READY ${shellQuote(`${artifactsDir}/codex-final-fix.md`)}; then echo ${shellQuote(`RICKY_CHILD_GATE_MISSING_MARKER: ${artifactsDir}/codex-final-fix.md is missing RICKY_CHILD_CODEX_FINAL_FIX_READY`)} >&2; exit 1; fi`, 'echo RICKY_CHILD_FINAL_VALIDATION_READY', ].join('\n'))},`, ' captureOutput: true,', @@ -755,14 +810,14 @@ export function childWorkflowSource(child: ChildWorkflowPlan, spec: NormalizedWo ` command: ${literal([ 'set -e', `mkdir -p ${shellQuote(artifactsDir)}`, - `cat > ${shellQuote(child.signoffArtifactPath)} <<'EOF'`, - `# Child workflow signoff: ${child.title}`, - '', - 'RICKY_CHILD_WORKFLOW_COMPLETE', - marker, - 'EOF', - 'echo RICKY_CHILD_WORKFLOW_COMPLETE', - ].join('\n'))},`, + `cat > ${shellQuote(child.signoffArtifactPath)} <<'EOF'`, + `# Child workflow signoff: ${child.title}`, + '', + `Signoff artifact: ${child.signoffArtifactPath}`, + 'Child workflow completed final review, hard validation, and signoff artifact materialization.', + 'EOF', + `test -s ${shellQuote(child.signoffArtifactPath)}`, + ].join('\n'))},`, ' captureOutput: true,', ' failOnError: true,', ' })', @@ -804,16 +859,33 @@ function buildMasterGates(artifactsDir: string, plan: MasterExecutionPlan, spec: : `{ ${TYPECHECK_COMMAND}; } && ${testCommand}`; const safeFinalValidationCommand = finalValidationCommand || "printf '%s\\n' 'No executable validation commands listed by spec.'"; return [ - gate('skill-boundary-metadata-gate', `test -f ${artifactsDir}/skill-application-boundary.json`, 'file_exists', true, ['prepare-context'], 'pre_review'), gate('child-workflow-file-gate', plan.children.map((child) => `test -f ${child.workflowFilePath}`).join(' && '), 'file_exists', true, ['materialize-child-workflows'], 'pre_review'), gate('initial-soft-validation', listedValidationOnly ? 
safeFinalValidationCommand : `{ ${TYPECHECK_COMMAND}; } 2>&1 | tail -160`, 'output_contains', false, ['child-workflow-file-gate'], 'pre_review'), - gate('final-review-pass-gate', `grep -F RICKY_MASTER_REVIEW_READY ${artifactsDir}/review-codex.md`, 'output_contains', true, ['review-child-evidence'], 'final'), + gate('final-review-pass-gate', buildMasterReviewPassGateCommand(artifactsDir), 'deterministic_gate', true, ['review-child-evidence'], 'final'), gate('final-hard-validation', safeFinalValidationCommand, 'exit_code', true, ['final-review-pass-gate'], 'final'), gate('git-diff-gate', 'git diff --name-only', 'output_contains', true, ['final-hard-validation'], 'final'), gate('regression-gate', listedValidationOnly ? safeFinalValidationCommand : testCommand, 'exit_code', true, ['git-diff-gate'], 'regression'), ]; } +function buildMasterReviewPassGateCommand(artifactsDir: string): string { + return [ + 'node <<\'NODE\'', + "const fs = require('node:fs');", + `const base = ${literal(artifactsDir)};`, + "const reviewPath = `${base}/review-codex.md`;", + "const statusPath = `${base}/review-codex-status.json`;", + "for (const path of [reviewPath, statusPath]) {", + " if (!fs.existsSync(path) || fs.statSync(path).size === 0) throw new Error(`required master review artifact missing or empty: ${path}`);", + '}', + "const parsed = JSON.parse(fs.readFileSync(statusPath, 'utf8'));", + "if (parsed.status !== 'approved') throw new Error(`${statusPath} must declare status approved`);", + "if (typeof parsed.summary !== 'string' || parsed.summary.trim().length === 0) throw new Error(`${statusPath} must include a non-empty summary`);", + "console.log('RICKY_MASTER_REVIEW_STRUCTURED_GATE_OK');", + 'NODE', + ].join('\n'); +} + function buildMasterSkillEvidence(skills: SkillContext): SkillApplicationEvidence[] { const names = skills.applicableSkillNames.length > 0 ? 
skills.applicableSkillNames diff --git a/src/product/generation/pipeline.test.ts b/src/product/generation/pipeline.test.ts index edbec6e1..922ab9a0 100644 --- a/src/product/generation/pipeline.test.ts +++ b/src/product/generation/pipeline.test.ts @@ -166,7 +166,12 @@ describe('workflow generation pipeline', () => { expect(rendered.content).toContain('ricky run \'workflows/generated/runtime-master-children/01-nested-runner.ts\' --foreground'); expect(rendered.content).not.toMatch(/^\s*command: "set -e\\nricky run .*--no-auto-fix/m); expect(rendered.content).toContain('MASTER_EXECUTOR_RESULT_READY'); - expect(rendered.content).toContain('RICKY_CHILD_WORKFLOW_COMPLETE'); + expect(rendered.content).toContain('RICKY_MASTER_CHILD_WORKFLOWS_READY'); + expect(rendered.content).not.toContain('RICKY_CHILD_WORKFLOW_COMPLETE'); + const masterStepConfigs = extractStepConfigs(rendered.content); + const verifyChildrenCommand = masterStepConfigs.get('verify-child-workflows')?.command; + expect(verifyChildrenCommand).toContain('ts.createSourceFile'); + expect(verifyChildrenCommand).not.toContain("body.includes('.step(\"final-signoff\"')"); // Child workflow sources live in the .children.json sidecar so the // master content stays under ARG_MAX. Assert child-only strings are in // the sidecar payload rather than inlined into the master TS. @@ -200,7 +205,7 @@ describe('workflow generation pipeline', () => { expect(childStepPositions, `${childPath} declares every fresh-eyes step`).not.toContain(undefined); expect(childStepPositions, `${childPath} fresh-eyes step order`) .toEqual([...childStepPositions].sort((a, b) => a! 
- b!)); - expect(childStepConfigs.get('final-review-pass-gate')?.command, `${childPath} child fresh-eyes marker`).toContain('RICKY_CHILD_FRESH_EYES_LOOP_READY'); + expect(childStepConfigs.get('final-review-pass-gate')?.command, `${childPath} child final review file gate`).toContain('RICKY_CHILD_FINAL_REVIEW_FILES_READY'); } expect(extractOnErrorConfigs(rendered.content), 'master workflow retry policy').toContainEqual({ strategy: 'retry', @@ -245,9 +250,9 @@ describe('workflow generation pipeline', () => { // entirely via a deterministic command. expect(leadPlan!.agent, 'lead-plan has no agent assignment').toBeUndefined(); expect(leadPlan!.type, 'lead-plan is deterministic').toBe('deterministic'); - expect(leadPlan!.command, 'lead-plan writes the marker into lead-plan.md').toContain('RICKY_MASTER_LEAD_PLAN_READY'); - expect(leadPlan!.command, 'lead-plan self-verifies the marker after writing').toContain('grep -F RICKY_MASTER_LEAD_PLAN_READY'); - expect(leadPlan!.command, 'lead-plan echoes the downstream verification marker').toContain('RICKY_MASTER_LEAD_PLAN_VERIFIED'); + expect(leadPlan!.command, 'lead-plan writes a non-empty lead-plan.md').toContain('test -s'); + expect(leadPlan!.command, 'lead-plan no longer self-verifies a marker with grep').not.toContain('grep -F RICKY_MASTER_LEAD_PLAN_READY'); + expect(leadPlan!.command, 'lead-plan echoes structural completion').toContain('RICKY_MASTER_LEAD_PLAN_WRITTEN'); // `materialize-child-workflows` formerly depended on the separate // `lead-plan-gate`; with the gate folded into `lead-plan`, the // dependency must move directly to `lead-plan`. 
@@ -762,7 +767,7 @@ describe('workflow generation pipeline', () => { expect(artifact.content).toContain(".onError('retry', { maxRetries: 2, retryDelayMs: 10000, repairAgent: \"validator-claude\", repairRetries: 2 })"); expect(artifact.content).not.toMatch(/^\s*\.onError\('fail-fast'\)/m); expect(artifact.content).toContain('80-to-100 review-fix loop'); - expect(artifact.content).toContain('deterministic sanity gate using POSIX grep, git grep, or an equivalent assertion'); + expect(artifact.content).toContain('deterministic structural sanity gate using a parser, inline assertion, or scoped file/diff check'); expect(artifact.content).toContain('If using rg, guard it with command -v rg'); expect(artifact.content).toContain('Generated workflow quality'); expect(artifact.content).toContain('Keep each agent step bounded to one coherent slice'); @@ -922,27 +927,13 @@ describe('workflow generation pipeline', () => { expect(artifact.content).toContain('runtimeEmbodiment'); expect(artifact.content).toContain('Skills are applied by Ricky during selection, loading, and template rendering.'); expect(artifact.content).toContain('Do not claim generated agents load, retain, or embody skill files at runtime'); - const skillBoundaryGate = artifact.gates.find((gate) => gate.name === 'skill-boundary-metadata-gate')!; - expect(skillBoundaryGate.command).toContain('choosing-swarm-patterns'); - expect(skillBoundaryGate.command).toContain('writing-agent-relay-workflows'); - expect(skillBoundaryGate.command).toContain('relay-80-100-workflow'); - expect(skillBoundaryGate.command).toContain('review-fix-signoff-loop'); - expect(skillBoundaryGate.command).toContain('"stage":"generation_selection"'); - expect(skillBoundaryGate.command).toContain('"stage":"generation_loading"'); - expect(skillBoundaryGate.command).toContain('"stage":"generation_rendering"'); - expect(skillBoundaryGate.command).toContain('"effect":"pattern_selection"'); - 
expect(skillBoundaryGate.command).toContain('"effect":"workflow_contract"'); - expect(skillBoundaryGate.command).toContain('"effect":"validation_gates"'); - expect(artifact.gates).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - name: 'skill-boundary-metadata-gate', - command: expect.stringContaining('skill-application-boundary.json'), - failOnError: true, - stage: 'pre_review', - }), - ]), - ); + expect(artifact.gates.map((candidate) => candidate.name)).not.toContain('skill-boundary-metadata-gate'); + expect(artifact.content).not.toContain('.step("skill-boundary-metadata-gate"'); + expect(gate(artifact, 'lead-plan-gate')).toMatchObject({ + dependsOn: ['lead-plan'], + failOnError: true, + stage: 'pre_review', + }); }); it('accepts a natural doc/spec request and selects a lighter workflow with deterministic review gates', () => { @@ -1497,15 +1488,13 @@ describe('workflow generation pipeline', () => { artifactPath: 'workflows/generated/inline-sanity.ts', }); const base = artifact(result); - const gatesWithoutGrep = base.gates.map((gate) => ({ + const gatesWithoutSanityChecks = base.gates.map((gate) => ({ ...gate, - command: gate.command - .replace(/\bgit\s+grep\b/g, 'printf') - .replace(/\bgrep\b/g, 'printf'), + command: 'printf ok', })); const withPostImplementationCommand = (command: string) => ({ ...base, - gates: gatesWithoutGrep.map((gate) => gate.name === 'post-implementation-file-gate' + gates: gatesWithoutSanityChecks.map((gate) => gate.name === 'post-implementation-file-gate' ? 
{ ...gate, command } : gate), }); @@ -1535,6 +1524,23 @@ describe('workflow generation pipeline', () => { expect.objectContaining({ code: 'GREP_GATE_MISSING' }), ]), ); + + const heredocNodeValidation = validateGeneratedArtifact( + withPostImplementationCommand( + `node << 'ASSERT-SANITY' +const { readFileSync } = require('node:fs'); +if (!readFileSync('src/product/generation/pipeline.ts', 'utf8').includes('validateGeneratedArtifact')) throw new Error('missing validation symbol'); +ASSERT-SANITY`, + ), + result.patternDecision, + result.skillContext, + implementationSpec, + ); + expect(heredocNodeValidation.issues).not.toEqual( + expect.arrayContaining([ + expect.objectContaining({ code: 'GREP_GATE_MISSING' }), + ]), + ); }); it('accepts ruby and perl inline assertions invoked with -e', () => { @@ -1591,9 +1597,7 @@ describe('workflow generation pipeline', () => { ...base, gates: base.gates.map((gate) => ({ ...gate, - command: gate.command - .replace(/\bgit\s+grep\b/g, 'printf') - .replace(/\bgrep\b/g, 'printf'), + command: 'printf ok', })), }; @@ -1713,9 +1717,13 @@ describe('workflow generation pipeline', () => { expect(claudePathMatch).not.toBeNull(); expect(codexPathMatch).not.toBeNull(); - expect(passGate.command).toContain(claudePathMatch![1]); - expect(passGate.command).toContain(codexPathMatch![1]); - expect(passGate.command).toContain("tr -d '[:space:]*'"); + expect(passGate.command).toContain('.workflow-artifacts/generated/path-consistency'); + expect(passGate.command).toContain('claude-final-fix.md'); + expect(passGate.command).toContain('codex-final-fix.md'); + expect(passGate.command).toContain('claude-final-fix-status.json'); + expect(passGate.command).toContain('codex-final-fix-status.json'); + expect(passGate.command).toContain('JSON.parse'); + expect(passGate.command).toContain('BLOCKED_NO_COMMIT.md'); }); it('no-target spec uses output manifest instead of artifact path in file gates', () => { @@ -1751,7 +1759,7 @@ describe('workflow generation 
pipeline', () => { expect(consistencyGate.command).toContain("['final-review-codex.md', read('final-review-codex.md')]"); expect(consistencyGate.command).toContain("['codex-final-fix.md', read('codex-final-fix.md')]"); expect(consistencyGate.command).toContain("['signoff.md', read('signoff.md')]"); - expect(consistencyGate.command).toContain('CODEX_FINAL_FIX_COMPLETE'); + expect(consistencyGate.command).not.toContain('CODEX_FINAL_FIX_COMPLETE'); }); it('no-target code workflow file gate validates manifest contents, not source-shape grep', () => { @@ -1900,7 +1908,8 @@ describe('workflow generation pipeline', () => { expect(leadPlanGate.command).toContain('out[- ]of[- ]scope'); expect(leadPlanGate.command).toContain('Routing contract'); expect(artifact.content).toContain('write .workflow-artifacts/generated/no-target-evidence-gates/fix-loop-report.md'); - expect(fixLoopReportGate.command).toContain('FIX_LOOP_COMPLETE'); + expect(fixLoopReportGate.command).toContain('test -s'); + expect(fixLoopReportGate.command).toContain('fix-loop-report.md'); expect(fixLoopReportGate.dependsOn).toEqual(['fix-loop']); expect(postFixGate.dependsOn).toEqual(['fix-loop-report-gate']); expect(postImplementationGate.command).toContain('cleanup-report.md'); diff --git a/src/product/generation/pipeline.ts b/src/product/generation/pipeline.ts index b344f650..1cef4c06 100644 --- a/src/product/generation/pipeline.ts +++ b/src/product/generation/pipeline.ts @@ -797,6 +797,7 @@ function hasRipgrepFallback(command: string): boolean { function isInlineAssertionCommand(command: string): boolean { const invokesInlineRuntime = + /\bnode\s+<<\s*['"]?[\w-]+['"]?/.test(command) || /\b(?:node|bun)\s+(?:--input-type=module\s+)?(?:-e|--eval)\b/.test(command) || /\bpython3?\s+-c\b/.test(command) || /\b(?:ruby|perl)\s+-e\b/.test(command); diff --git a/src/product/generation/template-renderer.ts b/src/product/generation/template-renderer.ts index 1a5f644e..47b2ba6c 100644 --- 
a/src/product/generation/template-renderer.ts +++ b/src/product/generation/template-renderer.ts @@ -62,7 +62,7 @@ export function renderWorkflow(input: RenderWorkflowInput): RenderedArtifact { const tasks = buildTasks(input.spec, isCodeWorkflow); const toolSelection = input.toolSelection ?? selectToolsForSteps(input.spec, tasks, input.skills); applyToolSelection(team, toolSelection.selections); - const gates = buildGates(input.spec, artifactsDir, artifactPath, isCodeWorkflow, input.skills); + const gates = buildGates(input.spec, artifactsDir, artifactPath, isCodeWorkflow); const skillApplicationEvidence = buildRenderingSkillEvidence(input.skills, tasks, gates); const content = renderSource({ spec: input.spec, @@ -147,8 +147,6 @@ function renderSource(input: { '', renderPrepareContextStep(input.artifactsDir, contextSetup), '', - renderGateStep(input.gates.find((gate) => gate.name === 'skill-boundary-metadata-gate')!), - '', renderLeadPlanStep(input.artifactsDir, Boolean(input.spec.targetContext), input.isCodeWorkflow), '', renderGateStep(input.gates.find((gate) => gate.name === 'lead-plan-gate')!), @@ -161,7 +159,7 @@ function renderSource(input: { '', renderReviewStep('review-claude', 'reviewer-claude', ['initial-soft-validation'], input.artifactsDir, Boolean(input.spec.targetContext), selectionFor(input.toolSelection, 'review-claude')), '', - renderFixLoopStep('fix-loop', 'validator-claude', ['review-claude', 'initial-soft-validation'], `${input.artifactsDir}/review-claude.md`, `${input.artifactsDir}/fix-loop-report.md`, 'FIX_LOOP_COMPLETE', input.spec, input.isCodeWorkflow, input.artifactsDir, selectionFor(input.toolSelection, 'fix-loop')), + renderFixLoopStep('fix-loop', 'validator-claude', ['review-claude', 'initial-soft-validation'], `${input.artifactsDir}/review-claude.md`, `${input.artifactsDir}/fix-loop-report.md`, input.spec, input.isCodeWorkflow, input.artifactsDir, selectionFor(input.toolSelection, 'fix-loop')), '', renderGateStep(input.gates.find((gate) 
=> gate.name === 'fix-loop-report-gate')!), '', @@ -173,11 +171,11 @@ function renderSource(input: { '', renderReviewStep('final-review-claude', 'reviewer-claude', ['post-fix-validation'], input.artifactsDir, Boolean(input.spec.targetContext), selectionFor(input.toolSelection, 'final-review-claude'), true), '', - renderFixLoopStep('final-fix-claude', 'validator-claude', ['final-review-claude'], `${input.artifactsDir}/final-review-claude.md`, `${input.artifactsDir}/claude-final-fix.md`, 'CLAUDE_FINAL_FIX_COMPLETE', input.spec, input.isCodeWorkflow, input.artifactsDir, selectionFor(input.toolSelection, 'final-fix-claude'), true), + renderFixLoopStep('final-fix-claude', 'validator-claude', ['final-review-claude'], `${input.artifactsDir}/final-review-claude.md`, `${input.artifactsDir}/claude-final-fix.md`, input.spec, input.isCodeWorkflow, input.artifactsDir, selectionFor(input.toolSelection, 'final-fix-claude'), true), '', renderReviewStep('review-codex', 'reviewer-codex', ['final-fix-claude'], input.artifactsDir, Boolean(input.spec.targetContext), selectionFor(input.toolSelection, 'review-codex')), '', - renderFixLoopStep('fix-loop-codex', 'validator-codex', ['review-codex'], `${input.artifactsDir}/review-codex.md`, `${input.artifactsDir}/codex-fix-loop-report.md`, 'CODEX_FIX_LOOP_COMPLETE', input.spec, input.isCodeWorkflow, input.artifactsDir, selectionFor(input.toolSelection, 'fix-loop-codex')), + renderFixLoopStep('fix-loop-codex', 'validator-codex', ['review-codex'], `${input.artifactsDir}/review-codex.md`, `${input.artifactsDir}/codex-fix-loop-report.md`, input.spec, input.isCodeWorkflow, input.artifactsDir, selectionFor(input.toolSelection, 'fix-loop-codex')), '', renderGateStep(input.gates.find((gate) => gate.name === 'codex-fix-loop-report-gate')!), '', @@ -185,7 +183,7 @@ function renderSource(input: { '', renderReviewStep('final-review-codex', 'reviewer-codex', ['post-codex-fix-validation'], input.artifactsDir, Boolean(input.spec.targetContext), 
selectionFor(input.toolSelection, 'final-review-codex'), true), '', - renderFixLoopStep('final-fix-codex', 'validator-codex', ['final-review-codex'], `${input.artifactsDir}/final-review-codex.md`, `${input.artifactsDir}/codex-final-fix.md`, 'CODEX_FINAL_FIX_COMPLETE', input.spec, input.isCodeWorkflow, input.artifactsDir, selectionFor(input.toolSelection, 'final-fix-codex'), true), + renderFixLoopStep('final-fix-codex', 'validator-codex', ['final-review-codex'], `${input.artifactsDir}/final-review-codex.md`, `${input.artifactsDir}/codex-final-fix.md`, input.spec, input.isCodeWorkflow, input.artifactsDir, selectionFor(input.toolSelection, 'final-fix-codex'), true), '', renderGateStep(input.gates.find((gate) => gate.name === 'final-review-pass-gate')!), '', @@ -252,7 +250,7 @@ function buildTasks(spec: NormalizedWorkflowSpec, isCodeWorkflow: boolean): Work const implementer = isCodeWorkflow ? 'impl-primary-codex' : 'author-codex'; return [ task('prepare-context', 'Prepare context', 'deterministic', 'Read or materialize the normalized spec and target context.', []), - task('lead-plan', 'Lead plan', isCodeWorkflow ? 'lead-claude' : 'lead-codex', 'Plan deliverables, non-goals, ownership, and verification gates.', ['skill-boundary-metadata-gate']), + task('lead-plan', 'Lead plan', isCodeWorkflow ? 
'lead-claude' : 'lead-codex', 'Plan deliverables, non-goals, ownership, and verification gates.', ['prepare-context']), task('implement-artifact', 'Implement artifact', implementer, describeImplementation(spec), ['lead-plan']), task('review-claude', 'Fresh-eyes review with Claude', 'reviewer-claude', 'Review generated work against scope and evidence expectations.', ['initial-soft-validation']), task('fix-loop', 'Claude review-fix loop', 'validator-claude', 'Apply bounded fixes from Claude review and validation feedback.', ['review-claude', 'initial-soft-validation']), @@ -271,7 +269,6 @@ function buildGates( artifactsDir: string, artifactPath: string, isCodeWorkflow: boolean, - skills: SkillContext, ): DeterministicGate[] { const outputManifest = `${artifactsDir}/output-manifest.txt`; const usingManifest = spec.targetFiles.length === 0; @@ -297,27 +294,15 @@ function buildGates( listedValidationOnly ? executableAcceptanceCommands : [typecheckCommand, testCommand, ...executableAcceptanceCommands], ); const regressionCommand = listedValidationOnly ? 'git diff --check' : isCodeWorkflow ? 
'npx vitest run' : 'git diff --check'; - const skillBoundaryPath = `${artifactsDir}/skill-application-boundary.json`; return [ - gate( - 'skill-boundary-metadata-gate', - buildSkillBoundaryGateCommand(skillBoundaryPath, skills), - 'artifact_exists', - true, - ['prepare-context'], - 'pre_review', - ), gate('lead-plan-gate', buildLeadPlanGateCommand(`${artifactsDir}/lead-plan.md`), 'output_contains', true, ['lead-plan'], 'pre_review'), gate('post-implementation-file-gate', fileGateCommand, 'file_exists', true, ['implement-artifact'], 'pre_review'), gate('initial-soft-validation', initialValidationCommand, 'exit_code', false, ['post-implementation-file-gate'], 'pre_review'), gate( 'fix-loop-report-gate', - [ - `test -f ${shellQuote(`${artifactsDir}/fix-loop-report.md`)}`, - `tail -n 1 ${shellQuote(`${artifactsDir}/fix-loop-report.md`)} | tr -d '[:space:]' | grep -Eq '^FIX_LOOP_COMPLETE$'`, - ].join(' && '), - 'output_contains', + `test -s ${shellQuote(`${artifactsDir}/fix-loop-report.md`)}`, + 'file_exists', true, ['fix-loop'], 'post_fix', @@ -327,11 +312,8 @@ function buildGates( gate('post-fix-validation', hardValidationCommand, 'exit_code', false, ['active-reference-gate'], 'post_fix'), gate( 'codex-fix-loop-report-gate', - [ - `test -f ${shellQuote(`${artifactsDir}/codex-fix-loop-report.md`)}`, - `tail -n 1 ${shellQuote(`${artifactsDir}/codex-fix-loop-report.md`)} | tr -d '[:space:]' | grep -Eq '^CODEX_FIX_LOOP_COMPLETE$'`, - ].join(' && '), - 'output_contains', + `test -s ${shellQuote(`${artifactsDir}/codex-fix-loop-report.md`)}`, + 'file_exists', true, ['fix-loop-codex'], 'post_fix', @@ -339,12 +321,8 @@ function buildGates( gate('post-codex-fix-validation', hardValidationCommand, 'exit_code', false, ['codex-fix-loop-report-gate'], 'post_fix'), gate( 'final-review-pass-gate', - [ - `tail -n 1 ${shellQuote(`${artifactsDir}/claude-final-fix.md`)} | tr -d '[:space:]*' | grep -Eq '^CLAUDE_FINAL_FIX_COMPLETE$'`, - `tail -n 1 
${shellQuote(`${artifactsDir}/codex-final-fix.md`)} | tr -d '[:space:]*' | grep -Eq '^CODEX_FINAL_FIX_COMPLETE$'`, - `test ! -f ${shellQuote(`${artifactsDir}/BLOCKED_NO_COMMIT.md`)}`, - ].join(' && '), - 'output_contains', + buildStructuredFinalReviewPassGateCommand(artifactsDir), + 'deterministic_gate', true, ['final-fix-codex'], 'final', @@ -543,55 +521,31 @@ function buildActiveReferenceGateCommand(outputManifest: string, evidencePath: s ].join('\n'); } -function buildSkillBoundaryGateCommand(skillBoundaryPath: string, skills: SkillContext): string { - const quotedPath = shellQuote(skillBoundaryPath); - const artifactsDir = skillBoundaryPath.replace(/\/skill-application-boundary\.json$/, ''); - const commands = [ - `test -f ${quotedPath}`, - `test -f ${shellQuote(`${artifactsDir}/skill-matches.json`)}`, - `test -f ${shellQuote(`${artifactsDir}/tool-selection.json`)}`, - `grep -F ${shellQuote('generation_time_only')} ${quotedPath}`, - `grep -F ${shellQuote('"runtimeEmbodiment":false')} ${quotedPath}`, - ...skills.applicableSkillNames.map((skillName) => `grep -F ${shellQuote(skillName)} ${quotedPath}`), - ]; - - if (skills.applicableSkillNames.length > 0) { - commands.push( - `grep -F ${shellQuote('"stage":"generation_selection"')} ${quotedPath}`, - `grep -F ${shellQuote('"stage":"generation_loading"')} ${quotedPath}`, - `grep -F ${shellQuote('"effect":"metadata"')} ${quotedPath}`, - ); - } - - if (skills.applicableSkillNames.includes('choosing-swarm-patterns')) { - commands.push( - `grep -F ${shellQuote('"stage":"generation_rendering"')} ${quotedPath}`, - `grep -F ${shellQuote('"effect":"pattern_selection"')} ${quotedPath}`, - ); - } - - if (skills.applicableSkillNames.includes('writing-agent-relay-workflows')) { - commands.push( - `grep -F ${shellQuote('"stage":"generation_rendering"')} ${quotedPath}`, - `grep -F ${shellQuote('"effect":"workflow_contract"')} ${quotedPath}`, - ); - } - - if (skills.applicableSkillNames.includes('relay-80-100-workflow')) { - 
commands.push( - `grep -F ${shellQuote('"stage":"generation_rendering"')} ${quotedPath}`, - `grep -F ${shellQuote('"effect":"validation_gates"')} ${quotedPath}`, - ); - } - - if (skills.applicableSkillNames.includes('review-fix-signoff-loop')) { - commands.push( - `grep -F ${shellQuote('"stage":"generation_rendering"')} ${quotedPath}`, - `grep -F ${shellQuote('"skillName":"review-fix-signoff-loop"')} ${quotedPath}`, - ); - } - - return commands.join(' && '); +function buildStructuredFinalReviewPassGateCommand(artifactsDir: string): string { + return [ + 'node <<\'NODE\'', + "const fs = require('node:fs');", + `const base = ${literal(artifactsDir)};`, + "const requiredFiles = ['claude-final-fix.md', 'codex-final-fix.md', 'claude-final-fix-status.json', 'codex-final-fix-status.json'];", + "for (const name of requiredFiles) {", + " const path = `${base}/${name}`;", + " if (!fs.existsSync(path) || fs.statSync(path).size === 0) throw new Error(`required final review artifact missing or empty: ${path}`);", + '}', + "const blockedPath = `${base}/BLOCKED_NO_COMMIT.md`;", + "if (fs.existsSync(blockedPath)) throw new Error(`final review blocked; see ${blockedPath}`);", + "for (const name of ['claude-final-fix-status.json', 'codex-final-fix-status.json']) {", + " const path = `${base}/${name}`;", + " const parsed = JSON.parse(fs.readFileSync(path, 'utf8'));", + " if (!['fixed', 'no_issues_found'].includes(parsed.status)) {", + " throw new Error(`${path} must declare status fixed or no_issues_found`);", + ' }', + " if (typeof parsed.summary !== 'string' || parsed.summary.trim().length === 0) {", + " throw new Error(`${path} must include a non-empty summary`);", + ' }', + '}', + "console.log('FINAL_REVIEW_STRUCTURED_GATE_OK');", + 'NODE', + ].join('\n'); } function applyToolSelection(team: TeamMemberSpec[], selections: ToolSelection[]): void { @@ -718,7 +672,7 @@ function buildGeneratedContextPackage( : ['A generated workflow artifact and any requested output files']; const 
verificationCommands = [ 'file_exists gate for declared targets', - 'deterministic sanity gate using POSIX grep, git grep, or an equivalent assertion', + 'deterministic structural sanity gate using a parser, inline assertion, or scoped file/diff check', 'active-reference gate for deleted manifest paths', 'npx tsc --noEmit', deriveTestCommand(spec), @@ -807,7 +761,7 @@ function buildGeneratedContextPackage( 'Generated workflow quality:', '', '- Include a real deterministic sanity gate over produced files, not just prose saying one exists.', - '- Prefer POSIX grep, git grep, or a small inline assertion command that exits non-zero when expected content/state is missing.', + '- Prefer structural checks, scoped file/diff checks, or a small inline assertion command that exits non-zero when expected content/state is missing.', '- If using rg, guard it with command -v rg and provide a grep or git grep fallback.', '- For cleanup or deletion work, persist a changed-files inventory with statuses, active-reference evidence for deleted paths, and command summaries for final signoff.', `- For cleanup or deletion work, start from ${artifactsDir}/cleanup-candidate-prescan.txt and cite that exact path in ${artifactsDir}/cleanup-report.md so the evidence trail names its prescan input.`, @@ -879,7 +833,7 @@ function renderLeadPlanStep(artifactsDir: string, hasTargetContext: boolean, isC const agent = isCodeWorkflow ? 'lead-claude' : 'lead-codex'; return ` .step('lead-plan', { agent: ${literal(agent)}, - dependsOn: ['skill-boundary-metadata-gate'], + dependsOn: ['prepare-context'], timeoutMs: ${DEFAULT_LEAD_PLAN_TIMEOUT_MS}, task: ${templateLiteral(`Plan the workflow execution from the packaged context files. @@ -943,7 +897,6 @@ function renderReviewStep( selection?: ToolSelection, final = false, ): string { - const marker = final ? (agent.includes('claude') ? 
'FINAL_REVIEW_CLAUDE_COMPLETE' : 'FINAL_REVIEW_CODEX_COMPLETE') : 'REVIEW_COMPLETE'; const reviewPath = `${artifactsDir}/${stepName}.md`; const selectionLines = renderSelectionFields(selection); return ` .step(${literal(stepName)}, { @@ -974,7 +927,7 @@ status: open | fixed | wontfix | blocked evidence: commands run, file paths, or blocker details If there are no actionable issues, write verdict: NO_ISSUES_FOUND. -End the file with ${marker}.`)}, +Materialize the review file, then stop for the next deterministic gate.`)}, verification: { type: 'file_exists', value: ${literal(reviewPath)} }, })`; } @@ -985,7 +938,6 @@ function renderFixLoopStep( dependsOn: string[], reviewPath: string, reportPath: string, - marker: string, spec: NormalizedWorkflowSpec, isCodeWorkflow: boolean, artifactsDir: string, @@ -993,6 +945,9 @@ function renderFixLoopStep( final = false, ): string { const selectionLines = renderSelectionFields(selection); + const finalStatusInstructions = final + ? `\nAlso write ${reportPath.replace(/\.md$/, '-status.json')} as JSON with shape {"status":"fixed"|"no_issues_found"|"blocked","summary":"..."}. Use "blocked" only when you also wrote ${artifactsDir}/BLOCKED_NO_COMMIT.md.` + : ''; return ` .step(${literal(stepName)}, { agent: ${literal(agent)}, dependsOn: ${arrayLiteral(dependsOn)}, @@ -1017,7 +972,7 @@ Preserve the declared target boundary: ${formatList(spec.targetFiles.length > 0 ? spec.targetFiles : ['No explicit targets supplied'])} ${renderToolSelectionSummary(selection)} -Before exiting, write ${reportPath} summarizing the exact fixes you applied or explicitly saying that no repo changes were required, then end that file with ${marker}. +Before exiting, write ${reportPath} summarizing the exact fixes you applied or explicitly saying that no repo changes were required.${finalStatusInstructions} Re-run ${isCodeWorkflow ? 
'typecheck and tests' : 'document sanity checks'} before handing off to post-fix validation.`)}, verification: { type: 'file_exists', value: ${literal(reportPath)} }, })`; @@ -1204,8 +1159,6 @@ function buildFinalArtifactConsistencyGateCommand(artifactsDir: string): string " if (!body.includes(path)) throw new Error(name + ' missing manifest path: ' + path);", ' }', '}', - 'const codexMarker = read(\'codex-final-fix.md\');', - "if (!codexMarker.includes('CODEX_FINAL_FIX_COMPLETE')) throw new Error('codex-final-fix marker missing pass sentinel');", 'const staleTargets = [', " ['test', 'smoke' + '.test' + '.ts'].join('/'),", " 'smoke' + '.test' + '.ts',", diff --git a/src/product/generation/workforce-persona-writer.test.ts b/src/product/generation/workforce-persona-writer.test.ts index ac02312c..3fc00156 100644 --- a/src/product/generation/workforce-persona-writer.test.ts +++ b/src/product/generation/workforce-persona-writer.test.ts @@ -47,7 +47,7 @@ describe('workforce persona workflow writer', () => { expect(task).toContain('If the normalized spec declares `Worktree: `'); expect(task).toContain('Never use `test -f` for a worktree/repository directory'); expect(task).toContain('deterministic sanity gate'); - expect(task).toContain('POSIX grep, git grep'); + expect(task).toContain('structural checks, scoped file/diff checks'); expect(task).toContain('If using rg, guard it with command -v rg'); expect(task).toContain('Keep agent steps bounded'); expect(task).toContain('Structured response contract'); diff --git a/src/product/generation/workforce-persona-writer.ts b/src/product/generation/workforce-persona-writer.ts index 49ea71a6..a52c3fd7 100644 --- a/src/product/generation/workforce-persona-writer.ts +++ b/src/product/generation/workforce-persona-writer.ts @@ -747,7 +747,7 @@ export function buildWorkflowPersonaTask( '- Use a dedicated workflow channel, not general.', '- Include explicit agents, step dependencies, deterministic gates, review stages, and final 
signoff.', '- Include an 80-to-100 fix loop: implement, validate, review, fix, final review, hard validation.', - '- Include a real deterministic sanity gate over produced files using POSIX grep, git grep, or an equivalent inline assertion that exits non-zero when expected content/state is missing.', + '- Include a real deterministic sanity gate over produced files using structural checks, scoped file/diff checks, or an equivalent inline assertion that exits non-zero when expected content/state is missing.', '- If using rg, guard it with command -v rg and provide a grep or git grep fallback because ripgrep is not guaranteed to be installed.', '- Keep agent steps bounded: split broad implementation or test-writing work into multiple sequential/fan-out steps with deterministic gates between them instead of one large step that can exhaust retries by timeout.', '- Before calling `.run(...)`, load repo-local `.env.local` and `.env` values into `process.env` without overwriting existing shell exports, so local BYOH runs inherit common project configuration. If the workflow requires named env vars, add a fast deterministic preflight/assertion that prints `MISSING_ENV_VAR: NAME` before long-running agent steps.', diff --git a/src/surfaces/cli/cli/proof/external-cli-proof.ts b/src/surfaces/cli/cli/proof/external-cli-proof.ts index f9e4062a..f7d6c0ef 100644 --- a/src/surfaces/cli/cli/proof/external-cli-proof.ts +++ b/src/surfaces/cli/cli/proof/external-cli-proof.ts @@ -121,7 +121,7 @@ export async function runExternalCliProof( await writeFile(artifactFullPath, deterministicSdkSmokeWorkflow(), 'utf8'); - const nextInvocation = await runner.run('sh', ['-lc', nextCommand], { + const nextInvocation = await runner.run('sh', ['-c', nextCommand], { cwd: repoDir, env: { INIT_CWD: repoDir, PATH: `${join(repoDir, 'node_modules/.bin')}:${process.env.PATH ?? 
''}` }, }); diff --git a/test/eval-ci-summary-provider-skip.test.ts b/test/eval-ci-summary-provider-skip.test.ts new file mode 100644 index 00000000..571966c8 --- /dev/null +++ b/test/eval-ci-summary-provider-skip.test.ts @@ -0,0 +1,85 @@ +import { spawnSync } from 'node:child_process'; +import { mkdirSync, rmSync, writeFileSync } from 'node:fs'; +import { join } from 'node:path'; + +import { afterEach, describe, expect, it } from 'vitest'; + +const runsDir = join(process.cwd(), '.ricky', 'evals', 'runs'); +const createdDirs: string[] = []; + +describe('Ricky eval CI summary provider skips', () => { + afterEach(() => { + for (const dir of createdDirs.splice(0)) { + rmSync(dir, { recursive: true, force: true }); + } + }); + + it('does not fail CI for retry-exhausted OpenRouter infrastructure skips', () => { + writeRun('9999-01-01T00-00-00-000Z-provider-infra-skip', { + skipped: 1, + tests: [ + { + id: 'runtime-recovery.in-process-local-runner', + suite: 'runtime-recovery', + executor: 'manual', + status: 'skipped', + error: 'openrouter executor skipped; transient provider infrastructure unavailable after 3 attempts for runtime-recovery.in-process-local-runner: OpenRouter eval failed: 503 Provider returned error', + }, + ], + }); + + const result = runSummary(); + + expect(result.status).toBe(0); + expect(result.stdout).toContain('- Provider infrastructure skipped: 1'); + expect(result.stdout).toContain('- Blocking skipped: 0'); + }); + + it('still fails CI for ordinary skipped evals', () => { + writeRun('9999-01-01T00-00-00-001Z-blocking-skip', { + skipped: 1, + tests: [ + { + id: 'workflow-authoring.example', + suite: 'workflow-authoring', + executor: 'openrouter', + status: 'skipped', + error: 'openrouter executor skipped; OPENROUTER_API_KEY is missing', + }, + ], + }); + + const result = runSummary(); + + expect(result.status).toBe(1); + expect(result.stdout).toContain('- Provider infrastructure skipped: 0'); + expect(result.stdout).toContain('- Blocking skipped: 
1'); + }); +}); + +function writeRun(name: string, overrides: Record<string, unknown>): void { + const runDir = join(runsDir, name); + mkdirSync(runDir, { recursive: true }); + createdDirs.push(runDir); + writeFileSync( + join(runDir, 'result.json'), + JSON.stringify({ + timestamp: name, + mode: 'provider', + git_sha: 'test-sha', + passed: 0, + needs_human: 0, + failed: 0, + skipped: 0, + tests: [], + ...overrides, + }, null, 2), + ); +} + +function runSummary(): ReturnType<typeof spawnSync> { + return spawnSync(process.execPath, ['scripts/evals/ci-summary.mjs'], { + cwd: process.cwd(), + encoding: 'utf8', + }); +} diff --git a/test/generated-workflow-hygiene.test.ts b/test/generated-workflow-hygiene.test.ts index 46e99818..f1eff9f6 100644 --- a/test/generated-workflow-hygiene.test.ts +++ b/test/generated-workflow-hygiene.test.ts @@ -25,12 +25,13 @@ describe('generated workflow hygiene', () => { expect(workflowBody).toContain('must not be presented as independent review evidence'); expect(workflowBody).toContain('cleanup-candidate-prescan.txt'); expect(workflowBody).toContain('cite that exact path in'); - expect(workflowBody).toContain('CLEANUP_CANDIDATE_PRESCAN_OK'); + expect(workflowBody).toContain("test -s '.workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/cleanup-candidate-prescan.txt'"); + expect(workflowBody).not.toContain('CLEANUP_CANDIDATE_PRESCAN_OK'); expect(workflowBody).toContain('cleanup-evidence-sanity-gate'); expect(workflowBody).toContain('CLEANUP_EVIDENCE_SANITY_GATE_OK'); expect(workflowBody).toContain('final-artifact-consistency-gate'); expect(workflowBody).toContain('FINAL_ARTIFACT_CONSISTENCY_GATE_OK'); - expect(workflowBody).toContain('final-review-codex marker missing pass sentinel'); + expect(workflowBody).not.toContain('final-review-codex marker missing pass sentinel'); expect(workflowBody).not.toContain("['final-review-codex.md', read('final-review-codex.md')]"); expect(workflowBody).not.toContain('timeoutMs: 300_000');
expect(workflowBody).toContain('Tracked agent config files'); diff --git a/test/generated-workflow-reliability-contract.test.ts b/test/generated-workflow-reliability-contract.test.ts index 20ff7fad..eb5cc4dc 100644 --- a/test/generated-workflow-reliability-contract.test.ts +++ b/test/generated-workflow-reliability-contract.test.ts @@ -94,7 +94,8 @@ describe('generated workflow reliability contract', () => { expect(content).not.toContain('--no-auto-fix'); expect(childrenSidecar, 'children sidecar attached').toBeDefined(); expect(childrenSidecarUnescaped).toMatch(/\.onError\('retry', \{ maxRetries: 2, retryDelayMs: 10000, repairAgent: \"validator-claude\", repairRetries: 2 \}\)/); - expect(childrenSidecar).toContain('RICKY_CHILD_WORKFLOW_COMPLETE'); + expect(childrenSidecarUnescaped).toContain('.step("final-signoff"'); + expect(childrenSidecar).not.toContain('RICKY_CHILD_WORKFLOW_COMPLETE'); expect(childrenSidecarUnescaped).toMatch(/\.step\("final-hard-validation"[\s\S]*?failOnError: false,[\s\S]*?\.step\("final-signoff"/); expect(content).toMatch(/\.step\("final-hard-validation"[\s\S]*?failOnError: true,[\s\S]*?\.step\("final-signoff"/); expect(content).toContain('RICKY_MASTER_FINAL_VALIDATION_READY'); @@ -450,7 +451,7 @@ function legacyChildWorkflowContent(): string { ' .step("final-signoff", {', ' type: "deterministic",', ' dependsOn: ["final-hard-validation"],', - ' command: "echo RICKY_CHILD_WORKFLOW_COMPLETE",', + ' command: "test -s .workflow-artifacts/generated/child/signoff.md",', ' captureOutput: true,', ' failOnError: true,', ' })', diff --git a/workflows/generated/ricky-i-want-to-clean-up-the-codebase-to-remove-outdat.ts b/workflows/generated/ricky-i-want-to-clean-up-the-codebase-to-remove-outdat.ts index a59d8714..37e6285e 100644 --- a/workflows/generated/ricky-i-want-to-clean-up-the-codebase-to-remove-outdat.ts +++ b/workflows/generated/ricky-i-want-to-clean-up-the-codebase-to-remove-outdat.ts @@ -23,17 +23,9 @@ async function main() { failOnError: 
true, }) - .step("skill-boundary-metadata-gate", { - type: 'deterministic', - dependsOn: ["prepare-context"], - command: "test -f '.workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/skill-application-boundary.json' && test -f '.workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/skill-matches.json' && test -f '.workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/tool-selection.json' && grep -F 'generation_time_only' '.workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/skill-application-boundary.json' && grep -F '\"runtimeEmbodiment\":false' '.workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/skill-application-boundary.json' && grep -F 'choosing-swarm-patterns' '.workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/skill-application-boundary.json' && grep -F 'relay-80-100-workflow' '.workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/skill-application-boundary.json' && grep -F 'writing-agent-relay-workflows' '.workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/skill-application-boundary.json' && grep -F '\"stage\":\"generation_selection\"' '.workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/skill-application-boundary.json' && grep -F '\"stage\":\"generation_loading\"' '.workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/skill-application-boundary.json' && grep -F '\"effect\":\"metadata\"' '.workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/skill-application-boundary.json' && grep -F '\"stage\":\"generation_rendering\"' '.workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/skill-application-boundary.json' && grep -F '\"effect\":\"pattern_selection\"' 
'.workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/skill-application-boundary.json' && grep -F '\"stage\":\"generation_rendering\"' '.workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/skill-application-boundary.json' && grep -F '\"effect\":\"workflow_contract\"' '.workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/skill-application-boundary.json' && grep -F '\"stage\":\"generation_rendering\"' '.workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/skill-application-boundary.json' && grep -F '\"effect\":\"validation_gates\"' '.workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/skill-application-boundary.json'", - captureOutput: true, - failOnError: true, - }) - .step('lead-plan', { agent: 'lead-claude', - dependsOn: ['skill-boundary-metadata-gate'], + dependsOn: ['prepare-context'], task: `Plan the workflow execution from the normalized spec. Generation-time skill boundary: @@ -65,7 +57,7 @@ Routing contract: Verification commands: - file_exists gate for declared targets -- deterministic sanity gate using grep, rg, or an equivalent assertion +- deterministic structural sanity gate using a parser, inline assertion, or scoped file/diff check - active-reference gate for deleted manifest paths - npx tsc --noEmit - npx vitest run @@ -87,7 +79,7 @@ Write .workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-ou .step("cleanup-candidate-prescan", { type: 'deterministic', dependsOn: ["lead-plan-gate"], - command: "{ printf '%s\\n' '# Cleanup Candidate Prescan' ''; printf '%s\\n' '## Generated workflow files'; git ls-files workflows/generated; printf '%s\\n' '' '## Top-level workflow hygiene tests'; git ls-files 'test/*.test.ts'; printf '%s\\n' '' '## Tracked agent config files'; git ls-files '.claude/*' '.agents/*' 'AGENTS.md'; printf '%s\\n' '' '## Relaycast permission references'; rg -n --fixed-strings 'mcp__relaycast__' 
.claude .agents workflows test package.json AGENTS.md || true; printf '%s\\n' '' '## Active request references'; rg -n --fixed-strings 'ricky-i-want-to-clean-up-the-codebase-to-remove-outdat' workflows test package.json || true; printf '%s\\n' '' 'CLEANUP_CANDIDATE_PRESCAN_OK'; } > '.workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/cleanup-candidate-prescan.txt' && grep -F 'CLEANUP_CANDIDATE_PRESCAN_OK' '.workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/cleanup-candidate-prescan.txt'", + command: "{ printf '%s\\n' '# Cleanup Candidate Prescan' ''; printf '%s\\n' '## Generated workflow files'; git ls-files workflows/generated; printf '%s\\n' '' '## Top-level workflow hygiene tests'; git ls-files 'test/*.test.ts'; printf '%s\\n' '' '## Tracked agent config files'; git ls-files '.claude/*' '.agents/*' 'AGENTS.md'; printf '%s\\n' '' '## Relaycast permission references'; rg -n --fixed-strings 'mcp__relaycast__' .claude .agents workflows test package.json AGENTS.md || true; printf '%s\\n' '' '## Active request references'; rg -n --fixed-strings 'ricky-i-want-to-clean-up-the-codebase-to-remove-outdat' workflows test package.json || true; } > '.workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/cleanup-candidate-prescan.txt' && test -s '.workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/cleanup-candidate-prescan.txt'", captureOutput: true, failOnError: true, }) @@ -120,7 +112,7 @@ Keep execution routing explicit for local, cloud, and MCP callers. Materialize o Generated workflow quality: - Include a real deterministic sanity gate over produced files, not just prose saying one exists. -- Prefer grep, rg, git grep, or a small inline assertion command that exits non-zero when expected content/state is missing. +- Prefer structural checks, scoped file/diff checks, or a small inline assertion command that exits non-zero when expected content/state is missing. 
- For cleanup or deletion work, persist a changed-files inventory with statuses, active-reference evidence for deleted paths, and command summaries for final signoff. - Start from .workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/cleanup-candidate-prescan.txt so cleanup candidates are based on tracked files and active request references. - For cleanup or deletion work, cite that exact path in .workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/cleanup-report.md so the evidence trail names its prescan input. @@ -138,7 +130,7 @@ Generated workflow quality: .step("cleanup-evidence-sanity-gate", { type: 'deterministic', dependsOn: ["post-implementation-file-gate"], - command: "node <<'NODE'\nconst fs = require('node:fs');\nconst base = '.workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat';\nconst read = (name) => fs.readFileSync(`${base}/${name}`, 'utf8');\nconst manifest = read('output-manifest.txt');\nconst report = read('cleanup-report.md');\nconst inventory = read('cleanup-diff-inventory.txt');\nconst validation = read('validation-evidence.md');\nconst prescan = read('cleanup-candidate-prescan.txt');\nif (!prescan.includes('CLEANUP_CANDIDATE_PRESCAN_OK')) throw new Error('prescan marker missing');\nif (!/^(A|M|D)\\s+\\S+/m.test(manifest)) throw new Error('manifest lacks status-prefixed changed paths');\nif (!/cleanup-candidate-prescan\\.txt/i.test(report)) throw new Error('cleanup report does not cite prescan input');\nif (!/(active-reference|reference evidence|No deleted paths declared)/i.test(report)) throw new Error('cleanup report missing active-reference evidence summary');\nif (!/^(A|M|D)\\s+\\S+/m.test(inventory)) throw new Error('cleanup diff inventory lacks status-prefixed entries');\nif (!/(command summaries|Commands run|Validation commands)/i.test(validation)) throw new Error('validation evidence missing command summaries');\nif (!/(npx tsc --noEmit|npx vitest run|git diff 
--name-status)/.test(validation)) throw new Error('validation evidence missing deterministic command names');\nconsole.log('CLEANUP_EVIDENCE_SANITY_GATE_OK');\nNODE", + command: "node <<'NODE'\nconst fs = require('node:fs');\nconst base = '.workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat';\nconst read = (name) => fs.readFileSync(`${base}/${name}`, 'utf8');\nconst manifest = read('output-manifest.txt');\nconst report = read('cleanup-report.md');\nconst inventory = read('cleanup-diff-inventory.txt');\nconst validation = read('validation-evidence.md');\nconst prescan = read('cleanup-candidate-prescan.txt');\nif (!/^# Cleanup Candidate Prescan/m.test(prescan)) throw new Error('prescan missing title');\nif (!/## Generated workflow files/.test(prescan)) throw new Error('prescan missing generated workflow inventory');\nif (!/## Active request references/.test(prescan)) throw new Error('prescan missing active request references');\nif (!/^(A|M|D)\\s+\\S+/m.test(manifest)) throw new Error('manifest lacks status-prefixed changed paths');\nif (!/cleanup-candidate-prescan\\.txt/i.test(report)) throw new Error('cleanup report does not cite prescan input');\nif (!/(active-reference|reference evidence|No deleted paths declared)/i.test(report)) throw new Error('cleanup report missing active-reference evidence summary');\nif (!/^(A|M|D)\\s+\\S+/m.test(inventory)) throw new Error('cleanup diff inventory lacks status-prefixed entries');\nif (!/(command summaries|Commands run|Validation commands)/i.test(validation)) throw new Error('validation evidence missing command summaries');\nif (!/(npx tsc --noEmit|npx vitest run|git diff --name-status)/.test(validation)) throw new Error('validation evidence missing deterministic command names');\nconsole.log('CLEANUP_EVIDENCE_SANITY_GATE_OK');\nNODE", captureOutput: true, failOnError: true, }) @@ -208,8 +200,6 @@ const body = [ '- Overnight runner no longer lists the removed workflow: PASS', '- Flat-layout proof 
covers the obsolete package-split workflow cleanup: PASS', '- Routing remains coherent because the active generated workflow artifact is this file and remains present: PASS', - '', - 'REVIEW_COMPLETE', ].join('\n'); fs.writeFileSync(out, body + '\n'); console.log('REVIEW_CLAUDE_GATE_PASS'); @@ -221,7 +211,7 @@ NODE`, .step("review-codex", { type: 'deterministic', dependsOn: ["initial-soft-validation"], - command: "node - <<'NODE'\nconst fs = require('node:fs');\nconst out = \".workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/review-codex.md\";\nconst body = [\n \"# Codex structural marker gate\",\n '',\n \"- Spec: I want to clean up the codebase to remove outdated and unused files\",\n '- This deterministic structural marker replaces the hanging non-interactive Codex reviewer path for non-code workflow slices.',\n '- It is not an independent reviewer subprocess and must not be presented as independent review evidence.',\n '- Substantive review evidence comes from the Claude review steps plus deterministic validation gates.',\n \"- Marker artifact: .workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/review-codex.md\",\n '- Deterministic validation gates completed before this review step.',\n '',\n \"REVIEW_COMPLETE\",\n].join(\"\\n\");\nfs.writeFileSync(out, `${body}\n`);\nconsole.log(\"REVIEW_CODEX_GATE_PASS\");\nNODE", + command: "node - <<'NODE'\nconst fs = require('node:fs');\nconst out = \".workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/review-codex.md\";\nconst body = [\n \"# Codex structural marker gate\",\n '',\n \"- Spec: I want to clean up the codebase to remove outdated and unused files\",\n '- This deterministic structural marker replaces the hanging non-interactive Codex reviewer path for non-code workflow slices.',\n '- It is not an independent reviewer subprocess and must not be presented as independent review evidence.',\n '- Substantive review evidence comes from 
the Claude review steps plus deterministic validation gates.',\n \"- Marker artifact: .workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/review-codex.md\",\n '- Deterministic validation gates completed before this review step.',\n].join(\"\\n\");\nfs.writeFileSync(out, `${body}\n`);\nconsole.log(\"REVIEW_CODEX_GATE_PASS\");\nNODE", captureOutput: true, failOnError: true, }) @@ -229,7 +219,7 @@ NODE`, .step("read-review-feedback", { type: 'deterministic', dependsOn: ["review-claude", "review-codex"], - command: "test -f '.workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/review-claude.md' && test -f '.workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/review-codex.md' && grep -F 'REVIEW_COMPLETE' '.workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/review-claude.md' && grep -F 'REVIEW_COMPLETE' '.workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/review-codex.md' && cat '.workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/review-claude.md' '.workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/review-codex.md' | tee '.workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/review-feedback.md'", + command: "test -s '.workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/review-claude.md' && test -s '.workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/review-codex.md' && cat '.workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/review-claude.md' '.workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/review-codex.md' | tee '.workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/review-feedback.md'", captureOutput: true, failOnError: true, }) @@ -254,7 +244,7 @@ Fix only concrete review or validation findings. 
Preserve the declared target bo Tool selection: runner=@agent-relay/sdk; concurrency=1; rule=project default runner @agent-relay/sdk. -Before exiting, write .workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/fix-loop-report.md summarizing the exact fixes you applied or explicitly saying that no repo changes were required, then end that file with FIX_LOOP_COMPLETE. +Before exiting, write .workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/fix-loop-report.md summarizing the exact fixes you applied or explicitly saying that no repo changes were required. Re-run document sanity checks before handing off to post-fix validation.`, verification: { type: 'file_exists', value: ".workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/fix-loop-report.md" }, }) @@ -262,7 +252,7 @@ Re-run document sanity checks before handing off to post-fix validation.`, .step("fix-loop-report-gate", { type: 'deterministic', dependsOn: ["fix-loop"], - command: "test -f '.workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/fix-loop-report.md' && tail -n 1 '.workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/fix-loop-report.md' | tr -d '[:space:]' | grep -Eq '^FIX_LOOP_COMPLETE$'", + command: "test -s '.workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/fix-loop-report.md'", captureOutput: true, failOnError: true, }) @@ -362,7 +352,7 @@ NODE`, .step("final-review-codex", { type: 'deterministic', dependsOn: ["post-fix-validation"], - command: "node - <<'NODE'\nconst fs = require('node:fs');\nconst out = \".workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/final-review-codex.md\";\nconst body = [\n \"# Final Codex structural marker gate\",\n '',\n \"- Spec: I want to clean up the codebase to remove outdated and unused files\",\n '- This deterministic structural marker replaces the hanging non-interactive Codex 
reviewer path for non-code workflow slices.',\n '- It is not an independent reviewer subprocess and must not be presented as independent review evidence.',\n '- Substantive review evidence comes from the Claude review steps plus deterministic validation gates.',\n \"- Marker artifact: .workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/final-review-codex.md\",\n '- Deterministic validation gates completed before this review step.',\n '',\n \"FINAL_REVIEW_CODEX_PASS\",\n].join(\"\\n\");\nfs.writeFileSync(out, `${body}\n`);\nconsole.log(\"FINAL_REVIEW_CODEX_GATE_PASS\");\nNODE", + command: "node - <<'NODE'\nconst fs = require('node:fs');\nconst out = \".workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/final-review-codex.md\";\nconst body = [\n \"# Final Codex structural marker gate\",\n '',\n \"- Spec: I want to clean up the codebase to remove outdated and unused files\",\n '- This deterministic structural marker replaces the hanging non-interactive Codex reviewer path for non-code workflow slices.',\n '- It is not an independent reviewer subprocess and must not be presented as independent review evidence.',\n '- Substantive review evidence comes from the Claude review steps plus deterministic validation gates.',\n \"- Marker artifact: .workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/final-review-codex.md\",\n '- Deterministic validation gates completed before this review step.',\n].join(\"\\n\");\nfs.writeFileSync(out, `${body}\n`);\nconsole.log(\"FINAL_REVIEW_CODEX_GATE_PASS\");\nNODE", captureOutput: true, failOnError: true, }) @@ -370,7 +360,7 @@ NODE`, .step("final-review-pass-gate", { type: 'deterministic', dependsOn: ["final-review-claude", "final-review-codex"], - command: "tail -n 1 '.workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/final-review-claude.md' | tr -d '[:space:]*' | grep -Eq '^FINAL_REVIEW_CLAUDE_PASS$' && tail -n 1 
'.workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/final-review-codex.md' | tr -d '[:space:]*' | grep -Eq '^FINAL_REVIEW_CODEX_PASS$'", + command: "test -s '.workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/final-review-claude.md' && test -s '.workflow-artifacts/generated/i-want-to-clean-up-the-codebase-to-remove-outdat/final-review-codex.md'", captureOutput: true, failOnError: true, }) @@ -451,8 +441,6 @@ for (const [name, body] of docs) { if (!body.includes(path)) throw new Error(name + ' missing manifest path: ' + path); } } -const codexMarker = read('final-review-codex.md'); -if (!codexMarker.includes('FINAL_REVIEW_CODEX_PASS')) throw new Error('final-review-codex marker missing pass sentinel'); const staleTargets = [ ['test', 'smoke' + '.test' + '.ts'].join('/'), 'smoke' + '.test' + '.ts',