Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 55 additions & 1 deletion src/local/auto-fix-loop.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import { join } from 'node:path';

import { describe, expect, it, vi } from 'vitest';

import { repairWorkflowDeterministically, runWithAutoFix } from './auto-fix-loop.js';
import { repairWorkflowDeterministically, resolveSafeResumeAnchor, runWithAutoFix } from './auto-fix-loop.js';
import type { LocalClassifiedBlocker, LocalResponse } from './entrypoint.js';
import type { LocalInvocationRequest } from './request-normalizer.js';
import type { FailureClassification } from '../runtime/failure/types.js';
Expand Down Expand Up @@ -1074,6 +1074,60 @@ describe('isSyntheticStageId', () => {
});
});

describe('resolveSafeResumeAnchor', () => {
// Mirrors the generated child-workflow shape that caused the
// final-review-pass-gate infinite-retry loop: agent steps produce review
// artifacts, then a deterministic gate greps them.
const reviewGateWorkflow = `import { workflow } from '@agent-relay/sdk/workflows';
const wf = workflow('child')
.step("prepare-context", { type: "deterministic", command: "echo prep" })
.step("implement-slice", { agent: "impl-codex", dependsOn: ["prepare-context"], task: "do work" })
.step("review-claude", { agent: "reviewer-claude", dependsOn: ["implement-slice"], task: "review" })
.step("final-fix-codex", { agent: "validator-codex", dependsOn: ["review-claude"], task: "final fix" })
.step("final-review-pass-gate", { type: "deterministic", dependsOn: ["final-fix-codex"], command: "grep -F MARKER artifact.md" })
.step("final-signoff", { type: "deterministic", dependsOn: ["final-review-pass-gate"], command: "echo done" });
`;

it('moves the anchor off a deterministic gate to its nearest upstream agent producer', () => {
expect(resolveSafeResumeAnchor('final-review-pass-gate', reviewGateWorkflow)).toBe('final-fix-codex');
});

it('leaves an agent step as its own resume anchor', () => {
expect(resolveSafeResumeAnchor('review-claude', reviewGateWorkflow)).toBe('review-claude');
});

it('keeps the anchor when the failed step is the gate but the chain is purely deterministic', () => {
const deterministicOnly = `import { workflow } from '@agent-relay/sdk/workflows';
const wf = workflow('det')
.step("build", { type: "deterministic", command: "make" })
.step("verify", { type: "deterministic", dependsOn: ["build"], command: "test -f out" });
`;
expect(resolveSafeResumeAnchor('verify', deterministicOnly)).toBe('verify');
});

it('returns the failed step unchanged when content is missing or unparseable', () => {
expect(resolveSafeResumeAnchor('final-review-pass-gate', undefined)).toBe('final-review-pass-gate');
expect(resolveSafeResumeAnchor('final-review-pass-gate', '')).toBe('final-review-pass-gate');
expect(resolveSafeResumeAnchor(undefined, reviewGateWorkflow)).toBeUndefined();
});

it('returns the failed step unchanged when it is not a known step', () => {
expect(resolveSafeResumeAnchor('does-not-exist', reviewGateWorkflow)).toBe('does-not-exist');
});

it('does not treat step ids embedded in shell command HEREDOCs as real steps', () => {
// The grep below references "final-review-pass-gate" inside a command
// string; the AST keeps string-literal contents inert so it is not
// mistaken for a step definition.
const withHeredoc = `import { workflow } from '@agent-relay/sdk/workflows';
const wf = workflow('child')
.step("implement", { agent: "impl-codex", task: "work" })
.step("gate", { type: "deterministic", dependsOn: ["implement"], command: "echo .step(\\"final-review-pass-gate\\", {}) && grep MARKER f" });
`;
expect(resolveSafeResumeAnchor('gate', withHeredoc)).toBe('implement');
});
});

function successResponse(runId: string): LocalResponse {
return {
ok: true,
Expand Down
242 changes: 236 additions & 6 deletions src/local/auto-fix-loop.ts
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,199 @@ function safeToRetryWithoutStartFromStep(
return !workflowDidExpensiveWork(evidence);
}

interface WorkflowStepNode {
id: string;
/**
* True when the step regenerates its own output on every run (agent steps,
* and any non-deterministic step type). False only for step types the SDK
* treats as broker-free and pure: `deterministic`, `worktree`, `integration`
* (see `@agent-relay/sdk` runner `requiresBroker`). Deterministic gates that
* validate artifacts written by upstream agent steps are the failure mode
* this guards: resuming `--start-from <gate>` skips the agent producers, so
* the gate's input is never regenerated and it fails forever.
*/
regeneratesOutput: boolean;
dependsOn: string[];
order: number;
}

/**
* Parse `workflow.step("id", { ... })` builder calls from a generated workflow
* artifact into a step-dependency graph. Uses the TypeScript AST (per AGENTS.md
* "Source-Text Analysis: Use Grammar-Aware Parsers, Not Regex") so step ids
* embedded in shell `command` HEREDOCs or `task` prose don't get mistaken for
* real steps. Returns null when the artifact has no recognizable steps.
*/
function parseWorkflowStepGraph(content: string): Map<string, WorkflowStepNode> | null {
let sourceFile: ts.SourceFile;
try {
sourceFile = ts.createSourceFile(
'ricky-workflow-artifact.ts',
content,
ts.ScriptTarget.Latest,
false,
ts.ScriptKind.TS,
);
} catch {
return null;
}

const steps = new Map<string, WorkflowStepNode>();
let order = 0;
const nonRegeneratingTypes = new Set(['deterministic', 'worktree', 'integration']);

const visit = (node: ts.Node): void => {
if (
ts.isCallExpression(node)
&& ts.isPropertyAccessExpression(node.expression)
&& node.expression.name.text === 'step'
&& node.arguments.length >= 2
&& ts.isStringLiteralLike(node.arguments[0])
&& ts.isObjectLiteralExpression(node.arguments[1])
) {
const id = node.arguments[0].text;
const config = node.arguments[1];
let hasAgent = false;
let stepType: string | undefined;
let dependsOn: string[] = [];
for (const prop of config.properties) {
if (!ts.isPropertyAssignment(prop) || !prop.name) continue;
const key = ts.isIdentifier(prop.name) || ts.isStringLiteralLike(prop.name)
? prop.name.text
: undefined;
if (key === 'agent') hasAgent = true;
if (key === 'type' && ts.isStringLiteralLike(prop.initializer)) {
stepType = prop.initializer.text;
}
if (key === 'dependsOn' && ts.isArrayLiteralExpression(prop.initializer)) {
dependsOn = prop.initializer.elements
.filter(ts.isStringLiteralLike)
.map((element) => element.text);
}
}
// A step regenerates output unless it is explicitly one of the pure
// SDK step types AND not an agent step. Default-unknown → treat as
// regenerating (safe: resuming there re-runs it).
const regeneratesOutput = hasAgent
|| stepType === undefined
|| !nonRegeneratingTypes.has(stepType);
if (!steps.has(id)) {
steps.set(id, { id, regeneratesOutput, dependsOn, order: order++ });
}
}
ts.forEachChild(node, visit);
};
visit(sourceFile);
return steps.size > 0 ? steps : null;
}

/**
* Choose a safe `--start-from` anchor for an auto-fix retry.
*
* Resuming directly from `failedStep` is correct only when re-running that
* step alone can succeed. It cannot when `failedStep` is a deterministic gate
* that validates an artifact produced by an upstream agent step: resuming
* from the gate skips the agent producer, the artifact is never regenerated,
* and the gate fails identically every attempt (the
* `final-review-pass-gate` infinite-retry loop). In that case, anchor the
* resume at the *nearest* upstream agent step the gate transitively depends
* on so the producing step re-runs and regenerates the gate's input; earlier
* artifacts persist on disk between runs so they need not be recomputed.
*
* Returns `failedStep` unchanged when it is itself a regenerating step, when
* its whole dependency chain is pure/deterministic (recompute is safe), or
* when the workflow graph cannot be parsed.
*/
/**
* Extract the literal source text of a step's config object —
* `.step("<id>", { ... })` → the `{ ... }` span — via the TypeScript AST.
* Used to detect whether a repair actually rewrote the failed step.
*/
function extractStepConfigText(content: string, stepId: string): string | null {
let sourceFile: ts.SourceFile;
try {
sourceFile = ts.createSourceFile(
'ricky-workflow-artifact.ts',
content,
ts.ScriptTarget.Latest,
false,
ts.ScriptKind.TS,
);
} catch {
return null;
}
let found: string | null = null;
const visit = (node: ts.Node): void => {
if (found !== null) return;
if (
ts.isCallExpression(node)
&& ts.isPropertyAccessExpression(node.expression)
&& node.expression.name.text === 'step'
&& node.arguments.length >= 2
&& ts.isStringLiteralLike(node.arguments[0])
&& node.arguments[0].text === stepId
&& ts.isObjectLiteralExpression(node.arguments[1])
) {
found = node.arguments[1].getText(sourceFile);
return;
}
ts.forEachChild(node, visit);
};
visit(sourceFile);
return found;
}

/**
* True when a repair rewrote the failed step's own definition. When the gate
* step itself was the bug (e.g. the `lead-plan-gate` marker-append logic) the
* repair fixes it in place and resuming `--start-from <gate>` is correct —
* the resume-anchor downgrade must NOT fire in that case.
*/
function failedStepRepaired(
failedStep: string,
oldContent: string | undefined,
newContent: string | undefined,
): boolean {
if (!oldContent || !newContent) return false;
const before = extractStepConfigText(oldContent, failedStep);
const after = extractStepConfigText(newContent, failedStep);
if (before === null || after === null) return false;
return before !== after;
}

export function resolveSafeResumeAnchor(
failedStep: string | undefined,
workflowContent: string | undefined,
): string | undefined {
if (!failedStep || !workflowContent) return failedStep;
const graph = parseWorkflowStepGraph(workflowContent);
if (!graph) return failedStep;
const failed = graph.get(failedStep);
if (!failed) return failedStep;
if (failed.regeneratesOutput) return failedStep;

const seen = new Set<string>([failedStep]);
let frontier = [...failed.dependsOn];
while (frontier.length > 0) {
const nearestAgent = frontier
.map((id) => graph.get(id))
.filter((node): node is WorkflowStepNode => Boolean(node) && node!.regeneratesOutput)
// Prefer the most-recently-declared producer when several sit at the
// same dependency distance — it is the closest to the gate.
.sort((a, b) => b.order - a.order)[0];
Comment on lines +339 to +341
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 Inverted order assignment in parseWorkflowStepGraph causes sort to pick earliest-declared producer instead of most-recently-declared

The order field on WorkflowStepNode is assigned via a DFS counter (order++) as the visit callback recurses through the AST. For chained .step() builder calls (e.g. .step("A", {...}).step("B", {...}).step("C", {...})), the TypeScript AST nests them as outermost→innermost = last-declared→first-declared. The visit function processes the current node before recursing into children (ts.forEachChild), so the last-declared step gets the lowest order value and the first-declared step gets the highest.

The sort at line 341, .sort((a, b) => b.order - a.order)[0], picks the element with the highest order — which is the earliest-declared producer. The comment on lines 339-340 says "Prefer the most-recently-declared producer when several sit at the same dependency distance," intending the opposite. When a deterministic gate has multiple agent producers at the same BFS distance (e.g. dependsOn: ["agent-a", "agent-b"]), the wrong producer is chosen. Depending on --start-from semantics, the other producer may not re-run, leaving its stale artifacts in place and the gate still failing.

Example scenario where bug manifests
.step("agent-a", { agent: "a" })
.step("agent-b", { agent: "b" })
.step("gate", { type: "deterministic", dependsOn: ["agent-a", "agent-b"], ... })

AST visit order: gate(order=0), agent-b(order=1), agent-a(order=2). Sort descending picks agent-a (order=2). Intent was to pick agent-b (most recently declared, closest to the gate).

Suggested change
// Prefer the most-recently-declared producer when several sit at the
// same dependency distance — it is the closest to the gate.
.sort((a, b) => b.order - a.order)[0];
// Prefer the most-recently-declared producer when several sit at the
// same dependency distance — it is the closest to the gate.
.sort((a, b) => a.order - b.order)[0];
Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.

if (nearestAgent) return nearestAgent.id;
const next: string[] = [];
for (const id of frontier) {
if (seen.has(id)) continue;
seen.add(id);
const node = graph.get(id);
if (node) next.push(...node.dependsOn);
}
frontier = next;
}
return failedStep;
}

/**
* Reason string explaining why we refused to retry from scratch. Kept in one
* place so escalation messages and warnings stay aligned.
Expand Down Expand Up @@ -468,18 +661,34 @@ export async function runWithAutoFix(
retryOfRunId = runId;
}

// If the repair rewrote the failed step itself, the fix lives in that
// step — resume from it. Only when the repair left the failed step
// untouched (e.g. it made downstream validation non-terminal but did
// not fix a gate whose input comes from a skipped upstream agent
// step) do we downgrade the anchor to the artifact's producer.
const personaResumeAnchor =
failedStep && failedStepRepaired(failedStep, repairTarget.artifactContent, repair.content)
? failedStep
: resolveSafeResumeAnchor(failedStep, repair.content);
if (personaResumeAnchor !== failedStep && failedStep) {
warnings.push(
`Auto-fix resume anchor moved from "${failedStep}" to "${personaResumeAnchor}": ` +
`"${failedStep}" validates artifacts produced by an upstream agent step that a ` +
`direct --start-from would skip.`,
);
}
currentRequest = {
...retryBaseRequest(currentRequest, response, repairedArtifactPath, repair.content),
autoFix: undefined,
retry: {
attempt: attempt + 1,
maxAttempts,
...(runId ? { previousRunId: runId, retryOfRunId: retryOfRunId ?? runId } : {}),
...(failedStep ? { startFromStep: failedStep } : {}),
...(personaResumeAnchor ? { startFromStep: personaResumeAnchor } : {}),
reason: `auto-fix retry after Workforce workflow persona repair for ${blockerCode ?? 'local failure'}`,
},
};
onProgress?.(`Retrying workflow${failedStep ? ` from ${failedStep}` : ''}...`);
onProgress?.(`Retrying workflow${personaResumeAnchor ? ` from ${personaResumeAnchor}` : ''}...`);
continue;
} catch (error) {
attemptSummary.fix_error = error instanceof Error ? error.message : String(error);
Expand All @@ -493,18 +702,26 @@ export async function runWithAutoFix(
} else if (!retryOfRunId) {
retryOfRunId = runId;
}
const providerFailResumeAnchor = resolveSafeResumeAnchor(failedStep, repairTarget.artifactContent);
if (providerFailResumeAnchor !== failedStep && failedStep) {
warnings.push(
`Auto-fix resume anchor moved from "${failedStep}" to "${providerFailResumeAnchor}": ` +
`"${failedStep}" validates artifacts produced by an upstream agent step that a ` +
`direct --start-from would skip.`,
);
}
currentRequest = {
...retryBaseRequest(currentRequest, response),
autoFix: undefined,
retry: {
attempt: attempt + 1,
maxAttempts,
...(runId ? { previousRunId: runId, retryOfRunId: retryOfRunId ?? runId } : {}),
...(failedStep ? { startFromStep: failedStep } : {}),
...(providerFailResumeAnchor ? { startFromStep: providerFailResumeAnchor } : {}),
reason: `auto-fix retry after workflow repair provider failure for ${blockerCode ?? 'local failure'}`,
},
};
onProgress?.(`Workflow repair provider failed; retrying workflow${failedStep ? ` from ${failedStep}` : ''}...`);
onProgress?.(`Workflow repair provider failed; retrying workflow${providerFailResumeAnchor ? ` from ${providerFailResumeAnchor}` : ''}...`);
continue;
}
if (!safeToRetryWithoutStartFromStep(failedStep, evidence)) {
Expand Down Expand Up @@ -605,18 +822,31 @@ export async function runWithAutoFix(
retryOfRunId = runId;
}

// The direct-repair path is only reached for V1 environment/binary
// blockers, after the workflow-repair branch has already returned. No
// workflow artifact content is in scope here, so the anchor stays as the
// failed step (the review-gate skip-loop is handled by the workflow-repair
// sites above, which do carry artifact content).
const directResumeAnchor = resolveSafeResumeAnchor(failedStep, undefined);
if (directResumeAnchor !== failedStep && failedStep) {
warnings.push(
`Auto-fix resume anchor moved from "${failedStep}" to "${directResumeAnchor}": ` +
`"${failedStep}" validates artifacts produced by an upstream agent step that a ` +
`direct --start-from would skip.`,
);
}
currentRequest = {
...retryBaseRequest(currentRequest, response),
autoFix: undefined,
retry: {
attempt: attempt + 1,
maxAttempts,
...(runId ? { previousRunId: runId, retryOfRunId: retryOfRunId ?? runId } : {}),
...(failedStep ? { startFromStep: failedStep } : {}),
...(directResumeAnchor ? { startFromStep: directResumeAnchor } : {}),
reason: `auto-fix retry after ${blockerCode ?? 'local failure'}`,
},
};
onProgress?.(`Retrying workflow${failedStep ? ` from ${failedStep}` : ''}...`);
onProgress?.(`Retrying workflow${directResumeAnchor ? ` from ${directResumeAnchor}` : ''}...`);
}

return withAutoFix(lastResponse ?? failedBeforeAttempt(request), maxAttempts, attempts, 'error', warnings, trackingRunId);
Expand Down
Loading