Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions .trajectories/index.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"version": 1,
"lastUpdated": "2026-05-20T08:00:25.256Z",
"lastUpdated": "2026-05-20T11:46:17.517Z",
"trajectories": {
"traj_05xg7j388bc4": {
"title": "Add browser workflow step integration",
Expand Down Expand Up @@ -1096,6 +1096,13 @@
"startedAt": "2026-05-19T12:34:36.057Z",
"completedAt": "2026-05-19T12:47:18.115Z",
"path": "/home/runner/work/relay/relay/.trajectories/completed/2026-05/traj_gnqvtoxtc8dy.json"
},
"traj_af7iew24eiip": {
"title": "autofix-swarm-Agentworkforce-relay-workflow",
"status": "completed",
"startedAt": "2026-05-20T11:36:34.306Z",
"completedAt": "2026-05-20T11:46:17.506Z",
"path": "/Users/khaliqgant/Projects/AgentWorkforce/.msd-autofix-1bdf6c0b/.trajectories/completed/2026-05/traj_af7iew24eiip.json"
}
}
}
}
41 changes: 33 additions & 8 deletions crates/broker/src/runtime/init.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,18 +32,43 @@ pub(crate) async fn run_init(cmd: InitCommand, telemetry: TelemetryClient) -> Re
let paths = if cmd.persist || custom_state_dir.is_some() {
ensure_runtime_paths(&runtime_cwd, &resolved_name, custom_state_dir.as_deref())?
} else {
// Warn if a stale .agent-relay/ dir exists from a previous persist run.
// Agents can read files from it directly (logs, state) and get confused.
// Warn only if there is *actual broker state* in .agent-relay/ from a
// prior `--persist` run that could confuse this ephemeral run.
//
// The SDK workflow runner ALWAYS writes .agent-relay/step-outputs/ and
// .agent-relay/team/worker-logs/ regardless of broker mode (those are
// durable artifacts, not broker state), so a bare directory check fires
// on virtually every workflow run — a noisy false positive.
//
// The discriminator is the broker's state file. `ensure_runtime_paths`
// (the persist-mode helper in runtime/paths.rs) writes it as
// `state-{safe_name}.json`, where `safe_name` is the sanitized broker
// name — so the exact filename varies by run. Glob for any
// `state-*.json` entry in `.agent-relay/` and surface every match so
// the user can see exactly what's stale regardless of broker name.
let stale_dir = runtime_cwd.join(".agent-relay");
if stale_dir.exists() {
eprintln!(
"[agent-relay] WARNING: stale .agent-relay/ directory found in {}",
runtime_cwd.display()
);
let stale_state_files: Vec<PathBuf> = std::fs::read_dir(&stale_dir)
.ok()
.into_iter()
.flatten()
.filter_map(|entry| entry.ok())
.filter(|entry| {
let name = entry.file_name();
let name_str = name.to_string_lossy();
name_str.starts_with("state-") && name_str.ends_with(".json")
})
.map(|entry| entry.path())
.collect();
if !stale_state_files.is_empty() {
eprintln!(
"[agent-relay] WARNING: remove it to avoid confusing spawned agents: rm -rf {}",
"[agent-relay] WARNING: this run is ephemeral but {} prior --persist state file(s) remain in {}:",
stale_state_files.len(),
stale_dir.display()
);
for state_file in &stale_state_files {
eprintln!("[agent-relay] WARNING: {}", state_file.display());
}
eprintln!("[agent-relay] WARNING: remove them to avoid confusing spawned agents.");
}
ensure_ephemeral_paths(&runtime_cwd, &resolved_name)?
};
Expand Down
20 changes: 14 additions & 6 deletions packages/sdk/src/workflows/__tests__/channel-messenger.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,18 @@ describe('channel messenger helpers', () => {
expect(formatStepOutput('plan', output)).toBe('**[plan] Output:**\n```\nuseful line\n```');
});

it('formatStepOutput strips malformed PTY frames through the shared scrubber', () => {
const output = ['real result', 'qW0 | q0 / ql0 _ qqm ~ lqq = qW0 | q0 / ql0 _ qqm', 'done'].join('\n');
expect(formatStepOutput('plan', output)).toBe('**[plan] Output:**\n```\nreal result\ndone\n```');
});

it('formatStepOutput redacts secrets through the shared scrubber', () => {
const output = 'deploy succeeded\naccess_token=ghp_abcdefghijklmnopqrstuvwxyzABCDEFGHIJ\n';
const formatted = formatStepOutput('deploy', output);
expect(formatted).toContain('[REDACTED]');
expect(formatted).not.toContain('ghp_abcdefghijklmnopqrstuvwxyzABCDEFGHIJ');
});

it('formatError normalizes unknown errors', () => {
expect(formatError('build', new Error('Boom'))).toBe('**[build]** Failed: Boom');
expect(formatError('build', 'bad input')).toBe('**[build]** Failed: bad input');
Expand All @@ -47,12 +59,8 @@ describe('ChannelMessenger', () => {

it('lists non-interactive agents with step references', () => {
const messenger = new ChannelMessenger();
const agents = new Map([
['bg-worker', { name: 'bg-worker', cli: 'claude', interactive: false }],
]);
const stepStates = new Map([
['analyze', { row: { agentName: 'bg-worker', status: 'running' } }],
]);
const agents = new Map([['bg-worker', { name: 'bg-worker', cli: 'claude', interactive: false }]]);
const stepStates = new Map([['analyze', { row: { agentName: 'bg-worker', status: 'running' } }]]);
const result = messenger.buildNonInteractiveAwareness(agents as any, stepStates as any);
expect(result).toContain('bg-worker');
expect(result).toContain('{{steps.analyze.output}}');
Expand Down
125 changes: 125 additions & 0 deletions packages/sdk/src/workflows/__tests__/scrub-pty-chrome.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
/**
* Regression tests for WorkflowRunner.scrubForChannel — the function that
* strips PTY/TUI chrome from interactive-agent step output before it gets
* surfaced in workflow logs and channel messages.
*
* The patterns covered here are taken from a real captured run of a
* multi-turn workflow against Claude Code's PTY: when its TUI footer
* overwrites itself faster than the PTY flushes whitespace, lines like
* `bypasspermissionson`, `--INSERT--⏵⏵`, and `Opus 4.7 (1M context) ctx:5%
* $1.45` end up in the captured stream. Before these regex additions, the
* step "Output:" block was unreadable on interactive-agent steps.
*/
import { describe, it, expect } from 'vitest';

import { WorkflowRunner } from '../runner.js';

// scrubForChannel is `private static` — the cast is the minimal-invasive way
// to exercise it from a test without exporting an internal-only helper.
const scrub = (text: string): string =>
(WorkflowRunner as unknown as { scrubForChannel(t: string): string }).scrubForChannel(text);

describe('WorkflowRunner.scrubForChannel — PTY chrome stripping', () => {
// Feeds both a spaced and a whitespace-collapsed capture of the status bar
// through the scrubber and checks that nothing from either footer line survives.
it('strips the Claude Code bottom status bar (model + ctx% + cost)', () => {
  const input = [
    'real content line',
    'workflows git:(main) Opus 4.7 (1M context) ctx:5% $1.45',
    'Opus4.7(1Mcontext) ctx:6% $1.54',
    'another real line',
  ].join('\n');
  const out = scrub(input);
  // Real content on either side of the footer must be kept.
  expect(out).toContain('real content line');
  expect(out).toContain('another real line');
  // The footer has to go as a whole line: checking only the ctx/cost tokens
  // would still pass if the breadcrumb or model-name fragment were left behind.
  expect(out).not.toContain('git:(main)');
  expect(out).not.toMatch(/Opus/);
  expect(out).not.toMatch(/ctx\s*:\s*\d+%/);
  expect(out).not.toMatch(/\$\d+\.\d+/);
});
Comment on lines +23 to +35
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

Assert full footer-line removal, not just token removal.

This test can still pass if scrubForChannel leaves non-token footer fragments behind. Add exact-line absence checks to guard against partial stripping regressions.

Suggested test tightening
   const out = scrub(input);
   expect(out).toContain('real content line');
   expect(out).toContain('another real line');
+  expect(out).not.toContain('workflows git:(main) Opus 4.7 (1M context) ctx:5% $1.45');
+  expect(out).not.toContain('Opus4.7(1Mcontext) ctx:6% $1.54');
   expect(out).not.toMatch(/ctx\s*:\s*\d+%/);
   expect(out).not.toMatch(/\$\d+\.\d+/);
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@packages/sdk/src/workflows/__tests__/scrub-pty-chrome.test.ts` around lines
23 - 35, The test currently only ensures tokens are removed but may allow footer
line fragments to remain; update the test around scrub to assert the entire
footer lines are absent by checking that the exact footer strings are not
present as whole lines (e.g., use expect(out.split('\n')).not.toContain(...) or
a multiline regex like not.toMatch(/^[^\n]*Opus.*ctx.*\$\d+\.\d+$/m)) so the
full footer lines produced by Claude are stripped; reference the scrub helper
(and scrubForChannel if used elsewhere) when locating the logic to validate.


it('strips vim-style mode indicators emitted by the input bar', () => {
const input = [
'pre-mode line',
'--INSERT--',
'--INSERT--⏵⏵bypasspermissionson (shift+tabtocycle)',
'post-mode line',
].join('\n');
const out = scrub(input);
expect(out).toContain('pre-mode line');
expect(out).toContain('post-mode line');
expect(out).not.toMatch(/--INSERT--/);
});

// Covers hint text whose whitespace was lost in capture. Every hint line fed
// in is asserted absent, so a regression on any single variant fails the test.
it('strips no-whitespace TUI hint variants (bypasspermissionson, pasteagaintoexpand)', () => {
  const input = ['before', 'bypasspermissionson', 'pasteagaintoexpand', 'shifttabto cycle', 'after'].join(
    '\n'
  );
  const out = scrub(input);
  expect(out).toContain('before');
  expect(out).toContain('after');
  expect(out).not.toMatch(/bypasspermissionson/);
  expect(out).not.toMatch(/pasteagaintoexpand/);
  // 'shifttabto cycle' is part of the input, so it must be checked as well.
  expect(out).not.toMatch(/shifttabto/);
});

it('strips thinking-status fragments without ellipsis anchors', () => {
const input = [
'meaningful: round 3 codex-player guess=19 feedback=correct',
'thinking with high effort',
'↓ 13 tokens · thinking with high effort',
'Crunched for 32s',
'Sautéed for 4s',
'Gitifying…55',
].join('\n');
const out = scrub(input);
expect(out).toContain('feedback=correct');
expect(out).not.toMatch(/thinking with high effort/);
expect(out).not.toMatch(/Crunched for/);
expect(out).not.toMatch(/Gitifying/);
});

it('strips malformed overwritten q0/qW0 PTY frame runs', () => {
const input = [
'first useful line',
'qW0 | q0 / ql0 _ qqm ~ lqq = qW0 | q0 / ql0 _ qqm',
'summary: kept qW0 | q0 / ql0 _ qqm ~ lqq = qW0 | q0 done',
'last useful line',
].join('\n');
const out = scrub(input);
expect(out).toContain('first useful line');
expect(out).toContain('last useful line');
expect(out).toMatch(/summary: kept\s+done/);
expect(out).not.toMatch(/qW0|ql0|qqm|lqq/);
});

it('redacts secrets in the runner public preview path', () => {
const out = scrub('deploy succeeded\napi_key=sk-abcdefghijklmnopqrstuvwxyz123456\n');
expect(out).toContain('deploy succeeded');
expect(out).toContain('[REDACTED]');
expect(out).not.toContain('sk-abcdefghijklmnopqrstuvwxyz123456');
});

it('preserves real content and OWNER_DECISION signals', () => {
const input = [
'Read 1 file, calling relaycast 2 times',
'Transcript verification reports TRANSCRIPT_OK with all 6 lines well-formed.',
'OWNER_DECISION: COMPLETE',
'REASON: All 6 turns executed, history.log has 6 lines.',
'STEP_COMPLETE: repair-transcript',
].join('\n');
const out = scrub(input);
expect(out).toContain('TRANSCRIPT_OK');
expect(out).toContain('OWNER_DECISION: COMPLETE');
expect(out).toContain('STEP_COMPLETE: repair-transcript');
expect(out).toContain('All 6 turns executed');
});

it('does not strip lines that merely mention model names in prose', () => {
// Guard against the new claudeFooterRe (which looks for `Opus|Sonnet|Haiku <num>
// (...context...) ctx:N%`) being too eager and removing prose that
// mentions a model name.
const input = [
'Compared output from Opus 4.7 against Sonnet 4.6 — both passed.',
'We chose Haiku 4.5 for its latency profile.',
].join('\n');
const out = scrub(input);
expect(out).toContain('Opus 4.7 against Sonnet 4.6');
expect(out).toContain('Haiku 4.5 for its latency profile');
});
});
20 changes: 19 additions & 1 deletion packages/sdk/src/workflows/builder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,25 @@ export interface AgentOptions {
maxTokens?: number;
timeoutMs?: number;
retries?: number;
/** Seconds of silence before considering the agent idle (for idle nudging). */
/**
* Seconds of silence on the agent's PTY before the runtime marks it idle and
* tears it down. Default: 30s. Set to `0` to disable idle detection entirely.
*
* When to override (per-agent):
* - You expect long quiet stretches by design — a long-running reviewer
* waiting for downstream verdicts, a grader watching a file that updates
* every few minutes, or a `@-mention` recipient whose triggering event
* may arrive >30s after spawn. Setting `0` (or a generous N) prevents
* the runtime from killing the agent before the awaited event arrives.
*
* When NOT to override:
* - One-shot worker steps. The default is right; idle-as-complete is what
* makes `OWNER_DECISION: COMPLETE` + clean exit fast.
*
* See the `writing-agent-relay-workflows` skill ("Idle detection beats
* 'wait for X' prompts") for the trade-offs around long-running interactive
* agents and the per-turn interactive spawn alternative.
*/
idleThresholdSecs?: number;
/** When false, the agent runs as a non-interactive subprocess (no PTY, no relay messaging).
* Default: true. */
Expand Down
33 changes: 27 additions & 6 deletions packages/sdk/src/workflows/channel-messenger.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,15 +71,23 @@ const CLAUDE_HEADER_RE =
/^(?:[\s\u2580-\u259f✢*·▗▖▘▝]+\s*)?(?:Claude\s+Code(?:\s+v?[\d.]+)?|(?:Sonnet|Haiku|Opus)\s*[\d.]+|claude-(?:sonnet|haiku|opus)-[\w.-]+|Running\s+on\s+claude)/iu;
const DIR_BREADCRUMB_RE = /^\s*~[\\/]/u;
const UI_HINT_RE =
/\b(?:Press\s+up\s+to\s+edit|tab\s+to\s+queue|bypass\s+permissions|esc\s+to\s+interrupt)\b/iu;
/\b(?:Press\s*up\s*to\s*edit|tab\s*to\s*queue|bypass\s*permissions|esc\s*to\s*interrupt|paste\s*again\s*to\s*expand|shift\s*[+]?\s*tab\s*to\s*cycle|running\s+stop\s+hook|fan\s+out\s+subagents)/iu;
const VIM_MODE_RE =
/^[-\s]*--?(?:INSERT|NORMAL|VISUAL|REPLACE)--?[-\s]*$|--?(?:INSERT|NORMAL|VISUAL|REPLACE)--/u;
const CLAUDE_FOOTER_RE =
/(?:Opus|Sonnet|Haiku)\s*\d[\d.]*\s*\(?(?:1M\s*context|context)?\)?\s*ctx\s*:\s*\d+%/iu;
const THINKING_LINE_RE = new RegExp(`^[\\s${SPINNER}]*\\s*\\w[\\w\\s]*\\u2026\\s*$`, 'u');
const THINKING_STATUS_RE =
/\b(?:thinking\s+(?:with\s+\w+\s+effort|more\s+with|harder)|↓\s*\d+\s*tokens?\b|↑\s*\d+\s*tokens?\b|crunched\s+for\s+\d|sautéed\s+for\s+\d|befuddl|flibbertigib|gitifying|flowing\s*…)/iu;
const CURSOR_ONLY_RE = /^[\s❯⎿›»◀▶←→↑↓⟨⟩⟪⟫·]+$/u;
const CURSOR_AGENT_RE =
/^(?:Cursor Agent|[\s⬡⬢]*Generating[.\s]|\[Pasted text|Auto-run all|Add a follow-up|ctrl\+c to stop|shift\+tab|Auto$|\/\s*commands|@\s*files|!\s*shell|follow-ups?\s|The user ha)/iu;
const SLASH_COMMAND_RE = /^\/\w+\s*$/u;
const MCP_JSON_KV_RE =
/^\s*"(?:type|method|params|result|id|jsonrpc|tool|name|arguments|content|role|metadata)"\s*:/u;
const MEANINGFUL_CONTENT_RE = /[a-zA-Z0-9]/u;
const MALFORMED_PTY_FRAME_RUN_RE = /(?:(?:qW0|q[A-Za-z]?0|[lmjkx]q{2,}|q{2,}[lmjkx]?)[\s|/_=\-~]*){4,}/giu;
const MALFORMED_PTY_FRAME_ONLY_RE = /^[\s|/_=\-~lmjkxqtwuvn0W]{12,}$/iu;

export function scrubSecrets(text: string): string {
let result = text;
Expand All @@ -89,6 +97,15 @@ export function scrubSecrets(text: string): string {
return result;
}

/**
 * Cleans malformed PTY frame residue out of a single line.
 *
 * Every match of MALFORMED_PTY_FRAME_RUN_RE is replaced with one space. The
 * result is then inspected with SPINNER_RE matches and all whitespace removed:
 * if that compacted text is at least 12 characters long and matches
 * MALFORMED_PTY_FRAME_ONLY_RE, the whole line is treated as garbage.
 *
 * @param line - One line of captured output.
 * @returns An empty string when the line is nothing but frame residue,
 *          otherwise the line with the frame runs replaced by spaces.
 */
function stripMalformedPtyFrameGarbage(line: string): string {
  const withoutRuns = line.replace(MALFORMED_PTY_FRAME_RUN_RE, ' ');

  // Judge the remainder without spinner glyphs or whitespace so that spacing
  // differences do not affect the "frame residue only" decision.
  const squeezed = withoutRuns.replace(SPINNER_RE, '').replace(/\s+/g, '');
  const isOnlyFrameResidue = squeezed.length >= 12 && MALFORMED_PTY_FRAME_ONLY_RE.test(squeezed);

  return isOnlyFrameResidue ? '' : withoutRuns;
}

export function scrubForChannel(text: string): string {
// Strip system-reminder blocks (closed or unclosed) iteratively to avoid
// polynomial backtracking (ReDoS) with [\s\S]*? on adversarial input.
Expand Down Expand Up @@ -130,29 +147,33 @@ export function scrubForChannel(text: string): string {
let jsonDepth = 0;

for (const line of lines) {
const trimmed = line.trim();
const cleanedLine = stripMalformedPtyFrameGarbage(line);
const trimmed = cleanedLine.trim();

if (jsonDepth > 0) {
jsonDepth += countJsonDepth(line);
jsonDepth += countJsonDepth(cleanedLine);
if (jsonDepth <= 0) jsonDepth = 0;
continue;
}

if (trimmed.length === 0) continue;

if (trimmed.startsWith('{') || /^\[\s*\{/.test(trimmed)) {
jsonDepth = Math.max(countJsonDepth(line), 0);
jsonDepth = Math.max(countJsonDepth(cleanedLine), 0);
continue;
}

if (MCP_JSON_KV_RE.test(line)) continue;
if (MCP_JSON_KV_RE.test(cleanedLine)) continue;
if (SPINNER_CLASS_RE.test(trimmed)) continue;
if (BOX_DRAWING_ONLY_RE.test(trimmed)) continue;
if (BROKER_LOG_RE.test(trimmed)) continue;
if (CLAUDE_HEADER_RE.test(trimmed)) continue;
if (DIR_BREADCRUMB_RE.test(trimmed)) continue;
if (UI_HINT_RE.test(trimmed)) continue;
if (VIM_MODE_RE.test(trimmed)) continue;
if (CLAUDE_FOOTER_RE.test(trimmed)) continue;
if (THINKING_LINE_RE.test(trimmed)) continue;
if (THINKING_STATUS_RE.test(trimmed)) continue;
if (CURSOR_ONLY_RE.test(trimmed)) continue;
if (CURSOR_AGENT_RE.test(trimmed)) continue;
if (SLASH_COMMAND_RE.test(trimmed)) continue;
Expand All @@ -161,7 +182,7 @@ export function scrubForChannel(text: string): string {
const alphanum = trimmed.replace(SPINNER_RE, '').replace(/\s+/g, '');
if (alphanum.replace(/[^a-zA-Z0-9]/g, '').length <= 3) continue;

meaningful.push(line);
meaningful.push(cleanedLine);
}

return meaningful
Expand Down
Loading
Loading