diff --git a/.trajectories/completed/2026-02/traj_1b5joctvz9j4.json b/.trajectories/completed/2026-02/traj_1b5joctvz9j4.json new file mode 100644 index 000000000..4c05db58d --- /dev/null +++ b/.trajectories/completed/2026-02/traj_1b5joctvz9j4.json @@ -0,0 +1,65 @@ +{ + "id": "traj_1b5joctvz9j4", + "version": 1, + "task": { + "title": "Adjust readiness behavior to allow slower agent spawn and avoid 30s worker_ready timeout" + }, + "status": "completed", + "startedAt": "2026-02-27T21:03:30.264Z", + "agents": [ + { + "name": "default", + "role": "lead", + "joinedAt": "2026-02-27T21:04:41.903Z" + } + ], + "chapters": [ + { + "id": "chap_a2z815b8rfr8", + "title": "Work", + "agentName": "default", + "startedAt": "2026-02-27T21:04:41.903Z", + "events": [ + { + "ts": 1772226281904, + "type": "decision", + "content": "Prefer longer waitForReady defaults instead of fail-fast: Prefer longer waitForReady defaults instead of fail-fast", + "raw": { + "question": "Prefer longer waitForReady defaults instead of fail-fast", + "chosen": "Prefer longer waitForReady defaults instead of fail-fast", + "alternatives": [], + "reasoning": "User wants slower agent startups tolerated. Increased SDK worker_ready wait defaults from 30s to 60s and kept readiness flow timeout-driven." + }, + "significance": "high" + }, + { + "ts": 1772226286823, + "type": "decision", + "content": "Detect Codex relaycast boot marker across chunk boundaries: Detect Codex relaycast boot marker across chunk boundaries", + "raw": { + "question": "Detect Codex relaycast boot marker across chunk boundaries", + "chosen": "Detect Codex relaycast boot marker across chunk boundaries", + "alternatives": [], + "reasoning": "Previous gating searched only current PTY chunk and could miss split markers, delaying worker_ready until timeout. Now scans accumulated startup output and tracks post-boot prompt window robustly." + }, + "significance": "high" + } + ], + "endedAt": "2026-02-27T21:04:50.010Z" + } + ], + "commits": [], + "filesChanged": [], + "projectId": "/Users/khaliqgant/Projects/agent-workforce/relay", + "tags": [], + "_trace": { + "startRef": "ad64a9bc98ed27febddd8592e680fbda26316661", + "endRef": "ad64a9bc98ed27febddd8592e680fbda26316661" + }, + "completedAt": "2026-02-27T21:04:50.010Z", + "retrospective": { + "summary": "Adjusted readiness behavior to tolerate slower startup: SDK waitForReady defaults now 60s and Codex relaycast boot-marker detection now works across PTY chunk boundaries. Kept non-fail-fast semantics.", + "approach": "Standard approach", + "confidence": 0.9 + } +} \ No newline at end of file diff --git a/.trajectories/completed/2026-02/traj_1b5joctvz9j4.md b/.trajectories/completed/2026-02/traj_1b5joctvz9j4.md new file mode 100644 index 000000000..39ec61e7e --- /dev/null +++ b/.trajectories/completed/2026-02/traj_1b5joctvz9j4.md @@ -0,0 +1,36 @@ +# Trajectory: Adjust readiness behavior to allow slower agent spawn and avoid 30s worker_ready timeout + +> **Status:** ✅ Completed +> **Confidence:** 90% +> **Started:** February 27, 2026 at 10:03 PM +> **Completed:** February 27, 2026 at 10:04 PM + +--- + +## Summary + +Adjusted readiness behavior to tolerate slower startup: SDK waitForReady defaults now 60s and Codex relaycast boot-marker detection now works across PTY chunk boundaries. Kept non-fail-fast semantics. + +**Approach:** Standard approach + +--- + +## Key Decisions + +### Prefer longer waitForReady defaults instead of fail-fast +- **Chose:** Prefer longer waitForReady defaults instead of fail-fast +- **Reasoning:** User wants slower agent startups tolerated. Increased SDK worker_ready wait defaults from 30s to 60s and kept readiness flow timeout-driven. + +### Detect Codex relaycast boot marker across chunk boundaries +- **Chose:** Detect Codex relaycast boot marker across chunk boundaries +- **Reasoning:** Previous gating searched only current PTY chunk and could miss split markers, delaying worker_ready until timeout. Now scans accumulated startup output and tracks post-boot prompt window robustly. + +--- + +## Chapters + +### 1. Work +*Agent: default* + +- Prefer longer waitForReady defaults instead of fail-fast: Prefer longer waitForReady defaults instead of fail-fast +- Detect Codex relaycast boot marker across chunk boundaries: Detect Codex relaycast boot marker across chunk boundaries diff --git a/.trajectories/index.json b/.trajectories/index.json index 038688d8b..32f48ffeb 100644 --- a/.trajectories/index.json +++ b/.trajectories/index.json @@ -1,6 +1,6 @@ { "version": 1, - "lastUpdated": "2026-02-27T20:40:19.869Z", + "lastUpdated": "2026-02-27T21:04:50.127Z", "trajectories": { "traj_1b1dj40sl6jl": { "title": "Revert aggressive retry logic in relay-pty-orchestrator", @@ -470,6 +470,13 @@ "startedAt": "2026-02-27T20:40:01.515Z", "completedAt": "2026-02-27T20:40:19.768Z", "path": "/Users/khaliqgant/Projects/agent-workforce/relay/.trajectories/completed/2026-02/traj_eaxbstibc2jb.json" + }, + "traj_1b5joctvz9j4": { + "title": "Adjust readiness behavior to allow slower agent spawn and avoid 30s worker_ready timeout", + "status": "completed", + "startedAt": "2026-02-27T21:03:30.264Z", + "completedAt": "2026-02-27T21:04:50.010Z", + "path": "/Users/khaliqgant/Projects/agent-workforce/relay/.trajectories/completed/2026-02/traj_1b5joctvz9j4.json" } } } \ No newline at end of file diff --git a/packages/sdk/bin/agent-relay-broker b/packages/sdk/bin/agent-relay-broker deleted file mode 100755 index a8e430e93..000000000 Binary files a/packages/sdk/bin/agent-relay-broker and /dev/null differ diff --git a/packages/sdk/src/relay.ts b/packages/sdk/src/relay.ts index 4bcf94f2d..a9c422fbf 100644 --- a/packages/sdk/src/relay.ts +++ b/packages/sdk/src/relay.ts @@ -351,7 +351,7 @@ export class AgentRelay { if (waitForMessage) { return this.waitForAgentMessage(name, timeoutMs ?? 60_000); } - return this.waitForAgentReady(name, timeoutMs ?? 30_000); + return this.waitForAgentReady(name, timeoutMs ?? 60_000); } // ── Human source ──────────────────────────────────────────────────────── @@ -580,7 +580,7 @@ export class AgentRelay { * The agent's CLI may not yet be ready to receive messages. * Use `waitForAgentMessage()` for full readiness. */ - async waitForAgentReady(name: string, timeoutMs = 30_000): Promise { + async waitForAgentReady(name: string, timeoutMs = 60_000): Promise { const client = await this.ensureStarted(); const existing = this.knownAgents.get(name); if (existing && this.readyAgents.has(name)) { @@ -994,7 +994,7 @@ export class AgentRelay { const client = await relay.ensureStarted(); await client.release(name, reason); }, - async waitForReady(timeoutMs = 30_000) { + async waitForReady(timeoutMs = 60_000) { await relay.waitForAgentReady(name, timeoutMs); }, waitForExit(timeoutMs?: number) { diff --git a/src/helpers.rs b/src/helpers.rs index 5aa227da9..343a5663f 100644 --- a/src/helpers.rs +++ b/src/helpers.rs @@ -202,11 +202,12 @@ pub(crate) fn detect_cli_ready(cli: &str, output: &str, total_bytes: usize) -> b } // Prompt patterns (from relay-pty parser.rs) + // › = U+203A (single right-pointing angle quotation mark) + // ❯ = U+276F (heavy right-pointing angle quotation mark, Claude Code v2.1.52+) let prompt_patterns: &[&str] = if lower_cli.contains("codex") { - &["> ", "$ ", "codex> ", ">>> ", "›"] + &["> ", "$ ", "codex> ", ">>> ", "›", "❯"] } else { - // claude, gemini, aider, and others all share the same patterns - &["> ", "$ ", ">>> ", "›"] + &["> ", "$ ", ">>> ", "›", "❯"] }; // Check last 500 chars of output for prompt patterns diff --git a/src/pty_worker.rs b/src/pty_worker.rs index 2275f3e82..e29b4c074 100644 --- a/src/pty_worker.rs +++ b/src/pty_worker.rs @@ -58,7 +58,7 @@ fn output_has_prompt(cli: &str, output: &str) -> bool { &clean }; - let mut patterns = vec!["> ", "$ ", ">>> ", "›"]; + let mut patterns = vec!["> ", "$ ", ">>> ", "›", "❯"]; if lower_cli.contains("codex") { patterns.push("codex> "); } @@ -68,7 +68,7 @@ fn output_has_prompt(cli: &str, output: &str) -> bool { region.lines().rev().take(6).any(|line| { let trimmed = line.trim(); - matches!(trimmed, "›" | ">" | "$" | ">>>") + matches!(trimmed, "›" | ">" | "$" | ">>>" | "❯") || (lower_cli.contains("codex") && trimmed.eq_ignore_ascii_case("codex>")) }) } @@ -96,7 +96,10 @@ async fn try_emit_worker_ready( worker_ready_sent: &mut bool, startup_ready: bool, ) { - if *worker_ready_sent || init_request_id.is_none() { + // init_received_at is Some only after init_worker has been received. + // We use it (not init_request_id) as the gate because the broker sends + // init_worker without a request_id. + if *worker_ready_sent || init_received_at.is_none() { return; } @@ -369,27 +372,34 @@ pub(crate) async fn run_pty_worker(cmd: PtyCommand) -> Result<()> { STARTUP_BUFFER_KEEP, ); if wait_for_relaycast_boot { - if saw_relaycast_boot { - append_bounded( - &mut post_boot_output, - &clean_text, - STARTUP_BUFFER_MAX, - STARTUP_BUFFER_KEEP, - ); - } else { - let lower = clean_text.to_ascii_lowercase(); - if let Some(marker_idx) = lower.find(RELAYCAST_BOOT_MARKER) { + let mut just_saw_relaycast_boot = false; + if !saw_relaycast_boot { + let lower_startup = startup_output.to_ascii_lowercase(); + if let Some(marker_idx) = lower_startup.find(RELAYCAST_BOOT_MARKER) + { saw_relaycast_boot = true; - let marker_end = marker_idx + RELAYCAST_BOOT_MARKER.len(); - let marker_end = floor_char_boundary(&clean_text, marker_end); + just_saw_relaycast_boot = true; + let marker_end = floor_char_boundary( + &startup_output, + marker_idx + RELAYCAST_BOOT_MARKER.len(), + ); + post_boot_output.clear(); append_bounded( &mut post_boot_output, - &clean_text[marker_end..], + &startup_output[marker_end..], STARTUP_BUFFER_MAX, STARTUP_BUFFER_KEEP, ); } } + if saw_relaycast_boot && !just_saw_relaycast_boot { + append_bounded( + &mut post_boot_output, + &clean_text, + STARTUP_BUFFER_MAX, + STARTUP_BUFFER_KEEP, + ); + } } let startup_ready = startup_gate_ready( &resolved_cli,