Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions .trajectories/completed/2026-02/traj_1b5joctvz9j4.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
{
"id": "traj_1b5joctvz9j4",
"version": 1,
"task": {
"title": "Adjust readiness behavior to allow slower agent spawn and avoid 30s worker_ready timeout"
},
"status": "completed",
"startedAt": "2026-02-27T21:03:30.264Z",
"agents": [
{
"name": "default",
"role": "lead",
"joinedAt": "2026-02-27T21:04:41.903Z"
}
],
"chapters": [
{
"id": "chap_a2z815b8rfr8",
"title": "Work",
"agentName": "default",
"startedAt": "2026-02-27T21:04:41.903Z",
"events": [
{
"ts": 1772226281904,
"type": "decision",
"content": "Prefer longer waitForReady defaults instead of fail-fast: Prefer longer waitForReady defaults instead of fail-fast",
"raw": {
"question": "Prefer longer waitForReady defaults instead of fail-fast",
"chosen": "Prefer longer waitForReady defaults instead of fail-fast",
"alternatives": [],
"reasoning": "User wants slower agent startups tolerated. Increased SDK worker_ready wait defaults from 30s to 60s and kept readiness flow timeout-driven."
},
"significance": "high"
},
{
"ts": 1772226286823,
"type": "decision",
"content": "Detect Codex relaycast boot marker across chunk boundaries: Detect Codex relaycast boot marker across chunk boundaries",
"raw": {
"question": "Detect Codex relaycast boot marker across chunk boundaries",
"chosen": "Detect Codex relaycast boot marker across chunk boundaries",
"alternatives": [],
"reasoning": "Previous gating searched only current PTY chunk and could miss split markers, delaying worker_ready until timeout. Now scans accumulated startup output and tracks post-boot prompt window robustly."
},
"significance": "high"
}
],
"endedAt": "2026-02-27T21:04:50.010Z"
}
],
"commits": [],
"filesChanged": [],
"projectId": "/Users/khaliqgant/Projects/agent-workforce/relay",
"tags": [],
"_trace": {
"startRef": "ad64a9bc98ed27febddd8592e680fbda26316661",
"endRef": "ad64a9bc98ed27febddd8592e680fbda26316661"
},
"completedAt": "2026-02-27T21:04:50.010Z",
"retrospective": {
"summary": "Adjusted readiness behavior to tolerate slower startup: SDK waitForReady defaults now 60s and Codex relaycast boot-marker detection now works across PTY chunk boundaries. Kept non-fail-fast semantics.",
"approach": "Standard approach",
"confidence": 0.9
}
}
36 changes: 36 additions & 0 deletions .trajectories/completed/2026-02/traj_1b5joctvz9j4.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Trajectory: Adjust readiness behavior to allow slower agent spawn and avoid 30s worker_ready timeout

> **Status:** ✅ Completed
> **Confidence:** 90%
> **Started:** February 27, 2026 at 10:03 PM
> **Completed:** February 27, 2026 at 10:04 PM

---

## Summary

Adjusted readiness behavior to tolerate slower startup: the SDK waitForReady defaults are now 60s, and Codex relaycast boot-marker detection now works across PTY chunk boundaries. Non-fail-fast semantics are kept.

**Approach:** Standard approach

---

## Key Decisions

### Prefer longer waitForReady defaults instead of fail-fast
- **Chose:** Prefer longer waitForReady defaults instead of fail-fast
- **Reasoning:** User wants slower agent startups tolerated. Increased SDK worker_ready wait defaults from 30s to 60s and kept readiness flow timeout-driven.

### Detect Codex relaycast boot marker across chunk boundaries
- **Chose:** Detect Codex relaycast boot marker across chunk boundaries
- **Reasoning:** Previous gating searched only current PTY chunk and could miss split markers, delaying worker_ready until timeout. Now scans accumulated startup output and tracks post-boot prompt window robustly.

---

## Chapters

### 1. Work
*Agent: default*

- Decision: Prefer longer waitForReady defaults instead of fail-fast
- Decision: Detect Codex relaycast boot marker across chunk boundaries
9 changes: 8 additions & 1 deletion .trajectories/index.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"version": 1,
"lastUpdated": "2026-02-27T20:40:19.869Z",
"lastUpdated": "2026-02-27T21:04:50.127Z",
"trajectories": {
"traj_1b1dj40sl6jl": {
"title": "Revert aggressive retry logic in relay-pty-orchestrator",
Expand Down Expand Up @@ -470,6 +470,13 @@
"startedAt": "2026-02-27T20:40:01.515Z",
"completedAt": "2026-02-27T20:40:19.768Z",
"path": "/Users/khaliqgant/Projects/agent-workforce/relay/.trajectories/completed/2026-02/traj_eaxbstibc2jb.json"
},
"traj_1b5joctvz9j4": {
"title": "Adjust readiness behavior to allow slower agent spawn and avoid 30s worker_ready timeout",
"status": "completed",
"startedAt": "2026-02-27T21:03:30.264Z",
"completedAt": "2026-02-27T21:04:50.010Z",
"path": "/Users/khaliqgant/Projects/agent-workforce/relay/.trajectories/completed/2026-02/traj_1b5joctvz9j4.json"
}
}
}
Binary file removed packages/sdk/bin/agent-relay-broker
Binary file not shown.
6 changes: 3 additions & 3 deletions packages/sdk/src/relay.ts
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,7 @@ export class AgentRelay {
if (waitForMessage) {
return this.waitForAgentMessage(name, timeoutMs ?? 60_000);
}
return this.waitForAgentReady(name, timeoutMs ?? 30_000);
return this.waitForAgentReady(name, timeoutMs ?? 60_000);
}

// ── Human source ────────────────────────────────────────────────────────
Expand Down Expand Up @@ -580,7 +580,7 @@ export class AgentRelay {
* The agent's CLI may not yet be ready to receive messages.
* Use `waitForAgentMessage()` for full readiness.
*/
async waitForAgentReady(name: string, timeoutMs = 30_000): Promise<Agent> {
async waitForAgentReady(name: string, timeoutMs = 60_000): Promise<Agent> {
const client = await this.ensureStarted();
const existing = this.knownAgents.get(name);
if (existing && this.readyAgents.has(name)) {
Expand Down Expand Up @@ -994,7 +994,7 @@ export class AgentRelay {
const client = await relay.ensureStarted();
await client.release(name, reason);
},
async waitForReady(timeoutMs = 30_000) {
async waitForReady(timeoutMs = 60_000) {
await relay.waitForAgentReady(name, timeoutMs);
},
waitForExit(timeoutMs?: number) {
Expand Down
7 changes: 4 additions & 3 deletions src/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -202,11 +202,12 @@ pub(crate) fn detect_cli_ready(cli: &str, output: &str, total_bytes: usize) -> b
}

// Prompt patterns (from relay-pty parser.rs)
// › = U+203A (single right-pointing angle quotation mark)
// ❯ = U+276F (heavy right-pointing angle quotation mark ornament, Claude Code v2.1.52+)
let prompt_patterns: &[&str] = if lower_cli.contains("codex") {
&["> ", "$ ", "codex> ", ">>> ", "›"]
&["> ", "$ ", "codex> ", ">>> ", "›", "❯"]
} else {
// claude, gemini, aider, and others all share the same patterns
&["> ", "$ ", ">>> ", "›"]
&["> ", "$ ", ">>> ", "›", "❯"]
};

// Check last 500 chars of output for prompt patterns
Expand Down
42 changes: 26 additions & 16 deletions src/pty_worker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ fn output_has_prompt(cli: &str, output: &str) -> bool {
&clean
};

let mut patterns = vec!["> ", "$ ", ">>> ", "›"];
let mut patterns = vec!["> ", "$ ", ">>> ", "›", "❯"];
if lower_cli.contains("codex") {
patterns.push("codex> ");
}
Expand All @@ -68,7 +68,7 @@ fn output_has_prompt(cli: &str, output: &str) -> bool {

region.lines().rev().take(6).any(|line| {
let trimmed = line.trim();
matches!(trimmed, "›" | ">" | "$" | ">>>")
matches!(trimmed, "›" | ">" | "$" | ">>>" | "❯")
|| (lower_cli.contains("codex") && trimmed.eq_ignore_ascii_case("codex>"))
})
}
Expand Down Expand Up @@ -96,7 +96,10 @@ async fn try_emit_worker_ready(
worker_ready_sent: &mut bool,
startup_ready: bool,
) {
if *worker_ready_sent || init_request_id.is_none() {
// init_received_at is Some only after init_worker has been received.
// We use it (not init_request_id) as the gate because the broker sends
// init_worker without a request_id.
if *worker_ready_sent || init_received_at.is_none() {
return;
}

Expand Down Expand Up @@ -369,27 +372,34 @@ pub(crate) async fn run_pty_worker(cmd: PtyCommand) -> Result<()> {
STARTUP_BUFFER_KEEP,
);
if wait_for_relaycast_boot {
if saw_relaycast_boot {
append_bounded(
&mut post_boot_output,
&clean_text,
STARTUP_BUFFER_MAX,
STARTUP_BUFFER_KEEP,
);
} else {
let lower = clean_text.to_ascii_lowercase();
if let Some(marker_idx) = lower.find(RELAYCAST_BOOT_MARKER) {
let mut just_saw_relaycast_boot = false;
if !saw_relaycast_boot {
let lower_startup = startup_output.to_ascii_lowercase();
if let Some(marker_idx) = lower_startup.find(RELAYCAST_BOOT_MARKER)
{
saw_relaycast_boot = true;
let marker_end = marker_idx + RELAYCAST_BOOT_MARKER.len();
let marker_end = floor_char_boundary(&clean_text, marker_end);
just_saw_relaycast_boot = true;
let marker_end = floor_char_boundary(
&startup_output,
marker_idx + RELAYCAST_BOOT_MARKER.len(),
);
post_boot_output.clear();
append_bounded(
&mut post_boot_output,
&clean_text[marker_end..],
&startup_output[marker_end..],
STARTUP_BUFFER_MAX,
STARTUP_BUFFER_KEEP,
);
}
}
if saw_relaycast_boot && !just_saw_relaycast_boot {
append_bounded(
&mut post_boot_output,
&clean_text,
STARTUP_BUFFER_MAX,
STARTUP_BUFFER_KEEP,
);
}
}
let startup_ready = startup_gate_ready(
&resolved_cli,
Expand Down
Loading