From 81d912cd70c4cd16ec5d18a6139396cc0c89230e Mon Sep 17 00:00:00 2001 From: Khaliq Date: Mon, 2 Mar 2026 12:57:03 +0100 Subject: [PATCH 1/9] feat(sdk-py): add direct spawn/message API matching TypeScript SDK Add AgentRelay facade, AgentRelayClient, Agent handles, and Models constants so the Python SDK can spawn agents and exchange messages directly via the broker binary, matching the TypeScript SDK's API. - protocol.py: wire protocol types (AgentSpec, ProtocolEnvelope, etc.) - client.py: async broker subprocess client with JSON protocol - relay.py: high-level facade with event hooks and agent spawners - models.py: Claude, Codex, Gemini model constants - Updated __init__.py with new primary exports + backward compat - Legacy agent_relay/ shim re-exports from src/agent_relay/ - Version bump to 0.3.0 Co-Authored-By: Claude Opus 4.6 --- packages/sdk-py/agent_relay/__init__.py | 35 +- packages/sdk-py/pyproject.toml | 6 +- packages/sdk-py/src/agent_relay/__init__.py | 36 +- packages/sdk-py/src/agent_relay/client.py | 570 +++++++++++++++ packages/sdk-py/src/agent_relay/models.py | 27 + packages/sdk-py/src/agent_relay/protocol.py | 110 +++ packages/sdk-py/src/agent_relay/relay.py | 742 ++++++++++++++++++++ 7 files changed, 1505 insertions(+), 21 deletions(-) create mode 100644 packages/sdk-py/src/agent_relay/client.py create mode 100644 packages/sdk-py/src/agent_relay/models.py create mode 100644 packages/sdk-py/src/agent_relay/protocol.py create mode 100644 packages/sdk-py/src/agent_relay/relay.py diff --git a/packages/sdk-py/agent_relay/__init__.py b/packages/sdk-py/agent_relay/__init__.py index 5f6b09e22..ce6344f9f 100644 --- a/packages/sdk-py/agent_relay/__init__.py +++ b/packages/sdk-py/agent_relay/__init__.py @@ -1,21 +1,18 @@ -"""Agent Relay Python SDK.""" +"""Agent Relay Python SDK — re-exports from src/agent_relay/. -from .models import ( - CLIs, - CLIVersions, - CLI_REGISTRY, - DEFAULT_MODELS, - Models, - ModelOptions, - SwarmPatterns, -) +This directory exists for backward compatibility with codegen scripts. +The real package source lives in src/agent_relay/. +""" -__all__ = [ - "CLIs", - "CLIVersions", - "CLI_REGISTRY", - "DEFAULT_MODELS", - "Models", - "ModelOptions", - "SwarmPatterns", -] +import importlib.util as _util +import sys as _sys +from pathlib import Path as _Path + +# Load the real package from src/agent_relay/ and replace this module +_src_init = _Path(__file__).resolve().parent.parent / "src" / "agent_relay" / "__init__.py" +_spec = _util.spec_from_file_location("agent_relay", str(_src_init), + submodule_search_locations=[str(_src_init.parent)]) +assert _spec is not None and _spec.loader is not None, f"Could not load {_src_init}" +_real = _util.module_from_spec(_spec) +_sys.modules[__name__] = _real +_spec.loader.exec_module(_real) diff --git a/packages/sdk-py/pyproject.toml b/packages/sdk-py/pyproject.toml index db353d234..e1a5002e0 100644 --- a/packages/sdk-py/pyproject.toml +++ b/packages/sdk-py/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "agent-relay-sdk" -version = "3.0.2" +version = "0.3.0" description = "Python SDK for Agent Relay workflows" readme = "README.md" license = "Apache-2.0" @@ -21,3 +21,7 @@ dev = [ [tool.hatch.build.targets.wheel] packages = ["src/agent_relay"] + +[tool.pytest.ini_options] +pythonpath = ["src"] +asyncio_mode = "auto" diff --git a/packages/sdk-py/src/agent_relay/__init__.py b/packages/sdk-py/src/agent_relay/__init__.py index 0449c8959..8ff826dd9 100644 --- a/packages/sdk-py/src/agent_relay/__init__.py +++ b/packages/sdk-py/src/agent_relay/__init__.py @@ -1,4 +1,20 @@ -"""Agent Relay Python SDK — workflow builder and runner.""" +"""Agent Relay Python SDK — direct spawn/message API and workflow builder.""" + +# ── Primary API: Direct spawn/message (matches TypeScript SDK) ──────────────── + +from .relay import AgentRelay, Agent, AgentSpawner, HumanHandle, Message, SpawnOptions +from .models import Models +from .client import AgentRelayClient, AgentRelayProtocolError, AgentRelayProcessError +from .protocol import ( + PROTOCOL_VERSION, + AgentRuntime, + AgentSpec, + BrokerEvent, + ProtocolEnvelope, + RestartPolicy as ProtocolRestartPolicy, +) + +# ── Secondary API: Workflow builder (backward compatibility) ────────────────── from .builder import workflow, WorkflowBuilder, run_yaml from .templates import ( @@ -52,6 +68,24 @@ ) __all__ = [ + # Primary API + "AgentRelay", + "Agent", + "AgentSpawner", + "HumanHandle", + "Message", + "SpawnOptions", + "Models", + "AgentRelayClient", + "AgentRelayProtocolError", + "AgentRelayProcessError", + "PROTOCOL_VERSION", + "AgentRuntime", + "AgentSpec", + "BrokerEvent", + "ProtocolEnvelope", + "ProtocolRestartPolicy", + # Workflow builder (backward compat) "workflow", "WorkflowBuilder", "run_yaml", diff --git a/packages/sdk-py/src/agent_relay/client.py b/packages/sdk-py/src/agent_relay/client.py new file mode 100644 index 000000000..31244fc7e --- /dev/null +++ b/packages/sdk-py/src/agent_relay/client.py @@ -0,0 +1,570 @@ +"""Low-level async client for the Agent Relay broker subprocess. + +Manages the broker process lifecycle, line-delimited JSON protocol, +request/response correlation, and event dispatch. + +Mirrors packages/sdk/src/client.ts. +""" + +from __future__ import annotations + +import asyncio +import json +import os +import platform +import shutil +from pathlib import Path +from typing import Any, Callable, Optional + +from .protocol import PROTOCOL_VERSION, AgentSpec, BrokerEvent, ProtocolEnvelope + +# ── Errors ──────────────────────────────────────────────────────────────────── + + +class AgentRelayProtocolError(Exception): + """Raised when the broker returns a protocol-level error.""" + + def __init__(self, code: str, message: str, retryable: bool = False, data: Any = None): + super().__init__(message) + self.code = code + self.retryable = retryable + self.data = data + + +class AgentRelayProcessError(Exception): + """Raised for broker process lifecycle errors.""" + + +# ── CLI / model helpers ─────────────────────────────────────────────────────── + +_CLI_MODEL_FLAG_CLIS = {"claude", "codex", "gemini", "goose", "aider"} + +_CLI_DEFAULT_ARGS: dict[str, list[str]] = { + "codex": ["-c", "check_for_update_on_startup=false"], +} + + +def _has_model_arg(args: list[str]) -> bool: + for arg in args: + if arg == "--model" or arg.startswith("--model="): + return True + return False + + +def _build_pty_args_with_model(cli: str, args: list[str], model: Optional[str] = None) -> list[str]: + cli_name = cli.split(":")[0].strip().lower() + default_args = _CLI_DEFAULT_ARGS.get(cli_name, []) + base_args = [*default_args, *args] + if not model: + return base_args + if cli_name not in _CLI_MODEL_FLAG_CLIS: + return base_args + if _has_model_arg(base_args): + return base_args + return ["--model", model, *base_args] + + +def _expand_tilde(p: str) -> str: + if p == "~" or p.startswith("~/") or p.startswith("~\\"): + return str(Path.home() / p[2:]) + return p + + +def _is_explicit_path(binary_path: str) -> bool: + return "/" in binary_path or "\\" in binary_path or binary_path.startswith(".") or binary_path.startswith("~") + + +def _resolve_default_binary_path() -> str: + broker_exe = "agent-relay-broker" + + # 1. Check ~/.agent-relay/bin/ + home = Path.home() + standalone = home / ".agent-relay" / "bin" / broker_exe + if standalone.exists(): + return str(standalone) + + # 2. Fall back to PATH + found = shutil.which(broker_exe) + if found: + return found + + # 3. Last resort: bare name (will fail at spawn time if not on PATH) + return "agent-relay" + + +# ── Pending request tracking ───────────────────────────────────────────────── + + +class _PendingRequest: + __slots__ = ("expected_type", "future", "timeout_handle") + + def __init__(self, expected_type: str, future: asyncio.Future[ProtocolEnvelope], timeout_handle: asyncio.TimerHandle): + self.expected_type = expected_type + self.future = future + self.timeout_handle = timeout_handle + + +# ── Client ──────────────────────────────────────────────────────────────────── + + +class AgentRelayClient: + """Manages a broker subprocess and communicates over line-delimited JSON.""" + + def __init__( + self, + *, + binary_path: Optional[str] = None, + binary_args: Optional[list[str]] = None, + broker_name: Optional[str] = None, + channels: Optional[list[str]] = None, + cwd: Optional[str] = None, + env: Optional[dict[str, str]] = None, + request_timeout_ms: int = 10_000, + shutdown_timeout_ms: int = 3_000, + client_name: str = "agent-relay-sdk-py", + client_version: str = "0.3.0", + ): + self._binary_path = binary_path or _resolve_default_binary_path() + self._binary_args = binary_args or [] + self._broker_name = broker_name or os.path.basename(cwd or os.getcwd()) or "project" + self._channels = channels or ["general"] + self._cwd = cwd or os.getcwd() + self._env = env + self._request_timeout_ms = request_timeout_ms + self._shutdown_timeout_ms = shutdown_timeout_ms + self._client_name = client_name + self._client_version = client_version + + self._process: Optional[asyncio.subprocess.Process] = None + self._request_seq = 0 + self._pending: dict[str, _PendingRequest] = {} + self._event_listeners: list[Callable[[BrokerEvent], None]] = [] + self._stderr_listeners: list[Callable[[str], None]] = [] + self._event_buffer: list[BrokerEvent] = [] + self._max_buffer_size = 1000 + self._last_stderr_line: Optional[str] = None + self._starting_lock = asyncio.Lock() + self._started = False + self._reader_task: Optional[asyncio.Task[None]] = None + self._stderr_task: Optional[asyncio.Task[None]] = None + self._exit_future: Optional[asyncio.Future[None]] = None + self.workspace_key: Optional[str] = None + + @classmethod + async def start(cls, **kwargs: Any) -> AgentRelayClient: + client = cls(**kwargs) + await client.start_client() + return client + + # ── Event subscription ──────────────────────────────────────────────── + + def on_event(self, listener: Callable[[BrokerEvent], None]) -> Callable[[], None]: + self._event_listeners.append(listener) + + def unsubscribe() -> None: + try: + self._event_listeners.remove(listener) + except ValueError: + pass + + return unsubscribe + + def on_broker_stderr(self, listener: Callable[[str], None]) -> Callable[[], None]: + self._stderr_listeners.append(listener) + + def unsubscribe() -> None: + try: + self._stderr_listeners.remove(listener) + except ValueError: + pass + + return unsubscribe + + def query_events( + self, + *, + kind: Optional[str] = None, + name: Optional[str] = None, + limit: Optional[int] = None, + ) -> list[BrokerEvent]: + events = list(self._event_buffer) + if kind: + events = [e for e in events if e.get("kind") == kind] + if name: + events = [e for e in events if e.get("name") == name] + if limit is not None: + events = events[-limit:] + return events + + # ── Lifecycle ───────────────────────────────────────────────────────── + + async def start_client(self) -> None: + if self._started: + return + async with self._starting_lock: + if self._started: + return + await self._start_internal() + + async def _start_internal(self) -> None: + resolved_binary = _expand_tilde(self._binary_path) + if _is_explicit_path(self._binary_path) and not Path(resolved_binary).exists(): + raise AgentRelayProcessError(f"broker binary not found: {self._binary_path}") + + args = [ + "init", + "--name", + self._broker_name, + "--channels", + ",".join(self._channels), + *self._binary_args, + ] + + env = dict(self._env) if self._env else dict(os.environ) + if _is_explicit_path(self._binary_path): + bin_dir = str(Path(resolved_binary).resolve().parent) + current_path = env.get("PATH", "") + if bin_dir not in current_path.split(os.pathsep): + env["PATH"] = f"{bin_dir}{os.pathsep}{current_path}" + + self._last_stderr_line = None + + self._process = await asyncio.create_subprocess_exec( + resolved_binary, + *args, + stdin=asyncio.subprocess.PIPE, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + cwd=self._cwd, + env=env, + ) + + loop = asyncio.get_running_loop() + self._exit_future = loop.create_future() + + self._reader_task = asyncio.create_task(self._read_stdout()) + self._stderr_task = asyncio.create_task(self._read_stderr()) + + # Monitor process exit + asyncio.create_task(self._monitor_exit()) + + # Hello handshake + hello_ack = await self._request_hello() + self._started = True + if hello_ack.get("workspace_key"): + self.workspace_key = hello_ack["workspace_key"] + + async def _monitor_exit(self) -> None: + if not self._process: + return + code = await self._process.wait() + detail = f": {self._last_stderr_line}" if self._last_stderr_line else "" + error = AgentRelayProcessError(f"broker exited (code={code}){detail}") + self._fail_all_pending(error) + if self._exit_future and not self._exit_future.done(): + self._exit_future.set_result(None) + + async def _read_stdout(self) -> None: + assert self._process and self._process.stdout + while True: + line = await self._process.stdout.readline() + if not line: + break + self._handle_stdout_line(line.decode("utf-8", errors="replace").rstrip("\n")) + + async def _read_stderr(self) -> None: + assert self._process and self._process.stderr + while True: + line = await self._process.stderr.readline() + if not line: + break + text = line.decode("utf-8", errors="replace").rstrip("\n") + trimmed = text.strip() + if trimmed: + self._last_stderr_line = trimmed + for listener in self._stderr_listeners: + listener(text) + + def _handle_stdout_line(self, line: str) -> None: + try: + parsed = json.loads(line) + except (json.JSONDecodeError, ValueError): + return + + if not isinstance(parsed, dict): + return + if parsed.get("v") != PROTOCOL_VERSION or not isinstance(parsed.get("type"), str): + return + + envelope = ProtocolEnvelope.from_dict(parsed) + + # Events are dispatched to listeners (no request_id) + if envelope.type == "event": + event: BrokerEvent = envelope.payload + self._event_buffer.append(event) + if len(self._event_buffer) > self._max_buffer_size: + self._event_buffer.pop(0) + for listener in self._event_listeners: + listener(event) + return + + # Responses are correlated to pending requests + if not envelope.request_id: + return + + pending = self._pending.pop(envelope.request_id, None) + if not pending: + return + + pending.timeout_handle.cancel() + + if envelope.type == "error": + payload = envelope.payload + pending.future.set_exception( + AgentRelayProtocolError( + code=payload.get("code", "unknown"), + message=payload.get("message", "unknown error"), + retryable=payload.get("retryable", False), + data=payload.get("data"), + ) + ) + return + + if envelope.type != pending.expected_type: + pending.future.set_exception( + AgentRelayProcessError( + f"unexpected response type '{envelope.type}' for request " + f"'{envelope.request_id}' (expected '{pending.expected_type}')" + ) + ) + return + + pending.future.set_result(envelope) + + def _fail_all_pending(self, error: Exception) -> None: + for pending in self._pending.values(): + pending.timeout_handle.cancel() + if not pending.future.done(): + pending.future.set_exception(error) + self._pending.clear() + + # ── Request helpers ─────────────────────────────────────────────────── + + async def _send_request(self, type_: str, payload: Any, expected_type: str) -> ProtocolEnvelope: + if not self._process or not self._process.stdin: + raise AgentRelayProcessError("broker is not running") + + self._request_seq += 1 + request_id = f"req_{self._request_seq}" + + envelope = ProtocolEnvelope( + v=PROTOCOL_VERSION, + type=type_, + payload=payload, + request_id=request_id, + ) + + loop = asyncio.get_running_loop() + future: asyncio.Future[ProtocolEnvelope] = loop.create_future() + + def on_timeout() -> None: + self._pending.pop(request_id, None) + if not future.done(): + future.set_exception( + AgentRelayProcessError( + f"request timed out after {self._request_timeout_ms}ms " + f"(type='{type_}', request_id='{request_id}')" + ) + ) + + timeout_handle = loop.call_later(self._request_timeout_ms / 1000, on_timeout) + self._pending[request_id] = _PendingRequest(expected_type, future, timeout_handle) + + line = json.dumps(envelope.to_dict()) + "\n" + self._process.stdin.write(line.encode("utf-8")) + await self._process.stdin.drain() + + return await future + + async def _request_hello(self) -> dict[str, Any]: + payload = { + "client_name": self._client_name, + "client_version": self._client_version, + } + frame = await self._send_request("hello", payload, "hello_ack") + return frame.payload + + async def _request_ok(self, type_: str, payload: Any) -> Any: + frame = await self._send_request(type_, payload, "ok") + return frame.payload.get("result") + + # ── Public API methods ──────────────────────────────────────────────── + + async def spawn_pty( + self, + *, + name: str, + cli: str, + args: Optional[list[str]] = None, + channels: Optional[list[str]] = None, + task: Optional[str] = None, + model: Optional[str] = None, + cwd: Optional[str] = None, + team: Optional[str] = None, + shadow_of: Optional[str] = None, + shadow_mode: Optional[str] = None, + idle_threshold_secs: Optional[int] = None, + restart_policy: Optional[dict[str, Any]] = None, + continue_from: Optional[str] = None, + ) -> dict[str, Any]: + await self.start_client() + built_args = _build_pty_args_with_model(cli, args or [], model) + from .protocol import RestartPolicy as ProtocolRestartPolicy + rp = None + if restart_policy: + rp = ProtocolRestartPolicy(**restart_policy) + agent = AgentSpec( + name=name, + runtime="pty", + cli=cli, + args=built_args, + channels=channels or [], + model=model, + cwd=cwd or self._cwd, + team=team, + shadow_of=shadow_of, + shadow_mode=shadow_mode, + restart_policy=rp, + ) + request_payload: dict[str, Any] = {"agent": agent.to_dict()} + if task is not None: + request_payload["initial_task"] = task + if idle_threshold_secs is not None: + request_payload["idle_threshold_secs"] = idle_threshold_secs + if continue_from is not None: + request_payload["continue_from"] = continue_from + return await self._request_ok("spawn_agent", request_payload) + + async def spawn_headless_claude( + self, + *, + name: str, + args: Optional[list[str]] = None, + channels: Optional[list[str]] = None, + task: Optional[str] = None, + ) -> dict[str, Any]: + await self.start_client() + agent = AgentSpec( + name=name, + runtime="headless_claude", + args=args or [], + channels=channels or [], + ) + request_payload: dict[str, Any] = {"agent": agent.to_dict()} + if task is not None: + request_payload["initial_task"] = task + return await self._request_ok("spawn_agent", request_payload) + + async def release(self, name: str, reason: Optional[str] = None) -> dict[str, Any]: + await self.start_client() + payload: dict[str, Any] = {"name": name} + if reason is not None: + payload["reason"] = reason + return await self._request_ok("release_agent", payload) + + async def send_input(self, name: str, data: str) -> dict[str, Any]: + await self.start_client() + return await self._request_ok("send_input", {"name": name, "data": data}) + + async def set_model(self, name: str, model: str, *, timeout_ms: Optional[int] = None) -> dict[str, Any]: + await self.start_client() + payload: dict[str, Any] = {"name": name, "model": model} + if timeout_ms is not None: + payload["timeout_ms"] = timeout_ms + return await self._request_ok("set_model", payload) + + async def send_message( + self, + *, + to: str, + text: str, + from_: Optional[str] = None, + thread_id: Optional[str] = None, + priority: Optional[int] = None, + data: Optional[dict[str, Any]] = None, + ) -> dict[str, Any]: + await self.start_client() + payload: dict[str, Any] = {"to": to, "text": text} + if from_ is not None: + payload["from"] = from_ + if thread_id is not None: + payload["thread_id"] = thread_id + if priority is not None: + payload["priority"] = priority + if data is not None: + payload["data"] = data + try: + return await self._request_ok("send_message", payload) + except AgentRelayProtocolError as e: + if e.code == "unsupported_operation": + return {"event_id": "unsupported_operation", "targets": []} + raise + + async def list_agents(self) -> list[dict[str, Any]]: + await self.start_client() + result = await self._request_ok("list_agents", {}) + return result.get("agents", []) if isinstance(result, dict) else [] + + async def get_status(self) -> dict[str, Any]: + await self.start_client() + return await self._request_ok("get_status", {}) + + async def get_metrics(self, agent: Optional[str] = None) -> dict[str, Any]: + await self.start_client() + return await self._request_ok("get_metrics", {"agent": agent} if agent else {}) + + async def get_crash_insights(self) -> dict[str, Any]: + await self.start_client() + return await self._request_ok("get_crash_insights", {}) + + async def preflight_agents(self, agents: list[dict[str, str]]) -> None: + if not agents: + return + await self.start_client() + await self._request_ok("preflight_agents", {"agents": agents}) + + async def shutdown(self) -> None: + if not self._process: + return + + try: + await self._request_ok("shutdown", {}) + except Exception: + pass + + process = self._process + try: + await asyncio.wait_for( + self._exit_future if self._exit_future else asyncio.sleep(0), + timeout=self._shutdown_timeout_ms / 1000, + ) + except asyncio.TimeoutError: + if process.returncode is None: + process.terminate() + try: + await asyncio.wait_for(process.wait(), timeout=2.0) + except asyncio.TimeoutError: + process.kill() + + # Clean up reader tasks + if self._reader_task and not self._reader_task.done(): + self._reader_task.cancel() + if self._stderr_task and not self._stderr_task.done(): + self._stderr_task.cancel() + + self._process = None + self._started = False + + async def wait_for_exit(self) -> None: + if self._exit_future: + await self._exit_future diff --git a/packages/sdk-py/src/agent_relay/models.py b/packages/sdk-py/src/agent_relay/models.py new file mode 100644 index 000000000..95c4668e5 --- /dev/null +++ b/packages/sdk-py/src/agent_relay/models.py @@ -0,0 +1,27 @@ +"""Model constants for supported CLI tools. + +Matches packages/config/src/cli-registry.generated.ts. +""" + + +class Models: + """Model identifiers organized by CLI tool.""" + + class Claude: + SONNET = "sonnet" + OPUS = "opus" + HAIKU = "haiku" + + class Codex: + GPT_5_2_CODEX = "gpt-5.2-codex" + GPT_5_3_CODEX = "gpt-5.3-codex" + GPT_5_3_CODEX_SPARK = "gpt-5.3-codex-spark" + GPT_5_1_CODEX_MAX = "gpt-5.1-codex-max" + GPT_5_2 = "gpt-5.2" + GPT_5_1_CODEX_MINI = "gpt-5.1-codex-mini" + + class Gemini: + GEMINI_3_PRO_PREVIEW = "gemini-3-pro-preview" + GEMINI_2_5_PRO = "gemini-2.5-pro" + GEMINI_2_5_FLASH = "gemini-2.5-flash" + GEMINI_2_5_FLASH_LITE = "gemini-2.5-flash-lite" diff --git a/packages/sdk-py/src/agent_relay/protocol.py b/packages/sdk-py/src/agent_relay/protocol.py new file mode 100644 index 000000000..b396e00db --- /dev/null +++ b/packages/sdk-py/src/agent_relay/protocol.py @@ -0,0 +1,110 @@ +"""Wire protocol types for the Agent Relay broker communication. + +Matches the TypeScript definitions in packages/sdk/src/protocol.ts. +""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Any, Literal, Optional + +PROTOCOL_VERSION = 1 + +AgentRuntime = Literal["pty", "headless_claude"] + + +@dataclass +class RestartPolicy: + enabled: bool = False + max_restarts: int = 3 + cooldown_ms: int = 1000 + max_consecutive_failures: int = 3 + + def to_dict(self) -> dict[str, Any]: + return { + "enabled": self.enabled, + "max_restarts": self.max_restarts, + "cooldown_ms": self.cooldown_ms, + "max_consecutive_failures": self.max_consecutive_failures, + } + + +@dataclass +class AgentSpec: + """Specification for spawning an agent.""" + + name: str + runtime: AgentRuntime = "pty" + cli: Optional[str] = None + args: list[str] = field(default_factory=list) + channels: list[str] = field(default_factory=list) + model: Optional[str] = None + cwd: Optional[str] = None + team: Optional[str] = None + shadow_of: Optional[str] = None + shadow_mode: Optional[str] = None + restart_policy: Optional[RestartPolicy] = None + + def to_dict(self) -> dict[str, Any]: + d: dict[str, Any] = { + "name": self.name, + "runtime": self.runtime, + } + if self.cli is not None: + d["cli"] = self.cli + if self.args: + d["args"] = self.args + if self.channels: + d["channels"] = self.channels + if self.model is not None: + d["model"] = self.model + if self.cwd is not None: + d["cwd"] = self.cwd + if self.team is not None: + d["team"] = self.team + if self.shadow_of is not None: + d["shadow_of"] = self.shadow_of + if self.shadow_mode is not None: + d["shadow_mode"] = self.shadow_mode + if self.restart_policy is not None: + d["restart_policy"] = self.restart_policy.to_dict() + return d + + +@dataclass +class ProtocolEnvelope: + """JSON envelope for all broker communication.""" + + v: int + type: str + payload: dict[str, Any] + request_id: Optional[str] = None + + def to_dict(self) -> dict[str, Any]: + d: dict[str, Any] = { + "v": self.v, + "type": self.type, + "payload": self.payload, + } + if self.request_id is not None: + d["request_id"] = self.request_id + return d + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> ProtocolEnvelope: + return cls( + v=data.get("v", 0), + type=data.get("type", ""), + payload=data.get("payload", {}), + request_id=data.get("request_id"), + ) + + +# BrokerEvent is a dict with a 'kind' field discriminator. +# Event kinds: agent_spawned, agent_released, agent_exit, agent_exited, +# relay_inbound, worker_stream, worker_ready, worker_error, +# delivery_queued, delivery_injected, delivery_verified, delivery_failed, +# delivery_active, delivery_ack, delivery_retry, delivery_dropped, +# relaycast_published, relaycast_publish_failed, acl_denied, +# agent_idle, agent_restarting, agent_restarted, agent_permanently_dead +BrokerEvent = dict[str, Any] diff --git a/packages/sdk-py/src/agent_relay/relay.py b/packages/sdk-py/src/agent_relay/relay.py new file mode 100644 index 000000000..b97378b50 --- /dev/null +++ b/packages/sdk-py/src/agent_relay/relay.py @@ -0,0 +1,742 @@ +"""High-level facade for the Agent Relay SDK. + +Provides a clean, property-based API on top of the lower-level +AgentRelayClient protocol client. + +Mirrors packages/sdk/src/relay.ts. +""" + +from __future__ import annotations + +import asyncio +import os +import secrets +from dataclasses import dataclass, field +from typing import Any, Callable, Optional + +from .client import AgentRelayClient, AgentRelayProtocolError +from .protocol import AgentRuntime, BrokerEvent + +# ── Public types ────────────────────────────────────────────────────────────── + +AgentStatus = str # "spawning" | "ready" | "idle" | "exited" + +EventHook = Optional[Callable[..., None]] + + +@dataclass +class Message: + """A relay message between agents.""" + + event_id: str + from_name: str + to: str + text: str + thread_id: Optional[str] = None + data: Optional[dict[str, Any]] = None + + +@dataclass +class SpawnOptions: + """Options for spawning an agent.""" + + args: list[str] = field(default_factory=list) + channels: list[str] = field(default_factory=list) + model: Optional[str] = None + cwd: Optional[str] = None + team: Optional[str] = None + shadow_of: Optional[str] = None + shadow_mode: Optional[str] = None + idle_threshold_secs: Optional[int] = None + restart_policy: Optional[dict[str, Any]] = None + + +# ── Agent handle ────────────────────────────────────────────────────────────── + + +class Agent: + """Handle for a spawned agent with lifecycle methods.""" + + def __init__( + self, + name: str, + runtime: AgentRuntime, + channels: list[str], + relay: AgentRelay, + ): + self._name = name + self._runtime = runtime + self._channels = channels + self._relay = relay + self.exit_code: Optional[int] = None + self.exit_signal: Optional[str] = None + self.exit_reason: Optional[str] = None + + @property + def name(self) -> str: + return self._name + + @property + def runtime(self) -> AgentRuntime: + return self._runtime + + @property + def channels(self) -> list[str]: + return self._channels + + @property + def status(self) -> AgentStatus: + if self._name in self._relay._exited_agents: + return "exited" + if self._name in self._relay._idle_agents: + return "idle" + if self._name in self._relay._ready_agents: + return "ready" + return "spawning" + + async def release(self, reason: Optional[str] = None) -> None: + client = await self._relay._ensure_started() + await client.release(self._name, reason) + + async def wait_for_ready(self, timeout_ms: int = 60_000) -> None: + await self._relay.wait_for_agent_ready(self._name, timeout_ms) + + async def wait_for_exit(self, timeout_ms: Optional[int] = None) -> str: + """Wait for agent to exit. Returns 'exited', 'released', or 'timeout'.""" + if self._name not in self._relay._known_agents: + return "exited" + if timeout_ms == 0: + return "timeout" + + future: asyncio.Future[str] = asyncio.get_running_loop().create_future() + self._relay._exit_resolvers[self._name] = future + + if timeout_ms is not None: + try: + return await asyncio.wait_for(future, timeout=timeout_ms / 1000) + except asyncio.TimeoutError: + self._relay._exit_resolvers.pop(self._name, None) + return "timeout" + else: + return await future + + async def wait_for_idle(self, timeout_ms: Optional[int] = None) -> str: + """Wait for agent to go idle. Returns 'idle', 'exited', or 'timeout'.""" + if self._name not in self._relay._known_agents: + return "exited" + if timeout_ms == 0: + return "timeout" + + future: asyncio.Future[str] = asyncio.get_running_loop().create_future() + self._relay._idle_resolvers[self._name] = future + + if timeout_ms is not None: + try: + return await asyncio.wait_for(future, timeout=timeout_ms / 1000) + except asyncio.TimeoutError: + self._relay._idle_resolvers.pop(self._name, None) + return "timeout" + else: + return await future + + async def send_message( + self, + *, + to: str, + text: str, + thread_id: Optional[str] = None, + priority: Optional[int] = None, + data: Optional[dict[str, Any]] = None, + ) -> Message: + client = await self._relay._ensure_started() + try: + result = await client.send_message( + to=to, + text=text, + from_=self._name, + thread_id=thread_id, + priority=priority, + data=data, + ) + except AgentRelayProtocolError as e: + if e.code == "unsupported_operation": + return Message( + event_id="unsupported_operation", + from_name=self._name, + to=to, + text=text, + thread_id=thread_id, + data=data, + ) + raise + + event_id = result.get("event_id", secrets.token_hex(8)) + msg = Message( + event_id=event_id, + from_name=self._name, + to=to, + text=text, + thread_id=thread_id, + data=data, + ) + if self._relay.on_message_sent: + self._relay.on_message_sent(msg) + return msg + + def on_output(self, callback: Callable[[str], None]) -> Callable[[], None]: + listeners = self._relay._output_listeners.setdefault(self._name, []) + listeners.append(callback) + + def unsubscribe() -> None: + try: + listeners.remove(callback) + except ValueError: + pass + if not listeners: + self._relay._output_listeners.pop(self._name, None) + + return unsubscribe + + +# ── Human handle ────────────────────────────────────────────────────────────── + + +class HumanHandle: + """A messaging handle for human/system messages.""" + + def __init__(self, name: str, relay: AgentRelay): + self._name = name + self._relay = relay + + @property + def name(self) -> str: + return self._name + + async def send_message( + self, + *, + to: str, + text: str, + thread_id: Optional[str] = None, + priority: Optional[int] = None, + data: Optional[dict[str, Any]] = None, + ) -> Message: + client = await self._relay._ensure_started() + try: + result = await client.send_message( + to=to, + text=text, + from_=self._name, + thread_id=thread_id, + priority=priority, + data=data, + ) + except AgentRelayProtocolError as e: + if e.code == "unsupported_operation": + return Message( + event_id="unsupported_operation", + from_name=self._name, + to=to, + text=text, + thread_id=thread_id, + data=data, + ) + raise + + event_id = result.get("event_id", secrets.token_hex(8)) + msg = Message( + event_id=event_id, + from_name=self._name, + to=to, + text=text, + thread_id=thread_id, + data=data, + ) + if self._relay.on_message_sent: + self._relay.on_message_sent(msg) + return msg + + +# ── Agent spawner ───────────────────────────────────────────────────────────── + + +class AgentSpawner: + """Shorthand spawner for a specific CLI (e.g., relay.claude.spawn(...)).""" + + def __init__(self, cli: str, default_name: str, relay: AgentRelay): + self._cli = cli + self._default_name = default_name + self._relay = relay + + async def spawn( + self, + *, + name: Optional[str] = None, + args: Optional[list[str]] = None, + channels: Optional[list[str]] = None, + task: Optional[str] = None, + model: Optional[str] = None, + cwd: Optional[str] = None, + ) -> Agent: + agent_name = name or self._default_name + agent_channels = channels or ["general"] + client = await self._relay._ensure_started() + + result = await client.spawn_pty( + name=agent_name, + cli=self._cli, + args=args or [], + channels=agent_channels, + task=task, + model=model, + cwd=cwd, + ) + + agent = Agent( + name=result.get("name", agent_name), + runtime=result.get("runtime", "pty"), + channels=agent_channels, + relay=self._relay, + ) + self._relay._known_agents[agent.name] = agent + self._relay._ready_agents.discard(agent.name) + self._relay._message_ready_agents.discard(agent.name) + self._relay._exited_agents.discard(agent.name) + self._relay._idle_agents.discard(agent.name) + return agent + + +# ── AgentRelay facade ───────────────────────────────────────────────────────── + + +class AgentRelay: + """High-level facade for the Agent Relay SDK. + + Example:: + + relay = AgentRelay(channels=["GTM"]) + relay.on_message_received = lambda msg: print(f"[{msg.from_name}]: {msg.text}") + + await relay.claude.spawn(name="Analyst", model="opus", channels=["GTM"], task="Analyze") + await relay.wait_for_agent_ready("Analyst") + await relay.shutdown() + """ + + def __init__( + self, + *, + binary_path: Optional[str] = None, + binary_args: Optional[list[str]] = None, + broker_name: Optional[str] = None, + channels: Optional[list[str]] = None, + cwd: Optional[str] = None, + env: Optional[dict[str, str]] = None, + request_timeout_ms: int = 10_000, + shutdown_timeout_ms: int = 3_000, + ): + # Event hooks — assign a callback or None to clear + self.on_message_received: EventHook = None + self.on_message_sent: EventHook = None + self.on_agent_spawned: EventHook = None + self.on_agent_released: EventHook = None + self.on_agent_exited: EventHook = None + self.on_agent_ready: EventHook = None + self.on_worker_output: EventHook = None + self.on_delivery_update: EventHook = None + self.on_agent_exit_requested: EventHook = None + self.on_agent_idle: EventHook = None + + self._default_channels = channels or ["general"] + self._client_kwargs: dict[str, Any] = { + "binary_path": binary_path, + "binary_args": binary_args, + "broker_name": broker_name, + "channels": self._default_channels, + "cwd": cwd, + "env": env, + "request_timeout_ms": request_timeout_ms, + "shutdown_timeout_ms": shutdown_timeout_ms, + } + + self._client: Optional[AgentRelayClient] = None + self._start_lock = asyncio.Lock() + self._unsubscribe_event: Optional[Callable[[], None]] = None + + # Agent tracking + self._known_agents: dict[str, Agent] = {} + self._ready_agents: set[str] = set() + self._message_ready_agents: set[str] = set() + self._exited_agents: set[str] = set() + self._idle_agents: set[str] = set() + self._output_listeners: dict[str, list[Callable[[str], None]]] = {} + self._exit_resolvers: dict[str, asyncio.Future[str]] = {} + self._idle_resolvers: dict[str, asyncio.Future[str]] = {} + + # Shorthand spawners + self.codex = AgentSpawner("codex", "Codex", self) + self.claude = AgentSpawner("claude", "Claude", self) + self.gemini = AgentSpawner("gemini", "Gemini", self) + + @property + def workspace_key(self) -> Optional[str]: + return self._client.workspace_key if self._client else None + + # ── Internal startup ────────────────────────────────────────────────── + + async def _ensure_started(self) -> AgentRelayClient: + if self._client: + return self._client + async with self._start_lock: + if self._client: + return self._client + + # Ensure env has RELAY_API_KEY if available + env = self._client_kwargs.get("env") + if env is None: + env_key = os.environ.get("RELAY_API_KEY") + if env_key: + self._client_kwargs["env"] = {**os.environ, "RELAY_API_KEY": env_key} + else: + self._client_kwargs["env"] = dict(os.environ) + + # Remove None values to use defaults + kwargs = {k: v for k, v in self._client_kwargs.items() if v is not None} + client = AgentRelayClient(**kwargs) + await client.start_client() + + self._client = client + if client.workspace_key: + pass # workspace_key is available via property + + self._wire_events(client) + return client + + # ── Spawning ────────────────────────────────────────────────────────── + + async def spawn( + self, + name: str, + cli: str, + task: Optional[str] = None, + options: Optional[SpawnOptions] = None, + ) -> Agent: + client = await self._ensure_started() + opts = options or SpawnOptions() + channels = opts.channels or ["general"] + + result = await client.spawn_pty( + name=name, + cli=cli, + task=task, + args=opts.args, + channels=channels, + model=opts.model, + cwd=opts.cwd, + team=opts.team, + shadow_of=opts.shadow_of, + shadow_mode=opts.shadow_mode, + idle_threshold_secs=opts.idle_threshold_secs, + restart_policy=opts.restart_policy, + ) + + self._ready_agents.discard(name) + self._message_ready_agents.discard(name) + self._exited_agents.discard(name) + self._idle_agents.discard(name) + agent = Agent( + name=result.get("name", name), + runtime=result.get("runtime", "pty"), + channels=channels, + relay=self, + ) + self._known_agents[agent.name] = agent + return agent + + async def spawn_and_wait( + self, + name: str, + cli: str, + task: str, + options: Optional[SpawnOptions] = None, + timeout_ms: int = 60_000, + wait_for_message: bool = False, + ) -> Agent: + await self.spawn(name, cli, task, options) + if wait_for_message: + return await self.wait_for_agent_message(name, timeout_ms) + return await self.wait_for_agent_ready(name, timeout_ms) + + # ── Human/system messaging ──────────────────────────────────────────── + + def human(self, name: str) -> HumanHandle: + return HumanHandle(name, self) + + def system(self) -> HumanHandle: + return HumanHandle("system", self) + + async def broadcast(self, text: str, *, from_name: str = "human:orchestrator") -> Message: + return await self.human(from_name).send_message(to="*", text=text) + + # ── Listing / status ────────────────────────────────────────────────── + + async def list_agents(self) -> list[Agent]: + client = await self._ensure_started() + raw_list = await client.list_agents() + agents = [] + for entry in raw_list: + name = entry.get("name", "") + existing = self._known_agents.get(name) + if existing: + agents.append(existing) + else: + agent = Agent( + name=name, + runtime=entry.get("runtime", "pty"), + channels=entry.get("channels", []), + relay=self, + ) + self._known_agents[name] = agent + agents.append(agent) + return agents + + async def preflight_agents(self, agents: list[dict[str, str]]) -> None: + client = await self._ensure_started() + await client.preflight_agents(agents) + + async def get_status(self) -> dict[str, Any]: + client = await self._ensure_started() + return await client.get_status() + + # ── Wait helpers ────────────────────────────────────────────────────── + + async def wait_for_agent_ready(self, name: str, timeout_ms: int = 60_000) -> Agent: + client = await self._ensure_started() + existing = self._known_agents.get(name) + if existing and name in self._ready_agents: + return existing + + future: asyncio.Future[Agent] = asyncio.get_running_loop().create_future() + + def on_event(event: BrokerEvent) -> None: + if event.get("kind") != "worker_ready" or event.get("name") != name: + return + agent = self._ensure_agent_handle(name, event.get("runtime", "pty")) + self._ready_agents.add(name) + self._exited_agents.discard(name) + if not future.done(): + future.set_result(agent) + + unsub = client.on_event(on_event) + try: + # Check again after subscribing (race condition guard) + known = self._known_agents.get(name) + if known and name in self._ready_agents: + return known + return await asyncio.wait_for(future, timeout=timeout_ms / 1000) + except asyncio.TimeoutError: + raise TimeoutError( + f"Timed out waiting for worker_ready for '{name}' after {timeout_ms}ms" + ) from None + finally: + unsub() + + async def wait_for_agent_message(self, name: str, timeout_ms: int = 60_000) -> Agent: + client = await self._ensure_started() + existing = self._known_agents.get(name) + if existing and name in self._message_ready_agents: + return existing + + future: asyncio.Future[Agent] = asyncio.get_running_loop().create_future() + + def on_event(event: BrokerEvent) -> None: + if future.done(): + return + if event.get("kind") == "relay_inbound" and event.get("from") == name: + self._message_ready_agents.add(name) + self._exited_agents.discard(name) + future.set_result(self._ensure_agent_handle(name)) + elif event.get("kind") == "agent_exited" and event.get("name") == name: + future.set_exception( + RuntimeError(f"Agent '{name}' exited before sending its first relay message") + ) + + unsub = client.on_event(on_event) + try: + known = self._known_agents.get(name) + if known and name in self._message_ready_agents: + return known + return await asyncio.wait_for(future, timeout=timeout_ms / 1000) + except asyncio.TimeoutError: + raise TimeoutError( + f"Timed out waiting for first relay message from '{name}' after {timeout_ms}ms" + ) from None + finally: + unsub() + + @staticmethod + async def wait_for_any( + agents: list[Agent], timeout_ms: Optional[int] = None + ) -> tuple[Agent, str]: + """Wait for any agent to exit. Returns (agent, result) tuple.""" + if not agents: + raise ValueError("wait_for_any requires at least one agent") + + async def _wait(agent: Agent) -> tuple[Agent, str]: + result = await agent.wait_for_exit(timeout_ms) + return (agent, result) + + done, pending = await asyncio.wait( + [asyncio.create_task(_wait(a)) for a in agents], + return_when=asyncio.FIRST_COMPLETED, + ) + for task in pending: + task.cancel() + return done.pop().result() + + # ── Lifecycle ───────────────────────────────────────────────────────── + + async def shutdown(self) -> None: + if self._unsubscribe_event: + self._unsubscribe_event() + self._unsubscribe_event = None + if self._client: + await self._client.shutdown() + self._client = None + + self._known_agents.clear() + self._ready_agents.clear() + self._message_ready_agents.clear() + self._exited_agents.clear() + self._idle_agents.clear() + self._output_listeners.clear() + + for future in self._exit_resolvers.values(): + if not future.done(): + future.set_result("released") + self._exit_resolvers.clear() + for future in self._idle_resolvers.values(): + if not future.done(): + future.set_result("exited") + self._idle_resolvers.clear() + + # ── Private helpers ─────────────────────────────────────────────────── + + def _ensure_agent_handle( + self, name: str, runtime: AgentRuntime = "pty", channels: Optional[list[str]] = None, + ) -> Agent: + existing = self._known_agents.get(name) + if existing: + return existing + agent = Agent(name, runtime, channels or [], self) + self._known_agents[name] = agent + return agent + + def _wire_events(self, client: AgentRelayClient) -> None: + def on_event(event: BrokerEvent) -> None: + kind = event.get("kind") + name = event.get("name", "") + + if kind == "relay_inbound": + from_name = event.get("from", "") + if from_name in self._known_agents: + self._message_ready_agents.add(from_name) + self._exited_agents.discard(from_name) + msg = Message( + event_id=event.get("event_id", ""), + from_name=event.get("from", ""), + to=event.get("target", ""), + text=event.get("body", ""), + thread_id=event.get("thread_id"), + ) + if self.on_message_received: + self.on_message_received(msg) + + elif kind == "agent_spawned": + agent = self._ensure_agent_handle(name, event.get("runtime", "pty")) + self._ready_agents.discard(name) + self._message_ready_agents.discard(name) + self._exited_agents.discard(name) + self._idle_agents.discard(name) + if self.on_agent_spawned: + self.on_agent_spawned(agent) + + elif kind == "agent_released": + agent = self._known_agents.get(name) or self._ensure_agent_handle(name) + self._exited_agents.add(name) + self._ready_agents.discard(name) + self._message_ready_agents.discard(name) + self._idle_agents.discard(name) + if self.on_agent_released: + self.on_agent_released(agent) + self._known_agents.pop(name, None) + self._output_listeners.pop(name, None) + future = self._exit_resolvers.pop(name, None) + if future and not future.done(): + future.set_result("released") + idle_future = self._idle_resolvers.pop(name, None) + if idle_future and not idle_future.done(): + idle_future.set_result("exited") + + elif kind == "agent_exited": + agent = self._known_agents.get(name) or self._ensure_agent_handle(name) + self._exited_agents.add(name) + self._ready_agents.discard(name) + self._message_ready_agents.discard(name) + self._idle_agents.discard(name) + agent.exit_code = event.get("code") + agent.exit_signal = event.get("signal") + if self.on_agent_exited: + self.on_agent_exited(agent) + self._known_agents.pop(name, None) + self._output_listeners.pop(name, None) + future = self._exit_resolvers.pop(name, None) + if future and not future.done(): + future.set_result("exited") + idle_future = self._idle_resolvers.pop(name, None) + if idle_future and not idle_future.done(): + idle_future.set_result("exited") + + elif kind == "agent_exit": + agent = self._known_agents.get(name) or self._ensure_agent_handle(name) + agent.exit_reason = event.get("reason", "") + if self.on_agent_exit_requested: + self.on_agent_exit_requested({"name": name, "reason": event.get("reason", "")}) + + elif kind == "worker_ready": + agent = self._ensure_agent_handle(name, event.get("runtime", "pty")) + self._ready_agents.add(name) + self._exited_agents.discard(name) + self._idle_agents.discard(name) + if self.on_agent_ready: + self.on_agent_ready(agent) + + elif kind == "worker_stream": + self._idle_agents.discard(name) + if self.on_worker_output: + self.on_worker_output({ + "name": name, + "stream": event.get("stream", ""), + "chunk": event.get("chunk", ""), + }) + # Per-agent output listeners + listeners = self._output_listeners.get(name, []) + for listener in listeners: + listener(event.get("chunk", "")) + + elif kind == "agent_idle": + self._idle_agents.add(name) + if self.on_agent_idle: + self.on_agent_idle({ + "name": name, + "idle_secs": event.get("idle_secs", 0), + }) + idle_future = self._idle_resolvers.pop(name, None) + if idle_future and not idle_future.done(): + idle_future.set_result("idle") + + # Delivery events + if kind and kind.startswith("delivery_"): + if self.on_delivery_update: + self.on_delivery_update(event) + + self._unsubscribe_event = client.on_event(on_event) From 87a8b6c65a6b748fc73ffbab8073044b051bf325 Mon Sep 17 00:00:00 2001 From: Khaliq Date: Mon, 2 Mar 2026 13:13:26 +0100 Subject: [PATCH 2/9] fix(sdk-py): use broker-returned name + auto-install broker binary - Use agent.name (from broker response) instead of input name for state cleanup in spawn() and spawn_and_wait() - Auto-download broker binary from GitHub releases on first use if not found at ~/.agent-relay/bin/ or on PATH. Includes platform detection, macOS codesigning, and binary verification. Co-Authored-By: Claude Opus 4.6 --- packages/sdk-py/src/agent_relay/client.py | 104 +++++++++++++++++++++- packages/sdk-py/src/agent_relay/relay.py | 14 +-- 2 files changed, 109 insertions(+), 9 deletions(-) diff --git a/packages/sdk-py/src/agent_relay/client.py b/packages/sdk-py/src/agent_relay/client.py index 31244fc7e..8ae164af9 100644 --- a/packages/sdk-py/src/agent_relay/client.py +++ b/packages/sdk-py/src/agent_relay/client.py @@ -13,6 +13,10 @@ import os import platform import shutil +import stat +import subprocess +import sys +import urllib.request from pathlib import Path from typing import Any, Callable, Optional @@ -74,6 +78,102 @@ def _is_explicit_path(binary_path: str) -> bool: return "/" in binary_path or "\\" in binary_path or binary_path.startswith(".") or binary_path.startswith("~") +def _detect_platform() -> str: + """Detect platform string matching GitHub release binary names.""" + system = platform.system().lower() + machine = platform.machine().lower() + + if system == "darwin": + os_name = "darwin" + elif system == "linux": + os_name = "linux" + else: + raise AgentRelayProcessError(f"Unsupported OS: {system}") + + if machine in ("x86_64", "amd64"): + arch = "x64" + elif machine in ("arm64", "aarch64"): + arch = "arm64" + else: + raise AgentRelayProcessError(f"Unsupported architecture: {machine}") + + return f"{os_name}-{arch}" + + +def _get_latest_version() -> str: + """Fetch the latest release version tag from GitHub.""" + url = "https://api.github.com/repos/AgentWorkforce/relay/releases/latest" + headers = {"Accept": "application/vnd.github.v3+json"} + token = os.environ.get("GITHUB_TOKEN") + if token: + headers["Authorization"] = f"token {token}" + req = urllib.request.Request(url, headers=headers) + with urllib.request.urlopen(req, timeout=15) as resp: + data = json.loads(resp.read().decode()) + tag = data.get("tag_name", "") + return tag.lstrip("v") + + +def _install_broker_binary() -> str: + """Download the broker binary from GitHub releases. Returns the installed path.""" + install_dir = Path.home() / ".agent-relay" + bin_dir = install_dir / "bin" + target_path = bin_dir / "agent-relay-broker" + + plat = _detect_platform() + print(f"[agent-relay] Broker binary not found, installing for {plat}...") + + version = _get_latest_version() + if not version: + raise AgentRelayProcessError("Failed to fetch latest agent-relay version from GitHub") + + binary_name = f"agent-relay-broker-{plat}" + download_url = f"https://github.com/AgentWorkforce/relay/releases/download/v{version}/{binary_name}" + + bin_dir.mkdir(parents=True, exist_ok=True) + + print(f"[agent-relay] Downloading v{version} from {download_url}") + try: + urllib.request.urlretrieve(download_url, str(target_path)) + except Exception as e: + target_path.unlink(missing_ok=True) + raise AgentRelayProcessError( + f"Failed to download broker binary: {e}\n" + f"You can install manually: curl -fsSL https://raw.githubusercontent.com/AgentWorkforce/relay/main/install.sh | bash" + ) from e + + # Make executable + target_path.chmod(target_path.stat().st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH) + + # macOS: re-sign to avoid Gatekeeper issues + if platform.system() == "Darwin": + try: + subprocess.run( + ["codesign", "--force", "--sign", "-", str(target_path)], + capture_output=True, + timeout=10, + ) + except Exception: + pass # Non-fatal — binary may still work + + # Verify + try: + result = subprocess.run( + [str(target_path), "--help"], + capture_output=True, + timeout=10, + ) + if result.returncode != 0: + target_path.unlink(missing_ok=True) + raise AgentRelayProcessError("Downloaded broker binary failed verification") + except subprocess.TimeoutExpired: + target_path.unlink(missing_ok=True) + raise AgentRelayProcessError("Downloaded broker binary timed out during verification") + + print(f"[agent-relay] Broker installed to {target_path}") + return str(target_path) + + def _resolve_default_binary_path() -> str: broker_exe = "agent-relay-broker" @@ -88,8 +188,8 @@ def _resolve_default_binary_path() -> str: if found: return found - # 3. Last resort: bare name (will fail at spawn time if not on PATH) - return "agent-relay" + # 3. Auto-install from GitHub releases + return _install_broker_binary() # ── Pending request tracking ───────────────────────────────────────────────── diff --git a/packages/sdk-py/src/agent_relay/relay.py b/packages/sdk-py/src/agent_relay/relay.py index b97378b50..de49dc1cc 100644 --- a/packages/sdk-py/src/agent_relay/relay.py +++ b/packages/sdk-py/src/agent_relay/relay.py @@ -439,10 +439,6 @@ async def spawn( restart_policy=opts.restart_policy, ) - self._ready_agents.discard(name) - self._message_ready_agents.discard(name) - self._exited_agents.discard(name) - self._idle_agents.discard(name) agent = Agent( name=result.get("name", name), runtime=result.get("runtime", "pty"), @@ -450,6 +446,10 @@ async def spawn( relay=self, ) self._known_agents[agent.name] = agent + self._ready_agents.discard(agent.name) + self._message_ready_agents.discard(agent.name) + self._exited_agents.discard(agent.name) + self._idle_agents.discard(agent.name) return agent async def spawn_and_wait( @@ -461,10 +461,10 @@ async def spawn_and_wait( timeout_ms: int = 60_000, wait_for_message: bool = False, ) -> Agent: - await self.spawn(name, cli, task, options) + agent = await self.spawn(name, cli, task, options) if wait_for_message: - return await self.wait_for_agent_message(name, timeout_ms) - return await self.wait_for_agent_ready(name, timeout_ms) + return await self.wait_for_agent_message(agent.name, timeout_ms) + return await self.wait_for_agent_ready(agent.name, timeout_ms) # ── Human/system messaging ──────────────────────────────────────────── From 88af364d5cf8372589ea5f9397ef8cfbb9562488 Mon Sep 17 00:00:00 2001 From: Khaliq Date: Mon, 2 Mar 2026 13:17:21 +0100 Subject: [PATCH 3/9] feat(sdk): auto-install broker binary if not found Add runtime auto-download fallback to resolveDefaultBinaryPath(). If the broker binary isn't found at any of the existing locations (Cargo build, bundled npm, ~/.agent-relay/bin/, PATH), automatically download it from GitHub releases. Matches the Python SDK behavior. Includes platform detection, macOS codesigning, and binary verification. Falls back to a helpful manual install message on failure. Co-Authored-By: Claude Opus 4.6 --- packages/sdk/src/client.ts | 96 ++++++++++++++++++++++++++++++++++---- 1 file changed, 88 insertions(+), 8 deletions(-) diff --git a/packages/sdk/src/client.ts b/packages/sdk/src/client.ts index caa9e818b..229449ca9 100644 --- a/packages/sdk/src/client.ts +++ b/packages/sdk/src/client.ts @@ -1,5 +1,5 @@ import { once } from 'node:events'; -import { spawn, type ChildProcessWithoutNullStreams } from 'node:child_process'; +import { execSync, spawn, type ChildProcessWithoutNullStreams } from 'node:child_process'; import { createInterface, type Interface as ReadlineInterface } from 'node:readline'; import fs from 'node:fs'; import os from 'node:os'; @@ -644,6 +644,90 @@ function isExplicitPath(binaryPath: string): boolean { ); } +function detectPlatformSuffix(): string | null { + const platformMap: Record> = { + darwin: { arm64: 'darwin-arm64', x64: 'darwin-x64' }, + linux: { arm64: 'linux-arm64', x64: 'linux-x64' }, + }; + return platformMap[process.platform]?.[process.arch] ?? null; +} + +function getLatestVersionSync(): string | null { + try { + const result = execSync( + 'curl -fsSL https://api.github.com/repos/AgentWorkforce/relay/releases/latest', + { timeout: 15_000, stdio: ['pipe', 'pipe', 'pipe'] } + ).toString(); + const match = result.match(/"tag_name"\s*:\s*"v?([^"]+)"/); + return match?.[1] ?? null; + } catch { + return null; + } +} + +function installBrokerBinary(): string { + const suffix = detectPlatformSuffix(); + if (!suffix) { + throw new AgentRelayProcessError( + `Unsupported platform: ${process.platform}-${process.arch}` + ); + } + + const homeDir = process.env.HOME || process.env.USERPROFILE || ''; + const installDir = path.join(homeDir, '.agent-relay', 'bin'); + const brokerExe = process.platform === 'win32' ? 'agent-relay-broker.exe' : 'agent-relay-broker'; + const targetPath = path.join(installDir, brokerExe); + + console.log(`[agent-relay] Broker binary not found, installing for ${suffix}...`); + + const version = getLatestVersionSync(); + if (!version) { + throw new AgentRelayProcessError( + 'Failed to fetch latest agent-relay version from GitHub.\n' + + 'Install manually: curl -fsSL https://raw.githubusercontent.com/AgentWorkforce/relay/main/install.sh | bash' + ); + } + + const binaryName = `agent-relay-broker-${suffix}`; + const downloadUrl = `https://github.com/AgentWorkforce/relay/releases/download/v${version}/${binaryName}`; + + console.log(`[agent-relay] Downloading v${version} from ${downloadUrl}`); + + try { + fs.mkdirSync(installDir, { recursive: true }); + execSync(`curl -fsSL "${downloadUrl}" -o "${targetPath}"`, { + timeout: 60_000, + stdio: ['pipe', 'pipe', 'pipe'], + }); + fs.chmodSync(targetPath, 0o755); + + // macOS: re-sign to avoid Gatekeeper issues + if (process.platform === 'darwin') { + try { + execSync(`codesign --force --sign - "${targetPath}"`, { + timeout: 10_000, + stdio: ['pipe', 'pipe', 'pipe'], + }); + } catch { + // Non-fatal + } + } + + // Verify + execSync(`"${targetPath}" --help`, { timeout: 10_000, stdio: ['pipe', 'pipe', 'pipe'] }); + } catch (err) { + try { fs.unlinkSync(targetPath); } catch { /* ignore */ } + const message = err instanceof Error ? err.message : String(err); + throw new AgentRelayProcessError( + `Failed to install broker binary: ${message}\n` + + 'Install manually: curl -fsSL https://raw.githubusercontent.com/AgentWorkforce/relay/main/install.sh | bash' + ); + } + + console.log(`[agent-relay] Broker installed to ${targetPath}`); + return targetPath; +} + function resolveDefaultBinaryPath(): string { const brokerExe = process.platform === 'win32' ? 'agent-relay-broker.exe' : 'agent-relay-broker'; const moduleDir = path.dirname(fileURLToPath(import.meta.url)); @@ -659,11 +743,7 @@ function resolveDefaultBinaryPath(): string { // Try platform-specific name first (CI publishes per-platform binaries), // then fall back to the generic name (local dev / postinstall copy). const binDir = path.resolve(moduleDir, '..', 'bin'); - const platformMap: Record> = { - darwin: { arm64: 'darwin-arm64', x64: 'darwin-x64' }, - linux: { arm64: 'linux-arm64', x64: 'linux-x64' }, - }; - const suffix = platformMap[process.platform]?.[process.arch]; + const suffix = detectPlatformSuffix(); if (suffix) { const platformBinary = path.join(binDir, `agent-relay-broker-${suffix}`); if (fs.existsSync(platformBinary)) { @@ -682,6 +762,6 @@ function resolveDefaultBinaryPath(): string { return standaloneBroker; } - // 4. Fall back to agent-relay on PATH (may be Node CLI — will fail for broker ops) - return 'agent-relay'; + // 4. Auto-install from GitHub releases + return installBrokerBinary(); } From 01db8c4e900dfea573c30a9d8b5e0248bf8db346 Mon Sep 17 00:00:00 2001 From: Khaliq Date: Mon, 2 Mar 2026 13:29:53 +0100 Subject: [PATCH 4/9] docs(sdk-py): update README with direct spawn/message API examples Lead with the new AgentRelay API showing Claude + Codex collaboration, move workflow builder to an advanced section. Co-Authored-By: Claude Opus 4.6 --- packages/sdk-py/README.md | 212 ++++++++++++++++++-------------------- 1 file changed, 98 insertions(+), 114 deletions(-) diff --git a/packages/sdk-py/README.md b/packages/sdk-py/README.md index 8779fada2..5b72039b2 100644 --- a/packages/sdk-py/README.md +++ b/packages/sdk-py/README.md @@ -1,6 +1,6 @@ # Agent Relay Python SDK -Python SDK for defining and running Agent Relay workflows. Provides the same workflow builder API as the TypeScript SDK. +Python SDK for real-time agent-to-agent communication. Spawn AI agents, send messages, and coordinate multi-agent workflows with a simple async API. ## Installation @@ -8,163 +8,147 @@ Python SDK for defining and running Agent Relay workflows. Provides the same wor pip install agent-relay ``` +The SDK automatically downloads the broker binary on first use. + ## Requirements - Python 3.10+ -- `agent-relay` CLI installed (`npm install -g agent-relay`) - -The SDK builds workflow configurations and executes them via the `agent-relay run` CLI. -## Builder API +## Quick Start ```python -from agent_relay import workflow, VerificationCheck - -result = ( - workflow("ship-feature") - .description("Plan, build, and verify a feature") - .pattern("dag") - .max_concurrency(3) - .timeout(60 * 60 * 1000) - .channel("feature-channel") - .idle_nudge(nudge_after_ms=120_000, escalate_after_ms=120_000, max_nudges=1) - .trajectories(enabled=True, reflect_on_converge=True) - .agent("planner", cli="claude", role="Planning lead") - .agent( - "builder", - cli="codex", - role="Implementation engineer", - interactive=False, - idle_threshold_secs=45, - retries=1, +import asyncio +from agent_relay import AgentRelay, Models + +async def main(): + relay = AgentRelay(channels=["dev"]) + + # Event hooks + relay.on_message_received = lambda msg: print(f"[{msg.from_name}]: {msg.text}") + relay.on_agent_ready = lambda agent: print(f" {agent.name} ready") + relay.on_agent_exited = lambda agent: print(f" {agent.name} exited") + + # Spawn agents + await relay.claude.spawn( + name="Reviewer", + model=Models.Claude.OPUS, + channels=["dev"], + task="Review the PR and suggest improvements", ) - .step("plan", agent="planner", task="Create a detailed plan") - .step( - "build", - agent="builder", - task="Implement the approved plan", - depends_on=["plan"], - verification=VerificationCheck(type="output_contains", value="DONE"), + + await relay.codex.spawn( + name="Builder", + model=Models.Codex.GPT_5_3_CODEX, + channels=["dev"], + task="Implement the suggested improvements", ) - .run() -) -``` -## Workflow Templates + # Wait for both agents to be ready + await asyncio.gather( + relay.wait_for_agent_ready("Reviewer"), + relay.wait_for_agent_ready("Builder"), + ) -Built-in templates for common multi-agent patterns: + # Let agents collaborate, then shut down + await asyncio.sleep(600) + await relay.shutdown() -### Fan-Out +asyncio.run(main()) +``` -Parallel execution across multiple agents with synthesis: +## API -```python -from agent_relay import fan_out +### AgentRelay -builder = fan_out( - "parallel-analysis", - tasks=[ - "Analyze backend modules and summarize risks", - "Analyze frontend modules and summarize risks", - ], - synthesis_task="Synthesize both analyses into one prioritized action plan", -) +The main entry point. Pass `channels` to subscribe to message channels. -result = builder.run() +```python +relay = AgentRelay(channels=["dev", "planning"]) ``` -### Pipeline +### Spawning Agents -Sequential stage-based execution: +Use runtime-specific spawners: ```python -from agent_relay import pipeline, PipelineStage - -builder = pipeline( - "release-pipeline", - stages=[ - PipelineStage(name="plan", task="Create release plan"), - PipelineStage(name="implement", task="Implement planned changes"), - PipelineStage(name="verify", task="Validate and produce release notes"), - ], -) +await relay.claude.spawn(name="Agent1", model=Models.Claude.SONNET, channels=["dev"], task="...") +await relay.codex.spawn(name="Agent2", model=Models.Codex.GPT_5_3_CODEX, channels=["dev"], task="...") +await relay.gemini.spawn(name="Agent3", model=Models.Gemini.GEMINI_2_5_PRO, channels=["dev"], task="...") ``` -### DAG - -Direct workflow definition with explicit dependencies: +### Sending Messages ```python -from agent_relay import dag - -builder = dag( - "complex-workflow", - agents=[...], - steps=[...], -) +human = relay.system() +await human.send_message(to="Agent1", text="Please start the analysis") ``` -## Event Callbacks - -Monitor workflow execution with typed event callbacks: +### Event Hooks ```python -from agent_relay import run_yaml, RunOptions +relay.on_message_received = lambda msg: ... # New message +relay.on_agent_ready = lambda agent: ... # Agent connected +relay.on_agent_exited = lambda agent: ... # Agent exited +relay.on_agent_spawned = lambda agent: ... # Agent spawned +relay.on_worker_output = lambda data: ... # Agent output +relay.on_agent_idle = lambda agent: ... # Agent idle +``` -def on_event(event): - print(event.type, event) +### Models -result = run_yaml( - "workflows/release.yaml", - RunOptions(workflow="release", on_event=on_event), -) -``` +```python +Models.Claude.OPUS +Models.Claude.SONNET +Models.Claude.HAIKU -Supported event types: +Models.Codex.GPT_5_2_CODEX +Models.Codex.GPT_5_3_CODEX -| Event | Description | -|-------|-------------| -| `run:started` | Workflow execution began | -| `run:completed` | Workflow finished successfully | -| `run:failed` | Workflow failed | -| `run:cancelled` | Workflow was cancelled | -| `step:started` | Step execution began | -| `step:completed` | Step finished successfully | -| `step:failed` | Step failed | -| `step:skipped` | Step was skipped | -| `step:retrying` | Step is being retried | -| `step:nudged` | Idle agent was nudged | -| `step:force-released` | Agent was force-released | +Models.Gemini.GEMINI_2_5_PRO +Models.Gemini.GEMINI_2_5_FLASH +``` -## YAML Workflow Execution +## Workflow Builder -Execute workflows defined in YAML files: +For structured DAG-based workflows, the builder API is also available: ```python -from agent_relay import run_yaml, RunOptions - -result = run_yaml( - "workflows/migration.yaml", - RunOptions( - workflow="main", - trajectories=False, - vars={"target": "staging"}, - ), +from agent_relay import workflow, VerificationCheck + +result = ( + workflow("ship-feature") + .description("Plan, build, and verify a feature") + .pattern("dag") + .agent("planner", cli="claude", role="Planning lead") + .agent("builder", cli="codex", role="Implementation engineer") + .step("plan", agent="planner", task="Create a detailed plan") + .step("build", agent="builder", task="Implement the plan", depends_on=["plan"]) + .run() ) ``` -## Configuration Export +### Workflow Templates -Export workflow configuration without executing: +Built-in templates for common patterns: ```python -builder = workflow("my-workflow").agent(...).step(...) +from agent_relay import fan_out, pipeline, dag -# Get as Python dict -config = builder.to_config() +# Fan-out: parallel execution with synthesis +builder = fan_out( + "parallel-analysis", + tasks=["Analyze backend", "Analyze frontend"], + synthesis_task="Combine analyses", +) -# Get as YAML string -yaml_str = builder.to_yaml() +# Pipeline: sequential stages +builder = pipeline( + "release-pipeline", + stages=[ + PipelineStage(name="plan", task="Create release plan"), + PipelineStage(name="implement", task="Implement changes"), + ], +) ``` ## License From e7d236fccebcb72f2c9947caeb3a719058623b28 Mon Sep 17 00:00:00 2001 From: Khaliq Date: Mon, 2 Mar 2026 13:35:12 +0100 Subject: [PATCH 5/9] fix(sdk-py): address Devin review feedback - Fix concurrent wait_for_exit/wait_for_idle overwrite bug: use list of futures per agent so multiple callers all resolve correctly - Remove dead exception handler in send_message: client already catches unsupported_operation, check result dict sentinel instead - Skip on_message_sent hook for unsupported operations - Fix version placeholder to match latest published version (3.0.2) Co-Authored-By: Claude Opus 4.6 --- packages/sdk-py/pyproject.toml | 2 +- packages/sdk-py/src/agent_relay/relay.py | 132 +++++++++++------------ 2 files changed, 63 insertions(+), 71 deletions(-) diff --git a/packages/sdk-py/pyproject.toml b/packages/sdk-py/pyproject.toml index e1a5002e0..75198bced 100644 --- a/packages/sdk-py/pyproject.toml +++ b/packages/sdk-py/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "agent-relay-sdk" -version = "0.3.0" +version = "3.0.2" description = "Python SDK for Agent Relay workflows" readme = "README.md" license = "Apache-2.0" diff --git a/packages/sdk-py/src/agent_relay/relay.py b/packages/sdk-py/src/agent_relay/relay.py index de49dc1cc..9a71abb8c 100644 --- a/packages/sdk-py/src/agent_relay/relay.py +++ b/packages/sdk-py/src/agent_relay/relay.py @@ -14,7 +14,7 @@ from dataclasses import dataclass, field from typing import Any, Callable, Optional -from .client import AgentRelayClient, AgentRelayProtocolError +from .client import AgentRelayClient from .protocol import AgentRuntime, BrokerEvent # ── Public types ────────────────────────────────────────────────────────────── @@ -109,13 +109,19 @@ async def wait_for_exit(self, timeout_ms: Optional[int] = None) -> str: return "timeout" future: asyncio.Future[str] = asyncio.get_running_loop().create_future() - self._relay._exit_resolvers[self._name] = future + self._relay._exit_resolvers.setdefault(self._name, []).append(future) if timeout_ms is not None: try: return await asyncio.wait_for(future, timeout=timeout_ms / 1000) except asyncio.TimeoutError: - self._relay._exit_resolvers.pop(self._name, None) + futures = self._relay._exit_resolvers.get(self._name, []) + try: + futures.remove(future) + except ValueError: + pass + if not futures: + self._relay._exit_resolvers.pop(self._name, None) return "timeout" else: return await future @@ -128,13 +134,19 @@ async def wait_for_idle(self, timeout_ms: Optional[int] = None) -> str: return "timeout" future: asyncio.Future[str] = asyncio.get_running_loop().create_future() - self._relay._idle_resolvers[self._name] = future + self._relay._idle_resolvers.setdefault(self._name, []).append(future) if timeout_ms is not None: try: return await asyncio.wait_for(future, timeout=timeout_ms / 1000) except asyncio.TimeoutError: - self._relay._idle_resolvers.pop(self._name, None) + futures = self._relay._idle_resolvers.get(self._name, []) + try: + futures.remove(future) + except ValueError: + pass + if not futures: + self._relay._idle_resolvers.pop(self._name, None) return "timeout" else: return await future @@ -149,26 +161,14 @@ async def send_message( data: Optional[dict[str, Any]] = None, ) -> Message: client = await self._relay._ensure_started() - try: - result = await client.send_message( - to=to, - text=text, - from_=self._name, - thread_id=thread_id, - priority=priority, - data=data, - ) - except AgentRelayProtocolError as e: - if e.code == "unsupported_operation": - return Message( - event_id="unsupported_operation", - from_name=self._name, - to=to, - text=text, - thread_id=thread_id, - data=data, - ) - raise + result = await client.send_message( + to=to, + text=text, + from_=self._name, + thread_id=thread_id, + priority=priority, + data=data, + ) event_id = result.get("event_id", secrets.token_hex(8)) msg = Message( @@ -179,7 +179,8 @@ async def send_message( thread_id=thread_id, data=data, ) - if self._relay.on_message_sent: + # Don't fire hook for unsupported operations + if event_id != "unsupported_operation" and self._relay.on_message_sent: self._relay.on_message_sent(msg) return msg @@ -222,26 +223,14 @@ async def send_message( data: Optional[dict[str, Any]] = None, ) -> Message: client = await self._relay._ensure_started() - try: - result = await client.send_message( - to=to, - text=text, - from_=self._name, - thread_id=thread_id, - priority=priority, - data=data, - ) - except AgentRelayProtocolError as e: - if e.code == "unsupported_operation": - return Message( - event_id="unsupported_operation", - from_name=self._name, - to=to, - text=text, - thread_id=thread_id, - data=data, - ) - raise + result = await client.send_message( + to=to, + text=text, + from_=self._name, + thread_id=thread_id, + priority=priority, + data=data, + ) event_id = result.get("event_id", secrets.token_hex(8)) msg = Message( @@ -252,7 +241,8 @@ async def send_message( thread_id=thread_id, data=data, ) - if self._relay.on_message_sent: + # Don't fire hook for unsupported operations + if event_id != "unsupported_operation" and self._relay.on_message_sent: self._relay.on_message_sent(msg) return msg @@ -369,8 +359,8 @@ def __init__( self._exited_agents: set[str] = set() self._idle_agents: set[str] = set() self._output_listeners: dict[str, list[Callable[[str], None]]] = {} - self._exit_resolvers: dict[str, asyncio.Future[str]] = {} - self._idle_resolvers: dict[str, asyncio.Future[str]] = {} + self._exit_resolvers: dict[str, list[asyncio.Future[str]]] = {} + self._idle_resolvers: dict[str, list[asyncio.Future[str]]] = {} # Shorthand spawners self.codex = AgentSpawner("codex", "Codex", self) @@ -610,13 +600,15 @@ async def shutdown(self) -> None: self._idle_agents.clear() self._output_listeners.clear() - for future in self._exit_resolvers.values(): - if not future.done(): - future.set_result("released") + for futures in self._exit_resolvers.values(): + for future in futures: + if not future.done(): + future.set_result("released") self._exit_resolvers.clear() - for future in self._idle_resolvers.values(): - if not future.done(): - future.set_result("exited") + for futures in self._idle_resolvers.values(): + for future in futures: + if not future.done(): + future.set_result("exited") self._idle_resolvers.clear() # ── Private helpers ─────────────────────────────────────────────────── @@ -670,12 +662,12 @@ def on_event(event: BrokerEvent) -> None: self.on_agent_released(agent) self._known_agents.pop(name, None) self._output_listeners.pop(name, None) - future = self._exit_resolvers.pop(name, None) - if future and not future.done(): - future.set_result("released") - idle_future = self._idle_resolvers.pop(name, None) - if idle_future and not idle_future.done(): - idle_future.set_result("exited") + for future in self._exit_resolvers.pop(name, []): + if not future.done(): + future.set_result("released") + for future in self._idle_resolvers.pop(name, []): + if not future.done(): + future.set_result("exited") elif kind == "agent_exited": agent = self._known_agents.get(name) or self._ensure_agent_handle(name) @@ -689,12 +681,12 @@ def on_event(event: BrokerEvent) -> None: self.on_agent_exited(agent) self._known_agents.pop(name, None) self._output_listeners.pop(name, None) - future = self._exit_resolvers.pop(name, None) - if future and not future.done(): - future.set_result("exited") - idle_future = self._idle_resolvers.pop(name, None) - if idle_future and not idle_future.done(): - idle_future.set_result("exited") + for future in self._exit_resolvers.pop(name, []): + if not future.done(): + future.set_result("exited") + for future in self._idle_resolvers.pop(name, []): + if not future.done(): + future.set_result("exited") elif kind == "agent_exit": agent = self._known_agents.get(name) or self._ensure_agent_handle(name) @@ -730,9 +722,9 @@ def on_event(event: BrokerEvent) -> None: "name": name, "idle_secs": event.get("idle_secs", 0), }) - idle_future = self._idle_resolvers.pop(name, None) - if idle_future and not idle_future.done(): - idle_future.set_result("idle") + for future in self._idle_resolvers.pop(name, []): + if not future.done(): + future.set_result("idle") # Delivery events if kind and kind.startswith("delivery_"): From 61589de518731014bf2efc1440dff6e9f7f639d1 Mon Sep 17 00:00:00 2001 From: Khaliq Date: Mon, 2 Mar 2026 16:27:23 +0100 Subject: [PATCH 6/9] docs: add Python SDK to docs site - Add Python install/examples to introduction and quickstart using CodeGroup tabs - Create full Python SDK reference page (reference/sdk-py.mdx) - Add sdk-py to mint.json navigation - Cross-link between TypeScript and Python SDK reference pages Co-Authored-By: Claude Opus 4.6 --- docs/introduction.mdx | 17 ++- docs/mint.json | 3 +- docs/quickstart.mdx | 61 ++++++++- docs/reference/sdk-py.mdx | 266 ++++++++++++++++++++++++++++++++++++++ docs/reference/sdk.mdx | 5 +- 5 files changed, 340 insertions(+), 12 deletions(-) create mode 100644 docs/reference/sdk-py.mdx diff --git a/docs/introduction.mdx b/docs/introduction.mdx index 5ee2eabde..67a0034a4 100644 --- a/docs/introduction.mdx +++ b/docs/introduction.mdx @@ -3,12 +3,18 @@ title: 'Introduction' description: 'Programmatically spawn and coordinate AI agents from TypeScript or Python.' --- -The `@agent-relay/sdk` lets you spawn AI agents (Claude, Codex) and coordinate them from code — send messages between agents, listen for responses, and shut them down when done. +The Agent Relay SDK lets you spawn AI agents (Claude, Codex) and coordinate them from code — send messages between agents, listen for responses, and shut them down when done. Available for both TypeScript and Python. -```bash + +```bash TypeScript npm install @agent-relay/sdk ``` +```bash Python +pip install agent-relay-sdk +``` + + ## What You Can Do @@ -32,7 +38,10 @@ npm install @agent-relay/sdk Get your first agents talking to each other in minutes. - - Full API reference for AgentRelay, spawn, messaging, and more. + + Full API reference for the TypeScript SDK. + + + Full API reference for the Python SDK. diff --git a/docs/mint.json b/docs/mint.json index 09fb48f7b..0207c56c3 100644 --- a/docs/mint.json +++ b/docs/mint.json @@ -34,7 +34,8 @@ { "group": "SDK", "pages": [ - "reference/sdk" + "reference/sdk", + "reference/sdk-py" ] } ], diff --git a/docs/quickstart.mdx b/docs/quickstart.mdx index e4ee85d60..032802e7e 100644 --- a/docs/quickstart.mdx +++ b/docs/quickstart.mdx @@ -5,13 +5,20 @@ description: 'Spawn your first agents and send messages between them.' ## Install -```bash + +```bash TypeScript npm install @agent-relay/sdk ``` +```bash Python +pip install agent-relay-sdk +``` + + ## Spawn Agents and Send a Message -```typescript + +```typescript TypeScript import { AgentRelay, Models } from '@agent-relay/sdk'; const relay = new AgentRelay(); @@ -38,6 +45,47 @@ await planner.sendMessage({ to: 'Coder', text: 'Implement the auth module' }); await relay.shutdown(); ``` +```python Python +import asyncio +from agent_relay import AgentRelay, Models + +async def main(): + relay = AgentRelay(channels=["dev"]) + + # Listen for messages + relay.on_message_received = lambda msg: print(f"[{msg.from_name}]: {msg.text}") + + # Spawn a planner (Claude) and a coder (Codex) + await relay.claude.spawn( + name="Planner", + model=Models.Claude.OPUS, + channels=["dev"], + task="Plan the feature implementation", + ) + + await relay.codex.spawn( + name="Coder", + model=Models.Codex.GPT_5_3_CODEX, + channels=["dev"], + task="Implement the plan", + ) + + # Wait for both agents to be ready + await asyncio.gather( + relay.wait_for_agent_ready("Planner"), + relay.wait_for_agent_ready("Coder"), + ) + + # Send a message from system to Coder + human = relay.system() + await human.send_message(to="Coder", text="Implement the auth module") + + await relay.shutdown() + +asyncio.run(main()) +``` + + ## Supported CLIs | CLI | Constant prefix | @@ -47,8 +95,11 @@ await relay.shutdown(); ## Next Steps - - - Complete API reference. + + + Complete TypeScript API reference. + + + Complete Python API reference. diff --git a/docs/reference/sdk-py.mdx b/docs/reference/sdk-py.mdx new file mode 100644 index 000000000..171593350 --- /dev/null +++ b/docs/reference/sdk-py.mdx @@ -0,0 +1,266 @@ +--- +title: Python SDK Reference +description: Complete reference for the agent-relay-sdk Python package +--- + +# Python SDK Reference + +```bash +pip install agent-relay-sdk +``` + +The SDK automatically downloads the broker binary on first use — no additional setup required. + +--- + +## AgentRelay + +The main entry point. Manages the broker lifecycle, spawns agents, and routes messages. + +```python +from agent_relay import AgentRelay + +relay = AgentRelay( + channels=["general"], # Default channels + binary_path=None, # Path to broker binary (auto-resolved) + binary_args=None, # Extra broker arguments + broker_name=None, # Broker instance name (auto-generated) + cwd=None, # Working directory (defaults to cwd) + env=None, # Environment variables (inherited) + request_timeout_ms=10_000, # Timeout for broker requests + shutdown_timeout_ms=3_000, # Timeout when shutting down +) +``` + +--- + +## Spawning Agents + +### Shorthand Spawners + +```python +# Spawn by CLI type +agent = await relay.claude.spawn(name="Analyst", model=Models.Claude.OPUS, channels=["dev"], task="...") +agent = await relay.codex.spawn(name="Coder", model=Models.Codex.GPT_5_3_CODEX, channels=["dev"], task="...") +agent = await relay.gemini.spawn(name="Researcher", model=Models.Gemini.GEMINI_2_5_PRO, channels=["dev"], task="...") +``` + +**Spawn keyword arguments:** + +| Parameter | Type | Description | +| ---------- | --------------- | --------------------------------- | +| `name` | `str` | Agent name (defaults to CLI name) | +| `model` | `str` | Model to use (see Models below) | +| `task` | `str` | Initial task / prompt | +| `channels` | `list[str]` | Channels to join | +| `args` | `list[str]` | Extra CLI arguments | +| `cwd` | `str` | Working directory override | + +### `relay.spawn(name, cli, task?, options?)` + +Spawn any CLI by name: + +```python +from agent_relay import SpawnOptions + +agent = await relay.spawn("Worker", "claude", "Help with refactoring", SpawnOptions( + model="sonnet", + channels=["team"], +)) +``` + +### `relay.spawn_and_wait(name, cli, task, options?)` + +Spawn and wait for the agent to be ready before returning: + +```python +agent = await relay.spawn_and_wait("Worker", "claude", "Analyze the codebase", timeout_ms=30_000) +``` + +--- + +## Agent + +All spawn methods return an `Agent`: + +```python +class Agent: + name: str # Agent name + runtime: str # "pty" | "headless" + channels: list[str] # Joined channels + status: str # "spawning" | "ready" | "idle" | "exited" + exit_code: int | None + exit_signal: str | None + exit_reason: str | None + + async def send_message(*, to: str, text: str, thread_id=None, priority=None, data=None) -> Message + async def release(reason=None) -> None + async def wait_for_ready(timeout_ms=60_000) -> None + async def wait_for_exit(timeout_ms=None) -> str # "exited" | "timeout" | "released" + async def wait_for_idle(timeout_ms=None) -> str # "idle" | "timeout" | "exited" + def on_output(callback) -> Callable[[], None] # returns unsubscribe +``` + +--- + +## Human Handles + +Send messages from a named human or system identity (not a spawned CLI agent): + +```python +# Named human +human = relay.human("Orchestrator") +await human.send_message(to="Worker", text="Start the task") + +# System identity (name: "system") +sys = relay.system() +await sys.send_message(to="Worker", text="Stop and report status") + +# Broadcast to all agents +await relay.broadcast("All hands: stand by for new task") +``` + +--- + +## Event Hooks + +Assign a callable to subscribe, `None` to unsubscribe: + +```python +relay.on_message_received = lambda msg: ... # New message +relay.on_message_sent = lambda msg: ... # Message sent +relay.on_agent_spawned = lambda agent: ... # Agent spawned +relay.on_agent_released = lambda agent: ... # Agent released +relay.on_agent_exited = lambda agent: ... # Agent exited +relay.on_agent_ready = lambda agent: ... # Agent ready +relay.on_agent_idle = lambda data: ... # Agent idle (data: {name, idle_secs}) +relay.on_agent_exit_requested = lambda data: ... # Exit requested (data: {name, reason}) +relay.on_worker_output = lambda data: ... # Output (data: {name, stream, chunk}) +relay.on_delivery_update = lambda event: ... # Delivery status update +``` + +**Message type:** + +```python +@dataclass +class Message: + event_id: str + from_name: str + to: str + text: str + thread_id: str | None = None + data: dict | None = None +``` + +--- + +## Other Methods + +```python +# List all known agents +agents = await relay.list_agents() # list[Agent] + +# Get broker status +status = await relay.get_status() # dict + +# Wait for the first of many agents to exit +agent, result = await AgentRelay.wait_for_any([agent1, agent2], timeout_ms=60_000) + +# Shut down all agents and the broker +await relay.shutdown() +``` + +--- + +## Complete Example + +```python +import asyncio +from agent_relay import AgentRelay, Models + +async def main(): + relay = AgentRelay(channels=["dev"]) + + relay.on_message_received = lambda msg: print(f"[{msg.from_name}]: {msg.text}") + relay.on_agent_ready = lambda agent: print(f" ✓ {agent.name} ready") + relay.on_agent_exited = lambda agent: print(f" ✗ {agent.name} exited") + + # Spawn agents + await relay.claude.spawn( + name="Planner", + model=Models.Claude.OPUS, + channels=["dev"], + task="Plan the feature implementation", + ) + + await relay.codex.spawn( + name="Coder", + model=Models.Codex.GPT_5_3_CODEX, + channels=["dev"], + task="Implement the plan", + ) + + # Wait for both to be ready + await asyncio.gather( + relay.wait_for_agent_ready("Planner"), + relay.wait_for_agent_ready("Coder"), + ) + + # Send a message + human = relay.system() + await human.send_message(to="Coder", text="Start implementing the auth module") + + # Let agents collaborate + await asyncio.sleep(600) + await relay.shutdown() + +asyncio.run(main()) +``` + +--- + +## Models + +```python +from agent_relay import Models + +# Claude +Models.Claude.OPUS # "opus" +Models.Claude.SONNET # "sonnet" +Models.Claude.HAIKU # "haiku" + +# Codex +Models.Codex.GPT_5_3_CODEX # "gpt-5.3-codex" +Models.Codex.GPT_5_2_CODEX # "gpt-5.2-codex" +Models.Codex.GPT_5_3_CODEX_SPARK # "gpt-5.3-codex-spark" +Models.Codex.GPT_5_1_CODEX_MAX # "gpt-5.1-codex-max" +Models.Codex.GPT_5_1_CODEX_MINI # "gpt-5.1-codex-mini" + +# Gemini +Models.Gemini.GEMINI_2_5_PRO # "gemini-2.5-pro" +Models.Gemini.GEMINI_2_5_FLASH # "gemini-2.5-flash" +``` + +--- + +## Error Types + +```python +from agent_relay import AgentRelayProtocolError, AgentRelayProcessError + +try: + await relay.claude.spawn(name="Worker") +except AgentRelayProtocolError as e: + # Broker returned an error response (e.code available) + pass +except AgentRelayProcessError as e: + # Broker process failed to start or crashed + pass +``` + +--- + +## See Also + +- [Quickstart](/quickstart) — Spawn agents and exchange messages quickly +- [TypeScript SDK Reference](/reference/sdk) — TypeScript API reference diff --git a/docs/reference/sdk.mdx b/docs/reference/sdk.mdx index 3a34187d5..ffdd3ece9 100644 --- a/docs/reference/sdk.mdx +++ b/docs/reference/sdk.mdx @@ -1,9 +1,9 @@ --- -title: SDK Reference +title: TypeScript SDK Reference description: Complete reference for the @agent-relay/sdk package --- -# SDK Reference +# TypeScript SDK Reference ```bash npm install @agent-relay/sdk @@ -285,3 +285,4 @@ try { ## See Also - [Quickstart](/quickstart) — Spawn agents and exchange messages quickly +- [Python SDK Reference](/reference/sdk-py) — Python API reference From e2cd6a1fea25f61386ce6446124efce67f9385a9 Mon Sep 17 00:00:00 2001 From: Agent Relay Date: Mon, 2 Mar 2026 16:26:11 +0000 Subject: [PATCH 7/9] fix(sdk-py): handle agent_released event in wait_for_agent_message Address Devin review comment - the Python SDK's wait_for_agent_message was missing handling for the agent_released event. If an agent was released before sending its first message, the method would hang until timeout instead of immediately rejecting with a clear error. This brings parity with the TypeScript SDK which handles all three cases: relay_inbound, agent_exited, and agent_released. Co-Authored-By: Claude Opus 4.5 --- packages/sdk-py/src/agent_relay/relay.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/packages/sdk-py/src/agent_relay/relay.py b/packages/sdk-py/src/agent_relay/relay.py index 9a71abb8c..0a40f9794 100644 --- a/packages/sdk-py/src/agent_relay/relay.py +++ b/packages/sdk-py/src/agent_relay/relay.py @@ -549,6 +549,10 @@ def on_event(event: BrokerEvent) -> None: future.set_exception( RuntimeError(f"Agent '{name}' exited before sending its first relay message") ) + elif event.get("kind") == "agent_released" and event.get("name") == name: + future.set_exception( + RuntimeError(f"Agent '{name}' was released before sending its first relay message") + ) unsub = client.on_event(on_event) try: From 5e3b07cddacbdd74263915ea82d4b3a68cc7f0e0 Mon Sep 17 00:00:00 2001 From: Agent Relay Date: Mon, 2 Mar 2026 16:42:07 +0000 Subject: [PATCH 8/9] fix(sdk-py): inject RELAY_API_KEY into custom env dict Address Devin review comment - when user provides a custom env dict to AgentRelay(env={...}), RELAY_API_KEY from os.environ was not being injected. This would silently break Relaycast workspace connection. Now matches TypeScript SDK behavior at packages/sdk/src/relay.ts:790-806 which explicitly handles injecting RELAY_API_KEY into custom env dicts. Co-Authored-By: Claude Opus 4.5 --- packages/sdk-py/src/agent_relay/relay.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/packages/sdk-py/src/agent_relay/relay.py b/packages/sdk-py/src/agent_relay/relay.py index 0a40f9794..0a27fe8df 100644 --- a/packages/sdk-py/src/agent_relay/relay.py +++ b/packages/sdk-py/src/agent_relay/relay.py @@ -388,6 +388,11 @@ async def _ensure_started(self) -> AgentRelayClient: self._client_kwargs["env"] = {**os.environ, "RELAY_API_KEY": env_key} else: self._client_kwargs["env"] = dict(os.environ) + else: + # Inject RELAY_API_KEY into custom env if not already present + env_key = os.environ.get("RELAY_API_KEY") + if env_key and "RELAY_API_KEY" not in env: + env["RELAY_API_KEY"] = env_key # Remove None values to use defaults kwargs = {k: v for k, v in self._client_kwargs.items() if v is not None} From 6cb734bf1f3595491b7220be8f5d727b09ccea61 Mon Sep 17 00:00:00 2001 From: Agent Relay Date: Mon, 2 Mar 2026 16:52:40 +0000 Subject: [PATCH 9/9] docs(sdk-py): fix pip package name in README Changed `pip install agent-relay` to `pip install agent-relay-sdk` to match the package name in pyproject.toml. Co-Authored-By: Claude Opus 4.5 --- packages/sdk-py/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/sdk-py/README.md b/packages/sdk-py/README.md index 5b72039b2..5fd36fedd 100644 --- a/packages/sdk-py/README.md +++ b/packages/sdk-py/README.md @@ -5,7 +5,7 @@ Python SDK for real-time agent-to-agent communication. Spawn AI agents, send mes ## Installation ```bash -pip install agent-relay +pip install agent-relay-sdk ``` The SDK automatically downloads the broker binary on first use.