5 changes: 3 additions & 2 deletions docs.json
@@ -195,7 +195,8 @@
"sdk/guides/llm-registry",
"sdk/guides/llm-routing",
"sdk/guides/llm-reasoning",
"sdk/guides/llm-image-input"
"sdk/guides/llm-image-input",
"sdk/guides/llm-responses-streaming"
]
},
{
@@ -349,4 +350,4 @@
{ "source": "/openhands/usage/prompting/prompting-best-practices", "destination": "/openhands/usage/tips/prompting-best-practices" },
{ "source": "/openhands/usage/feedback", "destination": "/openhands/usage/troubleshooting/feedback" }
]
}
}
10 changes: 5 additions & 5 deletions sdk/guides/agent-server/api-sandbox.mdx
@@ -23,7 +23,7 @@ Usage:
     uv run examples/24_remote_convo_with_api_sandboxed_server.py

 Requirements:
-- LITELLM_API_KEY: API key for LLM access
+- LLM_API_KEY: API key for LLM access
 - RUNTIME_API_KEY: API key for runtime API access
 """

@@ -45,13 +45,13 @@ from openhands.workspace import APIRemoteWorkspace
 logger = get_logger(__name__)


-api_key = os.getenv("LITELLM_API_KEY")
-assert api_key, "LITELLM_API_KEY required"
+api_key = os.getenv("LLM_API_KEY")
+assert api_key, "LLM_API_KEY required"

 llm = LLM(
     usage_id="agent",
     model="litellm_proxy/anthropic/claude-sonnet-4-5-20250929",
-    base_url="https://llm-proxy.eval.all-hands.dev",
+    base_url=os.getenv("LLM_BASE_URL"),
     api_key=SecretStr(api_key),
 )

@@ -62,7 +62,7 @@ if not runtime_api_key:


 with APIRemoteWorkspace(
-    runtime_api_url="https://runtime.eval.all-hands.dev",
+    runtime_api_url=os.getenv("RUNTIME_API_URL", "https://runtime.eval.all-hands.dev"),
     runtime_api_key=runtime_api_key,
     server_image="ghcr.io/openhands/agent-server:main-python",
 ) as workspace:
26 changes: 18 additions & 8 deletions sdk/guides/agent-server/docker-sandbox.mdx
@@ -44,7 +44,7 @@ assert api_key is not None, "LLM_API_KEY environment variable is not set."
 llm = LLM(
     usage_id="agent",
     model="litellm_proxy/anthropic/claude-sonnet-4-5-20250929",
-    base_url="https://llm-proxy.eval.all-hands.dev",
+    base_url=os.getenv("LLM_BASE_URL"),
     api_key=SecretStr(api_key),
 )

@@ -258,19 +258,30 @@ assert api_key is not None, "LLM_API_KEY environment variable is not set."
 llm = LLM(
     usage_id="agent",
     model="litellm_proxy/anthropic/claude-sonnet-4-5-20250929",
-    base_url="https://llm-proxy.eval.all-hands.dev",
+    base_url=os.getenv("LLM_BASE_URL"),
     api_key=SecretStr(api_key),
 )

 # Create a Docker-based remote workspace with extra ports for VSCode access
+
+
+def detect_platform():
+    """Detects the correct Docker platform string."""
+    import platform
+
+    machine = platform.machine().lower()
+    if "arm" in machine or "aarch64" in machine:
+        return "linux/arm64"
+    return "linux/amd64"
+
+
 with DockerWorkspace(
     base_image="nikolaik/python-nodejs:python3.12-nodejs22",
     host_port=18010,
-    # TODO: Change this to your platform if not linux/arm64
-    platform="linux/arm64",
+    platform=detect_platform(),
     extra_ports=True,  # Expose extra ports for VSCode and VNC
 ) as workspace:
-    """Extra ports allows you to access VSCode at localhost:8011"""
+    """Extra ports allow you to access VSCode at localhost:18011"""

     # Create agent
     agent = get_default_agent(
@@ -441,7 +452,7 @@ assert api_key is not None, "LLM_API_KEY environment variable is not set."
 llm = LLM(
     usage_id="agent",
     model="litellm_proxy/anthropic/claude-sonnet-4-5-20250929",
-    base_url="https://llm-proxy.eval.all-hands.dev",
+    base_url=os.getenv("LLM_BASE_URL"),
     api_key=SecretStr(api_key),
 )

@@ -458,7 +469,6 @@ def detect_platform():
 with DockerWorkspace(
     base_image="nikolaik/python-nodejs:python3.12-nodejs22",
     host_port=8010,
-    # TODO: Change this to your platform if not linux/arm64
     platform=detect_platform(),
     extra_ports=True,  # Expose extra ports for VSCode and VNC
 ) as workspace:
@@ -492,7 +502,7 @@ with DockerWorkspace(
     logger.info(f"\n📋 Conversation ID: {conversation.state.id}")
     logger.info("📝 Sending first message...")
     conversation.send_message(
-        "Could you go to https://all-hands.dev/ blog page and summarize main "
+        "Could you go to https://openhands.dev/ blog page and summarize main "
         "points of the latest blog?"
     )
     conversation.run()
4 changes: 2 additions & 2 deletions sdk/guides/agent-server/local-server.mdx
@@ -139,13 +139,13 @@ assert api_key is not None, "LLM_API_KEY environment variable is not set."
 llm = LLM(
     usage_id="agent",
     model="litellm_proxy/anthropic/claude-sonnet-4-5-20250929",
-    base_url="https://llm-proxy.eval.all-hands.dev",
+    base_url=os.getenv("LLM_BASE_URL"),
     api_key=SecretStr(api_key),
 )
 title_gen_llm = LLM(
     usage_id="title-gen-llm",
     model="litellm_proxy/openai/gpt-5-mini",
-    base_url="https://llm-proxy.eval.all-hands.dev",
+    base_url=os.getenv("LLM_BASE_URL"),
     api_key=SecretStr(api_key),
 )

2 changes: 1 addition & 1 deletion sdk/guides/custom-tools.mdx
@@ -115,7 +115,7 @@ class GrepExecutor(ToolExecutor[GrepAction, GrepObservation]):
     def __init__(self, bash: BashExecutor):
         self.bash: BashExecutor = bash

-    def __call__(self, action: GrepAction) -> GrepObservation:
+    def __call__(self, action: GrepAction, conversation=None) -> GrepObservation:  # noqa: ARG002
         root = os.path.abspath(action.path)
         pat = shlex.quote(action.pattern)
         root_q = shlex.quote(root)
62 changes: 62 additions & 0 deletions sdk/guides/llm-responses-streaming.mdx
@@ -0,0 +1,62 @@
---
title: Responses Streaming
description: Stream token deltas from the OpenAI Responses API path via LiteLLM.
---

<Note>
This example is available on GitHub: [examples/01_standalone_sdk/24_responses_streaming.py](https://github.com/All-Hands-AI/agent-sdk/blob/main/examples/01_standalone_sdk/24_responses_streaming.py)
</Note>

Enable live token streaming when using the OpenAI Responses API path. This guide shows how to:

- Subscribe to streaming deltas from the model
- Log streamed chunks to a JSONL file
- Optionally render the stream visually or print deltas to stdout

```python icon="python" expandable examples/01_standalone_sdk/24_responses_streaming.py
```

```bash Running the Example
export LLM_API_KEY="your-openai-compatible-api-key"
# Optional overrides
# export LLM_MODEL="openhands/gpt-5-codex"
# export LLM_BASE_URL="https://your-litellm-or-provider-base-url"

cd agent-sdk
uv run python examples/01_standalone_sdk/24_responses_streaming.py
```

### How It Works

- Pass a token callback to `Conversation` to receive streaming chunks as they arrive:

```python
conversation = Conversation(
agent=agent,
workspace=os.getcwd(),
token_callbacks=[on_token],
)
```

- Each chunk contains a delta: `text_delta` for content tokens or `arguments_delta` for tool-call arguments. The example logs a serialized record per chunk to `./logs/stream/*.jsonl`.
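
  A minimal `on_token` callback could look like the sketch below. This is illustrative, not the SDK's exact chunk type: it assumes only that each chunk exposes the `text_delta`/`arguments_delta` fields named above, and the log path is a stand-in for the per-run file the example creates.

```python
import json
import os

LOG_PATH = "./logs/stream/chunks.jsonl"  # illustrative path
os.makedirs(os.path.dirname(LOG_PATH), exist_ok=True)


def on_token(chunk) -> None:
    # A chunk carries either a text delta or a tool-call arguments
    # delta; record whichever is present as one JSONL line.
    record = {
        "text_delta": getattr(chunk, "text_delta", None),
        "arguments_delta": getattr(chunk, "arguments_delta", None),
    }
    with open(LOG_PATH, "a", encoding="utf-8") as f:
        f.write(json.dumps(record) + "\n")
```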

- For a visual live view, use the built-in streaming visualizer:

```python
from openhands.sdk.conversation.streaming_visualizer import create_streaming_visualizer

visualizer = create_streaming_visualizer()
conversation = Conversation(
agent=agent,
workspace=os.getcwd(),
token_callbacks=[on_token],
callbacks=[visualizer.on_event],
visualize=False,
)
```
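
  Here `visualize=False` turns off the conversation's default renderer, so the streaming visualizer registered via `callbacks` is the only component drawing output; leaving both enabled can render each event twice.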

## Next Steps

- **[Reasoning (Responses API)](/sdk/guides/llm-reasoning)** – Access model reasoning traces
- **[LLM Routing](/sdk/guides/llm-routing)** – Route requests to different models
- **[Image Input](/sdk/guides/llm-image-input)** – Send images to multimodal models
2 changes: 1 addition & 1 deletion sdk/guides/llm-routing.mdx
@@ -48,7 +48,7 @@ primary_llm = LLM(
 secondary_llm = LLM(
     usage_id="agent-secondary",
     model="litellm_proxy/mistral/devstral-small-2507",
-    base_url="https://llm-proxy.eval.all-hands.dev",
+    base_url=base_url,
     api_key=SecretStr(api_key),
 )
 multimodal_router = MultimodalRouter(
2 changes: 1 addition & 1 deletion sdk/guides/metrics.mdx
@@ -332,7 +332,7 @@ conversation.run()
 second_llm = LLM(
     usage_id="demo-secondary",
     model="litellm_proxy/anthropic/claude-sonnet-4-5-20250929",
-    base_url="https://llm-proxy.eval.all-hands.dev",
+    base_url=os.getenv("LLM_BASE_URL"),
     api_key=SecretStr(api_key),
 )
 conversation.llm_registry.add(second_llm)