diff --git a/agent_instructions/adding_provider.md b/agent_instructions/adding_provider.md index c017874b..d75bfca5 100644 --- a/agent_instructions/adding_provider.md +++ b/agent_instructions/adding_provider.md @@ -681,6 +681,14 @@ providers without a native equivalent — your provider doesn't need to know abo `function_call` / `function_call_output` items in `payload.input` to the provider's tool-use format and back. The server-tool loop will not work otherwise. See `providers/{anthropic,bedrock,vertex}/convert.rs` for working examples. +- **Shell history rewrite** — a `previous_response_id` continuation replays the prior turn's + *stored* output, which uses the client-facing hosted-shell shapes (`shell_call` / + `shell_call_output`, where `output` is an **array** of `{stdout, stderr, outcome}`). In + function mode `preprocess_shell_tools` rewrites those reconstructed items back to + `function_call` / `function_call_output` (string `output`) before your provider sees them, + so `convert.rs` only ever needs to handle the function-call shapes — never `ShellCall` / + `ShellCallOutput` in `payload.input`. Forwarding the array `output` verbatim makes + OpenAI-compatible upstreams 400 (`output`: expected string, received array). - **Container files** — when the server tool returns a `container_file_citation` annotation, it shows up on a `response.content_part.done` event. You don't need to handle this in your convert layer; the shell executor injects it on the way out via `transform_event`. diff --git a/src/services/responses_chain.rs b/src/services/responses_chain.rs index 464de91d..4269e81e 100644 --- a/src/services/responses_chain.rs +++ b/src/services/responses_chain.rs @@ -76,6 +76,14 @@ fn input_to_items(input: ResponsesInput) -> Vec { /// Map a prior turn's output item to the equivalent input item so it can be /// replayed as conversation history. The inner payloads are identical between /// the two enums, so this is a total, lossless 1:1 mapping. +/// +/// Note the hosted-shell items (`ShellCall` / `ShellCallOutput`) are replayed +/// verbatim here — the array-`output` shape they carry is only valid for +/// native OpenAI passthrough. In function mode `preprocess_shell_tools` +/// (`services/shell_tool.rs`, run per provider in `routes/execution.rs`) +/// rewrites them to `function_call` / `function_call_output` before dispatch, +/// since that's the mode-aware layer that knows whether the shell tool stayed +/// native or was rewritten to a function. fn output_item_to_input(item: ResponsesOutputItem) -> ResponsesInputItem { match item { ResponsesOutputItem::Message(m) => ResponsesInputItem::OutputMessage(m), diff --git a/src/services/shell_tool.rs b/src/services/shell_tool.rs index 0fd5acb8..7a216bdf 100644 --- a/src/services/shell_tool.rs +++ b/src/services/shell_tool.rs @@ -31,9 +31,10 @@ use uuid::Uuid; use crate::{ api_types::responses::{ ContainerFileRef, CreateResponsesPayload, FunctionCallOutput, FunctionCallOutputType, - FunctionTool, KnownShellNetworkPolicyType, ResponsesAnnotation, ResponsesInput, - ResponsesInputItem, ResponsesToolDefinition, ShellDomainSecret, ShellEnvironment, - ShellNetworkPolicyType, + FunctionTool, KnownShellNetworkPolicyType, OutputItemFunctionCall, + OutputItemFunctionCallType, ResponsesAnnotation, ResponsesInput, ResponsesInputItem, + ResponsesToolDefinition, ShellCall, ShellCallOutcome, ShellCallOutputItem, + ShellDomainSecret, ShellEnvironment, ShellNetworkPolicyType, }, config::{ContainersConfig, ShellLimitsConfig}, models::UsageLogEntry, @@ -737,6 +738,19 @@ impl ShellToolArguments { /// path. In OpenAI passthrough modes the native spec is left intact and /// this is skipped. pub fn preprocess_shell_tools(payload: &mut CreateResponsesPayload, hint: &ShellToolHint) { + // History items reconstructed from a `previous_response_id` chain carry + // the client-facing hosted-shell shapes (`shell_call` / + // `shell_call_output`, the latter with an *array* `output`). In function + // mode — the exact set of provider paths that call this — the model + // originally exchanged `function_call` / `function_call_output`, so + // replay those shapes instead. Forwarded verbatim the array `output` + // makes OpenAI-compatible upstreams reject the turn (`output`: expected + // string, received array) and Anthropic / Bedrock / Vertex silently drop + // the items, erasing the tool result from history. Runs before the tools + // early-return so a continuation that no longer re-declares the shell + // tool still gets its history normalized. + rewrite_shell_history_to_function_calls(payload); + let Some(tools) = payload.tools.as_mut() else { return; }; @@ -756,6 +770,98 @@ pub fn preprocess_shell_tools(payload: &mut CreateResponsesPayload, hint: &Shell } } +/// Rewrite reconstructed hosted-shell history items (`shell_call` / +/// `shell_call_output`) in `payload.input` into the `function_call` / +/// `function_call_output` pair the model exchanged in function mode. See +/// [`preprocess_shell_tools`] for why. The two items stay paired by their +/// shared `call_id`, so the upstream still matches the call to its result. +fn rewrite_shell_history_to_function_calls(payload: &mut CreateResponsesPayload) { + let Some(ResponsesInput::Items(items)) = payload.input.as_mut() else { + return; + }; + for item in items.iter_mut() { + let rewritten = match item { + ResponsesInputItem::ShellCall(call) => Some(ResponsesInputItem::OutputFunctionCall( + shell_call_to_function_call(call), + )), + ResponsesInputItem::ShellCallOutput(output) => Some( + ResponsesInputItem::FunctionCallOutput(shell_output_to_function_output(output)), + ), + _ => None, + }; + if let Some(rewritten) = rewritten { + *item = rewritten; + } + } +} + +/// Reconstruct the model-emitted `function_call` from a stored `shell_call`. +/// The arguments mirror the `shell` function-tool schema +/// (`{ "action": { … } }`). `id` is left unset: the stored `shell_call` +/// reuses the model's `call_id` as its `id` (not a valid `fc_…` output id), +/// and the API assigns item ids on output anyway — pairing is by `call_id`. +fn shell_call_to_function_call(call: &ShellCall) -> OutputItemFunctionCall { + let arguments = serde_json::to_string(&serde_json::json!({ "action": call.action })) + .unwrap_or_else(|_| "{}".to_string()); + OutputItemFunctionCall { + type_: OutputItemFunctionCallType::FunctionCall, + id: None, + name: ShellToolArguments::FUNCTION_NAME.to_string(), + arguments, + call_id: call.call_id.clone(), + status: None, + } +} + +/// Reconstruct the `function_call_output` from a stored `shell_call_output`, +/// flattening the array `output` into the plain-text `exit_code/stdout/stderr` +/// blob the live tool loop feeds back to the model (see the continuation item +/// built in `ShellExecutor::execute`). +fn shell_output_to_function_output(output: &ShellCallOutputItem) -> FunctionCallOutput { + FunctionCallOutput { + type_: FunctionCallOutputType::FunctionCallOutput, + id: None, + call_id: output.call_id.clone(), + output: render_shell_output_text(output), + status: None, + } +} + +/// Flatten a `shell_call_output`'s content chunks (and any captured +/// `output_files` manifest) into the text result the model saw on the +/// original turn. +fn render_shell_output_text(item: &ShellCallOutputItem) -> String { + let mut combined = item + .output + .iter() + .map(|chunk| { + let exit_code = match &chunk.outcome { + ShellCallOutcome::Exit { exit_code } => *exit_code, + // Canonical `timeout(1)` exit code; matches how the live + // loop reports a killed call. + ShellCallOutcome::Timeout => 124, + }; + format!( + "exit_code: {}\nstdout:\n{}\nstderr:\n{}", + exit_code, chunk.stdout, chunk.stderr + ) + }) + .collect::>() + .join("\n"); + if !item.output_files.is_empty() { + // Only separate from the chunk text when there is some; an empty + // `output` array (no chunks) would otherwise leave a leading newline. + if !combined.is_empty() { + combined.push('\n'); + } + combined.push_str("output_files:\n"); + for f in &item.output_files { + combined.push_str(&format!("- {} ({} bytes)\n", f.path, f.bytes)); + } + } + combined +} + // ───────────────────────────────────────────────────────────────────────────── // Detection // ───────────────────────────────────────────────────────────────────────────── @@ -2899,6 +3005,78 @@ mod tests { ); } + #[test] + fn preprocess_rewrites_shell_history_to_function_calls() { + // A continuation reconstructed from `previous_response_id` replays + // the prior turn's hosted-shell items (`shell_call` / + // `shell_call_output`, the latter with an *array* `output`). In + // function mode these must become `function_call` / + // `function_call_output` so upstreams that demand a string `output` + // don't reject the turn. + let payload_json = serde_json::json!({ + "tools": [{"type": "shell"}], + "stream": false, + "input": [ + {"role": "user", "content": "Run `echo hi`"}, + {"type": "shell_call", "id": "call_abc", "call_id": "call_abc", + "status": "completed", + "action": {"commands": ["echo hi"]}}, + {"type": "shell_call_output", "id": "call_abc", "call_id": "call_abc", + "status": "completed", + "output": [{"stdout": "hi\n", "stderr": "", + "outcome": {"type": "exit", "exit_code": 0}}]}, + ], + }); + let mut payload: CreateResponsesPayload = serde_json::from_value(payload_json).unwrap(); + preprocess_shell_tools(&mut payload, &ShellToolHint::default()); + + let Some(ResponsesInput::Items(items)) = payload.input.as_ref() else { + panic!("expected items input"); + }; + assert_eq!(items.len(), 3); + // The user message is preserved as-is. A bare-string `content` + // with no `type` deserializes to the `EasyMessage` variant. + assert!( + matches!(items[0], ResponsesInputItem::EasyMessage(_)), + "expected user message at index 0, got {:?}", + items[0] + ); + // shell_call → function_call with reconstructed `shell` arguments, + // pairing preserved via call_id. + let ResponsesInputItem::OutputFunctionCall(call) = &items[1] else { + panic!( + "expected shell_call rewritten to function_call: {:?}", + items[1] + ); + }; + assert_eq!(call.name, "shell"); + assert_eq!(call.call_id, "call_abc"); + assert!(call.id.is_none(), "must omit the non-fc_ shell id"); + assert!( + call.arguments.contains("echo hi"), + "arguments should carry the action: {}", + call.arguments + ); + // shell_call_output → function_call_output with a *string* output. + let ResponsesInputItem::FunctionCallOutput(out) = &items[2] else { + panic!( + "expected shell_call_output rewritten to function_call_output: {:?}", + items[2] + ); + }; + assert_eq!(out.call_id, "call_abc"); + assert!( + out.output.contains("exit_code: 0") && out.output.contains("hi"), + "output should flatten to text: {}", + out.output + ); + // No stray hosted-shell items survive to trip the upstream validator. + assert!(!items.iter().any(|i| matches!( + i, + ResponsesInputItem::ShellCall(_) | ResponsesInputItem::ShellCallOutput(_) + ))); + } + #[test] fn shell_hint_describes_client_passthrough() { let hint = ShellToolHint {