Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions agent_instructions/adding_provider.md
Original file line number Diff line number Diff line change
Expand Up @@ -681,6 +681,14 @@ providers without a native equivalent — your provider doesn't need to know abo
`function_call` / `function_call_output` items in `payload.input` to the provider's
tool-use format and back. The server-tool loop will not work otherwise. See
`providers/{anthropic,bedrock,vertex}/convert.rs` for working examples.
- **Shell history rewrite** — a `previous_response_id` continuation replays the prior turn's
*stored* output, which uses the client-facing hosted-shell shapes (`shell_call` /
`shell_call_output`, where `output` is an **array** of `{stdout, stderr, outcome}`). In
function mode `preprocess_shell_tools` rewrites those reconstructed items back to
`function_call` / `function_call_output` (string `output`) before your provider sees them,
so `convert.rs` only ever needs to handle the function-call shapes — never `ShellCall` /
`ShellCallOutput` in `payload.input`. Forwarding the array `output` verbatim makes
OpenAI-compatible upstreams 400 (`output`: expected string, received array).
- **Container files** — when the server tool returns a `container_file_citation` annotation,
it shows up on a `response.content_part.done` event. You don't need to handle this in your
convert layer; the shell executor injects it on the way out via `transform_event`.
Expand Down
8 changes: 8 additions & 0 deletions src/services/responses_chain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,14 @@ fn input_to_items(input: ResponsesInput) -> Vec<ResponsesInputItem> {
/// Map a prior turn's output item to the equivalent input item so it can be
/// replayed as conversation history. The inner payloads are identical between
/// the two enums, so this is a total, lossless 1:1 mapping.
///
/// Note the hosted-shell items (`ShellCall` / `ShellCallOutput`) are replayed
/// verbatim here — the array-`output` shape they carry is only valid for
/// native OpenAI passthrough. In function mode `preprocess_shell_tools`
/// (`services/shell_tool.rs`, run per provider in `routes/execution.rs`)
/// rewrites them to `function_call` / `function_call_output` before dispatch,
/// since that's the mode-aware layer that knows whether the shell tool stayed
/// native or was rewritten to a function.
fn output_item_to_input(item: ResponsesOutputItem) -> ResponsesInputItem {
match item {
ResponsesOutputItem::Message(m) => ResponsesInputItem::OutputMessage(m),
Expand Down
184 changes: 181 additions & 3 deletions src/services/shell_tool.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,10 @@ use uuid::Uuid;
use crate::{
api_types::responses::{
ContainerFileRef, CreateResponsesPayload, FunctionCallOutput, FunctionCallOutputType,
FunctionTool, KnownShellNetworkPolicyType, ResponsesAnnotation, ResponsesInput,
ResponsesInputItem, ResponsesToolDefinition, ShellDomainSecret, ShellEnvironment,
ShellNetworkPolicyType,
FunctionTool, KnownShellNetworkPolicyType, OutputItemFunctionCall,
OutputItemFunctionCallType, ResponsesAnnotation, ResponsesInput, ResponsesInputItem,
ResponsesToolDefinition, ShellCall, ShellCallOutcome, ShellCallOutputItem,
ShellDomainSecret, ShellEnvironment, ShellNetworkPolicyType,
},
config::{ContainersConfig, ShellLimitsConfig},
models::UsageLogEntry,
Expand Down Expand Up @@ -737,6 +738,19 @@ impl ShellToolArguments {
/// path. In OpenAI passthrough modes the native spec is left intact and
/// this is skipped.
pub fn preprocess_shell_tools(payload: &mut CreateResponsesPayload, hint: &ShellToolHint) {
// History items reconstructed from a `previous_response_id` chain carry
// the client-facing hosted-shell shapes (`shell_call` /
// `shell_call_output`, the latter with an *array* `output`). In function
// mode — the exact set of provider paths that call this — the model
// originally exchanged `function_call` / `function_call_output`, so
// replay those shapes instead. Forwarded verbatim the array `output`
// makes OpenAI-compatible upstreams reject the turn (`output`: expected
// string, received array) and Anthropic / Bedrock / Vertex silently drop
// the items, erasing the tool result from history. Runs before the tools
// early-return so a continuation that no longer re-declares the shell
// tool still gets its history normalized.
rewrite_shell_history_to_function_calls(payload);

let Some(tools) = payload.tools.as_mut() else {
return;
};
Expand All @@ -756,6 +770,98 @@ pub fn preprocess_shell_tools(payload: &mut CreateResponsesPayload, hint: &Shell
}
}

/// Rewrite reconstructed hosted-shell history items (`shell_call` /
/// `shell_call_output`) in `payload.input` into the `function_call` /
/// `function_call_output` pair the model exchanged in function mode. See
/// [`preprocess_shell_tools`] for why. The two items stay paired by their
/// shared `call_id`, so the upstream still matches the call to its result.
fn rewrite_shell_history_to_function_calls(payload: &mut CreateResponsesPayload) {
let Some(ResponsesInput::Items(items)) = payload.input.as_mut() else {
return;
};
for item in items.iter_mut() {
let rewritten = match item {
ResponsesInputItem::ShellCall(call) => Some(ResponsesInputItem::OutputFunctionCall(
shell_call_to_function_call(call),
)),
ResponsesInputItem::ShellCallOutput(output) => Some(
ResponsesInputItem::FunctionCallOutput(shell_output_to_function_output(output)),
),
_ => None,
};
if let Some(rewritten) = rewritten {
*item = rewritten;
}
}
}

/// Reconstruct the model-emitted `function_call` from a stored `shell_call`.
/// The arguments mirror the `shell` function-tool schema
/// (`{ "action": { … } }`). `id` is left unset: the stored `shell_call`
/// reuses the model's `call_id` as its `id` (not a valid `fc_…` output id),
/// and the API assigns item ids on output anyway — pairing is by `call_id`.
fn shell_call_to_function_call(call: &ShellCall) -> OutputItemFunctionCall {
let arguments = serde_json::to_string(&serde_json::json!({ "action": call.action }))
.unwrap_or_else(|_| "{}".to_string());
OutputItemFunctionCall {
type_: OutputItemFunctionCallType::FunctionCall,
id: None,
name: ShellToolArguments::FUNCTION_NAME.to_string(),
arguments,
call_id: call.call_id.clone(),
status: None,
}
}

/// Reconstruct the `function_call_output` from a stored `shell_call_output`,
/// flattening the array `output` into the plain-text `exit_code/stdout/stderr`
/// blob the live tool loop feeds back to the model (see the continuation item
/// built in `ShellExecutor::execute`).
fn shell_output_to_function_output(output: &ShellCallOutputItem) -> FunctionCallOutput {
FunctionCallOutput {
type_: FunctionCallOutputType::FunctionCallOutput,
id: None,
call_id: output.call_id.clone(),
output: render_shell_output_text(output),
status: None,
}
}

/// Flatten a `shell_call_output`'s content chunks (and any captured
/// `output_files` manifest) into the text result the model saw on the
/// original turn.
fn render_shell_output_text(item: &ShellCallOutputItem) -> String {
let mut combined = item
.output
.iter()
.map(|chunk| {
let exit_code = match &chunk.outcome {
ShellCallOutcome::Exit { exit_code } => *exit_code,
// Canonical `timeout(1)` exit code; matches how the live
// loop reports a killed call.
ShellCallOutcome::Timeout => 124,
};
format!(
"exit_code: {}\nstdout:\n{}\nstderr:\n{}",
exit_code, chunk.stdout, chunk.stderr
)
})
.collect::<Vec<_>>()
.join("\n");
if !item.output_files.is_empty() {
// Only separate from the chunk text when there is some; an empty
// `output` array (no chunks) would otherwise leave a leading newline.
if !combined.is_empty() {
combined.push('\n');
}
combined.push_str("output_files:\n");
for f in &item.output_files {
combined.push_str(&format!("- {} ({} bytes)\n", f.path, f.bytes));
}
}
combined
Comment thread
greptile-apps[bot] marked this conversation as resolved.
}

// ─────────────────────────────────────────────────────────────────────────────
// Detection
// ─────────────────────────────────────────────────────────────────────────────
Expand Down Expand Up @@ -2899,6 +3005,78 @@ mod tests {
);
}

#[test]
fn preprocess_rewrites_shell_history_to_function_calls() {
// A continuation reconstructed from `previous_response_id` replays
// the prior turn's hosted-shell items (`shell_call` /
// `shell_call_output`, the latter with an *array* `output`). In
// function mode these must become `function_call` /
// `function_call_output` so upstreams that demand a string `output`
// don't reject the turn.
let payload_json = serde_json::json!({
"tools": [{"type": "shell"}],
"stream": false,
"input": [
{"role": "user", "content": "Run `echo hi`"},
{"type": "shell_call", "id": "call_abc", "call_id": "call_abc",
"status": "completed",
"action": {"commands": ["echo hi"]}},
{"type": "shell_call_output", "id": "call_abc", "call_id": "call_abc",
"status": "completed",
"output": [{"stdout": "hi\n", "stderr": "",
"outcome": {"type": "exit", "exit_code": 0}}]},
],
});
let mut payload: CreateResponsesPayload = serde_json::from_value(payload_json).unwrap();
preprocess_shell_tools(&mut payload, &ShellToolHint::default());

let Some(ResponsesInput::Items(items)) = payload.input.as_ref() else {
panic!("expected items input");
};
assert_eq!(items.len(), 3);
// The user message is preserved as-is. A bare-string `content`
// with no `type` deserializes to the `EasyMessage` variant.
assert!(
matches!(items[0], ResponsesInputItem::EasyMessage(_)),
"expected user message at index 0, got {:?}",
items[0]
);
// shell_call → function_call with reconstructed `shell` arguments,
// pairing preserved via call_id.
let ResponsesInputItem::OutputFunctionCall(call) = &items[1] else {
panic!(
"expected shell_call rewritten to function_call: {:?}",
items[1]
);
};
assert_eq!(call.name, "shell");
assert_eq!(call.call_id, "call_abc");
assert!(call.id.is_none(), "must omit the non-fc_ shell id");
assert!(
call.arguments.contains("echo hi"),
"arguments should carry the action: {}",
call.arguments
);
// shell_call_output → function_call_output with a *string* output.
let ResponsesInputItem::FunctionCallOutput(out) = &items[2] else {
panic!(
"expected shell_call_output rewritten to function_call_output: {:?}",
items[2]
);
};
assert_eq!(out.call_id, "call_abc");
assert!(
out.output.contains("exit_code: 0") && out.output.contains("hi"),
"output should flatten to text: {}",
out.output
);
// No stray hosted-shell items survive to trip the upstream validator.
assert!(!items.iter().any(|i| matches!(
i,
ResponsesInputItem::ShellCall(_) | ResponsesInputItem::ShellCallOutput(_)
)));
}

#[test]
fn shell_hint_describes_client_passthrough() {
let hint = ShellToolHint {
Expand Down
Loading