diff --git a/async-openai/README.md b/async-openai/README.md index 9b1fdcab..28271272 100644 --- a/async-openai/README.md +++ b/async-openai/README.md @@ -35,7 +35,7 @@ - [x] Models - [x] Moderations - [x] Organizations | Administration (partially implemented) - - [x] Realtime (Beta) (partially implemented) + - [x] Realtime GA (partially implemented) - [x] Responses (partially implemented) - [x] Uploads - [x] Videos @@ -65,7 +65,6 @@ $Env:OPENAI_API_KEY='sk-...' ## Realtime API Only types for Realtime API are implemented, and can be enabled with feature flag `realtime`. -These types were written before OpenAI released official specs. ## Image Generation Example @@ -179,8 +178,6 @@ To maintain quality of the project, a minimum of the following is a must for cod This project adheres to [Rust Code of Conduct](https://www.rust-lang.org/policies/code-of-conduct) ## Complimentary Crates - -- [openai-func-enums](https://github.com/frankfralick/openai-func-enums) provides procedural macros that make it easier to use this library with OpenAI API's tool calling feature. It also provides derive macros you can add to existing [clap](https://github.com/clap-rs/clap) application subcommands for natural language use of command line tools. It also supports openai's [parallel tool calls](https://platform.openai.com/docs/guides/function-calling/parallel-function-calling) and allows you to choose between running multiple tool calls concurrently or own their own OS threads. - [async-openai-wasm](https://github.com/ifsheldon/async-openai-wasm) provides WASM support. 
## License diff --git a/async-openai/src/responses.rs b/async-openai/src/responses.rs index 9160b7be..223a5b1c 100644 --- a/async-openai/src/responses.rs +++ b/async-openai/src/responses.rs @@ -1,13 +1,15 @@ +use serde::Serialize; + use crate::{ config::Config, error::OpenAIError, - types::responses::{CreateResponse, Response, ResponseStream}, + types::responses::{ + CreateResponse, DeleteResponse, Response, ResponseItemList, ResponseStream, + TokenCountsBody, TokenCountsResource, + }, Client, }; -/// Given text input or a list of context items, the model will generate a response. -/// -/// Related guide: [Responses](https://platform.openai.com/docs/api-reference/responses) pub struct Responses<'c, C: Config> { client: &'c Client, } @@ -18,7 +20,15 @@ impl<'c, C: Config> Responses<'c, C> { Self { client } } - /// Creates a model response for the given input. + /// Creates a model response. Provide [text](https://platform.openai.com/docs/guides/text) or + /// [image](https://platform.openai.com/docs/guides/images) inputs to generate + /// [text](https://platform.openai.com/docs/guides/text) or + /// [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have the model call + /// your own [custom code](https://platform.openai.com/docs/guides/function-calling) or use + /// built-in [tools](https://platform.openai.com/docs/guides/tools) like + /// [web search](https://platform.openai.com/docs/guides/tools-web-search) + /// or [file search](https://platform.openai.com/docs/guides/tools-file-search) to use your own data + /// as input for the model's response. #[crate::byot( T0 = serde::Serialize, R = serde::de::DeserializeOwned @@ -52,4 +62,60 @@ impl<'c, C: Config> Responses<'c, C> { } Ok(self.client.post_stream("/responses", request).await) } + + /// Retrieves a model response with the given ID. 
+ #[crate::byot(T0 = std::fmt::Display, T1 = serde::Serialize, R = serde::de::DeserializeOwned)] + pub async fn retrieve(&self, response_id: &str, query: &Q) -> Result + where + Q: Serialize + ?Sized, + { + self.client + .get_with_query(&format!("/responses/{}", response_id), &query) + .await + } + + /// Deletes a model response with the given ID. + #[crate::byot(T0 = std::fmt::Display, R = serde::de::DeserializeOwned)] + pub async fn delete(&self, response_id: &str) -> Result { + self.client + .delete(&format!("/responses/{}", response_id)) + .await + } + + /// Cancels a model response with the given ID. Only responses created with the + /// `background` parameter set to `true` can be cancelled. + /// [Learn more](https://platform.openai.com/docs/guides/background). + #[crate::byot(T0 = std::fmt::Display, R = serde::de::DeserializeOwned)] + pub async fn cancel(&self, response_id: &str) -> Result { + self.client + .post( + &format!("/responses/{}/cancel", response_id), + serde_json::json!({}), + ) + .await + } + + /// Returns a list of input items for a given response. + #[crate::byot(T0 = std::fmt::Display, T1 = serde::Serialize, R = serde::de::DeserializeOwned)] + pub async fn list_input_items( + &self, + response_id: &str, + query: &Q, + ) -> Result + where + Q: Serialize + ?Sized, + { + self.client + .get_with_query(&format!("/responses/{}/input_items", response_id), &query) + .await + } + + /// Get input token counts + #[crate::byot(T0 = serde::Serialize, R = serde::de::DeserializeOwned)] + pub async fn get_input_token_counts( + &self, + request: TokenCountsBody, + ) -> Result { + self.client.post("/responses/input_tokens", request).await + } } diff --git a/async-openai/src/types/chat.rs b/async-openai/src/types/chat.rs index d9373db6..e519286d 100644 --- a/async-openai/src/types/chat.rs +++ b/async-openai/src/types/chat.rs @@ -504,9 +504,14 @@ pub struct ResponseFormatJsonSchema { /// The name of the response format. 
Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64. pub name: String, /// The schema for the response format, described as a JSON Schema object. + /// Learn how to build JSON schemas [here](https://json-schema.org/). #[serde(skip_serializing_if = "Option::is_none")] pub schema: Option, - /// Whether to enable strict schema adherence when generating the output. If set to true, the model will always follow the exact schema defined in the `schema` field. Only a subset of JSON Schema is supported when `strict` is `true`. To learn more, read the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + /// Whether to enable strict schema adherence when generating the output. + /// If set to true, the model will always follow the exact schema defined + /// in the `schema` field. Only a subset of JSON Schema is supported when + /// `strict` is `true`. To learn more, read the [Structured Outputs + /// guide](https://platform.openai.com/docs/guides/structured-outputs). 
#[serde(skip_serializing_if = "Option::is_none")] pub strict: Option, } diff --git a/async-openai/src/types/impls.rs b/async-openai/src/types/impls.rs index b566dc7d..972c6043 100644 --- a/async-openai/src/types/impls.rs +++ b/async-openai/src/types/impls.rs @@ -14,7 +14,7 @@ use crate::{ use bytes::Bytes; use super::{ - responses::{CodeInterpreterContainer, Input, InputContent, Role as ResponsesRole}, + responses::{EasyInputContent, Role as ResponsesRole}, AddUploadPartRequest, AudioInput, AudioResponseFormat, ChatCompletionFunctionCall, ChatCompletionFunctions, ChatCompletionNamedToolChoice, ChatCompletionRequestAssistantMessage, ChatCompletionRequestAssistantMessageContent, ChatCompletionRequestDeveloperMessage, @@ -1047,50 +1047,26 @@ impl AsyncTryFrom for reqwest::multipart::Form { // end: types to multipart form -impl Default for Input { +impl Default for EasyInputContent { fn default() -> Self { Self::Text("".to_string()) } } -impl Default for InputContent { - fn default() -> Self { - Self::TextInput("".to_string()) - } -} - -impl From for Input { - fn from(value: String) -> Self { - Input::Text(value) - } -} - -impl From<&str> for Input { - fn from(value: &str) -> Self { - Input::Text(value.to_owned()) - } -} - impl Default for ResponsesRole { fn default() -> Self { Self::User } } -impl From for InputContent { +impl From for EasyInputContent { fn from(value: String) -> Self { - Self::TextInput(value) + Self::Text(value) } } -impl From<&str> for InputContent { +impl From<&str> for EasyInputContent { fn from(value: &str) -> Self { - Self::TextInput(value.to_owned()) - } -} - -impl Default for CodeInterpreterContainer { - fn default() -> Self { - CodeInterpreterContainer::Id("".to_string()) + Self::Text(value.to_owned()) } } diff --git a/async-openai/src/types/mcp.rs b/async-openai/src/types/mcp.rs new file mode 100644 index 00000000..fae078a1 --- /dev/null +++ b/async-openai/src/types/mcp.rs @@ -0,0 +1,137 @@ +use derive_builder::Builder; +use 
serde::{Deserialize, Serialize}; + +use crate::error::OpenAIError; + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum McpToolConnectorId { + ConnectorDropbox, + ConnectorGmail, + ConnectorGooglecalendar, + ConnectorGoogledrive, + ConnectorMicrosoftteams, + ConnectorOutlookcalendar, + ConnectorOutlookemail, + ConnectorSharepoint, +} + +#[derive(Debug, Serialize, Deserialize, Clone, Builder, PartialEq, Default)] +#[builder( + name = "MCPToolArgs", + pattern = "mutable", + setter(into, strip_option), + default +)] +#[builder(build_fn(error = "OpenAIError"))] +pub struct MCPTool { + /// A label for this MCP server, used to identify it in tool calls. + pub server_label: String, + + /// List of allowed tool names or a filter object. + #[serde(skip_serializing_if = "Option::is_none")] + pub allowed_tools: Option, + + /// An OAuth access token that can be used with a remote MCP server, either with a custom MCP + /// server URL or a service connector. Your application must handle the OAuth authorization + /// flow and provide the token here. + #[serde(skip_serializing_if = "Option::is_none")] + pub authorization: Option, + + /// Identifier for service connectors, like those available in ChatGPT. One of `server_url` or + /// `connector_id` must be provided. Learn more about service connectors [here](https://platform.openai.com/docs/guides/tools-remote-mcp#connectors). 
+ /// + /// Currently supported `connector_id` values are: + /// - Dropbox: `connector_dropbox` + /// - Gmail: `connector_gmail` + /// - Google Calendar: `connector_googlecalendar` + /// - Google Drive: `connector_googledrive` + /// - Microsoft Teams: `connector_microsoftteams` + /// - Outlook Calendar: `connector_outlookcalendar` + /// - Outlook Email: `connector_outlookemail` + /// - SharePoint: `connector_sharepoint` + #[serde(skip_serializing_if = "Option::is_none")] + pub connector_id: Option, + + /// Optional HTTP headers to send to the MCP server. Use for authentication or other purposes. + #[serde(skip_serializing_if = "Option::is_none")] + pub headers: Option, + + /// Specify which of the MCP server's tools require approval. + #[serde(skip_serializing_if = "Option::is_none")] + pub require_approval: Option, + + /// Optional description of the MCP server, used to provide more context. + #[serde(skip_serializing_if = "Option::is_none")] + pub server_description: Option, + + /// The URL for the MCP server. One of `server_url` or `connector_id` must be provided. + #[serde(skip_serializing_if = "Option::is_none")] + pub server_url: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(untagged)] +pub enum MCPToolAllowedTools { + /// A string array of allowed tool names + List(Vec), + /// A filter object to specify which tools are allowed. + Filter(MCPToolFilter), +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct MCPToolFilter { + /// Indicates whether or not a tool modifies data or is read-only. + /// If an MCP server is annotated with [readOnlyHint](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + /// it will match this filter. + #[serde(skip_serializing_if = "Option::is_none")] + pub read_only: Option, + /// List of allowed tool names. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub tool_names: Option>, +} + +/// Approval policy or filter for MCP tools. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(untagged)] +pub enum MCPToolRequireApproval { + /// Specify which of the MCP server's tools require approval. Can be + /// `always`, `never`, or a filter object associated with tools + /// that require approval. + Filter(MCPToolApprovalFilter), + /// Specify a single approval policy for all tools. One of `always` or + /// `never`. When set to `always`, all tools will require approval. When + /// set to `never`, all tools will not require approval. + ApprovalSetting(MCPToolApprovalSetting), +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(rename_all = "lowercase")] +pub enum MCPToolApprovalSetting { + Always, + Never, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct MCPToolApprovalFilter { + /// A list of tools that always require approval. + #[serde(skip_serializing_if = "Option::is_none")] + pub always: Option, + /// A list of tools that never require approval. + #[serde(skip_serializing_if = "Option::is_none")] + pub never: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct MCPListToolsTool { + /// The JSON schema describing the tool's input. + pub input_schema: serde_json::Value, + /// The name of the tool. + pub name: String, + /// Additional annotations about the tool. + #[serde(skip_serializing_if = "Option::is_none")] + pub annotations: Option, + /// The description of the tool. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub description: Option, +} diff --git a/async-openai/src/types/mod.rs b/async-openai/src/types/mod.rs index c1cd4cb5..c6474aa5 100644 --- a/async-openai/src/types/mod.rs +++ b/async-openai/src/types/mod.rs @@ -14,6 +14,7 @@ mod file; mod fine_tuning; mod image; mod invites; +mod mcp; mod message; mod model; mod moderation; @@ -46,6 +47,7 @@ pub use file::*; pub use fine_tuning::*; pub use image::*; pub use invites::*; +pub use mcp::*; pub use message::*; pub use model::*; pub use moderation::*; diff --git a/async-openai/src/types/realtime/client_event.rs b/async-openai/src/types/realtime/client_event.rs index 87ff7010..0881b9b9 100644 --- a/async-openai/src/types/realtime/client_event.rs +++ b/async-openai/src/types/realtime/client_event.rs @@ -1,72 +1,91 @@ use serde::{Deserialize, Serialize}; use tokio_tungstenite::tungstenite::Message; -use super::{item::Item, session_resource::SessionResource}; +use crate::types::realtime::{RealtimeConversationItem, RealtimeResponseCreateParams, Session}; -#[derive(Debug, Serialize, Deserialize, Clone, Default)] -pub struct SessionUpdateEvent { +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeClientEventSessionUpdate { /// Optional client-generated ID used to identify this event. + /// This is an arbitrary string that a client may assign. It will be passed + /// back if there is an error with the event, but the corresponding + /// `session.updated` event will not include it. #[serde(skip_serializing_if = "Option::is_none")] pub event_id: Option, - /// Session configuration to update. - pub session: SessionResource, + /// Update the Realtime session. Choose either a realtime session or a transcription session. + pub session: Session, } #[derive(Debug, Serialize, Deserialize, Clone, Default)] -pub struct InputAudioBufferAppendEvent { +pub struct RealtimeClientEventInputAudioBufferAppend { /// Optional client-generated ID used to identify this event. 
#[serde(skip_serializing_if = "Option::is_none")] pub event_id: Option, - /// Base64-encoded audio bytes. + /// Base64-encoded audio bytes. This must be in the format specified by + /// the `input_audio_format` field in the session configuration. pub audio: String, } #[derive(Debug, Serialize, Deserialize, Clone, Default)] -pub struct InputAudioBufferCommitEvent { +pub struct RealtimeClientEventInputAudioBufferCommit { /// Optional client-generated ID used to identify this event. #[serde(skip_serializing_if = "Option::is_none")] pub event_id: Option, } #[derive(Debug, Serialize, Deserialize, Clone, Default)] -pub struct InputAudioBufferClearEvent { +pub struct RealtimeClientEventInputAudioBufferClear { /// Optional client-generated ID used to identify this event. #[serde(skip_serializing_if = "Option::is_none")] pub event_id: Option, } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ConversationItemCreateEvent { +pub struct RealtimeClientEventConversationItemCreate { /// Optional client-generated ID used to identify this event. #[serde(skip_serializing_if = "Option::is_none")] pub event_id: Option, /// The ID of the preceding item after which the new item will be inserted. + /// If not set, the new item will be appended to the end of the conversation. + /// If set to `root`, the new item will be added to the beginning of the conversation. + /// If set to an existing ID, it allows an item to be inserted mid-conversation. + /// If the ID cannot be found, an error will be returned and the item will not be added. #[serde(skip_serializing_if = "Option::is_none")] pub previous_item_id: Option, - /// The item to add to the conversation. - pub item: Item, + /// A single item within a Realtime conversation. 
+ pub item: RealtimeConversationItem, } #[derive(Debug, Serialize, Deserialize, Clone, Default)] -pub struct ConversationItemTruncateEvent { +pub struct RealtimeClientEventConversationItemRetrieve { /// Optional client-generated ID used to identify this event. #[serde(skip_serializing_if = "Option::is_none")] pub event_id: Option, - /// The ID of the assistant message item to truncate. + /// The ID of the item to retrieve. pub item_id: String, +} - /// The index of the content part to truncate. +#[derive(Debug, Serialize, Deserialize, Clone, Default)] +pub struct RealtimeClientEventConversationItemTruncate { + /// Optional client-generated ID used to identify this event. + #[serde(skip_serializing_if = "Option::is_none")] + pub event_id: Option, + + /// The ID of the assistant message item to truncate. Only assistant message items can be truncated. + pub item_id: String, + + /// The index of the content part to truncate. Set this to `0`. pub content_index: u32, /// Inclusive duration up to which audio is truncated, in milliseconds. + /// If the audio_end_ms is greater than the actual audio duration, the server will respond with an error. pub audio_end_ms: u32, } #[derive(Debug, Serialize, Deserialize, Clone, Default)] -pub struct ConversationItemDeleteEvent { +pub struct RealtimeClientEventConversationItemDelete { /// Optional client-generated ID used to identify this event. #[serde(skip_serializing_if = "Option::is_none")] pub event_id: Option, @@ -76,17 +95,29 @@ pub struct ConversationItemDeleteEvent { } #[derive(Debug, Serialize, Deserialize, Clone, Default)] -pub struct ResponseCreateEvent { +pub struct RealtimeClientEventResponseCreate { /// Optional client-generated ID used to identify this event. #[serde(skip_serializing_if = "Option::is_none")] pub event_id: Option, - /// Configuration for the response. 
- pub response: Option, + /// Create a new Realtime response with these parameters + pub response: Option, } #[derive(Debug, Serialize, Deserialize, Clone, Default)] -pub struct ResponseCancelEvent { +pub struct RealtimeClientEventResponseCancel { + /// Optional client-generated ID used to identify this event. + #[serde(skip_serializing_if = "Option::is_none")] + pub event_id: Option, + + /// A specific response ID to cancel - if not provided, will cancel an + /// in-progress response in the default conversation. + #[serde(skip_serializing_if = "Option::is_none")] + pub response_id: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone, Default)] +pub struct RealtimeClientEventOutputAudioBufferClear { /// Optional client-generated ID used to identify this event. #[serde(skip_serializing_if = "Option::is_none")] pub event_id: Option, @@ -95,52 +126,120 @@ pub struct ResponseCancelEvent { /// These are events that the OpenAI Realtime WebSocket server will accept from the client. #[derive(Debug, Serialize, Deserialize)] #[serde(tag = "type")] -pub enum ClientEvent { - /// Send this event to update the session’s default configuration. +pub enum RealtimeClientEvent { + /// Send this event to update the session's configuration. The client may send this event at any time to update any field + /// except for `voice` and `model`. `voice` can be updated only if there have been no other audio outputs yet. + /// + /// When the server receives a `session.update`, it will respond with a `session.updated` event showing the full, effective + /// configuration. Only the fields that are present in the `session.update` are updated. To clear a field like `instructions`, + /// pass an empty string. To clear a field like `tools`, pass an empty array. To clear a field like `turn_detection`, pass `null`. #[serde(rename = "session.update")] - SessionUpdate(SessionUpdateEvent), - - /// Send this event to append audio bytes to the input audio buffer. 
+ SessionUpdate(RealtimeClientEventSessionUpdate), + + /// Send this event to append audio bytes to the input audio buffer. The audio buffer is temporary storage you can write to and later commit. + /// A "commit" will create a new user message item in the conversation history from the buffer content and clear the buffer. Input audio + /// transcription (if enabled) will be generated when the buffer is committed. + /// + /// If VAD is enabled the audio buffer is used to detect speech and the server will decide when to commit. When Server VAD is disabled, + /// you must commit the audio buffer manually. Input audio noise reduction operates on writes to the audio buffer. + /// + /// The client may choose how much audio to place in each event up to a maximum of 15 MiB, for example streaming smaller chunks from the + /// client may allow the VAD to be more responsive. Unlike most other client events, the server will not send a confirmation response to + /// this event. #[serde(rename = "input_audio_buffer.append")] - InputAudioBufferAppend(InputAudioBufferAppendEvent), + InputAudioBufferAppend(RealtimeClientEventInputAudioBufferAppend), - /// Send this event to commit audio bytes to a user message. + /// Send this event to commit the user input audio buffer, which will create a new user message item in the conversation. + /// This event will produce an error if the input audio buffer is empty. + /// When in Server VAD mode, the client does not need to send this event, the server will commit the audio buffer automatically. + /// Committing the input audio buffer will trigger input audio transcription (if enabled in session configuration), but it will not create a response from the model. + /// The server will respond with an input_audio_buffer.committed event. 
#[serde(rename = "input_audio_buffer.commit")] - InputAudioBufferCommit(InputAudioBufferCommitEvent), + InputAudioBufferCommit(RealtimeClientEventInputAudioBufferCommit), /// Send this event to clear the audio bytes in the buffer. + /// The server will respond with an `input_audio_buffer.cleared` event. #[serde(rename = "input_audio_buffer.clear")] - InputAudioBufferClear(InputAudioBufferClearEvent), + InputAudioBufferClear(RealtimeClientEventInputAudioBufferClear), - /// Send this event when adding an item to the conversation. + /// Add a new Item to the Conversation's context, including messages, function calls, and function call responses. + /// This event can be used both to populate a "history" of the conversation and to add new items mid-stream, + /// but has the current limitation that it cannot populate assistant audio messages. + /// + /// If successful, the server will respond with a `conversation.item.created` event, otherwise an `error` event will be sent. #[serde(rename = "conversation.item.create")] - ConversationItemCreate(ConversationItemCreateEvent), - - /// Send this event when you want to truncate a previous assistant message’s audio. + ConversationItemCreate(RealtimeClientEventConversationItemCreate), + + /// Send this event when you want to retrieve the server's representation of a specific item in the conversation history. + /// This is useful, for example, to inspect user audio after noise cancellation and VAD. + /// The server will respond with a `conversation.item.retrieved` event, unless the item does not exist in the conversation history, + /// in which case the server will respond with an error. + #[serde(rename = "conversation.item.retrieve")] + ConversationItemRetrieve(RealtimeClientEventConversationItemRetrieve), + + /// Send this event to truncate a previous assistant message's audio. 
The server will produce audio faster than realtime, + /// so this event is useful when the user interrupts to truncate audio that has already been sent to the client but not + /// yet played. This will synchronize the server's understanding of the audio with the client's playback. + /// + /// Truncating audio will delete the server-side text transcript to ensure there is not text in the context that hasn't + /// been heard by the user. + /// + /// If successful, the server will respond with a `conversation.item.truncated` event. #[serde(rename = "conversation.item.truncate")] - ConversationItemTruncate(ConversationItemTruncateEvent), + ConversationItemTruncate(RealtimeClientEventConversationItemTruncate), - /// Send this event when you want to remove any item from the conversation history. + /// Send this event when you want to remove any item from the conversation history. The server will respond with a + /// `conversation.item.deleted` event, unless the item does not exist in the conversation history, in which case the + /// server will respond with an error. #[serde(rename = "conversation.item.delete")] - ConversationItemDelete(ConversationItemDeleteEvent), - - /// Send this event to trigger a response generation. + ConversationItemDelete(RealtimeClientEventConversationItemDelete), + + /// This event instructs the server to create a Response, which means triggering model inference. + /// When in Server VAD mode, the server will create Responses automatically. + /// + /// A Response will include at least one Item, and may have two, in which case the second will be a function call. + /// These Items will be appended to the conversation history by default. + /// + /// The server will respond with a `response.created` event, events for Items and content created, and finally a + /// `response.done` event to indicate the Response is complete. + /// + /// The `response.create` event includes inference configuration like `instructions` and `tools`. 
If these are set, they will + /// override the Session's configuration for this Response only. + /// + /// Responses can be created out-of-band of the default Conversation, meaning that they can have arbitrary input, and + /// it's possible to disable writing the output to the Conversation. Only one Response can write to the default + /// Conversation at a time, but otherwise multiple Responses can be created in parallel. The `metadata` field is a good + /// way to disambiguate multiple simultaneous Responses. + /// + /// Clients can set `conversation` to `none` to create a Response that does not write to the default Conversation. + /// Arbitrary input can be provided with the `input` field, which is an array accepting raw Items and references to + /// existing Items. #[serde(rename = "response.create")] - ResponseCreate(ResponseCreateEvent), + ResponseCreate(RealtimeClientEventResponseCreate), - /// Send this event to cancel an in-progress response. + /// Send this event to cancel an in-progress response. The server will respond with a `response.done` event + /// with a status of `response.status=cancelled`. If there is no response to cancel, the server will respond + /// with an error. It's safe to call `response.cancel` even if no response is in progress, an error will be + /// returned and the session will remain unaffected. #[serde(rename = "response.cancel")] - ResponseCancel(ResponseCancelEvent), + ResponseCancel(RealtimeClientEventResponseCancel), + + /// **WebRTC Only:** Emit to cut off the current audio response. + /// This will trigger the server to stop generating audio and emit an `output_audio_buffer.cleared` event. + /// This event should be preceded by a `response.cancel` client event to stop the generation of the current response. 
+ /// [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc) + #[serde(rename = "output_audio_buffer.clear")] + OutputAudioBufferClear(RealtimeClientEventOutputAudioBufferClear), } -impl From<&ClientEvent> for String { - fn from(value: &ClientEvent) -> Self { +impl From<&RealtimeClientEvent> for String { + fn from(value: &RealtimeClientEvent) -> Self { serde_json::to_string(value).unwrap() } } -impl From for Message { - fn from(value: ClientEvent) -> Self { +impl From for Message { + fn from(value: RealtimeClientEvent) -> Self { Message::Text(String::from(&value).into()) } } @@ -165,52 +264,100 @@ macro_rules! event_from { }; } -event_from!(SessionUpdateEvent, ClientEvent, SessionUpdate); event_from!( - InputAudioBufferAppendEvent, - ClientEvent, + RealtimeClientEventSessionUpdate, + RealtimeClientEvent, + SessionUpdate +); +event_from!( + RealtimeClientEventInputAudioBufferAppend, + RealtimeClientEvent, InputAudioBufferAppend ); event_from!( - InputAudioBufferCommitEvent, - ClientEvent, + RealtimeClientEventInputAudioBufferCommit, + RealtimeClientEvent, InputAudioBufferCommit ); event_from!( - InputAudioBufferClearEvent, - ClientEvent, + RealtimeClientEventInputAudioBufferClear, + RealtimeClientEvent, InputAudioBufferClear ); event_from!( - ConversationItemCreateEvent, - ClientEvent, + RealtimeClientEventConversationItemCreate, + RealtimeClientEvent, ConversationItemCreate ); event_from!( - ConversationItemTruncateEvent, - ClientEvent, + RealtimeClientEventConversationItemTruncate, + RealtimeClientEvent, ConversationItemTruncate ); event_from!( - ConversationItemDeleteEvent, - ClientEvent, + RealtimeClientEventConversationItemDelete, + RealtimeClientEvent, ConversationItemDelete ); -event_from!(ResponseCreateEvent, ClientEvent, ResponseCreate); -event_from!(ResponseCancelEvent, ClientEvent, ResponseCancel); - -message_from_event!(SessionUpdateEvent, ClientEvent); 
-message_from_event!(InputAudioBufferAppendEvent, ClientEvent); -message_from_event!(InputAudioBufferCommitEvent, ClientEvent); -message_from_event!(InputAudioBufferClearEvent, ClientEvent); -message_from_event!(ConversationItemCreateEvent, ClientEvent); -message_from_event!(ConversationItemTruncateEvent, ClientEvent); -message_from_event!(ConversationItemDeleteEvent, ClientEvent); -message_from_event!(ResponseCreateEvent, ClientEvent); -message_from_event!(ResponseCancelEvent, ClientEvent); - -impl From for ConversationItemCreateEvent { - fn from(value: Item) -> Self { +event_from!( + RealtimeClientEventConversationItemRetrieve, + RealtimeClientEvent, + ConversationItemRetrieve +); +event_from!( + RealtimeClientEventResponseCreate, + RealtimeClientEvent, + ResponseCreate +); +event_from!( + RealtimeClientEventResponseCancel, + RealtimeClientEvent, + ResponseCancel +); +event_from!( + RealtimeClientEventOutputAudioBufferClear, + RealtimeClientEvent, + OutputAudioBufferClear +); + +message_from_event!(RealtimeClientEventSessionUpdate, RealtimeClientEvent); +message_from_event!( + RealtimeClientEventInputAudioBufferAppend, + RealtimeClientEvent +); +message_from_event!( + RealtimeClientEventInputAudioBufferCommit, + RealtimeClientEvent +); +message_from_event!( + RealtimeClientEventInputAudioBufferClear, + RealtimeClientEvent +); +message_from_event!( + RealtimeClientEventConversationItemCreate, + RealtimeClientEvent +); +message_from_event!( + RealtimeClientEventConversationItemTruncate, + RealtimeClientEvent +); +message_from_event!( + RealtimeClientEventConversationItemDelete, + RealtimeClientEvent +); +message_from_event!( + RealtimeClientEventConversationItemRetrieve, + RealtimeClientEvent +); +message_from_event!(RealtimeClientEventResponseCreate, RealtimeClientEvent); +message_from_event!(RealtimeClientEventResponseCancel, RealtimeClientEvent); +message_from_event!( + RealtimeClientEventOutputAudioBufferClear, + RealtimeClientEvent +); + +impl From for 
RealtimeClientEventConversationItemCreate { + fn from(value: RealtimeConversationItem) -> Self { Self { event_id: None, previous_item_id: None, diff --git a/async-openai/src/types/realtime/content_part.rs b/async-openai/src/types/realtime/content_part.rs deleted file mode 100644 index eec93ab3..00000000 --- a/async-openai/src/types/realtime/content_part.rs +++ /dev/null @@ -1,18 +0,0 @@ -use serde::{Deserialize, Serialize}; - -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(tag = "type")] -pub enum ContentPart { - #[serde(rename = "text")] - Text { - /// The text content - text: String, - }, - #[serde(rename = "audio")] - Audio { - /// Base64-encoded audio data - audio: Option, - /// The transcript of the audio - transcript: String, - }, -} diff --git a/async-openai/src/types/realtime/conversation.rs b/async-openai/src/types/realtime/conversation.rs deleted file mode 100644 index 3ea43bd8..00000000 --- a/async-openai/src/types/realtime/conversation.rs +++ /dev/null @@ -1,10 +0,0 @@ -use serde::{Deserialize, Serialize}; - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct Conversation { - /// The unique ID of the conversation. - pub id: String, - - /// The object type, must be "realtime.conversation". - pub object: String, -} diff --git a/async-openai/src/types/realtime/conversation_item.rs b/async-openai/src/types/realtime/conversation_item.rs new file mode 100644 index 00000000..895e6d42 --- /dev/null +++ b/async-openai/src/types/realtime/conversation_item.rs @@ -0,0 +1,291 @@ +use serde::{Deserialize, Serialize}; + +use crate::types::{ + mcp::MCPListToolsTool, + realtime::{ErrorCodeMessage, ErrorMessage}, +}; + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct SystemMessageContent { + /// The text content. + pub text: String, + /// The content type. Always `input_text` for system messages. 
+ pub r#type: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeConversationItemMessageSystem { + /// The content of the message. + pub content: Vec, + + /// The unique ID of the item. This may be provided by the client or generated by the server. + pub id: Option, + + /// Identifier for the API object being returned - always `realtime.item`. + /// Optional when creating a new item. + pub object: Option, + + /// The status of the item. Has no effect on the conversation. + pub status: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct UserMessageContentInputText { + /// The text content (for `input_text`). + pub text: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct UserMessageContentInputAudio { + /// Base64-encoded audio bytes (for `input_audio`), these will be parsed as the + /// format specified in the session input audio type configuration. + /// This defaults to PCM 16-bit 24kHz mono if not specified. + pub audio: String, + /// Transcript of the audio (for `input_audio`). This is not sent to the model, + /// but will be attached to the message item for reference. + pub transcript: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, Default)] +#[serde(rename_all = "snake_case")] +pub enum ImageDetail { + #[default] + Auto, + Low, + High, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct UserMessageContentInputImage { + /// Base64-encoded image bytes (for `input_image`) as a data URI. + /// For example `data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA...`. + /// Supported formats are PNG and JPEG. + pub image_url: String, + /// The detail level of the image (for `input_image`). `auto` will default to `high`. 
+ pub detail: ImageDetail, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(tag = "type")] +#[serde(rename_all = "snake_case")] +pub enum UserMessageContent { + InputText(UserMessageContentInputText), + InputAudio(UserMessageContentInputAudio), + InputImage(UserMessageContentInputImage), +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeConversationItemMessageUser { + /// The content of the message. + pub content: Vec, + + /// The unique ID of the item. This may be provided by the client or generated by the server. + #[serde(skip_serializing_if = "Option::is_none")] + pub id: Option, + + /// Identifier for the API object being returned - always `realtime.item`. + /// Optional when creating a new item. + #[serde(skip_serializing_if = "Option::is_none")] + pub object: Option, + + /// The status of the item. Has no effect on the conversation. + #[serde(skip_serializing_if = "Option::is_none")] + pub status: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct AssistantMessageContentOutputText { + /// The text content + pub text: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct AssistantMessageContentOutputAudio { + /// Base64-encoded audio bytes, these will be parsed as the format specified + /// in the session output audio type configuration. This defaults to PCM 16-bit + /// 24kHz mono if not specified. + pub audio: Option, + /// The transcript of the audio content, this will always be present if the + /// output type is `audio`. + pub transcript: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(tag = "type")] +#[serde(rename_all = "snake_case")] +pub enum AssistantMessageContent { + OutputText(AssistantMessageContentOutputText), + OutputAudio(AssistantMessageContentOutputAudio), +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeConversationItemMessageAssistant { + /// The content of the message. 
+ pub content: Vec, + + /// The unique ID of the item. This may be provided by the client or generated by the server. + #[serde(skip_serializing_if = "Option::is_none")] + pub id: Option, + + /// Identifier for the API object being returned - always `realtime.item`. + /// Optional when creating a new item. + #[serde(skip_serializing_if = "Option::is_none")] + pub object: Option, + + /// The status of the item. Has no effect on the conversation. + #[serde(skip_serializing_if = "Option::is_none")] + pub status: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(tag = "role")] +#[serde(rename_all = "lowercase")] +pub enum RealtimeConversationItemMessage { + System(RealtimeConversationItemMessageSystem), + User(RealtimeConversationItemMessageUser), + Assistant(RealtimeConversationItemMessageAssistant), +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeConversationItemFunctionCall { + /// The arguments of the function call. This is a JSON-encoded string representing + /// the arguments passed to the function, for example {"arg1": "value1", "arg2": 42}. + pub arguments: String, + + /// The name of the function being called. + pub name: String, + + /// The ID of the function call. + pub call_id: String, + + /// The unique ID of the item. This may be provided by the client or generated by the server. + #[serde(skip_serializing_if = "Option::is_none")] + pub id: Option, + + /// Identifier for the API object being returned - always `realtime.item`. + /// Optional when creating a new item. + #[serde(skip_serializing_if = "Option::is_none")] + pub object: Option, + + /// The status of the item. Has no effect on the conversation. + pub status: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeConversationItemFunctionCallOutput { + /// The ID of the function call this output is for. 
+ pub call_id: String, + + /// The output of the function call, this is free text and can contain any information + /// or simply be empty. + pub output: String, + + /// The unique ID of the item. This may be provided by the client or generated by the server. + #[serde(skip_serializing_if = "Option::is_none")] + pub id: Option, + + /// Identifier for the API object being returned - always `realtime.item`. + /// Optional when creating a new item. + #[serde(skip_serializing_if = "Option::is_none")] + pub object: Option, + + /// The status of the item. Has no effect on the conversation. + pub status: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeMCPApprovalResponse { + /// The ID of the approval request being answered. + pub approval_request_id: String, + + /// Whether the request was approved. + pub approved: bool, + + /// The unique ID of the approval response. + pub id: String, + + /// Optional reason for the decision. + pub reason: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeMCPListTools { + /// The label of the MCP server. + pub server_label: String, + + /// The tools available on the server. + pub tools: Vec, + + /// The unique ID of the list. + pub id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeMCPApprovalRequest { + /// A JSON string of arguments for the tool. + pub arguments: String, + + /// The unique ID of the approval request. + pub id: String, + + /// The name of the tool to run. + pub name: String, + + /// The label of the MCP server making the request. 
+ pub server_label: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeMCPProtocolError {} + +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum RealtimeMCPToolCallError { + ProtocolError(ErrorCodeMessage), + ToolExecutionError(ErrorMessage), + HttpError(ErrorCodeMessage), +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeMCPToolCall { + /// A JSON string of the arguments passed to the tool. + pub arguments: String, + + /// The unique ID of the tool call. + pub id: String, + + /// The name of the tool that was run. + pub name: String, + + /// The label of the MCP server running the tool. + pub server_label: String, + + /// The ID of an associated approval request, if any. + pub approval_request_id: Option, + + /// The error from the tool call, if any. + pub error: Option, + + /// The output from the tool call. + pub output: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum RealtimeConversationItem { + Message(RealtimeConversationItemMessage), + FunctionCall(RealtimeConversationItemFunctionCall), + FunctionCallOutput(RealtimeConversationItemFunctionCallOutput), + McpApprovalResponse(RealtimeMCPApprovalResponse), + McpListTools(RealtimeMCPListTools), + McpCall(RealtimeMCPToolCall), + McpApprovalRequest(RealtimeMCPApprovalRequest), +} + +impl TryFrom for RealtimeConversationItem { + type Error = serde_json::Error; + + fn try_from(value: serde_json::Value) -> Result { + serde_json::from_value(value) + } +} diff --git a/async-openai/src/types/realtime/error.rs b/async-openai/src/types/realtime/error.rs index 6ce907c3..34fb9eac 100644 --- a/async-openai/src/types/realtime/error.rs +++ b/async-openai/src/types/realtime/error.rs @@ -17,3 +17,14 @@ pub struct RealtimeAPIError { /// The event_id of the client event that caused the error, if applicable. 
pub event_id: Option, } + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct ErrorCodeMessage { + pub code: String, + pub message: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct ErrorMessage { + pub message: String, +} diff --git a/async-openai/src/types/realtime/item.rs b/async-openai/src/types/realtime/item.rs deleted file mode 100644 index 3af7d0d9..00000000 --- a/async-openai/src/types/realtime/item.rs +++ /dev/null @@ -1,99 +0,0 @@ -use serde::{Deserialize, Serialize}; - -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(rename_all = "snake_case")] -pub enum ItemType { - Message, - FunctionCall, - FunctionCallOutput, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(rename_all = "snake_case")] -pub enum ItemStatus { - Completed, - InProgress, - Incomplete, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(rename_all = "lowercase")] -pub enum ItemRole { - User, - Assistant, - System, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(rename_all = "snake_case")] -pub enum ItemContentType { - InputText, - InputAudio, - Text, - Audio, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ItemContent { - /// The content type ("input_text", "input_audio", "text", "audio"). - pub r#type: ItemContentType, - - /// The text content. - #[serde(skip_serializing_if = "Option::is_none")] - pub text: Option, - - /// Base64-encoded audio bytes. - #[serde(skip_serializing_if = "Option::is_none")] - pub audio: Option, - - /// The transcript of the audio. - #[serde(skip_serializing_if = "Option::is_none")] - pub transcript: Option, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct Item { - /// The unique ID of the item. - #[serde(skip_serializing_if = "Option::is_none")] - pub id: Option, - - /// The type of the item ("message", "function_call", "function_call_output"). 
- #[serde(skip_serializing_if = "Option::is_none")] - pub r#type: Option, - - /// The status of the item ("completed", "in_progress", "incomplete"). - #[serde(skip_serializing_if = "Option::is_none")] - pub status: Option, - - /// The role of the message sender ("user", "assistant", "system"). - #[serde(skip_serializing_if = "Option::is_none")] - pub role: Option, - - /// The content of the message. - #[serde(skip_serializing_if = "Option::is_none")] - pub content: Option>, - - /// The ID of the function call (for "function_call" items). - #[serde(skip_serializing_if = "Option::is_none")] - pub call_id: Option, - - /// The name of the function being called (for "function_call" items). - #[serde(skip_serializing_if = "Option::is_none")] - pub name: Option, - - /// The arguments of the function call (for "function_call" items). - #[serde(skip_serializing_if = "Option::is_none")] - pub arguments: Option, - - /// The output of the function call (for "function_call_output" items). - #[serde(skip_serializing_if = "Option::is_none")] - pub output: Option, -} - -impl TryFrom for Item { - type Error = serde_json::Error; - - fn try_from(value: serde_json::Value) -> Result { - serde_json::from_value(value) - } -} diff --git a/async-openai/src/types/realtime/mod.rs b/async-openai/src/types/realtime/mod.rs index b47605f8..386a92b0 100644 --- a/async-openai/src/types/realtime/mod.rs +++ b/async-openai/src/types/realtime/mod.rs @@ -1,19 +1,13 @@ mod client_event; -mod content_part; -mod conversation; +mod conversation_item; mod error; -mod item; -mod rate_limit; -mod response_resource; +mod response; mod server_event; -mod session_resource; +mod session; pub use client_event::*; -pub use content_part::*; -pub use conversation::*; +pub use conversation_item::*; pub use error::*; -pub use item::*; -pub use rate_limit::*; -pub use response_resource::*; +pub use response::*; pub use server_event::*; -pub use session_resource::*; +pub use session::*; diff --git 
a/async-openai/src/types/realtime/rate_limit.rs b/async-openai/src/types/realtime/rate_limit.rs deleted file mode 100644 index f3fc4aa6..00000000 --- a/async-openai/src/types/realtime/rate_limit.rs +++ /dev/null @@ -1,13 +0,0 @@ -use serde::{Deserialize, Serialize}; - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct RateLimit { - /// The name of the rate limit ("requests", "tokens", "input_tokens", "output_tokens"). - pub name: String, - /// The maximum allowed value for the rate limit. - pub limit: u32, - /// The remaining value before the limit is reached. - pub remaining: u32, - /// Seconds until the rate limit resets. - pub reset_seconds: f32, -} diff --git a/async-openai/src/types/realtime/response.rs b/async-openai/src/types/realtime/response.rs new file mode 100644 index 00000000..c9f28bb2 --- /dev/null +++ b/async-openai/src/types/realtime/response.rs @@ -0,0 +1,267 @@ +use std::collections::HashMap; + +use serde::{Deserialize, Serialize}; + +use crate::types::{ + realtime::{ + MaxOutputTokens, RealtimeAudioFormats, RealtimeConversationItem, RealtimeTool, + RealtimeVoice, ToolChoice, + }, + responses::Prompt, +}; + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeResponseUsage { + /// Details about the input tokens used in the Response. Cached tokens are tokens from previous + /// turns in the conversation that are included as context for the current response. Cached tokens + /// here are counted as a subset of input tokens, meaning input tokens will include cached and + /// uncached tokens. + #[serde(skip_serializing_if = "Option::is_none")] + pub input_token_details: Option, + + /// The number of input tokens used in the Response, including text and audio tokens. + pub input_tokens: u32, + + #[serde(skip_serializing_if = "Option::is_none")] + pub output_token_details: Option, + + /// The number of output tokens sent in the Response, including text and audio tokens. 
+ pub output_tokens: u32, + + /// The total number of tokens in the Response including input and output text and audio tokens. + pub total_tokens: u32, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct InputTokenDetails { + /// The number of audio tokens used as input for the Response. + #[serde(skip_serializing_if = "Option::is_none")] + pub audio_tokens: Option, + /// The number of cached tokens used as input for the Response. + #[serde(skip_serializing_if = "Option::is_none")] + pub cached_tokens: Option, + + /// Details about the cached tokens used as input for the Response. + pub cached_token_details: Option, + + /// The number of image tokens used as input for the Response. + #[serde(skip_serializing_if = "Option::is_none")] + pub image_tokens: Option, + + /// The number of text tokens used as input for the Response. + #[serde(skip_serializing_if = "Option::is_none")] + pub text_tokens: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct CachedTokenDetails { + /// The number of cached audio tokens used as input for the Response. + #[serde(skip_serializing_if = "Option::is_none")] + pub audio_tokens: Option, + + /// The number of cached image tokens used as input for the Response. + #[serde(skip_serializing_if = "Option::is_none")] + pub image_tokens: Option, + + /// The number of cached text tokens used as input for the Response. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub text_tokens: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct OutputTokenDetails { + #[serde(skip_serializing_if = "Option::is_none")] + pub text_tokens: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub audio_tokens: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(rename_all = "snake_case")] +pub enum RealtimeResponseStatus { + InProgress, + Completed, + Cancelled, + Failed, + Incomplete, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct Error { + pub code: String, + pub r#type: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(rename_all = "lowercase")] +pub enum RealtimeResponseStatusDetailType { + Completed, + Cancelled, + Incomplete, + Failed, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(rename_all = "snake_case")] +pub enum RealtimeResponseStatusDetailReason { + TurnDetected, + ClientCancelled, + MaxOutputTokens, + ContentFilter, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeResponseStatusDetail { + /// A description of the error that caused the response to fail, populated when the status is failed. + pub error: Option, + /// The reason the Response did not complete. For a `cancelled` Response, one of `turn_detected` + /// (the server VAD detected a new start of speech) or `client_cancelled` (the client sent a cancel + /// event). For an incomplete Response, one of `max_output_tokens` or `content_filter` (the + /// server-side safety filter activated and cut off the response). + pub reason: Option, + /// The type of error that caused the response to fail, corresponding with the `status` + /// field (`completed`, `cancelled`, `incomplete`, `failed`). + pub r#type: RealtimeResponseStatusDetailType, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct ResponseAudioOutput { + /// The format of the output audio. 
+ pub format: RealtimeAudioFormats, + + /// The voice the model uses to respond. Voice cannot be changed during the session once + /// the model has responded with audio at least once. Current voice options are + /// `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, and `cedar`. + /// We recommend `marin` and `cedar` for best quality. + pub voice: RealtimeVoice, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct ResponseAudio { + /// Configuration for audio output. + pub output: ResponseAudioOutput, +} + +#[derive(Debug, Serialize, Deserialize, Clone, Default)] +#[serde(rename_all = "lowercase")] +pub enum Conversation { + #[default] + Auto, + None, +} + +/// The response resource. +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeResponseCreateParams { + /// Configuration for audio input and output. + pub audio: ResponseAudio, + + /// Controls which conversation the response is added to. Currently supports auto and none, + /// with auto as the default value. The auto value means that the contents of the response + /// will be added to the default conversation. Set this to none to create an out-of-band + /// response which will not add items to default conversation. + pub conversation: Conversation, + + /// Input items to include in the prompt for the model. Using this field creates a new context + /// for this Response instead of using the default conversation. An empty array `[]` will clear + /// the context for this Response. Note that this can include references to items that + /// previously appeared in the session using their id. + pub input: Vec, + + /// The default system instructions (i.e. system message) prepended to model calls. + /// This field allows the client to guide the model on desired responses. + /// The model can be instructed on response content and format, (e.g. "be extremely succinct", + /// "act friendly", "here are examples of good responses") and on audio behavior + /// (e.g. 
"talk quickly", "inject emotion into your voice", "laugh frequently"). + /// The instructions are not guaranteed to be followed by the model, but they provide + /// guidance to the model on the desired behavior. Note that the server sets default + /// instructions which will be used if this field is not set and are visible in + /// the `session.created` event at the start of the session. + pub instructions: String, + + /// Maximum number of output tokens for a single assistant response, inclusive of tool calls. + /// Provide an integer between 1 and 4096 to limit output tokens, or inf for the maximum + /// available tokens for a given model. Defaults to `inf`. + pub max_output_tokens: MaxOutputTokens, + + /// Set of 16 key-value pairs that can be attached to an object. This can be useful for + /// storing additional information about the object in a structured format, and querying + /// for objects via API or the dashboard. + /// + /// Keys are strings with a maximum length of 64 characters. Values are strings with a + /// maximum length of 512 characters. + #[serde(skip_serializing_if = "Option::is_none")] + pub metadata: Option, + + /// The set of modalities the model used to respond, currently the only possible values + /// are [\"audio\"], [\"text\"]. Audio output always include a text transcript. + /// Setting the output to mode `text` will disable audio output from the model. + pub output_modalities: Vec, + + /// Reference to a prompt template and its variables. + /// [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). + #[serde(skip_serializing_if = "Option::is_none")] + pub prompt: Option, + + /// How the model chooses tools. Provide one of the string modes or force a specific + /// function/MCP tool. + #[serde(skip_serializing_if = "Option::is_none")] + pub tool_choice: Option, + + /// Tools available to the model. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub tools: Option>, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeResponse { + /// Configuration for audio output. + pub audio: Option, + + /// Which conversation the response is added to, determined by the `conversation` field in the + /// `response.create` event. If `auto`, the response will be added to the default conversation + /// and the value of `conversation_id` will be an id like `conv_1234`. If `none`, the response + /// will not be added to any conversation and the value of `conversation_id` will be `null`. + /// If responses are being triggered automatically by VAD the response will be added to the + /// default conversation. + #[serde(skip_serializing_if = "Option::is_none")] + pub conversation_id: Option, + + /// The unique ID of the response, will look like `resp_1234`. + pub id: String, + + /// Maximum number of output tokens for a single assistant response, inclusive of tool calls, + /// that was used in this response. + pub max_output_tokens: MaxOutputTokens, + + /// Set of 16 key-value pairs that can be attached to an object. This can be useful for + /// storing additional information about the object in a structured format, and querying + /// for objects via API or the dashboard. + /// + /// Keys are strings with a maximum length of 64 characters. Values are strings with a + /// maximum length of 512 characters. + #[serde(skip_serializing_if = "Option::is_none")] + pub metadata: Option>, + + /// The object type, must be "realtime.response". + pub object: String, + + /// The list of output items generated by the response. + pub output: Vec, + + /// The set of modalities the model used to respond, currently the only possible values + /// are [\"audio\"], [\"text\"]. Audio output always includes a text transcript. + /// Setting the output to mode `text` will disable audio output from the model.
+ pub output_modalities: Vec, + + /// The final status of the response (`completed`, `cancelled`, `failed`, or `incomplete`, `in_progress`). + pub status: RealtimeResponseStatus, + + /// Additional details about the status. + pub status_details: Option, + + /// Usage statistics for the Response, this will correspond to billing. A Realtime API session + /// will maintain a conversation context and append new Items to the Conversation, thus output + /// from previous turns (text and audio tokens) will become the input for later turns. + pub usage: Option, +} diff --git a/async-openai/src/types/realtime/response_resource.rs b/async-openai/src/types/realtime/response_resource.rs deleted file mode 100644 index a6c6c32f..00000000 --- a/async-openai/src/types/realtime/response_resource.rs +++ /dev/null @@ -1,61 +0,0 @@ -use serde::{Deserialize, Serialize}; - -use super::item::Item; - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct Usage { - pub total_tokens: u32, - pub input_tokens: u32, - pub output_tokens: u32, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(rename_all = "snake_case")] -pub enum ResponseStatus { - InProgress, - Completed, - Cancelled, - Failed, - Incomplete, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct FailedError { - pub code: String, - pub message: String, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(rename_all = "snake_case")] -pub enum IncompleteReason { - Interruption, - MaxOutputTokens, - ContentFilter, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(tag = "type")] -pub enum ResponseStatusDetail { - #[serde(rename = "incomplete")] - Incomplete { reason: IncompleteReason }, - #[serde(rename = "failed")] - Failed { error: Option }, - #[serde(rename = "cancelled")] - Cancelled { reason: String }, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseResource { - /// The unique ID of the response. 
- pub id: String, - /// The object type, must be "realtime.response". - pub object: String, - /// The status of the response - pub status: ResponseStatus, - /// Additional details about the status. - pub status_details: Option, - /// The list of output items generated by the response. - pub output: Vec, - /// Usage statistics for the response. - pub usage: Option, -} diff --git a/async-openai/src/types/realtime/server_event.rs b/async-openai/src/types/realtime/server_event.rs index 8795f6e4..f2ea8a58 100644 --- a/async-openai/src/types/realtime/server_event.rs +++ b/async-openai/src/types/realtime/server_event.rs @@ -1,12 +1,12 @@ use serde::{Deserialize, Serialize}; use super::{ - content_part::ContentPart, conversation::Conversation, error::RealtimeAPIError, item::Item, - rate_limit::RateLimit, response_resource::ResponseResource, session_resource::SessionResource, + conversation_item::RealtimeConversationItem, error::RealtimeAPIError, + response::RealtimeResponse, session::Session, }; #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ErrorEvent { +pub struct RealtimeServerEventError { /// The unique ID of the server event. pub event_id: String, /// Details of the error. @@ -14,116 +14,199 @@ pub struct ErrorEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct SessionCreatedEvent { +pub struct RealtimeServerEventSessionCreated { /// The unique ID of the server event. pub event_id: String, /// The session resource. - pub session: SessionResource, + pub session: Session, } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct SessionUpdatedEvent { +pub struct RealtimeServerEventSessionUpdated { /// The unique ID of the server event. pub event_id: String, /// The updated session resource. - pub session: SessionResource, + pub session: Session, } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ConversationCreatedEvent { +pub struct RealtimeServerEventConversationItemAdded { /// The unique ID of the server event. 
pub event_id: String, - /// The conversation resource. - pub conversation: Conversation, + /// A single item within a Realtime conversation. + pub item: RealtimeConversationItem, + /// The ID of the item that precedes this one, if any. This is used to maintain ordering when items are inserted. + pub previous_item_id: Option, } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct InputAudioBufferCommitedEvent { +pub struct RealtimeServerEventConversationItemDone { /// The unique ID of the server event. pub event_id: String, - /// The ID of the preceding item after which the new item will be inserted. - pub previous_item_id: String, + /// A single item within a Realtime conversation. + pub item: RealtimeConversationItem, + /// The ID of the item that precedes this one, if any. This is used to maintain ordering when items are inserted. + pub previous_item_id: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeServerEventInputAudioBufferCommitted { + /// The unique ID of the server event. + pub event_id: String, + /// The ID of the preceding item after which the new item will be inserted. Can be null if the item has no predecessor. + pub previous_item_id: Option, /// The ID of the user message item that will be created. pub item_id: String, } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct InputAudioBufferClearedEvent { +pub struct RealtimeServerEventInputAudioBufferCleared { /// The unique ID of the server event. pub event_id: String, } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct InputAudioBufferSpeechStartedEvent { +pub struct RealtimeServerEventInputAudioBufferSpeechStarted { /// The unique ID of the server event. pub event_id: String, - /// Milliseconds since the session started when speech was detected. + /// Milliseconds from the start of all audio written to the buffer during the session when speech was + /// first detected. 
This will correspond to the beginning of audio sent to the model, and thus includes + /// the `prefix_padding_ms` configured in the Session. pub audio_start_ms: u32, /// The ID of the user message item that will be created when speech stops. pub item_id: String, } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct InputAudioBufferSpeechStoppedEvent { +pub struct RealtimeServerEventInputAudioBufferSpeechStopped { /// The unique ID of the server event. pub event_id: String, - /// Milliseconds since the session started when speech stopped. + /// Milliseconds since the session started when speech stopped. This will correspond to the end of + /// audio sent to the model, and thus includes the `min_silence_duration_ms` configured in the Session. pub audio_end_ms: u32, /// The ID of the user message item that will be created. pub item_id: String, } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ConversationItemCreatedEvent { +pub struct RealtimeServerEventInputAudioBufferTimeoutTriggered { /// The unique ID of the server event. pub event_id: String, - /// The ID of the preceding item. - pub previous_item_id: Option, - /// The item that was created. - pub item: Item, + /// Millisecond offset of audio written to the input audio buffer at the time the timeout was triggered. + pub audio_end_ms: u32, + /// Millisecond offset of audio written to the input audio buffer that was after the playback time of the last model response. + pub audio_start_ms: u32, + /// The ID of the item associated with this segment. + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeServerEventOutputAudioBufferStarted { + /// The unique ID of the server event. + pub event_id: String, + /// The unique ID of the response that produced the audio. + pub response_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeServerEventOutputAudioBufferStopped { + /// The unique ID of the server event. 
+ pub event_id: String, + /// The unique ID of the response that produced the audio. + pub response_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeServerEventOutputAudioBufferCleared { + /// The unique ID of the server event. + pub event_id: String, + /// The unique ID of the response that produced the audio. + pub response_id: String, } #[derive(Debug, Serialize, Deserialize, Clone)] /// Log probability information for a transcribed token. -pub struct LogProb { - /// Raw UTF-8 bytes for the token. +pub struct LogProbProperties { + /// The bytes that were used to generate the log probability. pub bytes: Vec, /// The log probability of the token. pub logprob: f64, - /// The token string. + /// The token that was used to generate the log probability. pub token: String, } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ConversationItemInputAudioTranscriptionCompletedEvent { +pub struct TokenUsageInputTokenDetails { + /// Number of audio tokens billed for this request. + pub audio_tokens: u32, + /// Number of text tokens billed for this request. + pub text_tokens: u32, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct TranscriptTextUsageTokens { + /// Number of input tokens billed for this request. + pub input_tokens: u32, + /// Number of output tokens generated. + pub output_tokens: u32, + /// Total number of tokens used (input + output). + pub total_tokens: u32, + /// Details about the input tokens billed for this request. + pub input_token_details: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct TranscriptTextUsageDuration { + ///Duration of the input audio in seconds. 
+ pub seconds: f32, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(tag = "type")] +pub enum TranscriptionUsage { + #[serde(rename = "tokens")] + Tokens(TranscriptTextUsageTokens), + #[serde(rename = "duration")] + Duration(TranscriptTextUsageDuration), +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeServerEventConversationItemInputAudioTranscriptionCompleted { /// The unique ID of the server event. pub event_id: String, - /// The ID of the user message item. + /// The ID of the item containing the audio that is being transcribed. pub item_id: String, /// The index of the content part containing the audio. pub content_index: u32, /// The transcribed text. pub transcript: String, /// Optional per-token log probability data. - pub logprobs: Option>, + pub logprobs: Option>, + /// Usage statistics for the transcription, this is billed according to the ASR model's pricing rather than + /// the realtime model's pricing. + pub usage: TranscriptionUsage, } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ConversationItemInputAudioTranscriptionDeltaEvent { +pub struct RealtimeServerEventConversationItemInputAudioTranscriptionDelta { /// The unique ID of the server event. pub event_id: String, - /// The ID of the user message item. + /// The ID of the item containing the audio that is being transcribed. pub item_id: String, - /// The index of the content part containing the audio. + ///The index of the content part in the item's content array. pub content_index: u32, /// The text delta. pub delta: String, - /// Optional per-token log probability data. - pub logprobs: Option>, + /// The log probabilities of the transcription. These can be enabled by configurating the session with + /// `"include": ["item.input_audio_transcription.logprobs"]`. Each entry in the array + /// corresponds a log probability of which token would be selected for this chunk of transcription. 
This + /// can help to identify if it was possible there were multiple valid options for a given chunk of + /// transcription. + pub logprobs: Option>, } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ConversationItemInputAudioTranscriptionFailedEvent { +pub struct RealtimeServerEventConversationItemInputAudioTranscriptionFailed { /// The unique ID of the server event. pub event_id: String, /// The ID of the user message item. @@ -135,7 +218,7 @@ pub struct ConversationItemInputAudioTranscriptionFailedEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ConversationItemTruncatedEvent { +pub struct RealtimeServerEventConversationItemTruncated { /// The unique ID of the server event. pub event_id: String, /// The ID of the assistant message item that was truncated. @@ -147,7 +230,7 @@ pub struct ConversationItemTruncatedEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ConversationItemDeletedEvent { +pub struct RealtimeServerEventConversationItemDeleted { /// The unique ID of the server event. pub event_id: String, /// The ID of the item that was deleted. @@ -155,47 +238,92 @@ pub struct ConversationItemDeletedEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseCreatedEvent { +pub struct RealtimeServerEventConversationItemRetrieved { + /// The unique ID of the server event. + pub event_id: String, + /// A single item within a Realtime conversation. + pub item: RealtimeConversationItem, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeServerEventConversationItemInputAudioTranscriptionSegment { + /// The unique ID of the server event. + pub event_id: String, + /// The ID of the item containing the input audio content. + pub item_id: String, + /// The index of the input audio content part within the item. + pub content_index: u32, + /// The text for this segment. + pub text: String, + /// The segment identifier. 
+ pub id: String, + /// The detected speaker label for this segment. + pub speaker: String, + /// Start time of the segment in seconds. + pub start: f32, + /// End time of the segment in seconds. + pub end: f32, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeServerEventResponseCreated { /// The unique ID of the server event. pub event_id: String, /// The response resource. - pub response: ResponseResource, + pub response: RealtimeResponse, } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseDoneEvent { +pub struct RealtimeServerEventResponseDone { /// The unique ID of the server event. pub event_id: String, /// The response resource. - pub response: ResponseResource, + pub response: RealtimeResponse, } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseOutputItemAddedEvent { +pub struct RealtimeServerEventResponseOutputItemAdded { /// The unique ID of the server event. pub event_id: String, - /// The ID of the response to which the item belongs. + /// The ID of the Response to which the item belongs. pub response_id: String, - /// The index of the output item in the response. + /// The index of the output item in the Response. pub output_index: u32, - /// The item that was added. - pub item: Item, + /// A single item within a Realtime conversation. + pub item: RealtimeConversationItem, } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseOutputItemDoneEvent { +pub struct RealtimeServerEventResponseOutputItemDone { /// The unique ID of the server event. pub event_id: String, /// The ID of the response to which the item belongs. pub response_id: String, - /// The index of the output item in the response. + /// The index of the output item in the Response. pub output_index: u32, - /// The completed item. - pub item: Item, + /// A single item within a Realtime conversation. 
+ pub item: RealtimeConversationItem, } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseContentPartAddedEvent { +#[serde(tag = "type")] +pub enum ContentPart { + #[serde(rename = "text")] + Text { + /// The text content + text: String, + }, + #[serde(rename = "audio")] + Audio { + /// Base64-encoded audio data + audio: Option, + /// The transcript of the audio + transcript: String, + }, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeServerEventResponseContentPartAdded { /// The unique ID of the server event. pub event_id: String, /// The ID of the response. @@ -211,12 +339,12 @@ pub struct ResponseContentPartAddedEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseContentPartDoneEvent { +pub struct RealtimeServerEventResponseContentPartDone { /// The unique ID of the server event. pub event_id: String, /// The ID of the response. pub response_id: String, - /// The ID of the item to which the content part was added. + /// The ID of the item. pub item_id: String, /// The index of the output item in the response. pub output_index: u32, @@ -227,7 +355,7 @@ pub struct ResponseContentPartDoneEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseTextDeltaEvent { +pub struct RealtimeServerEventResponseTextDelta { /// The unique ID of the server event. pub event_id: String, /// The ID of the response. @@ -243,7 +371,7 @@ pub struct ResponseTextDeltaEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseTextDoneEvent { +pub struct RealtimeServerEventResponseTextDone { /// The unique ID of the server event. pub event_id: String, /// The ID of the response. @@ -259,7 +387,7 @@ pub struct ResponseTextDoneEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseAudioTranscriptDeltaEvent { +pub struct RealtimeServerEventResponseAudioTranscriptDelta { /// The unique ID of the server event. pub event_id: String, /// The ID of the response. 
@@ -275,7 +403,7 @@ pub struct ResponseAudioTranscriptDeltaEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseAudioTranscriptDoneEvent { +pub struct RealtimeServerEventResponseAudioTranscriptDone { /// The unique ID of the server event. pub event_id: String, /// The ID of the response. @@ -291,7 +419,7 @@ pub struct ResponseAudioTranscriptDoneEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseAudioDeltaEvent { +pub struct RealtimeServerEventResponseAudioDelta { /// The unique ID of the server event. pub event_id: String, /// The ID of the response. @@ -307,7 +435,7 @@ pub struct ResponseAudioDeltaEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseAudioDoneEvent { +pub struct RealtimeServerEventResponseAudioDone { /// The unique ID of the server event. pub event_id: String, /// The ID of the response. @@ -321,7 +449,7 @@ pub struct ResponseAudioDoneEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseFunctionCallArgumentsDeltaEvent { +pub struct RealtimeServerEventResponseFunctionCallArgumentsDelta { /// The unique ID of the server event. pub event_id: String, /// The ID of the response. @@ -337,7 +465,7 @@ pub struct ResponseFunctionCallArgumentsDeltaEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseFunctionCallArgumentsDoneEvent { +pub struct RealtimeServerEventResponseFunctionCallArgumentsDone { /// The unique ID of the server event. pub event_id: String, /// The ID of the response. @@ -353,137 +481,370 @@ pub struct ResponseFunctionCallArgumentsDoneEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct RateLimitsUpdatedEvent { +#[serde(rename_all = "lowercase")] +pub enum RealtimeRateLimitName { + Requests, + Tokens, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeRateLimit { + /// The name of the rate limit (requests, tokens). 
+ pub name: RealtimeRateLimitName, + /// The maximum allowed value for the rate limit. + pub limit: u32, + /// The remaining value before the limit is reached. + pub remaining: u32, + /// Seconds until the rate limit resets. + pub reset_seconds: f32, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeServerEventRateLimitsUpdated { /// The unique ID of the server event. pub event_id: String, - pub rate_limits: Vec, + pub rate_limits: Vec, } -/// These are events emitted from the OpenAI Realtime WebSocket server to the client. #[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(tag = "type")] -pub enum ServerEvent { - /// Returned when an error occurs. - #[serde(rename = "error")] - Error(ErrorEvent), +pub struct RealtimeServerEventMCPListToolsInProgress { + /// The unique ID of the server event. + pub event_id: String, + /// The ID of the MCP list tools item. + pub item_id: String, +} - /// Returned when a session is created. Emitted automatically when a new connection is established. - #[serde(rename = "session.created")] - SessionCreated(SessionCreatedEvent), +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeServerEventMCPListToolsCompleted { + /// The unique ID of the server event. + pub event_id: String, + /// The ID of the MCP list tools item. + pub item_id: String, +} - /// Returned when a session is updated. - #[serde(rename = "session.updated")] - SessionUpdated(SessionUpdatedEvent), +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeServerEventMCPListToolsFailed { + /// The unique ID of the server event. + pub event_id: String, + /// The ID of the MCP list tools item. + pub item_id: String, +} - /// Returned when a conversation is created. Emitted right after session creation. 
- #[serde(rename = "conversation.created")] - ConversationCreated(ConversationCreatedEvent), +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeServerEventResponseMCPCallArgumentsDelta { + /// The unique ID of the server event. + pub event_id: String, + /// The ID of the response. + pub response_id: String, + /// The ID of the MCP tool call item. + pub item_id: String, + /// The index of the output item in the response. + pub output_index: u32, + /// The JSON-encoded arguments delta. + pub delta: String, + /// If present, indicates the delta text was obfuscated. + #[serde(skip_serializing_if = "Option::is_none")] + pub obfuscation: Option, +} - /// Returned when an input audio buffer is committed, either by the client or automatically in server VAD mode. - #[serde(rename = "input_audio_buffer.committed")] - InputAudioBufferCommited(InputAudioBufferCommitedEvent), +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeServerEventResponseMCPCallArgumentsDone { + /// The unique ID of the server event. + pub event_id: String, + /// The ID of the response. + pub response_id: String, + /// The ID of the MCP tool call item. + pub item_id: String, + /// The index of the output item in the response. + pub output_index: u32, + /// The final JSON-encoded arguments string. + pub arguments: String, +} - /// Returned when the input audio buffer is cleared by the client. - #[serde(rename = "input_audio_buffer.cleared")] - InputAudioBufferCleared(InputAudioBufferClearedEvent), +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeServerEventResponseMCPCallInProgress { + /// The unique ID of the server event. + pub event_id: String, + /// The index of the output item in the response. + pub output_index: u32, + /// The ID of the MCP tool call item. + pub item_id: String, +} - /// Returned in server turn detection mode when speech is detected. 
- #[serde(rename = "input_audio_buffer.speech_started")] - InputAudioBufferSpeechStarted(InputAudioBufferSpeechStartedEvent), +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeServerEventResponseMCPCallCompleted { + /// The unique ID of the server event. + pub event_id: String, + /// The index of the output item in the response. + pub output_index: u32, + /// The ID of the MCP tool call item. + pub item_id: String, +} - /// Returned in server turn detection mode when speech stops. - #[serde(rename = "input_audio_buffer.speech_stopped")] - InputAudioBufferSpeechStopped(InputAudioBufferSpeechStoppedEvent), +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeServerEventResponseMCPCallFailed { + /// The unique ID of the server event. + pub event_id: String, + /// The index of the output item in the response. + pub output_index: u32, + /// The ID of the MCP tool call item. + pub item_id: String, +} - /// Returned when a conversation item is created. - #[serde(rename = "conversation.item.created")] - ConversationItemCreated(ConversationItemCreatedEvent), +/// These are events emitted from the OpenAI Realtime WebSocket server to the client. +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(tag = "type")] +pub enum RealtimeServerEvent { + /// Returned when an error occurs, which could be a client problem or a server problem. + /// Most errors are recoverable and the session will stay open, we recommend to + /// implementors to monitor and log error messages by default. + #[serde(rename = "error")] + Error(RealtimeServerEventError), - /// Returned when input audio transcription is enabled and a transcription succeeds. + /// Returned when a Session is created. Emitted automatically when a new connection is established as the first server event. + /// This event will contain the default Session configuration. 
+ #[serde(rename = "session.created")] + SessionCreated(RealtimeServerEventSessionCreated), + + /// Returned when a session is updated with a `session.update` event, unless there is an error. + #[serde(rename = "session.updated")] + SessionUpdated(RealtimeServerEventSessionUpdated), + + /// Sent by the server when an Item is added to the default Conversation. This can happen in several cases: + /// - When the client sends a conversation.item.create event + /// - When the input audio buffer is committed. In this case the item will be a user message containing the audio from the buffer. + /// - When the model is generating a Response. In this case the `conversation.item.added` event will be sent when the model starts + /// generating a specific Item, and thus it will not yet have any content (and `status` will be `in_progress`). + /// + /// The event will include the full content of the Item (except when model is generating a Response) except for audio data, + /// which can be retrieved separately with a `conversation.item.retrieve` event if necessary. + #[serde(rename = "conversation.item.added")] + ConversationItemAdded(RealtimeServerEventConversationItemAdded), + + /// Returned when a conversation item is finalized. + /// + /// The event will include the full content of the Item except for audio data, which can be retrieved + /// separately with a `conversation.item.retrieve` event if needed. + #[serde(rename = "conversation.item.done")] + ConversationItemDone(RealtimeServerEventConversationItemDone), + + /// Returned when a conversation item is retrieved with `conversation.item.retrieve`. + /// This is provided as a way to fetch the server's representation of an item, for example to get access + /// to the post-processed audio data after noise cancellation and VAD. + /// It includes the full content of the Item, including audio data. 
+ #[serde(rename = "conversation.item.retrieved")] + ConversationItemRetrieved(RealtimeServerEventConversationItemRetrieved), + + /// This event is the output of audio transcription for user audio written to the user audio + /// buffer. Transcription begins when the input audio buffer is committed by the client or + /// server (when VAD is enabled). Transcription runs asynchronously with Response + /// creation, so this event may come before or after the Response events. + /// + /// Realtime API models accept audio natively, and thus input transcription is a separate process + /// run on a separate ASR (Automatic Speech Recognition) model. The transcript + /// may diverge somewhat from the model's interpretation, and should be treated as a rough guide. #[serde(rename = "conversation.item.input_audio_transcription.completed")] ConversationItemInputAudioTranscriptionCompleted( - ConversationItemInputAudioTranscriptionCompletedEvent, + RealtimeServerEventConversationItemInputAudioTranscriptionCompleted, ), + /// Returned when the text value of an input audio transcription content part is updated with incremental transcription results. #[serde(rename = "conversation.item.input_audio_transcription.delta")] - ConversationItemInputAudioTranscriptionDelta(ConversationItemInputAudioTranscriptionDeltaEvent), + ConversationItemInputAudioTranscriptionDelta( + RealtimeServerEventConversationItemInputAudioTranscriptionDelta, + ), + + /// Returned when an input audio transcription segment is identified for an item. + #[serde(rename = "conversation.item.input_audio_transcription.segment")] + ConversationItemInputAudioTranscriptionSegment( + RealtimeServerEventConversationItemInputAudioTranscriptionSegment, + ), /// Returned when input audio transcription is configured, and a transcription request for a user message failed. + /// These events are separate from other `error` events so that the client can identify the related Item. 
#[serde(rename = "conversation.item.input_audio_transcription.failed")] ConversationItemInputAudioTranscriptionFailed( - ConversationItemInputAudioTranscriptionFailedEvent, + RealtimeServerEventConversationItemInputAudioTranscriptionFailed, ), - /// Returned when an earlier assistant audio message item is truncated by the client. + /// Returned when an earlier assistant audio message item is truncated by the client with a `conversation.item.truncate` event. + /// This event is used to synchronize the server's understanding of the audio with the client's playback. + /// + /// This action will truncate the audio and remove the server-side text transcript to ensure there is no text in the + /// context that hasn't been heard by the user. #[serde(rename = "conversation.item.truncated")] - ConversationItemTruncated(ConversationItemTruncatedEvent), + ConversationItemTruncated(RealtimeServerEventConversationItemTruncated), - /// Returned when an item in the conversation is deleted. + /// Returned when an item in the conversation is deleted by the client with a `conversation.item.delete` event. + /// This event is used to synchronize the server's understanding of the conversation history with the client's view. #[serde(rename = "conversation.item.deleted")] - ConversationItemDeleted(ConversationItemDeletedEvent), + ConversationItemDeleted(RealtimeServerEventConversationItemDeleted), + + /// Returned when an input audio buffer is committed, either by the client or automatically in server VAD mode. + /// The `item_id` property is the ID of the user message item that will be created, + /// thus a `conversation.item.created` event will also be sent to the client. + #[serde(rename = "input_audio_buffer.committed")] + InputAudioBufferCommitted(RealtimeServerEventInputAudioBufferCommitted), + + /// Returned when the input audio buffer is cleared by the client with a `input_audio_buffer.clear` event. 
+ #[serde(rename = "input_audio_buffer.cleared")] + InputAudioBufferCleared(RealtimeServerEventInputAudioBufferCleared), + + /// Sent by the server when in `server_vad` mode to indicate that speech has been detected in the audio buffer. + /// This can happen any time audio is added to the buffer (unless speech is already detected). + /// The client may want to use this event to interrupt audio playback or provide visual feedback to the user. + /// + /// The client should expect to receive a `input_audio_buffer.speech_stopped` event when speech stops. + /// The `item_id` property is the ID of the user message item that will be created when speech stops and will + /// also be included in the `input_audio_buffer.speech_stopped` event (unless the client manually commits the + /// audio buffer during VAD activation). + #[serde(rename = "input_audio_buffer.speech_started")] + InputAudioBufferSpeechStarted(RealtimeServerEventInputAudioBufferSpeechStarted), - /// Returned when a new Response is created. The first event of response creation, where the response is in an initial state of "in_progress". + /// Returned in `server_vad` mode when the server detects the end of speech in the audio buffer. + /// The server will also send a `conversation.item.created` event with the user message item that is created from the audio buffer. + #[serde(rename = "input_audio_buffer.speech_stopped")] + InputAudioBufferSpeechStopped(RealtimeServerEventInputAudioBufferSpeechStopped), + + /// Returned when the Server VAD timeout is triggered for the input audio buffer. This is + /// configured with `idle_timeout_ms` in the `turn_detection` settings of the session, and + /// it indicates that there hasn't been any speech detected for the configured duration. + /// + /// The `audio_start_ms` and `audio_end_ms` fields indicate the segment of audio after the + /// last model response up to the triggering time, as an offset from the beginning of audio + /// written to the input audio buffer. 
This means it demarcates the segment of audio that + /// was silent and the difference between the start and end values will roughly match the configured timeout. + /// + /// The empty audio will be committed to the conversation as an `input_audio` item (there + /// will be a `input_audio_buffer.committed` event) and a model response will be generated. + /// There may be speech that didn't trigger VAD but is still detected by the model, so the model may respond + /// with something relevant to the conversation or a prompt to continue speaking. + #[serde(rename = "input_audio_buffer.timeout_triggered")] + InputAudioBufferTimeoutTriggered(RealtimeServerEventInputAudioBufferTimeoutTriggered), + + /// *WebRTC Only*: Emitted when the server begins streaming audio to the client. This + /// event is emitted after an audio content part has been added (`response.content_part.added`) to the response. + /// [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc). + #[serde(rename = "output_audio_buffer.started")] + OutputAudioBufferStarted(RealtimeServerEventOutputAudioBufferStarted), + + /// *WebRTC Only*: Emitted when the output audio buffer has been completely drained on + /// the server, and no more audio is forthcoming. This event is emitted after the full response data has been sent + /// to the client (`response.done`). [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc). + #[serde(rename = "output_audio_buffer.stopped")] + OutputAudioBufferStopped(RealtimeServerEventOutputAudioBufferStopped), + + /// *WebRTC Only*: Emitted when the output audio buffer is cleared. This happens either in + /// VAD mode when the user has interrupted (`input_audio_buffer.speech_started`), or when the client has + /// emitted the `output_audio_buffer.clear` event to manually cut off the current audio response. 
+ /// [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc). + #[serde(rename = "output_audio_buffer.cleared")] + OutputAudioBufferCleared(RealtimeServerEventOutputAudioBufferCleared), + + /// Returned when a new Response is created. The first event of response creation, + /// where the response is in an initial state of `in_progress`. #[serde(rename = "response.created")] - ResponseCreated(ResponseCreatedEvent), + ResponseCreated(RealtimeServerEventResponseCreated), /// Returned when a Response is done streaming. Always emitted, no matter the final state. + /// The Response object included in the `response.done` event will include all output Items in the Response + /// but will omit the raw audio data. + /// + /// Clients should check the `status` field of the Response to determine if it was successful + /// (`completed`) or if there was another outcome: `cancelled`, `failed`, or `incomplete`. + /// + /// A response will contain all output items that were generated during the response, excluding any audio content. #[serde(rename = "response.done")] - ResponseDone(ResponseDoneEvent), + ResponseDone(RealtimeServerEventResponseDone), - /// Returned when a new Item is created during response generation. + /// Returned when a new Item is created during Response generation. #[serde(rename = "response.output_item.added")] - ResponseOutputItemAdded(ResponseOutputItemAddedEvent), + ResponseOutputItemAdded(RealtimeServerEventResponseOutputItemAdded), /// Returned when an Item is done streaming. Also emitted when a Response is interrupted, incomplete, or cancelled. #[serde(rename = "response.output_item.done")] - ResponseOutputItemDone(ResponseOutputItemDoneEvent), + ResponseOutputItemDone(RealtimeServerEventResponseOutputItemDone), /// Returned when a new content part is added to an assistant message item during response generation. 
#[serde(rename = "response.content_part.added")] - ResponseContentPartAdded(ResponseContentPartAddedEvent), + ResponseContentPartAdded(RealtimeServerEventResponseContentPartAdded), /// Returned when a content part is done streaming in an assistant message item. /// Also emitted when a Response is interrupted, incomplete, or cancelled. #[serde(rename = "response.content_part.done")] - ResponseContentPartDone(ResponseContentPartDoneEvent), + ResponseContentPartDone(RealtimeServerEventResponseContentPartDone), - /// Returned when the text value of a "text" content part is updated. - #[serde(rename = "response.text.delta")] - ResponseTextDelta(ResponseTextDeltaEvent), + /// Returned when the text value of an "output_text" content part is updated. + #[serde(rename = "response.output_text.delta")] + ResponseOutputTextDelta(RealtimeServerEventResponseTextDelta), - /// Returned when the text value of a "text" content part is done streaming. + /// Returned when the text value of an "output_text" content part is done streaming. /// Also emitted when a Response is interrupted, incomplete, or cancelled. - #[serde(rename = "response.text.done")] - ResponseTextDone(ResponseTextDoneEvent), + #[serde(rename = "response.output_text.done")] + ResponseOutputTextDone(RealtimeServerEventResponseTextDone), /// Returned when the model-generated transcription of audio output is updated. - #[serde(rename = "response.audio_transcript.delta")] - ResponseAudioTranscriptDelta(ResponseAudioTranscriptDeltaEvent), + #[serde(rename = "response.output_audio_transcript.delta")] + ResponseOutputAudioTranscriptDelta(RealtimeServerEventResponseAudioTranscriptDelta), /// Returned when the model-generated transcription of audio output is done streaming. /// Also emitted when a Response is interrupted, incomplete, or cancelled. 
- #[serde(rename = "response.audio_transcript.done")] - ResponseAudioTranscriptDone(ResponseAudioTranscriptDoneEvent), + #[serde(rename = "response.output_audio_transcript.done")] + ResponseOutputAudioTranscriptDone(RealtimeServerEventResponseAudioTranscriptDone), /// Returned when the model-generated audio is updated. - #[serde(rename = "response.audio.delta")] - ResponseAudioDelta(ResponseAudioDeltaEvent), + #[serde(rename = "response.output_audio.delta")] + ResponseOutputAudioDelta(RealtimeServerEventResponseAudioDelta), /// Returned when the model-generated audio is done. /// Also emitted when a Response is interrupted, incomplete, or cancelled. - #[serde(rename = "response.audio.done")] - ResponseAudioDone(ResponseAudioDoneEvent), + #[serde(rename = "response.output_audio.done")] + ResponseOutputAudioDone(RealtimeServerEventResponseAudioDone), /// Returned when the model-generated function call arguments are updated. #[serde(rename = "response.function_call_arguments.delta")] - ResponseFunctionCallArgumentsDelta(ResponseFunctionCallArgumentsDeltaEvent), + ResponseFunctionCallArgumentsDelta(RealtimeServerEventResponseFunctionCallArgumentsDelta), /// Returned when the model-generated function call arguments are done streaming. /// Also emitted when a Response is interrupted, incomplete, or cancelled. #[serde(rename = "response.function_call_arguments.done")] - ResponseFunctionCallArgumentsDone(ResponseFunctionCallArgumentsDoneEvent), + ResponseFunctionCallArgumentsDone(RealtimeServerEventResponseFunctionCallArgumentsDone), + + /// Returned when MCP tool call arguments are updated. + #[serde(rename = "response.mcp_call_arguments.delta")] + ResponseMCPCallArgumentsDelta(RealtimeServerEventResponseMCPCallArgumentsDelta), + + /// Returned when MCP tool call arguments are finalized during response generation. 
+ #[serde(rename = "response.mcp_call_arguments.done")] + ResponseMCPCallArgumentsDone(RealtimeServerEventResponseMCPCallArgumentsDone), + + /// Returned when an MCP tool call is in progress. + #[serde(rename = "response.mcp_call.in_progress")] + ResponseMCPCallInProgress(RealtimeServerEventResponseMCPCallInProgress), + + /// Returned when an MCP tool call has completed successfully. + #[serde(rename = "response.mcp_call.completed")] + ResponseMCPCallCompleted(RealtimeServerEventResponseMCPCallCompleted), + + /// Returned when an MCP tool call has failed. + #[serde(rename = "response.mcp_call.failed")] + ResponseMCPCallFailed(RealtimeServerEventResponseMCPCallFailed), + + /// Returned when listing MCP tools is in progress for an item. + #[serde(rename = "mcp_list_tools.in_progress")] + MCPListToolsInProgress(RealtimeServerEventMCPListToolsInProgress), + + /// Returned when listing MCP tools has completed for an item. + #[serde(rename = "mcp_list_tools.completed")] + MCPListToolsCompleted(RealtimeServerEventMCPListToolsCompleted), + + /// Returned when listing MCP tools has failed for an item. + #[serde(rename = "mcp_list_tools.failed")] + MCPListToolsFailed(RealtimeServerEventMCPListToolsFailed), - /// Emitted after every "response.done" event to indicate the updated rate limits. + /// Emitted at the beginning of a Response to indicate the updated rate limits. + /// When a Response is created some tokens will be "reserved" for the output tokens, the rate limits + /// shown here reflect that reservation, which is then adjusted accordingly once the Response is completed. 
#[serde(rename = "rate_limits.updated")] - RateLimitsUpdated(RateLimitsUpdatedEvent), + RateLimitsUpdated(RealtimeServerEventRateLimitsUpdated), } diff --git a/async-openai/src/types/realtime/session.rs b/async-openai/src/types/realtime/session.rs new file mode 100644 index 00000000..66780844 --- /dev/null +++ b/async-openai/src/types/realtime/session.rs @@ -0,0 +1,417 @@ +use serde::{Deserialize, Serialize}; + +use crate::types::{ + responses::{Prompt, ToolChoiceFunction, ToolChoiceMCP, ToolChoiceOptions}, + MCPTool, +}; + +#[derive(Debug, Default, Serialize, Deserialize, Clone)] +pub struct AudioTranscription { + /// The language of the input audio. Supplying the input language in + /// [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format will improve accuracy and latency. + #[serde(skip_serializing_if = "Option::is_none")] + pub language: Option, + /// The model to use for transcription. Current options are `whisper-1`, + /// `gpt-4o-mini-transcribe`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. + /// Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels. + #[serde(skip_serializing_if = "Option::is_none")] + pub model: Option, + /// An optional text to guide the model's style or continue a previous audio segment. + /// For `whisper-1`, the [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting). For `gpt-4o-transcribe` models + /// (excluding gpt-4o-transcribe-diarize), the prompt is a free text string, for example + /// "expect words related to technology". + #[serde(skip_serializing_if = "Option::is_none")] + pub prompt: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(tag = "type")] +pub enum RealtimeTurnDetection { + /// Server-side voice activity detection (VAD) which flips on when user speech is detected + /// and off after a period of silence. 
+ #[serde(rename = "server_vad")] + ServerVAD { + /// Whether or not to automatically generate a response when a VAD stop event occurs. + #[serde(skip_serializing_if = "Option::is_none")] + create_response: Option, + + /// Optional timeout after which a model response will be triggered automatically. + /// This is useful for situations in which a long pause from the user is unexpected, + /// such as a phone call. The model will effectively prompt the user to continue the + /// conversation based on the current context. + /// + /// The timeout value will be applied after the last model response's audio has finished + /// playing, i.e. it's set to the response.done time plus audio playback duration. + /// + /// An input_audio_buffer.timeout_triggered event (plus events associated with the Response) + /// will be emitted when the timeout is reached. Idle timeout is currently only supported + /// for server_vad mode. + #[serde(skip_serializing_if = "Option::is_none")] + idle_timeout_ms: Option, + + /// Whether or not to automatically interrupt any ongoing response with output to + /// the default conversation (i.e. `conversation` of `auto`) when a VAD start event occurs. + #[serde(skip_serializing_if = "Option::is_none")] + interrupt_response: Option, + + /// Used only for server_vad mode. Amount of audio to include before the VAD detected speech + /// (in milliseconds). Defaults to 300ms. + prefix_padding_ms: u32, + /// Used only for server_vad mode. Duration of silence to detect speech stop + /// (in milliseconds). Defaults to 500ms. With shorter values the model will respond + /// more quickly, but may jump in on short pauses from the user. + silence_duration_ms: u32, + + /// Used only for server_vad mode. Activation threshold for VAD (0.0 to 1.0), + /// this defaults to 0.5. A higher threshold will require louder audio to activate + /// the model, and thus might perform better in noisy environments. 
+ threshold: f32, + }, + + /// Server-side semantic turn detection which uses a model to determine when the user has + /// finished speaking. + #[serde(rename = "semantic_vad")] + SemanticVAD { + /// Whether or not to automatically generate a response when a VAD stop event occurs. + #[serde(skip_serializing_if = "Option::is_none", default)] + create_response: Option, + + /// Used only for `semantic_vad` mode. The eagerness of the model to respond. + /// `low` will wait longer for the user to continue speaking, `high` will respond more + /// quickly. `auto` is the default and is equivalent to `medium`. `low`, `medium`, and `high` + /// have max timeouts of 8s, 4s, and 2s respectively. + eagerness: String, + + /// Whether or not to automatically interrupt any ongoing response with output to + /// the default conversation (i.e. `conversation` of `auto`) when a VAD start event occurs. + #[serde(skip_serializing_if = "Option::is_none", default)] + interrupt_response: Option, + }, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub enum MaxOutputTokens { + #[serde(rename = "inf")] + Inf, + #[serde(untagged)] + Num(u16), +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeFunctionTool { + /// The name of the function. + pub name: String, + /// The description of the function, including guidance on when and how to call it, + /// and guidance about what to tell the user when calling (if anything). + pub description: String, + /// Parameters of the function in JSON Schema. + pub parameters: serde_json::Value, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(tag = "type")] +pub enum RealtimeTool { + #[serde(rename = "function")] + Function(RealtimeFunctionTool), + /// Give the model access to additional tools via remote Model Context Protocol (MCP) servers. + /// [Learn more about MCP](https://platform.openai.com/docs/guides/tools-remote-mcp). 
+ #[serde(rename = "mcp")] + MCP(MCPTool), +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(rename_all = "lowercase")] +pub enum FunctionType { + Function, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum ToolChoice { + /// Use this option to force the model to call a specific function. + Function(ToolChoiceFunction), + /// Use this option to force the model to call a specific tool on a remote MCP server. + Mcp(ToolChoiceMCP), + + #[serde(untagged)] + Mode(ToolChoiceOptions), +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(rename_all = "lowercase")] +pub enum RealtimeVoice { + Alloy, + Ash, + Ballad, + Coral, + Echo, + Sage, + Shimmer, + Verse, + Marin, + Cedar, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(tag = "type")] +pub enum RealtimeAudioFormats { + /// The PCM audio format. Only a 24kHz sample rate is supported. + #[serde(rename = "audio/pcm")] + PCMAudioFormat { + /// The sample rate of the audio. Always 24000. + rate: u32, + }, + /// The G.711 μ-law format. + #[serde(rename = "audio/pcmu")] + PCMUAudioFormat, + /// The G.711 A-law format. + #[serde(rename = "audio/pcma")] + PCMAAudioFormat, +} + +#[derive(Debug, Serialize, Deserialize, Clone, Default)] +pub struct G711ULAWAudioFormat { + pub sample_rate: u32, + pub channels: u32, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct AudioInput { + /// The format of the input audio. + pub format: RealtimeAudioFormats, + /// Configuration for input audio noise reduction. This can be set to null to turn off. + /// Noise reduction filters audio added to the input audio buffer before it is sent to VAD + /// and the model. Filtering the audio can improve VAD and turn detection accuracy + /// (reducing false positives) and model performance by improving perception of the + /// input audio. 
+ pub noise_reduction: Option, + /// Configuration for input audio transcription, defaults to off and can be set to `null` to turn off once on. + /// Input audio transcription is not native to the model, since the model consumes audio directly. + /// Transcription runs asynchronously through [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription) + /// and should be treated as guidance of input audio content rather than precisely what the model + /// heard. The client can optionally set the language and prompt for transcription, + /// these offer additional guidance to the transcription service. + pub transcription: Option, + + /// Configuration for turn detection, ether Server VAD or Semantic VAD. This can + /// be set to null to turn off, in which case the client must manually trigger model response. + /// + /// Server VAD means that the model will detect the start and end of speech + /// based on audio volume and respond at the end of user speech. + /// + /// Semantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) + /// to semantically estimate whether the user has finished speaking, then dynamically sets + /// a timeout based on this probability. For example, if user audio trails off with "uhhm", + /// the model will score a low probability of turn end and wait longer for the user to + /// continue speaking. This can be useful for more natural conversations, but may have a + /// higher latency. + pub turn_detection: RealtimeTurnDetection, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct AudioOutput { + /// The format of the output audio. + pub format: RealtimeAudioFormats, + /// The speed of the model's spoken response as a multiple of the original speed. + /// 1.0 is the default speed. 0.25 is the minimum speed. 1.5 is the maximum speed. + /// This value can only be changed in between model turns, not while a response + /// is in progress. 
+ /// + /// This parameter is a post-processing adjustment to the audio after it is generated, + /// it's also possible to prompt the model to speak faster or slower. + pub speed: f32, + /// The voice the model uses to respond. Voice cannot be changed during the session once + /// the model has responded with audio at least once. Current voice options are + /// `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, and `cedar`. + /// We recommend `marin` and `cedar` for best quality. + pub voice: RealtimeVoice, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct Audio { + pub input: AudioInput, + pub output: AudioOutput, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(rename_all = "lowercase")] +pub enum Tracing { + /// Enables tracing and sets default values for tracing configuration options. Always `auto`. + Auto, + + #[serde(untagged)] + Configuration(TracingConfiguration), +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct TracingConfiguration { + /// The group id to attach to this trace to enable filtering and grouping in the Traces Dashboard. + pub group_id: String, + /// The arbitrary metadata to attach to this trace to enable filtering in the Traces Dashboard. + pub metadata: serde_json::Value, + /// The name of the workflow to attach to this trace. This is used to name the trace in the Traces Dashboard. + pub workflow_name: String, +} + +/// The truncation strategy to use for the session. +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(rename_all = "lowercase")] +pub enum RealtimeTruncation { + /// `auto` is the default truncation strategy. + Auto, + /// `disabled` will disable truncation and emit errors when the conversation exceeds the input + /// token limit. + Disabled, + + /// Retain a fraction of the conversation tokens when the conversation exceeds the input token + /// limit. 
This allows you to amortize truncations across multiple turns, which can help improve + /// cached token usage. + #[serde(untagged)] + RetentionRatio(RetentionRatioTruncation), +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RetentionRatioTruncation { + /// Fraction of post-instruction conversation tokens to retain (0.0 - 1.0) when the conversation + /// exceeds the input token limit. Setting this to 0.8 means that messages will be dropped + /// until 80% of the maximum allowed tokens are used. This helps reduce the frequency of + /// truncations and improve cache rates. + pub retention_ratio: f32, + + /// Use retention ratio truncation. + pub r#type: String, + + /// Optional custom token limits for this truncation strategy. If not provided, the model's + /// default token limits will be used. + #[serde(skip_serializing_if = "Option::is_none")] + pub token_limits: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct TokenLimits { + /// Maximum tokens allowed in the conversation after instructions (which including tool + /// definitions). For example, setting this to 5,000 would mean that truncation would occur + /// when the conversation exceeds 5,000 tokens after instructions. This cannot be higher + /// than the model's context window size minus the maximum output tokens. + pub post_instructions: u32, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(tag = "type")] +pub enum Session { + /// The type of session to create. Always `realtime` for the Realtime API. + #[serde(rename = "realtime")] + RealtimeSession(RealtimeSession), + /// The type of session to create. Always `transcription` for transcription sessions. + #[serde(rename = "transcription")] + RealtimeTranscriptionSession(RealtimeTranscriptionSession), +} + +/// Realtime session object configuration. 
+/// openapi spec type: RealtimeSessionCreateRequestGA +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeSession { + pub audio: Audio, + + /// Additional fields to include in server outputs. + /// + /// `item.input_audio_transcription.logprobs`: Include logprobs for input audio transcription. + #[serde(skip_serializing_if = "Option::is_none")] + pub include: Option>, + + /// The default system instructions (i.e. system message) prepended to model calls. + /// This field allows the client to guide the model on desired responses. + /// The model can be instructed on response content and format, + /// (e.g. "be extremely succinct", "act friendly", "here are examples of good responses") + /// and on audio behavior (e.g. "talk quickly", "inject emotion into your voice", + /// "laugh frequently"). The instructions are not guaranteed to be followed by the model, but + /// they provide guidance to the model on the desired behavior. + /// + /// Note that the server sets default instructions which will be used if this field is not set + /// and are visible in the `session.created` event at the start of the session. + #[serde(skip_serializing_if = "Option::is_none")] + pub instructions: Option, + + /// Maximum number of output tokens for a single assistant response, + /// inclusive of tool calls. Provide an integer between 1 and 4096 to limit output tokens, + /// or `inf` for the maximum available tokens for a given model. Defaults to `inf`. + #[serde(skip_serializing_if = "Option::is_none")] + pub max_output_tokens: Option, + + /// The Realtime model used for this session. + #[serde(skip_serializing_if = "Option::is_none")] + pub model: Option, + + /// The set of modalities the model can respond with. It defaults to + /// `["audio"]`, indicating that the model will respond with audio plus a transcript. `["text"]` + /// can be used to make the model respond with text only. It is not possible to request both + /// `text` and `audio` at the same time. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub output_modalities: Option>, + + /// Reference to a prompt template and its variables. + /// [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). + #[serde(skip_serializing_if = "Option::is_none")] + pub prompt: Option, + + /// How the model chooses tools. Provide one of the string modes or force a specific + /// function/MCP tool. + #[serde(skip_serializing_if = "Option::is_none")] + pub tool_choice: Option, + + /// Tools available to the model. + #[serde(skip_serializing_if = "Option::is_none")] + pub tools: Option>, + + /// Realtime API can write session traces to the [Traces Dashboard](https://platform.openai.com/logs?api=traces). + /// Set to null to disable tracing. Once tracing is enabled for a session, the configuration cannot be modified. + /// + /// `auto` will create a trace for the session with default values for the workflow name, + /// group id, and metadata. + #[serde(skip_serializing_if = "Option::is_none")] + pub tracing: Option, + + /// When the number of tokens in a conversation exceeds the model's input token limit, + /// the conversation be truncated, meaning messages (starting from the oldest) will not be + /// included in the model's context. A 32k context model with 4,096 max output tokens can + /// only include 28,224 tokens in the context before truncation occurs. Clients can configure + /// truncation behavior to truncate with a lower max token limit, which is an effective way to + /// control token usage and cost. Truncation will reduce the number of cached tokens on the next + /// turn (busting the cache), since messages are dropped from the beginning of the context. + /// However, clients can also configure truncation to retain messages up to a fraction of the + /// maximum context size, which will reduce the need for future truncations and thus improve + /// the cache rate. 
Truncation can be disabled entirely, which means the server will never + /// truncate but would instead return an error if the conversation exceeds the model's input + /// token limit. + #[serde(skip_serializing_if = "Option::is_none")] + pub truncation: Option, +} + +/// Type of noise reduction. `near_field` is for close-talking microphones such as +/// headphones, `far_field` is for far-field microphones such as laptop or conference +/// room microphones. +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum NoiseReductionType { + NearField, + FarField, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct TranscriptionAudio { + pub input: AudioInput, +} + +/// Realtime transcription session object configuration. +/// openapi spec type: RealtimeTranscriptionSessionCreateRequestGA +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeTranscriptionSession { + /// Configuration for input and output audio. + pub audio: TranscriptionAudio, + + /// Additional fields to include in server outputs. + /// + /// `item.input_audio_transcription.logprobs`: Include logprobs for input audio transcription. + #[serde(skip_serializing_if = "Option::is_none")] + pub include: Option>, +} diff --git a/async-openai/src/types/realtime/session_resource.rs b/async-openai/src/types/realtime/session_resource.rs deleted file mode 100644 index 2fe1e5b1..00000000 --- a/async-openai/src/types/realtime/session_resource.rs +++ /dev/null @@ -1,176 +0,0 @@ -use serde::{Deserialize, Serialize}; - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub enum AudioFormat { - #[serde(rename = "pcm16")] - PCM16, - #[serde(rename = "g711_law")] - G711ULAW, - #[serde(rename = "g711_alaw")] - G711ALAW, -} - -#[derive(Debug, Default, Serialize, Deserialize, Clone)] -pub struct AudioTranscription { - /// The language of the input audio. Supplying the input language in ISO-639-1 (e.g. en) format will improve accuracy and latency. 
- #[serde(skip_serializing_if = "Option::is_none")] - pub language: Option, - /// The model to use for transcription, current options are gpt-4o-transcribe, gpt-4o-mini-transcribe, and whisper-1. - #[serde(skip_serializing_if = "Option::is_none")] - pub model: Option, - /// An optional text to guide the model's style or continue a previous audio segment. - /// For whisper-1, the prompt is a list of keywords. For gpt-4o-transcribe models, - /// the prompt is a free text string, for example "expect words related to technology". - #[serde(skip_serializing_if = "Option::is_none")] - pub prompt: Option, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(tag = "type")] -pub enum TurnDetection { - /// Type of turn detection, only "server_vad" is currently supported. - #[serde(rename = "server_vad")] - ServerVAD { - /// Activation threshold for VAD (0.0 to 1.0). - threshold: f32, - /// Amount of audio to include before speech starts (in milliseconds). - prefix_padding_ms: u32, - /// Duration of silence to detect speech stop (in milliseconds). - silence_duration_ms: u32, - - /// Whether or not to automatically generate a response when a VAD stop event occurs. - #[serde(skip_serializing_if = "Option::is_none")] - create_response: Option, - - /// Whether or not to automatically interrupt any ongoing response with output to - /// the default conversation (i.e. conversation of auto) when a VAD start event occurs. - #[serde(skip_serializing_if = "Option::is_none")] - interrupt_response: Option, - }, - - #[serde(rename = "semantic_vad")] - SemanticVAD { - /// The eagerness of the model to respond. - /// `low` will wait longer for the user to continue speaking, - /// `high`` will respond more quickly. `auto`` is the default and is equivalent to `medium` - eagerness: String, - - /// Whether or not to automatically generate a response when a VAD stop event occurs. 
- #[serde(skip_serializing_if = "Option::is_none", default)] - create_response: Option, - - /// Whether or not to automatically interrupt any ongoing response with output to - /// the default conversation (i.e. conversation of auto) when a VAD start event occurs. - #[serde(skip_serializing_if = "Option::is_none", default)] - interrupt_response: Option, - }, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub enum MaxResponseOutputTokens { - #[serde(rename = "inf")] - Inf, - #[serde(untagged)] - Num(u16), -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(tag = "type")] -pub enum ToolDefinition { - #[serde(rename = "function")] - Function { - /// The name of the function. - name: String, - /// The description of the function. - description: String, - /// Parameters of the function in JSON Schema. - parameters: serde_json::Value, - }, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(rename_all = "lowercase")] -pub enum FunctionType { - Function, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(rename_all = "lowercase")] -pub enum ToolChoice { - Auto, - None, - Required, - #[serde(untagged)] - Function { - r#type: FunctionType, - name: String, - }, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(rename_all = "lowercase")] -pub enum RealtimeVoice { - Alloy, - Ash, - Ballad, - Coral, - Echo, - Fable, - Onyx, - Nova, - Shimmer, - Verse, -} - -#[derive(Debug, Serialize, Deserialize, Clone, Default)] -pub struct SessionResource { - /// The default model used for this session. - #[serde(skip_serializing_if = "Option::is_none")] - pub model: Option, - - /// The set of modalities the model can respond with. To disable audio, set this to ["text"]. - #[serde(skip_serializing_if = "Option::is_none")] - pub modalities: Option>, - - //// The default system instructions prepended to model calls. - #[serde(skip_serializing_if = "Option::is_none")] - pub instructions: Option, - - /// The voice the model uses to respond. 
Cannot be changed once the model has responded with audio at least once. - #[serde(skip_serializing_if = "Option::is_none")] - pub voice: Option, - - /// The format of input audio. Options are "pcm16", "g711_ulaw", or "g711_alaw". - #[serde(skip_serializing_if = "Option::is_none")] - pub input_audio_format: Option, - - /// The format of output audio. Options are "pcm16", "g711_ulaw", or "g711_alaw". - #[serde(skip_serializing_if = "Option::is_none")] - pub output_audio_format: Option, - - /// Configuration for input audio transcription. Can be set to null to turn off. - #[serde(skip_serializing_if = "Option::is_none")] - pub input_audio_transcription: Option, - - /// Configuration for turn detection. Can be set to null to turn off. - #[serde(skip_serializing_if = "Option::is_none")] - pub turn_detection: Option, - - /// Tools (functions) available to the model. - #[serde(skip_serializing_if = "Option::is_none")] - pub tools: Option>, - - #[serde(skip_serializing_if = "Option::is_none")] - /// How the model chooses tools. - pub tool_choice: Option, - - #[serde(skip_serializing_if = "Option::is_none")] - /// Sampling temperature for the model. - pub temperature: Option, - - /// Maximum number of output tokens for a single assistant response, inclusive of tool calls. - /// Provide an integer between 1 and 4096 to limit output tokens, or "inf" for the maximum available tokens for a given model. - /// Defaults to "inf". 
- #[serde(skip_serializing_if = "Option::is_none")] - pub max_response_output_tokens: Option, -} diff --git a/async-openai/src/types/responses.rs b/async-openai/src/types/responses.rs deleted file mode 100644 index abccf301..00000000 --- a/async-openai/src/types/responses.rs +++ /dev/null @@ -1,2189 +0,0 @@ -use crate::error::OpenAIError; -pub use crate::types::{ - CompletionTokensDetails, ImageDetail, PromptTokensDetails, ReasoningEffort, - ResponseFormatJsonSchema, -}; -use derive_builder::Builder; -use futures::Stream; -use serde::{Deserialize, Serialize}; -use serde_json::Value; -use std::collections::HashMap; -use std::pin::Pin; - -/// Role of messages in the API. -#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] -#[serde(rename_all = "lowercase")] -pub enum Role { - User, - Assistant, - System, - Developer, -} - -/// Status of input/output items. -#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] -#[serde(rename_all = "snake_case")] -pub enum OutputStatus { - InProgress, - Completed, - Incomplete, -} - -/// Input payload: raw text or structured context items. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(untagged)] -pub enum Input { - /// A text input to the model, equivalent to a text input with the user role. - Text(String), - /// A list of one or many input items to the model, containing different content types. - Items(Vec), -} - -/// A context item: currently only messages. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(untagged, rename_all = "snake_case")] -pub enum InputItem { - Message(InputMessage), - Custom(serde_json::Value), -} - -/// A message to prime the model. 
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] -#[builder( - name = "InputMessageArgs", - pattern = "mutable", - setter(into, strip_option), - default -)] -#[builder(build_fn(error = "OpenAIError"))] -pub struct InputMessage { - #[serde(default, rename = "type")] - pub kind: InputMessageType, - /// The role of the message input. - pub role: Role, - /// Text, image, or audio input to the model, used to generate a response. Can also contain - /// previous assistant responses. - pub content: InputContent, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] -#[serde(rename_all = "snake_case")] -pub enum InputMessageType { - #[default] - Message, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(untagged)] -pub enum InputContent { - /// A text input to the model. - TextInput(String), - /// A list of one or many input items to the model, containing different content types. - InputItemContentList(Vec), -} - -/// Parts of a message: text, image, file, or audio. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(tag = "type", rename_all = "snake_case")] -pub enum ContentType { - /// A text input to the model. - InputText(InputText), - /// An image input to the model. - InputImage(InputImage), - /// A file input to the model. - InputFile(InputFile), -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct InputText { - pub text: String, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] -#[builder( - name = "InputImageArgs", - pattern = "mutable", - setter(into, strip_option), - default -)] -#[builder(build_fn(error = "OpenAIError"))] -pub struct InputImage { - /// The detail level of the image to be sent to the model. - detail: ImageDetail, - /// The ID of the file to be sent to the model. - #[serde(skip_serializing_if = "Option::is_none")] - file_id: Option, - /// The URL of the image to be sent to the model. 
A fully qualified URL or base64 encoded image - /// in a data URL. - #[serde(skip_serializing_if = "Option::is_none")] - image_url: Option, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] -#[builder( - name = "InputFileArgs", - pattern = "mutable", - setter(into, strip_option), - default -)] -#[builder(build_fn(error = "OpenAIError"))] -pub struct InputFile { - /// The content of the file to be sent to the model. - #[serde(skip_serializing_if = "Option::is_none")] - file_data: Option, - /// The ID of the file to be sent to the model. - #[serde(skip_serializing_if = "Option::is_none")] - file_id: Option, - /// The name of the file to be sent to the model. - #[serde(skip_serializing_if = "Option::is_none")] - filename: Option, - /// The URL of the file to be sent to the model. - #[serde(skip_serializing_if = "Option::is_none")] - file_url: Option, -} - -/// Builder for a Responses API request. -#[derive(Clone, Serialize, Deserialize, Debug, Default, Builder, PartialEq)] -#[builder( - name = "CreateResponseArgs", - pattern = "mutable", - setter(into, strip_option), - default -)] -#[builder(build_fn(error = "OpenAIError"))] -pub struct CreateResponse { - /// Text, image, or file inputs to the model, used to generate a response. - pub input: Input, - - /// Model ID used to generate the response, like `gpt-4o`. - /// OpenAI offers a wide range of models with different capabilities, - /// performance characteristics, and price points. - pub model: String, - - /// Whether to run the model response in the background. - /// boolean or null. - #[serde(skip_serializing_if = "Option::is_none")] - pub background: Option, - - /// Specify additional output data to include in the model response. - /// - /// Supported values: - /// - `file_search_call.results` - /// Include the search results of the file search tool call. - /// - `message.input_image.image_url` - /// Include image URLs from the input message. 
- /// - `computer_call_output.output.image_url` - /// Include image URLs from the computer call output. - /// - `reasoning.encrypted_content` - /// Include an encrypted version of reasoning tokens in reasoning item outputs. - /// This enables reasoning items to be used in multi-turn conversations when - /// using the Responses API statelessly (for example, when the `store` parameter - /// is set to `false`, or when an organization is enrolled in the zero-data- - /// retention program). - /// - /// If `None`, no additional data is returned. - #[serde(skip_serializing_if = "Option::is_none")] - pub include: Option>, - - /// Inserts a system (or developer) message as the first item in the model's context. - /// - /// When using along with previous_response_id, the instructions from a previous response will - /// not be carried over to the next response. This makes it simple to swap out system - /// (or developer) messages in new responses. - #[serde(skip_serializing_if = "Option::is_none")] - pub instructions: Option, - - /// An upper bound for the number of tokens that can be generated for a - /// response, including visible output tokens and reasoning tokens. - #[serde(skip_serializing_if = "Option::is_none")] - pub max_output_tokens: Option, - - /// The maximum number of total calls to built-in tools that can be processed in a response. - /// This maximum number applies across all built-in tool calls, not per individual tool. - /// Any further attempts to call a tool by the model will be ignored. - pub max_tool_calls: Option, - - /// Set of 16 key-value pairs that can be attached to an object. This can be - /// useful for storing additional information about the object in a structured - /// format, and querying for objects via API or the dashboard. - /// - /// Keys are strings with a maximum length of 64 characters. Values are - /// strings with a maximum length of 512 characters. 
- #[serde(skip_serializing_if = "Option::is_none")] - pub metadata: Option>, - - /// Whether to allow the model to run tool calls in parallel. - #[serde(skip_serializing_if = "Option::is_none")] - pub parallel_tool_calls: Option, - - /// The unique ID of the previous response to the model. Use this to create - /// multi-turn conversations. - #[serde(skip_serializing_if = "Option::is_none")] - pub previous_response_id: Option, - - /// Reference to a prompt template and its variables. - #[serde(skip_serializing_if = "Option::is_none")] - pub prompt: Option, - - /// **o-series models only**: Configuration options for reasoning models. - #[serde(skip_serializing_if = "Option::is_none")] - pub reasoning: Option, - - /// Specifies the latency tier to use for processing the request. - /// - /// This parameter is relevant for customers subscribed to the Scale tier service. - /// - /// Supported values: - /// - `auto` - /// - If the Project is Scale tier enabled, the system will utilize Scale tier credits until - /// they are exhausted. - /// - If the Project is not Scale tier enabled, the request will be processed using the - /// default service tier with a lower uptime SLA and no latency guarantee. - /// - `default` - /// The request will be processed using the default service tier with a lower uptime SLA and - /// no latency guarantee. - /// - `flex` - /// The request will be processed with the Flex Processing service tier. Learn more. - /// - /// When not set, the default behavior is `auto`. - /// - /// When this parameter is set, the response body will include the `service_tier` utilized. - #[serde(skip_serializing_if = "Option::is_none")] - pub service_tier: Option, - - /// Whether to store the generated model response for later retrieval via API. - #[serde(skip_serializing_if = "Option::is_none")] - pub store: Option, - - /// If set to true, the model response data will be streamed to the client as it is - /// generated using server-sent events. 
- #[serde(skip_serializing_if = "Option::is_none")] - pub stream: Option, - - /// What sampling temperature to use, between 0 and 2. Higher values like 0.8 - /// will make the output more random, while lower values like 0.2 will make it - /// more focused and deterministic. We generally recommend altering this or - /// `top_p` but not both. - #[serde(skip_serializing_if = "Option::is_none")] - pub temperature: Option, - - /// Configuration options for a text response from the model. Can be plain text - /// or structured JSON data. - #[serde(skip_serializing_if = "Option::is_none")] - pub text: Option, - - /// How the model should select which tool (or tools) to use when generating - /// a response. - #[serde(skip_serializing_if = "Option::is_none")] - pub tool_choice: Option, - - /// An array of tools the model may call while generating a response. - /// Can include built-in tools (file_search, web_search_preview, - /// computer_use_preview) or custom function definitions. - #[serde(skip_serializing_if = "Option::is_none")] - pub tools: Option>, - - /// An integer between 0 and 20 specifying the number of most likely tokens to return - /// at each token position, each with an associated log probability. - #[serde(skip_serializing_if = "Option::is_none")] - pub top_logprobs: Option, // TODO add validation of range - - /// An alternative to sampling with temperature, called nucleus sampling, - /// where the model considers the results of the tokens with top_p probability - /// mass. So 0.1 means only the tokens comprising the top 10% probability mass - /// are considered. We generally recommend altering this or `temperature` but - /// not both. - #[serde(skip_serializing_if = "Option::is_none")] - pub top_p: Option, - - /// The truncation strategy to use for the model response: - /// - `auto`: drop items in the middle to fit context window. - /// - `disabled`: error if exceeding context window. 
- #[serde(skip_serializing_if = "Option::is_none")] - pub truncation: Option, - - /// A unique identifier representing your end-user, which can help OpenAI to - /// monitor and detect abuse. - #[serde(skip_serializing_if = "Option::is_none")] - pub user: Option, -} - -/// Service tier request options. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct PromptConfig { - /// The unique identifier of the prompt template to use. - pub id: String, - - /// Optional version of the prompt template. - #[serde(skip_serializing_if = "Option::is_none")] - pub version: Option, - - /// Optional map of values to substitute in for variables in your prompt. The substitution - /// values can either be strings, or other Response input types like images or files. - /// For now only supporting Strings. - #[serde(skip_serializing_if = "Option::is_none")] - pub variables: Option>, -} - -/// Service tier request options. -#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] -#[serde(rename_all = "lowercase")] -pub enum ServiceTier { - Auto, - Default, - Flex, - Scale, - Priority, -} - -/// Truncation strategies. -#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] -#[serde(rename_all = "lowercase")] -pub enum Truncation { - Auto, - Disabled, -} - -/// o-series reasoning settings. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] -#[builder( - name = "ReasoningConfigArgs", - pattern = "mutable", - setter(into, strip_option), - default -)] -#[builder(build_fn(error = "OpenAIError"))] -pub struct ReasoningConfig { - /// Constrain effort on reasoning. - #[serde(skip_serializing_if = "Option::is_none")] - pub effort: Option, - /// Summary mode for reasoning. - #[serde(skip_serializing_if = "Option::is_none")] - pub summary: Option, -} - -/// o-series reasoning settings. 
-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)] -#[serde(rename_all = "lowercase")] -pub enum Verbosity { - Low, - Medium, - High, -} - -#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] -#[serde(rename_all = "lowercase")] -pub enum ReasoningSummary { - Auto, - Concise, - Detailed, -} - -/// Configuration for text response format. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct TextConfig { - /// Defines the format: plain text, JSON object, or JSON schema. - pub format: TextResponseFormat, - - #[serde(skip_serializing_if = "Option::is_none")] - pub verbosity: Option, -} - -#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] -#[serde(tag = "type", rename_all = "snake_case")] -pub enum TextResponseFormat { - /// The type of response format being defined: `text` - Text, - /// The type of response format being defined: `json_object` - JsonObject, - /// The type of response format being defined: `json_schema` - JsonSchema(ResponseFormatJsonSchema), -} - -/// Definitions for model-callable tools. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(tag = "type", rename_all = "snake_case")] -pub enum ToolDefinition { - /// File search tool. - FileSearch(FileSearch), - /// Custom function call. - Function(Function), - /// Web search preview tool. - WebSearchPreview(WebSearchPreview), - /// Virtual computer control tool. - ComputerUsePreview(ComputerUsePreview), - /// Remote Model Context Protocol server. - Mcp(Mcp), - /// Python code interpreter tool. - CodeInterpreter(CodeInterpreter), - /// Image generation tool. - ImageGeneration(ImageGeneration), - /// Local shell command execution tool. 
- LocalShell, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] -#[builder( - name = "FileSearchArgs", - pattern = "mutable", - setter(into, strip_option), - default -)] -#[builder(build_fn(error = "OpenAIError"))] -pub struct FileSearch { - /// The IDs of the vector stores to search. - pub vector_store_ids: Vec, - /// The maximum number of results to return. This number should be between 1 and 50 inclusive. - #[serde(skip_serializing_if = "Option::is_none")] - pub max_num_results: Option, - /// A filter to apply. - #[serde(skip_serializing_if = "Option::is_none")] - pub filters: Option, - /// Ranking options for search. - #[serde(skip_serializing_if = "Option::is_none")] - pub ranking_options: Option, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] -#[builder( - name = "FunctionArgs", - pattern = "mutable", - setter(into, strip_option), - default -)] -pub struct Function { - /// The name of the function to call. - pub name: String, - /// A JSON schema object describing the parameters of the function. - pub parameters: serde_json::Value, - /// Whether to enforce strict parameter validation. - pub strict: bool, - /// A description of the function. Used by the model to determine whether or not to call the - /// function. - #[serde(skip_serializing_if = "Option::is_none")] - pub description: Option, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] -#[builder( - name = "WebSearchPreviewArgs", - pattern = "mutable", - setter(into, strip_option), - default -)] -pub struct WebSearchPreview { - /// The user's location. - #[serde(skip_serializing_if = "Option::is_none")] - pub user_location: Option, - /// High level guidance for the amount of context window space to use for the search. 
- #[serde(skip_serializing_if = "Option::is_none")] - pub search_context_size: Option, -} - -#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq)] -#[serde(rename_all = "lowercase")] -pub enum WebSearchContextSize { - Low, - Medium, - High, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] -#[builder( - name = "ComputerUsePreviewArgs", - pattern = "mutable", - setter(into, strip_option), - default -)] -pub struct ComputerUsePreview { - /// The type of computer environment to control. - environment: String, - /// The width of the computer display. - display_width: u32, - /// The height of the computer display. - display_height: u32, -} - -/// Options for search result ranking. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct RankingOptions { - /// The ranker to use for the file search. - pub ranker: String, - /// The score threshold for the file search, a number between 0 and 1. Numbers closer to 1 will - /// attempt to return only the most relevant results, but may return fewer results. - #[serde(skip_serializing_if = "Option::is_none")] - pub score_threshold: Option, -} - -/// Filters for file search. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(untagged)] -pub enum Filter { - /// A filter used to compare a specified attribute key to a given value using a defined - /// comparison operation. - Comparison(ComparisonFilter), - /// Combine multiple filters using and or or. - Compound(CompoundFilter), -} - -/// Single comparison filter. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct ComparisonFilter { - /// Specifies the comparison operator - #[serde(rename = "type")] - pub op: ComparisonType, - /// The key to compare against the value. - pub key: String, - /// The value to compare against the attribute key; supports string, number, or boolean types. 
- pub value: serde_json::Value, -} - -#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] -pub enum ComparisonType { - #[serde(rename = "eq")] - Equals, - #[serde(rename = "ne")] - NotEquals, - #[serde(rename = "gt")] - GreaterThan, - #[serde(rename = "gte")] - GreaterThanOrEqualTo, - #[serde(rename = "lt")] - LessThan, - #[serde(rename = "lte")] - LessThanOrEqualTo, -} - -/// Combine multiple filters. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct CompoundFilter { - /// Type of operation - #[serde(rename = "type")] - pub op: CompoundType, - /// Array of filters to combine. Items can be ComparisonFilter or CompoundFilter. - pub filters: Vec, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(rename_all = "lowercase")] -pub enum CompoundType { - And, - Or, -} - -/// Approximate user location for web search. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] -#[builder( - name = "LocationArgs", - pattern = "mutable", - setter(into, strip_option), - default -)] -#[builder(build_fn(error = "OpenAIError"))] -pub struct Location { - /// The type of location approximation. Always approximate. - #[serde(rename = "type")] - pub kind: String, - /// Free text input for the city of the user, e.g. San Francisco. - #[serde(skip_serializing_if = "Option::is_none")] - pub city: Option, - /// The two-letter ISO country code of the user, e.g. US. - #[serde(skip_serializing_if = "Option::is_none")] - pub country: Option, - /// Free text input for the region of the user, e.g. California. - #[serde(skip_serializing_if = "Option::is_none")] - pub region: Option, - /// The IANA timezone of the user, e.g. America/Los_Angeles. - #[serde(skip_serializing_if = "Option::is_none")] - pub timezone: Option, -} - -/// MCP (Model Context Protocol) tool configuration. 
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] -#[builder( - name = "McpArgs", - pattern = "mutable", - setter(into, strip_option), - default -)] -#[builder(build_fn(error = "OpenAIError"))] -pub struct Mcp { - /// A label for this MCP server. - pub server_label: String, - /// The URL for the MCP server. - pub server_url: String, - /// List of allowed tool names or filter object. - #[serde(skip_serializing_if = "Option::is_none")] - pub allowed_tools: Option, - /// Optional HTTP headers for the MCP server. - #[serde(skip_serializing_if = "Option::is_none")] - pub headers: Option, - /// Approval policy or filter for tools. - #[serde(skip_serializing_if = "Option::is_none")] - pub require_approval: Option, -} - -/// Allowed tools configuration for MCP. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(untagged)] -pub enum AllowedTools { - /// A flat list of allowed tool names. - List(Vec), - /// A filter object specifying allowed tools. - Filter(McpAllowedToolsFilter), -} - -/// Filter object for MCP allowed tools. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct McpAllowedToolsFilter { - /// Names of tools in the filter - #[serde(skip_serializing_if = "Option::is_none")] - pub tool_names: Option>, -} - -/// Approval policy or filter for MCP tools. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(untagged)] -pub enum RequireApproval { - /// A blanket policy: "always" or "never". - Policy(RequireApprovalPolicy), - /// A filter object specifying which tools require approval. - Filter(McpApprovalFilter), -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(rename_all = "lowercase")] -pub enum RequireApprovalPolicy { - Always, - Never, -} - -/// Filter object for MCP tool approval. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct McpApprovalFilter { - /// A list of tools that always require approval. 
- #[serde(skip_serializing_if = "Option::is_none")] - pub always: Option, - /// A list of tools that never require approval. - #[serde(skip_serializing_if = "Option::is_none")] - pub never: Option, -} - -/// Container configuration for a code interpreter. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(untagged)] -pub enum CodeInterpreterContainer { - /// A simple container ID. - Id(String), - /// Auto-configured container with optional files. - Container(CodeInterpreterContainerKind), -} - -/// Auto configuration for code interpreter container. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(tag = "type", rename_all = "snake_case")] -pub enum CodeInterpreterContainerKind { - Auto { - /// Optional list of uploaded file IDs. - #[serde(skip_serializing_if = "Option::is_none")] - file_ids: Option>, - }, -} - -/// Code interpreter tool definition. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] -#[builder( - name = "CodeInterpreterArgs", - pattern = "mutable", - setter(into, strip_option), - default -)] -#[builder(build_fn(error = "OpenAIError"))] -pub struct CodeInterpreter { - /// Container configuration for running code. - pub container: CodeInterpreterContainer, -} - -/// Mask image input for image generation. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct InputImageMask { - /// Base64-encoded mask image. - #[serde(skip_serializing_if = "Option::is_none")] - pub image_url: Option, - /// File ID for the mask image. - #[serde(skip_serializing_if = "Option::is_none")] - pub file_id: Option, -} - -/// Image generation tool definition. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] -#[builder( - name = "ImageGenerationArgs", - pattern = "mutable", - setter(into, strip_option), - default -)] -#[builder(build_fn(error = "OpenAIError"))] -pub struct ImageGeneration { - /// Background type: transparent, opaque, or auto. 
- #[serde(skip_serializing_if = "Option::is_none")] - pub background: Option, - /// Optional mask for inpainting. - #[serde(skip_serializing_if = "Option::is_none")] - pub input_image_mask: Option, - /// Model to use (default: gpt-image-1). - #[serde(skip_serializing_if = "Option::is_none")] - pub model: Option, - /// Moderation level (default: auto). - #[serde(skip_serializing_if = "Option::is_none")] - pub moderation: Option, - /// Compression level (0-100). - #[serde(skip_serializing_if = "Option::is_none")] - pub output_compression: Option, - /// Output format: png, webp, or jpeg. - #[serde(skip_serializing_if = "Option::is_none")] - pub output_format: Option, - /// Number of partial images (0-3). - #[serde(skip_serializing_if = "Option::is_none")] - pub partial_images: Option, - /// Quality: low, medium, high, or auto. - #[serde(skip_serializing_if = "Option::is_none")] - pub quality: Option, - /// Size: e.g. "1024x1024" or auto. - #[serde(skip_serializing_if = "Option::is_none")] - pub size: Option, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(rename_all = "lowercase")] -pub enum ImageGenerationBackground { - Transparent, - Opaque, - Auto, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(rename_all = "lowercase")] -pub enum ImageGenerationOutputFormat { - Png, - Webp, - Jpeg, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(rename_all = "lowercase")] -pub enum ImageGenerationQuality { - Low, - Medium, - High, - Auto, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(rename_all = "lowercase")] -pub enum ImageGenerationSize { - Auto, - #[serde(rename = "1024x1024")] - Size1024x1024, - #[serde(rename = "1024x1536")] - Size1024x1536, - #[serde(rename = "1536x1024")] - Size1536x1024, -} - -/// Control how the model picks or is forced to pick a tool. 
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(untagged)] -pub enum ToolChoice { - /// Controls which (if any) tool is called by the model. - Mode(ToolChoiceMode), - /// Indicates that the model should use a built-in tool to generate a response. - Hosted { - /// The type of hosted tool the model should to use. - #[serde(rename = "type")] - kind: HostedToolType, - }, - /// Use this option to force the model to call a specific function. - Function { - /// The name of the function to call. - name: String, - }, -} - -/// Simple tool-choice modes. -#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] -#[serde(rename_all = "lowercase")] -pub enum ToolChoiceMode { - /// The model will not call any tool and instead generates a message. - None, - /// The model can pick between generating a message or calling one or more tools. - Auto, - /// The model must call one or more tools. - Required, -} - -/// Hosted tool type identifiers. -#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] -#[serde(rename_all = "snake_case")] -pub enum HostedToolType { - FileSearch, - WebSearchPreview, - ComputerUsePreview, -} - -/// Error returned by the API when a request fails. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct ErrorObject { - /// The error code for the response. - pub code: String, - /// A human-readable description of the error. - pub message: String, -} - -/// Details about an incomplete response. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct IncompleteDetails { - /// The reason why the response is incomplete. - pub reason: String, -} - -/// A simple text output from the model. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct OutputText { - /// The annotations of the text output. - pub annotations: Vec, - /// The text output from the model. 
- pub text: String, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(tag = "type", rename_all = "snake_case")] -pub enum Annotation { - /// A citation to a file. - FileCitation(FileCitation), - /// A citation for a web resource used to generate a model response. - UrlCitation(UrlCitation), - /// A path to a file. - FilePath(FilePath), -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct FileCitation { - /// The ID of the file. - file_id: String, - /// The index of the file in the list of files. - index: u32, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct UrlCitation { - /// The index of the last character of the URL citation in the message. - end_index: u32, - /// The index of the first character of the URL citation in the message. - start_index: u32, - /// The title of the web resource. - title: String, - /// The URL of the web resource. - url: String, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct FilePath { - /// The ID of the file. - file_id: String, - /// The index of the file in the list of files. - index: u32, -} - -/// A refusal explanation from the model. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct Refusal { - /// The refusal explanationfrom the model. - pub refusal: String, -} - -/// A message generated by the model. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct OutputMessage { - /// The content of the output message. - pub content: Vec, - /// The unique ID of the output message. - pub id: String, - /// The role of the output message. Always assistant. - pub role: Role, - /// The status of the message input. - pub status: OutputStatus, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(tag = "type", rename_all = "snake_case")] -pub enum Content { - /// A text output from the model. - OutputText(OutputText), - /// A refusal from the model. 
- Refusal(Refusal), -} - -/// Nested content within an output message. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(tag = "type", rename_all = "snake_case")] -pub enum OutputContent { - /// An output message from the model. - Message(OutputMessage), - /// The results of a file search tool call. - FileSearchCall(FileSearchCallOutput), - /// A tool call to run a function. - FunctionCall(FunctionCall), - /// The results of a web search tool call. - WebSearchCall(WebSearchCallOutput), - /// A tool call to a computer use tool. - ComputerCall(ComputerCallOutput), - /// A description of the chain of thought used by a reasoning model while generating a response. - /// Be sure to include these items in your input to the Responses API for subsequent turns of a - /// conversation if you are manually managing context. - Reasoning(ReasoningItem), - /// Image generation tool call output. - ImageGenerationCall(ImageGenerationCallOutput), - /// Code interpreter tool call output. - CodeInterpreterCall(CodeInterpreterCallOutput), - /// Local shell tool call output. - LocalShellCall(LocalShellCallOutput), - /// MCP tool invocation output. - McpCall(McpCallOutput), - /// MCP list-tools output. - McpListTools(McpListToolsOutput), - /// MCP approval request output. - McpApprovalRequest(McpApprovalRequestOutput), -} - -/// A reasoning item representing the model's chain of thought, including summary paragraphs. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct ReasoningItem { - /// Unique identifier of the reasoning content. - pub id: String, - /// The summarized chain-of-thought paragraphs. - pub summary: Vec, - /// The encrypted content of the reasoning item - populated when a response is generated with - /// `reasoning.encrypted_content` in the `include` parameter. - #[serde(skip_serializing_if = "Option::is_none")] - pub encrypted_content: Option, - /// The status of the reasoning item. 
- #[serde(skip_serializing_if = "Option::is_none")] - pub status: Option, -} - -/// A single summary text fragment from reasoning. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct SummaryText { - /// A short summary of the reasoning used by the model. - pub text: String, -} - -/// File search tool call output. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct FileSearchCallOutput { - /// The unique ID of the file search tool call. - pub id: String, - /// The queries used to search for files. - pub queries: Vec, - /// The status of the file search tool call. - pub status: FileSearchCallOutputStatus, - /// The results of the file search tool call. - #[serde(skip_serializing_if = "Option::is_none")] - pub results: Option>, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(rename_all = "snake_case")] -pub enum FileSearchCallOutputStatus { - InProgress, - Searching, - Incomplete, - Failed, - Completed, -} - -/// A single result from a file search. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct FileSearchResult { - /// The unique ID of the file. - pub file_id: String, - /// The name of the file. - pub filename: String, - /// The relevance score of the file - a value between 0 and 1. - pub score: f32, - /// The text that was retrieved from the file. - pub text: String, - /// Set of 16 key-value pairs that can be attached to an object. This can be useful for storing - /// additional information about the object in a structured format, and querying for objects - /// API or the dashboard. Keys are strings with a maximum length of 64 characters - /// . Values are strings with a maximum length of 512 characters, booleans, or numbers. - pub attributes: HashMap, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct SafetyCheck { - /// The ID of the safety check. - pub id: String, - /// The type/code of the pending safety check. 
- pub code: String, - /// Details about the pending safety check. - pub message: String, -} - -/// Web search tool call output. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct WebSearchCallOutput { - /// The unique ID of the web search tool call. - pub id: String, - /// The status of the web search tool call. - pub status: String, -} - -/// Output from a computer tool call. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct ComputerCallOutput { - pub action: ComputerCallAction, - /// An identifier used when responding to the tool call with output. - pub call_id: String, - /// The unique ID of the computer call. - pub id: String, - /// The pending safety checks for the computer call. - pub pending_safety_checks: Vec, - /// The status of the item. - pub status: OutputStatus, -} - -/// A point in 2D space. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct Point { - pub x: i32, - pub y: i32, -} - -/// Represents all user‐triggered actions. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -#[serde(tag = "type", rename_all = "snake_case")] -pub enum ComputerCallAction { - /// A click action. - Click(Click), - - /// A double-click action. - DoubleClick(DoubleClick), - - /// A drag action. - Drag(Drag), - - /// A keypress action. - KeyPress(KeyPress), - - /// A mouse move action. - Move(MoveAction), - - /// A screenshot action. - Screenshot, - - /// A scroll action. - Scroll(Scroll), - - /// A type (text entry) action. - Type(TypeAction), - - /// A wait (no-op) action. - Wait, -} - -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum ButtonPress { - Left, - Right, - Wheel, - Back, - Forward, -} - -/// A click action. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct Click { - /// Which mouse button was pressed. - pub button: ButtonPress, - /// X‐coordinate of the click. - pub x: i32, - /// Y‐coordinate of the click. 
- pub y: i32, -} - -/// A double click action. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct DoubleClick { - /// X‐coordinate of the double click. - pub x: i32, - /// Y‐coordinate of the double click. - pub y: i32, -} - -/// A drag action. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct Drag { - /// The path of points the cursor drags through. - pub path: Vec, - /// X‐coordinate at the end of the drag. - pub x: i32, - /// Y‐coordinate at the end of the drag. - pub y: i32, -} - -/// A keypress action. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct KeyPress { - /// The list of keys to press (e.g. `["Control", "C"]`). - pub keys: Vec, -} - -/// A mouse move action. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct MoveAction { - /// X‐coordinate to move to. - pub x: i32, - /// Y‐coordinate to move to. - pub y: i32, -} - -/// A scroll action. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct Scroll { - /// Horizontal scroll distance. - pub scroll_x: i32, - /// Vertical scroll distance. - pub scroll_y: i32, - /// X‐coordinate where the scroll began. - pub x: i32, - /// Y‐coordinate where the scroll began. - pub y: i32, -} - -/// A typing (text entry) action. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct TypeAction { - /// The text to type. - pub text: String, -} - -/// Metadata for a function call request. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct FunctionCall { - /// The unique ID of the function tool call. - pub id: String, - /// The unique ID of the function tool call generated by the model. - pub call_id: String, - /// The name of the function to run. - pub name: String, - /// A JSON string of the arguments to pass to the function. - pub arguments: String, - /// The status of the item. - pub status: OutputStatus, -} - -/// Output of an image generation request. 
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct ImageGenerationCallOutput { - /// Unique ID of the image generation call. - pub id: String, - /// Base64-encoded generated image, or null. - pub result: Option, - /// Status of the image generation call. - pub status: String, -} - -/// Output of a code interpreter request. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct CodeInterpreterCallOutput { - /// The code that was executed. - #[serde(skip_serializing_if = "Option::is_none")] - pub code: Option, - /// Unique ID of the call. - pub id: String, - /// Status of the tool call. - pub status: String, - /// ID of the container used to run the code. - pub container_id: String, - /// The outputs of the execution: logs or files. - #[serde(skip_serializing_if = "Option::is_none")] - pub outputs: Option>, -} - -/// Individual result from a code interpreter: either logs or files. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(tag = "type", rename_all = "snake_case")] -pub enum CodeInterpreterResult { - /// Text logs from the execution. - Logs(CodeInterpreterTextOutput), - /// File outputs from the execution. - Files(CodeInterpreterFileOutput), -} - -/// The output containing execution logs. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct CodeInterpreterTextOutput { - /// The logs of the code interpreter tool call. - pub logs: String, -} - -/// The output containing file references. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct CodeInterpreterFileOutput { - /// List of file IDs produced. - pub files: Vec, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct CodeInterpreterFile { - /// The ID of the file. - file_id: String, - /// The MIME type of the file. - mime_type: String, -} - -/// Output of a local shell command request. 
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct LocalShellCallOutput { - /// Details of the exec action. - pub action: LocalShellAction, - /// Unique call identifier for responding to the tool call. - pub call_id: String, - /// Unique ID of the local shell call. - pub id: String, - /// Status of the local shell call. - pub status: String, -} - -/// Define the shape of a local shell action (exec). -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct LocalShellAction { - /// The command to run. - pub command: Vec, - /// Environment variables to set for the command. - pub env: HashMap, - /// Optional timeout for the command (ms). - pub timeout_ms: Option, - /// Optional user to run the command as. - pub user: Option, - /// Optional working directory for the command. - pub working_directory: Option, -} - -/// Output of an MCP server tool invocation. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct McpCallOutput { - /// JSON string of the arguments passed. - pub arguments: String, - /// Unique ID of the MCP call. - pub id: String, - /// Name of the tool invoked. - pub name: String, - /// Label of the MCP server. - pub server_label: String, - /// Error message from the call, if any. - pub error: Option, - /// Output from the call, if any. - pub output: Option, -} - -/// Output listing tools available on an MCP server. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct McpListToolsOutput { - /// Unique ID of the list request. - pub id: String, - /// Label of the MCP server. - pub server_label: String, - /// Tools available on the server with metadata. - pub tools: Vec, - /// Error message if listing failed. - #[serde(skip_serializing_if = "Option::is_none")] - pub error: Option, -} - -/// Information about a single tool on an MCP server. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct McpToolInfo { - /// The name of the tool. 
- pub name: String, - /// The JSON schema describing the tool's input. - pub input_schema: Value, - /// Additional annotations about the tool. - #[serde(skip_serializing_if = "Option::is_none")] - pub annotations: Option, - /// The description of the tool. - #[serde(skip_serializing_if = "Option::is_none")] - pub description: Option, -} - -/// Output representing a human approval request for an MCP tool. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct McpApprovalRequestOutput { - /// JSON string of arguments for the tool. - pub arguments: String, - /// Unique ID of the approval request. - pub id: String, - /// Name of the tool requiring approval. - pub name: String, - /// Label of the MCP server making the request. - pub server_label: String, -} - -/// Usage statistics for a response. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct Usage { - /// The number of input tokens. - pub input_tokens: u32, - /// A detailed breakdown of the input tokens. - pub input_tokens_details: PromptTokensDetails, - /// The number of output tokens. - pub output_tokens: u32, - /// A detailed breakdown of the output tokens. - pub output_tokens_details: CompletionTokensDetails, - /// The total number of tokens used. - pub total_tokens: u32, -} - -/// The complete response returned by the Responses API. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct Response { - /// Unix timestamp (in seconds) when this Response was created. - pub created_at: u64, - - /// Error object if the API failed to generate a response. - #[serde(skip_serializing_if = "Option::is_none")] - pub error: Option, - - /// Unique identifier for this response. - pub id: String, - - /// Details about why the response is incomplete, if any. - #[serde(skip_serializing_if = "Option::is_none")] - pub incomplete_details: Option, - - /// Instructions that were inserted as the first item in context. 
- #[serde(skip_serializing_if = "Option::is_none")] - pub instructions: Option, - - /// The value of `max_output_tokens` that was honored. - #[serde(skip_serializing_if = "Option::is_none")] - pub max_output_tokens: Option, - - /// Metadata tags/values that were attached to this response. - #[serde(skip_serializing_if = "Option::is_none")] - pub metadata: Option>, - - /// Model ID used to generate the response. - pub model: String, - - /// The object type – always `response`. - pub object: String, - - /// The array of content items generated by the model. - pub output: Vec, - - /// SDK-only convenience property that contains the aggregated text output from all - /// `output_text` items in the `output` array, if any are present. - /// Supported in the Python and JavaScript SDKs. - #[serde(skip_serializing_if = "Option::is_none")] - pub output_text: Option, - - /// Whether parallel tool calls were enabled. - #[serde(skip_serializing_if = "Option::is_none")] - pub parallel_tool_calls: Option, - - /// Previous response ID, if creating part of a multi-turn conversation. - #[serde(skip_serializing_if = "Option::is_none")] - pub previous_response_id: Option, - - /// Reasoning configuration echoed back (effort, summary settings). - #[serde(skip_serializing_if = "Option::is_none")] - pub reasoning: Option, - - /// Whether to store the generated model response for later retrieval via API. - #[serde(skip_serializing_if = "Option::is_none")] - pub store: Option, - - /// The service tier that actually processed this response. - #[serde(skip_serializing_if = "Option::is_none")] - pub service_tier: Option, - - /// The status of the response generation. - pub status: Status, - - /// Sampling temperature that was used. - #[serde(skip_serializing_if = "Option::is_none")] - pub temperature: Option, - - /// Text format configuration echoed back (plain, json_object, json_schema). 
- #[serde(skip_serializing_if = "Option::is_none")] - pub text: Option, - - /// How the model chose or was forced to choose a tool. - #[serde(skip_serializing_if = "Option::is_none")] - pub tool_choice: Option, - - /// Tool definitions that were provided. - #[serde(skip_serializing_if = "Option::is_none")] - pub tools: Option>, - - /// Nucleus sampling cutoff that was used. - #[serde(skip_serializing_if = "Option::is_none")] - pub top_p: Option, - - /// Truncation strategy that was applied. - #[serde(skip_serializing_if = "Option::is_none")] - pub truncation: Option, - - /// Token usage statistics for this request. - #[serde(skip_serializing_if = "Option::is_none")] - pub usage: Option, - - /// End-user ID for which this response was generated. - #[serde(skip_serializing_if = "Option::is_none")] - pub user: Option, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(rename_all = "snake_case")] -pub enum Status { - Completed, - Failed, - InProgress, - Incomplete, -} - -/// Event types for streaming responses from the Responses API -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(tag = "type")] -#[non_exhaustive] // Future-proof against breaking changes -pub enum ResponseEvent { - /// Response creation started - #[serde(rename = "response.created")] - ResponseCreated(ResponseCreated), - /// Processing in progress - #[serde(rename = "response.in_progress")] - ResponseInProgress(ResponseInProgress), - /// Response completed (different from done) - #[serde(rename = "response.completed")] - ResponseCompleted(ResponseCompleted), - /// Response failed - #[serde(rename = "response.failed")] - ResponseFailed(ResponseFailed), - /// Response incomplete - #[serde(rename = "response.incomplete")] - ResponseIncomplete(ResponseIncomplete), - /// Response queued - #[serde(rename = "response.queued")] - ResponseQueued(ResponseQueued), - /// Output item added - #[serde(rename = "response.output_item.added")] - 
ResponseOutputItemAdded(ResponseOutputItemAdded), - /// Content part added - #[serde(rename = "response.content_part.added")] - ResponseContentPartAdded(ResponseContentPartAdded), - /// Text delta update - #[serde(rename = "response.output_text.delta")] - ResponseOutputTextDelta(ResponseOutputTextDelta), - /// Text output completed - #[serde(rename = "response.output_text.done")] - ResponseOutputTextDone(ResponseOutputTextDone), - /// Refusal delta update - #[serde(rename = "response.refusal.delta")] - ResponseRefusalDelta(ResponseRefusalDelta), - /// Refusal completed - #[serde(rename = "response.refusal.done")] - ResponseRefusalDone(ResponseRefusalDone), - /// Content part completed - #[serde(rename = "response.content_part.done")] - ResponseContentPartDone(ResponseContentPartDone), - /// Output item completed - #[serde(rename = "response.output_item.done")] - ResponseOutputItemDone(ResponseOutputItemDone), - /// Function call arguments delta - #[serde(rename = "response.function_call_arguments.delta")] - ResponseFunctionCallArgumentsDelta(ResponseFunctionCallArgumentsDelta), - /// Function call arguments completed - #[serde(rename = "response.function_call_arguments.done")] - ResponseFunctionCallArgumentsDone(ResponseFunctionCallArgumentsDone), - /// File search call in progress - #[serde(rename = "response.file_search_call.in_progress")] - ResponseFileSearchCallInProgress(ResponseFileSearchCallInProgress), - /// File search call searching - #[serde(rename = "response.file_search_call.searching")] - ResponseFileSearchCallSearching(ResponseFileSearchCallSearching), - /// File search call completed - #[serde(rename = "response.file_search_call.completed")] - ResponseFileSearchCallCompleted(ResponseFileSearchCallCompleted), - /// Web search call in progress - #[serde(rename = "response.web_search_call.in_progress")] - ResponseWebSearchCallInProgress(ResponseWebSearchCallInProgress), - /// Web search call searching - #[serde(rename = 
"response.web_search_call.searching")] - ResponseWebSearchCallSearching(ResponseWebSearchCallSearching), - /// Web search call completed - #[serde(rename = "response.web_search_call.completed")] - ResponseWebSearchCallCompleted(ResponseWebSearchCallCompleted), - /// Reasoning summary part added - #[serde(rename = "response.reasoning_summary_part.added")] - ResponseReasoningSummaryPartAdded(ResponseReasoningSummaryPartAdded), - /// Reasoning summary part done - #[serde(rename = "response.reasoning_summary_part.done")] - ResponseReasoningSummaryPartDone(ResponseReasoningSummaryPartDone), - /// Reasoning summary text delta - #[serde(rename = "response.reasoning_summary_text.delta")] - ResponseReasoningSummaryTextDelta(ResponseReasoningSummaryTextDelta), - /// Reasoning summary text done - #[serde(rename = "response.reasoning_summary_text.done")] - ResponseReasoningSummaryTextDone(ResponseReasoningSummaryTextDone), - /// Reasoning summary delta - #[serde(rename = "response.reasoning_summary.delta")] - ResponseReasoningSummaryDelta(ResponseReasoningSummaryDelta), - /// Reasoning summary done - #[serde(rename = "response.reasoning_summary.done")] - ResponseReasoningSummaryDone(ResponseReasoningSummaryDone), - /// Image generation call in progress - #[serde(rename = "response.image_generation_call.in_progress")] - ResponseImageGenerationCallInProgress(ResponseImageGenerationCallInProgress), - /// Image generation call generating - #[serde(rename = "response.image_generation_call.generating")] - ResponseImageGenerationCallGenerating(ResponseImageGenerationCallGenerating), - /// Image generation call partial image - #[serde(rename = "response.image_generation_call.partial_image")] - ResponseImageGenerationCallPartialImage(ResponseImageGenerationCallPartialImage), - /// Image generation call completed - #[serde(rename = "response.image_generation_call.completed")] - ResponseImageGenerationCallCompleted(ResponseImageGenerationCallCompleted), - /// MCP call arguments delta - 
#[serde(rename = "response.mcp_call_arguments.delta")] - ResponseMcpCallArgumentsDelta(ResponseMcpCallArgumentsDelta), - /// MCP call arguments done - #[serde(rename = "response.mcp_call_arguments.done")] - ResponseMcpCallArgumentsDone(ResponseMcpCallArgumentsDone), - /// MCP call completed - #[serde(rename = "response.mcp_call.completed")] - ResponseMcpCallCompleted(ResponseMcpCallCompleted), - /// MCP call failed - #[serde(rename = "response.mcp_call.failed")] - ResponseMcpCallFailed(ResponseMcpCallFailed), - /// MCP call in progress - #[serde(rename = "response.mcp_call.in_progress")] - ResponseMcpCallInProgress(ResponseMcpCallInProgress), - /// MCP list tools completed - #[serde(rename = "response.mcp_list_tools.completed")] - ResponseMcpListToolsCompleted(ResponseMcpListToolsCompleted), - /// MCP list tools failed - #[serde(rename = "response.mcp_list_tools.failed")] - ResponseMcpListToolsFailed(ResponseMcpListToolsFailed), - /// MCP list tools in progress - #[serde(rename = "response.mcp_list_tools.in_progress")] - ResponseMcpListToolsInProgress(ResponseMcpListToolsInProgress), - /// Code interpreter call in progress - #[serde(rename = "response.code_interpreter_call.in_progress")] - ResponseCodeInterpreterCallInProgress(ResponseCodeInterpreterCallInProgress), - /// Code interpreter call interpreting - #[serde(rename = "response.code_interpreter_call.interpreting")] - ResponseCodeInterpreterCallInterpreting(ResponseCodeInterpreterCallInterpreting), - /// Code interpreter call completed - #[serde(rename = "response.code_interpreter_call.completed")] - ResponseCodeInterpreterCallCompleted(ResponseCodeInterpreterCallCompleted), - /// Code interpreter call code delta - #[serde(rename = "response.code_interpreter_call_code.delta")] - ResponseCodeInterpreterCallCodeDelta(ResponseCodeInterpreterCallCodeDelta), - /// Code interpreter call code done - #[serde(rename = "response.code_interpreter_call_code.done")] - 
ResponseCodeInterpreterCallCodeDone(ResponseCodeInterpreterCallCodeDone), - /// Output text annotation added - #[serde(rename = "response.output_text.annotation.added")] - ResponseOutputTextAnnotationAdded(ResponseOutputTextAnnotationAdded), - /// Error occurred - #[serde(rename = "error")] - ResponseError(ResponseError), - - /// Unknown event type - #[serde(untagged)] - Unknown(serde_json::Value), -} - -/// Stream of response events -pub type ResponseStream = Pin> + Send>>; - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseCreated { - pub sequence_number: u64, - pub response: ResponseMetadata, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseInProgress { - pub sequence_number: u64, - pub response: ResponseMetadata, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseOutputItemAdded { - pub sequence_number: u64, - pub output_index: u32, - pub item: OutputItem, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseContentPartAdded { - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub content_index: u32, - pub part: ContentPart, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseOutputTextDelta { - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub content_index: u32, - pub delta: String, - #[serde(default, skip_serializing_if = "Option::is_none")] - pub logprobs: Option, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseContentPartDone { - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub content_index: u32, - pub part: ContentPart, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseOutputItemDone { - pub 
sequence_number: u64, - pub output_index: u32, - pub item: OutputItem, -} - -/// Response completed event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseCompleted { - pub sequence_number: u64, - pub response: ResponseMetadata, -} - -/// Response failed event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseFailed { - pub sequence_number: u64, - pub response: ResponseMetadata, -} - -/// Response incomplete event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseIncomplete { - pub sequence_number: u64, - pub response: ResponseMetadata, -} - -/// Response queued event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseQueued { - pub sequence_number: u64, - pub response: ResponseMetadata, -} - -/// Text output completed event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseOutputTextDone { - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub content_index: u32, - pub text: String, - pub logprobs: Option>, -} - -/// Refusal delta event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseRefusalDelta { - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub content_index: u32, - pub delta: String, -} - -/// Refusal done event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseRefusalDone { - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub content_index: u32, - pub refusal: String, -} - -/// Function call arguments delta event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseFunctionCallArgumentsDelta { - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub delta: String, 
-} - -/// Function call arguments done event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseFunctionCallArgumentsDone { - pub name: String, - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub arguments: String, -} - -/// Error event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseError { - pub sequence_number: u64, - pub code: Option, - pub message: String, - pub param: Option, -} - -/// File search call in progress event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseFileSearchCallInProgress { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// File search call searching event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseFileSearchCallSearching { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// File search call completed event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseFileSearchCallCompleted { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// Web search call in progress event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseWebSearchCallInProgress { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// Web search call searching event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseWebSearchCallSearching { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// Web search call completed event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseWebSearchCallCompleted { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// Reasoning 
summary part added event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseReasoningSummaryPartAdded { - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub summary_index: u32, - pub part: serde_json::Value, // Could be more specific but using Value for flexibility -} - -/// Reasoning summary part done event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseReasoningSummaryPartDone { - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub summary_index: u32, - pub part: serde_json::Value, -} - -/// Reasoning summary text delta event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseReasoningSummaryTextDelta { - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub summary_index: u32, - pub delta: String, -} - -/// Reasoning summary text done event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseReasoningSummaryTextDone { - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub summary_index: u32, - pub text: String, -} - -/// Reasoning summary delta event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseReasoningSummaryDelta { - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub summary_index: u32, - pub delta: serde_json::Value, -} - -/// Reasoning summary done event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseReasoningSummaryDone { - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub summary_index: u32, - pub text: String, -} - -/// Image generation call in progress event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseImageGenerationCallInProgress { - pub 
sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// Image generation call generating event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseImageGenerationCallGenerating { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// Image generation call partial image event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseImageGenerationCallPartialImage { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, - pub partial_image_index: u32, - pub partial_image_b64: String, -} - -/// Image generation call completed event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseImageGenerationCallCompleted { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// MCP call arguments delta event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseMcpCallArgumentsDelta { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, - pub delta: String, -} - -/// MCP call arguments done event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseMcpCallArgumentsDone { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, - pub arguments: String, -} - -/// MCP call completed event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseMcpCallCompleted { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// MCP call failed event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseMcpCallFailed { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// MCP call in progress event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] 
-pub struct ResponseMcpCallInProgress { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// MCP list tools completed event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseMcpListToolsCompleted { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// MCP list tools failed event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseMcpListToolsFailed { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// MCP list tools in progress event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseMcpListToolsInProgress { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// Code interpreter call in progress event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseCodeInterpreterCallInProgress { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// Code interpreter call interpreting event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseCodeInterpreterCallInterpreting { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// Code interpreter call completed event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseCodeInterpreterCallCompleted { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// Code interpreter call code delta event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseCodeInterpreterCallCodeDelta { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, - pub delta: String, -} - -/// Code interpreter call code done event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] 
-#[non_exhaustive] -pub struct ResponseCodeInterpreterCallCodeDone { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, - pub code: String, -} - -/// Response metadata -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseMetadata { - pub id: String, - #[serde(skip_serializing_if = "Option::is_none")] - pub object: Option, - pub created_at: u64, - pub status: Status, - #[serde(skip_serializing_if = "Option::is_none")] - pub model: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub usage: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub error: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub incomplete_details: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub input: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub instructions: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub max_output_tokens: Option, - /// Whether the model was run in background mode - #[serde(skip_serializing_if = "Option::is_none")] - pub background: Option, - /// The service tier that was actually used - #[serde(skip_serializing_if = "Option::is_none")] - pub service_tier: Option, - /// The effective value of top_logprobs parameter - #[serde(skip_serializing_if = "Option::is_none")] - pub top_logprobs: Option, - /// The effective value of max_tool_calls parameter - #[serde(skip_serializing_if = "Option::is_none")] - pub max_tool_calls: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub output: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub parallel_tool_calls: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub previous_response_id: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub reasoning: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub store: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub temperature: Option, - 
#[serde(skip_serializing_if = "Option::is_none")] - pub text: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub tool_choice: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub tools: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub top_p: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub truncation: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub user: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub metadata: Option>, - /// Prompt cache key for improved performance - #[serde(skip_serializing_if = "Option::is_none")] - pub prompt_cache_key: Option, - /// Safety identifier for content filtering - #[serde(skip_serializing_if = "Option::is_none")] - pub safety_identifier: Option, -} - -/// Output item -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(tag = "type")] -#[serde(rename_all = "snake_case")] -#[non_exhaustive] -pub enum OutputItem { - Message(OutputMessage), - FileSearchCall(FileSearchCallOutput), - FunctionCall(FunctionCall), - WebSearchCall(WebSearchCallOutput), - ComputerCall(ComputerCallOutput), - Reasoning(ReasoningItem), - ImageGenerationCall(ImageGenerationCallOutput), - CodeInterpreterCall(CodeInterpreterCallOutput), - LocalShellCall(LocalShellCallOutput), - McpCall(McpCallOutput), - McpListTools(McpListToolsOutput), - McpApprovalRequest(McpApprovalRequestOutput), - CustomToolCall(CustomToolCallOutput), -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct CustomToolCallOutput { - pub call_id: String, - pub input: String, - pub name: String, - pub id: String, -} - -/// Content part -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ContentPart { - #[serde(rename = "type")] - pub part_type: String, - pub text: Option, - #[serde(default, skip_serializing_if = "Option::is_none")] - pub annotations: Option>, - #[serde(default, skip_serializing_if = 
"Option::is_none")] - pub logprobs: Option>, -} - -// ===== RESPONSE COLLECTOR ===== - -/// Collects streaming response events into a complete response - -/// Output text annotation added event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseOutputTextAnnotationAdded { - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub content_index: u32, - pub annotation_index: u32, - pub annotation: TextAnnotation, -} - -/// Text annotation object for output text -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct TextAnnotation { - #[serde(rename = "type")] - pub annotation_type: String, - pub text: String, - pub start: u32, - pub end: u32, -} diff --git a/async-openai/src/types/responses/mod.rs b/async-openai/src/types/responses/mod.rs new file mode 100644 index 00000000..8d2635c2 --- /dev/null +++ b/async-openai/src/types/responses/mod.rs @@ -0,0 +1,5 @@ +mod response; +mod response_stream; + +pub use response::*; +pub use response_stream::*; diff --git a/async-openai/src/types/responses/response.rs b/async-openai/src/types/responses/response.rs new file mode 100644 index 00000000..43e3e84e --- /dev/null +++ b/async-openai/src/types/responses/response.rs @@ -0,0 +1,2554 @@ +use crate::error::OpenAIError; +pub use crate::types::{ + CompletionTokensDetails, ImageDetail, PromptTokensDetails, ReasoningEffort, + ResponseFormatJsonSchema, +}; +use crate::types::{MCPListToolsTool, MCPTool}; +use derive_builder::Builder; +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; + +/// Role of messages in the API. +#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] +#[serde(rename_all = "lowercase")] +pub enum Role { + User, + Assistant, + System, + Developer, +} + +/// Status of input/output items. 
+#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] +#[serde(rename_all = "snake_case")] +pub enum OutputStatus { + InProgress, + Completed, + Incomplete, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(untagged)] +pub enum InputParam { + /// A text input to the model, equivalent to a text input with the + /// `user` role. + Text(String), + /// A list of one or many input items to the model, containing + /// different content types. + Items(Vec), +} + +impl Default for InputParam { + fn default() -> Self { + Self::Text(String::new()) + } +} + +/// Content item used to generate a response. +/// +/// This is a properly discriminated union based on the `type` field, using Rust's +/// type-safe enum with serde's tag attribute for efficient deserialization. +/// +/// # OpenAPI Specification +/// Corresponds to the `Item` schema in the OpenAPI spec with a `type` discriminator. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum Item { + /// A message (type: "message"). + /// Can represent InputMessage (user/system/developer) or OutputMessage (assistant). + /// + /// InputMessage: + /// A message input to the model with a role indicating instruction following hierarchy. + /// Instructions given with the developer or system role take precedence over instructions given with the user role. + /// OutputMessage: + /// A message output from the model. + Message(MessageItem), + + /// The results of a file search tool call. See the + /// [file search guide](https://platform.openai.com/docs/guides/tools-file-search) for more information. + FileSearchCall(FileSearchToolCall), + + /// A tool call to a computer use tool. See the + /// [computer use guide](https://platform.openai.com/docs/guides/tools-computer-use) for more information. + ComputerCall(ComputerToolCall), + + /// The output of a computer tool call. 
+ ComputerCallOutput(ComputerCallOutputItemParam), + + /// The results of a web search tool call. See the + /// [web search guide](https://platform.openai.com/docs/guides/tools-web-search) for more information. + WebSearchCall(WebSearchToolCall), + + /// A tool call to run a function. See the + /// + /// [function calling guide](https://platform.openai.com/docs/guides/function-calling) for more information. + FunctionCall(FunctionToolCall), + + /// The output of a function tool call. + FunctionCallOutput(FunctionCallOutputItemParam), + + /// A description of the chain of thought used by a reasoning model while generating + /// a response. Be sure to include these items in your `input` to the Responses API + /// for subsequent turns of a conversation if you are manually + /// [managing context](https://platform.openai.com/docs/guides/conversation-state). + Reasoning(ReasoningItem), + + /// An image generation request made by the model. + ImageGenerationCall(ImageGenToolCall), + + /// A tool call to run code. + CodeInterpreterCall(CodeInterpreterToolCall), + + /// A tool call to run a command on the local shell. + LocalShellCall(LocalShellToolCall), + + /// The output of a local shell tool call. + LocalShellCallOutput(LocalShellToolCallOutput), + + /// A list of tools available on an MCP server. + McpListTools(MCPListTools), + + /// A request for human approval of a tool invocation. + McpApprovalRequest(MCPApprovalRequest), + + /// A response to an MCP approval request. + McpApprovalResponse(MCPApprovalResponse), + + /// An invocation of a tool on an MCP server. + McpCall(MCPToolCall), + + /// The output of a custom tool call from your code, being sent back to the model. + CustomToolCallOutput(CustomToolCallOutput), + + /// A call to a custom tool created by the model. + CustomToolCall(CustomToolCall), +} + +/// Input item that can be used in the context for generating a response. +/// +/// This represents the OpenAPI `InputItem` schema which is an `anyOf`: +/// 1. 
`EasyInputMessage` - Simple, user-friendly message input (can use string content) +/// 2. `Item` - Structured items with proper type discrimination (including InputMessage, OutputMessage, tool calls) +/// 3. `ItemReferenceParam` - Reference to an existing item by ID (type can be null) +/// +/// Uses untagged deserialization because these types overlap in structure. +/// Order matters: more specific structures are tried first. +/// +/// # OpenAPI Specification +/// Corresponds to the `InputItem` schema: `anyOf[EasyInputMessage, Item, ItemReferenceParam]` +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(untagged)] +pub enum InputItem { + /// A reference to an existing item by ID. + /// Has a required `id` field and optional `type` (can be "item_reference" or null). + /// Must be tried first as it's the most minimal structure. + ItemReference(ItemReference), + + /// All structured items with proper type discrimination. + /// Includes InputMessage, OutputMessage, and all tool calls/outputs. + /// Uses the discriminated `Item` enum for efficient, type-safe deserialization. + Item(Item), + + /// A simple, user-friendly message input (EasyInputMessage). + /// Supports string content and can include assistant role for previous responses. + /// Must be tried last as it's the most flexible structure. + /// + /// A message input to the model with a role indicating instruction following + /// hierarchy. Instructions given with the `developer` or `system` role take + /// precedence over instructions given with the `user` role. Messages with the + /// `assistant` role are presumed to have been generated by the model in previous + /// interactions. + EasyMessage(EasyInputMessage), +} + +impl InputItem { + /// Creates an InputItem from an item reference ID. + pub fn from_reference(id: impl Into) -> Self { + Self::ItemReference(ItemReference::new(id)) + } + + /// Creates an InputItem from a structured Item. 
+ pub fn from_item(item: Item) -> Self { + Self::Item(item) + } + + /// Creates an InputItem from an EasyInputMessage. + pub fn from_easy_message(message: EasyInputMessage) -> Self { + Self::EasyMessage(message) + } + + /// Creates a simple text message with the given role and content. + pub fn text_message(role: Role, content: impl Into) -> Self { + Self::EasyMessage(EasyInputMessage { + r#type: MessageType::Message, + role, + content: EasyInputContent::Text(content.into()), + }) + } +} + +/// A message item used within the `Item` enum. +/// +/// Both InputMessage and OutputMessage have `type: "message"`, so we use an untagged +/// enum to distinguish them based on their structure: +/// - OutputMessage: role=assistant, required id & status fields +/// - InputMessage: role=user/system/developer, content is Vec, optional id/status +/// +/// Note: EasyInputMessage is NOT included here - it's a separate variant in `InputItem`, +/// not part of the structured `Item` enum. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(untagged)] +pub enum MessageItem { + /// An output message from the model (role: assistant, has required id & status). + /// This must come first as it has the most specific structure (required id and status fields). + Output(OutputMessage), + + /// A structured input message (role: user/system/developer, content is Vec). + /// Has structured content list and optional id/status fields. + /// + /// A message input to the model with a role indicating instruction following hierarchy. + /// Instructions given with the `developer` or `system` role take precedence over instructions + /// given with the `user` role. + Input(InputMessage), +} + +/// A reference to an existing item by ID. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ItemReference { + /// The type of item to reference. Can be "item_reference" or null. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub r#type: Option, + /// The ID of the item to reference. + pub id: String, +} + +impl ItemReference { + /// Create a new item reference with the given ID. + pub fn new(id: impl Into) -> Self { + Self { + r#type: Some(ItemReferenceType::ItemReference), + id: id.into(), + } + } +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(rename_all = "snake_case")] +pub enum ItemReferenceType { + ItemReference, +} + +/// Output from a function call that you're providing back to the model. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct FunctionCallOutputItemParam { + /// The unique ID of the function tool call generated by the model. + pub call_id: String, + /// Text, image, or file output of the function tool call. + pub output: FunctionCallOutput, + /// The unique ID of the function tool call output. + /// Populated when this item is returned via API. + #[serde(skip_serializing_if = "Option::is_none")] + pub id: Option, + /// The status of the item. One of `in_progress`, `completed`, or `incomplete`. + /// Populated when items are returned via API. + #[serde(skip_serializing_if = "Option::is_none")] + pub status: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(untagged)] +pub enum FunctionCallOutput { + /// A JSON string of the output of the function tool call. + Text(String), + Content(Vec), // TODO use shape which allows null from OpenAPI spec? +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ComputerCallOutputItemParam { + /// The ID of the computer tool call that produced the output. + pub call_id: String, + /// A computer screenshot image used with the computer use tool. + pub output: ComputerScreenshotImage, + /// The safety checks reported by the API that have been acknowledged by the developer. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub acknowledged_safety_checks: Option>, + /// The unique ID of the computer tool call output. Optional when creating. + #[serde(skip_serializing_if = "Option::is_none")] + pub id: Option, + /// The status of the message input. One of `in_progress`, `completed`, or `incomplete`. + /// Populated when input items are returned via API. + #[serde(skip_serializing_if = "Option::is_none")] + pub status: Option, // TODO rename OutputStatus? +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(rename_all = "snake_case")] +pub enum ComputerScreenshotImageType { + ComputerScreenshot, +} + +/// A computer screenshot image used with the computer use tool. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ComputerScreenshotImage { + /// Specifies the event type. For a computer screenshot, this property is always + /// set to `computer_screenshot`. + pub r#type: ComputerScreenshotImageType, + /// The identifier of an uploaded file that contains the screenshot. + #[serde(skip_serializing_if = "Option::is_none")] + pub file_id: Option, + /// The URL of the screenshot image. + #[serde(skip_serializing_if = "Option::is_none")] + pub image_url: Option, +} + +/// Output from a local shell tool call that you're providing back to the model. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct LocalShellToolCallOutput { + /// The unique ID of the local shell tool call generated by the model. + pub id: String, + + /// A JSON string of the output of the local shell tool call. + pub output: String, + + /// The status of the item. One of `in_progress`, `completed`, or `incomplete`. + #[serde(skip_serializing_if = "Option::is_none")] + pub status: Option, +} + +/// Output from a local shell command execution. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct LocalShellOutput { + /// The stdout output from the command. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub stdout: Option, + + /// The stderr output from the command. + #[serde(skip_serializing_if = "Option::is_none")] + pub stderr: Option, + + /// The exit code of the command. + #[serde(skip_serializing_if = "Option::is_none")] + pub exit_code: Option, +} + +/// An MCP approval response that you're providing back to the model. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct MCPApprovalResponse { + /// The ID of the approval request being answered. + pub approval_request_id: String, + + /// Whether the request was approved. + pub approve: bool, + + /// The unique ID of the approval response + #[serde(skip_serializing_if = "Option::is_none")] + pub id: Option, + + /// Optional reason for the decision. + #[serde(skip_serializing_if = "Option::is_none")] + pub reason: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(untagged)] +pub enum CustomToolCallOutputOutput { + /// A string of the output of the custom tool call. + Text(String), + /// Text, image, or file output of the custom tool call. + List(Vec), +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct CustomToolCallOutput { + /// The call ID, used to map this custom tool call output to a custom tool call. + pub call_id: String, + + /// The output from the custom tool call generated by your code. + /// Can be a string or an list of output content. + pub output: CustomToolCallOutputOutput, + + /// The unique ID of the custom tool call output in the OpenAI platform. + #[serde(skip_serializing_if = "Option::is_none")] + pub id: Option, +} + +/// A simplified message input to the model (EasyInputMessage in the OpenAPI spec). +/// +/// This is the most user-friendly way to provide messages, supporting both simple +/// string content and structured content. Role can include `assistant` for providing +/// previous assistant responses. 
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] +#[builder( + name = "EasyInputMessageArgs", + pattern = "mutable", + setter(into, strip_option), + default +)] +#[builder(build_fn(error = "OpenAIError"))] +pub struct EasyInputMessage { + /// The type of the message input. Always set to `message`. + pub r#type: MessageType, + /// The role of the message input. One of `user`, `assistant`, `system`, or `developer`. + pub role: Role, + /// Text, image, or audio input to the model, used to generate a response. + /// Can also contain previous assistant responses. + pub content: EasyInputContent, +} + +/// A structured message input to the model (InputMessage in the OpenAPI spec). +/// +/// This variant requires structured content (not a simple string) and does not support +/// the `assistant` role (use OutputMessage for that). Used when items are returned via API +/// with additional metadata. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] +#[builder( + name = "InputMessageArgs", + pattern = "mutable", + setter(into, strip_option), + default +)] +#[builder(build_fn(error = "OpenAIError"))] +pub struct InputMessage { + /// A list of one or many input items to the model, containing different content types. + pub content: Vec, + /// The role of the message input. One of `user`, `system`, or `developer`. + /// Note: `assistant` is NOT allowed here; use OutputMessage instead. + pub role: InputRole, + /// The status of the item. One of `in_progress`, `completed`, or `incomplete`. + /// Populated when items are returned via API. + #[serde(skip_serializing_if = "Option::is_none")] + pub status: Option, + /////The type of the message input. Always set to `message`. + //pub r#type: MessageType, +} + +/// The role for an input message - can only be `user`, `system`, or `developer`. +/// This type ensures type safety by excluding the `assistant` role (use OutputMessage for that). 
+#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq, Default)] +#[serde(rename_all = "lowercase")] +pub enum InputRole { + #[default] + User, + System, + Developer, +} + +/// Content for EasyInputMessage - can be a simple string or structured list. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(untagged)] +pub enum EasyInputContent { + /// A text input to the model. + Text(String), + /// A list of one or many input items to the model, containing different content types. + ContentList(Vec), +} + +/// Parts of a message: text, image, file, or audio. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum InputContent { + /// A text input to the model. + InputText(InputTextContent), + /// An image input to the model. Learn about + /// [image inputs](https://platform.openai.com/docs/guides/vision). + InputImage(InputImageContent), + /// A file input to the model. + InputFile(InputFileContent), +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct InputTextContent { + /// The text input to the model. + pub text: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] +#[builder( + name = "InputImageArgs", + pattern = "mutable", + setter(into, strip_option), + default +)] +#[builder(build_fn(error = "OpenAIError"))] +pub struct InputImageContent { + /// The detail level of the image to be sent to the model. One of `high`, `low`, or `auto`. + /// Defaults to `auto`. + detail: ImageDetail, + /// The ID of the file to be sent to the model. + #[serde(skip_serializing_if = "Option::is_none")] + file_id: Option, + /// The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image + /// in a data URL. 
+ #[serde(skip_serializing_if = "Option::is_none")] + image_url: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] +#[builder( + name = "InputFileArgs", + pattern = "mutable", + setter(into, strip_option), + default +)] +#[builder(build_fn(error = "OpenAIError"))] +pub struct InputFileContent { + /// The content of the file to be sent to the model. + #[serde(skip_serializing_if = "Option::is_none")] + file_data: Option, + /// The ID of the file to be sent to the model. + #[serde(skip_serializing_if = "Option::is_none")] + file_id: Option, + /// The URL of the file to be sent to the model. + #[serde(skip_serializing_if = "Option::is_none")] + file_url: Option, + /// The name of the file to be sent to the model. + #[serde(skip_serializing_if = "Option::is_none")] + filename: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct Conversation { + /// The unique ID of the conversation. + pub id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(untagged)] +pub enum ConversationParam { + /// The unique ID of the conversation. + ConversationID(String), + /// The conversation that this response belongs to. 
+ Object(Conversation), +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] +pub enum IncludeEnum { + #[serde(rename = "file_search_call.results")] + FileSearchCallResults, + #[serde(rename = "web_search_call.results")] + WebSearchCallResults, + #[serde(rename = "web_search_call.action.sources")] + WebSearchCallActionSources, + #[serde(rename = "message.input_image.image_url")] + MessageInputImageImageUrl, + #[serde(rename = "computer_call_output.output.image_url")] + ComputerCallOutputOutputImageUrl, + #[serde(rename = "code_interpreter_call.outputs")] + CodeInterpreterCallOutputs, + #[serde(rename = "reasoning.encrypted_content")] + ReasoningEncryptedContent, + #[serde(rename = "message.output_text.logprobs")] + MessageOutputTextLogprobs, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseStreamOptions { + /// When true, stream obfuscation will be enabled. Stream obfuscation adds + /// random characters to an `obfuscation` field on streaming delta events to + /// normalize payload sizes as a mitigation to certain side-channel attacks. + /// These obfuscation fields are included by default, but add a small amount + /// of overhead to the data stream. You can set `include_obfuscation` to + /// false to optimize for bandwidth if you trust the network links between + /// your application and the OpenAI API. + #[serde(skip_serializing_if = "Option::is_none")] + pub include_obfuscation: Option, +} + +/// Builder for a Responses API request. +#[derive(Clone, Serialize, Deserialize, Debug, Default, Builder, PartialEq)] +#[builder( + name = "CreateResponseArgs", + pattern = "mutable", + setter(into, strip_option), + default +)] +#[builder(build_fn(error = "OpenAIError"))] +pub struct CreateResponse { + /// Whether to run the model response in the background. + /// [Learn more](https://platform.openai.com/docs/guides/background). 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub background: Option, + + /// The conversation that this response belongs to. Items from this conversation are prepended to + /// `input_items` for this response request. + /// + /// Input items and output items from this response are automatically added to this conversation after + /// this response completes. + #[serde(skip_serializing_if = "Option::is_none")] + pub conversation: Option, + + /// Specify additional output data to include in the model response. Currently supported + /// values are: + /// + /// - `web_search_call.action.sources`: Include the sources of the web search tool call. + /// + /// - `code_interpreter_call.outputs`: Includes the outputs of python code execution in code + /// interpreter tool call items. + /// + /// - `computer_call_output.output.image_url`: Include image urls from the computer call + /// output. + /// + /// - `file_search_call.results`: Include the search results of the file search tool call. + /// + /// - `message.input_image.image_url`: Include image urls from the input message. + /// + /// - `message.output_text.logprobs`: Include logprobs with assistant messages. + /// + /// - `reasoning.encrypted_content`: Includes an encrypted version of reasoning tokens in + /// reasoning item outputs. This enables reasoning items to be used in multi-turn + /// conversations when using the Responses API statelessly (like when the `store` parameter is + /// set to `false`, or when an organization is enrolled in the zero data retention program). + #[serde(skip_serializing_if = "Option::is_none")] + pub include: Option>, + + /// Text, image, or file inputs to the model, used to generate a response. 
+ /// + /// Learn more: + /// - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + /// - [Image inputs](https://platform.openai.com/docs/guides/images) + /// - [File inputs](https://platform.openai.com/docs/guides/pdf-files) + /// - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) + /// - [Function calling](https://platform.openai.com/docs/guides/function-calling) + pub input: InputParam, + + /// A system (or developer) message inserted into the model's context. + /// + /// When using along with `previous_response_id`, the instructions from a previous + /// response will not be carried over to the next response. This makes it simple + /// to swap out system (or developer) messages in new responses. + #[serde(skip_serializing_if = "Option::is_none")] + pub instructions: Option, + + /// An upper bound for the number of tokens that can be generated for a response, including + /// visible output tokens and [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + #[serde(skip_serializing_if = "Option::is_none")] + pub max_output_tokens: Option, + + /// The maximum number of total calls to built-in tools that can be processed in a response. This + /// maximum number applies across all built-in tool calls, not per individual tool. Any further + /// attempts to call a tool by the model will be ignored. + #[serde(skip_serializing_if = "Option::is_none")] + pub max_tool_calls: Option, + + /// Set of 16 key-value pairs that can be attached to an object. This can be + /// useful for storing additional information about the object in a structured + /// format, and querying for objects via API or the dashboard. + /// + /// Keys are strings with a maximum length of 64 characters. Values are + /// strings with a maximum length of 512 characters. + #[serde(skip_serializing_if = "Option::is_none")] + pub metadata: Option>, + + /// Model ID used to generate the response, like `gpt-4o` or `o3`. 
OpenAI + /// offers a wide range of models with different capabilities, performance + /// characteristics, and price points. Refer to the [model guide](https://platform.openai.com/docs/models) + /// to browse and compare available models. + #[serde(skip_serializing_if = "Option::is_none")] + pub model: Option, + + /// Whether to allow the model to run tool calls in parallel. + #[serde(skip_serializing_if = "Option::is_none")] + pub parallel_tool_calls: Option, + + /// The unique ID of the previous response to the model. Use this to create multi-turn conversations. + /// Learn more about [conversation state](https://platform.openai.com/docs/guides/conversation-state). + /// Cannot be used in conjunction with `conversation`. + #[serde(skip_serializing_if = "Option::is_none")] + pub previous_response_id: Option, + + /// Reference to a prompt template and its variables. + /// [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). + #[serde(skip_serializing_if = "Option::is_none")] + pub prompt: Option, + + /// Used by OpenAI to cache responses for similar requests to optimize your cache hit rates. Replaces + /// the `user` field. [Learn more](https://platform.openai.com/docs/guides/prompt-caching). + #[serde(skip_serializing_if = "Option::is_none")] + pub prompt_cache_key: Option, + + /// **gpt-5 and o-series models only** + /// Configuration options for [reasoning models](https://platform.openai.com/docs/guides/reasoning). + #[serde(skip_serializing_if = "Option::is_none")] + pub reasoning: Option, + + /// A stable identifier used to help detect users of your application that may be violating OpenAI's + /// usage policies. + /// + /// The IDs should be a string that uniquely identifies each user. We recommend hashing their username + /// or email address, in order to avoid sending us any identifying information. [Learn + /// more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers). 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub safety_identifier: Option, + + /// Specifies the processing type used for serving the request. + /// - If set to 'auto', then the request will be processed with the service tier configured in the Project settings. Unless otherwise configured, the Project will use 'default'. + /// - If set to 'default', then the request will be processed with the standard pricing and performance for the selected model. + /// - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or '[priority](https://openai.com/api-priority-processing/)', then the request will be processed with the corresponding service tier. + /// - When not set, the default behavior is 'auto'. + /// + /// When the `service_tier` parameter is set, the response body will include the `service_tier` value based on the processing mode actually used to serve the request. This response value may be different from the value set in the parameter. + #[serde(skip_serializing_if = "Option::is_none")] + pub service_tier: Option, + + /// Whether to store the generated model response for later retrieval via API. + #[serde(skip_serializing_if = "Option::is_none")] + pub store: Option, + + /// If set to true, the model response data will be streamed to the client + /// as it is generated using [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + /// See the [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + /// for more information. + #[serde(skip_serializing_if = "Option::is_none")] + pub stream: Option, + + /// Options for streaming responses. Only set this when you set `stream: true`. + #[serde(skip_serializing_if = "Option::is_none")] + pub stream_options: Option, + + /// What sampling temperature to use, between 0 and 2. 
Higher values like 0.8 + /// will make the output more random, while lower values like 0.2 will make it + /// more focused and deterministic. We generally recommend altering this or + /// `top_p` but not both. + #[serde(skip_serializing_if = "Option::is_none")] + pub temperature: Option, + + /// Configuration options for a text response from the model. Can be plain + /// text or structured JSON data. Learn more: + /// - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + /// - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + #[serde(skip_serializing_if = "Option::is_none")] + pub text: Option, + + /// How the model should select which tool (or tools) to use when generating + /// a response. See the `tools` parameter to see how to specify which tools + /// the model can call. + #[serde(skip_serializing_if = "Option::is_none")] + pub tool_choice: Option, + + /// An array of tools the model may call while generating a response. You + /// can specify which tool to use by setting the `tool_choice` parameter. + /// + /// We support the following categories of tools: + /// - **Built-in tools**: Tools that are provided by OpenAI that extend the + /// model's capabilities, like [web search](https://platform.openai.com/docs/guides/tools-web-search) + /// or [file search](https://platform.openai.com/docs/guides/tools-file-search). Learn more about + /// [built-in tools](https://platform.openai.com/docs/guides/tools). + /// - **MCP Tools**: Integrations with third-party systems via custom MCP servers + /// or predefined connectors such as Google Drive and SharePoint. Learn more about + /// [MCP Tools](https://platform.openai.com/docs/guides/tools-connectors-mcp). + /// - **Function calls (custom tools)**: Functions that are defined by you, + /// enabling the model to call your own code with strongly typed arguments + /// and outputs. 
Learn more about + /// [function calling](https://platform.openai.com/docs/guides/function-calling). You can also use + /// custom tools to call your own code. + #[serde(skip_serializing_if = "Option::is_none")] + pub tools: Option>, + + /// An integer between 0 and 20 specifying the number of most likely tokens to return at each + /// token position, each with an associated log probability. + #[serde(skip_serializing_if = "Option::is_none")] + pub top_logprobs: Option, + + /// An alternative to sampling with temperature, called nucleus sampling, + /// where the model considers the results of the tokens with top_p probability + /// mass. So 0.1 means only the tokens comprising the top 10% probability mass + /// are considered. + /// + /// We generally recommend altering this or `temperature` but not both. + #[serde(skip_serializing_if = "Option::is_none")] + pub top_p: Option, + + ///The truncation strategy to use for the model response. + /// - `auto`: If the input to this Response exceeds + /// the model's context window size, the model will truncate the + /// response to fit the context window by dropping items from the beginning of the conversation. + /// - `disabled` (default): If the input size will exceed the context window + /// size for a model, the request will fail with a 400 error. + #[serde(skip_serializing_if = "Option::is_none")] + pub truncation: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(untagged)] +pub enum ResponsePromptVariables { + String(String), + Content(InputContent), + Custom(serde_json::Value), +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct Prompt { + /// The unique identifier of the prompt template to use. + pub id: String, + + /// Optional version of the prompt template. + #[serde(skip_serializing_if = "Option::is_none")] + pub version: Option, + + /// Optional map of values to substitute in for variables in your + /// prompt. 
The substitution values can either be strings, or other + /// Response input types like images or files. + #[serde(skip_serializing_if = "Option::is_none")] + pub variables: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Default)] +#[serde(rename_all = "lowercase")] +pub enum ServiceTier { + #[default] + Auto, + Default, + Flex, + Scale, + Priority, +} + +/// Truncation strategies. +#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] +#[serde(rename_all = "lowercase")] +pub enum Truncation { + Auto, + Disabled, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct Billing { + pub payer: String, +} + +/// o-series reasoning settings. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] +#[builder( + name = "ReasoningArgs", + pattern = "mutable", + setter(into, strip_option), + default +)] +#[builder(build_fn(error = "OpenAIError"))] +pub struct Reasoning { + /// Constrains effort on reasoning for + /// [reasoning models](https://platform.openai.com/docs/guides/reasoning). + /// Currently supported values are `minimal`, `low`, `medium`, and `high`. Reducing + /// reasoning effort can result in faster responses and fewer tokens used + /// on reasoning in a response. + /// + /// Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + #[serde(skip_serializing_if = "Option::is_none")] + pub effort: Option, + /// A summary of the reasoning performed by the model. This can be + /// useful for debugging and understanding the model's reasoning process. + /// One of `auto`, `concise`, or `detailed`. + /// + /// `concise` is only supported for `computer-use-preview` models. + #[serde(skip_serializing_if = "Option::is_none")] + pub summary: Option, +} + +/// o-series reasoning settings. 
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)] +#[serde(rename_all = "lowercase")] +pub enum Verbosity { + Low, + Medium, + High, +} + +#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] +#[serde(rename_all = "lowercase")] +pub enum ReasoningSummary { + Auto, + Concise, + Detailed, +} + +/// Configuration for text response format. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseTextParam { + /// An object specifying the format that the model must output. + /// + /// Configuring `{ "type": "json_schema" }` enables Structured Outputs, + /// which ensures the model will match your supplied JSON schema. Learn more in the + /// [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + /// + /// The default format is `{ "type": "text" }` with no additional options. + /// + /// **Not recommended for gpt-4o and newer models:** + /// + /// Setting to `{ "type": "json_object" }` enables the older JSON mode, which + /// ensures the message the model generates is valid JSON. Using `json_schema` + /// is preferred for models that support it. + pub format: TextResponseFormatConfiguration, + + /// Constrains the verbosity of the model's response. Lower values will result in + /// more concise responses, while higher values will result in more verbose responses. + /// + /// Currently supported values are `low`, `medium`, and `high`. + #[serde(skip_serializing_if = "Option::is_none")] + pub verbosity: Option, +} + +#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum TextResponseFormatConfiguration { + /// Default response format. Used to generate text responses. + Text, + /// JSON object response format. An older method of generating JSON responses. + /// Using `json_schema` is recommended for models that support it. 
+ /// Note that the model will not generate JSON without a system or user message + /// instructing it to do so. + JsonObject, + /// JSON Schema response format. Used to generate structured JSON responses. + /// Learn more about [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs). + JsonSchema(ResponseFormatJsonSchema), +} + +/// Definitions for model-callable tools. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum Tool { + /// Defines a function in your own code the model can choose to call. Learn more about [function + /// calling](https://platform.openai.com/docs/guides/tools). + Function(FunctionTool), + /// A tool that searches for relevant content from uploaded files. Learn more about the [file search + /// tool](https://platform.openai.com/docs/guides/tools-file-search). + FileSearch(FileSearchTool), + /// A tool that controls a virtual computer. Learn more about the [computer + /// use tool](https://platform.openai.com/docs/guides/tools-computer-use). + ComputerUsePreview(ComputerUsePreviewTool), + /// Search the Internet for sources related to the prompt. Learn more about the + /// [web search tool](https://platform.openai.com/docs/guides/tools-web-search). + WebSearch(WebSearchTool), + /// type: web_search_2025_08_26 + #[serde(rename = "web_search_2025_08_26")] + WebSearch20250826(WebSearchTool), + /// Give the model access to additional tools via remote Model Context Protocol + /// (MCP) servers. [Learn more about MCP](https://platform.openai.com/docs/guides/tools-remote-mcp). + Mcp(MCPTool), + /// A tool that runs Python code to help generate a response to a prompt. + CodeInterpreter(CodeInterpreterTool), + /// A tool that generates images using a model like `gpt-image-1`. + ImageGeneration(ImageGenTool), + /// A tool that allows the model to execute shell commands in a local environment. 
+ LocalShell, + /// A custom tool that processes input using a specified format. Learn more about [custom + /// tools](https://platform.openai.com/docs/guides/function-calling#custom-tools) + Custom(CustomToolParam), + /// This tool searches the web for relevant results to use in a response. Learn more about the [web search + ///tool](https://platform.openai.com/docs/guides/tools-web-search). + WebSearchPreview(WebSearchTool), + /// type: web_search_preview_2025_03_11 + #[serde(rename = "web_search_preview_2025_03_11")] + WebSearchPreview20250311(WebSearchTool), +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] +pub struct CustomToolParam { + /// The name of the custom tool, used to identify it in tool calls. + pub name: String, + /// Optional description of the custom tool, used to provide more context. + pub description: Option, + /// The input format for the custom tool. Default is unconstrained text. + pub format: CustomToolParamFormat, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] +#[serde(rename_all = "lowercase")] +pub enum GrammarSyntax { + Lark, + #[default] + Regex, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] +pub struct CustomGrammarFormatParam { + /// The grammar definition. + pub definition: String, + /// The syntax of the grammar definition. One of `lark` or `regex`. + pub syntax: GrammarSyntax, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] +#[serde(tag = "type", rename_all = "lowercase")] +pub enum CustomToolParamFormat { + /// Unconstrained free-form text. + #[default] + Text, + /// A grammar defined by the user. 
+ Grammar(CustomGrammarFormatParam), +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] +#[builder( + name = "FileSearchToolArgs", + pattern = "mutable", + setter(into, strip_option), + default +)] +#[builder(build_fn(error = "OpenAIError"))] +pub struct FileSearchTool { + /// The IDs of the vector stores to search. + pub vector_store_ids: Vec, + /// The maximum number of results to return. This number should be between 1 and 50 inclusive. + #[serde(skip_serializing_if = "Option::is_none")] + pub max_num_results: Option, + /// A filter to apply. + #[serde(skip_serializing_if = "Option::is_none")] + pub filters: Option, + /// Ranking options for search. + #[serde(skip_serializing_if = "Option::is_none")] + pub ranking_options: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] +#[builder( + name = "FunctionToolArgs", + pattern = "mutable", + setter(into, strip_option), + default +)] +pub struct FunctionTool { + /// The name of the function to call. + pub name: String, + /// A JSON schema object describing the parameters of the function. + #[serde(skip_serializing_if = "Option::is_none")] + pub parameters: Option, + /// Whether to enforce strict parameter validation. Default `true`. + #[serde(skip_serializing_if = "Option::is_none")] + pub strict: Option, + /// A description of the function. Used by the model to determine whether or not to call the + /// function. + #[serde(skip_serializing_if = "Option::is_none")] + pub description: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct WebSearchToolFilters { + /// Allowed domains for the search. If not provided, all domains are allowed. + /// Subdomains of the provided domains are allowed as well. 
+ /// + /// Example: `["pubmed.ncbi.nlm.nih.gov"]` + #[serde(skip_serializing_if = "Option::is_none")] + pub allowed_domains: Option>, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] +#[builder( + name = "WebSearchToolArgs", + pattern = "mutable", + setter(into, strip_option), + default +)] +pub struct WebSearchTool { + /// Filters for the search. + #[serde(skip_serializing_if = "Option::is_none")] + pub filters: Option, + /// The approximate location of the user. + #[serde(skip_serializing_if = "Option::is_none")] + pub user_location: Option, + /// High level guidance for the amount of context window space to use for the search. One of `low`, + /// `medium`, or `high`. `medium` is the default. + #[serde(skip_serializing_if = "Option::is_none")] + pub search_context_size: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq, Default)] +#[serde(rename_all = "lowercase")] +pub enum WebSearchToolSearchContextSize { + Low, + #[default] + Medium, + High, +} + +#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq, Default)] +#[serde(rename_all = "lowercase")] +pub enum ComputerEnvironment { + Windows, + Mac, + Linux, + Ubuntu, + #[default] + Browser, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] +#[builder( + name = "ComputerUsePreviewToolArgs", + pattern = "mutable", + setter(into, strip_option), + default +)] +pub struct ComputerUsePreviewTool { + /// The type of computer environment to control. + environment: ComputerEnvironment, + /// The width of the computer display. + display_width: u32, + /// The height of the computer display. 
+ display_height: u32, +} + +#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] +pub enum RankVersionType { + #[serde(rename = "auto")] + Auto, + #[serde(rename = "default-2024-11-15")] + Default20241115, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct HybridSearch { + /// The weight of the embedding in the reciprocal ranking fusion. + pub embedding_weight: f32, + /// The weight of the text in the reciprocal ranking fusion. + pub text_weight: f32, +} + +/// Options for search result ranking. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct RankingOptions { + /// Weights that control how reciprocal rank fusion balances semantic embedding matches versus + /// sparse keyword matches when hybrid search is enabled. + #[serde(skip_serializing_if = "Option::is_none")] + pub hybrid_search: Option, + /// The ranker to use for the file search. + pub ranker: RankVersionType, + /// The score threshold for the file search, a number between 0 and 1. Numbers closer to 1 will + /// attempt to return only the most relevant results, but may return fewer results. + #[serde(skip_serializing_if = "Option::is_none")] + pub score_threshold: Option, +} + +/// Filters for file search. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(untagged)] +pub enum Filter { + /// A filter used to compare a specified attribute key to a given value using a defined + /// comparison operation. + Comparison(ComparisonFilter), + /// Combine multiple filters using `and` or `or`. + Compound(CompoundFilter), +} + +/// Single comparison filter. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ComparisonFilter { + /// Specifies the comparison operator: `eq`, `ne`, `gt`, `gte`, `lt`, `lte`, `in`, `nin`. 
+ /// - `eq`: equals + /// - `ne`: not equal + /// - `gt`: greater than + /// - `gte`: greater than or equal + /// - `lt`: less than + /// - `lte`: less than or equal + /// - `in`: in + /// - `nin`: not in + pub r#type: ComparisonType, + /// The key to compare against the value. + pub key: String, + /// The value to compare against the attribute key; supports string, number, or boolean types. + pub value: serde_json::Value, +} + +#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] +pub enum ComparisonType { + #[serde(rename = "eq")] + Equals, + #[serde(rename = "ne")] + NotEquals, + #[serde(rename = "gt")] + GreaterThan, + #[serde(rename = "gte")] + GreaterThanOrEqual, + #[serde(rename = "lt")] + LessThan, + #[serde(rename = "lte")] + LessThanOrEqual, + #[serde(rename = "in")] + In, + #[serde(rename = "nin")] + NotIn, +} + +/// Combine multiple filters using `and` or `or`. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct CompoundFilter { + /// 'Type of operation: `and` or `or`.' + pub r#type: CompoundType, + /// Array of filters to combine. Items can be ComparisonFilter or CompoundFilter. + pub filters: Vec, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(rename_all = "lowercase")] +pub enum CompoundType { + And, + Or, +} + +#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq, Default)] +#[serde(rename_all = "lowercase")] +pub enum WebSearchApproximateLocationType { + #[default] + Approximate, +} + +/// Approximate user location for web search. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] +#[builder( + name = "WebSearchApproximateLocationArgs", + pattern = "mutable", + setter(into, strip_option), + default +)] +#[builder(build_fn(error = "OpenAIError"))] +pub struct WebSearchApproximateLocation { + /// The type of location approximation. Always `approximate`. 
+ pub r#type: WebSearchApproximateLocationType, + /// Free text input for the city of the user, e.g. `San Francisco`. + #[serde(skip_serializing_if = "Option::is_none")] + pub city: Option, + /// The two-letter [ISO country code](https://en.wikipedia.org/wiki/ISO_3166-1) of the user, + /// e.g. `US`. + #[serde(skip_serializing_if = "Option::is_none")] + pub country: Option, + /// Free text input for the region of the user, e.g. `California`. + #[serde(skip_serializing_if = "Option::is_none")] + pub region: Option, + /// The [IANA timezone](https://timeapi.io/documentation/iana-timezones) of the user, e.g. + /// `America/Los_Angeles`. + #[serde(skip_serializing_if = "Option::is_none")] + pub timezone: Option, +} + +/// Container configuration for a code interpreter. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum CodeInterpreterToolContainer { + /// Configuration for a code interpreter container. Optionally specify the IDs of the + /// files to run the code on. + Auto(CodeInterpreterContainerAuto), + + /// The container ID. + #[serde(untagged)] + ContainerID(String), +} + +impl Default for CodeInterpreterToolContainer { + fn default() -> Self { + Self::Auto(CodeInterpreterContainerAuto::default()) + } +} + +/// Auto configuration for code interpreter container. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] +pub struct CodeInterpreterContainerAuto { + /// An optional list of uploaded files to make available to your code. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub file_ids: Option>, + + #[serde(skip_serializing_if = "Option::is_none")] + pub memory_limit: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] +#[builder( + name = "CodeInterpreterToolArgs", + pattern = "mutable", + setter(into, strip_option), + default +)] +#[builder(build_fn(error = "OpenAIError"))] +pub struct CodeInterpreterTool { + /// The code interpreter container. Can be a container ID or an object that + /// specifies uploaded file IDs to make available to your code. + pub container: CodeInterpreterToolContainer, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ImageGenToolInputImageMask { + /// Base64-encoded mask image. + #[serde(skip_serializing_if = "Option::is_none")] + pub image_url: Option, + /// File ID for the mask image. + #[serde(skip_serializing_if = "Option::is_none")] + pub file_id: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] +#[serde(rename_all = "lowercase")] +pub enum InputFidelity { + #[default] + High, + Low, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] +#[serde(rename_all = "lowercase")] +pub enum ImageGenToolModeration { + #[default] + Auto, + Low, +} + +/// Image generation tool definition. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] +#[builder( + name = "ImageGenerationArgs", + pattern = "mutable", + setter(into, strip_option), + default +)] +#[builder(build_fn(error = "OpenAIError"))] +pub struct ImageGenTool { + /// Background type for the generated image. One of `transparent`, + /// `opaque`, or `auto`. Default: `auto`. + #[serde(skip_serializing_if = "Option::is_none")] + pub background: Option, + /// Control how much effort the model will exert to match the style and features, especially facial features, + /// of input images. This parameter is only supported for `gpt-image-1`. 
Unsupported + /// for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`. + #[serde(skip_serializing_if = "Option::is_none")] + pub input_fidelity: Option, + /// Optional mask for inpainting. Contains `image_url` + /// (string, optional) and `file_id` (string, optional). + #[serde(skip_serializing_if = "Option::is_none")] + pub input_image_mask: Option, + /// The image generation model to use. Default: `gpt-image-1`. + #[serde(skip_serializing_if = "Option::is_none")] + pub model: Option, + /// Moderation level for the generated image. Default: `auto`. + #[serde(skip_serializing_if = "Option::is_none")] + pub moderation: Option, + /// Compression level for the output image. Default: 100. + #[serde(skip_serializing_if = "Option::is_none")] + pub output_compression: Option, + /// The output format of the generated image. One of `png`, `webp`, or + /// `jpeg`. Default: `png`. + #[serde(skip_serializing_if = "Option::is_none")] + pub output_format: Option, + /// Number of partial images to generate in streaming mode, from 0 (default value) to 3. + #[serde(skip_serializing_if = "Option::is_none")] + pub partial_images: Option, + /// The quality of the generated image. One of `low`, `medium`, `high`, + /// or `auto`. Default: `auto`. + #[serde(skip_serializing_if = "Option::is_none")] + pub quality: Option, + /// The size of the generated image. One of `1024x1024`, `1024x1536`, + /// `1536x1024`, or `auto`. Default: `auto`. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub size: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] +#[serde(rename_all = "lowercase")] +pub enum ImageGenToolBackground { + Transparent, + Opaque, + #[default] + Auto, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] +#[serde(rename_all = "lowercase")] +pub enum ImageGenToolOutputFormat { + #[default] + Png, + Webp, + Jpeg, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] +#[serde(rename_all = "lowercase")] +pub enum ImageGenToolQuality { + Low, + Medium, + High, + #[default] + Auto, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] +#[serde(rename_all = "lowercase")] +pub enum ImageGenToolSize { + #[default] + Auto, + #[serde(rename = "1024x1024")] + Size1024x1024, + #[serde(rename = "1024x1536")] + Size1024x1536, + #[serde(rename = "1536x1024")] + Size1536x1024, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(rename_all = "lowercase")] +pub enum ToolChoiceAllowedMode { + Auto, + Required, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ToolChoiceAllowed { + /// Constrains the tools available to the model to a pre-defined set. + /// + /// `auto` allows the model to pick from among the allowed tools and generate a + /// message. + /// + /// `required` requires the model to call one or more of the allowed tools. + mode: ToolChoiceAllowedMode, + /// A list of tool definitions that the model should be allowed to call. + /// + /// For the Responses API, the list of tool definitions might look like: + /// ```json + /// [ + /// { "type": "function", "name": "get_weather" }, + /// { "type": "mcp", "server_label": "deepwiki" }, + /// { "type": "image_generation" } + /// ] + /// ``` + tools: Vec, +} + +/// The type of hosted tool the model should to use. Learn more about +/// [built-in tools](https://platform.openai.com/docs/guides/tools). 
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum ToolChoiceTypes { + FileSearch, + WebSearchPreview, + ComputerUsePreview, + CodeInterpreter, + ImageGeneration, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ToolChoiceFunction { + /// The name of the function to call. + name: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ToolChoiceMCP { + /// The name of the tool to call on the server. + name: String, + /// The label of the MCP server to use. + server_label: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ToolChoiceCustom { + /// The name of the custom tool to call. + name: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum ToolChoiceParam { + /// Constrains the tools available to the model to a pre-defined set. + AllowedTools(ToolChoiceAllowed), + + /// Use this option to force the model to call a specific function. + Function(ToolChoiceFunction), + + /// Use this option to force the model to call a specific tool on a remote MCP server. + Mcp(ToolChoiceMCP), + + /// Use this option to force the model to call a custom tool. + Custom(ToolChoiceCustom), + + /// Indicates that the model should use a built-in tool to generate a response. + /// [Learn more about built-in tools](https://platform.openai.com/docs/guides/tools). + #[serde(untagged)] + Hosted(ToolChoiceTypes), + + /// Controls which (if any) tool is called by the model. + /// + /// `none` means the model will not call any tool and instead generates a message. + /// + /// `auto` means the model can pick between generating a message or calling one or + /// more tools. + /// + /// `required` means the model must call one or more tools. 
+ #[serde(untagged)] + Mode(ToolChoiceOptions), +} + +#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] +#[serde(rename_all = "lowercase")] +pub enum ToolChoiceOptions { + None, + Auto, + Required, +} + +/// Error returned by the API when a request fails. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ErrorObject { + /// The error code for the response. + pub code: String, + /// A human-readable description of the error. + pub message: String, +} + +/// Details about an incomplete response. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct IncompleteDetails { + /// The reason why the response is incomplete. + pub reason: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct TopLogProb { + pub bytes: Vec, + pub logprob: f64, + pub token: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct LogProb { + pub bytes: Vec, + pub logprob: f64, + pub token: String, + pub top_logprobs: Vec, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseTopLobProb { + /// The log probability of this token. + pub logprob: f64, + /// A possible text token. + pub token: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseLogProb { + /// The log probability of this token. + pub logprob: f64, + /// A possible text token. + pub token: String, + /// The log probability of the top 20 most likely tokens. + pub top_logprobs: Vec, +} + +/// A simple text output from the model. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct OutputTextContent { + /// The annotations of the text output. + pub annotations: Vec, + pub logprobs: Option>, + /// The text output from the model. + pub text: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum Annotation { + /// A citation to a file. 
+ FileCitation(FileCitationBody), + /// A citation for a web resource used to generate a model response. + UrlCitation(UrlCitationBody), + /// A citation for a container file used to generate a model response. + ContainerFileCitation(ContainerFileCitationBody), + /// A path to a file. + FilePath(FilePath), +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct FileCitationBody { + /// The ID of the file. + file_id: String, + /// The filename of the file cited. + filename: String, + /// The index of the file in the list of files. + index: u32, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct UrlCitationBody { + /// The index of the last character of the URL citation in the message. + end_index: u32, + /// The index of the first character of the URL citation in the message. + start_index: u32, + /// The title of the web resource. + title: String, + /// The URL of the web resource. + url: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ContainerFileCitationBody { + /// The ID of the container file. + container_id: String, + /// The index of the last character of the container file citation in the message. + end_index: u32, + /// The ID of the file. + file_id: String, + /// The filename of the container file cited. + filename: String, + /// The index of the first character of the container file citation in the message. + start_index: u32, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct FilePath { + /// The ID of the file. + file_id: String, + /// The index of the file in the list of files. + index: u32, +} + +/// A refusal explanation from the model. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct RefusalContent { + /// The refusal explanation from the model. + pub refusal: String, +} + +/// A message generated by the model. 
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct OutputMessage { + /// The content of the output message. + pub content: Vec, + /// The unique ID of the output message. + pub id: String, + /// The role of the output message. Always `assistant`. + pub role: AssistantRole, + /// The status of the message input. One of `in_progress`, `completed`, or + /// `incomplete`. Populated when input items are returned via API. + pub status: OutputStatus, + ///// The type of the output message. Always `message`. + //pub r#type: MessageType, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] +#[serde(rename_all = "lowercase")] +pub enum MessageType { + #[default] + Message, +} + +/// The role for an output message - always `assistant`. +/// This type ensures type safety by only allowing the assistant role. +#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq, Default)] +#[serde(rename_all = "lowercase")] +pub enum AssistantRole { + #[default] + Assistant, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum OutputMessageContent { + /// A text output from the model. + OutputText(OutputTextContent), + /// A refusal from the model. + Refusal(RefusalContent), +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum OutputContent { + /// A text output from the model. + OutputText(OutputTextContent), + /// A refusal from the model. + Refusal(RefusalContent), + /// Reasoning text from the model. + ReasoningText(ReasoningTextContent), +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ReasoningTextContent { + /// The reasoning text from the model. + pub text: String, +} + +/// A reasoning item representing the model's chain of thought, including summary paragraphs. 
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct ReasoningItem {
    /// Unique identifier of the reasoning content.
    pub id: String,
    /// Reasoning summary content.
    pub summary: Vec<Summary>,
    /// Reasoning text content.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub content: Option<Vec<ReasoningTextContent>>,
    /// The encrypted content of the reasoning item - populated when a response is generated with
    /// `reasoning.encrypted_content` in the `include` parameter.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub encrypted_content: Option<String>,
    /// The status of the item. One of `in_progress`, `completed`, or `incomplete`.
    /// Populated when items are returned via API.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub status: Option<OutputStatus>,
}

/// A single summary text fragment from reasoning.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct Summary {
    /// A summary of the reasoning output from the model so far.
    pub text: String,
}

/// File search tool call output.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct FileSearchToolCall {
    /// The unique ID of the file search tool call.
    pub id: String,
    /// The queries used to search for files.
    pub queries: Vec<String>,
    /// The status of the file search tool call. One of `in_progress`, `searching`,
    /// `incomplete`, `failed`, or `completed`.
    pub status: FileSearchToolCallStatus,
    /// The results of the file search tool call.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub results: Option<Vec<FileSearchToolCallResult>>,
}

#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum FileSearchToolCallStatus {
    InProgress,
    Searching,
    Incomplete,
    Failed,
    Completed,
}

/// A single result from a file search.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct FileSearchToolCallResult {
    /// Set of 16 key-value pairs that can be attached to an object. This can be useful for storing
    /// additional information about the object in a structured format, and querying for objects
    /// via API or the dashboard. Keys are strings with a maximum length of 64 characters.
    /// Values are strings with a maximum length of 512 characters, booleans, or numbers.
    pub attributes: HashMap<String, serde_json::Value>,
    /// The unique ID of the file.
    pub file_id: String,
    /// The name of the file.
    pub filename: String,
    /// The relevance score of the file - a value between 0 and 1.
    pub score: f32,
    /// The text that was retrieved from the file.
    pub text: String,
}

#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct ComputerCallSafetyCheckParam {
    /// The ID of the pending safety check.
    pub id: String,
    /// The type of the pending safety check.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub code: Option<String>,
    /// Details about the pending safety check.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub message: Option<String>,
}

#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum WebSearchToolCallStatus {
    InProgress,
    Searching,
    Completed,
    Failed,
}

#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct WebSearchActionSearchSource {
    /// The type of source. Always `url`.
    pub r#type: String,
    /// The URL of the source.
    pub url: String,
}

#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct WebSearchActionSearch {
    /// The search query.
    pub query: String,
    /// The sources used in the search.
    pub sources: Option<Vec<WebSearchActionSearchSource>>,
}

#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct WebSearchActionOpenPage {
    /// The URL opened by the model.
    pub url: String,
}

#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct WebSearchActionFind {
    /// The URL of the page searched for the pattern.
    pub url: String,
    /// The pattern or text to search for within the page.
    pub pattern: String,
}

#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum WebSearchToolCallAction {
    /// Action type "search" - Performs a web search query.
    Search(WebSearchActionSearch),
    /// Action type "open_page" - Opens a specific URL from search results.
    OpenPage(WebSearchActionOpenPage),
    /// Action type "find": Searches for a pattern within a loaded page.
    Find(WebSearchActionFind),
}

/// Web search tool call output.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct WebSearchToolCall {
    /// An object describing the specific action taken in this web search call. Includes
    /// details on how the model used the web (search, open_page, find).
    pub action: WebSearchToolCallAction,
    /// The unique ID of the web search tool call.
    pub id: String,
    /// The status of the web search tool call.
    pub status: WebSearchToolCallStatus,
}

/// Output from a computer tool call.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct ComputerToolCall {
    pub action: ComputerAction,
    /// An identifier used when responding to the tool call with output.
    pub call_id: String,
    /// The unique ID of the computer call.
    pub id: String,
    /// The pending safety checks for the computer call.
    pub pending_safety_checks: Vec<ComputerCallSafetyCheckParam>,
    /// The status of the item. One of `in_progress`, `completed`, or `incomplete`.
    /// Populated when items are returned via API.
    pub status: OutputStatus,
}

/// A point in 2D space.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct DragPoint {
    /// The x-coordinate.
    pub x: i32,
    /// The y-coordinate.
    pub y: i32,
}

/// Represents all user-triggered actions.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ComputerAction {
    /// A click action.
    Click(ClickParam),

    /// A double click action.
    DoubleClick(DoubleClickAction),

    /// A drag action.
    Drag(Drag),

    /// A collection of keypresses the model would like to perform.
    Keypress(KeyPressAction),

    /// A mouse move action.
    Move(Move),

    /// A screenshot action.
    Screenshot,

    /// A scroll action.
    Scroll(Scroll),

    /// An action to type in text.
    Type(Type),

    /// A wait action.
    Wait,
}

#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ClickButtonType {
    Left,
    Right,
    Wheel,
    Back,
    Forward,
}

/// A click action.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ClickParam {
    /// Indicates which mouse button was pressed during the click. One of `left`,
    /// `right`, `wheel`, `back`, or `forward`.
    pub button: ClickButtonType,
    /// The x-coordinate where the click occurred.
    pub x: i32,
    /// The y-coordinate where the click occurred.
    pub y: i32,
}

/// A double click action.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct DoubleClickAction {
    /// The x-coordinate where the double click occurred.
    pub x: i32,
    /// The y-coordinate where the double click occurred.
    pub y: i32,
}

/// A drag action.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Drag {
    /// The path of points the cursor drags through.
    pub path: Vec<DragPoint>,
}

/// A keypress action.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct KeyPressAction {
    /// The combination of keys the model is requesting to be pressed.
    /// This is an array of strings, each representing a key.
    pub keys: Vec<String>,
}

/// A mouse move action.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Move {
    /// The x-coordinate to move to.
    pub x: i32,
    /// The y-coordinate to move to.
    pub y: i32,
}

/// A scroll action.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Scroll {
    /// The horizontal scroll distance.
    pub scroll_x: i32,
    /// The vertical scroll distance.
    pub scroll_y: i32,
    /// The x-coordinate where the scroll occurred.
    pub x: i32,
    /// The y-coordinate where the scroll occurred.
    pub y: i32,
}

/// A typing (text entry) action.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Type {
    /// The text to type.
    pub text: String,
}

#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct FunctionToolCall {
    /// A JSON string of the arguments to pass to the function.
    pub arguments: String,
    /// The unique ID of the function tool call generated by the model.
    pub call_id: String,
    /// The name of the function to run.
    pub name: String,
    /// The unique ID of the function tool call.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub id: Option<String>,
    /// The status of the item. One of `in_progress`, `completed`, or `incomplete`.
    /// Populated when items are returned via API.
    // NOTE(review): generic lost in extraction; the original carried a
    // "TODO rename OutputStatus?" remark — verify the concrete status type.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub status: Option<OutputStatus>, // TODO rename OutputStatus?
}

#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum ImageGenToolCallStatus {
    InProgress,
    Completed,
    Generating,
    Failed,
}

#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct ImageGenToolCall {
    /// The unique ID of the image generation call.
    pub id: String,
    /// The generated image encoded in base64.
    pub result: Option<String>,
    /// The status of the image generation call.
    pub status: ImageGenToolCallStatus,
}

#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum CodeInterpreterToolCallStatus {
    InProgress,
    Completed,
    Incomplete,
    Interpreting,
    Failed,
}

/// Output of a code interpreter request.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct CodeInterpreterToolCall {
    /// The code to run, or null if not available.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub code: Option<String>,
    /// ID of the container used to run the code.
    pub container_id: String,
    /// The unique ID of the code interpreter tool call.
    pub id: String,
    /// The outputs generated by the code interpreter, such as logs or images.
    /// Can be null if no outputs are available.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub outputs: Option<Vec<CodeInterpreterToolCallOutput>>,
    /// The status of the code interpreter tool call.
    /// Valid values are `in_progress`, `completed`, `incomplete`, `interpreting`, and `failed`.
    pub status: CodeInterpreterToolCallStatus,
}

/// Individual result from a code interpreter: either logs or files.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum CodeInterpreterToolCallOutput {
    /// Code interpreter output logs
    Logs(CodeInterpreterOutputLogs),
    /// Code interpreter output image
    Image(CodeInterpreterOutputImage),
}

#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct CodeInterpreterOutputLogs {
    /// The logs output from the code interpreter.
    pub logs: String,
}

#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct CodeInterpreterOutputImage {
    /// The URL of the image output from the code interpreter.
    pub url: String,
}

#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct CodeInterpreterFile {
    /// The ID of the file.
    file_id: String,
    /// The MIME type of the file.
    mime_type: String,
}

#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct LocalShellToolCall {
    /// Execute a shell command on the server.
    pub action: LocalShellExecAction,
    /// The unique ID of the local shell tool call generated by the model.
    pub call_id: String,
    /// The unique ID of the local shell call.
    pub id: String,
    /// The status of the local shell call.
+ pub status: OutputStatus, +} + +/// Define the shape of a local shell action (exec). +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct LocalShellExecAction { + /// The command to run. + pub command: Vec, + /// Environment variables to set for the command. + pub env: HashMap, + /// Optional timeout in milliseconds for the command. + pub timeout_ms: Option, + /// Optional user to run the command as. + pub user: Option, + /// Optional working directory to run the command in. + pub working_directory: Option, +} + +/// Output of an MCP server tool invocation. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct MCPToolCall { + /// A JSON string of the arguments passed to the tool. + pub arguments: String, + /// The unique ID of the tool call. + pub id: String, + /// The name of the tool that was run. + pub name: String, + /// The label of the MCP server running the tool. + pub server_label: String, + /// Unique identifier for the MCP tool call approval request. Include this value + /// in a subsequent `mcp_approval_response` input to approve or reject the corresponding + /// tool call. + pub approval_request_id: Option, + /// Error message from the call, if any. + pub error: Option, + /// The output from the tool call. + pub output: Option, + /// The status of the tool call. One of `in_progress`, `completed`, `incomplete`, + /// `calling`, or `failed`. + pub status: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(rename_all = "snake_case")] +pub enum MCPToolCallStatus { + InProgress, + Completed, + Incomplete, + Calling, + Failed, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct MCPListTools { + /// The unique ID of the list. + pub id: String, + /// The label of the MCP server. + pub server_label: String, + /// The tools available on the server. + pub tools: Vec, + /// Error message if listing failed. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub error: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct MCPApprovalRequest { + /// JSON string of arguments for the tool. + pub arguments: String, + /// The unique ID of the approval request. + pub id: String, + /// The name of the tool to run. + pub name: String, + /// The label of the MCP server making the request. + pub server_label: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct InputTokenDetails { + /// The number of tokens that were retrieved from the cache. + /// [More on prompt caching](https://platform.openai.com/docs/guides/prompt-caching). + pub cached_tokens: u32, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct OutputTokenDetails { + /// The number of reasoning tokens. + pub reasoning_tokens: u32, +} + +/// Usage statistics for a response. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseUsage { + /// The number of input tokens. + pub input_tokens: u32, + /// A detailed breakdown of the input tokens. + pub input_tokens_details: InputTokenDetails, + /// The number of output tokens. + pub output_tokens: u32, + /// A detailed breakdown of the output tokens. + pub output_tokens_details: OutputTokenDetails, + /// The total number of tokens used. + pub total_tokens: u32, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(untagged)] +pub enum Instructions { + /// A text input to the model, equivalent to a text input with the `developer` role. + Text(String), + /// A list of one or many input items to the model, containing different content types. + Array(Vec), +} + +/// The complete response returned by the Responses API. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct Response { + /// Whether to run the model response in the background. + /// [Learn more](https://platform.openai.com/docs/guides/background). 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub background: Option, + + /// Billing information for the response. + #[serde(skip_serializing_if = "Option::is_none")] + pub billing: Option, + + /// The conversation that this response belongs to. Input items and output + /// items from this response are automatically added to this conversation. + #[serde(skip_serializing_if = "Option::is_none")] + pub conversation: Option, + + /// Unix timestamp (in seconds) when this Response was created. + pub created_at: u64, + + /// An error object returned when the model fails to generate a Response. + #[serde(skip_serializing_if = "Option::is_none")] + pub error: Option, + + /// Unique identifier for this response. + pub id: String, + + /// Details about why the response is incomplete, if any. + #[serde(skip_serializing_if = "Option::is_none")] + pub incomplete_details: Option, + + /// A system (or developer) message inserted into the model's context. + /// + /// When using along with `previous_response_id`, the instructions from a previous response + /// will not be carried over to the next response. This makes it simple to swap out + /// system (or developer) messages in new responses. + #[serde(skip_serializing_if = "Option::is_none")] + pub instructions: Option, + + /// An upper bound for the number of tokens that can be generated for a response, + /// including visible output tokens and + /// [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + #[serde(skip_serializing_if = "Option::is_none")] + pub max_output_tokens: Option, + + /// Set of 16 key-value pairs that can be attached to an object. This can be + /// useful for storing additional information about the object in a structured + /// format, and querying for objects via API or the dashboard. + /// + /// Keys are strings with a maximum length of 64 characters. Values are strings + /// with a maximum length of 512 characters. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub metadata: Option>, + + /// Model ID used to generate the response, like gpt-4o or o3. OpenAI offers a + /// wide range of models with different capabilities, performance characteristics, + /// and price points. Refer to the [model guide](https://platform.openai.com/docs/models) to browse and compare available models. + pub model: String, + + /// The object type of this resource - always set to `response`. + pub object: String, + + /// An array of content items generated by the model. + /// + /// - The length and order of items in the output array is dependent on the model's response. + /// - Rather than accessing the first item in the output array and assuming it's an assistant + /// message with the content generated by the model, you might consider using + /// the `output_text` property where supported in SDKs. + pub output: Vec, + + /// SDK-only convenience property that contains the aggregated text output from all + /// `output_text` items in the `output` array, if any are present. + /// Supported in the Python and JavaScript SDKs. + // #[serde(skip_serializing_if = "Option::is_none")] + // pub output_text: Option, + + /// Whether to allow the model to run tool calls in parallel. + #[serde(skip_serializing_if = "Option::is_none")] + pub parallel_tool_calls: Option, + + /// The unique ID of the previous response to the model. Use this to create multi-turn conversations. + /// Learn more about [conversation state](https://platform.openai.com/docs/guides/conversation-state). + /// Cannot be used in conjunction with `conversation`. + #[serde(skip_serializing_if = "Option::is_none")] + pub previous_response_id: Option, + + /// Reference to a prompt template and its variables. + /// [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub prompt: Option, + + /// Used by OpenAI to cache responses for similar requests to optimize your cache hit rates. Replaces + /// the `user` field. [Learn more](https://platform.openai.com/docs/guides/prompt-caching). + #[serde(skip_serializing_if = "Option::is_none")] + pub prompt_cache_key: Option, + + /// **gpt-5 and o-series models only** + /// Configuration options for [reasoning models](https://platform.openai.com/docs/guides/reasoning). + #[serde(skip_serializing_if = "Option::is_none")] + pub reasoning: Option, + + /// A stable identifier used to help detect users of your application that may be violating OpenAI's + /// usage policies. + /// + /// The IDs should be a string that uniquely identifies each user. We recommend hashing their username + /// or email address, in order to avoid sending us any identifying information. [Learn + /// more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers). + #[serde(skip_serializing_if = "Option::is_none")] + pub safety_identifier: Option, + + /// Specifies the processing type used for serving the request. + /// - If set to 'auto', then the request will be processed with the service tier configured in the Project settings. Unless otherwise configured, the Project will use 'default'. + /// - If set to 'default', then the request will be processed with the standard pricing and performance for the selected model. + /// - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or '[priority](https://openai.com/api-priority-processing/)', then the request will be processed with the corresponding service tier. + /// - When not set, the default behavior is 'auto'. + /// + /// When the `service_tier` parameter is set, the response body will include the `service_tier` value based on the processing mode actually used to serve the request. This response value may be different from the value set in the parameter. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub service_tier: Option, + + /// The status of the response generation. + /// One of `completed`, `failed`, `in_progress`, `cancelled`, `queued`, or `incomplete`. + pub status: Status, + + /// What sampling temperature was used, between 0 and 2. Higher values like 0.8 make + /// outputs more random, lower values like 0.2 make output more focused and deterministic. + /// + /// We generally recommend altering this or `top_p` but not both. + #[serde(skip_serializing_if = "Option::is_none")] + pub temperature: Option, + + /// Configuration options for a text response from the model. Can be plain + /// text or structured JSON data. Learn more: + /// - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + /// - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + #[serde(skip_serializing_if = "Option::is_none")] + pub text: Option, + + /// How the model should select which tool (or tools) to use when generating + /// a response. See the `tools` parameter to see how to specify which tools + /// the model can call. + #[serde(skip_serializing_if = "Option::is_none")] + pub tool_choice: Option, + + /// An array of tools the model may call while generating a response. You + /// can specify which tool to use by setting the `tool_choice` parameter. + /// + /// We support the following categories of tools: + /// - **Built-in tools**: Tools that are provided by OpenAI that extend the + /// model's capabilities, like [web search](https://platform.openai.com/docs/guides/tools-web-search) + /// or [file search](https://platform.openai.com/docs/guides/tools-file-search). Learn more about + /// [built-in tools](https://platform.openai.com/docs/guides/tools). + /// - **MCP Tools**: Integrations with third-party systems via custom MCP servers + /// or predefined connectors such as Google Drive and SharePoint. 
Learn more about + /// [MCP Tools](https://platform.openai.com/docs/guides/tools-connectors-mcp). + /// - **Function calls (custom tools)**: Functions that are defined by you, + /// enabling the model to call your own code with strongly typed arguments + /// and outputs. Learn more about + /// [function calling](https://platform.openai.com/docs/guides/function-calling). You can also use + /// custom tools to call your own code. + #[serde(skip_serializing_if = "Option::is_none")] + pub tools: Option>, + + /// An integer between 0 and 20 specifying the number of most likely tokens to return at each + /// token position, each with an associated log probability. + #[serde(skip_serializing_if = "Option::is_none")] + pub top_logprobs: Option, + + /// An alternative to sampling with temperature, called nucleus sampling, + /// where the model considers the results of the tokens with top_p probability + /// mass. So 0.1 means only the tokens comprising the top 10% probability mass + /// are considered. + /// + /// We generally recommend altering this or `temperature` but not both. + #[serde(skip_serializing_if = "Option::is_none")] + pub top_p: Option, + + ///The truncation strategy to use for the model response. + /// - `auto`: If the input to this Response exceeds + /// the model's context window size, the model will truncate the + /// response to fit the context window by dropping items from the beginning of the conversation. + /// - `disabled` (default): If the input size will exceed the context window + /// size for a model, the request will fail with a 400 error. + #[serde(skip_serializing_if = "Option::is_none")] + pub truncation: Option, + + /// Represents token usage details including input tokens, output tokens, + /// a breakdown of output tokens, and the total tokens used. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub usage: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(rename_all = "snake_case")] +pub enum Status { + Completed, + Failed, + InProgress, + Cancelled, + Queued, + Incomplete, +} + +/// Output item +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(tag = "type")] +#[serde(rename_all = "snake_case")] +pub enum OutputItem { + /// An output message from the model. + Message(OutputMessage), + /// The results of a file search tool call. See the + /// [file search guide](https://platform.openai.com/docs/guides/tools-file-search) + /// for more information. + FileSearchCall(FileSearchToolCall), + /// A tool call to run a function. See the + /// [function calling guide](https://platform.openai.com/docs/guides/function-calling) + /// for more information. + FunctionCall(FunctionToolCall), + /// The results of a web search tool call. See the + /// [web search guide](https://platform.openai.com/docs/guides/tools-web-search) + /// for more information. + WebSearchCall(WebSearchToolCall), + /// A tool call to a computer use tool. See the + /// [computer use guide](https://platform.openai.com/docs/guides/tools-computer-use) + /// for more information. + ComputerCall(ComputerToolCall), + /// A description of the chain of thought used by a reasoning model while generating + /// a response. Be sure to include these items in your `input` to the Responses API for + /// subsequent turns of a conversation if you are manually + /// [managing context](https://platform.openai.com/docs/guides/conversation-state). + Reasoning(ReasoningItem), + /// An image generation request made by the model. + ImageGenerationCall(ImageGenToolCall), + /// A tool call to run code. + CodeInterpreterCall(CodeInterpreterToolCall), + /// A tool call to run a command on the local shell. + LocalShellCall(LocalShellToolCall), + /// An invocation of a tool on an MCP server. 
+ McpCall(MCPToolCall), + /// A list of tools available on an MCP server. + McpListTools(MCPListTools), + /// A request for human approval of a tool invocation. + McpApprovalRequest(MCPApprovalRequest), + /// A call to a custom tool created by the model. + CustomToolCall(CustomToolCall), +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[non_exhaustive] +pub struct CustomToolCall { + /// An identifier used to map this custom tool call to a tool call output. + pub call_id: String, + /// The input for the custom tool call generated by the model. + pub input: String, + /// The name of the custom tool being called. + pub name: String, + /// The unique ID of the custom tool call in the OpenAI platform. + pub id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct DeleteResponse { + pub object: String, + pub deleted: bool, + pub id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct InputItemReference { + pub r#type: Option, + pub id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum ItemResourceItem { + Message(MessageItem), + FileSearchCall(FileSearchToolCall), + ComputerCall(ComputerToolCall), + ComputerCallOutput(ComputerCallOutputItemParam), + WebSearchCall(WebSearchToolCall), + FunctionCall(FunctionToolCall), + FunctionCallOutput(FunctionCallOutputItemParam), + ImageGenerationCall(ImageGenToolCall), + CodeInterpreterCall(CodeInterpreterToolCall), + LocalShellCall(LocalShellToolCall), + LocalShellCallOutput(LocalShellToolCallOutput), + McpListTools(MCPListTools), + McpApprovalRequest(MCPApprovalRequest), + McpApprovalResponse(MCPApprovalResponse), + McpCall(MCPToolCall), +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(untagged)] +pub enum ItemResource { + ItemReference(InputItemReference), + Item(ItemResourceItem), +} + +/// A list of Response items. 
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseItemList { + /// The type of object returned, must be `list`. + pub object: String, + /// The ID of the first item in the list. + pub first_id: Option, + /// The ID of the last item in the list. + pub last_id: Option, + /// Whether there are more items in the list. + pub has_more: bool, + /// The list of items. + pub data: Vec, +} + +#[derive(Clone, Serialize, Deserialize, Debug, Default, Builder, PartialEq)] +#[builder( + name = "TokenCountsBodyArgs", + pattern = "mutable", + setter(into, strip_option), + default +)] +#[builder(build_fn(error = "OpenAIError"))] +pub struct TokenCountsBody { + /// The conversation that this response belongs to. Items from this + /// conversation are prepended to `input_items` for this response request. + /// Input items and output items from this response are automatically added to this + /// conversation after this response completes. + #[serde(skip_serializing_if = "Option::is_none")] + pub conversation: Option, + + /// Text, image, or file inputs to the model, used to generate a response + #[serde(skip_serializing_if = "Option::is_none")] + pub input: Option, + + /// A system (or developer) message inserted into the model's context. + /// + /// When used along with `previous_response_id`, the instructions from a previous response will + /// not be carried over to the next response. This makes it simple to swap out system (or + /// developer) messages in new responses. + #[serde(skip_serializing_if = "Option::is_none")] + pub instructions: Option, + + /// Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a + /// wide range of models with different capabilities, performance characteristics, + /// and price points. Refer to the [model guide](https://platform.openai.com/docs/models) + /// to browse and compare available models. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub model: Option, + + /// Whether to allow the model to run tool calls in parallel. + #[serde(skip_serializing_if = "Option::is_none")] + pub parallel_tool_calls: Option, + + /// The unique ID of the previous response to the model. Use this to create multi-turn + /// conversations. Learn more about [conversation state](https://platform.openai.com/docs/guides/conversation-state). + /// Cannot be used in conjunction with `conversation`. + #[serde(skip_serializing_if = "Option::is_none")] + pub previous_response_id: Option, + + /// **gpt-5 and o-series models only** + /// Configuration options for [reasoning models](https://platform.openai.com/docs/guides/reasoning). + #[serde(skip_serializing_if = "Option::is_none")] + pub reasoning: Option, + + /// Configuration options for a text response from the model. Can be plain + /// text or structured JSON data. Learn more: + /// - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + /// - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + #[serde(skip_serializing_if = "Option::is_none")] + pub text: Option, + + /// How the model should select which tool (or tools) to use when generating + /// a response. See the `tools` parameter to see how to specify which tools + /// the model can call. + #[serde(skip_serializing_if = "Option::is_none")] + pub tool_choice: Option, + + /// An array of tools the model may call while generating a response. You can specify which tool + /// to use by setting the `tool_choice` parameter. + #[serde(skip_serializing_if = "Option::is_none")] + pub tools: Option>, + + ///The truncation strategy to use for the model response. + /// - `auto`: If the input to this Response exceeds + /// the model's context window size, the model will truncate the + /// response to fit the context window by dropping items from the beginning of the conversation. 
+ /// - `disabled` (default): If the input size will exceed the context window + /// size for a model, the request will fail with a 400 error. + #[serde(skip_serializing_if = "Option::is_none")] + pub truncation: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct TokenCountsResource { + pub object: String, + pub input_tokens: u32, +} diff --git a/async-openai/src/types/responses/response_stream.rs b/async-openai/src/types/responses/response_stream.rs new file mode 100644 index 00000000..58bcc82f --- /dev/null +++ b/async-openai/src/types/responses/response_stream.rs @@ -0,0 +1,550 @@ +use futures::Stream; +use serde::{Deserialize, Serialize}; +use std::pin::Pin; + +use crate::{ + error::OpenAIError, + types::responses::{OutputContent, OutputItem, Response, ResponseLogProb, Summary}, +}; + +/// Stream of response events +pub type ResponseStream = + Pin> + Send>>; + +/// Event types for streaming responses from the Responses API +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(tag = "type")] +pub enum ResponseStreamEvent { + /// An event that is emitted when a response is created. + #[serde(rename = "response.created")] + ResponseCreated(ResponseCreatedEvent), + /// Emitted when the response is in progress. + #[serde(rename = "response.in_progress")] + ResponseInProgress(ResponseInProgressEvent), + /// Emitted when the model response is complete. + #[serde(rename = "response.completed")] + ResponseCompleted(ResponseCompletedEvent), + /// An event that is emitted when a response fails. + #[serde(rename = "response.failed")] + ResponseFailed(ResponseFailedEvent), + /// An event that is emitted when a response finishes as incomplete. + #[serde(rename = "response.incomplete")] + ResponseIncomplete(ResponseIncompleteEvent), + /// Emitted when a new output item is added. 
+ #[serde(rename = "response.output_item.added")] + ResponseOutputItemAdded(ResponseOutputItemAddedEvent), + /// Emitted when an output item is marked done. + #[serde(rename = "response.output_item.done")] + ResponseOutputItemDone(ResponseOutputItemDoneEvent), + /// Emitted when a new content part is added. + #[serde(rename = "response.content_part.added")] + ResponseContentPartAdded(ResponseContentPartAddedEvent), + /// Emitted when a content part is done. + #[serde(rename = "response.content_part.done")] + ResponseContentPartDone(ResponseContentPartDoneEvent), + /// Emitted when there is an additional text delta. + #[serde(rename = "response.output_text.delta")] + ResponseOutputTextDelta(ResponseTextDeltaEvent), + /// Emitted when text content is finalized. + #[serde(rename = "response.output_text.done")] + ResponseOutputTextDone(ResponseTextDoneEvent), + /// Emitted when there is a partial refusal text. + #[serde(rename = "response.refusal.delta")] + ResponseRefusalDelta(ResponseRefusalDeltaEvent), + #[serde(rename = "response.refusal.done")] + /// Emitted when refusal text is finalized. + ResponseRefusalDone(ResponseRefusalDoneEvent), + /// Emitted when there is a partial function-call arguments delta. + #[serde(rename = "response.function_call_arguments.delta")] + ResponseFunctionCallArgumentsDelta(ResponseFunctionCallArgumentsDeltaEvent), + /// Emitted when function-call arguments are finalized. + #[serde(rename = "response.function_call_arguments.done")] + ResponseFunctionCallArgumentsDone(ResponseFunctionCallArgumentsDoneEvent), + /// Emitted when a file search call is initiated. + #[serde(rename = "response.file_search_call.in_progress")] + ResponseFileSearchCallInProgress(ResponseFileSearchCallInProgressEvent), + /// Emitted when a file search is currently searching. 
+ #[serde(rename = "response.file_search_call.searching")] + ResponseFileSearchCallSearching(ResponseFileSearchCallSearchingEvent), + /// Emitted when a file search call is completed (results found). + #[serde(rename = "response.file_search_call.completed")] + ResponseFileSearchCallCompleted(ResponseFileSearchCallCompletedEvent), + /// Emitted when a web search call is initiated. + #[serde(rename = "response.web_search_call.in_progress")] + ResponseWebSearchCallInProgress(ResponseWebSearchCallInProgressEvent), + /// Emitted when a web search call is executing. + #[serde(rename = "response.web_search_call.searching")] + ResponseWebSearchCallSearching(ResponseWebSearchCallSearchingEvent), + /// Emitted when a web search call is completed. + #[serde(rename = "response.web_search_call.completed")] + ResponseWebSearchCallCompleted(ResponseWebSearchCallCompletedEvent), + /// Emitted when a new reasoning summary part is added. + #[serde(rename = "response.reasoning_summary_part.added")] + ResponseReasoningSummaryPartAdded(ResponseReasoningSummaryPartAddedEvent), + /// Emitted when a reasoning summary part is completed. + #[serde(rename = "response.reasoning_summary_part.done")] + ResponseReasoningSummaryPartDone(ResponseReasoningSummaryPartDoneEvent), + /// Emitted when a delta is added to a reasoning summary text. + #[serde(rename = "response.reasoning_summary_text.delta")] + ResponseReasoningSummaryTextDelta(ResponseReasoningSummaryTextDeltaEvent), + /// Emitted when a reasoning summary text is completed. + #[serde(rename = "response.reasoning_summary_text.done")] + ResponseReasoningSummaryTextDone(ResponseReasoningSummaryTextDoneEvent), + /// Emitted when a delta is added to a reasoning text. + #[serde(rename = "response.reasoning_text.delta")] + ResponseReasoningTextDelta(ResponseReasoningTextDeltaEvent), + /// Emitted when a reasoning text is completed. 
+ #[serde(rename = "response.reasoning_text.done")] + ResponseReasoningTextDone(ResponseReasoningTextDoneEvent), + /// Emitted when an image generation tool call has completed and the final image is available. + #[serde(rename = "response.image_generation_call.completed")] + ResponseImageGenerationCallCompleted(ResponseImageGenCallCompletedEvent), + /// Emitted when an image generation tool call is actively generating an image (intermediate state). + #[serde(rename = "response.image_generation_call.generating")] + ResponseImageGenerationCallGenerating(ResponseImageGenCallGeneratingEvent), + /// Emitted when an image generation tool call is in progress. + #[serde(rename = "response.image_generation_call.in_progress")] + ResponseImageGenerationCallInProgress(ResponseImageGenCallInProgressEvent), + /// Emitted when a partial image is available during image generation streaming. + #[serde(rename = "response.image_generation_call.partial_image")] + ResponseImageGenerationCallPartialImage(ResponseImageGenCallPartialImageEvent), + /// Emitted when there is a delta (partial update) to the arguments of an MCP tool call. + #[serde(rename = "response.mcp_call_arguments.delta")] + ResponseMCPCallArgumentsDelta(ResponseMCPCallArgumentsDeltaEvent), + /// Emitted when the arguments for an MCP tool call are finalized. + #[serde(rename = "response.mcp_call_arguments.done")] + ResponseMCPCallArgumentsDone(ResponseMCPCallArgumentsDoneEvent), + /// Emitted when an MCP tool call has completed successfully. + #[serde(rename = "response.mcp_call.completed")] + ResponseMCPCallCompleted(ResponseMCPCallCompletedEvent), + /// Emitted when an MCP tool call has failed. + #[serde(rename = "response.mcp_call.failed")] + ResponseMCPCallFailed(ResponseMCPCallFailedEvent), + /// Emitted when an MCP tool call is in progress. 
+ #[serde(rename = "response.mcp_call.in_progress")] + ResponseMCPCallInProgress(ResponseMCPCallInProgressEvent), + /// Emitted when the list of available MCP tools has been successfully retrieved. + #[serde(rename = "response.mcp_list_tools.completed")] + ResponseMCPListToolsCompleted(ResponseMCPListToolsCompletedEvent), + /// Emitted when the attempt to list available MCP tools has failed. + #[serde(rename = "response.mcp_list_tools.failed")] + ResponseMCPListToolsFailed(ResponseMCPListToolsFailedEvent), + /// Emitted when the system is in the process of retrieving the list of available MCP tools. + #[serde(rename = "response.mcp_list_tools.in_progress")] + ResponseMCPListToolsInProgress(ResponseMCPListToolsInProgressEvent), + /// Emitted when a code interpreter call is in progress. + #[serde(rename = "response.code_interpreter_call.in_progress")] + ResponseCodeInterpreterCallInProgress(ResponseCodeInterpreterCallInProgressEvent), + /// Emitted when the code interpreter is actively interpreting the code snippet. + #[serde(rename = "response.code_interpreter_call.interpreting")] + ResponseCodeInterpreterCallInterpreting(ResponseCodeInterpreterCallInterpretingEvent), + /// Emitted when the code interpreter call is completed. + #[serde(rename = "response.code_interpreter_call.completed")] + ResponseCodeInterpreterCallCompleted(ResponseCodeInterpreterCallCompletedEvent), + /// Emitted when a partial code snippet is streamed by the code interpreter. + #[serde(rename = "response.code_interpreter_call_code.delta")] + ResponseCodeInterpreterCallCodeDelta(ResponseCodeInterpreterCallCodeDeltaEvent), + /// Emitted when the code snippet is finalized by the code interpreter. + #[serde(rename = "response.code_interpreter_call_code.done")] + ResponseCodeInterpreterCallCodeDone(ResponseCodeInterpreterCallCodeDoneEvent), + /// Emitted when an annotation is added to output text content. 
+ #[serde(rename = "response.output_text.annotation.added")] + ResponseOutputTextAnnotationAdded(ResponseOutputTextAnnotationAddedEvent), + /// Emitted when a response is queued and waiting to be processed. + #[serde(rename = "response.queued")] + ResponseQueued(ResponseQueuedEvent), + /// Event representing a delta (partial update) to the input of a custom tool call. + #[serde(rename = "response.custom_tool_call_input.delta")] + ResponseCustomToolCallInputDelta(ResponseCustomToolCallInputDeltaEvent), + /// Event indicating that input for a custom tool call is complete. + #[serde(rename = "response.custom_tool_call_input.done")] + ResponseCustomToolCallInputDone(ResponseCustomToolCallInputDoneEvent), + /// Emitted when an error occurs. + #[serde(rename = "error")] + ResponseError(ResponseErrorEvent), +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseCreatedEvent { + pub sequence_number: u64, + pub response: Response, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseInProgressEvent { + pub sequence_number: u64, + pub response: Response, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseCompletedEvent { + pub sequence_number: u64, + pub response: Response, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseFailedEvent { + pub sequence_number: u64, + pub response: Response, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseIncompleteEvent { + pub sequence_number: u64, + pub response: Response, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseOutputItemAddedEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item: OutputItem, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseOutputItemDoneEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item: OutputItem, +} + +#[derive(Debug, Serialize, 
Deserialize, Clone, PartialEq)] +pub struct ResponseContentPartAddedEvent { + pub sequence_number: u64, + pub item_id: String, + pub output_index: u32, + pub content_index: u32, + pub part: OutputContent, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseContentPartDoneEvent { + pub sequence_number: u64, + pub item_id: String, + pub output_index: u32, + pub content_index: u32, + pub part: OutputContent, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseTextDeltaEvent { + pub sequence_number: u64, + pub item_id: String, + pub output_index: u32, + pub content_index: u32, + pub delta: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub logprobs: Option<Vec<Logprob>>, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseTextDoneEvent { + pub sequence_number: u64, + pub item_id: String, + pub output_index: u32, + pub content_index: u32, + pub text: String, + pub logprobs: Option<Vec<Logprob>>, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseRefusalDeltaEvent { + pub sequence_number: u64, + pub item_id: String, + pub output_index: u32, + pub content_index: u32, + pub delta: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseRefusalDoneEvent { + pub sequence_number: u64, + pub item_id: String, + pub output_index: u32, + pub content_index: u32, + pub refusal: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseFunctionCallArgumentsDeltaEvent { + pub sequence_number: u64, + pub item_id: String, + pub output_index: u32, + pub delta: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseFunctionCallArgumentsDoneEvent { + pub name: String, + pub sequence_number: u64, + pub item_id: String, + pub output_index: u32, + pub arguments: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct 
ResponseFileSearchCallInProgressEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseFileSearchCallSearchingEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseFileSearchCallCompletedEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseWebSearchCallInProgressEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseWebSearchCallSearchingEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseWebSearchCallCompletedEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum SummaryPart { + SummaryText(Summary), +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseReasoningSummaryPartAddedEvent { + pub sequence_number: u64, + pub item_id: String, + pub output_index: u32, + pub summary_index: u32, + pub part: SummaryPart, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseReasoningSummaryPartDoneEvent { + pub sequence_number: u64, + pub item_id: String, + pub output_index: u32, + pub summary_index: u32, + pub part: SummaryPart, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseReasoningSummaryTextDeltaEvent { + pub sequence_number: u64, + pub item_id: String, + pub output_index: u32, + pub summary_index: u32, + pub delta: String, +} + +#[derive(Debug, 
Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseReasoningSummaryTextDoneEvent { + pub sequence_number: u64, + pub item_id: String, + pub output_index: u32, + pub summary_index: u32, + pub text: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseReasoningTextDeltaEvent { + pub sequence_number: u64, + pub item_id: String, + pub output_index: u32, + pub content_index: u32, + pub delta: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseReasoningTextDoneEvent { + pub sequence_number: u64, + pub item_id: String, + pub output_index: u32, + pub content_index: u32, + pub text: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseImageGenCallCompletedEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseImageGenCallGeneratingEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseImageGenCallInProgressEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseImageGenCallPartialImageEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, + pub partial_image_index: u32, + pub partial_image_b64: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseMCPCallArgumentsDeltaEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, + pub delta: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseMCPCallArgumentsDoneEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, + pub arguments: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, 
PartialEq)] +pub struct ResponseMCPCallCompletedEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseMCPCallFailedEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseMCPCallInProgressEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseMCPListToolsCompletedEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseMCPListToolsFailedEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseMCPListToolsInProgressEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseCodeInterpreterCallInProgressEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseCodeInterpreterCallInterpretingEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseCodeInterpreterCallCompletedEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseCodeInterpreterCallCodeDeltaEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, + pub delta: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseCodeInterpreterCallCodeDoneEvent 
{ + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, + pub code: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseOutputTextAnnotationAddedEvent { + pub sequence_number: u64, + pub output_index: u32, + pub content_index: u32, + pub annotation_index: u32, + pub item_id: String, + pub annotation: serde_json::Value, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseQueuedEvent { + pub sequence_number: u64, + pub response: Response, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseCustomToolCallInputDeltaEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, + pub delta: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseCustomToolCallInputDoneEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, + pub input: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseErrorEvent { + pub sequence_number: u64, + pub code: Option<String>, + pub message: String, + pub param: Option<String>, +} diff --git a/examples/realtime/src/main.rs b/examples/realtime/src/main.rs index 141fefa3..87881734 100644 --- a/examples/realtime/src/main.rs +++ b/examples/realtime/src/main.rs @@ -1,7 +1,8 @@ use std::process::exit; use async_openai::types::realtime::{ - ConversationItemCreateEvent, Item, ResponseCreateEvent, ServerEvent, + RealtimeClientEventConversationItemCreate, RealtimeClientEventResponseCreate, + RealtimeConversationItem, RealtimeServerEvent, }; use futures_util::{future, pin_mut, StreamExt}; @@ -13,7 +14,7 @@ use tokio_tungstenite::{ #[tokio::main] async fn main() { - let url = "wss://api.openai.com/v1/realtime?model=gpt-4o-realtime-preview-2024-12-17"; + let url = "wss://api.openai.com/v1/realtime?model=gpt-realtime"; let api_key = std::env::var("OPENAI_API_KEY").expect("Please provide OPENAPI_API_KEY env var"); let 
(stdin_tx, stdin_rx) = futures_channel::mpsc::unbounded(); @@ -25,9 +26,6 @@ async fn main() { "Authorization", format!("Bearer {api_key}").parse().unwrap(), ); - request - .headers_mut() - .insert("OpenAI-Beta", "realtime=v1".parse().unwrap()); // connect to WebSocket endpoint let (ws_stream, _) = connect_async(request).await.expect("Failed to connect"); @@ -46,7 +44,7 @@ match message { Message::Text(_) => { let data = message.clone().into_data(); - let server_event: Result<ServerEvent, serde_json::Error> = + let server_event: Result<RealtimeServerEvent, serde_json::Error> = serde_json::from_slice(&data); match server_event { Ok(server_event) => { @@ -56,23 +54,10 @@ eprint!("{:32} | ", event_type.as_str().unwrap()); match server_event { - ServerEvent::ResponseOutputItemDone(event) => { - event.item.content.unwrap_or(vec![]).iter().for_each( - |content| { - if let Some(ref transcript) = content.transcript { - eprintln!( - "[{:?}]: {}", - event.item.role, - transcript.trim(), - ); - } - }, - ); + RealtimeServerEvent::ResponseOutputItemDone(event) => { + eprint!("{event:?}"); } - ServerEvent::ResponseAudioTranscriptDelta(event) => { - eprint!("{}", event.delta.trim()); - } - ServerEvent::Error(e) => { + RealtimeServerEvent::Error(e) => { eprint!("{e:?}"); } _ => {} @@ -123,7 +108,7 @@ async fn read_stdin(tx: futures_channel::mpsc::UnboundedSender<Message>) { } // Create item from json representation - let item = Item::try_from(serde_json::json!({ + let item = RealtimeConversationItem::try_from(serde_json::json!({ "type": "message", "role": "user", "content": [ @@ -136,13 +121,13 @@ .unwrap(); // Create event of type "conversation.item.create" - let event: ConversationItemCreateEvent = item.into(); + let event: RealtimeClientEventConversationItemCreate = item.into(); // Create WebSocket message from client event let message: Message = event.into(); // send WebSocket message containing event of type "conversation.item.create" to server 
tx.unbounded_send(message).unwrap(); // send WebSocket message containing event of type "response.create" to server - tx.unbounded_send(ResponseCreateEvent::default().into()) + tx.unbounded_send(RealtimeClientEventResponseCreate::default().into()) .unwrap(); } } diff --git a/examples/responses-function-call/src/main.rs b/examples/responses-function-call/src/main.rs index 3e2083e8..0dcfc3e2 100644 --- a/examples/responses-function-call/src/main.rs +++ b/examples/responses-function-call/src/main.rs @@ -1,7 +1,8 @@ use async_openai::{ types::responses::{ - CreateResponseArgs, FunctionArgs, FunctionCall, Input, InputItem, InputMessageArgs, - OutputContent, Role, ToolDefinition, + CreateResponseArgs, EasyInputContent, EasyInputMessage, FunctionCallOutput, + FunctionCallOutputItemParam, FunctionTool, FunctionToolCall, InputItem, InputParam, Item, + MessageType, OutputItem, Role, Tool, }, Client, }; @@ -22,48 +23,46 @@ fn check_weather(location: String, units: String) -> String { async fn main() -> Result<(), Box<dyn std::error::Error>> { let client = Client::new(); - let tools = vec![ToolDefinition::Function( - FunctionArgs::default() - .name("get_weather") - .description("Retrieves current weather for the given location") - .parameters(serde_json::json!( - { - "type": "object", - "properties": { - "location": { - "type": "string", - "description": "City and country e.g. Bogotá, Colombia" - }, - "units": { - "type": "string", - "enum": [ - "celsius", - "fahrenheit" - ], - "description": "Units the temperature will be returned in." - } + let tools = vec![Tool::Function(FunctionTool { + name: "get_weather".to_string(), + description: Some("Retrieves current weather for the given location".to_string()), + parameters: Some(serde_json::json!( + { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "City and country e.g. 
Bogotá, Colombia" }, - "required": [ - "location", - "units" - ], - "additionalProperties": false - } - )) - .build()?, - )]; - - let mut input_messages = vec![InputItem::Message( - InputMessageArgs::default() - .role(Role::User) - .content("What's the weather like in Paris today?") - .build()?, - )]; + "units": { + "type": "string", + "enum": [ + "celsius", + "fahrenheit" + ], + "description": "Units the temperature will be returned in." + } + }, + "required": [ + "location", + "units" + ], + "additionalProperties": false + } + )), + strict: None, + })]; + + let mut input_messages = vec![InputItem::EasyMessage(EasyInputMessage { + r#type: MessageType::Message, + role: Role::User, + content: EasyInputContent::Text("What's the weather like in Paris today?".to_string()), + })]; let request = CreateResponseArgs::default() .max_output_tokens(512u32) .model("gpt-4.1") - .input(Input::Items(input_messages.clone())) + .input(InputParam::Items(input_messages.clone())) .tools(tools.clone()) .build()?; @@ -72,9 +71,9 @@ let response = client.responses().create(request).await?; // the model might ask for us to do a function call - let function_call_request: Option<FunctionCall> = - response.output.into_iter().find_map(|output_content| { - if let OutputContent::FunctionCall(inner) = output_content { + let function_call_request: Option<FunctionToolCall> = + response.output.into_iter().find_map(|output_item| { + if let OutputItem::FunctionCall(inner) = output_item { Some(inner) } else { None @@ -97,19 +96,25 @@ } }; - input_messages.push(InputItem::Custom(serde_json::to_value( - &OutputContent::FunctionCall(function_call_request.clone()), - )?)); - input_messages.push(InputItem::Custom(serde_json::json!({ - "type": "function_call_output", - "call_id": function_call_request.call_id, - "output": function_result, - }))); + // Add the function call from the assistant back to the conversation + 
input_messages.push(InputItem::Item(Item::FunctionCall( + function_call_request.clone(), + ))); + + // Add the function call output back to the conversation + input_messages.push(InputItem::Item(Item::FunctionCallOutput( + FunctionCallOutputItemParam { + call_id: function_call_request.call_id.clone(), + output: FunctionCallOutput::Text(function_result), + id: None, + status: None, + }, + ))); let request = CreateResponseArgs::default() .max_output_tokens(512u32) .model("gpt-4.1") - .input(Input::Items(input_messages)) + .input(InputParam::Items(input_messages)) .tools(tools) .build()?; diff --git a/examples/responses-stream/src/main.rs b/examples/responses-stream/src/main.rs index 27e8b14e..37be90c6 100644 --- a/examples/responses-stream/src/main.rs +++ b/examples/responses-stream/src/main.rs @@ -1,7 +1,8 @@ use async_openai::{ Client, types::responses::{ - CreateResponseArgs, Input, InputContent, InputItem, InputMessageArgs, ResponseEvent, Role, + CreateResponseArgs, EasyInputContent, EasyInputMessage, InputItem, InputParam, MessageType, + ResponseStreamEvent, Role, }, }; use futures::StreamExt; @@ -13,13 +14,12 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> { let request = CreateResponseArgs::default() .model("gpt-4.1") .stream(true) - .input(Input::Items(vec![InputItem::Message( - InputMessageArgs::default() - .role(Role::User) - .content(InputContent::TextInput( - "Write a haiku about programming.".to_string(), - )) - .build()?, + .input(InputParam::Items(vec![InputItem::EasyMessage( + EasyInputMessage { + r#type: MessageType::Message, + role: Role::User, + content: EasyInputContent::Text("Write a haiku about programming.".to_string()), + }, )])) .build()?; @@ -28,12 +28,12 @@ while let Some(result) = stream.next().await { match result { Ok(response_event) => match &response_event { - ResponseEvent::ResponseOutputTextDelta(delta) => { + ResponseStreamEvent::ResponseOutputTextDelta(delta) => { print!("{}", delta.delta); } - 
ResponseEvent::ResponseCompleted(_) - | ResponseEvent::ResponseIncomplete(_) - | ResponseEvent::ResponseFailed(_) => { + ResponseStreamEvent::ResponseCompleted(_) + | ResponseStreamEvent::ResponseIncomplete(_) + | ResponseStreamEvent::ResponseFailed(_) => { break; } _ => { diff --git a/examples/responses/src/main.rs b/examples/responses/src/main.rs index 47395185..792382d0 100644 --- a/examples/responses/src/main.rs +++ b/examples/responses/src/main.rs @@ -1,11 +1,13 @@ use std::error::Error; use async_openai::{ - types::responses::{ - AllowedTools, CreateResponseArgs, Input, InputItem, InputMessageArgs, McpArgs, - RequireApproval, RequireApprovalPolicy, Role, TextConfig, - ToolDefinition::{Mcp, WebSearchPreview}, - Verbosity, WebSearchPreviewArgs, + types::{ + responses::{ + CreateResponseArgs, EasyInputContent, EasyInputMessage, InputItem, InputParam, + MessageType, ResponseTextParam, Role, TextResponseFormatConfiguration, Tool, Verbosity, + WebSearchToolArgs, + }, + MCPToolAllowedTools, MCPToolApprovalSetting, MCPToolArgs, MCPToolRequireApproval, }, Client, }; @@ -17,23 +19,24 @@ async fn main() -> Result<(), Box<dyn Error>> { let request = CreateResponseArgs::default() .max_output_tokens(512u32) .model("gpt-4.1") - .text(TextConfig { - format: async_openai::types::responses::TextResponseFormat::Text, + .text(ResponseTextParam { + format: TextResponseFormatConfiguration::Text, verbosity: Some(Verbosity::Medium), // only here to test the config, but gpt-4.1 only supports medium }) - .input(Input::Items(vec![InputItem::Message( - InputMessageArgs::default() - .role(Role::User) - .content("What transport protocols does the 2025-03-26 version of the MCP spec (modelcontextprotocol/modelcontextprotocol) support?") - .build()?, + .input(InputParam::Items(vec![InputItem::EasyMessage( + EasyInputMessage { + r#type: MessageType::Message, + role: Role::User, + content: EasyInputContent::Text("What transport protocols does the 2025-03-26 version of the MCP spec 
(modelcontextprotocol/modelcontextprotocol) support?".to_string()), + } )])) .tools(vec![ - WebSearchPreview(WebSearchPreviewArgs::default().build()?), - Mcp(McpArgs::default() + Tool::WebSearchPreview(WebSearchToolArgs::default().build()?), + Tool::Mcp(MCPToolArgs::default() .server_label("deepwiki") .server_url("https://mcp.deepwiki.com/mcp") - .require_approval(RequireApproval::Policy(RequireApprovalPolicy::Never)) - .allowed_tools(AllowedTools::List(vec!["ask_question".to_string()])) + .require_approval(MCPToolRequireApproval::ApprovalSetting(MCPToolApprovalSetting::Never)) + .allowed_tools(MCPToolAllowedTools::List(vec!["ask_question".to_string()])) .build()?), ]) .build()?;