From 4c6445b2dcfca9aed3659ff47e515c2b2d363406 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 21 Oct 2025 17:53:36 -0700 Subject: [PATCH 01/42] updated client events --- .../src/types/realtime/client_event.rs | 139 ++++++++++++++++-- 1 file changed, 125 insertions(+), 14 deletions(-) diff --git a/async-openai/src/types/realtime/client_event.rs b/async-openai/src/types/realtime/client_event.rs index 87ff7010..7a49e836 100644 --- a/async-openai/src/types/realtime/client_event.rs +++ b/async-openai/src/types/realtime/client_event.rs @@ -6,9 +6,12 @@ use super::{item::Item, session_resource::SessionResource}; #[derive(Debug, Serialize, Deserialize, Clone, Default)] pub struct SessionUpdateEvent { /// Optional client-generated ID used to identify this event. + /// This is an arbitrary string that a client may assign. It will be passed + /// back if there is an error with the event, but the corresponding + /// `session.updated` event will not include it. #[serde(skip_serializing_if = "Option::is_none")] pub event_id: Option, - /// Session configuration to update. + /// Update the Realtime session. Choose either a realtime session or a transcription session. pub session: SessionResource, } @@ -17,7 +20,8 @@ pub struct InputAudioBufferAppendEvent { /// Optional client-generated ID used to identify this event. #[serde(skip_serializing_if = "Option::is_none")] pub event_id: Option, - /// Base64-encoded audio bytes. + /// Base64-encoded audio bytes. This must be in the format specified by + /// the `input_audio_format` field in the session configuration. pub audio: String, } @@ -42,26 +46,41 @@ pub struct ConversationItemCreateEvent { pub event_id: Option, /// The ID of the preceding item after which the new item will be inserted. + /// If not set, the new item will be appended to the end of the conversation. + /// If set to `root`, the new item will be added to the beginning of the conversation. + /// If set to an existing ID, it allows an item to be inserted mid-conversation. + /// If the ID cannot be found, an error will be returned and the item will not be added. #[serde(skip_serializing_if = "Option::is_none")] pub previous_item_id: Option, - /// The item to add to the conversation. + /// A single item within a Realtime conversation. pub item: Item, } +#[derive(Debug, Serialize, Deserialize, Clone, Default)] +pub struct ConversationItemRetrieveEvent { + /// Optional client-generated ID used to identify this event. + #[serde(skip_serializing_if = "Option::is_none")] + pub event_id: Option, + + /// The ID of the item to retrieve. + pub item_id: String, +} + #[derive(Debug, Serialize, Deserialize, Clone, Default)] pub struct ConversationItemTruncateEvent { /// Optional client-generated ID used to identify this event. #[serde(skip_serializing_if = "Option::is_none")] pub event_id: Option, - /// The ID of the assistant message item to truncate. + /// The ID of the assistant message item to truncate. Only assistant message items can be truncated. pub item_id: String, - /// The index of the content part to truncate. + /// The index of the content part to truncate. Set this to `0`. pub content_index: u32, /// Inclusive duration up to which audio is truncated, in milliseconds. + /// If the audio_end_ms is greater than the actual audio duration, the server will respond with an error. pub audio_end_ms: u32, } @@ -81,7 +100,7 @@ pub struct ResponseCreateEvent { #[serde(skip_serializing_if = "Option::is_none")] pub event_id: Option, - /// Configuration for the response. 
+ /// Create a new Realtime response with these parameters pub response: Option, } @@ -90,47 +109,127 @@ pub struct ResponseCancelEvent { /// Optional client-generated ID used to identify this event. #[serde(skip_serializing_if = "Option::is_none")] pub event_id: Option, + + /// A specific response ID to cancel - if not provided, will cancel an + /// in-progress response in the default conversation. + #[serde(skip_serializing_if = "Option::is_none")] + pub response_id: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone, Default)] +pub struct OutputAudioBufferClearEvent { + /// Optional client-generated ID used to identify this event. + #[serde(skip_serializing_if = "Option::is_none")] + pub event_id: Option, } /// These are events that the OpenAI Realtime WebSocket server will accept from the client. #[derive(Debug, Serialize, Deserialize)] #[serde(tag = "type")] pub enum ClientEvent { - /// Send this event to update the session’s default configuration. + /// Send this event to update the session's configuration. The client may send this event at any time to update any field + /// except for `voice` and `model`. `voice` can be updated only if there have been no other audio outputs yet. + /// + /// When the server receives a `session.update`, it will respond with a `session.updated` event showing the full, effective + /// configuration. Only the fields that are present in the `session.update` are updated. To clear a field like `instructions`, + /// pass an empty string. To clear a field like `tools`, pass an empty array. To clear a field like `turn_detection`, pass `null`. #[serde(rename = "session.update")] SessionUpdate(SessionUpdateEvent), - /// Send this event to append audio bytes to the input audio buffer. + /// Send this event to append audio bytes to the input audio buffer. The audio buffer is temporary storage you can write to and later commit. + /// A "commit" will create a new user message item in the conversation history from the buffer content and clear the buffer. Input audio + /// transcription (if enabled) will be generated when the buffer is committed. + /// + /// If VAD is enabled the audio buffer is used to detect speech and the server will decide when to commit. When Server VAD is disabled, + /// you must commit the audio buffer manually. Input audio noise reduction operates on writes to the audio buffer. + /// + /// The client may choose how much audio to place in each event up to a maximum of 15 MiB, for example streaming smaller chunks from the + /// client may allow the VAD to be more responsive. Unlike most other client events, the server will not send a confirmation response to + /// this event. #[serde(rename = "input_audio_buffer.append")] InputAudioBufferAppend(InputAudioBufferAppendEvent), - /// Send this event to commit audio bytes to a user message. + /// Send this event to commit the user input audio buffer, which will create a new user message item in the conversation. + /// This event will produce an error if the input audio buffer is empty. + /// When in Server VAD mode, the client does not need to send this event, the server will commit the audio buffer automatically. + /// Committing the input audio buffer will trigger input audio transcription (if enabled in session configuration), but it will not create a response from the model. + /// The server will respond with an input_audio_buffer.committed event. 
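+    ///
+    /// A rough sketch of the manual append-then-commit flow. Here `ws.send_text` stands in
+    /// for whatever WebSocket transport the client uses, and `InputAudioBufferCommitEvent`
+    /// is assumed to keep its `Default` impl:
+    ///
+    /// ```ignore
+    /// // Append a base64-encoded chunk to the input audio buffer.
+    /// let append = ClientEvent::InputAudioBufferAppend(InputAudioBufferAppendEvent {
+    ///     event_id: None,
+    ///     audio: base64_chunk,
+    /// });
+    /// ws.send_text(String::from(&append)).await?;
+    ///
+    /// // Commit the buffered audio: this creates a user message item but does not, by
+    /// // itself, trigger a model response.
+    /// let commit = ClientEvent::InputAudioBufferCommit(InputAudioBufferCommitEvent::default());
+    /// ws.send_text(String::from(&commit)).await?;
+    /// ```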
#[serde(rename = "input_audio_buffer.commit")] InputAudioBufferCommit(InputAudioBufferCommitEvent), /// Send this event to clear the audio bytes in the buffer. + /// The server will respond with an `input_audio_buffer.cleared` event. #[serde(rename = "input_audio_buffer.clear")] InputAudioBufferClear(InputAudioBufferClearEvent), - /// Send this event when adding an item to the conversation. + /// Add a new Item to the Conversation's context, including messages, function calls, and function call responses. + /// This event can be used both to populate a "history" of the conversation and to add new items mid-stream, + /// but has the current limitation that it cannot populate assistant audio messages. + /// + /// If successful, the server will respond with a `conversation.item.created` event, otherwise an `error` event will be sent. #[serde(rename = "conversation.item.create")] ConversationItemCreate(ConversationItemCreateEvent), - /// Send this event when you want to truncate a previous assistant message’s audio. + /// Send this event when you want to retrieve the server's representation of a specific item in the conversation history. + /// This is useful, for example, to inspect user audio after noise cancellation and VAD. + /// The server will respond with a `conversation.item.retrieved` event, unless the item does not exist in the conversation history, + /// in which case the server will respond with an error. + #[serde(rename = "conversation.item.retrieve")] + ConversationItemRetrieve(ConversationItemRetrieveEvent), + + /// Send this event to truncate a previous assistant message's audio. The server will produce audio faster than realtime, + /// so this event is useful when the user interrupts to truncate audio that has already been sent to the client but not + /// yet played. This will synchronize the server's understanding of the audio with the client's playback. + /// + /// Truncating audio will delete the server-side text transcript to ensure there is not text in the context that hasn't + /// been heard by the user. + /// + /// If successful, the server will respond with a `conversation.item.truncated` event. #[serde(rename = "conversation.item.truncate")] ConversationItemTruncate(ConversationItemTruncateEvent), - /// Send this event when you want to remove any item from the conversation history. + /// Send this event when you want to remove any item from the conversation history. The server will respond with a + /// `conversation.item.deleted` event, unless the item does not exist in the conversation history, in which case the + /// server will respond with an error. #[serde(rename = "conversation.item.delete")] ConversationItemDelete(ConversationItemDeleteEvent), - /// Send this event to trigger a response generation. + /// This event instructs the server to create a Response, which means triggering model inference. + /// When in Server VAD mode, the server will create Responses automatically. + /// + /// A Response will include at least one Item, and may have two, in which case the second will be a function call. + /// These Items will be appended to the conversation history by default. + /// + /// The server will respond with a `response.created` event, events for Items and content created, and finally a + /// `response.done` event to indicate the Response is complete. + /// + /// The `response.create` event includes inference configuration like `instructions` and `tools`. If these are set, they will + /// override the Session's configuration for this Response only. 
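+    ///
+    /// As a minimal sketch, sending the event with `response: None` asks the server to
+    /// generate a response from the default conversation using the session's current
+    /// configuration (`ws.send_text` is a stand-in for the client's transport):
+    ///
+    /// ```ignore
+    /// let trigger = ClientEvent::ResponseCreate(ResponseCreateEvent {
+    ///     event_id: None,
+    ///     // Set to Some(ResponseResource { .. }) to override e.g. instructions or tools
+    ///     // for this Response only.
+    ///     response: None,
+    /// });
+    /// ws.send_text(String::from(&trigger)).await?;
+    /// ```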
+ /// + /// Responses can be created out-of-band of the default Conversation, meaning that they can have arbitrary input, and + /// it's possible to disable writing the output to the Conversation. Only one Response can write to the default + /// Conversation at a time, but otherwise multiple Responses can be created in parallel. The `metadata` field is a good + /// way to disambiguate multiple simultaneous Responses. + /// + /// Clients can set `conversation` to `none` to create a Response that does not write to the default Conversation. + /// Arbitrary input can be provided with the `input` field, which is an array accepting raw Items and references to + /// existing Items. #[serde(rename = "response.create")] ResponseCreate(ResponseCreateEvent), - /// Send this event to cancel an in-progress response. + /// Send this event to cancel an in-progress response. The server will respond with a `response.done` event + /// with a status of `response.status=cancelled`. If there is no response to cancel, the server will respond + /// with an error. It's safe to call `response.cancel` even if no response is in progress, an error will be + /// returned the session will remain unaffected. #[serde(rename = "response.cancel")] ResponseCancel(ResponseCancelEvent), + + /// **WebRTC Only:** Emit to cut off the current audio response. + /// This will trigger the server to stop generating audio and emit a `output_audio_buffer.cleared` event. + /// This event should be preceded by a `response.cancel` client event to stop the generation of the current response. + /// [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc) + #[serde(rename = "output_audio_buffer.clear")] + OutputAudioBufferClear(OutputAudioBufferClearEvent), } impl From<&ClientEvent> for String { @@ -196,8 +295,18 @@ event_from!( ClientEvent, ConversationItemDelete ); +event_from!( + ConversationItemRetrieveEvent, + ClientEvent, + ConversationItemRetrieve +); event_from!(ResponseCreateEvent, ClientEvent, ResponseCreate); event_from!(ResponseCancelEvent, ClientEvent, ResponseCancel); +event_from!( + OutputAudioBufferClearEvent, + ClientEvent, + OutputAudioBufferClear +); message_from_event!(SessionUpdateEvent, ClientEvent); message_from_event!(InputAudioBufferAppendEvent, ClientEvent); @@ -206,8 +315,10 @@ message_from_event!(InputAudioBufferClearEvent, ClientEvent); message_from_event!(ConversationItemCreateEvent, ClientEvent); message_from_event!(ConversationItemTruncateEvent, ClientEvent); message_from_event!(ConversationItemDeleteEvent, ClientEvent); +message_from_event!(ConversationItemRetrieveEvent, ClientEvent); message_from_event!(ResponseCreateEvent, ClientEvent); message_from_event!(ResponseCancelEvent, ClientEvent); +message_from_event!(OutputAudioBufferClearEvent, ClientEvent); impl From for ConversationItemCreateEvent { fn from(value: Item) -> Self { From 70214d71cc24b675ac217471eebc77f0b68e903f Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Sat, 1 Nov 2025 13:13:19 -0700 Subject: [PATCH 02/42] updated server event --- .../src/types/realtime/server_event.rs | 477 +++++++++++++++--- 1 file changed, 400 insertions(+), 77 deletions(-) diff --git a/async-openai/src/types/realtime/server_event.rs b/async-openai/src/types/realtime/server_event.rs index 8795f6e4..0d35690a 100644 --- a/async-openai/src/types/realtime/server_event.rs +++ b/async-openai/src/types/realtime/server_event.rs @@ -1,8 +1,8 @@ use serde::{Deserialize, Serialize}; use super::{ - 
content_part::ContentPart, conversation::Conversation, error::RealtimeAPIError, item::Item, - rate_limit::RateLimit, response_resource::ResponseResource, session_resource::SessionResource, + content_part::ContentPart, error::RealtimeAPIError, item::Item, rate_limit::RateLimit, + response_resource::ResponseResource, session_resource::SessionResource, }; #[derive(Debug, Serialize, Deserialize, Clone)] @@ -30,19 +30,31 @@ pub struct SessionUpdatedEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ConversationCreatedEvent { +pub struct ConversationItemAddedEvent { /// The unique ID of the server event. pub event_id: String, - /// The conversation resource. - pub conversation: Conversation, + /// A single item within a Realtime conversation. + pub item: Item, + /// The ID of the item that precedes this one, if any. This is used to maintain ordering when items are inserted. + pub previous_item_id: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct ConversationItemDoneEvent { + /// The unique ID of the server event. + pub event_id: String, + /// A single item within a Realtime conversation. + pub item: Item, + /// The ID of the item that precedes this one, if any. This is used to maintain ordering when items are inserted. + pub previous_item_id: Option, } #[derive(Debug, Serialize, Deserialize, Clone)] pub struct InputAudioBufferCommitedEvent { /// The unique ID of the server event. pub event_id: String, - /// The ID of the preceding item after which the new item will be inserted. - pub previous_item_id: String, + /// The ID of the preceding item after which the new item will be inserted. Can be null if the item has no predecessor. + pub previous_item_id: Option, /// The ID of the user message item that will be created. pub item_id: String, } @@ -57,7 +69,9 @@ pub struct InputAudioBufferClearedEvent { pub struct InputAudioBufferSpeechStartedEvent { /// The unique ID of the server event. pub event_id: String, - /// Milliseconds since the session started when speech was detected. + /// Milliseconds from the start of all audio written to the buffer during the session when speech was + /// first detected. This will correspond to the beginning of audio sent to the model, and thus includes + /// the `prefix_padding_ms` configured in the Session. pub audio_start_ms: u32, /// The ID of the user message item that will be created when speech stops. pub item_id: String, @@ -67,20 +81,47 @@ pub struct InputAudioBufferSpeechStartedEvent { pub struct InputAudioBufferSpeechStoppedEvent { /// The unique ID of the server event. pub event_id: String, - /// Milliseconds since the session started when speech stopped. + /// Milliseconds since the session started when speech stopped. This will correspond to the end of + /// audio sent to the model, and thus includes the `min_silence_duration_ms` configured in the Session. pub audio_end_ms: u32, /// The ID of the user message item that will be created. pub item_id: String, } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ConversationItemCreatedEvent { +pub struct InputAudioBufferTimeoutTriggeredEvent { /// The unique ID of the server event. pub event_id: String, - /// The ID of the preceding item. - pub previous_item_id: Option, - /// The item that was created. - pub item: Item, + /// Millisecond offset of audio written to the input audio buffer at the time the timeout was triggered. 
+ pub audio_end_ms: u32, + /// Millisecond offset of audio written to the input audio buffer that was after the playback time of the last model response. + pub audio_start_ms: u32, + /// The ID of the item associated with this segment. + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct OutputAudioBufferStartedEvent { + /// The unique ID of the server event. + pub event_id: String, + /// The unique ID of the response that produced the audio. + pub response_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct OutputAudioBufferStoppedEvent { + /// The unique ID of the server event. + pub event_id: String, + /// The unique ID of the response that produced the audio. + pub response_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct OutputAudioBufferClearedEvent { + /// The unique ID of the server event. + pub event_id: String, + /// The unique ID of the response that produced the audio. + pub response_id: String, } #[derive(Debug, Serialize, Deserialize, Clone)] @@ -94,11 +135,46 @@ pub struct LogProb { pub token: String, } +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct InputTokenDetails { + /// Number of audio tokens billed for this request. + pub audio_tokens: u32, + /// Number of text tokens billed for this request. + pub text_tokens: u32, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct TokenUsage { + /// Number of input tokens billed for this request. + pub input_tokens: u32, + /// Number of output tokens generated. + pub output_tokens: u32, + /// Total number of tokens used (input + output). + pub total_tokens: u32, + /// Details about the input tokens billed for this request. + pub input_token_details: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct DurationUsage { + ///Duration of the input audio in seconds. + pub seconds: f32, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(tag = "type")] +pub enum Usage { + #[serde(rename = "tokens")] + TokenUsage(TokenUsage), + #[serde(rename = "duration")] + DurationUsage(DurationUsage), +} + #[derive(Debug, Serialize, Deserialize, Clone)] pub struct ConversationItemInputAudioTranscriptionCompletedEvent { /// The unique ID of the server event. pub event_id: String, - /// The ID of the user message item. + /// The ID of the item containing the audio that is being transcribed. pub item_id: String, /// The index of the content part containing the audio. pub content_index: u32, @@ -106,19 +182,26 @@ pub struct ConversationItemInputAudioTranscriptionCompletedEvent { pub transcript: String, /// Optional per-token log probability data. pub logprobs: Option>, + /// Usage statistics for the transcription, this is billed according to the ASR model's pricing rather than + /// the realtime model's pricing. + pub usage: Usage, } #[derive(Debug, Serialize, Deserialize, Clone)] pub struct ConversationItemInputAudioTranscriptionDeltaEvent { /// The unique ID of the server event. pub event_id: String, - /// The ID of the user message item. + /// The ID of the item containing the audio that is being transcribed. pub item_id: String, - /// The index of the content part containing the audio. + ///The index of the content part in the item's content array. pub content_index: u32, /// The text delta. pub delta: String, - /// Optional per-token log probability data. + /// The log probabilities of the transcription. 
These can be enabled by configurating the session with + /// `"include": ["item.input_audio_transcription.logprobs"]`. Each entry in the array + /// corresponds a log probability of which token would be selected for this chunk of transcription. This + /// can help to identify if it was possible there were multiple valid options for a given chunk of + /// transcription. pub logprobs: Option>, } @@ -154,6 +237,34 @@ pub struct ConversationItemDeletedEvent { pub item_id: String, } +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct ConversationItemRetrievedEvent { + /// The unique ID of the server event. + pub event_id: String, + /// A single item within a Realtime conversation. + pub item: Item, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct ConversationItemInputAudioTranscriptionSegmentEvent { + /// The unique ID of the server event. + pub event_id: String, + /// The ID of the item containing the input audio content. + pub item_id: String, + /// The index of the input audio content part within the item. + pub content_index: u32, + /// The text for this segment. + pub text: String, + /// The segment identifier. + pub id: String, + /// The detected speaker label for this segment. + pub speaker: String, + /// Start time of the segment in seconds. + pub start: f32, + /// End time of the segment in seconds. + pub end: f32, +} + #[derive(Debug, Serialize, Deserialize, Clone)] pub struct ResponseCreatedEvent { /// The unique ID of the server event. @@ -174,11 +285,11 @@ pub struct ResponseDoneEvent { pub struct ResponseOutputItemAddedEvent { /// The unique ID of the server event. pub event_id: String, - /// The ID of the response to which the item belongs. + /// The ID of the Response to which the item belongs. pub response_id: String, - /// The index of the output item in the response. + /// The index of the output item in the Response. pub output_index: u32, - /// The item that was added. + /// A single item within a Realtime conversation. pub item: Item, } @@ -188,9 +299,9 @@ pub struct ResponseOutputItemDoneEvent { pub event_id: String, /// The ID of the response to which the item belongs. pub response_id: String, - /// The index of the output item in the response. + /// The index of the output item in the Response. pub output_index: u32, - /// The completed item. + /// A single item within a Realtime conversation. pub item: Item, } @@ -216,7 +327,7 @@ pub struct ResponseContentPartDoneEvent { pub event_id: String, /// The ID of the response. pub response_id: String, - /// The ID of the item to which the content part was added. + /// The ID of the item. pub item_id: String, /// The index of the output item in the response. pub output_index: u32, @@ -227,7 +338,7 @@ pub struct ResponseContentPartDoneEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseTextDeltaEvent { +pub struct ResponseOutputTextDeltaEvent { /// The unique ID of the server event. pub event_id: String, /// The ID of the response. @@ -243,7 +354,7 @@ pub struct ResponseTextDeltaEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseTextDoneEvent { +pub struct ResponseOutputTextDoneEvent { /// The unique ID of the server event. pub event_id: String, /// The ID of the response. @@ -259,7 +370,7 @@ pub struct ResponseTextDoneEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseAudioTranscriptDeltaEvent { +pub struct ResponseOutputAudioTranscriptDeltaEvent { /// The unique ID of the server event. 
pub event_id: String, /// The ID of the response. @@ -275,7 +386,7 @@ pub struct ResponseAudioTranscriptDeltaEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseAudioTranscriptDoneEvent { +pub struct ResponseOutputAudioTranscriptDoneEvent { /// The unique ID of the server event. pub event_id: String, /// The ID of the response. @@ -291,7 +402,7 @@ pub struct ResponseAudioTranscriptDoneEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseAudioDeltaEvent { +pub struct ResponseOutputAudioDeltaEvent { /// The unique ID of the server event. pub event_id: String, /// The ID of the response. @@ -307,7 +418,7 @@ pub struct ResponseAudioDeltaEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseAudioDoneEvent { +pub struct ResponseOutputAudioDoneEvent { /// The unique ID of the server event. pub event_id: String, /// The ID of the response. @@ -359,78 +470,256 @@ pub struct RateLimitsUpdatedEvent { pub rate_limits: Vec, } +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct MCPListToolsInProgressEvent { + /// The unique ID of the server event. + pub event_id: String, + /// The ID of the MCP list tools item. + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct MCPListToolsCompletedEvent { + /// The unique ID of the server event. + pub event_id: String, + /// The ID of the MCP list tools item. + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct MCPListToolsFailedEvent { + /// The unique ID of the server event. + pub event_id: String, + /// The ID of the MCP list tools item. + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct ResponseMCPCallArgumentsDeltaEvent { + /// The unique ID of the server event. + pub event_id: String, + /// The ID of the response. + pub response_id: String, + /// The ID of the MCP tool call item. + pub item_id: String, + /// The index of the output item in the response. + pub output_index: u32, + /// The JSON-encoded arguments delta. + pub delta: String, + /// If present, indicates the delta text was obfuscated. + #[serde(skip_serializing_if = "Option::is_none")] + pub obfuscation: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct ResponseMCPCallArgumentsDoneEvent { + /// The unique ID of the server event. + pub event_id: String, + /// The ID of the response. + pub response_id: String, + /// The ID of the MCP tool call item. + pub item_id: String, + /// The index of the output item in the response. + pub output_index: u32, + /// The final JSON-encoded arguments string. + pub arguments: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct ResponseMCPCallInProgressEvent { + /// The unique ID of the server event. + pub event_id: String, + /// The index of the output item in the response. + pub output_index: u32, + /// The ID of the MCP tool call item. + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct ResponseMCPCallCompletedEvent { + /// The unique ID of the server event. + pub event_id: String, + /// The index of the output item in the response. + pub output_index: u32, + /// The ID of the MCP tool call item. + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct ResponseMCPCallFailedEvent { + /// The unique ID of the server event. + pub event_id: String, + /// The index of the output item in the response. + pub output_index: u32, + /// The ID of the MCP tool call item. 
+ pub item_id: String, +} + /// These are events emitted from the OpenAI Realtime WebSocket server to the client. #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(tag = "type")] pub enum ServerEvent { - /// Returned when an error occurs. + /// Returned when an error occurs, which could be a client problem or a server problem. + /// Most errors are recoverable and the session will stay open, we recommend to + /// implementors to monitor and log error messages by default. #[serde(rename = "error")] Error(ErrorEvent), - /// Returned when a session is created. Emitted automatically when a new connection is established. + /// Returned when a Session is created. Emitted automatically when a new connection is established as the first server event. + /// This event will contain the default Session configuration. #[serde(rename = "session.created")] SessionCreated(SessionCreatedEvent), - /// Returned when a session is updated. + /// Returned when a session is updated with a `session.update` event, unless there is an error. #[serde(rename = "session.updated")] SessionUpdated(SessionUpdatedEvent), - /// Returned when a conversation is created. Emitted right after session creation. - #[serde(rename = "conversation.created")] - ConversationCreated(ConversationCreatedEvent), - - /// Returned when an input audio buffer is committed, either by the client or automatically in server VAD mode. - #[serde(rename = "input_audio_buffer.committed")] - InputAudioBufferCommited(InputAudioBufferCommitedEvent), - - /// Returned when the input audio buffer is cleared by the client. - #[serde(rename = "input_audio_buffer.cleared")] - InputAudioBufferCleared(InputAudioBufferClearedEvent), - - /// Returned in server turn detection mode when speech is detected. - #[serde(rename = "input_audio_buffer.speech_started")] - InputAudioBufferSpeechStarted(InputAudioBufferSpeechStartedEvent), - - /// Returned in server turn detection mode when speech stops. - #[serde(rename = "input_audio_buffer.speech_stopped")] - InputAudioBufferSpeechStopped(InputAudioBufferSpeechStoppedEvent), - - /// Returned when a conversation item is created. - #[serde(rename = "conversation.item.created")] - ConversationItemCreated(ConversationItemCreatedEvent), - - /// Returned when input audio transcription is enabled and a transcription succeeds. + /// Sent by the server when an Item is added to the default Conversation. This can happen in several cases: + /// - When the client sends a conversation.item.create event + /// - When the input audio buffer is committed. In this case the item will be a user message containing the audio from the buffer. + /// - When the model is generating a Response. In this case the `conversation.item.added` event will be sent when the model starts + /// generating a specific Item, and thus it will not yet have any content (and `status` will be `in_progress`). + /// + /// The event will include the full content of the Item (except when model is generating a Response) except for audio data, + /// which can be retrieved separately with a `conversation.item.retrieve` event if necessary. + #[serde(rename = "conversation.item.added")] + ConversationItemAdded(ConversationItemAddedEvent), + + /// Returned when a conversation item is finalized. + /// + /// The event will include the full content of the Item except for audio data, which can be retrieved + /// separately with a `conversation.item.retrieve` event if needed. 
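+    ///
+    /// A rough sketch of fetching the audio for a finalized item; this assumes `Item`
+    /// exposes an optional `id` and that `ws.send_text` is the client's transport:
+    ///
+    /// ```ignore
+    /// if let ServerEvent::ConversationItemDone(done) = server_event {
+    ///     if let Some(item_id) = done.item.id {
+    ///         // Ask the server for its full representation, including audio data.
+    ///         let retrieve = ClientEvent::ConversationItemRetrieve(
+    ///             ConversationItemRetrieveEvent { event_id: None, item_id },
+    ///         );
+    ///         ws.send_text(String::from(&retrieve)).await?;
+    ///     }
+    /// }
+    /// ```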
+ #[serde(rename = "conversation.item.done")] + ConversationItemDone(ConversationItemDoneEvent), + + /// Returned when a conversation item is retrieved with `conversation.item.retrieve`. + /// This is provided as a way to fetch the server's representation of an item, for example to get access + /// to the post-processed audio data after noise cancellation and VAD. + /// It includes the full content of the Item, including audio data. + #[serde(rename = "conversation.item.retrieved")] + ConversationItemRetrieved(ConversationItemRetrievedEvent), + + /// This event is the output of audio transcription for user audio written to the user audio + /// buffer. Transcription begins when the input audio buffer is committed by the client or + /// server (when VAD is enabled). Transcription runs asynchronously with Response + /// creation, so this event may come before or after the Response events. + /// + /// Realtime API models accept audio natively, and thus input transcription is a separate process + /// run on a separate ASR (Automatic Speech Recognition) model. The transcript + /// may diverge somewhat from the model's interpretation, and should be treated as a rough guide. #[serde(rename = "conversation.item.input_audio_transcription.completed")] ConversationItemInputAudioTranscriptionCompleted( ConversationItemInputAudioTranscriptionCompletedEvent, ), + /// Returned when the text value of an input audio transcription content part is updated with incremental transcription results. #[serde(rename = "conversation.item.input_audio_transcription.delta")] ConversationItemInputAudioTranscriptionDelta(ConversationItemInputAudioTranscriptionDeltaEvent), + /// Returned when an input audio transcription segment is identified for an item. + #[serde(rename = "conversation.item.input_audio_transcription.segment")] + ConversationItemInputAudioTranscriptionSegment( + ConversationItemInputAudioTranscriptionSegmentEvent, + ), + /// Returned when input audio transcription is configured, and a transcription request for a user message failed. + /// These events are separate from other `error` events so that the client can identify the related Item. #[serde(rename = "conversation.item.input_audio_transcription.failed")] ConversationItemInputAudioTranscriptionFailed( ConversationItemInputAudioTranscriptionFailedEvent, ), - /// Returned when an earlier assistant audio message item is truncated by the client. + /// Returned when an earlier assistant audio message item is truncated by the client with a `conversation.item.truncate` event. + /// This event is used to synchronize the server's understanding of the audio with the client's playback. + /// + /// This action will truncate the audio and remove the server-side text transcript to ensure there is no text in the + /// context that hasn't been heard by the user. #[serde(rename = "conversation.item.truncated")] ConversationItemTruncated(ConversationItemTruncatedEvent), - /// Returned when an item in the conversation is deleted. + /// Returned when an item in the conversation is deleted by the client with a `conversation.item.delete` event. + /// This event is used to synchronize the server's understanding of the conversation history with the client's view. #[serde(rename = "conversation.item.deleted")] ConversationItemDeleted(ConversationItemDeletedEvent), - /// Returned when a new Response is created. The first event of response creation, where the response is in an initial state of "in_progress". 
+ /// Returned when an input audio buffer is committed, either by the client or automatically in server VAD mode. + /// The `item_id` property is the ID of the user message item that will be created, + /// thus a `conversation.item.created` event will also be sent to the client. + #[serde(rename = "input_audio_buffer.committed")] + InputAudioBufferCommited(InputAudioBufferCommitedEvent), + + /// Returned when the input audio buffer is cleared by the client with a `input_audio_buffer.clear` event. + #[serde(rename = "input_audio_buffer.cleared")] + InputAudioBufferCleared(InputAudioBufferClearedEvent), + + /// Sent by the server when in `server_vad` mode to indicate that speech has been detected in the audio buffer. + /// This can happen any time audio is added to the buffer (unless speech is already detected). + /// The client may want to use this event to interrupt audio playback or provide visual feedback to the user. + /// + /// The client should expect to receive a `input_audio_buffer.speech_stopped` event when speech stops. + /// The `item_id` property is the ID of the user message item that will be created when speech stops and will + /// also be included in the `input_audio_buffer.speech_stopped` event (unless the client manually commits the + /// audio buffer during VAD activation). + #[serde(rename = "input_audio_buffer.speech_started")] + InputAudioBufferSpeechStarted(InputAudioBufferSpeechStartedEvent), + + /// Returned in `server_vad` mode when the server detects the end of speech in the audio buffer. + /// The server will also send a `conversation.item.created` event with the user message item that is created from the audio buffer. + #[serde(rename = "input_audio_buffer.speech_stopped")] + InputAudioBufferSpeechStopped(InputAudioBufferSpeechStoppedEvent), + + /// Returned when the Server VAD timeout is triggered for the input audio buffer. This is + /// configured with `idle_timeout_ms` in the `turn_detection` settings of the session, and + /// it indicates that there hasn't been any speech detected for the configured duration. + /// + /// The `audio_start_ms` and `audio_end_ms` fields indicate the segment of audio after the + /// last model response up to the triggering time, as an offset from the beginning of audio + /// written to the input audio buffer. This means it demarcates the segment of audio that + /// was silent and the difference between the start and end values will roughly match the configured timeout. + /// + /// The empty audio will be committed to the conversation as an `input_audio` item (there + /// will be a `input_audio_buffer.committed` event) and a model response will be generated. + /// There may be speech that didn't trigger VAD but is still detected by the model, so the model may respond + /// with something relevant to the conversation or a prompt to continue speaking. + #[serde(rename = "input_audio_buffer.timeout_triggered")] + InputAudioBufferTimeoutTriggered(InputAudioBufferTimeoutTriggeredEvent), + + /// *WebRTC Only*: Emitted when the server begins streaming audio to the client. This + /// event is emitted after an audio content part has been added (`response.content_part.added`) to the response. + /// [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc). 
+ #[serde(rename = "output_audio_buffer.started")] + OutputAudioBufferStarted(OutputAudioBufferStartedEvent), + + /// *WebRTC Only*: Emitted when the output audio buffer has been completely drained on + /// the server, and no more audio is forthcoming. This event is emitted after the full response data has been sent + /// to the client (`response.done`). [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc). + #[serde(rename = "output_audio_buffer.stopped")] + OutputAudioBufferStopped(OutputAudioBufferStoppedEvent), + + /// *WebRTC Only*: Emitted when the output audio buffer is cleared. This happens either in + /// VAD mode when the user has interrupted (`input_audio_buffer.speech_started`), or when the client has + /// emitted the `output_audio_buffer.clear` event to manually cut off the current audio response. + /// [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc). + #[serde(rename = "output_audio_buffer.cleared")] + OutputAudioBufferCleared(OutputAudioBufferClearedEvent), + + /// Returned when a new Response is created. The first event of response creation, + /// where the response is in an initial state of `in_progress`. #[serde(rename = "response.created")] ResponseCreated(ResponseCreatedEvent), /// Returned when a Response is done streaming. Always emitted, no matter the final state. + /// The Response object included in the `response.done` event will include all output Items in the Response + /// but will omit the raw audio data. + /// + /// Clients should check the `status` field of the Response to determine if it was successful + /// (`completed`) or if there was another outcome: `cancelled`, `failed`, or `incomplete`. + /// + /// A response will contain all output items that were generated during the response, excluding any audio content. #[serde(rename = "response.done")] ResponseDone(ResponseDoneEvent), - /// Returned when a new Item is created during response generation. + /// Returned when a new Item is created during Response generation. #[serde(rename = "response.output_item.added")] ResponseOutputItemAdded(ResponseOutputItemAddedEvent), @@ -447,32 +736,32 @@ pub enum ServerEvent { #[serde(rename = "response.content_part.done")] ResponseContentPartDone(ResponseContentPartDoneEvent), - /// Returned when the text value of a "text" content part is updated. - #[serde(rename = "response.text.delta")] - ResponseTextDelta(ResponseTextDeltaEvent), + /// Returned when the text value of an "output_text" content part is updated. + #[serde(rename = "response.output_text.delta")] + ResponseOutputTextDelta(ResponseOutputTextDeltaEvent), - /// Returned when the text value of a "text" content part is done streaming. + /// Returned when the text value of an "output_text" content part is done streaming. /// Also emitted when a Response is interrupted, incomplete, or cancelled. - #[serde(rename = "response.text.done")] - ResponseTextDone(ResponseTextDoneEvent), + #[serde(rename = "response.output_text.done")] + ResponseOutputTextDone(ResponseOutputTextDoneEvent), /// Returned when the model-generated transcription of audio output is updated. 
- #[serde(rename = "response.audio_transcript.delta")] - ResponseAudioTranscriptDelta(ResponseAudioTranscriptDeltaEvent), + #[serde(rename = "response.output_audio_transcript.delta")] + ResponseOutputAudioTranscriptDelta(ResponseOutputAudioTranscriptDeltaEvent), /// Returned when the model-generated transcription of audio output is done streaming. /// Also emitted when a Response is interrupted, incomplete, or cancelled. - #[serde(rename = "response.audio_transcript.done")] - ResponseAudioTranscriptDone(ResponseAudioTranscriptDoneEvent), + #[serde(rename = "response.output_audio_transcript.done")] + ResponseOutputAudioTranscriptDone(ResponseOutputAudioTranscriptDoneEvent), /// Returned when the model-generated audio is updated. - #[serde(rename = "response.audio.delta")] - ResponseAudioDelta(ResponseAudioDeltaEvent), + #[serde(rename = "response.output_audio.delta")] + ResponseOutputAudioDelta(ResponseOutputAudioDeltaEvent), /// Returned when the model-generated audio is done. /// Also emitted when a Response is interrupted, incomplete, or cancelled. - #[serde(rename = "response.audio.done")] - ResponseAudioDone(ResponseAudioDoneEvent), + #[serde(rename = "response.output_audio.done")] + ResponseOutputAudioDone(ResponseOutputAudioDoneEvent), /// Returned when the model-generated function call arguments are updated. #[serde(rename = "response.function_call_arguments.delta")] @@ -483,7 +772,41 @@ pub enum ServerEvent { #[serde(rename = "response.function_call_arguments.done")] ResponseFunctionCallArgumentsDone(ResponseFunctionCallArgumentsDoneEvent), - /// Emitted after every "response.done" event to indicate the updated rate limits. + /// Returned when MCP tool call arguments are updated. + #[serde(rename = "response.mcp_call_arguments.delta")] + ResponseMCPCallArgumentsDelta(ResponseMCPCallArgumentsDeltaEvent), + + /// Returned when MCP tool call arguments are finalized during response generation. + #[serde(rename = "response.mcp_call_arguments.done")] + ResponseMCPCallArgumentsDone(ResponseMCPCallArgumentsDoneEvent), + + /// Returned when an MCP tool call is in progress. + #[serde(rename = "response.mcp_call.in_progress")] + ResponseMCPCallInProgress(ResponseMCPCallInProgressEvent), + + /// Returned when an MCP tool call has completed successfully. + #[serde(rename = "response.mcp_call.completed")] + ResponseMCPCallCompleted(ResponseMCPCallCompletedEvent), + + /// Returned when an MCP tool call has failed. + #[serde(rename = "response.mcp_call.failed")] + ResponseMCPCallFailed(ResponseMCPCallFailedEvent), + + /// Returned when listing MCP tools is in progress for an item. + #[serde(rename = "mcp_list_tools.in_progress")] + MCPListToolsInProgress(MCPListToolsInProgressEvent), + + /// Returned when listing MCP tools has completed for an item. + #[serde(rename = "mcp_list_tools.completed")] + MCPListToolsCompleted(MCPListToolsCompletedEvent), + + /// Returned when listing MCP tools has failed for an item. + #[serde(rename = "mcp_list_tools.failed")] + MCPListToolsFailed(MCPListToolsFailedEvent), + + /// Emitted at the beginning of a Response to indicate the updated rate limits. + /// When a Response is created some tokens will be "reserved" for the output tokens, the rate limits + /// shown here reflect that reservation, which is then adjusted accordingly once the Response is completed. 
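+    ///
+    /// For example, a client might log the updated limits while handling server events
+    /// (only the `name` and `limit` fields of `RateLimit` are used here):
+    ///
+    /// ```ignore
+    /// if let ServerEvent::RateLimitsUpdated(update) = server_event {
+    ///     for rate_limit in &update.rate_limits {
+    ///         println!("rate limit {}: max {}", rate_limit.name, rate_limit.limit);
+    ///     }
+    /// }
+    /// ```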
#[serde(rename = "rate_limits.updated")] RateLimitsUpdated(RateLimitsUpdatedEvent), } From 26542d05d387592a4e643fde565498d80735e08b Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Sat, 1 Nov 2025 13:13:30 -0700 Subject: [PATCH 03/42] updated rate limit --- async-openai/src/types/realtime/rate_limit.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/async-openai/src/types/realtime/rate_limit.rs b/async-openai/src/types/realtime/rate_limit.rs index f3fc4aa6..9306e236 100644 --- a/async-openai/src/types/realtime/rate_limit.rs +++ b/async-openai/src/types/realtime/rate_limit.rs @@ -2,7 +2,7 @@ use serde::{Deserialize, Serialize}; #[derive(Debug, Serialize, Deserialize, Clone)] pub struct RateLimit { - /// The name of the rate limit ("requests", "tokens", "input_tokens", "output_tokens"). + /// The name of the rate limit (requests, tokens). pub name: String, /// The maximum allowed value for the rate limit. pub limit: u32, From 6cfc72c6955f7e3cd6e945c6ee9fc12293608f83 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Sat, 1 Nov 2025 16:18:54 -0700 Subject: [PATCH 04/42] updated session configuration --- .../src/types/realtime/session_resource.rs | 448 +++++++++++++++--- 1 file changed, 378 insertions(+), 70 deletions(-) diff --git a/async-openai/src/types/realtime/session_resource.rs b/async-openai/src/types/realtime/session_resource.rs index 2fe1e5b1..9a4fb9a1 100644 --- a/async-openai/src/types/realtime/session_resource.rs +++ b/async-openai/src/types/realtime/session_resource.rs @@ -1,26 +1,22 @@ use serde::{Deserialize, Serialize}; -#[derive(Debug, Serialize, Deserialize, Clone)] -pub enum AudioFormat { - #[serde(rename = "pcm16")] - PCM16, - #[serde(rename = "g711_law")] - G711ULAW, - #[serde(rename = "g711_alaw")] - G711ALAW, -} +use crate::types::responses::RequireApproval; #[derive(Debug, Default, Serialize, Deserialize, Clone)] pub struct AudioTranscription { - /// The language of the input audio. Supplying the input language in ISO-639-1 (e.g. en) format will improve accuracy and latency. + /// The language of the input audio. Supplying the input language in + /// [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format will improve accuracy and latency. #[serde(skip_serializing_if = "Option::is_none")] pub language: Option, - /// The model to use for transcription, current options are gpt-4o-transcribe, gpt-4o-mini-transcribe, and whisper-1. + /// The model to use for transcription. Current options are `whisper-1`, + /// `gpt-4o-mini-transcribe`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. + /// Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels. #[serde(skip_serializing_if = "Option::is_none")] pub model: Option, /// An optional text to guide the model's style or continue a previous audio segment. - /// For whisper-1, the prompt is a list of keywords. For gpt-4o-transcribe models, - /// the prompt is a free text string, for example "expect words related to technology". + /// For `whisper-1`, the [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting). For `gpt-4o-transcribe` models + /// (excluding gpt-4o-transcribe-diarize), the prompt is a free text string, for example + /// "expect words related to technology". 
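+    ///
+    /// For illustration, a transcription configuration using a free-text prompt might look
+    /// like this (the values are placeholders):
+    ///
+    /// ```ignore
+    /// let transcription = AudioTranscription {
+    ///     language: Some("en".to_string()),
+    ///     model: Some("gpt-4o-transcribe".to_string()),
+    ///     prompt: Some("expect words related to technology".to_string()),
+    /// };
+    /// ```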
#[serde(skip_serializing_if = "Option::is_none")] pub prompt: Option, } @@ -28,64 +24,157 @@ pub struct AudioTranscription { #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(tag = "type")] pub enum TurnDetection { - /// Type of turn detection, only "server_vad" is currently supported. + /// Server-side voice activity detection (VAD) which flips on when user speech is detected + /// and off after a period of silence. #[serde(rename = "server_vad")] ServerVAD { - /// Activation threshold for VAD (0.0 to 1.0). - threshold: f32, - /// Amount of audio to include before speech starts (in milliseconds). - prefix_padding_ms: u32, - /// Duration of silence to detect speech stop (in milliseconds). - silence_duration_ms: u32, - /// Whether or not to automatically generate a response when a VAD stop event occurs. #[serde(skip_serializing_if = "Option::is_none")] create_response: Option, + /// Optional timeout after which a model response will be triggered automatically. + /// This is useful for situations in which a long pause from the user is unexpected, + /// such as a phone call. The model will effectively prompt the user to continue the + /// conversation based on the current context. + /// + /// The timeout value will be applied after the last model response's audio has finished + /// playing, i.e. it's set to the response.done time plus audio playback duration. + /// + /// An input_audio_buffer.timeout_triggered event (plus events associated with the Response) + /// will be emitted when the timeout is reached. Idle timeout is currently only supported + /// for server_vad mode. + #[serde(skip_serializing_if = "Option::is_none")] + idle_timeout_ms: Option, + /// Whether or not to automatically interrupt any ongoing response with output to - /// the default conversation (i.e. conversation of auto) when a VAD start event occurs. + /// the default conversation (i.e. `conversation` of `auto`) when a VAD start event occurs. #[serde(skip_serializing_if = "Option::is_none")] interrupt_response: Option, + + /// Used only for server_vad mode. Amount of audio to include before the VAD detected speech + /// (in milliseconds). Defaults to 300ms. + prefix_padding_ms: u32, + /// Used only for server_vad mode. Duration of silence to detect speech stop + /// (in milliseconds). Defaults to 500ms. With shorter values the model will respond + /// more quickly, but may jump in on short pauses from the user. + silence_duration_ms: u32, + + /// Used only for server_vad mode. Activation threshold for VAD (0.0 to 1.0), + /// this defaults to 0.5. A higher threshold will require louder audio to activate + /// the model, and thus might perform better in noisy environments. + threshold: f32, }, + /// Server-side semantic turn detection which uses a model to determine when the user has + /// finished speaking. #[serde(rename = "semantic_vad")] SemanticVAD { - /// The eagerness of the model to respond. - /// `low` will wait longer for the user to continue speaking, - /// `high`` will respond more quickly. `auto`` is the default and is equivalent to `medium` - eagerness: String, - /// Whether or not to automatically generate a response when a VAD stop event occurs. #[serde(skip_serializing_if = "Option::is_none", default)] create_response: Option, + /// Used only for `semantic_vad` mode. The eagerness of the model to respond. + /// `low` will wait longer for the user to continue speaking, `high` will respond more + /// quickly. `auto` is the default and is equivalent to `medium`. 
`low`, `medium`, and `high` + /// have max timeouts of 8s, 4s, and 2s respectively. + eagerness: String, + /// Whether or not to automatically interrupt any ongoing response with output to - /// the default conversation (i.e. conversation of auto) when a VAD start event occurs. + /// the default conversation (i.e. `conversation` of `auto`) when a VAD start event occurs. #[serde(skip_serializing_if = "Option::is_none", default)] interrupt_response: Option, }, } #[derive(Debug, Serialize, Deserialize, Clone)] -pub enum MaxResponseOutputTokens { +pub enum MaxOutputTokens { #[serde(rename = "inf")] Inf, #[serde(untagged)] Num(u16), } +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct FunctionTool { + /// The name of the function. + pub name: String, + /// The description of the function, including guidance on when and how to call it, + /// and guidance about what to tell the user when calling (if anything). + pub description: String, + /// Parameters of the function in JSON Schema. + pub parameters: serde_json::Value, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(untagged)] +pub enum AllowedTools { + /// A string array of allowed tool names + List(Vec), + /// A filter object to specify which tools are allowed. + Filter(MCPAllowedToolsFilter), +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct MCPAllowedToolsFilter { + /// Indicates whether or not a tool modifies data or is read-only. + /// If an MCP server is annotated with [readOnlyHint](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + /// it will match this filter. + #[serde(skip_serializing_if = "Option::is_none")] + pub read_only: Option, + /// List of allowed tool names. + #[serde(skip_serializing_if = "Option::is_none")] + pub tool_names: Option>, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct MCPTool { + /// A label for this MCP server, used to identify it in tool calls. + pub server_label: String, + + /// List of allowed tool names or a filter object. + pub allowed_tools: AllowedTools, + + /// An OAuth access token that can be used with a remote MCP server, either with a custom MCP + /// server URL or a service connector. Your application must handle the OAuth authorization + /// flow and provide the token here. + pub authorization: Option, + + /// Identifier for service connectors, like those available in ChatGPT. One of `server_url` or + /// `connector_id` must be provided. Learn more about service connectors [here](https://platform.openai.com/docs/guides/tools-remote-mcp#connectors). + /// + /// Currently supported `connector_id` values are: + /// - Dropbox: `connector_dropbox` + /// - Gmail: `connector_gmail` + /// - Google Calendar: `connector_googlecalendar` + /// - Google Drive: `connector_googledrive` + /// - Microsoft Teams: `connector_microsoftteams` + /// - Outlook Calendar: `connector_outlookcalendar` + /// - Outlook Email: `connector_outlookemail` + /// - SharePoint: `connector_sharepoint` + pub connector_id: Option, + + /// Optional HTTP headers to send to the MCP server. Use for authentication or other purposes. + pub headers: Option, + + /// Specify which of the MCP server's tools require approval. + pub require_approval: Option, + + /// Optional description of the MCP server, used to provide more context. + pub server_description: Option, + + /// The URL for the MCP server. One of `server_url` or `connector_id` must be provided. 
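+    ///
+    /// A hypothetical configuration pointing at a custom server (the label, tool name, and
+    /// URL below are placeholders):
+    ///
+    /// ```ignore
+    /// let mcp_tool = MCPTool {
+    ///     server_label: "docs".to_string(),
+    ///     allowed_tools: AllowedTools::List(vec!["search_docs".to_string()]),
+    ///     authorization: None,
+    ///     connector_id: None,
+    ///     headers: None,
+    ///     require_approval: None,
+    ///     server_description: Some("Internal documentation search".to_string()),
+    ///     server_url: Some("https://mcp.example.com/sse".to_string()),
+    /// };
+    /// ```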
+ pub server_url: Option, +} + #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(tag = "type")] pub enum ToolDefinition { #[serde(rename = "function")] - Function { - /// The name of the function. - name: String, - /// The description of the function. - description: String, - /// Parameters of the function in JSON Schema. - parameters: serde_json::Value, - }, + Function(FunctionTool), + /// Give the model access to additional tools via remote Model Context Protocol (MCP) servers. + /// [Learn more about MCP](https://platform.openai.com/docs/guides/tools-remote-mcp). + #[serde(rename = "mcp")] + MCP(MCPTool), } #[derive(Debug, Serialize, Deserialize, Clone)] @@ -94,17 +183,36 @@ pub enum FunctionType { Function, } +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(tag = "type")] +pub enum Tool { + /// Use this option to force the model to call a specific function. + #[serde(rename = "function")] + Function { + /// The name of the function to call. + name: String, + }, + /// Use this option to force the model to call a specific tool on a remote MCP server. + #[serde(rename = "mcp")] + MCP { + /// The name of the tool to call on the server. + name: String, + /// The label of the MCP server to use. + server_label: String, + }, +} + #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(rename_all = "lowercase")] pub enum ToolChoice { + /// `auto` means the model can pick between generating a message or calling one or more tools. Auto, + /// `none` means the model will not call any tool and instead generates a message. None, + /// `required` means the model must call one or more tools. Required, #[serde(untagged)] - Function { - r#type: FunctionType, - name: String, - }, + Tool(Tool), } #[derive(Debug, Serialize, Deserialize, Clone)] @@ -115,62 +223,262 @@ pub enum RealtimeVoice { Ballad, Coral, Echo, - Fable, - Onyx, - Nova, + Sage, Shimmer, Verse, + Marin, + Cedar, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(tag = "type")] +pub enum AudioFormat { + /// The PCM audio format. Only a 24kHz sample rate is supported. + #[serde(rename = "audio/pcm")] + PCMAudioFormat { + /// The sample rate of the audio. Always 24000. + rate: u32, + }, + /// The G.711 μ-law format. + #[serde(rename = "audio/pcmu")] + PCMUAudioFormat, + /// The G.711 A-law format. + #[serde(rename = "audio/pcma")] + PCMAAudioFormat, } #[derive(Debug, Serialize, Deserialize, Clone, Default)] -pub struct SessionResource { - /// The default model used for this session. +pub struct G711ULAWAudioFormat { + pub sample_rate: u32, + pub channels: u32, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct AudioInput { + /// The format of the input audio. + pub format: AudioFormat, + /// Configuration for input audio noise reduction. This can be set to null to turn off. + /// Noise reduction filters audio added to the input audio buffer before it is sent to VAD + /// and the model. Filtering the audio can improve VAD and turn detection accuracy + /// (reducing false positives) and model performance by improving perception of the + /// input audio. + pub noise_reduction: NoiseReduction, + /// Configuration for input audio transcription, defaults to off and can be set to `null` to turn off once on. + /// Input audio transcription is not native to the model, since the model consumes audio directly. 
+ /// Transcription runs asynchronously through [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription) + /// and should be treated as guidance of input audio content rather than precisely what the model + /// heard. The client can optionally set the language and prompt for transcription; + /// these offer additional guidance to the transcription service. + pub transcription: AudioTranscription, + + /// Configuration for turn detection, either Server VAD or Semantic VAD. This can + /// be set to null to turn off, in which case the client must manually trigger model response. + /// + /// Server VAD means that the model will detect the start and end of speech + /// based on audio volume and respond at the end of user speech. + /// + /// Semantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) + /// to semantically estimate whether the user has finished speaking, then dynamically sets + /// a timeout based on this probability. For example, if user audio trails off with "uhhm", + /// the model will score a low probability of turn end and wait longer for the user to + /// continue speaking. This can be useful for more natural conversations, but may have a + /// higher latency. + pub turn_detection: TurnDetection, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct AudioOutput { + /// The format of the output audio. + pub format: AudioFormat, + /// The speed of the model's spoken response as a multiple of the original speed. + /// 1.0 is the default speed. 0.25 is the minimum speed. 1.5 is the maximum speed. + /// This value can only be changed in between model turns, not while a response + /// is in progress. + /// + /// This parameter is a post-processing adjustment to the audio after it is generated; + /// it's also possible to prompt the model to speak faster or slower. + pub speed: f32, + /// The voice the model uses to respond. Voice cannot be changed during the session once + /// the model has responded with audio at least once. Current voice options are + /// `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, and `cedar`. + /// We recommend `marin` and `cedar` for best quality. + pub voice: RealtimeVoice, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct Audio { + pub input: AudioInput, + pub output: AudioOutput, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct Prompt { + /// The unique identifier of the prompt template to use. + pub id: String, + /// Optional map of values to substitute in for variables in your prompt. The substitution + /// values can either be strings, or other Response input types like images or files. #[serde(skip_serializing_if = "Option::is_none")] - pub model: Option, + pub variables: Option, + /// Optional version of the prompt template. + #[serde(skip_serializing_if = "Option::is_none")] + pub version: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(rename_all = "lowercase")] +pub enum Tracing { + /// Enables tracing and sets default values for tracing configuration options. Always `auto`. + Auto, + + #[serde(untagged)] + Configuration(TracingConfiguration), +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct TracingConfiguration { + /// The group id to attach to this trace to enable filtering and grouping in the Traces Dashboard. + pub group_id: String, + /// The arbitrary metadata to attach to this trace to enable filtering in the Traces Dashboard.
+ pub metadata: serde_json::Value, + /// The name of the workflow to attach to this trace. This is used to name the trace in the Traces Dashboard. + pub workflow_name: String, +} - /// The set of modalities the model can respond with. To disable audio, set this to ["text"]. +/// The truncation strategy to use for the session. +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(rename_all = "lowercase")] +pub enum Truncation { + /// `auto` is the default truncation strategy. + Auto, + /// `disabled` will disable truncation and emit errors when the conversation exceeds the input + /// token limit. + Disabled, + + /// Retain a fraction of the conversation tokens when the conversation exceeds the input token + /// limit. This allows you to amortize truncations across multiple turns, which can help improve + /// cached token usage. + #[serde(untagged)] + RetentionRatio(RetentionRatioTruncation), +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RetentionRatioTruncation { + /// Fraction of post-instruction conversation tokens to retain (0.0 - 1.0) when the conversation + /// exceeds the input token limit. Setting this to 0.8 means that messages will be dropped + /// until 80% of the maximum allowed tokens are used. This helps reduce the frequency of + /// truncations and improve cache rates. + pub retention_ratio: f32, + + /// Use retention ratio truncation. + pub r#type: String, + + /// Optional custom token limits for this truncation strategy. If not provided, the model's + /// default token limits will be used. #[serde(skip_serializing_if = "Option::is_none")] - pub modalities: Option>, + pub token_limits: Option, +} - //// The default system instructions prepended to model calls. +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct TokenLimits { + /// Maximum tokens allowed in the conversation after instructions (which including tool + /// definitions). For example, setting this to 5,000 would mean that truncation would occur + /// when the conversation exceeds 5,000 tokens after instructions. This cannot be higher + /// than the model's context window size minus the maximum output tokens. + pub post_instructions: u32, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct SessionResource { + /// The type of session to create. Always realtime for the Realtime API. + pub r#type: String, + + pub audio: Audio, + + /// Additional fields to include in server outputs. + /// + /// `item.input_audio_transcription.logprobs`: Include logprobs for input audio transcription. + #[serde(skip_serializing_if = "Option::is_none")] + pub include: Option>, + + /// The default system instructions (i.e. system message) prepended to model calls. + /// This field allows the client to guide the model on desired responses. + /// The model can be instructed on response content and format, + /// (e.g. "be extremely succinct", "act friendly", "here are examples of good responses") + /// and on audio behavior (e.g. "talk quickly", "inject emotion into your voice", + /// "laugh frequently"). The instructions are not guaranteed to be followed by the model, but + /// they provide guidance to the model on the desired behavior. + /// + /// Note that the server sets default instructions which will be used if this field is not set + /// and are visible in the `session.created` event at the start of the session. #[serde(skip_serializing_if = "Option::is_none")] pub instructions: Option, - /// The voice the model uses to respond. 
Cannot be changed once the model has responded with audio at least once. + /// Maximum number of output tokens for a single assistant response, + /// inclusive of tool calls. Provide an integer between 1 and 4096 to limit output tokens, + /// or `inf` for the maximum available tokens for a given model. Defaults to `inf`. #[serde(skip_serializing_if = "Option::is_none")] - pub voice: Option, + pub max_output_tokens: Option, - /// The format of input audio. Options are "pcm16", "g711_ulaw", or "g711_alaw". + /// The Realtime model used for this session. #[serde(skip_serializing_if = "Option::is_none")] - pub input_audio_format: Option, + pub model: Option, - /// The format of output audio. Options are "pcm16", "g711_ulaw", or "g711_alaw". + /// The set of modalities the model can respond with. It defaults to + /// `["audio"]`, indicating that the model will respond with audio plus a transcript. `["text"]` + /// can be used to make the model respond with text only. It is not possible to request both + /// `text` and `audio` at the same time. #[serde(skip_serializing_if = "Option::is_none")] - pub output_audio_format: Option, + pub output_modalities: Option>, - /// Configuration for input audio transcription. Can be set to null to turn off. + /// Reference to a prompt template and its variables. + /// [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). #[serde(skip_serializing_if = "Option::is_none")] - pub input_audio_transcription: Option, + pub prompt: Option, - /// Configuration for turn detection. Can be set to null to turn off. #[serde(skip_serializing_if = "Option::is_none")] - pub turn_detection: Option, + /// How the model chooses tools. Provide one of the string modes or force a specific + /// function/MCP tool. + pub tool_choice: Option, - /// Tools (functions) available to the model. + /// Tools available to the model. #[serde(skip_serializing_if = "Option::is_none")] pub tools: Option>, + /// Realtime API can write session traces to the [Traces Dashboard](https://platform.openai.com/logs?api=traces). + /// Set to null to disable tracing. Once tracing is enabled for a session, the configuration cannot be modified. + /// + /// `auto` will create a trace for the session with default values for the workflow name, + /// group id, and metadata. #[serde(skip_serializing_if = "Option::is_none")] - /// How the model chooses tools. - pub tool_choice: Option, + pub tracing: Option, + /// When the number of tokens in a conversation exceeds the model's input token limit, + /// the conversation will be truncated, meaning messages (starting from the oldest) will not be + /// included in the model's context. A 32k context model with 4,096 max output tokens can + /// only include 28,224 tokens in the context before truncation occurs. Clients can configure + /// truncation behavior to truncate with a lower max token limit, which is an effective way to + /// control token usage and cost. Truncation will reduce the number of cached tokens on the next + /// turn (busting the cache), since messages are dropped from the beginning of the context. + /// However, clients can also configure truncation to retain messages up to a fraction of the + /// maximum context size, which will reduce the need for future truncations and thus improve + /// the cache rate. Truncation can be disabled entirely, which means the server will never + /// truncate but would instead return an error if the conversation exceeds the model's input + /// token limit.
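    /// As a brief sketch (assuming `truncation` is an `Option<Truncation>`, that `token_limits`
    /// is an `Option<TokenLimits>`, and that the serialized tag is `"retention_ratio"`), a
    /// retention-ratio strategy could be configured like this:
    ///
    /// ```ignore
    /// let truncation = Some(Truncation::RetentionRatio(RetentionRatioTruncation {
    ///     // Keep 80% of the post-instruction tokens whenever truncation kicks in.
    ///     retention_ratio: 0.8,
    ///     // Assumed serialized tag for this strategy.
    ///     r#type: "retention_ratio".to_string(),
    ///     // Optional custom limit; use `None` to fall back to the model's defaults.
    ///     token_limits: Some(TokenLimits { post_instructions: 28_224 }),
    /// }));
    /// ```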
#[serde(skip_serializing_if = "Option::is_none")] - /// Sampling temperature for the model. - pub temperature: Option, + pub truncation: Option, +} - /// Maximum number of output tokens for a single assistant response, inclusive of tool calls. - /// Provide an integer between 1 and 4096 to limit output tokens, or "inf" for the maximum available tokens for a given model. - /// Defaults to "inf". - #[serde(skip_serializing_if = "Option::is_none")] - pub max_response_output_tokens: Option, +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(rename_all = "snake_case")] +pub enum NoiseReductionType { + NearField, + FarField, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct NoiseReduction { + /// Type of noise reduction. `near_field` is for close-talking microphones such as + /// headphones, `far_field` is for far-field microphones such as laptop or conference + /// room microphones. + pub r#type: NoiseReductionType, } From 2dc4467c572e3ca9d596b9dfe1f7dfd78b9d0266 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Sat, 1 Nov 2025 16:25:22 -0700 Subject: [PATCH 05/42] transctiption session configuration --- .../src/types/realtime/session_resource.rs | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/async-openai/src/types/realtime/session_resource.rs b/async-openai/src/types/realtime/session_resource.rs index 9a4fb9a1..ded5c759 100644 --- a/async-openai/src/types/realtime/session_resource.rs +++ b/async-openai/src/types/realtime/session_resource.rs @@ -387,6 +387,7 @@ pub struct TokenLimits { pub post_instructions: u32, } +/// Realtime session object configuration. #[derive(Debug, Serialize, Deserialize, Clone)] pub struct SessionResource { /// The type of session to create. Always realtime for the Realtime API. @@ -482,3 +483,24 @@ pub struct NoiseReduction { /// room microphones. pub r#type: NoiseReductionType, } + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct TranscriptionAudio { + pub input: AudioInput, +} + +/// Realtime transcription session object configuration. +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct TranscriptionSession { + /// The type of session to create. Always `transcription` for transcription sessions. + pub r#type: String, + + /// Configuration for input and output audio. + pub audio: TranscriptionAudio, + + /// Additional fields to include in server outputs. + /// + /// `item.input_audio_transcription.logprobs`: Include logprobs for input audio transcription. 
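    /// For example (assuming `include` is an `Option<Vec<String>>`):
    ///
    /// ```ignore
    /// let include = Some(vec!["item.input_audio_transcription.logprobs".to_string()]);
    /// ```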
+ #[serde(skip_serializing_if = "Option::is_none")] + pub include: Option>, +} From 7623dae60a9e710dd88e6d7f1f5d14fad338aab2 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Sat, 1 Nov 2025 18:46:31 -0700 Subject: [PATCH 06/42] udpates to realtime types --- .../src/types/realtime/client_event.rs | 10 +- .../src/types/realtime/conversation.rs | 13 +- .../src/types/realtime/response_resource.rs | 220 ++++++++++++++++-- .../src/types/realtime/server_event.rs | 11 +- .../src/types/realtime/session_resource.rs | 21 +- 5 files changed, 228 insertions(+), 47 deletions(-) diff --git a/async-openai/src/types/realtime/client_event.rs b/async-openai/src/types/realtime/client_event.rs index 7a49e836..3a0ddf54 100644 --- a/async-openai/src/types/realtime/client_event.rs +++ b/async-openai/src/types/realtime/client_event.rs @@ -1,9 +1,11 @@ use serde::{Deserialize, Serialize}; use tokio_tungstenite::tungstenite::Message; -use super::{item::Item, session_resource::SessionResource}; +use crate::types::realtime::{ResponseCreate, Session}; -#[derive(Debug, Serialize, Deserialize, Clone, Default)] +use super::item::Item; + +#[derive(Debug, Serialize, Deserialize, Clone)] pub struct SessionUpdateEvent { /// Optional client-generated ID used to identify this event. /// This is an arbitrary string that a client may assign. It will be passed @@ -12,7 +14,7 @@ pub struct SessionUpdateEvent { #[serde(skip_serializing_if = "Option::is_none")] pub event_id: Option, /// Update the Realtime session. Choose either a realtime session or a transcription session. - pub session: SessionResource, + pub session: Session, } #[derive(Debug, Serialize, Deserialize, Clone, Default)] @@ -101,7 +103,7 @@ pub struct ResponseCreateEvent { pub event_id: Option, /// Create a new Realtime response with these parameters - pub response: Option, + pub response: Option, } #[derive(Debug, Serialize, Deserialize, Clone, Default)] diff --git a/async-openai/src/types/realtime/conversation.rs b/async-openai/src/types/realtime/conversation.rs index 3ea43bd8..e678ede8 100644 --- a/async-openai/src/types/realtime/conversation.rs +++ b/async-openai/src/types/realtime/conversation.rs @@ -1,10 +1,9 @@ use serde::{Deserialize, Serialize}; -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct Conversation { - /// The unique ID of the conversation. - pub id: String, - - /// The object type, must be "realtime.conversation". - pub object: String, +#[derive(Debug, Serialize, Deserialize, Clone, Default)] +#[serde(rename_all = "lowercase")] +pub enum Conversation { + #[default] + Auto, + None, } diff --git a/async-openai/src/types/realtime/response_resource.rs b/async-openai/src/types/realtime/response_resource.rs index a6c6c32f..a32b5600 100644 --- a/async-openai/src/types/realtime/response_resource.rs +++ b/async-openai/src/types/realtime/response_resource.rs @@ -1,12 +1,73 @@ use serde::{Deserialize, Serialize}; -use super::item::Item; +use crate::types::realtime::{ + AudioFormat, Conversation, MaxOutputTokens, Prompt, RealtimeVoice, ToolChoice, ToolDefinition, +}; #[derive(Debug, Serialize, Deserialize, Clone)] pub struct Usage { - pub total_tokens: u32, + /// Details about the input tokens used in the Response. Cached tokens are tokens from previous + /// turns in the conversation that are included as context for the current response. Cached tokens + /// here are counted as a subset of input tokens, meaning input tokens will include cached and + /// uncached tokens. 
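    /// A small sketch of splitting billed input into cached and uncached portions (assuming
    /// `input_token_details` is an `Option<InputTokenDetails>` and the counts are `u32`s):
    ///
    /// ```ignore
    /// let cached = usage
    ///     .input_token_details
    ///     .as_ref()
    ///     .and_then(|details| details.cached_tokens)
    ///     .unwrap_or(0);
    /// // Cached tokens are a subset of `input_tokens`, so the rest were uncached.
    /// let uncached = usage.input_tokens - cached;
    /// ```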
+ #[serde(skip_serializing_if = "Option::is_none")] + pub input_token_details: Option, + + /// The number of input tokens used in the Response, including text and audio tokens. pub input_tokens: u32, + + #[serde(skip_serializing_if = "Option::is_none")] + pub output_token_details: Option, + + /// The number of output tokens sent in the Response, including text and audio tokens. pub output_tokens: u32, + + /// The total number of tokens in the Response including input and output text and audio tokens. + pub total_tokens: u32, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct InputTokenDetails { + /// The number of audio tokens used as input for the Response. + #[serde(skip_serializing_if = "Option::is_none")] + pub audio_tokens: Option, + /// The number of cached tokens used as input for the Response. + #[serde(skip_serializing_if = "Option::is_none")] + pub cached_tokens: Option, + + /// Details about the cached tokens used as input for the Response. + pub cached_token_details: Option, + + /// The number of image tokens used as input for the Response. + #[serde(skip_serializing_if = "Option::is_none")] + pub image_tokens: Option, + + /// The number of text tokens used as input for the Response. + #[serde(skip_serializing_if = "Option::is_none")] + pub text_tokens: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct CachedTokenDetails { + /// The number of cached audio tokens used as input for the Response. + #[serde(skip_serializing_if = "Option::is_none")] + pub audio_tokens: Option, + + /// The number of cached image tokens used as input for the Response. + #[serde(skip_serializing_if = "Option::is_none")] + pub image_tokens: Option, + + /// The number of cached text tokens used as input for the Response. + #[serde(skip_serializing_if = "Option::is_none")] + pub text_tokens: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct OutputTokenDetails { + #[serde(skip_serializing_if = "Option::is_none")] + pub text_tokens: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub audio_tokens: Option, } #[derive(Debug, Serialize, Deserialize, Clone)] @@ -20,42 +81,155 @@ pub enum ResponseStatus { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct FailedError { +pub struct Error { pub code: String, - pub message: String, + pub r#type: String, } #[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(rename_all = "snake_case")] -pub enum IncompleteReason { - Interruption, - MaxOutputTokens, - ContentFilter, +pub struct ResponseStatusDetail { + /// A description of the error that caused the response to fail, populated when the status is failed. + pub error: Option, + /// The reason the Response did not complete. For a `cancelled` Response, one of `turn_detected` + /// (the server VAD detected a new start of speech) or `client_cancelled` (the client sent a cancel + /// event). For an incomplete Response, one of `max_output_tokens` or `content_filter` (the + /// server-side safety filter activated and cut off the response). + pub reason: Option, + /// The type of error that caused the response to fail, corresponding with the `status` + /// field (`completed`, `cancelled`, `incomplete`, `failed`). + pub r#type: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct ResponseAudioOutput { + /// The format of the output audio. + pub format: AudioFormat, + + /// The voice the model uses to respond. Voice cannot be changed during the session once + /// the model has responded with audio at least once. 
Current voice options are + /// `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, and `cedar`. + /// We recommend `marin` and `cedar` for best quality. + pub voice: RealtimeVoice, } #[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(tag = "type")] -pub enum ResponseStatusDetail { - #[serde(rename = "incomplete")] - Incomplete { reason: IncompleteReason }, - #[serde(rename = "failed")] - Failed { error: Option }, - #[serde(rename = "cancelled")] - Cancelled { reason: String }, +pub struct ResponseAudio { + /// Configuration for audio output. + pub output: ResponseAudioOutput, } +/// The response resource. #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseResource { - /// The unique ID of the response. +pub struct ResponseCreate { + /// Configuration for audio input and output. + pub audio: ResponseAudio, + + /// Controls which conversation the response is added to. Currently supports auto and none, + /// with auto as the default value. The auto value means that the contents of the response + /// will be added to the default conversation. Set this to none to create an out-of-band + /// response which will not add items to default conversation. + pub conversation: Conversation, + + /// Input items to include in the prompt for the model. Using this field creates a new context + /// for this Response instead of using the default conversation. An empty array `[]` will clear + /// the context for this Response. Note that this can include references to items that + /// previously appeared in the session using their id. + pub input: Vec, // TODO: implement types + + /// The default system instructions (i.e. system message) prepended to model calls. + /// This field allows the client to guide the model on desired responses. + /// The model can be instructed on response content and format, (e.g. "be extremely succinct", + /// "act friendly", "here are examples of good responses") and on audio behavior + /// (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). + /// The instructions are not guaranteed to be followed by the model, but they provide + /// guidance to the model on the desired behavior. Note that the server sets default + /// instructions which will be used if this field is not set and are visible in + /// the `session.created` event at the start of the session. + pub instructions: String, + + /// Maximum number of output tokens for a single assistant response, inclusive of tool calls. + /// Provide an integer between 1 and 4096 to limit output tokens, or inf for the maximum + /// available tokens for a given model. Defaults to `inf`. + pub max_output_tokens: MaxOutputTokens, + + /// Set of 16 key-value pairs that can be attached to an object. This can be useful for + /// storing additional information about the object in a structured format, and querying + /// for objects via API or the dashboard. + /// + /// Keys are strings with a maximum length of 64 characters. Values are strings with a + /// maximum length of 512 characters. + #[serde(skip_serializing_if = "Option::is_none")] + pub metadata: Option, + + /// The set of modalities the model used to respond, currently the only possible values + /// are [\"audio\"], [\"text\"]. Audio output always include a text transcript. + /// Setting the output to mode `text` will disable audio output from the model. + pub output_modalities: Vec, + + /// Reference to a prompt template and its variables. 
+ /// [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). + #[serde(skip_serializing_if = "Option::is_none")] + pub prompt: Option, + + /// How the model chooses tools. Provide one of the string modes or force a specific + /// function/MCP tool. + #[serde(skip_serializing_if = "Option::is_none")] + pub tool_choice: Option, + + /// Tools available to the model. + #[serde(skip_serializing_if = "Option::is_none")] + pub tools: Option>, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct Response { + /// Configuration for audio output. + pub audio: ResponseAudio, + + /// Which conversation the response is added to, determined by the `conversation` field in the + /// `response.create` event. If `auto`, the response will be added to the default conversation + /// and the value of `conversation_id` will be an id like `conv_1234`. If `none`, the response + /// will not be added to any conversation and the value of `conversation_id` will be `null`. + /// If responses are being triggered automatically by VAD, the response will be added to the + /// default conversation. + #[serde(skip_serializing_if = "Option::is_none")] + pub conversation_id: Option, + + /// The unique ID of the response, will look like `resp_1234`. pub id: String, + + /// Maximum number of output tokens for a single assistant response, inclusive of tool calls, + /// that was used in this response. + pub max_output_tokens: MaxOutputTokens, + + /// Set of 16 key-value pairs that can be attached to an object. This can be useful for + /// storing additional information about the object in a structured format, and querying + /// for objects via API or the dashboard. + /// + /// Keys are strings with a maximum length of 64 characters. Values are strings with a + /// maximum length of 512 characters. + #[serde(skip_serializing_if = "Option::is_none")] + pub metadata: Option, + /// The object type, must be "realtime.response". pub object: String, + + /// The list of output items generated by the response. + pub output: Vec, // TODO: implement types + + /// The set of modalities the model used to respond, currently the only possible values + /// are [\"audio\"], [\"text\"]. Audio output always includes a text transcript. + /// Setting the output to mode `text` will disable audio output from the model. + pub output_modalities: Vec, + + /// The final status of the response (`completed`, `cancelled`, `failed`, `incomplete`, or `in_progress`). pub status: ResponseStatus, + /// Additional details about the status. pub status_details: Option, - /// The list of output items generated by the response. - pub output: Vec, - /// Usage statistics for the response. + /// Usage statistics for the Response; this will correspond to billing. A Realtime API session + /// will maintain a conversation context and append new Items to the Conversation, thus output + /// from previous turns (text and audio tokens) will become the input for later turns.
pub usage: Option, } diff --git a/async-openai/src/types/realtime/server_event.rs b/async-openai/src/types/realtime/server_event.rs index 0d35690a..6d16742c 100644 --- a/async-openai/src/types/realtime/server_event.rs +++ b/async-openai/src/types/realtime/server_event.rs @@ -1,8 +1,9 @@ use serde::{Deserialize, Serialize}; +use crate::types::realtime::{Response, Session}; + use super::{ content_part::ContentPart, error::RealtimeAPIError, item::Item, rate_limit::RateLimit, - response_resource::ResponseResource, session_resource::SessionResource, }; #[derive(Debug, Serialize, Deserialize, Clone)] @@ -18,7 +19,7 @@ pub struct SessionCreatedEvent { /// The unique ID of the server event. pub event_id: String, /// The session resource. - pub session: SessionResource, + pub session: Session, } #[derive(Debug, Serialize, Deserialize, Clone)] @@ -26,7 +27,7 @@ pub struct SessionUpdatedEvent { /// The unique ID of the server event. pub event_id: String, /// The updated session resource. - pub session: SessionResource, + pub session: Session, } #[derive(Debug, Serialize, Deserialize, Clone)] @@ -270,7 +271,7 @@ pub struct ResponseCreatedEvent { /// The unique ID of the server event. pub event_id: String, /// The response resource. - pub response: ResponseResource, + pub response: Response, } #[derive(Debug, Serialize, Deserialize, Clone)] @@ -278,7 +279,7 @@ pub struct ResponseDoneEvent { /// The unique ID of the server event. pub event_id: String, /// The response resource. - pub response: ResponseResource, + pub response: Response, } #[derive(Debug, Serialize, Deserialize, Clone)] diff --git a/async-openai/src/types/realtime/session_resource.rs b/async-openai/src/types/realtime/session_resource.rs index ded5c759..3034fdc0 100644 --- a/async-openai/src/types/realtime/session_resource.rs +++ b/async-openai/src/types/realtime/session_resource.rs @@ -387,12 +387,20 @@ pub struct TokenLimits { pub post_instructions: u32, } -/// Realtime session object configuration. #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct SessionResource { - /// The type of session to create. Always realtime for the Realtime API. - pub r#type: String, +#[serde(tag = "type")] +pub enum Session { + /// The type of session to create. Always `realtime` for the Realtime API. + #[serde(rename = "realtime")] + RealtimeSessionConfiguration(RealtimeSession), + /// The type of session to create. Always `transcription` for transcription sessions. + #[serde(rename = "transcription")] + TranscriptionSessionConfiguration(TranscriptionSession), +} +/// Realtime session object configuration. +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeSession { pub audio: Audio, /// Additional fields to include in server outputs. @@ -436,9 +444,9 @@ pub struct SessionResource { #[serde(skip_serializing_if = "Option::is_none")] pub prompt: Option, - #[serde(skip_serializing_if = "Option::is_none")] /// How the model chooses tools. Provide one of the string modes or force a specific /// function/MCP tool. + #[serde(skip_serializing_if = "Option::is_none")] pub tool_choice: Option, /// Tools available to the model. @@ -492,9 +500,6 @@ pub struct TranscriptionAudio { /// Realtime transcription session object configuration. #[derive(Debug, Serialize, Deserialize, Clone)] pub struct TranscriptionSession { - /// The type of session to create. Always `transcription` for transcription sessions. - pub r#type: String, - /// Configuration for input and output audio. 
pub audio: TranscriptionAudio, From 793595019b3fa43f3cf469972b7138c1ef98f2d4 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Sat, 1 Nov 2025 19:56:11 -0700 Subject: [PATCH 07/42] updated Item --- async-openai/src/types/realtime/item.rs | 300 ++++++++++++++---- .../src/types/realtime/response_resource.rs | 7 +- 2 files changed, 241 insertions(+), 66 deletions(-) diff --git a/async-openai/src/types/realtime/item.rs b/async-openai/src/types/realtime/item.rs index 3af7d0d9..c6571191 100644 --- a/async-openai/src/types/realtime/item.rs +++ b/async-openai/src/types/realtime/item.rs @@ -1,99 +1,273 @@ use serde::{Deserialize, Serialize}; #[derive(Debug, Serialize, Deserialize, Clone)] +pub struct SystemMessageContent { + /// The text content. + pub text: String, + /// The content type. Always `input_text` for system messages. + pub r#type: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct SystemMessage { + /// The content of the message. + pub content: Vec, + + /// The unique ID of the item. This may be provided by the client or generated by the server. + pub id: String, + + /// Identifier for the API object being returned - always `realtime.item`. + /// Optional when creating a new item. + pub object: Option, + + /// The status of the item. Has no effect on the conversation. + pub status: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct UserMessageContentInputText { + /// The text content (for `input_text`). + pub text: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct UserMessageContentInputAudio { + /// Base64-encoded audio bytes (for `input_audio`), these will be parsed as the + /// format specified in the session input audio type configuration. + /// This defaults to PCM 16-bit 24kHz mono if not specified. + pub audio: String, + /// Transcript of the audio (for `input_audio`). This is not sent to the model, + /// but will be attached to the message item for reference. + pub transcript: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, Default)] #[serde(rename_all = "snake_case")] -pub enum ItemType { - Message, - FunctionCall, - FunctionCallOutput, +pub enum ImageDetail { + #[default] + Auto, + Low, + High, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct UserMessageContentInputImage { + /// Base64-encoded image bytes (for `input_image`) as a data URI. + /// For example `data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA...`. + /// Supported formats are PNG and JPEG. + pub image_url: String, + /// The detail level of the image (for `input_image`). `auto` will default to `high`. + pub detail: ImageDetail, } #[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(tag = "type")] #[serde(rename_all = "snake_case")] -pub enum ItemStatus { - Completed, - InProgress, - Incomplete, +pub enum UserMessageContent { + InputText(UserMessageContentInputText), + InputAudio(UserMessageContentInputAudio), + InputImage(UserMessageContentInputImage), } #[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(rename_all = "lowercase")] -pub enum ItemRole { - User, - Assistant, - System, +pub struct UserMessage { + /// The content of the message. + pub content: Vec, + + /// The unique ID of the item. This may be provided by the client or generated by the server. + pub id: String, + + /// Identifier for the API object being returned - always `realtime.item`. + /// Optional when creating a new item. + pub object: Option, + + /// The status of the item. Has no effect on the conversation. 
+ pub status: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct AssistantMessageContentOutputText { + /// The text content + pub text: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct AssistantMessageContentOutputAudio { + /// Base64-encoded audio bytes, these will be parsed as the format specified + /// in the session output audio type configuration. This defaults to PCM 16-bit + /// 24kHz mono if not specified. + pub audio: String, + /// The transcript of the audio content, this will always be present if the + /// output type is `audio`. + pub transcript: String, } #[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(tag = "type")] #[serde(rename_all = "snake_case")] -pub enum ItemContentType { - InputText, - InputAudio, - Text, - Audio, +pub enum AssistantMessageContent { + OutputText(AssistantMessageContentOutputText), + OutputAudio(AssistantMessageContentOutputAudio), } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ItemContent { - /// The content type ("input_text", "input_audio", "text", "audio"). - pub r#type: ItemContentType, +pub struct AssistantMessage { + /// The content of the message. + pub content: Vec, - /// The text content. - #[serde(skip_serializing_if = "Option::is_none")] - pub text: Option, + /// The unique ID of the item. This may be provided by the client or generated by the server. + pub id: String, - /// Base64-encoded audio bytes. - #[serde(skip_serializing_if = "Option::is_none")] - pub audio: Option, + /// Identifier for the API object being returned - always `realtime.item`. + /// Optional when creating a new item. + pub object: Option, - /// The transcript of the audio. - #[serde(skip_serializing_if = "Option::is_none")] - pub transcript: Option, + /// The status of the item. Has no effect on the conversation. + pub status: String, } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct Item { - /// The unique ID of the item. - #[serde(skip_serializing_if = "Option::is_none")] - pub id: Option, +#[serde(tag = "role")] +#[serde(rename_all = "lowercase")] +pub enum Message { + System(SystemMessage), + User(UserMessage), + Assistant(AssistantMessage), +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct FunctionCall { + /// The arguments of the function call. This is a JSON-encoded string representing + /// the arguments passed to the function, for example {"arg1": "value1", "arg2": 42}. + pub arguments: String, - /// The type of the item ("message", "function_call", "function_call_output"). - #[serde(skip_serializing_if = "Option::is_none")] - pub r#type: Option, + /// The name of the function being called. + pub name: String, - /// The status of the item ("completed", "in_progress", "incomplete"). - #[serde(skip_serializing_if = "Option::is_none")] - pub status: Option, + /// The ID of the function call. + pub call_id: String, - /// The role of the message sender ("user", "assistant", "system"). - #[serde(skip_serializing_if = "Option::is_none")] - pub role: Option, + /// The unique ID of the item. This may be provided by the client or generated by the server. + pub id: String, - /// The content of the message. - #[serde(skip_serializing_if = "Option::is_none")] - pub content: Option>, + /// Identifier for the API object being returned - always `realtime.item`. + /// Optional when creating a new item. + pub object: Option, - /// The ID of the function call (for "function_call" items). 
- #[serde(skip_serializing_if = "Option::is_none")] - pub call_id: Option, + /// The status of the item. Has no effect on the conversation. + pub status: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct FunctionCallOutput { + /// The ID of the function call this output is for. + pub call_id: String, - /// The name of the function being called (for "function_call" items). - #[serde(skip_serializing_if = "Option::is_none")] - pub name: Option, + /// The output of the function call, this is free text and can contain any information + /// or simply be empty. + pub output: String, - /// The arguments of the function call (for "function_call" items). - #[serde(skip_serializing_if = "Option::is_none")] - pub arguments: Option, + /// The unique ID of the item. This may be provided by the client or generated by the server. + pub id: String, - /// The output of the function call (for "function_call_output" items). - #[serde(skip_serializing_if = "Option::is_none")] - pub output: Option, + /// Identifier for the API object being returned - always `realtime.item`. + /// Optional when creating a new item. + pub object: Option, + + /// The status of the item. Has no effect on the conversation. + pub status: String, } -impl TryFrom for Item { - type Error = serde_json::Error; +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct McpApprovalResponse { + /// The ID of the approval request being answered. + pub approval_request_id: String, + + /// Whether the request was approved. + pub approved: bool, + + /// The unique ID of the approval response. + pub id: String, - fn try_from(value: serde_json::Value) -> Result { - serde_json::from_value(value) - } + /// Optional reason for the decision. + pub reason: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct AvailableMcpTool { + /// The JSON schema describing the tool's input. + pub input_schema: serde_json::Value, + + /// The name of the tool. + pub name: String, + + /// Additional annotations about the tool. + pub annotations: Option, + + /// The description of the tool. + pub description: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct McpListTools { + /// The label of the MCP server. + pub server_label: String, + + /// The tools available on the server. + pub tools: Vec, + + /// The unique ID of the list. + pub id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct McpApprovalRequest { + /// A JSON string of arguments for the tool. + pub arguments: String, + + /// The unique ID of the approval request. + pub id: String, + + /// The name of the tool to run. + pub name: String, + + /// The label of the MCP server making the request. + pub server_label: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct McpCall { + /// A JSON string of the arguments passed to the tool. + pub arguments: String, + + /// The unique ID of the tool call. + pub id: String, + + /// The name of the tool that was run. + pub name: String, + + /// The label of the MCP server running the tool. + pub server_label: String, + + /// The ID of an associated approval request, if any. + pub approval_request_id: Option, + + /// The error from the tool call, if any. + pub error: Option, // TODO: implement type + + /// The output from the tool call. 
+ pub output: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(tag = "type")] +#[serde(rename_all = "snake_case")] +pub enum Item { + Message(Message), + FunctionCall(FunctionCall), + FunctionCallOutput(FunctionCallOutput), + McpApprovalResponse(McpApprovalResponse), + McpListTools(McpListTools), + McpCall(McpCall), + McpApprovalRequest(McpApprovalRequest), } diff --git a/async-openai/src/types/realtime/response_resource.rs b/async-openai/src/types/realtime/response_resource.rs index a32b5600..0886eae1 100644 --- a/async-openai/src/types/realtime/response_resource.rs +++ b/async-openai/src/types/realtime/response_resource.rs @@ -1,7 +1,8 @@ use serde::{Deserialize, Serialize}; use crate::types::realtime::{ - AudioFormat, Conversation, MaxOutputTokens, Prompt, RealtimeVoice, ToolChoice, ToolDefinition, + AudioFormat, Conversation, Item, MaxOutputTokens, Prompt, RealtimeVoice, ToolChoice, + ToolDefinition, }; #[derive(Debug, Serialize, Deserialize, Clone)] @@ -134,7 +135,7 @@ pub struct ResponseCreate { /// for this Response instead of using the default conversation. An empty array `[]` will clear /// the context for this Response. Note that this can include references to items that /// previously appeared in the session using their id. - pub input: Vec, // TODO: implement types + pub input: Vec, /// The default system instructions (i.e. system message) prepended to model calls. /// This field allows the client to guide the model on desired responses. @@ -215,7 +216,7 @@ pub struct Response { pub object: String, /// The list of output items generated by the response. - pub output: Vec, // TODO: implement types + pub output: Vec, /// The set of modalities the model used to respond, currently the only possible values /// are [\"audio\"], [\"text\"]. Audio output always include a text transcript. From c3c62a43e9b720ec861ee8da5fb43ac9e881cc43 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Sat, 1 Nov 2025 20:36:47 -0700 Subject: [PATCH 08/42] updated realtime types --- async-openai/src/types/realtime/item.rs | 36 ++++++++++++++----- .../src/types/realtime/response_resource.rs | 2 +- .../src/types/realtime/session_resource.rs | 4 +-- 3 files changed, 30 insertions(+), 12 deletions(-) diff --git a/async-openai/src/types/realtime/item.rs b/async-openai/src/types/realtime/item.rs index c6571191..b6020bf8 100644 --- a/async-openai/src/types/realtime/item.rs +++ b/async-openai/src/types/realtime/item.rs @@ -14,14 +14,14 @@ pub struct SystemMessage { pub content: Vec, /// The unique ID of the item. This may be provided by the client or generated by the server. - pub id: String, + pub id: Option, /// Identifier for the API object being returned - always `realtime.item`. /// Optional when creating a new item. pub object: Option, /// The status of the item. Has no effect on the conversation. - pub status: String, + pub status: Option, } #[derive(Debug, Serialize, Deserialize, Clone)] @@ -75,14 +75,17 @@ pub struct UserMessage { pub content: Vec, /// The unique ID of the item. This may be provided by the client or generated by the server. - pub id: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub id: Option, /// Identifier for the API object being returned - always `realtime.item`. /// Optional when creating a new item. + #[serde(skip_serializing_if = "Option::is_none")] pub object: Option, /// The status of the item. Has no effect on the conversation. 
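    /// A compact sketch of building a user text item for `conversation.item.create`, using the
    /// `Item`, `Message`, and `UserMessage` types in this module (assuming the optional
    /// bookkeeping fields `id`, `object`, and `status` can simply be left as `None`):
    ///
    /// ```ignore
    /// let item = Item::Message(Message::User(UserMessage {
    ///     content: vec![UserMessageContent::InputText(UserMessageContentInputText {
    ///         text: "Hello!".to_string(),
    ///     })],
    ///     // Let the server generate the ID and fill in the bookkeeping fields.
    ///     id: None,
    ///     object: None,
    ///     status: None,
    /// }));
    /// ```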
- pub status: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub status: Option, } #[derive(Debug, Serialize, Deserialize, Clone)] @@ -96,7 +99,7 @@ pub struct AssistantMessageContentOutputAudio { /// Base64-encoded audio bytes, these will be parsed as the format specified /// in the session output audio type configuration. This defaults to PCM 16-bit /// 24kHz mono if not specified. - pub audio: String, + pub audio: Option, /// The transcript of the audio content, this will always be present if the /// output type is `audio`. pub transcript: String, @@ -116,14 +119,17 @@ pub struct AssistantMessage { pub content: Vec, /// The unique ID of the item. This may be provided by the client or generated by the server. - pub id: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub id: Option, /// Identifier for the API object being returned - always `realtime.item`. /// Optional when creating a new item. + #[serde(skip_serializing_if = "Option::is_none")] pub object: Option, /// The status of the item. Has no effect on the conversation. - pub status: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub status: Option, } #[derive(Debug, Serialize, Deserialize, Clone)] @@ -148,10 +154,12 @@ pub struct FunctionCall { pub call_id: String, /// The unique ID of the item. This may be provided by the client or generated by the server. - pub id: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub id: Option, /// Identifier for the API object being returned - always `realtime.item`. /// Optional when creating a new item. + #[serde(skip_serializing_if = "Option::is_none")] pub object: Option, /// The status of the item. Has no effect on the conversation. @@ -168,10 +176,12 @@ pub struct FunctionCallOutput { pub output: String, /// The unique ID of the item. This may be provided by the client or generated by the server. - pub id: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub id: Option, /// Identifier for the API object being returned - always `realtime.item`. /// Optional when creating a new item. + #[serde(skip_serializing_if = "Option::is_none")] pub object: Option, /// The status of the item. Has no effect on the conversation. @@ -271,3 +281,11 @@ pub enum Item { McpCall(McpCall), McpApprovalRequest(McpApprovalRequest), } + +impl TryFrom for Item { + type Error = serde_json::Error; + + fn try_from(value: serde_json::Value) -> Result { + serde_json::from_value(value) + } +} diff --git a/async-openai/src/types/realtime/response_resource.rs b/async-openai/src/types/realtime/response_resource.rs index 0886eae1..2d6342d2 100644 --- a/async-openai/src/types/realtime/response_resource.rs +++ b/async-openai/src/types/realtime/response_resource.rs @@ -185,7 +185,7 @@ pub struct ResponseCreate { #[derive(Debug, Serialize, Deserialize, Clone)] pub struct Response { /// Configuration for audio output. - pub audio: ResponseAudio, + pub audio: Option, /// Which conversation the response is added to, determined by the `conversation` field in the /// `response.create` event. If `auto`, the response will be added to the default conversation diff --git a/async-openai/src/types/realtime/session_resource.rs b/async-openai/src/types/realtime/session_resource.rs index 3034fdc0..c11f78c6 100644 --- a/async-openai/src/types/realtime/session_resource.rs +++ b/async-openai/src/types/realtime/session_resource.rs @@ -262,14 +262,14 @@ pub struct AudioInput { /// and the model. 
Filtering the audio can improve VAD and turn detection accuracy /// (reducing false positives) and model performance by improving perception of the /// input audio. - pub noise_reduction: NoiseReduction, + pub noise_reduction: Option, /// Configuration for input audio transcription, defaults to off and can be set to `null` to turn off once on. /// Input audio transcription is not native to the model, since the model consumes audio directly. /// Transcription runs asynchronously through [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription) /// and should be treated as guidance of input audio content rather than precisely what the model /// heard. The client can optionally set the language and prompt for transcription, /// these offer additional guidance to the transcription service. - pub transcription: AudioTranscription, + pub transcription: Option, /// Configuration for turn detection, ether Server VAD or Semantic VAD. This can /// be set to null to turn off, in which case the client must manually trigger model response. From 26f802a8e19fefdc6e51fdb19865b35b41ab9070 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Sat, 1 Nov 2025 20:37:15 -0700 Subject: [PATCH 09/42] update examples/realtime with GA api --- examples/realtime/src/main.rs | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/examples/realtime/src/main.rs b/examples/realtime/src/main.rs index 141fefa3..11a87329 100644 --- a/examples/realtime/src/main.rs +++ b/examples/realtime/src/main.rs @@ -1,7 +1,7 @@ use std::process::exit; use async_openai::types::realtime::{ - ConversationItemCreateEvent, Item, ResponseCreateEvent, ServerEvent, + ConversationItemCreateEvent, Item, Message as RealtimeMessage, ResponseCreateEvent, ServerEvent, }; use futures_util::{future, pin_mut, StreamExt}; @@ -13,7 +13,7 @@ use tokio_tungstenite::{ #[tokio::main] async fn main() { - let url = "wss://api.openai.com/v1/realtime?model=gpt-4o-realtime-preview-2024-12-17"; + let url = "wss://api.openai.com/v1/realtime?model=gpt-realtime"; let api_key = std::env::var("OPENAI_API_KEY").expect("Please provide OPENAPI_API_KEY env var"); let (stdin_tx, stdin_rx) = futures_channel::mpsc::unbounded(); @@ -25,9 +25,6 @@ async fn main() { "Authorization", format!("Bearer {api_key}").parse().unwrap(), ); - request - .headers_mut() - .insert("OpenAI-Beta", "realtime=v1".parse().unwrap()); // connect to WebSocket endpoint let (ws_stream, _) = connect_async(request).await.expect("Failed to connect"); @@ -57,20 +54,7 @@ async fn main() { match server_event { ServerEvent::ResponseOutputItemDone(event) => { - event.item.content.unwrap_or(vec![]).iter().for_each( - |content| { - if let Some(ref transcript) = content.transcript { - eprintln!( - "[{:?}]: {}", - event.item.role, - transcript.trim(), - ); - } - }, - ); - } - ServerEvent::ResponseAudioTranscriptDelta(event) => { - eprint!("{}", event.delta.trim()); + eprint!("{event:?}"); } ServerEvent::Error(e) => { eprint!("{e:?}"); From 68d13447291b02d1239531b93554389baac711b3 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Sun, 2 Nov 2025 15:44:13 -0800 Subject: [PATCH 10/42] checkpoint: responses types updates --- async-openai/src/types/responses.rs | 515 +++++++++++++++++++--------- 1 file changed, 360 insertions(+), 155 deletions(-) diff --git a/async-openai/src/types/responses.rs b/async-openai/src/types/responses.rs index abccf301..d0105b6f 100644 --- a/async-openai/src/types/responses.rs +++ 
b/async-openai/src/types/responses.rs @@ -6,7 +6,6 @@ pub use crate::types::{ use derive_builder::Builder; use futures::Stream; use serde::{Deserialize, Serialize}; -use serde_json::Value; use std::collections::HashMap; use std::pin::Pin; @@ -39,7 +38,6 @@ pub enum Input { Items(Vec), } -/// A context item: currently only messages. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[serde(untagged, rename_all = "snake_case")] pub enum InputItem { @@ -142,6 +140,12 @@ pub struct InputFile { file_url: Option, } +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct Conversation { + /// The unique ID of the conversation. + pub id: String, +} + /// Builder for a Responses API request. #[derive(Clone, Serialize, Deserialize, Debug, Default, Builder, PartialEq)] #[builder( @@ -847,11 +851,27 @@ pub struct IncompleteDetails { pub reason: String, } +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct TopLogProb { + pub bytes: Vec, + pub logprob: f64, + pub token: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct LogProb { + pub bytes: Vec, + pub logprob: f64, + pub token: String, + pub top_logprobs: Vec, +} + /// A simple text output from the model. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct OutputText { +pub struct OutputTextContent { /// The annotations of the text output. pub annotations: Vec, + pub logprobs: Option, /// The text output from the model. pub text: String, } @@ -860,23 +880,27 @@ pub struct OutputText { #[serde(tag = "type", rename_all = "snake_case")] pub enum Annotation { /// A citation to a file. - FileCitation(FileCitation), + FileCitation(FileCitationBody), /// A citation for a web resource used to generate a model response. - UrlCitation(UrlCitation), + UrlCitation(UrlCitationBody), + /// A citation for a container file used to generate a model response. + ContainerFileCitation(ContainerFileCitationBody), /// A path to a file. FilePath(FilePath), } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct FileCitation { +pub struct FileCitationBody { /// The ID of the file. file_id: String, + /// The filename of the file cited. + filename: String, /// The index of the file in the list of files. index: u32, } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct UrlCitation { +pub struct UrlCitationBody { /// The index of the last character of the URL citation in the message. end_index: u32, /// The index of the first character of the URL citation in the message. @@ -887,6 +911,20 @@ pub struct UrlCitation { url: String, } +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ContainerFileCitationBody { + /// The ID of the container file. + container_id: String, + /// The index of the last character of the container file citation in the message. + end_index: u32, + /// The ID of the file. + file_id: String, + /// The filename of the container file cited. + filename: String, + /// The index of the first character of the container file citation in the message. + start_index: u32, +} + #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] pub struct FilePath { /// The ID of the file. @@ -897,8 +935,8 @@ pub struct FilePath { /// A refusal explanation from the model. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct Refusal { - /// The refusal explanationfrom the model. +pub struct RefusalContent { + /// The refusal explanation from the model. 
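    /// A short sketch of handling a refusal next to normal text output, using the
    /// `OutputMessageContent` enum defined below (the variable `part` is hypothetical):
    ///
    /// ```ignore
    /// match part {
    ///     OutputMessageContent::OutputText(text) => println!("{}", text.text),
    ///     OutputMessageContent::Refusal(refusal) => eprintln!("model refused: {}", refusal.refusal),
    /// }
    /// ```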
pub refusal: String, } @@ -906,22 +944,23 @@ pub struct Refusal { #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] pub struct OutputMessage { /// The content of the output message. - pub content: Vec, + pub content: Vec, /// The unique ID of the output message. pub id: String, - /// The role of the output message. Always assistant. + /// The role of the output message. Always `assistant`. pub role: Role, - /// The status of the message input. + /// The status of the message input. One of `in_progress`, `completed`, or + /// `incomplete`. Populated when input items are returned via API. pub status: OutputStatus, } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[serde(tag = "type", rename_all = "snake_case")] -pub enum Content { +pub enum OutputMessageContent { /// A text output from the model. - OutputText(OutputText), + OutputText(OutputTextContent), /// A refusal from the model. - Refusal(Refusal), + Refusal(RefusalContent), } /// Nested content within an output message. @@ -956,46 +995,58 @@ pub enum OutputContent { McpApprovalRequest(McpApprovalRequestOutput), } +/// Reasoning text content. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ReasoningTextContent { + /// The reasoning text from the model. + pub text: String, +} + /// A reasoning item representing the model's chain of thought, including summary paragraphs. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] pub struct ReasoningItem { /// Unique identifier of the reasoning content. pub id: String, - /// The summarized chain-of-thought paragraphs. - pub summary: Vec, + /// Reasoning summary content. + pub summary: Vec, + /// Reasoning text content. + #[serde(skip_serializing_if = "Option::is_none")] + pub content: Option>, /// The encrypted content of the reasoning item - populated when a response is generated with /// `reasoning.encrypted_content` in the `include` parameter. #[serde(skip_serializing_if = "Option::is_none")] pub encrypted_content: Option, - /// The status of the reasoning item. + /// The status of the item. One of `in_progress`, `completed`, or `incomplete`. + /// Populated when items are returned via API. #[serde(skip_serializing_if = "Option::is_none")] pub status: Option, } /// A single summary text fragment from reasoning. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct SummaryText { - /// A short summary of the reasoning used by the model. +pub struct Summary { + /// A summary of the reasoning output from the model so far. pub text: String, } /// File search tool call output. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct FileSearchCallOutput { +pub struct FileSearchToolCall { /// The unique ID of the file search tool call. pub id: String, /// The queries used to search for files. pub queries: Vec, - /// The status of the file search tool call. - pub status: FileSearchCallOutputStatus, + /// The status of the file search tool call. One of `in_progress`, `searching`, + /// `incomplete`,`failed`, or `completed`. + pub status: FileSearchToolCallStatus, /// The results of the file search tool call. #[serde(skip_serializing_if = "Option::is_none")] - pub results: Option>, + pub results: Option>, } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[serde(rename_all = "snake_case")] -pub enum FileSearchCallOutputStatus { +pub enum FileSearchToolCallStatus { InProgress, Searching, Incomplete, @@ -1005,7 +1056,12 @@ pub enum FileSearchCallOutputStatus { /// A single result from a file search. 
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct FileSearchResult { +pub struct FileSearchToolCallResult { + /// Set of 16 key-value pairs that can be attached to an object. This can be useful for storing + /// additional information about the object in a structured format, and querying for objects + /// API or the dashboard. Keys are strings with a maximum length of 64 characters + /// . Values are strings with a maximum length of 512 characters, booleans, or numbers. + pub attributes: HashMap, /// The unique ID of the file. pub file_id: String, /// The name of the file. @@ -1014,71 +1070,124 @@ pub struct FileSearchResult { pub score: f32, /// The text that was retrieved from the file. pub text: String, - /// Set of 16 key-value pairs that can be attached to an object. This can be useful for storing - /// additional information about the object in a structured format, and querying for objects - /// API or the dashboard. Keys are strings with a maximum length of 64 characters - /// . Values are strings with a maximum length of 512 characters, booleans, or numbers. - pub attributes: HashMap, } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct SafetyCheck { - /// The ID of the safety check. +pub struct ComputerCallSafetyCheckParam { + /// The ID of the pending safety check. pub id: String, - /// The type/code of the pending safety check. - pub code: String, + /// The type of the pending safety check. + #[serde(skip_serializing_if = "Option::is_none")] + pub code: Option, /// Details about the pending safety check. - pub message: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub message: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(rename_all = "snake_case")] +pub enum WebSearchToolCallStatus { + InProgress, + Searching, + Completed, + Failed, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct WebSearchActionSearchSource { + /// The type of source. Always `url`. + pub r#type: String, + /// The URL of the source. + pub url: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct WebSearchActionSearch { + /// The search query. + pub query: String, + /// The sources used in the search. + pub sources: Option>, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct WebSearchActionOpenPage { + /// The URL opened by the model. + pub url: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct WebSearchActionFind { + /// The URL of the page searched for the pattern. + pub url: String, + /// The pattern or text to search for within the page. + pub pattern: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum WebSearchToolCallAction { + /// Action type "search" - Performs a web search query. + Search(WebSearchActionSearch), + /// Action type "open_page" - Opens a specific URL from search results. + OpenPage(WebSearchActionOpenPage), + /// Action type "find": Searches for a pattern within a loaded page. + Find(WebSearchActionFind), } /// Web search tool call output. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct WebSearchCallOutput { +pub struct WebSearchToolCall { + /// An object describing the specific action taken in this web search call. Includes + /// details on how the model used the web (search, open_page, find). 
+ pub action: WebSearchToolCallAction, /// The unique ID of the web search tool call. pub id: String, /// The status of the web search tool call. - pub status: String, + pub status: WebSearchToolCallStatus, } /// Output from a computer tool call. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct ComputerCallOutput { - pub action: ComputerCallAction, +pub struct ComputerToolCall { + pub action: ComputerAction, /// An identifier used when responding to the tool call with output. pub call_id: String, /// The unique ID of the computer call. pub id: String, /// The pending safety checks for the computer call. - pub pending_safety_checks: Vec, - /// The status of the item. + pub pending_safety_checks: Vec, + /// The status of the item. One of `in_progress`, `completed`, or `incomplete`. + /// Populated when items are returned via API. pub status: OutputStatus, } /// A point in 2D space. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct Point { +pub struct DragPoint { + /// The x-coordinate. pub x: i32, + /// The y-coordinate. pub y: i32, } /// Represents all user‐triggered actions. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] #[serde(tag = "type", rename_all = "snake_case")] -pub enum ComputerCallAction { +pub enum ComputerAction { /// A click action. - Click(Click), + Click(ClickParam), - /// A double-click action. - DoubleClick(DoubleClick), + /// A double click action. + DoubleClick(DoubleClickAction), /// A drag action. Drag(Drag), - /// A keypress action. - KeyPress(KeyPress), + /// A collection of keypresses the model would like to perform. + Keypress(KeyPressAction), /// A mouse move action. - Move(MoveAction), + Move(Move), /// A screenshot action. Screenshot, @@ -1086,16 +1195,16 @@ pub enum ComputerCallAction { /// A scroll action. Scroll(Scroll), - /// A type (text entry) action. - Type(TypeAction), + /// An action to type in text. + Type(Type), - /// A wait (no-op) action. + /// A wait action. Wait, } #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum ButtonPress { +#[serde(rename_all = "lowercase")] +pub enum ClickButtonType { Left, Right, Wheel, @@ -1105,21 +1214,22 @@ pub enum ButtonPress { /// A click action. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct Click { - /// Which mouse button was pressed. - pub button: ButtonPress, - /// X‐coordinate of the click. +pub struct ClickParam { + /// Indicates which mouse button was pressed during the click. One of `left`, + /// `right`, `wheel`, `back`, or `forward`. + pub button: ClickButtonType, + /// The x-coordinate where the click occurred. pub x: i32, - /// Y‐coordinate of the click. + /// The y-coordinate where the click occurred. pub y: i32, } /// A double click action. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct DoubleClick { - /// X‐coordinate of the double click. +pub struct DoubleClickAction { + /// The x-coordinate where the double click occurred. pub x: i32, - /// Y‐coordinate of the double click. + /// The y-coordinate where the double click occurred. pub y: i32, } @@ -1127,52 +1237,49 @@ pub struct DoubleClick { #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct Drag { /// The path of points the cursor drags through. - pub path: Vec, - /// X‐coordinate at the end of the drag. - pub x: i32, - /// Y‐coordinate at the end of the drag. - pub y: i32, + pub path: Vec, } /// A keypress action. 
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct KeyPress { - /// The list of keys to press (e.g. `["Control", "C"]`). +pub struct KeyPressAction { + /// The combination of keys the model is requesting to be pressed. + /// This is an array of strings, each representing a key. pub keys: Vec, } /// A mouse move action. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct MoveAction { - /// X‐coordinate to move to. +pub struct Move { + /// The x-coordinate to move to. pub x: i32, - /// Y‐coordinate to move to. + /// The y-coordinate to move to. pub y: i32, } /// A scroll action. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct Scroll { - /// Horizontal scroll distance. + /// The horizontal scroll distance. pub scroll_x: i32, - /// Vertical scroll distance. + /// The vertical scroll distance. pub scroll_y: i32, - /// X‐coordinate where the scroll began. + /// The x-coordinate where the scroll occurred. pub x: i32, - /// Y‐coordinate where the scroll began. + /// The y-coordinate where the scroll occurred. pub y: i32, } /// A typing (text entry) action. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct TypeAction { +pub struct Type { /// The text to type. pub text: String, } /// Metadata for a function call request. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct FunctionCall { +pub struct FunctionToolCall { /// The unique ID of the function tool call. pub id: String, /// The unique ID of the function tool call generated by the model. @@ -1185,56 +1292,75 @@ pub struct FunctionCall { pub status: OutputStatus, } +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(rename_all = "snake_case")] +pub enum ImageGenToolCallStatus { + InProgress, + Completed, + Generating, + Failed, +} + /// Output of an image generation request. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct ImageGenerationCallOutput { - /// Unique ID of the image generation call. +pub struct ImageGenToolCall { + /// The unique ID of the image generation call. pub id: String, - /// Base64-encoded generated image, or null. + /// The generated image encoded in base64. pub result: Option, - /// Status of the image generation call. - pub status: String, + /// The status of the image generation call. + pub status: ImageGenToolCallStatus, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(rename_all = "snake_case")] +pub enum CodeInterpreterToolCallStatus { + InProgress, + Completed, + Incomplete, + Interpreting, + Failed, } /// Output of a code interpreter request. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct CodeInterpreterCallOutput { - /// The code that was executed. +pub struct CodeInterpreterToolCall { + /// The code to run, or null if not available. #[serde(skip_serializing_if = "Option::is_none")] pub code: Option, - /// Unique ID of the call. - pub id: String, - /// Status of the tool call. - pub status: String, /// ID of the container used to run the code. pub container_id: String, - /// The outputs of the execution: logs or files. + /// The unique ID of the code interpreter tool call. + pub id: String, + /// The outputs generated by the code interpreter, such as logs or images. + /// Can be null if no outputs are available. #[serde(skip_serializing_if = "Option::is_none")] - pub outputs: Option>, + pub outputs: Option>, + /// The status of the code interpreter tool call. 
+ /// Valid values are `in_progress`, `completed`, `incomplete`, `interpreting`, and `failed`. + pub status: CodeInterpreterToolCallStatus, } /// Individual result from a code interpreter: either logs or files. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[serde(tag = "type", rename_all = "snake_case")] -pub enum CodeInterpreterResult { - /// Text logs from the execution. - Logs(CodeInterpreterTextOutput), - /// File outputs from the execution. - Files(CodeInterpreterFileOutput), +pub enum CodeInterpreterToolCallOutput { + /// Code interpreter output logs + Logs(CodeInterpreterOutputLogs), + /// Code interpreter output image + Image(CodeInterpreterOutputImage), } -/// The output containing execution logs. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct CodeInterpreterTextOutput { - /// The logs of the code interpreter tool call. +pub struct CodeInterpreterOutputLogs { + /// The logs output from the code interpreter. pub logs: String, } -/// The output containing file references. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct CodeInterpreterFileOutput { - /// List of file IDs produced. - pub files: Vec, +pub struct CodeInterpreterOutputImage { + /// The URL of the image output from the code interpreter. + pub url: String, } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] @@ -1247,73 +1373,88 @@ pub struct CodeInterpreterFile { /// Output of a local shell command request. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct LocalShellCallOutput { - /// Details of the exec action. - pub action: LocalShellAction, - /// Unique call identifier for responding to the tool call. +pub struct LocalShellToolCall { + /// Execute a shell command on the server. + pub action: LocalShellExecAction, + /// The unique ID of the local shell tool call generated by the model. pub call_id: String, - /// Unique ID of the local shell call. + /// The unique ID of the local shell call. pub id: String, - /// Status of the local shell call. + /// The status of the local shell call. pub status: String, } /// Define the shape of a local shell action (exec). #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct LocalShellAction { +pub struct LocalShellExecAction { /// The command to run. pub command: Vec, /// Environment variables to set for the command. pub env: HashMap, - /// Optional timeout for the command (ms). + /// Optional timeout in milliseconds for the command. pub timeout_ms: Option, /// Optional user to run the command as. pub user: Option, - /// Optional working directory for the command. + /// Optional working directory to run the command in. pub working_directory: Option, } /// Output of an MCP server tool invocation. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct McpCallOutput { - /// JSON string of the arguments passed. +pub struct MCPToolCall { + /// A JSON string of the arguments passed to the tool. pub arguments: String, - /// Unique ID of the MCP call. + /// The unique ID of the tool call. pub id: String, - /// Name of the tool invoked. + /// The name of the tool that was run. pub name: String, - /// Label of the MCP server. + /// The label of the MCP server running the tool. pub server_label: String, + /// Unique identifier for the MCP tool call approval request. Include this value + /// in a subsequent `mcp_approval_response` input to approve or reject the corresponding + /// tool call. + pub approval_request_id: Option, /// Error message from the call, if any. 
pub error: Option, - /// Output from the call, if any. + /// The output from the tool call. pub output: Option, + /// The status of the tool call. One of `in_progress`, `completed`, `incomplete`, + /// `calling`, or `failed`. + pub status: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(rename_all = "snake_case")] +pub enum MCPToolCallStatus { + InProgress, + Completed, + Incomplete, + Calling, + Failed, } -/// Output listing tools available on an MCP server. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct McpListToolsOutput { - /// Unique ID of the list request. +pub struct MCPListTools { + /// The unique ID of the list. pub id: String, - /// Label of the MCP server. + /// The label of the MCP server. pub server_label: String, - /// Tools available on the server with metadata. - pub tools: Vec, + /// The tools available on the server. + pub tools: Vec, /// Error message if listing failed. #[serde(skip_serializing_if = "Option::is_none")] pub error: Option, } -/// Information about a single tool on an MCP server. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct McpToolInfo { +pub struct MCPListToolsTool { + /// The JSON schema describing the tool's input. + pub input_schema: serde_json::Value, /// The name of the tool. pub name: String, - /// The JSON schema describing the tool's input. - pub input_schema: Value, /// Additional annotations about the tool. #[serde(skip_serializing_if = "Option::is_none")] - pub annotations: Option, + pub annotations: Option, /// The description of the tool. #[serde(skip_serializing_if = "Option::is_none")] pub description: Option, @@ -1321,14 +1462,14 @@ pub struct McpToolInfo { /// Output representing a human approval request for an MCP tool. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct McpApprovalRequestOutput { +pub struct MCPApprovalRequest { /// JSON string of arguments for the tool. pub arguments: String, - /// Unique ID of the approval request. + /// The unique ID of the approval request. pub id: String, - /// Name of the tool requiring approval. + /// The name of the tool to run. pub name: String, - /// Label of the MCP server making the request. + /// The label of the MCP server making the request. pub server_label: String, } @@ -1347,13 +1488,31 @@ pub struct Usage { pub total_tokens: u32, } +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(untagged)] +pub enum Instructions { + /// A text input to the model, equivalent to a text input with the `developer` role. + Text(String), + /// A list of one or many input items to the model, containing different content types. + Array(Vec), +} + /// The complete response returned by the Responses API. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] pub struct Response { + /// Whether to run the model response in the background. + /// [Learn more](https://platform.openai.com/docs/guides/background). + #[serde(skip_serializing_if = "Option::is_none")] + pub background: Option, + + /// The conversation that this response belongs to. Input items and output + /// items from this response are automatically added to this conversation. + pub conversation: Option, + /// Unix timestamp (in seconds) when this Response was created. pub created_at: u64, - /// Error object if the API failed to generate a response. + /// An error object returned when the model fails to generate a Response. 
#[serde(skip_serializing_if = "Option::is_none")] pub error: Option, @@ -1364,26 +1523,44 @@ pub struct Response { #[serde(skip_serializing_if = "Option::is_none")] pub incomplete_details: Option, - /// Instructions that were inserted as the first item in context. + /// A system (or developer) message inserted into the model's context. + /// + /// When using along with `previous_response_id`, the instructions from a previous response + /// will not be carried over to the next response. This makes it simple to swap out + /// system (or developer) messages in new responses. #[serde(skip_serializing_if = "Option::is_none")] - pub instructions: Option, + pub instructions: Option, - /// The value of `max_output_tokens` that was honored. + /// An upper bound for the number of tokens that can be generated for a response, + /// including visible output tokens and + /// [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). #[serde(skip_serializing_if = "Option::is_none")] pub max_output_tokens: Option, - /// Metadata tags/values that were attached to this response. + /// Set of 16 key-value pairs that can be attached to an object. This can be + /// useful for storing additional information about the object in a structured + /// format, and querying for objects via API or the dashboard. + /// + /// Keys are strings with a maximum length of 64 characters. Values are strings + /// with a maximum length of 512 characters. #[serde(skip_serializing_if = "Option::is_none")] pub metadata: Option>, - /// Model ID used to generate the response. + /// Model ID used to generate the response, like gpt-4o or o3. OpenAI offers a + /// wide range of models with different capabilities, performance characteristics, + /// and price points. Refer to the [model guide](https://platform.openai.com/docs/models) to browse and compare available models. pub model: String, - /// The object type – always `response`. + /// The object type of this resource - always set to `response`. pub object: String, - /// The array of content items generated by the model. - pub output: Vec, + /// An array of content items generated by the model. + /// + /// The length and order of items in the output array is dependent on the model's response. + /// Rather than accessing the first item in the output array and assuming it's an assistant + /// message with the content generated by the model, you might consider using + /// the `output_text` property where supported in SDKs. + pub output: Vec, /// SDK-only convenience property that contains the aggregated text output from all /// `output_text` items in the `output` array, if any are present. @@ -2124,27 +2301,55 @@ pub struct ResponseMetadata { #[serde(rename_all = "snake_case")] #[non_exhaustive] pub enum OutputItem { + /// An output message from the model. Message(OutputMessage), - FileSearchCall(FileSearchCallOutput), - FunctionCall(FunctionCall), - WebSearchCall(WebSearchCallOutput), - ComputerCall(ComputerCallOutput), + /// The results of a file search tool call. See the + /// [file search guide](https://platform.openai.com/docs/guides/tools-file-search) + /// for more information. + FileSearchCall(FileSearchToolCall), + /// A tool call to run a function. See the + /// [function calling guide](https://platform.openai.com/docs/guides/function-calling) + /// for more information. + FunctionCall(FunctionToolCall), + /// The results of a web search tool call. See the + /// [web search guide](https://platform.openai.com/docs/guides/tools-web-search) + /// for more information. 
+ WebSearchCall(WebSearchToolCall), + /// A tool call to a computer use tool. See the + /// [computer use guide](https://platform.openai.com/docs/guides/tools-computer-use) + /// for more information. + ComputerCall(ComputerToolCall), + /// A description of the chain of thought used by a reasoning model while generating + /// a response. Be sure to include these items in your `input` to the Responses API for + /// subsequent turns of a conversation if you are manually + /// [managing context](https://platform.openai.com/docs/guides/conversation-state). Reasoning(ReasoningItem), - ImageGenerationCall(ImageGenerationCallOutput), - CodeInterpreterCall(CodeInterpreterCallOutput), - LocalShellCall(LocalShellCallOutput), - McpCall(McpCallOutput), - McpListTools(McpListToolsOutput), - McpApprovalRequest(McpApprovalRequestOutput), - CustomToolCall(CustomToolCallOutput), + /// An image generation request made by the model. + ImageGenerationCall(ImageGenToolCall), + /// A tool call to run code. + CodeInterpreterCall(CodeInterpreterToolCall), + /// A tool call to run a command on the local shell. + LocalShellCall(LocalShellToolCall), + /// An invocation of a tool on an MCP server. + McpCall(MCPToolCall), + /// A list of tools available on an MCP server. + McpListTools(MCPListTools), + /// A request for human approval of a tool invocation. + McpApprovalRequest(MCPApprovalRequest), + /// A call to a custom tool created by the model. + CustomToolCall(CustomToolCall), } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[non_exhaustive] -pub struct CustomToolCallOutput { +pub struct CustomToolCall { + /// An identifier used to map this custom tool call to a tool call output. pub call_id: String, + /// The input for the custom tool call generated by the model. pub input: String, + /// The name of the custom tool being called. pub name: String, + /// The unique ID of the custom tool call in the OpenAI platform. pub id: String, } From dd5e23f17b230e35d7bffb3d3cde821212d5d769 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Sun, 2 Nov 2025 19:37:37 -0800 Subject: [PATCH 11/42] checkpoint for updated types --- async-openai/src/types/chat.rs | 7 +- async-openai/src/types/impls.rs | 14 +- async-openai/src/types/responses.rs | 626 ++++++++++++++++++++++++---- 3 files changed, 551 insertions(+), 96 deletions(-) diff --git a/async-openai/src/types/chat.rs b/async-openai/src/types/chat.rs index d9373db6..e519286d 100644 --- a/async-openai/src/types/chat.rs +++ b/async-openai/src/types/chat.rs @@ -504,9 +504,14 @@ pub struct ResponseFormatJsonSchema { /// The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64. pub name: String, /// The schema for the response format, described as a JSON Schema object. + /// Learn how to build JSON schemas [here](https://json-schema.org/). #[serde(skip_serializing_if = "Option::is_none")] pub schema: Option, - /// Whether to enable strict schema adherence when generating the output. If set to true, the model will always follow the exact schema defined in the `schema` field. Only a subset of JSON Schema is supported when `strict` is `true`. To learn more, read the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + /// Whether to enable strict schema adherence when generating the output. + /// If set to true, the model will always follow the exact schema defined + /// in the `schema` field. Only a subset of JSON Schema is supported when + /// `strict` is `true`. 
To learn more, read the [Structured Outputs + /// guide](https://platform.openai.com/docs/guides/structured-outputs). #[serde(skip_serializing_if = "Option::is_none")] pub strict: Option, } diff --git a/async-openai/src/types/impls.rs b/async-openai/src/types/impls.rs index b566dc7d..3689dbba 100644 --- a/async-openai/src/types/impls.rs +++ b/async-openai/src/types/impls.rs @@ -14,7 +14,7 @@ use crate::{ use bytes::Bytes; use super::{ - responses::{CodeInterpreterContainer, Input, InputContent, Role as ResponsesRole}, + responses::{CodeInterpreterContainer, EasyInputContent, Input, Role as ResponsesRole}, AddUploadPartRequest, AudioInput, AudioResponseFormat, ChatCompletionFunctionCall, ChatCompletionFunctions, ChatCompletionNamedToolChoice, ChatCompletionRequestAssistantMessage, ChatCompletionRequestAssistantMessageContent, ChatCompletionRequestDeveloperMessage, @@ -1053,9 +1053,9 @@ impl Default for Input { } } -impl Default for InputContent { +impl Default for EasyInputContent { fn default() -> Self { - Self::TextInput("".to_string()) + Self::Text("".to_string()) } } @@ -1077,15 +1077,15 @@ impl Default for ResponsesRole { } } -impl From for InputContent { +impl From for EasyInputContent { fn from(value: String) -> Self { - Self::TextInput(value) + Self::Text(value) } } -impl From<&str> for InputContent { +impl From<&str> for EasyInputContent { fn from(value: &str) -> Self { - Self::TextInput(value.to_owned()) + Self::Text(value.to_owned()) } } diff --git a/async-openai/src/types/responses.rs b/async-openai/src/types/responses.rs index d0105b6f..4b60a53b 100644 --- a/async-openai/src/types/responses.rs +++ b/async-openai/src/types/responses.rs @@ -38,14 +38,362 @@ pub enum Input { Items(Vec), } +/// Content item used to generate a response. +/// +/// This is a properly discriminated union based on the `type` field, using Rust's +/// type-safe enum with serde's tag attribute for efficient deserialization. +/// +/// # OpenAPI Specification +/// Corresponds to the `Item` schema in the OpenAPI spec with a `type` discriminator. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(untagged, rename_all = "snake_case")] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum Item { + /// A message (type: "message"). + /// Can represent InputMessage (user/system/developer) or OutputMessage (assistant). + /// + /// InputMessage: + /// A message input to the model with a role indicating instruction following hierarchy. + /// Instructions given with the developer or system role take precedence over instructions given with the user role. + /// OutputMessage: + /// A message output from the model. + Message(MessageItem), + + /// The results of a file search tool call. See the + /// [file search guide](https://platform.openai.com/docs/guides/tools-file-search) for more information. + FileSearchCall(FileSearchToolCall), + + /// A tool call to a computer use tool. See the + /// [computer use guide](https://platform.openai.com/docs/guides/tools-computer-use) for more information. + ComputerCall(ComputerToolCall), + + /// The output of a computer tool call. + ComputerCallOutput(ComputerCallOutputItemParam), + + /// The results of a web search tool call. See the + /// [web search guide](https://platform.openai.com/docs/guides/tools-web-search) for more information. + WebSearchCall(WebSearchToolCall), + + /// A tool call to run a function. See the + /// + /// [function calling guide](https://platform.openai.com/docs/guides/function-calling) for more information. 
+ FunctionCall(FunctionToolCall), + + /// The output of a function tool call. + FunctionCallOutput(FunctionCallOutputItemParam), + + /// A description of the chain of thought used by a reasoning model while generating + /// a response. Be sure to include these items in your `input` to the Responses API + /// for subsequent turns of a conversation if you are manually + /// [managing context](https://platform.openai.com/docs/guides/conversation-state). + Reasoning(ReasoningItem), + + /// An image generation request made by the model. + ImageGenerationCall(ImageGenToolCall), + + /// A tool call to run code. + CodeInterpreterCall(CodeInterpreterToolCall), + + /// A tool call to run a command on the local shell. + LocalShellCall(LocalShellToolCall), + + /// The output of a local shell tool call. + LocalShellCallOutput(LocalShellToolCallOutput), + + /// A list of tools available on an MCP server. + McpListTools(MCPListTools), + + /// A request for human approval of a tool invocation. + McpApprovalRequest(MCPApprovalRequest), + + /// A response to an MCP approval request. + McpApprovalResponse(MCPApprovalResponse), + + /// An invocation of a tool on an MCP server. + McpCall(MCPToolCall), + + /// The output of a custom tool call from your code, being sent back to the model. + CustomToolCallOutput(CustomToolCallOutput), + + /// A call to a custom tool created by the model. + CustomToolCall(CustomToolCall), +} + +/// Input item that can be used in the context for generating a response. +/// +/// This represents the OpenAPI `InputItem` schema which is an `anyOf`: +/// 1. `EasyInputMessage` - Simple, user-friendly message input (can use string content) +/// 2. `Item` - Structured items with proper type discrimination (including InputMessage, OutputMessage, tool calls) +/// 3. `ItemReferenceParam` - Reference to an existing item by ID (type can be null) +/// +/// Uses untagged deserialization because these types overlap in structure. +/// Order matters: more specific structures are tried first. +/// +/// # OpenAPI Specification +/// Corresponds to the `InputItem` schema: `anyOf[EasyInputMessage, Item, ItemReferenceParam]` +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(untagged)] pub enum InputItem { - Message(InputMessage), - Custom(serde_json::Value), + /// A reference to an existing item by ID. + /// Has a required `id` field and optional `type` (can be "item_reference" or null). + /// Must be tried first as it's the most minimal structure. + ItemReference(ItemReference), + + /// All structured items with proper type discrimination. + /// Includes InputMessage, OutputMessage, and all tool calls/outputs. + /// Uses the discriminated `Item` enum for efficient, type-safe deserialization. + Item(Item), + + /// A simple, user-friendly message input (EasyInputMessage). + /// Supports string content and can include assistant role for previous responses. + /// Must be tried last as it's the most flexible structure. + /// + /// A message input to the model with a role indicating instruction following + /// hierarchy. Instructions given with the `developer` or `system` role take + /// precedence over instructions given with the `user` role. Messages with the + /// `assistant` role are presumed to have been generated by the model in previous + /// interactions. + EasyMessage(EasyInputMessage), +} + +impl InputItem { + /// Creates an InputItem from an item reference ID. 
+ pub fn from_reference(id: impl Into) -> Self { + Self::ItemReference(ItemReference::new(id)) + } + + /// Creates an InputItem from a structured Item. + pub fn from_item(item: Item) -> Self { + Self::Item(item) + } + + /// Creates an InputItem from an EasyInputMessage. + pub fn from_easy_message(message: EasyInputMessage) -> Self { + Self::EasyMessage(message) + } + + /// Creates a simple text message with the given role and content. + pub fn text_message(role: Role, content: impl Into) -> Self { + Self::EasyMessage(EasyInputMessage { + r#type: InputMessageType::Message, + role, + content: EasyInputContent::Text(content.into()), + }) + } +} + +/// A message item used within the `Item` enum. +/// +/// Both InputMessage and OutputMessage have `type: "message"`, so we use an untagged +/// enum to distinguish them based on their structure: +/// - OutputMessage: role=assistant, required id & status fields +/// - InputMessage: role=user/system/developer, content is Vec, optional id/status +/// +/// Note: EasyInputMessage is NOT included here - it's a separate variant in `InputItem`, +/// not part of the structured `Item` enum. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(untagged)] +pub enum MessageItem { + /// An output message from the model (role: assistant, has required id & status). + /// This must come first as it has the most specific structure (required id and status fields). + Output(OutputMessage), + + /// A structured input message (role: user/system/developer, content is Vec). + /// Has structured content list and optional id/status fields. + /// + /// A message input to the model with a role indicating instruction following hierarchy. + /// Instructions given with the `developer` or `system` role take precedence over instructions + /// given with the `user` role. + Input(InputMessage), +} + +/// A reference to an existing item by ID. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ItemReference { + /// The type of item to reference. Can be "item_reference" or null. + #[serde(skip_serializing_if = "Option::is_none")] + pub r#type: Option, + /// The ID of the item to reference. + pub id: String, +} + +impl ItemReference { + /// Create a new item reference with the given ID. + pub fn new(id: impl Into) -> Self { + Self { + r#type: Some(ItemReferenceType::ItemReference), + id: id.into(), + } + } +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(rename_all = "snake_case")] +pub enum ItemReferenceType { + ItemReference, +} + +/// Output from a function call that you're providing back to the model. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct FunctionCallOutputItemParam { + /// The unique ID of the function tool call generated by the model. + pub call_id: String, + /// Text, image, or file output of the function tool call. + pub output: FunctionCallOutput, + /// The unique ID of the function tool call output. + /// Populated when this item is returned via API. + #[serde(skip_serializing_if = "Option::is_none")] + pub id: Option, + /// The status of the item. One of `in_progress`, `completed`, or `incomplete`. + /// Populated when items are returned via API. + #[serde(skip_serializing_if = "Option::is_none")] + pub status: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(untagged)] +pub enum FunctionCallOutput { + /// A JSON string of the output of the function tool call. + Text(String), + Content(Vec), // TODO use shape which allows null from OpenAPI spec? 
+} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ComputerCallOutputItemParam { + /// The ID of the computer tool call that produced the output. + pub call_id: String, + /// A computer screenshot image used with the computer use tool. + pub output: ComputerScreenshotImage, + /// The safety checks reported by the API that have been acknowledged by the developer. + #[serde(skip_serializing_if = "Option::is_none")] + pub acknowledged_safety_checks: Option>, + /// The unique ID of the computer tool call output. Optional when creating. + #[serde(skip_serializing_if = "Option::is_none")] + pub id: Option, + /// The status of the message input. One of `in_progress`, `completed`, or `incomplete`. + /// Populated when input items are returned via API. + #[serde(skip_serializing_if = "Option::is_none")] + pub status: Option, // TODO rename OutputStatus? +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(rename_all = "snake_case")] +pub enum ComputerScreenshotImageType { + ComputerScreenshot, +} + +/// A computer screenshot image used with the computer use tool. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ComputerScreenshotImage { + /// Specifies the event type. For a computer screenshot, this property is always + /// set to `computer_screenshot`. + pub r#type: ComputerScreenshotImageType, + /// The identifier of an uploaded file that contains the screenshot. + #[serde(skip_serializing_if = "Option::is_none")] + pub file_id: Option, + /// The URL of the screenshot image. + #[serde(skip_serializing_if = "Option::is_none")] + pub image_url: Option, +} + +/// Output from a local shell tool call that you're providing back to the model. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct LocalShellToolCallOutput { + /// The unique ID of the local shell tool call generated by the model. + pub id: String, + + /// A JSON string of the output of the local shell tool call. + pub output: String, + + /// The status of the item. One of `in_progress`, `completed`, or `incomplete`. + #[serde(skip_serializing_if = "Option::is_none")] + pub status: Option, +} + +/// Output from a local shell command execution. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct LocalShellOutput { + /// The stdout output from the command. + #[serde(skip_serializing_if = "Option::is_none")] + pub stdout: Option, + + /// The stderr output from the command. + #[serde(skip_serializing_if = "Option::is_none")] + pub stderr: Option, + + /// The exit code of the command. + #[serde(skip_serializing_if = "Option::is_none")] + pub exit_code: Option, } -/// A message to prime the model. +/// An MCP approval response that you're providing back to the model. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct MCPApprovalResponse { + /// The ID of the approval request being answered. + pub approval_request_id: String, + + /// Whether the request was approved. + pub approve: bool, + + /// The unique ID of the approval response + #[serde(skip_serializing_if = "Option::is_none")] + pub id: Option, + + /// Optional reason for the decision. + #[serde(skip_serializing_if = "Option::is_none")] + pub reason: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(untagged)] +pub enum CustomToolCallOutputOutput { + /// A string of the output of the custom tool call. + Text(String), + /// Text, image, or file output of the custom tool call. 
+ List(Vec), +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct CustomToolCallOutput { + /// The call ID, used to map this custom tool call output to a custom tool call. + pub call_id: String, + + /// The output from the custom tool call generated by your code. + /// Can be a string or an list of output content. + pub output: CustomToolCallOutputOutput, + + /// The unique ID of the custom tool call output in the OpenAI platform. + #[serde(skip_serializing_if = "Option::is_none")] + pub id: Option, +} + +/// A simplified message input to the model (EasyInputMessage in the OpenAPI spec). +/// +/// This is the most user-friendly way to provide messages, supporting both simple +/// string content and structured content. Role can include `assistant` for providing +/// previous assistant responses. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] +#[builder( + name = "EasyInputMessageArgs", + pattern = "mutable", + setter(into, strip_option), + default +)] +#[builder(build_fn(error = "OpenAIError"))] +pub struct EasyInputMessage { + /// The type of the message input. Always set to `message`. + pub r#type: MessageType, + /// The role of the message input. One of `user`, `assistant`, `system`, or `developer`. + pub role: Role, + /// Text, image, or audio input to the model, used to generate a response. + /// Can also contain previous assistant responses. + pub content: EasyInputContent, +} + +/// A structured message input to the model (InputMessage in the OpenAPI spec). +/// +/// This variant requires structured content (not a simple string) and does not support +/// the `assistant` role (use OutputMessage for that). Used when items are returned via API +/// with additional metadata. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] #[builder( name = "InputMessageArgs", @@ -55,45 +403,56 @@ pub enum InputItem { )] #[builder(build_fn(error = "OpenAIError"))] pub struct InputMessage { - #[serde(default, rename = "type")] - pub kind: InputMessageType, - /// The role of the message input. - pub role: Role, - /// Text, image, or audio input to the model, used to generate a response. Can also contain - /// previous assistant responses. - pub content: InputContent, + /// A list of one or many input items to the model, containing different content types. + pub content: Vec, + /// The role of the message input. One of `user`, `system`, or `developer`. + /// Note: `assistant` is NOT allowed here; use OutputMessage instead. + pub role: InputRole, + /// The status of the item. One of `in_progress`, `completed`, or `incomplete`. + /// Populated when items are returned via API. + #[serde(skip_serializing_if = "Option::is_none")] + pub status: Option, // TODO rename OutputStatus to ItemStatus maybe? + /// The type of the message input. Always set to `message`. + pub r#type: MessageType, } -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] -#[serde(rename_all = "snake_case")] -pub enum InputMessageType { +/// The role for an input message - can only be `user`, `system`, or `developer`. +/// This type ensures type safety by excluding the `assistant` role (use OutputMessage for that). +#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq, Default)] +#[serde(rename_all = "lowercase")] +pub enum InputRole { #[default] - Message, + User, + System, + Developer, } +/// Content for EasyInputMessage - can be a simple string or structured list. 
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[serde(untagged)] -pub enum InputContent { +pub enum EasyInputContent { /// A text input to the model. - TextInput(String), + Text(String), /// A list of one or many input items to the model, containing different content types. - InputItemContentList(Vec), + ContentList(Vec), } /// Parts of a message: text, image, file, or audio. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[serde(tag = "type", rename_all = "snake_case")] -pub enum ContentType { +pub enum InputContent { /// A text input to the model. - InputText(InputText), - /// An image input to the model. - InputImage(InputImage), + InputText(InputTextContent), + /// An image input to the model. Learn about + /// [image inputs](https://platform.openai.com/docs/guides/vision). + InputImage(InputImageContent), /// A file input to the model. - InputFile(InputFile), + InputFile(InputFileContent), } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct InputText { +pub struct InputTextContent { + /// The text input to the model. pub text: String, } @@ -105,8 +464,9 @@ pub struct InputText { default )] #[builder(build_fn(error = "OpenAIError"))] -pub struct InputImage { - /// The detail level of the image to be sent to the model. +pub struct InputImageContent { + /// The detail level of the image to be sent to the model. One of `high`, `low`, or `auto`. + /// Defaults to `auto`. detail: ImageDetail, /// The ID of the file to be sent to the model. #[serde(skip_serializing_if = "Option::is_none")] @@ -125,19 +485,19 @@ pub struct InputImage { default )] #[builder(build_fn(error = "OpenAIError"))] -pub struct InputFile { +pub struct InputFileContent { /// The content of the file to be sent to the model. #[serde(skip_serializing_if = "Option::is_none")] file_data: Option, /// The ID of the file to be sent to the model. #[serde(skip_serializing_if = "Option::is_none")] file_id: Option, - /// The name of the file to be sent to the model. - #[serde(skip_serializing_if = "Option::is_none")] - filename: Option, /// The URL of the file to be sent to the model. #[serde(skip_serializing_if = "Option::is_none")] file_url: Option, + /// The name of the file to be sent to the model. + #[serde(skip_serializing_if = "Option::is_none")] + filename: Option, } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] @@ -312,9 +672,16 @@ pub struct CreateResponse { pub user: Option, } -/// Service tier request options. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct PromptConfig { +#[serde(untagged)] +pub enum ResponsePromptVariables { + String(String), + Content(InputContent), + Custom(serde_json::Value), +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct Prompt { /// The unique identifier of the prompt template to use. pub id: String, @@ -322,17 +689,17 @@ pub struct PromptConfig { #[serde(skip_serializing_if = "Option::is_none")] pub version: Option, - /// Optional map of values to substitute in for variables in your prompt. The substitution - /// values can either be strings, or other Response input types like images or files. - /// For now only supporting Strings. + /// Optional map of values to substitute in for variables in your + /// prompt. The substitution values can either be strings, or other + /// Response input types like images or files. #[serde(skip_serializing_if = "Option::is_none")] - pub variables: Option>, + pub variables: Option, } -/// Service tier request options. 
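As a usage illustration for the new input types above (a sketch only, not part of the patch: the `async_openai::types::responses` import path, the `Role::User` variant, and the elided generic parameters are assumptions based on the surrounding diff):

use async_openai::types::responses::{InputItem, Role};

// Hypothetical helper built on the constructors added in this patch: one plain-text
// turn via the EasyInputMessage path, plus a reference to an item that already
// exists in conversation state ("msg_123" is a placeholder ID).
fn example_input() -> Vec<InputItem> {
    vec![
        InputItem::text_message(Role::User, "Summarize the attached report."),
        InputItem::from_reference("msg_123"),
    ]
}

The untagged ordering documented on `InputItem` is what keeps the reference-only shape deserializing into `ItemReference` instead of being captured by the more permissive `EasyMessage` variant.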
-#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] +#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Default)] #[serde(rename_all = "lowercase")] pub enum ServiceTier { + #[default] Auto, Default, Flex, @@ -351,17 +718,27 @@ pub enum Truncation { /// o-series reasoning settings. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] #[builder( - name = "ReasoningConfigArgs", + name = "ReasoningArgs", pattern = "mutable", setter(into, strip_option), default )] #[builder(build_fn(error = "OpenAIError"))] -pub struct ReasoningConfig { - /// Constrain effort on reasoning. +pub struct Reasoning { + /// Constrains effort on reasoning for + /// [reasoning models](https://platform.openai.com/docs/guides/reasoning). + /// Currently supported values are `minimal`, `low`, `medium`, and `high`. Reducing + /// reasoning effort can result in faster responses and fewer tokens used + /// on reasoning in a response. + /// + /// Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. #[serde(skip_serializing_if = "Option::is_none")] pub effort: Option, - /// Summary mode for reasoning. + /// A summary of the reasoning performed by the model. This can be + /// useful for debugging and understanding the model's reasoning process. + /// One of `auto`, `concise`, or `detailed`. + /// + /// `concise` is only supported for `computer-use-preview` models. #[serde(skip_serializing_if = "Option::is_none")] pub summary: Option, } @@ -385,22 +762,42 @@ pub enum ReasoningSummary { /// Configuration for text response format. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct TextConfig { - /// Defines the format: plain text, JSON object, or JSON schema. - pub format: TextResponseFormat, +pub struct ResponseTextParam { + /// An object specifying the format that the model must output. + /// + /// Configuring `{ "type": "json_schema" }` enables Structured Outputs, + /// which ensures the model will match your supplied JSON schema. Learn more in the + /// [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + /// + /// The default format is `{ "type": "text" }` with no additional options. + /// + /// **Not recommended for gpt-4o and newer models:** + /// + /// Setting to `{ "type": "json_object" }` enables the older JSON mode, which + /// ensures the message the model generates is valid JSON. Using `json_schema` + /// is preferred for models that support it. + pub format: TextResponseFormatConfiguration, + /// Constrains the verbosity of the model's response. Lower values will result in + /// more concise responses, while higher values will result in more verbose responses. + /// + /// Currently supported values are `low`, `medium`, and `high`. #[serde(skip_serializing_if = "Option::is_none")] pub verbosity: Option, } #[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] #[serde(tag = "type", rename_all = "snake_case")] -pub enum TextResponseFormat { - /// The type of response format being defined: `text` +pub enum TextResponseFormatConfiguration { + /// Default response format. Used to generate text responses. Text, - /// The type of response format being defined: `json_object` + /// JSON object response format. An older method of generating JSON responses. + /// Using `json_schema` is recommended for models that support it. + /// Note that the model will not generate JSON without a system or user message + /// instructing it to do so. 
JsonObject, - /// The type of response format being defined: `json_schema` + /// JSON Schema response format. Used to generate structured JSON responses. + /// Learn more about [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs). JsonSchema(ResponseFormatJsonSchema), } @@ -621,7 +1018,7 @@ pub struct Mcp { pub allowed_tools: Option, /// Optional HTTP headers for the MCP server. #[serde(skip_serializing_if = "Option::is_none")] - pub headers: Option, + pub headers: Option, /// Approval policy or filter for tools. #[serde(skip_serializing_if = "Option::is_none")] pub require_approval: Option, @@ -948,10 +1345,28 @@ pub struct OutputMessage { /// The unique ID of the output message. pub id: String, /// The role of the output message. Always `assistant`. - pub role: Role, + pub role: AssistantRole, /// The status of the message input. One of `in_progress`, `completed`, or /// `incomplete`. Populated when input items are returned via API. pub status: OutputStatus, + /// The type of the output message. Always `message`. + pub r#type: MessageType, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] +#[serde(rename_all = "lowercase")] +pub enum MessageType { + #[default] + Message, +} + +/// The role for an output message - always `assistant`. +/// This type ensures type safety by only allowing the assistant role. +#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq, Default)] +#[serde(rename_all = "lowercase")] +pub enum AssistantRole { + #[default] + Assistant, } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] @@ -964,35 +1379,38 @@ pub enum OutputMessageContent { } /// Nested content within an output message. +/// +/// Note: This enum is similar to OutputItem but may be used in different contexts. +/// Consider using OutputItem directly if it fits your use case. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[serde(tag = "type", rename_all = "snake_case")] pub enum OutputContent { /// An output message from the model. Message(OutputMessage), /// The results of a file search tool call. - FileSearchCall(FileSearchCallOutput), + FileSearchCall(FileSearchToolCall), /// A tool call to run a function. - FunctionCall(FunctionCall), + FunctionCall(FunctionToolCall), /// The results of a web search tool call. - WebSearchCall(WebSearchCallOutput), + WebSearchCall(WebSearchToolCall), /// A tool call to a computer use tool. - ComputerCall(ComputerCallOutput), + ComputerCall(ComputerToolCall), /// A description of the chain of thought used by a reasoning model while generating a response. /// Be sure to include these items in your input to the Responses API for subsequent turns of a /// conversation if you are manually managing context. Reasoning(ReasoningItem), /// Image generation tool call output. - ImageGenerationCall(ImageGenerationCallOutput), + ImageGenerationCall(ImageGenToolCall), /// Code interpreter tool call output. - CodeInterpreterCall(CodeInterpreterCallOutput), + CodeInterpreterCall(CodeInterpreterToolCall), /// Local shell tool call output. - LocalShellCall(LocalShellCallOutput), + LocalShellCall(LocalShellToolCall), /// MCP tool invocation output. - McpCall(McpCallOutput), + McpCall(MCPToolCall), /// MCP list-tools output. - McpListTools(McpListToolsOutput), + McpListTools(MCPListTools), /// MCP approval request output. - McpApprovalRequest(McpApprovalRequestOutput), + McpApprovalRequest(MCPApprovalRequest), } /// Reasoning text content. 
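Because `OutputMessage::content` is now a list of `OutputMessageContent` parts, a small illustrative helper (not part of the patch; the import path is assumed) shows the intended way to pull plain text out of an assistant message while skipping refusals:

use async_openai::types::responses::{OutputMessage, OutputMessageContent};

// Sketch: concatenate the text parts of a single assistant message and ignore
// refusal parts, which is roughly what an SDK-level `output_text` convenience would do.
fn message_text(message: &OutputMessage) -> String {
    message
        .content
        .iter()
        .filter_map(|part| match part {
            OutputMessageContent::OutputText(text) => Some(text.text.as_str()),
            OutputMessageContent::Refusal(_) => None,
        })
        .collect::<Vec<&str>>()
        .join("")
}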
@@ -1277,19 +1695,21 @@ pub struct Type { pub text: String, } -/// Metadata for a function call request. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] pub struct FunctionToolCall { - /// The unique ID of the function tool call. - pub id: String, + /// A JSON string of the arguments to pass to the function. + pub arguments: String, /// The unique ID of the function tool call generated by the model. pub call_id: String, /// The name of the function to run. pub name: String, - /// A JSON string of the arguments to pass to the function. - pub arguments: String, - /// The status of the item. - pub status: OutputStatus, + /// The unique ID of the function tool call. + #[serde(skip_serializing_if = "Option::is_none")] + pub id: Option, + /// The status of the item. One of `in_progress`, `completed`, or `incomplete`. + /// Populated when items are returned via API. + #[serde(skip_serializing_if = "Option::is_none")] + pub status: Option, // TODO rename OutputStatus? } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] @@ -1301,7 +1721,6 @@ pub enum ImageGenToolCallStatus { Failed, } -/// Output of an image generation request. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] pub struct ImageGenToolCall { /// The unique ID of the image generation call. @@ -1371,7 +1790,6 @@ pub struct CodeInterpreterFile { mime_type: String, } -/// Output of a local shell command request. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] pub struct LocalShellToolCall { /// Execute a shell command on the server. @@ -1381,7 +1799,7 @@ pub struct LocalShellToolCall { /// The unique ID of the local shell call. pub id: String, /// The status of the local shell call. - pub status: String, + pub status: OutputStatus, } /// Define the shape of a local shell action (exec). @@ -1460,7 +1878,6 @@ pub struct MCPListToolsTool { pub description: Option, } -/// Output representing a human approval request for an MCP tool. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] pub struct MCPApprovalRequest { /// JSON string of arguments for the tool. @@ -1556,48 +1973,79 @@ pub struct Response { /// An array of content items generated by the model. /// - /// The length and order of items in the output array is dependent on the model's response. - /// Rather than accessing the first item in the output array and assuming it's an assistant - /// message with the content generated by the model, you might consider using - /// the `output_text` property where supported in SDKs. + /// - The length and order of items in the output array is dependent on the model's response. + /// - Rather than accessing the first item in the output array and assuming it's an assistant + /// message with the content generated by the model, you might consider using + /// the `output_text` property where supported in SDKs. pub output: Vec, /// SDK-only convenience property that contains the aggregated text output from all /// `output_text` items in the `output` array, if any are present. /// Supported in the Python and JavaScript SDKs. - #[serde(skip_serializing_if = "Option::is_none")] - pub output_text: Option, + // #[serde(skip_serializing_if = "Option::is_none")] + // pub output_text: Option, - /// Whether parallel tool calls were enabled. + /// Whether to allow the model to run tool calls in parallel. #[serde(skip_serializing_if = "Option::is_none")] pub parallel_tool_calls: Option, - /// Previous response ID, if creating part of a multi-turn conversation. 
+ /// The unique ID of the previous response to the model. Use this to create multi-turn conversations. + /// Learn more about [conversation state](https://platform.openai.com/docs/guides/conversation-state). + /// Cannot be used in conjunction with `conversation`. #[serde(skip_serializing_if = "Option::is_none")] pub previous_response_id: Option, - /// Reasoning configuration echoed back (effort, summary settings). + /// Reference to a prompt template and its variables. + /// [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). #[serde(skip_serializing_if = "Option::is_none")] - pub reasoning: Option, + pub prompt: Option, - /// Whether to store the generated model response for later retrieval via API. + /// Used by OpenAI to cache responses for similar requests to optimize your cache hit rates. Replaces + /// the `user` field. [Learn more](https://platform.openai.com/docs/guides/prompt-caching). #[serde(skip_serializing_if = "Option::is_none")] - pub store: Option, + pub prompt_cache_key: Option, + + /// **gpt-5 and o-series models only** + /// Configuration options for [reasoning models](https://platform.openai.com/docs/guides/reasoning). + #[serde(skip_serializing_if = "Option::is_none")] + pub reasoning: Option, - /// The service tier that actually processed this response. + /// A stable identifier used to help detect users of your application that may be violating OpenAI's + /// usage policies. + /// + /// The IDs should be a string that uniquely identifies each user. We recommend hashing their username + /// or email address, in order to avoid sending us any identifying information. [Learn + /// more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers). + #[serde(skip_serializing_if = "Option::is_none")] + pub safety_identifier: Option, + + /// Specifies the processing type used for serving the request. + /// - If set to 'auto', then the request will be processed with the service tier configured in the Project settings. Unless otherwise configured, the Project will use 'default'. + /// - If set to 'default', then the request will be processed with the standard pricing and performance for the selected model. + /// - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or '[priority](https://openai.com/api-priority-processing/)', then the request will be processed with the corresponding service tier. + /// - When not set, the default behavior is 'auto'. + /// + /// When the `service_tier` parameter is set, the response body will include the `service_tier` value based on the processing mode actually used to serve the request. This response value may be different from the value set in the parameter. #[serde(skip_serializing_if = "Option::is_none")] pub service_tier: Option, /// The status of the response generation. + /// One of `completed`, `failed`, `in_progress`, `cancelled`, `queued`, or `incomplete`. pub status: Status, - /// Sampling temperature that was used. + /// What sampling temperature was used, between 0 and 2. Higher values like 0.8 make + /// outputs more random, lower values like 0.2 make output more focused and deterministic. + /// + /// We generally recommend altering this or `top_p` but not both. #[serde(skip_serializing_if = "Option::is_none")] pub temperature: Option, - /// Text format configuration echoed back (plain, json_object, json_schema). + /// Configuration options for a text response from the model. Can be plain + /// text or structured JSON data. 
Learn more: + /// - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + /// - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) #[serde(skip_serializing_if = "Option::is_none")] - pub text: Option, + pub text: Option, /// How the model chose or was forced to choose a tool. #[serde(skip_serializing_if = "Option::is_none")] @@ -1630,6 +2078,8 @@ pub enum Status { Completed, Failed, InProgress, + Cancelled, + Queued, Incomplete, } From 15c94351f9e34a4a631bd4b98824fce9719b5ab7 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Mon, 3 Nov 2025 12:56:04 -0800 Subject: [PATCH 12/42] checkpoint for updates to responses types --- async-openai/src/types/mcp.rs | 123 ++++ async-openai/src/types/mod.rs | 2 + .../src/types/realtime/session_resource.rs | 63 +- async-openai/src/types/responses.rs | 616 ++++++++++++------ 4 files changed, 529 insertions(+), 275 deletions(-) create mode 100644 async-openai/src/types/mcp.rs diff --git a/async-openai/src/types/mcp.rs b/async-openai/src/types/mcp.rs new file mode 100644 index 00000000..7b76c5fb --- /dev/null +++ b/async-openai/src/types/mcp.rs @@ -0,0 +1,123 @@ +use derive_builder::Builder; +use serde::{Deserialize, Serialize}; + +use crate::error::OpenAIError; + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum McpToolConnectorId { + ConnectorDropbox, + ConnectorGmail, + ConnectorGooglecalendar, + ConnectorGoogledrive, + ConnectorMicrosoftteams, + ConnectorOutlookcalendar, + ConnectorOutlookemail, + ConnectorSharepoint, +} + +#[derive(Debug, Serialize, Deserialize, Clone, Builder, PartialEq, Default)] +#[builder( + name = "MCPToolArgs", + pattern = "mutable", + setter(into, strip_option), + default +)] +#[builder(build_fn(error = "OpenAIError"))] +pub struct MCPTool { + /// A label for this MCP server, used to identify it in tool calls. + pub server_label: String, + + /// List of allowed tool names or a filter object. + #[serde(skip_serializing_if = "Option::is_none")] + pub allowed_tools: Option, + + /// An OAuth access token that can be used with a remote MCP server, either with a custom MCP + /// server URL or a service connector. Your application must handle the OAuth authorization + /// flow and provide the token here. + #[serde(skip_serializing_if = "Option::is_none")] + pub authorization: Option, + + /// Identifier for service connectors, like those available in ChatGPT. One of `server_url` or + /// `connector_id` must be provided. Learn more about service connectors [here](https://platform.openai.com/docs/guides/tools-remote-mcp#connectors). + /// + /// Currently supported `connector_id` values are: + /// - Dropbox: `connector_dropbox` + /// - Gmail: `connector_gmail` + /// - Google Calendar: `connector_googlecalendar` + /// - Google Drive: `connector_googledrive` + /// - Microsoft Teams: `connector_microsoftteams` + /// - Outlook Calendar: `connector_outlookcalendar` + /// - Outlook Email: `connector_outlookemail` + /// - SharePoint: `connector_sharepoint` + #[serde(skip_serializing_if = "Option::is_none")] + pub connector_id: Option, + + /// Optional HTTP headers to send to the MCP server. Use for authentication or other purposes. + #[serde(skip_serializing_if = "Option::is_none")] + pub headers: Option, + + /// Specify which of the MCP server's tools require approval. + #[serde(skip_serializing_if = "Option::is_none")] + pub require_approval: Option, + + /// Optional description of the MCP server, used to provide more context. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub server_description: Option, + + /// The URL for the MCP server. One of `server_url` or `connector_id` must be provided. + #[serde(skip_serializing_if = "Option::is_none")] + pub server_url: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(untagged)] +pub enum MCPToolAllowedTools { + /// A string array of allowed tool names + List(Vec), + /// A filter object to specify which tools are allowed. + Filter(MCPToolFilter), +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct MCPToolFilter { + /// Indicates whether or not a tool modifies data or is read-only. + /// If an MCP server is annotated with [readOnlyHint](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + /// it will match this filter. + #[serde(skip_serializing_if = "Option::is_none")] + pub read_only: Option, + /// List of allowed tool names. + #[serde(skip_serializing_if = "Option::is_none")] + pub tool_names: Option>, +} + +/// Approval policy or filter for MCP tools. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(untagged)] +pub enum MCPToolRequireApproval { + /// Specify which of the MCP server's tools require approval. Can be + /// `always`, `never`, or a filter object associated with tools + /// that require approval. + Filter(MCPToolApprovalFilter), + /// Specify a single approval policy for all tools. One of `always` or + /// `never`. When set to `always`, all tools will require approval. When + /// set to `never`, all tools will not require approval. + ApprovalSetting(MCPToolApprovalSetting), +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(rename_all = "lowercase")] +pub enum MCPToolApprovalSetting { + Always, + Never, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct MCPToolApprovalFilter { + /// A list of tools that always require approval. + #[serde(skip_serializing_if = "Option::is_none")] + pub always: Option, + /// A list of tools that never require approval. + #[serde(skip_serializing_if = "Option::is_none")] + pub never: Option, +} diff --git a/async-openai/src/types/mod.rs b/async-openai/src/types/mod.rs index c1cd4cb5..c6474aa5 100644 --- a/async-openai/src/types/mod.rs +++ b/async-openai/src/types/mod.rs @@ -14,6 +14,7 @@ mod file; mod fine_tuning; mod image; mod invites; +mod mcp; mod message; mod model; mod moderation; @@ -46,6 +47,7 @@ pub use file::*; pub use fine_tuning::*; pub use image::*; pub use invites::*; +pub use mcp::*; pub use message::*; pub use model::*; pub use moderation::*; diff --git a/async-openai/src/types/realtime/session_resource.rs b/async-openai/src/types/realtime/session_resource.rs index c11f78c6..ba0f12cf 100644 --- a/async-openai/src/types/realtime/session_resource.rs +++ b/async-openai/src/types/realtime/session_resource.rs @@ -1,6 +1,6 @@ use serde::{Deserialize, Serialize}; -use crate::types::responses::RequireApproval; +use crate::types::MCPTool; #[derive(Debug, Default, Serialize, Deserialize, Clone)] pub struct AudioTranscription { @@ -105,67 +105,6 @@ pub struct FunctionTool { pub parameters: serde_json::Value, } -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(untagged)] -pub enum AllowedTools { - /// A string array of allowed tool names - List(Vec), - /// A filter object to specify which tools are allowed. 
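For reference, a connector-backed MCP tool built with the new `MCPToolArgs` builder might look like the sketch below. It assumes the builder behaves like the crate's other `*Args` builders (per the `setter(into, strip_option)`, `default`, and `build_fn(error = "OpenAIError")` attributes in this patch); the server label and OAuth token are placeholders.

```rust
use async_openai::error::OpenAIError;
use async_openai::types::{
    MCPTool, MCPToolApprovalSetting, MCPToolArgs, MCPToolRequireApproval, McpToolConnectorId,
};

// A minimal sketch: an MCP tool that talks to the Google Drive connector
// instead of a custom server URL, with approvals disabled for every tool.
fn google_drive_mcp_tool(oauth_token: &str) -> Result<MCPTool, OpenAIError> {
    MCPToolArgs::default()
        .server_label("google_drive") // placeholder label
        .connector_id(McpToolConnectorId::ConnectorGoogledrive)
        .authorization(oauth_token)
        .require_approval(MCPToolRequireApproval::ApprovalSetting(
            MCPToolApprovalSetting::Never,
        ))
        .build()
}
```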
- Filter(MCPAllowedToolsFilter), -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct MCPAllowedToolsFilter { - /// Indicates whether or not a tool modifies data or is read-only. - /// If an MCP server is annotated with [readOnlyHint](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), - /// it will match this filter. - #[serde(skip_serializing_if = "Option::is_none")] - pub read_only: Option, - /// List of allowed tool names. - #[serde(skip_serializing_if = "Option::is_none")] - pub tool_names: Option>, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct MCPTool { - /// A label for this MCP server, used to identify it in tool calls. - pub server_label: String, - - /// List of allowed tool names or a filter object. - pub allowed_tools: AllowedTools, - - /// An OAuth access token that can be used with a remote MCP server, either with a custom MCP - /// server URL or a service connector. Your application must handle the OAuth authorization - /// flow and provide the token here. - pub authorization: Option, - - /// Identifier for service connectors, like those available in ChatGPT. One of `server_url` or - /// `connector_id` must be provided. Learn more about service connectors [here](https://platform.openai.com/docs/guides/tools-remote-mcp#connectors). - /// - /// Currently supported `connector_id` values are: - /// - Dropbox: `connector_dropbox` - /// - Gmail: `connector_gmail` - /// - Google Calendar: `connector_googlecalendar` - /// - Google Drive: `connector_googledrive` - /// - Microsoft Teams: `connector_microsoftteams` - /// - Outlook Calendar: `connector_outlookcalendar` - /// - Outlook Email: `connector_outlookemail` - /// - SharePoint: `connector_sharepoint` - pub connector_id: Option, - - /// Optional HTTP headers to send to the MCP server. Use for authentication or other purposes. - pub headers: Option, - - /// Specify which of the MCP server's tools require approval. - pub require_approval: Option, - - /// Optional description of the MCP server, used to provide more context. - pub server_description: Option, - - /// The URL for the MCP server. One of `server_url` or `connector_id` must be provided. - pub server_url: Option, -} - #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(tag = "type")] pub enum ToolDefinition { diff --git a/async-openai/src/types/responses.rs b/async-openai/src/types/responses.rs index 4b60a53b..824749a2 100644 --- a/async-openai/src/types/responses.rs +++ b/async-openai/src/types/responses.rs @@ -1,4 +1,5 @@ use crate::error::OpenAIError; +use crate::types::MCPTool; pub use crate::types::{ CompletionTokensDetails, ImageDetail, PromptTokensDetails, ReasoningEffort, ResponseFormatJsonSchema, @@ -174,7 +175,7 @@ impl InputItem { /// Creates a simple text message with the given role and content. pub fn text_message(role: Role, content: impl Into) -> Self { Self::EasyMessage(EasyInputMessage { - r#type: InputMessageType::Message, + r#type: MessageType::Message, role, content: EasyInputContent::Text(content.into()), }) @@ -804,34 +805,87 @@ pub enum TextResponseFormatConfiguration { /// Definitions for model-callable tools. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[serde(tag = "type", rename_all = "snake_case")] -pub enum ToolDefinition { - /// File search tool. - FileSearch(FileSearch), - /// Custom function call. - Function(Function), - /// Web search preview tool. - WebSearchPreview(WebSearchPreview), - /// Virtual computer control tool. 
- ComputerUsePreview(ComputerUsePreview), - /// Remote Model Context Protocol server. - Mcp(Mcp), - /// Python code interpreter tool. - CodeInterpreter(CodeInterpreter), - /// Image generation tool. - ImageGeneration(ImageGeneration), - /// Local shell command execution tool. +pub enum Tool { + /// Defines a function in your own code the model can choose to call. Learn more about [function + /// calling](https://platform.openai.com/docs/guides/tools). + Function(FunctionTool), + /// A tool that searches for relevant content from uploaded files. Learn more about the [file search + /// tool](https://platform.openai.com/docs/guides/tools-file-search). + FileSearch(FileSearchTool), + /// A tool that controls a virtual computer. Learn more about the [computer + /// use tool](https://platform.openai.com/docs/guides/tools-computer-use). + ComputerUsePreview(ComputerUsePreviewTool), + /// Search the Internet for sources related to the prompt. Learn more about the + /// [web search tool](https://platform.openai.com/docs/guides/tools-web-search). + WebSearch(WebSearchTool), + /// type: web_search_2025_08_26 + #[serde(rename = "web_search_2025_08_26")] + WebSearch20250826(WebSearchTool), + /// Give the model access to additional tools via remote Model Context Protocol + /// (MCP) servers. [Learn more about MCP](https://platform.openai.com/docs/guides/tools-remote-mcp). + Mcp(MCPTool), + /// A tool that runs Python code to help generate a response to a prompt. + CodeInterpreter(CodeInterpreterTool), + /// A tool that generates images using a model like `gpt-image-1`. + ImageGeneration(ImageGenTool), + /// A tool that allows the model to execute shell commands in a local environment. LocalShell, + /// A custom tool that processes input using a specified format. Learn more about [custom + /// tools](https://platform.openai.com/docs/guides/function-calling#custom-tools) + Custom(CustomToolParam), + /// This tool searches the web for relevant results to use in a response. Learn more about the [web search + ///tool](https://platform.openai.com/docs/guides/tools-web-search). + WebSearchPreview(WebSearchTool), + /// type: web_search_preview_2025_03_11 + #[serde(rename = "web_search_preview_2025_03_11")] + WebSearchPreview20250311(WebSearchTool), +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] +pub struct CustomToolParam { + /// The name of the custom tool, used to identify it in tool calls. + pub name: String, + /// Optional description of the custom tool, used to provide more context. + pub description: Option, + /// The input format for the custom tool. Default is unconstrained text. + pub format: CustomToolParamFormat, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] +#[serde(rename_all = "lowercase")] +pub enum GrammarSyntax { + Lark, + #[default] + Regex, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder, Default)] +pub struct CustomGrammarFormatParam { + /// The grammar definition. + pub definition: String, + /// The syntax of the grammar definition. One of `lark` or `regex`. + pub syntax: GrammarSyntax, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] +#[serde(tag = "type", rename_all = "lowercase")] +pub enum CustomToolParamFormat { + /// Unconstrained free-form text. + #[default] + Text, + /// A grammar defined by the user. 
+ Grammar(CustomGrammarFormatParam), } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] #[builder( - name = "FileSearchArgs", + name = "FileSearchToolArgs", pattern = "mutable", setter(into, strip_option), default )] #[builder(build_fn(error = "OpenAIError"))] -pub struct FileSearch { +pub struct FileSearchTool { /// The IDs of the vector stores to search. pub vector_store_ids: Vec, /// The maximum number of results to return. This number should be between 1 and 50 inclusive. @@ -847,69 +901,117 @@ pub struct FileSearch { #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] #[builder( - name = "FunctionArgs", + name = "FunctionToolArgs", pattern = "mutable", setter(into, strip_option), default )] -pub struct Function { +pub struct FunctionTool { /// The name of the function to call. pub name: String, /// A JSON schema object describing the parameters of the function. - pub parameters: serde_json::Value, - /// Whether to enforce strict parameter validation. - pub strict: bool, + #[serde(skip_serializing_if = "Option::is_none")] + pub parameters: Option, + /// Whether to enforce strict parameter validation. Default `true`. + #[serde(skip_serializing_if = "Option::is_none")] + pub strict: Option, /// A description of the function. Used by the model to determine whether or not to call the /// function. #[serde(skip_serializing_if = "Option::is_none")] pub description: Option, } +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct WebSearchToolFilters { + /// Allowed domains for the search. If not provided, all domains are allowed. + /// Subdomains of the provided domains are allowed as well. + /// + /// Example: `["pubmed.ncbi.nlm.nih.gov"]` + #[serde(skip_serializing_if = "Option::is_none")] + pub allowed_domains: Option>, +} + #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] #[builder( - name = "WebSearchPreviewArgs", + name = "WebSearchToolArgs", pattern = "mutable", setter(into, strip_option), default )] -pub struct WebSearchPreview { - /// The user's location. +pub struct WebSearchTool { + /// Filters for the search. #[serde(skip_serializing_if = "Option::is_none")] - pub user_location: Option, - /// High level guidance for the amount of context window space to use for the search. + pub filters: Option, + /// The approximate location of the user. #[serde(skip_serializing_if = "Option::is_none")] - pub search_context_size: Option, + pub user_location: Option, + /// High level guidance for the amount of context window space to use for the search. One of `low`, + /// `medium`, or `high`. `medium` is the default. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub search_context_size: Option, } -#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq, Default)] #[serde(rename_all = "lowercase")] -pub enum WebSearchContextSize { +pub enum WebSearchToolSearchContextSize { Low, + #[default] Medium, High, } +#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq, Default)] +#[serde(rename_all = "lowercase")] +pub enum ComputerEnvironment { + Windows, + Mac, + Linux, + Ubuntu, + #[default] + Browser, +} + #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] #[builder( - name = "ComputerUsePreviewArgs", + name = "ComputerUsePreviewToolArgs", pattern = "mutable", setter(into, strip_option), default )] -pub struct ComputerUsePreview { +pub struct ComputerUsePreviewTool { /// The type of computer environment to control. - environment: String, + environment: ComputerEnvironment, /// The width of the computer display. display_width: u32, /// The height of the computer display. display_height: u32, } +#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] +pub enum RankVersionType { + #[serde(rename = "auto")] + Auto, + #[serde(rename = "default-2024-11-15")] + Default20241115, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct HybridSearch { + /// The weight of the embedding in the reciprocal ranking fusion. + pub embedding_weight: f32, + /// The weight of the text in the reciprocal ranking fusion. + pub text_weight: f32, +} + /// Options for search result ranking. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] pub struct RankingOptions { + /// Weights that control how reciprocal rank fusion balances semantic embedding matches versus + /// sparse keyword matches when hybrid search is enabled. + #[serde(skip_serializing_if = "Option::is_none")] + pub hybrid_search: Option, /// The ranker to use for the file search. - pub ranker: String, + pub ranker: RankVersionType, /// The score threshold for the file search, a number between 0 and 1. Numbers closer to 1 will /// attempt to return only the most relevant results, but may return fewer results. #[serde(skip_serializing_if = "Option::is_none")] @@ -923,16 +1025,23 @@ pub enum Filter { /// A filter used to compare a specified attribute key to a given value using a defined /// comparison operation. Comparison(ComparisonFilter), - /// Combine multiple filters using and or or. + /// Combine multiple filters using `and` or `or`. Compound(CompoundFilter), } /// Single comparison filter. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] pub struct ComparisonFilter { - /// Specifies the comparison operator - #[serde(rename = "type")] - pub op: ComparisonType, + /// Specifies the comparison operator: `eq`, `ne`, `gt`, `gte`, `lt`, `lte`, `in`, `nin`. + /// - `eq`: equals + /// - `ne`: not equal + /// - `gt`: greater than + /// - `gte`: greater than or equal + /// - `lt`: less than + /// - `lte`: less than or equal + /// - `in`: in + /// - `nin`: not in + pub r#type: ComparisonType, /// The key to compare against the value. pub key: String, /// The value to compare against the attribute key; supports string, number, or boolean types. 
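As a usage sketch for the renamed `FileSearchToolArgs` builder above (assuming the elided generic parameters are the obvious ones, e.g. `Vec<String>` for `vector_store_ids`), a file search tool scoped to a single vector store could be built and wrapped into the `Tool` enum like this; the vector store ID is a placeholder:

```rust
use async_openai::error::OpenAIError;
use async_openai::types::responses::{FileSearchToolArgs, Tool};

// A minimal sketch: attach a file search tool that only looks at one vector store.
fn file_search_tool() -> Result<Tool, OpenAIError> {
    let file_search = FileSearchToolArgs::default()
        .vector_store_ids(vec!["vs_abc123".to_string()]) // placeholder vector store ID
        .build()?;
    Ok(Tool::FileSearch(file_search))
}
```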
@@ -948,19 +1057,22 @@ pub enum ComparisonType { #[serde(rename = "gt")] GreaterThan, #[serde(rename = "gte")] - GreaterThanOrEqualTo, + GreaterThanOrEqual, #[serde(rename = "lt")] LessThan, #[serde(rename = "lte")] - LessThanOrEqualTo, + LessThanOrEqual, + #[serde(rename = "in")] + In, + #[serde(rename = "nin")] + NotIn, } -/// Combine multiple filters. +/// Combine multiple filters using `and` or `or`. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] pub struct CompoundFilter { - /// Type of operation - #[serde(rename = "type")] - pub op: CompoundType, + /// 'Type of operation: `and` or `or`.' + pub r#type: CompoundType, /// Array of filters to combine. Items can be ComparisonFilter or CompoundFilter. pub filters: Vec, } @@ -972,142 +1084,87 @@ pub enum CompoundType { Or, } +#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq, Default)] +#[serde(rename_all = "lowercase")] +pub enum WebSearchApproximateLocationType { + #[default] + Approximate, +} + /// Approximate user location for web search. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] #[builder( - name = "LocationArgs", + name = "WebSearchApproximateLocationArgs", pattern = "mutable", setter(into, strip_option), default )] #[builder(build_fn(error = "OpenAIError"))] -pub struct Location { - /// The type of location approximation. Always approximate. - #[serde(rename = "type")] - pub kind: String, - /// Free text input for the city of the user, e.g. San Francisco. +pub struct WebSearchApproximateLocation { + /// The type of location approximation. Always `approximate`. + pub r#type: WebSearchApproximateLocationType, + /// Free text input for the city of the user, e.g. `San Francisco`. #[serde(skip_serializing_if = "Option::is_none")] pub city: Option, - /// The two-letter ISO country code of the user, e.g. US. + /// The two-letter [ISO country code](https://en.wikipedia.org/wiki/ISO_3166-1) of the user, + /// e.g. `US`. #[serde(skip_serializing_if = "Option::is_none")] pub country: Option, - /// Free text input for the region of the user, e.g. California. + /// Free text input for the region of the user, e.g. `California`. #[serde(skip_serializing_if = "Option::is_none")] pub region: Option, - /// The IANA timezone of the user, e.g. America/Los_Angeles. + /// The [IANA timezone](https://timeapi.io/documentation/iana-timezones) of the user, e.g. + /// `America/Los_Angeles`. #[serde(skip_serializing_if = "Option::is_none")] pub timezone: Option, } -/// MCP (Model Context Protocol) tool configuration. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] -#[builder( - name = "McpArgs", - pattern = "mutable", - setter(into, strip_option), - default -)] -#[builder(build_fn(error = "OpenAIError"))] -pub struct Mcp { - /// A label for this MCP server. - pub server_label: String, - /// The URL for the MCP server. - pub server_url: String, - /// List of allowed tool names or filter object. - #[serde(skip_serializing_if = "Option::is_none")] - pub allowed_tools: Option, - /// Optional HTTP headers for the MCP server. - #[serde(skip_serializing_if = "Option::is_none")] - pub headers: Option, - /// Approval policy or filter for tools. - #[serde(skip_serializing_if = "Option::is_none")] - pub require_approval: Option, -} - -/// Allowed tools configuration for MCP. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(untagged)] -pub enum AllowedTools { - /// A flat list of allowed tool names. - List(Vec), - /// A filter object specifying allowed tools. 
- Filter(McpAllowedToolsFilter), -} - -/// Filter object for MCP allowed tools. +/// Container configuration for a code interpreter. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct McpAllowedToolsFilter { - /// Names of tools in the filter - #[serde(skip_serializing_if = "Option::is_none")] - pub tool_names: Option>, -} +#[serde(tag = "type", rename_all = "snake_case")] +pub enum CodeInterpreterToolContainer { + /// Configuration for a code interpreter container. Optionally specify the IDs of the + /// files to run the code on. + Auto(CodeInterpreterContainerAuto), -/// Approval policy or filter for MCP tools. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(untagged)] -pub enum RequireApproval { - /// A blanket policy: "always" or "never". - Policy(RequireApprovalPolicy), - /// A filter object specifying which tools require approval. - Filter(McpApprovalFilter), + /// The container ID. + #[serde(untagged)] + ContainerID(String), } -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(rename_all = "lowercase")] -pub enum RequireApprovalPolicy { - Always, - Never, +impl Default for CodeInterpreterToolContainer { + fn default() -> Self { + Self::Auto(CodeInterpreterContainerAuto::default()) + } } -/// Filter object for MCP tool approval. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct McpApprovalFilter { - /// A list of tools that always require approval. - #[serde(skip_serializing_if = "Option::is_none")] - pub always: Option, - /// A list of tools that never require approval. +/// Auto configuration for code interpreter container. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] +pub struct CodeInterpreterContainerAuto { + /// An optional list of uploaded files to make available to your code. #[serde(skip_serializing_if = "Option::is_none")] - pub never: Option, -} + pub file_ids: Option>, -/// Container configuration for a code interpreter. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(untagged)] -pub enum CodeInterpreterContainer { - /// A simple container ID. - Id(String), - /// Auto-configured container with optional files. - Container(CodeInterpreterContainerKind), -} - -/// Auto configuration for code interpreter container. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(tag = "type", rename_all = "snake_case")] -pub enum CodeInterpreterContainerKind { - Auto { - /// Optional list of uploaded file IDs. - #[serde(skip_serializing_if = "Option::is_none")] - file_ids: Option>, - }, + #[serde(skip_serializing_if = "Option::is_none")] + pub memory_limit: Option, } -/// Code interpreter tool definition. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] #[builder( - name = "CodeInterpreterArgs", + name = "CodeInterpreterToolArgs", pattern = "mutable", setter(into, strip_option), default )] #[builder(build_fn(error = "OpenAIError"))] -pub struct CodeInterpreter { - /// Container configuration for running code. - pub container: CodeInterpreterContainer, +pub struct CodeInterpreterTool { + /// The code interpreter container. Can be a container ID or an object that + /// specifies uploaded file IDs to make available to your code. + pub container: CodeInterpreterToolContainer, } -/// Mask image input for image generation. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct InputImageMask { +pub struct ImageGenToolInputImageMask { /// Base64-encoded mask image. 
#[serde(skip_serializing_if = "Option::is_none")] pub image_url: Option, @@ -1116,6 +1173,22 @@ pub struct InputImageMask { pub file_id: Option, } +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] +#[serde(rename_all = "lowercase")] +pub enum InputFidelity { + #[default] + High, + Low, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] +#[serde(rename_all = "lowercase")] +pub enum ImageGenToolModeration { + #[default] + Auto, + Low, +} + /// Image generation tool definition. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] #[builder( @@ -1125,64 +1198,78 @@ pub struct InputImageMask { default )] #[builder(build_fn(error = "OpenAIError"))] -pub struct ImageGeneration { - /// Background type: transparent, opaque, or auto. +pub struct ImageGenTool { + /// Background type for the generated image. One of `transparent`, + /// `opaque`, or `auto`. Default: `auto`. #[serde(skip_serializing_if = "Option::is_none")] - pub background: Option, - /// Optional mask for inpainting. + pub background: Option, + /// Control how much effort the model will exert to match the style and features, especially facial features, + /// of input images. This parameter is only supported for `gpt-image-1`. Unsupported + /// for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`. #[serde(skip_serializing_if = "Option::is_none")] - pub input_image_mask: Option, - /// Model to use (default: gpt-image-1). + pub input_fidelity: Option, + /// Optional mask for inpainting. Contains `image_url` + /// (string, optional) and `file_id` (string, optional). + #[serde(skip_serializing_if = "Option::is_none")] + pub input_image_mask: Option, + /// The image generation model to use. Default: `gpt-image-1`. #[serde(skip_serializing_if = "Option::is_none")] pub model: Option, - /// Moderation level (default: auto). + /// Moderation level for the generated image. Default: `auto`. #[serde(skip_serializing_if = "Option::is_none")] - pub moderation: Option, - /// Compression level (0-100). + pub moderation: Option, + /// Compression level for the output image. Default: 100. #[serde(skip_serializing_if = "Option::is_none")] pub output_compression: Option, - /// Output format: png, webp, or jpeg. + /// The output format of the generated image. One of `png`, `webp`, or + /// `jpeg`. Default: `png`. #[serde(skip_serializing_if = "Option::is_none")] - pub output_format: Option, - /// Number of partial images (0-3). + pub output_format: Option, + /// Number of partial images to generate in streaming mode, from 0 (default value) to 3. #[serde(skip_serializing_if = "Option::is_none")] pub partial_images: Option, - /// Quality: low, medium, high, or auto. + /// The quality of the generated image. One of `low`, `medium`, `high`, + /// or `auto`. Default: `auto`. #[serde(skip_serializing_if = "Option::is_none")] - pub quality: Option, - /// Size: e.g. "1024x1024" or auto. + pub quality: Option, + /// The size of the generated image. One of `1024x1024`, `1024x1536`, + /// `1536x1024`, or `auto`. Default: `auto`. 
#[serde(skip_serializing_if = "Option::is_none")] - pub size: Option, + pub size: Option, } -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] #[serde(rename_all = "lowercase")] -pub enum ImageGenerationBackground { +pub enum ImageGenToolBackground { Transparent, Opaque, + #[default] Auto, } -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] #[serde(rename_all = "lowercase")] -pub enum ImageGenerationOutputFormat { +pub enum ImageGenToolOutputFormat { + #[default] Png, Webp, Jpeg, } -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] #[serde(rename_all = "lowercase")] -pub enum ImageGenerationQuality { +pub enum ImageGenToolQuality { Low, Medium, High, + #[default] Auto, } -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] #[serde(rename_all = "lowercase")] -pub enum ImageGenerationSize { +pub enum ImageGenToolSize { + #[default] Auto, #[serde(rename = "1024x1024")] Size1024x1024, @@ -1192,44 +1279,105 @@ pub enum ImageGenerationSize { Size1536x1024, } -/// Control how the model picks or is forced to pick a tool. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(untagged)] -pub enum ToolChoice { - /// Controls which (if any) tool is called by the model. - Mode(ToolChoiceMode), - /// Indicates that the model should use a built-in tool to generate a response. - Hosted { - /// The type of hosted tool the model should to use. - #[serde(rename = "type")] - kind: HostedToolType, - }, - /// Use this option to force the model to call a specific function. - Function { - /// The name of the function to call. - name: String, - }, -} - -/// Simple tool-choice modes. -#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] #[serde(rename_all = "lowercase")] -pub enum ToolChoiceMode { - /// The model will not call any tool and instead generates a message. - None, - /// The model can pick between generating a message or calling one or more tools. +pub enum ToolChoiceAllowedMode { Auto, - /// The model must call one or more tools. Required, } -/// Hosted tool type identifiers. -#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] -#[serde(rename_all = "snake_case")] -pub enum HostedToolType { +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ToolChoiceAllowed { + /// Constrains the tools available to the model to a pre-defined set. + /// + /// `auto` allows the model to pick from among the allowed tools and generate a + /// message. + /// + /// `required` requires the model to call one or more of the allowed tools. + mode: ToolChoiceAllowedMode, + /// A list of tool definitions that the model should be allowed to call. + /// + /// For the Responses API, the list of tool definitions might look like: + /// ```json + /// [ + /// { "type": "function", "name": "get_weather" }, + /// { "type": "mcp", "server_label": "deepwiki" }, + /// { "type": "image_generation" } + /// ] + /// ``` + tools: Vec, +} + +/// The type of hosted tool the model should to use. Learn more about +/// [built-in tools](https://platform.openai.com/docs/guides/tools). 
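The `ToolChoiceParam` enum introduced below relies on serde's support for mixing an internally tagged enum with `#[serde(untagged)]` fallback variants, so a bare string like `"auto"` and an object like `{"type": "function", ...}` both map onto one type. The following self-contained sketch shows how that pattern serializes; the `ToolChoice`, `Function`, and `Mode` names here are stand-ins for illustration, not the crate's types.

```rust
use serde::{Deserialize, Serialize};

#[derive(Debug, Serialize, Deserialize, PartialEq)]
struct Function {
    name: String,
}

#[derive(Debug, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "lowercase")]
enum Mode {
    None,
    Auto,
    Required,
}

#[derive(Debug, Serialize, Deserialize, PartialEq)]
#[serde(tag = "type", rename_all = "snake_case")]
enum ToolChoice {
    // Tagged form: serializes as {"type": "function", "name": "..."}.
    Function(Function),
    // Untagged fallback: serializes as the bare string "none" / "auto" / "required".
    #[serde(untagged)]
    Mode(Mode),
}

fn main() {
    let forced = ToolChoice::Function(Function {
        name: "get_weather".into(),
    });
    assert_eq!(
        serde_json::to_value(&forced).unwrap(),
        serde_json::json!({ "type": "function", "name": "get_weather" })
    );

    let auto = ToolChoice::Mode(Mode::Auto);
    assert_eq!(
        serde_json::to_value(&auto).unwrap(),
        serde_json::json!("auto")
    );
}
```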
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum ToolChoiceTypes { FileSearch, WebSearchPreview, ComputerUsePreview, + CodeInterpreter, + ImageGeneration, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ToolChoiceFunction { + /// The name of the function to call. + name: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ToolChoiceMCP { + /// The name of the tool to call on the server. + name: String, + /// The label of the MCP server to use. + server_label: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ToolChoiceCustom { + /// The name of the custom tool to call. + name: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum ToolChoiceParam { + /// Constrains the tools available to the model to a pre-defined set. + AllowedTools(ToolChoiceAllowed), + + /// Use this option to force the model to call a specific function. + Function(ToolChoiceFunction), + + /// Use this option to force the model to call a specific tool on a remote MCP server. + Mcp(ToolChoiceMCP), + + /// Use this option to force the model to call a custom tool. + Custom(ToolChoiceCustom), + + /// Indicates that the model should use a built-in tool to generate a response. + /// [Learn more about built-in tools](https://platform.openai.com/docs/guides/tools). + #[serde(untagged)] + Hosted(ToolChoiceTypes), + + /// Controls which (if any) tool is called by the model. + /// + /// `none` means the model will not call any tool and instead generates a message. + /// + /// `auto` means the model can pick between generating a message or calling one or + /// more tools. + /// + /// `required` means the model must call one or more tools. + #[serde(untagged)] + Mode(ToolChoiceOptions), +} + +#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] +#[serde(rename_all = "lowercase")] +pub enum ToolChoiceOptions { + None, + Auto, + Required, } /// Error returned by the API when a request fails. @@ -1890,17 +2038,30 @@ pub struct MCPApprovalRequest { pub server_label: String, } +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct InputTokenDetails { + /// The number of tokens that were retrieved from the cache. + /// [More on prompt caching](https://platform.openai.com/docs/guides/prompt-caching). + pub cached_tokens: u32, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct OutputTokenDetails { + /// The number of reasoning tokens. + pub reasoning_tokens: u32, +} + /// Usage statistics for a response. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct Usage { +pub struct ResponseUsage { /// The number of input tokens. pub input_tokens: u32, /// A detailed breakdown of the input tokens. - pub input_tokens_details: PromptTokensDetails, + pub input_tokens_details: InputTokenDetails, /// The number of output tokens. pub output_tokens: u32, /// A detailed breakdown of the output tokens. - pub output_tokens_details: CompletionTokensDetails, + pub output_tokens_details: OutputTokenDetails, /// The total number of tokens used. pub total_tokens: u32, } @@ -2047,29 +2208,58 @@ pub struct Response { #[serde(skip_serializing_if = "Option::is_none")] pub text: Option, - /// How the model chose or was forced to choose a tool. + /// How the model should select which tool (or tools) to use when generating + /// a response. 
See the `tools` parameter to see how to specify which tools + /// the model can call. #[serde(skip_serializing_if = "Option::is_none")] - pub tool_choice: Option, + pub tool_choice: Option, - /// Tool definitions that were provided. - #[serde(skip_serializing_if = "Option::is_none")] - pub tools: Option>, + /// An array of tools the model may call while generating a response. You + /// can specify which tool to use by setting the `tool_choice` parameter. + /// + /// We support the following categories of tools: + /// - **Built-in tools**: Tools that are provided by OpenAI that extend the + /// model's capabilities, like [web search](https://platform.openai.com/docs/guides/tools-web-search) + /// or [file search](https://platform.openai.com/docs/guides/tools-file-search). Learn more about + /// [built-in tools](https://platform.openai.com/docs/guides/tools). + /// - **MCP Tools**: Integrations with third-party systems via custom MCP servers + /// or predefined connectors such as Google Drive and SharePoint. Learn more about + /// [MCP Tools](https://platform.openai.com/docs/guides/tools-connectors-mcp). + /// - **Function calls (custom tools)**: Functions that are defined by you, + /// enabling the model to call your own code with strongly typed arguments + /// and outputs. Learn more about + /// [function calling](https://platform.openai.com/docs/guides/function-calling). You can also use + /// custom tools to call your own code. + #[serde(skip_serializing_if = "Option::is_none")] + pub tools: Option>, + + /// An integer between 0 and 20 specifying the number of most likely tokens to return at each + /// token position, each with an associated log probability. + #[serde(skip_serializing_if = "Option::is_none")] + pub top_logprobs: Option, - /// Nucleus sampling cutoff that was used. + /// An alternative to sampling with temperature, called nucleus sampling, + /// where the model considers the results of the tokens with top_p probability + /// mass. So 0.1 means only the tokens comprising the top 10% probability mass + /// are considered. + /// + /// We generally recommend altering this or `temperature` but not both. #[serde(skip_serializing_if = "Option::is_none")] pub top_p: Option, - /// Truncation strategy that was applied. + ///The truncation strategy to use for the model response. + /// - `auto`: If the input to this Response exceeds + /// the model's context window size, the model will truncate the + /// response to fit the context window by dropping items from the beginning of the conversation. + /// - `disabled` (default): If the input size will exceed the context window + /// size for a model, the request will fail with a 400 error. #[serde(skip_serializing_if = "Option::is_none")] pub truncation: Option, - /// Token usage statistics for this request. + /// Represents token usage details including input tokens, output tokens, + /// a breakdown of output tokens, and the total tokens used. #[serde(skip_serializing_if = "Option::is_none")] - pub usage: Option, - - /// End-user ID for which this response was generated. 
- #[serde(skip_serializing_if = "Option::is_none")] - pub user: Option, + pub usage: Option, } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] From 84bbfe48acbc914b7ba0235fa72c32102b172426 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Mon, 3 Nov 2025 14:15:26 -0800 Subject: [PATCH 13/42] updates for CreateResponse --- async-openai/src/types/responses.rs | 266 +++++++++++++++++++--------- 1 file changed, 182 insertions(+), 84 deletions(-) diff --git a/async-openai/src/types/responses.rs b/async-openai/src/types/responses.rs index 824749a2..095bb63c 100644 --- a/async-openai/src/types/responses.rs +++ b/async-openai/src/types/responses.rs @@ -29,16 +29,23 @@ pub enum OutputStatus { Incomplete, } -/// Input payload: raw text or structured context items. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[serde(untagged)] -pub enum Input { - /// A text input to the model, equivalent to a text input with the user role. +pub enum InputParam { + /// A text input to the model, equivalent to a text input with the + /// `user` role. Text(String), - /// A list of one or many input items to the model, containing different content types. + /// A list of one or many input items to the model, containing + /// different content types. Items(Vec), } +impl Default for InputParam { + fn default() -> Self { + Self::Text(String::new()) + } +} + /// Content item used to generate a response. /// /// This is a properly discriminated union based on the `type` field, using Rust's @@ -507,6 +514,48 @@ pub struct Conversation { pub id: String, } +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(untagged)] +pub enum ConversationParam { + /// The unique ID of the conversation. + ConversationID(String), + /// The conversation that this response belongs to. + Object(Conversation), +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] +pub enum IncludeEnum { + #[serde(rename = "file_search_call.results")] + FileSearchCallResults, + #[serde(rename = "web_search_call.results")] + WebSearchCallResults, + #[serde(rename = "web_search_call.action.sources")] + WebSearchCallActionSources, + #[serde(rename = "message.input_image.image_url")] + MessageInputImageImageUrl, + #[serde(rename = "computer_call_output.output.image_url")] + ComputerCallOutputOutputImageUrl, + #[serde(rename = "code_interpreter_call.outputs")] + CodeInterpreterCallOutputs, + #[serde(rename = "reasoning.encrypted_content")] + ReasoningEncryptedContent, + #[serde(rename = "message.output_text.logprobs")] + MessageOutputTextLogprobs, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseStreamOptions { + /// When true, stream obfuscation will be enabled. Stream obfuscation adds + /// random characters to an `obfuscation` field on streaming delta events to + /// normalize payload sizes as a mitigation to certain side-channel attacks. + /// These obfuscation fields are included by default, but add a small amount + /// of overhead to the data stream. You can set `include_obfuscation` to + /// false to optimize for bandwidth if you trust the network links between + /// your application and the OpenAI API. + #[serde(skip_serializing_if = "Option::is_none")] + pub include_obfuscation: Option, +} + /// Builder for a Responses API request. 
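Since `ConversationParam` and `IncludeEnum` above rely entirely on serde attributes for their wire format, a short sketch of how they serialize (plain `serde_json`, no API call) may help; the conversation ID is a placeholder.

```rust
use async_openai::types::responses::{Conversation, ConversationParam, IncludeEnum};

fn main() {
    // The untagged representation lets a bare ID and an object map onto the same type.
    let by_id = ConversationParam::ConversationID("conv_123".to_string());
    assert_eq!(
        serde_json::to_value(&by_id).unwrap(),
        serde_json::json!("conv_123")
    );

    let by_object = ConversationParam::Object(Conversation {
        id: "conv_123".to_string(),
    });
    assert_eq!(
        serde_json::to_value(&by_object).unwrap(),
        serde_json::json!({ "id": "conv_123" })
    );

    // The rename attributes map enum variants onto the API's dotted include values.
    let include = IncludeEnum::ReasoningEncryptedContent;
    assert_eq!(
        serde_json::to_value(&include).unwrap(),
        serde_json::json!("reasoning.encrypted_content")
    );
}
```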
#[derive(Clone, Serialize, Deserialize, Debug, Default, Builder, PartialEq)] #[builder( @@ -517,55 +566,70 @@ pub struct Conversation { )] #[builder(build_fn(error = "OpenAIError"))] pub struct CreateResponse { - /// Text, image, or file inputs to the model, used to generate a response. - pub input: Input, - - /// Model ID used to generate the response, like `gpt-4o`. - /// OpenAI offers a wide range of models with different capabilities, - /// performance characteristics, and price points. - pub model: String, - /// Whether to run the model response in the background. - /// boolean or null. + /// [Learn more](https://platform.openai.com/docs/guides/background). #[serde(skip_serializing_if = "Option::is_none")] pub background: Option, - /// Specify additional output data to include in the model response. + /// The conversation that this response belongs to. Items from this conversation are prepended to + /// `input_items` for this response request. + /// + /// Input items and output items from this response are automatically added to this conversation after + /// this response completes. + #[serde(skip_serializing_if = "Option::is_none")] + pub conversation: Option, + + /// Specify additional output data to include in the model response. Currently supported + /// values are: + /// + /// - `web_search_call.action.sources`: Include the sources of the web search tool call. + /// + /// - `code_interpreter_call.outputs`: Includes the outputs of python code execution in code + /// interpreter tool call items. + /// + /// - `computer_call_output.output.image_url`: Include image urls from the computer call + /// output. /// - /// Supported values: - /// - `file_search_call.results` - /// Include the search results of the file search tool call. - /// - `message.input_image.image_url` - /// Include image URLs from the input message. - /// - `computer_call_output.output.image_url` - /// Include image URLs from the computer call output. - /// - `reasoning.encrypted_content` - /// Include an encrypted version of reasoning tokens in reasoning item outputs. - /// This enables reasoning items to be used in multi-turn conversations when - /// using the Responses API statelessly (for example, when the `store` parameter - /// is set to `false`, or when an organization is enrolled in the zero-data- - /// retention program). + /// - `file_search_call.results`: Include the search results of the file search tool call. /// - /// If `None`, no additional data is returned. + /// - `message.input_image.image_url`: Include image urls from the input message. + /// + /// - `message.output_text.logprobs`: Include logprobs with assistant messages. + /// + /// - `reasoning.encrypted_content`: Includes an encrypted version of reasoning tokens in + /// reasoning item outputs. This enables reasoning items to be used in multi-turn + /// conversations when using the Responses API statelessly (like when the `store` parameter is + /// set to `false`, or when an organization is enrolled in the zero data retention program). #[serde(skip_serializing_if = "Option::is_none")] - pub include: Option>, + pub include: Option>, - /// Inserts a system (or developer) message as the first item in the model's context. + /// Text, image, or file inputs to the model, used to generate a response. 
+ /// + /// Learn more: + /// - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + /// - [Image inputs](https://platform.openai.com/docs/guides/images) + /// - [File inputs](https://platform.openai.com/docs/guides/pdf-files) + /// - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) + /// - [Function calling](https://platform.openai.com/docs/guides/function-calling) + pub input: InputParam, + + /// A system (or developer) message inserted into the model's context. /// - /// When using along with previous_response_id, the instructions from a previous response will - /// not be carried over to the next response. This makes it simple to swap out system - /// (or developer) messages in new responses. + /// When using along with `previous_response_id`, the instructions from a previous + /// response will not be carried over to the next response. This makes it simple + /// to swap out system (or developer) messages in new responses. #[serde(skip_serializing_if = "Option::is_none")] pub instructions: Option, - /// An upper bound for the number of tokens that can be generated for a - /// response, including visible output tokens and reasoning tokens. + /// An upper bound for the number of tokens that can be generated for a response, including + /// visible output tokens and [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). #[serde(skip_serializing_if = "Option::is_none")] pub max_output_tokens: Option, - /// The maximum number of total calls to built-in tools that can be processed in a response. - /// This maximum number applies across all built-in tool calls, not per individual tool. - /// Any further attempts to call a tool by the model will be ignored. + /// The maximum number of total calls to built-in tools that can be processed in a response. This + /// maximum number applies across all built-in tool calls, not per individual tool. Any further + /// attempts to call a tool by the model will be ignored. + #[serde(skip_serializing_if = "Option::is_none")] pub max_tool_calls: Option, /// Set of 16 key-value pairs that can be attached to an object. This can be @@ -577,42 +641,54 @@ pub struct CreateResponse { #[serde(skip_serializing_if = "Option::is_none")] pub metadata: Option>, + /// Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI + /// offers a wide range of models with different capabilities, performance + /// characteristics, and price points. Refer to the [model guide](https://platform.openai.com/docs/models) + /// to browse and compare available models. + #[serde(skip_serializing_if = "Option::is_none")] + pub model: Option, + /// Whether to allow the model to run tool calls in parallel. #[serde(skip_serializing_if = "Option::is_none")] pub parallel_tool_calls: Option, - /// The unique ID of the previous response to the model. Use this to create - /// multi-turn conversations. + /// The unique ID of the previous response to the model. Use this to create multi-turn conversations. + /// Learn more about [conversation state](https://platform.openai.com/docs/guides/conversation-state). + /// Cannot be used in conjunction with `conversation`. #[serde(skip_serializing_if = "Option::is_none")] pub previous_response_id: Option, /// Reference to a prompt template and its variables. + /// [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). 
#[serde(skip_serializing_if = "Option::is_none")] - pub prompt: Option, + pub prompt: Option, - /// **o-series models only**: Configuration options for reasoning models. + /// Used by OpenAI to cache responses for similar requests to optimize your cache hit rates. Replaces + /// the `user` field. [Learn more](https://platform.openai.com/docs/guides/prompt-caching). #[serde(skip_serializing_if = "Option::is_none")] - pub reasoning: Option, + pub prompt_cache_key: Option, - /// Specifies the latency tier to use for processing the request. - /// - /// This parameter is relevant for customers subscribed to the Scale tier service. - /// - /// Supported values: - /// - `auto` - /// - If the Project is Scale tier enabled, the system will utilize Scale tier credits until - /// they are exhausted. - /// - If the Project is not Scale tier enabled, the request will be processed using the - /// default service tier with a lower uptime SLA and no latency guarantee. - /// - `default` - /// The request will be processed using the default service tier with a lower uptime SLA and - /// no latency guarantee. - /// - `flex` - /// The request will be processed with the Flex Processing service tier. Learn more. + /// **gpt-5 and o-series models only** + /// Configuration options for [reasoning models](https://platform.openai.com/docs/guides/reasoning). + #[serde(skip_serializing_if = "Option::is_none")] + pub reasoning: Option, + + /// A stable identifier used to help detect users of your application that may be violating OpenAI's + /// usage policies. /// - /// When not set, the default behavior is `auto`. + /// The IDs should be a string that uniquely identifies each user. We recommend hashing their username + /// or email address, in order to avoid sending us any identifying information. [Learn + /// more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers). + #[serde(skip_serializing_if = "Option::is_none")] + pub safety_identifier: Option, + + /// Specifies the processing type used for serving the request. + /// - If set to 'auto', then the request will be processed with the service tier configured in the Project settings. Unless otherwise configured, the Project will use 'default'. + /// - If set to 'default', then the request will be processed with the standard pricing and performance for the selected model. + /// - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or '[priority](https://openai.com/api-priority-processing/)', then the request will be processed with the corresponding service tier. + /// - When not set, the default behavior is 'auto'. /// - /// When this parameter is set, the response body will include the `service_tier` utilized. + /// When the `service_tier` parameter is set, the response body will include the `service_tier` value based on the processing mode actually used to serve the request. This response value may be different from the value set in the parameter. #[serde(skip_serializing_if = "Option::is_none")] pub service_tier: Option, @@ -620,11 +696,17 @@ pub struct CreateResponse { #[serde(skip_serializing_if = "Option::is_none")] pub store: Option, - /// If set to true, the model response data will be streamed to the client as it is - /// generated using server-sent events. + /// If set to true, the model response data will be streamed to the client + /// as it is generated using [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). 
+ /// See the [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + /// for more information. #[serde(skip_serializing_if = "Option::is_none")] pub stream: Option, + /// Options for streaming responses. Only set this when you set `stream: true`. + #[serde(skip_serializing_if = "Option::is_none")] + pub stream_options: Option, + /// What sampling temperature to use, between 0 and 2. Higher values like 0.8 /// will make the output more random, while lower values like 0.2 will make it /// more focused and deterministic. We generally recommend altering this or @@ -632,45 +714,60 @@ pub struct CreateResponse { #[serde(skip_serializing_if = "Option::is_none")] pub temperature: Option, - /// Configuration options for a text response from the model. Can be plain text - /// or structured JSON data. + /// Configuration options for a text response from the model. Can be plain + /// text or structured JSON data. Learn more: + /// - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + /// - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) #[serde(skip_serializing_if = "Option::is_none")] - pub text: Option, + pub text: Option, /// How the model should select which tool (or tools) to use when generating - /// a response. + /// a response. See the `tools` parameter to see how to specify which tools + /// the model can call. #[serde(skip_serializing_if = "Option::is_none")] - pub tool_choice: Option, + pub tool_choice: Option, - /// An array of tools the model may call while generating a response. - /// Can include built-in tools (file_search, web_search_preview, - /// computer_use_preview) or custom function definitions. + /// An array of tools the model may call while generating a response. You + /// can specify which tool to use by setting the `tool_choice` parameter. + /// + /// We support the following categories of tools: + /// - **Built-in tools**: Tools that are provided by OpenAI that extend the + /// model's capabilities, like [web search](https://platform.openai.com/docs/guides/tools-web-search) + /// or [file search](https://platform.openai.com/docs/guides/tools-file-search). Learn more about + /// [built-in tools](https://platform.openai.com/docs/guides/tools). + /// - **MCP Tools**: Integrations with third-party systems via custom MCP servers + /// or predefined connectors such as Google Drive and SharePoint. Learn more about + /// [MCP Tools](https://platform.openai.com/docs/guides/tools-connectors-mcp). + /// - **Function calls (custom tools)**: Functions that are defined by you, + /// enabling the model to call your own code with strongly typed arguments + /// and outputs. Learn more about + /// [function calling](https://platform.openai.com/docs/guides/function-calling). You can also use + /// custom tools to call your own code. #[serde(skip_serializing_if = "Option::is_none")] - pub tools: Option>, + pub tools: Option>, - /// An integer between 0 and 20 specifying the number of most likely tokens to return - /// at each token position, each with an associated log probability. + /// An integer between 0 and 20 specifying the number of most likely tokens to return at each + /// token position, each with an associated log probability. 
#[serde(skip_serializing_if = "Option::is_none")] - pub top_logprobs: Option, // TODO add validation of range + pub top_logprobs: Option, /// An alternative to sampling with temperature, called nucleus sampling, /// where the model considers the results of the tokens with top_p probability /// mass. So 0.1 means only the tokens comprising the top 10% probability mass - /// are considered. We generally recommend altering this or `temperature` but - /// not both. + /// are considered. + /// + /// We generally recommend altering this or `temperature` but not both. #[serde(skip_serializing_if = "Option::is_none")] pub top_p: Option, - /// The truncation strategy to use for the model response: - /// - `auto`: drop items in the middle to fit context window. - /// - `disabled`: error if exceeding context window. + ///The truncation strategy to use for the model response. + /// - `auto`: If the input to this Response exceeds + /// the model's context window size, the model will truncate the + /// response to fit the context window by dropping items from the beginning of the conversation. + /// - `disabled` (default): If the input size will exceed the context window + /// size for a model, the request will fail with a 400 error. #[serde(skip_serializing_if = "Option::is_none")] pub truncation: Option, - - /// A unique identifier representing your end-user, which can help OpenAI to - /// monitor and detect abuse. - #[serde(skip_serializing_if = "Option::is_none")] - pub user: Option, } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] @@ -2085,6 +2182,7 @@ pub struct Response { /// The conversation that this response belongs to. Input items and output /// items from this response are automatically added to this conversation. + #[serde(skip_serializing_if = "Option::is_none")] pub conversation: Option, /// Unix timestamp (in seconds) when this Response was created. From b5bc8edc6f1959c81b4adc3f17595783ae4f2612 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Mon, 3 Nov 2025 14:43:11 -0800 Subject: [PATCH 14/42] add reponses apis --- async-openai/src/responses.rs | 49 ++++++++++++++++++++++++++--- async-openai/src/types/responses.rs | 7 +++++ 2 files changed, 51 insertions(+), 5 deletions(-) diff --git a/async-openai/src/responses.rs b/async-openai/src/responses.rs index 9160b7be..0c58ecfe 100644 --- a/async-openai/src/responses.rs +++ b/async-openai/src/responses.rs @@ -1,13 +1,12 @@ +use serde::Serialize; + use crate::{ config::Config, error::OpenAIError, - types::responses::{CreateResponse, Response, ResponseStream}, + types::responses::{CreateResponse, DeleteResponse, Response, ResponseStream}, Client, }; -/// Given text input or a list of context items, the model will generate a response. -/// -/// Related guide: [Responses](https://platform.openai.com/docs/api-reference/responses) pub struct Responses<'c, C: Config> { client: &'c Client, } @@ -18,7 +17,15 @@ impl<'c, C: Config> Responses<'c, C> { Self { client } } - /// Creates a model response for the given input. + /// Creates a model response. Provide [text](https://platform.openai.com/docs/guides/text) or + /// [image](https://platform.openai.com/docs/guides/images) inputs to generate + /// [text](https://platform.openai.com/docs/guides/text) or + /// [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. 
Have the model call + /// your own [custom code](https://platform.openai.com/docs/guides/function-calling) or use + /// built-in [tools](https://platform.openai.com/docs/guides/tools) like + /// [web search](https://platform.openai.com/docs/guides/tools-web-search) + /// or [file search](https://platform.openai.com/docs/guides/tools-file-search) to use your own data + /// as input for the model's response. #[crate::byot( T0 = serde::Serialize, R = serde::de::DeserializeOwned @@ -52,4 +59,36 @@ impl<'c, C: Config> Responses<'c, C> { } Ok(self.client.post_stream("/responses", request).await) } + + /// Retrieves a model response with the given ID. + #[crate::byot(T0 = std::fmt::Display, T1 = serde::Serialize, R = serde::de::DeserializeOwned)] + pub async fn retrieve(&self, response_id: &str, query: &Q) -> Result + where + Q: Serialize + ?Sized, + { + self.client + .get_with_query(&format!("/responses/{}", response_id), &query) + .await + } + + /// Deletes a model response with the given ID. + #[crate::byot(T0 = std::fmt::Display, R = serde::de::DeserializeOwned)] + pub async fn delete(&self, response_id: &str) -> Result { + self.client + .delete(&format!("/responses/{}", response_id)) + .await + } + + /// Cancels a model response with the given ID. Only responses created with the + /// `background` parameter set to `true` can be cancelled. + /// [Learn more](https://platform.openai.com/docs/guides/background). + #[crate::byot(T0 = std::fmt::Display, R = serde::de::DeserializeOwned)] + pub async fn cancel(&self, response_id: &str) -> Result { + self.client + .post( + &format!("/responses/{}/cancel", response_id), + serde_json::json!({}), + ) + .await + } } diff --git a/async-openai/src/types/responses.rs b/async-openai/src/types/responses.rs index 095bb63c..fe733e1f 100644 --- a/async-openai/src/types/responses.rs +++ b/async-openai/src/types/responses.rs @@ -3130,3 +3130,10 @@ pub struct TextAnnotation { pub start: u32, pub end: u32, } + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct DeleteResponse { + pub object: String, + pub deleted: bool, + pub id: String, +} From d20e865420120e8bde3166fb5c6c635eab5e000e Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Mon, 3 Nov 2025 16:42:35 -0800 Subject: [PATCH 15/42] list input items --- async-openai/src/responses.rs | 19 ++++++++++++++++++- async-openai/src/types/responses.rs | 20 ++++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/async-openai/src/responses.rs b/async-openai/src/responses.rs index 0c58ecfe..0b1974ab 100644 --- a/async-openai/src/responses.rs +++ b/async-openai/src/responses.rs @@ -3,7 +3,9 @@ use serde::Serialize; use crate::{ config::Config, error::OpenAIError, - types::responses::{CreateResponse, DeleteResponse, Response, ResponseStream}, + types::responses::{ + CreateResponse, DeleteResponse, Response, ResponseItemList, ResponseStream, + }, Client, }; @@ -91,4 +93,19 @@ impl<'c, C: Config> Responses<'c, C> { ) .await } + + /// Returns a list of input items for a given response. 
+ #[crate::byot(T0 = std::fmt::Display, T1 = serde::Serialize, R = serde::de::DeserializeOwned)] + pub async fn list_input_items( + &self, + response_id: &str, + query: &Q, + ) -> Result + where + Q: Serialize + ?Sized, + { + self.client + .get_with_query(&format!("/responses/{}/input_items", response_id), &query) + .await + } } diff --git a/async-openai/src/types/responses.rs b/async-openai/src/types/responses.rs index fe733e1f..6d74a03b 100644 --- a/async-openai/src/types/responses.rs +++ b/async-openai/src/types/responses.rs @@ -3137,3 +3137,23 @@ pub struct DeleteResponse { pub deleted: bool, pub id: String, } + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub enum ItemResource { + // TODO: implement this +} + +/// A list of Response items. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseItemList { + /// The type of object returned, must be `list`. + pub object: String, + /// The ID of the first item in the list. + pub first_id: Option, + /// The ID of the last item in the list. + pub last_id: Option, + /// Whether there are more items in the list. + pub has_more: bool, + /// The list of items. + pub data: Vec, +} From 30964bff90ab8c87b17f7aae913c171c089ddf6d Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Mon, 3 Nov 2025 17:06:18 -0800 Subject: [PATCH 16/42] add get_input_token_counts for responses --- async-openai/src/responses.rs | 10 ++++ async-openai/src/types/responses.rs | 84 +++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+) diff --git a/async-openai/src/responses.rs b/async-openai/src/responses.rs index 0b1974ab..223a5b1c 100644 --- a/async-openai/src/responses.rs +++ b/async-openai/src/responses.rs @@ -5,6 +5,7 @@ use crate::{ error::OpenAIError, types::responses::{ CreateResponse, DeleteResponse, Response, ResponseItemList, ResponseStream, + TokenCountsBody, TokenCountsResource, }, Client, }; @@ -108,4 +109,13 @@ impl<'c, C: Config> Responses<'c, C> { .get_with_query(&format!("/responses/{}/input_items", response_id), &query) .await } + + /// Get input token counts + #[crate::byot(T0 = serde::Serialize, R = serde::de::DeserializeOwned)] + pub async fn get_input_token_counts( + &self, + request: TokenCountsBody, + ) -> Result { + self.client.post("/responses/input_tokens", request).await + } } diff --git a/async-openai/src/types/responses.rs b/async-openai/src/types/responses.rs index 6d74a03b..758a0ad2 100644 --- a/async-openai/src/types/responses.rs +++ b/async-openai/src/types/responses.rs @@ -3157,3 +3157,87 @@ pub struct ResponseItemList { /// The list of items. pub data: Vec, } + +#[derive(Clone, Serialize, Deserialize, Debug, Default, Builder, PartialEq)] +#[builder( + name = "TokenCountsBodyArgs", + pattern = "mutable", + setter(into, strip_option), + default +)] +#[builder(build_fn(error = "OpenAIError"))] +pub struct TokenCountsBody { + /// The conversation that this response belongs to. Items from this + /// conversation are prepended to `input_items` for this response request. + /// Input items and output items from this response are automatically added to this + /// conversation after this response completes. + #[serde(skip_serializing_if = "Option::is_none")] + pub conversation: Option, + + /// Text, image, or file inputs to the model, used to generate a response + #[serde(skip_serializing_if = "Option::is_none")] + pub input: Option, + + /// A system (or developer) message inserted into the model's context. 
+ /// + /// When used along with `previous_response_id`, the instructions from a previous response will + /// not be carried over to the next response. This makes it simple to swap out system (or + /// developer) messages in new responses. + #[serde(skip_serializing_if = "Option::is_none")] + pub instructions: Option, + + /// Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a + /// wide range of models with different capabilities, performance characteristics, + /// and price points. Refer to the [model guide](https://platform.openai.com/docs/models) + /// to browse and compare available models. + #[serde(skip_serializing_if = "Option::is_none")] + pub model: Option, + + /// Whether to allow the model to run tool calls in parallel. + #[serde(skip_serializing_if = "Option::is_none")] + pub parallel_tool_calls: Option, + + /// The unique ID of the previous response to the model. Use this to create multi-turn + /// conversations. Learn more about [conversation state](https://platform.openai.com/docs/guides/conversation-state). + /// Cannot be used in conjunction with `conversation`. + #[serde(skip_serializing_if = "Option::is_none")] + pub previous_response_id: Option, + + /// **gpt-5 and o-series models only** + /// Configuration options for [reasoning models](https://platform.openai.com/docs/guides/reasoning). + #[serde(skip_serializing_if = "Option::is_none")] + pub reasoning: Option, + + /// Configuration options for a text response from the model. Can be plain + /// text or structured JSON data. Learn more: + /// - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + /// - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + #[serde(skip_serializing_if = "Option::is_none")] + pub text: Option, + + /// How the model should select which tool (or tools) to use when generating + /// a response. See the `tools` parameter to see how to specify which tools + /// the model can call. + #[serde(skip_serializing_if = "Option::is_none")] + pub tool_choice: Option, + + /// An array of tools the model may call while generating a response. You can specify which tool + /// to use by setting the `tool_choice` parameter. + #[serde(skip_serializing_if = "Option::is_none")] + pub tools: Option>, + + ///The truncation strategy to use for the model response. + /// - `auto`: If the input to this Response exceeds + /// the model's context window size, the model will truncate the + /// response to fit the context window by dropping items from the beginning of the conversation. + /// - `disabled` (default): If the input size will exceed the context window + /// size for a model, the request will fail with a 400 error. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub truncation: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct TokenCountsResource { + pub object: String, + pub input_tokens: u32, +} From 2413171527a5fc50faaf553812455c0b2d5e6308 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Mon, 3 Nov 2025 17:23:30 -0800 Subject: [PATCH 17/42] implement ItemResource --- async-openai/src/types/responses.rs | 30 ++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/async-openai/src/types/responses.rs b/async-openai/src/types/responses.rs index 758a0ad2..643e63b2 100644 --- a/async-openai/src/types/responses.rs +++ b/async-openai/src/types/responses.rs @@ -3139,8 +3139,36 @@ pub struct DeleteResponse { } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct InputItemReference { + pub r#type: Option, + pub id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum ItemResourceItem { + Message(MessageItem), + FileSearchCall(FileSearchToolCall), + ComputerCall(ComputerToolCall), + ComputerCallOutput(ComputerCallOutputItemParam), + WebSearchCall(WebSearchToolCall), + FunctionCall(FunctionToolCall), + FunctionCallOutput(FunctionCallOutputItemParam), + ImageGenerationCall(ImageGenToolCall), + CodeInterpreterCall(CodeInterpreterToolCall), + LocalShellCall(LocalShellToolCall), + LocalShellCallOutput(LocalShellToolCallOutput), + McpListTools(MCPListTools), + McpApprovalRequest(MCPApprovalRequest), + McpApprovalResponse(MCPApprovalResponse), + McpCall(MCPToolCall), +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(untagged)] pub enum ItemResource { - // TODO: implement this + ItemReference(InputItemReference), + Item(ItemResourceItem), } /// A list of Response items. 
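The three patches above wire retrieve, delete, cancel, list_input_items and get_input_token_counts into the Responses client. A minimal usage sketch follows, assuming the crate's usual client.responses() accessor; the response ID, query parameters, model name and builder fields are placeholders rather than values taken from the patches.

use async_openai::{error::OpenAIError, types::responses::TokenCountsBodyArgs, Client};

async fn inspect_response(response_id: &str) -> Result<(), OpenAIError> {
    let client = Client::new();
    let responses = client.responses();

    // Fetch the stored response; an empty query keeps the call minimal.
    let response = responses
        .retrieve(response_id, &[] as &[(&str, &str)])
        .await?;
    println!("retrieved: {}", response.id);

    // Page through the input items that produced it ("limit" is a placeholder).
    let items = responses
        .list_input_items(response_id, &[("limit", "10")])
        .await?;
    println!("has_more: {}", items.has_more);

    // Estimate input token usage for a follow-up request; the body fields
    // mirror CreateResponse, only a couple are set here for illustration.
    let counts = responses
        .get_input_token_counts(
            TokenCountsBodyArgs::default()
                .model("gpt-4.1")
                .instructions("You are a helpful assistant.")
                .build()?,
        )
        .await?;
    println!("input_tokens: {}", counts.input_tokens);

    // cancel only applies to responses created with `background: true`;
    // delete removes the stored response entirely.
    let _ = responses.cancel(response_id).await?;
    let deleted = responses.delete(response_id).await?;
    println!("deleted: {}", deleted.deleted);

    Ok(())
}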
From 2f78f5d4717b22e59b3375c0aba0061f1872d71e Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Mon, 3 Nov 2025 17:31:50 -0800 Subject: [PATCH 18/42] types/responses dir --- async-openai/src/types/responses/mod.rs | 5 +++++ async-openai/src/types/{ => responses}/responses.rs | 0 async-openai/src/types/responses/responses_stream.rs | 0 3 files changed, 5 insertions(+) create mode 100644 async-openai/src/types/responses/mod.rs rename async-openai/src/types/{ => responses}/responses.rs (100%) create mode 100644 async-openai/src/types/responses/responses_stream.rs diff --git a/async-openai/src/types/responses/mod.rs b/async-openai/src/types/responses/mod.rs new file mode 100644 index 00000000..a57069fd --- /dev/null +++ b/async-openai/src/types/responses/mod.rs @@ -0,0 +1,5 @@ +mod responses; +mod responses_stream; + +pub use responses::*; +pub use responses_stream::*; diff --git a/async-openai/src/types/responses.rs b/async-openai/src/types/responses/responses.rs similarity index 100% rename from async-openai/src/types/responses.rs rename to async-openai/src/types/responses/responses.rs diff --git a/async-openai/src/types/responses/responses_stream.rs b/async-openai/src/types/responses/responses_stream.rs new file mode 100644 index 00000000..e69de29b From d9dcf246b7cf7339cc4d05c523c89653d0a571f6 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 09:49:14 -0800 Subject: [PATCH 19/42] response streaming events --- async-openai/src/types/responses/mod.rs | 8 +- .../responses/{responses.rs => response.rs} | 759 +----------------- .../src/types/responses/response_stream.rs | 550 +++++++++++++ .../src/types/responses/responses_stream.rs | 0 4 files changed, 579 insertions(+), 738 deletions(-) rename async-openai/src/types/responses/{responses.rs => response.rs} (79%) create mode 100644 async-openai/src/types/responses/response_stream.rs delete mode 100644 async-openai/src/types/responses/responses_stream.rs diff --git a/async-openai/src/types/responses/mod.rs b/async-openai/src/types/responses/mod.rs index a57069fd..8d2635c2 100644 --- a/async-openai/src/types/responses/mod.rs +++ b/async-openai/src/types/responses/mod.rs @@ -1,5 +1,5 @@ -mod responses; -mod responses_stream; +mod response; +mod response_stream; -pub use responses::*; -pub use responses_stream::*; +pub use response::*; +pub use response_stream::*; diff --git a/async-openai/src/types/responses/responses.rs b/async-openai/src/types/responses/response.rs similarity index 79% rename from async-openai/src/types/responses/responses.rs rename to async-openai/src/types/responses/response.rs index 643e63b2..73d30ab6 100644 --- a/async-openai/src/types/responses/responses.rs +++ b/async-openai/src/types/responses/response.rs @@ -956,7 +956,7 @@ pub enum GrammarSyntax { Regex, } -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder, Default)] +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] pub struct CustomGrammarFormatParam { /// The grammar definition. pub definition: String, @@ -1508,6 +1508,24 @@ pub struct LogProb { pub top_logprobs: Vec, } +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseTopLobProb { + /// The log probability of this token. + pub logprob: f64, + /// A possible text token. + pub token: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseLogProb { + /// The log probability of this token. + pub logprob: f64, + /// A possible text token. 
+ pub token: String, + /// The log probability of the top 20 most likely tokens. + pub top_logprobs: Vec, +} + /// A simple text output from the model. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] pub struct OutputTextContent { @@ -1623,42 +1641,17 @@ pub enum OutputMessageContent { Refusal(RefusalContent), } -/// Nested content within an output message. -/// -/// Note: This enum is similar to OutputItem but may be used in different contexts. -/// Consider using OutputItem directly if it fits your use case. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[serde(tag = "type", rename_all = "snake_case")] pub enum OutputContent { - /// An output message from the model. - Message(OutputMessage), - /// The results of a file search tool call. - FileSearchCall(FileSearchToolCall), - /// A tool call to run a function. - FunctionCall(FunctionToolCall), - /// The results of a web search tool call. - WebSearchCall(WebSearchToolCall), - /// A tool call to a computer use tool. - ComputerCall(ComputerToolCall), - /// A description of the chain of thought used by a reasoning model while generating a response. - /// Be sure to include these items in your input to the Responses API for subsequent turns of a - /// conversation if you are manually managing context. - Reasoning(ReasoningItem), - /// Image generation tool call output. - ImageGenerationCall(ImageGenToolCall), - /// Code interpreter tool call output. - CodeInterpreterCall(CodeInterpreterToolCall), - /// Local shell tool call output. - LocalShellCall(LocalShellToolCall), - /// MCP tool invocation output. - McpCall(MCPToolCall), - /// MCP list-tools output. - McpListTools(MCPListTools), - /// MCP approval request output. - McpApprovalRequest(MCPApprovalRequest), + /// A text output from the model. + OutputText(OutputTextContent), + /// A refusal from the model. + Refusal(RefusalContent), + /// Reasoning text from the model. + ReasoningText(ReasoningTextContent), } -/// Reasoning text content. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] pub struct ReasoningTextContent { /// The reasoning text from the model. 
@@ -2371,668 +2364,6 @@ pub enum Status { Incomplete, } -/// Event types for streaming responses from the Responses API -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(tag = "type")] -#[non_exhaustive] // Future-proof against breaking changes -pub enum ResponseEvent { - /// Response creation started - #[serde(rename = "response.created")] - ResponseCreated(ResponseCreated), - /// Processing in progress - #[serde(rename = "response.in_progress")] - ResponseInProgress(ResponseInProgress), - /// Response completed (different from done) - #[serde(rename = "response.completed")] - ResponseCompleted(ResponseCompleted), - /// Response failed - #[serde(rename = "response.failed")] - ResponseFailed(ResponseFailed), - /// Response incomplete - #[serde(rename = "response.incomplete")] - ResponseIncomplete(ResponseIncomplete), - /// Response queued - #[serde(rename = "response.queued")] - ResponseQueued(ResponseQueued), - /// Output item added - #[serde(rename = "response.output_item.added")] - ResponseOutputItemAdded(ResponseOutputItemAdded), - /// Content part added - #[serde(rename = "response.content_part.added")] - ResponseContentPartAdded(ResponseContentPartAdded), - /// Text delta update - #[serde(rename = "response.output_text.delta")] - ResponseOutputTextDelta(ResponseOutputTextDelta), - /// Text output completed - #[serde(rename = "response.output_text.done")] - ResponseOutputTextDone(ResponseOutputTextDone), - /// Refusal delta update - #[serde(rename = "response.refusal.delta")] - ResponseRefusalDelta(ResponseRefusalDelta), - /// Refusal completed - #[serde(rename = "response.refusal.done")] - ResponseRefusalDone(ResponseRefusalDone), - /// Content part completed - #[serde(rename = "response.content_part.done")] - ResponseContentPartDone(ResponseContentPartDone), - /// Output item completed - #[serde(rename = "response.output_item.done")] - ResponseOutputItemDone(ResponseOutputItemDone), - /// Function call arguments delta - #[serde(rename = "response.function_call_arguments.delta")] - ResponseFunctionCallArgumentsDelta(ResponseFunctionCallArgumentsDelta), - /// Function call arguments completed - #[serde(rename = "response.function_call_arguments.done")] - ResponseFunctionCallArgumentsDone(ResponseFunctionCallArgumentsDone), - /// File search call in progress - #[serde(rename = "response.file_search_call.in_progress")] - ResponseFileSearchCallInProgress(ResponseFileSearchCallInProgress), - /// File search call searching - #[serde(rename = "response.file_search_call.searching")] - ResponseFileSearchCallSearching(ResponseFileSearchCallSearching), - /// File search call completed - #[serde(rename = "response.file_search_call.completed")] - ResponseFileSearchCallCompleted(ResponseFileSearchCallCompleted), - /// Web search call in progress - #[serde(rename = "response.web_search_call.in_progress")] - ResponseWebSearchCallInProgress(ResponseWebSearchCallInProgress), - /// Web search call searching - #[serde(rename = "response.web_search_call.searching")] - ResponseWebSearchCallSearching(ResponseWebSearchCallSearching), - /// Web search call completed - #[serde(rename = "response.web_search_call.completed")] - ResponseWebSearchCallCompleted(ResponseWebSearchCallCompleted), - /// Reasoning summary part added - #[serde(rename = "response.reasoning_summary_part.added")] - ResponseReasoningSummaryPartAdded(ResponseReasoningSummaryPartAdded), - /// Reasoning summary part done - #[serde(rename = "response.reasoning_summary_part.done")] - 
ResponseReasoningSummaryPartDone(ResponseReasoningSummaryPartDone), - /// Reasoning summary text delta - #[serde(rename = "response.reasoning_summary_text.delta")] - ResponseReasoningSummaryTextDelta(ResponseReasoningSummaryTextDelta), - /// Reasoning summary text done - #[serde(rename = "response.reasoning_summary_text.done")] - ResponseReasoningSummaryTextDone(ResponseReasoningSummaryTextDone), - /// Reasoning summary delta - #[serde(rename = "response.reasoning_summary.delta")] - ResponseReasoningSummaryDelta(ResponseReasoningSummaryDelta), - /// Reasoning summary done - #[serde(rename = "response.reasoning_summary.done")] - ResponseReasoningSummaryDone(ResponseReasoningSummaryDone), - /// Image generation call in progress - #[serde(rename = "response.image_generation_call.in_progress")] - ResponseImageGenerationCallInProgress(ResponseImageGenerationCallInProgress), - /// Image generation call generating - #[serde(rename = "response.image_generation_call.generating")] - ResponseImageGenerationCallGenerating(ResponseImageGenerationCallGenerating), - /// Image generation call partial image - #[serde(rename = "response.image_generation_call.partial_image")] - ResponseImageGenerationCallPartialImage(ResponseImageGenerationCallPartialImage), - /// Image generation call completed - #[serde(rename = "response.image_generation_call.completed")] - ResponseImageGenerationCallCompleted(ResponseImageGenerationCallCompleted), - /// MCP call arguments delta - #[serde(rename = "response.mcp_call_arguments.delta")] - ResponseMcpCallArgumentsDelta(ResponseMcpCallArgumentsDelta), - /// MCP call arguments done - #[serde(rename = "response.mcp_call_arguments.done")] - ResponseMcpCallArgumentsDone(ResponseMcpCallArgumentsDone), - /// MCP call completed - #[serde(rename = "response.mcp_call.completed")] - ResponseMcpCallCompleted(ResponseMcpCallCompleted), - /// MCP call failed - #[serde(rename = "response.mcp_call.failed")] - ResponseMcpCallFailed(ResponseMcpCallFailed), - /// MCP call in progress - #[serde(rename = "response.mcp_call.in_progress")] - ResponseMcpCallInProgress(ResponseMcpCallInProgress), - /// MCP list tools completed - #[serde(rename = "response.mcp_list_tools.completed")] - ResponseMcpListToolsCompleted(ResponseMcpListToolsCompleted), - /// MCP list tools failed - #[serde(rename = "response.mcp_list_tools.failed")] - ResponseMcpListToolsFailed(ResponseMcpListToolsFailed), - /// MCP list tools in progress - #[serde(rename = "response.mcp_list_tools.in_progress")] - ResponseMcpListToolsInProgress(ResponseMcpListToolsInProgress), - /// Code interpreter call in progress - #[serde(rename = "response.code_interpreter_call.in_progress")] - ResponseCodeInterpreterCallInProgress(ResponseCodeInterpreterCallInProgress), - /// Code interpreter call interpreting - #[serde(rename = "response.code_interpreter_call.interpreting")] - ResponseCodeInterpreterCallInterpreting(ResponseCodeInterpreterCallInterpreting), - /// Code interpreter call completed - #[serde(rename = "response.code_interpreter_call.completed")] - ResponseCodeInterpreterCallCompleted(ResponseCodeInterpreterCallCompleted), - /// Code interpreter call code delta - #[serde(rename = "response.code_interpreter_call_code.delta")] - ResponseCodeInterpreterCallCodeDelta(ResponseCodeInterpreterCallCodeDelta), - /// Code interpreter call code done - #[serde(rename = "response.code_interpreter_call_code.done")] - ResponseCodeInterpreterCallCodeDone(ResponseCodeInterpreterCallCodeDone), - /// Output text annotation added - #[serde(rename = 
"response.output_text.annotation.added")] - ResponseOutputTextAnnotationAdded(ResponseOutputTextAnnotationAdded), - /// Error occurred - #[serde(rename = "error")] - ResponseError(ResponseError), - - /// Unknown event type - #[serde(untagged)] - Unknown(serde_json::Value), -} - -/// Stream of response events -pub type ResponseStream = Pin> + Send>>; - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseCreated { - pub sequence_number: u64, - pub response: ResponseMetadata, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseInProgress { - pub sequence_number: u64, - pub response: ResponseMetadata, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseOutputItemAdded { - pub sequence_number: u64, - pub output_index: u32, - pub item: OutputItem, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseContentPartAdded { - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub content_index: u32, - pub part: ContentPart, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseOutputTextDelta { - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub content_index: u32, - pub delta: String, - #[serde(default, skip_serializing_if = "Option::is_none")] - pub logprobs: Option, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseContentPartDone { - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub content_index: u32, - pub part: ContentPart, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseOutputItemDone { - pub sequence_number: u64, - pub output_index: u32, - pub item: OutputItem, -} - -/// Response completed event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseCompleted { - pub sequence_number: u64, - pub response: ResponseMetadata, -} - -/// Response failed event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseFailed { - pub sequence_number: u64, - pub response: ResponseMetadata, -} - -/// Response incomplete event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseIncomplete { - pub sequence_number: u64, - pub response: ResponseMetadata, -} - -/// Response queued event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseQueued { - pub sequence_number: u64, - pub response: ResponseMetadata, -} - -/// Text output completed event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseOutputTextDone { - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub content_index: u32, - pub text: String, - pub logprobs: Option>, -} - -/// Refusal delta event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseRefusalDelta { - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub content_index: u32, - pub delta: String, -} - -/// Refusal done event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseRefusalDone { - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub 
content_index: u32, - pub refusal: String, -} - -/// Function call arguments delta event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseFunctionCallArgumentsDelta { - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub delta: String, -} - -/// Function call arguments done event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseFunctionCallArgumentsDone { - pub name: String, - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub arguments: String, -} - -/// Error event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseError { - pub sequence_number: u64, - pub code: Option, - pub message: String, - pub param: Option, -} - -/// File search call in progress event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseFileSearchCallInProgress { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// File search call searching event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseFileSearchCallSearching { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// File search call completed event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseFileSearchCallCompleted { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// Web search call in progress event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseWebSearchCallInProgress { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// Web search call searching event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseWebSearchCallSearching { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// Web search call completed event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseWebSearchCallCompleted { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// Reasoning summary part added event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseReasoningSummaryPartAdded { - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub summary_index: u32, - pub part: serde_json::Value, // Could be more specific but using Value for flexibility -} - -/// Reasoning summary part done event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseReasoningSummaryPartDone { - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub summary_index: u32, - pub part: serde_json::Value, -} - -/// Reasoning summary text delta event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseReasoningSummaryTextDelta { - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub summary_index: u32, - pub delta: String, -} - -/// Reasoning summary text done event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseReasoningSummaryTextDone { - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub summary_index: u32, - pub text: String, -} - -/// 
Reasoning summary delta event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseReasoningSummaryDelta { - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub summary_index: u32, - pub delta: serde_json::Value, -} - -/// Reasoning summary done event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseReasoningSummaryDone { - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub summary_index: u32, - pub text: String, -} - -/// Image generation call in progress event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseImageGenerationCallInProgress { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// Image generation call generating event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseImageGenerationCallGenerating { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// Image generation call partial image event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseImageGenerationCallPartialImage { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, - pub partial_image_index: u32, - pub partial_image_b64: String, -} - -/// Image generation call completed event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseImageGenerationCallCompleted { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// MCP call arguments delta event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseMcpCallArgumentsDelta { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, - pub delta: String, -} - -/// MCP call arguments done event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseMcpCallArgumentsDone { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, - pub arguments: String, -} - -/// MCP call completed event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseMcpCallCompleted { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// MCP call failed event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseMcpCallFailed { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// MCP call in progress event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseMcpCallInProgress { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// MCP list tools completed event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseMcpListToolsCompleted { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// MCP list tools failed event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseMcpListToolsFailed { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// MCP list tools in progress event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseMcpListToolsInProgress { - pub sequence_number: u64, - pub 
output_index: u32, - pub item_id: String, -} - -/// Code interpreter call in progress event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseCodeInterpreterCallInProgress { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// Code interpreter call interpreting event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseCodeInterpreterCallInterpreting { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// Code interpreter call completed event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseCodeInterpreterCallCompleted { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// Code interpreter call code delta event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseCodeInterpreterCallCodeDelta { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, - pub delta: String, -} - -/// Code interpreter call code done event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseCodeInterpreterCallCodeDone { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, - pub code: String, -} - -/// Response metadata -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseMetadata { - pub id: String, - #[serde(skip_serializing_if = "Option::is_none")] - pub object: Option, - pub created_at: u64, - pub status: Status, - #[serde(skip_serializing_if = "Option::is_none")] - pub model: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub usage: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub error: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub incomplete_details: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub input: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub instructions: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub max_output_tokens: Option, - /// Whether the model was run in background mode - #[serde(skip_serializing_if = "Option::is_none")] - pub background: Option, - /// The service tier that was actually used - #[serde(skip_serializing_if = "Option::is_none")] - pub service_tier: Option, - /// The effective value of top_logprobs parameter - #[serde(skip_serializing_if = "Option::is_none")] - pub top_logprobs: Option, - /// The effective value of max_tool_calls parameter - #[serde(skip_serializing_if = "Option::is_none")] - pub max_tool_calls: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub output: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub parallel_tool_calls: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub previous_response_id: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub reasoning: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub store: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub temperature: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub text: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub tool_choice: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub tools: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub top_p: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub truncation: 
Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub user: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub metadata: Option>, - /// Prompt cache key for improved performance - #[serde(skip_serializing_if = "Option::is_none")] - pub prompt_cache_key: Option, - /// Safety identifier for content filtering - #[serde(skip_serializing_if = "Option::is_none")] - pub safety_identifier: Option, -} - /// Output item #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[serde(tag = "type")] @@ -3091,46 +2422,6 @@ pub struct CustomToolCall { pub id: String, } -/// Content part -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ContentPart { - #[serde(rename = "type")] - pub part_type: String, - pub text: Option, - #[serde(default, skip_serializing_if = "Option::is_none")] - pub annotations: Option>, - #[serde(default, skip_serializing_if = "Option::is_none")] - pub logprobs: Option>, -} - -// ===== RESPONSE COLLECTOR ===== - -/// Collects streaming response events into a complete response - -/// Output text annotation added event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseOutputTextAnnotationAdded { - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub content_index: u32, - pub annotation_index: u32, - pub annotation: TextAnnotation, -} - -/// Text annotation object for output text -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct TextAnnotation { - #[serde(rename = "type")] - pub annotation_type: String, - pub text: String, - pub start: u32, - pub end: u32, -} - #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] pub struct DeleteResponse { pub object: String, diff --git a/async-openai/src/types/responses/response_stream.rs b/async-openai/src/types/responses/response_stream.rs new file mode 100644 index 00000000..c6478cae --- /dev/null +++ b/async-openai/src/types/responses/response_stream.rs @@ -0,0 +1,550 @@ +use futures::Stream; +use serde::{Deserialize, Serialize}; +use std::pin::Pin; + +use crate::{ + error::OpenAIError, + types::responses::{OutputContent, OutputItem, Response, ResponseLogProb, Summary}, +}; + +/// Stream of response events +pub type ResponseStream = + Pin> + Send>>; + +/// Event types for streaming responses from the Responses API +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(tag = "type")] +pub enum ResponseStreamEvent { + /// An event that is emitted when a response is created. + #[serde(rename = "response.created")] + ResponseCreated(ResponseCreatedEvent), + /// Emitted when the response is in progress. + #[serde(rename = "response.in_progress")] + ResponseInProgress(ResponseInProgressEvent), + /// Emitted when the model response is complete. + #[serde(rename = "response.completed")] + ResponseCompleted(ResponseCompletedEvent), + /// An event that is emitted when a response fails. + #[serde(rename = "response.failed")] + ResponseFailed(ResponseFailedEvent), + /// An event that is emitted when a response finishes as incomplete. + #[serde(rename = "response.incomplete")] + ResponseIncomplete(ResponseIncompleteEvent), + /// Emitted when a new output item is added. + #[serde(rename = "response.output_item.added")] + ResponseOutputItemAdded(ResponseOutputItemAddedEvent), + /// Emitted when an output item is marked done. 
+ #[serde(rename = "response.output_item.done")] + ResponseOutputItemDone(ResponseOutputItemDoneEvent), + /// Emitted when a new content part is added. + #[serde(rename = "response.content_part.added")] + ResponseContentPartAdded(ResponseContentPartAddedEvent), + /// Emitted when a content part is done. + #[serde(rename = "response.content_part.done")] + ResponseContentPartDone(ResponseContentPartDoneEvent), + /// Emitted when there is an additional text delta. + #[serde(rename = "response.output_text.delta")] + ResponseOutputTextDelta(ResponseOutputTextDeltaEvent), + /// Emitted when text content is finalized. + #[serde(rename = "response.output_text.done")] + ResponseOutputTextDone(ResponseOutputTextDoneEvent), + /// Emitted when there is a partial refusal text. + #[serde(rename = "response.refusal.delta")] + ResponseRefusalDelta(ResponseRefusalDeltaEvent), + #[serde(rename = "response.refusal.done")] + /// Emitted when refusal text is finalized. + ResponseRefusalDone(ResponseRefusalDoneEvent), + /// Emitted when there is a partial function-call arguments delta. + #[serde(rename = "response.function_call_arguments.delta")] + ResponseFunctionCallArgumentsDelta(ResponseFunctionCallArgumentsDeltaEvent), + /// Emitted when function-call arguments are finalized. + #[serde(rename = "response.function_call_arguments.done")] + ResponseFunctionCallArgumentsDone(ResponseFunctionCallArgumentsDoneEvent), + /// Emitted when a file search call is initiated. + #[serde(rename = "response.file_search_call.in_progress")] + ResponseFileSearchCallInProgress(ResponseFileSearchCallInProgressEvent), + /// Emitted when a file search is currently searching. + #[serde(rename = "response.file_search_call.searching")] + ResponseFileSearchCallSearching(ResponseFileSearchCallSearchingEvent), + /// Emitted when a file search call is completed (results found). + #[serde(rename = "response.file_search_call.completed")] + ResponseFileSearchCallCompleted(ResponseFileSearchCallCompletedEvent), + /// Emitted when a web search call is initiated. + #[serde(rename = "response.web_search_call.in_progress")] + ResponseWebSearchCallInProgress(ResponseWebSearchCallInProgressEvent), + /// Emitted when a web search call is executing. + #[serde(rename = "response.web_search_call.searching")] + ResponseWebSearchCallSearching(ResponseWebSearchCallSearchingEvent), + /// Emitted when a web search call is completed. + #[serde(rename = "response.web_search_call.completed")] + ResponseWebSearchCallCompleted(ResponseWebSearchCallCompletedEvent), + /// Emitted when a new reasoning summary part is added. + #[serde(rename = "response.reasoning_summary_part.added")] + ResponseReasoningSummaryPartAdded(ResponseReasoningSummaryPartAddedEvent), + /// Emitted when a reasoning summary part is completed. + #[serde(rename = "response.reasoning_summary_part.done")] + ResponseReasoningSummaryPartDone(ResponseReasoningSummaryPartDoneEvent), + /// Emitted when a delta is added to a reasoning summary text. + #[serde(rename = "response.reasoning_summary_text.delta")] + ResponseReasoningSummaryTextDelta(ResponseReasoningSummaryTextDeltaEvent), + /// Emitted when a reasoning summary text is completed. + #[serde(rename = "response.reasoning_summary_text.done")] + ResponseReasoningSummaryTextDone(ResponseReasoningSummaryTextDoneEvent), + /// Emitted when a delta is added to a reasoning text. + #[serde(rename = "response.reasoning_text.delta")] + ResponseReasoningTextDelta(ResponseReasoningTextDeltaEvent), + /// Emitted when a reasoning text is completed. 
+ #[serde(rename = "response.reasoning_text.done")] + ResponseReasoningTextDone(ResponseReasoningTextDoneEvent), + /// Emitted when an image generation tool call has completed and the final image is available. + #[serde(rename = "response.image_generation_call.completed")] + ResponseImageGenerationCallCompleted(ResponseImageGenerationCallCompletedEvent), + /// Emitted when an image generation tool call is actively generating an image (intermediate state). + #[serde(rename = "response.image_generation_call.generating")] + ResponseImageGenerationCallGenerating(ResponseImageGenerationCallGeneratingEvent), + /// Emitted when an image generation tool call is in progress. + #[serde(rename = "response.image_generation_call.in_progress")] + ResponseImageGenerationCallInProgress(ResponseImageGenerationCallInProgressEvent), + /// Emitted when a partial image is available during image generation streaming. + #[serde(rename = "response.image_generation_call.partial_image")] + ResponseImageGenerationCallPartialImage(ResponseImageGenerationCallPartialImageEvent), + /// Emitted when there is a delta (partial update) to the arguments of an MCP tool call. + #[serde(rename = "response.mcp_call_arguments.delta")] + ResponseMCPCallArgumentsDelta(ResponseMCPCallArgumentsDeltaEvent), + /// Emitted when the arguments for an MCP tool call are finalized. + #[serde(rename = "response.mcp_call_arguments.done")] + ResponseMCPCallArgumentsDone(ResponseMCPCallArgumentsDoneEvent), + /// Emitted when an MCP tool call has completed successfully. + #[serde(rename = "response.mcp_call.completed")] + ResponseMCPCallCompleted(ResponseMCPCallCompletedEvent), + /// Emitted when an MCP tool call has failed. + #[serde(rename = "response.mcp_call.failed")] + ResponseMCPCallFailed(ResponseMCPCallFailedEvent), + /// Emitted when an MCP tool call is in progress. + #[serde(rename = "response.mcp_call.in_progress")] + ResponseMCPCallInProgress(ResponseMCPCallInProgressEvent), + /// Emitted when the list of available MCP tools has been successfully retrieved. + #[serde(rename = "response.mcp_list_tools.completed")] + ResponseMCPListToolsCompleted(ResponseMCPListToolsCompletedEvent), + /// Emitted when the attempt to list available MCP tools has failed. + #[serde(rename = "response.mcp_list_tools.failed")] + ResponseMCPListToolsFailed(ResponseMCPListToolsFailedEvent), + /// Emitted when the system is in the process of retrieving the list of available MCP tools. + #[serde(rename = "response.mcp_list_tools.in_progress")] + ResponseMCPListToolsInProgress(ResponseMCPListToolsInProgressEvent), + /// Emitted when a code interpreter call is in progress. + #[serde(rename = "response.code_interpreter_call.in_progress")] + ResponseCodeInterpreterCallInProgress(ResponseCodeInterpreterCallInProgressEvent), + /// Emitted when the code interpreter is actively interpreting the code snippet. + #[serde(rename = "response.code_interpreter_call.interpreting")] + ResponseCodeInterpreterCallInterpreting(ResponseCodeInterpreterCallInterpretingEvent), + /// Emitted when the code interpreter call is completed. + #[serde(rename = "response.code_interpreter_call.completed")] + ResponseCodeInterpreterCallCompleted(ResponseCodeInterpreterCallCompletedEvent), + /// Emitted when a partial code snippet is streamed by the code interpreter. + #[serde(rename = "response.code_interpreter_call_code.delta")] + ResponseCodeInterpreterCallCodeDelta(ResponseCodeInterpreterCallCodeDeltaEvent), + /// Emitted when the code snippet is finalized by the code interpreter. 
+ #[serde(rename = "response.code_interpreter_call_code.done")] + ResponseCodeInterpreterCallCodeDone(ResponseCodeInterpreterCallCodeDoneEvent), + /// Emitted when an annotation is added to output text content. + #[serde(rename = "response.output_text.annotation.added")] + ResponseOutputTextAnnotationAdded(ResponseOutputTextAnnotationAddedEvent), + /// Emitted when a response is queued and waiting to be processed. + #[serde(rename = "response.queued")] + ResponseQueued(ResponseQueuedEvent), + /// Event representing a delta (partial update) to the input of a custom tool call. + #[serde(rename = "response.custom_tool_call_input.delta")] + ResponseCustomToolCallInputDelta(ResponseCustomToolCallInputDeltaEvent), + /// Event indicating that input for a custom tool call is complete. + #[serde(rename = "response.custom_tool_call_input.done")] + ResponseCustomToolCallInputDone(ResponseCustomToolCallInputDoneEvent), + /// Emitted when an error occurs. + #[serde(rename = "error")] + ResponseError(ResponseErrorEvent), +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseCreatedEvent { + pub sequence_number: u64, + pub response: Response, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseInProgressEvent { + pub sequence_number: u64, + pub response: Response, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseCompletedEvent { + pub sequence_number: u64, + pub response: Response, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseFailedEvent { + pub sequence_number: u64, + pub response: Response, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseIncompleteEvent { + pub sequence_number: u64, + pub response: Response, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseOutputItemAddedEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item: OutputItem, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseOutputItemDoneEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item: OutputItem, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseContentPartAddedEvent { + pub sequence_number: u64, + pub item_id: String, + pub output_index: u32, + pub content_index: u32, + pub part: OutputContent, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseContentPartDoneEvent { + pub sequence_number: u64, + pub item_id: String, + pub output_index: u32, + pub content_index: u32, + pub part: OutputContent, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseOutputTextDeltaEvent { + pub sequence_number: u64, + pub item_id: String, + pub output_index: u32, + pub content_index: u32, + pub delta: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub logprobs: Option>, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseOutputTextDoneEvent { + pub sequence_number: u64, + pub item_id: String, + pub output_index: u32, + pub content_index: u32, + pub text: String, + pub logprobs: Option>, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseRefusalDeltaEvent { + pub sequence_number: u64, + pub item_id: String, + pub output_index: u32, + pub content_index: u32, + pub delta: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseRefusalDoneEvent { + pub 
sequence_number: u64, + pub item_id: String, + pub output_index: u32, + pub content_index: u32, + pub refusal: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseFunctionCallArgumentsDeltaEvent { + pub sequence_number: u64, + pub item_id: String, + pub output_index: u32, + pub delta: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseFunctionCallArgumentsDoneEvent { + pub name: String, + pub sequence_number: u64, + pub item_id: String, + pub output_index: u32, + pub arguments: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseFileSearchCallInProgressEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseFileSearchCallSearchingEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseFileSearchCallCompletedEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseWebSearchCallInProgressEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseWebSearchCallSearchingEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseWebSearchCallCompletedEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum SummaryPart { + SummaryText(Summary), +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseReasoningSummaryPartAddedEvent { + pub sequence_number: u64, + pub item_id: String, + pub output_index: u32, + pub summary_index: u32, + pub part: SummaryPart, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseReasoningSummaryPartDoneEvent { + pub sequence_number: u64, + pub item_id: String, + pub output_index: u32, + pub summary_index: u32, + pub part: SummaryPart, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseReasoningSummaryTextDeltaEvent { + pub sequence_number: u64, + pub item_id: String, + pub output_index: u32, + pub summary_index: u32, + pub delta: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseReasoningSummaryTextDoneEvent { + pub sequence_number: u64, + pub item_id: String, + pub output_index: u32, + pub summary_index: u32, + pub text: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseReasoningTextDeltaEvent { + pub sequence_number: u64, + pub item_id: String, + pub output_index: u32, + pub content_index: u32, + pub delta: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseReasoningTextDoneEvent { + pub sequence_number: u64, + pub item_id: String, + pub output_index: u32, + pub content_index: u32, + pub text: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseImageGenerationCallCompletedEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, 
Deserialize, Clone, PartialEq)] +pub struct ResponseImageGenerationCallGeneratingEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseImageGenerationCallInProgressEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseImageGenerationCallPartialImageEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, + pub partial_image_index: u32, + pub partial_image_b64: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseMCPCallArgumentsDeltaEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, + pub delta: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseMCPCallArgumentsDoneEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, + pub arguments: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseMCPCallCompletedEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseMCPCallFailedEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseMCPCallInProgressEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseMCPListToolsCompletedEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseMCPListToolsFailedEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseMCPListToolsInProgressEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseCodeInterpreterCallInProgressEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseCodeInterpreterCallInterpretingEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseCodeInterpreterCallCompletedEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseCodeInterpreterCallCodeDeltaEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, + pub delta: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseCodeInterpreterCallCodeDoneEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, + pub code: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseOutputTextAnnotationAddedEvent { + pub sequence_number: u64, + pub output_index: u32, + pub content_index: u32, + pub annotation_index: u32, + pub item_id: String, + pub annotation: serde_json::Value, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] 
+pub struct ResponseQueuedEvent { + pub sequence_number: u64, + pub response: Response, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseCustomToolCallInputDeltaEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, + pub delta: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseCustomToolCallInputDoneEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, + pub input: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseErrorEvent { + pub sequence_number: u64, + pub code: Option, + pub message: String, + pub param: Option, +} diff --git a/async-openai/src/types/responses/responses_stream.rs b/async-openai/src/types/responses/responses_stream.rs deleted file mode 100644 index e69de29b..00000000 From 7da4798043643f710c744499d961c5e79387b214 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 09:54:44 -0800 Subject: [PATCH 20/42] fix compilation --- async-openai/src/types/impls.rs | 26 +------------------------- 1 file changed, 1 insertion(+), 25 deletions(-) diff --git a/async-openai/src/types/impls.rs b/async-openai/src/types/impls.rs index 3689dbba..972c6043 100644 --- a/async-openai/src/types/impls.rs +++ b/async-openai/src/types/impls.rs @@ -14,7 +14,7 @@ use crate::{ use bytes::Bytes; use super::{ - responses::{CodeInterpreterContainer, EasyInputContent, Input, Role as ResponsesRole}, + responses::{EasyInputContent, Role as ResponsesRole}, AddUploadPartRequest, AudioInput, AudioResponseFormat, ChatCompletionFunctionCall, ChatCompletionFunctions, ChatCompletionNamedToolChoice, ChatCompletionRequestAssistantMessage, ChatCompletionRequestAssistantMessageContent, ChatCompletionRequestDeveloperMessage, @@ -1047,30 +1047,12 @@ impl AsyncTryFrom for reqwest::multipart::Form { // end: types to multipart form -impl Default for Input { - fn default() -> Self { - Self::Text("".to_string()) - } -} - impl Default for EasyInputContent { fn default() -> Self { Self::Text("".to_string()) } } -impl From for Input { - fn from(value: String) -> Self { - Input::Text(value) - } -} - -impl From<&str> for Input { - fn from(value: &str) -> Self { - Input::Text(value.to_owned()) - } -} - impl Default for ResponsesRole { fn default() -> Self { Self::User @@ -1088,9 +1070,3 @@ impl From<&str> for EasyInputContent { Self::Text(value.to_owned()) } } - -impl Default for CodeInterpreterContainer { - fn default() -> Self { - CodeInterpreterContainer::Id("".to_string()) - } -} From 569e59577d2754c2fde6ebcb9b5f06f1a8d72e2b Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 10:06:46 -0800 Subject: [PATCH 21/42] compiling example/responses --- examples/responses/src/main.rs | 35 ++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/examples/responses/src/main.rs b/examples/responses/src/main.rs index 47395185..792382d0 100644 --- a/examples/responses/src/main.rs +++ b/examples/responses/src/main.rs @@ -1,11 +1,13 @@ use std::error::Error; use async_openai::{ - types::responses::{ - AllowedTools, CreateResponseArgs, Input, InputItem, InputMessageArgs, McpArgs, - RequireApproval, RequireApprovalPolicy, Role, TextConfig, - ToolDefinition::{Mcp, WebSearchPreview}, - Verbosity, WebSearchPreviewArgs, + types::{ + responses::{ + CreateResponseArgs, EasyInputContent, EasyInputMessage, InputItem, InputParam, + MessageType, ResponseTextParam, Role, TextResponseFormatConfiguration, 
Tool, Verbosity, + WebSearchToolArgs, + }, + MCPToolAllowedTools, MCPToolApprovalSetting, MCPToolArgs, MCPToolRequireApproval, }, Client, }; @@ -17,23 +19,24 @@ async fn main() -> Result<(), Box> { let request = CreateResponseArgs::default() .max_output_tokens(512u32) .model("gpt-4.1") - .text(TextConfig { - format: async_openai::types::responses::TextResponseFormat::Text, + .text(ResponseTextParam { + format: TextResponseFormatConfiguration::Text, verbosity: Some(Verbosity::Medium), // only here to test the config, but gpt-4.1 only supports medium }) - .input(Input::Items(vec![InputItem::Message( - InputMessageArgs::default() - .role(Role::User) - .content("What transport protocols does the 2025-03-26 version of the MCP spec (modelcontextprotocol/modelcontextprotocol) support?") - .build()?, + .input(InputParam::Items(vec![InputItem::EasyMessage( + EasyInputMessage { + r#type: MessageType::Message, + role: Role::User, + content: EasyInputContent::Text("What transport protocols does the 2025-03-26 version of the MCP spec (modelcontextprotocol/modelcontextprotocol) support?".to_string()), + } )])) .tools(vec![ - WebSearchPreview(WebSearchPreviewArgs::default().build()?), - Mcp(McpArgs::default() + Tool::WebSearchPreview(WebSearchToolArgs::default().build()?), + Tool::Mcp(MCPToolArgs::default() .server_label("deepwiki") .server_url("https://mcp.deepwiki.com/mcp") - .require_approval(RequireApproval::Policy(RequireApprovalPolicy::Never)) - .allowed_tools(AllowedTools::List(vec!["ask_question".to_string()])) + .require_approval(MCPToolRequireApproval::ApprovalSetting(MCPToolApprovalSetting::Never)) + .allowed_tools(MCPToolAllowedTools::List(vec!["ask_question".to_string()])) .build()?), ]) .build()?; From 6db509fc6b969623559e94e8e2c74322ec3a4bc5 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 11:21:16 -0800 Subject: [PATCH 22/42] fix types --- async-openai/src/types/responses/response.rs | 22 +++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/async-openai/src/types/responses/response.rs b/async-openai/src/types/responses/response.rs index 73d30ab6..98cae0b3 100644 --- a/async-openai/src/types/responses/response.rs +++ b/async-openai/src/types/responses/response.rs @@ -419,9 +419,9 @@ pub struct InputMessage { /// The status of the item. One of `in_progress`, `completed`, or `incomplete`. /// Populated when items are returned via API. #[serde(skip_serializing_if = "Option::is_none")] - pub status: Option, // TODO rename OutputStatus to ItemStatus maybe? - /// The type of the message input. Always set to `message`. - pub r#type: MessageType, + pub status: Option, + /////The type of the message input. Always set to `message`. + //pub r#type: MessageType, } /// The role for an input message - can only be `user`, `system`, or `developer`. @@ -813,6 +813,11 @@ pub enum Truncation { Disabled, } +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct Billing { + pub payer: String, +} + /// o-series reasoning settings. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] #[builder( @@ -1531,7 +1536,7 @@ pub struct ResponseLogProb { pub struct OutputTextContent { /// The annotations of the text output. pub annotations: Vec, - pub logprobs: Option, + pub logprobs: Option>, /// The text output from the model. pub text: String, } @@ -1612,8 +1617,8 @@ pub struct OutputMessage { /// The status of the message input. One of `in_progress`, `completed`, or /// `incomplete`. 
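A small aside on the EasyInputContent migration in the two patches above (illustrative only, not part of the diff): the From<&str> impl kept in impls.rs means example code does not have to spell out EasyInputContent::Text by hand.

// Sketch, assuming the retained From<&str> impl for EasyInputContent shown in impls.rs above.
use async_openai::types::responses::EasyInputContent;

fn haiku_prompt() -> EasyInputContent {
    // Equivalent to EasyInputContent::Text("...".to_string()).
    "Write a haiku about programming.".into()
}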
Populated when input items are returned via API. pub status: OutputStatus, - /// The type of the output message. Always `message`. - pub r#type: MessageType, + ///// The type of the output message. Always `message`. + //pub r#type: MessageType, } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] @@ -2173,6 +2178,10 @@ pub struct Response { #[serde(skip_serializing_if = "Option::is_none")] pub background: Option, + /// Billing information for the response. + #[serde(skip_serializing_if = "Option::is_none")] + pub billing: Option, + /// The conversation that this response belongs to. Input items and output /// items from this response are automatically added to this conversation. #[serde(skip_serializing_if = "Option::is_none")] @@ -2368,7 +2377,6 @@ pub enum Status { #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[serde(tag = "type")] #[serde(rename_all = "snake_case")] -#[non_exhaustive] pub enum OutputItem { /// An output message from the model. Message(OutputMessage), From 0f0bfa13dffb8c793f3dc42ed8f63e04e80da962 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 11:29:58 -0800 Subject: [PATCH 23/42] fix examples/responses-function-call --- examples/responses-function-call/src/main.rs | 107 ++++++++++--------- 1 file changed, 56 insertions(+), 51 deletions(-) diff --git a/examples/responses-function-call/src/main.rs b/examples/responses-function-call/src/main.rs index 3e2083e8..0dcfc3e2 100644 --- a/examples/responses-function-call/src/main.rs +++ b/examples/responses-function-call/src/main.rs @@ -1,7 +1,8 @@ use async_openai::{ types::responses::{ - CreateResponseArgs, FunctionArgs, FunctionCall, Input, InputItem, InputMessageArgs, - OutputContent, Role, ToolDefinition, + CreateResponseArgs, EasyInputContent, EasyInputMessage, FunctionCallOutput, + FunctionCallOutputItemParam, FunctionTool, FunctionToolCall, InputItem, InputParam, Item, + MessageType, OutputItem, Role, Tool, }, Client, }; @@ -22,48 +23,46 @@ fn check_weather(location: String, units: String) -> String { async fn main() -> Result<(), Box> { let client = Client::new(); - let tools = vec![ToolDefinition::Function( - FunctionArgs::default() - .name("get_weather") - .description("Retrieves current weather for the given location") - .parameters(serde_json::json!( - { - "type": "object", - "properties": { - "location": { - "type": "string", - "description": "City and country e.g. Bogotá, Colombia" - }, - "units": { - "type": "string", - "enum": [ - "celsius", - "fahrenheit" - ], - "description": "Units the temperature will be returned in." - } + let tools = vec![Tool::Function(FunctionTool { + name: "get_weather".to_string(), + description: Some("Retrieves current weather for the given location".to_string()), + parameters: Some(serde_json::json!( + { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "City and country e.g. Bogotá, Colombia" }, - "required": [ - "location", - "units" - ], - "additionalProperties": false - } - )) - .build()?, - )]; - - let mut input_messages = vec![InputItem::Message( - InputMessageArgs::default() - .role(Role::User) - .content("What's the weather like in Paris today?") - .build()?, - )]; + "units": { + "type": "string", + "enum": [ + "celsius", + "fahrenheit" + ], + "description": "Units the temperature will be returned in." 
+ } + }, + "required": [ + "location", + "units" + ], + "additionalProperties": false + } + )), + strict: None, + })]; + + let mut input_messages = vec![InputItem::EasyMessage(EasyInputMessage { + r#type: MessageType::Message, + role: Role::User, + content: EasyInputContent::Text("What's the weather like in Paris today?".to_string()), + })]; let request = CreateResponseArgs::default() .max_output_tokens(512u32) .model("gpt-4.1") - .input(Input::Items(input_messages.clone())) + .input(InputParam::Items(input_messages.clone())) .tools(tools.clone()) .build()?; @@ -72,9 +71,9 @@ async fn main() -> Result<(), Box> { let response = client.responses().create(request).await?; // the model might ask for us to do a function call - let function_call_request: Option = - response.output.into_iter().find_map(|output_content| { - if let OutputContent::FunctionCall(inner) = output_content { + let function_call_request: Option = + response.output.into_iter().find_map(|output_item| { + if let OutputItem::FunctionCall(inner) = output_item { Some(inner) } else { None @@ -97,19 +96,25 @@ async fn main() -> Result<(), Box> { } }; - input_messages.push(InputItem::Custom(serde_json::to_value( - &OutputContent::FunctionCall(function_call_request.clone()), - )?)); - input_messages.push(InputItem::Custom(serde_json::json!({ - "type": "function_call_output", - "call_id": function_call_request.call_id, - "output": function_result, - }))); + // Add the function call from the assistant back to the conversation + input_messages.push(InputItem::Item(Item::FunctionCall( + function_call_request.clone(), + ))); + + // Add the function call output back to the conversation + input_messages.push(InputItem::Item(Item::FunctionCallOutput( + FunctionCallOutputItemParam { + call_id: function_call_request.call_id.clone(), + output: FunctionCallOutput::Text(function_result), + id: None, + status: None, + }, + ))); let request = CreateResponseArgs::default() .max_output_tokens(512u32) .model("gpt-4.1") - .input(Input::Items(input_messages)) + .input(InputParam::Items(input_messages)) .tools(tools) .build()?; From 86d5cf6cc2691e72225027b270d3248b7264fa18 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 11:34:17 -0800 Subject: [PATCH 24/42] fix examples/responses-stream --- examples/responses-stream/src/main.rs | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/examples/responses-stream/src/main.rs b/examples/responses-stream/src/main.rs index 27e8b14e..37be90c6 100644 --- a/examples/responses-stream/src/main.rs +++ b/examples/responses-stream/src/main.rs @@ -1,7 +1,8 @@ use async_openai::{ Client, types::responses::{ - CreateResponseArgs, Input, InputContent, InputItem, InputMessageArgs, ResponseEvent, Role, + CreateResponseArgs, EasyInputContent, EasyInputMessage, InputItem, InputParam, MessageType, + ResponseStreamEvent, Role, }, }; use futures::StreamExt; @@ -13,13 +14,12 @@ async fn main() -> Result<(), Box> { let request = CreateResponseArgs::default() .model("gpt-4.1") .stream(true) - .input(Input::Items(vec![InputItem::Message( - InputMessageArgs::default() - .role(Role::User) - .content(InputContent::TextInput( - "Write a haiku about programming.".to_string(), - )) - .build()?, + .input(InputParam::Items(vec![InputItem::EasyMessage( + EasyInputMessage { + r#type: MessageType::Message, + role: Role::User, + content: EasyInputContent::Text("Write a haiku about programming.".to_string()), + }, )])) .build()?; @@ -28,12 +28,12 @@ async fn main() -> Result<(), Box> { 
while let Some(result) = stream.next().await { match result { Ok(response_event) => match &response_event { - ResponseEvent::ResponseOutputTextDelta(delta) => { + ResponseStreamEvent::ResponseOutputTextDelta(delta) => { print!("{}", delta.delta); } - ResponseEvent::ResponseCompleted(_) - | ResponseEvent::ResponseIncomplete(_) - | ResponseEvent::ResponseFailed(_) => { + ResponseStreamEvent::ResponseCompleted(_) + | ResponseStreamEvent::ResponseIncomplete(_) + | ResponseStreamEvent::ResponseFailed(_) => { break; } _ => { From 342142762758313ed0795bb99aa6ecde9ca30e92 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 11:50:20 -0800 Subject: [PATCH 25/42] update it to RealtimeResponse to distinguish from Response --- async-openai/src/types/realtime/response_resource.rs | 2 +- async-openai/src/types/realtime/server_event.rs | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/async-openai/src/types/realtime/response_resource.rs b/async-openai/src/types/realtime/response_resource.rs index 2d6342d2..7999b784 100644 --- a/async-openai/src/types/realtime/response_resource.rs +++ b/async-openai/src/types/realtime/response_resource.rs @@ -183,7 +183,7 @@ pub struct ResponseCreate { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct Response { +pub struct RealtimeResponse { /// Configuration for audio output. pub audio: Option, diff --git a/async-openai/src/types/realtime/server_event.rs b/async-openai/src/types/realtime/server_event.rs index 6d16742c..3d73b742 100644 --- a/async-openai/src/types/realtime/server_event.rs +++ b/async-openai/src/types/realtime/server_event.rs @@ -1,6 +1,6 @@ use serde::{Deserialize, Serialize}; -use crate::types::realtime::{Response, Session}; +use crate::types::realtime::{RealtimeResponse, Session}; use super::{ content_part::ContentPart, error::RealtimeAPIError, item::Item, rate_limit::RateLimit, @@ -271,7 +271,7 @@ pub struct ResponseCreatedEvent { /// The unique ID of the server event. pub event_id: String, /// The response resource. - pub response: Response, + pub response: RealtimeResponse, } #[derive(Debug, Serialize, Deserialize, Clone)] @@ -279,7 +279,7 @@ pub struct ResponseDoneEvent { /// The unique ID of the server event. pub event_id: String, /// The response resource. - pub response: Response, + pub response: RealtimeResponse, } #[derive(Debug, Serialize, Deserialize, Clone)] From 7371c88010e063d79f1731b64f1d62393c718243 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 11:57:29 -0800 Subject: [PATCH 26/42] avoid name conflicts --- async-openai/src/types/realtime/response_resource.rs | 4 ++-- async-openai/src/types/realtime/server_event.rs | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/async-openai/src/types/realtime/response_resource.rs b/async-openai/src/types/realtime/response_resource.rs index 7999b784..137e562d 100644 --- a/async-openai/src/types/realtime/response_resource.rs +++ b/async-openai/src/types/realtime/response_resource.rs @@ -6,7 +6,7 @@ use crate::types::realtime::{ }; #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct Usage { +pub struct RealtimeResponseUsage { /// Details about the input tokens used in the Response. Cached tokens are tokens from previous /// turns in the conversation that are included as context for the current response. 
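An illustrative note on the rename in the patch above (not part of the diff): with the realtime response type now called RealtimeResponse, it no longer collides with the Responses API Response, so both can be imported side by side, assuming the existing module re-exports.

// Sketch: the two response types can now coexist in one module.
use async_openai::types::realtime::RealtimeResponse; // realtime WebSocket response resource
use async_openai::types::responses::Response;        // Responses API response

// Hypothetical handlers, shown only to illustrate that the names no longer clash.
fn on_realtime_done(_resp: RealtimeResponse) {}
fn on_response(_resp: Response) {}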
Cached tokens /// here are counted as a subset of input tokens, meaning input tokens will include cached and @@ -232,5 +232,5 @@ pub struct RealtimeResponse { /// Usage statistics for the Response, this will correspond to billing. A Realtime API session /// will maintain a conversation context and append new Items to the Conversation, thus output /// from previous turns (text and audio tokens) will become the input for later turns. - pub usage: Option, + pub usage: Option, } diff --git a/async-openai/src/types/realtime/server_event.rs b/async-openai/src/types/realtime/server_event.rs index 3d73b742..739ad9cc 100644 --- a/async-openai/src/types/realtime/server_event.rs +++ b/async-openai/src/types/realtime/server_event.rs @@ -137,7 +137,7 @@ pub struct LogProb { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct InputTokenDetails { +pub struct TokenUsageInputTokenDetails { /// Number of audio tokens billed for this request. pub audio_tokens: u32, /// Number of text tokens billed for this request. @@ -153,7 +153,7 @@ pub struct TokenUsage { /// Total number of tokens used (input + output). pub total_tokens: u32, /// Details about the input tokens billed for this request. - pub input_token_details: Option, + pub input_token_details: Option, } #[derive(Debug, Serialize, Deserialize, Clone)] @@ -164,7 +164,7 @@ pub struct DurationUsage { #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(tag = "type")] -pub enum Usage { +pub enum TranscriptionUsage { #[serde(rename = "tokens")] TokenUsage(TokenUsage), #[serde(rename = "duration")] @@ -185,7 +185,7 @@ pub struct ConversationItemInputAudioTranscriptionCompletedEvent { pub logprobs: Option>, /// Usage statistics for the transcription, this is billed according to the ASR model's pricing rather than /// the realtime model's pricing. - pub usage: Usage, + pub usage: TranscriptionUsage, } #[derive(Debug, Serialize, Deserialize, Clone)] From 95a2217017fde4ec5a8e6345c8da4a684224e5aa Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 12:45:09 -0800 Subject: [PATCH 27/42] update realtime types --- .../src/types/realtime/client_event.rs | 54 +++++++++---------- .../src/types/realtime/server_event.rs | 2 +- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/async-openai/src/types/realtime/client_event.rs b/async-openai/src/types/realtime/client_event.rs index 3a0ddf54..fe28dc47 100644 --- a/async-openai/src/types/realtime/client_event.rs +++ b/async-openai/src/types/realtime/client_event.rs @@ -128,7 +128,7 @@ pub struct OutputAudioBufferClearEvent { /// These are events that the OpenAI Realtime WebSocket server will accept from the client. #[derive(Debug, Serialize, Deserialize)] #[serde(tag = "type")] -pub enum ClientEvent { +pub enum RealtimeClientEvent { /// Send this event to update the session's configuration. The client may send this event at any time to update any field /// except for `voice` and `model`. `voice` can be updated only if there have been no other audio outputs yet. 
/// @@ -234,14 +234,14 @@ pub enum ClientEvent { OutputAudioBufferClear(OutputAudioBufferClearEvent), } -impl From<&ClientEvent> for String { - fn from(value: &ClientEvent) -> Self { +impl From<&RealtimeClientEvent> for String { + fn from(value: &RealtimeClientEvent) -> Self { serde_json::to_string(value).unwrap() } } -impl From for Message { - fn from(value: ClientEvent) -> Self { +impl From for Message { + fn from(value: RealtimeClientEvent) -> Self { Message::Text(String::from(&value).into()) } } @@ -266,61 +266,61 @@ macro_rules! event_from { }; } -event_from!(SessionUpdateEvent, ClientEvent, SessionUpdate); +event_from!(SessionUpdateEvent, RealtimeClientEvent, SessionUpdate); event_from!( InputAudioBufferAppendEvent, - ClientEvent, + RealtimeClientEvent, InputAudioBufferAppend ); event_from!( InputAudioBufferCommitEvent, - ClientEvent, + RealtimeClientEvent, InputAudioBufferCommit ); event_from!( InputAudioBufferClearEvent, - ClientEvent, + RealtimeClientEvent, InputAudioBufferClear ); event_from!( ConversationItemCreateEvent, - ClientEvent, + RealtimeClientEvent, ConversationItemCreate ); event_from!( ConversationItemTruncateEvent, - ClientEvent, + RealtimeClientEvent, ConversationItemTruncate ); event_from!( ConversationItemDeleteEvent, - ClientEvent, + RealtimeClientEvent, ConversationItemDelete ); event_from!( ConversationItemRetrieveEvent, - ClientEvent, + RealtimeClientEvent, ConversationItemRetrieve ); -event_from!(ResponseCreateEvent, ClientEvent, ResponseCreate); -event_from!(ResponseCancelEvent, ClientEvent, ResponseCancel); +event_from!(ResponseCreateEvent, RealtimeClientEvent, ResponseCreate); +event_from!(ResponseCancelEvent, RealtimeClientEvent, ResponseCancel); event_from!( OutputAudioBufferClearEvent, - ClientEvent, + RealtimeClientEvent, OutputAudioBufferClear ); -message_from_event!(SessionUpdateEvent, ClientEvent); -message_from_event!(InputAudioBufferAppendEvent, ClientEvent); -message_from_event!(InputAudioBufferCommitEvent, ClientEvent); -message_from_event!(InputAudioBufferClearEvent, ClientEvent); -message_from_event!(ConversationItemCreateEvent, ClientEvent); -message_from_event!(ConversationItemTruncateEvent, ClientEvent); -message_from_event!(ConversationItemDeleteEvent, ClientEvent); -message_from_event!(ConversationItemRetrieveEvent, ClientEvent); -message_from_event!(ResponseCreateEvent, ClientEvent); -message_from_event!(ResponseCancelEvent, ClientEvent); -message_from_event!(OutputAudioBufferClearEvent, ClientEvent); +message_from_event!(SessionUpdateEvent, RealtimeClientEvent); +message_from_event!(InputAudioBufferAppendEvent, RealtimeClientEvent); +message_from_event!(InputAudioBufferCommitEvent, RealtimeClientEvent); +message_from_event!(InputAudioBufferClearEvent, RealtimeClientEvent); +message_from_event!(ConversationItemCreateEvent, RealtimeClientEvent); +message_from_event!(ConversationItemTruncateEvent, RealtimeClientEvent); +message_from_event!(ConversationItemDeleteEvent, RealtimeClientEvent); +message_from_event!(ConversationItemRetrieveEvent, RealtimeClientEvent); +message_from_event!(ResponseCreateEvent, RealtimeClientEvent); +message_from_event!(ResponseCancelEvent, RealtimeClientEvent); +message_from_event!(OutputAudioBufferClearEvent, RealtimeClientEvent); impl From for ConversationItemCreateEvent { fn from(value: Item) -> Self { diff --git a/async-openai/src/types/realtime/server_event.rs b/async-openai/src/types/realtime/server_event.rs index 739ad9cc..1fae8c4f 100644 --- a/async-openai/src/types/realtime/server_event.rs +++ 
b/async-openai/src/types/realtime/server_event.rs @@ -559,7 +559,7 @@ pub struct ResponseMCPCallFailedEvent { /// These are events emitted from the OpenAI Realtime WebSocket server to the client. #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(tag = "type")] -pub enum ServerEvent { +pub enum RealtimeServerEvent { /// Returned when an error occurs, which could be a client problem or a server problem. /// Most errors are recoverable and the session will stay open, we recommend to /// implementors to monitor and log error messages by default. From 58557c00ef74a7ed5928ffb1d568c42a770c6006 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 12:45:23 -0800 Subject: [PATCH 28/42] update realtime example --- examples/realtime/src/main.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/realtime/src/main.rs b/examples/realtime/src/main.rs index 11a87329..4f79b656 100644 --- a/examples/realtime/src/main.rs +++ b/examples/realtime/src/main.rs @@ -1,7 +1,7 @@ use std::process::exit; use async_openai::types::realtime::{ - ConversationItemCreateEvent, Item, Message as RealtimeMessage, ResponseCreateEvent, ServerEvent, + ConversationItemCreateEvent, Item, RealtimeServerEvent, ResponseCreateEvent, }; use futures_util::{future, pin_mut, StreamExt}; @@ -43,7 +43,7 @@ async fn main() { match message { Message::Text(_) => { let data = message.clone().into_data(); - let server_event: Result = + let server_event: Result = serde_json::from_slice(&data); match server_event { Ok(server_event) => { @@ -53,10 +53,10 @@ async fn main() { eprint!("{:32} | ", event_type.as_str().unwrap()); match server_event { - ServerEvent::ResponseOutputItemDone(event) => { + RealtimeServerEvent::ResponseOutputItemDone(event) => { eprint!("{event:?}"); } - ServerEvent::Error(e) => { + RealtimeServerEvent::Error(e) => { eprint!("{e:?}"); } _ => {} From ce11c0532025fe64ac33ebc3d66e94894c5dd33a Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 13:00:36 -0800 Subject: [PATCH 29/42] update names --- async-openai/src/types/realtime/session_resource.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/async-openai/src/types/realtime/session_resource.rs b/async-openai/src/types/realtime/session_resource.rs index ba0f12cf..97f42bbf 100644 --- a/async-openai/src/types/realtime/session_resource.rs +++ b/async-openai/src/types/realtime/session_resource.rs @@ -331,10 +331,10 @@ pub struct TokenLimits { pub enum Session { /// The type of session to create. Always `realtime` for the Realtime API. #[serde(rename = "realtime")] - RealtimeSessionConfiguration(RealtimeSession), + RealtimeSession(RealtimeSession), /// The type of session to create. Always `transcription` for transcription sessions. #[serde(rename = "transcription")] - TranscriptionSessionConfiguration(TranscriptionSession), + RealtimeTranscriptionSession(RealtimeTranscriptionSession), } /// Realtime session object configuration. @@ -438,7 +438,7 @@ pub struct TranscriptionAudio { /// Realtime transcription session object configuration. #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct TranscriptionSession { +pub struct RealtimeTranscriptionSession { /// Configuration for input and output audio. 
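Relating to the RealtimeClientEvent rename above, a minimal sketch of how the explicit From impls and the event_from!/message_from_event! invocations are expected to be used when sending events to the WebSocket server. This is not part of the patch and assumes the macros generate the From conversions their names suggest.

use async_openai::types::realtime::{RealtimeClientEvent, ResponseCreateEvent};
use tokio_tungstenite::tungstenite::Message;

fn response_create_frame() -> Message {
    // Ask the server to create a response with default parameters.
    let event = ResponseCreateEvent { event_id: None, response: None };

    // Assumed to come from event_from!(ResponseCreateEvent, RealtimeClientEvent, ResponseCreate).
    let client_event: RealtimeClientEvent = event.into();

    // From<RealtimeClientEvent> for Message serializes the event into a JSON text frame.
    client_event.into()
}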
pub audio: TranscriptionAudio, From 5f3dbed9f5ae78cd84e5882b4d82313b711d001c Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 14:36:49 -0800 Subject: [PATCH 30/42] updated realtime spec --- .../src/types/realtime/client_event.rs | 4 +- .../src/types/realtime/response_resource.rs | 15 ++-- .../src/types/realtime/session_resource.rs | 87 ++++++------------- 3 files changed, 38 insertions(+), 68 deletions(-) diff --git a/async-openai/src/types/realtime/client_event.rs b/async-openai/src/types/realtime/client_event.rs index fe28dc47..c6edb1c0 100644 --- a/async-openai/src/types/realtime/client_event.rs +++ b/async-openai/src/types/realtime/client_event.rs @@ -1,7 +1,7 @@ use serde::{Deserialize, Serialize}; use tokio_tungstenite::tungstenite::Message; -use crate::types::realtime::{ResponseCreate, Session}; +use crate::types::realtime::{RealtimeResponseCreateParams, Session}; use super::item::Item; @@ -103,7 +103,7 @@ pub struct ResponseCreateEvent { pub event_id: Option, /// Create a new Realtime response with these parameters - pub response: Option, + pub response: Option, } #[derive(Debug, Serialize, Deserialize, Clone, Default)] diff --git a/async-openai/src/types/realtime/response_resource.rs b/async-openai/src/types/realtime/response_resource.rs index 137e562d..eb7040f5 100644 --- a/async-openai/src/types/realtime/response_resource.rs +++ b/async-openai/src/types/realtime/response_resource.rs @@ -1,8 +1,11 @@ use serde::{Deserialize, Serialize}; -use crate::types::realtime::{ - AudioFormat, Conversation, Item, MaxOutputTokens, Prompt, RealtimeVoice, ToolChoice, - ToolDefinition, +use crate::types::{ + realtime::{ + Conversation, Item, MaxOutputTokens, RealtimeAudioFormats, RealtimeTool, RealtimeVoice, + ToolChoice, + }, + responses::Prompt, }; #[derive(Debug, Serialize, Deserialize, Clone)] @@ -104,7 +107,7 @@ pub struct ResponseStatusDetail { #[derive(Debug, Serialize, Deserialize, Clone)] pub struct ResponseAudioOutput { /// The format of the output audio. - pub format: AudioFormat, + pub format: RealtimeAudioFormats, /// The voice the model uses to respond. Voice cannot be changed during the session once /// the model has responded with audio at least once. Current voice options are @@ -121,7 +124,7 @@ pub struct ResponseAudio { /// The response resource. #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseCreate { +pub struct RealtimeResponseCreateParams { /// Configuration for audio input and output. pub audio: ResponseAudio, @@ -179,7 +182,7 @@ pub struct ResponseCreate { /// Tools available to the model. 
#[serde(skip_serializing_if = "Option::is_none")] - pub tools: Option>, + pub tools: Option>, } #[derive(Debug, Serialize, Deserialize, Clone)] diff --git a/async-openai/src/types/realtime/session_resource.rs b/async-openai/src/types/realtime/session_resource.rs index 97f42bbf..66780844 100644 --- a/async-openai/src/types/realtime/session_resource.rs +++ b/async-openai/src/types/realtime/session_resource.rs @@ -1,6 +1,9 @@ use serde::{Deserialize, Serialize}; -use crate::types::MCPTool; +use crate::types::{ + responses::{Prompt, ToolChoiceFunction, ToolChoiceMCP, ToolChoiceOptions}, + MCPTool, +}; #[derive(Debug, Default, Serialize, Deserialize, Clone)] pub struct AudioTranscription { @@ -23,7 +26,7 @@ pub struct AudioTranscription { #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(tag = "type")] -pub enum TurnDetection { +pub enum RealtimeTurnDetection { /// Server-side voice activity detection (VAD) which flips on when user speech is detected /// and off after a period of silence. #[serde(rename = "server_vad")] @@ -95,7 +98,7 @@ pub enum MaxOutputTokens { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct FunctionTool { +pub struct RealtimeFunctionTool { /// The name of the function. pub name: String, /// The description of the function, including guidance on when and how to call it, @@ -107,9 +110,9 @@ pub struct FunctionTool { #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(tag = "type")] -pub enum ToolDefinition { +pub enum RealtimeTool { #[serde(rename = "function")] - Function(FunctionTool), + Function(RealtimeFunctionTool), /// Give the model access to additional tools via remote Model Context Protocol (MCP) servers. /// [Learn more about MCP](https://platform.openai.com/docs/guides/tools-remote-mcp). #[serde(rename = "mcp")] @@ -123,35 +126,15 @@ pub enum FunctionType { } #[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(tag = "type")] -pub enum Tool { +#[serde(tag = "type", rename_all = "snake_case")] +pub enum ToolChoice { /// Use this option to force the model to call a specific function. - #[serde(rename = "function")] - Function { - /// The name of the function to call. - name: String, - }, + Function(ToolChoiceFunction), /// Use this option to force the model to call a specific tool on a remote MCP server. - #[serde(rename = "mcp")] - MCP { - /// The name of the tool to call on the server. - name: String, - /// The label of the MCP server to use. - server_label: String, - }, -} + Mcp(ToolChoiceMCP), -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(rename_all = "lowercase")] -pub enum ToolChoice { - /// `auto` means the model can pick between generating a message or calling one or more tools. - Auto, - /// `none` means the model will not call any tool and instead generates a message. - None, - /// `required` means the model must call one or more tools. - Required, #[serde(untagged)] - Tool(Tool), + Mode(ToolChoiceOptions), } #[derive(Debug, Serialize, Deserialize, Clone)] @@ -171,7 +154,7 @@ pub enum RealtimeVoice { #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(tag = "type")] -pub enum AudioFormat { +pub enum RealtimeAudioFormats { /// The PCM audio format. Only a 24kHz sample rate is supported. #[serde(rename = "audio/pcm")] PCMAudioFormat { @@ -195,13 +178,13 @@ pub struct G711ULAWAudioFormat { #[derive(Debug, Serialize, Deserialize, Clone)] pub struct AudioInput { /// The format of the input audio. - pub format: AudioFormat, + pub format: RealtimeAudioFormats, /// Configuration for input audio noise reduction. 
This can be set to null to turn off. /// Noise reduction filters audio added to the input audio buffer before it is sent to VAD /// and the model. Filtering the audio can improve VAD and turn detection accuracy /// (reducing false positives) and model performance by improving perception of the /// input audio. - pub noise_reduction: Option, + pub noise_reduction: Option, /// Configuration for input audio transcription, defaults to off and can be set to `null` to turn off once on. /// Input audio transcription is not native to the model, since the model consumes audio directly. /// Transcription runs asynchronously through [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription) @@ -222,13 +205,13 @@ pub struct AudioInput { /// the model will score a low probability of turn end and wait longer for the user to /// continue speaking. This can be useful for more natural conversations, but may have a /// higher latency. - pub turn_detection: TurnDetection, + pub turn_detection: RealtimeTurnDetection, } #[derive(Debug, Serialize, Deserialize, Clone)] pub struct AudioOutput { /// The format of the output audio. - pub format: AudioFormat, + pub format: RealtimeAudioFormats, /// The speed of the model's spoken response as a multiple of the original speed. /// 1.0 is the default speed. 0.25 is the minimum speed. 1.5 is the maximum speed. /// This value can only be changed in between model turns, not while a response @@ -250,19 +233,6 @@ pub struct Audio { pub output: AudioOutput, } -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct Prompt { - /// The unique identifier of the prompt template to use. - pub id: String, - /// Optional map of values to substitute in for variables in your prompt. The substitution - /// values can either be strings, or other Response input types like images or files. - #[serde(skip_serializing_if = "Option::is_none")] - pub variables: Option, - /// Optional version of the prompt template. - #[serde(skip_serializing_if = "Option::is_none")] - pub version: Option, -} - #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(rename_all = "lowercase")] pub enum Tracing { @@ -286,7 +256,7 @@ pub struct TracingConfiguration { /// The truncation strategy to use for the session. #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(rename_all = "lowercase")] -pub enum Truncation { +pub enum RealtimeTruncation { /// `auto` is the default truncation strategy. Auto, /// `disabled` will disable truncation and emit errors when the conversation exceeds the input @@ -338,6 +308,7 @@ pub enum Session { } /// Realtime session object configuration. +/// openapi spec type: RealtimeSessionCreateRequestGA #[derive(Debug, Serialize, Deserialize, Clone)] pub struct RealtimeSession { pub audio: Audio, @@ -390,7 +361,7 @@ pub struct RealtimeSession { /// Tools available to the model. #[serde(skip_serializing_if = "Option::is_none")] - pub tools: Option>, + pub tools: Option>, /// Realtime API can write session traces to the [Traces Dashboard](https://platform.openai.com/logs?api=traces). /// Set to null to disable tracing. Once tracing is enabled for a session, the configuration cannot be modified. @@ -413,30 +384,26 @@ pub struct RealtimeSession { /// truncate but would instead return an error if the conversation exceeds the model's input /// token limit. #[serde(skip_serializing_if = "Option::is_none")] - pub truncation: Option, + pub truncation: Option, } +/// Type of noise reduction. 
`near_field` is for close-talking microphones such as +/// headphones, `far_field` is for far-field microphones such as laptop or conference +/// room microphones. #[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(rename_all = "snake_case")] +#[serde(tag = "type", rename_all = "snake_case")] pub enum NoiseReductionType { NearField, FarField, } -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct NoiseReduction { - /// Type of noise reduction. `near_field` is for close-talking microphones such as - /// headphones, `far_field` is for far-field microphones such as laptop or conference - /// room microphones. - pub r#type: NoiseReductionType, -} - #[derive(Debug, Serialize, Deserialize, Clone)] pub struct TranscriptionAudio { pub input: AudioInput, } /// Realtime transcription session object configuration. +/// openapi spec type: RealtimeTranscriptionSessionCreateRequestGA #[derive(Debug, Serialize, Deserialize, Clone)] pub struct RealtimeTranscriptionSession { /// Configuration for input and output audio. From 4e0fa1dc61b79d385efd87e01e1d5e7b1539e67c Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 14:55:58 -0800 Subject: [PATCH 31/42] RealtimeConversationItem --- async-openai/src/types/realtime/client_event.rs | 10 ++++------ async-openai/src/types/realtime/item.rs | 4 ++-- .../src/types/realtime/response_resource.rs | 8 ++++---- async-openai/src/types/realtime/server_event.rs | 13 +++++++------ 4 files changed, 17 insertions(+), 18 deletions(-) diff --git a/async-openai/src/types/realtime/client_event.rs b/async-openai/src/types/realtime/client_event.rs index c6edb1c0..d77e04bb 100644 --- a/async-openai/src/types/realtime/client_event.rs +++ b/async-openai/src/types/realtime/client_event.rs @@ -1,9 +1,7 @@ use serde::{Deserialize, Serialize}; use tokio_tungstenite::tungstenite::Message; -use crate::types::realtime::{RealtimeResponseCreateParams, Session}; - -use super::item::Item; +use crate::types::realtime::{RealtimeConversationItem, RealtimeResponseCreateParams, Session}; #[derive(Debug, Serialize, Deserialize, Clone)] pub struct SessionUpdateEvent { @@ -56,7 +54,7 @@ pub struct ConversationItemCreateEvent { pub previous_item_id: Option, /// A single item within a Realtime conversation. 
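As a side note on the noise reduction change in the previous patch (illustrative only): with the serde tag moved onto NoiseReductionType itself, the old NoiseReduction wrapper struct is no longer needed to produce the tagged JSON shape.

use async_openai::types::realtime::NoiseReductionType;

fn main() {
    // Internally tagged unit variants serialize as an object carrying only the tag.
    let json = serde_json::to_value(NoiseReductionType::NearField).unwrap();
    assert_eq!(json, serde_json::json!({ "type": "near_field" }));
}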
- pub item: Item, + pub item: RealtimeConversationItem, } #[derive(Debug, Serialize, Deserialize, Clone, Default)] @@ -322,8 +320,8 @@ message_from_event!(ResponseCreateEvent, RealtimeClientEvent); message_from_event!(ResponseCancelEvent, RealtimeClientEvent); message_from_event!(OutputAudioBufferClearEvent, RealtimeClientEvent); -impl From for ConversationItemCreateEvent { - fn from(value: Item) -> Self { +impl From for ConversationItemCreateEvent { + fn from(value: RealtimeConversationItem) -> Self { Self { event_id: None, previous_item_id: None, diff --git a/async-openai/src/types/realtime/item.rs b/async-openai/src/types/realtime/item.rs index b6020bf8..4402a946 100644 --- a/async-openai/src/types/realtime/item.rs +++ b/async-openai/src/types/realtime/item.rs @@ -272,7 +272,7 @@ pub struct McpCall { #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(tag = "type")] #[serde(rename_all = "snake_case")] -pub enum Item { +pub enum RealtimeConversationItem { Message(Message), FunctionCall(FunctionCall), FunctionCallOutput(FunctionCallOutput), @@ -282,7 +282,7 @@ pub enum Item { McpApprovalRequest(McpApprovalRequest), } -impl TryFrom for Item { +impl TryFrom for RealtimeConversationItem { type Error = serde_json::Error; fn try_from(value: serde_json::Value) -> Result { diff --git a/async-openai/src/types/realtime/response_resource.rs b/async-openai/src/types/realtime/response_resource.rs index eb7040f5..57f866c5 100644 --- a/async-openai/src/types/realtime/response_resource.rs +++ b/async-openai/src/types/realtime/response_resource.rs @@ -2,8 +2,8 @@ use serde::{Deserialize, Serialize}; use crate::types::{ realtime::{ - Conversation, Item, MaxOutputTokens, RealtimeAudioFormats, RealtimeTool, RealtimeVoice, - ToolChoice, + Conversation, MaxOutputTokens, RealtimeAudioFormats, RealtimeConversationItem, + RealtimeTool, RealtimeVoice, ToolChoice, }, responses::Prompt, }; @@ -138,7 +138,7 @@ pub struct RealtimeResponseCreateParams { /// for this Response instead of using the default conversation. An empty array `[]` will clear /// the context for this Response. Note that this can include references to items that /// previously appeared in the session using their id. - pub input: Vec, + pub input: Vec, /// The default system instructions (i.e. system message) prepended to model calls. /// This field allows the client to guide the model on desired responses. @@ -219,7 +219,7 @@ pub struct RealtimeResponse { pub object: String, /// The list of output items generated by the response. - pub output: Vec, + pub output: Vec, /// The set of modalities the model used to respond, currently the only possible values /// are [\"audio\"], [\"text\"]. Audio output always include a text transcript. diff --git a/async-openai/src/types/realtime/server_event.rs b/async-openai/src/types/realtime/server_event.rs index 1fae8c4f..a440648d 100644 --- a/async-openai/src/types/realtime/server_event.rs +++ b/async-openai/src/types/realtime/server_event.rs @@ -3,7 +3,8 @@ use serde::{Deserialize, Serialize}; use crate::types::realtime::{RealtimeResponse, Session}; use super::{ - content_part::ContentPart, error::RealtimeAPIError, item::Item, rate_limit::RateLimit, + content_part::ContentPart, error::RealtimeAPIError, item::RealtimeConversationItem, + rate_limit::RateLimit, }; #[derive(Debug, Serialize, Deserialize, Clone)] @@ -35,7 +36,7 @@ pub struct ConversationItemAddedEvent { /// The unique ID of the server event. pub event_id: String, /// A single item within a Realtime conversation. 
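For the RealtimeConversationItem rename in this patch, the TryFrom<serde_json::Value> and From<RealtimeConversationItem> impls shown above keep the JSON-first construction path working. A hedged sketch, assuming the `input_text` content shape used by the realtime API (not confirmed by this diff):

use async_openai::types::realtime::{ConversationItemCreateEvent, RealtimeConversationItem};

fn user_text_item(text: &str) -> ConversationItemCreateEvent {
    let item = RealtimeConversationItem::try_from(serde_json::json!({
        "type": "message",
        "role": "user",
        "content": [{ "type": "input_text", "text": text }],
    }))
    .expect("valid conversation item JSON");

    // From<RealtimeConversationItem> for ConversationItemCreateEvent, shown above.
    item.into()
}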
- pub item: Item, + pub item: RealtimeConversationItem, /// The ID of the item that precedes this one, if any. This is used to maintain ordering when items are inserted. pub previous_item_id: Option, } @@ -45,7 +46,7 @@ pub struct ConversationItemDoneEvent { /// The unique ID of the server event. pub event_id: String, /// A single item within a Realtime conversation. - pub item: Item, + pub item: RealtimeConversationItem, /// The ID of the item that precedes this one, if any. This is used to maintain ordering when items are inserted. pub previous_item_id: Option, } @@ -243,7 +244,7 @@ pub struct ConversationItemRetrievedEvent { /// The unique ID of the server event. pub event_id: String, /// A single item within a Realtime conversation. - pub item: Item, + pub item: RealtimeConversationItem, } #[derive(Debug, Serialize, Deserialize, Clone)] @@ -291,7 +292,7 @@ pub struct ResponseOutputItemAddedEvent { /// The index of the output item in the Response. pub output_index: u32, /// A single item within a Realtime conversation. - pub item: Item, + pub item: RealtimeConversationItem, } #[derive(Debug, Serialize, Deserialize, Clone)] @@ -303,7 +304,7 @@ pub struct ResponseOutputItemDoneEvent { /// The index of the output item in the Response. pub output_index: u32, /// A single item within a Realtime conversation. - pub item: Item, + pub item: RealtimeConversationItem, } #[derive(Debug, Serialize, Deserialize, Clone)] From c39abf0729458925c6b4e9e90a1736a48404b211 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 14:56:17 -0800 Subject: [PATCH 32/42] RealtimeConversationItem --- examples/realtime/src/main.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/realtime/src/main.rs b/examples/realtime/src/main.rs index 4f79b656..3793b95a 100644 --- a/examples/realtime/src/main.rs +++ b/examples/realtime/src/main.rs @@ -1,7 +1,7 @@ use std::process::exit; use async_openai::types::realtime::{ - ConversationItemCreateEvent, Item, RealtimeServerEvent, ResponseCreateEvent, + ConversationItemCreateEvent, RealtimeConversationItem, RealtimeServerEvent, ResponseCreateEvent, }; use futures_util::{future, pin_mut, StreamExt}; @@ -107,7 +107,7 @@ async fn read_stdin(tx: futures_channel::mpsc::UnboundedSender) { } // Create item from json representation - let item = Item::try_from(serde_json::json!({ + let item = RealtimeConversationItem::try_from(serde_json::json!({ "type": "message", "role": "user", "content": [ From c8e614827e553ef213e68e390ba058558874e5c5 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 15:24:27 -0800 Subject: [PATCH 33/42] updates for the spec --- async-openai/src/types/realtime/error.rs | 11 ++++ async-openai/src/types/realtime/item.rs | 77 ++++++++++++------------ 2 files changed, 50 insertions(+), 38 deletions(-) diff --git a/async-openai/src/types/realtime/error.rs b/async-openai/src/types/realtime/error.rs index 6ce907c3..34fb9eac 100644 --- a/async-openai/src/types/realtime/error.rs +++ b/async-openai/src/types/realtime/error.rs @@ -17,3 +17,14 @@ pub struct RealtimeAPIError { /// The event_id of the client event that caused the error, if applicable. 
pub event_id: Option, } + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct ErrorCodeMessage { + pub code: String, + pub message: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct ErrorMessage { + pub message: String, +} diff --git a/async-openai/src/types/realtime/item.rs b/async-openai/src/types/realtime/item.rs index 4402a946..80a31ef4 100644 --- a/async-openai/src/types/realtime/item.rs +++ b/async-openai/src/types/realtime/item.rs @@ -1,5 +1,10 @@ use serde::{Deserialize, Serialize}; +use crate::types::{ + realtime::{ErrorCodeMessage, ErrorMessage}, + responses::MCPListToolsTool, +}; + #[derive(Debug, Serialize, Deserialize, Clone)] pub struct SystemMessageContent { /// The text content. @@ -9,7 +14,7 @@ pub struct SystemMessageContent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct SystemMessage { +pub struct RealtimeConversationItemMessageSystem { /// The content of the message. pub content: Vec, @@ -70,7 +75,7 @@ pub enum UserMessageContent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct UserMessage { +pub struct RealtimeConversationItemMessageUser { /// The content of the message. pub content: Vec, @@ -114,7 +119,7 @@ pub enum AssistantMessageContent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct AssistantMessage { +pub struct RealtimeConversationItemMessageAssistant { /// The content of the message. pub content: Vec, @@ -135,14 +140,14 @@ pub struct AssistantMessage { #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(tag = "role")] #[serde(rename_all = "lowercase")] -pub enum Message { - System(SystemMessage), - User(UserMessage), - Assistant(AssistantMessage), +pub enum RealtimeConversationItemMessage { + System(RealtimeConversationItemMessageSystem), + User(RealtimeConversationItemMessageUser), + Assistant(RealtimeConversationItemMessageAssistant), } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct FunctionCall { +pub struct RealtimeConversationItemFunctionCall { /// The arguments of the function call. This is a JSON-encoded string representing /// the arguments passed to the function, for example {"arg1": "value1", "arg2": 42}. pub arguments: String, @@ -167,7 +172,7 @@ pub struct FunctionCall { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct FunctionCallOutput { +pub struct RealtimeConversationItemFunctionCallOutput { /// The ID of the function call this output is for. pub call_id: String, @@ -189,7 +194,7 @@ pub struct FunctionCallOutput { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct McpApprovalResponse { +pub struct RealtimeMCPApprovalResponse { /// The ID of the approval request being answered. pub approval_request_id: String, @@ -204,34 +209,19 @@ pub struct McpApprovalResponse { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct AvailableMcpTool { - /// The JSON schema describing the tool's input. - pub input_schema: serde_json::Value, - - /// The name of the tool. - pub name: String, - - /// Additional annotations about the tool. - pub annotations: Option, - - /// The description of the tool. - pub description: String, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct McpListTools { +pub struct RealtimeMCPListTools { /// The label of the MCP server. pub server_label: String, /// The tools available on the server. - pub tools: Vec, + pub tools: Vec, /// The unique ID of the list. 
pub id: String, } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct McpApprovalRequest { +pub struct RealtimeMCPApprovalRequest { /// A JSON string of arguments for the tool. pub arguments: String, @@ -246,7 +236,18 @@ pub struct McpApprovalRequest { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct McpCall { +pub struct RealtimeMCPProtocolError {} + +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum RealtimeMCPToolCallError { + ProtocolError(ErrorCodeMessage), + ToolExecutionError(ErrorMessage), + HttpError(ErrorCodeMessage), +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeMCPToolCall { /// A JSON string of the arguments passed to the tool. pub arguments: String, @@ -263,23 +264,23 @@ pub struct McpCall { pub approval_request_id: Option, /// The error from the tool call, if any. - pub error: Option, // TODO: implement type + pub error: Option, /// The output from the tool call. - pub output: String, + pub output: Option, } #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(tag = "type")] #[serde(rename_all = "snake_case")] pub enum RealtimeConversationItem { - Message(Message), - FunctionCall(FunctionCall), - FunctionCallOutput(FunctionCallOutput), - McpApprovalResponse(McpApprovalResponse), - McpListTools(McpListTools), - McpCall(McpCall), - McpApprovalRequest(McpApprovalRequest), + Message(RealtimeConversationItemMessage), + FunctionCall(RealtimeConversationItemFunctionCall), + FunctionCallOutput(RealtimeConversationItemFunctionCallOutput), + McpApprovalResponse(RealtimeMCPApprovalResponse), + McpListTools(RealtimeMCPListTools), + McpCall(RealtimeMCPToolCall), + McpApprovalRequest(RealtimeMCPApprovalRequest), } impl TryFrom for RealtimeConversationItem { From 46c7159d0e5ee927e5622ec269f9930604ec0a23 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 16:45:10 -0800 Subject: [PATCH 34/42] update types to match spec --- .../src/types/realtime/response_resource.rs | 34 +++++++++++++++---- .../src/types/realtime/server_event.rs | 18 +++++----- 2 files changed, 36 insertions(+), 16 deletions(-) diff --git a/async-openai/src/types/realtime/response_resource.rs b/async-openai/src/types/realtime/response_resource.rs index 57f866c5..1dac0634 100644 --- a/async-openai/src/types/realtime/response_resource.rs +++ b/async-openai/src/types/realtime/response_resource.rs @@ -1,3 +1,5 @@ +use std::collections::HashMap; + use serde::{Deserialize, Serialize}; use crate::types::{ @@ -76,7 +78,7 @@ pub struct OutputTokenDetails { #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(rename_all = "snake_case")] -pub enum ResponseStatus { +pub enum RealtimeResponseStatus { InProgress, Completed, Cancelled, @@ -91,17 +93,35 @@ pub struct Error { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseStatusDetail { +#[serde(rename_all = "lowercase")] +pub enum RealtimeResponseStatusDetailType { + Completed, + Cancelled, + Incomplete, + Failed, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(rename_all = "snake_case")] +pub enum RealtimeResponseStatusDetailReason { + TurnDetected, + ClientCancelled, + MaxOutputTokens, + ContentFilter, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeResponseStatusDetail { /// A description of the error that caused the response to fail, populated when the status is failed. pub error: Option, /// The reason the Response did not complete. 
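A short sketch of consuming the typed MCP tool call error introduced in the previous patch (illustrative only; field names taken from the ErrorCodeMessage and ErrorMessage additions above):

use async_openai::types::realtime::RealtimeMCPToolCallError;

fn describe_mcp_error(err: &RealtimeMCPToolCallError) -> String {
    match err {
        RealtimeMCPToolCallError::ProtocolError(e) => {
            format!("protocol error {}: {}", e.code, e.message)
        }
        RealtimeMCPToolCallError::ToolExecutionError(e) => {
            format!("tool execution error: {}", e.message)
        }
        RealtimeMCPToolCallError::HttpError(e) => {
            format!("http error {}: {}", e.code, e.message)
        }
    }
}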
For a `cancelled` Response, one of `turn_detected` /// (the server VAD detected a new start of speech) or `client_cancelled` (the client sent a cancel /// event). For an incomplete Response, one of `max_output_tokens` or `content_filter` (the /// server-side safety filter activated and cut off the response). - pub reason: Option, + pub reason: Option, /// The type of error that caused the response to fail, corresponding with the `status` /// field (`completed`, `cancelled`, `incomplete`, `failed`). - pub r#type: String, + pub r#type: RealtimeResponseStatusDetailType, } #[derive(Debug, Serialize, Deserialize, Clone)] @@ -213,7 +233,7 @@ pub struct RealtimeResponse { /// Keys are strings with a maximum length of 64 characters. Values are strings with a /// maximum length of 512 characters. #[serde(skip_serializing_if = "Option::is_none")] - pub metadata: Option, + pub metadata: Option>, /// The object type, must be "realtime.response". pub object: String, @@ -227,10 +247,10 @@ pub struct RealtimeResponse { pub output_modalities: Vec, /// The final status of the response (`completed`, `cancelled`, `failed`, or `incomplete`, `in_progress`). - pub status: ResponseStatus, + pub status: RealtimeResponseStatus, /// Additional details about the status. - pub status_details: Option, + pub status_details: Option, /// Usage statistics for the Response, this will correspond to billing. A Realtime API session /// will maintain a conversation context and append new Items to the Conversation, thus output diff --git a/async-openai/src/types/realtime/server_event.rs b/async-openai/src/types/realtime/server_event.rs index a440648d..b4994aab 100644 --- a/async-openai/src/types/realtime/server_event.rs +++ b/async-openai/src/types/realtime/server_event.rs @@ -128,12 +128,12 @@ pub struct OutputAudioBufferClearedEvent { #[derive(Debug, Serialize, Deserialize, Clone)] /// Log probability information for a transcribed token. -pub struct LogProb { - /// Raw UTF-8 bytes for the token. +pub struct LogProbProperties { + /// The bytes that were used to generate the log probability. pub bytes: Vec, /// The log probability of the token. pub logprob: f64, - /// The token string. + /// The token that was used to generate the log probability. pub token: String, } @@ -146,7 +146,7 @@ pub struct TokenUsageInputTokenDetails { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct TokenUsage { +pub struct TranscriptTextUsageTokens { /// Number of input tokens billed for this request. pub input_tokens: u32, /// Number of output tokens generated. @@ -158,7 +158,7 @@ pub struct TokenUsage { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct DurationUsage { +pub struct TranscriptTextUsageDuration { ///Duration of the input audio in seconds. pub seconds: f32, } @@ -167,9 +167,9 @@ pub struct DurationUsage { #[serde(tag = "type")] pub enum TranscriptionUsage { #[serde(rename = "tokens")] - TokenUsage(TokenUsage), + Tokens(TranscriptTextUsageTokens), #[serde(rename = "duration")] - DurationUsage(DurationUsage), + Duration(TranscriptTextUsageDuration), } #[derive(Debug, Serialize, Deserialize, Clone)] @@ -183,7 +183,7 @@ pub struct ConversationItemInputAudioTranscriptionCompletedEvent { /// The transcribed text. pub transcript: String, /// Optional per-token log probability data. - pub logprobs: Option>, + pub logprobs: Option>, /// Usage statistics for the transcription, this is billed according to the ASR model's pricing rather than /// the realtime model's pricing. 
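And a sketch of handling the renamed transcription usage variants (illustrative only, relying on the fields shown in this hunk):

use async_openai::types::realtime::TranscriptionUsage;

fn log_transcription_usage(usage: &TranscriptionUsage) {
    match usage {
        TranscriptionUsage::Tokens(tokens) => {
            println!(
                "billed {} input / {} output tokens",
                tokens.input_tokens, tokens.output_tokens
            );
        }
        TranscriptionUsage::Duration(duration) => {
            println!("billed {} seconds of audio", duration.seconds);
        }
    }
}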
pub usage: TranscriptionUsage, @@ -204,7 +204,7 @@ pub struct ConversationItemInputAudioTranscriptionDeltaEvent { /// corresponds a log probability of which token would be selected for this chunk of transcription. This /// can help to identify if it was possible there were multiple valid options for a given chunk of /// transcription. - pub logprobs: Option>, + pub logprobs: Option>, } #[derive(Debug, Serialize, Deserialize, Clone)] From 6486c1c55afde2b62f2f7cec02c1eeb4e2be05d2 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 16:55:51 -0800 Subject: [PATCH 35/42] types updated --- async-openai/src/types/realtime/item.rs | 3 +-- async-openai/src/types/realtime/mod.rs | 8 ++++---- async-openai/src/types/realtime/rate_limit.rs | 11 +++++++++-- .../realtime/{response_resource.rs => response.rs} | 0 async-openai/src/types/realtime/server_event.rs | 6 ++---- .../realtime/{session_resource.rs => session.rs} | 0 6 files changed, 16 insertions(+), 12 deletions(-) rename async-openai/src/types/realtime/{response_resource.rs => response.rs} (100%) rename async-openai/src/types/realtime/{session_resource.rs => session.rs} (100%) diff --git a/async-openai/src/types/realtime/item.rs b/async-openai/src/types/realtime/item.rs index 80a31ef4..a1f28893 100644 --- a/async-openai/src/types/realtime/item.rs +++ b/async-openai/src/types/realtime/item.rs @@ -271,8 +271,7 @@ pub struct RealtimeMCPToolCall { } #[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(tag = "type")] -#[serde(rename_all = "snake_case")] +#[serde(tag = "type", rename_all = "snake_case")] pub enum RealtimeConversationItem { Message(RealtimeConversationItemMessage), FunctionCall(RealtimeConversationItemFunctionCall), diff --git a/async-openai/src/types/realtime/mod.rs b/async-openai/src/types/realtime/mod.rs index b47605f8..071164e5 100644 --- a/async-openai/src/types/realtime/mod.rs +++ b/async-openai/src/types/realtime/mod.rs @@ -4,9 +4,9 @@ mod conversation; mod error; mod item; mod rate_limit; -mod response_resource; +mod response; mod server_event; -mod session_resource; +mod session; pub use client_event::*; pub use content_part::*; @@ -14,6 +14,6 @@ pub use conversation::*; pub use error::*; pub use item::*; pub use rate_limit::*; -pub use response_resource::*; +pub use response::*; pub use server_event::*; -pub use session_resource::*; +pub use session::*; diff --git a/async-openai/src/types/realtime/rate_limit.rs b/async-openai/src/types/realtime/rate_limit.rs index 9306e236..86a50e21 100644 --- a/async-openai/src/types/realtime/rate_limit.rs +++ b/async-openai/src/types/realtime/rate_limit.rs @@ -1,9 +1,16 @@ use serde::{Deserialize, Serialize}; #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct RateLimit { +#[serde(rename_all = "lowercase")] +pub enum RealtimeRateLimitName { + Requests, + Tokens, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeRateLimit { /// The name of the rate limit (requests, tokens). - pub name: String, + pub name: RealtimeRateLimitName, /// The maximum allowed value for the rate limit. pub limit: u32, /// The remaining value before the limit is reached. 
diff --git a/async-openai/src/types/realtime/response_resource.rs b/async-openai/src/types/realtime/response.rs similarity index 100% rename from async-openai/src/types/realtime/response_resource.rs rename to async-openai/src/types/realtime/response.rs diff --git a/async-openai/src/types/realtime/server_event.rs b/async-openai/src/types/realtime/server_event.rs index b4994aab..1b7512f9 100644 --- a/async-openai/src/types/realtime/server_event.rs +++ b/async-openai/src/types/realtime/server_event.rs @@ -1,10 +1,8 @@ use serde::{Deserialize, Serialize}; -use crate::types::realtime::{RealtimeResponse, Session}; - use super::{ content_part::ContentPart, error::RealtimeAPIError, item::RealtimeConversationItem, - rate_limit::RateLimit, + rate_limit::RealtimeRateLimit, response::RealtimeResponse, session::Session, }; #[derive(Debug, Serialize, Deserialize, Clone)] @@ -469,7 +467,7 @@ pub struct ResponseFunctionCallArgumentsDoneEvent { pub struct RateLimitsUpdatedEvent { /// The unique ID of the server event. pub event_id: String, - pub rate_limits: Vec, + pub rate_limits: Vec, } #[derive(Debug, Serialize, Deserialize, Clone)] diff --git a/async-openai/src/types/realtime/session_resource.rs b/async-openai/src/types/realtime/session.rs similarity index 100% rename from async-openai/src/types/realtime/session_resource.rs rename to async-openai/src/types/realtime/session.rs From 5a1cd63ce7450db6a63129c84012149776c52503 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 17:01:14 -0800 Subject: [PATCH 36/42] update realtime types --- .../src/types/realtime/content_part.rs | 18 --------- .../src/types/realtime/conversation.rs | 9 ----- .../{item.rs => conversation_item.rs} | 0 async-openai/src/types/realtime/mod.rs | 10 +---- async-openai/src/types/realtime/rate_limit.rs | 20 ---------- async-openai/src/types/realtime/response.rs | 12 +++++- .../src/types/realtime/server_event.rs | 40 ++++++++++++++++++- 7 files changed, 50 insertions(+), 59 deletions(-) delete mode 100644 async-openai/src/types/realtime/content_part.rs delete mode 100644 async-openai/src/types/realtime/conversation.rs rename async-openai/src/types/realtime/{item.rs => conversation_item.rs} (100%) delete mode 100644 async-openai/src/types/realtime/rate_limit.rs diff --git a/async-openai/src/types/realtime/content_part.rs b/async-openai/src/types/realtime/content_part.rs deleted file mode 100644 index eec93ab3..00000000 --- a/async-openai/src/types/realtime/content_part.rs +++ /dev/null @@ -1,18 +0,0 @@ -use serde::{Deserialize, Serialize}; - -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(tag = "type")] -pub enum ContentPart { - #[serde(rename = "text")] - Text { - /// The text content - text: String, - }, - #[serde(rename = "audio")] - Audio { - /// Base64-encoded audio data - audio: Option, - /// The transcript of the audio - transcript: String, - }, -} diff --git a/async-openai/src/types/realtime/conversation.rs b/async-openai/src/types/realtime/conversation.rs deleted file mode 100644 index e678ede8..00000000 --- a/async-openai/src/types/realtime/conversation.rs +++ /dev/null @@ -1,9 +0,0 @@ -use serde::{Deserialize, Serialize}; - -#[derive(Debug, Serialize, Deserialize, Clone, Default)] -#[serde(rename_all = "lowercase")] -pub enum Conversation { - #[default] - Auto, - None, -} diff --git a/async-openai/src/types/realtime/item.rs b/async-openai/src/types/realtime/conversation_item.rs similarity index 100% rename from async-openai/src/types/realtime/item.rs rename to 
async-openai/src/types/realtime/conversation_item.rs diff --git a/async-openai/src/types/realtime/mod.rs b/async-openai/src/types/realtime/mod.rs index 071164e5..386a92b0 100644 --- a/async-openai/src/types/realtime/mod.rs +++ b/async-openai/src/types/realtime/mod.rs @@ -1,19 +1,13 @@ mod client_event; -mod content_part; -mod conversation; +mod conversation_item; mod error; -mod item; -mod rate_limit; mod response; mod server_event; mod session; pub use client_event::*; -pub use content_part::*; -pub use conversation::*; +pub use conversation_item::*; pub use error::*; -pub use item::*; -pub use rate_limit::*; pub use response::*; pub use server_event::*; pub use session::*; diff --git a/async-openai/src/types/realtime/rate_limit.rs b/async-openai/src/types/realtime/rate_limit.rs deleted file mode 100644 index 86a50e21..00000000 --- a/async-openai/src/types/realtime/rate_limit.rs +++ /dev/null @@ -1,20 +0,0 @@ -use serde::{Deserialize, Serialize}; - -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(rename_all = "lowercase")] -pub enum RealtimeRateLimitName { - Requests, - Tokens, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct RealtimeRateLimit { - /// The name of the rate limit (requests, tokens). - pub name: RealtimeRateLimitName, - /// The maximum allowed value for the rate limit. - pub limit: u32, - /// The remaining value before the limit is reached. - pub remaining: u32, - /// Seconds until the rate limit resets. - pub reset_seconds: f32, -} diff --git a/async-openai/src/types/realtime/response.rs b/async-openai/src/types/realtime/response.rs index 1dac0634..c9f28bb2 100644 --- a/async-openai/src/types/realtime/response.rs +++ b/async-openai/src/types/realtime/response.rs @@ -4,8 +4,8 @@ use serde::{Deserialize, Serialize}; use crate::types::{ realtime::{ - Conversation, MaxOutputTokens, RealtimeAudioFormats, RealtimeConversationItem, - RealtimeTool, RealtimeVoice, ToolChoice, + MaxOutputTokens, RealtimeAudioFormats, RealtimeConversationItem, RealtimeTool, + RealtimeVoice, ToolChoice, }, responses::Prompt, }; @@ -142,6 +142,14 @@ pub struct ResponseAudio { pub output: ResponseAudioOutput, } +#[derive(Debug, Serialize, Deserialize, Clone, Default)] +#[serde(rename_all = "lowercase")] +pub enum Conversation { + #[default] + Auto, + None, +} + /// The response resource. 
#[derive(Debug, Serialize, Deserialize, Clone)] pub struct RealtimeResponseCreateParams { diff --git a/async-openai/src/types/realtime/server_event.rs b/async-openai/src/types/realtime/server_event.rs index 1b7512f9..4544e2bc 100644 --- a/async-openai/src/types/realtime/server_event.rs +++ b/async-openai/src/types/realtime/server_event.rs @@ -1,8 +1,8 @@ use serde::{Deserialize, Serialize}; use super::{ - content_part::ContentPart, error::RealtimeAPIError, item::RealtimeConversationItem, - rate_limit::RealtimeRateLimit, response::RealtimeResponse, session::Session, + conversation_item::RealtimeConversationItem, error::RealtimeAPIError, + response::RealtimeResponse, session::Session, }; #[derive(Debug, Serialize, Deserialize, Clone)] @@ -305,6 +305,23 @@ pub struct ResponseOutputItemDoneEvent { pub item: RealtimeConversationItem, } +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(tag = "type")] +pub enum ContentPart { + #[serde(rename = "text")] + Text { + /// The text content + text: String, + }, + #[serde(rename = "audio")] + Audio { + /// Base64-encoded audio data + audio: Option, + /// The transcript of the audio + transcript: String, + }, +} + #[derive(Debug, Serialize, Deserialize, Clone)] pub struct ResponseContentPartAddedEvent { /// The unique ID of the server event. @@ -463,6 +480,25 @@ pub struct ResponseFunctionCallArgumentsDoneEvent { pub arguments: String, } +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(rename_all = "lowercase")] +pub enum RealtimeRateLimitName { + Requests, + Tokens, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeRateLimit { + /// The name of the rate limit (requests, tokens). + pub name: RealtimeRateLimitName, + /// The maximum allowed value for the rate limit. + pub limit: u32, + /// The remaining value before the limit is reached. + pub remaining: u32, + /// Seconds until the rate limit resets. + pub reset_seconds: f32, +} + #[derive(Debug, Serialize, Deserialize, Clone)] pub struct RateLimitsUpdatedEvent { /// The unique ID of the server event. From 9571490acae575c6af18c70f08d370a60163b721 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 17:11:09 -0800 Subject: [PATCH 37/42] match realtime client event to spec --- .../src/types/realtime/client_event.rs | 128 +++++++++++------- 1 file changed, 82 insertions(+), 46 deletions(-) diff --git a/async-openai/src/types/realtime/client_event.rs b/async-openai/src/types/realtime/client_event.rs index d77e04bb..0881b9b9 100644 --- a/async-openai/src/types/realtime/client_event.rs +++ b/async-openai/src/types/realtime/client_event.rs @@ -4,7 +4,7 @@ use tokio_tungstenite::tungstenite::Message; use crate::types::realtime::{RealtimeConversationItem, RealtimeResponseCreateParams, Session}; #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct SessionUpdateEvent { +pub struct RealtimeClientEventSessionUpdate { /// Optional client-generated ID used to identify this event. /// This is an arbitrary string that a client may assign. It will be passed /// back if there is an error with the event, but the corresponding @@ -16,7 +16,7 @@ pub struct SessionUpdateEvent { } #[derive(Debug, Serialize, Deserialize, Clone, Default)] -pub struct InputAudioBufferAppendEvent { +pub struct RealtimeClientEventInputAudioBufferAppend { /// Optional client-generated ID used to identify this event. 
#[serde(skip_serializing_if = "Option::is_none")] pub event_id: Option, @@ -26,21 +26,21 @@ pub struct InputAudioBufferAppendEvent { } #[derive(Debug, Serialize, Deserialize, Clone, Default)] -pub struct InputAudioBufferCommitEvent { +pub struct RealtimeClientEventInputAudioBufferCommit { /// Optional client-generated ID used to identify this event. #[serde(skip_serializing_if = "Option::is_none")] pub event_id: Option, } #[derive(Debug, Serialize, Deserialize, Clone, Default)] -pub struct InputAudioBufferClearEvent { +pub struct RealtimeClientEventInputAudioBufferClear { /// Optional client-generated ID used to identify this event. #[serde(skip_serializing_if = "Option::is_none")] pub event_id: Option, } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ConversationItemCreateEvent { +pub struct RealtimeClientEventConversationItemCreate { /// Optional client-generated ID used to identify this event. #[serde(skip_serializing_if = "Option::is_none")] pub event_id: Option, @@ -58,7 +58,7 @@ pub struct ConversationItemCreateEvent { } #[derive(Debug, Serialize, Deserialize, Clone, Default)] -pub struct ConversationItemRetrieveEvent { +pub struct RealtimeClientEventConversationItemRetrieve { /// Optional client-generated ID used to identify this event. #[serde(skip_serializing_if = "Option::is_none")] pub event_id: Option, @@ -68,7 +68,7 @@ pub struct ConversationItemRetrieveEvent { } #[derive(Debug, Serialize, Deserialize, Clone, Default)] -pub struct ConversationItemTruncateEvent { +pub struct RealtimeClientEventConversationItemTruncate { /// Optional client-generated ID used to identify this event. #[serde(skip_serializing_if = "Option::is_none")] pub event_id: Option, @@ -85,7 +85,7 @@ pub struct ConversationItemTruncateEvent { } #[derive(Debug, Serialize, Deserialize, Clone, Default)] -pub struct ConversationItemDeleteEvent { +pub struct RealtimeClientEventConversationItemDelete { /// Optional client-generated ID used to identify this event. #[serde(skip_serializing_if = "Option::is_none")] pub event_id: Option, @@ -95,7 +95,7 @@ pub struct ConversationItemDeleteEvent { } #[derive(Debug, Serialize, Deserialize, Clone, Default)] -pub struct ResponseCreateEvent { +pub struct RealtimeClientEventResponseCreate { /// Optional client-generated ID used to identify this event. #[serde(skip_serializing_if = "Option::is_none")] pub event_id: Option, @@ -105,7 +105,7 @@ pub struct ResponseCreateEvent { } #[derive(Debug, Serialize, Deserialize, Clone, Default)] -pub struct ResponseCancelEvent { +pub struct RealtimeClientEventResponseCancel { /// Optional client-generated ID used to identify this event. #[serde(skip_serializing_if = "Option::is_none")] pub event_id: Option, @@ -117,7 +117,7 @@ pub struct ResponseCancelEvent { } #[derive(Debug, Serialize, Deserialize, Clone, Default)] -pub struct OutputAudioBufferClearEvent { +pub struct RealtimeClientEventOutputAudioBufferClear { /// Optional client-generated ID used to identify this event. #[serde(skip_serializing_if = "Option::is_none")] pub event_id: Option, @@ -134,7 +134,7 @@ pub enum RealtimeClientEvent { /// configuration. Only the fields that are present in the `session.update` are updated. To clear a field like `instructions`, /// pass an empty string. To clear a field like `tools`, pass an empty array. To clear a field like `turn_detection`, pass `null`. 
#[serde(rename = "session.update")] - SessionUpdate(SessionUpdateEvent), + SessionUpdate(RealtimeClientEventSessionUpdate), /// Send this event to append audio bytes to the input audio buffer. The audio buffer is temporary storage you can write to and later commit. /// A "commit" will create a new user message item in the conversation history from the buffer content and clear the buffer. Input audio @@ -147,7 +147,7 @@ pub enum RealtimeClientEvent { /// client may allow the VAD to be more responsive. Unlike most other client events, the server will not send a confirmation response to /// this event. #[serde(rename = "input_audio_buffer.append")] - InputAudioBufferAppend(InputAudioBufferAppendEvent), + InputAudioBufferAppend(RealtimeClientEventInputAudioBufferAppend), /// Send this event to commit the user input audio buffer, which will create a new user message item in the conversation. /// This event will produce an error if the input audio buffer is empty. @@ -155,12 +155,12 @@ pub enum RealtimeClientEvent { /// Committing the input audio buffer will trigger input audio transcription (if enabled in session configuration), but it will not create a response from the model. /// The server will respond with an input_audio_buffer.committed event. #[serde(rename = "input_audio_buffer.commit")] - InputAudioBufferCommit(InputAudioBufferCommitEvent), + InputAudioBufferCommit(RealtimeClientEventInputAudioBufferCommit), /// Send this event to clear the audio bytes in the buffer. /// The server will respond with an `input_audio_buffer.cleared` event. #[serde(rename = "input_audio_buffer.clear")] - InputAudioBufferClear(InputAudioBufferClearEvent), + InputAudioBufferClear(RealtimeClientEventInputAudioBufferClear), /// Add a new Item to the Conversation's context, including messages, function calls, and function call responses. /// This event can be used both to populate a "history" of the conversation and to add new items mid-stream, @@ -168,14 +168,14 @@ pub enum RealtimeClientEvent { /// /// If successful, the server will respond with a `conversation.item.created` event, otherwise an `error` event will be sent. #[serde(rename = "conversation.item.create")] - ConversationItemCreate(ConversationItemCreateEvent), + ConversationItemCreate(RealtimeClientEventConversationItemCreate), /// Send this event when you want to retrieve the server's representation of a specific item in the conversation history. /// This is useful, for example, to inspect user audio after noise cancellation and VAD. /// The server will respond with a `conversation.item.retrieved` event, unless the item does not exist in the conversation history, /// in which case the server will respond with an error. #[serde(rename = "conversation.item.retrieve")] - ConversationItemRetrieve(ConversationItemRetrieveEvent), + ConversationItemRetrieve(RealtimeClientEventConversationItemRetrieve), /// Send this event to truncate a previous assistant message's audio. The server will produce audio faster than realtime, /// so this event is useful when the user interrupts to truncate audio that has already been sent to the client but not @@ -186,13 +186,13 @@ pub enum RealtimeClientEvent { /// /// If successful, the server will respond with a `conversation.item.truncated` event. #[serde(rename = "conversation.item.truncate")] - ConversationItemTruncate(ConversationItemTruncateEvent), + ConversationItemTruncate(RealtimeClientEventConversationItemTruncate), /// Send this event when you want to remove any item from the conversation history. 
The server will respond with a /// `conversation.item.deleted` event, unless the item does not exist in the conversation history, in which case the /// server will respond with an error. #[serde(rename = "conversation.item.delete")] - ConversationItemDelete(ConversationItemDeleteEvent), + ConversationItemDelete(RealtimeClientEventConversationItemDelete), /// This event instructs the server to create a Response, which means triggering model inference. /// When in Server VAD mode, the server will create Responses automatically. @@ -215,21 +215,21 @@ pub enum RealtimeClientEvent { /// Arbitrary input can be provided with the `input` field, which is an array accepting raw Items and references to /// existing Items. #[serde(rename = "response.create")] - ResponseCreate(ResponseCreateEvent), + ResponseCreate(RealtimeClientEventResponseCreate), /// Send this event to cancel an in-progress response. The server will respond with a `response.done` event /// with a status of `response.status=cancelled`. If there is no response to cancel, the server will respond /// with an error. It's safe to call `response.cancel` even if no response is in progress, an error will be /// returned the session will remain unaffected. #[serde(rename = "response.cancel")] - ResponseCancel(ResponseCancelEvent), + ResponseCancel(RealtimeClientEventResponseCancel), /// **WebRTC Only:** Emit to cut off the current audio response. /// This will trigger the server to stop generating audio and emit a `output_audio_buffer.cleared` event. /// This event should be preceded by a `response.cancel` client event to stop the generation of the current response. /// [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc) #[serde(rename = "output_audio_buffer.clear")] - OutputAudioBufferClear(OutputAudioBufferClearEvent), + OutputAudioBufferClear(RealtimeClientEventOutputAudioBufferClear), } impl From<&RealtimeClientEvent> for String { @@ -264,63 +264,99 @@ macro_rules! 
event_from { }; } -event_from!(SessionUpdateEvent, RealtimeClientEvent, SessionUpdate); event_from!( - InputAudioBufferAppendEvent, + RealtimeClientEventSessionUpdate, + RealtimeClientEvent, + SessionUpdate +); +event_from!( + RealtimeClientEventInputAudioBufferAppend, RealtimeClientEvent, InputAudioBufferAppend ); event_from!( - InputAudioBufferCommitEvent, + RealtimeClientEventInputAudioBufferCommit, RealtimeClientEvent, InputAudioBufferCommit ); event_from!( - InputAudioBufferClearEvent, + RealtimeClientEventInputAudioBufferClear, RealtimeClientEvent, InputAudioBufferClear ); event_from!( - ConversationItemCreateEvent, + RealtimeClientEventConversationItemCreate, RealtimeClientEvent, ConversationItemCreate ); event_from!( - ConversationItemTruncateEvent, + RealtimeClientEventConversationItemTruncate, RealtimeClientEvent, ConversationItemTruncate ); event_from!( - ConversationItemDeleteEvent, + RealtimeClientEventConversationItemDelete, RealtimeClientEvent, ConversationItemDelete ); event_from!( - ConversationItemRetrieveEvent, + RealtimeClientEventConversationItemRetrieve, RealtimeClientEvent, ConversationItemRetrieve ); -event_from!(ResponseCreateEvent, RealtimeClientEvent, ResponseCreate); -event_from!(ResponseCancelEvent, RealtimeClientEvent, ResponseCancel); event_from!( - OutputAudioBufferClearEvent, + RealtimeClientEventResponseCreate, + RealtimeClientEvent, + ResponseCreate +); +event_from!( + RealtimeClientEventResponseCancel, + RealtimeClientEvent, + ResponseCancel +); +event_from!( + RealtimeClientEventOutputAudioBufferClear, RealtimeClientEvent, OutputAudioBufferClear ); -message_from_event!(SessionUpdateEvent, RealtimeClientEvent); -message_from_event!(InputAudioBufferAppendEvent, RealtimeClientEvent); -message_from_event!(InputAudioBufferCommitEvent, RealtimeClientEvent); -message_from_event!(InputAudioBufferClearEvent, RealtimeClientEvent); -message_from_event!(ConversationItemCreateEvent, RealtimeClientEvent); -message_from_event!(ConversationItemTruncateEvent, RealtimeClientEvent); -message_from_event!(ConversationItemDeleteEvent, RealtimeClientEvent); -message_from_event!(ConversationItemRetrieveEvent, RealtimeClientEvent); -message_from_event!(ResponseCreateEvent, RealtimeClientEvent); -message_from_event!(ResponseCancelEvent, RealtimeClientEvent); -message_from_event!(OutputAudioBufferClearEvent, RealtimeClientEvent); - -impl From for ConversationItemCreateEvent { +message_from_event!(RealtimeClientEventSessionUpdate, RealtimeClientEvent); +message_from_event!( + RealtimeClientEventInputAudioBufferAppend, + RealtimeClientEvent +); +message_from_event!( + RealtimeClientEventInputAudioBufferCommit, + RealtimeClientEvent +); +message_from_event!( + RealtimeClientEventInputAudioBufferClear, + RealtimeClientEvent +); +message_from_event!( + RealtimeClientEventConversationItemCreate, + RealtimeClientEvent +); +message_from_event!( + RealtimeClientEventConversationItemTruncate, + RealtimeClientEvent +); +message_from_event!( + RealtimeClientEventConversationItemDelete, + RealtimeClientEvent +); +message_from_event!( + RealtimeClientEventConversationItemRetrieve, + RealtimeClientEvent +); +message_from_event!(RealtimeClientEventResponseCreate, RealtimeClientEvent); +message_from_event!(RealtimeClientEventResponseCancel, RealtimeClientEvent); +message_from_event!( + RealtimeClientEventOutputAudioBufferClear, + RealtimeClientEvent +); + +impl From for RealtimeClientEventConversationItemCreate { fn from(value: RealtimeConversationItem) -> Self { Self { event_id: None, From 
fb26251ad92bcf93da54b716d7aca4bcacd8ac83 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 17:11:19 -0800 Subject: [PATCH 38/42] update examples/realtime --- examples/realtime/src/main.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/examples/realtime/src/main.rs b/examples/realtime/src/main.rs index 3793b95a..87881734 100644 --- a/examples/realtime/src/main.rs +++ b/examples/realtime/src/main.rs @@ -1,7 +1,8 @@ use std::process::exit; use async_openai::types::realtime::{ - ConversationItemCreateEvent, RealtimeConversationItem, RealtimeServerEvent, ResponseCreateEvent, + RealtimeClientEventConversationItemCreate, RealtimeClientEventResponseCreate, + RealtimeConversationItem, RealtimeServerEvent, }; use futures_util::{future, pin_mut, StreamExt}; @@ -120,13 +121,13 @@ async fn read_stdin(tx: futures_channel::mpsc::UnboundedSender) { .unwrap(); // Create event of type "conversation.item.create" - let event: ConversationItemCreateEvent = item.into(); + let event: RealtimeClientEventConversationItemCreate = item.into(); // Create WebSocket message from client event let message: Message = event.into(); // send WebSocket message containing event of type "conversation.item.create" to server tx.unbounded_send(message).unwrap(); // send WebSocket message containing event of type "response.create" to server - tx.unbounded_send(ResponseCreateEvent::default().into()) + tx.unbounded_send(RealtimeClientEventResponseCreate::default().into()) .unwrap(); } } From d926b060cdf2cdf8680ea517fe2fdf3738dd4144 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 17:25:45 -0800 Subject: [PATCH 39/42] match realtime server event type names with spec --- .../src/types/realtime/server_event.rs | 174 +++++++++--------- 1 file changed, 88 insertions(+), 86 deletions(-) diff --git a/async-openai/src/types/realtime/server_event.rs b/async-openai/src/types/realtime/server_event.rs index 4544e2bc..f2ea8a58 100644 --- a/async-openai/src/types/realtime/server_event.rs +++ b/async-openai/src/types/realtime/server_event.rs @@ -6,7 +6,7 @@ use super::{ }; #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ErrorEvent { +pub struct RealtimeServerEventError { /// The unique ID of the server event. pub event_id: String, /// Details of the error. @@ -14,7 +14,7 @@ pub struct ErrorEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct SessionCreatedEvent { +pub struct RealtimeServerEventSessionCreated { /// The unique ID of the server event. pub event_id: String, /// The session resource. @@ -22,7 +22,7 @@ pub struct SessionCreatedEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct SessionUpdatedEvent { +pub struct RealtimeServerEventSessionUpdated { /// The unique ID of the server event. pub event_id: String, /// The updated session resource. @@ -30,7 +30,7 @@ pub struct SessionUpdatedEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ConversationItemAddedEvent { +pub struct RealtimeServerEventConversationItemAdded { /// The unique ID of the server event. pub event_id: String, /// A single item within a Realtime conversation. @@ -40,7 +40,7 @@ pub struct ConversationItemAddedEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ConversationItemDoneEvent { +pub struct RealtimeServerEventConversationItemDone { /// The unique ID of the server event. pub event_id: String, /// A single item within a Realtime conversation. 
@@ -50,7 +50,7 @@ pub struct ConversationItemDoneEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct InputAudioBufferCommitedEvent { +pub struct RealtimeServerEventInputAudioBufferCommitted { /// The unique ID of the server event. pub event_id: String, /// The ID of the preceding item after which the new item will be inserted. Can be null if the item has no predecessor. @@ -60,13 +60,13 @@ pub struct InputAudioBufferCommitedEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct InputAudioBufferClearedEvent { +pub struct RealtimeServerEventInputAudioBufferCleared { /// The unique ID of the server event. pub event_id: String, } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct InputAudioBufferSpeechStartedEvent { +pub struct RealtimeServerEventInputAudioBufferSpeechStarted { /// The unique ID of the server event. pub event_id: String, /// Milliseconds from the start of all audio written to the buffer during the session when speech was @@ -78,7 +78,7 @@ pub struct InputAudioBufferSpeechStartedEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct InputAudioBufferSpeechStoppedEvent { +pub struct RealtimeServerEventInputAudioBufferSpeechStopped { /// The unique ID of the server event. pub event_id: String, /// Milliseconds since the session started when speech stopped. This will correspond to the end of @@ -89,7 +89,7 @@ pub struct InputAudioBufferSpeechStoppedEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct InputAudioBufferTimeoutTriggeredEvent { +pub struct RealtimeServerEventInputAudioBufferTimeoutTriggered { /// The unique ID of the server event. pub event_id: String, /// Millisecond offset of audio written to the input audio buffer at the time the timeout was triggered. @@ -101,7 +101,7 @@ pub struct InputAudioBufferTimeoutTriggeredEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct OutputAudioBufferStartedEvent { +pub struct RealtimeServerEventOutputAudioBufferStarted { /// The unique ID of the server event. pub event_id: String, /// The unique ID of the response that produced the audio. @@ -109,7 +109,7 @@ pub struct OutputAudioBufferStartedEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct OutputAudioBufferStoppedEvent { +pub struct RealtimeServerEventOutputAudioBufferStopped { /// The unique ID of the server event. pub event_id: String, /// The unique ID of the response that produced the audio. @@ -117,7 +117,7 @@ pub struct OutputAudioBufferStoppedEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct OutputAudioBufferClearedEvent { +pub struct RealtimeServerEventOutputAudioBufferCleared { /// The unique ID of the server event. pub event_id: String, /// The unique ID of the response that produced the audio. @@ -171,7 +171,7 @@ pub enum TranscriptionUsage { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ConversationItemInputAudioTranscriptionCompletedEvent { +pub struct RealtimeServerEventConversationItemInputAudioTranscriptionCompleted { /// The unique ID of the server event. pub event_id: String, /// The ID of the item containing the audio that is being transcribed. @@ -188,7 +188,7 @@ pub struct ConversationItemInputAudioTranscriptionCompletedEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ConversationItemInputAudioTranscriptionDeltaEvent { +pub struct RealtimeServerEventConversationItemInputAudioTranscriptionDelta { /// The unique ID of the server event. 
pub event_id: String, /// The ID of the item containing the audio that is being transcribed. @@ -206,7 +206,7 @@ pub struct ConversationItemInputAudioTranscriptionDeltaEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ConversationItemInputAudioTranscriptionFailedEvent { +pub struct RealtimeServerEventConversationItemInputAudioTranscriptionFailed { /// The unique ID of the server event. pub event_id: String, /// The ID of the user message item. @@ -218,7 +218,7 @@ pub struct ConversationItemInputAudioTranscriptionFailedEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ConversationItemTruncatedEvent { +pub struct RealtimeServerEventConversationItemTruncated { /// The unique ID of the server event. pub event_id: String, /// The ID of the assistant message item that was truncated. @@ -230,7 +230,7 @@ pub struct ConversationItemTruncatedEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ConversationItemDeletedEvent { +pub struct RealtimeServerEventConversationItemDeleted { /// The unique ID of the server event. pub event_id: String, /// The ID of the item that was deleted. @@ -238,7 +238,7 @@ pub struct ConversationItemDeletedEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ConversationItemRetrievedEvent { +pub struct RealtimeServerEventConversationItemRetrieved { /// The unique ID of the server event. pub event_id: String, /// A single item within a Realtime conversation. @@ -246,7 +246,7 @@ pub struct ConversationItemRetrievedEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ConversationItemInputAudioTranscriptionSegmentEvent { +pub struct RealtimeServerEventConversationItemInputAudioTranscriptionSegment { /// The unique ID of the server event. pub event_id: String, /// The ID of the item containing the input audio content. @@ -266,7 +266,7 @@ pub struct ConversationItemInputAudioTranscriptionSegmentEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseCreatedEvent { +pub struct RealtimeServerEventResponseCreated { /// The unique ID of the server event. pub event_id: String, /// The response resource. @@ -274,7 +274,7 @@ pub struct ResponseCreatedEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseDoneEvent { +pub struct RealtimeServerEventResponseDone { /// The unique ID of the server event. pub event_id: String, /// The response resource. @@ -282,7 +282,7 @@ pub struct ResponseDoneEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseOutputItemAddedEvent { +pub struct RealtimeServerEventResponseOutputItemAdded { /// The unique ID of the server event. pub event_id: String, /// The ID of the Response to which the item belongs. @@ -294,7 +294,7 @@ pub struct ResponseOutputItemAddedEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseOutputItemDoneEvent { +pub struct RealtimeServerEventResponseOutputItemDone { /// The unique ID of the server event. pub event_id: String, /// The ID of the response to which the item belongs. @@ -323,7 +323,7 @@ pub enum ContentPart { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseContentPartAddedEvent { +pub struct RealtimeServerEventResponseContentPartAdded { /// The unique ID of the server event. pub event_id: String, /// The ID of the response. 
@@ -339,7 +339,7 @@ pub struct ResponseContentPartAddedEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseContentPartDoneEvent { +pub struct RealtimeServerEventResponseContentPartDone { /// The unique ID of the server event. pub event_id: String, /// The ID of the response. @@ -355,7 +355,7 @@ pub struct ResponseContentPartDoneEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseOutputTextDeltaEvent { +pub struct RealtimeServerEventResponseTextDelta { /// The unique ID of the server event. pub event_id: String, /// The ID of the response. @@ -371,7 +371,7 @@ pub struct ResponseOutputTextDeltaEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseOutputTextDoneEvent { +pub struct RealtimeServerEventResponseTextDone { /// The unique ID of the server event. pub event_id: String, /// The ID of the response. @@ -387,7 +387,7 @@ pub struct ResponseOutputTextDoneEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseOutputAudioTranscriptDeltaEvent { +pub struct RealtimeServerEventResponseAudioTranscriptDelta { /// The unique ID of the server event. pub event_id: String, /// The ID of the response. @@ -403,7 +403,7 @@ pub struct ResponseOutputAudioTranscriptDeltaEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseOutputAudioTranscriptDoneEvent { +pub struct RealtimeServerEventResponseAudioTranscriptDone { /// The unique ID of the server event. pub event_id: String, /// The ID of the response. @@ -419,7 +419,7 @@ pub struct ResponseOutputAudioTranscriptDoneEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseOutputAudioDeltaEvent { +pub struct RealtimeServerEventResponseAudioDelta { /// The unique ID of the server event. pub event_id: String, /// The ID of the response. @@ -435,7 +435,7 @@ pub struct ResponseOutputAudioDeltaEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseOutputAudioDoneEvent { +pub struct RealtimeServerEventResponseAudioDone { /// The unique ID of the server event. pub event_id: String, /// The ID of the response. @@ -449,7 +449,7 @@ pub struct ResponseOutputAudioDoneEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseFunctionCallArgumentsDeltaEvent { +pub struct RealtimeServerEventResponseFunctionCallArgumentsDelta { /// The unique ID of the server event. pub event_id: String, /// The ID of the response. @@ -465,7 +465,7 @@ pub struct ResponseFunctionCallArgumentsDeltaEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseFunctionCallArgumentsDoneEvent { +pub struct RealtimeServerEventResponseFunctionCallArgumentsDone { /// The unique ID of the server event. pub event_id: String, /// The ID of the response. @@ -500,14 +500,14 @@ pub struct RealtimeRateLimit { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct RateLimitsUpdatedEvent { +pub struct RealtimeServerEventRateLimitsUpdated { /// The unique ID of the server event. pub event_id: String, pub rate_limits: Vec, } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct MCPListToolsInProgressEvent { +pub struct RealtimeServerEventMCPListToolsInProgress { /// The unique ID of the server event. pub event_id: String, /// The ID of the MCP list tools item. 
@@ -515,7 +515,7 @@ pub struct MCPListToolsInProgressEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct MCPListToolsCompletedEvent { +pub struct RealtimeServerEventMCPListToolsCompleted { /// The unique ID of the server event. pub event_id: String, /// The ID of the MCP list tools item. @@ -523,7 +523,7 @@ pub struct MCPListToolsCompletedEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct MCPListToolsFailedEvent { +pub struct RealtimeServerEventMCPListToolsFailed { /// The unique ID of the server event. pub event_id: String, /// The ID of the MCP list tools item. @@ -531,7 +531,7 @@ pub struct MCPListToolsFailedEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseMCPCallArgumentsDeltaEvent { +pub struct RealtimeServerEventResponseMCPCallArgumentsDelta { /// The unique ID of the server event. pub event_id: String, /// The ID of the response. @@ -548,7 +548,7 @@ pub struct ResponseMCPCallArgumentsDeltaEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseMCPCallArgumentsDoneEvent { +pub struct RealtimeServerEventResponseMCPCallArgumentsDone { /// The unique ID of the server event. pub event_id: String, /// The ID of the response. @@ -562,7 +562,7 @@ pub struct ResponseMCPCallArgumentsDoneEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseMCPCallInProgressEvent { +pub struct RealtimeServerEventResponseMCPCallInProgress { /// The unique ID of the server event. pub event_id: String, /// The index of the output item in the response. @@ -572,7 +572,7 @@ pub struct ResponseMCPCallInProgressEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseMCPCallCompletedEvent { +pub struct RealtimeServerEventResponseMCPCallCompleted { /// The unique ID of the server event. pub event_id: String, /// The index of the output item in the response. @@ -582,7 +582,7 @@ pub struct ResponseMCPCallCompletedEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseMCPCallFailedEvent { +pub struct RealtimeServerEventResponseMCPCallFailed { /// The unique ID of the server event. pub event_id: String, /// The index of the output item in the response. @@ -599,16 +599,16 @@ pub enum RealtimeServerEvent { /// Most errors are recoverable and the session will stay open, we recommend to /// implementors to monitor and log error messages by default. #[serde(rename = "error")] - Error(ErrorEvent), + Error(RealtimeServerEventError), /// Returned when a Session is created. Emitted automatically when a new connection is established as the first server event. /// This event will contain the default Session configuration. #[serde(rename = "session.created")] - SessionCreated(SessionCreatedEvent), + SessionCreated(RealtimeServerEventSessionCreated), /// Returned when a session is updated with a `session.update` event, unless there is an error. #[serde(rename = "session.updated")] - SessionUpdated(SessionUpdatedEvent), + SessionUpdated(RealtimeServerEventSessionUpdated), /// Sent by the server when an Item is added to the default Conversation. This can happen in several cases: /// - When the client sends a conversation.item.create event @@ -619,21 +619,21 @@ pub enum RealtimeServerEvent { /// The event will include the full content of the Item (except when model is generating a Response) except for audio data, /// which can be retrieved separately with a `conversation.item.retrieve` event if necessary. 
#[serde(rename = "conversation.item.added")] - ConversationItemAdded(ConversationItemAddedEvent), + ConversationItemAdded(RealtimeServerEventConversationItemAdded), /// Returned when a conversation item is finalized. /// /// The event will include the full content of the Item except for audio data, which can be retrieved /// separately with a `conversation.item.retrieve` event if needed. #[serde(rename = "conversation.item.done")] - ConversationItemDone(ConversationItemDoneEvent), + ConversationItemDone(RealtimeServerEventConversationItemDone), /// Returned when a conversation item is retrieved with `conversation.item.retrieve`. /// This is provided as a way to fetch the server's representation of an item, for example to get access /// to the post-processed audio data after noise cancellation and VAD. /// It includes the full content of the Item, including audio data. #[serde(rename = "conversation.item.retrieved")] - ConversationItemRetrieved(ConversationItemRetrievedEvent), + ConversationItemRetrieved(RealtimeServerEventConversationItemRetrieved), /// This event is the output of audio transcription for user audio written to the user audio /// buffer. Transcription begins when the input audio buffer is committed by the client or @@ -645,24 +645,26 @@ pub enum RealtimeServerEvent { /// may diverge somewhat from the model's interpretation, and should be treated as a rough guide. #[serde(rename = "conversation.item.input_audio_transcription.completed")] ConversationItemInputAudioTranscriptionCompleted( - ConversationItemInputAudioTranscriptionCompletedEvent, + RealtimeServerEventConversationItemInputAudioTranscriptionCompleted, ), /// Returned when the text value of an input audio transcription content part is updated with incremental transcription results. #[serde(rename = "conversation.item.input_audio_transcription.delta")] - ConversationItemInputAudioTranscriptionDelta(ConversationItemInputAudioTranscriptionDeltaEvent), + ConversationItemInputAudioTranscriptionDelta( + RealtimeServerEventConversationItemInputAudioTranscriptionDelta, + ), /// Returned when an input audio transcription segment is identified for an item. #[serde(rename = "conversation.item.input_audio_transcription.segment")] ConversationItemInputAudioTranscriptionSegment( - ConversationItemInputAudioTranscriptionSegmentEvent, + RealtimeServerEventConversationItemInputAudioTranscriptionSegment, ), /// Returned when input audio transcription is configured, and a transcription request for a user message failed. /// These events are separate from other `error` events so that the client can identify the related Item. #[serde(rename = "conversation.item.input_audio_transcription.failed")] ConversationItemInputAudioTranscriptionFailed( - ConversationItemInputAudioTranscriptionFailedEvent, + RealtimeServerEventConversationItemInputAudioTranscriptionFailed, ), /// Returned when an earlier assistant audio message item is truncated by the client with a `conversation.item.truncate` event. @@ -671,22 +673,22 @@ pub enum RealtimeServerEvent { /// This action will truncate the audio and remove the server-side text transcript to ensure there is no text in the /// context that hasn't been heard by the user. #[serde(rename = "conversation.item.truncated")] - ConversationItemTruncated(ConversationItemTruncatedEvent), + ConversationItemTruncated(RealtimeServerEventConversationItemTruncated), /// Returned when an item in the conversation is deleted by the client with a `conversation.item.delete` event. 
/// This event is used to synchronize the server's understanding of the conversation history with the client's view. #[serde(rename = "conversation.item.deleted")] - ConversationItemDeleted(ConversationItemDeletedEvent), + ConversationItemDeleted(RealtimeServerEventConversationItemDeleted), /// Returned when an input audio buffer is committed, either by the client or automatically in server VAD mode. /// The `item_id` property is the ID of the user message item that will be created, /// thus a `conversation.item.created` event will also be sent to the client. #[serde(rename = "input_audio_buffer.committed")] - InputAudioBufferCommited(InputAudioBufferCommitedEvent), + InputAudioBufferCommitted(RealtimeServerEventInputAudioBufferCommitted), /// Returned when the input audio buffer is cleared by the client with a `input_audio_buffer.clear` event. #[serde(rename = "input_audio_buffer.cleared")] - InputAudioBufferCleared(InputAudioBufferClearedEvent), + InputAudioBufferCleared(RealtimeServerEventInputAudioBufferCleared), /// Sent by the server when in `server_vad` mode to indicate that speech has been detected in the audio buffer. /// This can happen any time audio is added to the buffer (unless speech is already detected). @@ -697,12 +699,12 @@ pub enum RealtimeServerEvent { /// also be included in the `input_audio_buffer.speech_stopped` event (unless the client manually commits the /// audio buffer during VAD activation). #[serde(rename = "input_audio_buffer.speech_started")] - InputAudioBufferSpeechStarted(InputAudioBufferSpeechStartedEvent), + InputAudioBufferSpeechStarted(RealtimeServerEventInputAudioBufferSpeechStarted), /// Returned in `server_vad` mode when the server detects the end of speech in the audio buffer. /// The server will also send a `conversation.item.created` event with the user message item that is created from the audio buffer. #[serde(rename = "input_audio_buffer.speech_stopped")] - InputAudioBufferSpeechStopped(InputAudioBufferSpeechStoppedEvent), + InputAudioBufferSpeechStopped(RealtimeServerEventInputAudioBufferSpeechStopped), /// Returned when the Server VAD timeout is triggered for the input audio buffer. This is /// configured with `idle_timeout_ms` in the `turn_detection` settings of the session, and @@ -718,31 +720,31 @@ pub enum RealtimeServerEvent { /// There may be speech that didn't trigger VAD but is still detected by the model, so the model may respond /// with something relevant to the conversation or a prompt to continue speaking. #[serde(rename = "input_audio_buffer.timeout_triggered")] - InputAudioBufferTimeoutTriggered(InputAudioBufferTimeoutTriggeredEvent), + InputAudioBufferTimeoutTriggered(RealtimeServerEventInputAudioBufferTimeoutTriggered), /// *WebRTC Only*: Emitted when the server begins streaming audio to the client. This /// event is emitted after an audio content part has been added (`response.content_part.added`) to the response. /// [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc). #[serde(rename = "output_audio_buffer.started")] - OutputAudioBufferStarted(OutputAudioBufferStartedEvent), + OutputAudioBufferStarted(RealtimeServerEventOutputAudioBufferStarted), /// *WebRTC Only*: Emitted when the output audio buffer has been completely drained on /// the server, and no more audio is forthcoming. This event is emitted after the full response data has been sent /// to the client (`response.done`). 
[Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc). #[serde(rename = "output_audio_buffer.stopped")] - OutputAudioBufferStopped(OutputAudioBufferStoppedEvent), + OutputAudioBufferStopped(RealtimeServerEventOutputAudioBufferStopped), /// *WebRTC Only*: Emitted when the output audio buffer is cleared. This happens either in /// VAD mode when the user has interrupted (`input_audio_buffer.speech_started`), or when the client has /// emitted the `output_audio_buffer.clear` event to manually cut off the current audio response. /// [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc). #[serde(rename = "output_audio_buffer.cleared")] - OutputAudioBufferCleared(OutputAudioBufferClearedEvent), + OutputAudioBufferCleared(RealtimeServerEventOutputAudioBufferCleared), /// Returned when a new Response is created. The first event of response creation, /// where the response is in an initial state of `in_progress`. #[serde(rename = "response.created")] - ResponseCreated(ResponseCreatedEvent), + ResponseCreated(RealtimeServerEventResponseCreated), /// Returned when a Response is done streaming. Always emitted, no matter the final state. /// The Response object included in the `response.done` event will include all output Items in the Response @@ -753,96 +755,96 @@ pub enum RealtimeServerEvent { /// /// A response will contain all output items that were generated during the response, excluding any audio content. #[serde(rename = "response.done")] - ResponseDone(ResponseDoneEvent), + ResponseDone(RealtimeServerEventResponseDone), /// Returned when a new Item is created during Response generation. #[serde(rename = "response.output_item.added")] - ResponseOutputItemAdded(ResponseOutputItemAddedEvent), + ResponseOutputItemAdded(RealtimeServerEventResponseOutputItemAdded), /// Returned when an Item is done streaming. Also emitted when a Response is interrupted, incomplete, or cancelled. #[serde(rename = "response.output_item.done")] - ResponseOutputItemDone(ResponseOutputItemDoneEvent), + ResponseOutputItemDone(RealtimeServerEventResponseOutputItemDone), /// Returned when a new content part is added to an assistant message item during response generation. #[serde(rename = "response.content_part.added")] - ResponseContentPartAdded(ResponseContentPartAddedEvent), + ResponseContentPartAdded(RealtimeServerEventResponseContentPartAdded), /// Returned when a content part is done streaming in an assistant message item. /// Also emitted when a Response is interrupted, incomplete, or cancelled. #[serde(rename = "response.content_part.done")] - ResponseContentPartDone(ResponseContentPartDoneEvent), + ResponseContentPartDone(RealtimeServerEventResponseContentPartDone), /// Returned when the text value of an "output_text" content part is updated. #[serde(rename = "response.output_text.delta")] - ResponseOutputTextDelta(ResponseOutputTextDeltaEvent), + ResponseOutputTextDelta(RealtimeServerEventResponseTextDelta), /// Returned when the text value of an "output_text" content part is done streaming. /// Also emitted when a Response is interrupted, incomplete, or cancelled. #[serde(rename = "response.output_text.done")] - ResponseOutputTextDone(ResponseOutputTextDoneEvent), + ResponseOutputTextDone(RealtimeServerEventResponseTextDone), /// Returned when the model-generated transcription of audio output is updated. 
#[serde(rename = "response.output_audio_transcript.delta")] - ResponseOutputAudioTranscriptDelta(ResponseOutputAudioTranscriptDeltaEvent), + ResponseOutputAudioTranscriptDelta(RealtimeServerEventResponseAudioTranscriptDelta), /// Returned when the model-generated transcription of audio output is done streaming. /// Also emitted when a Response is interrupted, incomplete, or cancelled. #[serde(rename = "response.output_audio_transcript.done")] - ResponseOutputAudioTranscriptDone(ResponseOutputAudioTranscriptDoneEvent), + ResponseOutputAudioTranscriptDone(RealtimeServerEventResponseAudioTranscriptDone), /// Returned when the model-generated audio is updated. #[serde(rename = "response.output_audio.delta")] - ResponseOutputAudioDelta(ResponseOutputAudioDeltaEvent), + ResponseOutputAudioDelta(RealtimeServerEventResponseAudioDelta), /// Returned when the model-generated audio is done. /// Also emitted when a Response is interrupted, incomplete, or cancelled. #[serde(rename = "response.output_audio.done")] - ResponseOutputAudioDone(ResponseOutputAudioDoneEvent), + ResponseOutputAudioDone(RealtimeServerEventResponseAudioDone), /// Returned when the model-generated function call arguments are updated. #[serde(rename = "response.function_call_arguments.delta")] - ResponseFunctionCallArgumentsDelta(ResponseFunctionCallArgumentsDeltaEvent), + ResponseFunctionCallArgumentsDelta(RealtimeServerEventResponseFunctionCallArgumentsDelta), /// Returned when the model-generated function call arguments are done streaming. /// Also emitted when a Response is interrupted, incomplete, or cancelled. #[serde(rename = "response.function_call_arguments.done")] - ResponseFunctionCallArgumentsDone(ResponseFunctionCallArgumentsDoneEvent), + ResponseFunctionCallArgumentsDone(RealtimeServerEventResponseFunctionCallArgumentsDone), /// Returned when MCP tool call arguments are updated. #[serde(rename = "response.mcp_call_arguments.delta")] - ResponseMCPCallArgumentsDelta(ResponseMCPCallArgumentsDeltaEvent), + ResponseMCPCallArgumentsDelta(RealtimeServerEventResponseMCPCallArgumentsDelta), /// Returned when MCP tool call arguments are finalized during response generation. #[serde(rename = "response.mcp_call_arguments.done")] - ResponseMCPCallArgumentsDone(ResponseMCPCallArgumentsDoneEvent), + ResponseMCPCallArgumentsDone(RealtimeServerEventResponseMCPCallArgumentsDone), /// Returned when an MCP tool call is in progress. #[serde(rename = "response.mcp_call.in_progress")] - ResponseMCPCallInProgress(ResponseMCPCallInProgressEvent), + ResponseMCPCallInProgress(RealtimeServerEventResponseMCPCallInProgress), /// Returned when an MCP tool call has completed successfully. #[serde(rename = "response.mcp_call.completed")] - ResponseMCPCallCompleted(ResponseMCPCallCompletedEvent), + ResponseMCPCallCompleted(RealtimeServerEventResponseMCPCallCompleted), /// Returned when an MCP tool call has failed. #[serde(rename = "response.mcp_call.failed")] - ResponseMCPCallFailed(ResponseMCPCallFailedEvent), + ResponseMCPCallFailed(RealtimeServerEventResponseMCPCallFailed), /// Returned when listing MCP tools is in progress for an item. #[serde(rename = "mcp_list_tools.in_progress")] - MCPListToolsInProgress(MCPListToolsInProgressEvent), + MCPListToolsInProgress(RealtimeServerEventMCPListToolsInProgress), /// Returned when listing MCP tools has completed for an item. 
#[serde(rename = "mcp_list_tools.completed")] - MCPListToolsCompleted(MCPListToolsCompletedEvent), + MCPListToolsCompleted(RealtimeServerEventMCPListToolsCompleted), /// Returned when listing MCP tools has failed for an item. #[serde(rename = "mcp_list_tools.failed")] - MCPListToolsFailed(MCPListToolsFailedEvent), + MCPListToolsFailed(RealtimeServerEventMCPListToolsFailed), /// Emitted at the beginning of a Response to indicate the updated rate limits. /// When a Response is created some tokens will be "reserved" for the output tokens, the rate limits /// shown here reflect that reservation, which is then adjusted accordingly once the Response is completed. #[serde(rename = "rate_limits.updated")] - RateLimitsUpdated(RateLimitsUpdatedEvent), + RateLimitsUpdated(RealtimeServerEventRateLimitsUpdated), } From 1fbd6a253d133a81ed5f65c35ee3a2b5a69994a4 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 17:36:23 -0800 Subject: [PATCH 40/42] match responses stream event names with spec --- .../src/types/responses/response_stream.rs | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/async-openai/src/types/responses/response_stream.rs b/async-openai/src/types/responses/response_stream.rs index c6478cae..58bcc82f 100644 --- a/async-openai/src/types/responses/response_stream.rs +++ b/async-openai/src/types/responses/response_stream.rs @@ -44,10 +44,10 @@ pub enum ResponseStreamEvent { ResponseContentPartDone(ResponseContentPartDoneEvent), /// Emitted when there is an additional text delta. #[serde(rename = "response.output_text.delta")] - ResponseOutputTextDelta(ResponseOutputTextDeltaEvent), + ResponseOutputTextDelta(ResponseTextDeltaEvent), /// Emitted when text content is finalized. #[serde(rename = "response.output_text.done")] - ResponseOutputTextDone(ResponseOutputTextDoneEvent), + ResponseOutputTextDone(ResponseTextDoneEvent), /// Emitted when there is a partial refusal text. #[serde(rename = "response.refusal.delta")] ResponseRefusalDelta(ResponseRefusalDeltaEvent), @@ -98,16 +98,16 @@ pub enum ResponseStreamEvent { ResponseReasoningTextDone(ResponseReasoningTextDoneEvent), /// Emitted when an image generation tool call has completed and the final image is available. #[serde(rename = "response.image_generation_call.completed")] - ResponseImageGenerationCallCompleted(ResponseImageGenerationCallCompletedEvent), + ResponseImageGenerationCallCompleted(ResponseImageGenCallCompletedEvent), /// Emitted when an image generation tool call is actively generating an image (intermediate state). #[serde(rename = "response.image_generation_call.generating")] - ResponseImageGenerationCallGenerating(ResponseImageGenerationCallGeneratingEvent), + ResponseImageGenerationCallGenerating(ResponseImageGenCallGeneratingEvent), /// Emitted when an image generation tool call is in progress. #[serde(rename = "response.image_generation_call.in_progress")] - ResponseImageGenerationCallInProgress(ResponseImageGenerationCallInProgressEvent), + ResponseImageGenerationCallInProgress(ResponseImageGenCallInProgressEvent), /// Emitted when a partial image is available during image generation streaming. #[serde(rename = "response.image_generation_call.partial_image")] - ResponseImageGenerationCallPartialImage(ResponseImageGenerationCallPartialImageEvent), + ResponseImageGenerationCallPartialImage(ResponseImageGenCallPartialImageEvent), /// Emitted when there is a delta (partial update) to the arguments of an MCP tool call. 
#[serde(rename = "response.mcp_call_arguments.delta")] ResponseMCPCallArgumentsDelta(ResponseMCPCallArgumentsDeltaEvent), @@ -227,7 +227,7 @@ pub struct ResponseContentPartDoneEvent { } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct ResponseOutputTextDeltaEvent { +pub struct ResponseTextDeltaEvent { pub sequence_number: u64, pub item_id: String, pub output_index: u32, @@ -238,7 +238,7 @@ pub struct ResponseOutputTextDeltaEvent { } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct ResponseOutputTextDoneEvent { +pub struct ResponseTextDoneEvent { pub sequence_number: u64, pub item_id: String, pub output_index: u32, @@ -385,28 +385,28 @@ pub struct ResponseReasoningTextDoneEvent { } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct ResponseImageGenerationCallCompletedEvent { +pub struct ResponseImageGenCallCompletedEvent { pub sequence_number: u64, pub output_index: u32, pub item_id: String, } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct ResponseImageGenerationCallGeneratingEvent { +pub struct ResponseImageGenCallGeneratingEvent { pub sequence_number: u64, pub output_index: u32, pub item_id: String, } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct ResponseImageGenerationCallInProgressEvent { +pub struct ResponseImageGenCallInProgressEvent { pub sequence_number: u64, pub output_index: u32, pub item_id: String, } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct ResponseImageGenerationCallPartialImageEvent { +pub struct ResponseImageGenCallPartialImageEvent { pub sequence_number: u64, pub output_index: u32, pub item_id: String, From 66bdbcdad3ccf76977d79f139d03dfa9b437756d Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 17:41:07 -0800 Subject: [PATCH 41/42] reusable type --- async-openai/src/types/mcp.rs | 14 ++++++++++++++ .../src/types/realtime/conversation_item.rs | 2 +- async-openai/src/types/responses/response.rs | 18 +----------------- 3 files changed, 16 insertions(+), 18 deletions(-) diff --git a/async-openai/src/types/mcp.rs b/async-openai/src/types/mcp.rs index 7b76c5fb..fae078a1 100644 --- a/async-openai/src/types/mcp.rs +++ b/async-openai/src/types/mcp.rs @@ -121,3 +121,17 @@ pub struct MCPToolApprovalFilter { #[serde(skip_serializing_if = "Option::is_none")] pub never: Option, } + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct MCPListToolsTool { + /// The JSON schema describing the tool's input. + pub input_schema: serde_json::Value, + /// The name of the tool. + pub name: String, + /// Additional annotations about the tool. + #[serde(skip_serializing_if = "Option::is_none")] + pub annotations: Option, + /// The description of the tool. 
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub description: Option,
+}

diff --git a/async-openai/src/types/realtime/conversation_item.rs b/async-openai/src/types/realtime/conversation_item.rs
index a1f28893..895e6d42 100644
--- a/async-openai/src/types/realtime/conversation_item.rs
+++ b/async-openai/src/types/realtime/conversation_item.rs
@@ -1,8 +1,8 @@
 use serde::{Deserialize, Serialize};
 
 use crate::types::{
+    mcp::MCPListToolsTool,
     realtime::{ErrorCodeMessage, ErrorMessage},
-    responses::MCPListToolsTool,
 };
 
 #[derive(Debug, Serialize, Deserialize, Clone)]
diff --git a/async-openai/src/types/responses/response.rs b/async-openai/src/types/responses/response.rs
index 98cae0b3..43e3e84e 100644
--- a/async-openai/src/types/responses/response.rs
+++ b/async-openai/src/types/responses/response.rs
@@ -1,14 +1,12 @@
 use crate::error::OpenAIError;
-use crate::types::MCPTool;
 pub use crate::types::{
     CompletionTokensDetails, ImageDetail, PromptTokensDetails, ReasoningEffort,
     ResponseFormatJsonSchema,
 };
+use crate::types::{MCPListToolsTool, MCPTool};
 use derive_builder::Builder;
-use futures::Stream;
 use serde::{Deserialize, Serialize};
 use std::collections::HashMap;
-use std::pin::Pin;
 
 /// Role of messages in the API.
 #[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
@@ -2107,7 +2105,7 @@ pub struct MCPListTools {
     pub error: Option,
 }
 
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct MCPListToolsTool {
-    /// The JSON schema describing the tool's input.
-    pub input_schema: serde_json::Value,
-    /// The name of the tool.
-    pub name: String,
-    /// Additional annotations about the tool.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub annotations: Option,
-    /// The description of the tool.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub description: Option,
-}
-
 #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub struct MCPApprovalRequest {
     /// JSON string of arguments for the tool.

From cc205a9dd2f249cbc4f842a18485c8db9bb776ab Mon Sep 17 00:00:00 2001
From: Himanshu Neema
Date: Tue, 4 Nov 2025 17:55:18 -0800
Subject: [PATCH 42/42] updated readme

---
 async-openai/README.md | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/async-openai/README.md b/async-openai/README.md
index 9b1fdcab..28271272 100644
--- a/async-openai/README.md
+++ b/async-openai/README.md
@@ -35,7 +35,7 @@
   - [x] Models
   - [x] Moderations
   - [x] Organizations | Administration (partially implemented)
-  - [x] Realtime (Beta) (partially implemented)
+  - [x] Realtime GA (partially implemented)
   - [x] Responses (partially implemented)
   - [x] Uploads
   - [x] Videos
@@ -65,7 +65,6 @@ $Env:OPENAI_API_KEY='sk-...'
 
 ## Realtime API
 
 Only types for Realtime API are implemented, and can be enabled with feature flag `realtime`.
-These types were written before OpenAI released official specs.
 
 ## Image Generation Example
@@ -179,8 +178,6 @@ To maintain quality of the project, a minimum of the following is a must for cod
 This project adheres to [Rust Code of Conduct](https://www.rust-lang.org/policies/code-of-conduct)
 
 ## Complimentary Crates
-
-- [openai-func-enums](https://github.com/frankfralick/openai-func-enums) provides procedural macros that make it easier to use this library with OpenAI API's tool calling feature. It also provides derive macros you can add to existing [clap](https://github.com/clap-rs/clap) application subcommands for natural language use of command line tools. It also supports openai's [parallel tool calls](https://platform.openai.com/docs/guides/function-calling/parallel-function-calling) and allows you to choose between running multiple tool calls concurrently or own their own OS threads.
 - [async-openai-wasm](https://github.com/ifsheldon/async-openai-wasm) provides WASM support.
 
 ## License
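
For readers following PATCH 41, below is a minimal, self-contained sketch of how the shared `MCPListToolsTool` type might be exercised once it lives in `types/mcp.rs`. The sample payload, the `main` wrapper, and the `Option<serde_json::Value>` / `Option<String>` type parameters (which the patch text above has lost) are illustrative assumptions, not part of the patch series.

```rust
use serde::{Deserialize, Serialize};

// Local mirror of the struct added to async-openai/src/types/mcp.rs in PATCH 41.
// The `Option<...>` type parameters are assumed here.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct MCPListToolsTool {
    /// The JSON schema describing the tool's input.
    pub input_schema: serde_json::Value,
    /// The name of the tool.
    pub name: String,
    /// Additional annotations about the tool.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub annotations: Option<serde_json::Value>,
    /// The description of the tool.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
}

fn main() -> Result<(), serde_json::Error> {
    // Hypothetical entry from an MCP tool listing, for illustration only.
    let payload = r#"{
        "name": "get_weather",
        "description": "Look up current weather for a city",
        "input_schema": {
            "type": "object",
            "properties": { "city": { "type": "string" } },
            "required": ["city"]
        }
    }"#;

    // One definition now backs both the Responses output items and the
    // Realtime conversation items, so a single round-trip covers both call sites.
    let tool: MCPListToolsTool = serde_json::from_str(payload)?;
    println!("{} -> {:?}", tool.name, tool.description);

    // The absent optional field (`annotations`) deserializes to `None` and, thanks to
    // `skip_serializing_if`, is omitted again on re-serialization.
    println!("{}", serde_json::to_string_pretty(&tool)?);
    Ok(())
}
```

Centralizing the struct in `types/mcp.rs` lets the Responses types and the Realtime conversation items deserialize MCP tool listings through one definition, which is what the `reusable type` commit is after.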