From 4c6445b2dcfca9aed3659ff47e515c2b2d363406 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 21 Oct 2025 17:53:36 -0700 Subject: [PATCH 01/42] updated client events --- .../src/types/realtime/client_event.rs | 139 ++++++++++++++++-- 1 file changed, 125 insertions(+), 14 deletions(-) diff --git a/async-openai/src/types/realtime/client_event.rs b/async-openai/src/types/realtime/client_event.rs index 87ff7010..7a49e836 100644 --- a/async-openai/src/types/realtime/client_event.rs +++ b/async-openai/src/types/realtime/client_event.rs @@ -6,9 +6,12 @@ use super::{item::Item, session_resource::SessionResource}; #[derive(Debug, Serialize, Deserialize, Clone, Default)] pub struct SessionUpdateEvent { /// Optional client-generated ID used to identify this event. + /// This is an arbitrary string that a client may assign. It will be passed + /// back if there is an error with the event, but the corresponding + /// `session.updated` event will not include it. #[serde(skip_serializing_if = "Option::is_none")] pub event_id: Option, - /// Session configuration to update. + /// Update the Realtime session. Choose either a realtime session or a transcription session. pub session: SessionResource, } @@ -17,7 +20,8 @@ pub struct InputAudioBufferAppendEvent { /// Optional client-generated ID used to identify this event. #[serde(skip_serializing_if = "Option::is_none")] pub event_id: Option, - /// Base64-encoded audio bytes. + /// Base64-encoded audio bytes. This must be in the format specified by + /// the `input_audio_format` field in the session configuration. pub audio: String, } @@ -42,26 +46,41 @@ pub struct ConversationItemCreateEvent { pub event_id: Option, /// The ID of the preceding item after which the new item will be inserted. + /// If not set, the new item will be appended to the end of the conversation. + /// If set to `root`, the new item will be added to the beginning of the conversation. + /// If set to an existing ID, it allows an item to be inserted mid-conversation. + /// If the ID cannot be found, an error will be returned and the item will not be added. #[serde(skip_serializing_if = "Option::is_none")] pub previous_item_id: Option, - /// The item to add to the conversation. + /// A single item within a Realtime conversation. pub item: Item, } +#[derive(Debug, Serialize, Deserialize, Clone, Default)] +pub struct ConversationItemRetrieveEvent { + /// Optional client-generated ID used to identify this event. + #[serde(skip_serializing_if = "Option::is_none")] + pub event_id: Option, + + /// The ID of the item to retrieve. + pub item_id: String, +} + #[derive(Debug, Serialize, Deserialize, Clone, Default)] pub struct ConversationItemTruncateEvent { /// Optional client-generated ID used to identify this event. #[serde(skip_serializing_if = "Option::is_none")] pub event_id: Option, - /// The ID of the assistant message item to truncate. + /// The ID of the assistant message item to truncate. Only assistant message items can be truncated. pub item_id: String, - /// The index of the content part to truncate. + /// The index of the content part to truncate. Set this to `0`. pub content_index: u32, /// Inclusive duration up to which audio is truncated, in milliseconds. + /// If the audio_end_ms is greater than the actual audio duration, the server will respond with an error. pub audio_end_ms: u32, } @@ -81,7 +100,7 @@ pub struct ResponseCreateEvent { #[serde(skip_serializing_if = "Option::is_none")] pub event_id: Option, - /// Configuration for the response. 
+ /// Create a new Realtime response with these parameters pub response: Option, } @@ -90,47 +109,127 @@ pub struct ResponseCancelEvent { /// Optional client-generated ID used to identify this event. #[serde(skip_serializing_if = "Option::is_none")] pub event_id: Option, + + /// A specific response ID to cancel - if not provided, will cancel an + /// in-progress response in the default conversation. + #[serde(skip_serializing_if = "Option::is_none")] + pub response_id: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone, Default)] +pub struct OutputAudioBufferClearEvent { + /// Optional client-generated ID used to identify this event. + #[serde(skip_serializing_if = "Option::is_none")] + pub event_id: Option, } /// These are events that the OpenAI Realtime WebSocket server will accept from the client. #[derive(Debug, Serialize, Deserialize)] #[serde(tag = "type")] pub enum ClientEvent { - /// Send this event to update the session’s default configuration. + /// Send this event to update the session's configuration. The client may send this event at any time to update any field + /// except for `voice` and `model`. `voice` can be updated only if there have been no other audio outputs yet. + /// + /// When the server receives a `session.update`, it will respond with a `session.updated` event showing the full, effective + /// configuration. Only the fields that are present in the `session.update` are updated. To clear a field like `instructions`, + /// pass an empty string. To clear a field like `tools`, pass an empty array. To clear a field like `turn_detection`, pass `null`. #[serde(rename = "session.update")] SessionUpdate(SessionUpdateEvent), - /// Send this event to append audio bytes to the input audio buffer. + /// Send this event to append audio bytes to the input audio buffer. The audio buffer is temporary storage you can write to and later commit. + /// A "commit" will create a new user message item in the conversation history from the buffer content and clear the buffer. Input audio + /// transcription (if enabled) will be generated when the buffer is committed. + /// + /// If VAD is enabled the audio buffer is used to detect speech and the server will decide when to commit. When Server VAD is disabled, + /// you must commit the audio buffer manually. Input audio noise reduction operates on writes to the audio buffer. + /// + /// The client may choose how much audio to place in each event up to a maximum of 15 MiB, for example streaming smaller chunks from the + /// client may allow the VAD to be more responsive. Unlike most other client events, the server will not send a confirmation response to + /// this event. #[serde(rename = "input_audio_buffer.append")] InputAudioBufferAppend(InputAudioBufferAppendEvent), - /// Send this event to commit audio bytes to a user message. + /// Send this event to commit the user input audio buffer, which will create a new user message item in the conversation. + /// This event will produce an error if the input audio buffer is empty. + /// When in Server VAD mode, the client does not need to send this event, the server will commit the audio buffer automatically. + /// Committing the input audio buffer will trigger input audio transcription (if enabled in session configuration), but it will not create a response from the model. + /// The server will respond with an input_audio_buffer.committed event. 
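+    ///
+    /// A rough sketch of the manual append-then-commit flow. Here `ws.send_text` stands in
+    /// for whatever WebSocket transport the client uses, and `InputAudioBufferCommitEvent`
+    /// is assumed to keep its `Default` impl:
+    ///
+    /// ```ignore
+    /// // Append a base64-encoded chunk to the input audio buffer.
+    /// let append = ClientEvent::InputAudioBufferAppend(InputAudioBufferAppendEvent {
+    ///     event_id: None,
+    ///     audio: base64_chunk,
+    /// });
+    /// ws.send_text(String::from(&append)).await?;
+    ///
+    /// // Commit the buffered audio: this creates a user message item but does not, by
+    /// // itself, trigger a model response.
+    /// let commit = ClientEvent::InputAudioBufferCommit(InputAudioBufferCommitEvent::default());
+    /// ws.send_text(String::from(&commit)).await?;
+    /// ```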
#[serde(rename = "input_audio_buffer.commit")] InputAudioBufferCommit(InputAudioBufferCommitEvent), /// Send this event to clear the audio bytes in the buffer. + /// The server will respond with an `input_audio_buffer.cleared` event. #[serde(rename = "input_audio_buffer.clear")] InputAudioBufferClear(InputAudioBufferClearEvent), - /// Send this event when adding an item to the conversation. + /// Add a new Item to the Conversation's context, including messages, function calls, and function call responses. + /// This event can be used both to populate a "history" of the conversation and to add new items mid-stream, + /// but has the current limitation that it cannot populate assistant audio messages. + /// + /// If successful, the server will respond with a `conversation.item.created` event, otherwise an `error` event will be sent. #[serde(rename = "conversation.item.create")] ConversationItemCreate(ConversationItemCreateEvent), - /// Send this event when you want to truncate a previous assistant message’s audio. + /// Send this event when you want to retrieve the server's representation of a specific item in the conversation history. + /// This is useful, for example, to inspect user audio after noise cancellation and VAD. + /// The server will respond with a `conversation.item.retrieved` event, unless the item does not exist in the conversation history, + /// in which case the server will respond with an error. + #[serde(rename = "conversation.item.retrieve")] + ConversationItemRetrieve(ConversationItemRetrieveEvent), + + /// Send this event to truncate a previous assistant message's audio. The server will produce audio faster than realtime, + /// so this event is useful when the user interrupts to truncate audio that has already been sent to the client but not + /// yet played. This will synchronize the server's understanding of the audio with the client's playback. + /// + /// Truncating audio will delete the server-side text transcript to ensure there is not text in the context that hasn't + /// been heard by the user. + /// + /// If successful, the server will respond with a `conversation.item.truncated` event. #[serde(rename = "conversation.item.truncate")] ConversationItemTruncate(ConversationItemTruncateEvent), - /// Send this event when you want to remove any item from the conversation history. + /// Send this event when you want to remove any item from the conversation history. The server will respond with a + /// `conversation.item.deleted` event, unless the item does not exist in the conversation history, in which case the + /// server will respond with an error. #[serde(rename = "conversation.item.delete")] ConversationItemDelete(ConversationItemDeleteEvent), - /// Send this event to trigger a response generation. + /// This event instructs the server to create a Response, which means triggering model inference. + /// When in Server VAD mode, the server will create Responses automatically. + /// + /// A Response will include at least one Item, and may have two, in which case the second will be a function call. + /// These Items will be appended to the conversation history by default. + /// + /// The server will respond with a `response.created` event, events for Items and content created, and finally a + /// `response.done` event to indicate the Response is complete. + /// + /// The `response.create` event includes inference configuration like `instructions` and `tools`. If these are set, they will + /// override the Session's configuration for this Response only. 
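+    ///
+    /// As a minimal sketch, sending the event with `response: None` asks the server to
+    /// generate a response from the default conversation using the session's current
+    /// configuration (`ws.send_text` is a stand-in for the client's transport):
+    ///
+    /// ```ignore
+    /// let trigger = ClientEvent::ResponseCreate(ResponseCreateEvent {
+    ///     event_id: None,
+    ///     // Set to Some(ResponseResource { .. }) to override e.g. instructions or tools
+    ///     // for this Response only.
+    ///     response: None,
+    /// });
+    /// ws.send_text(String::from(&trigger)).await?;
+    /// ```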
+ /// + /// Responses can be created out-of-band of the default Conversation, meaning that they can have arbitrary input, and + /// it's possible to disable writing the output to the Conversation. Only one Response can write to the default + /// Conversation at a time, but otherwise multiple Responses can be created in parallel. The `metadata` field is a good + /// way to disambiguate multiple simultaneous Responses. + /// + /// Clients can set `conversation` to `none` to create a Response that does not write to the default Conversation. + /// Arbitrary input can be provided with the `input` field, which is an array accepting raw Items and references to + /// existing Items. #[serde(rename = "response.create")] ResponseCreate(ResponseCreateEvent), - /// Send this event to cancel an in-progress response. + /// Send this event to cancel an in-progress response. The server will respond with a `response.done` event + /// with a status of `response.status=cancelled`. If there is no response to cancel, the server will respond + /// with an error. It's safe to call `response.cancel` even if no response is in progress, an error will be + /// returned the session will remain unaffected. #[serde(rename = "response.cancel")] ResponseCancel(ResponseCancelEvent), + + /// **WebRTC Only:** Emit to cut off the current audio response. + /// This will trigger the server to stop generating audio and emit a `output_audio_buffer.cleared` event. + /// This event should be preceded by a `response.cancel` client event to stop the generation of the current response. + /// [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc) + #[serde(rename = "output_audio_buffer.clear")] + OutputAudioBufferClear(OutputAudioBufferClearEvent), } impl From<&ClientEvent> for String { @@ -196,8 +295,18 @@ event_from!( ClientEvent, ConversationItemDelete ); +event_from!( + ConversationItemRetrieveEvent, + ClientEvent, + ConversationItemRetrieve +); event_from!(ResponseCreateEvent, ClientEvent, ResponseCreate); event_from!(ResponseCancelEvent, ClientEvent, ResponseCancel); +event_from!( + OutputAudioBufferClearEvent, + ClientEvent, + OutputAudioBufferClear +); message_from_event!(SessionUpdateEvent, ClientEvent); message_from_event!(InputAudioBufferAppendEvent, ClientEvent); @@ -206,8 +315,10 @@ message_from_event!(InputAudioBufferClearEvent, ClientEvent); message_from_event!(ConversationItemCreateEvent, ClientEvent); message_from_event!(ConversationItemTruncateEvent, ClientEvent); message_from_event!(ConversationItemDeleteEvent, ClientEvent); +message_from_event!(ConversationItemRetrieveEvent, ClientEvent); message_from_event!(ResponseCreateEvent, ClientEvent); message_from_event!(ResponseCancelEvent, ClientEvent); +message_from_event!(OutputAudioBufferClearEvent, ClientEvent); impl From for ConversationItemCreateEvent { fn from(value: Item) -> Self { From 70214d71cc24b675ac217471eebc77f0b68e903f Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Sat, 1 Nov 2025 13:13:19 -0700 Subject: [PATCH 02/42] updated server event --- .../src/types/realtime/server_event.rs | 477 +++++++++++++++--- 1 file changed, 400 insertions(+), 77 deletions(-) diff --git a/async-openai/src/types/realtime/server_event.rs b/async-openai/src/types/realtime/server_event.rs index 8795f6e4..0d35690a 100644 --- a/async-openai/src/types/realtime/server_event.rs +++ b/async-openai/src/types/realtime/server_event.rs @@ -1,8 +1,8 @@ use serde::{Deserialize, Serialize}; use super::{ - 
content_part::ContentPart, conversation::Conversation, error::RealtimeAPIError, item::Item, - rate_limit::RateLimit, response_resource::ResponseResource, session_resource::SessionResource, + content_part::ContentPart, error::RealtimeAPIError, item::Item, rate_limit::RateLimit, + response_resource::ResponseResource, session_resource::SessionResource, }; #[derive(Debug, Serialize, Deserialize, Clone)] @@ -30,19 +30,31 @@ pub struct SessionUpdatedEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ConversationCreatedEvent { +pub struct ConversationItemAddedEvent { /// The unique ID of the server event. pub event_id: String, - /// The conversation resource. - pub conversation: Conversation, + /// A single item within a Realtime conversation. + pub item: Item, + /// The ID of the item that precedes this one, if any. This is used to maintain ordering when items are inserted. + pub previous_item_id: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct ConversationItemDoneEvent { + /// The unique ID of the server event. + pub event_id: String, + /// A single item within a Realtime conversation. + pub item: Item, + /// The ID of the item that precedes this one, if any. This is used to maintain ordering when items are inserted. + pub previous_item_id: Option, } #[derive(Debug, Serialize, Deserialize, Clone)] pub struct InputAudioBufferCommitedEvent { /// The unique ID of the server event. pub event_id: String, - /// The ID of the preceding item after which the new item will be inserted. - pub previous_item_id: String, + /// The ID of the preceding item after which the new item will be inserted. Can be null if the item has no predecessor. + pub previous_item_id: Option, /// The ID of the user message item that will be created. pub item_id: String, } @@ -57,7 +69,9 @@ pub struct InputAudioBufferClearedEvent { pub struct InputAudioBufferSpeechStartedEvent { /// The unique ID of the server event. pub event_id: String, - /// Milliseconds since the session started when speech was detected. + /// Milliseconds from the start of all audio written to the buffer during the session when speech was + /// first detected. This will correspond to the beginning of audio sent to the model, and thus includes + /// the `prefix_padding_ms` configured in the Session. pub audio_start_ms: u32, /// The ID of the user message item that will be created when speech stops. pub item_id: String, @@ -67,20 +81,47 @@ pub struct InputAudioBufferSpeechStartedEvent { pub struct InputAudioBufferSpeechStoppedEvent { /// The unique ID of the server event. pub event_id: String, - /// Milliseconds since the session started when speech stopped. + /// Milliseconds since the session started when speech stopped. This will correspond to the end of + /// audio sent to the model, and thus includes the `min_silence_duration_ms` configured in the Session. pub audio_end_ms: u32, /// The ID of the user message item that will be created. pub item_id: String, } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ConversationItemCreatedEvent { +pub struct InputAudioBufferTimeoutTriggeredEvent { /// The unique ID of the server event. pub event_id: String, - /// The ID of the preceding item. - pub previous_item_id: Option, - /// The item that was created. - pub item: Item, + /// Millisecond offset of audio written to the input audio buffer at the time the timeout was triggered. 
+ pub audio_end_ms: u32, + /// Millisecond offset of audio written to the input audio buffer that was after the playback time of the last model response. + pub audio_start_ms: u32, + /// The ID of the item associated with this segment. + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct OutputAudioBufferStartedEvent { + /// The unique ID of the server event. + pub event_id: String, + /// The unique ID of the response that produced the audio. + pub response_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct OutputAudioBufferStoppedEvent { + /// The unique ID of the server event. + pub event_id: String, + /// The unique ID of the response that produced the audio. + pub response_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct OutputAudioBufferClearedEvent { + /// The unique ID of the server event. + pub event_id: String, + /// The unique ID of the response that produced the audio. + pub response_id: String, } #[derive(Debug, Serialize, Deserialize, Clone)] @@ -94,11 +135,46 @@ pub struct LogProb { pub token: String, } +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct InputTokenDetails { + /// Number of audio tokens billed for this request. + pub audio_tokens: u32, + /// Number of text tokens billed for this request. + pub text_tokens: u32, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct TokenUsage { + /// Number of input tokens billed for this request. + pub input_tokens: u32, + /// Number of output tokens generated. + pub output_tokens: u32, + /// Total number of tokens used (input + output). + pub total_tokens: u32, + /// Details about the input tokens billed for this request. + pub input_token_details: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct DurationUsage { + ///Duration of the input audio in seconds. + pub seconds: f32, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(tag = "type")] +pub enum Usage { + #[serde(rename = "tokens")] + TokenUsage(TokenUsage), + #[serde(rename = "duration")] + DurationUsage(DurationUsage), +} + #[derive(Debug, Serialize, Deserialize, Clone)] pub struct ConversationItemInputAudioTranscriptionCompletedEvent { /// The unique ID of the server event. pub event_id: String, - /// The ID of the user message item. + /// The ID of the item containing the audio that is being transcribed. pub item_id: String, /// The index of the content part containing the audio. pub content_index: u32, @@ -106,19 +182,26 @@ pub struct ConversationItemInputAudioTranscriptionCompletedEvent { pub transcript: String, /// Optional per-token log probability data. pub logprobs: Option>, + /// Usage statistics for the transcription, this is billed according to the ASR model's pricing rather than + /// the realtime model's pricing. + pub usage: Usage, } #[derive(Debug, Serialize, Deserialize, Clone)] pub struct ConversationItemInputAudioTranscriptionDeltaEvent { /// The unique ID of the server event. pub event_id: String, - /// The ID of the user message item. + /// The ID of the item containing the audio that is being transcribed. pub item_id: String, - /// The index of the content part containing the audio. + ///The index of the content part in the item's content array. pub content_index: u32, /// The text delta. pub delta: String, - /// Optional per-token log probability data. + /// The log probabilities of the transcription. 
These can be enabled by configurating the session with + /// `"include": ["item.input_audio_transcription.logprobs"]`. Each entry in the array + /// corresponds a log probability of which token would be selected for this chunk of transcription. This + /// can help to identify if it was possible there were multiple valid options for a given chunk of + /// transcription. pub logprobs: Option>, } @@ -154,6 +237,34 @@ pub struct ConversationItemDeletedEvent { pub item_id: String, } +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct ConversationItemRetrievedEvent { + /// The unique ID of the server event. + pub event_id: String, + /// A single item within a Realtime conversation. + pub item: Item, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct ConversationItemInputAudioTranscriptionSegmentEvent { + /// The unique ID of the server event. + pub event_id: String, + /// The ID of the item containing the input audio content. + pub item_id: String, + /// The index of the input audio content part within the item. + pub content_index: u32, + /// The text for this segment. + pub text: String, + /// The segment identifier. + pub id: String, + /// The detected speaker label for this segment. + pub speaker: String, + /// Start time of the segment in seconds. + pub start: f32, + /// End time of the segment in seconds. + pub end: f32, +} + #[derive(Debug, Serialize, Deserialize, Clone)] pub struct ResponseCreatedEvent { /// The unique ID of the server event. @@ -174,11 +285,11 @@ pub struct ResponseDoneEvent { pub struct ResponseOutputItemAddedEvent { /// The unique ID of the server event. pub event_id: String, - /// The ID of the response to which the item belongs. + /// The ID of the Response to which the item belongs. pub response_id: String, - /// The index of the output item in the response. + /// The index of the output item in the Response. pub output_index: u32, - /// The item that was added. + /// A single item within a Realtime conversation. pub item: Item, } @@ -188,9 +299,9 @@ pub struct ResponseOutputItemDoneEvent { pub event_id: String, /// The ID of the response to which the item belongs. pub response_id: String, - /// The index of the output item in the response. + /// The index of the output item in the Response. pub output_index: u32, - /// The completed item. + /// A single item within a Realtime conversation. pub item: Item, } @@ -216,7 +327,7 @@ pub struct ResponseContentPartDoneEvent { pub event_id: String, /// The ID of the response. pub response_id: String, - /// The ID of the item to which the content part was added. + /// The ID of the item. pub item_id: String, /// The index of the output item in the response. pub output_index: u32, @@ -227,7 +338,7 @@ pub struct ResponseContentPartDoneEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseTextDeltaEvent { +pub struct ResponseOutputTextDeltaEvent { /// The unique ID of the server event. pub event_id: String, /// The ID of the response. @@ -243,7 +354,7 @@ pub struct ResponseTextDeltaEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseTextDoneEvent { +pub struct ResponseOutputTextDoneEvent { /// The unique ID of the server event. pub event_id: String, /// The ID of the response. @@ -259,7 +370,7 @@ pub struct ResponseTextDoneEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseAudioTranscriptDeltaEvent { +pub struct ResponseOutputAudioTranscriptDeltaEvent { /// The unique ID of the server event. 
pub event_id: String, /// The ID of the response. @@ -275,7 +386,7 @@ pub struct ResponseAudioTranscriptDeltaEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseAudioTranscriptDoneEvent { +pub struct ResponseOutputAudioTranscriptDoneEvent { /// The unique ID of the server event. pub event_id: String, /// The ID of the response. @@ -291,7 +402,7 @@ pub struct ResponseAudioTranscriptDoneEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseAudioDeltaEvent { +pub struct ResponseOutputAudioDeltaEvent { /// The unique ID of the server event. pub event_id: String, /// The ID of the response. @@ -307,7 +418,7 @@ pub struct ResponseAudioDeltaEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseAudioDoneEvent { +pub struct ResponseOutputAudioDoneEvent { /// The unique ID of the server event. pub event_id: String, /// The ID of the response. @@ -359,78 +470,256 @@ pub struct RateLimitsUpdatedEvent { pub rate_limits: Vec, } +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct MCPListToolsInProgressEvent { + /// The unique ID of the server event. + pub event_id: String, + /// The ID of the MCP list tools item. + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct MCPListToolsCompletedEvent { + /// The unique ID of the server event. + pub event_id: String, + /// The ID of the MCP list tools item. + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct MCPListToolsFailedEvent { + /// The unique ID of the server event. + pub event_id: String, + /// The ID of the MCP list tools item. + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct ResponseMCPCallArgumentsDeltaEvent { + /// The unique ID of the server event. + pub event_id: String, + /// The ID of the response. + pub response_id: String, + /// The ID of the MCP tool call item. + pub item_id: String, + /// The index of the output item in the response. + pub output_index: u32, + /// The JSON-encoded arguments delta. + pub delta: String, + /// If present, indicates the delta text was obfuscated. + #[serde(skip_serializing_if = "Option::is_none")] + pub obfuscation: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct ResponseMCPCallArgumentsDoneEvent { + /// The unique ID of the server event. + pub event_id: String, + /// The ID of the response. + pub response_id: String, + /// The ID of the MCP tool call item. + pub item_id: String, + /// The index of the output item in the response. + pub output_index: u32, + /// The final JSON-encoded arguments string. + pub arguments: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct ResponseMCPCallInProgressEvent { + /// The unique ID of the server event. + pub event_id: String, + /// The index of the output item in the response. + pub output_index: u32, + /// The ID of the MCP tool call item. + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct ResponseMCPCallCompletedEvent { + /// The unique ID of the server event. + pub event_id: String, + /// The index of the output item in the response. + pub output_index: u32, + /// The ID of the MCP tool call item. + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct ResponseMCPCallFailedEvent { + /// The unique ID of the server event. + pub event_id: String, + /// The index of the output item in the response. + pub output_index: u32, + /// The ID of the MCP tool call item. 
+ pub item_id: String, +} + /// These are events emitted from the OpenAI Realtime WebSocket server to the client. #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(tag = "type")] pub enum ServerEvent { - /// Returned when an error occurs. + /// Returned when an error occurs, which could be a client problem or a server problem. + /// Most errors are recoverable and the session will stay open, we recommend to + /// implementors to monitor and log error messages by default. #[serde(rename = "error")] Error(ErrorEvent), - /// Returned when a session is created. Emitted automatically when a new connection is established. + /// Returned when a Session is created. Emitted automatically when a new connection is established as the first server event. + /// This event will contain the default Session configuration. #[serde(rename = "session.created")] SessionCreated(SessionCreatedEvent), - /// Returned when a session is updated. + /// Returned when a session is updated with a `session.update` event, unless there is an error. #[serde(rename = "session.updated")] SessionUpdated(SessionUpdatedEvent), - /// Returned when a conversation is created. Emitted right after session creation. - #[serde(rename = "conversation.created")] - ConversationCreated(ConversationCreatedEvent), - - /// Returned when an input audio buffer is committed, either by the client or automatically in server VAD mode. - #[serde(rename = "input_audio_buffer.committed")] - InputAudioBufferCommited(InputAudioBufferCommitedEvent), - - /// Returned when the input audio buffer is cleared by the client. - #[serde(rename = "input_audio_buffer.cleared")] - InputAudioBufferCleared(InputAudioBufferClearedEvent), - - /// Returned in server turn detection mode when speech is detected. - #[serde(rename = "input_audio_buffer.speech_started")] - InputAudioBufferSpeechStarted(InputAudioBufferSpeechStartedEvent), - - /// Returned in server turn detection mode when speech stops. - #[serde(rename = "input_audio_buffer.speech_stopped")] - InputAudioBufferSpeechStopped(InputAudioBufferSpeechStoppedEvent), - - /// Returned when a conversation item is created. - #[serde(rename = "conversation.item.created")] - ConversationItemCreated(ConversationItemCreatedEvent), - - /// Returned when input audio transcription is enabled and a transcription succeeds. + /// Sent by the server when an Item is added to the default Conversation. This can happen in several cases: + /// - When the client sends a conversation.item.create event + /// - When the input audio buffer is committed. In this case the item will be a user message containing the audio from the buffer. + /// - When the model is generating a Response. In this case the `conversation.item.added` event will be sent when the model starts + /// generating a specific Item, and thus it will not yet have any content (and `status` will be `in_progress`). + /// + /// The event will include the full content of the Item (except when model is generating a Response) except for audio data, + /// which can be retrieved separately with a `conversation.item.retrieve` event if necessary. + #[serde(rename = "conversation.item.added")] + ConversationItemAdded(ConversationItemAddedEvent), + + /// Returned when a conversation item is finalized. + /// + /// The event will include the full content of the Item except for audio data, which can be retrieved + /// separately with a `conversation.item.retrieve` event if needed. 
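+    ///
+    /// A rough sketch of fetching the audio for a finalized item; this assumes `Item`
+    /// exposes an optional `id` and that `ws.send_text` is the client's transport:
+    ///
+    /// ```ignore
+    /// if let ServerEvent::ConversationItemDone(done) = server_event {
+    ///     if let Some(item_id) = done.item.id {
+    ///         // Ask the server for its full representation, including audio data.
+    ///         let retrieve = ClientEvent::ConversationItemRetrieve(
+    ///             ConversationItemRetrieveEvent { event_id: None, item_id },
+    ///         );
+    ///         ws.send_text(String::from(&retrieve)).await?;
+    ///     }
+    /// }
+    /// ```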
+ #[serde(rename = "conversation.item.done")] + ConversationItemDone(ConversationItemDoneEvent), + + /// Returned when a conversation item is retrieved with `conversation.item.retrieve`. + /// This is provided as a way to fetch the server's representation of an item, for example to get access + /// to the post-processed audio data after noise cancellation and VAD. + /// It includes the full content of the Item, including audio data. + #[serde(rename = "conversation.item.retrieved")] + ConversationItemRetrieved(ConversationItemRetrievedEvent), + + /// This event is the output of audio transcription for user audio written to the user audio + /// buffer. Transcription begins when the input audio buffer is committed by the client or + /// server (when VAD is enabled). Transcription runs asynchronously with Response + /// creation, so this event may come before or after the Response events. + /// + /// Realtime API models accept audio natively, and thus input transcription is a separate process + /// run on a separate ASR (Automatic Speech Recognition) model. The transcript + /// may diverge somewhat from the model's interpretation, and should be treated as a rough guide. #[serde(rename = "conversation.item.input_audio_transcription.completed")] ConversationItemInputAudioTranscriptionCompleted( ConversationItemInputAudioTranscriptionCompletedEvent, ), + /// Returned when the text value of an input audio transcription content part is updated with incremental transcription results. #[serde(rename = "conversation.item.input_audio_transcription.delta")] ConversationItemInputAudioTranscriptionDelta(ConversationItemInputAudioTranscriptionDeltaEvent), + /// Returned when an input audio transcription segment is identified for an item. + #[serde(rename = "conversation.item.input_audio_transcription.segment")] + ConversationItemInputAudioTranscriptionSegment( + ConversationItemInputAudioTranscriptionSegmentEvent, + ), + /// Returned when input audio transcription is configured, and a transcription request for a user message failed. + /// These events are separate from other `error` events so that the client can identify the related Item. #[serde(rename = "conversation.item.input_audio_transcription.failed")] ConversationItemInputAudioTranscriptionFailed( ConversationItemInputAudioTranscriptionFailedEvent, ), - /// Returned when an earlier assistant audio message item is truncated by the client. + /// Returned when an earlier assistant audio message item is truncated by the client with a `conversation.item.truncate` event. + /// This event is used to synchronize the server's understanding of the audio with the client's playback. + /// + /// This action will truncate the audio and remove the server-side text transcript to ensure there is no text in the + /// context that hasn't been heard by the user. #[serde(rename = "conversation.item.truncated")] ConversationItemTruncated(ConversationItemTruncatedEvent), - /// Returned when an item in the conversation is deleted. + /// Returned when an item in the conversation is deleted by the client with a `conversation.item.delete` event. + /// This event is used to synchronize the server's understanding of the conversation history with the client's view. #[serde(rename = "conversation.item.deleted")] ConversationItemDeleted(ConversationItemDeletedEvent), - /// Returned when a new Response is created. The first event of response creation, where the response is in an initial state of "in_progress". 
+ /// Returned when an input audio buffer is committed, either by the client or automatically in server VAD mode. + /// The `item_id` property is the ID of the user message item that will be created, + /// thus a `conversation.item.created` event will also be sent to the client. + #[serde(rename = "input_audio_buffer.committed")] + InputAudioBufferCommited(InputAudioBufferCommitedEvent), + + /// Returned when the input audio buffer is cleared by the client with a `input_audio_buffer.clear` event. + #[serde(rename = "input_audio_buffer.cleared")] + InputAudioBufferCleared(InputAudioBufferClearedEvent), + + /// Sent by the server when in `server_vad` mode to indicate that speech has been detected in the audio buffer. + /// This can happen any time audio is added to the buffer (unless speech is already detected). + /// The client may want to use this event to interrupt audio playback or provide visual feedback to the user. + /// + /// The client should expect to receive a `input_audio_buffer.speech_stopped` event when speech stops. + /// The `item_id` property is the ID of the user message item that will be created when speech stops and will + /// also be included in the `input_audio_buffer.speech_stopped` event (unless the client manually commits the + /// audio buffer during VAD activation). + #[serde(rename = "input_audio_buffer.speech_started")] + InputAudioBufferSpeechStarted(InputAudioBufferSpeechStartedEvent), + + /// Returned in `server_vad` mode when the server detects the end of speech in the audio buffer. + /// The server will also send a `conversation.item.created` event with the user message item that is created from the audio buffer. + #[serde(rename = "input_audio_buffer.speech_stopped")] + InputAudioBufferSpeechStopped(InputAudioBufferSpeechStoppedEvent), + + /// Returned when the Server VAD timeout is triggered for the input audio buffer. This is + /// configured with `idle_timeout_ms` in the `turn_detection` settings of the session, and + /// it indicates that there hasn't been any speech detected for the configured duration. + /// + /// The `audio_start_ms` and `audio_end_ms` fields indicate the segment of audio after the + /// last model response up to the triggering time, as an offset from the beginning of audio + /// written to the input audio buffer. This means it demarcates the segment of audio that + /// was silent and the difference between the start and end values will roughly match the configured timeout. + /// + /// The empty audio will be committed to the conversation as an `input_audio` item (there + /// will be a `input_audio_buffer.committed` event) and a model response will be generated. + /// There may be speech that didn't trigger VAD but is still detected by the model, so the model may respond + /// with something relevant to the conversation or a prompt to continue speaking. + #[serde(rename = "input_audio_buffer.timeout_triggered")] + InputAudioBufferTimeoutTriggered(InputAudioBufferTimeoutTriggeredEvent), + + /// *WebRTC Only*: Emitted when the server begins streaming audio to the client. This + /// event is emitted after an audio content part has been added (`response.content_part.added`) to the response. + /// [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc). 
+ #[serde(rename = "output_audio_buffer.started")] + OutputAudioBufferStarted(OutputAudioBufferStartedEvent), + + /// *WebRTC Only*: Emitted when the output audio buffer has been completely drained on + /// the server, and no more audio is forthcoming. This event is emitted after the full response data has been sent + /// to the client (`response.done`). [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc). + #[serde(rename = "output_audio_buffer.stopped")] + OutputAudioBufferStopped(OutputAudioBufferStoppedEvent), + + /// *WebRTC Only*: Emitted when the output audio buffer is cleared. This happens either in + /// VAD mode when the user has interrupted (`input_audio_buffer.speech_started`), or when the client has + /// emitted the `output_audio_buffer.clear` event to manually cut off the current audio response. + /// [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc). + #[serde(rename = "output_audio_buffer.cleared")] + OutputAudioBufferCleared(OutputAudioBufferClearedEvent), + + /// Returned when a new Response is created. The first event of response creation, + /// where the response is in an initial state of `in_progress`. #[serde(rename = "response.created")] ResponseCreated(ResponseCreatedEvent), /// Returned when a Response is done streaming. Always emitted, no matter the final state. + /// The Response object included in the `response.done` event will include all output Items in the Response + /// but will omit the raw audio data. + /// + /// Clients should check the `status` field of the Response to determine if it was successful + /// (`completed`) or if there was another outcome: `cancelled`, `failed`, or `incomplete`. + /// + /// A response will contain all output items that were generated during the response, excluding any audio content. #[serde(rename = "response.done")] ResponseDone(ResponseDoneEvent), - /// Returned when a new Item is created during response generation. + /// Returned when a new Item is created during Response generation. #[serde(rename = "response.output_item.added")] ResponseOutputItemAdded(ResponseOutputItemAddedEvent), @@ -447,32 +736,32 @@ pub enum ServerEvent { #[serde(rename = "response.content_part.done")] ResponseContentPartDone(ResponseContentPartDoneEvent), - /// Returned when the text value of a "text" content part is updated. - #[serde(rename = "response.text.delta")] - ResponseTextDelta(ResponseTextDeltaEvent), + /// Returned when the text value of an "output_text" content part is updated. + #[serde(rename = "response.output_text.delta")] + ResponseOutputTextDelta(ResponseOutputTextDeltaEvent), - /// Returned when the text value of a "text" content part is done streaming. + /// Returned when the text value of an "output_text" content part is done streaming. /// Also emitted when a Response is interrupted, incomplete, or cancelled. - #[serde(rename = "response.text.done")] - ResponseTextDone(ResponseTextDoneEvent), + #[serde(rename = "response.output_text.done")] + ResponseOutputTextDone(ResponseOutputTextDoneEvent), /// Returned when the model-generated transcription of audio output is updated. 
- #[serde(rename = "response.audio_transcript.delta")] - ResponseAudioTranscriptDelta(ResponseAudioTranscriptDeltaEvent), + #[serde(rename = "response.output_audio_transcript.delta")] + ResponseOutputAudioTranscriptDelta(ResponseOutputAudioTranscriptDeltaEvent), /// Returned when the model-generated transcription of audio output is done streaming. /// Also emitted when a Response is interrupted, incomplete, or cancelled. - #[serde(rename = "response.audio_transcript.done")] - ResponseAudioTranscriptDone(ResponseAudioTranscriptDoneEvent), + #[serde(rename = "response.output_audio_transcript.done")] + ResponseOutputAudioTranscriptDone(ResponseOutputAudioTranscriptDoneEvent), /// Returned when the model-generated audio is updated. - #[serde(rename = "response.audio.delta")] - ResponseAudioDelta(ResponseAudioDeltaEvent), + #[serde(rename = "response.output_audio.delta")] + ResponseOutputAudioDelta(ResponseOutputAudioDeltaEvent), /// Returned when the model-generated audio is done. /// Also emitted when a Response is interrupted, incomplete, or cancelled. - #[serde(rename = "response.audio.done")] - ResponseAudioDone(ResponseAudioDoneEvent), + #[serde(rename = "response.output_audio.done")] + ResponseOutputAudioDone(ResponseOutputAudioDoneEvent), /// Returned when the model-generated function call arguments are updated. #[serde(rename = "response.function_call_arguments.delta")] @@ -483,7 +772,41 @@ pub enum ServerEvent { #[serde(rename = "response.function_call_arguments.done")] ResponseFunctionCallArgumentsDone(ResponseFunctionCallArgumentsDoneEvent), - /// Emitted after every "response.done" event to indicate the updated rate limits. + /// Returned when MCP tool call arguments are updated. + #[serde(rename = "response.mcp_call_arguments.delta")] + ResponseMCPCallArgumentsDelta(ResponseMCPCallArgumentsDeltaEvent), + + /// Returned when MCP tool call arguments are finalized during response generation. + #[serde(rename = "response.mcp_call_arguments.done")] + ResponseMCPCallArgumentsDone(ResponseMCPCallArgumentsDoneEvent), + + /// Returned when an MCP tool call is in progress. + #[serde(rename = "response.mcp_call.in_progress")] + ResponseMCPCallInProgress(ResponseMCPCallInProgressEvent), + + /// Returned when an MCP tool call has completed successfully. + #[serde(rename = "response.mcp_call.completed")] + ResponseMCPCallCompleted(ResponseMCPCallCompletedEvent), + + /// Returned when an MCP tool call has failed. + #[serde(rename = "response.mcp_call.failed")] + ResponseMCPCallFailed(ResponseMCPCallFailedEvent), + + /// Returned when listing MCP tools is in progress for an item. + #[serde(rename = "mcp_list_tools.in_progress")] + MCPListToolsInProgress(MCPListToolsInProgressEvent), + + /// Returned when listing MCP tools has completed for an item. + #[serde(rename = "mcp_list_tools.completed")] + MCPListToolsCompleted(MCPListToolsCompletedEvent), + + /// Returned when listing MCP tools has failed for an item. + #[serde(rename = "mcp_list_tools.failed")] + MCPListToolsFailed(MCPListToolsFailedEvent), + + /// Emitted at the beginning of a Response to indicate the updated rate limits. + /// When a Response is created some tokens will be "reserved" for the output tokens, the rate limits + /// shown here reflect that reservation, which is then adjusted accordingly once the Response is completed. 
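+    ///
+    /// For example, a client might log the updated limits while handling server events
+    /// (only the `name` and `limit` fields of `RateLimit` are used here):
+    ///
+    /// ```ignore
+    /// if let ServerEvent::RateLimitsUpdated(update) = server_event {
+    ///     for rate_limit in &update.rate_limits {
+    ///         println!("rate limit {}: max {}", rate_limit.name, rate_limit.limit);
+    ///     }
+    /// }
+    /// ```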
#[serde(rename = "rate_limits.updated")] RateLimitsUpdated(RateLimitsUpdatedEvent), } From 26542d05d387592a4e643fde565498d80735e08b Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Sat, 1 Nov 2025 13:13:30 -0700 Subject: [PATCH 03/42] updated rate limit --- async-openai/src/types/realtime/rate_limit.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/async-openai/src/types/realtime/rate_limit.rs b/async-openai/src/types/realtime/rate_limit.rs index f3fc4aa6..9306e236 100644 --- a/async-openai/src/types/realtime/rate_limit.rs +++ b/async-openai/src/types/realtime/rate_limit.rs @@ -2,7 +2,7 @@ use serde::{Deserialize, Serialize}; #[derive(Debug, Serialize, Deserialize, Clone)] pub struct RateLimit { - /// The name of the rate limit ("requests", "tokens", "input_tokens", "output_tokens"). + /// The name of the rate limit (requests, tokens). pub name: String, /// The maximum allowed value for the rate limit. pub limit: u32, From 6cfc72c6955f7e3cd6e945c6ee9fc12293608f83 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Sat, 1 Nov 2025 16:18:54 -0700 Subject: [PATCH 04/42] updated session configuration --- .../src/types/realtime/session_resource.rs | 448 +++++++++++++++--- 1 file changed, 378 insertions(+), 70 deletions(-) diff --git a/async-openai/src/types/realtime/session_resource.rs b/async-openai/src/types/realtime/session_resource.rs index 2fe1e5b1..9a4fb9a1 100644 --- a/async-openai/src/types/realtime/session_resource.rs +++ b/async-openai/src/types/realtime/session_resource.rs @@ -1,26 +1,22 @@ use serde::{Deserialize, Serialize}; -#[derive(Debug, Serialize, Deserialize, Clone)] -pub enum AudioFormat { - #[serde(rename = "pcm16")] - PCM16, - #[serde(rename = "g711_law")] - G711ULAW, - #[serde(rename = "g711_alaw")] - G711ALAW, -} +use crate::types::responses::RequireApproval; #[derive(Debug, Default, Serialize, Deserialize, Clone)] pub struct AudioTranscription { - /// The language of the input audio. Supplying the input language in ISO-639-1 (e.g. en) format will improve accuracy and latency. + /// The language of the input audio. Supplying the input language in + /// [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format will improve accuracy and latency. #[serde(skip_serializing_if = "Option::is_none")] pub language: Option, - /// The model to use for transcription, current options are gpt-4o-transcribe, gpt-4o-mini-transcribe, and whisper-1. + /// The model to use for transcription. Current options are `whisper-1`, + /// `gpt-4o-mini-transcribe`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. + /// Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels. #[serde(skip_serializing_if = "Option::is_none")] pub model: Option, /// An optional text to guide the model's style or continue a previous audio segment. - /// For whisper-1, the prompt is a list of keywords. For gpt-4o-transcribe models, - /// the prompt is a free text string, for example "expect words related to technology". + /// For `whisper-1`, the [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting). For `gpt-4o-transcribe` models + /// (excluding gpt-4o-transcribe-diarize), the prompt is a free text string, for example + /// "expect words related to technology". 
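+    ///
+    /// For illustration, a transcription configuration using a free-text prompt might look
+    /// like this (the values are placeholders):
+    ///
+    /// ```ignore
+    /// let transcription = AudioTranscription {
+    ///     language: Some("en".to_string()),
+    ///     model: Some("gpt-4o-transcribe".to_string()),
+    ///     prompt: Some("expect words related to technology".to_string()),
+    /// };
+    /// ```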
#[serde(skip_serializing_if = "Option::is_none")] pub prompt: Option, } @@ -28,64 +24,157 @@ pub struct AudioTranscription { #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(tag = "type")] pub enum TurnDetection { - /// Type of turn detection, only "server_vad" is currently supported. + /// Server-side voice activity detection (VAD) which flips on when user speech is detected + /// and off after a period of silence. #[serde(rename = "server_vad")] ServerVAD { - /// Activation threshold for VAD (0.0 to 1.0). - threshold: f32, - /// Amount of audio to include before speech starts (in milliseconds). - prefix_padding_ms: u32, - /// Duration of silence to detect speech stop (in milliseconds). - silence_duration_ms: u32, - /// Whether or not to automatically generate a response when a VAD stop event occurs. #[serde(skip_serializing_if = "Option::is_none")] create_response: Option, + /// Optional timeout after which a model response will be triggered automatically. + /// This is useful for situations in which a long pause from the user is unexpected, + /// such as a phone call. The model will effectively prompt the user to continue the + /// conversation based on the current context. + /// + /// The timeout value will be applied after the last model response's audio has finished + /// playing, i.e. it's set to the response.done time plus audio playback duration. + /// + /// An input_audio_buffer.timeout_triggered event (plus events associated with the Response) + /// will be emitted when the timeout is reached. Idle timeout is currently only supported + /// for server_vad mode. + #[serde(skip_serializing_if = "Option::is_none")] + idle_timeout_ms: Option, + /// Whether or not to automatically interrupt any ongoing response with output to - /// the default conversation (i.e. conversation of auto) when a VAD start event occurs. + /// the default conversation (i.e. `conversation` of `auto`) when a VAD start event occurs. #[serde(skip_serializing_if = "Option::is_none")] interrupt_response: Option, + + /// Used only for server_vad mode. Amount of audio to include before the VAD detected speech + /// (in milliseconds). Defaults to 300ms. + prefix_padding_ms: u32, + /// Used only for server_vad mode. Duration of silence to detect speech stop + /// (in milliseconds). Defaults to 500ms. With shorter values the model will respond + /// more quickly, but may jump in on short pauses from the user. + silence_duration_ms: u32, + + /// Used only for server_vad mode. Activation threshold for VAD (0.0 to 1.0), + /// this defaults to 0.5. A higher threshold will require louder audio to activate + /// the model, and thus might perform better in noisy environments. + threshold: f32, }, + /// Server-side semantic turn detection which uses a model to determine when the user has + /// finished speaking. #[serde(rename = "semantic_vad")] SemanticVAD { - /// The eagerness of the model to respond. - /// `low` will wait longer for the user to continue speaking, - /// `high`` will respond more quickly. `auto`` is the default and is equivalent to `medium` - eagerness: String, - /// Whether or not to automatically generate a response when a VAD stop event occurs. #[serde(skip_serializing_if = "Option::is_none", default)] create_response: Option, + /// Used only for `semantic_vad` mode. The eagerness of the model to respond. + /// `low` will wait longer for the user to continue speaking, `high` will respond more + /// quickly. `auto` is the default and is equivalent to `medium`. 
`low`, `medium`, and `high` + /// have max timeouts of 8s, 4s, and 2s respectively. + eagerness: String, + /// Whether or not to automatically interrupt any ongoing response with output to - /// the default conversation (i.e. conversation of auto) when a VAD start event occurs. + /// the default conversation (i.e. `conversation` of `auto`) when a VAD start event occurs. #[serde(skip_serializing_if = "Option::is_none", default)] interrupt_response: Option, }, } #[derive(Debug, Serialize, Deserialize, Clone)] -pub enum MaxResponseOutputTokens { +pub enum MaxOutputTokens { #[serde(rename = "inf")] Inf, #[serde(untagged)] Num(u16), } +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct FunctionTool { + /// The name of the function. + pub name: String, + /// The description of the function, including guidance on when and how to call it, + /// and guidance about what to tell the user when calling (if anything). + pub description: String, + /// Parameters of the function in JSON Schema. + pub parameters: serde_json::Value, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(untagged)] +pub enum AllowedTools { + /// A string array of allowed tool names + List(Vec), + /// A filter object to specify which tools are allowed. + Filter(MCPAllowedToolsFilter), +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct MCPAllowedToolsFilter { + /// Indicates whether or not a tool modifies data or is read-only. + /// If an MCP server is annotated with [readOnlyHint](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + /// it will match this filter. + #[serde(skip_serializing_if = "Option::is_none")] + pub read_only: Option, + /// List of allowed tool names. + #[serde(skip_serializing_if = "Option::is_none")] + pub tool_names: Option>, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct MCPTool { + /// A label for this MCP server, used to identify it in tool calls. + pub server_label: String, + + /// List of allowed tool names or a filter object. + pub allowed_tools: AllowedTools, + + /// An OAuth access token that can be used with a remote MCP server, either with a custom MCP + /// server URL or a service connector. Your application must handle the OAuth authorization + /// flow and provide the token here. + pub authorization: Option, + + /// Identifier for service connectors, like those available in ChatGPT. One of `server_url` or + /// `connector_id` must be provided. Learn more about service connectors [here](https://platform.openai.com/docs/guides/tools-remote-mcp#connectors). + /// + /// Currently supported `connector_id` values are: + /// - Dropbox: `connector_dropbox` + /// - Gmail: `connector_gmail` + /// - Google Calendar: `connector_googlecalendar` + /// - Google Drive: `connector_googledrive` + /// - Microsoft Teams: `connector_microsoftteams` + /// - Outlook Calendar: `connector_outlookcalendar` + /// - Outlook Email: `connector_outlookemail` + /// - SharePoint: `connector_sharepoint` + pub connector_id: Option, + + /// Optional HTTP headers to send to the MCP server. Use for authentication or other purposes. + pub headers: Option, + + /// Specify which of the MCP server's tools require approval. + pub require_approval: Option, + + /// Optional description of the MCP server, used to provide more context. + pub server_description: Option, + + /// The URL for the MCP server. One of `server_url` or `connector_id` must be provided. 
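+    ///
+    /// A hypothetical configuration pointing at a custom server (the label, tool name, and
+    /// URL below are placeholders):
+    ///
+    /// ```ignore
+    /// let mcp_tool = MCPTool {
+    ///     server_label: "docs".to_string(),
+    ///     allowed_tools: AllowedTools::List(vec!["search_docs".to_string()]),
+    ///     authorization: None,
+    ///     connector_id: None,
+    ///     headers: None,
+    ///     require_approval: None,
+    ///     server_description: Some("Internal documentation search".to_string()),
+    ///     server_url: Some("https://mcp.example.com/sse".to_string()),
+    /// };
+    /// ```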
+ pub server_url: Option, +} + #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(tag = "type")] pub enum ToolDefinition { #[serde(rename = "function")] - Function { - /// The name of the function. - name: String, - /// The description of the function. - description: String, - /// Parameters of the function in JSON Schema. - parameters: serde_json::Value, - }, + Function(FunctionTool), + /// Give the model access to additional tools via remote Model Context Protocol (MCP) servers. + /// [Learn more about MCP](https://platform.openai.com/docs/guides/tools-remote-mcp). + #[serde(rename = "mcp")] + MCP(MCPTool), } #[derive(Debug, Serialize, Deserialize, Clone)] @@ -94,17 +183,36 @@ pub enum FunctionType { Function, } +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(tag = "type")] +pub enum Tool { + /// Use this option to force the model to call a specific function. + #[serde(rename = "function")] + Function { + /// The name of the function to call. + name: String, + }, + /// Use this option to force the model to call a specific tool on a remote MCP server. + #[serde(rename = "mcp")] + MCP { + /// The name of the tool to call on the server. + name: String, + /// The label of the MCP server to use. + server_label: String, + }, +} + #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(rename_all = "lowercase")] pub enum ToolChoice { + /// `auto` means the model can pick between generating a message or calling one or more tools. Auto, + /// `none` means the model will not call any tool and instead generates a message. None, + /// `required` means the model must call one or more tools. Required, #[serde(untagged)] - Function { - r#type: FunctionType, - name: String, - }, + Tool(Tool), } #[derive(Debug, Serialize, Deserialize, Clone)] @@ -115,62 +223,262 @@ pub enum RealtimeVoice { Ballad, Coral, Echo, - Fable, - Onyx, - Nova, + Sage, Shimmer, Verse, + Marin, + Cedar, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(tag = "type")] +pub enum AudioFormat { + /// The PCM audio format. Only a 24kHz sample rate is supported. + #[serde(rename = "audio/pcm")] + PCMAudioFormat { + /// The sample rate of the audio. Always 24000. + rate: u32, + }, + /// The G.711 μ-law format. + #[serde(rename = "audio/pcmu")] + PCMUAudioFormat, + /// The G.711 A-law format. + #[serde(rename = "audio/pcma")] + PCMAAudioFormat, } #[derive(Debug, Serialize, Deserialize, Clone, Default)] -pub struct SessionResource { - /// The default model used for this session. +pub struct G711ULAWAudioFormat { + pub sample_rate: u32, + pub channels: u32, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct AudioInput { + /// The format of the input audio. + pub format: AudioFormat, + /// Configuration for input audio noise reduction. This can be set to null to turn off. + /// Noise reduction filters audio added to the input audio buffer before it is sent to VAD + /// and the model. Filtering the audio can improve VAD and turn detection accuracy + /// (reducing false positives) and model performance by improving perception of the + /// input audio. + pub noise_reduction: NoiseReduction, + /// Configuration for input audio transcription, defaults to off and can be set to `null` to turn off once on. + /// Input audio transcription is not native to the model, since the model consumes audio directly. 
+ /// Transcription runs asynchronously through [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription) + /// and should be treated as guidance of input audio content rather than precisely what the model + /// heard. The client can optionally set the language and prompt for transcription; + /// these offer additional guidance to the transcription service. + pub transcription: AudioTranscription, + + /// Configuration for turn detection, either Server VAD or Semantic VAD. This can + /// be set to null to turn off, in which case the client must manually trigger model response. + /// + /// Server VAD means that the model will detect the start and end of speech + /// based on audio volume and respond at the end of user speech. + /// + /// Semantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) + /// to semantically estimate whether the user has finished speaking, then dynamically sets + /// a timeout based on this probability. For example, if user audio trails off with "uhhm", + /// the model will score a low probability of turn end and wait longer for the user to + /// continue speaking. This can be useful for more natural conversations, but may have a + /// higher latency. + pub turn_detection: TurnDetection, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct AudioOutput { + /// The format of the output audio. + pub format: AudioFormat, + /// The speed of the model's spoken response as a multiple of the original speed. + /// 1.0 is the default speed. 0.25 is the minimum speed. 1.5 is the maximum speed. + /// This value can only be changed in between model turns, not while a response + /// is in progress. + /// + /// This parameter is a post-processing adjustment to the audio after it is generated; + /// it's also possible to prompt the model to speak faster or slower. + pub speed: f32, + /// The voice the model uses to respond. Voice cannot be changed during the session once + /// the model has responded with audio at least once. Current voice options are + /// `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, and `cedar`. + /// We recommend `marin` and `cedar` for best quality. + pub voice: RealtimeVoice, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct Audio { + pub input: AudioInput, + pub output: AudioOutput, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct Prompt { + /// The unique identifier of the prompt template to use. + pub id: String, + /// Optional map of values to substitute in for variables in your prompt. The substitution + /// values can either be strings, or other Response input types like images or files. #[serde(skip_serializing_if = "Option::is_none")] - pub model: Option, + pub variables: Option, + /// Optional version of the prompt template. + #[serde(skip_serializing_if = "Option::is_none")] + pub version: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(rename_all = "lowercase")] +pub enum Tracing { + /// Enables tracing and sets default values for tracing configuration options. Always `auto`. + Auto, + + #[serde(untagged)] + Configuration(TracingConfiguration), +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct TracingConfiguration { + /// The group id to attach to this trace to enable filtering and grouping in the Traces Dashboard. + pub group_id: String, + /// The arbitrary metadata to attach to this trace to enable filtering in the Traces Dashboard.
+ pub metadata: serde_json::Value, + /// The name of the workflow to attach to this trace. This is used to name the trace in the Traces Dashboard. + pub workflow_name: String, +} - /// The set of modalities the model can respond with. To disable audio, set this to ["text"]. +/// The truncation strategy to use for the session. +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(rename_all = "lowercase")] +pub enum Truncation { + /// `auto` is the default truncation strategy. + Auto, + /// `disabled` will disable truncation and emit errors when the conversation exceeds the input + /// token limit. + Disabled, + + /// Retain a fraction of the conversation tokens when the conversation exceeds the input token + /// limit. This allows you to amortize truncations across multiple turns, which can help improve + /// cached token usage. + #[serde(untagged)] + RetentionRatio(RetentionRatioTruncation), +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RetentionRatioTruncation { + /// Fraction of post-instruction conversation tokens to retain (0.0 - 1.0) when the conversation + /// exceeds the input token limit. Setting this to 0.8 means that messages will be dropped + /// until 80% of the maximum allowed tokens are used. This helps reduce the frequency of + /// truncations and improve cache rates. + pub retention_ratio: f32, + + /// Use retention ratio truncation. + pub r#type: String, + + /// Optional custom token limits for this truncation strategy. If not provided, the model's + /// default token limits will be used. #[serde(skip_serializing_if = "Option::is_none")] - pub modalities: Option>, + pub token_limits: Option, +} - //// The default system instructions prepended to model calls. +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct TokenLimits { + /// Maximum tokens allowed in the conversation after instructions (which including tool + /// definitions). For example, setting this to 5,000 would mean that truncation would occur + /// when the conversation exceeds 5,000 tokens after instructions. This cannot be higher + /// than the model's context window size minus the maximum output tokens. + pub post_instructions: u32, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct SessionResource { + /// The type of session to create. Always realtime for the Realtime API. + pub r#type: String, + + pub audio: Audio, + + /// Additional fields to include in server outputs. + /// + /// `item.input_audio_transcription.logprobs`: Include logprobs for input audio transcription. + #[serde(skip_serializing_if = "Option::is_none")] + pub include: Option>, + + /// The default system instructions (i.e. system message) prepended to model calls. + /// This field allows the client to guide the model on desired responses. + /// The model can be instructed on response content and format, + /// (e.g. "be extremely succinct", "act friendly", "here are examples of good responses") + /// and on audio behavior (e.g. "talk quickly", "inject emotion into your voice", + /// "laugh frequently"). The instructions are not guaranteed to be followed by the model, but + /// they provide guidance to the model on the desired behavior. + /// + /// Note that the server sets default instructions which will be used if this field is not set + /// and are visible in the `session.created` event at the start of the session. #[serde(skip_serializing_if = "Option::is_none")] pub instructions: Option, - /// The voice the model uses to respond. 
Cannot be changed once the model has responded with audio at least once. + /// Maximum number of output tokens for a single assistant response, + /// inclusive of tool calls. Provide an integer between 1 and 4096 to limit output tokens, + /// or `inf` for the maximum available tokens for a given model. Defaults to `inf`. #[serde(skip_serializing_if = "Option::is_none")] - pub voice: Option, + pub max_output_tokens: Option, - /// The format of input audio. Options are "pcm16", "g711_ulaw", or "g711_alaw". + /// The Realtime model used for this session. #[serde(skip_serializing_if = "Option::is_none")] - pub input_audio_format: Option, + pub model: Option, - /// The format of output audio. Options are "pcm16", "g711_ulaw", or "g711_alaw". + /// The set of modalities the model can respond with. It defaults to + /// `["audio"]`, indicating that the model will respond with audio plus a transcript. `["text"]` + /// can be used to make the model respond with text only. It is not possible to request both + /// `text` and `audio` at the same time. #[serde(skip_serializing_if = "Option::is_none")] - pub output_audio_format: Option, + pub output_modalities: Option>, - /// Configuration for input audio transcription. Can be set to null to turn off. + /// Reference to a prompt template and its variables. + /// [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). #[serde(skip_serializing_if = "Option::is_none")] - pub input_audio_transcription: Option, + pub prompt: Option, - /// Configuration for turn detection. Can be set to null to turn off. #[serde(skip_serializing_if = "Option::is_none")] - pub turn_detection: Option, + /// How the model chooses tools. Provide one of the string modes or force a specific + /// function/MCP tool. + pub tool_choice: Option, - /// Tools (functions) available to the model. + /// Tools available to the model. #[serde(skip_serializing_if = "Option::is_none")] pub tools: Option>, + /// Realtime API can write session traces to the [Traces Dashboard](https://platform.openai.com/logs?api=traces). + /// Set to null to disable tracing. Once tracing is enabled for a session, the configuration cannot be modified. + /// + /// `auto` will create a trace for the session with default values for the workflow name, + /// group id, and metadata. #[serde(skip_serializing_if = "Option::is_none")] - /// How the model chooses tools. - pub tool_choice: Option, + pub tracing: Option, + /// When the number of tokens in a conversation exceeds the model's input token limit, + /// the conversation will be truncated, meaning messages (starting from the oldest) will not be + /// included in the model's context. A 32k context model with 4,096 max output tokens can + /// only include 28,224 tokens in the context before truncation occurs. Clients can configure + /// truncation behavior to truncate with a lower max token limit, which is an effective way to + /// control token usage and cost. Truncation will reduce the number of cached tokens on the next + /// turn (busting the cache), since messages are dropped from the beginning of the context. + /// However, clients can also configure truncation to retain messages up to a fraction of the + /// maximum context size, which will reduce the need for future truncations and thus improve + /// the cache rate. Truncation can be disabled entirely, which means the server will never + /// truncate but would instead return an error if the conversation exceeds the model's input + /// token limit.
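    /// As a brief sketch (assuming `truncation` is an `Option<Truncation>`, that `token_limits`
    /// is an `Option<TokenLimits>`, and that the serialized tag is `"retention_ratio"`), a
    /// retention-ratio strategy could be configured like this:
    ///
    /// ```ignore
    /// let truncation = Some(Truncation::RetentionRatio(RetentionRatioTruncation {
    ///     // Keep 80% of the post-instruction tokens whenever truncation kicks in.
    ///     retention_ratio: 0.8,
    ///     // Assumed serialized tag for this strategy.
    ///     r#type: "retention_ratio".to_string(),
    ///     // Optional custom limit; use `None` to fall back to the model's defaults.
    ///     token_limits: Some(TokenLimits { post_instructions: 28_224 }),
    /// }));
    /// ```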
#[serde(skip_serializing_if = "Option::is_none")] - /// Sampling temperature for the model. - pub temperature: Option, + pub truncation: Option, +} - /// Maximum number of output tokens for a single assistant response, inclusive of tool calls. - /// Provide an integer between 1 and 4096 to limit output tokens, or "inf" for the maximum available tokens for a given model. - /// Defaults to "inf". - #[serde(skip_serializing_if = "Option::is_none")] - pub max_response_output_tokens: Option, +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(rename_all = "snake_case")] +pub enum NoiseReductionType { + NearField, + FarField, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct NoiseReduction { + /// Type of noise reduction. `near_field` is for close-talking microphones such as + /// headphones, `far_field` is for far-field microphones such as laptop or conference + /// room microphones. + pub r#type: NoiseReductionType, } From 2dc4467c572e3ca9d596b9dfe1f7dfd78b9d0266 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Sat, 1 Nov 2025 16:25:22 -0700 Subject: [PATCH 05/42] transctiption session configuration --- .../src/types/realtime/session_resource.rs | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/async-openai/src/types/realtime/session_resource.rs b/async-openai/src/types/realtime/session_resource.rs index 9a4fb9a1..ded5c759 100644 --- a/async-openai/src/types/realtime/session_resource.rs +++ b/async-openai/src/types/realtime/session_resource.rs @@ -387,6 +387,7 @@ pub struct TokenLimits { pub post_instructions: u32, } +/// Realtime session object configuration. #[derive(Debug, Serialize, Deserialize, Clone)] pub struct SessionResource { /// The type of session to create. Always realtime for the Realtime API. @@ -482,3 +483,24 @@ pub struct NoiseReduction { /// room microphones. pub r#type: NoiseReductionType, } + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct TranscriptionAudio { + pub input: AudioInput, +} + +/// Realtime transcription session object configuration. +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct TranscriptionSession { + /// The type of session to create. Always `transcription` for transcription sessions. + pub r#type: String, + + /// Configuration for input and output audio. + pub audio: TranscriptionAudio, + + /// Additional fields to include in server outputs. + /// + /// `item.input_audio_transcription.logprobs`: Include logprobs for input audio transcription. 
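    /// For example (assuming `include` is an `Option<Vec<String>>`):
    ///
    /// ```ignore
    /// let include = Some(vec!["item.input_audio_transcription.logprobs".to_string()]);
    /// ```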
+ #[serde(skip_serializing_if = "Option::is_none")] + pub include: Option>, +} From 7623dae60a9e710dd88e6d7f1f5d14fad338aab2 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Sat, 1 Nov 2025 18:46:31 -0700 Subject: [PATCH 06/42] udpates to realtime types --- .../src/types/realtime/client_event.rs | 10 +- .../src/types/realtime/conversation.rs | 13 +- .../src/types/realtime/response_resource.rs | 220 ++++++++++++++++-- .../src/types/realtime/server_event.rs | 11 +- .../src/types/realtime/session_resource.rs | 21 +- 5 files changed, 228 insertions(+), 47 deletions(-) diff --git a/async-openai/src/types/realtime/client_event.rs b/async-openai/src/types/realtime/client_event.rs index 7a49e836..3a0ddf54 100644 --- a/async-openai/src/types/realtime/client_event.rs +++ b/async-openai/src/types/realtime/client_event.rs @@ -1,9 +1,11 @@ use serde::{Deserialize, Serialize}; use tokio_tungstenite::tungstenite::Message; -use super::{item::Item, session_resource::SessionResource}; +use crate::types::realtime::{ResponseCreate, Session}; -#[derive(Debug, Serialize, Deserialize, Clone, Default)] +use super::item::Item; + +#[derive(Debug, Serialize, Deserialize, Clone)] pub struct SessionUpdateEvent { /// Optional client-generated ID used to identify this event. /// This is an arbitrary string that a client may assign. It will be passed @@ -12,7 +14,7 @@ pub struct SessionUpdateEvent { #[serde(skip_serializing_if = "Option::is_none")] pub event_id: Option, /// Update the Realtime session. Choose either a realtime session or a transcription session. - pub session: SessionResource, + pub session: Session, } #[derive(Debug, Serialize, Deserialize, Clone, Default)] @@ -101,7 +103,7 @@ pub struct ResponseCreateEvent { pub event_id: Option, /// Create a new Realtime response with these parameters - pub response: Option, + pub response: Option, } #[derive(Debug, Serialize, Deserialize, Clone, Default)] diff --git a/async-openai/src/types/realtime/conversation.rs b/async-openai/src/types/realtime/conversation.rs index 3ea43bd8..e678ede8 100644 --- a/async-openai/src/types/realtime/conversation.rs +++ b/async-openai/src/types/realtime/conversation.rs @@ -1,10 +1,9 @@ use serde::{Deserialize, Serialize}; -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct Conversation { - /// The unique ID of the conversation. - pub id: String, - - /// The object type, must be "realtime.conversation". - pub object: String, +#[derive(Debug, Serialize, Deserialize, Clone, Default)] +#[serde(rename_all = "lowercase")] +pub enum Conversation { + #[default] + Auto, + None, } diff --git a/async-openai/src/types/realtime/response_resource.rs b/async-openai/src/types/realtime/response_resource.rs index a6c6c32f..a32b5600 100644 --- a/async-openai/src/types/realtime/response_resource.rs +++ b/async-openai/src/types/realtime/response_resource.rs @@ -1,12 +1,73 @@ use serde::{Deserialize, Serialize}; -use super::item::Item; +use crate::types::realtime::{ + AudioFormat, Conversation, MaxOutputTokens, Prompt, RealtimeVoice, ToolChoice, ToolDefinition, +}; #[derive(Debug, Serialize, Deserialize, Clone)] pub struct Usage { - pub total_tokens: u32, + /// Details about the input tokens used in the Response. Cached tokens are tokens from previous + /// turns in the conversation that are included as context for the current response. Cached tokens + /// here are counted as a subset of input tokens, meaning input tokens will include cached and + /// uncached tokens. 
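    /// A small sketch of splitting billed input into cached and uncached portions (assuming
    /// `input_token_details` is an `Option<InputTokenDetails>` and the counts are `u32`s):
    ///
    /// ```ignore
    /// let cached = usage
    ///     .input_token_details
    ///     .as_ref()
    ///     .and_then(|details| details.cached_tokens)
    ///     .unwrap_or(0);
    /// // Cached tokens are a subset of `input_tokens`, so the rest were uncached.
    /// let uncached = usage.input_tokens - cached;
    /// ```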
+ #[serde(skip_serializing_if = "Option::is_none")] + pub input_token_details: Option, + + /// The number of input tokens used in the Response, including text and audio tokens. pub input_tokens: u32, + + #[serde(skip_serializing_if = "Option::is_none")] + pub output_token_details: Option, + + /// The number of output tokens sent in the Response, including text and audio tokens. pub output_tokens: u32, + + /// The total number of tokens in the Response including input and output text and audio tokens. + pub total_tokens: u32, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct InputTokenDetails { + /// The number of audio tokens used as input for the Response. + #[serde(skip_serializing_if = "Option::is_none")] + pub audio_tokens: Option, + /// The number of cached tokens used as input for the Response. + #[serde(skip_serializing_if = "Option::is_none")] + pub cached_tokens: Option, + + /// Details about the cached tokens used as input for the Response. + pub cached_token_details: Option, + + /// The number of image tokens used as input for the Response. + #[serde(skip_serializing_if = "Option::is_none")] + pub image_tokens: Option, + + /// The number of text tokens used as input for the Response. + #[serde(skip_serializing_if = "Option::is_none")] + pub text_tokens: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct CachedTokenDetails { + /// The number of cached audio tokens used as input for the Response. + #[serde(skip_serializing_if = "Option::is_none")] + pub audio_tokens: Option, + + /// The number of cached image tokens used as input for the Response. + #[serde(skip_serializing_if = "Option::is_none")] + pub image_tokens: Option, + + /// The number of cached text tokens used as input for the Response. + #[serde(skip_serializing_if = "Option::is_none")] + pub text_tokens: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct OutputTokenDetails { + #[serde(skip_serializing_if = "Option::is_none")] + pub text_tokens: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub audio_tokens: Option, } #[derive(Debug, Serialize, Deserialize, Clone)] @@ -20,42 +81,155 @@ pub enum ResponseStatus { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct FailedError { +pub struct Error { pub code: String, - pub message: String, + pub r#type: String, } #[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(rename_all = "snake_case")] -pub enum IncompleteReason { - Interruption, - MaxOutputTokens, - ContentFilter, +pub struct ResponseStatusDetail { + /// A description of the error that caused the response to fail, populated when the status is failed. + pub error: Option, + /// The reason the Response did not complete. For a `cancelled` Response, one of `turn_detected` + /// (the server VAD detected a new start of speech) or `client_cancelled` (the client sent a cancel + /// event). For an incomplete Response, one of `max_output_tokens` or `content_filter` (the + /// server-side safety filter activated and cut off the response). + pub reason: Option, + /// The type of error that caused the response to fail, corresponding with the `status` + /// field (`completed`, `cancelled`, `incomplete`, `failed`). + pub r#type: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct ResponseAudioOutput { + /// The format of the output audio. + pub format: AudioFormat, + + /// The voice the model uses to respond. Voice cannot be changed during the session once + /// the model has responded with audio at least once. 
Current voice options are + /// `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, and `cedar`. + /// We recommend `marin` and `cedar` for best quality. + pub voice: RealtimeVoice, } #[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(tag = "type")] -pub enum ResponseStatusDetail { - #[serde(rename = "incomplete")] - Incomplete { reason: IncompleteReason }, - #[serde(rename = "failed")] - Failed { error: Option }, - #[serde(rename = "cancelled")] - Cancelled { reason: String }, +pub struct ResponseAudio { + /// Configuration for audio output. + pub output: ResponseAudioOutput, } +/// The response resource. #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseResource { - /// The unique ID of the response. +pub struct ResponseCreate { + /// Configuration for audio input and output. + pub audio: ResponseAudio, + + /// Controls which conversation the response is added to. Currently supports auto and none, + /// with auto as the default value. The auto value means that the contents of the response + /// will be added to the default conversation. Set this to none to create an out-of-band + /// response which will not add items to default conversation. + pub conversation: Conversation, + + /// Input items to include in the prompt for the model. Using this field creates a new context + /// for this Response instead of using the default conversation. An empty array `[]` will clear + /// the context for this Response. Note that this can include references to items that + /// previously appeared in the session using their id. + pub input: Vec, // TODO: implement types + + /// The default system instructions (i.e. system message) prepended to model calls. + /// This field allows the client to guide the model on desired responses. + /// The model can be instructed on response content and format, (e.g. "be extremely succinct", + /// "act friendly", "here are examples of good responses") and on audio behavior + /// (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). + /// The instructions are not guaranteed to be followed by the model, but they provide + /// guidance to the model on the desired behavior. Note that the server sets default + /// instructions which will be used if this field is not set and are visible in + /// the `session.created` event at the start of the session. + pub instructions: String, + + /// Maximum number of output tokens for a single assistant response, inclusive of tool calls. + /// Provide an integer between 1 and 4096 to limit output tokens, or inf for the maximum + /// available tokens for a given model. Defaults to `inf`. + pub max_output_tokens: MaxOutputTokens, + + /// Set of 16 key-value pairs that can be attached to an object. This can be useful for + /// storing additional information about the object in a structured format, and querying + /// for objects via API or the dashboard. + /// + /// Keys are strings with a maximum length of 64 characters. Values are strings with a + /// maximum length of 512 characters. + #[serde(skip_serializing_if = "Option::is_none")] + pub metadata: Option, + + /// The set of modalities the model used to respond, currently the only possible values + /// are [\"audio\"], [\"text\"]. Audio output always include a text transcript. + /// Setting the output to mode `text` will disable audio output from the model. + pub output_modalities: Vec, + + /// Reference to a prompt template and its variables. 
+ /// [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). + #[serde(skip_serializing_if = "Option::is_none")] + pub prompt: Option, + + /// How the model chooses tools. Provide one of the string modes or force a specific + /// function/MCP tool. + #[serde(skip_serializing_if = "Option::is_none")] + pub tool_choice: Option, + + /// Tools available to the model. + #[serde(skip_serializing_if = "Option::is_none")] + pub tools: Option>, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct Response { + /// Configuration for audio output. + pub audio: ResponseAudio, + + /// Which conversation the response is added to, determined by the `conversation` field in the + /// `response.create` event. If `auto`, the response will be added to the default conversation + /// and the value of `conversation_id` will be an id like `conv_1234`. If `none`, the response + /// will not be added to any conversation and the value of `conversation_id` will be `null`. + /// If responses are being triggered automatically by VAD, the response will be added to the + /// default conversation. + #[serde(skip_serializing_if = "Option::is_none")] + pub conversation_id: Option, + + /// The unique ID of the response, will look like `resp_1234`. pub id: String, + + /// Maximum number of output tokens for a single assistant response, inclusive of tool calls, + /// that was used in this response. + pub max_output_tokens: MaxOutputTokens, + + /// Set of 16 key-value pairs that can be attached to an object. This can be useful for + /// storing additional information about the object in a structured format, and querying + /// for objects via API or the dashboard. + /// + /// Keys are strings with a maximum length of 64 characters. Values are strings with a + /// maximum length of 512 characters. + #[serde(skip_serializing_if = "Option::is_none")] + pub metadata: Option, + /// The object type, must be "realtime.response". pub object: String, + + /// The list of output items generated by the response. + pub output: Vec, // TODO: implement types + + /// The set of modalities the model used to respond, currently the only possible values + /// are [\"audio\"], [\"text\"]. Audio output always includes a text transcript. + /// Setting the output to mode `text` will disable audio output from the model. + pub output_modalities: Vec, + + /// The final status of the response (`completed`, `cancelled`, `failed`, `incomplete`, or `in_progress`). pub status: ResponseStatus, + /// Additional details about the status. pub status_details: Option, - /// The list of output items generated by the response. - pub output: Vec, - /// Usage statistics for the response. + /// Usage statistics for the Response; this will correspond to billing. A Realtime API session + /// will maintain a conversation context and append new Items to the Conversation, thus output + /// from previous turns (text and audio tokens) will become the input for later turns.
pub usage: Option, } diff --git a/async-openai/src/types/realtime/server_event.rs b/async-openai/src/types/realtime/server_event.rs index 0d35690a..6d16742c 100644 --- a/async-openai/src/types/realtime/server_event.rs +++ b/async-openai/src/types/realtime/server_event.rs @@ -1,8 +1,9 @@ use serde::{Deserialize, Serialize}; +use crate::types::realtime::{Response, Session}; + use super::{ content_part::ContentPart, error::RealtimeAPIError, item::Item, rate_limit::RateLimit, - response_resource::ResponseResource, session_resource::SessionResource, }; #[derive(Debug, Serialize, Deserialize, Clone)] @@ -18,7 +19,7 @@ pub struct SessionCreatedEvent { /// The unique ID of the server event. pub event_id: String, /// The session resource. - pub session: SessionResource, + pub session: Session, } #[derive(Debug, Serialize, Deserialize, Clone)] @@ -26,7 +27,7 @@ pub struct SessionUpdatedEvent { /// The unique ID of the server event. pub event_id: String, /// The updated session resource. - pub session: SessionResource, + pub session: Session, } #[derive(Debug, Serialize, Deserialize, Clone)] @@ -270,7 +271,7 @@ pub struct ResponseCreatedEvent { /// The unique ID of the server event. pub event_id: String, /// The response resource. - pub response: ResponseResource, + pub response: Response, } #[derive(Debug, Serialize, Deserialize, Clone)] @@ -278,7 +279,7 @@ pub struct ResponseDoneEvent { /// The unique ID of the server event. pub event_id: String, /// The response resource. - pub response: ResponseResource, + pub response: Response, } #[derive(Debug, Serialize, Deserialize, Clone)] diff --git a/async-openai/src/types/realtime/session_resource.rs b/async-openai/src/types/realtime/session_resource.rs index ded5c759..3034fdc0 100644 --- a/async-openai/src/types/realtime/session_resource.rs +++ b/async-openai/src/types/realtime/session_resource.rs @@ -387,12 +387,20 @@ pub struct TokenLimits { pub post_instructions: u32, } -/// Realtime session object configuration. #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct SessionResource { - /// The type of session to create. Always realtime for the Realtime API. - pub r#type: String, +#[serde(tag = "type")] +pub enum Session { + /// The type of session to create. Always `realtime` for the Realtime API. + #[serde(rename = "realtime")] + RealtimeSessionConfiguration(RealtimeSession), + /// The type of session to create. Always `transcription` for transcription sessions. + #[serde(rename = "transcription")] + TranscriptionSessionConfiguration(TranscriptionSession), +} +/// Realtime session object configuration. +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeSession { pub audio: Audio, /// Additional fields to include in server outputs. @@ -436,9 +444,9 @@ pub struct SessionResource { #[serde(skip_serializing_if = "Option::is_none")] pub prompt: Option, - #[serde(skip_serializing_if = "Option::is_none")] /// How the model chooses tools. Provide one of the string modes or force a specific /// function/MCP tool. + #[serde(skip_serializing_if = "Option::is_none")] pub tool_choice: Option, /// Tools available to the model. @@ -492,9 +500,6 @@ pub struct TranscriptionAudio { /// Realtime transcription session object configuration. #[derive(Debug, Serialize, Deserialize, Clone)] pub struct TranscriptionSession { - /// The type of session to create. Always `transcription` for transcription sessions. - pub r#type: String, - /// Configuration for input and output audio. 
pub audio: TranscriptionAudio, From 793595019b3fa43f3cf469972b7138c1ef98f2d4 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Sat, 1 Nov 2025 19:56:11 -0700 Subject: [PATCH 07/42] updated Item --- async-openai/src/types/realtime/item.rs | 300 ++++++++++++++---- .../src/types/realtime/response_resource.rs | 7 +- 2 files changed, 241 insertions(+), 66 deletions(-) diff --git a/async-openai/src/types/realtime/item.rs b/async-openai/src/types/realtime/item.rs index 3af7d0d9..c6571191 100644 --- a/async-openai/src/types/realtime/item.rs +++ b/async-openai/src/types/realtime/item.rs @@ -1,99 +1,273 @@ use serde::{Deserialize, Serialize}; #[derive(Debug, Serialize, Deserialize, Clone)] +pub struct SystemMessageContent { + /// The text content. + pub text: String, + /// The content type. Always `input_text` for system messages. + pub r#type: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct SystemMessage { + /// The content of the message. + pub content: Vec, + + /// The unique ID of the item. This may be provided by the client or generated by the server. + pub id: String, + + /// Identifier for the API object being returned - always `realtime.item`. + /// Optional when creating a new item. + pub object: Option, + + /// The status of the item. Has no effect on the conversation. + pub status: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct UserMessageContentInputText { + /// The text content (for `input_text`). + pub text: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct UserMessageContentInputAudio { + /// Base64-encoded audio bytes (for `input_audio`), these will be parsed as the + /// format specified in the session input audio type configuration. + /// This defaults to PCM 16-bit 24kHz mono if not specified. + pub audio: String, + /// Transcript of the audio (for `input_audio`). This is not sent to the model, + /// but will be attached to the message item for reference. + pub transcript: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, Default)] #[serde(rename_all = "snake_case")] -pub enum ItemType { - Message, - FunctionCall, - FunctionCallOutput, +pub enum ImageDetail { + #[default] + Auto, + Low, + High, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct UserMessageContentInputImage { + /// Base64-encoded image bytes (for `input_image`) as a data URI. + /// For example `data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA...`. + /// Supported formats are PNG and JPEG. + pub image_url: String, + /// The detail level of the image (for `input_image`). `auto` will default to `high`. + pub detail: ImageDetail, } #[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(tag = "type")] #[serde(rename_all = "snake_case")] -pub enum ItemStatus { - Completed, - InProgress, - Incomplete, +pub enum UserMessageContent { + InputText(UserMessageContentInputText), + InputAudio(UserMessageContentInputAudio), + InputImage(UserMessageContentInputImage), } #[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(rename_all = "lowercase")] -pub enum ItemRole { - User, - Assistant, - System, +pub struct UserMessage { + /// The content of the message. + pub content: Vec, + + /// The unique ID of the item. This may be provided by the client or generated by the server. + pub id: String, + + /// Identifier for the API object being returned - always `realtime.item`. + /// Optional when creating a new item. + pub object: Option, + + /// The status of the item. Has no effect on the conversation. 
+ pub status: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct AssistantMessageContentOutputText { + /// The text content + pub text: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct AssistantMessageContentOutputAudio { + /// Base64-encoded audio bytes, these will be parsed as the format specified + /// in the session output audio type configuration. This defaults to PCM 16-bit + /// 24kHz mono if not specified. + pub audio: String, + /// The transcript of the audio content, this will always be present if the + /// output type is `audio`. + pub transcript: String, } #[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(tag = "type")] #[serde(rename_all = "snake_case")] -pub enum ItemContentType { - InputText, - InputAudio, - Text, - Audio, +pub enum AssistantMessageContent { + OutputText(AssistantMessageContentOutputText), + OutputAudio(AssistantMessageContentOutputAudio), } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ItemContent { - /// The content type ("input_text", "input_audio", "text", "audio"). - pub r#type: ItemContentType, +pub struct AssistantMessage { + /// The content of the message. + pub content: Vec, - /// The text content. - #[serde(skip_serializing_if = "Option::is_none")] - pub text: Option, + /// The unique ID of the item. This may be provided by the client or generated by the server. + pub id: String, - /// Base64-encoded audio bytes. - #[serde(skip_serializing_if = "Option::is_none")] - pub audio: Option, + /// Identifier for the API object being returned - always `realtime.item`. + /// Optional when creating a new item. + pub object: Option, - /// The transcript of the audio. - #[serde(skip_serializing_if = "Option::is_none")] - pub transcript: Option, + /// The status of the item. Has no effect on the conversation. + pub status: String, } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct Item { - /// The unique ID of the item. - #[serde(skip_serializing_if = "Option::is_none")] - pub id: Option, +#[serde(tag = "role")] +#[serde(rename_all = "lowercase")] +pub enum Message { + System(SystemMessage), + User(UserMessage), + Assistant(AssistantMessage), +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct FunctionCall { + /// The arguments of the function call. This is a JSON-encoded string representing + /// the arguments passed to the function, for example {"arg1": "value1", "arg2": 42}. + pub arguments: String, - /// The type of the item ("message", "function_call", "function_call_output"). - #[serde(skip_serializing_if = "Option::is_none")] - pub r#type: Option, + /// The name of the function being called. + pub name: String, - /// The status of the item ("completed", "in_progress", "incomplete"). - #[serde(skip_serializing_if = "Option::is_none")] - pub status: Option, + /// The ID of the function call. + pub call_id: String, - /// The role of the message sender ("user", "assistant", "system"). - #[serde(skip_serializing_if = "Option::is_none")] - pub role: Option, + /// The unique ID of the item. This may be provided by the client or generated by the server. + pub id: String, - /// The content of the message. - #[serde(skip_serializing_if = "Option::is_none")] - pub content: Option>, + /// Identifier for the API object being returned - always `realtime.item`. + /// Optional when creating a new item. + pub object: Option, - /// The ID of the function call (for "function_call" items). 
- #[serde(skip_serializing_if = "Option::is_none")] - pub call_id: Option, + /// The status of the item. Has no effect on the conversation. + pub status: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct FunctionCallOutput { + /// The ID of the function call this output is for. + pub call_id: String, - /// The name of the function being called (for "function_call" items). - #[serde(skip_serializing_if = "Option::is_none")] - pub name: Option, + /// The output of the function call, this is free text and can contain any information + /// or simply be empty. + pub output: String, - /// The arguments of the function call (for "function_call" items). - #[serde(skip_serializing_if = "Option::is_none")] - pub arguments: Option, + /// The unique ID of the item. This may be provided by the client or generated by the server. + pub id: String, - /// The output of the function call (for "function_call_output" items). - #[serde(skip_serializing_if = "Option::is_none")] - pub output: Option, + /// Identifier for the API object being returned - always `realtime.item`. + /// Optional when creating a new item. + pub object: Option, + + /// The status of the item. Has no effect on the conversation. + pub status: String, } -impl TryFrom for Item { - type Error = serde_json::Error; +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct McpApprovalResponse { + /// The ID of the approval request being answered. + pub approval_request_id: String, + + /// Whether the request was approved. + pub approved: bool, + + /// The unique ID of the approval response. + pub id: String, - fn try_from(value: serde_json::Value) -> Result { - serde_json::from_value(value) - } + /// Optional reason for the decision. + pub reason: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct AvailableMcpTool { + /// The JSON schema describing the tool's input. + pub input_schema: serde_json::Value, + + /// The name of the tool. + pub name: String, + + /// Additional annotations about the tool. + pub annotations: Option, + + /// The description of the tool. + pub description: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct McpListTools { + /// The label of the MCP server. + pub server_label: String, + + /// The tools available on the server. + pub tools: Vec, + + /// The unique ID of the list. + pub id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct McpApprovalRequest { + /// A JSON string of arguments for the tool. + pub arguments: String, + + /// The unique ID of the approval request. + pub id: String, + + /// The name of the tool to run. + pub name: String, + + /// The label of the MCP server making the request. + pub server_label: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct McpCall { + /// A JSON string of the arguments passed to the tool. + pub arguments: String, + + /// The unique ID of the tool call. + pub id: String, + + /// The name of the tool that was run. + pub name: String, + + /// The label of the MCP server running the tool. + pub server_label: String, + + /// The ID of an associated approval request, if any. + pub approval_request_id: Option, + + /// The error from the tool call, if any. + pub error: Option, // TODO: implement type + + /// The output from the tool call. 
+ pub output: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(tag = "type")] +#[serde(rename_all = "snake_case")] +pub enum Item { + Message(Message), + FunctionCall(FunctionCall), + FunctionCallOutput(FunctionCallOutput), + McpApprovalResponse(McpApprovalResponse), + McpListTools(McpListTools), + McpCall(McpCall), + McpApprovalRequest(McpApprovalRequest), } diff --git a/async-openai/src/types/realtime/response_resource.rs b/async-openai/src/types/realtime/response_resource.rs index a32b5600..0886eae1 100644 --- a/async-openai/src/types/realtime/response_resource.rs +++ b/async-openai/src/types/realtime/response_resource.rs @@ -1,7 +1,8 @@ use serde::{Deserialize, Serialize}; use crate::types::realtime::{ - AudioFormat, Conversation, MaxOutputTokens, Prompt, RealtimeVoice, ToolChoice, ToolDefinition, + AudioFormat, Conversation, Item, MaxOutputTokens, Prompt, RealtimeVoice, ToolChoice, + ToolDefinition, }; #[derive(Debug, Serialize, Deserialize, Clone)] @@ -134,7 +135,7 @@ pub struct ResponseCreate { /// for this Response instead of using the default conversation. An empty array `[]` will clear /// the context for this Response. Note that this can include references to items that /// previously appeared in the session using their id. - pub input: Vec, // TODO: implement types + pub input: Vec, /// The default system instructions (i.e. system message) prepended to model calls. /// This field allows the client to guide the model on desired responses. @@ -215,7 +216,7 @@ pub struct Response { pub object: String, /// The list of output items generated by the response. - pub output: Vec, // TODO: implement types + pub output: Vec, /// The set of modalities the model used to respond, currently the only possible values /// are [\"audio\"], [\"text\"]. Audio output always include a text transcript. From c3c62a43e9b720ec861ee8da5fb43ac9e881cc43 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Sat, 1 Nov 2025 20:36:47 -0700 Subject: [PATCH 08/42] updated realtime types --- async-openai/src/types/realtime/item.rs | 36 ++++++++++++++----- .../src/types/realtime/response_resource.rs | 2 +- .../src/types/realtime/session_resource.rs | 4 +-- 3 files changed, 30 insertions(+), 12 deletions(-) diff --git a/async-openai/src/types/realtime/item.rs b/async-openai/src/types/realtime/item.rs index c6571191..b6020bf8 100644 --- a/async-openai/src/types/realtime/item.rs +++ b/async-openai/src/types/realtime/item.rs @@ -14,14 +14,14 @@ pub struct SystemMessage { pub content: Vec, /// The unique ID of the item. This may be provided by the client or generated by the server. - pub id: String, + pub id: Option, /// Identifier for the API object being returned - always `realtime.item`. /// Optional when creating a new item. pub object: Option, /// The status of the item. Has no effect on the conversation. - pub status: String, + pub status: Option, } #[derive(Debug, Serialize, Deserialize, Clone)] @@ -75,14 +75,17 @@ pub struct UserMessage { pub content: Vec, /// The unique ID of the item. This may be provided by the client or generated by the server. - pub id: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub id: Option, /// Identifier for the API object being returned - always `realtime.item`. /// Optional when creating a new item. + #[serde(skip_serializing_if = "Option::is_none")] pub object: Option, /// The status of the item. Has no effect on the conversation. 
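    /// A compact sketch of building a user text item for `conversation.item.create`, using the
    /// `Item`, `Message`, and `UserMessage` types in this module (assuming the optional
    /// bookkeeping fields `id`, `object`, and `status` can simply be left as `None`):
    ///
    /// ```ignore
    /// let item = Item::Message(Message::User(UserMessage {
    ///     content: vec![UserMessageContent::InputText(UserMessageContentInputText {
    ///         text: "Hello!".to_string(),
    ///     })],
    ///     // Let the server generate the ID and fill in the bookkeeping fields.
    ///     id: None,
    ///     object: None,
    ///     status: None,
    /// }));
    /// ```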
- pub status: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub status: Option, } #[derive(Debug, Serialize, Deserialize, Clone)] @@ -96,7 +99,7 @@ pub struct AssistantMessageContentOutputAudio { /// Base64-encoded audio bytes, these will be parsed as the format specified /// in the session output audio type configuration. This defaults to PCM 16-bit /// 24kHz mono if not specified. - pub audio: String, + pub audio: Option, /// The transcript of the audio content, this will always be present if the /// output type is `audio`. pub transcript: String, @@ -116,14 +119,17 @@ pub struct AssistantMessage { pub content: Vec, /// The unique ID of the item. This may be provided by the client or generated by the server. - pub id: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub id: Option, /// Identifier for the API object being returned - always `realtime.item`. /// Optional when creating a new item. + #[serde(skip_serializing_if = "Option::is_none")] pub object: Option, /// The status of the item. Has no effect on the conversation. - pub status: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub status: Option, } #[derive(Debug, Serialize, Deserialize, Clone)] @@ -148,10 +154,12 @@ pub struct FunctionCall { pub call_id: String, /// The unique ID of the item. This may be provided by the client or generated by the server. - pub id: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub id: Option, /// Identifier for the API object being returned - always `realtime.item`. /// Optional when creating a new item. + #[serde(skip_serializing_if = "Option::is_none")] pub object: Option, /// The status of the item. Has no effect on the conversation. @@ -168,10 +176,12 @@ pub struct FunctionCallOutput { pub output: String, /// The unique ID of the item. This may be provided by the client or generated by the server. - pub id: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub id: Option, /// Identifier for the API object being returned - always `realtime.item`. /// Optional when creating a new item. + #[serde(skip_serializing_if = "Option::is_none")] pub object: Option, /// The status of the item. Has no effect on the conversation. @@ -271,3 +281,11 @@ pub enum Item { McpCall(McpCall), McpApprovalRequest(McpApprovalRequest), } + +impl TryFrom for Item { + type Error = serde_json::Error; + + fn try_from(value: serde_json::Value) -> Result { + serde_json::from_value(value) + } +} diff --git a/async-openai/src/types/realtime/response_resource.rs b/async-openai/src/types/realtime/response_resource.rs index 0886eae1..2d6342d2 100644 --- a/async-openai/src/types/realtime/response_resource.rs +++ b/async-openai/src/types/realtime/response_resource.rs @@ -185,7 +185,7 @@ pub struct ResponseCreate { #[derive(Debug, Serialize, Deserialize, Clone)] pub struct Response { /// Configuration for audio output. - pub audio: ResponseAudio, + pub audio: Option, /// Which conversation the response is added to, determined by the `conversation` field in the /// `response.create` event. If `auto`, the response will be added to the default conversation diff --git a/async-openai/src/types/realtime/session_resource.rs b/async-openai/src/types/realtime/session_resource.rs index 3034fdc0..c11f78c6 100644 --- a/async-openai/src/types/realtime/session_resource.rs +++ b/async-openai/src/types/realtime/session_resource.rs @@ -262,14 +262,14 @@ pub struct AudioInput { /// and the model. 
Filtering the audio can improve VAD and turn detection accuracy /// (reducing false positives) and model performance by improving perception of the /// input audio. - pub noise_reduction: NoiseReduction, + pub noise_reduction: Option, /// Configuration for input audio transcription, defaults to off and can be set to `null` to turn off once on. /// Input audio transcription is not native to the model, since the model consumes audio directly. /// Transcription runs asynchronously through [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription) /// and should be treated as guidance of input audio content rather than precisely what the model /// heard. The client can optionally set the language and prompt for transcription, /// these offer additional guidance to the transcription service. - pub transcription: AudioTranscription, + pub transcription: Option, /// Configuration for turn detection, ether Server VAD or Semantic VAD. This can /// be set to null to turn off, in which case the client must manually trigger model response. From 26f802a8e19fefdc6e51fdb19865b35b41ab9070 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Sat, 1 Nov 2025 20:37:15 -0700 Subject: [PATCH 09/42] update examples/realtime with GA api --- examples/realtime/src/main.rs | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/examples/realtime/src/main.rs b/examples/realtime/src/main.rs index 141fefa3..11a87329 100644 --- a/examples/realtime/src/main.rs +++ b/examples/realtime/src/main.rs @@ -1,7 +1,7 @@ use std::process::exit; use async_openai::types::realtime::{ - ConversationItemCreateEvent, Item, ResponseCreateEvent, ServerEvent, + ConversationItemCreateEvent, Item, Message as RealtimeMessage, ResponseCreateEvent, ServerEvent, }; use futures_util::{future, pin_mut, StreamExt}; @@ -13,7 +13,7 @@ use tokio_tungstenite::{ #[tokio::main] async fn main() { - let url = "wss://api.openai.com/v1/realtime?model=gpt-4o-realtime-preview-2024-12-17"; + let url = "wss://api.openai.com/v1/realtime?model=gpt-realtime"; let api_key = std::env::var("OPENAI_API_KEY").expect("Please provide OPENAPI_API_KEY env var"); let (stdin_tx, stdin_rx) = futures_channel::mpsc::unbounded(); @@ -25,9 +25,6 @@ async fn main() { "Authorization", format!("Bearer {api_key}").parse().unwrap(), ); - request - .headers_mut() - .insert("OpenAI-Beta", "realtime=v1".parse().unwrap()); // connect to WebSocket endpoint let (ws_stream, _) = connect_async(request).await.expect("Failed to connect"); @@ -57,20 +54,7 @@ async fn main() { match server_event { ServerEvent::ResponseOutputItemDone(event) => { - event.item.content.unwrap_or(vec![]).iter().for_each( - |content| { - if let Some(ref transcript) = content.transcript { - eprintln!( - "[{:?}]: {}", - event.item.role, - transcript.trim(), - ); - } - }, - ); - } - ServerEvent::ResponseAudioTranscriptDelta(event) => { - eprint!("{}", event.delta.trim()); + eprint!("{event:?}"); } ServerEvent::Error(e) => { eprint!("{e:?}"); From 68d13447291b02d1239531b93554389baac711b3 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Sun, 2 Nov 2025 15:44:13 -0800 Subject: [PATCH 10/42] checkpoint: responses types updates --- async-openai/src/types/responses.rs | 515 +++++++++++++++++++--------- 1 file changed, 360 insertions(+), 155 deletions(-) diff --git a/async-openai/src/types/responses.rs b/async-openai/src/types/responses.rs index abccf301..d0105b6f 100644 --- a/async-openai/src/types/responses.rs +++ 
b/async-openai/src/types/responses.rs @@ -6,7 +6,6 @@ pub use crate::types::{ use derive_builder::Builder; use futures::Stream; use serde::{Deserialize, Serialize}; -use serde_json::Value; use std::collections::HashMap; use std::pin::Pin; @@ -39,7 +38,6 @@ pub enum Input { Items(Vec), } -/// A context item: currently only messages. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[serde(untagged, rename_all = "snake_case")] pub enum InputItem { @@ -142,6 +140,12 @@ pub struct InputFile { file_url: Option, } +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct Conversation { + /// The unique ID of the conversation. + pub id: String, +} + /// Builder for a Responses API request. #[derive(Clone, Serialize, Deserialize, Debug, Default, Builder, PartialEq)] #[builder( @@ -847,11 +851,27 @@ pub struct IncompleteDetails { pub reason: String, } +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct TopLogProb { + pub bytes: Vec, + pub logprob: f64, + pub token: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct LogProb { + pub bytes: Vec, + pub logprob: f64, + pub token: String, + pub top_logprobs: Vec, +} + /// A simple text output from the model. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct OutputText { +pub struct OutputTextContent { /// The annotations of the text output. pub annotations: Vec, + pub logprobs: Option, /// The text output from the model. pub text: String, } @@ -860,23 +880,27 @@ pub struct OutputText { #[serde(tag = "type", rename_all = "snake_case")] pub enum Annotation { /// A citation to a file. - FileCitation(FileCitation), + FileCitation(FileCitationBody), /// A citation for a web resource used to generate a model response. - UrlCitation(UrlCitation), + UrlCitation(UrlCitationBody), + /// A citation for a container file used to generate a model response. + ContainerFileCitation(ContainerFileCitationBody), /// A path to a file. FilePath(FilePath), } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct FileCitation { +pub struct FileCitationBody { /// The ID of the file. file_id: String, + /// The filename of the file cited. + filename: String, /// The index of the file in the list of files. index: u32, } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct UrlCitation { +pub struct UrlCitationBody { /// The index of the last character of the URL citation in the message. end_index: u32, /// The index of the first character of the URL citation in the message. @@ -887,6 +911,20 @@ pub struct UrlCitation { url: String, } +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ContainerFileCitationBody { + /// The ID of the container file. + container_id: String, + /// The index of the last character of the container file citation in the message. + end_index: u32, + /// The ID of the file. + file_id: String, + /// The filename of the container file cited. + filename: String, + /// The index of the first character of the container file citation in the message. + start_index: u32, +} + #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] pub struct FilePath { /// The ID of the file. @@ -897,8 +935,8 @@ pub struct FilePath { /// A refusal explanation from the model. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct Refusal { - /// The refusal explanationfrom the model. +pub struct RefusalContent { + /// The refusal explanation from the model. 
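    /// A short sketch of handling a refusal next to normal text output, using the
    /// `OutputMessageContent` enum defined below (the variable `part` is hypothetical):
    ///
    /// ```ignore
    /// match part {
    ///     OutputMessageContent::OutputText(text) => println!("{}", text.text),
    ///     OutputMessageContent::Refusal(refusal) => eprintln!("model refused: {}", refusal.refusal),
    /// }
    /// ```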
pub refusal: String, } @@ -906,22 +944,23 @@ pub struct Refusal { #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] pub struct OutputMessage { /// The content of the output message. - pub content: Vec, + pub content: Vec, /// The unique ID of the output message. pub id: String, - /// The role of the output message. Always assistant. + /// The role of the output message. Always `assistant`. pub role: Role, - /// The status of the message input. + /// The status of the message input. One of `in_progress`, `completed`, or + /// `incomplete`. Populated when input items are returned via API. pub status: OutputStatus, } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[serde(tag = "type", rename_all = "snake_case")] -pub enum Content { +pub enum OutputMessageContent { /// A text output from the model. - OutputText(OutputText), + OutputText(OutputTextContent), /// A refusal from the model. - Refusal(Refusal), + Refusal(RefusalContent), } /// Nested content within an output message. @@ -956,46 +995,58 @@ pub enum OutputContent { McpApprovalRequest(McpApprovalRequestOutput), } +/// Reasoning text content. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ReasoningTextContent { + /// The reasoning text from the model. + pub text: String, +} + /// A reasoning item representing the model's chain of thought, including summary paragraphs. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] pub struct ReasoningItem { /// Unique identifier of the reasoning content. pub id: String, - /// The summarized chain-of-thought paragraphs. - pub summary: Vec, + /// Reasoning summary content. + pub summary: Vec, + /// Reasoning text content. + #[serde(skip_serializing_if = "Option::is_none")] + pub content: Option>, /// The encrypted content of the reasoning item - populated when a response is generated with /// `reasoning.encrypted_content` in the `include` parameter. #[serde(skip_serializing_if = "Option::is_none")] pub encrypted_content: Option, - /// The status of the reasoning item. + /// The status of the item. One of `in_progress`, `completed`, or `incomplete`. + /// Populated when items are returned via API. #[serde(skip_serializing_if = "Option::is_none")] pub status: Option, } /// A single summary text fragment from reasoning. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct SummaryText { - /// A short summary of the reasoning used by the model. +pub struct Summary { + /// A summary of the reasoning output from the model so far. pub text: String, } /// File search tool call output. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct FileSearchCallOutput { +pub struct FileSearchToolCall { /// The unique ID of the file search tool call. pub id: String, /// The queries used to search for files. pub queries: Vec, - /// The status of the file search tool call. - pub status: FileSearchCallOutputStatus, + /// The status of the file search tool call. One of `in_progress`, `searching`, + /// `incomplete`,`failed`, or `completed`. + pub status: FileSearchToolCallStatus, /// The results of the file search tool call. #[serde(skip_serializing_if = "Option::is_none")] - pub results: Option>, + pub results: Option>, } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[serde(rename_all = "snake_case")] -pub enum FileSearchCallOutputStatus { +pub enum FileSearchToolCallStatus { InProgress, Searching, Incomplete, @@ -1005,7 +1056,12 @@ pub enum FileSearchCallOutputStatus { /// A single result from a file search. 
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct FileSearchResult { +pub struct FileSearchToolCallResult { + /// Set of 16 key-value pairs that can be attached to an object. This can be useful for storing + /// additional information about the object in a structured format, and querying for objects + /// API or the dashboard. Keys are strings with a maximum length of 64 characters + /// . Values are strings with a maximum length of 512 characters, booleans, or numbers. + pub attributes: HashMap, /// The unique ID of the file. pub file_id: String, /// The name of the file. @@ -1014,71 +1070,124 @@ pub struct FileSearchResult { pub score: f32, /// The text that was retrieved from the file. pub text: String, - /// Set of 16 key-value pairs that can be attached to an object. This can be useful for storing - /// additional information about the object in a structured format, and querying for objects - /// API or the dashboard. Keys are strings with a maximum length of 64 characters - /// . Values are strings with a maximum length of 512 characters, booleans, or numbers. - pub attributes: HashMap, } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct SafetyCheck { - /// The ID of the safety check. +pub struct ComputerCallSafetyCheckParam { + /// The ID of the pending safety check. pub id: String, - /// The type/code of the pending safety check. - pub code: String, + /// The type of the pending safety check. + #[serde(skip_serializing_if = "Option::is_none")] + pub code: Option, /// Details about the pending safety check. - pub message: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub message: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(rename_all = "snake_case")] +pub enum WebSearchToolCallStatus { + InProgress, + Searching, + Completed, + Failed, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct WebSearchActionSearchSource { + /// The type of source. Always `url`. + pub r#type: String, + /// The URL of the source. + pub url: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct WebSearchActionSearch { + /// The search query. + pub query: String, + /// The sources used in the search. + pub sources: Option>, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct WebSearchActionOpenPage { + /// The URL opened by the model. + pub url: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct WebSearchActionFind { + /// The URL of the page searched for the pattern. + pub url: String, + /// The pattern or text to search for within the page. + pub pattern: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum WebSearchToolCallAction { + /// Action type "search" - Performs a web search query. + Search(WebSearchActionSearch), + /// Action type "open_page" - Opens a specific URL from search results. + OpenPage(WebSearchActionOpenPage), + /// Action type "find": Searches for a pattern within a loaded page. + Find(WebSearchActionFind), } /// Web search tool call output. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct WebSearchCallOutput { +pub struct WebSearchToolCall { + /// An object describing the specific action taken in this web search call. Includes + /// details on how the model used the web (search, open_page, find). 
+ pub action: WebSearchToolCallAction, /// The unique ID of the web search tool call. pub id: String, /// The status of the web search tool call. - pub status: String, + pub status: WebSearchToolCallStatus, } /// Output from a computer tool call. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct ComputerCallOutput { - pub action: ComputerCallAction, +pub struct ComputerToolCall { + pub action: ComputerAction, /// An identifier used when responding to the tool call with output. pub call_id: String, /// The unique ID of the computer call. pub id: String, /// The pending safety checks for the computer call. - pub pending_safety_checks: Vec, - /// The status of the item. + pub pending_safety_checks: Vec, + /// The status of the item. One of `in_progress`, `completed`, or `incomplete`. + /// Populated when items are returned via API. pub status: OutputStatus, } /// A point in 2D space. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct Point { +pub struct DragPoint { + /// The x-coordinate. pub x: i32, + /// The y-coordinate. pub y: i32, } /// Represents all user‐triggered actions. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] #[serde(tag = "type", rename_all = "snake_case")] -pub enum ComputerCallAction { +pub enum ComputerAction { /// A click action. - Click(Click), + Click(ClickParam), - /// A double-click action. - DoubleClick(DoubleClick), + /// A double click action. + DoubleClick(DoubleClickAction), /// A drag action. Drag(Drag), - /// A keypress action. - KeyPress(KeyPress), + /// A collection of keypresses the model would like to perform. + Keypress(KeyPressAction), /// A mouse move action. - Move(MoveAction), + Move(Move), /// A screenshot action. Screenshot, @@ -1086,16 +1195,16 @@ pub enum ComputerCallAction { /// A scroll action. Scroll(Scroll), - /// A type (text entry) action. - Type(TypeAction), + /// An action to type in text. + Type(Type), - /// A wait (no-op) action. + /// A wait action. Wait, } #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum ButtonPress { +#[serde(rename_all = "lowercase")] +pub enum ClickButtonType { Left, Right, Wheel, @@ -1105,21 +1214,22 @@ pub enum ButtonPress { /// A click action. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct Click { - /// Which mouse button was pressed. - pub button: ButtonPress, - /// X‐coordinate of the click. +pub struct ClickParam { + /// Indicates which mouse button was pressed during the click. One of `left`, + /// `right`, `wheel`, `back`, or `forward`. + pub button: ClickButtonType, + /// The x-coordinate where the click occurred. pub x: i32, - /// Y‐coordinate of the click. + /// The y-coordinate where the click occurred. pub y: i32, } /// A double click action. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct DoubleClick { - /// X‐coordinate of the double click. +pub struct DoubleClickAction { + /// The x-coordinate where the double click occurred. pub x: i32, - /// Y‐coordinate of the double click. + /// The y-coordinate where the double click occurred. pub y: i32, } @@ -1127,52 +1237,49 @@ pub struct DoubleClick { #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct Drag { /// The path of points the cursor drags through. - pub path: Vec, - /// X‐coordinate at the end of the drag. - pub x: i32, - /// Y‐coordinate at the end of the drag. - pub y: i32, + pub path: Vec, } /// A keypress action. 
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct KeyPress { - /// The list of keys to press (e.g. `["Control", "C"]`). +pub struct KeyPressAction { + /// The combination of keys the model is requesting to be pressed. + /// This is an array of strings, each representing a key. pub keys: Vec, } /// A mouse move action. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct MoveAction { - /// X‐coordinate to move to. +pub struct Move { + /// The x-coordinate to move to. pub x: i32, - /// Y‐coordinate to move to. + /// The y-coordinate to move to. pub y: i32, } /// A scroll action. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct Scroll { - /// Horizontal scroll distance. + /// The horizontal scroll distance. pub scroll_x: i32, - /// Vertical scroll distance. + /// The vertical scroll distance. pub scroll_y: i32, - /// X‐coordinate where the scroll began. + /// The x-coordinate where the scroll occurred. pub x: i32, - /// Y‐coordinate where the scroll began. + /// The y-coordinate where the scroll occurred. pub y: i32, } /// A typing (text entry) action. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct TypeAction { +pub struct Type { /// The text to type. pub text: String, } /// Metadata for a function call request. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct FunctionCall { +pub struct FunctionToolCall { /// The unique ID of the function tool call. pub id: String, /// The unique ID of the function tool call generated by the model. @@ -1185,56 +1292,75 @@ pub struct FunctionCall { pub status: OutputStatus, } +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(rename_all = "snake_case")] +pub enum ImageGenToolCallStatus { + InProgress, + Completed, + Generating, + Failed, +} + /// Output of an image generation request. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct ImageGenerationCallOutput { - /// Unique ID of the image generation call. +pub struct ImageGenToolCall { + /// The unique ID of the image generation call. pub id: String, - /// Base64-encoded generated image, or null. + /// The generated image encoded in base64. pub result: Option, - /// Status of the image generation call. - pub status: String, + /// The status of the image generation call. + pub status: ImageGenToolCallStatus, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(rename_all = "snake_case")] +pub enum CodeInterpreterToolCallStatus { + InProgress, + Completed, + Incomplete, + Interpreting, + Failed, } /// Output of a code interpreter request. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct CodeInterpreterCallOutput { - /// The code that was executed. +pub struct CodeInterpreterToolCall { + /// The code to run, or null if not available. #[serde(skip_serializing_if = "Option::is_none")] pub code: Option, - /// Unique ID of the call. - pub id: String, - /// Status of the tool call. - pub status: String, /// ID of the container used to run the code. pub container_id: String, - /// The outputs of the execution: logs or files. + /// The unique ID of the code interpreter tool call. + pub id: String, + /// The outputs generated by the code interpreter, such as logs or images. + /// Can be null if no outputs are available. #[serde(skip_serializing_if = "Option::is_none")] - pub outputs: Option>, + pub outputs: Option>, + /// The status of the code interpreter tool call. 
+ /// Valid values are `in_progress`, `completed`, `incomplete`, `interpreting`, and `failed`. + pub status: CodeInterpreterToolCallStatus, } /// Individual result from a code interpreter: either logs or files. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[serde(tag = "type", rename_all = "snake_case")] -pub enum CodeInterpreterResult { - /// Text logs from the execution. - Logs(CodeInterpreterTextOutput), - /// File outputs from the execution. - Files(CodeInterpreterFileOutput), +pub enum CodeInterpreterToolCallOutput { + /// Code interpreter output logs + Logs(CodeInterpreterOutputLogs), + /// Code interpreter output image + Image(CodeInterpreterOutputImage), } -/// The output containing execution logs. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct CodeInterpreterTextOutput { - /// The logs of the code interpreter tool call. +pub struct CodeInterpreterOutputLogs { + /// The logs output from the code interpreter. pub logs: String, } -/// The output containing file references. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct CodeInterpreterFileOutput { - /// List of file IDs produced. - pub files: Vec, +pub struct CodeInterpreterOutputImage { + /// The URL of the image output from the code interpreter. + pub url: String, } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] @@ -1247,73 +1373,88 @@ pub struct CodeInterpreterFile { /// Output of a local shell command request. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct LocalShellCallOutput { - /// Details of the exec action. - pub action: LocalShellAction, - /// Unique call identifier for responding to the tool call. +pub struct LocalShellToolCall { + /// Execute a shell command on the server. + pub action: LocalShellExecAction, + /// The unique ID of the local shell tool call generated by the model. pub call_id: String, - /// Unique ID of the local shell call. + /// The unique ID of the local shell call. pub id: String, - /// Status of the local shell call. + /// The status of the local shell call. pub status: String, } /// Define the shape of a local shell action (exec). #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct LocalShellAction { +pub struct LocalShellExecAction { /// The command to run. pub command: Vec, /// Environment variables to set for the command. pub env: HashMap, - /// Optional timeout for the command (ms). + /// Optional timeout in milliseconds for the command. pub timeout_ms: Option, /// Optional user to run the command as. pub user: Option, - /// Optional working directory for the command. + /// Optional working directory to run the command in. pub working_directory: Option, } /// Output of an MCP server tool invocation. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct McpCallOutput { - /// JSON string of the arguments passed. +pub struct MCPToolCall { + /// A JSON string of the arguments passed to the tool. pub arguments: String, - /// Unique ID of the MCP call. + /// The unique ID of the tool call. pub id: String, - /// Name of the tool invoked. + /// The name of the tool that was run. pub name: String, - /// Label of the MCP server. + /// The label of the MCP server running the tool. pub server_label: String, + /// Unique identifier for the MCP tool call approval request. Include this value + /// in a subsequent `mcp_approval_response` input to approve or reject the corresponding + /// tool call. + pub approval_request_id: Option, /// Error message from the call, if any. 
pub error: Option, - /// Output from the call, if any. + /// The output from the tool call. pub output: Option, + /// The status of the tool call. One of `in_progress`, `completed`, `incomplete`, + /// `calling`, or `failed`. + pub status: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(rename_all = "snake_case")] +pub enum MCPToolCallStatus { + InProgress, + Completed, + Incomplete, + Calling, + Failed, } -/// Output listing tools available on an MCP server. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct McpListToolsOutput { - /// Unique ID of the list request. +pub struct MCPListTools { + /// The unique ID of the list. pub id: String, - /// Label of the MCP server. + /// The label of the MCP server. pub server_label: String, - /// Tools available on the server with metadata. - pub tools: Vec, + /// The tools available on the server. + pub tools: Vec, /// Error message if listing failed. #[serde(skip_serializing_if = "Option::is_none")] pub error: Option, } -/// Information about a single tool on an MCP server. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct McpToolInfo { +pub struct MCPListToolsTool { + /// The JSON schema describing the tool's input. + pub input_schema: serde_json::Value, /// The name of the tool. pub name: String, - /// The JSON schema describing the tool's input. - pub input_schema: Value, /// Additional annotations about the tool. #[serde(skip_serializing_if = "Option::is_none")] - pub annotations: Option, + pub annotations: Option, /// The description of the tool. #[serde(skip_serializing_if = "Option::is_none")] pub description: Option, @@ -1321,14 +1462,14 @@ pub struct McpToolInfo { /// Output representing a human approval request for an MCP tool. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct McpApprovalRequestOutput { +pub struct MCPApprovalRequest { /// JSON string of arguments for the tool. pub arguments: String, - /// Unique ID of the approval request. + /// The unique ID of the approval request. pub id: String, - /// Name of the tool requiring approval. + /// The name of the tool to run. pub name: String, - /// Label of the MCP server making the request. + /// The label of the MCP server making the request. pub server_label: String, } @@ -1347,13 +1488,31 @@ pub struct Usage { pub total_tokens: u32, } +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(untagged)] +pub enum Instructions { + /// A text input to the model, equivalent to a text input with the `developer` role. + Text(String), + /// A list of one or many input items to the model, containing different content types. + Array(Vec), +} + /// The complete response returned by the Responses API. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] pub struct Response { + /// Whether to run the model response in the background. + /// [Learn more](https://platform.openai.com/docs/guides/background). + #[serde(skip_serializing_if = "Option::is_none")] + pub background: Option, + + /// The conversation that this response belongs to. Input items and output + /// items from this response are automatically added to this conversation. + pub conversation: Option, + /// Unix timestamp (in seconds) when this Response was created. pub created_at: u64, - /// Error object if the API failed to generate a response. + /// An error object returned when the model fails to generate a Response. 
#[serde(skip_serializing_if = "Option::is_none")] pub error: Option, @@ -1364,26 +1523,44 @@ pub struct Response { #[serde(skip_serializing_if = "Option::is_none")] pub incomplete_details: Option, - /// Instructions that were inserted as the first item in context. + /// A system (or developer) message inserted into the model's context. + /// + /// When using along with `previous_response_id`, the instructions from a previous response + /// will not be carried over to the next response. This makes it simple to swap out + /// system (or developer) messages in new responses. #[serde(skip_serializing_if = "Option::is_none")] - pub instructions: Option, + pub instructions: Option, - /// The value of `max_output_tokens` that was honored. + /// An upper bound for the number of tokens that can be generated for a response, + /// including visible output tokens and + /// [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). #[serde(skip_serializing_if = "Option::is_none")] pub max_output_tokens: Option, - /// Metadata tags/values that were attached to this response. + /// Set of 16 key-value pairs that can be attached to an object. This can be + /// useful for storing additional information about the object in a structured + /// format, and querying for objects via API or the dashboard. + /// + /// Keys are strings with a maximum length of 64 characters. Values are strings + /// with a maximum length of 512 characters. #[serde(skip_serializing_if = "Option::is_none")] pub metadata: Option>, - /// Model ID used to generate the response. + /// Model ID used to generate the response, like gpt-4o or o3. OpenAI offers a + /// wide range of models with different capabilities, performance characteristics, + /// and price points. Refer to the [model guide](https://platform.openai.com/docs/models) to browse and compare available models. pub model: String, - /// The object type – always `response`. + /// The object type of this resource - always set to `response`. pub object: String, - /// The array of content items generated by the model. - pub output: Vec, + /// An array of content items generated by the model. + /// + /// The length and order of items in the output array is dependent on the model's response. + /// Rather than accessing the first item in the output array and assuming it's an assistant + /// message with the content generated by the model, you might consider using + /// the `output_text` property where supported in SDKs. + pub output: Vec, /// SDK-only convenience property that contains the aggregated text output from all /// `output_text` items in the `output` array, if any are present. @@ -2124,27 +2301,55 @@ pub struct ResponseMetadata { #[serde(rename_all = "snake_case")] #[non_exhaustive] pub enum OutputItem { + /// An output message from the model. Message(OutputMessage), - FileSearchCall(FileSearchCallOutput), - FunctionCall(FunctionCall), - WebSearchCall(WebSearchCallOutput), - ComputerCall(ComputerCallOutput), + /// The results of a file search tool call. See the + /// [file search guide](https://platform.openai.com/docs/guides/tools-file-search) + /// for more information. + FileSearchCall(FileSearchToolCall), + /// A tool call to run a function. See the + /// [function calling guide](https://platform.openai.com/docs/guides/function-calling) + /// for more information. + FunctionCall(FunctionToolCall), + /// The results of a web search tool call. See the + /// [web search guide](https://platform.openai.com/docs/guides/tools-web-search) + /// for more information. 
+ WebSearchCall(WebSearchToolCall), + /// A tool call to a computer use tool. See the + /// [computer use guide](https://platform.openai.com/docs/guides/tools-computer-use) + /// for more information. + ComputerCall(ComputerToolCall), + /// A description of the chain of thought used by a reasoning model while generating + /// a response. Be sure to include these items in your `input` to the Responses API for + /// subsequent turns of a conversation if you are manually + /// [managing context](https://platform.openai.com/docs/guides/conversation-state). Reasoning(ReasoningItem), - ImageGenerationCall(ImageGenerationCallOutput), - CodeInterpreterCall(CodeInterpreterCallOutput), - LocalShellCall(LocalShellCallOutput), - McpCall(McpCallOutput), - McpListTools(McpListToolsOutput), - McpApprovalRequest(McpApprovalRequestOutput), - CustomToolCall(CustomToolCallOutput), + /// An image generation request made by the model. + ImageGenerationCall(ImageGenToolCall), + /// A tool call to run code. + CodeInterpreterCall(CodeInterpreterToolCall), + /// A tool call to run a command on the local shell. + LocalShellCall(LocalShellToolCall), + /// An invocation of a tool on an MCP server. + McpCall(MCPToolCall), + /// A list of tools available on an MCP server. + McpListTools(MCPListTools), + /// A request for human approval of a tool invocation. + McpApprovalRequest(MCPApprovalRequest), + /// A call to a custom tool created by the model. + CustomToolCall(CustomToolCall), } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[non_exhaustive] -pub struct CustomToolCallOutput { +pub struct CustomToolCall { + /// An identifier used to map this custom tool call to a tool call output. pub call_id: String, + /// The input for the custom tool call generated by the model. pub input: String, + /// The name of the custom tool being called. pub name: String, + /// The unique ID of the custom tool call in the OpenAI platform. pub id: String, } From dd5e23f17b230e35d7bffb3d3cde821212d5d769 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Sun, 2 Nov 2025 19:37:37 -0800 Subject: [PATCH 11/42] checkpoint for updated types --- async-openai/src/types/chat.rs | 7 +- async-openai/src/types/impls.rs | 14 +- async-openai/src/types/responses.rs | 626 ++++++++++++++++++++++++---- 3 files changed, 551 insertions(+), 96 deletions(-) diff --git a/async-openai/src/types/chat.rs b/async-openai/src/types/chat.rs index d9373db6..e519286d 100644 --- a/async-openai/src/types/chat.rs +++ b/async-openai/src/types/chat.rs @@ -504,9 +504,14 @@ pub struct ResponseFormatJsonSchema { /// The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64. pub name: String, /// The schema for the response format, described as a JSON Schema object. + /// Learn how to build JSON schemas [here](https://json-schema.org/). #[serde(skip_serializing_if = "Option::is_none")] pub schema: Option, - /// Whether to enable strict schema adherence when generating the output. If set to true, the model will always follow the exact schema defined in the `schema` field. Only a subset of JSON Schema is supported when `strict` is `true`. To learn more, read the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + /// Whether to enable strict schema adherence when generating the output. + /// If set to true, the model will always follow the exact schema defined + /// in the `schema` field. Only a subset of JSON Schema is supported when + /// `strict` is `true`. 
To learn more, read the [Structured Outputs + /// guide](https://platform.openai.com/docs/guides/structured-outputs). #[serde(skip_serializing_if = "Option::is_none")] pub strict: Option, } diff --git a/async-openai/src/types/impls.rs b/async-openai/src/types/impls.rs index b566dc7d..3689dbba 100644 --- a/async-openai/src/types/impls.rs +++ b/async-openai/src/types/impls.rs @@ -14,7 +14,7 @@ use crate::{ use bytes::Bytes; use super::{ - responses::{CodeInterpreterContainer, Input, InputContent, Role as ResponsesRole}, + responses::{CodeInterpreterContainer, EasyInputContent, Input, Role as ResponsesRole}, AddUploadPartRequest, AudioInput, AudioResponseFormat, ChatCompletionFunctionCall, ChatCompletionFunctions, ChatCompletionNamedToolChoice, ChatCompletionRequestAssistantMessage, ChatCompletionRequestAssistantMessageContent, ChatCompletionRequestDeveloperMessage, @@ -1053,9 +1053,9 @@ impl Default for Input { } } -impl Default for InputContent { +impl Default for EasyInputContent { fn default() -> Self { - Self::TextInput("".to_string()) + Self::Text("".to_string()) } } @@ -1077,15 +1077,15 @@ impl Default for ResponsesRole { } } -impl From for InputContent { +impl From for EasyInputContent { fn from(value: String) -> Self { - Self::TextInput(value) + Self::Text(value) } } -impl From<&str> for InputContent { +impl From<&str> for EasyInputContent { fn from(value: &str) -> Self { - Self::TextInput(value.to_owned()) + Self::Text(value.to_owned()) } } diff --git a/async-openai/src/types/responses.rs b/async-openai/src/types/responses.rs index d0105b6f..4b60a53b 100644 --- a/async-openai/src/types/responses.rs +++ b/async-openai/src/types/responses.rs @@ -38,14 +38,362 @@ pub enum Input { Items(Vec), } +/// Content item used to generate a response. +/// +/// This is a properly discriminated union based on the `type` field, using Rust's +/// type-safe enum with serde's tag attribute for efficient deserialization. +/// +/// # OpenAPI Specification +/// Corresponds to the `Item` schema in the OpenAPI spec with a `type` discriminator. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(untagged, rename_all = "snake_case")] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum Item { + /// A message (type: "message"). + /// Can represent InputMessage (user/system/developer) or OutputMessage (assistant). + /// + /// InputMessage: + /// A message input to the model with a role indicating instruction following hierarchy. + /// Instructions given with the developer or system role take precedence over instructions given with the user role. + /// OutputMessage: + /// A message output from the model. + Message(MessageItem), + + /// The results of a file search tool call. See the + /// [file search guide](https://platform.openai.com/docs/guides/tools-file-search) for more information. + FileSearchCall(FileSearchToolCall), + + /// A tool call to a computer use tool. See the + /// [computer use guide](https://platform.openai.com/docs/guides/tools-computer-use) for more information. + ComputerCall(ComputerToolCall), + + /// The output of a computer tool call. + ComputerCallOutput(ComputerCallOutputItemParam), + + /// The results of a web search tool call. See the + /// [web search guide](https://platform.openai.com/docs/guides/tools-web-search) for more information. + WebSearchCall(WebSearchToolCall), + + /// A tool call to run a function. See the + /// + /// [function calling guide](https://platform.openai.com/docs/guides/function-calling) for more information. 
+ FunctionCall(FunctionToolCall), + + /// The output of a function tool call. + FunctionCallOutput(FunctionCallOutputItemParam), + + /// A description of the chain of thought used by a reasoning model while generating + /// a response. Be sure to include these items in your `input` to the Responses API + /// for subsequent turns of a conversation if you are manually + /// [managing context](https://platform.openai.com/docs/guides/conversation-state). + Reasoning(ReasoningItem), + + /// An image generation request made by the model. + ImageGenerationCall(ImageGenToolCall), + + /// A tool call to run code. + CodeInterpreterCall(CodeInterpreterToolCall), + + /// A tool call to run a command on the local shell. + LocalShellCall(LocalShellToolCall), + + /// The output of a local shell tool call. + LocalShellCallOutput(LocalShellToolCallOutput), + + /// A list of tools available on an MCP server. + McpListTools(MCPListTools), + + /// A request for human approval of a tool invocation. + McpApprovalRequest(MCPApprovalRequest), + + /// A response to an MCP approval request. + McpApprovalResponse(MCPApprovalResponse), + + /// An invocation of a tool on an MCP server. + McpCall(MCPToolCall), + + /// The output of a custom tool call from your code, being sent back to the model. + CustomToolCallOutput(CustomToolCallOutput), + + /// A call to a custom tool created by the model. + CustomToolCall(CustomToolCall), +} + +/// Input item that can be used in the context for generating a response. +/// +/// This represents the OpenAPI `InputItem` schema which is an `anyOf`: +/// 1. `EasyInputMessage` - Simple, user-friendly message input (can use string content) +/// 2. `Item` - Structured items with proper type discrimination (including InputMessage, OutputMessage, tool calls) +/// 3. `ItemReferenceParam` - Reference to an existing item by ID (type can be null) +/// +/// Uses untagged deserialization because these types overlap in structure. +/// Order matters: more specific structures are tried first. +/// +/// # OpenAPI Specification +/// Corresponds to the `InputItem` schema: `anyOf[EasyInputMessage, Item, ItemReferenceParam]` +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(untagged)] pub enum InputItem { - Message(InputMessage), - Custom(serde_json::Value), + /// A reference to an existing item by ID. + /// Has a required `id` field and optional `type` (can be "item_reference" or null). + /// Must be tried first as it's the most minimal structure. + ItemReference(ItemReference), + + /// All structured items with proper type discrimination. + /// Includes InputMessage, OutputMessage, and all tool calls/outputs. + /// Uses the discriminated `Item` enum for efficient, type-safe deserialization. + Item(Item), + + /// A simple, user-friendly message input (EasyInputMessage). + /// Supports string content and can include assistant role for previous responses. + /// Must be tried last as it's the most flexible structure. + /// + /// A message input to the model with a role indicating instruction following + /// hierarchy. Instructions given with the `developer` or `system` role take + /// precedence over instructions given with the `user` role. Messages with the + /// `assistant` role are presumed to have been generated by the model in previous + /// interactions. + EasyMessage(EasyInputMessage), +} + +impl InputItem { + /// Creates an InputItem from an item reference ID. 
+ pub fn from_reference(id: impl Into) -> Self { + Self::ItemReference(ItemReference::new(id)) + } + + /// Creates an InputItem from a structured Item. + pub fn from_item(item: Item) -> Self { + Self::Item(item) + } + + /// Creates an InputItem from an EasyInputMessage. + pub fn from_easy_message(message: EasyInputMessage) -> Self { + Self::EasyMessage(message) + } + + /// Creates a simple text message with the given role and content. + pub fn text_message(role: Role, content: impl Into) -> Self { + Self::EasyMessage(EasyInputMessage { + r#type: InputMessageType::Message, + role, + content: EasyInputContent::Text(content.into()), + }) + } +} + +/// A message item used within the `Item` enum. +/// +/// Both InputMessage and OutputMessage have `type: "message"`, so we use an untagged +/// enum to distinguish them based on their structure: +/// - OutputMessage: role=assistant, required id & status fields +/// - InputMessage: role=user/system/developer, content is Vec, optional id/status +/// +/// Note: EasyInputMessage is NOT included here - it's a separate variant in `InputItem`, +/// not part of the structured `Item` enum. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(untagged)] +pub enum MessageItem { + /// An output message from the model (role: assistant, has required id & status). + /// This must come first as it has the most specific structure (required id and status fields). + Output(OutputMessage), + + /// A structured input message (role: user/system/developer, content is Vec). + /// Has structured content list and optional id/status fields. + /// + /// A message input to the model with a role indicating instruction following hierarchy. + /// Instructions given with the `developer` or `system` role take precedence over instructions + /// given with the `user` role. + Input(InputMessage), +} + +/// A reference to an existing item by ID. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ItemReference { + /// The type of item to reference. Can be "item_reference" or null. + #[serde(skip_serializing_if = "Option::is_none")] + pub r#type: Option, + /// The ID of the item to reference. + pub id: String, +} + +impl ItemReference { + /// Create a new item reference with the given ID. + pub fn new(id: impl Into) -> Self { + Self { + r#type: Some(ItemReferenceType::ItemReference), + id: id.into(), + } + } +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(rename_all = "snake_case")] +pub enum ItemReferenceType { + ItemReference, +} + +/// Output from a function call that you're providing back to the model. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct FunctionCallOutputItemParam { + /// The unique ID of the function tool call generated by the model. + pub call_id: String, + /// Text, image, or file output of the function tool call. + pub output: FunctionCallOutput, + /// The unique ID of the function tool call output. + /// Populated when this item is returned via API. + #[serde(skip_serializing_if = "Option::is_none")] + pub id: Option, + /// The status of the item. One of `in_progress`, `completed`, or `incomplete`. + /// Populated when items are returned via API. + #[serde(skip_serializing_if = "Option::is_none")] + pub status: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(untagged)] +pub enum FunctionCallOutput { + /// A JSON string of the output of the function tool call. + Text(String), + Content(Vec), // TODO use shape which allows null from OpenAPI spec? 
+} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ComputerCallOutputItemParam { + /// The ID of the computer tool call that produced the output. + pub call_id: String, + /// A computer screenshot image used with the computer use tool. + pub output: ComputerScreenshotImage, + /// The safety checks reported by the API that have been acknowledged by the developer. + #[serde(skip_serializing_if = "Option::is_none")] + pub acknowledged_safety_checks: Option>, + /// The unique ID of the computer tool call output. Optional when creating. + #[serde(skip_serializing_if = "Option::is_none")] + pub id: Option, + /// The status of the message input. One of `in_progress`, `completed`, or `incomplete`. + /// Populated when input items are returned via API. + #[serde(skip_serializing_if = "Option::is_none")] + pub status: Option, // TODO rename OutputStatus? +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(rename_all = "snake_case")] +pub enum ComputerScreenshotImageType { + ComputerScreenshot, +} + +/// A computer screenshot image used with the computer use tool. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ComputerScreenshotImage { + /// Specifies the event type. For a computer screenshot, this property is always + /// set to `computer_screenshot`. + pub r#type: ComputerScreenshotImageType, + /// The identifier of an uploaded file that contains the screenshot. + #[serde(skip_serializing_if = "Option::is_none")] + pub file_id: Option, + /// The URL of the screenshot image. + #[serde(skip_serializing_if = "Option::is_none")] + pub image_url: Option, +} + +/// Output from a local shell tool call that you're providing back to the model. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct LocalShellToolCallOutput { + /// The unique ID of the local shell tool call generated by the model. + pub id: String, + + /// A JSON string of the output of the local shell tool call. + pub output: String, + + /// The status of the item. One of `in_progress`, `completed`, or `incomplete`. + #[serde(skip_serializing_if = "Option::is_none")] + pub status: Option, +} + +/// Output from a local shell command execution. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct LocalShellOutput { + /// The stdout output from the command. + #[serde(skip_serializing_if = "Option::is_none")] + pub stdout: Option, + + /// The stderr output from the command. + #[serde(skip_serializing_if = "Option::is_none")] + pub stderr: Option, + + /// The exit code of the command. + #[serde(skip_serializing_if = "Option::is_none")] + pub exit_code: Option, } -/// A message to prime the model. +/// An MCP approval response that you're providing back to the model. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct MCPApprovalResponse { + /// The ID of the approval request being answered. + pub approval_request_id: String, + + /// Whether the request was approved. + pub approve: bool, + + /// The unique ID of the approval response + #[serde(skip_serializing_if = "Option::is_none")] + pub id: Option, + + /// Optional reason for the decision. + #[serde(skip_serializing_if = "Option::is_none")] + pub reason: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(untagged)] +pub enum CustomToolCallOutputOutput { + /// A string of the output of the custom tool call. + Text(String), + /// Text, image, or file output of the custom tool call. 
+ List(Vec), +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct CustomToolCallOutput { + /// The call ID, used to map this custom tool call output to a custom tool call. + pub call_id: String, + + /// The output from the custom tool call generated by your code. + /// Can be a string or an list of output content. + pub output: CustomToolCallOutputOutput, + + /// The unique ID of the custom tool call output in the OpenAI platform. + #[serde(skip_serializing_if = "Option::is_none")] + pub id: Option, +} + +/// A simplified message input to the model (EasyInputMessage in the OpenAPI spec). +/// +/// This is the most user-friendly way to provide messages, supporting both simple +/// string content and structured content. Role can include `assistant` for providing +/// previous assistant responses. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] +#[builder( + name = "EasyInputMessageArgs", + pattern = "mutable", + setter(into, strip_option), + default +)] +#[builder(build_fn(error = "OpenAIError"))] +pub struct EasyInputMessage { + /// The type of the message input. Always set to `message`. + pub r#type: MessageType, + /// The role of the message input. One of `user`, `assistant`, `system`, or `developer`. + pub role: Role, + /// Text, image, or audio input to the model, used to generate a response. + /// Can also contain previous assistant responses. + pub content: EasyInputContent, +} + +/// A structured message input to the model (InputMessage in the OpenAPI spec). +/// +/// This variant requires structured content (not a simple string) and does not support +/// the `assistant` role (use OutputMessage for that). Used when items are returned via API +/// with additional metadata. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] #[builder( name = "InputMessageArgs", @@ -55,45 +403,56 @@ pub enum InputItem { )] #[builder(build_fn(error = "OpenAIError"))] pub struct InputMessage { - #[serde(default, rename = "type")] - pub kind: InputMessageType, - /// The role of the message input. - pub role: Role, - /// Text, image, or audio input to the model, used to generate a response. Can also contain - /// previous assistant responses. - pub content: InputContent, + /// A list of one or many input items to the model, containing different content types. + pub content: Vec, + /// The role of the message input. One of `user`, `system`, or `developer`. + /// Note: `assistant` is NOT allowed here; use OutputMessage instead. + pub role: InputRole, + /// The status of the item. One of `in_progress`, `completed`, or `incomplete`. + /// Populated when items are returned via API. + #[serde(skip_serializing_if = "Option::is_none")] + pub status: Option, // TODO rename OutputStatus to ItemStatus maybe? + /// The type of the message input. Always set to `message`. + pub r#type: MessageType, } -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] -#[serde(rename_all = "snake_case")] -pub enum InputMessageType { +/// The role for an input message - can only be `user`, `system`, or `developer`. +/// This type ensures type safety by excluding the `assistant` role (use OutputMessage for that). +#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq, Default)] +#[serde(rename_all = "lowercase")] +pub enum InputRole { #[default] - Message, + User, + System, + Developer, } +/// Content for EasyInputMessage - can be a simple string or structured list. 
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[serde(untagged)] -pub enum InputContent { +pub enum EasyInputContent { /// A text input to the model. - TextInput(String), + Text(String), /// A list of one or many input items to the model, containing different content types. - InputItemContentList(Vec), + ContentList(Vec), } /// Parts of a message: text, image, file, or audio. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[serde(tag = "type", rename_all = "snake_case")] -pub enum ContentType { +pub enum InputContent { /// A text input to the model. - InputText(InputText), - /// An image input to the model. - InputImage(InputImage), + InputText(InputTextContent), + /// An image input to the model. Learn about + /// [image inputs](https://platform.openai.com/docs/guides/vision). + InputImage(InputImageContent), /// A file input to the model. - InputFile(InputFile), + InputFile(InputFileContent), } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct InputText { +pub struct InputTextContent { + /// The text input to the model. pub text: String, } @@ -105,8 +464,9 @@ pub struct InputText { default )] #[builder(build_fn(error = "OpenAIError"))] -pub struct InputImage { - /// The detail level of the image to be sent to the model. +pub struct InputImageContent { + /// The detail level of the image to be sent to the model. One of `high`, `low`, or `auto`. + /// Defaults to `auto`. detail: ImageDetail, /// The ID of the file to be sent to the model. #[serde(skip_serializing_if = "Option::is_none")] @@ -125,19 +485,19 @@ pub struct InputImage { default )] #[builder(build_fn(error = "OpenAIError"))] -pub struct InputFile { +pub struct InputFileContent { /// The content of the file to be sent to the model. #[serde(skip_serializing_if = "Option::is_none")] file_data: Option, /// The ID of the file to be sent to the model. #[serde(skip_serializing_if = "Option::is_none")] file_id: Option, - /// The name of the file to be sent to the model. - #[serde(skip_serializing_if = "Option::is_none")] - filename: Option, /// The URL of the file to be sent to the model. #[serde(skip_serializing_if = "Option::is_none")] file_url: Option, + /// The name of the file to be sent to the model. + #[serde(skip_serializing_if = "Option::is_none")] + filename: Option, } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] @@ -312,9 +672,16 @@ pub struct CreateResponse { pub user: Option, } -/// Service tier request options. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct PromptConfig { +#[serde(untagged)] +pub enum ResponsePromptVariables { + String(String), + Content(InputContent), + Custom(serde_json::Value), +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct Prompt { /// The unique identifier of the prompt template to use. pub id: String, @@ -322,17 +689,17 @@ pub struct PromptConfig { #[serde(skip_serializing_if = "Option::is_none")] pub version: Option, - /// Optional map of values to substitute in for variables in your prompt. The substitution - /// values can either be strings, or other Response input types like images or files. - /// For now only supporting Strings. + /// Optional map of values to substitute in for variables in your + /// prompt. The substitution values can either be strings, or other + /// Response input types like images or files. #[serde(skip_serializing_if = "Option::is_none")] - pub variables: Option>, + pub variables: Option, } -/// Service tier request options. 
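As a usage illustration for the new input types above (a sketch only, not part of the patch: the `async_openai::types::responses` import path, the `Role::User` variant, and the elided generic parameters are assumptions based on the surrounding diff):

use async_openai::types::responses::{InputItem, Role};

// Hypothetical helper built on the constructors added in this patch: one plain-text
// turn via the EasyInputMessage path, plus a reference to an item that already
// exists in conversation state ("msg_123" is a placeholder ID).
fn example_input() -> Vec<InputItem> {
    vec![
        InputItem::text_message(Role::User, "Summarize the attached report."),
        InputItem::from_reference("msg_123"),
    ]
}

The untagged ordering documented on `InputItem` is what keeps the reference-only shape deserializing into `ItemReference` instead of being captured by the more permissive `EasyMessage` variant.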
-#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] +#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Default)] #[serde(rename_all = "lowercase")] pub enum ServiceTier { + #[default] Auto, Default, Flex, @@ -351,17 +718,27 @@ pub enum Truncation { /// o-series reasoning settings. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] #[builder( - name = "ReasoningConfigArgs", + name = "ReasoningArgs", pattern = "mutable", setter(into, strip_option), default )] #[builder(build_fn(error = "OpenAIError"))] -pub struct ReasoningConfig { - /// Constrain effort on reasoning. +pub struct Reasoning { + /// Constrains effort on reasoning for + /// [reasoning models](https://platform.openai.com/docs/guides/reasoning). + /// Currently supported values are `minimal`, `low`, `medium`, and `high`. Reducing + /// reasoning effort can result in faster responses and fewer tokens used + /// on reasoning in a response. + /// + /// Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. #[serde(skip_serializing_if = "Option::is_none")] pub effort: Option, - /// Summary mode for reasoning. + /// A summary of the reasoning performed by the model. This can be + /// useful for debugging and understanding the model's reasoning process. + /// One of `auto`, `concise`, or `detailed`. + /// + /// `concise` is only supported for `computer-use-preview` models. #[serde(skip_serializing_if = "Option::is_none")] pub summary: Option, } @@ -385,22 +762,42 @@ pub enum ReasoningSummary { /// Configuration for text response format. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct TextConfig { - /// Defines the format: plain text, JSON object, or JSON schema. - pub format: TextResponseFormat, +pub struct ResponseTextParam { + /// An object specifying the format that the model must output. + /// + /// Configuring `{ "type": "json_schema" }` enables Structured Outputs, + /// which ensures the model will match your supplied JSON schema. Learn more in the + /// [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + /// + /// The default format is `{ "type": "text" }` with no additional options. + /// + /// **Not recommended for gpt-4o and newer models:** + /// + /// Setting to `{ "type": "json_object" }` enables the older JSON mode, which + /// ensures the message the model generates is valid JSON. Using `json_schema` + /// is preferred for models that support it. + pub format: TextResponseFormatConfiguration, + /// Constrains the verbosity of the model's response. Lower values will result in + /// more concise responses, while higher values will result in more verbose responses. + /// + /// Currently supported values are `low`, `medium`, and `high`. #[serde(skip_serializing_if = "Option::is_none")] pub verbosity: Option, } #[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] #[serde(tag = "type", rename_all = "snake_case")] -pub enum TextResponseFormat { - /// The type of response format being defined: `text` +pub enum TextResponseFormatConfiguration { + /// Default response format. Used to generate text responses. Text, - /// The type of response format being defined: `json_object` + /// JSON object response format. An older method of generating JSON responses. + /// Using `json_schema` is recommended for models that support it. + /// Note that the model will not generate JSON without a system or user message + /// instructing it to do so. 
JsonObject, - /// The type of response format being defined: `json_schema` + /// JSON Schema response format. Used to generate structured JSON responses. + /// Learn more about [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs). JsonSchema(ResponseFormatJsonSchema), } @@ -621,7 +1018,7 @@ pub struct Mcp { pub allowed_tools: Option, /// Optional HTTP headers for the MCP server. #[serde(skip_serializing_if = "Option::is_none")] - pub headers: Option, + pub headers: Option, /// Approval policy or filter for tools. #[serde(skip_serializing_if = "Option::is_none")] pub require_approval: Option, @@ -948,10 +1345,28 @@ pub struct OutputMessage { /// The unique ID of the output message. pub id: String, /// The role of the output message. Always `assistant`. - pub role: Role, + pub role: AssistantRole, /// The status of the message input. One of `in_progress`, `completed`, or /// `incomplete`. Populated when input items are returned via API. pub status: OutputStatus, + /// The type of the output message. Always `message`. + pub r#type: MessageType, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] +#[serde(rename_all = "lowercase")] +pub enum MessageType { + #[default] + Message, +} + +/// The role for an output message - always `assistant`. +/// This type ensures type safety by only allowing the assistant role. +#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq, Default)] +#[serde(rename_all = "lowercase")] +pub enum AssistantRole { + #[default] + Assistant, } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] @@ -964,35 +1379,38 @@ pub enum OutputMessageContent { } /// Nested content within an output message. +/// +/// Note: This enum is similar to OutputItem but may be used in different contexts. +/// Consider using OutputItem directly if it fits your use case. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[serde(tag = "type", rename_all = "snake_case")] pub enum OutputContent { /// An output message from the model. Message(OutputMessage), /// The results of a file search tool call. - FileSearchCall(FileSearchCallOutput), + FileSearchCall(FileSearchToolCall), /// A tool call to run a function. - FunctionCall(FunctionCall), + FunctionCall(FunctionToolCall), /// The results of a web search tool call. - WebSearchCall(WebSearchCallOutput), + WebSearchCall(WebSearchToolCall), /// A tool call to a computer use tool. - ComputerCall(ComputerCallOutput), + ComputerCall(ComputerToolCall), /// A description of the chain of thought used by a reasoning model while generating a response. /// Be sure to include these items in your input to the Responses API for subsequent turns of a /// conversation if you are manually managing context. Reasoning(ReasoningItem), /// Image generation tool call output. - ImageGenerationCall(ImageGenerationCallOutput), + ImageGenerationCall(ImageGenToolCall), /// Code interpreter tool call output. - CodeInterpreterCall(CodeInterpreterCallOutput), + CodeInterpreterCall(CodeInterpreterToolCall), /// Local shell tool call output. - LocalShellCall(LocalShellCallOutput), + LocalShellCall(LocalShellToolCall), /// MCP tool invocation output. - McpCall(McpCallOutput), + McpCall(MCPToolCall), /// MCP list-tools output. - McpListTools(McpListToolsOutput), + McpListTools(MCPListTools), /// MCP approval request output. - McpApprovalRequest(McpApprovalRequestOutput), + McpApprovalRequest(MCPApprovalRequest), } /// Reasoning text content. 
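Because `OutputMessage::content` is now a list of `OutputMessageContent` parts, a small illustrative helper (not part of the patch; the import path is assumed) shows the intended way to pull plain text out of an assistant message while skipping refusals:

use async_openai::types::responses::{OutputMessage, OutputMessageContent};

// Sketch: concatenate the text parts of a single assistant message and ignore
// refusal parts, which is roughly what an SDK-level `output_text` convenience would do.
fn message_text(message: &OutputMessage) -> String {
    message
        .content
        .iter()
        .filter_map(|part| match part {
            OutputMessageContent::OutputText(text) => Some(text.text.as_str()),
            OutputMessageContent::Refusal(_) => None,
        })
        .collect::<Vec<&str>>()
        .join("")
}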
@@ -1277,19 +1695,21 @@ pub struct Type { pub text: String, } -/// Metadata for a function call request. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] pub struct FunctionToolCall { - /// The unique ID of the function tool call. - pub id: String, + /// A JSON string of the arguments to pass to the function. + pub arguments: String, /// The unique ID of the function tool call generated by the model. pub call_id: String, /// The name of the function to run. pub name: String, - /// A JSON string of the arguments to pass to the function. - pub arguments: String, - /// The status of the item. - pub status: OutputStatus, + /// The unique ID of the function tool call. + #[serde(skip_serializing_if = "Option::is_none")] + pub id: Option, + /// The status of the item. One of `in_progress`, `completed`, or `incomplete`. + /// Populated when items are returned via API. + #[serde(skip_serializing_if = "Option::is_none")] + pub status: Option, // TODO rename OutputStatus? } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] @@ -1301,7 +1721,6 @@ pub enum ImageGenToolCallStatus { Failed, } -/// Output of an image generation request. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] pub struct ImageGenToolCall { /// The unique ID of the image generation call. @@ -1371,7 +1790,6 @@ pub struct CodeInterpreterFile { mime_type: String, } -/// Output of a local shell command request. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] pub struct LocalShellToolCall { /// Execute a shell command on the server. @@ -1381,7 +1799,7 @@ pub struct LocalShellToolCall { /// The unique ID of the local shell call. pub id: String, /// The status of the local shell call. - pub status: String, + pub status: OutputStatus, } /// Define the shape of a local shell action (exec). @@ -1460,7 +1878,6 @@ pub struct MCPListToolsTool { pub description: Option, } -/// Output representing a human approval request for an MCP tool. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] pub struct MCPApprovalRequest { /// JSON string of arguments for the tool. @@ -1556,48 +1973,79 @@ pub struct Response { /// An array of content items generated by the model. /// - /// The length and order of items in the output array is dependent on the model's response. - /// Rather than accessing the first item in the output array and assuming it's an assistant - /// message with the content generated by the model, you might consider using - /// the `output_text` property where supported in SDKs. + /// - The length and order of items in the output array is dependent on the model's response. + /// - Rather than accessing the first item in the output array and assuming it's an assistant + /// message with the content generated by the model, you might consider using + /// the `output_text` property where supported in SDKs. pub output: Vec, /// SDK-only convenience property that contains the aggregated text output from all /// `output_text` items in the `output` array, if any are present. /// Supported in the Python and JavaScript SDKs. - #[serde(skip_serializing_if = "Option::is_none")] - pub output_text: Option, + // #[serde(skip_serializing_if = "Option::is_none")] + // pub output_text: Option, - /// Whether parallel tool calls were enabled. + /// Whether to allow the model to run tool calls in parallel. #[serde(skip_serializing_if = "Option::is_none")] pub parallel_tool_calls: Option, - /// Previous response ID, if creating part of a multi-turn conversation. 
+ /// The unique ID of the previous response to the model. Use this to create multi-turn conversations. + /// Learn more about [conversation state](https://platform.openai.com/docs/guides/conversation-state). + /// Cannot be used in conjunction with `conversation`. #[serde(skip_serializing_if = "Option::is_none")] pub previous_response_id: Option, - /// Reasoning configuration echoed back (effort, summary settings). + /// Reference to a prompt template and its variables. + /// [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). #[serde(skip_serializing_if = "Option::is_none")] - pub reasoning: Option, + pub prompt: Option, - /// Whether to store the generated model response for later retrieval via API. + /// Used by OpenAI to cache responses for similar requests to optimize your cache hit rates. Replaces + /// the `user` field. [Learn more](https://platform.openai.com/docs/guides/prompt-caching). #[serde(skip_serializing_if = "Option::is_none")] - pub store: Option, + pub prompt_cache_key: Option, + + /// **gpt-5 and o-series models only** + /// Configuration options for [reasoning models](https://platform.openai.com/docs/guides/reasoning). + #[serde(skip_serializing_if = "Option::is_none")] + pub reasoning: Option, - /// The service tier that actually processed this response. + /// A stable identifier used to help detect users of your application that may be violating OpenAI's + /// usage policies. + /// + /// The IDs should be a string that uniquely identifies each user. We recommend hashing their username + /// or email address, in order to avoid sending us any identifying information. [Learn + /// more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers). + #[serde(skip_serializing_if = "Option::is_none")] + pub safety_identifier: Option, + + /// Specifies the processing type used for serving the request. + /// - If set to 'auto', then the request will be processed with the service tier configured in the Project settings. Unless otherwise configured, the Project will use 'default'. + /// - If set to 'default', then the request will be processed with the standard pricing and performance for the selected model. + /// - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or '[priority](https://openai.com/api-priority-processing/)', then the request will be processed with the corresponding service tier. + /// - When not set, the default behavior is 'auto'. + /// + /// When the `service_tier` parameter is set, the response body will include the `service_tier` value based on the processing mode actually used to serve the request. This response value may be different from the value set in the parameter. #[serde(skip_serializing_if = "Option::is_none")] pub service_tier: Option, /// The status of the response generation. + /// One of `completed`, `failed`, `in_progress`, `cancelled`, `queued`, or `incomplete`. pub status: Status, - /// Sampling temperature that was used. + /// What sampling temperature was used, between 0 and 2. Higher values like 0.8 make + /// outputs more random, lower values like 0.2 make output more focused and deterministic. + /// + /// We generally recommend altering this or `top_p` but not both. #[serde(skip_serializing_if = "Option::is_none")] pub temperature: Option, - /// Text format configuration echoed back (plain, json_object, json_schema). + /// Configuration options for a text response from the model. Can be plain + /// text or structured JSON data. 
Learn more: + /// - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + /// - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) #[serde(skip_serializing_if = "Option::is_none")] - pub text: Option, + pub text: Option, /// How the model chose or was forced to choose a tool. #[serde(skip_serializing_if = "Option::is_none")] @@ -1630,6 +2078,8 @@ pub enum Status { Completed, Failed, InProgress, + Cancelled, + Queued, Incomplete, } From 15c94351f9e34a4a631bd4b98824fce9719b5ab7 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Mon, 3 Nov 2025 12:56:04 -0800 Subject: [PATCH 12/42] checkpoint for updates to responses types --- async-openai/src/types/mcp.rs | 123 ++++ async-openai/src/types/mod.rs | 2 + .../src/types/realtime/session_resource.rs | 63 +- async-openai/src/types/responses.rs | 616 ++++++++++++------ 4 files changed, 529 insertions(+), 275 deletions(-) create mode 100644 async-openai/src/types/mcp.rs diff --git a/async-openai/src/types/mcp.rs b/async-openai/src/types/mcp.rs new file mode 100644 index 00000000..7b76c5fb --- /dev/null +++ b/async-openai/src/types/mcp.rs @@ -0,0 +1,123 @@ +use derive_builder::Builder; +use serde::{Deserialize, Serialize}; + +use crate::error::OpenAIError; + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum McpToolConnectorId { + ConnectorDropbox, + ConnectorGmail, + ConnectorGooglecalendar, + ConnectorGoogledrive, + ConnectorMicrosoftteams, + ConnectorOutlookcalendar, + ConnectorOutlookemail, + ConnectorSharepoint, +} + +#[derive(Debug, Serialize, Deserialize, Clone, Builder, PartialEq, Default)] +#[builder( + name = "MCPToolArgs", + pattern = "mutable", + setter(into, strip_option), + default +)] +#[builder(build_fn(error = "OpenAIError"))] +pub struct MCPTool { + /// A label for this MCP server, used to identify it in tool calls. + pub server_label: String, + + /// List of allowed tool names or a filter object. + #[serde(skip_serializing_if = "Option::is_none")] + pub allowed_tools: Option, + + /// An OAuth access token that can be used with a remote MCP server, either with a custom MCP + /// server URL or a service connector. Your application must handle the OAuth authorization + /// flow and provide the token here. + #[serde(skip_serializing_if = "Option::is_none")] + pub authorization: Option, + + /// Identifier for service connectors, like those available in ChatGPT. One of `server_url` or + /// `connector_id` must be provided. Learn more about service connectors [here](https://platform.openai.com/docs/guides/tools-remote-mcp#connectors). + /// + /// Currently supported `connector_id` values are: + /// - Dropbox: `connector_dropbox` + /// - Gmail: `connector_gmail` + /// - Google Calendar: `connector_googlecalendar` + /// - Google Drive: `connector_googledrive` + /// - Microsoft Teams: `connector_microsoftteams` + /// - Outlook Calendar: `connector_outlookcalendar` + /// - Outlook Email: `connector_outlookemail` + /// - SharePoint: `connector_sharepoint` + #[serde(skip_serializing_if = "Option::is_none")] + pub connector_id: Option, + + /// Optional HTTP headers to send to the MCP server. Use for authentication or other purposes. + #[serde(skip_serializing_if = "Option::is_none")] + pub headers: Option, + + /// Specify which of the MCP server's tools require approval. + #[serde(skip_serializing_if = "Option::is_none")] + pub require_approval: Option, + + /// Optional description of the MCP server, used to provide more context. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub server_description: Option, + + /// The URL for the MCP server. One of `server_url` or `connector_id` must be provided. + #[serde(skip_serializing_if = "Option::is_none")] + pub server_url: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(untagged)] +pub enum MCPToolAllowedTools { + /// A string array of allowed tool names + List(Vec), + /// A filter object to specify which tools are allowed. + Filter(MCPToolFilter), +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct MCPToolFilter { + /// Indicates whether or not a tool modifies data or is read-only. + /// If an MCP server is annotated with [readOnlyHint](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + /// it will match this filter. + #[serde(skip_serializing_if = "Option::is_none")] + pub read_only: Option, + /// List of allowed tool names. + #[serde(skip_serializing_if = "Option::is_none")] + pub tool_names: Option>, +} + +/// Approval policy or filter for MCP tools. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(untagged)] +pub enum MCPToolRequireApproval { + /// Specify which of the MCP server's tools require approval. Can be + /// `always`, `never`, or a filter object associated with tools + /// that require approval. + Filter(MCPToolApprovalFilter), + /// Specify a single approval policy for all tools. One of `always` or + /// `never`. When set to `always`, all tools will require approval. When + /// set to `never`, all tools will not require approval. + ApprovalSetting(MCPToolApprovalSetting), +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(rename_all = "lowercase")] +pub enum MCPToolApprovalSetting { + Always, + Never, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct MCPToolApprovalFilter { + /// A list of tools that always require approval. + #[serde(skip_serializing_if = "Option::is_none")] + pub always: Option, + /// A list of tools that never require approval. + #[serde(skip_serializing_if = "Option::is_none")] + pub never: Option, +} diff --git a/async-openai/src/types/mod.rs b/async-openai/src/types/mod.rs index c1cd4cb5..c6474aa5 100644 --- a/async-openai/src/types/mod.rs +++ b/async-openai/src/types/mod.rs @@ -14,6 +14,7 @@ mod file; mod fine_tuning; mod image; mod invites; +mod mcp; mod message; mod model; mod moderation; @@ -46,6 +47,7 @@ pub use file::*; pub use fine_tuning::*; pub use image::*; pub use invites::*; +pub use mcp::*; pub use message::*; pub use model::*; pub use moderation::*; diff --git a/async-openai/src/types/realtime/session_resource.rs b/async-openai/src/types/realtime/session_resource.rs index c11f78c6..ba0f12cf 100644 --- a/async-openai/src/types/realtime/session_resource.rs +++ b/async-openai/src/types/realtime/session_resource.rs @@ -1,6 +1,6 @@ use serde::{Deserialize, Serialize}; -use crate::types::responses::RequireApproval; +use crate::types::MCPTool; #[derive(Debug, Default, Serialize, Deserialize, Clone)] pub struct AudioTranscription { @@ -105,67 +105,6 @@ pub struct FunctionTool { pub parameters: serde_json::Value, } -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(untagged)] -pub enum AllowedTools { - /// A string array of allowed tool names - List(Vec), - /// A filter object to specify which tools are allowed. 
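For reference, a connector-backed MCP tool built with the new `MCPToolArgs` builder might look like the sketch below. It assumes the builder behaves like the crate's other `*Args` builders (per the `setter(into, strip_option)`, `default`, and `build_fn(error = "OpenAIError")` attributes in this patch); the server label and OAuth token are placeholders.

```rust
use async_openai::error::OpenAIError;
use async_openai::types::{
    MCPTool, MCPToolApprovalSetting, MCPToolArgs, MCPToolRequireApproval, McpToolConnectorId,
};

// A minimal sketch: an MCP tool that talks to the Google Drive connector
// instead of a custom server URL, with approvals disabled for every tool.
fn google_drive_mcp_tool(oauth_token: &str) -> Result<MCPTool, OpenAIError> {
    MCPToolArgs::default()
        .server_label("google_drive") // placeholder label
        .connector_id(McpToolConnectorId::ConnectorGoogledrive)
        .authorization(oauth_token)
        .require_approval(MCPToolRequireApproval::ApprovalSetting(
            MCPToolApprovalSetting::Never,
        ))
        .build()
}
```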
- Filter(MCPAllowedToolsFilter), -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct MCPAllowedToolsFilter { - /// Indicates whether or not a tool modifies data or is read-only. - /// If an MCP server is annotated with [readOnlyHint](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), - /// it will match this filter. - #[serde(skip_serializing_if = "Option::is_none")] - pub read_only: Option, - /// List of allowed tool names. - #[serde(skip_serializing_if = "Option::is_none")] - pub tool_names: Option>, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct MCPTool { - /// A label for this MCP server, used to identify it in tool calls. - pub server_label: String, - - /// List of allowed tool names or a filter object. - pub allowed_tools: AllowedTools, - - /// An OAuth access token that can be used with a remote MCP server, either with a custom MCP - /// server URL or a service connector. Your application must handle the OAuth authorization - /// flow and provide the token here. - pub authorization: Option, - - /// Identifier for service connectors, like those available in ChatGPT. One of `server_url` or - /// `connector_id` must be provided. Learn more about service connectors [here](https://platform.openai.com/docs/guides/tools-remote-mcp#connectors). - /// - /// Currently supported `connector_id` values are: - /// - Dropbox: `connector_dropbox` - /// - Gmail: `connector_gmail` - /// - Google Calendar: `connector_googlecalendar` - /// - Google Drive: `connector_googledrive` - /// - Microsoft Teams: `connector_microsoftteams` - /// - Outlook Calendar: `connector_outlookcalendar` - /// - Outlook Email: `connector_outlookemail` - /// - SharePoint: `connector_sharepoint` - pub connector_id: Option, - - /// Optional HTTP headers to send to the MCP server. Use for authentication or other purposes. - pub headers: Option, - - /// Specify which of the MCP server's tools require approval. - pub require_approval: Option, - - /// Optional description of the MCP server, used to provide more context. - pub server_description: Option, - - /// The URL for the MCP server. One of `server_url` or `connector_id` must be provided. - pub server_url: Option, -} - #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(tag = "type")] pub enum ToolDefinition { diff --git a/async-openai/src/types/responses.rs b/async-openai/src/types/responses.rs index 4b60a53b..824749a2 100644 --- a/async-openai/src/types/responses.rs +++ b/async-openai/src/types/responses.rs @@ -1,4 +1,5 @@ use crate::error::OpenAIError; +use crate::types::MCPTool; pub use crate::types::{ CompletionTokensDetails, ImageDetail, PromptTokensDetails, ReasoningEffort, ResponseFormatJsonSchema, @@ -174,7 +175,7 @@ impl InputItem { /// Creates a simple text message with the given role and content. pub fn text_message(role: Role, content: impl Into) -> Self { Self::EasyMessage(EasyInputMessage { - r#type: InputMessageType::Message, + r#type: MessageType::Message, role, content: EasyInputContent::Text(content.into()), }) @@ -804,34 +805,87 @@ pub enum TextResponseFormatConfiguration { /// Definitions for model-callable tools. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[serde(tag = "type", rename_all = "snake_case")] -pub enum ToolDefinition { - /// File search tool. - FileSearch(FileSearch), - /// Custom function call. - Function(Function), - /// Web search preview tool. - WebSearchPreview(WebSearchPreview), - /// Virtual computer control tool. 
- ComputerUsePreview(ComputerUsePreview), - /// Remote Model Context Protocol server. - Mcp(Mcp), - /// Python code interpreter tool. - CodeInterpreter(CodeInterpreter), - /// Image generation tool. - ImageGeneration(ImageGeneration), - /// Local shell command execution tool. +pub enum Tool { + /// Defines a function in your own code the model can choose to call. Learn more about [function + /// calling](https://platform.openai.com/docs/guides/tools). + Function(FunctionTool), + /// A tool that searches for relevant content from uploaded files. Learn more about the [file search + /// tool](https://platform.openai.com/docs/guides/tools-file-search). + FileSearch(FileSearchTool), + /// A tool that controls a virtual computer. Learn more about the [computer + /// use tool](https://platform.openai.com/docs/guides/tools-computer-use). + ComputerUsePreview(ComputerUsePreviewTool), + /// Search the Internet for sources related to the prompt. Learn more about the + /// [web search tool](https://platform.openai.com/docs/guides/tools-web-search). + WebSearch(WebSearchTool), + /// type: web_search_2025_08_26 + #[serde(rename = "web_search_2025_08_26")] + WebSearch20250826(WebSearchTool), + /// Give the model access to additional tools via remote Model Context Protocol + /// (MCP) servers. [Learn more about MCP](https://platform.openai.com/docs/guides/tools-remote-mcp). + Mcp(MCPTool), + /// A tool that runs Python code to help generate a response to a prompt. + CodeInterpreter(CodeInterpreterTool), + /// A tool that generates images using a model like `gpt-image-1`. + ImageGeneration(ImageGenTool), + /// A tool that allows the model to execute shell commands in a local environment. LocalShell, + /// A custom tool that processes input using a specified format. Learn more about [custom + /// tools](https://platform.openai.com/docs/guides/function-calling#custom-tools) + Custom(CustomToolParam), + /// This tool searches the web for relevant results to use in a response. Learn more about the [web search + ///tool](https://platform.openai.com/docs/guides/tools-web-search). + WebSearchPreview(WebSearchTool), + /// type: web_search_preview_2025_03_11 + #[serde(rename = "web_search_preview_2025_03_11")] + WebSearchPreview20250311(WebSearchTool), +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] +pub struct CustomToolParam { + /// The name of the custom tool, used to identify it in tool calls. + pub name: String, + /// Optional description of the custom tool, used to provide more context. + pub description: Option, + /// The input format for the custom tool. Default is unconstrained text. + pub format: CustomToolParamFormat, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] +#[serde(rename_all = "lowercase")] +pub enum GrammarSyntax { + Lark, + #[default] + Regex, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder, Default)] +pub struct CustomGrammarFormatParam { + /// The grammar definition. + pub definition: String, + /// The syntax of the grammar definition. One of `lark` or `regex`. + pub syntax: GrammarSyntax, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] +#[serde(tag = "type", rename_all = "lowercase")] +pub enum CustomToolParamFormat { + /// Unconstrained free-form text. + #[default] + Text, + /// A grammar defined by the user. 
+ Grammar(CustomGrammarFormatParam), } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] #[builder( - name = "FileSearchArgs", + name = "FileSearchToolArgs", pattern = "mutable", setter(into, strip_option), default )] #[builder(build_fn(error = "OpenAIError"))] -pub struct FileSearch { +pub struct FileSearchTool { /// The IDs of the vector stores to search. pub vector_store_ids: Vec, /// The maximum number of results to return. This number should be between 1 and 50 inclusive. @@ -847,69 +901,117 @@ pub struct FileSearch { #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] #[builder( - name = "FunctionArgs", + name = "FunctionToolArgs", pattern = "mutable", setter(into, strip_option), default )] -pub struct Function { +pub struct FunctionTool { /// The name of the function to call. pub name: String, /// A JSON schema object describing the parameters of the function. - pub parameters: serde_json::Value, - /// Whether to enforce strict parameter validation. - pub strict: bool, + #[serde(skip_serializing_if = "Option::is_none")] + pub parameters: Option, + /// Whether to enforce strict parameter validation. Default `true`. + #[serde(skip_serializing_if = "Option::is_none")] + pub strict: Option, /// A description of the function. Used by the model to determine whether or not to call the /// function. #[serde(skip_serializing_if = "Option::is_none")] pub description: Option, } +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct WebSearchToolFilters { + /// Allowed domains for the search. If not provided, all domains are allowed. + /// Subdomains of the provided domains are allowed as well. + /// + /// Example: `["pubmed.ncbi.nlm.nih.gov"]` + #[serde(skip_serializing_if = "Option::is_none")] + pub allowed_domains: Option>, +} + #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] #[builder( - name = "WebSearchPreviewArgs", + name = "WebSearchToolArgs", pattern = "mutable", setter(into, strip_option), default )] -pub struct WebSearchPreview { - /// The user's location. +pub struct WebSearchTool { + /// Filters for the search. #[serde(skip_serializing_if = "Option::is_none")] - pub user_location: Option, - /// High level guidance for the amount of context window space to use for the search. + pub filters: Option, + /// The approximate location of the user. #[serde(skip_serializing_if = "Option::is_none")] - pub search_context_size: Option, + pub user_location: Option, + /// High level guidance for the amount of context window space to use for the search. One of `low`, + /// `medium`, or `high`. `medium` is the default. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub search_context_size: Option, } -#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq, Default)] #[serde(rename_all = "lowercase")] -pub enum WebSearchContextSize { +pub enum WebSearchToolSearchContextSize { Low, + #[default] Medium, High, } +#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq, Default)] +#[serde(rename_all = "lowercase")] +pub enum ComputerEnvironment { + Windows, + Mac, + Linux, + Ubuntu, + #[default] + Browser, +} + #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] #[builder( - name = "ComputerUsePreviewArgs", + name = "ComputerUsePreviewToolArgs", pattern = "mutable", setter(into, strip_option), default )] -pub struct ComputerUsePreview { +pub struct ComputerUsePreviewTool { /// The type of computer environment to control. - environment: String, + environment: ComputerEnvironment, /// The width of the computer display. display_width: u32, /// The height of the computer display. display_height: u32, } +#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] +pub enum RankVersionType { + #[serde(rename = "auto")] + Auto, + #[serde(rename = "default-2024-11-15")] + Default20241115, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct HybridSearch { + /// The weight of the embedding in the reciprocal ranking fusion. + pub embedding_weight: f32, + /// The weight of the text in the reciprocal ranking fusion. + pub text_weight: f32, +} + /// Options for search result ranking. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] pub struct RankingOptions { + /// Weights that control how reciprocal rank fusion balances semantic embedding matches versus + /// sparse keyword matches when hybrid search is enabled. + #[serde(skip_serializing_if = "Option::is_none")] + pub hybrid_search: Option, /// The ranker to use for the file search. - pub ranker: String, + pub ranker: RankVersionType, /// The score threshold for the file search, a number between 0 and 1. Numbers closer to 1 will /// attempt to return only the most relevant results, but may return fewer results. #[serde(skip_serializing_if = "Option::is_none")] @@ -923,16 +1025,23 @@ pub enum Filter { /// A filter used to compare a specified attribute key to a given value using a defined /// comparison operation. Comparison(ComparisonFilter), - /// Combine multiple filters using and or or. + /// Combine multiple filters using `and` or `or`. Compound(CompoundFilter), } /// Single comparison filter. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] pub struct ComparisonFilter { - /// Specifies the comparison operator - #[serde(rename = "type")] - pub op: ComparisonType, + /// Specifies the comparison operator: `eq`, `ne`, `gt`, `gte`, `lt`, `lte`, `in`, `nin`. + /// - `eq`: equals + /// - `ne`: not equal + /// - `gt`: greater than + /// - `gte`: greater than or equal + /// - `lt`: less than + /// - `lte`: less than or equal + /// - `in`: in + /// - `nin`: not in + pub r#type: ComparisonType, /// The key to compare against the value. pub key: String, /// The value to compare against the attribute key; supports string, number, or boolean types. 
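As a usage sketch for the renamed `FileSearchToolArgs` builder above (assuming the elided generic parameters are the obvious ones, e.g. `Vec<String>` for `vector_store_ids`), a file search tool scoped to a single vector store could be built and wrapped into the `Tool` enum like this; the vector store ID is a placeholder:

```rust
use async_openai::error::OpenAIError;
use async_openai::types::responses::{FileSearchToolArgs, Tool};

// A minimal sketch: attach a file search tool that only looks at one vector store.
fn file_search_tool() -> Result<Tool, OpenAIError> {
    let file_search = FileSearchToolArgs::default()
        .vector_store_ids(vec!["vs_abc123".to_string()]) // placeholder vector store ID
        .build()?;
    Ok(Tool::FileSearch(file_search))
}
```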
@@ -948,19 +1057,22 @@ pub enum ComparisonType { #[serde(rename = "gt")] GreaterThan, #[serde(rename = "gte")] - GreaterThanOrEqualTo, + GreaterThanOrEqual, #[serde(rename = "lt")] LessThan, #[serde(rename = "lte")] - LessThanOrEqualTo, + LessThanOrEqual, + #[serde(rename = "in")] + In, + #[serde(rename = "nin")] + NotIn, } -/// Combine multiple filters. +/// Combine multiple filters using `and` or `or`. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] pub struct CompoundFilter { - /// Type of operation - #[serde(rename = "type")] - pub op: CompoundType, + /// 'Type of operation: `and` or `or`.' + pub r#type: CompoundType, /// Array of filters to combine. Items can be ComparisonFilter or CompoundFilter. pub filters: Vec, } @@ -972,142 +1084,87 @@ pub enum CompoundType { Or, } +#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq, Default)] +#[serde(rename_all = "lowercase")] +pub enum WebSearchApproximateLocationType { + #[default] + Approximate, +} + /// Approximate user location for web search. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] #[builder( - name = "LocationArgs", + name = "WebSearchApproximateLocationArgs", pattern = "mutable", setter(into, strip_option), default )] #[builder(build_fn(error = "OpenAIError"))] -pub struct Location { - /// The type of location approximation. Always approximate. - #[serde(rename = "type")] - pub kind: String, - /// Free text input for the city of the user, e.g. San Francisco. +pub struct WebSearchApproximateLocation { + /// The type of location approximation. Always `approximate`. + pub r#type: WebSearchApproximateLocationType, + /// Free text input for the city of the user, e.g. `San Francisco`. #[serde(skip_serializing_if = "Option::is_none")] pub city: Option, - /// The two-letter ISO country code of the user, e.g. US. + /// The two-letter [ISO country code](https://en.wikipedia.org/wiki/ISO_3166-1) of the user, + /// e.g. `US`. #[serde(skip_serializing_if = "Option::is_none")] pub country: Option, - /// Free text input for the region of the user, e.g. California. + /// Free text input for the region of the user, e.g. `California`. #[serde(skip_serializing_if = "Option::is_none")] pub region: Option, - /// The IANA timezone of the user, e.g. America/Los_Angeles. + /// The [IANA timezone](https://timeapi.io/documentation/iana-timezones) of the user, e.g. + /// `America/Los_Angeles`. #[serde(skip_serializing_if = "Option::is_none")] pub timezone: Option, } -/// MCP (Model Context Protocol) tool configuration. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] -#[builder( - name = "McpArgs", - pattern = "mutable", - setter(into, strip_option), - default -)] -#[builder(build_fn(error = "OpenAIError"))] -pub struct Mcp { - /// A label for this MCP server. - pub server_label: String, - /// The URL for the MCP server. - pub server_url: String, - /// List of allowed tool names or filter object. - #[serde(skip_serializing_if = "Option::is_none")] - pub allowed_tools: Option, - /// Optional HTTP headers for the MCP server. - #[serde(skip_serializing_if = "Option::is_none")] - pub headers: Option, - /// Approval policy or filter for tools. - #[serde(skip_serializing_if = "Option::is_none")] - pub require_approval: Option, -} - -/// Allowed tools configuration for MCP. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(untagged)] -pub enum AllowedTools { - /// A flat list of allowed tool names. - List(Vec), - /// A filter object specifying allowed tools. 
- Filter(McpAllowedToolsFilter), -} - -/// Filter object for MCP allowed tools. +/// Container configuration for a code interpreter. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct McpAllowedToolsFilter { - /// Names of tools in the filter - #[serde(skip_serializing_if = "Option::is_none")] - pub tool_names: Option>, -} +#[serde(tag = "type", rename_all = "snake_case")] +pub enum CodeInterpreterToolContainer { + /// Configuration for a code interpreter container. Optionally specify the IDs of the + /// files to run the code on. + Auto(CodeInterpreterContainerAuto), -/// Approval policy or filter for MCP tools. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(untagged)] -pub enum RequireApproval { - /// A blanket policy: "always" or "never". - Policy(RequireApprovalPolicy), - /// A filter object specifying which tools require approval. - Filter(McpApprovalFilter), + /// The container ID. + #[serde(untagged)] + ContainerID(String), } -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(rename_all = "lowercase")] -pub enum RequireApprovalPolicy { - Always, - Never, +impl Default for CodeInterpreterToolContainer { + fn default() -> Self { + Self::Auto(CodeInterpreterContainerAuto::default()) + } } -/// Filter object for MCP tool approval. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct McpApprovalFilter { - /// A list of tools that always require approval. - #[serde(skip_serializing_if = "Option::is_none")] - pub always: Option, - /// A list of tools that never require approval. +/// Auto configuration for code interpreter container. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] +pub struct CodeInterpreterContainerAuto { + /// An optional list of uploaded files to make available to your code. #[serde(skip_serializing_if = "Option::is_none")] - pub never: Option, -} + pub file_ids: Option>, -/// Container configuration for a code interpreter. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(untagged)] -pub enum CodeInterpreterContainer { - /// A simple container ID. - Id(String), - /// Auto-configured container with optional files. - Container(CodeInterpreterContainerKind), -} - -/// Auto configuration for code interpreter container. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(tag = "type", rename_all = "snake_case")] -pub enum CodeInterpreterContainerKind { - Auto { - /// Optional list of uploaded file IDs. - #[serde(skip_serializing_if = "Option::is_none")] - file_ids: Option>, - }, + #[serde(skip_serializing_if = "Option::is_none")] + pub memory_limit: Option, } -/// Code interpreter tool definition. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] #[builder( - name = "CodeInterpreterArgs", + name = "CodeInterpreterToolArgs", pattern = "mutable", setter(into, strip_option), default )] #[builder(build_fn(error = "OpenAIError"))] -pub struct CodeInterpreter { - /// Container configuration for running code. - pub container: CodeInterpreterContainer, +pub struct CodeInterpreterTool { + /// The code interpreter container. Can be a container ID or an object that + /// specifies uploaded file IDs to make available to your code. + pub container: CodeInterpreterToolContainer, } -/// Mask image input for image generation. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct InputImageMask { +pub struct ImageGenToolInputImageMask { /// Base64-encoded mask image. 
#[serde(skip_serializing_if = "Option::is_none")] pub image_url: Option, @@ -1116,6 +1173,22 @@ pub struct InputImageMask { pub file_id: Option, } +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] +#[serde(rename_all = "lowercase")] +pub enum InputFidelity { + #[default] + High, + Low, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] +#[serde(rename_all = "lowercase")] +pub enum ImageGenToolModeration { + #[default] + Auto, + Low, +} + /// Image generation tool definition. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] #[builder( @@ -1125,64 +1198,78 @@ pub struct InputImageMask { default )] #[builder(build_fn(error = "OpenAIError"))] -pub struct ImageGeneration { - /// Background type: transparent, opaque, or auto. +pub struct ImageGenTool { + /// Background type for the generated image. One of `transparent`, + /// `opaque`, or `auto`. Default: `auto`. #[serde(skip_serializing_if = "Option::is_none")] - pub background: Option, - /// Optional mask for inpainting. + pub background: Option, + /// Control how much effort the model will exert to match the style and features, especially facial features, + /// of input images. This parameter is only supported for `gpt-image-1`. Unsupported + /// for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`. #[serde(skip_serializing_if = "Option::is_none")] - pub input_image_mask: Option, - /// Model to use (default: gpt-image-1). + pub input_fidelity: Option, + /// Optional mask for inpainting. Contains `image_url` + /// (string, optional) and `file_id` (string, optional). + #[serde(skip_serializing_if = "Option::is_none")] + pub input_image_mask: Option, + /// The image generation model to use. Default: `gpt-image-1`. #[serde(skip_serializing_if = "Option::is_none")] pub model: Option, - /// Moderation level (default: auto). + /// Moderation level for the generated image. Default: `auto`. #[serde(skip_serializing_if = "Option::is_none")] - pub moderation: Option, - /// Compression level (0-100). + pub moderation: Option, + /// Compression level for the output image. Default: 100. #[serde(skip_serializing_if = "Option::is_none")] pub output_compression: Option, - /// Output format: png, webp, or jpeg. + /// The output format of the generated image. One of `png`, `webp`, or + /// `jpeg`. Default: `png`. #[serde(skip_serializing_if = "Option::is_none")] - pub output_format: Option, - /// Number of partial images (0-3). + pub output_format: Option, + /// Number of partial images to generate in streaming mode, from 0 (default value) to 3. #[serde(skip_serializing_if = "Option::is_none")] pub partial_images: Option, - /// Quality: low, medium, high, or auto. + /// The quality of the generated image. One of `low`, `medium`, `high`, + /// or `auto`. Default: `auto`. #[serde(skip_serializing_if = "Option::is_none")] - pub quality: Option, - /// Size: e.g. "1024x1024" or auto. + pub quality: Option, + /// The size of the generated image. One of `1024x1024`, `1024x1536`, + /// `1536x1024`, or `auto`. Default: `auto`. 
#[serde(skip_serializing_if = "Option::is_none")] - pub size: Option, + pub size: Option, } -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] #[serde(rename_all = "lowercase")] -pub enum ImageGenerationBackground { +pub enum ImageGenToolBackground { Transparent, Opaque, + #[default] Auto, } -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] #[serde(rename_all = "lowercase")] -pub enum ImageGenerationOutputFormat { +pub enum ImageGenToolOutputFormat { + #[default] Png, Webp, Jpeg, } -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] #[serde(rename_all = "lowercase")] -pub enum ImageGenerationQuality { +pub enum ImageGenToolQuality { Low, Medium, High, + #[default] Auto, } -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] #[serde(rename_all = "lowercase")] -pub enum ImageGenerationSize { +pub enum ImageGenToolSize { + #[default] Auto, #[serde(rename = "1024x1024")] Size1024x1024, @@ -1192,44 +1279,105 @@ pub enum ImageGenerationSize { Size1536x1024, } -/// Control how the model picks or is forced to pick a tool. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(untagged)] -pub enum ToolChoice { - /// Controls which (if any) tool is called by the model. - Mode(ToolChoiceMode), - /// Indicates that the model should use a built-in tool to generate a response. - Hosted { - /// The type of hosted tool the model should to use. - #[serde(rename = "type")] - kind: HostedToolType, - }, - /// Use this option to force the model to call a specific function. - Function { - /// The name of the function to call. - name: String, - }, -} - -/// Simple tool-choice modes. -#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] #[serde(rename_all = "lowercase")] -pub enum ToolChoiceMode { - /// The model will not call any tool and instead generates a message. - None, - /// The model can pick between generating a message or calling one or more tools. +pub enum ToolChoiceAllowedMode { Auto, - /// The model must call one or more tools. Required, } -/// Hosted tool type identifiers. -#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] -#[serde(rename_all = "snake_case")] -pub enum HostedToolType { +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ToolChoiceAllowed { + /// Constrains the tools available to the model to a pre-defined set. + /// + /// `auto` allows the model to pick from among the allowed tools and generate a + /// message. + /// + /// `required` requires the model to call one or more of the allowed tools. + mode: ToolChoiceAllowedMode, + /// A list of tool definitions that the model should be allowed to call. + /// + /// For the Responses API, the list of tool definitions might look like: + /// ```json + /// [ + /// { "type": "function", "name": "get_weather" }, + /// { "type": "mcp", "server_label": "deepwiki" }, + /// { "type": "image_generation" } + /// ] + /// ``` + tools: Vec, +} + +/// The type of hosted tool the model should to use. Learn more about +/// [built-in tools](https://platform.openai.com/docs/guides/tools). 
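The `ToolChoiceParam` enum introduced below relies on serde's support for mixing an internally tagged enum with `#[serde(untagged)]` fallback variants, so a bare string like `"auto"` and an object like `{"type": "function", ...}` both map onto one type. The following self-contained sketch shows how that pattern serializes; the `ToolChoice`, `Function`, and `Mode` names here are stand-ins for illustration, not the crate's types.

```rust
use serde::{Deserialize, Serialize};

#[derive(Debug, Serialize, Deserialize, PartialEq)]
struct Function {
    name: String,
}

#[derive(Debug, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "lowercase")]
enum Mode {
    None,
    Auto,
    Required,
}

#[derive(Debug, Serialize, Deserialize, PartialEq)]
#[serde(tag = "type", rename_all = "snake_case")]
enum ToolChoice {
    // Tagged form: serializes as {"type": "function", "name": "..."}.
    Function(Function),
    // Untagged fallback: serializes as the bare string "none" / "auto" / "required".
    #[serde(untagged)]
    Mode(Mode),
}

fn main() {
    let forced = ToolChoice::Function(Function {
        name: "get_weather".into(),
    });
    assert_eq!(
        serde_json::to_value(&forced).unwrap(),
        serde_json::json!({ "type": "function", "name": "get_weather" })
    );

    let auto = ToolChoice::Mode(Mode::Auto);
    assert_eq!(
        serde_json::to_value(&auto).unwrap(),
        serde_json::json!("auto")
    );
}
```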
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum ToolChoiceTypes { FileSearch, WebSearchPreview, ComputerUsePreview, + CodeInterpreter, + ImageGeneration, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ToolChoiceFunction { + /// The name of the function to call. + name: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ToolChoiceMCP { + /// The name of the tool to call on the server. + name: String, + /// The label of the MCP server to use. + server_label: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ToolChoiceCustom { + /// The name of the custom tool to call. + name: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum ToolChoiceParam { + /// Constrains the tools available to the model to a pre-defined set. + AllowedTools(ToolChoiceAllowed), + + /// Use this option to force the model to call a specific function. + Function(ToolChoiceFunction), + + /// Use this option to force the model to call a specific tool on a remote MCP server. + Mcp(ToolChoiceMCP), + + /// Use this option to force the model to call a custom tool. + Custom(ToolChoiceCustom), + + /// Indicates that the model should use a built-in tool to generate a response. + /// [Learn more about built-in tools](https://platform.openai.com/docs/guides/tools). + #[serde(untagged)] + Hosted(ToolChoiceTypes), + + /// Controls which (if any) tool is called by the model. + /// + /// `none` means the model will not call any tool and instead generates a message. + /// + /// `auto` means the model can pick between generating a message or calling one or + /// more tools. + /// + /// `required` means the model must call one or more tools. + #[serde(untagged)] + Mode(ToolChoiceOptions), +} + +#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] +#[serde(rename_all = "lowercase")] +pub enum ToolChoiceOptions { + None, + Auto, + Required, } /// Error returned by the API when a request fails. @@ -1890,17 +2038,30 @@ pub struct MCPApprovalRequest { pub server_label: String, } +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct InputTokenDetails { + /// The number of tokens that were retrieved from the cache. + /// [More on prompt caching](https://platform.openai.com/docs/guides/prompt-caching). + pub cached_tokens: u32, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct OutputTokenDetails { + /// The number of reasoning tokens. + pub reasoning_tokens: u32, +} + /// Usage statistics for a response. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct Usage { +pub struct ResponseUsage { /// The number of input tokens. pub input_tokens: u32, /// A detailed breakdown of the input tokens. - pub input_tokens_details: PromptTokensDetails, + pub input_tokens_details: InputTokenDetails, /// The number of output tokens. pub output_tokens: u32, /// A detailed breakdown of the output tokens. - pub output_tokens_details: CompletionTokensDetails, + pub output_tokens_details: OutputTokenDetails, /// The total number of tokens used. pub total_tokens: u32, } @@ -2047,29 +2208,58 @@ pub struct Response { #[serde(skip_serializing_if = "Option::is_none")] pub text: Option, - /// How the model chose or was forced to choose a tool. + /// How the model should select which tool (or tools) to use when generating + /// a response. 
See the `tools` parameter to see how to specify which tools + /// the model can call. #[serde(skip_serializing_if = "Option::is_none")] - pub tool_choice: Option, + pub tool_choice: Option, - /// Tool definitions that were provided. - #[serde(skip_serializing_if = "Option::is_none")] - pub tools: Option>, + /// An array of tools the model may call while generating a response. You + /// can specify which tool to use by setting the `tool_choice` parameter. + /// + /// We support the following categories of tools: + /// - **Built-in tools**: Tools that are provided by OpenAI that extend the + /// model's capabilities, like [web search](https://platform.openai.com/docs/guides/tools-web-search) + /// or [file search](https://platform.openai.com/docs/guides/tools-file-search). Learn more about + /// [built-in tools](https://platform.openai.com/docs/guides/tools). + /// - **MCP Tools**: Integrations with third-party systems via custom MCP servers + /// or predefined connectors such as Google Drive and SharePoint. Learn more about + /// [MCP Tools](https://platform.openai.com/docs/guides/tools-connectors-mcp). + /// - **Function calls (custom tools)**: Functions that are defined by you, + /// enabling the model to call your own code with strongly typed arguments + /// and outputs. Learn more about + /// [function calling](https://platform.openai.com/docs/guides/function-calling). You can also use + /// custom tools to call your own code. + #[serde(skip_serializing_if = "Option::is_none")] + pub tools: Option>, + + /// An integer between 0 and 20 specifying the number of most likely tokens to return at each + /// token position, each with an associated log probability. + #[serde(skip_serializing_if = "Option::is_none")] + pub top_logprobs: Option, - /// Nucleus sampling cutoff that was used. + /// An alternative to sampling with temperature, called nucleus sampling, + /// where the model considers the results of the tokens with top_p probability + /// mass. So 0.1 means only the tokens comprising the top 10% probability mass + /// are considered. + /// + /// We generally recommend altering this or `temperature` but not both. #[serde(skip_serializing_if = "Option::is_none")] pub top_p: Option, - /// Truncation strategy that was applied. + ///The truncation strategy to use for the model response. + /// - `auto`: If the input to this Response exceeds + /// the model's context window size, the model will truncate the + /// response to fit the context window by dropping items from the beginning of the conversation. + /// - `disabled` (default): If the input size will exceed the context window + /// size for a model, the request will fail with a 400 error. #[serde(skip_serializing_if = "Option::is_none")] pub truncation: Option, - /// Token usage statistics for this request. + /// Represents token usage details including input tokens, output tokens, + /// a breakdown of output tokens, and the total tokens used. #[serde(skip_serializing_if = "Option::is_none")] - pub usage: Option, - - /// End-user ID for which this response was generated. 
- #[serde(skip_serializing_if = "Option::is_none")] - pub user: Option, + pub usage: Option, } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] From 84bbfe48acbc914b7ba0235fa72c32102b172426 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Mon, 3 Nov 2025 14:15:26 -0800 Subject: [PATCH 13/42] updates for CreateResponse --- async-openai/src/types/responses.rs | 266 +++++++++++++++++++--------- 1 file changed, 182 insertions(+), 84 deletions(-) diff --git a/async-openai/src/types/responses.rs b/async-openai/src/types/responses.rs index 824749a2..095bb63c 100644 --- a/async-openai/src/types/responses.rs +++ b/async-openai/src/types/responses.rs @@ -29,16 +29,23 @@ pub enum OutputStatus { Incomplete, } -/// Input payload: raw text or structured context items. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[serde(untagged)] -pub enum Input { - /// A text input to the model, equivalent to a text input with the user role. +pub enum InputParam { + /// A text input to the model, equivalent to a text input with the + /// `user` role. Text(String), - /// A list of one or many input items to the model, containing different content types. + /// A list of one or many input items to the model, containing + /// different content types. Items(Vec), } +impl Default for InputParam { + fn default() -> Self { + Self::Text(String::new()) + } +} + /// Content item used to generate a response. /// /// This is a properly discriminated union based on the `type` field, using Rust's @@ -507,6 +514,48 @@ pub struct Conversation { pub id: String, } +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(untagged)] +pub enum ConversationParam { + /// The unique ID of the conversation. + ConversationID(String), + /// The conversation that this response belongs to. + Object(Conversation), +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] +pub enum IncludeEnum { + #[serde(rename = "file_search_call.results")] + FileSearchCallResults, + #[serde(rename = "web_search_call.results")] + WebSearchCallResults, + #[serde(rename = "web_search_call.action.sources")] + WebSearchCallActionSources, + #[serde(rename = "message.input_image.image_url")] + MessageInputImageImageUrl, + #[serde(rename = "computer_call_output.output.image_url")] + ComputerCallOutputOutputImageUrl, + #[serde(rename = "code_interpreter_call.outputs")] + CodeInterpreterCallOutputs, + #[serde(rename = "reasoning.encrypted_content")] + ReasoningEncryptedContent, + #[serde(rename = "message.output_text.logprobs")] + MessageOutputTextLogprobs, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseStreamOptions { + /// When true, stream obfuscation will be enabled. Stream obfuscation adds + /// random characters to an `obfuscation` field on streaming delta events to + /// normalize payload sizes as a mitigation to certain side-channel attacks. + /// These obfuscation fields are included by default, but add a small amount + /// of overhead to the data stream. You can set `include_obfuscation` to + /// false to optimize for bandwidth if you trust the network links between + /// your application and the OpenAI API. + #[serde(skip_serializing_if = "Option::is_none")] + pub include_obfuscation: Option, +} + /// Builder for a Responses API request. 
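Since `ConversationParam` and `IncludeEnum` above rely entirely on serde attributes for their wire format, a short sketch of how they serialize (plain `serde_json`, no API call) may help; the conversation ID is a placeholder.

```rust
use async_openai::types::responses::{Conversation, ConversationParam, IncludeEnum};

fn main() {
    // The untagged representation lets a bare ID and an object map onto the same type.
    let by_id = ConversationParam::ConversationID("conv_123".to_string());
    assert_eq!(
        serde_json::to_value(&by_id).unwrap(),
        serde_json::json!("conv_123")
    );

    let by_object = ConversationParam::Object(Conversation {
        id: "conv_123".to_string(),
    });
    assert_eq!(
        serde_json::to_value(&by_object).unwrap(),
        serde_json::json!({ "id": "conv_123" })
    );

    // The rename attributes map enum variants onto the API's dotted include values.
    let include = IncludeEnum::ReasoningEncryptedContent;
    assert_eq!(
        serde_json::to_value(&include).unwrap(),
        serde_json::json!("reasoning.encrypted_content")
    );
}
```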
#[derive(Clone, Serialize, Deserialize, Debug, Default, Builder, PartialEq)] #[builder( @@ -517,55 +566,70 @@ pub struct Conversation { )] #[builder(build_fn(error = "OpenAIError"))] pub struct CreateResponse { - /// Text, image, or file inputs to the model, used to generate a response. - pub input: Input, - - /// Model ID used to generate the response, like `gpt-4o`. - /// OpenAI offers a wide range of models with different capabilities, - /// performance characteristics, and price points. - pub model: String, - /// Whether to run the model response in the background. - /// boolean or null. + /// [Learn more](https://platform.openai.com/docs/guides/background). #[serde(skip_serializing_if = "Option::is_none")] pub background: Option, - /// Specify additional output data to include in the model response. + /// The conversation that this response belongs to. Items from this conversation are prepended to + /// `input_items` for this response request. + /// + /// Input items and output items from this response are automatically added to this conversation after + /// this response completes. + #[serde(skip_serializing_if = "Option::is_none")] + pub conversation: Option, + + /// Specify additional output data to include in the model response. Currently supported + /// values are: + /// + /// - `web_search_call.action.sources`: Include the sources of the web search tool call. + /// + /// - `code_interpreter_call.outputs`: Includes the outputs of python code execution in code + /// interpreter tool call items. + /// + /// - `computer_call_output.output.image_url`: Include image urls from the computer call + /// output. /// - /// Supported values: - /// - `file_search_call.results` - /// Include the search results of the file search tool call. - /// - `message.input_image.image_url` - /// Include image URLs from the input message. - /// - `computer_call_output.output.image_url` - /// Include image URLs from the computer call output. - /// - `reasoning.encrypted_content` - /// Include an encrypted version of reasoning tokens in reasoning item outputs. - /// This enables reasoning items to be used in multi-turn conversations when - /// using the Responses API statelessly (for example, when the `store` parameter - /// is set to `false`, or when an organization is enrolled in the zero-data- - /// retention program). + /// - `file_search_call.results`: Include the search results of the file search tool call. /// - /// If `None`, no additional data is returned. + /// - `message.input_image.image_url`: Include image urls from the input message. + /// + /// - `message.output_text.logprobs`: Include logprobs with assistant messages. + /// + /// - `reasoning.encrypted_content`: Includes an encrypted version of reasoning tokens in + /// reasoning item outputs. This enables reasoning items to be used in multi-turn + /// conversations when using the Responses API statelessly (like when the `store` parameter is + /// set to `false`, or when an organization is enrolled in the zero data retention program). #[serde(skip_serializing_if = "Option::is_none")] - pub include: Option>, + pub include: Option>, - /// Inserts a system (or developer) message as the first item in the model's context. + /// Text, image, or file inputs to the model, used to generate a response. 
+ /// + /// Learn more: + /// - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + /// - [Image inputs](https://platform.openai.com/docs/guides/images) + /// - [File inputs](https://platform.openai.com/docs/guides/pdf-files) + /// - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) + /// - [Function calling](https://platform.openai.com/docs/guides/function-calling) + pub input: InputParam, + + /// A system (or developer) message inserted into the model's context. /// - /// When using along with previous_response_id, the instructions from a previous response will - /// not be carried over to the next response. This makes it simple to swap out system - /// (or developer) messages in new responses. + /// When using along with `previous_response_id`, the instructions from a previous + /// response will not be carried over to the next response. This makes it simple + /// to swap out system (or developer) messages in new responses. #[serde(skip_serializing_if = "Option::is_none")] pub instructions: Option, - /// An upper bound for the number of tokens that can be generated for a - /// response, including visible output tokens and reasoning tokens. + /// An upper bound for the number of tokens that can be generated for a response, including + /// visible output tokens and [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). #[serde(skip_serializing_if = "Option::is_none")] pub max_output_tokens: Option, - /// The maximum number of total calls to built-in tools that can be processed in a response. - /// This maximum number applies across all built-in tool calls, not per individual tool. - /// Any further attempts to call a tool by the model will be ignored. + /// The maximum number of total calls to built-in tools that can be processed in a response. This + /// maximum number applies across all built-in tool calls, not per individual tool. Any further + /// attempts to call a tool by the model will be ignored. + #[serde(skip_serializing_if = "Option::is_none")] pub max_tool_calls: Option, /// Set of 16 key-value pairs that can be attached to an object. This can be @@ -577,42 +641,54 @@ pub struct CreateResponse { #[serde(skip_serializing_if = "Option::is_none")] pub metadata: Option>, + /// Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI + /// offers a wide range of models with different capabilities, performance + /// characteristics, and price points. Refer to the [model guide](https://platform.openai.com/docs/models) + /// to browse and compare available models. + #[serde(skip_serializing_if = "Option::is_none")] + pub model: Option, + /// Whether to allow the model to run tool calls in parallel. #[serde(skip_serializing_if = "Option::is_none")] pub parallel_tool_calls: Option, - /// The unique ID of the previous response to the model. Use this to create - /// multi-turn conversations. + /// The unique ID of the previous response to the model. Use this to create multi-turn conversations. + /// Learn more about [conversation state](https://platform.openai.com/docs/guides/conversation-state). + /// Cannot be used in conjunction with `conversation`. #[serde(skip_serializing_if = "Option::is_none")] pub previous_response_id: Option, /// Reference to a prompt template and its variables. + /// [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). 
#[serde(skip_serializing_if = "Option::is_none")] - pub prompt: Option, + pub prompt: Option, - /// **o-series models only**: Configuration options for reasoning models. + /// Used by OpenAI to cache responses for similar requests to optimize your cache hit rates. Replaces + /// the `user` field. [Learn more](https://platform.openai.com/docs/guides/prompt-caching). #[serde(skip_serializing_if = "Option::is_none")] - pub reasoning: Option, + pub prompt_cache_key: Option, - /// Specifies the latency tier to use for processing the request. - /// - /// This parameter is relevant for customers subscribed to the Scale tier service. - /// - /// Supported values: - /// - `auto` - /// - If the Project is Scale tier enabled, the system will utilize Scale tier credits until - /// they are exhausted. - /// - If the Project is not Scale tier enabled, the request will be processed using the - /// default service tier with a lower uptime SLA and no latency guarantee. - /// - `default` - /// The request will be processed using the default service tier with a lower uptime SLA and - /// no latency guarantee. - /// - `flex` - /// The request will be processed with the Flex Processing service tier. Learn more. + /// **gpt-5 and o-series models only** + /// Configuration options for [reasoning models](https://platform.openai.com/docs/guides/reasoning). + #[serde(skip_serializing_if = "Option::is_none")] + pub reasoning: Option, + + /// A stable identifier used to help detect users of your application that may be violating OpenAI's + /// usage policies. /// - /// When not set, the default behavior is `auto`. + /// The IDs should be a string that uniquely identifies each user. We recommend hashing their username + /// or email address, in order to avoid sending us any identifying information. [Learn + /// more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers). + #[serde(skip_serializing_if = "Option::is_none")] + pub safety_identifier: Option, + + /// Specifies the processing type used for serving the request. + /// - If set to 'auto', then the request will be processed with the service tier configured in the Project settings. Unless otherwise configured, the Project will use 'default'. + /// - If set to 'default', then the request will be processed with the standard pricing and performance for the selected model. + /// - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or '[priority](https://openai.com/api-priority-processing/)', then the request will be processed with the corresponding service tier. + /// - When not set, the default behavior is 'auto'. /// - /// When this parameter is set, the response body will include the `service_tier` utilized. + /// When the `service_tier` parameter is set, the response body will include the `service_tier` value based on the processing mode actually used to serve the request. This response value may be different from the value set in the parameter. #[serde(skip_serializing_if = "Option::is_none")] pub service_tier: Option, @@ -620,11 +696,17 @@ pub struct CreateResponse { #[serde(skip_serializing_if = "Option::is_none")] pub store: Option, - /// If set to true, the model response data will be streamed to the client as it is - /// generated using server-sent events. + /// If set to true, the model response data will be streamed to the client + /// as it is generated using [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). 
+ /// See the [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + /// for more information. #[serde(skip_serializing_if = "Option::is_none")] pub stream: Option, + /// Options for streaming responses. Only set this when you set `stream: true`. + #[serde(skip_serializing_if = "Option::is_none")] + pub stream_options: Option, + /// What sampling temperature to use, between 0 and 2. Higher values like 0.8 /// will make the output more random, while lower values like 0.2 will make it /// more focused and deterministic. We generally recommend altering this or @@ -632,45 +714,60 @@ pub struct CreateResponse { #[serde(skip_serializing_if = "Option::is_none")] pub temperature: Option, - /// Configuration options for a text response from the model. Can be plain text - /// or structured JSON data. + /// Configuration options for a text response from the model. Can be plain + /// text or structured JSON data. Learn more: + /// - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + /// - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) #[serde(skip_serializing_if = "Option::is_none")] - pub text: Option, + pub text: Option, /// How the model should select which tool (or tools) to use when generating - /// a response. + /// a response. See the `tools` parameter to see how to specify which tools + /// the model can call. #[serde(skip_serializing_if = "Option::is_none")] - pub tool_choice: Option, + pub tool_choice: Option, - /// An array of tools the model may call while generating a response. - /// Can include built-in tools (file_search, web_search_preview, - /// computer_use_preview) or custom function definitions. + /// An array of tools the model may call while generating a response. You + /// can specify which tool to use by setting the `tool_choice` parameter. + /// + /// We support the following categories of tools: + /// - **Built-in tools**: Tools that are provided by OpenAI that extend the + /// model's capabilities, like [web search](https://platform.openai.com/docs/guides/tools-web-search) + /// or [file search](https://platform.openai.com/docs/guides/tools-file-search). Learn more about + /// [built-in tools](https://platform.openai.com/docs/guides/tools). + /// - **MCP Tools**: Integrations with third-party systems via custom MCP servers + /// or predefined connectors such as Google Drive and SharePoint. Learn more about + /// [MCP Tools](https://platform.openai.com/docs/guides/tools-connectors-mcp). + /// - **Function calls (custom tools)**: Functions that are defined by you, + /// enabling the model to call your own code with strongly typed arguments + /// and outputs. Learn more about + /// [function calling](https://platform.openai.com/docs/guides/function-calling). You can also use + /// custom tools to call your own code. #[serde(skip_serializing_if = "Option::is_none")] - pub tools: Option>, + pub tools: Option>, - /// An integer between 0 and 20 specifying the number of most likely tokens to return - /// at each token position, each with an associated log probability. + /// An integer between 0 and 20 specifying the number of most likely tokens to return at each + /// token position, each with an associated log probability. 
#[serde(skip_serializing_if = "Option::is_none")] - pub top_logprobs: Option, // TODO add validation of range + pub top_logprobs: Option, /// An alternative to sampling with temperature, called nucleus sampling, /// where the model considers the results of the tokens with top_p probability /// mass. So 0.1 means only the tokens comprising the top 10% probability mass - /// are considered. We generally recommend altering this or `temperature` but - /// not both. + /// are considered. + /// + /// We generally recommend altering this or `temperature` but not both. #[serde(skip_serializing_if = "Option::is_none")] pub top_p: Option, - /// The truncation strategy to use for the model response: - /// - `auto`: drop items in the middle to fit context window. - /// - `disabled`: error if exceeding context window. + ///The truncation strategy to use for the model response. + /// - `auto`: If the input to this Response exceeds + /// the model's context window size, the model will truncate the + /// response to fit the context window by dropping items from the beginning of the conversation. + /// - `disabled` (default): If the input size will exceed the context window + /// size for a model, the request will fail with a 400 error. #[serde(skip_serializing_if = "Option::is_none")] pub truncation: Option, - - /// A unique identifier representing your end-user, which can help OpenAI to - /// monitor and detect abuse. - #[serde(skip_serializing_if = "Option::is_none")] - pub user: Option, } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] @@ -2085,6 +2182,7 @@ pub struct Response { /// The conversation that this response belongs to. Input items and output /// items from this response are automatically added to this conversation. + #[serde(skip_serializing_if = "Option::is_none")] pub conversation: Option, /// Unix timestamp (in seconds) when this Response was created. From b5bc8edc6f1959c81b4adc3f17595783ae4f2612 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Mon, 3 Nov 2025 14:43:11 -0800 Subject: [PATCH 14/42] add reponses apis --- async-openai/src/responses.rs | 49 ++++++++++++++++++++++++++--- async-openai/src/types/responses.rs | 7 +++++ 2 files changed, 51 insertions(+), 5 deletions(-) diff --git a/async-openai/src/responses.rs b/async-openai/src/responses.rs index 9160b7be..0c58ecfe 100644 --- a/async-openai/src/responses.rs +++ b/async-openai/src/responses.rs @@ -1,13 +1,12 @@ +use serde::Serialize; + use crate::{ config::Config, error::OpenAIError, - types::responses::{CreateResponse, Response, ResponseStream}, + types::responses::{CreateResponse, DeleteResponse, Response, ResponseStream}, Client, }; -/// Given text input or a list of context items, the model will generate a response. -/// -/// Related guide: [Responses](https://platform.openai.com/docs/api-reference/responses) pub struct Responses<'c, C: Config> { client: &'c Client, } @@ -18,7 +17,15 @@ impl<'c, C: Config> Responses<'c, C> { Self { client } } - /// Creates a model response for the given input. + /// Creates a model response. Provide [text](https://platform.openai.com/docs/guides/text) or + /// [image](https://platform.openai.com/docs/guides/images) inputs to generate + /// [text](https://platform.openai.com/docs/guides/text) or + /// [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. 
Have the model call + /// your own [custom code](https://platform.openai.com/docs/guides/function-calling) or use + /// built-in [tools](https://platform.openai.com/docs/guides/tools) like + /// [web search](https://platform.openai.com/docs/guides/tools-web-search) + /// or [file search](https://platform.openai.com/docs/guides/tools-file-search) to use your own data + /// as input for the model's response. #[crate::byot( T0 = serde::Serialize, R = serde::de::DeserializeOwned @@ -52,4 +59,36 @@ impl<'c, C: Config> Responses<'c, C> { } Ok(self.client.post_stream("/responses", request).await) } + + /// Retrieves a model response with the given ID. + #[crate::byot(T0 = std::fmt::Display, T1 = serde::Serialize, R = serde::de::DeserializeOwned)] + pub async fn retrieve(&self, response_id: &str, query: &Q) -> Result + where + Q: Serialize + ?Sized, + { + self.client + .get_with_query(&format!("/responses/{}", response_id), &query) + .await + } + + /// Deletes a model response with the given ID. + #[crate::byot(T0 = std::fmt::Display, R = serde::de::DeserializeOwned)] + pub async fn delete(&self, response_id: &str) -> Result { + self.client + .delete(&format!("/responses/{}", response_id)) + .await + } + + /// Cancels a model response with the given ID. Only responses created with the + /// `background` parameter set to `true` can be cancelled. + /// [Learn more](https://platform.openai.com/docs/guides/background). + #[crate::byot(T0 = std::fmt::Display, R = serde::de::DeserializeOwned)] + pub async fn cancel(&self, response_id: &str) -> Result { + self.client + .post( + &format!("/responses/{}/cancel", response_id), + serde_json::json!({}), + ) + .await + } } diff --git a/async-openai/src/types/responses.rs b/async-openai/src/types/responses.rs index 095bb63c..fe733e1f 100644 --- a/async-openai/src/types/responses.rs +++ b/async-openai/src/types/responses.rs @@ -3130,3 +3130,10 @@ pub struct TextAnnotation { pub start: u32, pub end: u32, } + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct DeleteResponse { + pub object: String, + pub deleted: bool, + pub id: String, +} From d20e865420120e8bde3166fb5c6c635eab5e000e Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Mon, 3 Nov 2025 16:42:35 -0800 Subject: [PATCH 15/42] list input items --- async-openai/src/responses.rs | 19 ++++++++++++++++++- async-openai/src/types/responses.rs | 20 ++++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/async-openai/src/responses.rs b/async-openai/src/responses.rs index 0c58ecfe..0b1974ab 100644 --- a/async-openai/src/responses.rs +++ b/async-openai/src/responses.rs @@ -3,7 +3,9 @@ use serde::Serialize; use crate::{ config::Config, error::OpenAIError, - types::responses::{CreateResponse, DeleteResponse, Response, ResponseStream}, + types::responses::{ + CreateResponse, DeleteResponse, Response, ResponseItemList, ResponseStream, + }, Client, }; @@ -91,4 +93,19 @@ impl<'c, C: Config> Responses<'c, C> { ) .await } + + /// Returns a list of input items for a given response. 
+ #[crate::byot(T0 = std::fmt::Display, T1 = serde::Serialize, R = serde::de::DeserializeOwned)] + pub async fn list_input_items( + &self, + response_id: &str, + query: &Q, + ) -> Result + where + Q: Serialize + ?Sized, + { + self.client + .get_with_query(&format!("/responses/{}/input_items", response_id), &query) + .await + } } diff --git a/async-openai/src/types/responses.rs b/async-openai/src/types/responses.rs index fe733e1f..6d74a03b 100644 --- a/async-openai/src/types/responses.rs +++ b/async-openai/src/types/responses.rs @@ -3137,3 +3137,23 @@ pub struct DeleteResponse { pub deleted: bool, pub id: String, } + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub enum ItemResource { + // TODO: implement this +} + +/// A list of Response items. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseItemList { + /// The type of object returned, must be `list`. + pub object: String, + /// The ID of the first item in the list. + pub first_id: Option, + /// The ID of the last item in the list. + pub last_id: Option, + /// Whether there are more items in the list. + pub has_more: bool, + /// The list of items. + pub data: Vec, +} From 30964bff90ab8c87b17f7aae913c171c089ddf6d Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Mon, 3 Nov 2025 17:06:18 -0800 Subject: [PATCH 16/42] add get_input_token_counts for responses --- async-openai/src/responses.rs | 10 ++++ async-openai/src/types/responses.rs | 84 +++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+) diff --git a/async-openai/src/responses.rs b/async-openai/src/responses.rs index 0b1974ab..223a5b1c 100644 --- a/async-openai/src/responses.rs +++ b/async-openai/src/responses.rs @@ -5,6 +5,7 @@ use crate::{ error::OpenAIError, types::responses::{ CreateResponse, DeleteResponse, Response, ResponseItemList, ResponseStream, + TokenCountsBody, TokenCountsResource, }, Client, }; @@ -108,4 +109,13 @@ impl<'c, C: Config> Responses<'c, C> { .get_with_query(&format!("/responses/{}/input_items", response_id), &query) .await } + + /// Get input token counts + #[crate::byot(T0 = serde::Serialize, R = serde::de::DeserializeOwned)] + pub async fn get_input_token_counts( + &self, + request: TokenCountsBody, + ) -> Result { + self.client.post("/responses/input_tokens", request).await + } } diff --git a/async-openai/src/types/responses.rs b/async-openai/src/types/responses.rs index 6d74a03b..758a0ad2 100644 --- a/async-openai/src/types/responses.rs +++ b/async-openai/src/types/responses.rs @@ -3157,3 +3157,87 @@ pub struct ResponseItemList { /// The list of items. pub data: Vec, } + +#[derive(Clone, Serialize, Deserialize, Debug, Default, Builder, PartialEq)] +#[builder( + name = "TokenCountsBodyArgs", + pattern = "mutable", + setter(into, strip_option), + default +)] +#[builder(build_fn(error = "OpenAIError"))] +pub struct TokenCountsBody { + /// The conversation that this response belongs to. Items from this + /// conversation are prepended to `input_items` for this response request. + /// Input items and output items from this response are automatically added to this + /// conversation after this response completes. + #[serde(skip_serializing_if = "Option::is_none")] + pub conversation: Option, + + /// Text, image, or file inputs to the model, used to generate a response + #[serde(skip_serializing_if = "Option::is_none")] + pub input: Option, + + /// A system (or developer) message inserted into the model's context. 
+ /// + /// When used along with `previous_response_id`, the instructions from a previous response will + /// not be carried over to the next response. This makes it simple to swap out system (or + /// developer) messages in new responses. + #[serde(skip_serializing_if = "Option::is_none")] + pub instructions: Option, + + /// Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a + /// wide range of models with different capabilities, performance characteristics, + /// and price points. Refer to the [model guide](https://platform.openai.com/docs/models) + /// to browse and compare available models. + #[serde(skip_serializing_if = "Option::is_none")] + pub model: Option, + + /// Whether to allow the model to run tool calls in parallel. + #[serde(skip_serializing_if = "Option::is_none")] + pub parallel_tool_calls: Option, + + /// The unique ID of the previous response to the model. Use this to create multi-turn + /// conversations. Learn more about [conversation state](https://platform.openai.com/docs/guides/conversation-state). + /// Cannot be used in conjunction with `conversation`. + #[serde(skip_serializing_if = "Option::is_none")] + pub previous_response_id: Option, + + /// **gpt-5 and o-series models only** + /// Configuration options for [reasoning models](https://platform.openai.com/docs/guides/reasoning). + #[serde(skip_serializing_if = "Option::is_none")] + pub reasoning: Option, + + /// Configuration options for a text response from the model. Can be plain + /// text or structured JSON data. Learn more: + /// - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + /// - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + #[serde(skip_serializing_if = "Option::is_none")] + pub text: Option, + + /// How the model should select which tool (or tools) to use when generating + /// a response. See the `tools` parameter to see how to specify which tools + /// the model can call. + #[serde(skip_serializing_if = "Option::is_none")] + pub tool_choice: Option, + + /// An array of tools the model may call while generating a response. You can specify which tool + /// to use by setting the `tool_choice` parameter. + #[serde(skip_serializing_if = "Option::is_none")] + pub tools: Option>, + + ///The truncation strategy to use for the model response. + /// - `auto`: If the input to this Response exceeds + /// the model's context window size, the model will truncate the + /// response to fit the context window by dropping items from the beginning of the conversation. + /// - `disabled` (default): If the input size will exceed the context window + /// size for a model, the request will fail with a 400 error. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub truncation: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct TokenCountsResource { + pub object: String, + pub input_tokens: u32, +} From 2413171527a5fc50faaf553812455c0b2d5e6308 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Mon, 3 Nov 2025 17:23:30 -0800 Subject: [PATCH 17/42] implement ItemResource --- async-openai/src/types/responses.rs | 30 ++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/async-openai/src/types/responses.rs b/async-openai/src/types/responses.rs index 758a0ad2..643e63b2 100644 --- a/async-openai/src/types/responses.rs +++ b/async-openai/src/types/responses.rs @@ -3139,8 +3139,36 @@ pub struct DeleteResponse { } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct InputItemReference { + pub r#type: Option, + pub id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum ItemResourceItem { + Message(MessageItem), + FileSearchCall(FileSearchToolCall), + ComputerCall(ComputerToolCall), + ComputerCallOutput(ComputerCallOutputItemParam), + WebSearchCall(WebSearchToolCall), + FunctionCall(FunctionToolCall), + FunctionCallOutput(FunctionCallOutputItemParam), + ImageGenerationCall(ImageGenToolCall), + CodeInterpreterCall(CodeInterpreterToolCall), + LocalShellCall(LocalShellToolCall), + LocalShellCallOutput(LocalShellToolCallOutput), + McpListTools(MCPListTools), + McpApprovalRequest(MCPApprovalRequest), + McpApprovalResponse(MCPApprovalResponse), + McpCall(MCPToolCall), +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(untagged)] pub enum ItemResource { - // TODO: implement this + ItemReference(InputItemReference), + Item(ItemResourceItem), } /// A list of Response items. 
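The three patches above wire retrieve, delete, cancel, list_input_items and get_input_token_counts into the Responses client. A minimal usage sketch follows, assuming the crate's usual client.responses() accessor; the response ID, query parameters, model name and builder fields are placeholders rather than values taken from the patches.

use async_openai::{error::OpenAIError, types::responses::TokenCountsBodyArgs, Client};

async fn inspect_response(response_id: &str) -> Result<(), OpenAIError> {
    let client = Client::new();
    let responses = client.responses();

    // Fetch the stored response; an empty query keeps the call minimal.
    let response = responses
        .retrieve(response_id, &[] as &[(&str, &str)])
        .await?;
    println!("retrieved: {}", response.id);

    // Page through the input items that produced it ("limit" is a placeholder).
    let items = responses
        .list_input_items(response_id, &[("limit", "10")])
        .await?;
    println!("has_more: {}", items.has_more);

    // Estimate input token usage for a follow-up request; the body fields
    // mirror CreateResponse, only a couple are set here for illustration.
    let counts = responses
        .get_input_token_counts(
            TokenCountsBodyArgs::default()
                .model("gpt-4.1")
                .instructions("You are a helpful assistant.")
                .build()?,
        )
        .await?;
    println!("input_tokens: {}", counts.input_tokens);

    // cancel only applies to responses created with `background: true`;
    // delete removes the stored response entirely.
    let _ = responses.cancel(response_id).await?;
    let deleted = responses.delete(response_id).await?;
    println!("deleted: {}", deleted.deleted);

    Ok(())
}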
From 2f78f5d4717b22e59b3375c0aba0061f1872d71e Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Mon, 3 Nov 2025 17:31:50 -0800 Subject: [PATCH 18/42] types/responses dir --- async-openai/src/types/responses/mod.rs | 5 +++++ async-openai/src/types/{ => responses}/responses.rs | 0 async-openai/src/types/responses/responses_stream.rs | 0 3 files changed, 5 insertions(+) create mode 100644 async-openai/src/types/responses/mod.rs rename async-openai/src/types/{ => responses}/responses.rs (100%) create mode 100644 async-openai/src/types/responses/responses_stream.rs diff --git a/async-openai/src/types/responses/mod.rs b/async-openai/src/types/responses/mod.rs new file mode 100644 index 00000000..a57069fd --- /dev/null +++ b/async-openai/src/types/responses/mod.rs @@ -0,0 +1,5 @@ +mod responses; +mod responses_stream; + +pub use responses::*; +pub use responses_stream::*; diff --git a/async-openai/src/types/responses.rs b/async-openai/src/types/responses/responses.rs similarity index 100% rename from async-openai/src/types/responses.rs rename to async-openai/src/types/responses/responses.rs diff --git a/async-openai/src/types/responses/responses_stream.rs b/async-openai/src/types/responses/responses_stream.rs new file mode 100644 index 00000000..e69de29b From d9dcf246b7cf7339cc4d05c523c89653d0a571f6 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 09:49:14 -0800 Subject: [PATCH 19/42] response streaming events --- async-openai/src/types/responses/mod.rs | 8 +- .../responses/{responses.rs => response.rs} | 759 +----------------- .../src/types/responses/response_stream.rs | 550 +++++++++++++ .../src/types/responses/responses_stream.rs | 0 4 files changed, 579 insertions(+), 738 deletions(-) rename async-openai/src/types/responses/{responses.rs => response.rs} (79%) create mode 100644 async-openai/src/types/responses/response_stream.rs delete mode 100644 async-openai/src/types/responses/responses_stream.rs diff --git a/async-openai/src/types/responses/mod.rs b/async-openai/src/types/responses/mod.rs index a57069fd..8d2635c2 100644 --- a/async-openai/src/types/responses/mod.rs +++ b/async-openai/src/types/responses/mod.rs @@ -1,5 +1,5 @@ -mod responses; -mod responses_stream; +mod response; +mod response_stream; -pub use responses::*; -pub use responses_stream::*; +pub use response::*; +pub use response_stream::*; diff --git a/async-openai/src/types/responses/responses.rs b/async-openai/src/types/responses/response.rs similarity index 79% rename from async-openai/src/types/responses/responses.rs rename to async-openai/src/types/responses/response.rs index 643e63b2..73d30ab6 100644 --- a/async-openai/src/types/responses/responses.rs +++ b/async-openai/src/types/responses/response.rs @@ -956,7 +956,7 @@ pub enum GrammarSyntax { Regex, } -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder, Default)] +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] pub struct CustomGrammarFormatParam { /// The grammar definition. pub definition: String, @@ -1508,6 +1508,24 @@ pub struct LogProb { pub top_logprobs: Vec, } +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseTopLobProb { + /// The log probability of this token. + pub logprob: f64, + /// A possible text token. + pub token: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseLogProb { + /// The log probability of this token. + pub logprob: f64, + /// A possible text token. 
+ pub token: String, + /// The log probability of the top 20 most likely tokens. + pub top_logprobs: Vec, +} + /// A simple text output from the model. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] pub struct OutputTextContent { @@ -1623,42 +1641,17 @@ pub enum OutputMessageContent { Refusal(RefusalContent), } -/// Nested content within an output message. -/// -/// Note: This enum is similar to OutputItem but may be used in different contexts. -/// Consider using OutputItem directly if it fits your use case. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[serde(tag = "type", rename_all = "snake_case")] pub enum OutputContent { - /// An output message from the model. - Message(OutputMessage), - /// The results of a file search tool call. - FileSearchCall(FileSearchToolCall), - /// A tool call to run a function. - FunctionCall(FunctionToolCall), - /// The results of a web search tool call. - WebSearchCall(WebSearchToolCall), - /// A tool call to a computer use tool. - ComputerCall(ComputerToolCall), - /// A description of the chain of thought used by a reasoning model while generating a response. - /// Be sure to include these items in your input to the Responses API for subsequent turns of a - /// conversation if you are manually managing context. - Reasoning(ReasoningItem), - /// Image generation tool call output. - ImageGenerationCall(ImageGenToolCall), - /// Code interpreter tool call output. - CodeInterpreterCall(CodeInterpreterToolCall), - /// Local shell tool call output. - LocalShellCall(LocalShellToolCall), - /// MCP tool invocation output. - McpCall(MCPToolCall), - /// MCP list-tools output. - McpListTools(MCPListTools), - /// MCP approval request output. - McpApprovalRequest(MCPApprovalRequest), + /// A text output from the model. + OutputText(OutputTextContent), + /// A refusal from the model. + Refusal(RefusalContent), + /// Reasoning text from the model. + ReasoningText(ReasoningTextContent), } -/// Reasoning text content. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] pub struct ReasoningTextContent { /// The reasoning text from the model. 
@@ -2371,668 +2364,6 @@ pub enum Status { Incomplete, } -/// Event types for streaming responses from the Responses API -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(tag = "type")] -#[non_exhaustive] // Future-proof against breaking changes -pub enum ResponseEvent { - /// Response creation started - #[serde(rename = "response.created")] - ResponseCreated(ResponseCreated), - /// Processing in progress - #[serde(rename = "response.in_progress")] - ResponseInProgress(ResponseInProgress), - /// Response completed (different from done) - #[serde(rename = "response.completed")] - ResponseCompleted(ResponseCompleted), - /// Response failed - #[serde(rename = "response.failed")] - ResponseFailed(ResponseFailed), - /// Response incomplete - #[serde(rename = "response.incomplete")] - ResponseIncomplete(ResponseIncomplete), - /// Response queued - #[serde(rename = "response.queued")] - ResponseQueued(ResponseQueued), - /// Output item added - #[serde(rename = "response.output_item.added")] - ResponseOutputItemAdded(ResponseOutputItemAdded), - /// Content part added - #[serde(rename = "response.content_part.added")] - ResponseContentPartAdded(ResponseContentPartAdded), - /// Text delta update - #[serde(rename = "response.output_text.delta")] - ResponseOutputTextDelta(ResponseOutputTextDelta), - /// Text output completed - #[serde(rename = "response.output_text.done")] - ResponseOutputTextDone(ResponseOutputTextDone), - /// Refusal delta update - #[serde(rename = "response.refusal.delta")] - ResponseRefusalDelta(ResponseRefusalDelta), - /// Refusal completed - #[serde(rename = "response.refusal.done")] - ResponseRefusalDone(ResponseRefusalDone), - /// Content part completed - #[serde(rename = "response.content_part.done")] - ResponseContentPartDone(ResponseContentPartDone), - /// Output item completed - #[serde(rename = "response.output_item.done")] - ResponseOutputItemDone(ResponseOutputItemDone), - /// Function call arguments delta - #[serde(rename = "response.function_call_arguments.delta")] - ResponseFunctionCallArgumentsDelta(ResponseFunctionCallArgumentsDelta), - /// Function call arguments completed - #[serde(rename = "response.function_call_arguments.done")] - ResponseFunctionCallArgumentsDone(ResponseFunctionCallArgumentsDone), - /// File search call in progress - #[serde(rename = "response.file_search_call.in_progress")] - ResponseFileSearchCallInProgress(ResponseFileSearchCallInProgress), - /// File search call searching - #[serde(rename = "response.file_search_call.searching")] - ResponseFileSearchCallSearching(ResponseFileSearchCallSearching), - /// File search call completed - #[serde(rename = "response.file_search_call.completed")] - ResponseFileSearchCallCompleted(ResponseFileSearchCallCompleted), - /// Web search call in progress - #[serde(rename = "response.web_search_call.in_progress")] - ResponseWebSearchCallInProgress(ResponseWebSearchCallInProgress), - /// Web search call searching - #[serde(rename = "response.web_search_call.searching")] - ResponseWebSearchCallSearching(ResponseWebSearchCallSearching), - /// Web search call completed - #[serde(rename = "response.web_search_call.completed")] - ResponseWebSearchCallCompleted(ResponseWebSearchCallCompleted), - /// Reasoning summary part added - #[serde(rename = "response.reasoning_summary_part.added")] - ResponseReasoningSummaryPartAdded(ResponseReasoningSummaryPartAdded), - /// Reasoning summary part done - #[serde(rename = "response.reasoning_summary_part.done")] - 
ResponseReasoningSummaryPartDone(ResponseReasoningSummaryPartDone), - /// Reasoning summary text delta - #[serde(rename = "response.reasoning_summary_text.delta")] - ResponseReasoningSummaryTextDelta(ResponseReasoningSummaryTextDelta), - /// Reasoning summary text done - #[serde(rename = "response.reasoning_summary_text.done")] - ResponseReasoningSummaryTextDone(ResponseReasoningSummaryTextDone), - /// Reasoning summary delta - #[serde(rename = "response.reasoning_summary.delta")] - ResponseReasoningSummaryDelta(ResponseReasoningSummaryDelta), - /// Reasoning summary done - #[serde(rename = "response.reasoning_summary.done")] - ResponseReasoningSummaryDone(ResponseReasoningSummaryDone), - /// Image generation call in progress - #[serde(rename = "response.image_generation_call.in_progress")] - ResponseImageGenerationCallInProgress(ResponseImageGenerationCallInProgress), - /// Image generation call generating - #[serde(rename = "response.image_generation_call.generating")] - ResponseImageGenerationCallGenerating(ResponseImageGenerationCallGenerating), - /// Image generation call partial image - #[serde(rename = "response.image_generation_call.partial_image")] - ResponseImageGenerationCallPartialImage(ResponseImageGenerationCallPartialImage), - /// Image generation call completed - #[serde(rename = "response.image_generation_call.completed")] - ResponseImageGenerationCallCompleted(ResponseImageGenerationCallCompleted), - /// MCP call arguments delta - #[serde(rename = "response.mcp_call_arguments.delta")] - ResponseMcpCallArgumentsDelta(ResponseMcpCallArgumentsDelta), - /// MCP call arguments done - #[serde(rename = "response.mcp_call_arguments.done")] - ResponseMcpCallArgumentsDone(ResponseMcpCallArgumentsDone), - /// MCP call completed - #[serde(rename = "response.mcp_call.completed")] - ResponseMcpCallCompleted(ResponseMcpCallCompleted), - /// MCP call failed - #[serde(rename = "response.mcp_call.failed")] - ResponseMcpCallFailed(ResponseMcpCallFailed), - /// MCP call in progress - #[serde(rename = "response.mcp_call.in_progress")] - ResponseMcpCallInProgress(ResponseMcpCallInProgress), - /// MCP list tools completed - #[serde(rename = "response.mcp_list_tools.completed")] - ResponseMcpListToolsCompleted(ResponseMcpListToolsCompleted), - /// MCP list tools failed - #[serde(rename = "response.mcp_list_tools.failed")] - ResponseMcpListToolsFailed(ResponseMcpListToolsFailed), - /// MCP list tools in progress - #[serde(rename = "response.mcp_list_tools.in_progress")] - ResponseMcpListToolsInProgress(ResponseMcpListToolsInProgress), - /// Code interpreter call in progress - #[serde(rename = "response.code_interpreter_call.in_progress")] - ResponseCodeInterpreterCallInProgress(ResponseCodeInterpreterCallInProgress), - /// Code interpreter call interpreting - #[serde(rename = "response.code_interpreter_call.interpreting")] - ResponseCodeInterpreterCallInterpreting(ResponseCodeInterpreterCallInterpreting), - /// Code interpreter call completed - #[serde(rename = "response.code_interpreter_call.completed")] - ResponseCodeInterpreterCallCompleted(ResponseCodeInterpreterCallCompleted), - /// Code interpreter call code delta - #[serde(rename = "response.code_interpreter_call_code.delta")] - ResponseCodeInterpreterCallCodeDelta(ResponseCodeInterpreterCallCodeDelta), - /// Code interpreter call code done - #[serde(rename = "response.code_interpreter_call_code.done")] - ResponseCodeInterpreterCallCodeDone(ResponseCodeInterpreterCallCodeDone), - /// Output text annotation added - #[serde(rename = 
"response.output_text.annotation.added")] - ResponseOutputTextAnnotationAdded(ResponseOutputTextAnnotationAdded), - /// Error occurred - #[serde(rename = "error")] - ResponseError(ResponseError), - - /// Unknown event type - #[serde(untagged)] - Unknown(serde_json::Value), -} - -/// Stream of response events -pub type ResponseStream = Pin> + Send>>; - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseCreated { - pub sequence_number: u64, - pub response: ResponseMetadata, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseInProgress { - pub sequence_number: u64, - pub response: ResponseMetadata, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseOutputItemAdded { - pub sequence_number: u64, - pub output_index: u32, - pub item: OutputItem, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseContentPartAdded { - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub content_index: u32, - pub part: ContentPart, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseOutputTextDelta { - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub content_index: u32, - pub delta: String, - #[serde(default, skip_serializing_if = "Option::is_none")] - pub logprobs: Option, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseContentPartDone { - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub content_index: u32, - pub part: ContentPart, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseOutputItemDone { - pub sequence_number: u64, - pub output_index: u32, - pub item: OutputItem, -} - -/// Response completed event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseCompleted { - pub sequence_number: u64, - pub response: ResponseMetadata, -} - -/// Response failed event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseFailed { - pub sequence_number: u64, - pub response: ResponseMetadata, -} - -/// Response incomplete event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseIncomplete { - pub sequence_number: u64, - pub response: ResponseMetadata, -} - -/// Response queued event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseQueued { - pub sequence_number: u64, - pub response: ResponseMetadata, -} - -/// Text output completed event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseOutputTextDone { - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub content_index: u32, - pub text: String, - pub logprobs: Option>, -} - -/// Refusal delta event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseRefusalDelta { - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub content_index: u32, - pub delta: String, -} - -/// Refusal done event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseRefusalDone { - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub 
content_index: u32, - pub refusal: String, -} - -/// Function call arguments delta event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseFunctionCallArgumentsDelta { - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub delta: String, -} - -/// Function call arguments done event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseFunctionCallArgumentsDone { - pub name: String, - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub arguments: String, -} - -/// Error event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseError { - pub sequence_number: u64, - pub code: Option, - pub message: String, - pub param: Option, -} - -/// File search call in progress event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseFileSearchCallInProgress { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// File search call searching event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseFileSearchCallSearching { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// File search call completed event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseFileSearchCallCompleted { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// Web search call in progress event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseWebSearchCallInProgress { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// Web search call searching event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseWebSearchCallSearching { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// Web search call completed event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseWebSearchCallCompleted { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// Reasoning summary part added event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseReasoningSummaryPartAdded { - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub summary_index: u32, - pub part: serde_json::Value, // Could be more specific but using Value for flexibility -} - -/// Reasoning summary part done event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseReasoningSummaryPartDone { - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub summary_index: u32, - pub part: serde_json::Value, -} - -/// Reasoning summary text delta event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseReasoningSummaryTextDelta { - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub summary_index: u32, - pub delta: String, -} - -/// Reasoning summary text done event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseReasoningSummaryTextDone { - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub summary_index: u32, - pub text: String, -} - -/// 
Reasoning summary delta event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseReasoningSummaryDelta { - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub summary_index: u32, - pub delta: serde_json::Value, -} - -/// Reasoning summary done event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseReasoningSummaryDone { - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub summary_index: u32, - pub text: String, -} - -/// Image generation call in progress event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseImageGenerationCallInProgress { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// Image generation call generating event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseImageGenerationCallGenerating { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// Image generation call partial image event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseImageGenerationCallPartialImage { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, - pub partial_image_index: u32, - pub partial_image_b64: String, -} - -/// Image generation call completed event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseImageGenerationCallCompleted { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// MCP call arguments delta event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseMcpCallArgumentsDelta { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, - pub delta: String, -} - -/// MCP call arguments done event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseMcpCallArgumentsDone { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, - pub arguments: String, -} - -/// MCP call completed event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseMcpCallCompleted { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// MCP call failed event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseMcpCallFailed { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// MCP call in progress event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseMcpCallInProgress { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// MCP list tools completed event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseMcpListToolsCompleted { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// MCP list tools failed event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseMcpListToolsFailed { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// MCP list tools in progress event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseMcpListToolsInProgress { - pub sequence_number: u64, - pub 
output_index: u32, - pub item_id: String, -} - -/// Code interpreter call in progress event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseCodeInterpreterCallInProgress { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// Code interpreter call interpreting event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseCodeInterpreterCallInterpreting { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// Code interpreter call completed event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseCodeInterpreterCallCompleted { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, -} - -/// Code interpreter call code delta event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseCodeInterpreterCallCodeDelta { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, - pub delta: String, -} - -/// Code interpreter call code done event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseCodeInterpreterCallCodeDone { - pub sequence_number: u64, - pub output_index: u32, - pub item_id: String, - pub code: String, -} - -/// Response metadata -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseMetadata { - pub id: String, - #[serde(skip_serializing_if = "Option::is_none")] - pub object: Option, - pub created_at: u64, - pub status: Status, - #[serde(skip_serializing_if = "Option::is_none")] - pub model: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub usage: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub error: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub incomplete_details: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub input: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub instructions: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub max_output_tokens: Option, - /// Whether the model was run in background mode - #[serde(skip_serializing_if = "Option::is_none")] - pub background: Option, - /// The service tier that was actually used - #[serde(skip_serializing_if = "Option::is_none")] - pub service_tier: Option, - /// The effective value of top_logprobs parameter - #[serde(skip_serializing_if = "Option::is_none")] - pub top_logprobs: Option, - /// The effective value of max_tool_calls parameter - #[serde(skip_serializing_if = "Option::is_none")] - pub max_tool_calls: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub output: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub parallel_tool_calls: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub previous_response_id: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub reasoning: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub store: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub temperature: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub text: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub tool_choice: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub tools: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub top_p: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub truncation: 
Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub user: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub metadata: Option>, - /// Prompt cache key for improved performance - #[serde(skip_serializing_if = "Option::is_none")] - pub prompt_cache_key: Option, - /// Safety identifier for content filtering - #[serde(skip_serializing_if = "Option::is_none")] - pub safety_identifier: Option, -} - /// Output item #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[serde(tag = "type")] @@ -3091,46 +2422,6 @@ pub struct CustomToolCall { pub id: String, } -/// Content part -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ContentPart { - #[serde(rename = "type")] - pub part_type: String, - pub text: Option, - #[serde(default, skip_serializing_if = "Option::is_none")] - pub annotations: Option>, - #[serde(default, skip_serializing_if = "Option::is_none")] - pub logprobs: Option>, -} - -// ===== RESPONSE COLLECTOR ===== - -/// Collects streaming response events into a complete response - -/// Output text annotation added event -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct ResponseOutputTextAnnotationAdded { - pub sequence_number: u64, - pub item_id: String, - pub output_index: u32, - pub content_index: u32, - pub annotation_index: u32, - pub annotation: TextAnnotation, -} - -/// Text annotation object for output text -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[non_exhaustive] -pub struct TextAnnotation { - #[serde(rename = "type")] - pub annotation_type: String, - pub text: String, - pub start: u32, - pub end: u32, -} - #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] pub struct DeleteResponse { pub object: String, diff --git a/async-openai/src/types/responses/response_stream.rs b/async-openai/src/types/responses/response_stream.rs new file mode 100644 index 00000000..c6478cae --- /dev/null +++ b/async-openai/src/types/responses/response_stream.rs @@ -0,0 +1,550 @@ +use futures::Stream; +use serde::{Deserialize, Serialize}; +use std::pin::Pin; + +use crate::{ + error::OpenAIError, + types::responses::{OutputContent, OutputItem, Response, ResponseLogProb, Summary}, +}; + +/// Stream of response events +pub type ResponseStream = + Pin> + Send>>; + +/// Event types for streaming responses from the Responses API +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(tag = "type")] +pub enum ResponseStreamEvent { + /// An event that is emitted when a response is created. + #[serde(rename = "response.created")] + ResponseCreated(ResponseCreatedEvent), + /// Emitted when the response is in progress. + #[serde(rename = "response.in_progress")] + ResponseInProgress(ResponseInProgressEvent), + /// Emitted when the model response is complete. + #[serde(rename = "response.completed")] + ResponseCompleted(ResponseCompletedEvent), + /// An event that is emitted when a response fails. + #[serde(rename = "response.failed")] + ResponseFailed(ResponseFailedEvent), + /// An event that is emitted when a response finishes as incomplete. + #[serde(rename = "response.incomplete")] + ResponseIncomplete(ResponseIncompleteEvent), + /// Emitted when a new output item is added. + #[serde(rename = "response.output_item.added")] + ResponseOutputItemAdded(ResponseOutputItemAddedEvent), + /// Emitted when an output item is marked done. 
+ #[serde(rename = "response.output_item.done")] + ResponseOutputItemDone(ResponseOutputItemDoneEvent), + /// Emitted when a new content part is added. + #[serde(rename = "response.content_part.added")] + ResponseContentPartAdded(ResponseContentPartAddedEvent), + /// Emitted when a content part is done. + #[serde(rename = "response.content_part.done")] + ResponseContentPartDone(ResponseContentPartDoneEvent), + /// Emitted when there is an additional text delta. + #[serde(rename = "response.output_text.delta")] + ResponseOutputTextDelta(ResponseOutputTextDeltaEvent), + /// Emitted when text content is finalized. + #[serde(rename = "response.output_text.done")] + ResponseOutputTextDone(ResponseOutputTextDoneEvent), + /// Emitted when there is a partial refusal text. + #[serde(rename = "response.refusal.delta")] + ResponseRefusalDelta(ResponseRefusalDeltaEvent), + #[serde(rename = "response.refusal.done")] + /// Emitted when refusal text is finalized. + ResponseRefusalDone(ResponseRefusalDoneEvent), + /// Emitted when there is a partial function-call arguments delta. + #[serde(rename = "response.function_call_arguments.delta")] + ResponseFunctionCallArgumentsDelta(ResponseFunctionCallArgumentsDeltaEvent), + /// Emitted when function-call arguments are finalized. + #[serde(rename = "response.function_call_arguments.done")] + ResponseFunctionCallArgumentsDone(ResponseFunctionCallArgumentsDoneEvent), + /// Emitted when a file search call is initiated. + #[serde(rename = "response.file_search_call.in_progress")] + ResponseFileSearchCallInProgress(ResponseFileSearchCallInProgressEvent), + /// Emitted when a file search is currently searching. + #[serde(rename = "response.file_search_call.searching")] + ResponseFileSearchCallSearching(ResponseFileSearchCallSearchingEvent), + /// Emitted when a file search call is completed (results found). + #[serde(rename = "response.file_search_call.completed")] + ResponseFileSearchCallCompleted(ResponseFileSearchCallCompletedEvent), + /// Emitted when a web search call is initiated. + #[serde(rename = "response.web_search_call.in_progress")] + ResponseWebSearchCallInProgress(ResponseWebSearchCallInProgressEvent), + /// Emitted when a web search call is executing. + #[serde(rename = "response.web_search_call.searching")] + ResponseWebSearchCallSearching(ResponseWebSearchCallSearchingEvent), + /// Emitted when a web search call is completed. + #[serde(rename = "response.web_search_call.completed")] + ResponseWebSearchCallCompleted(ResponseWebSearchCallCompletedEvent), + /// Emitted when a new reasoning summary part is added. + #[serde(rename = "response.reasoning_summary_part.added")] + ResponseReasoningSummaryPartAdded(ResponseReasoningSummaryPartAddedEvent), + /// Emitted when a reasoning summary part is completed. + #[serde(rename = "response.reasoning_summary_part.done")] + ResponseReasoningSummaryPartDone(ResponseReasoningSummaryPartDoneEvent), + /// Emitted when a delta is added to a reasoning summary text. + #[serde(rename = "response.reasoning_summary_text.delta")] + ResponseReasoningSummaryTextDelta(ResponseReasoningSummaryTextDeltaEvent), + /// Emitted when a reasoning summary text is completed. + #[serde(rename = "response.reasoning_summary_text.done")] + ResponseReasoningSummaryTextDone(ResponseReasoningSummaryTextDoneEvent), + /// Emitted when a delta is added to a reasoning text. + #[serde(rename = "response.reasoning_text.delta")] + ResponseReasoningTextDelta(ResponseReasoningTextDeltaEvent), + /// Emitted when a reasoning text is completed. 
+ #[serde(rename = "response.reasoning_text.done")] + ResponseReasoningTextDone(ResponseReasoningTextDoneEvent), + /// Emitted when an image generation tool call has completed and the final image is available. + #[serde(rename = "response.image_generation_call.completed")] + ResponseImageGenerationCallCompleted(ResponseImageGenerationCallCompletedEvent), + /// Emitted when an image generation tool call is actively generating an image (intermediate state). + #[serde(rename = "response.image_generation_call.generating")] + ResponseImageGenerationCallGenerating(ResponseImageGenerationCallGeneratingEvent), + /// Emitted when an image generation tool call is in progress. + #[serde(rename = "response.image_generation_call.in_progress")] + ResponseImageGenerationCallInProgress(ResponseImageGenerationCallInProgressEvent), + /// Emitted when a partial image is available during image generation streaming. + #[serde(rename = "response.image_generation_call.partial_image")] + ResponseImageGenerationCallPartialImage(ResponseImageGenerationCallPartialImageEvent), + /// Emitted when there is a delta (partial update) to the arguments of an MCP tool call. + #[serde(rename = "response.mcp_call_arguments.delta")] + ResponseMCPCallArgumentsDelta(ResponseMCPCallArgumentsDeltaEvent), + /// Emitted when the arguments for an MCP tool call are finalized. + #[serde(rename = "response.mcp_call_arguments.done")] + ResponseMCPCallArgumentsDone(ResponseMCPCallArgumentsDoneEvent), + /// Emitted when an MCP tool call has completed successfully. + #[serde(rename = "response.mcp_call.completed")] + ResponseMCPCallCompleted(ResponseMCPCallCompletedEvent), + /// Emitted when an MCP tool call has failed. + #[serde(rename = "response.mcp_call.failed")] + ResponseMCPCallFailed(ResponseMCPCallFailedEvent), + /// Emitted when an MCP tool call is in progress. + #[serde(rename = "response.mcp_call.in_progress")] + ResponseMCPCallInProgress(ResponseMCPCallInProgressEvent), + /// Emitted when the list of available MCP tools has been successfully retrieved. + #[serde(rename = "response.mcp_list_tools.completed")] + ResponseMCPListToolsCompleted(ResponseMCPListToolsCompletedEvent), + /// Emitted when the attempt to list available MCP tools has failed. + #[serde(rename = "response.mcp_list_tools.failed")] + ResponseMCPListToolsFailed(ResponseMCPListToolsFailedEvent), + /// Emitted when the system is in the process of retrieving the list of available MCP tools. + #[serde(rename = "response.mcp_list_tools.in_progress")] + ResponseMCPListToolsInProgress(ResponseMCPListToolsInProgressEvent), + /// Emitted when a code interpreter call is in progress. + #[serde(rename = "response.code_interpreter_call.in_progress")] + ResponseCodeInterpreterCallInProgress(ResponseCodeInterpreterCallInProgressEvent), + /// Emitted when the code interpreter is actively interpreting the code snippet. + #[serde(rename = "response.code_interpreter_call.interpreting")] + ResponseCodeInterpreterCallInterpreting(ResponseCodeInterpreterCallInterpretingEvent), + /// Emitted when the code interpreter call is completed. + #[serde(rename = "response.code_interpreter_call.completed")] + ResponseCodeInterpreterCallCompleted(ResponseCodeInterpreterCallCompletedEvent), + /// Emitted when a partial code snippet is streamed by the code interpreter. + #[serde(rename = "response.code_interpreter_call_code.delta")] + ResponseCodeInterpreterCallCodeDelta(ResponseCodeInterpreterCallCodeDeltaEvent), + /// Emitted when the code snippet is finalized by the code interpreter. 
+ #[serde(rename = "response.code_interpreter_call_code.done")] + ResponseCodeInterpreterCallCodeDone(ResponseCodeInterpreterCallCodeDoneEvent), + /// Emitted when an annotation is added to output text content. + #[serde(rename = "response.output_text.annotation.added")] + ResponseOutputTextAnnotationAdded(ResponseOutputTextAnnotationAddedEvent), + /// Emitted when a response is queued and waiting to be processed. + #[serde(rename = "response.queued")] + ResponseQueued(ResponseQueuedEvent), + /// Event representing a delta (partial update) to the input of a custom tool call. + #[serde(rename = "response.custom_tool_call_input.delta")] + ResponseCustomToolCallInputDelta(ResponseCustomToolCallInputDeltaEvent), + /// Event indicating that input for a custom tool call is complete. + #[serde(rename = "response.custom_tool_call_input.done")] + ResponseCustomToolCallInputDone(ResponseCustomToolCallInputDoneEvent), + /// Emitted when an error occurs. + #[serde(rename = "error")] + ResponseError(ResponseErrorEvent), +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseCreatedEvent { + pub sequence_number: u64, + pub response: Response, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseInProgressEvent { + pub sequence_number: u64, + pub response: Response, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseCompletedEvent { + pub sequence_number: u64, + pub response: Response, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseFailedEvent { + pub sequence_number: u64, + pub response: Response, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseIncompleteEvent { + pub sequence_number: u64, + pub response: Response, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseOutputItemAddedEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item: OutputItem, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseOutputItemDoneEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item: OutputItem, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseContentPartAddedEvent { + pub sequence_number: u64, + pub item_id: String, + pub output_index: u32, + pub content_index: u32, + pub part: OutputContent, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseContentPartDoneEvent { + pub sequence_number: u64, + pub item_id: String, + pub output_index: u32, + pub content_index: u32, + pub part: OutputContent, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseOutputTextDeltaEvent { + pub sequence_number: u64, + pub item_id: String, + pub output_index: u32, + pub content_index: u32, + pub delta: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub logprobs: Option>, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseOutputTextDoneEvent { + pub sequence_number: u64, + pub item_id: String, + pub output_index: u32, + pub content_index: u32, + pub text: String, + pub logprobs: Option>, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseRefusalDeltaEvent { + pub sequence_number: u64, + pub item_id: String, + pub output_index: u32, + pub content_index: u32, + pub delta: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseRefusalDoneEvent { + pub 
sequence_number: u64, + pub item_id: String, + pub output_index: u32, + pub content_index: u32, + pub refusal: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseFunctionCallArgumentsDeltaEvent { + pub sequence_number: u64, + pub item_id: String, + pub output_index: u32, + pub delta: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseFunctionCallArgumentsDoneEvent { + pub name: String, + pub sequence_number: u64, + pub item_id: String, + pub output_index: u32, + pub arguments: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseFileSearchCallInProgressEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseFileSearchCallSearchingEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseFileSearchCallCompletedEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseWebSearchCallInProgressEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseWebSearchCallSearchingEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseWebSearchCallCompletedEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum SummaryPart { + SummaryText(Summary), +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseReasoningSummaryPartAddedEvent { + pub sequence_number: u64, + pub item_id: String, + pub output_index: u32, + pub summary_index: u32, + pub part: SummaryPart, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseReasoningSummaryPartDoneEvent { + pub sequence_number: u64, + pub item_id: String, + pub output_index: u32, + pub summary_index: u32, + pub part: SummaryPart, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseReasoningSummaryTextDeltaEvent { + pub sequence_number: u64, + pub item_id: String, + pub output_index: u32, + pub summary_index: u32, + pub delta: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseReasoningSummaryTextDoneEvent { + pub sequence_number: u64, + pub item_id: String, + pub output_index: u32, + pub summary_index: u32, + pub text: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseReasoningTextDeltaEvent { + pub sequence_number: u64, + pub item_id: String, + pub output_index: u32, + pub content_index: u32, + pub delta: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseReasoningTextDoneEvent { + pub sequence_number: u64, + pub item_id: String, + pub output_index: u32, + pub content_index: u32, + pub text: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseImageGenerationCallCompletedEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, 
Deserialize, Clone, PartialEq)] +pub struct ResponseImageGenerationCallGeneratingEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseImageGenerationCallInProgressEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseImageGenerationCallPartialImageEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, + pub partial_image_index: u32, + pub partial_image_b64: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseMCPCallArgumentsDeltaEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, + pub delta: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseMCPCallArgumentsDoneEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, + pub arguments: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseMCPCallCompletedEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseMCPCallFailedEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseMCPCallInProgressEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseMCPListToolsCompletedEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseMCPListToolsFailedEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseMCPListToolsInProgressEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseCodeInterpreterCallInProgressEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseCodeInterpreterCallInterpretingEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseCodeInterpreterCallCompletedEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseCodeInterpreterCallCodeDeltaEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, + pub delta: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseCodeInterpreterCallCodeDoneEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, + pub code: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseOutputTextAnnotationAddedEvent { + pub sequence_number: u64, + pub output_index: u32, + pub content_index: u32, + pub annotation_index: u32, + pub item_id: String, + pub annotation: serde_json::Value, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] 
+pub struct ResponseQueuedEvent { + pub sequence_number: u64, + pub response: Response, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseCustomToolCallInputDeltaEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, + pub delta: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseCustomToolCallInputDoneEvent { + pub sequence_number: u64, + pub output_index: u32, + pub item_id: String, + pub input: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ResponseErrorEvent { + pub sequence_number: u64, + pub code: Option, + pub message: String, + pub param: Option, +} diff --git a/async-openai/src/types/responses/responses_stream.rs b/async-openai/src/types/responses/responses_stream.rs deleted file mode 100644 index e69de29b..00000000 From 7da4798043643f710c744499d961c5e79387b214 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 09:54:44 -0800 Subject: [PATCH 20/42] fix compilation --- async-openai/src/types/impls.rs | 26 +------------------------- 1 file changed, 1 insertion(+), 25 deletions(-) diff --git a/async-openai/src/types/impls.rs b/async-openai/src/types/impls.rs index 3689dbba..972c6043 100644 --- a/async-openai/src/types/impls.rs +++ b/async-openai/src/types/impls.rs @@ -14,7 +14,7 @@ use crate::{ use bytes::Bytes; use super::{ - responses::{CodeInterpreterContainer, EasyInputContent, Input, Role as ResponsesRole}, + responses::{EasyInputContent, Role as ResponsesRole}, AddUploadPartRequest, AudioInput, AudioResponseFormat, ChatCompletionFunctionCall, ChatCompletionFunctions, ChatCompletionNamedToolChoice, ChatCompletionRequestAssistantMessage, ChatCompletionRequestAssistantMessageContent, ChatCompletionRequestDeveloperMessage, @@ -1047,30 +1047,12 @@ impl AsyncTryFrom for reqwest::multipart::Form { // end: types to multipart form -impl Default for Input { - fn default() -> Self { - Self::Text("".to_string()) - } -} - impl Default for EasyInputContent { fn default() -> Self { Self::Text("".to_string()) } } -impl From for Input { - fn from(value: String) -> Self { - Input::Text(value) - } -} - -impl From<&str> for Input { - fn from(value: &str) -> Self { - Input::Text(value.to_owned()) - } -} - impl Default for ResponsesRole { fn default() -> Self { Self::User @@ -1088,9 +1070,3 @@ impl From<&str> for EasyInputContent { Self::Text(value.to_owned()) } } - -impl Default for CodeInterpreterContainer { - fn default() -> Self { - CodeInterpreterContainer::Id("".to_string()) - } -} From 569e59577d2754c2fde6ebcb9b5f06f1a8d72e2b Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 10:06:46 -0800 Subject: [PATCH 21/42] compiling example/responses --- examples/responses/src/main.rs | 35 ++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/examples/responses/src/main.rs b/examples/responses/src/main.rs index 47395185..792382d0 100644 --- a/examples/responses/src/main.rs +++ b/examples/responses/src/main.rs @@ -1,11 +1,13 @@ use std::error::Error; use async_openai::{ - types::responses::{ - AllowedTools, CreateResponseArgs, Input, InputItem, InputMessageArgs, McpArgs, - RequireApproval, RequireApprovalPolicy, Role, TextConfig, - ToolDefinition::{Mcp, WebSearchPreview}, - Verbosity, WebSearchPreviewArgs, + types::{ + responses::{ + CreateResponseArgs, EasyInputContent, EasyInputMessage, InputItem, InputParam, + MessageType, ResponseTextParam, Role, TextResponseFormatConfiguration, 
Tool, Verbosity, + WebSearchToolArgs, + }, + MCPToolAllowedTools, MCPToolApprovalSetting, MCPToolArgs, MCPToolRequireApproval, }, Client, }; @@ -17,23 +19,24 @@ async fn main() -> Result<(), Box> { let request = CreateResponseArgs::default() .max_output_tokens(512u32) .model("gpt-4.1") - .text(TextConfig { - format: async_openai::types::responses::TextResponseFormat::Text, + .text(ResponseTextParam { + format: TextResponseFormatConfiguration::Text, verbosity: Some(Verbosity::Medium), // only here to test the config, but gpt-4.1 only supports medium }) - .input(Input::Items(vec![InputItem::Message( - InputMessageArgs::default() - .role(Role::User) - .content("What transport protocols does the 2025-03-26 version of the MCP spec (modelcontextprotocol/modelcontextprotocol) support?") - .build()?, + .input(InputParam::Items(vec![InputItem::EasyMessage( + EasyInputMessage { + r#type: MessageType::Message, + role: Role::User, + content: EasyInputContent::Text("What transport protocols does the 2025-03-26 version of the MCP spec (modelcontextprotocol/modelcontextprotocol) support?".to_string()), + } )])) .tools(vec![ - WebSearchPreview(WebSearchPreviewArgs::default().build()?), - Mcp(McpArgs::default() + Tool::WebSearchPreview(WebSearchToolArgs::default().build()?), + Tool::Mcp(MCPToolArgs::default() .server_label("deepwiki") .server_url("https://mcp.deepwiki.com/mcp") - .require_approval(RequireApproval::Policy(RequireApprovalPolicy::Never)) - .allowed_tools(AllowedTools::List(vec!["ask_question".to_string()])) + .require_approval(MCPToolRequireApproval::ApprovalSetting(MCPToolApprovalSetting::Never)) + .allowed_tools(MCPToolAllowedTools::List(vec!["ask_question".to_string()])) .build()?), ]) .build()?; From 6db509fc6b969623559e94e8e2c74322ec3a4bc5 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 11:21:16 -0800 Subject: [PATCH 22/42] fix types --- async-openai/src/types/responses/response.rs | 22 +++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/async-openai/src/types/responses/response.rs b/async-openai/src/types/responses/response.rs index 73d30ab6..98cae0b3 100644 --- a/async-openai/src/types/responses/response.rs +++ b/async-openai/src/types/responses/response.rs @@ -419,9 +419,9 @@ pub struct InputMessage { /// The status of the item. One of `in_progress`, `completed`, or `incomplete`. /// Populated when items are returned via API. #[serde(skip_serializing_if = "Option::is_none")] - pub status: Option, // TODO rename OutputStatus to ItemStatus maybe? - /// The type of the message input. Always set to `message`. - pub r#type: MessageType, + pub status: Option, + /////The type of the message input. Always set to `message`. + //pub r#type: MessageType, } /// The role for an input message - can only be `user`, `system`, or `developer`. @@ -813,6 +813,11 @@ pub enum Truncation { Disabled, } +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct Billing { + pub payer: String, +} + /// o-series reasoning settings. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] #[builder( @@ -1531,7 +1536,7 @@ pub struct ResponseLogProb { pub struct OutputTextContent { /// The annotations of the text output. pub annotations: Vec, - pub logprobs: Option, + pub logprobs: Option>, /// The text output from the model. pub text: String, } @@ -1612,8 +1617,8 @@ pub struct OutputMessage { /// The status of the message input. One of `in_progress`, `completed`, or /// `incomplete`. 
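A small aside on the EasyInputContent migration in the two patches above (illustrative only, not part of the diff): the From<&str> impl kept in impls.rs means example code does not have to spell out EasyInputContent::Text by hand.

// Sketch, assuming the retained From<&str> impl for EasyInputContent shown in impls.rs above.
use async_openai::types::responses::EasyInputContent;

fn haiku_prompt() -> EasyInputContent {
    // Equivalent to EasyInputContent::Text("...".to_string()).
    "Write a haiku about programming.".into()
}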
Populated when input items are returned via API. pub status: OutputStatus, - /// The type of the output message. Always `message`. - pub r#type: MessageType, + ///// The type of the output message. Always `message`. + //pub r#type: MessageType, } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] @@ -2173,6 +2178,10 @@ pub struct Response { #[serde(skip_serializing_if = "Option::is_none")] pub background: Option, + /// Billing information for the response. + #[serde(skip_serializing_if = "Option::is_none")] + pub billing: Option, + /// The conversation that this response belongs to. Input items and output /// items from this response are automatically added to this conversation. #[serde(skip_serializing_if = "Option::is_none")] @@ -2368,7 +2377,6 @@ pub enum Status { #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[serde(tag = "type")] #[serde(rename_all = "snake_case")] -#[non_exhaustive] pub enum OutputItem { /// An output message from the model. Message(OutputMessage), From 0f0bfa13dffb8c793f3dc42ed8f63e04e80da962 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 11:29:58 -0800 Subject: [PATCH 23/42] fix examples/responses-function-call --- examples/responses-function-call/src/main.rs | 107 ++++++++++--------- 1 file changed, 56 insertions(+), 51 deletions(-) diff --git a/examples/responses-function-call/src/main.rs b/examples/responses-function-call/src/main.rs index 3e2083e8..0dcfc3e2 100644 --- a/examples/responses-function-call/src/main.rs +++ b/examples/responses-function-call/src/main.rs @@ -1,7 +1,8 @@ use async_openai::{ types::responses::{ - CreateResponseArgs, FunctionArgs, FunctionCall, Input, InputItem, InputMessageArgs, - OutputContent, Role, ToolDefinition, + CreateResponseArgs, EasyInputContent, EasyInputMessage, FunctionCallOutput, + FunctionCallOutputItemParam, FunctionTool, FunctionToolCall, InputItem, InputParam, Item, + MessageType, OutputItem, Role, Tool, }, Client, }; @@ -22,48 +23,46 @@ fn check_weather(location: String, units: String) -> String { async fn main() -> Result<(), Box> { let client = Client::new(); - let tools = vec![ToolDefinition::Function( - FunctionArgs::default() - .name("get_weather") - .description("Retrieves current weather for the given location") - .parameters(serde_json::json!( - { - "type": "object", - "properties": { - "location": { - "type": "string", - "description": "City and country e.g. Bogotá, Colombia" - }, - "units": { - "type": "string", - "enum": [ - "celsius", - "fahrenheit" - ], - "description": "Units the temperature will be returned in." - } + let tools = vec![Tool::Function(FunctionTool { + name: "get_weather".to_string(), + description: Some("Retrieves current weather for the given location".to_string()), + parameters: Some(serde_json::json!( + { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "City and country e.g. Bogotá, Colombia" }, - "required": [ - "location", - "units" - ], - "additionalProperties": false - } - )) - .build()?, - )]; - - let mut input_messages = vec![InputItem::Message( - InputMessageArgs::default() - .role(Role::User) - .content("What's the weather like in Paris today?") - .build()?, - )]; + "units": { + "type": "string", + "enum": [ + "celsius", + "fahrenheit" + ], + "description": "Units the temperature will be returned in." 
+ } + }, + "required": [ + "location", + "units" + ], + "additionalProperties": false + } + )), + strict: None, + })]; + + let mut input_messages = vec![InputItem::EasyMessage(EasyInputMessage { + r#type: MessageType::Message, + role: Role::User, + content: EasyInputContent::Text("What's the weather like in Paris today?".to_string()), + })]; let request = CreateResponseArgs::default() .max_output_tokens(512u32) .model("gpt-4.1") - .input(Input::Items(input_messages.clone())) + .input(InputParam::Items(input_messages.clone())) .tools(tools.clone()) .build()?; @@ -72,9 +71,9 @@ async fn main() -> Result<(), Box> { let response = client.responses().create(request).await?; // the model might ask for us to do a function call - let function_call_request: Option = - response.output.into_iter().find_map(|output_content| { - if let OutputContent::FunctionCall(inner) = output_content { + let function_call_request: Option = + response.output.into_iter().find_map(|output_item| { + if let OutputItem::FunctionCall(inner) = output_item { Some(inner) } else { None @@ -97,19 +96,25 @@ async fn main() -> Result<(), Box> { } }; - input_messages.push(InputItem::Custom(serde_json::to_value( - &OutputContent::FunctionCall(function_call_request.clone()), - )?)); - input_messages.push(InputItem::Custom(serde_json::json!({ - "type": "function_call_output", - "call_id": function_call_request.call_id, - "output": function_result, - }))); + // Add the function call from the assistant back to the conversation + input_messages.push(InputItem::Item(Item::FunctionCall( + function_call_request.clone(), + ))); + + // Add the function call output back to the conversation + input_messages.push(InputItem::Item(Item::FunctionCallOutput( + FunctionCallOutputItemParam { + call_id: function_call_request.call_id.clone(), + output: FunctionCallOutput::Text(function_result), + id: None, + status: None, + }, + ))); let request = CreateResponseArgs::default() .max_output_tokens(512u32) .model("gpt-4.1") - .input(Input::Items(input_messages)) + .input(InputParam::Items(input_messages)) .tools(tools) .build()?; From 86d5cf6cc2691e72225027b270d3248b7264fa18 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 11:34:17 -0800 Subject: [PATCH 24/42] fix examples/responses-stream --- examples/responses-stream/src/main.rs | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/examples/responses-stream/src/main.rs b/examples/responses-stream/src/main.rs index 27e8b14e..37be90c6 100644 --- a/examples/responses-stream/src/main.rs +++ b/examples/responses-stream/src/main.rs @@ -1,7 +1,8 @@ use async_openai::{ Client, types::responses::{ - CreateResponseArgs, Input, InputContent, InputItem, InputMessageArgs, ResponseEvent, Role, + CreateResponseArgs, EasyInputContent, EasyInputMessage, InputItem, InputParam, MessageType, + ResponseStreamEvent, Role, }, }; use futures::StreamExt; @@ -13,13 +14,12 @@ async fn main() -> Result<(), Box> { let request = CreateResponseArgs::default() .model("gpt-4.1") .stream(true) - .input(Input::Items(vec![InputItem::Message( - InputMessageArgs::default() - .role(Role::User) - .content(InputContent::TextInput( - "Write a haiku about programming.".to_string(), - )) - .build()?, + .input(InputParam::Items(vec![InputItem::EasyMessage( + EasyInputMessage { + r#type: MessageType::Message, + role: Role::User, + content: EasyInputContent::Text("Write a haiku about programming.".to_string()), + }, )])) .build()?; @@ -28,12 +28,12 @@ async fn main() -> Result<(), Box> { 
while let Some(result) = stream.next().await { match result { Ok(response_event) => match &response_event { - ResponseEvent::ResponseOutputTextDelta(delta) => { + ResponseStreamEvent::ResponseOutputTextDelta(delta) => { print!("{}", delta.delta); } - ResponseEvent::ResponseCompleted(_) - | ResponseEvent::ResponseIncomplete(_) - | ResponseEvent::ResponseFailed(_) => { + ResponseStreamEvent::ResponseCompleted(_) + | ResponseStreamEvent::ResponseIncomplete(_) + | ResponseStreamEvent::ResponseFailed(_) => { break; } _ => { From 342142762758313ed0795bb99aa6ecde9ca30e92 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 11:50:20 -0800 Subject: [PATCH 25/42] update it to RealtimeResponse to distinguish from Response --- async-openai/src/types/realtime/response_resource.rs | 2 +- async-openai/src/types/realtime/server_event.rs | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/async-openai/src/types/realtime/response_resource.rs b/async-openai/src/types/realtime/response_resource.rs index 2d6342d2..7999b784 100644 --- a/async-openai/src/types/realtime/response_resource.rs +++ b/async-openai/src/types/realtime/response_resource.rs @@ -183,7 +183,7 @@ pub struct ResponseCreate { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct Response { +pub struct RealtimeResponse { /// Configuration for audio output. pub audio: Option, diff --git a/async-openai/src/types/realtime/server_event.rs b/async-openai/src/types/realtime/server_event.rs index 6d16742c..3d73b742 100644 --- a/async-openai/src/types/realtime/server_event.rs +++ b/async-openai/src/types/realtime/server_event.rs @@ -1,6 +1,6 @@ use serde::{Deserialize, Serialize}; -use crate::types::realtime::{Response, Session}; +use crate::types::realtime::{RealtimeResponse, Session}; use super::{ content_part::ContentPart, error::RealtimeAPIError, item::Item, rate_limit::RateLimit, @@ -271,7 +271,7 @@ pub struct ResponseCreatedEvent { /// The unique ID of the server event. pub event_id: String, /// The response resource. - pub response: Response, + pub response: RealtimeResponse, } #[derive(Debug, Serialize, Deserialize, Clone)] @@ -279,7 +279,7 @@ pub struct ResponseDoneEvent { /// The unique ID of the server event. pub event_id: String, /// The response resource. - pub response: Response, + pub response: RealtimeResponse, } #[derive(Debug, Serialize, Deserialize, Clone)] From 7371c88010e063d79f1731b64f1d62393c718243 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 11:57:29 -0800 Subject: [PATCH 26/42] avoid name conflicts --- async-openai/src/types/realtime/response_resource.rs | 4 ++-- async-openai/src/types/realtime/server_event.rs | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/async-openai/src/types/realtime/response_resource.rs b/async-openai/src/types/realtime/response_resource.rs index 7999b784..137e562d 100644 --- a/async-openai/src/types/realtime/response_resource.rs +++ b/async-openai/src/types/realtime/response_resource.rs @@ -6,7 +6,7 @@ use crate::types::realtime::{ }; #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct Usage { +pub struct RealtimeResponseUsage { /// Details about the input tokens used in the Response. Cached tokens are tokens from previous /// turns in the conversation that are included as context for the current response. 
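An illustrative note on the rename in the patch above (not part of the diff): with the realtime response type now called RealtimeResponse, it no longer collides with the Responses API Response, so both can be imported side by side, assuming the existing module re-exports.

// Sketch: the two response types can now coexist in one module.
use async_openai::types::realtime::RealtimeResponse; // realtime WebSocket response resource
use async_openai::types::responses::Response;        // Responses API response

// Hypothetical handlers, shown only to illustrate that the names no longer clash.
fn on_realtime_done(_resp: RealtimeResponse) {}
fn on_response(_resp: Response) {}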
Cached tokens /// here are counted as a subset of input tokens, meaning input tokens will include cached and @@ -232,5 +232,5 @@ pub struct RealtimeResponse { /// Usage statistics for the Response, this will correspond to billing. A Realtime API session /// will maintain a conversation context and append new Items to the Conversation, thus output /// from previous turns (text and audio tokens) will become the input for later turns. - pub usage: Option, + pub usage: Option, } diff --git a/async-openai/src/types/realtime/server_event.rs b/async-openai/src/types/realtime/server_event.rs index 3d73b742..739ad9cc 100644 --- a/async-openai/src/types/realtime/server_event.rs +++ b/async-openai/src/types/realtime/server_event.rs @@ -137,7 +137,7 @@ pub struct LogProb { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct InputTokenDetails { +pub struct TokenUsageInputTokenDetails { /// Number of audio tokens billed for this request. pub audio_tokens: u32, /// Number of text tokens billed for this request. @@ -153,7 +153,7 @@ pub struct TokenUsage { /// Total number of tokens used (input + output). pub total_tokens: u32, /// Details about the input tokens billed for this request. - pub input_token_details: Option, + pub input_token_details: Option, } #[derive(Debug, Serialize, Deserialize, Clone)] @@ -164,7 +164,7 @@ pub struct DurationUsage { #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(tag = "type")] -pub enum Usage { +pub enum TranscriptionUsage { #[serde(rename = "tokens")] TokenUsage(TokenUsage), #[serde(rename = "duration")] @@ -185,7 +185,7 @@ pub struct ConversationItemInputAudioTranscriptionCompletedEvent { pub logprobs: Option>, /// Usage statistics for the transcription, this is billed according to the ASR model's pricing rather than /// the realtime model's pricing. - pub usage: Usage, + pub usage: TranscriptionUsage, } #[derive(Debug, Serialize, Deserialize, Clone)] From 95a2217017fde4ec5a8e6345c8da4a684224e5aa Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 12:45:09 -0800 Subject: [PATCH 27/42] update realtime types --- .../src/types/realtime/client_event.rs | 54 +++++++++---------- .../src/types/realtime/server_event.rs | 2 +- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/async-openai/src/types/realtime/client_event.rs b/async-openai/src/types/realtime/client_event.rs index 3a0ddf54..fe28dc47 100644 --- a/async-openai/src/types/realtime/client_event.rs +++ b/async-openai/src/types/realtime/client_event.rs @@ -128,7 +128,7 @@ pub struct OutputAudioBufferClearEvent { /// These are events that the OpenAI Realtime WebSocket server will accept from the client. #[derive(Debug, Serialize, Deserialize)] #[serde(tag = "type")] -pub enum ClientEvent { +pub enum RealtimeClientEvent { /// Send this event to update the session's configuration. The client may send this event at any time to update any field /// except for `voice` and `model`. `voice` can be updated only if there have been no other audio outputs yet. 
/// @@ -234,14 +234,14 @@ pub enum ClientEvent { OutputAudioBufferClear(OutputAudioBufferClearEvent), } -impl From<&ClientEvent> for String { - fn from(value: &ClientEvent) -> Self { +impl From<&RealtimeClientEvent> for String { + fn from(value: &RealtimeClientEvent) -> Self { serde_json::to_string(value).unwrap() } } -impl From for Message { - fn from(value: ClientEvent) -> Self { +impl From for Message { + fn from(value: RealtimeClientEvent) -> Self { Message::Text(String::from(&value).into()) } } @@ -266,61 +266,61 @@ macro_rules! event_from { }; } -event_from!(SessionUpdateEvent, ClientEvent, SessionUpdate); +event_from!(SessionUpdateEvent, RealtimeClientEvent, SessionUpdate); event_from!( InputAudioBufferAppendEvent, - ClientEvent, + RealtimeClientEvent, InputAudioBufferAppend ); event_from!( InputAudioBufferCommitEvent, - ClientEvent, + RealtimeClientEvent, InputAudioBufferCommit ); event_from!( InputAudioBufferClearEvent, - ClientEvent, + RealtimeClientEvent, InputAudioBufferClear ); event_from!( ConversationItemCreateEvent, - ClientEvent, + RealtimeClientEvent, ConversationItemCreate ); event_from!( ConversationItemTruncateEvent, - ClientEvent, + RealtimeClientEvent, ConversationItemTruncate ); event_from!( ConversationItemDeleteEvent, - ClientEvent, + RealtimeClientEvent, ConversationItemDelete ); event_from!( ConversationItemRetrieveEvent, - ClientEvent, + RealtimeClientEvent, ConversationItemRetrieve ); -event_from!(ResponseCreateEvent, ClientEvent, ResponseCreate); -event_from!(ResponseCancelEvent, ClientEvent, ResponseCancel); +event_from!(ResponseCreateEvent, RealtimeClientEvent, ResponseCreate); +event_from!(ResponseCancelEvent, RealtimeClientEvent, ResponseCancel); event_from!( OutputAudioBufferClearEvent, - ClientEvent, + RealtimeClientEvent, OutputAudioBufferClear ); -message_from_event!(SessionUpdateEvent, ClientEvent); -message_from_event!(InputAudioBufferAppendEvent, ClientEvent); -message_from_event!(InputAudioBufferCommitEvent, ClientEvent); -message_from_event!(InputAudioBufferClearEvent, ClientEvent); -message_from_event!(ConversationItemCreateEvent, ClientEvent); -message_from_event!(ConversationItemTruncateEvent, ClientEvent); -message_from_event!(ConversationItemDeleteEvent, ClientEvent); -message_from_event!(ConversationItemRetrieveEvent, ClientEvent); -message_from_event!(ResponseCreateEvent, ClientEvent); -message_from_event!(ResponseCancelEvent, ClientEvent); -message_from_event!(OutputAudioBufferClearEvent, ClientEvent); +message_from_event!(SessionUpdateEvent, RealtimeClientEvent); +message_from_event!(InputAudioBufferAppendEvent, RealtimeClientEvent); +message_from_event!(InputAudioBufferCommitEvent, RealtimeClientEvent); +message_from_event!(InputAudioBufferClearEvent, RealtimeClientEvent); +message_from_event!(ConversationItemCreateEvent, RealtimeClientEvent); +message_from_event!(ConversationItemTruncateEvent, RealtimeClientEvent); +message_from_event!(ConversationItemDeleteEvent, RealtimeClientEvent); +message_from_event!(ConversationItemRetrieveEvent, RealtimeClientEvent); +message_from_event!(ResponseCreateEvent, RealtimeClientEvent); +message_from_event!(ResponseCancelEvent, RealtimeClientEvent); +message_from_event!(OutputAudioBufferClearEvent, RealtimeClientEvent); impl From for ConversationItemCreateEvent { fn from(value: Item) -> Self { diff --git a/async-openai/src/types/realtime/server_event.rs b/async-openai/src/types/realtime/server_event.rs index 739ad9cc..1fae8c4f 100644 --- a/async-openai/src/types/realtime/server_event.rs +++ 
b/async-openai/src/types/realtime/server_event.rs @@ -559,7 +559,7 @@ pub struct ResponseMCPCallFailedEvent { /// These are events emitted from the OpenAI Realtime WebSocket server to the client. #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(tag = "type")] -pub enum ServerEvent { +pub enum RealtimeServerEvent { /// Returned when an error occurs, which could be a client problem or a server problem. /// Most errors are recoverable and the session will stay open, we recommend to /// implementors to monitor and log error messages by default. From 58557c00ef74a7ed5928ffb1d568c42a770c6006 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 12:45:23 -0800 Subject: [PATCH 28/42] update realtime example --- examples/realtime/src/main.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/realtime/src/main.rs b/examples/realtime/src/main.rs index 11a87329..4f79b656 100644 --- a/examples/realtime/src/main.rs +++ b/examples/realtime/src/main.rs @@ -1,7 +1,7 @@ use std::process::exit; use async_openai::types::realtime::{ - ConversationItemCreateEvent, Item, Message as RealtimeMessage, ResponseCreateEvent, ServerEvent, + ConversationItemCreateEvent, Item, RealtimeServerEvent, ResponseCreateEvent, }; use futures_util::{future, pin_mut, StreamExt}; @@ -43,7 +43,7 @@ async fn main() { match message { Message::Text(_) => { let data = message.clone().into_data(); - let server_event: Result = + let server_event: Result = serde_json::from_slice(&data); match server_event { Ok(server_event) => { @@ -53,10 +53,10 @@ async fn main() { eprint!("{:32} | ", event_type.as_str().unwrap()); match server_event { - ServerEvent::ResponseOutputItemDone(event) => { + RealtimeServerEvent::ResponseOutputItemDone(event) => { eprint!("{event:?}"); } - ServerEvent::Error(e) => { + RealtimeServerEvent::Error(e) => { eprint!("{e:?}"); } _ => {} From ce11c0532025fe64ac33ebc3d66e94894c5dd33a Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 13:00:36 -0800 Subject: [PATCH 29/42] update names --- async-openai/src/types/realtime/session_resource.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/async-openai/src/types/realtime/session_resource.rs b/async-openai/src/types/realtime/session_resource.rs index ba0f12cf..97f42bbf 100644 --- a/async-openai/src/types/realtime/session_resource.rs +++ b/async-openai/src/types/realtime/session_resource.rs @@ -331,10 +331,10 @@ pub struct TokenLimits { pub enum Session { /// The type of session to create. Always `realtime` for the Realtime API. #[serde(rename = "realtime")] - RealtimeSessionConfiguration(RealtimeSession), + RealtimeSession(RealtimeSession), /// The type of session to create. Always `transcription` for transcription sessions. #[serde(rename = "transcription")] - TranscriptionSessionConfiguration(TranscriptionSession), + RealtimeTranscriptionSession(RealtimeTranscriptionSession), } /// Realtime session object configuration. @@ -438,7 +438,7 @@ pub struct TranscriptionAudio { /// Realtime transcription session object configuration. #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct TranscriptionSession { +pub struct RealtimeTranscriptionSession { /// Configuration for input and output audio. 
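Relating to the RealtimeClientEvent rename above, a minimal sketch of how the explicit From impls and the event_from!/message_from_event! invocations are expected to be used when sending events to the WebSocket server. This is not part of the patch and assumes the macros generate the From conversions their names suggest.

use async_openai::types::realtime::{RealtimeClientEvent, ResponseCreateEvent};
use tokio_tungstenite::tungstenite::Message;

fn response_create_frame() -> Message {
    // Ask the server to create a response with default parameters.
    let event = ResponseCreateEvent { event_id: None, response: None };

    // Assumed to come from event_from!(ResponseCreateEvent, RealtimeClientEvent, ResponseCreate).
    let client_event: RealtimeClientEvent = event.into();

    // From<RealtimeClientEvent> for Message serializes the event into a JSON text frame.
    client_event.into()
}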
pub audio: TranscriptionAudio, From 5f3dbed9f5ae78cd84e5882b4d82313b711d001c Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 14:36:49 -0800 Subject: [PATCH 30/42] updated realtime spec --- .../src/types/realtime/client_event.rs | 4 +- .../src/types/realtime/response_resource.rs | 15 ++-- .../src/types/realtime/session_resource.rs | 87 ++++++------------- 3 files changed, 38 insertions(+), 68 deletions(-) diff --git a/async-openai/src/types/realtime/client_event.rs b/async-openai/src/types/realtime/client_event.rs index fe28dc47..c6edb1c0 100644 --- a/async-openai/src/types/realtime/client_event.rs +++ b/async-openai/src/types/realtime/client_event.rs @@ -1,7 +1,7 @@ use serde::{Deserialize, Serialize}; use tokio_tungstenite::tungstenite::Message; -use crate::types::realtime::{ResponseCreate, Session}; +use crate::types::realtime::{RealtimeResponseCreateParams, Session}; use super::item::Item; @@ -103,7 +103,7 @@ pub struct ResponseCreateEvent { pub event_id: Option, /// Create a new Realtime response with these parameters - pub response: Option, + pub response: Option, } #[derive(Debug, Serialize, Deserialize, Clone, Default)] diff --git a/async-openai/src/types/realtime/response_resource.rs b/async-openai/src/types/realtime/response_resource.rs index 137e562d..eb7040f5 100644 --- a/async-openai/src/types/realtime/response_resource.rs +++ b/async-openai/src/types/realtime/response_resource.rs @@ -1,8 +1,11 @@ use serde::{Deserialize, Serialize}; -use crate::types::realtime::{ - AudioFormat, Conversation, Item, MaxOutputTokens, Prompt, RealtimeVoice, ToolChoice, - ToolDefinition, +use crate::types::{ + realtime::{ + Conversation, Item, MaxOutputTokens, RealtimeAudioFormats, RealtimeTool, RealtimeVoice, + ToolChoice, + }, + responses::Prompt, }; #[derive(Debug, Serialize, Deserialize, Clone)] @@ -104,7 +107,7 @@ pub struct ResponseStatusDetail { #[derive(Debug, Serialize, Deserialize, Clone)] pub struct ResponseAudioOutput { /// The format of the output audio. - pub format: AudioFormat, + pub format: RealtimeAudioFormats, /// The voice the model uses to respond. Voice cannot be changed during the session once /// the model has responded with audio at least once. Current voice options are @@ -121,7 +124,7 @@ pub struct ResponseAudio { /// The response resource. #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseCreate { +pub struct RealtimeResponseCreateParams { /// Configuration for audio input and output. pub audio: ResponseAudio, @@ -179,7 +182,7 @@ pub struct ResponseCreate { /// Tools available to the model. 
#[serde(skip_serializing_if = "Option::is_none")] - pub tools: Option>, + pub tools: Option>, } #[derive(Debug, Serialize, Deserialize, Clone)] diff --git a/async-openai/src/types/realtime/session_resource.rs b/async-openai/src/types/realtime/session_resource.rs index 97f42bbf..66780844 100644 --- a/async-openai/src/types/realtime/session_resource.rs +++ b/async-openai/src/types/realtime/session_resource.rs @@ -1,6 +1,9 @@ use serde::{Deserialize, Serialize}; -use crate::types::MCPTool; +use crate::types::{ + responses::{Prompt, ToolChoiceFunction, ToolChoiceMCP, ToolChoiceOptions}, + MCPTool, +}; #[derive(Debug, Default, Serialize, Deserialize, Clone)] pub struct AudioTranscription { @@ -23,7 +26,7 @@ pub struct AudioTranscription { #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(tag = "type")] -pub enum TurnDetection { +pub enum RealtimeTurnDetection { /// Server-side voice activity detection (VAD) which flips on when user speech is detected /// and off after a period of silence. #[serde(rename = "server_vad")] @@ -95,7 +98,7 @@ pub enum MaxOutputTokens { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct FunctionTool { +pub struct RealtimeFunctionTool { /// The name of the function. pub name: String, /// The description of the function, including guidance on when and how to call it, @@ -107,9 +110,9 @@ pub struct FunctionTool { #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(tag = "type")] -pub enum ToolDefinition { +pub enum RealtimeTool { #[serde(rename = "function")] - Function(FunctionTool), + Function(RealtimeFunctionTool), /// Give the model access to additional tools via remote Model Context Protocol (MCP) servers. /// [Learn more about MCP](https://platform.openai.com/docs/guides/tools-remote-mcp). #[serde(rename = "mcp")] @@ -123,35 +126,15 @@ pub enum FunctionType { } #[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(tag = "type")] -pub enum Tool { +#[serde(tag = "type", rename_all = "snake_case")] +pub enum ToolChoice { /// Use this option to force the model to call a specific function. - #[serde(rename = "function")] - Function { - /// The name of the function to call. - name: String, - }, + Function(ToolChoiceFunction), /// Use this option to force the model to call a specific tool on a remote MCP server. - #[serde(rename = "mcp")] - MCP { - /// The name of the tool to call on the server. - name: String, - /// The label of the MCP server to use. - server_label: String, - }, -} + Mcp(ToolChoiceMCP), -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(rename_all = "lowercase")] -pub enum ToolChoice { - /// `auto` means the model can pick between generating a message or calling one or more tools. - Auto, - /// `none` means the model will not call any tool and instead generates a message. - None, - /// `required` means the model must call one or more tools. - Required, #[serde(untagged)] - Tool(Tool), + Mode(ToolChoiceOptions), } #[derive(Debug, Serialize, Deserialize, Clone)] @@ -171,7 +154,7 @@ pub enum RealtimeVoice { #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(tag = "type")] -pub enum AudioFormat { +pub enum RealtimeAudioFormats { /// The PCM audio format. Only a 24kHz sample rate is supported. #[serde(rename = "audio/pcm")] PCMAudioFormat { @@ -195,13 +178,13 @@ pub struct G711ULAWAudioFormat { #[derive(Debug, Serialize, Deserialize, Clone)] pub struct AudioInput { /// The format of the input audio. - pub format: AudioFormat, + pub format: RealtimeAudioFormats, /// Configuration for input audio noise reduction. 
This can be set to null to turn off. /// Noise reduction filters audio added to the input audio buffer before it is sent to VAD /// and the model. Filtering the audio can improve VAD and turn detection accuracy /// (reducing false positives) and model performance by improving perception of the /// input audio. - pub noise_reduction: Option, + pub noise_reduction: Option, /// Configuration for input audio transcription, defaults to off and can be set to `null` to turn off once on. /// Input audio transcription is not native to the model, since the model consumes audio directly. /// Transcription runs asynchronously through [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription) @@ -222,13 +205,13 @@ pub struct AudioInput { /// the model will score a low probability of turn end and wait longer for the user to /// continue speaking. This can be useful for more natural conversations, but may have a /// higher latency. - pub turn_detection: TurnDetection, + pub turn_detection: RealtimeTurnDetection, } #[derive(Debug, Serialize, Deserialize, Clone)] pub struct AudioOutput { /// The format of the output audio. - pub format: AudioFormat, + pub format: RealtimeAudioFormats, /// The speed of the model's spoken response as a multiple of the original speed. /// 1.0 is the default speed. 0.25 is the minimum speed. 1.5 is the maximum speed. /// This value can only be changed in between model turns, not while a response @@ -250,19 +233,6 @@ pub struct Audio { pub output: AudioOutput, } -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct Prompt { - /// The unique identifier of the prompt template to use. - pub id: String, - /// Optional map of values to substitute in for variables in your prompt. The substitution - /// values can either be strings, or other Response input types like images or files. - #[serde(skip_serializing_if = "Option::is_none")] - pub variables: Option, - /// Optional version of the prompt template. - #[serde(skip_serializing_if = "Option::is_none")] - pub version: Option, -} - #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(rename_all = "lowercase")] pub enum Tracing { @@ -286,7 +256,7 @@ pub struct TracingConfiguration { /// The truncation strategy to use for the session. #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(rename_all = "lowercase")] -pub enum Truncation { +pub enum RealtimeTruncation { /// `auto` is the default truncation strategy. Auto, /// `disabled` will disable truncation and emit errors when the conversation exceeds the input @@ -338,6 +308,7 @@ pub enum Session { } /// Realtime session object configuration. +/// openapi spec type: RealtimeSessionCreateRequestGA #[derive(Debug, Serialize, Deserialize, Clone)] pub struct RealtimeSession { pub audio: Audio, @@ -390,7 +361,7 @@ pub struct RealtimeSession { /// Tools available to the model. #[serde(skip_serializing_if = "Option::is_none")] - pub tools: Option>, + pub tools: Option>, /// Realtime API can write session traces to the [Traces Dashboard](https://platform.openai.com/logs?api=traces). /// Set to null to disable tracing. Once tracing is enabled for a session, the configuration cannot be modified. @@ -413,30 +384,26 @@ pub struct RealtimeSession { /// truncate but would instead return an error if the conversation exceeds the model's input /// token limit. #[serde(skip_serializing_if = "Option::is_none")] - pub truncation: Option, + pub truncation: Option, } +/// Type of noise reduction. 
`near_field` is for close-talking microphones such as +/// headphones, `far_field` is for far-field microphones such as laptop or conference +/// room microphones. #[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(rename_all = "snake_case")] +#[serde(tag = "type", rename_all = "snake_case")] pub enum NoiseReductionType { NearField, FarField, } -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct NoiseReduction { - /// Type of noise reduction. `near_field` is for close-talking microphones such as - /// headphones, `far_field` is for far-field microphones such as laptop or conference - /// room microphones. - pub r#type: NoiseReductionType, -} - #[derive(Debug, Serialize, Deserialize, Clone)] pub struct TranscriptionAudio { pub input: AudioInput, } /// Realtime transcription session object configuration. +/// openapi spec type: RealtimeTranscriptionSessionCreateRequestGA #[derive(Debug, Serialize, Deserialize, Clone)] pub struct RealtimeTranscriptionSession { /// Configuration for input and output audio. From 4e0fa1dc61b79d385efd87e01e1d5e7b1539e67c Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 14:55:58 -0800 Subject: [PATCH 31/42] RealtimeConversationItem --- async-openai/src/types/realtime/client_event.rs | 10 ++++------ async-openai/src/types/realtime/item.rs | 4 ++-- .../src/types/realtime/response_resource.rs | 8 ++++---- async-openai/src/types/realtime/server_event.rs | 13 +++++++------ 4 files changed, 17 insertions(+), 18 deletions(-) diff --git a/async-openai/src/types/realtime/client_event.rs b/async-openai/src/types/realtime/client_event.rs index c6edb1c0..d77e04bb 100644 --- a/async-openai/src/types/realtime/client_event.rs +++ b/async-openai/src/types/realtime/client_event.rs @@ -1,9 +1,7 @@ use serde::{Deserialize, Serialize}; use tokio_tungstenite::tungstenite::Message; -use crate::types::realtime::{RealtimeResponseCreateParams, Session}; - -use super::item::Item; +use crate::types::realtime::{RealtimeConversationItem, RealtimeResponseCreateParams, Session}; #[derive(Debug, Serialize, Deserialize, Clone)] pub struct SessionUpdateEvent { @@ -56,7 +54,7 @@ pub struct ConversationItemCreateEvent { pub previous_item_id: Option, /// A single item within a Realtime conversation. 
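As a side note on the noise reduction change in the previous patch (illustrative only): with the serde tag moved onto NoiseReductionType itself, the old NoiseReduction wrapper struct is no longer needed to produce the tagged JSON shape.

use async_openai::types::realtime::NoiseReductionType;

fn main() {
    // Internally tagged unit variants serialize as an object carrying only the tag.
    let json = serde_json::to_value(NoiseReductionType::NearField).unwrap();
    assert_eq!(json, serde_json::json!({ "type": "near_field" }));
}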
- pub item: Item, + pub item: RealtimeConversationItem, } #[derive(Debug, Serialize, Deserialize, Clone, Default)] @@ -322,8 +320,8 @@ message_from_event!(ResponseCreateEvent, RealtimeClientEvent); message_from_event!(ResponseCancelEvent, RealtimeClientEvent); message_from_event!(OutputAudioBufferClearEvent, RealtimeClientEvent); -impl From for ConversationItemCreateEvent { - fn from(value: Item) -> Self { +impl From for ConversationItemCreateEvent { + fn from(value: RealtimeConversationItem) -> Self { Self { event_id: None, previous_item_id: None, diff --git a/async-openai/src/types/realtime/item.rs b/async-openai/src/types/realtime/item.rs index b6020bf8..4402a946 100644 --- a/async-openai/src/types/realtime/item.rs +++ b/async-openai/src/types/realtime/item.rs @@ -272,7 +272,7 @@ pub struct McpCall { #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(tag = "type")] #[serde(rename_all = "snake_case")] -pub enum Item { +pub enum RealtimeConversationItem { Message(Message), FunctionCall(FunctionCall), FunctionCallOutput(FunctionCallOutput), @@ -282,7 +282,7 @@ pub enum Item { McpApprovalRequest(McpApprovalRequest), } -impl TryFrom for Item { +impl TryFrom for RealtimeConversationItem { type Error = serde_json::Error; fn try_from(value: serde_json::Value) -> Result { diff --git a/async-openai/src/types/realtime/response_resource.rs b/async-openai/src/types/realtime/response_resource.rs index eb7040f5..57f866c5 100644 --- a/async-openai/src/types/realtime/response_resource.rs +++ b/async-openai/src/types/realtime/response_resource.rs @@ -2,8 +2,8 @@ use serde::{Deserialize, Serialize}; use crate::types::{ realtime::{ - Conversation, Item, MaxOutputTokens, RealtimeAudioFormats, RealtimeTool, RealtimeVoice, - ToolChoice, + Conversation, MaxOutputTokens, RealtimeAudioFormats, RealtimeConversationItem, + RealtimeTool, RealtimeVoice, ToolChoice, }, responses::Prompt, }; @@ -138,7 +138,7 @@ pub struct RealtimeResponseCreateParams { /// for this Response instead of using the default conversation. An empty array `[]` will clear /// the context for this Response. Note that this can include references to items that /// previously appeared in the session using their id. - pub input: Vec, + pub input: Vec, /// The default system instructions (i.e. system message) prepended to model calls. /// This field allows the client to guide the model on desired responses. @@ -219,7 +219,7 @@ pub struct RealtimeResponse { pub object: String, /// The list of output items generated by the response. - pub output: Vec, + pub output: Vec, /// The set of modalities the model used to respond, currently the only possible values /// are [\"audio\"], [\"text\"]. Audio output always include a text transcript. diff --git a/async-openai/src/types/realtime/server_event.rs b/async-openai/src/types/realtime/server_event.rs index 1fae8c4f..a440648d 100644 --- a/async-openai/src/types/realtime/server_event.rs +++ b/async-openai/src/types/realtime/server_event.rs @@ -3,7 +3,8 @@ use serde::{Deserialize, Serialize}; use crate::types::realtime::{RealtimeResponse, Session}; use super::{ - content_part::ContentPart, error::RealtimeAPIError, item::Item, rate_limit::RateLimit, + content_part::ContentPart, error::RealtimeAPIError, item::RealtimeConversationItem, + rate_limit::RateLimit, }; #[derive(Debug, Serialize, Deserialize, Clone)] @@ -35,7 +36,7 @@ pub struct ConversationItemAddedEvent { /// The unique ID of the server event. pub event_id: String, /// A single item within a Realtime conversation. 
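For the RealtimeConversationItem rename in this patch, the TryFrom<serde_json::Value> and From<RealtimeConversationItem> impls shown above keep the JSON-first construction path working. A hedged sketch, assuming the `input_text` content shape used by the realtime API (not confirmed by this diff):

use async_openai::types::realtime::{ConversationItemCreateEvent, RealtimeConversationItem};

fn user_text_item(text: &str) -> ConversationItemCreateEvent {
    let item = RealtimeConversationItem::try_from(serde_json::json!({
        "type": "message",
        "role": "user",
        "content": [{ "type": "input_text", "text": text }],
    }))
    .expect("valid conversation item JSON");

    // From<RealtimeConversationItem> for ConversationItemCreateEvent, shown above.
    item.into()
}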
- pub item: Item, + pub item: RealtimeConversationItem, /// The ID of the item that precedes this one, if any. This is used to maintain ordering when items are inserted. pub previous_item_id: Option, } @@ -45,7 +46,7 @@ pub struct ConversationItemDoneEvent { /// The unique ID of the server event. pub event_id: String, /// A single item within a Realtime conversation. - pub item: Item, + pub item: RealtimeConversationItem, /// The ID of the item that precedes this one, if any. This is used to maintain ordering when items are inserted. pub previous_item_id: Option, } @@ -243,7 +244,7 @@ pub struct ConversationItemRetrievedEvent { /// The unique ID of the server event. pub event_id: String, /// A single item within a Realtime conversation. - pub item: Item, + pub item: RealtimeConversationItem, } #[derive(Debug, Serialize, Deserialize, Clone)] @@ -291,7 +292,7 @@ pub struct ResponseOutputItemAddedEvent { /// The index of the output item in the Response. pub output_index: u32, /// A single item within a Realtime conversation. - pub item: Item, + pub item: RealtimeConversationItem, } #[derive(Debug, Serialize, Deserialize, Clone)] @@ -303,7 +304,7 @@ pub struct ResponseOutputItemDoneEvent { /// The index of the output item in the Response. pub output_index: u32, /// A single item within a Realtime conversation. - pub item: Item, + pub item: RealtimeConversationItem, } #[derive(Debug, Serialize, Deserialize, Clone)] From c39abf0729458925c6b4e9e90a1736a48404b211 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 14:56:17 -0800 Subject: [PATCH 32/42] RealtimeConversationItem --- examples/realtime/src/main.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/realtime/src/main.rs b/examples/realtime/src/main.rs index 4f79b656..3793b95a 100644 --- a/examples/realtime/src/main.rs +++ b/examples/realtime/src/main.rs @@ -1,7 +1,7 @@ use std::process::exit; use async_openai::types::realtime::{ - ConversationItemCreateEvent, Item, RealtimeServerEvent, ResponseCreateEvent, + ConversationItemCreateEvent, RealtimeConversationItem, RealtimeServerEvent, ResponseCreateEvent, }; use futures_util::{future, pin_mut, StreamExt}; @@ -107,7 +107,7 @@ async fn read_stdin(tx: futures_channel::mpsc::UnboundedSender) { } // Create item from json representation - let item = Item::try_from(serde_json::json!({ + let item = RealtimeConversationItem::try_from(serde_json::json!({ "type": "message", "role": "user", "content": [ From c8e614827e553ef213e68e390ba058558874e5c5 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 15:24:27 -0800 Subject: [PATCH 33/42] updates for the spec --- async-openai/src/types/realtime/error.rs | 11 ++++ async-openai/src/types/realtime/item.rs | 77 ++++++++++++------------ 2 files changed, 50 insertions(+), 38 deletions(-) diff --git a/async-openai/src/types/realtime/error.rs b/async-openai/src/types/realtime/error.rs index 6ce907c3..34fb9eac 100644 --- a/async-openai/src/types/realtime/error.rs +++ b/async-openai/src/types/realtime/error.rs @@ -17,3 +17,14 @@ pub struct RealtimeAPIError { /// The event_id of the client event that caused the error, if applicable. 
pub event_id: Option, } + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct ErrorCodeMessage { + pub code: String, + pub message: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct ErrorMessage { + pub message: String, +} diff --git a/async-openai/src/types/realtime/item.rs b/async-openai/src/types/realtime/item.rs index 4402a946..80a31ef4 100644 --- a/async-openai/src/types/realtime/item.rs +++ b/async-openai/src/types/realtime/item.rs @@ -1,5 +1,10 @@ use serde::{Deserialize, Serialize}; +use crate::types::{ + realtime::{ErrorCodeMessage, ErrorMessage}, + responses::MCPListToolsTool, +}; + #[derive(Debug, Serialize, Deserialize, Clone)] pub struct SystemMessageContent { /// The text content. @@ -9,7 +14,7 @@ pub struct SystemMessageContent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct SystemMessage { +pub struct RealtimeConversationItemMessageSystem { /// The content of the message. pub content: Vec, @@ -70,7 +75,7 @@ pub enum UserMessageContent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct UserMessage { +pub struct RealtimeConversationItemMessageUser { /// The content of the message. pub content: Vec, @@ -114,7 +119,7 @@ pub enum AssistantMessageContent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct AssistantMessage { +pub struct RealtimeConversationItemMessageAssistant { /// The content of the message. pub content: Vec, @@ -135,14 +140,14 @@ pub struct AssistantMessage { #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(tag = "role")] #[serde(rename_all = "lowercase")] -pub enum Message { - System(SystemMessage), - User(UserMessage), - Assistant(AssistantMessage), +pub enum RealtimeConversationItemMessage { + System(RealtimeConversationItemMessageSystem), + User(RealtimeConversationItemMessageUser), + Assistant(RealtimeConversationItemMessageAssistant), } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct FunctionCall { +pub struct RealtimeConversationItemFunctionCall { /// The arguments of the function call. This is a JSON-encoded string representing /// the arguments passed to the function, for example {"arg1": "value1", "arg2": 42}. pub arguments: String, @@ -167,7 +172,7 @@ pub struct FunctionCall { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct FunctionCallOutput { +pub struct RealtimeConversationItemFunctionCallOutput { /// The ID of the function call this output is for. pub call_id: String, @@ -189,7 +194,7 @@ pub struct FunctionCallOutput { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct McpApprovalResponse { +pub struct RealtimeMCPApprovalResponse { /// The ID of the approval request being answered. pub approval_request_id: String, @@ -204,34 +209,19 @@ pub struct McpApprovalResponse { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct AvailableMcpTool { - /// The JSON schema describing the tool's input. - pub input_schema: serde_json::Value, - - /// The name of the tool. - pub name: String, - - /// Additional annotations about the tool. - pub annotations: Option, - - /// The description of the tool. - pub description: String, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct McpListTools { +pub struct RealtimeMCPListTools { /// The label of the MCP server. pub server_label: String, /// The tools available on the server. - pub tools: Vec, + pub tools: Vec, /// The unique ID of the list. 
pub id: String, } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct McpApprovalRequest { +pub struct RealtimeMCPApprovalRequest { /// A JSON string of arguments for the tool. pub arguments: String, @@ -246,7 +236,18 @@ pub struct McpApprovalRequest { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct McpCall { +pub struct RealtimeMCPProtocolError {} + +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum RealtimeMCPToolCallError { + ProtocolError(ErrorCodeMessage), + ToolExecutionError(ErrorMessage), + HttpError(ErrorCodeMessage), +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeMCPToolCall { /// A JSON string of the arguments passed to the tool. pub arguments: String, @@ -263,23 +264,23 @@ pub struct McpCall { pub approval_request_id: Option, /// The error from the tool call, if any. - pub error: Option, // TODO: implement type + pub error: Option, /// The output from the tool call. - pub output: String, + pub output: Option, } #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(tag = "type")] #[serde(rename_all = "snake_case")] pub enum RealtimeConversationItem { - Message(Message), - FunctionCall(FunctionCall), - FunctionCallOutput(FunctionCallOutput), - McpApprovalResponse(McpApprovalResponse), - McpListTools(McpListTools), - McpCall(McpCall), - McpApprovalRequest(McpApprovalRequest), + Message(RealtimeConversationItemMessage), + FunctionCall(RealtimeConversationItemFunctionCall), + FunctionCallOutput(RealtimeConversationItemFunctionCallOutput), + McpApprovalResponse(RealtimeMCPApprovalResponse), + McpListTools(RealtimeMCPListTools), + McpCall(RealtimeMCPToolCall), + McpApprovalRequest(RealtimeMCPApprovalRequest), } impl TryFrom for RealtimeConversationItem { From 46c7159d0e5ee927e5622ec269f9930604ec0a23 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 16:45:10 -0800 Subject: [PATCH 34/42] update types to match spec --- .../src/types/realtime/response_resource.rs | 34 +++++++++++++++---- .../src/types/realtime/server_event.rs | 18 +++++----- 2 files changed, 36 insertions(+), 16 deletions(-) diff --git a/async-openai/src/types/realtime/response_resource.rs b/async-openai/src/types/realtime/response_resource.rs index 57f866c5..1dac0634 100644 --- a/async-openai/src/types/realtime/response_resource.rs +++ b/async-openai/src/types/realtime/response_resource.rs @@ -1,3 +1,5 @@ +use std::collections::HashMap; + use serde::{Deserialize, Serialize}; use crate::types::{ @@ -76,7 +78,7 @@ pub struct OutputTokenDetails { #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(rename_all = "snake_case")] -pub enum ResponseStatus { +pub enum RealtimeResponseStatus { InProgress, Completed, Cancelled, @@ -91,17 +93,35 @@ pub struct Error { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseStatusDetail { +#[serde(rename_all = "lowercase")] +pub enum RealtimeResponseStatusDetailType { + Completed, + Cancelled, + Incomplete, + Failed, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(rename_all = "snake_case")] +pub enum RealtimeResponseStatusDetailReason { + TurnDetected, + ClientCancelled, + MaxOutputTokens, + ContentFilter, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeResponseStatusDetail { /// A description of the error that caused the response to fail, populated when the status is failed. pub error: Option, /// The reason the Response did not complete. 
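A short sketch of consuming the typed MCP tool call error introduced in the previous patch (illustrative only; field names taken from the ErrorCodeMessage and ErrorMessage additions above):

use async_openai::types::realtime::RealtimeMCPToolCallError;

fn describe_mcp_error(err: &RealtimeMCPToolCallError) -> String {
    match err {
        RealtimeMCPToolCallError::ProtocolError(e) => {
            format!("protocol error {}: {}", e.code, e.message)
        }
        RealtimeMCPToolCallError::ToolExecutionError(e) => {
            format!("tool execution error: {}", e.message)
        }
        RealtimeMCPToolCallError::HttpError(e) => {
            format!("http error {}: {}", e.code, e.message)
        }
    }
}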
For a `cancelled` Response, one of `turn_detected` /// (the server VAD detected a new start of speech) or `client_cancelled` (the client sent a cancel /// event). For an incomplete Response, one of `max_output_tokens` or `content_filter` (the /// server-side safety filter activated and cut off the response). - pub reason: Option, + pub reason: Option, /// The type of error that caused the response to fail, corresponding with the `status` /// field (`completed`, `cancelled`, `incomplete`, `failed`). - pub r#type: String, + pub r#type: RealtimeResponseStatusDetailType, } #[derive(Debug, Serialize, Deserialize, Clone)] @@ -213,7 +233,7 @@ pub struct RealtimeResponse { /// Keys are strings with a maximum length of 64 characters. Values are strings with a /// maximum length of 512 characters. #[serde(skip_serializing_if = "Option::is_none")] - pub metadata: Option, + pub metadata: Option>, /// The object type, must be "realtime.response". pub object: String, @@ -227,10 +247,10 @@ pub struct RealtimeResponse { pub output_modalities: Vec, /// The final status of the response (`completed`, `cancelled`, `failed`, or `incomplete`, `in_progress`). - pub status: ResponseStatus, + pub status: RealtimeResponseStatus, /// Additional details about the status. - pub status_details: Option, + pub status_details: Option, /// Usage statistics for the Response, this will correspond to billing. A Realtime API session /// will maintain a conversation context and append new Items to the Conversation, thus output diff --git a/async-openai/src/types/realtime/server_event.rs b/async-openai/src/types/realtime/server_event.rs index a440648d..b4994aab 100644 --- a/async-openai/src/types/realtime/server_event.rs +++ b/async-openai/src/types/realtime/server_event.rs @@ -128,12 +128,12 @@ pub struct OutputAudioBufferClearedEvent { #[derive(Debug, Serialize, Deserialize, Clone)] /// Log probability information for a transcribed token. -pub struct LogProb { - /// Raw UTF-8 bytes for the token. +pub struct LogProbProperties { + /// The bytes that were used to generate the log probability. pub bytes: Vec, /// The log probability of the token. pub logprob: f64, - /// The token string. + /// The token that was used to generate the log probability. pub token: String, } @@ -146,7 +146,7 @@ pub struct TokenUsageInputTokenDetails { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct TokenUsage { +pub struct TranscriptTextUsageTokens { /// Number of input tokens billed for this request. pub input_tokens: u32, /// Number of output tokens generated. @@ -158,7 +158,7 @@ pub struct TokenUsage { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct DurationUsage { +pub struct TranscriptTextUsageDuration { ///Duration of the input audio in seconds. pub seconds: f32, } @@ -167,9 +167,9 @@ pub struct DurationUsage { #[serde(tag = "type")] pub enum TranscriptionUsage { #[serde(rename = "tokens")] - TokenUsage(TokenUsage), + Tokens(TranscriptTextUsageTokens), #[serde(rename = "duration")] - DurationUsage(DurationUsage), + Duration(TranscriptTextUsageDuration), } #[derive(Debug, Serialize, Deserialize, Clone)] @@ -183,7 +183,7 @@ pub struct ConversationItemInputAudioTranscriptionCompletedEvent { /// The transcribed text. pub transcript: String, /// Optional per-token log probability data. - pub logprobs: Option>, + pub logprobs: Option>, /// Usage statistics for the transcription, this is billed according to the ASR model's pricing rather than /// the realtime model's pricing. 
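And a sketch of handling the renamed transcription usage variants (illustrative only, relying on the fields shown in this hunk):

use async_openai::types::realtime::TranscriptionUsage;

fn log_transcription_usage(usage: &TranscriptionUsage) {
    match usage {
        TranscriptionUsage::Tokens(tokens) => {
            println!(
                "billed {} input / {} output tokens",
                tokens.input_tokens, tokens.output_tokens
            );
        }
        TranscriptionUsage::Duration(duration) => {
            println!("billed {} seconds of audio", duration.seconds);
        }
    }
}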
pub usage: TranscriptionUsage, @@ -204,7 +204,7 @@ pub struct ConversationItemInputAudioTranscriptionDeltaEvent { /// corresponds a log probability of which token would be selected for this chunk of transcription. This /// can help to identify if it was possible there were multiple valid options for a given chunk of /// transcription. - pub logprobs: Option>, + pub logprobs: Option>, } #[derive(Debug, Serialize, Deserialize, Clone)] From 6486c1c55afde2b62f2f7cec02c1eeb4e2be05d2 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 16:55:51 -0800 Subject: [PATCH 35/42] types updated --- async-openai/src/types/realtime/item.rs | 3 +-- async-openai/src/types/realtime/mod.rs | 8 ++++---- async-openai/src/types/realtime/rate_limit.rs | 11 +++++++++-- .../realtime/{response_resource.rs => response.rs} | 0 async-openai/src/types/realtime/server_event.rs | 6 ++---- .../realtime/{session_resource.rs => session.rs} | 0 6 files changed, 16 insertions(+), 12 deletions(-) rename async-openai/src/types/realtime/{response_resource.rs => response.rs} (100%) rename async-openai/src/types/realtime/{session_resource.rs => session.rs} (100%) diff --git a/async-openai/src/types/realtime/item.rs b/async-openai/src/types/realtime/item.rs index 80a31ef4..a1f28893 100644 --- a/async-openai/src/types/realtime/item.rs +++ b/async-openai/src/types/realtime/item.rs @@ -271,8 +271,7 @@ pub struct RealtimeMCPToolCall { } #[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(tag = "type")] -#[serde(rename_all = "snake_case")] +#[serde(tag = "type", rename_all = "snake_case")] pub enum RealtimeConversationItem { Message(RealtimeConversationItemMessage), FunctionCall(RealtimeConversationItemFunctionCall), diff --git a/async-openai/src/types/realtime/mod.rs b/async-openai/src/types/realtime/mod.rs index b47605f8..071164e5 100644 --- a/async-openai/src/types/realtime/mod.rs +++ b/async-openai/src/types/realtime/mod.rs @@ -4,9 +4,9 @@ mod conversation; mod error; mod item; mod rate_limit; -mod response_resource; +mod response; mod server_event; -mod session_resource; +mod session; pub use client_event::*; pub use content_part::*; @@ -14,6 +14,6 @@ pub use conversation::*; pub use error::*; pub use item::*; pub use rate_limit::*; -pub use response_resource::*; +pub use response::*; pub use server_event::*; -pub use session_resource::*; +pub use session::*; diff --git a/async-openai/src/types/realtime/rate_limit.rs b/async-openai/src/types/realtime/rate_limit.rs index 9306e236..86a50e21 100644 --- a/async-openai/src/types/realtime/rate_limit.rs +++ b/async-openai/src/types/realtime/rate_limit.rs @@ -1,9 +1,16 @@ use serde::{Deserialize, Serialize}; #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct RateLimit { +#[serde(rename_all = "lowercase")] +pub enum RealtimeRateLimitName { + Requests, + Tokens, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeRateLimit { /// The name of the rate limit (requests, tokens). - pub name: String, + pub name: RealtimeRateLimitName, /// The maximum allowed value for the rate limit. pub limit: u32, /// The remaining value before the limit is reached. 
diff --git a/async-openai/src/types/realtime/response_resource.rs b/async-openai/src/types/realtime/response.rs similarity index 100% rename from async-openai/src/types/realtime/response_resource.rs rename to async-openai/src/types/realtime/response.rs diff --git a/async-openai/src/types/realtime/server_event.rs b/async-openai/src/types/realtime/server_event.rs index b4994aab..1b7512f9 100644 --- a/async-openai/src/types/realtime/server_event.rs +++ b/async-openai/src/types/realtime/server_event.rs @@ -1,10 +1,8 @@ use serde::{Deserialize, Serialize}; -use crate::types::realtime::{RealtimeResponse, Session}; - use super::{ content_part::ContentPart, error::RealtimeAPIError, item::RealtimeConversationItem, - rate_limit::RateLimit, + rate_limit::RealtimeRateLimit, response::RealtimeResponse, session::Session, }; #[derive(Debug, Serialize, Deserialize, Clone)] @@ -469,7 +467,7 @@ pub struct ResponseFunctionCallArgumentsDoneEvent { pub struct RateLimitsUpdatedEvent { /// The unique ID of the server event. pub event_id: String, - pub rate_limits: Vec, + pub rate_limits: Vec, } #[derive(Debug, Serialize, Deserialize, Clone)] diff --git a/async-openai/src/types/realtime/session_resource.rs b/async-openai/src/types/realtime/session.rs similarity index 100% rename from async-openai/src/types/realtime/session_resource.rs rename to async-openai/src/types/realtime/session.rs From 5a1cd63ce7450db6a63129c84012149776c52503 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 17:01:14 -0800 Subject: [PATCH 36/42] update realtime types --- .../src/types/realtime/content_part.rs | 18 --------- .../src/types/realtime/conversation.rs | 9 ----- .../{item.rs => conversation_item.rs} | 0 async-openai/src/types/realtime/mod.rs | 10 +---- async-openai/src/types/realtime/rate_limit.rs | 20 ---------- async-openai/src/types/realtime/response.rs | 12 +++++- .../src/types/realtime/server_event.rs | 40 ++++++++++++++++++- 7 files changed, 50 insertions(+), 59 deletions(-) delete mode 100644 async-openai/src/types/realtime/content_part.rs delete mode 100644 async-openai/src/types/realtime/conversation.rs rename async-openai/src/types/realtime/{item.rs => conversation_item.rs} (100%) delete mode 100644 async-openai/src/types/realtime/rate_limit.rs diff --git a/async-openai/src/types/realtime/content_part.rs b/async-openai/src/types/realtime/content_part.rs deleted file mode 100644 index eec93ab3..00000000 --- a/async-openai/src/types/realtime/content_part.rs +++ /dev/null @@ -1,18 +0,0 @@ -use serde::{Deserialize, Serialize}; - -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(tag = "type")] -pub enum ContentPart { - #[serde(rename = "text")] - Text { - /// The text content - text: String, - }, - #[serde(rename = "audio")] - Audio { - /// Base64-encoded audio data - audio: Option, - /// The transcript of the audio - transcript: String, - }, -} diff --git a/async-openai/src/types/realtime/conversation.rs b/async-openai/src/types/realtime/conversation.rs deleted file mode 100644 index e678ede8..00000000 --- a/async-openai/src/types/realtime/conversation.rs +++ /dev/null @@ -1,9 +0,0 @@ -use serde::{Deserialize, Serialize}; - -#[derive(Debug, Serialize, Deserialize, Clone, Default)] -#[serde(rename_all = "lowercase")] -pub enum Conversation { - #[default] - Auto, - None, -} diff --git a/async-openai/src/types/realtime/item.rs b/async-openai/src/types/realtime/conversation_item.rs similarity index 100% rename from async-openai/src/types/realtime/item.rs rename to 
async-openai/src/types/realtime/conversation_item.rs diff --git a/async-openai/src/types/realtime/mod.rs b/async-openai/src/types/realtime/mod.rs index 071164e5..386a92b0 100644 --- a/async-openai/src/types/realtime/mod.rs +++ b/async-openai/src/types/realtime/mod.rs @@ -1,19 +1,13 @@ mod client_event; -mod content_part; -mod conversation; +mod conversation_item; mod error; -mod item; -mod rate_limit; mod response; mod server_event; mod session; pub use client_event::*; -pub use content_part::*; -pub use conversation::*; +pub use conversation_item::*; pub use error::*; -pub use item::*; -pub use rate_limit::*; pub use response::*; pub use server_event::*; pub use session::*; diff --git a/async-openai/src/types/realtime/rate_limit.rs b/async-openai/src/types/realtime/rate_limit.rs deleted file mode 100644 index 86a50e21..00000000 --- a/async-openai/src/types/realtime/rate_limit.rs +++ /dev/null @@ -1,20 +0,0 @@ -use serde::{Deserialize, Serialize}; - -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(rename_all = "lowercase")] -pub enum RealtimeRateLimitName { - Requests, - Tokens, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct RealtimeRateLimit { - /// The name of the rate limit (requests, tokens). - pub name: RealtimeRateLimitName, - /// The maximum allowed value for the rate limit. - pub limit: u32, - /// The remaining value before the limit is reached. - pub remaining: u32, - /// Seconds until the rate limit resets. - pub reset_seconds: f32, -} diff --git a/async-openai/src/types/realtime/response.rs b/async-openai/src/types/realtime/response.rs index 1dac0634..c9f28bb2 100644 --- a/async-openai/src/types/realtime/response.rs +++ b/async-openai/src/types/realtime/response.rs @@ -4,8 +4,8 @@ use serde::{Deserialize, Serialize}; use crate::types::{ realtime::{ - Conversation, MaxOutputTokens, RealtimeAudioFormats, RealtimeConversationItem, - RealtimeTool, RealtimeVoice, ToolChoice, + MaxOutputTokens, RealtimeAudioFormats, RealtimeConversationItem, RealtimeTool, + RealtimeVoice, ToolChoice, }, responses::Prompt, }; @@ -142,6 +142,14 @@ pub struct ResponseAudio { pub output: ResponseAudioOutput, } +#[derive(Debug, Serialize, Deserialize, Clone, Default)] +#[serde(rename_all = "lowercase")] +pub enum Conversation { + #[default] + Auto, + None, +} + /// The response resource. 
#[derive(Debug, Serialize, Deserialize, Clone)] pub struct RealtimeResponseCreateParams { diff --git a/async-openai/src/types/realtime/server_event.rs b/async-openai/src/types/realtime/server_event.rs index 1b7512f9..4544e2bc 100644 --- a/async-openai/src/types/realtime/server_event.rs +++ b/async-openai/src/types/realtime/server_event.rs @@ -1,8 +1,8 @@ use serde::{Deserialize, Serialize}; use super::{ - content_part::ContentPart, error::RealtimeAPIError, item::RealtimeConversationItem, - rate_limit::RealtimeRateLimit, response::RealtimeResponse, session::Session, + conversation_item::RealtimeConversationItem, error::RealtimeAPIError, + response::RealtimeResponse, session::Session, }; #[derive(Debug, Serialize, Deserialize, Clone)] @@ -305,6 +305,23 @@ pub struct ResponseOutputItemDoneEvent { pub item: RealtimeConversationItem, } +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(tag = "type")] +pub enum ContentPart { + #[serde(rename = "text")] + Text { + /// The text content + text: String, + }, + #[serde(rename = "audio")] + Audio { + /// Base64-encoded audio data + audio: Option, + /// The transcript of the audio + transcript: String, + }, +} + #[derive(Debug, Serialize, Deserialize, Clone)] pub struct ResponseContentPartAddedEvent { /// The unique ID of the server event. @@ -463,6 +480,25 @@ pub struct ResponseFunctionCallArgumentsDoneEvent { pub arguments: String, } +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(rename_all = "lowercase")] +pub enum RealtimeRateLimitName { + Requests, + Tokens, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct RealtimeRateLimit { + /// The name of the rate limit (requests, tokens). + pub name: RealtimeRateLimitName, + /// The maximum allowed value for the rate limit. + pub limit: u32, + /// The remaining value before the limit is reached. + pub remaining: u32, + /// Seconds until the rate limit resets. + pub reset_seconds: f32, +} + #[derive(Debug, Serialize, Deserialize, Clone)] pub struct RateLimitsUpdatedEvent { /// The unique ID of the server event. From 9571490acae575c6af18c70f08d370a60163b721 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 17:11:09 -0800 Subject: [PATCH 37/42] match realtime client event to spec --- .../src/types/realtime/client_event.rs | 128 +++++++++++------- 1 file changed, 82 insertions(+), 46 deletions(-) diff --git a/async-openai/src/types/realtime/client_event.rs b/async-openai/src/types/realtime/client_event.rs index d77e04bb..0881b9b9 100644 --- a/async-openai/src/types/realtime/client_event.rs +++ b/async-openai/src/types/realtime/client_event.rs @@ -4,7 +4,7 @@ use tokio_tungstenite::tungstenite::Message; use crate::types::realtime::{RealtimeConversationItem, RealtimeResponseCreateParams, Session}; #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct SessionUpdateEvent { +pub struct RealtimeClientEventSessionUpdate { /// Optional client-generated ID used to identify this event. /// This is an arbitrary string that a client may assign. It will be passed /// back if there is an error with the event, but the corresponding @@ -16,7 +16,7 @@ pub struct SessionUpdateEvent { } #[derive(Debug, Serialize, Deserialize, Clone, Default)] -pub struct InputAudioBufferAppendEvent { +pub struct RealtimeClientEventInputAudioBufferAppend { /// Optional client-generated ID used to identify this event. 
#[serde(skip_serializing_if = "Option::is_none")] pub event_id: Option, @@ -26,21 +26,21 @@ pub struct InputAudioBufferAppendEvent { } #[derive(Debug, Serialize, Deserialize, Clone, Default)] -pub struct InputAudioBufferCommitEvent { +pub struct RealtimeClientEventInputAudioBufferCommit { /// Optional client-generated ID used to identify this event. #[serde(skip_serializing_if = "Option::is_none")] pub event_id: Option, } #[derive(Debug, Serialize, Deserialize, Clone, Default)] -pub struct InputAudioBufferClearEvent { +pub struct RealtimeClientEventInputAudioBufferClear { /// Optional client-generated ID used to identify this event. #[serde(skip_serializing_if = "Option::is_none")] pub event_id: Option, } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ConversationItemCreateEvent { +pub struct RealtimeClientEventConversationItemCreate { /// Optional client-generated ID used to identify this event. #[serde(skip_serializing_if = "Option::is_none")] pub event_id: Option, @@ -58,7 +58,7 @@ pub struct ConversationItemCreateEvent { } #[derive(Debug, Serialize, Deserialize, Clone, Default)] -pub struct ConversationItemRetrieveEvent { +pub struct RealtimeClientEventConversationItemRetrieve { /// Optional client-generated ID used to identify this event. #[serde(skip_serializing_if = "Option::is_none")] pub event_id: Option, @@ -68,7 +68,7 @@ pub struct ConversationItemRetrieveEvent { } #[derive(Debug, Serialize, Deserialize, Clone, Default)] -pub struct ConversationItemTruncateEvent { +pub struct RealtimeClientEventConversationItemTruncate { /// Optional client-generated ID used to identify this event. #[serde(skip_serializing_if = "Option::is_none")] pub event_id: Option, @@ -85,7 +85,7 @@ pub struct ConversationItemTruncateEvent { } #[derive(Debug, Serialize, Deserialize, Clone, Default)] -pub struct ConversationItemDeleteEvent { +pub struct RealtimeClientEventConversationItemDelete { /// Optional client-generated ID used to identify this event. #[serde(skip_serializing_if = "Option::is_none")] pub event_id: Option, @@ -95,7 +95,7 @@ pub struct ConversationItemDeleteEvent { } #[derive(Debug, Serialize, Deserialize, Clone, Default)] -pub struct ResponseCreateEvent { +pub struct RealtimeClientEventResponseCreate { /// Optional client-generated ID used to identify this event. #[serde(skip_serializing_if = "Option::is_none")] pub event_id: Option, @@ -105,7 +105,7 @@ pub struct ResponseCreateEvent { } #[derive(Debug, Serialize, Deserialize, Clone, Default)] -pub struct ResponseCancelEvent { +pub struct RealtimeClientEventResponseCancel { /// Optional client-generated ID used to identify this event. #[serde(skip_serializing_if = "Option::is_none")] pub event_id: Option, @@ -117,7 +117,7 @@ pub struct ResponseCancelEvent { } #[derive(Debug, Serialize, Deserialize, Clone, Default)] -pub struct OutputAudioBufferClearEvent { +pub struct RealtimeClientEventOutputAudioBufferClear { /// Optional client-generated ID used to identify this event. #[serde(skip_serializing_if = "Option::is_none")] pub event_id: Option, @@ -134,7 +134,7 @@ pub enum RealtimeClientEvent { /// configuration. Only the fields that are present in the `session.update` are updated. To clear a field like `instructions`, /// pass an empty string. To clear a field like `tools`, pass an empty array. To clear a field like `turn_detection`, pass `null`. 
#[serde(rename = "session.update")] - SessionUpdate(SessionUpdateEvent), + SessionUpdate(RealtimeClientEventSessionUpdate), /// Send this event to append audio bytes to the input audio buffer. The audio buffer is temporary storage you can write to and later commit. /// A "commit" will create a new user message item in the conversation history from the buffer content and clear the buffer. Input audio @@ -147,7 +147,7 @@ pub enum RealtimeClientEvent { /// client may allow the VAD to be more responsive. Unlike most other client events, the server will not send a confirmation response to /// this event. #[serde(rename = "input_audio_buffer.append")] - InputAudioBufferAppend(InputAudioBufferAppendEvent), + InputAudioBufferAppend(RealtimeClientEventInputAudioBufferAppend), /// Send this event to commit the user input audio buffer, which will create a new user message item in the conversation. /// This event will produce an error if the input audio buffer is empty. @@ -155,12 +155,12 @@ pub enum RealtimeClientEvent { /// Committing the input audio buffer will trigger input audio transcription (if enabled in session configuration), but it will not create a response from the model. /// The server will respond with an input_audio_buffer.committed event. #[serde(rename = "input_audio_buffer.commit")] - InputAudioBufferCommit(InputAudioBufferCommitEvent), + InputAudioBufferCommit(RealtimeClientEventInputAudioBufferCommit), /// Send this event to clear the audio bytes in the buffer. /// The server will respond with an `input_audio_buffer.cleared` event. #[serde(rename = "input_audio_buffer.clear")] - InputAudioBufferClear(InputAudioBufferClearEvent), + InputAudioBufferClear(RealtimeClientEventInputAudioBufferClear), /// Add a new Item to the Conversation's context, including messages, function calls, and function call responses. /// This event can be used both to populate a "history" of the conversation and to add new items mid-stream, @@ -168,14 +168,14 @@ pub enum RealtimeClientEvent { /// /// If successful, the server will respond with a `conversation.item.created` event, otherwise an `error` event will be sent. #[serde(rename = "conversation.item.create")] - ConversationItemCreate(ConversationItemCreateEvent), + ConversationItemCreate(RealtimeClientEventConversationItemCreate), /// Send this event when you want to retrieve the server's representation of a specific item in the conversation history. /// This is useful, for example, to inspect user audio after noise cancellation and VAD. /// The server will respond with a `conversation.item.retrieved` event, unless the item does not exist in the conversation history, /// in which case the server will respond with an error. #[serde(rename = "conversation.item.retrieve")] - ConversationItemRetrieve(ConversationItemRetrieveEvent), + ConversationItemRetrieve(RealtimeClientEventConversationItemRetrieve), /// Send this event to truncate a previous assistant message's audio. The server will produce audio faster than realtime, /// so this event is useful when the user interrupts to truncate audio that has already been sent to the client but not @@ -186,13 +186,13 @@ pub enum RealtimeClientEvent { /// /// If successful, the server will respond with a `conversation.item.truncated` event. #[serde(rename = "conversation.item.truncate")] - ConversationItemTruncate(ConversationItemTruncateEvent), + ConversationItemTruncate(RealtimeClientEventConversationItemTruncate), /// Send this event when you want to remove any item from the conversation history. 
The server will respond with a /// `conversation.item.deleted` event, unless the item does not exist in the conversation history, in which case the /// server will respond with an error. #[serde(rename = "conversation.item.delete")] - ConversationItemDelete(ConversationItemDeleteEvent), + ConversationItemDelete(RealtimeClientEventConversationItemDelete), /// This event instructs the server to create a Response, which means triggering model inference. /// When in Server VAD mode, the server will create Responses automatically. @@ -215,21 +215,21 @@ pub enum RealtimeClientEvent { /// Arbitrary input can be provided with the `input` field, which is an array accepting raw Items and references to /// existing Items. #[serde(rename = "response.create")] - ResponseCreate(ResponseCreateEvent), + ResponseCreate(RealtimeClientEventResponseCreate), /// Send this event to cancel an in-progress response. The server will respond with a `response.done` event /// with a status of `response.status=cancelled`. If there is no response to cancel, the server will respond /// with an error. It's safe to call `response.cancel` even if no response is in progress, an error will be /// returned the session will remain unaffected. #[serde(rename = "response.cancel")] - ResponseCancel(ResponseCancelEvent), + ResponseCancel(RealtimeClientEventResponseCancel), /// **WebRTC Only:** Emit to cut off the current audio response. /// This will trigger the server to stop generating audio and emit a `output_audio_buffer.cleared` event. /// This event should be preceded by a `response.cancel` client event to stop the generation of the current response. /// [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc) #[serde(rename = "output_audio_buffer.clear")] - OutputAudioBufferClear(OutputAudioBufferClearEvent), + OutputAudioBufferClear(RealtimeClientEventOutputAudioBufferClear), } impl From<&RealtimeClientEvent> for String { @@ -264,63 +264,99 @@ macro_rules! 
event_from { }; } -event_from!(SessionUpdateEvent, RealtimeClientEvent, SessionUpdate); event_from!( - InputAudioBufferAppendEvent, + RealtimeClientEventSessionUpdate, + RealtimeClientEvent, + SessionUpdate +); +event_from!( + RealtimeClientEventInputAudioBufferAppend, RealtimeClientEvent, InputAudioBufferAppend ); event_from!( - InputAudioBufferCommitEvent, + RealtimeClientEventInputAudioBufferCommit, RealtimeClientEvent, InputAudioBufferCommit ); event_from!( - InputAudioBufferClearEvent, + RealtimeClientEventInputAudioBufferClear, RealtimeClientEvent, InputAudioBufferClear ); event_from!( - ConversationItemCreateEvent, + RealtimeClientEventConversationItemCreate, RealtimeClientEvent, ConversationItemCreate ); event_from!( - ConversationItemTruncateEvent, + RealtimeClientEventConversationItemTruncate, RealtimeClientEvent, ConversationItemTruncate ); event_from!( - ConversationItemDeleteEvent, + RealtimeClientEventConversationItemDelete, RealtimeClientEvent, ConversationItemDelete ); event_from!( - ConversationItemRetrieveEvent, + RealtimeClientEventConversationItemRetrieve, RealtimeClientEvent, ConversationItemRetrieve ); -event_from!(ResponseCreateEvent, RealtimeClientEvent, ResponseCreate); -event_from!(ResponseCancelEvent, RealtimeClientEvent, ResponseCancel); event_from!( - OutputAudioBufferClearEvent, + RealtimeClientEventResponseCreate, + RealtimeClientEvent, + ResponseCreate +); +event_from!( + RealtimeClientEventResponseCancel, + RealtimeClientEvent, + ResponseCancel +); +event_from!( + RealtimeClientEventOutputAudioBufferClear, RealtimeClientEvent, OutputAudioBufferClear ); -message_from_event!(SessionUpdateEvent, RealtimeClientEvent); -message_from_event!(InputAudioBufferAppendEvent, RealtimeClientEvent); -message_from_event!(InputAudioBufferCommitEvent, RealtimeClientEvent); -message_from_event!(InputAudioBufferClearEvent, RealtimeClientEvent); -message_from_event!(ConversationItemCreateEvent, RealtimeClientEvent); -message_from_event!(ConversationItemTruncateEvent, RealtimeClientEvent); -message_from_event!(ConversationItemDeleteEvent, RealtimeClientEvent); -message_from_event!(ConversationItemRetrieveEvent, RealtimeClientEvent); -message_from_event!(ResponseCreateEvent, RealtimeClientEvent); -message_from_event!(ResponseCancelEvent, RealtimeClientEvent); -message_from_event!(OutputAudioBufferClearEvent, RealtimeClientEvent); - -impl From for ConversationItemCreateEvent { +message_from_event!(RealtimeClientEventSessionUpdate, RealtimeClientEvent); +message_from_event!( + RealtimeClientEventInputAudioBufferAppend, + RealtimeClientEvent +); +message_from_event!( + RealtimeClientEventInputAudioBufferCommit, + RealtimeClientEvent +); +message_from_event!( + RealtimeClientEventInputAudioBufferClear, + RealtimeClientEvent +); +message_from_event!( + RealtimeClientEventConversationItemCreate, + RealtimeClientEvent +); +message_from_event!( + RealtimeClientEventConversationItemTruncate, + RealtimeClientEvent +); +message_from_event!( + RealtimeClientEventConversationItemDelete, + RealtimeClientEvent +); +message_from_event!( + RealtimeClientEventConversationItemRetrieve, + RealtimeClientEvent +); +message_from_event!(RealtimeClientEventResponseCreate, RealtimeClientEvent); +message_from_event!(RealtimeClientEventResponseCancel, RealtimeClientEvent); +message_from_event!( + RealtimeClientEventOutputAudioBufferClear, + RealtimeClientEvent +); + +impl From for RealtimeClientEventConversationItemCreate { fn from(value: RealtimeConversationItem) -> Self { Self { event_id: None, From 
fb26251ad92bcf93da54b716d7aca4bcacd8ac83 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 17:11:19 -0800 Subject: [PATCH 38/42] update examples/realtime --- examples/realtime/src/main.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/examples/realtime/src/main.rs b/examples/realtime/src/main.rs index 3793b95a..87881734 100644 --- a/examples/realtime/src/main.rs +++ b/examples/realtime/src/main.rs @@ -1,7 +1,8 @@ use std::process::exit; use async_openai::types::realtime::{ - ConversationItemCreateEvent, RealtimeConversationItem, RealtimeServerEvent, ResponseCreateEvent, + RealtimeClientEventConversationItemCreate, RealtimeClientEventResponseCreate, + RealtimeConversationItem, RealtimeServerEvent, }; use futures_util::{future, pin_mut, StreamExt}; @@ -120,13 +121,13 @@ async fn read_stdin(tx: futures_channel::mpsc::UnboundedSender) { .unwrap(); // Create event of type "conversation.item.create" - let event: ConversationItemCreateEvent = item.into(); + let event: RealtimeClientEventConversationItemCreate = item.into(); // Create WebSocket message from client event let message: Message = event.into(); // send WebSocket message containing event of type "conversation.item.create" to server tx.unbounded_send(message).unwrap(); // send WebSocket message containing event of type "response.create" to server - tx.unbounded_send(ResponseCreateEvent::default().into()) + tx.unbounded_send(RealtimeClientEventResponseCreate::default().into()) .unwrap(); } } From d926b060cdf2cdf8680ea517fe2fdf3738dd4144 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 17:25:45 -0800 Subject: [PATCH 39/42] match realtime server event type names with spec --- .../src/types/realtime/server_event.rs | 174 +++++++++--------- 1 file changed, 88 insertions(+), 86 deletions(-) diff --git a/async-openai/src/types/realtime/server_event.rs b/async-openai/src/types/realtime/server_event.rs index 4544e2bc..f2ea8a58 100644 --- a/async-openai/src/types/realtime/server_event.rs +++ b/async-openai/src/types/realtime/server_event.rs @@ -6,7 +6,7 @@ use super::{ }; #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ErrorEvent { +pub struct RealtimeServerEventError { /// The unique ID of the server event. pub event_id: String, /// Details of the error. @@ -14,7 +14,7 @@ pub struct ErrorEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct SessionCreatedEvent { +pub struct RealtimeServerEventSessionCreated { /// The unique ID of the server event. pub event_id: String, /// The session resource. @@ -22,7 +22,7 @@ pub struct SessionCreatedEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct SessionUpdatedEvent { +pub struct RealtimeServerEventSessionUpdated { /// The unique ID of the server event. pub event_id: String, /// The updated session resource. @@ -30,7 +30,7 @@ pub struct SessionUpdatedEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ConversationItemAddedEvent { +pub struct RealtimeServerEventConversationItemAdded { /// The unique ID of the server event. pub event_id: String, /// A single item within a Realtime conversation. @@ -40,7 +40,7 @@ pub struct ConversationItemAddedEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ConversationItemDoneEvent { +pub struct RealtimeServerEventConversationItemDone { /// The unique ID of the server event. pub event_id: String, /// A single item within a Realtime conversation. 
@@ -50,7 +50,7 @@ pub struct ConversationItemDoneEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct InputAudioBufferCommitedEvent { +pub struct RealtimeServerEventInputAudioBufferCommitted { /// The unique ID of the server event. pub event_id: String, /// The ID of the preceding item after which the new item will be inserted. Can be null if the item has no predecessor. @@ -60,13 +60,13 @@ pub struct InputAudioBufferCommitedEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct InputAudioBufferClearedEvent { +pub struct RealtimeServerEventInputAudioBufferCleared { /// The unique ID of the server event. pub event_id: String, } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct InputAudioBufferSpeechStartedEvent { +pub struct RealtimeServerEventInputAudioBufferSpeechStarted { /// The unique ID of the server event. pub event_id: String, /// Milliseconds from the start of all audio written to the buffer during the session when speech was @@ -78,7 +78,7 @@ pub struct InputAudioBufferSpeechStartedEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct InputAudioBufferSpeechStoppedEvent { +pub struct RealtimeServerEventInputAudioBufferSpeechStopped { /// The unique ID of the server event. pub event_id: String, /// Milliseconds since the session started when speech stopped. This will correspond to the end of @@ -89,7 +89,7 @@ pub struct InputAudioBufferSpeechStoppedEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct InputAudioBufferTimeoutTriggeredEvent { +pub struct RealtimeServerEventInputAudioBufferTimeoutTriggered { /// The unique ID of the server event. pub event_id: String, /// Millisecond offset of audio written to the input audio buffer at the time the timeout was triggered. @@ -101,7 +101,7 @@ pub struct InputAudioBufferTimeoutTriggeredEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct OutputAudioBufferStartedEvent { +pub struct RealtimeServerEventOutputAudioBufferStarted { /// The unique ID of the server event. pub event_id: String, /// The unique ID of the response that produced the audio. @@ -109,7 +109,7 @@ pub struct OutputAudioBufferStartedEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct OutputAudioBufferStoppedEvent { +pub struct RealtimeServerEventOutputAudioBufferStopped { /// The unique ID of the server event. pub event_id: String, /// The unique ID of the response that produced the audio. @@ -117,7 +117,7 @@ pub struct OutputAudioBufferStoppedEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct OutputAudioBufferClearedEvent { +pub struct RealtimeServerEventOutputAudioBufferCleared { /// The unique ID of the server event. pub event_id: String, /// The unique ID of the response that produced the audio. @@ -171,7 +171,7 @@ pub enum TranscriptionUsage { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ConversationItemInputAudioTranscriptionCompletedEvent { +pub struct RealtimeServerEventConversationItemInputAudioTranscriptionCompleted { /// The unique ID of the server event. pub event_id: String, /// The ID of the item containing the audio that is being transcribed. @@ -188,7 +188,7 @@ pub struct ConversationItemInputAudioTranscriptionCompletedEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ConversationItemInputAudioTranscriptionDeltaEvent { +pub struct RealtimeServerEventConversationItemInputAudioTranscriptionDelta { /// The unique ID of the server event. 
pub event_id: String, /// The ID of the item containing the audio that is being transcribed. @@ -206,7 +206,7 @@ pub struct ConversationItemInputAudioTranscriptionDeltaEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ConversationItemInputAudioTranscriptionFailedEvent { +pub struct RealtimeServerEventConversationItemInputAudioTranscriptionFailed { /// The unique ID of the server event. pub event_id: String, /// The ID of the user message item. @@ -218,7 +218,7 @@ pub struct ConversationItemInputAudioTranscriptionFailedEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ConversationItemTruncatedEvent { +pub struct RealtimeServerEventConversationItemTruncated { /// The unique ID of the server event. pub event_id: String, /// The ID of the assistant message item that was truncated. @@ -230,7 +230,7 @@ pub struct ConversationItemTruncatedEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ConversationItemDeletedEvent { +pub struct RealtimeServerEventConversationItemDeleted { /// The unique ID of the server event. pub event_id: String, /// The ID of the item that was deleted. @@ -238,7 +238,7 @@ pub struct ConversationItemDeletedEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ConversationItemRetrievedEvent { +pub struct RealtimeServerEventConversationItemRetrieved { /// The unique ID of the server event. pub event_id: String, /// A single item within a Realtime conversation. @@ -246,7 +246,7 @@ pub struct ConversationItemRetrievedEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ConversationItemInputAudioTranscriptionSegmentEvent { +pub struct RealtimeServerEventConversationItemInputAudioTranscriptionSegment { /// The unique ID of the server event. pub event_id: String, /// The ID of the item containing the input audio content. @@ -266,7 +266,7 @@ pub struct ConversationItemInputAudioTranscriptionSegmentEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseCreatedEvent { +pub struct RealtimeServerEventResponseCreated { /// The unique ID of the server event. pub event_id: String, /// The response resource. @@ -274,7 +274,7 @@ pub struct ResponseCreatedEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseDoneEvent { +pub struct RealtimeServerEventResponseDone { /// The unique ID of the server event. pub event_id: String, /// The response resource. @@ -282,7 +282,7 @@ pub struct ResponseDoneEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseOutputItemAddedEvent { +pub struct RealtimeServerEventResponseOutputItemAdded { /// The unique ID of the server event. pub event_id: String, /// The ID of the Response to which the item belongs. @@ -294,7 +294,7 @@ pub struct ResponseOutputItemAddedEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseOutputItemDoneEvent { +pub struct RealtimeServerEventResponseOutputItemDone { /// The unique ID of the server event. pub event_id: String, /// The ID of the response to which the item belongs. @@ -323,7 +323,7 @@ pub enum ContentPart { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseContentPartAddedEvent { +pub struct RealtimeServerEventResponseContentPartAdded { /// The unique ID of the server event. pub event_id: String, /// The ID of the response. 
@@ -339,7 +339,7 @@ pub struct ResponseContentPartAddedEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseContentPartDoneEvent { +pub struct RealtimeServerEventResponseContentPartDone { /// The unique ID of the server event. pub event_id: String, /// The ID of the response. @@ -355,7 +355,7 @@ pub struct ResponseContentPartDoneEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseOutputTextDeltaEvent { +pub struct RealtimeServerEventResponseTextDelta { /// The unique ID of the server event. pub event_id: String, /// The ID of the response. @@ -371,7 +371,7 @@ pub struct ResponseOutputTextDeltaEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseOutputTextDoneEvent { +pub struct RealtimeServerEventResponseTextDone { /// The unique ID of the server event. pub event_id: String, /// The ID of the response. @@ -387,7 +387,7 @@ pub struct ResponseOutputTextDoneEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseOutputAudioTranscriptDeltaEvent { +pub struct RealtimeServerEventResponseAudioTranscriptDelta { /// The unique ID of the server event. pub event_id: String, /// The ID of the response. @@ -403,7 +403,7 @@ pub struct ResponseOutputAudioTranscriptDeltaEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseOutputAudioTranscriptDoneEvent { +pub struct RealtimeServerEventResponseAudioTranscriptDone { /// The unique ID of the server event. pub event_id: String, /// The ID of the response. @@ -419,7 +419,7 @@ pub struct ResponseOutputAudioTranscriptDoneEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseOutputAudioDeltaEvent { +pub struct RealtimeServerEventResponseAudioDelta { /// The unique ID of the server event. pub event_id: String, /// The ID of the response. @@ -435,7 +435,7 @@ pub struct ResponseOutputAudioDeltaEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseOutputAudioDoneEvent { +pub struct RealtimeServerEventResponseAudioDone { /// The unique ID of the server event. pub event_id: String, /// The ID of the response. @@ -449,7 +449,7 @@ pub struct ResponseOutputAudioDoneEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseFunctionCallArgumentsDeltaEvent { +pub struct RealtimeServerEventResponseFunctionCallArgumentsDelta { /// The unique ID of the server event. pub event_id: String, /// The ID of the response. @@ -465,7 +465,7 @@ pub struct ResponseFunctionCallArgumentsDeltaEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseFunctionCallArgumentsDoneEvent { +pub struct RealtimeServerEventResponseFunctionCallArgumentsDone { /// The unique ID of the server event. pub event_id: String, /// The ID of the response. @@ -500,14 +500,14 @@ pub struct RealtimeRateLimit { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct RateLimitsUpdatedEvent { +pub struct RealtimeServerEventRateLimitsUpdated { /// The unique ID of the server event. pub event_id: String, pub rate_limits: Vec, } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct MCPListToolsInProgressEvent { +pub struct RealtimeServerEventMCPListToolsInProgress { /// The unique ID of the server event. pub event_id: String, /// The ID of the MCP list tools item. 
@@ -515,7 +515,7 @@ pub struct MCPListToolsInProgressEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct MCPListToolsCompletedEvent { +pub struct RealtimeServerEventMCPListToolsCompleted { /// The unique ID of the server event. pub event_id: String, /// The ID of the MCP list tools item. @@ -523,7 +523,7 @@ pub struct MCPListToolsCompletedEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct MCPListToolsFailedEvent { +pub struct RealtimeServerEventMCPListToolsFailed { /// The unique ID of the server event. pub event_id: String, /// The ID of the MCP list tools item. @@ -531,7 +531,7 @@ pub struct MCPListToolsFailedEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseMCPCallArgumentsDeltaEvent { +pub struct RealtimeServerEventResponseMCPCallArgumentsDelta { /// The unique ID of the server event. pub event_id: String, /// The ID of the response. @@ -548,7 +548,7 @@ pub struct ResponseMCPCallArgumentsDeltaEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseMCPCallArgumentsDoneEvent { +pub struct RealtimeServerEventResponseMCPCallArgumentsDone { /// The unique ID of the server event. pub event_id: String, /// The ID of the response. @@ -562,7 +562,7 @@ pub struct ResponseMCPCallArgumentsDoneEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseMCPCallInProgressEvent { +pub struct RealtimeServerEventResponseMCPCallInProgress { /// The unique ID of the server event. pub event_id: String, /// The index of the output item in the response. @@ -572,7 +572,7 @@ pub struct ResponseMCPCallInProgressEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseMCPCallCompletedEvent { +pub struct RealtimeServerEventResponseMCPCallCompleted { /// The unique ID of the server event. pub event_id: String, /// The index of the output item in the response. @@ -582,7 +582,7 @@ pub struct ResponseMCPCallCompletedEvent { } #[derive(Debug, Serialize, Deserialize, Clone)] -pub struct ResponseMCPCallFailedEvent { +pub struct RealtimeServerEventResponseMCPCallFailed { /// The unique ID of the server event. pub event_id: String, /// The index of the output item in the response. @@ -599,16 +599,16 @@ pub enum RealtimeServerEvent { /// Most errors are recoverable and the session will stay open, we recommend to /// implementors to monitor and log error messages by default. #[serde(rename = "error")] - Error(ErrorEvent), + Error(RealtimeServerEventError), /// Returned when a Session is created. Emitted automatically when a new connection is established as the first server event. /// This event will contain the default Session configuration. #[serde(rename = "session.created")] - SessionCreated(SessionCreatedEvent), + SessionCreated(RealtimeServerEventSessionCreated), /// Returned when a session is updated with a `session.update` event, unless there is an error. #[serde(rename = "session.updated")] - SessionUpdated(SessionUpdatedEvent), + SessionUpdated(RealtimeServerEventSessionUpdated), /// Sent by the server when an Item is added to the default Conversation. This can happen in several cases: /// - When the client sends a conversation.item.create event @@ -619,21 +619,21 @@ pub enum RealtimeServerEvent { /// The event will include the full content of the Item (except when model is generating a Response) except for audio data, /// which can be retrieved separately with a `conversation.item.retrieve` event if necessary. 
#[serde(rename = "conversation.item.added")] - ConversationItemAdded(ConversationItemAddedEvent), + ConversationItemAdded(RealtimeServerEventConversationItemAdded), /// Returned when a conversation item is finalized. /// /// The event will include the full content of the Item except for audio data, which can be retrieved /// separately with a `conversation.item.retrieve` event if needed. #[serde(rename = "conversation.item.done")] - ConversationItemDone(ConversationItemDoneEvent), + ConversationItemDone(RealtimeServerEventConversationItemDone), /// Returned when a conversation item is retrieved with `conversation.item.retrieve`. /// This is provided as a way to fetch the server's representation of an item, for example to get access /// to the post-processed audio data after noise cancellation and VAD. /// It includes the full content of the Item, including audio data. #[serde(rename = "conversation.item.retrieved")] - ConversationItemRetrieved(ConversationItemRetrievedEvent), + ConversationItemRetrieved(RealtimeServerEventConversationItemRetrieved), /// This event is the output of audio transcription for user audio written to the user audio /// buffer. Transcription begins when the input audio buffer is committed by the client or @@ -645,24 +645,26 @@ pub enum RealtimeServerEvent { /// may diverge somewhat from the model's interpretation, and should be treated as a rough guide. #[serde(rename = "conversation.item.input_audio_transcription.completed")] ConversationItemInputAudioTranscriptionCompleted( - ConversationItemInputAudioTranscriptionCompletedEvent, + RealtimeServerEventConversationItemInputAudioTranscriptionCompleted, ), /// Returned when the text value of an input audio transcription content part is updated with incremental transcription results. #[serde(rename = "conversation.item.input_audio_transcription.delta")] - ConversationItemInputAudioTranscriptionDelta(ConversationItemInputAudioTranscriptionDeltaEvent), + ConversationItemInputAudioTranscriptionDelta( + RealtimeServerEventConversationItemInputAudioTranscriptionDelta, + ), /// Returned when an input audio transcription segment is identified for an item. #[serde(rename = "conversation.item.input_audio_transcription.segment")] ConversationItemInputAudioTranscriptionSegment( - ConversationItemInputAudioTranscriptionSegmentEvent, + RealtimeServerEventConversationItemInputAudioTranscriptionSegment, ), /// Returned when input audio transcription is configured, and a transcription request for a user message failed. /// These events are separate from other `error` events so that the client can identify the related Item. #[serde(rename = "conversation.item.input_audio_transcription.failed")] ConversationItemInputAudioTranscriptionFailed( - ConversationItemInputAudioTranscriptionFailedEvent, + RealtimeServerEventConversationItemInputAudioTranscriptionFailed, ), /// Returned when an earlier assistant audio message item is truncated by the client with a `conversation.item.truncate` event. @@ -671,22 +673,22 @@ pub enum RealtimeServerEvent { /// This action will truncate the audio and remove the server-side text transcript to ensure there is no text in the /// context that hasn't been heard by the user. #[serde(rename = "conversation.item.truncated")] - ConversationItemTruncated(ConversationItemTruncatedEvent), + ConversationItemTruncated(RealtimeServerEventConversationItemTruncated), /// Returned when an item in the conversation is deleted by the client with a `conversation.item.delete` event. 
/// This event is used to synchronize the server's understanding of the conversation history with the client's view. #[serde(rename = "conversation.item.deleted")] - ConversationItemDeleted(ConversationItemDeletedEvent), + ConversationItemDeleted(RealtimeServerEventConversationItemDeleted), /// Returned when an input audio buffer is committed, either by the client or automatically in server VAD mode. /// The `item_id` property is the ID of the user message item that will be created, /// thus a `conversation.item.created` event will also be sent to the client. #[serde(rename = "input_audio_buffer.committed")] - InputAudioBufferCommited(InputAudioBufferCommitedEvent), + InputAudioBufferCommitted(RealtimeServerEventInputAudioBufferCommitted), /// Returned when the input audio buffer is cleared by the client with a `input_audio_buffer.clear` event. #[serde(rename = "input_audio_buffer.cleared")] - InputAudioBufferCleared(InputAudioBufferClearedEvent), + InputAudioBufferCleared(RealtimeServerEventInputAudioBufferCleared), /// Sent by the server when in `server_vad` mode to indicate that speech has been detected in the audio buffer. /// This can happen any time audio is added to the buffer (unless speech is already detected). @@ -697,12 +699,12 @@ pub enum RealtimeServerEvent { /// also be included in the `input_audio_buffer.speech_stopped` event (unless the client manually commits the /// audio buffer during VAD activation). #[serde(rename = "input_audio_buffer.speech_started")] - InputAudioBufferSpeechStarted(InputAudioBufferSpeechStartedEvent), + InputAudioBufferSpeechStarted(RealtimeServerEventInputAudioBufferSpeechStarted), /// Returned in `server_vad` mode when the server detects the end of speech in the audio buffer. /// The server will also send a `conversation.item.created` event with the user message item that is created from the audio buffer. #[serde(rename = "input_audio_buffer.speech_stopped")] - InputAudioBufferSpeechStopped(InputAudioBufferSpeechStoppedEvent), + InputAudioBufferSpeechStopped(RealtimeServerEventInputAudioBufferSpeechStopped), /// Returned when the Server VAD timeout is triggered for the input audio buffer. This is /// configured with `idle_timeout_ms` in the `turn_detection` settings of the session, and @@ -718,31 +720,31 @@ pub enum RealtimeServerEvent { /// There may be speech that didn't trigger VAD but is still detected by the model, so the model may respond /// with something relevant to the conversation or a prompt to continue speaking. #[serde(rename = "input_audio_buffer.timeout_triggered")] - InputAudioBufferTimeoutTriggered(InputAudioBufferTimeoutTriggeredEvent), + InputAudioBufferTimeoutTriggered(RealtimeServerEventInputAudioBufferTimeoutTriggered), /// *WebRTC Only*: Emitted when the server begins streaming audio to the client. This /// event is emitted after an audio content part has been added (`response.content_part.added`) to the response. /// [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc). #[serde(rename = "output_audio_buffer.started")] - OutputAudioBufferStarted(OutputAudioBufferStartedEvent), + OutputAudioBufferStarted(RealtimeServerEventOutputAudioBufferStarted), /// *WebRTC Only*: Emitted when the output audio buffer has been completely drained on /// the server, and no more audio is forthcoming. This event is emitted after the full response data has been sent /// to the client (`response.done`). 
[Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc). #[serde(rename = "output_audio_buffer.stopped")] - OutputAudioBufferStopped(OutputAudioBufferStoppedEvent), + OutputAudioBufferStopped(RealtimeServerEventOutputAudioBufferStopped), /// *WebRTC Only*: Emitted when the output audio buffer is cleared. This happens either in /// VAD mode when the user has interrupted (`input_audio_buffer.speech_started`), or when the client has /// emitted the `output_audio_buffer.clear` event to manually cut off the current audio response. /// [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc). #[serde(rename = "output_audio_buffer.cleared")] - OutputAudioBufferCleared(OutputAudioBufferClearedEvent), + OutputAudioBufferCleared(RealtimeServerEventOutputAudioBufferCleared), /// Returned when a new Response is created. The first event of response creation, /// where the response is in an initial state of `in_progress`. #[serde(rename = "response.created")] - ResponseCreated(ResponseCreatedEvent), + ResponseCreated(RealtimeServerEventResponseCreated), /// Returned when a Response is done streaming. Always emitted, no matter the final state. /// The Response object included in the `response.done` event will include all output Items in the Response @@ -753,96 +755,96 @@ pub enum RealtimeServerEvent { /// /// A response will contain all output items that were generated during the response, excluding any audio content. #[serde(rename = "response.done")] - ResponseDone(ResponseDoneEvent), + ResponseDone(RealtimeServerEventResponseDone), /// Returned when a new Item is created during Response generation. #[serde(rename = "response.output_item.added")] - ResponseOutputItemAdded(ResponseOutputItemAddedEvent), + ResponseOutputItemAdded(RealtimeServerEventResponseOutputItemAdded), /// Returned when an Item is done streaming. Also emitted when a Response is interrupted, incomplete, or cancelled. #[serde(rename = "response.output_item.done")] - ResponseOutputItemDone(ResponseOutputItemDoneEvent), + ResponseOutputItemDone(RealtimeServerEventResponseOutputItemDone), /// Returned when a new content part is added to an assistant message item during response generation. #[serde(rename = "response.content_part.added")] - ResponseContentPartAdded(ResponseContentPartAddedEvent), + ResponseContentPartAdded(RealtimeServerEventResponseContentPartAdded), /// Returned when a content part is done streaming in an assistant message item. /// Also emitted when a Response is interrupted, incomplete, or cancelled. #[serde(rename = "response.content_part.done")] - ResponseContentPartDone(ResponseContentPartDoneEvent), + ResponseContentPartDone(RealtimeServerEventResponseContentPartDone), /// Returned when the text value of an "output_text" content part is updated. #[serde(rename = "response.output_text.delta")] - ResponseOutputTextDelta(ResponseOutputTextDeltaEvent), + ResponseOutputTextDelta(RealtimeServerEventResponseTextDelta), /// Returned when the text value of an "output_text" content part is done streaming. /// Also emitted when a Response is interrupted, incomplete, or cancelled. #[serde(rename = "response.output_text.done")] - ResponseOutputTextDone(ResponseOutputTextDoneEvent), + ResponseOutputTextDone(RealtimeServerEventResponseTextDone), /// Returned when the model-generated transcription of audio output is updated. 
#[serde(rename = "response.output_audio_transcript.delta")] - ResponseOutputAudioTranscriptDelta(ResponseOutputAudioTranscriptDeltaEvent), + ResponseOutputAudioTranscriptDelta(RealtimeServerEventResponseAudioTranscriptDelta), /// Returned when the model-generated transcription of audio output is done streaming. /// Also emitted when a Response is interrupted, incomplete, or cancelled. #[serde(rename = "response.output_audio_transcript.done")] - ResponseOutputAudioTranscriptDone(ResponseOutputAudioTranscriptDoneEvent), + ResponseOutputAudioTranscriptDone(RealtimeServerEventResponseAudioTranscriptDone), /// Returned when the model-generated audio is updated. #[serde(rename = "response.output_audio.delta")] - ResponseOutputAudioDelta(ResponseOutputAudioDeltaEvent), + ResponseOutputAudioDelta(RealtimeServerEventResponseAudioDelta), /// Returned when the model-generated audio is done. /// Also emitted when a Response is interrupted, incomplete, or cancelled. #[serde(rename = "response.output_audio.done")] - ResponseOutputAudioDone(ResponseOutputAudioDoneEvent), + ResponseOutputAudioDone(RealtimeServerEventResponseAudioDone), /// Returned when the model-generated function call arguments are updated. #[serde(rename = "response.function_call_arguments.delta")] - ResponseFunctionCallArgumentsDelta(ResponseFunctionCallArgumentsDeltaEvent), + ResponseFunctionCallArgumentsDelta(RealtimeServerEventResponseFunctionCallArgumentsDelta), /// Returned when the model-generated function call arguments are done streaming. /// Also emitted when a Response is interrupted, incomplete, or cancelled. #[serde(rename = "response.function_call_arguments.done")] - ResponseFunctionCallArgumentsDone(ResponseFunctionCallArgumentsDoneEvent), + ResponseFunctionCallArgumentsDone(RealtimeServerEventResponseFunctionCallArgumentsDone), /// Returned when MCP tool call arguments are updated. #[serde(rename = "response.mcp_call_arguments.delta")] - ResponseMCPCallArgumentsDelta(ResponseMCPCallArgumentsDeltaEvent), + ResponseMCPCallArgumentsDelta(RealtimeServerEventResponseMCPCallArgumentsDelta), /// Returned when MCP tool call arguments are finalized during response generation. #[serde(rename = "response.mcp_call_arguments.done")] - ResponseMCPCallArgumentsDone(ResponseMCPCallArgumentsDoneEvent), + ResponseMCPCallArgumentsDone(RealtimeServerEventResponseMCPCallArgumentsDone), /// Returned when an MCP tool call is in progress. #[serde(rename = "response.mcp_call.in_progress")] - ResponseMCPCallInProgress(ResponseMCPCallInProgressEvent), + ResponseMCPCallInProgress(RealtimeServerEventResponseMCPCallInProgress), /// Returned when an MCP tool call has completed successfully. #[serde(rename = "response.mcp_call.completed")] - ResponseMCPCallCompleted(ResponseMCPCallCompletedEvent), + ResponseMCPCallCompleted(RealtimeServerEventResponseMCPCallCompleted), /// Returned when an MCP tool call has failed. #[serde(rename = "response.mcp_call.failed")] - ResponseMCPCallFailed(ResponseMCPCallFailedEvent), + ResponseMCPCallFailed(RealtimeServerEventResponseMCPCallFailed), /// Returned when listing MCP tools is in progress for an item. #[serde(rename = "mcp_list_tools.in_progress")] - MCPListToolsInProgress(MCPListToolsInProgressEvent), + MCPListToolsInProgress(RealtimeServerEventMCPListToolsInProgress), /// Returned when listing MCP tools has completed for an item. 
#[serde(rename = "mcp_list_tools.completed")] - MCPListToolsCompleted(MCPListToolsCompletedEvent), + MCPListToolsCompleted(RealtimeServerEventMCPListToolsCompleted), /// Returned when listing MCP tools has failed for an item. #[serde(rename = "mcp_list_tools.failed")] - MCPListToolsFailed(MCPListToolsFailedEvent), + MCPListToolsFailed(RealtimeServerEventMCPListToolsFailed), /// Emitted at the beginning of a Response to indicate the updated rate limits. /// When a Response is created some tokens will be "reserved" for the output tokens, the rate limits /// shown here reflect that reservation, which is then adjusted accordingly once the Response is completed. #[serde(rename = "rate_limits.updated")] - RateLimitsUpdated(RateLimitsUpdatedEvent), + RateLimitsUpdated(RealtimeServerEventRateLimitsUpdated), } From 1fbd6a253d133a81ed5f65c35ee3a2b5a69994a4 Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 17:36:23 -0800 Subject: [PATCH 40/42] match responses stream event names with spec --- .../src/types/responses/response_stream.rs | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/async-openai/src/types/responses/response_stream.rs b/async-openai/src/types/responses/response_stream.rs index c6478cae..58bcc82f 100644 --- a/async-openai/src/types/responses/response_stream.rs +++ b/async-openai/src/types/responses/response_stream.rs @@ -44,10 +44,10 @@ pub enum ResponseStreamEvent { ResponseContentPartDone(ResponseContentPartDoneEvent), /// Emitted when there is an additional text delta. #[serde(rename = "response.output_text.delta")] - ResponseOutputTextDelta(ResponseOutputTextDeltaEvent), + ResponseOutputTextDelta(ResponseTextDeltaEvent), /// Emitted when text content is finalized. #[serde(rename = "response.output_text.done")] - ResponseOutputTextDone(ResponseOutputTextDoneEvent), + ResponseOutputTextDone(ResponseTextDoneEvent), /// Emitted when there is a partial refusal text. #[serde(rename = "response.refusal.delta")] ResponseRefusalDelta(ResponseRefusalDeltaEvent), @@ -98,16 +98,16 @@ pub enum ResponseStreamEvent { ResponseReasoningTextDone(ResponseReasoningTextDoneEvent), /// Emitted when an image generation tool call has completed and the final image is available. #[serde(rename = "response.image_generation_call.completed")] - ResponseImageGenerationCallCompleted(ResponseImageGenerationCallCompletedEvent), + ResponseImageGenerationCallCompleted(ResponseImageGenCallCompletedEvent), /// Emitted when an image generation tool call is actively generating an image (intermediate state). #[serde(rename = "response.image_generation_call.generating")] - ResponseImageGenerationCallGenerating(ResponseImageGenerationCallGeneratingEvent), + ResponseImageGenerationCallGenerating(ResponseImageGenCallGeneratingEvent), /// Emitted when an image generation tool call is in progress. #[serde(rename = "response.image_generation_call.in_progress")] - ResponseImageGenerationCallInProgress(ResponseImageGenerationCallInProgressEvent), + ResponseImageGenerationCallInProgress(ResponseImageGenCallInProgressEvent), /// Emitted when a partial image is available during image generation streaming. #[serde(rename = "response.image_generation_call.partial_image")] - ResponseImageGenerationCallPartialImage(ResponseImageGenerationCallPartialImageEvent), + ResponseImageGenerationCallPartialImage(ResponseImageGenCallPartialImageEvent), /// Emitted when there is a delta (partial update) to the arguments of an MCP tool call. 
#[serde(rename = "response.mcp_call_arguments.delta")] ResponseMCPCallArgumentsDelta(ResponseMCPCallArgumentsDeltaEvent), @@ -227,7 +227,7 @@ pub struct ResponseContentPartDoneEvent { } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct ResponseOutputTextDeltaEvent { +pub struct ResponseTextDeltaEvent { pub sequence_number: u64, pub item_id: String, pub output_index: u32, @@ -238,7 +238,7 @@ pub struct ResponseOutputTextDeltaEvent { } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct ResponseOutputTextDoneEvent { +pub struct ResponseTextDoneEvent { pub sequence_number: u64, pub item_id: String, pub output_index: u32, @@ -385,28 +385,28 @@ pub struct ResponseReasoningTextDoneEvent { } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct ResponseImageGenerationCallCompletedEvent { +pub struct ResponseImageGenCallCompletedEvent { pub sequence_number: u64, pub output_index: u32, pub item_id: String, } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct ResponseImageGenerationCallGeneratingEvent { +pub struct ResponseImageGenCallGeneratingEvent { pub sequence_number: u64, pub output_index: u32, pub item_id: String, } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct ResponseImageGenerationCallInProgressEvent { +pub struct ResponseImageGenCallInProgressEvent { pub sequence_number: u64, pub output_index: u32, pub item_id: String, } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct ResponseImageGenerationCallPartialImageEvent { +pub struct ResponseImageGenCallPartialImageEvent { pub sequence_number: u64, pub output_index: u32, pub item_id: String, From 66bdbcdad3ccf76977d79f139d03dfa9b437756d Mon Sep 17 00:00:00 2001 From: Himanshu Neema Date: Tue, 4 Nov 2025 17:41:07 -0800 Subject: [PATCH 41/42] reusable type --- async-openai/src/types/mcp.rs | 14 ++++++++++++++ .../src/types/realtime/conversation_item.rs | 2 +- async-openai/src/types/responses/response.rs | 18 +----------------- 3 files changed, 16 insertions(+), 18 deletions(-) diff --git a/async-openai/src/types/mcp.rs b/async-openai/src/types/mcp.rs index 7b76c5fb..fae078a1 100644 --- a/async-openai/src/types/mcp.rs +++ b/async-openai/src/types/mcp.rs @@ -121,3 +121,17 @@ pub struct MCPToolApprovalFilter { #[serde(skip_serializing_if = "Option::is_none")] pub never: Option, } + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct MCPListToolsTool { + /// The JSON schema describing the tool's input. + pub input_schema: serde_json::Value, + /// The name of the tool. + pub name: String, + /// Additional annotations about the tool. + #[serde(skip_serializing_if = "Option::is_none")] + pub annotations: Option, + /// The description of the tool. 
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub description: Option,
+}

diff --git a/async-openai/src/types/realtime/conversation_item.rs b/async-openai/src/types/realtime/conversation_item.rs
index a1f28893..895e6d42 100644
--- a/async-openai/src/types/realtime/conversation_item.rs
+++ b/async-openai/src/types/realtime/conversation_item.rs
@@ -1,8 +1,8 @@
 use serde::{Deserialize, Serialize};
 
 use crate::types::{
+    mcp::MCPListToolsTool,
     realtime::{ErrorCodeMessage, ErrorMessage},
-    responses::MCPListToolsTool,
 };
 
 #[derive(Debug, Serialize, Deserialize, Clone)]
diff --git a/async-openai/src/types/responses/response.rs b/async-openai/src/types/responses/response.rs
index 98cae0b3..43e3e84e 100644
--- a/async-openai/src/types/responses/response.rs
+++ b/async-openai/src/types/responses/response.rs
@@ -1,14 +1,12 @@
 use crate::error::OpenAIError;
-use crate::types::MCPTool;
 pub use crate::types::{
     CompletionTokensDetails, ImageDetail, PromptTokensDetails, ReasoningEffort,
     ResponseFormatJsonSchema,
 };
+use crate::types::{MCPListToolsTool, MCPTool};
 use derive_builder::Builder;
-use futures::Stream;
 use serde::{Deserialize, Serialize};
 use std::collections::HashMap;
-use std::pin::Pin;
 
 /// Role of messages in the API.
 #[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
@@ -2107,7 +2105,7 @@ pub struct MCPListTools {
     pub error: Option,
 }
 
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct MCPListToolsTool {
-    /// The JSON schema describing the tool's input.
-    pub input_schema: serde_json::Value,
-    /// The name of the tool.
-    pub name: String,
-    /// Additional annotations about the tool.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub annotations: Option,
-    /// The description of the tool.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub description: Option,
-}
-
 #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub struct MCPApprovalRequest {
     /// JSON string of arguments for the tool.

From cc205a9dd2f249cbc4f842a18485c8db9bb776ab Mon Sep 17 00:00:00 2001
From: Himanshu Neema
Date: Tue, 4 Nov 2025 17:55:18 -0800
Subject: [PATCH 42/42] updated readme

---
 async-openai/README.md | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/async-openai/README.md b/async-openai/README.md
index 9b1fdcab..28271272 100644
--- a/async-openai/README.md
+++ b/async-openai/README.md
@@ -35,7 +35,7 @@
   - [x] Models
   - [x] Moderations
   - [x] Organizations | Administration (partially implemented)
-  - [x] Realtime (Beta) (partially implemented)
+  - [x] Realtime GA (partially implemented)
   - [x] Responses (partially implemented)
   - [x] Uploads
   - [x] Videos
@@ -65,7 +65,6 @@ $Env:OPENAI_API_KEY='sk-...'
 
 ## Realtime API
 
 Only types for Realtime API are implemented, and can be enabled with feature flag `realtime`.
-These types were written before OpenAI released official specs.
 
 ## Image Generation Example
@@ -179,8 +178,6 @@ To maintain quality of the project, a minimum of the following is a must for cod
 This project adheres to [Rust Code of Conduct](https://www.rust-lang.org/policies/code-of-conduct)
 
 ## Complimentary Crates
-
-- [openai-func-enums](https://github.com/frankfralick/openai-func-enums) provides procedural macros that make it easier to use this library with OpenAI API's tool calling feature. It also provides derive macros you can add to existing [clap](https://github.com/clap-rs/clap) application subcommands for natural language use of command line tools. It also supports openai's [parallel tool calls](https://platform.openai.com/docs/guides/function-calling/parallel-function-calling) and allows you to choose between running multiple tool calls concurrently or own their own OS threads.
 - [async-openai-wasm](https://github.com/ifsheldon/async-openai-wasm) provides WASM support.
 
 ## License
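
For readers following PATCH 41, below is a minimal, self-contained sketch of how the shared `MCPListToolsTool` type might be exercised once it lives in `types/mcp.rs`. The sample payload, the `main` wrapper, and the `Option<serde_json::Value>` / `Option<String>` type parameters (which the patch text above has lost) are illustrative assumptions, not part of the patch series.

```rust
use serde::{Deserialize, Serialize};

// Local mirror of the struct added to async-openai/src/types/mcp.rs in PATCH 41.
// The `Option<...>` type parameters are assumed here.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct MCPListToolsTool {
    /// The JSON schema describing the tool's input.
    pub input_schema: serde_json::Value,
    /// The name of the tool.
    pub name: String,
    /// Additional annotations about the tool.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub annotations: Option<serde_json::Value>,
    /// The description of the tool.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
}

fn main() -> Result<(), serde_json::Error> {
    // Hypothetical entry from an MCP tool listing, for illustration only.
    let payload = r#"{
        "name": "get_weather",
        "description": "Look up current weather for a city",
        "input_schema": {
            "type": "object",
            "properties": { "city": { "type": "string" } },
            "required": ["city"]
        }
    }"#;

    // One definition now backs both the Responses output items and the
    // Realtime conversation items, so a single round-trip covers both call sites.
    let tool: MCPListToolsTool = serde_json::from_str(payload)?;
    println!("{} -> {:?}", tool.name, tool.description);

    // The absent optional field (`annotations`) deserializes to `None` and, thanks to
    // `skip_serializing_if`, is omitted again on re-serialization.
    println!("{}", serde_json::to_string_pretty(&tool)?);
    Ok(())
}
```

Centralizing the struct in `types/mcp.rs` lets the Responses types and the Realtime conversation items deserialize MCP tool listings through one definition, which is what the `reusable type` commit is after.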