diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..6c75d73a --- /dev/null +++ b/.gitattributes @@ -0,0 +1,16 @@ +# Enforce LF line endings for all text files +* text=auto eol=lf + +# Explicitly set binary files +*.png binary +*.jpg binary +*.jpeg binary +*.gif binary +*.ico binary +*.pdf binary +*.mp3 binary +*.mp4 binary +*.zip binary +*.tar binary +*.gz binary + diff --git a/.github/workflows/pr-checks.yml b/.github/workflows/pr-checks.yml index 6446bfb3..232720d1 100644 --- a/.github/workflows/pr-checks.yml +++ b/.github/workflows/pr-checks.yml @@ -3,8 +3,6 @@ name: PR Checks on: pull_request: types: [opened, synchronize, reopened] - pull_request_target: - types: [opened, synchronize, reopened] jobs: build: diff --git a/async-openai/README.md b/async-openai/README.md index 1bcc79f7..471fc1b9 100644 --- a/async-openai/README.md +++ b/async-openai/README.md @@ -76,7 +76,7 @@ Support for webhook event types, signature verification, and building webhook ev ```rust use async_openai::{ - types::{CreateImageRequestArgs, ImageSize, ImageResponseFormat}, + types::images::{CreateImageRequestArgs, ImageResponseFormat, ImageSize}, Client, }; use std::error::Error; @@ -94,7 +94,7 @@ async fn main() -> Result<(), Box> { .user("async-openai") .build()?; - let response = client.images().create(request).await?; + let response = client.images().generate(request).await?; // Download and save images to ./data directory. // Each url is downloaded and saved in dedicated Tokio task. diff --git a/async-openai/src/image.rs b/async-openai/src/image.rs index fd7394a8..8516727a 100644 --- a/async-openai/src/image.rs +++ b/async-openai/src/image.rs @@ -1,8 +1,9 @@ use crate::{ config::Config, error::OpenAIError, - types::{ - CreateImageEditRequest, CreateImageRequest, CreateImageVariationRequest, ImagesResponse, + types::images::{ + CreateImageEditRequest, CreateImageRequest, CreateImageVariationRequest, ImageEditStream, + ImageGenStream, ImagesResponse, }, Client, }; @@ -21,24 +22,84 @@ impl<'c, C: Config> Images<'c, C> { /// Creates an image given a prompt. #[crate::byot(T0 = serde::Serialize, R = serde::de::DeserializeOwned)] - pub async fn create(&self, request: CreateImageRequest) -> Result { + pub async fn generate( + &self, + request: CreateImageRequest, + ) -> Result { self.client.post("/images/generations", request).await } - /// Creates an edited or extended image given an original image and a prompt. + /// Creates an image given a prompt. + #[crate::byot( + T0 = serde::Serialize, + R = serde::de::DeserializeOwned, + stream = "true", + where_clause = "R: std::marker::Send + 'static" + )] + #[allow(unused_mut)] + pub async fn generate_stream( + &self, + mut request: CreateImageRequest, + ) -> Result { + #[cfg(not(feature = "byot"))] + { + if request.stream.is_some() && !request.stream.unwrap() { + return Err(OpenAIError::InvalidArgument( + "When stream is false, use Image::generate".into(), + )); + } + + request.stream = Some(true); + } + + Ok(self + .client + .post_stream("/images/generations", request) + .await) + } + + /// Creates an edited or extended image given one or more source images and a prompt. + /// This endpoint only supports gpt-image-1 and dall-e-2. 
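Illustrative sketch (not part of this diff): consuming the new `generate_stream` method introduced above, assuming the `tokio` runtime and the `futures` crate alongside the crate's default features; the stream's item type is left unannotated because it is not shown in this hunk.

```rust
use async_openai::{types::images::CreateImageRequestArgs, Client};
use futures::StreamExt;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let client = Client::new();

    // Build a request as in the README example above; when the `byot`
    // feature is off, `generate_stream` forces `stream = Some(true)` itself.
    let request = CreateImageRequestArgs::default()
        .prompt("a watercolor painting of a lighthouse")
        .build()?;

    // Partial-image events arrive as server-sent events.
    let mut stream = client.images().generate_stream(request).await?;
    while let Some(event) = stream.next().await {
        match event {
            Ok(_event) => println!("received an image generation event"),
            Err(e) => eprintln!("stream error: {e}"),
        }
    }
    Ok(())
}
```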
#[crate::byot( T0 = Clone, R = serde::de::DeserializeOwned, where_clause = "reqwest::multipart::Form: crate::traits::AsyncTryFrom", )] - pub async fn create_edit( + pub async fn edit( &self, request: CreateImageEditRequest, ) -> Result { self.client.post_form("/images/edits", request).await } - /// Creates a variation of a given image. + /// Creates an edited or extended image given one or more source images and a prompt. + /// This endpoint only supports gpt-image-1 and dall-e-2. + #[crate::byot( + T0 = Clone, + R = serde::de::DeserializeOwned, + stream = "true", + where_clause = "R: std::marker::Send + 'static, reqwest::multipart::Form: crate::traits::AsyncTryFrom" + )] + #[allow(unused_mut)] + pub async fn edit_stream( + &self, + mut request: CreateImageEditRequest, + ) -> Result { + #[cfg(not(feature = "byot"))] + { + if let Some(stream) = request.stream { + if !stream { + return Err(OpenAIError::InvalidArgument( + "When stream is false, use Image::edit".into(), + )); + } + } + request.stream = Some(true); + } + self.client.post_form_stream("/images/edits", request).await + } + + /// Creates a variation of a given image. This endpoint only supports dall-e-2. #[crate::byot( T0 = Clone, R = serde::de::DeserializeOwned, diff --git a/async-openai/src/types/audio/audio_types.rs b/async-openai/src/types/audio/audio_types.rs index e7e13681..e0ab2dcd 100644 --- a/async-openai/src/types/audio/audio_types.rs +++ b/async-openai/src/types/audio/audio_types.rs @@ -1,480 +1,481 @@ -use bytes::Bytes; -use derive_builder::Builder; -use serde::{Deserialize, Serialize}; - -use crate::error::OpenAIError; -use crate::types::common::InputSource; -use crate::types::LogProbProperties; - -// openapi spec type: VoiceIdsShared -#[derive(Debug, Default, Serialize, Deserialize, Clone, PartialEq)] -#[serde(rename_all = "lowercase")] -pub enum Voice { - #[default] - Alloy, - Ash, - Ballad, - Coral, - Echo, - Sage, - Shimmer, - Verse, - Marin, - Cedar, -} - -#[derive(Debug, Default, Clone, PartialEq)] -pub struct AudioInput { - pub source: InputSource, -} - -#[derive(Debug, Serialize, Deserialize, Default, Clone, Copy, PartialEq)] -#[serde(rename_all = "snake_case")] -pub enum AudioResponseFormat { - #[default] - Json, - Text, - Srt, - VerboseJson, - Vtt, - DiarizedJson, -} - -#[derive(Debug, Serialize, Deserialize, Default, Clone, Copy, PartialEq)] -#[serde(rename_all = "snake_case")] -pub enum TranslationResponseFormat { - #[default] - Json, - Text, - Srt, - VerboseJson, - Vtt, -} - -#[derive(Debug, Serialize, Deserialize, Default, Clone, Copy, PartialEq)] -#[serde(rename_all = "lowercase")] -pub enum SpeechResponseFormat { - #[default] - Mp3, - Opus, - Aac, - Flac, - Pcm, - Wav, -} - -#[derive(Debug, Default, Serialize, Deserialize, Clone, PartialEq)] -pub enum SpeechModel { - #[default] - #[serde(rename = "tts-1")] - Tts1, - #[serde(rename = "tts-1-hd")] - Tts1Hd, - #[serde(rename = "gpt-4o-mini-tts")] - Gpt4oMiniTts, - #[serde(untagged)] - Other(String), -} - -#[derive(Debug, Default, Serialize, Deserialize, Clone, PartialEq)] -#[serde(rename_all = "lowercase")] -pub enum TimestampGranularity { - Word, - #[default] - Segment, -} - -#[derive(Clone, Default, Debug, Builder, PartialEq)] -#[builder(name = "CreateTranscriptionRequestArgs")] -#[builder(pattern = "mutable")] -#[builder(setter(into, strip_option), default)] -#[builder(derive(Debug))] -#[builder(build_fn(error = "OpenAIError"))] -pub struct CreateTranscriptionRequest { - /// The audio file object (not file name) to transcribe, in one of these formats: - 
/// flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. - pub file: AudioInput, - - /// ID of the model to use. The options are `gpt-4o-transcribe`, `gpt-4o-mini-transcribe`, `whisper-1` - /// (which is powered by our open source Whisper V2 model), and `gpt-4o-transcribe-diarize`. - pub model: String, - - /// The language of the input audio. Supplying the input language in - /// [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format will improve - /// accuracy and latency. - pub language: Option, - - /// An optional text to guide the model's style or continue a previous audio segment. The - /// [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) should match the audio - /// language. This field is not supported when using `gpt-4o-transcribe-diarize`. - pub prompt: Option, - - /// The format of the output, in one of these options: `json`, `text`, `srt`, `verbose_json`, `vtt`, or - /// `diarized_json`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`, the only supported format is - /// `json`. For `gpt-4o-transcribe-diarize`, the supported formats are `json`, `text`, and - /// `diarized_json`, with `diarized_json` required to receive speaker annotations. - pub response_format: Option, - - /// TThe sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more - /// random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the - /// model will use [log probability](https://en.wikipedia.org/wiki/Log_probability) to automatically - /// increase the temperature until certain thresholds are hit. - pub temperature: Option, // default: 0 - - /// Additional information to include in the transcription response. - - /// `logprobs` will return the log probabilities of the tokens in the - /// response to understand the model's confidence in the transcription. - /// `logprobs` only works with response_format set to `json` and only with - /// the models `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`. This field is not supported when - /// using `gpt-4o-transcribe-diarize`. - pub include: Option>, - - /// The timestamp granularities to populate for this transcription. `response_format` must be set - /// `verbose_json` to use timestamp granularities. Either or both of these options are supported: - /// `word`, or `segment`. Note: There is no additional latency for segment timestamps, but generating - /// word timestamps incurs additional latency. This option is not available for `gpt-4o-transcribe-diarize`. - pub timestamp_granularities: Option>, - - /// If set to true, the model response data will be streamed to the client - /// as it is generated using [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). - /// See the [Streaming section of the Speech-to-Text guide](https://platform.openai.com/docs/guides/speech-to-text?lang=curl#streaming-transcriptions) - /// for more information. - /// Note: Streaming is not supported for the `whisper-1` model and will be ignored. - pub stream: Option, - - /// Controls how the audio is cut into chunks. When set to `"auto"`, the server first normalizes - /// loudness and then uses voice activity detection (VAD) to choose boundaries. `server_vad` object - /// can be provided to tweak VAD detection parameters manually. If unset, the audio is transcribed as - /// a single block. Required when using `gpt-4o-transcribe-diarize` for inputs longer than 30 - /// seconds. 
- pub chunking_strategy: Option, - - /// Optional list of speaker names that correspond to the audio samples provided in - /// `known_speaker_references[]`. Each entry should be a short identifier (for example `customer` or - /// `agent`). Up to 4 speakers are supported. - pub known_speaker_names: Option>, - - /// Optional list of audio samples (as [data - /// URLs](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs)) that contain - /// known speaker references matching `known_speaker_names[]`. Each sample must be between 2 and 10 - /// seconds, and can use any of the same input audio formats supported by `file`. - pub known_speaker_references: Option>, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] -#[serde(rename_all = "snake_case")] -pub enum TranscriptionChunkingStrategy { - #[default] - Auto, - #[serde(untagged)] - ServerVad(VadConfig), -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] -#[serde(rename_all = "snake_case")] -pub enum VadConfigType { - #[default] - ServerVad, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] -pub struct VadConfig { - pub r#type: VadConfigType, - /// Amount of audio to include before the VAD detected speech (in milliseconds). Default: 300. - #[serde(skip_serializing_if = "Option::is_none")] - pub prefix_padding_ms: Option, - - /// Duration of silence to detect speech stop (in milliseconds). - /// With shorter values the model will respond more quickly, - /// but may jump in on short pauses from the user. Default: 200. - #[serde(skip_serializing_if = "Option::is_none")] - pub silence_duration_ms: Option, - - /// Sensitivity threshold (0.0 to 1.0) for voice activity detection. A - /// higher threshold will require louder audio to activate the model, and - /// thus might perform better in noisy environments. Default: 0.5. - #[serde(skip_serializing_if = "Option::is_none")] - pub threshold: Option, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(rename_all = "snake_case")] -pub enum TranscriptionInclude { - Logprobs, -} - -/// Represents a transcription response returned by model, based on the provided -/// input. -#[derive(Debug, Deserialize, Clone, Serialize)] -pub struct CreateTranscriptionResponseJson { - /// The transcribed text. - pub text: String, - - /// The log probabilities of the tokens in the transcription. Only returned with the models - /// `gpt-4o-transcribe` and `gpt-4o-mini-transcribe` if `logprobs` is added to the `include` array. - pub logprobs: Option>, - - /// Token usage statistics for the request. - pub usage: TranscriptionUsage, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct TokenUsageInputTokenDetails { - /// Number of audio tokens billed for this request. - pub audio_tokens: u32, - /// Number of text tokens billed for this request. - pub text_tokens: u32, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct TranscriptTextUsageTokens { - /// Number of input tokens billed for this request. - pub input_tokens: u32, - /// Number of output tokens generated. - pub output_tokens: u32, - /// Total number of tokens used (input + output). - pub total_tokens: u32, - /// Details about the input tokens billed for this request. - pub input_token_details: Option, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct TranscriptTextUsageDuration { - ///Duration of the input audio in seconds. 
- pub seconds: f32, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(tag = "type")] -pub enum TranscriptionUsage { - #[serde(rename = "tokens")] - Tokens(TranscriptTextUsageTokens), - #[serde(rename = "duration")] - Duration(TranscriptTextUsageDuration), -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(rename_all = "lowercase")] -pub enum CreateTranscriptionResponseDiarizedJsonTask { - Transcribe, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct CreateTranscriptionResponseDiarizedJson { - /// The type of task that was run. Always `transcribe`. - pub task: Option, - - /// Duration of the input audio in seconds. - pub duration: Option, - - /// The concatenated transcript text for the entire audio input. - pub text: String, - - /// Segments of the transcript annotated with timestamps and speaker labels. - pub segments: Vec, - - /// Token or duration usage statistics for the request. - pub usage: TranscriptionUsage, -} - -/// Represents a verbose json transcription response returned by model, based on -/// the provided input. -#[derive(Debug, Deserialize, Clone, Serialize)] -pub struct CreateTranscriptionResponseVerboseJson { - /// The language of the input audio. - pub language: String, - - /// The duration of the input audio. - pub duration: f32, - - /// The transcribed text. - pub text: String, - - /// Extracted words and their corresponding timestamps. - #[serde(skip_serializing_if = "Option::is_none")] - pub words: Option>, - - /// Segments of the transcribed text and their corresponding details. - #[serde(skip_serializing_if = "Option::is_none")] - pub segments: Option>, - - /// Usage statistics for models billed by audio input duration. - pub usage: TranscriptTextUsageDuration, -} - -#[derive(Debug, Deserialize, Clone, Serialize)] -pub struct TranscriptionWord { - /// The text content of the word. - pub word: String, - - /// Start time of the word in seconds. - pub start: f32, - - /// End time of the word in seconds. - pub end: f32, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub enum TranscriptionDiarizedSegmentType { - #[serde(rename = "transcript.text.segment")] - TranscriptTextSegment, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct TranscriptionDiarizedSegment { - /// The type of the segment. Always `transcript.text.segment`. - pub r#type: TranscriptionDiarizedSegmentType, - - /// Unique identifier for the segment. - pub id: String, - - /// Start timestamp of the segment in seconds. - pub start: f32, - - /// End timestamp of the segment in seconds. - pub end: f32, - - /// Transcript text for this segment. - pub text: String, - - /// Speaker label for this segment. - /// When known speakers are provided, the label matches known_speaker_names[]. - /// Otherwise speakers are labeled sequentially using capital letters (`A`, `B`, ...). - pub speaker: String, -} - -#[derive(Debug, Deserialize, Clone, Serialize)] -pub struct TranscriptionSegment { - /// Unique identifier of the segment. - pub id: i32, - - // Seek offset of the segment. - pub seek: i32, - - /// Start time of the segment in seconds. - pub start: f32, - - /// End time of the segment in seconds. - pub end: f32, - - /// Text content of the segment. - pub text: String, - - /// Array of token IDs for the text content. - pub tokens: Vec, - - /// Temperature parameter used for generating the segment. - pub temperature: f32, - - /// Average logprob of the segment. If the value is lower than -1, consider - /// the logprobs failed. 
- pub avg_logprob: f32, - - /// Compression ratio of the segment. If the value is greater than 2.4, - /// consider the compression failed. - pub compression_ratio: f32, - - /// Probability of no speech in the segment. If the value is higher than 1.0 - /// and the `avg_logprob` is below -1, consider this segment silent. - pub no_speech_prob: f32, -} - -#[derive(Clone, Default, Debug, Builder, PartialEq, Serialize, Deserialize)] -#[builder(name = "CreateSpeechRequestArgs")] -#[builder(pattern = "mutable")] -#[builder(setter(into, strip_option), default)] -#[builder(derive(Debug))] -#[builder(build_fn(error = "OpenAIError"))] -pub struct CreateSpeechRequest { - /// The text to generate audio for. The maximum length is 4096 characters. - pub input: String, - - /// One of the available [TTS models](https://platform.openai.com/docs/models#tts): `tts-1`, - /// `tts-1-hd` or `gpt-4o-mini-tts`. - pub model: SpeechModel, - - /// The voice to use when generating the audio. Supported voices are `alloy`, `ash`, `coral`, `echo`, `fable`, `onyx`, `nova`, `sage`, `shimmer` and `verse`. - - /// The voice to use when generating the audio. Supported voices are `alloy`, `ash`, `ballad`, - /// `coral`, `echo`, `fable`, `onyx`, `nova`, `sage`, `shimmer`, and `verse`. Previews of the voices - /// are available in the [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech#voice-options). - pub voice: Voice, - - /// Control the voice of your generated audio with additional instructions. - /// Does not work with `tts-1` or `tts-1-hd`. - #[serde(skip_serializing_if = "Option::is_none")] - pub instructions: Option, - - /// The format to audio in. Supported formats are `mp3`, `opus`, `aac`, `flac`, `wav`, and `pcm`. - #[serde(skip_serializing_if = "Option::is_none")] - pub response_format: Option, - - /// The speed of the generated audio. Select a value from 0.25 to 4.0. 1.0 is the default. - #[serde(skip_serializing_if = "Option::is_none")] - pub speed: Option, // default: 1.0 - - /// The format to stream the audio in. Supported formats are `sse` and `audio`. `sse` is not - /// supported for `tts-1` or `tts-1-hd`. - #[serde(skip_serializing_if = "Option::is_none")] - pub stream_format: Option, -} - -#[derive(Debug, Serialize, Deserialize, Default, Clone, Copy, PartialEq)] -#[serde(rename_all = "snake_case")] -pub enum StreamFormat { - #[default] - #[serde(rename = "sse")] - SSE, - #[serde(rename = "audio")] - Audio, -} - -#[derive(Clone, Default, Debug, Builder, PartialEq)] -#[builder(name = "CreateTranslationRequestArgs")] -#[builder(pattern = "mutable")] -#[builder(setter(into, strip_option), default)] -#[builder(derive(Debug))] -#[builder(build_fn(error = "OpenAIError"))] -pub struct CreateTranslationRequest { - /// The audio file object (not file name) translate, in one of these - /// formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. - pub file: AudioInput, - - /// ID of the model to use. Only `whisper-1` (which is powered by our open - /// source Whisper V2 model) is currently available. - pub model: String, - - /// An optional text to guide the model's style or continue a previous audio - /// segment. The [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) should be in English. - pub prompt: Option, - - /// The format of the transcript output, in one of these options: json, text, srt, verbose_json, or vtt. - pub response_format: Option, - - /// The sampling temperature, between 0 and 1. 
Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use [log probability](https://en.wikipedia.org/wiki/Log_probability) to automatically increase the temperature until certain thresholds are hit. - pub temperature: Option, // default: 0 -} - -#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)] -pub struct CreateTranslationResponseJson { - pub text: String, -} - -#[derive(Debug, Deserialize, Clone, Serialize)] -pub struct CreateTranslationResponseVerboseJson { - /// The language of the output translation (always `english`). - pub language: String, - /// The duration of the input audio. - pub duration: String, - /// The translated text. - pub text: String, - /// Segments of the translated text and their corresponding details. - #[serde(skip_serializing_if = "Option::is_none")] - pub segments: Option>, -} - -#[derive(Debug, Clone)] -pub struct CreateSpeechResponse { - pub bytes: Bytes, -} +use bytes::Bytes; +use derive_builder::Builder; +use serde::{Deserialize, Serialize}; + +use crate::error::OpenAIError; +use crate::types::common::InputSource; +use crate::types::LogProbProperties; + +// openapi spec type: VoiceIdsShared +#[derive(Debug, Default, Serialize, Deserialize, Clone, PartialEq)] +#[serde(rename_all = "lowercase")] +pub enum Voice { + #[default] + Alloy, + Ash, + Ballad, + Coral, + Echo, + Fable, + Onyx, + Nova, + Sage, + Shimmer, + Verse, +} + +#[derive(Debug, Default, Clone, PartialEq)] +pub struct AudioInput { + pub source: InputSource, +} + +#[derive(Debug, Serialize, Deserialize, Default, Clone, Copy, PartialEq)] +#[serde(rename_all = "snake_case")] +pub enum AudioResponseFormat { + #[default] + Json, + Text, + Srt, + VerboseJson, + Vtt, + DiarizedJson, +} + +#[derive(Debug, Serialize, Deserialize, Default, Clone, Copy, PartialEq)] +#[serde(rename_all = "snake_case")] +pub enum TranslationResponseFormat { + #[default] + Json, + Text, + Srt, + VerboseJson, + Vtt, +} + +#[derive(Debug, Serialize, Deserialize, Default, Clone, Copy, PartialEq)] +#[serde(rename_all = "lowercase")] +pub enum SpeechResponseFormat { + #[default] + Mp3, + Opus, + Aac, + Flac, + Pcm, + Wav, +} + +#[derive(Debug, Default, Serialize, Deserialize, Clone, PartialEq)] +pub enum SpeechModel { + #[default] + #[serde(rename = "tts-1")] + Tts1, + #[serde(rename = "tts-1-hd")] + Tts1Hd, + #[serde(rename = "gpt-4o-mini-tts")] + Gpt4oMiniTts, + #[serde(untagged)] + Other(String), +} + +#[derive(Debug, Default, Serialize, Deserialize, Clone, PartialEq)] +#[serde(rename_all = "lowercase")] +pub enum TimestampGranularity { + Word, + #[default] + Segment, +} + +#[derive(Clone, Default, Debug, Builder, PartialEq)] +#[builder(name = "CreateTranscriptionRequestArgs")] +#[builder(pattern = "mutable")] +#[builder(setter(into, strip_option), default)] +#[builder(derive(Debug))] +#[builder(build_fn(error = "OpenAIError"))] +pub struct CreateTranscriptionRequest { + /// The audio file object (not file name) to transcribe, in one of these formats: + /// flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. + pub file: AudioInput, + + /// ID of the model to use. The options are `gpt-4o-transcribe`, `gpt-4o-mini-transcribe`, `whisper-1` + /// (which is powered by our open source Whisper V2 model), and `gpt-4o-transcribe-diarize`. + pub model: String, + + /// The language of the input audio. Supplying the input language in + /// [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. 
`en`) format will improve + /// accuracy and latency. + pub language: Option, + + /// An optional text to guide the model's style or continue a previous audio segment. The + /// [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) should match the audio + /// language. This field is not supported when using `gpt-4o-transcribe-diarize`. + pub prompt: Option, + + /// The format of the output, in one of these options: `json`, `text`, `srt`, `verbose_json`, `vtt`, or + /// `diarized_json`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`, the only supported format is + /// `json`. For `gpt-4o-transcribe-diarize`, the supported formats are `json`, `text`, and + /// `diarized_json`, with `diarized_json` required to receive speaker annotations. + pub response_format: Option, + + /// The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more + /// random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the + /// model will use [log probability](https://en.wikipedia.org/wiki/Log_probability) to automatically + /// increase the temperature until certain thresholds are hit. + pub temperature: Option, // default: 0 + + /// Additional information to include in the transcription response. + + /// `logprobs` will return the log probabilities of the tokens in the + /// response to understand the model's confidence in the transcription. + /// `logprobs` only works with response_format set to `json` and only with + /// the models `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`. This field is not supported when + /// using `gpt-4o-transcribe-diarize`. + pub include: Option>, + + /// The timestamp granularities to populate for this transcription. `response_format` must be set to + /// `verbose_json` to use timestamp granularities. Either or both of these options are supported: + /// `word`, or `segment`. Note: There is no additional latency for segment timestamps, but generating + /// word timestamps incurs additional latency. This option is not available for `gpt-4o-transcribe-diarize`. + pub timestamp_granularities: Option>, + + /// If set to true, the model response data will be streamed to the client + /// as it is generated using [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + /// See the [Streaming section of the Speech-to-Text guide](https://platform.openai.com/docs/guides/speech-to-text?lang=curl#streaming-transcriptions) + /// for more information. + /// Note: Streaming is not supported for the `whisper-1` model and will be ignored. + pub stream: Option, + + /// Controls how the audio is cut into chunks. When set to `"auto"`, the server first normalizes + /// loudness and then uses voice activity detection (VAD) to choose boundaries. A `server_vad` object + /// can be provided to tweak VAD detection parameters manually. If unset, the audio is transcribed as + /// a single block. Required when using `gpt-4o-transcribe-diarize` for inputs longer than 30 + /// seconds. + pub chunking_strategy: Option, + + /// Optional list of speaker names that correspond to the audio samples provided in + /// `known_speaker_references[]`. Each entry should be a short identifier (for example `customer` or + /// `agent`). Up to 4 speakers are supported.
+ pub known_speaker_names: Option>, + + /// Optional list of audio samples (as [data + /// URLs](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs)) that contain + /// known speaker references matching `known_speaker_names[]`. Each sample must be between 2 and 10 + /// seconds, and can use any of the same input audio formats supported by `file`. + pub known_speaker_references: Option>, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] +#[serde(rename_all = "snake_case")] +pub enum TranscriptionChunkingStrategy { + #[default] + Auto, + #[serde(untagged)] + ServerVad(VadConfig), +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] +#[serde(rename_all = "snake_case")] +pub enum VadConfigType { + #[default] + ServerVad, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] +pub struct VadConfig { + pub r#type: VadConfigType, + /// Amount of audio to include before the VAD detected speech (in milliseconds). Default: 300. + #[serde(skip_serializing_if = "Option::is_none")] + pub prefix_padding_ms: Option, + + /// Duration of silence to detect speech stop (in milliseconds). + /// With shorter values the model will respond more quickly, + /// but may jump in on short pauses from the user. Default: 200. + #[serde(skip_serializing_if = "Option::is_none")] + pub silence_duration_ms: Option, + + /// Sensitivity threshold (0.0 to 1.0) for voice activity detection. A + /// higher threshold will require louder audio to activate the model, and + /// thus might perform better in noisy environments. Default: 0.5. + #[serde(skip_serializing_if = "Option::is_none")] + pub threshold: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(rename_all = "snake_case")] +pub enum TranscriptionInclude { + Logprobs, +} + +/// Represents a transcription response returned by model, based on the provided +/// input. +#[derive(Debug, Deserialize, Clone, Serialize)] +pub struct CreateTranscriptionResponseJson { + /// The transcribed text. + pub text: String, + + /// The log probabilities of the tokens in the transcription. Only returned with the models + /// `gpt-4o-transcribe` and `gpt-4o-mini-transcribe` if `logprobs` is added to the `include` array. + pub logprobs: Option>, + + /// Token usage statistics for the request. + pub usage: TranscriptionUsage, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct TokenUsageInputTokenDetails { + /// Number of audio tokens billed for this request. + pub audio_tokens: u32, + /// Number of text tokens billed for this request. + pub text_tokens: u32, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct TranscriptTextUsageTokens { + /// Number of input tokens billed for this request. + pub input_tokens: u32, + /// Number of output tokens generated. + pub output_tokens: u32, + /// Total number of tokens used (input + output). + pub total_tokens: u32, + /// Details about the input tokens billed for this request. + pub input_token_details: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct TranscriptTextUsageDuration { + ///Duration of the input audio in seconds. 
+ pub seconds: f32, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(tag = "type")] +pub enum TranscriptionUsage { + #[serde(rename = "tokens")] + Tokens(TranscriptTextUsageTokens), + #[serde(rename = "duration")] + Duration(TranscriptTextUsageDuration), +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(rename_all = "lowercase")] +pub enum CreateTranscriptionResponseDiarizedJsonTask { + Transcribe, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct CreateTranscriptionResponseDiarizedJson { + /// The type of task that was run. Always `transcribe`. + pub task: Option, + + /// Duration of the input audio in seconds. + pub duration: Option, + + /// The concatenated transcript text for the entire audio input. + pub text: String, + + /// Segments of the transcript annotated with timestamps and speaker labels. + pub segments: Vec, + + /// Token or duration usage statistics for the request. + pub usage: TranscriptionUsage, +} + +/// Represents a verbose json transcription response returned by model, based on +/// the provided input. +#[derive(Debug, Deserialize, Clone, Serialize)] +pub struct CreateTranscriptionResponseVerboseJson { + /// The language of the input audio. + pub language: String, + + /// The duration of the input audio. + pub duration: f32, + + /// The transcribed text. + pub text: String, + + /// Extracted words and their corresponding timestamps. + #[serde(skip_serializing_if = "Option::is_none")] + pub words: Option>, + + /// Segments of the transcribed text and their corresponding details. + #[serde(skip_serializing_if = "Option::is_none")] + pub segments: Option>, + + /// Usage statistics for models billed by audio input duration. + pub usage: TranscriptTextUsageDuration, +} + +#[derive(Debug, Deserialize, Clone, Serialize)] +pub struct TranscriptionWord { + /// The text content of the word. + pub word: String, + + /// Start time of the word in seconds. + pub start: f32, + + /// End time of the word in seconds. + pub end: f32, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub enum TranscriptionDiarizedSegmentType { + #[serde(rename = "transcript.text.segment")] + TranscriptTextSegment, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct TranscriptionDiarizedSegment { + /// The type of the segment. Always `transcript.text.segment`. + pub r#type: TranscriptionDiarizedSegmentType, + + /// Unique identifier for the segment. + pub id: String, + + /// Start timestamp of the segment in seconds. + pub start: f32, + + /// End timestamp of the segment in seconds. + pub end: f32, + + /// Transcript text for this segment. + pub text: String, + + /// Speaker label for this segment. + /// When known speakers are provided, the label matches known_speaker_names[]. + /// Otherwise speakers are labeled sequentially using capital letters (`A`, `B`, ...). + pub speaker: String, +} + +#[derive(Debug, Deserialize, Clone, Serialize)] +pub struct TranscriptionSegment { + /// Unique identifier of the segment. + pub id: i32, + + // Seek offset of the segment. + pub seek: i32, + + /// Start time of the segment in seconds. + pub start: f32, + + /// End time of the segment in seconds. + pub end: f32, + + /// Text content of the segment. + pub text: String, + + /// Array of token IDs for the text content. + pub tokens: Vec, + + /// Temperature parameter used for generating the segment. + pub temperature: f32, + + /// Average logprob of the segment. If the value is lower than -1, consider + /// the logprobs failed. 
+ pub avg_logprob: f32, + + /// Compression ratio of the segment. If the value is greater than 2.4, + /// consider the compression failed. + pub compression_ratio: f32, + + /// Probability of no speech in the segment. If the value is higher than 1.0 + /// and the `avg_logprob` is below -1, consider this segment silent. + pub no_speech_prob: f32, +} + +#[derive(Clone, Default, Debug, Builder, PartialEq, Serialize, Deserialize)] +#[builder(name = "CreateSpeechRequestArgs")] +#[builder(pattern = "mutable")] +#[builder(setter(into, strip_option), default)] +#[builder(derive(Debug))] +#[builder(build_fn(error = "OpenAIError"))] +pub struct CreateSpeechRequest { + /// The text to generate audio for. The maximum length is 4096 characters. + pub input: String, + + /// One of the available [TTS models](https://platform.openai.com/docs/models#tts): `tts-1`, + /// `tts-1-hd` or `gpt-4o-mini-tts`. + pub model: SpeechModel, + + /// The voice to use when generating the audio. Supported voices are `alloy`, `ash`, `ballad`, + /// `coral`, `echo`, `fable`, `onyx`, `nova`, `sage`, `shimmer`, and `verse`. Previews of the voices + /// are available in the [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech#voice-options). + pub voice: Voice, + + /// Control the voice of your generated audio with additional instructions. + /// Does not work with `tts-1` or `tts-1-hd`. + #[serde(skip_serializing_if = "Option::is_none")] + pub instructions: Option, + + /// The format to return audio in. Supported formats are `mp3`, `opus`, `aac`, `flac`, `wav`, and `pcm`. + #[serde(skip_serializing_if = "Option::is_none")] + pub response_format: Option, + + /// The speed of the generated audio. Select a value from 0.25 to 4.0. 1.0 is the default. + #[serde(skip_serializing_if = "Option::is_none")] + pub speed: Option, // default: 1.0 + + /// The format to stream the audio in. Supported formats are `sse` and `audio`. `sse` is not + /// supported for `tts-1` or `tts-1-hd`. + #[serde(skip_serializing_if = "Option::is_none")] + pub stream_format: Option, +} + +#[derive(Debug, Serialize, Deserialize, Default, Clone, Copy, PartialEq)] +#[serde(rename_all = "snake_case")] +pub enum StreamFormat { + #[default] + #[serde(rename = "sse")] + SSE, + #[serde(rename = "audio")] + Audio, +} + +#[derive(Clone, Default, Debug, Builder, PartialEq)] +#[builder(name = "CreateTranslationRequestArgs")] +#[builder(pattern = "mutable")] +#[builder(setter(into, strip_option), default)] +#[builder(derive(Debug))] +#[builder(build_fn(error = "OpenAIError"))] +pub struct CreateTranslationRequest { + /// The audio file object (not file name) to translate, in one of these + /// formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. + pub file: AudioInput, + + /// ID of the model to use. Only `whisper-1` (which is powered by our open + /// source Whisper V2 model) is currently available. + pub model: String, + + /// An optional text to guide the model's style or continue a previous audio + /// segment. The [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) should be in English. + pub prompt: Option, + + /// The format of the transcript output, in one of these options: json, text, srt, verbose_json, or vtt. + pub response_format: Option, + + /// The sampling temperature, between 0 and 1.
Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use [log probability](https://en.wikipedia.org/wiki/Log_probability) to automatically increase the temperature until certain thresholds are hit. + pub temperature: Option, // default: 0 +} + +#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)] +pub struct CreateTranslationResponseJson { + pub text: String, +} + +#[derive(Debug, Deserialize, Clone, Serialize)] +pub struct CreateTranslationResponseVerboseJson { + /// The language of the output translation (always `english`). + pub language: String, + /// The duration of the input audio. + pub duration: String, + /// The translated text. + pub text: String, + /// Segments of the translated text and their corresponding details. + #[serde(skip_serializing_if = "Option::is_none")] + pub segments: Option>, +} + +#[derive(Debug, Clone)] +pub struct CreateSpeechResponse { + pub bytes: Bytes, +} diff --git a/async-openai/src/types/common.rs b/async-openai/src/types/common.rs index 250e0443..a2050567 100644 --- a/async-openai/src/types/common.rs +++ b/async-openai/src/types/common.rs @@ -1,34 +1,34 @@ -use std::{collections::HashMap, path::PathBuf}; - -use bytes::Bytes; -use serde::{Deserialize, Serialize}; - -#[derive(Debug, Clone, PartialEq)] -pub enum InputSource { - Path { path: PathBuf }, - Bytes { filename: String, bytes: Bytes }, - VecU8 { filename: String, vec: Vec }, -} - -#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] -#[serde(rename_all = "lowercase")] -pub enum OrganizationRole { - Owner, - Reader, -} - -/// Set of 16 key-value pairs that can be attached to an object. -/// This can be useful for storing additional information about the -/// object in a structured format, and querying for objects via API -/// or the dashboard. Keys are strings with a maximum length of 64 -/// characters. Values are strings with a maximum length of 512 -/// characters. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] -#[serde(transparent)] -pub struct Metadata(HashMap); - -impl From> for Metadata { - fn from(value: HashMap) -> Self { - Self(value) - } -} +use std::{collections::HashMap, path::PathBuf}; + +use bytes::Bytes; +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, PartialEq)] +pub enum InputSource { + Path { path: PathBuf }, + Bytes { filename: String, bytes: Bytes }, + VecU8 { filename: String, vec: Vec }, +} + +#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] +#[serde(rename_all = "lowercase")] +pub enum OrganizationRole { + Owner, + Reader, +} + +/// Set of 16 key-value pairs that can be attached to an object. +/// This can be useful for storing additional information about the +/// object in a structured format, and querying for objects via API +/// or the dashboard. Keys are strings with a maximum length of 64 +/// characters. Values are strings with a maximum length of 512 +/// characters. 
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] +#[serde(transparent)] +pub struct Metadata(HashMap); + +impl From> for Metadata { + fn from(value: HashMap) -> Self { + Self(value) + } +} diff --git a/async-openai/src/types/completion.rs b/async-openai/src/types/completion.rs index 3c15dd6b..7e1e8fec 100644 --- a/async-openai/src/types/completion.rs +++ b/async-openai/src/types/completion.rs @@ -1,141 +1,141 @@ -use std::{collections::HashMap, pin::Pin}; - -use derive_builder::Builder; -use futures::Stream; -use serde::{Deserialize, Serialize}; - -use crate::error::OpenAIError; - -use super::{ChatCompletionStreamOptions, Choice, CompletionUsage, Prompt, Stop}; - -#[derive(Clone, Serialize, Deserialize, Default, Debug, Builder, PartialEq)] -#[builder(name = "CreateCompletionRequestArgs")] -#[builder(pattern = "mutable")] -#[builder(setter(into, strip_option), default)] -#[builder(derive(Debug))] -#[builder(build_fn(error = "OpenAIError"))] -pub struct CreateCompletionRequest { - /// ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our [Model overview](https://platform.openai.com/docs/models/overview) for descriptions of them. - pub model: String, - - /// The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays. - /// - /// Note that <|endoftext|> is the document separator that the model sees during training, so if a prompt is not specified the model will generate as if from the beginning of a new document. - pub prompt: Prompt, - - /// The suffix that comes after a completion of inserted text. - /// - /// This parameter is only supported for `gpt-3.5-turbo-instruct`. - #[serde(skip_serializing_if = "Option::is_none")] - pub suffix: Option, // default: null - - /// The maximum number of [tokens](https://platform.openai.com/tokenizer) that can be generated in the completion. - /// - /// The token count of your prompt plus `max_tokens` cannot exceed the model's context length. [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) for counting tokens. - #[serde(skip_serializing_if = "Option::is_none")] - pub max_tokens: Option, - - /// What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. - /// - /// We generally recommend altering this or `top_p` but not both. - #[serde(skip_serializing_if = "Option::is_none")] - pub temperature: Option, // min: 0, max: 2, default: 1, - - /// An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. - /// - /// We generally recommend altering this or `temperature` but not both. - #[serde(skip_serializing_if = "Option::is_none")] - pub top_p: Option, // min: 0, max: 1, default: 1 - - /// How many completions to generate for each prompt. - - /// **Note:** Because this parameter generates many completions, it can quickly consume your token quota. Use carefully and ensure that you have reasonable settings for `max_tokens` and `stop`. - /// - #[serde(skip_serializing_if = "Option::is_none")] - pub n: Option, // min:1 max: 128, default: 1 - - /// Whether to stream back partial progress. 
If set, tokens will be sent as data-only [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) - /// as they become available, with the stream terminated by a `data: [DONE]` message. - #[serde(skip_serializing_if = "Option::is_none")] - pub stream: Option, // nullable: true - - #[serde(skip_serializing_if = "Option::is_none")] - pub stream_options: Option, - - /// Include the log probabilities on the `logprobs` most likely output tokens, as well the chosen tokens. For example, if `logprobs` is 5, the API will return a list of the 5 most likely tokens. The API will always return the `logprob` of the sampled token, so there may be up to `logprobs+1` elements in the response. - /// - /// The maximum value for `logprobs` is 5. - #[serde(skip_serializing_if = "Option::is_none")] - pub logprobs: Option, // min:0 , max: 5, default: null, nullable: true - - /// Echo back the prompt in addition to the completion - #[serde(skip_serializing_if = "Option::is_none")] - pub echo: Option, - - /// Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. - #[serde(skip_serializing_if = "Option::is_none")] - pub stop: Option, - - /// Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. - /// - /// [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) - #[serde(skip_serializing_if = "Option::is_none")] - pub presence_penalty: Option, // min: -2.0, max: 2.0, default 0 - - /// Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. - /// - /// [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) - #[serde(skip_serializing_if = "Option::is_none")] - pub frequency_penalty: Option, // min: -2.0, max: 2.0, default: 0 - - /// Generates `best_of` completions server-side and returns the "best" (the one with the highest log probability per token). Results cannot be streamed. - /// - /// When used with `n`, `best_of` controls the number of candidate completions and `n` specifies how many to return – `best_of` must be greater than `n`. - /// - /// **Note:** Because this parameter generates many completions, it can quickly consume your token quota. Use carefully and ensure that you have reasonable settings for `max_tokens` and `stop`. - #[serde(skip_serializing_if = "Option::is_none")] - pub best_of: Option, //min: 0, max: 20, default: 1 - - /// Modify the likelihood of specified tokens appearing in the completion. - /// - /// Accepts a json object that maps tokens (specified by their token ID in the GPT tokenizer) to an associated bias value from -100 to 100. You can use this [tokenizer tool](/tokenizer?view=bpe) (which works for both GPT-2 and GPT-3) to convert text to token IDs. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token. 
- /// - /// As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token from being generated. - #[serde(skip_serializing_if = "Option::is_none")] - pub logit_bias: Option>, // default: null - - /// A unique identifier representing your end-user, which will help OpenAI to monitor and detect abuse. [Learn more](https://platform.openai.com/docs/usage-policies/end-user-ids). - #[serde(skip_serializing_if = "Option::is_none")] - pub user: Option, - - /// If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return the same result. - /// - /// Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend. - #[serde(skip_serializing_if = "Option::is_none")] - pub seed: Option, -} - -#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)] -pub struct CreateCompletionResponse { - /// A unique identifier for the completion. - pub id: String, - pub choices: Vec, - /// The Unix timestamp (in seconds) of when the completion was created. - pub created: u32, - - /// The model used for completion. - pub model: String, - /// This fingerprint represents the backend configuration that the model runs with. - /// - /// Can be used in conjunction with the `seed` request parameter to understand when backend changes have been - /// made that might impact determinism. - pub system_fingerprint: Option, - - /// The object type, which is always "text_completion" - pub object: String, - pub usage: Option, -} - -/// Parsed server side events stream until an \[DONE\] is received from server. -pub type CompletionResponseStream = - Pin> + Send>>; +use std::{collections::HashMap, pin::Pin}; + +use derive_builder::Builder; +use futures::Stream; +use serde::{Deserialize, Serialize}; + +use crate::error::OpenAIError; + +use super::{ChatCompletionStreamOptions, Choice, CompletionUsage, Prompt, Stop}; + +#[derive(Clone, Serialize, Deserialize, Default, Debug, Builder, PartialEq)] +#[builder(name = "CreateCompletionRequestArgs")] +#[builder(pattern = "mutable")] +#[builder(setter(into, strip_option), default)] +#[builder(derive(Debug))] +#[builder(build_fn(error = "OpenAIError"))] +pub struct CreateCompletionRequest { + /// ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our [Model overview](https://platform.openai.com/docs/models/overview) for descriptions of them. + pub model: String, + + /// The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays. + /// + /// Note that <|endoftext|> is the document separator that the model sees during training, so if a prompt is not specified the model will generate as if from the beginning of a new document. + pub prompt: Prompt, + + /// The suffix that comes after a completion of inserted text. + /// + /// This parameter is only supported for `gpt-3.5-turbo-instruct`. + #[serde(skip_serializing_if = "Option::is_none")] + pub suffix: Option, // default: null + + /// The maximum number of [tokens](https://platform.openai.com/tokenizer) that can be generated in the completion. + /// + /// The token count of your prompt plus `max_tokens` cannot exceed the model's context length. [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) for counting tokens. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub max_tokens: Option, + + /// What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. + /// + /// We generally recommend altering this or `top_p` but not both. + #[serde(skip_serializing_if = "Option::is_none")] + pub temperature: Option, // min: 0, max: 2, default: 1, + + /// An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. + /// + /// We generally recommend altering this or `temperature` but not both. + #[serde(skip_serializing_if = "Option::is_none")] + pub top_p: Option, // min: 0, max: 1, default: 1 + + /// How many completions to generate for each prompt. + + /// **Note:** Because this parameter generates many completions, it can quickly consume your token quota. Use carefully and ensure that you have reasonable settings for `max_tokens` and `stop`. + /// + #[serde(skip_serializing_if = "Option::is_none")] + pub n: Option, // min:1 max: 128, default: 1 + + /// Whether to stream back partial progress. If set, tokens will be sent as data-only [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + /// as they become available, with the stream terminated by a `data: [DONE]` message. + #[serde(skip_serializing_if = "Option::is_none")] + pub stream: Option, // nullable: true + + #[serde(skip_serializing_if = "Option::is_none")] + pub stream_options: Option, + + /// Include the log probabilities on the `logprobs` most likely output tokens, as well the chosen tokens. For example, if `logprobs` is 5, the API will return a list of the 5 most likely tokens. The API will always return the `logprob` of the sampled token, so there may be up to `logprobs+1` elements in the response. + /// + /// The maximum value for `logprobs` is 5. + #[serde(skip_serializing_if = "Option::is_none")] + pub logprobs: Option, // min:0 , max: 5, default: null, nullable: true + + /// Echo back the prompt in addition to the completion + #[serde(skip_serializing_if = "Option::is_none")] + pub echo: Option, + + /// Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. + #[serde(skip_serializing_if = "Option::is_none")] + pub stop: Option, + + /// Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. + /// + /// [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + #[serde(skip_serializing_if = "Option::is_none")] + pub presence_penalty: Option, // min: -2.0, max: 2.0, default 0 + + /// Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. 
+ /// + /// [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + #[serde(skip_serializing_if = "Option::is_none")] + pub frequency_penalty: Option, // min: -2.0, max: 2.0, default: 0 + + /// Generates `best_of` completions server-side and returns the "best" (the one with the highest log probability per token). Results cannot be streamed. + /// + /// When used with `n`, `best_of` controls the number of candidate completions and `n` specifies how many to return – `best_of` must be greater than `n`. + /// + /// **Note:** Because this parameter generates many completions, it can quickly consume your token quota. Use carefully and ensure that you have reasonable settings for `max_tokens` and `stop`. + #[serde(skip_serializing_if = "Option::is_none")] + pub best_of: Option, //min: 0, max: 20, default: 1 + + /// Modify the likelihood of specified tokens appearing in the completion. + /// + /// Accepts a json object that maps tokens (specified by their token ID in the GPT tokenizer) to an associated bias value from -100 to 100. You can use this [tokenizer tool](/tokenizer?view=bpe) (which works for both GPT-2 and GPT-3) to convert text to token IDs. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token. + /// + /// As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token from being generated. + #[serde(skip_serializing_if = "Option::is_none")] + pub logit_bias: Option>, // default: null + + /// A unique identifier representing your end-user, which will help OpenAI to monitor and detect abuse. [Learn more](https://platform.openai.com/docs/usage-policies/end-user-ids). + #[serde(skip_serializing_if = "Option::is_none")] + pub user: Option, + + /// If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return the same result. + /// + /// Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend. + #[serde(skip_serializing_if = "Option::is_none")] + pub seed: Option, +} + +#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)] +pub struct CreateCompletionResponse { + /// A unique identifier for the completion. + pub id: String, + pub choices: Vec, + /// The Unix timestamp (in seconds) of when the completion was created. + pub created: u32, + + /// The model used for completion. + pub model: String, + /// This fingerprint represents the backend configuration that the model runs with. + /// + /// Can be used in conjunction with the `seed` request parameter to understand when backend changes have been + /// made that might impact determinism. + pub system_fingerprint: Option, + + /// The object type, which is always "text_completion" + pub object: String, + pub usage: Option, +} + +/// Parsed server side events stream until an \[DONE\] is received from server. 
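Illustrative sketch (not part of this diff): consuming a `CompletionResponseStream` as documented above, assuming the crate's `Completions::create_stream` wrapper and the root-level `types::CreateCompletionRequestArgs` re-export, plus the `tokio` runtime and the `futures` crate.

```rust
use async_openai::{types::CreateCompletionRequestArgs, Client};
use futures::StreamExt;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let client = Client::new();

    // stream=true requests data-only server-sent events terminated by `data: [DONE]`.
    let request = CreateCompletionRequestArgs::default()
        .model("gpt-3.5-turbo-instruct")
        .prompt("Write a one-line haiku about Rust")
        .max_tokens(64_u32)
        .stream(true)
        .build()?;

    let mut stream = client.completions().create_stream(request).await?;
    while let Some(chunk) = stream.next().await {
        match chunk {
            // Each chunk is a partial CreateCompletionResponse.
            Ok(response) => {
                for choice in response.choices {
                    print!("{}", choice.text);
                }
            }
            Err(e) => eprintln!("stream error: {e}"),
        }
    }
    Ok(())
}
```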
+pub type CompletionResponseStream = + Pin> + Send>>; diff --git a/async-openai/src/types/embedding.rs b/async-openai/src/types/embedding.rs index ea05ac3b..641e19bc 100644 --- a/async-openai/src/types/embedding.rs +++ b/async-openai/src/types/embedding.rs @@ -1,122 +1,122 @@ -use base64::engine::{general_purpose, Engine}; -use derive_builder::Builder; -use serde::{Deserialize, Serialize}; - -use crate::error::OpenAIError; - -#[derive(Debug, Serialize, Clone, PartialEq, Deserialize)] -#[serde(untagged)] -pub enum EmbeddingInput { - String(String), - StringArray(Vec), - // Minimum value is 0, maximum value is 100257 (inclusive). - IntegerArray(Vec), - ArrayOfIntegerArray(Vec>), -} - -#[derive(Debug, Serialize, Default, Clone, PartialEq, Deserialize)] -#[serde(rename_all = "lowercase")] -pub enum EncodingFormat { - #[default] - Float, - Base64, -} - -#[derive(Debug, Serialize, Default, Clone, Builder, PartialEq, Deserialize)] -#[builder(name = "CreateEmbeddingRequestArgs")] -#[builder(pattern = "mutable")] -#[builder(setter(into, strip_option), default)] -#[builder(derive(Debug))] -#[builder(build_fn(error = "OpenAIError"))] -pub struct CreateEmbeddingRequest { - /// ID of the model to use. You can use the - /// [List models](https://platform.openai.com/docs/api-reference/models/list) - /// API to see all of your available models, or see our - /// [Model overview](https://platform.openai.com/docs/models/overview) - /// for descriptions of them. - pub model: String, - - /// Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays. The input must not exceed the max input tokens for the model (8192 tokens for `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048 dimensions or less. [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) for counting tokens. - pub input: EmbeddingInput, - - /// The format to return the embeddings in. Can be either `float` or [`base64`](https://pypi.org/project/pybase64/). Defaults to float - #[serde(skip_serializing_if = "Option::is_none")] - pub encoding_format: Option, - - /// A unique identifier representing your end-user, which will help OpenAI - /// to monitor and detect abuse. [Learn more](https://platform.openai.com/docs/usage-policies/end-user-ids). - #[serde(skip_serializing_if = "Option::is_none")] - pub user: Option, - - /// The number of dimensions the resulting output embeddings should have. Only supported in `text-embedding-3` and later models. - #[serde(skip_serializing_if = "Option::is_none")] - pub dimensions: Option, -} - -/// Represents an embedding vector returned by embedding endpoint. -#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] -pub struct Embedding { - /// The index of the embedding in the list of embeddings. - pub index: u32, - /// The object type, which is always "embedding". - pub object: String, - /// The embedding vector, which is a list of floats. The length of vector - /// depends on the model as listed in the [embedding guide](https://platform.openai.com/docs/guides/embeddings). 
- pub embedding: Vec, -} - -#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] -pub struct Base64EmbeddingVector(pub String); - -impl From for Vec { - fn from(value: Base64EmbeddingVector) -> Self { - let bytes = general_purpose::STANDARD - .decode(value.0) - .expect("openai base64 encoding to be valid"); - let chunks = bytes.chunks_exact(4); - chunks - .map(|chunk| f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]])) - .collect() - } -} - -/// Represents an base64-encoded embedding vector returned by embedding endpoint. -#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] -pub struct Base64Embedding { - /// The index of the embedding in the list of embeddings. - pub index: u32, - /// The object type, which is always "embedding". - pub object: String, - /// The embedding vector, encoded in base64. - pub embedding: Base64EmbeddingVector, -} - -#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] -pub struct EmbeddingUsage { - /// The number of tokens used by the prompt. - pub prompt_tokens: u32, - /// The total number of tokens used by the request. - pub total_tokens: u32, -} - -#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)] -pub struct CreateEmbeddingResponse { - pub object: String, - /// The name of the model used to generate the embedding. - pub model: String, - /// The list of embeddings generated by the model. - pub data: Vec, - /// The usage information for the request. - pub usage: EmbeddingUsage, -} - -#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)] -pub struct CreateBase64EmbeddingResponse { - pub object: String, - /// The name of the model used to generate the embedding. - pub model: String, - /// The list of embeddings generated by the model. - pub data: Vec, - /// The usage information for the request. - pub usage: EmbeddingUsage, -} +use base64::engine::{general_purpose, Engine}; +use derive_builder::Builder; +use serde::{Deserialize, Serialize}; + +use crate::error::OpenAIError; + +#[derive(Debug, Serialize, Clone, PartialEq, Deserialize)] +#[serde(untagged)] +pub enum EmbeddingInput { + String(String), + StringArray(Vec), + // Minimum value is 0, maximum value is 100257 (inclusive). + IntegerArray(Vec), + ArrayOfIntegerArray(Vec>), +} + +#[derive(Debug, Serialize, Default, Clone, PartialEq, Deserialize)] +#[serde(rename_all = "lowercase")] +pub enum EncodingFormat { + #[default] + Float, + Base64, +} + +#[derive(Debug, Serialize, Default, Clone, Builder, PartialEq, Deserialize)] +#[builder(name = "CreateEmbeddingRequestArgs")] +#[builder(pattern = "mutable")] +#[builder(setter(into, strip_option), default)] +#[builder(derive(Debug))] +#[builder(build_fn(error = "OpenAIError"))] +pub struct CreateEmbeddingRequest { + /// ID of the model to use. You can use the + /// [List models](https://platform.openai.com/docs/api-reference/models/list) + /// API to see all of your available models, or see our + /// [Model overview](https://platform.openai.com/docs/models/overview) + /// for descriptions of them. + pub model: String, + + /// Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays. The input must not exceed the max input tokens for the model (8192 tokens for `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048 dimensions or less. [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) for counting tokens. 
+ pub input: EmbeddingInput, + + /// The format to return the embeddings in. Can be either `float` or [`base64`](https://pypi.org/project/pybase64/). Defaults to float + #[serde(skip_serializing_if = "Option::is_none")] + pub encoding_format: Option, + + /// A unique identifier representing your end-user, which will help OpenAI + /// to monitor and detect abuse. [Learn more](https://platform.openai.com/docs/usage-policies/end-user-ids). + #[serde(skip_serializing_if = "Option::is_none")] + pub user: Option, + + /// The number of dimensions the resulting output embeddings should have. Only supported in `text-embedding-3` and later models. + #[serde(skip_serializing_if = "Option::is_none")] + pub dimensions: Option, +} + +/// Represents an embedding vector returned by embedding endpoint. +#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] +pub struct Embedding { + /// The index of the embedding in the list of embeddings. + pub index: u32, + /// The object type, which is always "embedding". + pub object: String, + /// The embedding vector, which is a list of floats. The length of vector + /// depends on the model as listed in the [embedding guide](https://platform.openai.com/docs/guides/embeddings). + pub embedding: Vec, +} + +#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] +pub struct Base64EmbeddingVector(pub String); + +impl From for Vec { + fn from(value: Base64EmbeddingVector) -> Self { + let bytes = general_purpose::STANDARD + .decode(value.0) + .expect("openai base64 encoding to be valid"); + let chunks = bytes.chunks_exact(4); + chunks + .map(|chunk| f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]])) + .collect() + } +} + +/// Represents an base64-encoded embedding vector returned by embedding endpoint. +#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] +pub struct Base64Embedding { + /// The index of the embedding in the list of embeddings. + pub index: u32, + /// The object type, which is always "embedding". + pub object: String, + /// The embedding vector, encoded in base64. + pub embedding: Base64EmbeddingVector, +} + +#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] +pub struct EmbeddingUsage { + /// The number of tokens used by the prompt. + pub prompt_tokens: u32, + /// The total number of tokens used by the request. + pub total_tokens: u32, +} + +#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)] +pub struct CreateEmbeddingResponse { + pub object: String, + /// The name of the model used to generate the embedding. + pub model: String, + /// The list of embeddings generated by the model. + pub data: Vec, + /// The usage information for the request. + pub usage: EmbeddingUsage, +} + +#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)] +pub struct CreateBase64EmbeddingResponse { + pub object: String, + /// The name of the model used to generate the embedding. + pub model: String, + /// The list of embeddings generated by the model. + pub data: Vec, + /// The usage information for the request. 
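A short sketch of how these embedding types fit together, assuming the crate exposes an `Embeddings::create_base64` helper alongside `create`; the decode step relies on the `From<Base64EmbeddingVector> for Vec<f32>` conversion defined above:

```rust
use async_openai::{
    types::{CreateEmbeddingRequestArgs, EmbeddingInput},
    Client,
};
use std::error::Error;

#[tokio::main]
async fn main() -> Result<(), Box<dyn Error>> {
    let client = Client::new();

    let request = CreateEmbeddingRequestArgs::default()
        .model("text-embedding-3-small")
        .input(EmbeddingInput::StringArray(vec![
            "first sentence".to_string(),
            "second sentence".to_string(),
        ]))
        .build()?;

    // Assumed helper that requests base64-encoded vectors; the float variant
    // would go through `embeddings().create(request)` instead.
    let response = client.embeddings().create_base64(request).await?;

    for item in response.data {
        // Decodes the base64 payload as little-endian f32 values via the
        // conversion impl shown above.
        let vector: Vec<f32> = item.embedding.into();
        println!("embedding {} has {} dimensions", item.index, vector.len());
    }
    Ok(())
}
```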
+ pub usage: EmbeddingUsage, +} diff --git a/async-openai/src/types/fine_tuning.rs b/async-openai/src/types/fine_tuning.rs index a5c6d321..bc0932e5 100644 --- a/async-openai/src/types/fine_tuning.rs +++ b/async-openai/src/types/fine_tuning.rs @@ -1,348 +1,348 @@ -use derive_builder::Builder; -use serde::{Deserialize, Serialize}; - -use crate::error::OpenAIError; - -#[derive(Debug, Serialize, Deserialize, Clone, Default, PartialEq)] -#[serde(untagged)] -pub enum NEpochs { - NEpochs(u8), - #[default] - #[serde(rename = "auto")] - Auto, -} - -#[derive(Debug, Serialize, Deserialize, Clone, Default, PartialEq)] -#[serde(untagged)] -pub enum BatchSize { - BatchSize(u16), - #[default] - #[serde(rename = "auto")] - Auto, -} - -#[derive(Debug, Serialize, Deserialize, Clone, Default, PartialEq)] -#[serde(untagged)] -pub enum LearningRateMultiplier { - LearningRateMultiplier(f32), - #[default] - #[serde(rename = "auto")] - Auto, -} - -#[derive(Debug, Serialize, Deserialize, Clone, Default, PartialEq)] -pub struct Hyperparameters { - /// Number of examples in each batch. A larger batch size means that model parameters - /// are updated less frequently, but with lower variance. - pub batch_size: BatchSize, - /// Scaling factor for the learning rate. A smaller learning rate may be useful to avoid - /// overfitting. - pub learning_rate_multiplier: LearningRateMultiplier, - /// The number of epochs to train the model for. An epoch refers to one full cycle through the training dataset. - pub n_epochs: NEpochs, -} - -#[derive(Debug, Serialize, Deserialize, Clone, Default, PartialEq)] -#[serde(untagged)] -pub enum Beta { - Beta(f32), - #[default] - #[serde(rename = "auto")] - Auto, -} - -#[derive(Debug, Serialize, Deserialize, Clone, Default, PartialEq)] -pub struct DPOHyperparameters { - /// The beta value for the DPO method. A higher beta value will increase the weight of the penalty between the policy and reference model. - pub beta: Beta, - /// Number of examples in each batch. A larger batch size means that model parameters - /// are updated less frequently, but with lower variance. - pub batch_size: BatchSize, - /// Scaling factor for the learning rate. A smaller learning rate may be useful to avoid - /// overfitting. - pub learning_rate_multiplier: LearningRateMultiplier, - /// The number of epochs to train the model for. An epoch refers to one full cycle through the training dataset. - pub n_epochs: NEpochs, -} - -#[derive(Debug, Serialize, Deserialize, Clone, Default, Builder, PartialEq)] -#[builder(name = "CreateFineTuningJobRequestArgs")] -#[builder(pattern = "mutable")] -#[builder(setter(into, strip_option), default)] -#[builder(derive(Debug))] -#[builder(build_fn(error = "OpenAIError"))] -pub struct CreateFineTuningJobRequest { - /// The name of the model to fine-tune. You can select one of the - /// [supported models](https://platform.openai.com/docs/guides/fine-tuning#which-models-can-be-fine-tuned). - pub model: String, - - /// The ID of an uploaded file that contains training data. - /// - /// See [upload file](https://platform.openai.com/docs/api-reference/files/create) for how to upload a file. - /// - /// Your dataset must be formatted as a JSONL file. Additionally, you must upload your file with the purpose `fine-tune`. 
- /// - /// The contents of the file should differ depending on if the model uses the [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input), [completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input) format, or if the fine-tuning method uses the [preference](https://platform.openai.com/docs/api-reference/fine-tuning/preference-input) format. - /// - /// See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning) for more details. - pub training_file: String, - - /// The hyperparameters used for the fine-tuning job. - /// This value is now deprecated in favor of `method`, and should be passed in under the `method` parameter. - #[deprecated] - pub hyperparameters: Option, - - /// A string of up to 64 characters that will be added to your fine-tuned model name. - /// - /// For example, a `suffix` of "custom-model-name" would produce a model name like `ft:gpt-4o-mini:openai:custom-model-name:7p4lURel`. - #[serde(skip_serializing_if = "Option::is_none")] - pub suffix: Option, // default: null, minLength:1, maxLength:40 - - /// The ID of an uploaded file that contains validation data. - /// - /// If you provide this file, the data is used to generate validation - /// metrics periodically during fine-tuning. These metrics can be viewed in - /// the fine-tuning results file. - /// The same data should not be present in both train and validation files. - /// - /// Your dataset must be formatted as a JSONL file. You must upload your file with the purpose `fine-tune`. - /// - /// See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning) for more details. - #[serde(skip_serializing_if = "Option::is_none")] - pub validation_file: Option, - - /// A list of integrations to enable for your fine-tuning job. - #[serde(skip_serializing_if = "Option::is_none")] - pub integrations: Option>, - - /// The seed controls the reproducibility of the job. Passing in the same seed and job parameters should produce the same results, but may differ in rare cases. - /// If a seed is not specified, one will be generated for you. - #[serde(skip_serializing_if = "Option::is_none")] - pub seed: Option, // min:0, max: 2147483647 - - #[serde(skip_serializing_if = "Option::is_none")] - pub method: Option, -} - -/// The method used for fine-tuning. -#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] -#[serde(tag = "type", rename_all = "lowercase")] -pub enum FineTuneMethod { - Supervised { - supervised: FineTuneSupervisedMethod, - }, - DPO { - dpo: FineTuneDPOMethod, - }, -} - -#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] -pub struct FineTuneSupervisedMethod { - pub hyperparameters: Hyperparameters, -} - -#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] -pub struct FineTuneDPOMethod { - pub hyperparameters: DPOHyperparameters, -} - -#[derive(Debug, Deserialize, Clone, PartialEq, Serialize, Default)] -#[serde(rename_all = "lowercase")] -pub enum FineTuningJobIntegrationType { - #[default] - Wandb, -} - -#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] -pub struct FineTuningIntegration { - /// The type of integration to enable. Currently, only "wandb" (Weights and Biases) is supported. - pub r#type: FineTuningJobIntegrationType, - - /// The settings for your integration with Weights and Biases. This payload specifies the project that - /// metrics will be sent to. 
Optionally, you can set an explicit display name for your run, add tags - /// to your run, and set a default entity (team, username, etc) to be associated with your run. - pub wandb: WandB, -} - -#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] -pub struct WandB { - /// The name of the project that the new run will be created under. - pub project: String, - /// A display name to set for the run. If not set, we will use the Job ID as the name. - #[serde(skip_serializing_if = "Option::is_none")] - pub name: Option, - /// The entity to use for the run. This allows you to set the team or username of the WandB user that you would - /// like associated with the run. If not set, the default entity for the registered WandB API key is used. - #[serde(skip_serializing_if = "Option::is_none")] - pub entity: Option, - /// A list of tags to be attached to the newly created run. These tags are passed through directly to WandB. Some - /// default tags are generated by OpenAI: "openai/finetune", "openai/{base-model}", "openai/{ftjob-abcdef}". - #[serde(skip_serializing_if = "Option::is_none")] - pub tags: Option>, -} - -/// For fine-tuning jobs that have `failed`, this will contain more information on the cause of the failure. -#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] -pub struct FineTuneJobError { - /// A machine-readable error code. - pub code: String, - /// A human-readable error message. - pub message: String, - /// The parameter that was invalid, usually `training_file` or `validation_file`. - /// This field will be null if the failure was not parameter-specific. - pub param: Option, // nullable true -} - -#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] -#[serde(rename_all = "snake_case")] -pub enum FineTuningJobStatus { - ValidatingFiles, - Queued, - Running, - Succeeded, - Failed, - Cancelled, -} - -/// The `fine_tuning.job` object represents a fine-tuning job that has been created through the API. -#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] -pub struct FineTuningJob { - /// The object identifier, which can be referenced in the API endpoints. - pub id: String, - /// The Unix timestamp (in seconds) for when the fine-tuning job was created. - pub created_at: u32, - /// For fine-tuning jobs that have `failed`, this will contain more information on the cause of the failure. - pub error: Option, - /// The name of the fine-tuned model that is being created. - /// The value will be null if the fine-tuning job is still running. - pub fine_tuned_model: Option, // nullable: true - /// The Unix timestamp (in seconds) for when the fine-tuning job was finished. - /// The value will be null if the fine-tuning job is still running. - pub finished_at: Option, // nullable true - - /// The hyperparameters used for the fine-tuning job. - /// See the [fine-tuning guide](/docs/guides/fine-tuning) for more details. - pub hyperparameters: Hyperparameters, - - /// The base model that is being fine-tuned. - pub model: String, - - /// The object type, which is always "fine_tuning.job". - pub object: String, - /// The organization that owns the fine-tuning job. - pub organization_id: String, - - /// The compiled results file ID(s) for the fine-tuning job. - /// You can retrieve the results with the [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). - pub result_files: Vec, - - /// The current status of the fine-tuning job, which can be either - /// `validating_files`, `queued`, `running`, `succeeded`, `failed`, or `cancelled`. 
- pub status: FineTuningJobStatus, - - /// The total number of billable tokens processed by this fine-tuning job. The value will be null if the fine-tuning job is still running. - pub trained_tokens: Option, - - /// The file ID used for training. You can retrieve the training data with the [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). - pub training_file: String, - - /// The file ID used for validation. You can retrieve the validation results with the [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). - pub validation_file: Option, - - /// A list of integrations to enable for this fine-tuning job. - pub integrations: Option>, // maxItems: 5 - - /// The seed used for the fine-tuning job. - pub seed: u32, - - /// The Unix timestamp (in seconds) for when the fine-tuning job is estimated to finish. The value will be null if the fine-tuning job is not running. - pub estimated_finish: Option, - - pub method: Option, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct ListPaginatedFineTuningJobsResponse { - pub data: Vec, - pub has_more: bool, - pub object: String, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct ListFineTuningJobEventsResponse { - pub data: Vec, - pub object: String, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct ListFineTuningJobCheckpointsResponse { - pub data: Vec, - pub object: String, - pub first_id: Option, - pub last_id: Option, - pub has_more: bool, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(rename_all = "lowercase")] -pub enum Level { - Info, - Warn, - Error, -} - -///Fine-tuning job event object -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct FineTuningJobEvent { - /// The object identifier. - pub id: String, - /// The Unix timestamp (in seconds) for when the fine-tuning job event was created. - pub created_at: u32, - /// The log level of the event. - pub level: Level, - /// The message of the event. - pub message: String, - /// The object type, which is always "fine_tuning.job.event". - pub object: String, - /// The type of event. - pub r#type: Option, - /// The data associated with the event. - pub data: Option, -} - -#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] -#[serde(rename_all = "lowercase")] -pub enum FineTuningJobEventType { - Message, - Metrics, -} - -/// The `fine_tuning.job.checkpoint` object represents a model checkpoint for a fine-tuning job that is ready to use. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct FineTuningJobCheckpoint { - /// The checkpoint identifier, which can be referenced in the API endpoints. - pub id: String, - /// The Unix timestamp (in seconds) for when the checkpoint was created. - pub created_at: u32, - /// The name of the fine-tuned checkpoint model that is created. - pub fine_tuned_model_checkpoint: String, - /// The step number that the checkpoint was created at. - pub step_number: u32, - /// Metrics at the step number during the fine-tuning job. - pub metrics: FineTuningJobCheckpointMetrics, - /// The name of the fine-tuning job that this checkpoint was created from. - pub fine_tuning_job_id: String, - /// The object type, which is always "fine_tuning.job.checkpoint". 
- pub object: String, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct FineTuningJobCheckpointMetrics { - pub step: u32, - pub train_loss: f32, - pub train_mean_token_accuracy: f32, - pub valid_loss: f32, - pub valid_mean_token_accuracy: f32, - pub full_valid_loss: f32, - pub full_valid_mean_token_accuracy: f32, -} +use derive_builder::Builder; +use serde::{Deserialize, Serialize}; + +use crate::error::OpenAIError; + +#[derive(Debug, Serialize, Deserialize, Clone, Default, PartialEq)] +#[serde(untagged)] +pub enum NEpochs { + NEpochs(u8), + #[default] + #[serde(rename = "auto")] + Auto, +} + +#[derive(Debug, Serialize, Deserialize, Clone, Default, PartialEq)] +#[serde(untagged)] +pub enum BatchSize { + BatchSize(u16), + #[default] + #[serde(rename = "auto")] + Auto, +} + +#[derive(Debug, Serialize, Deserialize, Clone, Default, PartialEq)] +#[serde(untagged)] +pub enum LearningRateMultiplier { + LearningRateMultiplier(f32), + #[default] + #[serde(rename = "auto")] + Auto, +} + +#[derive(Debug, Serialize, Deserialize, Clone, Default, PartialEq)] +pub struct Hyperparameters { + /// Number of examples in each batch. A larger batch size means that model parameters + /// are updated less frequently, but with lower variance. + pub batch_size: BatchSize, + /// Scaling factor for the learning rate. A smaller learning rate may be useful to avoid + /// overfitting. + pub learning_rate_multiplier: LearningRateMultiplier, + /// The number of epochs to train the model for. An epoch refers to one full cycle through the training dataset. + pub n_epochs: NEpochs, +} + +#[derive(Debug, Serialize, Deserialize, Clone, Default, PartialEq)] +#[serde(untagged)] +pub enum Beta { + Beta(f32), + #[default] + #[serde(rename = "auto")] + Auto, +} + +#[derive(Debug, Serialize, Deserialize, Clone, Default, PartialEq)] +pub struct DPOHyperparameters { + /// The beta value for the DPO method. A higher beta value will increase the weight of the penalty between the policy and reference model. + pub beta: Beta, + /// Number of examples in each batch. A larger batch size means that model parameters + /// are updated less frequently, but with lower variance. + pub batch_size: BatchSize, + /// Scaling factor for the learning rate. A smaller learning rate may be useful to avoid + /// overfitting. + pub learning_rate_multiplier: LearningRateMultiplier, + /// The number of epochs to train the model for. An epoch refers to one full cycle through the training dataset. + pub n_epochs: NEpochs, +} + +#[derive(Debug, Serialize, Deserialize, Clone, Default, Builder, PartialEq)] +#[builder(name = "CreateFineTuningJobRequestArgs")] +#[builder(pattern = "mutable")] +#[builder(setter(into, strip_option), default)] +#[builder(derive(Debug))] +#[builder(build_fn(error = "OpenAIError"))] +pub struct CreateFineTuningJobRequest { + /// The name of the model to fine-tune. You can select one of the + /// [supported models](https://platform.openai.com/docs/guides/fine-tuning#which-models-can-be-fine-tuned). + pub model: String, + + /// The ID of an uploaded file that contains training data. + /// + /// See [upload file](https://platform.openai.com/docs/api-reference/files/create) for how to upload a file. + /// + /// Your dataset must be formatted as a JSONL file. Additionally, you must upload your file with the purpose `fine-tune`. 
+ /// + /// The contents of the file should differ depending on if the model uses the [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input), [completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input) format, or if the fine-tuning method uses the [preference](https://platform.openai.com/docs/api-reference/fine-tuning/preference-input) format. + /// + /// See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning) for more details. + pub training_file: String, + + /// The hyperparameters used for the fine-tuning job. + /// This value is now deprecated in favor of `method`, and should be passed in under the `method` parameter. + #[deprecated] + pub hyperparameters: Option, + + /// A string of up to 64 characters that will be added to your fine-tuned model name. + /// + /// For example, a `suffix` of "custom-model-name" would produce a model name like `ft:gpt-4o-mini:openai:custom-model-name:7p4lURel`. + #[serde(skip_serializing_if = "Option::is_none")] + pub suffix: Option, // default: null, minLength:1, maxLength:40 + + /// The ID of an uploaded file that contains validation data. + /// + /// If you provide this file, the data is used to generate validation + /// metrics periodically during fine-tuning. These metrics can be viewed in + /// the fine-tuning results file. + /// The same data should not be present in both train and validation files. + /// + /// Your dataset must be formatted as a JSONL file. You must upload your file with the purpose `fine-tune`. + /// + /// See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning) for more details. + #[serde(skip_serializing_if = "Option::is_none")] + pub validation_file: Option, + + /// A list of integrations to enable for your fine-tuning job. + #[serde(skip_serializing_if = "Option::is_none")] + pub integrations: Option>, + + /// The seed controls the reproducibility of the job. Passing in the same seed and job parameters should produce the same results, but may differ in rare cases. + /// If a seed is not specified, one will be generated for you. + #[serde(skip_serializing_if = "Option::is_none")] + pub seed: Option, // min:0, max: 2147483647 + + #[serde(skip_serializing_if = "Option::is_none")] + pub method: Option, +} + +/// The method used for fine-tuning. +#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] +#[serde(tag = "type", rename_all = "lowercase")] +pub enum FineTuneMethod { + Supervised { + supervised: FineTuneSupervisedMethod, + }, + DPO { + dpo: FineTuneDPOMethod, + }, +} + +#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] +pub struct FineTuneSupervisedMethod { + pub hyperparameters: Hyperparameters, +} + +#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] +pub struct FineTuneDPOMethod { + pub hyperparameters: DPOHyperparameters, +} + +#[derive(Debug, Deserialize, Clone, PartialEq, Serialize, Default)] +#[serde(rename_all = "lowercase")] +pub enum FineTuningJobIntegrationType { + #[default] + Wandb, +} + +#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] +pub struct FineTuningIntegration { + /// The type of integration to enable. Currently, only "wandb" (Weights and Biases) is supported. + pub r#type: FineTuningJobIntegrationType, + + /// The settings for your integration with Weights and Biases. This payload specifies the project that + /// metrics will be sent to. 
Optionally, you can set an explicit display name for your run, add tags + /// to your run, and set a default entity (team, username, etc) to be associated with your run. + pub wandb: WandB, +} + +#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] +pub struct WandB { + /// The name of the project that the new run will be created under. + pub project: String, + /// A display name to set for the run. If not set, we will use the Job ID as the name. + #[serde(skip_serializing_if = "Option::is_none")] + pub name: Option, + /// The entity to use for the run. This allows you to set the team or username of the WandB user that you would + /// like associated with the run. If not set, the default entity for the registered WandB API key is used. + #[serde(skip_serializing_if = "Option::is_none")] + pub entity: Option, + /// A list of tags to be attached to the newly created run. These tags are passed through directly to WandB. Some + /// default tags are generated by OpenAI: "openai/finetune", "openai/{base-model}", "openai/{ftjob-abcdef}". + #[serde(skip_serializing_if = "Option::is_none")] + pub tags: Option>, +} + +/// For fine-tuning jobs that have `failed`, this will contain more information on the cause of the failure. +#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] +pub struct FineTuneJobError { + /// A machine-readable error code. + pub code: String, + /// A human-readable error message. + pub message: String, + /// The parameter that was invalid, usually `training_file` or `validation_file`. + /// This field will be null if the failure was not parameter-specific. + pub param: Option, // nullable true +} + +#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] +#[serde(rename_all = "snake_case")] +pub enum FineTuningJobStatus { + ValidatingFiles, + Queued, + Running, + Succeeded, + Failed, + Cancelled, +} + +/// The `fine_tuning.job` object represents a fine-tuning job that has been created through the API. +#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] +pub struct FineTuningJob { + /// The object identifier, which can be referenced in the API endpoints. + pub id: String, + /// The Unix timestamp (in seconds) for when the fine-tuning job was created. + pub created_at: u32, + /// For fine-tuning jobs that have `failed`, this will contain more information on the cause of the failure. + pub error: Option, + /// The name of the fine-tuned model that is being created. + /// The value will be null if the fine-tuning job is still running. + pub fine_tuned_model: Option, // nullable: true + /// The Unix timestamp (in seconds) for when the fine-tuning job was finished. + /// The value will be null if the fine-tuning job is still running. + pub finished_at: Option, // nullable true + + /// The hyperparameters used for the fine-tuning job. + /// See the [fine-tuning guide](/docs/guides/fine-tuning) for more details. + pub hyperparameters: Hyperparameters, + + /// The base model that is being fine-tuned. + pub model: String, + + /// The object type, which is always "fine_tuning.job". + pub object: String, + /// The organization that owns the fine-tuning job. + pub organization_id: String, + + /// The compiled results file ID(s) for the fine-tuning job. + /// You can retrieve the results with the [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). + pub result_files: Vec, + + /// The current status of the fine-tuning job, which can be either + /// `validating_files`, `queued`, `running`, `succeeded`, `failed`, or `cancelled`. 
+ pub status: FineTuningJobStatus, + + /// The total number of billable tokens processed by this fine-tuning job. The value will be null if the fine-tuning job is still running. + pub trained_tokens: Option, + + /// The file ID used for training. You can retrieve the training data with the [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). + pub training_file: String, + + /// The file ID used for validation. You can retrieve the validation results with the [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). + pub validation_file: Option, + + /// A list of integrations to enable for this fine-tuning job. + pub integrations: Option>, // maxItems: 5 + + /// The seed used for the fine-tuning job. + pub seed: u32, + + /// The Unix timestamp (in seconds) for when the fine-tuning job is estimated to finish. The value will be null if the fine-tuning job is not running. + pub estimated_finish: Option, + + pub method: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ListPaginatedFineTuningJobsResponse { + pub data: Vec, + pub has_more: bool, + pub object: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ListFineTuningJobEventsResponse { + pub data: Vec, + pub object: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ListFineTuningJobCheckpointsResponse { + pub data: Vec, + pub object: String, + pub first_id: Option, + pub last_id: Option, + pub has_more: bool, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(rename_all = "lowercase")] +pub enum Level { + Info, + Warn, + Error, +} + +///Fine-tuning job event object +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct FineTuningJobEvent { + /// The object identifier. + pub id: String, + /// The Unix timestamp (in seconds) for when the fine-tuning job event was created. + pub created_at: u32, + /// The log level of the event. + pub level: Level, + /// The message of the event. + pub message: String, + /// The object type, which is always "fine_tuning.job.event". + pub object: String, + /// The type of event. + pub r#type: Option, + /// The data associated with the event. + pub data: Option, +} + +#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] +#[serde(rename_all = "lowercase")] +pub enum FineTuningJobEventType { + Message, + Metrics, +} + +/// The `fine_tuning.job.checkpoint` object represents a model checkpoint for a fine-tuning job that is ready to use. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct FineTuningJobCheckpoint { + /// The checkpoint identifier, which can be referenced in the API endpoints. + pub id: String, + /// The Unix timestamp (in seconds) for when the checkpoint was created. + pub created_at: u32, + /// The name of the fine-tuned checkpoint model that is created. + pub fine_tuned_model_checkpoint: String, + /// The step number that the checkpoint was created at. + pub step_number: u32, + /// Metrics at the step number during the fine-tuning job. + pub metrics: FineTuningJobCheckpointMetrics, + /// The name of the fine-tuning job that this checkpoint was created from. + pub fine_tuning_job_id: String, + /// The object type, which is always "fine_tuning.job.checkpoint". 
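For orientation, a hedged sketch of creating a supervised fine-tuning job with the request and method types above. The `client.fine_tuning()` group name and import paths are assumptions, and `"file-abc123"` is a placeholder file ID:

```rust
use async_openai::{
    types::{
        CreateFineTuningJobRequestArgs, FineTuneMethod, FineTuneSupervisedMethod, Hyperparameters,
    },
    Client,
};
use std::error::Error;

#[tokio::main]
async fn main() -> Result<(), Box<dyn Error>> {
    let client = Client::new();

    // All hyperparameters default to `auto`, matching the enum defaults above.
    let request = CreateFineTuningJobRequestArgs::default()
        .model("gpt-4o-mini") // placeholder base model name
        .training_file("file-abc123") // ID of a JSONL file uploaded with purpose `fine-tune`
        .suffix("custom-model-name")
        .method(FineTuneMethod::Supervised {
            supervised: FineTuneSupervisedMethod {
                hyperparameters: Hyperparameters::default(),
            },
        })
        .build()?;

    // Assumed client API group; adjust to the crate's actual method name.
    let job = client.fine_tuning().create(request).await?;
    println!("created job {} with status {:?}", job.id, job.status);
    Ok(())
}
```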
+ pub object: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct FineTuningJobCheckpointMetrics { + pub step: u32, + pub train_loss: f32, + pub train_mean_token_accuracy: f32, + pub valid_loss: f32, + pub valid_mean_token_accuracy: f32, + pub full_valid_loss: f32, + pub full_valid_mean_token_accuracy: f32, +} diff --git a/async-openai/src/types/image.rs b/async-openai/src/types/image.rs deleted file mode 100644 index a81cf6b2..00000000 --- a/async-openai/src/types/image.rs +++ /dev/null @@ -1,214 +0,0 @@ -use derive_builder::Builder; -use serde::{Deserialize, Serialize}; - -use crate::error::OpenAIError; - -use super::InputSource; - -#[derive(Default, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] -pub enum ImageSize { - #[serde(rename = "256x256")] - S256x256, - #[serde(rename = "512x512")] - S512x512, - #[default] - #[serde(rename = "1024x1024")] - S1024x1024, - #[serde(rename = "1792x1024")] - S1792x1024, - #[serde(rename = "1024x1792")] - S1024x1792, -} - -#[derive(Default, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] -pub enum DallE2ImageSize { - #[serde(rename = "256x256")] - S256x256, - #[serde(rename = "512x512")] - S512x512, - #[default] - #[serde(rename = "1024x1024")] - S1024x1024, -} - -#[derive(Debug, Serialize, Deserialize, Default, Clone, Copy, PartialEq)] -#[serde(rename_all = "lowercase")] -pub enum ImageResponseFormat { - #[default] - Url, - #[serde(rename = "b64_json")] - B64Json, -} - -#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)] -pub enum ImageModel { - #[default] - #[serde(rename = "dall-e-2")] - DallE2, - #[serde(rename = "dall-e-3")] - DallE3, - #[serde(untagged)] - Other(String), -} - -#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)] -#[serde(rename_all = "lowercase")] -pub enum ImageQuality { - #[default] - Standard, - HD, - High, - Medium, - Low, - Auto, -} - -#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)] -#[serde(rename_all = "lowercase")] -pub enum ImageStyle { - #[default] - Vivid, - Natural, -} - -#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)] -#[serde(rename_all = "lowercase")] -pub enum ImageModeration { - #[default] - Auto, - Low, -} - -#[derive(Debug, Clone, Serialize, Deserialize, Default, Builder, PartialEq)] -#[builder(name = "CreateImageRequestArgs")] -#[builder(pattern = "mutable")] -#[builder(setter(into, strip_option), default)] -#[builder(derive(Debug))] -#[builder(build_fn(error = "OpenAIError"))] -pub struct CreateImageRequest { - /// A text description of the desired image(s). The maximum length is 1000 characters for `dall-e-2` - /// and 4000 characters for `dall-e-3`. - pub prompt: String, - - /// The model to use for image generation. - #[serde(skip_serializing_if = "Option::is_none")] - pub model: Option, - - /// The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only `n=1` is supported. - #[serde(skip_serializing_if = "Option::is_none")] - pub n: Option, // min:1 max:10 default:1 - - /// The quality of the image that will be generated. `hd` creates images with finer details and greater - /// consistency across the image. This param is only supported for `dall-e-3`. - #[serde(skip_serializing_if = "Option::is_none")] - pub quality: Option, - - /// The format in which the generated images are returned. Must be one of `url` or `b64_json`. URLs are only valid for 60 minutes after the image has been generated. 
- #[serde(skip_serializing_if = "Option::is_none")] - pub response_format: Option, - - /// The size of the generated images. Must be one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`. - /// Must be one of `1024x1024`, `1792x1024`, or `1024x1792` for `dall-e-3` models. - #[serde(skip_serializing_if = "Option::is_none")] - pub size: Option, - - /// The style of the generated images. Must be one of `vivid` or `natural`. - /// Vivid causes the model to lean towards generating hyper-real and dramatic images. - /// Natural causes the model to produce more natural, less hyper-real looking images. - /// This param is only supported for `dall-e-3`. - #[serde(skip_serializing_if = "Option::is_none")] - pub style: Option, - - /// A unique identifier representing your end-user, which will help OpenAI to monitor and detect abuse. [Learn more](https://platform.openai.com/docs/usage-policies/end-user-ids). - #[serde(skip_serializing_if = "Option::is_none")] - pub user: Option, - - /// Control the content-moderation level for images generated by gpt-image-1. - /// Must be either `low` for less restrictive filtering or `auto` (default value). - #[serde(skip_serializing_if = "Option::is_none")] - pub moderation: Option, -} - -#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] -#[serde(untagged)] -pub enum Image { - /// The URL of the generated image, if `response_format` is `url` (default). - Url { - url: String, - revised_prompt: Option, - }, - /// The base64-encoded JSON of the generated image, if `response_format` is `b64_json`. - B64Json { - b64_json: std::sync::Arc, - revised_prompt: Option, - }, -} - -#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] -pub struct ImagesResponse { - pub created: u32, - pub data: Vec>, -} - -#[derive(Debug, Default, Clone, PartialEq)] -pub struct ImageInput { - pub source: InputSource, -} - -#[derive(Debug, Clone, Default, Builder, PartialEq)] -#[builder(name = "CreateImageEditRequestArgs")] -#[builder(pattern = "mutable")] -#[builder(setter(into, strip_option), default)] -#[builder(derive(Debug))] -#[builder(build_fn(error = "OpenAIError"))] -pub struct CreateImageEditRequest { - /// The image to edit. Must be a valid PNG file, less than 4MB, and square. If mask is not provided, image must have transparency, which will be used as the mask. - pub image: Vec, - - /// A text description of the desired image(s). The maximum length is 1000 characters. - pub prompt: String, - - /// An additional image whose fully transparent areas (e.g. where alpha is zero) indicate where `image` should be edited. Must be a valid PNG file, less than 4MB, and have the same dimensions as `image`. - pub mask: Option, - - /// The model to use for image generation. Only `dall-e-2` is supported at this time. - pub model: Option, - - /// The number of images to generate. Must be between 1 and 10. - pub n: Option, // min:1 max:10 default:1 - - /// The size of the generated images. Must be one of `256x256`, `512x512`, or `1024x1024`. - pub size: Option, - - /// The format in which the generated images are returned. Must be one of `url` or `b64_json`. - pub response_format: Option, - - /// A unique identifier representing your end-user, which will help OpenAI to monitor and detect abuse. [Learn more](https://platform.openai.com/docs/usage-policies/end-user-ids). 
- pub user: Option, -} - -#[derive(Debug, Default, Clone, Builder, PartialEq)] -#[builder(name = "CreateImageVariationRequestArgs")] -#[builder(pattern = "mutable")] -#[builder(setter(into, strip_option), default)] -#[builder(derive(Debug))] -#[builder(build_fn(error = "OpenAIError"))] -pub struct CreateImageVariationRequest { - /// The image to use as the basis for the variation(s). Must be a valid PNG file, less than 4MB, and square. - pub image: ImageInput, - - /// The model to use for image generation. Only `dall-e-2` is supported at this time. - pub model: Option, - - /// The number of images to generate. Must be between 1 and 10. - pub n: Option, // min:1 max:10 default:1 - - /// The size of the generated images. Must be one of `256x256`, `512x512`, or `1024x1024`. - pub size: Option, - - /// The format in which the generated images are returned. Must be one of `url` or `b64_json`. - pub response_format: Option, - - /// A unique identifier representing your end-user, which will help OpenAI to monitor and detect abuse. [Learn more](https://platform.openai.com/docs/usage-policies/end-user-ids). - pub user: Option, -} diff --git a/async-openai/src/types/images/image.rs b/async-openai/src/types/images/image.rs new file mode 100644 index 00000000..3376823a --- /dev/null +++ b/async-openai/src/types/images/image.rs @@ -0,0 +1,418 @@ +use derive_builder::Builder; +use serde::{Deserialize, Serialize}; + +use crate::error::OpenAIError; +use crate::types::InputSource; + +#[derive(Default, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] +pub enum ImageSize { + #[default] + #[serde(rename = "auto")] + Auto, + #[serde(rename = "256x256")] + S256x256, + #[serde(rename = "512x512")] + S512x512, + #[serde(rename = "1024x1024")] + S1024x1024, + #[serde(rename = "1792x1024")] + S1792x1024, + #[serde(rename = "1024x1792")] + S1024x1792, + #[serde(rename = "1536x1024")] + S1536x1024, + #[serde(rename = "1024x1536")] + S1024x1536, +} + +#[derive(Default, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] +pub enum DallE2ImageSize { + #[serde(rename = "256x256")] + S256x256, + #[serde(rename = "512x512")] + S512x512, + #[default] + #[serde(rename = "1024x1024")] + S1024x1024, +} + +#[derive(Default, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] +pub enum DallE3ImageSize { + #[default] + #[serde(rename = "1024x1024")] + S1024x1024, + #[serde(rename = "1792x1024")] + S1792x1024, + #[serde(rename = "1024x1792")] + S1024x1792, +} + +#[derive(Default, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] +pub enum GptImage1ImageSize { + #[default] + #[serde(rename = "auto")] + Auto, + #[serde(rename = "1024x1024")] + S1024x1024, + #[serde(rename = "1536x1024")] + S1536x1024, + #[serde(rename = "1024x1536")] + S1024x1536, +} + +#[derive(Debug, Serialize, Deserialize, Default, Clone, Copy, PartialEq)] +#[serde(rename_all = "lowercase")] +pub enum ImageResponseFormat { + #[default] + Url, + #[serde(rename = "b64_json")] + B64Json, +} + +#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)] +pub enum ImageModel { + #[serde(rename = "gpt-image-1")] + GptImage1, + #[serde(rename = "gpt-image-1-mini")] + GptImage1Mini, + #[default] + #[serde(rename = "dall-e-2")] + DallE2, + #[serde(rename = "dall-e-3")] + DallE3, + #[serde(untagged)] + Other(String), +} + +#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)] +#[serde(rename_all = "lowercase")] +pub enum ImageQuality { + Standard, + HD, + High, + Medium, + Low, + #[default] + Auto, +} + +#[derive(Debug, Serialize, 
Deserialize, Default, Clone, PartialEq)] +#[serde(rename_all = "lowercase")] +pub enum ImageStyle { + #[default] + Vivid, + Natural, +} + +#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)] +#[serde(rename_all = "lowercase")] +pub enum ImageModeration { + #[default] + Auto, + Low, +} + +#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)] +#[serde(rename_all = "lowercase")] +pub enum ImageOutputFormat { + #[default] + Png, + Jpeg, + Webp, +} + +#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)] +#[serde(rename_all = "lowercase")] +pub enum ImageBackground { + #[default] + Auto, + Transparent, + Opaque, +} + +#[derive(Debug, Clone, Serialize, Deserialize, Default, Builder, PartialEq)] +#[builder(name = "CreateImageRequestArgs")] +#[builder(pattern = "mutable")] +#[builder(setter(into, strip_option), default)] +#[builder(derive(Debug))] +#[builder(build_fn(error = "OpenAIError"))] +pub struct CreateImageRequest { + /// A text description of the desired image(s). The maximum length is 32000 characters for + /// `gpt-image-1`, 1000 characters for `dall-e-2` and 4000 characters for `dall-e-3`. + pub prompt: String, + + /// The model to use for image generation. One of `dall-e-2`, `dall-e-3`, or `gpt-image-1`. Defaults + /// to `dall-e-2` unless a parameter specific to `gpt-image-1` is used. + #[serde(skip_serializing_if = "Option::is_none")] + pub model: Option, + + /// The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only `n=1` is supported. + #[serde(skip_serializing_if = "Option::is_none")] + pub n: Option, // min:1 max:10 default:1 + + /// The quality of the image that will be generated. + /// + /// - `auto` (default value) will automatically select the best quality for the given model. + /// - `high`, `medium` and `low` are supported for `gpt-image-1`. + /// - `hd` and `standard` are supported for `dall-e-3`. + /// - `standard` is the only option for `dall-e-2`. + #[serde(skip_serializing_if = "Option::is_none")] + pub quality: Option, + + /// The format in which generated images with `dall-e-2` and `dall-e-3` are returned. Must be one of + /// `url` or `b64_json`. URLs are only valid for 60 minutes after the image has been generated. This + /// parameter isn't supported for `gpt-image-1` which will always return base64-encoded images. + #[serde(skip_serializing_if = "Option::is_none")] + pub response_format: Option, + + /// The format in which the generated images are returned. This parameter is only supported for + /// `gpt-image-1`. Must be one of `png`, `jpeg`, or `webp`. + #[serde(skip_serializing_if = "Option::is_none")] + pub output_format: Option, + + /// The compression level (0-100%) for the generated images. This parameter is only supported for + /// `gpt-image-1` with the `webp` or `jpeg` output formats, and defaults to 100. + #[serde(skip_serializing_if = "Option::is_none")] + pub output_compression: Option, + + /// Generate the image in streaming mode. Defaults to `false`. See the + /// [Image generation guide](https://platform.openai.com/docs/guides/image-generation) for more + /// information. This parameter is only supported for `gpt-image-1`. + #[serde(skip_serializing_if = "Option::is_none")] + pub stream: Option, + + /// The number of partial images to generate. This parameter is used for + /// streaming responses that return partial images. Value must be between 0 and 3. + /// When set to 0, the response will be a single image sent in one streaming event. 
+ /// Note that the final image may be sent before the full number of partial images + /// are generated if the full image is generated more quickly. + #[serde(skip_serializing_if = "Option::is_none")] + pub partial_images: Option, + + /// The size of the generated images. Must be one of `1024x1024`, `1536x1024` (landscape), `1024x1536` + /// (portrait), or `auto` (default value) for `gpt-image-1`, one of `256x256`, `512x512`, or + /// `1024x1024` for `dall-e-2`, and one of `1024x1024`, `1792x1024`, or `1024x1792` for `dall-e-3`. + #[serde(skip_serializing_if = "Option::is_none")] + pub size: Option, + + /// Control the content-moderation level for images generated by `gpt-image-1`. Must be either `low` + /// for less restrictive filtering or `auto` (default value). + #[serde(skip_serializing_if = "Option::is_none")] + pub moderation: Option, + + /// Allows to set transparency for the background of the generated image(s). + /// This parameter is only supported for `gpt-image-1`. Must be one of + /// `transparent`, `opaque` or `auto` (default value). When `auto` is used, the + /// model will automatically determine the best background for the image. + /// If `transparent`, the output format needs to support transparency, so it + /// should be set to either `png` (default value) or `webp`. + #[serde(skip_serializing_if = "Option::is_none")] + pub background: Option, + + /// The style of the generated images. This parameter is only supported for `dall-e-3`. Must be one of + ///`vivid` or `natural`. Vivid causes the model to lean towards generating hyper-real and dramatic + /// images. Natural causes the model to produce more natural, less hyper-real looking images. + #[serde(skip_serializing_if = "Option::is_none")] + pub style: Option, + + /// A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. + ///[Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + #[serde(skip_serializing_if = "Option::is_none")] + pub user: Option, +} + +#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] +#[serde(untagged)] +pub enum Image { + /// The URL of the generated image, if `response_format` is `url` (default). + Url { + url: String, + revised_prompt: Option, + }, + /// The base64-encoded JSON of the generated image, if `response_format` is `b64_json`. + B64Json { + b64_json: std::sync::Arc, + revised_prompt: Option, + }, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(rename_all = "lowercase")] +pub enum ImageResponseBackground { + Transparent, + Opaque, +} + +#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] +pub struct ImageGenInputUsageDetails { + /// The number of text tokens in the input prompt. + pub text_tokens: u32, + /// The number of image tokens in the input prompt. + pub image_tokens: u32, +} + +#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] +pub struct ImageGenUsage { + /// The number of tokens (images and text) in the input prompt. + pub input_tokens: u32, + /// The total number of tokens (images and text) used for the image generation. + pub total_tokens: u32, + /// The number of output tokens generated by the model. + pub output_tokens: u32, + /// The input tokens detailed information for the image generation. + pub input_tokens_details: ImageGenInputUsageDetails, +} + +#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] +pub struct ImagesResponse { + /// The Unix timestamp (in seconds) of when the image was created. 
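A minimal generation sketch using the `gpt-image-1` request fields defined above. It assumes the `Images::generate` method introduced elsewhere in this PR and that `ImagesResponse::data` holds shared (`Arc`) `Image` values:

```rust
use async_openai::{
    types::images::{
        CreateImageRequestArgs, Image, ImageBackground, ImageModel, ImageOutputFormat,
        ImageQuality, ImageSize,
    },
    Client,
};
use std::error::Error;

#[tokio::main]
async fn main() -> Result<(), Box<dyn Error>> {
    let client = Client::new();

    let request = CreateImageRequestArgs::default()
        .model(ImageModel::GptImage1)
        .prompt("A watercolor painting of a lighthouse at dawn")
        .quality(ImageQuality::High)
        .size(ImageSize::S1024x1024)
        .background(ImageBackground::Transparent)
        .output_format(ImageOutputFormat::Png)
        .build()?;

    // `gpt-image-1` always returns base64 data, so `response_format` is left unset.
    let response = client.images().generate(request).await?;

    for image in &response.data {
        if let Image::B64Json { b64_json, .. } = image.as_ref() {
            println!("received a base64 payload of {} characters", b64_json.len());
        }
    }
    Ok(())
}
```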
+ pub created: u32, + /// The list of generated images. + pub data: Vec>, + /// The background parameter used for the image generation. Either `transparent` or `opaque`. + pub background: Option, + /// The output format of the image generation. Either `png`, `webp`, or `jpeg`. + pub output_format: Option, + /// The size of the generated image. Either `1024x1024`, `1536x1024`, `1024x1536`. + pub size: Option, + /// The quality of the image generated. Either `low`, `medium`, or `high`. + pub quality: Option, + /// For `gpt-image-1` only, the token usage information for the image generation. + pub usage: Option, +} + +#[derive(Debug, Default, Clone, PartialEq)] +pub struct ImageInput { + pub source: InputSource, +} + +#[derive(Debug, Default, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "lowercase")] +pub enum InputFidelity { + High, + #[default] + Low, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum ImageEditInput { + Image(ImageInput), + Images(Vec), +} + +#[derive(Debug, Clone, Default, Builder, PartialEq)] +#[builder(name = "CreateImageEditRequestArgs")] +#[builder(pattern = "mutable")] +#[builder(setter(into, strip_option), default)] +#[builder(derive(Debug))] +#[builder(build_fn(error = "OpenAIError"))] +pub struct CreateImageEditRequest { + /// The image(s) to edit. Must be a supported image file or an array of images. + /// + /// For `gpt-image-1`, each image should be a `png`, `webp`, or `jpg` file less + /// than 50MB. You can provide up to 16 images. + /// + /// For `dall-e-2`, you can only provide one image, and it should be a square + /// `png` file less than 4MB. + pub image: ImageEditInput, + + /// A text description of the desired image(s). The maximum length is 1000 characters + /// for `dall-e-2`, and 32000 characters for `gpt-image-1`. + pub prompt: String, + + /// An additional image whose fully transparent areas (e.g. where alpha is zero) indicate where + /// `image` should be edited. If there are multiple images provided, the mask will be applied on the + /// first image. Must be a valid PNG file, less than 4MB, and have the same dimensions as `image`. + pub mask: Option, + + /// Allows to set transparency for the background of the generated image(s). + /// This parameter is only supported for `gpt-image-1`. Must be one of + /// `transparent`, `opaque` or `auto` (default value). When `auto` is used, the + /// model will automatically determine the best background for the image. + /// + /// If `transparent`, the output format needs to support transparency, so it + /// should be set to either `png` (default value) or `webp`. + pub background: Option, + + /// The model to use for image generation. Only `dall-e-2` and `gpt-image-1` are supported. + /// Defaults to `dall-e-2` unless a parameter specific to `gpt-image-1` is used. + pub model: Option, + + /// The number of images to generate. Must be between 1 and 10. + pub n: Option, // min:1 max:10 default:1 + + /// The size of the generated images. Must be one of `1024x1024`, `1536x1024` (landscape), + /// `1024x1536` (portrait), or `auto` (default value) for `gpt-image-1`, and one of `256x256`, + /// `512x512`, or `1024x1024` for `dall-e-2`. + pub size: Option, + + /// The format in which the generated images are returned. Must be one of `url` or `b64_json`. URLs + /// are only valid for 60 minutes after the image has been generated. This parameter is only supported + /// for `dall-e-2`, as `gpt-image-1` will always return base64-encoded images. 
+ pub response_format: Option, + + /// The format in which the generated images are returned. This parameter is + /// only supported for `gpt-image-1`. Must be one of `png`, `jpeg`, or `webp`. + /// The default value is `png`. + pub output_format: Option, + + /// The compression level (0-100%) for the generated images. This parameter + /// is only supported for `gpt-image-1` with the `webp` or `jpeg` output + /// formats, and defaults to 100. + pub output_compression: Option, + + /// A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. + /// [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + pub user: Option, + + /// Control how much effort the model will exert to match the style and features, especially facial + /// features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for + /// `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`. + pub input_fidelity: Option, + + /// Edit the image in streaming mode. Defaults to `false`. See the + /// [Image generation guide](https://platform.openai.com/docs/guides/image-generation) for more + /// information. + pub stream: Option, + + /// The number of partial images to generate. This parameter is used for + /// streaming responses that return partial images. Value must be between 0 and 3. + /// When set to 0, the response will be a single image sent in one streaming event. + + /// Note that the final image may be sent before the full number of partial images + /// are generated if the full image is generated more quickly. + pub partial_images: Option, + + /// The quality of the image that will be generated. `high`, `medium` and `low` are only supported for + /// `gpt-image-1`. `dall-e-2` only supports `standard` quality. Defaults to `auto`. + pub quality: Option, +} + +#[derive(Debug, Default, Clone, Builder, PartialEq)] +#[builder(name = "CreateImageVariationRequestArgs")] +#[builder(pattern = "mutable")] +#[builder(setter(into, strip_option), default)] +#[builder(derive(Debug))] +#[builder(build_fn(error = "OpenAIError"))] +pub struct CreateImageVariationRequest { + /// The image to use as the basis for the variation(s). Must be a valid PNG file, less than 4MB, and + /// square. + pub image: ImageInput, + + /// The model to use for image generation. Only `dall-e-2` is supported at this time. + pub model: Option, + + /// The number of images to generate. Must be between 1 and 10. + pub n: Option, // min:1 max:10 default:1 + + /// The format in which the generated images are returned. Must be one of `url` or `b64_json`. URLs + /// are only valid for 60 minutes after the image has been generated. + pub response_format: Option, + + /// The size of the generated images. Must be one of `256x256`, `512x512`, or `1024x1024`. + pub size: Option, + + /// A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. + /// [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). 
+ pub user: Option, +} diff --git a/async-openai/src/types/images/mod.rs b/async-openai/src/types/images/mod.rs new file mode 100644 index 00000000..13a6e0e1 --- /dev/null +++ b/async-openai/src/types/images/mod.rs @@ -0,0 +1,5 @@ +mod image; +mod stream; + +pub use image::*; +pub use stream::*; diff --git a/async-openai/src/types/images/stream.rs b/async-openai/src/types/images/stream.rs new file mode 100644 index 00000000..7b867313 --- /dev/null +++ b/async-openai/src/types/images/stream.rs @@ -0,0 +1,156 @@ +use std::pin::Pin; + +use futures::Stream; +use serde::{Deserialize, Serialize}; + +use crate::{ + error::OpenAIError, + traits::EventType, + types::images::{ImageBackground, ImageGenUsage, ImageOutputFormat, ImageQuality, ImageSize}, +}; + +/// Emitted when a partial image is available during image generation streaming. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ImageGenPartialImageEvent { + /// Base64-encoded partial image data, suitable for rendering as an image. + pub b64_json: String, + /// The Unix timestamp when the event was created. + pub created_at: u32, + /// The size of the requested image. + pub size: ImageSize, + /// The quality setting for the requested image. + pub quality: ImageQuality, + /// The background setting for the requested image. + pub background: ImageBackground, + /// The output format for the requested image. + pub output_format: ImageOutputFormat, + /// 0-based index for the partial image (streaming). + pub partial_image_index: u8, +} + +/// Emitted when image generation has completed and the final image is available. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ImageGenCompletedEvent { + /// Base64-encoded image data, suitable for rendering as an image. + pub b64_json: String, + /// The Unix timestamp when the event was created. + pub created_at: u32, + /// The size of the generated image. + pub size: ImageSize, + /// The quality setting for the generated image. + pub quality: ImageQuality, + /// The background setting for the generated image. + pub background: ImageBackground, + /// The output format for the generated image. + pub output_format: ImageOutputFormat, + /// Token usage information for the image generation. + pub usage: ImageGenUsage, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(tag = "type")] +pub enum ImageGenStreamEvent { + /// Emitted when a partial image is available during image generation streaming. + #[serde(rename = "image_generation.partial_image")] + PartialImage(ImageGenPartialImageEvent), + /// Emitted when image generation has completed and the final image is available. + #[serde(rename = "image_generation.completed")] + Completed(ImageGenCompletedEvent), +} + +pub type ImageGenStream = + Pin> + Send>>; + +/// Emitted when a partial image is available during image editing streaming. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ImageEditPartialImageEvent { + /// Base64-encoded partial image data, suitable for rendering as an image. + pub b64_json: String, + /// The Unix timestamp when the event was created. + pub created_at: u32, + /// The size of the requested edited image. + pub size: ImageSize, + /// The quality setting for the requested edited image. + pub quality: ImageQuality, + /// The background setting for the requested edited image. + pub background: ImageBackground, + /// The output format for the requested edited image. + pub output_format: ImageOutputFormat, + /// 0-based index for the partial image (streaming). 
+ pub partial_image_index: u8, +} + +/// Emitted when image editing has completed and the final image is available. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ImageEditCompletedEvent { + /// Base64-encoded final image data, suitable for rendering as an image. + pub b64_json: String, + /// The Unix timestamp when the event was created. + pub created_at: u32, + /// The size of the edited image. + pub size: ImageSize, + /// The quality setting for the edited image. + pub quality: ImageQuality, + /// The background setting for the edited image. + pub background: ImageBackground, + /// The output format for the edited image. + pub output_format: ImageOutputFormat, + /// Token usage information for the image edit. + pub usage: ImageGenUsage, +} + +pub type ImageEditStream = + Pin> + Send>>; + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(tag = "type")] +pub enum ImageEditStreamEvent { + /// Emitted when a partial image is available during image editing streaming. + #[serde(rename = "image_edit.partial_image")] + PartialImage(ImageEditPartialImageEvent), + /// Emitted when image editing has completed and the final image is available. + #[serde(rename = "image_edit.completed")] + Completed(ImageEditCompletedEvent), +} + +impl EventType for ImageGenPartialImageEvent { + fn event_type(&self) -> &'static str { + "image_generation.partial_image" + } +} + +impl EventType for ImageGenCompletedEvent { + fn event_type(&self) -> &'static str { + "image_generation.completed" + } +} + +impl EventType for ImageGenStreamEvent { + fn event_type(&self) -> &'static str { + match self { + ImageGenStreamEvent::PartialImage(event) => event.event_type(), + ImageGenStreamEvent::Completed(event) => event.event_type(), + } + } +} + +impl EventType for ImageEditPartialImageEvent { + fn event_type(&self) -> &'static str { + "image_edit.partial_image" + } +} + +impl EventType for ImageEditCompletedEvent { + fn event_type(&self) -> &'static str { + "image_edit.completed" + } +} + +impl EventType for ImageEditStreamEvent { + fn event_type(&self) -> &'static str { + match self { + ImageEditStreamEvent::PartialImage(event) => event.event_type(), + ImageEditStreamEvent::Completed(event) => event.event_type(), + } + } +} diff --git a/async-openai/src/types/impls.rs b/async-openai/src/types/impls.rs index 5bbd160c..cc353974 100644 --- a/async-openai/src/types/impls.rs +++ b/async-openai/src/types/impls.rs @@ -9,6 +9,7 @@ use crate::{ traits::AsyncTryFrom, types::{ audio::{TranscriptionChunkingStrategy, TranslationResponseFormat}, + images::{ImageBackground, ImageEditInput, ImageOutputFormat, ImageQuality, InputFidelity}, InputSource, VideoSize, }, util::{create_all_dir, create_file_part}, @@ -21,6 +22,10 @@ use super::{ AudioInput, AudioResponseFormat, CreateSpeechResponse, CreateTranscriptionRequest, CreateTranslationRequest, TimestampGranularity, TranscriptionInclude, }, + images::{ + CreateImageEditRequest, CreateImageVariationRequest, DallE2ImageSize, Image, ImageInput, + ImageModel, ImageResponseFormat, ImageSize, ImagesResponse, + }, responses::{EasyInputContent, Role as ResponsesRole}, AddUploadPartRequest, ChatCompletionFunctionCall, ChatCompletionFunctions, ChatCompletionNamedToolChoice, ChatCompletionRequestAssistantMessage, @@ -32,10 +37,8 @@ use super::{ ChatCompletionRequestToolMessage, ChatCompletionRequestToolMessageContent, ChatCompletionRequestUserMessage, ChatCompletionRequestUserMessageContent, ChatCompletionRequestUserMessageContentPart, ChatCompletionToolChoiceOption, - 
CreateContainerFileRequest, CreateFileRequest, CreateImageEditRequest, - CreateImageVariationRequest, CreateMessageRequestContent, CreateVideoRequest, DallE2ImageSize, - EmbeddingInput, FileExpiresAfterAnchor, FileInput, FilePurpose, FunctionName, Image, - ImageInput, ImageModel, ImageResponseFormat, ImageSize, ImageUrl, ImagesResponse, + CreateContainerFileRequest, CreateFileRequest, CreateMessageRequestContent, CreateVideoRequest, + EmbeddingInput, FileExpiresAfterAnchor, FileInput, FilePurpose, FunctionName, ImageUrl, ModerationInput, Prompt, Role, Stop, }; @@ -168,6 +171,99 @@ impl_input!(AudioInput); impl_input!(FileInput); impl_input!(ImageInput); +impl Default for ImageEditInput { + fn default() -> Self { + Self::Image(ImageInput::default()) + } +} + +impl From for ImageEditInput { + fn from(value: ImageInput) -> Self { + Self::Image(value) + } +} + +impl From> for ImageEditInput { + fn from(value: Vec) -> Self { + Self::Images(value) + } +} + +// Single path-like values +impl From<&str> for ImageEditInput { + fn from(value: &str) -> Self { + Self::Image(value.into()) + } +} + +impl From for ImageEditInput { + fn from(value: String) -> Self { + Self::Image(value.into()) + } +} + +impl From<&Path> for ImageEditInput { + fn from(value: &Path) -> Self { + Self::Image(value.into()) + } +} + +impl From for ImageEditInput { + fn from(value: PathBuf) -> Self { + Self::Image(value.into()) + } +} + +// Arrays of path-like values +impl From<[&str; N]> for ImageEditInput { + fn from(value: [&str; N]) -> Self { + Self::Images(value.into_iter().map(|v| ImageInput::from(v)).collect()) + } +} + +impl From<[String; N]> for ImageEditInput { + fn from(value: [String; N]) -> Self { + Self::Images(value.into_iter().map(|v| ImageInput::from(v)).collect()) + } +} + +impl From<[&Path; N]> for ImageEditInput { + fn from(value: [&Path; N]) -> Self { + Self::Images(value.into_iter().map(|v| ImageInput::from(v)).collect()) + } +} + +impl From<[PathBuf; N]> for ImageEditInput { + fn from(value: [PathBuf; N]) -> Self { + Self::Images(value.into_iter().map(|v| ImageInput::from(v)).collect()) + } +} + +// Vectors of path-like values +impl<'a> From> for ImageEditInput { + fn from(value: Vec<&'a str>) -> Self { + Self::Images(value.into_iter().map(|v| ImageInput::from(v)).collect()) + } +} + +impl From> for ImageEditInput { + fn from(value: Vec) -> Self { + Self::Images(value.into_iter().map(|v| ImageInput::from(v)).collect()) + } +} + +impl From> for ImageEditInput { + fn from(value: Vec<&Path>) -> Self { + Self::Images(value.into_iter().map(|v| ImageInput::from(v)).collect()) + } +} + +impl From> for ImageEditInput { + fn from(value: Vec) -> Self { + Self::Images(value.into_iter().map(|v| ImageInput::from(v)).collect()) + } +} + impl Display for VideoSize { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( @@ -194,6 +290,9 @@ impl Display for ImageSize { Self::S1024x1024 => "1024x1024", Self::S1792x1024 => "1792x1024", Self::S1024x1792 => "1024x1792", + Self::S1536x1024 => "1536x1024", + Self::S1024x1536 => "1024x1536", + Self::Auto => "auto", } ) } @@ -221,12 +320,72 @@ impl Display for ImageModel { match self { Self::DallE2 => "dall-e-2", Self::DallE3 => "dall-e-3", + Self::GptImage1 => "gpt-image-1", + Self::GptImage1Mini => "gpt-image-1-mini", Self::Other(other) => other, } ) } } +impl Display for ImageBackground { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}", + match self { + Self::Transparent => "transparent", + Self::Opaque => 
"opaque", + Self::Auto => "auto", + } + ) + } +} + +impl Display for ImageOutputFormat { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}", + match self { + Self::Png => "png", + Self::Jpeg => "jpeg", + Self::Webp => "webp", + } + ) + } +} + +impl Display for InputFidelity { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}", + match self { + Self::High => "high", + Self::Low => "low", + } + ) + } +} + +impl Display for ImageQuality { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}", + match self { + Self::Low => "low", + Self::Medium => "medium", + Self::High => "high", + Self::Auto => "auto", + Self::Standard => "standard", + Self::HD => "hd", + } + ) + } +} + impl Display for ImageResponseFormat { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( @@ -993,12 +1152,19 @@ impl AsyncTryFrom for reqwest::multipart::Form { type Error = OpenAIError; async fn try_from(request: CreateImageEditRequest) -> Result { - let mut form = reqwest::multipart::Form::new() - .text("prompt", request.prompt); + let mut form = reqwest::multipart::Form::new().text("prompt", request.prompt); - for image in request.image { - let image_part = create_file_part(image.source).await?; - form = form.part("image[]", image_part); + match request.image { + ImageEditInput::Image(image) => { + let image_part = create_file_part(image.source).await?; + form = form.part("image", image_part); + } + ImageEditInput::Images(images) => { + for image in images { + let image_part = create_file_part(image.source).await?; + form = form.part("image[]", image_part); + } + } } if let Some(mask) = request.mask { @@ -1006,28 +1172,58 @@ impl AsyncTryFrom for reqwest::multipart::Form { form = form.part("mask", mask_part); } + if let Some(background) = request.background { + form = form.text("background", background.to_string()) + } + if let Some(model) = request.model { form = form.text("model", model.to_string()) } - if request.n.is_some() { - form = form.text("n", request.n.unwrap().to_string()) + if let Some(n) = request.n { + form = form.text("n", n.to_string()) } - if request.size.is_some() { - form = form.text("size", request.size.unwrap().to_string()) + if let Some(size) = request.size { + form = form.text("size", size.to_string()) } - if request.response_format.is_some() { - form = form.text( - "response_format", - request.response_format.unwrap().to_string(), - ) + if let Some(response_format) = request.response_format { + form = form.text("response_format", response_format.to_string()) } - if request.user.is_some() { - form = form.text("user", request.user.unwrap()) + if let Some(output_format) = request.output_format { + form = form.text("output_format", output_format.to_string()) } + + if let Some(output_compression) = request.output_compression { + form = form.text("output_compression", output_compression.to_string()) + } + + if let Some(output_compression) = request.output_compression { + form = form.text("output_compression", output_compression.to_string()) + } + + if let Some(user) = request.user { + form = form.text("user", user) + } + + if let Some(input_fidelity) = request.input_fidelity { + form = form.text("input_fidelity", input_fidelity.to_string()) + } + + if let Some(stream) = request.stream { + form = form.text("stream", stream.to_string()) + } + + if let Some(partial_images) = request.partial_images { + form = form.text("partial_images", 
partial_images.to_string()) + } + + if let Some(quality) = request.quality { + form = form.text("quality", quality.to_string()) + } + Ok(form) } } diff --git a/async-openai/src/types/mod.rs b/async-openai/src/types/mod.rs index 42eb56bc..cd86898d 100644 --- a/async-openai/src/types/mod.rs +++ b/async-openai/src/types/mod.rs @@ -13,7 +13,7 @@ mod containers; mod embedding; mod file; mod fine_tuning; -mod image; +pub mod images; mod invites; mod logprob; mod mcp; @@ -50,7 +50,6 @@ pub use containers::*; pub use embedding::*; pub use file::*; pub use fine_tuning::*; -pub use image::*; pub use invites::*; pub use logprob::*; pub use mcp::*; diff --git a/async-openai/src/types/model.rs b/async-openai/src/types/model.rs index 034213a6..86651eb2 100644 --- a/async-openai/src/types/model.rs +++ b/async-openai/src/types/model.rs @@ -1,27 +1,27 @@ -use serde::{Deserialize, Serialize}; - -/// Describes an OpenAI model offering that can be used with the API. -#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] -pub struct Model { - /// The model identifier, which can be referenced in the API endpoints. - pub id: String, - /// The object type, which is always "model". - pub object: String, - /// The Unix timestamp (in seconds) when the model was created. - pub created: u32, - /// The organization that owns the model. - pub owned_by: String, -} - -#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)] -pub struct ListModelResponse { - pub object: String, - pub data: Vec, -} - -#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)] -pub struct DeleteModelResponse { - pub id: String, - pub object: String, - pub deleted: bool, -} +use serde::{Deserialize, Serialize}; + +/// Describes an OpenAI model offering that can be used with the API. +#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] +pub struct Model { + /// The model identifier, which can be referenced in the API endpoints. + pub id: String, + /// The object type, which is always "model". + pub object: String, + /// The Unix timestamp (in seconds) when the model was created. + pub created: u32, + /// The organization that owns the model. 
+ pub owned_by: String, +} + +#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)] +pub struct ListModelResponse { + pub object: String, + pub data: Vec, +} + +#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)] +pub struct DeleteModelResponse { + pub id: String, + pub object: String, + pub deleted: bool, +} diff --git a/async-openai/src/types/moderation.rs b/async-openai/src/types/moderation.rs index f8c1c0ff..979bd904 100644 --- a/async-openai/src/types/moderation.rs +++ b/async-openai/src/types/moderation.rs @@ -1,227 +1,227 @@ -use derive_builder::Builder; -use serde::{Deserialize, Serialize}; - -use crate::error::OpenAIError; - -#[derive(Debug, Serialize, Clone, PartialEq, Deserialize)] -#[serde(untagged)] -pub enum ModerationInput { - /// A single string of text to classify for moderation - String(String), - - /// An array of strings to classify for moderation - StringArray(Vec), - - /// An array of multi-modal inputs to the moderation model - MultiModal(Vec), -} - -/// Content part for multi-modal moderation input -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(tag = "type")] -pub enum ModerationContentPart { - /// An object describing text to classify - #[serde(rename = "text")] - Text { - /// A string of text to classify - text: String, - }, - - /// An object describing an image to classify - #[serde(rename = "image_url")] - ImageUrl { - /// Contains either an image URL or a data URL for a base64 encoded image - image_url: ModerationImageUrl, - }, -} - -/// Image URL configuration for image moderation -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct ModerationImageUrl { - /// Either a URL of the image or the base64 encoded image data - pub url: String, -} - -#[derive(Debug, Default, Clone, Serialize, Builder, PartialEq, Deserialize)] -#[builder(name = "CreateModerationRequestArgs")] -#[builder(pattern = "mutable")] -#[builder(setter(into, strip_option), default)] -#[builder(derive(Debug))] -#[builder(build_fn(error = "OpenAIError"))] -pub struct CreateModerationRequest { - /// Input (or inputs) to classify. Can be a single string, an array of strings, or - /// an array of multi-modal input objects similar to other models. - pub input: ModerationInput, - - /// The content moderation model you would like to use. Learn more in the - /// [moderation guide](https://platform.openai.com/docs/guides/moderation), and learn about - /// available models [here](https://platform.openai.com/docs/models/moderation). - #[serde(skip_serializing_if = "Option::is_none")] - pub model: Option, -} - -#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] -pub struct Category { - /// Content that expresses, incites, or promotes hate based on race, gender, - /// ethnicity, religion, nationality, sexual orientation, disability status, or - /// caste. Hateful content aimed at non-protected groups (e.g., chess players) - /// is harrassment. - pub hate: bool, - #[serde(rename = "hate/threatening")] - /// Hateful content that also includes violence or serious harm towards the - /// targeted group based on race, gender, ethnicity, religion, nationality, - /// sexual orientation, disability status, or caste. - pub hate_threatening: bool, - /// Content that expresses, incites, or promotes harassing language towards any target. - pub harassment: bool, - /// Harassment content that also includes violence or serious harm towards any target. 
- #[serde(rename = "harassment/threatening")] - pub harassment_threatening: bool, - /// Content that includes instructions or advice that facilitate the planning or execution of wrongdoing, or that gives advice or instruction on how to commit illicit acts. For example, "how to shoplift" would fit this category. - pub illicit: bool, - /// Content that includes instructions or advice that facilitate the planning or execution of wrongdoing that also includes violence, or that gives advice or instruction on the procurement of any weapon. - #[serde(rename = "illicit/violent")] - pub illicit_violent: bool, - /// Content that promotes, encourages, or depicts acts of self-harm, such as suicide, cutting, and eating disorders. - #[serde(rename = "self-harm")] - pub self_harm: bool, - /// Content where the speaker expresses that they are engaging or intend to engage in acts of self-harm, such as suicide, cutting, and eating disorders. - #[serde(rename = "self-harm/intent")] - pub self_harm_intent: bool, - /// Content that encourages performing acts of self-harm, such as suicide, cutting, and eating disorders, or that gives instructions or advice on how to commit such acts. - #[serde(rename = "self-harm/instructions")] - pub self_harm_instructions: bool, - /// Content meant to arouse sexual excitement, such as the description of sexual activity, or that promotes sexual services (excluding sex education and wellness). - pub sexual: bool, - /// Sexual content that includes an individual who is under 18 years old. - #[serde(rename = "sexual/minors")] - pub sexual_minors: bool, - /// Content that depicts death, violence, or physical injury. - pub violence: bool, - /// Content that depicts death, violence, or physical injury in graphic detail. - #[serde(rename = "violence/graphic")] - pub violence_graphic: bool, -} - -/// A list of the categories along with their scores as predicted by model. -#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] -pub struct CategoryScore { - /// The score for the category 'hate'. - pub hate: f32, - /// The score for the category 'hate/threatening'. - #[serde(rename = "hate/threatening")] - pub hate_threatening: f32, - /// The score for the category 'harassment'. - pub harassment: f32, - /// The score for the category 'harassment/threatening'. - #[serde(rename = "harassment/threatening")] - pub harassment_threatening: f32, - /// The score for the category 'illicit'. - pub illicit: f32, - /// The score for the category 'illicit/violent'. - #[serde(rename = "illicit/violent")] - pub illicit_violent: f32, - /// The score for the category 'self-harm'. - #[serde(rename = "self-harm")] - pub self_harm: f32, - /// The score for the category 'self-harm/intent'. - #[serde(rename = "self-harm/intent")] - pub self_harm_intent: f32, - /// The score for the category 'self-harm/instructions'. - #[serde(rename = "self-harm/instructions")] - pub self_harm_instructions: f32, - /// The score for the category 'sexual'. - pub sexual: f32, - /// The score for the category 'sexual/minors'. - #[serde(rename = "sexual/minors")] - pub sexual_minors: f32, - /// The score for the category 'violence'. - pub violence: f32, - /// The score for the category 'violence/graphic'. - #[serde(rename = "violence/graphic")] - pub violence_graphic: f32, -} - -#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] -pub struct ContentModerationResult { - /// Whether any of the below categories are flagged. - pub flagged: bool, - /// A list of the categories, and whether they are flagged or not. 
- pub categories: Category, - /// A list of the categories along with their scores as predicted by model. - pub category_scores: CategoryScore, - /// A list of the categories along with the input type(s) that the score applies to. - pub category_applied_input_types: CategoryAppliedInputTypes, -} - -/// Represents if a given text input is potentially harmful. -#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] -pub struct CreateModerationResponse { - /// The unique identifier for the moderation request. - pub id: String, - /// The model used to generate the moderation results. - pub model: String, - /// A list of moderation objects. - pub results: Vec, -} - -/// A list of the categories along with the input type(s) that the score applies to. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct CategoryAppliedInputTypes { - /// The applied input type(s) for the category 'hate'. - pub hate: Vec, - - /// The applied input type(s) for the category 'hate/threatening'. - #[serde(rename = "hate/threatening")] - pub hate_threatening: Vec, - - /// The applied input type(s) for the category 'harassment'. - pub harassment: Vec, - - /// The applied input type(s) for the category 'harassment/threatening'. - #[serde(rename = "harassment/threatening")] - pub harassment_threatening: Vec, - - /// The applied input type(s) for the category 'illicit'. - pub illicit: Vec, - - /// The applied input type(s) for the category 'illicit/violent'. - #[serde(rename = "illicit/violent")] - pub illicit_violent: Vec, - - /// The applied input type(s) for the category 'self-harm'. - #[serde(rename = "self-harm")] - pub self_harm: Vec, - - /// The applied input type(s) for the category 'self-harm/intent'. - #[serde(rename = "self-harm/intent")] - pub self_harm_intent: Vec, - - /// The applied input type(s) for the category 'self-harm/instructions'. - #[serde(rename = "self-harm/instructions")] - pub self_harm_instructions: Vec, - - /// The applied input type(s) for the category 'sexual'. - pub sexual: Vec, - - /// The applied input type(s) for the category 'sexual/minors'. - #[serde(rename = "sexual/minors")] - pub sexual_minors: Vec, - - /// The applied input type(s) for the category 'violence'. - pub violence: Vec, - - /// The applied input type(s) for the category 'violence/graphic'. 
- #[serde(rename = "violence/graphic")] - pub violence_graphic: Vec, -} - -/// The type of input that was moderated -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(rename_all = "lowercase")] -pub enum ModInputType { - /// Text content that was moderated - Text, - /// Image content that was moderated - Image, -} +use derive_builder::Builder; +use serde::{Deserialize, Serialize}; + +use crate::error::OpenAIError; + +#[derive(Debug, Serialize, Clone, PartialEq, Deserialize)] +#[serde(untagged)] +pub enum ModerationInput { + /// A single string of text to classify for moderation + String(String), + + /// An array of strings to classify for moderation + StringArray(Vec), + + /// An array of multi-modal inputs to the moderation model + MultiModal(Vec), +} + +/// Content part for multi-modal moderation input +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(tag = "type")] +pub enum ModerationContentPart { + /// An object describing text to classify + #[serde(rename = "text")] + Text { + /// A string of text to classify + text: String, + }, + + /// An object describing an image to classify + #[serde(rename = "image_url")] + ImageUrl { + /// Contains either an image URL or a data URL for a base64 encoded image + image_url: ModerationImageUrl, + }, +} + +/// Image URL configuration for image moderation +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ModerationImageUrl { + /// Either a URL of the image or the base64 encoded image data + pub url: String, +} + +#[derive(Debug, Default, Clone, Serialize, Builder, PartialEq, Deserialize)] +#[builder(name = "CreateModerationRequestArgs")] +#[builder(pattern = "mutable")] +#[builder(setter(into, strip_option), default)] +#[builder(derive(Debug))] +#[builder(build_fn(error = "OpenAIError"))] +pub struct CreateModerationRequest { + /// Input (or inputs) to classify. Can be a single string, an array of strings, or + /// an array of multi-modal input objects similar to other models. + pub input: ModerationInput, + + /// The content moderation model you would like to use. Learn more in the + /// [moderation guide](https://platform.openai.com/docs/guides/moderation), and learn about + /// available models [here](https://platform.openai.com/docs/models/moderation). + #[serde(skip_serializing_if = "Option::is_none")] + pub model: Option, +} + +#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] +pub struct Category { + /// Content that expresses, incites, or promotes hate based on race, gender, + /// ethnicity, religion, nationality, sexual orientation, disability status, or + /// caste. Hateful content aimed at non-protected groups (e.g., chess players) + /// is harrassment. + pub hate: bool, + #[serde(rename = "hate/threatening")] + /// Hateful content that also includes violence or serious harm towards the + /// targeted group based on race, gender, ethnicity, religion, nationality, + /// sexual orientation, disability status, or caste. + pub hate_threatening: bool, + /// Content that expresses, incites, or promotes harassing language towards any target. + pub harassment: bool, + /// Harassment content that also includes violence or serious harm towards any target. + #[serde(rename = "harassment/threatening")] + pub harassment_threatening: bool, + /// Content that includes instructions or advice that facilitate the planning or execution of wrongdoing, or that gives advice or instruction on how to commit illicit acts. For example, "how to shoplift" would fit this category. 
+ pub illicit: bool, + /// Content that includes instructions or advice that facilitate the planning or execution of wrongdoing that also includes violence, or that gives advice or instruction on the procurement of any weapon. + #[serde(rename = "illicit/violent")] + pub illicit_violent: bool, + /// Content that promotes, encourages, or depicts acts of self-harm, such as suicide, cutting, and eating disorders. + #[serde(rename = "self-harm")] + pub self_harm: bool, + /// Content where the speaker expresses that they are engaging or intend to engage in acts of self-harm, such as suicide, cutting, and eating disorders. + #[serde(rename = "self-harm/intent")] + pub self_harm_intent: bool, + /// Content that encourages performing acts of self-harm, such as suicide, cutting, and eating disorders, or that gives instructions or advice on how to commit such acts. + #[serde(rename = "self-harm/instructions")] + pub self_harm_instructions: bool, + /// Content meant to arouse sexual excitement, such as the description of sexual activity, or that promotes sexual services (excluding sex education and wellness). + pub sexual: bool, + /// Sexual content that includes an individual who is under 18 years old. + #[serde(rename = "sexual/minors")] + pub sexual_minors: bool, + /// Content that depicts death, violence, or physical injury. + pub violence: bool, + /// Content that depicts death, violence, or physical injury in graphic detail. + #[serde(rename = "violence/graphic")] + pub violence_graphic: bool, +} + +/// A list of the categories along with their scores as predicted by model. +#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] +pub struct CategoryScore { + /// The score for the category 'hate'. + pub hate: f32, + /// The score for the category 'hate/threatening'. + #[serde(rename = "hate/threatening")] + pub hate_threatening: f32, + /// The score for the category 'harassment'. + pub harassment: f32, + /// The score for the category 'harassment/threatening'. + #[serde(rename = "harassment/threatening")] + pub harassment_threatening: f32, + /// The score for the category 'illicit'. + pub illicit: f32, + /// The score for the category 'illicit/violent'. + #[serde(rename = "illicit/violent")] + pub illicit_violent: f32, + /// The score for the category 'self-harm'. + #[serde(rename = "self-harm")] + pub self_harm: f32, + /// The score for the category 'self-harm/intent'. + #[serde(rename = "self-harm/intent")] + pub self_harm_intent: f32, + /// The score for the category 'self-harm/instructions'. + #[serde(rename = "self-harm/instructions")] + pub self_harm_instructions: f32, + /// The score for the category 'sexual'. + pub sexual: f32, + /// The score for the category 'sexual/minors'. + #[serde(rename = "sexual/minors")] + pub sexual_minors: f32, + /// The score for the category 'violence'. + pub violence: f32, + /// The score for the category 'violence/graphic'. + #[serde(rename = "violence/graphic")] + pub violence_graphic: f32, +} + +#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] +pub struct ContentModerationResult { + /// Whether any of the below categories are flagged. + pub flagged: bool, + /// A list of the categories, and whether they are flagged or not. + pub categories: Category, + /// A list of the categories along with their scores as predicted by model. + pub category_scores: CategoryScore, + /// A list of the categories along with the input type(s) that the score applies to. 
+ pub category_applied_input_types: CategoryAppliedInputTypes, +} + +/// Represents if a given text input is potentially harmful. +#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] +pub struct CreateModerationResponse { + /// The unique identifier for the moderation request. + pub id: String, + /// The model used to generate the moderation results. + pub model: String, + /// A list of moderation objects. + pub results: Vec, +} + +/// A list of the categories along with the input type(s) that the score applies to. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct CategoryAppliedInputTypes { + /// The applied input type(s) for the category 'hate'. + pub hate: Vec, + + /// The applied input type(s) for the category 'hate/threatening'. + #[serde(rename = "hate/threatening")] + pub hate_threatening: Vec, + + /// The applied input type(s) for the category 'harassment'. + pub harassment: Vec, + + /// The applied input type(s) for the category 'harassment/threatening'. + #[serde(rename = "harassment/threatening")] + pub harassment_threatening: Vec, + + /// The applied input type(s) for the category 'illicit'. + pub illicit: Vec, + + /// The applied input type(s) for the category 'illicit/violent'. + #[serde(rename = "illicit/violent")] + pub illicit_violent: Vec, + + /// The applied input type(s) for the category 'self-harm'. + #[serde(rename = "self-harm")] + pub self_harm: Vec, + + /// The applied input type(s) for the category 'self-harm/intent'. + #[serde(rename = "self-harm/intent")] + pub self_harm_intent: Vec, + + /// The applied input type(s) for the category 'self-harm/instructions'. + #[serde(rename = "self-harm/instructions")] + pub self_harm_instructions: Vec, + + /// The applied input type(s) for the category 'sexual'. + pub sexual: Vec, + + /// The applied input type(s) for the category 'sexual/minors'. + #[serde(rename = "sexual/minors")] + pub sexual_minors: Vec, + + /// The applied input type(s) for the category 'violence'. + pub violence: Vec, + + /// The applied input type(s) for the category 'violence/graphic'. 
+ #[serde(rename = "violence/graphic")] + pub violence_graphic: Vec, +} + +/// The type of input that was moderated +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +#[serde(rename_all = "lowercase")] +pub enum ModInputType { + /// Text content that was moderated + Text, + /// Image content that was moderated + Image, +} diff --git a/async-openai/src/types/responses/mod.rs b/async-openai/src/types/responses/mod.rs index 546ba303..51f2d733 100644 --- a/async-openai/src/types/responses/mod.rs +++ b/async-openai/src/types/responses/mod.rs @@ -1,7 +1,7 @@ mod conversation; mod response; -mod response_stream; +mod stream; pub use conversation::*; pub use response::*; -pub use response_stream::*; +pub use stream::*; diff --git a/async-openai/src/types/responses/response_stream.rs b/async-openai/src/types/responses/stream.rs similarity index 100% rename from async-openai/src/types/responses/response_stream.rs rename to async-openai/src/types/responses/stream.rs diff --git a/async-openai/src/types/video.rs b/async-openai/src/types/video.rs index 787255e8..ce227995 100644 --- a/async-openai/src/types/video.rs +++ b/async-openai/src/types/video.rs @@ -1,7 +1,7 @@ use derive_builder::Builder; use serde::{Deserialize, Serialize}; -use crate::{error::OpenAIError, types::ImageInput}; +use crate::{error::OpenAIError, types::images::ImageInput}; #[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)] pub enum VideoSize { diff --git a/examples/create-image-variation/src/main.rs b/examples/create-image-variation/src/main.rs index e0244fec..97db3456 100644 --- a/examples/create-image-variation/src/main.rs +++ b/examples/create-image-variation/src/main.rs @@ -1,5 +1,5 @@ use async_openai::{ - types::{CreateImageVariationRequestArgs, DallE2ImageSize, ImageResponseFormat}, + types::images::{CreateImageVariationRequestArgs, DallE2ImageSize, ImageResponseFormat}, Client, }; use std::error::Error; diff --git a/examples/gemini-openai-compatibility/src/gemini_types.rs b/examples/gemini-openai-compatibility/src/gemini_types.rs index 6a245432..51ada220 100644 --- a/examples/gemini-openai-compatibility/src/gemini_types.rs +++ b/examples/gemini-openai-compatibility/src/gemini_types.rs @@ -3,7 +3,7 @@ use std::pin::Pin; /// Gemini types (Generally user defined types) for Gemini API use async_openai::{ error::OpenAIError, - types::{ChatChoice, ChatChoiceStream, CompletionUsage, Image}, + types::{images::Image, ChatChoice, ChatChoiceStream, CompletionUsage}, }; use futures::Stream; use serde::{Deserialize, Serialize}; diff --git a/examples/gemini-openai-compatibility/src/main.rs b/examples/gemini-openai-compatibility/src/main.rs index 1bac59fa..abcf4072 100644 --- a/examples/gemini-openai-compatibility/src/main.rs +++ b/examples/gemini-openai-compatibility/src/main.rs @@ -1,10 +1,10 @@ use async_openai::{ config::OpenAIConfig, types::{ + images::{CreateImageRequestArgs, Image, ImageModel, ImageResponseFormat}, ChatCompletionRequestMessage, ChatCompletionRequestUserMessage, ChatCompletionRequestUserMessageContentPart, CreateChatCompletionRequestArgs, - CreateEmbeddingRequestArgs, CreateImageRequestArgs, Image, ImageModel, ImageResponseFormat, - InputAudio, ResponseFormat, ResponseFormatJsonSchema, + CreateEmbeddingRequestArgs, InputAudio, ResponseFormat, ResponseFormatJsonSchema, }, Client, }; @@ -202,7 +202,7 @@ async fn generate_image(prompt: &str) -> Result<(), Box> { .response_format(ImageResponseFormat::B64Json) .build()?; - let response: GeminiImagesResponse = 
client.images().create_byot(request).await?; + let response: GeminiImagesResponse = client.images().generate_byot(request).await?; let images = response.data; diff --git a/examples/image-edit-stream/Cargo.toml b/examples/image-edit-stream/Cargo.toml new file mode 100644 index 00000000..14b9d759 --- /dev/null +++ b/examples/image-edit-stream/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "image-edit-stream" +version = "0.1.0" +edition = "2021" +publish = false + + +[dependencies] +async-openai = {path = "../../async-openai"} +tokio = { version = "1.43.0", features = ["full"] } +futures = "0.3.31" +base64 = "0.22.1" diff --git a/examples/create-image-edit/images/mask.png b/examples/image-edit-stream/images/mask.png similarity index 100% rename from examples/create-image-edit/images/mask.png rename to examples/image-edit-stream/images/mask.png diff --git a/examples/create-image-edit/images/sunlit_lounge.png b/examples/image-edit-stream/images/sunlit_lounge.png similarity index 100% rename from examples/create-image-edit/images/sunlit_lounge.png rename to examples/image-edit-stream/images/sunlit_lounge.png diff --git a/examples/image-edit-stream/src/main.rs b/examples/image-edit-stream/src/main.rs new file mode 100644 index 00000000..ad75fe22 --- /dev/null +++ b/examples/image-edit-stream/src/main.rs @@ -0,0 +1,88 @@ +use async_openai::{ + traits::EventType, + types::images::{ + CreateImageEditRequestArgs, ImageEditStreamEvent, ImageModel, ImageOutputFormat, ImageSize, + }, + Client, +}; +use base64::{engine::general_purpose, Engine as _}; +use futures::StreamExt; +use std::error::Error; +use std::fs; +use std::path::Path; + +fn save_image(event: ImageEditStreamEvent) -> Result<(), Box> { + // Create data directory if it doesn't exist + let data_dir = Path::new("./data"); + if !data_dir.exists() { + fs::create_dir_all(data_dir)?; + } + + // Extract b64_json and output_format from the event + let (b64_json, output_format, created_at, filename_suffix) = match event { + ImageEditStreamEvent::PartialImage(event) => ( + event.b64_json, + event.output_format, + event.created_at, + format!("partial_{}", event.partial_image_index), + ), + ImageEditStreamEvent::Completed(event) => ( + event.b64_json, + event.output_format, + event.created_at, + "completed".to_string(), + ), + }; + + // Determine file extension from output_format + let extension = match output_format { + ImageOutputFormat::Png => "png", + ImageOutputFormat::Jpeg => "jpeg", + ImageOutputFormat::Webp => "webp", + }; + + // Create unique filename + let filename = format!("image_{}_{}.{}", created_at, filename_suffix, extension); + let filepath = data_dir.join(&filename); + + // Decode base64 + let image_data = general_purpose::STANDARD.decode(b64_json)?; + + // Write to file (create or overwrite) + fs::write(&filepath, image_data)?; + + println!("Saved image to: {}", filepath.display()); + + Ok(()) +} + +#[tokio::main] +async fn main() -> Result<(), Box> { + let client = Client::new(); + + let request = CreateImageEditRequestArgs::default() + .image("./images/sunlit_lounge.png") + .mask("./images/mask.png") + .prompt("A sunlit indoor lounge area with a duck in the pool") + .partial_images(2) + .n(1) + .size(ImageSize::S1024x1024) + .model(ImageModel::GptImage1) + .stream(true) + .user("async-openai") + .build()?; + + let mut stream = client.images().edit_stream(request).await?; + + while let Some(event) = stream.next().await { + match event { + Ok(event) => { + println!("Saving image from event: {:?}", event.event_type()); + 
save_image(event)?; + } + Err(e) => eprintln!("Error: {:?}", e), + } + } + + Ok(()) +} diff --git a/examples/create-image/Cargo.toml b/examples/image-edit/Cargo.toml similarity index 88% rename from examples/create-image/Cargo.toml rename to examples/image-edit/Cargo.toml index 4c48adac..24eb2f63 100644 --- a/examples/create-image/Cargo.toml +++ b/examples/image-edit/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "create-image" +name = "image-edit" version = "0.1.0" edition = "2021" publish = false diff --git a/examples/create-image-edit/README.md b/examples/image-edit/README.md similarity index 100% rename from examples/create-image-edit/README.md rename to examples/image-edit/README.md diff --git a/examples/image-edit/images/mask.png b/examples/image-edit/images/mask.png new file mode 100644 index 00000000..4807969d Binary files /dev/null and b/examples/image-edit/images/mask.png differ diff --git a/examples/image-edit/images/sunlit_lounge.png b/examples/image-edit/images/sunlit_lounge.png new file mode 100644 index 00000000..24e5cb0d Binary files /dev/null and b/examples/image-edit/images/sunlit_lounge.png differ diff --git a/examples/create-image-edit/src/main.rs b/examples/image-edit/src/main.rs similarity index 77% rename from examples/create-image-edit/src/main.rs rename to examples/image-edit/src/main.rs index 20e96735..31d6c25a 100644 --- a/examples/create-image-edit/src/main.rs +++ b/examples/image-edit/src/main.rs @@ -1,9 +1,8 @@ use async_openai::{ - types::{CreateImageEditRequestArgs, DallE2ImageSize, ImageResponseFormat}, + types::images::{CreateImageEditRequestArgs, ImageResponseFormat, ImageSize}, Client, }; use std::error::Error; - #[tokio::main] async fn main() -> Result<(), Box> { let client = Client::new(); @@ -13,12 +12,12 @@ async fn main() -> Result<(), Box> { .mask("./images/mask.png") .prompt("A sunlit indoor lounge area with a duck in the pool") .n(1) - .size(DallE2ImageSize::S1024x1024) + .size(ImageSize::S1024x1024) .response_format(ImageResponseFormat::Url) .user("async-openai") .build()?; - let response = client.images().create_edit(request).await?; + let response = client.images().edit(request).await?; let paths = response.save("./data").await?; diff --git a/examples/image-gen-stream/Cargo.toml b/examples/image-gen-stream/Cargo.toml new file mode 100644 index 00000000..907e9882 --- /dev/null +++ b/examples/image-gen-stream/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "image-gen-stream" +version = "0.1.0" +edition = "2021" +publish = false + + +[dependencies] +async-openai = {path = "../../async-openai"} +tokio = { version = "1.43.0", features = ["full"] } +futures = "0.3.31" +base64 = "0.22.1" diff --git a/examples/image-gen-stream/src/main.rs b/examples/image-gen-stream/src/main.rs new file mode 100644 index 00000000..4b746abd --- /dev/null +++ b/examples/image-gen-stream/src/main.rs @@ -0,0 +1,85 @@ +use async_openai::{ + traits::EventType, + types::images::{ + CreateImageRequestArgs, ImageGenStreamEvent, ImageModel, ImageOutputFormat, ImageSize, + }, + Client, +}; +use base64::{engine::general_purpose, Engine as _}; +use futures::StreamExt; +use std::error::Error; +use std::fs; +use std::path::Path; + +fn save_image(event: ImageGenStreamEvent) -> Result<(), Box> { + // Create data directory if it doesn't exist + let data_dir = Path::new("./data"); + if !data_dir.exists() { + fs::create_dir_all(data_dir)?; + } + + // Extract b64_json and output_format from the event + let (b64_json, output_format, created_at, filename_suffix) = match event { + 
ImageGenStreamEvent::PartialImage(event) => ( + event.b64_json, + event.output_format, + event.created_at, + format!("partial_{}", event.partial_image_index), + ), + ImageGenStreamEvent::Completed(event) => ( + event.b64_json, + event.output_format, + event.created_at, + "completed".to_string(), + ), + }; + + // Determine file extension from output_format + let extension = match output_format { + ImageOutputFormat::Png => "png", + ImageOutputFormat::Jpeg => "jpeg", + ImageOutputFormat::Webp => "webp", + }; + + // Create unique filename + let filename = format!("image_{}_{}.{}", created_at, filename_suffix, extension); + let filepath = data_dir.join(&filename); + + // Decode base64 + let image_data = general_purpose::STANDARD.decode(b64_json)?; + + // Write to file (create or overwrite) + fs::write(&filepath, image_data)?; + + println!("Saved image to: {}", filepath.display()); + + Ok(()) +} + +#[tokio::main] +async fn main() -> Result<(), Box> { + // create client, reads OPENAI_API_KEY environment variable for API key. + let client = Client::new(); + + let request = CreateImageRequestArgs::default() + .model(ImageModel::GptImage1) + .prompt("humans dancing a victory dance") + .size(ImageSize::S1024x1024) + .partial_images(2) + .stream(true) + .build()?; + + let mut stream = client.images().generate_stream(request).await?; + + while let Some(event) = stream.next().await { + match event { + Ok(event) => { + println!("Saving image from event: {:?}", event.event_type()); + save_image(event)?; + } + Err(e) => eprintln!("Error: {:?}", e), + } + } + + Ok(()) +} diff --git a/examples/create-image-b64-json/Cargo.toml b/examples/image-generate-b64-json/Cargo.toml similarity index 84% rename from examples/create-image-b64-json/Cargo.toml rename to examples/image-generate-b64-json/Cargo.toml index 4d5e1c94..87a2b045 100644 --- a/examples/create-image-b64-json/Cargo.toml +++ b/examples/image-generate-b64-json/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "create-image-b64-json" +name = "image-generate-b64-json" version = "0.1.0" edition = "2021" publish = false diff --git a/examples/create-image-b64-json/README.md b/examples/image-generate-b64-json/README.md similarity index 100% rename from examples/create-image-b64-json/README.md rename to examples/image-generate-b64-json/README.md diff --git a/examples/create-image-b64-json/src/main.rs b/examples/image-generate-b64-json/src/main.rs similarity index 86% rename from examples/create-image-b64-json/src/main.rs rename to examples/image-generate-b64-json/src/main.rs index 3cae2396..88d7fc4c 100644 --- a/examples/create-image-b64-json/src/main.rs +++ b/examples/image-generate-b64-json/src/main.rs @@ -1,5 +1,5 @@ use async_openai::{ - types::{CreateImageRequestArgs, ImageResponseFormat, ImageSize}, + types::images::{CreateImageRequestArgs, ImageResponseFormat, ImageSize}, Client, }; use std::error::Error; @@ -17,7 +17,7 @@ async fn main() -> Result<(), Box> { .user("async-openai") .build()?; - let response = client.images().create(request).await?; + let response = client.images().generate(request).await?; // Response already contains image data in base64 format. // Save each image to ./data directory in dedicated Tokio task. 
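The b64_json examples above lean on the built-in `ImagesResponse::save` helper. When the decoded bytes are needed in memory rather than on disk, the `Image` enum introduced in this change can be matched directly. The sketch below is illustrative rather than part of the crate: it assumes the `base64` crate (already used by the streaming examples) and that the `b64_json` field dereferences to the base64 string.

```rust
use async_openai::types::images::Image;
use base64::{engine::general_purpose, Engine as _};

/// Decode one generated image into raw bytes instead of writing it to disk.
/// Assumes `b64_json` derefs to the base64 payload (e.g. an Arc'd String).
fn decode_image(image: &Image) -> Option<Vec<u8>> {
    match image {
        // Returned by gpt-image-1, or by dall-e-* when `response_format` is `b64_json`.
        Image::B64Json { b64_json, .. } => {
            general_purpose::STANDARD.decode(b64_json.as_bytes()).ok()
        }
        // `url` responses carry no inline bytes; they have to be downloaded separately.
        Image::Url { .. } => None,
    }
}
```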
diff --git a/examples/create-image-edit/Cargo.toml b/examples/image-generate/Cargo.toml similarity index 86% rename from examples/create-image-edit/Cargo.toml rename to examples/image-generate/Cargo.toml index a18f6dd1..71b5cbe0 100644 --- a/examples/create-image-edit/Cargo.toml +++ b/examples/image-generate/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "create-image-edit" +name = "image-generate" version = "0.1.0" edition = "2021" publish = false diff --git a/examples/create-image/README.md b/examples/image-generate/README.md similarity index 100% rename from examples/create-image/README.md rename to examples/image-generate/README.md diff --git a/examples/create-image/src/main.rs b/examples/image-generate/src/main.rs similarity index 85% rename from examples/create-image/src/main.rs rename to examples/image-generate/src/main.rs index 5de3467a..4bcd5a17 100644 --- a/examples/create-image/src/main.rs +++ b/examples/image-generate/src/main.rs @@ -1,5 +1,5 @@ use async_openai::{ - types::{CreateImageRequestArgs, ImageResponseFormat, ImageSize}, + types::images::{CreateImageRequestArgs, ImageResponseFormat, ImageSize}, Client, }; use std::error::Error; @@ -17,7 +17,7 @@ async fn main() -> Result<(), Box> { .user("async-openai") .build()?; - let response = client.images().create(request).await?; + let response = client.images().generate(request).await?; // Download and save images to ./data directory. // Each url is downloaded and saved in dedicated Tokio task.
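Beyond the renames, the edit endpoint now accepts several source images in one request via `ImageEditInput` and the `From` conversions added in `impls.rs`. A minimal sketch of how that is expected to look with the renamed `edit` method follows; the file paths are invented for illustration, and the call otherwise mirrors the existing image-edit example (builder, `edit`, then `save`).

```rust
use async_openai::{
    types::images::{CreateImageEditRequestArgs, ImageModel},
    Client,
};
use std::error::Error;

#[tokio::main]
async fn main() -> Result<(), Box<dyn Error>> {
    let client = Client::new();

    // An array of path-like values converts into `ImageEditInput::Images`
    // through the `From` impls above; both file names are hypothetical.
    let request = CreateImageEditRequestArgs::default()
        .image(["./images/base.png", "./images/reference.png"])
        .prompt("Blend the reference style into the base photo")
        .model(ImageModel::GptImage1)
        .build()?;

    let response = client.images().edit(request).await?;

    // `save` is assumed to behave as in the existing examples, returning the
    // paths of the files written under ./data.
    let paths = response.save("./data").await?;
    println!("{paths:?}");

    Ok(())
}
```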