diff --git a/.gitignore b/.gitignore index d6662d5..4e96986 100644 --- a/.gitignore +++ b/.gitignore @@ -60,6 +60,7 @@ typings # Temporary files tmp/ temp/ +temp-openapi.yaml .prism.log codegen.log diff --git a/.openapi-generator/FILES b/.openapi-generator/FILES index 14a9bfc..1080058 100644 --- a/.openapi-generator/FILES +++ b/.openapi-generator/FILES @@ -1,10 +1,10 @@ .gitignore .npmignore -.openapi-generator-ignore api.ts base.ts common.ts configuration.ts +docs/BaseTranscriptionConfiguration.md docs/CreateReplacementRuleset201Response.md docs/CreateReplacementRulesetRequest.md docs/ErrorResponse.md @@ -22,6 +22,7 @@ docs/OpenaiCompatibleCreateTranscription200Response.md docs/OpenaiCompatibleCreateTranslation200Response.md docs/RegexGroupRule.md docs/RegexRule.md +docs/RemoteTranscriptionConfiguration.md docs/ReplacementRule.md docs/ReplacementRulesApi.md docs/SpeechToTextApi.md @@ -31,7 +32,6 @@ docs/TranscriptOutputFormat.md docs/TranscriptionDetailed.md docs/TranscriptionModelIdentifier.md docs/TranscriptionOnlyText.md -docs/TranscriptionOptions.md docs/TranscriptionProvider.md docs/TranscriptionResponse.md docs/TranscriptionSegment.md diff --git a/api.ts b/api.ts index b44202e..8d13ef0 100644 --- a/api.ts +++ b/api.ts @@ -4,7 +4,7 @@ * Speechall API * The Speechall REST API provides powerful and flexible speech-to-text capabilities. It allows you to transcribe audio files using various underlying STT providers and models, optionally apply custom text replacement rules, and access results in multiple formats. The API includes standard endpoints for transcription and endpoints compatible with the OpenAI API structure. * - * The version of the OpenAPI document: 0.0.1 + * The version of the OpenAPI document: 0.1.0 * * * NOTE: This class is auto generated by OpenAPI Generator (https://openapi-generator.tech). 
@@ -23,6 +23,93 @@ import type { RequestArgs } from './base'; // @ts-ignore import { BASE_PATH, COLLECTION_FORMATS, BaseAPI, RequiredError, operationServerMap } from './base'; +/** + * Common configuration options for transcription, applicable to both direct uploads and remote URLs. + * @export + * @interface BaseTranscriptionConfiguration + */ +export interface BaseTranscriptionConfiguration { + /** + * + * @type {TranscriptionModelIdentifier} + * @memberof BaseTranscriptionConfiguration + */ + 'model': TranscriptionModelIdentifier; + /** + * + * @type {TranscriptLanguageCode} + * @memberof BaseTranscriptionConfiguration + */ + 'language'?: TranscriptLanguageCode; + /** + * + * @type {TranscriptOutputFormat} + * @memberof BaseTranscriptionConfiguration + */ + 'output_format'?: TranscriptOutputFormat; + /** + * The unique identifier (UUID) of a pre-defined replacement ruleset to apply to the final transcription text. + * @type {string} + * @memberof BaseTranscriptionConfiguration + */ + 'ruleset_id'?: string; + /** + * Whether to add punctuation. Support varies by model (e.g., Deepgram, AssemblyAI). Defaults to `true`. + * @type {boolean} + * @memberof BaseTranscriptionConfiguration + */ + 'punctuation'?: boolean; + /** + * Level of timestamp detail (`word` or `segment`). Defaults to `segment`. + * @type {string} + * @memberof BaseTranscriptionConfiguration + */ + 'timestamp_granularity'?: BaseTranscriptionConfigurationTimestampGranularityEnum; + /** + * Enable speaker diarization. Defaults to `false`. + * @type {boolean} + * @memberof BaseTranscriptionConfiguration + */ + 'diarization'?: boolean; + /** + * Optional text prompt to guide the transcription model. Support varies (e.g., OpenAI). + * @type {string} + * @memberof BaseTranscriptionConfiguration + */ + 'initial_prompt'?: string; + /** + * Controls output randomness for supported models (e.g., OpenAI). Value between 0 and 1. 
+ * @type {number} + * @memberof BaseTranscriptionConfiguration + */ + 'temperature'?: number; + /** + * Enable provider-specific smart formatting (e.g., Deepgram). Defaults vary. + * @type {boolean} + * @memberof BaseTranscriptionConfiguration + */ + 'smart_format'?: boolean; + /** + * Hint for the number of expected speakers for diarization (e.g., RevAI, Deepgram). + * @type {number} + * @memberof BaseTranscriptionConfiguration + */ + 'speakers_expected'?: number; + /** + * List of custom words/phrases to improve recognition (e.g., Deepgram, AssemblyAI). + * @type {Array} + * @memberof BaseTranscriptionConfiguration + */ + 'custom_vocabulary'?: Array; +} + +export const BaseTranscriptionConfigurationTimestampGranularityEnum = { + Word: 'word', + Segment: 'segment' +} as const; + +export type BaseTranscriptionConfigurationTimestampGranularityEnum = typeof BaseTranscriptionConfigurationTimestampGranularityEnum[keyof typeof BaseTranscriptionConfigurationTimestampGranularityEnum]; + /** * * @export @@ -424,6 +511,105 @@ export const RegexRuleFlagsEnum = { export type RegexRuleFlagsEnum = typeof RegexRuleFlagsEnum[keyof typeof RegexRuleFlagsEnum]; +/** + * Configuration options for transcribing audio specified by a remote URL via the `/transcribe-remote` endpoint. + * @export + * @interface RemoteTranscriptionConfiguration + */ +export interface RemoteTranscriptionConfiguration { + /** + * + * @type {TranscriptionModelIdentifier} + * @memberof RemoteTranscriptionConfiguration + */ + 'model': TranscriptionModelIdentifier; + /** + * + * @type {TranscriptLanguageCode} + * @memberof RemoteTranscriptionConfiguration + */ + 'language'?: TranscriptLanguageCode; + /** + * + * @type {TranscriptOutputFormat} + * @memberof RemoteTranscriptionConfiguration + */ + 'output_format'?: TranscriptOutputFormat; + /** + * The unique identifier (UUID) of a pre-defined replacement ruleset to apply to the final transcription text. 
+ * @type {string} + * @memberof RemoteTranscriptionConfiguration + */ + 'ruleset_id'?: string; + /** + * Whether to add punctuation. Support varies by model (e.g., Deepgram, AssemblyAI). Defaults to `true`. + * @type {boolean} + * @memberof RemoteTranscriptionConfiguration + */ + 'punctuation'?: boolean; + /** + * Level of timestamp detail (`word` or `segment`). Defaults to `segment`. + * @type {string} + * @memberof RemoteTranscriptionConfiguration + */ + 'timestamp_granularity'?: RemoteTranscriptionConfigurationTimestampGranularityEnum; + /** + * Enable speaker diarization. Defaults to `false`. + * @type {boolean} + * @memberof RemoteTranscriptionConfiguration + */ + 'diarization'?: boolean; + /** + * Optional text prompt to guide the transcription model. Support varies (e.g., OpenAI). + * @type {string} + * @memberof RemoteTranscriptionConfiguration + */ + 'initial_prompt'?: string; + /** + * Controls output randomness for supported models (e.g., OpenAI). Value between 0 and 1. + * @type {number} + * @memberof RemoteTranscriptionConfiguration + */ + 'temperature'?: number; + /** + * Enable provider-specific smart formatting (e.g., Deepgram). Defaults vary. + * @type {boolean} + * @memberof RemoteTranscriptionConfiguration + */ + 'smart_format'?: boolean; + /** + * Hint for the number of expected speakers for diarization (e.g., RevAI, Deepgram). + * @type {number} + * @memberof RemoteTranscriptionConfiguration + */ + 'speakers_expected'?: number; + /** + * List of custom words/phrases to improve recognition (e.g., Deepgram, AssemblyAI). + * @type {Array} + * @memberof RemoteTranscriptionConfiguration + */ + 'custom_vocabulary'?: Array; + /** + * The publicly accessible URL of the audio file to transcribe. The API server must be able to fetch the audio from this URL. + * @type {string} + * @memberof RemoteTranscriptionConfiguration + */ + 'file_url': string; + /** + * An array of replacement rules to be applied directly to this transcription request, in order. 
This allows defining rules inline instead of (or in addition to) using a pre-saved `ruleset_id`. + * @type {Array} + * @memberof RemoteTranscriptionConfiguration + */ + 'replacement_ruleset'?: Array; +} + +export const RemoteTranscriptionConfigurationTimestampGranularityEnum = { + Word: 'word', + Segment: 'segment' +} as const; + +export type RemoteTranscriptionConfigurationTimestampGranularityEnum = typeof RemoteTranscriptionConfigurationTimestampGranularityEnum[keyof typeof RemoteTranscriptionConfigurationTimestampGranularityEnum]; + /** * @type ReplacementRule * Defines a single rule for finding and replacing text in a transcription. Use one of the specific rule types (`ExactRule`, `RegexRule`, `RegexGroupRule`). The `kind` property acts as a discriminator. @@ -827,8 +1013,12 @@ export const TranscriptionModelIdentifier = { AmazonTranscribe: 'amazon.transcribe', AssemblyaiBest: 'assemblyai.best', AssemblyaiNano: 'assemblyai.nano', + AssemblyaiSlam1: 'assemblyai.slam-1', + AssemblyaiUniversal: 'assemblyai.universal', AzureStandard: 'azure.standard', CloudflareWhisper: 'cloudflare.whisper', + CloudflareWhisperLargeV3Turbo: 'cloudflare.whisper-large-v3-turbo', + CloudflareWhisperTinyEn: 'cloudflare.whisper-tiny-en', DeepgramBase: 'deepgram.base', DeepgramBaseConversationalai: 'deepgram.base-conversationalai', DeepgramBaseFinance: 'deepgram.base-finance', @@ -843,6 +1033,8 @@ export const TranscriptionModelIdentifier = { DeepgramEnhancedMeeting: 'deepgram.enhanced-meeting', DeepgramEnhancedPhonecall: 'deepgram.enhanced-phonecall', DeepgramNova: 'deepgram.nova', + DeepgramNovaGeneral: 'deepgram.nova-general', + DeepgramNovaPhonecall: 'deepgram.nova-phonecall', DeepgramNova2: 'deepgram.nova-2', DeepgramNova2Atc: 'deepgram.nova-2-atc', DeepgramNova2Automotive: 'deepgram.nova-2-automotive', @@ -856,14 +1048,16 @@ export const TranscriptionModelIdentifier = { DeepgramNova2Video: 'deepgram.nova-2-video', DeepgramNova2Voicemail: 'deepgram.nova-2-voicemail', DeepgramNova3: 
'deepgram.nova-3', - DeepgramNovaGeneral: 'deepgram.nova-general', - DeepgramNovaPhonecall: 'deepgram.nova-phonecall', + DeepgramNova3General: 'deepgram.nova-3-general', + DeepgramNova3Medical: 'deepgram.nova-3-medical', DeepgramWhisper: 'deepgram.whisper', DeepgramWhisperBase: 'deepgram.whisper-base', DeepgramWhisperLarge: 'deepgram.whisper-large', DeepgramWhisperMedium: 'deepgram.whisper-medium', DeepgramWhisperSmall: 'deepgram.whisper-small', DeepgramWhisperTiny: 'deepgram.whisper-tiny', + FalaiElevenlabsSpeechToText: 'falai.elevenlabs-speech-to-text', + FalaiSpeechToText: 'falai.speech-to-text', FalaiWhisper: 'falai.whisper', FalaiWizper: 'falai.wizper', FireworksaiWhisperV3: 'fireworksai.whisper-v3', @@ -871,6 +1065,10 @@ export const TranscriptionModelIdentifier = { GladiaStandard: 'gladia.standard', GoogleEnhanced: 'google.enhanced', GoogleStandard: 'google.standard', + GeminiGemini25FlashPreview0520: 'gemini.gemini-2.5-flash-preview-05-20', + GeminiGemini25ProPreview0605: 'gemini.gemini-2.5-pro-preview-06-05', + GeminiGemini20Flash: 'gemini.gemini-2.0-flash', + GeminiGemini20FlashLite: 'gemini.gemini-2.0-flash-lite', GroqDistilWhisperLargeV3En: 'groq.distil-whisper-large-v3-en', GroqWhisperLargeV3: 'groq.whisper-large-v3', GroqWhisperLargeV3Turbo: 'groq.whisper-large-v3-turbo', @@ -879,6 +1077,7 @@ export const TranscriptionModelIdentifier = { OpenaiGpt4oTranscribe: 'openai.gpt-4o-transcribe', OpenaiGpt4oMiniTranscribe: 'openai.gpt-4o-mini-transcribe', RevaiMachine: 'revai.machine', + RevaiFusion: 'revai.fusion', SpeechmaticsEnhanced: 'speechmatics.enhanced', SpeechmaticsStandard: 'speechmatics.standard' } as const; @@ -905,99 +1104,6 @@ export interface TranscriptionOnlyText { */ 'text': string; } -/** - * Configuration options for transcribing audio specified by a remote URL via the `/transcribe-remote` endpoint. 
- * @export - * @interface TranscriptionOptions - */ -export interface TranscriptionOptions { - /** - * The publicly accessible URL of the audio file to transcribe. The API server must be able to fetch the audio from this URL. - * @type {string} - * @memberof TranscriptionOptions - */ - 'file_url': string; - /** - * - * @type {TranscriptionModelIdentifier} - * @memberof TranscriptionOptions - */ - 'model': TranscriptionModelIdentifier; - /** - * - * @type {TranscriptLanguageCode} - * @memberof TranscriptionOptions - */ - 'language'?: TranscriptLanguageCode; - /** - * - * @type {TranscriptOutputFormat} - * @memberof TranscriptionOptions - */ - 'output_format'?: TranscriptOutputFormat; - /** - * Whether to add punctuation. Support varies by model (e.g., Deepgram, AssemblyAI). Defaults to `true`. - * @type {boolean} - * @memberof TranscriptionOptions - */ - 'punctuation'?: boolean; - /** - * Level of timestamp detail (`word` or `segment`). Defaults to `segment`. - * @type {string} - * @memberof TranscriptionOptions - */ - 'timestamp_granularity'?: TranscriptionOptionsTimestampGranularityEnum; - /** - * Enable speaker diarization. Defaults to `false`. - * @type {boolean} - * @memberof TranscriptionOptions - */ - 'diarization'?: boolean; - /** - * Optional text prompt to guide the transcription model. Support varies (e.g., OpenAI). - * @type {string} - * @memberof TranscriptionOptions - */ - 'initial_prompt'?: string; - /** - * Controls output randomness for supported models (e.g., OpenAI). Value between 0 and 1. - * @type {number} - * @memberof TranscriptionOptions - */ - 'temperature'?: number; - /** - * Enable provider-specific smart formatting (e.g., Deepgram). Defaults vary. - * @type {boolean} - * @memberof TranscriptionOptions - */ - 'smart_format'?: boolean; - /** - * Hint for the number of expected speakers for diarization (e.g., RevAI, Deepgram). 
- * @type {number} - * @memberof TranscriptionOptions - */ - 'speakers_expected'?: number; - /** - * List of custom words/phrases to improve recognition (e.g., Deepgram, AssemblyAI). - * @type {Array} - * @memberof TranscriptionOptions - */ - 'custom_vocabulary'?: Array; - /** - * An array of replacement rules to be applied directly to this transcription request, in order. This allows defining rules inline instead of using a pre-saved `ruleset_id`. - * @type {Array} - * @memberof TranscriptionOptions - */ - 'replacement_ruleset'?: Array; -} - -export const TranscriptionOptionsTimestampGranularityEnum = { - Word: 'word', - Segment: 'segment' -} as const; - -export type TranscriptionOptionsTimestampGranularityEnum = typeof TranscriptionOptionsTimestampGranularityEnum[keyof typeof TranscriptionOptionsTimestampGranularityEnum]; - /** * The identifier for the underlying Speech-to-Text service provider (e.g., \'openai\', \'deepgram\'). * @export @@ -1012,6 +1118,7 @@ export const TranscriptionProvider = { Deepgram: 'deepgram', Falai: 'falai', Fireworksai: 'fireworksai', + Gemini: 'gemini', Gladia: 'gladia', Google: 'google', Groq: 'groq', @@ -1656,13 +1763,13 @@ export const SpeechToTextApiAxiosParamCreator = function (configuration?: Config /** * This endpoint allows you to transcribe an audio file hosted at a publicly accessible URL. Provide the URL and transcription options within the JSON request body. Useful for transcribing files already stored online. * @summary Transcribe an audio file located at a remote URL. - * @param {TranscriptionOptions} transcriptionOptions JSON object containing the URL of the audio file and the desired transcription options. + * @param {RemoteTranscriptionConfiguration} remoteTranscriptionConfiguration JSON object containing the URL of the audio file and the desired transcription options. * @param {*} [options] Override http request option. 
* @throws {RequiredError} */ - transcribeRemote: async (transcriptionOptions: TranscriptionOptions, options: RawAxiosRequestConfig = {}): Promise => { - // verify required parameter 'transcriptionOptions' is not null or undefined - assertParamExists('transcribeRemote', 'transcriptionOptions', transcriptionOptions) + transcribeRemote: async (remoteTranscriptionConfiguration: RemoteTranscriptionConfiguration, options: RawAxiosRequestConfig = {}): Promise => { + // verify required parameter 'remoteTranscriptionConfiguration' is not null or undefined + assertParamExists('transcribeRemote', 'remoteTranscriptionConfiguration', remoteTranscriptionConfiguration) const localVarPath = `/transcribe-remote`; // use dummy base URL string because the URL constructor only accepts absolute URLs. const localVarUrlObj = new URL(localVarPath, DUMMY_BASE_URL); @@ -1686,7 +1793,7 @@ export const SpeechToTextApiAxiosParamCreator = function (configuration?: Config setSearchParams(localVarUrlObj, localVarQueryParameter); let headersFromBaseOptions = baseOptions && baseOptions.headers ? baseOptions.headers : {}; localVarRequestOptions.headers = {...localVarHeaderParameter, ...headersFromBaseOptions, ...options.headers}; - localVarRequestOptions.data = serializeDataIfNeeded(transcriptionOptions, localVarRequestOptions, configuration) + localVarRequestOptions.data = serializeDataIfNeeded(remoteTranscriptionConfiguration, localVarRequestOptions, configuration) return { url: toPathString(localVarUrlObj), @@ -1743,12 +1850,12 @@ export const SpeechToTextApiFp = function(configuration?: Configuration) { /** * This endpoint allows you to transcribe an audio file hosted at a publicly accessible URL. Provide the URL and transcription options within the JSON request body. Useful for transcribing files already stored online. * @summary Transcribe an audio file located at a remote URL. 
- * @param {TranscriptionOptions} transcriptionOptions JSON object containing the URL of the audio file and the desired transcription options. + * @param {RemoteTranscriptionConfiguration} remoteTranscriptionConfiguration JSON object containing the URL of the audio file and the desired transcription options. * @param {*} [options] Override http request option. * @throws {RequiredError} */ - async transcribeRemote(transcriptionOptions: TranscriptionOptions, options?: RawAxiosRequestConfig): Promise<(axios?: AxiosInstance, basePath?: string) => AxiosPromise> { - const localVarAxiosArgs = await localVarAxiosParamCreator.transcribeRemote(transcriptionOptions, options); + async transcribeRemote(remoteTranscriptionConfiguration: RemoteTranscriptionConfiguration, options?: RawAxiosRequestConfig): Promise<(axios?: AxiosInstance, basePath?: string) => AxiosPromise> { + const localVarAxiosArgs = await localVarAxiosParamCreator.transcribeRemote(remoteTranscriptionConfiguration, options); const localVarOperationServerIndex = configuration?.serverIndex ?? 0; const localVarOperationServerBasePath = operationServerMap['SpeechToTextApi.transcribeRemote']?.[localVarOperationServerIndex]?.url; return (axios, basePath) => createRequestFunction(localVarAxiosArgs, globalAxios, BASE_PATH, configuration)(axios, localVarOperationServerBasePath || basePath); @@ -1797,12 +1904,12 @@ export const SpeechToTextApiFactory = function (configuration?: Configuration, b /** * This endpoint allows you to transcribe an audio file hosted at a publicly accessible URL. Provide the URL and transcription options within the JSON request body. Useful for transcribing files already stored online. * @summary Transcribe an audio file located at a remote URL. - * @param {TranscriptionOptions} transcriptionOptions JSON object containing the URL of the audio file and the desired transcription options. 
+ * @param {RemoteTranscriptionConfiguration} remoteTranscriptionConfiguration JSON object containing the URL of the audio file and the desired transcription options. * @param {*} [options] Override http request option. * @throws {RequiredError} */ - transcribeRemote(transcriptionOptions: TranscriptionOptions, options?: RawAxiosRequestConfig): AxiosPromise { - return localVarFp.transcribeRemote(transcriptionOptions, options).then((request) => request(axios, basePath)); + transcribeRemote(remoteTranscriptionConfiguration: RemoteTranscriptionConfiguration, options?: RawAxiosRequestConfig): AxiosPromise { + return localVarFp.transcribeRemote(remoteTranscriptionConfiguration, options).then((request) => request(axios, basePath)); }, }; }; @@ -1852,13 +1959,13 @@ export class SpeechToTextApi extends BaseAPI { /** * This endpoint allows you to transcribe an audio file hosted at a publicly accessible URL. Provide the URL and transcription options within the JSON request body. Useful for transcribing files already stored online. * @summary Transcribe an audio file located at a remote URL. - * @param {TranscriptionOptions} transcriptionOptions JSON object containing the URL of the audio file and the desired transcription options. + * @param {RemoteTranscriptionConfiguration} remoteTranscriptionConfiguration JSON object containing the URL of the audio file and the desired transcription options. * @param {*} [options] Override http request option. 
* @throws {RequiredError} * @memberof SpeechToTextApi */ - public transcribeRemote(transcriptionOptions: TranscriptionOptions, options?: RawAxiosRequestConfig) { - return SpeechToTextApiFp(this.configuration).transcribeRemote(transcriptionOptions, options).then((request) => request(this.axios, this.basePath)); + public transcribeRemote(remoteTranscriptionConfiguration: RemoteTranscriptionConfiguration, options?: RawAxiosRequestConfig) { + return SpeechToTextApiFp(this.configuration).transcribeRemote(remoteTranscriptionConfiguration, options).then((request) => request(this.axios, this.basePath)); } } diff --git a/base.ts b/base.ts index 7743d94..fd9ea77 100644 --- a/base.ts +++ b/base.ts @@ -4,7 +4,7 @@ * Speechall API * The Speechall REST API provides powerful and flexible speech-to-text capabilities. It allows you to transcribe audio files using various underlying STT providers and models, optionally apply custom text replacement rules, and access results in multiple formats. The API includes standard endpoints for transcription and endpoints compatible with the OpenAI API structure. * - * The version of the OpenAPI document: 0.0.1 + * The version of the OpenAPI document: 0.1.0 * * * NOTE: This class is auto generated by OpenAPI Generator (https://openapi-generator.tech). diff --git a/common.ts b/common.ts index 0d26416..586ac16 100644 --- a/common.ts +++ b/common.ts @@ -4,7 +4,7 @@ * Speechall API * The Speechall REST API provides powerful and flexible speech-to-text capabilities. It allows you to transcribe audio files using various underlying STT providers and models, optionally apply custom text replacement rules, and access results in multiple formats. The API includes standard endpoints for transcription and endpoints compatible with the OpenAI API structure. * - * The version of the OpenAPI document: 0.0.1 + * The version of the OpenAPI document: 0.1.0 * * * NOTE: This class is auto generated by OpenAPI Generator (https://openapi-generator.tech). 
diff --git a/configuration.ts b/configuration.ts index 96fe6d6..619b885 100644 --- a/configuration.ts +++ b/configuration.ts @@ -4,7 +4,7 @@ * Speechall API * The Speechall REST API provides powerful and flexible speech-to-text capabilities. It allows you to transcribe audio files using various underlying STT providers and models, optionally apply custom text replacement rules, and access results in multiple formats. The API includes standard endpoints for transcription and endpoints compatible with the OpenAI API structure. * - * The version of the OpenAPI document: 0.0.1 + * The version of the OpenAPI document: 0.1.0 * * * NOTE: This class is auto generated by OpenAPI Generator (https://openapi-generator.tech). diff --git a/docs/BaseTranscriptionConfiguration.md b/docs/BaseTranscriptionConfiguration.md new file mode 100644 index 0000000..3a7bc39 --- /dev/null +++ b/docs/BaseTranscriptionConfiguration.md @@ -0,0 +1,43 @@ +# BaseTranscriptionConfiguration + +Common configuration options for transcription, applicable to both direct uploads and remote URLs. + +## Properties + +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +**model** | [**TranscriptionModelIdentifier**](TranscriptionModelIdentifier.md) | | [default to undefined] +**language** | [**TranscriptLanguageCode**](TranscriptLanguageCode.md) | | [optional] [default to undefined] +**output_format** | [**TranscriptOutputFormat**](TranscriptOutputFormat.md) | | [optional] [default to undefined] +**ruleset_id** | **string** | The unique identifier (UUID) of a pre-defined replacement ruleset to apply to the final transcription text. | [optional] [default to undefined] +**punctuation** | **boolean** | Whether to add punctuation. Support varies by model (e.g., Deepgram, AssemblyAI). Defaults to `true`. | [optional] [default to true] +**timestamp_granularity** | **string** | Level of timestamp detail (`word` or `segment`). Defaults to `segment`. 
| [optional] [default to TimestampGranularityEnum_Segment] +**diarization** | **boolean** | Enable speaker diarization. Defaults to `false`. | [optional] [default to false] +**initial_prompt** | **string** | Optional text prompt to guide the transcription model. Support varies (e.g., OpenAI). | [optional] [default to undefined] +**temperature** | **number** | Controls output randomness for supported models (e.g., OpenAI). Value between 0 and 1. | [optional] [default to undefined] +**smart_format** | **boolean** | Enable provider-specific smart formatting (e.g., Deepgram). Defaults vary. | [optional] [default to undefined] +**speakers_expected** | **number** | Hint for the number of expected speakers for diarization (e.g., RevAI, Deepgram). | [optional] [default to undefined] +**custom_vocabulary** | **Array<string>** | List of custom words/phrases to improve recognition (e.g., Deepgram, AssemblyAI). | [optional] [default to undefined] + +## Example + +```typescript +import { BaseTranscriptionConfiguration } from './api'; + +const instance: BaseTranscriptionConfiguration = { + model, + language, + output_format, + ruleset_id, + punctuation, + timestamp_granularity, + diarization, + initial_prompt, + temperature, + smart_format, + speakers_expected, + custom_vocabulary, +}; +``` + +[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) diff --git a/docs/RemoteTranscriptionConfiguration.md b/docs/RemoteTranscriptionConfiguration.md new file mode 100644 index 0000000..ad15323 --- /dev/null +++ b/docs/RemoteTranscriptionConfiguration.md @@ -0,0 +1,47 @@ +# RemoteTranscriptionConfiguration + +Configuration options for transcribing audio specified by a remote URL via the `/transcribe-remote` endpoint. 
+ +## Properties + +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +**model** | [**TranscriptionModelIdentifier**](TranscriptionModelIdentifier.md) | | [default to undefined] +**language** | [**TranscriptLanguageCode**](TranscriptLanguageCode.md) | | [optional] [default to undefined] +**output_format** | [**TranscriptOutputFormat**](TranscriptOutputFormat.md) | | [optional] [default to undefined] +**ruleset_id** | **string** | The unique identifier (UUID) of a pre-defined replacement ruleset to apply to the final transcription text. | [optional] [default to undefined] +**punctuation** | **boolean** | Whether to add punctuation. Support varies by model (e.g., Deepgram, AssemblyAI). Defaults to `true`. | [optional] [default to true] +**timestamp_granularity** | **string** | Level of timestamp detail (`word` or `segment`). Defaults to `segment`. | [optional] [default to TimestampGranularityEnum_Segment] +**diarization** | **boolean** | Enable speaker diarization. Defaults to `false`. | [optional] [default to false] +**initial_prompt** | **string** | Optional text prompt to guide the transcription model. Support varies (e.g., OpenAI). | [optional] [default to undefined] +**temperature** | **number** | Controls output randomness for supported models (e.g., OpenAI). Value between 0 and 1. | [optional] [default to undefined] +**smart_format** | **boolean** | Enable provider-specific smart formatting (e.g., Deepgram). Defaults vary. | [optional] [default to undefined] +**speakers_expected** | **number** | Hint for the number of expected speakers for diarization (e.g., RevAI, Deepgram). | [optional] [default to undefined] +**custom_vocabulary** | **Array<string>** | List of custom words/phrases to improve recognition (e.g., Deepgram, AssemblyAI). | [optional] [default to undefined] +**file_url** | **string** | The publicly accessible URL of the audio file to transcribe. 
The API server must be able to fetch the audio from this URL. | [default to undefined] +**replacement_ruleset** | [**Array<ReplacementRule>**](ReplacementRule.md) | An array of replacement rules to be applied directly to this transcription request, in order. This allows defining rules inline instead of (or in addition to) using a pre-saved `ruleset_id`. | [optional] [default to undefined] + +## Example + +```typescript +import { RemoteTranscriptionConfiguration } from './api'; + +const instance: RemoteTranscriptionConfiguration = { + model, + language, + output_format, + ruleset_id, + punctuation, + timestamp_granularity, + diarization, + initial_prompt, + temperature, + smart_format, + speakers_expected, + custom_vocabulary, + file_url, + replacement_ruleset, +}; +``` + +[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) diff --git a/docs/SpeechToTextApi.md b/docs/SpeechToTextApi.md index b5b2423..7b16d9e 100644 --- a/docs/SpeechToTextApi.md +++ b/docs/SpeechToTextApi.md @@ -156,7 +156,7 @@ const { status, data } = await apiInstance.transcribe( [[Back to top]](#) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to Model list]](../README.md#documentation-for-models) [[Back to README]](../README.md) # **transcribeRemote** -> TranscriptionResponse transcribeRemote(transcriptionOptions) +> TranscriptionResponse transcribeRemote(remoteTranscriptionConfiguration) This endpoint allows you to transcribe an audio file hosted at a publicly accessible URL. Provide the URL and transcription options within the JSON request body. Useful for transcribing files already stored online. 
@@ -166,16 +166,16 @@ This endpoint allows you to transcribe an audio file hosted at a publicly access import { SpeechToTextApi, Configuration, - TranscriptionOptions + RemoteTranscriptionConfiguration } from './api'; const configuration = new Configuration(); const apiInstance = new SpeechToTextApi(configuration); -let transcriptionOptions: TranscriptionOptions; //JSON object containing the URL of the audio file and the desired transcription options. +let remoteTranscriptionConfiguration: RemoteTranscriptionConfiguration; //JSON object containing the URL of the audio file and the desired transcription options. const { status, data } = await apiInstance.transcribeRemote( - transcriptionOptions + remoteTranscriptionConfiguration ); ``` @@ -183,7 +183,7 @@ const { status, data } = await apiInstance.transcribeRemote( |Name | Type | Description | Notes| |------------- | ------------- | ------------- | -------------| -| **transcriptionOptions** | **TranscriptionOptions**| JSON object containing the URL of the audio file and the desired transcription options. | | +| **remoteTranscriptionConfiguration** | **RemoteTranscriptionConfiguration**| JSON object containing the URL of the audio file and the desired transcription options. 
| | ### Return type diff --git a/docs/TranscriptionModelIdentifier.md b/docs/TranscriptionModelIdentifier.md index 2b7841f..d189a23 100644 --- a/docs/TranscriptionModelIdentifier.md +++ b/docs/TranscriptionModelIdentifier.md @@ -10,10 +10,18 @@ Unique identifier for a specific Speech-to-Text model, composed as `provider.mod * `AssemblyaiNano` (value: `'assemblyai.nano'`) +* `AssemblyaiSlam1` (value: `'assemblyai.slam-1'`) + +* `AssemblyaiUniversal` (value: `'assemblyai.universal'`) + * `AzureStandard` (value: `'azure.standard'`) * `CloudflareWhisper` (value: `'cloudflare.whisper'`) +* `CloudflareWhisperLargeV3Turbo` (value: `'cloudflare.whisper-large-v3-turbo'`) + +* `CloudflareWhisperTinyEn` (value: `'cloudflare.whisper-tiny-en'`) + * `DeepgramBase` (value: `'deepgram.base'`) * `DeepgramBaseConversationalai` (value: `'deepgram.base-conversationalai'`) @@ -42,6 +50,10 @@ Unique identifier for a specific Speech-to-Text model, composed as `provider.mod * `DeepgramNova` (value: `'deepgram.nova'`) +* `DeepgramNovaGeneral` (value: `'deepgram.nova-general'`) + +* `DeepgramNovaPhonecall` (value: `'deepgram.nova-phonecall'`) + * `DeepgramNova2` (value: `'deepgram.nova-2'`) * `DeepgramNova2Atc` (value: `'deepgram.nova-2-atc'`) @@ -68,9 +80,9 @@ Unique identifier for a specific Speech-to-Text model, composed as `provider.mod * `DeepgramNova3` (value: `'deepgram.nova-3'`) -* `DeepgramNovaGeneral` (value: `'deepgram.nova-general'`) +* `DeepgramNova3General` (value: `'deepgram.nova-3-general'`) -* `DeepgramNovaPhonecall` (value: `'deepgram.nova-phonecall'`) +* `DeepgramNova3Medical` (value: `'deepgram.nova-3-medical'`) * `DeepgramWhisper` (value: `'deepgram.whisper'`) @@ -84,6 +96,10 @@ Unique identifier for a specific Speech-to-Text model, composed as `provider.mod * `DeepgramWhisperTiny` (value: `'deepgram.whisper-tiny'`) +* `FalaiElevenlabsSpeechToText` (value: `'falai.elevenlabs-speech-to-text'`) + +* `FalaiSpeechToText` (value: `'falai.speech-to-text'`) + * `FalaiWhisper` 
(value: `'falai.whisper'`) * `FalaiWizper` (value: `'falai.wizper'`) @@ -98,6 +114,14 @@ Unique identifier for a specific Speech-to-Text model, composed as `provider.mod * `GoogleStandard` (value: `'google.standard'`) +* `GeminiGemini25FlashPreview0520` (value: `'gemini.gemini-2.5-flash-preview-05-20'`) + +* `GeminiGemini25ProPreview0605` (value: `'gemini.gemini-2.5-pro-preview-06-05'`) + +* `GeminiGemini20Flash` (value: `'gemini.gemini-2.0-flash'`) + +* `GeminiGemini20FlashLite` (value: `'gemini.gemini-2.0-flash-lite'`) + * `GroqDistilWhisperLargeV3En` (value: `'groq.distil-whisper-large-v3-en'`) * `GroqWhisperLargeV3` (value: `'groq.whisper-large-v3'`) @@ -114,6 +138,8 @@ Unique identifier for a specific Speech-to-Text model, composed as `provider.mod * `RevaiMachine` (value: `'revai.machine'`) +* `RevaiFusion` (value: `'revai.fusion'`) + * `SpeechmaticsEnhanced` (value: `'speechmatics.enhanced'`) * `SpeechmaticsStandard` (value: `'speechmatics.standard'`) diff --git a/docs/TranscriptionProvider.md b/docs/TranscriptionProvider.md index d03ae3b..fd9bd69 100644 --- a/docs/TranscriptionProvider.md +++ b/docs/TranscriptionProvider.md @@ -18,6 +18,8 @@ The identifier for the underlying Speech-to-Text service provider (e.g., \'opena * `Fireworksai` (value: `'fireworksai'`) +* `Gemini` (value: `'gemini'`) + * `Gladia` (value: `'gladia'`) * `Google` (value: `'google'`) diff --git a/example.ts b/example.ts index 47cdf41..5a1ae39 100644 --- a/example.ts +++ b/example.ts @@ -3,7 +3,7 @@ import { SpeechToTextApi, // OpenAICompatibleSpeechToTextApi, ReplacementRulesApi, - TranscriptionOptions, + RemoteTranscriptionConfiguration, TranscriptionModelIdentifier } from './index'; @@ -33,7 +33,7 @@ async function main(): Promise { // Example 1: Basic transcription console.log('Example 1: Basic transcription...'); - const basicOptions: TranscriptionOptions = { + const basicOptions: RemoteTranscriptionConfiguration = { file_url: 'https://example.com/sample-audio.mp3', model: 
TranscriptionModelIdentifier.DeepgramNova2General, language: 'en', @@ -45,7 +45,7 @@ async function main(): Promise { // Example 2: Advanced transcription with options console.log('\nExample 2: Advanced transcription...'); - const advancedOptions: TranscriptionOptions = { + const advancedOptions: RemoteTranscriptionConfiguration = { file_url: 'https://example.com/meeting-audio.mp3', model: TranscriptionModelIdentifier.DeepgramNova2Meeting, language: 'en', diff --git a/index.ts b/index.ts index 7803840..a46dd3d 100644 --- a/index.ts +++ b/index.ts @@ -4,7 +4,7 @@ * Speechall API * The Speechall REST API provides powerful and flexible speech-to-text capabilities. It allows you to transcribe audio files using various underlying STT providers and models, optionally apply custom text replacement rules, and access results in multiple formats. The API includes standard endpoints for transcription and endpoints compatible with the OpenAI API structure. * - * The version of the OpenAPI document: 0.0.1 + * The version of the OpenAPI document: 0.1.0 * * * NOTE: This class is auto generated by OpenAPI Generator (https://openapi-generator.tech). diff --git a/package.json b/package.json index 0b8b670..df2230c 100644 --- a/package.json +++ b/package.json @@ -19,7 +19,9 @@ "prepublishOnly": "npm run clean && npm run build", "test": "echo \"Error: no test specified\" && exit 1", "lint": "eslint . --ext .ts", - "lint:fix": "eslint . --ext .ts --fix" + "lint:fix": "eslint . --ext .ts --fix", + "generate": "node scripts/generate-sdk.js", + "generate:bash": "bash scripts/generate-sdk.sh" }, "keywords": [ "speechall", diff --git a/scripts/README.md b/scripts/README.md new file mode 100644 index 0000000..e522094 --- /dev/null +++ b/scripts/README.md @@ -0,0 +1,92 @@ +# SDK Generation Scripts + +This directory contains scripts to automatically generate the TypeScript SDK from the OpenAPI specification. + +## Available Scripts + +### 1. 
Node.js Script (`generate-sdk.js`) +- **Usage**: `npm run generate` or `node scripts/generate-sdk.js` +- **Language**: JavaScript (Node.js) +- **Features**: + - Downloads OpenAPI spec from remote URL + - Generates TypeScript SDK using openapi-generator + - Proper error handling and cleanup + - Process interruption handling + +### 2. Bash Script (`generate-sdk.sh`) +- **Usage**: `npm run generate:bash` or `bash scripts/generate-sdk.sh` +- **Language**: Bash +- **Features**: + - Simple and lightweight + - Colorized output + - Dependency checking + - Automatic cleanup + +## Prerequisites + +- **openapi-generator**: Must be installed and available in PATH + ```bash + # Install with Homebrew (macOS) + brew install openapi-generator + + # Or with npm globally + npm install -g @openapitools/openapi-generator-cli + ``` + +## Configuration + +Both scripts are configured to: +- Download from: `https://raw.githubusercontent.com/Speechall/speechall-openapi/refs/heads/main/openapi.yaml` +- Generate: TypeScript Axios client +- Output to: Current directory (.) + +To modify these settings, edit the configuration variables at the top of either script. + +## Generated Files + +The scripts will generate/update: +- `api.ts` - API client classes +- `base.ts` - Base classes and utilities +- `common.ts` - Common types and utilities +- `configuration.ts` - Configuration classes +- `index.ts` - Main export file +- `docs/` - Documentation files +- `.openapi-generator/` - Generator metadata + +## Best Practices + +1. **Always commit before regenerating** - The scripts will overwrite existing files +2. **Review changes** - Check the generated code for any breaking changes +3. **Run tests** - Ensure the generated SDK works as expected +4. **Update version** - Consider updating the package version after regeneration + +## Troubleshooting + +### Common Issues + +1. 
**"openapi-generator command not found"**
+   - Install openapi-generator using the methods above
+   - Verify installation: `openapi-generator version`
+
+2. **"Failed to download OpenAPI specification"**
+   - Check internet connection
+   - Verify the OpenAPI URL is accessible
+   - Check if the repository is public
+
+3. **"Permission denied"**
+   - Make scripts executable: `chmod +x scripts/*.sh`
+
+### Manual Generation
+
+If the scripts fail, you can generate manually:
+
+```bash
+# Download the spec
+curl -o temp-openapi.yaml https://raw.githubusercontent.com/Speechall/speechall-openapi/refs/heads/main/openapi.yaml
+
+# Generate the SDK
+openapi-generator generate -i temp-openapi.yaml -g typescript-axios -o .
+
+# Clean up
+rm temp-openapi.yaml
+```
\ No newline at end of file
diff --git a/scripts/generate-sdk.js b/scripts/generate-sdk.js
new file mode 100755
index 0000000..8cd8dca
--- /dev/null
+++ b/scripts/generate-sdk.js
@@ -0,0 +1,92 @@
+#!/usr/bin/env node
+
+const https = require('https');
+const fs = require('fs');
+const path = require('path');
+const { execSync } = require('child_process');
+
+const OPENAPI_URL = 'https://raw.githubusercontent.com/Speechall/speechall-openapi/refs/heads/main/openapi.yaml';
+const TEMP_SPEC_FILE = 'temp-openapi.yaml';
+
+async function downloadOpenAPISpec() {
+  console.log('📥 Downloading OpenAPI specification...');
+
+  return new Promise((resolve, reject) => {
+    const file = fs.createWriteStream(TEMP_SPEC_FILE);
+
+    https.get(OPENAPI_URL, (response) => {
+      if (response.statusCode !== 200) {
+        reject(new Error(`Failed to download OpenAPI spec: HTTP ${response.statusCode}`));
+        return;
+      }
+
+      response.pipe(file);
+
+      file.on('finish', () => {
+        file.close();
+        console.log('✅ OpenAPI specification downloaded successfully');
+        resolve();
+      });
+
+      file.on('error', (err) => {
+        fs.unlink(TEMP_SPEC_FILE, () => {}); // Delete temp file on error
+        reject(err);
+      });
+    }).on('error', (err) => {
+      reject(err);
+    });
+  });
+}
+
+function generateSDK() {
+  console.log('🔧 Generating TypeScript SDK...');
+
+  try {
+    execSync(`openapi-generator generate -i ${TEMP_SPEC_FILE} -g typescript-axios -o .`, {
+      stdio: 'inherit'
+    });
+    console.log('✅ SDK generated successfully');
+  } catch (error) {
+    console.error('❌ Failed to generate SDK:', error.message);
+    process.exit(1);
+  }
+}
+
+function cleanup() {
+  console.log('🧹 Cleaning up temporary files...');
+
+  if (fs.existsSync(TEMP_SPEC_FILE)) {
+    fs.unlinkSync(TEMP_SPEC_FILE);
+    console.log('✅ Temporary files cleaned up');
+  }
+}
+
+async function main() {
+  console.log('🚀 Starting SDK generation process...\n');
+
+  try {
+    await downloadOpenAPISpec();
+    generateSDK();
+    cleanup();
+    console.log('\n🎉 SDK generation completed successfully!');
+  } catch (error) {
+    console.error('\n❌ SDK generation failed:', error.message);
+    cleanup();
+    process.exit(1);
+  }
+}
+
+// Handle process interruption
+process.on('SIGINT', () => {
+  console.log('\n⚠️ Process interrupted');
+  cleanup();
+  process.exit(1);
+});
+
+process.on('SIGTERM', () => {
+  console.log('\n⚠️ Process terminated');
+  cleanup();
+  process.exit(1);
+});
+
+main();
\ No newline at end of file
diff --git a/scripts/generate-sdk.sh b/scripts/generate-sdk.sh
new file mode 100755
index 0000000..447b082
--- /dev/null
+++ b/scripts/generate-sdk.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+
+# Configuration
+OPENAPI_URL="https://raw.githubusercontent.com/Speechall/speechall-openapi/refs/heads/main/openapi.yaml"
+TEMP_SPEC_FILE="temp-openapi.yaml"
+GENERATOR="typescript-axios"
+OUTPUT_DIR="."
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m' # No Color
+
+# Function to print colored output
+print_status() {
+    echo -e "${BLUE}$1${NC}"
+}
+
+print_success() {
+    echo -e "${GREEN}✅ $1${NC}"
+}
+
+print_error() {
+    echo -e "${RED}❌ $1${NC}"
+}
+
+print_warning() {
+    echo -e "${YELLOW}⚠️ $1${NC}"
+}
+
+# Function to cleanup temporary files
+cleanup() {
+    if [ -f "$TEMP_SPEC_FILE" ]; then
+        rm -f "$TEMP_SPEC_FILE"
+        print_success "Temporary files cleaned up"
+    fi
+}
+
+# Trap to ensure cleanup happens on script exit
+trap cleanup EXIT INT TERM
+
+# Check if openapi-generator is installed
+if ! command -v openapi-generator &> /dev/null; then
+    print_error "openapi-generator is not installed or not in PATH"
+    print_status "Please install it with: brew install openapi-generator"
+    exit 1
+fi
+
+print_status "🚀 Starting SDK generation process...\n"
+
+# Download OpenAPI specification
+print_status "📥 Downloading OpenAPI specification..."
+if curl -s -f "$OPENAPI_URL" -o "$TEMP_SPEC_FILE"; then
+    print_success "OpenAPI specification downloaded successfully"
+else
+    print_error "Failed to download OpenAPI specification from $OPENAPI_URL"
+    exit 1
+fi
+
+# Generate SDK
+print_status "🔧 Generating TypeScript SDK..."
+if openapi-generator generate -i "$TEMP_SPEC_FILE" -g "$GENERATOR" -o "$OUTPUT_DIR"; then
+    print_success "SDK generated successfully"
+else
+    print_error "Failed to generate SDK"
+    exit 1
+fi
+
+print_success "\n🎉 SDK generation completed successfully!"
\ No newline at end of file