diff --git a/assemblyai/__version__.py b/assemblyai/__version__.py index 058b03f..4ce7032 100644 --- a/assemblyai/__version__.py +++ b/assemblyai/__version__.py @@ -1 +1 @@ -__version__ = "0.45.1" +__version__ = "0.45.2" diff --git a/assemblyai/types.py b/assemblyai/types.py index aef647a..641e678 100644 --- a/assemblyai/types.py +++ b/assemblyai/types.py @@ -668,6 +668,9 @@ class RawTranscriptionConfig(BaseModel): keyterms_prompt: Optional[List[str]] = None "The list of key terms used to generate the transcript with the Slam-1 speech model. Can't be used together with `prompt`." + language_codes: Optional[List[Union[str, LanguageCode]]] = None + "List of language codes detected in the audio file when language detection is enabled" + model_config = ConfigDict(extra="allow") @@ -1237,6 +1240,12 @@ def speech_threshold(self, threshold: Optional[float]) -> None: self._raw_transcription_config.speech_threshold = threshold + @property + def language_codes(self) -> Optional[List[Union[str, LanguageCode]]]: + "Returns the list of language codes detected in the audio file when language detection is enabled." + + return self._raw_transcription_config.language_codes + # endregion # region: Convenience (helper) methods @@ -1913,6 +1922,9 @@ class BaseTranscript(BaseModel): language_confidence: Optional[float] = None "The confidence score for the detected language, between 0.0 (low confidence) and 1.0 (high confidence)." + language_codes: Optional[List[Union[str, LanguageCode]]] = None + "List of language codes detected in the audio file when language detection is enabled" + speech_threshold: Optional[float] = None "Reject audio files that contain less than this fraction of speech. Valid values are in the range [0,1] inclusive" diff --git a/tests/unit/test_transcript.py b/tests/unit/test_transcript.py index 6ffca1e..e490885 100644 --- a/tests/unit/test_transcript.py +++ b/tests/unit/test_transcript.py @@ -452,6 +452,17 @@ def test_delete_by_id_async(httpx_mock: HTTPXMock): assert transcript.audio_url == mock_transcript_response["audio_url"] +def test_language_codes_field(): + """Test language_codes field deserialization""" + mock_response = factories.generate_dict_factory( + factories.TranscriptCompletedResponseFactory + )() + mock_response["language_codes"] = ["en", "es"] + + response = aai.types.TranscriptResponse(**mock_response) + assert response.language_codes == ["en", "es"] + + def test_speech_model_used_field_deserialization(): """ Tests that the speech_model_used field can be properly deserialized.