From 65237962b87064fa8d57bccae52139e1d8dbdde6 Mon Sep 17 00:00:00 2001 From: Guiners Date: Mon, 28 Jul 2025 13:09:30 +0200 Subject: [PATCH 01/15] adding live_txt_with_audio with test --- genai/live/live_txt_with_audio.py | 70 +++++++++++++++++++++++++++++++ genai/live/requirements.txt | 3 +- genai/live/test_live_examples.py | 6 +++ 3 files changed, 78 insertions(+), 1 deletion(-) create mode 100644 genai/live/live_txt_with_audio.py diff --git a/genai/live/live_txt_with_audio.py b/genai/live/live_txt_with_audio.py new file mode 100644 index 00000000000..11ca218854e --- /dev/null +++ b/genai/live/live_txt_with_audio.py @@ -0,0 +1,70 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# Test file: https://storage.googleapis.com/generativeai-downloads/data/16000.wav +# Install helpers for converting files: pip install librosa soundfile + +import asyncio + + +async def generate_content() -> list[str]: + # [START googlegenaisdk_live_txt_with_audio] + import io + import requests + from google import genai + from google.genai.types import Modality, LiveConnectConfig, Blob + import soundfile as sf + import librosa + + client = genai.Client() + model = "gemini-2.0-flash-live-preview-04-09" + config = LiveConnectConfig(response_modalities=[Modality.TEXT]) + + async with client.aio.live.connect(model=model, config=config) as session: + audio_url = "https://storage.googleapis.com/generativeai-downloads/data/16000.wav" + response = requests.get(audio_url) + response.raise_for_status() + buffer = io.BytesIO(response.content) + y, sr = librosa.load(buffer, sr=16000) + sf.write(buffer, y, sr, format="RAW", subtype="PCM_16") + buffer.seek(0) + audio_bytes = buffer.read() + + # If you've pre-converted to sample.pcm using ffmpeg, use this instead: + # audio_bytes = Path("sample.pcm").read_bytes() + + print("> Answer to this audio url", audio_url, "\n") + + await session.send_realtime_input( + media=Blob(data=audio_bytes, mime_type="audio/pcm;rate=16000") + ) + + response = [] + + async for message in session.receive(): + if message.text is not None: + response.append(message.text) + + print("".join(response)) + # Example output: + # > Answer to this audio url https://storage.googleapis.com/generativeai-downloads/data/16000.wav + # Yes, I can hear you. How can I help you today? + # [STOP googlegenaisdk_live_txt_with_audio] + return response + + + +if __name__ == "__main__": + asyncio.run(generate_content()) \ No newline at end of file diff --git a/genai/live/requirements.txt b/genai/live/requirements.txt index c12e6a7e2f7..8fa57b6175c 100644 --- a/genai/live/requirements.txt +++ b/genai/live/requirements.txt @@ -1,3 +1,4 @@ google-genai==1.20.0 scipy==1.15.3 -websockets==15.0.1 \ No newline at end of file +websockets==15.0.1 +librosa==0.11.0 \ No newline at end of file diff --git a/genai/live/test_live_examples.py b/genai/live/test_live_examples.py index ce382539861..1ce0eeb5bb2 100644 --- a/genai/live/test_live_examples.py +++ b/genai/live/test_live_examples.py @@ -25,6 +25,7 @@ import live_websocket_textgen_with_audio import live_websocket_textgen_with_txt import live_with_txt +import live_txt_with_audio os.environ["GOOGLE_GENAI_USE_VERTEXAI"] = "True" os.environ["GOOGLE_CLOUD_LOCATION"] = "us-central1" @@ -55,3 +56,8 @@ async def test_live_websocket_audiogen_with_txt() -> None: @pytest.mark.asyncio async def test_live_websocket_audiotranscript_with_txt() -> None: assert await live_websocket_audiotranscript_with_txt.generate_content() + + +@pytest.mark.asyncio +async def test_live_txt_with_audio() -> None: + assert await live_txt_with_audio.generate_content() From 9b400a31e9741b6bd5eab1a36d7797ecd712caac Mon Sep 17 00:00:00 2001 From: Guiners Date: Mon, 28 Jul 2025 14:38:57 +0200 Subject: [PATCH 02/15] adding live_audio_with_txt with test --- genai/live/live_audio_with_txt.py | 87 +++++++++++++++++++++++++++++++ genai/live/requirements.txt | 3 +- genai/live/test_live_examples.py | 7 +++ 3 files changed, 96 insertions(+), 1 deletion(-) create mode 100644 genai/live/live_audio_with_txt.py diff --git a/genai/live/live_audio_with_txt.py b/genai/live/live_audio_with_txt.py new file mode 100644 index 00000000000..f12eaab447d --- /dev/null +++ b/genai/live/live_audio_with_txt.py @@ -0,0 +1,87 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# Test file: https://storage.googleapis.com/generativeai-downloads/data/16000.wav +# Install helpers for converting files: pip install librosa soundfile + +import asyncio + + +async def generate_content() -> list[str]: + # [START googlegenaisdk_live_audio_with_txt] + import numpy as np + from IPython.display import Audio, Markdown, display + from google import genai + from google.genai.types import ( + Content, + LiveConnectConfig, + Modality, + Part, + SpeechConfig, + VoiceConfig, + PrebuiltVoiceConfig, + ) + + client = genai.Client() + voice_name = "Aoede" + + config = LiveConnectConfig( + response_modalities=[Modality.AUDIO], + speech_config=SpeechConfig( + voice_config=VoiceConfig( + prebuilt_voice_config=PrebuiltVoiceConfig( + voice_name=voice_name, + ) + ), + ), + ) + model = "gemini-2.0-flash-live-preview-04-09" + + async with client.aio.live.connect( + model=model, + config=config, + ) as session: + text_input = "Hello? Gemini are you there?" + print("> ", text_input, "\n") + + await session.send_client_content( + turns=Content(role="user", parts=[Part(text=text_input)])) + + audio_data = [] + async for message in session.receive(): + if ( + message.server_content.model_turn + and message.server_content.model_turn.parts + ): + for part in message.server_content.model_turn.parts: + if part.inline_data: + audio_data.append( + np.frombuffer(part.inline_data.data, dtype=np.int16) + ) + + if audio_data: + print("Received audio answer:") + display(Audio(np.concatenate(audio_data), rate=24000, autoplay=True)) + + # Example output: + # > Hello? Gemini are you there? + # Received audio answer: + # + # [STOP googlegenaisdk_live_audio_with_txt] + return [] + + +if __name__ == "__main__": + asyncio.run(generate_content()) \ No newline at end of file diff --git a/genai/live/requirements.txt b/genai/live/requirements.txt index 8fa57b6175c..0c3280258af 100644 --- a/genai/live/requirements.txt +++ b/genai/live/requirements.txt @@ -1,4 +1,5 @@ google-genai==1.20.0 scipy==1.15.3 websockets==15.0.1 -librosa==0.11.0 \ No newline at end of file +librosa==0.11.0 +IPython==8.26.0 diff --git a/genai/live/test_live_examples.py b/genai/live/test_live_examples.py index 1ce0eeb5bb2..372f84341d7 100644 --- a/genai/live/test_live_examples.py +++ b/genai/live/test_live_examples.py @@ -26,6 +26,7 @@ import live_websocket_textgen_with_txt import live_with_txt import live_txt_with_audio +import live_audio_with_txt os.environ["GOOGLE_GENAI_USE_VERTEXAI"] = "True" os.environ["GOOGLE_CLOUD_LOCATION"] = "us-central1" @@ -61,3 +62,9 @@ async def test_live_websocket_audiotranscript_with_txt() -> None: @pytest.mark.asyncio async def test_live_txt_with_audio() -> None: assert await live_txt_with_audio.generate_content() + + +@pytest.mark.asyncio +async def test_live_audio_with_txt() -> None: + result = await live_audio_with_txt.generate_content() + assert result is not None From 973f133bc6d48a03caa4601ad7bbf3141ac93b29 Mon Sep 17 00:00:00 2001 From: Guiners Date: Mon, 28 Jul 2025 14:40:26 +0200 Subject: [PATCH 03/15] adding live_audio_with_txt with test --- genai/live/live_audio_with_txt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/genai/live/live_audio_with_txt.py b/genai/live/live_audio_with_txt.py index f12eaab447d..27e9a90ea6e 100644 --- a/genai/live/live_audio_with_txt.py +++ b/genai/live/live_audio_with_txt.py @@ -72,7 +72,7 @@ async def generate_content() -> list[str]: ) if audio_data: - print("Received audio answer:") + print("Received audio answer: ") display(Audio(np.concatenate(audio_data), rate=24000, autoplay=True)) # Example output: From 4858df193c231e882851bc1653b14399bda25dc0 Mon Sep 17 00:00:00 2001 From: Guiners Date: Mon, 28 Jul 2025 15:06:03 +0200 Subject: [PATCH 04/15] adding live_transcribe_with_audio with test --- genai/live/live_audio_with_txt.py | 15 +++-- genai/live/live_transcribe_with_audio.py | 66 +++++++++++++++++++ genai/live/live_txt_with_audio.py | 7 +- .../live/live_websocket_audiogen_with_txt.py | 28 ++++---- ...live_websocket_audiotranscript_with_txt.py | 24 ++++--- .../live/live_websocket_textgen_with_audio.py | 24 ++++--- genai/live/live_websocket_textgen_with_txt.py | 20 +++--- genai/live/live_with_txt.py | 4 +- genai/live/test_live_examples.py | 7 ++ 9 files changed, 144 insertions(+), 51 deletions(-) create mode 100644 genai/live/live_transcribe_with_audio.py diff --git a/genai/live/live_audio_with_txt.py b/genai/live/live_audio_with_txt.py index 27e9a90ea6e..f7b0954bec5 100644 --- a/genai/live/live_audio_with_txt.py +++ b/genai/live/live_audio_with_txt.py @@ -36,6 +36,7 @@ async def generate_content() -> list[str]: client = genai.Client() voice_name = "Aoede" + model = "gemini-2.0-flash-live-preview-04-09" config = LiveConnectConfig( response_modalities=[Modality.AUDIO], @@ -47,23 +48,23 @@ async def generate_content() -> list[str]: ), ), ) - model = "gemini-2.0-flash-live-preview-04-09" async with client.aio.live.connect( - model=model, - config=config, + model=model, + config=config, ) as session: text_input = "Hello? Gemini are you there?" print("> ", text_input, "\n") await session.send_client_content( - turns=Content(role="user", parts=[Part(text=text_input)])) + turns=Content(role="user", parts=[Part(text=text_input)]) + ) audio_data = [] async for message in session.receive(): if ( - message.server_content.model_turn - and message.server_content.model_turn.parts + message.server_content.model_turn + and message.server_content.model_turn.parts ): for part in message.server_content.model_turn.parts: if part.inline_data: @@ -84,4 +85,4 @@ async def generate_content() -> list[str]: if __name__ == "__main__": - asyncio.run(generate_content()) \ No newline at end of file + asyncio.run(generate_content()) diff --git a/genai/live/live_transcribe_with_audio.py b/genai/live/live_transcribe_with_audio.py new file mode 100644 index 00000000000..201c8c4ba3f --- /dev/null +++ b/genai/live/live_transcribe_with_audio.py @@ -0,0 +1,66 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# Test file: https://storage.googleapis.com/generativeai-downloads/data/16000.wav +# Install helpers for converting files: pip install librosa soundfile + +import asyncio + + +async def generate_content(): + # [START googlegenaisdk_live_transcribe_with_audio] + from google import genai + from google.genai.types import ( + LiveConnectConfig, + Modality, + AudioTranscriptionConfig, + Part, + Content, + ) + + client = genai.Client() + model = "gemini-2.0-flash-live-preview-04-09" + config = LiveConnectConfig( + response_modalities=[Modality.AUDIO], + input_audio_transcription=AudioTranscriptionConfig(), + output_audio_transcription=AudioTranscriptionConfig(), + ) + + async with client.aio.live.connect(model=model, config=config) as session: + input_txt = "Hello? Gemini are you there?" + print("> ", input_txt, "\n") + + await session.send_client_content( + turns=Content(role="user", parts=[Part(text=input_txt)]), turn_complete=True + ) + + response = [] + + async for message in session.receive(): + if message.server_content.output_transcription: + if message.server_content.output_transcription.text is not None: + response.append(message.server_content.output_transcription.text) + + print("".join(response)) + + # Example output: + # > Hello? Gemini are you there? + # Yes, I'm here. What would you like to talk about? + # [STOP googlegenaisdk_live_transcribe_with_audio] + return response + + +if __name__ == "__main__": + asyncio.run(generate_content()) diff --git a/genai/live/live_txt_with_audio.py b/genai/live/live_txt_with_audio.py index 11ca218854e..d27fca65999 100644 --- a/genai/live/live_txt_with_audio.py +++ b/genai/live/live_txt_with_audio.py @@ -33,7 +33,9 @@ async def generate_content() -> list[str]: config = LiveConnectConfig(response_modalities=[Modality.TEXT]) async with client.aio.live.connect(model=model, config=config) as session: - audio_url = "https://storage.googleapis.com/generativeai-downloads/data/16000.wav" + audio_url = ( + "https://storage.googleapis.com/generativeai-downloads/data/16000.wav" + ) response = requests.get(audio_url) response.raise_for_status() buffer = io.BytesIO(response.content) @@ -65,6 +67,5 @@ async def generate_content() -> list[str]: return response - if __name__ == "__main__": - asyncio.run(generate_content()) \ No newline at end of file + asyncio.run(generate_content()) diff --git a/genai/live/live_websocket_audiogen_with_txt.py b/genai/live/live_websocket_audiogen_with_txt.py index f7b6f07e5f8..277d4d5f8ba 100644 --- a/genai/live/live_websocket_audiogen_with_txt.py +++ b/genai/live/live_websocket_audiogen_with_txt.py @@ -20,7 +20,9 @@ def get_bearer_token() -> str: import google.auth from google.auth.transport.requests import Request - creds, _ = google.auth.default(scopes=["https://www.googleapis.com/auth/cloud-platform"]) + creds, _ = google.auth.default( + scopes=["https://www.googleapis.com/auth/cloud-platform"] + ) auth_req = Request() creds.refresh(auth_req) bearer_token = creds.token @@ -55,9 +57,7 @@ async def generate_content() -> str: # Websocket Configuration WEBSOCKET_HOST = "us-central1-aiplatform.googleapis.com" - WEBSOCKET_SERVICE_URL = ( - f"wss://{WEBSOCKET_HOST}/ws/google.cloud.aiplatform.v1.LlmBidiService/BidiGenerateContent" - ) + WEBSOCKET_SERVICE_URL = f"wss://{WEBSOCKET_HOST}/ws/google.cloud.aiplatform.v1.LlmBidiService/BidiGenerateContent" # Websocket Authentication headers = { @@ -66,9 +66,7 @@ async def generate_content() -> str: } # Model Configuration - model_path = ( - f"projects/{PROJECT_ID}/locations/{LOCATION}/publishers/google/models/{GEMINI_MODEL_NAME}" - ) + model_path = f"projects/{PROJECT_ID}/locations/{LOCATION}/publishers/google/models/{GEMINI_MODEL_NAME}" model_generation_config = { "response_modalities": ["AUDIO"], "speech_config": { @@ -77,7 +75,9 @@ async def generate_content() -> str: }, } - async with connect(WEBSOCKET_SERVICE_URL, additional_headers=headers) as websocket_session: + async with connect( + WEBSOCKET_SERVICE_URL, additional_headers=headers + ) as websocket_session: # 1. Send setup configuration websocket_config = { "setup": { @@ -120,7 +120,9 @@ async def generate_content() -> str: server_content = response_chunk.get("serverContent") if not server_content: # This might indicate an error or an unexpected message format - print(f"Received non-serverContent message or empty content: {response_chunk}") + print( + f"Received non-serverContent message or empty content: {response_chunk}" + ) break # Collect audio chunks @@ -129,7 +131,9 @@ async def generate_content() -> str: for part in model_turn["parts"]: if part["inlineData"]["mimeType"] == "audio/pcm": audio_chunk = base64.b64decode(part["inlineData"]["data"]) - aggregated_response_parts.append(np.frombuffer(audio_chunk, dtype=np.int16)) + aggregated_response_parts.append( + np.frombuffer(audio_chunk, dtype=np.int16) + ) # End of response if server_content.get("turnComplete"): @@ -137,7 +141,9 @@ async def generate_content() -> str: # Save audio to a file if aggregated_response_parts: - wavfile.write("output.wav", 24000, np.concatenate(aggregated_response_parts)) + wavfile.write( + "output.wav", 24000, np.concatenate(aggregated_response_parts) + ) # Example response: # Setup Response: {'setupComplete': {}} # Input: Hello? Gemini are you there? diff --git a/genai/live/live_websocket_audiotranscript_with_txt.py b/genai/live/live_websocket_audiotranscript_with_txt.py index 5192b81ef17..5304e1914bb 100644 --- a/genai/live/live_websocket_audiotranscript_with_txt.py +++ b/genai/live/live_websocket_audiotranscript_with_txt.py @@ -20,7 +20,9 @@ def get_bearer_token() -> str: import google.auth from google.auth.transport.requests import Request - creds, _ = google.auth.default(scopes=["https://www.googleapis.com/auth/cloud-platform"]) + creds, _ = google.auth.default( + scopes=["https://www.googleapis.com/auth/cloud-platform"] + ) auth_req = Request() creds.refresh(auth_req) bearer_token = creds.token @@ -55,9 +57,7 @@ async def generate_content() -> str: # Websocket Configuration WEBSOCKET_HOST = "us-central1-aiplatform.googleapis.com" - WEBSOCKET_SERVICE_URL = ( - f"wss://{WEBSOCKET_HOST}/ws/google.cloud.aiplatform.v1.LlmBidiService/BidiGenerateContent" - ) + WEBSOCKET_SERVICE_URL = f"wss://{WEBSOCKET_HOST}/ws/google.cloud.aiplatform.v1.LlmBidiService/BidiGenerateContent" # Websocket Authentication headers = { @@ -66,9 +66,7 @@ async def generate_content() -> str: } # Model Configuration - model_path = ( - f"projects/{PROJECT_ID}/locations/{LOCATION}/publishers/google/models/{GEMINI_MODEL_NAME}" - ) + model_path = f"projects/{PROJECT_ID}/locations/{LOCATION}/publishers/google/models/{GEMINI_MODEL_NAME}" model_generation_config = { "response_modalities": ["AUDIO"], "speech_config": { @@ -77,7 +75,9 @@ async def generate_content() -> str: }, } - async with connect(WEBSOCKET_SERVICE_URL, additional_headers=headers) as websocket_session: + async with connect( + WEBSOCKET_SERVICE_URL, additional_headers=headers + ) as websocket_session: # 1. Send setup configuration websocket_config = { "setup": { @@ -125,7 +125,9 @@ async def generate_content() -> str: server_content = response_chunk.get("serverContent") if not server_content: # This might indicate an error or an unexpected message format - print(f"Received non-serverContent message or empty content: {response_chunk}") + print( + f"Received non-serverContent message or empty content: {response_chunk}" + ) break # Transcriptions @@ -142,7 +144,9 @@ async def generate_content() -> str: for part in model_turn["parts"]: if part["inlineData"]["mimeType"] == "audio/pcm": audio_chunk = base64.b64decode(part["inlineData"]["data"]) - aggregated_response_parts.append(np.frombuffer(audio_chunk, dtype=np.int16)) + aggregated_response_parts.append( + np.frombuffer(audio_chunk, dtype=np.int16) + ) # End of response if server_content.get("turnComplete"): diff --git a/genai/live/live_websocket_textgen_with_audio.py b/genai/live/live_websocket_textgen_with_audio.py index de6fd9d55c3..f91cff35b57 100644 --- a/genai/live/live_websocket_textgen_with_audio.py +++ b/genai/live/live_websocket_textgen_with_audio.py @@ -20,7 +20,9 @@ def get_bearer_token() -> str: import google.auth from google.auth.transport.requests import Request - creds, _ = google.auth.default(scopes=["https://www.googleapis.com/auth/cloud-platform"]) + creds, _ = google.auth.default( + scopes=["https://www.googleapis.com/auth/cloud-platform"] + ) auth_req = Request() creds.refresh(auth_req) bearer_token = creds.token @@ -65,9 +67,7 @@ def read_wavefile(filepath: str) -> tuple[str, str]: # Websocket Configuration WEBSOCKET_HOST = "us-central1-aiplatform.googleapis.com" - WEBSOCKET_SERVICE_URL = ( - f"wss://{WEBSOCKET_HOST}/ws/google.cloud.aiplatform.v1.LlmBidiService/BidiGenerateContent" - ) + WEBSOCKET_SERVICE_URL = f"wss://{WEBSOCKET_HOST}/ws/google.cloud.aiplatform.v1.LlmBidiService/BidiGenerateContent" # Websocket Authentication headers = { @@ -76,12 +76,12 @@ def read_wavefile(filepath: str) -> tuple[str, str]: } # Model Configuration - model_path = ( - f"projects/{PROJECT_ID}/locations/{LOCATION}/publishers/google/models/{GEMINI_MODEL_NAME}" - ) + model_path = f"projects/{PROJECT_ID}/locations/{LOCATION}/publishers/google/models/{GEMINI_MODEL_NAME}" model_generation_config = {"response_modalities": ["TEXT"]} - async with connect(WEBSOCKET_SERVICE_URL, additional_headers=headers) as websocket_session: + async with connect( + WEBSOCKET_SERVICE_URL, additional_headers=headers + ) as websocket_session: # 1. Send setup configuration websocket_config = { "setup": { @@ -105,7 +105,9 @@ def read_wavefile(filepath: str) -> tuple[str, str]: return "Error: WebSocket setup failed." # 3. Send audio message - encoded_audio_message, mime_type = read_wavefile("hello_gemini_are_you_there.wav") + encoded_audio_message, mime_type = read_wavefile( + "hello_gemini_are_you_there.wav" + ) # Example audio message: "Hello? Gemini are you there?" user_message = { @@ -136,7 +138,9 @@ def read_wavefile(filepath: str) -> tuple[str, str]: server_content = response_chunk.get("serverContent") if not server_content: # This might indicate an error or an unexpected message format - print(f"Received non-serverContent message or empty content: {response_chunk}") + print( + f"Received non-serverContent message or empty content: {response_chunk}" + ) break # Collect text responses diff --git a/genai/live/live_websocket_textgen_with_txt.py b/genai/live/live_websocket_textgen_with_txt.py index b36487cc9a0..f8e88fa0521 100644 --- a/genai/live/live_websocket_textgen_with_txt.py +++ b/genai/live/live_websocket_textgen_with_txt.py @@ -20,7 +20,9 @@ def get_bearer_token() -> str: import google.auth from google.auth.transport.requests import Request - creds, _ = google.auth.default(scopes=["https://www.googleapis.com/auth/cloud-platform"]) + creds, _ = google.auth.default( + scopes=["https://www.googleapis.com/auth/cloud-platform"] + ) auth_req = Request() creds.refresh(auth_req) bearer_token = creds.token @@ -51,9 +53,7 @@ async def generate_content() -> str: # Websocket Configuration WEBSOCKET_HOST = "us-central1-aiplatform.googleapis.com" - WEBSOCKET_SERVICE_URL = ( - f"wss://{WEBSOCKET_HOST}/ws/google.cloud.aiplatform.v1.LlmBidiService/BidiGenerateContent" - ) + WEBSOCKET_SERVICE_URL = f"wss://{WEBSOCKET_HOST}/ws/google.cloud.aiplatform.v1.LlmBidiService/BidiGenerateContent" # Websocket Authentication headers = { @@ -62,12 +62,12 @@ async def generate_content() -> str: } # Model Configuration - model_path = ( - f"projects/{PROJECT_ID}/locations/{LOCATION}/publishers/google/models/{GEMINI_MODEL_NAME}" - ) + model_path = f"projects/{PROJECT_ID}/locations/{LOCATION}/publishers/google/models/{GEMINI_MODEL_NAME}" model_generation_config = {"response_modalities": ["TEXT"]} - async with connect(WEBSOCKET_SERVICE_URL, additional_headers=headers) as websocket_session: + async with connect( + WEBSOCKET_SERVICE_URL, additional_headers=headers + ) as websocket_session: # 1. Send setup configuration websocket_config = { "setup": { @@ -110,7 +110,9 @@ async def generate_content() -> str: server_content = response_chunk.get("serverContent") if not server_content: # This might indicate an error or an unexpected message format - print(f"Received non-serverContent message or empty content: {response_chunk}") + print( + f"Received non-serverContent message or empty content: {response_chunk}" + ) break # Collect text responses diff --git a/genai/live/live_with_txt.py b/genai/live/live_with_txt.py index a3c75188439..fd412af7740 100644 --- a/genai/live/live_with_txt.py +++ b/genai/live/live_with_txt.py @@ -35,7 +35,9 @@ async def generate_content() -> list[str]: ) as session: text_input = "Hello? Gemini, are you there?" print("> ", text_input, "\n") - await session.send_client_content(turns=Content(role="user", parts=[Part(text=text_input)])) + await session.send_client_content( + turns=Content(role="user", parts=[Part(text=text_input)]) + ) response = [] diff --git a/genai/live/test_live_examples.py b/genai/live/test_live_examples.py index 372f84341d7..4f05dc4299d 100644 --- a/genai/live/test_live_examples.py +++ b/genai/live/test_live_examples.py @@ -27,6 +27,7 @@ import live_with_txt import live_txt_with_audio import live_audio_with_txt +import live_transcribe_with_audio os.environ["GOOGLE_GENAI_USE_VERTEXAI"] = "True" os.environ["GOOGLE_CLOUD_LOCATION"] = "us-central1" @@ -68,3 +69,9 @@ async def test_live_txt_with_audio() -> None: async def test_live_audio_with_txt() -> None: result = await live_audio_with_txt.generate_content() assert result is not None + + +@pytest.mark.asyncio +async def test_live_transcribe_with_audio() -> None: + result = await live_transcribe_with_audio.generate_content() + assert result is not None From 359384bdbbd9e2d04829f0a3d1915bf82b8dc5db Mon Sep 17 00:00:00 2001 From: Robert Kozak <50328216+Guiners@users.noreply.github.com> Date: Mon, 1 Sep 2025 14:24:40 +0200 Subject: [PATCH 05/15] Update genai/live/test_live_examples.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- genai/live/test_live_examples.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/genai/live/test_live_examples.py b/genai/live/test_live_examples.py index 4f05dc4299d..a05e76f1a40 100644 --- a/genai/live/test_live_examples.py +++ b/genai/live/test_live_examples.py @@ -25,8 +25,8 @@ import live_websocket_textgen_with_audio import live_websocket_textgen_with_txt import live_with_txt -import live_txt_with_audio import live_audio_with_txt +import live_txt_with_audio import live_transcribe_with_audio os.environ["GOOGLE_GENAI_USE_VERTEXAI"] = "True" From 68ae455c6994ae3d879d8ebcb70c2d0393e1bc08 Mon Sep 17 00:00:00 2001 From: Guiners Date: Mon, 1 Sep 2025 14:33:26 +0200 Subject: [PATCH 06/15] codereview fix --- genai/live/live_audio_with_txt.py | 2 +- genai/live/live_txt_with_audio.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/genai/live/live_audio_with_txt.py b/genai/live/live_audio_with_txt.py index f7b0954bec5..40a78a64734 100644 --- a/genai/live/live_audio_with_txt.py +++ b/genai/live/live_audio_with_txt.py @@ -80,7 +80,7 @@ async def generate_content() -> list[str]: # > Hello? Gemini are you there? # Received audio answer: # - # [STOP googlegenaisdk_live_audio_with_txt] + # [END googlegenaisdk_live_audio_with_txt] return [] diff --git a/genai/live/live_txt_with_audio.py b/genai/live/live_txt_with_audio.py index d27fca65999..e6a0e00e19d 100644 --- a/genai/live/live_txt_with_audio.py +++ b/genai/live/live_txt_with_audio.py @@ -63,7 +63,7 @@ async def generate_content() -> list[str]: # Example output: # > Answer to this audio url https://storage.googleapis.com/generativeai-downloads/data/16000.wav # Yes, I can hear you. How can I help you today? - # [STOP googlegenaisdk_live_txt_with_audio] + # [END googlegenaisdk_live_txt_with_audio] return response From 2585661b4e5b0a112fbbbf579b3b2c0104eaa93b Mon Sep 17 00:00:00 2001 From: Guiners Date: Fri, 5 Sep 2025 10:51:43 +0200 Subject: [PATCH 07/15] codereview fix --- genai/live/live_audiogen_with_txt.py | 12 +++++++++--- genai/live/live_code_exec_with_txt.py | 10 ++++++++-- genai/live/live_func_call_with_txt.py | 12 +++++++++--- genai/live/live_ground_googsearch_with_txt.py | 10 ++++++++-- genai/live/live_structured_ouput_with_txt.py | 6 ++++-- genai/live/live_transcribe_with_audio.py | 9 +++++++-- genai/live/live_with_txt.py | 9 +++++++-- genai/live/test_live_examples.py | 10 +++++----- 8 files changed, 57 insertions(+), 21 deletions(-) diff --git a/genai/live/live_audiogen_with_txt.py b/genai/live/live_audiogen_with_txt.py index cf7f24a6fc4..477d4f0d40d 100644 --- a/genai/live/live_audiogen_with_txt.py +++ b/genai/live/live_audiogen_with_txt.py @@ -24,9 +24,15 @@ async def generate_content() -> None: import numpy as np import scipy.io.wavfile as wavfile from google import genai - from google.genai.types import (Content, LiveConnectConfig, Modality, Part, - PrebuiltVoiceConfig, SpeechConfig, - VoiceConfig) + from google.genai.types import ( + Content, + LiveConnectConfig, + Modality, + Part, + PrebuiltVoiceConfig, + SpeechConfig, + VoiceConfig, + ) client = genai.Client() model = "gemini-2.0-flash-live-preview-04-09" diff --git a/genai/live/live_code_exec_with_txt.py b/genai/live/live_code_exec_with_txt.py index 70db7402ee7..6aa1cae7b6d 100644 --- a/genai/live/live_code_exec_with_txt.py +++ b/genai/live/live_code_exec_with_txt.py @@ -18,8 +18,14 @@ async def generate_content() -> list[str]: # [START googlegenaisdk_live_code_exec_with_txt] from google import genai - from google.genai.types import (Content, LiveConnectConfig, Modality, Part, - Tool, ToolCodeExecution) + from google.genai.types import ( + Content, + LiveConnectConfig, + Modality, + Part, + Tool, + ToolCodeExecution, + ) client = genai.Client() model_id = "gemini-2.0-flash-live-preview-04-09" diff --git a/genai/live/live_func_call_with_txt.py b/genai/live/live_func_call_with_txt.py index 7761a49b7b6..b29dd96b78a 100644 --- a/genai/live/live_func_call_with_txt.py +++ b/genai/live/live_func_call_with_txt.py @@ -20,9 +20,15 @@ async def generate_content() -> list[FunctionResponse]: # [START googlegenaisdk_live_func_call_with_txt] from google import genai - from google.genai.types import (Content, FunctionDeclaration, - FunctionResponse, LiveConnectConfig, - Modality, Part, Tool) + from google.genai.types import ( + Content, + FunctionDeclaration, + FunctionResponse, + LiveConnectConfig, + Modality, + Part, + Tool, + ) client = genai.Client() model_id = "gemini-2.0-flash-live-preview-04-09" diff --git a/genai/live/live_ground_googsearch_with_txt.py b/genai/live/live_ground_googsearch_with_txt.py index cfca4a87e1c..82f4281ae6a 100644 --- a/genai/live/live_ground_googsearch_with_txt.py +++ b/genai/live/live_ground_googsearch_with_txt.py @@ -19,8 +19,14 @@ async def generate_content() -> list[str]: # [START googlegenaisdk_live_ground_googsearch_with_txt] from google import genai - from google.genai.types import (Content, GoogleSearch, LiveConnectConfig, - Modality, Part, Tool) + from google.genai.types import ( + Content, + GoogleSearch, + LiveConnectConfig, + Modality, + Part, + Tool, + ) client = genai.Client() model_id = "gemini-2.0-flash-live-preview-04-09" diff --git a/genai/live/live_structured_ouput_with_txt.py b/genai/live/live_structured_ouput_with_txt.py index f0b2466ff5f..38b7df52fcb 100644 --- a/genai/live/live_structured_ouput_with_txt.py +++ b/genai/live/live_structured_ouput_with_txt.py @@ -30,8 +30,10 @@ def generate_content() -> CalendarEvent: import google.auth.transport.requests import openai from google.auth import default - from openai.types.chat import (ChatCompletionSystemMessageParam, - ChatCompletionUserMessageParam) + from openai.types.chat import ( + ChatCompletionSystemMessageParam, + ChatCompletionUserMessageParam, + ) project_id = os.environ["GOOGLE_CLOUD_PROJECT"] location = "us-central1" diff --git a/genai/live/live_transcribe_with_audio.py b/genai/live/live_transcribe_with_audio.py index b702672bc76..644c486675f 100644 --- a/genai/live/live_transcribe_with_audio.py +++ b/genai/live/live_transcribe_with_audio.py @@ -22,8 +22,13 @@ async def generate_content() -> list[str]: # [START googlegenaisdk_live_transcribe_with_audio] from google import genai - from google.genai.types import (AudioTranscriptionConfig, Content, - LiveConnectConfig, Modality, Part) + from google.genai.types import ( + AudioTranscriptionConfig, + Content, + LiveConnectConfig, + Modality, + Part, + ) client = genai.Client() model = "gemini-live-2.5-flash-preview-native-audio" diff --git a/genai/live/live_with_txt.py b/genai/live/live_with_txt.py index 8b8b0908127..76fab43398b 100644 --- a/genai/live/live_with_txt.py +++ b/genai/live/live_with_txt.py @@ -18,8 +18,13 @@ async def generate_content() -> list[str]: # [START googlegenaisdk_live_with_txt] from google import genai - from google.genai.types import (Content, HttpOptions, LiveConnectConfig, - Modality, Part) + from google.genai.types import ( + Content, + HttpOptions, + LiveConnectConfig, + Modality, + Part, + ) client = genai.Client(http_options=HttpOptions(api_version="v1beta1")) model_id = "gemini-2.0-flash-live-preview-04-09" diff --git a/genai/live/test_live_examples.py b/genai/live/test_live_examples.py index adab0df412e..7959d1a91e2 100644 --- a/genai/live/test_live_examples.py +++ b/genai/live/test_live_examples.py @@ -105,16 +105,16 @@ async def test_live_structured_ouput_with_txt() -> None: @pytest.mark.asyncio async def test_live_txt_with_audio() -> None: - assert await live_txt_with_audio.generate_content() + assert await live_txt_with_audio.generate_content() @pytest.mark.asyncio async def test_live_audio_with_txt() -> None: - result = await live_audio_with_txt.generate_content() - assert result is not None + result = await live_audio_with_txt.generate_content() + assert result is not None @pytest.mark.asyncio async def test_live_transcribe_with_audio() -> None: - result = await live_transcribe_with_audio.generate_content() - assert result is not None + result = await live_transcribe_with_audio.generate_content() + assert result is not None From 83ff2133361ffdaaa39e28aaee0c2a821db7e3b1 Mon Sep 17 00:00:00 2001 From: Guiners Date: Fri, 5 Sep 2025 15:10:31 +0200 Subject: [PATCH 08/15] Revert "codereview fix" This reverts commit 2585661b4e5b0a112fbbbf579b3b2c0104eaa93b. --- genai/live/live_audiogen_with_txt.py | 12 +++--------- genai/live/live_code_exec_with_txt.py | 10 ++-------- genai/live/live_func_call_with_txt.py | 12 +++--------- genai/live/live_ground_googsearch_with_txt.py | 10 ++-------- genai/live/live_structured_ouput_with_txt.py | 6 ++---- genai/live/live_transcribe_with_audio.py | 9 ++------- genai/live/live_with_txt.py | 9 ++------- genai/live/test_live_examples.py | 10 +++++----- 8 files changed, 21 insertions(+), 57 deletions(-) diff --git a/genai/live/live_audiogen_with_txt.py b/genai/live/live_audiogen_with_txt.py index 477d4f0d40d..cf7f24a6fc4 100644 --- a/genai/live/live_audiogen_with_txt.py +++ b/genai/live/live_audiogen_with_txt.py @@ -24,15 +24,9 @@ async def generate_content() -> None: import numpy as np import scipy.io.wavfile as wavfile from google import genai - from google.genai.types import ( - Content, - LiveConnectConfig, - Modality, - Part, - PrebuiltVoiceConfig, - SpeechConfig, - VoiceConfig, - ) + from google.genai.types import (Content, LiveConnectConfig, Modality, Part, + PrebuiltVoiceConfig, SpeechConfig, + VoiceConfig) client = genai.Client() model = "gemini-2.0-flash-live-preview-04-09" diff --git a/genai/live/live_code_exec_with_txt.py b/genai/live/live_code_exec_with_txt.py index 6aa1cae7b6d..70db7402ee7 100644 --- a/genai/live/live_code_exec_with_txt.py +++ b/genai/live/live_code_exec_with_txt.py @@ -18,14 +18,8 @@ async def generate_content() -> list[str]: # [START googlegenaisdk_live_code_exec_with_txt] from google import genai - from google.genai.types import ( - Content, - LiveConnectConfig, - Modality, - Part, - Tool, - ToolCodeExecution, - ) + from google.genai.types import (Content, LiveConnectConfig, Modality, Part, + Tool, ToolCodeExecution) client = genai.Client() model_id = "gemini-2.0-flash-live-preview-04-09" diff --git a/genai/live/live_func_call_with_txt.py b/genai/live/live_func_call_with_txt.py index b29dd96b78a..7761a49b7b6 100644 --- a/genai/live/live_func_call_with_txt.py +++ b/genai/live/live_func_call_with_txt.py @@ -20,15 +20,9 @@ async def generate_content() -> list[FunctionResponse]: # [START googlegenaisdk_live_func_call_with_txt] from google import genai - from google.genai.types import ( - Content, - FunctionDeclaration, - FunctionResponse, - LiveConnectConfig, - Modality, - Part, - Tool, - ) + from google.genai.types import (Content, FunctionDeclaration, + FunctionResponse, LiveConnectConfig, + Modality, Part, Tool) client = genai.Client() model_id = "gemini-2.0-flash-live-preview-04-09" diff --git a/genai/live/live_ground_googsearch_with_txt.py b/genai/live/live_ground_googsearch_with_txt.py index 82f4281ae6a..cfca4a87e1c 100644 --- a/genai/live/live_ground_googsearch_with_txt.py +++ b/genai/live/live_ground_googsearch_with_txt.py @@ -19,14 +19,8 @@ async def generate_content() -> list[str]: # [START googlegenaisdk_live_ground_googsearch_with_txt] from google import genai - from google.genai.types import ( - Content, - GoogleSearch, - LiveConnectConfig, - Modality, - Part, - Tool, - ) + from google.genai.types import (Content, GoogleSearch, LiveConnectConfig, + Modality, Part, Tool) client = genai.Client() model_id = "gemini-2.0-flash-live-preview-04-09" diff --git a/genai/live/live_structured_ouput_with_txt.py b/genai/live/live_structured_ouput_with_txt.py index 38b7df52fcb..f0b2466ff5f 100644 --- a/genai/live/live_structured_ouput_with_txt.py +++ b/genai/live/live_structured_ouput_with_txt.py @@ -30,10 +30,8 @@ def generate_content() -> CalendarEvent: import google.auth.transport.requests import openai from google.auth import default - from openai.types.chat import ( - ChatCompletionSystemMessageParam, - ChatCompletionUserMessageParam, - ) + from openai.types.chat import (ChatCompletionSystemMessageParam, + ChatCompletionUserMessageParam) project_id = os.environ["GOOGLE_CLOUD_PROJECT"] location = "us-central1" diff --git a/genai/live/live_transcribe_with_audio.py b/genai/live/live_transcribe_with_audio.py index 644c486675f..b702672bc76 100644 --- a/genai/live/live_transcribe_with_audio.py +++ b/genai/live/live_transcribe_with_audio.py @@ -22,13 +22,8 @@ async def generate_content() -> list[str]: # [START googlegenaisdk_live_transcribe_with_audio] from google import genai - from google.genai.types import ( - AudioTranscriptionConfig, - Content, - LiveConnectConfig, - Modality, - Part, - ) + from google.genai.types import (AudioTranscriptionConfig, Content, + LiveConnectConfig, Modality, Part) client = genai.Client() model = "gemini-live-2.5-flash-preview-native-audio" diff --git a/genai/live/live_with_txt.py b/genai/live/live_with_txt.py index 76fab43398b..8b8b0908127 100644 --- a/genai/live/live_with_txt.py +++ b/genai/live/live_with_txt.py @@ -18,13 +18,8 @@ async def generate_content() -> list[str]: # [START googlegenaisdk_live_with_txt] from google import genai - from google.genai.types import ( - Content, - HttpOptions, - LiveConnectConfig, - Modality, - Part, - ) + from google.genai.types import (Content, HttpOptions, LiveConnectConfig, + Modality, Part) client = genai.Client(http_options=HttpOptions(api_version="v1beta1")) model_id = "gemini-2.0-flash-live-preview-04-09" diff --git a/genai/live/test_live_examples.py b/genai/live/test_live_examples.py index 7959d1a91e2..adab0df412e 100644 --- a/genai/live/test_live_examples.py +++ b/genai/live/test_live_examples.py @@ -105,16 +105,16 @@ async def test_live_structured_ouput_with_txt() -> None: @pytest.mark.asyncio async def test_live_txt_with_audio() -> None: - assert await live_txt_with_audio.generate_content() + assert await live_txt_with_audio.generate_content() @pytest.mark.asyncio async def test_live_audio_with_txt() -> None: - result = await live_audio_with_txt.generate_content() - assert result is not None + result = await live_audio_with_txt.generate_content() + assert result is not None @pytest.mark.asyncio async def test_live_transcribe_with_audio() -> None: - result = await live_transcribe_with_audio.generate_content() - assert result is not None + result = await live_transcribe_with_audio.generate_content() + assert result is not None From 2bf1e877961e3fa92f70ed0904284f01fd523156 Mon Sep 17 00:00:00 2001 From: Guiners Date: Fri, 5 Sep 2025 15:45:02 +0200 Subject: [PATCH 09/15] codereview fix --- genai/live/test_live_examples.py | 1 - .../test_text_generation_examples.py | 16 +++++- .../text_generation/textgen_code_with_pdf.py | 55 +++++++++++++++++++ 3 files changed, 70 insertions(+), 2 deletions(-) create mode 100644 genai/text_generation/textgen_code_with_pdf.py diff --git a/genai/live/test_live_examples.py b/genai/live/test_live_examples.py index adab0df412e..8290feff451 100644 --- a/genai/live/test_live_examples.py +++ b/genai/live/test_live_examples.py @@ -22,7 +22,6 @@ import live_audio_with_txt import live_txt_with_audio -import live_transcribe_with_audio import live_audiogen_with_txt import live_code_exec_with_txt import live_func_call_with_txt diff --git a/genai/text_generation/test_text_generation_examples.py b/genai/text_generation/test_text_generation_examples.py index 3381ae7ec8c..a29764ec61e 100644 --- a/genai/text_generation/test_text_generation_examples.py +++ b/genai/text_generation/test_text_generation_examples.py @@ -37,6 +37,8 @@ import textgen_with_video import textgen_with_youtube_video import thinking_textgen_with_txt +import textgen_code_with_pdf + os.environ["GOOGLE_GENAI_USE_VERTEXAI"] = "True" os.environ["GOOGLE_CLOUD_LOCATION"] = "global" # "us-central1" @@ -137,9 +139,21 @@ def test_textgen_with_youtube_video() -> None: assert response +def test_model_optimizer_textgen_with_txt() -> None: + os.environ["GOOGLE_CLOUD_LOCATION"] = "us-central1" + response = model_optimizer_textgen_with_txt.generate_content() + os.environ["GOOGLE_CLOUD_LOCATION"] = "global" # "us-central1" + assert response + + +def test_textgen_code_with_pdf() -> None: + response = textgen_code_with_pdf.generate_content() + assert response + + # Migrated to Model Optimser Folder # def test_model_optimizer_textgen_with_txt() -> None: # os.environ["GOOGLE_CLOUD_LOCATION"] = "us-central1" # response = model_optimizer_textgen_with_txt.generate_content() # os.environ["GOOGLE_CLOUD_LOCATION"] = "global" # "us-central1" -# assert response +# assert response \ No newline at end of file diff --git a/genai/text_generation/textgen_code_with_pdf.py b/genai/text_generation/textgen_code_with_pdf.py new file mode 100644 index 00000000000..da4ca76b73a --- /dev/null +++ b/genai/text_generation/textgen_code_with_pdf.py @@ -0,0 +1,55 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# !This sample works with Google Cloud Vertex AI API only. + + +def generate_content() -> str: + # [START googlegenaisdk_textgen_code_with_pdf] + from google import genai + from google.genai.types import HttpOptions, Part + + client = genai.Client(http_options=HttpOptions(api_version="v1beta1")) + model_id = "gemini-2.5-flash" + prompt = "Convert this python code to use Google Python Style Guide." + print("> ", prompt, "\n") + pdf_uri = "https://storage.googleapis.com/cloud-samples-data/generative-ai/text/inefficient_fibonacci_series_python_code.pdf" + + pdf_file = Part.from_uri( + file_uri=pdf_uri, + mime_type="application/pdf", + ) + + response = client.models.generate_content( + model=model_id, + contents=[pdf_file, prompt], + ) + + print(response.text) + # Example response: + # > Convert this python code to use Google Python Style Guide. + # + # def generate_fibonacci_sequence(num_terms: int) -> list[int]: + # """Generates the Fibonacci sequence up to a specified number of terms. + # + # This function calculates the Fibonacci sequence starting with 0 and 1. + # It handles base cases for 0, 1, and 2 terms efficiently. + # + # # ... + # [END googlegenaisdk_textgen_code_with_pdf] + return response.text + + +if __name__ == "__main__": + generate_content() From ed867b130fff56eb0609811519ada0d9d440fa42 Mon Sep 17 00:00:00 2001 From: Guiners Date: Mon, 8 Sep 2025 11:03:42 +0200 Subject: [PATCH 10/15] codereview fix --- genai/text_generation/test_text_generation_examples.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/genai/text_generation/test_text_generation_examples.py b/genai/text_generation/test_text_generation_examples.py index a29764ec61e..e63579e1f5e 100644 --- a/genai/text_generation/test_text_generation_examples.py +++ b/genai/text_generation/test_text_generation_examples.py @@ -139,13 +139,6 @@ def test_textgen_with_youtube_video() -> None: assert response -def test_model_optimizer_textgen_with_txt() -> None: - os.environ["GOOGLE_CLOUD_LOCATION"] = "us-central1" - response = model_optimizer_textgen_with_txt.generate_content() - os.environ["GOOGLE_CLOUD_LOCATION"] = "global" # "us-central1" - assert response - - def test_textgen_code_with_pdf() -> None: response = textgen_code_with_pdf.generate_content() assert response From 806147855b590d5ec997a3b22343a8875129c835 Mon Sep 17 00:00:00 2001 From: Guiners Date: Tue, 9 Sep 2025 10:09:03 +0200 Subject: [PATCH 11/15] codereview fix --- ...t.py => live_structured_output_with_txt.py} | 0 genai/live/test_live_examples.py | 18 ++++++------------ .../test_text_generation_examples.py | 6 +++--- 3 files changed, 9 insertions(+), 15 deletions(-) rename genai/live/{live_structured_ouput_with_txt.py => live_structured_output_with_txt.py} (100%) diff --git a/genai/live/live_structured_ouput_with_txt.py b/genai/live/live_structured_output_with_txt.py similarity index 100% rename from genai/live/live_structured_ouput_with_txt.py rename to genai/live/live_structured_output_with_txt.py diff --git a/genai/live/test_live_examples.py b/genai/live/test_live_examples.py index 8290feff451..858cf065b15 100644 --- a/genai/live/test_live_examples.py +++ b/genai/live/test_live_examples.py @@ -21,12 +21,12 @@ import pytest import live_audio_with_txt -import live_txt_with_audio import live_audiogen_with_txt +import live_txt_with_audio import live_code_exec_with_txt import live_func_call_with_txt import live_ground_googsearch_with_txt -import live_structured_ouput_with_txt +import live_structured_output_with_txt import live_transcribe_with_audio import live_txtgen_with_audio import live_websocket_audiogen_with_txt @@ -99,21 +99,15 @@ async def test_live_txtgen_with_audio() -> None: @pytest.mark.asyncio async def test_live_structured_ouput_with_txt() -> None: - assert live_structured_ouput_with_txt.generate_content() + assert live_structured_output_with_txt.generate_content() @pytest.mark.asyncio async def test_live_txt_with_audio() -> None: - assert await live_txt_with_audio.generate_content() + assert await live_txt_with_audio.generate_content() @pytest.mark.asyncio async def test_live_audio_with_txt() -> None: - result = await live_audio_with_txt.generate_content() - assert result is not None - - -@pytest.mark.asyncio -async def test_live_transcribe_with_audio() -> None: - result = await live_transcribe_with_audio.generate_content() - assert result is not None + result = await live_audio_with_txt.generate_content() + assert result is not None diff --git a/genai/text_generation/test_text_generation_examples.py b/genai/text_generation/test_text_generation_examples.py index e63579e1f5e..45c03acdbdc 100644 --- a/genai/text_generation/test_text_generation_examples.py +++ b/genai/text_generation/test_text_generation_examples.py @@ -36,8 +36,8 @@ import textgen_with_txt_stream import textgen_with_video import textgen_with_youtube_video -import thinking_textgen_with_txt import textgen_code_with_pdf +import thinking_textgen_with_txt os.environ["GOOGLE_GENAI_USE_VERTEXAI"] = "True" @@ -143,10 +143,10 @@ def test_textgen_code_with_pdf() -> None: response = textgen_code_with_pdf.generate_content() assert response - + # Migrated to Model Optimser Folder # def test_model_optimizer_textgen_with_txt() -> None: # os.environ["GOOGLE_CLOUD_LOCATION"] = "us-central1" # response = model_optimizer_textgen_with_txt.generate_content() # os.environ["GOOGLE_CLOUD_LOCATION"] = "global" # "us-central1" -# assert response \ No newline at end of file +# assert response From a68b7b908e2dcc0bdd19e2a7753d0abe6c04f768 Mon Sep 17 00:00:00 2001 From: Guiners Date: Tue, 9 Sep 2025 17:04:03 +0200 Subject: [PATCH 12/15] linter fix --- genai/live/live_audio_with_txt.py | 14 ++++---------- genai/live/live_txt_with_audio.py | 7 ++++--- genai/live/test_live_examples.py | 2 +- 3 files changed, 9 insertions(+), 14 deletions(-) diff --git a/genai/live/live_audio_with_txt.py b/genai/live/live_audio_with_txt.py index 40a78a64734..96f43187710 100644 --- a/genai/live/live_audio_with_txt.py +++ b/genai/live/live_audio_with_txt.py @@ -22,17 +22,11 @@ async def generate_content() -> list[str]: # [START googlegenaisdk_live_audio_with_txt] import numpy as np - from IPython.display import Audio, Markdown, display from google import genai - from google.genai.types import ( - Content, - LiveConnectConfig, - Modality, - Part, - SpeechConfig, - VoiceConfig, - PrebuiltVoiceConfig, - ) + from google.genai.types import (Content, LiveConnectConfig, Modality, Part, + PrebuiltVoiceConfig, SpeechConfig, + VoiceConfig) + from IPython.display import Audio, Markdown, display client = genai.Client() voice_name = "Aoede" diff --git a/genai/live/live_txt_with_audio.py b/genai/live/live_txt_with_audio.py index e6a0e00e19d..8ddc2703c99 100644 --- a/genai/live/live_txt_with_audio.py +++ b/genai/live/live_txt_with_audio.py @@ -22,11 +22,12 @@ async def generate_content() -> list[str]: # [START googlegenaisdk_live_txt_with_audio] import io + + import librosa import requests - from google import genai - from google.genai.types import Modality, LiveConnectConfig, Blob import soundfile as sf - import librosa + from google import genai + from google.genai.types import Blob, LiveConnectConfig, Modality client = genai.Client() model = "gemini-2.0-flash-live-preview-04-09" diff --git a/genai/live/test_live_examples.py b/genai/live/test_live_examples.py index 858cf065b15..d99a8636425 100644 --- a/genai/live/test_live_examples.py +++ b/genai/live/test_live_examples.py @@ -22,12 +22,12 @@ import live_audio_with_txt import live_audiogen_with_txt -import live_txt_with_audio import live_code_exec_with_txt import live_func_call_with_txt import live_ground_googsearch_with_txt import live_structured_output_with_txt import live_transcribe_with_audio +import live_txt_with_audio import live_txtgen_with_audio import live_websocket_audiogen_with_txt import live_websocket_audiotranscript_with_txt From b9be377f3bab51aec2a54b1b6323c4fbac6f99b2 Mon Sep 17 00:00:00 2001 From: Guiners Date: Thu, 2 Oct 2025 14:53:25 +0200 Subject: [PATCH 13/15] model update --- genai/live/live_audio_with_txt.py | 33 +++++++++++++++++++------------ genai/live/requirements.txt | 2 +- 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/genai/live/live_audio_with_txt.py b/genai/live/live_audio_with_txt.py index 96f43187710..fedb762919a 100644 --- a/genai/live/live_audio_with_txt.py +++ b/genai/live/live_audio_with_txt.py @@ -1,4 +1,4 @@ -# Copyright 2024 Google LLC +# Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,21 +12,32 @@ # See the License for the specific language governing permissions and # limitations under the License. - # Test file: https://storage.googleapis.com/generativeai-downloads/data/16000.wav -# Install helpers for converting files: pip install librosa soundfile +# Install helpers for converting files: pip install librosa soundfile simpleaudio import asyncio + + + + async def generate_content() -> list[str]: # [START googlegenaisdk_live_audio_with_txt] - import numpy as np from google import genai - from google.genai.types import (Content, LiveConnectConfig, Modality, Part, - PrebuiltVoiceConfig, SpeechConfig, - VoiceConfig) - from IPython.display import Audio, Markdown, display + from google.genai.types import ( + Content, LiveConnectConfig, Modality, Part, + PrebuiltVoiceConfig, SpeechConfig, VoiceConfig + ) + import numpy as np + import soundfile as sf + import simpleaudio as sa + + def play_audio(audio_array: np.ndarray, sample_rate: int = 24000): + sf.write("output.wav", audio_array, sample_rate) + wave_obj = sa.WaveObject.from_wave_file("output.wav") + play_obj = wave_obj.play() + play_obj.wait_done() client = genai.Client() voice_name = "Aoede" @@ -68,12 +79,8 @@ async def generate_content() -> list[str]: if audio_data: print("Received audio answer: ") - display(Audio(np.concatenate(audio_data), rate=24000, autoplay=True)) + play_audio(np.concatenate(audio_data), sample_rate=24000) - # Example output: - # > Hello? Gemini are you there? - # Received audio answer: - # # [END googlegenaisdk_live_audio_with_txt] return [] diff --git a/genai/live/requirements.txt b/genai/live/requirements.txt index 7e61ab14207..c3c3a3e35c1 100644 --- a/genai/live/requirements.txt +++ b/genai/live/requirements.txt @@ -6,4 +6,4 @@ soundfile==0.12.1 openai==1.99.1 setuptools==80.9.0 librosa==0.11.0 -IPython==8.26.0 \ No newline at end of file +simpleaudio==1.0.0 \ No newline at end of file From ef084c53d7d7e612996ac32137fab4573dcd3059 Mon Sep 17 00:00:00 2001 From: Guiners Date: Thu, 2 Oct 2025 14:58:04 +0200 Subject: [PATCH 14/15] model update --- genai/live/live_audio_with_txt.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/genai/live/live_audio_with_txt.py b/genai/live/live_audio_with_txt.py index fedb762919a..ac82a299944 100644 --- a/genai/live/live_audio_with_txt.py +++ b/genai/live/live_audio_with_txt.py @@ -18,10 +18,6 @@ import asyncio - - - - async def generate_content() -> list[str]: # [START googlegenaisdk_live_audio_with_txt] from google import genai @@ -33,7 +29,7 @@ async def generate_content() -> list[str]: import soundfile as sf import simpleaudio as sa - def play_audio(audio_array: np.ndarray, sample_rate: int = 24000): + def play_audio(audio_array: np.ndarray, sample_rate: int = 24000) -> None: sf.write("output.wav", audio_array, sample_rate) wave_obj = sa.WaveObject.from_wave_file("output.wav") play_obj = wave_obj.play() From 18b318d8260671282bc218ca4666fdbbd3949a05 Mon Sep 17 00:00:00 2001 From: Guiners Date: Mon, 6 Oct 2025 18:16:33 +0200 Subject: [PATCH 15/15] code review fix --- genai/text_generation/test_text_generation_examples.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/genai/text_generation/test_text_generation_examples.py b/genai/text_generation/test_text_generation_examples.py index 45c03acdbdc..cc8f471db99 100644 --- a/genai/text_generation/test_text_generation_examples.py +++ b/genai/text_generation/test_text_generation_examples.py @@ -22,6 +22,7 @@ import textgen_async_with_txt import textgen_chat_stream_with_txt import textgen_chat_with_txt +import textgen_code_with_pdf import textgen_config_with_txt import textgen_sys_instr_with_txt import textgen_transcript_with_gcs_audio @@ -36,7 +37,6 @@ import textgen_with_txt_stream import textgen_with_video import textgen_with_youtube_video -import textgen_code_with_pdf import thinking_textgen_with_txt