From 7131f8e4d4af7b086b44a50872c810ada65f2ea9 Mon Sep 17 00:00:00 2001 From: Holt Skinner Date: Thu, 20 Feb 2025 10:34:04 -0600 Subject: [PATCH 1/2] docs(genai): Add Batch Embeddings Sample for Gen AI SDK - Removed extra file from #13172 --- .../batchpredict_embeddings_with_gcs.py | 68 +++++++++++++++++++ .../test_batch_prediction_examples.py | 8 +++ genai/embeddings/embed_content_text.py | 45 ------------ 3 files changed, 76 insertions(+), 45 deletions(-) create mode 100644 genai/batch_prediction/batchpredict_embeddings_with_gcs.py delete mode 100644 genai/embeddings/embed_content_text.py diff --git a/genai/batch_prediction/batchpredict_embeddings_with_gcs.py b/genai/batch_prediction/batchpredict_embeddings_with_gcs.py new file mode 100644 index 00000000000..f2e7f617273 --- /dev/null +++ b/genai/batch_prediction/batchpredict_embeddings_with_gcs.py @@ -0,0 +1,68 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def generate_content(output_uri: str) -> str: + # [START googlegenaisdk_batchpredict_embeddings_with_gcs] + import time + + from google import genai + from google.genai.types import CreateBatchJobConfig, JobState, HttpOptions + + client = genai.Client(http_options=HttpOptions(api_version="v1")) + # TODO(developer): Update and un-comment below line + # output_uri = "gs://your-bucket/your-prefix" + + # See the documentation: https://googleapis.github.io/python-genai/genai.html#genai.batches.Batches.create + job = client.batches.create( + model="text-embedding-005", + # Source link: https://storage.cloud.google.com/cloud-samples-data/generative-ai/embeddings/embeddings_input.jsonl + src="gs://cloud-samples-data/generative-ai/embeddings/embeddings_input.jsonl", + config=CreateBatchJobConfig(dest=output_uri), + ) + print(f"Job name: {job.name}") + print(f"Job state: {job.state}") + # Example response: + # Job name: projects/%PROJECT_ID%/locations/us-central1/batchPredictionJobs/9876453210000000000 + # Job state: JOB_STATE_PENDING + + # See the documentation: https://googleapis.github.io/python-genai/genai.html#genai.types.BatchJob + completed_states = { + JobState.JOB_STATE_SUCCEEDED, + JobState.JOB_STATE_FAILED, + JobState.JOB_STATE_CANCELLED, + JobState.JOB_STATE_PAUSED, + } + + while job.state not in completed_states: + time.sleep(30) + job = client.batches.get(name=job.name) + print(f"Job state: {job.state}") + if job.state == JobState.JOB_STATE_FAILED: + print(f"Error: {job.error}") + break + + # Example response: + # Job state: JOB_STATE_PENDING + # Job state: JOB_STATE_RUNNING + # Job state: JOB_STATE_RUNNING + # ... + # Job state: JOB_STATE_SUCCEEDED + + # [END googlegenaisdk_batchpredict_embeddings_with_gcs] + return job.state + + +if __name__ == "__main__": + generate_content(output_uri="gs://python-docs-samples-tests/embeddings_output") diff --git a/genai/batch_prediction/test_batch_prediction_examples.py b/genai/batch_prediction/test_batch_prediction_examples.py index a64e0864022..f9979c352f6 100644 --- a/genai/batch_prediction/test_batch_prediction_examples.py +++ b/genai/batch_prediction/test_batch_prediction_examples.py @@ -25,6 +25,7 @@ import pytest +import batchpredict_embeddings_with_gcs import batchpredict_with_bq import batchpredict_with_gcs @@ -61,6 +62,13 @@ def gcs_output_uri() -> str: blob.delete() +def test_batch_prediction_embeddings_with_gcs(gcs_output_uri: str) -> None: + response = batchpredict_embeddings_with_gcs.generate_content( + output_uri=gcs_output_uri + ) + assert response == JobState.JOB_STATE_SUCCEEDED + + def test_batch_prediction_with_bq(bq_output_uri: str) -> None: response = batchpredict_with_bq.generate_content(output_uri=bq_output_uri) assert response == JobState.JOB_STATE_SUCCEEDED diff --git a/genai/embeddings/embed_content_text.py b/genai/embeddings/embed_content_text.py deleted file mode 100644 index 787362c2755..00000000000 --- a/genai/embeddings/embed_content_text.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -def embed_content() -> str: - # [START googlegenaisdk_embeddings_docretrieval_with_txt] - from google import genai - from google.genai.types import EmbedContentConfig - - client = genai.Client() - response = client.models.embed_content( - model="text-embedding-005", - contents=[ - "How do I get a driver's license/learner's permit?", - "How do I renew my driver's license?", - "How do I change my address on my driver's license?", - ], - config=EmbedContentConfig( - task_type="RETRIEVAL_DOCUMENT", # Optional - output_dimensionality=768, # Optional - title="Driver's License", # Optional - ), - ) - print(response) - # Example response: - # embeddings=[ContentEmbedding(values=[-0.06302902102470398, 0.00928034819662571, 0.014716853387653828, -0.028747491538524628, ... ], - # statistics=ContentEmbeddingStatistics(truncated=False, token_count=13.0))] - # metadata=EmbedContentMetadata(billable_character_count=112) - # [END googlegenaisdk_embeddings_docretrieval_with_txt] - return response - - -if __name__ == "__main__": - embed_content() From 613b9d29dc29ac69478ed1bce03b04da429b6f02 Mon Sep 17 00:00:00 2001 From: Holt Skinner Date: Thu, 20 Feb 2025 10:46:46 -0600 Subject: [PATCH 2/2] Remove sample bucket URI --- genai/batch_prediction/batchpredict_embeddings_with_gcs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/genai/batch_prediction/batchpredict_embeddings_with_gcs.py b/genai/batch_prediction/batchpredict_embeddings_with_gcs.py index f2e7f617273..66efa35fe6a 100644 --- a/genai/batch_prediction/batchpredict_embeddings_with_gcs.py +++ b/genai/batch_prediction/batchpredict_embeddings_with_gcs.py @@ -65,4 +65,4 @@ def generate_content(output_uri: str) -> str: if __name__ == "__main__": - generate_content(output_uri="gs://python-docs-samples-tests/embeddings_output") + generate_content(output_uri="gs://your-bucket/your-prefix")