In [None]:
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Handling large-scale embedding generation for Vertex AI Vector Search

<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/generative-ai/blob/main/embeddings/large-embs-generation-for-vvs.ipynb">
      <img width="32px" src="https://www.gstatic.com/pantheon/images/bigquery/welcome_page/colab-logo.svg" alt="Google Colaboratory logo"><br> Open in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fgenerative-ai%2Fmain%2Fembeddings%2Flarge-embs-generation-for-vvs.ipynb">
      <img width="32px" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" alt="Google Cloud Colab Enterprise logo"><br> Open in Colab Enterprise
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/generative-ai/main/embeddings/large-embs-generation-for-vvs.ipynb">
      <img src="https://www.gstatic.com/images/branding/gcpiconscolors/vertexai/v1/32px.svg" alt="Vertex AI logo"><br> Open in Vertex AI Workbench
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/generative-ai/blob/main/embeddings/large-embs-generation-for-vvs.ipynb">
      <img width="32px" src="https://upload.wikimedia.org/wikipedia/commons/9/91/Octicons-mark-github.svg" alt="GitHub logo"><br> View on GitHub
    </a>
  </td>
</table>

<div style="clear: both;"></div>

<b>Share to:</b>

<a href="https://www.linkedin.com/sharing/share-offsite/?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/embeddings/large-embs-generation-for-vvs.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/8/81/LinkedIn_icon.svg" alt="LinkedIn logo">
</a>

<a href="https://bsky.app/intent/compose?text=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/embeddings/large-embs-generation-for-vvs.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/7/7a/Bluesky_Logo.svg" alt="Bluesky logo">
</a>

<a href="https://twitter.com/intent/tweet?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/embeddings/large-embs-generation-for-vvs.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/5/53/X_logo_2023_original.svg" alt="X logo">
</a>

<a href="https://reddit.com/submit?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/embeddings/large-embs-generation-for-vvs.ipynb" target="_blank">
  <img width="20px" src="https://redditinc.com/hubfs/Reddit%20Inc/Brand/Reddit_Logo.png" alt="Reddit logo">
</a>

<a href="https://www.facebook.com/sharer/sharer.php?u=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/embeddings/large-embs-generation-for-vvs.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/5/51/Facebook_f_logo_%282019%29.svg" alt="Facebook logo">
</a>

| | |
|-|-|
| Author(s) |  [Kaz Sato](https://github.com/kazunori279/) |

## Overview

This tutorial provides a simple example of how to efficiently generate text and multimodal embeddings for millions of items on a notebook, using [Vertex AI Embeddings API](https://cloud.google.com/vertex-ai/generative-ai/docs/embeddings) and [Vector Search](https://cloud.google.com/vertex-ai/docs/vector-search/overview).

Embeddings API provides [batch predictions](https://cloud.google.com/vertex-ai/generative-ai/docs/embeddings/batch-prediction-genai-embeddings#generative-ai-batch-embedding-python_vertex_ai_sdk) for generating text embeddings for large datasets, but there are limitations such as:

1. some models including multimodal and multilingual are not supported
2. you cannot specify [task types](https://cloud.google.com/vertex-ai/generative-ai/docs/embeddings/task-types) for the text embeddings.

For those cases, you may find this notebook useful.

### Things to consider

When processing a large dataset to generate embeddings, you'll need code that does:

1. **Rate Limiting**: Control the frequency of your API calls to stay within your usage limits. This prevents exceeding quotas and ensures continuous processing for the large dataset.
1. **Multithreading**: Process data and make API calls concurrently. This maximizes your quota usage and reduces the impact of latency, speeding up the overall process.
1. **Checkpointing**: To avoid starting from scratch if an error occurs, save the progress periodically. This lets you resume from the last saved point.

This tutorial provides a straightforward example of how to implement these techniques effectively. You can choose the size of sample dataset from 10K, 100K or 1M items. With the default quota limit of Embedding API (as of Dec 2024), this example can generate:

- 10K text embeddings in a few minutes
- 100K text embeddings in roughly 5 minutes
- 1M text embeddings in roughly one hour

**Note**: This approach is intended for development purposes only. For production environments, consider building a robust MLOps pipeline with [Vertex AI Pipelines](https://cloud.google.com/vertex-ai/docs/pipelines/introduction) and [Dataflow](https://cloud.google.com/products/dataflow) to manage your embedding generation workflow.

## Understanding quota for Embeddings API

When using any Vertex AI API extensively, it's essential to ensure you have enough quota for your API calls. For the Embeddings API, the following pages detail the relevant quotas (Quota values listed below are current for us-central1 as of December 2024, but each region has different values and may change at any time. Please check the individual pages for the most up-to-date information).

### [Quotas by region and by model](https://cloud.google.com/vertex-ai/generative-ai/docs/quotas#view-the-quotas-by-region-and-by-model)

- base_model: textembedding-gecko	**1,500 requests per minute**	(region: us-central1)
- base_model: multimodalembedding	**120 requests per minute** (region: us-central1)

These quotas limit the number of API calls you can make. If you exceed a limit, you'll encounter ResourceExhausted errors. We will cover how to implement "throttling" in a later section to prevent this from happening. This will involve using code to control the rate of your API calls and stay within the quota.

In many cases, you might need to request a quota increase to generate a large number of embeddings in a timely manner.  See the [View and manage quotas](https://cloud.google.com/docs/quotas/view-manage) page for instructions on how to check your current quota values and make increase requests.

### [Text embedding limits](https://cloud.google.com/vertex-ai/generative-ai/docs/quotas#text-embedding-limits)

- Each text embedding model request can have up to **250 input texts**
- **20,000 tokens per request**

These are the limits on the number of texts (or tokens of the texts) you can send with a single API call. Sending too many at once, such as the maximums shown above, can cause processing delays and make it difficult to monitor system health. We'll discuss strategies for finding the right balance later on.

## Get started

Let's get started. In this section, we'll set up the necessary libraries and environment variables for this tutorial.

### Install Vertex AI SDK and other required packages


This line installs the Google Cloud AI Platform library, which is necessary for interacting with Vertex AI services, including the Embeddings API.

In [None]:
# Install (or upgrade) the google-cloud-aiplatform package into the user
# site-packages; --quiet suppresses pip's log output.
%pip install --upgrade --user --quiet google-cloud-aiplatform

### Restart runtime

To use the newly installed packages in this Jupyter runtime, you must restart the runtime. You can do this by running the cell below, which restarts the current kernel.

The restart might take a minute or longer. After it's restarted, continue to the next step.

In [2]:
import IPython

# Restart the current kernel so that the newly installed packages can be used.
app = IPython.Application.instance()
# Passing True requests a restart rather than a plain shutdown.
app.kernel.do_shutdown(True)

<div class="alert alert-block alert-warning">
<b>⚠️ The kernel is going to restart. In Colab or Colab Enterprise, you might see an error message that says "Your session crashed for an unknown reason." This is expected. Wait until it's finished before continuing to the next step. ⚠️</b>
</div>


### Authenticate your notebook environment (Colab only)

If you're running this notebook on Google Colab, run the cell below to authenticate your environment.

In [None]:
import sys

# Only Colab runtimes have the google.colab module loaded; every other
# environment skips the interactive login.
is_colab_runtime = "google.colab" in sys.modules
if is_colab_runtime:
    from google.colab import auth

    auth.authenticate_user()

### Set Google Cloud project information and initialize Vertex AI SDK

To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com). Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment).

The following code defines project configuration variables. If you're running this notebook on Google Colab, you'll need to enter the `PROJECT_ID` manually. However, on Colab Enterprise and Vertex AI Workbench, it will be detected automatically. The code also generates a unique session ID based on the current timestamp.

In [1]:
# Use the environment variable if the user doesn't provide Project ID.
import os

# Colab form field: leave it empty to fall back to GOOGLE_CLOUD_PROJECT below.
PROJECT_ID = (
    ""  # @param {type: "string", placeholder: "[your-project-id]", isTemplate: true}
)
if not PROJECT_ID or PROJECT_ID == "[your-project-id]":
    # NOTE(review): when GOOGLE_CLOUD_PROJECT is unset, str(None) yields the
    # literal string "None" here instead of failing - confirm a project is set.
    PROJECT_ID = str(os.environ.get("GOOGLE_CLOUD_PROJECT"))
# Region falls back to us-central1 when GOOGLE_CLOUD_REGION is not set.
LOCATION = os.environ.get("GOOGLE_CLOUD_REGION", "us-central1")

# Generate a unique ID for this session from the current local time
# (month, day, hour, minute, second).
from datetime import datetime

UID = datetime.now().strftime("%m%d%H%M%S")
# Bare expression so the notebook shows the ID as the cell output.
f"Unique ID for this session is: {UID}"

## Prepare dataset

### Download GBIF dataset

The following code downloads a [GBIF (Global Biodiversity Information Facility)](https://www.gbif.org/) dataset from Google Cloud Storage as a sample dataset that contains animal photos with their name and description. You can choose between 10k, 100k, or 1M items. It then uncompresses the downloaded gzip file to make it usable.

In [2]:
# choose dataset (keep exactly one FILE_NAME assignment uncommented)
FILE_NAME = "gbif_10k.json"
# FILE_NAME = "gbif_100k.json"
# FILE_NAME = "gbif_1m.json"

# download dataset: remove earlier gbif_*.json.gz archives first, then fetch the
# archive for the chosen file ({FILE_NAME} is interpolated from the Python variable)
!rm -rf ./gbif_*.json.gz
!wget https://storage.googleapis.com/gcp-samples-ic0-ac/datasets/{FILE_NAME}.gz

# uncompress the gzip file: remove earlier gbif_*.json files, then decompress
# every gbif_*.json.gz archive in the working directory
!rm -rf ./gbif_*.json
!gzip -d ./gbif_*.json.gz

This code then loads the json file into a list. The first three items of the list are then printed. We will use `name` and `description` for generating text embeddings, and `gcsUri` for generating multimodal embeddings for the animal photo.

In [3]:
import json

# Load the JSON Lines file: one JSON object per line.
# Decode explicitly as UTF-8 instead of relying on the platform default
# encoding, and skip blank lines rather than failing inside json.loads().
with open(FILE_NAME, encoding="utf-8") as f:
    items = [json.loads(line) for line in f if line.strip()]

# Preview the first three items as the cell output.
items[:3]

In [4]:
# Print the name and description of the first item, then display its photo.
from IPython.display import Image

print(f"Name: {items[0]['name']}")
print(f"Description: {items[0]['description']}")
# Last expression of the cell: the notebook renders the image from the item's URL.
Image(url=items[0]["url"])

## Generate text embeddings

In this section, we will generate text embeddings for the sample dataset.

### How to use Embedding API for text

This code utilizes the Embeddings API to obtain [an embedding model](https://cloud.google.com/vertex-ai/generative-ai/docs/embeddings/get-text-embeddings#supported-models). It defines a function `generate_text_embeddings()` that accepts a list of items and generates their text embeddings using the model, with a specified [task type](https://cloud.google.com/vertex-ai/generative-ai/docs/embeddings/task-types) and dimensionality.

In [5]:
import vertexai
from vertexai.language_models import TextEmbeddingInput, TextEmbeddingModel

# Text embedding settings used by generate_text_embeddings().
TEXT_EMB_MODEL_NAME = "text-embedding-005"
# Task type attached to every TextEmbeddingInput.
TEXT_EMB_TASK_TYPE = "SEMANTIC_SIMILARITY"
# Passed to get_embeddings() as output_dimensionality.
TEXT_EMB_DIMENSIONALITY = 768

# Initialize the Vertex AI SDK for the configured project and region, then load the model.
vertexai.init(project=PROJECT_ID, location=LOCATION)
text_emb_model = TextEmbeddingModel.from_pretrained(TEXT_EMB_MODEL_NAME)

In [6]:
from collections.abc import Callable
from typing import Any


def generate_text_embeddings(items: list[dict[str, Any]]) -> list[list[float]]:
    """
    Generate text embeddings for items with a single Embeddings API call.

    Args:
        items: Dataset items. Each item must provide string `name` and
            `description` fields; they are joined with a space to form the
            text that gets embedded.

    Returns:
        The embedding values returned by the model, one list of floats per
        result. Callers pair them with `items` by position. An empty `items`
        list returns an empty list without calling the API.
    """

    # Nothing to embed: avoid a pointless API request.
    if not items:
        return []

    # Combine name and description for embedding input.
    texts: list[str] = [item["name"] + " " + item["description"] for item in items]

    # Prepare inputs for the text embedding model.
    inputs: list[TextEmbeddingInput] = [
        TextEmbeddingInput(text, TEXT_EMB_TASK_TYPE) for text in texts
    ]

    # Get embeddings from the model.
    embeddings = text_emb_model.get_embeddings(
        inputs, output_dimensionality=TEXT_EMB_DIMENSIONALITY
    )
    return [emb.values for emb in embeddings]

In [7]:
# test it
test_items = items[:1]
test_embs = generate_text_embeddings(test_items)

# Sanity-check the result and show a short preview instead of printing every
# value of the embedding vector.
assert len(test_embs) == len(test_items)
assert len(test_embs[0]) == TEXT_EMB_DIMENSIONALITY
print(f"Generated {len(test_embs)} embedding(s) with {len(test_embs[0])} dimensions")
print(f"First values: {test_embs[0][:5]}")

### Define a worker thread

Instead of generating embeddings for each item individually, we'll send items in batches and run each API call in its own worker thread. Running the calls concurrently maximizes quota utilization, and isolating each call makes the entire job robust to errors. This code defines `run_worker_thread()`, which wraps a generation function such as `generate_text_embeddings()` to run as a worker thread, adding generated embeddings (or errors) into a queue.

In [8]:
import queue


def run_worker_thread(
    generation_func: Callable[[list[dict[str, Any]]], list[list[float]]],
    items: list[dict[str, Any]],
    emb_queue: "queue.Queue",
    err_queue: "queue.Queue",
) -> None:
    """
    Runs a worker thread that generates embeddings with a single API call and handles
    potential errors.

    A batch is published all-or-nothing: if anything fails (the API call, a missing
    "id" key, or a result count that does not match the item count), nothing from the
    batch is put on `emb_queue` and one error message is put on `err_queue` instead.

    Args:
        generation_func: A function that takes a list of items and returns their embeddings.
        items: The list of items to process. Each item should be a dictionary with an "id" key.
        emb_queue: The queue to put the generated embeddings into, as
            {"id": ..., "embedding": ...} dictionaries.
        err_queue: The queue to put any encountered errors into (as strings).
    """
    try:
        embs = generation_func(items)

        # Pairing by position is only meaningful when the counts match.
        if len(embs) != len(items):
            raise ValueError(
                f"Expected {len(items)} embeddings but got {len(embs)}"
            )

        # Build every record before publishing any, so a failure here cannot
        # leave a partially queued batch that is also reported as an error.
        records = [
            {"id": item["id"], "embedding": emb} for item, emb in zip(items, embs)
        ]
    except Exception as e:
        err_queue.put(str(e))
        return

    for record in records:
        emb_queue.put(record)

### Prepare a Cloud Storage bucket

We will store the generated embeddings to a Cloud Storage bucket and folder created by this code.

In [9]:
from google.cloud import storage

GCS_BUCKET = f"{PROJECT_ID}-embs-{UID}"
GCS_TEXT_EMB_PATH = "text_embs"

# Create the bucket in the configured project and region (instead of relying on
# the ambient default project and a hard-coded "us-central1"), so the
# embeddings are stored alongside the Vertex AI resources set up earlier.
storage_client = storage.Client(project=PROJECT_ID)
storage_bucket = storage_client.bucket(GCS_BUCKET)
storage_bucket = storage_client.create_bucket(storage_bucket, location=LOCATION)

# Create a folder for storing text embeddings: an empty object whose name ends
# with "/" acts as the folder placeholder.
empty_blob = storage_bucket.blob(GCS_TEXT_EMB_PATH + "/")
empty_blob.upload_from_string("")
print(f"\nCreated text embedding folder: gs://{GCS_BUCKET}/{GCS_TEXT_EMB_PATH}")

### Prepare a queue manager thread

This code defines a queue manager, `run_queue_manager_thread()`, that receives embeddings from the worker threads and handles potential errors. The manager periodically flushes successful embeddings to Cloud Storage using `flush_emb_queue()` and errors to a log file using `flush_err_queue()`.

In [10]:
# Queue size above which run_queue_manager_thread() flushes embeddings, and the
# number of embeddings written per periodic flush.
QUEUE_FLUSH_THRESHOLD = 10000
# Local log file that flush_err_queue() appends to; named after the session UID.
ERR_FILE_NAME = f"err_{UID}.log"

# Flag polled by run_queue_manager_thread(); generate_embeddings() sets it to
# False to make the manager do its final flush and exit.
is_queue_manager_enabled: bool = True


def flush_emb_queue(emb_queue: queue.Queue, gcs_path: str, count: int) -> None:
    """
    Flushes the embedding queue to Cloud Storage.

    Takes `count` embeddings off the queue, serializes them as JSON Lines (one
    JSON object per line) and uploads them as a single timestamped object under
    `gcs_path`. Does nothing when `count` is zero or negative, so no empty
    object is created when there is nothing left to flush.

    Args:
        emb_queue: The queue containing embedding dictionaries.
        gcs_path: The destination path in Cloud Storage.
        count: The number of embeddings to flush from the queue. The queue must
            hold at least this many items, because each get() blocks until an
            item is available.
    """
    if count <= 0:
        return

    timestamp: str = datetime.now().strftime("%m%d%H%M-%S%f")

    # Join once instead of growing a string with += for every embedding.
    embs: str = "".join(json.dumps(emb_queue.get()) + "\n" for _ in range(count))

    gcs_file = storage_bucket.blob(f"{gcs_path}/{timestamp}_embs.json")
    gcs_file.upload_from_string(embs, content_type="application/json")
    print(f"Uploaded {count} embeddings to {gcs_file.name}")


def flush_err_queue(err_queue: queue.Queue) -> None:
    """
    Flushes the error queue to the error log file.

    Every queued error is appended to ERR_FILE_NAME as an "Error ..." line and
    also printed. When the queue is empty the function returns without opening
    the file, so the log file only exists once an error has actually occurred.

    Args:
        err_queue: The queue containing error messages.
    """
    # Opening in append mode would create an empty log file even with no errors.
    if err_queue.empty():
        return

    with open(ERR_FILE_NAME, "a") as file:
        while not err_queue.empty():
            error = err_queue.get()
            file.write(f"Error {error}\n")
            print(f"Error {error}")


def run_queue_manager_thread(
    emb_queue: queue.Queue, err_queue: queue.Queue, gcs_path: str
) -> None:
    """Polls the embedding and error queues and flushes them until told to stop.

    While the module-level `is_queue_manager_enabled` flag is set, the loop wakes
    up every 0.1 seconds, writes one batch of QUEUE_FLUSH_THRESHOLD embeddings to
    Cloud Storage if more than that many are queued, and logs any queued errors.
    Once the flag is cleared, whatever remains in both queues is flushed.

    Args:
        emb_queue: The queue for storing embeddings.
        err_queue: The queue for storing errors.
        gcs_path: The path to Cloud Storage where embeddings will be stored.
    """
    global is_queue_manager_enabled

    poll_interval_sec = 0.1
    while True:
        # Stop polling as soon as the manager has been disabled.
        if not is_queue_manager_enabled:
            break
        time.sleep(poll_interval_sec)

        # A full batch is available: write it out.
        has_full_batch = emb_queue.qsize() > QUEUE_FLUSH_THRESHOLD
        if has_full_batch:
            flush_emb_queue(emb_queue, gcs_path, QUEUE_FLUSH_THRESHOLD)

        # Log errors as soon as any are pending.
        pending_errors = err_queue.qsize()
        if pending_errors > 0:
            flush_err_queue(err_queue)

    # Final flush: drain everything that is still queued.
    remaining = emb_queue.qsize()
    flush_emb_queue(emb_queue, gcs_path, remaining)
    flush_err_queue(err_queue)

### Define the main loop

`generate_embeddings()` is the main loop that performs the following: 1) initializes and starts the queue manager thread, 2) runs a loop that spawns a new worker thread for each API call, adhering to the quota limit, and 3) ensures all embeddings and any errors are processed from the queue.

In [11]:
import threading
import time

from tqdm.notebook import tqdm


def _count_logged_errors() -> int:
    """Returns the number of lines in the error log file (0 if it does not exist)."""
    if not os.path.exists(ERR_FILE_NAME):
        return 0
    with open(ERR_FILE_NAME) as f:
        return sum(1 for _ in f)


def generate_embeddings(
    generation_func: Callable[[list[dict[str, Any]]], list[list[float]]],
    reqs_per_min_quota: int,
    items_per_req: int,
    gcs_path: str,
    items: list[dict[str, Any]],
) -> None:
    """
    Generates embeddings with throttling and error handling.

    Starts the queue manager thread, then spawns one worker thread per request
    at a rate that stays within `reqs_per_min_quota`, waits for all workers and
    finally stops the queue manager so that it performs its final flush.

    Args:
        generation_func: The function used to generate the embeddings.
        reqs_per_min_quota: The maximum number of requests allowed per minute for the model.
        items_per_req: The number of items to include in each request.
        gcs_path: The path in the Cloud Storage bucket where embeddings are stored.
        items: The dataset we'll be working with.

    Raises:
        ValueError: If `reqs_per_min_quota` or `items_per_req` is not positive.
    """

    # Validate before any thread is started, so bad arguments cannot leave a
    # queue manager thread running in the background.
    if reqs_per_min_quota <= 0:
        raise ValueError("reqs_per_min_quota must be a positive number")
    if items_per_req <= 0:
        raise ValueError("items_per_req must be a positive number")

    # All threads.
    threads: list[threading.Thread] = []

    # Throttling interval (seconds between two requests).
    req_interval: float = 60.0 / reqs_per_min_quota

    # The error log is appended to by every run in this session; remember its
    # size so that only the errors of this run are reported at the end.
    errors_before: int = _count_logged_errors()

    # Start queue manager thread.
    global is_queue_manager_enabled
    is_queue_manager_enabled = True
    emb_queue: "queue.Queue" = queue.Queue()
    err_queue: "queue.Queue" = queue.Queue()
    queue_manager_thread: threading.Thread = threading.Thread(
        target=run_queue_manager_thread, args=(emb_queue, err_queue, gcs_path)
    )
    queue_manager_thread.start()

    try:
        # Generate embeddings.
        for start in tqdm(range(0, len(items), items_per_req)):

            # Throttle requests.
            time.sleep(req_interval)

            # Start a worker thread.
            items_slice: list[dict[str, Any]] = items[start : start + items_per_req]
            worker_thread: threading.Thread = threading.Thread(
                target=run_worker_thread,
                args=(generation_func, items_slice, emb_queue, err_queue),
            )
            worker_thread.start()
            threads.append(worker_thread)

        # Wait for all worker threads to finish.
        print(f"Waiting for {len(threads)} threads to finish...")
        for worker_thread in tqdm(threads):
            worker_thread.join()
    finally:
        # Even when the loop is interrupted (error or manual stop), let the
        # workers that were started finish, then stop the queue manager so it
        # flushes what was generated instead of polling forever.
        for worker_thread in threads:
            worker_thread.join()

        # Wait for the queue manager to stop.
        print("Waiting for the queue manager to finish...")
        is_queue_manager_enabled = False
        queue_manager_thread.join()

    # Print error count for this run only.
    error_count: int = _count_logged_errors() - errors_before
    if error_count > 0:
        print(f"{error_count} errors recorded in {ERR_FILE_NAME}")
    else:
        print("No errors recorded")
    print("Done!")

### Run it

Okay, let's run this! We'll use the following parameters:

- `generation_func`: The function used to generate the embeddings.
- `reqs_per_min_quota`: The maximum number of requests allowed per minute for the model.
- `items_per_req`: The number of items to include in each request.
- `items`: The dataset we'll be working with.

Regarding `items_per_req`, the text embedding model allows a maximum of 250 items per request, as mentioned earlier. However, setting it to the maximum value slows down the entire throughput. You might want to choose a more moderate value, like 20, for better performance.

In [12]:
# Start generating text embeddings
generate_embeddings(
    generation_func=generate_text_embeddings,
    reqs_per_min_quota=1500,  # requests-per-minute quota to throttle against
    items_per_req=20,  # moderate batch size; the model accepts up to 250 texts per request
    gcs_path=GCS_TEXT_EMB_PATH,
    items=items,
)

print(f"\nCreated text embeddings on folder: gs://{GCS_BUCKET}/{GCS_TEXT_EMB_PATH}")

### Monitoring errors

While embedding generation is in progress, you can monitor the error log for any recorded errors.

### Monitoring quota usage

You can also monitor your quota usage in the Google Cloud console. To do this, follow the instructions under [View quotas in the Google Cloud console](https://cloud.google.com/docs/quotas/view-manage#viewing_your_quota_console). Use the filter and type "embedding" to find the relevant quota, then click the Show usage chart button on the right to see your usage details.

Typically, you'll find that this tool utilizes around 80-90% of the specified quota limit.

<br/>
<center><img src="https://storage.googleapis.com/gcp-samples-ic0-ac/images/quota_usage.png" width="400"/></center>
<br/>


### (Optional) Creating a Vector Search index

Optionally, you can try using the generated embeddings for building [Vertex AI Vector Search](https://cloud.google.com/vertex-ai/docs/vector-search/overview) index for semantic search.

Check out the JSONL files stored on the Cloud Storage folder by using [Cloud Storage console](https://console.cloud.google.com/storage/browser). The file looks like:

```
{
    "id": "2251880622",
    "embedding": [-0.005421683192253113, 0.030090663582086563,...
}
...
```

This format conforms to the [input data format of Vertex AI Vector Search](https://cloud.google.com/vertex-ai/docs/vector-search/setup/format-structure). You can directly use the files to create an index by either:

- Using [Vector Search console](https://cloud.google.com/vertex-ai/docs/vector-search/create-manage-index#create_index-console): Specify the Cloud Storage path `gs://...` on the create new index dialog.
- Using [Vector Search Python SDK](https://cloud.google.com/vertex-ai/docs/vector-search/create-manage-index#create_index-python_vertex_ai_sdk): Specify the Cloud Storage path `gs://...` as `contents_delta_uri` parameter of `create_tree_ah_index()`.

For further usage of Vector Search, refer to [the product documentation](https://cloud.google.com/vertex-ai/docs/vector-search/overview).

## Generate Multimodal Embeddings

The tool above can be reused to generate other types of embeddings by defining a new `generation_func`. The following code demonstrates how to generate multimodal embeddings for the animal photos.

In [13]:
# Create a folder for storing multimodal embeddings
GCS_MM_EMB_PATH = "mm_embs"
# An empty object whose name ends with "/" acts as the folder placeholder.
empty_blob = storage_bucket.blob(GCS_MM_EMB_PATH + "/")
empty_blob.upload_from_string("")
print(f"\nCreated multimodal embedding folder: gs://{GCS_BUCKET}/{GCS_MM_EMB_PATH}")

In [14]:
# Prepare a multimodal embedding model
from vertexai.vision_models import (
    Image,
    MultiModalEmbeddingModel,
    MultiModalEmbeddingResponse,
)

# Multimodal embedding settings used by generate_mm_embeddings().
MM_EMB_MODEL_NAME = "multimodalembedding"
# Passed to get_embeddings() as the `dimension` argument.
MM_EMB_DIMENSIONALITY = 1408

# Load the multimodal embedding model.
mm_emb_model = MultiModalEmbeddingModel.from_pretrained(MM_EMB_MODEL_NAME)

In [15]:
def generate_mm_embeddings(items: list[dict[str, Any]]) -> list[list[float]]:
    """
    Generate multimodal (image) embeddings for items.

    All images are loaded first; then one embedding request is made per image.

    Args:
        items: Dataset items. Each item must provide a `gcsUri` field pointing
            at the image to embed.

    Returns:
        The image embedding of each item, in the same order as `items`.
        An empty `items` list returns an empty list.
    """

    # Nothing to embed.
    if not items:
        return []

    # Imported locally under an alias: the notebook-level name `Image` is also
    # bound by `from IPython.display import Image` in an earlier cell, so
    # whichever cell ran last would otherwise decide which class is used here.
    from vertexai.vision_models import Image as VertexImage

    # Extract GCS URIs from items.
    gcs_uris: list[str] = [item["gcsUri"] for item in items]

    # Load images from the GCS URIs.
    images = [VertexImage.load_from_file(gcs_uri) for gcs_uri in gcs_uris]

    # Get multimodal embeddings (one API call per image).
    responses = [
        mm_emb_model.get_embeddings(image=image, dimension=MM_EMB_DIMENSIONALITY)
        for image in images
    ]

    # Return the image embeddings.
    return [response.image_embedding for response in responses]

In [16]:
# Run it: generate multimodal embeddings for the whole dataset
generate_embeddings(
    generation_func=generate_mm_embeddings,
    reqs_per_min_quota=120,  # requests-per-minute quota to throttle against
    items_per_req=1,  # one image per request
    gcs_path=GCS_MM_EMB_PATH,
    items=items,
)

For generating multimodal embeddings, there are a couple of differences from the text embeddings:

- `reqs_per_min_quota`: The default quota limit is lower than that of the text embedding model.
- `items_per_req`: The multimodal model can only take a single image per request.

With these differences, it would take much longer to generate embeddings for all items.

## Cleaning up

That was it. Finally, remove the Cloud Storage bucket that was used for this tutorial.

In [None]:
# Delete the tutorial bucket; force=True also removes the objects it contains.
# NOTE(review): the client library is understood to refuse a forced delete when
# the bucket holds many objects - confirm the limit if many files were generated.
storage_bucket.delete(force=True)
print("Storage bucket deleted")