In [None]:
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vector Search 2.0 Public Preview Quickstart

<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/generative-ai/blob/main/embeddings/vector-search-2-quickstart.ipynb">
      <img width="32px" src="https://www.gstatic.com/pantheon/images/bigquery/welcome_page/colab-logo.svg" alt="Google Colaboratory logo"><br> Run in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fgenerative-ai%2Fmain%2Fembeddings%2Fvector-search-2-quickstart.ipynb">
      <img width="32px" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" alt="Google Cloud Colab Enterprise logo"><br> Run in Colab Enterprise
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/generative-ai/blob/main/embeddings/vector-search-2-quickstart.ipynb">
      <img width="32px" src="https://raw.githubusercontent.com/primer/octicons/refs/heads/main/icons/mark-github-24.svg" alt="GitHub logo"><br> View on GitHub
    </a>
  </td>
</table>

<div style="clear: both;"></div>

<b>Share to:</b>

<a href="https://www.linkedin.com/sharing/share-offsite/?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/embeddings/vector-search-2-quickstart.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/8/81/LinkedIn_icon.svg" alt="LinkedIn logo">
</a>

<a href="https://bsky.app/intent/compose?text=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/embeddings/vector-search-2-quickstart.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/7/7a/Bluesky_Logo.svg" alt="Bluesky logo">
</a>

<a href="https://twitter.com/intent/tweet?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/embeddings/vector-search-2-quickstart.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/5/5a/X_icon_2.svg" alt="X logo">
</a>

<a href="https://reddit.com/submit?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/embeddings/vector-search-2-quickstart.ipynb" target="_blank">
  <img width="20px" src="https://redditinc.com/hubfs/Reddit%20Inc/Brand/Reddit_Logo.png" alt="Reddit logo">
</a>

<a href="https://www.facebook.com/sharer/sharer.php?u=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/embeddings/vector-search-2-quickstart.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/5/51/Facebook_f_logo_%282019%29.svg" alt="Facebook logo">
</a>            

## Objectives

In this notebook, you will learn how to get started with the Vector Search 2.0 public preview API.

### **Warning: delete your objects after the tutorial**

If you are using your own Cloud project, make sure to delete all Collections and any associated Indexes after finishing this tutorial. Otherwise, the remaining resources will continue to incur costs.


## Prerequisites

This tutorial requires a Google Cloud project that is linked with a billing account. To create a new project, take a look at [this document](https://cloud.google.com/vertex-ai/docs/start/cloud-environment) to create a project and set up a billing account for it.
To get the permissions that you need to give a service account access to enable APIs and interact with Vertex AI resources, ask your administrator to grant you the [Security Admin](https://cloud.google.com/iam/docs/roles-permissions/iam#iam.securityAdmin) (`roles/iam.securityAdmin`) IAM role on your project. For more information about granting roles, see [Manage access to projects, folders, and organizations](https://cloud.google.com/iam/docs/granting-changing-revoking-access).


## Install the Vector Search SDK

In [None]:
# Install the Vector Search SDK. The %pip magic installs into the environment of the running kernel.
%pip install google-cloud-vectorsearch

## Environment variables

Set environment variables for your project and location.

In [None]:
# Project and region that every resource path in this notebook is built from.
# Replace the placeholder project ID with your own before running the cells below.
PROJECT_ID = "your-project-id"  # @param {type:"string"}
LOCATION = "us-central1"  # @param {type:"string"}

## Authentication

On Colab, run the following to authenticate calls to the Vector Search APIs:

In [None]:
import sys

# Extra sign-in step that only applies when running inside Google Colab.
running_in_colab = "google.colab" in sys.modules
if running_in_colab:
    # Imported here, not at the top, so the notebook still runs where
    # the google.colab module is absent.
    from google.colab import auth

    # Authenticate the user to Google Cloud.
    auth.authenticate_user()

## Enable APIs

Run the following command to enable the Vector Search API and, if you use Auto-Embeddings or Semantic Search, the Vertex AI API in this Google Cloud project.


In [None]:
# Enables the Vector Search and Vertex AI (aiplatform) APIs; {PROJECT_ID} is interpolated from the Python variable.
! gcloud services enable vectorsearch.googleapis.com aiplatform.googleapis.com --project "{PROJECT_ID}"

## Initialize Clients

In [None]:
from google.cloud import vectorsearch_v1beta

# Used in this notebook for collection-level calls: create/get/list collections and bulk import.
vector_search_service_client = vectorsearch_v1beta.VectorSearchServiceClient()
# Used for creating, reading, and updating data objects (single and batch).
data_object_service_client = vectorsearch_v1beta.DataObjectServiceClient()
# Used for query, aggregate, and vector search over a collection's data objects.
data_object_search_service_client = vectorsearch_v1beta.DataObjectSearchServiceClient()

## Create Collection

In [None]:
import getpass
from datetime import datetime

# The collection ID embeds the local username and today's date (MM-DD-YY).
user_tag = getpass.getuser()
date_tag = datetime.now().strftime("%m-%d-%y")
collection_id = f"movies-demo-{user_tag}-{date_tag}"

In [None]:
# Schema for the scalar fields stored on each movie.
movie_data_schema = {
    "type": "object",
    "properties": {
        "year": {"type": "number"},
        "genre": {"type": "string"},
        "director": {"type": "string"},
        "title": {"type": "string"},
    },
}

# If a data object is created without a supplied value for genre_embedding,
# the embedding is auto-generated based on this config.
genre_auto_embedding_config = {
    "model_id": "text-embedding-004",
    "text_template": "Movie: {title} Genre: {genre} Year: {year}",
    "task_type": "RETRIEVAL_DOCUMENT",
}

# Three dense vector fields of different sizes plus one sparse field.
movie_vector_schema = {
    "plot_embedding": {"dense_vector": {"dimensions": 3}},
    "soundtrack_embedding": {"dense_vector": {"dimensions": 5}},
    "genre_embedding": {
        "dense_vector": {
            "dimensions": 4,
            "vertex_embedding_config": genre_auto_embedding_config,
        }
    },
    "sparse_embedding": {"sparse_vector": {}},
}

request = vectorsearch_v1beta.CreateCollectionRequest(
    parent=f"projects/{PROJECT_ID}/locations/{LOCATION}",
    collection_id=collection_id,
    collection={
        "data_schema": movie_data_schema,
        "vector_schema": movie_vector_schema,
    },
)
# create_collection returns an operation object; result() is called on it to get the outcome.
operation = vector_search_service_client.create_collection(request=request)
operation.result()

## Get Collection

In [None]:
# Full resource name of the collection created above.
collection_resource_name = (
    f"projects/{PROJECT_ID}/locations/{LOCATION}/collections/{collection_id}"
)
request = vectorsearch_v1beta.GetCollectionRequest(name=collection_resource_name)
vector_search_service_client.get_collection(request)

## List Collections

In [None]:
# Collections are listed per project and location.
location_path = f"projects/{PROJECT_ID}/locations/{LOCATION}"
request = vectorsearch_v1beta.ListCollectionsRequest(parent=location_path)
vector_search_service_client.list_collections(request)

## Generate Sample Data

In [None]:
import math
import random

def normalize(v):
    """Scale a vector to unit Euclidean (L2) length.

    Args:
        v: Iterable of numbers. It is materialized once, so a generator is
            accepted as well as a list.

    Returns:
        list: The components of ``v`` divided by its L2 norm. An empty input
        yields ``[]``. An all-zero input has no direction, so it is returned
        as a list of ``0.0`` values instead of raising ``ZeroDivisionError``.
    """
    # Materialize first: the values are traversed twice (norm, then scaling).
    values = list(v)
    norm = math.sqrt(sum(x * x for x in values))
    if norm == 0:
        return [0.0 for _ in values]
    return [x / norm for x in values]


# Sparse embedding generation


VOCAB_SIZE = 50_000  # Size of the sparse index space (a vocabulary of 50,000 possible items)
NON_ZERO_COUNT = 7  # Number of active (non-zero) features per item
VALUE_RANGE = (1, 10)  # (min, max) for each value, e.g. ratings from 1 to 10


def generate_sparse_embedding(
    vocabulary_size: int, num_non_zero: int, value_range: tuple = (1, 100)
) -> tuple[list[int], list[int]]:
    """Build a random sparse vector as two parallel lists.

    Args:
        vocabulary_size (int): Size of the index space; indices are drawn
          from ``0 .. vocabulary_size - 1``.
        num_non_zero (int): How many distinct indices (and values) to produce.
        value_range (tuple): ``(min, max)`` bounds, both inclusive, for the
          random integer values.

    Returns:
        tuple[list[int], list[int]]: ``(indices, values)`` where ``indices``
        is sorted ascending with no repeats and ``values`` holds one random
        integer per index.

    Raises:
        ValueError: If ``num_non_zero`` is larger than ``vocabulary_size``.
    """
    # A sample without replacement cannot be larger than its population.
    if vocabulary_size < num_non_zero:
        raise ValueError(
            "Number of non-zero elements cannot exceed the vocabulary size."
        )

    # Draw distinct positions, then put them in ascending order.
    positions = sorted(random.sample(range(vocabulary_size), num_non_zero))

    # One random integer per position, drawn after the positions.
    low, high = value_range
    weights = []
    for _ in positions:
        weights.append(random.randint(low, high))

    return positions, weights


# Sample catalogue, one row per movie: (id, title, genre, year, director).
_MOVIE_ROWS = [
    ("the-shawshank-redemption", "The Shawshank Redemption", "Drama", 1994, "Frank Darabont"),
    ("the-godfather", "The Godfather", "Crime", 1972, "Francis Ford Coppola"),
    ("the-dark-knight", "The Dark Knight", "Action", 2008, "Christopher Nolan"),
    ("pulp-fiction", "Pulp Fiction", "Crime", 1994, "Quentin Tarantino"),
    ("schindlers-list", "Schindler's List", "Biography", 1993, "Steven Spielberg"),
    ("twelve-angry-men", "12 Angry Men", "Drama", 1957, "Sidney Lumet"),
    (
        "the-lord-of-the-rings-the-return-of-the-king",
        "The Lord of the Rings: The Return of the King",
        "Adventure",
        2003,
        "Peter Jackson",
    ),
    ("spirited-away", "Spirited Away", "Animation", 2001, "Hayao Miyazaki"),
    ("parasite", "Parasite", "Thriller", 2019, "Bong Joon-ho"),
    ("the-matrix", "The Matrix", "Sci-Fi", 1999, "The Wachowskis"),
    ("inception", "Inception", "Sci-Fi", 2010, "Christopher Nolan"),
    ("interstellar", "Interstellar", "Sci-Fi", 2014, "Christopher Nolan"),
    ("the-silence-of-the-lambs", "The Silence of the Lambs", "Thriller", 1991, "Jonathan Demme"),
    ("psycho", "Psycho", "Horror", 1960, "Alfred Hitchcock"),
    ("the-green-mile", "The Green Mile", "Drama", 1999, "Frank Darabont"),
    ("forrest-gump", "Forrest Gump", "Drama", 1994, "Robert Zemeckis"),
    ("fight-club", "Fight Club", "Drama", 1999, "David Fincher"),
    ("the-lion-king", "The Lion King", "Animation", 1994, "Roger Allers"),
    ("beauty-and-the-beast", "Beauty and the Beast", "Animation", 1991, "Gary Trousdale"),
    ("toy-story", "Toy Story", "Animation", 1995, "John Lasseter"),
    ("goodfellas", "Goodfellas", "Crime", 1990, "Martin Scorsese"),
    ("seven", "Seven", "Thriller", 1995, "David Fincher"),
    ("se7en", "Se7en", "Thriller", 1995, "David Fincher"),
    ("city-of-god", "City of God", "Crime", 2002, "Fernando Meirelles"),
    ("the-departed", "The Departed", "Crime", 2006, "Martin Scorsese"),
    ("oldboy", "Oldboy", "Thriller", 2003, "Park Chan-wook"),
    ("memento", "Memento", "Thriller", 2000, "Christopher Nolan"),
    ("shutter-island", "Shutter Island", "Thriller", 2010, "Martin Scorsese"),
    ("the-usual-suspects", "The Usual Suspects", "Thriller", 1995, "Bryan Singer"),
    ("gone-girl", "Gone Girl", "Thriller", 2014, "David Fincher"),
    ("the-sixth-sense", "The Sixth Sense", "Horror", 1999, "M. Night Shyamalan"),
    ("the-others", "The Others", "Horror", 2001, "Alejandro Amenábar"),
    ("the-ring", "The Ring", "Horror", 2002, "Gore Verbinski"),
    ("the-exorcist", "The Exorcist", "Horror", 1973, "William Friedkin"),
    ("singin-in-the-rain", "Singin' in the Rain", "Musical", 1952, "Stanley Donen"),
    ("the-sound-of-music", "The Sound of Music", "Musical", 1965, "Robert Wise"),
    ("west-side-story", "West Side Story", "Musical", 1961, "Robert Wise"),
    ("seven-samurai", "Seven Samurai", "Adventure", 1954, "Akira Kurosawa"),
    ("my-neighbor-totoro", "My Neighbor Totoro", "Animation", 1988, "Hayao Miyazaki"),
    ("howls-moving-castle", "Howl's Moving Castle", "Animation", 2004, "Hayao Miyazaki"),
    ("ponyo", "Ponyo", "Animation", 2008, "Hayao Miyazaki"),
    (
        "the-secret-world-of-arrietty",
        "The Secret World of Arrietty",
        "Animation",
        2010,
        "Hiromasa Yonebayashi",
    ),
    ("oklahoma", "Oklahoma!", "Musical", 1955, "Fred Zinnemann"),
    ("the-king-and-i", "The King and I", "Musical", 1956, "Walter Lang"),
    ("my-fair-lady", "My Fair Lady", "Musical", 1964, "George Cukor"),
    ("cabaret", "Cabaret", "Musical", 1972, "Bob Fosse"),
    ("grease", "Grease", "Musical", 1978, "Randal Kleiser"),
    ("chicago", "Chicago", "Musical", 2002, "Rob Marshall"),
    ("hairspray", "Hairspray", "Musical", 2007, "Adam Shankman"),
    ("les-miserables", "Les Misérables", "Musical", 2012, "Tom Hooper"),
]

# Expand each row into the {"id": ..., "data": {...}} record shape used by the rest of
# the notebook. Every record gets its own fresh "data" dict.
movies = [
    {
        "id": movie_id,
        "data": {
            "title": title,
            "genre": genre,
            "year": year,
            "director": director,
        },
    }
    for movie_id, title, genre, year, director in _MOVIE_ROWS
]

# Seed the generator so the sample vectors are the same on every run;
# without a seed, each "Restart & Run All" produces different data.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

for movie in movies:
    is_musical = movie["data"]["genre"] == "Musical"

    # Vector lengths (3, 4, 5) match the dimensions declared in the
    # collection's vector_schema.
    plot_values = normalize([random.random() for _ in range(3)])
    genre_values = normalize([random.random() for _ in range(4)])

    if is_musical:
        # Create a cluster for "Musical" movies in soundtrack_embedding:
        # every component is drawn from a narrow band starting at 0.9.
        soundtrack_values = normalize(
            [0.9 + random.random() * 0.1 for _ in range(5)]
        )
    else:
        soundtrack_values = normalize([random.random() for _ in range(5)])

    indices, values = generate_sparse_embedding(
        vocabulary_size=VOCAB_SIZE,
        num_non_zero=NON_ZERO_COUNT,
        value_range=VALUE_RANGE,
    )

    # Assigning a fresh dict replaces any vectors left over from an earlier run.
    movie["vectors"] = {
        "plot_embedding": {"dense": {"values": plot_values}},
        "genre_embedding": {"dense": {"values": genre_values}},
        "soundtrack_embedding": {"dense": {"values": soundtrack_values}},
        "sparse_embedding": {"sparse": {"values": values, "indices": indices}},
    }

## Populate Data Objects

In [None]:
# Preview the first record (its "data" plus the generated "vectors") before uploading.
movies[0]

### Create Data Object

In [None]:
# Upload only the first movie with a single-object create call.
first_movie = movies[0]
request = vectorsearch_v1beta.CreateDataObjectRequest(
    parent=f"projects/{PROJECT_ID}/locations/{LOCATION}/collections/{collection_id}",
    data_object_id=first_movie["id"],
    data_object={field: first_movie[field] for field in ("data", "vectors")},
)
data_object_service_client.create_data_object(request=request)

### Batch Create Data Objects

In [None]:
# movies[0] was created individually above and movies[-1] is skipped by this slice,
# so the batch covers everything in between.
rest_of_movies_batch_request = []
for movie in movies[1:-1]:
    payload = {"data": movie["data"], "vectors": movie["vectors"]}
    rest_of_movies_batch_request.append(
        {"data_object_id": movie["id"], "data_object": payload}
    )

collection_parent = (
    f"projects/{PROJECT_ID}/locations/{LOCATION}/collections/{collection_id}"
)
request = vectorsearch_v1beta.BatchCreateDataObjectsRequest(
    parent=collection_parent,
    requests=rest_of_movies_batch_request,
)
data_object_service_client.batch_create_data_objects(request)

### Import Data Objects from GCS file(s)

The import will fail if the collection already has an ANN index.

#### Prepare GCS data for import

The example below writes one data object to a JSON file in the specified GCS bucket.

In [None]:
import json
import os

# Bucket that holds the import file; the name is derived from PROJECT_ID.
IMPORT_BUCKET = f"my-movie-demo-import-{PROJECT_ID}"

# Directory must only contain import data.
# The trailing slash matters: the value is inserted directly into the gs:// URIs built below.
IMPORT_DIRECTORY = "import-data/"
IMPORT_FILE = "movies.json"

# Error directory must be empty.
IMPORT_ERROR_DIRECTORY = "import-errors/"

In [None]:
import copy

# Work on a deep copy so the record in `movies` keeps its original vector layout.
movie_to_import = copy.deepcopy(movies[-1])

# Dense vectors are flattened to their bare list of values; any other entry
# (such as the sparse vector) is carried over unchanged.
import_vectors = {}
for key, vector_info in movie_to_import["vectors"].items():
    has_dense = isinstance(vector_info, dict) and "dense" in vector_info
    import_vectors[key] = vector_info["dense"]["values"] if has_dense else vector_info
movie_to_import["vectors"] = import_vectors

movie_string = json.dumps(movie_to_import)
movie_string

In [None]:
blob_name = os.path.join(IMPORT_DIRECTORY, IMPORT_FILE)
gcs_uri = f"gs://{IMPORT_BUCKET}/{blob_name}"
try:
    # The import sits inside the try block so a missing google-cloud-storage
    # package also falls through to the manual instructions.
    from google.cloud import storage

    storage_client = storage.Client(project=PROJECT_ID)
    bucket = storage_client.bucket(IMPORT_BUCKET)
    # To programmatically create the GCS bucket, uncomment the following line:
    # storage_client.create_bucket(bucket)
    blob = bucket.blob(blob_name)
    blob.upload_from_string(movie_string)
except Exception as e:
    # Best-effort upload: on any failure, print how to copy the file by hand.
    fallback_lines = [
        f"Could not write to GCS using google-cloud-storage. Error: {e}",
        f"Please manually copy the following JSON string to {gcs_uri}",
        movie_string,
        "Sample command:",
        f"echo '{movie_string}' | gcloud storage cp - {gcs_uri}",
        "(the bucket must already exist)",
    ]
    for line in fallback_lines:
        print(line)
else:
    print(f"Successfully uploaded movie data to {gcs_uri}")

#### Perform the import

In [None]:
# Source directory for the import and the directory where per-record errors are written.
contents_uri = f"gs://{IMPORT_BUCKET}/{IMPORT_DIRECTORY}"
error_uri = f"gs://{IMPORT_BUCKET}/{IMPORT_ERROR_DIRECTORY}"
print(f"Importing from {contents_uri}")
print(f"Errors will be written to {error_uri}")

gcs_import_config = {"contents_uri": contents_uri, "error_uri": error_uri}
request = vectorsearch_v1beta.ImportDataObjectsRequest(
    name=f"projects/{PROJECT_ID}/locations/{LOCATION}/collections/{collection_id}",
    gcs_import=gcs_import_config,
)
# The call returns a long-running operation handle; the next cell waits on it.
import_lro = vector_search_service_client.import_data_objects(request)

In [None]:
# Wait for the long-running import operation started in the previous cell.
print(f"Waiting for import LRO: {import_lro.operation.name}")
import_lro.result()
print("Import LRO complete.")

## Get Data Object

In [None]:
# Fetch the first movie back by its full resource name.
# The subscript uses single quotes: reusing the enclosing double quote inside an
# f-string is a SyntaxError on Python versions before 3.12.
request = vectorsearch_v1beta.GetDataObjectRequest(
    name=f"projects/{PROJECT_ID}/locations/{LOCATION}/collections/{collection_id}/dataObjects/{movies[0]['id']}",
)
data_object_service_client.get_data_object(request=request)

## Update Data Object

In [None]:
# Change the first movie's title and replace its plot embedding.
first_movie_name = f"projects/{PROJECT_ID}/locations/{LOCATION}/collections/{collection_id}/dataObjects/{movies[0]['id']}"
updated_title = f"{movies[0]['data']['title']} (updated)"
updated_data_object = {
    "name": first_movie_name,
    "data": {"title": updated_title},
    "vectors": {"plot_embedding": {"dense": {"values": [1.0, 1.0, 1.0]}}},
}
request = vectorsearch_v1beta.UpdateDataObjectRequest(data_object=updated_data_object)
data_object_service_client.update_data_object(request)

## Batch Update Data Objects

In [None]:
collection_name = (
    f"projects/{PROJECT_ID}/locations/{LOCATION}/collections/{collection_id}"
)

# Positions in `movies` of the records renamed by this batch.
BATCH_UPDATE_INDICES = (9, 10, 11)
UPDATED_SUFFIX = " updated"

requests = []
for update_index in BATCH_UPDATE_INDICES:
    movie_to_update = movies[update_index]
    # Only append the suffix once, so re-running this cell does not produce
    # titles like "... updated updated".
    if not movie_to_update["data"]["title"].endswith(UPDATED_SUFFIX):
        movie_to_update["data"]["title"] += UPDATED_SUFFIX
    requests.append(
        {
            "data_object": {
                "name": f"{collection_name}/dataObjects/{movie_to_update['id']}",
                "data": movie_to_update["data"],
                "vectors": movie_to_update["vectors"],
            }
        }
    )

request = vectorsearch_v1beta.BatchUpdateDataObjectsRequest(
    parent=collection_name,
    requests=requests,
)
data_object_service_client.batch_update_data_objects(request)

# Optional check (uncomment to run): list the Sci-Fi titles after the update.
# scifi = data_object_search_service_client.query_data_objects(
#     request={"parent": collection_name, "filter": {"genre": {"$eq": "Sci-Fi"}}}
# )
# print([m.data["title"] for m in scifi])

## Create Data Object with Auto-Embeddings

This requires that you have already enabled the Vertex AI API (`aiplatform.googleapis.com`), as described in the `Enable APIs` section above.

In [None]:
# A record with scalar data only; no vectors are supplied in the request below.
movie_without_embedding = {
    "id": "the-matrix-2",
    "data": {
        "title": "The Matrix",
        "genre": "Sci-Fi",
        "year": 2003,
        "director": "The Wachowskis",
    },
}

# Track the record locally, but only once: re-running this cell must not
# append a duplicate entry to `movies`.
if not any(m["id"] == movie_without_embedding["id"] for m in movies):
    movies.append(movie_without_embedding)

request = vectorsearch_v1beta.CreateDataObjectRequest(
    parent=f"projects/{PROJECT_ID}/locations/{LOCATION}/collections/{collection_id}",
    data_object_id=movie_without_embedding["id"],
    # "vectors" is left empty on purpose for the auto-embedding demo.
    data_object={"data": movie_without_embedding["data"], "vectors": {}},
)
data_object_service_client.create_data_object(request=request)

## Query (list) data objects

In [None]:
# Page 1: request two objects per page and keep the token for the next page.
# The output field selectors are passed as lists, matching how OutputFields is
# populated in the other query and search cells of this notebook.
paged_response = data_object_search_service_client.query_data_objects(
    vectorsearch_v1beta.QueryDataObjectsRequest(
        parent=f"projects/{PROJECT_ID}/locations/{LOCATION}/collections/{collection_id}",
        page_size=2,
        output_fields=vectorsearch_v1beta.OutputFields(
            data_fields=["*"], vector_fields=["*"], metadata_fields=["*"]
        ),
    )
)
page1 = next(paged_response.pages)
next_page_token_1 = page1.next_page_token
page1.data_objects

In [None]:
# Page 2: same query as page 1, resumed from the token that page 1 returned.
# The output field selectors are passed as lists, matching how OutputFields is
# populated in the other query and search cells of this notebook.
paged_response_2 = data_object_search_service_client.query_data_objects(
    vectorsearch_v1beta.QueryDataObjectsRequest(
        parent=f"projects/{PROJECT_ID}/locations/{LOCATION}/collections/{collection_id}",
        page_size=2,
        page_token=next_page_token_1,
        output_fields=vectorsearch_v1beta.OutputFields(
            data_fields=["*"], vector_fields=["*"], metadata_fields=["*"]
        ),
    )
)
page2 = next(paged_response_2.pages)
page2.data_objects

## Query with filters

In [None]:
# 1) Simple equality filter.
thrillers_request = vectorsearch_v1beta.QueryDataObjectsRequest(
    parent=f"projects/{PROJECT_ID}/locations/{LOCATION}/collections/{collection_id}",
    filter={"genre": {"$eq": "Thriller"}},
    output_fields=vectorsearch_v1beta.OutputFields(data_fields=["*"]),
)
thrillers = data_object_search_service_client.query_data_objects(thrillers_request)
print([m.data["title"] for m in thrillers])

# 2) Two conditions combined with $and.
thrillers_since_1995_request = vectorsearch_v1beta.QueryDataObjectsRequest(
    parent=f"projects/{PROJECT_ID}/locations/{LOCATION}/collections/{collection_id}",
    filter={"$and": [{"genre": {"$eq": "Thriller"}}, {"year": {"$gte": 1995}}]},
    output_fields=vectorsearch_v1beta.OutputFields(data_fields=["*"]),
)
thrillers_since_1995 = data_object_search_service_client.query_data_objects(
    thrillers_since_1995_request
)
# Only the last expression of a cell is displayed, so this mid-cell result
# is printed explicitly; a bare list(...) here would be discarded.
print([m.data["title"] for m in thrillers_since_1995])

# 3) Nested $or / $and conditions.
nested_conditionals_request = vectorsearch_v1beta.QueryDataObjectsRequest(
    parent=f"projects/{PROJECT_ID}/locations/{LOCATION}/collections/{collection_id}",
    filter={
        "$or": [
            {"director": {"$eq": "Akira Kurosawa"}},
            {
                "$and": [
                    {"director": {"$eq": "David Fincher"}},
                    {"genre": {"$ne": "Thriller"}},
                ]
            },
        ]
    },
    output_fields=vectorsearch_v1beta.OutputFields(data_fields=["*"]),
)
nested_conditionals = data_object_search_service_client.query_data_objects(
    nested_conditionals_request
)
list(nested_conditionals)

## Query with aggregates

In [None]:
# Run a COUNT aggregate over the collection.
aggregate_parent = (
    f"projects/{PROJECT_ID}/locations/{LOCATION}/collections/{collection_id}"
)
aggregate_request = vectorsearch_v1beta.AggregateDataObjectsRequest(
    parent=aggregate_parent, aggregate="COUNT"
)
data_object_search_service_client.aggregate_data_objects(aggregate_request)

## Search

In [None]:
# Top-5 search on the 4-dimensional genre_embedding field.
genre_query_vector = vectorsearch_v1beta.DenseVector(
    values=normalize([0.1, 0.2, 0.3, 0.4])
)
genre_search_output = vectorsearch_v1beta.OutputFields(
    data_fields=["*"], vector_fields=["*"], metadata_fields=["*"]
)
search_request_1 = vectorsearch_v1beta.SearchDataObjectsRequest(
    parent=f"projects/{PROJECT_ID}/locations/{LOCATION}/collections/{collection_id}",
    vector_search=vectorsearch_v1beta.VectorSearch(
        search_field="genre_embedding",
        vector=genre_query_vector,
        top_k=5,
        output_fields=genre_search_output,
    ),
)
results = data_object_search_service_client.search_data_objects(search_request_1)
for result in results:
    print(result.data_object)

In [None]:
# Top-5 search on the 5-dimensional soundtrack_embedding field, using a query
# vector whose components are all equal.
soundtrack_query_vector = vectorsearch_v1beta.DenseVector(
    values=normalize([0.1] * 5)
)
soundtrack_search_output = vectorsearch_v1beta.OutputFields(
    data_fields=["*"], vector_fields=["*"], metadata_fields=["*"]
)
search_request_2 = vectorsearch_v1beta.SearchDataObjectsRequest(
    parent=f"projects/{PROJECT_ID}/locations/{LOCATION}/collections/{collection_id}",
    vector_search=vectorsearch_v1beta.VectorSearch(
        search_field="soundtrack_embedding",
        vector=soundtrack_query_vector,
        top_k=5,
        output_fields=soundtrack_search_output,
    ),
)
results = data_object_search_service_client.search_data_objects(search_request_2)
for result in results:
    print(result.data_object)

## Search with filters

In [None]:
# Vector search on plot_embedding with a genre filter.
thriller_only_filter = {"genre": {"$eq": "Thriller"}}
plot_query_vector_1 = vectorsearch_v1beta.DenseVector(
    values=normalize([0.3, 0.4, 0.5])
)
search_filter_request_1 = vectorsearch_v1beta.SearchDataObjectsRequest(
    parent=f"projects/{PROJECT_ID}/locations/{LOCATION}/collections/{collection_id}",
    vector_search=vectorsearch_v1beta.VectorSearch(
        search_field="plot_embedding",
        vector=plot_query_vector_1,
        filter=thriller_only_filter,
        top_k=5,
        output_fields=vectorsearch_v1beta.OutputFields(
            data_fields=["*"], vector_fields=["*"], metadata_fields=["*"]
        ),
    ),
)
results = data_object_search_service_client.search_data_objects(search_filter_request_1)
list(results)

In [None]:
# Vector search on `plot_embedding` with two equality conditions combined
# under "$and" (genre and director).
thriller_by_fincher_filter = {
    "$and": [
        {"genre": {"$eq": "Thriller"}},
        {"director": {"$eq": "David Fincher"}},
    ]
}
thriller_by_fincher_search = vectorsearch_v1beta.VectorSearch(
    search_field="plot_embedding",
    vector=vectorsearch_v1beta.DenseVector(values=normalize([0.3, 0.4, 0.5])),
    filter=thriller_by_fincher_filter,
    top_k=5,
    output_fields=vectorsearch_v1beta.OutputFields(
        data_fields=["*"], vector_fields=["*"], metadata_fields=["*"]
    ),
)
search_filter_request_2 = vectorsearch_v1beta.SearchDataObjectsRequest(
    parent=f"projects/{PROJECT_ID}/locations/{LOCATION}/collections/{collection_id}",
    vector_search=thriller_by_fincher_search,
)
results = data_object_search_service_client.search_data_objects(search_filter_request_2)
# Materialize the returned iterable so the cell displays the hits.
list(results)

In [None]:
# Same as the previous filter, plus a "$ne" condition on `title`.
fincher_not_seven_filter = {
    "$and": [
        {"genre": {"$eq": "Thriller"}},
        {"director": {"$eq": "David Fincher"}},
        {"title": {"$ne": "Seven"}},
    ]
}
fincher_not_seven_search = vectorsearch_v1beta.VectorSearch(
    search_field="plot_embedding",
    vector=vectorsearch_v1beta.DenseVector(values=normalize([0.3, 0.4, 0.5])),
    filter=fincher_not_seven_filter,
    top_k=5,
    output_fields=vectorsearch_v1beta.OutputFields(
        data_fields=["*"], vector_fields=["*"], metadata_fields=["*"]
    ),
)
search_filter_request_3 = vectorsearch_v1beta.SearchDataObjectsRequest(
    parent=f"projects/{PROJECT_ID}/locations/{LOCATION}/collections/{collection_id}",
    vector_search=fincher_not_seven_search,
)
results = data_object_search_service_client.search_data_objects(search_filter_request_3)
# Materialize the returned iterable so the cell displays the hits.
list(results)

## Semantic Search

In [None]:
# Semantic search: the query is given as text (`search_text`) rather than as
# an explicit vector, and is matched against the `genre_embedding` field.
genre_semantic_search = vectorsearch_v1beta.SemanticSearch(
    search_text="Wonderful genre of a Wonderful movie",
    search_field="genre_embedding",
    task_type="RETRIEVAL_QUERY",
    top_k=5,
    output_fields=vectorsearch_v1beta.OutputFields(
        data_fields=["*"], vector_fields=["*"], metadata_fields=["*"]
    ),
)
semantic_search_request = vectorsearch_v1beta.SearchDataObjectsRequest(
    parent=f"projects/{PROJECT_ID}/locations/{LOCATION}/collections/{collection_id}",
    semantic_search=genre_semantic_search,
)
results = data_object_search_service_client.search_data_objects(semantic_search_request)
# Print only the data object of each hit.
for result in results:
    print(result.data_object)

## Text Search

In [None]:
# Text search over the `title` data field using the query "king OR castle".
title_text_search = vectorsearch_v1beta.TextSearch(
    search_text="king OR castle",
    data_field_names=["title"],
    top_k=5,
    output_fields=vectorsearch_v1beta.OutputFields(
        data_fields=["*"], vector_fields=["*"], metadata_fields=["*"]
    ),
)
text_search_request = vectorsearch_v1beta.SearchDataObjectsRequest(
    parent=f"projects/{PROJECT_ID}/locations/{LOCATION}/collections/{collection_id}",
    text_search=title_text_search,
)
results = data_object_search_service_client.search_data_objects(text_search_request)
# Print only the data object of each hit.
for result in results:
    print(result.data_object)

## Batch Search

In [None]:
# Three vector searches sent in one batch request. Each entry pairs the
# embedding field to search with the query vector values used for it.
batch_1_queries = [
    ("genre_embedding", [0.1, 0.2, 0.3, 0.4]),
    ("soundtrack_embedding", [0.1, 0.1, 0.1, 0.1, 0.1]),
    ("plot_embedding", [0.3, 0.4, 0.5]),
]
batch_request_1 = vectorsearch_v1beta.BatchSearchDataObjectsRequest(
    parent=f"projects/{PROJECT_ID}/locations/{LOCATION}/collections/{collection_id}",
    searches=[
        vectorsearch_v1beta.Search(
            vector_search=vectorsearch_v1beta.VectorSearch(
                search_field=field_name,
                vector=vectorsearch_v1beta.DenseVector(values=query_values),
                top_k=5,
                output_fields=vectorsearch_v1beta.OutputFields(data_fields=["*"]),
            )
        )
        for field_name, query_values in batch_1_queries
    ],
)
data_object_search_service_client.batch_search_data_objects(batch_request_1)

In [None]:
# Same three vector searches as a plain batch, but with a `combine` option
# that applies a Reciprocal Rank Fusion ranker with three equal weights.
batch_2_queries = [
    ("genre_embedding", [0.1, 0.2, 0.3, 0.4]),
    ("soundtrack_embedding", [0.1, 0.1, 0.1, 0.1, 0.1]),
    ("plot_embedding", [0.3, 0.4, 0.5]),
]
rrf_combine_options = (
    vectorsearch_v1beta.BatchSearchDataObjectsRequest.CombineResultsOptions(
        ranker=vectorsearch_v1beta.Ranker(
            rrf=vectorsearch_v1beta.ReciprocalRankFusion(weights=[1.0, 1.0, 1.0])
        )
    )
)
batch_request_2 = vectorsearch_v1beta.BatchSearchDataObjectsRequest(
    parent=f"projects/{PROJECT_ID}/locations/{LOCATION}/collections/{collection_id}",
    searches=[
        vectorsearch_v1beta.Search(
            vector_search=vectorsearch_v1beta.VectorSearch(
                search_field=field_name,
                vector=vectorsearch_v1beta.DenseVector(values=query_values),
                top_k=5,
                output_fields=vectorsearch_v1beta.OutputFields(data_fields=["*"]),
            )
        )
        for field_name, query_values in batch_2_queries
    ],
    combine=rrf_combine_options,
)
data_object_search_service_client.batch_search_data_objects(batch_request_2)

## Create ANN Index

In [None]:
# Request an index named "plot_index" on the `plot_embedding` field.
plot_index_spec = {
    "index_field": "plot_embedding",
    "filter_fields": ["year", "genre"],
    "store_fields": ["title"],
}
request = vectorsearch_v1beta.CreateIndexRequest(
    parent=f"projects/{PROJECT_ID}/locations/{LOCATION}/collections/{collection_id}",
    index_id="plot_index",
    index=plot_index_spec,
)
# Keep the returned operation handle; a later cell waits on it.
dense_index_lro = vector_search_service_client.create_index(request)
dense_index_operation_name = dense_index_lro.operation.name
dense_index_operation_name

## Create Sparse ANN Index

In [None]:
# Request an index named "sparse_index" on the `sparse_embedding` field.
sparse_index_spec = {
    "index_field": "sparse_embedding",
    "filter_fields": ["year", "genre"],
    "store_fields": ["title"],
}
request = vectorsearch_v1beta.CreateIndexRequest(
    parent=f"projects/{PROJECT_ID}/locations/{LOCATION}/collections/{collection_id}",
    index_id="sparse_index",
    index=sparse_index_spec,
)
# Keep the returned operation handle; a later cell waits on it.
sparse_index_lro = vector_search_service_client.create_index(request)
sparse_index_operation_name = sparse_index_lro.operation.name
sparse_index_operation_name

## Poll LROs

Index creation typically takes several minutes or more to complete. Each `create_index` call above returned a long-running operation (LRO); calling `.result()` on it waits for that operation to finish:

In [None]:
# Wait for the dense index creation started in the "Create ANN Index" cell.
print(f"Waiting for dense index LRO: {dense_index_lro.operation.name}")
# NOTE(review): `.result()` is expected to block until the operation completes
# and to raise if it failed (google.api_core operation semantics) — confirm.
dense_index_lro.result()
print("Dense index ready.")

In [None]:
# Wait for the sparse index creation started in the "Create Sparse ANN Index" cell.
print(f"Waiting for sparse index LRO: {sparse_index_lro.operation.name}")
# NOTE(review): `.result()` is expected to block until the operation completes
# and to raise if it failed (google.api_core operation semantics) — confirm.
sparse_index_lro.result()
print("Sparse index ready.")

## Get ANN Index

In [None]:
# Fetch the "plot_index" index by its full resource name and display it.
plot_index_name = f"projects/{PROJECT_ID}/locations/{LOCATION}/collections/{collection_id}/indexes/plot_index"
request = vectorsearch_v1beta.GetIndexRequest(name=plot_index_name)
vector_search_service_client.get_index(request)

## List ANN Indexes

In [None]:
# List the indexes that belong to the collection and display the response.
indexes_parent = (
    f"projects/{PROJECT_ID}/locations/{LOCATION}/collections/{collection_id}"
)
request = vectorsearch_v1beta.ListIndexesRequest(parent=indexes_parent)
vector_search_service_client.list_indexes(request)

## Delete Indexes

In [None]:
# Start deleting the dense index; the returned operation is awaited in a later cell.
dense_index_name = f"projects/{PROJECT_ID}/locations/{LOCATION}/collections/{collection_id}/indexes/plot_index"
request = vectorsearch_v1beta.DeleteIndexRequest(name=dense_index_name)
delete_index_lro = vector_search_service_client.delete_index(request)

In [None]:
# Start deleting the sparse index; the returned operation is awaited in a later cell.
sparse_index_name = f"projects/{PROJECT_ID}/locations/{LOCATION}/collections/{collection_id}/indexes/sparse_index"
request = vectorsearch_v1beta.DeleteIndexRequest(name=sparse_index_name)
delete_sparse_index_lro = vector_search_service_client.delete_index(request)

In [None]:
# Wait for both index deletions started in the two previous cells, dense first.
# NOTE(review): `.result()` is expected to block until the operation completes
# and to raise if it failed (google.api_core operation semantics) — confirm.
print(f"Waiting for dense index deletion LRO: {delete_index_lro.operation.name}")
delete_index_lro.result()
print("Dense index deleted.")

# Then wait for the sparse index deletion.
print(
    f"Waiting for sparse index deletion LRO: {delete_sparse_index_lro.operation.name}"
)
delete_sparse_index_lro.result()
print("Sparse index deleted.")

## Delete data objects

In [None]:
# Query objects whose title equals "Seven" or "Se7en", to inspect them before
# the delete in the next cell.
seven_title_filter = {
    "$or": [{"title": {"$eq": "Seven"}}, {"title": {"$eq": "Se7en"}}]
}
request = vectorsearch_v1beta.QueryDataObjectsRequest(
    parent=f"projects/{PROJECT_ID}/locations/{LOCATION}/collections/{collection_id}",
    filter=seven_title_filter,
    output_fields=vectorsearch_v1beta.OutputFields(data_fields=["*"]),
)
data_object_search_service_client.query_data_objects(request)

In [None]:
# Delete the single data object whose ID is "seven".
seven_object_name = f"projects/{PROJECT_ID}/locations/{LOCATION}/collections/{collection_id}/dataObjects/seven"
delete_request = vectorsearch_v1beta.DeleteDataObjectRequest(name=seven_object_name)
data_object_service_client.delete_data_object(delete_request)

In [None]:
# Re-run the genre + director filtered vector search after the delete above,
# this time requesting only the data fields.
post_delete_filter = {
    "$and": [
        {"genre": {"$eq": "Thriller"}},
        {"director": {"$eq": "David Fincher"}},
    ]
}
post_delete_search = vectorsearch_v1beta.VectorSearch(
    search_field="plot_embedding",
    vector=vectorsearch_v1beta.DenseVector(values=normalize([0.3, 0.4, 0.5])),
    filter=post_delete_filter,
    output_fields=vectorsearch_v1beta.OutputFields(data_fields=["*"]),
)
request = vectorsearch_v1beta.SearchDataObjectsRequest(
    parent=f"projects/{PROJECT_ID}/locations/{LOCATION}/collections/{collection_id}",
    vector_search=post_delete_search,
)
data_object_search_service_client.search_data_objects(request)

## Batch Delete data objects

In [None]:
# Query objects whose genre equals "Sci-Fi", to inspect them before the batch
# delete in the next cell.
scifi_filter = {"$or": [{"genre": {"$eq": "Sci-Fi"}}]}
request = vectorsearch_v1beta.QueryDataObjectsRequest(
    parent=f"projects/{PROJECT_ID}/locations/{LOCATION}/collections/{collection_id}",
    filter=scifi_filter,
    output_fields=vectorsearch_v1beta.OutputFields(data_fields=["*"]),
)
data_object_search_service_client.query_data_objects(request)

In [None]:
# Delete several data objects in one batch call.
#
# The collection path is built here so that this cell does not rely on
# `collection_name` having been set by another cell; the value is the same
# path every other request in this notebook uses as `parent`.
collection_name = (
    f"projects/{PROJECT_ID}/locations/{LOCATION}/collections/{collection_id}"
)
batch_delete_request = vectorsearch_v1beta.BatchDeleteDataObjectsRequest(
    parent=collection_name,
    requests=[
        # One delete request per data object ID.
        vectorsearch_v1beta.DeleteDataObjectRequest(
            name=f"{collection_name}/dataObjects/{data_object_id}"
        )
        for data_object_id in ("the-matrix", "inception")
    ],
)
data_object_service_client.batch_delete_data_objects(batch_delete_request)

In [None]:
# Repeat the "Sci-Fi" genre query after the batch delete to inspect what remains.
scifi_filter_after_delete = {"$or": [{"genre": {"$eq": "Sci-Fi"}}]}
request = vectorsearch_v1beta.QueryDataObjectsRequest(
    parent=f"projects/{PROJECT_ID}/locations/{LOCATION}/collections/{collection_id}",
    filter=scifi_filter_after_delete,
    output_fields=vectorsearch_v1beta.OutputFields(data_fields=["*"]),
)
data_object_search_service_client.query_data_objects(request)

## Clean up Collection

Please also delete any ANN indexes, as described in the `Delete Indexes` section above.

In [None]:
# Best-effort teardown: try to delete every data object listed in `movies`,
# then delete the collection itself and wait for that operation to finish.
collection_name = (
    f"projects/{PROJECT_ID}/locations/{LOCATION}/collections/{collection_id}"
)
for movie in movies:
    request = vectorsearch_v1beta.DeleteDataObjectRequest(
        name=f"{collection_name}/dataObjects/{movie['id']}"
    )
    try:
        data_object_service_client.delete_data_object(request)
    except Exception as exc:
        # Some objects were already removed by the delete cells above, so a
        # failure here is expected and must not stop the clean-up. Catching
        # `Exception` (not a bare `except`) keeps Ctrl-C / kernel interrupt
        # working, and printing the error keeps unexpected failures visible.
        print(f"Skipping data object {movie['id']!r}: {exc}")
request = vectorsearch_v1beta.DeleteCollectionRequest(name=collection_name)
vector_search_service_client.delete_collection(request).result()