In [None]:
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Using Vertex AI Matching Engine for StackOverflow Questions
![ ](https://www.google-analytics.com/collect?v=2&tid=G-L6X3ECH596&cid=1&en=page_view&sid=1&dt=sdk_matching_engine_create_stack_overflow_embeddings.ipynb&dl=notebooks%2Fofficial%2Fmatching_engine%2Fsdk_matching_engine_create_stack_overflow_embeddings.ipynb)
<table align="left">
  <td>
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/matching_engine/sdk_matching_engine_create_stack_overflow_embeddings.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Colab logo"> Run in Colab
    </a>
  </td>
  <td>
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/matching_engine/sdk_matching_engine_create_stack_overflow_embeddings.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo">
      View on GitHub
    </a>
  </td>
      <td>
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/vertex-ai-samples/main/notebooks/official/matching_engine/sdk_matching_engine_create_stack_overflow_embeddings.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo">
      Open in Vertex AI Workbench
    </a>
  </td>
</table>

## Overview

This example demonstrates how to encode custom text embeddings for the StackOverflow dataset using the Vertex AI PaLM text embedding model (`textembedding-gecko`). The embeddings are then uploaded to the Vertex AI Matching Engine service. Matching Engine is a high-scale, low-latency solution for finding similar vectors (or more specifically, "embeddings") in a large corpus. Moreover, it is a fully managed offering, which further reduces operational overhead. It is built upon [Approximate Nearest Neighbor (ANN) technology](https://ai.googleblog.com/2020/07/announcing-scann-efficient-vector.html) developed by Google Research.

**Pre-requisite**: This notebook requires you to already have a VPC network set up. See the "Prepare a VPC network" section in [Create Vertex AI Matching Engine index notebook](https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/matching_engine/sdk_matching_engine_for_indexing.ipynb).

Learn more about [Vertex AI Matching Engine](https://cloud.google.com/vertex-ai/docs/matching-engine/overview).

### Objective

In this notebook, you learn how to encode custom text embeddings, create an Approximate Nearest Neighbor (ANN) index, and query against indexes.

This tutorial uses the following Google Cloud ML services:

- `Vertex AI Matching Engine`

The steps performed include:

* Create ANN index
* Create an index endpoint with VPC Network
* Deploy ANN index
* Perform online query


### Dataset

The dataset used for this tutorial is the [StackOverflow dataset](https://console.cloud.google.com/marketplace/product/stack-exchange/stack-overflow).

> Stack Overflow is the largest online community for programmers to learn, share their knowledge, and advance their careers. Updated on a quarterly basis, this BigQuery dataset includes an archive of Stack Overflow content, including posts, votes, tags, and badges. This dataset is updated to mirror the Stack Overflow content on the Internet Archive, and is also available through the Stack Exchange Data Explorer.

## Installation

Install the latest version of Cloud Storage, BigQuery, and the Vertex AI SDK for Python.

In [None]:
# Install the packages
! pip3 install --upgrade google-cloud-aiplatform \
                        google-cloud-storage \
                        'google-cloud-bigquery[pandas]'

Install the latest version of the Redis Python client, which is used later in the notebook for low-latency retrieval of question titles.

In [None]:
# Install the redis package
! pip install --upgrade redis

In [None]:
# ! pip install google_cloud_aiplatform-1.25.dev20230502+language.models-py2.py3-none-any.whl "shapely<2.0.0" "protobuf==3.19.6" --force-reinstall

### Colab only: Uncomment the following cell to restart the kernel.

In [None]:
# Automatically restart kernel after installs so that your environment can access the new packages
# import IPython

# app = IPython.Application.instance()
# app.kernel.do_shutdown(True)

## Before you begin
#### Set your project ID

If you don't know your project ID, try the following:
* Run `gcloud config list`.
* Run `gcloud projects list`.
* See the support page: [Locate the project ID](https://support.google.com/googleapi/answer/7014113)

In [None]:
! gcloud config list

In [1]:
PROJECT_ID = "[YOUR-PROJECT-ID]"  # @param {type:"string"}

# Set the project id
! gcloud config set project {PROJECT_ID}

Updated property [core/project].


#### Region

You can also change the `REGION` variable used by Vertex AI. Learn more about [Vertex AI regions](https://cloud.google.com/vertex-ai/docs/general/locations).

In [5]:
REGION = "us-central1"  # @param {type: "string"}

### Authenticate your Google Cloud account

Depending on your Jupyter environment, you may have to manually authenticate. Follow the relevant instructions below.

**1. Vertex AI Workbench**
* Do nothing as you are already authenticated.

**2. Local JupyterLab instance, uncomment and run:**

In [6]:
# ! gcloud auth login

**3. Colab, uncomment and run:**

In [7]:
# from google.colab import auth
# auth.authenticate_user()

**4. Service account or other**
* See how to grant Cloud Storage permissions to your service account at https://cloud.google.com/storage/docs/gsutil/commands/iam#ch-examples.

* Authentication: Rerun the `gcloud auth login` command in the Vertex AI Workbench notebook terminal when you are logged out and need the credential again.

### Colab only: Uncomment the following cell to restart the kernel.

In [8]:
# Automatically restart kernel after installs so that your environment can access the new packages
# import IPython

# app = IPython.Application.instance()
# app.kernel.do_shutdown(True)

### Create a Cloud Storage bucket

Create a storage bucket to store intermediate artifacts such as datasets.

In [27]:
BUCKET_URI = f"gs://your-bucket-name-{PROJECT_ID}-unique"  # @param {type:"string"}

**Only if your bucket doesn't already exist**: Run the following cell to create your Cloud Storage bucket.

In [28]:
! gsutil mb -l $REGION -p $PROJECT_ID $BUCKET_URI

Creating gs://your-bucket-name-plated-life-378620-unique/...


## Prepare the data

You will use the [Stack Overflow dataset](https://console.cloud.google.com/marketplace/product/stack-exchange/stack-overflow) of questions and answers hosted on BigQuery.

> This public dataset is hosted in Google BigQuery and is included in BigQuery's 1TB/mo of free tier processing. This means that each user receives 1TB of free BigQuery processing every month, which can be used to run queries on this public dataset.

The BigQuery table is too large to fit into memory, so you need to write a generator called `query_bigquery_chunks` to yield chunks of the dataframe for processing.

In [11]:
import math
from typing import Any, Generator

import pandas as pd
from google.cloud import bigquery

# BigQuery client bound to the project configured earlier in the notebook.
client = bigquery.Client(project=PROJECT_ID)
# Paged query over the public Stack Overflow questions table. The `{limit}` and
# `{offset}` placeholders are filled in with str.format() for each chunk.
# NOTE(review): the ORDER BY sits inside the subquery while LIMIT/OFFSET are applied
# outside it; paging is presumably only stable if the subquery ordering is preserved
# by BigQuery - TODO confirm, otherwise chunks may overlap or skip rows.
QUERY_TEMPLATE = """
        SELECT distinct q.id, q.title, q.body
        FROM (SELECT * FROM `bigquery-public-data.stackoverflow.posts_questions` where Score>0 ORDER BY View_Count desc) AS q 
        LIMIT {limit} OFFSET {offset};
        """


def query_bigquery_chunks(
    max_rows: int, rows_per_chunk: int, start_chunk: int = 0
) -> Generator[pd.DataFrame, Any, None]:
    """Yield the query result as a sequence of DataFrame chunks.

    One query is issued per chunk by formatting ``QUERY_TEMPLATE`` with a
    ``LIMIT``/``OFFSET`` pair, so only one chunk is held in memory at a time.

    Args:
        max_rows: Total number of rows to read, counted from row 0.
        rows_per_chunk: Maximum number of rows fetched per query.
        start_chunk: Zero-based index of the first chunk to fetch. Chunks
            before it are skipped, which allows resuming an interrupted run.

    Yields:
        A DataFrame per chunk with an extra ``title_with_body`` column that
        joins the ``title`` and ``body`` columns with a newline.
    """
    # `start_chunk` counts chunks, not rows, so convert it to a row offset.
    first_offset = start_chunk * rows_per_chunk
    for offset in range(first_offset, max_rows, rows_per_chunk):
        # Clamp the final chunk so no more than `max_rows` rows are read in total.
        limit = min(rows_per_chunk, max_rows - offset)
        query = QUERY_TEMPLATE.format(limit=limit, offset=offset)
        df = client.query(query).result().to_dataframe()
        df["title_with_body"] = df.title + "\n" + df.body
        yield df

In [175]:
# Get a dataframe of 1000 rows for demonstration purposes
df = next(query_bigquery_chunks(max_rows=1000, rows_per_chunk=1000))

In [176]:
# Examine the data
df.head()

Unnamed: 0,id,title,body,title_with_body
0,12615525,What are the different use cases of joblib ver...,<p>Background: I'm just getting started with s...,What are the different use cases of joblib ver...
1,12788972,Set database timeout in Entity Framework,"<p>My command keeps timing out, so I need to c...",Set database timeout in Entity Framework\n<p>M...
2,18405374,Test a factory of a 3rd party class,<p>My application uses a third party jar (no a...,Test a factory of a 3rd party class\n<p>My app...
3,18350790,Sublime Text 3 (and 2): newly installed dictio...,<p>I'm a well-experienced mac user but no prog...,Sublime Text 3 (and 2): newly installed dictio...
4,18695061,Closing dropdown in Spinner in Android,<p>I need to animate an icon of an arrow when ...,Closing dropdown in Spinner in Android\n<p>I n...


In [177]:
# Extract the question ids and question text
ids = df.id.tolist()
questions = df.title.tolist()

# Verify the length
len(ids)

1000

#### Instantiate the text encoding model

Use the [PaLM](https://cloud.google.com/vertex-ai/docs/generative-ai/embeddings/get-text-embeddings) API developed by Google for converting text to embeddings.

> Text embeddings are a dense vector representation of a piece of content such that, if two pieces of content are semantically similar, their respective embeddings are located near each other in the embedding vector space. This representation can be used to solve common NLP tasks, such as:
> - Semantic search: Search text ranked by semantic similarity.
> - Recommendation: Return items with text attributes similar to the given text.
> - Classification: Return the class of items whose text attributes are similar to the given text.
> - Clustering: Cluster items whose text attributes are similar to the given text.
> - Outlier Detection: Return items where text attributes are least related to the given text.

#### Defining an encoding function

Define a function to be used later that will take sentences and convert them to embeddings.

In [179]:
from typing import List, Optional

# Load the PaLM API embedding model
from vertexai.preview.language_models import TextEmbeddingModel

model = TextEmbeddingModel.from_pretrained("textembedding-gecko-001")

# Define an embedding method that uses the model
def encode_texts_to_embeddings(sentences: List[str]) -> List[Optional[List[float]]]:
    """Embed a batch of sentences with the module-level ``model``.

    Args:
        sentences: The texts to embed in a single request.

    Returns:
        One entry per input sentence: the embedding values on success, or
        ``None`` for every sentence when the request raised an exception.
    """
    import logging

    try:
        embeddings = model.get_embeddings(sentences)
        return [embedding.values for embedding in embeddings]
    except Exception as error:
        # Best-effort by design: a failed request must not abort the whole run,
        # but the reason is logged so quota or auth problems are not hidden.
        logging.warning(
            "Embedding request for %d sentence(s) failed: %r", len(sentences), error
        )
        return [None] * len(sentences)

In [217]:
import functools
import time
from concurrent.futures import ThreadPoolExecutor
from typing import Any, Generator, List, Tuple

import numpy as np
from tqdm.auto import tqdm


# Generator function to yield batches of sentences
def generate_batches(
    sentences: List[str], batch_size: int
) -> Generator[List[str], None, None]:
    """Yield consecutive slices of ``sentences`` with up to ``batch_size`` items each.

    The final slice is shorter when the input length is not a multiple of
    ``batch_size``; an empty input yields nothing.
    """
    batch_starts = range(0, len(sentences), batch_size)
    yield from (sentences[start : start + batch_size] for start in batch_starts)


def encode_text_to_embedding_batched(
    sentences: List[str], api_calls_per_second: int = 10, batch_size: int = 5
) -> Tuple[List[bool], np.ndarray]:
    """Embed ``sentences`` in rate-limited batches on a thread pool.

    Batches are submitted to ``encode_texts_to_embeddings`` with a pause of
    ``1 / api_calls_per_second`` seconds between submissions.

    Args:
        sentences: The texts to embed.
        api_calls_per_second: Target submission rate for embedding requests.
        batch_size: Number of sentences sent per request.

    Returns:
        A tuple ``(is_successful, embeddings)``. ``is_successful`` holds one
        flag per sentence, in input order, telling whether an embedding was
        obtained. ``embeddings`` is a 2-D array with one row per successful
        sentence, in the same order; it has zero rows when nothing succeeded.
    """
    embeddings_list = []

    # Prepare the batches using a generator
    batches = generate_batches(sentences, batch_size)

    # Pause between submissions that keeps the request rate at the target.
    seconds_per_job = 1 / api_calls_per_second

    with ThreadPoolExecutor() as executor:
        futures = []
        for batch in tqdm(
            batches, total=math.ceil(len(sentences) / batch_size), position=0
        ):
            futures.append(executor.submit(encode_texts_to_embeddings, batch))
            time.sleep(seconds_per_job)

        # Collect in submission order so results line up with `sentences`.
        for future in futures:
            embeddings_list.extend(future.result())

    is_successful = [embedding is not None for embedding in embeddings_list]
    successful = [embedding for embedding in embeddings_list if embedding is not None]

    if not successful:
        # np.stack raises on an empty list; return an empty 2-D array instead.
        return is_successful, np.empty((0, 0))

    # Keep the leading axis even for a single row, so iterating the result
    # always yields one embedding vector per successful sentence.
    stacked = np.stack(successful)
    return is_successful, stacked.reshape(len(successful), -1)

#### Test the encoding function

Encode a subset of data and see if the embeddings and distance metrics make sense.

According to the [embedding documentation](https://cloud.google.com/vertex-ai/docs/generative-ai/embeddings/get-text-embeddings#colab_example_of_semantic_search_using_embeddings), the similarity of embeddings is calculated using the dot-product. 

In [220]:
# Use the first 500 question titles as a validation sample
questions = df.title.tolist()[:500]
is_successful, question_embeddings = encode_text_to_embedding_batched(
    sentences=questions
)

# Keep only the titles whose embedding request succeeded
questions = np.array(questions)[is_successful]

  0%|          | 0/100 [00:00<?, ?it/s]

Save the dimension size for later usage when creating the index.

In [221]:
DIMENSIONS = len(question_embeddings[0])

DIMENSIONS

768

In [222]:
import random

# Pick a random query among the first 100 embedded questions; the upper bound is
# clamped so the index stays valid when fewer than 100 embeddings succeeded.
question_index = random.randint(0, min(99, len(questions) - 1))

print(f"Query question = {questions[question_index]}")
# Dot product of the query embedding with every embedding gives one score per question.
scores = np.dot(question_embeddings[question_index], question_embeddings.T)

# Print top 20 matches
for index, (question, score) in enumerate(
    sorted(zip(questions, scores), key=lambda x: x[1], reverse=True)[:20]
):
    print(f"\t{index}: {question}: {score}")

Query question = SignalR - connection.hubName is undefined
	0: SignalR - connection.hubName is undefined: 0.9999994517616809
	1: How to solve a login/database missing error regarding Sitecore Training Website?: 0.6360554028470855
	2: NameError: name 'helloworld' is not defined: 0.6137728867637725
	3: Cannot Resolve @style/Theme.Sherlock: 0.6077803902687016
	4: Load an assembly (dll) from a network drive in C#: 0.6053678317274827
	5: OleDbException: No value given for one or more required parameters: 0.6047448126425075
	6: C# HTTPModule Could not load type CGI Request: 0.6003740454733572
	7: SecurityException - Dapper on shared hosting: 0.5930472323803362
	8: Wix 3.5 preprocessor extension - undefined preprocessor function: 0.5873751287246225
	9: need to parse refname in post-receive script: 0.5854348455753308
	10: @Url.Content not encoding text - ASP.NET MVC with Razor: 0.5845889360671046
	11: The symbol you provided is not a function: 0.58359112051759
	12: SharePoint Redirect site log

#### Save the embeddings in JSONL format.

The data must be formatted in JSONL format, which means each embedding dictionary is written as a JSON string on its own line.

See more information in the docs at [Input data format and structure](https://cloud.google.com/vertex-ai/docs/matching-engine/match-eng-setup#input-data-format).

In [226]:
import tempfile
from pathlib import Path

# Create temporary file to write embeddings to
embeddings_file_path = Path(tempfile.mkdtemp())

print(f"Embeddings directory: {embeddings_file_path}")

Embeddings directory: /var/tmp/tmpb4hc2lc4


Write the embeddings to disk one BigQuery chunk at a time to prevent out-of-memory errors.

In [227]:
import gc
import json

# Total rows to read from BigQuery and the number of rows fetched per query.
BQ_NUM_ROWS = 50000
BQ_CHUNK_SIZE = 1000
BQ_NUM_CHUNKS = math.ceil(BQ_NUM_ROWS / BQ_CHUNK_SIZE)

# Chunk index to start from; it offsets the progress-bar total and the
# output file numbering below.
START_CHUNK = 0

# Create a rate limit of 300 requests per minute. Adjust this depending on your quota.
API_CALLS_PER_SECOND = 300 / 60
# According to the docs, each request can process up to 5 instances
ITEMS_PER_REQUEST = 5

# Loop through each generated dataframe, convert its rows to embeddings,
# and write them to a JSONL file for that chunk
for i, df in tqdm(
    enumerate(
        query_bigquery_chunks(
            max_rows=BQ_NUM_ROWS, rows_per_chunk=BQ_CHUNK_SIZE, start_chunk=START_CHUNK
        )
    ),
    total=BQ_NUM_CHUNKS - START_CHUNK,
    position=-1,
    desc="Chunk of rows from BigQuery",
):
    # Create a unique output file for each chunk
    chunk_path = embeddings_file_path.joinpath(
        f"{embeddings_file_path.stem}_{i+START_CHUNK}.json"
    )
    # Opened in append mode: re-running against the same directory adds to an
    # existing chunk file rather than replacing it.
    with open(chunk_path, "a") as f:
        id_chunk = df.id

        # Convert batch to embeddings
        is_successful, question_chunk_embeddings = encode_text_to_embedding_batched(
            sentences=df.title_with_body,
            api_calls_per_second=API_CALLS_PER_SECOND,
            batch_size=ITEMS_PER_REQUEST,
        )

        # Append to file: one JSON object per line. Only ids whose embedding
        # succeeded are kept, so ids and embeddings stay aligned in the zip.
        # Ids and embedding values are both serialised as strings.
        embeddings_formatted = [
            json.dumps(
                {
                    "id": str(id),
                    "embedding": [str(value) for value in embedding],
                }
            )
            + "\n"
            for id, embedding in zip(id_chunk[is_successful], question_chunk_embeddings)
        ]
        f.writelines(embeddings_formatted)

        # Delete the DataFrame and any other large data structures
        del df
        gc.collect()

Chunk of rows from BigQuery:   0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

In [None]:
embeddings_file_path.stem

In [229]:
# # ! gsutil ls {remote_folder}
# ! gsutil rm -rf {remote_folder}
# # embeddings_file_path.parent

Upload the training data to a Google Cloud Storage bucket.

In [230]:
remote_folder = f"{BUCKET_URI}/{embeddings_file_path.stem}/"
! gsutil rm -rf {remote_folder}
! gsutil -m cp -r {embeddings_file_path}/* {remote_folder}

CommandException: 1 files/objects could not be removed.
Copying file:///var/tmp/tmpb4hc2lc4/tmpb4hc2lc4_0.json [Content-Type=application/json]...
Copying file:///var/tmp/tmpb4hc2lc4/tmpb4hc2lc4_1.json [Content-Type=application/json]...
Copying file:///var/tmp/tmpb4hc2lc4/tmpb4hc2lc4_10.json [Content-Type=application/json]...
Copying file:///var/tmp/tmpb4hc2lc4/tmpb4hc2lc4_11.json [Content-Type=application/json]...
Copying file:///var/tmp/tmpb4hc2lc4/tmpb4hc2lc4_12.json [Content-Type=application/json]...
Copying file:///var/tmp/tmpb4hc2lc4/tmpb4hc2lc4_2.json [Content-Type=application/json]...
Copying file:///var/tmp/tmpb4hc2lc4/tmpb4hc2lc4_20.json [Content-Type=application/json]...
Copying file:///var/tmp/tmpb4hc2lc4/tmpb4hc2lc4_23.json [Content-Type=application/json]...
Copying file:///var/tmp/tmpb4hc2lc4/tmpb4hc2lc4_18.json [Content-Type=application/json]...
Copying file:///var/tmp/tmpb4hc2lc4/tmpb4hc2lc4_25.json [Content-Type=application/json]...
Copying file:///var/tmp/tmpb4hc2lc4/t

## Create Indexes


### Create ANN Index (for Production Usage)

In [231]:
DISPLAY_NAME = "stack_overflow_8M"
DESCRIPTION = "question titles and bodies from stackoverflow"

Create the ANN index configuration:

To learn more about configuring the index, see [Input data format and structure](https://cloud.google.com/vertex-ai/docs/matching-engine/match-eng-setup#input-data-format).


In [232]:
from google.cloud import aiplatform

aiplatform.init(project=PROJECT_ID, location=REGION, staging_bucket=BUCKET_URI)

In [233]:
DIMENSIONS = 768

tree_ah_index = aiplatform.MatchingEngineIndex.create_tree_ah_index(
    display_name=DISPLAY_NAME,
    contents_delta_uri=remote_folder,
    dimensions=DIMENSIONS,
    approximate_neighbors_count=150,
    distance_measure_type="DOT_PRODUCT_DISTANCE",
    leaf_node_embedding_count=500,
    leaf_nodes_to_search_percent=80,
    description=DESCRIPTION,
)

Creating MatchingEngineIndex
Create MatchingEngineIndex backing LRO: projects/782921078983/locations/us-central1/indexes/150193288354201600/operations/5246750946649702400
MatchingEngineIndex created. Resource name: projects/782921078983/locations/us-central1/indexes/150193288354201600
To use this MatchingEngineIndex in another session:
index = aiplatform.MatchingEngineIndex('projects/782921078983/locations/us-central1/indexes/150193288354201600')


In [234]:
INDEX_RESOURCE_NAME = tree_ah_index.resource_name
INDEX_RESOURCE_NAME

'projects/782921078983/locations/us-central1/indexes/150193288354201600'

Using the resource name, you can retrieve an existing MatchingEngineIndex.

In [235]:
tree_ah_index = aiplatform.MatchingEngineIndex(index_name=INDEX_RESOURCE_NAME)

## Create an IndexEndpoint with VPC Network

In [236]:
# # Retrieve the project number
# PROJECT_NUMBER = !gcloud projects list --filter="PROJECT_ID:'{PROJECT_ID}'" --format='value(PROJECT_NUMBER)'
# PROJECT_NUMBER = PROJECT_NUMBER[0]

# VPC_NETWORK = "matching-engine"
# VPC_NETWORK_FULL = "projects/{}/global/networks/{}".format(PROJECT_NUMBER, VPC_NETWORK)
# VPC_NETWORK_FULL

In [237]:
my_index_endpoint = aiplatform.MatchingEngineIndexEndpoint.create(
    display_name=DISPLAY_NAME,
    description=DISPLAY_NAME,
    # network=VPC_NETWORK_FULL,
    public_endpoint_enabled=True,
)

Creating MatchingEngineIndexEndpoint
Create MatchingEngineIndexEndpoint backing LRO: projects/782921078983/locations/us-central1/indexEndpoints/1958951488696877056/operations/3813480365239042048
MatchingEngineIndexEndpoint created. Resource name: projects/782921078983/locations/us-central1/indexEndpoints/1958951488696877056
To use this MatchingEngineIndexEndpoint in another session:
index_endpoint = aiplatform.MatchingEngineIndexEndpoint('projects/782921078983/locations/us-central1/indexEndpoints/1958951488696877056')


In [243]:
# my_index_endpoint = aiplatform.MatchingEngineIndexEndpoint('projects/782921078983/locations/us-central1/indexEndpoints/1958951488696877056')

## Deploy Indexes

### Deploy ANN Index

In [239]:
# # DEPLOYED_INDEX_ID = 'stack_overflow_8M_1c74'
# import uuid

# DEPLOYED_INDEX_ID = f"stack_overflow_8M_{str(uuid.uuid4())[:4]}"

In [240]:
# TODO
DEPLOYED_INDEX_ID = "deployed_index_id_unique"

DEPLOYED_INDEX_ID

'stack_overflow_8M_d298'

In [None]:
my_index_endpoint = my_index_endpoint.deploy_index(
    index=tree_ah_index, deployed_index_id=DEPLOYED_INDEX_ID
)

my_index_endpoint.deployed_indexes

#### Verify number of declared items matches the number of embeddings

Each IndexEndpoint can have multiple indexes deployed to it. For each deployed index, you can retrieve the number of vectors it contains from the index's `_gca_resource.index_stats.vectors_count` attribute. The total may not exactly match the number of rows queried, because some requests to the embedding service may have failed.

In [244]:
# Sum the vector counts of every index deployed to this endpoint.
# NOTE(review): `_gca_resource` is a private attribute of the SDK object -
# presumably the underlying API resource; confirm it is stable across SDK versions.
number_of_vectors = sum(
    aiplatform.MatchingEngineIndex(
        deployed_index.index
    )._gca_resource.index_stats.vectors_count
    for deployed_index in my_index_endpoint.deployed_indexes
)

# BQ_NUM_ROWS is the number of rows requested from BigQuery, so it is the
# upper bound on the number of embeddings that were written.
print(f"Expected: {BQ_NUM_ROWS}, Actual: {number_of_vectors}")

Expected: 50000, Actual: 49992


## Create Online Queries

After you have built and deployed your indexes, you can query the deployed index to find nearest neighbors.

Note: For the DOT_PRODUCT_DISTANCE distance type, the "distance" property returned with each MatchNeighbor actually refers to the similarity.

In [249]:
test_embeddings = encode_texts_to_embeddings(sentences=["Install GPU for Tensorflow"])

In [250]:
# Test query
NUM_NEIGHBOURS = 10

response = my_index_endpoint.find_neighbors(
    deployed_index_id=DEPLOYED_INDEX_ID,
    queries=test_embeddings,
    num_neighbors=NUM_NEIGHBOURS,
)

response

[[MatchNeighbor(id='68704846', distance=0.7809396386146545),
  MatchNeighbor(id='41366327', distance=0.7776130437850952),
  MatchNeighbor(id='70984360', distance=0.776228666305542),
  MatchNeighbor(id='66525883', distance=0.7567992806434631),
  MatchNeighbor(id='47248054', distance=0.7561374306678772),
  MatchNeighbor(id='52857901', distance=0.7410272359848022),
  MatchNeighbor(id='58525872', distance=0.7396792769432068),
  MatchNeighbor(id='61884137', distance=0.7386407852172852),
  MatchNeighbor(id='64786672', distance=0.7366656064987183),
  MatchNeighbor(id='58561680', distance=0.7357790470123291)]]

Verify that the retrieved results are relevant by checking the StackOverflow link

In [252]:
# Print a StackOverflow link for each neighbor returned for the first (only) query
for neighbor in response[0]:
    print(f"https://stackoverflow.com/questions/{neighbor.id}")

https://stackoverflow.com/questions/68704846
https://stackoverflow.com/questions/41366327
https://stackoverflow.com/questions/70984360
https://stackoverflow.com/questions/66525883
https://stackoverflow.com/questions/47248054
https://stackoverflow.com/questions/52857901
https://stackoverflow.com/questions/58525872
https://stackoverflow.com/questions/61884137
https://stackoverflow.com/questions/64786672
https://stackoverflow.com/questions/58561680


## Storing and retrieving titles from a Redis data store
When you productionize this code into a service, you will need to convert the nearest-neighbor IDs returned from Vertex AI Matching Engine into data usable by downstream services.

In this case, you'll need to convert the IDs to titles.

You can use Google Cloud's Memorystore to deploy a managed Redis instance to save the id-title key-value pairs.

See more information on [Memorystore](https://cloud.google.com/memorystore/docs/redis/create-manage-instances?hl=en)

In [None]:
REDIS_INSTANCE_NAME = "stackoverflow-questions-palm"

# Create a Redis instance
! gcloud redis instances create '{REDIS_INSTANCE_NAME}' --size=10 --region='{REGION}' --network='{VPC_NETWORK_FULL}' --connect-mode=private-service-access

In [None]:
# Get host and port info
REDIS_HOST = ! gcloud redis instances list --filter="INSTANCE_NAME:'{REDIS_INSTANCE_NAME}'" --region {REGION}  --format='value(HOST)'
REDIS_PORT = ! gcloud redis instances list --filter="INSTANCE_NAME:'{REDIS_INSTANCE_NAME}'" --region {REGION} --format='value(PORT)'

if isinstance(REDIS_HOST, list):
    REDIS_HOST = REDIS_HOST[0]

if isinstance(REDIS_PORT, list):
    REDIS_PORT = REDIS_PORT[0]

print(f"REDIS_HOST = {REDIS_HOST}")
print(f"REDIS_PORT = {REDIS_PORT}")

In [None]:
# Connect to the instance
import redis

redis_client = redis.StrictRedis(host=REDIS_HOST, port=REDIS_PORT)
# redis_client.flushdb()

In [None]:
%%time
# Convert the id -> title relationship into a dict and write to redis
for df in tqdm(
    query_bigquery_chunks(
        max_rows=BQ_NUM_ROWS, rows_per_chunk=BQ_CHUNK_SIZE, start_chunk=0
    ),
    total=BQ_NUM_CHUNKS,
    position=0,
    desc="Chunk of rows from BigQuery",
):
    ids = df.id.tolist()
    titles = df.title.tolist()
    bodies = df.body.tolist()

    # Create a Redis pipeline so this chunk's writes are queued and sent together
    pipe = redis_client.pipeline()

    # Iterate over the data and add hset commands to the pipeline.
    # The loop variable is `question_id` rather than `id` so the builtin `id`
    # is not rebound at notebook scope.
    for question_id, title, body in tqdm(
        zip(ids, titles, bodies), total=len(ids), position=1
    ):
        pipe.hset(
            str(question_id),
            mapping={
                "title": str(title),
                # Store only the first 100 characters of the body
                "body": str(body[:100]),
            },
        )

    # Execute the pipeline
    _ = pipe.execute()

In [None]:
# Verify that redis can retrieve the correct information
df = next(query_bigquery_chunks(max_rows=10, rows_per_chunk=10))

[
    f"Actual = {title}, Retrieved = {redis_client.hgetall(str(id))}"
    for id, title in zip(df.id, df.title)
]

## Cleaning up

To clean up all Google Cloud resources used in this project, you can [delete the Google Cloud
project](https://cloud.google.com/resource-manager/docs/creating-managing-projects#shutting_down_projects) you used for the tutorial.
You can also manually delete resources that you created by running the following code.

In [None]:
# Force undeployment of indexes and delete endpoint
my_index_endpoint.delete(force=True)

In [None]:
# Delete indexes
tree_ah_index.delete()

In [None]:
# Delete redis instance
! gcloud redis instances delete '{REDIS_INSTANCE_NAME}' --region {REGION} --quiet