# Objective

Undeploy & Delete unused Vertex AI Vector Search Indexes.

# Install

In [1]:
!pip install google-cloud-aiplatform



---

#### ⚠️ Do not forget to click the "RESTART RUNTIME" button above.

---

# Authenticate

In [2]:
import sys

if 'google.colab' in sys.modules:
    from google.colab import auth as google_auth
    google_auth.authenticate_user()

In [3]:
PROJECT_ID = 'solutions-2023-mar-107' # @param {type:"string"}
REGION = 'us-central1' # @param {type:"string"}
BUCKET_URI = "gs://vector_search_regional/flipkart_batch_update" # @param {type:"string"} # WHERE EMBEDDINGS ARE STORED
ENDPOINT = "{}-aiplatform.googleapis.com".format(REGION)

List all the deployed index

In [4]:
!gcloud ai index-endpoints list \
  --project=$PROJECT_ID \
  --region=$REGION

Using endpoint [https://us-central1-aiplatform.googleapis.com/]
---
createTime: '2023-12-18T08:15:01.086330Z'
deployedIndexes:
- automaticResources:
    maxReplicaCount: 2
    minReplicaCount: 2
  createTime: '2023-12-18T08:15:52.664772Z'
  deploymentGroup: default
  id: flipkart_multimodal_18K
  index: projects/411826505131/locations/us-central1/indexes/2594851839597871104
  indexSyncTime: '2023-12-18T09:11:37.090339Z'
description: Endpoint on flipkart
displayName: flipkart_batch
encryptionSpec: {}
etag: AMEw9yOXuIk7A9H9NmtcLZIEzOzIHB9Bg0xAO4CH0Jd_GqhfaJR2lsD430wsz6ZflYXO
name: projects/411826505131/locations/us-central1/indexEndpoints/6297373683249840128
publicEndpointDomainName: 1154454212.us-central1-411826505131.vdb.vertexai.goog
updateTime: '2023-12-18T08:15:01.784596Z'
---
createTime: '2023-12-08T10:20:13.996148Z'
deployedIndexes:
- createTime: '2023-12-08T10:24:03.327365Z'
  dedicatedResources:
    machineSpec:
      machineType: e2-standard-16
    maxReplicaCount: 2
    minRep

In [23]:
!gcloud ai indexes list \
  --project=$PROJECT_ID \
  --region=$REGION

Using endpoint [https://us-central1-aiplatform.googleapis.com/]
---
createTime: '2023-12-18T06:51:22.796157Z'
deployedIndexes:
- deployedIndexId: flipkart_multimodal_18K
  indexEndpoint: projects/411826505131/locations/us-central1/indexEndpoints/6297373683249840128
description: Based on ~18K Flipkart product listings with both description and image
displayName: flipkart_batch
encryptionSpec: {}
etag: AMEw9yPw4ORvCK7Tce2FF5m6KjZig3J_NeP0gt5cI59Ysrg3h4NEhJ7AHZkD2E1tJal2
indexStats:
  shardsCount: 1
  vectorsCount: '36394'
indexUpdateMethod: BATCH_UPDATE
metadata:
  config:
    algorithmConfig:
      treeAhConfig:
        leafNodeEmbeddingCount: '500'
        leafNodesToSearchPercent: 7
    approximateNeighborsCount: 150
    dimensions: 1408
    distanceMeasureType: COSINE_DISTANCE
    shardSize: SHARD_SIZE_MEDIUM
metadataSchemaUri: gs://google-cloud-aiplatform/schema/matchingengine/metadata/nearest_neighbor_search_1.0.0.yaml
name: projects/411826505131/locations/us-central1/indexes/25948

#### Interested fields

id: flipkart_multimodal_18K

index: projects/411826505131/locations/us-central1/indexes/2594851839597871104

displayName: flipkart_batch


name: projects/411826505131/locations/us-central1/indexEndpoints/6297373683249840128

In [6]:
from google.cloud import aiplatform
aiplatform.init(project=PROJECT_ID, location=REGION, staging_bucket=BUCKET_URI)

In [34]:
DEPLOYED_INDEX_ID = 'flipkart_multimodal_18K' # @param {type:"string"} #endpoind id
endpoint_name = 'projects/411826505131/locations/us-central1/indexEndpoints/6297373683249840128' # @param {type:"string"} #index id

In [7]:
my_index_endpoint = aiplatform.MatchingEngineIndexEndpoint('projects/411826505131/locations/us-central1/indexEndpoints/6297373683249840128')


In [33]:
INDEX = 'projects/411826505131/locations/us-central1/indexes/2594851839597871104' # @param {type:"string"}
my_index = aiplatform.MatchingEngineIndex(INDEX)

 Lets Update the embeddings for this index with update_embeddings

        Args:
            contents_delta_uri (str):
                Required. Allows inserting, updating  or deleting the contents of the Matching Engine Index.
                The string must be a valid Google Cloud Storage directory path. If this
                field is set when calling IndexService.UpdateIndex, then no other
                Index field can be  also updated as part of the same call.
                The expected structure and format of the files this URI points to is
                described at
                https://cloud.google.com/vertex-ai/docs/vector-search/setup/format-structure
            is_complete_overwrite (bool):
                Optional. If this field is set together with contentsDeltaUri when calling IndexService.UpdateIndex,
                then existing content of the Index will be replaced by the data from the contentsDeltaUri.

        Returns:
            MatchingEngineIndex - The updated index resource object.


In [9]:
my_index.update_embeddings(BUCKET_URI)

INFO:google.cloud.aiplatform.matching_engine.matching_engine_index:Updating MatchingEngineIndex index: projects/411826505131/locations/us-central1/indexes/2594851839597871104
INFO:google.cloud.aiplatform.matching_engine.matching_engine_index:Update MatchingEngineIndex index backing LRO: projects/411826505131/locations/us-central1/indexes/2594851839597871104/operations/1023688687449276416
INFO:google.cloud.aiplatform.matching_engine.matching_engine_index:MatchingEngineIndex index Updated. Resource name: projects/411826505131/locations/us-central1/indexes/2594851839597871104


<google.cloud.aiplatform.matching_engine.matching_engine_index.MatchingEngineIndex object at 0x7cd034433eb0> 
resource name: projects/411826505131/locations/us-central1/indexes/2594851839597871104

---

#### Note: The existing index's vectorsCount gets updated!

---

In [27]:
# Query
import base64
from google.cloud import aiplatform
from google.protobuf import struct_pb2
from functools import cache
import time
import typing
import logging

# Inspired from https://stackoverflow.com/questions/34269772/type-hints-in-namedtuple.
class EmbeddingResponse(typing.NamedTuple):
  text_embedding: typing.Sequence[float]
  image_embedding: typing.Sequence[float]


class EmbeddingPredictionClient:
  """Wrapper around Prediction Service Client."""
  def __init__(self, project : str,
    location : str = "us-central1",
    api_regional_endpoint: str = "us-central1-aiplatform.googleapis.com"):
    client_options = {"api_endpoint": api_regional_endpoint}
    # Initialize client that will be used to create and send requests.
    # This client only needs to be created once, and can be reused for multiple requests.
    self.client = aiplatform.gapic.PredictionServiceClient(client_options=client_options)
    self.location = location
    self.project = project

  def get_embedding(self, text : str = None, image_path : str = None):
    """image_path can be a local path or a GCS URI."""
    if not text and not image_path:
      raise ValueError('At least one of text or image_bytes must be specified.')

    instance = struct_pb2.Struct()
    if text:
      if len(text) > 1024:
        logging.warning('Text must be less than 1024 characters. Truncating text.')
        text = text[:1024]
      instance.fields['text'].string_value = text

    if image_path:
      image_struct = instance.fields['image'].struct_value
      if image_path.lower().startswith('gs://'):
        image_struct.fields['gcsUri'].string_value = image_path
      else:
        with open(image_path, "rb") as f:
          image_bytes = f.read()
        encoded_content = base64.b64encode(image_bytes).decode("utf-8")
        image_struct.fields['bytesBase64Encoded'].string_value = encoded_content

    instances = [instance]
    endpoint = (f"projects/{self.project}/locations/{self.location}"
      "/publishers/google/models/multimodalembedding@001")
    response = self.client.predict(endpoint=endpoint, instances=instances)

    text_embedding = None
    if text:
      text_emb_value = response.predictions[0]['textEmbedding']
      text_embedding = [v for v in text_emb_value]

    image_embedding = None
    if image_path:
      image_emb_value = response.predictions[0]['imageEmbedding']
      image_embedding = [v for v in image_emb_value]

    return EmbeddingResponse(
      text_embedding=text_embedding,
      image_embedding=image_embedding)

@cache
def get_client(project):
  return EmbeddingPredictionClient(project)


def embed(project,text,image_path=None):
  client = get_client(project)
  start = time.time()
  response = client.get_embedding(text=text, image_path=image_path)
  end = time.time()
  print('Embedding Time: ', end - start)
  return response

In [28]:
res = embed(PROJECT_ID,
            "Key Features of Vishudh Printed Women's Straight Kurta BLACK, GREY Straight,Specifications of Vishudh Printed Women's Straight Kurta Kurta Details Sleeve Sleeveless Number of Contents in Sales Package Pack of 1 Fabric 100% POLYESTER Type Straight Neck ROUND NECK General Details Pattern Printed Occasion Festive Ideal For Women's In the Box Kurta Additional Details Style Code VNKU004374 BLACK::GREY Fabric Care Gentle Machine Wash in Lukewarm Water, Do Not Bleach",
            'gs://genai-product-catalog/flipkart_20k_oct26/3ecb859759e5311cbab6850e98879522_0.jpg')

Embedding Time:  0.8157088756561279


In [35]:
NUM_NEIGHBORS = 5
response = my_index_endpoint.find_neighbors(
    deployed_index_id='my_index',
    queries=[res.text_embedding,res.image_embedding],
    num_neighbors=NUM_NEIGHBORS,
)

response

NotFound: ignored

In [17]:
# For complete override
# my_index.update_embeddings(BUCKET_URI, is_complete_overwrite=True)

## Update [Batch index with filtering]

In [18]:
PROJECT_ID = 'solutions-2023-mar-107' # @param {type:"string"}
REGION = 'us-central1' # @param {type:"string"}
BUCKET_URI = "gs://vector_search_regional/flipkart_batch_update" # @param {type:"string"} # WHERE EMBEDDINGS ARE STORED
index_to_be_updated = '4261605914190020608' # @param {type:'string'}

In [22]:
AUTH_TOKEN = !gcloud auth print-access-token
PROJECT_NUMBER = !gcloud projects list --filter="PROJECT_ID:'{PROJECT_ID}'" --format='value(PROJECT_NUMBER)'
PROJECT_NUMBER = PROJECT_NUMBER[0]

print("PROJECT_NUMBER: {}".format(PROJECT_NUMBER))

!gcloud config set project {PROJECT_ID} --quiet
!gcloud config set ai_platform/region {REGION} --quiet


INDEX = 'projects/{0}/locations/{1}/indexes/{2}'.format(PROJECT_NUMBER, REGION, index_to_be_updated)
print("INDEX: {}".format(INDEX))

PROJECT_NUMBER: 411826505131
Updated property [core/project].
Updated property [ai_platform/region].
INDEX: projects/411826505131/locations/us-central1/indexes/4261605914190020608


In [24]:
# vectorsCount: '36374' -> before update

In [25]:
from google.cloud import aiplatform

def update_batch_index(index_to_be_updated):
  aiplatform.init(project=PROJECT_ID, location=REGION)
  my_index = aiplatform.MatchingEngineIndex(INDEX)
  my_index.update_embeddings(BUCKET_URI, is_complete_overwrite=False)

In [26]:
# Updating Batch index (created with filters)
update_batch_index(INDEX)

INFO:google.cloud.aiplatform.matching_engine.matching_engine_index:Updating MatchingEngineIndex index: projects/411826505131/locations/us-central1/indexes/4261605914190020608
INFO:google.cloud.aiplatform.matching_engine.matching_engine_index:Update MatchingEngineIndex index backing LRO: projects/411826505131/locations/us-central1/indexes/4261605914190020608/operations/3852512203391369216
INFO:google.cloud.aiplatform.matching_engine.matching_engine_index:MatchingEngineIndex index Updated. Resource name: projects/411826505131/locations/us-central1/indexes/4261605914190020608


In [32]:
# vectorsCount: '36394' <- After update

In [None]:
NUM_NEIGHBORS = 5
my_index = aiplatform.MatchingEngineIndex(INDEX)

response = my_index_endpoint.find_neighbors(
    deployed_index_id='2594851839597871104',
    queries=[res.text_embedding,res.image_embedding],
    num_neighbors=NUM_NEIGHBORS,
)

response