## Prepare VPC Network and Install Packages

In [15]:
# Project, VPC network and reserved-range names used by the gcloud commands below.
PROJECT_ID = "qwiklabs-gcp-04-a7949fa34b56"
NETWORK_NAME = "default"
PEERING_RANGE_NAME = "cym-range"

# Reserve IP range
# (a global /16 range on NETWORK_NAME, created with purpose VPC_PEERING).
! gcloud compute addresses create {PEERING_RANGE_NAME} --global --prefix-length=16 --network={NETWORK_NAME} --purpose=VPC_PEERING --project={PROJECT_ID} --description="peering range for cymbal demo"

# Set up peering with service networking
# (connects NETWORK_NAME to servicenetworking.googleapis.com using the range reserved above).
! gcloud services vpc-peerings connect --service=servicenetworking.googleapis.com --network={NETWORK_NAME} --ranges={PEERING_RANGE_NAME} --project={PROJECT_ID}

Created [https://www.googleapis.com/compute/v1/projects/qwiklabs-gcp-04-a7949fa34b56/global/addresses/cym-range].
Operation "operations/pssn.p24-460625037798-a0bfa36f-9c4b-4e15-bb47-12de71606043" finished successfully.


In [16]:
! pip install -U git+https://github.com/googleapis/python-aiplatform.git@main --user

Collecting git+https://github.com/googleapis/python-aiplatform.git@main
  Cloning https://github.com/googleapis/python-aiplatform.git (to revision main) to /var/tmp/pip-req-build-crv5twl0
  Running command git clone --filter=blob:none --quiet https://github.com/googleapis/python-aiplatform.git /var/tmp/pip-req-build-crv5twl0
  Resolved https://github.com/googleapis/python-aiplatform.git to commit f917269b35b6582aecabd7a75610b2225407ae1f
  Preparing metadata (setup.py) ... [?25ldone


! pip install -U grpcio-tools --user
! pip install -U h5py --user
! pip install proto-plus==1.24.0.dev1

In [18]:
# Restart the kernel so the packages installed above can be imported.
# Setting the IS_TESTING environment variable to a non-empty value skips the restart.
import os

skip_restart = bool(os.getenv("IS_TESTING"))
if not skip_restart:
    # Imported here because it is only needed when a restart is performed.
    import IPython

    IPython.Application.instance().kernel.do_shutdown(True)

## Create a Cloud Storage Bucket

In [1]:
# Settings shared by the cells that follow.
PROJECT_ID = "qwiklabs-gcp-04-a7949fa34b56"
REGION = "us-central1"
NETWORK_NAME = "default"
# The bucket is named after the project, with an "-aip" suffix.
BUCKET_NAME = f"gs://{PROJECT_ID}-aip"

In [2]:
# Create the bucket BUCKET_NAME in REGION under project PROJECT_ID.
! gsutil mb -l $REGION -p $PROJECT_ID $BUCKET_NAME

Creating gs://qwiklabs-gcp-04-a7949fa34b56-aip/...


In [3]:
# List the contents of the new bucket to confirm it exists.
! gsutil ls -al $BUCKET_NAME

## Import Libraries

Import the Vertex AI (unified) client<br>
library into your Python environment.


In [5]:
# Upgrade protobuf to the latest version
!pip install --upgrade protobuf

import time
import grpc
import h5py
from google.cloud import aiplatform_v1
from google.protobuf import struct_pb2

Collecting protobuf
  Using cached protobuf-5.28.3-cp38-abi3-manylinux2014_x86_64.whl.metadata (592 bytes)
Using cached protobuf-5.28.3-cp38-abi3-manylinux2014_x86_64.whl (316 kB)
Installing collected packages: protobuf
  Attempting uninstall: protobuf
    Found existing installation: protobuf 3.20.3
    Uninstalling protobuf-3.20.3:
      Successfully uninstalled protobuf-3.20.3
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
apache-beam 2.46.0 requires protobuf<4,>3.12.2, but you have protobuf 5.28.3 which is incompatible.
google-api-core 1.34.1 requires protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<4.0.0dev,>=3.19.5, but you have protobuf 5.28.3 which is incompatible.
google-cloud-bigquery-storage 2.16.2 requires protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.19.5, but you

In [6]:
REGION = "us-central1"
ENDPOINT = "{}-aiplatform.googleapis.com".format(REGION)
NETWORK_NAME = "default"


AUTH_TOKEN = !gcloud auth print-access-token
PROJECT_NUMBER = !gcloud projects list --filter="PROJECT_ID:'{PROJECT_ID}'" --format='value(PROJECT_NUMBER)'
PROJECT_NUMBER = PROJECT_NUMBER[0]

PARENT = "projects/{}/locations/{}".format(PROJECT_ID, REGION)

print("ENDPOINT: {}".format(ENDPOINT))
print("PROJECT_ID: {}".format(PROJECT_ID))
print("REGION: {}".format(REGION))

!gcloud config set project {PROJECT_ID} --quiet
!gcloud config set ai_platform/region {REGION} --quiet

ENDPOINT: us-central1-aiplatform.googleapis.com
PROJECT_ID: qwiklabs-gcp-04-a7949fa34b56
REGION: us-central1
Updated property [core/project].
Updated property [ai_platform/region].


## Prepare the Data

The GloVe dataset consists of a set of pre-trained embeddings. The embeddings are split into a "train" split, and a "test" split. We will create a vector search index from the "train" split, and use the embedding vectors in the "test" split as query vectors to test the vector search index.

Note: While the data split uses the term "train", these are pre-trained embeddings and thus are ready to be indexed for search. The terms "train" and "test" split are used just to be consistent with usual machine learning terminology.

In [7]:
# Copy the GloVe HDF5 file from the public samples bucket into the working directory.
! gsutil cp gs://cloud-samples-data/vertex-ai/matching_engine/glove-100-angular.hdf5 .

Copying gs://cloud-samples-data/vertex-ai/matching_engine/glove-100-angular.hdf5...
\ [1 files][462.9 MiB/462.9 MiB]                                                
Operation completed over 1 objects/462.9 MiB.                                    


Read the data into memory.

In [8]:
# The number of nearest neighbors to be retrieved from database for each query.
k = 10

# Open the downloaded file read-only and take its "train" and "test" datasets.
# NOTE(review): the file handle is never closed; presumably `train` and `test`
# need it to stay open for the reads in later cells — confirm before closing it.
h5 = h5py.File("glove-100-angular.hdf5", "r")
train = h5["train"]
test = h5["test"]

In [9]:
# Display the first embedding of the train split.
train[0]

array([-0.11333  ,  0.48402  ,  0.090771 , -0.22439  ,  0.034206 ,
       -0.55831  ,  0.041849 , -0.53573  ,  0.18809  , -0.58722  ,
        0.015313 , -0.014555 ,  0.80842  , -0.038519 ,  0.75348  ,
        0.70502  , -0.17863  ,  0.3222   ,  0.67575  ,  0.67198  ,
        0.26044  ,  0.4187   , -0.34122  ,  0.2286   , -0.53529  ,
        1.2582   , -0.091543 ,  0.19716  , -0.037454 , -0.3336   ,
        0.31399  ,  0.36488  ,  0.71263  ,  0.1307   , -0.24654  ,
       -0.52445  , -0.036091 ,  0.55068  ,  0.10017  ,  0.48095  ,
        0.71104  , -0.053462 ,  0.22325  ,  0.30917  , -0.39926  ,
        0.036634 , -0.35431  , -0.42795  ,  0.46444  ,  0.25586  ,
        0.68257  , -0.20821  ,  0.38433  ,  0.055773 , -0.2539   ,
       -0.20804  ,  0.52522  , -0.11399  , -0.3253   , -0.44104  ,
        0.17528  ,  0.62255  ,  0.50237  , -0.7607   , -0.071786 ,
        0.0080131, -0.13286  ,  0.50097  ,  0.18824  , -0.54722  ,
       -0.42664  ,  0.4292   ,  0.14877  , -0.0072514, -0.1648

Save the train split in JSONL format.

In [10]:
# Write at most the first 101 train embeddings (ids "0" to "100"), one JSON object per line.
# Every datapoint carries:
#   - one restrict in namespace "class" whose allowlist is the datapoint's own id;
#     a later section demos returning only the allowlisted datapoints;
#   - a crowding tag, "a" for even ids and "b" for odd ids;
#     a later section demos limiting how many neighbors are returned per group.
with open("glove100.json", "w") as f:
    for i in range(min(len(train), 101)):
        datapoint_id = str(i)
        embedding = ",".join(str(x) for x in train[i])
        crowding_tag = '"b"' if i % 2 else '"a"'
        f.write(
            '{"id":"' + datapoint_id + '",'
            + '"embedding":[' + embedding + "],"
            + '"restricts":[{"namespace": "class", "allow": ["' + datapoint_id + '"]}],'
            + '"crowding_tag":' + crowding_tag + "}"
            + "\n"
        )

Upload the training data to Google Cloud Storage Bucket created earlier.

In [11]:
# NOTE: Everything in this GCS DIR will be DELETED before uploading the data.
# The recorded output shows a CommandException from this command; presumably the
# bucket was still empty at that point, so there was nothing to remove — confirm.

! gsutil rm -rf {BUCKET_NAME}/*

CommandException: 1 files/objects could not be removed.


# Upload the JSONL file written above to the root of the bucket.
! gsutil cp glove100.json {BUCKET_NAME}/glove100.json

In [13]:
# List the bucket to confirm that the upload is there.
! gsutil ls {BUCKET_NAME}

gs://qwiklabs-gcp-04-a7949fa34b56-aip/glove100.json


## Create Stream Update Index

Run the following code snippet in the next cells to create an instance of the IndexServiceClient from the AI Platform (Unified) Python client library. This client is used for interacting with AI Platform services related to indexes, such as creating and managing Approximate Nearest Neighbor (ANN) indexes.

In [14]:
# Client for index management calls, bound to the regional API endpoint.
index_client = aiplatform_v1.IndexServiceClient(
    client_options={"api_endpoint": ENDPOINT},
)

In [15]:
# Number of dimensions declared in the index configuration.
DIMENSIONS = 100
# Display name given to the index.
DISPLAY_NAME = "glove_100_1"

Let's define the configuration for creating an Approximate Nearest Neighbor (ANN) index.

In [18]:
# Tree-AH algorithm parameters.
treeAhConfig = struct_pb2.Struct(
    fields=dict(
        leafNodeEmbeddingCount=struct_pb2.Value(number_value=500),
        leafNodesToSearchPercent=struct_pb2.Value(number_value=7),
    )
)

# Wrap the Tree-AH parameters under the "treeAhConfig" key.
algorithmConfig = struct_pb2.Struct(
    fields=dict(treeAhConfig=struct_pb2.Value(struct_value=treeAhConfig))
)

# Index-level settings: vector size, candidate count, distance measure, algorithm.
config = struct_pb2.Struct(
    fields=dict(
        dimensions=struct_pb2.Value(number_value=DIMENSIONS),
        approximateNeighborsCount=struct_pb2.Value(number_value=150),
        distanceMeasureType=struct_pb2.Value(string_value="DOT_PRODUCT_DISTANCE"),
        algorithmConfig=struct_pb2.Value(struct_value=algorithmConfig),
    )
)

# Index metadata: the config above plus the bucket that holds the datapoints.
metadata = struct_pb2.Struct(
    fields=dict(
        config=struct_pb2.Value(struct_value=config),
        contentsDeltaUri=struct_pb2.Value(string_value=BUCKET_NAME),
    )
)

# Index definition; STREAM_UPDATE selects the streaming update method.
ann_index = dict(
    display_name=DISPLAY_NAME,
    description="Glove 100 ANN index",
    metadata=struct_pb2.Value(struct_value=metadata),
    index_update_method=aiplatform_v1.Index.IndexUpdateMethod.STREAM_UPDATE,
)

In [19]:
# Start creating the index. `ann_index` is rebound here from the request dict to the
# object returned by create_index, so re-running only this cell would pass that
# object (not the dict) as `index`.
ann_index = index_client.create_index(parent=PARENT, index=ann_index)

In [20]:
# Display the result of the create-index call.
ann_index.result()

name: "projects/460625037798/locations/us-central1/indexes/6186991511914151936"
display_name: "glove_100_1"
description: "Glove 100 ANN index"
metadata_schema_uri: "gs://google-cloud-aiplatform/schema/matchingengine/metadata/nearest_neighbor_search_1.0.0.yaml"
metadata {
  struct_value {
    fields {
      key: "config"
      value {
        struct_value {
          fields {
            key: "shardSize"
            value {
              string_value: "SHARD_SIZE_MEDIUM"
            }
          }
          fields {
            key: "distanceMeasureType"
            value {
              string_value: "DOT_PRODUCT_DISTANCE"
            }
          }
          fields {
            key: "dimensions"
            value {
              number_value: 100
            }
          }
          fields {
            key: "approximateNeighborsCount"
            value {
              number_value: 150
            }
          }
          fields {
            key: "algorithmConfig"
            value {
 

In [21]:
# Keep the created index's resource name for the deploy and upsert/remove calls.
INDEX_RESOURCE_NAME = ann_index.result().name
INDEX_RESOURCE_NAME

'projects/460625037798/locations/us-central1/indexes/6186991511914151936'

## Create an IndexEndpoint with VPC Network

Run the following code snippet to initialize an AI Platform Index Endpoint Service client.

In [22]:
# Client for index endpoint calls, bound to the regional API endpoint.
index_endpoint_client = aiplatform_v1.IndexEndpointServiceClient(
    client_options={"api_endpoint": ENDPOINT},
)

In [23]:
# Full resource path of the VPC network; it is built from the project number,
# not the project id.
VPC_NETWORK_NAME = f"projects/{PROJECT_NUMBER}/global/networks/{NETWORK_NAME}"
VPC_NETWORK_NAME

'projects/460625037798/global/networks/default'

In [24]:
# Endpoint definition: a display name plus the VPC network it is attached to.
index_endpoint = dict(
    display_name="index_endpoint_for_demo",
    network=VPC_NETWORK_NAME,
)

Create an AI Platform Index Endpoint.

In [25]:
# Start creating the index endpoint; `r` holds the object returned by the call.
r = index_endpoint_client.create_index_endpoint(
    parent=PARENT, index_endpoint=index_endpoint
)

In [26]:
# Display the result of the create-endpoint call.
r.result()

name: "projects/460625037798/locations/us-central1/indexEndpoints/8734709487052521472"
display_name: "index_endpoint_for_demo"
network: "projects/460625037798/global/networks/default"
encryption_spec {
}

In [27]:
# Keep the created endpoint's resource name for the deploy call.
INDEX_ENDPOINT_NAME = r.result().name
INDEX_ENDPOINT_NAME

'projects/460625037798/locations/us-central1/indexEndpoints/8734709487052521472'

## Deploy Stream Update Index

In [28]:
# Id of the deployment; it is also sent as `deployed_index_id` in every match request.
DEPLOYED_INDEX_ID = "stream_update_glove_deployed"

In [29]:
# Deployment definition: the same string is used as id and display name, and
# `index` points at the index created earlier.
deploy_ann_index = dict(
    id=DEPLOYED_INDEX_ID,
    display_name=DEPLOYED_INDEX_ID,
    index=INDEX_RESOURCE_NAME,
)

In [30]:
# Start deploying the index to the endpoint. `r` is rebound to the object returned
# by deploy_index (it previously held the create-endpoint result).
r = index_endpoint_client.deploy_index(
    index_endpoint=INDEX_ENDPOINT_NAME, deployed_index=deploy_ann_index
)

In [31]:
# Wait until the deploy operation reports that it is done, checking once a minute.

while not r.done():
    print("Poll the operation to deploy index...")
    time.sleep(60)
     

Poll the operation to deploy index...
Poll the operation to deploy index...
Poll the operation to deploy index...
Poll the operation to deploy index...
Poll the operation to deploy index...
Poll the operation to deploy index...
Poll the operation to deploy index...
Poll the operation to deploy index...
Poll the operation to deploy index...
Poll the operation to deploy index...
Poll the operation to deploy index...
Poll the operation to deploy index...
Poll the operation to deploy index...
Poll the operation to deploy index...
Poll the operation to deploy index...
Poll the operation to deploy index...
Poll the operation to deploy index...
Poll the operation to deploy index...
Poll the operation to deploy index...
Poll the operation to deploy index...
Poll the operation to deploy index...
Poll the operation to deploy index...
Poll the operation to deploy index...
Poll the operation to deploy index...


In [32]:
# Display the result of the deploy call.
r.result()

deployed_index {
  id: "stream_update_glove_deployed"
}

## Create Online Queries

After you have built your indexes, you can query the deployed index through the online querying gRPC API (Match service) from virtual machine instances in the same region.

Run the following code snippet to write match_service.proto locally. This is a Protocol Buffer (protobuf) definition for a service called MatchService.

In [33]:
%%writefile match_service.proto

syntax = "proto3";

package google.cloud.aiplatform.container.v1;

// MatchService is a Google managed service for efficient vector similarity
// search at scale.
service MatchService {
  // Returns the nearest neighbors for the query. If it is a sharded
  // deployment, calls the other shards and aggregates the responses.
  rpc Match(MatchRequest) returns (MatchResponse) {}
}

// Parameters for a match query.
message MatchRequest {
  // The ID of the DeployedIndex that will serve the request.
  // This MatchRequest is sent to a specific IndexEndpoint of the Control API,
  // as per the IndexEndpoint.network. That IndexEndpoint also has
  // IndexEndpoint.deployed_indexes, and each such index has a
  // DeployedIndex.id field.
  // The value of the field below must equal one of the DeployedIndex.id
  // fields of the IndexEndpoint that is being called for this request.
  string deployed_index_id = 1;

  // The embedding values.
  repeated float float_val = 2;

  // The number of nearest neighbors to be retrieved from database for
  // each query. If not set, will use the default from
  // the service configuration.
  int32 num_neighbors = 3;

  // The list of restricts.
  repeated Namespace restricts = 4;

  // Crowding is a constraint on a neighbor list produced by nearest neighbor
  // search requiring that no more than some value k' of the k neighbors
  // returned have the same value of crowding_attribute.
  // It's used for improving result diversity.
  // This field is the maximum number of matches with the same crowding tag.
  int32 per_crowding_attribute_num_neighbors = 5;

  // The number of neighbors to find via approximate search before
  // exact reordering is performed. If not set, the default value from scam
  // config is used; if set, this value must be > 0.
  // NOTE(review): "scam config" presumably means the ScaNN config — confirm.
  int32 approx_num_neighbors = 6;

  // The fraction of the number of leaves to search, set at query time, allows
  // the user to tune search performance. Increasing this value increases both
  // search accuracy and latency. The value should be between 0.0 and 1.0. If
  // not set or set to 0.0, query uses the default value specified in
  // NearestNeighborSearchConfig.TreeAHConfig.leaf_nodes_to_search_percent.
  // NOTE(review): the text above describes a 0.0-1.0 fraction, but the field
  // is declared int32 and named "percent" — confirm the intended units.
  int32 leaf_nodes_to_search_percent_override = 7;
}

// Response of a match query.
message MatchResponse {
  message Neighbor {
    // The id of the match.
    string id = 1;

    // The distance of the match.
    double distance = 2;
  }
  // All its neighbors.
  repeated Neighbor neighbor = 1;
}

// Namespace specifies the rules for determining the datapoints that are
// eligible for each matching query, overall query is an AND across namespaces.
message Namespace {
  // The string name of the namespace that this proto is specifying,
  // such as "color", "shape", "geo", or "tags".
  string name = 1;

  // The allowed tokens in the namespace.
  repeated string allow_tokens = 2;

  // The denied tokens in the namespace.
  // The denied tokens have exactly the same format as the token fields, but
  // represent a negation. When a token is denied, then matches will be
  // excluded whenever the other datapoint has that token.
  //
  // For example, if a query specifies {color: red, blue, !purple}, then that
  // query will match datapoints that are red or blue, but if those points are
  // also purple, then they will be excluded even if they are red/blue.
  repeated string deny_tokens = 3;
}

Writing match_service.proto


In [34]:
! git clone https://github.com/googleapis/googleapis.git

Cloning into 'googleapis'...
remote: Enumerating objects: 236358, done.[K
remote: Counting objects: 100% (16547/16547), done.[K
remote: Compressing objects: 100% (448/448), done.[K
remote: Total 236358 (delta 16210), reused 16136 (delta 16098), pack-reused 219811 (from 1)
Receiving objects: 100% (236358/236358), 207.60 MiB | 27.24 MiB/s, done.
Resolving deltas: 100% (199259/199259), done.


Compile the protocol buffer, that generates the following files: match_service_pb2.py and match_service_pb2_grpc.py.

In [35]:
# Generate the Python message and gRPC stub modules into the current directory;
# the current directory and ./googleapis are both on the proto include path.
! python -m grpc_tools.protoc -I=. --proto_path=./googleapis --python_out=. --grpc_python_out=. match_service.proto

Obtain the private Endpoint.

In [36]:
# Fetch the endpoint created in this notebook by its resource name, instead of
# taking whichever endpoint happens to be listed first in the project.
deployed_endpoint = index_endpoint_client.get_index_endpoint(name=INDEX_ENDPOINT_NAME)

# Select the deployment made above by its id rather than by position.
matching_deployments = [
    deployed
    for deployed in deployed_endpoint.deployed_indexes
    if deployed.id == DEPLOYED_INDEX_ID
]
if not matching_deployments:
    raise RuntimeError(
        f"Deployed index {DEPLOYED_INDEX_ID!r} not found on {INDEX_ENDPOINT_NAME}"
    )

# Private gRPC address at which the Match service of this deployment is reachable.
DEPLOYED_INDEX_SERVER_IP = matching_deployments[0].private_endpoints.match_grpc_address
DEPLOYED_INDEX_SERVER_IP

'10.89.0.15'

Test your query.

In [37]:
# These modules are generated by the protoc step, so they can only be imported here.
import match_service_pb2
import match_service_pb2_grpc

# Plaintext gRPC channel to the deployed index's private address on port 10000.
match_server_address = f"{DEPLOYED_INDEX_SERVER_IP}:10000"
channel = grpc.insecure_channel(match_server_address)
stub = match_service_pb2_grpc.MatchServiceStub(channel)

In [38]:
# Test query: a 100-value embedding, laid out five values per row.
query = [
    -0.11333, 0.48402, 0.090771, -0.22439, 0.034206,
    -0.55831, 0.041849, -0.53573, 0.18809, -0.58722,
    0.015313, -0.014555, 0.80842, -0.038519, 0.75348,
    0.70502, -0.17863, 0.3222, 0.67575, 0.67198,
    0.26044, 0.4187, -0.34122, 0.2286, -0.53529,
    1.2582, -0.091543, 0.19716, -0.037454, -0.3336,
    0.31399, 0.36488, 0.71263, 0.1307, -0.24654,
    -0.52445, -0.036091, 0.55068, 0.10017, 0.48095,
    0.71104, -0.053462, 0.22325, 0.30917, -0.39926,
    0.036634, -0.35431, -0.42795, 0.46444, 0.25586,
    0.68257, -0.20821, 0.38433, 0.055773, -0.2539,
    -0.20804, 0.52522, -0.11399, -0.3253, -0.44104,
    0.17528, 0.62255, 0.50237, -0.7607, -0.071786,
    0.0080131, -0.13286, 0.50097, 0.18824, -0.54722,
    -0.42664, 0.4292, 0.14877, -0.0072514, -0.16484,
    -0.059798, 0.9895, -0.61738, 0.054169, 0.48424,
    -0.35084, -0.27053, 0.37829, 0.11503, -0.39613,
    0.24266, 0.39147, -0.075256, 0.65093, -0.20822,
    -0.17456, 0.53571, -0.16537, 0.13582, -0.56016,
    0.016964, 0.1277, 0.94071, -0.22608, -0.021106,
]

In [39]:
# Plain nearest-neighbor request for `query` against the deployed index.
request = match_service_pb2.MatchRequest(
    deployed_index_id=DEPLOYED_INDEX_ID,
    float_val=query,
)

# The output before stream update
response = stub.Match(request)
response

neighbor {
  id: "0"
  distance: 17.592369079589844
}
neighbor {
  id: "31"
  distance: 14.614908218383789
}
neighbor {
  id: "50"
  distance: 11.242000579833984
}
neighbor {
  id: "42"
  distance: 10.925321578979492
}
neighbor {
  id: "46"
  distance: 10.185911178588867
}
neighbor {
  id: "100"
  distance: 10.031323432922363
}
neighbor {
  id: "71"
  distance: 9.4601297378540039
}
neighbor {
  id: "64"
  distance: 9.3296346664428711
}
neighbor {
  id: "54"
  distance: 9.25944709777832
}
neighbor {
  id: "98"
  distance: 8.94312858581543
}

Insert datapoints.

In [40]:
# Insert a new datapoint "101" that reuses the query vector. It is allowlisted
# under its own id in the "class" namespace and tagged with crowding attribute "b".
insert_datapoints_payload = aiplatform_v1.IndexDatapoint(
    datapoint_id="101",
    feature_vector=query,
    restricts=[{"namespace": "class", "allow_list": ["101"]}],
    crowding_tag=aiplatform_v1.IndexDatapoint.CrowdingTag(crowding_attribute="b"),
)

upsert_request = aiplatform_v1.UpsertDatapointsRequest(
    index=INDEX_RESOURCE_NAME, datapoints=[insert_datapoints_payload]
)

index_client.upsert_datapoints(request=upsert_request)

request = match_service_pb2.MatchRequest()
request.deployed_index_id = DEPLOYED_INDEX_ID
request.float_val.extend(query)

# The new inserted datapoint with id 101 will show up in the output.
# A streamed upsert is not necessarily visible to the very first query after it
# (the recorded run queried immediately and did not get id 101 back), so retry
# for up to about a minute. If the datapoint never appears, the last response
# is displayed as-is rather than raising.
for attempt in range(12):
    if attempt:
        time.sleep(5)
    response = stub.Match(request)
    if any(neighbor.id == "101" for neighbor in response.neighbor):
        break
response

neighbor {
  id: "0"
  distance: 17.592369079589844
}
neighbor {
  id: "31"
  distance: 14.614908218383789
}
neighbor {
  id: "50"
  distance: 11.242000579833984
}
neighbor {
  id: "42"
  distance: 10.925321578979492
}
neighbor {
  id: "46"
  distance: 10.185911178588867
}
neighbor {
  id: "100"
  distance: 10.031323432922363
}
neighbor {
  id: "71"
  distance: 9.4601297378540039
}
neighbor {
  id: "64"
  distance: 9.3296346664428711
}
neighbor {
  id: "54"
  distance: 9.25944709777832
}
neighbor {
  id: "98"
  distance: 8.94312858581543
}

Add filtering.

In [41]:
# Same query vector, now with a restrict on the "class" namespace.
request = match_service_pb2.MatchRequest(
    deployed_index_id=DEPLOYED_INDEX_ID,
    float_val=query,
)

# Only the datapoints whose id is 1 and 101 will show up in the output
restrict = match_service_pb2.Namespace(name="class", allow_tokens=["1", "101"])
request.restricts.append(restrict)

response = stub.Match(request)
response

neighbor {
  id: "101"
  distance: 17.592369079589844
}
neighbor {
  id: "1"
  distance: 2.4347081184387207
}

In [42]:
# Upsert datapoint "101" again, this time with its "class" allowlist set to "102".
updated_restrict = dict(namespace="class", allow_list=["102"])
update_datapoints_payload = aiplatform_v1.IndexDatapoint(
    datapoint_id="101",
    feature_vector=query,
    restricts=[updated_restrict],
    crowding_tag=aiplatform_v1.IndexDatapoint.CrowdingTag(crowding_attribute="b"),
)
upsert_request = aiplatform_v1.UpsertDatapointsRequest(
    index=INDEX_RESOURCE_NAME,
    datapoints=[update_datapoints_payload],
)
index_client.upsert_datapoints(request=upsert_request)

# `request` is not rebuilt in this cell: the request object left by an earlier
# cell is sent again unchanged.
response = stub.Match(request)
response

neighbor {
  id: "1"
  distance: 2.4347081184387207
}

Add crowding.

In [43]:
# Fresh request without restricts, used to demonstrate crowding.
request = match_service_pb2.MatchRequest(
    deployed_index_id=DEPLOYED_INDEX_ID,
    float_val=query,
    # Set the limit of the number of neighbors in each crowding to 1
    # So no more than one neighbor of each crowding group will appear in the output
    per_crowding_attribute_num_neighbors=1,
)

response = stub.Match(request)
response

neighbor {
  id: "0"
  distance: 17.592369079589844
}
neighbor {
  id: "101"
  distance: 17.592369079589844
}

Update datapoint crowding.

In [44]:
# Change the crowding_attribute from 'b' to 'a' for the datapoint with id '101' by using stream update
update_datapoints_payload = aiplatform_v1.IndexDatapoint(
    datapoint_id="101",
    feature_vector=query,
    restricts=[{"namespace": "class", "allow_list": ["101"]}],
    crowding_tag=aiplatform_v1.IndexDatapoint.CrowdingTag(crowding_attribute="a"),
)

upsert_request = aiplatform_v1.UpsertDatapointsRequest(
    index=INDEX_RESOURCE_NAME, datapoints=[update_datapoints_payload]
)

index_client.upsert_datapoints(request=upsert_request)

response = stub.Match(request)
response

neighbor {
  id: "0"
  distance: 17.592369079589844
}
neighbor {
  id: "31"
  distance: 14.614908218383789
}

Remove datapoints.

In [45]:
# Remove the datapoint with id '101' from the index
remove_request = aiplatform_v1.RemoveDatapointsRequest(
    index=INDEX_RESOURCE_NAME, datapoint_ids=["101"]
)

index_client.remove_datapoints(request=remove_request)

request = match_service_pb2.MatchRequest()
request.deployed_index_id = DEPLOYED_INDEX_ID
for val in query:
    request.float_val.append(val)

response = stub.Match(request)
response

neighbor {
  id: "0"
  distance: 17.592369079589844
}
neighbor {
  id: "31"
  distance: 14.614908218383789
}
neighbor {
  id: "50"
  distance: 11.242000579833984
}
neighbor {
  id: "42"
  distance: 10.925321578979492
}
neighbor {
  id: "46"
  distance: 10.185911178588867
}
neighbor {
  id: "100"
  distance: 10.031323432922363
}
neighbor {
  id: "71"
  distance: 9.4601297378540039
}
neighbor {
  id: "64"
  distance: 9.3296346664428711
}
neighbor {
  id: "54"
  distance: 9.25944709777832
}
neighbor {
  id: "98"
  distance: 8.94312858581543
}