<h4>Collection Configuration</h4>

In [None]:
from pprint import pprint

def print_results(results):
    """Pretty-print each element of ``results`` with its own ``pprint`` call.

    Args:
        results: Any iterable; every item is handed to ``pprint`` unchanged.

    Returns:
        None. Output goes to standard output; an empty iterable prints nothing.
    """
    for entry in results:
        pprint(entry)

In [None]:
# Open the configuration file
import yaml

with open("credentials.yaml") as f:
    # Only parsing needs the open handle; the lookups below work on the parsed result.
    credentials = yaml.safe_load(f)

# Subscript access (rather than .get) makes a missing entry fail here,
# not later when the values are first used.
CLUSTER_ENDPOINT = credentials["CLUSTER_ENDPOINT"]
TOKEN = credentials["TOKEN"]

In [None]:
from pymilvus import MilvusClient, DataType

# Connect using the endpoint and token read from credentials.yaml
client = MilvusClient(uri=CLUSTER_ENDPOINT, token=TOKEN)

# Schema: primary keys are supplied by the caller (auto_id off) and the
# dynamic field is enabled. The insert cell sends keys ("color", "tag",
# "color_tag") that are not declared as fields here.
schema = MilvusClient.create_schema(auto_id=False, enable_dynamic_field=True)

# Declared fields: an INT64 primary key and a 5-dimensional float vector
schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True)
schema.add_field(field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=5)

# Index definition for the vector field, using inner product ("IP") as the metric
# NOTE(review): "nlist" is usually an IVF-family setting — confirm AUTOINDEX honours it.
index_params = client.prepare_index_params()
index_params.add_index(
    field_name="vector",
    index_type="AUTOINDEX",
    metric_type="IP",
    params={"nlist": 128},
)

import time

# Make this cell safe to re-run: if an earlier run stopped before the final
# "Drop collection" cell, the collection still exists and the insert cell
# would send ids 0..9999 to it a second time. Start from a clean slate.
if client.has_collection(collection_name = "quick_setup_collection"):
    client.drop_collection(collection_name = "quick_setup_collection")

# Create a collection with the index loaded simultaneously
client.create_collection(
    collection_name = "quick_setup_collection",
    schema = schema,
    index_params = index_params
)

<h4>Generate Random Data</h4>

In [None]:
import random

# Build the randomly generated entities that the next statements insert.
# A dedicated, seeded generator makes the dataset identical on every
# Restart & Run All (so the search/query outputs below are reproducible)
# without touching the global `random` module state.
SEED = 42
rng = random.Random(SEED)

colors = ["green", "blue", "yellow", "red", "black", "white", "purple", "pink", "orange", "brown", "grey"]
data = []

for i in range(10000):
    current_color = rng.choice(colors)
    current_tag = rng.randint(1000, 9999)
    data.append({
        "id": i,
        # 5 components, matching dim = 5 declared for the "vector" field
        "vector": [ rng.uniform(-1, 1) for _ in range(5) ],
        "color": current_color,
        "tag": current_tag,
        # f-string formatting already stringifies the integer tag
        "color_tag": f"{current_color}_{current_tag}"
    })

# Show one sample entity
print(data[0])

# Send all generated entities to the collection in a single call
insert_result = client.insert(collection_name="quick_setup_collection", data=data)

# Print the "insert_count" entry of the response
print(insert_result["insert_count"])

<h4>Search with Iterators</h4>

In [None]:
from pymilvus import Collection, connections

# Search with iterator
# Connect with the same endpoint/token as the client, then get a handle
# on the collection created earlier.
connections.connect(
    uri = CLUSTER_ENDPOINT,
    token = TOKEN
)
collection = Collection("quick_setup_collection")

# A single 5-component query vector (the "vector" field was declared with dim = 5)
query_vectors = [[0.3580376395471989, -0.6023495712049978, 0.18414012509913835, -0.26286205330961354, 0.9029438446296592]]
search_params = {
    "metric_type": "IP",
    "params": {"nprobe": 10}
}

# NOTE(review): limit (3) is smaller than batch_size (10) — confirm this is intended.
iterator = collection.search_iterator(
    data = query_vectors,
    anns_field = "vector",
    batch_size = 10, # Number of elements per page returned with .next()
    param = search_params,
    output_fields = ["color_tag"],
    limit = 3
)

results = []

# Close the iterator on every exit path: previously close() was only reached
# when next() returned an empty page, so an exception raised by next() or
# to_dict() left the iterator open.
try:
    while True:
        result = iterator.next()
        if not result:
            # An empty page signals that the iterator is exhausted
            break

        for hit in result:
            results.append(hit.to_dict())
finally:
    iterator.close()

print(results)

<h4>Querying with Iterators</h4>

In [None]:
# Query with iterator
# Page through the entities whose "color_tag" matches the filter expression,
# 10 per call to .next(), returning only the "color_tag" field.
iterator = collection.query_iterator(
    batch_size = 10,
    expr = "color_tag like \"brown_8%\"",
    output_fields = ["color_tag"]
)

results = []

# Close the iterator on every exit path: previously close() was only reached
# when next() returned an empty page, so an exception raised by next() left
# the iterator open.
try:
    while True:
        result = iterator.next()
        if not result:
            # An empty page signals that the iterator is exhausted
            break

        results += result
finally:
    iterator.close()

# Check the results
print(f"There are {len(results)} entities found. The first 3 are as follows:")

print(results[:3])

In [None]:
# Clean up: remove the collection this notebook created
client.drop_collection(collection_name="quick_setup_collection")