<h4>Collection Setup</h4>

In [None]:
# Open the configuration file
import yaml

# Parse the YAML credentials file; the handle is only needed while parsing.
with open("credentials.yaml") as f:
    credentials = yaml.safe_load(f)

# Connection settings used by the cells below to reach the cluster.
CLUSTER_ENDPOINT = credentials["CLUSTER_ENDPOINT"]
TOKEN = credentials["TOKEN"]

In [None]:
from pymilvus import connections, utility, Collection, FieldSchema, CollectionSchema, DataType
import random

# Connect to the cluster using the endpoint and token loaded from credentials.yaml
connections.connect(
    uri = CLUSTER_ENDPOINT,
    token = TOKEN
)

# Dimensionality shared by both vector fields, and number of demo entities to insert
VECTOR_DIM = 5
NUM_ENTITIES = 1000

# Create schema: an INT64 primary key plus two float-vector fields
fields = [
    FieldSchema(name = "film_id", dtype = DataType.INT64, is_primary = True),
    FieldSchema(name = "filmVector", dtype = DataType.FLOAT_VECTOR, dim = VECTOR_DIM),
    FieldSchema(name = "posterVector", dtype = DataType.FLOAT_VECTOR, dim = VECTOR_DIM)]

schema = CollectionSchema(fields = fields, enable_dynamic_field = False)

# Create collection
collection = Collection(name = "film_collection", schema = schema)

# Create an index on each vector field; both use the same parameters
index_params = {
    "metric_type": "L2",
    "index_type": "AUTOINDEX"
}

for vector_field in ("filmVector", "posterVector"):
    collection.create_index(vector_field, index_params)

# Generate random entities to insert.
# film_id is the primary key, so every entity gets a distinct id (1..NUM_ENTITIES).
# Drawing ids with random.randint(1, 1000) for 1000 entities would almost certainly
# produce duplicate primary keys.
entities = []

for film_id in range(1, NUM_ENTITIES + 1):
    # generate random values for each vector field in the schema
    film_vector = [ random.random() for _ in range(VECTOR_DIM) ]
    poster_vector = [ random.random() for _ in range(VECTOR_DIM) ]

    # create a dictionary for each entity, keyed by schema field name
    entity = {
        "film_id": film_id,
        "filmVector": film_vector,
        "posterVector": poster_vector
    }

    # add the entity to the list
    entities.append(entity)

collection.insert(entities)

<h4>ANNSearch and Hybrid Search</h4>
<p>ANNSearch (Approximate Nearest Neighbor Search) is a type of search for semantic similarity. Compared to an exact nearest-neighbor search (NNSearch), it trades some accuracy for performance, which makes it very effective on large datasets.</p>

In [None]:
from pymilvus import AnnSearchRequest

def _ann_search_kwargs(query_vectors, vector_field):
    """Return the keyword arguments for an AnnSearchRequest on one vector field.

    query_vectors: list of query vectors to search with.
    vector_field: name of the vector field to search in.
    """
    return {
        "data": query_vectors, # Query vector
        "anns_field": vector_field, # Vector field name
        "param": {
            "metric_type": "L2", # Same metric as the index created on the vector fields
            "params": {"nprobe": 10} # Number of closest buckets to probe before picking the best matches
        },
        "limit": 2 # Number of search results to return in this AnnSearchRequest
    }

# ANN search request against filmVector
query_film_vector = [[0.8896863042430693, 0.370613100114602, 0.23779315077113428, 0.38227915951132996, 0.5997064603128835]]
search_param_film = _ann_search_kwargs(query_film_vector, "filmVector")
request_film = AnnSearchRequest(**search_param_film)

# ANN search request against posterVector
query_poster_vector = [[0.02550758562349764, 0.006085637357292062, 0.5325251250159071, 0.7676432650114147, 0.5521074424751443]]
search_param_poster = _ann_search_kwargs(query_poster_vector, "posterVector")
request_poster = AnnSearchRequest(**search_param_poster)

# Collect both requests, film first and poster second, for the hybrid search
requests = [request_film, request_poster]

<h4>Define a Weight Function</h4>

In [None]:
from pymilvus import WeightedRanker

# Combine the results of the two searches with a weighted ranker:
# the film search counts for 0.8 and the poster search for 0.2.
film_weight, poster_weight = 0.8, 0.2
rerank = WeightedRanker(film_weight, poster_weight)

<h4>Perform Hybrid Search</h4>

In [None]:
# The collection has to be loaded into memory before a hybrid search can run.
collection.load()

# Run the AnnSearchRequests in `requests`, merge their hits with the `rerank`
# strategy, and keep only the 2 best final results.
search_results = collection.hybrid_search(requests, rerank, limit = 2)

print(search_results)

In [None]:
# Clean up: remove the demo collection created at the top of the notebook
utility.drop_collection(collection_name = "film_collection")