In [1]:
import pandas as pd

In [2]:
# Load the product table from the local CSV. Later cells read its 'Handle',
# 'description', 'Description', 'color', 'craft', 'fabric' and
# 'Product Types' columns.
df = pd.read_csv('DATA/products_with_tags_and_description_seperated.csv')

In [3]:
def to_string(row):
    """Build the comma-separated tag string for one product row.

    Reads 'color', 'craft', 'fabric' and 'Product Types' (in that order) via
    ``row.get`` and joins the usable values with ", ".

    Missing values (absent key, ``None``, float NaN) and blank strings are
    skipped, so the result never contains literal "nan"/"None" tokens or
    dangling ", , " separators.

    Args:
        row: Mapping-like object exposing ``get(key, default)``, e.g. a dict
            or a pandas Series.

    Returns:
        str: The joined tag text; "" when every field is missing or blank.
    """
    tag_columns = ('color', 'craft', 'fabric', 'Product Types')
    parts = []
    for column in tag_columns:
        value = row.get(column, '')
        # NaN is the only float that compares unequal to itself.
        if value is None or (isinstance(value, float) and value != value):
            continue
        text = str(value)
        if text.strip():
            parts.append(text)
    return ", ".join(parts)

In [2]:
from PIL import Image
import torch
from transformers import CLIPProcessor, CLIPModel
from PIL import Image, UnidentifiedImageError

# The model weights and the matching pre-processor are loaded from the same
# checkpoint, so the identifier is named once.
CLIP_CHECKPOINT = "openai/clip-vit-large-patch14"

processor = CLIPProcessor.from_pretrained(CLIP_CHECKPOINT)
model = CLIPModel.from_pretrained(CLIP_CHECKPOINT)

def get_image_vector(image_path, model, processor):
    """Embed one image file and return a unit-length vector.

    Args:
        image_path: Path of the image file to open.
        model: Object exposing ``device`` and ``get_image_features(**inputs)``.
        processor: Callable invoked as
            ``processor(images=..., return_tensors="pt")``; its result must
            support ``.to(device)``.

    Returns:
        list: The embedding divided by its L2 norm along the last axis, then
        squeezed, moved to CPU and converted with ``tolist()``.

    Raises:
        ValueError: If the file does not exist or cannot be identified as an
            image. The original exception is chained as ``__cause__``.
    """
    try:
        # The context manager closes the underlying file handle as soon as the
        # pixels have been copied out; convert() returns an independent image.
        with Image.open(image_path) as source_image:
            image = source_image.convert("RGB")
    except (FileNotFoundError, UnidentifiedImageError) as e:
        raise ValueError(f"Invalid image file at {image_path}: {e}") from e

    inputs = processor(images=image, return_tensors="pt").to(model.device)
    with torch.no_grad():
        image_embedding = model.get_image_features(**inputs)
    # Scale to unit L2 norm along the feature axis.
    image_embedding = image_embedding / image_embedding.norm(dim=-1, keepdim=True)
    return image_embedding.squeeze().cpu().tolist()

def get_text_vector(text, model, processor):
    """Embed text via ``model.get_text_features`` and return a unit-length vector.

    Args:
        text: Text handed to ``processor(text=...)`` with truncation enabled.
        model: Object exposing ``device`` and ``get_text_features(**inputs)``.
        processor: Callable returning an object that supports ``.to(device)``
            and ``**`` unpacking.

    Returns:
        list: The embedding divided by its L2 norm along the last axis, then
        squeezed, moved to CPU and converted with ``tolist()``.
    """
    encoded = processor(text=text, return_tensors="pt", truncation=True)
    encoded = encoded.to(model.device)

    # Inference only: no gradient bookkeeping is needed.
    with torch.no_grad():
        features = model.get_text_features(**encoded)

    feature_norm = features.norm(dim=-1, keepdim=True)
    unit_features = features / feature_norm
    return unit_features.squeeze().cpu().tolist()

  from .autonotebook import tqdm as notebook_tqdm
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


In [5]:
# Replace missing text with '' so every value is a real string before it is
# joined into tags or handed to the text encoder.
text_columns = ['description', 'Description', 'color', 'craft', 'fabric', 'Product Types']
for column in text_columns:
    df[column] = df[column].fillna('')

# One combined tag string per product row.
df['Tags'] = df.apply(to_string, axis=1)


def embed_text(value):
    """Encode one text value with the notebook's shared model and processor."""
    return get_text_vector(value, model, processor)


# (source text column, destination vector column), in creation order.
vector_columns = [
    ('description', 'description_vector'),
    ('Description', 'image_description_vector'),
    ('Tags', 'Tags_vector'),
]
for source_column, vector_column in vector_columns:
    df[vector_column] = df[source_column].apply(embed_text)

In [6]:
# Show the frame to eyeball the new 'Tags' and *_vector columns.
df

Unnamed: 0.1,Unnamed: 0,Handle,Title,Type,description,craft,fabric,color,Product Types,Description,Tags,description_vector,image_description_vector,Tags_vector
0,0,gaia-co-ordinate-set-3583,Gaia Co ordinate Set,Co-ordinate Set,This powder blue co-ordinate set is crafted fr...,"Dori Embroidery, Thread Embroidery, Bead Work,...",Linen,Blue,Co-ordinate Set,The dress is a light blue two-piece set featur...,"Blue, Dori Embroidery, Thread Embroidery, Bead...","[0.04155544936656952, -0.01020028255879879, -0...","[0.05512223020195961, 0.04318203032016754, -0....","[0.04994598776102066, -0.013021372258663177, -..."
1,1,shiqa-co-ordinate-set-3582,Shiqa Co ordinate Set,Co-ordinate Set,This mint green co-ordinate set is crafted fro...,Thread Embroidery,Linen,Green,Co-ordinate Set,The dress is a light green two-piece set featu...,"Green, Thread Embroidery, Linen, Co-ordinate Set","[0.042135123163461685, 0.0191117562353611, -0....","[0.052434291690588, 0.06576637178659439, -0.04...","[0.062471214681863785, 0.031147204339504242, 0..."
2,2,arzen-co-ordinate-set-3581,Arzen Co ordinate Set,Co-ordinate Set,This co-ordinate set is crafted from pure line...,Thread Embroidery,Linen,"Pink, White",Co-ordinate Set,,"Pink, White, Thread Embroidery, Linen, Co-ordi...","[0.0266643725335598, 0.012301371432840824, 0.0...","[0.009879766032099724, -0.0018297001952305436,...","[0.060877781361341476, 0.0018306276760995388, ..."
3,3,alisha-co-ordinate-set-3580,Alisha Co ordinate Set,Co-ordinate Set,This white beige co-ordinate set is crafted fr...,Thread Embroidery,Linen,"Beige, White",Co-ordinate Set,The dress is a two-piece set featuring a squar...,"Beige, White, Thread Embroidery, Linen, Co-ord...","[0.059070486575365067, 0.03154120221734047, -0...","[0.0720837339758873, 0.0655950978398323, -0.01...","[0.03869662433862686, 0.05238012224435806, 0.0..."
4,4,cyra-co-ordinate-set-3579,Cyra Co ordinate Set,Co-ordinate Set,This co-ordinate set is crafted from pure line...,"Thread Embroidery, Bead Work, Sequin Embroider...",Linen,"Yellow, White",Co-ordinate Set,"The ensemble features a light yellow, button-d...","Yellow, White, Thread Embroidery, Bead Work, S...","[0.008695674128830433, 0.020176971331238747, 0...","[0.051097724586725235, 0.08910145610570908, -0...","[0.009910985827445984, 0.015220469795167446, 0..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1043,1043,rati-kurta-set,Rati Kurta Set,Kurta Set,Featuring a pink paisley embroidered straight ...,Zardozi Embroidery,"Silk, Organza, Tussar","Pink, Beige",Kurta Set,The dress is a vibrant pink salwar kameez with...,"Pink, Beige, Zardozi Embroidery, Silk, Organza...","[0.02275579236447811, -0.001762106199748814, -...","[0.015424798242747784, -0.0069868359714746475,...","[0.031747005879879, 0.005678887944668531, -0.0..."
1044,1044,harshi-anarkali-set,Harshi Anarkali Set,Anarkali Set,This set features red mid length organza anark...,"Zardozi Embroidery, Dori Embroidery","Silk, Organza, Kora",Red,Anarkali Set,The dress is a red salwar kameez with intricat...,"Red, Zardozi Embroidery, Dori Embroidery, Silk...","[0.026264622807502747, -0.029688198119401932, ...","[0.01564633473753929, 0.019283883273601532, -0...","[0.036237362772226334, -0.009034180082380772, ..."
1045,1045,indira-kurta-set,Indira Kurta Set,Kurta Set,Featuring a yellow raw silk short suit with pl...,Zardozi Embroidery,"Raw Silk, Kora","Yellow, Silver",Kurta Set,The dress is a yellow salwar kameez with silve...,"Yellow, Silver, Zardozi Embroidery, Raw Silk, ...","[0.003864881582558155, 0.0031391980592161417, ...","[0.004383155610412359, 0.0109744006767869, -0....","[-0.024429136887192726, 0.008997435681521893, ..."
1046,1046,charusheela-kurta-set,Charusheela Kurta Set,Kurta Set,This set features a black embroidered raw silk...,Zardozi Embroidery,"Raw Silk, Kora",Black,Kurta Set,"The dress is a long, black, embroidered kurta ...","Black, Zardozi Embroidery, Raw Silk, Kora, Kur...","[0.006826208438724279, -0.027781272307038307, ...","[0.03344716131687164, 0.026030350476503372, -0...","[-0.0091616902500391, -0.0031778812408447266, ..."


In [7]:
from tqdm import tqdm  
from transformers import CLIPModel, CLIPProcessor
import torch

# Length of the placeholder used when an image cannot be embedded. It is read
# from the loaded model instead of being hard-coded, so it stays correct if
# the checkpoint changes.
IMAGE_EMBEDDING_DIM = model.config.projection_dim

image_vectors = []
failed_handles = []  # handles whose image could not be embedded

for handle in tqdm(df['Handle']):
    image_path = f"DATA/jpeg_ROI_images/{handle}.jpg"
    try:
        vec = get_image_vector(image_path, model, processor)
    except Exception as e:
        # Best effort: report the failure and keep one entry per row so the
        # list stays aligned with df.
        print(f"Failed to process {handle}: {e}")
        failed_handles.append(handle)
        # NOTE(review): an all-zero vector has no direction, so cosine
        # similarity against it is undefined. Confirm how the vector store
        # treats these rows, or drop them using failed_handles.
        vec = [0.0] * IMAGE_EMBEDDING_DIM
    image_vectors.append(vec)

df['image_dense'] = image_vectors


 80%|███████▉  | 838/1048 [20:12<14:09,  4.04s/it]  

Failed to process majedah-anarkali-set: Invalid image file at DATA/jpeg_ROI_images/majedah-anarkali-set.jpg: [Errno 2] No such file or directory: 'DATA/jpeg_ROI_images/majedah-anarkali-set.jpg'


100%|██████████| 1048/1048 [24:41<00:00,  1.41s/it]


In [8]:
# List the column labels now present on df.
df.columns

Index(['Unnamed: 0', 'Handle', 'Title', 'Type', 'description', 'craft',
       'fabric', 'color', 'Product Types', 'Description', 'Tags',
       'description_vector', 'image_description_vector', 'Tags_vector',
       'image_dense'],
      dtype='object')

In [9]:
# Work on a copy; the drop/rename cells that follow modify df_copy, not df.
df_copy = df.copy()

In [10]:
# Remove the 'Unnamed: 0' column. Reassigning with errors='ignore' keeps the
# cell safe to re-run: an in-place drop raises KeyError once the column is gone.
df_copy = df_copy.drop(columns=['Unnamed: 0'], errors='ignore')

In [11]:
# Rename both columns in one call so they match the field names used in the
# Milvus schema cell ('tags_vector', 'Product_Types').
df_copy.rename(
    columns={
        'Tags_vector': 'tags_vector',
        'Product Types': 'Product_Types',
    },
    inplace=True,
)

In [12]:
# Make sure 'craft' holds plain strings, using "Unknown" for missing values.
# NOTE(review): an earlier cell already replaces missing 'craft' values on df
# with '' before df_copy is taken, so in a top-to-bottom run nothing is left
# for this fillna to replace. Confirm which placeholder ('' or "Unknown") is intended.
df_copy['craft'] = df_copy['craft'].fillna("Unknown").astype(str)

In [13]:
# import ast
# import pandas as pd

# df_copy = pd.read_csv('DATA/products_with_all_vectors.csv')
# df_copy.columns

In [31]:
# df_copy['description_vector'] = df_copy['description_vector'].apply(ast.literal_eval)
# df_copy['image_description_vector'] = df_copy['image_description_vector'].apply(ast.literal_eval)
# df_copy['tags_vector'] = df_copy['tags_vector'].apply(ast.literal_eval)
# df_copy['image_dense'] = df_copy['image_dense'].apply(ast.literal_eval)
# df_copy.drop(['Unnamed: 0'], axis=1,  inplace=True)

# Milvus

In [3]:
from pymilvus import MilvusClient, DataType
from pymilvus import Function, FunctionType

import os

# Connection settings are read from the environment when set, so credentials
# need not live in the notebook. The fallbacks are the values that were
# previously hard-coded, so behaviour is unchanged when the variables are unset.
client = MilvusClient(
    uri=os.environ.get("MILVUS_URI", "http://localhost:19530"),
    token=os.environ.get("MILVUS_TOKEN", "root:Milvus"),
)



In [4]:
# Print the names of the collections the server currently reports.
print(client.list_collections())

['SemanticSearch_bgremoved', 'SemanticSearch', 'SemanticSearch2']


In [5]:
from pymilvus import connections, Collection

# Open an ORM-style connection under the "default" alias (local server).
connections.connect(alias="default", host="127.0.0.1", port="19530")

# Load the collection before querying it.
collection = Collection("SemanticSearch_bgremoved")
collection.load()

# Empty filter expression with limit=1: fetch a single stored row with all
# of its fields, then print it as a sanity check.
results = collection.query(expr="", output_fields=["*"], limit=1)
print(results)



data: ["{'fabric': 'Satin, Chiffon', 'Product_Types': 'Co-ordinate Set', 'Description': 'The dress is a two-piece set in a light golden hue, featuring a long-sleeved tunic top with a mandarin collar, decorative floral cutouts on the yoke and sleeves, a button-down front with paisley patterned trim, and matching straight-leg pants with a subtle paisley pattern throughout.\\n', 'Tags': 'Beige, White, Eyelet Embroidery, Thread Embroidery, Satin, Chiffon, Co-ordinate Set', 'description': 'Elevate your style with this exquisite co-ordinate set in a sophisticated beige hue, crafted from luxurious pure silk satin and pure satin jacquard. The shirt, featuring a band collar neck and a front-open design with yoke and gathers, exudes elegance and flair. Embellished full sleeves of pure satin jacquard add a luxurious contrast, while cutwork detailing outlined with beige silk thread adds intricate charm. Paired with straight-fit pants in beige pure satin jacquard, the ensemble offers a harmonious a

In [15]:
# Start an empty schema. auto_id=False means primary keys are supplied by the
# inserted rows rather than generated.
schema = MilvusClient.create_schema(auto_id=False)

In [16]:
# Text fields in schema order: (name, max_length, extra keyword arguments).
# 'Handle' is the primary key; fields with enable_analyzer=True are the ones
# fed to the BM25 functions defined further down.
varchar_fields = [
    ("Handle", 512, {"is_primary": True}),
    ("Title", 512, {}),
    ("Type", 512, {}),
    ("craft", 256, {}),
    ("fabric", 256, {}),
    ("Product_Types", 256, {}),
    ("Tags", 256, {"enable_analyzer": True}),
    ("color", 256, {}),
    ("description", 1024, {"enable_analyzer": True}),
    ("Description", 1024, {"enable_analyzer": True}),
]
for field_name, max_length, extra in varchar_fields:
    schema.add_field(field_name=field_name, datatype=DataType.VARCHAR, max_length=max_length, **extra)

# Dense embedding fields, all 768-dimensional.
for field_name in ("description_vector", "image_description_vector", "tags_vector", "image_dense"):
    schema.add_field(field_name=field_name, datatype=DataType.FLOAT_VECTOR, dim=768)

# Sparse fields that receive the BM25 function outputs.
for field_name in ("description_vector_sparse", "image_description_vector_sparse", "tags_vector_sparse"):
    schema.add_field(field_name=field_name, datatype=DataType.SPARSE_FLOAT_VECTOR)


def _bm25(name, source_field, sparse_field):
    """Build a BM25 function mapping one text field to one sparse field."""
    return Function(
        name=name,
        input_field_names=[source_field],
        output_field_names=[sparse_field],
        function_type=FunctionType.BM25,
    )


# Add function to schema
bm25_function_1 = _bm25("description_bm25_emb", "description", "description_vector_sparse")
bm25_function_2 = _bm25("image_description_bm25_emb", "Description", "image_description_vector_sparse")
bm25_function_3 = _bm25("tags_bm25_emb", "Tags", "tags_vector_sparse")

schema.add_function(bm25_function_1)
schema.add_function(bm25_function_2)
schema.add_function(bm25_function_3)

{'auto_id': False, 'description': '', 'fields': [{'name': 'Handle', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 512}, 'is_primary': True, 'auto_id': False}, {'name': 'Title', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 512}}, {'name': 'Type', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 512}}, {'name': 'craft', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 256}}, {'name': 'fabric', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 256}}, {'name': 'Product_Types', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 256}}, {'name': 'Tags', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 256, 'enable_analyzer': True}}, {'name': 'color', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 256}}, {'name': 'description', 'description': '', 'type': <DataType.VARCHAR: 21

In [17]:
from pymilvus import MilvusClient

index_params = client.prepare_index_params()

# Dense embedding fields: FLAT index with cosine metric, named "<field>_index".
for dense_field in ("description_vector", "image_description_vector", "tags_vector", "image_dense"):
    index_params.add_index(
        field_name=dense_field,
        index_type="FLAT",
        index_name=f"{dense_field}_index",
        metric_type="COSINE",
    )

# Sparse fields: inverted index scored with BM25, named "<field>_index".
for sparse_field in (
    "image_description_vector_sparse",
    "description_vector_sparse",
    "tags_vector_sparse",
):
    index_params.add_index(
        field_name=sparse_field,
        index_name=f"{sparse_field}_index",
        index_type="SPARSE_INVERTED_INDEX",
        metric_type="BM25",
        params={"inverted_index_algo": "DAAT_MAXSCORE"},  # or "DAAT_WAND" or "TAAT_NAIVE"
    )


In [18]:
# Rebuild from scratch: an existing collection with this name is dropped
# before a new one is created from the schema and index definitions.
target_collection = "SemanticSearch_bgremoved"

if client.has_collection(collection_name=target_collection):
    client.drop_collection(collection_name=target_collection)

client.create_collection(
    collection_name=target_collection,
    schema=schema,
    index_params=index_params,
)
# client.create_partition(
#     collection_name="SemanticSearch",
#     partition_name="A"
# )
# client.create_partition(
#     collection_name="SemanticSearch",
#     partition_name="B"
# )

In [19]:
# Print the collection names again to confirm the collection exists.
print(client.list_collections())

['SemanticSearch2', 'SemanticSearch_bgremoved', 'SemanticSearch']


In [20]:
# Ask the server for the collection's load state and print it.
res = client.get_load_state(collection_name="SemanticSearch_bgremoved")
print(res)

{'state': <LoadState: Loaded>}


In [23]:
# Show the frame that is about to be inserted into the collection.
df_copy

Unnamed: 0,Handle,Title,Type,description,craft,fabric,color,Product_Types,Description,Tags,description_vector,image_description_vector,tags_vector,image_dense
0,gaia-co-ordinate-set-3583,Gaia Co ordinate Set,Co-ordinate Set,This powder blue co-ordinate set is crafted fr...,"Dori Embroidery, Thread Embroidery, Bead Work,...",Linen,Blue,Co-ordinate Set,The dress is a light blue two-piece set featur...,"Blue, Dori Embroidery, Thread Embroidery, Bead...","[0.04155544936656952, -0.01020028255879879, -0...","[0.05512223020195961, 0.04318203032016754, -0....","[0.04994598776102066, -0.013021372258663177, -...","[0.059547509998083115, 0.03097514994442463, 0...."
1,shiqa-co-ordinate-set-3582,Shiqa Co ordinate Set,Co-ordinate Set,This mint green co-ordinate set is crafted fro...,Thread Embroidery,Linen,Green,Co-ordinate Set,The dress is a light green two-piece set featu...,"Green, Thread Embroidery, Linen, Co-ordinate Set","[0.042135123163461685, 0.0191117562353611, -0....","[0.052434291690588, 0.06576637178659439, -0.04...","[0.062471214681863785, 0.031147204339504242, 0...","[0.05378404259681702, 0.054888319224119186, 0...."
2,arzen-co-ordinate-set-3581,Arzen Co ordinate Set,Co-ordinate Set,This co-ordinate set is crafted from pure line...,Thread Embroidery,Linen,"Pink, White",Co-ordinate Set,,"Pink, White, Thread Embroidery, Linen, Co-ordi...","[0.0266643725335598, 0.012301371432840824, 0.0...","[0.009879766032099724, -0.0018297001952305436,...","[0.060877781361341476, 0.0018306276760995388, ...","[0.041696250438690186, 0.02879191003739834, 0...."
3,alisha-co-ordinate-set-3580,Alisha Co ordinate Set,Co-ordinate Set,This white beige co-ordinate set is crafted fr...,Thread Embroidery,Linen,"Beige, White",Co-ordinate Set,The dress is a two-piece set featuring a squar...,"Beige, White, Thread Embroidery, Linen, Co-ord...","[0.059070486575365067, 0.03154120221734047, -0...","[0.0720837339758873, 0.0655950978398323, -0.01...","[0.03869662433862686, 0.05238012224435806, 0.0...","[0.060613494366407394, 0.04498656094074249, 0...."
4,cyra-co-ordinate-set-3579,Cyra Co ordinate Set,Co-ordinate Set,This co-ordinate set is crafted from pure line...,"Thread Embroidery, Bead Work, Sequin Embroider...",Linen,"Yellow, White",Co-ordinate Set,"The ensemble features a light yellow, button-d...","Yellow, White, Thread Embroidery, Bead Work, S...","[0.008695674128830433, 0.020176971331238747, 0...","[0.051097724586725235, 0.08910145610570908, -0...","[0.009910985827445984, 0.015220469795167446, 0...","[0.042811069637537, 0.07642874121665955, 0.022..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1043,rati-kurta-set,Rati Kurta Set,Kurta Set,Featuring a pink paisley embroidered straight ...,Zardozi Embroidery,"Silk, Organza, Tussar","Pink, Beige",Kurta Set,The dress is a vibrant pink salwar kameez with...,"Pink, Beige, Zardozi Embroidery, Silk, Organza...","[0.02275579236447811, -0.001762106199748814, -...","[0.015424798242747784, -0.0069868359714746475,...","[0.031747005879879, 0.005678887944668531, -0.0...","[-0.0027435123920440674, 0.0026699856389313936..."
1044,harshi-anarkali-set,Harshi Anarkali Set,Anarkali Set,This set features red mid length organza anark...,"Zardozi Embroidery, Dori Embroidery","Silk, Organza, Kora",Red,Anarkali Set,The dress is a red salwar kameez with intricat...,"Red, Zardozi Embroidery, Dori Embroidery, Silk...","[0.026264622807502747, -0.029688198119401932, ...","[0.01564633473753929, 0.019283883273601532, -0...","[0.036237362772226334, -0.009034180082380772, ...","[0.03626110032200813, 0.03499206155538559, 0.0..."
1045,indira-kurta-set,Indira Kurta Set,Kurta Set,Featuring a yellow raw silk short suit with pl...,Zardozi Embroidery,"Raw Silk, Kora","Yellow, Silver",Kurta Set,The dress is a yellow salwar kameez with silve...,"Yellow, Silver, Zardozi Embroidery, Raw Silk, ...","[0.003864881582558155, 0.0031391980592161417, ...","[0.004383155610412359, 0.0109744006767869, -0....","[-0.024429136887192726, 0.008997435681521893, ...","[0.026281479746103287, 0.032295700162649155, 0..."
1046,charusheela-kurta-set,Charusheela Kurta Set,Kurta Set,This set features a black embroidered raw silk...,Zardozi Embroidery,"Raw Silk, Kora",Black,Kurta Set,"The dress is a long, black, embroidered kurta ...","Black, Zardozi Embroidery, Raw Silk, Kora, Kur...","[0.006826208438724279, -0.027781272307038307, ...","[0.03344716131687164, 0.026030350476503372, -0...","[-0.0091616902500391, -0.0031778812408447266, ...","[0.03698626160621643, 0.031135989353060722, 0...."


In [None]:
from pymilvus import MilvusClient
from tqdm import tqdm  # for progress bar (optional)

# One dict per row; keys are the df_copy column names.
data = df_copy.to_dict(orient='records')

# Rows per insert request. 10 matches the recorded run (105 batches for 1048
# rows) and avoids one network round trip per row.
batch_size = 10

inserted_total = 0
for i in tqdm(range(0, len(data), batch_size)):
    batch = data[i:i + batch_size]
    res = client.insert(
        collection_name="SemanticSearch_bgremoved",
        data=batch
    )
    # Track what the server acknowledged instead of discarding the result.
    inserted_total += res.get("insert_count", 0)

if inserted_total != len(data):
    print(f"Warning: inserted {inserted_total} of {len(data)} rows")

  0%|          | 0/105 [00:00<?, ?it/s]

100%|██████████| 105/105 [03:57<00:00,  2.26s/it]


In [35]:
# data = df_copy.to_dict(orient='records')

# res = client.insert(
#     collection_name="SemanticSearch",
#     partition_name="A",
#     data=data[:500]
# )

In [43]:
# Read the collection statistics and report the stored row count.
stats = client.get_collection_stats(collection_name="SemanticSearch_bgremoved")
num_entities = stats["row_count"]
print(f"Total entries in collection: {num_entities}")

Total entries in collection: 1048


In [25]:
# Check that the collection exists on the server (displays True/False).
client.has_collection(collection_name="SemanticSearch_bgremoved")

True

In [26]:
# Free-text search query; the commented line is an alternative example.
query = "Peach Kurta with flowering print"
# query = "Blue Floral lehenga with silk work"
# Encode the query with the same function used for the stored text vectors.
query_vector = get_text_vector(query, model, processor)

In [27]:
from pymilvus import AnnSearchRequest

def _dense_search_params(field_name):
    """Search settings for a dense field, queried with the current query_vector."""
    return {
        "data": [query_vector],
        "anns_field": field_name,
        "param": {"nprobe": 10},
        "limit": 10,
    }


def _sparse_search_params(field_name):
    """Search settings for a sparse field, queried with the raw query text."""
    return {
        "data": [query],
        "anns_field": field_name,
        "param": {"drop_ratio_search": 0.2},
        "limit": 10,
    }


# Requests 1-3 and 7 search dense fields; requests 4-6 search sparse fields.
search_param_1 = _dense_search_params("description_vector")
search_param_2 = _dense_search_params("image_description_vector")
search_param_3 = _dense_search_params("tags_vector")

search_param_4 = _sparse_search_params("description_vector_sparse")
search_param_5 = _sparse_search_params("image_description_vector_sparse")
search_param_6 = _sparse_search_params("tags_vector_sparse")
search_param_7 = _dense_search_params("image_dense")

# One request object per parameter set, in the same order.
text_search_requests = [
    AnnSearchRequest(**params)
    for params in (
        search_param_1,
        search_param_2,
        search_param_3,
        search_param_4,
        search_param_5,
        search_param_6,
        search_param_7,
    )
]
(
    text_request_1,
    text_request_2,
    text_request_3,
    text_request_4,
    text_request_5,
    text_request_6,
    text_request_7,
) = text_search_requests

In [28]:
# from pymilvus import MilvusClient

# res1 = client.hybrid_search(
#     collection_name="SemanticSearch",
#     reqs=reqs1,
#     ranker=ranker,
#     limit=10
# )
# for hits in res1:
#     print("TopK results:")
#     for hit in hits:
#         print(hit)


In [6]:
# Replace query_vector with the embedding of one catalogue image, so the
# searches that follow use an image as the query.
query_vector = get_image_vector("DATA/jpeg_ROI_images/aairah-kurta-set.jpg", model, processor)

In [7]:
from pymilvus import AnnSearchRequest

# The image query vector is searched against every dense field; all four
# parameter sets share the same settings and differ only in the target field.
image_search_fields = (
    "description_vector",
    "image_description_vector",
    "tags_vector",
    "image_dense",
)

search_param_1, search_param_2, search_param_3, search_param_4 = (
    {
        "data": [query_vector],
        "anns_field": field_name,
        "param": {"nprobe": 10},
        "limit": 10,
    }
    for field_name in image_search_fields
)

image_search_requests = [
    AnnSearchRequest(**params)
    for params in (search_param_1, search_param_2, search_param_3, search_param_4)
]
image_request_1, image_request_2, image_request_3, image_request_4 = image_search_requests

In [8]:
from pymilvus import MilvusClient
from pymilvus import RRFRanker

# Fuse the per-field result lists with reciprocal rank fusion (argument 100).
ranker = RRFRanker(100)

rrf_search_kwargs = dict(
    collection_name="SemanticSearch_bgremoved",
    reqs=image_search_requests,
    ranker=ranker,
    limit=10,
)
res2 = client.hybrid_search(**rrf_search_kwargs)

# One result list per query; print each hit on its own line.
for result_set in res2:
    print("TopK results:")
    for match in result_set:
        print(match)


TopK results:
{'Handle': 'aairah-kurta-set', 'distance': 0.019801979884505272, 'entity': {}}
{'Handle': 'sonth-sharara-set', 'distance': 0.019417475908994675, 'entity': {}}
{'Handle': 'nargis-kurta-set-pink', 'distance': 0.019149716943502426, 'entity': {}}
{'Handle': 'cynthia-kurta-set', 'distance': 0.018608273938298225, 'entity': {}}
{'Handle': 'aali-sharara-set-3370', 'distance': 0.009900989942252636, 'entity': {}}
{'Handle': 'paawani-kurta-set', 'distance': 0.009900989942252636, 'entity': {}}
{'Handle': 'afshaneh-kurta-set', 'distance': 0.009803921915590763, 'entity': {}}
{'Handle': 'aprajhita-kurta-set', 'distance': 0.009803921915590763, 'entity': {}}
{'Handle': 'daria-kurta-set-pink', 'distance': 0.009803921915590763, 'entity': {}}
{'Handle': 'delilah-tunic', 'distance': 0.009708737954497337, 'entity': {}}


In [9]:
from pymilvus import Function, FunctionType

In [None]:
from pymilvus import WeightedRanker

# NOTE(review): `rerank` is not referenced by any other visible cell, and it
# carries two weights while the hybrid searches in this notebook pass four
# requests. Confirm whether this cell is still needed.
rerank= WeightedRanker(0.8, 0.3) 

In [10]:
# Weighted rerank settings: one weight per search request, in request order.
weighted_rerank_params = {
    "reranker": "weighted",
    "weights": [0.1, 0.1, 0.1, 0.7],
    # "norm_score": True  # Optional
}

ranker = Function(
    name="weight",
    input_field_names=[],  # Must be an empty list
    output_field_names=[],
    function_type=FunctionType.RERANK,
    params=weighted_rerank_params,
)


In [12]:
# Run the four dense image-query requests and combine them with `ranker`.
weighted_requests = [image_request_1, image_request_2, image_request_3, image_request_4]

results = client.hybrid_search(
    collection_name='SemanticSearch_bgremoved',
    reqs=weighted_requests,
    ranker=ranker,
    limit=10,
)

In [13]:
# Display the hits returned by the weighted hybrid search.
results

data: [[{'Handle': 'aairah-kurta-set', 'distance': 0.7308912873268127, 'entity': {}}, {'Handle': 'sonth-sharara-set', 'distance': 0.697704017162323, 'entity': {}}, {'Handle': 'afshaneh-kurta-set', 'distance': 0.673143208026886, 'entity': {}}, {'Handle': 'shanaaz-sharara-set', 'distance': 0.6661190390586853, 'entity': {}}, {'Handle': 'ara-kurta-set', 'distance': 0.666009783744812, 'entity': {}}, {'Handle': 'inaya-anarkali-set', 'distance': 0.66548752784729, 'entity': {}}, {'Handle': 'sanaz-kurta-set', 'distance': 0.6628775000572205, 'entity': {}}, {'Handle': 'qamara-kurta-set', 'distance': 0.659689724445343, 'entity': {}}, {'Handle': 'sampa-sharara-set', 'distance': 0.6595166921615601, 'entity': {}}, {'Handle': 'lilly-kurta-set', 'distance': 0.6593001484870911, 'entity': {}}]]