In [1]:
from pymilvus import MilvusClient, DataType
import random
import artificial_data
import os 
import sys
from itertools import islice
import numpy as np
import pandas as pd

ROOT_DIR = os.path.split(os.environ['VIRTUAL_ENV'])[0]
#sys.path.insert(0, ROOT_DIR)
if ROOT_DIR not in sys.path:
  sys.path.append(ROOT_DIR)

from Utils.model_helpers import OPEN_CLIP_MODEL, get_model_name
from Utils.general_helpers import get_root_dir
import joblib


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# if ROOT_DIR in sys.path:
#   sys.path.remove(ROOT_DIR)

In [4]:
sys.path

['e:\\ELFAK\\MASTER RAD\\MASTER_RAD-Vector_Search',
 'e:\\ELFAK\\MASTER RAD\\MASTER_RAD-Vector_Search\\Code\\Vector_Database\\Milvus',
 'C:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.10_3.10.3056.0_x64__qbz5n2kfra8p0\\python310.zip',
 'C:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.10_3.10.3056.0_x64__qbz5n2kfra8p0\\DLLs',
 'C:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.10_3.10.3056.0_x64__qbz5n2kfra8p0\\lib',
 'C:\\Users\\FT\\AppData\\Local\\Microsoft\\WindowsApps\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0',
 'e:\\ELFAK\\MASTER RAD\\MASTER_RAD-Vector_Search\\.venv',
 '',
 'e:\\ELFAK\\MASTER RAD\\MASTER_RAD-Vector_Search\\.venv\\lib\\site-packages',
 'e:\\ELFAK\\MASTER RAD\\MASTER_RAD-Vector_Search\\.venv\\lib\\site-packages\\win32',
 'e:\\ELFAK\\MASTER RAD\\MASTER_RAD-Vector_Search\\.venv\\lib\\site-packages\\win32\\lib',
 'e:\\ELFAK\\MASTER RAD\\MASTER_RAD-Vector_Search\\.venv\\lib\\site-packages\\Pythonwin',
 'e:\\E

In [2]:
# 1. Set up a Milvus client
client = MilvusClient(
    uri="http://localhost:19530"
)

COLLECTION_NAME = "pet_visual_only_collection"


In [3]:
client.drop_collection(
    collection_name=COLLECTION_NAME
)

# Creating database schema and indexes

In [4]:
# 3. Create a collection in customized setup mode

# 3.1. Create schema
schema = MilvusClient.create_schema(
    auto_id=True,
    enable_dynamic_field=True,
)


# 3.2. Add fields to schema
schema.add_field(field_name="pet_id", datatype=DataType.INT64, is_primary=True)
schema.add_field(field_name="pet_image_filename", datatype=DataType.VARCHAR, max_length=50)
schema.add_field(field_name="pet_finder_name", datatype=DataType.VARCHAR, max_length=50)
schema.add_field(field_name="pet_finder_contact", datatype=DataType.VARCHAR, max_length=50)
schema.add_field(field_name="pet_latitude", datatype=DataType.FLOAT)
schema.add_field(field_name="pet_longitude", datatype=DataType.FLOAT)
schema.add_field(field_name="is_found", datatype=DataType.BOOL)
schema.add_field(field_name="is_missing", datatype=DataType.BOOL)

schema.add_field(field_name="pet_visual_vector", datatype=DataType.FLOAT_VECTOR, dim=1024)

# 3.3. Prepare index parameters
index_params = client.prepare_index_params()

# 3.4. Add indexes
index_params.add_index(
    field_name="pet_id"
)

index_params.add_index(
  field_name="pet_image_filename",
  
  index_name="pet_image_filename_index"
)
index_params.add_index(
  field_name="pet_finder_name",
  
  index_name="pet_finder_name_index"
)

index_params.add_index(
  field_name="pet_finder_contact",
  
  index_name="pet_finder_contact_index"
)

index_params.add_index(
  field_name="pet_latitude",
  
  index_name="pet_latitude_index"
)

index_params.add_index(
  field_name="pet_longitude",
  
  index_name="pet_longitude_index"
)

index_params.add_index(
    field_name="pet_visual_vector", 
    index_type="AUTOINDEX",
    metric_type="COSINE"
)

# index_params.add_index(
#     field_name="pet_textual_vector", 
#     index_type="AUTOINDEX",
#     metric_type="COSINE"
# )

# index_params.add_index(
#     field_name="pet_textual_vector", 
#     metric_type = "L2",
#     index_type = "IVF_FLAT",
#     params = {"nlist":1024}
# )

# # 4.2. Add an index on the vector field.
# index_params.add_index(
#     field_name="pet_textual_vector",
#     metric_type="COSINE",
#     index_type='HNSW',
#     params = {"M":10, 'efConstruction':3},
#     index_name="pet_textual_vector"
# )

# 3.5. Create a collection
client.create_collection(
    collection_name=COLLECTION_NAME,
    schema=schema,
    index_params=index_params
)

In [20]:
client.list_collections()

['quick_setup', 'customized_setup', 'pet_visual_only_collection']

In [None]:
from pymilvus import Collection

collection = Collection(COLLECTION_NAME) # Hash obtained from connections.list_connections()
schema = collection.schema


# Adding data to Milvus

### Loading feature vectors

In [5]:
#Loading cache file which contains images' filenames and feature vectors
model_name = get_model_name(OPEN_CLIP_MODEL.xlm_roberta_large_ViT_H.value)
# cache_file_name = os.path.join(ROOT_DIR, f"{model_name}-cache_using_pytorch.jlb")
new_cache_file_name = os.path.join(ROOT_DIR, f"new_{model_name}-cache_using_pytorch.jlb")

cache = joblib.load(new_cache_file_name)
location_data = artificial_data.generate_location_data(len(cache)) # number_of_test_entities)
missing_found_data = artificial_data.generate_missing_found_data(len(cache)) # number_of_test_entities)

pet_finder_names = artificial_data.pet_finder_names
artificial_pet_data = [ {
    #"pet_id": index,
    "pet_image_filename": image_filename_as_key,
    "pet_finder_name": f"{random.choice(pet_finder_names)}",
    "pet_finder_contact": '06' + str(random.randint(0, 5)) + ''.join([str(random.randint(0, 9)) for _ in range(7)]),
    "pet_latitude": location_data.iloc[index]['lat'],
    "pet_longitude":location_data.iloc[index]['lon'],
    "is_found": missing_found_data[index][0],
    "is_missing": missing_found_data[index][1],
    "pet_visual_vector": inner_dict['embedded_image'] 
} for index, (image_filename_as_key, inner_dict) in enumerate(cache.items())]

print(len(artificial_pet_data))
artificial_pet_data

37497


[{'pet_image_filename': 'cat.10690.jpg',
  'pet_finder_name': 'Stefan Nikolić',
  'pet_finder_contact': '0603328403',
  'pet_latitude': 43.372,
  'pet_longitude': 21.874,
  'is_found': False,
  'is_missing': True,
  'pet_visual_vector': tensor([-0.3910, -0.2813,  0.2039,  ...,  0.4444,  0.6260,  0.0409])},
 {'pet_image_filename': 'cat.6034.jpg',
  'pet_finder_name': 'Tamara Đorđević',
  'pet_finder_contact': '0603117750',
  'pet_latitude': 44.784,
  'pet_longitude': 20.468,
  'is_found': False,
  'is_missing': True,
  'pet_visual_vector': tensor([-0.5274, -0.1684,  0.2831,  ..., -0.0188,  0.6736, -0.5160])},
 {'pet_image_filename': 'cat.9812.jpg',
  'pet_finder_name': 'Jovana Petrović',
  'pet_finder_contact': '0655041461',
  'pet_latitude': 44.8,
  'pet_longitude': 20.485,
  'is_found': True,
  'is_missing': False,
  'pet_visual_vector': tensor([-0.1834, -0.1049,  0.0937,  ...,  0.3164,  0.8117, -0.3759])},
 {'pet_image_filename': 'dog.6354.jpg',
  'pet_finder_name': 'Vuk Janković',
 

In [6]:
res = client.insert(
    collection_name=COLLECTION_NAME,
    data=artificial_pet_data[:10000]
)

print(res)

{'insert_count': 10000, 'ids': [449803551958602387, 449803551958602388, 449803551958602389, 449803551958602390, 449803551958602391, 449803551958602392, 449803551958602393, 449803551958602394, 449803551958602395, 449803551958602396, 449803551958602397, 449803551958602398, 449803551958602399, 449803551958602400, 449803551958602401, 449803551958602402, 449803551958602403, 449803551958602404, 449803551958602405, 449803551958602406, 449803551958602407, 449803551958602408, 449803551958602409, 449803551958602410, 449803551958602411, 449803551958602412, 449803551958602413, 449803551958602414, 449803551958602415, 449803551958602416, 449803551958602417, 449803551958602418, 449803551958602419, 449803551958602420, 449803551958602421, 449803551958602422, 449803551958602423, 449803551958602424, 449803551958602425, 449803551958602426, 449803551958602427, 449803551958602428, 449803551958602429, 449803551958602430, 449803551958602431, 449803551958602432, 449803551958602433, 449803551958602434, 44980355

In [7]:
res = client.insert(
    collection_name=COLLECTION_NAME,
    data=artificial_pet_data[10000:25000]
)

print(res)

{'insert_count': 15000, 'ids': [449803551958612395, 449803551958612396, 449803551958612397, 449803551958612398, 449803551958612399, 449803551958612400, 449803551958612401, 449803551958612402, 449803551958612403, 449803551958612404, 449803551958612405, 449803551958612406, 449803551958612407, 449803551958612408, 449803551958612409, 449803551958612410, 449803551958612411, 449803551958612412, 449803551958612413, 449803551958612414, 449803551958612415, 449803551958612416, 449803551958612417, 449803551958612418, 449803551958612419, 449803551958612420, 449803551958612421, 449803551958612422, 449803551958612423, 449803551958612424, 449803551958612425, 449803551958612426, 449803551958612427, 449803551958612428, 449803551958612429, 449803551958612430, 449803551958612431, 449803551958612432, 449803551958612433, 449803551958612434, 449803551958612435, 449803551958612436, 449803551958612437, 449803551958612438, 449803551958612439, 449803551958612440, 449803551958612441, 449803551958612442, 44980355

In [8]:
res = client.insert(
    collection_name=COLLECTION_NAME,
    data=artificial_pet_data[25000:]
)

print(res)

{'insert_count': 12497, 'ids': [449803551958627407, 449803551958627408, 449803551958627409, 449803551958627410, 449803551958627411, 449803551958627412, 449803551958627413, 449803551958627414, 449803551958627415, 449803551958627416, 449803551958627417, 449803551958627418, 449803551958627419, 449803551958627420, 449803551958627421, 449803551958627422, 449803551958627423, 449803551958627424, 449803551958627425, 449803551958627426, 449803551958627427, 449803551958627428, 449803551958627429, 449803551958627430, 449803551958627431, 449803551958627432, 449803551958627433, 449803551958627434, 449803551958627435, 449803551958627436, 449803551958627437, 449803551958627438, 449803551958627439, 449803551958627440, 449803551958627441, 449803551958627442, 449803551958627443, 449803551958627444, 449803551958627445, 449803551958627446, 449803551958627447, 449803551958627448, 449803551958627449, 449803551958627450, 449803551958627451, 449803551958627452, 449803551958627453, 449803551958627454, 44980355

# Database check

In [10]:
from flask import Flask, request, jsonify, send_file
import os
from werkzeug.utils import secure_filename
from pymilvus import MilvusClient, DataType, connections, Collection
import sys
import numpy as np

ROOT_DIR = os.path.split(os.environ['VIRTUAL_ENV'])[0]
#sys.path.insert(0, ROOT_DIR)
if ROOT_DIR not in sys.path:
  sys.path.append(ROOT_DIR)

from Utils.model_helpers import get_device, get_open_clip_model, OPEN_CLIP_MODEL

# 1. Set up a Milvus client
client = MilvusClient(
    uri="http://localhost:19530"
)

COLLECTION_NAME = "pet_visual_only_collection"
# connections.add_connection(
#         default={"alias": "default", "host": "localhost", "port": "19530"}
#     )
connections.connect(
  alias="default", 
  host='localhost', 
  port='19530'
)

search_term = ""
top_number = 50
device = get_device()

model, preprocess, tokenizer, model_name, pretrained, embedding_size = get_open_clip_model(OPEN_CLIP_MODEL.xlm_roberta_large_ViT_H.value)
field_names = [field.name for field in schema.fields]

SHOULD_BE_NORMALISED = "norm"
search_embedding = model.encode_text(tokenizer(search_term).to(device)).cpu().detach().numpy()

if SHOULD_BE_NORMALISED == "norm":
    search_embedding = search_embedding / np.linalg.norm(search_embedding, ord=2, axis=-1, keepdims=True)

pet_results = client.search(
    collection_name=COLLECTION_NAME, # Replace with the actual name of your collection
    # Replace with your query vector
    data = search_embedding,
    limit = top_number, # Max. number of search results to return
    output_fields = [field for field in field_names if field not in ['pet_visual_vector']]

    #search_params={"metric_type": "IP", "params": {}} # Search parameters
)
pet_results = pet_results[0]
print(pet_results)

No GPU available, using the CPU instead.
No GPU available, using the CPU instead.
[{'id': 9694, 'distance': 0.17044958472251892, 'entity': {'is_found': False, 'is_missing': True, 'pet_id': 9694, 'pet_image_filename': 'cat.8005.jpg', 'pet_finder_name': 'Petar Janković', 'pet_finder_contact': '0643090586', 'pet_latitude': 43.33300018310547, 'pet_longitude': 21.885000228881836}}, {'id': 36754, 'distance': 0.16859035193920135, 'entity': {'is_found': True, 'is_missing': False, 'pet_id': 36754, 'pet_image_filename': 'cat.5456.jpg', 'pet_finder_name': 'Sofija Nikolić', 'pet_finder_contact': '0645935245', 'pet_latitude': 43.33300018310547, 'pet_longitude': 21.92300033569336}}, {'id': 3688, 'distance': 0.166713148355484, 'entity': {'is_found': False, 'is_missing': True, 'pet_id': 3688, 'pet_image_filename': 'cat.7988.jpg', 'pet_finder_name': 'Tamara Đorđević', 'pet_finder_contact': '0650643251', 'pet_latitude': 44.832000732421875, 'pet_longitude': 20.44099998474121}}, {'id': 13746, 'distance': 