In [None]:
import pickle 

import numpy as np

from milvus import Milvus, IndexType, MetricType, Status

In [None]:
# Connect to the local Milvus server and make sure the target collection exists.
client = Milvus(host='localhost', port='19530')
collection_name = "VBS_full"

# Every client call returns a Status; check it instead of discarding it so that
# a failed request is reported here rather than surfacing later as a confusing error.
status, ok = client.has_collection(collection_name)
if not status.OK():
    raise RuntimeError(f"has_collection({collection_name!r}) failed: {status}")

if ok:
    print(f"Collection {collection_name} already exists! Drop collection to re-create.")
else:
    status = client.create_collection({
        "collection_name": collection_name,
        "dimension": 768,
        "index_file_size": 2048,
        "metric_type": MetricType.L2,
    })
    if not status.OK():
        raise RuntimeError(f"create_collection({collection_name!r}) failed: {status}")

# Use `status` rather than `_` for the first tuple element: assigning to `_`
# in IPython shadows the built-in "last output" variable.
status, collections = client.list_collections()
if not status.OK():
    raise RuntimeError(f"list_collections failed: {status}")

status, collection = client.get_collection_info(collection_name)
if not status.OK():
    raise RuntimeError(f"get_collection_info({collection_name!r}) failed: {status}")
print(collection)

In [81]:
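# Destructive: uncomment to drop the collection so it can be re-created from scratch.
# Left commented out so that "Run All" never deletes the inserted vectors.
# status = client.drop_collection(collection_name)
# print(status)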

Status(code=0, message='Delete collection successfully!')


In [83]:
# Load the pre-computed features from a pickle file into `data`.
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only run this against a file from a trusted source.
# Presumably `data` is a dict mapping keyframe name -> embedding (later cells
# call data.items() and stack the values into a float32 array) — confirm.
with open("/home/vbs2/vbs/L14_336_features_128.pkl", "rb") as f:
    data = pickle.load(f)

In [84]:
# Order the (key, value) pairs of `data` by key so the row order is deterministic.
# Sorting on the key alone means the values themselves are never compared.
features_list = sorted(data.items(), key=lambda item: item[0])

In [85]:
# Unzip the sorted pairs into two parallel tuples: keys and their vectors.
image_list, vectors = zip(*features_list)

# Stack the per-item vectors into one 2-D float32 array.
vectors = np.array(vectors, dtype=np.float32)
print(vectors.shape)

(2508110, 768)


In [141]:
# Spot-check one arbitrary row: normalise it to unit length, then take its L2 norm.
# NOTE(review): this evaluates to 1.0 for any non-zero vector, so it does not show
# whether the stored embeddings are already unit-length; np.linalg.norm(vectors[230])
# alone would answer that — confirm which check was intended.
probe = vectors[230]
np.linalg.norm(probe / np.linalg.norm(probe))

1.0

In [86]:
# Read the keyframe list (one entry per line, surrounding whitespace stripped).
with open("/home/vbs2/vbs/FIRST-Server/helpers/first/keyframes_list.txt") as keyframes_file:
    images = list(map(str.strip, keyframes_file))

# The sorted feature keys must match the keyframe list exactly, entry for entry
# and in the same order.
assert images == list(image_list)

In [88]:
%%time
# Insert by batch since milvus only support 256MB inserts at a time
n = vectors.shape[0]
bs = n // 256
all_ids = []

for i in range(0, n, bs):
    batch = vectors[i:i+bs]
    status, batch_ids = client.insert(collection_name=collection_name, records=batch.astype(np.float32))
    while not status.OK():
        status, batch_ids = client.insert(collection_name=collection_name, records=batch.astype(np.float32))
    all_ids.extend(batch_ids)

CPU times: user 2min 50s, sys: 4.61 s, total: 2min 54s
Wall time: 10min 33s


In [128]:
# Persist the ids returned by Milvus, one per line, in insertion order.
with open("/home/vbs2/vbs/FIRST-Server/helpers/first/milvus_ids.txt", 'w') as ids_file:
    ids_file.write('\n'.join(map(str, all_ids)) + '\n')

In [None]:
# Fetch and print the collection statistics reported by the server.
# NOTE(review): `status` is not checked here; on failure `stats` may not be meaningful.
status, stats = client.get_collection_stats(collection_name)
print(stats)

In [89]:
%%time
# Build an IVF_FLAT index over the collection with nlist=2048.
# The returned Status is the cell's displayed output, so a failure is visible there.
client.create_index(collection_name, IndexType.IVF_FLAT, params={"nlist": 2048})

CPU times: user 1.17 s, sys: 92.8 ms, total: 1.26 s
Wall time: 25min 26s


Status(code=0, message='Build index successfully!')

In [4]:
# Read back the index configuration to confirm what was built.
# NOTE(review): `status` is not checked before printing `index`.
status, index = client.get_index_info(collection_name)
print(index)

(collection_name='VBS_full', index_type=<IndexType: IVF_FLAT>, params={'nlist': 2048})


In [124]:
%%time
search_param = {
    "nprobe": 1024
}


param = {
    'collection_name': collection_name,
    'query_records': np.random.rand(1, 768).astype(np.float32),
    'top_k': 2000,
    'params': search_param,
}

status, results = client.search(**param)
if status.OK():
    print(results.shape)

(1, 2000)
CPU times: user 3.64 ms, sys: 0 ns, total: 3.64 ms
Wall time: 148 ms
