## 使用 Milvus 向量库查询索引

官网：https://milvus.io/docs/milvus_lite.md

样例：https://github.com/milvus-io/milvus-lite/blob/main/examples/example.py

In [1]:
import random
from milvus import default_server
from pymilvus import (
    connections,
    FieldSchema, CollectionSchema, DataType,
    Collection,
    utility
)

In [2]:
# Optional: store all Milvus-related data under a specific directory.
# Per the original note, the default location is %APPDATA%/milvus-io/milvus-server.
default_server.set_base_dir('db')

# Start the embedded Milvus server.
default_server.start()

_HOST = '127.0.0.1'
# The port may be changed (by default it's 19530), so it is read from the
# server object instead of being hard-coded.
_PORT = default_server.listen_port

# Name of the collection opened and searched by the cells below.
_COLLECTION_NAME = 'test'

# Vector parameters: length of the query vector built in the search cell.
_DIM = 1536

# Index / search parameters.
# _METRIC_TYPE, _NPROBE and _TOPK are passed to collection.search() below;
# _INDEX_TYPE and _NLIST are not referenced by the cells visible here.
_METRIC_TYPE = 'L2'
_INDEX_TYPE = 'IVF_FLAT'
_NLIST = 1024
_NPROBE = 16
_TOPK = 8

In [3]:
# Open a connection to the local Milvus server configured above.
# Plain string literals are used: these messages have no placeholders, so an
# f-string prefix would be misleading.
print("\nCreate connection...")
connections.connect(host=_HOST, port=_PORT)
print("\nList connections:")
print(connections.list_connections())

# Show the collections that currently exist on the server.
print(utility.list_collections())


Create connection...

List connections:
[('default', <pymilvus.client.grpc_handler.GrpcHandler object at 0x00000217ED0DF850>)]
['test']


In [4]:
# Get a handle to the collection by name; no schema is passed here.
collection = Collection(_COLLECTION_NAME)

# Load the collection's data into memory; the search cell below runs after this.
collection.load()

In [15]:
# Inputs used to build the query vector.
source = 3
company_id = 2
month = 202310

# Number of entities in the collection.
print(collection.num_entities)

# Every component of the query vector holds the same scalar, which combines
# source, company_id and month into a single float (approximately 3.2020231
# for the values above). It is loop-invariant, so compute it once instead of
# once per dimension.
query_value = (source*1e9+company_id*1e8+month*10)*1e-9

# NOTE(review): the filter expression hard-codes company 3 or 1 and source 3,
# while the query vector above is built with company_id = 2. Confirm whether
# the filter is meant to be derived from the variables above.
search_param = {
    "data": [[query_value] * _DIM],  # a single query vector of length _DIM
    "anns_field": 'embedding',
    "param": {"metric_type": _METRIC_TYPE, "params": {"nprobe": _NPROBE}},
    "limit": _TOPK,
    "expr": '(company == 3 or company==1) and source == 3',
    "output_fields": ["id", "month", "company", "source"]
}
results = collection.search(**search_param)

# One result list per query vector; print each hit with its rank.
for i, result in enumerate(results):
    print(f"\nSearch result for {i}th vector: ")
    for j, res in enumerate(result):
        print(f"Top {j}: {res}")

4800

Search result for 0th vector: 
Top 0: id: 1204, distance: 15.360064506530762, entity: {'source': 3, 'id': 1204, 'month': 202310, 'company': 1}
Top 1: id: 1206, distance: 15.360064506530762, entity: {'source': 3, 'id': 1206, 'month': 202310, 'company': 1}
Top 2: id: 1207, distance: 15.360064506530762, entity: {'source': 3, 'id': 1207, 'month': 202310, 'company': 1}
Top 3: id: 1203, distance: 15.360064506530762, entity: {'source': 3, 'id': 1203, 'month': 202310, 'company': 1}
Top 4: id: 1201, distance: 15.360064506530762, entity: {'source': 3, 'id': 1201, 'month': 202310, 'company': 1}
Top 5: id: 1205, distance: 15.360064506530762, entity: {'source': 3, 'id': 1205, 'month': 202310, 'company': 1}
Top 6: id: 1202, distance: 15.360064506530762, entity: {'source': 3, 'id': 1202, 'month': 202310, 'company': 1}
Top 7: id: 1200, distance: 15.360064506530762, entity: {'source': 3, 'id': 1200, 'month': 202310, 'company': 1}


In [None]:
# Release the collection from memory.
collection.release()

# Optional cleanup, left commented out: drop the collection's index.
# collection.drop_index()

# Optional cleanup, left commented out: drop the collection itself.
# collection.drop()