## 【实操】Milvus的基本功能

### 连接 Milvus（创建客户端）

In [1]:
# In pymilvus, passing a local file name as the MilvusClient `uri` argument uses Milvus Lite:
# from pymilvus import MilvusClient
# client = MilvusClient("./milvus_demo.db")
# A remote server is addressed the same way:
# client = MilvusClient(uri="http://localhost:19530", token="username:password")

### 演示attu工具的使用

In [2]:
# Launch Milvus Lite as a Python module (embedded server) so the Attu UI can
# be demonstrated against it.

from milvus import default_server
from pymilvus import connections, utility

# Bring the embedded server up.
default_server.start()

# Connect over localhost on whichever port the server reports.
milvus_port = default_server.listen_port
connections.connect(host='127.0.0.1', port=milvus_port)

# Printing the server version doubles as a readiness check.
server_version = utility.get_server_version()
print(server_version)

# The server is intentionally left running for the following cells;
# default_server.stop() is called near the end of the notebook.



v2.3.5-lite


### 创建 collection

In [3]:
# Simple text-search demo against the Milvus Lite server started earlier.
from pymilvus import MilvusClient

# File-backed alternative: client = MilvusClient("./milvus_demo.db")
# Use the port the server actually listens on rather than hard-coding 19530,
# so this stays consistent with the connections.connect(...) call above.
client = MilvusClient(uri=f"http://127.0.0.1:{default_server.listen_port}")

# Drop any leftover collection so the cell can be re-run from a clean state.
if client.has_collection(collection_name="demo_collection"):
    client.drop_collection(collection_name="demo_collection")

client.create_collection(
    collection_name="demo_collection",
    dimension=384,  # must equal the length of the vectors inserted later
)


### 插入示例数据

In [4]:
import numpy as np

docs = [
    "Artificial intelligence was founded as an academic discipline in 1956.",
    "Alan Turing was the first person to conduct substantial research in AI.",
]

# Placeholder embeddings: one random 384-dimensional vector per document.
# A seeded generator keeps the vectors (and therefore the search results)
# reproducible across "Restart & Run All"; .tolist() yields plain Python floats.
rng = np.random.default_rng(42)
vectors = rng.uniform(-1, 1, size=(len(docs), 384)).tolist()

# One entity per document; "text" and "subject" are not part of the declared
# schema, they are passed alongside "id" and "vector" as extra fields.
data = [
    {"id": i, "vector": vector, "text": doc, "subject": "history"}
    for i, (vector, doc) in enumerate(zip(vectors, docs))
]
res = client.insert(
    collection_name="demo_collection",
    data=data,
)


### 查询数据

In [5]:
# Vector similarity search: the first inserted vector is the query, restricted
# to entities whose `subject` equals 'history'; return at most two hits.
search_options = {
    "filter": "subject == 'history'",
    "limit": 2,
    "output_fields": ["text", "subject"],
}
res = client.search(collection_name="demo_collection", data=[vectors[0]], **search_options)
print(res)


data: [[{'id': 0, 'distance': 1.0, 'entity': {'text': 'Artificial intelligence was founded as an academic discipline in 1956.', 'subject': 'history'}}, {'id': 1, 'distance': 0.000612135510891676, 'entity': {'text': 'Alan Turing was the first person to conduct substantial research in AI.', 'subject': 'history'}}]]


In [6]:
# Scalar query (no vector involved): fetch every entity matching the filter.
history_filter = "subject == 'history'"
wanted_fields = ["text", "subject"]
res = client.query(collection_name="demo_collection", filter=history_filter, output_fields=wanted_fields)
print(res)

data: ["{'text': 'Artificial intelligence was founded as an academic discipline in 1956.', 'subject': 'history', 'id': 0}", "{'text': 'Alan Turing was the first person to conduct substantial research in AI.', 'subject': 'history', 'id': 1}"]


### 其他一些常用功能

In [7]:
# Inspect the collection's metadata (fields, dynamic-field flag, ...).
schema = client.describe_collection(collection_name="demo_collection")
print(f"Collection schema: {schema}")

Collection schema: {'collection_name': 'demo_collection', 'auto_id': False, 'num_shards': 1, 'description': '', 'fields': [{'field_id': 100, 'name': 'id', 'description': '', 'type': <DataType.INT64: 5>, 'params': {}, 'is_primary': True}, {'field_id': 101, 'name': 'vector', 'description': '', 'type': <DataType.FLOAT_VECTOR: 101>, 'params': {'dim': 384}}], 'functions': [], 'aliases': [], 'collection_id': 458653500473081913, 'consistency_level': 2, 'properties': {}, 'num_partitions': 1, 'enable_dynamic_field': True, 'created_timestamp': 458653503242371075}


In [8]:
# List every collection known to the server.
collections = client.list_collections()
print(f"Collections: {collections}")

Collections: ['demo_collection']


In [9]:
# Update data with upsert: an entity whose ID already exists is overwritten,
# otherwise a new entity is inserted.
docs = [
    "Alan Turing was the first person to conduct substantial research in AI.",
    "Artificial intelligence was founded as an academic discipline in 1956.",
    "Born in Maida Vale, London, Turing was raised in southern England.",
]

# Fresh placeholder embeddings from a seeded generator so that re-running the
# notebook produces the same vectors; .tolist() yields plain Python floats.
upsert_rng = np.random.default_rng(7)
vectors = upsert_rng.uniform(-1, 1, size=(len(docs), 384)).tolist()
data = [
    {"id": i, "vector": vector, "text": doc, "subject": "history"}
    for i, (vector, doc) in enumerate(zip(vectors, docs))
]
# Left as the last expression so the notebook displays the upsert result.
client.upsert(
    collection_name="demo_collection",
    data=data,
)


{'upsert_count': 3, 'cost': 0}

In [10]:
# Delete every entity that matches the filter expression.
history_filter = "subject == 'history'"
res = client.delete(collection_name="demo_collection", filter=history_filter)
print(res)


{'delete_count': 3}


In [11]:
# Remove the demo collection itself.
client.drop_collection("demo_collection")

In [13]:
# Stop the Milvus Lite server that was started at the top of the notebook.
# NOTE(review): the index-operations cell further down still calls `client`,
# which presumably needs a running server — confirm the intended cell order.
default_server.stop()

### 索引的操作

In [None]:
# Index operations on a collection with an explicit schema.
from pymilvus import CollectionSchema, DataType, FieldSchema, MilvusClient

# The server was stopped in the previous cell, so bring it back up and
# reconnect before issuing any further requests.
default_server.start()
client = MilvusClient(uri=f"http://127.0.0.1:{default_server.listen_port}")

# Start from a clean state so the cell can be re-run.
if client.has_collection(collection_name="example_collection"):
    client.drop_collection(collection_name="example_collection")

# Create the collection: an INT64 primary key plus a 128-dimensional float vector.
fields = [
    FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=False),
    FieldSchema(name="vector", dtype=DataType.FLOAT_VECTOR, dim=128),
]
# No metric_type here: with an explicit schema the metric is configured on the
# index below, not on the collection.
client.create_collection(
    collection_name="example_collection",
    schema=CollectionSchema(fields=fields, enable_dynamic_field=True),
)

# Create an HNSW index on the vector field using Euclidean (L2) distance.
index_params = client.prepare_index_params()
index_params.add_index(
    field_name="vector",
    index_type="HNSW",
    metric_type="L2",
    index_name="vector_index",
    params={"M": 16, "efConstruction": 200},
)
client.create_index(
    collection_name="example_collection",
    index_params=index_params,
)

# Show the index information.
index_info = client.describe_index(collection_name="example_collection", index_name="vector_index")
print("Index info:", index_info)

# To drop the index again:
# client.drop_index(collection_name="example_collection", index_name="vector_index")
# Call default_server.stop() once you are done with the server.


### 其他一些数据库管理

In [None]:
# Partitions
# Access control (permission management)

### AsyncMilvusClient

In [None]:
# AsyncMilvusClient is the asynchronous client added in pymilvus 2.5.x; it supports
# async programming and suits high-concurrency scenarios.
# Its functionality is similar to MilvusClient, but calls use the `await` keyword.