## 导入包

In [1]:
import random
import numpy as np

from milvus import Milvus, IndexType, MetricType, Status

TypeError: Descriptors cannot be created directly.
If this call came from a _pb2.py file, your generated code is out of date and must be regenerated with protoc >= 3.19.0.
If you cannot immediately regenerate your protos, some other possible workarounds are:
 1. Downgrade the protobuf package to 3.20.x or lower.
 2. Set PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python (but this will use pure-Python parsing and will be much slower).

More information: https://developers.google.com/protocol-buffers/docs/news/2022-05-06#python-updates

In [1]:
from milvus import DataType
import numpy as np
from pymilvus_orm import Collection, CollectionSchema, FieldSchema, DataType

ModuleNotFoundError: No module named 'pymilvus_orm'

## 连接milvus服务

In [2]:

import os

# Connection settings are read from the environment so that no credential is
# stored in the notebook. Export MILVUS_PASSWORD (and optionally MILVUS_URI /
# MILVUS_USER) before starting the kernel.
myclient = pymilvus.MilvusClient(
    uri=os.environ.get("MILVUS_URI", "http://localhost:19530"),
    # NOTE(review): `host` repeats the address already given by `uri` — confirm
    # whether it is still needed.
    host="127.0.0.1",
    user=os.environ.get("MILVUS_USER", "root"),
    db_name='',
    token='',
    password=os.environ.get("MILVUS_PASSWORD", ""),
)


AttributeError: module 'pymilvus' has no attribute 'MilvusClient'

## 创建collection
collection必须要有一个field是主键，一个field是存储向量，另外还可以创建其他类型的field

In [3]:
 
# Name of the vector field; create_index() and search() read this module-level value.
field_name = "example_field"
# Collection name used by this example.
collection_name = "example_collection"


def create_collection(field_name, collection_name, dim=8):
    """
    Create a collection with a primary key, a vector field and a scalar field.

    :param field_name: name given to the FLOAT_VECTOR field.
    :param collection_name: name of the collection to create.
    :param dim: dimensionality of the vector field; defaults to 8, the value
        that used to be hard-coded.
    :return: the ``Collection`` built from the schema.
    """
    # Primary key; auto_id=True means values are generated on insert.
    field_id = FieldSchema(name="field_id", dtype=DataType.INT64, is_primary=True, auto_id=True)
    # Field used for vector similarity search.
    field = FieldSchema(name=field_name, dtype=DataType.FLOAT_VECTOR, dim=dim)
    # Scalar field; search() filters on it with an expression.
    cat_id = FieldSchema(name="cat_id", dtype=DataType.INT64)
    schema = CollectionSchema(fields=[field_id, field, cat_id], description="example collection")

    return Collection(name=collection_name, schema=schema)

collection还可以将数据存储在不同的分区。每个collection默认有一个名为"_default"的分区；插入数据时如果不指定分区，数据都会存储在该默认分区中。

In [4]:
def create_partition(collection, partition_name="example_partition"):
    """
    Create a partition in ``collection`` and print the resulting partition state.

    :param collection: collection object exposing ``create_partition``,
        ``partitions`` and ``has_partition``.
    :param partition_name: name of the partition to create; defaults to
        "example_partition", the name that used to be hard-coded.
    :return: whatever ``collection.create_partition`` returns.
    """
    partition = collection.create_partition(partition_name)

    # Show all partitions of the collection, then confirm the new one exists.
    print(collection.partitions)
    print(collection.has_partition(partition_name))

    return partition
 

## 插入数据
- 插入数据可以根据实际需要，是否插入到特定的分区。当前版本数据格式只能是list，numpy的ndarray也不行；
- 如果主键设置自增auto_id=True，则无需添加主键的值了；
- 数据插入之后先保存在内存中，还需要执行flush将其持久化到磁盘，这样下次才能继续使用。
 

In [ ]:
def insert(collection: "Collection", partition_name=None, num_rows=10000, dim=8):
    """
    Insert randomly generated rows into ``collection`` and flush them.

    :param collection: target collection.
    :param partition_name: partition to insert into; ``None`` is passed
        through to ``collection.insert`` unchanged.
    :param num_rows: number of rows to generate (defaults to 10000, the value
        that used to be hard-coded).
    :param dim: length of each generated vector (defaults to 8, the value that
        used to be hard-coded).
    :return: None
    """
    # No primary-key column is supplied: field_id is declared with auto_id=True.
    # Columns are converted to plain lists before being handed to insert().
    vectors = np.random.random([num_rows, dim]).tolist()
    cat_ids = np.random.randint(0, 10, [num_rows]).tolist()

    mr = collection.insert([vectors, cat_ids], partition_name=partition_name)
    print(mr.primary_keys)

    # Inserted data is held in memory; flush so it is written to disk.
    # The previous call went through the name `pymilvus_orm`, which is never
    # bound as a module in this notebook and therefore raised NameError.
    collection.flush()

 

## 创建索引
- 为向量对应的field创建索引，目的就是实现高效的向量邻近搜索。
- 目前支持的索引类型包括：
- ![索引类型](https://pic1.zhimg.com/v2-e0ab34436d6f9ded6595a41595556818_b.jpg)

In [7]:
def create_index(collection: Collection):
    """
    Build an IVF_FLAT index (L2 metric, nlist=1024) on the vector field.

    The indexed field is the module-level ``field_name``. Once the index has
    been created, the parameters reported by ``collection.index()`` are printed.

    :param collection: collection whose vector field gets indexed.
    :return: None
    """
    ivf_flat_l2 = dict(
        metric_type="L2",
        index_type="IVF_FLAT",
        params=dict(nlist=1024),
    )
    collection.create_index(field_name=field_name, index_params=ivf_flat_l2)

    created_index = collection.index()
    print(created_index.params)
 

NameError: name 'Collection' is not defined

## 查询
- 除了一般的向量搜索，milvus还支持带表达式的标量过滤功能。
- 例如下方代码中，就增加expr="cat_id==2"条件：即只在cat_id为2的向量中进行检索（上面创建了名称为cat_id的field）。
- 但是目前还不支持字符串的过滤功能，官方后续会增加；支持关系运算符（如==, >）、逻辑运算符(AND &&, OR ||)和IN运算符。


In [ ]:
 
def search(collection: Collection, partition_name=None):
    """
    Run two example vector searches and print the hits of the first query.

    The first search is optionally restricted to ``partition_name``; the second
    one searches with the scalar filter ``cat_id==2``. Both use five random
    8-dimensional query vectors against the module-level ``field_name``.

    :param collection: collection to search; it is loaded first.
    :param partition_name: partition to restrict the first search to, if given.
    :return: None
    """
    def _show_first(hits):
        # Only the result set of the first query vector is printed.
        print(hits[0].ids)
        print(hits[0].distances)

    # The collection has to be loaded into memory before searching.
    collection.load()
    l2_nprobe = {"metric_type": "L2", "params": {"nprobe": 10}}

    # Plain vector search, limited to one partition when a name is supplied.
    queries = np.random.random([5, 8]).tolist()
    if partition_name:
        target_partitions = [partition_name]
    else:
        target_partitions = None
    unfiltered = collection.search(
        data=queries,
        anns_field=field_name,
        param=l2_nprobe,
        limit=10,
        partition_names=target_partitions,
    )
    _show_first(unfiltered)

    # Filtered search: only vectors whose cat_id equals 2 are considered.
    queries = np.random.random([5, 8]).tolist()
    filtered = collection.search(
        data=queries,
        anns_field=field_name,
        param=l2_nprobe,
        limit=10,
        expr="cat_id==2",
    )
    _show_first(filtered)

## 删除数据
- 目前支持以下三种删除操作

In [ ]:
def drop(collection: "Collection", partition_name="example_partition"):
    """
    Demonstrate the three delete operations: partition, index, collection.

    The collection itself is dropped last. Previously it was dropped first,
    so the index and partition calls that followed ran against a collection
    that no longer existed.

    :param collection: collection to clean up.
    :param partition_name: partition to drop when it exists; defaults to
        "example_partition", the name create_partition() uses.
    :return: None
    """
    # Milvus 2.x requires a loaded collection to be released before its
    # partitions or index can be dropped.
    collection.release()

    # Drop the partition (skipped when it was never created).
    if collection.has_partition(partition_name):
        collection.drop_partition(partition_name)
    # Drop the index.
    collection.drop_index()
    # Drop the collection; must come last.
    collection.drop()
 

## 释放

In [ ]:
def release(collection: "Collection" = None):
    """
    Release a collection from memory and close the default connection.

    :param collection: collection to release; skipped when ``None``.
    :return: None
    """
    # Imported here because `connections` is not bound by any import cell of
    # this notebook; without it the disconnect call raised NameError.
    from pymilvus import connections

    # Release the collection from memory.
    if collection is not None:
        collection.release()

    # Disconnect from the server to free its resources.
    connections.disconnect("default")
 