连接数据库

In [13]:
from pymilvus import MilvusClient, DataType, Function, FunctionType

In [14]:
# Open a client connection to the Milvus instance at localhost:19530.
client = MilvusClient(uri="http://localhost:19530")

设计schema

In [15]:
# Create a collection schema with the dynamic field enabled and
# auto-generated primary keys (auto_id).
schema = client.create_schema(enable_dynamic_field=True, auto_id=True)

In [16]:
# Declare the four explicit fields of the collection.
# Primary key (the schema was created with auto_id=True).
schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True)
# Raw text of an entry.
# NOTE(review): Milvus documents VARCHAR max_length in bytes, so multi-byte
# (e.g. Chinese) text fits fewer than 1500 characters -- confirm this is enough.
schema.add_field(field_name="text", datatype=DataType.VARCHAR, max_length=1500)
# 1024-dimensional float embedding.
schema.add_field(field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=1024)
# Integer timestamp; the unit is not fixed here -- TODO confirm (seconds vs. ms).
# Kept as the last expression so the cell displays the resulting schema.
schema.add_field(field_name="timestamp", datatype=DataType.INT64)

{'auto_id': True, 'description': '', 'fields': [{'name': 'id', 'description': '', 'type': <DataType.INT64: 5>, 'is_primary': True, 'auto_id': False}, {'name': 'text', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 1500}}, {'name': 'vector', 'description': '', 'type': <DataType.FLOAT_VECTOR: 101>, 'params': {'dim': 1024}}, {'name': 'timestamp', 'description': '', 'type': <DataType.INT64: 5>}], 'enable_dynamic_field': True}

In [17]:
# Text-embedding function: the "text" field is embedded through the
# SiliconFlow provider with the BAAI/bge-m3 model, and the result is written
# to the "vector" field.
text_embedding_function = Function(
    name="bgem3_embedding",                    # identifier of this function
    function_type=FunctionType.TEXTEMBEDDING,
    input_field_names=["text"],                # scalar field that gets embedded
    output_field_names=["vector"],             # vector field receiving the embeddings
    params={"provider": "siliconflow", "model_name": "BAAI/bge-m3"},
)

In [18]:
# Attach the embedding function to the schema; the returned schema is displayed
# as the cell output.
schema.add_function(text_embedding_function)

{'auto_id': True, 'description': '', 'fields': [{'name': 'id', 'description': '', 'type': <DataType.INT64: 5>, 'is_primary': True, 'auto_id': False}, {'name': 'text', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 1500}}, {'name': 'vector', 'description': '', 'type': <DataType.FLOAT_VECTOR: 101>, 'params': {'dim': 1024}, 'is_function_output': True}, {'name': 'timestamp', 'description': '', 'type': <DataType.INT64: 5>}], 'enable_dynamic_field': True, 'functions': [{'name': 'bgem3_embedding', 'description': '', 'type': <FunctionType.TEXTEMBEDDING: 2>, 'input_field_names': ['text'], 'output_field_names': ['vector'], 'params': {'provider': 'siliconflow', 'model_name': 'BAAI/bge-m3'}}]}

In [19]:
# Index definition for the "vector" field: AUTOINDEX with the COSINE metric.
index_params = client.prepare_index_params()
index_params.add_index(field_name="vector", metric_type="COSINE", index_type="AUTOINDEX")

In [20]:
# Create the "aoi_memories" database only when it is not listed yet, so the
# cell can be re-run safely.
existing_databases = client.list_databases()
if "aoi_memories" not in existing_databases:
    client.create_database(db_name="aoi_memories")

创建 collection 前，记得先切换到目标数据库（aoi_memories）。

In [21]:
# Switch the client to the "aoi_memories" database before creating the collection.
client.use_database(db_name="aoi_memories")

In [22]:
# Rebuild the "diaries" collection from scratch.
# WARNING: an existing "diaries" collection is dropped first, so every run of
# this cell discards whatever was stored in it.
if client.has_collection("diaries"):
    client.drop_collection("diaries")

client.create_collection(collection_name="diaries", schema=schema, index_params=index_params)

现在已经建好数据库 aoi_memories 与 collection diaries 了！

In [23]:
# Display the stored definition of "diaries" to verify the fields and function.
client.describe_collection("diaries")

{'collection_name': 'diaries',
 'auto_id': True,
 'num_shards': 1,
 'description': '',
 'fields': [{'field_id': 100,
   'name': 'id',
   'description': '',
   'type': <DataType.INT64: 5>,
   'params': {},
   'auto_id': True,
   'is_primary': True},
  {'field_id': 101,
   'name': 'text',
   'description': '',
   'type': <DataType.VARCHAR: 21>,
   'params': {'max_length': 1500}},
  {'field_id': 102,
   'name': 'vector',
   'description': '',
   'type': <DataType.FLOAT_VECTOR: 101>,
   'params': {'dim': 1024},
   'is_function_output': True},
  {'field_id': 103,
   'name': 'timestamp',
   'description': '',
   'type': <DataType.INT64: 5>,
   'params': {}}],
 'functions': [{'name': 'bgem3_embedding',
   'id': 100,
   'description': '',
   'type': <FunctionType.TEXTEMBEDDING: 2>,
   'params': {'provider': 'siliconflow', 'model_name': 'BAAI/bge-m3'},
   'input_field_names': ['text'],
   'input_field_ids': [101],
   'output_field_names': ['vector'],
   'output_field_ids': [102]}],
 'aliases': 

接下来，让我们插入diaries中的数据到数据库中

开始插入吧。

In [24]:
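# TODO: build `data` (a list of diary records) first, then insert it into the
# "diaries" collection created above:
# client.insert("diaries", data)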