# Milvus CURD

In [99]:
from pymilvus import (
    connections,
    utility,
    FieldSchema,
    CollectionSchema,
    DataType,
    Collection,
    db,
    Partition
)

import random
import datetime


## Connection

In [100]:
conn = connections.connect(
  alias="default",
  host='18.171.129.243',
  port='80',
  user="root",
  password="NetMindMilvusDB",
)

## Create: Database, Collection, Partition, Entities

### Create Database

In [80]:
# create database
# database = db.create_database("gaming2")

db.using_database("test_milvus_db")

db.list_database()

['default', 'gaming', 'gaming2', 'test_milvus_db']

In [101]:
db.using_database("test_milvus_db")

In [82]:
utility.list_collections()

['test_milvus_coll', 'test_people_collection']

In [110]:
collection = Collection('test_milvus_coll')
collection.schema

{'auto_id': False, 'description': '', 'fields': [{'name': 'docstore_id', 'description': '', 'type': <DataType.INT64: 5>, 'is_primary': True, 'auto_id': False}, {'name': 'meta_data_dict', 'description': '', 'type': <DataType.JSON: 23>}, {'name': 'description', 'description': '', 'type': <DataType.FLOAT_VECTOR: 101>, 'params': {'dim': 1536}}, {'name': 'full_content', 'description': '', 'type': <DataType.JSON: 23>}]}

In [119]:
res = collection.query(
  expr = "meta_data_dict['importance'] == '8'",
  partition="default",
  offset = 0,
  limit = 10, 
  output_fields = ["id", "create_time"]
)

RPC error: [query], <MilvusException: (code=65535, message=field id not exist)>, <Time:{'RPC start': '2024-01-30 15:21:04.411416', 'RPC error': '2024-01-30 15:21:04.426345'}>


MilvusException: <MilvusException: (code=65535, message=field id not exist)>

### Create Collection

In [14]:
# collection schema

create_time = FieldSchema(
  name="create_time",
  dtype=DataType.VARCHAR,
  max_length=50,
)

last_modified = FieldSchema(
  name="last_modified",
  dtype=DataType.VARCHAR,
  max_length=50,
)

memory_id = FieldSchema(
    name="id",
    dtype=DataType.INT64,
    is_primary=True,
)

character_id = FieldSchema(
    name="character_id",
    dtype=DataType.INT64
)

importance = FieldSchema(
  name="importance",
  dtype=DataType.INT8,
)

category = FieldSchema(
  name="category",
  dtype=DataType.VARCHAR,
  max_length=50,
)

vector = FieldSchema(
  name="memory_vector",
  dtype=DataType.FLOAT_VECTOR,  # BINARY_VECTOR or FLOAT_VECTOR
  default_value=[1.0] * 1024,
  dim=1024
)

content = FieldSchema(
    name="real_memory",
    dtype=DataType.VARCHAR,
    max_length=9999,
    default_value="Unknown"
)

In [15]:
# create collection

coll_schema = CollectionSchema(
  fields=[memory_id, character_id, importance, create_time, category, vector, content, last_modified],
  description="memory",
  
  # 不开启 dynamic 意味着之后插入的所有数据实体都要与 schema 匹配；而开启后，可以插入具有新字段的实体，有点类似 nonsql
  # https://milvus.io/docs/dynamic_schema.md#Create-collection-with-dynamic-schema-enabled
  enable_dynamic_field=True
)

collection_name = "character_memory"

collection = Collection(
    name=collection_name,
    schema=coll_schema,
    using='default',  # 在哪个 server 上创建 collection
    shards_num=1,  # 把写入操作分配到 2 个不同的 node / server 上并行进行
    
)

### Create Partition

In [16]:
collection = Collection("character_memory")      # Get an existing collection.
collection.create_partition("observation")

{"name":"observation","collection_name":"character_memory","description":""}

### Create Entities

In [17]:
start_date = datetime.date(2018, 1, 1)
end_date = datetime.date(2024, 12, 31)

def random_date():
    """Generate a random date between `start_date` and `end_date`"""
    time_between_dates = end_date - start_date
    days_between_dates = time_between_dates.days
    random_number_of_days = random.randrange(days_between_dates)
    random_date = start_date + datetime.timedelta(days=random_number_of_days)
    return random_date

In [18]:
entities = [
  [i for i in range(3000)],
  [i for i in range(3000)],
  [random.choice([1, 2]) for i in range(3000)],
  [random_date().strftime('%Y-%m-%d') for _ in range(3000)],
  ["observation" for _ in range(3000)],
  [[random.randint(0, 10)] * 1024 for _ in range(3000)],
  ["The test content" for _ in range(3000)],
  [random_date().strftime('%Y-%m-%d') for _ in range(3000)],
]

### Insert into Collection and Partition

In [19]:
collection = Collection("character_memory")  
collection.insert(entities)  # 插入 collection

collection.insert(entities, 'observation')  # 插入 partition

(insert count: 3000, delete count: 0, upsert count: 0, timestamp: 447357770371170305, success count: 3000, err count: 0)

## Update

Milvus 不支持 update 操作

1. 需要删除后，再插入

2. upsert

In [20]:
collection = Collection("character_memory")      # Get an existing collection.
collection.delete("id == 10")

# ids = collection.insert([...])

(insert count: 0, delete count: 1, upsert count: 0, timestamp: 0, success count: 0, err count: 0)

In [None]:
collection = Collection("character_memory") # Get an existing collection.
mr = collection.upsert(entities)

## Read

### Database

In [21]:
db.list_database()

['gaming2', 'default', 'gaming']

### Collection

In [22]:
collection.schema                # Return the schema.CollectionSchema of the collection.
collection.description           # Return the description of the collection.
collection.name                  # Return the name of the collection.
collection.is_empty              # Return the boolean value that indicates if the collection is empty.
collection.num_entities          # Return the number of entities in the collection.
collection.primary_field         # Return the schema.FieldSchema of the primary key field.
collection.partitions            # Return the list[Partition] object.
collection.indexes               # Return the list[Index] object.

[]

### Entities

In [23]:
index_params = {
  "metric_type":"L2",
  "index_type":"IVF_FLAT",
  "params":{"nlist":2}
}

collection = Collection("character_memory")      
collection.create_index(
  field_name="memory_vector", 
  index_params=index_params
)

utility.index_building_progress("character_memory")

partition = Partition("character_memory", "observation")       # Get an existing partition.
partition.load(replica_number=1)

In [24]:
res = collection.query(
  expr = "create_time >= '2021-06-15'",
  partition="observation",
  offset = 0,
  limit = 10, 
  output_fields = ["id", "create_time"]
)

res

[{'id': 0, 'create_time': '2024-12-09'},
 {'id': 1, 'create_time': '2022-12-10'},
 {'id': 2, 'create_time': '2024-12-17'},
 {'id': 4, 'create_time': '2024-01-30'},
 {'id': 5, 'create_time': '2021-11-09'},
 {'id': 6, 'create_time': '2022-03-13'},
 {'id': 8, 'create_time': '2022-02-04'},
 {'id': 9, 'create_time': '2023-12-31'},
 {'id': 11, 'create_time': '2024-08-11'},
 {'id': 13, 'create_time': '2022-07-21'}]

In [27]:
search_params = {
    "metric_type": "L2", 
    "offset": 0, 
    "ignore_growing": False, 
    "params": {"nprobe": 10}
}

results = collection.search(
    data=[[0.1] * 1024], 
    anns_field="memory_vector", 
    # the sum of `offset` in `param` and `limit` 
    # should be less than 16384.
    param=search_params,
    limit=10,
    expr=None,
    # set the names of the fields you want to 
    # retrieve from the search result.
    output_fields=['id', 'real_memory'],
    consistency_level="Strong"
)

results[0][0]
partition.release()


## Delete: Database, Collection, Partition, Entities

In [29]:
db.drop_database("character_memory")

utility.drop_collection("new_collection")

collection.drop_partition(partition_name="observation")


collection.delete("id == 10", partition_name="observation")

RPC error: [delete], <MilvusException: (code=200, message=Failed to get partition id: partition not found[partition=observation])>, <Time:{'RPC start': '2024-01-29 13:23:18.873518', 'RPC error': '2024-01-29 13:23:18.889841'}>


MilvusException: <MilvusException: (code=200, message=Failed to get partition id: partition not found[partition=observation])>

## Dis-connection

In [3]:
connections.disconnect(alias="default")

# MongoDB CURD

## Connection

In [93]:
from urllib.parse import quote_plus
from pymongo import MongoClient

# 用户名和密码
username = "NetMind"
password = "NetMindMongoDB"

# 将用户名和密码进行转义
escaped_username = quote_plus(username)
escaped_password = quote_plus(password)

client = MongoClient(f"mongodb://{escaped_username}:{escaped_password}@18.171.129.243:27017")

In [61]:
# client = MongoClient('mongodb://18.171.129.243:27017')

## Create: Database, Collection

In [94]:
db = client['test_db1']

if 'mycollection1' not in db.list_collection_names():
    collection = db.create_collection('mycollection1')

collection = db['mycollection1']

In [95]:
# 列出所有 database
client.list_database_names()

['admin',
 'config',
 'local',
 'test_attribute_storage',
 'test_db',
 'test_db1',
 'test_people_collection',
 'test_people_partition',
 'zips-db']

In [96]:
# 列出 database 里所有 collection
db = client['test_people_collection']
db.list_collection_names()

['test_collection', 'registered_attributes', 'milvus_data']

In [97]:
db.drop_collection('milvus_data')

{'nIndexesWas': 1, 'ns': 'test_people_collection.milvus_data', 'ok': 1.0}

In [98]:
db.drop_collection('registered_attributes')

{'nIndexesWas': 1,
 'ns': 'test_people_collection.registered_attributes',
 'ok': 1.0}

In [45]:
collection = client['test_db']["mycollection"]

# 插入单个文档
document = {"name": "John", "age": 30}
inserted_id = collection.insert_one(document).inserted_id

# 插入多个文档
documents = [
    {"name": "Alice", "age": 25},
    {"name": "Bob", "age": 35}
]
inserted_ids = collection.insert_many(documents).inserted_ids

## Read

In [46]:
result = collection.find_one({"name": "John"})
print(result)

# 查询多个文档
results = collection.find()
for document in results:
    print(document)

{'_id': ObjectId('65b7d8c9418dfdd8fc1c7c19'), 'name': 'John', 'age': 30}
{'_id': ObjectId('65b7d8c9418dfdd8fc1c7c19'), 'name': 'John', 'age': 30}
{'_id': ObjectId('65b7d8c9418dfdd8fc1c7c1a'), 'name': 'Alice', 'age': 25}
{'_id': ObjectId('65b7d8c9418dfdd8fc1c7c1b'), 'name': 'Bob', 'age': 35}


In [49]:
collection = client['test_db']["mycollection1"]

In [50]:
results = collection.find()
results

<pymongo.cursor.Cursor at 0x7fa448746ec0>

In [48]:
results = collection.find()
for document in results:
    print(document['name'])

John
Alice
Bob


In [54]:
# 查询所有数据
cursor = collection.find({})

# 遍历并打印数据
for document in cursor:
    print(document)

{'_id': ObjectId('65b285e9651391e5b6934d69'), 'name': 'John', 'age': 30}
{'_id': ObjectId('65b285e9651391e5b6934d6a'), 'name': 'Alice', 'age': 25}
{'_id': ObjectId('65b285e9651391e5b6934d6b'), 'name': 'Bob', 'age': 35}


## Update

In [55]:
collection.update_one({"name": "John"}, {"$set": {"age": 31}})

# 更新多个文档
collection.update_many({"age": {"$lt": 30}}, {"$set": {"status": "young"}})

UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)

In [56]:
# 查询所有数据
cursor = collection.find({})

# 遍历并打印数据
for document in cursor:
    print(document)

{'_id': ObjectId('65b285e9651391e5b6934d69'), 'name': 'John', 'age': 31}
{'_id': ObjectId('65b285e9651391e5b6934d6a'), 'name': 'Alice', 'age': 25, 'status': 'young'}
{'_id': ObjectId('65b285e9651391e5b6934d6b'), 'name': 'Bob', 'age': 35}


## Delete

In [57]:
# 删除单个文档
collection.delete_one({"name": "Alice"})

# 删除多个文档
collection.delete_many({"age": {"$gt": 40}})

DeleteResult({'n': 0, 'ok': 1.0}, acknowledged=True)

In [58]:
# 查询所有数据
cursor = collection.find({})

# 遍历并打印数据
for document in cursor:
    print(document)

{'_id': ObjectId('65b285e9651391e5b6934d69'), 'name': 'John', 'age': 31}
{'_id': ObjectId('65b285e9651391e5b6934d6b'), 'name': 'Bob', 'age': 35}


In [59]:
# 删除 database 和 collection

db['mycollection'].drop()

client.drop_database('test_db')

## Dis-connection

In [60]:
client.close()