## 对比FLAT索引性能

In [1]:
# 连接部署在Docker上的Standalone版本Milvus
from pymilvus import connections, db

conn = connections.connect(host="127.0.0.1", port=19530)

# 查看当前已有数据库
db.list_database()

['FLAT_100k_128',
 'HNSW_100k_128',
 'IVF_PQ_100k_128',
 'default',
 'wwf_mac_docker',
 'RESH_100k_128']

In [2]:
# 创建测试RESH的数据库
database = db.create_database("FLAT_100k_128")

# 列出所有数据库
db.list_database()

RPC error: [create_database], <MilvusException: (code=65535, message=database already exist: FLAT_100k_128)>, <Time:{'RPC start': '2024-11-05 21:03:30.569458', 'RPC error': '2024-11-05 21:03:30.571816'}>


MilvusException: <MilvusException: (code=65535, message=database already exist: FLAT_100k_128)>

In [6]:
# 创建Collection
from pymilvus import MilvusClient, DataType

client = MilvusClient(
    uri="http://localhost:19530"
)

# client.create_collection(
#     collection_name="FLAT_100k_128_collection",
#     dimension=128,
#     index_params="FLAT",
#     # index_params="IVF_FLAT",
#     # index_params="IVF_SQ8",
#     # index_params="IVF_PQ",
#     # index_params="HNSW",
#     
#     metric_type="L2"
#     # metric_type="IP"
#     # metric_type="COSINE"
# )
# 
# res = client.get_load_state(
#     collection_name="FLAT_100k_128_collection"
# )
# 
# print(res)

In [3]:
import numpy as np

# 加载向量数据
ResNet50_features = np.load('D:\\Python_Project\\Learned_Index\\Milvus\\RESH\\Original_data\\128_100k_ResNet50_vector.npy')

# 打印每个特征向量的形状
print("Features shape:", ResNet50_features.shape)

# 为每个特征生成唯一的 ID 列表
ResNet50_features_ids = list(range(len(ResNet50_features)))

# 随机选择 n 个向量
n = 100  # 你可以修改 n 的值
random_indices = np.random.choice(len(ResNet50_features), size=n, replace=False)
random_vectors = ResNet50_features[random_indices]

# 将随机选出的向量转为 [[ ]] 的形式
random_vectors_list = random_vectors.tolist()

# 打印随机选出的向量
print(f"Randomly selected {n} vectors in [[ ]] format:", random_vectors_list)

Features shape: (100000, 128)
Randomly selected 100 vectors in [[ ]] format: [[-0.31843292713165283, 0.38203275203704834, -0.03642767667770386, -0.6445465087890625, 0.13534516096115112, 0.21610519289970398, -0.09315978735685349, -0.10104375332593918, 0.2861758768558502, -0.4295685887336731, -0.24169908463954926, -0.10728038102388382, -0.3620072305202484, -0.09964197129011154, -0.20797693729400635, 0.03354489058256149, 0.06802722811698914, 0.4613555073738098, 0.06697122752666473, -0.3451226055622101, 0.7112191319465637, -0.01660383678972721, 0.3230225741863251, 0.17623618245124817, 0.5096321105957031, 0.0006500380113720894, -0.2657259404659271, -0.5504412055015564, -0.34622329473495483, 0.19337019324302673, 0.16623206436634064, 0.31427156925201416, -0.11549989134073257, 0.4678223133087158, 0.5120296478271484, 0.3858036994934082, 0.3170291483402252, -0.326404333114624, 0.14718541502952576, -0.006032692268490791, 0.07370764017105103, 0.3245243728160858, 0.6832085847854614, -0.084693059325

In [6]:
# 通用的插入函数，用于将高维向量插入到Collection
def insert_to_milvus(collection_name, features, ids):
    # features 是一个 NumPy 数组，直接使用
    # 构建 Milvus 所需的插入数据结构
    data = [{"id": ids[i], "vector": features[i].tolist()} for i in range(len(features))]

    # 插入数据到集合中 (假设 client 是正确连接的 Milvus 客户端实例)
    res = client.insert(collection_name=collection_name, data=data)

    # 打印插入结果
    print(f"Inserting into {collection_name} completed with result: {len(res)}")

# 插入特征到 _collection
insert_to_milvus("FLAT_100k_128_collection", ResNet50_features, ResNet50_features_ids)

Inserting into FLAT_100k_128_collection completed with result: 3


In [4]:
import numpy as np

# 加载查询负载--完全随机负载100
Sample_features = np.load('D:\Python_Project\Learned_Index\Milvus\RESH\Query\Completely_random\sampled_100_vectors.npy')

# 打印每个特征向量的形状
print("Sample Features Shape:", Sample_features.shape)

# # 为每个特征生成唯一的 ID 列表
# Sample_features_ids = list(range(len(Sample_features)))
# 
# # 随机选择 n 个向量
# n = 100  # 你可以修改 n 的值
# random_indices = np.random.choice(len(Sample_features), size=n, replace=False)
# random_vectors = Sample_features[random_indices]

# 将随机选出的向量转为 [[ ]] 的形式
Sample_features_list = Sample_features.tolist()

# 打印随机选出的向量
# print(f"Randomly selected {n} vectors in [[ ]] format:", random_vectors_list)

Sample Features Shape: (100, 128)


In [51]:
# 批量向量搜索 10NN
# import json
# res = client.search(
#     collection_name="FLAT_100k_128_collection",
#     data=Sample_features_list,
#     limit=10,
#     search_params={"metric_type": "L2",} 
# )
# result = json.dumps(res, indent=4)
# result = json.dumps(res)
# print(result)
import time

# 记录开始时间
start_time = time.time()

# 执行搜索操作
res = client.search( 
    collection_name="FLAT_100k_128_collection",
    data=Sample_features_list,
    limit=10,
    search_params={"metric_type": "L2",}
)

# 记录结束时间
end_time = time.time()

# 计算并打印运行时间（单位：毫秒）
elapsed_time = (end_time - start_time) * 1000  # 转换为毫秒
print(f"运行时间: {elapsed_time:.2f} 毫秒")


运行时间: 48.24 毫秒
