## 对比FLAT索引性能

In [1]:
# 连接部署在Docker上的Standalone版本Milvus
from pymilvus import connections, db

conn = connections.connect(host="127.0.0.1", port=19530)

# 查看当前已有数据库
db.list_database()

['default',
 'wwf_mac_docker',
 'RESH_100k_128',
 'FLAT_100k_128',
 'HNSW_100k_128']

In [2]:
# 创建测试RESH的数据库
database = db.create_database("IVF_PQ_100k_128")

# 列出所有数据库
db.list_database()

['wwf_mac_docker',
 'RESH_100k_128',
 'FLAT_100k_128',
 'HNSW_100k_128',
 'IVF_PQ_100k_128',
 'default']

In [3]:
# 创建Collection
from pymilvus import MilvusClient, DataType

client = MilvusClient(
    uri="http://localhost:19530"
)

client.create_collection(
    collection_name="IVF_PQ_100k_128_collection",
    dimension=128,
    index_params="IVF_PQ",
    # index_params="IVF_FLAT",
    # index_params="IVF_SQ8",
    # index_params="IVF_PQ",
    # index_params="HNSW",
    
    metric_type="L2"
    # metric_type="IP"
    # metric_type="COSINE"
)

res = client.get_load_state(
    collection_name="IVF_PQ_100k_128_collection"
)

print(res)

{'state': <LoadState: Loaded>}


In [4]:
import numpy as np

# 加载向量数据
ResNet50_features = np.load('D:\\Python_Project\\Learned_Index\\Milvus\\RESH\\Original_data\\128_100k_ResNet50_vector.npy')

# 打印每个特征向量的形状
print("Features shape:", ResNet50_features.shape)

# 为每个特征生成唯一的 ID 列表
ResNet50_features_ids = list(range(len(ResNet50_features)))

# 随机选择 n 个向量
n = 100  # 你可以修改 n 的值
random_indices = np.random.choice(len(ResNet50_features), size=n, replace=False)
random_vectors = ResNet50_features[random_indices]

# 将随机选出的向量转为 [[ ]] 的形式
random_vectors_list = random_vectors.tolist()

# 打印随机选出的向量
print(f"Randomly selected {n} vectors in [[ ]] format:", random_vectors_list)

Features shape: (100000, 128)
Randomly selected 100 vectors in [[ ]] format: [[-0.2690880298614502, 0.14092440903186798, 0.4268725514411926, -0.7835665941238403, -0.03454005718231201, 0.18969708681106567, -0.2324274629354477, -0.5006119608879089, 0.9216982126235962, -0.27210670709609985, -0.6909586787223816, -0.047334589064121246, -0.9079777598381042, 0.15261590480804443, 0.1450376808643341, 0.45457273721694946, 0.24949556589126587, 0.7876711487770081, -0.37592145800590515, -0.567878007888794, 0.5251071453094482, -0.15146362781524658, 0.014821721240878105, -0.4297426640987396, 0.4374500513076782, -0.5264546275138855, -0.6358568072319031, -0.3587281405925751, -0.6787538528442383, 0.3447459638118744, -0.18217553198337555, 0.45019519329071045, -0.16980431973934174, 0.3532274663448334, 0.035347726196050644, 0.10810759663581848, 0.09151696413755417, -0.040263980627059937, -0.13012248277664185, -0.5698701739311218, -0.7540392279624939, 0.26169753074645996, 0.4164671301841736, -0.106197044253

In [5]:
# 通用的插入函数，用于将高维向量插入到Collection
def insert_to_milvus(collection_name, features, ids):
    # features 是一个 NumPy 数组，直接使用
    # 构建 Milvus 所需的插入数据结构
    data = [{"id": ids[i], "vector": features[i].tolist()} for i in range(len(features))]

    # 插入数据到集合中 (假设 client 是正确连接的 Milvus 客户端实例)
    res = client.insert(collection_name=collection_name, data=data)

    # 打印插入结果
    print(f"Inserting into {collection_name} completed with result: {len(res)}")

# 插入特征到 _collection
insert_to_milvus("IVF_PQ_100k_128_collection", ResNet50_features, ResNet50_features_ids)

Inserting into IVF_PQ_100k_128_collection completed with result: 3


In [6]:
import numpy as np

# 加载查询负载--完全随机负载100
Sample_features = np.load('D:\Python_Project\Learned_Index\Milvus\RESH\Query\Completely_random\sampled_100_vectors.npy')

# 打印每个特征向量的形状
print("Sample Features Shape:", Sample_features.shape)

# # 为每个特征生成唯一的 ID 列表
# Sample_features_ids = list(range(len(Sample_features)))
# 
# # 随机选择 n 个向量
# n = 100  # 你可以修改 n 的值
# random_indices = np.random.choice(len(Sample_features), size=n, replace=False)
# random_vectors = Sample_features[random_indices]

# 将随机选出的向量转为 [[ ]] 的形式
Sample_features_list = Sample_features.tolist()

# 打印随机选出的向量
# print(f"Randomly selected {n} vectors in [[ ]] format:", random_vectors_list)

Sample Features Shape: (100, 128)


In [25]:
# 批量向量搜索 10NN
# import json
# res = client.search(
#     collection_name="FLAT_100k_128_collection",
#     data=Sample_features_list,
#     limit=10,
#     search_params={"metric_type": "L2",} 
# )
# result = json.dumps(res, indent=4)
# result = json.dumps(res)
# print(result)
import time

# 记录开始时间
start_time = time.time()

# 执行搜索操作
res = client.search( 
    collection_name="IVF_PQ_100k_128_collection",
    data=Sample_features_list,
    limit=10,
    search_params={"metric_type": "L2",}
)

# 记录结束时间
end_time = time.time()

# 计算并打印运行时间（单位：毫秒）
elapsed_time = (end_time - start_time) * 1000  # 转换为毫秒
print(f"运行时间: {elapsed_time:.2f} 毫秒")


运行时间: 53.47 毫秒
