### 图谱入库

In [None]:
# 入库
from pymilvus import MilvusClient, DataType
from sentence_transformers import SentenceTransformer
from tqdm import tqdm
import json

# Load the sentence-embedding model from a local checkpoint onto a fixed GPU.
embedding_model = SentenceTransformer(model_name_or_path="/path/to/Qwen3-Embedding-4B", device="cuda:7")

# Size declared for every FLOAT_VECTOR field in this cell.
# NOTE(review): assumed to equal the model's output dimension — confirm.
dim = 2560

# Client for the Milvus server on localhost.
client = MilvusClient(uri="http://localhost:19530")

# Edges: rebuild the "triples" collection from ./all_relations.json.
# Any existing collection with this name is dropped first, so each run
# starts from an empty collection.
collection_name = "triples"
batch_size = 64

# Fields: auto-generated INT64 primary key, the raw triple stored as JSON,
# and its embedding vector.
schema = client.create_schema()
schema.add_field(
    field_name="uuid",
    datatype=DataType.INT64,
    is_primary=True,
    auto_id=True,
)
schema.add_field(field_name="triple", datatype=DataType.JSON)
schema.add_field(
    field_name="embedding",
    datatype=DataType.FLOAT_VECTOR,
    dim=dim,
)

# Index the vector field for cosine-similarity search.
index_params = client.prepare_index_params()
index_params.add_index(
    field_name="embedding",
    index_name="embedding_index",
    index_type="AUTOINDEX",
    metric_type="COSINE",
)

if client.has_collection(collection_name):
    client.drop_collection(collection_name)
client.create_collection(
    collection_name=collection_name,
    schema=schema,
    index_params=index_params,
)

with open('./all_relations.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Split the loaded records into consecutive chunks of at most batch_size.
data = [
    data[start:start + batch_size]
    for start in range(0, len(data), batch_size)
]

for batch in tqdm(data):
    # Each triple is embedded from its JSON text (non-ASCII kept verbatim).
    embeddings = embedding_model.encode(
        [json.dumps(triple, ensure_ascii=False) for triple in batch]
    )
    # Pair every triple with the vector produced for it, in order.
    insert_item = [
        {"triple": triple, "embedding": vector}
        for triple, vector in zip(batch, embeddings)
    ]
    client.insert(collection_name=collection_name, data=insert_item)

# Entities: rebuild the "entities" collection from ./all_entities.json.
# Any existing collection with this name is dropped first, so each run
# starts from an empty collection.
collection_name = "entities"
batch_size = 64

# Fields: auto-generated INT64 primary key, the raw entity stored as JSON,
# and its embedding vector.
schema = client.create_schema()
schema.add_field(
    field_name="uuid",
    datatype=DataType.INT64,
    is_primary=True,
    auto_id=True,
)
schema.add_field(field_name="entity", datatype=DataType.JSON)
schema.add_field(
    field_name="embedding",
    datatype=DataType.FLOAT_VECTOR,
    dim=dim,
)

# Index the vector field for cosine-similarity search.
index_params = client.prepare_index_params()
index_params.add_index(
    field_name="embedding",
    index_name="embedding_index",
    index_type="AUTOINDEX",
    metric_type="COSINE",
)

if client.has_collection(collection_name):
    client.drop_collection(collection_name)
client.create_collection(
    collection_name=collection_name,
    schema=schema,
    index_params=index_params,
)

with open('./all_entities.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Split the loaded records into consecutive chunks of at most batch_size.
data = [
    data[start:start + batch_size]
    for start in range(0, len(data), batch_size)
]

for batch in tqdm(data):
    # Each entity is embedded from its JSON text (non-ASCII kept verbatim).
    embeddings = embedding_model.encode(
        [json.dumps(entity, ensure_ascii=False) for entity in batch]
    )
    # Pair every entity with the vector produced for it, in order.
    insert_item = [
        {"entity": entity, "embedding": vector}
        for entity, vector in zip(batch, embeddings)
    ]
    client.insert(collection_name=collection_name, data=insert_item)

### 病案入库

In [None]:
# 入库
from pymilvus import MilvusClient, DataType
from sentence_transformers import SentenceTransformer
from tqdm import tqdm
import json

# Load the sentence-embedding model from a local checkpoint onto a fixed GPU.
embedding_model = SentenceTransformer(model_name_or_path="/path/to/Qwen3-Embedding-4B", device="cuda:6")

# Size declared for the FLOAT_VECTOR field in this cell.
# NOTE(review): assumed to equal the model's output dimension — confirm.
dim = 2560

# Client for the Milvus server on localhost.
client = MilvusClient(uri="http://localhost:19530")

# Medical cases: rebuild the "medical_cases" collection from
# ./all_medical_cases.json. Any existing collection with this name is
# dropped first, so each run starts from an empty collection.
collection_name = "medical_cases"
# NOTE(review): smaller batch than the graph ingestion, presumably because
# case texts are long — confirm.
batch_size = 8

# Fields: auto-generated INT64 primary key, the whole case record stored as
# JSON, and the embedding vector of its text.
schema = client.create_schema()
schema.add_field(
    field_name="uuid",
    datatype=DataType.INT64,
    is_primary=True,
    auto_id=True,
)
schema.add_field(field_name="full_case", datatype=DataType.JSON)
schema.add_field(
    field_name="embedding",
    datatype=DataType.FLOAT_VECTOR,
    dim=dim,
)

# Index the vector field for cosine-similarity search.
index_params = client.prepare_index_params()
index_params.add_index(
    field_name="embedding",
    index_name="embedding_index",
    index_type="AUTOINDEX",
    metric_type="COSINE",
)

if client.has_collection(collection_name):
    client.drop_collection(collection_name)
client.create_collection(
    collection_name=collection_name,
    schema=schema,
    index_params=index_params,
)

with open('./all_medical_cases.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Split the loaded records into consecutive chunks of at most batch_size.
data = [
    data[start:start + batch_size]
    for start in range(0, len(data), batch_size)
]

for batch in tqdm(data):
    # Only the '病案内容' field is embedded; the full record is what gets stored.
    embeddings = embedding_model.encode([case['病案内容'] for case in batch])
    # Pair every case record with the vector produced for it, in order.
    insert_item = [
        {"full_case": case, "embedding": vector}
        for case, vector in zip(batch, embeddings)
    ]
    client.insert(collection_name=collection_name, data=insert_item)