In [10]:
from pymilvus import connections, CollectionSchema, FieldSchema, DataType, Collection, Index
from sentence_transformers import SentenceTransformer

# Open the "default" connection alias against the local Milvus server.
connections.connect(alias="default", host="127.0.0.1", port="19530")

# Collection layout: an auto-generated INT64 primary key plus a float vector.
# The vector dimension is hard-coded to 768, assuming BERT-sized embeddings.
id_field = FieldSchema(
    name="id",
    dtype=DataType.INT64,
    is_primary=True,
    auto_id=True,
)
embedding_field = FieldSchema(
    name="embedding",
    dtype=DataType.FLOAT_VECTOR,
    dim=768,
)
fields = [id_field, embedding_field]

schema = CollectionSchema(fields=fields, description="Embeddings collection schema")
collection = Collection(name="embeddings_collection", schema=schema)

# Encode a few example sentences and store their vectors in the collection.
model = SentenceTransformer('bert-base-nli-mean-tokens')

sentences = ["Los perros comen comida para perro", "Batman", "El agua moja"]

# Encode, then convert the encoder output to plain nested lists for insertion.
embeddings = model.encode(sentences).tolist()

# Column-based insert: the primary key is auto-generated (auto_id=True), so the
# only column supplied is the "embedding" field.
insert_result = collection.insert([embeddings])

# Seal the freshly inserted rows so they are persisted and covered by the index
# built afterwards, instead of lingering in an unflushed growing segment.
collection.flush()

# Index configuration for the vector field.
# Other index types such as "IVF_SQ8" or "IVF_PQ" can be used instead of
# "IVF_FLAT"; "IP" (inner product) can replace the Euclidean "L2" metric.
ivf_build_params = {"nlist": 128}
index_params = {
    "index_type": "IVF_FLAT",
    "params": ivf_build_params,
    "metric_type": "L2",
}

# Constructing the Index object creates the index on the "embedding" field.
index = Index(collection, "embedding", index_params)

# The collection has to be loaded before it can be searched.
collection.load()

# Look up the stored vectors closest to a query sentence.
query_sentence = "Batman"
query_vectors = model.encode([query_sentence])
query_embedding = query_vectors.tolist()

# Search settings: Euclidean distance ("IP" would select inner product instead).
search_params = {
    "metric_type": "L2",
    "params": {"nprobe": 10},
}

# Run the search, asking for the three nearest hits with id and vector attached.
results = collection.search(
    data=query_embedding,
    anns_field="embedding",
    param=search_params,
    limit=3,
    output_fields=["id", "embedding"],
)

# Only one query vector was submitted, so its hits are the first result entry.
top_hits = results[0]
for result in top_hits:
    print(f"ID: {result.id}, Distance: {result.distance}")
    # Print the stored embedding returned along with the hit.
    print(f"Embedding: {result.entity.get('embedding')}")


ID: 451828410314131389, Distance: 4.276469456332421e-11
Embedding: [-0.04034863039851189, -0.17345155775547028, 0.08642729371786118, 0.3546779453754425, -0.49720409512519836, 0.054471660405397415, -0.3046944737434387, 0.25453776121139526, -1.2276450395584106, 0.2919151782989502, -1.1208826303482056, 0.05468099191784859, 0.5434548854827881, 0.2457434982061386, 0.6999454498291016, 0.03812474384903908, -0.25922489166259766, 0.11242366582155228, -0.5330581665039062, -0.5647518038749695, -0.453056663274765, -0.1542367935180664, 0.17081785202026367, -0.7219336628913879, 0.38816431164741516, -1.1071504354476929, 0.44108685851097107, -0.35759398341178894, -0.8591535687446594, 0.5661332607269287, -0.27571916580200195, -0.5170292854309082, 0.8813145160675049, -0.5743573307991028, 0.45764851570129395, -0.06696294993162155, -0.7330661416053772, -0.7422056794166565, -0.33786511421203613, -0.5527206063270569, 0.8373575806617737, 0.10988423228263855, 0.23441924154758453, 0.25915268063545227, -0.83682