In [15]:
from pymilvus import connections, CollectionSchema, FieldSchema, DataType, Collection, Index
from sentence_transformers import SentenceTransformer

# Conectarse a Milvus
connections.connect("default", host="127.0.0.1", port="19530")

# Definir el esquema para la colección, incluyendo el campo para la oración
fields = [
    FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True),
    FieldSchema(name="sentence", dtype=DataType.VARCHAR, max_length=512),  # Campo para almacenar la oración
    FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=768)  # Asumiendo 768 dimensiones para BERT
]

schema = CollectionSchema(fields, "Embeddings collection schema")
collection = Collection(name="embeddings_collection", schema=schema)

# Insertar algunos vectores
model = SentenceTransformer('bert-base-nli-mean-tokens')

# sentences = ["Los perros comen comida para perro", "Batman", "El agua moja"]
# embeddings = model.encode(sentences)

# # Convertir embeddings a lista para la inserción
# embeddings = embeddings.tolist()

# # Inserción en Milvus con las oraciones
# insert_result = collection.insert([sentences, embeddings])

# # Crear un índice para la colección
# index_params = {
#     "index_type": "IVF_FLAT",  # Puedes usar otros tipos como "IVF_SQ8", "IVF_PQ", etc.
#     "params": {"nlist": 128},  # Parámetros del índice
#     "metric_type": "L2"  # Distancia Euclidiana (puedes usar "IP" para Inner Product)
# }

# index = Index(collection, "embedding", index_params)

# Cargar la colección para la búsqueda
collection.load()

# Simular la búsqueda de un vector similar
query_sentence = "Fruta"
query_embedding = model.encode([query_sentence]).tolist()

# Definir parámetros de búsqueda
search_params = {
    "metric_type": "L2",  # O "IP" para Inner Product
    "params": {"nprobe": 10}
}

# Realizar la búsqueda, incluyendo la oración original en los resultados
results = collection.search(query_embedding, "embedding", search_params, limit=3, output_fields=["id", "sentence", "embedding"])

# Mostrar resultados
for result in results[0]:
    print(f"ID: {result.id}, Sentence: {result.entity.get('sentence')}, Distance: {result.distance}")
    # Recuperar el embedding original asociado al resultado
    print(f"Embedding: {result.entity.get('embedding')}")


ID: 452014510521188458, Sentence: Frutilla, Distance: 35.981361389160156
Embedding: [-0.3557904362678528, -0.7520185112953186, 1.7504218816757202, 0.21710476279258728, 0.42934244871139526, 0.520322859287262, -0.45694494247436523, 0.5300481915473938, -0.13337251543998718, 0.06792959570884705, -0.4364891052246094, 0.623395562171936, 0.5315899848937988, 0.7439078092575073, 0.613878071308136, 0.34427255392074585, -0.9653444290161133, -0.15714213252067566, 0.2030114233493805, -0.7980279326438904, 0.12241677939891815, 0.6109797954559326, -0.3688145577907562, -0.671744704246521, -0.20011253654956818, -0.7077829241752625, 0.41054025292396545, -1.7507450580596924, -0.3259153664112091, 0.08974325656890869, 0.02475792169570923, -0.3157457411289215, 0.6797226667404175, 0.165561243891716, 0.15504615008831024, 0.2587616443634033, -0.1956014633178711, 0.1662573218345642, -0.19751863181591034, -0.5974143743515015, 1.4721734523773193, 0.059585846960544586, 0.8480245471000671, -0.07627509534358978, -0.6

In [5]:
from pymilvus import connections, CollectionSchema, FieldSchema, DataType, Collection, Index, list_collections

connections.connect("default", host="localhost", port="19530")

# get collections

collections = list_collections()

print(collections)

# drop collection

collection = Collection(name="embeddings_collection")
collection.drop()

# get collections

collections = list_collections()

print(collections)

[]


SchemaNotReadyException: <SchemaNotReadyException: (code=1, message=Collection 'embeddings_collection' not exist, or you can pass in schema to create one.)>