In [1]:
!pip install pymilvus

Collecting pymilvus
  Obtaining dependency information for pymilvus from https://files.pythonhosted.org/packages/10/bb/c509f33df70463d26df4b12aa3d0df697da93da6e3dc0e4776199a0b5753/pymilvus-2.4.4-py3-none-any.whl.metadata
  Downloading pymilvus-2.4.4-py3-none-any.whl.metadata (5.4 kB)
Collecting setuptools>=67 (from pymilvus)
  Obtaining dependency information for setuptools>=67 from https://files.pythonhosted.org/packages/55/b3/b3a3415d2debd837106ed417f8681d8af63037fed367fa1b85dbfef081f1/setuptools-70.1.0-py3-none-any.whl.metadata
  Using cached setuptools-70.1.0-py3-none-any.whl.metadata (6.0 kB)
Collecting grpcio<=1.63.0,>=1.49.1 (from pymilvus)
  Obtaining dependency information for grpcio<=1.63.0,>=1.49.1 from https://files.pythonhosted.org/packages/fb/e0/a92c7b0eeeb3d2b033ba2b8b5b2db1e640a841a452622d2a08351997affe/grpcio-1.63.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata
  Downloading grpcio-1.63.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.

In [1]:
# Set Up Vector Database
from pymilvus import MilvusClient

# Connection target handed to the client (presumably a local database file,
# judging by the ".db" suffix -- confirm against the pymilvus docs).
db_path = "milvus_demo.db"

# `client` is the handle every later cell uses to talk to the database.
client = MilvusClient(db_path)

In [3]:
# Create a Collection
# Drop any existing "demo_collection" first, so that re-running this cell
# always recreates the collection from scratch.
collection_exists = client.has_collection(collection_name="demo_collection")
if collection_exists:
    client.drop_collection(collection_name="demo_collection")

client.create_collection(
    collection_name="demo_collection",
    # The vectors used in this demo have 768 dimensions.
    dimension=768,
)


In [8]:
!pip install "pymilvus[model]"

Collecting milvus-model>=0.1.0 (from pymilvus[model])
  Obtaining dependency information for milvus-model>=0.1.0 from https://files.pythonhosted.org/packages/84/c3/292de4ed428dd50df9f8d47be9eba48282843daf2fcbd940cade30acfb4b/milvus_model-0.2.3-py3-none-any.whl.metadata
  Downloading milvus_model-0.2.3-py3-none-any.whl.metadata (1.6 kB)
Collecting onnxruntime (from milvus-model>=0.1.0->pymilvus[model])
  Obtaining dependency information for onnxruntime from https://files.pythonhosted.org/packages/88/4f/20c4384e742d44caaa3c6cf39dfde2b863079e008eafbfdcc63a8c670589/onnxruntime-1.18.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata
  Downloading onnxruntime-1.18.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.3 kB)
Collecting coloredlogs (from onnxruntime->milvus-model>=0.1.0->pymilvus[model])
  Obtaining dependency information for coloredlogs from https://files.pythonhosted.org/packages/a7/06/3d6badcf13db419e25b07041d9c7b4a2c331d3f4e7134445ec5

In [4]:
# Represent text with vectors
from pymilvus import model

# Embedding function used for both documents and queries in this notebook.
embedding_fn = model.DefaultEmbeddingFunction()

# Three short passages that will become the first entities in the collection.
docs = [
    "Artificial intelligence was founded as an academic discipline in 1956.",
    "Alan Turing was the first person to conduct substantial research in AI.",
    "Born in Maida Vale, London, Turing was raised in southern England.",
]

# One embedding per document, in the same order as `docs`.
vectors = embedding_fn.encode_documents(docs)
print("Dim:", embedding_fn.dim, vectors[0].shape)  # Dim: 768 (768,)

# Build one entity per embedding: the position doubles as the id, and the
# matching source text is looked up in `docs` by that same position.
data = [
    {"id": idx, "vector": embedding, "text": docs[idx], "subject": "history"}
    for idx, embedding in enumerate(vectors)
]

# Quick sanity check of what is about to be inserted.
print("Data has", len(data), "entities, each with fields: ", data[0].keys())
print("Vector dim:", len(data[0]["vector"]))


  from .autonotebook import tqdm as notebook_tqdm


Dim: 768 (768,)
Data has 3 entities, each with fields:  dict_keys(['id', 'vector', 'text', 'subject'])
Vector dim: 768


In [5]:
# Insert Data
# Write the prepared entities into the collection and print the summary that
# the client returns.
insert_result = client.insert(data=data, collection_name="demo_collection")

print(insert_result)


{'insert_count': 3, 'ids': [0, 1, 2], 'cost': 0}


In [6]:
# Vector search
# Embed the question with the same embedding function used for the documents.
question = "Who is Alan Turing?"
query_vectors = embedding_fn.encode_queries([question])

# Search the collection for the query vectors, returning at most two entities
# per query together with their "text" and "subject" fields.
res = client.search(
    collection_name="demo_collection",
    data=query_vectors,
    output_fields=["text", "subject"],
    limit=2,
)

print(res)


data: ["[{'id': 2, 'distance': 0.5859944820404053, 'entity': {'text': 'Born in Maida Vale, London, Turing was raised in southern England.', 'subject': 'history'}}, {'id': 1, 'distance': 0.5118255019187927, 'entity': {'text': 'Alan Turing was the first person to conduct substantial research in AI.', 'subject': 'history'}}]"] , extra_info: {'cost': 0}


In [12]:
# Vector Search with Metadata Filtering
# Second batch of documents, tagged with a different subject than the first.
docs = [
    "Machine learning has been used for drug design.",
    "Computational synthesis with AI algorithms predicts molecular properties.",
    "DDR1 is involved in cancers and fibrosis.",
]
vectors = embedding_fn.encode_documents(docs)

# Ids start at 3 so they do not collide with ids 0-2 used by the earlier batch.
data = [
    {"id": 3 + offset, "vector": embedding, "text": docs[offset], "subject": "biology"}
    for offset, embedding in enumerate(vectors)
]
client.insert(collection_name="demo_collection", data=data)

# Same kind of search as before, but restricted by a filter expression on the
# "subject" field.
biology_query_vectors = embedding_fn.encode_queries(["tell me AI related information"])
res = client.search(
    collection_name="demo_collection",
    data=biology_query_vectors,
    filter="subject == 'biology'",
    output_fields=["text", "subject"],
    limit=2,
)

print(res)


data: ["[{'id': 4, 'distance': 0.27030572295188904, 'entity': {'text': 'Computational synthesis with AI algorithms predicts molecular properties.', 'subject': 'biology'}}, {'id': 3, 'distance': 0.1642588973045349, 'entity': {'text': 'Machine learning has been used for drug design.', 'subject': 'biology'}}]"] , extra_info: {'cost': 0}
