# In [None]:

from pymilvus import MilvusClient, DataType

# Connect to the Milvus server listening on localhost:19530, passing a token.
client = MilvusClient(
    uri="http://localhost:19530",
    token="root:Milvus"
)

# Build a collection schema with auto_id turned off and dynamic fields
# turned on.
# NOTE(review): `schema` is not passed to any `create_collection` call in the
# visible code — confirm whether it is still needed.
schema = MilvusClient.create_schema(
    auto_id=False,
    enable_dynamic_field=True,
)

# Declare three fields: an INT64 primary key, a float vector with dim=5, and
# a VARCHAR with max_length=512.
# NOTE(review): dim=5 does not match the 384/768-dimensional vectors that the
# rest of this file produces.
schema.add_field(field_name="my_id", datatype=DataType.INT64, is_primary=True)
schema.add_field(field_name="my_vector", datatype=DataType.FLOAT_VECTOR, dim=5)
schema.add_field(field_name="my_varchar", datatype=DataType.VARCHAR, max_length=512)

from sentence_transformers import SentenceTransformer

# Pre-trained Sentence-Transformers model used to embed the documents.
embedding_fn = SentenceTransformer("all-MiniLM-L6-v2")

docs = [
    "Artificial intelligence was founded as an academic discipline in 1956.",
    "Alan Turing was the first person to conduct substantial research in AI.",
    "Born in Maida Vale, London, Turing was raised in southern England.",
]

# Turn every document into a vector.
vectors = embedding_fn.encode(docs)
print("Dim:", len(vectors[0]), vectors[0].shape)  # Dim: 384 (384,)

# One entity per document: positional id, embedding, source text, and subject.
data = [
    {"id": doc_id, "vector": vector, "text": text, "subject": "history"}
    for doc_id, (vector, text) in enumerate(zip(vectors, docs))
]

print("Data has", len(data), "entities, each with fields: ", data[0].keys())
print("Vector dim:", len(data[0]["vector"]))
import random

docs = [
    "Artificial intelligence was founded as an academic discipline in 1956.",
    "Alan Turing was the first person to conduct substantial research in AI.",
    "Born in Maida Vale, London, Turing was raised in southern England.",
]

# Placeholder embeddings: one 768-dimensional vector of uniform random values
# in [-1, 1] per document (no embedding model involved).
vectors = [
    [random.uniform(-1, 1) for _ in range(768)]
    for _ in docs
]

# One entity per document: positional id, vector, source text, and subject.
data = [
    {"id": doc_id, "vector": vector, "text": text, "subject": "history"}
    for doc_id, (vector, text) in enumerate(zip(vectors, docs))
]

print("Data has", len(data), "entities, each with fields: ", data[0].keys())
print("Vector dim:", len(data[0]["vector"]))


from pymilvus import model


# Embedding function provided by pymilvus' `model` module.
embedding_fn = model.DefaultEmbeddingFunction()

docs = [
    "Artificial intelligence was founded as an academic discipline in 1956.",
    "Alan Turing was the first person to conduct substantial research in AI.",
    "Born in Maida Vale, London, Turing was raised in southern England.",
]

# Embed the documents.
vectors = embedding_fn.encode_documents(docs)
print("Dim:", embedding_fn.dim, vectors[0].shape)  # Dim: 768 (768,)

# One entity per document: positional id, embedding, source text, and subject.
data = [
    {"id": doc_id, "vector": vector, "text": text, "subject": "history"}
    for doc_id, (vector, text) in enumerate(zip(vectors, docs))
]

print("Data has", len(data), "entities, each with fields: ", data[0].keys())
print("Vector dim:", len(data[0]["vector"]))
print("check:", data[0].values())
# Hard-coded query plus its lowercase keyword set, standing in for real input.
query = "hybrid search in milvus"
query_words = {word.lower() for word in query.split()}
print("Query words:", query_words)

# Hand-written stand-ins for Milvus vector-search hits (not sorted by score).
milvus_results = [
    {"id": hit_id, "content": content, "score": score}
    for hit_id, content, score in (
        (1, "Milvus supports vector search and hybrid search features.", 0.88),
        (2, "You can use Docker to run Milvus easily.", 0.91),
        (3, "Hybrid search combines keyword and vector search for better results.", 0.85),
    )
]
print("Milvus results:", milvus_results)
# Simple reranker based on keyword overlap
def rerank_by_keyword_overlap(results, query_words):
    """Order search results by how many query keywords their content contains.

    Matching is case-insensitive and ignores punctuation: both the keywords
    and each result's content are lowercased and split into runs of word
    characters, so "Milvus." or "search," still match "milvus" / "search".

    Args:
        results: Iterable of dicts, each holding a string under ``"content"``.
        query_words: Iterable of keyword strings (set, list, tuple, ...).

    Returns:
        A new list containing the same result dicts, sorted by descending
        number of distinct keywords found in ``"content"``. Results with the
        same count keep their input order (the sort is stable). ``results``
        itself is not modified.
    """
    import re  # local import so the function does not rely on file-level imports

    word_pattern = re.compile(r"\w+")

    def tokenize(text):
        # Plain str.split() would leave punctuation glued to words.
        return set(word_pattern.findall(text.lower()))

    # Normalise the keywords once, up front, instead of once per result.
    keywords = set()
    for word in query_words:
        keywords |= tokenize(word)

    def overlap_score(content):
        return len(keywords & tokenize(content))

    return sorted(results, key=lambda r: overlap_score(r["content"]), reverse=True)

# Re-order the simulated hits by keyword overlap with the query.
reranked_results = rerank_by_keyword_overlap(milvus_results, query_words)
print("Reranked results:", reranked_results)

# Print one numbered line per hit, recomputing its overlap count for display.
for idx, res in enumerate(reranked_results, start=1):
    print(
        f"{idx}. ID: {res['id']}, "
        f"Overlap Score: {len(query_words & set(res['content'].lower().split()))}, "
        f"Content: {res['content']}"
    )

from pymilvus import MilvusClient

# Rebind `client` to the Milvus server at localhost:19530. The URI is an HTTP
# endpoint, so this talks to a running server; unlike the first client in
# this file, no token is passed here.
client = MilvusClient(uri="http://localhost:19530")  # HTTP endpoint of a Milvus server

# Drop "demo_collection" if it already exists, then create it again so the
# demo always starts from an empty collection.
if client.has_collection(collection_name="demo_collection"):
    client.drop_collection(collection_name="demo_collection")
client.create_collection(
    collection_name="demo_collection",
    dimension=768,  # The vectors used in this demo have 768 dimensions
)

from pymilvus import model


# Embedding function provided by pymilvus' `model` module.
embedding_fn = model.DefaultEmbeddingFunction()

# Demo corpus: twelve short statements about Milvus and vector search.
texts = [
    "Milvus is an open-source vector database for scalable similarity search.",
    "Vector search enables semantic search in unstructured data like text or images.",
    "You can deploy Milvus using Docker or Kubernetes for flexibility.",
    "Embedding models like BERT convert text into vector representations.",
    "Hybrid search in Milvus combines keyword-based and vector-based retrieval.",
    "Use FAISS, HNSW, or IVF indexes in Milvus for efficient similarity search.",
    "Milvus supports filtering results with scalar fields like tags or categories.",
    "Text embeddings capture semantic meaning beyond exact keyword matching.",
    "You can use sentence-transformers to generate embeddings for your documents.",
    "Milvus integrates well with FastAPI, LangChain, and other modern tools.",
    "Reranking strategies help improve search result quality after retrieval.",
    "Milvus is commonly used in AI applications like chatbots and recommendation systems.",
]

# Embed the corpus.
vectors = embedding_fn.encode_documents(texts)
print("Dim:", embedding_fn.dim, vectors[0].shape)  # Dim: 768 (768,)

# One entity per text: positional id (0..11), embedding, source text, subject.
data = [
    {"id": text_id, "vector": vector, "text": text, "subject": "history"}
    for text_id, (vector, text) in enumerate(zip(vectors, texts))
]

print("Data has", len(data), "entities, each with fields: ", data[0].keys())
print("Vector dim:", len(data[0]["vector"]))

# Store the entities in the demo collection and show the server's response.
res = client.insert(collection_name="demo_collection", data=data)

print(res)

# Same twelve-sentence corpus that was embedded and inserted earlier in this
# file, redefined so this cell can run on its own.
texts = [
    "Milvus is an open-source vector database for scalable similarity search.",
    "Vector search enables semantic search in unstructured data like text or images.",
    "You can deploy Milvus using Docker or Kubernetes for flexibility.",
    "Embedding models like BERT convert text into vector representations.",
    "Hybrid search in Milvus combines keyword-based and vector-based retrieval.",
    "Use FAISS, HNSW, or IVF indexes in Milvus for efficient similarity search.",
    "Milvus supports filtering results with scalar fields like tags or categories.",
    "Text embeddings capture semantic meaning beyond exact keyword matching.",
    "You can use sentence-transformers to generate embeddings for your documents.",
    "Milvus integrates well with FastAPI, LangChain, and other modern tools.",
    "Reranking strategies help improve search result quality after retrieval.",
    "Milvus is commonly used in AI applications like chatbots and recommendation systems.",
]


from sentence_transformers import SentenceTransformer

# Bound to `sentence_model` rather than `model`: this file imports the
# `pymilvus.model` module under the name `model` and calls
# `model.DefaultEmbeddingFunction()` on it, so rebinding `model` here would
# break those cells if they are run again afterwards.
sentence_model = SentenceTransformer("all-MiniLM-L6-v2")  # This outputs 384-dim vectors
embeddings = sentence_model.encode(texts)

# Column-oriented view of the corpus: all vectors under one key, all texts
# under the other.
# NOTE(review): these are 384-dim vectors while "demo_collection" is created
# with dimension=768, and every `client.insert` call in this file passes a
# list of per-entity dicts rather than a dict of columns. `data` is also
# reassigned before the next insert — confirm whether this cell is still
# needed.
data = {
    "vector": embeddings.tolist(),  # Convert numpy array to list of lists
    "text": texts,
}

# Embed the query text with `embedding_fn`, the same embedding function used
# for the documents inserted into "demo_collection".
query_vectors = embedding_fn.encode_queries(["semantic search with embeddings"])

# Run a vector search over the demo collection and print the raw response.
res = client.search(
    collection_name="demo_collection",  # target collection
    data=query_vectors,  # query vectors
    limit=2,  # number of returned entities
    output_fields=["text", "subject"],  # specifies fields to be returned
)

print(res)

# A second, smaller corpus tagged with a different subject.
texts = [
    "Machine learning has been used for drug design.",
    "Computational synthesis with AI algorithms predicts molecular properties.",
    "DDR1 is involved in cancers and fibrosis.",
]
vectors = embedding_fn.encode_documents(texts)

# IDs start at 12 so they continue after the twelve entities (ids 0-11)
# inserted earlier in this file.
data = [
    {"id": entity_id, "vector": vector, "text": text, "subject": "biology"}
    for entity_id, (vector, text) in enumerate(zip(vectors, texts), start=12)
]

client.insert(collection_name="demo_collection", data=data)

# Vector search restricted to entities whose subject is 'biology'.
res = client.search(
    collection_name="demo_collection",
    data=embedding_fn.encode_queries(["tell me AI related information"]),
    filter="subject == 'biology'",
    limit=2,
    output_fields=["text", "subject"],
)

print(res)

# Show the most recently defined `texts` list before deleting anything.
print(texts)
# Delete the entities with ids 0 and 2 from the demo collection.
res = client.delete(collection_name="demo_collection", ids=[0, 2])

print(res)

# Delete every entity matching the filter subject == 'biology'.
res = client.delete(
    collection_name="demo_collection",
    filter="subject == 'biology'",
)

print(res)

