In [None]:
from pymilvus import connections, Collection, FieldSchema, CollectionSchema, DataType
import numpy as np
from sentence_transformers import SentenceTransformer
from transformers import pipeline

# Connect to Milvus
connections.connect("default", host="localhost", port="19530")

# Load models first so the vector field can be sized from the encoder itself.
encoder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
generator = pipeline('text-generation', model='gpt2')

# Define collection schema
# The vector field width must equal the encoder's output width, otherwise
# inserts and searches fail with a dimension mismatch. all-MiniLM-L6-v2 emits
# 384-dimensional vectors (not 768), so ask the model instead of hard-coding.
dim = encoder.get_sentence_embedding_dimension()
fields = [
    # Primary key is generated by Milvus (auto_id), callers never supply it.
    FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True),
    FieldSchema(name="content", dtype=DataType.VARCHAR, max_length=65535),
    FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=dim)
]
schema = CollectionSchema(fields, "RAG collection")

# Create collection
# NOTE(review): if a "rag_collection" with a different schema is left over from
# an earlier run, this call is expected to fail — drop the stale one first.
collection_name = "rag_collection"
collection = Collection(name=collection_name, schema=schema)

# Create index (FLAT = exact brute-force search, L2 distance)
index_params = {
    "metric_type": "L2",
    "index_type": "FLAT",
    "params": {}
}
collection.create_index(field_name="embedding", index_params=index_params)

# Load the collection into memory; Milvus only serves searches on loaded
# collections, and loading requires the index created above.
collection.load()

def add_documents(documents):
    """Embed ``documents`` and insert them into the Milvus collection.

    Args:
        documents: Iterable of text strings to index.

    Returns:
        None. The data is flushed before returning.

    The ``id`` primary key is declared with ``auto_id=True``, so Milvus
    generates it and it must not be part of the inserted columns.
    """
    documents = list(documents)
    if not documents:
        # Nothing to embed or insert; avoid sending an empty batch to Milvus.
        return

    embeddings = encoder.encode(documents)
    # Column order follows the schema minus the auto-generated "id" field:
    # content first, then embedding.
    collection.insert([documents, embeddings.tolist()])
    collection.flush()

def search_similar_documents(query, top_k=5):
    """Return the ``content`` of the ``top_k`` stored documents nearest to ``query``.

    Args:
        query: Text to embed and search with.
        top_k: Maximum number of hits to return.

    Returns:
        List of ``content`` field values, in the order Milvus returns the hits.
    """
    # Encode as a one-element batch and keep the single resulting vector.
    query_vector = encoder.encode([query])[0].tolist()

    # One query vector was sent, so only the first result set is relevant.
    hits = collection.search(
        data=[query_vector],
        anns_field="embedding",
        param={"metric_type": "L2", "params": {}},
        limit=top_k,
        output_fields=["content"],
    )[0]

    contents = []
    for hit in hits:
        contents.append(hit.entity.get('content'))
    return contents

def generate_answer(query, context):
    """Generate an answer to ``query`` with the text-generation pipeline.

    Args:
        query: The user's question.
        context: Retrieved text placed ahead of the question in the prompt.

    Returns:
        The pipeline's ``generated_text`` for the first returned sequence.
        NOTE(review): by default the pipeline echoes the prompt in front of the
        continuation — confirm whether callers want the prompt stripped.
    """
    prompt = f"Context: {context}\n\nQuestion: {query}\n\nAnswer:"
    # max_length would count the prompt's tokens too, so a long context could
    # leave no budget for the answer; max_new_tokens bounds only the
    # generated continuation.
    response = generator(prompt, max_new_tokens=100, num_return_sequences=1)
    return response[0]['generated_text']

# Example usage
documents = [
    "The capital of France is Paris.",
    "The Eiffel Tower is located in Paris.",
    "London is the capital of the United Kingdom.",
    "The United States has 50 states."
]

# Run the demo inside try/finally so the collection is dropped and the
# connection closed even when a step fails; otherwise a failed run leaves a
# stale collection behind for the next run.
try:
    # Add documents to the collection
    add_documents(documents)

    # Search for similar documents
    query = "What is the capital of France?"
    similar_docs = search_similar_documents(query)

    # Generate an answer
    context = " ".join(similar_docs)
    answer = generate_answer(query, context)

    print(f"Query: {query}")
    print(f"Similar documents: {similar_docs}")
    print(f"Generated answer: {answer}")
finally:
    # Clean up
    try:
        collection.drop()
    finally:
        # Disconnect even if dropping the collection raised.
        connections.disconnect("default")