In [6]:
import os
import getpass
from dotenv import load_dotenv
load_dotenv()


True

In [7]:
import os
import chromadb
from chromadb.utils.embedding_functions import OpenAIEmbeddingFunction

# Directory handed to chromadb.PersistentClient(path=...) in get_collection().
PERSISTENT_DIR = "./chroma_db"
# Name of the Chroma collection that get_collection() opens or creates.
COLLECTION_NAME = "rag_mcp"

def get_collection():
    """Open the persistent Chroma store and return ``(client, collection)``.

    The collection named ``COLLECTION_NAME`` is fetched, or created if it does
    not exist yet, with an OpenAI embedding function
    (``text-embedding-ada-002``) attached to it.

    Returns:
        tuple: the ``chromadb.PersistentClient`` rooted at ``PERSISTENT_DIR``
        and the collection obtained from it.

    Raises:
        KeyError: if ``OPENAI_API_KEY`` is not set in the environment.
    """
    chroma_client = chromadb.PersistentClient(path=PERSISTENT_DIR)

    # ada-002 works; if it is deprecated, use text-embedding-3-small instead.
    openai_embedder = OpenAIEmbeddingFunction(
        api_key=os.environ["OPENAI_API_KEY"],
        model_name="text-embedding-ada-002",
    )

    return chroma_client, chroma_client.get_or_create_collection(
        name=COLLECTION_NAME,
        embedding_function=openai_embedder,
    )


In [8]:
import os
import nest_asyncio
from llama_index.core import SimpleDirectoryReader
from llama_parse import LlamaParse

# Needed when running inside Jupyter (not only on Windows): the kernel already
# runs an asyncio event loop, and nest_asyncio lets libraries that drive
# asyncio themselves run on top of it.
nest_asyncio.apply()

# Folder whose files get ingested. Set RAG_DATA_DIR (for example in .env) to
# point at another location; the default keeps the original local path so
# existing setups behave exactly as before.
DATA_DIR = os.environ.get("RAG_DATA_DIR", r"D:\Narwal\mcp_rag\data")
# Fails fast with KeyError when the LlamaCloud key is missing.
LLAMA_CLOUD_API_KEY = os.environ["LLAMA_CLOUD_API_KEY"]

def ingest_data_dir():
    """Rebuild the Chroma collection from the files under ``DATA_DIR``.

    Steps:
      1. Delete the collection named ``COLLECTION_NAME`` and open a fresh one.
      2. Load ``DATA_DIR`` with ``SimpleDirectoryReader``; ``.pdf`` files are
         routed through LlamaParse with plain-text output.
      3. Add each loaded document (text, metadata, id) to the collection.
      4. Print how many entries the collection holds afterwards.

    Documents whose text is empty or whitespace-only are skipped: they carry
    nothing to retrieve, and the OpenAI embeddings endpoint rejects empty
    input, which would otherwise abort the ingestion part-way through.

    Returns:
        None. Progress is reported with ``print``.

    NOTE(review): a recorded run of this notebook logged
    "Event loop is closed" while parsing one file. Presumably such a file
    contributes no documents, so compare the printed count with the number of
    input files -- TODO confirm.
    """
    client, _ = get_collection()

    # wipe and re-create collection (dev-safe)
    try:
        client.delete_collection(name=COLLECTION_NAME)
    except Exception as exc:
        # Still best-effort, but no longer silent: if the wipe failed, the old
        # entries are still in the collection and the final count includes them.
        print(f"Warning: could not delete collection {COLLECTION_NAME!r}: {exc}")

    client, collection = get_collection()

    parser = LlamaParse(
        api_key=LLAMA_CLOUD_API_KEY,
        result_type="text",
    )

    file_extractor = {".pdf": parser}

    documents = SimpleDirectoryReader(
        DATA_DIR,
        file_extractor=file_extractor,
    ).load_data()

    skipped = 0
    for doc in documents:
        if not (doc.text or "").strip():
            skipped += 1
            continue
        collection.add(
            documents=[doc.text],
            metadatas=[doc.metadata],
            ids=[doc.doc_id],
        )

    if skipped:
        print(f"Skipped {skipped} documents with no text")
    print(f"Ingested {collection.count()} documents")


In [9]:
# Rebuild the index: deletes the collection, then re-parses and re-adds every
# file under DATA_DIR (this calls the LlamaParse service over the network).
ingest_data_dir()


2025-12-22 02:15:07,110 - INFO - HTTP Request: POST https://api.cloud.llamaindex.ai/api/parsing/upload "HTTP/1.1 200 OK"


Started parsing the file under job_id 617263f5-21c4-4ecf-9a41-8c6842f4964b


2025-12-22 02:15:08,370 - INFO - HTTP Request: GET https://api.cloud.llamaindex.ai/api/parsing/job/617263f5-21c4-4ecf-9a41-8c6842f4964b "HTTP/1.1 200 OK"
2025-12-22 02:15:10,654 - INFO - HTTP Request: GET https://api.cloud.llamaindex.ai/api/parsing/job/617263f5-21c4-4ecf-9a41-8c6842f4964b "HTTP/1.1 200 OK"
2025-12-22 02:15:13,873 - INFO - HTTP Request: GET https://api.cloud.llamaindex.ai/api/parsing/job/617263f5-21c4-4ecf-9a41-8c6842f4964b "HTTP/1.1 200 OK"
2025-12-22 02:15:18,191 - INFO - HTTP Request: GET https://api.cloud.llamaindex.ai/api/parsing/job/617263f5-21c4-4ecf-9a41-8c6842f4964b "HTTP/1.1 200 OK"
2025-12-22 02:15:45,022 - INFO - HTTP Request: GET https://api.cloud.llamaindex.ai/api/parsing/job/617263f5-21c4-4ecf-9a41-8c6842f4964b "HTTP/1.1 200 OK"
2025-12-22 02:15:45,431 - INFO - HTTP Request: GET https://api.cloud.llamaindex.ai/api/parsing/job/617263f5-21c4-4ecf-9a41-8c6842f4964b/result/text "HTTP/1.1 200 OK"


Error while parsing the file '<bytes/buffer>': Event loop is closed


2025-12-22 02:15:59,264 - INFO - HTTP Request: POST https://api.cloud.llamaindex.ai/api/parsing/upload "HTTP/1.1 200 OK"


Started parsing the file under job_id 2b57ffe1-e8a1-4b78-aa60-5a6bc960cc3f


2025-12-22 02:16:00,586 - INFO - HTTP Request: GET https://api.cloud.llamaindex.ai/api/parsing/job/2b57ffe1-e8a1-4b78-aa60-5a6bc960cc3f "HTTP/1.1 200 OK"
2025-12-22 02:16:03,146 - INFO - HTTP Request: GET https://api.cloud.llamaindex.ai/api/parsing/job/2b57ffe1-e8a1-4b78-aa60-5a6bc960cc3f "HTTP/1.1 200 OK"
2025-12-22 02:16:06,423 - INFO - HTTP Request: GET https://api.cloud.llamaindex.ai/api/parsing/job/2b57ffe1-e8a1-4b78-aa60-5a6bc960cc3f "HTTP/1.1 200 OK"
2025-12-22 02:16:10,702 - INFO - HTTP Request: GET https://api.cloud.llamaindex.ai/api/parsing/job/2b57ffe1-e8a1-4b78-aa60-5a6bc960cc3f "HTTP/1.1 200 OK"
2025-12-22 02:16:16,663 - INFO - HTTP Request: GET https://api.cloud.llamaindex.ai/api/parsing/job/2b57ffe1-e8a1-4b78-aa60-5a6bc960cc3f "HTTP/1.1 200 OK"
2025-12-22 02:16:22,352 - INFO - HTTP Request: GET https://api.cloud.llamaindex.ai/api/parsing/job/2b57ffe1-e8a1-4b78-aa60-5a6bc960cc3f "HTTP/1.1 200 OK"
2025-12-22 02:16:29,155 - INFO - HTTP Request: GET https://api.cloud.llamain

: 

In [3]:
import os
from dotenv import load_dotenv
from openai import OpenAI
from qdrant_client import QdrantClient

load_dotenv()

# ---- config ----
# Qdrant collection to search.
COLLECTION_NAME = "rag_mcp"

# ---- clients ----
# All credentials come from the environment (loaded by load_dotenv() above);
# a missing variable raises KeyError here rather than failing later.
openai_client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

qdrant = QdrantClient(
    url=os.environ["QDRANT_URL"],
    api_key=os.environ["QDRANT_API_KEY"],
)

# ---- make a query embedding ----
query_text = "physics simulation"

# A single input string is sent, so the vector is read from data[0].
# NOTE(review): presumably the collection was indexed with the same
# text-embedding-ada-002 model -- confirm against the ingestion code.
embedding = openai_client.embeddings.create(
    model="text-embedding-ada-002",
    input=query_text,
).data[0].embedding

# ---- query ONE result ----
# limit=1 asks for a single hit; with_payload=True includes the stored payload
# in the response.
resp = qdrant.query_points(
    collection_name=COLLECTION_NAME,
    query=embedding,
    limit=1,
    with_payload=True,
)

# ---- take the single hit; the bare `point` expression that ends the cell displays it in full ----
# Raises IndexError if the query returned no points.
point = resp.points[0]

point 


ScoredPoint(id='9f073b3b-9107-4aa9-923e-2604298c0395', version=11, score=0.81821674, payload={'text': '-\nsidering both geometry and all physical attributes.                                In to-\ntal, we collect 1,568 valid scores from 14 volunteers and\nnormalize the scores. The results show that the outputs of\nPhysX-Anything align much better with human preferences\nthan those of other methods, confirming its robust gener-\native performance in both geometry and physical proper-\n\n                                                                                                     6\n', 'metadata': {'file_path': 'D:\\Narwal\\mcp_rag\\data\\paper1.pdf', 'file_name': 'paper1.pdf', 'file_type': 'application/pdf', 'file_size': 4994615, 'creation_date': '2025-12-22', 'last_modified_date': '2025-12-22'}, 'chunk_index': 1, 'source_doc': 'dca54582-14cc-44a2-acaa-a22cbd7cdeb9'}, vector=None, shard_key=None, order_value=None)

In [7]:
import os, requests
from dotenv import load_dotenv
load_dotenv()
# Voyage AI key read from the environment (after load_dotenv() above);
# raises KeyError if it is unset.
VOYAGE_API_KEY = os.environ["VOYAGE_API_KEY"]
# Voyage AI rerank endpoint that rerank() posts to.
url = "https://api.voyageai.com/v1/rerank"

def rerank(
    query: str,
    chunks: list[str],
    top_k: int = 5,
    model: str = "rerank-2.5",
    timeout: float = 30.0,
):
    """Score ``chunks`` against ``query`` with the Voyage AI rerank endpoint.

    Args:
        query: The search query.
        chunks: Candidate passages to rank.
        top_k: Number of results requested from the API.
        model: Reranker model name; defaults to ``"rerank-2.5"``
            (``"rerank-2.5-lite"`` is the lighter alternative).
        timeout: Seconds to wait for the HTTP request. Without a timeout,
            ``requests`` can block forever on a stalled connection and hang
            the notebook kernel.

    Returns:
        The decoded JSON response as a dict. In the run recorded in this
        notebook it has a ``"data"`` list of
        ``{"relevance_score": float, "index": int}`` entries, where ``index``
        is a position in ``chunks``, plus ``"model"`` and ``"usage"`` fields.

    Raises:
        requests.HTTPError: if the API answers with a non-2xx status.
        requests.Timeout: if the request exceeds ``timeout`` seconds.
    """
    payload = {
        "model": model,
        "query": query,
        "documents": chunks,
        "top_k": top_k,
    }
    headers = {"Authorization": f"Bearer {VOYAGE_API_KEY}"}
    r = requests.post(url, json=payload, headers=headers, timeout=timeout)
    r.raise_for_status()
    return r.json()
# Demo: rerank a handful of hand-written passages against one question.
query = "What is Model Context Protocol?"

# Five candidates: three that talk about MCP / the protocol and two unrelated
# distractors (FastAPI, Qdrant).
chunks = [
    "Model Context Protocol (MCP) is a standard that allows large language models to call tools using a structured interface.",
    "FastAPI is a modern Python web framework used to build APIs quickly and efficiently.",
    "MCP enables LLMs to interact with external systems through JSON-RPC style messages.",
    "Qdrant is a vector database designed for similarity search and retrieval.",
    "The protocol defines how models expose tools and how clients invoke them."
]
# Ask for the three best matches; `result` is displayed in the next cell.
result = rerank(query, chunks, top_k=3)


In [8]:
result

{'object': 'list',
 'data': [{'relevance_score': 0.890625, 'index': 0},
  {'relevance_score': 0.7109375, 'index': 4},
  {'relevance_score': 0.6328125, 'index': 2}],
 'model': 'rerank-2.5',
 'usage': {'total_tokens': 93}}