# 04 – RAG Query and Evaluation

This notebook is for running RAG queries and doing basic evaluation.

- Use `SimpleRAGPipeline.run_query` to inspect retrieved chunks and prompts.
- Later: add labeled queries and retrieval metrics.



In [None]:
import sys
from pathlib import Path

# Add src directory to path so we can import modules
sys.path.insert(0, str(Path("../src").resolve()))

from rag_pipeline.lab_pipeline import RAGLabConfig, SimpleRAGPipeline

# Raw documents live here; the path is relative to the notebook's working directory.
DATA_DIR = Path("../data/raw")

# Chunking and retrieval settings used for this session.
config = RAGLabConfig(data_root=DATA_DIR, chunk_size=500, chunk_overlap=100, top_k=5)
pipeline = SimpleRAGPipeline(config=config)

# File extensions handed to the indexer.
index_exts = [".txt", ".md", ".pdf", ".json", ".csv"]

# NOTE(review): the earlier comment here said re-running this in the same
# session is idempotent — confirm against SimpleRAGPipeline.index_directory.
stats = pipeline.index_directory(exts=index_exts)
print("Index stats:", stats)



In [None]:
# Sample queries for eyeballing retrieval behavior; edit freely.
queries = [
    "What is the main topic of these documents?",
    "Summarize any instructions contained in the docs.",
]

# Inspection settings, named so they can be tuned in one place.
QUERY_TOP_K = 3  # passed to run_query; presumably overrides the config-level top_k — confirm
PROMPT_PREVIEW_CHARS = 500  # leading characters of the prompt to print
CHUNK_PREVIEW_CHARS = 400  # leading characters of each retrieved chunk to print

for q in queries:
    print("\n" + "=" * 80)
    print(f"Query: {q}")
    result = pipeline.run_query(q, top_k=QUERY_TOP_K)

    print("\nPrompt snippet:\n")
    print(result["prompt"][:PROMPT_PREVIEW_CHARS])

    print("\nTop retrieved chunks (truncated):")
    rank = 0  # stays 0 when the retrieved collection yields nothing
    for rank, doc in enumerate(result["retrieved"], start=1):
        print("-" * 60)
        print(f"Rank {rank} | Score: {doc['score']:.4f}")
        print(doc["text"][:CHUNK_PREVIEW_CHARS])
    if rank == 0:
        # Make an empty retrieval explicit instead of leaving a bare header.
        print("(no chunks retrieved)")

