In [2]:
"""
Retrieval-Augmented Generation (RAG) — Hands-on notebook
By Pavan Susarla — practical lessons: Easy -> Medium -> Hard
"""

import os
import re

# -----------------------------
# COMMON IMPORTS & CONFIG
# -----------------------------
from typing import Any, Dict, List

import faiss
import numpy as np
from openai import OpenAI
from sentence_transformers import SentenceTransformer

client = OpenAI(
    api_key="myeky"
)


# -----------------------------
# UTIL: simple chunking
# -----------------------------
def simple_chunk_text(
    text: str, chunk_size: int = 1000, overlap: int = 200
) -> List[str]:
    chunks = []
    start = 0
    n = len(text)
    while start < n:
        end = min(start + chunk_size, n)
        chunks.append(text[start:end])
        start = end - overlap if end < n else n
    return chunks


# -----------------------------
# LESSON 1 — Minimal RAG (FAISS + OpenAI)
# -----------------------------
EMB_MODEL = "all-MiniLM-L6-v2"
embedder = SentenceTransformer(EMB_MODEL)

docs = [
    "LangChain helps you build applications with LLMs by connecting components.",
    "FAISS is a fast similarity search library from Facebook AI Research.",
    "Chroma is an open source vector database oriented for LangChain integrations.",
]

# Create embeddings
vectors = embedder.encode(docs, convert_to_numpy=True)
vec_dim = vectors.shape[1]

# Build FAISS index
index = faiss.IndexFlatL2(vec_dim)
index.add(vectors)


def faiss_search(query: str, top_k: int = 2):
    qv = embedder.encode([query], convert_to_numpy=True)
    D, I = index.search(qv, top_k)
    return [
        {"idx": int(i), "score": float(D[0][j]), "text": docs[int(i)]}
        for j, i in enumerate(I[0])
    ]


PROMPT_TEMPLATE = """
You are a helpful assistant. Use the following retrieved documents as context (do not invent facts).

Context:
{context}

Question: {question}
Answer:
"""


def generate_answer_openai(question: str, retrieved_texts: List[str]):
    context = "\n---\n".join(retrieved_texts)
    prompt = PROMPT_TEMPLATE.format(context=context, question=question)
    resp = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ],
        max_tokens=300,
        temperature=0.0,
    )
    return resp.choices[0].message.content


# Test minimal RAG
q = "What is FAISS?"
res = faiss_search(q)
print("retrieved:", res)
ans = generate_answer_openai(q, [r["text"] for r in res])
print("\nassistant answer:\n", ans)

from langchain.chains import RetrievalQA

# -----------------------------
# LESSON 2 — LangChain + Chroma (real-world RAG)
# -----------------------------
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_openai import ChatOpenAI

doc_text = """LangChain integrates with Chroma to store and retrieve document embeddings efficiently.
Chroma provides persistent vector storage and similarity search for AI pipelines."""

# 1️⃣ Split document into chunks
chunker = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
docs = chunker.create_documents([doc_text])

# 2️⃣ Create embeddings
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# 3️⃣ Initialize Chroma DB
chroma_db = Chroma.from_documents(
    documents=docs,
    embedding=embeddings,
    collection_name="rag_demo",
    persist_directory="./chroma_store",
)
retriever = chroma_db.as_retriever(search_type="similarity", search_kwargs={"k": 3})

# 4️⃣ Initialize new LLM client (from langchain_openai)
os.environ["OPENAI_API_KEY"] = (
    "mykey")

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# 5️⃣ Build RetrievalQA chain
qa_chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)
question = "How does Chroma integrate with LangChain?"
response = qa_chain.invoke({"query": question})
print(response)

# -----------------------------
# LESSON 3 — Advanced Chroma (metadata, hybrid, evaluation)
# -----------------------------
meta_docs = [
    ("Chroma supports metadata-based retrieval.", {"topic": "metadata"}),
    ("Chroma allows hybrid lexical + semantic search.", {"topic": "hybrid"}),
]
meta_texts = [d[0] for d in meta_docs]
meta_metadata = [d[1] for d in meta_docs]
meta_vectors = embeddings.embed_documents(meta_texts)

chroma_db.add_texts(texts=meta_texts, metadatas=meta_metadata)

filtered = chroma_db.similarity_search(
    "What is hybrid search?", k=2, filter={"topic": "hybrid"}
)
print("\nFiltered result (topic='hybrid'):\n", filtered)


def recall_at_k(retrieved_ids: List[str], relevant_ids: List[str], k: int) -> float:
    topk = set(retrieved_ids[:k])
    return len(topk.intersection(set(relevant_ids))) / len(relevant_ids)


print("\n✅ Advanced Chroma RAG demo complete.")

retrieved: [{'idx': 1, 'score': 1.2429592609405518, 'text': 'FAISS is a fast similarity search library from Facebook AI Research.'}, {'idx': 2, 'score': 1.8776800632476807, 'text': 'Chroma is an open source vector database oriented for LangChain integrations.'}]

assistant answer:
 FAISS is a fast similarity search library developed by Facebook AI Research. It is designed to efficiently search for similar items in large datasets.
{'query': 'How does Chroma integrate with LangChain?', 'result': 'Chroma integrates with LangChain to store and retrieve document embeddings efficiently. It provides persistent vector storage and similarity search capabilities, which are essential for AI pipelines.'}

Filtered result (topic='hybrid'):
 [Document(id='82e35bbd-599e-4ac3-8843-b05524e075ae', metadata={'topic': 'hybrid'}, page_content='Chroma allows hybrid lexical + semantic search.'), Document(id='2a7e37e8-89f3-40fd-8bc3-856874c21929', metadata={'topic': 'hybrid'}, page_content='Chroma allows hybr