In [None]:
# ✅ Setup
from rag_utils.embeddings import get_embeddings
from rag_utils.indexing import build_faiss_index, get_chroma_collection, add_to_chroma
from rag_utils.retrieval import retrieve_documents
from rag_utils.llm_integration import query_llm
from rag_utils.evaluation import evaluate_generation
from config import settings

In [2]:
# Toy corpus for the walkthrough: four one-sentence documents.
texts = [
    "Retrieval-Augmented Generation (RAG) combines search and generation.",
    "FAISS is a library for efficient similarity search.",
    "Chroma is a vector database that supports persistence.",
    "Ollama runs local LLMs like Mistral and Llama 3 on your machine.",
]
# Wrap every sentence in the record shape used downstream; each record gets
# its own (empty) metadata dict.
docs = [dict(text=sentence, metadata={}) for sentence in texts]

In [3]:
# ✅ Embeddings
# Hand the document texts, in `docs` order, to the project's embedding helper.
embs = get_embeddings([entry["text"] for entry in docs])

In [12]:
# Sanity check: show only the first five components of the first embedding
# rather than dumping the whole vector.
embs[0][:5]

[-0.09049961715936661,
 0.023931946605443954,
 -0.021431203931570053,
 0.038706738501787186,
 -0.044790495187044144]

In [None]:
# ✅ FAISS Indexing
# Build the FAISS index from the document embeddings; the inspection cell that
# follows shows the resulting object is a faiss IndexFlatL2.
faiss_index = build_faiss_index(embs)

In [13]:
# Display the index object's repr to confirm which FAISS index type was built.
faiss_index

<faiss.swigfaiss.IndexFlatL2; proxy of <Swig Object of type 'faiss::IndexFlatL2 *' at 0x168775680> >

In [None]:
# ✅ Chroma Indexing (persistent)
# Persist the collection at the location configured in `settings` instead of a
# hard-coded folder name, so the notebook follows the project configuration.
chroma_store_path = settings['paths']['chroma_store']
chroma = get_chroma_collection(persist_dir=chroma_store_path)
# Store the document texts together with the embeddings computed earlier.
# NOTE(review): the store is persistent, so re-running this cell may add the
# same documents again — confirm how add_to_chroma handles repeated inserts.
add_to_chroma(chroma, [d["text"] for d in docs], embs)

In [None]:
# ✅ Query
# The question used for both retrieval back ends and for the LLM call.
query = "What is the role of FAISS in RAG?"
# get_embeddings takes a list, so wrap the single query and take element 0.
query_embedding = get_embeddings([query])[0]


In [8]:
# ✅ Retrieve with FAISS
# The FAISS path is queried with the pre-computed query embedding and is given
# the index together with the original documents as a pair.
faiss_results = retrieve_documents(
    query_embedding,
    (faiss_index, docs),
    method="faiss",
    k=2,
)
# Concatenate the retrieved texts, one per line, into a single context string.
faiss_context = "\n".join(hit["text"] for hit in faiss_results)

In [14]:
# Show the newline-joined text of the two documents retrieved via FAISS.
faiss_context

'FAISS is a library for efficient similarity search.\nRetrieval-Augmented Generation (RAG) combines search and generation.'

In [9]:
# ✅ Retrieve with Chroma
# Unlike the FAISS call, this passes the raw query string rather than
# `query_embedding`.
# NOTE(review): the recorded output of this cell shows Chroma downloading its
# all-MiniLM-L6-v2 ONNX model, which suggests the query is embedded by Chroma
# itself — confirm that matches the model behind get_embeddings, otherwise the
# query and the stored vectors are not comparable.
chroma_results = retrieve_documents(
    query,
    chroma,
    method="chroma",
    k=2,
)
# Concatenate the retrieved texts, one per line, into a single context string.
chroma_context = "\n".join(hit["text"] for hit in chroma_results)

/Users/sunnyraj/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx.tar.gz: 100%|██████████| 79.3M/79.3M [00:26<00:00, 3.14MiB/s]


In [15]:
# Show the newline-joined text of the two documents retrieved via Chroma.
chroma_context

'FAISS is a library for efficient similarity search.\nRetrieval-Augmented Generation (RAG) combines search and generation.'

In [18]:
# ✅ Run LLM (Ollama)
# Ask the local Mistral model (via the "ollama" provider) the same question.
# The retrieved FAISS text is supplied as a single prior user turn, while the
# `context` argument carries a fixed instruction string.
# NOTE(review): passing the instruction as `context` and the retrieved text as
# chat history looks inverted — confirm against query_llm's expected contract.
response = query_llm(
    query=query,
    context="Use the context below to answer the user's question.",
    chat_history=[dict(role="user", content=faiss_context)],
    provider="ollama",
    model="mistral",
)
print("📤 LLM Response:\n", response)

📤 LLM Response:
  In Retrieval-Augmented Generation (RAG), FAISS plays a significant role as it provides an efficient method for similarity search, which is crucial for the retrieval component of RAG. Specifically, FAISS helps to quickly find items (such as text snippets or images) that are semantically similar to the query being processed by RAG. This allows RAG to generate more relevant and contextually appropriate responses.


In [19]:
# ✅ Evaluation
# Hand-written reference answer that the generated response is scored against.
reference = "FAISS helps RAG perform fast similarity search during retrieval."
# Score the response against the reference and the FAISS-retrieved texts
# (the Chroma results are not evaluated here).
eval_result = evaluate_generation(
    response,
    reference,
    [hit["text"] for hit in faiss_results],
)
print("📊 Evaluation:", eval_result)

📊 Evaluation: {'f1': 0.16666666666666666, 'similarity': np.float64(0.2588311534159718), 'context_recall': np.float64(0.30637012111773326)}
