## 1. Install Libraries and Setup Environment

In [None]:
# If running for the first time:
# !pip install sentence-transformers faiss-cpu PyMuPDF requests streamlit python-dotenv


In [None]:
from sentence_transformers import SentenceTransformer
import fitz  
import os
import faiss
import pickle
import requests

## 2. Load and Chunk Documents


![RAG Pipeline](images/indexing.png)

In [None]:

# Directory that load_documents() scans for PDF files.
DOCS_PATH = "data/documents/"
# Maximum length, in characters, of each slice chunk_text() cuts from a page's text.
CHUNK_SIZE = 500
# Characters shared by consecutive chunks: chunk_text() advances by
# CHUNK_SIZE - CHUNK_OVERLAP between slices.
CHUNK_OVERLAP = 50

def load_documents():
    """
    Load PDF documents from the DOCS_PATH.

    Files are visited in sorted filename order so repeated runs produce the
    same page (and therefore chunk) ordering. Pages whose extracted text is
    empty after stripping whitespace are skipped.

    Returns a list of dicts with keys:
    - 'source': a string identifier (filename and 1-based page number)
    - 'text': the stripped text extracted from the page
    """
    docs = []
    # os.listdir() returns entries in arbitrary order; sort for reproducibility.
    for filename in sorted(os.listdir(DOCS_PATH)):
        if not filename.lower().endswith(".pdf"):
            continue
        doc_path = os.path.join(DOCS_PATH, filename)
        # Use the document as a context manager so the underlying file
        # handle is released even if text extraction raises.
        with fitz.open(doc_path) as doc:
            for page_no, page in enumerate(doc, start=1):
                text = page.get_text().strip()
                if text:
                    docs.append({"source": f"{filename} - Page {page_no}", "text": text})
    return docs

def chunk_text(docs, chunk_size=None, chunk_overlap=None):
    """
    Chunk each document into overlapping character windows, preserving the source info.

    Parameters:
    - docs: iterable of dicts with 'text' and 'source' keys.
    - chunk_size: maximum characters per chunk; defaults to CHUNK_SIZE.
    - chunk_overlap: characters shared by consecutive chunks; defaults to
      CHUNK_OVERLAP. Must satisfy 0 <= chunk_overlap < chunk_size.

    Returns a list of chunks, each is a dict with keys:
    - 'text': the text chunk
    - 'source': the source info from the parent document

    Raises ValueError if the size/overlap combination would not advance
    through the text.
    """
    size = CHUNK_SIZE if chunk_size is None else chunk_size
    overlap = CHUNK_OVERLAP if chunk_overlap is None else chunk_overlap
    if size <= 0:
        raise ValueError(f"chunk_size must be positive, got {size}")
    if not 0 <= overlap < size:
        # A non-positive step would either crash range() or silently yield nothing.
        raise ValueError(
            f"chunk_overlap must satisfy 0 <= overlap < chunk_size, got {overlap} (size {size})"
        )
    step = size - overlap

    chunks = []
    for doc in docs:
        text = doc["text"]
        source = doc["source"]
        for start in range(0, len(text), step):
            # The previous window ends at start + overlap; if that already
            # reached the end of the text, this window would only repeat a
            # tail that is fully contained in the previous chunk.
            if start and start + overlap >= len(text):
                break
            chunks.append({"text": text[start:start + size], "source": source})
    return chunks

# Run ingestion once for this notebook session: extract the non-empty PDF
# pages, then split each page's text into overlapping chunks.
documents = load_documents()
chunks = chunk_text(documents)
print(f"Loaded {len(documents)} pages and chunked into {len(chunks)} pieces.")


## 3. Build Embeddings and FAISS Index

In [None]:
# Model identifier handed to SentenceTransformer below.
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
# File the FAISS index is written to by build_faiss_index() and read from by load_faiss_index().
INDEX_PATH = "embeddings/index.faiss"
# Pickle file holding the list of chunk dicts saved alongside the index.
CHUNKS_PATH = "embeddings/chunks.pkl"
# Shared encoder: used to embed chunks when indexing and questions when querying.
model = SentenceTransformer(EMBEDDING_MODEL)

def build_faiss_index(chunks):
    """
    Build a FAISS index from the text of the chunks.
    Save the vector index (INDEX_PATH) and the mapping to chunks with
    sources (CHUNKS_PATH), creating their parent directories if needed.

    Parameters:
    - chunks: non-empty list of dicts with at least a 'text' key.

    Raises ValueError if `chunks` is empty, since no embedding dimension
    can be derived and an empty index is useless for retrieval.
    """
    if not chunks:
        raise ValueError("Cannot build a FAISS index from an empty list of chunks.")

    # Encode using only the text
    embeddings = model.encode([chunk["text"] for chunk in chunks])
    dim = embeddings.shape[1]
    index = faiss.IndexFlatL2(dim)
    index.add(embeddings)

    # Neither open() nor faiss.write_index() creates missing directories.
    for path in (CHUNKS_PATH, INDEX_PATH):
        parent = os.path.dirname(path)
        if parent:
            os.makedirs(parent, exist_ok=True)

    with open(CHUNKS_PATH, "wb") as f:
        pickle.dump(chunks, f)
    faiss.write_index(index, INDEX_PATH)
    print("‚úÖ FAISS index created and saved.")

# Embed the chunks produced above and persist the index and chunk mapping to disk.
build_faiss_index(chunks)


## 🔍 4. Retrieve Relevant Chunks

![RAG Pipeline](images/retriever.png)

In [None]:
def load_faiss_index():
    """
    Read the persisted FAISS index from INDEX_PATH and the pickled chunk
    list from CHUNKS_PATH.

    Returns a tuple (index, chunks).
    """
    vector_index = faiss.read_index(INDEX_PATH)
    # NOTE: pickle.load executes arbitrary code from the file; only load a
    # chunks file that this notebook (or another trusted source) produced.
    with open(CHUNKS_PATH, "rb") as handle:
        stored_chunks = pickle.load(handle)
    return vector_index, stored_chunks

def get_relevant_chunks(question, top_k=3):
    """
    Return up to top_k most relevant chunks (with their source info) for the question.

    The index and chunk mapping are loaded from disk on every call, the
    question is embedded with the shared `model`, and the nearest vectors
    are looked up in the FAISS index.

    Returns a list of chunk dicts ('text' and 'source'). The list is shorter
    than top_k when the index holds fewer than top_k vectors.
    """
    index, chunks = load_faiss_index()
    q_embed = model.encode([question])
    _, indices = index.search(q_embed, top_k)
    # FAISS pads missing neighbours with label -1; without this filter
    # chunks[-1] would silently return the last chunk as a bogus hit.
    return [chunks[i] for i in indices[0] if i >= 0]

# Quick check of retrieval: print each retrieved chunk under its source label.
question = "Tell me about Chain of Abstraction?"
relevant_chunks = get_relevant_chunks(question)
for chunk in relevant_chunks:
    print(f"\n[{chunk['source']}]:\n{chunk['text']}...")


## 💬 5. Query Local LLM (via Ollama)

![RAG Pipeline](images/RAG.png)

In [5]:

# Model name sent in the "model" field of the request payload built by query_llm().
OLLAMA_MODEL = "mistral"
# HTTP endpoint that query_llm() POSTs the payload to.
OLLAMA_URL = "http://localhost:11434/api/generate"

def query_llm(prompt, timeout=300):
    """
    Send `prompt` to the Ollama endpoint at OLLAMA_URL and return the generated text.

    Parameters:
    - prompt: the full prompt string to send.
    - timeout: seconds passed to requests as the request timeout, so an
      unresponsive server cannot block the caller forever.

    Returns the value of the "response" field of the JSON reply. On failure
    this function does not raise: it returns a human-readable error string
    instead, so callers that simply print the answer keep working.
    """
    payload = {
        "model": OLLAMA_MODEL,
        "prompt": prompt,
        "stream": False
    }

    try:
        response = requests.post(OLLAMA_URL, json=payload, timeout=timeout)
        response.raise_for_status()
        return response.json()["response"]
    except requests.exceptions.RequestException as e:
        return f"Error connecting to Ollama: {e}"
    except (KeyError, ValueError) as e:
        # Reply was not JSON, or JSON without a "response" field.
        return f"Unexpected response from Ollama: {e!r}"


In [None]:
# Build a bare prompt by hand: each retrieved chunk under its source label,
# followed by the question, then send it to the local LLM.
context_blocks = [f"[{c['source']}]:\n{c['text']}" for c in relevant_chunks]
prompt = "\n\n".join(context_blocks) + f"\n\nQuestion:\n{question}"
answer = query_llm(prompt)
print("\nüí¨ Answer:\n", answer)

## 6. Full RAG Pipeline Function

In [7]:
# Configuration for the full pipeline. These repeat the values assigned in the
# earlier cells (presumably so this section can be run on its own).
CHUNK_SIZE = 500
CHUNK_OVERLAP = 50
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
INDEX_PATH = "embeddings/index.faiss"
CHUNKS_PATH = "embeddings/chunks.pkl"
DOCS_PATH = "data/documents/"
# NOTE: this constructs a second SentenceTransformer and rebinds `model`.
model = SentenceTransformer(EMBEDDING_MODEL)

In [8]:
def build_index():
    """
    Run the whole indexing pipeline with progress messages: load the PDF
    pages, chunk them, then build and save the FAISS index.
    """
    print("üìÑ Loading documents...")
    pages = load_documents()

    print("‚úÇÔ∏è Chunking text...")
    pieces = chunk_text(pages)
    total = len(pieces)

    print(f"üî¢ Total chunks: {total}")
    print("üß† Building FAISS index...")
    build_faiss_index(pieces)

    print("‚úÖ Index built and saved!")

In [9]:
# Prompt skeleton with two placeholders: {context} receives the annotated
# chunks and {question} receives the user's question.
DEFAULT_TEMPLATE = """You are a helpful assistant. Use the following context to answer the question.
If you don't know the answer, just say so.

Context:
{context}

Question:
{question}
"""


def load_prompt_template():
    """Return the prompt template string (always DEFAULT_TEMPLATE)."""
    return DEFAULT_TEMPLATE


def build_prompt(chunks, question):
    """
    Render the prompt template for `question` using `chunks` as context.

    Every chunk is written as "[<source>]:" on one line followed by its
    text; the annotated chunks are separated by a blank line.
    """
    annotated = (f"[{entry['source']}]:\n{entry['text']}" for entry in chunks)
    context = "\n\n".join(annotated)
    return load_prompt_template().format(context=context, question=question)

In [10]:
def run_rag(question):
    """
    Answer `question` end to end: retrieve chunks, print a short preview of
    each, build the prompt, and query the local LLM.

    Returns a tuple (answer, chunks), where `chunks` are the retrieved
    chunk dicts that were used as context.
    """
    print("üîç Retrieving relevant chunks...")
    retrieved = get_relevant_chunks(question)

    print("\n Retrieved Chunks (showing sources):")
    for rank, item in enumerate(retrieved, start=1):
        # First 100 characters on a single line, as a short preview.
        snippet = item["text"][:100].replace("\n", " ")
        print(f"   {rank}. [{item['source']}]: {snippet}...")

    print("\nBuilding prompt with context...")
    llm_prompt = build_prompt(retrieved, question)

    print("Querying local LLM...")
    return query_llm(llm_prompt), retrieved

In [None]:
# Run the full pipeline; run_rag() returns the LLM answer and the chunks used as context.
question = "Tell me about the Graph based agents?"
answer, relevant_chunks = run_rag(question)

In [None]:
# Display the answer returned by run_rag() in the previous cell.
print(answer)