## 🔧 1. Setup Environment

In [None]:
# If running for the first time, uncomment the next line to install the dependencies:
# !pip install sentence-transformers faiss-cpu PyMuPDF requests streamlit python-dotenv


In [None]:
from sentence_transformers import SentenceTransformer
import fitz  # PyMuPDF
import os
import faiss
import pickle
import requests

## 📄 2. Load and Chunk Documents

In [None]:

# Directory whose top-level entries are scanned for PDF files by load_documents().
DOCS_PATH = "data/documents/"
# Length, in characters, of each text slice produced by chunk_text().
CHUNK_SIZE = 500
# Number of characters that consecutive chunks of the same page share.
CHUNK_OVERLAP = 50

def load_documents():
    """Extract the text of every non-empty page of every PDF in DOCS_PATH.

    Only the top level of DOCS_PATH is scanned; files are matched by a
    case-insensitive ".pdf" suffix.

    Returns:
        A list of dicts, one per page that contains text, with keys
        "source" ("<filename> - Page <n>", pages numbered from 1) and
        "text" (the page text with surrounding whitespace stripped).
    """
    docs = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the page
    # order (and therefore the chunk/index order) reproducible between runs.
    for filename in sorted(os.listdir(DOCS_PATH)):
        if not filename.lower().endswith(".pdf"):
            continue
        path = os.path.join(DOCS_PATH, filename)
        # The context manager closes the PDF handle even if a page fails to
        # parse, instead of leaking one open document per file.
        with fitz.open(path) as doc:
            for page_no, page in enumerate(doc, start=1):
                text = page.get_text().strip()
                if text:
                    docs.append({"source": f"{filename} - Page {page_no}", "text": text})
    return docs

def chunk_text(docs, chunk_size=None, chunk_overlap=None):
    """Split page texts into overlapping, fixed-size character chunks.

    Args:
        docs: Iterable of dicts with "text" and "source" keys.
        chunk_size: Maximum number of characters per chunk. Defaults to the
            module-level CHUNK_SIZE.
        chunk_overlap: Number of characters shared by consecutive chunks of
            the same document. Defaults to the module-level CHUNK_OVERLAP.

    Returns:
        A list of {"text": ..., "source": ...} dicts in input order. Each
        chunk keeps the "source" of the document it was cut from.

    Raises:
        ValueError: If chunk_size is not positive or chunk_overlap is not in
            the range [0, chunk_size).
    """
    size = CHUNK_SIZE if chunk_size is None else chunk_size
    overlap = CHUNK_OVERLAP if chunk_overlap is None else chunk_overlap
    # A step of zero makes range() raise a cryptic error and a negative step
    # silently produces no chunks at all, so reject both up front.
    if size <= 0 or not 0 <= overlap < size:
        raise ValueError(
            f"chunk_overlap ({overlap}) must be >= 0 and smaller than "
            f"chunk_size ({size}), and chunk_size must be positive"
        )
    step = size - overlap

    chunks = []
    for doc in docs:
        text = doc["text"]
        source = doc["source"]
        for start in range(0, len(text), step):
            chunks.append({"text": text[start:start + size], "source": source})
            # Once a chunk reaches the end of the text, any further chunk
            # would consist solely of characters already in this one.
            if start + size >= len(text):
                break
    return chunks

# Extract the PDF pages and split them into chunks. `documents` and `chunks`
# stay at module level because the indexing cell below reads `chunks`.
documents = load_documents()
chunks = chunk_text(documents)
print(f"Loaded {len(documents)} pages and chunked into {len(chunks)} pieces.")


## 🧠 3. Build Embeddings and FAISS Index

In [None]:

# Embedding model; kept at module level because retrieve_chunks() reuses it
# to embed questions with the same model the index was built with.
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Fail with a clear message instead of an opaque IndexError further down
# when no chunk text was produced.
if not chunks:
    raise ValueError(
        "No text chunks to index; add PDFs with extractable text before building the index."
    )

# One embedding row per chunk, in the same order as `chunks`, so a row number
# returned by a FAISS search can be used directly as an index into `chunks`.
embeddings = model.encode([chunk["text"] for chunk in chunks])
dim = embeddings.shape[1]
index = faiss.IndexFlatL2(dim)
index.add(embeddings)

# Save index + chunks (create the output directory on a fresh checkout so
# neither write fails on a missing path).
os.makedirs("embeddings", exist_ok=True)
faiss.write_index(index, "embeddings/index.faiss")
with open("embeddings/chunks.pkl", "wb") as f:
    pickle.dump(chunks, f)

print("✅ Index built and saved.")


## 🔍 4. Retrieve Relevant Chunks

In [None]:

def retrieve_chunks(question, top_k=3):
    """Return the stored chunks whose embeddings are nearest to the question.

    The FAISS index and the pickled chunk list are re-read from the
    "embeddings/" directory on every call. The question is embedded with the
    module-level `model`.

    Args:
        question: Query text to embed and search for.
        top_k: Maximum number of chunks to return.

    Returns:
        A list of at most `top_k` entries from the pickled chunk list, in the
        order FAISS returned them. Fewer entries are returned when the index
        holds fewer than `top_k` vectors.
    """
    index = faiss.read_index("embeddings/index.faiss")
    # NOTE: pickle.load executes arbitrary code from the file; only load a
    # chunks.pkl that was written by this notebook.
    with open("embeddings/chunks.pkl", "rb") as f:
        chunks = pickle.load(f)
    q_embed = model.encode([question])
    _, indices = index.search(q_embed, top_k)
    # FAISS pads the result with -1 when it finds fewer than top_k neighbours;
    # without this filter chunks[-1] would silently return the last chunk.
    return [chunks[i] for i in indices[0] if i >= 0]

# Demo query: `question` and `relevant_chunks` stay at module level because
# the Ollama cell below builds its prompt from them.
question = "What is the main idea of the document?"
relevant_chunks = retrieve_chunks(question)
# Preview the first 300 characters of each hit; the loop counter was never
# used, so iterate over the chunks directly.
for chunk in relevant_chunks:
    print(f"\n[{chunk['source']}]:\n{chunk['text'][:300]}...")


## 💬 5. Query Local LLM (via Ollama)

In [None]:

# Model name sent in the "model" field of every request made by query_ollama().
OLLAMA_MODEL = "mistral"
# Endpoint that query_ollama() POSTs to (a server on localhost, port 11434).
OLLAMA_URL = "http://localhost:11434/api/generate"

def query_ollama(prompt, timeout=300):
    """Send a prompt to the Ollama endpoint and return the generated text.

    Args:
        prompt: Full prompt text to send.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to `requests.post`; use None to wait indefinitely.

    Returns:
        The value of the "response" field of the JSON reply.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
        requests.exceptions.HTTPError: If the server replies with a 4xx/5xx
            status.
    """
    payload = {
        "model": OLLAMA_MODEL,
        "prompt": prompt,
        "stream": False
    }
    # Without a timeout, requests waits forever if the server stalls.
    response = requests.post(OLLAMA_URL, json=payload, timeout=timeout)
    # Surface HTTP failures directly instead of a misleading KeyError when the
    # error payload has no "response" field.
    response.raise_for_status()
    return response.json()["response"]

# Build one "[source]:\ntext" section per retrieved chunk, separate them with
# blank lines, and append the question at the end.
context_sections = [f"[{c['source']}]:\n{c['text']}" for c in relevant_chunks]
prompt = "\n\n".join(context_sections) + f"\n\nQuestion:\n{question}"

# Ask the local model and show its reply.
answer = query_ollama(prompt)
print("\n💬 Answer:\n", answer)


## 🧩 6. Full RAG Pipeline Function

In [None]:

def run_rag(question):
    """Answer a question using retrieved chunks as context.

    Retrieves chunks with retrieve_chunks(), formats each one as a
    "[source]:\\ntext" section, appends the question, and returns whatever
    query_ollama() returns for that prompt.
    """
    context_parts = []
    for hit in retrieve_chunks(question):
        context_parts.append(f"[{hit['source']}]:\n{hit['text']}")
    full_prompt = "\n\n".join(context_parts) + f"\n\nQuestion:\n{question}"
    return query_ollama(full_prompt)

# Exercise the whole pipeline once with a summarisation request.
summary = run_rag("Summarize this document in 3 lines.")
print(summary)
