In [4]:
import faiss
import numpy as np
import requests
from pypdf import PdfReader
from sentence_transformers import SentenceTransformer

# ------------- CONFIG -------------

# Path of the PDF that the script entry point hands to run_rag().
PDF_PATH = "/home/sanoop/Documents/silv.pdf"
# Length of each chunk, in characters (chunk_text slices the text string).
CHUNK_SIZE = 500
# Number of characters shared by two consecutive chunks.
CHUNK_OVERLAP = 100
# How many chunks are retrieved from the index for each question.
TOP_K = 4
# Model name sent in the "model" field of the Ollama generate request.
OLLAMA_MODEL = "mistral"

# ------------- LOAD PDF -------------


def load_pdf(path):
    """Extract the text of every page of a PDF into a single string.

    Args:
        path: Location of the PDF file; passed straight to ``PdfReader``.

    Returns:
        The text of all pages in document order, each page followed by a
        newline. A page that yields no text contributes only its newline.
    """
    reader = PdfReader(path)
    # ``or ""`` keeps a page without extractable text from raising a
    # TypeError should the extractor hand back None instead of a string.
    # Joining once avoids re-copying the growing string for every page.
    return "".join((page.extract_text() or "") + "\n" for page in reader.pages)

# ------------- CHUNK TEXT -------------


def chunk_text(text, size, overlap):
    """Split *text* into fixed-size character windows that overlap.

    Consecutive chunks start ``size - overlap`` characters apart, so each
    chunk repeats the last ``overlap`` characters of the previous one. The
    final chunk may be shorter than ``size``.

    Args:
        text: The string to split.
        size: Maximum length of each chunk, in characters. Must be positive.
        overlap: Number of characters shared by neighbouring chunks. Must be
            smaller than ``size``.

    Returns:
        A list of chunks in document order; empty when *text* is empty.

    Raises:
        ValueError: If ``size`` is not positive or ``overlap >= size``.
    """
    if size <= 0:
        raise ValueError(f"size must be positive, got {size}")

    step = size - overlap
    if step <= 0:
        # A zero or negative stride would never advance past the end of the
        # text, so the loop would append chunks forever.
        raise ValueError(
            f"overlap ({overlap}) must be smaller than size ({size})"
        )

    return [text[start:start + size] for start in range(0, len(text), step)]

# -------- FAISS + EMBEDDINGS -----------


def build_faiss(chunks, model):
    """Embed every chunk and load the vectors into a flat L2 FAISS index.

    Args:
        chunks: Sequence of text chunks to embed.
        model: Object whose ``encode(chunks)`` returns one vector per chunk.

    Returns:
        A ``faiss.IndexFlatL2`` holding the float32 embeddings, added in the
        same order as *chunks*.
    """
    vectors = np.array(model.encode(chunks)).astype("float32")
    dimension = vectors.shape[1]

    l2_index = faiss.IndexFlatL2(dimension)
    l2_index.add(vectors)
    return l2_index

# ------------- RETRIEVE -------------



def retrieve(query, model, index, chunks, top_k):
    """Return the chunks whose embeddings are closest to the query.

    Args:
        query: The question text to embed.
        model: Object whose ``encode([query])`` returns a 2-D array of
            embeddings (one row per input string).
        index: Search index exposing ``search(vectors, k)`` that returns
            ``(distances, ids)``, with ids referring to positions in *chunks*.
        chunks: The chunk list the index was built from.
        top_k: Maximum number of chunks to return.

    Returns:
        Up to ``top_k`` chunks, in the order the index ranks them. Fewer are
        returned when fewer are available; an empty list when there is
        nothing to search or ``top_k`` is not positive.
    """
    if not chunks or top_k <= 0:
        return []

    q_emb = np.asarray(model.encode([query]), dtype="float32")
    # Never ask for more neighbours than there are chunks.
    k = min(top_k, len(chunks))
    _, indices = index.search(q_emb, k)

    # FAISS pads missing results with -1; indexing chunks with it would
    # silently return the last chunk as a bogus hit, so drop invalid ids.
    n_chunks = len(chunks)
    return [chunks[int(i)] for i in indices[0] if 0 <= i < n_chunks]


# ------------- OLLAMA LLM -------------


def ask_ollama(question, context, *, timeout=(10, 600)):
    """Ask the local Ollama server to answer a question from given context.

    Builds a prompt that embeds *context* and *question*, posts it to the
    Ollama ``/api/generate`` endpoint on localhost with streaming disabled,
    and returns the generated text.

    Args:
        question: The user's question, inserted verbatim into the prompt.
        context: Retrieved text the model is told to answer from.
        timeout: ``(connect, read)`` timeout in seconds passed to
            ``requests.post``. The read timeout is generous because the
            whole completion is returned in a single response.

    Returns:
        The value of the ``"response"`` field of the JSON reply.

    Raises:
        requests.exceptions.RequestException: On connection failure, timeout,
            or a non-2xx HTTP status.
        KeyError: If the JSON reply has no ``"response"`` field.
    """
    prompt = f"""
Use the context below to answer the question.
If the answer is not in the context, say "I don't know".

Context:
{context}

Question:
{question}
"""

    response = requests.post(
        "http://localhost:11434/api/generate",
        json={
            "model": OLLAMA_MODEL,
            "prompt": prompt,
            "stream": False,
        },
        # Without a timeout an unresponsive server blocks this call forever.
        timeout=timeout,
    )
    # Surface HTTP errors (e.g. unknown model) as such, rather than as a
    # confusing KeyError on the missing "response" field below.
    response.raise_for_status()

    return response.json()["response"]


# ---------------- MAIN ----------------

def run_rag(pdf_path, question):
    """Answer *question* from a PDF and print the full trace.

    Loads the PDF text, splits it into overlapping chunks, embeds and indexes
    them, retrieves the TOP_K chunks nearest to the question, asks the Ollama
    model with those chunks as context, and prints the question, the
    retrieved chunks and the final answer.

    Args:
        pdf_path: Path of the PDF to read.
        question: The question to answer.
    """
    separator = "=============================="

    print("PDF is being loaded...")
    pieces = chunk_text(load_pdf(pdf_path), CHUNK_SIZE, CHUNK_OVERLAP)

    print("Embeddings are being created...")
    encoder = SentenceTransformer("all-MiniLM-L6-v2")
    vector_index = build_faiss(pieces, encoder)

    # The same encoder embeds both the chunks and the question.
    hits = retrieve(question, encoder, vector_index, pieces, TOP_K)
    answer = ask_ollama(question, "\n\n".join(hits))

    print("\n" + separator)
    print("USER QUESTION:")
    print(question)

    print("\nRETRIEVED CONTEXT:")
    for number, passage in enumerate(hits, start=1):
        print(f"\n--- Chunk {number} ---\n{passage}")

    print("\nFINAL ANSWER:")
    print(answer)
    print(separator)

# ----- RUN -----


# Example query, run against the PDF configured in PDF_PATH.
question = "What caused the disappearance of the horse?"
run_rag(pdf_path=PDF_PATH, question=question)


PDF is being loaded...
Embeddings are being created...

USER QUESTION:
What caused the disappearance of the horse?

RETRIEVED CONTEXT:

--- Chunk 1 ---
 had, while standing at the window, drugged
his curried mutton, and so deprived the stables of
their watchman. As to the missing horse, there were
abundant proofs in the mud which lay at the bottom
of the fatal hollow that he had been there at the time
of the struggle. But from that morning he has disap-
peared, and although a large reward has been offered,
and all the gypsies of Dartmoor are on the alert, no
news has come of him. Finally, an analysis has shown
that the remains of his supper lef

--- Chunk 2 ---
 killed John Straker for the
instant, and conﬁne ourselves to ﬁnding out what has
become of the horse. Now, supposing that he broke
away during or after the tragedy, where could he have
gone to? The horse is a very gregarious creature. If
left to himself his instincts would have been either to
return to King’s Pyland or go over 