In [1]:
#!pip install sentence-transformers faiss-cpu PyPDF2 requests


In [2]:
import os
import numpy as np
import faiss
import requests
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer, CrossEncoder


In [3]:
print("Loading embedding model...")
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

print("Loading reranker model...")
reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")

print("Models loaded successfully.")


Loading embedding model...




Loading weights:   0%|          | 0/103 [00:00<?, ?it/s]

[1mBertModel LOAD REPORT[0m from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m


Loading reranker model...


Loading weights:   0%|          | 0/105 [00:00<?, ?it/s]

[1mBertForSequenceClassification LOAD REPORT[0m from: cross-encoder/ms-marco-MiniLM-L-6-v2
Key                          | Status     |  | 
-----------------------------+------------+--+-
bert.embeddings.position_ids | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m


Models loaded successfully.


In [4]:
def extract_text_with_metadata(pdf_path):
    reader = PdfReader(pdf_path)
    pages = []
    
    for page_number, page in enumerate(reader.pages):
        text = page.extract_text()
        if text:
            pages.append({
                "page_number": page_number,
                "text": text
            })
    return pages


In [5]:
def chunk_pages(pages, pdf_name, chunk_size=300, overlap=50):
    metadata_store = []
    
    for page in pages:
        text = page["text"]
        page_number = page["page_number"]
        
        start = 0
        chunk_index = 0
        
        while start < len(text):
            end = start + chunk_size
            chunk_text = text[start:end]
            
            metadata_store.append({
                "chunk_id": f"{pdf_name}_page_{page_number}_chunk_{chunk_index}",
                "document_name": pdf_name,
                "page_number": page_number,
                "chunk_index": chunk_index,
                "text": chunk_text
            })
            
            start += chunk_size - overlap
            chunk_index += 1
            
    return metadata_store


In [6]:
pdf_path = "sample.pdf"
pdf_name = os.path.basename(pdf_path)

pages = extract_text_with_metadata(pdf_path)
metadata_store = chunk_pages(pages, pdf_name)

print("Total chunks created:", len(metadata_store))


Total chunks created: 6


In [7]:
chunk_texts = [item["text"] for item in metadata_store]

embeddings = embedding_model.encode(chunk_texts)
embeddings = np.array(embeddings).astype("float32")

faiss.normalize_L2(embeddings)

dimension = embeddings.shape[1]
index = faiss.IndexFlatIP(dimension)
index.add(embeddings)

print("FAISS index built.")


FAISS index built.


In [8]:
def retrieve(query, k=8):
    query_embedding = embedding_model.encode([query])
    query_embedding = np.array(query_embedding).astype("float32")
    faiss.normalize_L2(query_embedding)
    
    distances, indices = index.search(query_embedding, k)
    
    results = []
    for i, idx in enumerate(indices[0]):
        results.append({
            "text": metadata_store[idx]["text"],
            "page_number": metadata_store[idx]["page_number"],
            "score": float(distances[0][i])
        })
    
    return results


In [9]:
SIMILARITY_THRESHOLD = 0.3

def filter_by_threshold(retrieved_chunks):
    filtered = [
        chunk for chunk in retrieved_chunks
        if chunk["score"] >= SIMILARITY_THRESHOLD
    ]
    return filtered


In [10]:
def rerank(query, retrieved_chunks):
    if len(retrieved_chunks) == 0:
        return []
    
    pairs = [(query, chunk["text"]) for chunk in retrieved_chunks]
    scores = reranker.predict(pairs)
    
    for i, score in enumerate(scores):
        retrieved_chunks[i]["rerank_score"] = float(score)
    
    reranked = sorted(
        retrieved_chunks,
        key=lambda x: x["rerank_score"],
        reverse=True
    )
    
    return reranked


In [11]:
def build_prompt(query, selected_chunks):
    context = "\n\n".join(
        [chunk["text"] for chunk in selected_chunks]
    )
    
    prompt = f"""
You are a helpful assistant.

Use ONLY the provided context to answer the question.
If the answer is not in the context, say "I don't know."

Context:
{context}

Question:
{query}

Answer:
"""
    return prompt


In [12]:
def generate_answer(prompt):
    url = "http://localhost:11434/api/generate"
    
    payload = {
        "model": "mistral",
        "prompt": prompt,
        "stream": False
    }
    
    response = requests.post(url, json=payload)
    
    return response.json()["response"]


In [13]:
def rag_v2(query, retrieve_k=8, final_k=3):
    
    print("\nUser Question:", query)
    
    # Step 1: Retrieve
    retrieved = retrieve(query, k=retrieve_k)
    print(f"\nRetrieved {len(retrieved)} chunks")
    
    # Step 2: Filter
    filtered = filter_by_threshold(retrieved)
    print(f"After threshold filtering: {len(filtered)} chunks")
    
    # Step 3: Re-rank
    reranked = rerank(query, filtered)
    
    # Step 4: Select top final_k
    selected = reranked[:final_k]
    
    print("\nTop Selected Chunks After Re-ranking:\n")
    for i, chunk in enumerate(selected):
        print(f"Rank {i+1}")
        print("Page:", chunk["page_number"])
        print("Similarity Score:", round(chunk["score"], 4))
        print("Rerank Score:", round(chunk["rerank_score"], 4))
        print(chunk["text"][:200])
        print("-" * 60)
    
    # Step 5: Build prompt
    prompt = build_prompt(query, selected)
    
    # Step 6: Generate answer
    answer = generate_answer(prompt)
    
    print("\nFinal Answer:\n")
    print(answer)
    
    return answer


In [14]:
rag_v2(
    "What are applications of machine learning?",
    retrieve_k=8,
    final_k=3
)



User Question: What are applications of machine learning?

Retrieved 8 chunks
After threshold filtering: 6 chunks

Top Selected Chunks After Re-ranking:

Rank 1
Page: 0
Similarity Score: 0.7361
Rerank Score: 9.0974
eled data, and reinforcement learning
learns through rewards and penalties. Applications of machine learning include recommendation
systems, fraud detection, natural language processing, computer visi
------------------------------------------------------------
Rank 2
Page: 0
Similarity Score: 0.6534
Rerank Score: 5.378
Introduction to Machine Learning
Machine learning is a subset of artificial intelligence that focuses on building systems that learn from
data. Instead of being explicitly programmed with rules, machi
------------------------------------------------------------
Rank 3
Page: 0
Similarity Score: 0.5746
Rerank Score: 4.1344
sions based on historical information. There are three main types of machine learning:
supervised learning, unsupervised learning, and rein

' Applications of machine learning include recommendation systems, fraud detection, natural language processing, computer vision, and predictive analytics.'