<a href="https://colab.research.google.com/github/AshokGit544/AI-Document-QA-RAG/blob/main/rag_document_qa.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

# ============================================================
# INSTALL LIBRARIES
# ============================================================
!pip -q install faiss-cpu sentence-transformers pypdf python-docx requests

# ============================================================
# IMPORTS
# ============================================================
import numpy as np
import faiss
import requests
import os

from sentence_transformers import SentenceTransformer
from pypdf import PdfReader
from docx import Document
from getpass import getpass

# ============================================================
# LOAD EMBEDDING MODEL (ONLY ONCE)
# ============================================================
# Instantiate the sentence-embedding model a single time at module level so
# that build_index() and retrieve() share the same instance instead of
# constructing it again on every call.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
# The status message previously carried a mis-encoded (mojibake) check mark.
print("✅ Embedding model loaded")

# ============================================================
# DOCUMENT LOADING FUNCTIONS
# ============================================================
def load_pdf(path):
    """Return the concatenated text of every page in the PDF at ``path``.

    Each page is passed through ``extract_text()``. Pages that yield a falsy
    result (``None`` or an empty string) are skipped; every remaining page's
    text is followed by a single newline.
    """
    page_texts = (page.extract_text() for page in PdfReader(path).pages)
    return "".join(f"{page_text}\n" for page_text in page_texts if page_text)

def load_docx(path):
    """Return the text of the DOCX file at ``path``.

    Only the entries of the document's ``paragraphs`` collection are read;
    their texts are joined with newline characters.
    """
    paragraphs = Document(path).paragraphs
    return "\n".join(paragraph.text for paragraph in paragraphs)

def load_document(path):
    """Load a document's text, dispatching on the file extension.

    The extension is compared case-insensitively, so ``report.PDF`` and
    ``Notes.Docx`` are handled the same as their lower-case spellings.

    Args:
        path: Location of the file, as a string or any ``os.PathLike``.

    Returns:
        The extracted text, as produced by ``load_pdf`` or ``load_docx``.

    Raises:
        ValueError: If the path ends in neither ``.pdf`` nor ``.docx``.
    """
    # Normalise only the copy used for matching; the loaders still receive
    # the caller's original path object.
    normalized = os.fspath(path).lower()
    if normalized.endswith(".pdf"):
        return load_pdf(path)
    if normalized.endswith(".docx"):
        return load_docx(path)
    raise ValueError("Only PDF and DOCX supported")

# ============================================================
# CHUNKING
# ============================================================
def chunk_text(text, chunk_size=500, overlap=50):
    """Split ``text`` into fixed-size, overlapping character windows.

    Consecutive windows start ``chunk_size - overlap`` characters apart, so
    each window repeats the last ``overlap`` characters of the previous one.
    The final window may be shorter than ``chunk_size``.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk; must be positive.
        overlap: Number of characters shared by neighbouring chunks; must
            satisfy ``0 <= overlap < chunk_size``.

    Returns:
        A list of substrings of ``text`` in document order. Empty input
        yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` is not positive, or ``overlap`` is
            negative or not smaller than ``chunk_size``.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    # A step of zero or less would never advance through the text.
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

    step = chunk_size - overlap
    return [text[start:start + chunk_size] for start in range(0, len(text), step)]

# ============================================================
# BUILD VECTOR STORE
# ============================================================
def build_index(text):
    """Chunk ``text``, embed the chunks, and store them in a FAISS L2 index.

    Args:
        text: Full document text to index.

    Returns:
        A ``(index, chunks)`` tuple: the ``faiss.IndexFlatL2`` holding one
        vector per chunk, and the list of chunk strings in the same order
        the vectors were added.

    Raises:
        ValueError: If ``text`` produces no chunks (for example, a document
            from which no text could be extracted), since there would be
            nothing to embed and no vector dimension to size the index with.
    """
    chunks = chunk_text(text)
    if not chunks:
        raise ValueError(
            "Cannot build an index: the document contains no extractable text"
        )

    # FAISS expects a contiguous float32 matrix of shape (n_chunks, dim).
    vectors = np.ascontiguousarray(embedding_model.encode(chunks), dtype="float32")

    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)

    return index, chunks

# ============================================================
# RETRIEVAL
# ============================================================
def retrieve(query, index, chunks, k=3):
    """Return the chunks whose embeddings are nearest to ``query``.

    Args:
        query: The question text to embed and search with.
        index: The FAISS index built over ``chunks``.
        chunks: Chunk strings, positionally aligned with the index vectors.
        k: Maximum number of chunks to return.

    Returns:
        Up to ``k`` chunk strings, in the order FAISS returns them. Fewer
        are returned when fewer than ``k`` chunks are available; an empty
        list when there are none or ``k`` is not positive.
    """
    # Never ask for more neighbours than there are chunks: FAISS pads missing
    # results with the label -1, which as a Python index would silently
    # return the last chunk.
    k = min(k, len(chunks))
    if k <= 0:
        return []

    query_vector = np.ascontiguousarray(
        embedding_model.encode([query]), dtype="float32"
    )

    _, neighbor_ids = index.search(query_vector, k)
    # Keep the -1 filter as a safeguard in case the index holds fewer vectors
    # than ``chunks`` has entries.
    return [chunks[i] for i in neighbor_ids[0] if i >= 0]

# ============================================================
# OPENROUTER SETUP
# ============================================================
# Use a key already exported as OPENROUTER_API_KEY when there is one, so
# non-interactive runs need no prompt; otherwise ask for it without echoing.
# Surrounding whitespace (easily picked up when pasting) is stripped because
# the value is placed verbatim into the Authorization header by call_llm().
OPENROUTER_API_KEY = (
    os.environ.get("OPENROUTER_API_KEY")
    or getpass("Enter your OpenRouter API Key: ")
).strip()
# Model identifier sent in the "model" field of every chat-completion request.
MODEL_NAME = "nvidia/nemotron-3-nano-30b-a3b:free"

def call_llm(prompt, timeout=120):
    """Send ``prompt`` to the OpenRouter chat-completions endpoint.

    The request carries a fixed system message instructing the model to
    answer only from the supplied context, followed by ``prompt`` as the
    user message.

    Args:
        prompt: The user message (context plus question).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` would wait indefinitely.

    Returns:
        The content of the first choice's message on success. On any
        failure (network error, non-JSON body, or a response without
        choices) a string starting with ``"Error: "`` is returned instead
        of raising, so an interactive session can keep going.
    """
    try:
        response = requests.post(
            url="https://openrouter.ai/api/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {OPENROUTER_API_KEY}",
                "Content-Type": "application/json",
            },
            json={
                "model": MODEL_NAME,
                "messages": [
                    {"role": "system", "content": "Answer ONLY using provided context. If not found, say it is not available."},
                    {"role": "user", "content": prompt},
                ],
            },
            timeout=timeout,
        )
    except requests.RequestException as exc:
        return f"Error: request failed: {exc}"

    try:
        result = response.json()
    except ValueError:
        # Gateways and proxies can answer with a non-JSON error page.
        return f"Error: HTTP {response.status_code}: response was not valid JSON"

    # Treat a missing or empty "choices" list the same way: report the payload.
    choices = result.get("choices") if isinstance(result, dict) else None
    if choices:
        return choices[0]["message"]["content"]
    return f"Error: {result}"

# ============================================================
# LOAD DOCUMENT (UPLOAD)
# ============================================================
from google.colab import files

# Ask the user for a file through the Colab upload widget, then extract its
# text and build the vector index that the question loop searches.
print("📂 Upload your document (PDF or DOCX)")
uploaded = files.upload()
if not uploaded:
    # Nothing came back from the widget (e.g. the dialog was dismissed);
    # fail with an actionable message instead of an IndexError.
    raise RuntimeError(
        "No file was uploaded. Re-run this cell and choose a PDF or DOCX file."
    )
# Only the first uploaded file is used.
file_path = next(iter(uploaded))

print("📄 Loading document...")
text = load_document(file_path)

print("📦 Building vector index...")
index, chunks = build_index(text)

print("✅ System Ready! Ask unlimited questions.")
print("Type 'exit' to stop.")
print("Type 'reload' to upload a new document.")

# ============================================================
# CONTINUOUS QUESTION LOOP
# ============================================================
# Interactive session: read a question, retrieve the closest chunks, and ask
# the LLM to answer from them. "exit" ends the session and "reload" swaps in
# a newly uploaded document.
while True:
    try:
        question = input("\nAsk a question: ").strip()
    except (KeyboardInterrupt, EOFError):
        # Interrupting the prompt is a normal way to stop; leave cleanly
        # instead of ending the cell with a traceback.
        print("\n👋 Exiting chatbot.")
        break

    # Ignore blank input rather than spending an LLM call on it.
    if not question:
        continue

    command = question.lower()

    if command == "exit":
        print("👋 Exiting chatbot.")
        break

    if command == "reload":
        print("📂 Upload new document")
        uploaded = files.upload()
        if not uploaded:
            print("No file uploaded; keeping the current document.")
            continue

        new_path = next(iter(uploaded))
        try:
            new_text = load_document(new_path)
            new_index, new_chunks = build_index(new_text)
        except Exception as exc:
            # Top-level boundary of the interactive session: report the
            # problem and keep the previously loaded document usable.
            print(f"Could not load {new_path!r}: {exc}")
            continue

        # Swap state only after the new document loaded and indexed fully.
        file_path, text = new_path, new_text
        index, chunks = new_index, new_chunks
        print("✅ New document loaded!")
        continue

    retrieved_chunks = retrieve(question, index, chunks)
    context = "\n\n".join(retrieved_chunks)

    final_prompt = f"""
Context:
{context}

Question:
{question}

Answer:
"""

    answer = call_llm(final_prompt)

    print("\n🤖 Answer:\n")
    print(answer)


Loading weights:   0%|          | 0/103 [00:00<?, ?it/s]

BertModel LOAD REPORT from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


✅ Embedding model loaded
Enter your OpenRouter API Key: ··········
📂 Upload your document (PDF or DOCX)


Saving LLM_Practice_Document_Sample.pdf to LLM_Practice_Document_Sample (2).pdf
📄 Loading document...
📦 Building vector index...
✅ System Ready! Ask unlimited questions.
Type 'exit' to stop.
Type 'reload' to upload a new document.

Ask a question: What compliance requirements are discussed?

🤖 Answer:

The context mentions that governance policies define **strict access controls, encryption standards, and compliance frameworks that are aligned with regulatory bodies**. These are the compliance requirements discussed.


KeyboardInterrupt: Interrupted by user