# RAG application built on Gemini (Dynamic Multi-PDF Input)

You can provide multiple `pdf_urls` or local paths; all will be loaded, chunked, embedded, and queried together.

In [None]:
from langchain_community.document_loaders import PyPDFLoader

# Load every configured PDF into `data`.
# Uses the multi-PDF list `pdf_paths` when an earlier-run cell defined it and it is
# non-empty; otherwise falls back to the single `pdf_path`.
paths_to_load = list(globals().get('pdf_paths') or [])
if not paths_to_load:
    if 'pdf_path' not in globals():
        raise NameError("Define 'pdf_paths' or 'pdf_path' before loading PDFs")
    paths_to_load = [pdf_path]

data = []
for p in paths_to_load:
    loader = PyPDFLoader(str(p))
    # PyPDFLoader.load() returns one Document per page, so `data` is a flat list
    # of pages across all PDFs (each page keeps its own 'source' metadata).
    data.extend(loader.load())

In [None]:
# Construct list of PDF paths from environment (PDF_PATHS) if provided
import os
import pathlib


def collect_pdf_paths(raw, sep='|'):
    """Parse a `sep`-separated string of file paths into a list of existing files.

    Args:
        raw: Value such as the PDF_PATHS environment variable, e.g. "a.pdf|b.pdf".
        sep: Separator between entries (default '|').

    Returns:
        A list of pathlib.Path objects, in first-seen order and without duplicates.
        Blank entries are ignored and entries that are not existing files are
        skipped with a printed notice.
    """
    found = []
    for part in raw.split(sep):
        part = part.strip()
        if not part:
            # Path('') is '.', which exists -- a blank entry (e.g. a trailing
            # separator) must never be treated as a PDF.
            continue
        candidate = pathlib.Path(part)
        if not candidate.is_file():
            print(f"Skipping PDF path that is not an existing file: {candidate}")
            continue
        if candidate not in found:
            found.append(candidate)
    return found


if 'pdf_paths' not in globals():
    pdf_paths = collect_pdf_paths(os.getenv('PDF_PATHS', ''))
    # Ensure at least the single pdf_path if available. Compare as Path so that a
    # plain-string pdf_path is not added twice next to its Path equivalent.
    if 'pdf_path' in globals() and pathlib.Path(pdf_path) not in pdf_paths:
        pdf_paths = [pdf_path] + pdf_paths
print(f"PDF count for embedding: {len(pdf_paths)}")
for p in pdf_paths:
    print(' -', p)

In [None]:
from langchain_community.document_loaders import PyPDFLoader

# Reload `data` from every configured PDF. Loading only `pdf_path` here would
# discard the other files whenever `pdf_paths` lists several PDFs.
paths_to_load = list(globals().get('pdf_paths') or []) or [pdf_path]
data = []
for p in paths_to_load:
    loader = PyPDFLoader(str(p))
    data.extend(loader.load())  # PyPDFLoader yields one Document per page

In [19]:
# Number of Document objects returned by the loader.
len(data)

114

In [20]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Break the loaded Documents into chunks (chunk_size=1000) ready for embedding.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
)
docs = text_splitter.split_documents(data)

# Report how many chunks the splitter produced.
print("Total number of documents: ", len(docs))

Total number of documents:  115


In [21]:
# Inspect one chunk (index 7) to check its metadata and page content.
docs[7]

Document(metadata={'producer': 'Adobe PDF Library 20.6.74', 'creator': 'Acrobat PDFMaker 20 for PowerPoint', 'creationdate': '2024-09-23T21:04:11+06:00', 'moddate': '2025-01-08T07:46:38+06:00', 'title': 'Theory of Computation', 'author': 'Md Mosaddek Khan', 'source': 'Automata.pdf', 'total_pages': 114, 'page': 7, 'page_label': '8'}, page_content='Basics of Strings \nMMK@CSEDU')

In [None]:
# Use a per-multi-PDF persist directory (hash of filenames + sizes)
from hashlib import sha1
from pathlib import Path

# Coerce to Path so plain-string entries in pdf_paths work as well.
pdf_files = [Path(p) for p in pdf_paths]
sig = ';'.join(sorted(f"{p.name}:{p.stat().st_size}" for p in pdf_files if p.exists()))
persist_dir = f"chroma_store_{sha1(sig.encode()).hexdigest()[:8]}"
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

store_path = Path(persist_dir)
if store_path.is_dir() and any(store_path.iterdir()):
    # A store for exactly this set of PDFs already exists: reopen it. Calling
    # from_documents again would re-embed everything and append duplicate chunks.
    # Delete the directory to force a rebuild (e.g. after changing the chunking).
    vectorstore = Chroma(
        persist_directory=persist_dir,
        embedding_function=embeddings,
    )
    print(f"Reusing vector store at: {persist_dir} for {len(pdf_paths)} PDF(s)")
else:
    vectorstore = Chroma.from_documents(
        documents=docs,
        embedding=embeddings,
        persist_directory=persist_dir,
    )
    # Newer Chroma integrations persist automatically and no longer expose
    # persist(); only call it where it still exists.
    if hasattr(vectorstore, "persist"):
        vectorstore.persist()
    print(f"Vector store persisted to: {persist_dir} for {len(pdf_paths)} PDF(s)")

[0.05168594419956207,
 -0.030764883384108543,
 -0.03062233328819275,
 -0.02802734263241291,
 0.01813093200325966]

In [None]:
# Similarity-search retriever over the vector store.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 5},  # reduce k to cut context tokens
)
print(f"Retriever ready for {len(pdf_paths)} PDF(s)")

# Smoke-test retrieval with a sample question.
retrieved_docs = retriever.invoke("What is an automata? Explain with an example.")

In [None]:
# Similarity-search retriever over the vector store.
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5})  # reduce k to cut context tokens

# Describe what was indexed without assuming the single-PDF variable exists:
# report a count when several PDFs are configured (or pdf_path is undefined),
# otherwise keep showing the single path.
configured_pdfs = globals().get('pdf_paths') or []
if len(configured_pdfs) > 1 or 'pdf_path' not in globals():
    sources_label = f"{len(configured_pdfs)} PDF(s)"
else:
    sources_label = pdf_path
print(f"Retriever ready for: {sources_label}")

# Smoke-test retrieval with a sample question.
retrieved_docs = retriever.invoke("What is an automata? Explain with an example.")

In [25]:
# Number of chunks the retriever returned for the sample query.
len(retrieved_docs)

5

In [30]:
from langchain_google_genai import ChatGoogleGenerativeAI
import os

# Prefer a lighter / cheaper model by default to reduce quota exhaustion.
# You can override via environment variable GEMINI_CHAT_MODEL; an unset or blank
# value (e.g. `GEMINI_CHAT_MODEL=` in a .env file) falls back to the default.
model_name = os.getenv("GEMINI_CHAT_MODEL", "").strip() or "gemini-2.0-flash"

llm = ChatGoogleGenerativeAI(
    model=model_name,
    temperature=0.3,
    max_output_tokens=400,  # slightly lower to save quota
)
print(f"Using chat model: {model_name}")

Using chat model: gemini-2.0-flash


In [31]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# System instructions for the answering model; the "{context}" placeholder is
# left in the template for the chain to fill in.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, say that you don't know. "
    "Use three sentences maximum and keep the answer concise."
    "\n\n{context}"
)

# Two-message chat template: the system instructions, then the user's question.
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "{input}"),
])

In [32]:
# Combine the LLM and prompt into a document-stuffing QA chain, then put the
# retriever in front of it to form the full RAG chain.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answer_chain,
)

print("RAG chain ready (retriever k=5)")

RAG chain ready (retriever k=5)


In [33]:
import time
from google.api_core.exceptions import ResourceExhausted

# Sample question used to exercise the RAG chain end to end.
QUERY = "What is an automata? Explain with an example."

def safe_ask(query: str, retries: int = 3, base_delay: float = 5, max_delay: float = 60):
    """Run `query` through `rag_chain`, retrying when the API quota is exhausted.

    Args:
        query: The question passed to the chain as {"input": query}.
        retries: Total number of attempts (must be >= 1).
        base_delay: Seconds to wait after the first quota error; the wait doubles
            after each further failure (5s, 10s, 20s, ... with the default).
        max_delay: Upper bound, in seconds, for a single wait.

    Returns:
        Whatever `rag_chain.invoke` returns for the query.

    Raises:
        ValueError: If `retries` is less than 1.
        ResourceExhausted: If the final attempt still hits the quota limit.
    """
    if retries < 1:
        # Without this the loop body never runs and the function would silently
        # return None without asking anything.
        raise ValueError("retries must be at least 1")
    for attempt in range(1, retries + 1):
        try:
            return rag_chain.invoke({"input": query})
        except ResourceExhausted:
            if attempt == retries:
                raise
            # Capped exponential backoff. Any server-suggested retry delay is not
            # inspected here.
            delay = min(base_delay * 2 ** (attempt - 1), max_delay)
            print(f"Quota hit (attempt {attempt}/{retries}). Backing off {delay}s...")
            time.sleep(delay)

# Ask the sample question (with quota-aware retries) and show only the answer text.
response = safe_ask(QUERY)
print(response["answer"])

An automata, also known as a Finite State Machine, consists of states and transitions (edges) between these states. An edge label defines the move from one state to another. There are three types of Finite Automata: Deterministic Finite Automata (DFA), Non-deterministic Finite Automata (NFA), and Finite Automata with ε-transitions (ε-NFA).



In [None]:
# Ask arbitrary questions against the indexed PDF(s)
USER_QUERY = "Summarize the main topic of this document."  # change this
# Reuses safe_ask so quota errors are retried with a backoff.
resp = safe_ask(USER_QUERY)
print(resp["answer"])