In [1]:
# Install required libraries
!pip install -q langchain langchain-community langchain-google-genai chromadb PyMuPDF tiktoken huggingface-hub



[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/67.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m55.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.8/47.8 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.5/19.5 MB[0m [31m73.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.1/24.1 MB[0m [31m66.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m284.2/284.2 kB[0m [31m17.7 MB/s[0m eta [36m0:00:00

In [2]:
import os
from langchain_community.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings


In [3]:
import yaml

# Load the config.yml file
with open("api_keys (1).yml", "r") as file:
    config = yaml.safe_load(file)

# Set the key to environment variable
import os
os.environ["GOOGLE_API_KEY"] = config["GEMINI_API_KEY"]


In [14]:
from google.colab import files

uploaded = files.upload()  # Choose your PDF file from your system



Saving attention_paper.pdf to attention_paper (4).pdf
Saving resnet_paper.pdf to resnet_paper.pdf
Saving vision_transformer.pdf to vision_transformer.pdf


In [15]:
# Check if files were uploaded
pdf_path = list(uploaded.keys())
if uploaded:
    # Get all uploaded filenames
    pdf_paths = list(uploaded.keys())

    # Print the uploaded file names
    print("Uploaded files:")
    for path in pdf_paths:
        print("-", path)
else:
    print("No files uploaded.")

Uploaded files:
- attention_paper (4).pdf
- resnet_paper.pdf
- vision_transformer.pdf


In [20]:
# # Load the uploaded PDF
# loader = PyMuPDFLoader(pdf_path)
# docs = loader.load()


# Initialize a list to hold all documents
all_docs = []

# Loop through uploaded files
for filename in uploaded.keys():
    loader = PyMuPDFLoader(filename)
    docs = loader.load()
    all_docs.extend(docs)


In [29]:
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
chunks = splitter.split_documents(all_docs)

In [30]:
embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

persist_directory = "/content/chroma_db"


In [31]:
# if os.path.exists(persist_directory):
#     shutil.rmtree(persist_directory)
vectordb = Chroma.from_documents(chunks, embedding=embedding, persist_directory="./chroma_db")
vectordb.persist()





In [32]:
# Retriever with top 3 documents
#1. Simple Cosine Similarity Retrieval
retriever = vectordb.as_retriever(search_type="similarity", search_kwargs={"k": 3})

# Gemini LLM
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", temperature=0.3)

# RAG Chain
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, return_source_documents=True)


In [33]:
query = "Summarize key points about transformer models in NLP."

result = qa_chain(query)

print(" Answer:\n")
print(result["result"])

print("\n📄 Top 3 Source Documents:")
for i, doc in enumerate(result["source_documents"]):
    print(f"\nSource {i+1}:")
    print(doc.metadata.get("source", "No source info"))
    print(doc.page_content[:500], "...\n")


 Answer:

Transformer models have become the leading architecture in natural language processing (NLP).  Their success is due to both their scalability and the use of large-scale self-supervised pre-training.  A common approach involves pre-training on a large text corpus, followed by fine-tuning on a smaller, task-specific dataset.  They are also noted for surpassing the performance of convolutional networks while using fewer computational resources.

📄 Top 3 Source Documents:

Source 1:
vision_transformer.pdf
Transformers show impressive performance on NLP tasks. However, much of their success stems
not only from their excellent scalability but also from large scale self-supervised pre-training (Devlin
8 ...


Source 2:
vision_transformer.pdf
results compared to state-of-the-art convolutional networks while requiring sub-
stantially fewer computational resources to train.1
1
INTRODUCTION
Self-attention-based architectures, in particular Transformers (Vaswani et al., 2017), have becom

In [34]:
#MMR (Max Marginal Relevance)
retriever_mmr = vectordb.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 3, "fetch_k": 10}
)


In [35]:
# Gemini LLM
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", temperature=0.3)

# RAG Chain
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever_mmr, return_source_documents=True)

In [37]:
query = '''Summarize key points about transformer models in NLP.'''

result = qa_chain(query)

print("🔍 Answer:\n")
print(result["result"])

print("\n📄 Top 3 Source Documents:")
for i, doc in enumerate(result["source_documents"]):
    print(f"\nSource {i+1}:")
    print(doc.metadata.get("source", "No source info"))
    print(doc.page_content[:500], "...\n")

🔍 Answer:

Transformer models have become the leading architecture in natural language processing (NLP).  Their success is due to both their scalability and the use of large-scale self-supervised pre-training, followed by fine-tuning on smaller, task-specific datasets.  They have shown superior performance compared to previous state-of-the-art models, such as convolutional networks and recurrent neural networks (RNNs like LSTMs and GRUs), often with less computational cost during training.

📄 Top 3 Source Documents:

Source 1:
vision_transformer.pdf
Transformers show impressive performance on NLP tasks. However, much of their success stems
not only from their excellent scalability but also from large scale self-supervised pre-training (Devlin
8 ...


Source 2:
vision_transformer.pdf
results compared to state-of-the-art convolutional networks while requiring sub-
stantially fewer computational resources to train.1
1
INTRODUCTION
Self-attention-based architectures, in particular Transfor

In [38]:
for doc in chunks:
    doc.metadata["type"] = "pdf"



In [39]:
vectordb = Chroma.from_documents(
    documents=chunks,
    embedding=embedding,
    persist_directory="./chroma_db"
)
vectordb.persist()


In [40]:
vectordb = Chroma(persist_directory="./chroma_db", embedding_function=embedding)

retriever_hybrid = vectordb.as_retriever(
    search_kwargs={
        "k": 3,
        "filter": {"type": "pdf"}
    }
)

  vectordb = Chroma(persist_directory="./chroma_db", embedding_function=embedding)


In [41]:
# Gemini LLM
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", temperature=0.3)

# RAG Chain
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever_hybrid, return_source_documents=True)

In [42]:
query = "Summarize key points about transformer models in NLP."

result = qa_chain(query)

print("🔍 Answer:\n")
print(result["result"])

print("\n📄 Top 3 Source Documents:")
for i, doc in enumerate(result["source_documents"]):
    print(f"\nSource {i+1}:")
    print(doc.metadata.get("source", "No source info"))
    print(doc.page_content[:500], "...\n")

🔍 Answer:

Transformer models have become the leading architecture in natural language processing (NLP).  Their success is due to a combination of scalability and large-scale self-supervised pre-training.  A common approach involves pre-training on a large text corpus, followed by fine-tuning on a smaller, task-specific dataset.  They are also noted for surpassing the performance of convolutional networks while using fewer computational resources.

📄 Top 3 Source Documents:

Source 1:
vision_transformer.pdf
Transformers show impressive performance on NLP tasks. However, much of their success stems
not only from their excellent scalability but also from large scale self-supervised pre-training (Devlin
8 ...


Source 2:
vision_transformer.pdf
results compared to state-of-the-art convolutional networks while requiring sub-
stantially fewer computational resources to train.1
1
INTRODUCTION
Self-attention-based architectures, in particular Transformers (Vaswani et al., 2017), have become
th

In [43]:
!pip install -U cohere==5.13.6 langchain langchain-cohere




Collecting cohere==5.13.6
  Downloading cohere-5.13.6-py3-none-any.whl.metadata (3.5 kB)
Collecting langchain-cohere
  Downloading langchain_cohere-0.4.4-py3-none-any.whl.metadata (6.6 kB)
Collecting fastavro<2.0.0,>=1.9.4 (from cohere==5.13.6)
  Downloading fastavro-1.12.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (5.7 kB)
Collecting httpx-sse==0.4.0 (from cohere==5.13.6)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting parameterized<0.10.0,>=0.9.0 (from cohere==5.13.6)
  Downloading parameterized-0.9.0-py2.py3-none-any.whl.metadata (18 kB)
Collecting types-requests<3.0.0,>=2.0.0 (from cohere==5.13.6)
  Downloading types_requests-2.32.4.20250611-py3-none-any.whl.metadata (2.1 kB)
Collecting types-pyyaml<7.0.0.0,>=6.0.12.20240917 (from langchain-cohere)
  Downloading types_pyyaml-6.0.12.20250516-py3-none-any.whl.metadata (1.8 kB)
Downloading cohere-5.13.6-py3-none-any.whl (250 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━

In [44]:
# Load API keys
with open("api_keys (1).yml", "r") as f:
    config = yaml.safe_load(f)

In [45]:
from langchain_cohere import CohereRerank

reranker = CohereRerank(
    cohere_api_key=config["COHERE_API_KEY"],
    model = "rerank-english-v3.0",
    top_n=3
)

retriever_rerank = vectordb.as_retriever(search_kwargs={"k": 10})

# Use reranker with retriever
from langchain.retrievers import ContextualCompressionRetriever

compression_retriever = ContextualCompressionRetriever(
    base_compressor=reranker,
    base_retriever=retriever_rerank
)


In [46]:
# Gemini LLM
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", temperature=0.3)

# RAG Chain
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=compression_retriever, return_source_documents=True)

In [49]:
query = "Summarize transformers model in NLP."

result = qa_chain(query)

print(" Answer:\n")
print(result["result"])

print("\n Top 3 Source Documents:")
for i, doc in enumerate(result["source_documents"]):
    print(f"\nSource {i+1}:")
    print(doc.metadata.get("source", "No source info"))
    print(doc.page_content[:500], "...\n")

 Answer:

In natural language processing (NLP), Transformer models, particularly those based on the self-attention mechanism, have become the leading architecture.  A common approach involves pre-training a Transformer on a massive text corpus and then fine-tuning it on a smaller, task-specific dataset.  This approach leads to impressive performance, exceeding that of convolutional networks while using fewer computational resources during training.

 Top 3 Source Documents:

Source 1:
vision_transformer.pdf
results compared to state-of-the-art convolutional networks while requiring sub-
stantially fewer computational resources to train.1
1
INTRODUCTION
Self-attention-based architectures, in particular Transformers (Vaswani et al., 2017), have become
the model of choice in natural language processing (NLP). The dominant approach is to pre-train on
a large text corpus and then ﬁne-tune on a smaller task-speciﬁc dataset (Devlin et al., 2019). Thanks ...


Source 2:
vision_transformer.pdf


In [50]:
!pip install streamlit

Collecting streamlit
  Downloading streamlit-1.47.1-py3-none-any.whl.metadata (9.0 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.47.1-py3-none-any.whl (9.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m48.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m45.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25hInst

In [51]:
!pip install pyngrok

Collecting pyngrok
  Downloading pyngrok-7.2.13-py3-none-any.whl.metadata (8.1 kB)
Downloading pyngrok-7.2.13-py3-none-any.whl (25 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.13


In [52]:
%%writefile app.py
import streamlit as st
import os
import yaml
import asyncio

# # Fix RuntimeError: There is no current event loop
try:
    asyncio.get_running_loop()
except RuntimeError:
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)

from langchain_community.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain.chains import RetrievalQA
from langchain.retrievers import ContextualCompressionRetriever
from langchain_cohere import CohereRerank

# Load API keys
with open("api_keys (1).yml", "r") as f:
    config = yaml.safe_load(f)

os.environ["GOOGLE_API_KEY"] = config["GEMINI_API_KEY"]

# UI
st.set_page_config(page_title="Gemini RAG App", layout="wide")
st.title(" Gemini PDF RAG App")

uploaded_files = st.file_uploader("Upload one or more PDFs", type=["pdf"], accept_multiple_files=True)

if uploaded_files:
    all_chunks = []
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)

    for uploaded_file in uploaded_files:
        with open(uploaded_file.name, "wb") as f:
            f.write(uploaded_file.getbuffer())

        loader = PyMuPDFLoader(uploaded_file.name)
        docs = loader.load()
        # Add filename to each doc's metadata before splitting
        for doc in docs:
            doc.metadata["source"] = uploaded_file.name  # Add file name as source

        # Split into chunks
        chunks = splitter.split_documents(docs)

        for doc in chunks:
            doc.metadata["type"] = "pdf"

        # Add custom metadata to each chunk
        for chunk in chunks:
            chunk.metadata["type"] = "pdf"
            # You can also add page info if available
            chunk.metadata["source"] = f"{uploaded_file.name} - Page {chunk.metadata.get('page', 'N/A')}"

        all_chunks.extend(chunks)

    # Embed & store
    embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    vectordb = Chroma.from_documents(all_chunks, embedding=embedding, persist_directory="./chroma_db")
    vectordb.persist()

    retriever = vectordb.as_retriever(search_kwargs={"k": 10})

    # Optional Reranker
    reranker = CohereRerank(
        cohere_api_key=config["COHERE_API_KEY"],
        model="rerank-english-v3.0",
        top_n=3
    )
    compression_retriever = ContextualCompressionRetriever(
        base_compressor=reranker,
        base_retriever=retriever
    )

    # Gemini LLM
    llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", temperature=0.3)

    # RAG
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=compression_retriever,
        return_source_documents=True
    )

    # Input Query
    query = st.text_input("Ask a question about the documents:")

    if query:
        with st.spinner("Generating answer..."):
            result = qa_chain.invoke({"query": query})

            st.subheader("Answer")
            st.write(result["result"])

            st.subheader("Top Source Documents")
            for i, doc in enumerate(result["source_documents"]):
                st.markdown(f"**Source {i+1} - {doc.metadata['source']}**")
                st.markdown(doc.page_content)



Writing app.py


In [53]:
# Start Streamlit in background
!streamlit run app.py --server.port 8989 > /dev/null 2>&1 &


In [54]:
!curl http://localhost:8989


<!--
 Copyright (c) Streamlit Inc. (2018-2022) Snowflake Inc. (2022-2025)

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->

<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta
      name="viewport"
      content="width=device-width, initial-scale=1, shrink-to-fit=no"
    />
    <link rel="shortcut icon" href="./favicon.png" />
    <link
      rel="preload"
      href="./static/media/SourceSansVF-Upright.ttf.BsWL4Kly.woff2"
      as="font"
      type="font/woff2"
      crossorig

In [61]:
from pyngrok import ngrok

# Kill existing tunnels (optional safety)
ngrok.kill()

# Start new tunnel
public_url = ngrok.connect(8989)
print("🔗 Public URL:", public_url)


ERROR:pyngrok.process.ngrok:t=2025-08-04T05:26:21+0000 lvl=eror msg="failed to reconnect session" obj=tunnels.session err="authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your authtoken: https://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_4018\r\n"
ERROR:pyngrok.process.ngrok:t=2025-08-04T05:26:21+0000 lvl=eror msg="session closing" obj=tunnels.session err="authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your authtoken: https://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_4018\r\n"
ERROR:pyngrok.process.ngrok:t=2025-08-04T05:26:21+0000 lvl=eror msg="terminating with error" obj=app err="authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your aut

PyngrokNgrokError: The ngrok process errored on start: authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your authtoken: https://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_4018\r\n.

In [64]:
from pyngrok import ngrok
import yaml

# Terminate open tunnels if exist
ngrok.kill()

# Setting the authtoken
# Get your authtoken from `ngrok_credentials.yml` file
with open('api_keys (1).yml', 'r') as file:
     NGROK_AUTH_TOKEN = yaml.safe_load(file)
ngrok.set_auth_token(config['NGORK_AUTH_TOKEN'])

# Open an HTTPs tunnel on port XXXX which you get from your `logs.txt` file
ngrok_tunnel = ngrok.connect(8989)
print("Streamlit App:", ngrok_tunnel.public_url)

Streamlit App: https://d7834cc74af7.ngrok-free.app
