<a href="https://colab.research.google.com/github/RaghavG189/Python-Projects/blob/main/RAG_Pipeline_Gradio_UI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Install the required libraries

In [None]:
!pip install -q llama-index llama-index-llms-gemini pymupdf
!pip install -q llama-index-embeddings-huggingface
!pip install llama-index-retrievers-bm25
!pip install gradio
!pip install google-genai
!pip install llama-index-llms-google-genai google-generativeai

Import the required libraries and set up the environment

In [None]:
from google.colab import files
from google import genai
from llama_index.llms.google_genai import GoogleGenAI
import gradio as gr
import fitz
import os
from llama_index.core import Document
from typing import List
from llama_index.llms.gemini import Gemini
from llama_index.core import Settings
from llama_index.core.node_parser import SemanticSplitterNodeParser
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import VectorStoreIndex
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.retrievers import QueryFusionRetriever
from llama_index.core.postprocessor import SentenceTransformerRerank
from llama_index.retrievers.bm25 import BM25Retriever
from llama_index.core.retrievers import BaseRetriever
from llama_index.core.schema import NodeWithScore

Set up the Google API key for Gemini

In [None]:
# Paste your Gemini API key between the quotes, or leave it blank to reuse a
# GOOGLE_API_KEY that is already present in the environment.
# NOTE: avoid committing a real key together with the notebook.
GOOGLE_API_KEY = ""
if GOOGLE_API_KEY:
    os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY
else:
    # Do not overwrite an existing environment value with an empty string.
    GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
    if not GOOGLE_API_KEY:
        print("Warning: GOOGLE_API_KEY is empty; set it before calling Gemini.")

Load a PDF and convert it to LlamaIndex Document format

In [None]:
def load_pdf(pdf_path: str) -> List[Document]:
    """Load a PDF and convert it to LlamaIndex Document format using PyMuPDF.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        One ``Document`` per page that contains non-whitespace text. Each
        document carries ``file_name``, ``page_number`` (1-based) and
        ``total_pages`` metadata.
    """
    file_name = os.path.basename(pdf_path)
    documents = []

    # The context manager closes the file even if text extraction raises.
    with fitz.open(pdf_path) as doc:
        total_pages = len(doc)
        for page_number, page in enumerate(doc, start=1):
            text = page.get_text()
            if not text.strip():
                continue  # skip pages without extractable text
            documents.append(
                Document(
                    text=text,
                    metadata={
                        "file_name": file_name,
                        "page_number": page_number,
                        "total_pages": total_pages,
                    },
                )
            )

    print(f"Processed {pdf_path}:")
    print(f"Extracted {len(documents)} pages with content")
    return documents

Initialize Gemini and Embedding Model

In [None]:
# Gemini chat model, registered as the LlamaIndex-wide default LLM
Settings.llm = llm = GoogleGenAI(model="gemini-2.0-flash")

# Hugging Face embedding model, registered as the default embedder
Settings.embed_model = embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en")

# Semantic splitter driven by the same embedding model.
# breakpoint_percentile_threshold controls how sensitive the splitter is to
# a change in meaning between neighbouring pieces of text.
splitter_options = {
    "buffer_size": 1,
    "breakpoint_percentile_threshold": 95,
    "embed_model": embed_model,
}
splitter = SemanticSplitterNodeParser(**splitter_options)

Process the PDF and create the vector index (the BM25 keyword retriever is built later, inside the RAG pipeline)

In [None]:
def process_and_index_pdf(pdf_path):
    """Load a PDF, split it into semantic chunks, and build a vector index.

    Args:
        pdf_path: Path of the PDF file to index.

    Returns:
        A ``VectorStoreIndex`` built from the chunks that ``splitter``
        produces for the pages returned by ``load_pdf``.
    """
    documents = load_pdf(pdf_path)  # one Document per page with text
    nodes = splitter.get_nodes_from_documents(documents)
    vector_index = VectorStoreIndex(nodes)
    # Report the number of chunks (nodes) that were indexed, not the page count.
    print(f"Indexed {len(nodes)} document chunks")
    return vector_index

Build RAG Pipeline

In [None]:
def build_rag_pipeline(index):
    """Build a query engine that combines vector and BM25 keyword retrieval.

    The pipeline is: hybrid retriever (vector + BM25) -> query fusion
    (LLM-generated query variants, reciprocal-rank merged) -> optional
    cross-encoder reranking -> response synthesis with ``llm``.

    Args:
        index: Index whose ``docstore`` holds the chunks created when the
            PDF was indexed.

    Returns:
        A ``RetrieverQueryEngine`` ready to answer questions via ``.query()``.
    """
    nodes = list(index.docstore.docs.values())  # all chunks stored at indexing time
    num_nodes = len(nodes)
    # Ask for at most 2 results, never more than there are chunks, never fewer than 1.
    safe_top_k = min(2, max(1, num_nodes))

    # Embedding-based retrieval: finds semantically similar chunks.
    vector_retriever = index.as_retriever(similarity_top_k=safe_top_k)
    # Keyword-based retrieval: matches terms from the query in the chunks.
    bm25_retriever = BM25Retriever.from_defaults(
        nodes=nodes,
        similarity_top_k=safe_top_k,
    )

    class HybridRetriever(BaseRetriever):
        """Merge the results of a vector retriever and a keyword retriever."""

        def __init__(self, vector_retriever, keyword_retriever, top_k=2):
            self.vector_retriever = vector_retriever
            self.keyword_retriever = keyword_retriever
            self.top_k = top_k
            super().__init__()

        def _retrieve(self, query_bundle, **kwargs):
            vector_nodes = self.vector_retriever.retrieve(query_bundle)
            keyword_nodes = self.keyword_retriever.retrieve(query_bundle)
            # De-duplicate by node id; when both retrievers return the same
            # chunk, the keyword hit (seen last) is the one that is kept.
            unique_nodes = {
                node.node_id: node
                for node in [*vector_nodes, *keyword_nodes]
            }
            # A missing or None score is treated as 0.0 so sorting never
            # compares None with a float.
            sorted_nodes = sorted(
                unique_nodes.values(),
                key=lambda hit: getattr(hit, "score", None) or 0.0,
                reverse=True,
            )
            return sorted_nodes[:self.top_k]

    hybrid_retriever = HybridRetriever(
        vector_retriever=vector_retriever,
        keyword_retriever=bm25_retriever,
        top_k=safe_top_k,
    )

    if num_nodes > 1:
        # Cross-encoder that re-scores the retrieved chunks against the query.
        reranker = SentenceTransformerRerank(
            model="cross-encoder/ms-marco-MiniLM-L-12-v2",
            top_n=safe_top_k,
        )
        node_postprocessors = [reranker]
    else:
        # Nothing to rerank with a single chunk (or none).
        node_postprocessors = []

    fusion_retriever = QueryFusionRetriever(
        retrievers=[hybrid_retriever],
        llm=llm,
        similarity_top_k=safe_top_k,  # same cap as the underlying retrievers
        # NOTE(review): num_queries is believed to be the total number of
        # queries including the original one - confirm for the installed version.
        num_queries=3,
        mode="reciprocal_rerank",
    )

    # Combine the fusion retriever, the optional reranker and the LLM.
    query_engine = RetrieverQueryEngine.from_args(
        retriever=fusion_retriever,
        llm=llm,
        node_postprocessors=node_postprocessors,
    )
    return query_engine

Run queries through the Gradio chat UI

In [None]:
def rag_chat(pdf_file, user_input, history): #Function for chatting with the chatbot
    """Answer a question about the uploaded PDF and append it to the chat.

    Args:
        pdf_file: The uploaded PDF: either an object exposing the file path
            as ``.name`` or a plain path string. ``None`` means nothing has
            been uploaded yet.
        user_input: The question typed by the user.
        history: Existing list of ``(user_message, bot_message)`` pairs, or
            ``None`` / empty for a fresh conversation.

    Returns:
        The chat history with the new exchange appended. Blank questions
        leave the history unchanged; failures are reported as a bot message
        instead of being raised.
    """
    history = list(history) if history else []

    # Nothing to ask: avoid indexing the PDF and calling the LLM for a blank query.
    if not user_input or not user_input.strip():
        return history

    # Give a clear hint instead of surfacing an AttributeError on None.
    if pdf_file is None:
        history.append((user_input, "⚠️ Please upload a PDF before asking a question."))
        return history

    try:
        pdf_path = getattr(pdf_file, "name", pdf_file)
        # NOTE: the PDF is re-indexed on every question.
        index = process_and_index_pdf(pdf_path)
        rag_engine = build_rag_pipeline(index)
        response = rag_engine.query(user_input)
        history.append((user_input, response.response))
    except Exception as e:  # UI boundary: show the error in the chat rather than crash
        history.append((user_input, f"⚠️ Error: {str(e)}"))

    return history

def handle_feedback(chat_history, data: gr.LikeData):
    """Append a short acknowledgement to the bot reply that was rated.

    Args:
        chat_history: List of ``(user_message, bot_message)`` pairs.
        data: Like/dislike event payload. ``data.liked`` selects the
            acknowledgement text; ``data.index`` (an int or an
            ``[row, column]`` pair), when present and valid, selects the
            rated exchange. Otherwise the last exchange is used.

    Returns:
        A new history list with the acknowledgement appended to the rated
        reply. The input is returned unchanged when it is empty or when the
        reply already ends with the same acknowledgement.
    """
    if not chat_history:
        return chat_history  # No messages yet

    # Determine message based on like/dislike
    feedback_text = "✅ Thanks for your feedback!" if data.liked else "⚠️ Feedback noted."

    # Locate the rated exchange; fall back to the most recent one.
    row = getattr(data, "index", None)
    if isinstance(row, (list, tuple)):
        row = row[0] if row else None
    if not isinstance(row, int) or not 0 <= row < len(chat_history):
        row = len(chat_history) - 1

    user_message, bot_message = chat_history[row]
    bot_message = bot_message or ""  # tolerate a reply that is still None

    # Repeated clicks must not stack the same acknowledgement.
    if bot_message.endswith(feedback_text):
        return chat_history

    new_history = list(chat_history)
    new_history[row] = (user_message, bot_message + f"  \n{feedback_text}")
    return new_history

# Chat UI: conversation on the left, PDF upload on the right
with gr.Blocks(title="Step 5: Full Functioning UI") as demo:
    gr.Markdown("### Step 5: Connected UI")

    with gr.Row():
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(label="Chat History", height=300)
            user_input = gr.Textbox(
                placeholder="Ask a question about your document...",
                label="Your Question",
            )
            # Send and clear buttons
            send_btn = gr.Button("📤 Send")
            clear_btn = gr.Button("🗑️ Clear Chat")

        with gr.Column(scale=1):
            # PDF upload widget (takes the place of files.upload())
            pdf_input = gr.File(label="📄 Upload PDF", file_types=[".pdf"])

    # The Send button and pressing Enter in the textbox run the same handler.
    chat_inputs = [pdf_input, user_input, chatbot]
    for register in (send_btn.click, user_input.submit):
        register(fn=rag_chat, inputs=chat_inputs, outputs=chatbot)

    # Reset the conversation
    clear_btn.click(lambda: [], outputs=chatbot)
    # Like/dislike on a reply
    chatbot.like(handle_feedback, inputs=[chatbot], outputs=chatbot)

demo.launch()