In [2]:
!pip install gradio langchain PyPDF2 langchain-google-genai google-generativeai faiss-cpu


Collecting gradio
  Downloading gradio-5.13.2-py3-none-any.whl.metadata (16 kB)
Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Collecting langchain-google-genai
  Downloading langchain_google_genai-2.0.9-py3-none-any.whl.metadata (3.6 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.9.0.post1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.4 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.7-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.6.0 (from gradio)
  Downloading gradio_client-1.6.0-py3-none-any.whl.metadata (7.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub

In [3]:
pip install langchain-community

Collecting langchain-community
  Downloading langchain_community-0.3.16-py3-none-any.whl.metadata (2.9 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting httpx-sse<0.5.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting langchain<0.4.0,>=0.3.16 (from langchain-community)
  Downloading langchain-0.3.17-py3-none-any.whl.metadata (7.1 kB)
Collecting langchain-core<0.4.0,>=0.3.32 (from langchain-community)
  Downloading langchain_core-0.3.33-py3-none-any.whl.metadata (6.3 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.7.1-py3-none-any.whl.metadata (3.5 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.26.0-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-

In [4]:
import os
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
import google.generativeai as genai
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
import gradio as gr

In [5]:
# Configure Google Generative AI with an API key supplied at runtime.
# Credentials must never be hardcoded in a notebook: the file and its outputs get
# shared and committed. Any key that was previously embedded in this cell should
# be treated as compromised and revoked.
from getpass import getpass

# Prefer the environment; fall back to an interactive prompt that does not echo.
GOOGLE_API_KEY = (os.environ.get('GOOGLE_API_KEY') or getpass('Google API key: ')).strip()
if not GOOGLE_API_KEY:
    raise RuntimeError('A Google API key is required: set GOOGLE_API_KEY or enter it when prompted.')

# Export the key as well so the langchain-google-genai clients created later
# (embeddings and chat model) can read it from the environment.
os.environ['GOOGLE_API_KEY'] = GOOGLE_API_KEY
genai.configure(api_key=GOOGLE_API_KEY)

In [6]:
# Function to extract text from uploaded PDF files
def get_pdf_text(pdf_files):
    """Concatenate the extracted text of every page of every given PDF.

    Args:
        pdf_files: Iterable of PDF sources that ``PdfReader`` can open.
            ``None`` or an empty collection is accepted, since the UI may
            deliver ``None`` when nothing has been uploaded.

    Returns:
        str: The page texts joined in order with no separator, or an empty
        string when there are no files or no extractable text.
    """
    if not pdf_files:
        return ''

    page_texts = []
    for pdf in pdf_files:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # A page without a text layer (e.g. a scanned image) can yield an
            # empty or missing result; normalise to '' so joining never fails.
            page_texts.append(page.extract_text() or '')
    # Join once instead of growing a string inside the loop.
    return ''.join(page_texts)

In [7]:
# Function to split extracted text into chunks
def get_text_chunk(text, chunk_size=10000, chunk_overlap=1000):
    """Split text into overlapping chunks with ``RecursiveCharacterTextSplitter``.

    Args:
        text: The full text to split.
        chunk_size: Maximum size of each chunk, forwarded to the splitter.
            Defaults to 10000, the value this notebook has always used.
        chunk_overlap: Overlap between consecutive chunks, forwarded to the
            splitter. Defaults to 1000.

    Returns:
        The list of chunks returned by the splitter's ``split_text``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_text(text)

In [8]:
# Function to create and save a FAISS index from text chunks
def get_vector(text_chunks):
    """Embed the chunks, build a FAISS index and save it under ``faiss_index``.

    Args:
        text_chunks: List of text strings to embed and index.

    Raises:
        ValueError: If ``text_chunks`` is empty. Checked up front so the caller
            gets a clear message instead of an opaque failure while the index
            is being built, and so no embedding request is made for nothing.
    """
    if not text_chunks:
        raise ValueError('No text chunks to index: the PDFs contained no extractable text.')

    embeddings = GoogleGenerativeAIEmbeddings(model='models/embedding-001')
    vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
    # Make sure the target directory exists before persisting the index.
    os.makedirs('faiss_index', exist_ok=True)
    vector_store.save_local('faiss_index')

In [9]:
# Function to set up a conversational question-answering chain
def get_conversation_chain():
    """Build the question-answering chain used to answer from retrieved context.

    A Gemini chat model (temperature 0.3) is combined with a prompt that tells
    the model to answer only from the supplied context and to say so when the
    answer is not there. The prompt exposes two variables, ``context`` and
    ``question``.

    Returns:
        The chain produced by ``load_qa_chain`` with ``chain_type='stuff'``.
    """
    qa_template = """
    Answer the question as detailed as possible from the provided context. Make sure to provide all the details.
    If the answer is not in the context, just say, "Answer is not available in the context"; don't provide the wrong answer.

    Context:
    {context}

    Question:
    {question}

    Answer:
    """
    qa_prompt = PromptTemplate(
        template=qa_template,
        input_variables=['context', 'question'],
    )
    llm = ChatGoogleGenerativeAI(model='gemini-pro', temperature=0.3)
    return load_qa_chain(llm, chain_type='stuff', prompt=qa_prompt)

In [10]:
# Main processing function
def process_pdfs(pdfs):
    """Extract text from the uploaded PDFs and build the FAISS index.

    Args:
        pdfs: The uploaded PDF files as passed in by the UI; may be ``None``
            or empty when nothing was uploaded.

    Returns:
        str: A status message: success once the index has been saved, or an
        explanation when there was nothing to process.
    """
    # Nothing uploaded: report it instead of failing while iterating None.
    if not pdfs:
        return "No PDF files uploaded. Please upload at least one PDF and try again."

    raw_text = get_pdf_text(pdfs)
    # PDFs without a text layer produce no text; indexing nothing would fail.
    if not raw_text or not raw_text.strip():
        return "No extractable text found in the uploaded PDFs. Scanned or image-only PDFs are not supported."

    text_chunks = get_text_chunk(raw_text)
    get_vector(text_chunks)
    return "FAISS index created successfully. You can now ask questions about your PDF!"

In [11]:
# Function to handle user question input
def answer_question(user_question):
    """Answer a question using the FAISS index saved under ``faiss_index``.

    Args:
        user_question: The question text entered by the user.

    Returns:
        str: The chain's ``output_text``, or a guidance message when the
        question is empty or the index has not been built yet.
    """
    # An empty question has nothing to search for; skip all API work.
    if not user_question or not user_question.strip():
        return "Please enter a question."

    # Check for the index before creating the embeddings client, so this
    # message does not depend on the client being constructible.
    if not os.path.exists('faiss_index/index.faiss'):
        return "FAISS index not found. Please upload and process the PDFs first."

    embeddings = GoogleGenerativeAIEmbeddings(model='models/embedding-001')
    # NOTE(security): allow_dangerous_deserialization opts in to loading pickled
    # data. That is acceptable only because this index is written locally by
    # get_vector; never load an index obtained from an untrusted source.
    new_db = FAISS.load_local('faiss_index', embeddings, allow_dangerous_deserialization=True)
    docs = new_db.similarity_search(user_question)

    # Generate response using the conversational chain. invoke() replaces the
    # deprecated direct call on the chain object.
    chain = get_conversation_chain()
    response = chain.invoke(
        {'input_documents': docs, 'question': user_question},
        return_only_outputs=True,
    )
    return response['output_text']


In [12]:
# Gradio interface: one tab to upload and index PDFs, one tab to query the index.
# Components are created inside the `with` blocks, so their order here is the
# order in which they are declared within each tab.
with gr.Blocks() as demo:
    gr.Markdown("# Chat with PDF using Gemini")

    with gr.Tab("Upload PDFs"):
        # Multiple .pdf files may be selected; the selection is the input to process_pdfs.
        pdf_input = gr.File(file_types=['.pdf'], file_count="multiple", label="Upload PDF Files")
        process_button = gr.Button("Process PDFs")
        process_output = gr.Textbox(label="Processing Status")

        # The string returned by process_pdfs is written to the status textbox.
        process_button.click(process_pdfs, inputs=pdf_input, outputs=process_output)

    with gr.Tab("Ask Questions"):
        # Question in, answer out: answer_question's return value fills the "Answer" textbox.
        user_question = gr.Textbox(label="Ask a question")
        ask_button = gr.Button("Submit")
        answer_output = gr.Textbox(label="Answer")

        ask_button.click(answer_question, inputs=user_question, outputs=answer_output)

# Launch Gradio App
# NOTE: per the captured output, running in Colab automatically sets share=True and
# serves the app on a public URL; pass share=False to launch() to turn that off.
demo.launch()

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://de4fa624d00276e2a0.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


