# Install Required Packages

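First, we need to install all the packages that our application depends on: Streamlit for the web interface, PyPDF2 for reading PDF files, langchain (with langchain-community and langchain_google_genai) for text processing and question answering, google-generativeai for access to Google's generative models, faiss-cpu for vector similarity search, python-dotenv for loading environment variables, and pyngrok for exposing the local server to the internet.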

In [1]:
# Each line below is an IPython shell escape (`!`) that runs pip in the
# notebook's environment, one package per command.
!pip install streamlit
!pip install google-generativeai
!pip install python-dotenv
!pip install langchain
!pip install PyPDF2
!pip install faiss-cpu
!pip install langchain_google_genai
!pip install pyngrok
# -U upgrades langchain-community if an older version is already installed.
!pip install -U langchain-community



# Import Necessary Libraries

Import all the libraries that will be used in the application. This includes Streamlit for building the web app, PDF-related libraries for handling PDF files, langchain for text processing and question answering, Google Generative AI for conversational AI capabilities, FAISS for vector similarity search, and dotenv for handling environment variables.

In [9]:
import os
import streamlit as st
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings
import google.generativeai as genai
from langchain.vectorstores import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from dotenv import load_dotenv
from pyngrok import ngrok

# Set Up Environment Variables

In this case, we set the Google API key for using Google Generative AI services.

**We need to replace "YOUR_GOOGLE_API_KEY_HERE" with our actual Google API key obtained from the Google Cloud Console.**

In [10]:
# Load variables from a local .env file (if any) into the process environment.
load_dotenv()
# NOTE: this assignment runs after load_dotenv(), so it overwrites any
# GOOGLE_API_KEY that was loaded from .env. Replace the placeholder with a real
# key before running, and avoid committing the notebook with the key filled in.
os.environ["GOOGLE_API_KEY"] = "YOUR_GOOGLE_API_KEY_HERE"
# Hand the key to the google.generativeai client.
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

# Define Functions for Text Processing and Question Answering

Define functions that handle PDF text extraction, chunking of text, creating vector stores for FAISS, setting up conversational AI models, user input processing, and main application logic.

In [11]:
def get_pdf_text(pdf_docs):
    """Extract and concatenate the text of every page in the given PDFs.

    Args:
        pdf_docs: Iterable of PDF sources, each passed as-is to ``PdfReader``
            (in this app, the files returned by ``st.file_uploader``).

    Returns:
        A single string with the text of all pages, in document and page
        order. An empty iterable yields ``""``.
    """
    page_texts = []
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # Defensive: coerce a falsy extraction result (e.g. None or "")
            # to "" so a page without extractable text cannot raise a
            # TypeError during concatenation.
            page_texts.append(page.extract_text() or "")
    # Join once instead of growing a string with += inside the loop.
    return "".join(page_texts)

def get_text_chunks(text):
    """Split ``text`` into chunks using a RecursiveCharacterTextSplitter.

    The splitter is configured with ``chunk_size=10000`` and
    ``chunk_overlap=1000``; the result of ``split_text`` is returned as-is.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=10000,
        chunk_overlap=1000,
    )
    return splitter.split_text(text)

def get_vector_store(text_chunks):
    """Embed ``text_chunks`` and save the resulting FAISS index locally.

    The index is built with the "models/embedding-001" Google embedding model
    and written under the name "faiss_index". Nothing is returned.
    """
    embedder = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    FAISS.from_texts(text_chunks, embedding=embedder).save_local("faiss_index")

def get_conversational_chain():
    """Build the question-answering chain used to answer user questions.

    Returns:
        A "stuff"-type QA chain created by ``load_qa_chain`` around the
        "gemini-pro" chat model (temperature 0.3) and a prompt that takes
        ``context`` and ``question`` variables.
    """
    # The template text is part of runtime behavior; keep it verbatim.
    template_text = """
    Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in
    provided context just say, "answer is not available in the context", don't provide the wrong answer.\n\n
    Context:\n {context}\n
    Question:\n {question}\n

    Answer:
    """
    llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
    qa_prompt = PromptTemplate(
        template=template_text,
        input_variables=["context", "question"],
    )
    return load_qa_chain(llm, chain_type="stuff", prompt=qa_prompt)

def user_input(user_question):
    """Answer ``user_question`` from the locally saved FAISS index.

    Loads the "faiss_index" store, retrieves the documents most similar to the
    question, runs them through the QA chain, and returns the chain's
    ``"output_text"`` value.
    """
    embedder = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    # NOTE(review): allow_dangerous_deserialization=True opts in to loading
    # pickled data; only acceptable because the index is written by this app
    # itself. Do not point this at index files from untrusted sources.
    index = FAISS.load_local(
        "faiss_index",
        embedder,
        allow_dangerous_deserialization=True,
    )
    matches = index.similarity_search(user_question)
    qa_chain = get_conversational_chain()
    payload = {"input_documents": matches, "question": user_question}
    result = qa_chain(payload, return_only_outputs=True)
    return result["output_text"]

def main():
    """Render the Streamlit "Chat with PDF" page.

    The main area holds a question box; the sidebar holds the PDF uploader and
    a "Submit & Process" button that extracts text, chunks it, and saves a
    FAISS index under "faiss_index".

    Guards added over the naive flow:
      * A question asked before any index has been saved shows a warning
        instead of failing inside ``FAISS.load_local``.
      * Pressing "Submit & Process" with no uploaded files, or with files
        that yield no text chunks, shows a message instead of attempting to
        build an index from nothing.
    """
    st.set_page_config(page_title="Chat PDF")
    st.header("Chat with PDF")
    user_question = st.text_input("Ask a Question from the PDF Files")
    if user_question:
        # get_vector_store() saves the index under "faiss_index"; without it
        # there is nothing for user_input() to load.
        if os.path.isdir("faiss_index"):
            response = user_input(user_question)
            st.write("Reply: ", response)
        else:
            st.warning("Please upload and process your PDF files before asking a question.")
    with st.sidebar:
        st.title("Upload")
        pdf_docs = st.file_uploader("Upload your PDF Files and Click on the Submit & Process Button", accept_multiple_files=True)
        if st.button("Submit & Process"):
            if not pdf_docs:
                st.warning("Please upload at least one PDF file first.")
            else:
                with st.spinner("Processing..."):
                    raw_text = get_pdf_text(pdf_docs)
                    text_chunks = get_text_chunks(raw_text)
                    if text_chunks:
                        get_vector_store(text_chunks)
                        st.success("Done")
                    else:
                        # Presumably happens for scanned/image-only PDFs; an
                        # empty chunk list cannot be indexed.
                        st.error("No extractable text was found in the uploaded PDF files.")

if __name__ == "__main__":
    main()

# Install ngrok and Authenticate

Ngrok is used to expose your local Streamlit server to the internet.

**We need to replace "YOUR_NGROK_AUTHTOKEN_HERE" with our actual ngrok authtoken obtained from the ngrok dashboard.**

In [12]:
# Shell escape: registers the authtoken with the ngrok CLI. As the output
# below shows, the token is saved to ngrok's configuration file.
!ngrok authtoken YOUR_NGROK_AUTHTOKEN_HERE

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


# Connect to ngrok and Run the Streamlit App

Connect to ngrok and start a tunnel to expose the Streamlit app to the internet. This allows us to access our app from anywhere.

In [15]:
# Open an ngrok tunnel that forwards a public URL to local port 8501
# (the port the Streamlit app is expected to listen on — confirm if changed).
public_url = ngrok.connect(8501)
# Note: this prints the tunnel object's representation (public URL -> local
# address), as seen in the output below, not just the bare URL string.
print(f"Public URL: {public_url}")

Public URL: NgrokTunnel: "https://9a18-34-19-22-175.ngrok-free.app" -> "http://localhost:8501"


# Write the Streamlit App Code to a File

Write the entire Streamlit app code to a file named app.py. This file contains the Streamlit application logic which will be executed to run our web application.

In [14]:
# Write the Streamlit application to app.py so it can be launched with
# `streamlit run app.py`.
#
# SECURITY: the generated file must never contain a literal API key. app.py
# reads GOOGLE_API_KEY from the environment (or a .env file) at startup; a
# process started from this notebook inherits the value set in os.environ.
# Any key that was previously embedded here should be treated as leaked and
# revoked.
with open("app.py", "w", encoding="utf-8") as f:
    f.write("""
import streamlit as st
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings
import google.generativeai as genai
from langchain.vectorstores import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from dotenv import load_dotenv
import os

# Set up environment: the key comes from the environment or a .env file,
# never from a literal in this file.
load_dotenv()
api_key = os.getenv("GOOGLE_API_KEY")
if not api_key:
    raise RuntimeError("GOOGLE_API_KEY is not set. Define it in the environment or in a .env file before starting the app.")
genai.configure(api_key=api_key)

def get_pdf_text(pdf_docs):
    text = ""
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            text += page.extract_text()
    return text

def get_text_chunks(text):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
    chunks = text_splitter.split_text(text)
    return chunks

def get_vector_store(text_chunks):
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
    vector_store.save_local("faiss_index")

def get_conversational_chain():
    prompt_template = \"""
    Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in
    provided context just say, "answer is not available in the context", don't provide the wrong answer.\n\n
    Context:\n {context}\n
    Question:\n {question}\n

    Answer:
    \"""
    model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
    prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
    chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
    return chain

def user_input(user_question):
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
    docs = new_db.similarity_search(user_question)
    chain = get_conversational_chain()
    response = chain({"input_documents": docs, "question": user_question}, return_only_outputs=True)
    return response["output_text"]

def main():
    st.set_page_config(page_title="Chat PDF")
    st.header("Chat with PDF")
    user_question = st.text_input("Ask a Question from the PDF Files")
    if user_question:
        # The index only exists after PDFs have been processed at least once.
        if os.path.isdir("faiss_index"):
            response = user_input(user_question)
            st.write("Reply: ", response)
        else:
            st.warning("Please upload and process your PDF files before asking a question.")
    with st.sidebar:
        st.title("Upload")
        pdf_docs = st.file_uploader("Upload your PDF Files and Click on the Submit & Process Button", accept_multiple_files=True)
        if st.button("Submit & Process"):
            if not pdf_docs:
                st.warning("Please upload at least one PDF file first.")
            else:
                with st.spinner("Processing..."):
                    raw_text = get_pdf_text(pdf_docs)
                    text_chunks = get_text_chunks(raw_text)
                    if text_chunks:
                        get_vector_store(text_chunks)
                        st.success("Done")
                    else:
                        st.error("No extractable text was found in the uploaded PDF files.")

if __name__ == "__main__":
    main()
""")

# Run the Streamlit App

In [None]:
import subprocess
# Launch the generated app with the Streamlit CLI. subprocess.run() waits for
# the child process to exit, so this cell keeps running (blocking the
# notebook) for as long as the Streamlit server is up.
subprocess.run(["streamlit", "run", "app.py"])