Install Packages

In [None]:
!pip install streamlit
!pip install PyPDF2
!pip install langchain
!npm install localtunnel
!pip install openai
!pip install tiktoken
!pip install faiss-cpu
!sudo apt install tesseract-ocr
!pip install pytesseract
!pip install chromadb
!pip install pypdf
!pip install chromadb
!pip install streamlit_chat

Enter your OpenAI API key

In [6]:
import os
from getpass import getpass

# Ask for the key at run time instead of hard-coding it, so the secret is never
# saved in the notebook. An already-exported OPENAI_API_KEY is left untouched.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass('OpenAI API key: ')

app.py

In [None]:
%%writefile app.py


import os
import streamlit as st
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI
from langchain.callbacks import get_openai_callback

from langchain.vectorstores import Chroma
from langchain import OpenAI, VectorDBQA
from langchain.document_loaders import DirectoryLoader
from langchain.document_loaders import PyPDFLoader

from langchain.schema import (
    SystemMessage,
    HumanMessage,
    AIMessage
)
from streamlit_chat import message

import pytesseract
import shutil
import random
try:
 from PIL import Image
except ImportError:
 import Image

def pdf():
  """Render the "Ask your PDF" page: upload a PDF, index it and answer a question.

  The uploaded file is saved in the current working directory and loaded from
  that same absolute path with ``PyPDFLoader``. It is then split into chunks,
  embedded into a Chroma store and queried through a ``VectorDBQA`` "stuff"
  chain. The question and the answer are appended to
  ``st.session_state.messages`` (which must already exist), and the source
  documents returned by the chain are shown in a "See source" expander.

  Streamlit re-executes the whole script on every interaction, so the chain
  and the latest answer are cached in ``st.session_state`` under the keys
  ``pdf_qa_chain`` and ``pdf_last_answer``. A rerun with the same file and the
  same question therefore neither re-embeds the PDF nor duplicates the chat
  history.

  Returns:
    None. Output is rendered through Streamlit and stored in session state.
  """
  #load_dotenv()
  #st.set_page_config(page_title="Ask your PDF")
  st.header("Ask your PDF 💬")

  # upload file
  uploaded_pdf = st.file_uploader("Upload your PDF", type="pdf", label_visibility="hidden")
  if uploaded_pdf is None:
    return

  pdf_bytes = uploaded_pdf.getbuffer()
  # basename drops any directory part in the client-supplied name; abspath
  # makes the loader read exactly the file written below, whatever the cwd is.
  full_path = os.path.abspath(os.path.basename(uploaded_pdf.name))
  file_key = (full_path, pdf_bytes.nbytes)

  cached_chain = st.session_state.get("pdf_qa_chain")
  if cached_chain is not None and cached_chain[0] == file_key:
    qa = cached_chain[1]
  else:
    with open(full_path, "wb") as f:
      f.write(pdf_bytes)

    # extract the text and build the retrieval chain
    documents = PyPDFLoader(full_path).load()
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    texts = text_splitter.split_documents(documents)
    embeddings = OpenAIEmbeddings()
    docsearch = Chroma.from_documents(texts, embeddings)
    qa = VectorDBQA.from_chain_type(llm=OpenAI(), chain_type="stuff", vectorstore=docsearch, return_source_documents=True)
    st.session_state["pdf_qa_chain"] = (file_key, qa)

  st.divider()
  user_question = st.text_input("Ask a question about your PDF:", label_visibility="hidden")
  if not user_question:
    return

  query_key = (file_key, user_question)
  cached_answer = st.session_state.get("pdf_last_answer")
  if cached_answer is not None and cached_answer[0] == query_key:
    # Same file and question as the previous run: reuse the stored result.
    result = cached_answer[1]
  else:
    result = qa({"query": user_question})
    # Record the exchange only once the chain has answered, so the history
    # always holds complete question/answer pairs.
    st.session_state.messages.append(HumanMessage(content=user_question))
    st.session_state.messages.append(AIMessage(content=result['result']))
    st.session_state["pdf_last_answer"] = (query_key, result)

  with st.expander("See source"):
    st.write(result['source_documents'])
      

def img():
  """Render the "Ask your Image" page.

  Shows an uploader for png/jpg/jpeg files. Once a file is provided, it is
  opened with PIL, run through Tesseract OCR via ``pytesseract`` and the
  extracted text is handed to ``qna`` for question answering.

  Returns:
    None.
  """
  st.header("Ask your Image 💬")

  # upload file
  uploaded = st.file_uploader(label="Upload your image here", type=['png', 'jpg', 'jpeg'])
  if uploaded is None:
    return

  picture = Image.open(uploaded)
  extracted_text = pytesseract.image_to_string(picture)
  qna(extracted_text)



def qna(text):
  """Answer a user question about ``text`` and record the exchange in the chat history.

  The text is split on newlines into chunks of up to 1000 characters with 200
  characters of overlap. The chunks are embedded into a FAISS index, and the
  chunks most similar to the question are passed to a "stuff" QA chain. The
  question and the answer are appended to ``st.session_state.messages``
  (which must already exist). Token usage reported by the OpenAI callback is
  printed to the server console.

  Streamlit re-executes the script on every interaction, so the index and the
  last answered question are cached in ``st.session_state`` under the keys
  ``qna_index`` and ``qna_last_query``. A rerun with the same text and
  question neither re-embeds the text nor duplicates the chat history.

  Args:
    text: The raw text to ask about (e.g. OCR output). If it yields no
      chunks, a warning is shown instead of building an empty index.

  Returns:
    None.
  """
  # split into chunks
  text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len
  )
  chunks = text_splitter.split_text(text or "")
  if not chunks:
    # An index cannot be built from zero chunks; tell the user instead of crashing.
    st.warning("No readable text was found, so there is nothing to ask about.")
    return

  st.divider()
  # show user input
  user_question = st.text_input("Ask a question:")
  if not user_question:
    return

  query_key = (text, user_question)
  if st.session_state.get("qna_last_query") == query_key:
    # Same text and question as the previous run: already in the history.
    return

  # create embeddings (reused while the text stays the same)
  cached_index = st.session_state.get("qna_index")
  if cached_index is not None and cached_index[0] == text:
    knowledge_base = cached_index[1]
  else:
    knowledge_base = FAISS.from_texts(chunks, OpenAIEmbeddings())
    st.session_state["qna_index"] = (text, knowledge_base)

  docs = knowledge_base.similarity_search(user_question)
  chain = load_qa_chain(OpenAI(), chain_type="stuff")
  with get_openai_callback() as cb:
    response = chain.run(input_documents=docs, question=user_question)
    print(cb)

  # Record the exchange only once the chain has answered, so the history
  # always holds complete question/answer pairs.
  st.session_state.messages.append(HumanMessage(content=user_question))
  st.session_state.messages.append(AIMessage(content=response))
  st.session_state["qna_last_query"] = query_key


# Seed the chat history once per session; later script runs reuse the list
# already stored in session state.
if "messages" not in st.session_state:
  st.session_state.messages = [
      SystemMessage(content="You are a helpful assistant.")
  ]


page_names = ['PDF', 'Image']

# The format selector, the uploader and the question box all live in the sidebar.
with st.sidebar:
  st.header("Choose file format")
  page = st.radio("File Format", page_names, label_visibility="hidden")
  st.divider()

  # The two pages are mutually exclusive.
  if page == 'PDF':
    pdf()
  elif page == 'Image':
    img()

# display message history
# The first entry is the system prompt and is not shown. The speaker is taken
# from the message type rather than from its position, so a history that does
# not strictly alternate is still labelled correctly.
messages = st.session_state.get('messages', [])
for i, msg in enumerate(messages[1:]):
  is_user = isinstance(msg, HumanMessage)
  message(msg.content, is_user=is_user, key=str(i) + ('_user' if is_user else '_ai'))



Public IP for Local Tunnel (localtunnel asks for this IP as the tunnel password)

In [None]:
# Print this machine's public IPv4 address as reported by icanhazip.com.
# -q silences wget's progress output; "-O -" writes the response to stdout.
!wget -q -O - ipv4.icanhazip.com

Launching app.py

In [None]:
# Start the Streamlit app in the background (&), then open a localtunnel to
# port 8501 in the foreground; the cell keeps running while the tunnel is open.
# NOTE(review): 8501 is Streamlit's default port and no --server.port is passed,
# so this assumes that port is free - confirm if the tunnel shows nothing.
!streamlit run app.py & npx localtunnel --port 8501