In [None]:
# Core dependencies. %pip (not !pip) installs into the running kernel's environment.
# openai/langchain are pinned to the versions this notebook was last run with, because
# app.py relies on the langchain 0.1.x import paths. faiss-cpu is required by
# langchain's FAISS vector store and is not pulled in by langchain itself.
%pip install -q PyPDF2 python-dotenv streamlit openai==1.30.1 langchain==0.1.20 faiss-cpu

Collecting openai
  Downloading openai-1.30.1-py3-none-any.whl (320 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m320.6/320.6 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain
  Downloading langchain-0.1.20-py3-none-any.whl (1.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m28.2 MB/s[0m eta [36m0:00:00[0m
Collecting httpx<1,>=0.23.0 (from openai)
  Downloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain)
  Downloading dataclasses_json-0.6.6-py3-none-any.whl (28 kB)
Collecting langchain-community<0.1,>=0.0.38 (from langchain)
  Downloading langchain_community-0.0.38-py3-none-any.whl (2.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m71.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langch

In [None]:
# Explicit %pip magic (does not depend on automagic); pinned to the version last run with.
%pip install -q --upgrade streamlit-extras==0.4.2

Collecting streamlit-extras
  Downloading streamlit_extras-0.4.2-py3-none-any.whl (70 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/70.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m70.5/70.5 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
Collecting htbuilder>=0.6.2 (from streamlit-extras)
  Downloading htbuilder-0.6.2-py3-none-any.whl (12 kB)
Collecting markdownlit>=0.0.5 (from streamlit-extras)
  Downloading markdownlit-0.0.7-py3-none-any.whl (15 kB)
Collecting st-annotated-text>=3.0.0 (from streamlit-extras)
  Downloading st_annotated_text-4.0.1-py3-none-any.whl (9.0 kB)
Collecting streamlit-camera-input-live>=0.2.0 (from streamlit-extras)
  Downloading streamlit_camera_input_live-0.2.0-py3-none-any.whl (6.6 kB)
Collecting streamlit-card>=0.0.4 (from streamlit-extras)
  Downloading streamlit_card-1.0.2-py3-none-any.whl (680 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m680.8/68

In [None]:
# tiktoken is used by the OpenAI embeddings wrapper for token counting; pinned to the version last run with.
%pip install -q tiktoken==0.7.0

Collecting tiktoken
  Downloading tiktoken-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tiktoken
Successfully installed tiktoken-0.7.0


In [None]:
%%writefile app.py
import hashlib
import os
import pickle

import streamlit as st
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from streamlit_extras.add_vertical_space import add_vertical_space
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.chains.question_answering import load_qa_chain
from langchain.callbacks import get_openai_callback

# Sidebar contents: app title plus a short "About" blurb.
with st.sidebar:
  st.title("LLM Chat App")
  # Markdown only treats "###" as a heading and "-" as a list bullet when they
  # are followed by a space; without it the markers are shown as literal text.
  st.markdown('''
  ### About

  - Import Your Pdf


  - Ask Your Questions from the Pdf

  ''')


def _extract_pdf_text(pdf):
  """Return the text of every page of the uploaded PDF, concatenated in page order."""
  reader = PdfReader(pdf)
  # Pages without a text layer (e.g. scans) may give an empty or missing result
  # depending on the PyPDF2 version; treat both as "no text" instead of crashing.
  return "".join(page.extract_text() or "" for page in reader.pages)


def _store_dir_for(filename, text):
  """Build the on-disk cache directory name for one document.

  The name combines the upload's base name with a hash of the extracted text,
  so two different PDFs that share a file name never reuse each other's index.
  """
  base = os.path.splitext(os.path.basename(filename))[0]
  digest = hashlib.sha256(text.encode("utf-8", errors="replace")).hexdigest()[:16]
  return f"{base}-{digest}.faiss"


def _load_or_build_vector_store(chunks, store_dir, embeddings):
  """Load the FAISS index cached in ``store_dir`` or build and cache a new one."""
  if os.path.isdir(store_dir):
    # NOTE(security): load_local unpickles the stored docstore, which is why the
    # explicit opt-in flag is required. Only directories written by this app
    # itself should ever be loaded here.
    return FAISS.load_local(
        store_dir, embeddings, allow_dangerous_deserialization=True
    )

  vector_store = FAISS.from_texts(chunks, embedding=embeddings)
  # save_local persists the index and docstore only; pickling the whole store
  # would also try to serialize the embeddings' API client.
  vector_store.save_local(store_dir)
  return vector_store


def main():
  """Render the page: upload a PDF, index its text, and answer questions about it.

  Flow: read the upload, split its text into overlapping chunks, load or build
  a FAISS index over those chunks, then answer the user's query with a "stuff"
  question-answering chain over the 3 most similar chunks.
  """
  st.header("PDF Information Extraction App")

  load_dotenv()

  # Upload a PDF file.
  pdf = st.file_uploader("Upload Your Pdf", type="pdf")
  if pdf is None:
    return

  # Both the embeddings and the chat model read the key from the environment;
  # report a missing key in the UI rather than as a stack trace.
  if not os.getenv("OPENAI_API_KEY"):
    st.error("OPENAI_API_KEY is not set. Add it to your environment or .env file.")
    return

  text = _extract_pdf_text(pdf)

  text_splitter = RecursiveCharacterTextSplitter(
      chunk_size=800,
      chunk_overlap=200,
      length_function=len,
  )
  chunks = text_splitter.split_text(text=text)
  if not chunks:
    # An index cannot be built from zero chunks.
    st.warning("No extractable text was found in this PDF.")
    return

  # Embeddings: reuse a cached index for this exact document when available.
  vector_store = _load_or_build_vector_store(
      chunks, _store_dir_for(pdf.name, text), OpenAIEmbeddings()
  )

  # Accept user questions/query.
  query = st.text_input("Ask Questions About Your PDF File:")
  if not query:
    return

  docs = vector_store.similarity_search(query=query, k=3)

  # gpt-3.5-turbo is a chat model, so it is driven through the chat wrapper.
  llm = ChatOpenAI(model_name="gpt-3.5-turbo")
  chain = load_qa_chain(llm=llm, chain_type="stuff")
  with get_openai_callback() as cb:
    response = chain.run(input_documents=docs, question=query)
    # The callback summary goes to the server's stdout, not to the page.
    print(cb)
  st.write(response)






# Run the app when this file is executed as a script.
if __name__ == "__main__":
  main()

Overwriting app.py


In [None]:
# Start Streamlit in the background and expose it through localtunnel.
# The port is set explicitly so Streamlit cannot fall back to a different port
# than the one the tunnel forwards.
!streamlit run app.py --server.port 8501 & npx localtunnel --port 8501


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://35.201.252.99:8501[0m
[0m
[K[?25hnpx: installed 22 in 2.244s
your url is: https://light-beans-mix.loca.lt

>> from langchain.embeddings import OpenAIEmbeddings

with new imports of:

>> from langchain_community.embeddings import OpenAIEmbeddings

  warn_deprecated(

>> from langchain.vectorstores import FAISS

with new imports of:

>> from langchain_community.vectorstores import FAISS

  warn_deprecated(

`from langchain_community.llms import OpenAI`.

To install langchain-community run `pip install -U langchain-community`.
  warn_deprecated(
2024-05-16 02:08:03.559 Uncaught app exception
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/streamlit/runtime/scriptrunner/script_runner.py", line 600

Here's the documentation for your `app.py` script:
    
### Explanation of the Script:

1. **Imports:**
   - `streamlit as st`: Used to create the Streamlit web application.
   - `load_dotenv`: Loads environment variables from a `.env` file.
   - `pickle`: Used for serializing and deserializing Python objects.
   - `PdfReader` from `PyPDF2`: Reads and extracts text from PDF files.
   - `add_vertical_space` from `streamlit_extras`: Helper for adding vertical space in the Streamlit app (imported, but not currently called by the script).
   - `RecursiveCharacterTextSplitter` from `langchain.text_splitter`: Splits text into manageable chunks.
   - `OpenAIEmbeddings` from `langchain.embeddings.openai`: Generates embeddings using OpenAI.
   - `FAISS` from `langchain.vectorstores`: A library for efficient similarity search.
   - `OpenAI` from `langchain.llms`: Interacts with OpenAI's language models.
   - `load_qa_chain` from `langchain.chains.question_answering`: Loads a question-answering chain.
   - `get_openai_callback` from `langchain.callbacks`: Retrieves callback data for OpenAI interactions.
   - `os`: Provides functions for interacting with the operating system.

2. **Environment Variable Loading:**
   - `load_dotenv()`: Loads environment variables from a `.env` file into the application's environment.

3. **Sidebar Contents:**
   - The sidebar contains a title and a brief description of the application.

4. **Main Function (`main`):**
   - Displays the main header of the application.
   - Allows users to upload a PDF file.
   - Reads and extracts text from the uploaded PDF.
   - Splits the extracted text into chunks.
   - Checks if a precomputed vector store exists; if not, it generates embeddings and creates a new vector store.
   - Allows users to input a query.
   - Searches for similar documents based on the query.
   - Initializes the language model and question-answering chain.
   - Generates and displays a response to the user's query.

5. **Execution Block:**
   - Ensures the `main` function is executed when the script is run directly.

### Instructions for Use:

1. **Install Dependencies:**
   Ensure you have all the required libraries installed. You can install them using pip:
   ```bash
   pip install streamlit python-dotenv PyPDF2 openai langchain streamlit-extras tiktoken faiss-cpu
   ```

2. **Create a `.env` File:**
   Create a `.env` file in the same directory as `app.py` with your OpenAI API key:
   ```
   OPENAI_API_KEY=your_openai_api_key
   ```

3. **Run the Application:**
   Run the Streamlit application using the following command:
   ```bash
   streamlit run app.py
   ```

4. **Upload and Query PDF:**
   - Use the file uploader in the web app to upload a PDF file.
   - Enter your questions in the text input field to query the PDF content.

This documentation provides a comprehensive overview of the script, its functionality, and instructions for setting up and running the application.