In [1]:
import os

from dotenv import find_dotenv, load_dotenv

# Locate a .env file, then load its entries into the process environment.
# The value returned by load_dotenv is bound to `_` and not used.
env_file = find_dotenv()
_ = load_dotenv(env_file)

# Subscript access (rather than .get) raises KeyError right here if the key is not set.
openai_api_key = os.environ["OPENAI_API_KEY"]

## Basic App: Question & Answering from a Document

In [2]:
from langchain_openai import OpenAI

In [3]:
# Build the LLM client with no explicit arguments, so every setting is the library default.
# NOTE(review): presumably the client reads OPENAI_API_KEY from the environment,
# since the key loaded earlier is not passed in here — confirm.
llm = OpenAI()

**Load the PDF file**

In [7]:
# NOTE(review): recent LangChain releases deprecate this import path in favour of
# `langchain_community.document_loaders` — confirm against the installed version.
from langchain.document_loaders import PyPDFLoader

In [8]:
# Location of the PDF to query. Raw string so the Windows backslashes stay literal.
# NOTE(review): this is an absolute, machine-specific path; the notebook will not
# run elsewhere without editing it.
pdf_path = r"C:\Users\arpit\Desktop\UG\Arpit Agrawal DS\LLMOps\Langchain Level 1 Apps\Retrieval_QA_App\SM1671 52141501SD03434-RD EMARVEL 260C THI ST GL.pdf"
loader = PyPDFLoader(pdf_path)

In [9]:
# Read the PDF. In the recorded run this returned a list of 28 entries; entries expose
# `page_content` and `metadata` (the first entry's metadata has 'source' and 'page' keys).
document = loader.load()

**The document is loaded as a Python list with one entry per page, each carrying its own metadata**

In [10]:
# Check what kind of container the loader returned.
print(type(document))

<class 'list'>


In [11]:
# Number of entries the loader returned.
print(len(document))

28


In [12]:
# Inspect the metadata attached to the first entry.
print(document[0].metadata)

{'source': 'C:\\Users\\arpit\\Desktop\\UG\\Arpit Agrawal DS\\LLMOps\\Langchain Level 1 Apps\\Retrieval_QA_App\\SM1671 52141501SD03434-RD EMARVEL 260C THI ST GL.pdf', 'page': 0}


In [13]:
print(f"You have {len(document)} document.")

You have 28 document.


In [15]:
print(f"Your document has {len(document[1].page_content)} characters")

Your document has 1268 characters


**Split the document into smaller chunks**

In [16]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [17]:
# Splitter tuning knobs, named so they are easy to find and adjust.
CHUNK_SIZE = 3000     # upper bound on the length of one chunk
CHUNK_OVERLAP = 400   # amount of text shared between neighbouring chunks

text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)

In [27]:
# Split only the slice document[1:3], i.e. the 2nd and 3rd list entries; everything
# downstream (embeddings, vector store, QA chain) sees just these entries.
# NOTE(review): presumably done to limit embedding cost — confirm. Pass `document`
# instead of the slice to index the whole PDF.
document_chunks = text_splitter.split_documents(document[1:3])

In [28]:
# Report how many chunks the splitter produced.
print(f"Now you have {len(document_chunks)} chunks.")

Now you have 2 chunks.


**Convert text chunks into numeric vectors (called "embeddings")**

In [29]:
# Import the embeddings wrapper from the dedicated `langchain_openai` package,
# which this notebook already uses for `OpenAI`; the legacy
# `langchain.embeddings.openai` import path is deprecated.
from langchain_openai import OpenAIEmbeddings

In [30]:
# Embedding client created with no arguments: no model name is given here,
# so whichever model the library defaults to is used.
embeddings = OpenAIEmbeddings()

**Load the embeddings into a vector database**

In [31]:
# NOTE(review): recent LangChain releases deprecate this import path in favour of
# `langchain_community.vectorstores` — confirm against the installed version.
from langchain.vectorstores import FAISS

*Careful: the next operation calls the OpenAI embeddings API, which is billed per use.*

In [32]:
# Build the FAISS vector store from the chunks, using `embeddings` to vectorise them.
# NOTE(review): this presumably sends the chunk text to OpenAI for embedding, so
# re-running the cell repeats that API call — confirm.
stored_embeddings = FAISS.from_documents(document_chunks, embeddings)

**Create a Retrieval Question & Answering Chain**

In [33]:
from langchain.chains import RetrievalQA

In [34]:
# Expose the vector store through the retriever interface with default settings.
retriever = stored_embeddings.as_retriever()

# Wire the retriever and the LLM into a question-answering chain.
# "stuff" selects the chain variant that, as far as the name suggests, places the
# retrieved chunks directly into a single prompt — confirm against the LangChain docs.
QA_chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)

**Now we have a Question & Answering APP**

In [35]:
# Prompt sent to the QA chain. The leading/trailing newlines and the space after
# the first question mark are kept exactly as in the recorded query.
question = (
    "\nWhat is this article about? \n"
    "Describe it in less than 100 words.\n"
)

In [36]:
# Run the chain on the first question. The recorded result is a dict with a 'query' key
# (the prompt as sent) and a 'result' key (the generated answer).
QA_chain.invoke(question)

{'query': '\nWhat is this article about? \nDescribe it in less than 100 words.\n',
 'result': '\nThis article provides safety precautions and instructions for the proper use and maintenance of a refrigerator. It includes nomenclature and parts identification, specifications, installation, cleaning and maintenance tips, features, exploded views, spare code list, recommended tools and equipment, wiring diagram, troubleshooting chart, and good servicing practices. The main focus is on ensuring safe handling and operation of the refrigerator to minimize the risk of personal injury or property damage. The article also emphasizes the importance of using correct tools and replacement parts, as well as proper wiring and grounding to avoid electric shock hazards.'}

{'query': '\nWhat is this article about? \nDescribe it in less than 100 words.\n',
 'result': '\nThis article provides safety precautions and instructions for the proper use and maintenance of a refrigerator. It includes nomenclature and parts identification, specifications, installation, cleaning and maintenance tips, features, exploded views, spare code list, recommended tools and equipment, wiring diagram, troubleshooting chart, and good servicing practices. The main focus is on ensuring safe handling and operation of the refrigerator to minimize the risk of personal injury or property damage. The article also emphasizes the importance of using correct tools and replacement parts, as well as proper wiring and grounding to avoid electric shock hazards.'}

In [42]:
# Second prompt for the QA chain; wrapped in newlines exactly like the first one.
question2 = "\nList one safety precaution about this model?\n"

In [43]:
# Run the chain on the second question. As with the first call, the recorded result
# is a dict with 'query' and 'result' keys.
QA_chain.invoke(question2)

{'query': '\nList one safety precaution about this model?\n',
 'result': ' \nIt is important to unplug the power cord before performing any repairs or removing any panel on the refrigerator.'}

{'query': '\nList one safety precaution about this model?\n',
 'result': ' \nIt is important to unplug the power cord before performing any repairs or removing any panel on the refrigerator.'}