In [77]:
from pathlib import Path

from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma


In [83]:
# Build a conversational question-answering chain over a PDF and ask it a first
# question. The cell ends with the answer string so the notebook displays it.

# Resolve the PDF relative to the current user's home directory rather than
# hardcoding one machine's absolute path.
pdf_path = Path.home() / "Downloads" / "apple_10k_2022.pdf"
# The loader is handed a plain string path.
loader = PyPDFLoader(str(pdf_path))
documents = loader.load()
# Split the documents into chunks. The recursive splitter falls back through a
# list of separators, so chunk_size is still respected when the extracted text
# contains no blank lines; CharacterTextSplitter splits on a single separator
# only and can emit chunks larger than chunk_size in that situation.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
texts = text_splitter.split_documents(documents)
# Select which embeddings we want to use.
embeddings = OpenAIEmbeddings()
# Create the vector store to use as the index.
db = Chroma.from_documents(texts, embeddings)
# Expose this index through a retriever interface (similarity search, k=4).
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 4})
# Create a chain to answer questions; source documents are returned alongside
# the answer so it can be traced back to the retrieved chunks.
qa = ConversationalRetrievalChain.from_llm(
    llm=ChatOpenAI(model_name="gpt-4", temperature=0),
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)
# First turn of the conversation: there is no prior history yet.
chat_history = []
query = "What did apple did well in 2022?"
result = qa({"question": query, "chat_history": chat_history})
result["answer"]


"Apple's total net sales revenue in 2022 was $394,328 million."

In [89]:
# Follow-up turn: hand the previous question/answer pair back to the chain as
# history, then ask the next question and display its answer.
chat_history = [
    (query, result["answer"]),
]
query = (
    "What was apple's revenue in the past couple of years? "
    "Find how many years you can"
)
result = qa(dict(question=query, chat_history=chat_history))
result["answer"]

"Sure, here is the information on Apple's total net sales for the past three years:\n\n- In 2022, Apple's total net sales were $394,328 million.\n- In 2021, the total net sales were $365,817 million.\n- In 2020, the total net sales were $274,515 million."