In [1]:
import os
from dotenv import load_dotenv

# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()

# LangSmith tracing settings; set unconditionally for this notebook session.
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'

# Both API keys must already be present in the environment (shell or .env).
# Assigning os.environ[key] = os.environ.get(key) is a no-op when the key exists
# and raises an opaque "TypeError: str expected, not NoneType" when it does not,
# so check explicitly and report exactly which variables are missing.
_missing_keys = [
    key for key in ('LANGCHAIN_API_KEY', 'OPENAI_API_KEY')
    if not os.environ.get(key)
]
if _missing_keys:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_keys)
        + ". Define them in the shell or in a .env file before running this notebook."
    )

In [2]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

In [3]:
# Hand-written prompt template with {query} and {context} placeholders.
# NOTE: the name `prompt` is rebound to the prompt pulled from the hub in the
# indexing/generation cell before the chain is built, so the chain does not
# use this template as written.
prompt = (
    " Answer the following question:\n"
    "{query}\n"
    "Based on the following context only:\n"
    "{context}\n"
)

In [4]:
#### Indexing ####
from langchain import hub

# Load: read the PDF letter into LangChain documents.
loader = PyPDFLoader("Data/1998_amazon_letter.pdf")
docs = loader.load()

# Split: cut the loaded documents into overlapping chunks
# (chunk_size=1000, chunk_overlap=200).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(docs)

# Index: embed every chunk with the OpenAI embedding model and store the
# result in a Chroma vector store.
embeddings = OpenAIEmbeddings(model='text-embedding-ada-002')
vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)

# Retriever view over the vector store; this is what the chain queries.
retriever = vectorstore.as_retriever()

#### RETRIEVAL and GENERATION ####

# Prompt: pulled from the LangChain hub. This rebinds `prompt`, replacing any
# value assigned to that name earlier in the notebook.
prompt = hub.pull("rlm/rag-prompt")

# Chat model used for generation.
LLM = ChatOpenAI(
    model='gpt-4o-mini',
    temperature=0.1,
)

# Post-processing
def format_docs(docs):
    """Concatenate the ``page_content`` of every document into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The contents in input order, separated by a blank line; an empty
        string when ``docs`` yields nothing.
    """
    page_texts = [document.page_content for document in docs]
    return "\n\n".join(page_texts)

# Build the RAG pipeline. The input question is fanned out to two keys:
#   - "context": the question goes to the retriever, and the returned
#     documents are joined into one string by format_docs;
#   - "question": the question is passed through unchanged.
# The resulting mapping is piped into the prompt, then the chat model, and the
# model output is parsed into a plain string.
chain_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}
rag_chain = chain_inputs | prompt | LLM | StrOutputParser()

# Question
question = "What isthe broad sales, customer growth and geography spread for amazon inc in 1998?"
# Last expression in the cell, so the answer string is displayed as the output.
rag_chain.invoke(question)



"In 1998, Amazon's sales grew from $148 million in 1997 to $610 million, marking a 313% increase. Customer accounts surged from 1.5 million to 6.2 million, also over 300% growth, while repeat customer orders increased from over 58% to over 64%. The company expanded its product offerings, notably launching the Amazon.com music store, which quickly became a leading online music retailer."