In [1]:
from langchain_openai import ChatOpenAI

In [2]:
MODEL = 'gpt-4o-mini'

In [3]:
llm = ChatOpenAI(model=MODEL, temperature=0.0)

In [4]:
llm.invoke("Hi! What up?")

AIMessage(content="Hello! I'm here and ready to help you with any questions or topics you'd like to discuss. What’s on your mind?", response_metadata={'token_usage': {'completion_tokens': 25, 'prompt_tokens': 12, 'total_tokens': 37}, 'model_name': 'gpt-4o-mini', 'system_fingerprint': 'fp_48196bc67a', 'finish_reason': 'stop', 'logprobs': None}, id='run-9ae65323-9889-4f54-8a00-6b3339fe735c-0', usage_metadata={'input_tokens': 12, 'output_tokens': 25, 'total_tokens': 37})

In [5]:
# pypdf
from langchain_community.document_loaders import PyPDFLoader

In [6]:
file_path = './paper.pdf'
loader = PyPDFLoader(file_path)
loader

<langchain_community.document_loaders.pdf.PyPDFLoader at 0x132951dd0>

In [7]:
docs = loader.load_and_split()
docs

[Document(metadata={'source': './paper.pdf', 'page': 0}, page_content='Provided proper attribution is provided, Google hereby grants permission to\nreproduce the tables and figures in this paper solely for use in journalistic or\nscholarly works.\nAttention Is All You Need\nAshish Vaswani∗\nGoogle Brain\navaswani@google.comNoam Shazeer∗\nGoogle Brain\nnoam@google.comNiki Parmar∗\nGoogle Research\nnikip@google.comJakob Uszkoreit∗\nGoogle Research\nusz@google.com\nLlion Jones∗\nGoogle Research\nllion@google.comAidan N. Gomez∗ †\nUniversity of Toronto\naidan@cs.toronto.eduŁukasz Kaiser∗\nGoogle Brain\nlukaszkaiser@google.com\nIllia Polosukhin∗ ‡\nillia.polosukhin@gmail.com\nAbstract\nThe dominant sequence transduction models are based on complex recurrent or\nconvolutional neural networks that include an encoder and a decoder. The best\nperforming models also connect the encoder and decoder through an attention\nmechanism. We propose a new simple network architecture, the Transformer,\nba

In [8]:
len(docs)

16

In [9]:
docs[0]

Document(metadata={'source': './paper.pdf', 'page': 0}, page_content='Provided proper attribution is provided, Google hereby grants permission to\nreproduce the tables and figures in this paper solely for use in journalistic or\nscholarly works.\nAttention Is All You Need\nAshish Vaswani∗\nGoogle Brain\navaswani@google.comNoam Shazeer∗\nGoogle Brain\nnoam@google.comNiki Parmar∗\nGoogle Research\nnikip@google.comJakob Uszkoreit∗\nGoogle Research\nusz@google.com\nLlion Jones∗\nGoogle Research\nllion@google.comAidan N. Gomez∗ †\nUniversity of Toronto\naidan@cs.toronto.eduŁukasz Kaiser∗\nGoogle Brain\nlukaszkaiser@google.com\nIllia Polosukhin∗ ‡\nillia.polosukhin@gmail.com\nAbstract\nThe dominant sequence transduction models are based on complex recurrent or\nconvolutional neural networks that include an encoder and a decoder. The best\nperforming models also connect the encoder and decoder through an attention\nmechanism. We propose a new simple network architecture, the Transformer,\nbas

In [10]:
type(docs[0])

langchain_core.documents.base.Document

In [11]:
docs[0].metadata

{'source': './paper.pdf', 'page': 0}

In [13]:
docs[0].page_content

'Provided proper attribution is provided, Google hereby grants permission to\nreproduce the tables and figures in this paper solely for use in journalistic or\nscholarly works.\nAttention Is All You Need\nAshish Vaswani∗\nGoogle Brain\navaswani@google.comNoam Shazeer∗\nGoogle Brain\nnoam@google.comNiki Parmar∗\nGoogle Research\nnikip@google.comJakob Uszkoreit∗\nGoogle Research\nusz@google.com\nLlion Jones∗\nGoogle Research\nllion@google.comAidan N. Gomez∗ †\nUniversity of Toronto\naidan@cs.toronto.eduŁukasz Kaiser∗\nGoogle Brain\nlukaszkaiser@google.com\nIllia Polosukhin∗ ‡\nillia.polosukhin@gmail.com\nAbstract\nThe dominant sequence transduction models are based on complex recurrent or\nconvolutional neural networks that include an encoder and a decoder. The best\nperforming models also connect the encoder and decoder through an attention\nmechanism. We propose a new simple network architecture, the Transformer,\nbased solely on attention mechanisms, dispensing with recurrence and con

In [14]:
from langchain_core.prompts import ChatPromptTemplate

In [15]:
# System message: states the assistant's role and carries the supplied
# context text through the {context} placeholder.
system_prompt = """
You are a question answering system. You take in context from 
articles, pdfs, papers, books, and so on, and you output
answers to user's questions.
Context:\n\n
{context}
"""

# Two-message chat prompt: the system message above followed by the user's
# question, which is filled in through the {input} placeholder.
prompt = ChatPromptTemplate.from_messages([
    ('system', system_prompt),
    ('human', '{input}')
])

In [17]:
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings

In [18]:
embedding = OpenAIEmbeddings()

In [19]:
# Index the PDF chunks in their own named collection. Without an explicit
# name every `Chroma.from_documents` call falls back to the same default
# collection, which in-memory stores created in one kernel can end up sharing,
# so documents indexed by other cells would be mixed into this store.
vectordb = Chroma.from_documents(docs, embedding, collection_name="attention_paper")

In [20]:
vectordb

<langchain_community.vectorstores.chroma.Chroma at 0x133e407d0>

In [21]:
retriever = vectordb.as_retriever()

In [22]:
retriever

VectorStoreRetriever(tags=['Chroma', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x133e407d0>)

In [23]:
from langchain.chains.combine_documents import create_stuff_documents_chain

In [24]:
qa_chain = create_stuff_documents_chain(llm, prompt)

In [25]:
qa_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), config={'run_name': 'format_inputs'})
| ChatPromptTemplate(input_variables=['context', 'input'], messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], template="\nYou are a question answering system. You take in context from \narticles, pdfs, papers, books, and so on, and you output\nanswers to user's questions.\nContext:\n\n\n{context}\n")), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='{input}'))])
| ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x12257b210>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x127ea97d0>, model_name='gpt-4o-mini', temperature=0.0, openai_api_key=SecretStr('**********'), openai_api_base='https://api.openai.com/v1', openai_proxy='')
| StrOutputParser(), config={'run_name': 'stuff_documents_chain'})

In [26]:
from langchain.chains import create_retrieval_chain

In [27]:
rag_chain = create_retrieval_chain(retriever, qa_chain)
rag_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['Chroma', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x133e407d0>), config={'run_name': 'retrieve_documents'})
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), config={'run_name': 'format_inputs'})
            | ChatPromptTemplate(input_variables=['context', 'input'], messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], template="\nYou are a question answering system. You take in context from \narticles, pdfs, papers, books, and so on, and you output\nanswers to user's questions.\nContext:\n\n\n{context}\n")), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='{input}'))])
            | Chat

In [28]:
output = rag_chain.invoke({"input": "What is self-attention"})
output

{'input': 'What is self-attention',
 'context': [Document(metadata={'page': 12, 'source': './paper.pdf'}, page_content='Attention Visualizations\nInput-Input Layer5\nIt\nis\nin\nthis\nspirit\nthat\na\nmajority\nof\nAmerican\ngovernments\nhave\npassed\nnew\nlaws\nsince\n2009\nmaking\nthe\nregistration\nor\nvoting\nprocess\nmore\ndifficult\n.\n<EOS>\n<pad>\n<pad>\n<pad>\n<pad>\n<pad>\n<pad>\nIt\nis\nin\nthis\nspirit\nthat\na\nmajority\nof\nAmerican\ngovernments\nhave\npassed\nnew\nlaws\nsince\n2009\nmaking\nthe\nregistration\nor\nvoting\nprocess\nmore\ndifficult\n.\n<EOS>\n<pad>\n<pad>\n<pad>\n<pad>\n<pad>\n<pad>\nFigure 3: An example of the attention mechanism following long-distance dependencies in the\nencoder self-attention in layer 5 of 6. Many of the attention heads attend to a distant dependency of\nthe verb ‘making’, completing the phrase ‘making...more difficult’. Attentions here shown only for\nthe word ‘making’. Different colors represent different heads. Best viewed in color.

In [30]:
# Show which top-level fields the retrieval chain returned.
for field in output:
    print(field)

input
context
answer


In [31]:
# Print the metadata of every document that was passed to the model as context.
context_list = output['context']
for doc in context_list:
    print(doc.metadata)

{'page': 12, 'source': './paper.pdf'}
{'page': 0, 'source': './paper.pdf'}
{'page': 13, 'source': './paper.pdf'}
{'page': 1, 'source': './paper.pdf'}


In [32]:
output['answer']

'Self-attention, sometimes referred to as intra-attention, is an attention mechanism that relates different positions of a single sequence in order to compute a representation of that sequence. It allows the model to weigh the importance of different words or tokens in the input sequence relative to each other, regardless of their distance in the sequence. This mechanism enables the model to capture dependencies and relationships between words effectively, which is particularly useful in tasks such as reading comprehension, summarization, and language modeling.\n\nIn self-attention, each word in the input sequence is transformed into a query, key, and value representation. The attention scores are computed by taking the dot product of the query with all keys, followed by a softmax operation to obtain a distribution of attention weights. These weights are then used to compute a weighted sum of the value representations, resulting in a new representation for each word that incorporates i

In [48]:
# Replacement system message (rebinds `system_prompt` from the earlier cell):
# answer from the retrieved context, admit when the answer is unknown, and
# keep the reply to at most three sentences.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

# Chat prompt built from the new system message plus the user's question,
# which is filled in through the {input} placeholder.
prompt = ChatPromptTemplate.from_messages([
    ('system', system_prompt),
    ('human', '{input}')
])

def format_docs(docs):
    """Concatenate the text of the given documents into a single string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values in their original order, separated by
        a blank line.
    """
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)


# Imported here because nothing earlier in the notebook brings StrOutputParser
# into scope; without it this cell raises NameError on a fresh kernel.
from langchain_core.output_parsers import StrOutputParser

# Answer-generation chain: takes a dict with 'input' (the question) and
# 'context' (a list of documents), joins the documents into one string for the
# prompt, calls the model, and parses the reply into a plain string.
rag_chain_docs = (
    {
        'input': lambda x: x['input'],
        'context': lambda x: format_docs(x['context']),
    }
    | prompt
    | llm
    | StrOutputParser()
)

In [49]:
retrieve_docs = (lambda x: x['input']) | retriever

In [50]:
retrieve_docs

RunnableLambda(lambda x: x['input'])
| VectorStoreRetriever(tags=['Chroma', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x133e407d0>)

In [51]:
from langchain_core.runnables import RunnablePassthrough

# Full pipeline: keep the incoming dict, add a 'context' entry produced by
# `retrieve_docs`, then add an 'answer' entry produced by `rag_chain_docs`.
chain = (
    RunnablePassthrough
    .assign(context=retrieve_docs)
    .assign(answer=rag_chain_docs)
)
chain

RunnableAssign(mapper={
  context: RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['Chroma', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x133e407d0>)
})
| RunnableAssign(mapper={
    answer: {
              input: RunnableLambda(...),
              context: RunnableLambda(...)
            }
            | ChatPromptTemplate(input_variables=['context', 'input'], messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, say that you don't know. Use three sentences maximum and keep the answer concise.\n\n{context}")), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='{input}'))])
            | ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x12257b210>, async_cli

In [47]:
chain.invoke({'input': "What are the main contributions of this paper?"})

{'input': 'What are the main contributions of this paper?',
 'context': [Document(metadata={'page': 0, 'source': './paper.pdf'}, page_content='Provided proper attribution is provided, Google hereby grants permission to\nreproduce the tables and figures in this paper solely for use in journalistic or\nscholarly works.\nAttention Is All You Need\nAshish Vaswani∗\nGoogle Brain\navaswani@google.comNoam Shazeer∗\nGoogle Brain\nnoam@google.comNiki Parmar∗\nGoogle Research\nnikip@google.comJakob Uszkoreit∗\nGoogle Research\nusz@google.com\nLlion Jones∗\nGoogle Research\nllion@google.comAidan N. Gomez∗ †\nUniversity of Toronto\naidan@cs.toronto.eduŁukasz Kaiser∗\nGoogle Brain\nlukaszkaiser@google.com\nIllia Polosukhin∗ ‡\nillia.polosukhin@gmail.com\nAbstract\nThe dominant sequence transduction models are based on complex recurrent or\nconvolutional neural networks that include an encoder and a decoder. The best\nperforming models also connect the encoder and decoder through an attention\nmecha

In [52]:
prompt = ChatPromptTemplate.from_template(
    """Create five jokes about {topic}"""
)

chain = prompt | llm

chain.invoke({'topic': 'Pandas'})

AIMessage(content='Sure! Here are five panda-themed jokes for you:\n\n1. **Why do pandas like old movies?**  \n   Because they’re in black and white!\n\n2. **What do you call a bear that’s stuck in the rain?**  \n   A drizzly panda!\n\n3. **Why did the panda like to eat bamboo?**  \n   Because it was un-bear-ably delicious!\n\n4. **What do you get when you cross a panda with a magician?**  \n   A bear-illusionist!\n\n5. **Why did the panda break up with his girlfriend?**  \n   She was too clingy—he needed some space to "bear" his feelings! \n\nHope these made you smile!', response_metadata={'token_usage': {'completion_tokens': 146, 'prompt_tokens': 13, 'total_tokens': 159}, 'model_name': 'gpt-4o-mini', 'system_fingerprint': 'fp_48196bc67a', 'finish_reason': 'stop', 'logprobs': None}, id='run-ea93cec3-32eb-42d6-a5b6-165a45d7a1f9-0', usage_metadata={'input_tokens': 13, 'output_tokens': 146, 'total_tokens': 159})

In [54]:
type(chain)

langchain_core.runnables.base.RunnableSequence

In [57]:
from langchain_core.runnables import RunnableSequence

In [59]:
from langchain_core.runnables import RunnableSequence, RunnableLambda
# Suppose we have a list of numbers
data = [1, 2, 3, 4, 5]

# Create RunnableLambdas
preprocess_runnable = RunnableLambda(lambda x: [i+1 for i in x])

preprocess_runnable.invoke(data)

[2, 3, 4, 5, 6]

In [60]:
sum_runnable = RunnableLambda(lambda x: sum(x))

sum_runnable.invoke(data)

15

In [61]:
# Compose the two steps with the pipe operator: the output of
# `preprocess_runnable` is fed into `sum_runnable`.
runnnable_seq = preprocess_runnable | sum_runnable

In [62]:
runnnable_seq.invoke(data)

20

In [63]:
from langchain_community.document_loaders import WebBaseLoader



In [64]:
url = "https://python.langchain.com/v0.2/docs/versions/overview/#:~:text=langchain%2Dcore%3A%20contains,as%20REST%20APIs."

loader = WebBaseLoader(url)
docs = loader.load_and_split()
docs

[Document(metadata={'source': 'https://python.langchain.com/v0.2/docs/versions/overview/#:~:text=langchain%2Dcore%3A%20contains,as%20REST%20APIs.', 'title': 'Overview of LangChain v0.2 | 🦜️🔗 LangChain', 'description': 'What’s new in LangChain?', 'language': 'en'}, page_content='Overview of LangChain v0.2 | 🦜️🔗 LangChain'),
 Document(metadata={'source': 'https://python.langchain.com/v0.2/docs/versions/overview/#:~:text=langchain%2Dcore%3A%20contains,as%20REST%20APIs.', 'title': 'Overview of LangChain v0.2 | 🦜️🔗 LangChain', 'description': 'What’s new in LangChain?', 'language': 'en'}, page_content="Skip to main contentLangChain 0.2 is out! Leave feedback on the v0.2 docs here. You can view the v0.1 docs here.IntegrationsAPI referenceLatestLegacyMorePeopleContributingCookbooks3rd party tutorialsYouTubearXivv0.2v0.2v0.1🦜️🔗LangSmithLangSmith DocsLangChain HubJS/TS Docs💬SearchIntroductionTutorialsBuild a Question Answering application over a Graph DatabaseTutorialsBuild a Simple LLM Applicat

In [65]:
url2 = "https://python.langchain.com/v0.2/docs/how_to/structured_output/"

In [67]:
docs2 = WebBaseLoader(url2).load_and_split()
docs2

[Document(metadata={'source': 'https://python.langchain.com/v0.2/docs/how_to/structured_output/', 'title': 'How to return structured data from a model | 🦜️🔗 LangChain', 'description': 'This guide assumes familiarity with the following concepts:', 'language': 'en'}, page_content='How to return structured data from a model | 🦜️🔗 LangChain'),
 Document(metadata={'source': 'https://python.langchain.com/v0.2/docs/how_to/structured_output/', 'title': 'How to return structured data from a model | 🦜️🔗 LangChain', 'description': 'This guide assumes familiarity with the following concepts:', 'language': 'en'}, page_content="Skip to main contentLangChain 0.2 is out! Leave feedback on the v0.2 docs here. You can view the v0.1 docs here.IntegrationsAPI referenceLatestLegacyMorePeopleContributingCookbooks3rd party tutorialsYouTubearXivv0.2v0.2v0.1🦜️🔗LangSmithLangSmith DocsLangChain HubJS/TS Docs💬SearchIntroductionTutorialsBuild a Question Answering application over a Graph DatabaseTutorialsBuild a S

In [68]:
len(docs)

5

In [69]:
# `docs` is mutated in place, so a plain extend would append `docs2` again on
# every re-run of this cell. Only add the chunks that are not already present
# to keep the cell idempotent.
docs.extend(doc for doc in docs2 if doc not in docs)

In [70]:
len(docs)

15

In [71]:
docs

[Document(metadata={'source': 'https://python.langchain.com/v0.2/docs/versions/overview/#:~:text=langchain%2Dcore%3A%20contains,as%20REST%20APIs.', 'title': 'Overview of LangChain v0.2 | 🦜️🔗 LangChain', 'description': 'What’s new in LangChain?', 'language': 'en'}, page_content='Overview of LangChain v0.2 | 🦜️🔗 LangChain'),
 Document(metadata={'source': 'https://python.langchain.com/v0.2/docs/versions/overview/#:~:text=langchain%2Dcore%3A%20contains,as%20REST%20APIs.', 'title': 'Overview of LangChain v0.2 | 🦜️🔗 LangChain', 'description': 'What’s new in LangChain?', 'language': 'en'}, page_content="Skip to main contentLangChain 0.2 is out! Leave feedback on the v0.2 docs here. You can view the v0.1 docs here.IntegrationsAPI referenceLatestLegacyMorePeopleContributingCookbooks3rd party tutorialsYouTubearXivv0.2v0.2v0.1🦜️🔗LangSmithLangSmith DocsLangChain HubJS/TS Docs💬SearchIntroductionTutorialsBuild a Question Answering application over a Graph DatabaseTutorialsBuild a Simple LLM Applicat

In [73]:
# Index the web pages in their own named collection. Without an explicit name
# this call falls back to the same default collection as any other
# `Chroma.from_documents` call, which in-memory stores created in one kernel
# can end up sharing, so unrelated documents could be returned by this
# retriever.
vectordb = Chroma.from_documents(docs, embedding, collection_name="langchain_web_docs")

In [74]:
retriever_web_pages = vectordb.as_retriever()

In [75]:
retrieve_docs = (lambda x: x['input']) | retriever_web_pages

In [76]:
from langchain_core.runnables import RunnablePassthrough

# Rebuild the pipeline: keep the incoming dict, add a 'context' entry produced
# by the current `retrieve_docs`, then add an 'answer' entry produced by
# `rag_chain_docs`.
chain = (
    RunnablePassthrough
    .assign(context=retrieve_docs)
    .assign(answer=rag_chain_docs)
)
chain

RunnableAssign(mapper={
  context: RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['Chroma', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x33fe07fd0>)
})
| RunnableAssign(mapper={
    answer: {
              input: RunnableLambda(...),
              context: RunnableLambda(...)
            }
            | ChatPromptTemplate(input_variables=['context', 'input'], messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, say that you don't know. Use three sentences maximum and keep the answer concise.\n\n{context}")), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='{input}'))])
            | ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x12257b210>, async_cli

In [77]:
chain.invoke({"input": "How to return structured data with langchain?"})

{'input': 'How to return structured data with langchain?',
 'context': [Document(metadata={'description': 'This guide assumes familiarity with the following concepts:', 'language': 'en', 'source': 'https://python.langchain.com/v0.2/docs/how_to/structured_output/', 'title': 'How to return structured data from a model | 🦜️🔗 LangChain'}, page_content='How to return structured data from a model | 🦜️🔗 LangChain'),
  Document(metadata={'description': 'This guide assumes familiarity with the following concepts:', 'language': 'en', 'source': 'https://python.langchain.com/v0.2/docs/how_to/structured_output/', 'title': 'How to return structured data from a model | 🦜️🔗 LangChain'}, page_content='to a runnableHow to filter messagesHybrid SearchHow to use the LangChain indexing APIHow to inspect runnablesLangChain Expression Language CheatsheetHow to cache LLM responsesHow to track token usage for LLMsRun models locallyHow to get log probabilitiesHow to reorder retrieved results to mitigate the "lo

# AF QUESTION: IS RAG JUST EMBEDDING?

## ANSWER: NO!

In [78]:
from langchain_openai import OpenAIEmbeddings

In [103]:
embedding = OpenAIEmbeddings()

In [88]:
sentence1 = 'Lucas is a pancake fanatic'

sentence2 = 'Lindsay also loves pancakes'

In [104]:
dimensions = len(embedding.embed_query(sentence1))

NotFoundError: Error code: 404 - {'error': {'message': 'The model `nomic-embed-text-v1` does not exist or you do not have access to it.', 'type': 'invalid_request_error', 'param': None, 'code': 'model_not_found'}}

In [93]:
dimensions

1536

In [94]:
embedding_sentence1 = embedding.embed_query(sentence1)

In [95]:
embedding_sentence2 = embedding.embed_query(sentence2)

In [99]:
import numpy as np

def cosine_similarity(embedding1, embedding2):
    """Return the cosine similarity between two equal-length vectors.

    Args:
        embedding1: Sequence or array of numbers.
        embedding2: Sequence or array of numbers with the same length.

    Returns:
        The dot product of the two vectors divided by the product of their
        Euclidean norms. When either vector has zero norm the similarity is
        undefined; 0.0 is returned instead of NaN.
    """
    dot_product = np.dot(embedding1, embedding2)
    norm_product = np.linalg.norm(embedding1) * np.linalg.norm(embedding2)
    # Guard the 0/0 case, which would otherwise yield NaN and a RuntimeWarning.
    if norm_product == 0:
        return 0.0
    return dot_product / norm_product

embedding1 = [0.1, 0.2, 0.3]
embedding2 = [0.4, 0.5, 0.6]

similarity = cosine_similarity(embedding1, embedding2)
print(f"Cosine similarity: {similarity}")

Cosine similarity: 0.9746318461970762


In [101]:
embedding_sentence3 = embedding.embed_query("The live-training course was going well until Lucas started going crazy with live-demos")

In [100]:
cosine_similarity(embedding_sentence1, embedding_sentence2)

0.8790500741185215

In [102]:
cosine_similarity(embedding_sentence1, embedding_sentence3)

0.8174719942085794