In [7]:
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from dotenv import load_dotenv
load_dotenv()

True

In [9]:
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0.0,
    max_tokens=1000,
    streaming=True,
    verbose=True,
    request_timeout=60,
    max_retries=3
)
print("LLM initialized successfully.")
response = llm.invoke("What is the capital of France?")
print(response)

LLM initialized successfully.
content='The capital of France is Paris.' additional_kwargs={} response_metadata={'finish_reason': 'stop', 'model_name': 'gpt-3.5-turbo-0125', 'service_tier': 'default'} id='run--29a68529-2598-4d34-9fe5-78f446f8f2fb-0'


In [11]:
embeddings = OpenAIEmbeddings(
    model="text-embedding-3-small",
    chunk_size=1,
    request_timeout=60,
    max_retries=3
)
print("Embeddings initialized successfully.")
print(embeddings.embed_query("What is the capital of France?"))

Embeddings initialized successfully.
[0.04169800877571106, 0.0158005952835083, 0.028160491958260536, 0.024351144209504128, -0.023142803460359573, -0.002739247865974903, -0.014223608188331127, 0.01433624979108572, 0.010834109038114548, -0.010199218057096004, 0.006942841224372387, -0.024043940007686615, -0.06164587661623955, -0.01508378330618143, -0.014233848080039024, 0.023163283243775368, -0.006625395733863115, 0.019446099177002907, 0.07241854071617126, -0.024392105638980865, 0.003002932295203209, -0.010091695934534073, -0.04100167378783226, 0.011970768682658672, 0.06209644302725792, 0.0070964437909424305, -0.04554831609129906, -0.007347328122705221, 0.00364038348197937, 0.03942468762397766, 0.04214857518672943, -0.0251498781144619, -0.0019558740314096212, 0.04309067130088806, -0.024535467848181725, -0.03995717689394951, -0.03764289617538452, -0.039342764765024185, 0.021320052444934845, 0.029676036909222603, -0.003136054612696171, -0.01302550733089447, 0.00684555945917964, 0.0132303107

#### 1. Data Ingestion

In [55]:
from langchain_community.document_loaders import PyPDFLoader

In [56]:
import os 

In [57]:
filepath = os.path.join("data", "NIPS-2017-attention-is-all-you-need-Paper.pdf")
filepath

'data/NIPS-2017-attention-is-all-you-need-Paper.pdf'

In [59]:
documents = PyPDFLoader(filepath).load()
print(f"Loaded {len(documents)} pages from the PDF.")

Loaded 11 pages from the PDF.


In [61]:
documents

[Document(metadata={'producer': 'PyPDF2', 'creator': 'PyPDF', 'creationdate': '', 'subject': 'Neural Information Processing Systems http://nips.cc/', 'publisher': 'Curran Associates, Inc.', 'language': 'en-US', 'created': '2017', 'eventtype': 'Poster', 'description-abstract': 'The dominant sequence transduction models are based on complex recurrent orconvolutional neural networks in an encoder and decoder configuration. The best performing such models also connect the encoder and decoder through an attentionm echanisms.  We propose a novel, simple network architecture based solely onan attention mechanism, dispensing with recurrence and convolutions entirely.Experiments on two machine translation tasks show these models to be superiorin quality while being more parallelizable and requiring significantly less timeto train. Our single model with 165 million parameters, achieves 27.5 BLEU onEnglish-to-German translation, improving over the existing best ensemble result by over 1 BLEU. On 

In [38]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [39]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 500,
    chunk_overlap = 50,
    length_function = len
)

In [42]:
docs = text_splitter.split_documents(documents)
len(docs)

76

In [47]:
docs[0].metadata

{'producer': 'PyPDF2',
 'creator': 'PyPDF',
 'creationdate': '',
 'subject': 'Neural Information Processing Systems http://nips.cc/',
 'publisher': 'Curran Associates, Inc.',
 'language': 'en-US',
 'created': '2017',
 'eventtype': 'Poster',
 'description-abstract': 'The dominant sequence transduction models are based on complex recurrent orconvolutional neural networks in an encoder and decoder configuration. The best performing such models also connect the encoder and decoder through an attentionm echanisms.  We propose a novel, simple network architecture based solely onan attention mechanism, dispensing with recurrence and convolutions entirely.Experiments on two machine translation tasks show these models to be superiorin quality while being more parallelizable and requiring significantly less timeto train. Our single model with 165 million parameters, achieves 27.5 BLEU onEnglish-to-German translation, improving over the existing best ensemble result by over 1 BLEU. On English-to-

In [48]:
docs[0].page_content

'Attention Is All You Need\nAshish Vaswani∗\nGoogle Brain\navaswani@google.com\nNoam Shazeer∗\nGoogle Brain\nnoam@google.com\nNiki Parmar∗\nGoogle Research\nnikip@google.com\nJakob Uszkoreit∗\nGoogle Research\nusz@google.com\nLlion Jones∗\nGoogle Research\nllion@google.com\nAidan N. Gomez∗†\nUniversity of Toronto\naidan@cs.toronto.edu\nŁukasz Kaiser ∗\nGoogle Brain\nlukaszkaiser@google.com\nIllia Polosukhin∗‡\nillia.polosukhin@gmail.com\nAbstract'

In [49]:
from langchain.vectorstores import FAISS

In [51]:
vectorstore = FAISS.from_documents(documents=docs, embedding=embeddings)

In [52]:
relevant_docs = vectorstore.similarity_search("what is attention?")
len(relevant_docs)

4

In [62]:
relevant_docs

[Document(id='68279d29-07f8-4fa9-af93-d9e392efdeb6', metadata={'producer': 'PyPDF2', 'creator': 'PyPDF', 'creationdate': '', 'subject': 'Neural Information Processing Systems http://nips.cc/', 'publisher': 'Curran Associates, Inc.', 'language': 'en-US', 'created': '2017', 'eventtype': 'Poster', 'description-abstract': 'The dominant sequence transduction models are based on complex recurrent orconvolutional neural networks in an encoder and decoder configuration. The best performing such models also connect the encoder and decoder through an attentionm echanisms.  We propose a novel, simple network architecture based solely onan attention mechanism, dispensing with recurrence and convolutions entirely.Experiments on two machine translation tasks show these models to be superiorin quality while being more parallelizable and requiring significantly less timeto train. Our single model with 165 million parameters, achieves 27.5 BLEU onEnglish-to-German translation, improving over the existi

In [63]:
retriever = vectorstore.as_retriever()
retriever

VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x1296707f0>, search_kwargs={})

In [64]:
retriever.invoke("What is attention?")

[Document(id='68279d29-07f8-4fa9-af93-d9e392efdeb6', metadata={'producer': 'PyPDF2', 'creator': 'PyPDF', 'creationdate': '', 'subject': 'Neural Information Processing Systems http://nips.cc/', 'publisher': 'Curran Associates, Inc.', 'language': 'en-US', 'created': '2017', 'eventtype': 'Poster', 'description-abstract': 'The dominant sequence transduction models are based on complex recurrent orconvolutional neural networks in an encoder and decoder configuration. The best performing such models also connect the encoder and decoder through an attentionm echanisms.  We propose a novel, simple network architecture based solely onan attention mechanism, dispensing with recurrence and convolutions entirely.Experiments on two machine translation tasks show these models to be superiorin quality while being more parallelizable and requiring significantly less timeto train. Our single model with 165 million parameters, achieves 27.5 BLEU onEnglish-to-German translation, improving over the existi

In [79]:
prompt_template = """
Answer the questions based on the prompt provided below 
If the context does not contain the sufficient information, repond with:
"I don't have enough information about the topic"

Context : {context}
Question: {question}

Answer:...

"""

In [77]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain.prompts import PromptTemplate

In [81]:
prompt = PromptTemplate(
    template= prompt_template,
    input_variables= ["context", "question"]
)
prompt

PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='\nAnswer the questions based on the prompt provided below \nIf the context does not contain the sufficient information, repond with:\n"I don\'t have enough information about the topic"\n\nContext : {context}\nQuestion: {question}\n\nAnswer:...\n\n')

In [82]:
def format_docs(docs):
    return "\n\n".join([doc.page_content for doc in docs])

In [83]:
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm 
    | StrOutputParser()
)

In [87]:
rag_chain.invoke("who are the authors?")

'Authors are Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Łukasz Kaiser, and Illia Polosukhin.'