In [1]:
from langchain_community.document_loaders import PyPDFLoader
# Point a PDF loader at the paper stored next to this notebook.
loader = PyPDFLoader(file_path="attention.pdf")
# Read the PDF into a list of LangChain documents.
docs = loader.load()



In [4]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded documents into chunks of at most 1000 characters,
# with a 20-character overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)

# Split exactly once and keep the result. The previous version split the
# whole document set twice and threw the first result away without ever
# displaying it (it was not the last expression of the cell).
documents = text_splitter.split_documents(docs)

# Preview the first few chunks; as the cell's last expression this is rendered.
documents[:5]

In [7]:
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import FAISS

# Vector database: embed chunks with the Ollama "all-minilm" model and index
# them in FAISS. Only the first 30 chunks are embedded here.
db = FAISS.from_documents(
    documents=documents[:30],
    embedding=OllamaEmbeddings(model="all-minilm"),
)

In [10]:
from langchain_community.llms import Ollama

# Create the LLM wrapper for the "llama2" model served by Ollama.
# NOTE(review): presumably requires a local Ollama server with this model
# already pulled — confirm before running on a fresh machine.
llm = Ollama(model="llama2")

In [11]:
from langchain_core.prompts import ChatPromptTemplate

# Chat prompt template with two placeholders: {context} (wrapped in
# <context> tags) and {input} (the user's question). The model is told to
# answer only from the supplied context and to reason step by step.
# The template text is sent to the model as-is, so edit it deliberately.
prompt = ChatPromptTemplate.from_template("""
Answer the following question based only on the provided context. 
Think step by step before providing a detailed answer. 
I will tip you $1000 if the user finds the answer helpful. 
<context>
{context}
</context>
Question: {input}""")

In [15]:
from langchain.chains.combine_documents import create_stuff_documents_chain

# Chain
# Combine the LLM and the prompt into a "stuff documents" chain.
document_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

In [16]:
"""
Retrievers: A retriever is an interface that returns documents given
 an unstructured query. It is more general than a vector store.
 A retriever does not need to be able to store documents, only to 
 return (or retrieve) them. Vector stores can be used as the backbone
 of a retriever, but there are other types of retrievers as well. 
 https://python.langchain.com/docs/modules/data_connection/retrievers/   
"""

# Retriever: wrap the FAISS vector store in the retriever interface.
# No search arguments are passed, so the library defaults apply.
retriever = db.as_retriever()
# Bare last expression so the notebook displays the retriever's repr.
retriever

VectorStoreRetriever(tags=['FAISS', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x1220a0820>)

In [17]:
from langchain.chains import create_retrieval_chain
"""
Retrieval chain: This chain takes in a user inquiry, which is then
passed to the retriever to fetch relevant documents. Those documents
(and original inputs) are then passed to an LLM to generate a response.
https://python.langchain.com/docs/modules/chains/
"""

# Retrieval chain: the retriever fetches documents for the query and hands
# them to the document chain, which produces the final answer.
retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)

In [19]:
# Run the full retrieval pipeline; the query text goes in under the "input" key.
response = retrieval_chain.invoke({"input": "Scaled Dot-Product Attention"})

In [20]:
# Show only the generated answer text from the chain's result.
response["answer"]

'The answer to the question "Scaled Dot-Product Attention" is based on the provided context.\n\nIn the context, the authors propose a type of attention mechanism called "Scaled Dot-Product Attention." This attention mechanism is similar to the dot-product attention mechanism, but with a scaling factor of √dk. The scaling factor is introduced to counteract the effect of large dot products in the softmax function, which can result in small gradients.\n\nThe authors explain that for small values of dk, the two mechanisms perform similarly, but for larger values of dk, additive attention outperforms dot-product attention without scaling. They suspect that for large values of dk, the dot products grow large in magnitude, pushing the softmax function into regions where it has extremely small gradients. By introducing the scaling factor, they are able to stabilize the attention mechanism and improve its performance.\n\nIn summary, Scaled Dot-Product Attention is a type of attention mechanism 