# Data Ingestion 

In [1]:
## Pdf reader
from langchain_community.document_loaders import PyPDFLoader
loader=PyPDFLoader('attention.pdf')
docs=loader.load()

# Data Transformation 

In [2]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=200)
documents=text_splitter.split_documents(docs)
documents[:5]

[Document(metadata={'source': 'attention.pdf', 'page': 0}, page_content='Provided proper attribution is provided, Google hereby grants permission to\nreproduce the tables and figures in this paper solely for use in journalistic or\nscholarly works.\nAttention Is All You Need\nAshish Vaswani∗\nGoogle Brain\navaswani@google.comNoam Shazeer∗\nGoogle Brain\nnoam@google.comNiki Parmar∗\nGoogle Research\nnikip@google.comJakob Uszkoreit∗\nGoogle Research\nusz@google.com\nLlion Jones∗\nGoogle Research\nllion@google.comAidan N. Gomez∗ †\nUniversity of Toronto\naidan@cs.toronto.eduŁukasz Kaiser∗\nGoogle Brain\nlukaszkaiser@google.com\nIllia Polosukhin∗ ‡\nillia.polosukhin@gmail.com\nAbstract\nThe dominant sequence transduction models are based on complex recurrent or\nconvolutional neural networks that include an encoder and a decoder. The best\nperforming models also connect the encoder and decoder through an attention\nmechanism. We propose a new simple network architecture, the Transformer,\n

# Ollama Embeddings 

In [3]:
# Set environment variables directly (replace with your actual API key)
import os 
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_API_KEY"] = "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"

In [2]:
from langchain.embeddings import OllamaEmbeddings
from langchain.vectorstores import FAISS

# Initialize Ollama's LLaMA embeddings
embeddings = OllamaEmbeddings(model="llama3.1")  # Replace with the version you are using, e.g., "llama-3.1"

 

In [6]:
from langchain_community.llms import Ollama
## Load Ollama LAMA2 LLM model
llm=Ollama(model="llama3.1")
llm

Ollama(model='llama3.1')

# Creating A Chat Prompt 

In [7]:
## Design ChatPrompt Template
from langchain_core.prompts import ChatPromptTemplate    
prompt = ChatPromptTemplate.from_template("""
Answer the following question based only on the provided context. 
Think step by step before providing a detailed answer. 
I will tip you $1000 if the user finds the answer helpful.      
<context>                                                   
{context}
</context>
Question: {input}""")

# The context is doccuments stored in data base and input is user Query

# Create Stuff Docment Chain

In [8]:
from langchain.chains.combine_documents import create_stuff_documents_chain

document_chain=create_stuff_documents_chain(llm,prompt)

# Creating Retriver 

In [9]:
"""
Retrievers: A retriever is an interface that returns documents given
 an unstructured query. It is more general than a vector store.
 A retriever does not need to be able to store documents, only to 
 return (or retrieve) them. Vector stores can be used as the backbone
 of a retriever, but there are other types of retrievers as well. 
 https://python.langchain.com/docs/modules/data_connection/retrievers/   
"""

retriever=db.as_retriever()
retriever

VectorStoreRetriever(tags=['Chroma', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x000001C47A18A1F0>, search_kwargs={})

# Creating Retrival Chain 

In [10]:
"""
Retrieval chain:This chain takes in a user inquiry, which is then
passed to the retriever to fetch relevant documents. Those documents 
(and original inputs) are then passed to an LLM to generate a response
https://python.langchain.com/docs/modules/chains/
"""
from langchain.chains import create_retrieval_chain
retrieval_chain=create_retrieval_chain(retriever,document_chain)

# Getting Responce

In [11]:
response=retrieval_chain.invoke({"input":"Scaled Dot-Product Attention"})

In [12]:
response 

{'input': 'Scaled Dot-Product Attention',
 'context': [Document(metadata={'page': 2, 'source': 'attention.pdf'}, page_content='3.2 Attention\nAn attention function can be described as mapping a query and a set of key-value pairs to an output,\nwhere the query, keys, values, and output are all vectors. The output is computed as a weighted sum\n3'),
  Document(metadata={'page': 0, 'source': 'attention.pdf'}, page_content='mechanism. We propose a new simple network architecture, the Transformer,\nbased solely on attention mechanisms, dispensing with recurrence and convolutions\nentirely. Experiments on two machine translation tasks show these models to\nbe superior in quality while being more parallelizable and requiring significantly\nless time to train. Our model achieves 28.4 BLEU on the WMT 2014 English-\nto-German translation task, improving over the existing best results, including\nensembles, by over 2 BLEU. On the WMT 2014 English-to-French translation task,\nour model establishes

In [13]:
response['answer']

"Based on the provided context, I will answer your question about Scaled Dot-Product Attention.\n\nTo provide a detailed answer, let's break down the key points related to Scaled Dot-Product Attention:\n\n1. **Definition**: Scaled Dot-Product Attention is a particular attention mechanism proposed in the context.\n2. **Input Dimensions**: The input consists of queries and keys of dimension dk, and values of dimension dv.\n3. **Computation**:\n\t* Compute dot products of the query with all keys.\n\t* Divide each dot product by √dk.\n\t* Apply a softmax function to obtain weights on the values.\n4. **Notation**: The attention function is denoted as Attention(Q, K, V) = softmax(QKT/√dk)V (Equation 1).\n5. **Practical Implementation**: In practice, the computation is performed on a set of queries simultaneously, packed together into a matrix Q, and keys and values are also packed together into matrices K and V.\n6. **Output**: The output is computed as the weighted sum of the values.\n\nThe