# Loading the PDF

In [1]:
from langchain_community.document_loaders import PyPDFLoader

# Source document, given as a path relative to the notebook's working directory.
pdf_path = "attentionisallyouneed.pdf"

# `data` is the loader's result; its length is inspected in the next cell.
loader = PyPDFLoader(pdf_path)
data = loader.load()

In [2]:
# Sanity check: how many items the PDF loader produced.
len(data)

15

# Chunking Process

In [4]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Break the loaded documents into smaller pieces for embedding; the splitter
# is configured with a chunk size of 1000 and otherwise uses library defaults.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
)
docs = text_splitter.split_documents(data)

# Report how many chunks the split produced.
print("Total", len(docs))

Total 52


# Embedding Process

In [6]:
from langchain_chroma import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment before
# the Google client is constructed.
# NOTE(review): presumably this is where the Google API key comes from — confirm.
load_dotenv()

# Embedding client reused later when the vector store is built.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
# Smoke test: embed one short string and display its first five components.
vector = embeddings.embed_query("hello, world!")
vector[:5]

[0.05168594419956207,
 -0.030764883384108543,
 -0.03062233328819275,
 -0.02802734263241291,
 0.01813093200325966]

# Saving to ChromaDB

In [7]:
# Embed every chunk and index it in a Chroma collection.
# NOTE(review): no persist directory or collection name is passed, so storage
# location and behaviour on re-running this cell depend on Chroma's defaults —
# confirm before relying on this outside a one-off run.
vectorstore = Chroma.from_documents(
    documents=docs,
    embedding=embeddings,
)


In [8]:
# Similarity-search retriever that returns the 10 closest chunks per query.
# The variable keeps its existing spelling (`retriver`) because later cells
# reference it under that name.
retriver = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 10},
)

In [9]:
# Run a sample query through the retriever to inspect what comes back.
retrieved_docs = retriver.invoke("What is encoder?")

In [10]:
# Should match the retriever's configured k (10).
len(retrieved_docs)

10

In [11]:
# Spot-check the text of one retrieved chunk (index 5 is an arbitrary pick).
print(retrieved_docs[5].page_content)

Attention Visualizations
Input-Input Layer5
It
is
in
this
spirit
that
a
majority
of
American
governments
have
passed
new
laws
since
2009
making
the
registration
or
voting
process
more
difficult
.
<EOS>
<pad>
<pad>
<pad>
<pad>
<pad>
<pad>
It
is
in
this
spirit
that
a
majority
of
American
governments
have
passed
new
laws
since
2009
making
the
registration
or
voting
process
more
difficult
.
<EOS>
<pad>
<pad>
<pad>
<pad>
<pad>
<pad>
Figure 3: An example of the attention mechanism following long-distance dependencies in the
encoder self-attention in layer 5 of 6. Many of the attention heads attend to a distant dependency of
the verb ‘making’, completing the phrase ‘making...more difficult’. Attentions here shown only for
the word ‘making’. Different colors represent different heads. Best viewed in color.
13


# Invoking the LLM with the Google Gemini API

In [13]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Chat model used by the question-answering chain built further down.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    max_tokens=500,
    temperature=0.3,
)

In [17]:
from langchain.chains import create_retrieval_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain

In [15]:
# System instructions for the question-answering model.
# Python joins adjacent string literals with no separator, so every sentence
# carries its own terminating period and trailing space; without them the
# sentences run together ("...tasksUse the following...").
# "{context}" is the placeholder the prompt template fills with retrieved text.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, say you don't know. "
    "Use three sentences maximum and keep the answer correct."
    "\n\n"
    "{context}"
)

In [16]:
# Two-message chat template: the system instructions (with the retrieved
# context substituted in) followed by the user's question as "{input}".
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "{input}"),
])

# Question Answering Chain

In [18]:
# Chain that feeds documents into `prompt` and sends the result to `llm`.
question_answer_chain = create_stuff_documents_chain(
    llm,
    prompt,
)

# RAG Chain

In [19]:
# Combine the retriever with the question-answering chain into one pipeline.
rag_chain = create_retrieval_chain(retriver, question_answer_chain)

# Producing an Answer for a User Query

In [24]:
# The question goes in under the "input" key, matching the "{input}"
# placeholder in the prompt; the result is indexed by key in the next cell.
response = rag_chain.invoke({"input":"What is encoder?"})

In [25]:
# Show only the generated answer text from the chain's response.
print(response["answer"])

The encoder maps an input sequence of symbol representations to a sequence of continuous representations. It is composed of a stack of N = 6 identical layers. Each layer has two sub-layers: a multi-head self-attention mechanism and a position-wise fully connected feed-forward network.
