In [1]:
# Check installed package versions
import sys
from importlib.metadata import version

try:
    import langchain
    print(f"‚úì langchain: {langchain.__version__}")
except:
    print("‚úó langchain not installed")

try:
    import langchain_core
    print(f"‚úì langchain-core: {langchain_core.__version__}")
except:
    print("‚úó langchain-core not installed - REQUIRED!")
    print("  Run: pip install langchain-core")

try:
    import langchain_openai
    print(f"‚úì langchain-openai: {version('langchain-openai')}")
except:
    print("‚úó langchain-openai not installed")

try:
    import langchain_community
    print(f"‚úì langchain-community: {langchain_community.__version__}")
except:
    print("‚úó langchain-community not installed")

print(f"\nPython version: {sys.version}")
print("\nIf any packages are missing, run:")
print("pip install langchain langchain-core langchain-openai langchain-community langchain-text-splitters faiss-cpu pypdf python-dotenv tiktoken")

‚úì langchain: 1.2.0
‚úì langchain-core: 1.2.4
‚úì langchain-openai: 1.1.6
‚úì langchain-community: 0.4.1

Python version: 3.12.10 (main, Apr  9 2025, 04:06:22) [MSC v.1943 64 bit (AMD64)]

If any packages are missing, run:
pip install langchain langchain-core langchain-openai langchain-community langchain-text-splitters faiss-cpu pypdf python-dotenv tiktoken


In [2]:
# Standard library imports
import os

os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
from pathlib import Path

# Environment variable management - for secure API key handling
from dotenv import load_dotenv

# LangChain Document Loaders - for loading PDF documents
from langchain_community.document_loaders import PyPDFLoader

# LangChain Text Splitters - for breaking documents into manageable chunks
from langchain_text_splitters import RecursiveCharacterTextSplitter

# OpenAI Integration - for embeddings and LLM
# from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_google_genai import ChatGoogleGenerativeAI

# Vector Store - FAISS for efficient similarity search
from langchain_community.vectorstores import FAISS

# LangChain Core Components
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import HumanMessage, AIMessage
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

print("‚úì All imports successful!")
print("‚úì Compatible with LangChain 1.0+")

‚úì All imports successful!
‚úì Compatible with LangChain 1.0+


In [10]:
# Load environment variables from .env file
load_dotenv()

    
# Verify API key is loaded
if not os.getenv("GOOGLE_API_KEY"):
    print("‚ö†Ô∏è  WARNING: GOOGLE_API_KEY not found!")
    print("Please set it in .env file or uncomment the line below:")
    # os.environ["OPENAI_API_KEY"] = "your_api_key_here"
else:
    print("‚úì GOOGLE_API_KEY loaded successfully!")
    print(f"‚úì Key starts with: {os.getenv('GOOGLE_API_KEY')[:8]}...")

‚úì GOOGLE_API_KEY loaded successfully!
‚úì Key starts with: AIzaSyC3...


In [12]:
# ===== CONFIGURATION: Update this path to your PDF file =====
pdf_path = "attention.pdf"  # Change this to your PDF file path
# =============================================================

# Check if file exists
if not os.path.exists(pdf_path):
    print(f"‚ö†Ô∏è  ERROR: File '{pdf_path}' not found!")
    print("Please update the pdf_path variable with your PDF file location.")
else:
    # Initialize the PDF loader
    loader = PyPDFLoader(pdf_path)
    
    # Load all pages from the PDF
    # Each page becomes a separate Document object
    documents = loader.load()
    
    # Display information about loaded documents
    print(f"‚úì Loaded {len(documents)} pages from '{pdf_path}'")
    print(f"\n--- First Document Preview ---")
    print(f"Content (first 500 chars): {documents[0].page_content[:500]}...")
    print(f"\nMetadata: {documents[0].metadata}")
    print(f"\nTotal characters across all pages: {sum(len(doc.page_content) for doc in documents):,}")

‚úì Loaded 15 pages from 'attention.pdf'

--- First Document Preview ---
Content (first 500 chars): Provided proper attribution is provided, Google hereby grants permission to
reproduce the tables and figures in this paper solely for use in journalistic or
scholarly works.
Attention Is All You Need
Ashish Vaswani‚àó
Google Brain
avaswani@google.com
Noam Shazeer‚àó
Google Brain
noam@google.com
Niki Parmar‚àó
Google Research
nikip@google.com
Jakob Uszkoreit‚àó
Google Research
usz@google.com
Llion Jones‚àó
Google Research
llion@google.com
Aidan N. Gomez‚àó ‚Ä†
University of Toronto
aidan@cs.toronto.edu
≈Åukasz ...

Metadata: {'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2024-04-10T21:11:43+00:00', 'author': '', 'keywords': '', 'moddate': '2024-04-10T21:11:43+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': 'attention.pdf', 'tota

In [13]:
# Initialize the text splitter with recommended settings
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1024,        # Maximum characters per chunk (roughly 200-250 tokens)
    chunk_overlap=128,      # Characters overlap between chunks (maintains context)
    length_function=len,    # Function to measure chunk length
    separators=["\n\n", "\n", " ", ""]  # Try to split on paragraphs first, then lines, etc.
)

# Split the documents into chunks
# This creates smaller, manageable pieces while preserving semantic meaning
chunks = text_splitter.split_documents(documents)

# Display splitting results
print(f"‚úì Split {len(documents)} documents into {len(chunks)} chunks")
print(f"\nAverage chunk size: {sum(len(chunk.page_content) for chunk in chunks) / len(chunks):.0f} characters")

# Preview a few chunks
print(f"\n--- Chunk Examples ---")
for i, chunk in enumerate(chunks[:3]):
    print(f"\nChunk {i+1} (length: {len(chunk.page_content)} chars):")
    print(f"{chunk.page_content[:200]}...")
    print(f"Metadata: {chunk.metadata}")

‚úì Split 15 documents into 49 chunks

Average chunk size: 873 characters

--- Chunk Examples ---

Chunk 1 (length: 986 chars):
Provided proper attribution is provided, Google hereby grants permission to
reproduce the tables and figures in this paper solely for use in journalistic or
scholarly works.
Attention Is All You Need
...
Metadata: {'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2024-04-10T21:11:43+00:00', 'author': '', 'keywords': '', 'moddate': '2024-04-10T21:11:43+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': 'attention.pdf', 'total_pages': 15, 'page': 0, 'page_label': '1'}

Chunk 2 (length: 944 chars):
based solely on attention mechanisms, dispensing with recurrence and convolutions
entirely. Experiments on two machine translation tasks show these models to
be superior in quality while being more pa...
Metadata: 

In [14]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

In [21]:

embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")


# Test the embeddings with a sample text
sample_text = "This is a test sentence to demonstrate embeddings."
sample_embedding = embeddings.embed_query(sample_text,output_dimensionality=768)

print(f"‚úì Embeddings model initialized: gemini-embedding-001")
print(f"‚úì Embedding dimension: {len(sample_embedding)}")
print(f"‚úì Sample embedding (first 10 values): {sample_embedding[:10]}")
print(f"\n‚ÑπÔ∏è  Each chunk will be converted to a {len(sample_embedding)}-dimensional vector for similarity search")

‚úì Embeddings model initialized: gemini-embedding-001
‚úì Embedding dimension: 768
‚úì Sample embedding (first 10 values): [-0.029574525, 0.022307422, 0.0063085253, -0.083313696, -0.00023175914, -0.0035217889, 0.0065565994, 0.011158722, 0.008888665, -0.009683166]

‚ÑπÔ∏è  Each chunk will be converted to a 768-dimensional vector for similarity search


In [20]:
print(sample_embedding)

[-0.029574525, 0.022307422, 0.0063085253, -0.083313696, -0.00023175914, -0.0035217889, 0.0065565994, 0.011158722, 0.008888665, -0.009683166, 0.013862128, -0.02735611, -0.0028739157, 0.028659457, 0.11960617, 0.030436072, -0.0026327427, 0.007941088, 0.009127408, -0.020663254, -0.021241806, -0.012913413, 0.003094525, -0.008973285, 0.012700095, -0.040765505, 0.028845683, 0.0191686, 0.036848523, 0.005470643, 0.008387159, 0.02531023, 0.019456739, 0.0022349367, -0.006035907, 0.0046758098, 0.023670916, 0.020396505, 0.020004913, 0.00469194, -0.015836673, 0.002427885, -0.029576253, -0.0047748457, 0.0117557235, 0.0065011154, 0.011398145, -0.009430506, 0.02093114, 0.022828408, -0.016442709, -0.015130763, -0.0011457471, -0.15080276, -0.017350126, 0.012090634, 0.0066688405, 0.0065895687, 0.01064064, 0.0071897134, -0.0011035028, 0.020373905, -0.02260507, -0.029587613, 0.01594788, -0.0025209337, 0.020244412, -0.009752258, -0.020974781, 0.0067756013, 0.005353403, 0.01679566, -0.007710917, 0.011405226, 

In [22]:
# Create FAISS vector store from document chunks
# This step converts each chunk to an embedding and stores it
print(f"Creating FAISS index from {len(chunks)} chunks...")
print("This may take a minute depending on the number of chunks...")

vectorstore = FAISS.from_documents(
    documents=chunks,      # Our split document chunks
    embedding=embeddings   # OpenAI embedding model
)

print(f"‚úì FAISS vector store created successfully!")
print(f"‚úì Indexed {len(chunks)} document chunks")

# Save the vector store to disk for later use
# This allows you to reload the index without re-processing documents
vectorstore_path = "./faiss_index"
vectorstore.save_local(vectorstore_path)
print(f"‚úì Vector store saved to '{vectorstore_path}'")
print(f"\n‚ÑπÔ∏è  You can reload this index later using: FAISS.load_local('{vectorstore_path}', embeddings)")

Creating FAISS index from 49 chunks...
This may take a minute depending on the number of chunks...
‚úì FAISS vector store created successfully!
‚úì Indexed 49 document chunks
‚úì Vector store saved to './faiss_index'

‚ÑπÔ∏è  You can reload this index later using: FAISS.load_local('./faiss_index', embeddings)


In [23]:
# Create a retriever from the vector store
retriever = vectorstore.as_retriever(
    search_type="similarity",    # Use cosine similarity for search
    search_kwargs={"k": 4}        # Retrieve top 4 most relevant chunks
)

print("‚úì Retriever configured successfully")
print(f"  - Search type: similarity")
print(f"  - Number of documents to retrieve (k): 4")

# Test the retriever with a sample query
# Note: In LangChain 1.0+, use .invoke() instead of .get_relevant_documents()
test_query = "What is the main topic of this document?"
retrieved_docs = retriever.invoke(test_query)  # LangChain 1.0+ method

print(f"\n--- Retriever Test ---")
print(f"Query: '{test_query}'")
print(f"Retrieved {len(retrieved_docs)} documents:")

for i, doc in enumerate(retrieved_docs):
    print(f"\nDocument {i+1}:")
    print(f"  Content preview: {doc.page_content[:150]}...")
    print(f"  Metadata: {doc.metadata}")

‚úì Retriever configured successfully
  - Search type: similarity
  - Number of documents to retrieve (k): 4

--- Retriever Test ---
Query: 'What is the main topic of this document?'
Retrieved 4 documents:

Document 1:
  Content preview: Input-Input Layer5
The
Law
will
never
be
perfect
,
but
its
application
should
be
just
-
this
is
what
we
are
missing
,
in
my
opinion
.
<EOS>
<pad>
The
...
  Metadata: {'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2024-04-10T21:11:43+00:00', 'author': '', 'keywords': '', 'moddate': '2024-04-10T21:11:43+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': 'attention.pdf', 'total_pages': 15, 'page': 13, 'page_label': '14'}

Document 2:
  Content preview: (section 5.4), learning rates and beam size on the Section 22 development set, all other parameters
remained unchanged from the English-to-German ba

In [27]:
from langchain_google_genai import ChatGoogleGenerativeAI

model = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    temperature=1.0,  # Gemini 3.0+ defaults to 1.0
    max_tokens=None,
    timeout=None,
    max_retries=2,
    # other params...
)

In [28]:
print("‚úì LLM configured successfully")
print(f"  - Model: gpt-4-turbo-2024-04-09")
print(f"  - Temperature: 0 (deterministic)")
print(f"  - Max tokens: 2000")

# Test the LLM with a simple query
test_response = model.invoke("Say 'Hello, I am ready to answer questions!'")
print(f"\nLLM Test Response: {test_response.content}")

#   üìù Explanation of Parameters:

#   Model Selection:

#   # Option 1: Best quality (slower, more expensive)
#   llm = ChatOpenAI(model="gpt-4-turbo-2024-04-09")

#   # Option 2: Fast GPT-4 performance (balanced)
#   llm = ChatOpenAI(model="gpt-4o")

#   # Option 3: Fast and cheap (good for testing)
#   llm = ChatOpenAI(model="gpt-3.5-turbo")

#   Temperature:

#   temperature=0    # Deterministic, focused (best for factual Q&A)
#   temperature=0.7  # More creative, varied responses
#   temperature=1.0  # Most creative, less predictable

#   Max Tokens:

#   max_tokens=2000  # Controls maximum response length

‚úì LLM configured successfully
  - Model: gpt-4-turbo-2024-04-09
  - Temperature: 0 (deterministic)
  - Max tokens: 2000

LLM Test Response: Hello, I am ready to answer questions!


In [29]:
# Define the prompt template for the RAG system
# This tells the LLM how to use the retrieved context
system_prompt = (
    "You are a helpful assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer based on the context, say that you don't know. "
    "Keep the answer concise and accurate.\n\n"
    "Context: {context}\n\n"
    "Question: {question}"
)

# Create the prompt template
prompt = ChatPromptTemplate.from_template(system_prompt)

# Helper function to format documents
def format_docs(docs):
    """Format retrieved documents into a single string."""
    return "\n\n".join(doc.page_content for doc in docs)

# Build the RAG chain using LangChain 1.0+ LCEL (LangChain Expression Language)
# This uses the pipe operator (|) to chain components together
rag_chain = (
    {
        "context": retriever | format_docs,  # Retrieve docs and format them
        "question": RunnablePassthrough()      # Pass through the question
    }
    | prompt           # Format with prompt template
    | model              # Generate answer with LLM
    | StrOutputParser() # Parse output to string
)

print("‚úì RAG chain created successfully using LangChain 1.0+ LCEL!")
print("\nRAG Pipeline Flow:")
print("  1. User provides a query")
print("  2. Retriever finds top 4 relevant chunks")
print("  3. Chunks are formatted as context")
print("  4. Context + question are formatted with prompt template")
print("  5. LLM generates answer based on context")
print("  6. Answer is parsed and returned to user")

‚úì RAG chain created successfully using LangChain 1.0+ LCEL!

RAG Pipeline Flow:
  1. User provides a query
  2. Retriever finds top 4 relevant chunks
  3. Chunks are formatted as context
  4. Context + question are formatted with prompt template
  5. LLM generates answer based on context
  6. Answer is parsed and returned to user


In [31]:
# Example Query 1: General question about the document
query1 = "What is the main topic or subject of this document?"

print(f"Query: {query1}")
print("\nProcessing...\n")

# With LangChain 1.0+, we invoke the chain with the question directly
answer = rag_chain.invoke(query1)

print("=" * 80)
print("ANSWER:")
print("=" * 80)
print(answer)
print("\n" + "=" * 80)

# To see which documents were retrieved, we can call the retriever separately
print("\nSOURCE DOCUMENTS USED:")
print("=" * 80)
retrieved_docs = retriever.invoke(query1)
for i, doc in enumerate(retrieved_docs):
    print(f"\nDocument {i+1}:")
    print(f"  Source: {doc.metadata}")
    print(f"  Content: {doc.page_content[:200]}...")
    print("-" * 80)

Query: What is the main topic or subject of this document?

Processing...

ANSWER:
The main topic of this document appears to be the attention mechanism, specifically attention heads and their visualizations within a neural network's encoder (e.g., in layer 5). It discusses how attention is involved in tasks like anaphora resolution and handling long-distance dependencies, likely in the context of machine translation.


SOURCE DOCUMENTS USED:

Document 1:
  Source: {'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2024-04-10T21:11:43+00:00', 'author': '', 'keywords': '', 'moddate': '2024-04-10T21:11:43+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': 'attention.pdf', 'total_pages': 15, 'page': 13, 'page_label': '14'}
  Content: Input-Input Layer5
The
Law
will
never
be
perfect
,
but
its
application
should
be
just
-
this
is
what
we

In [32]:
# Example Query 2: Specific information extraction
query2 = "Can you summarize the key points from this document?"

print(f"Query: {query2}")
print("\nProcessing...\n")

answer = rag_chain.invoke(query2)

print("=" * 80)
print("ANSWER:")
print("=" * 80)
print(answer)
print("\n" + "=" * 80)

Query: Can you summarize the key points from this document?

Processing...

ANSWER:
This document snippet primarily illustrates the function of **attention mechanisms** in neural networks.

Key points include:
*   **Figure 3** demonstrates how encoder self-attention (layer 5) can follow **long-distance dependencies**, specifically connecting the verb 'making' to 'more difficult' in a sentence.
*   **Figure 4** provides examples of two attention heads (layer 5) that appear to be involved in **anaphora resolution**, highlighting sharp attentions from words like 'its'.
*   The context also includes a list of **academic references** related to computational linguistics, parsing, attention models, abstractive summarization, and neural machine translation.



In [33]:
# Example Query 3: Your custom question
# Replace this with your own question!
custom_query = "What specific details are mentioned about attention mechanisms?"

print(f"Query: {custom_query}")
print("\nProcessing...\n")

answer = rag_chain.invoke(custom_query)

print("=" * 80)
print("ANSWER:")
print("=" * 80)
print(answer)
print("\n" + "=" * 80)

Query: What specific details are mentioned about attention mechanisms?

Processing...

ANSWER:
The attention mechanisms mentioned include:

*   **Encoder-decoder attention:** Queries come from the previous decoder layer, and memory keys and values come from the output of the encoder. This allows every position in the decoder to attend over all positions in the input sequence.
*   **Self-attention in the encoder:** All keys, values, and queries come from the output of the previous layer in the encoder. Each position can attend to all positions in the previous layer.
*   **Self-attention in the decoder:** Each position in the decoder can attend to all positions up to and including that position, preventing leftward information flow to preserve the auto-regressive property.
*   **Multi-head attention:** This allows the model to jointly attend to information from different representation subspaces at different positions. It is computed as `MultiHead(Q, K, V) = Concat(head1, ...,headh)WO`, 

In [30]:
# Example Query 3: Your custom question
# Replace this with your own question!
custom_query = "What are the applications of Attention Mechanism?"

print(f"Query: {custom_query}")
print("\nProcessing...\n")

response3 = rag_chain.invoke(custom_query)

print("=" * 80)
print("ANSWER:")
print("=" * 80)
print(response3)
print("\n" + "=" * 80)

Query: What are the applications of Attention Mechanism?

Processing...

ANSWER:
Attention mechanisms have been applied in various tasks including:
*   Sequence modeling and transduction models
*   Reading comprehension
*   Abstractive summarization
*   Textual entailment
*   Learning task-independent sentence representations
*   Simple-language question answering
*   Language modeling

