# LangGraph RAG Implementation
This notebook downloads a free PDF, indexes it, and uses a LangGraph agent to answer queries by fetching relevant documents.

In [23]:
# Install dependencies (added sentence-transformers and langchain-huggingface for free embeddings)
!pip install -qU langgraph langchain-groq langchain-community pypdf faiss-cpu requests python-dotenv sentence-transformers langchain-huggingface

In [24]:
import os
import requests
from dotenv import load_dotenv
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_groq import ChatGroq
from langchain_community.vectorstores import FAISS
from langchain_core.tools import tool
from langchain_core.messages import HumanMessage
from langgraph.graph import START, MessageGraph
from langgraph.prebuilt import ToolNode, tools_condition

# Load API keys from a .env file.
# Set the DOTENV_PATH environment variable to point at a specific file. When it is
# unset (or empty) None is passed, and load_dotenv falls back to python-dotenv's own
# search for a `.env` file, so no machine-specific absolute path lives in the notebook.
load_dotenv(os.environ.get("DOTENV_PATH") or None)

True

In [25]:
# 1. Download Free PDF from the Internet
pdf_url = "https://arxiv.org/pdf/1706.03762.pdf"
pdf_path = "attention_is_all_you_need.pdf"

if not os.path.exists(pdf_path):
    print("Downloading PDF...")
    # A timeout keeps the cell from hanging forever on a stalled connection.
    response = requests.get(pdf_url, timeout=60)
    # Fail loudly on HTTP errors *before* creating the file: otherwise an error page
    # would be saved as the "PDF" and the exists-check above would keep it forever.
    response.raise_for_status()
    with open(pdf_path, "wb") as f:
        f.write(response.content)
    print("Download complete.")
else:
    print("PDF already exists.")

PDF already exists.


In [26]:
# 2. Load, Chunk, and Setup Retriever
# Read the PDF at `pdf_path` into LangChain documents.
loader = PyPDFLoader(pdf_path)
docs = loader.load()

# Split the documents into overlapping chunks (chunk_size=1000, chunk_overlap=200)
# so each embedded piece is small while neighbouring chunks share some context.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

# Using free local HuggingFace Embeddings instead of OpenAI (which caused AuthenticationError)
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
# Build the FAISS index from the chunks. Nothing is cached or saved here, so the
# index is rebuilt from scratch every time this cell runs.
vectorstore = FAISS.from_documents(documents=splits, embedding=embedding_model)
# `retriever` is the notebook-level object that rag_tool queries; it is created
# with the vector store's default retrieval settings (no arguments passed).
retriever = vectorstore.as_retriever()
print("Documents indexed. Retriever is ready.")

Documents indexed. Retriever is ready.


In [27]:
# 3. Define the rag_tool to fetch relevant documents
# NOTE: @tool derives the tool's argument schema from the signature and its
# description from the docstring, so `query` is annotated as `str` (the model is
# then told to pass a string) and the docstring is kept as model-facing wording.
# Implementation notes therefore live in comments rather than in the docstring.
@tool
def rag_tool(query: str) -> dict:
    """
    Retrieve relevant information from the pdf document.
    Use this tool when the user asks factual / conceptual questions 
    that might be answered from the stored documents.
    """
    # `retriever` is the notebook-level retriever built in the indexing cell.
    result = retriever.invoke(query)

    # Keep text and metadata as parallel lists: context[i] pairs with metadata[i].
    context = [doc.page_content for doc in result]
    metadata = [doc.metadata for doc in result]

    # Echo the query back alongside the hits so the tool output is self-describing.
    return {
        'query': query,
        'context': context,
        'metadata': metadata
    }

In [28]:
# 4. Build LangGraph Agent
# Changed from llama3-70b-8192 to llama-3.3-70b-versatile as the former was decommissioned
# `llm` is the Groq chat model with rag_tool bound to it, so the model can request that tool.
llm = ChatGroq(model="llama-3.3-70b-versatile").bind_tools([rag_tool])

# NOTE(review): MessageGraph raises LangGraphDeprecatedSinceV10 (removal announced for
# v2.0; the warning recommends StateGraph with a `messages` key). Migrating changes the
# input/output shape seen by every caller of rag_app, so the graph and its callers
# have to be migrated together.
builder = MessageGraph()
# "agent" hands the graph state straight to the tool-bound model;
# "tools" is a ToolNode wrapping rag_tool.
builder.add_node("agent", lambda state: llm.invoke(state))
builder.add_node("tools", ToolNode([rag_tool]))

# Flow: START -> agent; after agent, tools_condition picks the next hop
# (presumably "tools" when the model requested a tool call, otherwise the run
# ends — confirm against the LangGraph docs); tools -> agent closes the loop.
builder.add_edge(START, "agent")
builder.add_conditional_edges("agent", tools_condition)
builder.add_edge("tools", "agent")

# `rag_app` is the compiled graph that the query cell streams from.
rag_app = builder.compile()
print("LangGraph Agent compiled successfully.")

LangGraph Agent compiled successfully.


/var/folders/yc/wbsm0cns4xl2dtcyv_rrvk280000gn/T/ipykernel_13497/1778625708.py:5: LangGraphDeprecatedSinceV10: MessageGraph is deprecated in LangGraph v1.0.0, to be removed in v2.0.0. Please use StateGraph with a `messages` key instead. Deprecated in LangGraph V1.0 to be removed in V2.0.
  builder = MessageGraph()


In [29]:
# 5. Query function: send a question to the agent and print its relevant response
def ask_agent(query: str):
    """Send `query` to `rag_app` and print each non-empty AI message it produces.

    The question is echoed first as ``User: ...``. The graph is then streamed with
    ``stream_mode="values"``, and for every streamed event only its last element is
    inspected: it is printed as ``Agent: ...`` when its type is "ai" and it carries
    content. Nothing is returned.
    """
    print(f"\nUser: {query}")
    conversation = [HumanMessage(content=query)]

    for snapshot in rag_app.stream(conversation, stream_mode="values"):
        latest = snapshot[-1]
        # Skip anything that is not an AI message, and AI messages with no text.
        if latest.type != "ai" or not latest.content:
            continue
        print(f"Agent: {latest.content}")

# Example usage: the prompt explicitly tells the model to call rag_tool, so the
# printed answer should be based on chunks retrieved from the indexed PDF.
ask_agent("What is Multi-head attention according to the document? Ensure you use the rag_tool to fetch relevant docs.")


User: What is Multi-head attention according to the document? Ensure you use the rag_tool to fetch relevant docs.
Agent: Multi-head attention allows the model to jointly attend to information from different representation subspaces at different positions. It is a type of attention mechanism that uses multiple attention heads in parallel, each of which attends to a different subset of the input data. The outputs from each attention head are then concatenated and projected to form the final output. This allows the model to capture a wider range of contextual relationships in the input data. The Transformer uses multi-head attention in three different ways: encoder-decoder attention, self-attention in the encoder, and self-attention in the decoder.
