# LangGraph - RAG Exercise! 🚀

<a target="_blank" href="https://githubtocolab.com/IT-HUSET/ai-agenter-2025/blob/main/exercises/langgraph/1.4-langgraph-rag.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a><br/>

Let's add some RAG to LangGraph!

## Setup

### Install dependencies

In [None]:
%pip install openai~=2.0 httpx~=0.28.1 --upgrade --quiet
%pip install python-dotenv~=1.0 --upgrade --quiet
%pip install python-dotenv~=1.0 docarray~=0.41.0 pypdf~=6.1 --upgrade --quiet
%pip install chromadb~=1.1.1 lark~=1.3 --upgrade --quiet
%pip install langchain~=0.3 langchain_openai~=0.3 langchain_community~=0.3.31 langchain-chroma~=0.2.6 --upgrade --quiet
%pip install langgraph~=0.6 --upgrade --quiet

# If running locally, you can do this instead:
#%pip install -r ../requirements.txt

### Load environment variables

In [None]:
import os

# Detect the runtime and load OPENAI_API_KEY from the matching source:
# Colab secrets when running in Google Colab, otherwise a local .env file.
try:
    from google.colab import userdata
except ImportError:
    IN_COLAB = False
    # Load from .env file for local development
    try:
        from dotenv import load_dotenv, find_dotenv
    except ImportError:
        print("⚠️ python-dotenv not installed. Install with: pip install python-dotenv")
    else:
        # load_dotenv() reports whether anything was actually loaded, so the
        # success message is only shown when a .env file really contributed.
        if load_dotenv(find_dotenv()):
            print("✅ Running locally - API key loaded from .env file")
        else:
            print("⚠️ Running locally - no .env file found (or it is empty)")
else:
    IN_COLAB = True
    # userdata.get() raises when the secret is missing or notebook access is
    # disabled. Catch that so the setup instructions below are still printed
    # instead of the cell aborting with a traceback.
    try:
        _colab_key = userdata.get('OPENAI_API_KEY')
    except (userdata.SecretNotFoundError, userdata.NotebookAccessError):
        _colab_key = None
    if _colab_key:
        os.environ["OPENAI_API_KEY"] = _colab_key
        print("✅ Running in Google Colab - API key loaded from secrets")

# Verify API key is set
if not os.environ.get("OPENAI_API_KEY"):
    print("❌ OPENAI_API_KEY not found!")
    if IN_COLAB:
        print("   → Click the key icon (🔑) in the left sidebar")
        print("   → Add a secret named 'OPENAI_API_KEY'")
        print("   → Toggle 'Notebook access' to enable it")
    else:
        print("   → Create a .env file with: OPENAI_API_KEY=your-key-here")
else:
    print("✅ API key configured!")

### Setup Chat Model

In [None]:
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
# Chat model used by the generation node; temperature 0.0 is presumably chosen
# to keep answers as repeatable as possible.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)
# Embedding model shared by the vector store and the embedding examples below.
embedding_model = OpenAIEmbeddings(model="text-embedding-3-large")

### Setup vector DB (Chroma)

In [None]:
import os
import shutil

from langchain_chroma import Chroma
from langchain_core.vectorstores import VectorStore, VectorStoreRetriever

# Folder on disk where Chroma keeps this exercise's database
persist_directory = './db/rag_simple_exercise/'

# Start from a clean slate: wipe any database left over from a previous run
# (comment these two lines out to keep the existing index)
if os.path.exists(persist_directory):
    shutil.rmtree(persist_directory)

# Persistent vector store; texts are embedded with `embedding_model`
vectordb: Chroma = Chroma(
    persist_directory=persist_directory,
    embedding_function=embedding_model,
    collection_name="rag_simple_exercise",
)

### Setup a text splitter

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Chunks of at most 1000 units with 100 units shared between neighbours
# (units are presumably characters, the splitter's default - confirm).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)

## Embeddings

Before embedding real documents, let's embed a few example sentences and compare the resulting vectors.

In [None]:
# Two sentences with nearly the same meaning, and one unrelated sentence
sentence1, sentence2, sentence3 = (
    "i like dogs",
    "i like canines",
    "the weather is ugly outside",
)

In [None]:
# Embed the three sentences one by one, in order
embedding1, embedding2, embedding3 = (
    embedding_model.embed_query(text)
    for text in (sentence1, sentence2, sentence3)
)

# Show the first ten components of the first vector
print(embedding1[:10])
#print(len(embedding1))

In [None]:
import numpy as np

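Embeddings 1 and 2 should be similar. We use NumPy's dot product as the similarity score: the higher the value, the closer the two sentences are in meaning (OpenAI embeddings are normalized to unit length, so the dot product equals the cosine similarity).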

In [None]:
np.dot(embedding1, embedding2)

Embedding 3, however, should be less similar to the other two, i.e. give a lower dot product.

In [None]:
np.dot(embedding1, embedding3)

In [None]:
np.dot(embedding2, embedding3)

## Document Loading

### PDFs

PDFs can be loaded in a number of different ways, but the easiest is by using the `PyPDFLoader` class. PDFs can be loaded from a local file or a URL.

In [None]:
from langchain_community.document_loaders import PyPDFLoader
# Alternative: load a PDF from the local file system instead of a URL
#loader = PyPDFLoader("some_local_file.pdf")
loader = PyPDFLoader("https://data.riksdagen.se/fil/61B7540B-EEDD-4922-B61B-FC0A9F3AE4E2") # 2024/25:263 "AI, other new technology and human rights"
#loader = PyPDFLoader("https://data.riksdagen.se/fil/0D43150B-5B31-43A4-89CD-4FE0478EC6C7") # 2024/25:263 "AI, other new technology and human rights" (the answer)
# Fetch and parse the PDF into a list of Document objects
pdf_pages = loader.load()

**Each page** is a `Document`.

A `Document` contains text (`page_content`) and `metadata`.

In [None]:
len(pdf_pages)

In [None]:
# Preview the first 500 characters of the first page
page = pdf_pages[0]
print(page.page_content[:500])

In [None]:
page.metadata

### Web Page

There are a number of different ways of loading data from the web, but the easiest is by using the `WebBaseLoader` class, which uses the parser BeautifulSoup under the hood.

In [None]:
# Import from langchain_community, like PyPDFLoader above: the old
# `langchain.document_loaders` path is only a deprecated re-export.
from langchain_community.document_loaders import WebBaseLoader

# Page to fetch; WebBaseLoader downloads it and extracts the text
page_url = "https://world.hey.com/dhh/open-source-royalty-and-mad-kings-a8f79d16"
loader = WebBaseLoader(page_url)
# Alternative: send a browser-like User-Agent if the site rejects the default one
# loader = WebBaseLoader(page_url, header_template={
#     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36',
# })

In [None]:
web_docs = loader.load()

In [None]:
print(web_docs[0].page_content[:500])

### Ingest - load, split and add to vector index

In [None]:
# Document to load
pdf_url = "https://data.riksdagen.se/fil/61B7540B-EEDD-4922-B61B-FC0A9F3AE4E2"

# Load
loader = PyPDFLoader(pdf_url)
pages = loader.load()

# Split the pages into chunks using the splitter configured earlier
doc_splits = text_splitter.split_documents(pages)

# Embed the chunks and add them to the vector index
print("Adding document to index...")
vectordb.add_documents(documents=doc_splits)

print(f"Added document - {len(pages)} pages - {len(doc_splits)} splits")

## Setup query graph / pipeline

### Graph state

In [None]:
from typing import  List

from langchain_core.documents import Document
from langgraph.graph import MessagesState


class GraphState(MessagesState):
    """State that flows between the nodes of the RAG graph.

    Extends ``MessagesState`` with the fields the retrieval and generation
    nodes in this notebook read and write.
    """
    # The user's question; supplied as graph input and read by both nodes
    question: str
    # Documents returned by the retrieval node, used as context for generation
    documents: List[Document]
    # Answer text produced by the generation node
    answer: str


### Nodes

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import Runnable
from langchain_core.output_parsers import StrOutputParser

#### Retrieval (Vector Store similarity search)

In [None]:
class RetrievalNode:
    """Graph node that looks up the documents most similar to the question.

    Args:
        retriever: Optional retriever to use; any object whose
            ``invoke(question)`` returns a list of documents works. When
            omitted, a retriever is created from the notebook's ``vectordb``.
        k: Number of documents the default retriever returns. Ignored when
            ``retriever`` is passed explicitly.
    """
    retriever: VectorStoreRetriever

    def __init__(self, retriever=None, k: int = 3):
        # Fall back to the notebook-level vector store so that a plain
        # `RetrievalNode()` keeps its original behaviour (top 3 documents).
        if retriever is None:
            retriever = vectordb.as_retriever(search_kwargs={"k": k})
        self.retriever = retriever

    def __call__(self, state: GraphState):
        """Retrieve documents for ``state["question"]``.

        Returns:
            A partial state update: ``{"documents": [...]}``.
        """
        print("---RETRIEVE---")
        question = state["question"]

        # Retrieval
        documents = self.retriever.invoke(question)

        print(f"---RETRIEVED {len(documents)} DOCS---")
        #print(f"{documents}")

        return {"documents": documents}

#### RAG Generation (LLM call with factual/grounded context)

In [None]:
class RAGNode:
    """Graph node that answers the question from the retrieved documents.

    The retrieved snippets are placed in the system prompt together with a
    label naming where each one came from, so the model has something to cite.

    Args:
        model: Optional chat model to generate with. When omitted, the
            notebook-level ``llm`` is used.
    """
    system_template = """You are an helpful assistant, expert in answering questions based on provided sources (snippets from documents) and citing the sources used to generate the answer. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible.
    ALWAYS respond in the SAME language as the original question.

    ** Context (snippets from documents): **

    {context}
    """

    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_template),
            ("human", "{question}"),
        ]
    )

    chain: Runnable

    def __init__(self, model=None):
        # prompt -> chat model -> plain string
        self.chain = self.prompt | (llm if model is None else model) | StrOutputParser()

    @staticmethod
    def _format_context(documents):
        """Join the snippets into one context string, each prefixed by its source."""
        snippets = []
        for number, doc in enumerate(documents, start=1):
            label = f"[{number}] {doc.metadata.get('source', 'unknown source')}"
            # Prefer a printed page label when the loader provides one; the
            # plain "page" value may be a zero-based index - confirm per loader.
            page = doc.metadata.get("page_label", doc.metadata.get("page"))
            if page is not None:
                label += f", page {page}"
            snippets.append(f"{label}\n{doc.page_content}")
        return "\n\n".join(snippets)

    def __call__(self, state: GraphState):
        """Generate an answer for ``state["question"]`` from ``state["documents"]``.

        Returns:
            A partial state update with the unchanged ``documents`` and the
            generated ``answer`` string.
        """
        print("---GENERATE---")
        question = state["question"]
        documents = state["documents"]

        # RAG generation - setup context (i.e. relevant documents snippets)
        context = self._format_context(documents)

        # RAG generation - generate answer
        answer = self.chain.invoke({"question": question, "context": context})
        #print(f"---GENERATE - ANSWER: \n{answer}")

        return {"documents": documents, "answer": answer}

### Build Graph

In [None]:
#### Graph ####
from langgraph.graph import END, StateGraph, START
from IPython.display import Image, display

workflow = StateGraph(GraphState)

# Define the nodes
workflow.add_node("retrieve", RetrievalNode())  # retrieve
workflow.add_node("generate", RAGNode())  # generate

# Linear pipeline: START -> retrieve -> generate -> END
workflow.add_edge(START, "retrieve")
workflow.add_edge("retrieve", "generate")
workflow.add_edge("generate", END)

# Compile
graph = workflow.compile()

# View - the PNG is rendered outside this process, so it can fail (for example
# when offline). The picture is optional: fall back to the Mermaid source
# rather than aborting the cell after the graph has compiled successfully.
try:
    display(Image(graph.get_graph().draw_mermaid_png()))
except Exception as error:
    print(f"⚠️ Could not render the graph image ({error}). Mermaid source:")
    print(graph.get_graph().draw_mermaid())


## Use Graph

In [None]:
# Graph input: only the question is needed; the nodes fill in the rest
inputs = dict(
    question="Vad har sagts om mänskliga rättigheter och artificiell intelligens (AI)?",
    #question="Vilka är nobelpristagarna 2024?",  # Should result in "Jag vet inte."
)

# Run the whole pipeline (retrieve, then generate) and show the answer
result = graph.invoke(inputs)

print("--- ANSWER: ---", result["answer"], sep="\n")

<br/>

-----

## Going even further - adding grading of retrieved documents for relevance (Corrective RAG)

#### Look at **`simple-rag-agent-demo.ipynb`** for inspiration - and try to implement a similar setup here.