# Agentic RAG with LangGraph

In [1]:
from langgraph.graph import START, END, StateGraph, MessagesState
from langgraph.checkpoint.memory import MemorySaver
from langgraph.prebuilt import ToolNode
from langchain.messages import HumanMessage, AIMessage, SystemMessage
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

from dotenv import load_dotenv
from IPython.display import Image, display
from typing import Literal
import os

# Sanity check: this line is only reached if every import above resolved.
print("All imports successful")

All imports successful


In [2]:
# Read variables from a local .env file into the process environment.
load_dotenv()

# The key is stored under the custom variable name "paid_api"; it is passed
# as `api_key` to the OpenAI clients created later in the notebook.
api_key = os.getenv("paid_api")

if not api_key:
    # Name the exact variable that was looked up so the reader knows what to
    # add to .env (the old message referred to a non-existent "API_Key").
    raise ValueError(
        'Environment variable "paid_api" not found. Please set it in your .env file'
    )
print("API key loaded")

API key loaded


In [3]:
# Chat model shared by the rest of the notebook.
# It authenticates with the api_key loaded in the previous cell.
llm = ChatOpenAI(api_key=api_key, model="gpt-4o-mini", temperature=0.5)

# Echo the configured model name as a quick confirmation.
print(f"LLM initialized: {llm.model_name}")

LLM initialized: gpt-4o-mini


## Load and Process Documents

In [5]:
file_path = r"c:\Users\owner\Downloads\Article_Research\Artificial Intelligence in the Energy Industry.pdf"

# Check if file exists
if not os.path.exists(file_path):
    print(f"File not found {file_path}")
    print("Please update the file_path variable with your PDF file.")

    #Creating sample documents for demo
    from langchain_core.documents import Document
    pages = [
        Document(page_content="Biochemistry is the study of chemical processes in living organisms.",
                 metadata={"page":1}),
        Document(page_content= "Proteins are made of amino acids and perform many functions in cells.",
                 metadata = {"page":2}),
        Document(page_content="DNA stores genetic information using four nucleotide bases.",
                 metadata={"page":3})
    ]
    print("Using sample documents for demo")
else:
    #Load the pdf
    loader = PyPDFLoader(file_path)
    pages= []

    #Load pages (async loading)
    async for page in loader.alazy_load(): # async prevents memory overload by processing pages from a PDF one by one instead of all at once
        pages.append(page)
    print(f"Loaded {len(pages)} pages from PDF.")

Loaded 11 pages from PDF.


### Split into Chunks

In [6]:
# Splitter configuration: chunks of up to 1000 units with a 100-unit overlap
# between neighbours (units are characters under the splitter's default
# length function).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
)

# Split every loaded page into chunks.
doc_splits = text_splitter.split_documents(pages)

# Fail with a clear message instead of an IndexError on doc_splits[0] below,
# e.g. when the PDF contained no extractable text.
if not doc_splits:
    raise ValueError(
        "No chunks were produced; check that the loaded pages contain extractable text."
    )

print(f"Created {len(doc_splits)} chunks")
print("\n Sample chunk:")
# Preview only the first 200 characters of the first chunk.
print(f"{doc_splits[0].page_content[:200]}...")

Created 22 chunks

 Sample chunk:
What is Artiﬁcial Intelligence in
the Energy Industry ?
Deﬁnition
In recent years, Artiﬁcial Intelligence (AI) has gained relevance in a wide variety of
sectors. However, deﬁning the term poses some d...


## Create Vector Store (ChromaDB)

In [None]:
# Initialize the embedding model; it authenticates with the api_key loaded
# near the top of the notebook.
embeddings = OpenAIEmbeddings(api_key=api_key, model="text-embedding-3-small")
print("Embeddings model initialized")

In [None]:
# Chroma vector store
# Directory handed to Chroma as its persist_directory.
chroma_path = "./chroma_db_agentic_rag"

# Instantiate the store for the "agentic_rag_docs" collection, wired to the
# embedding model created above.
# NOTE(review): no documents are passed in this call -- presumably doc_splits
# are added in a later cell; confirm.
vectorstore = Chroma(
    embedding_function=embeddings,
    persist_directory=chroma_path,
    collection_name="agentic_rag_docs",
)