# Practice Exercise

# Building my Own Agentic RAG System

In [48]:
from langgraph.graph import START, END, StateGraph, MessagesState
from langgraph.checkpoint.memory import MemorySaver
from langgraph.prebuilt import ToolNode
from langchain.messages import HumanMessage, AIMessage, SystemMessage
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

from dotenv import load_dotenv
from IPython.display import Image, display
from typing import Literal
import os

print("All imports successful")

All imports successful


In [49]:
load_dotenv()
api_key = os.getenv("paid_api")

if not api_key:
    raise ValueError("API_Key not found. Please set it in your .env file")
print("API key loaded")

API key loaded


In [50]:
## Initialize LLM
llm = ChatOpenAI(
    model = "gpt-5-nano",
    temperature=0.5,
    api_key = api_key
)
print(f"LLM initialized: {llm.model_name}")

LLM initialized: gpt-5-nano


## Document Collection

In [51]:
file_path = r"C:\Users\owner\Desktop\Files_Deep_Learning\Agentic_RAG_knowledge_basee"

loader = PyPDFDirectoryLoader(file_path)
pages = []

# async for page in loader.alazy_load():
#     pages.append(page)

pages = loader.load()
    
print("Documents loaded.")

Documents loaded.


In [52]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
    length_function=len
)

doc_splits = text_splitter.split_documents(pages)

print(f"Sample chunk: \n{doc_splits[0].page_content[:200]}...")
print("Documents chunked")


Sample chunk: 
What is Artiﬁcial Intelligence in
the Energy Industry ?
Deﬁnition
In recent years, Artiﬁcial Intelligence (AI) has gained relevance in a wide variety of
sectors. However, deﬁning the term poses some d...
Documents chunked


## Vector Store Setup

In [53]:
embeddings = OpenAIEmbeddings(
    model= "text-embedding-3-small",
    api_key = api_key
)
print("Embeddings model initialized")

Embeddings model initialized


In [54]:
chroma_path = "./chroma_db_personal_rag"
vectorstore = Chroma(
    collection_name="agentic_rag_docs",
    persist_directory=chroma_path,
    embedding_function=embeddings
)

#Add documents
vectorstore.add_documents(documents=doc_splits)
print(f"Vector store created with {len(doc_splits)} chunks")
print(f"Persisted to: {chroma_path}")

Vector store created with 958 chunks
Persisted to: ./chroma_db_personal_rag


## Retrieval Tool

In [55]:
@tool
def retrieve_documents(query: str) -> str:
    """
    Search for relevant documents in the knowledge base.
    
    Use this tool when you need information from the document collection
    to answer the user's question. Do NOT use this for:
    - General knowledge questions
    - Greetings or small talk
    - Simple calculations
    
    Args:
        query: The search query describing what information is needed
        
    Returns:
        Relevant document excerpts that can help answer the question
    """
    # Using MMR for diverse results
    retriever = vectorstore.as_retriever(
        search_type = "mmr",
        search_kwargs = {"k":5, "fetch_k":10}
    )

    results = retriever.invoke(query)
    if not results:
        return "No relevant documents found"
    
    formatted = "\n\n---\n\n".join(
        f"Document {i+1}:\n{doc.page_content}"
        for i, doc in enumerate(results)
    )
    return formatted

print("Retrieval tool created")

Retrieval tool created


## Agentic RAG System

In [56]:
system_prompt = SystemMessage(content="""You are PowerBot, a helpfulassistant with access to a document retrieval tool.

RETRIEVAL DECISION RULES:

DO NOT retrieve for:
- Greetings: "Hello", "Hi", "How are you"
- Questions about your capabilities: "What can you help with?", "What do you do?"
- Simple math or general knowledge: "What is 2+2?"
- Casual conversation: "Thank you", "Goodbye"

DO retrieve for:
- Questions asking for specific information that would be in documents
- Requests for facts, definitions, or explanations about specialized topics
- Any question where citing sources would improve the answer

Rule of thumb: If the user is asking for information (not just chatting), retrieve first.

When you retrieve documents, cite them in your answer. If documents don't contain the answer, say so.
""")

print("System prompt configured")

System prompt configured


In [57]:
# Bind tool to LLM
tools = [retrieve_documents]
llm_with_tools = llm.bind_tools(tools)

def assistant(state: MessagesState) -> dict:
    """
    Assistant node - decides whether to retrieve or answer directly.
    """
    messages = [system_prompt] + state["messages"]
    response = llm_with_tools.invoke(messages)
    return {"messages": [response]}

def should_continue(state: MessagesState) -> Literal["tools", "__end__"]:
    """
    Decide whether to call tools or finish.
    """
    last_message = state["messages"][-1]

    if last_message.tool_calls:
        return "tools"
    return "__end__"
print("Agent nodes defined")

Agent nodes defined


In [58]:
builder = StateGraph(MessagesState)

builder.add_node("assistant", assistant)
builder.add_node("tools", ToolNode(tools))

builder.add_edge(START, "assistant")
builder.add_conditional_edges(
    "assistant",
    should_continue,
    {"tools": "tools", "__end__": END}
)
builder.add_edge("tools", "assistant")

# Conversation memory
memory = MemorySaver()
agent = builder.compile(checkpointer=memory)

print("Agentic RAG system compiled")

Agentic RAG system compiled


## Testing and Evaluation

In [59]:
def query_agent(test_queries: str, thread_id: str = "default"):
    # test_queries = [
    #     "Hi",
    #     "What is the role of artificial intelligence in the energy industry?",
    #     "What can you help me with?",
    #     "What is agriculture?",
    #     "How many grid-connected generating plants are operating in the NESI?"
    #     "Whos is the president of Nigeria?",
    #     "How many sectors are in the Nigerian power sector?",
    #     "List the distribution companies (DisCos) operating in Nigeria"
    # ]

    for query in test_queries:
        print(f"\n{'='*70}")
        print(f"Query: {query}")
        print(f"{'='*70}")

        result = agent.invoke(
            {"messages": [HumanMessage(content=query)]},
            config={"configurable": {"thread_id": thread_id}}
        )

        #Check if Retrieval was used
        used_retrieval = any(
            isinstance(message, AIMessage) and message.tool_calls
            for message in result["messages"]
        )

        final_answer = result["messages"][-1].content
        print(f"Agent: {final_answer}")
        print(f"Decision: {'RETRIEVED' if used_retrieval else 'ANSWERED DIRECTLY'}")
        print(f"\n{'='*70}\n")

In [60]:
test5 = ["What is today's date?"]
query_agent(test5, thread_id="test_5")


Query: What is today's date?


Agent: Today is January 2, 2026.
Decision: ANSWERED DIRECTLY




In [61]:
test1 = [
        "Hi",
        "What is the role of artificial intelligence in the energy industry?",
        "Who was the president of America in 2020?"]
query_agent(test1, thread_id="test1")


Query: Hi
Agent: Hi there! How can I assist you today? If you have a specific question or need information from our documents, tell me what you’re looking for and I’ll help.
Decision: ANSWERED DIRECTLY



Query: What is the role of artificial intelligence in the energy industry?
Agent: Here’s how AI typically functions in the energy industry and why it’s important:

- Role and purpose: AI is becoming central to designing and operating energy systems. It helps make and implement data-driven decisions to improve efficiency, reliability, and security. This involves handling large volumes of data and optimizing how energy is produced, distributed, and consumed. (Docs: 4, 5)

- Key application areas: 
  - Electricity trading
  - Smart grids
  - Sector coupling across electricity, heat, and transport
  These areas illustrate how AI can optimize market operations, grid management, and cross-energy system integration. (Docs: 1)

- How it achieves benefits: By analyzing and evaluating large da

In [62]:
test2 = ["Good morning",
        "How many grid-connected generating plants are operating in the NESI?",
        "What is today's date?"]
query_agent(test2, thread_id="test2__")


Query: Good morning
Agent: Good morning! How can I assist you today? If you need information from our documents, I can look it up for you.
Decision: ANSWERED DIRECTLY



Query: How many grid-connected generating plants are operating in the NESI?
Agent: 29 grid-connected generating plants are operating in the NESI (as of the end of 2023). This is stated as 29 on-grid GenCos/operational grid-connected plants in the referenced documents. 
Sources: Document 4; Document 5.
Decision: RETRIEVED



Query: What is today's date?
Agent: Today’s date is January 2, 2026.
Decision: RETRIEVED




In [63]:
test3 = ["What is 2 + 2?",
         "How many sectors are in the Nigerian power sector",
         "What is the full meaning of NESI?",
        "List the distribution companies (DisCos) operating in Nigeria"
    
]
query_agent(test3, thread_id="test3")


Query: What is 2 + 2?
Agent: 4
Decision: ANSWERED DIRECTLY



Query: How many sectors are in the Nigerian power sector
Agent: There are 3 major sub-sectors in the Nigerian power sector: Generation, Transmission, and Distribution.

Sources: Document 1 (and related documents) and Document 5.
Decision: RETRIEVED



Query: What is the full meaning of NESI?
Agent: NESI stands for Nigerian Electricity Supply Industry.

Sources: Documents 1 and 4 (glossary entries).
Decision: RETRIEVED



Query: List the distribution companies (DisCos) operating in Nigeria
Agent: Here are the Distribution Companies (DisCos) operating in Nigeria (the 11 licensed DisCos as of end-2023):

- Abuja Electricity Distribution Company (AEDC)
- Benin Electricity Distribution Company (BEDC)
- Eko Electricity Distribution Company (EKEDC)
- Enugu Electricity Distribution Company (EEDC)
- Ibadan Electricity Distribution Company (IBEDC)
- Ikeja Electric Plc (Ikeja Electric)
- Jos Electricity Distribution Plc (JED)
- Kaduna

## **Brief Report**

### Domain Choice

The chosen domain is Nigeria's Power sector, it was chosen because that is my field of expertise and I am motivated to solve issues in the power sector using advanced technology, in this case, Aritificial intelligence.

### Chunking Strategy

I chose a chunk size of 500 characters with an overlap of 50 which is within the recommended of range of 10-20% of my chosen chunk_size. This particular chunk size and overlap size were chosen to preserve context and avoid exceeding the context window.

### Agent Evaluation

The agent made satisfactory retrieval decisions.


### Evaluation and Improvement