**Loading necessary libraries**

In [1]:
# Imports
from langgraph.graph import START, END, StateGraph, MessagesState
from langgraph.checkpoint.memory import MemorySaver
from langgraph.prebuilt import ToolNode
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from dotenv import load_dotenv
from IPython.display import Image, display
from typing import Literal
import os

print("‚úÖ All imports successful")

‚úÖ All imports successful


In [2]:
# Load API key
load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")

if not openai_api_key:
    raise ValueError("OPENAI_API_KEY not found! Please set it in your .env file.")

print("‚úÖ API key loaded")

‚úÖ API key loaded


In [3]:
# Initialize LLM
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0.5,
    api_key=openai_api_key
)

print(f"‚úÖ LLM initialized: {llm.model_name}")

‚úÖ LLM initialized: gpt-4o-mini


**Loading and Processing Document**

In [4]:
document_path = r"agent\data"
document_list = os.listdir(document_path)

# Initialize a master list to hold pages from ALL PDFs
all_pages = []
for i, file in enumerate(document_list, start=1):
    file_path = os.path.join(document_path, file)
    
    # Check if file exists
    if not os.path.exists(file_path):
        print(f"‚ö†Ô∏è File not found: {file_path}")
        print("Please update the file_path variable with your PDF file.")
    else:
        # Load the PDF
        loader = PyPDFLoader(file_path)
        current_file_pages = []
        
        # Load pages (async loading)
        async for page in loader.alazy_load():
            current_file_pages.append(page)
            
         # Add these pages to the master list
        all_pages.extend(current_file_pages)   
        
        print(f"{i} ‚úÖ Loaded {len(current_file_pages)} pages from {file}\n")
        
print(f"\nTotal pages loaded across all files: {len(all_pages)}\n")

1 ‚úÖ Loaded 4 pages from Amendment_of_the_National_Law_on_NELFUND.pdf

2 ‚úÖ Loaded 11 pages from Explanatory_Memorandum_For_Students-Loans_Act_2023.pdf

3 ‚úÖ Loaded 4 pages from FAQs_On_NELFUND_Student_Loan.pdf

4 ‚úÖ Loaded 14 pages from Guidelines_For_Nelfund_Loan.pdf

5 ‚úÖ Loaded 4 pages from Terms&Conditions_of_Students_Loan.pdf


Total pages loaded across all files: 37



**Split into Chunks**

In [6]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,      # Characters per chunk
    chunk_overlap=100     # Overlap to preserve context
)

# Split documents
doc_splits = text_splitter.split_documents(all_pages)

print(f"‚úÖ Created {len(doc_splits)} chunks")
print(f"\nSample chunk:")
print(f"{doc_splits[0].page_content[:100]}...")

‚úÖ Created 55 chunks

Sample chunk:
Amendment  of  the  National  Law  on  NELFUND  
 
 
President
 
Bola
 
Tinubu,
 
on
 
Wednesday,
 
...


**Create Vector Store (Chroma)**

In [7]:
# Initialize embeddings (using OpenAI)
embeddings = OpenAIEmbeddings(
    model="text-embedding-3-small",
    api_key=openai_api_key
)

print("‚úÖ Embeddings model initialized")

‚úÖ Embeddings model initialized


In [8]:
# Create Chroma vector store
chroma_path = "agent\database\chroma_db"


# Create vector store from documents
vectorstore = Chroma(
    collection_name="agentic_rag_docs",
    persist_directory=chroma_path,
    embedding_function=embeddings
)

# Add documents
vectorstore.add_documents(documents=doc_splits)

print(f"‚úÖ Vector store created with {len(doc_splits)} chunks")
print(f"   Persisted to: {chroma_path}")

  chroma_path = "agent\database\chroma_db"


‚úÖ Vector store created with 55 chunks
   Persisted to: agent\database\chroma_db


**Test Retrieval**

In [9]:

# Test the vector store
test_query = "What is Nelfund?"
test_results = vectorstore.similarity_search(test_query, k=2)

print(f"Query: {test_query}")
print(f"\nTop result:")
print(f"{test_results[0].page_content[:100]}...")
print(f"\n‚úÖ Retrieval working!")

Query: What is Nelfund?

Top result:
2  
education sector through NELFUND, Nigeria is unlocking immense potentials 
by paving ways for br...

‚úÖ Retrieval working!


**Create Retrieval Tool**

In [10]:
@tool
def retrieve_nelfund_docs(query: str) -> str:
    """
    Search for relevant documents in the knowledge base.
    Search for official NELFUND policy documents, eligibility, and application guides.
    Use ONLY for factual questions about student loans.
    
    Use this tool when you need information from the document collection
    to answer the user's question. Do NOT use this for:
    - Greetings or small talk
    
    Args:
        query: The search query describing what information is needed
        
    Returns:
        Relevant document excerpts that can help answer the question
    """
    # Using MMR (Max Marginal Relevance) for diversity
    retriever = vectorstore.as_retriever(
        search_type="mmr", 
        search_kwargs={"k": 3})
    
    # Retrieve documents
    results = retriever.invoke(query)
    
    if not results:
        return "No relevant NELFUND documents found."
    
    # Format results
    formatted = "\n\n".join(
        f"Content: {doc.page_content}\nSource: {doc.metadata['source']}"
        for doc in results
    )
    return formatted

In [11]:
# Test tool directly
test_result = retrieve_nelfund_docs.invoke({"query": "When does repayment starts?"})
print(f"Tool result (first 300 chars):\n{test_result[:300]}...")

Tool result (first 300 chars):
Content: 10  
LOAN REPAYMENT 
charges payment, such as falsifying student information/records or 
colluding with students to obtain loans for ineligible purposes. 
‚ñ™ The institution fails to refund disbursed institutional charges in instances 
stipulated in Section 5.5 above. 
‚ñ™ The institution fail...


### **Building Agentic RAG System**

**System Prompt**

In [23]:
system_prompt = SystemMessage(content="""You are the NELFUND Navigator, a specialized conversational assistant designed exclusively to answer questions about the Nigerian Education Loan Fund (NELFUND) using only documents provided through retrieval.

Your role is limited to producing accurate, factual, and helpful responses that are strictly grounded in retrieved NELFUND documents.

‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
SCOPE AND AUTHORITY
‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ

You are strictly limited to NELFUND-related information.

You must not use general knowledge, assumptions, prior training, or external sources.

You must not answer questions about any topic outside NELFUND.

If a question is not about NELFUND, or if the answer is not explicitly contained in the retrieved documents and it's not greeting and exchanging of names, you must respond with exactly the following text and nothing else:

Can't provide

Do not explain why. Do not add extra text.

‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
CONDITIONAL RETRIEVAL DECISION AUTHORITY
‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ

You are allowed to determine that retrieval is unnecessary for certain inputs.

Retrieval must NOT be used for the following categories of input:
Greetings such as hi, hello, good morning, good afternoon, or good evening.
Polite expressions such as thank you or thanks.
Identity or role questions such as who are you.
Conversation management messages such as can you help me.

For these inputs, respond briefly without using retrieval.

Retrieval must ONLY be used for factual questions related to NELFUND, including questions about student loans, eligibility, application processes, repayment, disbursement, participating institutions, policies, timelines, or responsibilities of students, institutions, or government bodies.

You must never answer a factual NELFUND question without retrieving documents.

If retrieval is performed and no relevant information is found, you must respond with:

Can't provide

‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
CONVERSATION MEMORY AND FOLLOW-UP HANDLING
‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ

Treat the conversation as continuous and stateful.

Use previous user questions and your prior responses to understand context.

When the user asks a follow-up question, assume it refers to the most recent NELFUND topic discussed unless clearly stated otherwise.

Resolve vague references and pronouns such as it, they, this loan, or the fund using the immediate conversation context.

Even for follow-up questions, you may only use information found in retrieved documents.

If a follow-up question cannot be answered from retrieved documents, respond with:

Can't provide any information on that.

‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
RESPONSE RULES
‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ

All factual answers must be directly supported by retrieved documents.

Every factual answer must include a citation to the retrieved source documents in the format required by the system.

Do not cite sources when responding with Can't provide.

Answers must be clear, concise, and factual.

Use plain text only.

Do not use asterisks, bullet points, markdown, symbols, decorative formatting, or emojis in responses.

Do not repeat the user's question in your answer.

Do not speculate, infer, or provide opinions.

Do not answer hypothetical questions unless explicitly covered in the retrieved documents.
""")

print("‚úÖ System prompt configured")

‚úÖ System prompt configured


**Define Agent Nodes**

In [13]:
tools = [retrieve_nelfund_docs]
llm_with_tools = llm.bind_tools(tools)

def assistant(state: MessagesState) -> dict:
    """
    Assistant node - decides whether to retrieve or answer directly.
    """
    messages = [system_prompt] + state["messages"]
    response = llm_with_tools.invoke(messages)
    return {"messages": [response]}

def should_continue(state: MessagesState) -> Literal["tools", "__end__"]:
    """
    Decide whether to call tools or finish.
    """
    last_message = state["messages"][-1]
    if last_message.tool_calls:
        return "tools"
    return "__end__"
print("‚úÖ Agent nodes defined")

‚úÖ Agent nodes defined


**Build the Graph**

In [14]:
# Build graph
builder = StateGraph(MessagesState)

# Add nodes
builder.add_node("assistant", assistant)
builder.add_node("tools", ToolNode(tools))

# Define edges
builder.add_edge(START, "assistant")
builder.add_conditional_edges(
    "assistant",
    should_continue,
    {"tools": "tools", "__end__": END}
)
builder.add_edge("tools", "assistant")

# Add memory
memory = MemorySaver()
agent = builder.compile(checkpointer=memory)

print("‚úÖ Agentic RAG system compiled")

‚úÖ Agentic RAG system compiled


In [29]:
def greeting_intent(query: str) -> bool:
    greetings = {
        "hi",
        "hello",
        "hey",
        "good morning",
        "good afternoon",
        "good evening",
        "thanks",
        "thank you",
        "who are you",
        "can you help me"
    }
    return query.lower().strip() in greetings


In [30]:
def query_agent(user_input: str, thread_id: str = "default_session"):
    """
    Improved query function with clearer output.
    """
    print(f"\n{'='*70}")
    print(f"üë§ User: {user_input}")
    print(f"{'='*70}\n")

    # INTENT GATE ‚Äî STOP RETRIEVAL BEFORE AGENT
    if greeting_intent(user_input):
        print(f"ü§ñ Agent: Hello. How can I help you with NELFUND?")
        print(f"\nüìä Decision: DID NOT USE RETRIEVAL")
        print(f"{'='*70}\n")
        return

    result = agent.invoke(
        {"messages": [HumanMessage(content=user_input)]},
        config={"configurable": {"thread_id": thread_id}}
    )

    # Check what happened
    used_retrieval = False
    final_answer = None

    for message in result["messages"]:
        if isinstance(message, AIMessage):
            if message.tool_calls:
                used_retrieval = True
                print(f"üîç Agent: [Calling retrieval tool...]")
            if message.content and not message.tool_calls:
                final_answer = message.content

    # Always print final answer
    if final_answer:
        print(f"ü§ñ Agent: {final_answer}")
    else:
        print(f"‚ö†Ô∏è No response generated after retrieval!")

    # Summary
    print(f"\nüìä Decision: {'USED RETRIEVAL' if used_retrieval else 'ANSWERED DIRECTLY'}")
    print(f"{'='*70}\n")



In [33]:
query_agent("Hi, My name is Olajcodes", "testing_0")


üë§ User: Hi, My name is Olajcodes

ü§ñ Agent: Hello, Olajcodes! How can I assist you today?

üìä Decision: ANSWERED DIRECTLY



In [34]:
query_agent("How does NELFUND works?", "testing_0")


üë§ User: How does NELFUND works?

üîç Agent: [Calling retrieval tool...]
ü§ñ Agent: NELFUND works by providing financial support to students facing challenges in funding their higher education. The program aims to enhance access to education, ensure financial inclusivity, foster skill development, promote socio-economic mobility, and reduce dropout rates. 

Eligible students can apply for loans, and upon approval, they receive financial assistance to cover their educational expenses. The repayment process involves a deduction of 10% of the beneficiary's salary at source by the employer, or self-employed beneficiaries are required to remit 10% of their monthly profit to the Fund. Beneficiaries can also repay the loan anytime they have the funds available, even before gaining employment.

Notifications regarding loan disbursement and other related communications are sent to students through various channels, including the student portal, email, and SMS (source: Guidelines_For_Nelfun

In [35]:
query_agent("What's my name again?", "testing_1")


üë§ User: What's my name again?

ü§ñ Agent: Can't provide

üìä Decision: ANSWERED DIRECTLY



In [36]:
query_agent("Do students require a guarantor to access the loan?", thread_id="session_0")


üë§ User: Do students require a guarantor to access the loan?

üîç Agent: [Calling retrieval tool...]
ü§ñ Agent: Students do not require a guarantor to access the loan. 

Source: agent\data\FAQs_On_NELFUND_Student_Loan.pdf

üìä Decision: USED RETRIEVAL



In [38]:
query_agent("When can the loan be repaid and how?", thread_id="session_0")


üë§ User: When can the loan be repaid and how?

üîç Agent: [Calling retrieval tool...]
üîç Agent: [Calling retrieval tool...]
ü§ñ Agent: The loan can be repaid after two years post-NYSC if the beneficiary is still unable to gain employment. Repayment will occur through a deduction of 10% of the beneficiary‚Äôs salary at source by the employer. Self-employed beneficiaries are required to remit 10% of their monthly profit to the Fund. Beneficiaries can also make voluntary repayments at any time, even before gaining employment.

Source: agent\data\FAQs_On_NELFUND_Student_Loan.pdf

üìä Decision: USED RETRIEVAL



In [27]:
query_agent("When is the loan due for repayment?")


üë§ User: When is the loan due for repayment?

üîç Agent: [Calling retrieval tool...]
üîç Agent: [Calling retrieval tool...]
üîç Agent: [Calling retrieval tool...]
ü§ñ Agent: The loan repayment for NELFUND is due after the beneficiary has completed their education. Specifically, repayment begins after two years post-NYSC (National Youth Service Corps) if the beneficiary is still unable to gain employment. 

The repayment process involves the deduction of ten percent (10%) of the beneficiary's salary at source by the employer, or for self-employed beneficiaries, they are required to remit 10% of their monthly profit to the Fund (source: FAQs on NELFUND Student Loan).

üìä Decision: USED RETRIEVAL



In [41]:
query_agent("What's it all about?")


üë§ User: What's it all about?

üîç Agent: [Calling retrieval tool...]
üîç Agent: [Calling retrieval tool...]
üîç Agent: [Calling retrieval tool...]
üîç Agent: [Calling retrieval tool...]
ü§ñ Agent: NELFUND, the Nigerian Education Loan Fund, is designed to enhance access to higher education by providing financial support to students facing challenges in funding their education. Its main objectives include:

1. **Enhancing Access**: Broadening access to higher education for eligible students.
2. **Financial Inclusivity**: Promoting equal opportunities for all eligible students without discrimination.
3. **Skill Development**: Contributing to the development of a skilled workforce by supporting students in acquiring useful skills.
4. **Socio-economic Mobility**: Creating a level playing field for Nigerians, allowing individuals from various financial backgrounds to pursue higher education.
5. **Reducing Dropout Rates**: Alleviating financial burdens to help reduce dropout rates am

In [None]:
query_agent("")


üë§ User: My name is Olajcodes

ü§ñ Agent: Can't provide

üìä Decision: ANSWERED DIRECTLY



**Trying to test Chat History API**

In [1]:
import requests

# Send a new message
res = requests.post(
    "http://127.0.0.1:8000/messages",
    json={"user_id": "user123", "role": "user", "message": "Hello from notebook!"}
)
print(res.json())



{'status': 'success', 'message': 'Message saved'}


In [3]:
import requests

# Send a new message
res = requests.post(
    "http://127.0.0.1:8000/messages",
    json={"user_id": "user123", "role": "user", "message": "Still figuring it out!"}
)
print(res.json())



{'status': 'success', 'message': 'Message saved'}


In [4]:
# Fetch conversation
res = requests.get("http://127.0.0.1:8000/messages/user123")
print(res.json())

[{'role': 'user', 'message': 'Hello from notebook!', 'timestamp': '2026-01-07T23:58:56.738000'}, {'role': 'user', 'message': 'Still figuring it out!', 'timestamp': '2026-01-08T00:01:20.785000'}]
