**Loading necessary libraries**

In [58]:
from langgraph.graph import START, END, StateGraph, MessagesState
from langgraph.prebuilt import ToolNode
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from dotenv import load_dotenv
from backend.db import messages_collection, threads_collection
from typing import Literal
from datetime import datetime, timezone
import uuid
import os

print("‚úÖ All imports successful")

‚úÖ All imports successful


In [59]:
# Load API key
load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")

if not openai_api_key:
    raise ValueError("OPENAI_API_KEY not found! Please set it in your .env file.")

print("‚úÖ API key loaded")

‚úÖ API key loaded


In [60]:
# Initialize LLM
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0.5,
    api_key=openai_api_key
)

print(f"‚úÖ LLM initialized: {llm.model_name}")

‚úÖ LLM initialized: gpt-4o-mini


**Loading and Processing Document**

In [61]:
document_path = r"agent\data"
document_list = os.listdir(document_path)
all_pages = []

for i, file in enumerate(document_list, start=1):
    file_path = os.path.join(document_path, file)
    if not os.path.exists(file_path):
        print(f"‚ö†Ô∏è File not found: {file_path}")
    else:
        loader = PyPDFLoader(file_path)
        pages = loader.load()  # simpler sync loading for notebook
        all_pages.extend(pages)
        print(f"{i} ‚úÖ Loaded {len(pages)} pages from {file}")

print(f"\nTotal pages loaded: {len(all_pages)}")

1 ‚úÖ Loaded 4 pages from Amendment_of_the_National_Law_on_NELFUND.pdf
2 ‚úÖ Loaded 11 pages from Explanatory_Memorandum_For_Students-Loans_Act_2023.pdf
3 ‚úÖ Loaded 4 pages from FAQs_On_NELFUND_Student_Loan.pdf
4 ‚úÖ Loaded 14 pages from Guidelines_For_Nelfund_Loan.pdf
5 ‚úÖ Loaded 4 pages from Terms&Conditions_of_Students_Loan.pdf

Total pages loaded: 37


**Split into Chunks**

In [62]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100
)
doc_splits = text_splitter.split_documents(all_pages)
print(f"‚úÖ Created {len(doc_splits)} chunks")
print(f"Sample chunk:\n{doc_splits[0].page_content[:100]}...")

‚úÖ Created 55 chunks
Sample chunk:
Amendment  of  the  National  Law  on  NELFUND  
 
 
President
 
Bola
 
Tinubu,
 
on
 
Wednesday,
 
...


**Create Vector Store (Chroma)**

In [63]:
embeddings = OpenAIEmbeddings(
    model="text-embedding-3-small",
    api_key=openai_api_key
)
print("‚úÖ Embeddings model initialized")

chroma_path = r"agent\database\chroma_db"
vectorstore = Chroma(
    collection_name="agentic_rag_docs",
    persist_directory=chroma_path,
    embedding_function=embeddings
)
vectorstore.add_documents(documents=doc_splits)
print(f"‚úÖ Vector store created with {len(doc_splits)} chunks")
print(f"   Persisted to: {chroma_path}")

‚úÖ Embeddings model initialized
‚úÖ Vector store created with 55 chunks
   Persisted to: agent\database\chroma_db


**Test Retrieval**

In [64]:

# Test the vector store
test_query = "What is Nelfund?"
test_results = vectorstore.similarity_search(test_query, k=2)

print(f"Query: {test_query}")
print(f"\nTop result:")
print(f"{test_results[0].page_content[:100]}...")
print(f"\n‚úÖ Retrieval working!")

Query: What is Nelfund?

Top result:
2  
education sector through NELFUND, Nigeria is unlocking immense potentials 
by paving ways for br...

‚úÖ Retrieval working!


**Create Retrieval Tool**

In [66]:
@tool
def retrieve_nelfund_docs(query: str) -> str:
    """
    Search for relevant documents in the knowledge base.
    Search for official NELFUND policy documents, eligibility, and application guides.
    Use ONLY for factual questions about student loans.
    
    Use this tool when you need information from the document collection
    to answer the user's question. Do NOT use this for:
    - Greetings or small talk
    
    Args:
        query: The search query describing what information is needed
        
    Returns:
        Relevant document excerpts that can help answer the question
    """
    retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 3})
    results = retriever.invoke(query)
    
    if not results:
        return "No relevant NELFUND documents found."
    
    formatted = "\n\n".join(
        f"Content: {doc.page_content}\nSource: {doc.metadata.get('source', 'unknown')}"
        for doc in results
    )
    return formatted

In [67]:
# Test tool directly
test_result = retrieve_nelfund_docs.invoke({"query": "When does repayment starts?"})
print(f"Tool result (first 300 chars):\n{test_result[:300]}...")

Tool result (first 300 chars):
Content: 10  
LOAN REPAYMENT 
charges payment, such as falsifying student information/records or 
colluding with students to obtain loans for ineligible purposes. 
‚ñ™ The institution fails to refund disbursed institutional charges in instances 
stipulated in Section 5.5 above. 
‚ñ™ The institution fail...


### **Building Agentic RAG System**

**System Prompt**

In [68]:
system_prompt = SystemMessage(content="""You are the NELFUND Navigator, a specialized conversational assistant designed exclusively to answer questions about the Nigerian Education Loan Fund (NELFUND) using only documents provided through retrieval.

Your role is limited to producing accurate, factual, and helpful responses that are strictly grounded in retrieved NELFUND documents.

‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
SCOPE AND AUTHORITY
‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ

You are strictly limited to NELFUND-related information.

You must not use general knowledge, assumptions, prior training, or external sources.

You must not answer questions about any topic outside NELFUND.

If a question is not about NELFUND, or if the answer is not explicitly contained in the retrieved documents and it's not greeting and exchanging of names, you must respond with exactly the following text and nothing else:

Can't provide

Do not explain why. Do not add extra text.

‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
CONDITIONAL RETRIEVAL DECISION AUTHORITY
‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ

You are allowed to determine that retrieval is unnecessary for certain inputs.

Retrieval must NOT be used for the following categories of input:
Greetings such as hi, hello, good morning, good afternoon, or good evening.
Polite expressions such as thank you or thanks.
Identity or role questions such as who are you.
Conversation management messages such as can you help me.

For these inputs, respond briefly without using retrieval.

Retrieval must ONLY be used for factual questions related to NELFUND, including questions about student loans, eligibility, application processes, repayment, disbursement, participating institutions, policies, timelines, or responsibilities of students, institutions, or government bodies.

You must never answer a factual NELFUND question without retrieving documents.

If retrieval is performed and no relevant information is found, you must respond with:

Can't provide

‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
CONVERSATION MEMORY AND FOLLOW-UP HANDLING
‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ

Treat the conversation as continuous and stateful.

Use previous user questions and your prior responses to understand context.

When the user asks a follow-up question, assume it refers to the most recent NELFUND topic discussed unless clearly stated otherwise.

Resolve vague references and pronouns such as it, they, this loan, or the fund using the immediate conversation context.

Even for follow-up questions, you may only use information found in retrieved documents.

If a follow-up question cannot be answered from retrieved documents, respond with:

Can't provide any information on that.

‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
RESPONSE RULES
‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ

All factual answers must be directly supported by retrieved documents.

Every factual answer must include a citation to the retrieved source documents in the format required by the system.

Do not cite sources when responding with Can't provide.

Answers must be clear, concise, and factual.

Use plain text only.

Do not use asterisks, bullet points, markdown, symbols, decorative formatting, or emojis in responses.

Do not repeat the user's question in your answer.

Do not speculate, infer, or provide opinions.

Do not answer hypothetical questions unless explicitly covered in the retrieved documents.
""")

print("‚úÖ System prompt configured")

‚úÖ System prompt configured


**Define Agent Nodes and Build the Graph**

In [73]:
tools = [retrieve_nelfund_docs]
llm_with_tools = llm.bind_tools(tools)

def assistant(state: MessagesState) -> dict:
    """
    Assistant node - decides whether to retrieve or answer directly.
    """
    messages = [system_prompt] + state["messages"]
    response = llm_with_tools.invoke(messages)
    return {"messages": [response]}

def should_continue(state: dict) -> Literal["tools", "__end__"]:
    """
    Decide whether to call tools or finish.
    """
    last_message = state["messages"][-1]
    if getattr(last_message, "tool_calls", False):
        if state.get("retrieval_done", False):
            return "__end__"
        state["retrieval_done"] = True  # <-- store flag in dict
        return "tools"
    return "__end__"

builder = StateGraph(dict)
builder.add_node("assistant", assistant)
builder.add_node("tools", ToolNode(tools))
builder.add_edge(START, "assistant")
builder.add_conditional_edges("assistant", should_continue, {"tools": "tools", "__end__": END})
builder.add_edge("tools", "assistant")
agent = builder.compile()
print("‚úÖ Agentic RAG system compiled")

‚úÖ Agentic RAG system compiled


**MongoDB Thread Loader**

In [70]:
def load_state(user_id, thread_id):
    docs = messages_collection.find({"user_id": user_id, "thread_id": thread_id}).sort("timestamp", 1)
    return {"messages": [{"role": d["role"], "content": d["message"]} for d in docs]}

user_id = "user_001"
thread = threads_collection.find_one({"user_id": user_id})

if not thread:
    thread_id = str(uuid.uuid4())
    threads_collection.insert_one({
        "user_id": user_id,
        "thread_id": thread_id,
        "created_at": datetime.now(timezone.utc)
    })
else:
    thread_id = thread["thread_id"]

print("Active thread:", thread_id)


Active thread: 3a464f7e-2d0d-4285-8396-523e5d689c01


**Query Agent & Intent Gate**

In [77]:
def greeting_intent(query: str) -> bool:
    greetings = {
        "hi", "hello", "hey", "good morning", "good afternoon", "good evening",
        "thanks", "thank you", "who are you", "can you help me"
    }
    return query.lower().strip() in greetings

def query_agent(user_input: str, thread_id: str = user_id):
    """
    Improved query function with clearer output.
    """
    print(f"\n{'='*70}\nüë§ User: {user_input}\n{'='*70}\n")

    if greeting_intent(user_input):
        print("ü§ñ Agent: Hello. How can I help you with NELFUND?")
        print(f"\nüìä Decision: DID NOT USE RETRIEVAL\n{'='*70}\n")
        return

    state = {
    "messages": [
        HumanMessage(content=user_input),
        AIMessage(content="Hi, How can I help you today?")  
    ]
    }
    result = agent.invoke(state, config={"configurable": {"thread_id": thread_id}})

    used_retrieval = False
    final_answer = None

    for message in result["messages"]:
        if hasattr(message, "tool_calls") and message.tool_calls:
            used_retrieval = True
            print("üîç Agent: [Calling retrieval tool...]")
        if getattr(message, "content", None):
            final_answer = message.content

    if final_answer:
        print(f"ü§ñ Agent: {final_answer}")
    else:
        print("‚ö†Ô∏è No response generated after retrieval!")

    print(f"\nüìä Decision: {'USED RETRIEVAL' if used_retrieval else 'ANSWERED DIRECTLY'}\n{'='*70}\n")


In [78]:
query_agent("Hi, My name is Olajcodes", thread_id)


üë§ User: Hi, My name is Olajcodes

ü§ñ Agent: Hi, How can I help you today?

üìä Decision: ANSWERED DIRECTLY



**Trying to test Chat History API**

In [None]:
# import requests

# # Send a new message
# res = requests.post(
#     "http://127.0.0.1:8000/messages",
#     json={"user_id": "user123", "role": "user", "message": "Hello from notebook!"}
# )
# print(res.json())



{'status': 'success', 'message': 'Message saved'}


In [None]:
# import requests

# # Send a new message
# res = requests.post(
#     "http://127.0.0.1:8000/messages",
#     # json={"user_id": thread_id, "role": "user", "message": "Still figuring it out!"}
# )
# print(res.json())



In [None]:
# # Fetch conversation
# res = requests.get("http://127.0.0.1:8000/messages/user123")
# print(res.json())