In [10]:
from langgraph.graph import START, END, StateGraph, MessagesState
from langgraph.checkpoint.memory import MemorySaver
from langgraph.prebuilt import ToolNode
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from dotenv import load_dotenv
from IPython.display import Image, display
from typing import Literal
import os
import pypdf
from langgraph.prebuilt import create_react_agent

print(" All imports successful")

 All imports successful


In [3]:
# Read environment variables from a local .env file (if any), then fetch the
# OpenAI key from the process environment so it is never hard-coded here.
load_dotenv()
openai_api_key = os.environ.get("OPENAI_API_KEY")

# A missing or empty key is reported right away.
if openai_api_key in (None, ""):
    raise ValueError("OPENAI_API_KEY not found! Please set it in your .env file.")

print(" API key loaded")

 API key loaded


In [4]:
# Chat model used by the agent defined later in the notebook.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.5, api_key=openai_api_key)

# Echo the model name actually held by the client object.
print(" LLM initialized: {}".format(llm.model_name))

 LLM initialized: gpt-4o-mini


In [5]:
# The PDFs are expected in the notebook's working directory.
pdf_folder = "."
pdf_files = [
    "LAW 243.pdf",
    "law 432 LAW OF BANKING AND INSURANCE II  post editorial.pdf",
    "LAW411 oil and gas I.pdf",
]

# Accumulates the page documents of every PDF that loads successfully.
pages = []

# Best-effort loading: a missing or unreadable file is reported and skipped
# so the remaining files are still processed.
for pdf_file in pdf_files:
    file_path = os.path.join(pdf_folder, pdf_file)

    if os.path.exists(file_path):
        try:
            pdf_pages = PyPDFLoader(file_path).load()
            pages.extend(pdf_pages)
            print(f" Loaded {len(pdf_pages)} pages from {pdf_file}")
        except Exception as load_error:
            print(f" Error loading {pdf_file}: {load_error}")
    else:
        print(f" File not found: {file_path}")

print(f"\n Total pages loaded: {len(pages)}")


 Loaded 90 pages from LAW 243.pdf
 Loaded 135 pages from law 432 LAW OF BANKING AND INSURANCE II  post editorial.pdf
 Loaded 194 pages from LAW411 oil and gas I.pdf

 Total pages loaded: 419


In [6]:
# Verify pages were loaded
if not pages:
    raise ValueError("No pages loaded! Please check your PDF files.")

# Split every page into overlapping character chunks. The separators are
# tried in order, so paragraph breaks are preferred over line breaks, then
# spaces, then arbitrary character positions.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,      # Characters per chunk
    chunk_overlap=100,    # Overlap to preserve context
    separators=["\n\n", "\n", " ", ""]
)

# Split documents
doc_splits = text_splitter.split_documents(pages)

# Pages can load successfully yet produce no chunks (for instance when no
# text could be extracted from them). Fail with a clear message here instead
# of a ZeroDivisionError / "min() arg is an empty sequence" in the statistics.
if not doc_splits:
    raise ValueError("No chunks created! The loaded pages contain no extractable text.")

# Measure every chunk once and reuse the lengths for all three statistics.
chunk_lengths = [len(chunk.page_content) for chunk in doc_splits]

print(f" Created {len(doc_splits)} chunks from {len(pages)} pages")
print(f" Chunk statistics:")
print(f"   - Average chunk size: {sum(chunk_lengths) // len(chunk_lengths)} characters")
print(f"   - Min chunk size: {min(chunk_lengths)} characters")
print(f"   - Max chunk size: {max(chunk_lengths)} characters")

# Display the first chunk with its metadata (doc_splits is non-empty here).
first_chunk = doc_splits[0]
print(f"\n Sample chunk (Chunk 1):")
print(f"   Source: {first_chunk.metadata.get('source', 'Unknown')}")
print(f"   Page: {first_chunk.metadata.get('page', 'Unknown')}")
print(f"   Length: {len(first_chunk.page_content)} characters")
print(f"\n   Content preview:")
print(f"   {first_chunk.page_content[:300]}...")


 Created 942 chunks from 419 pages
 Chunk statistics:
   - Average chunk size: 768 characters
   - Min chunk size: 2 characters
   - Max chunk size: 999 characters

 Sample chunk (Chunk 1):
   Source: .\LAW 243.pdf
   Page: 0
   Length: 32 characters

   Content preview:
   LAW 243    
CONSTITUTIONAL LAW 1...


In [7]:
# Group the chunks by the document they came from and report per-document totals.
from collections import defaultdict

chunks_by_source = defaultdict(list)
for chunk in doc_splits:
    chunks_by_source[chunk.metadata.get('source', 'Unknown')].append(chunk)

divider = "=" * 80

print(" Chunks Distribution by Document:\n")
print(divider)

for source, chunks in chunks_by_source.items():
    total_chars = sum(len(piece.page_content) for piece in chunks)
    avg_chars = total_chars // len(chunks) if chunks else 0

    # Reduce the stored path to its last component. A backslash-separated
    # path takes precedence over a slash-separated one.
    if "\\" in source:
        filename = source.rsplit("\\", 1)[-1]
    elif "/" in source:
        filename = source.rsplit("/", 1)[-1]
    else:
        filename = source

    print(f"\n {filename}")
    print(f"   ├─ Total chunks: {len(chunks)}")
    print(f"   ├─ Total characters: {total_chars:,}")
    print(f"   └─ Average chunk size: {avg_chars:,} characters")

print("\n" + divider)
print(f" Total: {len(doc_splits)} chunks from {len(chunks_by_source)} documents")


 Chunks Distribution by Document:


 LAW 243.pdf
   ├─ Total chunks: 231
   ├─ Total characters: 186,690
   └─ Average chunk size: 808 characters

 law 432 LAW OF BANKING AND INSURANCE II  post editorial.pdf
   ├─ Total chunks: 229
   ├─ Total characters: 169,632
   └─ Average chunk size: 740 characters

 LAW411 oil and gas I.pdf
   ├─ Total chunks: 482
   ├─ Total characters: 367,694
   └─ Average chunk size: 762 characters

 Total: 942 chunks from 3 documents


In [12]:
# Initialize embeddings and create vector store
print(" Initializing embeddings and creating vector database...")

# Create embeddings
embeddings = OpenAIEmbeddings(
    model="text-embedding-3-small",
    api_key=openai_api_key
)

# Give every chunk a deterministic id built from its source, page and position.
# The store is persisted in ./chroma_db, so without explicit ids each re-run of
# this cell would add the same chunks again under new ids, and the retriever
# could then return several copies of one passage. With stable ids a re-run
# writes to the same entries instead (see the Chroma.from_documents `ids`
# parameter in the langchain_chroma documentation).
# NOTE: copies already stored by earlier runs without ids are not removed;
# delete ./chroma_db once to start from a clean collection.
chunk_ids = [
    f"{chunk.metadata.get('source', 'Unknown')}::page-{chunk.metadata.get('page', 'N/A')}::chunk-{index}"
    for index, chunk in enumerate(doc_splits)
]

# Create Chroma vector store
vector_store = Chroma.from_documents(
    documents=doc_splits,
    embedding=embeddings,
    ids=chunk_ids,
    persist_directory="./chroma_db"
)

print(f" Vector store created with {len(doc_splits)} chunks")
print(f" Persistent storage: ./chroma_db")


 Initializing embeddings and creating vector database...
 Vector store created with 942 chunks
 Persistent storage: ./chroma_db


In [13]:
# Retriever over the vector store; each query returns the 3 closest chunks.
retriever = vector_store.as_retriever(search_kwargs={"k": 3})

@tool
def search_documents(query: str) -> str:
    """
    Search the policy documents for relevant information.
    Use this tool to find answers about the legal documents.
    """
    # The docstring above is kept verbatim: with @tool it presumably serves as
    # the tool description shown to the model, so editing it changes behavior.
    try:
        hits = retriever.invoke(query)

        if not hits:
            return "No relevant documents found for this query."

        # One labelled section per hit: source file name, page, then the text.
        sections = []
        for position, hit in enumerate(hits, 1):
            source = hit.metadata.get('source', 'Unknown')
            page = hit.metadata.get('page', 'N/A')

            # Keep only the last path component; backslash paths are checked first.
            if "\\" in source:
                filename = source.rsplit("\\", 1)[-1]
            elif "/" in source:
                filename = source.rsplit("/", 1)[-1]
            else:
                filename = source

            sections.append(f"[Source {position}: {filename} (Page {page})]\n{hit.page_content}\n")

        return "\n---\n".join(sections)
    except Exception as search_error:
        # Failures are returned as text rather than raised.
        return f"Error searching documents: {search_error}"

print(" Retriever tool created successfully")


 Retriever tool created successfully


In [14]:
# Build the agentic RAG system with LangGraph


# Tools made available to the agent (only the document search tool).
tools = [search_documents]

# Prebuilt ReAct agent wired to the chat model and the tool list.
agent_executor = create_react_agent(model=llm, tools=tools)

# Graph state used for conversation management
class AgentState(MessagesState):
    """Conversation state for the graph; adds no fields beyond those of MessagesState."""

def agent_node(state: AgentState) -> AgentState:
    """Answer the latest user message by running the ReAct agent over the conversation.

    Args:
        state: Current graph state; ``state["messages"]`` holds the conversation so far.

    Returns:
        A partial state update containing only the agent's final message. The
        ``messages`` channel of ``MessagesState`` is merged by its reducer, so
        the new message is added to the stored history; the node does not
        need to send the whole history back.
    """
    messages = state["messages"]

    # Add system prompt for document-aware responses
    system_prompt = SystemMessage(
        content="""You are an intelligent policy assistant. Your role is to answer questions about policy documents 
        with accuracy and helpfulness. 

IMPORTANT INSTRUCTIONS:
1. Always use the search_documents tool to find relevant information
2. Provide sources for every piece of information you cite
3. Format citations clearly: [Source: Document Name (Page X)]
4. If you cannot find relevant information, be honest about it
5. Maintain conversation context and refer to previous messages when relevant
6. Be concise but thorough in your answers"""
    )

    # The system prompt is prepended for this call only; it is never stored in
    # the graph state, so it is not duplicated across turns.
    messages_with_system = [system_prompt] + messages

    # Call the agent
    response = agent_executor.invoke({"messages": messages_with_system})

    # Only the last message of the agent run is kept; the intermediate
    # messages of that run are not written back to the conversation state.
    final_message = response["messages"][-1]

    # Return just the new message instead of history + new message, which
    # relied on the reducer de-duplicating the re-sent history by message id.
    return {"messages": [final_message]}

# Create the graph: a single "agent" node between START and END.
graph = StateGraph(AgentState)
graph.add_node("agent", agent_node)

# Set START -> agent -> END
graph.add_edge(START, "agent")
graph.add_edge("agent", END)

# Compile with an in-memory checkpointer; callers select a conversation by
# passing a thread_id in the invoke config.
memory = MemorySaver()
compiled_agent = graph.compile(checkpointer=memory)

# Build the summary from the live objects rather than hard-coded values, so it
# stays correct when a PDF was skipped during loading or a setting changes.
indexed_document_count = len({chunk.metadata.get('source', 'Unknown') for chunk in doc_splits})

print(" Agentic RAG system initialized successfully")
print(" System components:")
print(f"   ├─ LLM: {llm.model_name}")
print(f"   ├─ Embeddings: {embeddings.model}")
print(f"   ├─ Vector Store: Chroma with {indexed_document_count} documents")
print(f"   ├─ Retriever: Semantic search (k={retriever.search_kwargs.get('k')})")
print("   └─ Agent: ReAct agent with conversation memory")


 Agentic RAG system initialized successfully
 System components:
   ├─ LLM: gpt-4o-mini
   ├─ Embeddings: text-embedding-3-small
   ├─ Vector Store: Chroma with 3 documents
   ├─ Retriever: Semantic search (k=3)
   └─ Agent: ReAct agent with conversation memory


C:\Users\ncc333\AppData\Local\Temp\ipykernel_2592\1936763313.py:8: LangGraphDeprecatedSinceV10: create_react_agent has been moved to `langchain.agents`. Please update your import to `from langchain.agents import create_agent`. Deprecated in LangGraph V1.0 to be removed in V2.0.
  agent_executor = create_react_agent(llm, tools)


In [16]:
# # Test the agent with sample queries
# def run_query(user_query: str, session_id: str = "default"):
#     """Run a query through the agentic RAG system"""
#     print(f"\n{'='*80}")
#     print(f" User Query: {user_query}")
#     print(f"{'='*80}")
    
#     # Create input state
#     input_state = AgentState(messages=[HumanMessage(content=user_query)])
    
#     # Run the agent with session/thread ID for context management
#     result = compiled_agent.invoke(
#         input_state,
#         config={"configurable": {"thread_id": session_id}}
#     )
    
#     # Extract and display the response
#     response = result["messages"][-1]
#     print(f"\n Assistant Response:")
#     print(f"{response.content}")
#     print(f"{'='*80}\n")
    
#     return response.content

# # Test queries
# print(" Testing Agentic RAG System\n")

# # Test 1: Initial query
# response1 = run_query("What is LAW 243 about?", session_id="session_1")

# # Test 2: Follow-up question (tests context management)
# response2 = run_query("Can you provide more details about the main topics?", session_id="session_1")

# # Test 3: Different document
# response3 = run_query("What are the key provisions in LAW411 about oil and gas?", session_id="session_2")

















def run_query(user_query: str, session_id: str = "default"):
    """Send one question through the compiled graph, print the exchange, return the answer.

    Args:
        user_query: The question text to submit.
        session_id: Used as the ``thread_id`` in the invoke config, so calls
            that share a session id share one conversation thread.

    Returns:
        The ``content`` of the last message in the graph result.
    """
    rule = "=" * 80

    # Echo the question between two rules.
    print(f"\n{rule}")
    print(f" User Query: {user_query}")
    print(rule)

    # Wrap the question as the single input message and pick the thread.
    thread_config = {"configurable": {"thread_id": session_id}}
    input_state = AgentState(messages=[HumanMessage(content=user_query)])
    result = compiled_agent.invoke(input_state, config=thread_config)

    # The last message of the result carries the assistant's reply.
    answer = result["messages"][-1].content
    print("\n Assistant Response:")
    print(f"{answer}")
    print(f"{rule}\n")

    return answer

def interactive_chat():
    """Run a console chat loop on top of ``run_query``.

    Commands (case-insensitive, surrounding whitespace ignored):
      - ``quit`` / ``exit`` / ``q``: leave the loop.
      - ``new``: switch to a fresh session id (``session_2``, ``session_3``, ...).
      - empty input: ignored.
      - anything else: sent to ``run_query`` under the current session id.

    The loop ends when input can no longer be read (Ctrl+C, end of input, or an
    environment without stdin). Errors raised while answering a question are
    printed and the loop continues.
    """
    print("\n" + "="*80)
    print(" Welcome to the Agentic RAG System")
    print("="*80)
    print("\nCommands:")
    print("  - Type your question to query the system")
    print("  - Type 'new' to start a new conversation session")
    print("  - Type 'quit' or 'exit' to end the session")
    print("="*80 + "\n")

    session_id = "session_1"
    session_counter = 1

    while True:
        # Reading input is handled separately from running the query. A failure
        # of input() will fail again on the next iteration, so treating it like
        # a query error ("print and continue") would spin forever.
        try:
            user_input = input("You: ").strip()
        except KeyboardInterrupt:
            print("\n\nSession interrupted. Goodbye!")
            break
        except Exception as e:
            # Covers EOFError and any other failure to read from stdin.
            print(f"\n Input unavailable ({type(e).__name__}). Ending session.\n")
            break

        command = user_input.lower()

        # Check for exit commands
        if command in ['quit', 'exit', 'q']:
            print("\nThank you for using the Agentic RAG System. Goodbye!")
            break

        # Check for new session command
        if command == 'new':
            session_counter += 1
            session_id = f"session_{session_counter}"
            print(f"\n Started new conversation session: {session_id}\n")
            continue

        # Skip empty inputs
        if not user_input:
            continue

        # Run the query; a failing query is reported and the chat goes on.
        try:
            run_query(user_input, session_id=session_id)
        except KeyboardInterrupt:
            print("\n\nSession interrupted. Goodbye!")
            break
        except Exception as e:
            print(f"\n Error: {str(e)}\n")
            continue

def run_test_queries():
    """Run the three predefined sample questions through ``run_query``.

    The first two share ``test_session_1`` (a question and its follow-up);
    the third uses ``test_session_2``. Answers are printed by ``run_query``;
    nothing is returned.
    """
    rule = "=" * 80

    # (heading to print, question to ask, session id to use)
    scenarios = (
        ("TEST 1: Initial query",
         "What is LAW 243 about?",
         "test_session_1"),
        ("\nTEST 2: Follow-up question",
         "Can you provide more details about the main topics?",
         "test_session_1"),
        ("\nTEST 3: Different document query",
         "What are the key provisions in LAW411 about oil and gas?",
         "test_session_2"),
    )

    print("\n" + rule)
    print(" Running Test Queries")
    print(rule + "\n")

    for heading, question, session in scenarios:
        print(heading)
        run_query(question, session_id=session)

    print("\n" + rule)
    print(" Test Queries Completed")
    print(rule + "\n")

# Entry point: show the mode menu and dispatch on the user's choice.
if __name__ == "__main__":
    for menu_line in (
        "\nSelect mode:",
        "1. Interactive Chat (type your questions)",
        "2. Run Test Queries",
    ):
        print(menu_line)

    choice = input("\nEnter your choice (1 or 2): ").strip()

    if choice == "2":
        run_test_queries()
    else:
        # "1" starts the chat directly; anything else falls back to the chat
        # after a notice.
        if choice != "1":
            print("Invalid choice. Running interactive chat by default...")
        interactive_chat()


Select mode:
1. Interactive Chat (type your questions)
2. Run Test Queries
Invalid choice. Running interactive chat by default...

 Welcome to the Agentic RAG System

Commands:
  - Type your question to query the system
  - Type 'new' to start a new conversation session
  - Type 'quit' or 'exit' to end the session


 User Query: what is constitutional law

 Assistant Response:
Constitutional law is a body of law that defines the structure and function of government institutions, as well as the rights of individuals in relation to the state. It encompasses the interpretation and implementation of a country's constitution, which serves as the supreme legal authority. Key aspects of constitutional law include:

1. **Fundamental Rights**: Protecting individual rights and liberties against government infringement.
2. **Separation of Powers**: Dividing government responsibilities among different branches (executive, legislative, and judicial) to prevent abuse of power.
3. **Checks and Balan