In [5]:
import os
import glob
from dotenv import load_dotenv
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_core.tools import tool
from langchain_core.messages import SystemMessage
from langgraph.graph import StateGraph, START, END, MessagesState
from langgraph.prebuilt import ToolNode
from langgraph.checkpoint.memory import MemorySaver
from typing import Literal

# Load environment variables (API Key)
load_dotenv()

True

In [6]:
# Path to your PDF directory
folder_path = "data/business_docs"

# 1. Load PDF Documents
# glob.glob() yields matches in arbitrary, OS-dependent order. Sorting makes the
# page order, the resulting chunk order, and the log below reproducible across
# machines and across Restart & Run All.
pdf_files = sorted(glob.glob(os.path.join(folder_path, "*.pdf")))

# Fail fast: everything downstream (splitting, embedding) is meaningless without input.
if not pdf_files:
    raise FileNotFoundError(f"No PDF files found in {folder_path}")

print(f"üìÇ Found {len(pdf_files)} PDFs. Loading...")

# Reset on every run so re-executing this cell never accumulates pages twice.
documents = []

for file_path in pdf_files:
    loader = PyPDFLoader(file_path)
    # Synchronous load() keeps the cell simple; the returned documents for this
    # file are appended to the shared list.
    docs = loader.load()
    documents.extend(docs)
    print(f"   - Loaded: {os.path.basename(file_path)}")

print(f"‚úÖ Total loaded pages: {len(documents)}")

# 2. Split into Chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,      # Characters per chunk
    chunk_overlap=100     # Overlap to preserve context across chunk boundaries
)

doc_splits = text_splitter.split_documents(documents)
print(f"‚úÖ Created {len(doc_splits)} chunks")

üìÇ Found 3 PDFs. Loading...
   - Loaded: 2010-the-venture-capital-revolution.pdf
   - Loaded: Entrepreneurship_Development_in_Nigeria.pdf
   - Loaded: ssrn-4350545.pdf
‚úÖ Total loaded pages: 247
‚úÖ Created 695 chunks


In [7]:
# 3. Create Vector Store
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# On-disk location of the Chroma collection (persisted between kernel sessions)
chroma_path = "./chroma_db_business"

# Open (or create) the persisted collection first instead of calling
# Chroma.from_documents() unconditionally. Because the collection lives on disk,
# an unconditional from_documents() re-embeds every chunk and appends it again on
# each run, so the store fills up with duplicates that crowd out distinct results
# at retrieval time.
vectorstore = Chroma(
    collection_name="business_knowledge",
    embedding_function=embeddings,
    persist_directory=chroma_path,
)

# A single-record probe is enough to tell an empty collection from a populated one.
if vectorstore.get(limit=1)["ids"]:
    # NOTE: the existing index is reused as-is. Delete the chroma_path directory
    # to force a rebuild after the source PDFs change.
    print(f"Reusing existing vector store at {chroma_path} (delete the folder to rebuild)")
else:
    vectorstore.add_documents(documents=doc_splits)
    print(f"‚úÖ Vector store created and persisted to {chroma_path}")

‚úÖ Vector store created and persisted to ./chroma_db_business


In [8]:
@tool
def retrieve_business_knowledge(query: str) -> str:
    """
    Search for information in the business document knowledge base.
    
    Use this tool when the user asks about specific business concepts, 
    strategies, or details contained in the provided PDFs.
    
    Args:
        query: The search query describing the information needed.
        
    Returns:
        Relevant document excerpts to answer the question.
    """
    # NOTE(review): the @tool decorator presumably exposes the docstring above to
    # the model as the tool description, so it is kept verbatim on purpose.

    # MMR (Maximum Marginal Relevance) search, configured with k=5 / fetch_k=10.
    # The retriever is built per call so it always wraps the current `vectorstore`.
    mmr_kwargs = {"k": 5, "fetch_k": 10}
    hits = vectorstore.as_retriever(
        search_type="mmr",
        search_kwargs=mmr_kwargs,
    ).invoke(query)

    # Guard clause: give the model an explicit message instead of an empty string.
    if not hits:
        return "No relevant business documents found."

    # One excerpt per hit, each prefixed with a "Source: ..., Page ..." citation
    # line built from the document metadata (with fallbacks when keys are missing).
    excerpts = []
    for hit in hits:
        origin = hit.metadata.get('source', 'Unknown')
        page_no = hit.metadata.get('page', 0)
        excerpts.append(f"Source: {origin}, Page {page_no}\n{hit.page_content}")

    return "\n\n---\n\n".join(excerpts)

print("‚úÖ Retrieval tool created")

‚úÖ Retrieval tool created


In [9]:
# Chat model used by the agent (temperature pinned to 0)
llm = ChatOpenAI(temperature=0, model="gpt-4o-mini")

# Tools the model is allowed to call, and the tool-aware model handle
tools = [retrieve_business_knowledge]
llm_with_tools = llm.bind_tools(tools)

# System prompt text (business domain). Kept as a separate constant so the
# wording can be reviewed or edited without touching the message construction.
_SYSTEM_PROMPT_TEXT = """You are an expert Business Consultant Agent with access to a knowledge base.

RETRIEVAL DECISION RULES:
- DO NOT retrieve for greetings or general knowledge ("What is 2+2?").
- DO retrieve for specific questions about business strategies, financial metrics, or content found in the uploaded documents.
- Always cite the source when providing information from the documents.
"""

system_prompt = SystemMessage(content=_SYSTEM_PROMPT_TEXT)

# Assistant node
def assistant(state: MessagesState) -> dict:
    """Run the tool-aware model on the system prompt plus the conversation so far.

    Args:
        state: Graph state; its "messages" entry is the conversation history.

    Returns:
        A dict whose "messages" entry is a one-element list holding the model's reply.
    """
    history = state["messages"]
    # The system prompt is prepended on every call rather than stored in the state.
    reply = llm_with_tools.invoke([system_prompt] + history)
    return {"messages": [reply]}

# Routing decision taken after each assistant step
def should_continue(state: MessagesState) -> Literal["tools", "__end__"]:
    """Pick the next node based on the most recent message in the state.

    Args:
        state: Graph state; only the last item of state["messages"] is inspected.

    Returns:
        "tools" when the latest message carries tool calls, otherwise "__end__".
    """
    newest = state["messages"][-1]
    # A non-empty tool_calls value means the model asked for a tool to be run.
    return "tools" if newest.tool_calls else "__end__"

# Build Graph
builder = StateGraph(MessagesState)

# Register the two nodes: the LLM step and the tool-execution step.
for node_name, node_impl in {"assistant": assistant, "tools": ToolNode(tools)}.items():
    builder.add_node(node_name, node_impl)

# Wiring: START -> assistant; assistant -> tools or END depending on
# should_continue(); tools -> assistant to close the loop.
route_targets = {"tools": "tools", "__end__": END}
builder.add_edge(START, "assistant")
builder.add_conditional_edges("assistant", should_continue, route_targets)
builder.add_edge("tools", "assistant")

# Compile with an in-memory checkpointer attached
memory = MemorySaver()
agent = builder.compile(checkpointer=memory)

print("‚úÖ Agentic RAG System Compiled")

‚úÖ Agentic RAG System Compiled


In [None]:
from langchain_core.messages import HumanMessage, AIMessage

def run_query(user_input: str, thread_id: str):
    """Send one user message to the agent and print how it chose to answer.

    Args:
        user_input: The user's message for this turn.
        thread_id: Conversation identifier passed to the agent's config; calls
            that share a thread_id share the same checkpointed conversation.

    Prints the user message, whether the agent called the retrieval tool during
    this turn, and the content of the final message. Returns None.
    """
    print(f"\n{'='*50}\nüë§ User: {user_input}\n{'='*50}")

    config = {"configurable": {"thread_id": thread_id}}
    result = agent.invoke(
        {"messages": [HumanMessage(content=user_input)]},
        config=config
    )

    messages = result["messages"]

    # The agent is compiled with a checkpointer, so `messages` holds the whole
    # thread history, not just this turn. Only messages after the most recent
    # HumanMessage belong to the current turn. Scanning the full history would
    # report a retrieval on every later turn of a thread that ever used the tool.
    last_human_idx = max(
        (idx for idx, msg in enumerate(messages) if isinstance(msg, HumanMessage)),
        default=-1,
    )
    current_turn = messages[last_human_idx + 1:]
    used_retrieval = any(
        isinstance(msg, AIMessage) and msg.tool_calls for msg in current_turn
    )

    if used_retrieval:
        print("üîç Agent: [Decided to retrieve information]")
    else:
        print("üß† Agent: [Decided to answer directly]")

    print(f"ü§ñ Response: {messages[-1].content}")

# Smoke tests: one document-specific question and one greeting, each on its
# own thread so the two conversations do not share history.
for test_prompt, test_thread in (
    ("What are the key topics discussed in these documents?", "test_1"),
    ("Hello, are you ready to help?", "test_2"),
):
    run_query(test_prompt, test_thread)


üë§ User: What are the key topics discussed in these documents?
üîç Agent: [Decided to retrieve information]
ü§ñ Response: The key topics discussed in the documents include:

1. **Globalization**: The documents explore the implications of globalization on business, referencing significant works by Martin Wolf and Jagdish Bhagwati that analyze the subject comprehensively.

2. **Environmental Challenges**: There is a focus on current and prospective environmental problems, emphasizing the need for a global partnership for sustainable development as outlined in Agenda 21, which addresses disparities, poverty, and ecosystem deterioration.

3. **Capitalism and Collectivism**: The documents discuss the interplay between capitalism and collectivism, highlighting the role of various coalitions, including government and UN agencies, in promoting sustainable development and corporate social responsibility.

4. **Human Rights**: The concept of 'positive' human rights is presented as a critica