## AGENTIC RAG PROJECT

In [27]:
# from langchain_openai import ChatOpenAI
# from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
# from langgraph.graph import StateGraph, START, END, MessagesState
# from dotenv import load_dotenv
# load_dotenv()


from dotenv import load_dotenv
load_dotenv()

from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
from langchain_core.tools import tool
from langgraph.graph import StateGraph, START, END, MessagesState
from langgraph.checkpoint.memory import MemorySaver
from langgraph.prebuilt import ToolNode



In [7]:
# Load Documents

from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader


loader = DirectoryLoader(
    "documents/",
    glob="**/*.pdf",
    loader_cls=PyPDFLoader
)


print("Starting PDF load...")
docs = loader.load()
print("Finished loading PDFs")


Starting PDF load...
Finished loading PDFs


In [8]:
# Chunking strategy

# from langchain.text_splitter import RecursiveCharacterTextSplitter

# text_splitter = RecursiveCharacterTextSplitter(
#     chunk_size=1000,
#     chunk_overlap=200
# )

# chunks = text_splitter.split_documents(docs)


from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200
)

chunks = text_splitter.split_documents(docs)



In [9]:
len(chunks)


225

In [10]:
# Embeddings and Chroma

from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma

embeddings = OpenAIEmbeddings()

vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    persist_directory="./chroma_db"
)


In [11]:
# Let's test if it retrieves

vectorstore.similarity_search(
    "Subnets and IPv6 support",
    k=3
)


[Document(metadata={'page': 6, 'title': '01 Google Cloud VPC Networking Fundamentals 2.0.3_OD', 'creator': 'Google', 'source': 'documents\\01 Google Cloud VPC Networking Fundamentals 2.0_OD.pdf', 'total_pages': 48, 'creationdate': '', 'page_label': '7', 'producer': 'PyPDF'}, page_content='Subnets and IPv6 support●VPC networks now support IPv6 addresses.●Support for IPv6 addresses can vary per subnet.●To support IPv6, Google Cloud has introduced the concept of a subnet stack.○Single-stack subnets support IPv4.○Dual-stack subnets support IPv4 and IPv6.●IPv6 addresses can be assigned to objects in a subnet that supports IPv6.\nVPC networks now support IPv6 addresses.\nSupport for IPv6 addresses can vary per subnet. To support IPv6, Google Cloud has \nintroduced the concept of a subnet stack. The subnet stack defines the type of \naddress that can be assigned to objects in the subnet.\nSingle-stack subnets support IPv4. Dual-stack subnets support IPv4 and IPv6. \nThere’s no subnet that onl

In [12]:
# Retrieval Tool

from langchain_core.tools import tool

@tool
def retrieve_docs(query: str) -> str:
    """
    Retrieve relevant documentation snippets for a query.
    """
    results = vectorstore.max_marginal_relevance_search(
        query,
        k=4,
        fetch_k=8
    )

    formatted = []
    for doc in results:
        formatted.append(
            f"Source: {doc.metadata.get('source')}\n{doc.page_content}"
        )

    return "\n\n---\n\n".join(formatted)


In [15]:
# LLM and tool binding (LangGraph)

tools = [retrieve_docs]

llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0
).bind_tools(tools)


In [18]:
# System Prompt
system_prompt = SystemMessage(
    content=(
        "You are studying Networking in Google Cloud . "
        "If a question requires specific documentation, use the retrieval tool. "
        "If the question is general knowledge, answer directly without retrieval."
    )
)


In [21]:
# Agent Node

def agent_node(state: MessagesState):
    messages = [system_prompt] + state["messages"]
    response = llm.invoke(messages)
    return {"messages": state["messages"] + [response]}


In [22]:
# Tool Node

from langgraph.prebuilt import ToolNode

tool_node = ToolNode(tools)


In [23]:
# Routing Logic

from typing import Literal

def route(state: MessagesState) -> Literal["tools", "__end__"]:
    last = state["messages"][-1]
    if getattr(last, "tool_calls", None):
        return "tools"
    return END


In [25]:
# Let's build LangGraph

graph = StateGraph(MessagesState)

graph.add_node("agent", agent_node)
graph.add_node("tools", tool_node)

graph.add_edge(START, "agent")

graph.add_conditional_edges(
    "agent",
    route,
    {
        "tools": "tools",
        END: END
    }
)

graph.add_edge("tools", "agent")

print(f'LangGraph created successfully')


LangGraph created successfully


In [28]:
# Let's compile with memory

memory = MemorySaver()
app = graph.compile(checkpointer=memory)


In [None]:
# Sanity Check

from langchain_core.messages import HumanMessage

# Example query that should trigger retrieval
query = "How do I set up a VPC in Google Cloud Networking?"

response = app.invoke(
    {"messages": [HumanMessage(content=query)]},
    config={"configurable": {"thread_id": "test"}}
)

# Display the agent's response
print(response["messages"][-1].content)


To set up a Virtual Private Cloud (VPC) in Google Cloud Networking, follow these general steps:

1. **Access Google Cloud Console**: Go to the Google Cloud Console and log in to your account.

2. **Create a VPC Network**:
   - Navigate to the "VPC network" section in the console.
   - Click on "Create VPC network."
   - Provide a name for your VPC network.
   - Choose the "Automatic" or "Custom" subnet creation option:
     - **Automatic**: Google Cloud will create subnets in each region.
     - **Custom**: You can define your own subnets.

3. **Configure Subnets** (if you chose Custom):
   - Specify the name, region, and IP address range (CIDR block) for each subnet you want to create.

4. **Set Up Firewall Rules**:
   - After creating the VPC, you may want to set up firewall rules to control traffic to and from your VPC network.
   - Define rules based on your security requirements.

5. **Connect to Other Networks** (if needed):
   - If you need to connect your VPC to on-premises net

In [32]:
queries = [
    "What is VPC peering?",
    "Explain subnetting in Google Cloud Networking",
    "What is Network endpoint groups(NEG)",
    "What are IAM roles in Google Cloud?",
    "Briefly explain load balancing in Google Cloud"
]

for q in queries:
    response = app.invoke(
        {"messages": [HumanMessage(content=q)]},
        config={"configurable": {"thread_id": "test"}}
    )
    print(f"Query: {q}\nResponse: {response['messages'][-1].content}\n{'-'*50}")


Query: What is VPC peering?
Response: VPC peering is a networking connection between two Virtual Private Cloud (VPC) networks that allows them to communicate with each other as if they were part of the same network. This connection enables resources in different VPCs to interact privately and securely without requiring public IP addresses or traversing the public internet. Here are the key aspects of VPC peering:

1. **Direct Communication**: VPC peering allows instances in one VPC to communicate with instances in another VPC using private IP addresses. This direct communication enhances security and reduces latency.

2. **No Transitive Peering**: VPC peering is non-transitive, meaning that if VPC A is peered with VPC B, and VPC B is peered with VPC C, instances in VPC A cannot communicate directly with instances in VPC C through VPC B. Each peering connection must be established individually.

3. **Same or Different Regions**: VPC peering can be established between VPCs in the same re

In [31]:
queries = [
    "What is Python?",
    "Define cloud computing",
    "What is a VPN?",
    "Who invented TCP/IP?",
    "Explain what an API is"
]

for q in queries:
    response = app.invoke(
        {"messages": [HumanMessage(content=q)]},
        config={"configurable": {"thread_id": "test"}}
    )
    print(f"Query: {q}\nResponse: {response['messages'][-1].content}\n{'-'*50}")


Query: What is Python?
Response: Python is a high-level, interpreted programming language known for its simplicity and readability. It was created by Guido van Rossum and first released in 1991. Here are some key features and characteristics of Python:

1. **Easy to Learn and Use**: Python's syntax is clear and straightforward, making it an excellent choice for beginners as well as experienced programmers.

2. **Interpreted Language**: Python code is executed line by line, which makes debugging easier and allows for interactive programming.

3. **Dynamically Typed**: In Python, you do not need to declare the type of a variable when you create it. The type is determined at runtime, which adds flexibility to the code.

4. **Extensive Standard Library**: Python comes with a rich standard library that provides modules and functions for various tasks, such as file I/O, system calls, web development, and data manipulation.

5. **Cross-Platform**: Python is available on various operating syst

## REPORT


**Domain Selection**

For this project, I chose Google Cloud Networking tutorials and study notes. This domain includes both general concepts, such as VPNs and subnets, and it is an integral part of the Associate Cloud Engineer Certificate Exam I am currently preparing for. 

**Chunk Size Tuning**

The PDF documents were split into chunks using RecursiveCharacterTextSplitter, with a chunk size of 1000 characters and an overlap of 200. This setup ensures that each chunk contains enough context for meaningful embeddings, while overlapping content prevents information loss at chunk boundaries. After splitting, the collection resulted in 225 chunks, which balances retrieval efficiency with content completeness.

**Retrieval Decisions**

The agentic RAG system combines a Chroma vector store, OpenAI embeddings, and a retrieval tool decorated with @tool. Testing showed that domain-specific queries, like "Explain subnetting in Google Cloud," correctly triggered retrieval and returned relevant document chunks. General knowledge questions, such as "What is Python?" were answered directly without retrieval. Multi-turn conversations also worked correctly, as the MemorySaver preserved context, allowing coherent follow-ups.

**System Performance**

The system performed well: document loading, chunking, embedding, and vectorization were smooth; the agent made appropriate retrieval decisions; and memory successfully preserved context. Areas for improvement include optimizing retrieval relevance through alternative chunk sizes or reranking strategies, and improving speed by considering local embedding models to reduce dependency on OpenAI API calls.

**Conclusion**

Overall, the agentic RAG system effectively integrates documents, embeddings, retrieval, LLM, and memory to answer queries accurately. It demonstrates that a combination of LangGraph, LangChain, and a vector store can build a context-aware, intelligent agent suitable for technical domains.

