<a href="https://colab.research.google.com/github/GiovanniPasq/agentic-rag-for-dummies/blob/main/Agentic_Rag_For_Dummies.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install --quiet --upgrade langgraph
!pip install -qU "langchain[google-genai]"
!pip install -qU langchain langchain-community langchain-qdrant langchain-huggingface qdrant-client fastembed flashrank langchain-core
!pip install --upgrade gradio

# Optional: if you want to use Ollama with local models
!pip install -qU langchain-ollama

In [None]:
import os
from pathlib import Path
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_qdrant.fastembed_sparse import FastEmbedSparse
from qdrant_client import QdrantClient

# Configuration
# All paths below are relative, so they resolve against the notebook's
# current working directory.
DOCS_DIR = "docs"  # Directory containing your .md files
PARENT_STORE_PATH = "parent_store"  # Directory for parent chunk JSON files
CHILD_COLLECTION = "document_child_chunks"  # Qdrant collection holding the child chunks

# Create directories if they don't exist
# (exist_ok=True makes re-running this cell harmless)
os.makedirs(DOCS_DIR, exist_ok=True)
os.makedirs(PARENT_STORE_PATH, exist_ok=True)

# Initialize LLM (pick ONE of the two options below; whichever is active
# must define the module-level name `llm` used by the later cells)
# Example with Ollama:
from langchain_ollama import ChatOllama
llm = ChatOllama(model="qwen3:4b-instruct", temperature=0)

# Or with Google Gemini:
# from langchain_google_genai import ChatGoogleGenerativeAI
# os.environ["GOOGLE_API_KEY"] = "your-api-key-here"
# llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash-exp", temperature=0)

# Dense embeddings for semantic understanding
# (also used later to size the Qdrant collection's dense vectors)
dense_embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2"
)

# Sparse embeddings for keyword matching
# (paired with the dense model when the vector store is built in HYBRID mode)
sparse_embeddings = FastEmbedSparse(
    model_name="Qdrant/bm25"
)

# Qdrant client (local file-based storage)
# Data is kept under the relative "qdrant_db" path rather than on a server.
client = QdrantClient(path="qdrant_db")

In [None]:
from qdrant_client.http import models as qmodels
from langchain_qdrant import QdrantVectorStore
from langchain_qdrant.qdrant import RetrievalMode

# Get embedding dimension
# Embed a throwaway query and measure the resulting vector, so the collection
# created below is sized from the actual model instead of a hard-coded number.
embedding_dimension = len(dense_embeddings.embed_query("test"))

def ensure_collection(collection_name):
    """Make sure a Qdrant collection named *collection_name* exists.

    When the collection is missing it is created with one unnamed dense
    vector (cosine distance, sized by ``embedding_dimension``) and one sparse
    vector registered under the key ``"sparse"``.  Either way a status line
    is printed.

    Args:
        collection_name: Name of the collection to check or create.

    Returns:
        None.
    """
    # Guard clause: nothing to do when the collection is already there.
    if client.collection_exists(collection_name):
        print(f"✓ Collection already exists: {collection_name}")
        return

    dense_config = qmodels.VectorParams(
        size=embedding_dimension,
        distance=qmodels.Distance.COSINE,
    )
    sparse_config = {"sparse": qmodels.SparseVectorParams()}

    client.create_collection(
        collection_name=collection_name,
        vectors_config=dense_config,
        sparse_vectors_config=sparse_config,
    )
    print(f"✓ Created collection: {collection_name}")

# Create collection
# (must run before the vector store is built, so the store attaches to a
# collection that already has both the dense and the sparse vector defined)
ensure_collection(CHILD_COLLECTION)

# Initialize vector store for child chunks
# The store embeds every document with both models and is queried in HYBRID
# mode, i.e. using the dense and the sparse representation together.
child_vector_store = QdrantVectorStore(
    client=client,
    collection_name=CHILD_COLLECTION,
    embedding=dense_embeddings,
    sparse_embedding=sparse_embeddings,
    retrieval_mode=RetrievalMode.HYBRID,
    # Must match the key used in ensure_collection's sparse_vectors_config.
    sparse_vector_name="sparse"
)

✓ Created collection: document_child_chunks


In [None]:
import glob
import json
from langchain_text_splitters import (
    MarkdownHeaderTextSplitter,
    RecursiveCharacterTextSplitter
)

def index_documents():
    """Index documents using hierarchical Parent/Child strategy.

    Every ``*.md`` file directly inside DOCS_DIR is split on Markdown headers
    (``#``, ``##``, ``###``; header lines are kept in the text) into *parent*
    chunks.  Each parent is split again into *child* chunks of at most 500
    characters with 100 characters of overlap.

    * Child chunks are added to ``child_vector_store``.  Each child gets a
      deterministic UUIDv5 derived from ``"<parent_id>_child_<n>"``, so
      re-running the indexing overwrites the same points instead of appending
      a duplicate copy of every chunk.  Points that belong to documents which
      have since shrunk or been deleted are NOT purged.
    * Parent chunks are written to ``PARENT_STORE_PATH/<parent_id>.json``
      (``page_content`` + ``metadata``) after the previously stored ``.json``
      files have been removed.

    ``parent_id`` has the form ``"<file stem>_parent_<index>"`` and is stored
    in the metadata of the parent and of all its children.

    Progress is reported with ``print``.  A document that cannot be read is
    skipped; a failure while adding to the vector store is reported and
    aborts the run before the parent store is touched.  Errors while writing
    the parent JSON files propagate to the caller.

    Returns:
        None.
    """
    import uuid  # function-scope import: only needed for the stable point IDs

    print("\n" + "="*50)
    print("Starting Hierarchical Indexing")
    print("="*50 + "\n")

    # Parent splitter: by Markdown headers
    headers_to_split_on = [
        ("#", "H1"),
        ("##", "H2"),
        ("###", "H3")
    ]
    parent_splitter = MarkdownHeaderTextSplitter(
        headers_to_split_on=headers_to_split_on,
        strip_headers=False
    )

    # Child splitter: by character count
    child_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=100
    )

    all_child_chunks = []
    all_child_ids = []  # parallel to all_child_chunks
    all_parent_pairs = []

    # Check if docs directory has files (sorted for a reproducible order)
    md_files = sorted(glob.glob(os.path.join(DOCS_DIR, "*.md")))
    if not md_files:
        print(f"⚠️  No .md files found in {DOCS_DIR}/")
        print("Please add your Markdown documents to continue.")
        return

    # Process each document
    for doc_path_str in md_files:
        doc_path = Path(doc_path_str)
        print(f"📄 Processing: {doc_path.name}")

        try:
            md_text = doc_path.read_text(encoding="utf-8")
        except (OSError, UnicodeError) as e:
            # Unreadable or wrongly encoded file: report it and move on.
            print(f"❌ Error reading {doc_path.name}: {e}")
            continue

        # Split into parent chunks
        parent_chunks = parent_splitter.split_text(md_text)

        for i, p_chunk in enumerate(parent_chunks):
            # Add metadata (set before the child split so that children
            # inherit both "source" and "parent_id")
            p_chunk.metadata["source"] = str(doc_path)
            parent_id = f"{doc_path.stem}_parent_{i}"
            p_chunk.metadata["parent_id"] = parent_id

            # Store parent reference
            all_parent_pairs.append((parent_id, p_chunk))

            # Split into child chunks
            child_chunks = child_splitter.split_documents([p_chunk])
            all_child_chunks.extend(child_chunks)

            # Stable IDs: the same document always maps to the same points,
            # which turns a re-run into an overwrite rather than a duplicate.
            all_child_ids.extend(
                str(uuid.uuid5(uuid.NAMESPACE_URL, f"{parent_id}_child_{j}"))
                for j, _ in enumerate(child_chunks)
            )

    # Save child chunks to Qdrant
    if all_child_chunks:
        print(f"\n🔍 Indexing {len(all_child_chunks)} child chunks into Qdrant...")
        try:
            child_vector_store.add_documents(all_child_chunks, ids=all_child_ids)
            print("✓ Child chunks indexed successfully")
        except Exception as e:
            # Boundary with the embedding/vector-store libraries: report any
            # failure and stop before the parent store is modified.
            print(f"❌ Error indexing child chunks: {e}")
            return
    else:
        print("⚠️  No child chunks to index")
        return

    # Save parent chunks to JSON files
    if all_parent_pairs:
        print(f"💾 Saving {len(all_parent_pairs)} parent chunks to JSON...")

        # Clear existing parent files.  Only regular ``.json`` files are
        # removed: os.remove() raises on a directory, and unrelated files
        # that happen to live in the folder must not be deleted.
        for item in os.listdir(PARENT_STORE_PATH):
            item_path = os.path.join(PARENT_STORE_PATH, item)
            if item.endswith(".json") and os.path.isfile(item_path):
                os.remove(item_path)

        # Save each parent chunk
        for parent_id, doc in all_parent_pairs:
            doc_dict = {
                "page_content": doc.page_content,
                "metadata": doc.metadata
            }
            file_path = os.path.join(PARENT_STORE_PATH, f"{parent_id}.json")
            with open(file_path, "w", encoding="utf-8") as f:
                json.dump(doc_dict, f, ensure_ascii=False, indent=2)

        print("✓ Parent chunks saved successfully")

    print("\n" + "="*50)
    print("✓ Indexing Complete!")
    print("="*50 + "\n")

# Run indexing
# Executes as soon as this cell runs: reads DOCS_DIR, adds child chunks to
# Qdrant and rewrites the parent JSON files under PARENT_STORE_PATH.
index_documents()


Starting Hierarchical Indexing

📄 Processing: blockchain.md
📄 Processing: fortinet.md
📄 Processing: javascript_tutorial.md
📄 Processing: microservices.md

🔍 Indexing 2447 child chunks into Qdrant...
✓ Child chunks indexed successfully
💾 Saving 386 parent chunks to JSON...
✓ Parent chunks saved successfully

✓ Indexing Complete!



In [None]:
from typing import List

def search_child_chunks(query: str, k: int = 5) -> List[dict]:
    """
    Search for the top K most relevant child chunks.

    Args:
        query: Search query string
        k: Number of results to return

    Returns:
        List of dicts with content, parent_id, and source
    """
    # NOTE(review): this function is passed to llm.bind_tools / ToolNode, so
    # the docstring above is presumably what the model receives as the tool
    # description -- it is deliberately left word-for-word unchanged.
    hits = []
    try:
        matches = child_vector_store.similarity_search(query, k=k)
        for match in matches:
            # Missing metadata keys fall back to "" rather than raising.
            hits.append({
                "content": match.page_content,
                "parent_id": match.metadata.get("parent_id", ""),
                "source": match.metadata.get("source", ""),
            })
    except Exception as e:
        # Any failure is reported and turned into an empty result set;
        # partially collected hits are discarded.
        print(f"Error searching child chunks: {e}")
        return []
    return hits

def retrieve_parent_chunks(parent_ids: List[str]) -> List[dict]:
    """
    Retrieve full parent chunks by their IDs.

    Args:
        parent_ids: List of parent chunk IDs to retrieve

    Returns:
        List of dicts with content, parent_id, and metadata
    """
    # NOTE(review): this function is passed to llm.bind_tools / ToolNode, so
    # the docstring above is presumably what the model receives as the tool
    # description -- it is deliberately left word-for-word unchanged.
    #
    # Behaviour details:
    #   * IDs are de-duplicated and processed in sorted order.
    #   * IDs with no matching "<parent_id>.json" file are skipped silently.
    #   * IDs that are not a bare file name are rejected (see below).
    #   * A file that cannot be read or parsed is reported and skipped.
    results = []

    for parent_id in sorted(set(parent_ids)):
        # The IDs come from model-generated tool calls, i.e. untrusted input.
        # Anything containing a directory component ("../x", "a/b", "C:x")
        # would make the join below point outside PARENT_STORE_PATH, so only
        # bare file names are accepted.
        id_text = str(parent_id)
        if not id_text or os.path.basename(id_text) != id_text:
            print(f"Skipping invalid parent_id: {parent_id!r}")
            continue

        file_path = os.path.join(PARENT_STORE_PATH, f"{parent_id}.json")
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                doc_dict = json.load(f)
            results.append({
                "content": doc_dict["page_content"],
                "parent_id": parent_id,
                "metadata": doc_dict["metadata"]
            })
        except FileNotFoundError:
            # Unknown ID: same silent skip as an existence check would give.
            continue
        except Exception as e:
            # Best effort: one unreadable or malformed file must not abort
            # the lookup of the remaining IDs.
            print(f"Error loading parent chunk {parent_id}: {e}")

    return results

# Bind tools to LLM
# Both retrieval functions are offered to the model as callable tools. The
# same two functions are registered with the graph's ToolNode further down;
# keep the two lists in sync.
llm_with_tools = llm.bind_tools([search_child_chunks, retrieve_parent_chunks])

In [None]:
from langchain_core.messages import SystemMessage

# Instructions sent to the model on every call. The tool names quoted in the
# text (`search_child_chunks`, `retrieve_parent_chunks`) must match the names
# of the functions bound to the LLM. The retry limit in step 7 is stated only
# here, in prose; nothing in this prompt's surrounding code enforces it.
SYSTEM_PROMPT = """You are an intelligent assistant specialized in answering questions using documents.

Follow this precise workflow:

**1. Analyze the Question**
- Understand what the user is asking
- Identify main topics
- If complex, split into focused subqueries
- Process each subquery through steps 2-7

**2. Retrieve Child Chunks**
- Use `search_child_chunks` to find relevant small chunks
- Choose appropriate K value (default: 5)

**3. Evaluate Child Chunks**
- Read retrieved content carefully
- Determine relevance to the question
- Identify `parent_id`s of most relevant chunks
- If chunks contain ALL needed information, skip to step 6

**4. Assess Need for Context**
- If chunks are fragmented, unclear, or incomplete
- If they only partially answer the question
- Then retrieve parent chunks

**5. Retrieve Parent Chunks (if needed)**
- Use `retrieve_parent_chunks` with unique `parent_id`s
- Read parent chunks for full context

**6. Generate Answer**
- Base answer exclusively on retrieved information
- Combine subquery answers if applicable
- Explain concepts clearly
- Cite source files (without extension) using metadata
- Example: "This information comes from '[filename]'"

**7. Verify and Iterate**
- If initial search found nothing relevant: rephrase query and retry
- If parent chunks insufficient: restart from step 1
- Maximum 3 attempts per question/subquery
- After 3 attempts, ask user to rephrase

**Critical Rules:**
- Follow steps 1-7 for every question/subquery
- Answer only from retrieved chunks
- Never fabricate information
- Always cite sources
"""

# Wrapped once at module level; agent_node prepends this message to the
# conversation on each model invocation.
system_message = SystemMessage(content=SYSTEM_PROMPT)

In [None]:
from langgraph.graph import MessagesState, START, StateGraph
from langgraph.prebuilt import ToolNode, tools_condition
from langchain_core.messages import HumanMessage

def agent_node(state: MessagesState):
    """
    Run one model step of the agent.

    The system prompt is placed in front of the messages currently held in
    ``state`` (the state itself is not modified), the tool-enabled LLM is
    invoked on that list, and its reply is returned.

    Args:
        state: Graph state; only its "messages" entry is read.

    Returns:
        Dict with a single "messages" key holding a one-element list that
        contains the model's reply.
    """
    conversation = [system_message, *state["messages"]]
    reply = llm_with_tools.invoke(conversation)
    return {"messages": [reply]}

# Build the execution graph
# Resulting flow: START -> agent -> (tools -> agent)* -> END
graph_builder = StateGraph(MessagesState)

# Add nodes
graph_builder.add_node("agent", agent_node)
# The ToolNode is given the same two functions that were bound to the LLM.
graph_builder.add_node(
    "tools",
    ToolNode([search_child_chunks, retrieve_parent_chunks])
)

# Define edges
graph_builder.add_edge(START, "agent")
graph_builder.add_conditional_edges(
    "agent",
    tools_condition,  # Routes to tools or END
)
# After tools run, control always returns to the agent node.
# NOTE(review): nothing in this graph caps the agent <-> tools loop; the
# "maximum 3 attempts" rule exists only in the system prompt. Presumably
# LangGraph's recursion limit is the hard stop -- confirm.
graph_builder.add_edge("tools", "agent")

# Compile the graph
agent_graph = graph_builder.compile()

print("✓ Agent graph compiled successfully")

✓ Agent graph compiled successfully


In [None]:
import gradio as gr

def chat_with_agent(message, history):
    """
    Answer one chat message by running it through the agent graph.

    Args:
        message: The user's question
        history: Earlier chat turns supplied by the UI. Not used: every call
            starts the graph with only the current message.

    Returns:
        The content of the last message in the graph's final state, or an
        error string (never an exception) if anything goes wrong.
    """
    try:
        user_turn = HumanMessage(content=message)
        final_state = agent_graph.invoke({"messages": [user_turn]})
        transcript = final_state["messages"]
        return transcript[-1].content
    except Exception as e:
        # UI boundary: show the failure in the chat instead of raising.
        return f"❌ Error: {str(e)}\n\nPlease try rephrasing your question."

# Create and launch interface
# chat_with_agent is called once per user message with (message, history)
# and its return value is shown as the assistant's reply.
demo = gr.ChatInterface(
    fn=chat_with_agent,
    title="🤖 Agentic RAG Assistant",
    description="Ask questions about your documents. The agent will intelligently retrieve and combine information."
)

print("\n🚀 Launching chat interface...")
# share=False: no public share link is requested -- presumably the UI is
# served locally only; confirm against the Gradio version in use.
demo.launch(share=False)