In [1]:
# Routing is the process of deciding how to handle a user query by directing it to the most appropriate retriever, knowledge source, or processing path.

In [2]:
from dotenv import load_dotenv
# Load environment variables (python-dotenv reads them from a .env file, if one is found).
# As the cell's last expression, the call's return value is what the cell displays.
load_dotenv()

True

In [3]:
# Do all the setup here

import bs4
from langchain_core.prompts import PromptTemplate
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_ollama import OllamaEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_ollama import OllamaLLM

# Download the blog post, parsing only the title, header and body elements
content_filter = bs4.SoupStrainer(class_=("post-content", "post-title", "post-header"))
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": content_filter},
)
blog_docs = loader.load()

# Break the post into overlapping chunks (splitter is built via the tiktoken encoder)
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=300,
    chunk_overlap=50,
)
splits = text_splitter.split_documents(blog_docs)

# Embed every chunk, store the vectors in Chroma, and expose the store as a retriever
chunk_embedder = OllamaEmbeddings(model="nomic-embed-text")
vectorstore = Chroma.from_documents(documents=splits, embedding=chunk_embedder)
retriever = vectorstore.as_retriever()

### Problem Statement

RAG systems must route queries correctly: if a query is routed poorly, even accurate retrieval won't help.

Wrong routing can trigger irrelevant tools, prompts, or retrievers—leading to confusing results.

**Example**:

User query:
- "Add John to the CRM"

Problem:
- This is an action request, not a search query.
- If routed to a retriever, it may return docs like “What is a CRM?”

Potential bad outcome:
- The LLM replies: “A CRM is a system for managing customer relationships...”
- But the user expected the system to perform the action, not explain it.

### Strategy #1: Logical Routing

![Image 1](rsc/jupyter/logical_routing_1.png)

**Idea**: Use explicit reasoning or decision logic (usually powered by an LLM) to select the best tool, retriever, or knowledge source based on the user's query.

![Image 2](rsc/jupyter/logical_routing_2.png)

_**Note**: Nowadays, models are fine-tuned to understand, invoke, and integrate external tools during their operation. That is to say, this idea is already baked into modern LLMs, and we just have to "bind" the relevant tools to the LLM, so that it knows they exist and how to use them._

In [4]:
# Route a user's query to either python_docs, js_docs, or golang_docs, based on the context of the query.
#   - Use LLM structured output to constrain the model's output to a pydantic object.

from typing import Literal
from pydantic import BaseModel, Field
from langchain_ollama import ChatOllama
from langchain_core.prompts import ChatPromptTemplate

# Data model (the structured output)
class RouteQuery(BaseModel):
    """Route a user query to the most relevant datasource."""
    # NOTE(review): this docstring and the field description below are presumably
    # forwarded to the model as part of the output schema by `with_structured_output`,
    # so treat their wording as prompt text rather than plain documentation — confirm.

    # The only accepted values are the three datasource names in the Literal;
    # the `...` default marks the field as required.
    datasource: Literal["python_docs", "js_docs", "golang_docs"] = Field(
        ...,
        description="Given a user question choose which datasource would be most relevant for answering their question",
    )

# Chat model, plus a wrapped variant whose replies are constrained to RouteQuery
llm = ChatOllama(model="llama3.2:1b")
structured_llm = llm.with_structured_output(RouteQuery)

# Routing prompt: a fixed system instruction followed by the user's question
system = (
    "You are an expert at routing a user question to the appropriate data source.\n\n"
    "Based on the programming language the question is referring to, route it to the relevant data source."
)
routing_messages = [("system", system), ("human", "{question}")]
prompt = ChatPromptTemplate.from_messages(routing_messages)

# Preview the rendered messages for a sample question
prompt.invoke({"question": "What is a pydantic object?"})

ChatPromptValue(messages=[SystemMessage(content='You are an expert at routing a user question to the appropriate data source.\n\nBased on the programming language the question is referring to, route it to the relevant data source.', additional_kwargs={}, response_metadata={}), HumanMessage(content='What is a pydantic object?', additional_kwargs={}, response_metadata={})])

In [5]:
# Sample query: a snippet of Python code, which the router should map to python_docs
question = """
Why doesn't the following code work:

from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_messages(["human", "speak in {language}"])
prompt.invoke("french")
"""

# The router is the routing prompt piped into the structured-output model
router = prompt | structured_llm

router.invoke(dict(question=question))

RouteQuery(datasource='python_docs')

In [6]:
# Once we have this, it is trivial to define a branch that uses `result.datasource`
from langchain_core.runnables import RunnableLambda

def choose_route(result):
    """Map a routing decision to the chain that should handle the query.

    Args:
        result: Object exposing a string ``datasource`` attribute (in this
            notebook, the ``RouteQuery`` produced by the router).

    Returns:
        A placeholder string naming the selected chain, always of the form
        ``"chain for <datasource>"``. Any datasource that matches neither
        ``python_docs`` nor ``js_docs`` falls through to the golang branch.
    """
    # Normalise once; matching is case-insensitive and substring-based.
    datasource = result.datasource.lower()
    if "python_docs" in datasource:
        ### Logic here
        return "chain for python_docs"
    if "js_docs" in datasource:
        ### Logic here
        return "chain for js_docs"
    ### Logic here
    # Previously returned the bare "golang_docs"; now follows the same
    # "chain for <datasource>" pattern as the other two branches.
    return "chain for golang_docs"

# Router output (a RouteQuery) is handed to choose_route to pick the branch
route_step = RunnableLambda(choose_route)
full_chain = router | route_step

full_chain.invoke(dict(question=question))

'chain for python_docs'

### Strategy #2: Semantic Routing

![Image](rsc/jupyter/semantic_routing.png)

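**Idea**: Use embedding similarity to route a user's query to the correct tool, agent, or sub-system: embed the query, compare it with the embedding of each candidate's description (or prompt), and pick the candidate whose meaning is closest.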

In [7]:
from langchain_ollama import OllamaEmbeddings

# Two expert personas; each template is also the text that gets embedded for routing
physics_template = (
    "You are a very smart physics professor. "
    "You are great at answering questions about physics in a concise and easy to understand manner. "
    "When you don't know the answer to a question you admit that you don't know.\n"
    "\n"
    "Here is a question:\n"
    "{query}"
)

math_template = (
    "You are a very good mathematician. You are great at answering math questions. "
    "You are so good because you are able to break down hard problems into their component parts, "
    "answer the component parts, and then put them together to answer the broader question.\n"
    "\n"
    "Here is a question:\n"
    "{query}"
)

# Embed both templates once, up front, so routing only has to embed the query
prompt_templates = [physics_template, math_template]
embeddings = OllamaEmbeddings(model="nomic-embed-text")
prompt_embeddings = embeddings.embed_documents(prompt_templates)

# Sanity check: number of vectors first, then the length of one vector
print(len(prompt_embeddings), len(prompt_embeddings[0]), sep="\n")
print(len(prompt_embeddings[0]))  # 768 dimensions per embedding

2
768


In [8]:
from langchain_community.utils.math import cosine_similarity
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain_core.prompts import PromptTemplate

# Route question to appropriate prompt template
def prompt_router(input):
    """Select the prompt template most similar to the incoming query.

    Args:
        input: Mapping with a ``"query"`` key holding the user's question.

    Returns:
        A ``PromptTemplate`` built from whichever entry of ``prompt_templates``
        has the highest cosine similarity to the embedded query.
    """
    question_vector = embeddings.embed_query(input["query"])
    # One row of scores, one column per template
    scores = cosine_similarity([question_vector], prompt_embeddings)[0]
    best_index = scores.argmax()
    chosen_template = prompt_templates[best_index]
    # Report which persona was picked
    if chosen_template == math_template:
        label = "Using MATH"
    else:
        label = "Using PHYSICS"
    print(label, "\n")
    return PromptTemplate.from_template(chosen_template)

# Wrap the raw question string as {"query": ...} before handing it to the router
wrap_query = {"query": RunnablePassthrough()}
routing_chain = wrap_query | RunnableLambda(prompt_router)

routing_chain.invoke("How do I find the GCD of two numbers?")

Using MATH 



StringPromptValue(text='You are a very good mathematician. You are great at answering math questions. You are so good because you are able to break down hard problems into their component parts, answer the component parts, and then put them together to answer the broader question.\n\nHere is a question:\nHow do I find the GCD of two numbers?')

In [9]:
from langchain_core.output_parsers import StrOutputParser

# Routed prompt -> chat model -> plain-text answer
answer_parser = StrOutputParser()
full_chain = routing_chain | llm | answer_parser

answer = full_chain.invoke("What's a black hole")
print(answer)

Using PHYSICS 

A black hole is a region in space where the gravitational pull is so strong that nothing, including light, can escape. It's formed when a massive star collapses in on itself and its gravity becomes so strong that it warps the fabric of spacetime around it.

Imagine you're standing near a super-powerful vacuum cleaner that sucks up everything that gets too close. That's essentially what a black hole is - a void in spacetime that has such intense gravitational pull that nothing can escape once it falls inside.
