# Middleware

Middleware provides a way to more tightly control what happens inside the agent. Middleware is useful for the following:
- Tracking agent behavior with logging, analytics and debugging.
- Transforming prompts, tool selection, and output formatting.
- Adding retries, fallbacks and early termination logic.
- Applying rate limits, guardrails, and PII detection.

In [1]:
import os
from dotenv import load_dotenv
load_dotenv()

# Re-export the OpenAI key only when it is actually present: `os.getenv`
# returns None for a missing variable, and assigning None into `os.environ`
# raises a TypeError that does not mention the real problem.
openai_api_key = os.getenv("OPENAI_API_KEY")
if openai_api_key:
    os.environ["OPENAI_API_KEY"] = openai_api_key
else:
    print(
        "Warning: OPENAI_API_KEY is not set; add it to your .env file or "
        "environment before creating an agent."
    )

## Summarization Middleware

Automatically summarize conversation history when approaching token limits, preserving recent messages while compressing older context. Summarization is useful for the following:
- Long-running conversations that exceed the context window
- Multi-turn dialogues with extensive history
- Applications where preserving full conversational context matters

#### Based on the Number of Messages in the Conversation

In [5]:
from langchain.agents import create_agent
from langchain.agents.middleware import SummarizationMiddleware
from langgraph.checkpoint.memory import MemorySaver
from langchain_core.messages import HumanMessage, SystemMessage

# Message-based summarization: the middleware is configured with a
# ("messages", 10) trigger and a ("messages", 4) keep policy -- presumably
# "summarize once the thread holds 10 messages, keep the latest 4 verbatim".
message_summarizer = SummarizationMiddleware(
    model="gpt-4o-mini",
    trigger=("messages", 10),
    keep=("messages", 4),
)

agent = create_agent(
    model="gpt-4o-mini",
    checkpointer=MemorySaver(),
    middleware=[message_summarizer],
)

In [7]:
# Run configuration passed to `agent.invoke`; it names the conversation
# thread "test-1".
config = {
    "configurable": {"thread_id": "test-1"},
}

In [8]:
from langchain_core.messages import HumanMessage
# Alternative test data: one short arithmetic question per expression.
questions = [
    f"what is {expression}?"
    for expression in ("2+2", "10*5", "23*10", "18+11", "22/7", "100/4")
]

# Send every question with the same config and print the returned state plus
# the number of messages it contains.
for question in questions:
    payload = {"messages": [HumanMessage(content=question)]}
    response = agent.invoke(payload, config)
    print(f"Messages: {response}")
    message_count = len(response["messages"])
    print(f"Len of Messages: {message_count}")

Messages: {'messages': [HumanMessage(content='what is 2+2?', additional_kwargs={}, response_metadata={}, id='3bdd9ba8-801b-4cf0-b18c-88fa0dce456e'), AIMessage(content='2 + 2 equals 4.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 8, 'prompt_tokens': 14, 'total_tokens': 22, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_c4585b5b9c', 'id': 'chatcmpl-D0UKX7tVbXHFxtVoyG1EdqbEOTJnP', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='lc_run--019be126-92c1-7721-a784-14d72b0eca79-0', tool_calls=[], invalid_tool_calls=[], usage_metadata={'input_tokens': 14, 'output_tokens': 8, 'total_tokens': 22, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': 

#### Based on Token Size

In [None]:
from langchain.agents import create_agent
from langchain.agents.middleware import SummarizationMiddleware
from langchain_core.tools import tool
from langchain_core.messages import HumanMessage
from langgraph.checkpoint.memory import MemorySaver

@tool
def search_hotels(city: str) -> str:
    """ Search hotels in a city - returns long responses to use more tokens."""
    # NOTE(review): the docstring above is kept verbatim on purpose; `@tool`
    # presumably exposes it to the model as the tool description -- confirm
    # before rewording it.
    # The three hotel lines keep their four-space indent so the returned text
    # is unchanged.
    listing_lines = (
        f"Hotels in {city}:",
        "    1. Grand Hotel - 5 star, $350/night, spa, pool, gym",
        "    2. City Inn - 4 star, $180/night, business center",
        "    3. Budget Stay - 3 star, $75/night, free wifi",
    )
    return "\n".join(listing_lines)

# Token-based summarization: the middleware is configured with a
# ("tokens", 550) trigger and a ("tokens", 200) keep policy.
token_summarizer = SummarizationMiddleware(
    model="gpt-4o-mini",
    trigger=("tokens", 550),
    keep=("tokens", 200),
)

agent = create_agent(
    model="gpt-4o-mini",
    tools=[search_hotels],
    checkpointer=MemorySaver(),
    middleware=[token_summarizer],
)

# Run configuration for the token-based demo; names the thread "test-2".
config = {
    "configurable": {"thread_id": "test-2"},
}

In [11]:
# Token counter (approximate)
def count_tokens(messages):
    """Estimate token usage at roughly four characters per token.

    Each message's ``content`` attribute is converted to ``str``; the lengths
    are summed and the total is floor-divided by 4.
    """
    char_count = 0
    for message in messages:
        char_count += len(str(message.content))
    return char_count // 4

In [13]:
# Run test: ask about each city with the same config and report the
# approximate size of the returned message history after every turn.
cities = [
    "Paris",
    "London",
    "Tokyo",
    "New York",
    "Dubai",
    "Singapore",
]

for city in cities:
    request = {"messages": [HumanMessage(content=f"Find Hotels in {city}")]}
    response = agent.invoke(request, config=config)

    history = response["messages"]
    tokens = count_tokens(history)
    print(f"{city} - {tokens} tokens, {len(history)} messages")
    print(f"{history}")

Paris - 144 tokens, 4 messages
[HumanMessage(content='Find Hotels in Paris', additional_kwargs={}, response_metadata={}, id='9a9787ba-ed30-4a8a-bee5-7cd3d37b4ebf'), AIMessage(content='', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 15, 'prompt_tokens': 56, 'total_tokens': 71, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_c4585b5b9c', 'id': 'chatcmpl-D0UYkkv0iAVxLwdAoJNdW8YmTbW4A', 'service_tier': 'default', 'finish_reason': 'tool_calls', 'logprobs': None}, id='lc_run--019be134-07f0-73b0-97fd-b4b6f7aa9d99-0', tool_calls=[{'name': 'search_hotels', 'args': {'city': 'Paris'}, 'id': 'call_3Q4ICjKVdDXsfgv70aWlG0Ej', 'type': 'tool_call'}], invalid_tool_calls=[], usage_metadata={'input_tokens': 56

#### Based on Fraction

In [16]:
from langchain.agents import create_agent
from langchain.agents.middleware import SummarizationMiddleware
from langchain_core.tools import tool
from langchain_core.messages import HumanMessage
from langgraph.checkpoint.memory import MemorySaver

@tool
def search_hotels(city: str) -> str:
    """ Search hotels for a given city """
    # NOTE(review): docstring kept verbatim; `@tool` presumably uses it as the
    # tool description given to the model -- confirm before rewording it.
    rates = "Grand Hotel $350, City Inn $180, Budget Stay $75"
    return f"Hotels in {city}: {rates}"

# LOW fractions for testing. Assuming the fraction is measured against the
# model's context window (128,000 tokens for gpt-4o-mini), very small values
# are needed for summarization to trigger during a short demo.
fraction_summarizer = SummarizationMiddleware(
    model="gpt-4o-mini",
    trigger=("fraction", 0.005),  # 0.5% of 128,000 tokens is 640 tokens
    keep=("fraction", 0.002),  # 0.2% of 128,000 tokens is 256 tokens
)

agent = create_agent(
    model="gpt-4o-mini",
    tools=[search_hotels],
    checkpointer=MemorySaver(),
    middleware=[fraction_summarizer],
)
# Run configuration for the fraction-based demo; names the thread "test-3".
config = {
    "configurable": {"thread_id": "test-3"},
}

def count_tokens(messages):
    """Approximate token usage as total stringified content length // 4."""
    content_lengths = [len(str(message.content)) for message in messages]
    return sum(content_lengths) // 4

# Test: query each city with the same config and report the approximate
# history size, both in tokens and as a share of a 128,000-token context.
cities = [
    "Paris",
    "London",
    "Tokyo",
    "New York",
    "Dubai",
    "Singapore",
    "Bangalore",
]

for city in cities:
    request = {"messages": [HumanMessage(content=f"Hotels in {city}")]}
    response = agent.invoke(request, config=config)
    history = response["messages"]
    tokens = count_tokens(history)
    fraction = tokens / 128_000  # gpt-4o-mini context
    print(f"{city}: ~{tokens} tokens ({fraction:.4%}), {len(history)} messages")
    print(f"{history}")

Paris: ~71 tokens (0.0555%), 4 messages
[HumanMessage(content='Hotels in Paris', additional_kwargs={}, response_metadata={}, id='23f61891-7b1b-45f2-ad36-f9708a8c4be4'), AIMessage(content='', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 15, 'prompt_tokens': 48, 'total_tokens': 63, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_29330a9688', 'id': 'chatcmpl-D0V3BAoLmDGJxpmviDOHr1tmZBFYZ', 'service_tier': 'default', 'finish_reason': 'tool_calls', 'logprobs': None}, id='lc_run--019be150-d431-7f21-8914-db6935ad09b1-0', tool_calls=[{'name': 'search_hotels', 'args': {'city': 'Paris'}, 'id': 'call_SUjiN3nnsnlGIWEVDazgDh0F', 'type': 'tool_call'}], invalid_tool_calls=[], usage_metadata={'input_tokens'

## Human in the Loop

Pause agent execution for human approval, editing, or rejection of tool calls before they execute. Human-in-the-loop is useful for the
following:
- High-stakes operations requiring human approval (e.g., database writes, financial transactions)
- Compliance workflows where human oversight is mandatory
- Long-running conversations where human feedback guides the agent

In [17]:
from langchain.agents import create_agent
from langchain.agents.middleware import HumanInTheLoopMiddleware
from langgraph.checkpoint.memory import MemorySaver

In [18]:
def read_email_tool(emailid: str) -> str:
    """ Mock function to read an email by its ID. """
    # NOTE(review): docstring kept verbatim; the agent presumably uses it as
    # the tool description -- confirm before rewording it.
    # No mailbox is touched: the reply simply echoes the requested ID.
    prefix = "Email content for ID: "
    return f"{prefix}{emailid}"

def send_email_tool(recipient: str, subject: str, body: str) -> str:
    """ Mock function to send an email. """
    # NOTE(review): docstring kept verbatim; the agent presumably uses it as
    # the tool description -- confirm before rewording it.
    # Nothing is sent; `body` is accepted but not included in the confirmation.
    confirmation = "Email sent to {} with subject: {}"
    return confirmation.format(recipient, subject)


In [25]:
# Human-in-the-loop policy: `send_email_tool` lists the decisions a reviewer
# may take (approve, edit, reject); `read_email_tool` is mapped to False,
# presumably meaning it runs without pausing.
email_approval = HumanInTheLoopMiddleware(
    interrupt_on={
        "send_email_tool": {"allowed_decisions": ["approve", "edit", "reject"]},
        "read_email_tool": False,
    },
)

agent = create_agent(
    model="gpt-4o",
    tools=[read_email_tool, send_email_tool],
    checkpointer=MemorySaver(),
    middleware=[email_approval],
)

In [26]:
# Approve flow runs on its own thread.
config = {"configurable": {"thread_id": "test_approve"}}

# Step 1: Request
send_request = HumanMessage(
    content="Send email to ram@test.com with subject 'Hello' and body 'How are you?'"
)
result = agent.invoke({"messages": [send_request]}, config=config)
# Trailing expression so the notebook displays the returned state.
result

{'messages': [HumanMessage(content="Send email to ram@test.com with subject 'Hello' and body 'How are you?'", additional_kwargs={}, response_metadata={}, id='bf5c3875-c685-488a-ae2d-bcf9275ef380'),
  AIMessage(content='', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 28, 'prompt_tokens': 97, 'total_tokens': 125, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_deacdd5f6f', 'id': 'chatcmpl-D0WOEJ0H9hIWrkWY3gGyFuhD4ouXP', 'service_tier': 'default', 'finish_reason': 'tool_calls', 'logprobs': None}, id='lc_run--019be19f-644a-7c01-bcf2-5a504634e207-0', tool_calls=[{'name': 'send_email_tool', 'args': {'recipient': 'ram@test.com', 'subject': 'Hello', 'body': 'How are you?'}, 'id': 'call_rm760mLKnT8nUTQP9b

In [27]:
# Step 2: Approve
from langgraph.types import Command

# Only resume when the previous invoke reported an "__interrupt__" entry.
if "__interrupt__" in result:
    print("⏸️ Paused! Approving...")

    approval = Command(resume={"decisions": [{"type": "approve"}]})
    result = agent.invoke(approval, config=config)
    print(f"✅ Result: {result['messages'][-1].content}")

⏸️ Paused! Approving...
✅ Result: The email has been sent to ram@test.com with the subject "Hello" and the body "How are you?".


In [28]:
# Display the current `result` (the state left by the approve flow).
result

{'messages': [HumanMessage(content="Send email to ram@test.com with subject 'Hello' and body 'How are you?'", additional_kwargs={}, response_metadata={}, id='bf5c3875-c685-488a-ae2d-bcf9275ef380'),
  AIMessage(content='', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 28, 'prompt_tokens': 97, 'total_tokens': 125, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_deacdd5f6f', 'id': 'chatcmpl-D0WOEJ0H9hIWrkWY3gGyFuhD4ouXP', 'service_tier': 'default', 'finish_reason': 'tool_calls', 'logprobs': None}, id='lc_run--019be19f-644a-7c01-bcf2-5a504634e207-0', tool_calls=[{'name': 'send_email_tool', 'args': {'recipient': 'ram@test.com', 'subject': 'Hello', 'body': 'How are you?'}, 'id': 'call_rm760mLKnT8nUTQP9b

In [29]:
# Reject flow: runs on its own thread so it does not share history with the
# approve example.
config = {"configurable": {"thread_id": "test_reject"}}

# Step 1: Request
result = agent.invoke(
    {"messages": [HumanMessage(content="Send email to ram@test.com with subject 'Hello' and body 'How are you?'")]},
    config=config,
)

# Step 2: Reject
from langgraph.types import Command

# Only resume when the previous invoke reported an "__interrupt__" entry.
if "__interrupt__" in result:
    print("⏸️ Paused! Rejecting...")

    result = agent.invoke(
        Command(resume={"decisions": [{"type": "reject"}]}),
        config=config,
    )
    print(f"❌ Result: {result['messages'][-1].content}")

⏸️ Paused! Rejecting...
❌ Result: If there's anything specific you'd like to change or if you need help with something else, feel free to let me know!


In [30]:
# Display the current `result` (the state left by the reject flow).
result

{'messages': [HumanMessage(content="Send email to ram@test.com with subject 'Hello' and body 'How are you?'", additional_kwargs={}, response_metadata={}, id='68288fe1-c062-4f47-83b6-fcc9f357ea48'),
  AIMessage(content='', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 28, 'prompt_tokens': 97, 'total_tokens': 125, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_deacdd5f6f', 'id': 'chatcmpl-D0WQR58EwtLOZrdoRkN2pcPoSkLZv', 'service_tier': 'default', 'finish_reason': 'tool_calls', 'logprobs': None}, id='lc_run--019be1a1-77ef-7ef2-9a62-f1a256285cd2-0', tool_calls=[{'name': 'send_email_tool', 'args': {'recipient': 'ram@test.com', 'subject': 'Hello', 'body': 'How are you?'}, 'id': 'call_BWvIlJQubcyo2O5UHd

In [31]:
# Edit flow: runs on its own thread so it does not share history with the
# approve and reject examples.
config = {"configurable": {"thread_id": "test_edit"}}

# Step 1: Request (deliberately addressed to the wrong recipient)
result = agent.invoke(
    {"messages": [HumanMessage(content="Send email to wrong@test.com with subject 'Hello' and body 'How are you?'")]},
    config=config,
)

# Step 2: Edit
from langgraph.types import Command

# Only resume when the previous invoke reported an "__interrupt__" entry.
if "__interrupt__" in result:
    print("⏸️ Paused! Editing...")

    # Replacement tool call supplied by the human reviewer.
    edited_action = {
        "name": "send_email_tool",
        "args": {
            "recipient": "correct@email.com",
            "subject": "Corrected Subject",
            "body": "This email was edited by human before sending",
        },
    }
    result = agent.invoke(
        Command(
            resume={
                "decisions": [
                    {"type": "edit", "edited_action": edited_action},
                ]
            }
        ),
        config=config,
    )
    print(f"✍️ Result: {result['messages'][-1].content}")

⏸️ Paused! Editing...
✍️ Result: The email has been sent with the corrected details.


In [32]:
# Display the current `result` (the state left by the edit flow).
result

{'messages': [HumanMessage(content="Send email to wrong@test.com with subject 'Hello' and body 'How are you?'", additional_kwargs={}, response_metadata={}, id='27d2e6e5-e65c-43c8-babf-96cc366f097b'),
  AIMessage(content='', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 28, 'prompt_tokens': 97, 'total_tokens': 125, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_deacdd5f6f', 'id': 'chatcmpl-D0WV4BgxAsy6fIKagCqQAxFGVRlPx', 'service_tier': 'default', 'finish_reason': 'tool_calls', 'logprobs': None}, id='lc_run--019be1a5-ded2-7320-acc8-3c415610e30b-0', tool_calls=[{'type': 'tool_call', 'name': 'send_email_tool', 'args': {'recipient': 'correct@email.com', 'subject': 'Corrected Subject', 'body': 'This e