### Middleware

Middleware provides a way to more tightly control what happens inside the agent. Middleware is useful for the following:
* Tracking agent behavior with logging, analytics, and debugging.
* Transforming prompts, tool selection, and output formatting.
* Adding retries, fallbacks, and early termination logic.
* Applying rate limits, guardrails, and PII detection.

#### Example with the built-in `SummarizationMiddleware`

In [13]:
from langchain.chat_models import init_chat_model
from langchain.agents import create_agent
from langchain.agents.middleware import SummarizationMiddleware
from langgraph.checkpoint.memory import InMemorySaver

# Agent whose conversation history is summarized based on message count.
# The agent and the summarizer each get their own chat-model instance.
agent = create_agent(
    model=init_chat_model(model="groq:llama-3.1-8b-instant"),
    checkpointer=InMemorySaver(),
    middleware=[
        SummarizationMiddleware(
            model=init_chat_model(model="groq:llama-3.1-8b-instant"),
            trigger=("messages", 6),  # message-count condition for summarizing
            keep=("messages", 4),     # number of recent messages to keep
        ),
    ],
)

# Every invoke below reuses this config, so all turns share one thread_id.
config = {"configurable": {"thread_id": "user_A"}}

user_messages = [
    "what is 2 + 2?",
    "what is 3 * 9?",
    "what is captial of India?",
    "Is chennai in India?",
    "what is national bird of India?",
]

# Send the questions one at a time and show the full state plus the number of
# messages returned after each turn.
for message in user_messages:
    payload = {"messages": [{"role": "user", "content": message}]}
    response = agent.invoke(payload, config=config)
    print(response)
    print(len(response["messages"]))

{'messages': [HumanMessage(content='what is 2 + 2?', additional_kwargs={}, response_metadata={}, id='66b5bcd7-2405-4ee8-be1f-6525b0fefcf1'), AIMessage(content='The answer to 2 + 2 is 4.', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 13, 'prompt_tokens': 43, 'total_tokens': 56, 'completion_time': 0.015816698, 'completion_tokens_details': None, 'prompt_time': 0.002042713, 'prompt_tokens_details': None, 'queue_time': 0.050370207, 'total_time': 0.017859411}, 'model_name': 'llama-3.1-8b-instant', 'system_fingerprint': 'fp_f757f4b0bf', 'service_tier': 'on_demand', 'finish_reason': 'stop', 'logprobs': None, 'model_provider': 'groq'}, id='lc_run--019b4264-5779-7511-9c9b-840bd20402b1-0', usage_metadata={'input_tokens': 43, 'output_tokens': 13, 'total_tokens': 56})]}
2
{'messages': [HumanMessage(content='what is 2 + 2?', additional_kwargs={}, response_metadata={}, id='66b5bcd7-2405-4ee8-be1f-6525b0fefcf1'), AIMessage(content='The answer to 2 + 2 is 4.', additiona

for "trigger" each condition can include:
* fraction (float): Fraction of model’s context size (0-1)
* tokens (int): Absolute token count
* messages (int): Message count

At least one property must be specified per condition. If `trigger` is not provided at all, summarization will not trigger automatically.

for "keep" default is "{messages: 20}"

How much context to preserve after summarization. Specify exactly one of:
* fraction (float): Fraction of model’s context size to keep (0-1)
* tokens (int): Absolute token count to keep
* messages (int): Number of recent messages to keep

#### Using tokens in `SummarizationMiddleware`

In [16]:
from langchain.agents import create_agent
from langchain.agents.middleware import SummarizationMiddleware
# init_chat_model is called twice below; import it here so this cell does not
# depend on an earlier cell having been run first.
from langchain.chat_models import init_chat_model
from langchain.tools import tool
from langgraph.checkpoint.memory import InMemorySaver


# NOTE(review): with @tool the function docstring is presumably used as the
# tool description shown to the model, so its text is left exactly as written.
@tool
def search_hotels(city: str) -> str:
    "search hotels - return long response to use more tokens"
    # Fixed, deliberately wordy listing: the demo only needs tool output that
    # adds to the conversation's token count. No real lookup is performed.
    return f"""Hotels in {city}:
    1.Grand Hotel - 5 star $350/night, spa, pool, gym
    2.City Inn - 4 star $100/night, business center
    3.Budget Stay - 3 star $80/night, free wi-fi """


# Agent with the hotel tool and token-based summarization.
agent = create_agent(
    model=init_chat_model(model="groq:llama-3.1-8b-instant"),
    tools=[search_hotels],
    checkpointer=InMemorySaver(),
    middleware=[
        SummarizationMiddleware(
            model=init_chat_model(model="groq:llama-3.1-8b-instant"),
            trigger=("tokens", 500),  # absolute token count condition
            keep=("tokens", 200),     # absolute token count to keep
        )
    ],
)

In [18]:
# Run configuration handed to agent.invoke(); every hotel query below reuses
# the same thread_id.
config = {
    "configurable": {
        "thread_id": "hotel_user",
    },
}

#token counter function

def count_tokens(messages):
    """Return a rough token estimate for an iterable of messages.

    Each message's ``content`` attribute is converted with ``str()`` and its
    length added to a running character total; the total is then floor-divided
    by 4 (the heuristic used here is 4 characters per token).
    """
    char_count = 0
    for msg in messages:
        char_count += len(str(msg.content))
    return char_count // 4


cities = ["Paris", "Goa", "LA", "London", "Tokyo"]

# Ask about each city in turn with the same config, then report the estimated
# token count and the number of messages in the returned state.
for city in cities:
    request = {"messages": [{"role": "user", "content": f"find hotels in {city}"}]}
    response = agent.invoke(request, config=config)

    history = response["messages"]
    tokens = count_tokens(history)
    print(f"{city}: ~{tokens} tokens, {len(history)} messages")
    print(f"{history}")

Paris: ~174 tokens, 8 messages
[HumanMessage(content='find hotels in Paris', additional_kwargs={}, response_metadata={}, id='bf86d29e-616f-4167-9230-08843b026b62'), AIMessage(content='', additional_kwargs={'tool_calls': [{'id': '8264g1w1m', 'function': {'arguments': '{"city":"Paris"}', 'name': 'search_hotels'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 15, 'prompt_tokens': 226, 'total_tokens': 241, 'completion_time': 0.017932471, 'completion_tokens_details': None, 'prompt_time': 0.013829735, 'prompt_tokens_details': None, 'queue_time': 0.055531525, 'total_time': 0.031762206}, 'model_name': 'llama-3.1-8b-instant', 'system_fingerprint': 'fp_f757f4b0bf', 'service_tier': 'on_demand', 'finish_reason': 'tool_calls', 'logprobs': None, 'model_provider': 'groq'}, id='lc_run--019b4279-24c4-79e0-9d77-0606d61a07ea-0', tool_calls=[{'name': 'search_hotels', 'args': {'city': 'Paris'}, 'id': '8264g1w1m', 'type': 'tool_call'}], usage_metadata={'input_tokens': 226, '