### Static Model
Static models are configured once when creating the agent and remain unchanged throughout execution. This is the most common and straightforward approach. To initialize a static model from a model identifier string:

In [10]:
from langchain.agents import create_agent
from langchain_openai import ChatOpenAI 
from langchain.tools import tool    
from dotenv import load_dotenv
import os
load_dotenv()
langsmith_api_key = os.getenv("LANGCHAIN_API_KEY")
langsmith_project = os.getenv("LANGCHAIN_PROJECT")
langsmith_endpoint = os.getenv("LANGCHAIN_ENDPOINT")

def get_weather(city: str) -> str:
    """Get weather for a given city."""
    return f"It's always sunny in {city}!"
 
model = ChatOpenAI(
    model_name="gpt-5-nano-2025-08-07",
    temperature=0.7,
    stream_usage=True, 
    use_responses_api=True,
    
    

     
)
model_with_tools = model.bind_tools([get_weather])

agent = create_agent(
    model=model_with_tools,
    tools=[get_weather],
    system_prompt="You are a helpful assistant",
)

# Run the agent
agent.invoke(
    {"messages": [{"role": "user", "content": "what is the weather in sf"}]}
)

{'messages': [HumanMessage(content='what is the weather in sf', additional_kwargs={}, response_metadata={}, id='40b2fe6c-4120-49d2-8b51-0204c44853a0'),
  AIMessage(content=[], additional_kwargs={'reasoning': {'id': 'rs_070777c1fb554a660068ebd9a474f48192877e2a385f3499ac', 'summary': [], 'type': 'reasoning'}, '__openai_function_call_ids__': {'call_4vvwfGS9K0zszbZAmj2kbALS': 'fc_070777c1fb554a660068ebd9a61a6c819295d8cd7bb11c7325'}}, response_metadata={'id': 'resp_070777c1fb554a660068ebd9a3b0708192b1042736f30d441d', 'created_at': 1760287139.0, 'metadata': {}, 'model': 'gpt-5-nano-2025-08-07', 'object': 'response', 'service_tier': 'default', 'status': 'completed', 'model_name': 'gpt-5-nano-2025-08-07'}, id='lc_run--7de8e40a-068d-4783-a164-0f1f325277e6-0', tool_calls=[{'name': 'get_weather', 'args': {'city': 'San Francisco'}, 'id': 'call_4vvwfGS9K0zszbZAmj2kbALS', 'type': 'tool_call'}], usage_metadata={'input_tokens': 58, 'output_tokens': 277, 'total_tokens': 335, 'input_token_details': {'ca

### Dynamic Model
Dynamic models are selected at runtime based on the current state and context. This enables sophisticated routing logic and cost optimization.
To use a dynamic model, create middleware with the ```@wrap_model_call``` decorator that modifies the model in the request:

In [46]:
from langchain_openai import ChatOpenAI
from langchain.agents import create_agent
from langchain.agents.middleware import wrap_model_call, ModelRequest
from langchain.agents.middleware.types import ModelResponse
from langchain.tools import tool

basic_model = ChatOpenAI(model="gpt-5-nano-2025-08-07")
advanced_model = ChatOpenAI(model="gpt-5-mini")


@wrap_model_call#type of middleware that takes request and handler as arguments
@traceable
def dynamic_model_selection(request: ModelRequest, handler) -> ModelResponse:
    """Choose model based on conversation complexity."""
    message_count = len(request.state["messages"])

    if message_count > 10:
        # Use advanced model for longer conversations
        model = advanced_model
    else:
        model = basic_model

    request.model = model
    return handler(request)

agent = create_agent(
    model=basic_model,  # Default model
    tools=[get_weather],
    middleware=[dynamic_model_selection] 
)

response = agent.invoke({"messages": [{"role": "user", "content": "what is the weather in sf"}]})
pprint(response)



{'messages': [HumanMessage(content='what is the weather in sf', additional_kwargs={}, response_metadata={}, id='02b25661-d659-4c49-bc88-a7bebb6fe07f'),
              AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_HCHt71165yDeFlP9EpvabjNB', 'function': {'arguments': '{"city":"San Francisco"}', 'name': 'get_weather'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 88, 'prompt_tokens': 131, 'total_tokens': 219, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 64, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-5-nano-2025-08-07', 'system_fingerprint': None, 'id': 'chatcmpl-CPe8v4VxFxBwoAStzSseppXvoQzoV', 'service_tier': 'default', 'finish_reason': 'tool_calls', 'logprobs': None}, id='lc_run--8a32bbbf-327a-4dce-b99a-e80bee6afb42-0', tool_calls=[{'name': 'get_weather', 'args': {'city': 'San Francis

### System Prompt (both regular and dynamic)
When no system_prompt is provided, the agent will infer its task from the messages directly.
For more advanced use cases where you need to modify the system prompt based on runtime context or agent state, you can use middleware.
The ```@dynamic_prompt``` decorator creates middleware that generates system prompts dynamically based on the model request:

In [None]:


 
from typing import TypedDict
from pprint import pprint

from langchain.agents import create_agent
from langchain.agents.middleware.types import dynamic_prompt, ModelRequest, wrap_tool_call
from langchain_core.messages import ToolMessage
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI
from langchain_ollama import ChatOllama

from dotenv import load_dotenv
import os
load_dotenv()
langsmith_api_key = os.getenv("LANGCHAIN_API_KEY")
langsmith_project = os.getenv("LANGCHAIN_PROJECT")
langsmith_endpoint = os.getenv("LANGCHAIN_ENDPOINT")

model = ChatOllama(model_name="gpt-oss:20b")
tools = {"type": "web_search_preview"}
model_with_tools = model.bind_tools([tools]) 


class Context(TypedDict):
    user_role: str
    
    
def web_search(query: str) -> str:
    """Search the web for information."""
    return f"Found information about {query}"

@dynamic_prompt # convenience decorator that creates middleware using wrap_model_call specifically for dynamic prompt generation
def user_role_prompt(request: ModelRequest) -> str:
    """Generate system prompt based on user role."""
    user_role = request.runtime.context.get("user_role", "user")
    base_prompt = "You are a helpful assistant."

    if user_role == "expert":
        return f"{base_prompt} Provide detailed technical responses."
    elif user_role == "beginner":
        return f"{base_prompt} Explain concepts simply and avoid jargon."

    return base_prompt

@wrap_tool_call
def handle_tool_errors(request, handler):
    """Handle tool execution errors with custom messages."""
    try:
        return handler(request)
    except Exception as e:
        # Return a custom error message to the model
        return ToolMessage(
            content=f"Tool error: Please check your input and try again. ({str(e)})",
            tool_call_id=request.tool_call["id"]
        )

agent = create_agent(
    model=model_with_tools,
    # tools=[web_search],
    context_schema=Context,
    middleware=[handle_tool_errors, user_role_prompt]
)

# The system prompt will be set dynamically based on context
result = agent.invoke(
    {"messages": [{"role": "user", "content": "search the web and Explain agentic RAG based on the latest 2025 research."}]},
    context={"user_role": "beginner"}, 
)
pprint(result)


ValidationError: 1 validation error for ChatOllama
model
  Field required [type=missing, input_value={'model_name': 'ollama:gpt-oss:20b'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.11/v/missing

### Structured Output
LangChain provides strategies for structured output via the response_format parameter.

**ToolStrategy**
uses artificial tool calling to generate structured output. This works with any model that supports tool calling

**ProviderStrategy**
uses the model provider’s native structured output generation. This is more reliable but only works with providers that support native structured output (e.g., OpenAI):

In [None]:
# ToolStrategy
from pydantic import BaseModel
from langchain.agents import create_agent
from langchain.agents.structured_output import ToolStrategy

class ContactInfo(BaseModel):
    name: str
    email: str
    phone: str

agent = create_agent(
    model=model,
    tools=[search],
    response_format=ToolStrategy(ContactInfo)
)

result = agent.invoke({
    "messages": [{"role": "user", "content": "Extract contact info from: John Doe, john@example.com, (555) 123-4567"}]
})

result




{'messages': [HumanMessage(content='Extract contact info from: John Doe, john@example.com, (555) 123-4567', additional_kwargs={}, response_metadata={}, id='6d0e2575-1f63-46b0-b5c0-1050f262e25d'),
  AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_7Zmbves1v9tTZknBDuUZKpAu', 'function': {'arguments': '{"name":"John Doe","email":"john@example.com","phone":"(555) 123-4567"}', 'name': 'ContactInfo'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 297, 'prompt_tokens': 163, 'total_tokens': 460, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 256, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-5-nano-2025-08-07', 'system_fingerprint': None, 'id': 'chatcmpl-CPcZ2qnmTQys3rIVg3iDuP0cyXyVl', 'service_tier': 'default', 'finish_reason': 'tool_calls', 'logprobs': None}, id='lc_run--88751229-3af6-4578-8585-8

In [None]:
# ProviderStrategy
from langchain.agents.structured_output import ProviderStrategy

agent = create_agent(
    model=model,
    response_format=ProviderStrategy(ContactInfo)
)
result = agent.invoke({
    "messages": [{"role": "user", "content": "Extract contact info from: John Doe, john@example.com, (555) 123-4567"}]
})

result

{'messages': [HumanMessage(content='Extract contact info from: John Doe, john@example.com, (555) 123-4567', additional_kwargs={}, response_metadata={}, id='9a0adfe7-b25f-4745-b117-dd7525dccbb3'),
  AIMessage(content='{"name":"John Doe","email":"john@example.com","phone":"(555) 123-4567"}', additional_kwargs={'parsed': None, 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 292, 'prompt_tokens': 176, 'total_tokens': 468, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 256, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-5-nano-2025-08-07', 'system_fingerprint': None, 'id': 'chatcmpl-CPcYjFCGVc2iGad9KX7lRcGwN9Rpa', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='lc_run--def6b71d-d37e-447b-a3b8-b39f94af9e23-0', usage_metadata={'input_tokens': 176, 'output_tokens': 292, 'total_tokens': 468, 'input_token_details': {'

### Model methods: (suedo code)

#### Invoke
The most straightforward way to call a model is to use invoke() with a single message or a list of messages.

In [None]:
# single message
response = model.invoke("Why do parrots have colorful feathers?")
print(response)


# Dictionary format
from langchain.messages import HumanMessage, AIMessage, SystemMessage

conversation = [
    {"role": "system", "content": "You are a helpful assistant that translates English to French."},
    {"role": "user", "content": "Translate: I love programming."},
    {"role": "assistant", "content": "J'adore la programmation."},
    {"role": "user", "content": "Translate: I love building applications."}
]

response = model.invoke(conversation)
print(response)  # AIMessage("J'adore créer des applications.")


# message objects
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage

conversation = [
    SystemMessage("You are a helpful assistant that translates English to French."),
    HumanMessage("Translate: I love programming."),
    AIMessage("J'adore la programmation."),
    HumanMessage("Translate: I love building applications.")
]

response = model.invoke(conversation)
print(response)  // AIMessage("J'adore créer des applications.")

#### Stream
Calling ```stream()``` returns an iterator that yields output chunks as they are produced. You can use a loop to process each chunk in real-time

As opposed to ```invoke()```, which returns a single AIMessage after the model has finished generating its full response, ```stream()``` returns multiple AIMessageChunk objects, each containing a portion of the output text. Importantly, each chunk in a stream is designed to be gathered into a full message via summation:

In [None]:
# basic text streaming
for chunk in model.stream("Why do parrots have colorful feathers?"):
    print(chunk.text, end="|", flush=True)



# stream toolcalls, reasoning, and othe content
for chunk in model.stream("What color is the sky?"):
    for block in chunk.content_blocks:
        if block["type"] == "reasoning" and (reasoning := block.get("reasoning")):
            print(f"Reasoning: {reasoning}")
        elif block["type"] == "tool_call_chunk":
            print(f"Tool call chunk: {block}")
        elif block["type"] == "text":
            print(block["text"])
        else:
            ...
            
            
            
# construct an AI message
full = None  # None | AIMessageChunk
for chunk in model.stream("What color is the sky?"):
    full = chunk if full is None else full + chunk
    print(full.text)

# The
# The sky
# The sky is
# The sky is typically
# The sky is typically blue
# ...

print(full.content_blocks)
# [{"type": "text", "text": "The sky is typically blue..."}]

#### Batch
By default, ```batch()``` will only return the final output for the entire batch. If you want to receive the output for each individual input as it finishes generating, you can stream results with ```batch_as_completed()```

In [None]:
#straight batch
responses = model.batch([
    "Why do parrots have colorful feathers?",
    "How do airplanes fly?",
    "What is quantum computing?"
])
for response in responses:
    print(response)
    
    
#yeid batch responses upon completion
for response in model.batch_as_completed([
    "Why do parrots have colorful feathers?",
    "How do airplanes fly?",
    "What is quantum computing?"
]):
    print(response)

#### Tool Calling
To make tools that you have defined available for use by a model, you must bind them using bind_tools(). In subsequent invocations, the model can choose to call any of the bound tools as needed.

When binding user-defined tools, the model’s response includes a request to execute a tool. When using a model separately from an agent, it is up to you to perform the requested action and return the result back to the model for use in subsequent reasoning

In [150]:
#Binding user tools
from langchain.tools import tool
from langchain_openai import ChatOpenAI
import base64
from IPython.display import Image
from langchain.agents import create_agent
from langchain.agents.middleware import wrap_model_call, ModelRequest
from langchain.agents.middleware.types import ModelResponse
from langchain.messages import AIMessage
from langchain_ollama import ChatOllama

model = ChatOllama(model="gpt-oss:20b")
tools = [{"type": "web_search_preview", }, {"type": "image_generation", "quality": "low"}]



@tool
def get_weather(location: str) -> str:
    """Get the weather at a location."""
    return f"It's sunny in {location}."


response = model_with_tools.invoke("Draw a picture of a cute dog")
for tool_call in response.tool_calls:
    # View tool calls made by the model
    print(f"Tool: {tool_call['name']}")
    print(f"Args: {tool_call['args']}")

agent = create_agent(
    model = model_with_tools,
    tools = tools, 
) 
result = agent.invoke({"messages":[
    {"role": "user", "content": "what the weather in New York?"}
    ]})

    
print(type(response))
print(response.text)

<class 'langchain_core.messages.ai.AIMessage'>
I can’t literally draw a picture here, but here’s a little ASCII‑art “cute dog” that you can copy‑paste and share:

```
   __
o-''))_____\/\
 \_/      /  )
     ( /   /
       `--'
```

It’s a simple, cartoonish pup with floppy ears and a wagging tail—hope it brings a smile!


Each ToolMessage returned by the tool includes a tool_call_id that matches the original tool call, helping the model correlate results with requests.

In [148]:
#Tool execution loop - steps 1 and 2 are run under the hood by LangChain.


# Bind (potentially multiple) tools to the model
model_with_tools = model.bind_tools([get_weather])

# Step 1: Model generates tool calls
messages = [{"role": "user", "content": "What's the weather in Boston?"}]
ai_msg = model_with_tools.invoke(messages)
messages.append(ai_msg)

# Step 2: Execute tools and collect results
for tool_call in ai_msg.tool_calls:
    # Execute the tool with the generated arguments
    tool_result = get_weather.invoke(tool_call)
    messages.append(tool_result)

# Step 3: Pass results back to model for final response
final_response = model_with_tools.invoke(messages)
print(final_response.text)
print(ai_msg.tool_calls)
print(tool_result)

It’s sunny in Boston.
[{'name': 'get_weather', 'args': {'location': 'Boston'}, 'id': 'c0ef9fae-e60a-4e99-9447-c1143a392468', 'type': 'tool_call'}]
content="It's sunny in Boston." name='get_weather' tool_call_id='c0ef9fae-e60a-4e99-9447-c1143a392468'


#### Tools (continued)
The simplest way to create a tool is with the ```@tool``` decorator. By default, the function’s docstring becomes the tool’s description that helps the model understand when to use it

Type hints are **required** as they define the tool’s input schema. The docstring should be informative and concise to help the model understand the tool’s purpose.

In [None]:
#basic definition 
from langchain.tools import tool

@tool
def search_database(query: str, limit: int = 10) -> str:
    """Search the customer database for records matching the query.
    Args:
        query: Search terms to look for
        limit: Maximum number of results to return
    """
    return f"Found {limit} results for '{query}'"

#Custom tool name (by default it is the function name)
@tool("web_search")  # Custom name
def search(query: str) -> str:
    """Search the web for information."""
    return f"Results for: {query}"

print(search.name)  # web_search



Advanced shcema definition

In [None]:
#Define complex inputs with Pydantic models 
from pydantic import BaseModel, Field
from typing import Literal

 
class WeatherInput(BaseModel):
    """Input for weather queries."""
    location: str = Field(description="City name or coordinates")
    units: Literal["celsius", "fahrenheit"] = Field(
        default="celsius",
        description="Temperature unit preference"
    )
    include_forecast: bool = Field(
        default=False,
        description="Include 5-day forecast"
    )

@tool(args_schema=WeatherInput)
def get_weather(location: str, units: str = "celsius", include_forecast: bool = False) -> str:
    """Get current weather and optional forecast."""
    temp = 22 if units == "celsius" else 72
    result = f"Current weather in {location}: {temp} degrees {units[0].upper()}"
    if include_forecast:
        result += "\nNext 5 days: Sunny"
    return result

### Messages


#### Trim messages
 If you’re using LangChain, you can use the trim messages utility and specify the number of tokens to keep from the list, as well as the strategy (e.g., keep the last maxTokens) to use for handling the boundary.

To trim message history in an agent, use ```@[pre_model_hook][create_agent]``` with the ```trim_messages``` function:

In [2]:
from langchain.agents.middleware.types import before_model
from langchain_core.messages.utils import trim_messages, count_tokens_approximately
from langchain_core.messages import BaseMessage
from langgraph.checkpoint.memory import InMemorySaver
from langchain.agents import create_agent
from langchain_core.runnables import RunnableConfig
from langchain_openai import ChatOpenAI
import pprint
from langchain_ollama import ChatOllama


model = ChatOllama(model="gpt-oss:20b")

@before_model
def pre_model_hook(state, runtime) -> dict[str, list[BaseMessage]]: #before_model expects runtime even though it's not used
    """
    This function will be called prior to every llm call to prepare the messages for the llm.
    """
    trimmed_messages = trim_messages(
        state["messages"],
        strategy="last",
        token_counter=count_tokens_approximately,
        max_tokens=384,
        start_on="human",
        end_on=("human", "tool"),
    )
    return {"llm_input_messages": trimmed_messages}



checkpointer = InMemorySaver()
agent = create_agent(
    model=model,
    tools=[],
    middleware=[pre_model_hook],
    checkpointer=checkpointer,
)

config: RunnableConfig = {"configurable": {"thread_id": "1"}}

agent.invoke({"messages": "hi, my name is bob"}, config)
agent.invoke({"messages": "write a short poem about cats"}, config)
agent.invoke({"messages": "now do the same but for dogs"}, config)
final_response = agent.invoke({"messages": "what's my name?"}, config)

pprint.pprint(final_response)


{'messages': [HumanMessage(content='hi, my name is bob', additional_kwargs={}, response_metadata={}, id='88220357-4dc4-4cdf-9be2-b033541a83c2'),
              AIMessage(content='Hello, Bob! 👋 How can I help you today?', additional_kwargs={}, response_metadata={'model': 'gpt-oss:20b', 'created_at': '2025-10-15T00:04:36.349819Z', 'done': True, 'done_reason': 'stop', 'total_duration': 24816384167, 'load_duration': 5930117084, 'prompt_eval_count': 73, 'prompt_eval_duration': 17220677333, 'eval_count': 88, 'eval_duration': 1603056578, 'model_name': 'gpt-oss:20b'}, id='lc_run--9cd0cae2-da9d-416a-9057-232389055bdc-0', usage_metadata={'input_tokens': 73, 'output_tokens': 88, 'total_tokens': 161}),
              HumanMessage(content='write a short poem about cats', additional_kwargs={}, response_metadata={}, id='9e942a3d-c3a4-48b5-b13b-01dc088630fe'),
              AIMessage(content='Whiskers twitch at moonlit purrs,  \nA silent grace upon the night.  \nEyes like amber, soft as fur,  \nThey rul

### Summarization (Built-in middleware)
Automatically summarize conversation history when approaching token limits.

In [1]:
from langchain.agents import create_agent
from langchain.agents.middleware import SummarizationMiddleware
from langchain_ollama import ChatOllama
from langchain_core.tools import tool

model = ChatOllama(model="gpt-oss:20b")

@tool
def weather_tool(location: str) -> str:
    """
    Returns the weather in the given location.  
    """
    return f"The weather in {location} is sunny."

@tool
def calculator_tool(expression: str) -> str:
    """
    Returns the result of the given expression.
    """
    return eval(expression)

agent = create_agent( 
    model=model,
    tools=[weather_tool, calculator_tool],
    middleware=[
        SummarizationMiddleware(
            model=model,
            max_tokens_before_summary=4000,  # Trigger summarization at 4000 tokens
            messages_to_keep=20,  # Keep last 20 messages after summary
            summary_prompt="Custom prompt for summarization...",  # Optional
        ),
    ],
)

response = agent.invoke({"messages": [{"role": "user", "content": "What is the square root of 16?"}]})
print(response)


{'messages': [HumanMessage(content='What is the square root of 16?', additional_kwargs={}, response_metadata={}, id='c0ce41d5-cab5-4af8-8704-fb18a29ed589'), AIMessage(content='The square root of 16 is \\(4\\).', additional_kwargs={}, response_metadata={'model': 'gpt-oss:20b', 'created_at': '2025-10-14T23:58:13.334686Z', 'done': True, 'done_reason': 'stop', 'total_duration': 63945702541, 'load_duration': 6948658208, 'prompt_eval_count': 155, 'prompt_eval_duration': 56280518417, 'eval_count': 35, 'eval_duration': 635508251, 'model_name': 'gpt-oss:20b'}, id='lc_run--7801a090-e719-44a3-91a5-666654eb8f8a-0', usage_metadata={'input_tokens': 155, 'output_tokens': 35, 'total_tokens': 190})]}


### Human-in-the-loop (built-in middleware)
Pause agent execution for human approval, editing, or rejection of tool calls before they execute.

In [None]:
from langchain_anthropic import ChatAnthropic
from langchain.agents.middleware.prompt_caching import AnthropicPromptCachingMiddleware
from langchain.agents import create_agent

LONG_PROMPT = """
Please be a helpful assistant.

<Lots more context ...>
"""

agent = create_agent(
    model=ChatAnthropic(model="claude-sonnet-4-latest"),
    system_prompt=LONG_PROMPT,
    middleware=[AnthropicPromptCachingMiddleware(ttl="5m")],
)

# cache store
agent.invoke({"messages": [HumanMessage("Hi, my name is Bob")]})

# cache hit, system prompt is cached
agent.invoke({"messages": [HumanMessage("What's my name?")]})

### Tool call limit (Built-in middleware)
Limit the number of tool calls to specific tools or all tools.

In [None]:
from langchain.agents import create_agent
from langchain.agents.middleware import ToolCallLimitMiddleware

# Limit all tool calls
global_limiter = ToolCallLimitMiddleware(thread_limit=20, run_limit=10)

# Limit specific tool
search_limiter = ToolCallLimitMiddleware(
    tool_name="search",
    thread_limit=5,
    run_limit=3,
)

agent = create_agent(
    model="openai:gpt-4o",
    tools=[...],
    middleware=[global_limiter, search_limiter],
)

### Model Fallback
Automatically fallback to alternative models when the primary model fails.

In [None]:
from langchain.agents import create_agent
from langchain.agents.middleware import ModelFallbackMiddleware

agent = create_agent(
    model="openai:gpt-4o",  # Primary model
    tools=[...],
    middleware=[
        ModelFallbackMiddleware(
            "openai:gpt-4o-mini",  # Try first on error
            "anthropic:claude-3-5-sonnet-20241022",  # Then this
        ),
    ],
)

### Decorator-based middleware
For simple middleware that only needs a single hook, decorators provide the quickest way to add functionality:

In [None]:
from langchain.agents.middleware import before_model, after_model, wrap_model_call
from langchain.agents.middleware import AgentState, ModelRequest, ModelResponse
from langgraph.runtime import Runtime
from typing import Any, Callable

# Node-style: logging before model calls
@before_model
def log_before_model(state: AgentState, runtime: Runtime) -> dict[str, Any] | None:
    print(f"About to call model with {len(state['messages'])} messages")
    return None

# Node-style: validation after model calls
@after_model(can_jump_to=["end"])
def validate_output(state: AgentState, runtime: Runtime) -> dict[str, Any] | None:
    last_message = state["messages"][-1]
    if "BLOCKED" in last_message.content:
        return {
            "messages": [AIMessage("I cannot respond to that request.")],
            "jump_to": "end"
        }
    return None

# Wrap-style: retry logic
@wrap_model_call
def retry_model(
    request: ModelRequest,
    handler: Callable[[ModelRequest], ModelResponse],
) -> ModelResponse:
    for attempt in range(3):
        try:
            return handler(request)
        except Exception as e:
            if attempt == 2:
                raise
            print(f"Retry {attempt + 1}/3 after error: {e}")

# Wrap-style: dynamic prompts
@dynamic_prompt
def personalized_prompt(request: ModelRequest) -> str:
    user_id = request.runtime.context.get("user_id", "guest")
    return f"You are a helpful assistant for user {user_id}. Be concise and friendly."

# Use decorators in agent
agent = create_agent(
    model="openai:gpt-4o",
    middleware=[log_before_model, validate_output, retry_model, personalized_prompt],
    tools=[...],
)