In [None]:
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment
# (presumably the OpenAI API key that ChatOpenAI needs — confirm).
load_dotenv()

In [None]:
from typing import Literal
from langgraph.checkpoint.memory import MemorySaver
from langchain_core.messages import HumanMessage

In [None]:
from langgraph.graph import END, START, StateGraph, MessagesState
from langchain_openai import ChatOpenAI
from langchain_core.tools import tool
from langgraph.prebuilt import ToolNode

In [None]:
@tool
def get_weather(location: str):
    """Call to get the current weather."""
    # The docstring above is kept verbatim: @tool uses it as the tool
    # description that is shown to the model.
    # Canned demo data: Munich (matched case-insensitively) is cloudy,
    # every other location is reported as sunny.
    if location.lower() == "munich":
        return "It's 15 degrees Celsius and cloudy."
    return "It's 32 degrees Celsius and sunny."

In [None]:
# Tools the model may call; the same list is handed to ToolNode further down
# so the graph can execute the calls the model requests.
tools = [get_weather]
# Chat model with the tools bound to it. call_model reads this module-level
# name at call time.
model = ChatOpenAI(model="gpt-4o-mini").bind_tools(tools)

In [None]:
def call_model(state: MessagesState):
    """Run the tool-bound chat model on the conversation so far.

    Reads the history from ``state["messages"]``, invokes the module-level
    ``model`` with it, and returns the reply as a partial state update: a
    dict whose ``"messages"`` key holds a one-element list with the reply.
    """
    reply = model.invoke(state["messages"])
    return {"messages": [reply]}


def should_continue(state: MessagesState) -> Literal["tools", END]:
    """Route after the agent step.

    Returns ``"tools"`` when the most recent message carries tool calls,
    otherwise ``END``.
    """
    latest = state["messages"][-1]
    return "tools" if latest.tool_calls else END

In [None]:
# Node that executes the tool calls requested by the model.
# The StateGraph itself is created in the next cell, right before the nodes
# are added, so it is not built here as well.
tool_node = ToolNode(tools)

In [None]:
# Build the synchronous agent graph: "agent" calls the model, "tools" runs
# whatever tool calls the model requested.
workflow = StateGraph(MessagesState)

workflow.add_node("agent", call_model)
workflow.add_node("tools", tool_node)

# Every run starts at the agent node.
workflow.add_edge(START, "agent")

# After each agent step, should_continue picks the next hop. The explicit
# mapping (the same one the async graph build in this notebook passes) spells
# out the two possible destinations instead of leaving them to be inferred
# from the router's return annotation.
workflow.add_conditional_edges(
    "agent",
    should_continue,
    {"tools": "tools", END: END},
)
# Tool results always go back to the agent so it can use them in its answer.
workflow.add_edge("tools", "agent")

# Compile with an in-memory checkpointer so state is saved per thread_id.
graph = workflow.compile(checkpointer=MemorySaver())

In [None]:
# First turn of the conversation. The thread_id identifies this conversation
# in the checkpointer; the follow-up call in the next cell uses the same id.
graph.invoke(
    {"messages": [HumanMessage(content="How is the weather in munich?")]},
    config={"configurable": {"thread_id": 1}},
)

In [None]:
# Follow-up turn on the same thread_id as the previous call, so the question
# about "that city" is asked against the checkpointed history of that thread.
graph.invoke(
    {
        "messages": [
            HumanMessage(content="What would you recommend to do in that city then?")
        ]
    },
    config={"configurable": {"thread_id": 1}},
)

### Getting production ready - async and streaming

In [None]:

"""
This line configures the ChatOpenAI model with the streaming=True parameter. This is a critical difference from the earlier code. When streaming is enabled:

The LLM will return chunks of the response as they're generated
Users see the response being built word by word
Your application can feel more responsive, especially for longer responses
"""
# Rebind the module-level `model` to a streaming-enabled instance; the tools
# are bound again because this is a new ChatOpenAI object.
model = ChatOpenAI(model="gpt-4o-mini", streaming=True).bind_tools(tools)

In [None]:
"""
Here's what's happening:

The function is now defined with async def - making it an asynchronous function
Instead of model.invoke(), it uses await model.ainvoke(messages)
It still returns the response in the same state format

This change to asynchronous processing is important because:

It allows your application to handle multiple requests simultaneously
The server doesn't get blocked waiting for the LLM to respond
Your application can remain responsive even under load
"""

async def call_model(state: MessagesState):
    """Asynchronously run the tool-bound chat model on the conversation.

    Awaits ``model.ainvoke`` on ``state["messages"]`` and returns the reply
    as a partial state update: a dict whose ``"messages"`` key holds a
    one-element list with the reply.
    """
    reply = await model.ainvoke(state["messages"])
    return {"messages": [reply]}

In [None]:
# Rebuild the graph so that the "agent" node uses the async call_model
# defined in the previous cell.
workflow = StateGraph(MessagesState)
tool_node = ToolNode(tools)

workflow.add_node("agent", call_model)
workflow.add_node("tools", tool_node)

# Every run starts at the agent node.
workflow.add_edge(START, "agent")

# should_continue returns "tools" or END; the mapping sends each return
# value to the node of the same name.
workflow.add_conditional_edges(
    "agent",
    should_continue,
    {"tools": "tools", END: END},
)
# Tool results always go back to the agent.
workflow.add_edge("tools", "agent")

# A fresh MemorySaver: this graph does not share checkpoints with the
# graph compiled earlier in the notebook.
graph = workflow.compile(checkpointer=MemorySaver())

In [None]:
# Input state for the async examples: a single user question.
inputs = {"messages": [HumanMessage(content="How is the weather in Munich?")]}
# The cells below reuse this config, so they all run on thread_id 2.
config = {"configurable": {"thread_id": 2}}

In [None]:
"""
Instead of graph.invoke(), this uses await graph.ainvoke() to call the graph asynchronously. This allows the operation to be awaited, making it work within an async context. This is essential when:

Your application handles multiple users concurrently
You're working in an async framework like FastAPI
You need to perform other operations while waiting for LLM responses
"""

# Top-level await: this works in a notebook/IPython cell. In a plain script,
# wrap the call in an async function and run it with asyncio.run().
await graph.ainvoke(input=inputs, config=config)

In [None]:
"""
This demonstrates streaming with stream_mode="updates":

graph.astream() returns an async iterator
The stream_mode="updates" parameter makes it yield entire updates from each node
The code loops through each update as it arrives
For each update, it prints information about which node produced it and the message content

This is useful when you want to show users which part of your agent is currently working (e.g., "Agent thinking..." followed by "Tool executing...").
"""

"""
This code with stream_mode="updates" shows complete updates from each node in the graph. It doesn't show token-by-token generation, but rather complete outputs when each node in your graph finishes its work.
What you'll see is something like:

"Output from node 'agent':" - followed by the complete LLM response
"Output from node 'tools':" - followed by the complete tool response (if a tool was called)
"Output from node 'agent':" again - if the LLM responds to the tool output

Each node outputs its complete result before moving to the next node. This is useful for tracking the workflow through your graph, showing which component is active at each stage.
"""
inputs = {"messages": [HumanMessage(content="How is the weather in Munich?")]}
async for output in graph.astream(inputs, stream_mode="updates", config=config):
    # stream_mode="updates" yields dictionaries with output keyed by node name
    for key, value in output.items():
        print(f"Output from node '{key}':")
        print("---")
        print(value["messages"][-1].pretty_print())

In [None]:
"""
Here's what's happening in this code:

This code streams the AI's response token by token. When we say "token", it's usually small pieces of text (might be parts of words, punctuation, etc.).
The graph.astream() with stream_mode="messages" yields each small piece of the AI's response as it's being generated.
Now, let's look at the two conditions inside the loop:

First condition: if msg.content and not isinstance(msg, HumanMessage):

This checks if the message has content AND is NOT a human message
For any message that meets these criteria, it prints the content followed by a pipe symbol (|)
This is just visualizing each chunk as it comes in (giving a "typewriter effect")


Second condition: if isinstance(msg, AIMessageChunk):

This specifically checks if the message is an AI message chunk
If it's the first chunk (gathered is None), it initializes gathered with this chunk
If we already have gathered chunks, it adds this new chunk to them using the + operator




After the loop completes, print(gathered.content) displays the entire reassembled message.
"""

"""
What's happening in the code:

First condition (if msg.content and not isinstance(msg, HumanMessage):):

This catches ALL non-human messages that have content
This includes AIMessageChunk and potentially other message types
It prints each chunk's content with a pipe symbol
This gives you a visual of the chunks arriving in real-time


Second condition (if isinstance(msg, AIMessageChunk):):

This specifically targets only AIMessageChunk objects
It builds the complete message by combining all chunks
The first chunk initializes gathered
Each subsequent chunk is added to gathered using the + operator
"""

from langchain_core.messages import AIMessageChunk, HumanMessage

inputs = [HumanMessage(content="How is the weather in Munich?")]
gathered = None

async for msg, metadata in graph.astream(
    {"messages": inputs}, stream_mode="messages", config=config
):
    if msg.content and not isinstance(msg, HumanMessage):
        # Print each token as it streams in
        print(msg.content, end="|", flush=True)

    # Handle the AI message chunks for proper assembly
    if isinstance(msg, AIMessageChunk):
        if gathered is None:
            gathered = msg
        else:
            gathered = gathered + msg

In [None]:
# Show the reply reassembled from the streamed chunks. `gathered` is still
# None if the loop above saw no AIMessageChunk, in which case this line
# raises AttributeError.
print(gathered.content)