Week 5 Day 2

### AutoGen AgentChat - Going deeper..

1. Multi-modal conversation
2. Structured Outputs
3. Using LangChain tools
4. Teams

...and a special surprise extra piece

In [None]:
from io import BytesIO
import requests
from autogen_agentchat.messages import TextMessage, MultiModalMessage
from autogen_core import Image as AGImage
from PIL import Image
from dotenv import load_dotenv
from autogen_ext.models.openai import OpenAIChatCompletionClient
from autogen_agentchat.agents import AssistantAgent
from autogen_core import CancellationToken
from IPython.display import display, Markdown
from pydantic import BaseModel, Field
from typing import Literal

load_dotenv(override=True)


### A multi-modal conversation

In [None]:
# AutoGen conversations are not limited to text: a picture can be sent along
# with the text and become part of the conversation.
url = "https://edwarddonner.com/wp-content/uploads/2024/10/from-software-engineer-to-AI-DS.jpeg"

# Bound the download with a timeout so an unresponsive server cannot hang the
# cell, and raise on an HTTP error status instead of passing an error page to
# PIL as if it were image bytes.
image_response = requests.get(url, timeout=30)
image_response.raise_for_status()
pil_image = Image.open(BytesIO(image_response.content))

# Create the AutoGen image from the PIL image.
img = AGImage(pil_image)
# Last expression of the cell, so the notebook displays it.
img

In [None]:
# The message: a MultiModalMessage whose content is a list holding the text
# instruction followed by the image.
multi_modal_message = MultiModalMessage(
    source="User",
    content=["Describe the content of this image in detail", img],
)

In [None]:
# The Model
model_client = OpenAIChatCompletionClient(model="gpt-4o-mini")

# The Agent
describer = AssistantAgent(
    name="description_agent",
    model_client=model_client,
    system_message="You are good at describing images",
)

response = await describer.on_messages([multi_modal_message], cancellation_token=CancellationToken())
reply = response.chat_message.content
display(Markdown(reply))

### Structured Outputs!

Autogen AgentChat makes it easy.

In [None]:
# ImageDescription is a subclass of the Pydantic BaseModel.
# It is the class to be populated with the answer from the LLM; each field
# carries a description of what that field should contain.
class ImageDescription(BaseModel):
    scene: str = Field(description="Briefly, the overall scene of the image")
    message: str = Field(description="The point that the image is trying to convey")
    style: str = Field(description="The artistic style of the image")
    orientation: Literal["portrait", "landscape", "square"] = Field(description="The orientation of the image")


In [None]:
model_client = OpenAIChatCompletionClient(model="gpt-4o-mini")

describer = AssistantAgent(
    name="description_agent",
    model_client=model_client,
    system_message="You are good at describing images in detail",
    output_content_type=ImageDescription, # output content type equals and we pass in the pedantic object.
)

# The reply we expect to simply be a type of this object.
# It's going to have replied with this object.
# And remember, it feels as if the model is able to reply with a Python object.
# And what's going on behind the scenes.
# I know you know this.
# It's all just JSON.
# This is converted into some sort of a JSON spec, and the model returns JSON and the wrapper code then
# populates this object from the JSON.
response = await describer.on_messages([multi_modal_message], cancellation_token=CancellationToken())
reply = response.chat_message.content
# It is indeed an image description object.
reply

In [None]:
# textwrap.fill re-flows each field so that long text wraps onto new lines
# after a certain number of characters.
# The point of this cell: structured outputs are easy to use, and the data
# that comes back follows the ImageDescription schema.
import textwrap

for heading, body in (
    ("Scene", reply.scene),
    ("Message", reply.message),
    ("Style", reply.style),
    ("Orientation", reply.orientation),
):
    print(f"{heading}:\n{textwrap.fill(body)}\n\n")

### Using LangChain tools from AutoGen

In [None]:

# We're going to use the tools that we worked with last week in long chain.
# Autogen has a really easy way to wrap long chain tools so that you can call them directly from within
# Autogen.

# AutoGen's wrapper:
# you can use that to wrap any long chain tool and it becomes an autogen tool.
from autogen_ext.tools.langchain import LangChainToolAdapter

# LangChain tools:

from langchain_community.utilities import GoogleSerperAPIWrapper
from langchain_community.agent_toolkits import FileManagementToolkit
from langchain.agents import Tool


prompt = """Your task is to find a one-way non-stop flight from JFK to LHR in June 2025.
First search online for promising deals.
Next, write all the deals to a file called flights.md with full details.
Finally, select the one you think is best and reply with a short summary.
Reply with the selected flight only, and only after you have written the details to the file."""

# createing Goodle Serp api wrapper
serper = GoogleSerperAPIWrapper()

# Lang chain code to create a lang chain tool for searching the internet with a description based on this funciton
langchain_serper =Tool(name="internet_search", func=serper.run, description="useful for when you need to search the internet")

# And now I can call long chain tool adapter.
# Create a new instance of that passing in the long chain tool.
# And by passing that in this adapter adapts that long chain tool to become an autogen tool.
# It's just sort of a wrapper around it.
autogen_serper = LangChainToolAdapter(langchain_serper)

autogen_tools = [autogen_serper]

# And then I've also collected some long chain file tools by getting the file management toolkit giving
# it a directory sandbox I've created an empty directory sandbox on the left.
# And we call Get tools.
# And we get a bunch of tools.
# And for each of those, I'm going to add them to my autogen tools by appending a Lang chain tool adapter
# and Autogen piece of Autogen code that adapts Lang Chains tool to be an Autogen tool.
langchain_file_management_tools = FileManagementToolkit(root_dir="sandbox").get_tools()
for tool in langchain_file_management_tools:
    autogen_tools.append(LangChainToolAdapter(tool))

for tool in autogen_tools:
    print(tool.name, tool.description)

# Aim to call autogen agent
model_client = OpenAIChatCompletionClient(model="gpt-4o-mini")
agent = AssistantAgent(name="searcher", model_client=model_client, tools=autogen_tools, reflect_on_tool_use=True)
message = TextMessage(content=prompt, source="user")
result = await agent.on_messages([message], cancellation_token=CancellationToken())
for message in result.inner_messages:
    print(message.content)
display(Markdown(result.chat_message.content))

In [None]:
# Now we need to call the agent again to write the file
# Now, one of the interesting things about Autogen is the way that it handles the interactions that agents
# typically, uh, will then stop.
# Um, particularly in this kind of mode of working with them.
# And so what we now do is that I'm just going to send the next message of, okay, proceed, send a second
# message to this agent in the same way.

message = TextMessage(content="OK proceed", source="user")

result = await agent.on_messages([message], cancellation_token=CancellationToken())
for message in result.inner_messages:
    print(message.content)
display(Markdown(result.chat_message.content))

### Team interactions

In [None]:
# Multiple assistants can be created and combined into a team.

from autogen_agentchat.agents import AssistantAgent
from autogen_agentchat.conditions import TextMentionTermination
from autogen_agentchat.teams import RoundRobinGroupChat

from autogen_ext.tools.langchain import LangChainToolAdapter
from langchain_community.utilities import GoogleSerperAPIWrapper
from langchain.agents import Tool

# Internet search: a LangChain Serper tool adapted for use from AutoGen.
serper = GoogleSerperAPIWrapper()
langchain_serper = Tool(
    name="internet_search",
    func=serper.run,
    description="useful for when you need to search the internet",
)
autogen_serper = LangChainToolAdapter(langchain_serper)

model_client = OpenAIChatCompletionClient(model="gpt-4o-mini")


prompt = """Find a one-way non-stop flight from JFK to LHR in June 2025."""

# Agent that is doing the internet searches
primary_agent = AssistantAgent(
    name="primary",
    model_client=model_client,
    tools=[autogen_serper],
    system_message="You are a helpful AI research assistant who looks for promising deals on flights. Incorporate any feedback you receive.",
)

# Agent that is doing the evaluation
evaluation_agent = AssistantAgent(
    name="evaluator",
    model_client=model_client,
    system_message="Provide constructive feedback. Respond with 'APPROVE' when your feedback is addressed.",
)

# Termination condition: a text mention of the word "APPROVE".
# This is a little brittle, because it relies on the evaluator agent replying
# with that word. Normally structured outputs would be a better thing to test
# against, but this is perfectly good for now.
text_termination = TextMentionTermination("APPROVE")

# With thanks to Peter A for adding in the max_turns - otherwise this can get into a loop..
# There are various ways to create a team. A round robin group chat has the
# agents take turns one after the other, which is the simplest relationship
# they can have. It is given the list of agents (primary agent, then the
# evaluation agent) and the termination condition that tells it when enough
# is enough.
team = RoundRobinGroupChat(
    [primary_agent, evaluation_agent],
    termination_condition=text_termination,
    max_turns=20,
)


In [None]:
# Actually I've been focusing a lot on the on messages.
# The on messages, uh, thing that you call agents here.
# You can also call Agent Run as well.
# An agent can be called with run.
# And then you just pass in this exactly the same thing.
# You pass in the task, um, task equals and a prompt.
# And what comes back is just going to be the final messages.
# So that is another way of doing it as well.
# Um, but this is what we will do now.

result = await team.run(task=prompt)
for message in result.messages:
    print(f"{message.source}:\n{message.content}\n\n")


### Drumroll..

## Introducing MCP!

Our first look at the Model Context Protocol from Anthropic -

Autogen makes it easy to use MCP tools, just like LangChain tools.

<table style="margin: 0; text-align: left; width:100%">
    <tr>
        <td style="width: 150px; height: 150px; vertical-align: middle;">
            <img src="../assets/stop.png" width="150" height="150" style="display: block;" />
        </td>
        <td>
            <h2 style="color:#ff7800;">But wait - a not-so-small problem for Windows PC people</h2>
            <span style="color:#ff7800;">I have unpleasant news. There's a problem running MCP Servers on Windows PCs; Mac and Linux are fine. This is a known issue as of May 4th, 2025. I asked o3 with Deep Research to try to find workarounds; it <a href="https://chatgpt.com/share/6817bbc3-3d0c-8012-9b51-631842470628">confirmed the issue</a> and confirmed the workaround.<br/><br/>
            The workaround is a bit of a bore. It is to take advantage of "WSL", the Microsoft approach for running Linux on your PC. You'll need to carry out more setup instructions! But it's quick, and several students have confirmed that this works perfectly for them; once it is set up, this lab and the Week 6 MCP labs work. Plus, WSL is actually a great way to build software on your Windows PC. You can also skip this final cell, but you will need to come back to this when we start Week 6.<br/>
            The WSL Setup instructions are in the Setup folder, <a href="../setup/SETUP-WSL.md">in the file called SETUP-WSL.md here</a>. I do hope this only holds you up briefly - you should be back up and running quickly. Oh the joys of working with bleeding-edge technology!<br/><br/>
            With many thanks to student Kaushik R. for raising that this is needed here as well as week 6. Thanks Kaushik!
            </span>
        </td>
    </tr>
</table>

In [None]:
from autogen_agentchat.agents import AssistantAgent
from autogen_ext.models.openai import OpenAIChatCompletionClient
from autogen_ext.tools.mcp import StdioServerParams, mcp_server_tools

# Get the fetch tool from mcp-server-fetch.
# And what this, this this is going to be running locally on our computers.
# And because it's been written a certain way, that tool is just a tool we can use from Autogen.
# So the key to MCP is that different people can write tools and as long as they write them a certain
# way, you can just use them out of the box.
# So MCP server fetch is an example of a tool that is open source and available, and that you can just
# download and run it yourself.
# And that is what this thing here does.
# It runs it locally, and it is a tool which actually runs the playwright browser in a headless mode
# and allows it to go and fetch web pages.
# So it's doing something a bit similar to what we worked on with sidekick last week, but it's just doing
# it not not in a way that brings up the browser, but just does it quietly behind the scenes, headless
# as they call it.
# And it will run that and then it will, it will, uh, get those tools and it will put those tools into
# this thing called fetcher.
# And we can just provide fetcher in as our tools.
# That's it's, just as simple as that.
# And so, uh, yeah, basically what we're doing here is that we're using a public online tool available
# that runs Playwright Browser locally and uses that to scrape the web.
# And we're making that tool available to our assistant.

# And the reason it's cool is that we've just used a tool that someone else has written for, for running
# playwrights in a headless way.
# And we have just incorporated that tool because that tool uses this, this open standard, this standard
# called MCP.
# We're able to just drop that tool in and use it from within Autogen.
# So just like we could use a Lang chain tool from within Autogen, we can use an MCP tool.
# Anyone that's written a tool that conforms to the MCP standard.
# And the cool thing about MCP is that it's such an open standard that anyone can write tools, and there
# are websites where you can get access to lots and lots of these tools.
# So it's like saying, we've got access to the Lang chain ecosystem only it's a whole lot more.=
# It's this massive open source, public community ecosystem of tools.
# Anyone that writes tools that conforms to the MCP standard, you can then access just like this, and
# you can do it from within autogen in this way and immediately have access to any of them.

fetch_mcp_server = StdioServerParams(command="uvx", args=["mcp-server-fetch"])
fetcher = await mcp_server_tools(fetch_mcp_server)

# Create an agent that can use the fetch tool.
model_client = OpenAIChatCompletionClient(model="gpt-4o-mini")
agent = AssistantAgent(name="fetcher", model_client=model_client, tools=fetcher, reflect_on_tool_use=True)  # type: ignore

# Let the agent fetch the content of a URL and summarize it.
result = await agent.run(task="Review edwarddonner.com and summarize what you learn. Reply in Markdown.")
display(Markdown(result.messages[-1].content))