In [1]:
# pip install -U autogen-agentchat autogen-ext[openai,web-surfer]
# playwright install
# %pip install python-dotenv

## Example using the ROUND ROBIN GROUPCHAT team with a USER PROXY AGENT and a MULTIMODAL WEBSURFER

In [6]:
import sys
import asyncio
from autogen_agentchat.agents import UserProxyAgent
from autogen_agentchat.conditions import TextMentionTermination
from autogen_agentchat.teams import RoundRobinGroupChat
from autogen_agentchat.ui import Console
from autogen_ext.models.openai import AzureOpenAIChatCompletionClient
from autogen_ext.agents.web_surfer import MultimodalWebSurfer
from autogen_agentchat.teams import MagenticOneGroupChat
from dotenv import load_dotenv

from settings import llm_config
load_dotenv()


True

In [7]:
# Build the chat-completion client from the component config imported from `settings`.
# This module-level client is referenced by the agent/team cells further down.
model_client = AzureOpenAIChatCompletionClient.load_component(llm_config)

## This requires the USER to provide feedback as to what to do next

In [4]:
async def main() -> None:
    """Run an interactive browsing session in which the user steers the web surfer.

    A ``MultimodalWebSurfer`` and a ``UserProxyAgent`` take turns in a
    ``RoundRobinGroupChat``. The conversation is streamed to the console and
    terminates once a message from ``user_proxy`` mentions ``exit``.

    The session builds its own model client from ``llm_config`` instead of using
    the notebook-level ``model_client``: the client is closed in the ``finally``
    block, and closing the shared one would leave later cells with an unusable client.
    """
    # Client owned (and closed) by this run only.
    session_client = AzureOpenAIChatCompletionClient.load_component(llm_config)
    # The web surfer will open a Chromium browser window to perform web browsing tasks.
    web_surfer = MultimodalWebSurfer("web_surfer", session_client, headless=False, animate_actions=True)
    # The user proxy agent is used to get user input after each step of the web surfer.
    # NOTE: you can skip input by pressing Enter.
    user_proxy = UserProxyAgent("user_proxy")
    # The termination condition is set to end the conversation when the user types 'exit'.
    termination = TextMentionTermination("exit", sources=["user_proxy"])
    # Web surfer and user proxy take turns in a round-robin fashion.
    team = RoundRobinGroupChat([web_surfer, user_proxy], termination_condition=termination)
    try:
        # Start the team and wait for it to terminate.
        await Console(team.run_stream(task="find information about the latest iPhone"))
    finally:
        # Nested so the client is still closed if closing the surfer raises.
        try:
            await web_surfer.close()
        finally:
            await session_client.close()

# Run the round-robin demo; it keeps going until the user proxy's input mentions 'exit'.
await main()

---------- TextMessage (user) ----------
find information about the latest iPhone


  response = await self._model_client.create(


---------- MultiModalMessage (web_surfer) ----------
I typed 'latest iPhone' into '0 characters out of 2000'.

The web browser is open to the page [latest iPhone - Search](https://www.bing.com/search?q=latest+iPhone&form=QBLH&sp=-1&lq=0&pq=latest+iphon&sc=12-12&qs=n&sk=&cvid=28CE1C1256A74E7E861503C71ED5ED19).
The viewport shows 17% of the webpage, and is positioned at the top of the page
The following text is visible in the viewport:

Skip to content
latest iPhoneFrançais3
You could win a Million USD!
Claim Your Free Entry to Join the $2 Million Dollar Sweepstakes!
Claim free entry
Later
All
Search
Copilot
Shopping
Images
Videos
Maps
More
Tools
About 1,690,000 resultsSponsored
See Latest iPhoneApple iPhone 15, 256GB, Black (Renewed)$959.99Amazon CAFree shippingiPhone 16 Pro Max 256GB With 2-Yr Phone Plan On Bell Smartpay + Device Return Option$1,799.28 now$42/mo x 24
Bell MobilityiPhone 16 Pro 128GB Desert Titanium Unlocked- Apple$1,449.00Apple StoreFree shippingiPhone 15 128GB Pink Un

## This DOES NOT require user feedback as to what to do next; the orchestrator decides that

In [8]:
async def main(task: str = "Find information about the latest iPhone.") -> None:
    """Run a browsing task with a MagenticOne orchestrator directing the web surfer.

    Args:
        task: Natural-language request handed to the team. Other requests that
            were tried with this setup:
              - "Navigate to the AutoGen readme on GitHub."
              - "Summarize latest updates from Accenture newsroom."
              - "Summarize latest news from venture beat all things in AI."
              - "what NHL games are happening today, and tell me about the teams playing"
              - "what is the weather like in Toronto today, and what is the weather like in New York today?"

    The surfer is closed in a ``finally`` block so it is released even when the
    run raises. The notebook-level ``model_client`` is left open for reuse.
    """
    surfer = MultimodalWebSurfer(
        "MultimodalWebSurfer",
        model_client=model_client,
        downloads_folder="./downs",
        debug_dir="./debug",
        headless=False,
        to_resize_viewport=True,
        description="A web surfing assistant that can browse and interact with web pages.",
        start_page="https://www.google.com",  # Optional: Initial page
        animate_actions=True,
        browser_data_dir="./browser_data",
    )

    # Alternative team without an orchestrator:
    # team = RoundRobinGroupChat([surfer], max_turns=3)
    team = MagenticOneGroupChat([surfer], model_client=model_client, max_turns=3)
    try:
        await Console(team.run_stream(task=task))
    finally:
        await surfer.close()


if __name__ == "__main__":
    if sys.platform == "win32":
        asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
await main()

  response = await self._model_client.create(
  response = await self._model_client.create(


---------- TextMessage (user) ----------
Find information about the latest iPhone.
---------- TextMessage (MagenticOneOrchestrator) ----------

We are working to address the following user request:

Find information about the latest iPhone.


To answer this request we have assembled the following team:

MultimodalWebSurfer: A web surfing assistant that can browse and interact with web pages.


Here is an initial fact sheet to consider:

### 1. GIVEN OR VERIFIED FACTS  
None. The request simply asks for information about the latest iPhone, and does not provide any specific facts or figures.  

### 2. FACTS TO LOOK UP  
- The model name and number of the latest iPhone (e.g., iPhone 15, iPhone 15 Pro Max).  
- The release date of the latest iPhone.  
- Key specifications (e.g., screen size, processor, camera specs).  
- Pricing and available configurations (e.g., storage options).  
- Any notable new features or advancements introduced in the latest iPhone.  
- These details may be found 

  response = await self._model_client.create(self._get_compatible_context(context), json_output=True)
  response = await self._model_client.create(


---------- MultiModalMessage (MultimodalWebSurfer) ----------
I typed 'latest iPhone specifications, release date, pricing, features' into the browser search bar.

The web browser is open to the page [latest iPhone specifications, release date, pricing, features - Search](https://www.bing.com/search?q=latest+iPhone+specifications%2C+release+date%2C+pricing%2C+features&FORM=QBLH).
The viewport shows 40% of the webpage, and is positioned at the top of the page
The following text is visible in the viewport:

Skip to content
FrançaisMobileCollections
Settings
Language
English
Country/Region
Canada - English
Location
Voice
More
SafeSearch
Moderate
Copilot Settings
Copilot response on result page
Choose if you want to see Copilot responses on the search result page.
On
Search history
Privacy
Feedback
Appearance
LightDarkSystem defaultThemes
You could win a Million USD!
Claim Your Free Entry to Join the $2 Million Dollar Sweepstakes!
Claim free entry
Later
All
Search
Shopping
Images
Videos
Ma

  response = await self._model_client.create(self._get_compatible_context(context), json_output=True)


---------- MultiModalMessage (MultimodalWebSurfer) ----------
I clicked 'iPhone | Apple Official Site | Get to know iPhone'.

The web browser is open to the page [Apple (Canada)](https://www.apple.com/ca/?&mtid=20925xpb40345&aosid=p238&mnid=s-dc_mtid_20925xpb40345_pcrid_78202979837155_pgrid_1251245574385448_pexid__ptid_kwd-78203223289767:loc-32_&cid=wwa-ca-kwbi-iphone-slid----Announce-).
The viewport shows 16% of the webpage, and is positioned at the top of the page
The following text is visible in the viewport:

Apple
Apple
Store
Shop
Shop the Latest
Mac
iPad
iPhone
Apple Watch
Apple Vision Pro
Accessories
Quick Links
Find a Store
Order Status
Apple Trade In
Financing
Personal Setup
Shop Special Stores
Certified Refurbished
Education
Business
Mac
Explore Mac
Explore All Mac
MacBook Air
MacBook Pro
iMac
Mac mini
Mac Studio
Mac Pro
Displays
Compare Mac
Switch from PC to Mac
Shop Mac
Shop Mac
Mac Accessories
Apple Trade In
Financing
More from Mac
Mac Support
AppleCare+ for Mac
macOS Sequ

  response = await self._model_client.create(
