In [None]:
# Load IPython's autoreload extension; mode 2 re-imports modules before each
# cell runs, so local edits to library code are picked up without a restart.
%load_ext autoreload
%autoreload 2

# Configure root logging at DEBUG *before* the hud / mcp_use imports that
# follow: those packages log while being imported (see the DEBUG records in
# this cell's output), and they are only shown if logging is already set up.
import logging
logging.basicConfig(level=logging.DEBUG)
# Logger for this notebook's own messages.
logger = logging.getLogger(__name__)

import asyncio
import logging
from hud.mcp_agent import ClaudeMCPAgent, OpenAIMCPAgent
from hud import Task
from mcp_use import MCPClient

"""
Example: Gmail environment - local

This example demonstrates the new agent.run(task) interface that can handle:
1. Simple string queries
2. Full Task objects with setup/evaluate lifecycle

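Usage:
    # First, build the gmail environment image. The notebook starts the
    # container itself via `docker run ... gmail`, so the tag must be `gmail`:
    docker build -t gmail .

    # Then open this notebook in Jupyter and run all cells (it uses top-level
    # `await`, so it cannot be executed with plain `python`):
    jupyter notebook examples/environments/gmail_local.ipynb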
"""

DEBUG:hud.telemetry.instrumentation.registry:Registered instrumentor: mcp
DEBUG:hud.telemetry.instrumentation.registry:MCP instrumentor registered
DEBUG:mcp_use.observability.laminar:Laminar API key not found - tracing disabled. Set LAMINAR_PROJECT_API_KEY to enable
DEBUG:mcp_use.observability.langfuse:Langfuse API keys not found - tracing disabled. Set LANGFUSE_PUBLIC_KEY and LANGFUSE_SECRET_KEY to enable


In [None]:


print("🚀 Gmail Local Example")
print("=" * 50)

# Configure MCP client to connect to gmail environment.
# The "gmail" server is started by running the locally built `gmail` Docker
# image as a child process (`docker run ...`).
config = {
    "mcpServers": {
        "gmail": {
            "command": "docker",
            "args": [
                "run",
                # Remove the container when it exits; without this every run
                # of the notebook leaves another stopped container behind.
                "--rm",
                # Keep stdin open so the client can talk to the process.
                "-i",
                # Publish container port 6080 on the host
                # (presumably the environment's browser/VNC view — confirm).
                "-p",
                "6080:6080",
                "gmail",
            ],
        }
    }
}

# Create MCP client and agent
print("📡 Connecting to browser environment...")
client = MCPClient.from_dict(config)

# Claude-backed agent restricted to the `computer` tool; it is asked to take
# an initial screenshot (initial_screenshot=True).
agent = ClaudeMCPAgent(
    client=client,
    model="claude-sonnet-4-20250514",
    allowed_tools=["computer"],
    initial_screenshot=True,
)

🚀 Simple Task Interface Example
📡 Connecting to browser environment...
✅ Agent created! Testing both query and task modes...

🎯 Example 2: Full Task Lifecycle
------------------------------
📋 Task: Open Sent mail, search for the Series B pitch deck, forward it to billgates@microsoft.com, and mark the original message as important.
⚙️  Setup: {'problem_id': 'forward-series-b-deck-to-billgates'}
📊 Evaluate: {'problem_id': 'forward-series-b-deck-to-billgates'}
[INFO] 2025-08-01 20:11:05,434 | hud.mcp_agent.base | No active sessions found, creating new ones...
[INFO] 2025-08-01 20:11:08,073 | hud.mcp_agent.base | Tools from 'gmail' (pre-filter): ['computer', 'setup', 'evaluate']
[INFO] 2025-08-01 20:11:08,074 | hud.mcp_agent.base | Agent initialized with 3 tools: ['computer', 'setup', 'evaluate']
[INFO] 2025-08-01 20:11:08,074 | hud.mcp_agent.base | Running agent with prompt: id=None prompt='Open Sent mail, search for the Series B pitch deck, forward it to billgates@microsoft.com, and mark

In [None]:
# The instruction is used both as the agent prompt and as the task description,
# and the same problem id drives the setup and evaluate steps.
instruction = "Open Sent mail, search for the Series B pitch deck, forward it to billgates@microsoft.com, and mark the original message as important."
problem_id = "forward-series-b-deck-to-billgates"

# Raw task specification, expanded into keyword arguments for `Task` below.
task_dict = {
    "id": None,
    "prompt": instruction,
    "system_prompt": None,
    "gym": {
        "type": "public",
        "location": "local",
        "image_or_build_context": "gmail",
        "host_config": None,
    },
    # Separate dict objects on purpose: setup and evaluate must not share state.
    "setup": {"problem_id": problem_id},
    "evaluate": {"problem_id": problem_id},
    "config": None,
    "sensitive_data": {},
    "metadata": {"id": problem_id},
    "description": instruction,
}

task = Task(**task_dict)

# Echo the parts of the task that drive the run.
print(f"📋 Task: {task.prompt}")
print(f"⚙️  Setup: {task.setup}")
print(f"📊 Evaluate: {task.evaluate}")

In [None]:
eval_result = await agent.run(task, max_steps=10)
print(f"🎉 Task Result: {eval_result}")

# Show formatted results
reward = eval_result.get("reward", 0.0) if isinstance(eval_result, dict) else 0.0
success = reward > 0.5
info = eval_result.get("info", {}) if isinstance(eval_result, dict) else {}

print(f"\n📈 Task Performance:")
print(f"   ✅ Success: {success}")
print(f"   🏆 Reward: {reward}")
print(f"   📝 Info: {info}")

In [None]:
# Tear down: close every session held by the MCP client created earlier.
# NOTE(review): presumably this also stops the `docker run` child process
# backing the gmail server — confirm against mcp_use.
print("\n🧹 Cleaning up...")
await client.close_all_sessions()
print("✅ Done!")