In [1]:
!pip install textarena smithery anthropic httpx mcp --upgrade

Collecting textarena
  Downloading TextArena-0.5.8-py3-none-any.whl.metadata (16 kB)
Downloading TextArena-0.5.8-py3-none-any.whl (8.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.2/8.2 MB[0m [31m14.3 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: textarena
  Attempting uninstall: textarena
    Found existing installation: TextArena 0.5.6
    Uninstalling TextArena-0.5.6:
      Successfully uninstalled TextArena-0.5.6
Successfully installed textarena-0.5.8

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3.11 -m pip install --upgrade pip[0m


In [1]:
import os
from getpass import getpass

# Do not hardcode API keys in the notebook: a pasted key ends up in version
# control and in shared copies. Keys that are already exported in the
# environment are left untouched; missing ones are prompted for without echo.
for _key_name in ("ANTHROPIC_API_KEY", "E2B_API_KEY"):
    if not os.environ.get(_key_name):
        os.environ[_key_name] = getpass(f"Enter {_key_name}: ")

In [2]:
from textarena.core import Agent
import textarena as ta
import asyncio
from typing import Optional

# Default system prompt handed to every agent unless the caller overrides it.
STANDARD_GAME_PROMPT = (
    "You are a competitive game player. "
    "Make sure you read the game instructions carefully, "
    "and always follow the required format."
)

class AsyncAnthropicAgent(Agent):
    """Agent class using the Anthropic Claude API to generate responses asynchronously."""
    def __init__(self, model_name: str, system_prompt: Optional[str] = STANDARD_GAME_PROMPT, max_tokens: int = 1000, temperature: float = 0.9, verbose: bool = False):
        """
        Initialize the Anthropic agent.

        Args:
            model_name (str): The name of the Claude model (e.g., "claude-3-5-sonnet-20241022").
            system_prompt (Optional[str]): The system prompt to use (default: STANDARD_GAME_PROMPT).
                Pass None to send requests without a system prompt.
            max_tokens (int): The maximum number of tokens to generate.
            temperature (float): The temperature for randomness in response generation.
            verbose (bool): If True, additional debug info will be printed.

        Raises:
            ImportError: If the `anthropic` package is not installed.
        """
        super().__init__()
        self.model_name = model_name
        self.system_prompt = system_prompt
        self.max_tokens = max_tokens
        self.temperature = temperature
        self.verbose = verbose

        # Imported lazily so the module can be loaded without the optional dependency.
        try:
            import anthropic
        except ImportError as exc:
            raise ImportError(
                "Anthropic package is required for AsyncAnthropicAgent. "
                "Install it with: pip install anthropic"
            ) from exc

        # The client is constructed without arguments, so credentials are
        # resolved by the SDK itself.
        self.client = anthropic.AsyncAnthropic()

    async def _make_request(self, observation: str) -> str:
        """Make a single API request to Anthropic and return the generated message."""
        request_kwargs = {
            "model": self.model_name,
            "max_tokens": self.max_tokens,
            "temperature": self.temperature,
            "messages": [
                {"role": "user", "content": [{"type": "text", "text": observation}]}
            ],
        }
        # `system_prompt` is Optional: leave the field out entirely instead of
        # sending an explicit `system=None`.
        if self.system_prompt is not None:
            request_kwargs["system"] = self.system_prompt

        response = await self.client.messages.create(**request_kwargs)

        return response.content[0].text.strip()

    async def _retry_request(self, observation: str, retries: int = 3, delay: int = 5) -> str:
        """
        Attempt to make an API request with retries.

        Args:
            observation (str): The input to process.
            retries (int): The number of attempts to try (must be at least 1).
            delay (int): Seconds to wait between attempts.

        Returns:
            str: The response of the first successful attempt.

        Raises:
            ValueError: If `retries` is smaller than 1.
            Exception: The last exception caught if all retries fail.
        """
        # Without this guard a non-positive `retries` would skip the loop and
        # reach `raise last_exception` with None, producing a confusing TypeError.
        if retries < 1:
            raise ValueError(f"retries must be at least 1. Received: {retries}")

        last_exception = None
        for attempt in range(1, retries + 1):
            try:
                response = await self._make_request(observation)
                if self.verbose:
                    print(f"\nObservation: {observation}\nResponse: {response}")
                return response
            except Exception as e:
                last_exception = e
                print(f"Attempt {attempt} failed with error: {e}")
                # No point sleeping after the final attempt.
                if attempt < retries:
                    await asyncio.sleep(delay)
        raise last_exception

    async def __call__(self, observation: str) -> str:
        """
        Process the observation using the Anthropic API and return the generated response.

        Args:
            observation (str): The input string to process.

        Returns:
            str: The generated response.

        Raises:
            ValueError: If `observation` is not a string.
        """
        if not isinstance(observation, str):
            raise ValueError(f"Observation must be a string. Received type: {type(observation)}")
        return await self._retry_request(observation)

In [9]:
import textarena as ta
import smithery
import mcp
import os
import json


async def create_session(url: str):
    """Async generator that opens an MCP client session over a Smithery websocket.

    Yields exactly one initialized session. The websocket and the session stay
    open while the generator is suspended at the `yield`; they are torn down
    when the generator is resumed or closed (e.g. via `aclose()`).

    Args:
        url (str): The websocket URL of the MCP server.

    Yields:
        The connected MCP client session.
    """
    async with smithery.websocket_client(url) as streams:
        async with mcp.client.session.ClientSession(*streams) as session:
            # The MCP protocol requires the initialize handshake to complete
            # before any other request (list_tools, call_tool, ...) is sent.
            await session.initialize()
            yield session

class MCPAgent(AsyncAnthropicAgent):
    """Anthropic agent that lets the model call tools served over an MCP session.

    The MCP session is opened lazily on the first request and reused for all
    later requests. Call `aclose()` when the agent is no longer needed to shut
    the underlying connection down.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments to AsyncAnthropicAgent and prepare the MCP URL.

        Raises:
            KeyError: If the E2B_API_KEY environment variable is not set.
        """
        super().__init__(*args, **kwargs)

        self.url = smithery.create_smithery_url(
            "wss://server.smithery.ai/e2b/ws", {"e2bApiKey": os.environ["E2B_API_KEY"]}
        )

        # Both are filled in lazily by _initialize_session().
        self.session = None
        self._session_generator = None

    async def _initialize_session(self):
        """Return the cached MCP session, opening it on first use."""
        if self.session is None:
            if self._session_generator is None:
                self._session_generator = create_session(self.url)
            try:
                self.session = await anext(self._session_generator)
            except BaseException:
                # A generator that raised is finished; keeping it cached would
                # make every later attempt fail with StopAsyncIteration instead
                # of reconnecting.
                self._session_generator = None
                raise

        return self.session

    async def aclose(self):
        """Close the MCP session (if one was opened) and forget it.

        Safe to call more than once; a later request opens a fresh session.
        NOTE(review): presumably this should be awaited from the same task that
        opened the session - confirm against the MCP / smithery client docs.
        """
        generator, self._session_generator = self._session_generator, None
        self.session = None
        if generator is not None:
            await generator.aclose()

    async def _list_anthropic_tools(self, session) -> list:
        """Fetch the MCP tool list and convert it to Anthropic tool definitions."""
        tools_result = await session.list_tools()

        anthropic_tools = []
        for tool in tools_result.model_dump()["tools"]:
            if "inputSchema" not in tool:
                continue
            # Forward only name / description / input_schema; any other keys
            # present in the MCP dump are dropped.
            spec = {"name": tool["name"], "input_schema": tool["inputSchema"]}
            if tool.get("description"):
                spec["description"] = tool["description"]
            anthropic_tools.append(spec)
        return anthropic_tools

    async def _run_tool(self, session, tool_use_block) -> dict:
        """Execute one tool_use block via MCP and build its tool_result block.

        Errors whose message contains "MCP error" are reported back to the model
        as an error result; any other exception is re-raised.
        """
        print(f"Tool called: {tool_use_block.name}")
        print(f"Tool input: {json.dumps(tool_use_block.input, indent=2)}")

        try:
            tool_result = await session.call_tool(
                tool_use_block.name, tool_use_block.input
            )
            # mode="json" guarantees that json.dumps below can serialize the dump.
            tool_result_dict = tool_result.model_dump(mode="json")
            failed = bool(tool_result_dict.get("isError"))
        except Exception as e:
            if "MCP error" not in str(e):
                # Previously this path left tool_result_dict unbound (or stale
                # from an earlier tool call); surface the real error instead.
                raise
            tool_result_dict = {"error": str(e)}
            failed = True

        # The tool_result content is sent to Anthropic as a string.
        result_str = json.dumps(tool_result_dict)
        print(f"Tool result: {result_str}")

        result_block = {
            "type": "tool_result",
            "tool_use_id": tool_use_block.id,
            "content": result_str,
        }
        if failed:
            result_block["is_error"] = True
        return result_block

    async def _make_request(self, observation: str) -> str:
        """Make an API request to Anthropic, executing requested tools until done.

        The model is called repeatedly: whenever a response contains tool_use
        blocks, the tools are executed through the MCP session and their results
        are sent back. Text from every response is accumulated and returned.

        Args:
            observation (str): The input to process.

        Returns:
            str: The concatenated text of all responses, stripped.
        """
        session = await self._initialize_session()

        try:
            tools = await self._list_anthropic_tools(session)

            print("Available tools:", tools)

            request_kwargs = {
                "model": self.model_name,
                "max_tokens": self.max_tokens,
                "temperature": self.temperature,
                "tools": tools,
            }
            # `system_prompt` is Optional: omit the field instead of sending None.
            if self.system_prompt is not None:
                request_kwargs["system"] = self.system_prompt

            final_response_text = ""
            messages = [
                {
                    "role": "user",
                    "content": [{"type": "text", "text": observation}],
                }
            ]

            # Loop to handle multiple tool calls in a conversation
            while True:
                response = await self.client.messages.create(
                    messages=messages, **request_kwargs
                )

                print("Response:", response)

                tool_results = []
                for content_block in response.content:
                    if content_block.type == "tool_use":
                        tool_results.append(
                            await self._run_tool(session, content_block)
                        )
                    elif content_block.type == "text":
                        # Accumulate text responses
                        final_response_text += content_block.text

                # No tool was requested: the conversation is finished.
                if not tool_results:
                    break

                # Send the assistant turn back as one message (its text together
                # with every tool_use block), followed by one user message that
                # carries all tool results for that turn.
                messages.append({"role": "assistant", "content": response.content})
                messages.append({"role": "user", "content": tool_results})

        except Exception as e:

            print(f"Error: {e}")
            raise

        return final_response_text.strip()

In [None]:
import textarena as ta

# Initialize agents
agents = {
    0: MCPAgent(model_name="claude-3-7-sonnet-20250219"),
    1: AsyncAnthropicAgent(model_name="claude-3-5-haiku-20241022"),
}

# Initialize environment from subset and wrap it
env = ta.make(env_id="SimpleNegotiation-v0")
env = ta.wrappers.LLMObservationWrapper(env=env)
env = ta.wrappers.SimpleRenderWrapper(
    env=env,
    player_names={0: "sonnet", 1: "haiku"},
)

env.reset(num_players=len(agents))
done = False
while not done:
    player_id, observation = env.get_observation()
    # Alternative if running outside notebook:
    # action = asyncio.get_event_loop().run_until_complete(agents[player_id](observation))
    action = await agents[player_id](observation)
    done, info = env.step(action=action)
    print("step complete")
rewards = env.close()
print(rewards)

Available tools: [{'input_schema': {'type': 'object', 'properties': {'code': {'type': 'string'}}, 'required': ['code'], 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, 'name': 'run_code', 'description': 'Run python code in a secure sandbox by E2B. Using the Jupyter Notebook syntax.'}]
Response: Message(id='msg_014SnBW11PSd38q7gmKmuiR2', content=[TextBlock(citations=None, text="I'll help you navigate this Negotiation Game. Let me first analyze your resources and their values to understand what would be most beneficial to trade.", type='text'), ToolUseBlock(id='toolu_01L6erW5oRhUHNDAQeDq4hbt', input={'code': '# Current resources and their values\nresources = {\n    "Wheat": {"qty": 10, "value": 6},\n    "Wood": {"qty": 25, "value": 10},\n    "Sheep": {"qty": 19, "value": 18},\n    "Brick": {"qty": 5, "value": 29},\n    "Ore": {"qty": 20, "value": 35}\n}\n\n# Calculate total value of current resources\ntotal_value = sum(res["qty"] * res["value"] for r

step complete


step complete
Available tools: [{'input_schema': {'type': 'object', 'properties': {'code': {'type': 'string'}}, 'required': ['code'], 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, 'name': 'run_code', 'description': 'Run python code in a secure sandbox by E2B. Using the Jupyter Notebook syntax.'}]
Response: Message(id='msg_01SNLgjF3JzGZ8THp6vSQuwZ', content=[TextBlock(citations=None, text="I'll analyze the game state and help you make strategic decisions based on your current resources.", type='text'), ToolUseBlock(id='toolu_012GzLiHP6N6CLNXCV5xkk5V', input={}, name='run_code', type='tool_use')], model='claude-3-7-sonnet-20250219', role='assistant', stop_reason='max_tokens', stop_sequence=None, type='message', usage=Usage(cache_creation_input_tokens=0, cache_read_input_tokens=0, input_tokens=1256, output_tokens=1000))
Tool called: run_code
Tool input: {}
Tool result: {"error": "MCP error -32602: Invalid code interpreter arguments"}
Response: Messa

step complete


step complete
Available tools: [{'input_schema': {'type': 'object', 'properties': {'code': {'type': 'string'}}, 'required': ['code'], 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, 'name': 'run_code', 'description': 'Run python code in a secure sandbox by E2B. Using the Jupyter Notebook syntax.'}]
Response: Message(id='msg_01NgJLjbK8M8GnquQDYRYenh', content=[TextBlock(citations=None, text='I need to analyze my current resources and values after the trade to determine my next strategic move.', type='text'), ToolUseBlock(id='toolu_0152GTji72AMDFWQkqXHozEw', input={'code': '# Initial resources\nresources = {\n    \'Wheat\': {\'qty\': 10, \'value\': 6},\n    \'Wood\': {\'qty\': 25, \'value\': 10},\n    \'Sheep\': {\'qty\': 19, \'value\': 18},\n    \'Brick\': {\'qty\': 5, \'value\': 29},\n    \'Ore\': {\'qty\': 20, \'value\': 35}\n}\n\n# Process the trade (2 Ore -> 3 Brick, 1 Wheat)\nresources[\'Ore\'][\'qty\'] -= 2\nresources[\'Brick\'][\'qty\'] += 3\

step complete


step complete
Available tools: [{'input_schema': {'type': 'object', 'properties': {'code': {'type': 'string'}}, 'required': ['code'], 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, 'name': 'run_code', 'description': 'Run python code in a secure sandbox by E2B. Using the Jupyter Notebook syntax.'}]
Response: Message(id='msg_01EvQc9wXTFpSjHaK266Tcvv', content=[TextBlock(citations=None, text="I'll analyze your current position after the latest successful trade and recommend a strategic move.", type='text'), ToolUseBlock(id='toolu_011TBHC93Ye8PNpJoPSauXVL', input={'code': '# Calculate current resources after the trade\n# Initial resources\nwheat = 11\nwood = 25\nsheep = 19\nbrick = 8\nore = 18\n\n# Values per unit\nwheat_value = 6\nwood_value = 10\nsheep_value = 18\nbrick_value = 29\nore_value = 35\n\n# After trade: gave 5 Wood, 3 Wheat, received 2 Brick\nwheat_after = wheat - 3\nwood_after = wood - 5\nbrick_after = brick + 2\n\n# Calculate new totals

step complete


step complete
Available tools: [{'input_schema': {'type': 'object', 'properties': {'code': {'type': 'string'}}, 'required': ['code'], 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, 'name': 'run_code', 'description': 'Run python code in a secure sandbox by E2B. Using the Jupyter Notebook syntax.'}]
