# Code Interpreter + Browser tool to automate web interactions

Automates browser tasks starting from a given URL based on natural language instructions.
(A simple Nova Act alternative)

Basic Flow:
1. Create a custom `Agent Core Code Interpreter` with access to `Agent Core Browser`
2. Have an AI/Agent generate code to run on a `Playwright Page` object based on natural language
3. Run the code in the `Code Interpreter`, which will invoke the `Agent Core Browser` to interact with the website

For more details, please refer to my article: [Bedrock AgentCore Code Interpreter + Browser. A Simple Nova-Act Alternative?](https://medium.com/@itsuki.enjoy/bedrock-agentcore-code-interpreter-browser-a-simple-nova-act-alternative-e8ab00348159)

In [None]:
import json
import os
import textwrap

import boto3
from bedrock_agentcore.tools.code_interpreter_client import CodeInterpreter
from agents import set_default_openai_key, Agent, Runner, function_tool

# Prefer a key supplied through the environment so a real secret never has to
# be committed with the notebook; the literal is only the original placeholder.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "sk-...")
set_default_openai_key(OPENAI_API_KEY)

# Single source of truth for the AWS region used by every client below.
region = "us-east-1"
code_client = CodeInterpreter(region)

# Names of the IAM role assumed by the code interpreter and of its inline policy.
role_name = "InterpreterBrowserExecutionRole"
policy_name = "BedrockAgentCoreBrowserSessionAccess"


## Code Interpreter Execution Role

The execution role needs permissions on `bedrock-agentcore` (at minimum, the browser-related actions).

In [16]:
iam_client = boto3.client('iam')

# Trust policy: lets the bedrock-agentcore service assume the role.
trust_policy = {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Principal": {
                "Service": "bedrock-agentcore.amazonaws.com"
            },
            "Action": "sts:AssumeRole"
        }
    ]
}

# Code interpreter execution role.
# Re-running this cell used to fail once the role existed; reuse it instead.
# NOTE(review): a reused role keeps whatever trust policy it already has.
try:
    response = iam_client.create_role(
        RoleName=role_name,
        AssumeRolePolicyDocument=json.dumps(trust_policy),
    )
    role_arn = response["Role"]["Arn"]
    print(f"role created: {role_arn}")
except iam_client.exceptions.EntityAlreadyExistsException:
    response = iam_client.get_role(RoleName=role_name)
    role_arn = response["Role"]["Arn"]
    print(f"role already exists, reusing: {role_arn}")

# Attach the inline policy to the role (put_role_policy overwrites an existing
# policy of the same name, so this step is safe to repeat).
# Kept as the last expression so the notebook still displays the response.
iam_client.put_role_policy(
    RoleName=role_name,
    PolicyName=policy_name,
    PolicyDocument=json.dumps({
        "Version": "2012-10-17",
        "Statement": [
            {
                "Effect": "Allow",
                "Action": [
                    "bedrock-agentcore:*",
                ],
                "Resource": "*"
            }
        ]
    })
)

role created: arn:aws:iam::075198889659:role/InterpreterBrowserExecutionRole


{'ResponseMetadata': {'RequestId': 'c631931a-7177-42fb-891a-4525fbb14855',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'date': 'Wed, 06 Aug 2025 04:53:45 GMT',
   'x-amzn-requestid': 'c631931a-7177-42fb-891a-4525fbb14855',
   'content-type': 'text/xml',
   'content-length': '206'},
  'RetryAttempts': 0}}

## Create custom code interpreter and browser

The code interpreter's network mode has to be `PUBLIC` so that it can install the necessary libraries and invoke other AWS resources.

Custom Browser is optional.

In [None]:
# Control-plane client used to create the custom code interpreter and browser.
control_endpoint = f"https://bedrock-agentcore-control.{region}.amazonaws.com"
control_client = boto3.client(
    'bedrock-agentcore-control',
    region_name=region,
    endpoint_url=control_endpoint,
)

# Both resources are created with the PUBLIC network mode:
# - code interpreter: operate in a public environment and have internet access
# - browser: required to access any aws resources other than s3
public_network = {"networkMode": "PUBLIC"}

# Code interpreter
# (the freshly created role might need a couple of seconds to become usable)
response = control_client.create_code_interpreter(
    name="code_interpreter_with_browser_access",
    executionRoleArn=role_arn,
    networkConfiguration=public_network,
)
code_interpreter_id = response["codeInterpreterId"]
print(f"code interpreter created: {code_interpreter_id}")

# Browser
response = control_client.create_browser(
    name="browser_tool_for_code_interpreter",
    networkConfiguration=public_network,
)
browser_id = response["browserId"]
print(f"browser created: {browser_id}")

code interpreter created: code_interpreter_with_browser_access-U6cWG0lc5e
browser created: browser_tool_for_code_interpreter-hKMZcxkUfS


## Agent Configuration

### Define Function Tool For Browser Automation

In [19]:
@function_tool
def browser_automation_tool(starting_url: str, code: str) -> str:
    """
    Automates browser tasks starting from a given URL with the code to execute on a Playwright `Page` object.

    Args:
        starting_url (str): The initial URL to open in the browser.
        code (str): Python Code to execute on the Playwright `Page` object in an asynchronous context.

    Returns:
        str: The result of the code execution.

    Example:
        If user wants to get the title for a website, the input should be the following.
        - starting_url: site's url
        - code:
        ```
        print(f"title: {await page.title()}")
        ```
    """
    # NOTE(review): the docstring above is intentionally unchanged; it is
    # presumably what `function_tool` exposes to the model as the tool description.

    # Re-indent the snippet as a unit so that EVERY line (not just the first)
    # lands inside the generated `try:` body; previously a multi-line snippet
    # broke the indentation of the generated script.
    snippet = textwrap.indent(textwrap.dedent(code).strip("\n"), " " * 8)

    # `!r` embeds the values as proper Python string literals, so a URL that
    # contains quotes or braces can no longer corrupt the generated script.
    script = f"""
from bedrock_agentcore.tools.browser_client import BrowserClient
from playwright.async_api import async_playwright, BrowserType
import base64

browser_client = BrowserClient(region={region!r})
session_id = browser_client.start(
    identifier={browser_id!r}
)
ws_url, headers = browser_client.generate_ws_headers()

async with async_playwright() as playwright:
    chromium: BrowserType = playwright.chromium
    browser = await chromium.connect_over_cdp(
        endpoint_url=ws_url,
        headers=headers
    )
    page = await browser.new_page()

    try:
        await page.goto({starting_url!r})
{snippet}

    except Exception as e:
        print(f"Error running page command: {{e}}")

    finally:
        await page.close()
        await browser.close()
        browser_client.stop()
"""

    code_client.start(
        identifier=code_interpreter_id,
        session_timeout_seconds=3600
    )

    output = None
    error = None
    try:
        # Dependencies are installed inside the `try` so that a failure here
        # still reaches the `finally` and stops the session.
        code_client.invoke("executeCommand", {
            "command": "pip install boto3 playwright bedrock_agentcore",
        })
        response = code_client.invoke("executeCode", {
            "language": "python",
            "code": script,
        })
        # Drain the event stream before the session is stopped.
        output = "\n".join(
            json.dumps(event["result"], indent=2) for event in response["stream"]
        )

    except Exception as e:
        # Tool boundary: report the failure back to the caller as text so the
        # agent can adjust its code and retry.
        print(f"Error executing: {e}")
        error = e

    finally:
        code_client.stop()

    if output is None:
        return f"Error executing: {error}"
    return output


##### Function Tool Test

In [20]:
url = "https://medium.com/@itsuki.enjoy"
code = """print(f"title: {await page.title()}")"""
argument = {
    "starting_url": "https://medium.com/@itsuki.enjoy",
    "code": """print(f"title: {await page.title()}")"""
}
result = await browser_automation_tool.on_invoke_tool(None, json.dumps(argument))
print(result)


{
  "content": [
    {
      "type": "text",
      "text": "title: Itsuki \u2013 Medium"
    }
  ],
  "structuredContent": {
    "stdout": "title: Itsuki \u2013 Medium",
    "stderr": "",
    "exitCode": 0,
    "executionTime": 8.789749145507812
  },
  "isError": false
}


### Define Agent

In [None]:
# Agent that turns natural-language requests into Playwright code and runs it
# through `browser_automation_tool`. The instructions text is the prompt sent
# to the model and is kept verbatim; only the typo in the agent name is fixed.
browser_assistant = Agent(
    name="Browser Assistant",
    instructions="""
You are a helpful AI assistant that helps user to perform browser tasks using `browser_automation_tool` based on natural language instructions.

Your job is to convert the natural language instructions to *PYTHON* code that will be executed asynchronously on a Playwright `Page` object (variable name: `page`).
- For information you would like to get back from the code execution, use the `print` statement.
- If the user ask for data-based information, for example, an image, convert the bytes/buffer to base64 string and print it out.


Following libraries are imported by default:
- base64
- async_playwright, BrowserType from playwright.async_api
If you are using other SDKs, make sure to import those inline as necessary.


For Example, if the user ask you to get the title and a screenshot of the website `https://medium.com/@itsuki.enjoy`, your input to the function should be:
- `starting_url`: `https://medium.com/@itsuki.enjoy`
- `code`: `print(f"title: {await page.title()}");screenshot_bytes = await page.screenshot();print(base64.b64encode(screenshot_bytes).decode())`

Important Points to pay attention to:
- the indentation of the code.
- CSS.escape the characters when using query selectors
- If you are getting an error while using the tool, adjust the code based on the error and retry again.
    """,
    model="gpt-4o-mini",
    tools=[browser_automation_tool],
)

### Execution Example

In [23]:
result = await Runner.run(browser_assistant, """
Get the top 1 current market gainer from Yahoo Finance.
""", max_turns=20)

In [24]:
# Print the final output of the agent run started in the previous cell.
print(result.final_output)

The top current market gainer from Yahoo Finance is:

**Symbol:** XMTR  
**Company:** Xometry, Inc.  
**Current Price:** $44.28  
**Change:** +$13.31 (+42.98%)  
**Market Cap:** 2.24B  
**Volume:** 745,880  
**Previous Close:** --  
**Percentage Change:** +288.76%  


In [25]:
# tool uses
# Walk every raw model response and print the items it contributed
# (function calls, function call outputs and messages).
# The loop variable is named `item` so the builtin `input` is not shadowed,
# and inner quotes are single so the f-strings also parse before Python 3.12.
for response in result.raw_responses:
    for item in response.to_input_items():
        item_type = item["type"]
        print(f"input type: {item_type}")

        if item_type == "function_call":
            print(f"name: {item['name']}")
            print(f"arguments: {item['arguments']}")
            print(f"status: {item['status']}")

        elif item_type == "function_call_output":
            print(f"output: {item['output']}")
            print(f"status: {item['status']}")

        elif item_type == "message":
            print(f"role: {item['role']}")
            print(f"content: {item['content']}")
            print(f"status: {item['status']}")

        print("------------------")

input type: function_call
name: browser_automation_tool
arguments: {"starting_url":"https://finance.yahoo.com/gainers","code":"await page.wait_for_timeout(2000);top_gainer = await page.query_selector('table tbody tr:first-child');gainer_data = await top_gainer.inner_text();print(gainer_data)"}
status: completed
------------------
input type: message
role: assistant
content: [{'annotations': [], 'text': 'The top current market gainer from Yahoo Finance is:\n\n**Symbol:** XMTR  \n**Company:** Xometry, Inc.  \n**Current Price:** $44.28  \n**Change:** +$13.31 (+42.98%)  \n**Market Cap:** 2.24B  \n**Volume:** 745,880  \n**Previous Close:** --  \n**Percentage Change:** +288.76%  ', 'type': 'output_text', 'logprobs': []}]
status: completed
------------------


## Clean Up

In [26]:
# Data-plane client used to list and stop sessions.
agent_core = boto3.client(
    'bedrock-agentcore',
    region_name=region,
    endpoint_url=f"https://bedrock-agentcore.{region}.amazonaws.com"
)


def _ready_session_ids(list_sessions, **identifier):
    """Return the IDs of all READY sessions, following `nextToken` pagination.

    Args:
        list_sessions: The client's list-sessions method to call.
        **identifier: The keyword argument identifying the parent resource.

    Returns:
        list[str]: Session IDs collected across every page of results.
    """
    session_ids = []
    request = dict(identifier, status="READY")  # 'READY'|'TERMINATED'
    while True:
        page = list_sessions(**request)
        session_ids.extend(item["sessionId"] for item in page["items"])
        next_token = page.get("nextToken")
        if not next_token:
            return session_ids
        request["nextToken"] = next_token


# Code interpreter: stop all the running sessions, then delete it.
# IDs are collected first so that stopping sessions cannot disturb pagination.
for session_id in _ready_session_ids(
    agent_core.list_code_interpreter_sessions,
    codeInterpreterIdentifier=code_interpreter_id,
):
    agent_core.stop_code_interpreter_session(
        codeInterpreterIdentifier=code_interpreter_id,
        sessionId=session_id
    )

_ = control_client.delete_code_interpreter(
    codeInterpreterId=code_interpreter_id
)

# Browser: stop all the running sessions, then delete it.
for session_id in _ready_session_ids(
    agent_core.list_browser_sessions,
    browserIdentifier=browser_id,
):
    agent_core.stop_browser_session(
        browserIdentifier=browser_id,
        sessionId=session_id
    )

_ = control_client.delete_browser(
    browserId=browser_id
)

# delete role policy and role (the inline policy is removed before the role)
iam_client.delete_role_policy(
    RoleName=role_name,
    PolicyName=policy_name
)

iam_client.delete_role(
    RoleName=role_name
)
print("Deleted!")

Deleted!
