<a href="https://colab.research.google.com/github/DataSavvyYT/experiments/blob/main/playwright/0_playwright_openai.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!playwright install

In [None]:
# Install the operating-system packages the Playwright browsers depend on
# (Playwright CLI `install-deps` subcommand).
!playwright install-deps

In [None]:
import sys
!pip -q install playwright
!playwright install # Added to install browser binaries
# Let's import the basic requirements
from playwright.async_api import async_playwright # Changed to async_playwright
from openai import OpenAI

In [None]:
import asyncio # Added asyncio
import json
import os
import time

from openai import OpenAI
from playwright.async_api import async_playwright # Changed to async_playwright

In [None]:
from google.colab import userdata

# Read the key from Colab's secret store instead of hard-coding it in the notebook.
OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')

# Shared OpenAI client. Later cells (assistant creation and run_example) refer
# to the global name `client`, so it has to be created here before they run.
client = OpenAI(api_key=OPENAI_API_KEY)

In [None]:
# Define the tool schema for the browsing function
# Schema for a single entry of the tool's "actions" array. Only "action_type"
# is mandatory; the other fields are used depending on the chosen action.
_browse_action_schema = {
    "type": "object",
    "properties": {
        "action_type": {
            "type": "string",
            "enum": ["navigate", "click", "type", "extract_text"],
            "description": "The type of action: navigate (to a new URL), click (on a selector), type (text into a selector), extract_text (get text from a selector).",
        },
        "selector": {"type": "string", "description": "The CSS selector for click, type, or extract_text actions."},
        "text": {"type": "string", "description": "The text to type for 'type' actions."},
        "url": {"type": "string", "description": "The URL for 'navigate' actions."},
    },
    "required": ["action_type"],
}

# Function-tool definition handed to the assistant: a start URL plus an ordered
# list of actions to carry out on the page.
browse_tool = {
    "type": "function",
    "function": {
        "name": "browse_website",
        "description": "Use Playwright to browse a website and perform a series of tasks such as navigating, clicking, typing, or extracting text.",
        "parameters": {
            "type": "object",
            "properties": {
                "url": {"type": "string", "description": "The initial URL to navigate to."},
                "actions": {
                    "type": "array",
                    "description": "A list of actions to perform on the page.",
                    "items": _browse_action_schema,
                },
            },
            "required": ["url", "actions"],
        },
    },
}

In [None]:
# Create the OpenAI Assistant
assistant = client.beta.assistants.create(
    # Model that drives the agent; a more capable one such as gpt-4o can be substituted.
    model="gpt-4o-mini",
    name="Browser Agent",
    # The only tool the assistant may call is the browse_website function schema.
    tools=[browse_tool],
    instructions="You are a helpful agent that can browse websites using the provided tool to perform tasks like searching for information or extracting content. Analyze the user's request and determine the necessary URL and sequence of actions.",
)

In [None]:
# Function to execute the browsing tool using Playwright
async def execute_browse_tool(url, actions, max_chars=200):
    """Open ``url`` in headless Chromium and perform ``actions`` in order.

    Args:
        url: Page loaded before the first action runs.
        actions: Iterable of dicts shaped like the ``browse_website`` tool's
            action items: an ``action_type`` plus ``selector`` / ``text`` /
            ``url`` depending on the action.
        max_chars: Upper bound on the extracted text echoed back for each
            ``extract_text`` action. Defaults to 200.

    Returns:
        A newline-joined report with one line per action, describing either
        what was done or why the action could not be carried out.

    Raises:
        Whatever Playwright raises while launching the browser or loading the
        initial ``url``. Failures of individual actions are reported in the
        returned text instead of being raised.
    """
    results = []
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            page = await browser.new_page()
            await page.goto(url)

            for action in actions:
                action_type = action.get("action_type")
                try:
                    if action_type == "navigate":
                        target_url = action.get("url")
                        if target_url:
                            await page.goto(target_url)
                            results.append(f"Navigated to {target_url}")
                        else:
                            results.append("Error: No URL provided for navigate action")
                    elif action_type == "click":
                        selector = action.get("selector")
                        if selector:
                            await page.click(selector)
                            results.append(f"Clicked on {selector}")
                        else:
                            results.append("Error: No selector provided for click action")
                    elif action_type == "type":
                        selector = action.get("selector")
                        text = action.get("text")
                        if selector and text:
                            await page.fill(selector, text)
                            results.append(f"Typed '{text}' into {selector}")
                        else:
                            results.append("Error: Missing selector or text for type action")
                    elif action_type == "extract_text":
                        selector = action.get("selector")
                        if selector:
                            text = await page.inner_text(selector)
                            # Only mark the text as cut off when it really was,
                            # so the reader is not misled about missing content.
                            if len(text) > max_chars:
                                text = text[:max_chars] + "..."
                            results.append(f"Extracted text from {selector}: {text}")
                        else:
                            results.append("Error: No selector provided for extract_text action")
                    else:
                        results.append(f"Unknown action type: {action_type}")
                except Exception as exc:
                    # A failing action (bad selector, timeout, ...) is reported
                    # like the validation errors above, so earlier results are
                    # kept and the remaining actions still get a chance to run.
                    results.append(f"Error during {action_type}: {exc}")
        finally:
            # Close the browser even if page setup or the first load failed.
            await browser.close()

    return "\n".join(results)

In [None]:
# Example usage: Run the assistant with a user query
def run_example(query, poll_interval=1.0):
    """Send ``query`` to the assistant, service its tool calls, and print the reply.

    Uses the notebook-level ``client`` and ``assistant`` objects and the
    ``execute_browse_tool`` coroutine.

    Args:
        query: Natural-language request posted as the user message.
        poll_interval: Seconds to wait between status polls. Defaults to 1.0.

    Returns:
        None. The assistant's text messages are printed. If the run ends in a
        state other than "completed", its status and last error are printed
        instead.
    """
    # States after which a run no longer changes; polling beyond them would
    # loop forever.
    terminal_statuses = {"completed", "failed", "cancelled", "expired", "incomplete"}

    # Create a thread and post the user's message to it
    thread = client.beta.threads.create()
    client.beta.threads.messages.create(
        thread_id=thread.id,
        role="user",
        content=query
    )

    # Start the run
    run = client.beta.threads.runs.create(
        thread_id=thread.id,
        assistant_id=assistant.id
    )

    # Poll until the run reaches a terminal state, answering tool calls on the way
    while run.status not in terminal_statuses:
        if run.status == "requires_action":
            tool_outputs = []
            for tool_call in run.required_action.submit_tool_outputs.tool_calls:
                tool_name = tool_call.function.name
                if tool_name == "browse_website":
                    try:
                        args = json.loads(tool_call.function.arguments)
                        output = asyncio.run(execute_browse_tool(args["url"], args["actions"]))
                    except Exception as exc:
                        # Report the failure to the model rather than leaving
                        # the run waiting for an output that never arrives.
                        output = f"Error: {exc}"
                else:
                    output = f"Error: unknown tool '{tool_name}'"
                # Every requested tool call gets an output, including failures.
                tool_outputs.append({
                    "tool_call_id": tool_call.id,
                    "output": output
                })

            client.beta.threads.runs.submit_tool_outputs(
                thread_id=thread.id,
                run_id=run.id,
                tool_outputs=tool_outputs
            )

        # Pause between polls instead of querying the API in a tight loop
        time.sleep(poll_interval)
        run = client.beta.threads.runs.retrieve(thread_id=thread.id, run_id=run.id)

    if run.status != "completed":
        print(f"Run ended with status '{run.status}': {getattr(run, 'last_error', None)}")
        return

    # Print the text parts of every assistant message in the thread
    messages = client.beta.threads.messages.list(thread_id=thread.id)
    for message in messages.data:
        if message.role == "assistant":
            print("Assistant response:")
            for content in message.content:
                if content.type == "text":
                    print(content.text.value)


In [None]:
# Run an example query that performs a few tasks
# Example: open the Wikipedia article named in the prompt and extract its summary paragraph

# run_example calls asyncio.run() from inside the notebook; nest_asyncio.apply()
# patches asyncio so that call is allowed while an event loop is already
# running, which is why it must execute before run_example is invoked.
!pip -q install nest_asyncio
import nest_asyncio
nest_asyncio.apply()

run_example("Go to the Wikipedia page for 'harry Potter' and extract the summary paragraph.")