In [1]:
# Enable IPython's autoreload extension so edits to the imported project
# modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from src.driver.service import DriverService
from src.state_manager.state import StateManager
from src.actions.browser_actions import BrowserActions
from src.agent_interface.planing_agent import PlaningAgent
from src.actions.browser_actions import ActionResult
from src.state_manager.utils import save_formatted_html

import os
from datetime import datetime

In [3]:
# Obtain one driver from DriverService and share that same instance with both
# the state manager (reads page state) and the action executor.
driver = DriverService().get_driver()
state_manager = StateManager(driver)
actions = BrowserActions(driver)

In [4]:
# Create a per-run output directory named after the current local time
# (second resolution), e.g. temp/run_20250511_121915. Later cells write the
# HTML dumps and conversation logs for each step into this folder.
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
run_folder = f'temp/run_{timestamp}'
# exist_ok=True keeps the cell safe to re-run and avoids the check-then-create
# race of a separate os.path.exists() test.
os.makedirs(run_folder, exist_ok=True)

In [5]:
# Natural-language task handed to the agent. The parenthesised part is an
# instruction to the agent itself, not a placeholder to fill in by hand.
task = 'Go to kayak.com and find a flight from Zürich to (ask the user for the destination) on 2025-04-25 with return on 2025-06-05 for 2 people.'

In [6]:
# Show the action catalogue (action name -> parameter description) that is
# passed to the agent when it is constructed.
print(actions.get_default_actions())

{'ask_user': 'text: string', 'send_user_text': 'text: string', 'search_google': 'text: string', 'go_to_url': 'url: string', 'done': '', 'go_back': '', 'click': 'id: int', 'input': 'id: int, text: string', 'nothing': ''}


In [7]:
# Build the planning agent with the available browser actions and the model name.
# NOTE(review): the recorded run failed with an AuthenticationError, so the
# API key for this model must be configured before agent.chat() is called.
agent = PlaningAgent(default_actions=actions.get_default_actions(), model="gpt-4o")

In [8]:
# Give the agent its task as a user prompt.
# NOTE(review): presumably after_system=True places it right after the system
# prompt, and re-running this cell adds the task a second time — confirm.
agent.add_user_prompt(f'Your task is: {task}', after_system=True)

In [11]:
# State carried across iterations of the step block below.
# url_history collects driver.current_url once per step.
url_history = []
# Placeholder result for the first iteration; the step block reads
# output.user_input, output.error and output.done before any action has run.
# Assumes a default ActionResult() has those fields unset/falsy — TODO confirm.
output = ActionResult()

In [12]:
# Step index; used in the per-step file names (html_{i}.html,
# conversation_{i}.txt) and passed as `step` to get_current_state.
i = 0

In [13]:
# No previous page state exists before the first step.
previous_state = None

### Repeat the following block once per agent step, until `output.done` is set

---

In [14]:
# Capture the page state for step i. The recorded output shows the state
# carrying a screenshot path inside run_folder that includes the step index.
current_state = state_manager.get_current_state(run_folder=run_folder, step=i)

In [15]:
# Annotate the current state with what changed relative to the previous step
# (previous_state is None on the first iteration).
# NOTE(review): the returned PageState is only displayed, not assigned —
# confirm add_change_info updates current_state in place.
state_manager.add_change_info(current_state=current_state, previous_state=previous_state)

PageState(current_url='data:,', page_title='', dom_content=ProcessedDomContent(items=[], selector_map={}), screenshot='temp/run_20250511_121915/window_screenshot_0.png')

In [16]:
# Save the page source for this step to run_folder as html_{i}.html.
save_formatted_html(driver.page_source, f'{run_folder}/html_{i}.html')

In [17]:
# Record the URL visited in this step. Appends on every execution, so
# re-running this cell within one step adds a duplicate entry.
url_history.append(driver.current_url)

In [18]:
# Text rendering of the DOM items found on the current page.
elem_text = current_state.dom_content.dom_items_to_string()

In [19]:
# Message body for the agent: the page elements followed by the URLs visited so far.
text = f'Elements:\n{elem_text}\nUrl history: {url_history}'

In [20]:
# Inspect the message before it is sent to the agent.
print(text)

Elements:

Url history: ['data:,']


In [21]:
# Forward any user answer collected by the previous action to the agent.
# NOTE(review): this still adds the prompt again if the cell is re-run within
# the same step.
if output.user_input:
    agent.add_user_prompt(output.user_input)

# Build the outgoing message in its own variable instead of appending to
# `text`. Re-running this cell (e.g. after a failed API call) then no longer
# stacks the same error suffix onto the message several times.
prompt_text = text
if output.error:
    prompt_text = f'{text}, Previous action error: {output.error}'

# Ask the agent for the next action, passing the current screenshot as the
# image. store_conversation names a per-step file inside run_folder
# (presumably where the exchange is written — confirm).
action = agent.chat(
    prompt_text,
    store_conversation=f'{run_folder}/conversation_{i}.txt',
    image=current_state.screenshot
)


[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m
LiteLLM.Info: If you need to debug this error, use `litellm._turn_on_debug()'.



AuthenticationError: litellm.AuthenticationError: AuthenticationError: OpenAIException - Incorrect API key provided: sk-proj-********************************************************************************************************************************************************KaMA. You can find your API key at https://platform.openai.com/account/api-keys.

In [217]:
# Display the action the agent chose for this step.
action

Action(valuation_previous_goal='Failed to select the Bali destination option.', goal="Select 'Denpasar, Bali, Indonesia' as the destination.", action='click', params=ActionParams(url=None, id=42, text=None))

In [218]:
# Execute the chosen action, passing the selector map of the current page.
# The result's user_input / error / done fields are read by other cells of
# the step block.
output = actions.execute_action(action, current_state.dom_content.selector_map)

{
    "valuation_previous_goal": "Failed to select the Bali destination option.",
    "goal": "Select 'Denpasar, Bali, Indonesia' as the destination.",
    "action": "click",
    "params": {
        "url": null,
        "id": 42,
        "text": null
    }
}


In [219]:
# Once the executed action reports completion, announce it and shut the driver down.
# NOTE(review): the notebook's last cell calls driver.quit() again — confirm a
# second quit is harmless for this driver, and stop repeating the block once
# this branch has run.
if output.done:
    print('Task completed')
    driver.quit()

In [220]:
# Advance to the next step. Run exactly once per pass through the block:
# re-running this cell skips a step index in the saved file names.
i += 1

In [221]:
# Keep this step's state so the next iteration can pass it as previous_state.
previous_state = current_state

---

In [222]:
# Final cleanup: shut the driver down when the session is over.
driver.quit()