In [1]:
# Reload imported modules automatically before each cell runs, so edits to
# project code take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [67]:
from src.driver.service import DriverService
from src.state_manager.state import StateManager
from src.actions.browser_actions import BrowserActions
from src.agent_interface.planing_agent import PlaningAgent
from src.actions.browser_actions import ActionResult
from src.state_manager.utils import save_formatted_html

import os
from datetime import datetime

In [68]:
# Obtain one driver from DriverService and share that same instance between
# the state manager and the action executor.
driver = DriverService().get_driver()
state_manager = StateManager(driver)
actions = BrowserActions(driver)

In [69]:
# Create a timestamped output folder for this run; later cells write their
# per-step files (HTML dumps, conversation logs) under it.
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
run_folder = f'temp/run_{timestamp}'
# exist_ok=True keeps the cell safe to re-run within the same second and
# avoids the check-then-create race of os.path.exists() + os.makedirs().
os.makedirs(run_folder, exist_ok=True)

In [70]:
# Natural-language instruction given to the planning agent. The destination
# is intentionally left as a placeholder telling the agent to ask the user.
task = (
    'Go to kayak.com and find a flight from Zürich to '
    '(ask the user for the destination) '
    'on 2025-04-25 with return on 2025-06-05 for 2 people.'
)

In [71]:
# Show the action vocabulary (action name -> parameter signature string)
# that is handed to the agent in the next cell.
print(actions.get_default_actions())

{'ask_user': 'text: string', 'send_user_text': 'text: string', 'search_google': 'text: string', 'go_to_url': 'url: string', 'done': '', 'go_back': '', 'click': 'id: int', 'input': 'id: int, text: string', 'nothing': ''}


In [72]:
# Create the planning agent with the default action set shown above;
# "gpt-4o" is the model name passed to it.
agent = PlaningAgent(default_actions=actions.get_default_actions(), model="gpt-4o")

In [73]:
# Register the task as a user prompt.
# NOTE(review): after_system=True presumably places it directly after the
# system prompt — confirm in PlaningAgent.add_user_prompt.
agent.add_user_prompt(f'Your task is: {task}', after_system=True)

In [74]:
# State carried across steps: the URLs visited so far, and the result of the
# most recent action (a default-constructed ActionResult before the first step).
url_history = []
output = ActionResult()

In [75]:
# Step counter: used to label per-step files and incremented after each step.
i = 0

In [76]:
# There is no earlier state to compare against before the first step.
previous_state = None

---

In [257]:
# Capture the browser state for this step. Later cells read its
# interactable_elements, screenshot and selector_map.
# NOTE(review): run_folder/step are presumably used to store per-step
# artifacts — confirm in StateManager.get_current_state.
current_state = state_manager.get_current_state(run_folder=run_folder, step=i)

In [258]:
# Enrich the freshly captured state. previous_state is None on the first
# step and is set to the last step's state afterwards.
# NOTE(review): presumably these annotate current_state in place with
# change and parent information — confirm in StateManager.
state_manager.add_change_info(current_state=current_state, previous_state=previous_state)
state_manager.add_parent_info(current_state=current_state)

In [259]:
# Write the current page source to html_<step>.html inside the run folder.
save_formatted_html(driver.page_source, f'{run_folder}/html_{i}.html')

In [260]:
# Record the URL the browser is on at this step.
# Note: re-running this cell appends the same URL again.
url_history.append(driver.current_url)

In [261]:
# Prompt body for the agent: one line per interactable element, followed by
# the list of URLs visited so far.
elem_text = "\n".join(
    element.get_text() for element in current_state.interactable_elements
)
text = f'Elements:\n{elem_text}\nUrl history: {url_history}'

In [262]:
# Inspect the text that will be sent to the agent in the chat cell below.
print(text)

Elements:
      6:<div role="button" aria-label="Open main navigation"></div>
      7:<a href="/" aria-label="Go to the kayak homepage"></a>
     49:<div role="button" aria-label=" Round-trip">Round-trip</div>
     50:	<span>Round-trip</span>
     52:<div role="button" aria-label=" ISB, ZRH">From? | ISB, ZRH</div>
     55:	<div role="option" aria-label="ISB, ZRH" aria-selected="false">ISB, ZRH</div>
     56:		<div>ISB, ZRH</div>
     58:	<div role="button" aria-label="Add" aria-hidden="false"></div>
     59:<div role="button" aria-label="Swap departure airport and destination airport"></div>
     60:<div role="button" aria-label=" Denpasar, Bali, Indonesia">To? | Denpasar, Bali, Indonesia</div>
     63:	<div role="option" aria-label="Denpasar, Bali, Indonesia" aria-selected="false">Denpasar, Bali, Indonesia</div>
     64:		<div>Denpasar, Bali, Indonesia</div>
     66:		<div role="button" aria-label="Remove"></div>
     67:			<div role="button"></div>
     68:	<div role="button" aria-la

In [263]:
# Forward any user input collected by the previous action to the agent
# before asking for the next step.
if output.user_input:
    agent.add_user_prompt(output.user_input)

# Build the prompt in its own variable instead of appending to `text` in
# place: re-running this cell must not stack the error suffix repeatedly.
prompt_text = text
if output.error:
    prompt_text = f'{text}, Previous action error: {output.error}'

# Ask the agent for the next action, passing the element listing together
# with the current screenshot. store_conversation points at a per-step file
# inside the run folder.
action = agent.chat(
    prompt_text,
    store_conversation=f'{run_folder}/conversation_{i}.txt',
    image=current_state.screenshot,
)

Text $0.0188 + Image $0.0002 = $0.0194 for 7567  tokens


In [264]:
# Display the action returned by the agent for this step.
action

Action(valuation_previous_goal='Successfully found flights from Zürich to Bali.', goal='Finish the task and confirm to the user.', action='done', params=None)

In [265]:
# Execute the chosen action. The result is read at the next step
# (user_input, error) and by the completion check (done).
# NOTE(review): selector_map is presumably used to resolve element ids to
# page elements — confirm in BrowserActions.execute_action.
output = actions.execute_action(action, current_state.selector_map)

{
    "valuation_previous_goal": "Successfully found flights from Zürich to Bali.",
    "goal": "Finish the task and confirm to the user.",
    "action": "done",
    "params": null
}


In [266]:
# When the executed action reports completion, announce it and shut the
# browser down.
if output.done:
    print('Task completed')
    driver.quit()

Task completed


In [267]:
# Advance the step counter for the next iteration.
i += 1

In [268]:
# Keep this step's state so the next step can pass it to add_change_info.
previous_state = current_state

---

In [66]:
# Manual cleanup: close the browser session. Note that the completion cell
# above already calls driver.quit() when output.done is set.
driver.quit()