In [1]:
# Reload edited project modules automatically so changes under src/ are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import atexit

from src.agent_interface.agent import Agent
from src.actions.browser_actions import BrowserActions
from src.actions.action_validator import ActionValidator
from src.state_manager.state import StateManager
from src.utils.selenium_utils import setup_selenium_driver

### Initialize agent, browser actions, state manager and action validator

In [3]:
# Natural-language goal handed to the agent.
task = "Apply for 50 jobs for software engineering in Zurich."
# Placeholder for the user's resume / extra information: the braces are stored
# literally, nothing in this notebook formats the string.
extra_context = "{user_resume_and_additional_info}"

In [4]:
# Create the agent, then attach the task and its context as plain attributes.
agent = Agent()
agent.task = task
agent.context = extra_context

In [5]:
# Create the Selenium driver that the helper objects below are constructed with.
# NOTE(review): headless=False presumably opens a visible browser window — confirm in setup_selenium_driver.
driver = setup_selenium_driver(headless=False)

In [6]:
# Keep the browser session alive: the following cells pass `driver` to
# BrowserActions, StateManager and ActionValidator, so quitting it here would
# leave all of them holding a closed session. The quit is deferred to
# interpreter exit instead; call driver.quit() explicitly in a final cell if
# the browser should be closed earlier.
atexit.register(driver.quit);  # trailing ';' hides the returned bound method

In [7]:
# Action executor built on the shared driver; its methods are later looked up by action type.
browser_actions = BrowserActions(driver=driver)

In [8]:
# State reader built on the same driver (a later cell sketches statemanager.get_current_state()).
statemanager = StateManager(driver=driver)

In [9]:
# Validator built on the same driver; used further down to check an action's success and ambiguity.
validator = ActionValidator(driver=driver)

### Get the current state and possible actions 

For example:

In [10]:
# Example state snapshot: the current URL plus the elements the agent can
# interact with. Identifier fields hold the placeholder value "string".
state = {
    "current_url": "string",
    "interactable_elements": [
        {
            "type": "button",
            "identifier": {"id": "string", "class": "string", "name": "string", "text": "string"},
        },
        {
            "type": "input",
            "identifier": {"id": "string", "class": "string", "name": "string", "placeholder": "string"},
        },
    ],
}

In [11]:
# Example list of actions offered to the agent. Each entry names an action
# type and the parameters it takes; values are "string" placeholders.
actions = [
    {"type": "search_google", "params": {"query": "string"}},
    {"type": "go_to_url", "params": {"url": "string"}},
    {
        "type": "click_element",
        "params": {
            "identifier": {"id": "string", "class": "string", "name": "string", "text": "string"},
        },
    },
]

In [12]:
# Hand the agent the current state and the actions it may choose from; the
# attribute dump in the next cell shows them stored as current_state / available_actions.
agent.receive_state(state, actions)

In [13]:
# Inspect the agent's instance attributes to confirm what it now holds.
vars(agent)

{'current_state': {'current_url': 'string',
  'interactable_elements': [{'type': 'button',
    'identifier': {'id': 'string',
     'class': 'string',
     'name': 'string',
     'text': 'string'}},
   {'type': 'input',
    'identifier': {'id': 'string',
     'class': 'string',
     'name': 'string',
     'placeholder': 'string'}}]},
 'available_actions': [{'type': 'search_google',
   'params': {'query': 'string'}},
  {'type': 'go_to_url', 'params': {'url': 'string'}},
  {'type': 'click_element',
   'params': {'identifier': {'id': 'string',
     'class': 'string',
     'name': 'string',
     'text': 'string'}}}],
 'task': 'Apply for 50 jobs for software engineering in Zurich.',
 'context': '{user_resume_and_additional_info}'}

### The LLM decides the next action

In [14]:
# Ask the agent for its next action.
llm_action = agent.decide_next_action()

# Example only: the agent's answer is replaced by a hand-written
# click_element action so the rest of the walkthrough has a fixed input.
llm_action = {
    "type": "click_element",
    "params": {
        "identifier": {"id": "string", "class": "string", "name": "string", "text": "string"},
    },
}

In [15]:
# Display the chosen action.
llm_action

{'type': 'click_element',
 'params': {'identifier': {'id': 'string',
   'class': 'string',
   'name': 'string',
   'text': 'string'}}}

### Perform the action

In [16]:
# The action type originates from the LLM, so it is checked against the
# actions that were offered to the agent before it is used for attribute
# lookup; without the check any attribute of `browser_actions` could be
# fetched by name.
action_type = llm_action["type"]
allowed_action_types = {action["type"] for action in actions}
if action_type not in allowed_action_types:
    raise ValueError(
        f"Action type {action_type!r} is not one of the available actions: "
        f"{sorted(allowed_action_types)}"
    )

# Look up the attribute of `browser_actions` named after the action type.
# NOTE(review): assumes BrowserActions exposes one method per action type — confirm.
f = getattr(browser_actions, action_type)
# Execution is left disabled for now; enabling it would call:
# f(**llm_action["params"])

### Update the next state and possible actions

In [17]:
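# TODO: refresh the loop inputs once the action has been performed.
# state = statemanager.get_current_state()
# actions = ...  # TODO: derive the available actions from the new state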

### Validate the action that was taken

In [18]:
# Bind both validation results to names and show them together: a cell only
# displays its last expression, so an unbound earlier call would be lost.
action_succeeded = validator.is_action_successful(llm_action)
ambiguity_check = validator.check_ambiguity(llm_action)
action_succeeded, ambiguity_check

### Repeat the steps, feeding the updated state and actions back to the agent