Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion browsergym/core/src/browsergym/core/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -491,7 +491,7 @@ def _get_obs(self):
logger.warning(
f"An error occured while extracting the dom and axtree. Retrying ({retries_left}/{EXTRACT_OBS_MAX_TRIES} tries left).\n{repr(e)}"
)
# post-extract cleanup (aria-roledescription attribute)
# post-extract cleanup (ARIA attributes)
_post_extract(self.page)
time.sleep(0.5)
continue
Expand Down
Original file line number Diff line number Diff line change
@@ -1,26 +1,8 @@
/**
* Go through all DOM elements in the frame (including shadowDOMs), give them unique browsergym
* identifiers (bid), and store custom data in the aria-roledescription attribute.
* identifiers (bid), and store custom data in ARIA attributes.
*/
async ([parent_bid, bid_attr_name]) => {

// standard html tags
// https://www.w3schools.com/tags/
const html_tags = new Set([
"a", "abbr", "acronym", "address", "applet", "area", "article", "aside", "audio",
"b", "base", "basefont", "bdi", "bdo", "big", "blockquote", "body", "br", "button",
"canvas", "caption", "center", "cite", "code", "col", "colgroup", "data", "datalist",
"dd", "del", "details", "dfn", "dialog", "dir", "div", "dl", "dt", "em", "embed",
"fieldset", "figcaption", "figure", "font", "footer", "form", "frame", "frameset",
"h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hgroup", "hr", "html", "i",
"iframe", "img", "input", "ins", "kbd", "label", "legend", "li", "link", "main",
"map", "mark", "menu", "meta", "meter", "nav", "noframes", "noscript", "object",
"ol", "optgroup", "option", "output", "p", "param", "picture", "pre", "progress",
"q", "rp", "rt", "ruby", "s", "samp", "script", "search", "section", "select",
"small", "source", "span", "strike", "strong", "style", "sub", "summary", "sup",
"svg", "table", "tbody", "td", "template", "textarea", "tfoot", "th", "thead",
"time", "title", "tr", "track", "tt", "u", "ul", "var", "video", "wbr"
]);
const set_of_marks_tags = new Set([
"input", "textarea", "select", "button", "a", "iframe", "video", "li", "td", "option"
]);
Expand Down Expand Up @@ -69,11 +51,6 @@ async ([parent_bid, bid_attr_name]) => {
);
}
i++;
// we will mark only standard HTML tags
if (!elem.tagName || !html_tags.has(elem.tagName.toLowerCase())) {
// Skipping element
continue; // stop and move on to the next element
}
// Processing element
// register intersection callback on element, and keep track of element for waiting later
elem.setAttribute('browsergym_visibility_ratio', 0);
Expand Down Expand Up @@ -132,15 +109,11 @@ async ([parent_bid, bid_attr_name]) => {
}
all_bids.add(elem_global_bid);

// Hack: store custom data inside the aria-roledescription attribute (will be available in DOM and AXTree)
// Hack: store custom data inside ARIA attributes (will be available in DOM and AXTree)
// - elem_global_bid: global element identifier (unique over multiple frames)
// TODO: add more data if needed (x, y coordinates, bounding box, is_visible, is_clickable etc.)
let original_content = "";
if (elem.hasAttribute("aria-roledescription")) {
original_content = elem.getAttribute("aria-roledescription");
}
let new_content = `${elem_global_bid}_${original_content}`
elem.setAttribute("aria-roledescription", new_content);
push_bid_to_attribute(elem_global_bid, elem, "aria-roledescription");
push_bid_to_attribute(elem_global_bid, elem, "aria-description"); // fallback for generic nodes

// set-of-marks flag (He et al. 2024)
// https://github.com/MinorJerry/WebVoyager/blob/main/utils.py
Expand Down Expand Up @@ -229,6 +202,15 @@ function whoCapturesCenterClick(element){
}
}

/**
 * Prepend a BrowserGym identifier marker to an attribute of an element.
 *
 * The attribute ends up holding `browsergym_id_<bid> <previous value>`; when the
 * attribute did not exist beforehand the previous value is the empty string, so the
 * marker is still followed by a single space.
 *
 * @param bid - identifier to embed in the marker.
 * @param elem - element whose attribute is rewritten.
 * @param attr - name of the attribute that receives the marker.
 */
function push_bid_to_attribute(bid, elem, attr) {
    // keep whatever the page already stored there so it stays behind the marker
    const previous_value = elem.hasAttribute(attr) ? elem.getAttribute(attr) : "";
    elem.setAttribute(attr, `browsergym_id_${bid} ${previous_value}`);
}

function elementFromPoint(x, y) {
let dom = document;
let last_elem = null;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/**
* Go through all DOM elements in the frame (including shadowDOMs),
* and cleanup previously stored data in the aria-roledescription attribute.
* and cleanup previously stored data in ARIA attributes.
*/
() => {
// get all DOM elements in the current frame (does not include elements in shadowDOMs)
Expand All @@ -18,23 +18,23 @@
);
}
i++;
// Hack: remove custom data stored inside the aria-roledescription tag
// Hack: remove custom data stored in ARIA attributes
// - elem_global_id: global browsergym identifier
if (elem.hasAttribute("aria-roledescription")) {
let content = elem.getAttribute("aria-roledescription");
// TODO: handle more data if needed
let n_data_items = 1; // bid
let post_data_index = 0;
for (let j = 0 ; j < n_data_items ; j++) {
post_data_index = content.indexOf("_", post_data_index) + 1;
}
original_content = content.substring(post_data_index);
if (original_content) {
elem.setAttribute("aria-roledescription", original_content);
}
else {
elem.removeAttribute("aria-roledescription");
}
pop_bid_from_attribute(elem, "aria-description");
pop_bid_from_attribute(elem, "aria-roledescription"); // fallback for generic nodes
}
}

/**
 * Strip a leading BrowserGym identifier marker from an attribute of an element.
 *
 * The marker is the text starting with `browsergym_id`, running up to and including
 * the first whitespace character. What remains is written back to the attribute; if
 * nothing remains, the attribute is removed altogether. Elements that do not carry
 * the attribute are left untouched.
 *
 * @param elem - element whose attribute is cleaned up.
 * @param attr - name of the attribute to clean up.
 */
function pop_bid_from_attribute(elem, attr) {
    // nothing to clean when the attribute is absent
    if (!elem.hasAttribute(attr)) {
        return;
    }
    const bid_prefix = /^browsergym_id[^\s]*\s/;
    const remaining = elem.getAttribute(attr).replace(bid_prefix, '');
    if (remaining) {
        elem.setAttribute(attr, remaining);
    } else {
        // the attribute held only the marker: drop it entirely
        elem.removeAttribute(attr);
    }
}
123 changes: 65 additions & 58 deletions browsergym/core/src/browsergym/core/observation.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,24 +130,20 @@ def extract_screenshot(page: playwright.sync_api.Page):
return img


# TODO: handle more data items if needed
# we could handle more data items here if needed
__BID_EXPR = r"([a-z0-9]+)"
__FLOAT_EXPR = r"([+-]?(?:[0-9]*[.])?[0-9]+)"
__BOOL_EXPR = r"([01])"
# bid, bbox_left, bbox_top, center_x, center_y, bbox_right, bbox_bottom, is_in_viewport
__DATA_REGEXP = re.compile(__BID_EXPR + r"_" + r"(.*)")
__DATA_REGEXP = re.compile(r"^browsergym_id_" + __BID_EXPR + r"\s?" + r"(.*)")


def extract_data_items_from_aria(string):
def extract_data_items_from_aria(string: str, with_warning: bool = True):
"""
Utility function to extract temporary data stored in the "aria-roledescription" attribute of a node
Utility function to extract temporary data stored in the ARIA attributes of a node
"""

match = __DATA_REGEXP.fullmatch(string)
if not match:
logger.warning(
f'Data items could not be extracted from "aria-roledescription" attribute: {string}'
)
if with_warning:
logger.warning(f"Failed to extract BrowserGym data from ARIA string: {repr(string)}")
return [], string

groups = match.groups()
Expand All @@ -171,7 +167,7 @@ def extract_dom_snapshot(
computed_styles: whitelist of computed styles to return.
include_dom_rects: whether to include DOM rectangles (offsetRects, clientRects, scrollRects) in the snapshot.
include_paint_order: whether to include paint orders in the snapshot.
temp_data_cleanup: whether to clean up the temporary data stored in the "aria-roledescription" attribute.
temp_data_cleanup: whether to clean up the temporary data stored in the ARIA attributes.

Returns:
A document snapshot, including the full DOM tree of the root node (including iframes,
Expand All @@ -191,43 +187,50 @@ def extract_dom_snapshot(
)
cdp.detach()

# if requested, remove temporary data stored in the "aria-roledescription" attribute of each node
# if requested, remove temporary data stored in the ARIA attributes of each node
if temp_data_cleanup:
try:
target_attr_name_id = dom_snapshot["strings"].index("aria-roledescription")
except ValueError:
target_attr_name_id = -1
# run the cleanup only if the "aria-roledescription" string is present
if target_attr_name_id > -1:
processed_string_ids = set()
for document in dom_snapshot["documents"]:
for node_attributes in document["nodes"]["attributes"]:
i = 0
# find the "aria-roledescription" attribute, if any
for i in range(0, len(node_attributes), 2):
attr_name_id = node_attributes[i]
attr_value_id = node_attributes[i + 1]
if attr_name_id == target_attr_name_id:
attr_value = dom_snapshot["strings"][attr_value_id]
# remove any data stored in the "aria-roledescription" attribute
if attr_value_id not in processed_string_ids:
_, new_attr_value = extract_data_items_from_aria(attr_value)
dom_snapshot["strings"][
attr_value_id
] = new_attr_value # update the string in the metadata
processed_string_ids.add(
attr_value_id
) # mark string as processed (in case several "aria-roledescription" attributes share the same value string)
attr_value = new_attr_value
# remove "aria-roledescription" attribute (name and value) if empty
if attr_value == "":
del node_attributes[i : i + 2]
# once "aria-roledescription" is found, exit the search
break
pop_bids_from_attribute(dom_snapshot, "aria-roledescription")
pop_bids_from_attribute(dom_snapshot, "aria-description")

return dom_snapshot


def pop_bids_from_attribute(dom_snapshot, attr: str):
    """
    Strip BrowserGym data from every occurrence of an attribute in a DOM snapshot, in place.

    Node attributes are read as flat lists of (name index, value index) pairs pointing
    into `dom_snapshot["strings"]`. For each node carrying `attr`, the value string is
    replaced by what `extract_data_items_from_aria` returns as remaining content, and
    the attribute pair is removed from the node when that remainder is empty.

    Args:
        dom_snapshot: snapshot dict with a "strings" list and a "documents" list.
        attr: name of the attribute to clean up.
    """
    strings = dom_snapshot["strings"]
    try:
        attr_name_ref = strings.index(attr)
    except ValueError:
        # the attribute name never appears in the snapshot: nothing to clean up
        return

    # value strings may be shared by several nodes; each one is rewritten only once
    cleaned_value_refs = set()
    for document in dom_snapshot["documents"]:
        for node_attributes in document["nodes"]["attributes"]:
            for pos in range(0, len(node_attributes), 2):
                name_ref = node_attributes[pos]
                value_ref = node_attributes[pos + 1]
                if name_ref != attr_name_ref:
                    continue
                if value_ref not in cleaned_value_refs:
                    _, remainder = extract_data_items_from_aria(
                        strings[value_ref], with_warning=False
                    )
                    strings[value_ref] = remainder
                    cleaned_value_refs.add(value_ref)
                # drop the attribute (name and value) once nothing is left in it
                if strings[value_ref] == "":
                    del node_attributes[pos : pos + 2]
                # only the first occurrence of the attribute on a node is handled
                break


def extract_dom_extra_properties(dom_snapshot):
def to_string(idx):
if idx == -1:
Expand Down Expand Up @@ -433,30 +436,34 @@ def extract_all_frame_axtrees(page: playwright.sync_api.Page):

cdp.detach()

# extract browsergym properties (bids, coordinates, etc.) from the "roledescription" property ("aria-roledescription" attribute)
# extract browsergym data from ARIA attributes
for ax_tree in frame_axtrees.values():
for node in ax_tree["nodes"]:
# look for the "roledescription" property
data_items = []
# look for data in the node's "roledescription" property
if "properties" in node:
for i, prop in enumerate(node["properties"]):
if prop["name"] == "roledescription":
data_items, new_value = extract_data_items_from_aria(prop["value"]["value"])
prop["value"]["value"] = new_value
# remove the "roledescription" property if empty
# remove the "description" property if empty
if new_value == "":
del node["properties"][i]
# add all extracted "browsergym" properties to the AXTree
if data_items:
(browsergym_id,) = data_items
node["properties"].append(
{
"name": "browsergym_id",
"value": {
"type": "string",
"value": browsergym_id,
},
}
)
break
# look for data in the node's "description" (fallback plan)
if "description" in node:
data_items_bis, new_value = extract_data_items_from_aria(
node["description"]["value"]
)
node["description"]["value"] = new_value
if new_value == "":
del node["description"]
if not data_items:
data_items = data_items_bis
# add the extracted "browsergym" data to the AXTree
if data_items:
(browsergym_id,) = data_items
node["browsergym_id"] = browsergym_id
return frame_axtrees


Expand Down
Loading