Skip to content

Commit

Permalink
Add more context to element tree (Skyvern-AI#207)
Browse files: browse the repository at this point in the history
  • Loading branch information
ykeremy committed Apr 18, 2024
1 parent a02bf6d commit 0a49254
Showing 1 changed file with 33 additions and 1 deletion.
34 changes: 33 additions & 1 deletion skyvern/webeye/scraper/domUtils.js
Original file line number | Diff line number | Diff line change
Expand Up @@ -408,13 +408,15 @@ function cleanupText(text) {
).trim();
}

function getElementContext(element) {
function getElementContext(element, existingContext = "") {
// dfs to collect the non unique_id context
let fullContext = "";
if (element.childNodes.length === 0) {
return fullContext;
}
let childContextList = new Array();
// if the element already has a context, then add it to the list first
if (existingContext.length > 0) childContextList.push(existingContext);
for (var child of element.childNodes) {
let childContext = "";
if (child.nodeType === Node.TEXT_NODE) {
Expand Down Expand Up @@ -791,6 +793,36 @@ function buildTreeFromBody() {
if (context && context.length <= 1000) {
element.context = context;
}

// pass element's parent's context to the element for listed tags
let tagsWithDirectParentContext = new Set(["a"]);
// if the element is a child of a td, th, or tr, then pass the grandparent's context to the element
let parentTagsThatDelegateParentContext = new Set(["td", "th", "tr"]);
if (tagsWithDirectParentContext.has(element.tagName)) {
let parentElement = document.querySelector(
`[unique_id="${element.id}"]`,
).parentElement;
if (!parentElement) {
continue;
}
if (
parentTagsThatDelegateParentContext.has(
parentElement.tagName.toLowerCase(),
)
) {
let grandParentElement = parentElement.parentElement;
if (grandParentElement) {
let context = getElementContext(grandParentElement, element.context);
if (context.length > 0) {
element.context = context;
}
}
}
let context = getElementContext(parentElement, element.context);
if (context.length > 0) {
element.context = context;
}
}
}

return [elements, resultArray];
Expand Down

0 comments on commit 0a49254

Please sign in to comment.