remove id from select.option and fix bugs

Skyvern-AI · Jun 3, 2024 · 4ffca62 · 4ffca62
1 parent c5dfb92
commit 4ffca62
Show file tree

Hide file tree

Showing 4 changed files with 30 additions and 24 deletions.
diff --git a/skyvern/forge/prompts/skyvern/extract-action.j2 b/skyvern/forge/prompts/skyvern/extract-action.j2
@@ -21,7 +21,6 @@ Reply in JSON format with the following keys:
             "label": str, // the label of the option if any. MAKE SURE YOU USE THIS LABEL TO SELECT THE OPTION. DO NOT PUT ANYTHING OTHER THAN A VALID OPTION LABEL HERE
             "index": int, // the index corresponding to the option index under the select element.
             "value": str // the value of the option. MAKE SURE YOU USE THIS VALUE TO SELECT THE OPTION. DO NOT PUT ANYTHING OTHER THAN A VALID OPTION VALUE HERE
-            "id": str // the id of the option.
         },
 {% if error_code_mapping_str %}
         "errors": array // A list of errors. This is used to surface any errors that matches the current situation for COMPLETE and TERMINATE actions. For other actions or if no error description suits the current situation on the screenshots, return an empty list. You are allowed to return multiple errors if there are multiple errors on the page.

diff --git a/skyvern/webeye/actions/actions.py b/skyvern/webeye/actions/actions.py
@@ -94,10 +94,9 @@ class SelectOption(BaseModel):
     label: str | None
     value: str | None
     index: int | None
-    id: str | None
 
     def __repr__(self) -> str:
-        return f"SelectOption(label={self.label}, value={self.value}, index={self.index}, id={self.id})"
+        return f"SelectOption(label={self.label}, value={self.value}, index={self.index})"
 
 
 class SelectOptionAction(WebAction):

diff --git a/skyvern/webeye/actions/handler.py b/skyvern/webeye/actions/handler.py
@@ -8,7 +8,7 @@
 from deprecation import deprecated
 from playwright.async_api import Locator, Page
 
-from skyvern.constants import REPO_ROOT_DIR
+from skyvern.constants import REPO_ROOT_DIR, SKYVERN_ID_ATTR
 from skyvern.exceptions import ImaginaryFileUrl, MissingElement, MissingFileUrl, MultipleElementsFound
 from skyvern.forge import app
 from skyvern.forge.prompts import prompt_engine
@@ -35,7 +35,6 @@
 from skyvern.webeye.actions.responses import ActionFailure, ActionResult, ActionSuccess
 from skyvern.webeye.browser_factory import BrowserState
 from skyvern.webeye.scraper.scraper import ScrapedPage
-from skyvern.constants import SKYVERN_ID_ATTR
 
 LOG = structlog.get_logger()
 TEXT_INPUT_DELAY = 10  # 10ms between each character input
@@ -176,7 +175,7 @@ async def handle_click_action(
             num_downloaded_files_before=num_downloaded_files_before,
             download_dir=download_dir,
         )
-    xpath = await validate_actions_in_dom(action, page, scraped_page)
+    xpath, frame = await validate_actions_in_dom(action, page, scraped_page)
     await asyncio.sleep(0.3)
     if action.download:
         results = await handle_click_to_download_file_action(action, page, scraped_page)
@@ -186,6 +185,7 @@ async def handle_click_action(
             page,
             action,
             xpath,
+            frame,
             timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS,
         )
 
@@ -285,7 +285,6 @@ async def handle_upload_file_action(
     if is_file_input:
         LOG.info("Taking UploadFileAction. Found file input tag", action=action)
         if file_path:
-
             locator = resolve_locator(page, frame, xpath)
 
             await locator.set_input_files(
@@ -551,8 +550,7 @@ async def handle_select_option_action(
 
 
 async def handle_checkbox_action(
-    self: actions.CheckboxAction,
-    action: Action,
+    action: actions.CheckboxAction,
     page: Page,
     scraped_page: ScrapedPage,
     task: Task,
@@ -565,11 +563,11 @@ async def handle_checkbox_action(
     Treating checkbox actions as click actions seem to perform way more reliably
     Developers who tried this and failed: 2 (Suchintan and Shu 😂)
     """
-    xpath, frame = await validate_actions_in_dom(self, page, scraped_page)
+    xpath, frame = await validate_actions_in_dom(action, page, scraped_page)
 
     locator = resolve_locator(page, frame, xpath)
 
-    if self.is_checked:
+    if action.is_checked:
         await locator.check(timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS)
     else:
         await locator.uncheck(timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS)
@@ -649,7 +647,7 @@ def get_actual_value_of_parameter_if_secret(task: Task, parameter: str) -> Any:
     return secret_value if secret_value is not None else parameter
 
 
-async def validate_actions_in_dom(action: WebAction, page: Page, scraped_page: ScrapedPage) -> (str, str):
+async def validate_actions_in_dom(action: WebAction, page: Page, scraped_page: ScrapedPage) -> tuple[str, str]:
     xpath = scraped_page.id_to_xpath_dict[action.element_id]
     frame = scraped_page.id_to_frame_dict[action.element_id]
 
@@ -976,7 +974,7 @@ async def click_listbox_option(
 
 
 def resolve_locator(page: Page, frame: str, xpath: str) -> Locator:
-    if frame == 'main':
+    if frame == "main":
         return page.locator(f"xpath={xpath}")
 
-    return page.frame_locator(f"[{SKYVERN_ID_ATTR}='{frame}']").locator(f"xpath={xpath}")
+    return page.frame_locator(f"[{SKYVERN_ID_ATTR}='{frame}']").locator(f"xpath={xpath}")
diff --git a/skyvern/webeye/scraper/scraper.py b/skyvern/webeye/scraper/scraper.py
@@ -6,7 +6,7 @@
 from typing import Any
 
 import structlog
-from playwright.async_api import Page, Frame
+from playwright.async_api import Frame, Page
 from pydantic import BaseModel
 
 from skyvern.constants import SKYVERN_DIR, SKYVERN_ID_ATTR
@@ -92,7 +92,7 @@ def json_to_html(element: dict) -> str:
     children_html = "".join(json_to_html(child) for child in element.get("children", []))
     # build option HTML
     option_html = "".join(
-        f'<option index="{option.get("optionIndex")}" id="{option.get("id")}">{option.get("text")}</option>'
+        f'<option index="{option.get("optionIndex")}">{option.get("text")}</option>'
         for option in element.get("options", [])
     )
 
@@ -187,6 +187,7 @@ async def scrape_website(
             num_retry=num_retry,
         )
 
+
 async def get_frame_text(iframe: Frame) -> str:
     """
     Get all the visible text in the iframe.
@@ -197,8 +198,12 @@ async def get_frame_text(iframe: Frame) -> str:
 
     try:
         text = await iframe.evaluate(js_script)
-    except:
-        return ''
+    except Exception:
+        LOG.warning(
+            "failed to get text from iframe",
+            exc_info=True,
+        )
+        return ""
 
     for child_frame in iframe.child_frames:
         if child_frame.is_detached():
@@ -208,6 +213,7 @@ async def get_frame_text(iframe: Frame) -> str:
 
     return text
 
+
 async def scrape_web_unsafe(
     browser_state: BrowserState,
     url: str,
@@ -308,10 +314,14 @@ async def get_interactable_element_tree(page: Page) -> tuple[list[dict], list[di
 
         try:
             frame_element = await frame.frame_element()
-        except:
+        except Exception:
+            LOG.warning(
+                "Unable to get frame_element",
+                exc_info=True,
+            )
             continue
 
-        unique_id = await frame_element.get_attribute('unique_id')
+        unique_id = await frame_element.get_attribute("unique_id")
 
         frame_js_script = f"() => buildTreeFromBody('{unique_id}')"
 
@@ -321,12 +331,12 @@ async def get_interactable_element_tree(page: Page) -> tuple[list[dict], list[di
         elements = elements + frame_elements
 
         for element in elements:
-            if element['id'] == unique_id:
-                element['children'] = frame_elements
+            if element["id"] == unique_id:
+                element["children"] = frame_elements
 
         for element_tree_item in element_tree:
-            if element_tree_item['id'] == unique_id:
-                element_tree_item['children'] = frame_element_tree
+            if element_tree_item["id"] == unique_id:
+                element_tree_item["children"] = frame_element_tree
 
     return elements, element_tree