In [1]:
import time, json, torch
from pathlib import Path
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from sentence_transformers import SentenceTransformer, util

In [2]:
CHROME_DRIVER_PATH = r"C:\Users\ojasw\Downloads\chromedriver-win64\chromedriver.exe"
DATASET_DIR = Path("dataset")
DATASET_DIR.mkdir(exist_ok=True)

model = SentenceTransformer("all-MiniLM-L6-v2")

In [3]:
from selenium.common.exceptions import StaleElementReferenceException

def capture_state(driver, state_name, catalog, actions, outdir):
    try:
        screenshot_path = outdir / f"{len(catalog)}_{state_name}.png"
        driver.save_screenshot(str(screenshot_path))
        catalog.append({
            "name": state_name,
            "url": driver.current_url,
            "screenshot": str(screenshot_path),
            "timestamp": time.time(),
            "actions_so_far": list(actions)
        })
        print(f"[+] Captured state: {state_name}")
    except Exception as e:
        print(f"[!] Failed to capture state '{state_name}': {e}")

def get_text(el):
    try:
        return (el.text or el.get_attribute("aria-label") or el.get_attribute("title") or "").strip()
    except StaleElementReferenceException:
        print("[!] get_text: Element became stale")
        return ""
    except Exception as e:
        print(f"[!] get_text error: {e}")
        return ""

def find_best_match(elements, target_text, threshold=0.25):
    try:
        texts = [(el, get_text(el)) for el in elements if get_text(el)]
        if not texts:
            return None, 0
        cand_texts = [t for _, t in texts]
        cand_emb = model.encode(cand_texts, convert_to_tensor=True, normalize_embeddings=True)
        target_emb = model.encode([target_text], convert_to_tensor=True, normalize_embeddings=True)
        sims = util.cos_sim(cand_emb, target_emb).cpu().squeeze(1)
        best_idx = torch.argmax(sims).item()
        best_score = sims[best_idx].item()
        if best_score < threshold:
            return None, best_score
        return texts[best_idx][0], best_score
    except StaleElementReferenceException:
        print("[!] find_best_match: Some elements became stale, skipping.")
        return None, 0
    except Exception as e:
        print(f"[!] find_best_match error: {e}")
        return None, 0

def get_clickables(driver, scope=None):
    try:
        if scope is None:
            scope = driver
        return scope.find_elements(By.CSS_SELECTOR, "a, button, [role='button'], [tabindex]")
    except StaleElementReferenceException:
        print("[!] get_clickables: DOM changed, retrying empty list")
        return []
    except Exception as e:
        print(f"[!] get_clickables error: {e}")
        return []

def safe_click(driver, element):
    try:
        driver.execute_script("arguments[0].scrollIntoView({behavior:'smooth',block:'center'});", element)
        time.sleep(0.3)
        element.click()
        return True
    except StaleElementReferenceException:
        print("[!] safe_click: Element became stale before click")
        return False
    except Exception as e:
        print(f"[!] Click failed: {e}")
        return False

def infer_app_type(url: str):
    if "linear.app" in url:
        return "linear"
    elif "notion.so" in url:
        return "notion"
    return "generic"


# ---------- CORE TASK LOGIC ----------
def run_task(driver, task_name, url):
    try:
        app_name = infer_app_type(url)
        safe_name = f"{app_name}_{task_name.replace(' ', '_')}"
        OUTDIR = DATASET_DIR / safe_name
        OUTDIR.mkdir(exist_ok=True)

        catalog, actions = [], []
        driver.get(url)
        time.sleep(3)
        capture_state(driver, "entry_page", catalog, actions, OUTDIR)

        # Locate sidebar (if exists)
        sidebars = driver.find_elements(By.CSS_SELECTOR, "aside, nav, [class*=Sidebar]")
        sidebar = sidebars[0] if sidebars else None
        if sidebar:
            sidebar_clickables = get_clickables(driver, sidebar)
            best_tab, score = find_best_match(sidebar_clickables, task_name)
            if best_tab:
                print(f"[>] Sidebar match: '{get_text(best_tab)}' (score={score:.3f})")
                if safe_click(driver, best_tab):
                    time.sleep(1.5)
                    capture_state(driver, "after_sidebar_click", catalog, actions, OUTDIR)
                    actions.append({"action": "click_sidebar", "label": get_text(best_tab)})

        # Click main page action (e.g. '+ New Project', 'Add Todo')
        page_clickables = get_clickables(driver)
        best_action, score = find_best_match(page_clickables, task_name)
        if best_action:
            print(f"[>] Page action match: '{get_text(best_action)}' (score={score:.3f})")
            if safe_click(driver, best_action):
                time.sleep(2)
                capture_state(driver, "after_action_click", catalog, actions, OUTDIR)
                actions.append({"action": "click_action", "label": get_text(best_action)})

        # Handle modals/forms that appear after click
        modals = driver.find_elements(By.XPATH, "//div[contains(@class,'modal') or contains(@role,'dialog')]")
        for idx, modal in enumerate(modals):
            capture_state(driver, f"modal_{idx}", catalog, actions, OUTDIR)
            modal_clickables = get_clickables(driver, modal)
            best_submit, score = find_best_match(modal_clickables, f"submit {task_name}")
            if not best_submit:
                best_submit, score = find_best_match(modal_clickables, "submit")
            if not best_submit:
                best_submit, score = find_best_match(modal_clickables, "create")
            if best_submit:
                print(f"[>] Modal submit match: '{get_text(best_submit)}' (score={score:.3f})")
                if safe_click(driver, best_submit):
                    time.sleep(2)
                    capture_state(driver, f"modal_{idx}_submitted", catalog, actions, OUTDIR)
                    actions.append({"action": "submit_modal", "label": get_text(best_submit)})

        # Post-submit: capture final state
        time.sleep(1.5)
        capture_state(driver, "final_state", catalog, actions, OUTDIR)

        # Save log
        out_json = OUTDIR / f"{safe_name}_catalog.json"
        out_json.write_text(json.dumps(catalog, indent=2), encoding="utf-8")
        print(f"[✓] {task_name} saved -> {out_json}")

    except StaleElementReferenceException:
        print(f"[!] DOM changed unexpectedly during '{task_name}', skipping to next task safely.")
    except Exception as e:
        print(f"[!] Unexpected error in '{task_name}': {e}")


def explore_tasks_sequential(task_list, url):
    chrome_opts = Options()
    chrome_opts.add_argument("--start-maximized")
    chrome_opts.add_argument("--disable-blink-features=AutomationControlled")
    chrome_opts.add_experimental_option("excludeSwitches", ["enable-automation"])
    chrome_opts.add_experimental_option('useAutomationExtension', False)

    service = Service(CHROME_DRIVER_PATH)
    driver = webdriver.Chrome(service=service, options=chrome_opts)
    driver.get(url)
    print("Please manually log in. After login, press ENTER to start all tasks.")
    input()

    for task_name in task_list:
        print(f"\n========== RUNNING TASK: {task_name} ==========")
        try:
            run_task(driver, task_name, url)
        except Exception as e:
            print(f"[!] Error running task '{task_name}': {e}")
        print(f"Finished: {task_name}")
        time.sleep(2)

    driver.quit()
    print("\nAll tasks completed successfully!")


In [6]:
if __name__ == "__main__":
    linear_tasks = ["invite people", "create project", "create view", "add issues"]
    linear_url = "https://linear.app/jazzcodes/team/JAZ/active"

    notion_tasks = ["collaborate","add new page"]
    notion_url = "https://www.notion.so/63858de365ad42c78d85528761a654e2"

    explore_tasks_sequential(linear_tasks, linear_url)
    #explore_tasks_sequential(notion_tasks, notion_url)

Please manually log in. After login, press ENTER to start all tasks.


 



[+] Captured state: entry_page
[>] Sidebar match: 'Invite people' (score=1.000)
[+] Captured state: after_sidebar_click
[>] Page action match: 'Invite people' (score=1.000)
[!] Click failed: Message: element click intercepted: Element <div tabindex="0" class="sc-jCttAn cPppzc sc-jBgFVJ hceBeW sc-dSLnXq bPmWGZ hide-during-bootstrap" data-menu-open="false">...</div> is not clickable at point (122, 489). Other element would receive the click: <div aria-modal="true" role="dialog" class="sc-UzPqf bhmqre" data-focus-trap-active="true">...</div>
  (Session info: chrome=142.0.7444.60)
Stacktrace:
Symbols not available. Dumping unresolved backtrace:
	0x7ff648377a35
	0x7ff648377a90
	0x7ff6480f16ad
	0x7ff648152359
	0x7ff64814fcde
	0x7ff64814cc11
	0x7ff64814bad0
	0x7ff64813d2c8
	0x7ff64817297a
	0x7ff64813cb56
	0x7ff64819b8fb
	0x7ff64813b068
	0x7ff64813be93
	0x7ff6486329d0
	0x7ff64862ce50
	0x7ff64864cc45
	0x7ff6483930ce
	0x7ff64839adbf
	0x7ff648380c14
	0x7ff648380dcf
	0x7ff648366828
	0x7fff3212e8d