In [1]:
import time, json
from pathlib import Path
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

In [2]:
# ---------- CONFIG ----------
# Path to the chromedriver executable handed to selenium's ``Service``.
# NOTE(review): this is a machine-specific Windows path - adjust it for the
# environment the notebook runs in.
CHROME_DRIVER_PATH = r"C:\Users\ojasw\Downloads\chromedriver-win64\chromedriver.exe"
# Root folder (relative to the current working directory) under which each
# task gets its own sub-folder of screenshots plus a catalog JSON file.
DATASET_DIR = Path("dataset")
# Create the root folder up front; a pre-existing folder is not an error.
DATASET_DIR.mkdir(exist_ok=True)

In [7]:
# ---------- UTILS ----------
def capture_state(driver, state_name, catalog, actions, outdir):
    """Screenshot the current page and append a record of it to ``catalog``.

    The image is saved in ``outdir`` as ``<index>_<state_name>.png``, where
    ``<index>`` is the number of records already in ``catalog``. The new
    record holds the state name, the driver's current URL, the screenshot
    path, a ``time.time()`` timestamp and a shallow copy of ``actions`` as
    it stands at the moment of capture.
    """
    index = len(catalog)
    image_path = str(outdir / f"{index}_{state_name}.png")
    driver.save_screenshot(image_path)

    record = {
        "name": state_name,
        "url": driver.current_url,
        "screenshot": image_path,
        "timestamp": time.time(),
        # Copy so later appends to ``actions`` do not rewrite this record.
        "actions_so_far": [*actions],
    }
    catalog.append(record)
    print(f"[+] Captured state: {state_name}")

def find_sidebar_tab(driver, tab_keywords):
    """Return the first sidebar button or link whose label matches a keyword.

    Candidates are ``<button>`` and ``<a>`` elements nested inside a ``<div>``
    whose class contains ``sidebar``. An element's label is its visible text,
    falling back to its ``aria-label`` attribute. Matching is a
    case-insensitive substring test.

    Args:
        driver: Object exposing selenium's ``find_elements(by, value)``.
        tab_keywords: Iterable of keyword strings to look for in the label.

    Returns:
        The first matching element in document order, or ``None`` when
        nothing matches (always the case for an empty keyword list).
    """
    # In XPath the union operator ``|`` separates complete path expressions,
    # so the sidebar prefix must be repeated on each branch. Writing
    # ``<sidebar>//button|//a`` would pull in every link on the page.
    sidebar = "//div[contains(@class,'sidebar')]"
    xpath = f"{sidebar}//button|{sidebar}//a"

    for tab in driver.find_elements(By.XPATH, xpath):
        label = (tab.text or tab.get_attribute("aria-label") or "").lower()
        if any(keyword.lower() in label for keyword in tab_keywords):
            return tab
    return None

def find_action_button(driver, action_keywords):
    """Return the first clickable element whose label contains a keyword.

    Every ``<button>``, ``<a>`` and ``role="button"`` element on the page is
    inspected in the order the driver returns them. The label is the
    element's text, or its ``aria-label`` when the text is empty; the
    comparison is a case-insensitive substring test. Returns ``None`` when
    no element matches.
    """
    clickable_xpath = "//button|//a|//*[@role='button']"
    for candidate in driver.find_elements(By.XPATH, clickable_xpath):
        label = candidate.text
        if not label:
            label = candidate.get_attribute("aria-label")
        if not label:
            label = ""
        label = label.lower()

        for keyword in action_keywords:
            if keyword.lower() in label:
                return candidate
    return None

def infer_app_type(url: str) -> str:
    """Classify ``url`` as ``"linear"``, ``"notion"`` or ``"generic"``.

    The decision is a plain substring test on the URL; ``linear.app`` is
    checked before ``notion.so``, and anything else is ``"generic"``.
    """
    known_apps = (
        ("linear.app", "linear"),
        ("notion.so", "notion"),
    )
    for marker, app_type in known_apps:
        if marker in url:
            return app_type
    return "generic"

def get_app_keywords(app_type: str, task_name: str):
    """Look up the search keywords for a task in a given app.

    Returns a ``(sidebar_keywords, action_keywords)`` pair of fresh lists.
    The task name is matched case-insensitively by substring. For Linear,
    "project" is tested before "todo"/"task"; for Notion, "todo"/"task" is
    tested before "database"/"project". Unknown apps or tasks give two
    empty lists.
    """
    task = task_name.lower()
    wants_project = "project" in task
    wants_todo = "todo" in task or "task" in task

    if app_type == "linear":
        if wants_project:
            sidebar = ["project", "projects"]
            buttons = ["create project", "+ project", "new project"]
            return sidebar, buttons
        if wants_todo:
            sidebar = ["issues", "your team", "todo", "tasks"]
            buttons = ["add todo", "+", "new issue"]
            return sidebar, buttons
        return [], []

    if app_type == "notion":
        if wants_todo:
            sidebar = ["tasks", "to do", "personal", "home"]
            buttons = ["new", "+", "add task", "create"]
            return sidebar, buttons
        if "database" in task or wants_project:
            sidebar = ["workspace", "projects", "pages"]
            buttons = ["new database", "create", "add page"]
            return sidebar, buttons

    return [], []

# ---------- TASK RUNNER ----------
def _click_and_capture(driver, element, *, action, state_name, catalog,
                       actions, outdir, settle, scroll=False):
    """Click ``element``, log the action, then screenshot the resulting page.

    Args:
        driver: The open selenium driver.
        element: The element to click.
        action: Action name stored in the log entry (e.g. ``"submit"``).
        state_name: Name given to the state captured after the click.
        catalog: List of captured states, extended in place.
        actions: List of performed actions, extended in place.
        outdir: Folder the screenshot is written to.
        settle: Seconds to wait after the click before capturing.
        scroll: When true, scroll the element into view before clicking.

    Returns:
        ``True`` when the click and capture succeeded, ``False`` when any
        step raised; the failure is printed and not re-raised.
    """
    try:
        # Read the label before clicking: once the click re-renders or
        # navigates the page, the element reference may no longer be usable.
        label = element.text
        if scroll:
            driver.execute_script("arguments[0].scrollIntoView(true);", element)
            time.sleep(0.3)
        element.click()
        time.sleep(settle)
        # Log the action before capturing so that the state's
        # "actions_so_far" includes the click that produced it.
        actions.append({"action": action, "label": label})
        capture_state(driver, state_name, catalog, actions, outdir)
    except Exception as exc:
        # Best-effort exploration: one un-clickable element (for example a
        # click intercepted by an overlay) must not abort the whole task.
        print(f"[!] Could not {action}: {exc}")
        return False
    return True


def run_task(driver, app_type, task_name, url):
    """Run a single task exploration on an already-open driver.

    Loads ``url``, then tries in order: click a matching sidebar tab, click
    a matching main action button, capture one state per open modal/dialog,
    and click a submit-like button. Each successful step is screenshotted
    via ``capture_state``. Every click is best-effort: a failure is printed
    and the exploration carries on. The resulting catalog is written to
    ``DATASET_DIR/<app_type>_<task_name>/<app_type>_<task_name>_catalog.json``.

    Args:
        driver: An open (and already logged-in) selenium driver.
        app_type: App identifier used for keyword lookup and folder naming.
        task_name: Human-readable task; spaces become underscores in paths.
        url: Page to load before exploring.
    """
    safe_name = f"{app_type}_{task_name.replace(' ', '_')}"
    outdir = DATASET_DIR / safe_name
    outdir.mkdir(parents=True, exist_ok=True)

    catalog, actions = [], []

    # Navigate to page
    driver.get(url)
    time.sleep(2)
    capture_state(driver, "entry_page", catalog, actions, outdir)

    sidebar_keywords, action_keywords = get_app_keywords(app_type, task_name)

    sidebar_tab = find_sidebar_tab(driver, sidebar_keywords)
    if sidebar_tab is not None:
        print(f"[>] Clicking sidebar tab for {task_name}")
        _click_and_capture(
            driver, sidebar_tab,
            action="click_sidebar", state_name="sidebar_tab",
            catalog=catalog, actions=actions, outdir=outdir,
            settle=1,
        )

    action_btn = find_action_button(driver, action_keywords)
    if action_btn is not None:
        print(f"[>] Clicking main action button: {action_btn.text}")
        _click_and_capture(
            driver, action_btn,
            action="click_button", state_name="main_action_button",
            catalog=catalog, actions=actions, outdir=outdir,
            settle=1.2, scroll=True,
        )

    modals = driver.find_elements(By.XPATH, "//div[contains(@class,'modal') or contains(@class,'Dialog') or contains(@role,'dialog')]")
    for idx, _ in enumerate(modals):
        print(f"[>] Capturing modal/form #{idx}")
        capture_state(driver, f"modal_{idx}", catalog, actions, outdir)

    submit_btn = find_action_button(driver, ["create", "submit", "add", "done"])
    if submit_btn is not None:
        _click_and_capture(
            driver, submit_btn,
            action="submit", state_name="success_state",
            catalog=catalog, actions=actions, outdir=outdir,
            settle=2, scroll=True,
        )

    out_json = outdir / f"{safe_name}_catalog.json"
    out_json.write_text(json.dumps(catalog, indent=2), encoding="utf-8")
    print(f"{task_name} saved -> {out_json}")

# ---------- MAIN ----------
def explore_tasks_sequential(task_list, url):
    """Open Chrome, wait for a manual login, then run every task in order.

    The app type is inferred from ``url``. A single Chrome session is
    started with flags that hide the automation banner/switches, pointed at
    ``url``, and the function blocks on ``input()`` until the user confirms
    they have logged in. Each task in ``task_list`` is then explored with
    ``run_task`` against the same ``url``.

    The browser is always shut down, even when a task raises; in that case
    the exception propagates and the success message is not printed.

    Args:
        task_list: Iterable of task names, run in the given order.
        url: Start URL used for login and for every task.
    """
    app_type = infer_app_type(url)
    print(f"[INFO] App detected: {app_type}")

    chrome_opts = Options()
    chrome_opts.add_argument("--start-maximized")
    chrome_opts.add_argument("--disable-blink-features=AutomationControlled")
    chrome_opts.add_experimental_option("excludeSwitches", ["enable-automation"])
    chrome_opts.add_experimental_option('useAutomationExtension', False)

    service = Service(CHROME_DRIVER_PATH)
    driver = webdriver.Chrome(service=service, options=chrome_opts)
    try:
        driver.get(url)
        print("Please manually log in. After login, press ENTER to start all tasks.")
        input()

        for task_name in task_list:
            print(f"\n========== RUNNING TASK: {task_name} ==========")
            run_task(driver, app_type, task_name, url)
            print(f"Finished: {task_name}")
            time.sleep(2)
    finally:
        # Always release the browser and chromedriver processes, otherwise a
        # failing task leaves them running.
        driver.quit()

    print("\nAll tasks completed successfully!")

In [8]:
if __name__ == "__main__":
    # Task lists and start URLs for the two supported apps. Task names are
    # matched by keyword in get_app_keywords, so wording matters.
    linear_tasks = ["create project", "add todo"]
    linear_url = "https://linear.app/jazzcodes/team/JAZ/active"

    notion_tasks = ["add todo"]
    notion_url = "https://www.notion.so/63858de365ad42c78d85528761a654e2"

    # Only one app is explored per run: each call opens its own browser and
    # waits for a manual login. Swap which call is commented out to switch.
    #explore_tasks_sequential(linear_tasks, linear_url)
    explore_tasks_sequential(notion_tasks, notion_url)

[INFO] App detected: notion
Please manually log in. After login, press ENTER to start all tasks.


 



[+] Captured state: entry_page
[>] Clicking sidebar tab for add todo
[+] Captured state: sidebar_tab
[>] Clicking main action button: 
[+] Captured state: main_action_button
[!] Could not submit: Message: element click intercepted: Element <div role="button" tabindex="0" class="shadow-cursor-new-page-sidebar" aria-label="Add a page" style="user-select: none; transition: background 20ms ease-in; cursor: pointer; display: flex; align-items: center; justify-content: center; height: 20px; width: 20px; border-radius: 4px; margin-inline-start: 4px;">...</div> is not clickable at point (214, 196). Other element would receive the click: <div style="display: grid; align-items: end;">...</div>
  (Session info: chrome=142.0.7444.60)
Stacktrace:
Symbols not available. Dumping unresolved backtrace:
	0x7ff648377a35
	0x7ff648377a90
	0x7ff6480f16ad
	0x7ff648152359
	0x7ff64814fcde
	0x7ff64814cc11
	0x7ff64814bad0
	0x7ff64813d2c8
	0x7ff64817297a
	0x7ff64813cb56
	0x7ff64819b8fb
	0x7ff64813b068
	0x7ff6481