BrowserOperator
diff --git a/front_end/panels/ai_chat/BUILD.gn b/front_end/panels/ai_chat/BUILD.gn
Lines changed: 9 additions & 0 deletions
diff --git a/front_end/panels/ai_chat/agent_framework/implementation/ConfiguredAgents.ts b/front_end/panels/ai_chat/agent_framework/implementation/ConfiguredAgents.ts
Lines changed: 2 additions & 0 deletions
diff --git a/front_end/panels/ai_chat/agent_framework/implementation/agents/ResearchAgent.ts b/front_end/panels/ai_chat/agent_framework/implementation/agents/ResearchAgent.ts
Lines changed: 40 additions & 25 deletions
diff --git a/front_end/panels/ai_chat/agent_framework/implementation/agents/SearchAgent.ts b/front_end/panels/ai_chat/agent_framework/implementation/agents/SearchAgent.ts
Lines changed: 4 additions & 4 deletions
diff --git a/front_end/panels/ai_chat/evaluation/test-cases/html-to-markdown-tests.ts b/front_end/panels/ai_chat/evaluation/test-cases/html-to-markdown-tests.ts
Lines changed: 2 additions & 0 deletions
diff --git a/front_end/panels/ai_chat/tools/FetcherTool.ts b/front_end/panels/ai_chat/tools/FetcherTool.ts
Lines changed: 17 additions & 43 deletions
diff --git a/front_end/panels/ai_chat/tools/HTMLToMarkdownTool.ts b/front_end/panels/ai_chat/tools/HTMLToMarkdownTool.ts
Lines changed: 2 additions & 2 deletions
@@ -69,6 +69,7 @@ devtools_module("ai_chat") {
     "ui/ConversationHistoryList.ts",
     "ui/conversationHistoryStyles.ts",
     "ui/CustomProviderDialog.ts",
+    "ui/customProviderStyles.ts",
     "ai_chat_impl.ts",
     "models/ChatTypes.ts",
     "persistence/ConversationTypes.ts",
@@ -120,6 +121,7 @@ devtools_module("ai_chat") {
     "tools/FinalizeWithCritiqueTool.ts",
     "tools/VisitHistoryManager.ts",
     "tools/HTMLToMarkdownTool.ts",
+    "tools/ReadabilityExtractorTool.ts",
     "tools/SchemaBasedExtractorTool.ts",
     "tools/StreamlinedSchemaExtractorTool.ts",
     "tools/CombinedExtractionTool.ts",
@@ -174,6 +176,7 @@ devtools_module("ai_chat") {
     "evaluation/test-cases/research-agent-tests.ts",
     "evaluation/test-cases/action-agent-tests.ts",
     "evaluation/test-cases/web-task-agent-tests.ts",
+    "evaluation/test-cases/html-to-markdown-tests.ts",
     "evaluation/runner/EvaluationRunner.ts",
     "evaluation/runner/VisionAgentEvaluationRunner.ts",
     "common/MarkdownViewerUtil.ts",
@@ -183,6 +186,8 @@ devtools_module("ai_chat") {
     "common/page.ts",
     "common/WebSocketRPCClient.ts",
     "common/EvaluationConfig.ts",
+    "utils/ContentChunker.ts",
+    "vendor/readability-source.ts",
     "evaluation/remote/EvaluationProtocol.ts",
     "evaluation/remote/EvaluationAgent.ts",
     "tracing/TracingProvider.ts",
@@ -319,6 +324,7 @@ _ai_chat_sources = [
     "tools/FinalizeWithCritiqueTool.ts",
     "tools/VisitHistoryManager.ts",
     "tools/HTMLToMarkdownTool.ts",
+    "tools/ReadabilityExtractorTool.ts",
     "tools/SchemaBasedExtractorTool.ts",
     "tools/StreamlinedSchemaExtractorTool.ts",
     "tools/CombinedExtractionTool.ts",
@@ -373,6 +379,7 @@ _ai_chat_sources = [
     "evaluation/test-cases/research-agent-tests.ts",
     "evaluation/test-cases/action-agent-tests.ts",
     "evaluation/test-cases/web-task-agent-tests.ts",
+    "evaluation/test-cases/html-to-markdown-tests.ts",
     "evaluation/runner/EvaluationRunner.ts",
     "evaluation/runner/VisionAgentEvaluationRunner.ts",
     "common/MarkdownViewerUtil.ts",
@@ -382,6 +389,8 @@ _ai_chat_sources = [
     "common/page.ts",
     "common/WebSocketRPCClient.ts",
     "common/EvaluationConfig.ts",
+    "utils/ContentChunker.ts",
+    "vendor/readability-source.ts",
     "evaluation/remote/EvaluationProtocol.ts",
     "evaluation/remote/EvaluationAgent.ts",
     "tracing/TracingProvider.ts",
 
@@ -12,6 +12,7 @@ import { NavigateURLTool, PerformActionTool, GetAccessibilityTreeTool, SearchCon
 import { UpdateTodoTool } from '../../tools/UpdateTodoTool.js';
 import { ExecuteCodeTool } from '../../tools/ExecuteCodeTool.js';
 import { HTMLToMarkdownTool } from '../../tools/HTMLToMarkdownTool.js';
+import { ReadabilityExtractorTool } from '../../tools/ReadabilityExtractorTool.js';
 import { ConfigurableAgentTool, ToolRegistry } from '../ConfigurableAgentTool.js';
 import { ThinkingTool } from '../../tools/ThinkingTool.js';
 import { registerMCPMetaTools } from '../../mcp/MCPMetaTools.js';
@@ -48,6 +49,7 @@ export function initializeConfiguredAgents(): void {
   ToolRegistry.registerToolFactory('search_content', () => new SearchContentTool());
   ToolRegistry.registerToolFactory('take_screenshot', () => new TakeScreenshotTool());
   ToolRegistry.registerToolFactory('html_to_markdown', () => new HTMLToMarkdownTool());
+  ToolRegistry.registerToolFactory('readability_extractor', () => new ReadabilityExtractorTool());
   ToolRegistry.registerToolFactory('scroll_page', () => new ScrollPageTool());
   ToolRegistry.registerToolFactory('wait_for_page_load', () => new WaitTool());
   ToolRegistry.registerToolFactory('thinking', () => new ThinkingTool());
 
@@ -45,7 +45,7 @@ export function createResearchAgentConfig(): AgentToolConfig {
 ## Key Tools
 - **navigate_url + fetcher_tool**: Primary research loop
 - **extract_data**: Structured data extraction with JSON schema
-- **html_to_markdown**: Clean page text extraction
+- **readability_extractor**: Fast plain text extraction
 - **create_file/update_file/read_file/list_files**: Persist and track findings across iterations
 
 ## Quality Standards
@@ -97,7 +97,7 @@ Example for "AI trends in 2025": ai-trends-2025_research.md, ai-trends-2025_sour
       'fetcher_tool',
       'extract_data',
       'node_ids_to_urls',
-      'html_to_markdown',
+      'readability_extractor',
       'create_file',
       'update_file',
       'read_file',
@@ -173,8 +173,8 @@ ${args.scope ? `The scope of research expected: ${args.scope}` : ''}
                 // Only save successful fetches with content
                 if (source.success && source.markdownContent && source.markdownContent.trim().length > 0) {
                   try {
-                    // Create a sanitized filename from the URL
-                    const filename = sanitizeUrlToFilename(source.url);
+                    // Create a sanitized filename from the URL and title
+                    const filename = sanitizeUrlToFilename(source.url, source.title);
 
                     // Create file content with metadata header
                     const fileContent = `# ${source.title || 'Untitled'}
@@ -232,31 +232,46 @@ ${source.markdownContent}`;
 }
 
 /**
- * Sanitize a URL to create a safe filename
+ * Build a safe filename from a URL and an optional page title.
+ * Prefers a title-based name for readability; falls back to a URL-based name when the title is missing or empty after sanitization.
  */
-function sanitizeUrlToFilename(url: string): string {
+function sanitizeUrlToFilename(url: string, title?: string): string {
   try {
-    const urlObj = new URL(url);
-
-    // Extract domain and path
-    let domain = urlObj.hostname.replace(/^www\./, '');
-    let path = urlObj.pathname.replace(/^\//, '').replace(/\/$/, '');
-
-    // Create a base name from domain and path
-    let baseName = domain;
-    if (path) {
-      // Take first 2 path segments for readability
-      const pathParts = path.split('/').filter(p => p.length > 0);
-      if (pathParts.length > 0) {
-        baseName += '-' + pathParts.slice(0, 2).join('-');
-      }
+    let baseName = '';
+
+    // Prefer title if available
+    if (title && title.trim()) {
+      baseName = title
+        .trim()
+        .toLowerCase()
+        .replace(/[^a-zA-Z0-9\s-]/g, '')  // Remove special characters
+        .replace(/\s+/g, '-')              // Convert spaces to dashes
+        .replace(/-+/g, '-')               // Collapse multiple dashes
+        .replace(/^-|-$/g, '')             // Remove leading/trailing dashes
+        .substring(0, 60);                 // Limit length for readability
     }
 
-    // Remove special characters and limit length
-    baseName = baseName
-      .replace(/[^a-zA-Z0-9-_]/g, '-')
-      .replace(/-+/g, '-')
-      .substring(0, 80);
+    // Fallback to URL-based name if no title or title is empty after sanitization
+    if (!baseName) {
+      const urlObj = new URL(url);
+      let domain = urlObj.hostname.replace(/^www\./, '');
+      let path = urlObj.pathname.replace(/^\//, '').replace(/\/$/, '');
+
+      baseName = domain;
+      if (path) {
+        // Take first 2 path segments for readability
+        const pathParts = path.split('/').filter(p => p.length > 0);
+        if (pathParts.length > 0) {
+          baseName += '-' + pathParts.slice(0, 2).join('-');
+        }
+      }
+
+      // Remove special characters and limit length
+      baseName = baseName
+        .replace(/[^a-zA-Z0-9-_]/g, '-')
+        .replace(/-+/g, '-')
+        .substring(0, 60);
+    }
 
     // Add a short hash of the full URL to prevent collisions
     const hash = simpleHash(url).substring(0, 8);
 
@@ -32,7 +32,7 @@ export function createSearchAgentConfig(): AgentToolConfig {
 3. **Collect leads**:
    - Use navigate_url to reach the most relevant search entry point (search engines, directories, LinkedIn public results, company pages, press releases).
    - Use extract_data with an explicit JSON schema every time you capture structured search results. Prefer capturing multiple leads in one call.
-   - Batch follow-up pages with fetcher_tool, and use html_to_markdown when you need to confirm context inside long documents.
+   - Batch follow-up pages with fetcher_tool, and use readability_extractor when you need to confirm context inside long documents.
    - After each significant batch of new leads or fetcher_tool response, immediately persist the harvested candidates (including query, timestamp, and confidence notes) by appending to a coordination file via 'create_file'/'update_file'. This keeps other subtasks aligned and prevents redundant scraping.
 4. **Mandatory Pagination Loop (ENFORCED)**:
    - Harvest target per task: collect 30–50 unique candidates before enrichment (unless the user specifies otherwise). Absolute minimum 25 when the request requires it.
@@ -58,7 +58,7 @@ export function createSearchAgentConfig(): AgentToolConfig {
     "name": "extract_data",
     "arguments": "{\"instruction\":\"From the currently loaded Google News results page for query 'OpenAI September 2025 news', extract the top 15 news items visible in the search results. For each item extract: title (string), snippet (string), url (string, format:url), source (string), and publishDate (string). Return a JSON object with property 'results' which is an array of these items.\",\"reasoning\":\"Collect structured list of recent news articles about OpenAI in September 2025 so we can batch-fetch the full content for comprehensive research.\",\"schema\":{\"type\":\"object\",\"properties\":{\"results\":{\"type\":\"array\",\"items\":{\"type\":\"object\",\"properties\":{\"title\":{\"type\":\"string\"},\"snippet\":{\"type\":\"string\"},\"url\":{\"type\":\"string\",\"format\":\"url\"},\"source\":{\"type\":\"string\"},\"publishDate\":{\"type\":\"string\"}},\"required\":[\"title\",\"url\",\"source\"]}}},\"required\":[\"results\"]}}"
 })
-- Use html_to_markdown when you need high-quality page text in addition to (not instead of) structured extractions.
+- Use readability_extractor when you need fast plain text extraction in addition to (not instead of) structured extractions.
 - Never call extract_data or fetcher_tool without a clear plan for how the results will fill gaps in the objective.
 - Before starting new queries, call 'list_files'/'read_file' to review previous batches and avoid duplicating work; always append incremental findings to the existing coordination file for the current objective.
 
@@ -132,7 +132,7 @@ If you absolutely cannot find any reliable leads, return status "failed" with ga
       'extract_data',
       'scroll_page',
       'action_agent',
-      'html_to_markdown',
+      'readability_extractor',
       'create_file',
       'update_file',
       'delete_file',
@@ -273,7 +273,7 @@ If you absolutely cannot find any reliable leads, return status "failed" with ga
           ],
           next_actions: [
             'Continue pagination on current queries (Next/numeric page or query params).',
-            'Batch fetcher_tool on shortlisted URLs; use html_to_markdown + document_search to extract location, availability, portfolio, and contact.',
+            'Batch fetcher_tool on shortlisted URLs; use readability_extractor + document_search to extract location, availability, portfolio, and contact.',
             'Deduplicate by normalized name + hostname and canonical URL.'
           ]
         };
 
@@ -327,7 +327,9 @@ export function getTestsByDuration(
  * CommonJS export for Node.js compatibility
  * Allows backend evaluation runner to import test cases
  */
+// @ts-ignore - module is not defined in browser context
 if (typeof module !== 'undefined' && module.exports) {
+  // @ts-ignore
   module.exports = {
     simpleArticleTest,
     largeArticleChunkingTest,
 
@@ -3,7 +3,7 @@
 // found in the LICENSE file.
 
 import { createLogger } from '../core/Logger.js';
-import { HTMLToMarkdownTool, type HTMLToMarkdownResult } from './HTMLToMarkdownTool.js';
+import { ReadabilityExtractorTool, type ReadabilityExtractorResult } from './ReadabilityExtractorTool.js';
 import { NavigateURLTool, type Tool, type LLMContext } from './Tools.js';
 
 const logger = createLogger('Tool:Fetcher');
@@ -14,7 +14,7 @@ const logger = createLogger('Tool:Fetcher');
 export interface FetchedContent {
   url: string;
   title: string;
-  markdownContent: string;
+  markdownContent: string;  // Plain text content (for backwards compatibility, named markdownContent)
   success: boolean;
   error?: string;
 }
@@ -40,15 +40,15 @@ export interface FetcherToolResult {
  * Agent that fetches and extracts content from URLs
  *
  * This agent takes a list of URLs, navigates to each one, and extracts
- * the main content as markdown. It uses NavigateURLTool for navigation
- * and HTMLToMarkdownTool for content extraction.
+ * the main content as plain text. It uses NavigateURLTool for navigation
+ * and ReadabilityExtractorTool for fast content extraction.
  *
- * Content extraction is handled by HTMLToMarkdownTool, which
- * automatically chunks large pages for efficient processing.
+ * Content extraction is handled by ReadabilityExtractorTool, which uses
+ * Mozilla Readability for deterministic extraction without LLM calls.
  */
 export class FetcherTool implements Tool<FetcherToolArgs, FetcherToolResult> {
   name = 'fetcher_tool';
-  description = 'Navigates to URLs, extracts and cleans the main content, returning markdown for each source.';
+  description = 'Navigates to URLs, extracts and cleans the main content, returning plain text for each source';
 
 
   schema = {
@@ -70,7 +70,7 @@ export class FetcherTool implements Tool<FetcherToolArgs, FetcherToolResult> {
   };
 
   private navigateURLTool = new NavigateURLTool();
-  private htmlToMarkdownTool = new HTMLToMarkdownTool();
+  private readabilityExtractorTool = new ReadabilityExtractorTool();
 
   /**
    * Execute the fetcher agent to process multiple URLs
@@ -138,29 +138,6 @@ export class FetcherTool implements Tool<FetcherToolArgs, FetcherToolResult> {
         throw new DOMException('The operation was aborted', 'AbortError');
       }
     };
-    const sleep = (ms: number) => new Promise<void>((resolve, reject) => {
-      if (!ms) return resolve();
-      const timer = setTimeout(() => {
-        cleanup();
-        resolve();
-      }, ms);
-      const onAbort = () => {
-        clearTimeout(timer);
-        cleanup();
-        reject(new DOMException('The operation was aborted', 'AbortError'));
-      };
-      const cleanup = () => {
-        signal?.removeEventListener('abort', onAbort);
-      };
-      if (signal) {
-        if (signal.aborted) {
-          clearTimeout(timer);
-          cleanup();
-          return reject(new DOMException('The operation was aborted', 'AbortError'));
-        }
-        signal.addEventListener('abort', onAbort, { once: true });
-      }
-    });
     try {
       // Step 1: Navigate to the URL
       logger.info('Navigating to URL', { url });
@@ -182,37 +159,34 @@ export class FetcherTool implements Tool<FetcherToolArgs, FetcherToolResult> {
         };
       }
 
-      // Wait for 1 second to ensure the page has time to load
-      await sleep(1000);
-      throwIfAborted();
-
       // Get metadata from navigation result
       const metadata = navigationResult.metadata ? navigationResult.metadata : { url: '', title: '' };
 
-      // Step 2: Extract markdown content using HTMLToMarkdownTool
+      // Step 2: Extract content using ReadabilityExtractorTool (with automatic LLM fallback)
       logger.info('Extracting content from URL', { url });
       throwIfAborted();
-      const extractionResult = await this.htmlToMarkdownTool.execute({
-        instruction: 'Extract the main content focusing on article text, headings, and important information. Remove ads, navigation, and distracting elements.',
+
+      // Always pass ctx for LLM fallback capability
+      const extractionResult = await this.readabilityExtractorTool.execute({
         reasoning
       }, ctx);
 
       // Check for extraction errors
-      if (!extractionResult.success || !extractionResult.markdownContent) {
+      if (!extractionResult.success || !extractionResult.textContent) {
         return {
           url,
-          title: metadata?.title || '',
+          title: metadata?.title || extractionResult.title || '',
           markdownContent: '',
           success: false,
           error: extractionResult.error || 'Failed to extract content'
         };
       }
 
-      // Return the fetched content (HTMLToMarkdownTool handles chunking)
+      // Return the fetched content (plain text from Readability)
       return {
         url: metadata?.url || url,
-        title: metadata?.title || '',
-        markdownContent: extractionResult.markdownContent,
+        title: extractionResult.title || metadata?.title || '',
+        markdownContent: extractionResult.textContent,  // Plain text content
         success: true
       };
     } catch (error: any) {
 
@@ -36,8 +36,8 @@ export interface HTMLToMarkdownArgs {
  */
 export class HTMLToMarkdownTool implements Tool<HTMLToMarkdownArgs, HTMLToMarkdownResult> {
   // Chunking configuration
-  private readonly TOKEN_LIMIT_FOR_CHUNKING = 10000; // Auto-chunk if tree exceeds this (40k chars)
-  private readonly CHUNK_TOKEN_LIMIT = 8000; // Max tokens per chunk (32k chars)
+  private readonly TOKEN_LIMIT_FOR_CHUNKING = 65000; // Auto-chunk if tree exceeds this (~260k chars)
+  private readonly CHUNK_TOKEN_LIMIT = 40000; // Max tokens per chunk (~160k chars)
   private readonly CHARS_PER_TOKEN = 4; // Conservative estimate
 
   private contentChunker = new ContentChunker();