From 6aac8d0f429584371410b68156848ce52eced399 Mon Sep 17 00:00:00 2001 From: Tyson Thomas Date: Mon, 9 Jun 2025 22:10:23 -0700 Subject: [PATCH 1/2] fix report rendering --- front_end/panels/ai_chat/ui/ChatView.ts | 481 ++++++++++++++++-------- 1 file changed, 324 insertions(+), 157 deletions(-) diff --git a/front_end/panels/ai_chat/ui/ChatView.ts b/front_end/panels/ai_chat/ui/ChatView.ts index c4cf9606c1a..988e3cff019 100644 --- a/front_end/panels/ai_chat/ui/ChatView.ts +++ b/front_end/panels/ai_chat/ui/ChatView.ts @@ -269,6 +269,9 @@ export class ChatView extends HTMLElement { #isInputDisabled = false; #inputPlaceholder = ''; + // Add state tracking for AI Assistant operations + #aiAssistantStates = new Map(); + #lastProcessedMessageKey: string | null = null; connectedCallback(): void { const sheet = new CSSStyleSheet(); @@ -289,6 +292,10 @@ export class ChatView extends HTMLElement { disconnectedCallback(): void { // Cleanup resize observer this.#messagesContainerResizeObserver.disconnect(); + + // Clear state maps to prevent memory leaks + this.#aiAssistantStates.clear(); + this.#lastProcessedMessageKey = null; } // Add method to scroll to bottom @@ -345,6 +352,98 @@ export class ChatView extends HTMLElement { } }; + // Helper methods for AI Assistant state management + #getMessageStateKey(structuredResponse: {reasoning: string, markdownReport: string}): string { + // Create stable hash from content - Unicode safe + const content = structuredResponse.reasoning + structuredResponse.markdownReport; + + // Unicode-safe hash function using TextEncoder + const encoder = new TextEncoder(); + const bytes = encoder.encode(content); + + let hash = 0; + for (let i = 0; i < bytes.length; i++) { + hash = ((hash << 5) - hash) + bytes[i]; + hash = hash & hash; // Convert to 32-bit integer + } + + // Convert to hex for consistent 8-character length + const key = Math.abs(hash).toString(16).padStart(8, '0'); + + return key; + } + + #getMessageAIAssistantState(messageKey: 
string): 'pending' | 'opened' | 'failed' | 'not-attempted' { + return this.#aiAssistantStates.get(messageKey) || 'not-attempted'; + } + + #setMessageAIAssistantState(messageKey: string, state: 'pending' | 'opened' | 'failed'): void { + this.#aiAssistantStates.set(messageKey, state); + } + + + #isLastStructuredMessage(currentCombinedIndex: number): boolean { + // We need to work with the combined messages logic to properly identify the last structured message + // The currentCombinedIndex is from the combined array, but we need to check against the original array + + // Recreate the combined messages logic to understand the mapping + let combinedIndex = 0; + let lastStructuredCombinedIndex = -1; + + for (let originalIndex = 0; originalIndex < this.#messages.length; originalIndex++) { + const message = this.#messages[originalIndex]; + + // Keep User messages and Final Model answers + if (message.entity === ChatMessageEntity.USER || + (message.entity === ChatMessageEntity.MODEL && message.action === 'final')) { + + // Check if this is a structured final answer + if (message.entity === ChatMessageEntity.MODEL && message.action === 'final') { + const structuredResponse = this.#parseStructuredResponse((message as any).answer || ''); + if (structuredResponse) { + lastStructuredCombinedIndex = combinedIndex; + } + } + + combinedIndex++; + continue; + } + + // Handle Model Tool Call message + if (message.entity === ChatMessageEntity.MODEL && message.action === 'tool') { + const nextMessage = this.#messages[originalIndex + 1]; + + // Check if the next message is the corresponding result + if (nextMessage && nextMessage.entity === ChatMessageEntity.TOOL_RESULT && nextMessage.toolName === (message as any).toolName) { + // Combined representation: tool call + result = 1 entry in combined array + combinedIndex++; + } else { + // Tool call is still running (no result yet) + combinedIndex++; + } + continue; + } + + // Handle Tool Result message - skip if it was combined previously 
+ if (message.entity === ChatMessageEntity.TOOL_RESULT) { + const prevMessage = this.#messages[originalIndex - 1]; + // Check if the previous message was the corresponding model call + if (!(prevMessage && prevMessage.entity === ChatMessageEntity.MODEL && prevMessage.action === 'tool' && prevMessage.toolName === (message as any).toolName)) { + // Orphaned tool result - add it directly + combinedIndex++; + } + // Otherwise, it was handled by the MODEL case above, so we skip this result message + continue; + } + + // Fallback for any unexpected message types + combinedIndex++; + } + + return lastStructuredCombinedIndex === currentCombinedIndex; + } + + // Update the prompt button click handler when props/state changes #updatePromptButtonClickHandler(): void { this.#handlePromptButtonClickBound = BaseOrchestratorAgent.createAgentTypeSelectionHandler( @@ -417,6 +516,34 @@ export class ChatView extends HTMLElement { const willHaveMoreMessages = data.messages?.length > previousMessageCount; const wasInputDisabled = this.#isInputDisabled; + // Handle AI Assistant state cleanup for last-message-only approach + if (willHaveMoreMessages && this.#messages) { + // When new messages are added, reset states for previous final messages + // so that only the last message can attempt to open AI Assistant + const previousLastFinalIndex = this.#messages.findLastIndex(msg => + msg.entity === ChatMessageEntity.MODEL && + (msg as ModelChatMessage).action === 'final' + ); + + if (previousLastFinalIndex >= 0) { + const previousLastMessage = this.#messages[previousLastFinalIndex] as ModelChatMessage; + if (previousLastMessage.answer) { + const structuredResponse = this.#parseStructuredResponse(previousLastMessage.answer); + if (structuredResponse) { + const messageKey = this.#getMessageStateKey(structuredResponse); + const currentState = this.#getMessageAIAssistantState(messageKey); + + // If the previous last message was pending, mark it as failed + // But keep 'opened' state to preserve 
successfully opened reports + if (currentState === 'pending') { + this.#setMessageAIAssistantState(messageKey, 'failed'); + } + // If it was 'opened', keep it that way to show button only + } + } + } + } + this.#messages = data.messages; this.#state = data.state; this.#isTextInputEmpty = data.isTextInputEmpty; @@ -473,11 +600,6 @@ export class ChatView extends HTMLElement { #handleSendMessage(): void { // Check if textInputElement, onSendMessage callback, or input is disabled if (!this.#textInputElement || !this.#onSendMessage || this.#isInputDisabled) { - logger.info("Send prevented: ", { - hasTextInput: Boolean(this.#textInputElement), - hasCallback: Boolean(this.#onSendMessage), - isDisabled: this.#isInputDisabled - }); return; } @@ -492,7 +614,6 @@ export class ChatView extends HTMLElement { // Always scroll to bottom after sending message this.#pinScrollToBottom = true; - logger.info("Sending message:", text); this.#onSendMessage(text, this.#imageInput); this.#textInputElement.value = ''; this.#textInputElement.style.height = 'auto'; @@ -511,12 +632,20 @@ export class ChatView extends HTMLElement { const textarea = event.target as HTMLTextAreaElement; textarea.style.height = 'auto'; // Reset height to shrink if needed textarea.style.height = `${textarea.scrollHeight}px`; - this.#isTextInputEmpty = textarea.value.trim().length === 0; - void ComponentHelpers.ScheduledRender.scheduleRender(this, this.#boundRender); + + const newIsEmpty = textarea.value.trim().length === 0; + + // Only trigger re-render if empty state actually changed + if (this.#isTextInputEmpty !== newIsEmpty) { + this.#isTextInputEmpty = newIsEmpty; + void ComponentHelpers.ScheduledRender.scheduleRender(this, this.#boundRender); + } else { + this.#isTextInputEmpty = newIsEmpty; + } } // Render messages based on the combined structure - #renderMessage(message: ChatMessage | (ModelChatMessage & { resultText?: string, isError?: boolean, resultError?: string, combined?: boolean }) | 
(ToolResultMessage & { orphaned?: boolean }), index?: number ): Lit.TemplateResult { + #renderMessage(message: ChatMessage | (ModelChatMessage & { resultText?: string, isError?: boolean, resultError?: string, combined?: boolean }) | (ToolResultMessage & { orphaned?: boolean }), combinedIndex?: number ): Lit.TemplateResult { try { switch (message.entity) { case ChatMessageEntity.USER: @@ -534,7 +663,6 @@ export class ChatView extends HTMLElement { { const toolResultMessage = message as (ToolResultMessage & { orphaned?: boolean }); if (toolResultMessage.orphaned) { - logger.warn('Rendering orphaned ToolResultMessage:', toolResultMessage); return html`
@@ -568,7 +696,7 @@ export class ChatView extends HTMLElement { const structuredResponse = this.#parseStructuredResponse(modelMessage.answer || ''); if (structuredResponse) { - return this.#renderStructuredResponse(structuredResponse, index); + return this.#renderStructuredResponse(structuredResponse, combinedIndex); } else { // Regular response - use the old logic const isDeepResearch = isDeepResearchContent(modelMessage.answer || ''); @@ -583,7 +711,7 @@ export class ChatView extends HTMLElement {
@@ -621,33 +749,10 @@ export class ChatView extends HTMLElement { const toolArgs = modelMessage.toolArgs || {}; const filteredArgs = Object.fromEntries(Object.entries(toolArgs).filter(([key]) => key !== 'reasoning')); - // --- Styling and Icons --- - const blockStyles = (bgColor: string) => Lit.Directives.styleMap({ - padding: '10px', - borderRadius: '8px', - marginBottom: '5px', // Reduced margin between blocks if they are separate - border: '1px solid var(--sys-color-outline)', - backgroundColor: bgColor, - }); - - const headerStyles = Lit.Directives.styleMap({ - display: 'flex', - alignItems: 'center', - gap: '8px', - marginBottom: '8px', - fontWeight: '500', - }); - - const contentStyles = Lit.Directives.styleMap({ - marginLeft: '24px', // Indent content under the header icon - paddingTop: '5px', - }); - - // Icons + // Icons for tool status const spinnerIcon = html``; const checkIcon = html``; const errorIcon = html``; - // --- End Styling and Icons --- return html` @@ -728,7 +833,6 @@ export class ChatView extends HTMLElement { } default: // Should not happen, but render a fallback - logger.warn('Unhandled message entity type in renderMessage:', (message as any).entity); return html`
Unknown message type
`; } } catch (error) { @@ -789,9 +893,9 @@ export class ChatView extends HTMLElement { const prevMessage = allMessages[index - 1]; // Check if the previous message was the corresponding model call if (!(prevMessage && prevMessage.entity === ChatMessageEntity.MODEL && prevMessage.action === 'tool' && prevMessage.toolName === message.toolName)) { - // Orphaned tool result - add it directly (maybe mark it?) + // Orphaned tool result - add it directly logger.warn('Orphaned tool result found:', message); - acc.push({...message, orphaned: true }); // Add marker if needed for rendering + acc.push({...message, orphaned: true }); // Add marker for rendering } // Otherwise, it was handled by the MODEL case above, so we skip this result message return acc; @@ -805,6 +909,7 @@ export class ChatView extends HTMLElement { // Allow ToolResultMessage to potentially have an 'orphaned' flag }, [] as Array); + // General loading state (before any model response or after tool result) const showGeneralLoading = this.#state === State.LOADING && !isModelRunningTool; @@ -912,7 +1017,7 @@ export class ChatView extends HTMLElement {
- ${combinedMessages?.map((message, index) => this.#renderMessage(message, index)) || Lit.nothing} + ${combinedMessages?.map((message, combinedIndex) => this.#renderMessage(message, combinedIndex)) || Lit.nothing} ${showGeneralLoading ? html`
@@ -1066,7 +1171,6 @@ export class ChatView extends HTMLElement { #handleModelChange(event: Event): void { if (this.#isModelSelectorDisabled) { - logger.info('Model selector is disabled, ignoring change'); return; } const selectElement = event.target as HTMLSelectElement; @@ -1129,82 +1233,126 @@ export class ChatView extends HTMLElement { return null; } - // Render structured response with conditional inline report - #renderStructuredResponse(structuredResponse: {reasoning: string, markdownReport: string}, index?: number): Lit.TemplateResult { - const messageClass = `structured-response-${index || 0}`; + // Render structured response with last-message-only auto-processing + #renderStructuredResponse(structuredResponse: {reasoning: string, markdownReport: string}, combinedIndex?: number): Lit.TemplateResult { + logger.info('Starting renderStructuredResponse:', { + combinedIndex, + hasMessages: Boolean(this.#messages), + messagesLength: this.#messages?.length, + lastProcessedKey: this.#lastProcessedMessageKey, + reasoningPreview: structuredResponse.reasoning.slice(0, 50) + '...' 
+ }); + + const messageKey = this.#getMessageStateKey(structuredResponse); + const isLastMessage = this.#isLastStructuredMessage(combinedIndex || 0); + + logger.info('Rendering structured response decision:', { + messageKey, + combinedIndex, + isLastMessage, + lastProcessedKey: this.#lastProcessedMessageKey, + shouldAutoProcess: isLastMessage && messageKey !== this.#lastProcessedMessageKey + }); + + // Auto-process only last message + if (isLastMessage && messageKey !== this.#lastProcessedMessageKey) { + const aiState = this.#getMessageAIAssistantState(messageKey); + if (aiState === 'not-attempted') { + // Set to pending immediately for loading state + logger.info('Setting state to pending and starting AI Assistant for LAST message key:', messageKey); + this.#setMessageAIAssistantState(messageKey, 'pending'); + this.#attemptAIAssistantOpen(structuredResponse.markdownReport, messageKey); + this.#lastProcessedMessageKey = messageKey; + } + } + + const aiState = this.#getMessageAIAssistantState(messageKey); + return this.#renderStructuredMessage(structuredResponse, messageKey, aiState, isLastMessage); + } + + // Unified render method for structured response messages + #renderStructuredMessage(structuredResponse: {reasoning: string, markdownReport: string}, messageKey: string, aiState: 'pending' | 'opened' | 'failed' | 'not-attempted', isLastMessage: boolean): Lit.TemplateResult { + logger.info('Rendering structured message:', { messageKey, aiState, isLastMessage }); - // Start with inline report visible - const messageElement = html` -
+ return html` +
${renderMarkdown(structuredResponse.reasoning, this.#markdownRenderer)}
-
-
-

Full Research Report

+ + ${aiState === 'pending' ? html` + +
+ + + + +
-
- ${renderMarkdown(structuredResponse.markdownReport, this.#markdownRenderer)} + ` : aiState === 'opened' ? html` + +
+
-
+ ` : html` + +
+
+

Full Research Report

+
+
+ ${renderMarkdown(structuredResponse.markdownReport, this.#markdownRenderer)} +
+
+
+ +
+ `}
`; - - // Try to open in AI Assistant and hide inline report if successful - this.#tryOpenAndHideIfSuccessful(structuredResponse.markdownReport, messageClass); - - return messageElement; } - // Try to open in AI Assistant and hide inline report if successful - async #tryOpenAndHideIfSuccessful(markdownContent: string, messageClass: string): Promise { + // Attempt to open AI Assistant for a specific message + async #attemptAIAssistantOpen(markdownContent: string, messageKey: string): Promise { + logger.info('ATTEMPTING AI ASSISTANT OPEN:', { + messageKey, + contentLength: markdownContent.length, + contentPreview: markdownContent.slice(0, 200) + '...' + }); + try { + logger.info('Calling #openInAIAssistantViewer for key:', messageKey); await this.#openInAIAssistantViewer(markdownContent); - // Navigation successful - hide the inline report after a short delay - setTimeout(() => { - const messageElement = this.#shadow.querySelector(`.${messageClass}`); - if (messageElement) { - const inlineReport = messageElement.querySelector('.inline-markdown-report'); - if (inlineReport) { - inlineReport.remove(); - // Add a button to view the report again - const actionsDiv = document.createElement('div'); - actionsDiv.className = 'deep-research-actions'; - - const viewButton = document.createElement('button'); - viewButton.className = 'view-document-btn'; - viewButton.title = 'Open full report in document viewer'; - viewButton.textContent = '📄 View Full Report'; - viewButton.addEventListener('click', async () => { - try { - await this.#openInAIAssistantViewer(markdownContent); - } catch (error) { - logger.error('Failed to open report:', error); - // Could show the inline report again as fallback - } - }); - - actionsDiv.appendChild(viewButton); - messageElement.querySelector('.message-content')?.appendChild(actionsDiv); - } - } - }, 500); // Wait for DOM to be fully rendered + + logger.info('AI Assistant opened successfully, setting state to opened for key:', messageKey); + 
this.#setMessageAIAssistantState(messageKey, 'opened'); } catch (error) { - logger.info('AI Assistant navigation failed, keeping report inline:', error); - // Keep the inline report visible + logger.warn('AI Assistant navigation failed for key:', { messageKey, error }); + this.#setMessageAIAssistantState(messageKey, 'failed'); } + + // Trigger single re-render after state change + logger.info('Triggering re-render after AI Assistant state change for key:', messageKey); + void ComponentHelpers.ScheduledRender.scheduleRender(this, this.#boundRender); } - // Try to open markdown content in AI Assistant viewer - async #tryOpenInAIAssistantViewer(markdownContent: string): Promise { - try { - await this.#openInAIAssistantViewer(markdownContent); - return true; // Successfully navigated - } catch (error) { - logger.info('AI Assistant navigation failed, showing report inline:', error); - return false; // Navigation failed - } - } // Method to open markdown content in AI Assistant viewer in the same tab async #openInAIAssistantViewer(markdownContent: string): Promise { @@ -1230,11 +1378,11 @@ export class ChatView extends HTMLElement { // Wait for the page to load, then inject the markdown content // Use event-based detection or timeout as fallback - const injectContent = async () => { + const injectContent = async (): Promise => { const runtimeModel = target.model(SDK.RuntimeModel.RuntimeModel); if (!runtimeModel) { logger.error('No RuntimeModel found'); - return; + throw new Error('No RuntimeModel found'); } // Escape the markdown content for JavaScript injection @@ -1243,41 +1391,41 @@ export class ChatView extends HTMLElement { // JavaScript to inject - calls the global function we added to AI Assistant const injectionScript = ` (function() { - logger.info('DevTools injecting markdown content...', 'Content length:', ${JSON.stringify(markdownContent.length)}); - logger.info('Available global functions:', Object.keys(window).filter(k => k.includes('setDevTools') || 
k.includes('aiAssistant'))); + console.log('DevTools injecting markdown content...', 'Content length:', ${JSON.stringify(markdownContent.length)}); + console.log('Available global functions:', Object.keys(window).filter(k => k.includes('setDevTools') || k.includes('aiAssistant'))); if (typeof window.setDevToolsMarkdown === 'function') { try { window.setDevToolsMarkdown(${escapedContent}); - logger.info('Successfully called setDevToolsMarkdown function'); + console.log('Successfully called setDevToolsMarkdown function'); return 'SUCCESS: Content injected via setDevToolsMarkdown function'; } catch (error) { - logger.error('Error calling setDevToolsMarkdown:', error); + console.error('Error calling setDevToolsMarkdown:', error); return 'ERROR: Failed to call setDevToolsMarkdown: ' + error.message; } } else { - logger.warn('setDevToolsMarkdown function not found, using fallback methods'); - logger.info('Available window properties:', Object.keys(window).filter(k => k.includes('DevTools') || k.includes('assistant') || k.includes('ai'))); + console.warn('setDevToolsMarkdown function not found, using fallback methods'); + console.log('Available window properties:', Object.keys(window).filter(k => k.includes('DevTools') || k.includes('assistant') || k.includes('ai'))); // Store in sessionStorage sessionStorage.setItem('devtools-markdown-content', ${escapedContent}); - logger.info('Stored content in sessionStorage'); + console.log('Stored content in sessionStorage'); // Try to trigger app reload if (window.aiAssistantApp && typeof window.aiAssistantApp.loadFromSessionStorage === 'function') { try { window.aiAssistantApp.loadFromSessionStorage(); - logger.info('Successfully called aiAssistantApp.loadFromSessionStorage'); + console.log('Successfully called aiAssistantApp.loadFromSessionStorage'); return 'SUCCESS: Content stored and app reloaded'; } catch (error) { - logger.error('Error calling loadFromSessionStorage:', error); + console.error('Error calling 
loadFromSessionStorage:', error); return 'ERROR: Content stored but failed to reload app: ' + error.message; } } else { - logger.info('aiAssistantApp not available or loadFromSessionStorage not a function'); - logger.info('aiAssistantApp type:', typeof window.aiAssistantApp); + console.log('aiAssistantApp not available or loadFromSessionStorage not a function'); + console.log('aiAssistantApp type:', typeof window.aiAssistantApp); if (window.aiAssistantApp) { - logger.info('aiAssistantApp methods:', Object.getOwnPropertyNames(Object.getPrototypeOf(window.aiAssistantApp))); + console.log('aiAssistantApp methods:', Object.getOwnPropertyNames(Object.getPrototypeOf(window.aiAssistantApp))); } // Try to force a page reload as last resort @@ -1297,7 +1445,7 @@ export class ChatView extends HTMLElement { const executionContext = runtimeModel.defaultExecutionContext(); if (!executionContext) { logger.error('No execution context available'); - return; + throw new Error('No execution context available'); } const result = await executionContext.evaluate({ @@ -1311,16 +1459,22 @@ export class ChatView extends HTMLElement { if ('error' in result) { logger.error('Evaluation failed:', result.error); - return; + throw new Error(`Evaluation failed: ${result.error}`); } if (result.object.value) { logger.info('Content injection result:', result.object.value); + // Check if injection was successful + if (typeof result.object.value === 'string' && result.object.value.startsWith('ERROR:')) { + throw new Error(result.object.value); + } } else if (result.exceptionDetails) { logger.error('Content injection failed:', result.exceptionDetails.text); + throw new Error(`Content injection failed: ${result.exceptionDetails.text || 'Unknown error'}`); } } catch (error) { logger.error('Failed to inject content:', error); + throw error; // Re-throw to propagate to caller } }; @@ -1328,46 +1482,59 @@ export class ChatView extends HTMLElement { let retries = 0; const maxRetries = 
TIMING_CONSTANTS.AI_ASSISTANT_MAX_RETRIES; - const attemptInjection = () => { - setTimeout(async () => { - const runtimeModel = target.model(SDK.RuntimeModel.RuntimeModel); - if (!runtimeModel) { - logger.error('No RuntimeModel found'); - return; - } - - const executionContext = runtimeModel.defaultExecutionContext(); - if (!executionContext) { - logger.error('No execution context available'); - return; - } - - // Check if AI Assistant is ready - const checkResult = await executionContext.evaluate({ - expression: 'typeof window.setDevToolsMarkdown === "function" || (window.aiAssistantApp && typeof window.aiAssistantApp.loadFromSessionStorage === "function")', - objectGroup: 'console', - includeCommandLineAPI: false, - silent: true, - returnByValue: true, - generatePreview: false - }, false, false); - - if (!('error' in checkResult) && checkResult.object.value === true) { - // AI Assistant is ready - await injectContent(); - } else if (retries < maxRetries) { - // Retry with exponential backoff - retries++; - attemptInjection(); - } else { - logger.error('AI Assistant did not load in time'); - // Try to inject anyway as a last resort - await injectContent(); - } - }, TIMING_CONSTANTS.AI_ASSISTANT_RETRY_DELAY * Math.pow(2, retries)); - }; - - attemptInjection(); + // Return a promise that resolves/rejects based on injection success + return new Promise((resolve, reject) => { + const attemptInjection = () => { + setTimeout(async () => { + try { + const runtimeModel = target.model(SDK.RuntimeModel.RuntimeModel); + if (!runtimeModel) { + reject(new Error('No RuntimeModel found')); + return; + } + + const executionContext = runtimeModel.defaultExecutionContext(); + if (!executionContext) { + reject(new Error('No execution context available')); + return; + } + + // Check if AI Assistant is ready + const checkResult = await executionContext.evaluate({ + expression: 'typeof window.setDevToolsMarkdown === "function" || (window.aiAssistantApp && typeof 
window.aiAssistantApp.loadFromSessionStorage === "function")', + objectGroup: 'console', + includeCommandLineAPI: false, + silent: true, + returnByValue: true, + generatePreview: false + }, false, false); + + if (!('error' in checkResult) && checkResult.object.value === true) { + // AI Assistant is ready + await injectContent(); + resolve(); + } else if (retries < maxRetries) { + // Retry with exponential backoff + retries++; + attemptInjection(); + } else { + logger.error('AI Assistant did not load in time'); + // Try to inject anyway as a last resort + try { + await injectContent(); + resolve(); + } catch (error) { + reject(error); + } + } + } catch (error) { + reject(error); + } + }, TIMING_CONSTANTS.AI_ASSISTANT_RETRY_DELAY * Math.pow(2, retries)); + }; + + attemptInjection(); + }); } } From 812348bdc7080e23a85cd75645069017be6f0a3f Mon Sep 17 00:00:00 2001 From: Tyson Thomas Date: Mon, 9 Jun 2025 23:39:00 -0700 Subject: [PATCH 2/2] update readme --- README.md | 272 ++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 193 insertions(+), 79 deletions(-) diff --git a/README.md b/README.md index c50c62d79c3..91c32587319 100644 --- a/README.md +++ b/README.md @@ -1,115 +1,229 @@ -# Browser Operator [Chromium DevTools with Agentic Framework] +# Browser Operator - Open Source Agentic Browser -Chromium browser with an user interface to run multi-agent workflows directly on the browser using a stateful, orchestration framework. +![GitHub Release](https://img.shields.io/github/v/release/tysonthomas9/browser-operator-devtools-frontend) +![Platform](https://img.shields.io/badge/platform-macOS-blue) +[![License](https://img.shields.io/badge/license-BSD--3--Clause-green)](LICENSE) +**The first open-source, privacy-friendly AI browser that transforms how you work on the web. 
Your intelligent partner for research, analysis, and automation - all running locally in your browser.** ![Live Demo](front_end/panels/ai_chat/docs/demo.gif) +## 🚀 Download & Get Started -## Quick Start +**[⬇️ Download Browser Operator for macOS](https://github.com/tysonthomas9/browser-operator-devtools-frontend/releases)** -[Download the Agentic Browser for MacOS](https://github.com/tysonthomas9/browser-operator-devtools-frontend/releases) +Or build from source: [Developer Setup Guide](front_end/panels/ai_chat/Readme.md) -Or +## 🎬 See It In Action -[Set up the chromium dev tools with Agent framework on your system](front_end/panels/ai_chat/Readme.md) +### Deep Research & Analysis +Watch Browser Operator synthesize information from multiple sources, creating comprehensive research reports without manual copying and pasting. -Note: Read this document to know more about the [build](front_end/panels/ai_chat/docs/PreBuilt.md) -## Key Capabilities +https://github.com/user-attachments/assets/225319db-c5a0-4834-9f37-5787fb646d16 -* Built in Agent Framework for running tasks / workflows. -* Ability to perform actions such as: Navigate URLs, Perform Actions (Clicks, Fill Form, Scroll). -* The agent can autonmously plan and execute tasks on behalf of user, such as - * Summarize content - * Deep research topics - * Literature reviews - * Product comparisons - * Shopping assistance - * Advanced search - * And many more -* Integrates with 100+ LLM Models - * OpenAI GPT-4.1, O4-Mini - * Claude 4, 3.7, 3.5 - * Google Gemeni - * Llama - * Deepseek - * Qwen - * And many more -* Integrates with LiteLLM which supports ability to use multiple providers - * Huggingface - * Groq - * Azure - * AWS - * OpenRouter - * vLLM - * Ollama - * And many more -* Customize workflows or agent behavior with config changes. 
-![Architecture Flow](front_end/panels/ai_chat/docs/ArchitectureFlow.png) +### Smart Shopping Assistant +See how it automatically compares products, analyzes reviews, and helps you make informed purchasing decisions. -## Demos +https://github.com/user-attachments/assets/c478b18e-0342-400d-98ab-222c93eecd7a -Watch Browser Operator in action with our demo videos: +### Professional Research +Discover how businesses use Browser Operator for talent search, competitive analysis, and market research. -#### Deep Research -Browser Operator seamlessly integrates public web data with your private documents and knowledge bases, creating comprehensive research without switching between tools. +https://github.com/user-attachments/assets/90150f0e-e8c8-4b53-b6a6-c739f143f4a0 -https://github.com/user-attachments/assets/225319db-c5a0-4834-9f37-5787fb646d16 +## ✨ Key Features + +### 🤖 Intelligent Automation +- **Multi-Agent Framework**: Specialized agents work together to handle complex tasks +- **Autonomous Navigation**: Understands and interacts with any website +- **Smart Actions**: Click, fill forms, extract data, and navigate without manual scripting +- **Adaptive Learning**: Improves task execution based on patterns and feedback + +### 🔒 Privacy First (Use local LLM) +- **Local Processing**: Your data never leaves your machine +- **No Cloud Dependencies**: Full functionality without sending data to external servers +- **Secure Sessions**: Works with your existing browser authentication +- **Open Source**: Complete transparency in how your data is handled -#### Product Discovery & Comparison -Streamline your shopping research by automatically gathering specifications, user ratings, and availability across retailers, to help you make confident purchasing decisions. 
+### 🧩 Extensible Platform +- **100+ AI Models**: Support for OpenAI, Claude, Gemini, Llama, and more +- **Custom Workflows**: Build your own automation sequences +- **Plugin Architecture**: Extend functionality with custom agents +- **API Integration**: Connect with your existing tools and services -https://github.com/user-attachments/assets/c478b18e-0342-400d-98ab-222c93eecd7a +## 💡 What Can You Build? -#### Professional Talent Search -Efficiently discover and evaluate potential candidates based on skills, experience, and portfolio quality, creating detailed profiles for recruitment decision-making. + + + + + +
-https://github.com/user-attachments/assets/90150f0e-e8c8-4b53-b6a6-c739f143f4a0 +**Personal Productivity** +- 📚 Literature reviews and research papers +- 🛍️ Price tracking and comparison shopping +- 📰 News aggregation and summarization +- 📊 Data collection and analysis +- ✈️ Travel planning and booking research + + + +**Business Intelligence** +- 🔍 Competitive analysis and monitoring +- 👥 Talent sourcing and recruitment +- 📈 Market research and trends +- 🏢 Lead generation and qualification +- 📋 Compliance and audit automation + +
+ +## 🛠️ Technical Architecture + +Browser Operator combines a Chromium-based browser with an advanced agentic framework: + +``` +┌─────────────────────────────────────────────────┐ +│ Browser Operator UI │ +├─────────────────────────────────────────────────┤ +│ Multi-Agent Orchestrator │ +├──────────────┬────────────────┬─────────────────┤ +│ Research │ Navigation │ Analysis │ +│ Agent │ Agent │ Agent │ +├──────────────┴────────────────┴─────────────────┤ +│ Chromium Browser Engine │ +└─────────────────────────────────────────────────┘ +``` + +### Core Components +- **Orchestrator Agent**: Coordinates multi-agent workflows and task distribution +- **Navigation Engine**: Handles web interactions and page understanding +- **Tool Registry**: Extensible system for adding new capabilities +- **State Management**: Maintains context across complex workflows + +[Full Technical Documentation →](front_end/panels/ai_chat/Readme.md) + +## ⚙️ Quick Setup + +### For Users: Pre-built Application + +1. [Download the latest release](https://github.com/tysonthomas9/browser-operator-devtools-frontend/releases) +2. Open Browser Operator +3. Configure your AI provider (see below) +4. Start automating! + +### For Developers: Build from Source + +```bash +# Clone the repository +git clone https://github.com/tysonthomas9/browser-operator-devtools-frontend.git + +# Follow the detailed build instructions +cd browser-operator-devtools-frontend +# See front_end/panels/ai_chat/Readme.md for complete setup +``` -### Quick Roadmap +### AI Provider Configuration -|Features| Status | -|--|--| -| Multi-Agent Workflow | Completed (Initial Release) | -| OpenAI LLM | Completed (Initial Release) | -| Local LLM | Completed (May 22) | -| MCP | Planned | -| Customize System Prompts in UI| Planned | -| Customize Agents in UI| Planned | -| Customize Workflow Graphs in UI| Planned | -| Eval Management | Planned | -| Memory | Planned | -| A2A Protocol | Planned | +
+Option 1: OpenAI (Recommended for beginners) -### DevTools Documentation +1. Get an API key from [OpenAI Platform](https://platform.openai.com) +2. Open Browser Operator settings +3. Select "OpenAI" as the provider +4. Enter your API key +5. Choose a model (GPT-4.1 recommended) +6. Save and start using Browser Operator! -- [Agentic Framework Documentation](front_end/panels/ai_chat/Readme.md) -- [Chromium Devtools Original Documentation](https://chromium.googlesource.com/devtools/devtools-frontend/+/main/docs/README.md) +
-### Agentic Framework Documentation +
+Option 2: LiteLLM (For multiple providers) -* [`front_end/panels/ai_chat/core/Readme.md`](front_end/panels/ai_chat/core/Readme.md): Explains how to customize the `BaseOrchestratorAgent` to add new top-level agent types and UI buttons, and details its graph-based workflow. -* [`front_end/panels/ai_chat/agent_framework/Readme.md`](front_end/panels/ai_chat/agent_framework/Readme.md): Describes the AI Agent Framework, its core components (`ConfigurableAgentTool`, `AgentRunner`, `ToolRegistry`), and how to create, configure, and register new custom agents, including agent handoff mechanisms. +Perfect for using multiple AI providers or self-hosted models: -### Setup LiteLLM Configuration +1. Set up your LiteLLM proxy server +2. Select "LiteLLM Provider" in settings +3. Enter proxy URL and API key +4. Click "Fetch Models" to verify connection +5. Select your preferred model -https://github.com/user-attachments/assets/579dcfdc-71c8-4664-87b8-c2b68cc5c1ce +[LiteLLM Setup Video →](https://github.com/user-attachments/assets/579dcfdc-71c8-4664-87b8-c2b68cc5c1ce) -1. Click on the setting config inside the chat panel -2. Select LiteLLM Provider -3. Input the LiteLLM URL and API key -4. Click on fetch models to test the configuration -5. Click save to update the configuration +
+ +
+Option 3: Local Models (Maximum privacy) + +Run completely offline with Ollama: + +1. Install Ollama on your system +2. Pull your preferred model (e.g., `ollama pull llama3`) +3. Configure Browser Operator to use the local Ollama endpoint +4. Enjoy private, offline automation + +
+ +## 🗺️ Roadmap + +### ✅ Released +- Multi-agent workflow engine +- Support for 100+ AI models +- macOS application +- Core automation capabilities + +### 🚧 In Development +- Windows and Linux support +- Enhanced memory system +- Custom agent builder + +### 🔮 Planned Features +- MCP (Model Context Protocol) support +- Visual workflow designer +- Team collaboration features +- Advanced scheduling system + +## 👥 Community & Support + +### Get Help +- 📖 [Documentation](front_end/panels/ai_chat/Readme.md) +- 💬 [Discord Community](https://discord.gg/fp7ryHYBSY) +- 🐛 [Report Issues](https://github.com/tysonthomas9/browser-operator-devtools-frontend/issues) +- 🐦 [Follow Updates](https://x.com/BrowserOperator) ### Contributing -Found a bug 🐛 or have a feature idea ✨? Please create issues [here](https://github.com/tysonthomas9/browser-operator-devtools-frontend/issues) +We welcome contributions! Here's how you can help: + +- **🐛 Report Bugs**: Help us identify and fix issues +- **✨ Request Features**: Share your ideas for new capabilities +- **📝 Improve Docs**: Help others get started +- **💻 Submit PRs**: Contribute code improvements + +See our [Contributing Guide](CONTRIBUTING.md) for details. + +## 📚 Documentation + +- [Getting Started Guide](front_end/panels/ai_chat/docs/GettingStarted.md) +- [Agent Framework](front_end/panels/ai_chat/agent_framework/Readme.md) +- [Creating Custom Agents](front_end/panels/ai_chat/core/Readme.md) +- [Architecture Overview](front_end/panels/ai_chat/docs/Architecture.md) +- [Build Instructions](front_end/panels/ai_chat/docs/PreBuilt.md) + +## 🙏 Acknowledgments + +Browser Operator is built on top of Chromium and integrates with numerous open-source projects. Special thanks to all contributors and the open-source community. + +## 📄 License + +Browser Operator is released under the [BSD-3-Clause License](LICENSE). + +--- -### Join Us +
-If you like this project, don't hesitate to ⭐ star this repository. For those who'd like to contribute code or just hang out with the community please join our Discord. +**⭐ Star this repo to support open-source AI development!** -[![Discord](https://img.shields.io/badge/Discord-5865F2?style=for-the-badge&logo=discord&logoColor=white)](https://discord.gg/JKYuuubr) -[![X (Twitter)](https://img.shields.io/badge/X_(Twitter)-000000?style=for-the-badge&logo=x&logoColor=white)](https://x.com/BrowserOperator) +
\ No newline at end of file