From 15904b37f8509412392371a9aa499c70b8aeefce Mon Sep 17 00:00:00 2001
From: Tyson Thomas 
Date: Mon, 8 Sep 2025 17:50:35 -0700
Subject: [PATCH 1/4] Fix the model config issue, OpenRouter OAuth issue and model refresh issue

---
 front_end/panels/ai_chat/BUILD.gn             |   1 +
 .../ai_chat/agent_framework/AgentRunner.ts    |  40 ++-
 .../agent_framework/ConfigurableAgentTool.ts  |  66 ++++-
 .../implementation/ConfiguredAgents.ts        |  23 +-
 .../panels/ai_chat/auth/OpenRouterOAuth.ts    |  39 ++-
 front_end/panels/ai_chat/core/AgentNodes.ts   |  17 +-
 front_end/panels/ai_chat/core/AgentService.ts |   7 +-
 .../panels/ai_chat/core/ConfigurableGraph.ts  |  10 +-
 front_end/panels/ai_chat/core/Graph.ts        |   4 +-
 .../framework/GenericToolEvaluator.ts         |  14 +-
 .../ai_chat/evaluation/framework/types.ts     |   5 +
 .../evaluation/runner/EvaluationRunner.ts     |  57 +++-
 .../runner/VisionAgentEvaluationRunner.ts     |   9 +
 .../panels/ai_chat/tools/CritiqueTool.ts      |  20 +-
 ...FullPageAccessibilityTreeToMarkdownTool.ts |   6 +-
 .../ai_chat/tools/HTMLToMarkdownTool.ts       |   6 +-
 .../ai_chat/tools/SchemaBasedExtractorTool.ts |   6 +-
 .../tools/StreamlinedSchemaExtractorTool.ts   |  12 +-
 front_end/panels/ai_chat/tools/Tools.ts       |   8 +-
 front_end/panels/ai_chat/ui/AIChatPanel.ts    |  53 +++-
 front_end/panels/ai_chat/ui/SettingsDialog.ts | 112 +++++++-
 .../SettingsDialogOpenRouterCache.test.ts     | 259 ++++++++++++++++++
 22 files changed, 699 insertions(+), 75 deletions(-)
 create mode 100644 front_end/panels/ai_chat/ui/__tests__/SettingsDialogOpenRouterCache.test.ts

diff --git a/front_end/panels/ai_chat/BUILD.gn b/front_end/panels/ai_chat/BUILD.gn
index b7996b33a02..1fea191b0c1 100644
--- a/front_end/panels/ai_chat/BUILD.gn
+++ b/front_end/panels/ai_chat/BUILD.gn
@@ -337,6 +337,7 @@ ts_library("unittests") {
     "ui/__tests__/ChatViewAgentSessionsOrder.test.ts",
     "ui/__tests__/ChatViewSequentialSessionsTransition.test.ts",
     "ui/__tests__/ChatViewInputClear.test.ts",
+    "ui/__tests__/SettingsDialogOpenRouterCache.test.ts",
     "ui/input/__tests__/InputBarClear.test.ts",
     "ui/message/__tests__/MessageCombiner.test.ts",
     "ui/message/__tests__/StructuredResponseController.test.ts",
diff --git a/front_end/panels/ai_chat/agent_framework/AgentRunner.ts b/front_end/panels/ai_chat/agent_framework/AgentRunner.ts
index 183e2fab378..e074cf67ca8 100644
--- a/front_end/panels/ai_chat/agent_framework/AgentRunner.ts
+++ b/front_end/panels/ai_chat/agent_framework/AgentRunner.ts
@@ -33,6 +33,10 @@ export interface AgentRunnerConfig {
   provider: LLMProvider;
  /** Optional vision capability check. Defaults to false (no vision). 
*/
  getVisionCapability?: (modelName: string) => Promise<boolean> | boolean;
+  /** Mini model for smaller/faster operations */
+  miniModel?: string;
+  /** Nano model for smallest/fastest operations */
+  nanoModel?: string;
 }
 
 /**
@@ -218,6 +222,8 @@ export class AgentRunner {
     parentSession?: AgentSession, // For natural nesting
     defaultProvider?: LLMProvider,
     defaultGetVisionCapability?: (modelName: string) => Promise<boolean> | boolean,
+    miniModel?: string, // Mini model for smaller/faster operations
+    nanoModel?: string, // Nano model for smallest/fastest operations
     overrides?: { sessionId?: string; parentSessionId?: string; traceId?: string }
   ): Promise {
     const targetAgentName = handoffConfig.targetAgentName;
@@ -286,12 +292,28 @@ export class AgentRunner {
     // Enhance the target agent's system prompt with page context
     const enhancedSystemPrompt = await enhancePromptWithPageContext(targetConfig.systemPrompt);
 
+    // Resolve model name for the target agent
+    let resolvedModelName: string;
+    if (typeof targetConfig.modelName === 'function') {
+      resolvedModelName = targetConfig.modelName();
+    } else if (targetConfig.modelName === 'use-mini') {
+      if (!miniModel) {
+        throw new Error(`Mini model not provided for handoff to agent '${targetAgentName}'. Ensure miniModel is passed in context.`);
+      }
+      resolvedModelName = miniModel;
+    } else if (targetConfig.modelName === 'use-nano') {
+      if (!nanoModel) {
+        throw new Error(`Nano model not provided for handoff to agent '${targetAgentName}'. Ensure nanoModel is passed in context.`);
+      }
+      resolvedModelName = nanoModel;
+    } else {
+      resolvedModelName = targetConfig.modelName || defaultModelName;
+    }
+
     // Construct Runner Config & Hooks for the target agent
     const targetRunnerConfig: AgentRunnerConfig = {
       apiKey,
-      modelName: typeof targetConfig.modelName === 'function'
-        ? targetConfig.modelName()
-        : (targetConfig.modelName || defaultModelName),
+      modelName: resolvedModelName,
       systemPrompt: enhancedSystemPrompt,
       tools: targetConfig.tools
         .map(toolName => ToolRegistry.getRegisteredTool(toolName))
@@ -300,6 +322,8 @@
       temperature: targetConfig.temperature ?? 
defaultTemperature, provider: defaultProvider as LLMProvider, getVisionCapability: defaultGetVisionCapability, + miniModel, + nanoModel, }; const targetRunnerHooks: AgentRunnerHooks = { prepareInitialMessages: undefined, // History already formed by transform or passthrough @@ -845,6 +869,8 @@ export class AgentRunner { currentSession, // Pass current session for natural nesting config.provider, config.getVisionCapability, + config.miniModel, + config.nanoModel, { sessionId: nestedSessionId, parentSessionId: currentSession.sessionId, traceId: getCurrentTracingContext()?.traceId } ); @@ -947,11 +973,13 @@ export class AgentRunner { } try { - logger.info(`${agentName} Executing tool: ${toolToExecute.name} with args:`, toolArgs); + logger.info(`${agentName} Executing tool: ${toolToExecute.name}`); const execTracingContext = getCurrentTracingContext(); toolResultData = await toolToExecute.execute(toolArgs as any, ({ provider: config.provider, model: modelName, + miniModel: config.miniModel, + nanoModel: config.nanoModel, getVisionCapability: config.getVisionCapability, overrideSessionId: preallocatedChildId, overrideParentSessionId: currentSession.sessionId, @@ -1210,7 +1238,9 @@ export class AgentRunner { undefined, // No llmToolArgs for max iterations handoff currentSession, // Pass current session for natural nesting config.provider, - config.getVisionCapability + config.getVisionCapability, + config.miniModel, + config.nanoModel ); // Extract the result and session const { agentSession: childSession, ...actualResult } = handoffResult; diff --git a/front_end/panels/ai_chat/agent_framework/ConfigurableAgentTool.ts b/front_end/panels/ai_chat/agent_framework/ConfigurableAgentTool.ts index 5ddf170d813..f26367316df 100644 --- a/front_end/panels/ai_chat/agent_framework/ConfigurableAgentTool.ts +++ b/front_end/panels/ai_chat/agent_framework/ConfigurableAgentTool.ts @@ -4,7 +4,6 @@ import { AgentService } from '../core/AgentService.js'; import type { Tool } from '../tools/Tools.js'; -import { AIChatPanel } from '../ui/AIChatPanel.js'; import { ChatMessageEntity, type ChatMessage } from '../models/ChatTypes.js'; import { createLogger } from '../core/Logger.js'; import { getCurrentTracingContext } from '../tracing/TracingConfig.js'; @@ -412,18 +411,61 @@ export class ConfigurableAgentTool implements Tool Promise | boolean; + overrideSessionId?: string; + overrideParentSessionId?: string; + overrideTraceId?: string; + }; + + // Resolve model name from context or configuration + let modelName: string; + if (this.config.modelName === 'use-mini') { + if (!callCtx.miniModel) { + throw new Error(`Mini model not provided in context for agent '${this.name}'. Ensure context includes miniModel.`); + } + modelName = callCtx.miniModel; + } else if (this.config.modelName === 'use-nano') { + if (!callCtx.nanoModel) { + throw new Error(`Nano model not provided in context for agent '${this.name}'. Ensure context includes nanoModel.`); + } + modelName = callCtx.nanoModel; + } else if (typeof this.config.modelName === 'function') { + modelName = this.config.modelName(); + } else if (this.config.modelName) { + modelName = this.config.modelName; + } else { + // Use main model from context, or fallback to context model + const contextModel = callCtx.mainModel || callCtx.model; + if (!contextModel) { + throw new Error(`No model provided for agent '${this.name}'. 
Ensure context includes model or mainModel.`); + } + modelName = contextModel; + } + + // Override with context model only if agent doesn't have its own model configuration + if (callCtx.model && !this.config.modelName) { + modelName = callCtx.model; + } + + // Validate required context + if (!callCtx.provider) { + throw new Error(`Provider not provided in context for agent '${this.name}'. Ensure context includes provider.`); + } + const temperature = this.config.temperature ?? 0; - const systemPrompt = this.config.systemPrompt; const tools = this.getToolInstances(); - + // Prepare initial messages const internalMessages = this.prepareInitialMessages(args); - - // Prepare runner config and hooks const runnerConfig: AgentRunnerConfig = { apiKey, modelName, @@ -431,8 +473,10 @@ export class ConfigurableAgentTool implements Tool AIChatPanel.isVisionCapable(m), + provider: callCtx.provider, + getVisionCapability: callCtx.getVisionCapability ?? (() => false), + miniModel: callCtx.miniModel, + nanoModel: callCtx.nanoModel, }; const runnerHooks: AgentRunnerHooks = { @@ -446,7 +490,7 @@ export class ConfigurableAgentTool implements Tool AIChatPanel.instance().getSelectedModel(), temperature: 0.1, schema: { type: 'object', @@ -322,7 +320,7 @@ Remember: You gather data, content_writer_agent writes the report. Always hand o 'document_search' ], maxIterations: 15, - modelName: () => AIChatPanel.getMiniModel(), + modelName: 'use-mini', temperature: 0, schema: { type: 'object', @@ -423,7 +421,7 @@ Your process should follow these steps: The final output should be in markdown format, and it should be lengthy and detailed. Aim for 5-10 pages of content, at least 1000 words.`, tools: [], maxIterations: 3, - modelName: () => AIChatPanel.getMiniModel(), + modelName: 'use-mini', temperature: 0.3, schema: { type: 'object', @@ -531,7 +529,7 @@ Conclusion: Fix the args format and retry with proper syntax: { "method": "fill" 'take_screenshot', ], maxIterations: 10, - modelName: () => AIChatPanel.getMiniModel(), + modelName: 'use-mini', temperature: 0.5, schema: { type: 'object', @@ -640,7 +638,7 @@ Remember that verification is time-sensitive - the page state might change durin 'take_screenshot' ], maxIterations: 3, - modelName: () => AIChatPanel.getMiniModel(), + modelName: 'use-mini', temperature: 0.2, schema: { type: 'object', @@ -725,7 +723,7 @@ When selecting an element to click, prioritize: 'node_ids_to_urls', ], maxIterations: 5, - modelName: () => AIChatPanel.getMiniModel(), + modelName: 'use-mini', temperature: 0.7, schema: { type: 'object', @@ -805,7 +803,7 @@ When selecting a form field to fill, prioritize: 'schema_based_extractor', ], maxIterations: 5, - modelName: () => AIChatPanel.getMiniModel(), + modelName: 'use-mini', temperature: 0.7, schema: { type: 'object', @@ -881,7 +879,7 @@ When selecting an element for keyboard input, prioritize: 'schema_based_extractor', ], maxIterations: 5, - modelName: () => AIChatPanel.getMiniModel(), + modelName: 'use-mini', temperature: 0.7, schema: { type: 'object', @@ -966,7 +964,7 @@ When selecting an element to hover over, prioritize: 'schema_based_extractor', ], maxIterations: 5, - modelName: () => AIChatPanel.getMiniModel(), + modelName: 'use-mini', temperature: 0.7, schema: { type: 'object', @@ -1048,7 +1046,7 @@ The accessibility tree includes information about scrollable containers. 
Look fo
       'schema_based_extractor',
     ],
     maxIterations: 5,
-    modelName: () => AIChatPanel.getMiniModel(),
+    modelName: 'use-mini',
     temperature: 0.7,
     schema: {
       type: 'object',
@@ -1283,7 +1281,6 @@ Remember: **Plan adaptively, execute systematically, validate continuously, and
       'thinking',
     ],
     maxIterations: 15,
-    modelName: () => AIChatPanel.instance().getSelectedModel(),
     temperature: 0.3,
     schema: {
       type: 'object',
@@ -1422,7 +1419,7 @@ Remember to adapt your analysis based on the product category - different attrib
       'get_page_content',
     ],
     maxIterations: 5,
-    modelName: () => AIChatPanel.getMiniModel(),
+    modelName: 'use-mini',
     temperature: 0.2,
     schema: {
       type: 'object',
diff --git a/front_end/panels/ai_chat/auth/OpenRouterOAuth.ts b/front_end/panels/ai_chat/auth/OpenRouterOAuth.ts
index 78b982767c0..f05037f2d6c 100644
--- a/front_end/panels/ai_chat/auth/OpenRouterOAuth.ts
+++ b/front_end/panels/ai_chat/auth/OpenRouterOAuth.ts
@@ -241,6 +241,16 @@ export class OpenRouterOAuth {
           resolve();
           return true;
         }
+        // Intercept known OpenRouter sign-up dead-end by redirecting to sign-in
+        if (url) {
+          try {
+            await this.maybeRedirectSignupToSignin(url);
+          } catch (e) {
+            if (this.isDevelopment()) {
+              logger.warn('Signup→Signin redirect attempt failed:', e);
+            }
+          }
+        }
         return false;
       };
 
@@ -307,6 +317,30 @@ export class OpenRouterOAuth {
     });
   }
 
+  /**
+   * If the inspected page is OpenRouter sign-up, navigate to sign-in instead, preserving query string.
+   * This works around a provider bug where sign-up does not continue to the callback.
+   */
+  private static async maybeRedirectSignupToSignin(currentUrl: string): Promise<void> {
+    try {
+      const url = new URL(currentUrl);
+      const hostMatches = /(^|\.)openrouter\.ai$/i.test(url.hostname);
+      const isSignup = url.pathname.startsWith('/sign-up');
+      const alreadyRedirected = sessionStorage.getItem('openrouter_signin_redirect_performed') === 'true';
+      if (!hostMatches || !isSignup || alreadyRedirected) {
+        return;
+      }
+      const signInUrl = `https://openrouter.ai/sign-in${url.search || ''}`;
+      sessionStorage.setItem('openrouter_signin_redirect_performed', 'true');
+      if (this.isDevelopment()) {
+        logger.info('Redirecting OpenRouter sign-up -> sign-in');
+      }
+      await this.navigateToUrl(signInUrl);
+    } catch {
+      // Ignore parse/navigation errors
+    }
+  }
+
   /**
    * Handle OAuth callback by parsing URL parameters
    */
@@ -573,6 +607,9 @@ export class OpenRouterOAuth {
 
     // Clear active token exchange
     this.activeTokenExchange = null;
+
+    // Clear any signup→signin redirect flag
+    sessionStorage.removeItem('openrouter_signin_redirect_performed');
   }
 
   /**
@@ -789,4 +826,4 @@ export class OpenRouterOAuth {
 
     return messages[error] || 'Authentication failed';
   }
-}
\ No newline at end of file
+}
diff --git a/front_end/panels/ai_chat/core/AgentNodes.ts b/front_end/panels/ai_chat/core/AgentNodes.ts
index 6a48713755a..5fa3a98df08 100644
--- a/front_end/panels/ai_chat/core/AgentNodes.ts
+++ b/front_end/panels/ai_chat/core/AgentNodes.ts
@@ -406,7 +406,7 @@ export function createAgentNode(modelName: string, provider: LLMProvider, temper
   return agentNode;
 }
 
-export function createToolExecutorNode(state: AgentState, provider: LLMProvider, modelName: string): Runnable {
+export function createToolExecutorNode(state: AgentState, provider: LLMProvider, modelName: string, miniModel?: string, nanoModel?: string): Runnable {
   const tools = getAgentToolsFromState(state); // Adjusted to use getAgentToolsFromState
   const toolMap = new Map<string, ReturnType<typeof getAgentToolsFromState>[number]>();
   tools.forEach((tool: ReturnType<typeof getAgentToolsFromState>[number]) => toolMap.set(tool.name, tool));
@@ -416,12 +416,16 @@ export function createToolExecutorNode(state: AgentState, provider: LLMProvider,
     private tracingProvider: TracingProvider;
     private provider: LLMProvider;
     private modelName: string;
+    private miniModel?: string;
+    private nanoModel?: string;
 
-    constructor(toolMap: Map<string, ReturnType<typeof getAgentToolsFromState>[number]>, provider: LLMProvider, modelName: string) {
+    constructor(toolMap: Map<string, ReturnType<typeof getAgentToolsFromState>[number]>, provider: LLMProvider, modelName: string, miniModel?: string, nanoModel?: string) {
       this.toolMap = toolMap;
       this.tracingProvider = createTracingProvider();
       this.provider = provider;
       this.modelName = modelName;
+      this.miniModel = miniModel;
+      this.nanoModel = nanoModel;
     }
 
     async invoke(state: AgentState): Promise<AgentState> {
@@ -539,7 +543,12 @@ export function createToolExecutorNode(state: AgentState, provider: LLMProvider,
 
       const result = await withTracingContext(executionContext, async () => {
         console.log(`[TOOL EXECUTION PATH 1] Inside withTracingContext for tool: ${toolName}`);
-        return await selectedTool.execute(toolArgs as any, { provider: this.provider, model: this.modelName });
+        return await selectedTool.execute(toolArgs as any, {
+          provider: this.provider,
+          model: this.modelName,
+          miniModel: this.miniModel,
+          nanoModel: this.nanoModel
+        });
       });
 
       console.log(`[TOOL EXECUTION PATH 1] ToolExecutorNode completed tool: ${toolName}`);
@@ -732,7 +741,7 @@ export function createToolExecutorNode(state: AgentState, provider: LLMProvider,
 
       return newState;
     }
-  }(toolMap, provider, modelName);
+  }(toolMap, provider, modelName, miniModel, nanoModel);
 
   return toolExecutorNode;
 }
diff --git a/front_end/panels/ai_chat/core/AgentService.ts b/front_end/panels/ai_chat/core/AgentService.ts
index 8a1ed9735e9..31aa8777dd9 100644
--- a/front_end/panels/ai_chat/core/AgentService.ts
+++ b/front_end/panels/ai_chat/core/AgentService.ts
@@ -18,6 +18,7 @@ import type { TracingProvider, TracingContext } from '../tracing/TracingProvider
 import { AgentRunnerEventBus } from '../agent_framework/AgentRunnerEventBus.js';
 import { AgentRunner } from '../agent_framework/AgentRunner.js';
 import type { AgentSession, AgentMessage } from '../agent_framework/AgentSessionTypes.js';
+import { AIChatPanel } from '../ui/AIChatPanel.js';
 import type { LLMProvider } from '../LLM/LLMTypes.js';
 
 const logger = createLogger('AgentService');
@@ -196,8 +197,12 @@ export class AgentService extends Common.ObjectWrapper.ObjectWrapper<{
       // Determine selected provider for primary graph execution
       const selectedProvider = (localStorage.getItem('ai_chat_provider') || 'openai') as LLMProvider;
 
+      // Get mini and nano models for tool execution
+      const miniModel = AIChatPanel.getMiniModel();
+      const nanoModel = AIChatPanel.getNanoModel();
+
       // Will throw error if model/provider configuration is invalid
-      this.#graph = createAgentGraph(apiKey, modelName, selectedProvider);
+      this.#graph = createAgentGraph(apiKey, modelName, selectedProvider, miniModel, nanoModel);
 
       this.#isInitialized = true;
     } catch (error) {
diff --git a/front_end/panels/ai_chat/core/ConfigurableGraph.ts b/front_end/panels/ai_chat/core/ConfigurableGraph.ts
index 6dd48d925c5..0dc52f54bc0 100644
--- a/front_end/panels/ai_chat/core/ConfigurableGraph.ts
+++ b/front_end/panels/ai_chat/core/ConfigurableGraph.ts
@@ -40,6 +40,14 @@ export interface GraphConfig {
    * Selected LLM provider for this graph's agent nodes
    */
   provider?: LLMProvider;
+  /**
+   * Mini model for smaller/faster operations
+   */
+  miniModel?: string;
+  /**
+   * Nano model for smallest/fastest operations
+   */
+  nanoModel?: string;
 }
 
 /**
@@ -96,7 +104,7 
@@ export function createAgentGraphFromConfig( const toolExecutorNodeName = edgeConfig.targetMap[NodeType.TOOL_EXECUTOR.toString()]; if (toolExecutorNodeName && toolExecutorNodeName !== '__end__') { logger.debug(`Dynamically creating/updating tool executor: ${toolExecutorNodeName}`); - const toolExecutorInstance = createToolExecutorNode(state, config.provider!, config.modelName!); + const toolExecutorInstance = createToolExecutorNode(state, config.provider!, config.modelName!, config.miniModel, config.nanoModel); graphInstance.addNode(toolExecutorNodeName, toolExecutorInstance); } else { logger.error('Tool executor node name not found in targetMap or is __end__. Routing to __end__.'); diff --git a/front_end/panels/ai_chat/core/Graph.ts b/front_end/panels/ai_chat/core/Graph.ts index a0b9950b222..75dac4df5e7 100644 --- a/front_end/panels/ai_chat/core/Graph.ts +++ b/front_end/panels/ai_chat/core/Graph.ts @@ -21,7 +21,7 @@ import type { LLMProvider } from '../LLM/LLMTypes.js'; const logger = createLogger('Graph'); // createAgentGraph now uses the LLM SDK directly -export function createAgentGraph(_apiKey: string | null, modelName: string, provider?: LLMProvider): CompiledGraph { +export function createAgentGraph(_apiKey: string | null, modelName: string, provider?: LLMProvider, miniModel?: string, nanoModel?: string): CompiledGraph { if (!modelName) { throw new Error('Model name is required'); } @@ -34,6 +34,8 @@ export function createAgentGraph(_apiKey: string | null, modelName: string, prov modelName: modelName, temperature: 0, ...(provider ? { provider } : {}), + ...(miniModel ? { miniModel } : {}), + ...(nanoModel ? { nanoModel } : {}), }; return createAgentGraphFromConfig(graphConfigWithModel); diff --git a/front_end/panels/ai_chat/evaluation/framework/GenericToolEvaluator.ts b/front_end/panels/ai_chat/evaluation/framework/GenericToolEvaluator.ts index 723fab6889f..2680a2f2f5d 100644 --- a/front_end/panels/ai_chat/evaluation/framework/GenericToolEvaluator.ts +++ b/front_end/panels/ai_chat/evaluation/framework/GenericToolEvaluator.ts @@ -3,6 +3,8 @@ // found in the LICENSE file. import type { Tool } from '../../tools/Tools.js'; +import type { LLMContext } from '../../tools/Tools.js'; +import type { LLMProvider } from '../../LLM/LLMTypes.js'; import { NavigateURLTool } from '../../tools/Tools.js'; import type { TestCase, TestResult, EvaluationConfig } from './types.js'; import { createLogger } from '../../core/Logger.js'; @@ -136,7 +138,15 @@ export class GenericToolEvaluator { const toolResult = await ErrorHandlingUtils.withErrorHandling( async () => { - return await tool.execute(testCase.input); + // Build LLM context for tools that require LLM calls (extraction/refinement, etc.) 
+ const provider = (localStorage.getItem('ai_chat_provider') as LLMProvider | null) || 'openai'; + const ctx: LLMContext = { + provider, + model: this.config.mainModel, + miniModel: this.config.miniModel, + nanoModel: this.config.nanoModel, + }; + return await tool.execute(testCase.input, ctx); }, (error) => ({ error: ErrorHandlingUtils.formatUserFriendlyError(error, 'Tool execution failed') }), logger, @@ -315,4 +325,4 @@ export class GenericToolEvaluator { static sanitizeOutput(output: unknown): unknown { return SanitizationUtils.sanitizeOutput(output); } -} \ No newline at end of file +} diff --git a/front_end/panels/ai_chat/evaluation/framework/types.ts b/front_end/panels/ai_chat/evaluation/framework/types.ts index ecc93d61acf..d5882a21c22 100644 --- a/front_end/panels/ai_chat/evaluation/framework/types.ts +++ b/front_end/panels/ai_chat/evaluation/framework/types.ts @@ -160,6 +160,11 @@ export interface EvaluationConfig { evaluationModel: string; evaluationApiKey: string; + // Model settings for tools and agents under test + mainModel: string; + miniModel: string; + nanoModel: string; + // Execution settings maxConcurrency: number; timeoutMs: number; diff --git a/front_end/panels/ai_chat/evaluation/runner/EvaluationRunner.ts b/front_end/panels/ai_chat/evaluation/runner/EvaluationRunner.ts index af12d054c4a..03c390cc316 100644 --- a/front_end/panels/ai_chat/evaluation/runner/EvaluationRunner.ts +++ b/front_end/panels/ai_chat/evaluation/runner/EvaluationRunner.ts @@ -8,9 +8,12 @@ import { AgentService } from '../../core/AgentService.js'; import { ToolRegistry } from '../../agent_framework/ConfigurableAgentTool.js'; import type { EvaluationConfig, TestResult, TestCase } from '../framework/types.js'; import { createLogger } from '../../core/Logger.js'; +import { LLMClient } from '../../LLM/LLMClient.js'; +import type { LLMProviderConfig } from '../../LLM/LLMClient.js'; import { TIMING_CONSTANTS } from '../../core/Constants.js'; import { createTracingProvider, isTracingEnabled, getTracingConfig } from '../../tracing/TracingConfig.js'; import type { TracingProvider, TracingContext } from '../../tracing/TracingProvider.js'; +import { AIChatPanel } from '../../ui/AIChatPanel.js'; const logger = createLogger('EvaluationRunner'); @@ -36,11 +39,20 @@ export class EvaluationRunner { // Use provided judge model or default const evaluationModel = judgeModel || 'gpt-4.1-mini'; + // Get the actual models configured in the UI for tools and agents + // TODO: Use a more robust method to get these settings + const mainModel = AIChatPanel.instance().getSelectedModel(); + const miniModel = AIChatPanel.getMiniModel(); + const nanoModel = AIChatPanel.getNanoModel(); + this.config = { extractionModel: evaluationModel, extractionApiKey: apiKey, evaluationModel: evaluationModel, evaluationApiKey: apiKey, + mainModel, + miniModel, + nanoModel, maxConcurrency: 1, timeoutMs: TIMING_CONSTANTS.AGENT_TEST_SCHEMA_TIMEOUT, retries: 2, @@ -51,6 +63,9 @@ export class EvaluationRunner { this.evaluator = new GenericToolEvaluator(this.config); this.llmEvaluator = new LLMEvaluator(this.config.evaluationApiKey, this.config.evaluationModel); + // Initialize LLM client for tools under evaluation (based on selected provider) + void this.#initializeLLMForEvaluation(); + // Initialize tracing this.tracingProvider = createTracingProvider(); this.sessionId = `evaluation-session-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`; @@ -65,6 +80,46 @@ export class EvaluationRunner { // Initialize tracing provider 
this.initializeTracing();
   }
+
+  // Initialize LLMClient with the currently selected provider for extraction tools
+  async #initializeLLMForEvaluation(): Promise<void> {
+    try {
+      const provider = (localStorage.getItem('ai_chat_provider') || 'openai') as any;
+      const providers: LLMProviderConfig[] = [];
+
+      if (provider === 'openai') {
+        const key = localStorage.getItem('ai_chat_api_key') || '';
+        if (key) providers.push({ provider: 'openai', apiKey: key });
+      } else if (provider === 'openrouter') {
+        const key = localStorage.getItem('ai_chat_openrouter_api_key') || '';
+        if (key) providers.push({ provider: 'openrouter', apiKey: key });
+      } else if (provider === 'groq') {
+        const key = localStorage.getItem('ai_chat_groq_api_key') || '';
+        if (key) providers.push({ provider: 'groq', apiKey: key });
+      } else if (provider === 'litellm') {
+        const key = localStorage.getItem('ai_chat_litellm_api_key') || '';
+        const endpoint = localStorage.getItem('ai_chat_litellm_endpoint') || '';
+        if (endpoint) providers.push({ provider: 'litellm', apiKey: key, providerURL: endpoint });
+      }
+
+      // Fallback to OpenAI if specific selection is not configured but an OpenAI key exists
+      if (!providers.length) {
+        const fallback = localStorage.getItem('ai_chat_api_key') || '';
+        if (fallback) providers.push({ provider: 'openai', apiKey: fallback });
+      }
+
+      if (!providers.length) {
+        logger.warn('LLMClient initialization skipped: no provider credentials found');
+        return;
+      }
+
+      const llm = LLMClient.getInstance();
+      await llm.initialize({ providers });
+      logger.info('LLMClient initialized for evaluation', { providerSelection: provider, providersRegistered: providers.map(p => p.provider) });
+    } catch (error) {
+      logger.warn('Failed to initialize LLM client for evaluation:', error);
+    }
+  }
 
   private async initializeTracing(): Promise<void> {
     if (isTracingEnabled()) {
@@ -398,4 +453,4 @@ export class EvaluationRunner {
 }
 
 // Export for easy access in DevTools console
-(globalThis as any).EvaluationRunner = EvaluationRunner;
\ No newline at end of file
+(globalThis as any).EvaluationRunner = EvaluationRunner;
diff --git a/front_end/panels/ai_chat/evaluation/runner/VisionAgentEvaluationRunner.ts b/front_end/panels/ai_chat/evaluation/runner/VisionAgentEvaluationRunner.ts
index fb5463434e3..b986d27d580 100644
--- a/front_end/panels/ai_chat/evaluation/runner/VisionAgentEvaluationRunner.ts
+++ b/front_end/panels/ai_chat/evaluation/runner/VisionAgentEvaluationRunner.ts
@@ -13,6 +13,7 @@ import { createLogger } from '../../core/Logger.js';
 import { TIMING_CONSTANTS } from '../../core/Constants.js';
 import { createTracingProvider, isTracingEnabled } from '../../tracing/TracingConfig.js';
 import type { TracingProvider, TracingContext } from '../../tracing/TracingProvider.js';
+import { AIChatPanel } from '../../ui/AIChatPanel.js';
 
 const logger = createLogger('VisionAgentEvaluationRunner');
 
@@ -59,11 +60,19 @@ export class VisionAgentEvaluationRunner {
     // Use provided judge model or default
     const evaluationModel = judgeModel || 'gpt-4.1-mini';
 
+    // Get the actual models configured in the UI for tools and agents
+    const mainModel = AIChatPanel.instance().getSelectedModel();
+    const miniModel = AIChatPanel.getMiniModel();
+    const nanoModel = AIChatPanel.getNanoModel();
+
     this.config = {
       extractionModel: evaluationModel,
       extractionApiKey: apiKey,
       evaluationModel: evaluationModel,
       evaluationApiKey: apiKey,
+      mainModel,
+      miniModel,
+      nanoModel,
       maxConcurrency: 1, // Agent tools should run sequentially
       timeoutMs: 
TIMING_CONSTANTS.AGENT_TEST_DEFAULT_TIMEOUT,
       retries: 2,
diff --git a/front_end/panels/ai_chat/tools/CritiqueTool.ts b/front_end/panels/ai_chat/tools/CritiqueTool.ts
index 8cc8f95bd15..482b424bbb9 100644
--- a/front_end/panels/ai_chat/tools/CritiqueTool.ts
+++ b/front_end/panels/ai_chat/tools/CritiqueTool.ts
@@ -165,12 +165,12 @@ Return a JSON array of requirement statements. Example format:
 ["Requirement 1", "Requirement 2", ...]`;
 
     try {
-      if (!ctx?.provider || !(ctx.nanoModel || ctx.model)) {
-        throw new Error('Missing LLM context (provider/model) for requirements extraction');
+      if (!ctx?.provider || !ctx.nanoModel) {
+        throw new Error('Missing LLM context (provider/nanoModel) for requirements extraction');
       }
       const provider = ctx.provider;
-      const model = ctx.nanoModel || ctx.model;
-      
+      const model = ctx.nanoModel;
+
       const response = await callLLMWithTracing(
         {
           provider,
@@ -272,11 +272,11 @@ Return a JSON object evaluating the plan against the requirements using this sch
 ${JSON.stringify(evaluationSchema, null, 2)}`;
 
     try {
-      if (!ctx?.provider || !(ctx.nanoModel || ctx.model)) {
-        throw new Error('Missing LLM context (provider/model) for evaluation');
+      if (!ctx?.provider || !ctx.nanoModel) {
+        throw new Error('Missing LLM context (provider/nanoModel) for evaluation');
       }
       const provider = ctx.provider;
-      const model = ctx.nanoModel || ctx.model;
+      const model = ctx.nanoModel;
 
       const response = await callLLMWithTracing(
         {
@@ -347,11 +347,11 @@ Provide clear, actionable feedback focused on helping improve the final response
 Be concise, specific, and constructive.`;
 
     try {
-      if (!ctx?.provider || !(ctx.nanoModel || ctx.model)) {
-        throw new Error('Missing LLM context (provider/model) for feedback generation');
+      if (!ctx?.provider || !ctx.nanoModel) {
+        throw new Error('Missing LLM context (provider/nanoModel) for feedback generation');
       }
       const provider = ctx.provider;
-      const model = ctx.nanoModel || ctx.model;
+      const model = ctx.nanoModel;
 
       const response = await callLLMWithTracing(
         {
diff --git a/front_end/panels/ai_chat/tools/FullPageAccessibilityTreeToMarkdownTool.ts b/front_end/panels/ai_chat/tools/FullPageAccessibilityTreeToMarkdownTool.ts
index 08e55e68909..e2107c6d43b 100644
--- a/front_end/panels/ai_chat/tools/FullPageAccessibilityTreeToMarkdownTool.ts
+++ b/front_end/panels/ai_chat/tools/FullPageAccessibilityTreeToMarkdownTool.ts
@@ -50,11 +50,11 @@ export class FullPageAccessibilityTreeToMarkdownTool implements Tool {
-    window.addEventListener('openrouter-oauth-success', () => {
+    window.addEventListener('openrouter-oauth-success', async () => {
       logger.info('=== OAUTH SUCCESS EVENT RECEIVED IN AICHATPANEL ===');
       logger.info('Timestamp:', new Date().toISOString());
       logger.info('Current localStorage state for OpenRouter:');
@@ -1025,6 +1025,18 @@ export class AIChatPanel extends UI.Panel.Panel {
       logger.info('- API key exists:', !!apiKey);
       logger.info('- API key length:', apiKey?.length || 0);
       logger.info('- Auth method:', authMethod);
+
+      // Auto-fetch OpenRouter models after successful OAuth
+      if (apiKey) {
+        try {
+          logger.info('Auto-fetching OpenRouter models after OAuth success...');
+          await this.#autoFetchOpenRouterModels(apiKey);
+          logger.info('Successfully auto-fetched OpenRouter models');
+        } catch (error) {
+          logger.warn('Failed to auto-fetch OpenRouter models after OAuth:', error);
+        }
+      }
+
       logger.info('Re-initializing agent service after OAuth success...');
       this.#initializeAgentService();
     });
@@ -1520,6 +1532,45 @@ export class AIChatPanel extends UI.Panel.Panel {
     });
   }
 
+  /**
+   * Auto-fetch OpenRouter models after 
successful OAuth authentication
+   */
+  async #autoFetchOpenRouterModels(apiKey: string): Promise<void> {
+    try {
+      logger.debug('Fetching OpenRouter models automatically after OAuth...');
+
+      // Import LLMClient and SettingsDialog dynamically to fetch and update models
+      const [{ LLMClient }, { SettingsDialog }] = await Promise.all([
+        import('../LLM/LLMClient.js'),
+        import('./SettingsDialog.js')
+      ]);
+
+      const openrouterModels = await LLMClient.fetchOpenRouterModels(apiKey);
+      logger.debug(`Auto-fetched ${openrouterModels.length} OpenRouter models`);
+
+      // Update models programmatically via SettingsDialog static method
+      SettingsDialog.updateOpenRouterModels(openrouterModels);
+
+      // Also update AIChatPanel's model options for immediate UI availability
+      const modelOptions: ModelOption[] = openrouterModels.map(model => ({
+        value: model.id,
+        label: model.name || model.id,
+        type: 'openrouter' as const,
+      }));
+      AIChatPanel.updateModelOptions(modelOptions, false);
+      this.performUpdate();
+
+      // Also dispatch event for backward compatibility / other listeners
+      window.dispatchEvent(new CustomEvent('openrouter-models-fetched', {
+        detail: { models: openrouterModels }
+      }));
+
+    } catch (error) {
+      logger.error('Failed to auto-fetch OpenRouter models:', error);
+      throw error;
+    }
+  }
+
   /**
    * Handle manual setup request from ChatView
    */
diff --git a/front_end/panels/ai_chat/ui/SettingsDialog.ts b/front_end/panels/ai_chat/ui/SettingsDialog.ts
index 91f7739c2c1..3a50983a08a 100644
--- a/front_end/panels/ai_chat/ui/SettingsDialog.ts
+++ b/front_end/panels/ai_chat/ui/SettingsDialog.ts
@@ -28,6 +28,9 @@ const LITELLM_API_KEY_STORAGE_KEY = 'ai_chat_litellm_api_key';
 const GROQ_API_KEY_STORAGE_KEY = 'ai_chat_groq_api_key';
 const OPENROUTER_API_KEY_STORAGE_KEY = 'ai_chat_openrouter_api_key';
 const PROVIDER_SELECTION_KEY = 'ai_chat_provider';
+
+// Cache constants
+const OPENROUTER_MODELS_CACHE_DURATION_MS = 60 * 60 * 1000; // 60 minutes
 // Vector DB configuration keys - Milvus format
 const VECTOR_DB_ENABLED_KEY = 'ai_chat_vector_db_enabled';
 const MILVUS_ENDPOINT_KEY = 'ai_chat_milvus_endpoint';
@@ -556,6 +559,7 @@ export class SettingsDialog {
             type: 'openrouter' as const
           }));
           updateModelOptions(modelOptions, false);
+          localStorage.setItem('openrouter_models_cache_timestamp', Date.now().toString());
           logger.debug('Successfully refreshed OpenRouter models after provider change');
         } catch (error) {
           logger.error('Failed to fetch OpenRouter models after provider change:', error);
@@ -581,7 +585,7 @@ export class SettingsDialog {
         updateGroqModelSelectors();
       } else if (selectedProvider === 'openrouter') {
         // Update OpenRouter selectors
-        updateOpenRouterModelSelectors();
+        await updateOpenRouterModelSelectors();
       }
     });
 
@@ -1683,10 +1687,75 @@ export class SettingsDialog {
       fetchOpenRouterModelsButton.disabled = !openrouterApiKeyInput.value.trim();
     });
 
+    // Function to check cache age and auto-refresh OpenRouter models if stale
+    async function checkAndRefreshOpenRouterCache(): Promise<void> {
+      try {
+        const cacheTimestamp = localStorage.getItem('openrouter_models_cache_timestamp');
+        const now = Date.now();
+
+        // If no timestamp, cache is considered stale
+        if (!cacheTimestamp) {
+          logger.debug('OpenRouter models cache has no timestamp, considering stale');
+          await autoRefreshOpenRouterModels();
+          return;
+        }
+
+        const cacheAge = now - parseInt(cacheTimestamp, 10);
+        const isStale = cacheAge > OPENROUTER_MODELS_CACHE_DURATION_MS;
+
+        if (isStale) {
+          const ageMinutes = Math.round(cacheAge / (1000 * 60));
+          logger.debug(`OpenRouter models cache is stale (${ageMinutes} minutes old), auto-refreshing...`);
+          await autoRefreshOpenRouterModels();
+        } else {
+          const remainingMinutes = Math.round((OPENROUTER_MODELS_CACHE_DURATION_MS - cacheAge) / (1000 * 60));
+          logger.debug(`OpenRouter models cache is fresh (expires in ${remainingMinutes} minutes)`);
+        }
+      } catch (error) {
+        logger.warn('Failed to check OpenRouter models cache age:', error);
+      }
+    }
+
+    // Function to auto-refresh OpenRouter models silently
+    async function autoRefreshOpenRouterModels(): Promise<void> {
+      try {
+        const openrouterApiKey = openrouterApiKeyInput.value.trim();
+
+        if (!openrouterApiKey) {
+          logger.debug('No OpenRouter API key available for auto-refresh');
+          return;
+        }
+
+        logger.debug('Auto-refreshing OpenRouter models...');
+        const openrouterModels = await LLMClient.fetchOpenRouterModels(openrouterApiKey);
+
+        // Convert OpenRouter models to ModelOption format
+        const modelOptions: ModelOption[] = openrouterModels.map(model => ({
+          value: model.id,
+          label: model.name || model.id,
+          type: 'openrouter' as const
+        }));
+
+        // Store in localStorage with timestamp
+        localStorage.setItem('openrouter_models_cache', JSON.stringify(modelOptions));
+        localStorage.setItem('openrouter_models_cache_timestamp', Date.now().toString());
+
+        // Also update global model options so UI immediately sees models
+        updateModelOptions(modelOptions, false);
+
+        logger.debug(`Auto-refreshed ${modelOptions.length} OpenRouter models`);
+      } catch (error) {
+        logger.warn('Failed to auto-refresh OpenRouter models:', error);
+      }
+    }
+
     // Function to update OpenRouter model selectors
-    function updateOpenRouterModelSelectors() {
+    async function updateOpenRouterModelSelectors() {
       logger.debug('Updating OpenRouter model selectors');
 
+      // Check if OpenRouter models cache is stale and auto-refresh if needed
+      await checkAndRefreshOpenRouterCache();
+
       // Get the latest model options filtered for OpenRouter provider
       const openrouterModels = getModelOptions('openrouter');
       logger.debug('OpenRouter models from getModelOptions:', openrouterModels);
@@ -1755,10 +1824,13 @@ export class SettingsDialog {
       // Update model options with fetched OpenRouter models
       updateModelOptions(modelOptions, false);
 
+      // Update timestamp for cache management
+      localStorage.setItem('openrouter_models_cache_timestamp', Date.now().toString());
+
       const actualModelCount = openrouterModels.length;
 
       // Update the model selectors with the new models
-      updateOpenRouterModelSelectors();
+      await updateOpenRouterModelSelectors();
 
       // Update status to show success
       fetchOpenRouterModelsStatus.textContent = i18nString(UIStrings.fetchedModels, {PH1: actualModelCount});
@@ -1782,7 +1854,7 @@ export class SettingsDialog {
     });
 
     // Initialize OpenRouter model selectors
-    updateOpenRouterModelSelectors();
+    await updateOpenRouterModelSelectors();
 
     // Add Vector DB configuration section
     const vectorDBSection = document.createElement('div');
@@ -3101,6 +3173,36 @@ export class SettingsDialog {
 
     return Promise.resolve();
   }
+
+  /**
+   * Static method to update OpenRouter models programmatically (called after OAuth success)
+   */
+  static updateOpenRouterModels(openrouterModels: any[]): void {
+    try {
+      logger.debug('Updating OpenRouter models programmatically...', openrouterModels.length);
+
+      // Convert OpenRouter models to ModelOption format
+      const modelOptions: ModelOption[] = openrouterModels.map(model => ({
+        value: model.id,
+        label: model.name || model.id,
+        type: 'openrouter' as const
+      }));
+
+      // Store 
models in localStorage cache for the model management system to pick up
+      localStorage.setItem('openrouter_models_cache', JSON.stringify(modelOptions));
+      localStorage.setItem('openrouter_models_cache_timestamp', Date.now().toString());
+
+      // Dispatch event to notify model management system to refresh
+      window.dispatchEvent(new CustomEvent('openrouter-models-updated', {
+        detail: { models: modelOptions, source: 'oauth' }
+      }));
+
+      logger.debug('Successfully cached OpenRouter models and dispatched update event');
+
+    } catch (error) {
+      logger.error('Failed to update OpenRouter models programmatically:', error);
+    }
+  }
 }
 
 // Helper function to create a model selector
@@ -3156,4 +3258,4 @@ function createModelSelector(
   }
 
   return modelSelect;
-}
\ No newline at end of file
+}
diff --git a/front_end/panels/ai_chat/ui/__tests__/SettingsDialogOpenRouterCache.test.ts b/front_end/panels/ai_chat/ui/__tests__/SettingsDialogOpenRouterCache.test.ts
new file mode 100644
index 00000000000..3f7cc6b0df9
--- /dev/null
+++ b/front_end/panels/ai_chat/ui/__tests__/SettingsDialogOpenRouterCache.test.ts
@@ -0,0 +1,259 @@
+// Copyright 2025 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+import {SettingsDialog} from '../SettingsDialog.js';
+import {LLMClient} from '../../LLM/LLMClient.js';
+
+describe('SettingsDialog OpenRouter Cache Auto-Refresh', () => {
+  let mockLocalStorage: Map<string, string>;
+  let originalDateNow: () => number;
+  let mockCurrentTime: number;
+  let fetchOpenRouterModelsCalls: any[];
+
+  // Cache duration constant (60 minutes in milliseconds)
+  const CACHE_DURATION_MS = 60 * 60 * 1000;
+
+  beforeEach(() => {
+    // Mock localStorage
+    mockLocalStorage = new Map();
+    Object.defineProperty(window, 'localStorage', {
+      value: {
+        getItem: (key: string) => mockLocalStorage.get(key) || null,
+        setItem: (key: string, value: string) => mockLocalStorage.set(key, value),
+        removeItem: (key: string) => mockLocalStorage.delete(key),
+        clear: () => mockLocalStorage.clear(),
+      },
+      writable: true,
+    });
+
+    // Mock Date.now() for time-based tests
+    originalDateNow = Date.now;
+    mockCurrentTime = 1640995200000; // January 1, 2022 00:00:00 UTC
+    Date.now = () => mockCurrentTime;
+
+    // Mock LLMClient.fetchOpenRouterModels
+    fetchOpenRouterModelsCalls = [];
+    LLMClient.fetchOpenRouterModels = async (apiKey: string) => {
+      fetchOpenRouterModelsCalls.push({ apiKey, timestamp: mockCurrentTime });
+      return [
+        { id: 'openai/gpt-4', name: 'GPT-4' },
+        { id: 'anthropic/claude-3-sonnet', name: 'Claude 3 Sonnet' },
+        { id: 'meta-llama/llama-2-70b-chat', name: 'Llama 2 70B' },
+      ];
+    };
+  });
+
+  afterEach(() => {
+    // Restore original Date.now
+    Date.now = originalDateNow;
+    mockLocalStorage.clear();
+    fetchOpenRouterModelsCalls = [];
+  });
+
+  describe('Cache Timestamp Setting', () => {
+    it('should set timestamp when models are fetched via updateOpenRouterModels', () => {
+      const mockModels = [
+        { id: 'openai/gpt-4', name: 'GPT-4' },
+        { id: 'anthropic/claude-3-sonnet', name: 'Claude 3 Sonnet' },
+      ];
+
+      SettingsDialog.updateOpenRouterModels(mockModels);
+
+      const timestamp = mockLocalStorage.get('openrouter_models_cache_timestamp');
+      assert.strictEqual(timestamp, mockCurrentTime.toString());
+    });
+
+    it('should set timestamp when models are stored in cache', () => {
+      const mockModels = [
+        { value: 'openai/gpt-4', label: 'GPT-4', type: 'openrouter' as const },
+        { value: 'anthropic/claude-3-sonnet', label: 'Claude 3 
Sonnet', type: 'openrouter' as const }, + ]; + + mockLocalStorage.set('openrouter_models_cache', JSON.stringify(mockModels)); + SettingsDialog.updateOpenRouterModels(mockModels.map(m => ({ id: m.value, name: m.label }))); + + const timestamp = mockLocalStorage.get('openrouter_models_cache_timestamp'); + assert.strictEqual(timestamp, mockCurrentTime.toString()); + }); + }); + + describe('Cache Staleness Detection', () => { + it('should consider cache stale when no timestamp exists', () => { + // Set up cache without timestamp + const mockModels = [ + { value: 'openai/gpt-4', label: 'GPT-4', type: 'openrouter' as const }, + ]; + mockLocalStorage.set('openrouter_models_cache', JSON.stringify(mockModels)); + // No timestamp set + + const timestamp = mockLocalStorage.get('openrouter_models_cache_timestamp'); + assert.isNull(timestamp); + }); + + it('should consider cache fresh when less than 60 minutes old', () => { + const cacheTime = mockCurrentTime - (30 * 60 * 1000); // 30 minutes ago + mockLocalStorage.set('openrouter_models_cache_timestamp', cacheTime.toString()); + + const timestamp = mockLocalStorage.get('openrouter_models_cache_timestamp'); + const cacheAge = mockCurrentTime - parseInt(timestamp!, 10); + + assert.strictEqual(cacheAge < CACHE_DURATION_MS, true); + }); + + it('should consider cache stale when older than 60 minutes', () => { + const cacheTime = mockCurrentTime - (90 * 60 * 1000); // 90 minutes ago + mockLocalStorage.set('openrouter_models_cache_timestamp', cacheTime.toString()); + + const timestamp = mockLocalStorage.get('openrouter_models_cache_timestamp'); + const cacheAge = mockCurrentTime - parseInt(timestamp!, 10); + + assert.strictEqual(cacheAge > CACHE_DURATION_MS, true); + }); + }); + + describe('Auto-Refresh Behavior', () => { + it('should not trigger fetch when cache is fresh', async () => { + const freshCacheTime = mockCurrentTime - (30 * 60 * 1000); // 30 minutes ago + mockLocalStorage.set('openrouter_models_cache_timestamp', freshCacheTime.toString()); + mockLocalStorage.set('ai_chat_openrouter_api_key', 'test-api-key'); + + // The actual auto-refresh logic would be tested through the settings dialog + // For now, we test the timestamp comparison logic + const timestamp = mockLocalStorage.get('openrouter_models_cache_timestamp'); + const cacheAge = mockCurrentTime - parseInt(timestamp!, 10); + const shouldRefresh = cacheAge > CACHE_DURATION_MS; + + assert.strictEqual(shouldRefresh, false); + }); + + it('should trigger fetch when cache is stale', async () => { + const staleCacheTime = mockCurrentTime - (90 * 60 * 1000); // 90 minutes ago + mockLocalStorage.set('openrouter_models_cache_timestamp', staleCacheTime.toString()); + mockLocalStorage.set('ai_chat_openrouter_api_key', 'test-api-key'); + + // Test timestamp comparison logic + const timestamp = mockLocalStorage.get('openrouter_models_cache_timestamp'); + const cacheAge = mockCurrentTime - parseInt(timestamp!, 10); + const shouldRefresh = cacheAge > CACHE_DURATION_MS; + + assert.strictEqual(shouldRefresh, true); + }); + + it('should handle missing API key gracefully', () => { + const staleCacheTime = mockCurrentTime - (90 * 60 * 1000); // 90 minutes ago + mockLocalStorage.set('openrouter_models_cache_timestamp', staleCacheTime.toString()); + // No API key set + + const apiKey = mockLocalStorage.get('ai_chat_openrouter_api_key'); + assert.isNull(apiKey); + + // Should not attempt fetch without API key + const shouldAttemptFetch = !!apiKey; + assert.strictEqual(shouldAttemptFetch, false); + }); + }); + + 
describe('Cache Age Calculation', () => { + it('should correctly calculate cache age in minutes', () => { + const cacheTime = mockCurrentTime - (45 * 60 * 1000); // 45 minutes ago + mockLocalStorage.set('openrouter_models_cache_timestamp', cacheTime.toString()); + + const timestamp = mockLocalStorage.get('openrouter_models_cache_timestamp'); + const cacheAge = mockCurrentTime - parseInt(timestamp!, 10); + const ageInMinutes = Math.round(cacheAge / (1000 * 60)); + + assert.strictEqual(ageInMinutes, 45); + }); + + it('should correctly calculate remaining cache time', () => { + const cacheTime = mockCurrentTime - (30 * 60 * 1000); // 30 minutes ago + mockLocalStorage.set('openrouter_models_cache_timestamp', cacheTime.toString()); + + const timestamp = mockLocalStorage.get('openrouter_models_cache_timestamp'); + const cacheAge = mockCurrentTime - parseInt(timestamp!, 10); + const remainingTime = CACHE_DURATION_MS - cacheAge; + const remainingMinutes = Math.round(remainingTime / (1000 * 60)); + + assert.strictEqual(remainingMinutes, 30); + }); + }); + + describe('Time Advancement Tests', () => { + it('should detect when cache becomes stale over time', () => { + // Set cache as fresh initially + const initialTime = mockCurrentTime; + mockLocalStorage.set('openrouter_models_cache_timestamp', initialTime.toString()); + + // Verify cache is fresh + let timestamp = mockLocalStorage.get('openrouter_models_cache_timestamp'); + let cacheAge = mockCurrentTime - parseInt(timestamp!, 10); + assert.strictEqual(cacheAge < CACHE_DURATION_MS, true); + + // Advance time by 90 minutes + mockCurrentTime += (90 * 60 * 1000); + + // Verify cache is now stale + timestamp = mockLocalStorage.get('openrouter_models_cache_timestamp'); + cacheAge = mockCurrentTime - parseInt(timestamp!, 10); + assert.strictEqual(cacheAge > CACHE_DURATION_MS, true); + }); + + it('should handle edge case at exact cache duration boundary', () => { + const cacheTime = mockCurrentTime - CACHE_DURATION_MS; // Exactly 60 minutes ago + mockLocalStorage.set('openrouter_models_cache_timestamp', cacheTime.toString()); + + const timestamp = mockLocalStorage.get('openrouter_models_cache_timestamp'); + const cacheAge = mockCurrentTime - parseInt(timestamp!, 10); + + // Should be considered stale when age equals duration + assert.strictEqual(cacheAge >= CACHE_DURATION_MS, true); + }); + }); + + describe('Integration Scenarios', () => { + it('should maintain cache consistency across multiple operations', () => { + // Initial fetch and cache + SettingsDialog.updateOpenRouterModels([ + { id: 'openai/gpt-4', name: 'GPT-4' }, + ]); + + let timestamp = mockLocalStorage.get('openrouter_models_cache_timestamp'); + assert.strictEqual(timestamp, mockCurrentTime.toString()); + + // Advance time slightly (cache still fresh) + mockCurrentTime += (30 * 60 * 1000); // 30 minutes + + // Verify cache is still considered fresh + timestamp = mockLocalStorage.get('openrouter_models_cache_timestamp'); + const cacheAge = mockCurrentTime - parseInt(timestamp!, 10); + assert.strictEqual(cacheAge < CACHE_DURATION_MS, true); + + // Update models again + SettingsDialog.updateOpenRouterModels([ + { id: 'openai/gpt-4', name: 'GPT-4' }, + { id: 'anthropic/claude-3-sonnet', name: 'Claude 3 Sonnet' }, + ]); + + // Timestamp should be updated to current time + timestamp = mockLocalStorage.get('openrouter_models_cache_timestamp'); + assert.strictEqual(timestamp, mockCurrentTime.toString()); + }); + + it('should handle rapid consecutive operations correctly', () => { + const startTime 
= mockCurrentTime;
+
+      // First operation
+      SettingsDialog.updateOpenRouterModels([{ id: 'model1', name: 'Model 1' }]);
+      let firstTimestamp = mockLocalStorage.get('openrouter_models_cache_timestamp');
+
+      // Second operation immediately after (same timestamp due to mocking)
+      SettingsDialog.updateOpenRouterModels([{ id: 'model2', name: 'Model 2' }]);
+      let secondTimestamp = mockLocalStorage.get('openrouter_models_cache_timestamp');
+
+      assert.strictEqual(firstTimestamp, startTime.toString());
+      assert.strictEqual(secondTimestamp, startTime.toString());
+      assert.strictEqual(firstTimestamp, secondTimestamp);
+    });
+  });
+});
\ No newline at end of file

From a3d3ff1c137550179240b3d425fdd83741420b8a Mon Sep 17 00:00:00 2001
From: Tyson Thomas 
Date: Mon, 8 Sep 2025 18:52:19 -0700
Subject: [PATCH 2/4] Pass model configuration explicitly instead of reading
 it from the UI

---
 front_end/panels/ai_chat/core/AgentService.ts |  11 ++-----
 .../framework/judges/LLMEvaluator.ts          |  32 ++++++++++++++----
 .../evaluation/remote/EvaluationAgent.ts      |  25 +++++++++-----
 .../evaluation/runner/EvaluationRunner.ts     |  21 ++++++------
 .../runner/VisionAgentEvaluationRunner.ts     |  25 +++++++-------
 front_end/panels/ai_chat/ui/AIChatPanel.ts    |  14 ++++++--
 .../panels/ai_chat/ui/EvaluationDialog.ts     |  33 ++++++++++++++++---
 7 files changed, 108 insertions(+), 53 deletions(-)

diff --git a/front_end/panels/ai_chat/core/AgentService.ts b/front_end/panels/ai_chat/core/AgentService.ts
index 31aa8777dd9..f9d775204a9 100644
--- a/front_end/panels/ai_chat/core/AgentService.ts
+++ b/front_end/panels/ai_chat/core/AgentService.ts
@@ -18,7 +18,6 @@ import type { TracingProvider, TracingContext } from '../tracing/TracingProvider
 import { AgentRunnerEventBus } from '../agent_framework/AgentRunnerEventBus.js';
 import { AgentRunner } from '../agent_framework/AgentRunner.js';
 import type { AgentSession, AgentMessage } from '../agent_framework/AgentSessionTypes.js';
-import { AIChatPanel } from '../ui/AIChatPanel.js';
 import type { LLMProvider } from '../LLM/LLMTypes.js';
 
 const logger = createLogger('AgentService');
@@ -166,13 +165,9 @@ export class AgentService extends Common.ObjectWrapper.ObjectWrapper<{
   /**
    * Initializes the agent with the given API key
    */
-  async initialize(apiKey: string | null, modelName?: string): Promise<void> {
+  async initialize(apiKey: string | null, modelName: string, miniModel: string, nanoModel: string): Promise<void> {
     try {
       this.#apiKey = apiKey;
-
-      if (!modelName) {
-        throw new Error('Model name is required for initialization');
-      }
 
       // Initialize LLM client first
       await this.#initializeLLMClient();
@@ -197,9 +192,7 @@ export class AgentService extends Common.ObjectWrapper.ObjectWrapper<{
       // Determine selected provider for primary graph execution
       const selectedProvider = (localStorage.getItem('ai_chat_provider') || 'openai') as LLMProvider;
 
-      // Get mini and nano models for tool execution
-      const miniModel = AIChatPanel.getMiniModel();
-      const nanoModel = AIChatPanel.getNanoModel();
+      // Mini and nano models are injected by caller (validated upstream)
 
       // Will throw error if model/provider configuration is invalid
       this.#graph = createAgentGraph(apiKey, modelName, selectedProvider, miniModel, nanoModel);
diff --git a/front_end/panels/ai_chat/evaluation/framework/judges/LLMEvaluator.ts b/front_end/panels/ai_chat/evaluation/framework/judges/LLMEvaluator.ts
index cdbfd9e14aa..efab6a44458 100644
--- a/front_end/panels/ai_chat/evaluation/framework/judges/LLMEvaluator.ts
+++ b/front_end/panels/ai_chat/evaluation/framework/judges/LLMEvaluator.ts
@@ -9,7 +9,6 @@ import { 
ErrorHandlingUtils } from '../../utils/ErrorHandlingUtils.js';
 import { PromptTemplates } from '../../utils/PromptTemplates.js';
 import { ResponseParsingUtils } from '../../utils/ResponseParsingUtils.js';
 import type { ScreenshotData, VisionMessage, TextContent, ImageContent } from '../../utils/EvaluationTypes.js';
-import { AIChatPanel } from '../../../ui/AIChatPanel.js';
 
 const logger = createLogger('LLMEvaluator');
 
@@ -21,11 +20,10 @@ export class LLMEvaluator {
   private apiKey: string;
   private defaultModel: string;
 
-  constructor(apiKey: string, defaultModel?: string) {
+  constructor(apiKey: string, defaultModel: string) {
     this.apiKey = apiKey;
-    // Use the provided model, or fall back to saved judge model, or finally to mini model
-    const JUDGE_MODEL_STORAGE_KEY = 'ai_chat_judge_model';
-    this.defaultModel = defaultModel || localStorage.getItem(JUDGE_MODEL_STORAGE_KEY) || AIChatPanel.getMiniModel();
+    // Model must be provided by caller; no fallbacks
+    this.defaultModel = defaultModel;
   }
 
@@ -89,8 +87,9 @@ export class LLMEvaluator {
     for (let attempt = 1; attempt <= maxRetries; attempt++) {
       try {
         const llm = LLMClient.getInstance();
+        const provider = await this.#getProviderForModel(model);
         const llmResponse = await llm.call({
-          provider: AIChatPanel.getProviderForModel(model),
+          provider,
           model: model,
           messages: [
             { role: 'system', content: PromptTemplates.buildSystemPrompt({ hasVision: false }) },
@@ -124,6 +123,25 @@ export class LLMEvaluator {
     throw lastError;
   }
 
+  /**
+   * Resolve the provider for a given model using the LLM registry.
+   * Falls back to the currently selected provider if unknown.
+   */
+  async #getProviderForModel(modelId: string): Promise<'openai' | 'litellm' | 'groq' | 'openrouter'> {
+    try {
+      const llm = LLMClient.getInstance();
+      const models = await llm.getAvailableModels();
+      const found = models.find(m => m.id === modelId);
+      if (found) {
+        return found.provider as any;
+      }
+    } catch {
+      // ignore and fall through to fallback
+    }
+    // Fallback: current selection or openai
+    return (localStorage.getItem('ai_chat_provider') || 'openai') as any;
+  }
+
   /**
    * Vision-enhanced evaluation using multimodal LLM
    */
@@ -342,4 +360,4 @@ export class LLMEvaluator {
 
     return evaluations;
   }
-}
\ No newline at end of file
+}
diff --git a/front_end/panels/ai_chat/evaluation/remote/EvaluationAgent.ts b/front_end/panels/ai_chat/evaluation/remote/EvaluationAgent.ts
index e739fd36e50..56ff739b9ff 100644
--- a/front_end/panels/ai_chat/evaluation/remote/EvaluationAgent.ts
+++ b/front_end/panels/ai_chat/evaluation/remote/EvaluationAgent.ts
@@ -10,7 +10,6 @@ import { createLogger } from '../../core/Logger.js';
 import { createTracingProvider, withTracingContext, isTracingEnabled, getTracingConfig } from '../../tracing/TracingConfig.js';
 import type { TracingProvider, TracingContext } from '../../tracing/TracingProvider.js';
 import type { ChatMessage } from '../../models/ChatTypes.js';
-import { AIChatPanel } from '../../ui/AIChatPanel.js';
 import {
   RegisterMessage,
   ReadyMessage,
@@ -40,6 +39,10 @@ export interface EvaluationAgentOptions {
   clientId: string;
   endpoint: string;
   secretKey?: string;
+  // Explicit models to avoid UI coupling
+  judgeModel: string;
+  miniModel: string;
+  nanoModel: string;
 }
 
@@ -56,11 +59,17 @@ export class EvaluationAgent {
   private authResolve: ((value?: void) => void) | null = null;
   private authReject: ((reason?: any) => void) | null = null;
   private tracingProvider: TracingProvider;
+  private judgeModel: string;
+  private miniModel: string;
+  private nanoModel: string;
 
   constructor(options: EvaluationAgentOptions) {
     this.clientId = options.clientId;
     this.endpoint = options.endpoint;
     this.secretKey = options.secretKey;
+    this.judgeModel = options.judgeModel;
+    this.miniModel = options.miniModel;
+    this.nanoModel = options.nanoModel;
 
     this.tracingProvider = createTracingProvider();
 
     logger.info('EvaluationAgent created with tracing provider', {
@@ -701,12 +710,10 @@ export class EvaluationAgent {
     // Get or create AgentService instance
     const agentService = AgentService.getInstance();
 
-    // Use the current model from localStorage (no override)
-    let modelName = localStorage.getItem('ai_chat_model_selection');
-    if (!modelName) {
-      // Default model
-      modelName = 'gpt-4o';
-    }
+    // Use explicit models from constructor
+    const modelName = this.judgeModel;
+    const miniModel = this.miniModel;
+    const nanoModel = this.nanoModel;
 
     logger.info('Initializing AgentService for chat evaluation', {
       modelName,
@@ -714,8 +721,8 @@
       isInitialized: agentService.isInitialized()
     });
 
-    // Always reinitialize with the current model
-    await agentService.initialize(agentService.getApiKey(), modelName);
+    // Always reinitialize with the current model and explicit mini/nano
+    await agentService.initialize(agentService.getApiKey(), modelName, miniModel, nanoModel);
 
     // Create a child observation for the chat execution
     if (tracingContext) {
diff --git a/front_end/panels/ai_chat/evaluation/runner/EvaluationRunner.ts b/front_end/panels/ai_chat/evaluation/runner/EvaluationRunner.ts
index 03c390cc316..099a7b1b770 100644
--- a/front_end/panels/ai_chat/evaluation/runner/EvaluationRunner.ts
+++ b/front_end/panels/ai_chat/evaluation/runner/EvaluationRunner.ts
@@ -13,13 +13,19 @@ import type { LLMProviderConfig } from '../../LLM/LLMClient.js';
 import { TIMING_CONSTANTS } from '../../core/Constants.js';
 import { createTracingProvider, isTracingEnabled, getTracingConfig } from '../../tracing/TracingConfig.js';
 import type { TracingProvider, TracingContext } from '../../tracing/TracingProvider.js';
-import { AIChatPanel } from '../../ui/AIChatPanel.js';
 
 const logger = createLogger('EvaluationRunner');
 
 /**
  * Example runner for the evaluation framework
  */
+export interface EvaluationRunnerOptions {
+  judgeModel: string;
+  mainModel: string;
+  miniModel: string;
+  nanoModel: string;
+}
+
 export class EvaluationRunner {
   private evaluator: GenericToolEvaluator;
   private llmEvaluator: LLMEvaluator;
@@ -27,7 +33,7 @@ export class EvaluationRunner {
   private tracingProvider: TracingProvider;
   private sessionId: string;
 
-  constructor(judgeModel?: string) {
+  constructor(options: EvaluationRunnerOptions) {
     // Get API key from AgentService
     const agentService = AgentService.getInstance();
     const apiKey = agentService.getApiKey();
@@ -36,14 +42,9 @@
       throw new Error('API key not configured. Please configure in AI Chat settings.');
     }
 
-    // Use provided judge model or default
-    const evaluationModel = judgeModel || 'gpt-4.1-mini';
-
-    // Get the actual models configured in the UI for tools and agents
-    // TODO: Use a more robust method to get these settings
-    const mainModel = AIChatPanel.instance().getSelectedModel();
-    const miniModel = AIChatPanel.getMiniModel();
-    const nanoModel = AIChatPanel.getNanoModel();
+    // Require explicit models from caller
+    const { judgeModel, mainModel, miniModel, nanoModel } = options;
+    const evaluationModel = judgeModel;
 
     this.config = {
       extractionModel: evaluationModel,
diff --git a/front_end/panels/ai_chat/evaluation/runner/VisionAgentEvaluationRunner.ts b/front_end/panels/ai_chat/evaluation/runner/VisionAgentEvaluationRunner.ts
index b986d27d580..fb90173f5a7 100644
--- a/front_end/panels/ai_chat/evaluation/runner/VisionAgentEvaluationRunner.ts
+++ b/front_end/panels/ai_chat/evaluation/runner/VisionAgentEvaluationRunner.ts
@@ -13,7 +13,6 @@ import { createLogger } from '../../core/Logger.js';
 import { TIMING_CONSTANTS } from '../../core/Constants.js';
 import { createTracingProvider, isTracingEnabled } from '../../tracing/TracingConfig.js';
 import type { TracingProvider, TracingContext } from '../../tracing/TracingProvider.js';
-import { AIChatPanel } from '../../ui/AIChatPanel.js';
 
 const logger = createLogger('VisionAgentEvaluationRunner');
 
@@ -40,6 +39,14 @@ export interface VisionTestCase extends TestCase {
  * Unified agent evaluation runner that supports both standard and vision-based evaluation
  * This replaces AgentEvaluationRunner when vision capabilities are needed
  */
+export interface VisionRunnerOptions {
+  visionEnabled?: boolean;
+  judgeModel: string;
+  mainModel: string;
+  miniModel: string;
+  nanoModel: string;
+}
+
 export class VisionAgentEvaluationRunner {
   private llmEvaluator: LLMEvaluator;
 
@@ -48,7 +55,7 @@ export class VisionAgentEvaluationRunner {
   private globalVisionEnabled: boolean;
   private tracingProvider: TracingProvider;
 
-  constructor(visionEnabled: boolean = false, judgeModel?: string) {
+  constructor(options: VisionRunnerOptions) {
     // Get API key from AgentService
     const agentService = AgentService.getInstance();
     const apiKey = agentService.getApiKey();
@@ -57,13 +64,9 @@
       throw new Error('API key not configured. Please configure in AI Chat settings.');
     }
 
-    // Use provided judge model or default
-    const evaluationModel = judgeModel || 'gpt-4.1-mini';
-
-    // Get the actual models configured in the UI for tools and agents
-    const mainModel = AIChatPanel.instance().getSelectedModel();
-    const miniModel = AIChatPanel.getMiniModel();
-    const nanoModel = AIChatPanel.getNanoModel();
+    // Require explicit models from caller
+    const { judgeModel, mainModel, miniModel, nanoModel } = options;
+    const evaluationModel = judgeModel;
 
     this.config = {
       extractionModel: evaluationModel,
@@ -82,7 +85,7 @@
     this.llmEvaluator = new LLMEvaluator(this.config.evaluationApiKey, this.config.evaluationModel);
     this.screenshotTool = new TakeScreenshotTool();
 
-    this.globalVisionEnabled = visionEnabled;
+    this.globalVisionEnabled = Boolean(options.visionEnabled);
     this.tracingProvider = createTracingProvider();
   }
 
@@ -467,4 +470,4 @@ export class VisionAgentEvaluationRunner {
     this.globalVisionEnabled = enabled;
     logger.info(`Global vision mode: ${enabled ? 'ENABLED' : 'DISABLED'}`);
   }
-}
\ No newline at end of file
+}
diff --git a/front_end/panels/ai_chat/ui/AIChatPanel.ts b/front_end/panels/ai_chat/ui/AIChatPanel.ts
index d94f5d9ee20..95761de4ad8 100644
--- a/front_end/panels/ai_chat/ui/AIChatPanel.ts
+++ b/front_end/panels/ai_chat/ui/AIChatPanel.ts
@@ -1282,7 +1282,10 @@ export class AIChatPanel extends UI.Panel.Panel {
       this.#evaluationAgent = new EvaluationAgent({
         clientId: compositeClientId,
         endpoint: config.endpoint,
-        secretKey: config.secretKey
+        secretKey: config.secretKey,
+        judgeModel: this.#selectedModel,
+        miniModel: this.#miniModel,
+        nanoModel: this.#nanoModel,
       });
 
       await this.#evaluationAgent.connect();
@@ -1351,7 +1354,14 @@
     // Initialize the agent service
     logger.info('Calling agentService.initialize()...');
-    this.#agentService.initialize(apiKey, this.#selectedModel)
+    const miniForInit = this.#miniModel || this.#selectedModel;
+    const nanoForInit = this.#nanoModel || miniForInit;
+    this.#agentService.initialize(
+      apiKey,
+      this.#selectedModel,
+      miniForInit,
+      nanoForInit,
+    )
       .then(() => {
         logger.info('✅ Agent service initialized successfully');
         this.#setCanSendMessagesState(true, "Agent service initialized successfully");
diff --git a/front_end/panels/ai_chat/ui/EvaluationDialog.ts b/front_end/panels/ai_chat/ui/EvaluationDialog.ts
index 85e1ba98bef..e4129b63ba9 100644
--- a/front_end/panels/ai_chat/ui/EvaluationDialog.ts
+++ b/front_end/panels/ai_chat/ui/EvaluationDialog.ts
@@ -199,13 +199,25 @@
     // Initialize evaluation runners
     try {
-      this.#evaluationRunner = new EvaluationRunner(this.#state.judgeModel);
+      // Inject current UI-selected models into the runner to decouple from UI internals
+      this.#evaluationRunner = new EvaluationRunner({
+        judgeModel: this.#state.judgeModel,
+        mainModel: AIChatPanel.instance().getSelectedModel(),
+        miniModel: AIChatPanel.getMiniModel(),
+        nanoModel: AIChatPanel.getNanoModel(),
+      });
     } catch (error) {
       logger.error('Failed to initialize evaluation runner:', error);
     }
 
     try {
-      this.#agentEvaluationRunner = new VisionAgentEvaluationRunner(this.#state.visionEnabled, this.#state.judgeModel);
+      this.#agentEvaluationRunner = new VisionAgentEvaluationRunner({
+        visionEnabled: this.#state.visionEnabled,
+        judgeModel: this.#state.judgeModel,
+        mainModel: AIChatPanel.instance().getSelectedModel(),
+        miniModel: AIChatPanel.getMiniModel(),
+        nanoModel: AIChatPanel.getNanoModel(),
+      });
     } catch (error) {
       logger.error('Failed to initialize agent evaluation runner:', error);
     }
@@ -885,13 +897,24 @@
     // Reinitialize evaluation runners with new model
     try {
-      this.#evaluationRunner = new EvaluationRunner(this.#state.judgeModel);
+      this.#evaluationRunner = new EvaluationRunner({
+        judgeModel: this.#state.judgeModel,
+        mainModel: AIChatPanel.instance().getSelectedModel(),
+        miniModel: AIChatPanel.getMiniModel(),
+        nanoModel: AIChatPanel.getNanoModel(),
+      });
     } catch (error) {
       logger.error('Failed to reinitialize evaluation runner:', error);
     }
 
     try {
-      this.#agentEvaluationRunner = new VisionAgentEvaluationRunner(this.#state.visionEnabled, this.#state.judgeModel);
+      this.#agentEvaluationRunner = new VisionAgentEvaluationRunner({
+        visionEnabled: this.#state.visionEnabled,
+        judgeModel: this.#state.judgeModel,
+        mainModel: AIChatPanel.instance().getSelectedModel(),
+        miniModel: AIChatPanel.getMiniModel(),
+        nanoModel: AIChatPanel.getNanoModel(),
+      });
     } catch (error) {
       logger.error('Failed to reinitialize agent evaluation runner:', error);
     }
@@ -1837,4 +1860,4 @@ export class EvaluationDialog {
     }
   }
 
-}
\ No newline at end of file
+}

From 834500c8a8cc6c62189efe27aa282bfaec1f6761 Mon Sep 17 00:00:00 2001
From: Tyson Thomas
Date: Mon, 8 Sep 2025 21:10:56 -0700
Subject: [PATCH 3/4] more refactor

---
 .../ai_chat/agent_framework/AgentRunner.ts    |   3 +-
 .../agent_framework/ConfigurableAgentTool.ts  |  29 +++--
 .../implementation/ConfiguredAgents.ts        |  21 +--
 front_end/panels/ai_chat/core/Constants.ts    |  14 +-
 .../evaluation/runner/EvaluationRunner.ts     |  11 +-
 .../panels/ai_chat/tools/BookmarkStoreTool.ts |   8 +-
 .../panels/ai_chat/tools/CritiqueTool.ts      |  19 +--
 .../ai_chat/tools/DocumentSearchTool.ts       |   6 +-
 .../ai_chat/tools/FinalizeWithCritiqueTool.ts |   6 +-
 .../ai_chat/tools/SchemaBasedExtractorTool.ts | 120 +++++------------
 .../tools/StreamlinedSchemaExtractorTool.ts   |  26 +++-
 front_end/panels/ai_chat/tools/Tools.ts       |  28 ++--
 front_end/panels/ai_chat/ui/AIChatPanel.ts    |  11 +-
 .../panels/ai_chat/ui/EvaluationDialog.ts     |   3 +-
 front_end/panels/ai_chat/ui/SettingsDialog.ts |   2 +
 15 files changed, 146 insertions(+), 161 deletions(-)

diff --git a/front_end/panels/ai_chat/agent_framework/AgentRunner.ts b/front_end/panels/ai_chat/agent_framework/AgentRunner.ts
index e074cf67ca8..475e2ad53db 100644
--- a/front_end/panels/ai_chat/agent_framework/AgentRunner.ts
+++ b/front_end/panels/ai_chat/agent_framework/AgentRunner.ts
@@ -18,6 +18,7 @@ import { sanitizeMessagesForModel } from '../LLM/MessageSanitizer.js';
 const logger = createLogger('AgentRunner');
 
 import { ConfigurableAgentTool, ToolRegistry, type ConfigurableAgentArgs, type ConfigurableAgentResult, type AgentRunTerminationReason, type HandoffConfig /* , HandoffContextTransform, ContextFilterRegistry*/ } from './ConfigurableAgentTool.js';
+import { MODEL_SENTINELS } from '../core/Constants.js';
 
 /**
  * Configuration for the AgentRunner
@@ -296,7 +297,7 @@
     let resolvedModelName: string;
     if (typeof targetConfig.modelName === 'function') {
       resolvedModelName = targetConfig.modelName();
-    } else if (targetConfig.modelName === 'use-mini') {
+    } else if (targetConfig.modelName === MODEL_SENTINELS.USE_MINI) {
       if (!miniModel) {
         throw new Error(`Mini model not provided for handoff to agent '${targetAgentName}'. Ensure miniModel is passed in context.`);
      }
diff --git a/front_end/panels/ai_chat/agent_framework/ConfigurableAgentTool.ts b/front_end/panels/ai_chat/agent_framework/ConfigurableAgentTool.ts
index f26367316df..9ff9ae530f2 100644
--- a/front_end/panels/ai_chat/agent_framework/ConfigurableAgentTool.ts
+++ b/front_end/panels/ai_chat/agent_framework/ConfigurableAgentTool.ts
@@ -7,12 +7,27 @@ import type { Tool } from '../tools/Tools.js';
 import { ChatMessageEntity, type ChatMessage } from '../models/ChatTypes.js';
 import { createLogger } from '../core/Logger.js';
 import { getCurrentTracingContext } from '../tracing/TracingConfig.js';
+import { MODEL_SENTINELS } from '../core/Constants.js';
 import type { AgentSession } from './AgentSessionTypes.js';
+import type { LLMProvider } from '../LLM/LLMTypes.js';
 
 const logger = createLogger('ConfigurableAgentTool');
 
 import { AgentRunner, type AgentRunnerConfig, type AgentRunnerHooks } from './AgentRunner.js';
 
+// Context passed along with agent/tool calls
+export interface CallCtx {
+  provider?: LLMProvider,
+  model?: string,
+  miniModel?: string,
+  nanoModel?: string,
+  mainModel?: string,
+  getVisionCapability?: (modelName: string) => Promise<boolean> | boolean,
+  overrideSessionId?: string,
+  overrideParentSessionId?: string,
+  overrideTraceId?: string,
+}
+
 /**
  * Defines the possible reasons an agent run might terminate.
  */
@@ -413,21 +428,11 @@ export class ConfigurableAgentTool implements Tool<ConfigurableAgentArgs, ConfigurableAgentResult> {
-    const callCtx = (_ctx || {}) as {
-      provider?: LLMProvider;
-      model?: string;
-      miniModel?: string;
-      nanoModel?: string;
-      mainModel?: string;
-      getVisionCapability?: (modelName: string) => Promise<boolean> | boolean;
-      overrideSessionId?: string;
-      overrideParentSessionId?: string;
-      overrideTraceId?: string;
-    };
+    const callCtx = (_ctx || {}) as CallCtx;
 
     // Resolve model name from context or configuration
     let modelName: string;
-    if (this.config.modelName === 'use-mini') {
+    if (this.config.modelName === MODEL_SENTINELS.USE_MINI) {
       if (!callCtx.miniModel) {
         throw new Error(`Mini model not provided in context for agent '${this.name}'. Ensure context includes miniModel.`);
       }
diff --git a/front_end/panels/ai_chat/agent_framework/implementation/ConfiguredAgents.ts b/front_end/panels/ai_chat/agent_framework/implementation/ConfiguredAgents.ts
index 26dbb71a2ee..f5244d9df94 100644
--- a/front_end/panels/ai_chat/agent_framework/implementation/ConfiguredAgents.ts
+++ b/front_end/panels/ai_chat/agent_framework/implementation/ConfiguredAgents.ts
@@ -16,6 +16,7 @@ import {
   ToolRegistry, type AgentToolConfig, type ConfigurableAgentArgs } from '../ConfigurableAgentTool.js';
 import { WaitTool } from '../../tools/Tools.js';
+import { MODEL_SENTINELS } from '../../core/Constants.js';
 import { ThinkingTool } from '../../tools/ThinkingTool.js';
 import type { Tool } from '../../tools/Tools.js';
 
@@ -320,7 +321,7 @@ Remember: You gather data, content_writer_agent writes the report. Always hand o
     'document_search'
   ],
   maxIterations: 15,
-  modelName: 'use-mini',
+  modelName: MODEL_SENTINELS.USE_MINI,
   temperature: 0,
   schema: {
     type: 'object',
@@ -421,7 +422,7 @@ Your process should follow these steps:
 The final output should be in markdown format, and it should be lengthy and detailed. Aim for 5-10 pages of content, at least 1000 words.`,
   tools: [],
   maxIterations: 3,
-  modelName: 'use-mini',
+  modelName: MODEL_SENTINELS.USE_MINI,
   temperature: 0.3,
   schema: {
     type: 'object',
@@ -529,7 +530,7 @@ Conclusion: Fix the args format and retry with proper syntax: { "method": "fill"
     'take_screenshot',
   ],
   maxIterations: 10,
-  modelName: 'use-mini',
+  modelName: MODEL_SENTINELS.USE_MINI,
   temperature: 0.5,
   schema: {
     type: 'object',
@@ -638,7 +639,7 @@ Remember that verification is time-sensitive - the page state might change durin
     'take_screenshot'
   ],
   maxIterations: 3,
-  modelName: 'use-mini',
+  modelName: MODEL_SENTINELS.USE_MINI,
   temperature: 0.2,
   schema: {
     type: 'object',
@@ -723,7 +724,7 @@ When selecting an element to click, prioritize:
     'node_ids_to_urls',
   ],
   maxIterations: 5,
-  modelName: 'use-mini',
+  modelName: MODEL_SENTINELS.USE_MINI,
   temperature: 0.7,
   schema: {
     type: 'object',
@@ -803,7 +804,7 @@ When selecting a form field to fill, prioritize:
     'schema_based_extractor',
   ],
   maxIterations: 5,
-  modelName: 'use-mini',
+  modelName: MODEL_SENTINELS.USE_MINI,
   temperature: 0.7,
   schema: {
     type: 'object',
@@ -879,7 +880,7 @@ When selecting an element for keyboard input, prioritize:
     'schema_based_extractor',
   ],
   maxIterations: 5,
-  modelName: 'use-mini',
+  modelName: MODEL_SENTINELS.USE_MINI,
   temperature: 0.7,
   schema: {
     type: 'object',
@@ -964,7 +965,7 @@ When selecting an element to hover over, prioritize:
     'schema_based_extractor',
   ],
   maxIterations: 5,
-  modelName: 'use-mini',
+  modelName: MODEL_SENTINELS.USE_MINI,
   temperature: 0.7,
   schema: {
     type: 'object',
@@ -1046,7 +1047,7 @@ The accessibility tree includes information about scrollable containers. Look fo
     'schema_based_extractor',
   ],
   maxIterations: 5,
-  modelName: 'use-mini',
+  modelName: MODEL_SENTINELS.USE_MINI,
   temperature: 0.7,
   schema: {
     type: 'object',
@@ -1419,7 +1420,7 @@ Remember to adapt your analysis based on the product category - different attrib
     'get_page_content',
   ],
   maxIterations: 5,
-  modelName: 'use-mini',
+  modelName: MODEL_SENTINELS.USE_MINI,
   temperature: 0.2,
   schema: {
     type: 'object',
diff --git a/front_end/panels/ai_chat/core/Constants.ts b/front_end/panels/ai_chat/core/Constants.ts
index 7a8dc3ad4e4..84e97bf93a3 100644
--- a/front_end/panels/ai_chat/core/Constants.ts
+++ b/front_end/panels/ai_chat/core/Constants.ts
@@ -79,4 +79,16 @@ export const ERROR_MESSAGES = {
   AI_ASSISTANT_LOAD_FAILED: 'Failed to load AI Assistant. Please try again.',
   NO_PRIMARY_TARGET: 'No primary page target found',
   EMPTY_PROMPT: 'Prompt cannot be empty',
-} as const;
\ No newline at end of file
+} as const;
+
+// Sentinel model identifiers used in agent configurations
+export const MODEL_SENTINELS = {
+  USE_MINI: 'use-mini',
+  USE_NANO: 'use-nano',
+} as const;
+
+// Placeholder values used in UI model selectors
+export const MODEL_PLACEHOLDERS = {
+  NO_MODELS: '_placeholder_no_models',
+  ADD_CUSTOM: '_placeholder_add_custom',
+} as const;
diff --git a/front_end/panels/ai_chat/evaluation/runner/EvaluationRunner.ts b/front_end/panels/ai_chat/evaluation/runner/EvaluationRunner.ts
index 099a7b1b770..3dd3cd0a897 100644
--- a/front_end/panels/ai_chat/evaluation/runner/EvaluationRunner.ts
+++ b/front_end/panels/ai_chat/evaluation/runner/EvaluationRunner.ts
@@ -32,6 +32,7 @@ export class EvaluationRunner {
   private config: EvaluationConfig;
   private tracingProvider: TracingProvider;
   private sessionId: string;
+  #llmInitPromise: Promise<void> | null = null;
 
   constructor(options: EvaluationRunnerOptions) {
     // Get API key from AgentService
     const agentService = AgentService.getInstance();
     const apiKey = agentService.getApiKey();
@@ -65,7 +66,7 @@
     this.llmEvaluator = new LLMEvaluator(this.config.evaluationApiKey, this.config.evaluationModel);
 
     // Initialize LLM client for tools under evaluation (based on selected provider)
-    void this.#initializeLLMForEvaluation();
+    this.#llmInitPromise = this.#initializeLLMForEvaluation();
 
     // Initialize tracing
     this.tracingProvider = createTracingProvider();
@@ -150,6 +151,10 @@
   /**
    * Run a single test case
    */
   async runSingleTest(testCase: TestCase): Promise<TestResult> {
+    // Ensure LLM client initialization completes before running tests
+    if (this.#llmInitPromise) {
+      try { await this.#llmInitPromise; } catch { /* ignore; errors are logged in initializer */ }
+    }
     const traceId = `eval-${testCase.id || testCase.name}-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
     const startTime = new Date();
 
@@ -305,6 +310,10 @@
   /**
    * Run all tests from a given test array
    */
   async runAllTests(testCases: TestCase[]): Promise<TestResult[]> {
+    // Ensure LLM client initialization completes before running batch
+    if (this.#llmInitPromise) {
+      try { await this.#llmInitPromise; } catch { /* ignore; errors are logged in initializer */ }
+    }
     logger.debug(`[EvaluationRunner] Running ${testCases.length} tests...`);
 
     // Create tool instances map based on tools used in test cases
diff --git a/front_end/panels/ai_chat/tools/BookmarkStoreTool.ts b/front_end/panels/ai_chat/tools/BookmarkStoreTool.ts
index c0dc27fc5af..4f6da858537 100644
--- a/front_end/panels/ai_chat/tools/BookmarkStoreTool.ts
+++ b/front_end/panels/ai_chat/tools/BookmarkStoreTool.ts
@@ -7,7 +7,7 @@ import * as Utils from '../common/utils.js';
 import { createLogger } from '../core/Logger.js';
 import { HTMLToMarkdownTool } from './HTMLToMarkdownTool.js';
 import { VectorDBClient, type VectorDocument, type VectorStoreResponse } from './VectorDBClient.js';
-import type { Tool } from './Tools.js';
+import type { Tool, LLMContext } from './Tools.js';
 import { integer } from '../../../generated/protocol.js';
 
 const logger = createLogger('Tool:BookmarkStore');
 
@@ -72,7 +72,7 @@ export class BookmarkStoreTool implements Tool {
-  async execute(args: BookmarkStoreArgs): Promise {
+  async execute(args: BookmarkStoreArgs, ctx?: LLMContext): Promise {
     logger.info('Executing bookmark store with args', { args });
 
     try {
@@ -108,7 +108,7 @@ export class BookmarkStoreTool implements Tool
diff --git a/front_end/panels/ai_chat/tools/CritiqueTool.ts b/front_end/panels/ai_chat/tools/CritiqueTool.ts
   async execute(args: CritiqueToolArgs, ctx?: LLMContext): Promise {
     logger.debug('Executing with args', args);
     const { userInput, finalResponse, reasoning } = args;
-    const agentService = AgentService.getInstance();
-    const apiKey = agentService.getApiKey();
 
     // Validate input
     if (!userInput || !finalResponse) {
@@ -88,19 +86,11 @@ export class CritiqueTool implements Tool
       };
     }
 
-    if (!apiKey) {
-      return {
-        satisfiesCriteria: false,
-        success: false,
-        error: 'API key not configured.'
-      };
-    }
-
     try {
       logger.info('Evaluating planning response against user requirements');
 
       // First, extract requirements from user input
-      const requirementsResult = await this.extractRequirements(userInput, apiKey, ctx);
+      const requirementsResult = await this.extractRequirements(userInput, ctx);
       if (!requirementsResult.success) {
         throw new Error('Failed to extract requirements from user input.');
       }
@@ -110,7 +100,6 @@ export class CritiqueTool implements Tool
         userInput,
         finalResponse,
         requirementsResult.requirements,
-        apiKey,
         ctx
       );
 
@@ -123,7 +112,7 @@ export class CritiqueTool implements Tool
       // Generate feedback only if criteria not satisfied
       let feedback = undefined;
       if (!criteria.satisfiesCriteria) {
-        feedback = await this.generateFeedback(criteria, userInput, finalResponse, apiKey, ctx);
+        feedback = await this.generateFeedback(criteria, userInput, finalResponse, ctx);
       }
 
       logger.info('Evaluation complete', {
@@ -150,7 +139,7 @@ export class CritiqueTool implements Tool
   /**
    * Extract structured requirements from user input
    */
-  private async extractRequirements(userInput: string, apiKey: string, ctx?: LLMContext): Promise<{success: boolean, requirements: string[], error?: string}> {
+  private async extractRequirements(userInput: string, ctx?: LLMContext): Promise<{success: boolean, requirements: string[], error?: string}> {
     const systemPrompt = `You are an expert requirements analyst. Your task is to extract clear, specific requirements from the user's input.
 
 Focus on functional requirements, constraints, and expected outcomes.
@@ -216,7 +205,6 @@ Return a JSON array of requirement statements. Example format:
     userInput: string,
     finalResponse: string,
     requirements: string[],
-    apiKey: string,
     ctx?: LLMContext
   ): Promise<{success: boolean, criteria?: EvaluationCriteria, error?: string}> {
     const systemPrompt = `You are an expert plan evaluator.
@@ -324,7 +312,6 @@ ${JSON.stringify(evaluationSchema, null, 2)}`;
     criteria: EvaluationCriteria,
     userInput: string,
     finalResponse: string,
-    apiKey: string,
     ctx?: LLMContext
   ): Promise {
     const systemPrompt = `You are an expert feedback provider.
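The CritiqueTool hunks above show the convention this series applies to every tool: stop pulling API keys and models from AgentService or the UI layer inside the tool, and accept an optional LLMContext argument instead. A minimal sketch of that pattern follows, assuming a simplified LLMContext shape inferred from the fields these diffs actually read; the real type in Tools.ts may carry more:

```typescript
// Simplified LLMContext; the shape here is assumed from the fields this
// patch series reads (provider, mini/nano models), not the full upstream type.
interface LLMContext {
  provider?: string;
  mainModel?: string;
  miniModel?: string;
  nanoModel?: string;
}

interface Tool<TArgs, TResult> {
  name: string;
  description: string;
  execute(args: TArgs, ctx?: LLMContext): Promise<TResult>;
}

// Hypothetical tool illustrating the fail-fast resolution the diffs adopt:
// no localStorage reads, no UI singletons; the model comes from ctx or not at all.
class ExampleSummarizeTool implements Tool<{ text: string }, { summary: string }> {
  name = 'example_summarize';
  description = 'Illustrates resolving the mini model from the call context';

  async execute(args: { text: string }, ctx?: LLMContext): Promise<{ summary: string }> {
    const model = ctx?.miniModel;
    if (!model) {
      throw new Error(`Mini model not provided in context for tool '${this.name}'.`);
    }
    // An LLM call using `model` would go here; trimmed for the sketch.
    return { summary: args.text.slice(0, 200) };
  }
}
```

The fail-fast throw mirrors the behavior the patch adds to ConfigurableAgentTool: a missing mini model is treated as a configuration error at the call site rather than papered over with a localStorage fallback.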
diff --git a/front_end/panels/ai_chat/tools/DocumentSearchTool.ts b/front_end/panels/ai_chat/tools/DocumentSearchTool.ts
index d491d1c9c9f..638f5c99936 100644
--- a/front_end/panels/ai_chat/tools/DocumentSearchTool.ts
+++ b/front_end/panels/ai_chat/tools/DocumentSearchTool.ts
@@ -4,7 +4,7 @@
 
 import { createLogger } from '../core/Logger.js';
 import { VectorDBClient, type VectorSearchResult } from './VectorDBClient.js';
-import type { Tool } from './Tools.js';
+import type { Tool, LLMContext } from './Tools.js';
 
 const logger = createLogger('Tool:DocumentSearch');
 
@@ -87,7 +87,7 @@ export class DocumentSearchTool implements Tool
-  async execute(args: DocumentSearchArgs): Promise {
+  async execute(args: DocumentSearchArgs, _ctx?: LLMContext): Promise {
     logger.info('Executing document search with args', { args });
 
     try {
@@ -270,4 +270,4 @@ export class DocumentSearchTool implements Tool
-}
\ No newline at end of file
+}
diff --git a/front_end/panels/ai_chat/tools/FinalizeWithCritiqueTool.ts b/front_end/panels/ai_chat/tools/FinalizeWithCritiqueTool.ts
-  async execute(args: FinalizeWithCritiqueArgs): Promise {
+  async execute(args: FinalizeWithCritiqueArgs, ctx?: LLMContext): Promise {
     logger.info('Executing with answer:', args.answer.substring(0, 100) + '...');
 
     try {
@@ -112,7 +112,7 @@ export class FinalizeWithCritiqueTool implements Tool
diff --git a/front_end/panels/ai_chat/tools/SchemaBasedExtractorTool.ts b/front_end/panels/ai_chat/tools/SchemaBasedExtractorTool.ts
-      // Remove <think>...</think> tags and everything inside them (handles multiple think tags)
-      let cleanedText = responseText.replace(/<think>[\s\S]*?<\/think>/g, '');
-
-      // Remove any incomplete <think> tags without closing tags
-      cleanedText = cleanedText.replace(/<think>[\s\S]*/g, '');
-
-      // If after removing think tags, the text is empty or whitespace, give up
-      if (!cleanedText.trim()) {
-        logger.error('No content left after removing think tags');
-        return null;
-      }
-
-      // First, look for JSON code blocks in the cleaned text
-      const codeBlockMatch = cleanedText.match(/```json\s*([\s\S]*?)\s*```/);
-      if (codeBlockMatch && codeBlockMatch[1]) {
-        try {
-          return JSON.parse(codeBlockMatch[1]);
-        } catch (codeBlockError) {
-          logger.error('Failed to parse JSON from code block:', codeBlockError);
-        }
-      }
-
-      // Next, try to find a complete JSON object or array in the cleaned text
-      // Find the last valid JSON in the text (in case there are multiple)
-      let potentialJsons: string[] = [];
-      const jsonMatches = cleanedText.match(/(\{[\s\S]*?\}|\[[\s\S]*?\])/g);
-      if (jsonMatches) {
-        potentialJsons = jsonMatches;
-      }
-
-      // Try parsing each potential JSON, starting with the longest one
-      // (longer matches are more likely to be complete)
-      potentialJsons.sort((a, b) => b.length - a.length);
-
-      for (const json of potentialJsons) {
-        try {
-          return JSON.parse(json);
-        } catch (jsonError) {
-          // Continue to the next potential JSON
-        }
-      }
-
-      // If no valid JSON found yet, try a more aggressive approach
-      const jsonObjectMatch = cleanedText.match(/\{[\s\S]*\}/);
-      if (jsonObjectMatch) {
-        try {
-          return JSON.parse(jsonObjectMatch[0]);
-        } catch (objectError) {
-          logger.error('Failed to parse JSON object:', objectError);
-        }
-      }
-
-      const jsonArrayMatch = cleanedText.match(/\[[\s\S]*\]/);
-      if (jsonArrayMatch) {
-        try {
-          return JSON.parse(jsonArrayMatch[0]);
-        } catch (arrayError) {
-          logger.error('Failed to parse JSON array:', arrayError);
-        }
-      }
-
-      logger.error('Failed to parse and no valid JSON found in response after removing think tags');
-      return null;
-    }
-  }
 
   /**
    * Recursively find and replace node IDs with URLs in a data structure
diff --git a/front_end/panels/ai_chat/tools/StreamlinedSchemaExtractorTool.ts b/front_end/panels/ai_chat/tools/StreamlinedSchemaExtractorTool.ts
index d20ceaa242d..ca66216ac24 100644
--- a/front_end/panels/ai_chat/tools/StreamlinedSchemaExtractorTool.ts
+++ b/front_end/panels/ai_chat/tools/StreamlinedSchemaExtractorTool.ts
@@ -10,6 +10,7 @@ import { AgentService } from '../core/AgentService.js';
 import { createLogger } from '../core/Logger.js';
 import { callLLMWithTracing } from './LLMTracingWrapper.js';
 import type { Tool, LLMContext } from './Tools.js';
+import { LLMResponseParser } from '../LLM/LLMResponseParser.js';
 
 const logger = createLogger('Tool:StreamlinedSchemaExtractor');
 
@@ -268,10 +269,17 @@ IMPORTANT: Only extract data that you can see in the accessibility tree above. D
           }
         }
       );
-      const result = llmResponse.text;
+      const text = llmResponse.text || '';
+      // Parse using LLMResponseParser with strict mode then fallbacks
+      let parsed: any;
+      try {
+        parsed = LLMResponseParser.parseStrictJSON(text);
+      } catch {
+        parsed = LLMResponseParser.parseJSONWithFallbacks(text);
+      }
 
       logger.debug(`JSON extraction successful on attempt ${attempt}`);
-      return result;
+      return parsed;
 
     } catch (error) {
       if (attempt <= maxRetries) {
@@ -411,9 +419,17 @@ CRITICAL: Only use nodeIds that you can actually see in the accessibility tree a
           }
         }
       );
-      const result = llmResponse.text;
-
-      return result;
+      const text = llmResponse.text || '';
+      try {
+        return LLMResponseParser.parseStrictJSON(text);
+      } catch {
+        try {
+          return LLMResponseParser.parseJSONWithFallbacks(text);
+        } catch {
+          logger.warn('Retry URL resolution returned non-JSON; aborting this attempt');
+          return null;
+        }
+      }
     } catch (error) {
       logger.error(`Error in URL retry attempt ${attemptNumber}:`, error instanceof Error ? error.message : String(error));
       return null;
     }
diff --git a/front_end/panels/ai_chat/tools/Tools.ts b/front_end/panels/ai_chat/tools/Tools.ts
index b4f2dd520df..b3b22b3809b 100644
--- a/front_end/panels/ai_chat/tools/Tools.ts
+++ b/front_end/panels/ai_chat/tools/Tools.ts
@@ -341,7 +341,7 @@ export class ExecuteJavaScriptTool implements Tool<{ code: string }, JavaScriptE
   name = 'execute_javascript';
   description = 'Executes JavaScript code in the page context';
 
-  async execute(args: { code: string }): Promise {
+  async execute(args: { code: string }, _ctx?: LLMContext): Promise {
     logger.info('execute_javascript', args);
     const code = args.code;
     if (typeof code !== 'string') {
@@ -399,7 +399,7 @@ export class NetworkAnalysisTool implements Tool<{ url?: string, limit?: number
   name = 'analyze_network';
   description = 'Analyzes network requests, optionally filtered by URL pattern';
 
-  async execute(args: { url?: string, limit?: number }): Promise {
+  async execute(args: { url?: string, limit?: number }, _ctx?: LLMContext): Promise {
     const url = args.url;
     const limit = args.limit || 10;
 
@@ -888,7 +888,7 @@ export class NavigateBackTool implements Tool<{ steps: number, reasoning: string
     required: ['steps', 'reasoning'],
   };
 
-  async execute(args: { steps: number, reasoning: string }): Promise {
+  async execute(args: { steps: number, reasoning: string }, _ctx?: LLMContext): Promise {
     logger.error('navigate_back', args);
     const steps = args.steps;
     if (typeof steps !== 'number' || steps <= 0) {
@@ -992,7 +992,7 @@ export class GetPageHTMLTool implements Tool, PageHTMLRe
   name = 'get_page_html';
   description = 'Gets the HTML contents and structure of the current page for analysis and summarization with CSS, JavaScript, and other non-essential content removed';
 
-  async execute(_args: Record): Promise {
+  async execute(_args: Record, _ctx?: LLMContext): Promise {
     // Get the main target
     const target = SDK.TargetManager.TargetManager.instance().primaryPageTarget();
     if (!target) {
@@ -1104,7 +1104,7 @@ export class ClickElementTool implements Tool<{ selector: string }, ClickElement
   name = 'click_element';
   description = 'Clicks on an element identified by a CSS selector';
 
-  async execute(args: { selector: string }): Promise {
+  async execute(args: { selector: string }, _ctx?: LLMContext): Promise {
     const selector = args.selector;
 
     if (typeof selector !== 'string') {
@@ -1178,7 +1178,7 @@ export class SearchContentTool implements Tool<{ query: string, limit?: number }
   name = 'search_content';
   description = 'Searches for text content on the page and returns matching elements';
 
-  async execute(args: { query: string, limit?: number }): Promise {
+  async execute(args: { query: string, limit?: number }, _ctx?: LLMContext): Promise {
     const query = args.query;
     const limit = args.limit || 5;
 
@@ -1321,7 +1321,7 @@ export class ScrollPageTool implements Tool<{ position?: { x: number, y: number }
   name = 'scroll_page';
   description = 'Scrolls the page to a specific position or in a specific direction';
 
-  async execute(args: { position?: { x: number, y: number }, direction?: string, amount?: number }): Promise {
+  async execute(args: { position?: { x: number, y: number }, direction?: string, amount?: number }, _ctx?: LLMContext): Promise {
     const position = args.position;
     const direction = args.direction;
     const amount = args.amount || 300; // Default scroll amount
@@ -1537,7 +1537,7 @@ export class TakeScreenshotTool implements Tool<{fullPage?: boolean}, Screenshot
   name = 'take_screenshot';
   description = 'Takes a screenshot of the current page view or the entire page. The image can be used for analyzing the page layout, content, and visual elements. Always specify whether to capture the full page or just the viewport and the reasoning behind it.';
 
-  async execute(args: {fullPage?: boolean}): Promise {
+  async execute(args: {fullPage?: boolean}, _ctx?: LLMContext): Promise {
     const fullPage = args.fullPage || false;
 
     // Get the main target
@@ -1598,7 +1598,7 @@ export class GetAccessibilityTreeTool implements Tool<{ reasoning: string }, Acc
   name = 'get_page_content';
   description = 'Gets the accessibility tree of the current page, providing a hierarchical structure of all accessible elements.';
 
-  async execute(args: { reasoning: string }): Promise {
+  async execute(args: { reasoning: string }, _ctx?: LLMContext): Promise {
     try {
       // Log reasoning for this action (addresses unused args warning)
       logger.warn(`Getting accessibility tree: ${args.reasoning}`);
@@ -1642,7 +1642,7 @@ export class GetVisibleAccessibilityTreeTool implements Tool<{ reasoning: string
   name = 'get_visible_content';
   description = 'Gets the accessibility tree of only the visible content in the viewport, providing a focused view of what the user can currently see.';
 
-  async execute(args: { reasoning: string }): Promise {
+  async execute(args: { reasoning: string }, _ctx?: LLMContext): Promise {
     try {
       // Log reasoning for this action
       logger.warn(`Getting visible accessibility tree: ${args.reasoning}`);
@@ -3112,7 +3112,7 @@ export class NodeIDsToURLsTool implements Tool<{ nodeIds: number[] }, NodeIDsToU
   name = 'get_urls_from_nodeids';
   description = 'Gets URLs associated with DOM elements identified by NodeIDs from accessibility tree.';
 
-  async execute(args: { nodeIds: number[] }): Promise {
+  async execute(args: { nodeIds: number[] }, _ctx?: LLMContext): Promise {
     if (!Array.isArray(args.nodeIds)) {
       return { error: 'nodeIds must be an array of numbers' };
     }
@@ -4008,7 +4008,7 @@ export class GetVisitsByDomainTool implements Tool<{ domain: string }, VisitHist
   name = 'get_visits_by_domain';
   description = 'Get a list of visited pages filtered by domain name';
 
-  async execute(args: { domain: string }): Promise {
+  async execute(args: { domain: string }, _ctx?: LLMContext): Promise {
     try {
       const visits = await VisitHistoryManager.getInstance().getVisitsByDomain(args.domain);
 
@@ -4046,7 +4046,7 @@ export class GetVisitsByKeywordTool implements Tool<{ keyword: string }, VisitHi
   name = 'get_visits_by_keyword';
   description = 'Get a list of visited pages containing a specific keyword';
 
-  async execute(args: { keyword: string }): Promise {
+  async execute(args: { keyword: string }, _ctx?: LLMContext): Promise {
     try {
       const visits = await VisitHistoryManager.getInstance().getVisitsByKeyword(args.keyword);
 
@@ -4091,7 +4091,7 @@ export class SearchVisitHistoryTool implements Tool<{
     keyword?: string,
     daysAgo?: number,
     limit?: number,
-  }): Promise {
+  }, _ctx?: LLMContext): Promise {
     try {
       const { domain, keyword, daysAgo, limit } = args;
 
diff --git a/front_end/panels/ai_chat/ui/AIChatPanel.ts b/front_end/panels/ai_chat/ui/AIChatPanel.ts
index 95761de4ad8..ec1a673f090 100644
--- a/front_end/panels/ai_chat/ui/AIChatPanel.ts
+++ b/front_end/panels/ai_chat/ui/AIChatPanel.ts
@@ -83,6 +83,7 @@ import { type ChatMessage, ChatMessageEntity, type ImageInputData, type ModelCha
 import { HelpDialog } from './HelpDialog.js';
 import { SettingsDialog, isVectorDBEnabled } from './SettingsDialog.js';
 import { EvaluationDialog } from './EvaluationDialog.js';
+import { MODEL_PLACEHOLDERS } from '../core/Constants.js';
 import * as Snackbars from '../../../ui/components/snackbars/snackbars.js';
 
 const {html} = Lit;
@@ -558,7 +559,7 @@ export class AIChatPanel extends UI.Panel.Panel {
       // Add placeholder if no Groq models available
       if (MODEL_OPTIONS.length === 0) {
         MODEL_OPTIONS.push({
-          value: '_placeholder_no_models',
+          value: MODEL_PLACEHOLDERS.NO_MODELS,
           label: 'Groq: Please configure API key in settings',
           type: 'groq' as const
         });
@@ -569,7 +570,7 @@
       // Add placeholder if no OpenRouter models available
       if (MODEL_OPTIONS.length === 0) {
         MODEL_OPTIONS.push({
-          value: '_placeholder_no_models',
+          value: MODEL_PLACEHOLDERS.NO_MODELS,
           label: 'OpenRouter: Please configure API key in settings',
           type: 'openrouter' as const
         });
@@ -581,7 +582,7 @@
       // Add placeholder if needed for LiteLLM when we have no models
       if (hadWildcard && MODEL_OPTIONS.length === 0) {
         MODEL_OPTIONS.push({
-          value: '_placeholder_add_custom',
+          value: MODEL_PLACEHOLDERS.ADD_CUSTOM,
           label: 'LiteLLM: Please add custom models in settings',
           type: 'litellm' as const
         });
@@ -1250,8 +1251,8 @@
     return {
       isLiteLLM: Boolean(modelOption?.type === 'litellm'),
       isPlaceholder: Boolean(
-        modelOption?.value === '_placeholder_add_custom' ||
-        modelOption?.value === '_placeholder_no_models'
+        modelOption?.value === MODEL_PLACEHOLDERS.ADD_CUSTOM ||
+        modelOption?.value === MODEL_PLACEHOLDERS.NO_MODELS
       ),
     };
   }
diff --git a/front_end/panels/ai_chat/ui/EvaluationDialog.ts b/front_end/panels/ai_chat/ui/EvaluationDialog.ts
index e4129b63ba9..2d3561ad873 100644
--- a/front_end/panels/ai_chat/ui/EvaluationDialog.ts
+++ b/front_end/panels/ai_chat/ui/EvaluationDialog.ts
@@ -16,6 +16,7 @@ import { webTaskAgentTests } from '../evaluation/test-cases/web-task-agent-tests
 import type { TestResult } from '../evaluation/framework/types.js';
 import { createLogger } from '../core/Logger.js';
 import { AIChatPanel } from './AIChatPanel.js';
+import { MODEL_PLACEHOLDERS } from '../core/Constants.js';
 
 const logger = createLogger('EvaluationDialog');
 
@@ -847,7 +848,7 @@ export class EvaluationDialog {
     // Filter models to only show those from the selected provider
     const filteredModels = modelOptions.filter(option => {
-      if (option.value.startsWith('_placeholder')) {
+      if (option.value.startsWith(MODEL_PLACEHOLDERS.NO_MODELS) || option.value.startsWith(MODEL_PLACEHOLDERS.ADD_CUSTOM)) {
         return false; // Skip placeholder options
       }
       // Use the model's type to determine if it belongs to the selected provider
diff --git a/front_end/panels/ai_chat/ui/SettingsDialog.ts b/front_end/panels/ai_chat/ui/SettingsDialog.ts
index 3a50983a08a..4274474b400 100644
--- a/front_end/panels/ai_chat/ui/SettingsDialog.ts
+++ b/front_end/panels/ai_chat/ui/SettingsDialog.ts
@@ -559,6 +559,8 @@ export class SettingsDialog {
             type: 'openrouter' as const
           }));
           updateModelOptions(modelOptions, false);
+          // Persist cache alongside timestamp for consistency
+          localStorage.setItem('openrouter_models_cache', JSON.stringify(modelOptions));
           localStorage.setItem('openrouter_models_cache_timestamp', Date.now().toString());
           logger.debug('Successfully refreshed OpenRouter models after provider change');
         } catch (error) {

From 84febc66cc664fbde4fb541cb0945fa71987c8c9 Mon Sep 17 00:00:00 2001
From: Tyson Thomas
Date: Mon, 8 Sep 2025 21:50:15 -0700
Subject: [PATCH 4/4] clean up

---
 front_end/panels/ai_chat/agent_framework/AgentRunner.ts | 2 +-
 .../ai_chat/agent_framework/ConfigurableAgentTool.ts    | 2 +-
 .../ai_chat/tools/StreamlinedSchemaExtractorTool.ts     | 9 ++++++---
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/front_end/panels/ai_chat/agent_framework/AgentRunner.ts b/front_end/panels/ai_chat/agent_framework/AgentRunner.ts
index 475e2ad53db..06d401ebbab 100644
--- a/front_end/panels/ai_chat/agent_framework/AgentRunner.ts
+++ b/front_end/panels/ai_chat/agent_framework/AgentRunner.ts
@@ -302,7 +302,7 @@
         throw new Error(`Mini model not provided for handoff to agent '${targetAgentName}'. Ensure miniModel is passed in context.`);
       }
       resolvedModelName = miniModel;
-    } else if (targetConfig.modelName === 'use-nano') {
+    } else if (targetConfig.modelName === MODEL_SENTINELS.USE_NANO) {
       if (!nanoModel) {
         throw new Error(`Nano model not provided for handoff to agent '${targetAgentName}'. Ensure nanoModel is passed in context.`);
       }
       resolvedModelName = nanoModel;
     } else {
       resolvedModelName = targetConfig.modelName || defaultModelName;
     }
diff --git a/front_end/panels/ai_chat/agent_framework/ConfigurableAgentTool.ts b/front_end/panels/ai_chat/agent_framework/ConfigurableAgentTool.ts
index 9ff9ae530f2..dd8654f6681 100644
--- a/front_end/panels/ai_chat/agent_framework/ConfigurableAgentTool.ts
+++ b/front_end/panels/ai_chat/agent_framework/ConfigurableAgentTool.ts
@@ -437,7 +437,7 @@ export class ConfigurableAgentTool implements Tool
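Patches 3 and 4 converge on a single resolution rule for agent model names: a configured value may be a literal model id, a factory function, or one of the MODEL_SENTINELS values, and a sentinel must be satisfied from the caller's context or fail fast. A standalone sketch of that rule; resolveModelName is illustrative and not a function defined in these patches:

```typescript
// MODEL_SENTINELS mirrors core/Constants.ts; resolveModelName is a sketch of
// the rule applied in AgentRunner and ConfigurableAgentTool, not upstream code.
const MODEL_SENTINELS = { USE_MINI: 'use-mini', USE_NANO: 'use-nano' } as const;

type ConfiguredModel = string | (() => string) | undefined;

function resolveModelName(
  configured: ConfiguredModel,
  models: { defaultModel: string, miniModel?: string, nanoModel?: string },
  agentName: string,
): string {
  if (typeof configured === 'function') {
    return configured(); // config may supply a factory instead of a literal
  }
  if (configured === MODEL_SENTINELS.USE_MINI) {
    if (!models.miniModel) {
      throw new Error(`Mini model not provided for agent '${agentName}'.`);
    }
    return models.miniModel;
  }
  if (configured === MODEL_SENTINELS.USE_NANO) {
    if (!models.nanoModel) {
      throw new Error(`Nano model not provided for agent '${agentName}'.`);
    }
    return models.nanoModel;
  }
  return configured || models.defaultModel; // literal id, else the default
}
```

Centralizing the rule this way is what lets patch 4 delete the remaining string comparisons against 'use-nano': every caller resolves sentinels through the same constants, so a typo in one branch can no longer silently fall through to the default model.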
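The SettingsDialog change closes the refresh bug by writing the OpenRouter model list and its timestamp together, so a reader can trust a single staleness check. The read side implied by that invariant looks roughly like the following; the 24-hour TTL and the exact ModelOption shape are assumptions for illustration:

```typescript
// Read side implied by the cache fix; the localStorage keys match the patch,
// while the 24h TTL and the ModelOption shape are assumptions for illustration.
interface ModelOption {
  value: string;
  label: string;
  type: 'openrouter';
}

const CACHE_KEY = 'openrouter_models_cache';
const CACHE_TS_KEY = 'openrouter_models_cache_timestamp';
const CACHE_TTL_MS = 24 * 60 * 60 * 1000;

function readCachedOpenRouterModels(): ModelOption[] | null {
  const raw = localStorage.getItem(CACHE_KEY);
  const ts = Number(localStorage.getItem(CACHE_TS_KEY));
  if (!raw || !Number.isFinite(ts) || Date.now() - ts > CACHE_TTL_MS) {
    return null; // missing or stale; caller should refetch and rewrite both keys
  }
  try {
    return JSON.parse(raw) as ModelOption[];
  } catch {
    return null; // corrupted entry; treat as a cache miss
  }
}
```

Before the fix, only the timestamp was refreshed on a provider change, so a "fresh" timestamp could guard a stale or missing model list; pairing the writes makes the TTL check meaningful.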