From 00e25026fb6a19fafbdb187751b8a5d6e1b72a1c Mon Sep 17 00:00:00 2001 From: hchen Date: Tue, 30 Sep 2025 16:39:58 -0500 Subject: [PATCH] Add IWebElementLocator --- .../Browsing/Models/ElementLocatingArgs.cs | 5 + .../Browsing/Settings/IWebElementLocator.cs | 8 + .../BotSharp.Plugin.WebDriver.csproj | 11 +- .../PlaywrightWebDriver.ActionOnElement.cs | 5 +- .../PlaywrightWebDriver.DoAction.cs | 226 ++++++++++++++++-- .../PlaywrightWebDriver.LocateElement.cs | 17 ++ .../Hooks/WebUtilityHook.cs | 7 +- .../BotSharp.Plugin.WebDriver/Using.cs | 4 +- .../UtilFunctions/UtilWebActionOnElementFn.cs | 1 + .../UtilFunctions/UtilWebTakeScreenshotFn.cs | 39 +++ .../functions/util-web-action_on_element.json | 6 +- .../functions/util-web-locate_element.json | 6 +- .../functions/util-web-take_screenshot.json | 10 + .../util-web-action_on_element.fn.liquid | 1 - .../templates/util-web-go_to_page.fn.liquid | 1 - 15 files changed, 315 insertions(+), 32 deletions(-) create mode 100644 src/Infrastructure/BotSharp.Abstraction/Browsing/Settings/IWebElementLocator.cs create mode 100644 src/Plugins/BotSharp.Plugin.WebDriver/UtilFunctions/UtilWebTakeScreenshotFn.cs create mode 100644 src/Plugins/BotSharp.Plugin.WebDriver/data/agents/6745151e-6d46-4a02-8de4-1c4f21c7da95/functions/util-web-take_screenshot.json delete mode 100644 src/Plugins/BotSharp.Plugin.WebDriver/data/agents/6745151e-6d46-4a02-8de4-1c4f21c7da95/templates/util-web-action_on_element.fn.liquid delete mode 100644 src/Plugins/BotSharp.Plugin.WebDriver/data/agents/6745151e-6d46-4a02-8de4-1c4f21c7da95/templates/util-web-go_to_page.fn.liquid diff --git a/src/Infrastructure/BotSharp.Abstraction/Browsing/Models/ElementLocatingArgs.cs b/src/Infrastructure/BotSharp.Abstraction/Browsing/Models/ElementLocatingArgs.cs index 0890b0c90..7d7dcc6ca 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Browsing/Models/ElementLocatingArgs.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Browsing/Models/ElementLocatingArgs.cs @@ -5,6 +5,9 @@ namespace BotSharp.Abstraction.Browsing.Models; [DebuggerStepThrough] public class ElementLocatingArgs { + [JsonPropertyName("element_locator_desc")] + public string ElementLocatorDescription { get; set; } = string.Empty; + [JsonPropertyName("match_rule")] public string MatchRule { get; set; } = string.Empty; @@ -26,6 +29,8 @@ public class ElementLocatingArgs [JsonPropertyName("selector")] public string? Selector { get; set; } + public ElementPosition? Position { get; set; } + public bool Parent { get; set; } public bool FailIfMultiple { get; set; } diff --git a/src/Infrastructure/BotSharp.Abstraction/Browsing/Settings/IWebElementLocator.cs b/src/Infrastructure/BotSharp.Abstraction/Browsing/Settings/IWebElementLocator.cs new file mode 100644 index 000000000..2b46f01fc --- /dev/null +++ b/src/Infrastructure/BotSharp.Abstraction/Browsing/Settings/IWebElementLocator.cs @@ -0,0 +1,8 @@ +using BotSharp.Abstraction.Browsing.Models; + +namespace BotSharp.Abstraction.Browsing.Settings; + +public interface IWebElementLocator +{ + Task DetectElementCoordinates(IWebBrowser browser, string contextId, string elementDescription); +} diff --git a/src/Plugins/BotSharp.Plugin.WebDriver/BotSharp.Plugin.WebDriver.csproj b/src/Plugins/BotSharp.Plugin.WebDriver/BotSharp.Plugin.WebDriver.csproj index 2f3918532..74e027f81 100644 --- a/src/Plugins/BotSharp.Plugin.WebDriver/BotSharp.Plugin.WebDriver.csproj +++ b/src/Plugins/BotSharp.Plugin.WebDriver/BotSharp.Plugin.WebDriver.csproj @@ -16,8 +16,6 @@ - - @@ -28,6 +26,9 @@ PreserveNewest + + PreserveNewest + PreserveNewest @@ -37,12 +38,6 @@ PreserveNewest - - PreserveNewest - - - PreserveNewest - PreserveNewest diff --git a/src/Plugins/BotSharp.Plugin.WebDriver/Drivers/PlaywrightDriver/PlaywrightWebDriver.ActionOnElement.cs b/src/Plugins/BotSharp.Plugin.WebDriver/Drivers/PlaywrightDriver/PlaywrightWebDriver.ActionOnElement.cs index 7a5f4820e..17809ba86 100644 --- a/src/Plugins/BotSharp.Plugin.WebDriver/Drivers/PlaywrightDriver/PlaywrightWebDriver.ActionOnElement.cs +++ b/src/Plugins/BotSharp.Plugin.WebDriver/Drivers/PlaywrightDriver/PlaywrightWebDriver.ActionOnElement.cs @@ -8,9 +8,12 @@ public async Task ActionOnElement(MessageInfo message, Elem var result = await LocateElement(message, location); if (result.IsSuccess) { + action.Position = location.Position; await DoAction(message, action, result); - result.UrlAfterAction = _instance.GetPage(message.ContextId)?.Url; } + + result.UrlAfterAction = _instance.GetPage(message.ContextId)?.Url; + return result; } } diff --git a/src/Plugins/BotSharp.Plugin.WebDriver/Drivers/PlaywrightDriver/PlaywrightWebDriver.DoAction.cs b/src/Plugins/BotSharp.Plugin.WebDriver/Drivers/PlaywrightDriver/PlaywrightWebDriver.DoAction.cs index 58b478ad3..4e18e2d65 100644 --- a/src/Plugins/BotSharp.Plugin.WebDriver/Drivers/PlaywrightDriver/PlaywrightWebDriver.DoAction.cs +++ b/src/Plugins/BotSharp.Plugin.WebDriver/Drivers/PlaywrightDriver/PlaywrightWebDriver.DoAction.cs @@ -1,5 +1,6 @@ using System.IO; using System.Net.Http; +using System.Xml.Linq; namespace BotSharp.Plugin.WebDriver.Drivers.PlaywrightDriver; @@ -8,33 +9,66 @@ public partial class PlaywrightWebDriver public async Task DoAction(MessageInfo message, ElementActionArgs action, BrowserActionResult result) { var page = _instance.GetPage(message.ContextId); - if (string.IsNullOrEmpty(result.Selector)) + if (string.IsNullOrEmpty(result.Selector) && action.Position == null) { Serilog.Log.Error($"Selector is not set."); return; } - ILocator locator = page.Locator(result.Selector); - var count = await locator.CountAsync(); - - if (count == 0) - { - Serilog.Log.Error($"Element not found: {result.Selector}"); - return; - } - else if (count > 1) + ILocator? locator; + + if (result.Selector != null) { - if (!action.FirstIfMultipleFound) + locator = page.Locator(result.Selector); + + var count = await locator.CountAsync(); + + if (count == 0) { - Serilog.Log.Error($"Multiple eElements were found: {result.Selector}"); + Serilog.Log.Error($"Element not found: {result.Selector}"); return; } - else + else if (count > 1) { - locator = page.Locator(result.Selector).First;// 匹配到多个时取第一个,否则当await locator.ClickAsync();匹配到多个就会抛异常。 + if (!action.FirstIfMultipleFound) + { + Serilog.Log.Error($"Multiple eElements were found: {result.Selector}"); + return; + } + else + { + locator = page.Locator(result.Selector).First;// 匹配到多个时取第一个,否则当await locator.ClickAsync();匹配到多个就会抛异常。 + } } + + await ExecuteAction(message, page, locator, action); } + else if (action.Position != null && action.Position.X != 0 && action.Position.Y != 0) + { + if (action.Position != null && action.Position.X != 0 && action.Position.Y != 0) + { + var elementHandle = await page.EvaluateHandleAsync( + @"(coords) => document.elementFromPoint(coords.x, coords.y)", + new { x = (int)action.Position.X, y = (int)action.Position.Y } + ); + await ExecuteAction(message, page, elementHandle.AsElement(), action); + } + } + else + { + Serilog.Log.Error($"Selector or position is not set."); + return; + } + + if (action.WaitTime > 0) + { + await Task.Delay(1000 * action.WaitTime); + } + } + + private async Task ExecuteAction(MessageInfo message, IPage page, ILocator locator, ElementActionArgs action) + { if (action.Action == BroswerActionEnum.Click) { if (action.Position == null) @@ -201,12 +235,174 @@ await locator.ClickAsync(new LocatorClickOptions } } } + } - if (action.WaitTime > 0) + private async Task ExecuteAction(MessageInfo message, IPage page, IElementHandle elementHandle, ElementActionArgs action) + { + var body = page.Locator("body"); + + if (action.Action == BroswerActionEnum.Click) { + await body.ClickAsync(new LocatorClickOptions + { + Position = new Position + { + X = action.Position.X, + Y = action.Position.Y + } + }); + } + else if (action.Action == BroswerActionEnum.DropDown) + { + var tagName = await body.EvaluateAsync("el => el.tagName.toLowerCase()"); + if (tagName == "select") + { + await HandleSelectDropDownAsync(page, body, action); + } + else + { + await body.ClickAsync(); + if (!string.IsNullOrWhiteSpace(action.PressKey)) + { + await page.Keyboard.PressAsync(action.PressKey); + await page.Keyboard.PressAsync("Enter"); + } + else + { + var optionLocator = page.Locator($"//div[text()='{action.Content}']"); + var optionCount = await optionLocator.CountAsync(); + if (optionCount == 0) + { + Serilog.Log.Error($"Dropdown option not found: {action.Content}"); + return; + } + await optionLocator.First.ClickAsync(); + } + } + } + else if (action.Action == BroswerActionEnum.InputText) + { + await elementHandle.FillAsync(action.Content); + + if (action.PressKey != null) + { + if (action.DelayBeforePressingKey > 0) + { + await Task.Delay(action.DelayBeforePressingKey); + } + await body.PressAsync(action.PressKey); + } + } + else if (action.Action == BroswerActionEnum.FileUpload) + { + var _states = _services.GetRequiredService(); + var files = new List(); + if (action.FileUrl != null && action.FileUrl.Length > 0) + { + files.AddRange(action.FileUrl); + } + var hooks = _services.GetServices(); + foreach (var hook in hooks) + { + files.AddRange(await hook.GetUploadFiles(message)); + } + if (files.Count == 0) + { + Serilog.Log.Warning($"No files found to upload: {action.Content}"); + return; + } + var fileChooser = await page.RunAndWaitForFileChooserAsync(async () => + { + await body.ClickAsync(); + }); + var guid = Guid.NewGuid().ToString(); + var directory = Path.Combine(Path.GetTempPath(), guid); + DeleteDirectory(directory); + Directory.CreateDirectory(directory); + var localPaths = new List(); + var http = _services.GetRequiredService(); + using var httpClient = http.CreateClient(); + foreach (var fileUrl in files) + { + try + { + using var fileData = await httpClient.GetAsync(fileUrl); + var fileName = new Uri(fileUrl).AbsolutePath; + var localPath = Path.Combine(directory, Path.GetFileName(fileName)); + await using var fs = new FileStream(localPath, FileMode.Create, FileAccess.Write, FileShare.None); + await fileData.Content.CopyToAsync(fs); + localPaths.Add(localPath); + } + catch (Exception ex) + { + Serilog.Log.Error($"FileUpload failed for {fileUrl}. Message: {ex.Message}"); + } + } + await fileChooser.SetFilesAsync(localPaths); await Task.Delay(1000 * action.WaitTime); } + else if (action.Action == BroswerActionEnum.Typing) + { + await body.PressSequentiallyAsync(action.Content); + if (action.PressKey != null) + { + if (action.DelayBeforePressingKey > 0) + { + await Task.Delay(action.DelayBeforePressingKey); + } + await body.PressAsync(action.PressKey); + } + } + else if (action.Action == BroswerActionEnum.Hover) + { + await body.HoverAsync(); + } + else if (action.Action == BroswerActionEnum.DragAndDrop) + { + // Locate the element to drag + var box = await body.BoundingBoxAsync(); + + if (box != null) + { + // Calculate start position + float startX = box.X + box.Width / 2; // Start at the center of the element + float startY = box.Y + box.Height / 2; + + // Drag offsets + float offsetX = action.Position.X; + // Move horizontally + if (action.Position.Y == 0) + { + // Perform drag-and-move + // Move mouse to the start position + var mouse = page.Mouse; + await mouse.MoveAsync(startX, startY); + await mouse.DownAsync(); + + // Move mouse smoothly in increments + var tracks = GetVelocityTrack(offsetX); + foreach (var track in tracks) + { + startX += track; + await page.Mouse.MoveAsync(startX, 0, new MouseMoveOptions + { + Steps = 3 + }); + } + + // Release mouse button + await Task.Delay(1000); + await mouse.UpAsync(); + } + else + { + throw new NotImplementedException(); + } + } + } } + + private void DeleteDirectory(string directory) { if (Directory.Exists(directory)) diff --git a/src/Plugins/BotSharp.Plugin.WebDriver/Drivers/PlaywrightDriver/PlaywrightWebDriver.LocateElement.cs b/src/Plugins/BotSharp.Plugin.WebDriver/Drivers/PlaywrightDriver/PlaywrightWebDriver.LocateElement.cs index b4b2eb2f8..129b1cd52 100644 --- a/src/Plugins/BotSharp.Plugin.WebDriver/Drivers/PlaywrightDriver/PlaywrightWebDriver.LocateElement.cs +++ b/src/Plugins/BotSharp.Plugin.WebDriver/Drivers/PlaywrightDriver/PlaywrightWebDriver.LocateElement.cs @@ -21,6 +21,23 @@ public async Task LocateElement(MessageInfo message, Elemen IsSuccess = false }; } + + // Use IWebElementLocator to detect element position by element description + var locators = _services.GetServices(); + foreach (var el in locators) + { + location.Position = await el.DetectElementCoordinates(this, message.ContextId, location.ElementLocatorDescription); + + if (location.Position != null && location.Position.X > 0 && location.Position.Y > 0) + { + result.Message = $"Position based locating is found at {location.Position}"; + return new BrowserActionResult + { + IsSuccess = true + }; + } + } + ILocator locator = page.Locator("body"); int count = 0; var keyword = string.Empty; diff --git a/src/Plugins/BotSharp.Plugin.WebDriver/Hooks/WebUtilityHook.cs b/src/Plugins/BotSharp.Plugin.WebDriver/Hooks/WebUtilityHook.cs index 36f693573..4812d87eb 100644 --- a/src/Plugins/BotSharp.Plugin.WebDriver/Hooks/WebUtilityHook.cs +++ b/src/Plugins/BotSharp.Plugin.WebDriver/Hooks/WebUtilityHook.cs @@ -7,6 +7,7 @@ public class WebUtilityHook : IAgentUtilityHook private const string GO_TO_PAGE_FN = $"{PREFIX}go_to_page"; private const string LOCATE_ELEMENT_FN = $"{PREFIX}locate_element"; private const string ACTION_ON_ELEMENT_FN = $"{PREFIX}action_on_element"; + private const string TAKE_SCREENSHOT_FN = $"{PREFIX}take_screenshot"; public void AddUtilities(List utilities) { @@ -20,12 +21,10 @@ public void AddUtilities(List utilities) new UtilityItem { FunctionName = GO_TO_PAGE_FN, - TemplateName = $"{GO_TO_PAGE_FN}.fn" }, new UtilityItem { FunctionName = ACTION_ON_ELEMENT_FN, - TemplateName = $"{ACTION_ON_ELEMENT_FN}.fn" }, new UtilityItem { @@ -34,6 +33,10 @@ public void AddUtilities(List utilities) new UtilityItem { FunctionName = CLOSE_BROWSER_FN + }, + new UtilityItem + { + FunctionName = TAKE_SCREENSHOT_FN } ] } diff --git a/src/Plugins/BotSharp.Plugin.WebDriver/Using.cs b/src/Plugins/BotSharp.Plugin.WebDriver/Using.cs index 161e150a9..f534c7b88 100644 --- a/src/Plugins/BotSharp.Plugin.WebDriver/Using.cs +++ b/src/Plugins/BotSharp.Plugin.WebDriver/Using.cs @@ -21,6 +21,6 @@ global using BotSharp.Plugin.WebDriver.Models; global using BotSharp.Plugin.WebDriver.Services; global using BotSharp.Plugin.WebDriver.LlmContexts; -global using BotSharp.Plugin.WebDriver.Drivers; global using BotSharp.Abstraction.Browsing.Models; -global using BotSharp.Abstraction.Browsing; \ No newline at end of file +global using BotSharp.Abstraction.Browsing; +global using BotSharp.Abstraction.Browsing.Settings; \ No newline at end of file diff --git a/src/Plugins/BotSharp.Plugin.WebDriver/UtilFunctions/UtilWebActionOnElementFn.cs b/src/Plugins/BotSharp.Plugin.WebDriver/UtilFunctions/UtilWebActionOnElementFn.cs index 7e14c1358..338ea10cc 100644 --- a/src/Plugins/BotSharp.Plugin.WebDriver/UtilFunctions/UtilWebActionOnElementFn.cs +++ b/src/Plugins/BotSharp.Plugin.WebDriver/UtilFunctions/UtilWebActionOnElementFn.cs @@ -45,6 +45,7 @@ public async Task Execute(RoleDialogModel message) FunctionArgs = message.FunctionArgs }; browser.SetServiceProvider(_services); + var result = await browser.ActionOnElement(msg, locatorArgs, actionArgs); message.Content = $"{actionArgs.Action} executed {(result.IsSuccess ? "success" : "failed")}."; diff --git a/src/Plugins/BotSharp.Plugin.WebDriver/UtilFunctions/UtilWebTakeScreenshotFn.cs b/src/Plugins/BotSharp.Plugin.WebDriver/UtilFunctions/UtilWebTakeScreenshotFn.cs new file mode 100644 index 000000000..6762c975d --- /dev/null +++ b/src/Plugins/BotSharp.Plugin.WebDriver/UtilFunctions/UtilWebTakeScreenshotFn.cs @@ -0,0 +1,39 @@ +namespace BotSharp.Plugin.WebDriver.UtilFunctions; + +public class UtilWebTakeScreenshotFn : IFunctionCallback +{ + public string Name => "util-web-take_screenshot"; + public string Indication => "Taking screenshot for current viewport."; + private readonly IServiceProvider _services; + private readonly ILogger _logger; + + public UtilWebTakeScreenshotFn( + IServiceProvider services, + ILogger logger) + { + _services = services; + _logger = logger; + } + + public async Task Execute(RoleDialogModel message) + { + var conv = _services.GetRequiredService(); + var webDriverService = _services.GetRequiredService(); + var services = _services.CreateScope().ServiceProvider; + var browser = services.GetRequiredService(); + var msg = new MessageInfo + { + AgentId = message.CurrentAgentId, + MessageId = message.MessageId, + ContextId = webDriverService.GetMessageContext(message) + }; + + var path = webDriverService.GetScreenshotFilePath(message.MessageId); + message.Content = "Took screenshot completed. You can take another screenshot if needed."; + + var screenshot = await browser.ScreenshotAsync(msg, path); + message.Data = screenshot.Body; + + return true; + } +} diff --git a/src/Plugins/BotSharp.Plugin.WebDriver/data/agents/6745151e-6d46-4a02-8de4-1c4f21c7da95/functions/util-web-action_on_element.json b/src/Plugins/BotSharp.Plugin.WebDriver/data/agents/6745151e-6d46-4a02-8de4-1c4f21c7da95/functions/util-web-action_on_element.json index 8265bc031..71cf806a5 100644 --- a/src/Plugins/BotSharp.Plugin.WebDriver/data/agents/6745151e-6d46-4a02-8de4-1c4f21c7da95/functions/util-web-action_on_element.json +++ b/src/Plugins/BotSharp.Plugin.WebDriver/data/agents/6745151e-6d46-4a02-8de4-1c4f21c7da95/functions/util-web-action_on_element.json @@ -4,6 +4,10 @@ "parameters": { "type": "object", "properties": { + "element_locator_desc": { + "type": "string", + "description": "The element locator description." + }, "selector": { "type": "string", "description": "element selector in XPath, use syntax of Playwright in .NET" @@ -43,6 +47,6 @@ "description": "meta data information if user provided" } }, - "required": [ "selector", "action" ] + "required": [ "element_locator_desc", "action" ] } } \ No newline at end of file diff --git a/src/Plugins/BotSharp.Plugin.WebDriver/data/agents/6745151e-6d46-4a02-8de4-1c4f21c7da95/functions/util-web-locate_element.json b/src/Plugins/BotSharp.Plugin.WebDriver/data/agents/6745151e-6d46-4a02-8de4-1c4f21c7da95/functions/util-web-locate_element.json index 54aac017d..4ad4c2b18 100644 --- a/src/Plugins/BotSharp.Plugin.WebDriver/data/agents/6745151e-6d46-4a02-8de4-1c4f21c7da95/functions/util-web-locate_element.json +++ b/src/Plugins/BotSharp.Plugin.WebDriver/data/agents/6745151e-6d46-4a02-8de4-1c4f21c7da95/functions/util-web-locate_element.json @@ -4,6 +4,10 @@ "parameters": { "type": "object", "properties": { + "element_locator_desc": { + "type": "string", + "description": "The element locator description." + }, "selector": { "type": "string", "description": "element selector in XPath, use syntax of Playwright in .NET" @@ -17,6 +21,6 @@ "description": "ignore error logging if element not exists" } }, - "required": [ "selector" ] + "required": [ "element_locator_desc" ] } } \ No newline at end of file diff --git a/src/Plugins/BotSharp.Plugin.WebDriver/data/agents/6745151e-6d46-4a02-8de4-1c4f21c7da95/functions/util-web-take_screenshot.json b/src/Plugins/BotSharp.Plugin.WebDriver/data/agents/6745151e-6d46-4a02-8de4-1c4f21c7da95/functions/util-web-take_screenshot.json new file mode 100644 index 000000000..53470203d --- /dev/null +++ b/src/Plugins/BotSharp.Plugin.WebDriver/data/agents/6745151e-6d46-4a02-8de4-1c4f21c7da95/functions/util-web-take_screenshot.json @@ -0,0 +1,10 @@ +{ + "name": "util-web-take_screenshot", + "description": "Take screenshot for current viewport.", + "parameters": { + "type": "object", + "properties": { + }, + "required": [] + } +} \ No newline at end of file diff --git a/src/Plugins/BotSharp.Plugin.WebDriver/data/agents/6745151e-6d46-4a02-8de4-1c4f21c7da95/templates/util-web-action_on_element.fn.liquid b/src/Plugins/BotSharp.Plugin.WebDriver/data/agents/6745151e-6d46-4a02-8de4-1c4f21c7da95/templates/util-web-action_on_element.fn.liquid deleted file mode 100644 index 958a54a82..000000000 --- a/src/Plugins/BotSharp.Plugin.WebDriver/data/agents/6745151e-6d46-4a02-8de4-1c4f21c7da95/templates/util-web-action_on_element.fn.liquid +++ /dev/null @@ -1 +0,0 @@ -When user asks to do specific actions (click, fill content, scroll) on a web page, use tool of util-web-action_on_element. \ No newline at end of file diff --git a/src/Plugins/BotSharp.Plugin.WebDriver/data/agents/6745151e-6d46-4a02-8de4-1c4f21c7da95/templates/util-web-go_to_page.fn.liquid b/src/Plugins/BotSharp.Plugin.WebDriver/data/agents/6745151e-6d46-4a02-8de4-1c4f21c7da95/templates/util-web-go_to_page.fn.liquid deleted file mode 100644 index 3ec14d20a..000000000 --- a/src/Plugins/BotSharp.Plugin.WebDriver/data/agents/6745151e-6d46-4a02-8de4-1c4f21c7da95/templates/util-web-go_to_page.fn.liquid +++ /dev/null @@ -1 +0,0 @@ -When user asks to open a web page, use tool of util-web-go_to_page. \ No newline at end of file