diff --git a/src/Plugins/BotSharp.Plugin.WebDriver/Drivers/IWebBrowser.cs b/src/Plugins/BotSharp.Plugin.WebDriver/Drivers/IWebBrowser.cs index f7353d9fa..88d1a01cd 100644 --- a/src/Plugins/BotSharp.Plugin.WebDriver/Drivers/IWebBrowser.cs +++ b/src/Plugins/BotSharp.Plugin.WebDriver/Drivers/IWebBrowser.cs @@ -4,6 +4,7 @@ public interface IWebBrowser { Task LaunchBrowser(string conversationId, string? url); Task ScreenshotAsync(string conversationId, string path); + Task ScrollPageAsync(BrowserActionParams actionParams); Task InputUserText(BrowserActionParams actionParams); Task InputUserPassword(BrowserActionParams actionParams); Task ClickButton(BrowserActionParams actionParams); diff --git a/src/Plugins/BotSharp.Plugin.WebDriver/Drivers/PlaywrightDriver/PlaywrightInstance.cs b/src/Plugins/BotSharp.Plugin.WebDriver/Drivers/PlaywrightDriver/PlaywrightInstance.cs index 1f5f9d51d..252f5f9a5 100644 --- a/src/Plugins/BotSharp.Plugin.WebDriver/Drivers/PlaywrightDriver/PlaywrightInstance.cs +++ b/src/Plugins/BotSharp.Plugin.WebDriver/Drivers/PlaywrightDriver/PlaywrightInstance.cs @@ -70,8 +70,9 @@ public async Task Wait(string id) { if (_contexts.ContainsKey(id)) { - await _contexts[id].Pages.Last().WaitForLoadStateAsync(LoadState.DOMContentLoaded); - await _contexts[id].Pages.Last().WaitForLoadStateAsync(LoadState.NetworkIdle); + var page = _contexts[id].Pages.Last(); + await page.WaitForLoadStateAsync(LoadState.DOMContentLoaded); + await page.WaitForLoadStateAsync(LoadState.NetworkIdle); } await Task.Delay(100); } diff --git a/src/Plugins/BotSharp.Plugin.WebDriver/Drivers/PlaywrightDriver/PlaywrightWebDriver.ClickElement.cs b/src/Plugins/BotSharp.Plugin.WebDriver/Drivers/PlaywrightDriver/PlaywrightWebDriver.ClickElement.cs index 864f1c56e..0dd9d3d31 100644 --- a/src/Plugins/BotSharp.Plugin.WebDriver/Drivers/PlaywrightDriver/PlaywrightWebDriver.ClickElement.cs +++ b/src/Plugins/BotSharp.Plugin.WebDriver/Drivers/PlaywrightDriver/PlaywrightWebDriver.ClickElement.cs @@ -9,23 +9,37 @@ public async Task ClickElement(BrowserActionParams actionParams) { await _instance.Wait(actionParams.ConversationId); + var page = _instance.GetPage(actionParams.ConversationId); + ILocator locator = default; + int count = 0; + // Retrieve the page raw html and infer the element path - var regexExpression = actionParams.Context.MatchRule.ToLower() switch + if (!string.IsNullOrEmpty(actionParams.Context.ElementText)) { - "startwith" => $"^{actionParams.Context.ElementText}", - "endwith" => $"{actionParams.Context.ElementText}$", - "contains" => $"{actionParams.Context.ElementText}", - _ => $"^{actionParams.Context.ElementText}$" - }; - var regex = new Regex(regexExpression, RegexOptions.IgnoreCase); - var elements = _instance.GetPage(actionParams.ConversationId).GetByText(regex); - var count = await elements.CountAsync(); - - // try placeholder - if (count == 0) + var regexExpression = actionParams.Context.MatchRule.ToLower() switch + { + "startwith" => $"^{actionParams.Context.ElementText}", + "endwith" => $"{actionParams.Context.ElementText}$", + "contains" => $"{actionParams.Context.ElementText}", + _ => $"^{actionParams.Context.ElementText}$" + }; + var regex = new Regex(regexExpression, RegexOptions.IgnoreCase); + locator = page.GetByText(regex); + count = await locator.CountAsync(); + + // try placeholder + if (count == 0) + { + locator = page.GetByPlaceholder(regex); + count = await locator.CountAsync(); + } + } + + // try attribute + if (count == 0 && !string.IsNullOrEmpty(actionParams.Context.AttributeName)) { - elements = _instance.GetPage(actionParams.ConversationId).GetByPlaceholder(regex); - count = await elements.CountAsync(); + locator = page.Locator($"[{actionParams.Context.AttributeName}='{actionParams.Context.AttributeValue}']"); + count = await locator.CountAsync(); } if (count == 0) @@ -34,9 +48,8 @@ public async Task ClickElement(BrowserActionParams actionParams) } else if (count == 1) { - // var tagName = await elements.EvaluateAsync("el => el.tagName"); - - await elements.ClickAsync(); + // var tagName = await locator.EvaluateAsync("el => el.tagName"); + await locator.ClickAsync(); // Triggered ajax await _instance.Wait(actionParams.ConversationId); @@ -46,7 +59,7 @@ public async Task ClickElement(BrowserActionParams actionParams) else if (count > 1) { _logger.LogWarning($"Multiple elements are found by keyword {actionParams.Context.ElementText}"); - var all = await elements.AllAsync(); + var all = await locator.AllAsync(); foreach (var element in all) { var content = await element.TextContentAsync(); diff --git a/src/Plugins/BotSharp.Plugin.WebDriver/Drivers/PlaywrightDriver/PlaywrightWebDriver.InputUserPassword.cs b/src/Plugins/BotSharp.Plugin.WebDriver/Drivers/PlaywrightDriver/PlaywrightWebDriver.InputUserPassword.cs index 430365a7d..d8123c2ec 100644 --- a/src/Plugins/BotSharp.Plugin.WebDriver/Drivers/PlaywrightDriver/PlaywrightWebDriver.InputUserPassword.cs +++ b/src/Plugins/BotSharp.Plugin.WebDriver/Drivers/PlaywrightDriver/PlaywrightWebDriver.InputUserPassword.cs @@ -17,15 +17,14 @@ public async Task InputUserPassword(BrowserActionParams actionParams) if (password == null) { - throw new Exception($"Can't locate the web element {actionParams.Context.ElementName}."); + _logger.LogError($"Can't locate the password element by '{actionParams.Context.ElementName}'"); + return false; } var config = _services.GetRequiredService(); try { - var key = actionParams.Context.Password.Replace("@", "").Replace(".", ":"); - var value = config.GetValue(key); - await password.FillAsync(value); + await password.FillAsync(actionParams.Context.Password); return true; } catch (Exception ex) diff --git a/src/Plugins/BotSharp.Plugin.WebDriver/Drivers/PlaywrightDriver/PlaywrightWebDriver.InputUserText.cs b/src/Plugins/BotSharp.Plugin.WebDriver/Drivers/PlaywrightDriver/PlaywrightWebDriver.InputUserText.cs index 08a84b397..7ef1f6f51 100644 --- a/src/Plugins/BotSharp.Plugin.WebDriver/Drivers/PlaywrightDriver/PlaywrightWebDriver.InputUserText.cs +++ b/src/Plugins/BotSharp.Plugin.WebDriver/Drivers/PlaywrightDriver/PlaywrightWebDriver.InputUserText.cs @@ -8,18 +8,31 @@ public async Task InputUserText(BrowserActionParams actionParams) { await _instance.Wait(actionParams.ConversationId); + var page = _instance.GetPage(actionParams.ConversationId); + ILocator locator = default; + int count = 0; + + // try attribute + if (count == 0 && !string.IsNullOrEmpty(actionParams.Context.AttributeName)) + { + locator = page.Locator($"[{actionParams.Context.AttributeName}='{actionParams.Context.AttributeValue}']"); + count = await locator.CountAsync(); + } + // Find by text exactly match - var elements = _instance.GetPage(actionParams.ConversationId) - .GetByRole(AriaRole.Textbox, new PageGetByRoleOptions + if (count == 0) + { + locator = page.GetByRole(AriaRole.Textbox, new PageGetByRoleOptions { Name = actionParams.Context.ElementText }); - var count = await elements.CountAsync(); + count = await locator.CountAsync(); + } + if (count == 0) { - elements = _instance.GetPage(actionParams.ConversationId) - .GetByPlaceholder(actionParams.Context.ElementText); - count = await elements.CountAsync(); + locator = page.GetByPlaceholder(actionParams.Context.ElementText); + count = await locator.CountAsync(); } if (count == 0) @@ -30,22 +43,18 @@ public async Task InputUserText(BrowserActionParams actionParams) html, actionParams.Context.ElementText, actionParams.MessageId); - elements = Locator(actionParams.ConversationId, htmlElementContextOut); - count = await elements.CountAsync(); - } - - if (count == 0) - { - + locator = Locator(actionParams.ConversationId, htmlElementContextOut); + count = await locator.CountAsync(); } - else if (count == 1) + + if (count == 1) { try { - await elements.FillAsync(actionParams.Context.InputText); + await locator.FillAsync(actionParams.Context.InputText); if (actionParams.Context.PressEnter.HasValue && actionParams.Context.PressEnter.Value) { - await elements.PressAsync("Enter"); + await locator.PressAsync("Enter"); } // Triggered ajax diff --git a/src/Plugins/BotSharp.Plugin.WebDriver/Drivers/PlaywrightDriver/PlaywrightWebDriver.Screenshot.cs b/src/Plugins/BotSharp.Plugin.WebDriver/Drivers/PlaywrightDriver/PlaywrightWebDriver.Screenshot.cs index aa096b751..bab41e4cc 100644 --- a/src/Plugins/BotSharp.Plugin.WebDriver/Drivers/PlaywrightDriver/PlaywrightWebDriver.Screenshot.cs +++ b/src/Plugins/BotSharp.Plugin.WebDriver/Drivers/PlaywrightDriver/PlaywrightWebDriver.Screenshot.cs @@ -5,11 +5,14 @@ public partial class PlaywrightWebDriver { public async Task ScreenshotAsync(string conversationId, string path) { - var bytes = await _instance.GetPage(conversationId) - .ScreenshotAsync(new PageScreenshotOptions - { - Path = path, - }); + await _instance.Wait(conversationId); + var page = _instance.GetPage(conversationId); + + await Task.Delay(500); + var bytes = await page.ScreenshotAsync(new PageScreenshotOptions + { + Path = path + }); return "data:image/png;base64," + Convert.ToBase64String(bytes); } diff --git a/src/Plugins/BotSharp.Plugin.WebDriver/Drivers/PlaywrightDriver/PlaywrightWebDriver.ScrollPage.cs b/src/Plugins/BotSharp.Plugin.WebDriver/Drivers/PlaywrightDriver/PlaywrightWebDriver.ScrollPage.cs new file mode 100644 index 000000000..b894259bd --- /dev/null +++ b/src/Plugins/BotSharp.Plugin.WebDriver/Drivers/PlaywrightDriver/PlaywrightWebDriver.ScrollPage.cs @@ -0,0 +1,23 @@ + +namespace BotSharp.Plugin.WebDriver.Drivers.PlaywrightDriver; + +public partial class PlaywrightWebDriver +{ + public async Task ScrollPageAsync(BrowserActionParams actionParams) + { + await _instance.Wait(actionParams.ConversationId); + + var page = _instance.GetPage(actionParams.ConversationId); + + if(actionParams.Context.Direction == "down") + await page.EvaluateAsync("window.scrollBy(0, window.innerHeight - 200)"); + else if (actionParams.Context.Direction == "up") + await page.EvaluateAsync("window.scrollBy(0, -window.innerHeight + 200)"); + else if (actionParams.Context.Direction == "left") + await page.EvaluateAsync("window.scrollBy(-400, 0)"); + else if (actionParams.Context.Direction == "right") + await page.EvaluateAsync("window.scrollBy(400, 0)"); + + return true; + } +} diff --git a/src/Plugins/BotSharp.Plugin.WebDriver/Functions/InputUserPasswordFn.cs b/src/Plugins/BotSharp.Plugin.WebDriver/Functions/InputUserPasswordFn.cs index 30afded46..b378e0d09 100644 --- a/src/Plugins/BotSharp.Plugin.WebDriver/Functions/InputUserPasswordFn.cs +++ b/src/Plugins/BotSharp.Plugin.WebDriver/Functions/InputUserPasswordFn.cs @@ -21,11 +21,13 @@ public async Task Execute(RoleDialogModel message) var agentService = _services.GetRequiredService(); var agent = await agentService.LoadAgent(message.CurrentAgentId); + + var webDriverService = _services.GetRequiredService(); + args.Password = webDriverService.ReplaceToken(args.Password); var result = await _browser.InputUserPassword(new BrowserActionParams(agent, args, convService.ConversationId, message.MessageId)); message.Content = result ? "Input password successfully" : "Input password failed"; - var webDriverService = _services.GetRequiredService(); var path = webDriverService.GetScreenshotFilePath(message.MessageId); message.Data = await _browser.ScreenshotAsync(convService.ConversationId, path); diff --git a/src/Plugins/BotSharp.Plugin.WebDriver/Functions/ScreenshotFn.cs b/src/Plugins/BotSharp.Plugin.WebDriver/Functions/ScreenshotFn.cs new file mode 100644 index 000000000..1d133ad1a --- /dev/null +++ b/src/Plugins/BotSharp.Plugin.WebDriver/Functions/ScreenshotFn.cs @@ -0,0 +1,29 @@ +namespace BotSharp.Plugin.WebDriver.Functions; + +public class ScreenshotFn : IFunctionCallback +{ + public string Name => "take_screenshot"; + + private readonly IServiceProvider _services; + private readonly IWebBrowser _browser; + + public ScreenshotFn(IServiceProvider services, + IWebBrowser browser) + { + _services = services; + _browser = browser; + } + + public async Task Execute(RoleDialogModel message) + { + var convService = _services.GetRequiredService(); + + var webDriverService = _services.GetRequiredService(); + var path = webDriverService.GetScreenshotFilePath(message.MessageId); + + message.Data = await _browser.ScreenshotAsync(convService.ConversationId, path); + message.Content = "Took screenshot completed. You can take another screenshot if needed."; + + return true; + } +} diff --git a/src/Plugins/BotSharp.Plugin.WebDriver/Functions/ScrollPageFn.cs b/src/Plugins/BotSharp.Plugin.WebDriver/Functions/ScrollPageFn.cs new file mode 100644 index 000000000..3d4914543 --- /dev/null +++ b/src/Plugins/BotSharp.Plugin.WebDriver/Functions/ScrollPageFn.cs @@ -0,0 +1,35 @@ +namespace BotSharp.Plugin.WebDriver.Functions; + +public class ScrollPageFn : IFunctionCallback +{ + public string Name => "scroll_page"; + + private readonly IServiceProvider _services; + private readonly IWebBrowser _browser; + + public ScrollPageFn(IServiceProvider services, + IWebBrowser browser) + { + _services = services; + _browser = browser; + } + + public async Task Execute(RoleDialogModel message) + { + var convService = _services.GetRequiredService(); + var args = JsonSerializer.Deserialize(message.FunctionArgs); + + var agentService = _services.GetRequiredService(); + var agent = await agentService.LoadAgent(message.CurrentAgentId); + + message.Data = await _browser.ScrollPageAsync(new BrowserActionParams(agent, args, convService.ConversationId, message.MessageId)); + message.Content = "Scrolled. You can scroll more if needed."; + + var webDriverService = _services.GetRequiredService(); + var path = webDriverService.GetScreenshotFilePath(message.MessageId); + + message.Data = await _browser.ScreenshotAsync(convService.ConversationId, path); + + return true; + } +} diff --git a/src/Plugins/BotSharp.Plugin.WebDriver/LlmContexts/BrowsingContextIn.cs b/src/Plugins/BotSharp.Plugin.WebDriver/LlmContexts/BrowsingContextIn.cs index 6cd74e955..0beef400d 100644 --- a/src/Plugins/BotSharp.Plugin.WebDriver/LlmContexts/BrowsingContextIn.cs +++ b/src/Plugins/BotSharp.Plugin.WebDriver/LlmContexts/BrowsingContextIn.cs @@ -19,6 +19,12 @@ public class BrowsingContextIn [JsonPropertyName("element_text")] public string? ElementText { get; set; } + [JsonPropertyName("attribute_name")] + public string? AttributeName { get; set; } + + [JsonPropertyName("attribute_value")] + public string? AttributeValue { get; set; } + [JsonPropertyName("press_enter")] public bool? PressEnter { get; set; } @@ -33,4 +39,7 @@ public class BrowsingContextIn [JsonPropertyName("question")] public string? Question { get; set; } + + [JsonPropertyName("direction")] + public string? Direction { get; set; } } diff --git a/src/Plugins/BotSharp.Plugin.WebDriver/data/agents/f3ae2a0f-e6ba-4ee1-a0b9-75d7431ff32b/functions.json b/src/Plugins/BotSharp.Plugin.WebDriver/data/agents/f3ae2a0f-e6ba-4ee1-a0b9-75d7431ff32b/functions.json index de0d201c6..7dcebe01c 100644 --- a/src/Plugins/BotSharp.Plugin.WebDriver/data/agents/f3ae2a0f-e6ba-4ee1-a0b9-75d7431ff32b/functions.json +++ b/src/Plugins/BotSharp.Plugin.WebDriver/data/agents/f3ae2a0f-e6ba-4ee1-a0b9-75d7431ff32b/functions.json @@ -37,6 +37,30 @@ "required": [ "url" ] } }, + { + "name": "scroll_page", + "description": "Scroll page down or up", + "parameters": { + "type": "object", + "properties": { + "direction": { + "type": "string", + "description": "down, up, left, right" + } + }, + "required": [ "direction" ] + } + }, + { + "name": "take_screenshot", + "description": "Tak screenshot to show current page screen", + "parameters": { + "type": "object", + "properties": { + }, + "required": [] + } + }, { "name": "click_button", "description": "Click a button in a web page.", @@ -82,6 +106,14 @@ "press_enter": { "type": "boolean", "description": "whether to press Enter key" + }, + "attribute_name": { + "type": "string", + "description": "attribute name in the element" + }, + "attribute_value": { + "type": "string", + "description": "attribute value in the element" } }, "required": [ "element_text", "input_text" ] @@ -155,6 +187,14 @@ "type": "string", "description": "text or placeholder shown in the element." }, + "attribute_name": { + "type": "string", + "description": "attribute name in the element" + }, + "attribute_value": { + "type": "string", + "description": "attribute value in the element" + }, "match_rule": { "type": "string", "description": "text matching rule: EndWith, StartWith, Contains, Match"