From 2d5dd9cb4602173e6845d46187055ca2f17c0b57 Mon Sep 17 00:00:00 2001 From: zfoong Date: Mon, 13 Apr 2026 13:15:50 +0900 Subject: [PATCH 01/41] bug:heartbeat to avoid websocket closing issue --- app/ui_layer/adapters/browser_adapter.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/app/ui_layer/adapters/browser_adapter.py b/app/ui_layer/adapters/browser_adapter.py index 0f341056..8cab1cb6 100644 --- a/app/ui_layer/adapters/browser_adapter.py +++ b/app/ui_layer/adapters/browser_adapter.py @@ -955,10 +955,9 @@ async def _websocket_handler(self, request: "web.Request") -> "web.WebSocketResp from aiohttp import web, WSMsgType import asyncio - # Simple WebSocket configuration - no heartbeat (client handles reconnect) ws = web.WebSocketResponse( max_msg_size=100 * 1024 * 1024, - timeout=None, # No timeout - let messages flow naturally + heartbeat=30.0, # Send ping every 30s to keep connection alive ) try: From d8a4fe49491b3070505cdd88e84207a669994beb Mon Sep 17 00:00:00 2001 From: zfoong Date: Mon, 13 Apr 2026 22:18:23 +0900 Subject: [PATCH 02/41] bug:fix provider VLM issue --- agent_core/core/impl/vlm/interface.py | 3 +- agent_core/core/models/model_registry.py | 4 +-- app/agent_base.py | 22 +++++++++++---- app/data/action/describe_image.py | 35 ++++++++++++++++++++++-- app/data/action/generate_image.py | 2 +- app/main.py | 14 ++++++---- 6 files changed, 61 insertions(+), 19 deletions(-) diff --git a/agent_core/core/impl/vlm/interface.py b/agent_core/core/impl/vlm/interface.py index dce58675..d43c6fc8 100644 --- a/agent_core/core/impl/vlm/interface.py +++ b/agent_core/core/impl/vlm/interface.py @@ -259,7 +259,7 @@ def describe_image_bytes( return cleaned except Exception as e: logger.error(f"[ERROR] {e}") - return "" + raise async def generate_response_async( self, @@ -451,7 +451,6 @@ def _byteplus_describe_bytes(self, image_bytes: bytes, sys: str | None, usr: str "messages": messages, "temperature": self.temperature, "max_tokens": 2048, - "response_format": {"type": "json_object"}, } headers = { "Content-Type": "application/json", diff --git a/agent_core/core/models/model_registry.py b/agent_core/core/models/model_registry.py index f43f499c..3735e52d 100644 --- a/agent_core/core/models/model_registry.py +++ b/agent_core/core/models/model_registry.py @@ -36,7 +36,7 @@ }, "deepseek": { InterfaceType.LLM: "deepseek-chat", - InterfaceType.VLM: "deepseek-chat", + InterfaceType.VLM: None, InterfaceType.EMBEDDING: None, }, "moonshot": { @@ -46,7 +46,7 @@ }, "grok": { InterfaceType.LLM: "grok-3", - InterfaceType.VLM: "grok-2-vision-1212", + InterfaceType.VLM: "grok-4-0709", InterfaceType.EMBEDDING: None, }, } diff --git a/app/agent_base.py b/app/agent_base.py index 8ee53288..bf22f182 100644 --- a/app/agent_base.py +++ b/app/agent_base.py @@ -45,6 +45,8 @@ AGENT_FILE_SYSTEM_TEMPLATE_PATH, AGENT_MEMORY_CHROMA_PATH, PROCESS_MEMORY_AT_STARTUP, + get_api_key, + get_base_url, ) from app.internal_action_interface import InternalActionInterface @@ -124,6 +126,8 @@ def __init__( llm_api_key: str | None = None, llm_base_url: str | None = None, llm_model: str | None = None, + vlm_provider: str | None = None, + vlm_model: str | None = None, deferred_init: bool = False, ) -> None: """ @@ -134,11 +138,12 @@ def __init__( history, etc.) is stored. chroma_path: Directory for the local Chroma vector store used by the RAG components. - llm_provider: Provider name passed to :class:`LLMInterface` and - :class:`VLMInterface`. + llm_provider: Provider name passed to :class:`LLMInterface`. llm_api_key: API key for the LLM provider. llm_base_url: Base URL for the LLM provider (optional). llm_model: Model name override (None = use registry default). + vlm_provider: Provider name for VLM (defaults to llm_provider). + vlm_model: VLM model name override (None = use registry default). deferred_init: If True, allow LLM/VLM initialization to be deferred until API key is configured (useful for first-time setup). """ @@ -156,11 +161,16 @@ def __init__( base_url=llm_base_url, deferred=deferred_init, ) + + # VLM uses its own provider/model settings, falling back to LLM values + _vlm_provider = vlm_provider or llm_provider + _vlm_api_key = get_api_key(_vlm_provider) if vlm_provider else llm_api_key + _vlm_base_url = get_base_url(_vlm_provider) if vlm_provider else llm_base_url self.vlm = VLMInterface( - provider=llm_provider, - model=llm_model, - api_key=llm_api_key, - base_url=llm_base_url, + provider=_vlm_provider, + model=vlm_model, + api_key=_vlm_api_key, + base_url=_vlm_base_url, deferred=deferred_init, ) diff --git a/app/data/action/describe_image.py b/app/data/action/describe_image.py index abccca24..67e58e20 100644 --- a/app/data/action/describe_image.py +++ b/app/data/action/describe_image.py @@ -4,7 +4,7 @@ name="describe_image", description="Uses a Visual Language Model to analyse an image and return a detailed, markdown-ready description. IMPORTANT: Always provide a prompt describing what to look for or describe in the image.", mode="CLI", - action_sets=["document_processing, image"], + action_sets=["core", "document_processing", "image"], input_schema={ "image_path": { "type": "string", @@ -41,7 +41,7 @@ } ) def view_image(input_data: dict) -> dict: - import json, os + import os image_path = str(input_data.get('image_path', '')).strip() simulated_mode = input_data.get('simulated_mode', False) @@ -57,9 +57,38 @@ def view_image(input_data: dict) -> dict: if not os.path.isfile(image_path): return {'status': 'error', 'description': '', 'message': 'File not found.'} + # Check if VLM is available before attempting the call + import app.internal_action_interface as iai + vlm = iai.InternalActionInterface.vlm_interface + + # Check the model registry to see if the provider actually supports VLM + from agent_core.core.models.model_registry import MODEL_REGISTRY + from agent_core.core.models.types import InterfaceType + from app.config import get_vlm_provider + current_provider = get_vlm_provider() + registry_vlm = MODEL_REGISTRY.get(current_provider, {}).get(InterfaceType.VLM) + + if vlm is None or not registry_vlm: + return { + 'status': 'error', + 'description': '', + 'message': ( + f"The current VLM provider '{current_provider}' does not support vision/image analysis. " + "Please inform the user and suggest switching to a provider that supports VLM.\n\n" + "Providers with VLM support: openai, anthropic, gemini, byteplus.\n\n" + "To switch provider, edit 'app/config/settings.json' and update:\n" + ' "vlm_provider": "" (e.g. "anthropic")\n' + ' "vlm_model": "" (e.g. "claude-sonnet-4-6" for anthropic)\n\n' + "Make sure the corresponding API key is configured under 'api_keys' in the same file. " + "If no API key is set, ask the user to provide one. " + "The system will automatically detect the config change and reload." + ), + } + try: - import app.internal_action_interface as iai description = iai.InternalActionInterface.describe_image(image_path, prompt) + if not description: + return {'status': 'error', 'description': '', 'message': 'VLM returned an empty description.'} return {'status': 'success', 'description': description, 'message': ''} except Exception as e: return {'status': 'error', 'description': '', 'message': str(e)} \ No newline at end of file diff --git a/app/data/action/generate_image.py b/app/data/action/generate_image.py index fde5dfae..751a2d5e 100644 --- a/app/data/action/generate_image.py +++ b/app/data/action/generate_image.py @@ -10,7 +10,7 @@ - TIP: When generating multiple images for the same project or related work, use 'reference_images' parameter with previously generated images to maintain consistent style across all outputs""", default=True, mode="CLI", - action_sets=["content_creation, image, document_processing"], + action_sets=["content_creation", "image", "document_processing"], input_schema={ "prompt": { "type": "string", diff --git a/app/main.py b/app/main.py index ce4e5dd4..b46c93ae 100644 --- a/app/main.py +++ b/app/main.py @@ -67,7 +67,7 @@ def _suppress_console_logging_early() -> None: ConfigRegistry.register_workspace_root(".") # Import settings reader (reads directly from settings.json) -from app.config import get_llm_provider, get_api_key, get_base_url, get_llm_model +from app.config import get_llm_provider, get_vlm_provider, get_api_key, get_base_url, get_llm_model, get_vlm_model from app.agent_base import AgentBase @@ -110,11 +110,11 @@ def _parse_cli_args() -> dict: return vars(args) -def _initial_settings() -> tuple[str, str, str, bool]: +def _initial_settings() -> tuple: """Determine initial provider, API key, and base URL from settings.json. Returns: - Tuple of (provider, api_key, base_url, has_valid_key) where has_valid_key + Tuple of (provider, api_key, base_url, model, vlm_provider, vlm_model, has_valid_key) where has_valid_key indicates if a working API key was found. """ # Read directly from settings.json @@ -122,11 +122,13 @@ def _initial_settings() -> tuple[str, str, str, bool]: api_key = get_api_key(provider) base_url = get_base_url(provider) model = get_llm_model() # None → use registry default for the provider + vlm_prov = get_vlm_provider() + vlm_mod = get_vlm_model() # Remote (Ollama) doesn't require API key has_key = bool(api_key) or provider == "remote" - return provider, api_key, base_url, model, has_key + return provider, api_key, base_url, model, vlm_prov, vlm_mod, has_key async def main_async() -> None: @@ -136,7 +138,7 @@ async def main_async() -> None: browser_mode = cli_args.get("browser", False) # Get settings from settings.json - provider, api_key, base_url, model, has_valid_key = _initial_settings() + provider, api_key, base_url, model, vlm_prov, vlm_mod, has_valid_key = _initial_settings() # CLI args override settings.json if provided if cli_args.get("provider"): @@ -159,6 +161,8 @@ async def main_async() -> None: llm_api_key=api_key, llm_base_url=base_url, llm_model=model, + vlm_provider=vlm_prov, + vlm_model=vlm_mod, deferred_init=not has_valid_key, ) From 23abdcf92814e2fe8ffcce7c6a585bb92a07296e Mon Sep 17 00:00:00 2001 From: zfoong Date: Tue, 14 Apr 2026 10:25:17 +0900 Subject: [PATCH 03/41] Invoke skill with command --- app/agent_base.py | 19 ++- app/data/action/task_start.py | 3 + app/internal_action_interface.py | 31 ++++- app/ui_layer/adapters/browser_adapter.py | 35 +++++- .../src/pages/Settings/SkillsSettings.tsx | 24 ++++ app/ui_layer/commands/builtin/__init__.py | 2 + app/ui_layer/commands/builtin/help.py | 1 + app/ui_layer/commands/builtin/skill.py | 6 + app/ui_layer/commands/builtin/skill_invoke.py | 66 +++++++++++ app/ui_layer/controller/ui_controller.py | 109 ++++++++++++++++++ 10 files changed, 286 insertions(+), 10 deletions(-) create mode 100644 app/ui_layer/commands/builtin/skill_invoke.py diff --git a/app/agent_base.py b/app/agent_base.py index bf22f182..ec487641 100644 --- a/app/agent_base.py +++ b/app/agent_base.py @@ -105,6 +105,7 @@ class TriggerData: is_self_message: bool = False # True when the user sent themselves a message contact_id: str | None = None # Sender/chat ID from external platform channel_id: str | None = None # Channel/group ID from external platform + payload: dict | None = None # Full trigger payload for passing extra data class AgentBase: """ @@ -603,6 +604,7 @@ def _extract_trigger_data(self, trigger: Trigger) -> TriggerData: is_self_message=payload.get("is_self_message", False), contact_id=payload.get("contact_id", ""), channel_id=payload.get("channel_id", ""), + payload=payload, ) def _extract_user_message_from_trigger(self, trigger: Trigger) -> Optional[str]: @@ -1104,6 +1106,9 @@ async def _execute_actions( if action.name == "task_start": params["_original_query"] = trigger_data.user_message or trigger_data.query params["_original_platform"] = trigger_data.platform + # Pass pre-selected skills from skill slash commands (e.g., /pdf, /docx) + if trigger_data.payload and trigger_data.payload.get("pre_selected_skills"): + params["_pre_selected_skills"] = trigger_data.payload["pre_selected_skills"] action_names = [a[0].name for a in actions_with_input] logger.info(f"[ACTION] Ready to run {len(actions_with_input)} action(s): {action_names}") @@ -1810,6 +1815,10 @@ async def _handle_chat_message(self, payload: Dict): trigger_payload["contact_id"] = payload.get("contact_id", "") trigger_payload["channel_id"] = payload.get("channel_id", "") + # Carry pre-selected skills from skill slash commands (e.g., /pdf, /docx) + if payload.get("pre_selected_skills"): + trigger_payload["pre_selected_skills"] = payload["pre_selected_skills"] + # Include platform in the action description so the LLM picks # the correct platform-specific send action for replies. # Must be directive (not just informational) for weaker LLMs. @@ -2632,9 +2641,17 @@ async def _initialize_config_watcher(self) -> None: skills_config_path = PROJECT_ROOT / "app" / "config" / "skills_config.json" if skills_config_path.exists(): from app.skill import skill_manager + + async def _reload_skills_and_sync(): + """Reload skills and sync skill slash commands.""" + result = await skill_manager.reload() + if self.ui_controller: + self.ui_controller.sync_skill_commands() + return result + config_watcher.register( skills_config_path, - skill_manager.reload, + _reload_skills_and_sync, name="skills_config.json" ) diff --git a/app/data/action/task_start.py b/app/data/action/task_start.py index 902ec4a3..a8939b5e 100644 --- a/app/data/action/task_start.py +++ b/app/data/action/task_start.py @@ -66,6 +66,8 @@ async def start_task(input_data: dict) -> dict: # Extract original user query and platform for logging to the new task's event stream original_query = input_data.get("_original_query") original_platform = input_data.get("_original_platform") + # Extract pre-selected skills (from skill slash commands like /pdf, /docx) + pre_selected_skills = input_data.get("_pre_selected_skills") if not task_name: return { @@ -105,6 +107,7 @@ async def start_task(input_data: dict) -> dict: session_id=session_id, original_query=original_query, original_platform=original_platform, + pre_selected_skills=pre_selected_skills, ) return { "status": "success", diff --git a/app/internal_action_interface.py b/app/internal_action_interface.py index a1486f1b..076d1623 100644 --- a/app/internal_action_interface.py +++ b/app/internal_action_interface.py @@ -302,6 +302,7 @@ async def do_create_task( session_id: Optional[str] = None, original_query: Optional[str] = None, original_platform: Optional[str] = None, + pre_selected_skills: Optional[List[str]] = None, ) -> Dict[str, Any]: """ Create a new task with automatic skill and action set selection. @@ -319,6 +320,9 @@ async def do_create_task( event stream before the task_start event. original_platform: Optional platform where the original message came from (e.g., "CraftBot TUI", "Telegram", "Whatsapp"). + pre_selected_skills: Optional list of skill names to use directly, + bypassing LLM skill selection. Used when skills are + invoked explicitly via slash commands (e.g., /pdf). Returns: Dictionary with task_id, action_sets, action_count, and selected_skills. @@ -330,12 +334,27 @@ async def do_create_task( # Each task's stream is created when the task starts and cleaned up when the task ends. # Stream lifecycle is managed by TaskManager via on_stream_create/on_stream_remove hooks. - # Select skills and action sets in a single LLM call (optimized) - # Skills are selected first, then action sets with knowledge of skill recommendations - selected_skills, all_action_sets = await cls._select_skills_and_action_sets_via_llm( - task_name, task_description, source_platform=original_platform - ) - logger.info(f"[TASK] Auto-selected skills for '{task_name}': {selected_skills}") + if pre_selected_skills: + # Skills explicitly selected via slash command — skip LLM skill selection + # but still select action sets (including skill-recommended ones) + selected_skills = pre_selected_skills + # Get action sets recommended by pre-selected skills + from agent_core.core.impl.skill.manager import skill_manager + from app.action.action_set import action_set_manager + + skill_action_sets = skill_manager.get_skill_action_sets(selected_skills) + # Also run LLM action set selection for additional sets needed + llm_action_sets = await cls._select_action_sets_via_llm(task_name, task_description) + # Merge: skill-recommended + LLM-selected (deduplicated) + all_action_sets = list(dict.fromkeys(skill_action_sets + llm_action_sets)) + logger.info(f"[TASK] Pre-selected skills (via command): {selected_skills}") + else: + # Select skills and action sets in a single LLM call (optimized) + # Skills are selected first, then action sets with knowledge of skill recommendations + selected_skills, all_action_sets = await cls._select_skills_and_action_sets_via_llm( + task_name, task_description, source_platform=original_platform + ) + logger.info(f"[TASK] Auto-selected skills for '{task_name}': {selected_skills}") logger.info(f"[TASK] Final action sets: {all_action_sets}") # Create task with selected skills and action sets diff --git a/app/ui_layer/adapters/browser_adapter.py b/app/ui_layer/adapters/browser_adapter.py index 8cab1cb6..76fe1e62 100644 --- a/app/ui_layer/adapters/browser_adapter.py +++ b/app/ui_layer/adapters/browser_adapter.py @@ -1321,6 +1321,11 @@ async def _handle_ws_message(self, data: Dict[str, Any], ws=None) -> None: description = data.get("description", "") await self._handle_skill_template(name, description) + elif msg_type == "skill_run": + name = data.get("name", "") + args_text = data.get("args", "") + await self._handle_skill_run(name, args_text) + # Integration handlers elif msg_type == "integration_list": await self._handle_integration_list() @@ -3196,9 +3201,10 @@ async def _handle_skill_enable(self, name: str) -> None: "name": name, }, }) - # Refresh the list + # Refresh the list and sync skill commands if success: await self._handle_skill_list() + self._controller.sync_skill_commands() except Exception as e: await self._broadcast({ "type": "skill_enable", @@ -3221,9 +3227,10 @@ async def _handle_skill_disable(self, name: str) -> None: "name": name, }, }) - # Refresh the list + # Refresh the list and sync skill commands if success: await self._handle_skill_list() + self._controller.sync_skill_commands() except Exception as e: await self._broadcast({ "type": "skill_disable", @@ -3245,9 +3252,10 @@ async def _handle_skill_reload(self) -> None: "message": message, }, }) - # Refresh the list + # Refresh the list and sync skill commands if success: await self._handle_skill_list() + self._controller.sync_skill_commands() except Exception as e: await self._broadcast({ "type": "skill_reload", @@ -3257,6 +3265,27 @@ async def _handle_skill_reload(self) -> None: }, }) + async def _handle_skill_run(self, name: str, args_text: str = "") -> None: + """Run a skill by invoking it through the controller.""" + try: + await self._controller.invoke_skill(name, args_text, self._adapter_id) + await self._broadcast({ + "type": "skill_run", + "data": { + "success": True, + "name": name, + }, + }) + except Exception as e: + await self._broadcast({ + "type": "skill_run", + "data": { + "success": False, + "error": str(e), + "name": name, + }, + }) + async def _handle_skill_install(self, source: str) -> None: """Install a skill from path or git URL.""" try: diff --git a/app/ui_layer/browser/frontend/src/pages/Settings/SkillsSettings.tsx b/app/ui_layer/browser/frontend/src/pages/Settings/SkillsSettings.tsx index 2daa4f05..ed2d50c1 100644 --- a/app/ui_layer/browser/frontend/src/pages/Settings/SkillsSettings.tsx +++ b/app/ui_layer/browser/frontend/src/pages/Settings/SkillsSettings.tsx @@ -1,4 +1,5 @@ import { useState, useEffect } from 'react' +import { useNavigate } from 'react-router-dom' import { Loader2, Plus, @@ -6,6 +7,7 @@ import { RotateCcw, X, Wrench, + Play, } from 'lucide-react' import { Button, Badge, ConfirmModal } from '../../components/ui' import { useToast } from '../../contexts/ToastContext' @@ -32,6 +34,7 @@ interface SkillInfo extends SkillConfig { export function SkillsSettings() { const { send, onMessage, isConnected } = useSettingsWebSocket() const { showToast } = useToast() + const navigate = useNavigate() // State const [skills, setSkills] = useState([]) @@ -150,6 +153,12 @@ export function SkillsSettings() { showToast('error', d.error || 'Failed to get skill info') } }), + onMessage('skill_run', (data: unknown) => { + const d = data as { success: boolean; name?: string; error?: string } + if (!d.success) { + showToast('error', d.error || 'Failed to run skill') + } + }), ] send('skill_list') @@ -185,6 +194,12 @@ export function SkillsSettings() { send('skill_info', { name }) } + const handleRunSkill = (name: string) => { + send('skill_run', { name }) + setViewingSkill(null) + navigate('/chat') + } + const handleInstallSkill = () => { const source = installSource.trim() if (!source) { @@ -533,6 +548,15 @@ export function SkillsSettings() { + {viewingSkill.enabled && ( + + )} + ))} + + )} {message.attachments && message.attachments.length > 0 && (
{ } export function ChatPage() { - const { messages, actions, connected, sendMessage, cancelTask, cancellingTaskId, openFile, openFolder, lastSeenMessageId, markMessagesAsSeen, replyTarget, setReplyTarget, clearReplyTarget, loadOlderMessages, hasMoreMessages, loadingOlderMessages } = useWebSocket() + const { messages, actions, connected, sendMessage, cancelTask, cancellingTaskId, openFile, openFolder, lastSeenMessageId, markMessagesAsSeen, replyTarget, setReplyTarget, clearReplyTarget, loadOlderMessages, hasMoreMessages, loadingOlderMessages, sendOptionClick } = useWebSocket() // Derive agent status from actions and messages const status = useDerivedAgentStatus({ @@ -556,6 +556,7 @@ export function ChatPage() { onOpenFile={openFile} onOpenFolder={openFolder} onReply={handleChatReply} + onOptionClick={sendOptionClick} />
) diff --git a/app/ui_layer/browser/frontend/src/types/index.ts b/app/ui_layer/browser/frontend/src/types/index.ts index a6d55b27..bab75e00 100644 --- a/app/ui_layer/browser/frontend/src/types/index.ts +++ b/app/ui_layer/browser/frontend/src/types/index.ts @@ -12,6 +12,12 @@ export interface Attachment { url: string } +export interface ChatMessageOption { + label: string + value: string + style?: 'primary' | 'danger' | 'default' +} + export interface ChatMessage { sender: string content: string @@ -20,6 +26,8 @@ export interface ChatMessage { messageId: string attachments?: Attachment[] taskSessionId?: string // Links message to a task session for reply feature + options?: ChatMessageOption[] + optionSelected?: string // Value of the option that was selected } // ───────────────────────────────────────────────────────────────────── @@ -95,6 +103,8 @@ export type WSMessageType = // Task control | 'task_cancel' | 'task_cancel_response' + // Option click (interactive buttons in chat) + | 'option_click' // Onboarding | 'onboarding_step' | 'onboarding_step_get' diff --git a/app/ui_layer/components/types.py b/app/ui_layer/components/types.py index f0dedf3d..f9206c2d 100644 --- a/app/ui_layer/components/types.py +++ b/app/ui_layer/components/types.py @@ -29,6 +29,22 @@ class Attachment: url: str +@dataclass +class ChatMessageOption: + """ + Data structure for an interactive option/button in a chat message. + + Attributes: + label: Button text displayed to the user (e.g. "Continue") + value: Machine-readable value sent back on click (e.g. "continue_limit") + style: Visual style - "primary", "danger", or "default" + """ + + label: str + value: str + style: str = "default" + + @dataclass class ChatMessage: """ @@ -44,6 +60,7 @@ class ChatMessage: message_id: Optional unique identifier for the message attachments: Optional list of file attachments task_session_id: Optional task session ID for reply feature + options: Optional list of interactive options/buttons """ sender: str @@ -53,6 +70,7 @@ class ChatMessage: message_id: Optional[str] = None attachments: Optional[List[Attachment]] = None task_session_id: Optional[str] = None + options: Optional[List[ChatMessageOption]] = None def __post_init__(self) -> None: """Generate message_id if not provided.""" diff --git a/app/ui_layer/controller/ui_controller.py b/app/ui_layer/controller/ui_controller.py index f65125bf..9ec34ae3 100644 --- a/app/ui_layer/controller/ui_controller.py +++ b/app/ui_layer/controller/ui_controller.py @@ -280,6 +280,21 @@ async def submit_message( await self._agent._handle_chat_message(payload) + async def handle_option_click(self, value: str, session_id: str) -> None: + """ + Handle a user clicking an option button in a chat message. + + Routes limit-choice options to the appropriate agent handler. + + Args: + value: The option value (e.g. "continue_limit", "abort_limit") + session_id: The task session ID associated with the option + """ + if value == "continue_limit": + await self._agent.handle_limit_continue(session_id) + elif value == "abort_limit": + await self._agent.handle_limit_abort(session_id) + # ───────────────────────────────────────────────────────────────────── # Event Processing # ───────────────────────────────────────────────────────────────────── diff --git a/app/usage/chat_storage.py b/app/usage/chat_storage.py index 17de5ffc..da85aa3e 100644 --- a/app/usage/chat_storage.py +++ b/app/usage/chat_storage.py @@ -34,6 +34,8 @@ class StoredChatMessage: timestamp: float attachments: Optional[List[Dict[str, Any]]] = None task_session_id: Optional[str] = None + options: Optional[List[Dict[str, Any]]] = None + option_selected: Optional[str] = None def to_dict(self) -> Dict[str, Any]: """Convert to dictionary for JSON serialization.""" @@ -48,6 +50,10 @@ def to_dict(self) -> Dict[str, Any]: result["attachments"] = self.attachments if self.task_session_id: result["taskSessionId"] = self.task_session_id + if self.options: + result["options"] = self.options + if self.option_selected: + result["optionSelected"] = self.option_selected return result @@ -104,7 +110,7 @@ def _init_db(self) -> None: ON chat_messages(message_id) """) - # Migration: Add task_session_id column if it doesn't exist + # Migration: Add new columns if they don't exist cursor.execute("PRAGMA table_info(chat_messages)") columns = [col[1] for col in cursor.fetchall()] if "task_session_id" not in columns: @@ -113,6 +119,18 @@ def _init_db(self) -> None: ADD COLUMN task_session_id TEXT """) logger.info("[ChatStorage] Migrated: added task_session_id column") + if "options" not in columns: + cursor.execute(""" + ALTER TABLE chat_messages + ADD COLUMN options TEXT + """) + logger.info("[ChatStorage] Migrated: added options column") + if "option_selected" not in columns: + cursor.execute(""" + ALTER TABLE chat_messages + ADD COLUMN option_selected TEXT + """) + logger.info("[ChatStorage] Migrated: added option_selected column") conn.commit() @@ -130,8 +148,8 @@ def insert_message(self, message: StoredChatMessage) -> int: cursor = conn.cursor() cursor.execute(""" INSERT OR REPLACE INTO chat_messages - (message_id, sender, content, style, timestamp, attachments, task_session_id) - VALUES (?, ?, ?, ?, ?, ?, ?) + (message_id, sender, content, style, timestamp, attachments, task_session_id, options, option_selected) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) """, ( message.message_id, message.sender, @@ -140,6 +158,8 @@ def insert_message(self, message: StoredChatMessage) -> int: message.timestamp, json.dumps(message.attachments) if message.attachments else None, message.task_session_id, + json.dumps(message.options) if message.options else None, + message.option_selected, )) conn.commit() return cursor.lastrowid @@ -162,7 +182,7 @@ def get_messages( with sqlite3.connect(self._db_path) as conn: cursor = conn.cursor() cursor.execute(""" - SELECT message_id, sender, content, style, timestamp, attachments, task_session_id + SELECT message_id, sender, content, style, timestamp, attachments, task_session_id, options, option_selected FROM chat_messages ORDER BY timestamp ASC LIMIT ? OFFSET ? @@ -178,6 +198,8 @@ def get_messages( timestamp=row[4], attachments=json.loads(row[5]) if row[5] else None, task_session_id=row[6], + options=json.loads(row[7]) if row[7] else None, + option_selected=row[8], ) for row in rows ] @@ -196,7 +218,7 @@ def get_recent_messages(self, limit: int = 100) -> List[StoredChatMessage]: cursor = conn.cursor() # Get last N messages ordered by timestamp DESC, then reverse cursor.execute(""" - SELECT message_id, sender, content, style, timestamp, attachments, task_session_id + SELECT message_id, sender, content, style, timestamp, attachments, task_session_id, options, option_selected FROM chat_messages ORDER BY timestamp DESC LIMIT ? @@ -212,6 +234,8 @@ def get_recent_messages(self, limit: int = 100) -> List[StoredChatMessage]: timestamp=row[4], attachments=json.loads(row[5]) if row[5] else None, task_session_id=row[6], + options=json.loads(row[7]) if row[7] else None, + option_selected=row[8], ) for row in rows ] @@ -234,6 +258,26 @@ def clear_messages(self) -> int: conn.commit() return count + def update_option_selected(self, message_id: str, option_value: str) -> bool: + """ + Mark which option was selected on a message. + + Args: + message_id: The message ID to update. + option_value: The value of the selected option. + + Returns: + True if the message was updated, False if not found. + """ + with sqlite3.connect(self._db_path) as conn: + cursor = conn.cursor() + cursor.execute( + "UPDATE chat_messages SET option_selected = ? WHERE message_id = ?", + (option_value, message_id), + ) + conn.commit() + return cursor.rowcount > 0 + def delete_message(self, message_id: str) -> bool: """ Delete a message by ID. @@ -271,7 +315,7 @@ def get_messages_before( with sqlite3.connect(self._db_path) as conn: cursor = conn.cursor() cursor.execute(""" - SELECT message_id, sender, content, style, timestamp, attachments, task_session_id + SELECT message_id, sender, content, style, timestamp, attachments, task_session_id, options, option_selected FROM chat_messages WHERE timestamp < ? ORDER BY timestamp DESC @@ -288,6 +332,8 @@ def get_messages_before( timestamp=row[4], attachments=json.loads(row[5]) if row[5] else None, task_session_id=row[6], + options=json.loads(row[7]) if row[7] else None, + option_selected=row[8], ) for row in rows ] From f3400747958751cfca993b6478d88f337a119127 Mon Sep 17 00:00:00 2001 From: zfoong Date: Mon, 13 Apr 2026 13:15:50 +0900 Subject: [PATCH 14/41] bug:heartbeat to avoid websocket closing issue --- app/ui_layer/adapters/browser_adapter.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/app/ui_layer/adapters/browser_adapter.py b/app/ui_layer/adapters/browser_adapter.py index 0f341056..8cab1cb6 100644 --- a/app/ui_layer/adapters/browser_adapter.py +++ b/app/ui_layer/adapters/browser_adapter.py @@ -955,10 +955,9 @@ async def _websocket_handler(self, request: "web.Request") -> "web.WebSocketResp from aiohttp import web, WSMsgType import asyncio - # Simple WebSocket configuration - no heartbeat (client handles reconnect) ws = web.WebSocketResponse( max_msg_size=100 * 1024 * 1024, - timeout=None, # No timeout - let messages flow naturally + heartbeat=30.0, # Send ping every 30s to keep connection alive ) try: From 4d9be5d71d5b4e0cf77855f9ca5f6de92a661060 Mon Sep 17 00:00:00 2001 From: zfoong Date: Mon, 13 Apr 2026 22:18:23 +0900 Subject: [PATCH 15/41] bug:fix provider VLM issue --- agent_core/core/impl/vlm/interface.py | 3 +- agent_core/core/models/model_registry.py | 4 +-- app/agent_base.py | 22 +++++++++++---- app/data/action/describe_image.py | 35 ++++++++++++++++++++++-- app/data/action/generate_image.py | 2 +- app/main.py | 14 ++++++---- 6 files changed, 61 insertions(+), 19 deletions(-) diff --git a/agent_core/core/impl/vlm/interface.py b/agent_core/core/impl/vlm/interface.py index dce58675..d43c6fc8 100644 --- a/agent_core/core/impl/vlm/interface.py +++ b/agent_core/core/impl/vlm/interface.py @@ -259,7 +259,7 @@ def describe_image_bytes( return cleaned except Exception as e: logger.error(f"[ERROR] {e}") - return "" + raise async def generate_response_async( self, @@ -451,7 +451,6 @@ def _byteplus_describe_bytes(self, image_bytes: bytes, sys: str | None, usr: str "messages": messages, "temperature": self.temperature, "max_tokens": 2048, - "response_format": {"type": "json_object"}, } headers = { "Content-Type": "application/json", diff --git a/agent_core/core/models/model_registry.py b/agent_core/core/models/model_registry.py index f43f499c..3735e52d 100644 --- a/agent_core/core/models/model_registry.py +++ b/agent_core/core/models/model_registry.py @@ -36,7 +36,7 @@ }, "deepseek": { InterfaceType.LLM: "deepseek-chat", - InterfaceType.VLM: "deepseek-chat", + InterfaceType.VLM: None, InterfaceType.EMBEDDING: None, }, "moonshot": { @@ -46,7 +46,7 @@ }, "grok": { InterfaceType.LLM: "grok-3", - InterfaceType.VLM: "grok-2-vision-1212", + InterfaceType.VLM: "grok-4-0709", InterfaceType.EMBEDDING: None, }, } diff --git a/app/agent_base.py b/app/agent_base.py index 8ee53288..bf22f182 100644 --- a/app/agent_base.py +++ b/app/agent_base.py @@ -45,6 +45,8 @@ AGENT_FILE_SYSTEM_TEMPLATE_PATH, AGENT_MEMORY_CHROMA_PATH, PROCESS_MEMORY_AT_STARTUP, + get_api_key, + get_base_url, ) from app.internal_action_interface import InternalActionInterface @@ -124,6 +126,8 @@ def __init__( llm_api_key: str | None = None, llm_base_url: str | None = None, llm_model: str | None = None, + vlm_provider: str | None = None, + vlm_model: str | None = None, deferred_init: bool = False, ) -> None: """ @@ -134,11 +138,12 @@ def __init__( history, etc.) is stored. chroma_path: Directory for the local Chroma vector store used by the RAG components. - llm_provider: Provider name passed to :class:`LLMInterface` and - :class:`VLMInterface`. + llm_provider: Provider name passed to :class:`LLMInterface`. llm_api_key: API key for the LLM provider. llm_base_url: Base URL for the LLM provider (optional). llm_model: Model name override (None = use registry default). + vlm_provider: Provider name for VLM (defaults to llm_provider). + vlm_model: VLM model name override (None = use registry default). deferred_init: If True, allow LLM/VLM initialization to be deferred until API key is configured (useful for first-time setup). """ @@ -156,11 +161,16 @@ def __init__( base_url=llm_base_url, deferred=deferred_init, ) + + # VLM uses its own provider/model settings, falling back to LLM values + _vlm_provider = vlm_provider or llm_provider + _vlm_api_key = get_api_key(_vlm_provider) if vlm_provider else llm_api_key + _vlm_base_url = get_base_url(_vlm_provider) if vlm_provider else llm_base_url self.vlm = VLMInterface( - provider=llm_provider, - model=llm_model, - api_key=llm_api_key, - base_url=llm_base_url, + provider=_vlm_provider, + model=vlm_model, + api_key=_vlm_api_key, + base_url=_vlm_base_url, deferred=deferred_init, ) diff --git a/app/data/action/describe_image.py b/app/data/action/describe_image.py index abccca24..67e58e20 100644 --- a/app/data/action/describe_image.py +++ b/app/data/action/describe_image.py @@ -4,7 +4,7 @@ name="describe_image", description="Uses a Visual Language Model to analyse an image and return a detailed, markdown-ready description. IMPORTANT: Always provide a prompt describing what to look for or describe in the image.", mode="CLI", - action_sets=["document_processing, image"], + action_sets=["core", "document_processing", "image"], input_schema={ "image_path": { "type": "string", @@ -41,7 +41,7 @@ } ) def view_image(input_data: dict) -> dict: - import json, os + import os image_path = str(input_data.get('image_path', '')).strip() simulated_mode = input_data.get('simulated_mode', False) @@ -57,9 +57,38 @@ def view_image(input_data: dict) -> dict: if not os.path.isfile(image_path): return {'status': 'error', 'description': '', 'message': 'File not found.'} + # Check if VLM is available before attempting the call + import app.internal_action_interface as iai + vlm = iai.InternalActionInterface.vlm_interface + + # Check the model registry to see if the provider actually supports VLM + from agent_core.core.models.model_registry import MODEL_REGISTRY + from agent_core.core.models.types import InterfaceType + from app.config import get_vlm_provider + current_provider = get_vlm_provider() + registry_vlm = MODEL_REGISTRY.get(current_provider, {}).get(InterfaceType.VLM) + + if vlm is None or not registry_vlm: + return { + 'status': 'error', + 'description': '', + 'message': ( + f"The current VLM provider '{current_provider}' does not support vision/image analysis. " + "Please inform the user and suggest switching to a provider that supports VLM.\n\n" + "Providers with VLM support: openai, anthropic, gemini, byteplus.\n\n" + "To switch provider, edit 'app/config/settings.json' and update:\n" + ' "vlm_provider": "" (e.g. "anthropic")\n' + ' "vlm_model": "" (e.g. "claude-sonnet-4-6" for anthropic)\n\n' + "Make sure the corresponding API key is configured under 'api_keys' in the same file. " + "If no API key is set, ask the user to provide one. " + "The system will automatically detect the config change and reload." + ), + } + try: - import app.internal_action_interface as iai description = iai.InternalActionInterface.describe_image(image_path, prompt) + if not description: + return {'status': 'error', 'description': '', 'message': 'VLM returned an empty description.'} return {'status': 'success', 'description': description, 'message': ''} except Exception as e: return {'status': 'error', 'description': '', 'message': str(e)} \ No newline at end of file diff --git a/app/data/action/generate_image.py b/app/data/action/generate_image.py index fde5dfae..751a2d5e 100644 --- a/app/data/action/generate_image.py +++ b/app/data/action/generate_image.py @@ -10,7 +10,7 @@ - TIP: When generating multiple images for the same project or related work, use 'reference_images' parameter with previously generated images to maintain consistent style across all outputs""", default=True, mode="CLI", - action_sets=["content_creation, image, document_processing"], + action_sets=["content_creation", "image", "document_processing"], input_schema={ "prompt": { "type": "string", diff --git a/app/main.py b/app/main.py index ce4e5dd4..b46c93ae 100644 --- a/app/main.py +++ b/app/main.py @@ -67,7 +67,7 @@ def _suppress_console_logging_early() -> None: ConfigRegistry.register_workspace_root(".") # Import settings reader (reads directly from settings.json) -from app.config import get_llm_provider, get_api_key, get_base_url, get_llm_model +from app.config import get_llm_provider, get_vlm_provider, get_api_key, get_base_url, get_llm_model, get_vlm_model from app.agent_base import AgentBase @@ -110,11 +110,11 @@ def _parse_cli_args() -> dict: return vars(args) -def _initial_settings() -> tuple[str, str, str, bool]: +def _initial_settings() -> tuple: """Determine initial provider, API key, and base URL from settings.json. Returns: - Tuple of (provider, api_key, base_url, has_valid_key) where has_valid_key + Tuple of (provider, api_key, base_url, model, vlm_provider, vlm_model, has_valid_key) where has_valid_key indicates if a working API key was found. """ # Read directly from settings.json @@ -122,11 +122,13 @@ def _initial_settings() -> tuple[str, str, str, bool]: api_key = get_api_key(provider) base_url = get_base_url(provider) model = get_llm_model() # None → use registry default for the provider + vlm_prov = get_vlm_provider() + vlm_mod = get_vlm_model() # Remote (Ollama) doesn't require API key has_key = bool(api_key) or provider == "remote" - return provider, api_key, base_url, model, has_key + return provider, api_key, base_url, model, vlm_prov, vlm_mod, has_key async def main_async() -> None: @@ -136,7 +138,7 @@ async def main_async() -> None: browser_mode = cli_args.get("browser", False) # Get settings from settings.json - provider, api_key, base_url, model, has_valid_key = _initial_settings() + provider, api_key, base_url, model, vlm_prov, vlm_mod, has_valid_key = _initial_settings() # CLI args override settings.json if provided if cli_args.get("provider"): @@ -159,6 +161,8 @@ async def main_async() -> None: llm_api_key=api_key, llm_base_url=base_url, llm_model=model, + vlm_provider=vlm_prov, + vlm_model=vlm_mod, deferred_init=not has_valid_key, ) From 782386a1c76aec1d8906ff4c812c6658ee24bde3 Mon Sep 17 00:00:00 2001 From: zfoong Date: Tue, 14 Apr 2026 10:25:17 +0900 Subject: [PATCH 16/41] Invoke skill with command --- app/agent_base.py | 19 ++- app/data/action/task_start.py | 3 + app/internal_action_interface.py | 31 ++++- app/ui_layer/adapters/browser_adapter.py | 35 +++++- .../src/pages/Settings/SkillsSettings.tsx | 24 ++++ app/ui_layer/commands/builtin/__init__.py | 2 + app/ui_layer/commands/builtin/help.py | 1 + app/ui_layer/commands/builtin/skill.py | 6 + app/ui_layer/commands/builtin/skill_invoke.py | 66 +++++++++++ app/ui_layer/controller/ui_controller.py | 109 ++++++++++++++++++ 10 files changed, 286 insertions(+), 10 deletions(-) create mode 100644 app/ui_layer/commands/builtin/skill_invoke.py diff --git a/app/agent_base.py b/app/agent_base.py index bf22f182..ec487641 100644 --- a/app/agent_base.py +++ b/app/agent_base.py @@ -105,6 +105,7 @@ class TriggerData: is_self_message: bool = False # True when the user sent themselves a message contact_id: str | None = None # Sender/chat ID from external platform channel_id: str | None = None # Channel/group ID from external platform + payload: dict | None = None # Full trigger payload for passing extra data class AgentBase: """ @@ -603,6 +604,7 @@ def _extract_trigger_data(self, trigger: Trigger) -> TriggerData: is_self_message=payload.get("is_self_message", False), contact_id=payload.get("contact_id", ""), channel_id=payload.get("channel_id", ""), + payload=payload, ) def _extract_user_message_from_trigger(self, trigger: Trigger) -> Optional[str]: @@ -1104,6 +1106,9 @@ async def _execute_actions( if action.name == "task_start": params["_original_query"] = trigger_data.user_message or trigger_data.query params["_original_platform"] = trigger_data.platform + # Pass pre-selected skills from skill slash commands (e.g., /pdf, /docx) + if trigger_data.payload and trigger_data.payload.get("pre_selected_skills"): + params["_pre_selected_skills"] = trigger_data.payload["pre_selected_skills"] action_names = [a[0].name for a in actions_with_input] logger.info(f"[ACTION] Ready to run {len(actions_with_input)} action(s): {action_names}") @@ -1810,6 +1815,10 @@ async def _handle_chat_message(self, payload: Dict): trigger_payload["contact_id"] = payload.get("contact_id", "") trigger_payload["channel_id"] = payload.get("channel_id", "") + # Carry pre-selected skills from skill slash commands (e.g., /pdf, /docx) + if payload.get("pre_selected_skills"): + trigger_payload["pre_selected_skills"] = payload["pre_selected_skills"] + # Include platform in the action description so the LLM picks # the correct platform-specific send action for replies. # Must be directive (not just informational) for weaker LLMs. @@ -2632,9 +2641,17 @@ async def _initialize_config_watcher(self) -> None: skills_config_path = PROJECT_ROOT / "app" / "config" / "skills_config.json" if skills_config_path.exists(): from app.skill import skill_manager + + async def _reload_skills_and_sync(): + """Reload skills and sync skill slash commands.""" + result = await skill_manager.reload() + if self.ui_controller: + self.ui_controller.sync_skill_commands() + return result + config_watcher.register( skills_config_path, - skill_manager.reload, + _reload_skills_and_sync, name="skills_config.json" ) diff --git a/app/data/action/task_start.py b/app/data/action/task_start.py index 902ec4a3..a8939b5e 100644 --- a/app/data/action/task_start.py +++ b/app/data/action/task_start.py @@ -66,6 +66,8 @@ async def start_task(input_data: dict) -> dict: # Extract original user query and platform for logging to the new task's event stream original_query = input_data.get("_original_query") original_platform = input_data.get("_original_platform") + # Extract pre-selected skills (from skill slash commands like /pdf, /docx) + pre_selected_skills = input_data.get("_pre_selected_skills") if not task_name: return { @@ -105,6 +107,7 @@ async def start_task(input_data: dict) -> dict: session_id=session_id, original_query=original_query, original_platform=original_platform, + pre_selected_skills=pre_selected_skills, ) return { "status": "success", diff --git a/app/internal_action_interface.py b/app/internal_action_interface.py index a1486f1b..076d1623 100644 --- a/app/internal_action_interface.py +++ b/app/internal_action_interface.py @@ -302,6 +302,7 @@ async def do_create_task( session_id: Optional[str] = None, original_query: Optional[str] = None, original_platform: Optional[str] = None, + pre_selected_skills: Optional[List[str]] = None, ) -> Dict[str, Any]: """ Create a new task with automatic skill and action set selection. @@ -319,6 +320,9 @@ async def do_create_task( event stream before the task_start event. original_platform: Optional platform where the original message came from (e.g., "CraftBot TUI", "Telegram", "Whatsapp"). + pre_selected_skills: Optional list of skill names to use directly, + bypassing LLM skill selection. Used when skills are + invoked explicitly via slash commands (e.g., /pdf). Returns: Dictionary with task_id, action_sets, action_count, and selected_skills. @@ -330,12 +334,27 @@ async def do_create_task( # Each task's stream is created when the task starts and cleaned up when the task ends. # Stream lifecycle is managed by TaskManager via on_stream_create/on_stream_remove hooks. - # Select skills and action sets in a single LLM call (optimized) - # Skills are selected first, then action sets with knowledge of skill recommendations - selected_skills, all_action_sets = await cls._select_skills_and_action_sets_via_llm( - task_name, task_description, source_platform=original_platform - ) - logger.info(f"[TASK] Auto-selected skills for '{task_name}': {selected_skills}") + if pre_selected_skills: + # Skills explicitly selected via slash command — skip LLM skill selection + # but still select action sets (including skill-recommended ones) + selected_skills = pre_selected_skills + # Get action sets recommended by pre-selected skills + from agent_core.core.impl.skill.manager import skill_manager + from app.action.action_set import action_set_manager + + skill_action_sets = skill_manager.get_skill_action_sets(selected_skills) + # Also run LLM action set selection for additional sets needed + llm_action_sets = await cls._select_action_sets_via_llm(task_name, task_description) + # Merge: skill-recommended + LLM-selected (deduplicated) + all_action_sets = list(dict.fromkeys(skill_action_sets + llm_action_sets)) + logger.info(f"[TASK] Pre-selected skills (via command): {selected_skills}") + else: + # Select skills and action sets in a single LLM call (optimized) + # Skills are selected first, then action sets with knowledge of skill recommendations + selected_skills, all_action_sets = await cls._select_skills_and_action_sets_via_llm( + task_name, task_description, source_platform=original_platform + ) + logger.info(f"[TASK] Auto-selected skills for '{task_name}': {selected_skills}") logger.info(f"[TASK] Final action sets: {all_action_sets}") # Create task with selected skills and action sets diff --git a/app/ui_layer/adapters/browser_adapter.py b/app/ui_layer/adapters/browser_adapter.py index 8cab1cb6..76fe1e62 100644 --- a/app/ui_layer/adapters/browser_adapter.py +++ b/app/ui_layer/adapters/browser_adapter.py @@ -1321,6 +1321,11 @@ async def _handle_ws_message(self, data: Dict[str, Any], ws=None) -> None: description = data.get("description", "") await self._handle_skill_template(name, description) + elif msg_type == "skill_run": + name = data.get("name", "") + args_text = data.get("args", "") + await self._handle_skill_run(name, args_text) + # Integration handlers elif msg_type == "integration_list": await self._handle_integration_list() @@ -3196,9 +3201,10 @@ async def _handle_skill_enable(self, name: str) -> None: "name": name, }, }) - # Refresh the list + # Refresh the list and sync skill commands if success: await self._handle_skill_list() + self._controller.sync_skill_commands() except Exception as e: await self._broadcast({ "type": "skill_enable", @@ -3221,9 +3227,10 @@ async def _handle_skill_disable(self, name: str) -> None: "name": name, }, }) - # Refresh the list + # Refresh the list and sync skill commands if success: await self._handle_skill_list() + self._controller.sync_skill_commands() except Exception as e: await self._broadcast({ "type": "skill_disable", @@ -3245,9 +3252,10 @@ async def _handle_skill_reload(self) -> None: "message": message, }, }) - # Refresh the list + # Refresh the list and sync skill commands if success: await self._handle_skill_list() + self._controller.sync_skill_commands() except Exception as e: await self._broadcast({ "type": "skill_reload", @@ -3257,6 +3265,27 @@ async def _handle_skill_reload(self) -> None: }, }) + async def _handle_skill_run(self, name: str, args_text: str = "") -> None: + """Run a skill by invoking it through the controller.""" + try: + await self._controller.invoke_skill(name, args_text, self._adapter_id) + await self._broadcast({ + "type": "skill_run", + "data": { + "success": True, + "name": name, + }, + }) + except Exception as e: + await self._broadcast({ + "type": "skill_run", + "data": { + "success": False, + "error": str(e), + "name": name, + }, + }) + async def _handle_skill_install(self, source: str) -> None: """Install a skill from path or git URL.""" try: diff --git a/app/ui_layer/browser/frontend/src/pages/Settings/SkillsSettings.tsx b/app/ui_layer/browser/frontend/src/pages/Settings/SkillsSettings.tsx index 2daa4f05..ed2d50c1 100644 --- a/app/ui_layer/browser/frontend/src/pages/Settings/SkillsSettings.tsx +++ b/app/ui_layer/browser/frontend/src/pages/Settings/SkillsSettings.tsx @@ -1,4 +1,5 @@ import { useState, useEffect } from 'react' +import { useNavigate } from 'react-router-dom' import { Loader2, Plus, @@ -6,6 +7,7 @@ import { RotateCcw, X, Wrench, + Play, } from 'lucide-react' import { Button, Badge, ConfirmModal } from '../../components/ui' import { useToast } from '../../contexts/ToastContext' @@ -32,6 +34,7 @@ interface SkillInfo extends SkillConfig { export function SkillsSettings() { const { send, onMessage, isConnected } = useSettingsWebSocket() const { showToast } = useToast() + const navigate = useNavigate() // State const [skills, setSkills] = useState([]) @@ -150,6 +153,12 @@ export function SkillsSettings() { showToast('error', d.error || 'Failed to get skill info') } }), + onMessage('skill_run', (data: unknown) => { + const d = data as { success: boolean; name?: string; error?: string } + if (!d.success) { + showToast('error', d.error || 'Failed to run skill') + } + }), ] send('skill_list') @@ -185,6 +194,12 @@ export function SkillsSettings() { send('skill_info', { name }) } + const handleRunSkill = (name: string) => { + send('skill_run', { name }) + setViewingSkill(null) + navigate('/chat') + } + const handleInstallSkill = () => { const source = installSource.trim() if (!source) { @@ -533,6 +548,15 @@ export function SkillsSettings() { + {viewingSkill.enabled && ( + + )}