diff --git a/agent_core/core/credentials/oauth_server.py b/agent_core/core/credentials/oauth_server.py index 9d8a701f..8b5a60b3 100644 --- a/agent_core/core/credentials/oauth_server.py +++ b/agent_core/core/credentials/oauth_server.py @@ -22,6 +22,7 @@ """ import asyncio +import html import ipaddress import logging import os @@ -120,10 +121,20 @@ class _OAuthCallbackHandler(BaseHTTPRequestHandler): def do_GET(self): """Handle GET request from OAuth callback.""" params = parse_qs(urlparse(self.path).query) - result_holder["code"] = params.get("code", [None])[0] - result_holder["state"] = params.get("state", [None])[0] + returned_state = params.get("state", [None])[0] result_holder["error"] = params.get("error", [None])[0] + # Validate OAuth state parameter to prevent CSRF + expected_state = result_holder.get("expected_state") + if expected_state and returned_state != expected_state: + result_holder["error"] = "OAuth state mismatch — possible CSRF attack" + result_holder["code"] = None + logger.warning("[OAUTH] State mismatch: expected %s, got %s", expected_state, returned_state) + else: + result_holder["code"] = params.get("code", [None])[0] + + result_holder["state"] = returned_state + self.send_response(200) self.send_header("Content-Type", "text/html") self.end_headers() @@ -132,8 +143,9 @@ def do_GET(self): b"

Authorization successful!

You can close this tab.

" ) else: + safe_error = html.escape(str(result_holder.get('error') or 'Unknown error')) self.wfile.write( - f"

Failed

{result_holder['error']}

".encode() + f"

Failed

{safe_error}

".encode() ) def log_message(self, format, *args): @@ -203,8 +215,12 @@ def run_oauth_flow( if cancel_event and cancel_event.is_set(): return None, "OAuth cancelled" + # Extract the state parameter from the auth URL for CSRF validation + auth_params = parse_qs(urlparse(auth_url).query) + expected_state = auth_params.get("state", [None])[0] + # Use instance-level result holder instead of class-level state - result_holder: Dict[str, Any] = {"code": None, "state": None, "error": None} + result_holder: Dict[str, Any] = {"code": None, "state": None, "error": None, "expected_state": expected_state} handler_class = _make_callback_handler(result_holder) try: diff --git a/agent_core/core/impl/action/manager.py b/agent_core/core/impl/action/manager.py index 84e7c4a0..3693982a 100644 --- a/agent_core/core/impl/action/manager.py +++ b/agent_core/core/impl/action/manager.py @@ -32,6 +32,15 @@ nest_asyncio.apply() + +def _to_pretty_json(value: Any) -> str: + """Serialize a value to pretty-printed JSON for readable logs and event streams.""" + try: + return json.dumps(value, indent=2, ensure_ascii=False, default=str) + except (TypeError, ValueError): + return str(value) + + # Type aliases for hooks OnActionStartHook = Callable[[str, Any, Dict, str, str], Any] # (run_id, action, inputs, parent_id, started_at) -> awaitable OnActionEndHook = Callable[[str, Any, Dict, str, str, str], Any] # (run_id, action, outputs, status, parent_id, ended_at) -> awaitable @@ -205,10 +214,11 @@ async def execute_action( # Log to event stream # Only pass session_id when is_running_task=True (task stream exists) # When no task exists, use global stream by not passing task_id + pretty_input = _to_pretty_json(input_data) self._log_event_stream( is_gui_task=is_gui_task, event_type="action_start", - event=f"Running action {action.name} with input: {input_data}.", + event=f"Running action {action.name} with input: {pretty_input}.", display_message=f"Running {action.display_name}", action_name=action.name, session_id=session_id if is_running_task else None, @@ -293,10 +303,11 @@ async def execute_action( # Only pass session_id when is_running_task=True (task stream exists) output_has_error = outputs and outputs.get("status") == "error" display_status = "failed" if (status == "error" or output_has_error) else "completed" + pretty_output = _to_pretty_json(outputs) self._log_event_stream( is_gui_task=is_gui_task, event_type="action_end", - event=f"Action {action.name} completed with output: {outputs}.", + event=f"Action {action.name} completed with output: {pretty_output}.", display_message=f"{action.display_name} → {display_status}", action_name=action.name, session_id=session_id if is_running_task else None, diff --git a/agent_core/core/impl/llm/interface.py b/agent_core/core/impl/llm/interface.py index 94b7923d..84dec178 100644 --- a/agent_core/core/impl/llm/interface.py +++ b/agent_core/core/impl/llm/interface.py @@ -217,11 +217,17 @@ def reinitialize( target_base_url = base_url try: - logger.info(f"[LLM] Reinitializing with provider: {target_provider}") + from app.config import get_llm_model as _get_llm_model # type: ignore[import] + target_model = _get_llm_model() + except Exception: + target_model = None # app context not available (e.g. agent_core standalone) + + try: + logger.info(f"[LLM] Reinitializing with provider: {target_provider}, model: {target_model or 'registry default'}") ctx = ModelFactory.create( provider=target_provider, interface=InterfaceType.LLM, - model_override=None, + model_override=target_model, api_key=target_api_key, base_url=target_base_url, deferred=False, @@ -261,6 +267,16 @@ def reinitialize( else: self._gemini_cache_manager = None + # Reset consecutive failure counter — a config change is an explicit + # user-initiated retry signal. Without this, a prior run that hit the + # failure threshold would continue to abort even with the new config. + if self._consecutive_failures > 0: + logger.info( + f"[LLM] Resetting consecutive failure counter on reinitialize " + f"(was {self._consecutive_failures})" + ) + self._consecutive_failures = 0 + logger.info(f"[LLM] Reinitialized successfully with provider: {self.provider}, model: {self.model}") return self._initialized except EnvironmentError as e: @@ -1149,9 +1165,22 @@ def _generate_openai( "model": self.model, "messages": messages, "temperature": self.temperature, - "max_tokens": self.max_tokens, } + # Newer OpenAI models (o1, o3, o4, gpt-5, etc.) require + # 'max_completion_tokens' instead of the legacy 'max_tokens' parameter. + model_lower = (self.model or "").lower() + uses_max_completion_tokens = ( + model_lower.startswith("o1") + or model_lower.startswith("o3") + or model_lower.startswith("o4") + or model_lower.startswith("gpt-5") + ) + if uses_max_completion_tokens: + request_kwargs["max_completion_tokens"] = self.max_tokens + else: + request_kwargs["max_tokens"] = self.max_tokens + # Always enforce JSON output format request_kwargs["response_format"] = {"type": "json_object"} diff --git a/agent_core/core/impl/onboarding/config.py b/agent_core/core/impl/onboarding/config.py index 4a128785..fe39d170 100644 --- a/agent_core/core/impl/onboarding/config.py +++ b/agent_core/core/impl/onboarding/config.py @@ -28,28 +28,21 @@ def _get_config_file() -> Path: # Hard onboarding steps configuration # Each step has: id, required (must complete), title (display name) -# Note: User name is collected during soft onboarding (conversational interview) +# User profile (name, location, language, tone, etc.) is collected in the +# user_profile form step during hard onboarding. HARD_ONBOARDING_STEPS = [ {"id": "provider", "required": True, "title": "LLM Provider"}, {"id": "api_key", "required": True, "title": "API Key"}, {"id": "agent_name", "required": False, "title": "Agent Name"}, + {"id": "user_profile", "required": False, "title": "User Profile"}, {"id": "mcp", "required": False, "title": "MCP Servers"}, {"id": "skills", "required": False, "title": "Skills"}, ] # Soft onboarding interview questions template -# Questions are grouped to reduce conversation turns +# Identity/preferences are now collected in hard onboarding. +# Soft onboarding focuses on job/role and deep life goals exploration. SOFT_ONBOARDING_QUESTIONS = [ - # Batch 1: Identity (asked together) - "name", # What should I call you? "job", # What do you do for work? - "location", # Where are you located? (timezone inferred from this) - # Batch 2: Preferences (asked together) - "tone", # How would you like me to communicate? - "proactivity", # Should I be proactive or wait for instructions? - "approval", # What actions need your approval? - # Batch 3: Messaging - "preferred_messaging_platform", # Where should I send notifications? (telegram/whatsapp/discord/slack/tui) - # Batch 4: Life goals - "life_goals", # What are your life goals and what do you want help with? + "life_goals", # Deep life goals exploration (multiple rounds) ] diff --git a/agent_core/core/impl/onboarding/manager.py b/agent_core/core/impl/onboarding/manager.py index fee60d98..af1df49e 100644 --- a/agent_core/core/impl/onboarding/manager.py +++ b/agent_core/core/impl/onboarding/manager.py @@ -86,7 +86,8 @@ def is_complete(self) -> bool: def mark_hard_complete( self, user_name: Optional[str] = None, - agent_name: Optional[str] = None + agent_name: Optional[str] = None, + agent_profile_picture: Optional[str] = None, ) -> None: """ Mark hard onboarding as complete. @@ -94,6 +95,8 @@ def mark_hard_complete( Args: user_name: User's name collected during onboarding agent_name: Agent's name configured during onboarding + agent_profile_picture: Extension of the uploaded agent profile + picture (e.g. "png"). None leaves the current value untouched. """ state = self._ensure_state_loaded() state.hard_completed = True @@ -102,9 +105,15 @@ def mark_hard_complete( state.user_name = user_name if agent_name: state.agent_name = agent_name + if agent_profile_picture is not None: + state.agent_profile_picture = agent_profile_picture save_state(state) logger.info("[ONBOARDING] Hard onboarding marked complete") + def save(self) -> None: + """Persist the current state to disk.""" + save_state(self._ensure_state_loaded()) + def mark_soft_complete(self) -> None: """Mark soft onboarding as complete.""" state = self._ensure_state_loaded() diff --git a/agent_core/core/impl/onboarding/state.py b/agent_core/core/impl/onboarding/state.py index 794911d0..b245aeca 100644 --- a/agent_core/core/impl/onboarding/state.py +++ b/agent_core/core/impl/onboarding/state.py @@ -24,6 +24,8 @@ class OnboardingState: soft_completed_at: ISO timestamp when soft onboarding completed user_name: User's name collected during onboarding agent_name: Agent's name configured during onboarding + agent_profile_picture: Extension of the user-uploaded agent profile + picture (e.g. "png", "jpg"). None means the bundled default is used. """ hard_completed: bool = False soft_completed: bool = False @@ -31,6 +33,7 @@ class OnboardingState: soft_completed_at: Optional[str] = None user_name: Optional[str] = None agent_name: Optional[str] = None + agent_profile_picture: Optional[str] = None @property def is_complete(self) -> bool: @@ -56,6 +59,7 @@ def to_dict(self) -> dict: "soft_completed_at": self.soft_completed_at, "user_name": self.user_name, "agent_name": self.agent_name, + "agent_profile_picture": self.agent_profile_picture, } @classmethod @@ -68,6 +72,7 @@ def from_dict(cls, data: dict) -> "OnboardingState": soft_completed_at=data.get("soft_completed_at"), user_name=data.get("user_name"), agent_name=data.get("agent_name"), + agent_profile_picture=data.get("agent_profile_picture"), ) diff --git a/agent_core/core/impl/task/manager.py b/agent_core/core/impl/task/manager.py index a83b60a7..0e388374 100644 --- a/agent_core/core/impl/task/manager.py +++ b/agent_core/core/impl/task/manager.py @@ -282,6 +282,7 @@ def create_task( compiled_actions=compiled_actions, selected_skills=selected_skills or [], conversation_id=conversation_id, + source_platform=original_platform, ) self.tasks[task_id] = task diff --git a/agent_core/core/impl/vlm/interface.py b/agent_core/core/impl/vlm/interface.py index dce58675..0e1a7e4d 100644 --- a/agent_core/core/impl/vlm/interface.py +++ b/agent_core/core/impl/vlm/interface.py @@ -259,7 +259,7 @@ def describe_image_bytes( return cleaned except Exception as e: logger.error(f"[ERROR] {e}") - return "" + raise async def generate_response_async( self, @@ -332,13 +332,29 @@ def _openai_describe_bytes(self, image_bytes: bytes, sys: str | None, usr: str) ], } ) - response = self.client.chat.completions.create( - model=self.model, - messages=messages, - temperature=self.temperature, - max_tokens=2048, - response_format={"type": "json_object"}, + # Newer OpenAI models (o1, o3, o4, gpt-5, etc.) require + # 'max_completion_tokens' instead of the legacy 'max_tokens' parameter. + # Note: response_format=json_object is intentionally NOT set here because + # describe_image returns plain text descriptions, not JSON. Enabling JSON + # mode would also require the prompt to contain the word "json". + request_kwargs: Dict[str, Any] = { + "model": self.model, + "messages": messages, + "temperature": self.temperature, + } + model_lower = (self.model or "").lower() + uses_max_completion_tokens = ( + model_lower.startswith("o1") + or model_lower.startswith("o3") + or model_lower.startswith("o4") + or model_lower.startswith("gpt-5") ) + if uses_max_completion_tokens: + request_kwargs["max_completion_tokens"] = 2048 + else: + request_kwargs["max_tokens"] = 2048 + + response = self.client.chat.completions.create(**request_kwargs) content = response.choices[0].message.content.strip() token_count_input = response.usage.prompt_tokens token_count_output = response.usage.completion_tokens @@ -451,7 +467,6 @@ def _byteplus_describe_bytes(self, image_bytes: bytes, sys: str | None, usr: str "messages": messages, "temperature": self.temperature, "max_tokens": 2048, - "response_format": {"type": "json_object"}, } headers = { "Content-Type": "application/json", diff --git a/agent_core/core/models/model_registry.py b/agent_core/core/models/model_registry.py index f43f499c..3d141edc 100644 --- a/agent_core/core/models/model_registry.py +++ b/agent_core/core/models/model_registry.py @@ -15,8 +15,8 @@ InterfaceType.EMBEDDING: "text-embedding-004", }, "anthropic": { - InterfaceType.LLM: "claude-sonnet-4-6", - InterfaceType.VLM: "claude-sonnet-4-6", + InterfaceType.LLM: "claude-sonnet-4-5-20250929", + InterfaceType.VLM: "claude-sonnet-4-5-20250929", InterfaceType.EMBEDDING: None, # Anthropic does not provide native embedding models }, "byteplus": { @@ -36,7 +36,7 @@ }, "deepseek": { InterfaceType.LLM: "deepseek-chat", - InterfaceType.VLM: "deepseek-chat", + InterfaceType.VLM: None, InterfaceType.EMBEDDING: None, }, "moonshot": { @@ -46,7 +46,7 @@ }, "grok": { InterfaceType.LLM: "grok-3", - InterfaceType.VLM: "grok-2-vision-1212", + InterfaceType.VLM: "grok-4-0709", InterfaceType.EMBEDDING: None, }, } diff --git a/agent_core/core/prompts/action.py b/agent_core/core/prompts/action.py index f7c0a15b..d5e38203 100644 --- a/agent_core/core/prompts/action.py +++ b/agent_core/core/prompts/action.py @@ -198,6 +198,7 @@ - When all todos completed BUT the user sends a NEW question or request, do NOT end the task. Add new todos for the follow-up and continue working. - If unrecoverable error, use 'task_end' with status 'abort'. - You must provide concrete parameter values for the action's input_schema. +- When setting wait_for_user_reply=true on a send message action, the message MUST end with an explicit question (e.g., "Does this look good?" or "Would you like any changes?"). The agent will pause and wait for user input — if the message is a statement without a question, the user won't know a reply is expected and the task will hang indefinitely. File Reading Best Practices: - read_file returns content with line numbers in cat -n format @@ -205,8 +206,8 @@ * Default reads first 2000 lines - check has_more to know if more exists * Use offset to skip to specific line numbers * Use limit to control how many lines to read -- To find specific content in large files: - 1. Use grep_files with keywords to locate relevant sections +- To find specific content in files: + 1. Use grep_files with a regex pattern to locate relevant sections (use output_mode='content' for lines with line numbers, or 'files_with_matches' to discover files first) 2. Note the line numbers from grep results 3. Use read_file with appropriate offset to read that section - DO NOT repeatedly read entire large files - use targeted reading with offset/limit diff --git a/agent_core/core/prompts/context.py b/agent_core/core/prompts/context.py index 549c203b..9962e48f 100644 --- a/agent_core/core/prompts/context.py +++ b/agent_core/core/prompts/context.py @@ -192,6 +192,7 @@ ENVIRONMENTAL_CONTEXT_PROMPT = """ - User Location: {user_location} +- Current Working Directory: {working_directory} - Operating System: {operating_system} {os_version} ({os_platform}) - VM Operating System: {vm_operating_system} {vm_os_version} ({vm_os_platform}) - VM's screen resolution (GUI mode): {vm_resolution} diff --git a/agent_core/core/prompts/routing.py b/agent_core/core/prompts/routing.py index 9cdca8d9..b9bf1e11 100644 --- a/agent_core/core/prompts/routing.py +++ b/agent_core/core/prompts/routing.py @@ -53,8 +53,8 @@ Return ONLY a valid JSON object: -- Route to existing: {{ "action": "route", "session_id": "", "reason": "" }} -- Create new: {{ "action": "new", "session_id": "new", "reason": "" }} +- Route to existing: {{ "reason": "", "action": "route", "session_id": "" }} +- Create new: {{ "reason": "", "action": "new", "session_id": "new" }} """ diff --git a/agent_core/core/task/task.py b/agent_core/core/task/task.py index f63a526e..3051823e 100644 --- a/agent_core/core/task/task.py +++ b/agent_core/core/task/task.py @@ -69,6 +69,8 @@ class Task: chatserver_action_id: Optional[str] = None # Whether the task is waiting for user reply (pauses trigger scheduling) waiting_for_user_reply: bool = False + # Platform that started (or most recently resumed) this task — outbound messages route here + source_platform: Optional[str] = None def get_current_todo(self) -> Optional[TodoItem]: """ @@ -114,6 +116,7 @@ def to_dict(self) -> Dict[str, Any]: "token_count": self.token_count, "chatserver_action_id": self.chatserver_action_id, "waiting_for_user_reply": self.waiting_for_user_reply, + "source_platform": self.source_platform, } @classmethod @@ -140,4 +143,5 @@ def from_dict(cls, data: Dict[str, Any]) -> "Task": token_count=data.get("token_count", 0), chatserver_action_id=data.get("chatserver_action_id"), waiting_for_user_reply=data.get("waiting_for_user_reply", False), + source_platform=data.get("source_platform"), ) diff --git a/agent_file_system/AGENT.md b/agent_file_system/AGENT.md index 426f8b5d..910a24a6 100644 --- a/agent_file_system/AGENT.md +++ b/agent_file_system/AGENT.md @@ -26,7 +26,7 @@ Efficient File Reading: File Actions: - read_file: General reading with pagination (offset/limit) -- grep_files: Search for keywords, returns matching chunks with line numbers +- grep_files: Search files/directories for regex patterns with three output modes: 'files_with_matches' (discover files), 'content' (matching lines with line numbers), 'count' (match counts). Supports glob/file_type filtering, before/after context lines, case_insensitive, and multiline. - stream_read + stream_edit: Use together for file modifications Avoid: Reading entire large files repeatedly - use grep + targeted offset/limit reads instead diff --git a/app/agent_base.py b/app/agent_base.py index 8ee53288..aa1b85de 100644 --- a/app/agent_base.py +++ b/app/agent_base.py @@ -45,6 +45,8 @@ AGENT_FILE_SYSTEM_TEMPLATE_PATH, AGENT_MEMORY_CHROMA_PATH, PROCESS_MEMORY_AT_STARTUP, + get_api_key, + get_base_url, ) from app.internal_action_interface import InternalActionInterface @@ -103,6 +105,7 @@ class TriggerData: is_self_message: bool = False # True when the user sent themselves a message contact_id: str | None = None # Sender/chat ID from external platform channel_id: str | None = None # Channel/group ID from external platform + payload: dict | None = None # Full trigger payload for passing extra data class AgentBase: """ @@ -124,6 +127,8 @@ def __init__( llm_api_key: str | None = None, llm_base_url: str | None = None, llm_model: str | None = None, + vlm_provider: str | None = None, + vlm_model: str | None = None, deferred_init: bool = False, ) -> None: """ @@ -134,11 +139,12 @@ def __init__( history, etc.) is stored. chroma_path: Directory for the local Chroma vector store used by the RAG components. - llm_provider: Provider name passed to :class:`LLMInterface` and - :class:`VLMInterface`. + llm_provider: Provider name passed to :class:`LLMInterface`. llm_api_key: API key for the LLM provider. llm_base_url: Base URL for the LLM provider (optional). llm_model: Model name override (None = use registry default). + vlm_provider: Provider name for VLM (defaults to llm_provider). + vlm_model: VLM model name override (None = use registry default). deferred_init: If True, allow LLM/VLM initialization to be deferred until API key is configured (useful for first-time setup). """ @@ -156,11 +162,16 @@ def __init__( base_url=llm_base_url, deferred=deferred_init, ) + + # VLM uses its own provider/model settings, falling back to LLM values + _vlm_provider = vlm_provider or llm_provider + _vlm_api_key = get_api_key(_vlm_provider) if vlm_provider else llm_api_key + _vlm_base_url = get_base_url(_vlm_provider) if vlm_provider else llm_base_url self.vlm = VLMInterface( - provider=llm_provider, - model=llm_model, - api_key=llm_api_key, - base_url=llm_base_url, + provider=_vlm_provider, + model=vlm_model, + api_key=_vlm_api_key, + base_url=_vlm_base_url, deferred=deferred_init, ) @@ -593,6 +604,7 @@ def _extract_trigger_data(self, trigger: Trigger) -> TriggerData: is_self_message=payload.get("is_self_message", False), contact_id=payload.get("contact_id", ""), channel_id=payload.get("channel_id", ""), + payload=payload, ) def _extract_user_message_from_trigger(self, trigger: Trigger) -> Optional[str]: @@ -1094,6 +1106,9 @@ async def _execute_actions( if action.name == "task_start": params["_original_query"] = trigger_data.user_message or trigger_data.query params["_original_platform"] = trigger_data.platform + # Pass pre-selected skills from skill slash commands (e.g., /pdf, /docx) + if trigger_data.payload and trigger_data.payload.get("pre_selected_skills"): + params["_pre_selected_skills"] = trigger_data.payload["pre_selected_skills"] action_names = [a[0].name for a in actions_with_input] logger.info(f"[ACTION] Ready to run {len(actions_with_input)} action(s): {action_names}") @@ -1268,18 +1283,17 @@ async def _check_agent_limits(self) -> bool: # Check action limits if (action_count / max_actions) >= 1.0: - # Log warning BEFORE cancelling task (stream is removed during cancel) if self.event_stream_manager: self.event_stream_manager.log( "warning", - f"Action limit reached: 100% of the maximum actions ({max_actions} actions) has been used. Aborting task.", - display_message=f"Action limit reached: 100% of the maximum ({max_actions} actions) has been used. Aborting task.", + f"Action limit reached: 100% of the maximum actions ({max_actions} actions) has been used. Waiting for user decision.", + display_message=None, task_id=current_task_id, ) self.state_manager.bump_event_stream() - response = await self.task_manager.mark_task_cancel(reason=f"Task reached the maximum actions allowed limit: {max_actions}") - task_cancelled: bool = response - return not task_cancelled + await self._send_limit_choice_message("action", current_task_id) + await self._pause_task_for_limit_choice(current_task_id) + return False elif (action_count / max_actions) >= 0.8: if self.event_stream_manager: self.event_stream_manager.log( @@ -1295,18 +1309,17 @@ async def _check_agent_limits(self) -> bool: # Check token limits if (token_count / max_tokens) >= 1.0: - # Log warning BEFORE cancelling task (stream is removed during cancel) if self.event_stream_manager: self.event_stream_manager.log( "warning", - f"Token limit reached: 100% of the maximum tokens ({max_tokens} tokens) has been used. Aborting task.", - display_message=f"Token limit reached: 100% of the maximum ({max_tokens} tokens) has been used. Aborting task.", + f"Token limit reached: 100% of the maximum tokens ({max_tokens} tokens) has been used. Waiting for user decision.", + display_message=None, task_id=current_task_id, ) self.state_manager.bump_event_stream() - response = await self.task_manager.mark_task_cancel(reason=f"Task reached the maximum tokens allowed limit: {max_tokens}") - task_cancelled: bool = response - return not task_cancelled + await self._send_limit_choice_message("token", current_task_id) + await self._pause_task_for_limit_choice(current_task_id) + return False elif (token_count / max_tokens) >= 0.8: if self.event_stream_manager: self.event_stream_manager.log( @@ -1323,6 +1336,178 @@ async def _check_agent_limits(self) -> bool: # No limits close or reached return True + async def _send_limit_choice_message( + self, limit_type: str, session_id: str + ) -> None: + """Send a chat message with Continue/Abort options when a limit is reached.""" + label = "Action" if limit_type == "action" else "Token" + + # Include task name so user knows which task hit the limit + task_name_suffix = "" + if self.task_manager: + task = self.task_manager.tasks.get(session_id) + if task and task.name: + task_name_suffix = f' for task "{task.name}"' + + message = ( + f"{label} limit reached{task_name_suffix}. " + f"Would you like to continue (reset limits) or abort the task?" + ) + logger.info(f"[LIMIT] Sending limit choice message for session {session_id}: {message}") + + # Log to event stream for task context persistence only (display_message=None + # to avoid a duplicate chat message from the event watcher). + if self.event_stream_manager: + try: + self.event_stream_manager.log( + "internal", + message, + display_message=None, + task_id=session_id, + ) + except Exception as e: + logger.error(f"[LIMIT] Failed to log to event stream: {e}", exc_info=True) + + # Display message with options directly in the chat UI (awaited). + # We bypass the event bus (which uses fire-and-forget create_task) + # to ensure the message is broadcast before the method returns. + if self.ui_controller and self.ui_controller.active_adapter: + try: + from app.ui_layer.components.types import ChatMessage, ChatMessageOption + from app.onboarding import onboarding_manager + import time as _time + agent_name = onboarding_manager.state.agent_name or "Agent" + options = [ + ChatMessageOption(label="Continue", value="continue_limit", style="primary"), + ChatMessageOption(label="Abort", value="abort_limit", style="danger"), + ] + await self.ui_controller.active_adapter.chat_component.append_message( + ChatMessage( + sender=agent_name, + content=message, + style="agent", + timestamp=_time.time(), + task_session_id=session_id, + options=options, + ) + ) + logger.info(f"[LIMIT] Options message displayed in chat for session {session_id}") + except Exception as e: + logger.error(f"[LIMIT] Failed to display options in chat: {e}", exc_info=True) + else: + logger.warning(f"[LIMIT] No active UI adapter - options message not displayed") + + async def _pause_task_for_limit_choice(self, session_id: str) -> None: + """Pause the task and create a long-delay trigger to keep it alive.""" + logger.info(f"[LIMIT] Pausing task {session_id} for limit choice") + task = self.task_manager.tasks.get(session_id) if self.task_manager else None + if task: + task.waiting_for_user_reply = True + + # Update UI task status to "paused" - directly await to ensure + # the WebSocket broadcast completes before the react loop cleans up. + if self.ui_controller and self.ui_controller.active_adapter: + try: + action_panel = self.ui_controller.active_adapter.action_panel + if action_panel: + await action_panel.update_item(session_id, "paused") + except Exception as e: + logger.error(f"[LIMIT] Failed to update task status to paused: {e}", exc_info=True) + + from app.ui_layer.events import UIEvent, UIEventType + self.ui_controller.event_bus.emit( + UIEvent( + type=UIEventType.AGENT_STATE_CHANGED, + data={"state": "waiting", "status_message": "Paused - waiting for user decision..."}, + ) + ) + + # Create a long-delay trigger so the task stays alive + try: + await self.triggers.put( + Trigger( + fire_at=time.time() + 10800, + priority=5, + next_action_description="Waiting for user decision on limit reached", + session_id=session_id, + payload={"gui_mode": STATE.gui_mode}, + waiting_for_reply=True, + ), + skip_merge=True, + ) + except Exception as e: + logger.error(f"[LIMIT] Failed to create pause trigger for {session_id}: {e}", exc_info=True) + + async def handle_limit_continue(self, session_id: str) -> None: + """User chose to continue past the limit. Reset counters and resume.""" + task = self.task_manager.tasks.get(session_id) if self.task_manager else None + if not task: + logger.warning(f"[LIMIT] Task {session_id} not found for limit continue") + return + + # Reset counters + STATE.set_agent_property("action_count", 0) + STATE.set_agent_property("token_count", 0) + + # Also reset on the StateSession for this session + from agent_core.core.state.session import StateSession + session = StateSession.get(session_id) + if session: + session.agent_properties.set_property("action_count", 0) + session.agent_properties.set_property("token_count", 0) + + # Clear waiting flag + task.waiting_for_user_reply = False + + # Log to event stream as system message + task_label = f' for task "{task.name}"' if task.name else "" + if self.event_stream_manager: + msg = f"User chose to continue{task_label}. Action and token counters have been reset." + self.event_stream_manager.log( + "system", msg, display_message=msg, task_id=session_id, + ) + self.state_manager.bump_event_stream() + + # Update UI state back to working + if self.ui_controller: + from app.ui_layer.events import UIEvent, UIEventType + self.ui_controller.event_bus.emit( + UIEvent( + type=UIEventType.TASK_UPDATE, + data={"task_id": session_id, "status": "running"}, + ) + ) + self.ui_controller.event_bus.emit( + UIEvent( + type=UIEventType.AGENT_STATE_CHANGED, + data={"state": "working", "status_message": "Agent is working..."}, + ) + ) + + # Fire the trigger to resume execution + await self.triggers.fire(session_id) + + async def handle_limit_abort(self, session_id: str) -> None: + """User chose to abort after reaching limit.""" + task = self.task_manager.tasks.get(session_id) if self.task_manager else None + task_label = f' for task "{task.name}"' if task and task.name else "" + if task: + task.waiting_for_user_reply = False + + # Log system message before cancelling (stream is removed during cancel) + if self.event_stream_manager: + msg = f"User chose to abort{task_label}. Task has been cancelled." + self.event_stream_manager.log( + "system", msg, display_message=msg, task_id=session_id, + ) + self.state_manager.bump_event_stream() + + if self.task_manager: + await self.task_manager.mark_task_cancel( + reason="User chose to abort after reaching limit.", + task_id=session_id, + ) + # ----- Trigger Management ----- async def _cleanup_session_triggers(self, session_id: str) -> None: @@ -1393,12 +1578,16 @@ async def _create_new_trigger(self, new_session_id, action_output, STATE): if pending_platform: trigger_payload["pending_platform"] = pending_platform + # Determine priority based on task mode: + # simple task = 5, complex task = 7 + task_priority = 5 if self.task_manager.is_simple_task() else 7 + # Build and enqueue trigger safely try: await self.triggers.put( Trigger( fire_at=fire_at, - priority=5, + priority=task_priority, next_action_description=next_action_desc, session_id=new_session_id, payload=trigger_payload, @@ -1626,6 +1815,14 @@ async def _handle_chat_message(self, payload: Dict): chat_content = user_input logger.info(f"[CHAT RECEIVED] {chat_content}") + + # clear any stuck consecutive-failure state from a prior aborted task so the next + # LLM call actually hits the provider instead of short-circuiting. + try: + self.llm.reset_failure_counter() + except Exception as e: + logger.debug(f"[CHAT] Could not reset LLM failure counter: {e}") + gui_mode = payload.get("gui_mode") # Determine platform - use payload's platform if available, otherwise default @@ -1732,12 +1929,20 @@ async def _handle_chat_message(self, payload: Dict): f"(fired={fired}, reason: {routing_result.get('reason', 'N/A')})" ) - # Reset task's waiting_for_user_reply flag + # Reset task's waiting_for_user_reply flag and switch source_platform + # so subsequent outbound messages route to the platform the user is now on. if self.task_manager: task = self.task_manager.tasks.get(matched_session_id) - if task and task.waiting_for_user_reply: - task.waiting_for_user_reply = False - logger.info(f"[TASK] Task {matched_session_id} no longer waiting for user reply") + if task: + if task.waiting_for_user_reply: + task.waiting_for_user_reply = False + logger.info(f"[TASK] Task {matched_session_id} no longer waiting for user reply") + if platform and task.source_platform != platform: + logger.info( + f"[TASK] Task {matched_session_id} source_platform switched " + f"from {task.source_platform!r} to {platform!r}" + ) + task.source_platform = platform # Reset task status from "waiting" to "running" when user replies # Update UI regardless of fire() result - user has replied so we should @@ -1800,6 +2005,10 @@ async def _handle_chat_message(self, payload: Dict): trigger_payload["contact_id"] = payload.get("contact_id", "") trigger_payload["channel_id"] = payload.get("channel_id", "") + # Carry pre-selected skills from skill slash commands (e.g., /pdf, /docx) + if payload.get("pre_selected_skills"): + trigger_payload["pre_selected_skills"] = payload["pre_selected_skills"] + # Include platform in the action description so the LLM picks # the correct platform-specific send action for replies. # Must be directive (not just informational) for weaker LLMs. @@ -1810,7 +2019,7 @@ async def _handle_chat_message(self, payload: Dict): await self.triggers.put( Trigger( fire_at=time.time(), - priority=1, + priority=3, next_action_description=( "Please perform action that best suit this user chat " f"you just received{platform_hint}: {chat_content}" @@ -1903,7 +2112,8 @@ async def _handle_external_event(self, payload: Dict) -> None: # Add context so the agent knows it's from the user, not a third party. event_content = ( f"[USER SELF-MESSAGE via {source}]\n" - f"{message_body}" + f"{message_body}\n\n" + f"INSTRUCTIONS: Reply to the message to the user on {source}" ) else: # Third-party message — DO NOT act on it, only notify the user @@ -2115,6 +2325,8 @@ def _reset_agent_file_system_sync(self) -> None: logger.info("[RESET] Agent file system reinitialized from templates") + _soft_onboarding_triggered: bool = False + async def trigger_soft_onboarding(self, reset: bool = False) -> Optional[str]: """ Trigger soft onboarding interview task. @@ -2133,6 +2345,12 @@ async def trigger_soft_onboarding(self, reset: bool = False) -> Optional[str]: from app.trigger import Trigger import time + # Prevent double-triggering (multiple adapters/paths may call this) + if not reset and self._soft_onboarding_triggered: + logger.debug("[ONBOARDING] Soft onboarding already triggered, skipping") + return None + self._soft_onboarding_triggered = True + if reset: onboarding_manager.reset_soft_onboarding() @@ -2486,11 +2704,15 @@ async def _schedule_restored_task_triggers(self) -> None: continue try: + # Determine priority based on task mode: simple=5, complex=7 + is_simple = getattr(task, 'mode', 'complex') == 'simple' + restore_priority = 5 if is_simple else 7 + if task.waiting_for_user_reply: await self.triggers.put( Trigger( fire_at=time.time(), - priority=5, + priority=restore_priority, next_action_description=( "Waiting for user reply " "(resumed after restart)" @@ -2509,7 +2731,7 @@ async def _schedule_restored_task_triggers(self) -> None: await self.triggers.put( Trigger( fire_at=time.time(), - priority=5, + priority=restore_priority, next_action_description=( "Resume task after agent restart" ), @@ -2592,12 +2814,18 @@ async def _initialize_config_watcher(self) -> None: automatically to apply changes without restart. """ try: - from app.config import PROJECT_ROOT + from app.config import PROJECT_ROOT, invalidate_settings_cache # Initialize settings manager settings_path = PROJECT_ROOT / "app" / "config" / "settings.json" settings_manager.initialize(settings_path) + # Invalidate app.config cache when SettingsManager reloads, + # so get_api_key() and other getters pick up fresh values. + settings_manager.register_reload_callback( + lambda new_settings, old_settings: invalidate_settings_cache() + ) + # Get event loop for async callbacks event_loop = asyncio.get_event_loop() @@ -2622,9 +2850,17 @@ async def _initialize_config_watcher(self) -> None: skills_config_path = PROJECT_ROOT / "app" / "config" / "skills_config.json" if skills_config_path.exists(): from app.skill import skill_manager + + async def _reload_skills_and_sync(): + """Reload skills and sync skill slash commands.""" + result = await skill_manager.reload() + if self.ui_controller: + self.ui_controller.sync_skill_commands() + return result + config_watcher.register( skills_config_path, - skill_manager.reload, + _reload_skills_and_sync, name="skills_config.json" ) @@ -2741,13 +2977,6 @@ def print_startup_step(step: int, total: int, message: str): # Resume triggers for tasks restored from previous session await self._schedule_restored_task_triggers() - # Trigger soft onboarding if needed (BEFORE starting interface) - # This ensures agent handles onboarding logic, not the interfaces - from app.onboarding import onboarding_manager - if onboarding_manager.needs_soft_onboarding: - logger.info("[ONBOARDING] Soft onboarding needed, triggering from agent") - await self.trigger_soft_onboarding() - # Initialize external communications (WhatsApp, Telegram) print_startup_step(8, 8, "Starting communications") from app.external_comms import ExternalCommsManager diff --git a/app/cli/onboarding.py b/app/cli/onboarding.py index 94e8d588..3ee2276e 100644 --- a/app/cli/onboarding.py +++ b/app/cli/onboarding.py @@ -12,6 +12,7 @@ ProviderStep, ApiKeyStep, AgentNameStep, + UserProfileStep, MCPStep, SkillsStep, ) @@ -173,6 +174,72 @@ async def _select_multiple( return list(selections) + async def _input_form(self, step) -> Dict[str, Any]: + """Present a multi-field form and return collected data as a dict.""" + form_fields = step.get_form_fields() + result: Dict[str, Any] = {} + + print(f"\n{step.title}:") + print(f"{step.description}\n") + + for f in form_fields: + if f.field_type == "text": + default_display = f.default or "" + prompt = f" {f.label}" + if default_display: + prompt += f" (default: {default_display})" + prompt += ": " + try: + value = await self._async_input(prompt) + except (EOFError, KeyboardInterrupt): + value = "" + result[f.name] = value.strip() if value.strip() else (f.default or "") + + elif f.field_type == "select": + print(f"\n {f.label}:") + for i, opt in enumerate(f.options, 1): + marker = "*" if (opt.value == f.default or opt.default) else " " + label = f" {i}. [{marker}] {opt.label}" + if opt.description and opt.description != opt.label: + label += f" - {opt.description}" + print(label) + try: + choice = await self._async_input(f" Enter number [1-{len(f.options)}]: ") + except (EOFError, KeyboardInterrupt): + choice = "" + choice = choice.strip() + if choice: + try: + idx = int(choice) - 1 + if 0 <= idx < len(f.options): + result[f.name] = f.options[idx].value + continue + except ValueError: + pass + result[f.name] = f.default + + elif f.field_type == "multi_checkbox": + print(f"\n {f.label}:") + for i, opt in enumerate(f.options, 1): + print(f" {i}. [ ] {opt.label} - {opt.description}") + print(" Enter numbers to select (comma-separated), or press Enter to skip:") + try: + choice = await self._async_input(" > ") + except (EOFError, KeyboardInterrupt): + choice = "" + selected = [] + for part in choice.split(","): + part = part.strip() + try: + idx = int(part) - 1 + if 0 <= idx < len(f.options): + selected.append(f.options[idx].value) + except ValueError: + continue + result[f.name] = selected + + return result + async def run_hard_onboarding(self) -> Dict[str, Any]: """Execute CLI-based hard onboarding wizard.""" print(CLIFormatter.format_header("CraftBot Setup")) @@ -206,7 +273,21 @@ async def run_hard_onboarding(self) -> Dict[str, Any]: ) self._collected_data["agent_name"] = agent_name or "Agent" - # Step 4: MCP servers (optional) + # Step 4: User Profile (optional) + profile_step = UserProfileStep() + print("\nWould you like to set up your profile? (Y/n)") + try: + configure_profile = await self._async_input("> ") + except (EOFError, KeyboardInterrupt): + configure_profile = "n" + + if not configure_profile.lower().startswith("n"): + profile_data = await self._input_form(profile_step) + self._collected_data["user_profile"] = profile_data + else: + self._collected_data["user_profile"] = {} + + # Step 5: MCP servers (optional) mcp_step = MCPStep() mcp_options = mcp_step.get_options() if mcp_options: @@ -271,9 +352,16 @@ def on_complete(self, cancelled: bool = False) -> None: save_settings_to_json(provider, api_key) logger.info(f"[CLI ONBOARDING] Saved provider={provider} to settings.json") + # Write user profile data to USER.md + profile_data = self._collected_data.get("user_profile", {}) + if profile_data: + from app.onboarding.profile_writer import write_profile_to_user_md + write_profile_to_user_md(profile_data) + # Mark hard onboarding as complete agent_name = self._collected_data.get("agent_name", "Agent") - onboarding_manager.mark_hard_complete(agent_name=agent_name) + user_name = profile_data.get("user_name") if profile_data else None + onboarding_manager.mark_hard_complete(user_name=user_name, agent_name=agent_name) logger.info("[CLI ONBOARDING] Hard onboarding completed successfully") diff --git a/app/config.py b/app/config.py index bf3e2c03..b02818cb 100644 --- a/app/config.py +++ b/app/config.py @@ -35,6 +35,12 @@ def get_project_root() -> Path: _settings_cache: Optional[Dict[str, Any]] = None +def invalidate_settings_cache() -> None: + """Invalidate the settings cache so the next get_settings() call re-reads from disk.""" + global _settings_cache + _settings_cache = None + + def _get_default_settings() -> Dict[str, Any]: """Return default settings structure.""" return { diff --git a/app/config/skills_config.json b/app/config/skills_config.json index 9f6df29a..09aa5d49 100644 --- a/app/config/skills_config.json +++ b/app/config/skills_config.json @@ -2,6 +2,7 @@ "auto_load": true, "enabled_skills": [ "docx", + "file-format", "pdf", "playwright-mcp", "pptx", diff --git a/app/data/action/describe_image.py b/app/data/action/describe_image.py index abccca24..67e58e20 100644 --- a/app/data/action/describe_image.py +++ b/app/data/action/describe_image.py @@ -4,7 +4,7 @@ name="describe_image", description="Uses a Visual Language Model to analyse an image and return a detailed, markdown-ready description. IMPORTANT: Always provide a prompt describing what to look for or describe in the image.", mode="CLI", - action_sets=["document_processing, image"], + action_sets=["core", "document_processing", "image"], input_schema={ "image_path": { "type": "string", @@ -41,7 +41,7 @@ } ) def view_image(input_data: dict) -> dict: - import json, os + import os image_path = str(input_data.get('image_path', '')).strip() simulated_mode = input_data.get('simulated_mode', False) @@ -57,9 +57,38 @@ def view_image(input_data: dict) -> dict: if not os.path.isfile(image_path): return {'status': 'error', 'description': '', 'message': 'File not found.'} + # Check if VLM is available before attempting the call + import app.internal_action_interface as iai + vlm = iai.InternalActionInterface.vlm_interface + + # Check the model registry to see if the provider actually supports VLM + from agent_core.core.models.model_registry import MODEL_REGISTRY + from agent_core.core.models.types import InterfaceType + from app.config import get_vlm_provider + current_provider = get_vlm_provider() + registry_vlm = MODEL_REGISTRY.get(current_provider, {}).get(InterfaceType.VLM) + + if vlm is None or not registry_vlm: + return { + 'status': 'error', + 'description': '', + 'message': ( + f"The current VLM provider '{current_provider}' does not support vision/image analysis. " + "Please inform the user and suggest switching to a provider that supports VLM.\n\n" + "Providers with VLM support: openai, anthropic, gemini, byteplus.\n\n" + "To switch provider, edit 'app/config/settings.json' and update:\n" + ' "vlm_provider": "" (e.g. "anthropic")\n' + ' "vlm_model": "" (e.g. "claude-sonnet-4-6" for anthropic)\n\n' + "Make sure the corresponding API key is configured under 'api_keys' in the same file. " + "If no API key is set, ask the user to provide one. " + "The system will automatically detect the config change and reload." + ), + } + try: - import app.internal_action_interface as iai description = iai.InternalActionInterface.describe_image(image_path, prompt) + if not description: + return {'status': 'error', 'description': '', 'message': 'VLM returned an empty description.'} return {'status': 'success', 'description': description, 'message': ''} except Exception as e: return {'status': 'error', 'description': '', 'message': str(e)} \ No newline at end of file diff --git a/app/data/action/generate_image.py b/app/data/action/generate_image.py index fde5dfae..e692db32 100644 --- a/app/data/action/generate_image.py +++ b/app/data/action/generate_image.py @@ -10,7 +10,7 @@ - TIP: When generating multiple images for the same project or related work, use 'reference_images' parameter with previously generated images to maintain consistent style across all outputs""", default=True, mode="CLI", - action_sets=["content_creation, image, document_processing"], + action_sets=["content_creation", "image", "document_processing"], input_schema={ "prompt": { "type": "string", @@ -108,8 +108,7 @@ def generate_image(input_data: dict) -> dict: 'message': 'Image generated successfully (simulated mode).' } - # Check for API key first - before any package installation - # Read from settings.json (google/gemini provider key) + # Pre-flight validation: check API key is configured from app.config import get_api_key api_key = get_api_key('gemini') if not api_key: @@ -118,7 +117,7 @@ def generate_image(input_data: dict) -> dict: 'image_paths': [], 'prompt_used': '', 'resolution': '', - 'message': 'Google API key is not configured. Please set it in Settings > Model Settings > API Keys. Steps: 1) Go to https://aistudio.google.com/apikey 2) Create a new API key 3) Add it in Settings under the Google/Gemini provider.' + 'message': 'Gemini API key is not configured. Tell the user the Google Gemini API key is required for image generation, and ask if they need help setting it up.' } # Validate required input @@ -141,25 +140,99 @@ def generate_image(input_data: dict) -> dict: reference_images = input_data.get('reference_images', []) safety_filter_level = input_data.get('safety_filter_level', 'block_medium_and_above') - # Validate resolution + # Validate resolution with user feedback valid_resolutions = ['1K', '2K', '4K'] + warnings = [] if resolution not in valid_resolutions: + warnings.append(f"Invalid resolution '{resolution}'. Defaulting to '1K'. Valid options: {', '.join(valid_resolutions)}.") resolution = '1K' - # Validate aspect ratio + # Validate aspect ratio with user feedback valid_ratios = ['1:1', '3:4', '4:3', '9:16', '16:9'] if aspect_ratio not in valid_ratios: + warnings.append(f"Invalid aspect ratio '{aspect_ratio}'. Defaulting to '1:1'. Valid options: {', '.join(valid_ratios)}.") aspect_ratio = '1:1' - # Validate safety filter level + # Validate safety filter level with user feedback valid_safety_levels = ['block_none', 'block_only_high', 'block_medium_and_above', 'block_low_and_above'] if safety_filter_level not in valid_safety_levels: + warnings.append(f"Invalid safety filter level '{safety_filter_level}'. Defaulting to 'block_medium_and_above'. Valid options: {', '.join(valid_safety_levels)}.") safety_filter_level = 'block_medium_and_above' + # Validate number_of_images with user feedback + raw_num = int(input_data.get('number_of_images', 1)) + if raw_num < 1 or raw_num > 4: + warnings.append(f"number_of_images '{raw_num}' out of range. Clamped to {number_of_images}. Valid range: 1-4.") + # Limit reference images to 14 if len(reference_images) > 14: + warnings.append(f"Too many reference images ({len(reference_images)}). Only the first 14 will be used.") reference_images = reference_images[:14] + # Helper: extract images from Gemini response + def _extract_images_from_response(response): + images = [] + # Primary path: candidates[].content.parts[].inline_data + if hasattr(response, 'candidates') and response.candidates: + for candidate in response.candidates: + if not (hasattr(candidate, 'content') and hasattr(candidate.content, 'parts')): + continue + for part in candidate.content.parts: + if hasattr(part, 'inline_data') and part.inline_data: + if hasattr(part.inline_data, 'mime_type') and part.inline_data.mime_type.startswith('image/'): + images.append(part.inline_data.data) + # Fallback: response.images (older SDK versions) + if not images and hasattr(response, 'images'): + for img in response.images: + if hasattr(img, 'data'): + images.append(img.data) + elif hasattr(img, '_pil_image'): + images.append(img) + return images + + # Helper: check if response was blocked by safety filters + def _get_block_reason(response): + if hasattr(response, 'prompt_feedback'): + feedback = response.prompt_feedback + if hasattr(feedback, 'block_reason') and feedback.block_reason: + return str(feedback.block_reason) + if hasattr(response, 'candidates') and response.candidates: + for candidate in response.candidates: + if hasattr(candidate, 'finish_reason') and candidate.finish_reason: + reason = str(candidate.finish_reason) + if 'SAFETY' in reason.upper(): + return reason + return None + + # Helper: build the save path for a generated image + def _build_save_path(output_path, timestamp, index, number_of_images, total_found): + if output_path: + if number_of_images > 1 or total_found > 1: + base, ext = os.path.splitext(output_path) + if not ext: + ext = '.png' + return f"{base}_{index+1}{ext}" + else: + save_path = output_path + if not os.path.splitext(save_path)[1]: + save_path += '.png' + return save_path + else: + temp_dir = tempfile.gettempdir() + return os.path.join(temp_dir, f"generated_image_{timestamp}_{index+1}.png") + + # Helper: convert image data to PIL Image + def _to_pil_image(img_data, Image, io, base64): + if isinstance(img_data, str): + image_bytes = base64.b64decode(img_data) + return Image.open(io.BytesIO(image_bytes)) + elif isinstance(img_data, bytes): + return Image.open(io.BytesIO(img_data)) + elif hasattr(img_data, '_pil_image'): + return img_data._pil_image + else: + return img_data + # Ensure required packages are installed def _ensure_package(pkg_name): try: @@ -274,55 +347,31 @@ def _ensure_package(pkg_name): image_paths = [] timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - # Process response parts to find generated images - images_found = [] - if hasattr(response, 'candidates') and response.candidates: - for candidate in response.candidates: - if hasattr(candidate, 'content') and hasattr(candidate.content, 'parts'): - for part in candidate.content.parts: - # Check for inline image data - if hasattr(part, 'inline_data') and part.inline_data: - if part.inline_data.mime_type.startswith('image/'): - images_found.append(part.inline_data.data) - # Check for file data - elif hasattr(part, 'file_data') and part.file_data: - if part.file_data.mime_type.startswith('image/'): - # Would need to download from file URI - pass - - if not images_found: - # Try alternative response structure - if hasattr(response, 'images'): - for img in response.images: - if hasattr(img, 'data'): - images_found.append(img.data) - elif hasattr(img, '_pil_image'): - images_found.append(img) + # Process response to find generated images + images_found = _extract_images_from_response(response) if not images_found: + # Check if response was blocked by safety filters + block_reason = _get_block_reason(response) + if block_reason: + return { + 'status': 'error', + 'image_paths': [], + 'prompt_used': prompt, + 'resolution': resolution, + 'message': f'Image generation was blocked by safety filters: {block_reason}. Try modifying your prompt or adjusting safety_filter_level.' + } return { 'status': 'error', 'image_paths': [], 'prompt_used': prompt, 'resolution': resolution, - 'message': 'No images were generated. The model may not have produced image output for this prompt. Try rephrasing your prompt or check if your API key has access to image generation.' + 'message': 'No images were generated. The model did not produce image output for this prompt. Try rephrasing your prompt or check if your API key has access to image generation.' } # Save each generated image for i, img_data in enumerate(images_found[:number_of_images]): - if output_path: - if number_of_images > 1 or len(images_found) > 1: - base, ext = os.path.splitext(output_path) - if not ext: - ext = '.png' - save_path = f"{base}_{i+1}{ext}" - else: - save_path = output_path - if not os.path.splitext(save_path)[1]: - save_path += '.png' - else: - temp_dir = tempfile.gettempdir() - save_path = os.path.join(temp_dir, f"generated_image_{timestamp}_{i+1}.png") + save_path = _build_save_path(output_path, timestamp, i, number_of_images, len(images_found)) # Ensure parent directory exists parent_dir = os.path.dirname(os.path.abspath(save_path)) @@ -330,26 +379,20 @@ def _ensure_package(pkg_name): os.makedirs(parent_dir, exist_ok=True) # Save the image - if isinstance(img_data, str): - # Base64 encoded data - image_bytes = base64.b64decode(img_data) - pil_image = Image.open(io.BytesIO(image_bytes)) - elif isinstance(img_data, bytes): - pil_image = Image.open(io.BytesIO(img_data)) - elif hasattr(img_data, '_pil_image'): - pil_image = img_data._pil_image - else: - pil_image = img_data - + pil_image = _to_pil_image(img_data, Image, io, base64) pil_image.save(save_path, 'PNG') image_paths.append(save_path) + message = f'Successfully generated {len(image_paths)} image(s) using Nano Banana Pro.' + if warnings: + message += ' Warnings: ' + ' '.join(warnings) + return { 'status': 'success', 'image_paths': image_paths, 'prompt_used': prompt, 'resolution': resolution, - 'message': f'Successfully generated {len(image_paths)} image(s) using Nano Banana Pro.' + 'message': message } except Exception as e: diff --git a/app/data/action/grep_files.py b/app/data/action/grep_files.py index 1cd7c372..a60d891d 100644 --- a/app/data/action/grep_files.py +++ b/app/data/action/grep_files.py @@ -1,6 +1,68 @@ from agent_core import action -# Common output schema for all platforms +_INPUT_SCHEMA = { + "pattern": { + "type": "string", + "example": "def \\w+\\(", + "description": "Regex pattern to search for. Supports full regex syntax (e.g., 'def \\w+\\(' to find function definitions, 'TODO:.*' to find TODOs). For literal text search, just use the plain text (special regex chars will need escaping)." + }, + "path": { + "type": "string", + "example": "/workspace/project", + "description": "File or directory path to search in. If a directory, searches all files recursively. If a file, searches only that file. Defaults to current working directory if not provided." + }, + "glob": { + "type": "string", + "example": "*.py", + "description": "Glob pattern to filter which files to search (e.g., '*.py' for Python files, '*.{js,ts}' for JS/TS files, 'test_*.py' for test files). Only applies when path is a directory." + }, + "file_type": { + "type": "string", + "example": "py", + "description": "Filter by file extension type (e.g., 'py', 'js', 'json', 'md'). Shorthand alternative to glob — 'py' is equivalent to glob '*.py'. If both glob and file_type are provided, glob takes priority." + }, + "output_mode": { + "type": "string", + "example": "content", + "description": "Controls what is returned. 'files_with_matches' (default): returns only file paths that contain matches. 'content': returns matching lines with line numbers and optional context. 'count': returns the number of matches per file." + }, + "case_insensitive": { + "type": "boolean", + "example": True, + "description": "If true, search is case-insensitive. Default is false (case-sensitive)." + }, + "before_context": { + "type": "integer", + "example": 2, + "description": "Number of lines to show BEFORE each match. Only applies when output_mode is 'content'. Default is 0." + }, + "after_context": { + "type": "integer", + "example": 2, + "description": "Number of lines to show AFTER each match. Only applies when output_mode is 'content'. Default is 0." + }, + "context": { + "type": "integer", + "example": 3, + "description": "Number of context lines to show both before AND after each match (shorthand for setting before_context and after_context to the same value). Only applies when output_mode is 'content'. Overridden by explicit before_context/after_context if provided." + }, + "multiline": { + "type": "boolean", + "example": False, + "description": "If true, enables multiline mode where '.' matches newlines and patterns can span across lines. Default is false." + }, + "head_limit": { + "type": "integer", + "example": 50, + "description": "Maximum number of results to return. For 'files_with_matches': max file paths. For 'content': max output lines. For 'count': max file entries. Default is 250. Pass 0 for unlimited results (no truncation). If results are truncated, the applied_limit field in the response tells you it happened — use offset to paginate through the rest." + }, + "offset": { + "type": "integer", + "example": 0, + "description": "Number of results to skip before returning. Use with head_limit for pagination. Default is 0." + } +} + _OUTPUT_SCHEMA = { "status": { "type": "string", @@ -9,262 +71,348 @@ }, "message": { "type": "string", - "example": "Found 5 matching chunks", - "description": "Status message or error description." + "example": "Found matches in 5 files", + "description": "Summary message or error description." }, - "chunks": { - "type": "array", - "example": [ - "[line 275] ...some text chunk...", - "[line 937] ...another text chunk..." - ], - "description": "List of formatted chunks for the requested range." + "mode": { + "type": "string", + "example": "content", + "description": "The output mode that was used." }, - "total_matches": { + "num_files": { "type": "integer", - "example": 23, - "description": "Total number of matched chunks available." + "example": 5, + "description": "Number of files that contained matches." }, - "returned_range": { + "filenames": { "type": "array", - "example": [1, 5], - "description": "The 1-based [start, end] chunk indices that were requested (clamped to available matches)." - } -} - -# Common input schema for all platforms -_INPUT_SCHEMA = { - "input_file": { - "type": "string", - "example": "/path/to/input.txt", - "description": "Absolute path to the input text file to search." + "example": ["/workspace/project/main.py", "/workspace/project/utils.py"], + "description": "List of file paths that contained matches." }, - "keywords": { - "type": "array", - "example": ["Mt. Fuji", "visibility"], - "description": "List of plain-text keywords to search for (OR-ed together, case-insensitive).", - "default": [] + "content": { + "type": "string", + "example": "File: /workspace/main.py\n10:def hello():\n11- pass\n--\n25:def world():\n26- return 1\n", + "description": "Matching lines with line numbers. Match lines use ':' after the line number (e.g., '10:matched line'), context lines use '-' (e.g., '11-context line'). Non-contiguous groups are separated by '--'. For single-file searches, the filepath is shown once at the top to save tokens. For multi-file searches, each file section is prefixed with 'File: path'. Only populated when output_mode is 'content'." }, - "chunk_size": { + "num_lines": { "type": "integer", - "example": 300, - "description": "Approximate number of words per chunk.", - "default": 300 + "example": 15, + "description": "Number of content lines returned. Only populated when output_mode is 'content'." }, - "overlap": { + "num_matches": { "type": "integer", - "example": 50, - "description": "Number of overlapping words between consecutive chunks.", - "default": 50 + "example": 42, + "description": "Total number of matches across all files. Only populated when output_mode is 'count'." }, - "chunk_start": { + "applied_limit": { "type": "integer", - "example": 1, - "description": "1-based start index of the matched chunk range to return.", - "default": 1 + "example": 250, + "description": "The head_limit that was applied, or null if unlimited (head_limit=0). If your results were truncated to this limit, use offset to paginate through the rest." }, - "chunk_end": { + "applied_offset": { "type": "integer", - "example": 5, - "description": "1-based end index of the matched chunk range to return.", - "default": 5 + "example": 0, + "description": "The offset that was applied." } } @action( name="grep_files", - description="Searches a text file for keywords and returns matching chunks with pagination.", + description=( + "Searches files for a regex pattern and returns results. " + "Supports searching a single file or an entire directory recursively. " + "Three output modes: " + "'files_with_matches' (default) returns file paths containing matches — use for discovery. " + "'content' returns matching lines with line numbers and optional before/after context — use to read matched code. " + "In content mode, match lines use ':' after line number (e.g., '10:matched line'), " + "context lines use '-' (e.g., '11-context line'), and non-contiguous groups are separated by '--'. " + "'count' returns match counts per file — use for quick frequency checks. " + "Supports glob and file_type filtering, case-insensitive search, and multiline patterns. " + "Use with read_file: first grep_files to find relevant line numbers, then read_file with offset to read that section." + ), mode="CLI", platforms=["linux", "windows", "darwin"], action_sets=["core"], input_schema=_INPUT_SCHEMA, output_schema=_OUTPUT_SCHEMA, test_payload={ - "input_file": "/path/to/input.txt", - "keywords": ["Mt. Fuji", "visibility"], - "chunk_size": 300, - "overlap": 50, - "chunk_start": 1, - "chunk_end": 5, + "pattern": "Mt\\. Fuji|visibility", + "path": "/path/to/input.txt", + "output_mode": "content", + "case_insensitive": True, + "head_limit": 50, "simulated_mode": True } ) def grep_files(input_data: dict) -> dict: - """Searches a text file for keywords and returns matching chunks with pagination.""" + """Searches files for a regex pattern and returns results.""" import os import re + import fnmatch - def chunk_text(text, chunk_size=300, overlap=50): - """Split text into overlapping word chunks.""" - words = re.findall(r'\S+', text or '') - if not words: - return [] - if chunk_size <= 0: - chunk_size = 300 - if overlap < 0: - overlap = 0 - step = max(1, chunk_size - overlap) - n = len(words) - segments = [] - for start in range(0, n, step): - end = min(start + chunk_size, n) - chunk_words = words[start:end] - if not chunk_words: - break - chunk_text_val = ' '.join(chunk_words).strip() - if not chunk_text_val: + # --- Helper functions (must be inside for sandboxed execution) --- + + def make_error(message): + return { + 'status': 'error', + 'message': message, + 'mode': None, + 'num_files': 0, + 'filenames': [], + 'content': None, + 'num_lines': None, + 'num_matches': None, + 'applied_limit': None, + 'applied_offset': None + } + + def collect_files(directory, glob_pat=None, max_files=10000): + SKIP_DIRS = { + '.git', '.svn', '.hg', '__pycache__', 'node_modules', + '.venv', 'venv', '.env', '.tox', '.mypy_cache', + '.pytest_cache', 'dist', 'build', '.idea', '.vscode' + } + collected = [] + for root, dirs, files in os.walk(directory): + dirs[:] = [d for d in dirs if d not in SKIP_DIRS and not d.startswith('.')] + for fname in files: + if fname.startswith('.'): + continue + if glob_pat and not fnmatch.fnmatch(fname, glob_pat): + continue + collected.append(os.path.join(root, fname)) + if len(collected) >= max_files: + return collected + return collected + + def format_content_lines(fpath, lines, sorted_indices, display_map, single_file, first_file): + result = [] + if single_file: + if first_file: + result.append(f'File: {fpath}') + else: + if not first_file: + result.append('--') + result.append(f'File: {fpath}') + + prev_ln = None + for ln in sorted_indices: + if ln >= len(lines): continue - has_leading = start > 0 - has_trailing = end < n - segments.append({ - 'text': chunk_text_val, - 'start_word_index': start + 1, - 'has_leading_ellipsis': bool(has_leading), - 'has_trailing_ellipsis': bool(has_trailing) - }) - return segments + if prev_ln is not None and ln > prev_ln + 1: + result.append('--') + separator = ':' if display_map[ln] else '-' + result.append(f'{ln + 1}{separator}{lines[ln]}') + prev_ln = ln + return result + + # --- Main logic --- simulated_mode = input_data.get('simulated_mode', False) if simulated_mode: return { 'status': 'success', - 'message': 'Found 1 matching chunk(s)', - 'chunks': ['[line 10] Test chunk with keyword'], - 'total_matches': 1, - 'returned_range': [1, 5] + 'message': 'Found matches in 2 files', + 'mode': 'content', + 'num_files': 2, + 'filenames': ['/path/to/input.txt', '/path/to/other.txt'], + 'content': 'File: /path/to/input.txt\n10:Mt. Fuji is visible today\n11-The mountain was clear\n--\nFile: /path/to/other.txt\n5:visibility is low\n', + 'num_lines': 5, + 'num_matches': None, + 'applied_limit': 50, + 'applied_offset': 0 } + # --- Parse and validate inputs --- + pattern_str = input_data.get('pattern') + if not pattern_str: + return make_error('pattern is required.') + + search_path = input_data.get('path') or os.getcwd() + output_mode = input_data.get('output_mode', 'files_with_matches') + if output_mode not in ('files_with_matches', 'content', 'count'): + output_mode = 'files_with_matches' + + case_insensitive = bool(input_data.get('case_insensitive', False)) + multiline_mode = bool(input_data.get('multiline', False)) + glob_pattern = input_data.get('glob') + file_type = input_data.get('file_type') + + # Context lines (only for content mode) try: - input_file = input_data.get('input_file') - if not input_file: - return { - 'status': 'error', - 'message': 'input_file is required', - 'chunks': [], - 'total_matches': 0, - 'returned_range': [0, 0] - } - - if not os.path.isfile(input_file): - return { - 'status': 'error', - 'message': f'Input file does not exist: {input_file}', - 'chunks': [], - 'total_matches': 0, - 'returned_range': [0, 0] - } - - keywords = input_data.get('keywords') or [] - if not keywords: - return { - 'status': 'error', - 'message': 'keywords must be a non-empty array', - 'chunks': [], - 'total_matches': 0, - 'returned_range': [0, 0] - } + ctx = int(input_data.get('context', 0)) + except (TypeError, ValueError): + ctx = 0 + try: + before_ctx = int(input_data.get('before_context', ctx)) + except (TypeError, ValueError): + before_ctx = ctx + try: + after_ctx = int(input_data.get('after_context', ctx)) + except (TypeError, ValueError): + after_ctx = ctx + before_ctx = max(0, before_ctx) + after_ctx = max(0, after_ctx) + # Pagination + raw_limit = input_data.get('head_limit') + try: + head_limit = int(raw_limit) if raw_limit is not None else 250 + except (TypeError, ValueError): + head_limit = 250 + try: + offset = int(input_data.get('offset', 0)) + except (TypeError, ValueError): + offset = 0 + if head_limit < 0: + head_limit = 250 + unlimited = (head_limit == 0) + if offset < 0: + offset = 0 + + # --- Compile regex --- + flags = 0 + if case_insensitive: + flags |= re.IGNORECASE + if multiline_mode: + flags |= re.DOTALL | re.MULTILINE + + try: + regex = re.compile(pattern_str, flags) + except re.error as e: + return make_error(f'Invalid regex pattern: {e}') + + # --- Collect files to search --- + if not os.path.exists(search_path): + return make_error(f'Path does not exist: {search_path}') + + if os.path.isfile(search_path): + files_to_search = [search_path] + else: + if glob_pattern: + active_glob = glob_pattern + elif file_type: + active_glob = f'*.{file_type.lstrip(".")}' + else: + active_glob = None + files_to_search = collect_files(search_path, active_glob) + + # --- Search each file --- + matched_filenames = [] + content_lines = [] + total_match_count = 0 + count_entries = [] + is_single_file = len(files_to_search) == 1 + + for fpath in files_to_search: try: - chunk_size = int(input_data.get('chunk_size', 300)) - except (TypeError, ValueError): - chunk_size = 300 - try: - overlap = int(input_data.get('overlap', 50)) - except (TypeError, ValueError): - overlap = 50 - try: - start_idx = int(input_data.get('chunk_start', 1)) - except (TypeError, ValueError): - start_idx = 1 - try: - end_idx = int(input_data.get('chunk_end', 5)) - except (TypeError, ValueError): - end_idx = 5 - - # Normalize values - if chunk_size <= 0: - chunk_size = 300 - if overlap < 0: - overlap = 0 - if start_idx < 1: - start_idx = 1 - if end_idx < 1: - end_idx = 1 - if end_idx < start_idx: - start_idx, end_idx = end_idx, start_idx - - with open(input_file, 'r', encoding='utf-8', errors='ignore') as f: - content = f.read() - - segments = chunk_text(content, chunk_size=chunk_size, overlap=overlap) - - if not segments: - return { - 'status': 'success', - 'message': 'File is empty or has no content', - 'chunks': [], - 'total_matches': 0, - 'returned_range': [start_idx, end_idx] - } - - pattern = re.compile('(' + '|'.join(re.escape(k) for k in keywords) + ')', re.I) - matched_segments = [s for s in segments if pattern.search(s['text'])] - - total_matches = len(matched_segments) - if total_matches == 0: - return { - 'status': 'success', - 'message': 'No matches found for the given keywords', - 'chunks': [], - 'total_matches': 0, - 'returned_range': [start_idx, end_idx] - } - - start_idx_clamped = max(1, min(start_idx, total_matches)) - end_idx_clamped = max(1, min(end_idx, total_matches)) - if end_idx_clamped < start_idx_clamped: - start_idx_clamped, end_idx_clamped = end_idx_clamped, start_idx_clamped - - start_zero = start_idx_clamped - 1 - end_zero_excl = end_idx_clamped - - page_segments = matched_segments[start_zero:end_zero_excl] - - def clean_text(s): - s = s.strip() - s = re.sub(r'\s+', ' ', s) - return s - - formatted_chunks = [] - for seg in page_segments: - text_clean = clean_text(seg['text']) - if not text_clean: + with open(fpath, 'r', encoding='utf-8', errors='ignore') as f: + file_content = f.read() + except (OSError, IOError): + continue + + if not file_content: + continue + + lines = file_content.split('\n') + + if multiline_mode: + matches = list(regex.finditer(file_content)) + if not matches: continue - display_text = text_clean - if seg.get('has_leading_ellipsis'): - display_text = '...' + display_text - if seg.get('has_trailing_ellipsis'): - if not display_text.endswith('...'): - display_text = display_text + '...' - line_no = int(seg.get('start_word_index', 1)) - para = f"[line {line_no}] {display_text}" - formatted_chunks.append(para) + matched_line_nums = set() + for m in matches: + start_line = file_content[:m.start()].count('\n') + end_line = file_content[:m.end()].count('\n') + for ln in range(start_line, end_line + 1): + matched_line_nums.add(ln) + else: + matched_line_nums = set() + for i, line in enumerate(lines): + if regex.search(line): + matched_line_nums.add(i) + + if not matched_line_nums: + continue + + matched_filenames.append(fpath) + match_count = len(matched_line_nums) + total_match_count += match_count + + if output_mode == 'count': + count_entries.append(f'{fpath}: {match_count}') + elif output_mode == 'content': + display_map = {} + for ln in matched_line_nums: + display_map[ln] = True + for ctx_ln in range(max(0, ln - before_ctx), min(len(lines), ln + after_ctx + 1)): + if ctx_ln not in display_map: + display_map[ctx_ln] = False + sorted_indices = sorted(display_map.keys()) + file_lines = format_content_lines( + fpath, lines, sorted_indices, display_map, is_single_file, + first_file=(len(content_lines) == 0) + ) + content_lines.extend(file_lines) + + # --- Apply pagination and build output --- + def paginate(items): + after_offset = items[offset:] + if unlimited: + return after_offset + return after_offset[:head_limit] + + effective_limit = None if unlimited else head_limit + + if output_mode == 'files_with_matches': + total = len(matched_filenames) + paginated = paginate(matched_filenames) return { 'status': 'success', - 'message': f'Found {total_matches} matching chunk(s)', - 'chunks': formatted_chunks, - 'total_matches': total_matches, - 'returned_range': [start_idx_clamped, end_idx_clamped] + 'message': f'Found matches in {total} file(s)', + 'mode': 'files_with_matches', + 'num_files': total, + 'filenames': paginated, + 'content': None, + 'num_lines': None, + 'num_matches': None, + 'applied_limit': effective_limit, + 'applied_offset': offset } - except Exception as e: + elif output_mode == 'content': + total_lines = len(content_lines) + paginated = paginate(content_lines) + content_str = '\n'.join(paginated) + if paginated: + content_str += '\n' return { - 'status': 'error', - 'message': str(e), - 'chunks': [], - 'total_matches': 0, - 'returned_range': [0, 0] + 'status': 'success', + 'message': f'Found {total_match_count} match(es) in {len(matched_filenames)} file(s)', + 'mode': 'content', + 'num_files': len(matched_filenames), + 'filenames': matched_filenames, + 'content': content_str, + 'num_lines': len(paginated), + 'num_matches': None, + 'applied_limit': effective_limit, + 'applied_offset': offset + } + + else: # count + paginated = paginate(count_entries) + return { + 'status': 'success', + 'message': f'Total: {total_match_count} match(es) in {len(matched_filenames)} file(s)', + 'mode': 'count', + 'num_files': len(matched_filenames), + 'filenames': matched_filenames, + 'content': '\n'.join(paginated) + '\n' if paginated else '', + 'num_lines': None, + 'num_matches': total_match_count, + 'applied_limit': effective_limit, + 'applied_offset': offset } diff --git a/app/data/action/http_request.py b/app/data/action/http_request.py index 803f7626..643299e9 100644 --- a/app/data/action/http_request.py +++ b/app/data/action/http_request.py @@ -169,6 +169,29 @@ def send_http_requests(input_data: dict) -> dict: return {'status':'error','status_code':0,'response_headers':{},'body':'','final_url':'','elapsed_ms':0,'message':'Unsupported method.'} if not url or not (url.startswith('http://') or url.startswith('https://')): return {'status':'error','status_code':0,'response_headers':{},'body':'','final_url':'','elapsed_ms':0,'message':'Invalid or missing URL.'} + + # SSRF protection: block requests to private/internal networks and cloud metadata + try: + from urllib.parse import urlparse as _urlparse + import ipaddress as _ipaddress + import socket as _socket + _parsed = _urlparse(url) + _hostname = _parsed.hostname or '' + # Block cloud metadata endpoints + _BLOCKED_HOSTS = {'169.254.169.254', 'metadata.google.internal', 'metadata.internal'} + if _hostname in _BLOCKED_HOSTS: + return {'status':'error','status_code':0,'response_headers':{},'body':'','final_url':'','elapsed_ms':0,'message':'Blocked: requests to cloud metadata endpoints are not allowed.'} + # Resolve hostname and check for private IPs + try: + _resolved = _socket.getaddrinfo(_hostname, None) + for _family, _type, _proto, _canonname, _sockaddr in _resolved: + _ip = _ipaddress.ip_address(_sockaddr[0]) + if _ip.is_private or _ip.is_loopback or _ip.is_link_local: + return {'status':'error','status_code':0,'response_headers':{},'body':'','final_url':'','elapsed_ms':0,'message':f'Blocked: requests to private/internal addresses ({_hostname}) are not allowed.'} + except (socket.gaierror, ValueError): + pass # Let the request library handle DNS resolution errors + except Exception: + pass # Best-effort SSRF check; don't block on parsing failures if json_body is not None and data_body is not None: return {'status':'error','status_code':0,'response_headers':{},'body':'','final_url':'','elapsed_ms':0,'message':'Provide either json or data, not both.'} if not isinstance(headers, dict) or not isinstance(params, dict): diff --git a/app/data/action/skill_management.py b/app/data/action/skill_management.py new file mode 100644 index 00000000..7daca570 --- /dev/null +++ b/app/data/action/skill_management.py @@ -0,0 +1,126 @@ +# core/data/action/skill_management.py +""" +Skill Management Actions + +These actions allow the agent to dynamically list and switch skills during task execution. +Both actions belong to the 'core' set and are always available. +""" + +from agent_core import action + + +@action( + name="list_skills", + description=( + "List all enabled skills with their names and descriptions. " + "Use this to discover available skills before using 'use_skill'." + ), + default=False, + mode="ALL", + action_sets=["core"], + input_schema={}, + output_schema={ + "skills": { + "type": "object", + "description": "Dictionary of enabled skill names to their descriptions.", + }, + }, + test_payload={ + "simulated_mode": True, + }, +) +def list_skills(input_data: dict) -> dict: + """List all enabled skills with their names and descriptions.""" + simulated_mode = input_data.get("simulated_mode", False) + + if simulated_mode: + return { + "skills": { + "pdf": "Read and create PDF documents", + "docx": "Read and create Word documents", + }, + } + + import app.internal_action_interface as iai + + try: + result = iai.InternalActionInterface.list_skills() + return result + except Exception as e: + return {"error": str(e)} + + +@action( + name="use_skill", + description=( + "Activate a skill for the current task, replacing the current skill in the system prompt. " + "ONLY use this action when the current skill need to be completely replaced with a new skill. " + "If you only need to read a skill's instructions while keeping the current skill in context, " + "find the skill directory and use 'read_file' on the skill's SKILL.md file instead. " + "Use 'list_skills' first to see enabled skill first." + ), + default=False, + mode="ALL", + action_sets=["core"], + parallelizable=False, + input_schema={ + "skill_name": { + "type": "string", + "description": "Name of the skill to activate.", + "example": "pdf", + }, + }, + output_schema={ + "success": { + "type": "boolean", + "description": "Whether the skill was activated successfully.", + }, + "active_skill": { + "type": "string", + "description": "Name of the now-active skill.", + }, + "skill_description": { + "type": "string", + "description": "Description of the activated skill.", + }, + "previous_skills": { + "type": "array", + "description": "List of previously active skill names that were replaced.", + }, + "added_action_sets": { + "type": "array", + "description": "Action sets that were added as recommended by the skill.", + }, + }, + test_payload={ + "skill_name": "pdf", + "simulated_mode": True, + }, +) +def use_skill(input_data: dict) -> dict: + """Activate a skill, replacing the current skill in the system prompt.""" + skill_name = input_data.get("skill_name", "") + simulated_mode = input_data.get("simulated_mode", False) + + if not skill_name: + return { + "success": False, + "error": "No skill_name specified.", + } + + if simulated_mode: + return { + "success": True, + "active_skill": skill_name, + "skill_description": "Simulated skill description", + "previous_skills": [], + "added_action_sets": [], + } + + import app.internal_action_interface as iai + + try: + result = iai.InternalActionInterface.use_skill(skill_name) + return result + except Exception as e: + return {"success": False, "error": str(e)} diff --git a/app/data/action/task_start.py b/app/data/action/task_start.py index 902ec4a3..a8939b5e 100644 --- a/app/data/action/task_start.py +++ b/app/data/action/task_start.py @@ -66,6 +66,8 @@ async def start_task(input_data: dict) -> dict: # Extract original user query and platform for logging to the new task's event stream original_query = input_data.get("_original_query") original_platform = input_data.get("_original_platform") + # Extract pre-selected skills (from skill slash commands like /pdf, /docx) + pre_selected_skills = input_data.get("_pre_selected_skills") if not task_name: return { @@ -105,6 +107,7 @@ async def start_task(input_data: dict) -> dict: session_id=session_id, original_query=original_query, original_platform=original_platform, + pre_selected_skills=pre_selected_skills, ) return { "status": "success", diff --git a/app/data/action/web_fetch.py b/app/data/action/web_fetch.py index 6dd9906b..361139fd 100644 --- a/app/data/action/web_fetch.py +++ b/app/data/action/web_fetch.py @@ -2,16 +2,15 @@ @action( name="web_fetch", - description="""Fetches content from a URL and returns processed markdown content. -- Takes a URL and an optional prompt describing what information to extract -- Fetches the URL content and converts HTML to markdown -- Uses two-tier extraction: fast static extraction first, then Jina Reader API for JS-rendered sites -- Handles redirects: when redirecting to a different host, returns redirect info -- HTTP URLs are automatically upgraded to HTTPS -- Use web_search action first to find relevant URLs, then use this to read full content - -IMPORTANT: This action may fail for authenticated or private URLs. For sites requiring -authentication (Google Docs, Confluence, Jira, etc.), use specialized authenticated tools.""", + description=( + "Fetches a URL and returns cleaned text/markdown content. " + "Use web_search first to find URLs, then web_fetch to read them. " + "Two modes: 'full' (default) returns extracted page content up to max_content_length chars. " + "'title' returns only the page title (cheap, no content extraction). " + "When content exceeds max_content_length, the full content is saved to a temp file " + "and content_file path is returned — use grep_files to search it or read_file with offset/limit to paginate. " + "HTTP is auto-upgraded to HTTPS (except localhost). Follows up to 10 redirects automatically." + ), mode="CLI", action_sets=["core"], input_schema={ @@ -21,182 +20,214 @@ "description": "The URL to fetch content from. Must be a valid http(s) URL.", "required": True }, - "prompt": { + "mode": { "type": "string", - "example": "Extract the main points and key takeaways from this article", - "description": "Optional prompt describing what information to extract from the page. If provided, content will be structured around this prompt." + "example": "full", + "description": "What to return. 'full' (default): extracted page content up to max_content_length, overflow saved to content_file. 'title': only the page title, no content extraction." }, "timeout": { "type": "number", - "example": 30, - "description": "Request timeout in seconds. Defaults to 30." + "example": 20, + "description": "Request timeout in seconds. Defaults to 20." }, "max_content_length": { "type": "integer", - "example": 50000, - "description": "Maximum content length in characters. Content exceeding this will be truncated. Defaults to 50000." + "example": 5000, + "description": "Maximum content length in characters returned inline. Content beyond this is saved to content_file — use grep_files to search it or read_file with offset/limit to paginate through it. Defaults to 5000. Pass 0 to return all content inline (use sparingly — large pages waste tokens)." }, "use_jina_fallback": { "type": "boolean", "example": True, - "description": "Use Jina Reader API as fallback for JS-rendered sites. Defaults to True." - }, - "min_content_length": { - "type": "integer", - "example": 200, - "description": "Minimum content length to consider extraction successful. Below this triggers fallback. Defaults to 200." + "description": "Use Jina Reader API as fallback for JS-rendered sites when static extraction yields too little content. Defaults to True." } }, output_schema={ "status": { "type": "string", "example": "success", - "description": "'success', 'redirect', or 'error'." + "description": "'success' or 'error'." }, - "url": { - "type": "string", - "description": "The original requested URL." + "status_code": { + "type": "integer", + "example": 200, + "description": "HTTP status code (e.g., 200, 404, 500)." }, - "final_url": { + "status_text": { "type": "string", - "description": "The final URL after any redirects (same host only)." + "example": "OK", + "description": "HTTP status reason (e.g., 'OK', 'Not Found')." }, - "redirect_url": { + "url": { "type": "string", - "description": "Present when status='redirect'. The URL to follow for cross-host redirects." + "description": "The final URL after following redirects." }, "title": { "type": "string", - "description": "The page title." + "description": "The page title, if extracted." }, "content": { "type": "string", - "description": "The extracted content in markdown format." + "description": "The extracted page content in markdown/text format, up to max_content_length chars. Empty when mode is 'title'." }, "content_length": { "type": "integer", - "description": "Length of the content in characters." + "description": "Length of the inline content in characters." + }, + "total_content_length": { + "type": "integer", + "description": "Total length of the full extracted content before truncation. Compare with content_length to know how much was cut." }, "was_truncated": { "type": "boolean", - "description": "True if content was truncated due to max_content_length." + "description": "True if content was truncated to max_content_length. When true, content_file contains the full content — use grep_files to search it or read_file with offset/limit to paginate." }, - "prompt_used": { + "content_file": { "type": "string", - "description": "The prompt that was applied (if any)." + "description": "Absolute path to the full content file when was_truncated is true. Use grep_files(pattern, path=content_file) to search for specific information, or read_file(file_path=content_file, offset=N, limit=M) to paginate. Null if content was not truncated." }, "message": { "type": "string", "description": "Error or informational message." - }, - "extraction_method": { - "type": "string", - "description": "Method used for extraction: 'static' (trafilatura/BeautifulSoup) or 'jina' (Jina Reader API)." } }, requirement=["requests", "beautifulsoup4", "trafilatura", "lxml"], test_payload={ "url": "https://example.com/article", - "prompt": "Summarize the main content", - "timeout": 30, + "timeout": 20, "simulated_mode": True } ) def web_fetch(input_data: dict) -> dict: - """ - Fetches content from a URL and returns processed markdown content. - Uses two-tier extraction: fast static extraction first, then Jina Reader API for JS-rendered sites. - """ + """Fetches a URL and returns cleaned text/markdown content.""" import re + import os + import tempfile from urllib.parse import urlparse + from datetime import datetime, timezone - simulated_mode = input_data.get('simulated_mode', False) - url = str(input_data.get('url', '')).strip() - prompt = str(input_data.get('prompt', '')).strip() if input_data.get('prompt') else None - timeout = float(input_data.get('timeout', 30)) - max_content_length = int(input_data.get('max_content_length', 50000)) - use_jina_fallback = input_data.get('use_jina_fallback', True) - min_content_length = int(input_data.get('min_content_length', 200)) + # --- Helper functions (must be inside for sandboxed execution) --- - def _make_error(message, url=''): + def make_error(message, err_url='', status_code=0, status_text=''): return { 'status': 'error', - 'url': url, - 'final_url': '', + 'status_code': status_code, + 'status_text': status_text, + 'url': err_url, 'title': '', 'content': '', 'content_length': 0, + 'total_content_length': 0, 'was_truncated': False, - 'prompt_used': prompt or '', + 'content_file': None, 'message': message } - def _make_redirect(original_url, redirect_url): + def make_result(res_url, title, content, total_content_length, + status_code, status_text, + was_truncated=False, content_file=None, message=''): return { - 'status': 'redirect', - 'url': original_url, - 'final_url': '', - 'redirect_url': redirect_url, - 'title': '', - 'content': '', - 'content_length': 0, - 'was_truncated': False, - 'prompt_used': prompt or '', - 'message': f'Redirect to different host detected. Please make a new request to: {redirect_url}' + 'status': 'success', + 'status_code': status_code, + 'status_text': status_text, + 'url': res_url, + 'title': title or '', + 'content': content, + 'content_length': len(content), + 'total_content_length': total_content_length, + 'was_truncated': was_truncated, + 'content_file': content_file, + 'message': message } - # Validate URL - if not url: - return _make_error('URL is required.') + def save_content_file(content, file_url, sess_id): + save_dir = None + if sess_id: + try: + current = os.path.abspath(__file__) + for _ in range(10): + current = os.path.dirname(current) + if os.path.isdir(os.path.join(current, 'agent_file_system')): + save_dir = os.path.join(current, 'agent_file_system', 'workspace', 'tmp', sess_id) + break + except Exception: + pass - # Auto-upgrade HTTP to HTTPS - if url.startswith('http://'): - url = 'https://' + url[7:] + if not save_dir: + save_dir = tempfile.gettempdir() - if not re.match(r'^https?://', url, re.I): - return _make_error('A valid http(s) URL is required.', url) + os.makedirs(save_dir, exist_ok=True) - # Parse original URL for host comparison - try: - original_parsed = urlparse(url) - original_host = original_parsed.netloc.lower() - except Exception as e: - return _make_error(f'Invalid URL format: {str(e)}', url) + try: + domain = urlparse(file_url).hostname or 'unknown' + domain = domain.replace('.', '_') + except Exception: + domain = 'unknown' - # Simulated mode for testing - if simulated_mode: - mock_content = f"""# Test Page Title + ts = datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S%f') + filename = f'web_fetch_{domain}_{ts}.md' + file_path = os.path.join(save_dir, filename) -This is simulated content fetched from {url}. + with open(file_path, 'w', encoding='utf-8') as f: + f.write(f'\n\n') + f.write(content) -## Main Content + return file_path -This is the main body of the page content, converted to markdown format. + # --- Main logic --- -- Point 1: Important information -- Point 2: More details -- Point 3: Additional context + simulated_mode = input_data.get('simulated_mode', False) + url = str(input_data.get('url', '')).strip() + fetch_mode = str(input_data.get('mode', 'full')).strip().lower() + if fetch_mode not in ('full', 'title'): + fetch_mode = 'full' + timeout = float(input_data.get('timeout', 20)) + raw_max = input_data.get('max_content_length') + try: + max_content_length = int(raw_max) if raw_max is not None else 5000 + except (TypeError, ValueError): + max_content_length = 5000 + if max_content_length < 0: + max_content_length = 5000 + unlimited = (max_content_length == 0) + use_jina_fallback = input_data.get('use_jina_fallback', True) + session_id = input_data.get('_session_id', '') -## Summary + # --- Validate URL --- + if not url: + return make_error('URL is required.') -This is a test page demonstrating the web_fetch action functionality. -""" - if prompt: - mock_content = f"**Prompt:** {prompt}\n\n---\n\n{mock_content}" + # Auto-upgrade HTTP to HTTPS (except localhost) + if url.startswith('http://'): + try: + parsed = urlparse(url) + host = parsed.hostname or '' + if host not in ('localhost', '127.0.0.1', '::1'): + url = 'https://' + url[7:] + except Exception: + url = 'https://' + url[7:] - return { - 'status': 'success', - 'url': url, - 'final_url': url, - 'title': 'Test Page Title', - 'content': mock_content, - 'content_length': len(mock_content), - 'was_truncated': False, - 'prompt_used': prompt or '', - 'message': '' - } + if not re.match(r'^https?://', url, re.I): + return make_error('A valid http(s) URL is required.', url) - # Fetch the URL + # --- Simulated mode --- + if simulated_mode: + mock_content = ( + "# Test Page Title\n\n" + "This is simulated content fetched from the URL.\n\n" + "## Main Content\n\n" + "- Point 1: Important information\n" + "- Point 2: More details\n" + "- Point 3: Additional context\n\n" + "## Summary\n\n" + "This is a test page demonstrating the web_fetch action." + ) + if fetch_mode == 'title': + return make_result(url, 'Test Page Title', '', 0, 200, 'OK') + return make_result( + url, 'Test Page Title', mock_content, len(mock_content), 200, 'OK' + ) + + # --- Fetch the URL --- try: import requests from bs4 import BeautifulSoup @@ -208,39 +239,27 @@ def _make_redirect(original_url, redirect_url): 'Accept-Language': 'en-US,en;q=0.9' } - # First, make a HEAD request to check for redirects without downloading content - try: - head_response = requests.head(url, headers=headers, timeout=timeout, allow_redirects=True) - final_url = str(head_response.url) - final_parsed = urlparse(final_url) - final_host = final_parsed.netloc.lower() - - # Check if redirect is to a different host - if final_host != original_host: - return _make_redirect(url, final_url) - except requests.exceptions.RequestException: - # HEAD failed, continue with GET - pass - - # Fetch the content - response = requests.get(url, headers=headers, timeout=timeout, allow_redirects=True, stream=True) + # Fetch content — follow up to 10 redirects automatically + response = requests.get( + url, headers=headers, timeout=timeout, + allow_redirects=True, stream=True + ) response.raise_for_status() + status_code = response.status_code + status_text = response.reason or '' final_url = str(response.url) - final_parsed = urlparse(final_url) - final_host = final_parsed.netloc.lower() - - # Double-check for cross-host redirect - if final_host != original_host: - return _make_redirect(url, final_url) # Check content type content_type = response.headers.get('Content-Type', '') if not any(t in content_type for t in ('text/html', 'application/xhtml+xml', 'text/plain')): - return _make_error(f'Unsupported content-type: {content_type}', url) + return make_error( + f'Unsupported content-type: {content_type}', final_url, + status_code=status_code, status_text=status_text + ) - # Read content with size limit - max_bytes = max_content_length * 4 # Rough estimate for UTF-8 + # Read content with size limit (raw bytes cap to prevent memory issues) + max_bytes = 500000 # 500KB raw cap content_bytes = b'' for chunk in response.iter_content(chunk_size=65536): if chunk: @@ -251,13 +270,32 @@ def _make_redirect(original_url, redirect_url): encoding = response.encoding or 'utf-8' html_text = content_bytes.decode(encoding, errors='replace') - # === TIER 1: Fast Static Extraction === + # === Extract title (needed for both modes) === title = '' + try: + meta = trafilatura.metadata.extract_metadata(content_bytes, url=final_url) + if meta and getattr(meta, 'title', None): + title = meta.title.strip() + except Exception: + pass + + if not title: + try: + soup_title = BeautifulSoup(html_text[:5000], 'lxml') + if soup_title.title and soup_title.title.string: + title = soup_title.title.string.strip() + except Exception: + pass + + # === Title mode: return just the title === + if fetch_mode == 'title': + return make_result(final_url, title, '', 0, status_code, status_text) + + # === Full mode: extract content === content_md = '' - extraction_method = 'static' + min_content_length = 200 try: - # Try trafilatura for main content extraction content_md = trafilatura.extract( content_bytes, url=final_url, @@ -265,42 +303,27 @@ def _make_redirect(original_url, redirect_url): include_tables=True, output_format='markdown' ) or '' - - # Try to get title from metadata - try: - meta = trafilatura.metadata.extract_metadata(content_bytes, url=final_url) - if meta and getattr(meta, 'title', None): - title = meta.title.strip() - except Exception: - pass - except Exception: pass - # Fallback to BeautifulSoup if trafilatura fails + # Fallback to BeautifulSoup if not content_md or len(content_md) < min_content_length: - soup = BeautifulSoup(html_text, 'lxml') - - # Get title - if not title and soup.title and soup.title.string: - title = soup.title.string.strip() + try: + soup = BeautifulSoup(html_text, 'lxml') - # Remove script/style elements - for tag in soup(['script', 'style', 'noscript', 'nav', 'footer', 'header']): - tag.decompose() + for tag in soup(['script', 'style', 'noscript', 'nav', 'footer', 'header']): + tag.decompose() - # Get text content - text = soup.get_text('\n') - # Clean up whitespace - text = re.sub(r'\n\s*\n\s*\n+', '\n\n', text) - bs_content = text.strip() + text = soup.get_text('\n') + text = re.sub(r'\n\s*\n\s*\n+', '\n\n', text) + bs_content = text.strip() - # Use BeautifulSoup content if better than trafilatura - if len(bs_content) > len(content_md or ''): - content_md = bs_content + if len(bs_content) > len(content_md or ''): + content_md = bs_content + except Exception: + pass - # === TIER 2: Jina Reader API Fallback === - # Use Jina if static extraction got insufficient content + # === Jina Reader API Fallback === if use_jina_fallback and (not content_md or len(content_md) < min_content_length): try: jina_url = f"https://r.jina.ai/{url}" @@ -312,62 +335,74 @@ def _make_redirect(original_url, redirect_url): if jina_response.status_code == 200: jina_content = jina_response.text.strip() - - # Jina returns markdown with title as first line if jina_content and len(jina_content) > min_content_length: content_md = jina_content - extraction_method = 'jina' - - # Extract title from Jina response (usually first # heading) - title_match = re.match(r'^#\s*(.+?)[\n\r]', jina_content) - if title_match and not title: - title = title_match.group(1).strip() + if not title: + title_match = re.match(r'^#\s*(.+?)[\n\r]', jina_content) + if title_match: + title = title_match.group(1).strip() except Exception: - # Jina fallback failed, continue with whatever we have pass - # === Content Quality Check === - # Clean and validate content + # === Clean content === if content_md: - # Remove excessive whitespace content_md = re.sub(r'\n{4,}', '\n\n\n', content_md) content_md = content_md.strip() - # Check if truncation is needed + if not content_md: + return make_result( + final_url, title, '', 0, status_code, status_text, + message='No content could be extracted. Site may require JavaScript rendering — use browser tools (Playwright) instead.' + ) + + total_content_length = len(content_md) + + # === Truncation + file save === was_truncated = False - if len(content_md) > max_content_length: - content_md = content_md[:max_content_length] - # Try to truncate at a sentence boundary - last_period = content_md.rfind('.') + content_file = None + + if not unlimited and total_content_length > max_content_length: + content_file = save_content_file(content_md, final_url, session_id) + + truncated = content_md[:max_content_length] + last_period = truncated.rfind('.') if last_period > max_content_length * 0.8: - content_md = content_md[:last_period + 1] - content_md += '\n\n[Content truncated due to length...]' + truncated = truncated[:last_period + 1] + content_md = truncated was_truncated = True - # Build result with extraction method info + # === Build message === message = '' - if not content_md or len(content_md) < min_content_length: - message = 'Warning: Extracted content may be incomplete. Site may require JavaScript rendering or authentication.' - - return { - 'status': 'success', - 'url': url, - 'final_url': final_url, - 'title': title or '', - 'content': content_md, - 'content_length': len(content_md), - 'was_truncated': was_truncated, - 'prompt_used': prompt or '', - 'message': message, - 'extraction_method': extraction_method - } + if was_truncated: + message = ( + f'Content truncated to {len(content_md)} chars. ' + f'Full content ({total_content_length} chars) saved to content_file. ' + f'Use grep_files(pattern, path=content_file) to search for specific info, ' + f'or read_file(file_path=content_file, offset=N, limit=M) to paginate.' + ) + + return make_result( + final_url, title, content_md, total_content_length, + status_code, status_text, + was_truncated=was_truncated, content_file=content_file, + message=message + ) - except requests.exceptions.Timeout: - return _make_error(f'Request timed out after {timeout} seconds.', url) - except requests.exceptions.ConnectionError as e: - return _make_error(f'Connection error: {str(e)}', url) - except requests.exceptions.HTTPError as e: - return _make_error(f'HTTP error: {str(e)}', url) except Exception as e: - return _make_error(f'Unexpected error: {str(e)}', url) + sc, st = 0, '' + if hasattr(e, 'response') and e.response is not None: + sc = e.response.status_code + st = e.response.reason or '' + + error_type = type(e).__name__ + if 'Timeout' in error_type: + msg = f'Request timed out after {timeout} seconds.' + elif 'ConnectionError' in error_type: + msg = f'Connection error: {str(e)}' + elif 'HTTPError' in error_type: + msg = f'HTTP error: {str(e)}' + else: + msg = f'Fetch failed: {str(e)}' + + return make_error(msg, url, status_code=sc, status_text=st) diff --git a/app/data/agent_file_system_template/AGENT.md b/app/data/agent_file_system_template/AGENT.md index 426f8b5d..910a24a6 100644 --- a/app/data/agent_file_system_template/AGENT.md +++ b/app/data/agent_file_system_template/AGENT.md @@ -26,7 +26,7 @@ Efficient File Reading: File Actions: - read_file: General reading with pagination (offset/limit) -- grep_files: Search for keywords, returns matching chunks with line numbers +- grep_files: Search files/directories for regex patterns with three output modes: 'files_with_matches' (discover files), 'content' (matching lines with line numbers), 'count' (match counts). Supports glob/file_type filtering, before/after context lines, case_insensitive, and multiline. - stream_read + stream_edit: Use together for file modifications Avoid: Reading entire large files repeatedly - use grep + targeted offset/limit reads instead diff --git a/app/data/agent_file_system_template/USER.md b/app/data/agent_file_system_template/USER.md index 74b1af08..e8bfb558 100644 --- a/app/data/agent_file_system_template/USER.md +++ b/app/data/agent_file_system_template/USER.md @@ -10,14 +10,15 @@ - **Language:** en - **Preferred Tone:** (Ask the users for info) - **Response Style:** (Ask the users for info) +- **Preferred Messaging Platform:** (Ask the users for info) ## Agent Interaction - **Prefer Proactive Assistance:** (Ask the users for info) - **Approval Required For:** (Ask the users for info) ## Life Goals -- **Goals:** (Ask the users for info) -- **Help Wanted:** (Ask the users for info) + +(Ask the users for info) ## Personality diff --git a/app/data/agent_profile/default.png b/app/data/agent_profile/default.png new file mode 100644 index 00000000..bc994c75 Binary files /dev/null and b/app/data/agent_profile/default.png differ diff --git a/app/external_comms/credentials.py b/app/external_comms/credentials.py index 3e6808cc..b1b23e78 100644 --- a/app/external_comms/credentials.py +++ b/app/external_comms/credentials.py @@ -10,6 +10,8 @@ import json import logging +import os +import stat from dataclasses import asdict, fields from pathlib import Path from typing import Optional, Type, TypeVar @@ -32,6 +34,11 @@ def _get_credentials_dir() -> Path: from app.config import PROJECT_ROOT _credentials_dir = PROJECT_ROOT / ".credentials" _credentials_dir.mkdir(parents=True, exist_ok=True) + # Restrict directory permissions to owner only (rwx------) + try: + os.chmod(_credentials_dir, stat.S_IRWXU) + except OSError: + pass # Best-effort on platforms that don't support chmod (e.g. Windows) return _credentials_dir @@ -78,6 +85,11 @@ def save_credential(filename: str, credential) -> None: try: with open(path, "w", encoding="utf-8") as f: json.dump(asdict(credential), f, indent=2, default=str) + # Restrict file permissions to owner read/write only (rw-------) + try: + os.chmod(path, stat.S_IRUSR | stat.S_IWUSR) + except OSError: + pass # Best-effort on platforms that don't support chmod logger.info(f"Saved credential: {filename}") except Exception as e: logger.error(f"Failed to save credential {filename}: {e}") diff --git a/app/gui/gui_module.py b/app/gui/gui_module.py index 6d53c583..24374600 100644 --- a/app/gui/gui_module.py +++ b/app/gui/gui_module.py @@ -786,13 +786,15 @@ async def _check_agent_limits(self) -> bool: token_count: int = agent_properties.get("token_count", 0) max_tokens: int = agent_properties.get("max_tokens_per_task", 0) - # Check action limits + # Check action limits - returns False to switch to CLI mode, + # where the agent_base's _check_agent_limits will handle the + # pause-and-ask flow with user options. if (action_count / max_actions) >= 1.0: return False # Check token limits if (token_count / max_tokens) >= 1.0: return False - + # No limits close or reached return True \ No newline at end of file diff --git a/app/internal_action_interface.py b/app/internal_action_interface.py index a1486f1b..05a98b27 100644 --- a/app/internal_action_interface.py +++ b/app/internal_action_interface.py @@ -149,23 +149,48 @@ def describe_screen(cls) -> Dict[str, str]: description = cls.describe_image(img_path) return {"description": description, "file_path": img_path} + @staticmethod + def _resolve_outbound_platform( + platform: Optional[str], + session_id: Optional[str], + ) -> str: + """Decide which platform an outbound message should be routed to. + + Resolution order: + 1. Explicit `platform` argument if provided. + 2. `source_platform` on the task identified by `session_id`. + 3. User's Preferred Messaging Platform from USER.md (which itself + falls back to "CraftBot Interface" when unset). + """ + if platform: + return platform + if session_id and InternalActionInterface.task_manager is not None: + task = InternalActionInterface.task_manager.get_task_by_id(session_id) + if task and task.source_platform: + return task.source_platform + from app.onboarding.profile_writer import read_preferred_messaging_platform + return read_preferred_messaging_platform() + @staticmethod async def do_chat( message: str, - platform: str = "CraftBot TUI", + platform: Optional[str] = None, session_id: Optional[str] = None, ) -> None: """Record an agent-authored chat message to the event stream. Args: message: The message content to record. - platform: The platform the message is sent to (default: "CraftBot TUI"). + platform: Optional platform override. If omitted, the task's + source_platform (looked up via session_id) is used, falling + back to "CraftBot Interface". session_id: Optional task/session ID for multi-task isolation. """ if InternalActionInterface.state_manager is None: raise RuntimeError("InternalActionInterface not initialized with StateManager.") + resolved_platform = InternalActionInterface._resolve_outbound_platform(platform, session_id) InternalActionInterface.state_manager.record_agent_message( - message, session_id=session_id, platform=platform + message, session_id=session_id, platform=resolved_platform ) @staticmethod @@ -238,9 +263,11 @@ async def do_chat_with_attachments( raise RuntimeError("InternalActionInterface not initialized with StateManager.") attachment_notes = "\n".join([f"[Attachment: {fp}]" for fp in file_paths]) + resolved_platform = InternalActionInterface._resolve_outbound_platform(None, session_id) InternalActionInterface.state_manager.record_agent_message( f"{message}\n\n{attachment_notes}", session_id=session_id, + platform=resolved_platform, ) # For non-browser adapters, we can't verify files exist, so assume success return {"success": True, "files_sent": len(file_paths), "errors": None} @@ -302,6 +329,7 @@ async def do_create_task( session_id: Optional[str] = None, original_query: Optional[str] = None, original_platform: Optional[str] = None, + pre_selected_skills: Optional[List[str]] = None, ) -> Dict[str, Any]: """ Create a new task with automatic skill and action set selection. @@ -319,6 +347,9 @@ async def do_create_task( event stream before the task_start event. original_platform: Optional platform where the original message came from (e.g., "CraftBot TUI", "Telegram", "Whatsapp"). + pre_selected_skills: Optional list of skill names to use directly, + bypassing LLM skill selection. Used when skills are + invoked explicitly via slash commands (e.g., /pdf). Returns: Dictionary with task_id, action_sets, action_count, and selected_skills. @@ -330,12 +361,27 @@ async def do_create_task( # Each task's stream is created when the task starts and cleaned up when the task ends. # Stream lifecycle is managed by TaskManager via on_stream_create/on_stream_remove hooks. - # Select skills and action sets in a single LLM call (optimized) - # Skills are selected first, then action sets with knowledge of skill recommendations - selected_skills, all_action_sets = await cls._select_skills_and_action_sets_via_llm( - task_name, task_description, source_platform=original_platform - ) - logger.info(f"[TASK] Auto-selected skills for '{task_name}': {selected_skills}") + if pre_selected_skills: + # Skills explicitly selected via slash command — skip LLM skill selection + # but still select action sets (including skill-recommended ones) + selected_skills = pre_selected_skills + # Get action sets recommended by pre-selected skills + from agent_core.core.impl.skill.manager import skill_manager + from app.action.action_set import action_set_manager + + skill_action_sets = skill_manager.get_skill_action_sets(selected_skills) + # Also run LLM action set selection for additional sets needed + llm_action_sets = await cls._select_action_sets_via_llm(task_name, task_description) + # Merge: skill-recommended + LLM-selected (deduplicated) + all_action_sets = list(dict.fromkeys(skill_action_sets + llm_action_sets)) + logger.info(f"[TASK] Pre-selected skills (via command): {selected_skills}") + else: + # Select skills and action sets in a single LLM call (optimized) + # Skills are selected first, then action sets with knowledge of skill recommendations + selected_skills, all_action_sets = await cls._select_skills_and_action_sets_via_llm( + task_name, task_description, source_platform=original_platform + ) + logger.info(f"[TASK] Auto-selected skills for '{task_name}': {selected_skills}") logger.info(f"[TASK] Final action sets: {all_action_sets}") # Create task with selected skills and action sets @@ -982,3 +1028,77 @@ def list_action_sets(cls) -> Dict[str, Any]: "available_sets": available_sets, "current_sets": current_sets, } + + @classmethod + def list_skills(cls) -> Dict[str, Any]: + """ + List all enabled skills with their names and descriptions. + + Returns: + Dictionary with skill names mapped to descriptions. + """ + from agent_core.core.impl.skill.manager import skill_manager + + skills = skill_manager.list_skills_for_selection() + return {"skills": skills} + + @classmethod + def use_skill(cls, skill_name: str) -> Dict[str, Any]: + """ + Activate a skill for the current task, replacing the current skill + in the system prompt. Invalidates and re-creates LLM session caches + so the updated system prompt takes effect. + + Args: + skill_name: Name of the skill to activate. + + Returns: + Dictionary with success status and skill details. + """ + if cls.task_manager is None: + raise RuntimeError("InternalActionInterface not initialized with TaskManager.") + + from agent_core.core.impl.skill.manager import skill_manager + + # Validate skill exists and is enabled + skill = skill_manager.get_skill(skill_name) + if not skill: + return {"success": False, "error": f"Skill '{skill_name}' not found."} + if not skill.enabled: + return {"success": False, "error": f"Skill '{skill_name}' is not enabled."} + + # Get current task and save previous skills + task = cls.task_manager.get_task() + if not task: + return {"success": False, "error": "No active task."} + + previous_skills = list(task.selected_skills) + + # Replace selected skills + task.selected_skills = [skill_name] + + # Add skill-recommended action sets (if any new ones) + added_action_sets = [] + recommended_sets = skill_manager.get_skill_action_sets([skill_name]) + if recommended_sets: + current_sets = set(task.action_sets) + new_sets = [s for s in recommended_sets if s not in current_sets] + if new_sets: + cls.add_action_sets(new_sets) # This also invalidates caches + added_action_sets = new_sets + else: + # No new action sets but system prompt still changed — invalidate caches + cls._invalidate_action_selection_caches() + else: + # No recommended sets — still need to invalidate for skill change + cls._invalidate_action_selection_caches() + + logger.info(f"[SKILL] Activated skill '{skill_name}' (replaced: {previous_skills})") + + return { + "success": True, + "active_skill": skill_name, + "skill_description": skill.description, + "previous_skills": previous_skills, + "added_action_sets": added_action_sets, + } diff --git a/app/main.py b/app/main.py index ce4e5dd4..50f2c83b 100644 --- a/app/main.py +++ b/app/main.py @@ -56,6 +56,7 @@ def _suppress_console_logging_early() -> None: import argparse import asyncio import sys +import pathlib # Register agent_core state provider and config before importing AgentBase # This ensures shared code can access state via get_state() @@ -64,10 +65,12 @@ def _suppress_console_logging_early() -> None: # CraftBot uses global STATE singleton - always available StateRegistry.register(lambda: STATE) -ConfigRegistry.register_workspace_root(".") +ConfigRegistry.register_workspace_root( + str(pathlib.Path(__file__).parent.parent.resolve()) +) # Import settings reader (reads directly from settings.json) -from app.config import get_llm_provider, get_api_key, get_base_url, get_llm_model +from app.config import get_llm_provider, get_vlm_provider, get_api_key, get_base_url, get_llm_model, get_vlm_model from app.agent_base import AgentBase @@ -110,11 +113,11 @@ def _parse_cli_args() -> dict: return vars(args) -def _initial_settings() -> tuple[str, str, str, bool]: +def _initial_settings() -> tuple: """Determine initial provider, API key, and base URL from settings.json. Returns: - Tuple of (provider, api_key, base_url, has_valid_key) where has_valid_key + Tuple of (provider, api_key, base_url, model, vlm_provider, vlm_model, has_valid_key) where has_valid_key indicates if a working API key was found. """ # Read directly from settings.json @@ -122,11 +125,13 @@ def _initial_settings() -> tuple[str, str, str, bool]: api_key = get_api_key(provider) base_url = get_base_url(provider) model = get_llm_model() # None → use registry default for the provider + vlm_prov = get_vlm_provider() + vlm_mod = get_vlm_model() # Remote (Ollama) doesn't require API key has_key = bool(api_key) or provider == "remote" - return provider, api_key, base_url, model, has_key + return provider, api_key, base_url, model, vlm_prov, vlm_mod, has_key async def main_async() -> None: @@ -136,7 +141,7 @@ async def main_async() -> None: browser_mode = cli_args.get("browser", False) # Get settings from settings.json - provider, api_key, base_url, model, has_valid_key = _initial_settings() + provider, api_key, base_url, model, vlm_prov, vlm_mod, has_valid_key = _initial_settings() # CLI args override settings.json if provided if cli_args.get("provider"): @@ -159,6 +164,8 @@ async def main_async() -> None: llm_api_key=api_key, llm_base_url=base_url, llm_model=model, + vlm_provider=vlm_prov, + vlm_model=vlm_mod, deferred_init=not has_valid_key, ) diff --git a/app/onboarding/interfaces/steps.py b/app/onboarding/interfaces/steps.py index e8899440..40cdd035 100644 --- a/app/onboarding/interfaces/steps.py +++ b/app/onboarding/interfaces/steps.py @@ -24,6 +24,17 @@ class StepOption: requires_setup: bool = False # Whether this option requires additional setup (API key, etc.) +@dataclass +class FormField: + """A field in a multi-field form step (e.g., User Profile).""" + name: str # Field key (e.g., "user_name") + label: str # Display label + field_type: str # "text", "select", "multi_checkbox" + options: List["StepOption"] = field(default_factory=list) # For select/checkbox types + default: Any = "" # Default value + placeholder: str = "" # Hint text + + @dataclass class StepResult: """Result of completing an onboarding step.""" @@ -198,22 +209,256 @@ def get_env_var_name(self) -> Optional[str]: class AgentNameStep: - """Agent name configuration step.""" + """Agent name + profile picture configuration step.""" name = "agent_name" - title = "Agent Name" - description = "Give your agent a name (optional)" + title = "Agent Identity" + description = "Give your agent a name and an optional avatar." required = False + ALLOWED_PICTURE_EXTS = {"png", "jpg", "jpeg", "webp", "gif"} + + def get_form_fields(self) -> List[FormField]: + return [ + FormField( + name="agent_name", + label="Agent Name", + field_type="text", + default="CraftBot", + placeholder="Enter a name", + ), + FormField( + name="agent_profile_picture", + label="Avatar", + field_type="image_upload", + default="", + placeholder="", + ), + ] + + def get_options(self) -> List[StepOption]: + return [] + + def validate(self, value: Any) -> tuple[bool, Optional[str]]: + # Accept legacy string submissions (plain text name) for backward compat. + if isinstance(value, str): + return True, None + if isinstance(value, dict): + picture = value.get("agent_profile_picture") + if picture not in (None, ""): + if not isinstance(picture, str) or picture.lower() not in self.ALLOWED_PICTURE_EXTS: + return False, "Unsupported avatar format" + return True, None + return False, "Invalid agent identity submission" + + def get_default(self) -> Dict[str, Any]: + return { + "agent_name": "CraftBot", + "agent_profile_picture": "", + } + + +class UserProfileStep: + """User profile form step — collects identity and preferences in a compact form.""" + + name = "user_profile" + title = "User Profile" + description = "Tell us about yourself to personalize your experience." + required = False + + TONE_OPTIONS = [ + ("casual", "Casual"), + ("formal", "Formal"), + ("friendly", "Friendly"), + ("professional", "Professional"), + ] + + PROACTIVITY_OPTIONS = [ + ("low", "Low", "Wait for instructions"), + ("medium", "Medium", "Suggest when relevant"), + ("high", "High", "Proactively suggest things"), + ] + + APPROVAL_OPTIONS = [ + ("messages", "Messages", "Sending messages on your behalf"), + ("scheduling", "Scheduling", "Creating/modifying schedules"), + ("file_changes", "File Changes", "Modifying files on your system"), + ("purchases", "Purchases", "Making purchases or payments"), + ("all", "All Actions", "Ask approval for everything"), + ] + + PLATFORM_OPTIONS = [ + ("telegram", "Telegram"), + ("whatsapp", "WhatsApp"), + ("discord", "Discord"), + ("slack", "Slack"), + ("tui", "CraftBot Interface"), + ] + + @staticmethod + def fetch_geolocation() -> str: + """Fetch user's location from IP. Returns 'City, Country' or '' on failure.""" + try: + import requests + resp = requests.get("http://ip-api.com/json", timeout=3) + if resp.status_code == 200: + data = resp.json() + city = data.get("city", "") + country = data.get("country", "") + if city and country: + return f"{city}, {country}" + return country or city or "" + except Exception: + pass + return "" + + @staticmethod + def get_language_options() -> List[StepOption]: + """Get a dynamic list of languages using babel. Pre-select based on OS locale.""" + try: + from babel import Locale + import locale as _locale + + # Get OS locale for pre-selection + try: + os_locale = _locale.getdefaultlocale()[0] or "en_US" + os_lang = os_locale.split("_")[0] + except Exception: + os_lang = "en" + + # Get all language display names from babel (in English) + lang_names = Locale("en").languages + + # Filter to commonly-used languages (those with 2-letter ISO codes) + # and sort by display name + seen = set() + options = [] + for code, display_name in sorted(lang_names.items(), key=lambda x: x[1]): + # Only include 2-letter codes (ISO 639-1) to keep list manageable + if len(code) == 2 and code not in seen: + seen.add(code) + options.append(StepOption( + value=code, + label=display_name, + description=code, + default=(code == os_lang), + )) + return options + except ImportError: + # Fallback if babel not installed — return a minimal list + return [ + StepOption(value="en", label="English", description="en", default=True), + StepOption(value="zh", label="Chinese", description="zh"), + StepOption(value="es", label="Spanish", description="es"), + StepOption(value="fr", label="French", description="fr"), + StepOption(value="de", label="German", description="de"), + StepOption(value="ja", label="Japanese", description="ja"), + StepOption(value="ko", label="Korean", description="ko"), + StepOption(value="pt", label="Portuguese", description="pt"), + StepOption(value="ru", label="Russian", description="ru"), + StepOption(value="ar", label="Arabic", description="ar"), + ] + + def get_form_fields(self) -> List[FormField]: + """Return all form fields for the user profile step.""" + # Fetch defaults + try: + location_default = self.fetch_geolocation() + except Exception: + location_default = "" + + language_options = self.get_language_options() + + # Find pre-selected language + lang_default = "en" + for opt in language_options: + if opt.default: + lang_default = opt.value + break + + return [ + FormField( + name="user_name", + label="Your Name", + field_type="text", + placeholder="What should we call you?", + default="", + ), + FormField( + name="location", + label="Location", + field_type="text", + placeholder="City, Country", + default=location_default, + ), + FormField( + name="language", + label="CraftBot's Language", + field_type="select", + options=language_options, + default=lang_default, + placeholder="The language CraftBot will communicate in (not the interface language)", + ), + FormField( + name="tone", + label="Communication Tone", + field_type="select", + options=[ + StepOption(value=val, label=label, default=(val == "casual")) + for val, label in self.TONE_OPTIONS + ], + default="casual", + ), + FormField( + name="proactivity", + label="Proactive Level", + field_type="select", + options=[ + StepOption(value=val, label=label, description=desc, default=(val == "medium")) + for val, label, desc in self.PROACTIVITY_OPTIONS + ], + default="medium", + ), + FormField( + name="approval", + label="Require Approval For", + field_type="multi_checkbox", + options=[ + StepOption(value=val, label=label, description=desc) + for val, label, desc in self.APPROVAL_OPTIONS + ], + default=[], + ), + FormField( + name="messaging_platform", + label="Preferred Notification Platform", + field_type="select", + options=[ + StepOption(value=val, label=label, default=(val == "tui")) + for val, label in self.PLATFORM_OPTIONS + ], + default="tui", + ), + ] + def get_options(self) -> List[StepOption]: + # Not a single-select step — form fields are used instead return [] def validate(self, value: Any) -> tuple[bool, Optional[str]]: - # Optional, any string is valid + """Validate the form data dict. All fields are optional.""" + if not isinstance(value, dict): + return False, "Expected a dictionary of form values" + # Validate approval is a list if present + approval = value.get("approval") + if approval is not None and not isinstance(approval, list): + return False, "Approval settings must be a list" return True, None - def get_default(self) -> str: - return "CraftBot" + def get_default(self) -> Dict[str, Any]: + """Return defaults for all fields.""" + fields = self.get_form_fields() + return {f.name: f.default for f in fields} class MCPStep: @@ -343,6 +588,7 @@ def get_default(self) -> List[str]: ProviderStep, ApiKeyStep, AgentNameStep, + UserProfileStep, MCPStep, SkillsStep, ] diff --git a/app/onboarding/profile_writer.py b/app/onboarding/profile_writer.py new file mode 100644 index 00000000..2d5a5b6b --- /dev/null +++ b/app/onboarding/profile_writer.py @@ -0,0 +1,174 @@ +# -*- coding: utf-8 -*- +""" +Shared utility to write user profile data to USER.md. + +Used by all onboarding completion handlers (TUI, CLI, Browser controller) +to populate USER.md with data collected during hard onboarding. +""" + +import re +from typing import Any, Dict +import shutil + +from app.logger import logger + + +def write_profile_to_user_md(profile_data: Dict[str, Any]) -> bool: + """ + Write user profile data collected during hard onboarding to USER.md. + + Updates Identity, Communication Preferences, and Agent Interaction + sections. Infers timezone from location using tzlocal. + + Args: + profile_data: Dict with keys: user_name, location, language, + tone, proactivity, approval, messaging_platform + + Returns: + True if successfully written, False otherwise. + """ + if not profile_data: + return False + + try: + from app.config import AGENT_FILE_SYSTEM_PATH, AGENT_FILE_SYSTEM_TEMPLATE_PATH + + filename: str = "USER.md" + user_md_path = AGENT_FILE_SYSTEM_PATH / filename + if not user_md_path.exists(): + # Try to copy from template + template_path = AGENT_FILE_SYSTEM_TEMPLATE_PATH / filename + if template_path.exists(): + shutil.copy(template_path, user_md_path) + else: + logger.warning("[PROFILE] USER.md not found, skipping profile write") + return False + + content = user_md_path.read_text(encoding="utf-8") + + user_name = profile_data.get("user_name", "").strip() + location = profile_data.get("location", "").strip() + language = profile_data.get("language", "").strip() + tone = profile_data.get("tone", "").strip() + proactivity = profile_data.get("proactivity", "").strip() + approval = profile_data.get("approval", []) + messaging_platform = profile_data.get("messaging_platform", "").strip() + + # Infer timezone from system + timezone_str = _infer_timezone() + + # --- Identity section --- + if user_name: + content = _replace_field(content, "Full Name", user_name) + content = _replace_field(content, "Preferred Name", user_name) + + if location: + content = _replace_field(content, "Location", location) + + if timezone_str: + content = _replace_field(content, "Timezone", timezone_str) + + # --- Communication Preferences section --- + if language: + content = _replace_field(content, "Language", language) + + if tone: + content = _replace_field(content, "Preferred Tone", tone) + + if messaging_platform: + content = _replace_field(content, "Preferred Messaging Platform", messaging_platform) + + # --- Agent Interaction section --- + if proactivity: + content = _replace_field(content, "Prefer Proactive Assistance", proactivity) + + if isinstance(approval, list) and approval: + approval_str = _format_approval(approval) + content = _replace_field(content, "Approval Required For", approval_str) + + user_md_path.write_text(content, encoding="utf-8") + logger.info("[PROFILE] Successfully wrote user profile to USER.md") + return True + + except Exception as e: + logger.error(f"[PROFILE] Failed to write profile to USER.md: {e}") + return False + + +def _replace_field(content: str, field_name: str, value: str) -> str: + """Replace a markdown bold field value in USER.md. + + Matches patterns like: - **Field Name:** + """ + pattern = rf'(\*\*{re.escape(field_name)}:\*\*\s*).*' + replacement = rf'\1{value}' + return re.sub(pattern, replacement, content) + + +APPROVAL_DESCRIPTIONS = { + "messages": "Ask before sending messages or notifications on user's behalf", + "scheduling": "Ask before creating, modifying, or deleting schedules and calendar events", + "file_changes": "Ask before creating, modifying, or deleting files on the user's system", + "purchases": "Ask before making any purchases, payments, or financial transactions", + "all": "Ask for explicit approval before taking any action", +} + + +def _format_approval(approval: list) -> str: + """Convert approval keys to descriptive sentences for the agent.""" + if "all" in approval: + return APPROVAL_DESCRIPTIONS["all"] + descriptions = [APPROVAL_DESCRIPTIONS.get(key, key) for key in approval] + return "; ".join(descriptions) + + +def _infer_timezone() -> str: + """Infer timezone from system using tzlocal.""" + try: + from tzlocal import get_localzone + tz = get_localzone() + return str(tz) + except Exception: + return "" + + +# Mirrors PLATFORM_OPTIONS in app/onboarding/interfaces/steps.py — the value +# written to USER.md is the stored key (e.g. "tui"); callers need the runtime +# display string (e.g. "CraftBot Interface"). +DEFAULT_PREFERRED_PLATFORM = "CraftBot Interface" +_PLATFORM_NORMALIZATION = { + "tui": "CraftBot Interface", + "craftbot interface": "CraftBot Interface", + "telegram": "Telegram", + "whatsapp": "WhatsApp", + "discord": "Discord", + "slack": "Slack", +} + + +def read_preferred_messaging_platform() -> str: + """Return the user's Preferred Messaging Platform from USER.md. + + Defaults to "CraftBot Interface" when the field is missing, empty, or still + holds the placeholder text — so callers can rely on a usable value. + """ + try: + from app.config import AGENT_FILE_SYSTEM_PATH + + user_md_path = AGENT_FILE_SYSTEM_PATH / "USER.md" + if not user_md_path.exists(): + return DEFAULT_PREFERRED_PLATFORM + + content = user_md_path.read_text(encoding="utf-8") + match = re.search(r'\*\*Preferred Messaging Platform:\*\*\s*(.*)', content) + if not match: + return DEFAULT_PREFERRED_PLATFORM + + value = match.group(1).strip() + if not value or value.startswith("(Ask the users"): + return DEFAULT_PREFERRED_PLATFORM + + return _PLATFORM_NORMALIZATION.get(value.lower(), value) + except Exception as e: + logger.warning(f"[PROFILE] Failed to read preferred platform from USER.md: {e}") + return DEFAULT_PREFERRED_PLATFORM diff --git a/app/onboarding/soft/task_creator.py b/app/onboarding/soft/task_creator.py index b7d36468..ab7f4171 100644 --- a/app/onboarding/soft/task_creator.py +++ b/app/onboarding/soft/task_creator.py @@ -15,63 +15,49 @@ SOFT_ONBOARDING_TASK_INSTRUCTION = """ -Conduct a friendly conversational interview to learn about the user. - -Your goal is to gather information to personalize the agent experience efficiently. -Ask MULTIPLE related questions together to reduce back-and-forth turns. - -INTERVIEW FLOW (4 batches): - -1. Warm Introduction + Identity Questions -Start with a friendly greeting and ask the first batch using a numbered list: - - What should I call you? - - What do you do for work? - - Where are you based? - (Infer timezone from their location, keep this silent) - - Example opening: - > "Hi there! I'm excited to be your new AI assistant. To personalize your experience, let me ask a few quick questions: - > 1. What should I call you? - > 2. What do you do for work? - > 3. Where are you based?" - -2. Preference Questions (Combined) - - What language do you prefer me to communicate in? - - Do you prefer casual or formal communication? - - Should I proactively suggest things or wait for instructions? - - What types of actions should I ask your approval for? - -3. Messaging Platform - - Which messaging platform should I use for notifications? (Telegram/WhatsApp/Discord/Slack/CraftBot Interface only) - -4. Life Goals & Assistance - - What are your life goals or aspirations? - - What would you like me to help you with generally? - -Refer to the "user-profile-interview" skill for questions and style. - -IMPORTANT GUIDELINES: -- Ask related questions together using a numbered list format -- Be warm and conversational, not robotic -- Acknowledge their answers before the next batch -- Infer timezone from location (e.g., San Francisco = Pacific Time) -- The life goals question is most important, ask multiple questions if necessary or goal unclear. Guide them to answer this question. Skip if user has no life or goal. -- If user is annoyed by this interview or refuse to answer, just skip, and end task. - -After gathering ALL information: -1. Tell the user to wait a moment while you update their preference -2. Read agent_file_system/USER.md -3. Update USER.md with the collected information using stream_edit (including Language in Communication Preferences and Life Goals section) -4. Suggest tasks based on life goals: Send a message suggesting 1-3 tasks that CraftBot can help with to improve their life and get closer to achieving their goals. Focus on: - - Tasks that leverage CraftBot's automation capabilities - - Recurring tasks that save time in the long run - - Immediate tasks that can show impact in short-term - - Bite-size tasks that is specialized, be specific with numbers or actionable items. DO NOT suggest generic task. - - Avoid giving mutliple approaches in each suggested task, provide the BEST option to achieve goal. - - Tasks that align with their work and personal aspirations -5. End the task immediately with task_end (do NOT wait for confirmation) - -Start with: "Hi! I'm excited to be your AI assistant. To personalize your experience, let me ask a few quick questions:" then list the first batch. +Conduct a natural conversation with the user to understand their work and life goals. + +The user already provided their name, location, language, communication tone, proactivity, +approval settings, and notification platform during setup. These are saved in +agent_file_system/USER.md. Read it first so you know who you're talking to. +Do not re-ask any of that. + +Never use scripted or static phrases. Rephrase everything naturally each time. +Match the user's energy and style. + +Phase 1: Greeting + Job/Role +Read agent_file_system/USER.md to get the user's name. Greet them by name in your own words. +Ask about their work and what a typical day looks like. +Acknowledge their answer before moving on. + +Phase 2: Life Goals Exploration +Ask about their goals and aspirations in your own words. +Follow up on the goal they mention to understand timelines, obstacles, what success looks like. +If the user is engaged, continue exploring what else they're working toward, habits they want +to build, skills they want to develop, what would make their day-to-day easier. +If the user is brief or disengaged, wrap up gracefully. Do not push for more question. Move on to phase 3. +If the user has no goals or refuses, respect that and move on to phase 3. + +Phase 3: How CraftBot Helps + Task Suggestions +In one message, explain how CraftBot can help them based on what you learned, and suggest +1-3 specific tasks. Each suggestion must say exactly what you will do and what the +deliverable is. Do not describe generic tasks — describe actions with concrete outputs. +At least one suggestion must be something you can execute immediately after this conversation +and deliver a tangible result. +Bad example: "Research synthesis - I can summarize AGI papers" +Good example: "I'll research the top 5 AGI breakthroughs this month and send you a summary now." + +After the conversation: +1. Tell the user to wait a moment while you update your knowledge about them. +2. Read agent_file_system/USER.md using read_file. +3. Update USER.md using stream_edit: + - Update the Job field + - Write their goals as free-form text under Life Goals + - Write personality observations under Personality + - Do not overwrite name, location, language, tone, proactivity, approval, or messaging platform +4. Update agent_file_system/AGENT.md if user provided a name for the agent. +5. Send your explanation of how CraftBot can help and your task suggestions. +6. End the task with task_end. Do not wait for confirmation. """ diff --git a/app/security/prompt_sanitizer.py b/app/security/prompt_sanitizer.py index 3dba8ced..83e662cb 100644 --- a/app/security/prompt_sanitizer.py +++ b/app/security/prompt_sanitizer.py @@ -76,7 +76,7 @@ def sanitize_user_message(text: str, max_length: int = 5000) -> str: f"[SECURITY] Potential prompt injection detected. " f"Text: {text[:100]}... Patterns: {suspicious_patterns[:2]}" ) - + return text @staticmethod diff --git a/app/tui/onboarding/hard_onboarding.py b/app/tui/onboarding/hard_onboarding.py index b7a719d6..0e55e6a0 100644 --- a/app/tui/onboarding/hard_onboarding.py +++ b/app/tui/onboarding/hard_onboarding.py @@ -10,6 +10,7 @@ ProviderStep, ApiKeyStep, AgentNameStep, + UserProfileStep, MCPStep, SkillsStep, ) @@ -43,6 +44,7 @@ def __init__(self, app: "CraftApp"): ProviderStep(), None, # ApiKeyStep - created dynamically based on provider AgentNameStep(), + UserProfileStep(), MCPStep(), SkillsStep(), ] @@ -122,9 +124,16 @@ def on_complete(self, cancelled: bool = False) -> None: self._app._interface._agent.llm.reinitialize(provider) logger.info(f"[ONBOARDING] Reinitialized LLM with provider: {provider}") + # Write user profile data to USER.md + profile_data = self._collected_data.get("user_profile", {}) + if profile_data: + from app.onboarding.profile_writer import write_profile_to_user_md + write_profile_to_user_md(profile_data) + # Mark hard onboarding as complete agent_name = self._collected_data.get("agent_name", "Agent") - onboarding_manager.mark_hard_complete(agent_name=agent_name) + user_name = profile_data.get("user_name") if profile_data else None + onboarding_manager.mark_hard_complete(user_name=user_name, agent_name=agent_name) logger.info("[ONBOARDING] Hard onboarding completed successfully") diff --git a/app/tui/onboarding/widgets.py b/app/tui/onboarding/widgets.py index e161abad..44116a68 100644 --- a/app/tui/onboarding/widgets.py +++ b/app/tui/onboarding/widgets.py @@ -3,7 +3,7 @@ Textual widgets for the onboarding wizard. """ -from typing import TYPE_CHECKING, Any, List, Optional +from typing import TYPE_CHECKING, Any, Dict, List, Optional from textual.app import ComposeResult from textual.containers import Container, Horizontal, Vertical, VerticalScroll @@ -191,6 +191,82 @@ text-style: italic; margin-top: 1; } + +/* Profile form - compact scrollable multi-field form */ +.profile-form { + height: auto; + max-height: 22; + padding: 0 1; +} + +.form-field { + height: auto; + margin-bottom: 1; +} + +.form-label { + color: #ff4f18; + text-style: bold; + height: 1; +} + +.form-input { + width: 100%; + border: solid #2a2a2a; + background: #0a0a0a; + color: #e5e5e5; +} + +.form-input:focus { + border: solid #ff4f18; +} + +.form-select { + width: 30; + height: auto; + max-height: 4; + background: transparent; + border: none; + margin: 0 0; +} + +.form-select > ListItem { + padding: 0 0; +} + +.form-select > ListItem.--highlight .option-label { + background: #ff4f18; + color: #ffffff; + text-style: bold; +} + +.form-checkbox-row { + height: 1; + margin-bottom: 0; +} + +.form-checkbox-toggle { + width: 3; + min-width: 3; + height: 1; + background: #333333; + color: #666666; + border: none; + margin-right: 1; +} + +.form-checkbox-toggle.-checked { + color: #00cc00; +} + +.form-checkbox-toggle:hover { + background: #00cc00; + color: #000000; +} + +.form-checkbox-label { + color: #a0a0a0; +} """ @@ -215,6 +291,9 @@ def __init__(self, handler: "TUIHardOnboarding"): self._handler = handler self._current_step = 0 self._multi_select_values: List[str] = [] + # Form step state + self._form_fields: List[Any] = [] + self._form_checkbox_values: Dict[str, List[str]] = {} def compose(self) -> ComposeResult: with Container(id="onboarding-container"): @@ -279,17 +358,27 @@ def _show_step(self, index: int) -> None: content = self.query_one("#step-content", Container) content.remove_children() + # Check for form step (e.g., UserProfileStep) + form_fields = getattr(step, 'get_form_fields', lambda: [])() options = step.get_options() - if step.name in ("mcp", "skills"): + if form_fields: + # Multi-field form + self._form_fields = form_fields + self._form_checkbox_values = {} + self._build_form(content, step, form_fields) + elif step.name in ("mcp", "skills"): # Multi-select list + self._form_fields = [] self._multi_select_values = step.get_default() self._build_multi_select(content, options) elif options: # Single-select list + self._form_fields = [] self._build_option_list(content, options, step.get_default()) else: # Text input + self._form_fields = [] self._build_text_input(content, step.get_default()) def _update_nav_items(self, index: int, required: bool) -> None: @@ -369,13 +458,126 @@ def _build_multi_select(self, container: Container, options: list) -> None: container.mount(scroll) + def _build_form(self, container: Container, step: Any, fields: list) -> None: + """Build a compact scrollable form with multiple field types.""" + scroll = VerticalScroll(id="profile-form", classes="profile-form") + + for f in fields: + field_container = Vertical(classes="form-field") + + # Label + field_container.compose_add_child( + Static(f.label, classes="form-label") + ) + + if f.field_type == "text": + inp = Input( + value=str(f.default) if f.default else "", + placeholder=f.placeholder or "Enter value...", + id=f"form-{f.name}", + classes="form-input", + ) + field_container.compose_add_child(inp) + + elif f.field_type == "select": + items = [] + highlight_idx = 0 + for i, opt in enumerate(f.options): + label_text = f" {opt.label}" + if opt.description and opt.description != opt.label: + label_text += f" ({opt.description})" + items.append( + ListItem( + Label(label_text, classes="option-label"), + id=f"fopt-{f.name}-{opt.value}", + ) + ) + if opt.value == f.default or opt.default: + highlight_idx = i + + list_view = ListView( + *items, + id=f"form-select-{f.name}", + classes="form-select", + ) + field_container.compose_add_child(list_view) + + # Highlight default after mount + _idx = highlight_idx + def _make_highlight(lv=list_view, idx=_idx): + def _set(): + lv.index = idx + return _set + self.call_after_refresh(_make_highlight()) + + elif f.field_type == "multi_checkbox": + self._form_checkbox_values[f.name] = list(f.default) if isinstance(f.default, list) else [] + for opt in f.options: + is_checked = opt.value in self._form_checkbox_values[f.name] + toggle_text = "[x]" if is_checked else "[ ]" + toggle_cls = "form-checkbox-toggle -checked" if is_checked else "form-checkbox-toggle" + row = Horizontal( + Button(toggle_text, id=f"fchk-{f.name}-{opt.value}", classes=toggle_cls), + Static(f" {opt.label}", classes="form-checkbox-label"), + classes="form-checkbox-row", + ) + field_container.compose_add_child(row) + + scroll.compose_add_child(field_container) + + container.mount(scroll) + + # Focus the first text input if any + def _focus_first(): + for f in fields: + if f.field_type == "text": + widget = self.query(f"#form-{f.name}") + if widget: + widget.first().focus() + break + self.call_after_refresh(_focus_first) + + def _get_form_value(self) -> Dict[str, Any]: + """Extract all values from the form fields.""" + result: Dict[str, Any] = {} + for f in self._form_fields: + if f.field_type == "text": + widget = self.query(f"#form-{f.name}") + result[f.name] = widget.first().value.strip() if widget else f.default + + elif f.field_type == "select": + widget = self.query(f"#form-select-{f.name}") + if widget: + lv = widget.first() + if lv and lv.highlighted_child: + item_id = lv.highlighted_child.id + prefix = f"fopt-{f.name}-" + if item_id and item_id.startswith(prefix): + result[f.name] = item_id[len(prefix):] + continue + result[f.name] = f.default + + elif f.field_type == "multi_checkbox": + result[f.name] = list(self._form_checkbox_values.get(f.name, [])) + + else: + result[f.name] = f.default + return result + def on_button_pressed(self, event: Button.Pressed) -> None: - """Handle button presses (for multi-select toggles).""" + """Handle button presses (for multi-select toggles and form checkboxes).""" button_id = event.button.id if button_id and button_id.startswith("toggle-"): value = button_id[7:] # Remove "toggle-" prefix self._toggle_multi_select(value, event.button) + elif button_id and button_id.startswith("fchk-"): + # Form checkbox toggle: "fchk-{field_name}-{value}" + parts = button_id[5:] # Remove "fchk-" + dash_idx = parts.index("-") + field_name = parts[:dash_idx] + value = parts[dash_idx + 1:] + self._toggle_form_checkbox(field_name, value, event.button) def on_list_view_selected(self, event: ListView.Selected) -> None: """Handle list view selection.""" @@ -412,10 +614,26 @@ def _toggle_multi_select(self, value: str, button: Button) -> None: button.label = "[+]" button.add_class("-selected") + def _toggle_form_checkbox(self, field_name: str, value: str, button: Button) -> None: + """Toggle a form checkbox option.""" + values = self._form_checkbox_values.setdefault(field_name, []) + if value in values: + values.remove(value) + button.label = "[ ]" + button.remove_class("-checked") + else: + values.append(value) + button.label = "[x]" + button.add_class("-checked") + def _get_current_value(self) -> Any: """Get the current value from the active step widget.""" step = self._handler.get_step(self._current_step) + # Form step returns a dict + if self._form_fields: + return self._get_form_value() + if step.name in ("mcp", "skills"): return self._multi_select_values diff --git a/app/ui_layer/adapters/base.py b/app/ui_layer/adapters/base.py index f8f9aa8f..13dfdefc 100644 --- a/app/ui_layer/adapters/base.py +++ b/app/ui_layer/adapters/base.py @@ -15,7 +15,7 @@ InputComponentProtocol, FootageComponentProtocol, ) -from app.ui_layer.components.types import ChatMessage, ActionItem +from app.ui_layer.components.types import ChatMessage, ChatMessageOption, ActionItem if TYPE_CHECKING: from app.ui_layer.controller.ui_controller import UIController @@ -271,12 +271,25 @@ def _handle_agent_message(self, event: UIEvent) -> None: from app.onboarding import onboarding_manager agent_name = onboarding_manager.state.agent_name or "Agent" + # Extract options from event data if present + raw_options = event.data.get("options") + options = None + if raw_options and isinstance(raw_options, list): + options = [ + ChatMessageOption( + label=o.get("label", ""), + value=o.get("value", ""), + style=o.get("style", "default"), + ) + for o in raw_options + ] asyncio.create_task( self._display_chat_message( agent_name, event.data.get("message", ""), "agent", task_session_id=event.task_id, + options=options, ) ) @@ -442,6 +455,7 @@ async def _display_chat_message( message: str, style: str, task_session_id: Optional[str] = None, + options: Optional[List[ChatMessageOption]] = None, ) -> None: """ Display a chat message. @@ -451,6 +465,7 @@ async def _display_chat_message( message: Message content style: Style identifier task_session_id: Optional task session ID for reply feature + options: Optional list of interactive options/buttons """ import time @@ -461,6 +476,7 @@ async def _display_chat_message( style=style, timestamp=time.time(), task_session_id=task_session_id, + options=options, ) ) diff --git a/app/ui_layer/adapters/browser_adapter.py b/app/ui_layer/adapters/browser_adapter.py index 0f341056..1a19aa3f 100644 --- a/app/ui_layer/adapters/browser_adapter.py +++ b/app/ui_layer/adapters/browser_adapter.py @@ -200,6 +200,17 @@ def _init_storage(self) -> None: ) for att in stored.attachments ] + options = None + if stored.options: + from app.ui_layer.components.types import ChatMessageOption + options = [ + ChatMessageOption( + label=o.get("label", ""), + value=o.get("value", ""), + style=o.get("style", "default"), + ) + for o in stored.options + ] self._messages.append(ChatMessage( sender=stored.sender, content=stored.content, @@ -208,6 +219,8 @@ def _init_storage(self) -> None: message_id=stored.message_id, attachments=attachments, task_session_id=stored.task_session_id, + options=options, + option_selected=stored.option_selected, )) except Exception: # Storage may not be available, continue without persistence @@ -233,6 +246,12 @@ async def append_message(self, message: ChatMessage) -> None: } for att in message.attachments ] + options_data = None + if message.options: + options_data = [ + {"label": o.label, "value": o.value, "style": o.style} + for o in message.options + ] stored = StoredChatMessage( message_id=message.message_id or f"{message.sender}:{message.timestamp}", sender=message.sender, @@ -241,6 +260,7 @@ async def append_message(self, message: ChatMessage) -> None: timestamp=message.timestamp, attachments=attachments_data, task_session_id=message.task_session_id, + options=options_data, ) self._storage.insert_message(stored) except Exception: @@ -272,6 +292,15 @@ async def append_message(self, message: ChatMessage) -> None: if message.task_session_id: message_data["taskSessionId"] = message.task_session_id + # Include options/buttons if present + if message.options: + message_data["options"] = [ + {"label": o.label, "value": o.value, "style": o.style} + for o in message.options + ] + if message.option_selected: + message_data["optionSelected"] = message.option_selected + await self._adapter._broadcast({ "type": "chat_message", "data": message_data, @@ -320,6 +349,17 @@ def get_messages_before(self, before_timestamp: float, limit: int = 50) -> List[ ) for att in s.attachments ] + options = None + if s.options: + from app.ui_layer.components.types import ChatMessageOption + options = [ + ChatMessageOption( + label=o.get("label", ""), + value=o.get("value", ""), + style=o.get("style", "default"), + ) + for o in s.options + ] messages.append(ChatMessage( sender=s.sender, content=s.content, @@ -327,6 +367,8 @@ def get_messages_before(self, before_timestamp: float, limit: int = 50) -> List[ timestamp=s.timestamp, message_id=s.message_id, attachments=attachments, + options=options, + option_selected=s.option_selected, )) return messages except Exception: @@ -866,6 +908,7 @@ async def _on_start(self) -> None: self._app.router.add_get("/api/state", self._state_handler) self._app.router.add_get("/api/theme.css", self._theme_css_handler) self._app.router.add_get("/api/workspace/{path:.*}", self._workspace_file_handler) + self._app.router.add_get("/api/agent-profile-picture", self._agent_profile_picture_handler) # Serve Vite-built frontend (production) frontend_dist = Path(__file__).parent.parent / "browser" / "frontend" / "dist" @@ -955,10 +998,9 @@ async def _websocket_handler(self, request: "web.Request") -> "web.WebSocketResp from aiohttp import web, WSMsgType import asyncio - # Simple WebSocket configuration - no heartbeat (client handles reconnect) ws = web.WebSocketResponse( max_msg_size=100 * 1024 * 1024, - timeout=None, # No timeout - let messages flow naturally + heartbeat=30.0, # Send ping every 30s to keep connection alive ) try: @@ -967,8 +1009,19 @@ async def _websocket_handler(self, request: "web.Request") -> "web.WebSocketResp print(f"[BROWSER ADAPTER] Failed to prepare WebSocket: {e}") return ws + is_first_client = len(self._ws_clients) == 0 self._ws_clients.add(ws) + # Trigger soft onboarding on first client connection so the UI + # is ready to receive the task creation event. + if is_first_client: + from app.onboarding import onboarding_manager + if onboarding_manager.needs_soft_onboarding: + agent = self._controller.agent + if agent: + import asyncio + asyncio.create_task(agent.trigger_soft_onboarding()) + # Send initial state try: initial_state = self._get_initial_state() @@ -1124,6 +1177,12 @@ async def _handle_ws_message(self, data: Dict[str, Any], ws=None) -> None: task_id = data.get("taskId", "") await self._handle_task_cancel(task_id) + elif msg_type == "option_click": + value = data.get("value", "") + session_id = data.get("sessionId", "") + message_id = data.get("messageId", "") + await self._handle_option_click(value, session_id, message_id) + # Settings operations elif msg_type == "settings_get": await self._handle_settings_get() @@ -1145,6 +1204,12 @@ async def _handle_ws_message(self, data: Dict[str, Any], ws=None) -> None: filename = data.get("filename", "") await self._handle_agent_file_restore(filename) + elif msg_type == "agent_profile_picture_upload": + await self._handle_agent_profile_picture_upload(data) + + elif msg_type == "agent_profile_picture_remove": + await self._handle_agent_profile_picture_remove() + elif msg_type == "reset": await self._handle_reset() @@ -1322,6 +1387,11 @@ async def _handle_ws_message(self, data: Dict[str, Any], ws=None) -> None: description = data.get("description", "") await self._handle_skill_template(name, description) + elif msg_type == "skill_run": + name = data.get("name", "") + args_text = data.get("args", "") + await self._handle_skill_run(name, args_text) + # Integration handlers elif msg_type == "integration_list": await self._handle_integration_list() @@ -1554,6 +1624,7 @@ async def _handle_onboarding_step_get(self) -> None: ], "default": controller.get_step_default(), "provider": getattr(step, "provider", None), + "form_fields": self._get_step_form_fields(step), }, }, }) @@ -1567,6 +1638,27 @@ async def _handle_onboarding_step_get(self) -> None: }, }) + @staticmethod + def _get_step_form_fields(step) -> Optional[list]: + """Extract form field definitions from a step, if it supports them.""" + form_fields = getattr(step, 'get_form_fields', lambda: [])() + if not form_fields: + return None + return [ + { + "name": f.name, + "label": f.label, + "field_type": f.field_type, + "options": [ + {"value": o.value, "label": o.label, "description": o.description, "default": o.default} + for o in f.options + ], + "default": f.default, + "placeholder": f.placeholder, + } + for f in form_fields + ] + async def _handle_onboarding_step_submit(self, value: Any) -> None: """Submit a value for the current onboarding step.""" try: @@ -1635,11 +1727,15 @@ async def _handle_onboarding_step_submit(self, value: Any) -> None: # Onboarding complete - controller._complete() already called from app.onboarding import onboarding_manager + from app.ui_layer.settings.general_settings import get_agent_profile_picture_info + picture_info = get_agent_profile_picture_info() await self._broadcast({ "type": "onboarding_complete", "data": { "success": True, "agentName": onboarding_manager.state.agent_name or "Agent", + "agentProfilePictureUrl": picture_info["url"], + "agentProfilePictureHasCustom": picture_info["has_custom"], }, }) # Clear cached controller for fresh state @@ -1674,6 +1770,7 @@ async def _handle_onboarding_step_submit(self, value: Any) -> None: ], "default": controller.get_step_default(), "provider": getattr(step, "provider", None), + "form_fields": self._get_step_form_fields(step), }, }, }) @@ -1711,11 +1808,15 @@ async def _handle_onboarding_skip(self) -> None: if controller.is_complete: from app.onboarding import onboarding_manager + from app.ui_layer.settings.general_settings import get_agent_profile_picture_info + picture_info = get_agent_profile_picture_info() await self._broadcast({ "type": "onboarding_complete", "data": { "success": True, "agentName": onboarding_manager.state.agent_name or "Agent", + "agentProfilePictureUrl": picture_info["url"], + "agentProfilePictureHasCustom": picture_info["has_custom"], }, }) if hasattr(self, "_onboarding_controller"): @@ -1805,6 +1906,7 @@ async def _handle_onboarding_back(self) -> None: ], "default": controller.get_step_default(), "provider": getattr(step, "provider", None), + "form_fields": self._get_step_form_fields(step), }, }, }) @@ -1979,6 +2081,27 @@ async def _handle_task_cancel(self, task_id: str) -> None: }, }) + async def _handle_option_click(self, value: str, session_id: str, message_id: str) -> None: + """Handle a user clicking an option button in a chat message.""" + try: + # Mark the option as selected in storage and in-memory + if self._chat and message_id: + if self._chat._storage: + try: + self._chat._storage.update_option_selected(message_id, value) + except Exception: + pass + # Update in-memory message so refreshes reflect the selection + for m in self._chat._messages: + if m.message_id == message_id: + m.option_selected = value + break + + # Route to the controller + await self._controller.handle_option_click(value, session_id) + except Exception as e: + logger.error(f"[OPTION_CLICK] Error handling option click: {e}", exc_info=True) + # ───────────────────────────────────────────────────────────────────── # Settings Operation Handlers # ───────────────────────────────────────────────────────────────────── @@ -1990,6 +2113,12 @@ async def _handle_settings_get(self) -> None: settings = { "agentName": result.get("agent_name", "CraftBot"), "theme": "dark", # Theme is managed client-side + "agentProfilePictureUrl": result.get( + "agent_profile_picture_url", "/api/agent-profile-picture" + ), + "agentProfilePictureHasCustom": result.get( + "agent_profile_picture_has_custom", False + ), } await self._broadcast({ @@ -3197,9 +3326,10 @@ async def _handle_skill_enable(self, name: str) -> None: "name": name, }, }) - # Refresh the list + # Refresh the list and sync skill commands if success: await self._handle_skill_list() + self._controller.sync_skill_commands() except Exception as e: await self._broadcast({ "type": "skill_enable", @@ -3222,9 +3352,10 @@ async def _handle_skill_disable(self, name: str) -> None: "name": name, }, }) - # Refresh the list + # Refresh the list and sync skill commands if success: await self._handle_skill_list() + self._controller.sync_skill_commands() except Exception as e: await self._broadcast({ "type": "skill_disable", @@ -3246,9 +3377,10 @@ async def _handle_skill_reload(self) -> None: "message": message, }, }) - # Refresh the list + # Refresh the list and sync skill commands if success: await self._handle_skill_list() + self._controller.sync_skill_commands() except Exception as e: await self._broadcast({ "type": "skill_reload", @@ -3258,6 +3390,27 @@ async def _handle_skill_reload(self) -> None: }, }) + async def _handle_skill_run(self, name: str, args_text: str = "") -> None: + """Run a skill by invoking it through the controller.""" + try: + await self._controller.invoke_skill(name, args_text, self._adapter_id) + await self._broadcast({ + "type": "skill_run", + "data": { + "success": True, + "name": name, + }, + }) + except Exception as e: + await self._broadcast({ + "type": "skill_run", + "data": { + "success": False, + "error": str(e), + "name": name, + }, + }) + async def _handle_skill_install(self, source: str) -> None: """Install a skill from path or git URL.""" try: @@ -4311,6 +4464,13 @@ async def _handle_chat_history(self, before_timestamp: float, limit: int = 50) - ] if m.task_session_id: msg_data["taskSessionId"] = m.task_session_id + if m.options: + msg_data["options"] = [ + {"label": o.label, "value": o.value, "style": o.style} + for o in m.options + ] + if m.option_selected: + msg_data["optionSelected"] = m.option_selected messages_data.append(msg_data) await self._broadcast({ @@ -4543,6 +4703,71 @@ async def _handle_chat_attachment_upload(self, data: Dict[str, Any]) -> None: }, }) + async def _handle_agent_profile_picture_upload(self, data: Dict[str, Any]) -> None: + """Handle uploading a new agent profile picture.""" + from app.ui_layer.settings.general_settings import ( + PROFILE_MIME_TO_EXT, + ALLOWED_PROFILE_EXTS, + MAX_PROFILE_PICTURE_BYTES, + save_agent_profile_picture, + ) + + try: + name = data.get("name", "") + # Accept "mimeType" (preferred — avoids collision with the envelope "type" key) + # and fall back to legacy "type" for compatibility. + mime_type = (data.get("mimeType") or data.get("type") or "").lower() + content_b64 = data.get("content", "") + + if not content_b64: + raise ValueError("No content provided") + + # Resolve extension from MIME first, then fall back to filename. + ext: Optional[str] = PROFILE_MIME_TO_EXT.get(mime_type) + if not ext and name: + guess = name.rsplit(".", 1)[-1].lower() if "." in name else "" + if guess in ALLOWED_PROFILE_EXTS: + ext = guess + if not ext: + raise ValueError( + f"Unsupported image type. Allowed: {', '.join(sorted(ALLOWED_PROFILE_EXTS))}" + ) + + raw_bytes = base64.b64decode(content_b64) + if len(raw_bytes) > MAX_PROFILE_PICTURE_BYTES: + raise ValueError( + f"Image too large (max {MAX_PROFILE_PICTURE_BYTES // (1024 * 1024)} MB)" + ) + + result = save_agent_profile_picture(ext, raw_bytes) + + await self._broadcast({ + "type": "agent_profile_picture_upload", + "data": result, + }) + except Exception as e: + await self._broadcast({ + "type": "agent_profile_picture_upload", + "data": { + "success": False, + "error": str(e), + }, + }) + + async def _handle_agent_profile_picture_remove(self) -> None: + """Handle removing the custom agent profile picture.""" + from app.ui_layer.settings.general_settings import remove_agent_profile_picture + + try: + result = remove_agent_profile_picture() + except Exception as e: + result = {"success": False, "error": str(e)} + + await self._broadcast({ + "type": "agent_profile_picture_remove", + "data": result, + }) + async def _handle_open_file(self, file_path: str) -> None: """Open a file with the system default application.""" import subprocess @@ -4810,18 +5035,23 @@ async def send_message_with_attachments( def _get_initial_state(self) -> Dict[str, Any]: """Get initial state for new connections.""" from app.onboarding import onboarding_manager + from app.ui_layer.settings.general_settings import get_agent_profile_picture_info state = self._controller.state metrics = self._metrics_collector.get_metrics() from app.config import get_app_version + picture_info = get_agent_profile_picture_info() + return { "version": get_app_version(), "agentState": state.agent_state.value, "guiMode": state.gui_mode, "needsHardOnboarding": onboarding_manager.needs_hard_onboarding, "agentName": onboarding_manager.state.agent_name or "Agent", + "agentProfilePictureUrl": picture_info["url"], + "agentProfilePictureHasCustom": picture_info["has_custom"], "currentTask": { "id": state.current_task_id, "name": state.current_task_name, @@ -4844,6 +5074,11 @@ def _get_initial_state(self) -> Dict[str, Any]: for att in m.attachments ]} if m.attachments else {}), **({"taskSessionId": m.task_session_id} if m.task_session_id else {}), + **({"options": [ + {"label": o.label, "value": o.value, "style": o.style} + for o in m.options + ]} if m.options else {}), + **({"optionSelected": m.option_selected} if m.option_selected else {}), } for m in self._chat.get_messages() ], @@ -4908,6 +5143,49 @@ async def _theme_css_handler(self, request: "web.Request") -> "web.Response": css = self._theme_adapter.get_theme_css() return web.Response(text=css, content_type="text/css") + async def _agent_profile_picture_handler(self, request: "web.Request") -> "web.Response": + """Serve the current agent profile picture (user upload or bundled default).""" + from aiohttp import web + + from app.ui_layer.settings.general_settings import ( + AGENT_PROFILE_DIR, + AGENT_PROFILE_DEFAULT_FILENAME, + EXT_TO_MIME, + _user_profile_picture_path, + ) + from app.onboarding import onboarding_manager + + ext = onboarding_manager.state.agent_profile_picture + target: Optional[Path] = None + mime_type = "image/png" + + if ext: + candidate = _user_profile_picture_path(ext) + if candidate.exists(): + target = candidate + mime_type = EXT_TO_MIME.get(ext.lower(), "application/octet-stream") + + if target is None: + default_path = AGENT_PROFILE_DIR / AGENT_PROFILE_DEFAULT_FILENAME + if default_path.exists(): + target = default_path + mime_type = "image/png" + + if target is None: + raise web.HTTPNotFound(reason="Avatar not available") + + try: + content = target.read_bytes() + return web.Response( + body=content, + content_type=mime_type, + headers={ + "Cache-Control": "no-cache, max-age=0", + }, + ) + except Exception as e: + raise web.HTTPInternalServerError(reason=str(e)) + async def _workspace_file_handler(self, request: "web.Request") -> "web.Response": """Serve files from the workspace directory.""" from aiohttp import web diff --git a/app/ui_layer/adapters/cli_adapter.py b/app/ui_layer/adapters/cli_adapter.py index 51eef778..158a0bed 100644 --- a/app/ui_layer/adapters/cli_adapter.py +++ b/app/ui_layer/adapters/cli_adapter.py @@ -181,6 +181,14 @@ async def _on_start(self) -> None: if onboarding.needs_hard_onboarding: await self._run_hard_onboarding(onboarding) + # Trigger soft onboarding if needed (after hard onboarding check) + from app.onboarding import onboarding_manager + if onboarding_manager.needs_soft_onboarding: + import asyncio + agent = self._controller.agent + if agent: + asyncio.create_task(agent.trigger_soft_onboarding()) + # Print logo and welcome _get_formatter().print_logo() from app.config import get_app_version diff --git a/app/ui_layer/adapters/tui_adapter.py b/app/ui_layer/adapters/tui_adapter.py index 742f2257..5cd5fd7a 100644 --- a/app/ui_layer/adapters/tui_adapter.py +++ b/app/ui_layer/adapters/tui_adapter.py @@ -390,6 +390,14 @@ async def _on_start(self) -> None: # Run onboarding before starting Textual app await self._run_hard_onboarding(onboarding) + # Trigger soft onboarding if needed (after hard onboarding check) + from app.onboarding import onboarding_manager + if onboarding_manager.needs_soft_onboarding: + import asyncio + agent = self._controller.agent + if agent: + asyncio.create_task(agent.trigger_soft_onboarding()) + # Queue initial messages from app.config import get_app_version await self.chat_updates.put( diff --git a/app/ui_layer/browser/frontend/src/components/ui/StatusIndicator.module.css b/app/ui_layer/browser/frontend/src/components/ui/StatusIndicator.module.css index 0be8b207..1d49fa06 100644 --- a/app/ui_layer/browser/frontend/src/components/ui/StatusIndicator.module.css +++ b/app/ui_layer/browser/frontend/src/components/ui/StatusIndicator.module.css @@ -37,6 +37,10 @@ color: #3b82f6; } +.paused { + color: #3b82f6; +} + /* Spinning animation for loader icon */ .spinning { animation: spin 1s linear infinite; @@ -101,6 +105,10 @@ background: #3b82f6; } +.dot_paused { + background: #3b82f6; +} + /* Pulse animation for active agent states */ .pulse { animation: pulse 1.5s ease-in-out infinite; diff --git a/app/ui_layer/browser/frontend/src/components/ui/StatusIndicator.tsx b/app/ui_layer/browser/frontend/src/components/ui/StatusIndicator.tsx index 27ca201e..c76ec10c 100644 --- a/app/ui_layer/browser/frontend/src/components/ui/StatusIndicator.tsx +++ b/app/ui_layer/browser/frontend/src/components/ui/StatusIndicator.tsx @@ -1,5 +1,5 @@ import React from 'react' -import { CheckCircle, XCircle, Loader, Clock, MessageCircle } from 'lucide-react' +import { CheckCircle, XCircle, Loader, Clock, MessageCircle, PauseCircle } from 'lucide-react' import styles from './StatusIndicator.module.css' import type { ActionStatus, AgentState } from '../../types' @@ -57,6 +57,8 @@ export function StatusIndicator({ return case 'waiting': return + case 'paused': + return case 'pending': case 'idle': default: diff --git a/app/ui_layer/browser/frontend/src/contexts/WebSocketContext.tsx b/app/ui_layer/browser/frontend/src/contexts/WebSocketContext.tsx index a557c8a1..ec5f37c2 100644 --- a/app/ui_layer/browser/frontend/src/contexts/WebSocketContext.tsx +++ b/app/ui_layer/browser/frontend/src/contexts/WebSocketContext.tsx @@ -39,6 +39,8 @@ interface WebSocketState { // Onboarding state needsHardOnboarding: boolean agentName: string + agentProfilePictureUrl: string + agentProfilePictureHasCustom: boolean onboardingStep: OnboardingStep | null onboardingError: string | null onboardingLoading: boolean @@ -87,6 +89,11 @@ interface WebSocketContextType extends WebSocketState { startLocalLLM: () => void requestSuggestedModels: () => void pullOllamaModel: (model: string) => void + // Option click (interactive buttons in chat) + sendOptionClick: (value: string, sessionId?: string, messageId?: string) => void + // Agent profile picture + uploadAgentProfilePicture: (name: string, mimeType: string, contentBase64: string) => void + removeAgentProfilePicture: () => void } // Initialize lastSeenMessageId from localStorage @@ -123,6 +130,8 @@ const defaultState: WebSocketState = { // Onboarding state needsHardOnboarding: false, agentName: 'Agent', + agentProfilePictureUrl: '/api/agent-profile-picture', + agentProfilePictureHasCustom: false, onboardingStep: null, onboardingError: null, onboardingLoading: false, @@ -265,6 +274,12 @@ export function WebSocketProvider({ children }: { children: ReactNode }) { dashboardMetrics: data.dashboardMetrics || null, needsHardOnboarding: data.needsHardOnboarding || false, agentName: data.agentName || 'Agent', + agentProfilePictureUrl: + (data as InitialState & { agentProfilePictureUrl?: string }).agentProfilePictureUrl + || '/api/agent-profile-picture', + agentProfilePictureHasCustom: + (data as InitialState & { agentProfilePictureHasCustom?: boolean }).agentProfilePictureHasCustom + || false, hasMoreMessages: initMessages.length >= 50, hasMoreActions: initActions.filter((a: ActionItem) => a.itemType === 'task').length >= 15, })) @@ -516,7 +531,10 @@ export function WebSocketProvider({ children }: { children: ReactNode }) { } case 'onboarding_complete': { - const response = msg.data as unknown as OnboardingCompleteResponse + const response = msg.data as unknown as OnboardingCompleteResponse & { + agentProfilePictureUrl?: string + agentProfilePictureHasCustom?: boolean + } if (response.success) { setState(prev => ({ ...prev, @@ -525,6 +543,43 @@ export function WebSocketProvider({ children }: { children: ReactNode }) { onboardingLoading: false, onboardingError: null, agentName: response.agentName || 'Agent', + agentProfilePictureUrl: + response.agentProfilePictureUrl || prev.agentProfilePictureUrl, + agentProfilePictureHasCustom: + response.agentProfilePictureHasCustom ?? prev.agentProfilePictureHasCustom, + })) + } + break + } + + case 'agent_profile_picture_upload': { + const r = msg.data as unknown as { + success: boolean + url?: string + has_custom?: boolean + error?: string + } + if (r.success && r.url) { + setState(prev => ({ + ...prev, + agentProfilePictureUrl: r.url!, + agentProfilePictureHasCustom: r.has_custom ?? true, + })) + } + break + } + + case 'agent_profile_picture_remove': { + const r = msg.data as unknown as { + success: boolean + url?: string + has_custom?: boolean + } + if (r.success) { + setState(prev => ({ + ...prev, + agentProfilePictureUrl: r.url || '/api/agent-profile-picture', + agentProfilePictureHasCustom: r.has_custom ?? false, })) } break @@ -770,6 +825,32 @@ export function WebSocketProvider({ children }: { children: ReactNode }) { } }, []) + const sendOptionClick = useCallback((value: string, sessionId?: string, messageId?: string) => { + if (wsRef.current?.readyState === WebSocket.OPEN) { + wsRef.current.send(JSON.stringify({ type: 'option_click', value, sessionId, messageId })) + } + }, []) + + const uploadAgentProfilePicture = useCallback( + (name: string, mimeType: string, contentBase64: string) => { + if (wsRef.current?.readyState === WebSocket.OPEN) { + wsRef.current.send(JSON.stringify({ + type: 'agent_profile_picture_upload', + name, + mimeType, + content: contentBase64, + })) + } + }, + [] + ) + + const removeAgentProfilePicture = useCallback(() => { + if (wsRef.current?.readyState === WebSocket.OPEN) { + wsRef.current.send(JSON.stringify({ type: 'agent_profile_picture_remove' })) + } + }, []) + const openFile = useCallback((path: string) => { if (wsRef.current?.readyState === WebSocket.OPEN) { wsRef.current.send(JSON.stringify({ type: 'open_file', path })) @@ -943,6 +1024,9 @@ export function WebSocketProvider({ children }: { children: ReactNode }) { startLocalLLM, requestSuggestedModels, pullOllamaModel, + sendOptionClick, + uploadAgentProfilePicture, + removeAgentProfilePicture, }} > {children} diff --git a/app/ui_layer/browser/frontend/src/pages/Chat/ChatMessage.tsx b/app/ui_layer/browser/frontend/src/pages/Chat/ChatMessage.tsx index 0c5e26c0..1287ee43 100644 --- a/app/ui_layer/browser/frontend/src/pages/Chat/ChatMessage.tsx +++ b/app/ui_layer/browser/frontend/src/pages/Chat/ChatMessage.tsx @@ -1,7 +1,8 @@ -import React, { memo, useState, useMemo } from 'react' +import React, { memo, useState, useMemo, useRef } from 'react' import { Reply } from 'lucide-react' import { MarkdownContent, AttachmentDisplay, IconButton } from '../../components/ui' import type { ChatMessage as ChatMessageType } from '../../types' +import { useWebSocket } from '../../contexts/WebSocketContext' import styles from './ChatPage.module.css' interface ChatMessageProps { @@ -13,6 +14,7 @@ interface ChatMessageProps { displayName: string, fullContent: string ) => void + onOptionClick?: (value: string, sessionId?: string, messageId?: string) => void } // Parse reply context from message content @@ -33,8 +35,14 @@ export const ChatMessageItem = memo(function ChatMessageItem({ onOpenFile, onOpenFolder, onReply, + onOptionClick, }: ChatMessageProps) { const [isHovered, setIsHovered] = useState(false) + const [optionClicked, setOptionClicked] = useState( + message.optionSelected || null + ) + const optionLockedRef = useRef(!!message.optionSelected) + const { agentProfilePictureUrl } = useWebSocket() // Show reply for ALL agent messages const canReply = message.style === 'agent' && onReply @@ -58,43 +66,80 @@ export const ChatMessageItem = memo(function ChatMessageItem({ } } + const isAgent = message.style === 'agent' + + const bubbleContainer = ( +
+
+
+ {message.sender} + + {new Date(message.timestamp * 1000).toLocaleTimeString()} + +
+ {/* Reply context callout - shown above user message when replying */} + {replyContext && ( +
+ +
+ )} +
+ +
+ {message.options && message.options.length > 0 && ( +
+ Please select a response to continue: + {message.options.map((opt, index) => ( + + ))} +
+ )} +
+ {/* Reply button - positioned outside the bubble at top-right */} + {canReply && isHovered && ( + } + variant="ghost" + size="sm" + onClick={handleReply} + tooltip="Reply to this message" + className={styles.replyButtonOutside} + /> + )} +
+ ) + return (
setIsHovered(true)} onMouseLeave={() => setIsHovered(false)} > - {/* Message bubble container - for positioning reply button outside */} -
-
-
- {message.sender} - - {new Date(message.timestamp * 1000).toLocaleTimeString()} - -
- {/* Reply context callout - shown above user message when replying */} - {replyContext && ( -
- -
- )} -
- -
-
- {/* Reply button - positioned outside the bubble at top-right */} - {canReply && isHovered && ( - } - variant="ghost" - size="sm" - onClick={handleReply} - tooltip="Reply to this message" - className={styles.replyButtonOutside} + {isAgent ? ( +
+ - )} -
+ {bubbleContainer} +
+ ) : ( + bubbleContainer + )} {message.attachments && message.attachments.length > 0 && (
{ } export function ChatPage() { - const { messages, actions, connected, sendMessage, cancelTask, cancellingTaskId, openFile, openFolder, lastSeenMessageId, markMessagesAsSeen, replyTarget, setReplyTarget, clearReplyTarget, loadOlderMessages, hasMoreMessages, loadingOlderMessages } = useWebSocket() + const { messages, actions, connected, sendMessage, cancelTask, cancellingTaskId, openFile, openFolder, lastSeenMessageId, markMessagesAsSeen, replyTarget, setReplyTarget, clearReplyTarget, loadOlderMessages, hasMoreMessages, loadingOlderMessages, sendOptionClick } = useWebSocket() // Derive agent status from actions and messages const status = useDerivedAgentStatus({ @@ -556,6 +556,7 @@ export function ChatPage() { onOpenFile={openFile} onOpenFolder={openFolder} onReply={handleChatReply} + onOptionClick={sendOptionClick} />
) diff --git a/app/ui_layer/browser/frontend/src/pages/Onboarding/OnboardingPage.module.css b/app/ui_layer/browser/frontend/src/pages/Onboarding/OnboardingPage.module.css index 31082228..31b30911 100644 --- a/app/ui_layer/browser/frontend/src/pages/Onboarding/OnboardingPage.module.css +++ b/app/ui_layer/browser/frontend/src/pages/Onboarding/OnboardingPage.module.css @@ -329,6 +329,324 @@ margin-top: var(--space-2); } +/* ── Profile Form (multi-field form step) ── */ + +.profileForm { + flex: 1; + overflow-y: auto; + display: flex; + flex-direction: column; + gap: var(--space-6); + padding-right: var(--space-2); + max-height: 420px; +} + +.profileForm::-webkit-scrollbar { + width: 6px; +} + +.profileForm::-webkit-scrollbar-track { + background: var(--bg-tertiary); + border-radius: var(--radius-full); +} + +.profileForm::-webkit-scrollbar-thumb { + background: var(--border-secondary); + border-radius: var(--radius-full); +} + +.profileForm::-webkit-scrollbar-thumb:hover { + background: var(--border-hover); +} + +.formField { + display: flex; + flex-direction: column; + gap: var(--space-1); +} + +.formFieldLabel { + font-size: var(--text-sm); + font-weight: var(--font-medium); + color: var(--text-primary); + margin-bottom: 2px; +} + +.formFieldHint { + font-size: var(--text-xs); + color: var(--text-secondary); + margin-top: 4px; +} + +/* ── Agent Identity step (avatar + name, side-by-side) ── */ + +.identityCard { + display: flex; + flex-direction: row; + gap: var(--space-4); + /* Stretch children to the row's natural height so the avatar matches + the right section exactly (no feedback loop from min sizes). */ + align-items: stretch; + flex-wrap: wrap; +} + +.identityAvatar { + flex-shrink: 0; + display: flex; +} + +/* Inside the identity card, the preview matches the row height so it + aligns with the right section. */ +.identityAvatar .imageUploadPreview { + width: 118px; + height: 118px; +} + +.identityDetails { + display: flex; + flex-direction: column; + gap: var(--space-2); + flex: 1; + min-width: 220px; +} + +.identityAvatarActions { + display: flex; + flex-direction: row; + gap: var(--space-2); + flex-wrap: wrap; + margin-top: var(--space-1); +} + +/* ── Image upload field (agent profile picture) ── */ + +.imageUploadRow { + display: flex; + align-items: center; + gap: var(--space-3); + flex-wrap: wrap; +} + +.imageUploadPreview { + width: 96px; + height: 96px; + /* Square with the same fillet as chat bubbles */ + border-radius: var(--radius-lg); + object-fit: cover; + border: 1px solid var(--border-primary); + background: var(--bg-tertiary); + flex-shrink: 0; +} + +.imageUploadActions { + display: flex; + gap: var(--space-2); + flex-wrap: wrap; +} + +.imageUploadError { + flex-basis: 100%; + font-size: var(--text-xs); + color: var(--color-error); +} + +/* ── Shared radio dot styles ── */ + +.formSelectOptionInline .optionRadio, +.formSelectOptionVertical .optionRadio { + width: 14px; + height: 14px; + min-width: 14px; + min-height: 14px; + border: 2px solid var(--text-secondary); + border-radius: var(--radius-full); + display: flex; + align-items: center; + justify-content: center; + flex-shrink: 0; + transition: all var(--transition-base); +} + +.formSelectOptionInline .optionRadio::after, +.formSelectOptionVertical .optionRadio::after { + content: ''; + width: 6px; + height: 6px; + border-radius: var(--radius-full); + background: var(--color-primary); + opacity: 0; + transition: opacity var(--transition-base); +} + +.formSelectOptionInline.selected .optionRadio, +.formSelectOptionVertical.selected .optionRadio { + border-color: var(--color-primary); +} + +.formSelectOptionInline.selected .optionRadio::after, +.formSelectOptionVertical.selected .optionRadio::after { + opacity: 1; +} + +.formSelectOptionInline:hover .optionRadio, +.formSelectOptionVertical:hover .optionRadio { + border-color: var(--text-primary); +} + +/* ── Inline select (no descriptions, e.g. Communication Tone) ── */ + +.formSelectInline { + display: flex; + flex-wrap: wrap; + gap: var(--space-2); +} + +.formSelectOptionInline { + display: flex; + align-items: center; + gap: var(--space-2); + padding: 8px 14px; + background: var(--bg-tertiary); + border: 1px solid var(--border-primary); + border-radius: var(--radius-full); + cursor: pointer; + transition: all var(--transition-base); + font-size: var(--text-sm); +} + +.formSelectOptionInline:hover { + border-color: var(--text-secondary); +} + +.formSelectOptionInline.selected { + border-color: var(--color-primary); + background: var(--color-primary-subtle); +} + +/* ── Vertical select (with descriptions, e.g. Proactive Level) ── */ + +.formSelectVertical { + display: flex; + flex-direction: column; + gap: var(--space-2); +} + +.formSelectOptionVertical { + display: flex; + align-items: center; + gap: var(--space-2); + padding: 10px 14px; + background: var(--bg-tertiary); + border: 1px solid var(--border-primary); + border-radius: var(--radius-lg); + cursor: pointer; + transition: all var(--transition-base); + font-size: var(--text-sm); +} + +.formSelectOptionVertical:hover { + border-color: var(--text-secondary); +} + +.formSelectOptionVertical.selected { + border-color: var(--color-primary); + background: var(--color-primary-subtle); +} + +/* Native dropdown for large option lists (e.g., language) */ +.formDropdown { + width: 100%; + padding: var(--space-2) var(--space-3); + background: var(--bg-tertiary); + border: 1px solid var(--border-primary); + border-radius: var(--radius-lg); + color: var(--text-primary); + font-size: var(--text-sm); + font-family: var(--font-sans); + cursor: pointer; + transition: border-color var(--transition-base); + appearance: auto; +} + +.formDropdown:focus { + outline: none; + border-color: var(--color-primary); +} + +.formDropdown option { + background: var(--bg-secondary); + color: var(--text-primary); +} + +.formSelectLabel { + font-size: var(--text-sm); + color: var(--text-primary); +} + +.formSelectDesc { + font-size: var(--text-xs); + color: var(--text-secondary); + margin-left: 4px; +} + +.formSelectDesc::before { + content: '— '; +} + +/* Checkbox group for form fields */ +.formCheckboxGroup { + display: grid; + grid-template-columns: repeat(auto-fill, minmax(140px, 1fr)); + gap: var(--space-2); +} + +.formCheckboxItem { + display: flex; + align-items: center; + gap: var(--space-2); + padding: 8px 12px; + background: var(--bg-tertiary); + border: 1px solid var(--border-primary); + border-radius: var(--radius-lg); + cursor: pointer; + transition: all var(--transition-base); + font-size: var(--text-sm); + color: var(--text-primary); +} + +.formCheckboxItem:hover { + border-color: var(--text-secondary); +} + +.formCheckboxItem.selected { + border-color: var(--color-primary); + background: var(--color-primary-subtle); +} + +.formCheckboxItem .optionCheckbox { + width: 16px; + height: 16px; + min-width: 16px; + min-height: 16px; + border: 2px solid var(--text-secondary); + border-radius: var(--radius-sm); + display: flex; + align-items: center; + justify-content: center; + flex-shrink: 0; + transition: all var(--transition-base); +} + +.formCheckboxItem.selected .optionCheckbox { + border-color: var(--color-primary); + background: var(--color-primary); + color: var(--color-white, #fff); +} + +.formCheckboxItem:hover .optionCheckbox { + border-color: var(--text-primary); +} + /* Error Message */ .errorMessage { display: flex; diff --git a/app/ui_layer/browser/frontend/src/pages/Onboarding/OnboardingPage.tsx b/app/ui_layer/browser/frontend/src/pages/Onboarding/OnboardingPage.tsx index 46bf5e23..3d5b7458 100644 --- a/app/ui_layer/browser/frontend/src/pages/Onboarding/OnboardingPage.tsx +++ b/app/ui_layer/browser/frontend/src/pages/Onboarding/OnboardingPage.tsx @@ -1,4 +1,4 @@ -import React, { useEffect, useState, useCallback } from 'react' +import React, { useEffect, useState, useCallback, useRef } from 'react' import { getOllamaInstallPercent } from '../../utils/ollamaInstall' import { Check, @@ -27,11 +27,13 @@ import { Wifi, WifiOff, RefreshCw, + Upload, + Trash2, type LucideIcon, } from 'lucide-react' import { Button } from '../../components/ui' import { useWebSocket } from '../../contexts/WebSocketContext' -import type { OnboardingStep, OnboardingStepOption } from '../../types' +import type { OnboardingStep, OnboardingStepOption, OnboardingFormField } from '../../types' import styles from './OnboardingPage.module.css' // Icon mapping for dynamic rendering @@ -53,7 +55,7 @@ const ICON_MAP: Record = { Sheet, } -const STEP_NAMES = ['Provider', 'API Key', 'Agent Name', 'MCP Servers', 'Skills'] +const STEP_NAMES = ['Provider', 'API Key', 'Agent Name', 'User Profile', 'MCP Servers', 'Skills'] // ── Ollama local-setup component ───────────────────────────────────────────── @@ -332,6 +334,10 @@ export function OnboardingPage() { skipOnboardingStep, goBackOnboardingStep, localLLM, + agentProfilePictureUrl, + agentProfilePictureHasCustom, + uploadAgentProfilePicture, + removeAgentProfilePicture, } = useWebSocket() // Local form state @@ -340,6 +346,34 @@ export function OnboardingPage() { // URL submitted from OllamaSetup const [ollamaUrl, setOllamaUrl] = useState('http://localhost:11434') const [ollamaConnected, setOllamaConnected] = useState(false) + // Form step state (for user_profile and similar multi-field steps) + const [formValues, setFormValues] = useState>({}) + // Picture upload state (for image_upload fields) + const [pictureUploading, setPictureUploading] = useState(false) + const [pictureError, setPictureError] = useState(null) + const pictureInputRef = useRef(null) + + // Reset picture-upload feedback when transitioning between steps + useEffect(() => { + setPictureUploading(false) + setPictureError(null) + }, [onboardingStep?.name]) + + // Clear uploading spinner once the context reflects the new picture + useEffect(() => { + if (pictureUploading) { + setPictureUploading(false) + } + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [agentProfilePictureUrl]) + + // Safety: clear the spinner after a short timeout even if no ack arrives + // (e.g., on a failed upload that did not update the context URL). + useEffect(() => { + if (!pictureUploading) return + const t = window.setTimeout(() => setPictureUploading(false), 10000) + return () => window.clearTimeout(t) + }, [pictureUploading]) // Request first step when connected useEffect(() => { @@ -353,7 +387,17 @@ export function OnboardingPage() { if (onboardingStep) { setOllamaConnected(false) - if (onboardingStep.name === 'mcp' || onboardingStep.name === 'skills') { + // Form step (e.g., user_profile, agent_name) + // Preserve existing values when navigating back — only set defaults for missing fields + if (onboardingStep.form_fields && onboardingStep.form_fields.length > 0) { + setFormValues(prev => { + const defaults: Record = {} + for (const field of onboardingStep.form_fields) { + defaults[field.name] = prev[field.name] ?? (field.default ?? '') + } + return defaults + }) + } else if (onboardingStep.name === 'mcp' || onboardingStep.name === 'skills') { setSelectedValue(Array.isArray(onboardingStep.default) ? onboardingStep.default : []) } else if (onboardingStep.options.length > 0) { const defaultOption = onboardingStep.options.find(opt => opt.default) @@ -378,6 +422,46 @@ export function OnboardingPage() { setOllamaConnected(true) }, []) + const handlePictureSelect = useCallback(() => { + pictureInputRef.current?.click() + }, []) + + const handlePictureChange = useCallback( + (e: React.ChangeEvent, fieldName: string) => { + const file = e.target.files?.[0] + e.target.value = '' + if (!file) return + + setPictureError(null) + setPictureUploading(true) + + const reader = new FileReader() + reader.onload = () => { + const result = reader.result as string + const base64 = result.includes(',') ? result.split(',', 2)[1] : result + // Mark this form field as "has picture" using the file extension + const ext = (file.name.split('.').pop() || '').toLowerCase() + setFormValues(prev => ({ ...prev, [fieldName]: ext })) + uploadAgentProfilePicture(file.name, file.type || 'application/octet-stream', base64) + } + reader.onerror = () => { + setPictureUploading(false) + setPictureError('Could not read file') + } + reader.readAsDataURL(file) + }, + [uploadAgentProfilePicture] + ) + + const handlePictureRemove = useCallback( + (fieldName: string) => { + setPictureError(null) + setFormValues(prev => ({ ...prev, [fieldName]: '' })) + removeAgentProfilePicture() + }, + [removeAgentProfilePicture] + ) + const handleOptionSelect = useCallback((value: string) => { if (!onboardingStep) return if (onboardingStep.name === 'mcp' || onboardingStep.name === 'skills') { @@ -396,18 +480,21 @@ export function OnboardingPage() { if (isOllamaStep) { submitOnboardingStep(ollamaUrl) + } else if (onboardingStep.form_fields && onboardingStep.form_fields.length > 0) { + submitOnboardingStep(formValues) } else if (onboardingStep.options.length > 0) { submitOnboardingStep(selectedValue) } else { submitOnboardingStep(textValue) } - }, [onboardingStep, selectedValue, textValue, ollamaUrl, submitOnboardingStep]) + }, [onboardingStep, selectedValue, textValue, ollamaUrl, formValues, submitOnboardingStep]) const handleSkip = useCallback(() => skipOnboardingStep(), [skipOnboardingStep]) const handleBack = useCallback(() => goBackOnboardingStep(), [goBackOnboardingStep]) const isMultiSelect = onboardingStep?.name === 'mcp' || onboardingStep?.name === 'skills' - const isWideStep = isMultiSelect + const isFormStep = !!(onboardingStep?.form_fields && onboardingStep.form_fields.length > 0) + const isWideStep = isMultiSelect || isFormStep const isLastStep = onboardingStep ? onboardingStep.index === onboardingStep.total - 1 : false const isOllamaStep = @@ -419,6 +506,7 @@ export function OnboardingPage() { if (isOllamaStep) { return ollamaConnected || (localLLM.phase === 'connected' && !!localLLM.testResult?.success) } + if (isFormStep) return true // All form fields are optional if (onboardingStep.options.length > 0) { return isMultiSelect ? true : !!selectedValue } @@ -457,6 +545,237 @@ export function OnboardingPage() { ) } + // Agent Identity step — compact side-by-side layout (avatar + name) + if ( + onboardingStep.name === 'agent_name' && + onboardingStep.form_fields && + onboardingStep.form_fields.length > 0 + ) { + const nameField = onboardingStep.form_fields.find(f => f.field_type === 'text') + const avatarField = onboardingStep.form_fields.find(f => f.field_type === 'image_upload') + + return ( +
+
+ {avatarField && ( +
+ + handlePictureChange(e, avatarField.name)} + style={{ display: 'none' }} + /> +
+ )} +
+ {nameField && ( + <> + + + setFormValues((prev) => ({ ...prev, [nameField.name]: e.target.value })) + } + placeholder={nameField.placeholder || 'Enter a name'} + /> + + )} + {avatarField && ( +
+ + {agentProfilePictureHasCustom && ( + + )} +
+ )} + {pictureError && ( +
{pictureError}
+ )} +
+
+
+ ) + } + + // Form step (multi-field form, e.g., user_profile) + if (onboardingStep.form_fields && onboardingStep.form_fields.length > 0) { + return ( +
+
+ {onboardingStep.form_fields.map((field: OnboardingFormField) => ( +
+ + + {field.field_type === 'text' && ( + setFormValues(prev => ({ ...prev, [field.name]: e.target.value }))} + placeholder={field.placeholder || `Enter ${field.label.toLowerCase()}`} + /> + )} + + {field.field_type === 'select' && field.options.length > 20 ? ( + /* Large option list (e.g., languages) — use native dropdown */ + <> + + {field.placeholder && ( +
{field.placeholder}
+ )} + + ) : field.field_type === 'select' ? (() => { + const hasDescriptions = field.options.some(o => o.description && o.description !== o.label) + if (hasDescriptions) { + /* Options with descriptions — vertical stack */ + return ( +
+ {field.options.map(opt => { + const isSelected = formValues[field.name] === opt.value + return ( +
setFormValues(prev => ({ ...prev, [field.name]: opt.value }))} + > +
+ {opt.label} + {opt.description && opt.description !== opt.label && ( + {opt.description} + )} +
+ ) + })} +
+ ) + } + /* Simple options without descriptions — inline row */ + return ( +
+ {field.options.map(opt => { + const isSelected = formValues[field.name] === opt.value + return ( +
setFormValues(prev => ({ ...prev, [field.name]: opt.value }))} + > +
+ {opt.label} +
+ ) + })} +
+ ) + })() : null} + + {field.field_type === 'image_upload' && ( +
+ +
+ handlePictureChange(e, field.name)} + style={{ display: 'none' }} + /> + + {agentProfilePictureHasCustom && ( + + )} +
+ {pictureError && ( +
{pictureError}
+ )} +
+ )} + + {field.field_type === 'multi_checkbox' && ( +
+ {field.options.map(opt => { + const checked = Array.isArray(formValues[field.name]) && + (formValues[field.name] as string[]).includes(opt.value) + return ( +
{ + setFormValues(prev => { + const current = Array.isArray(prev[field.name]) ? (prev[field.name] as string[]) : [] + const updated = current.includes(opt.value) + ? current.filter(v => v !== opt.value) + : [...current, opt.value] + return { ...prev, [field.name]: updated } + }) + }} + > +
+ {checked && } +
+ {opt.label} +
+ ) + })} +
+ )} +
+ ))} +
+
+ ) + } + // Option-based step if (onboardingStep.options.length > 0) { return ( diff --git a/app/ui_layer/browser/frontend/src/pages/Settings/GeneralSettings.tsx b/app/ui_layer/browser/frontend/src/pages/Settings/GeneralSettings.tsx index 82a951b2..a5cca7ff 100644 --- a/app/ui_layer/browser/frontend/src/pages/Settings/GeneralSettings.tsx +++ b/app/ui_layer/browser/frontend/src/pages/Settings/GeneralSettings.tsx @@ -9,6 +9,8 @@ import { Loader2, Download, RefreshCw, + Upload, + Trash2, } from 'lucide-react' import { Button, Badge, ConfirmModal } from '../../components/ui' import { useTheme } from '../../contexts/ThemeContext' @@ -45,7 +47,7 @@ function getInitialAgentName(): string { export function GeneralSettings() { const { send, onMessage, isConnected } = useSettingsWebSocket() - const { version } = useWebSocket() + const { version, agentProfilePictureUrl, agentProfilePictureHasCustom } = useWebSocket() const { theme: globalTheme, setTheme: setGlobalTheme } = useTheme() const [agentName, setAgentName] = useState(getInitialAgentName) const [initialAgentName, setInitialAgentName] = useState(getInitialAgentName) @@ -56,6 +58,21 @@ export function GeneralSettings() { const [isSaving, setIsSaving] = useState(false) const [saveStatus, setSaveStatus] = useState<'idle' | 'success' | 'error'>('idle') + // Agent profile picture + const [profilePictureUrl, setProfilePictureUrl] = useState(agentProfilePictureUrl) + const [hasCustomPicture, setHasCustomPicture] = useState(agentProfilePictureHasCustom) + const [pictureError, setPictureError] = useState(null) + const [isUploadingPicture, setIsUploadingPicture] = useState(false) + const pictureInputRef = useRef(null) + + // Keep local preview in sync with the central context value (e.g. after reconnect) + useEffect(() => { + setProfilePictureUrl(agentProfilePictureUrl) + }, [agentProfilePictureUrl]) + useEffect(() => { + setHasCustomPicture(agentProfilePictureHasCustom) + }, [agentProfilePictureHasCustom]) + // Agent file states const [userMdContent, setUserMdContent] = useState('') const [originalUserMdContent, setOriginalUserMdContent] = useState('') @@ -134,10 +151,45 @@ export function GeneralSettings() { // Set up message handlers const cleanups = [ onMessage('settings_get', (data: unknown) => { - const d = data as { success: boolean; settings?: { agentName: string; theme: string } } + const d = data as { + success: boolean + settings?: { + agentName: string + theme: string + agentProfilePictureUrl?: string + agentProfilePictureHasCustom?: boolean + } + } if (d.success && d.settings) { setAgentName(d.settings.agentName) setTheme(d.settings.theme) + if (d.settings.agentProfilePictureUrl) { + setProfilePictureUrl(d.settings.agentProfilePictureUrl) + } + if (typeof d.settings.agentProfilePictureHasCustom === 'boolean') { + setHasCustomPicture(d.settings.agentProfilePictureHasCustom) + } + } + }), + onMessage('agent_profile_picture_upload', (data: unknown) => { + const d = data as { success: boolean; url?: string; has_custom?: boolean; error?: string } + setIsUploadingPicture(false) + if (d.success && d.url) { + setProfilePictureUrl(d.url) + setHasCustomPicture(d.has_custom ?? true) + setPictureError(null) + } else { + setPictureError(d.error || 'Upload failed') + } + }), + onMessage('agent_profile_picture_remove', (data: unknown) => { + const d = data as { success: boolean; url?: string; has_custom?: boolean; error?: string } + if (d.success) { + setProfilePictureUrl(d.url || '/api/agent-profile-picture') + setHasCustomPicture(d.has_custom ?? false) + setPictureError(null) + } else { + setPictureError(d.error || 'Remove failed') } }), onMessage('settings_update', (data: unknown) => { @@ -292,6 +344,41 @@ export function GeneralSettings() { setTimeout(() => setSaveStatus('idle'), 3000) } + const handlePictureSelect = () => { + pictureInputRef.current?.click() + } + + const handlePictureChange = (e: React.ChangeEvent) => { + const file = e.target.files?.[0] + e.target.value = '' // allow re-selecting the same file later + if (!file) return + + setPictureError(null) + setIsUploadingPicture(true) + + const reader = new FileReader() + reader.onload = () => { + const result = reader.result as string + // Strip data URL prefix → raw base64 + const base64 = result.includes(',') ? result.split(',', 2)[1] : result + send('agent_profile_picture_upload', { + name: file.name, + mimeType: file.type || 'application/octet-stream', + content: base64, + }) + } + reader.onerror = () => { + setIsUploadingPicture(false) + setPictureError('Could not read file') + } + reader.readAsDataURL(file) + } + + const handlePictureRemove = () => { + setPictureError(null) + send('agent_profile_picture_remove') + } + const handleReset = () => { confirm({ title: 'Reset Agent', @@ -384,6 +471,58 @@ export function GeneralSettings() {
+
+ +
+ Agent avatar +
+ + + {hasCustomPicture && ( + + )} +
+
+ + Shown next to agent messages in chat. PNG/JPG/WEBP/GIF, max 5 MB. + + {pictureError && ( + + {pictureError} + + )} +
+
([]) @@ -150,6 +153,12 @@ export function SkillsSettings() { showToast('error', d.error || 'Failed to get skill info') } }), + onMessage('skill_run', (data: unknown) => { + const d = data as { success: boolean; name?: string; error?: string } + if (!d.success) { + showToast('error', d.error || 'Failed to run skill') + } + }), ] send('skill_list') @@ -185,6 +194,12 @@ export function SkillsSettings() { send('skill_info', { name }) } + const handleRunSkill = (name: string) => { + send('skill_run', { name }) + setViewingSkill(null) + navigate('/chat') + } + const handleInstallSkill = () => { const source = installSource.trim() if (!source) { @@ -533,6 +548,15 @@ export function SkillsSettings() { + {viewingSkill.enabled && ( + + )}