Merged

Dev #204

Changes from all commits

Commits (49)
2d5dd9c
bug:heartbeat to avoid websocket closing issue
zfoong Apr 13, 2026
d8a4fe4
bug:fix provider VLM issue
zfoong Apr 13, 2026
23abdcf
Invoke skill with command
zfoong Apr 14, 2026
cf511d6
improvement:update trigger priority (simple > complex task)
zfoong Apr 14, 2026
786cfc4
improvement:CWD included in environment prompt
zfoong Apr 14, 2026
aa4fae2
Delete craftbot.log
korivi-CraftOS Apr 14, 2026
a572d74
Delete craftbot.pid
korivi-CraftOS Apr 14, 2026
b9efe54
Update anthrophic default model to 4.5
zfoong Apr 14, 2026
f5e791c
improve generate image action
zfoong Apr 14, 2026
75c3458
improvement:improve pretty json output and grep_files action
zfoong Apr 14, 2026
4d4d3fe
add list_skills and use_skill actions, improved web_fetch action
zfoong Apr 14, 2026
767fc1f
Merge pull request #194 from CraftOS-dev/improvement/action-upgrade
zfoong Apr 14, 2026
1989eb6
bug:reinforce wait for user reply message end with question
zfoong Apr 15, 2026
d28902a
Merge branch 'V1.2.3' of https://github.com/craftos-dev/craftbot into…
zfoong Apr 15, 2026
4fea2d6
feature:token limit handling with interface update
zfoong Apr 15, 2026
f340074
bug:heartbeat to avoid websocket closing issue
zfoong Apr 13, 2026
4d9be5d
bug:fix provider VLM issue
zfoong Apr 13, 2026
782386a
Invoke skill with command
zfoong Apr 14, 2026
40b0e27
improvement:update trigger priority (simple > complex task)
zfoong Apr 14, 2026
ad37eaa
improvement:CWD included in environment prompt
zfoong Apr 14, 2026
bd7505a
Update anthrophic default model to 4.5
zfoong Apr 14, 2026
7d6b015
improve generate image action
zfoong Apr 14, 2026
ab66ff2
bug:reinforce wait for user reply message end with question
zfoong Apr 15, 2026
e026efe
improvement:improve pretty json output and grep_files action
zfoong Apr 14, 2026
6439fc0
add list_skills and use_skill actions, improved web_fetch action
zfoong Apr 14, 2026
13da052
Fixed the service. Uninstall it now. Clean the Task Scheduler and sta…
korivi-CraftOS Apr 9, 2026
36d9b6b
security: fix 6 vulnerabilities (XSS, CSRF, SSRF, path traversal, cre…
eesb99 Apr 15, 2026
59e8b83
Update write_file.py
zfoong Apr 15, 2026
cb06185
Update read_file.py
zfoong Apr 15, 2026
624f3df
Update prompt_sanitizer.py
zfoong Apr 15, 2026
9d62935
improvement:onboarding process update
zfoong Apr 12, 2026
8b4c5b4
Further UX improvement
zfoong Apr 12, 2026
5ca80b8
fix: anchor workspace root to absolute path to prevent CWD-relative
AlanAAG Apr 15, 2026
d222492
feature:improve design, UX, and fixed bug
zfoong Apr 16, 2026
c40141d
Update types.py
zfoong Apr 16, 2026
af2b972
Merge pull request #199 from CraftOS-dev/feature/task-limit-update
zfoong Apr 16, 2026
1b3be6a
Merge branch 'V1.2.3' of https://github.com/craftos-dev/craftbot into…
zfoong Apr 16, 2026
478b9c1
feature:added agent avatar feature
zfoong Apr 16, 2026
7ebfde6
Fix openAI API issue
zfoong Apr 16, 2026
aab9c0c
Fixed OpenAI VLM issue
zfoong Apr 16, 2026
ae6e82e
fix hard onboarding back method
ahmad-ajmal Apr 16, 2026
153e9fb
model switching no taking effect, issue 193
zfoong Apr 16, 2026
a10fa86
Merge branch 'V1.2.3' of https://github.com/craftos-dev/craftbot into…
zfoong Apr 16, 2026
1a87f4e
Fix llm fail fail to recover issue
zfoong Apr 16, 2026
e9bc877
Merge branch 'staging' into dev
ahmad-ajmal Apr 17, 2026
9d888c3
Merge branch 'dev' into V1.2.3
ahmad-ajmal Apr 17, 2026
92cd1d8
Copy if no USER.md during onboarding
ahmad-ajmal Apr 17, 2026
a43ae5f
feat: route task outbound messages to the originating platform
ahmad-ajmal Apr 17, 2026
53fa35c
Merge pull request #203 from CraftOS-dev/V1.2.3
ahmad-ajmal Apr 17, 2026
24 changes: 20 additions & 4 deletions agent_core/core/credentials/oauth_server.py
@@ -22,6 +22,7 @@
"""

import asyncio
import html
import ipaddress
import logging
import os
@@ -120,10 +121,20 @@ class _OAuthCallbackHandler(BaseHTTPRequestHandler):
def do_GET(self):
"""Handle GET request from OAuth callback."""
params = parse_qs(urlparse(self.path).query)
result_holder["code"] = params.get("code", [None])[0]
result_holder["state"] = params.get("state", [None])[0]
returned_state = params.get("state", [None])[0]
result_holder["error"] = params.get("error", [None])[0]

# Validate OAuth state parameter to prevent CSRF
expected_state = result_holder.get("expected_state")
if expected_state and returned_state != expected_state:
result_holder["error"] = "OAuth state mismatch — possible CSRF attack"
result_holder["code"] = None
logger.warning("[OAUTH] State mismatch: expected %s, got %s", expected_state, returned_state)
else:
result_holder["code"] = params.get("code", [None])[0]

result_holder["state"] = returned_state

self.send_response(200)
self.send_header("Content-Type", "text/html")
self.end_headers()
@@ -132,8 +143,9 @@ def do_GET(self):
b"<h2>Authorization successful!</h2><p>You can close this tab.</p>"
)
else:
safe_error = html.escape(str(result_holder.get('error') or 'Unknown error'))
self.wfile.write(
f"<h2>Failed</h2><p>{result_holder['error']}</p>".encode()
f"<h2>Failed</h2><p>{safe_error}</p>".encode()
)

def log_message(self, format, *args):
@@ -203,8 +215,12 @@ def run_oauth_flow(
if cancel_event and cancel_event.is_set():
return None, "OAuth cancelled"

# Extract the state parameter from the auth URL for CSRF validation
auth_params = parse_qs(urlparse(auth_url).query)
expected_state = auth_params.get("state", [None])[0]

# Use instance-level result holder instead of class-level state
result_holder: Dict[str, Any] = {"code": None, "state": None, "error": None}
result_holder: Dict[str, Any] = {"code": None, "state": None, "error": None, "expected_state": expected_state}
handler_class = _make_callback_handler(result_holder)

try:
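For readers skimming the hunks above, the CSRF protection amounts to comparing the `state` embedded in the authorization URL with the one echoed back on the callback. A minimal standalone sketch (helper names are illustrative, not the PR's actual functions):

```python
from typing import Optional, Tuple
from urllib.parse import parse_qs, urlparse


def extract_expected_state(auth_url: str) -> Optional[str]:
    """Pull the 'state' parameter out of the provider's authorization URL."""
    return parse_qs(urlparse(auth_url).query).get("state", [None])[0]


def validate_callback(callback_path: str, expected_state: Optional[str]) -> Tuple[Optional[str], Optional[str]]:
    """Return (code, error); reject the code when the echoed state does not match."""
    params = parse_qs(urlparse(callback_path).query)
    returned_state = params.get("state", [None])[0]
    if expected_state and returned_state != expected_state:
        return None, "OAuth state mismatch (possible CSRF)"
    return params.get("code", [None])[0], params.get("error", [None])[0]


# Usage with invented example values:
expected = extract_expected_state("https://provider.example/authorize?client_id=abc&state=xyz123")
code, error = validate_callback("/callback?code=ok&state=xyz123", expected)
assert (code, error) == ("ok", None)
```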
15 changes: 13 additions & 2 deletions agent_core/core/impl/action/manager.py
@@ -32,6 +32,15 @@

nest_asyncio.apply()


def _to_pretty_json(value: Any) -> str:
"""Serialize a value to pretty-printed JSON for readable logs and event streams."""
try:
return json.dumps(value, indent=2, ensure_ascii=False, default=str)
except (TypeError, ValueError):
return str(value)


# Type aliases for hooks
OnActionStartHook = Callable[[str, Any, Dict, str, str], Any] # (run_id, action, inputs, parent_id, started_at) -> awaitable
OnActionEndHook = Callable[[str, Any, Dict, str, str, str], Any] # (run_id, action, outputs, status, parent_id, ended_at) -> awaitable
@@ -205,10 +214,11 @@ async def execute_action(
# Log to event stream
# Only pass session_id when is_running_task=True (task stream exists)
# When no task exists, use global stream by not passing task_id
pretty_input = _to_pretty_json(input_data)
self._log_event_stream(
is_gui_task=is_gui_task,
event_type="action_start",
event=f"Running action {action.name} with input: {input_data}.",
event=f"Running action {action.name} with input: {pretty_input}.",
display_message=f"Running {action.display_name}",
action_name=action.name,
session_id=session_id if is_running_task else None,
@@ -293,10 +303,11 @@ async def execute_action(
# Only pass session_id when is_running_task=True (task stream exists)
output_has_error = outputs and outputs.get("status") == "error"
display_status = "failed" if (status == "error" or output_has_error) else "completed"
pretty_output = _to_pretty_json(outputs)
self._log_event_stream(
is_gui_task=is_gui_task,
event_type="action_end",
event=f"Action {action.name} completed with output: {outputs}.",
event=f"Action {action.name} completed with output: {pretty_output}.",
display_message=f"{action.display_name} → {display_status}",
action_name=action.name,
session_id=session_id if is_running_task else None,
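The helper is copied below as a runnable illustration; the datetime value exercises the `default=str` fallback for objects json cannot serialize natively:

```python
import json
from datetime import datetime
from typing import Any


def _to_pretty_json(value: Any) -> str:
    """Serialize a value to pretty-printed JSON, falling back to str() on failure."""
    try:
        return json.dumps(value, indent=2, ensure_ascii=False, default=str)
    except (TypeError, ValueError):
        return str(value)


print(_to_pretty_json({"path": "notes/memo.md", "modified": datetime(2026, 4, 14)}))
# {
#   "path": "notes/memo.md",
#   "modified": "2026-04-14 00:00:00"
# }
```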
35 changes: 32 additions & 3 deletions agent_core/core/impl/llm/interface.py
@@ -217,11 +217,17 @@ def reinitialize(
target_base_url = base_url

try:
logger.info(f"[LLM] Reinitializing with provider: {target_provider}")
from app.config import get_llm_model as _get_llm_model # type: ignore[import]
target_model = _get_llm_model()
except Exception:
target_model = None # app context not available (e.g. agent_core standalone)

try:
logger.info(f"[LLM] Reinitializing with provider: {target_provider}, model: {target_model or 'registry default'}")
ctx = ModelFactory.create(
provider=target_provider,
interface=InterfaceType.LLM,
model_override=None,
model_override=target_model,
api_key=target_api_key,
base_url=target_base_url,
deferred=False,
@@ -261,6 +267,16 @@
else:
self._gemini_cache_manager = None

# Reset consecutive failure counter — a config change is an explicit
# user-initiated retry signal. Without this, a prior run that hit the
# failure threshold would continue to abort even with the new config.
if self._consecutive_failures > 0:
logger.info(
f"[LLM] Resetting consecutive failure counter on reinitialize "
f"(was {self._consecutive_failures})"
)
self._consecutive_failures = 0

logger.info(f"[LLM] Reinitialized successfully with provider: {self.provider}, model: {self.model}")
return self._initialized
except EnvironmentError as e:
@@ -1149,9 +1165,22 @@ def _generate_openai(
"model": self.model,
"messages": messages,
"temperature": self.temperature,
"max_tokens": self.max_tokens,
}

# Newer OpenAI models (o1, o3, o4, gpt-5, etc.) require
# 'max_completion_tokens' instead of the legacy 'max_tokens' parameter.
model_lower = (self.model or "").lower()
uses_max_completion_tokens = (
model_lower.startswith("o1")
or model_lower.startswith("o3")
or model_lower.startswith("o4")
or model_lower.startswith("gpt-5")
)
if uses_max_completion_tokens:
request_kwargs["max_completion_tokens"] = self.max_tokens
else:
request_kwargs["max_tokens"] = self.max_tokens

# Always enforce JSON output format
request_kwargs["response_format"] = {"type": "json_object"}

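The parameter selection can be read in isolation as a small pure function; this sketch does not call the OpenAI API and the model names are examples only:

```python
from typing import Any, Dict


def token_limit_kwargs(model: str, max_tokens: int) -> Dict[str, Any]:
    """Newer OpenAI models (o1/o3/o4/gpt-5) take 'max_completion_tokens'; older ones take 'max_tokens'."""
    model_lower = (model or "").lower()
    if model_lower.startswith(("o1", "o3", "o4", "gpt-5")):
        return {"max_completion_tokens": max_tokens}
    return {"max_tokens": max_tokens}


assert token_limit_kwargs("gpt-5-mini", 4096) == {"max_completion_tokens": 4096}
assert token_limit_kwargs("gpt-4o", 4096) == {"max_tokens": 4096}
```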
19 changes: 6 additions & 13 deletions agent_core/core/impl/onboarding/config.py
@@ -28,28 +28,21 @@ def _get_config_file() -> Path:

# Hard onboarding steps configuration
# Each step has: id, required (must complete), title (display name)
# Note: User name is collected during soft onboarding (conversational interview)
# User profile (name, location, language, tone, etc.) is collected in the
# user_profile form step during hard onboarding.
HARD_ONBOARDING_STEPS = [
{"id": "provider", "required": True, "title": "LLM Provider"},
{"id": "api_key", "required": True, "title": "API Key"},
{"id": "agent_name", "required": False, "title": "Agent Name"},
{"id": "user_profile", "required": False, "title": "User Profile"},
{"id": "mcp", "required": False, "title": "MCP Servers"},
{"id": "skills", "required": False, "title": "Skills"},
]

# Soft onboarding interview questions template
# Questions are grouped to reduce conversation turns
# Identity/preferences are now collected in hard onboarding.
# Soft onboarding focuses on job/role and deep life goals exploration.
SOFT_ONBOARDING_QUESTIONS = [
# Batch 1: Identity (asked together)
"name", # What should I call you?
"job", # What do you do for work?
"location", # Where are you located? (timezone inferred from this)
# Batch 2: Preferences (asked together)
"tone", # How would you like me to communicate?
"proactivity", # Should I be proactive or wait for instructions?
"approval", # What actions need your approval?
# Batch 3: Messaging
"preferred_messaging_platform", # Where should I send notifications? (telegram/whatsapp/discord/slack/tui)
# Batch 4: Life goals
"life_goals", # What are your life goals and what do you want help with?
"life_goals", # Deep life goals exploration (multiple rounds)
]
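A hypothetical consumer of the step list (not part of this PR), shown only to make the required/optional split concrete:

```python
from typing import Dict, List

HARD_ONBOARDING_STEPS: List[Dict] = [
    {"id": "provider", "required": True, "title": "LLM Provider"},
    {"id": "api_key", "required": True, "title": "API Key"},
    {"id": "agent_name", "required": False, "title": "Agent Name"},
    {"id": "user_profile", "required": False, "title": "User Profile"},
    {"id": "mcp", "required": False, "title": "MCP Servers"},
    {"id": "skills", "required": False, "title": "Skills"},
]


def blocking_steps(steps: List[Dict]) -> List[str]:
    """Only the required steps gate hard-onboarding completion."""
    return [step["id"] for step in steps if step["required"]]


assert blocking_steps(HARD_ONBOARDING_STEPS) == ["provider", "api_key"]
```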
11 changes: 10 additions & 1 deletion agent_core/core/impl/onboarding/manager.py
@@ -86,14 +86,17 @@ def is_complete(self) -> bool:
def mark_hard_complete(
self,
user_name: Optional[str] = None,
agent_name: Optional[str] = None
agent_name: Optional[str] = None,
agent_profile_picture: Optional[str] = None,
) -> None:
"""
Mark hard onboarding as complete.

Args:
user_name: User's name collected during onboarding
agent_name: Agent's name configured during onboarding
agent_profile_picture: Extension of the uploaded agent profile
picture (e.g. "png"). None leaves the current value untouched.
"""
state = self._ensure_state_loaded()
state.hard_completed = True
@@ -102,9 +105,15 @@ def mark_hard_complete(
state.user_name = user_name
if agent_name:
state.agent_name = agent_name
if agent_profile_picture is not None:
state.agent_profile_picture = agent_profile_picture
save_state(state)
logger.info("[ONBOARDING] Hard onboarding marked complete")

def save(self) -> None:
"""Persist the current state to disk."""
save_state(self._ensure_state_loaded())

def mark_soft_complete(self) -> None:
"""Mark soft onboarding as complete."""
state = self._ensure_state_loaded()
5 changes: 5 additions & 0 deletions agent_core/core/impl/onboarding/state.py
@@ -24,13 +24,16 @@ class OnboardingState:
soft_completed_at: ISO timestamp when soft onboarding completed
user_name: User's name collected during onboarding
agent_name: Agent's name configured during onboarding
agent_profile_picture: Extension of the user-uploaded agent profile
picture (e.g. "png", "jpg"). None means the bundled default is used.
"""
hard_completed: bool = False
soft_completed: bool = False
hard_completed_at: Optional[str] = None
soft_completed_at: Optional[str] = None
user_name: Optional[str] = None
agent_name: Optional[str] = None
agent_profile_picture: Optional[str] = None

@property
def is_complete(self) -> bool:
@@ -56,6 +59,7 @@ def to_dict(self) -> dict:
"soft_completed_at": self.soft_completed_at,
"user_name": self.user_name,
"agent_name": self.agent_name,
"agent_profile_picture": self.agent_profile_picture,
}

@classmethod
@@ -68,6 +72,7 @@ def from_dict(cls, data: dict) -> "OnboardingState":
soft_completed_at=data.get("soft_completed_at"),
user_name=data.get("user_name"),
agent_name=data.get("agent_name"),
agent_profile_picture=data.get("agent_profile_picture"),
)


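The new field degrades gracefully for state files written before this change; a trimmed stand-in for the dataclass (only the fields relevant here) demonstrates the round trip:

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class OnboardingState:
    hard_completed: bool = False
    agent_name: Optional[str] = None
    agent_profile_picture: Optional[str] = None

    def to_dict(self) -> dict:
        return {
            "hard_completed": self.hard_completed,
            "agent_name": self.agent_name,
            "agent_profile_picture": self.agent_profile_picture,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "OnboardingState":
        return cls(
            hard_completed=data.get("hard_completed", False),
            agent_name=data.get("agent_name"),
            # A missing key (state saved by an older version) falls back to None,
            # i.e. the bundled default avatar.
            agent_profile_picture=data.get("agent_profile_picture"),
        )


old_payload = {"hard_completed": True, "agent_name": "Craft"}  # pre-upgrade file
state = OnboardingState.from_dict(old_payload)
assert state.agent_profile_picture is None
state.agent_profile_picture = "png"
assert OnboardingState.from_dict(state.to_dict()).agent_profile_picture == "png"
```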
1 change: 1 addition & 0 deletions agent_core/core/impl/task/manager.py
@@ -282,6 +282,7 @@ def create_task(
compiled_actions=compiled_actions,
selected_skills=selected_skills or [],
conversation_id=conversation_id,
source_platform=original_platform,
)

self.tasks[task_id] = task
31 changes: 23 additions & 8 deletions agent_core/core/impl/vlm/interface.py
@@ -259,7 +259,7 @@ def describe_image_bytes(
return cleaned
except Exception as e:
logger.error(f"[ERROR] {e}")
return ""
raise

async def generate_response_async(
self,
@@ -332,13 +332,29 @@ def _openai_describe_bytes(self, image_bytes: bytes, sys: str | None, usr: str)
],
}
)
response = self.client.chat.completions.create(
model=self.model,
messages=messages,
temperature=self.temperature,
max_tokens=2048,
response_format={"type": "json_object"},
# Newer OpenAI models (o1, o3, o4, gpt-5, etc.) require
# 'max_completion_tokens' instead of the legacy 'max_tokens' parameter.
# Note: response_format=json_object is intentionally NOT set here because
# describe_image returns plain text descriptions, not JSON. Enabling JSON
# mode would also require the prompt to contain the word "json".
request_kwargs: Dict[str, Any] = {
"model": self.model,
"messages": messages,
"temperature": self.temperature,
}
model_lower = (self.model or "").lower()
uses_max_completion_tokens = (
model_lower.startswith("o1")
or model_lower.startswith("o3")
or model_lower.startswith("o4")
or model_lower.startswith("gpt-5")
)
if uses_max_completion_tokens:
request_kwargs["max_completion_tokens"] = 2048
else:
request_kwargs["max_tokens"] = 2048

response = self.client.chat.completions.create(**request_kwargs)
content = response.choices[0].message.content.strip()
token_count_input = response.usage.prompt_tokens
token_count_output = response.usage.completion_tokens
@@ -451,7 +467,6 @@ def _byteplus_describe_bytes(self, image_bytes: bytes, sys: str | None, usr: str
"messages": messages,
"temperature": self.temperature,
"max_tokens": 2048,
"response_format": {"type": "json_object"},
}
headers = {
"Content-Type": "application/json",
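Because describe_image_bytes now re-raises instead of returning an empty string, callers that relied on the empty-string sentinel need an explicit try/except. A hedged caller-side sketch, with names and signature assumed rather than taken from the PR:

```python
def describe_or_fallback(vlm, image_bytes: bytes) -> str:
    """Wrap the raise-on-failure behavior for callers that want a soft fallback."""
    try:
        return vlm.describe_image_bytes(image_bytes)
    except Exception as exc:
        # Before this PR the interface logged the error and returned "",
        # making failures indistinguishable from genuinely empty descriptions.
        return f"[image description unavailable: {exc}]"
```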
8 changes: 4 additions & 4 deletions agent_core/core/models/model_registry.py
@@ -15,8 +15,8 @@
InterfaceType.EMBEDDING: "text-embedding-004",
},
"anthropic": {
InterfaceType.LLM: "claude-sonnet-4-6",
InterfaceType.VLM: "claude-sonnet-4-6",
InterfaceType.LLM: "claude-sonnet-4-5-20250929",
InterfaceType.VLM: "claude-sonnet-4-5-20250929",
InterfaceType.EMBEDDING: None, # Anthropic does not provide native embedding models
},
"byteplus": {
@@ -36,7 +36,7 @@
},
"deepseek": {
InterfaceType.LLM: "deepseek-chat",
InterfaceType.VLM: "deepseek-chat",
InterfaceType.VLM: None,
InterfaceType.EMBEDDING: None,
},
"moonshot": {
@@ -46,7 +46,7 @@
},
"grok": {
InterfaceType.LLM: "grok-3",
InterfaceType.VLM: "grok-2-vision-1212",
InterfaceType.VLM: "grok-4-0709",
InterfaceType.EMBEDDING: None,
},
}
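Only the table entries come from the diff above; the lookup helper below is an assumption about how such a registry is typically consumed, and it shows the effect of the deepseek VLM default becoming None:

```python
from enum import Enum
from typing import Optional


class InterfaceType(Enum):
    LLM = "llm"
    VLM = "vlm"
    EMBEDDING = "embedding"


DEFAULT_MODELS = {
    "anthropic": {
        InterfaceType.LLM: "claude-sonnet-4-5-20250929",
        InterfaceType.VLM: "claude-sonnet-4-5-20250929",
        InterfaceType.EMBEDDING: None,
    },
    "deepseek": {
        InterfaceType.LLM: "deepseek-chat",
        InterfaceType.VLM: None,  # no vision-capable default
        InterfaceType.EMBEDDING: None,
    },
}


def default_model(provider: str, interface: InterfaceType) -> Optional[str]:
    """Return the registry default, or None when the provider has no model for that interface."""
    return DEFAULT_MODELS.get(provider, {}).get(interface)


assert default_model("deepseek", InterfaceType.VLM) is None
assert default_model("anthropic", InterfaceType.LLM) == "claude-sonnet-4-5-20250929"
```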
5 changes: 3 additions & 2 deletions agent_core/core/prompts/action.py
@@ -198,15 +198,16 @@
- When all todos completed BUT the user sends a NEW question or request, do NOT end the task. Add new todos for the follow-up and continue working.
- If unrecoverable error, use 'task_end' with status 'abort'.
- You must provide concrete parameter values for the action's input_schema.
- When setting wait_for_user_reply=true on a send message action, the message MUST end with an explicit question (e.g., "Does this look good?" or "Would you like any changes?"). The agent will pause and wait for user input — if the message is a statement without a question, the user won't know a reply is expected and the task will hang indefinitely.

File Reading Best Practices:
- read_file returns content with line numbers in cat -n format
- For large files, use offset/limit parameters for pagination:
* Default reads first 2000 lines - check has_more to know if more exists
* Use offset to skip to specific line numbers
* Use limit to control how many lines to read
- To find specific content in large files:
1. Use grep_files with keywords to locate relevant sections
- To find specific content in files:
1. Use grep_files with a regex pattern to locate relevant sections (use output_mode='content' for lines with line numbers, or 'files_with_matches' to discover files first)
2. Note the line numbers from grep results
3. Use read_file with appropriate offset to read that section
- DO NOT repeatedly read entire large files - use targeted reading with offset/limit
1 change: 1 addition & 0 deletions agent_core/core/prompts/context.py
@@ -192,6 +192,7 @@
ENVIRONMENTAL_CONTEXT_PROMPT = """
<agent_environment>
- User Location: {user_location}
- Current Working Directory: {working_directory}
- Operating System: {operating_system} {os_version} ({os_platform})
- VM Operating System: {vm_operating_system} {vm_os_version} ({vm_os_platform})
- VM's screen resolution (GUI mode): {vm_resolution}
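A minimal formatting sketch for the new placeholder; the template is trimmed to a few of the lines shown in this hunk and the values are invented:

```python
ENVIRONMENTAL_CONTEXT_PROMPT = """
<agent_environment>
- User Location: {user_location}
- Current Working Directory: {working_directory}
- Operating System: {operating_system} {os_version} ({os_platform})
</agent_environment>
"""

print(ENVIRONMENTAL_CONTEXT_PROMPT.format(
    user_location="Kuala Lumpur",
    working_directory="/home/agent/workspace",  # example value, not from the PR
    operating_system="Ubuntu",
    os_version="24.04",
    os_platform="linux",
))
```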
4 changes: 2 additions & 2 deletions agent_core/core/prompts/routing.py
@@ -53,8 +53,8 @@

<output_format>
Return ONLY a valid JSON object:
- Route to existing: {{ "action": "route", "session_id": "<session_id>", "reason": "<brief>" }}
- Create new: {{ "action": "new", "session_id": "new", "reason": "<brief>" }}
- Route to existing: {{ "reason": "<brief>", "action": "route", "session_id": "<session_id>" }}
- Create new: {{ "reason": "<brief>", "action": "new", "session_id": "new" }}
</output_format>
"""
