Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .env.example
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
OPENAI_API_KEY=
GOOGLE_API_KEY=
ANTHROPIC_API_KEY=
BYTEPLUS_API_KEY=
REMOTE_MODEL_URL=
OMNIPARSER_BASE_URL=
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,7 @@ node_modules
.env
**/agent_logs.txt
**/memory.txt
**/gui/config
**/gui/config
.claude/
OmniParser_CraftOS/
_launch_agent.cmd
4 changes: 1 addition & 3 deletions config.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
{
"conda_environment_name": "white-collar-agent",
"conda_environment_created": true,
"omniparser_repo_path": "/home/ahmad/Work/CraftOS/WhiteCollarAgent/OmniParser_CraftOS"
"omniparser_repo_path": "C:\\Users\\zfoong\\Desktop\\agent\\code\\git\\WhiteCollarAgent\\OmniParser_CraftOS"
}
81 changes: 78 additions & 3 deletions core/action/action_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,86 @@ def _atomic_action_venv_process(
Executes an action inside an ephemeral virtual environment.
Runs in a SEPARATE PROCESS.
"""
# GUI mode - in a Docker container
if mode == "GUI":
return GUIHandler.execute_action(GUIHandler.TARGET_CONTAINER, action_code, input_data, mode)

# Sandboxed mode - NOT in a Docker container
try:
result = GUIHandler.execute_action(GUIHandler.TARGET_CONTAINER, action_code, input_data, mode)
return result
with tempfile.TemporaryDirectory(prefix="action_venv_") as tmpdir:
tmp = Path(tmpdir)

# ─── Create virtual environment ───
venv_dir = tmp / "venv"
venv.EnvBuilder(with_pip=True).create(venv_dir)

python_bin = (
venv_dir / "Scripts" / "python.exe"
if os.name == "nt"
else venv_dir / "bin" / "python"
)

# ─── Write action script ───
# We inject input_data as a global so the action code can access it
action_file = tmp / "action.py"
action_file.write_text(
f"""
import json
import sys

input_data = json.loads({json.dumps(json.dumps(input_data))})

# ─── USER CODE ───
{action_code}

# ─── Find and call the function ───
func = None
local_vars = dict(locals())
for name, obj in local_vars.items():
if callable(obj) and not name.startswith('_') and name not in ('input_data', 'json', 'sys'):
func = obj
break

if func is None:
# Fallback: check if output variable was set (legacy behavior)
if 'output' in local_vars:
print(local_vars['output'])
sys.exit(0)
else:
sys.exit(1)

# Call the function and print result as JSON
try:
result = func(input_data)
if isinstance(result, dict):
print(json.dumps(result, ensure_ascii=False))
else:
print(str(result))
except Exception as e:
import traceback
print("Execution failed: " + str(e) + "\\n" + traceback.format_exc(), file=sys.stderr)
sys.exit(1)
""",
encoding="utf-8",
)

proc = subprocess.run(
[python_bin, str(action_file)],
capture_output=True,
text=True,
timeout=timeout,
)

return {
"stdout": proc.stdout.strip(),
"stderr": proc.stderr.strip(),
"returncode": proc.returncode,
}

except subprocess.TimeoutExpired:
return {"stdout": "", "stderr": "Execution timed out", "returncode": -1}
except Exception as e:
return {"status": "error", "message": str(e)}
return {"stdout": "", "stderr": f"Execution failed: {e}", "returncode": -1}

def _atomic_action_internal(
action_name: str,
Expand Down
4 changes: 2 additions & 2 deletions core/action/action_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ async def execute_action(
self._log_event_stream(
is_gui_task=is_gui_task,
event_type="action_start",
event=f"Running action {action.name} with input: {input_data}. {context if context else ''}",
event=f"Running action {action.name} with input: {input_data}.",
display_message=f"Running {action.name}",
action_name=action.name,
)
Expand Down Expand Up @@ -239,7 +239,7 @@ async def execute_action(
self._log_event_stream(
is_gui_task=is_gui_task,
event_type="action_end",
event=f"Action {action.name} completed with output: {outputs}. {context if context else ''}",
event=f"Action {action.name} completed with output: {outputs}.",
display_message=f"{action.name} → {display_status}",
action_name=action.name,
)
Expand Down
55 changes: 49 additions & 6 deletions core/agent_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,8 @@ def __init__(
*,
data_dir: str = "core/data",
chroma_path: str = "./chroma_db",
llm_provider: str = "byteplus",
llm_provider: str = "anthropic",
deferred_init: bool = False,
) -> None:
"""
This constructor initializes all agent components.
Expand All @@ -98,16 +99,22 @@ def __init__(
RAG components.
llm_provider: Provider name passed to :class:`LLMInterface` and
:class:`VLMInterface`.
"""

deferred_init: If True, allow LLM/VLM initialization to be deferred
until API key is configured (useful for first-time setup).
"""

# persistence & memory
self.db_interface = self._build_db_interface(
data_dir = data_dir, chroma_path=chroma_path
)

# LLM + prompt plumbing
self.llm = LLMInterface(provider=llm_provider, db_interface=self.db_interface)
self.vlm = VLMInterface(provider=llm_provider)
# LLM + prompt plumbing (may be deferred if API key not yet configured)
self.llm = LLMInterface(
provider=llm_provider,
db_interface=self.db_interface,
deferred=deferred_init,
)
self.vlm = VLMInterface(provider=llm_provider, deferred=deferred_init)

self.event_stream_manager = EventStreamManager(self.llm)

Expand Down Expand Up @@ -744,6 +751,42 @@ def _parse_reasoning_response(self, response: str) -> ReasoningResult:
action_query=action_query,
)

# =====================================
# Initialization
# =====================================

def reinitialize_llm(self, provider: str | None = None) -> bool:
    """Reinitialize LLM and VLM interfaces with updated configuration.

    Call this after updating environment variables with new API keys.

    Args:
        provider: Optional provider to switch to. If None, uses current provider.

    Returns:
        True if both LLM and VLM were initialized successfully.
    """
    # Each reinitialize() call returns a success flag; presumably it re-reads
    # the (now updated) environment/API-key configuration — TODO confirm in
    # LLMInterface/VLMInterface.
    llm_ok = self.llm.reinitialize(provider)
    vlm_ok = self.vlm.reinitialize(provider)

    if llm_ok and vlm_ok:
        logger.info(f"[AGENT] LLM and VLM reinitialized with provider: {self.llm.provider}")
        # Update GUI module provider if needed
        # NOTE(review): GUIHandler.gui_module is a class-level attribute, so this
        # rebuild affects every agent instance sharing GUIHandler — confirm that
        # is intended for multi-agent setups.
        if hasattr(self, 'action_library') and hasattr(GUIHandler, 'gui_module'):
            GUIHandler.gui_module = GUIModule(
                provider=self.llm.provider,
                action_library=self.action_library,
                action_router=self.action_router,
                context_engine=self.context_engine,
                action_manager=self.action_manager,
            )
    # Both flags must hold for the reinitialization to count as successful.
    return llm_ok and vlm_ok

@property
def is_llm_initialized(self) -> bool:
    """Check if the LLM interface is properly initialized.

    Delegates to ``self.llm.is_initialized``; with ``deferred_init`` this is
    presumably False until an API key is configured and
    :meth:`reinitialize_llm` succeeds — confirm against LLMInterface.
    """
    return self.llm.is_initialized

# =====================================
# Lifecycle
# =====================================
Expand Down
73 changes: 73 additions & 0 deletions core/data/action/end task.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
from core.action.action_framework.registry import action


@action(
name="end task",
description=(
"End the current task for this session with a final status. "
"Use status='complete' when the task is fully done, or 'abort' when it "
"should be cancelled/failed early. Always provide a brief reason."
),
default=True,
mode="CLI",
input_schema={
"status": {
"type": "string",
"enum": ["complete", "abort"],
"example": "complete",
"description": "Final status for the task: 'complete' or 'abort'.",
},
"reason": {
"type": "string",
"example": "All steps executed successfully.",
"description": "Why the task is considered complete or why it should be aborted.",
},
},
output_schema={
"status": {
"type": "string",
"example": "success",
"description": "Result of the operation.",
},
"task_id": {
"type": "string",
"example": "user_request_1_abc123",
"description": "The session/task id affected.",
},
},
test_payload={
"status": "complete",
"reason": "All steps executed successfully.",
"simulated_mode": True,
},
)
def end_task(input_data: dict) -> dict:
    """End the current session task with a terminal status.

    Validates the requested status ('complete' or 'abort'), short-circuits in
    simulated mode for testing, and otherwise delegates to the internal action
    interface, normalizing its 'ok' status to this action's 'success'.
    """
    import asyncio

    raw_status = input_data.get("status") or ""
    final_status = raw_status.strip().lower()
    reason = input_data.get("reason")

    # Guard clause: only the two supported terminal states are accepted.
    if final_status not in ("complete", "abort"):
        return {
            "status": "error",
            "message": "Invalid status for end task. Use 'complete' or 'abort'.",
        }

    # In simulated mode, skip the actual interface call for testing
    if input_data.get("simulated_mode", False):
        return {"status": "success", "task_id": "test_task_id"}

    import core.internal_action_interface as iai

    interface = iai.InternalActionInterface
    if final_status == "complete":
        pending = interface.mark_task_completed(message=reason)
    else:
        # Map 'abort' to a cancellation by default
        pending = interface.mark_task_cancel(reason=reason)
    outcome = asyncio.run(pending)

    # Translate the interface's 'ok' status into this action's 'success'.
    if isinstance(outcome, dict) and outcome.get("status") == "ok":
        outcome["status"] = "success"

    return outcome

48 changes: 0 additions & 48 deletions core/data/action/mark task cancel.py

This file was deleted.

48 changes: 0 additions & 48 deletions core/data/action/mark task completed.py

This file was deleted.

Loading