diff --git a/apps/memos-local-plugin/adapters/hermes/memos_provider/__init__.py b/apps/memos-local-plugin/adapters/hermes/memos_provider/__init__.py
index d90466871..5a1154648 100644
--- a/apps/memos-local-plugin/adapters/hermes/memos_provider/__init__.py
+++ b/apps/memos-local-plugin/adapters/hermes/memos_provider/__init__.py
@@ -156,6 +156,14 @@ def initialize(self, session_id: str, **kwargs: Any) -> None:  # type: ignore[ov
             return
         try:
             self._bridge = MemosBridgeClient()
+            # Register the fallback LLM handler BEFORE we open the
+            # session so it is available the very first time the
+            # plugin's facade asks for help (e.g. on the first
+            # `turn.start` retrieval call).
+            self._bridge.register_host_handler(
+                "host.llm.complete",
+                self._handle_host_llm_complete,
+            )
             self._open_session(session_id)
             logger.info(
                 "MemOS: bridge ready session=%s platform=%s (episode deferred)",
@@ -713,9 +721,7 @@ def handle_tool_call(self, tool_name: str, args: dict[str, Any], **_kwargs: Any)
                 elif kind == "policy":
                     body = self._clip(
                         "\n\n".join(
-                            part
-                            for part in [item.get("title"), item.get("procedure")]
-                            if part
+                            part for part in [item.get("title"), item.get("procedure")] if part
                         )
                     )
                     meta = {
@@ -905,6 +911,148 @@ def shutdown(self) -> None:  # type: ignore[override]
         # as a daemon if its viewer is running, so the memory panel
         # remains accessible between `hermes chat` sessions.
 
+    # ─── Host LLM bridge (fallback for plugin-side model failures) ────────
+
+    def _handle_host_llm_complete(self, params: dict[str, Any]) -> dict[str, Any]:
+        """Run a fallback LLM call using the host (hermes) agent's models.
+
+        Wired into the bridge's reverse-RPC channel under the
+        ``host.llm.complete`` method. Triggered when the plugin's
+        configured summary or skill-evolver model fails — instead of
+        bubbling the error straight up (which would stall the V7
+        capture / reflection / skill pipeline), we replay the prompt
+        through ``agent.auxiliary_client.call_llm`` so hermes' own
+        provider stack (including its OpenRouter / Codex / custom
+        endpoint resolution) handles it.
+
+        If the host LLM also fails this raises, the bridge converts
+        that into a JSON-RPC error, the LlmClient ``markFail``s, and
+        the Overview card flips red — exactly matching the spec
+        "if the agent's main model is also down, stop falling back
+        and surface red".
+        """
+        messages = params.get("messages")
+        if not isinstance(messages, list) or not messages:
+            raise ValueError("host.llm.complete: missing messages")
+
+        # Lazy imports — these pull in heavy deps (openai client,
+        # credential pool, …) that we don't want to load until a
+        # fallback is actually requested.
+        try:
+            from agent.auxiliary_client import call_llm  # type: ignore[import-not-found]
+            from hermes_cli.runtime_provider import (  # type: ignore[import-not-found]
+                resolve_runtime_provider,
+            )
+        except Exception as err:
+            raise RuntimeError(f"host LLM bridge unavailable: {err}") from err
+
+        # Resolve hermes' MAIN conversation provider so the fallback
+        # uses exactly what the user configured for chat. Walking the
+        # generic auxiliary auto-detect chain would otherwise depend
+        # on env vars (`OPENROUTER_API_KEY`, `OPENAI_API_KEY`, …) that
+        # often don't propagate into the bridge subprocess and would
+        # leave us with no working credential. Pinning to the resolved
+        # main runtime guarantees we hit the same endpoint the user
+        # already authenticated for chat.
+        try:
+            runtime = resolve_runtime_provider()
+        except Exception as err:
+            raise RuntimeError(f"could not resolve hermes main runtime: {err}") from err
+
+        main_runtime: dict[str, str] = {}
+        for field in ("provider", "model", "base_url", "api_key", "api_mode"):
+            value = runtime.get(field) if isinstance(runtime, dict) else None
+            if isinstance(value, str) and value.strip():
+                main_runtime[field] = value.strip()
+
+        normalized = [
+            {
+                "role": str(m.get("role", "user")),
+                "content": str(m.get("content", "")),
+            }
+            for m in messages
+            if isinstance(m, dict)
+        ]
+        timeout_ms = params.get("timeoutMs")
+        timeout_s: float | None = None
+        if isinstance(timeout_ms, (int, float)) and timeout_ms > 0:
+            timeout_s = float(timeout_ms) / 1000.0
+
+        max_tokens = params.get("maxTokens")
+        temperature = params.get("temperature")
+
+        kwargs: dict[str, Any] = {
+            "messages": normalized,
+            # `main_runtime` makes `_resolve_auto` prefer the user's
+            # main conversation provider + model over the generic auto
+            # chain. If the user's main provider is also down,
+            # `call_llm` raises — which is exactly the "agent's own
+            # model is broken too, stop falling back" semantic we want
+            # (red light on Overview).
+            "main_runtime": main_runtime,
+        }
+        if isinstance(max_tokens, (int, float)) and max_tokens > 0:
+            kwargs["max_tokens"] = int(max_tokens)
+        if isinstance(temperature, (int, float)):
+            kwargs["temperature"] = float(temperature)
+        if timeout_s is not None:
+            kwargs["timeout"] = timeout_s
+
+        started = time.time()
+        try:
+            response = call_llm(**kwargs)
+        except Exception as err:
+            # Surface the original failure verbatim — the LlmClient
+            # will tag this as a "host fallback failed" terminal error
+            # and the Overview red-light path takes over.
+            raise RuntimeError(f"host LLM call failed: {err}") from err
+
+        # `call_llm` returns an OpenAI ChatCompletion-shaped object.
+        # Pluck the assistant text + token usage defensively so a
+        # non-standard host (e.g. Anthropic native) still produces a
+        # populated response.
+        text = ""
+        model = ""
+        usage_dict: dict[str, int] = {}
+        try:
+            choices = getattr(response, "choices", None) or response.get("choices", [])  # type: ignore[union-attr]
+            if choices:
+                first = choices[0]
+                msg = getattr(first, "message", None) or first.get("message", {})  # type: ignore[union-attr]
+                content = getattr(msg, "content", None) or msg.get("content", "")  # type: ignore[union-attr]
+                text = str(content or "")
+            model = (
+                getattr(response, "model", None)
+                or response.get("model", "")  # type: ignore[union-attr]
+                or ""
+            )
+            u = getattr(response, "usage", None) or response.get("usage", None)  # type: ignore[union-attr]
+            if u is not None:
+                pt = getattr(u, "prompt_tokens", None)
+                ct = getattr(u, "completion_tokens", None)
+                tt = getattr(u, "total_tokens", None)
+                if pt is None and isinstance(u, dict):
+                    pt = u.get("prompt_tokens")
+                    ct = u.get("completion_tokens")
+                    tt = u.get("total_tokens")
+                if isinstance(pt, int):
+                    usage_dict["promptTokens"] = pt
+                if isinstance(ct, int):
+                    usage_dict["completionTokens"] = ct
+                if isinstance(tt, int):
+                    usage_dict["totalTokens"] = tt
+        except Exception:
+            logger.debug("host.llm.complete: shape parse failed", exc_info=True)
+
+        result: dict[str, Any] = {
+            "text": text,
+            "model": str(model or ""),
+            "durationMs": int((time.time() - started) * 1000),
+        }
+        if usage_dict:
+            result["usage"] = usage_dict
+        return result
+
     # ─── Internals ────────────────────────────────────────────────────────
 
     def _open_session(self, session_id: str = "") -> None:
@@ -928,11 +1076,7 @@ def _is_transport_closed(self, err: Exception) -> bool:
         if isinstance(err, BridgeError) and err.code == "transport_closed":
             return True
         msg = str(err).lower()
-        return (
-            "broken pipe" in msg
-            or "bridge closed" in msg
-            or "transport_closed" in msg
-        )
+        return "broken pipe" in msg or "bridge closed" in msg or "transport_closed" in msg
 
     def _reconnect_bridge(self, session_id: str = "") -> None:
         old_bridge = self._bridge
diff --git a/apps/memos-local-plugin/adapters/hermes/memos_provider/bridge_client.py b/apps/memos-local-plugin/adapters/hermes/memos_provider/bridge_client.py
index 0810c6560..9d82a2264 100644
--- a/apps/memos-local-plugin/adapters/hermes/memos_provider/bridge_client.py
+++ b/apps/memos-local-plugin/adapters/hermes/memos_provider/bridge_client.py
@@ -66,6 +66,14 @@ def __init__(
         self._pending: dict[int, dict[str, Any]] = {}
         self._events: list[Callable[[dict[str, Any]], None]] = []
         self._logs: list[Callable[[dict[str, Any]], None]] = []
+        # Reverse-direction handlers: the bridge can send us a
+        # JSON-RPC request via `serverRequest(...)` (e.g.
+        # `host.llm.complete` for fallback LLM calls). Registered
+        # methods run on the dedicated reader thread; long-running
+        # work should spawn its own worker if it needs to. Each
+        # handler returns a JSON-serialisable value or raises to
+        # surface a JSON-RPC error back to the bridge.
+        self._host_handlers: dict[str, Callable[[dict[str, Any]], Any]] = {}
         self._closed = False
 
         node = node_binary or shutil.which("node") or "node"
@@ -173,6 +181,22 @@ def on_event(self, cb: Callable[[dict[str, Any]], None]) -> None:
     def on_log(self, cb: Callable[[dict[str, Any]], None]) -> None:
         self._logs.append(cb)
 
+    def register_host_handler(
+        self,
+        method: str,
+        handler: Callable[[dict[str, Any]], Any],
+    ) -> None:
+        """Register a handler for bridge → adapter (reverse) requests.
+
+        The Node-side bridge calls these via ``stdio.serverRequest``.
+        Most-recent registration wins. The handler runs on the reader
+        thread; if it blocks for a long time it stalls every other
+        bridge → adapter notification, so handlers that need to do
+        heavy work (e.g. an LLM call) are still expected to return
+        within the bridge-side timeout (default 60 s).
+        """
+        self._host_handlers[method] = handler
+
     def close(self) -> None:
         if self._closed:
             return
@@ -225,6 +249,68 @@ def _read_loop(self) -> None:
                     except Exception:
                         logger.debug("log listener threw", exc_info=True)
                 continue
+            # Reverse-direction request: the bridge is asking the
+            # adapter to do something (e.g. run a fallback LLM call
+            # via `host.llm.complete`). Dispatch to the registered
+            # handler and write the response back synchronously.
+            method = msg.get("method")
+            rpc_id = msg.get("id")
+            if (
+                isinstance(method, str)
+                and rpc_id is not None
+                and "result" not in msg
+                and "error" not in msg
+            ):
+                handler = self._host_handlers.get(method)
+                if handler is None:
+                    self._send_response(
+                        rpc_id,
+                        error={
+                            "code": -32601,
+                            "message": f"method not found: {method}",
+                            "data": {"code": "unknown_method"},
+                        },
+                    )
+                    continue
+                params = msg.get("params") or {}
+                if not isinstance(params, dict):
+                    params = {}
+                try:
+                    result = handler(params)
+                    self._send_response(rpc_id, result=result)
+                except Exception as err:
+                    logger.warning("host handler %s failed: %s", method, err)
+                    self._send_response(
+                        rpc_id,
+                        error={
+                            "code": -32000,
+                            "message": str(err) or err.__class__.__name__,
+                            "data": {"code": "host_handler_failed"},
+                        },
+                    )
+                continue
+
+    def _send_response(
+        self,
+        rpc_id: Any,
+        *,
+        result: Any = None,
+        error: dict[str, Any] | None = None,
+    ) -> None:
+        """Write a JSON-RPC response for a reverse-direction request."""
+        if self._closed:
+            return
+        payload: dict[str, Any] = {"jsonrpc": "2.0", "id": rpc_id}
+        if error is not None:
+            payload["error"] = error
+        else:
+            payload["result"] = result
+        with self._lock:
+            try:
+                self._proc.stdin.write(json.dumps(payload, ensure_ascii=False) + "\n")
+                self._proc.stdin.flush()
+            except (BrokenPipeError, OSError):
+                pass
 
     def _stderr_loop(self) -> None:
         assert self._proc.stderr is not None
diff --git a/apps/memos-local-plugin/agent-contract/memory-core.ts b/apps/memos-local-plugin/agent-contract/memory-core.ts
index caf5c7e31..bb4525207 100644
--- a/apps/memos-local-plugin/agent-contract/memory-core.ts
+++ b/apps/memos-local-plugin/agent-contract/memory-core.ts
@@ -57,17 +57,39 @@ export interface CoreHealth {
 
 /**
  * Per-model connectivity summary used by the viewer's Overview page.
- * `available` reflects whether the client is configured (non-null);
- * `lastOkAt` / `lastError` reflect the most recent **actual** call
- * outcome tracked by the runtime. A green dot means "called successfully
- * at least once and the last call was good"; red means "most recent
- * call failed" + the error text to surface in a tooltip.
+ *
+ * The viewer renders the slot in one of four colours, picked by
+ * comparing the timestamps below — most-recent event wins:
+ *
+ *   - **green** (`ok`)        : `lastOkAt` is the latest stamp →
+ *     primary provider answered directly.
+ *   - **yellow** (`fallback`) : `lastFallbackAt` is the latest stamp →
+ *     primary provider failed *but* the host LLM bridge rescued the
+ *     call. Only ever set on the LLM / skillEvolver slots; the
+ *     embedder has no fallback path so this stays `null`.
+ *   - **red** (`err`)         : `lastError.at` is the latest stamp →
+ *     primary provider failed and either no fallback was configured
+ *     or the fallback also failed. The accompanying `message` is
+ *     surfaced verbatim on the card.
+ *   - **idle / off**          : every timestamp is `null` → the
+ *     facade has not been called yet (idle) or no client is
+ *     configured at all (off).
+ *
+ * `lastError` is **sticky** — it is not cleared by a later success.
+ * The viewer's timestamp comparison naturally promotes a fresh
+ * success over a stale failure, while keeping the message available
+ * in case a subsequent failure flips the card back to red.
  */
 export interface ModelHealth {
   available: boolean;
   provider: string;
   model: string;
   lastOkAt: number | null;
+  /**
+   * Latest time the primary provider failed but the host LLM bridge
+   * answered successfully. Always `null` on the embedder slot.
+   */
+  lastFallbackAt: number | null;
   lastError: { at: number; message: string } | null;
 }
 
diff --git a/apps/memos-local-plugin/bridge.cts b/apps/memos-local-plugin/bridge.cts
index 913f10ce6..01b0ae8f2 100644
--- a/apps/memos-local-plugin/bridge.cts
+++ b/apps/memos-local-plugin/bridge.cts
@@ -63,9 +63,73 @@ async function main(): Promise<void> {
   )) as typeof import("./server/http.js");
 
   const pkgVersion = require("./package.json").version;
+
+  // ─── Host LLM bridge (reverse RPC, lazy-bound to stdio) ────────
+  // We need to register the bridge BEFORE bootstrap creates the
+  // LlmClients (so the very first `shouldFallback()` check sees a
+  // non-null bridge), but `stdio` itself doesn't exist until later
+  // in this function. The trick: hand a placeholder closure to
+  // bootstrap that defers actual stdio access to the time of the
+  // first fallback call. By then `stdio` has been assigned.
+  //
+  // Routing through `bootstrapMemoryCoreFull({ hostLlmBridge })`
+  // (instead of having `bridge.cts` call `registerHostLlmBridge`
+  // directly) avoids a subtle ESM module-identity issue: the static
+  // `import` chain inside `core/llm/client.ts` and the dynamic
+  // `await import(...)` here resolve to the same file URL but Node
+  // can occasionally treat them as different module instances with
+  // independent `currentBridge` slots. Registering inside bootstrap
+  // forces both ends to share the same module instance.
+  let stdio: import("./bridge/stdio.js").StdioServerHandle | null = null;
+  const lazyHostLlmBridge: import("./core/llm/host-bridge.js").HostLlmBridge =
+    {
+      id: `stdio.host.${args.agent}.v1`,
+      async complete(input) {
+        if (!stdio) {
+          throw new Error(
+            "host LLM bridge invoked before stdio server was ready",
+          );
+        }
+        const result = (await stdio.serverRequest(
+          "host.llm.complete",
+          {
+            messages: input.messages,
+            model: input.model,
+            temperature: input.temperature,
+            maxTokens: input.maxTokens,
+            timeoutMs: input.timeoutMs,
+          },
+          { timeoutMs: (input.timeoutMs ?? 60_000) + 5_000 },
+        )) as {
+          text?: string;
+          model?: string;
+          usage?: {
+            promptTokens?: number;
+            completionTokens?: number;
+            totalTokens?: number;
+          };
+          durationMs?: number;
+        };
+        return {
+          text: typeof result?.text === "string" ? result.text : "",
+          model:
+            typeof result?.model === "string"
+              ? result.model
+              : input.model ?? "",
+          usage: result?.usage,
+          durationMs:
+            typeof result?.durationMs === "number" ? result.durationMs : 0,
+        };
+      },
+    };
+
   const { core, config, home } = await bootstrapMemoryCoreFull({
     agent: args.agent,
     pkgVersion,
+    // Skip in daemon mode — daemon has no stdio, so reverse RPC
+    // can't ever fire and registering would just hide the "no
+    // bridge" error message.
+    hostLlmBridge: args.daemon ? null : lazyHostLlmBridge,
   });
   await core.init();
 
@@ -78,38 +142,56 @@ async function main(): Promise<void> {
   // viewer daemon. Used by install.sh (post-install) and admin/restart
   // (self-restart) to keep the Memory Viewer always available.
   if (args.daemon) {
+    // Daemon mode is the target of `POST /api/v1/admin/restart`,
+    // which re-spawns the bridge after a short sleep. On busy
+    // machines the previous bridge's listening socket can take a
+    // moment longer than expected to release, so we retry the bind
+    // a few times before giving up. Without this the user sees
+    // "重启超时" in the viewer because the new daemon raced its
+    // predecessor and lost.
     let viewer: import("./server/types.js").ServerHandle | null = null;
-    try {
-      viewer = await startHttpServer(
-        {
-          core,
-          home,
-          logTail: () => memoryBuffer().tail({ limit: 200 }),
-        },
-        {
-          port: viewerPort,
-          host: config.viewer.bindHost,
-          staticRoot: path.resolve(__dirname, "web/dist"),
-          agent: args.agent,
-        },
-      );
-      process.stderr.write(
-        `bridge: daemon viewer live at ${viewer.url} (agent=${args.agent})\n`,
-      );
-    } catch (err) {
-      const e = err as NodeJS.ErrnoException;
-      if (e?.code === "EADDRINUSE") {
+    const maxBindAttempts = 10;
+    for (let attempt = 1; attempt <= maxBindAttempts; attempt++) {
+      try {
+        viewer = await startHttpServer(
+          {
+            core,
+            home,
+            logTail: () => memoryBuffer().tail({ limit: 200 }),
+          },
+          {
+            port: viewerPort,
+            host: config.viewer.bindHost,
+            staticRoot: path.resolve(__dirname, "web/dist"),
+            agent: args.agent,
+          },
+        );
+        process.stderr.write(
+          `bridge: daemon viewer live at ${viewer.url} (agent=${args.agent})\n`,
+        );
+        break;
+      } catch (err) {
+        const e = err as NodeJS.ErrnoException;
+        if (e?.code === "EADDRINUSE" && attempt < maxBindAttempts) {
+          process.stderr.write(
+            `bridge: daemon port :${viewerPort} busy (attempt ${attempt}/${maxBindAttempts}), retrying in 1s...\n`,
+          );
+          await new Promise((r) => setTimeout(r, 1000));
+          continue;
+        }
+        if (e?.code === "EADDRINUSE") {
+          process.stderr.write(
+            `bridge: daemon port :${viewerPort} still in use after ${maxBindAttempts}s — exiting.\n`,
+          );
+          await core.shutdown();
+          process.exit(1);
+        }
         process.stderr.write(
-          `bridge: daemon port :${viewerPort} already in use — exiting.\n`,
+          `bridge: daemon viewer failed: ${(err as Error)?.message ?? String(err)}\n`,
         );
         await core.shutdown();
         process.exit(1);
       }
-      process.stderr.write(
-        `bridge: daemon viewer failed: ${(err as Error)?.message ?? String(err)}\n`,
-      );
-      await core.shutdown();
-      process.exit(1);
     }
 
     const shutdownDaemon = async (sig: string) => {
@@ -125,7 +207,10 @@ async function main(): Promise<void> {
   }
 
   // ─── Normal (stdio) mode ──────────────────────────────────────
-  const stdio = startStdioServer({ core });
+  // Assign the stdio handle into the closure variable so the host
+  // LLM bridge (registered earlier inside bootstrap) can dispatch
+  // reverse-direction requests to the adapter.
+  stdio = startStdioServer({ core });
 
   // Try to bind the viewer port. EADDRINUSE → stay headless.
   let viewer: import("./server/types.js").ServerHandle | null = null;
@@ -170,7 +255,7 @@ async function main(): Promise<void> {
         /* best-effort */
       }
     }
-    await waitForShutdown(core, stdio);
+    await waitForShutdown(core, stdio!);
     process.exit(0);
   };
 
diff --git a/apps/memos-local-plugin/bridge/stdio.ts b/apps/memos-local-plugin/bridge/stdio.ts
index f01a1209c..111edacfd 100644
--- a/apps/memos-local-plugin/bridge/stdio.ts
+++ b/apps/memos-local-plugin/bridge/stdio.ts
@@ -52,6 +52,20 @@ export interface StdioServerHandle {
   close: () => Promise<void>;
   /** Resolve once stdin ends. */
   done: Promise<void>;
+  /**
+   * Send a JSON-RPC request **from the bridge to the client** and wait
+   * for the matching response. Used by the host LLM bridge to ask the
+   * adapter (e.g. the Hermes Python provider) to run a fallback LLM
+   * call using the agent's own model.
+   *
+   * IDs use the `"srv-N"` prefix so they cannot collide with the
+   * client's numeric request IDs.
+   */
+  serverRequest<R = unknown>(
+    method: string,
+    params?: unknown,
+    options?: { timeoutMs?: number },
+  ): Promise<R>;
 }
 
 // ─── Server ─────────────────────────────────────────────────────────────────
@@ -66,6 +80,20 @@ export function startStdioServer(options: StdioServerOptions): StdioServerHandle
   const eventsHandlers = new Set<symbol>();
   const logsHandlers = new Set<symbol>();
 
+  // ─── Server-initiated RPC bookkeeping ──
+  // Reverse-direction requests (bridge → client) live in their own
+  // ID namespace ("srv-1", "srv-2", …) so they never collide with the
+  // numeric IDs the Python client uses for forward requests.
+  let serverRequestSeq = 0;
+  const serverPending = new Map<
+    string,
+    {
+      resolve: (value: unknown) => void;
+      reject: (err: unknown) => void;
+      timer: ReturnType<typeof setTimeout> | null;
+    }
+  >();
+
   const eventsUnsubscribe = options.core.subscribeEvents((e) => {
     writeNotification(RPC_METHODS.EVENTS_NOTIFY, e);
   });
@@ -118,6 +146,31 @@ export function startStdioServer(options: StdioServerOptions): StdioServerHandle
       return;
     }
 
+    // Reverse-direction response: the client is replying to a request
+    // we previously sent via `serverRequest`. Match by `srv-` ID and
+    // resolve / reject the matching pending promise.
+    const raw = msg as unknown as Record<string, unknown>;
+    if (
+      raw &&
+      typeof raw === "object" &&
+      typeof raw.id === "string" &&
+      (raw.id as string).startsWith("srv-") &&
+      (raw.result !== undefined || raw.error !== undefined)
+    ) {
+      const id = raw.id as string;
+      const pending = serverPending.get(id);
+      if (pending) {
+        serverPending.delete(id);
+        if (pending.timer) clearTimeout(pending.timer);
+        if (raw.error != null) {
+          pending.reject(raw.error);
+        } else {
+          pending.resolve(raw.result);
+        }
+      }
+      return;
+    }
+
     if (!msg || typeof msg !== "object" || msg.jsonrpc !== "2.0" || !msg.method) {
       writeLine(errorResponse(msg?.id ?? null, JSONRPC_INVALID_REQUEST, "not JSON-RPC 2.0"));
       return;
@@ -180,6 +233,32 @@ export function startStdioServer(options: StdioServerOptions): StdioServerHandle
     });
   });
 
+  function serverRequest<R = unknown>(
+    method: string,
+    params?: unknown,
+    options?: { timeoutMs?: number },
+  ): Promise<R> {
+    const id = `srv-${++serverRequestSeq}`;
+    const timeoutMs = options?.timeoutMs ?? 60_000;
+    return new Promise<R>((resolve, reject) => {
+      if (closed) {
+        reject(new Error("stdio bridge closed"));
+        return;
+      }
+      const timer = setTimeout(() => {
+        if (serverPending.delete(id)) {
+          reject(new Error(`serverRequest ${method} timed out after ${timeoutMs}ms`));
+        }
+      }, timeoutMs);
+      serverPending.set(id, {
+        resolve: resolve as (value: unknown) => void,
+        reject,
+        timer,
+      });
+      writeLine({ jsonrpc: "2.0", id, method, params });
+    });
+  }
+
   return {
     get connected() {
       return !closed;
@@ -197,10 +276,18 @@ export function startStdioServer(options: StdioServerOptions): StdioServerHandle
       } catch {
         /* ignore */
       }
+      // Reject any in-flight server-initiated requests so the host
+      // bridge doesn't hang while we tear down.
+      for (const [id, entry] of serverPending) {
+        if (entry.timer) clearTimeout(entry.timer);
+        entry.reject(new Error("stdio bridge closed"));
+        serverPending.delete(id);
+      }
       // Drain remaining lines then flush.
       await donePromise;
     },
     done: donePromise,
+    serverRequest,
   };
 }
 
diff --git a/apps/memos-local-plugin/core/embedding/embedder.ts b/apps/memos-local-plugin/core/embedding/embedder.ts
index 5400d6415..db798c129 100644
--- a/apps/memos-local-plugin/core/embedding/embedder.ts
+++ b/apps/memos-local-plugin/core/embedding/embedder.ts
@@ -78,6 +78,22 @@ export function createEmbedderWithProvider(
     return { text: i.text, role: i.role ?? "document" };
   }
 
+  function notifyStatus(detail: {
+    status: "ok" | "error";
+    provider: string;
+    model: string;
+    message?: string;
+    code?: string;
+    at?: number;
+  }): void {
+    if (!config.onStatus) return;
+    try {
+      config.onStatus({ kind: "embedding", ...detail });
+    } catch {
+      /* status sink errors are non-fatal */
+    }
+  }
+
   async function embedOne(input: string | EmbedInput): Promise<EmbeddingVector> {
     const vecs = await embedMany([input]);
     return vecs[0]!;
@@ -162,19 +178,28 @@ export function createEmbedderWithProvider(
             log: providerCtxLog,
           };
           raw = await provider.embed(texts, role, ctx);
+          // Record success but DO NOT clear `lastError` — the viewer
+          // compares `lastError.at` against `lastOkAt` to decide the
+          // overview card colour. Clearing here would let one cache-
+          // friendly success silently mask a still-real provider
+          // outage that just produced a `system_error` log row.
           lastOkAt = Date.now();
-          lastError = null;
+          notifyStatus({
+            status: "ok",
+            provider: provider.name,
+            model: config.model,
+            at: lastOkAt,
+          });
         } catch (err) {
           failures++;
-          lastError = {
-            at: Date.now(),
-            message:
-              err instanceof MemosError
-                ? `${err.code}: ${err.message}`
-                : err instanceof Error
-                ? err.message
-                : String(err),
-          };
+          const errAt = Date.now();
+          const errMessage =
+            err instanceof MemosError
+              ? `${err.code}: ${err.message}`
+              : err instanceof Error
+              ? err.message
+              : String(err);
+          lastError = { at: errAt, message: errMessage };
           logger.warn("provider.failed", {
             provider: provider.name,
             model: config.model,
@@ -182,6 +207,31 @@ export function createEmbedderWithProvider(
             count: texts.length,
             err: toErrDetail(err),
           });
+          // Notify the bootstrap-supplied error sink (if any). Wrapped in
+          // its own try/catch so a buggy sink never masks the original
+          // failure for the caller.
+          if (config.onError) {
+            try {
+              config.onError({
+                kind: "embedding",
+                provider: provider.name,
+                model: config.model,
+                message: errMessage,
+                code: err instanceof MemosError ? err.code : undefined,
+                at: errAt,
+              });
+            } catch {
+              /* sink errors are non-fatal */
+            }
+          }
+          notifyStatus({
+            status: "error",
+            provider: provider.name,
+            model: config.model,
+            message: errMessage,
+            code: err instanceof MemosError ? err.code : undefined,
+            at: errAt,
+          });
           throw err instanceof MemosError
             ? err
             : new MemosError(
diff --git a/apps/memos-local-plugin/core/embedding/types.ts b/apps/memos-local-plugin/core/embedding/types.ts
index 230250beb..75409323e 100644
--- a/apps/memos-local-plugin/core/embedding/types.ts
+++ b/apps/memos-local-plugin/core/embedding/types.ts
@@ -41,6 +41,40 @@ export interface EmbeddingConfig {
   headers?: Record<string, string>;
   /** If true, all output vectors are L2-normalized. Default: true. */
   normalize?: boolean;
+  /**
+   * Optional sink invoked once per terminal provider failure. Lets the
+   * bootstrap layer write a `system_error` row to `api_logs` so the
+   * Logs viewer can surface infrastructure failures alongside tool
+   * activity. Never throws; any exception inside the sink is swallowed.
+   */
+  onError?: (detail: EmbeddingErrorDetail) => void;
+  /**
+   * Optional durable status sink invoked on successful and failed
+   * provider calls. Unlike `onError` (human-facing log only), this
+   * is the machine-readable source used by the Overview model cards.
+   */
+  onStatus?: (detail: EmbeddingStatusDetail) => void;
+}
+
+export interface EmbeddingErrorDetail {
+  kind: "embedding";
+  provider: EmbeddingProviderName | string;
+  model: string;
+  message: string;
+  /** Stable `MemosError` code when the underlying error carries one. */
+  code?: string;
+  /** Epoch ms at which the failure occurred (defaults to Date.now()). */
+  at?: number;
+}
+
+export interface EmbeddingStatusDetail {
+  kind: "embedding";
+  status: "ok" | "error";
+  provider: EmbeddingProviderName | string;
+  model: string;
+  message?: string;
+  code?: string;
+  at?: number;
 }
 
 // ─── Roles ───────────────────────────────────────────────────────────────────
@@ -108,9 +142,12 @@ export interface EmbedStats {
   /** Most recent successful round-trip to the provider (epoch ms). */
   lastOkAt: number | null;
   /**
-   * Most recent failure. `null` if no call has failed yet, or a later
-   * call succeeded. Viewer overview uses this to render a red dot +
-   * error tooltip on the embedding card.
+   * Most recent failure. `null` until the embedder has thrown at least
+   * once. Once set, it is **not** cleared by a subsequent success —
+   * the viewer compares `lastError.at` against `lastOkAt` to decide
+   * whether the most recent event was good (green) or bad (red), so
+   * users never see a green dot while a real provider error is still
+   * sitting in the system_error log.
    */
   lastError: { at: number; message: string } | null;
 }
diff --git a/apps/memos-local-plugin/core/llm/client.ts b/apps/memos-local-plugin/core/llm/client.ts
index 0c6c75d89..53ac2746c 100644
--- a/apps/memos-local-plugin/core/llm/client.ts
+++ b/apps/memos-local-plugin/core/llm/client.ts
@@ -68,14 +68,41 @@ export function createLlmClientWithProvider(
   let totalPromptTokens = 0;
   let totalCompletionTokens = 0;
   let lastOkAt: number | null = null;
+  let lastFallbackAt: number | null = null;
   let lastError: { at: number; message: string } | null = null;
 
-  function markOk(): void {
+  /**
+   * Mark a successful primary-provider call. We **do not** clear
+   * `lastError` / `lastFallbackAt` here — the viewer picks the most
+   * recent event by timestamp to colour the overview card, so an
+   * earlier failure that already produced a `system_error` row stays
+   * visible until a later success out-dates it.
+   */
+  function markOk(): number {
     lastOkAt = Date.now();
-    lastError = null;
+    return lastOkAt;
   }
-  function markFail(err: unknown): void {
-    lastError = { at: Date.now(), message: summarizeErrMessage(err) };
+  /**
+   * Mark a primary-provider failure that was rescued by the host LLM
+   * bridge (yellow card). The original primary error is still kept on
+   * `lastError` so the viewer can show *why* fallback kicked in, and
+   * `lastFallbackAt` tracks when fallback happened so the timestamp
+   * comparison renders yellow instead of red.
+   */
+  function markFallback(err: unknown): number {
+    const at = Date.now();
+    lastFallbackAt = at;
+    lastError = { at, message: summarizeErrMessage(err) };
+    return at;
+  }
+  /**
+   * Mark a terminal failure — either no fallback configured or the
+   * host fallback also failed (red card).
+   */
+  function markFail(err: unknown): number {
+    const at = Date.now();
+    lastError = { at, message: summarizeErrMessage(err) };
+    return at;
   }
 
   function normalizeMessages(input: LlmMessage[] | string): LlmMessage[] {
@@ -137,7 +164,13 @@ export function createLlmClientWithProvider(
         durationMs: raw.durationMs,
       };
       record(completion, op, messages);
-      markOk();
+      const okAt = markOk();
+      notifyStatus({
+        status: "ok",
+        provider: provider.name,
+        model: config.model,
+        at: okAt,
+      });
       return { completion };
     } catch (err) {
       if (shouldFallback(err, config, provider.name)) {
@@ -160,15 +193,39 @@ export function createLlmClientWithProvider(
             durationMs: res.durationMs,
           };
           record(completion, op, messages);
-          markOk();
+          // The primary provider is still broken even though the host
+          // bridge saved this call. Tag the slot yellow (`lastFallbackAt`)
+          // and surface the upstream error to the user via the
+          // system_error log so they can see *why* fallback engaged.
+          const fallbackAt = markFallback(err);
+          notifyOnError(err);
+          notifyStatus({
+            status: "fallback",
+            provider: provider.name,
+            model: config.model,
+            message: summarizeErrMessage(err),
+            code: err instanceof MemosError ? err.code : undefined,
+            at: fallbackAt,
+            fallbackProvider: "host",
+          });
           return { completion };
         } catch (hostErr) {
           failures++;
-          markFail(hostErr);
+          const failAt = markFail(hostErr);
           facadeLog.error("host.fallback_failed", {
             primary: summarizeErr(err),
             host: summarizeErr(hostErr),
           });
+          notifyOnError(hostErr);
+          notifyStatus({
+            status: "error",
+            provider: provider.name,
+            model: config.model,
+            message: summarizeErrMessage(hostErr),
+            code: hostErr instanceof MemosError ? hostErr.code : undefined,
+            at: failAt,
+            fallbackProvider: "host",
+          });
           throw hostErr instanceof MemosError
             ? hostErr
             : new MemosError(
@@ -178,7 +235,16 @@ export function createLlmClientWithProvider(
         }
       }
       failures++;
-      markFail(err);
+      const failAt = markFail(err);
+      notifyOnError(err);
+      notifyStatus({
+        status: "error",
+        provider: provider.name,
+        model: config.model,
+        message: summarizeErrMessage(err),
+        code: err instanceof MemosError ? err.code : undefined,
+        at: failAt,
+      });
       throw err instanceof MemosError
         ? err
         : new MemosError(
@@ -189,6 +255,44 @@ export function createLlmClientWithProvider(
     }
   }
 
+  /**
+   * Forward a terminal failure to the bootstrap-supplied sink (if any).
+   * Wrapped so a buggy sink can never replace the original error the
+   * caller is about to receive. Skipped silently when no sink is set.
+   */
+  function notifyOnError(err: unknown): void {
+    if (!config.onError) return;
+    try {
+      config.onError({
+        provider: provider.name,
+        model: config.model,
+        message: summarizeErrMessage(err),
+        code: err instanceof MemosError ? err.code : undefined,
+        at: Date.now(),
+      });
+    } catch {
+      /* sink errors are non-fatal */
+    }
+  }
+
+  function notifyStatus(detail: {
+    status: "ok" | "fallback" | "error";
+    provider: string;
+    model: string;
+    message?: string;
+    code?: string;
+    at?: number;
+    fallbackProvider?: string;
+    fallbackModel?: string;
+  }): void {
+    if (!config.onStatus) return;
+    try {
+      config.onStatus(detail);
+    } catch {
+      /* status sink errors are non-fatal */
+    }
+  }
+
   function record(completion: LlmCompletion, op: string, messages: LlmMessage[]): void {
     if (completion.usage?.promptTokens) totalPromptTokens += completion.usage.promptTokens;
     if (completion.usage?.completionTokens) totalCompletionTokens += completion.usage.completionTokens;
@@ -322,11 +426,26 @@ export function createLlmClientWithProvider(
       });
       if (usage?.promptTokens) totalPromptTokens += usage.promptTokens;
       if (usage?.completionTokens) totalCompletionTokens += usage.completionTokens;
-      markOk();
+      const okAt = markOk();
+      notifyStatus({
+        status: "ok",
+        provider: provider.name,
+        model: config.model,
+        at: okAt,
+      });
     } catch (err) {
       failures++;
-      markFail(err);
+      const failAt = markFail(err);
       facadeLog.error("stream.failed", { err: summarizeErr(err) });
+      notifyOnError(err);
+      notifyStatus({
+        status: "error",
+        provider: provider.name,
+        model: config.model,
+        message: summarizeErrMessage(err),
+        code: err instanceof MemosError ? err.code : undefined,
+        at: failAt,
+      });
       throw err;
     }
   }
@@ -347,6 +466,7 @@ export function createLlmClientWithProvider(
         totalPromptTokens,
         totalCompletionTokens,
         lastOkAt,
+        lastFallbackAt,
         lastError,
       };
     },
@@ -358,6 +478,7 @@ export function createLlmClientWithProvider(
       totalPromptTokens = 0;
       totalCompletionTokens = 0;
       lastOkAt = null;
+      lastFallbackAt = null;
       lastError = null;
     },
     async close(): Promise<void> {
diff --git a/apps/memos-local-plugin/core/llm/types.ts b/apps/memos-local-plugin/core/llm/types.ts
index 125cd97fe..19eaef22c 100644
--- a/apps/memos-local-plugin/core/llm/types.ts
+++ b/apps/memos-local-plugin/core/llm/types.ts
@@ -32,6 +32,50 @@ export interface LlmConfig {
   maxTokens?: number;
   /** Extra HTTP headers for outgoing requests. */
   headers?: Record<string, string>;
+  /**
+   * Optional sink invoked once per terminal LLM failure. Lets the
+   * bootstrap layer record a `system_error` row in `api_logs` so the
+   * Logs viewer can surface infrastructure failures (auth, timeout,
+   * bad endpoint) right next to tool activity. Never throws; any
+   * exception inside the sink is swallowed by the facade.
+   */
+  onError?: (detail: LlmErrorDetail) => void;
+  /**
+   * Optional durable status sink invoked on primary success, host
+   * fallback success, and terminal failure. This is the machine-
+   * readable source used by Overview model cards so Hermes' viewer
+   * daemon can display status produced by a separate stdio bridge.
+   */
+  onStatus?: (detail: LlmStatusDetail) => void;
+}
+
+export interface LlmErrorDetail {
+  provider: LlmProviderName | string;
+  model: string;
+  message: string;
+  /** Stable `MemosError` code when the underlying error carries one. */
+  code?: string;
+  /** Epoch ms at which the failure occurred (defaults to Date.now()). */
+  at?: number;
+  /**
+   * Logical role of the failing client. Bootstrap configures the
+   * facade with a closure that knows whether it's the summary `llm`
+   * or the dedicated `reflectLlm` for skill evolution; the facade
+   * just passes whatever the closure injected through.
+   */
+  role?: "llm" | "skillEvolver";
+}
+
+export interface LlmStatusDetail {
+  status: "ok" | "fallback" | "error";
+  provider: LlmProviderName | string;
+  model: string;
+  message?: string;
+  code?: string;
+  at?: number;
+  fallbackProvider?: string;
+  fallbackModel?: string;
+  role?: "llm" | "skillEvolver";
 }
 
 // ─── Messages ────────────────────────────────────────────────────────────────
@@ -171,14 +215,30 @@ export interface ProviderCompletion {
 // ─── LlmClient facade ────────────────────────────────────────────────────────
 
 export interface LastCallStatus {
-  /** Most recent successful call (epoch ms), or `null` if none yet. */
+  /**
+   * Most recent direct success against the configured provider (epoch ms).
+   * Only set when the primary provider answered without going through
+   * `host_fallback`. Used by the viewer to render the green dot.
+   */
   lastOkAt: number | null;
   /**
-   * Most recent failed call. `null` if the client has either never been
-   * called or the last call (or a later one) succeeded. Viewer
-   * overview uses this to render a red dot + error tooltip.
+   * Most recent failure of the primary provider. Once set, this field is
+   * **not** cleared on subsequent success — the viewer compares
+   * timestamps across `lastOkAt` / `lastFallbackAt` / `lastError.at`
+   * to decide whether the latest event was good (green / yellow) or
+   * bad (red), so a real provider failure recorded in the
+   * `system_error` log can never be silently masked by a later
+   * successful call.
    */
   lastError: { at: number; message: string } | null;
+  /**
+   * Most recent time the primary provider failed but the host LLM
+   * fallback succeeded. Lets the viewer paint the slot yellow ("running
+   * on host fallback") instead of green or red. `null` when no
+   * fallback has happened in this process or the client has no
+   * `fallbackToHost` configured.
+   */
+  lastFallbackAt: number | null;
 }
 
 export interface LlmClientStats extends LastCallStatus {
diff --git a/apps/memos-local-plugin/core/pipeline/memory-core.ts b/apps/memos-local-plugin/core/pipeline/memory-core.ts
index 37e9a149e..05541b04f 100644
--- a/apps/memos-local-plugin/core/pipeline/memory-core.ts
+++ b/apps/memos-local-plugin/core/pipeline/memory-core.ts
@@ -73,7 +73,11 @@ import { runMigrations } from "../storage/migrator.js";
 import { makeRepos } from "../storage/repos/index.js";
 import { createEmbedder } from "../embedding/embedder.js";
 import { createLlmClient } from "../llm/client.js";
-import { getHostLlmBridge } from "../llm/host-bridge.js";
+import {
+  getHostLlmBridge,
+  registerHostLlmBridge,
+  type HostLlmBridge,
+} from "../llm/host-bridge.js";
 
 import { createPipeline } from "./orchestrator.js";
 import type { PipelineDeps, PipelineHandle } from "./types.js";
@@ -90,6 +94,23 @@ export interface BootstrapOptions {
   now?: () => number;
   /** Plugin package version (surfaced via `health()`). */
   pkgVersion?: string;
+  /**
+   * Optional adapter-supplied LLM bridge. When set, registered on the
+   * shared host-bridge singleton **before** the LLM clients are
+   * created so `shouldFallback()` can see it on the very first call.
+   *
+   * Wiring this through bootstrap (rather than asking the adapter to
+   * call `registerHostLlmBridge` itself) avoids a subtle ESM module-
+   * identity bug: when the adapter dynamically imports
+   * `core/llm/host-bridge.ts` from a different URL than the static
+   * `import` chain inside `core/llm/client.ts`, Node's module loader
+   * treats them as two separate modules with two independent
+   * `currentBridge` slots — register hits one, get sees the other,
+   * fallback never engages. Routing through bootstrap forces the
+   * register call to happen via the same module instance the LLM
+   * client closes over.
+   */
+  hostLlmBridge?: HostLlmBridge | null;
 }
 
 export interface BootstrapResult {
@@ -148,6 +169,77 @@ export async function bootstrapMemoryCoreFull(
   }
   const repos = makeRepos(db);
 
+  // ─── Host LLM bridge ──
+  // Register the adapter-supplied bridge BEFORE constructing any
+  // LlmClient so the very first call site sees a non-null bridge.
+  // The shouldFallback() check inside the LLM facade reads this
+  // singleton at every call; pinning it here guarantees the
+  // identity-by-module is the same instance the client closes over.
+  if (options.hostLlmBridge) {
+    registerHostLlmBridge(options.hostLlmBridge);
+    log.info("hostLlmBridge.registered", {
+      id: options.hostLlmBridge.id,
+    });
+  }
+
+  // ─── system_error sink ──
+  // Every facade we build (embedder / main LLM / reflect LLM) gets a
+  // tiny error sink that drops a `system_error` row into `api_logs`
+  // when the underlying provider call fails terminally. The Logs
+  // viewer renders these under the "系统" tag so users can correlate
+  // a red model card on the Overview with the exact provider message.
+  // Wrapped in try/catch because logging must never break the call.
+  function recordSystemError(
+    role: "embedding" | "llm" | "skillEvolver",
+    detail: {
+      provider: string;
+      model: string;
+      message: string;
+      code?: string;
+      at?: number;
+    },
+  ): void {
+    try {
+      repos.apiLogs.insert({
+        toolName: "system_error",
+        input: { role },
+        output: { role, ...detail },
+        durationMs: 0,
+        success: false,
+        calledAt: detail.at ?? Date.now(),
+      });
+    } catch {
+      /* the system_error row itself failing is non-fatal */
+    }
+  }
+
+  function recordSystemModelStatus(
+    role: "embedding" | "llm" | "skillEvolver",
+    detail: {
+      status: "ok" | "fallback" | "error";
+      provider: string;
+      model: string;
+      message?: string;
+      code?: string;
+      at?: number;
+      fallbackProvider?: string;
+      fallbackModel?: string;
+    },
+  ): void {
+    try {
+      repos.apiLogs.insert({
+        toolName: "system_model_status",
+        input: { role },
+        output: { role, ...detail },
+        durationMs: 0,
+        success: detail.status !== "error",
+        calledAt: detail.at ?? Date.now(),
+      });
+    } catch {
+      /* the status row itself failing is non-fatal */
+    }
+  }
+
   // 2. Providers (embedding + LLM) — nullable so we can run without them.
   // The LLM facade we build falls through to "local_only" when no remote
   // endpoint is configured, but we still catch construction errors so the
@@ -155,7 +247,19 @@ export async function bootstrapMemoryCoreFull(
   let embedder = null as ReturnType<typeof createEmbedder> | null;
   let llm = null as ReturnType<typeof createLlmClient> | null;
   try {
-    embedder = createEmbedder(config.embedding as never);
+    embedder = createEmbedder({
+      ...(config.embedding as object),
+      onError: (d: { provider: string; model: string; message: string; code?: string; at?: number }) =>
+        recordSystemError("embedding", d),
+      onStatus: (d: {
+        status: "ok" | "error";
+        provider: string;
+        model: string;
+        message?: string;
+        code?: string;
+        at?: number;
+      }) => recordSystemModelStatus("embedding", d),
+    } as never);
   } catch (err) {
     log.warn("embedder.unavailable", {
       err: err instanceof Error ? err.message : String(err),
@@ -163,7 +267,21 @@ export async function bootstrapMemoryCoreFull(
     embedder = null;
   }
   try {
-    llm = createLlmClient(config.llm as never);
+    llm = createLlmClient({
+      ...(config.llm as object),
+      onError: (d: { provider: string; model: string; message: string; code?: string; at?: number }) =>
+        recordSystemError("llm", d),
+      onStatus: (d: {
+        status: "ok" | "fallback" | "error";
+        provider: string;
+        model: string;
+        message?: string;
+        code?: string;
+        at?: number;
+        fallbackProvider?: string;
+        fallbackModel?: string;
+      }) => recordSystemModelStatus("llm", d),
+    } as never);
   } catch (err) {
     log.warn("llm.unavailable", {
       err: err instanceof Error ? err.message : String(err),
@@ -206,7 +324,25 @@ export async function bootstrapMemoryCoreFull(
         temperature: evolver?.temperature ?? 0,
         timeoutMs: evolver?.timeoutMs ?? 60_000,
         maxRetries: 3,
-        fallbackToHost: false,
+        // V7 §0.x — when the user's dedicated skill-evolver model is
+        // down (auth, model name typo, server outage), prefer falling
+        // back to the host agent's main LLM via the stdio host
+        // bridge instead of hard-failing the skill pipeline. The
+        // viewer paints the slot yellow + surfaces the upstream error
+        // so the operator still notices.
+        fallbackToHost: true,
+        onError: (d: { provider: string; model: string; message: string; code?: string; at?: number }) =>
+          recordSystemError("skillEvolver", d),
+        onStatus: (d: {
+          status: "ok" | "fallback" | "error";
+          provider: string;
+          model: string;
+          message?: string;
+          code?: string;
+          at?: number;
+          fallbackProvider?: string;
+          fallbackModel?: string;
+        }) => recordSystemModelStatus("skillEvolver", d),
       } as never);
       log.info("reflectLlm.ready", {
         provider: evolverProvider,
@@ -756,10 +892,25 @@ export function createMemoryCore(
     const embedderInfo = embedderHealth(handle.embedder, latestTraceTs());
     const skillEvolverInfo = resolveSkillEvolver(
       diskConfig ?? handle.config,
-      handle.llm,
+      // Prefer the dedicated reflect LLM stats so an independently
+      // configured skill-evolver model reports its OWN failures
+      // instead of inheriting the (possibly healthy) summary LLM's
+      // status. Falls back to `handle.llm` when the operator left
+      // skillEvolver blank — bootstrap aliases reflectLlm to llm
+      // in that case anyway.
+      handle.reflectLlm ?? handle.llm,
       latestTraceTs(),
     );
 
+    // NOTE: we deliberately do NOT fall back to `api_logs`-stored
+    // historical `system_error` rows here. Doing so used to keep the
+    // overview card red across restarts even after the operator had
+    // already fixed the misconfigured endpoint, because the ancient
+    // failure row would mask a freshly-booted process whose stats
+    // are still null. Now the card colour is driven purely by
+    // in-memory stats — if you want to inspect past failures, head
+    // to LogsView → 系统 tag.
+
     // Override model names from disk config if they differ from the
     // in-memory client (user saved new settings but hasn't restarted).
     if (diskConfig) {
@@ -775,6 +926,14 @@ export function createMemoryCore(
       }
     }
 
+    applyPersistedModelStatus(handle.repos, "llm", llmInfo);
+    applyPersistedModelStatus(handle.repos, "embedding", embedderInfo);
+    applyPersistedModelStatus(
+      handle.repos,
+      skillEvolverInfo.inherited ? "llm" : "skillEvolver",
+      skillEvolverInfo,
+    );
+
     return {
       ok: initialized && !shutDown,
       version: pkgVersion,
@@ -2653,9 +2812,75 @@ function durationSince(
  * the main `llm.*` model for skill induction. We surface that fallback
  * explicitly so the Overview card can label it as "inherited from LLM".
  */
+function applyPersistedModelStatus(
+  repos: PipelineHandle["repos"],
+  role: "embedding" | "llm" | "skillEvolver",
+  info: CoreHealth["llm"],
+): void {
+  const latest = findLatestPersistedModelStatus(repos, role, info.provider, info.model);
+  if (!latest) return;
+  info.lastOkAt = latest.status === "ok" ? latest.at : null;
+  info.lastFallbackAt = latest.status === "fallback" ? latest.at : null;
+  info.lastError =
+    latest.status === "error" || latest.status === "fallback"
+      ? { at: latest.at, message: latest.message || "(no message)" }
+      : null;
+}
+
+function findLatestPersistedModelStatus(
+  repos: PipelineHandle["repos"],
+  role: "embedding" | "llm" | "skillEvolver",
+  provider: string,
+  model: string,
+): {
+  status: "ok" | "fallback" | "error";
+  at: number;
+  message?: string;
+} | null {
+  try {
+    const rows = repos.apiLogs.list({
+      toolName: "system_model_status",
+      limit: 500,
+      offset: 0,
+    });
+    for (const row of rows) {
+      try {
+        const out = JSON.parse(row.outputJson) as {
+          role?: unknown;
+          status?: unknown;
+          provider?: unknown;
+          model?: unknown;
+          message?: unknown;
+        };
+        if (out.role !== role) continue;
+        // Only apply status rows for the currently configured model.
+        // This prevents an old 404 for a typo'd model from keeping the
+        // card red after the operator fixes Settings and restarts.
+        if (String(out.provider ?? "") !== provider) continue;
+        if (String(out.model ?? "") !== model) continue;
+        if (out.status !== "ok" && out.status !== "fallback" && out.status !== "error") {
+          continue;
+        }
+        return {
+          status: out.status,
+          at: row.calledAt,
+          message: typeof out.message === "string" ? out.message : undefined,
+        };
+      } catch {
+        // Malformed row — skip and keep walking.
+      }
+    }
+  } catch {
+    // Repo failure is non-fatal for health; leave in-memory stats.
+  }
+  return null;
+}
+
 function llmHealth(
   llm: PipelineHandle["llm"],
-  fallbackTs: number | null,
+  // Kept in the signature for source compatibility with older callers
+  // but intentionally unused — see comment below.
+  _fallbackTs: number | null,
 ): CoreHealth["llm"] {
   if (!llm) {
     return {
@@ -2663,26 +2888,30 @@ function llmHealth(
       provider: "none",
       model: "",
       lastOkAt: null,
+      lastFallbackAt: null,
       lastError: null,
     };
   }
   const s = llm.stats();
+  // We deliberately DO NOT fall back to the latest trace timestamp
+  // here. Doing so used to paint the slot "connected" on every
+  // restart even when the configured model was actually broken — any
+  // historical trace from a prior, working configuration would mask
+  // a fresh authentication / model-name failure. Now the colour is
+  // driven entirely by *this process's* facade activity.
   return {
     available: true,
     provider: llm.provider,
     model: llm.model,
-    // Prefer the live counter (most-recent call in this process).
-    // Fall back to `fallbackTs` — the newest trace timestamp — so the
-    // Overview card shows "connected" across plugin restarts as long
-    // as there's proof of a successful call on disk.
-    lastOkAt: s.lastOkAt ?? fallbackTs,
+    lastOkAt: s.lastOkAt,
+    lastFallbackAt: s.lastFallbackAt,
     lastError: s.lastError,
   };
 }
 
 function embedderHealth(
   embedder: PipelineHandle["embedder"],
-  fallbackTs: number | null,
+  _fallbackTs: number | null,
 ): CoreHealth["embedder"] {
   if (!embedder) {
     return {
@@ -2691,16 +2920,21 @@ function embedderHealth(
       model: "",
       dim: 0,
       lastOkAt: null,
+      lastFallbackAt: null,
       lastError: null,
     };
   }
   const s = embedder.stats();
+  // No `?? fallbackTs` here either — see `llmHealth`. The embedder
+  // also has no host fallback path, so `lastFallbackAt` stays `null`
+  // by definition.
   return {
     available: true,
     provider: embedder.provider,
     model: embedder.model,
     dim: embedder.dimensions,
-    lastOkAt: s.lastOkAt ?? fallbackTs,
+    lastOkAt: s.lastOkAt,
+    lastFallbackAt: null,
     lastError: s.lastError,
   };
 }
@@ -2714,17 +2948,19 @@ function resolveSkillEvolver(
     .skillEvolver;
   const own = (evolver?.model ?? "").trim();
   if (own) {
+    // `llm` here is the dedicated `reflectLlm` instance built from the
+    // skillEvolver config (see `bootstrapMemoryCoreFull`). Reading its
+    // stats means the Overview card flips red as soon as a skill
+    // crystallization call fails — independent of the summary LLM.
+    const s = llm?.stats();
     return {
       available: true,
       provider: evolver?.provider ?? "",
       model: own,
       inherited: false,
-      // Skill evolver uses its own LlmClient instance when the operator
-      // overrides the model. Today we don't expose that client through
-      // the pipeline handle, so status reporting follows the shared LLM
-      // while we keep the plumbing minimal.
-      lastOkAt: llm?.stats().lastOkAt ?? fallbackTs,
-      lastError: llm?.stats().lastError ?? null,
+      lastOkAt: s?.lastOkAt ?? null,
+      lastFallbackAt: s?.lastFallbackAt ?? null,
+      lastError: s?.lastError ?? null,
     };
   }
   const fallback = llmHealth(llm, fallbackTs);
@@ -2734,6 +2970,7 @@ function resolveSkillEvolver(
     model: fallback.model,
     inherited: true,
     lastOkAt: fallback.lastOkAt,
+    lastFallbackAt: fallback.lastFallbackAt,
     lastError: fallback.lastError,
   };
 }
diff --git a/apps/memos-local-plugin/core/pipeline/orchestrator.ts b/apps/memos-local-plugin/core/pipeline/orchestrator.ts
index 6f510e1a0..eab711708 100644
--- a/apps/memos-local-plugin/core/pipeline/orchestrator.ts
+++ b/apps/memos-local-plugin/core/pipeline/orchestrator.ts
@@ -915,6 +915,7 @@ export function createPipeline(deps: PipelineDeps): PipelineHandle {
     db: deps.db,
     repos: deps.repos,
     llm: deps.llm,
+    reflectLlm: deps.reflectLlm,
     embedder: deps.embedder,
     sessionManager: session.sessionManager,
     episodeManager: session.episodeManager,
diff --git a/apps/memos-local-plugin/core/pipeline/types.ts b/apps/memos-local-plugin/core/pipeline/types.ts
index 93d2bea90..27eecc1ec 100644
--- a/apps/memos-local-plugin/core/pipeline/types.ts
+++ b/apps/memos-local-plugin/core/pipeline/types.ts
@@ -152,6 +152,15 @@ export interface PipelineHandle {
   readonly db: StorageDb;
   readonly repos: Repos;
   readonly llm: LlmClient | null;
+  /**
+   * Dedicated client for skill-evolution reflection. When the operator
+   * leaves `skillEvolver.*` blank, this is the same instance as `llm`
+   * (so call sites can blindly read whichever is non-null). When they
+   * configure their own model it carries its own `stats()` so the
+   * Overview / health endpoint reports the *actual* skill-evolver
+   * status instead of falling back to the summary LLM.
+   */
+  readonly reflectLlm: LlmClient | null;
   readonly embedder: Embedder | null;
 
   // Subscribers / runners.
diff --git a/apps/memos-local-plugin/install.sh b/apps/memos-local-plugin/install.sh
index b6af9d4d1..b9aa31690 100755
--- a/apps/memos-local-plugin/install.sh
+++ b/apps/memos-local-plugin/install.sh
@@ -578,9 +578,21 @@ install_hermes() {
   mkdir -p "${HOME}/.hermes"
 
   step "Stopping existing bridge daemon"
-  pkill -f "bridge.cts" >/dev/null 2>&1 || true
-  sleep 1
-  pkill -9 -f "bridge.cts" >/dev/null 2>&1 || true
+  local bridge_pids=""
+  bridge_pids="$(pgrep -f "bridge.cts" 2>/dev/null || true)"
+  if [[ -n "${bridge_pids}" ]]; then
+    kill ${bridge_pids} >/dev/null 2>&1 || true
+    local i
+    for i in {1..10}; do
+      sleep 1
+      pgrep -f "bridge.cts" >/dev/null 2>&1 || break
+    done
+    bridge_pids="$(pgrep -f "bridge.cts" 2>/dev/null || true)"
+    if [[ -n "${bridge_pids}" ]]; then
+      kill -9 ${bridge_pids} >/dev/null 2>&1 || true
+      sleep 1
+    fi
+  fi
   success "Bridge daemon stopped"
   local was_running="false"
   if pgrep -f "/bin/hermes" >/dev/null 2>&1; then
@@ -699,8 +711,7 @@ CFGEOF
       mkdir -p "${prefix}/logs"
       # Launch bridge in --daemon mode (pure HTTP, no stdio).
       # The process stays alive to serve the Memory Viewer.
-      ( cd "${prefix}" && nohup "${tsx_bin}" "${bridge_cts}" --agent=hermes --daemon >"${daemon_log}" 2>&1 ) &
-      disown $! 2>/dev/null || true
+      ( cd "${prefix}" && nohup "${tsx_bin}" "${bridge_cts}" --agent=hermes --daemon >"${daemon_log}" 2>&1 & )
 
       if wait_for_viewer "${HERMES_PORT}"; then
         success "Memory Viewer daemon running"
diff --git a/apps/memos-local-plugin/server/routes/admin.ts b/apps/memos-local-plugin/server/routes/admin.ts
index 28f2aed67..98ddcd14d 100644
--- a/apps/memos-local-plugin/server/routes/admin.ts
+++ b/apps/memos-local-plugin/server/routes/admin.ts
@@ -42,7 +42,7 @@ export function registerAdminRoutes(routes: Routes, deps: ServerDeps, options: S
       const pluginRoot = nodePath.resolve(nodePath.dirname(thisFile), "../..");
       const tsxBin = nodePath.join(pluginRoot, "node_modules/.bin/tsx");
       const bridgeScript = nodePath.join(pluginRoot, "bridge.cts");
-      const cmd = `sleep 1 && "${tsxBin}" "${bridgeScript}" --agent=${agent} --daemon`;
+      const cmd = `sleep 3 && "${tsxBin}" "${bridgeScript}" --agent=${agent} --daemon`;
       const child = spawn("bash", ["-c", cmd], {
         detached: true,
         stdio: "ignore",
@@ -72,7 +72,12 @@ export function registerAdminRoutes(routes: Routes, deps: ServerDeps, options: S
     const tsxBin = nodePath.join(pluginRoot, "node_modules/.bin/tsx");
     const bridgeScript = nodePath.join(pluginRoot, "bridge.cts");
 
-    const cmd = `sleep 1 && "${tsxBin}" "${bridgeScript}" --agent=${agent} --daemon`;
+    // 3s sleep gives the OS time to release the viewer port (TCP
+    // TIME_WAIT lingers for ~1s on macOS/Linux even after the
+    // previous bridge fully exited). Bumped from 1s after operators
+    // hit "重启超时" because the new daemon kept losing the port-
+    // bind race against its predecessor.
+    const cmd = `sleep 3 && "${tsxBin}" "${bridgeScript}" --agent=${agent} --daemon`;
     const child = spawn("bash", ["-c", cmd], {
       detached: true,
       stdio: "ignore",
diff --git a/apps/memos-local-plugin/server/routes/api-logs.ts b/apps/memos-local-plugin/server/routes/api-logs.ts
index 6ccd07bc0..8d4b77dc9 100644
--- a/apps/memos-local-plugin/server/routes/api-logs.ts
+++ b/apps/memos-local-plugin/server/routes/api-logs.ts
@@ -61,6 +61,14 @@ export function registerApiLogsRoutes(routes: Routes, deps: ServerDeps): void {
       "world_model_evolve",
       "task_done",
       "task_failed",
+      // Infrastructure-layer failures (embedding / summary LLM /
+      // skillEvolver). Surfaced under the "系统" tag in LogsView so
+      // operators can see provider errors without tailing logs.
+      "system_error",
+      // Durable machine-readable status source for Overview model
+      // cards. This is intentionally persisted because Hermes' viewer
+      // daemon and stdio bridge can be different processes.
+      "system_model_status",
     ] as const;
     const rows = await Promise.all(
       tools.map(async (t) => {
diff --git a/apps/memos-local-plugin/tests/e2e/v7-full-chain.e2e.test.ts b/apps/memos-local-plugin/tests/e2e/v7-full-chain.e2e.test.ts
index 5521daa0d..01e3c5eff 100644
--- a/apps/memos-local-plugin/tests/e2e/v7-full-chain.e2e.test.ts
+++ b/apps/memos-local-plugin/tests/e2e/v7-full-chain.e2e.test.ts
@@ -85,7 +85,7 @@ function buildFullChainLlm(): LlmClient {
   const script: FakeLlmScript = {
     completeJson: {
       // Intent classifier — task-shaped for everything meaningful.
-      "session.intent.classify": (input) => {
+      "session.intent.classify": (input: unknown) => {
         const text = lastUserMessage(input).toLowerCase();
         if (/^\s*(hi|hello|你好|嗨)\s*$/.test(text)) {
           return {
@@ -106,7 +106,7 @@ function buildFullChainLlm(): LlmClient {
       // Relation classifier — decides revision / follow_up / new_task.
       // IMPORTANT: check `NEW_USER_MESSAGE` only; the system prompt itself
       // mentions "wrong" / "redo", which would otherwise always match.
-      "session.relation.classify": (input) => {
+      "session.relation.classify": (input: unknown) => {
         const newMsg = newUserSegment(lastUserMessage(input));
         if (/不对|错了|重做|改一下|\bwrong\b|\bredo\b|not quite/i.test(newMsg)) {
           return {
@@ -137,7 +137,7 @@ function buildFullChainLlm(): LlmClient {
       },
 
       // R_human scoring — treat negative keywords as a failed turn.
-      "reward.reward.r_human.v3": (input) => {
+      "reward.reward.r_human.v3": (input: unknown) => {
         const text = lastUserMessage(input);
         if (/不对|错了|没覆盖|\bwrong\b/i.test(text)) {
           return {
@@ -167,7 +167,7 @@ function buildFullChainLlm(): LlmClient {
       },
 
       // Capture summarizer — one short line per step.
-      "capture.summarize": (input) => {
+      "capture.summarize": (input: unknown) => {
         const text = lastUserMessage(input);
         if (/fib|斐波那契/i.test(text)) return { summary: "斐波那契函数实现（Python 递归/迭代）" };
         if (/test|测试/i.test(text)) return { summary: "为 Python 函数补充单元测试（含边界）" };
@@ -177,19 +177,19 @@ function buildFullChainLlm(): LlmClient {
       },
 
       // α scorer — reflection quality.
-      "capture.alpha.reflection.score.v1": (input) => {
+      "capture.alpha.reflection.score.v1": (input: unknown) => {
         const text = lastUserMessage(input);
         const alpha = /关键|识别|根因|发现/i.test(text) ? 0.75 : 0.45;
         return { alpha, rationale: "rule-of-thumb by keyword" };
       },
 
       // Capture reflection synth (only when reflection was missing).
-      "capture.reflection.synth": (input) => ({
+      "capture.reflection.synth": (input: unknown) => ({
         reflection: "Scripted fallback: summarized step outcome.",
       }),
 
       // L2 induction — distills a policy from ≥2 similar traces.
-      "l2.l2.induction.v2": (input) => {
+      "l2.l2.induction.v2": (input: unknown) => {
         const text = lastUserMessage(input);
         const isPython = /python|pip|\.py\b/i.test(text);
         return {
diff --git a/apps/memos-local-plugin/tests/helpers/fake-llm.ts b/apps/memos-local-plugin/tests/helpers/fake-llm.ts
index 971b938b7..22d9fc1a3 100644
--- a/apps/memos-local-plugin/tests/helpers/fake-llm.ts
+++ b/apps/memos-local-plugin/tests/helpers/fake-llm.ts
@@ -89,6 +89,7 @@ export function fakeLlm(script: FakeLlmScript = {}): LlmClient {
         totalPromptTokens: 0,
         totalCompletionTokens: 0,
         lastOkAt: null,
+        lastFallbackAt: null,
         lastError: null,
       };
     },
diff --git a/apps/memos-local-plugin/tests/integration/adapters/openclaw-full-chain.test.ts b/apps/memos-local-plugin/tests/integration/adapters/openclaw-full-chain.test.ts
index e057defe7..3ff5dd78e 100644
--- a/apps/memos-local-plugin/tests/integration/adapters/openclaw-full-chain.test.ts
+++ b/apps/memos-local-plugin/tests/integration/adapters/openclaw-full-chain.test.ts
@@ -181,7 +181,7 @@ function buildLlm(): LlmClient {
         reason: "programming request",
       }),
 
-      "session.relation.classify": (input) => {
+      "session.relation.classify": (input: unknown) => {
         const newMsg = newUserSegment(lastUserContent(input));
         if (/不对|错了|改一下|\bwrong\b|redo/i.test(newMsg)) {
           return { relation: "revision", confidence: 0.9, reason: "negation" };
@@ -195,7 +195,7 @@ function buildLlm(): LlmClient {
         return { relation: "follow_up", confidence: 0.5, reason: "default" };
       },
 
-      "reward.reward.r_human.v3": (input) => {
+      "reward.reward.r_human.v3": (input: unknown) => {
         const text = lastUserContent(input);
         // We pre-fill the scorer with positive user feedback baked
         // into the "FEEDBACK:" block, so it should return a healthy
@@ -218,7 +218,7 @@ function buildLlm(): LlmClient {
         };
       },
 
-      "capture.summarize": (input) => {
+      "capture.summarize": (input: unknown) => {
         const text = lastUserContent(input);
         if (/fib/i.test(text)) return { summary: "Python fibonacci 函数实现" };
         if (/quicksort/i.test(text)) return { summary: "Python 快速排序实现" };
@@ -234,7 +234,7 @@ function buildLlm(): LlmClient {
         reason: "concrete root-cause reflection",
       }),
 
-      "l2.l2.induction.v2": (input) => {
+      "l2.l2.induction.v2": (input: unknown) => {
         const evidence = (input as { evidenceTraces?: Array<{ id: string }> })
           ?.evidenceTraces ?? [];
         return {
@@ -480,6 +480,7 @@ describe("OpenClaw adapter integration — multi-session full V7 chain", () => {
       agent: AGENT,
       core: core!,
       log: {
+        trace: (_m: string, _c?: unknown) => undefined,
         info: (_m: string, _c?: unknown) => undefined,
         warn: (_m: string, _c?: unknown) => undefined,
         error: (_m: string, _c?: unknown) => undefined,
diff --git a/apps/memos-local-plugin/tests/unit/adapters/hermes-persistence.test.ts b/apps/memos-local-plugin/tests/unit/adapters/hermes-persistence.test.ts
index 3e708f4e8..afed542af 100644
--- a/apps/memos-local-plugin/tests/unit/adapters/hermes-persistence.test.ts
+++ b/apps/memos-local-plugin/tests/unit/adapters/hermes-persistence.test.ts
@@ -52,7 +52,7 @@ function semanticFakeEmbedder(dims = 64): Embedder {
   };
   return {
     dimensions: dims,
-    provider: "test",
+    provider: "local",
     model: "semantic-fake",
     async embedOne(input: string | EmbedInput): Promise<EmbeddingVector> {
       stats.requests++;
@@ -168,13 +168,14 @@ describe("Hermes MemoryCore persistence", () => {
       agent: "hermes" as AgentKind,
       sessionId,
       episodeId,
-      userText: "请记住 HERMES_MEMOS_E2E_0428 viewer 端口是 18800",
       agentText: "已记录 Hermes MemOS 测试事实。",
       toolCalls: [
         {
           name: "memory_search",
           input: "{\"query\":\"HERMES_MEMOS_E2E_0428\"}",
           output: "[]",
+          startedAt: 1_700_000_000_002,
+          endedAt: 1_700_000_000_002,
         },
       ],
       ts: 1_700_000_000_002,
diff --git a/apps/memos-local-plugin/tests/unit/bridge/methods.test.ts b/apps/memos-local-plugin/tests/unit/bridge/methods.test.ts
index da7daea50..3253ca9bc 100644
--- a/apps/memos-local-plugin/tests/unit/bridge/methods.test.ts
+++ b/apps/memos-local-plugin/tests/unit/bridge/methods.test.ts
@@ -25,6 +25,7 @@ function stubCore(overrides: Partial<MemoryCore> = {}): MemoryCore {
         provider: "",
         model: "",
         lastOkAt: null,
+        lastFallbackAt: null,
         lastError: null,
       },
       embedder: {
@@ -33,6 +34,7 @@ function stubCore(overrides: Partial<MemoryCore> = {}): MemoryCore {
         model: "",
         dim: 0,
         lastOkAt: null,
+        lastFallbackAt: null,
         lastError: null,
       },
       skillEvolver: {
@@ -41,6 +43,7 @@ function stubCore(overrides: Partial<MemoryCore> = {}): MemoryCore {
         model: "",
         inherited: true,
         lastOkAt: null,
+        lastFallbackAt: null,
         lastError: null,
       },
     })),
@@ -80,6 +83,7 @@ function stubCore(overrides: Partial<MemoryCore> = {}): MemoryCore {
     shareTrace: vi.fn(async () => null),
     getPolicy: vi.fn(async () => null),
     listPolicies: vi.fn(async () => []),
+    countPolicies: vi.fn(async () => 0),
     setPolicyStatus: vi.fn(async () => null),
     deletePolicy: vi.fn(async () => ({ deleted: false })),
     editPolicyGuidance: vi.fn(async () => null),
@@ -87,6 +91,7 @@ function stubCore(overrides: Partial<MemoryCore> = {}): MemoryCore {
     updatePolicy: vi.fn(async () => null),
     getWorldModel: vi.fn(async () => null),
     listWorldModels: vi.fn(async () => []),
+    countWorldModels: vi.fn(async () => 0),
     deleteWorldModel: vi.fn(async () => ({ deleted: false })),
     shareWorldModel: vi.fn(async () => null),
     updateWorldModel: vi.fn(async () => null),
@@ -94,10 +99,13 @@ function stubCore(overrides: Partial<MemoryCore> = {}): MemoryCore {
     unarchiveWorldModel: vi.fn(async () => null),
     listEpisodes: vi.fn(async () => ["e-1", "e-2"]),
     listEpisodeRows: vi.fn(async () => []),
+    countEpisodes: vi.fn(async () => 0),
     timeline: vi.fn(async () => []),
     listTraces: vi.fn(async () => []),
+    countTraces: vi.fn(async () => 0),
     listApiLogs: vi.fn(async () => ({ logs: [], total: 0 })),
     listSkills: vi.fn(async () => []),
+    countSkills: vi.fn(async () => 0),
     getSkill: vi.fn(async () => null),
     archiveSkill: vi.fn(async () => {}),
     deleteSkill: vi.fn(async () => ({ deleted: false })),
diff --git a/apps/memos-local-plugin/tests/unit/memory/l2/induce.test.ts b/apps/memos-local-plugin/tests/unit/memory/l2/induce.test.ts
index cdf457d94..9aa6c4f6d 100644
--- a/apps/memos-local-plugin/tests/unit/memory/l2/induce.test.ts
+++ b/apps/memos-local-plugin/tests/unit/memory/l2/induce.test.ts
@@ -139,7 +139,6 @@ describe("memory/l2/induce", () => {
       episodeIds: ["ep_1", "ep_2"] as EpisodeId[],
       evidenceTraces: [mkTrace("tr_a", "ep_1", vec([1, 0])), mkTrace("tr_b", "ep_2", vec([0, 1]))],
       inducedBy: "l2.l2.induction.v1",
-      decisionGuidance: { preference: [], antiPattern: [] },
       now: 42,
     });
     expect(row.status).toBe("candidate");
diff --git a/apps/memos-local-plugin/tests/unit/memory/l3/_helpers.ts b/apps/memos-local-plugin/tests/unit/memory/l3/_helpers.ts
index 1b4bbad9f..35198f936 100644
--- a/apps/memos-local-plugin/tests/unit/memory/l3/_helpers.ts
+++ b/apps/memos-local-plugin/tests/unit/memory/l3/_helpers.ts
@@ -147,7 +147,6 @@ export function seedWorldModel(
     policyIds: [...(args.policyIds ?? [])],
     sourceEpisodeIds: [...(args.sourceEpisodeIds ?? [])],
     inducedBy: "l3.abstraction.v1",
-    decisionGuidance: { preference: [], antiPattern: [] },
     vec: args.vec ?? vec([1, 0, 0]),
     createdAt: NOW,
     updatedAt: NOW,
diff --git a/apps/memos-local-plugin/tests/unit/memory/l3/abstract.test.ts b/apps/memos-local-plugin/tests/unit/memory/l3/abstract.test.ts
index fdfe57f15..33e57f69e 100644
--- a/apps/memos-local-plugin/tests/unit/memory/l3/abstract.test.ts
+++ b/apps/memos-local-plugin/tests/unit/memory/l3/abstract.test.ts
@@ -73,6 +73,8 @@ function mkCluster(): PolicyCluster {
     domainTags: ["docker", "alpine", "pip"],
     centroidVec: vec([1, 0, 0]),
     avgGain: 0.3,
+    cohesion: 1,
+    admission: "strict",
   };
 }
 
@@ -171,7 +173,6 @@ describe("memory/l3/abstract", () => {
       cluster,
       episodeIds: ["ep_a", "ep_b", "ep_a"] as EpisodeId[],
       inducedBy: OP,
-      decisionGuidance: { preference: [], antiPattern: [] },
       now: NOW,
       id: "wm_test" as Parameters<typeof buildWorldModelRow>[0]["id"],
     });
diff --git a/apps/memos-local-plugin/tests/unit/memory/l3/merge.test.ts b/apps/memos-local-plugin/tests/unit/memory/l3/merge.test.ts
index 0231d47c3..50c21064f 100644
--- a/apps/memos-local-plugin/tests/unit/memory/l3/merge.test.ts
+++ b/apps/memos-local-plugin/tests/unit/memory/l3/merge.test.ts
@@ -41,7 +41,6 @@ function mkWorldModel(partial: Partial<WorldModelRow> & { id: WorldModelId }): W
     policyIds: partial.policyIds ?? [],
     sourceEpisodeIds: partial.sourceEpisodeIds ?? [],
     inducedBy: partial.inducedBy ?? "",
-    decisionGuidance: { preference: [], antiPattern: [] },
     vec: partial.vec ?? vec([1, 0, 0]),
     createdAt: NOW,
     updatedAt: NOW,
diff --git a/apps/memos-local-plugin/tests/unit/retrieval/integration.test.ts b/apps/memos-local-plugin/tests/unit/retrieval/integration.test.ts
index 09538780d..e13eb7b23 100644
--- a/apps/memos-local-plugin/tests/unit/retrieval/integration.test.ts
+++ b/apps/memos-local-plugin/tests/unit/retrieval/integration.test.ts
@@ -92,6 +92,7 @@ function seed(handle: TmpDbHandle) {
     vec: vec([1, 0, 0]),
     createdAt: NOW as never,
     updatedAt: NOW as never,
+    version: 1,
   });
   handle.repos.skills.upsert({
     id: "sk_weak" as SkillId,
@@ -110,6 +111,7 @@ function seed(handle: TmpDbHandle) {
     vec: vec([1, 0, 0]),
     createdAt: NOW as never,
     updatedAt: NOW as never,
+    version: 1,
   });
 
   handle.repos.worldModel.upsert({
@@ -122,7 +124,6 @@ function seed(handle: TmpDbHandle) {
     policyIds: [],
     sourceEpisodeIds: [],
     inducedBy: "",
-    decisionGuidance: { preference: [], antiPattern: [] },
     vec: vec([1, 0, 0]),
     createdAt: NOW as never,
     updatedAt: NOW as never,
diff --git a/apps/memos-local-plugin/tests/unit/retrieval/tier1.test.ts b/apps/memos-local-plugin/tests/unit/retrieval/tier1.test.ts
index 1105be325..5bc0b0d51 100644
--- a/apps/memos-local-plugin/tests/unit/retrieval/tier1.test.ts
+++ b/apps/memos-local-plugin/tests/unit/retrieval/tier1.test.ts
@@ -22,6 +22,9 @@ const cfg: RetrievalConfig = {
   minTraceSim: 0.3,
   tagFilter: "auto",
   decayHalfLifeDays: 30,
+  llmFilterEnabled: false,
+  llmFilterMaxKeep: 4,
+  llmFilterMinCandidates: 1,
 };
 
 const qv: EmbeddingVector = Float32Array.from([1, 0, 0]);
diff --git a/apps/memos-local-plugin/tests/unit/retrieval/tier2.test.ts b/apps/memos-local-plugin/tests/unit/retrieval/tier2.test.ts
index a98a1c092..b8ae8b4f6 100644
--- a/apps/memos-local-plugin/tests/unit/retrieval/tier2.test.ts
+++ b/apps/memos-local-plugin/tests/unit/retrieval/tier2.test.ts
@@ -25,6 +25,9 @@ const cfg: RetrievalConfig = {
   minTraceSim: 0.3,
   tagFilter: "auto",
   decayHalfLifeDays: 30,
+  llmFilterEnabled: false,
+  llmFilterMaxKeep: 4,
+  llmFilterMinCandidates: 1,
 };
 
 function seed(handle: TmpDbHandle) {
diff --git a/apps/memos-local-plugin/tests/unit/retrieval/tier3.test.ts b/apps/memos-local-plugin/tests/unit/retrieval/tier3.test.ts
index 897981444..a5465141f 100644
--- a/apps/memos-local-plugin/tests/unit/retrieval/tier3.test.ts
+++ b/apps/memos-local-plugin/tests/unit/retrieval/tier3.test.ts
@@ -25,6 +25,9 @@ const cfg: RetrievalConfig = {
   minTraceSim: 0.3,
   tagFilter: "auto",
   decayHalfLifeDays: 30,
+  llmFilterEnabled: false,
+  llmFilterMaxKeep: 4,
+  llmFilterMinCandidates: 1,
 };
 
 describe("retrieval/tier3 (with real sqlite)", () => {
@@ -41,7 +44,6 @@ describe("retrieval/tier3 (with real sqlite)", () => {
       policyIds: [],
       sourceEpisodeIds: [],
       inducedBy: "",
-      decisionGuidance: { preference: [], antiPattern: [] },
       vec: vec([1, 0, 0]),
       createdAt: NOW as never,
       updatedAt: NOW as never,
@@ -57,7 +59,6 @@ describe("retrieval/tier3 (with real sqlite)", () => {
       policyIds: [],
       sourceEpisodeIds: [],
       inducedBy: "",
-      decisionGuidance: { preference: [], antiPattern: [] },
       vec: vec([0, 1, 0]),
       createdAt: NOW as never,
       updatedAt: NOW as never,
diff --git a/apps/memos-local-plugin/tests/unit/session/intent-classifier.test.ts b/apps/memos-local-plugin/tests/unit/session/intent-classifier.test.ts
index 85416b66e..6b37ae55f 100644
--- a/apps/memos-local-plugin/tests/unit/session/intent-classifier.test.ts
+++ b/apps/memos-local-plugin/tests/unit/session/intent-classifier.test.ts
@@ -39,6 +39,7 @@ function fakeLlm(handler: (input: unknown) => unknown | Promise<unknown>): LlmCl
       totalPromptTokens: 0,
       totalCompletionTokens: 0,
       lastOkAt: null,
+      lastFallbackAt: null,
       lastError: null,
     }),
     resetStats: () => {},
diff --git a/apps/memos-local-plugin/tests/unit/skill/packager.test.ts b/apps/memos-local-plugin/tests/unit/skill/packager.test.ts
index 6a4a7a21c..bdb849045 100644
--- a/apps/memos-local-plugin/tests/unit/skill/packager.test.ts
+++ b/apps/memos-local-plugin/tests/unit/skill/packager.test.ts
@@ -95,6 +95,7 @@ describe("skill/packager", () => {
       vec: null,
       createdAt: NOW,
       updatedAt: NOW,
+      version: 1,
     } as SkillRow;
     const r = await buildSkillRow(
       {
diff --git a/apps/memos-local-plugin/tests/unit/storage/end-to-end.test.ts b/apps/memos-local-plugin/tests/unit/storage/end-to-end.test.ts
index 30db08db2..91b4644a1 100644
--- a/apps/memos-local-plugin/tests/unit/storage/end-to-end.test.ts
+++ b/apps/memos-local-plugin/tests/unit/storage/end-to-end.test.ts
@@ -94,6 +94,7 @@ describe("storage/end-to-end", () => {
           vec: vec([1, 0, 0]),
           createdAt: 101,
           updatedAt: 101,
+          version: 1,
         });
         repos.feedback.insert({
           id: "fb-1",
diff --git a/apps/memos-local-plugin/tests/unit/storage/fts-keyword.test.ts b/apps/memos-local-plugin/tests/unit/storage/fts-keyword.test.ts
index 1cf8f3dbf..b767f53a1 100644
--- a/apps/memos-local-plugin/tests/unit/storage/fts-keyword.test.ts
+++ b/apps/memos-local-plugin/tests/unit/storage/fts-keyword.test.ts
@@ -281,7 +281,6 @@ describe("storage/keyword channels — world model", () => {
       policyIds: [],
       sourceEpisodeIds: [],
       inducedBy: "",
-      decisionGuidance: { preference: [], antiPattern: [] },
       vec: vec([1, 0, 0]),
       createdAt: 0,
       updatedAt: 0,
diff --git a/apps/memos-local-plugin/tests/unit/storage/repos.test.ts b/apps/memos-local-plugin/tests/unit/storage/repos.test.ts
index 1ca4aa40d..8a25c3ed1 100644
--- a/apps/memos-local-plugin/tests/unit/storage/repos.test.ts
+++ b/apps/memos-local-plugin/tests/unit/storage/repos.test.ts
@@ -203,6 +203,7 @@ describe("storage/repos — happy paths", () => {
         vec: vec([1, 0]),
         createdAt: 1,
         updatedAt: 1,
+        version: 1,
       });
 
       expect(() =>
@@ -223,6 +224,7 @@ describe("storage/repos — happy paths", () => {
           vec: null,
           createdAt: 1,
           updatedAt: 1,
+          version: 1,
         }),
       ).toThrow(/UNIQUE/i);
 
diff --git a/apps/memos-local-plugin/web/src/stores/health.ts b/apps/memos-local-plugin/web/src/stores/health.ts
index d67374623..24ffeaaa7 100644
--- a/apps/memos-local-plugin/web/src/stores/health.ts
+++ b/apps/memos-local-plugin/web/src/stores/health.ts
@@ -11,26 +11,52 @@ import { api } from "../api/client";
 
 export type HealthStatus = "unknown" | "ok" | "degraded" | "down";
 
+/**
+ * Most-recent call status carried on every model slot. Populated by
+ * the core's `health()` endpoint from the underlying facade
+ * `stats()`. Overview compares the three timestamps below — the
+ * largest one wins — to paint the card green (ok), yellow (running
+ * on host fallback) or red (broken).
+ */
+export interface ModelCallStatus {
+  /** Epoch ms of the most recent direct primary-provider success. */
+  lastOkAt?: number | null;
+  /**
+   * Epoch ms of the most recent time the primary provider failed but
+   * the host LLM bridge rescued the call. Only ever set on the LLM /
+   * skillEvolver slots; the embedder has no fallback so this stays
+   * `null` there.
+   */
+  lastFallbackAt?: number | null;
+  /**
+   * Latest failure record. Sticky — not cleared by a later success;
+   * the timestamp comparison handles "we recovered" naturally.
+   */
+  lastError?: { at: number; message: string } | null;
+}
+
 export interface HealthPayload {
   ok: boolean;
   version?: string;
   uptimeMs?: number;
   agent?: string;
   paths?: Record<string, string>;
-  llm?: { available: boolean; provider: string; model: string };
-  embedder?: { available: boolean; provider: string; model: string; dim: number };
+  llm?: ({ available: boolean; provider: string; model: string }) & ModelCallStatus;
+  embedder?:
+    | ({ available: boolean; provider: string; model: string; dim: number } & ModelCallStatus);
   /**
    * `available` is `true` when the slot has a usable upstream — either a
    * concrete `provider+model+apiKey` of its own (`inherited=false`) or it
    * inherits from `llm.*` and that slot is itself available
    * (`inherited=true`). The viewer's setup banner uses this flag.
    */
-  skillEvolver?: {
-    available: boolean;
-    provider: string;
-    model: string;
-    inherited: boolean;
-  };
+  skillEvolver?:
+    | ({
+        available: boolean;
+        provider: string;
+        model: string;
+        inherited: boolean;
+      } & ModelCallStatus);
 }
 
 export const health = signal<HealthPayload | null>(null);
diff --git a/apps/memos-local-plugin/web/src/stores/i18n.ts b/apps/memos-local-plugin/web/src/stores/i18n.ts
index edd4e35fd..3b17b4993 100644
--- a/apps/memos-local-plugin/web/src/stores/i18n.ts
+++ b/apps/memos-local-plugin/web/src/stores/i18n.ts
@@ -216,6 +216,9 @@ const en = {
   "overview.metric.model.connectedAt": "Last OK call at {ts}",
   "overview.metric.model.failed": "Last call failed",
   "overview.metric.model.idle": "Not called yet",
+  "overview.metric.model.fallback": "Falling back to host model",
+  "overview.metric.model.fallback.tooltip":
+    "Primary provider unavailable, host LLM is handling the call. Original error: {msg}",
   "overview.metric.policies.breakdown": "{active} active · {candidate} candidate",
   "overview.metric.skills.breakdown": "{active} active · {candidate} candidate",
   "overview.live.title": "Live activity",
@@ -563,6 +566,11 @@ const en = {
   "logs.tag.skill": "Skill",
   "logs.tag.policy": "Experience",
   "logs.tag.world": "World model",
+  "logs.tag.system": "System",
+  "logs.system.role": "{role} call failed",
+  "logs.system.role.embedding": "Embedding model",
+  "logs.system.role.llm": "Summary model",
+  "logs.system.role.skillEvolver": "Skill evolver model",
   "logs.tool.search": "Search",
   "logs.tool.add": "Ingest",
   "logs.tool.skill_generate": "Generate",
@@ -866,6 +874,9 @@ const zh: Record<TranslationKey, string> = {
   "overview.metric.model.connectedAt": "上次成功调用于 {ts}",
   "overview.metric.model.failed": "上次调用失败",
   "overview.metric.model.idle": "暂未调用",
+  "overview.metric.model.fallback": "已降级到 Agent 内置模型",
+  "overview.metric.model.fallback.tooltip":
+    "原配置模型不可用，已自动切换到 Agent 内置模型继续工作。原始错误：{msg}",
   "overview.metric.policies.breakdown": "{active} 已启用 · {candidate} 候选",
   "overview.metric.skills.breakdown": "{active} 已启用 · {candidate} 候选",
   "overview.live.title": "实时活动",
@@ -1183,6 +1194,11 @@ const zh: Record<TranslationKey, string> = {
   "logs.tag.skill": "技能",
   "logs.tag.policy": "经验",
   "logs.tag.world": "环境认知",
+  "logs.tag.system": "系统",
+  "logs.system.role": "{role}调用失败",
+  "logs.system.role.embedding": "嵌入模型",
+  "logs.system.role.llm": "摘要模型",
+  "logs.system.role.skillEvolver": "技能进化模型",
   "logs.tool.search": "检索",
   "logs.tool.add": "写入",
   "logs.tool.skill_generate": "生成",
diff --git a/apps/memos-local-plugin/web/src/stores/restart.ts b/apps/memos-local-plugin/web/src/stores/restart.ts
index 37b97255a..898cff61b 100644
--- a/apps/memos-local-plugin/web/src/stores/restart.ts
+++ b/apps/memos-local-plugin/web/src/stores/restart.ts
@@ -63,12 +63,20 @@ async function pollHealthUntilUp(maxAttempts = 60): Promise<boolean> {
 }
 
 /**
- * Quick health check — just verify the server responds once.
- * Used for Hermes where the new daemon is already up before the old exits.
+ * Quick health check — verify the server responds again.
+ *
+ * Hermes' restart flow spawns the new daemon AFTER `sleep 3`
+ * (admin.ts) so the old bridge can fully release the viewer port,
+ * then `tsx` cold-compiles + bootstrap (DB migrations, embedder /
+ * LLM clients, host-bridge registration) takes another few seconds
+ * before the port is bound. Total worst-case wall time is ~10–15 s
+ * on slower machines. We poll every 1 s for up to 30 attempts (30 s
+ * total) so even a sluggish cold start succeeds without the user
+ * hitting the "重启超时" toast prematurely.
  */
-async function quickPollUp(maxAttempts = 10): Promise<boolean> {
+async function quickPollUp(maxAttempts = 30): Promise<boolean> {
   for (let i = 0; i < maxAttempts; i++) {
-    await new Promise((r) => setTimeout(r, 800));
+    await new Promise((r) => setTimeout(r, 1000));
     try {
       const res = await fetch("/api/v1/health");
       if (res.ok || res.status === 401 || res.status === 403) return true;
@@ -103,8 +111,11 @@ export async function triggerRestart(
       restartState.value = { phase: "restartFailed" };
     }
   } else {
-    // Hermes: new daemon spawns before old exits, transition is fast.
-    const ok = await quickPollUp(10);
+    // Hermes: new daemon spawns after `sleep 3` (admin.ts) so the
+    // old bridge can fully release the port; cold-start of tsx +
+    // bootstrap may take another few seconds. Use the default
+    // `quickPollUp` attempts (30s total).
+    const ok = await quickPollUp();
     if (ok) {
       window.location.href =
         window.location.pathname + "?_t=" + Date.now();
@@ -128,9 +139,9 @@ export async function triggerCleared(): Promise<void> {
       restartState.value = { phase: "restartFailed" };
     }
   } else {
-    // Hermes: clear-data spawns a new daemon. Give it extra time
-    // since it also needs to re-create the DB on first boot.
-    const ok = await quickPollUp(15);
+    // Hermes: clear-data spawns a new daemon. The default 30s of
+    // `quickPollUp` already covers the slow first-boot DB migration.
+    const ok = await quickPollUp();
     if (ok) {
       window.location.href =
         window.location.pathname + "?_t=" + Date.now();
diff --git a/apps/memos-local-plugin/web/src/styles/components.css b/apps/memos-local-plugin/web/src/styles/components.css
index ea662d025..8b3a0c747 100644
--- a/apps/memos-local-plugin/web/src/styles/components.css
+++ b/apps/memos-local-plugin/web/src/styles/components.css
@@ -368,10 +368,12 @@
   border-radius: 50%;
   flex-shrink: 0;
 }
-.status-dot--ok   { background: var(--green); box-shadow: 0 0 0 2px rgba(16, 185, 129, 0.18); }
-.status-dot--err  { background: var(--red);   box-shadow: 0 0 0 2px rgba(239, 68, 68, 0.20); }
-.status-dot--idle { background: var(--fg-dim); opacity: 0.6; }
-.status-dot--off  { background: transparent; border: 1px dashed var(--fg-dim); }
+.status-dot--ok       { background: var(--green); box-shadow: 0 0 0 2px rgba(16, 185, 129, 0.18); }
+/* Yellow ring — primary provider broken but host LLM bridge rescued the call. */
+.status-dot--fallback { background: #f59e0b;     box-shadow: 0 0 0 2px rgba(245, 158, 11, 0.22); }
+.status-dot--err      { background: var(--red);   box-shadow: 0 0 0 2px rgba(239, 68, 68, 0.20); }
+.status-dot--idle     { background: var(--fg-dim); opacity: 0.6; }
+.status-dot--off      { background: transparent; border: 1px dashed var(--fg-dim); }
 
 /* ── Pills (status badges) ──────────────────────────────────────── */
 
diff --git a/apps/memos-local-plugin/web/src/views/LogsView.tsx b/apps/memos-local-plugin/web/src/views/LogsView.tsx
index 426b5165d..e7463c9ad 100644
--- a/apps/memos-local-plugin/web/src/views/LogsView.tsx
+++ b/apps/memos-local-plugin/web/src/views/LogsView.tsx
@@ -33,7 +33,9 @@ type ToolFilter =
   | "world_model_generate"
   | "world_model_evolve"
   | "task_done"
-  | "task_failed";
+  | "task_failed"
+  | "system_error"
+  | "system_model_status";
 
 /**
  * Frontend log-tag categories. Each tag maps to one or more backend
@@ -49,7 +51,13 @@ type LogTag =
   | "task"
   | "skill"
   | "policy"
-  | "world";
+  | "world"
+  // Infrastructure-layer failures (embedding / summary LLM /
+  // skillEvolver provider errors). The bootstrap layer drops a
+  // `system_error` row into api_logs every time a model facade
+  // throws, so users can correlate Overview red dots with concrete
+  // upstream messages without tailing the server logs.
+  | "system";
 
 const LOG_TAGS: Array<{ v: LogTag; k: string }> = [
   { v: "", k: "common.all" },
@@ -59,6 +67,7 @@ const LOG_TAGS: Array<{ v: LogTag; k: string }> = [
   { v: "skill", k: "logs.tag.skill" },
   { v: "policy", k: "logs.tag.policy" },
   { v: "world", k: "logs.tag.world" },
+  { v: "system", k: "logs.tag.system" },
 ];
 
 /**
@@ -75,6 +84,7 @@ const ALLOWED_TOOLS: Record<LogTag, readonly ToolFilter[]> = {
   skill: ["skill_generate", "skill_evolve"],
   policy: ["policy_generate", "policy_evolve"],
   world: ["world_model_generate", "world_model_evolve"],
+  system: ["system_error", "system_model_status"],
 };
 
 interface ApiLogsResponse {
@@ -311,6 +321,10 @@ function LogCard({
             <MemorySearchDetail input={input} output={output} />
           ) : log.toolName === "memory_add" ? (
             <MemoryAddDetail input={input} output={output} />
+          ) : log.toolName === "system_error" ? (
+            <SystemErrorDetail output={output} />
+          ) : log.toolName === "system_model_status" ? (
+            <SystemModelStatusDetail output={output} />
           ) : log.toolName.startsWith("skill_") ||
             log.toolName.startsWith("policy_") ||
             log.toolName.startsWith("world_model_") ||
@@ -750,6 +764,117 @@ function LifecycleDetail({
   );
 }
 
+// ─── system_error template ──────────────────────────────────────────────
+
+interface SystemErrorPayload {
+  role?: "embedding" | "llm" | "skillEvolver";
+  provider?: string;
+  model?: string;
+  message?: string;
+  code?: string;
+  at?: number;
+}
+
+interface SystemModelStatusPayload extends SystemErrorPayload {
+  status?: "ok" | "fallback" | "error";
+  fallbackProvider?: string;
+  fallbackModel?: string;
+}
+
+/**
+ * Detail view for a `system_error` row. The bootstrap-installed sink
+ * stores a flat `{ role, provider, model, message, code, at }` blob so
+ * the renderer is intentionally minimal — one prominent red error line
+ * plus a row of metadata pills.
+ */
+function SystemErrorDetail({ output }: { output: unknown }) {
+  const out = (output ?? {}) as SystemErrorPayload;
+  const role = out.role ?? "(unknown)";
+  return (
+    <div class="vstack" style="gap:var(--sp-3)">
+      <section
+        class="card card--flat"
+        style="border-color:var(--danger);background:var(--danger-soft)"
+      >
+        <div
+          class="muted"
+          style="font-size:var(--fs-xs);margin-bottom:4px;color:var(--danger)"
+        >
+          {t("logs.system.role", { role: roleLabel(role) })}
+        </div>
+        <div class="mono" style="font-size:var(--fs-sm);line-height:1.5;word-break:break-word">
+          {out.message || "(no message)"}
+        </div>
+      </section>
+      <div class="hstack" style="gap:var(--sp-2);flex-wrap:wrap">
+        {out.provider && (
+          <span class="pill pill--info" style="font-family:var(--font-mono)">
+            provider: {out.provider}
+          </span>
+        )}
+        {out.model && (
+          <span class="pill pill--info" style="font-family:var(--font-mono)">
+            model: {out.model}
+          </span>
+        )}
+        {out.code && (
+          <span class="pill pill--failed" style="font-family:var(--font-mono)">
+            code: {out.code}
+          </span>
+        )}
+      </div>
+    </div>
+  );
+}
+
+function SystemModelStatusDetail({ output }: { output: unknown }) {
+  const out = (output ?? {}) as SystemModelStatusPayload;
+  const status = out.status ?? "error";
+  const role = out.role ?? "(unknown)";
+  const tone =
+    status === "ok"
+      ? { border: "var(--success)", bg: "var(--success-soft)", pill: "pill--active" }
+      : status === "fallback"
+      ? { border: "#f59e0b", bg: "rgba(245, 158, 11, 0.12)", pill: "pill--info" }
+      : { border: "var(--danger)", bg: "var(--danger-soft)", pill: "pill--failed" };
+  return (
+    <div class="vstack" style="gap:var(--sp-3)">
+      <section
+        class="card card--flat"
+        style={`border-color:${tone.border};background:${tone.bg}`}
+      >
+        <div class="hstack" style="gap:var(--sp-2);margin-bottom:6px;flex-wrap:wrap">
+          <span class={`pill ${tone.pill}`}>{status}</span>
+          <span class="pill pill--info">{roleLabel(role)}</span>
+          {out.provider && <span class="pill">provider: {out.provider}</span>}
+          {out.model && <span class="pill">model: {out.model}</span>}
+          {out.fallbackProvider && (
+            <span class="pill pill--info">fallback: {out.fallbackProvider}</span>
+          )}
+        </div>
+        {out.message && (
+          <div class="mono" style="font-size:var(--fs-sm);line-height:1.5;word-break:break-word">
+            {out.message}
+          </div>
+        )}
+      </section>
+    </div>
+  );
+}
+
+function roleLabel(role: string): string {
+  switch (role) {
+    case "embedding":
+      return t("logs.system.role.embedding");
+    case "llm":
+      return t("logs.system.role.llm");
+    case "skillEvolver":
+      return t("logs.system.role.skillEvolver");
+    default:
+      return role;
+  }
+}
+
 // ─── Generic fallback ───────────────────────────────────────────────────
 
 function GenericDetail({
@@ -887,6 +1012,29 @@ function buildSummary(log: ApiLogDTO, input: unknown, output: unknown): string {
       (inp.worldModelId as string | undefined);
     return id ? `world model ${truncate(id, 24)}` : log.toolName;
   }
+  if (log.toolName === "system_error") {
+    const role = (out.role as string | undefined) ?? "?";
+    const message = (out.message as string | undefined) ?? "";
+    const provider = (out.provider as string | undefined) ?? "";
+    const head = `[${roleLabel(role)}]`;
+    const tail = message
+      ? truncate(message, 80)
+      : provider
+      ? provider
+      : "(no message)";
+    return `${head} ${tail}`;
+  }
+  if (log.toolName === "system_model_status") {
+    const role = (out.role as string | undefined) ?? "?";
+    const status = (out.status as string | undefined) ?? "?";
+    const provider = (out.provider as string | undefined) ?? "";
+    const model = (out.model as string | undefined) ?? "";
+    const message = (out.message as string | undefined) ?? "";
+    const bits = [`[${roleLabel(role)}]`, status];
+    if (provider || model) bits.push([provider, model].filter(Boolean).join("/"));
+    if (message) bits.push(truncate(message, 60));
+    return bits.join(" · ");
+  }
   if (log.toolName === "task_done" || log.toolName === "task_failed") {
     const rHuman = typeof out.rHuman === "number" ? (out.rHuman as number).toFixed(2) : null;
     const source = (out.source as string | undefined) ?? "";
diff --git a/apps/memos-local-plugin/web/src/views/MemoriesView.tsx b/apps/memos-local-plugin/web/src/views/MemoriesView.tsx
index 15c7e2ddd..5944ff53a 100644
--- a/apps/memos-local-plugin/web/src/views/MemoriesView.tsx
+++ b/apps/memos-local-plugin/web/src/views/MemoriesView.tsx
@@ -760,7 +760,23 @@ function ToolCallCard({
 
 function formatToolPayload(v: unknown): string {
   if (v === undefined || v === null) return "";
-  if (typeof v === "string") return v;
+  // Tool inputs/outputs frequently arrive as already-stringified JSON
+  // (the agent serializes them before storing). Re-parse so the same
+  // 2-space pretty-print path applies regardless of upstream encoding.
+  if (typeof v === "string") {
+    const trimmed = v.trim();
+    if (
+      (trimmed.startsWith("{") && trimmed.endsWith("}")) ||
+      (trimmed.startsWith("[") && trimmed.endsWith("]"))
+    ) {
+      try {
+        return JSON.stringify(JSON.parse(trimmed), null, 2);
+      } catch {
+        return v;
+      }
+    }
+    return v;
+  }
   try {
     return JSON.stringify(v, null, 2);
   } catch {
@@ -898,6 +914,27 @@ function formatTs(ts: number): string {
   }
 }
 
+/**
+ * Format a step timestamp with millisecond precision (HH:MM:SS.mmm).
+ * Used in the per-step header row so users can tell apart sub-steps
+ * fired within the same second by a fast tool loop. Uses 24h fields
+ * directly so the locale's AM/PM suffix doesn't end up between the
+ * seconds and the millisecond fraction.
+ */
+function formatStepTime(ts: number): string {
+  if (!ts) return "—";
+  try {
+    const d = new Date(ts);
+    const hh = String(d.getHours()).padStart(2, "0");
+    const mm = String(d.getMinutes()).padStart(2, "0");
+    const ss = String(d.getSeconds()).padStart(2, "0");
+    const ms = String(d.getMilliseconds()).padStart(3, "0");
+    return `${hh}:${mm}:${ss}.${ms}`;
+  } catch {
+    return String(ts);
+  }
+}
+
 // ─── Right-side drawer ───────────────────────────────────────────────────
 
 /**
@@ -1205,8 +1242,9 @@ function TraceDrawer({
  *     and any `toolCalls` rendered through the existing
  *     `ToolCallCard`. Empty fields collapse silently.
  *
- * The first step is open by default; the rest start collapsed so the
- * drawer doesn't drown the user when a turn fired a dozen tools.
+ * Every step starts collapsed so the drawer stays compact when a turn
+ * fired a dozen tools — users opt into the detail they want instead of
+ * scrolling past an auto-expanded first step.
  */
 function StepList({ traces }: { traces: readonly TraceDTO[] }) {
   return (
@@ -1223,7 +1261,6 @@ function StepList({ traces }: { traces: readonly TraceDTO[] }) {
           return (
             <details
               key={tr.id}
-              open={idx === 0}
               class="card card--flat"
               style="padding:var(--sp-2) var(--sp-3)"
             >
@@ -1236,7 +1273,7 @@ function StepList({ traces }: { traces: readonly TraceDTO[] }) {
                 </span>
                 <span class={`pill pill--role-${role}`}>{roleLabel}</span>
                 <span class="mono muted" style="font-size:var(--fs-xs)">
-                  {new Date(tr.ts).toLocaleTimeString()}
+                  {formatStepTime(tr.ts)}
                 </span>
                 <span class="mono muted" style="font-size:var(--fs-xs)">
                   V {tr.value.toFixed(2)} · α {tr.alpha.toFixed(2)}
diff --git a/apps/memos-local-plugin/web/src/views/OverviewView.tsx b/apps/memos-local-plugin/web/src/views/OverviewView.tsx
index 79c001f86..bc0177f78 100644
--- a/apps/memos-local-plugin/web/src/views/OverviewView.tsx
+++ b/apps/memos-local-plugin/web/src/views/OverviewView.tsx
@@ -42,9 +42,14 @@ interface ModelInfo {
   model: string;
   dim?: number;
   inherited?: boolean;
-  /** Epoch ms of most recent successful call (null = never called). */
+  /** Epoch ms of most recent direct primary-provider success. */
   lastOkAt?: number | null;
-  /** Most recent failure, if the last call went bad. */
+  /**
+   * Epoch ms of most recent rescued-by-host-fallback call. Populates
+   * the "yellow" overview state.
+   */
+  lastFallbackAt?: number | null;
+  /** Most recent failure (sticky — see ModelHealth comment). */
   lastError?: { at: number; message: string } | null;
 }
 interface OverviewSummary {
@@ -259,17 +264,26 @@ function QuantityCard({
   );
 }
 
-type ModelDotKind = "ok" | "err" | "idle" | "off";
+type ModelDotKind = "ok" | "fallback" | "err" | "idle" | "off";
 
 /**
- * Derive the overview card status from a {@link ModelInfo}:
- *   - `off`  — the client isn't even configured
- *   - `idle` — configured but no call has happened yet (fresh install)
- *   - `err`  — last call failed, surface the error message in a tooltip
- *   - `ok`   — last call succeeded
+ * Derive the overview card status from a {@link ModelInfo}.
  *
- * Preference order: error wins (even over never-called), so a
- * freshly-failed provider doesn't pretend to be idle.
+ * The card is painted by picking the most-recent of three timestamps
+ * — `lastOkAt`, `lastFallbackAt`, `lastError.at` — and mapping that
+ * winner to a colour:
+ *
+ *   - `ok` (green)        — primary provider answered directly.
+ *   - `fallback` (yellow) — primary failed but host LLM bridge
+ *                           rescued the call. The card surfaces the
+ *                           original error so users know *why* it
+ *                           degraded.
+ *   - `err` (red)         — primary failed and either there was no
+ *                           fallback or the fallback also failed.
+ *
+ * `lastError` is sticky on the backend so it can sit alongside a
+ * fresher `lastOkAt` after recovery — comparing timestamps lets the
+ * UI naturally "go green again" without having to clear the message.
  */
 function modelStatusFromInfo(info: ModelInfo | undefined): {
   kind: ModelDotKind;
@@ -279,23 +293,65 @@ function modelStatusFromInfo(info: ModelInfo | undefined): {
   if (!info || info.available === false) {
     return { kind: "off", label: t("overview.metric.model.unconfigured") };
   }
-  if (info.lastError) {
+
+  const okAt = info.lastOkAt ?? 0;
+  const fbAt = info.lastFallbackAt ?? 0;
+  const errAt = info.lastError?.at ?? 0;
+  const max = Math.max(okAt, fbAt, errAt);
+
+  // Nothing has happened yet — fresh process, no calls landed.
+  if (max === 0) {
+    return { kind: "idle", label: t("overview.metric.model.idle") };
+  }
+
+  // Priority order matters when timestamps tie.
+  //
+  // The backend stamps `lastFallbackAt` and `lastError.at` with the
+  // SAME `Date.now()` inside `markFallback` (the upstream error is
+  // kept on `lastError` so the viewer can show *why* fallback
+  // engaged). When that happens, a strict "errAt === max ⇒ red"
+  // check would always win over the fallback branch and the slot
+  // would never go yellow. The current call succeeded — through the
+  // host bridge — so semantically it is the fallback state, with
+  // the error only providing context. Hence: fallback wins ties
+  // against err.
+  //
+  // We also let fallback win ties against ok for the rare case where
+  // a successful primary call and a fallback rescue happen in the
+  // same millisecond — yellow is the most informative state.
+  if (fbAt > 0 && fbAt >= errAt && fbAt >= okAt) {
+    const raw = (info.lastError?.message ?? "").trim();
+    const head = t("overview.metric.model.fallback");
+    const tail = raw ? `: ${raw.length > 60 ? raw.slice(0, 59) + "…" : raw}` : "";
     return {
-      kind: "err",
-      label: t("overview.metric.model.failed"),
-      tooltip: info.lastError.message,
+      kind: "fallback",
+      label: head + tail,
+      tooltip: raw
+        ? t("overview.metric.model.fallback.tooltip", { msg: raw })
+        : head,
     };
   }
-  if (info.lastOkAt) {
+
+  // Most recent event was a terminal failure.
+  if (errAt > 0 && errAt >= okAt) {
+    const raw = (info.lastError?.message ?? "").trim();
+    const short =
+      raw.length > 80 ? raw.slice(0, 79) + "…" : raw || t("overview.metric.model.failed");
     return {
-      kind: "ok",
-      label: t("overview.metric.model.connected"),
-      tooltip: t("overview.metric.model.connectedAt", {
-        ts: new Date(info.lastOkAt).toLocaleTimeString(),
-      }),
+      kind: "err",
+      label: short,
+      tooltip: raw || t("overview.metric.model.failed"),
     };
   }
-  return { kind: "idle", label: t("overview.metric.model.idle") };
+
+  // okAt is the largest — primary provider is working directly.
+  return {
+    kind: "ok",
+    label: t("overview.metric.model.connected"),
+    tooltip: t("overview.metric.model.connectedAt", {
+      ts: new Date(okAt).toLocaleTimeString(),
+    }),
+  };
 }
 
 function ModelCard({