diff --git a/apps/memos-local-plugin/adapters/hermes/memos_provider/__init__.py b/apps/memos-local-plugin/adapters/hermes/memos_provider/__init__.py index d90466871..5a1154648 100644 --- a/apps/memos-local-plugin/adapters/hermes/memos_provider/__init__.py +++ b/apps/memos-local-plugin/adapters/hermes/memos_provider/__init__.py @@ -156,6 +156,14 @@ def initialize(self, session_id: str, **kwargs: Any) -> None: # type: ignore[ov return try: self._bridge = MemosBridgeClient() + # Register the fallback LLM handler BEFORE we open the + # session so it is available the very first time the + # plugin's facade asks for help (e.g. on the first + # `turn.start` retrieval call). + self._bridge.register_host_handler( + "host.llm.complete", + self._handle_host_llm_complete, + ) self._open_session(session_id) logger.info( "MemOS: bridge ready session=%s platform=%s (episode deferred)", @@ -713,9 +721,7 @@ def handle_tool_call(self, tool_name: str, args: dict[str, Any], **_kwargs: Any) elif kind == "policy": body = self._clip( "\n\n".join( - part - for part in [item.get("title"), item.get("procedure")] - if part + part for part in [item.get("title"), item.get("procedure")] if part ) ) meta = { @@ -905,6 +911,148 @@ def shutdown(self) -> None: # type: ignore[override] # as a daemon if its viewer is running, so the memory panel # remains accessible between `hermes chat` sessions. + # ─── Host LLM bridge (fallback for plugin-side model failures) ──────── + + def _handle_host_llm_complete(self, params: dict[str, Any]) -> dict[str, Any]: + """Run a fallback LLM call using the host (hermes) agent's models. + + Wired into the bridge's reverse-RPC channel under the + ``host.llm.complete`` method. Triggered when the plugin's + configured summary or skill-evolver model fails — instead of + bubbling the error straight up (which would stall the V7 + capture / reflection / skill pipeline), we replay the prompt + through ``agent.auxiliary_client.call_llm`` so hermes' own + provider stack (including its OpenRouter / Codex / custom + endpoint resolution) handles it. + + If the host LLM also fails this raises, the bridge converts + that into a JSON-RPC error, the LlmClient ``markFail``s, and + the Overview card flips red — exactly matching the spec + "if the agent's main model is also down, stop falling back + and surface red". + """ + messages = params.get("messages") + if not isinstance(messages, list) or not messages: + raise ValueError("host.llm.complete: missing messages") + + # Lazy imports — these pull in heavy deps (openai client, + # credential pool, …) that we don't want to load until a + # fallback is actually requested. + try: + from agent.auxiliary_client import call_llm # type: ignore[import-not-found] + from hermes_cli.runtime_provider import ( # type: ignore[import-not-found] + resolve_runtime_provider, + ) + except Exception as err: + raise RuntimeError(f"host LLM bridge unavailable: {err}") from err + + # Resolve hermes' MAIN conversation provider so the fallback + # uses exactly what the user configured for chat. Walking the + # generic auxiliary auto-detect chain would otherwise depend + # on env vars (`OPENROUTER_API_KEY`, `OPENAI_API_KEY`, …) that + # often don't propagate into the bridge subprocess and would + # leave us with no working credential. Pinning to the resolved + # main runtime guarantees we hit the same endpoint the user + # already authenticated for chat. + try: + runtime = resolve_runtime_provider() + except Exception as err: + raise RuntimeError(f"could not resolve hermes main runtime: {err}") from err + + main_runtime: dict[str, str] = {} + for field in ("provider", "model", "base_url", "api_key", "api_mode"): + value = runtime.get(field) if isinstance(runtime, dict) else None + if isinstance(value, str) and value.strip(): + main_runtime[field] = value.strip() + + normalized = [ + { + "role": str(m.get("role", "user")), + "content": str(m.get("content", "")), + } + for m in messages + if isinstance(m, dict) + ] + timeout_ms = params.get("timeoutMs") + timeout_s: float | None = None + if isinstance(timeout_ms, (int, float)) and timeout_ms > 0: + timeout_s = float(timeout_ms) / 1000.0 + + max_tokens = params.get("maxTokens") + temperature = params.get("temperature") + + kwargs: dict[str, Any] = { + "messages": normalized, + # `main_runtime` makes `_resolve_auto` prefer the user's + # main conversation provider + model over the generic auto + # chain. If the user's main provider is also down, + # `call_llm` raises — which is exactly the "agent's own + # model is broken too, stop falling back" semantic we want + # (red light on Overview). + "main_runtime": main_runtime, + } + if isinstance(max_tokens, (int, float)) and max_tokens > 0: + kwargs["max_tokens"] = int(max_tokens) + if isinstance(temperature, (int, float)): + kwargs["temperature"] = float(temperature) + if timeout_s is not None: + kwargs["timeout"] = timeout_s + + started = time.time() + try: + response = call_llm(**kwargs) + except Exception as err: + # Surface the original failure verbatim — the LlmClient + # will tag this as a "host fallback failed" terminal error + # and the Overview red-light path takes over. + raise RuntimeError(f"host LLM call failed: {err}") from err + + # `call_llm` returns an OpenAI ChatCompletion-shaped object. + # Pluck the assistant text + token usage defensively so a + # non-standard host (e.g. Anthropic native) still produces a + # populated response. + text = "" + model = "" + usage_dict: dict[str, int] = {} + try: + choices = getattr(response, "choices", None) or response.get("choices", []) # type: ignore[union-attr] + if choices: + first = choices[0] + msg = getattr(first, "message", None) or first.get("message", {}) # type: ignore[union-attr] + content = getattr(msg, "content", None) or msg.get("content", "") # type: ignore[union-attr] + text = str(content or "") + model = ( + getattr(response, "model", None) + or response.get("model", "") # type: ignore[union-attr] + or "" + ) + u = getattr(response, "usage", None) or response.get("usage", None) # type: ignore[union-attr] + if u is not None: + pt = getattr(u, "prompt_tokens", None) + ct = getattr(u, "completion_tokens", None) + tt = getattr(u, "total_tokens", None) + if pt is None and isinstance(u, dict): + pt = u.get("prompt_tokens") + ct = u.get("completion_tokens") + tt = u.get("total_tokens") + if isinstance(pt, int): + usage_dict["promptTokens"] = pt + if isinstance(ct, int): + usage_dict["completionTokens"] = ct + if isinstance(tt, int): + usage_dict["totalTokens"] = tt + except Exception: + logger.debug("host.llm.complete: shape parse failed", exc_info=True) + + result: dict[str, Any] = { + "text": text, + "model": str(model or ""), + "durationMs": int((time.time() - started) * 1000), + } + if usage_dict: + result["usage"] = usage_dict + return result + # ─── Internals ──────────────────────────────────────────────────────── def _open_session(self, session_id: str = "") -> None: @@ -928,11 +1076,7 @@ def _is_transport_closed(self, err: Exception) -> bool: if isinstance(err, BridgeError) and err.code == "transport_closed": return True msg = str(err).lower() - return ( - "broken pipe" in msg - or "bridge closed" in msg - or "transport_closed" in msg - ) + return "broken pipe" in msg or "bridge closed" in msg or "transport_closed" in msg def _reconnect_bridge(self, session_id: str = "") -> None: old_bridge = self._bridge diff --git a/apps/memos-local-plugin/adapters/hermes/memos_provider/bridge_client.py b/apps/memos-local-plugin/adapters/hermes/memos_provider/bridge_client.py index 0810c6560..9d82a2264 100644 --- a/apps/memos-local-plugin/adapters/hermes/memos_provider/bridge_client.py +++ b/apps/memos-local-plugin/adapters/hermes/memos_provider/bridge_client.py @@ -66,6 +66,14 @@ def __init__( self._pending: dict[int, dict[str, Any]] = {} self._events: list[Callable[[dict[str, Any]], None]] = [] self._logs: list[Callable[[dict[str, Any]], None]] = [] + # Reverse-direction handlers: the bridge can send us a + # JSON-RPC request via `serverRequest(...)` (e.g. + # `host.llm.complete` for fallback LLM calls). Registered + # methods run on the dedicated reader thread; long-running + # work should spawn its own worker if it needs to. Each + # handler returns a JSON-serialisable value or raises to + # surface a JSON-RPC error back to the bridge. + self._host_handlers: dict[str, Callable[[dict[str, Any]], Any]] = {} self._closed = False node = node_binary or shutil.which("node") or "node" @@ -173,6 +181,22 @@ def on_event(self, cb: Callable[[dict[str, Any]], None]) -> None: def on_log(self, cb: Callable[[dict[str, Any]], None]) -> None: self._logs.append(cb) + def register_host_handler( + self, + method: str, + handler: Callable[[dict[str, Any]], Any], + ) -> None: + """Register a handler for bridge → adapter (reverse) requests. + + The Node-side bridge calls these via ``stdio.serverRequest``. + Most-recent registration wins. The handler runs on the reader + thread; if it blocks for a long time it stalls every other + bridge → adapter notification, so handlers that need to do + heavy work (e.g. an LLM call) are still expected to return + within the bridge-side timeout (default 60 s). + """ + self._host_handlers[method] = handler + def close(self) -> None: if self._closed: return @@ -225,6 +249,68 @@ def _read_loop(self) -> None: except Exception: logger.debug("log listener threw", exc_info=True) continue + # Reverse-direction request: the bridge is asking the + # adapter to do something (e.g. run a fallback LLM call + # via `host.llm.complete`). Dispatch to the registered + # handler and write the response back synchronously. + method = msg.get("method") + rpc_id = msg.get("id") + if ( + isinstance(method, str) + and rpc_id is not None + and "result" not in msg + and "error" not in msg + ): + handler = self._host_handlers.get(method) + if handler is None: + self._send_response( + rpc_id, + error={ + "code": -32601, + "message": f"method not found: {method}", + "data": {"code": "unknown_method"}, + }, + ) + continue + params = msg.get("params") or {} + if not isinstance(params, dict): + params = {} + try: + result = handler(params) + self._send_response(rpc_id, result=result) + except Exception as err: + logger.warning("host handler %s failed: %s", method, err) + self._send_response( + rpc_id, + error={ + "code": -32000, + "message": str(err) or err.__class__.__name__, + "data": {"code": "host_handler_failed"}, + }, + ) + continue + + def _send_response( + self, + rpc_id: Any, + *, + result: Any = None, + error: dict[str, Any] | None = None, + ) -> None: + """Write a JSON-RPC response for a reverse-direction request.""" + if self._closed: + return + payload: dict[str, Any] = {"jsonrpc": "2.0", "id": rpc_id} + if error is not None: + payload["error"] = error + else: + payload["result"] = result + with self._lock: + try: + self._proc.stdin.write(json.dumps(payload, ensure_ascii=False) + "\n") + self._proc.stdin.flush() + except (BrokenPipeError, OSError): + pass def _stderr_loop(self) -> None: assert self._proc.stderr is not None diff --git a/apps/memos-local-plugin/agent-contract/memory-core.ts b/apps/memos-local-plugin/agent-contract/memory-core.ts index caf5c7e31..bb4525207 100644 --- a/apps/memos-local-plugin/agent-contract/memory-core.ts +++ b/apps/memos-local-plugin/agent-contract/memory-core.ts @@ -57,17 +57,39 @@ export interface CoreHealth { /** * Per-model connectivity summary used by the viewer's Overview page. - * `available` reflects whether the client is configured (non-null); - * `lastOkAt` / `lastError` reflect the most recent **actual** call - * outcome tracked by the runtime. A green dot means "called successfully - * at least once and the last call was good"; red means "most recent - * call failed" + the error text to surface in a tooltip. + * + * The viewer renders the slot in one of four colours, picked by + * comparing the timestamps below — most-recent event wins: + * + * - **green** (`ok`) : `lastOkAt` is the latest stamp → + * primary provider answered directly. + * - **yellow** (`fallback`) : `lastFallbackAt` is the latest stamp → + * primary provider failed *but* the host LLM bridge rescued the + * call. Only ever set on the LLM / skillEvolver slots; the + * embedder has no fallback path so this stays `null`. + * - **red** (`err`) : `lastError.at` is the latest stamp → + * primary provider failed and either no fallback was configured + * or the fallback also failed. The accompanying `message` is + * surfaced verbatim on the card. + * - **idle / off** : every timestamp is `null` → the + * facade has not been called yet (idle) or no client is + * configured at all (off). + * + * `lastError` is **sticky** — it is not cleared by a later success. + * The viewer's timestamp comparison naturally promotes a fresh + * success over a stale failure, while keeping the message available + * in case a subsequent failure flips the card back to red. */ export interface ModelHealth { available: boolean; provider: string; model: string; lastOkAt: number | null; + /** + * Latest time the primary provider failed but the host LLM bridge + * answered successfully. Always `null` on the embedder slot. + */ + lastFallbackAt: number | null; lastError: { at: number; message: string } | null; } diff --git a/apps/memos-local-plugin/bridge.cts b/apps/memos-local-plugin/bridge.cts index 913f10ce6..01b0ae8f2 100644 --- a/apps/memos-local-plugin/bridge.cts +++ b/apps/memos-local-plugin/bridge.cts @@ -63,9 +63,73 @@ async function main(): Promise { )) as typeof import("./server/http.js"); const pkgVersion = require("./package.json").version; + + // ─── Host LLM bridge (reverse RPC, lazy-bound to stdio) ──────── + // We need to register the bridge BEFORE bootstrap creates the + // LlmClients (so the very first `shouldFallback()` check sees a + // non-null bridge), but `stdio` itself doesn't exist until later + // in this function. The trick: hand a placeholder closure to + // bootstrap that defers actual stdio access to the time of the + // first fallback call. By then `stdio` has been assigned. + // + // Routing through `bootstrapMemoryCoreFull({ hostLlmBridge })` + // (instead of having `bridge.cts` call `registerHostLlmBridge` + // directly) avoids a subtle ESM module-identity issue: the static + // `import` chain inside `core/llm/client.ts` and the dynamic + // `await import(...)` here resolve to the same file URL but Node + // can occasionally treat them as different module instances with + // independent `currentBridge` slots. Registering inside bootstrap + // forces both ends to share the same module instance. + let stdio: import("./bridge/stdio.js").StdioServerHandle | null = null; + const lazyHostLlmBridge: import("./core/llm/host-bridge.js").HostLlmBridge = + { + id: `stdio.host.${args.agent}.v1`, + async complete(input) { + if (!stdio) { + throw new Error( + "host LLM bridge invoked before stdio server was ready", + ); + } + const result = (await stdio.serverRequest( + "host.llm.complete", + { + messages: input.messages, + model: input.model, + temperature: input.temperature, + maxTokens: input.maxTokens, + timeoutMs: input.timeoutMs, + }, + { timeoutMs: (input.timeoutMs ?? 60_000) + 5_000 }, + )) as { + text?: string; + model?: string; + usage?: { + promptTokens?: number; + completionTokens?: number; + totalTokens?: number; + }; + durationMs?: number; + }; + return { + text: typeof result?.text === "string" ? result.text : "", + model: + typeof result?.model === "string" + ? result.model + : input.model ?? "", + usage: result?.usage, + durationMs: + typeof result?.durationMs === "number" ? result.durationMs : 0, + }; + }, + }; + const { core, config, home } = await bootstrapMemoryCoreFull({ agent: args.agent, pkgVersion, + // Skip in daemon mode — daemon has no stdio, so reverse RPC + // can't ever fire and registering would just hide the "no + // bridge" error message. + hostLlmBridge: args.daemon ? null : lazyHostLlmBridge, }); await core.init(); @@ -78,38 +142,56 @@ async function main(): Promise { // viewer daemon. Used by install.sh (post-install) and admin/restart // (self-restart) to keep the Memory Viewer always available. if (args.daemon) { + // Daemon mode is the target of `POST /api/v1/admin/restart`, + // which re-spawns the bridge after a short sleep. On busy + // machines the previous bridge's listening socket can take a + // moment longer than expected to release, so we retry the bind + // a few times before giving up. Without this the user sees + // "重启超时" in the viewer because the new daemon raced its + // predecessor and lost. let viewer: import("./server/types.js").ServerHandle | null = null; - try { - viewer = await startHttpServer( - { - core, - home, - logTail: () => memoryBuffer().tail({ limit: 200 }), - }, - { - port: viewerPort, - host: config.viewer.bindHost, - staticRoot: path.resolve(__dirname, "web/dist"), - agent: args.agent, - }, - ); - process.stderr.write( - `bridge: daemon viewer live at ${viewer.url} (agent=${args.agent})\n`, - ); - } catch (err) { - const e = err as NodeJS.ErrnoException; - if (e?.code === "EADDRINUSE") { + const maxBindAttempts = 10; + for (let attempt = 1; attempt <= maxBindAttempts; attempt++) { + try { + viewer = await startHttpServer( + { + core, + home, + logTail: () => memoryBuffer().tail({ limit: 200 }), + }, + { + port: viewerPort, + host: config.viewer.bindHost, + staticRoot: path.resolve(__dirname, "web/dist"), + agent: args.agent, + }, + ); + process.stderr.write( + `bridge: daemon viewer live at ${viewer.url} (agent=${args.agent})\n`, + ); + break; + } catch (err) { + const e = err as NodeJS.ErrnoException; + if (e?.code === "EADDRINUSE" && attempt < maxBindAttempts) { + process.stderr.write( + `bridge: daemon port :${viewerPort} busy (attempt ${attempt}/${maxBindAttempts}), retrying in 1s...\n`, + ); + await new Promise((r) => setTimeout(r, 1000)); + continue; + } + if (e?.code === "EADDRINUSE") { + process.stderr.write( + `bridge: daemon port :${viewerPort} still in use after ${maxBindAttempts}s — exiting.\n`, + ); + await core.shutdown(); + process.exit(1); + } process.stderr.write( - `bridge: daemon port :${viewerPort} already in use — exiting.\n`, + `bridge: daemon viewer failed: ${(err as Error)?.message ?? String(err)}\n`, ); await core.shutdown(); process.exit(1); } - process.stderr.write( - `bridge: daemon viewer failed: ${(err as Error)?.message ?? String(err)}\n`, - ); - await core.shutdown(); - process.exit(1); } const shutdownDaemon = async (sig: string) => { @@ -125,7 +207,10 @@ async function main(): Promise { } // ─── Normal (stdio) mode ────────────────────────────────────── - const stdio = startStdioServer({ core }); + // Assign the stdio handle into the closure variable so the host + // LLM bridge (registered earlier inside bootstrap) can dispatch + // reverse-direction requests to the adapter. + stdio = startStdioServer({ core }); // Try to bind the viewer port. EADDRINUSE → stay headless. let viewer: import("./server/types.js").ServerHandle | null = null; @@ -170,7 +255,7 @@ async function main(): Promise { /* best-effort */ } } - await waitForShutdown(core, stdio); + await waitForShutdown(core, stdio!); process.exit(0); }; diff --git a/apps/memos-local-plugin/bridge/stdio.ts b/apps/memos-local-plugin/bridge/stdio.ts index f01a1209c..111edacfd 100644 --- a/apps/memos-local-plugin/bridge/stdio.ts +++ b/apps/memos-local-plugin/bridge/stdio.ts @@ -52,6 +52,20 @@ export interface StdioServerHandle { close: () => Promise; /** Resolve once stdin ends. */ done: Promise; + /** + * Send a JSON-RPC request **from the bridge to the client** and wait + * for the matching response. Used by the host LLM bridge to ask the + * adapter (e.g. the Hermes Python provider) to run a fallback LLM + * call using the agent's own model. + * + * IDs use the `"srv-N"` prefix so they cannot collide with the + * client's numeric request IDs. + */ + serverRequest( + method: string, + params?: unknown, + options?: { timeoutMs?: number }, + ): Promise; } // ─── Server ───────────────────────────────────────────────────────────────── @@ -66,6 +80,20 @@ export function startStdioServer(options: StdioServerOptions): StdioServerHandle const eventsHandlers = new Set(); const logsHandlers = new Set(); + // ─── Server-initiated RPC bookkeeping ── + // Reverse-direction requests (bridge → client) live in their own + // ID namespace ("srv-1", "srv-2", …) so they never collide with the + // numeric IDs the Python client uses for forward requests. + let serverRequestSeq = 0; + const serverPending = new Map< + string, + { + resolve: (value: unknown) => void; + reject: (err: unknown) => void; + timer: ReturnType | null; + } + >(); + const eventsUnsubscribe = options.core.subscribeEvents((e) => { writeNotification(RPC_METHODS.EVENTS_NOTIFY, e); }); @@ -118,6 +146,31 @@ export function startStdioServer(options: StdioServerOptions): StdioServerHandle return; } + // Reverse-direction response: the client is replying to a request + // we previously sent via `serverRequest`. Match by `srv-` ID and + // resolve / reject the matching pending promise. + const raw = msg as unknown as Record; + if ( + raw && + typeof raw === "object" && + typeof raw.id === "string" && + (raw.id as string).startsWith("srv-") && + (raw.result !== undefined || raw.error !== undefined) + ) { + const id = raw.id as string; + const pending = serverPending.get(id); + if (pending) { + serverPending.delete(id); + if (pending.timer) clearTimeout(pending.timer); + if (raw.error != null) { + pending.reject(raw.error); + } else { + pending.resolve(raw.result); + } + } + return; + } + if (!msg || typeof msg !== "object" || msg.jsonrpc !== "2.0" || !msg.method) { writeLine(errorResponse(msg?.id ?? null, JSONRPC_INVALID_REQUEST, "not JSON-RPC 2.0")); return; @@ -180,6 +233,32 @@ export function startStdioServer(options: StdioServerOptions): StdioServerHandle }); }); + function serverRequest( + method: string, + params?: unknown, + options?: { timeoutMs?: number }, + ): Promise { + const id = `srv-${++serverRequestSeq}`; + const timeoutMs = options?.timeoutMs ?? 60_000; + return new Promise((resolve, reject) => { + if (closed) { + reject(new Error("stdio bridge closed")); + return; + } + const timer = setTimeout(() => { + if (serverPending.delete(id)) { + reject(new Error(`serverRequest ${method} timed out after ${timeoutMs}ms`)); + } + }, timeoutMs); + serverPending.set(id, { + resolve: resolve as (value: unknown) => void, + reject, + timer, + }); + writeLine({ jsonrpc: "2.0", id, method, params }); + }); + } + return { get connected() { return !closed; @@ -197,10 +276,18 @@ export function startStdioServer(options: StdioServerOptions): StdioServerHandle } catch { /* ignore */ } + // Reject any in-flight server-initiated requests so the host + // bridge doesn't hang while we tear down. + for (const [id, entry] of serverPending) { + if (entry.timer) clearTimeout(entry.timer); + entry.reject(new Error("stdio bridge closed")); + serverPending.delete(id); + } // Drain remaining lines then flush. await donePromise; }, done: donePromise, + serverRequest, }; } diff --git a/apps/memos-local-plugin/core/embedding/embedder.ts b/apps/memos-local-plugin/core/embedding/embedder.ts index 5400d6415..db798c129 100644 --- a/apps/memos-local-plugin/core/embedding/embedder.ts +++ b/apps/memos-local-plugin/core/embedding/embedder.ts @@ -78,6 +78,22 @@ export function createEmbedderWithProvider( return { text: i.text, role: i.role ?? "document" }; } + function notifyStatus(detail: { + status: "ok" | "error"; + provider: string; + model: string; + message?: string; + code?: string; + at?: number; + }): void { + if (!config.onStatus) return; + try { + config.onStatus({ kind: "embedding", ...detail }); + } catch { + /* status sink errors are non-fatal */ + } + } + async function embedOne(input: string | EmbedInput): Promise { const vecs = await embedMany([input]); return vecs[0]!; @@ -162,19 +178,28 @@ export function createEmbedderWithProvider( log: providerCtxLog, }; raw = await provider.embed(texts, role, ctx); + // Record success but DO NOT clear `lastError` — the viewer + // compares `lastError.at` against `lastOkAt` to decide the + // overview card colour. Clearing here would let one cache- + // friendly success silently mask a still-real provider + // outage that just produced a `system_error` log row. lastOkAt = Date.now(); - lastError = null; + notifyStatus({ + status: "ok", + provider: provider.name, + model: config.model, + at: lastOkAt, + }); } catch (err) { failures++; - lastError = { - at: Date.now(), - message: - err instanceof MemosError - ? `${err.code}: ${err.message}` - : err instanceof Error - ? err.message - : String(err), - }; + const errAt = Date.now(); + const errMessage = + err instanceof MemosError + ? `${err.code}: ${err.message}` + : err instanceof Error + ? err.message + : String(err); + lastError = { at: errAt, message: errMessage }; logger.warn("provider.failed", { provider: provider.name, model: config.model, @@ -182,6 +207,31 @@ export function createEmbedderWithProvider( count: texts.length, err: toErrDetail(err), }); + // Notify the bootstrap-supplied error sink (if any). Wrapped in + // its own try/catch so a buggy sink never masks the original + // failure for the caller. + if (config.onError) { + try { + config.onError({ + kind: "embedding", + provider: provider.name, + model: config.model, + message: errMessage, + code: err instanceof MemosError ? err.code : undefined, + at: errAt, + }); + } catch { + /* sink errors are non-fatal */ + } + } + notifyStatus({ + status: "error", + provider: provider.name, + model: config.model, + message: errMessage, + code: err instanceof MemosError ? err.code : undefined, + at: errAt, + }); throw err instanceof MemosError ? err : new MemosError( diff --git a/apps/memos-local-plugin/core/embedding/types.ts b/apps/memos-local-plugin/core/embedding/types.ts index 230250beb..75409323e 100644 --- a/apps/memos-local-plugin/core/embedding/types.ts +++ b/apps/memos-local-plugin/core/embedding/types.ts @@ -41,6 +41,40 @@ export interface EmbeddingConfig { headers?: Record; /** If true, all output vectors are L2-normalized. Default: true. */ normalize?: boolean; + /** + * Optional sink invoked once per terminal provider failure. Lets the + * bootstrap layer write a `system_error` row to `api_logs` so the + * Logs viewer can surface infrastructure failures alongside tool + * activity. Never throws; any exception inside the sink is swallowed. + */ + onError?: (detail: EmbeddingErrorDetail) => void; + /** + * Optional durable status sink invoked on successful and failed + * provider calls. Unlike `onError` (human-facing log only), this + * is the machine-readable source used by the Overview model cards. + */ + onStatus?: (detail: EmbeddingStatusDetail) => void; +} + +export interface EmbeddingErrorDetail { + kind: "embedding"; + provider: EmbeddingProviderName | string; + model: string; + message: string; + /** Stable `MemosError` code when the underlying error carries one. */ + code?: string; + /** Epoch ms at which the failure occurred (defaults to Date.now()). */ + at?: number; +} + +export interface EmbeddingStatusDetail { + kind: "embedding"; + status: "ok" | "error"; + provider: EmbeddingProviderName | string; + model: string; + message?: string; + code?: string; + at?: number; } // ─── Roles ─────────────────────────────────────────────────────────────────── @@ -108,9 +142,12 @@ export interface EmbedStats { /** Most recent successful round-trip to the provider (epoch ms). */ lastOkAt: number | null; /** - * Most recent failure. `null` if no call has failed yet, or a later - * call succeeded. Viewer overview uses this to render a red dot + - * error tooltip on the embedding card. + * Most recent failure. `null` until the embedder has thrown at least + * once. Once set, it is **not** cleared by a subsequent success — + * the viewer compares `lastError.at` against `lastOkAt` to decide + * whether the most recent event was good (green) or bad (red), so + * users never see a green dot while a real provider error is still + * sitting in the system_error log. */ lastError: { at: number; message: string } | null; } diff --git a/apps/memos-local-plugin/core/llm/client.ts b/apps/memos-local-plugin/core/llm/client.ts index 0c6c75d89..53ac2746c 100644 --- a/apps/memos-local-plugin/core/llm/client.ts +++ b/apps/memos-local-plugin/core/llm/client.ts @@ -68,14 +68,41 @@ export function createLlmClientWithProvider( let totalPromptTokens = 0; let totalCompletionTokens = 0; let lastOkAt: number | null = null; + let lastFallbackAt: number | null = null; let lastError: { at: number; message: string } | null = null; - function markOk(): void { + /** + * Mark a successful primary-provider call. We **do not** clear + * `lastError` / `lastFallbackAt` here — the viewer picks the most + * recent event by timestamp to colour the overview card, so an + * earlier failure that already produced a `system_error` row stays + * visible until a later success out-dates it. + */ + function markOk(): number { lastOkAt = Date.now(); - lastError = null; + return lastOkAt; } - function markFail(err: unknown): void { - lastError = { at: Date.now(), message: summarizeErrMessage(err) }; + /** + * Mark a primary-provider failure that was rescued by the host LLM + * bridge (yellow card). The original primary error is still kept on + * `lastError` so the viewer can show *why* fallback kicked in, and + * `lastFallbackAt` tracks when fallback happened so the timestamp + * comparison renders yellow instead of red. + */ + function markFallback(err: unknown): number { + const at = Date.now(); + lastFallbackAt = at; + lastError = { at, message: summarizeErrMessage(err) }; + return at; + } + /** + * Mark a terminal failure — either no fallback configured or the + * host fallback also failed (red card). + */ + function markFail(err: unknown): number { + const at = Date.now(); + lastError = { at, message: summarizeErrMessage(err) }; + return at; } function normalizeMessages(input: LlmMessage[] | string): LlmMessage[] { @@ -137,7 +164,13 @@ export function createLlmClientWithProvider( durationMs: raw.durationMs, }; record(completion, op, messages); - markOk(); + const okAt = markOk(); + notifyStatus({ + status: "ok", + provider: provider.name, + model: config.model, + at: okAt, + }); return { completion }; } catch (err) { if (shouldFallback(err, config, provider.name)) { @@ -160,15 +193,39 @@ export function createLlmClientWithProvider( durationMs: res.durationMs, }; record(completion, op, messages); - markOk(); + // The primary provider is still broken even though the host + // bridge saved this call. Tag the slot yellow (`lastFallbackAt`) + // and surface the upstream error to the user via the + // system_error log so they can see *why* fallback engaged. + const fallbackAt = markFallback(err); + notifyOnError(err); + notifyStatus({ + status: "fallback", + provider: provider.name, + model: config.model, + message: summarizeErrMessage(err), + code: err instanceof MemosError ? err.code : undefined, + at: fallbackAt, + fallbackProvider: "host", + }); return { completion }; } catch (hostErr) { failures++; - markFail(hostErr); + const failAt = markFail(hostErr); facadeLog.error("host.fallback_failed", { primary: summarizeErr(err), host: summarizeErr(hostErr), }); + notifyOnError(hostErr); + notifyStatus({ + status: "error", + provider: provider.name, + model: config.model, + message: summarizeErrMessage(hostErr), + code: hostErr instanceof MemosError ? hostErr.code : undefined, + at: failAt, + fallbackProvider: "host", + }); throw hostErr instanceof MemosError ? hostErr : new MemosError( @@ -178,7 +235,16 @@ export function createLlmClientWithProvider( } } failures++; - markFail(err); + const failAt = markFail(err); + notifyOnError(err); + notifyStatus({ + status: "error", + provider: provider.name, + model: config.model, + message: summarizeErrMessage(err), + code: err instanceof MemosError ? err.code : undefined, + at: failAt, + }); throw err instanceof MemosError ? err : new MemosError( @@ -189,6 +255,44 @@ export function createLlmClientWithProvider( } } + /** + * Forward a terminal failure to the bootstrap-supplied sink (if any). + * Wrapped so a buggy sink can never replace the original error the + * caller is about to receive. Skipped silently when no sink is set. + */ + function notifyOnError(err: unknown): void { + if (!config.onError) return; + try { + config.onError({ + provider: provider.name, + model: config.model, + message: summarizeErrMessage(err), + code: err instanceof MemosError ? err.code : undefined, + at: Date.now(), + }); + } catch { + /* sink errors are non-fatal */ + } + } + + function notifyStatus(detail: { + status: "ok" | "fallback" | "error"; + provider: string; + model: string; + message?: string; + code?: string; + at?: number; + fallbackProvider?: string; + fallbackModel?: string; + }): void { + if (!config.onStatus) return; + try { + config.onStatus(detail); + } catch { + /* status sink errors are non-fatal */ + } + } + function record(completion: LlmCompletion, op: string, messages: LlmMessage[]): void { if (completion.usage?.promptTokens) totalPromptTokens += completion.usage.promptTokens; if (completion.usage?.completionTokens) totalCompletionTokens += completion.usage.completionTokens; @@ -322,11 +426,26 @@ export function createLlmClientWithProvider( }); if (usage?.promptTokens) totalPromptTokens += usage.promptTokens; if (usage?.completionTokens) totalCompletionTokens += usage.completionTokens; - markOk(); + const okAt = markOk(); + notifyStatus({ + status: "ok", + provider: provider.name, + model: config.model, + at: okAt, + }); } catch (err) { failures++; - markFail(err); + const failAt = markFail(err); facadeLog.error("stream.failed", { err: summarizeErr(err) }); + notifyOnError(err); + notifyStatus({ + status: "error", + provider: provider.name, + model: config.model, + message: summarizeErrMessage(err), + code: err instanceof MemosError ? err.code : undefined, + at: failAt, + }); throw err; } } @@ -347,6 +466,7 @@ export function createLlmClientWithProvider( totalPromptTokens, totalCompletionTokens, lastOkAt, + lastFallbackAt, lastError, }; }, @@ -358,6 +478,7 @@ export function createLlmClientWithProvider( totalPromptTokens = 0; totalCompletionTokens = 0; lastOkAt = null; + lastFallbackAt = null; lastError = null; }, async close(): Promise { diff --git a/apps/memos-local-plugin/core/llm/types.ts b/apps/memos-local-plugin/core/llm/types.ts index 125cd97fe..19eaef22c 100644 --- a/apps/memos-local-plugin/core/llm/types.ts +++ b/apps/memos-local-plugin/core/llm/types.ts @@ -32,6 +32,50 @@ export interface LlmConfig { maxTokens?: number; /** Extra HTTP headers for outgoing requests. */ headers?: Record; + /** + * Optional sink invoked once per terminal LLM failure. Lets the + * bootstrap layer record a `system_error` row in `api_logs` so the + * Logs viewer can surface infrastructure failures (auth, timeout, + * bad endpoint) right next to tool activity. Never throws; any + * exception inside the sink is swallowed by the facade. + */ + onError?: (detail: LlmErrorDetail) => void; + /** + * Optional durable status sink invoked on primary success, host + * fallback success, and terminal failure. This is the machine- + * readable source used by Overview model cards so Hermes' viewer + * daemon can display status produced by a separate stdio bridge. + */ + onStatus?: (detail: LlmStatusDetail) => void; +} + +export interface LlmErrorDetail { + provider: LlmProviderName | string; + model: string; + message: string; + /** Stable `MemosError` code when the underlying error carries one. */ + code?: string; + /** Epoch ms at which the failure occurred (defaults to Date.now()). */ + at?: number; + /** + * Logical role of the failing client. Bootstrap configures the + * facade with a closure that knows whether it's the summary `llm` + * or the dedicated `reflectLlm` for skill evolution; the facade + * just passes whatever the closure injected through. + */ + role?: "llm" | "skillEvolver"; +} + +export interface LlmStatusDetail { + status: "ok" | "fallback" | "error"; + provider: LlmProviderName | string; + model: string; + message?: string; + code?: string; + at?: number; + fallbackProvider?: string; + fallbackModel?: string; + role?: "llm" | "skillEvolver"; } // ─── Messages ──────────────────────────────────────────────────────────────── @@ -171,14 +215,30 @@ export interface ProviderCompletion { // ─── LlmClient facade ──────────────────────────────────────────────────────── export interface LastCallStatus { - /** Most recent successful call (epoch ms), or `null` if none yet. */ + /** + * Most recent direct success against the configured provider (epoch ms). + * Only set when the primary provider answered without going through + * `host_fallback`. Used by the viewer to render the green dot. + */ lastOkAt: number | null; /** - * Most recent failed call. `null` if the client has either never been - * called or the last call (or a later one) succeeded. Viewer - * overview uses this to render a red dot + error tooltip. + * Most recent failure of the primary provider. Once set, this field is + * **not** cleared on subsequent success — the viewer compares + * timestamps across `lastOkAt` / `lastFallbackAt` / `lastError.at` + * to decide whether the latest event was good (green / yellow) or + * bad (red), so a real provider failure recorded in the + * `system_error` log can never be silently masked by a later + * successful call. */ lastError: { at: number; message: string } | null; + /** + * Most recent time the primary provider failed but the host LLM + * fallback succeeded. Lets the viewer paint the slot yellow ("running + * on host fallback") instead of green or red. `null` when no + * fallback has happened in this process or the client has no + * `fallbackToHost` configured. + */ + lastFallbackAt: number | null; } export interface LlmClientStats extends LastCallStatus { diff --git a/apps/memos-local-plugin/core/pipeline/memory-core.ts b/apps/memos-local-plugin/core/pipeline/memory-core.ts index 37e9a149e..05541b04f 100644 --- a/apps/memos-local-plugin/core/pipeline/memory-core.ts +++ b/apps/memos-local-plugin/core/pipeline/memory-core.ts @@ -73,7 +73,11 @@ import { runMigrations } from "../storage/migrator.js"; import { makeRepos } from "../storage/repos/index.js"; import { createEmbedder } from "../embedding/embedder.js"; import { createLlmClient } from "../llm/client.js"; -import { getHostLlmBridge } from "../llm/host-bridge.js"; +import { + getHostLlmBridge, + registerHostLlmBridge, + type HostLlmBridge, +} from "../llm/host-bridge.js"; import { createPipeline } from "./orchestrator.js"; import type { PipelineDeps, PipelineHandle } from "./types.js"; @@ -90,6 +94,23 @@ export interface BootstrapOptions { now?: () => number; /** Plugin package version (surfaced via `health()`). */ pkgVersion?: string; + /** + * Optional adapter-supplied LLM bridge. When set, registered on the + * shared host-bridge singleton **before** the LLM clients are + * created so `shouldFallback()` can see it on the very first call. + * + * Wiring this through bootstrap (rather than asking the adapter to + * call `registerHostLlmBridge` itself) avoids a subtle ESM module- + * identity bug: when the adapter dynamically imports + * `core/llm/host-bridge.ts` from a different URL than the static + * `import` chain inside `core/llm/client.ts`, Node's module loader + * treats them as two separate modules with two independent + * `currentBridge` slots — register hits one, get sees the other, + * fallback never engages. Routing through bootstrap forces the + * register call to happen via the same module instance the LLM + * client closes over. + */ + hostLlmBridge?: HostLlmBridge | null; } export interface BootstrapResult { @@ -148,6 +169,77 @@ export async function bootstrapMemoryCoreFull( } const repos = makeRepos(db); + // ─── Host LLM bridge ── + // Register the adapter-supplied bridge BEFORE constructing any + // LlmClient so the very first call site sees a non-null bridge. + // The shouldFallback() check inside the LLM facade reads this + // singleton at every call; pinning it here guarantees the + // identity-by-module is the same instance the client closes over. + if (options.hostLlmBridge) { + registerHostLlmBridge(options.hostLlmBridge); + log.info("hostLlmBridge.registered", { + id: options.hostLlmBridge.id, + }); + } + + // ─── system_error sink ── + // Every facade we build (embedder / main LLM / reflect LLM) gets a + // tiny error sink that drops a `system_error` row into `api_logs` + // when the underlying provider call fails terminally. The Logs + // viewer renders these under the "系统" tag so users can correlate + // a red model card on the Overview with the exact provider message. + // Wrapped in try/catch because logging must never break the call. + function recordSystemError( + role: "embedding" | "llm" | "skillEvolver", + detail: { + provider: string; + model: string; + message: string; + code?: string; + at?: number; + }, + ): void { + try { + repos.apiLogs.insert({ + toolName: "system_error", + input: { role }, + output: { role, ...detail }, + durationMs: 0, + success: false, + calledAt: detail.at ?? Date.now(), + }); + } catch { + /* the system_error row itself failing is non-fatal */ + } + } + + function recordSystemModelStatus( + role: "embedding" | "llm" | "skillEvolver", + detail: { + status: "ok" | "fallback" | "error"; + provider: string; + model: string; + message?: string; + code?: string; + at?: number; + fallbackProvider?: string; + fallbackModel?: string; + }, + ): void { + try { + repos.apiLogs.insert({ + toolName: "system_model_status", + input: { role }, + output: { role, ...detail }, + durationMs: 0, + success: detail.status !== "error", + calledAt: detail.at ?? Date.now(), + }); + } catch { + /* the status row itself failing is non-fatal */ + } + } + // 2. Providers (embedding + LLM) — nullable so we can run without them. // The LLM facade we build falls through to "local_only" when no remote // endpoint is configured, but we still catch construction errors so the @@ -155,7 +247,19 @@ export async function bootstrapMemoryCoreFull( let embedder = null as ReturnType | null; let llm = null as ReturnType | null; try { - embedder = createEmbedder(config.embedding as never); + embedder = createEmbedder({ + ...(config.embedding as object), + onError: (d: { provider: string; model: string; message: string; code?: string; at?: number }) => + recordSystemError("embedding", d), + onStatus: (d: { + status: "ok" | "error"; + provider: string; + model: string; + message?: string; + code?: string; + at?: number; + }) => recordSystemModelStatus("embedding", d), + } as never); } catch (err) { log.warn("embedder.unavailable", { err: err instanceof Error ? err.message : String(err), @@ -163,7 +267,21 @@ export async function bootstrapMemoryCoreFull( embedder = null; } try { - llm = createLlmClient(config.llm as never); + llm = createLlmClient({ + ...(config.llm as object), + onError: (d: { provider: string; model: string; message: string; code?: string; at?: number }) => + recordSystemError("llm", d), + onStatus: (d: { + status: "ok" | "fallback" | "error"; + provider: string; + model: string; + message?: string; + code?: string; + at?: number; + fallbackProvider?: string; + fallbackModel?: string; + }) => recordSystemModelStatus("llm", d), + } as never); } catch (err) { log.warn("llm.unavailable", { err: err instanceof Error ? err.message : String(err), @@ -206,7 +324,25 @@ export async function bootstrapMemoryCoreFull( temperature: evolver?.temperature ?? 0, timeoutMs: evolver?.timeoutMs ?? 60_000, maxRetries: 3, - fallbackToHost: false, + // V7 §0.x — when the user's dedicated skill-evolver model is + // down (auth, model name typo, server outage), prefer falling + // back to the host agent's main LLM via the stdio host + // bridge instead of hard-failing the skill pipeline. The + // viewer paints the slot yellow + surfaces the upstream error + // so the operator still notices. + fallbackToHost: true, + onError: (d: { provider: string; model: string; message: string; code?: string; at?: number }) => + recordSystemError("skillEvolver", d), + onStatus: (d: { + status: "ok" | "fallback" | "error"; + provider: string; + model: string; + message?: string; + code?: string; + at?: number; + fallbackProvider?: string; + fallbackModel?: string; + }) => recordSystemModelStatus("skillEvolver", d), } as never); log.info("reflectLlm.ready", { provider: evolverProvider, @@ -756,10 +892,25 @@ export function createMemoryCore( const embedderInfo = embedderHealth(handle.embedder, latestTraceTs()); const skillEvolverInfo = resolveSkillEvolver( diskConfig ?? handle.config, - handle.llm, + // Prefer the dedicated reflect LLM stats so an independently + // configured skill-evolver model reports its OWN failures + // instead of inheriting the (possibly healthy) summary LLM's + // status. Falls back to `handle.llm` when the operator left + // skillEvolver blank — bootstrap aliases reflectLlm to llm + // in that case anyway. + handle.reflectLlm ?? handle.llm, latestTraceTs(), ); + // NOTE: we deliberately do NOT fall back to `api_logs`-stored + // historical `system_error` rows here. Doing so used to keep the + // overview card red across restarts even after the operator had + // already fixed the misconfigured endpoint, because the ancient + // failure row would mask a freshly-booted process whose stats + // are still null. Now the card colour is driven purely by + // in-memory stats — if you want to inspect past failures, head + // to LogsView → 系统 tag. + // Override model names from disk config if they differ from the // in-memory client (user saved new settings but hasn't restarted). if (diskConfig) { @@ -775,6 +926,14 @@ export function createMemoryCore( } } + applyPersistedModelStatus(handle.repos, "llm", llmInfo); + applyPersistedModelStatus(handle.repos, "embedding", embedderInfo); + applyPersistedModelStatus( + handle.repos, + skillEvolverInfo.inherited ? "llm" : "skillEvolver", + skillEvolverInfo, + ); + return { ok: initialized && !shutDown, version: pkgVersion, @@ -2653,9 +2812,75 @@ function durationSince( * the main `llm.*` model for skill induction. We surface that fallback * explicitly so the Overview card can label it as "inherited from LLM". */ +function applyPersistedModelStatus( + repos: PipelineHandle["repos"], + role: "embedding" | "llm" | "skillEvolver", + info: CoreHealth["llm"], +): void { + const latest = findLatestPersistedModelStatus(repos, role, info.provider, info.model); + if (!latest) return; + info.lastOkAt = latest.status === "ok" ? latest.at : null; + info.lastFallbackAt = latest.status === "fallback" ? latest.at : null; + info.lastError = + latest.status === "error" || latest.status === "fallback" + ? { at: latest.at, message: latest.message || "(no message)" } + : null; +} + +function findLatestPersistedModelStatus( + repos: PipelineHandle["repos"], + role: "embedding" | "llm" | "skillEvolver", + provider: string, + model: string, +): { + status: "ok" | "fallback" | "error"; + at: number; + message?: string; +} | null { + try { + const rows = repos.apiLogs.list({ + toolName: "system_model_status", + limit: 500, + offset: 0, + }); + for (const row of rows) { + try { + const out = JSON.parse(row.outputJson) as { + role?: unknown; + status?: unknown; + provider?: unknown; + model?: unknown; + message?: unknown; + }; + if (out.role !== role) continue; + // Only apply status rows for the currently configured model. + // This prevents an old 404 for a typo'd model from keeping the + // card red after the operator fixes Settings and restarts. + if (String(out.provider ?? "") !== provider) continue; + if (String(out.model ?? "") !== model) continue; + if (out.status !== "ok" && out.status !== "fallback" && out.status !== "error") { + continue; + } + return { + status: out.status, + at: row.calledAt, + message: typeof out.message === "string" ? out.message : undefined, + }; + } catch { + // Malformed row — skip and keep walking. + } + } + } catch { + // Repo failure is non-fatal for health; leave in-memory stats. + } + return null; +} + function llmHealth( llm: PipelineHandle["llm"], - fallbackTs: number | null, + // Kept in the signature for source compatibility with older callers + // but intentionally unused — see comment below. + _fallbackTs: number | null, ): CoreHealth["llm"] { if (!llm) { return { @@ -2663,26 +2888,30 @@ function llmHealth( provider: "none", model: "", lastOkAt: null, + lastFallbackAt: null, lastError: null, }; } const s = llm.stats(); + // We deliberately DO NOT fall back to the latest trace timestamp + // here. Doing so used to paint the slot "connected" on every + // restart even when the configured model was actually broken — any + // historical trace from a prior, working configuration would mask + // a fresh authentication / model-name failure. Now the colour is + // driven entirely by *this process's* facade activity. return { available: true, provider: llm.provider, model: llm.model, - // Prefer the live counter (most-recent call in this process). - // Fall back to `fallbackTs` — the newest trace timestamp — so the - // Overview card shows "connected" across plugin restarts as long - // as there's proof of a successful call on disk. - lastOkAt: s.lastOkAt ?? fallbackTs, + lastOkAt: s.lastOkAt, + lastFallbackAt: s.lastFallbackAt, lastError: s.lastError, }; } function embedderHealth( embedder: PipelineHandle["embedder"], - fallbackTs: number | null, + _fallbackTs: number | null, ): CoreHealth["embedder"] { if (!embedder) { return { @@ -2691,16 +2920,21 @@ function embedderHealth( model: "", dim: 0, lastOkAt: null, + lastFallbackAt: null, lastError: null, }; } const s = embedder.stats(); + // No `?? fallbackTs` here either — see `llmHealth`. The embedder + // also has no host fallback path, so `lastFallbackAt` stays `null` + // by definition. return { available: true, provider: embedder.provider, model: embedder.model, dim: embedder.dimensions, - lastOkAt: s.lastOkAt ?? fallbackTs, + lastOkAt: s.lastOkAt, + lastFallbackAt: null, lastError: s.lastError, }; } @@ -2714,17 +2948,19 @@ function resolveSkillEvolver( .skillEvolver; const own = (evolver?.model ?? "").trim(); if (own) { + // `llm` here is the dedicated `reflectLlm` instance built from the + // skillEvolver config (see `bootstrapMemoryCoreFull`). Reading its + // stats means the Overview card flips red as soon as a skill + // crystallization call fails — independent of the summary LLM. + const s = llm?.stats(); return { available: true, provider: evolver?.provider ?? "", model: own, inherited: false, - // Skill evolver uses its own LlmClient instance when the operator - // overrides the model. Today we don't expose that client through - // the pipeline handle, so status reporting follows the shared LLM - // while we keep the plumbing minimal. - lastOkAt: llm?.stats().lastOkAt ?? fallbackTs, - lastError: llm?.stats().lastError ?? null, + lastOkAt: s?.lastOkAt ?? null, + lastFallbackAt: s?.lastFallbackAt ?? null, + lastError: s?.lastError ?? null, }; } const fallback = llmHealth(llm, fallbackTs); @@ -2734,6 +2970,7 @@ function resolveSkillEvolver( model: fallback.model, inherited: true, lastOkAt: fallback.lastOkAt, + lastFallbackAt: fallback.lastFallbackAt, lastError: fallback.lastError, }; } diff --git a/apps/memos-local-plugin/core/pipeline/orchestrator.ts b/apps/memos-local-plugin/core/pipeline/orchestrator.ts index 6f510e1a0..eab711708 100644 --- a/apps/memos-local-plugin/core/pipeline/orchestrator.ts +++ b/apps/memos-local-plugin/core/pipeline/orchestrator.ts @@ -915,6 +915,7 @@ export function createPipeline(deps: PipelineDeps): PipelineHandle { db: deps.db, repos: deps.repos, llm: deps.llm, + reflectLlm: deps.reflectLlm, embedder: deps.embedder, sessionManager: session.sessionManager, episodeManager: session.episodeManager, diff --git a/apps/memos-local-plugin/core/pipeline/types.ts b/apps/memos-local-plugin/core/pipeline/types.ts index 93d2bea90..27eecc1ec 100644 --- a/apps/memos-local-plugin/core/pipeline/types.ts +++ b/apps/memos-local-plugin/core/pipeline/types.ts @@ -152,6 +152,15 @@ export interface PipelineHandle { readonly db: StorageDb; readonly repos: Repos; readonly llm: LlmClient | null; + /** + * Dedicated client for skill-evolution reflection. When the operator + * leaves `skillEvolver.*` blank, this is the same instance as `llm` + * (so call sites can blindly read whichever is non-null). When they + * configure their own model it carries its own `stats()` so the + * Overview / health endpoint reports the *actual* skill-evolver + * status instead of falling back to the summary LLM. + */ + readonly reflectLlm: LlmClient | null; readonly embedder: Embedder | null; // Subscribers / runners. diff --git a/apps/memos-local-plugin/install.sh b/apps/memos-local-plugin/install.sh index b6af9d4d1..b9aa31690 100755 --- a/apps/memos-local-plugin/install.sh +++ b/apps/memos-local-plugin/install.sh @@ -578,9 +578,21 @@ install_hermes() { mkdir -p "${HOME}/.hermes" step "Stopping existing bridge daemon" - pkill -f "bridge.cts" >/dev/null 2>&1 || true - sleep 1 - pkill -9 -f "bridge.cts" >/dev/null 2>&1 || true + local bridge_pids="" + bridge_pids="$(pgrep -f "bridge.cts" 2>/dev/null || true)" + if [[ -n "${bridge_pids}" ]]; then + kill ${bridge_pids} >/dev/null 2>&1 || true + local i + for i in {1..10}; do + sleep 1 + pgrep -f "bridge.cts" >/dev/null 2>&1 || break + done + bridge_pids="$(pgrep -f "bridge.cts" 2>/dev/null || true)" + if [[ -n "${bridge_pids}" ]]; then + kill -9 ${bridge_pids} >/dev/null 2>&1 || true + sleep 1 + fi + fi success "Bridge daemon stopped" local was_running="false" if pgrep -f "/bin/hermes" >/dev/null 2>&1; then @@ -699,8 +711,7 @@ CFGEOF mkdir -p "${prefix}/logs" # Launch bridge in --daemon mode (pure HTTP, no stdio). # The process stays alive to serve the Memory Viewer. - ( cd "${prefix}" && nohup "${tsx_bin}" "${bridge_cts}" --agent=hermes --daemon >"${daemon_log}" 2>&1 ) & - disown $! 2>/dev/null || true + ( cd "${prefix}" && nohup "${tsx_bin}" "${bridge_cts}" --agent=hermes --daemon >"${daemon_log}" 2>&1 & ) if wait_for_viewer "${HERMES_PORT}"; then success "Memory Viewer daemon running" diff --git a/apps/memos-local-plugin/server/routes/admin.ts b/apps/memos-local-plugin/server/routes/admin.ts index 28f2aed67..98ddcd14d 100644 --- a/apps/memos-local-plugin/server/routes/admin.ts +++ b/apps/memos-local-plugin/server/routes/admin.ts @@ -42,7 +42,7 @@ export function registerAdminRoutes(routes: Routes, deps: ServerDeps, options: S const pluginRoot = nodePath.resolve(nodePath.dirname(thisFile), "../.."); const tsxBin = nodePath.join(pluginRoot, "node_modules/.bin/tsx"); const bridgeScript = nodePath.join(pluginRoot, "bridge.cts"); - const cmd = `sleep 1 && "${tsxBin}" "${bridgeScript}" --agent=${agent} --daemon`; + const cmd = `sleep 3 && "${tsxBin}" "${bridgeScript}" --agent=${agent} --daemon`; const child = spawn("bash", ["-c", cmd], { detached: true, stdio: "ignore", @@ -72,7 +72,12 @@ export function registerAdminRoutes(routes: Routes, deps: ServerDeps, options: S const tsxBin = nodePath.join(pluginRoot, "node_modules/.bin/tsx"); const bridgeScript = nodePath.join(pluginRoot, "bridge.cts"); - const cmd = `sleep 1 && "${tsxBin}" "${bridgeScript}" --agent=${agent} --daemon`; + // 3s sleep gives the OS time to release the viewer port (TCP + // TIME_WAIT lingers for ~1s on macOS/Linux even after the + // previous bridge fully exited). Bumped from 1s after operators + // hit "重启超时" because the new daemon kept losing the port- + // bind race against its predecessor. + const cmd = `sleep 3 && "${tsxBin}" "${bridgeScript}" --agent=${agent} --daemon`; const child = spawn("bash", ["-c", cmd], { detached: true, stdio: "ignore", diff --git a/apps/memos-local-plugin/server/routes/api-logs.ts b/apps/memos-local-plugin/server/routes/api-logs.ts index 6ccd07bc0..8d4b77dc9 100644 --- a/apps/memos-local-plugin/server/routes/api-logs.ts +++ b/apps/memos-local-plugin/server/routes/api-logs.ts @@ -61,6 +61,14 @@ export function registerApiLogsRoutes(routes: Routes, deps: ServerDeps): void { "world_model_evolve", "task_done", "task_failed", + // Infrastructure-layer failures (embedding / summary LLM / + // skillEvolver). Surfaced under the "系统" tag in LogsView so + // operators can see provider errors without tailing logs. + "system_error", + // Durable machine-readable status source for Overview model + // cards. This is intentionally persisted because Hermes' viewer + // daemon and stdio bridge can be different processes. + "system_model_status", ] as const; const rows = await Promise.all( tools.map(async (t) => { diff --git a/apps/memos-local-plugin/tests/e2e/v7-full-chain.e2e.test.ts b/apps/memos-local-plugin/tests/e2e/v7-full-chain.e2e.test.ts index 5521daa0d..01e3c5eff 100644 --- a/apps/memos-local-plugin/tests/e2e/v7-full-chain.e2e.test.ts +++ b/apps/memos-local-plugin/tests/e2e/v7-full-chain.e2e.test.ts @@ -85,7 +85,7 @@ function buildFullChainLlm(): LlmClient { const script: FakeLlmScript = { completeJson: { // Intent classifier — task-shaped for everything meaningful. - "session.intent.classify": (input) => { + "session.intent.classify": (input: unknown) => { const text = lastUserMessage(input).toLowerCase(); if (/^\s*(hi|hello|你好|嗨)\s*$/.test(text)) { return { @@ -106,7 +106,7 @@ function buildFullChainLlm(): LlmClient { // Relation classifier — decides revision / follow_up / new_task. // IMPORTANT: check `NEW_USER_MESSAGE` only; the system prompt itself // mentions "wrong" / "redo", which would otherwise always match. - "session.relation.classify": (input) => { + "session.relation.classify": (input: unknown) => { const newMsg = newUserSegment(lastUserMessage(input)); if (/不对|错了|重做|改一下|\bwrong\b|\bredo\b|not quite/i.test(newMsg)) { return { @@ -137,7 +137,7 @@ function buildFullChainLlm(): LlmClient { }, // R_human scoring — treat negative keywords as a failed turn. - "reward.reward.r_human.v3": (input) => { + "reward.reward.r_human.v3": (input: unknown) => { const text = lastUserMessage(input); if (/不对|错了|没覆盖|\bwrong\b/i.test(text)) { return { @@ -167,7 +167,7 @@ function buildFullChainLlm(): LlmClient { }, // Capture summarizer — one short line per step. - "capture.summarize": (input) => { + "capture.summarize": (input: unknown) => { const text = lastUserMessage(input); if (/fib|斐波那契/i.test(text)) return { summary: "斐波那契函数实现(Python 递归/迭代)" }; if (/test|测试/i.test(text)) return { summary: "为 Python 函数补充单元测试(含边界)" }; @@ -177,19 +177,19 @@ function buildFullChainLlm(): LlmClient { }, // α scorer — reflection quality. - "capture.alpha.reflection.score.v1": (input) => { + "capture.alpha.reflection.score.v1": (input: unknown) => { const text = lastUserMessage(input); const alpha = /关键|识别|根因|发现/i.test(text) ? 0.75 : 0.45; return { alpha, rationale: "rule-of-thumb by keyword" }; }, // Capture reflection synth (only when reflection was missing). - "capture.reflection.synth": (input) => ({ + "capture.reflection.synth": (input: unknown) => ({ reflection: "Scripted fallback: summarized step outcome.", }), // L2 induction — distills a policy from ≥2 similar traces. - "l2.l2.induction.v2": (input) => { + "l2.l2.induction.v2": (input: unknown) => { const text = lastUserMessage(input); const isPython = /python|pip|\.py\b/i.test(text); return { diff --git a/apps/memos-local-plugin/tests/helpers/fake-llm.ts b/apps/memos-local-plugin/tests/helpers/fake-llm.ts index 971b938b7..22d9fc1a3 100644 --- a/apps/memos-local-plugin/tests/helpers/fake-llm.ts +++ b/apps/memos-local-plugin/tests/helpers/fake-llm.ts @@ -89,6 +89,7 @@ export function fakeLlm(script: FakeLlmScript = {}): LlmClient { totalPromptTokens: 0, totalCompletionTokens: 0, lastOkAt: null, + lastFallbackAt: null, lastError: null, }; }, diff --git a/apps/memos-local-plugin/tests/integration/adapters/openclaw-full-chain.test.ts b/apps/memos-local-plugin/tests/integration/adapters/openclaw-full-chain.test.ts index e057defe7..3ff5dd78e 100644 --- a/apps/memos-local-plugin/tests/integration/adapters/openclaw-full-chain.test.ts +++ b/apps/memos-local-plugin/tests/integration/adapters/openclaw-full-chain.test.ts @@ -181,7 +181,7 @@ function buildLlm(): LlmClient { reason: "programming request", }), - "session.relation.classify": (input) => { + "session.relation.classify": (input: unknown) => { const newMsg = newUserSegment(lastUserContent(input)); if (/不对|错了|改一下|\bwrong\b|redo/i.test(newMsg)) { return { relation: "revision", confidence: 0.9, reason: "negation" }; @@ -195,7 +195,7 @@ function buildLlm(): LlmClient { return { relation: "follow_up", confidence: 0.5, reason: "default" }; }, - "reward.reward.r_human.v3": (input) => { + "reward.reward.r_human.v3": (input: unknown) => { const text = lastUserContent(input); // We pre-fill the scorer with positive user feedback baked // into the "FEEDBACK:" block, so it should return a healthy @@ -218,7 +218,7 @@ function buildLlm(): LlmClient { }; }, - "capture.summarize": (input) => { + "capture.summarize": (input: unknown) => { const text = lastUserContent(input); if (/fib/i.test(text)) return { summary: "Python fibonacci 函数实现" }; if (/quicksort/i.test(text)) return { summary: "Python 快速排序实现" }; @@ -234,7 +234,7 @@ function buildLlm(): LlmClient { reason: "concrete root-cause reflection", }), - "l2.l2.induction.v2": (input) => { + "l2.l2.induction.v2": (input: unknown) => { const evidence = (input as { evidenceTraces?: Array<{ id: string }> }) ?.evidenceTraces ?? []; return { @@ -480,6 +480,7 @@ describe("OpenClaw adapter integration — multi-session full V7 chain", () => { agent: AGENT, core: core!, log: { + trace: (_m: string, _c?: unknown) => undefined, info: (_m: string, _c?: unknown) => undefined, warn: (_m: string, _c?: unknown) => undefined, error: (_m: string, _c?: unknown) => undefined, diff --git a/apps/memos-local-plugin/tests/unit/adapters/hermes-persistence.test.ts b/apps/memos-local-plugin/tests/unit/adapters/hermes-persistence.test.ts index 3e708f4e8..afed542af 100644 --- a/apps/memos-local-plugin/tests/unit/adapters/hermes-persistence.test.ts +++ b/apps/memos-local-plugin/tests/unit/adapters/hermes-persistence.test.ts @@ -52,7 +52,7 @@ function semanticFakeEmbedder(dims = 64): Embedder { }; return { dimensions: dims, - provider: "test", + provider: "local", model: "semantic-fake", async embedOne(input: string | EmbedInput): Promise { stats.requests++; @@ -168,13 +168,14 @@ describe("Hermes MemoryCore persistence", () => { agent: "hermes" as AgentKind, sessionId, episodeId, - userText: "请记住 HERMES_MEMOS_E2E_0428 viewer 端口是 18800", agentText: "已记录 Hermes MemOS 测试事实。", toolCalls: [ { name: "memory_search", input: "{\"query\":\"HERMES_MEMOS_E2E_0428\"}", output: "[]", + startedAt: 1_700_000_000_002, + endedAt: 1_700_000_000_002, }, ], ts: 1_700_000_000_002, diff --git a/apps/memos-local-plugin/tests/unit/bridge/methods.test.ts b/apps/memos-local-plugin/tests/unit/bridge/methods.test.ts index da7daea50..3253ca9bc 100644 --- a/apps/memos-local-plugin/tests/unit/bridge/methods.test.ts +++ b/apps/memos-local-plugin/tests/unit/bridge/methods.test.ts @@ -25,6 +25,7 @@ function stubCore(overrides: Partial = {}): MemoryCore { provider: "", model: "", lastOkAt: null, + lastFallbackAt: null, lastError: null, }, embedder: { @@ -33,6 +34,7 @@ function stubCore(overrides: Partial = {}): MemoryCore { model: "", dim: 0, lastOkAt: null, + lastFallbackAt: null, lastError: null, }, skillEvolver: { @@ -41,6 +43,7 @@ function stubCore(overrides: Partial = {}): MemoryCore { model: "", inherited: true, lastOkAt: null, + lastFallbackAt: null, lastError: null, }, })), @@ -80,6 +83,7 @@ function stubCore(overrides: Partial = {}): MemoryCore { shareTrace: vi.fn(async () => null), getPolicy: vi.fn(async () => null), listPolicies: vi.fn(async () => []), + countPolicies: vi.fn(async () => 0), setPolicyStatus: vi.fn(async () => null), deletePolicy: vi.fn(async () => ({ deleted: false })), editPolicyGuidance: vi.fn(async () => null), @@ -87,6 +91,7 @@ function stubCore(overrides: Partial = {}): MemoryCore { updatePolicy: vi.fn(async () => null), getWorldModel: vi.fn(async () => null), listWorldModels: vi.fn(async () => []), + countWorldModels: vi.fn(async () => 0), deleteWorldModel: vi.fn(async () => ({ deleted: false })), shareWorldModel: vi.fn(async () => null), updateWorldModel: vi.fn(async () => null), @@ -94,10 +99,13 @@ function stubCore(overrides: Partial = {}): MemoryCore { unarchiveWorldModel: vi.fn(async () => null), listEpisodes: vi.fn(async () => ["e-1", "e-2"]), listEpisodeRows: vi.fn(async () => []), + countEpisodes: vi.fn(async () => 0), timeline: vi.fn(async () => []), listTraces: vi.fn(async () => []), + countTraces: vi.fn(async () => 0), listApiLogs: vi.fn(async () => ({ logs: [], total: 0 })), listSkills: vi.fn(async () => []), + countSkills: vi.fn(async () => 0), getSkill: vi.fn(async () => null), archiveSkill: vi.fn(async () => {}), deleteSkill: vi.fn(async () => ({ deleted: false })), diff --git a/apps/memos-local-plugin/tests/unit/memory/l2/induce.test.ts b/apps/memos-local-plugin/tests/unit/memory/l2/induce.test.ts index cdf457d94..9aa6c4f6d 100644 --- a/apps/memos-local-plugin/tests/unit/memory/l2/induce.test.ts +++ b/apps/memos-local-plugin/tests/unit/memory/l2/induce.test.ts @@ -139,7 +139,6 @@ describe("memory/l2/induce", () => { episodeIds: ["ep_1", "ep_2"] as EpisodeId[], evidenceTraces: [mkTrace("tr_a", "ep_1", vec([1, 0])), mkTrace("tr_b", "ep_2", vec([0, 1]))], inducedBy: "l2.l2.induction.v1", - decisionGuidance: { preference: [], antiPattern: [] }, now: 42, }); expect(row.status).toBe("candidate"); diff --git a/apps/memos-local-plugin/tests/unit/memory/l3/_helpers.ts b/apps/memos-local-plugin/tests/unit/memory/l3/_helpers.ts index 1b4bbad9f..35198f936 100644 --- a/apps/memos-local-plugin/tests/unit/memory/l3/_helpers.ts +++ b/apps/memos-local-plugin/tests/unit/memory/l3/_helpers.ts @@ -147,7 +147,6 @@ export function seedWorldModel( policyIds: [...(args.policyIds ?? [])], sourceEpisodeIds: [...(args.sourceEpisodeIds ?? [])], inducedBy: "l3.abstraction.v1", - decisionGuidance: { preference: [], antiPattern: [] }, vec: args.vec ?? vec([1, 0, 0]), createdAt: NOW, updatedAt: NOW, diff --git a/apps/memos-local-plugin/tests/unit/memory/l3/abstract.test.ts b/apps/memos-local-plugin/tests/unit/memory/l3/abstract.test.ts index fdfe57f15..33e57f69e 100644 --- a/apps/memos-local-plugin/tests/unit/memory/l3/abstract.test.ts +++ b/apps/memos-local-plugin/tests/unit/memory/l3/abstract.test.ts @@ -73,6 +73,8 @@ function mkCluster(): PolicyCluster { domainTags: ["docker", "alpine", "pip"], centroidVec: vec([1, 0, 0]), avgGain: 0.3, + cohesion: 1, + admission: "strict", }; } @@ -171,7 +173,6 @@ describe("memory/l3/abstract", () => { cluster, episodeIds: ["ep_a", "ep_b", "ep_a"] as EpisodeId[], inducedBy: OP, - decisionGuidance: { preference: [], antiPattern: [] }, now: NOW, id: "wm_test" as Parameters[0]["id"], }); diff --git a/apps/memos-local-plugin/tests/unit/memory/l3/merge.test.ts b/apps/memos-local-plugin/tests/unit/memory/l3/merge.test.ts index 0231d47c3..50c21064f 100644 --- a/apps/memos-local-plugin/tests/unit/memory/l3/merge.test.ts +++ b/apps/memos-local-plugin/tests/unit/memory/l3/merge.test.ts @@ -41,7 +41,6 @@ function mkWorldModel(partial: Partial & { id: WorldModelId }): W policyIds: partial.policyIds ?? [], sourceEpisodeIds: partial.sourceEpisodeIds ?? [], inducedBy: partial.inducedBy ?? "", - decisionGuidance: { preference: [], antiPattern: [] }, vec: partial.vec ?? vec([1, 0, 0]), createdAt: NOW, updatedAt: NOW, diff --git a/apps/memos-local-plugin/tests/unit/retrieval/integration.test.ts b/apps/memos-local-plugin/tests/unit/retrieval/integration.test.ts index 09538780d..e13eb7b23 100644 --- a/apps/memos-local-plugin/tests/unit/retrieval/integration.test.ts +++ b/apps/memos-local-plugin/tests/unit/retrieval/integration.test.ts @@ -92,6 +92,7 @@ function seed(handle: TmpDbHandle) { vec: vec([1, 0, 0]), createdAt: NOW as never, updatedAt: NOW as never, + version: 1, }); handle.repos.skills.upsert({ id: "sk_weak" as SkillId, @@ -110,6 +111,7 @@ function seed(handle: TmpDbHandle) { vec: vec([1, 0, 0]), createdAt: NOW as never, updatedAt: NOW as never, + version: 1, }); handle.repos.worldModel.upsert({ @@ -122,7 +124,6 @@ function seed(handle: TmpDbHandle) { policyIds: [], sourceEpisodeIds: [], inducedBy: "", - decisionGuidance: { preference: [], antiPattern: [] }, vec: vec([1, 0, 0]), createdAt: NOW as never, updatedAt: NOW as never, diff --git a/apps/memos-local-plugin/tests/unit/retrieval/tier1.test.ts b/apps/memos-local-plugin/tests/unit/retrieval/tier1.test.ts index 1105be325..5bc0b0d51 100644 --- a/apps/memos-local-plugin/tests/unit/retrieval/tier1.test.ts +++ b/apps/memos-local-plugin/tests/unit/retrieval/tier1.test.ts @@ -22,6 +22,9 @@ const cfg: RetrievalConfig = { minTraceSim: 0.3, tagFilter: "auto", decayHalfLifeDays: 30, + llmFilterEnabled: false, + llmFilterMaxKeep: 4, + llmFilterMinCandidates: 1, }; const qv: EmbeddingVector = Float32Array.from([1, 0, 0]); diff --git a/apps/memos-local-plugin/tests/unit/retrieval/tier2.test.ts b/apps/memos-local-plugin/tests/unit/retrieval/tier2.test.ts index a98a1c092..b8ae8b4f6 100644 --- a/apps/memos-local-plugin/tests/unit/retrieval/tier2.test.ts +++ b/apps/memos-local-plugin/tests/unit/retrieval/tier2.test.ts @@ -25,6 +25,9 @@ const cfg: RetrievalConfig = { minTraceSim: 0.3, tagFilter: "auto", decayHalfLifeDays: 30, + llmFilterEnabled: false, + llmFilterMaxKeep: 4, + llmFilterMinCandidates: 1, }; function seed(handle: TmpDbHandle) { diff --git a/apps/memos-local-plugin/tests/unit/retrieval/tier3.test.ts b/apps/memos-local-plugin/tests/unit/retrieval/tier3.test.ts index 897981444..a5465141f 100644 --- a/apps/memos-local-plugin/tests/unit/retrieval/tier3.test.ts +++ b/apps/memos-local-plugin/tests/unit/retrieval/tier3.test.ts @@ -25,6 +25,9 @@ const cfg: RetrievalConfig = { minTraceSim: 0.3, tagFilter: "auto", decayHalfLifeDays: 30, + llmFilterEnabled: false, + llmFilterMaxKeep: 4, + llmFilterMinCandidates: 1, }; describe("retrieval/tier3 (with real sqlite)", () => { @@ -41,7 +44,6 @@ describe("retrieval/tier3 (with real sqlite)", () => { policyIds: [], sourceEpisodeIds: [], inducedBy: "", - decisionGuidance: { preference: [], antiPattern: [] }, vec: vec([1, 0, 0]), createdAt: NOW as never, updatedAt: NOW as never, @@ -57,7 +59,6 @@ describe("retrieval/tier3 (with real sqlite)", () => { policyIds: [], sourceEpisodeIds: [], inducedBy: "", - decisionGuidance: { preference: [], antiPattern: [] }, vec: vec([0, 1, 0]), createdAt: NOW as never, updatedAt: NOW as never, diff --git a/apps/memos-local-plugin/tests/unit/session/intent-classifier.test.ts b/apps/memos-local-plugin/tests/unit/session/intent-classifier.test.ts index 85416b66e..6b37ae55f 100644 --- a/apps/memos-local-plugin/tests/unit/session/intent-classifier.test.ts +++ b/apps/memos-local-plugin/tests/unit/session/intent-classifier.test.ts @@ -39,6 +39,7 @@ function fakeLlm(handler: (input: unknown) => unknown | Promise): LlmCl totalPromptTokens: 0, totalCompletionTokens: 0, lastOkAt: null, + lastFallbackAt: null, lastError: null, }), resetStats: () => {}, diff --git a/apps/memos-local-plugin/tests/unit/skill/packager.test.ts b/apps/memos-local-plugin/tests/unit/skill/packager.test.ts index 6a4a7a21c..bdb849045 100644 --- a/apps/memos-local-plugin/tests/unit/skill/packager.test.ts +++ b/apps/memos-local-plugin/tests/unit/skill/packager.test.ts @@ -95,6 +95,7 @@ describe("skill/packager", () => { vec: null, createdAt: NOW, updatedAt: NOW, + version: 1, } as SkillRow; const r = await buildSkillRow( { diff --git a/apps/memos-local-plugin/tests/unit/storage/end-to-end.test.ts b/apps/memos-local-plugin/tests/unit/storage/end-to-end.test.ts index 30db08db2..91b4644a1 100644 --- a/apps/memos-local-plugin/tests/unit/storage/end-to-end.test.ts +++ b/apps/memos-local-plugin/tests/unit/storage/end-to-end.test.ts @@ -94,6 +94,7 @@ describe("storage/end-to-end", () => { vec: vec([1, 0, 0]), createdAt: 101, updatedAt: 101, + version: 1, }); repos.feedback.insert({ id: "fb-1", diff --git a/apps/memos-local-plugin/tests/unit/storage/fts-keyword.test.ts b/apps/memos-local-plugin/tests/unit/storage/fts-keyword.test.ts index 1cf8f3dbf..b767f53a1 100644 --- a/apps/memos-local-plugin/tests/unit/storage/fts-keyword.test.ts +++ b/apps/memos-local-plugin/tests/unit/storage/fts-keyword.test.ts @@ -281,7 +281,6 @@ describe("storage/keyword channels — world model", () => { policyIds: [], sourceEpisodeIds: [], inducedBy: "", - decisionGuidance: { preference: [], antiPattern: [] }, vec: vec([1, 0, 0]), createdAt: 0, updatedAt: 0, diff --git a/apps/memos-local-plugin/tests/unit/storage/repos.test.ts b/apps/memos-local-plugin/tests/unit/storage/repos.test.ts index 1ca4aa40d..8a25c3ed1 100644 --- a/apps/memos-local-plugin/tests/unit/storage/repos.test.ts +++ b/apps/memos-local-plugin/tests/unit/storage/repos.test.ts @@ -203,6 +203,7 @@ describe("storage/repos — happy paths", () => { vec: vec([1, 0]), createdAt: 1, updatedAt: 1, + version: 1, }); expect(() => @@ -223,6 +224,7 @@ describe("storage/repos — happy paths", () => { vec: null, createdAt: 1, updatedAt: 1, + version: 1, }), ).toThrow(/UNIQUE/i); diff --git a/apps/memos-local-plugin/web/src/stores/health.ts b/apps/memos-local-plugin/web/src/stores/health.ts index d67374623..24ffeaaa7 100644 --- a/apps/memos-local-plugin/web/src/stores/health.ts +++ b/apps/memos-local-plugin/web/src/stores/health.ts @@ -11,26 +11,52 @@ import { api } from "../api/client"; export type HealthStatus = "unknown" | "ok" | "degraded" | "down"; +/** + * Most-recent call status carried on every model slot. Populated by + * the core's `health()` endpoint from the underlying facade + * `stats()`. Overview compares the three timestamps below — the + * largest one wins — to paint the card green (ok), yellow (running + * on host fallback) or red (broken). + */ +export interface ModelCallStatus { + /** Epoch ms of the most recent direct primary-provider success. */ + lastOkAt?: number | null; + /** + * Epoch ms of the most recent time the primary provider failed but + * the host LLM bridge rescued the call. Only ever set on the LLM / + * skillEvolver slots; the embedder has no fallback so this stays + * `null` there. + */ + lastFallbackAt?: number | null; + /** + * Latest failure record. Sticky — not cleared by a later success; + * the timestamp comparison handles "we recovered" naturally. + */ + lastError?: { at: number; message: string } | null; +} + export interface HealthPayload { ok: boolean; version?: string; uptimeMs?: number; agent?: string; paths?: Record; - llm?: { available: boolean; provider: string; model: string }; - embedder?: { available: boolean; provider: string; model: string; dim: number }; + llm?: ({ available: boolean; provider: string; model: string }) & ModelCallStatus; + embedder?: + | ({ available: boolean; provider: string; model: string; dim: number } & ModelCallStatus); /** * `available` is `true` when the slot has a usable upstream — either a * concrete `provider+model+apiKey` of its own (`inherited=false`) or it * inherits from `llm.*` and that slot is itself available * (`inherited=true`). The viewer's setup banner uses this flag. */ - skillEvolver?: { - available: boolean; - provider: string; - model: string; - inherited: boolean; - }; + skillEvolver?: + | ({ + available: boolean; + provider: string; + model: string; + inherited: boolean; + } & ModelCallStatus); } export const health = signal(null); diff --git a/apps/memos-local-plugin/web/src/stores/i18n.ts b/apps/memos-local-plugin/web/src/stores/i18n.ts index edd4e35fd..3b17b4993 100644 --- a/apps/memos-local-plugin/web/src/stores/i18n.ts +++ b/apps/memos-local-plugin/web/src/stores/i18n.ts @@ -216,6 +216,9 @@ const en = { "overview.metric.model.connectedAt": "Last OK call at {ts}", "overview.metric.model.failed": "Last call failed", "overview.metric.model.idle": "Not called yet", + "overview.metric.model.fallback": "Falling back to host model", + "overview.metric.model.fallback.tooltip": + "Primary provider unavailable, host LLM is handling the call. Original error: {msg}", "overview.metric.policies.breakdown": "{active} active · {candidate} candidate", "overview.metric.skills.breakdown": "{active} active · {candidate} candidate", "overview.live.title": "Live activity", @@ -563,6 +566,11 @@ const en = { "logs.tag.skill": "Skill", "logs.tag.policy": "Experience", "logs.tag.world": "World model", + "logs.tag.system": "System", + "logs.system.role": "{role} call failed", + "logs.system.role.embedding": "Embedding model", + "logs.system.role.llm": "Summary model", + "logs.system.role.skillEvolver": "Skill evolver model", "logs.tool.search": "Search", "logs.tool.add": "Ingest", "logs.tool.skill_generate": "Generate", @@ -866,6 +874,9 @@ const zh: Record = { "overview.metric.model.connectedAt": "上次成功调用于 {ts}", "overview.metric.model.failed": "上次调用失败", "overview.metric.model.idle": "暂未调用", + "overview.metric.model.fallback": "已降级到 Agent 内置模型", + "overview.metric.model.fallback.tooltip": + "原配置模型不可用,已自动切换到 Agent 内置模型继续工作。原始错误:{msg}", "overview.metric.policies.breakdown": "{active} 已启用 · {candidate} 候选", "overview.metric.skills.breakdown": "{active} 已启用 · {candidate} 候选", "overview.live.title": "实时活动", @@ -1183,6 +1194,11 @@ const zh: Record = { "logs.tag.skill": "技能", "logs.tag.policy": "经验", "logs.tag.world": "环境认知", + "logs.tag.system": "系统", + "logs.system.role": "{role}调用失败", + "logs.system.role.embedding": "嵌入模型", + "logs.system.role.llm": "摘要模型", + "logs.system.role.skillEvolver": "技能进化模型", "logs.tool.search": "检索", "logs.tool.add": "写入", "logs.tool.skill_generate": "生成", diff --git a/apps/memos-local-plugin/web/src/stores/restart.ts b/apps/memos-local-plugin/web/src/stores/restart.ts index 37b97255a..898cff61b 100644 --- a/apps/memos-local-plugin/web/src/stores/restart.ts +++ b/apps/memos-local-plugin/web/src/stores/restart.ts @@ -63,12 +63,20 @@ async function pollHealthUntilUp(maxAttempts = 60): Promise { } /** - * Quick health check — just verify the server responds once. - * Used for Hermes where the new daemon is already up before the old exits. + * Quick health check — verify the server responds again. + * + * Hermes' restart flow spawns the new daemon AFTER `sleep 3` + * (admin.ts) so the old bridge can fully release the viewer port, + * then `tsx` cold-compiles + bootstrap (DB migrations, embedder / + * LLM clients, host-bridge registration) takes another few seconds + * before the port is bound. Total worst-case wall time is ~10–15 s + * on slower machines. We poll every 1 s for up to 30 attempts (30 s + * total) so even a sluggish cold start succeeds without the user + * hitting the "重启超时" toast prematurely. */ -async function quickPollUp(maxAttempts = 10): Promise { +async function quickPollUp(maxAttempts = 30): Promise { for (let i = 0; i < maxAttempts; i++) { - await new Promise((r) => setTimeout(r, 800)); + await new Promise((r) => setTimeout(r, 1000)); try { const res = await fetch("/api/v1/health"); if (res.ok || res.status === 401 || res.status === 403) return true; @@ -103,8 +111,11 @@ export async function triggerRestart( restartState.value = { phase: "restartFailed" }; } } else { - // Hermes: new daemon spawns before old exits, transition is fast. - const ok = await quickPollUp(10); + // Hermes: new daemon spawns after `sleep 3` (admin.ts) so the + // old bridge can fully release the port; cold-start of tsx + + // bootstrap may take another few seconds. Use the default + // `quickPollUp` attempts (30s total). + const ok = await quickPollUp(); if (ok) { window.location.href = window.location.pathname + "?_t=" + Date.now(); @@ -128,9 +139,9 @@ export async function triggerCleared(): Promise { restartState.value = { phase: "restartFailed" }; } } else { - // Hermes: clear-data spawns a new daemon. Give it extra time - // since it also needs to re-create the DB on first boot. - const ok = await quickPollUp(15); + // Hermes: clear-data spawns a new daemon. The default 30s of + // `quickPollUp` already covers the slow first-boot DB migration. + const ok = await quickPollUp(); if (ok) { window.location.href = window.location.pathname + "?_t=" + Date.now(); diff --git a/apps/memos-local-plugin/web/src/styles/components.css b/apps/memos-local-plugin/web/src/styles/components.css index ea662d025..8b3a0c747 100644 --- a/apps/memos-local-plugin/web/src/styles/components.css +++ b/apps/memos-local-plugin/web/src/styles/components.css @@ -368,10 +368,12 @@ border-radius: 50%; flex-shrink: 0; } -.status-dot--ok { background: var(--green); box-shadow: 0 0 0 2px rgba(16, 185, 129, 0.18); } -.status-dot--err { background: var(--red); box-shadow: 0 0 0 2px rgba(239, 68, 68, 0.20); } -.status-dot--idle { background: var(--fg-dim); opacity: 0.6; } -.status-dot--off { background: transparent; border: 1px dashed var(--fg-dim); } +.status-dot--ok { background: var(--green); box-shadow: 0 0 0 2px rgba(16, 185, 129, 0.18); } +/* Yellow ring — primary provider broken but host LLM bridge rescued the call. */ +.status-dot--fallback { background: #f59e0b; box-shadow: 0 0 0 2px rgba(245, 158, 11, 0.22); } +.status-dot--err { background: var(--red); box-shadow: 0 0 0 2px rgba(239, 68, 68, 0.20); } +.status-dot--idle { background: var(--fg-dim); opacity: 0.6; } +.status-dot--off { background: transparent; border: 1px dashed var(--fg-dim); } /* ── Pills (status badges) ──────────────────────────────────────── */ diff --git a/apps/memos-local-plugin/web/src/views/LogsView.tsx b/apps/memos-local-plugin/web/src/views/LogsView.tsx index 426b5165d..e7463c9ad 100644 --- a/apps/memos-local-plugin/web/src/views/LogsView.tsx +++ b/apps/memos-local-plugin/web/src/views/LogsView.tsx @@ -33,7 +33,9 @@ type ToolFilter = | "world_model_generate" | "world_model_evolve" | "task_done" - | "task_failed"; + | "task_failed" + | "system_error" + | "system_model_status"; /** * Frontend log-tag categories. Each tag maps to one or more backend @@ -49,7 +51,13 @@ type LogTag = | "task" | "skill" | "policy" - | "world"; + | "world" + // Infrastructure-layer failures (embedding / summary LLM / + // skillEvolver provider errors). The bootstrap layer drops a + // `system_error` row into api_logs every time a model facade + // throws, so users can correlate Overview red dots with concrete + // upstream messages without tailing the server logs. + | "system"; const LOG_TAGS: Array<{ v: LogTag; k: string }> = [ { v: "", k: "common.all" }, @@ -59,6 +67,7 @@ const LOG_TAGS: Array<{ v: LogTag; k: string }> = [ { v: "skill", k: "logs.tag.skill" }, { v: "policy", k: "logs.tag.policy" }, { v: "world", k: "logs.tag.world" }, + { v: "system", k: "logs.tag.system" }, ]; /** @@ -75,6 +84,7 @@ const ALLOWED_TOOLS: Record = { skill: ["skill_generate", "skill_evolve"], policy: ["policy_generate", "policy_evolve"], world: ["world_model_generate", "world_model_evolve"], + system: ["system_error", "system_model_status"], }; interface ApiLogsResponse { @@ -311,6 +321,10 @@ function LogCard({ ) : log.toolName === "memory_add" ? ( + ) : log.toolName === "system_error" ? ( + + ) : log.toolName === "system_model_status" ? ( + ) : log.toolName.startsWith("skill_") || log.toolName.startsWith("policy_") || log.toolName.startsWith("world_model_") || @@ -750,6 +764,117 @@ function LifecycleDetail({ ); } +// ─── system_error template ────────────────────────────────────────────── + +interface SystemErrorPayload { + role?: "embedding" | "llm" | "skillEvolver"; + provider?: string; + model?: string; + message?: string; + code?: string; + at?: number; +} + +interface SystemModelStatusPayload extends SystemErrorPayload { + status?: "ok" | "fallback" | "error"; + fallbackProvider?: string; + fallbackModel?: string; +} + +/** + * Detail view for a `system_error` row. The bootstrap-installed sink + * stores a flat `{ role, provider, model, message, code, at }` blob so + * the renderer is intentionally minimal — one prominent red error line + * plus a row of metadata pills. + */ +function SystemErrorDetail({ output }: { output: unknown }) { + const out = (output ?? {}) as SystemErrorPayload; + const role = out.role ?? "(unknown)"; + return ( +
+
+
+ {t("logs.system.role", { role: roleLabel(role) })} +
+
+ {out.message || "(no message)"} +
+
+
+ {out.provider && ( + + provider: {out.provider} + + )} + {out.model && ( + + model: {out.model} + + )} + {out.code && ( + + code: {out.code} + + )} +
+
+ ); +} + +function SystemModelStatusDetail({ output }: { output: unknown }) { + const out = (output ?? {}) as SystemModelStatusPayload; + const status = out.status ?? "error"; + const role = out.role ?? "(unknown)"; + const tone = + status === "ok" + ? { border: "var(--success)", bg: "var(--success-soft)", pill: "pill--active" } + : status === "fallback" + ? { border: "#f59e0b", bg: "rgba(245, 158, 11, 0.12)", pill: "pill--info" } + : { border: "var(--danger)", bg: "var(--danger-soft)", pill: "pill--failed" }; + return ( +
+
+
+ {status} + {roleLabel(role)} + {out.provider && provider: {out.provider}} + {out.model && model: {out.model}} + {out.fallbackProvider && ( + fallback: {out.fallbackProvider} + )} +
+ {out.message && ( +
+ {out.message} +
+ )} +
+
+ ); +} + +function roleLabel(role: string): string { + switch (role) { + case "embedding": + return t("logs.system.role.embedding"); + case "llm": + return t("logs.system.role.llm"); + case "skillEvolver": + return t("logs.system.role.skillEvolver"); + default: + return role; + } +} + // ─── Generic fallback ─────────────────────────────────────────────────── function GenericDetail({ @@ -887,6 +1012,29 @@ function buildSummary(log: ApiLogDTO, input: unknown, output: unknown): string { (inp.worldModelId as string | undefined); return id ? `world model ${truncate(id, 24)}` : log.toolName; } + if (log.toolName === "system_error") { + const role = (out.role as string | undefined) ?? "?"; + const message = (out.message as string | undefined) ?? ""; + const provider = (out.provider as string | undefined) ?? ""; + const head = `[${roleLabel(role)}]`; + const tail = message + ? truncate(message, 80) + : provider + ? provider + : "(no message)"; + return `${head} ${tail}`; + } + if (log.toolName === "system_model_status") { + const role = (out.role as string | undefined) ?? "?"; + const status = (out.status as string | undefined) ?? "?"; + const provider = (out.provider as string | undefined) ?? ""; + const model = (out.model as string | undefined) ?? ""; + const message = (out.message as string | undefined) ?? ""; + const bits = [`[${roleLabel(role)}]`, status]; + if (provider || model) bits.push([provider, model].filter(Boolean).join("/")); + if (message) bits.push(truncate(message, 60)); + return bits.join(" · "); + } if (log.toolName === "task_done" || log.toolName === "task_failed") { const rHuman = typeof out.rHuman === "number" ? (out.rHuman as number).toFixed(2) : null; const source = (out.source as string | undefined) ?? ""; diff --git a/apps/memos-local-plugin/web/src/views/MemoriesView.tsx b/apps/memos-local-plugin/web/src/views/MemoriesView.tsx index 15c7e2ddd..5944ff53a 100644 --- a/apps/memos-local-plugin/web/src/views/MemoriesView.tsx +++ b/apps/memos-local-plugin/web/src/views/MemoriesView.tsx @@ -760,7 +760,23 @@ function ToolCallCard({ function formatToolPayload(v: unknown): string { if (v === undefined || v === null) return ""; - if (typeof v === "string") return v; + // Tool inputs/outputs frequently arrive as already-stringified JSON + // (the agent serializes them before storing). Re-parse so the same + // 2-space pretty-print path applies regardless of upstream encoding. + if (typeof v === "string") { + const trimmed = v.trim(); + if ( + (trimmed.startsWith("{") && trimmed.endsWith("}")) || + (trimmed.startsWith("[") && trimmed.endsWith("]")) + ) { + try { + return JSON.stringify(JSON.parse(trimmed), null, 2); + } catch { + return v; + } + } + return v; + } try { return JSON.stringify(v, null, 2); } catch { @@ -898,6 +914,27 @@ function formatTs(ts: number): string { } } +/** + * Format a step timestamp with millisecond precision (HH:MM:SS.mmm). + * Used in the per-step header row so users can tell apart sub-steps + * fired within the same second by a fast tool loop. Uses 24h fields + * directly so the locale's AM/PM suffix doesn't end up between the + * seconds and the millisecond fraction. + */ +function formatStepTime(ts: number): string { + if (!ts) return "—"; + try { + const d = new Date(ts); + const hh = String(d.getHours()).padStart(2, "0"); + const mm = String(d.getMinutes()).padStart(2, "0"); + const ss = String(d.getSeconds()).padStart(2, "0"); + const ms = String(d.getMilliseconds()).padStart(3, "0"); + return `${hh}:${mm}:${ss}.${ms}`; + } catch { + return String(ts); + } +} + // ─── Right-side drawer ─────────────────────────────────────────────────── /** @@ -1205,8 +1242,9 @@ function TraceDrawer({ * and any `toolCalls` rendered through the existing * `ToolCallCard`. Empty fields collapse silently. * - * The first step is open by default; the rest start collapsed so the - * drawer doesn't drown the user when a turn fired a dozen tools. + * Every step starts collapsed so the drawer stays compact when a turn + * fired a dozen tools — users opt into the detail they want instead of + * scrolling past an auto-expanded first step. */ function StepList({ traces }: { traces: readonly TraceDTO[] }) { return ( @@ -1223,7 +1261,6 @@ function StepList({ traces }: { traces: readonly TraceDTO[] }) { return (
@@ -1236,7 +1273,7 @@ function StepList({ traces }: { traces: readonly TraceDTO[] }) { {roleLabel} - {new Date(tr.ts).toLocaleTimeString()} + {formatStepTime(tr.ts)} V {tr.value.toFixed(2)} · α {tr.alpha.toFixed(2)} diff --git a/apps/memos-local-plugin/web/src/views/OverviewView.tsx b/apps/memos-local-plugin/web/src/views/OverviewView.tsx index 79c001f86..bc0177f78 100644 --- a/apps/memos-local-plugin/web/src/views/OverviewView.tsx +++ b/apps/memos-local-plugin/web/src/views/OverviewView.tsx @@ -42,9 +42,14 @@ interface ModelInfo { model: string; dim?: number; inherited?: boolean; - /** Epoch ms of most recent successful call (null = never called). */ + /** Epoch ms of most recent direct primary-provider success. */ lastOkAt?: number | null; - /** Most recent failure, if the last call went bad. */ + /** + * Epoch ms of most recent rescued-by-host-fallback call. Populates + * the "yellow" overview state. + */ + lastFallbackAt?: number | null; + /** Most recent failure (sticky — see ModelHealth comment). */ lastError?: { at: number; message: string } | null; } interface OverviewSummary { @@ -259,17 +264,26 @@ function QuantityCard({ ); } -type ModelDotKind = "ok" | "err" | "idle" | "off"; +type ModelDotKind = "ok" | "fallback" | "err" | "idle" | "off"; /** - * Derive the overview card status from a {@link ModelInfo}: - * - `off` — the client isn't even configured - * - `idle` — configured but no call has happened yet (fresh install) - * - `err` — last call failed, surface the error message in a tooltip - * - `ok` — last call succeeded + * Derive the overview card status from a {@link ModelInfo}. * - * Preference order: error wins (even over never-called), so a - * freshly-failed provider doesn't pretend to be idle. + * The card is painted by picking the most-recent of three timestamps + * — `lastOkAt`, `lastFallbackAt`, `lastError.at` — and mapping that + * winner to a colour: + * + * - `ok` (green) — primary provider answered directly. + * - `fallback` (yellow) — primary failed but host LLM bridge + * rescued the call. The card surfaces the + * original error so users know *why* it + * degraded. + * - `err` (red) — primary failed and either there was no + * fallback or the fallback also failed. + * + * `lastError` is sticky on the backend so it can sit alongside a + * fresher `lastOkAt` after recovery — comparing timestamps lets the + * UI naturally "go green again" without having to clear the message. */ function modelStatusFromInfo(info: ModelInfo | undefined): { kind: ModelDotKind; @@ -279,23 +293,65 @@ function modelStatusFromInfo(info: ModelInfo | undefined): { if (!info || info.available === false) { return { kind: "off", label: t("overview.metric.model.unconfigured") }; } - if (info.lastError) { + + const okAt = info.lastOkAt ?? 0; + const fbAt = info.lastFallbackAt ?? 0; + const errAt = info.lastError?.at ?? 0; + const max = Math.max(okAt, fbAt, errAt); + + // Nothing has happened yet — fresh process, no calls landed. + if (max === 0) { + return { kind: "idle", label: t("overview.metric.model.idle") }; + } + + // Priority order matters when timestamps tie. + // + // The backend stamps `lastFallbackAt` and `lastError.at` with the + // SAME `Date.now()` inside `markFallback` (the upstream error is + // kept on `lastError` so the viewer can show *why* fallback + // engaged). When that happens, a strict "errAt === max ⇒ red" + // check would always win over the fallback branch and the slot + // would never go yellow. The current call succeeded — through the + // host bridge — so semantically it is the fallback state, with + // the error only providing context. Hence: fallback wins ties + // against err. + // + // We also let fallback win ties against ok for the rare case where + // a successful primary call and a fallback rescue happen in the + // same millisecond — yellow is the most informative state. + if (fbAt > 0 && fbAt >= errAt && fbAt >= okAt) { + const raw = (info.lastError?.message ?? "").trim(); + const head = t("overview.metric.model.fallback"); + const tail = raw ? `: ${raw.length > 60 ? raw.slice(0, 59) + "…" : raw}` : ""; return { - kind: "err", - label: t("overview.metric.model.failed"), - tooltip: info.lastError.message, + kind: "fallback", + label: head + tail, + tooltip: raw + ? t("overview.metric.model.fallback.tooltip", { msg: raw }) + : head, }; } - if (info.lastOkAt) { + + // Most recent event was a terminal failure. + if (errAt > 0 && errAt >= okAt) { + const raw = (info.lastError?.message ?? "").trim(); + const short = + raw.length > 80 ? raw.slice(0, 79) + "…" : raw || t("overview.metric.model.failed"); return { - kind: "ok", - label: t("overview.metric.model.connected"), - tooltip: t("overview.metric.model.connectedAt", { - ts: new Date(info.lastOkAt).toLocaleTimeString(), - }), + kind: "err", + label: short, + tooltip: raw || t("overview.metric.model.failed"), }; } - return { kind: "idle", label: t("overview.metric.model.idle") }; + + // okAt is the largest — primary provider is working directly. + return { + kind: "ok", + label: t("overview.metric.model.connected"), + tooltip: t("overview.metric.model.connectedAt", { + ts: new Date(okAt).toLocaleTimeString(), + }), + }; } function ModelCard({