From 9ce2e0742445b9f44f21edba9adb49b534da80d3 Mon Sep 17 00:00:00 2001
From: yishuiliunian
Date: Sat, 6 Jun 2026 11:24:17 +0800
Subject: [PATCH 1/2] fix(cluster): surface remote spawn LLM errors and stop self-target spawn deadlock

Cross-hub spawn silently failed in three ways that compounded into an
unactionable "empty result / already registered" symptom:

- An ephemeral agent whose model has no provider on the target hub errored
  on resolve_provider but the loop fell through to "idle, exiting",
  reporting a falsely-successful empty Goal. Now an unrecovered turn error
  terminates with TerminateReason::Error and the real error text as the
  result, so the caller sees "Model not found" instead of nothing.
- The Agent tool's `model` param had no description, so the LLM guessed
  "sonnet" (unsupported on the remote hub). Documented that omitting it
  inherits the parent model and that cross-hub forwards the name verbatim.
- A hub targeting itself pre-registered a shadow then routed back through
  MetaHub into its own registry, colliding as "already registered" and
  orphaning a forked process. Self-target now spawns locally.
--- .../src/dispatch/spawn_routing.rs | 43 ++++++++++++++++++- .../src/tools/collaboration/agent.rs | 5 ++- crates/loopal-runtime/src/agent_loop/run.rs | 20 +++++++-- .../tests/agent_loop/run_test.rs | 27 ++++++++++++ 4 files changed, 90 insertions(+), 5 deletions(-) diff --git a/crates/loopal-agent-hub/src/dispatch/spawn_routing.rs b/crates/loopal-agent-hub/src/dispatch/spawn_routing.rs index b2567d46..c709c0d9 100644 --- a/crates/loopal-agent-hub/src/dispatch/spawn_routing.rs +++ b/crates/loopal-agent-hub/src/dispatch/spawn_routing.rs @@ -34,11 +34,32 @@ pub async fn handle_spawn_agent( "'target_hub' cannot contain '/' (cross-hub address encoding), got: {target}" )); } - return super::cross_hub_forward::forward_cross_hub_spawn(hub, params, from_agent).await; + let own_hub = hub + .lock() + .await + .uplink + .as_ref() + .map(|u| u.hub_name().to_string()); + if !is_self_target(own_hub.as_deref(), target) { + return super::cross_hub_forward::forward_cross_hub_spawn(hub, params, from_agent).await; + } + let mut local_params = params; + if let Some(obj) = local_params.as_object_mut() { + obj.remove("target_hub"); + } + return spawn_local(hub, local_params, from_agent).await; } spawn_local(hub, params, from_agent).await } +// reason: a hub targeting itself would pre-register a shadow then route back +// through MetaHub into its own registry, colliding as "already registered" and +// orphaning a forked process. Self-target must spawn locally — same registry, +// no MetaHub round-trip. 
+fn is_self_target(own_hub: Option<&str>, target: &str) -> bool { + own_hub == Some(target) +} + async fn spawn_local( hub: &Arc>, params: Value, @@ -149,3 +170,23 @@ pub(super) async fn spawn_via_manager( info!(agent = %name, %agent_id, "spawn done"); Ok(json!({"agent_id": agent_id, "name": name})) } + +#[cfg(test)] +mod tests { + use super::is_self_target; + + #[test] + fn own_hub_equals_target_is_self() { + assert!(is_self_target(Some("hub-a"), "hub-a")); + } + + #[test] + fn different_hub_is_not_self() { + assert!(!is_self_target(Some("hub-a"), "hub-b")); + } + + #[test] + fn no_uplink_is_never_self() { + assert!(!is_self_target(None, "hub-a")); + } +} diff --git a/crates/loopal-agent/src/tools/collaboration/agent.rs b/crates/loopal-agent/src/tools/collaboration/agent.rs index 16e0d7a5..f4eb34df 100644 --- a/crates/loopal-agent/src/tools/collaboration/agent.rs +++ b/crates/loopal-agent/src/tools/collaboration/agent.rs @@ -29,7 +29,10 @@ impl Tool for AgentTool { "prompt": { "type": "string" }, "name": { "type": "string" }, "subagent_type": { "type": "string" }, - "model": { "type": "string" }, + "model": { + "type": "string", + "description": "Omit to inherit the parent agent's model. Only set to override, and only to a model the target hub actually has — a cross-hub spawn forwards this name verbatim, so an unsupported model fails with 'Model not found'." + }, "target_hub": { "type": "string", "description": "Spawn on a remote hub in the cluster (e.g. 'hub-b'). Requires MetaHub connection." 
diff --git a/crates/loopal-runtime/src/agent_loop/run.rs b/crates/loopal-runtime/src/agent_loop/run.rs index 4e92cb72..e6288c5b 100644 --- a/crates/loopal-runtime/src/agent_loop/run.rs +++ b/crates/loopal-runtime/src/agent_loop/run.rs @@ -15,6 +15,7 @@ use super::turn_context::TurnContext; impl AgentLoopRunner { pub(super) async fn run_loop(&mut self) -> Result { let mut last_output = String::new(); + let mut last_error: Option = None; let mut server_block_retry = false; let mut context_overflow_retry = false; // Need user input whenever the last message isn't User — covers empty @@ -108,16 +109,29 @@ impl AgentLoopRunner { continue; } error!(error = %e, "LLM request failed"); - self.transition_error(LoopalError::to_string(&e)).await?; + let msg = LoopalError::to_string(&e); + self.transition_error(msg.clone()).await?; + last_error = Some(msg); + // reason: an ephemeral agent has no UI/parent to retry, so an + // unrecovered turn error must terminate the loop with the real + // error — not fall through to "idle, exiting" which would report + // a successful empty result and hide the failure from the caller. 
+ if matches!(self.params.config.lifecycle, LifecycleMode::Ephemeral) { + break; + } } } server_block_retry = false; context_overflow_retry = false; } + let (result, terminate_reason) = match last_error { + Some(err) if last_output.is_empty() => (err, TerminateReason::Error), + _ => (last_output, TerminateReason::Goal), + }; Ok(AgentOutput { - result: last_output, - terminate_reason: TerminateReason::Goal, + result, + terminate_reason, }) } diff --git a/crates/loopal-runtime/tests/agent_loop/run_test.rs b/crates/loopal-runtime/tests/agent_loop/run_test.rs index 80a589c8..8f5ae28e 100644 --- a/crates/loopal-runtime/tests/agent_loop/run_test.rs +++ b/crates/loopal-runtime/tests/agent_loop/run_test.rs @@ -1,6 +1,7 @@ use loopal_error::{LoopalError, TerminateReason}; use loopal_protocol::AgentEventPayload; use loopal_provider_api::{StopReason, StreamChunk}; +use loopal_runtime::agent_loop::LifecycleMode; use super::mock_provider::{ make_interactive_multi_runner, make_multi_runner, make_runner_with_mock_provider, @@ -134,6 +135,32 @@ async fn test_prompt_driven_error_exits_cleanly() { ); } +/// Reproduces the cross-hub failure: a spawned (ephemeral) agent whose model +/// has no provider on this hub. resolve_provider errors on the first LLM call; +/// the loop must surface that error as the result + TerminateReason::Error +/// instead of returning an empty, falsely-successful Goal completion. 
+#[tokio::test] +async fn test_ephemeral_unresolved_model_propagates_error_to_result() { + let calls = vec![vec![ + Ok(StreamChunk::Text { text: "unused".into() }), + Ok(StreamChunk::Done { + stop_reason: StopReason::EndTurn, + }), + ]]; + let (mut runner, _event_rx) = make_multi_runner(calls); + runner.params.config.lifecycle = LifecycleMode::Ephemeral; + runner.params.config.router = + loopal_provider_api::ModelRouter::new("unknown-model-xyz".to_string()); + + let output = runner.run().await.unwrap(); + assert_eq!(output.terminate_reason, TerminateReason::Error); + assert!( + !output.result.is_empty() && output.result.contains("unknown-model-xyz"), + "real provider error must reach result (caller-visible), got: {:?}", + output.result + ); +} + /// Authoritative `Running` event is emitted before any `Stream`, so the /// TUI status bar can flip before the first LLM byte arrives. #[tokio::test] From 2955d9e8c22244bc454577b7074e93a1702db829 Mon Sep 17 00:00:00 2001 From: yishuiliunian Date: Sat, 6 Jun 2026 11:33:07 +0800 Subject: [PATCH 2/2] fix: address CI failure - rustfmt formatting in spawn_routing and run_test --- crates/loopal-agent-hub/src/dispatch/spawn_routing.rs | 3 ++- crates/loopal-runtime/tests/agent_loop/run_test.rs | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/crates/loopal-agent-hub/src/dispatch/spawn_routing.rs b/crates/loopal-agent-hub/src/dispatch/spawn_routing.rs index c709c0d9..39b7c22a 100644 --- a/crates/loopal-agent-hub/src/dispatch/spawn_routing.rs +++ b/crates/loopal-agent-hub/src/dispatch/spawn_routing.rs @@ -41,7 +41,8 @@ pub async fn handle_spawn_agent( .as_ref() .map(|u| u.hub_name().to_string()); if !is_self_target(own_hub.as_deref(), target) { - return super::cross_hub_forward::forward_cross_hub_spawn(hub, params, from_agent).await; + return super::cross_hub_forward::forward_cross_hub_spawn(hub, params, from_agent) + .await; } let mut local_params = params; if let Some(obj) = local_params.as_object_mut() { diff 
--git a/crates/loopal-runtime/tests/agent_loop/run_test.rs b/crates/loopal-runtime/tests/agent_loop/run_test.rs index 8f5ae28e..0a8ea6f3 100644 --- a/crates/loopal-runtime/tests/agent_loop/run_test.rs +++ b/crates/loopal-runtime/tests/agent_loop/run_test.rs @@ -142,7 +142,9 @@ async fn test_prompt_driven_error_exits_cleanly() { #[tokio::test] async fn test_ephemeral_unresolved_model_propagates_error_to_result() { let calls = vec![vec![ - Ok(StreamChunk::Text { text: "unused".into() }), + Ok(StreamChunk::Text { + text: "unused".into(), + }), Ok(StreamChunk::Done { stop_reason: StopReason::EndTurn, }),