diff --git a/CHANGELOG.md b/CHANGELOG.md index cf89204f6..1c8909e77 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ - [BREAKING] Renamed `GetNoteError` endpoint to `GetNetworkNoteStatus` and extended it to return the full lifecycle status of a network note (`Pending`, `Processed`, `Discarded`, `Committed`) instead of only error information. Consumed notes are now retained in the database after block commit instead of being deleted ([#1892](https://github.com/0xMiden/node/pull/1892)). - Extended `ValidatorStatus` proto response with `chain_tip`, `validated_transactions_count`, and `signed_blocks_count`; added Validator card to the network monitor dashboard ([#1900](https://github.com/0xMiden/node/pull/1900)). - Updated the RocksDB SMT backend to use budgeted deserialization for bytes read from disk, ported from `0xMiden/crypto` PR [#846](https://github.com/0xMiden/crypto/pull/846) ([#1923](https://github.com/0xMiden/node/pull/1923)). +- [BREAKING] Network monitor `/status` endpoint now emits a single `RemoteProverStatus` entry per remote prover that bundles status, workers, and test results, instead of separate entries ([#1980](https://github.com/0xMiden/node/pull/1980)). 
## v0.14.9 (2026-04-21) diff --git a/bin/network-monitor/assets/index.css b/bin/network-monitor/assets/index.css index 750ad9d77..b0cbc2603 100644 --- a/bin/network-monitor/assets/index.css +++ b/bin/network-monitor/assets/index.css @@ -343,6 +343,10 @@ body { background: rgba(0, 0, 0, 0.05); border-radius: 4px; font-size: 12px; + display: flex; + align-items: center; + gap: 8px; + flex-wrap: wrap; } .worker-name { diff --git a/bin/network-monitor/assets/index.js b/bin/network-monitor/assets/index.js index a7a34551d..842a9567c 100644 --- a/bin/network-monitor/assets/index.js +++ b/bin/network-monitor/assets/index.js @@ -172,10 +172,11 @@ function collectGrpcWebEndpoints() { }); } // Remote Prover service - if (service.details.RemoteProverStatus && service.details.RemoteProverStatus.url) { + const proverUrl = service.details.RemoteProverStatus?.status?.url; + if (proverUrl) { endpoints.push({ - serviceKey: service.details.RemoteProverStatus.url, - baseUrl: service.details.RemoteProverStatus.url, + serviceKey: proverUrl, + baseUrl: proverUrl, grpcPath: '/remote_prover.ProxyStatusApi/Status', }); } @@ -303,55 +304,6 @@ async function fetchStatus() { } } -// Merge Remote Prover status and test entries into a single card per prover. -function mergeProverStatusAndTests(services) { - const testsByName = new Map(); - const merged = []; - const usedTests = new Set(); - - services.forEach(service => { - if (service.details && service.details.RemoteProverTest) { - testsByName.set(service.name, service); - } - }); - - services.forEach(service => { - if (service.details && service.details.RemoteProverStatus) { - const test = testsByName.get(service.name); - if (test) { - usedTests.add(service.name); - } - merged.push({ - ...service, - testDetails: test?.details?.RemoteProverTest ?? null, - testStatus: test?.status ?? null, - testError: test?.error ?? 
null - }); - } else if (!(service.details && service.details.RemoteProverTest)) { - // Non-prover entries pass through unchanged - merged.push(service); - } - }); - - // Add orphaned tests (in case a test arrives before a status) - testsByName.forEach((test, name) => { - if (!usedTests.has(name)) { - merged.push({ - name, - status: test.status, - last_checked: test.last_checked, - error: test.error, - details: null, - testDetails: test.details.RemoteProverTest, - testStatus: test.status, - testError: test.error - }); - } - }); - - return merged; -} - function updateDisplay() { if (!statusData) return; @@ -364,29 +316,28 @@ function updateDisplay() { const lastUpdateTime = new Date(statusData.last_updated * 1000); lastUpdated.textContent = lastUpdateTime.toLocaleString(); - // Group remote prover status + test into single cards - const processedServices = mergeProverStatusAndTests(statusData.services); - const rpcService = processedServices.find(s => s.details && s.details.RpcStatus); + const services = statusData.services; + const rpcService = services.find(s => s.details && s.details.RpcStatus); const rpcChainTip = rpcService?.details?.RpcStatus?.store_status?.chain_tip ?? rpcService?.details?.RpcStatus?.block_producer_status?.chain_tip ?? null; - // Compute effective health for a service, considering all signals for remote provers. + // Compute effective health const isServiceHealthy = (s) => { - if (s.details && s.details.RemoteProverStatus) { - const statusOk = s.status === 'Healthy'; - const testOk = s.testStatus == null || s.testStatus === 'Healthy'; - const probeResult = grpcWebProbeResults.get(s.details.RemoteProverStatus.url); - const probeOk = !probeResult || probeResult.ok; - return statusOk && testOk && probeOk; + if (s.status !== 'Healthy') return false; + const probeUrl = s.details?.RemoteProverStatus?.status?.url + ?? 
s.details?.RpcStatus?.url; + if (probeUrl) { + const probe = grpcWebProbeResults.get(probeUrl); + if (probe && !probe.ok) return false; } - return s.status === 'Healthy'; + return true; }; // Count healthy vs unhealthy services - const healthyServices = processedServices.filter(isServiceHealthy).length; - const totalServices = processedServices.length; + const healthyServices = services.filter(isServiceHealthy).length; + const totalServices = services.length; const allHealthy = healthyServices === totalServices; // Update footer @@ -404,7 +355,7 @@ function updateDisplay() { } // Generate status cards - const serviceCardsHtml = processedServices.map(service => { + const serviceCardsHtml = services.map(service => { const isHealthy = isServiceHealthy(service); const statusColor = isHealthy ? COLOR_HEALTHY : COLOR_UNHEALTHY; const statusIcon = isHealthy ? '✓' : '✗'; @@ -499,24 +450,32 @@ function updateDisplay() { ` : ''} ` : ''} - ${details.RemoteProverStatus ? ` -
URL: ${details.RemoteProverStatus.url}${renderCopyButton(details.RemoteProverStatus.url, 'URL')}
-
Version: ${details.RemoteProverStatus.version}
-
Proof Type: ${details.RemoteProverStatus.supported_proof_type}
- ${renderGrpcWebProbeSection(details.RemoteProverStatus.url)} - ${details.RemoteProverStatus.workers && details.RemoteProverStatus.workers.length > 0 ? ` -
- Workers (${details.RemoteProverStatus.workers.length}): - ${details.RemoteProverStatus.workers.map(worker => ` -
- ${worker.name} - - ${worker.version} - - ${worker.status} -
- `).join('')} -
- ` : ''} - ` : ''} + ${details.RemoteProverStatus ? (() => { + const p = details.RemoteProverStatus.status; + return ` +
URL: ${p.url}${renderCopyButton(p.url, 'URL')}
+
Version: ${p.version}
+
Proof Type: ${p.supported_proof_type}
+ ${renderGrpcWebProbeSection(p.url)} + ${p.workers && p.workers.length > 0 ? ` +
+ Workers (${p.workers.length}): + ${p.workers.map(worker => { + const nameDisplay = worker.name.length > 20 + ? `${worker.name.substring(0, 20)}...${renderCopyButton(worker.name, 'worker name')}` + : worker.name; + return ` +
+ ${nameDisplay} + ${worker.version} + ${worker.status} +
+ `; + }).join('')} +
+ ` : ''} + `; + })() : ''} ${details.FaucetTest ? `
Faucet: @@ -683,25 +642,29 @@ function updateDisplay() {
` : ''} - ${service.testDetails ? ` -
- Proof Generation Testing (${service.testDetails.proof_type}): -
-
- Success Rate: - ${formatSuccessRate(service.testDetails.success_count, service.testDetails.failure_count)} -
-
- Last Response Time: - ${service.testDetails.test_duration_ms}ms -
-
- Last Proof Size: - ${(service.testDetails.proof_size_bytes / 1024).toFixed(2)} KB + ${details.RemoteProverStatus?.test ? (() => { + const t = details.RemoteProverStatus.test; + const ts = details.RemoteProverStatus.test_status; + return ` +
+ Proof Generation Testing (${t.proof_type}): +
+
+ Success Rate: + ${formatSuccessRate(t.success_count, t.failure_count)} +
+
+ Last Response Time: + ${t.test_duration_ms}ms +
+
+ Last Proof Size: + ${(t.proof_size_bytes / 1024).toFixed(2)} KB +
-
- ` : ''} + `; + })() : ''}
`; } @@ -864,4 +827,3 @@ window.addEventListener('beforeunload', () => { clearInterval(grpcWebProbeInterval); } }); - diff --git a/bin/network-monitor/src/frontend.rs b/bin/network-monitor/src/frontend.rs index 2aad55b1f..320a2efe9 100644 --- a/bin/network-monitor/src/frontend.rs +++ b/bin/network-monitor/src/frontend.rs @@ -12,7 +12,7 @@ use tracing::{info, instrument}; use crate::COMPONENT; use crate::config::MonitorConfig; -use crate::status::{NetworkStatus, ServiceStatus}; +use crate::status::{NetworkStatus, RemoteProverDetails, ServiceDetails, ServiceStatus, Status}; // SERVER STATE // ================================================================================================ @@ -86,10 +86,9 @@ async fn get_status( services.push(faucet_rx.borrow().clone()); } - // Collect all remote prover statuses + // Collect all remote prover statuses, merging status + test into a single entry per prover. for (prover_status_rx, prover_test_rx) in &server_state.provers { - services.push(prover_status_rx.borrow().clone()); - services.push(prover_test_rx.borrow().clone()); + services.push(merge_prover(&prover_status_rx.borrow(), &prover_test_rx.borrow())); } // Collect explorer status if available @@ -150,3 +149,50 @@ async fn serve_favicon() -> Response { ) .into_response() } + +/// Merges the status and test receivers for a single remote prover into one `ServiceStatus`. +/// +/// The combined status is `Unhealthy` if either the status check or the test failed, `Unknown` +/// if the status checker has not yet seen the prover, and `Healthy` otherwise. The test result +/// is only attached when the test task has produced an actual `RemoteProverTest` result (before +/// the first test completes, the test channel holds the initial prover status and should not be +/// surfaced as a test). 
+fn merge_prover(status: &ServiceStatus, test: &ServiceStatus) -> ServiceStatus { + // Extract prover status details, or pass through the raw status if the prover is down + // (details will be `ServiceDetails::Error` in that case). + let status_details = match &status.details { + ServiceDetails::ProverStatusCheck(d) => d.clone(), + _ => return status.clone(), + }; + + // Only attach test details once the test task has produced a real result. + let (test_details, test_status, test_error) = match &test.details { + ServiceDetails::ProverTestResult(d) => { + (Some(d.clone()), Some(test.status.clone()), test.error.clone()) + }, + _ => (None, None, None), + }; + + let details = ServiceDetails::RemoteProverStatus(RemoteProverDetails { + status: status_details, + test: test_details, + test_status: test_status.clone(), + test_error: test_error.clone(), + }); + + let name = &status.name; + let base = match (&status.status, &test_status) { + (Status::Unhealthy, _) | (_, Some(Status::Unhealthy)) => { + let error = status + .error + .clone() + .or(test_error) + .unwrap_or_else(|| "prover is unhealthy".to_string()); + ServiceStatus::unhealthy(name, error, details) + }, + (Status::Unknown, _) => ServiceStatus::unknown(name, details), + _ => ServiceStatus::healthy(name, details), + }; + + base.with_last_checked(status.last_checked) +} diff --git a/bin/network-monitor/src/monitor/tasks.rs b/bin/network-monitor/src/monitor/tasks.rs index 46f59d351..1db8e6525 100644 --- a/bin/network-monitor/src/monitor/tasks.rs +++ b/bin/network-monitor/src/monitor/tasks.rs @@ -274,7 +274,7 @@ impl Tasks { // Extract proof_type directly from the service status // If the prover is not available during startup, skip spawning test tasks - let proof_type = if let ServiceDetails::RemoteProverStatus(details) = + let proof_type = if let ServiceDetails::ProverStatusCheck(details) = &initial_prover_status.details { Some(details.supported_proof_type.clone()) diff --git 
a/bin/network-monitor/src/remote_prover.rs b/bin/network-monitor/src/remote_prover.rs index e8bbcb553..27d0a9970 100644 --- a/bin/network-monitor/src/remote_prover.rs +++ b/bin/network-monitor/src/remote_prover.rs @@ -181,7 +181,7 @@ async fn test_remote_prover( ServiceStatus::healthy( name, - ServiceDetails::RemoteProverTest(ProverTestDetails { + ServiceDetails::ProverTestResult(ProverTestDetails { test_duration_ms: duration.as_millis() as u64, proof_size_bytes: response_inner.payload.len(), success_count: *success_count, @@ -196,7 +196,7 @@ async fn test_remote_prover( ServiceStatus::unhealthy( name, tonic_status_to_json(&e), - ServiceDetails::RemoteProverTest(ProverTestDetails { + ServiceDetails::ProverTestResult(ProverTestDetails { test_duration_ms: 0, proof_size_bytes: 0, success_count: *success_count, diff --git a/bin/network-monitor/src/service_status.rs b/bin/network-monitor/src/service_status.rs index 5565c0b71..689b976bf 100644 --- a/bin/network-monitor/src/service_status.rs +++ b/bin/network-monitor/src/service_status.rs @@ -110,6 +110,16 @@ impl ServiceStatus { details: ServiceDetails::Error, } } + + /// Overrides the `last_checked` timestamp on an existing status. + /// + /// Useful when composing a new status from pre-existing data where we want to preserve the + /// original check timestamp instead of using the moment of construction. + #[must_use] + pub fn with_last_checked(mut self, ts: u64) -> Self { + self.last_checked = ts; + self + } } // SERVICE DETAILS @@ -119,8 +129,12 @@ impl ServiceStatus { #[derive(Debug, Clone, Serialize, Deserialize)] pub enum ServiceDetails { RpcStatus(RpcStatusDetails), - RemoteProverStatus(RemoteProverStatusDetails), - RemoteProverTest(ProverTestDetails), + /// Remote prover status combined with its most recent test result. + RemoteProverStatus(RemoteProverDetails), + /// Internal: raw output of a remote prover status check task. 
+ ProverStatusCheck(RemoteProverStatusDetails), + /// Internal: raw output of a remote prover test task. + ProverTestResult(ProverTestDetails), FaucetTest(FaucetTestDetails), NtxIncrement(IncrementDetails), NtxTracking(CounterTrackingDetails), @@ -130,6 +144,15 @@ pub enum ServiceDetails { Error, } +/// Remote prover status combined with its most recent test result. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct RemoteProverDetails { + pub status: RemoteProverStatusDetails, + pub test: Option<ProverTestDetails>, + pub test_status: Option<Status>, + pub test_error: Option<String>, +} + /// Details of the increment service. #[derive(Debug, Clone, Serialize, Deserialize, Default)] pub struct IncrementDetails { diff --git a/bin/network-monitor/src/status.rs b/bin/network-monitor/src/status.rs index 520792f9a..d0a6724d8 100644 --- a/bin/network-monitor/src/status.rs +++ b/bin/network-monitor/src/status.rs @@ -295,7 +295,7 @@ pub(crate) async fn check_remote_prover_status( .filter(|w| w.status != Status::Healthy) .map(|w| w.name.clone()) .collect(); - let details = ServiceDetails::RemoteProverStatus(remote_prover_details); + let details = ServiceDetails::ProverStatusCheck(remote_prover_details); if no_workers { ServiceStatus::unknown(display_name, details)