From cfd73d3431b5c5f175aff419a5ce97ca35c1f4b0 Mon Sep 17 00:00:00 2001 From: Lu Zhang <8418040+longbowlu@users.noreply.github.com> Date: Wed, 7 Sep 2022 15:55:21 -0700 Subject: [PATCH] enable span tracing latency via prom (#4512) --- Cargo.lock | 2 ++ .../sui-core/src/node_sync/node_follower.rs | 9 +++--- crates/sui-core/src/test_utils.rs | 22 --------------- crates/sui-node/src/lib.rs | 11 +------- crates/sui-node/src/main.rs | 19 +++++++++---- crates/sui-swarm/Cargo.toml | 1 + crates/sui-swarm/src/memory/node.rs | 4 +-- crates/sui/Cargo.toml | 1 + crates/sui/tests/full_node_tests.rs | 28 ++++++++++--------- crates/sui/tests/reconfiguration_tests.rs | 3 +- crates/test-utils/src/authority.rs | 8 ++++-- 11 files changed, 46 insertions(+), 62 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 15a7dd7248d6f..a74aad0243b6f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6966,6 +6966,7 @@ dependencies = [ "move-prover-boogie-backend", "move-unit-test", "multiaddr", + "prometheus", "rand 0.8.5", "rocksdb", "rustyline", @@ -7573,6 +7574,7 @@ dependencies = [ "anyhow", "futures", "mysten-network 0.1.0 (git+https://github.com/MystenLabs/mysten-infra?rev=d96230a9272c322a7eefac49708aadfff1eed77e)", + "prometheus", "rand 0.8.5", "sui-config", "sui-node", diff --git a/crates/sui-core/src/node_sync/node_follower.rs b/crates/sui-core/src/node_sync/node_follower.rs index 7054c4bec83bc..d8eb429ecedfa 100644 --- a/crates/sui-core/src/node_sync/node_follower.rs +++ b/crates/sui-core/src/node_sync/node_follower.rs @@ -309,10 +309,8 @@ where mod test { use super::*; use crate::{ - authority_active::gossip::GossipMetrics, - authority_client::NetworkAuthorityClient, - node_sync::SyncStatus, - test_utils::{spawn_test_authorities, test_authority_aggregator}, + authority_active::gossip::GossipMetrics, authority_client::NetworkAuthorityClient, + node_sync::SyncStatus, test_utils::test_authority_aggregator, }; use std::sync::{Arc, Mutex}; use sui_types::{ @@ -322,7 +320,8 @@ mod test { object::Object, }; use test_utils::{ - authority::test_and_configure_authority_configs, messages::make_transfer_sui_transaction, + authority::{spawn_test_authorities, test_and_configure_authority_configs}, + messages::make_transfer_sui_transaction, }; use tokio::{sync::broadcast, time::Instant}; diff --git a/crates/sui-core/src/test_utils.rs b/crates/sui-core/src/test_utils.rs index b0738c7942bbd..eeacaed4ed245 100644 --- a/crates/sui-core/src/test_utils.rs +++ b/crates/sui-core/src/test_utils.rs @@ -23,32 +23,10 @@ use sui_types::{ object::Object, }; -// Can't import SuiNode directly from sui_node - circular dependency -use test_utils::authority::{start_node, SuiNode}; - use futures::StreamExt; use tokio::time::sleep; use tracing::info; -/// Spawn all authorities in the test committee into a separate tokio task. -pub async fn spawn_test_authorities(objects: I, config: &NetworkConfig) -> Vec -where - I: IntoIterator + Clone, -{ - let mut handles = Vec::new(); - for validator in config.validator_configs() { - let node = start_node(validator).await; - let state = node.state(); - - for o in objects.clone() { - state.insert_genesis_object(o).await - } - - handles.push(node); - } - handles -} - /// Create a test authority aggregator. /// (duplicated from test-utils/src/authority.rs - that function can't be used /// in sui-core because of type name conflicts (sui_core::safe_client::SafeClient vs diff --git a/crates/sui-node/src/lib.rs b/crates/sui-node/src/lib.rs index 0b543b545e9cb..9ab552ea0a096 100644 --- a/crates/sui-node/src/lib.rs +++ b/crates/sui-node/src/lib.rs @@ -72,20 +72,11 @@ pub struct SuiNode { } impl SuiNode { - pub async fn start(config: &NodeConfig) -> Result { + pub async fn start(config: &NodeConfig, prometheus_registry: Registry) -> Result { // TODO: maybe have a config enum that takes care of this for us. let is_validator = config.consensus_config().is_some(); let is_full_node = !is_validator; - // - // Start metrics server - // - info!( - "Starting Prometheus HTTP endpoint at {}", - config.metrics_address - ); - let prometheus_registry = metrics::start_prometheus_server(config.metrics_address); - info!(node =? config.protocol_public_key(), "Initializing sui-node listening on {}", config.network_address ); diff --git a/crates/sui-node/src/main.rs b/crates/sui-node/src/main.rs index 30d7bbea21f50..00dde220bfd97 100644 --- a/crates/sui-node/src/main.rs +++ b/crates/sui-node/src/main.rs @@ -7,10 +7,11 @@ use multiaddr::Multiaddr; use std::path::PathBuf; use std::time::Duration; use sui_config::{Config, NodeConfig}; +use sui_node::metrics; use sui_telemetry::send_telemetry_event; use tokio::task; use tokio::time::sleep; -use tracing::warn; +use tracing::{info, warn}; #[derive(Parser)] #[clap(rename_all = "kebab-case", version)] @@ -43,16 +44,22 @@ const PROF_DUMP: &[u8] = b"prof.dump\0"; #[tokio::main] async fn main() -> Result<()> { + let args = Args::parse(); + let mut config = NodeConfig::load(&args.config_path)?; + + let prometheus_registry = metrics::start_prometheus_server(config.metrics_address); + info!( + "Started Prometheus HTTP endpoint at {}", + config.metrics_address + ); + // Initialize logging let (_guard, filter_handle) = telemetry_subscribers::TelemetryConfig::new(env!("CARGO_BIN_NAME")) .with_env() + .with_prom_registry(&prometheus_registry) .init(); - let args = Args::parse(); - - let mut config = NodeConfig::load(&args.config_path)?; - if let Some(listen_address) = args.listen_address { config.network_address = listen_address; } @@ -124,7 +131,7 @@ async fn main() -> Result<()> { sui_node::admin::start_admin_server(config.admin_interface_port, filter_handle); - let node = sui_node::SuiNode::start(&config).await?; + let node = sui_node::SuiNode::start(&config, prometheus_registry).await?; node.wait().await?; Ok(()) diff --git a/crates/sui-swarm/Cargo.toml b/crates/sui-swarm/Cargo.toml index 63ae9b38020ad..f5c42f9bc7c09 100644 --- a/crates/sui-swarm/Cargo.toml +++ b/crates/sui-swarm/Cargo.toml @@ -15,6 +15,7 @@ futures = "0.3.23" tempfile = "3.3.0" tonic-health = "0.6.0" tap = "1.0.1" +prometheus = "0.13.1" sui-config = { path = "../sui-config" } sui-node = { path = "../sui-node" } diff --git a/crates/sui-swarm/src/memory/node.rs b/crates/sui-swarm/src/memory/node.rs index 8bebe17f8919f..b797527193485 100644 --- a/crates/sui-swarm/src/memory/node.rs +++ b/crates/sui-swarm/src/memory/node.rs @@ -4,6 +4,7 @@ use anyhow::anyhow; use anyhow::Result; use futures::FutureExt; +use prometheus::Registry; use std::thread; use sui_config::NodeConfig; use sui_node::SuiNode; @@ -177,9 +178,8 @@ impl Container { } }; let runtime = builder.enable_all().build().unwrap(); - runtime.block_on(async move { - let _server = SuiNode::start(&config).await.unwrap(); + let _server = SuiNode::start(&config, Registry::new()).await.unwrap(); // Notify that we've successfully started the node let _ = startup_sender.send(()); // run until canceled diff --git a/crates/sui/Cargo.toml b/crates/sui/Cargo.toml index e0b79e606bc15..c437466a7a229 100644 --- a/crates/sui/Cargo.toml +++ b/crates/sui/Cargo.toml @@ -60,6 +60,7 @@ jemalloc-ctl = "^0.5" [dev-dependencies] tempfile = "3.3.0" futures = "0.3.23" +prometheus = "0.13.1" typed-store = { git = "https://github.com/MystenLabs/mysten-infra", rev = "d96230a9272c322a7eefac49708aadfff1eed77e"} typed-store-macros = { git = "https://github.com/MystenLabs/mysten-infra", rev = "d96230a9272c322a7eefac49708aadfff1eed77e"} diff --git a/crates/sui/tests/full_node_tests.rs b/crates/sui/tests/full_node_tests.rs index 6d220ff702631..465bee5c25fef 100644 --- a/crates/sui/tests/full_node_tests.rs +++ b/crates/sui/tests/full_node_tests.rs @@ -1,10 +1,6 @@ // Copyright (c) 2022, Mysten Labs, Inc. // SPDX-License-Identifier: Apache-2.0 -use std::net::SocketAddr; -use std::str::FromStr; -use std::{collections::BTreeMap, sync::Arc}; - use futures::future; use jsonrpsee::core::client::{Client, ClientT, Subscription, SubscriptionClientT}; use jsonrpsee::http_client::{HttpClient, HttpClientBuilder}; @@ -13,6 +9,10 @@ use jsonrpsee::ws_client::WsClientBuilder; use move_core_types::account_address::AccountAddress; use move_core_types::identifier::Identifier; use move_core_types::language_storage::ModuleId; +use prometheus::Registry; +use std::net::SocketAddr; +use std::str::FromStr; +use std::{collections::BTreeMap, sync::Arc}; use sui_types::base_types::SequenceNumber; use sui_types::event::TransferType; use sui_types::object::Owner; @@ -115,7 +115,7 @@ async fn test_full_node_follows_txes() -> Result<(), anyhow::Error> { let (swarm, mut context, _) = setup_network_and_wallet().await?; let config = swarm.config().generate_fullnode_config(); - let node = SuiNode::start(&config).await?; + let node = SuiNode::start(&config, Registry::new()).await?; let (transferred_object, _, receiver, digest) = transfer_coin(&mut context).await?; wait_for_tx(digest, node.state().clone()).await; @@ -145,7 +145,7 @@ async fn test_full_node_shared_objects() -> Result<(), anyhow::Error> { let (swarm, context, _) = setup_network_and_wallet().await?; let config = swarm.config().generate_fullnode_config(); - let node = SuiNode::start(&config).await?; + let node = SuiNode::start(&config, Registry::new()).await?; let sender = context.keystore.addresses().get(0).cloned().unwrap(); @@ -166,7 +166,7 @@ async fn test_full_node_move_function_index() -> Result<(), anyhow::Error> { let (swarm, context, _) = setup_network_and_wallet().await?; let config = swarm.config().generate_fullnode_config(); - let node = SuiNode::start(&config).await?; + let node = SuiNode::start(&config, Registry::new()).await?; let sender = context.keystore.addresses().get(0).cloned().unwrap(); let (package_ref, counter_id) = publish_basics_package_and_make_counter(&context, sender).await; let effects = increment_counter(&context, sender, None, package_ref, counter_id).await; @@ -213,7 +213,7 @@ async fn test_full_node_indexes() -> Result<(), anyhow::Error> { let (swarm, mut context, _) = setup_network_and_wallet().await?; let config = swarm.config().generate_fullnode_config(); - let node = SuiNode::start(&config).await?; + let node = SuiNode::start(&config, Registry::new()).await?; let (transferred_object, sender, receiver, digest) = transfer_coin(&mut context).await?; @@ -358,7 +358,7 @@ async fn test_full_node_cold_sync() -> Result<(), anyhow::Error> { sleep(Duration::from_millis(1000)).await; let config = swarm.config().generate_fullnode_config(); - let node = SuiNode::start(&config).await?; + let node = SuiNode::start(&config, Registry::new()).await?; wait_for_tx(digest, node.state().clone()).await; @@ -380,7 +380,7 @@ async fn test_full_node_sync_flood() -> Result<(), anyhow::Error> { let (swarm, context, _) = setup_network_and_wallet().await?; let config = swarm.config().generate_fullnode_config(); - let node = SuiNode::start(&config).await?; + let node = SuiNode::start(&config, Registry::new()).await?; let mut futures = Vec::new(); @@ -476,7 +476,7 @@ async fn set_up_subscription(swarm: &Swarm) -> Result<(SuiNode, Client), anyhow: let mut config = swarm.config().generate_fullnode_config(); config.websocket_address = Some(ws_addr); - let node = SuiNode::start(&config).await?; + let node = SuiNode::start(&config, Registry::new()).await?; let client = WsClientBuilder::default() .build(&format!("ws://{}", ws_server_url)) @@ -499,7 +499,7 @@ async fn set_up_jsonrpc( .generate_fullnode_config_with_custom_db_path(fullnode_db_path, false); config.json_rpc_address = jsonrpc_addr; - let node = SuiNode::start(&config).await?; + let node = SuiNode::start(&config, Registry::new()).await?; let client = HttpClientBuilder::default().build(&format!("http://{}", jsonrpc_server_url))?; Ok((node, client)) @@ -884,7 +884,9 @@ async fn test_full_node_quorum_driver_basic() -> Result<(), anyhow::Error> { async fn test_validator_node_has_no_quorum_driver() { let configs = test_and_configure_authority_configs(1); let validator_config = &configs.validator_configs()[0]; - let node = SuiNode::start(validator_config).await.unwrap(); + let node = SuiNode::start(validator_config, Registry::new()) + .await + .unwrap(); assert!(node.quorum_driver().is_none()); assert!(node.subscribe_to_quorum_driver_effects().is_err()); } diff --git a/crates/sui/tests/reconfiguration_tests.rs b/crates/sui/tests/reconfiguration_tests.rs index 7e23f2fb92fa7..ae2a61a0bd811 100644 --- a/crates/sui/tests/reconfiguration_tests.rs +++ b/crates/sui/tests/reconfiguration_tests.rs @@ -3,6 +3,7 @@ use futures::future::join_all; use multiaddr::Multiaddr; +use prometheus::Registry; use sui_config::ValidatorInfo; use sui_core::authority_active::checkpoint_driver::{ checkpoint_process_step, CheckpointProcessControl, @@ -41,7 +42,7 @@ async fn reconfig_end_to_end_tests() { let mut states = Vec::new(); let mut nodes = Vec::new(); for validator in configs.validator_configs() { - let node = SuiNode::start(validator).await.unwrap(); + let node = SuiNode::start(validator, Registry::new()).await.unwrap(); let state = node.state(); for gas in gas_objects.clone() { diff --git a/crates/test-utils/src/authority.rs b/crates/test-utils/src/authority.rs index bf71c8a9377fc..b91085747f0f4 100644 --- a/crates/test-utils/src/authority.rs +++ b/crates/test-utils/src/authority.rs @@ -1,6 +1,7 @@ // Copyright (c) 2022, Mysten Labs, Inc. // SPDX-License-Identifier: Apache-2.0 use crate::TEST_COMMITTEE_SIZE; +use prometheus::Registry; use rand::{prelude::StdRng, SeedableRng}; use std::collections::BTreeMap; use std::sync::Arc; @@ -51,8 +52,8 @@ pub fn test_and_configure_authority_configs(committee_size: usize) -> NetworkCon configs } -pub async fn start_node(config: &NodeConfig) -> SuiNode { - SuiNode::start(config).await.unwrap() +pub async fn start_node(config: &NodeConfig, prom_registry: Registry) -> SuiNode { + SuiNode::start(config, prom_registry).await.unwrap() } /// Spawn all authorities in the test committee into a separate tokio task. @@ -62,7 +63,8 @@ where { let mut handles = Vec::new(); for validator in config.validator_configs() { - let node = start_node(validator).await; + let prom_registry = Registry::new(); + let node = start_node(validator, prom_registry).await; let state = node.state(); for o in objects.clone() {