Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

use prom server registry for load generator & adjust buckets #4581

Merged
merged 2 commits into from
Sep 13, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion crates/sui-benchmark/src/drivers/bench_driver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,9 @@ pub struct BenchMetrics {
pub latency_s: HistogramVec,
}

const LATENCY_SEC_BUCKETS: &[f64] = &[0.01, 0.1, 1., 2., 3., 5., 10., 20., 30., 60., 180.];
const LATENCY_SEC_BUCKETS: &[f64] = &[
0.01, 0.05, 0.1, 0.25, 0.5, 1., 2.5, 5., 10., 20., 30., 60., 90.,
];

impl BenchMetrics {
fn new(registry: &Registry) -> Self {
Expand Down
4 changes: 3 additions & 1 deletion crates/sui-core/src/authority.rs
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,9 @@ const POSITIVE_INT_BUCKETS: &[f64] = &[
1., 2., 5., 10., 20., 50., 100., 200., 500., 1000., 2000., 5000., 10000., 20000., 50000.,
];

const LATENCY_SEC_BUCKETS: &[f64] = &[0.001, 0.01, 0.1, 1., 2., 3., 5., 10., 20., 30., 60., 180.];
const LATENCY_SEC_BUCKETS: &[f64] = &[
0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1., 2.5, 5., 10., 20., 30., 60., 90.,
];

impl AuthorityMetrics {
pub fn new(registry: &prometheus::Registry) -> AuthorityMetrics {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use std::{

use parking_lot::Mutex;
use prometheus::{
register_histogram_with_registry, register_int_counter_with_registry,
linear_buckets, register_histogram_with_registry, register_int_counter_with_registry,
register_int_gauge_with_registry, Histogram, IntCounter, IntGauge, Registry,
};
use sui_types::{
Expand Down Expand Up @@ -122,6 +122,9 @@ impl CheckpointMetrics {
checkpoint_frequency: register_histogram_with_registry!(
"checkpoint_frequency",
"Number of seconds elapsed between two consecutive checkpoint certificates",
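// bucket bounds are in seconds: start at 1 min (60 s), step by 3 min (180 s), 20 buckets,
// i.e. upper bounds of [1, 4, ..., 58] minutes
// safe to unwrap: linear_buckets only fails for a non-positive width or a zero count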
linear_buckets(60., 180., 20).unwrap(),
registry,
)
.unwrap(),
Expand Down
9 changes: 9 additions & 0 deletions crates/sui-core/src/authority_active/gossip/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,13 @@ pub struct GossipMetrics {
pub follower_stream_duration: Histogram,
}

const WAIT_FOR_FINALITY_LATENCY_SEC_BUCKETS: &[f64] = &[
0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1., 2.5, 5., 10., 20., 30., 60., 90.,
];
const FOLLOWER_STREAM_DURATION_SEC_BUCKETS: &[f64] = &[
0.1, 1., 5., 10., 20., 30., 40., 50., 60., 90., 120., 180., 240., 300.,
];

impl GossipMetrics {
pub fn new(registry: &Registry) -> Self {
Self {
Expand Down Expand Up @@ -87,6 +94,7 @@ impl GossipMetrics {
wait_for_finality_latency_sec: register_histogram_with_registry!(
"gossip_wait_for_finality_latency_sec",
"Latency histogram for gossip/node sync process to wait for txs to become final, in seconds",
WAIT_FOR_FINALITY_LATENCY_SEC_BUCKETS.to_vec(),
registry,
)
.unwrap(),
Expand All @@ -105,6 +113,7 @@ impl GossipMetrics {
follower_stream_duration: register_histogram_with_registry!(
"follower_stream_duration",
"Latency histogram of the duration of the follower streams to peers, in seconds",
FOLLOWER_STREAM_DURATION_SEC_BUCKETS.to_vec(),
registry,
)
.unwrap(),
Expand Down
76 changes: 28 additions & 48 deletions crates/sui-core/src/authority_client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,107 +134,82 @@ impl AuthorityAPI for NetworkAuthorityClient {
&self,
transaction: Transaction,
) -> Result<TransactionInfoResponse, SuiError> {
let timer = self
let _timer = self
.metrics
.handle_transaction_request_latency
.start_timer();

let response = self
.client()
self.client()
.transaction(transaction)
.await
.map(tonic::Response::into_inner)
.map_err(Into::into);

timer.stop_and_record();

response
.map_err(Into::into)
}

/// Execute a certificate.
async fn handle_certificate(
&self,
certificate: CertifiedTransaction,
) -> Result<TransactionInfoResponse, SuiError> {
let timer = self
let _timer = self
.metrics
.handle_certificate_request_latency
.start_timer();

let response = self
.client()
self.client()
.handle_certificate(certificate)
.await
.map(tonic::Response::into_inner)
.map_err(Into::into);

timer.stop_and_record();

response
.map_err(Into::into)
}

async fn handle_account_info_request(
&self,
request: AccountInfoRequest,
) -> Result<AccountInfoResponse, SuiError> {
let timer = self
let _timer = self
.metrics
.handle_account_info_request_latency
.start_timer();

let response = self
.client()
self.client()
.account_info(request)
.await
.map(tonic::Response::into_inner)
.map_err(Into::into);

timer.stop_and_record();

response
.map_err(Into::into)
}

async fn handle_object_info_request(
&self,
request: ObjectInfoRequest,
) -> Result<ObjectInfoResponse, SuiError> {
let timer = self
let _timer = self
.metrics
.handle_object_info_request_latency
.start_timer();

let response = self
.client()
self.client()
.object_info(request)
.await
.map(tonic::Response::into_inner)
.map_err(Into::into);

timer.stop_and_record();

response
.map_err(Into::into)
}

/// Handle transaction information requests for this authority.
async fn handle_transaction_info_request(
&self,
request: TransactionInfoRequest,
) -> Result<TransactionInfoResponse, SuiError> {
let timer = self
let _timer = self
.metrics
.handle_transaction_info_request_latency
.start_timer();

let response = self
.client()
self.client()
.transaction_info(request)
.await
.map(tonic::Response::into_inner)
.map_err(Into::into);

timer.stop_and_record();

response
.map_err(Into::into)
}

/// Handle Batch information requests for this authority.
Expand All @@ -257,18 +232,13 @@ impl AuthorityAPI for NetworkAuthorityClient {
&self,
request: CheckpointRequest,
) -> Result<CheckpointResponse, SuiError> {
let timer = self.metrics.handle_checkpoint_request_latency.start_timer();
let _timer = self.metrics.handle_checkpoint_request_latency.start_timer();

let response = self
.client()
self.client()
.checkpoint(request)
.await
.map(tonic::Response::into_inner)
.map_err(Into::into);

timer.stop_and_record();

response
.map_err(Into::into)
}

async fn handle_epoch(&self, request: EpochRequest) -> Result<EpochResponse, SuiError> {
Expand Down Expand Up @@ -488,42 +458,52 @@ pub struct NetworkAuthorityClientMetrics {
pub handle_checkpoint_request_latency: Histogram,
}

const LATENCY_SEC_BUCKETS: &[f64] = &[
0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1., 2.5, 5., 10., 20., 30., 60., 90.,
];

impl NetworkAuthorityClientMetrics {
pub fn new(registry: &prometheus::Registry) -> Self {
Self {
handle_transaction_request_latency: register_histogram_with_registry!(
"handle_transaction_request_latency",
"Latency of handle transaction request",
LATENCY_SEC_BUCKETS.to_vec(),
registry
)
.unwrap(),
handle_certificate_request_latency: register_histogram_with_registry!(
"handle_certificate_request_latency",
"Latency of handle certificate request",
LATENCY_SEC_BUCKETS.to_vec(),
registry
)
.unwrap(),
handle_account_info_request_latency: register_histogram_with_registry!(
"handle_account_info_request_latency",
"Latency of handle account info request",
LATENCY_SEC_BUCKETS.to_vec(),
registry
)
.unwrap(),
handle_object_info_request_latency: register_histogram_with_registry!(
"handle_object_info_request_latency",
"Latency of handle object info request",
LATENCY_SEC_BUCKETS.to_vec(),
registry
)
.unwrap(),
handle_transaction_info_request_latency: register_histogram_with_registry!(
"handle_transaction_info_request_latency",
"Latency of handle transaction info request",
LATENCY_SEC_BUCKETS.to_vec(),
registry
)
.unwrap(),
handle_checkpoint_request_latency: register_histogram_with_registry!(
"handle_checkpoint_request_latency",
"Latency of handle checkpoint request",
LATENCY_SEC_BUCKETS.to_vec(),
registry
)
.unwrap(),
Expand Down
4 changes: 3 additions & 1 deletion crates/sui-core/src/authority_server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,9 @@ pub struct ValidatorServiceMetrics {
pub handle_certificate_non_consensus_latency: Histogram,
}

const LATENCY_SEC_BUCKETS: &[f64] = &[0.001, 0.01, 0.1, 1., 2., 3., 5., 10., 20., 30., 60., 180.];
const LATENCY_SEC_BUCKETS: &[f64] = &[
0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1., 2.5, 5., 10., 20., 30., 60., 90.,
];

impl ValidatorServiceMetrics {
pub fn new(registry: &Registry) -> Self {
Expand Down
4 changes: 3 additions & 1 deletion crates/sui-core/src/safe_client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@ pub struct SafeClientMetrics {
latency: HistogramVec,
}

const LATENCY_SEC_BUCKETS: &[f64] = &[0.001, 0.01, 0.1, 1., 2., 3., 5., 10., 20., 30., 60., 180.];
const LATENCY_SEC_BUCKETS: &[f64] = &[
0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1., 2.5, 5., 10., 20., 30., 60., 90.,
];

impl SafeClientMetrics {
pub fn new(registry: &prometheus::Registry) -> Self {
Expand Down
4 changes: 4 additions & 0 deletions crates/sui-faucet/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ pub struct FaucetMetrics {
pub(crate) current_requests_in_flight: IntGauge,
pub(crate) process_latency: Histogram,
}
const LATENCY_SEC_BUCKETS: &[f64] = &[
0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1., 2.5, 5., 10., 20., 30., 60., 90.,
];

impl FaucetMetrics {
pub fn new(registry: &Registry) -> Self {
Expand All @@ -40,6 +43,7 @@ impl FaucetMetrics {
process_latency: register_histogram_with_registry!(
"process_latency",
"Latency of processing a Faucet request",
LATENCY_SEC_BUCKETS.to_vec(),
registry,
)
.unwrap(),
Expand Down
5 changes: 5 additions & 0 deletions crates/sui-json-rpc/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,10 @@ pub struct JsonRpcMetrics {
errors_by_route: IntCounterVec,
}

const LATENCY_SEC_BUCKETS: &[f64] = &[
0.001, 0.005, 0.01, 0.05, 0.1, 0.25, 0.5, 1., 2.5, 5., 10., 20., 30., 60., 90.,
];

impl JsonRpcMetrics {
pub fn new(registry: &prometheus::Registry) -> Self {
Self {
Expand All @@ -211,6 +215,7 @@ impl JsonRpcMetrics {
"req_latency_by_route",
"Latency of a request by route",
&["route"],
LATENCY_SEC_BUCKETS.to_vec(),
registry,
)
.unwrap(),
Expand Down
7 changes: 7 additions & 0 deletions crates/sui-quorum-driver/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ pub struct QuorumDriverMetrics {
pub(crate) current_requests_in_flight: IntGauge,
}

const LATENCY_SEC_BUCKETS: &[f64] = &[
0.01, 0.05, 0.1, 0.25, 0.5, 1., 2., 4., 6., 8., 10., 20., 30., 60., 90.,
];

impl QuorumDriverMetrics {
pub fn new(registry: &Registry) -> Self {
Self {
Expand Down Expand Up @@ -65,18 +69,21 @@ impl QuorumDriverMetrics {
latency_sec_immediate_return: register_histogram_with_registry!(
"quorum_driver_latency_sec_immediate_return",
"Latency of processing an immdediate_return execution request, in sec",
LATENCY_SEC_BUCKETS.to_vec(),
registry,
)
.unwrap(),
latency_sec_wait_for_tx_cert: register_histogram_with_registry!(
"quorum_driver_latency_sec_wait_for_tx_cert",
"Latency of processing an wait_for_tx_cert execution request, in sec",
LATENCY_SEC_BUCKETS.to_vec(),
registry,
)
.unwrap(),
latency_sec_wait_for_effects_cert: register_histogram_with_registry!(
"quorum_driver_latency_sec_wait_for_effects_cert",
"Latency of processing an wait_for_effects_cert execution request, in sec",
LATENCY_SEC_BUCKETS.to_vec(),
registry,
)
.unwrap(),
Expand Down