Skip to content

Commit

Permalink
Merge a4110fd into 50e9ca5
Browse files Browse the repository at this point in the history
  • Loading branch information
dapplion committed May 1, 2023
2 parents 50e9ca5 + a4110fd commit b51dd78
Show file tree
Hide file tree
Showing 17 changed files with 380 additions and 361 deletions.
12 changes: 0 additions & 12 deletions packages/beacon-node/src/metrics/metrics/beacon.ts
Expand Up @@ -13,10 +13,6 @@ export function createBeaconMetrics(register: RegistryMetricCreator) {
// From https://github.com/ethereum/beacon-metrics/blob/master/metrics.md
// Interop-metrics

peers: register.gauge({
name: "libp2p_peers",
help: "number of connected peers",
}),
headSlot: register.gauge({
name: "beacon_head_slot",
help: "slot of the head block of the beacon chain",
Expand Down Expand Up @@ -95,14 +91,6 @@ export function createBeaconMetrics(register: RegistryMetricCreator) {
buckets: [1, 2, 3, 5, 7, 10, 20, 30, 50, 100],
}),

reqResp: {
rateLimitErrors: register.gauge<"method">({
name: "beacon_reqresp_rate_limiter_errors_total",
help: "Count rate limiter errors",
labelNames: ["method"],
}),
},

blockProductionTime: register.histogram<"source">({
name: "beacon_block_production_seconds",
help: "Full runtime of block production",
Expand Down
349 changes: 51 additions & 298 deletions packages/beacon-node/src/metrics/metrics/lodestar.ts
Expand Up @@ -32,252 +32,62 @@ export function createLodestarMetrics(
}

return {
// Peers

peersByDirection: register.gauge<"direction">({
name: "lodestar_peers_by_direction_count",
help: "number of peers, labeled by direction",
labelNames: ["direction"],
}),
peersByClient: register.gauge<"client">({
name: "lodestar_peers_by_client_count",
help: "number of peers, labeled by client",
labelNames: ["client"],
}),
peerLongLivedAttnets: register.histogram({
name: "lodestar_peer_long_lived_attnets_count",
help: "Histogram of current count of long lived attnets of connected peers",
buckets: [0, 4, 16, 32, 64],
}),
peerScoreByClient: register.histogram<"client">({
name: "lodestar_app_peer_score",
help: "Current peer score at lodestar app side",
// Min score = -100, max score = 100, disconnect = -20, ban = -50
buckets: [-100, -50, -20, 0, 25],
labelNames: ["client"],
}),
peerConnectionLength: register.histogram({
name: "lodestar_peer_connection_seconds",
help: "Current peer connection length in second",
// Have good resolution on shorter times. After 1 day, don't count any longer
// 5s 20s 1m 3m 10m 20m 1h 6h 24h
buckets: [5, 20, 60, 180, 600, 1200, 3600, 21600, 86400],
}),
peersSync: register.gauge({
name: "lodestar_peers_sync_count",
help: "Current count of peers useful for sync",
}),
peerConnectedEvent: register.gauge<"direction" | "status">({
name: "lodestar_peer_connected_total",
help: "Total number of peer:connected event, labeled by direction",
labelNames: ["direction", "status"],
}),
peerDisconnectedEvent: register.gauge<"direction">({
name: "lodestar_peer_disconnected_total",
help: "Total number of peer:disconnected event, labeled by direction",
labelNames: ["direction"],
}),
peerGoodbyeReceived: register.gauge<"reason">({
name: "lodestar_peer_goodbye_received_total",
help: "Total number of goodbye received, labeled by reason",
labelNames: ["reason"],
}),
peerLongConnectionDisconnect: register.gauge<"reason">({
name: "lodestar_peer_long_connection_disconnect_total",
help: "For peers with long connection, track disconnect reason",
labelNames: ["reason"],
}),
peerGoodbyeSent: register.gauge<"reason">({
name: "lodestar_peer_goodbye_sent_total",
help: "Total number of goodbye sent, labeled by reason",
labelNames: ["reason"],
}),
peersRequestedToConnect: register.gauge({
name: "lodestar_peers_requested_total_to_connect",
help: "Prioritization results total peers count requested to connect",
}),
peersRequestedToDisconnect: register.gauge<"reason">({
name: "lodestar_peers_requested_total_to_disconnect",
help: "Prioritization results total peers count requested to disconnect",
labelNames: ["reason"],
}),
peersRequestedSubnetsToQuery: register.gauge<"type">({
name: "lodestar_peers_requested_total_subnets_to_query",
help: "Prioritization results total subnets to query and discover peers in",
labelNames: ["type"],
}),
peersRequestedSubnetsPeerCount: register.gauge<"type">({
name: "lodestar_peers_requested_total_subnets_peers_count",
help: "Prioritization results total peers in subnets to query and discover peers in",
labelNames: ["type"],
}),
peersReportPeerCount: register.gauge<"reason">({
name: "lodestar_peers_report_peer_count",
help: "network.reportPeer count by reason",
labelNames: ["reason"],
}),
peerManager: {
heartbeatDuration: register.histogram({
name: "lodestar_peer_manager_heartbeat_duration_seconds",
help: "Peer manager heartbeat function duration in seconds",
buckets: [0.001, 0.01, 0.1, 1],
}),
},

discovery: {
peersToConnect: register.gauge({
name: "lodestar_discovery_peers_to_connect",
help: "Current peers to connect count from discoverPeers requests",
}),
cachedENRsSize: register.gauge({
name: "lodestar_discovery_cached_enrs_size",
help: "Current size of the cachedENRs Set",
}),
findNodeQueryRequests: register.gauge<"action">({
name: "lodestar_discovery_find_node_query_requests_total",
help: "Total count of find node queries started",
labelNames: ["action"],
}),
findNodeQueryTime: register.histogram({
name: "lodestar_discovery_find_node_query_time_seconds",
help: "Time to complete a find node query in seconds in seconds",
buckets: [5, 60],
}),
findNodeQueryEnrCount: register.gauge({
name: "lodestar_discovery_find_node_query_enrs_total",
help: "Total count of found ENRs in queries",
}),
discoveredStatus: register.gauge<"status">({
name: "lodestar_discovery_discovered_status_total_count",
help: "Total count of status results of PeerDiscovery.onDiscovered() function",
labelNames: ["status"],
}),
dialAttempts: register.gauge({
name: "lodestar_discovery_total_dial_attempts",
help: "Total dial attempts by peer discovery",
}),
dialTime: register.histogram<"status">({
name: "lodestar_discovery_dial_time_seconds",
help: "Time to dial peers in seconds",
labelNames: ["status"],
buckets: [0.1, 5, 60],
}),
},

gossipPeer: {
scoreByThreshold: register.gauge<"threshold">({
name: "lodestar_gossip_peer_score_by_threshold_count",
help: "Gossip peer score by threshold",
labelNames: ["threshold"],
}),
meshPeersByClient: register.gauge<"client">({
name: "lodestar_gossip_mesh_peers_by_client_count",
help: "number of mesh peers, labeled by client",
labelNames: ["client"],
}),
scoreByClient: register.histogram<"client">({
name: "lodestar_gossip_score_by_client",
help: "Gossip peer score by client",
labelNames: ["client"],
// based on gossipScoreThresholds and negativeGossipScoreIgnoreThreshold
buckets: [-16000, -8000, -4000, -1000, 0, 5, 100],
}),
score: register.avgMinMax({
name: "lodestar_gossip_score_avg_min_max",
help: "Avg min max of all gossip peer scores",
}),
},
gossipMesh: {
peersByType: register.gauge<"type" | "fork">({
name: "lodestar_gossip_mesh_peers_by_type_count",
help: "Number of connected mesh peers per gossip type",
labelNames: ["type", "fork"],
}),
peersByBeaconAttestationSubnet: register.gauge<"subnet" | "fork">({
name: "lodestar_gossip_mesh_peers_by_beacon_attestation_subnet_count",
help: "Number of connected mesh peers per beacon attestation subnet",
labelNames: ["subnet", "fork"],
}),
peersBySyncCommitteeSubnet: register.gauge<"subnet" | "fork">({
name: "lodestar_gossip_mesh_peers_by_sync_committee_subnet_count",
help: "Number of connected mesh peers per sync committee subnet",
labelNames: ["subnet", "fork"],
gossipValidationQueue: {
length: register.gauge<"topic">({
name: "lodestar_gossip_validation_queue_length",
help: "Count of total gossip validation queue length",
labelNames: ["topic"],
}),
dropRatio: register.gauge<"topic">({
name: "lodestar_gossip_validation_queue_current_drop_ratio",
help: "Current drop ratio of gossip validation queue",
labelNames: ["topic"],
}),
droppedJobs: register.gauge<"topic">({
name: "lodestar_gossip_validation_queue_dropped_jobs_total",
help: "Count of total gossip validation queue dropped jobs",
labelNames: ["topic"],
}),
jobTime: register.histogram<"topic">({
name: "lodestar_gossip_validation_queue_job_time_seconds",
help: "Time to process gossip validation queue job in seconds",
labelNames: ["topic"],
buckets: [0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5, 10],
}),
jobWaitTime: register.histogram<"topic">({
name: "lodestar_gossip_validation_queue_job_wait_time_seconds",
help: "Time from job added to the queue to starting the job in seconds",
labelNames: ["topic"],
buckets: [0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5, 10],
}),
},
gossipTopic: {
peersByType: register.gauge<"type" | "fork">({
name: "lodestar_gossip_topic_peers_by_type_count",
help: "Number of connected topic peers per gossip type",
labelNames: ["type", "fork"],
}),
peersByBeaconAttestationSubnet: register.gauge<"subnet" | "fork">({
name: "lodestar_gossip_topic_peers_by_beacon_attestation_subnet_count",
help: "Number of connected topic peers per beacon attestation subnet",
labelNames: ["subnet", "fork"],
}),
peersBySyncCommitteeSubnet: register.gauge<"subnet" | "fork">({
name: "lodestar_gossip_topic_peers_by_sync_committee_subnet_count",
help: "Number of connected topic peers per sync committee subnet",
labelNames: ["subnet", "fork"],
concurrency: register.gauge<"topic">({
name: "lodestar_gossip_validation_queue_concurrency",
help: "Current count of jobs being run on network processor for topic",
labelNames: ["topic"],
}),
},

gossipValidationAccept: register.gauge<"topic">({
name: "lodestar_gossip_validation_accept_total",
help: "Count of total gossip validation accept",
labelNames: ["topic"],
}),
gossipValidationIgnore: register.gauge<"topic">({
name: "lodestar_gossip_validation_ignore_total",
help: "Count of total gossip validation ignore",
labelNames: ["topic"],
}),
gossipValidationReject: register.gauge<"topic">({
name: "lodestar_gossip_validation_reject_total",
help: "Count of total gossip validation reject",
labelNames: ["topic"],
}),
gossipValidationError: register.gauge<"topic" | "error">({
name: "lodestar_gossip_validation_error_total",
help: "Count of total gossip validation errors detailed",
labelNames: ["topic", "error"],
}),

gossipValidationQueueLength: register.gauge<"topic">({
name: "lodestar_gossip_validation_queue_length",
help: "Count of total gossip validation queue length",
labelNames: ["topic"],
}),

gossipValidationQueueDropRatio: register.gauge<"topic">({
name: "lodestar_gossip_validation_queue_current_drop_ratio",
help: "Current drop ratio of gossip validation queue",
labelNames: ["topic"],
}),
gossipValidationQueueDroppedJobs: register.gauge<"topic">({
name: "lodestar_gossip_validation_queue_dropped_jobs_total",
help: "Count of total gossip validation queue dropped jobs",
labelNames: ["topic"],
}),
gossipValidationQueueJobTime: register.histogram<"topic">({
name: "lodestar_gossip_validation_queue_job_time_seconds",
help: "Time to process gossip validation queue job in seconds",
labelNames: ["topic"],
buckets: [0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5, 10],
}),
gossipValidationQueueJobWaitTime: register.histogram<"topic">({
name: "lodestar_gossip_validation_queue_job_wait_time_seconds",
help: "Time from job added to the queue to starting the job in seconds",
labelNames: ["topic"],
buckets: [0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5, 10],
}),
gossipValidationQueueConcurrency: register.gauge<"topic">({
name: "lodestar_gossip_validation_queue_concurrency",
help: "Current count of jobs being run on network processor for topic",
labelNames: ["topic"],
}),

networkProcessor: {
gossipValidationAccept: register.gauge<"topic">({
name: "lodestar_gossip_validation_accept_total",
help: "Count of total gossip validation accept",
labelNames: ["topic"],
}),
gossipValidationIgnore: register.gauge<"topic">({
name: "lodestar_gossip_validation_ignore_total",
help: "Count of total gossip validation ignore",
labelNames: ["topic"],
}),
gossipValidationReject: register.gauge<"topic">({
name: "lodestar_gossip_validation_reject_total",
help: "Count of total gossip validation reject",
labelNames: ["topic"],
}),
gossipValidationError: register.gauge<"topic" | "error">({
name: "lodestar_gossip_validation_error_total",
help: "Count of total gossip validation errors detailed",
labelNames: ["topic", "error"],
}),
executeWorkCalls: register.gauge({
name: "lodestar_network_processor_execute_work_calls_total",
help: "Total calls to network processor execute work fn",
Expand All @@ -293,63 +103,6 @@ export function createLodestarMetrics(
}),
},

discv5: {
decodeEnrAttemptCount: register.counter({
name: "lodestar_discv5_decode_enr_attempt_count",
help: "Count of total attempts to decode enrs",
}),
decodeEnrErrorCount: register.counter({
name: "lodestar_discv5_decode_enr_error_count",
help: "Count of total errors attempting to decode enrs",
}),
},

attnetsService: {
committeeSubnets: register.gauge({
name: "lodestar_attnets_service_committee_subnets_total",
help: "Count of committee subnets",
}),
subscriptionsCommittee: register.gauge({
name: "lodestar_attnets_service_committee_subscriptions_total",
help: "Count of committee subscriptions",
}),
subscriptionsRandom: register.gauge({
name: "lodestar_attnets_service_random_subscriptions_total",
help: "Count of random subscriptions",
}),
subscribeSubnets: register.gauge<"subnet" | "src">({
name: "lodestar_attnets_service_subscribe_subnets_total",
help: "Count of subscribe_subnets calls",
labelNames: ["subnet", "src"],
}),
unsubscribeSubnets: register.gauge<"subnet" | "src">({
name: "lodestar_attnets_service_unsubscribe_subnets_total",
help: "Count of unsubscribe_subnets calls",
labelNames: ["subnet", "src"],
}),
aggregatorSlotSubnetCount: register.gauge({
name: "lodestar_attnets_service_aggregator_slot_subnet_total",
help: "Count of aggregator per slot and subnet",
}),
},

syncnetsService: {
subscriptionsCommittee: register.gauge({
name: "lodestar_syncnets_service_committee_subscriptions_total",
help: "Count of syncnet committee subscriptions",
}),
subscribeSubnets: register.gauge<"subnet">({
name: "lodestar_syncnets_service_subscribe_subnets_total",
help: "Count of syncnet subscribe_subnets calls",
labelNames: ["subnet"],
}),
unsubscribeSubnets: register.gauge<"subnet">({
name: "lodestar_syncnets_service_unsubscribe_subnets_total",
help: "Count of syncnet unsubscribe_subnets calls",
labelNames: ["subnet"],
}),
},

regenQueue: {
length: register.gauge({
name: "lodestar_regen_queue_length",
Expand Down

0 comments on commit b51dd78

Please sign in to comment.