Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: track epoch transition steps time in metrics #6143

Merged
merged 7 commits into from
Dec 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions packages/beacon-node/src/metrics/metrics/lodestar.ts
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,12 @@ export function createLodestarMetrics(
help: "Time to call commit after process a single epoch transition in seconds",
buckets: [0.01, 0.05, 0.1, 0.2, 0.5, 0.75, 1],
}),
epochTransitionStepTime: register.histogram<"step">({
name: "lodestar_stfn_epoch_transition_step_seconds",
help: "Time to call each step of epoch transition in seconds",
labelNames: ["step"],
buckets: [0.01, 0.05, 0.1, 0.2, 0.5, 0.75, 1],
}),
processBlockTime: register.histogram({
name: "lodestar_stfn_process_block_seconds",
help: "Time to process a single block in seconds",
Expand Down
69 changes: 61 additions & 8 deletions packages/state-transition/src/epoch/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import {
CachedBeaconStatePhase0,
EpochTransitionCache,
} from "../types.js";
import {BeaconStateTransitionMetrics} from "../metrics.js";
import {processEffectiveBalanceUpdates} from "./processEffectiveBalanceUpdates.js";
import {processEth1DataReset} from "./processEth1DataReset.js";
import {processHistoricalRootsUpdate} from "./processHistoricalRootsUpdate.js";
Expand Down Expand Up @@ -50,7 +51,12 @@ export {computeUnrealizedCheckpoints} from "./computeUnrealizedCheckpoints.js";
const maxValidatorsPerStateSlashing = SLOTS_PER_EPOCH * MAX_ATTESTER_SLASHINGS * MAX_VALIDATORS_PER_COMMITTEE;
const maxSafeValidators = Math.floor(Number.MAX_SAFE_INTEGER / MAX_EFFECTIVE_BALANCE);

export function processEpoch(fork: ForkSeq, state: CachedBeaconStateAllForks, cache: EpochTransitionCache): void {
export function processEpoch(
fork: ForkSeq,
state: CachedBeaconStateAllForks,
cache: EpochTransitionCache,
metrics?: BeaconStateTransitionMetrics | null
): void {
// state.slashings is initially a Gwei (BigInt) vector, however since Nov 2023 it's converted to UintNum64 (number) vector in the state transition because:
// - state.slashings[nextEpoch % EPOCHS_PER_SLASHINGS_VECTOR] is reset per epoch in processSlashingsReset()
// - max slashed validators per epoch is SLOTS_PER_EPOCH * MAX_ATTESTER_SLASHINGS * MAX_VALIDATORS_PER_COMMITTEE which is 32 * 2 * 2048 = 131072 on mainnet
Expand All @@ -59,19 +65,53 @@ export function processEpoch(fork: ForkSeq, state: CachedBeaconStateAllForks, ca
throw new Error("Lodestar does not support this network, parameters don't fit number value inside state.slashings");
}

processJustificationAndFinalization(state, cache);
{
const timer = metrics?.epochTransitionStepTime.startTimer({
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

would it make sense to have a metric utility function withTimer which just does the instrumentation? could refactor in a separate PR

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, that would be nice

step: "processJustificationAndFinalization",
});
processJustificationAndFinalization(state, cache);
timer?.();
}

if (fork >= ForkSeq.altair) {
const timer = metrics?.epochTransitionStepTime.startTimer({step: "processInactivityUpdates"});
processInactivityUpdates(state as CachedBeaconStateAltair, cache);
timer?.();
}

// processRewardsAndPenalties() is 2nd step in the specs, we optimize to do it
// after processSlashings() to update balances only once
// processRewardsAndPenalties(state, cache);
processRegistryUpdates(state, cache);
{
const timer = metrics?.epochTransitionStepTime.startTimer({step: "processRegistryUpdates"});
processRegistryUpdates(state, cache);
timer?.();
}

// accumulate slashing penalties and only update balances once in processRewardsAndPenalties()
const slashingPenalties = processSlashings(state, cache, false);
processRewardsAndPenalties(state, cache, slashingPenalties);
let slashingPenalties: number[];
{
const timer = metrics?.epochTransitionStepTime.startTimer({step: "processSlashings"});
slashingPenalties = processSlashings(state, cache, false);
timer?.();
}

{
const timer = metrics?.epochTransitionStepTime.startTimer({step: "processRewardsAndPenalties"});
processRewardsAndPenalties(state, cache, slashingPenalties);
timer?.();
}

processEth1DataReset(state, cache);
processEffectiveBalanceUpdates(state, cache);

{
const timer = metrics?.epochTransitionStepTime.startTimer({
step: "processEffectiveBalanceUpdates",
});
processEffectiveBalanceUpdates(state, cache);
timer?.();
}

processSlashingsReset(state, cache);
processRandaoMixesReset(state, cache);

Expand All @@ -84,7 +124,20 @@ export function processEpoch(fork: ForkSeq, state: CachedBeaconStateAllForks, ca
if (fork === ForkSeq.phase0) {
processParticipationRecordUpdates(state as CachedBeaconStatePhase0);
} else {
processParticipationFlagUpdates(state as CachedBeaconStateAltair);
processSyncCommitteeUpdates(state as CachedBeaconStateAltair);
{
const timer = metrics?.epochTransitionStepTime.startTimer({
step: "processParticipationFlagUpdates",
});
processParticipationFlagUpdates(state as CachedBeaconStateAltair);
timer?.();
}

{
const timer = metrics?.epochTransitionStepTime.startTimer({
step: "processSyncCommitteeUpdates",
});
processSyncCommitteeUpdates(state as CachedBeaconStateAltair);
timer?.();
}
}
}
3 changes: 2 additions & 1 deletion packages/state-transition/src/metrics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import {AttesterStatus} from "./util/attesterStatus.js";
export type BeaconStateTransitionMetrics = {
epochTransitionTime: Histogram;
epochTransitionCommitTime: Histogram;
epochTransitionStepTime: Histogram<"step">;
processBlockTime: Histogram;
processBlockCommitTime: Histogram;
stateHashTreeRootTime: Histogram;
Expand All @@ -23,7 +24,7 @@ export type BeaconStateTransitionMetrics = {
type LabelValues<T extends string> = Partial<Record<T, string | number>>;

interface Histogram<T extends string = string> {
startTimer(): () => void;
startTimer(labels?: LabelValues<T>): (labels?: LabelValues<T>) => number;

observe(value: number): void;
observe(labels: LabelValues<T>, values: number): void;
Expand Down
28 changes: 21 additions & 7 deletions packages/state-transition/src/stateTransition.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import {toHexString} from "@chainsafe/ssz";
import {allForks, Slot, ssz} from "@lodestar/types";
import {SLOTS_PER_EPOCH} from "@lodestar/params";
import {BeaconStateTransitionMetrics, onPostStateMetrics, onStateCloneMetrics} from "./metrics.js";
import {beforeProcessEpoch, EpochTransitionCacheOpts} from "./cache/epochTransitionCache.js";
import {beforeProcessEpoch, EpochTransitionCache, EpochTransitionCacheOpts} from "./cache/epochTransitionCache.js";
import {
CachedBeaconStateAllForks,
CachedBeaconStatePhase0,
Expand Down Expand Up @@ -165,19 +165,33 @@ function processSlotsWithTransientCache(

const epochTransitionTimer = metrics?.epochTransitionTime.startTimer();

const epochTransitionCache = beforeProcessEpoch(postState, epochTransitionCacheOpts);
processEpoch(fork, postState, epochTransitionCache);
let epochTransitionCache: EpochTransitionCache;
{
const timer = metrics?.epochTransitionStepTime.startTimer({step: "beforeProcessEpoch"});
epochTransitionCache = beforeProcessEpoch(postState, epochTransitionCacheOpts);
timer?.();
}

processEpoch(fork, postState, epochTransitionCache, metrics);

const {currentEpoch, statuses, balances} = epochTransitionCache;
metrics?.registerValidatorStatuses(currentEpoch, statuses, balances);

postState.slot++;
postState.epochCtx.afterProcessEpoch(postState, epochTransitionCache);

{
const timer = metrics?.epochTransitionStepTime.startTimer({step: "afterProcessEpoch"});
postState.epochCtx.afterProcessEpoch(postState, epochTransitionCache);
timer?.();
}

// Running commit here is not strictly necessary. The cost of running commit twice (here + after process block)
// Should be negligible but gives better metrics to differentiate the cost of it for block and epoch proc.
const epochTransitionCommitTimer = metrics?.epochTransitionCommitTime.startTimer();
postState.commit();
epochTransitionCommitTimer?.();
{
const timer = metrics?.epochTransitionCommitTime.startTimer();
postState.commit();
timer?.();
}

// Note: time only on success. Include beforeProcessEpoch, processEpoch, afterProcessEpoch, commit
epochTransitionTimer?.();
Expand Down
Loading