From 4c23e6cd246d02efbe844aee3fd9f8f71d124b62 Mon Sep 17 00:00:00 2001 From: Dadepo Aderemi Date: Tue, 8 Mar 2022 15:32:06 +0100 Subject: [PATCH] Add retry and metrics to the execution engine --- dashboards/lodestar_general.json | 340 ++++++++++++++++++ .../src/eth1/provider/jsonRpcHttpClient.ts | 51 ++- .../beacon-node/src/execution/engine/http.ts | 45 ++- .../beacon-node/src/execution/engine/index.ts | 15 +- .../src/metrics/metrics/lodestar.ts | 10 + packages/beacon-node/src/node/nodejs.ts | 2 +- .../test/e2e/eth1/jsonRpcHttpClient.test.ts | 186 ++++++++++ .../test/sim/merge-interop.test.ts | 8 +- .../test/unit/executionEngine/http.test.ts | 11 +- .../options/beaconNodeOptions/execution.ts | 20 ++ .../unit/options/beaconNodeOptions.test.ts | 4 + .../src/metrics/metrics/executionEngine.ts | 27 ++ .../unit/executionEngine/httpRetry.test.ts | 148 ++++++++ packages/utils/src/retry.ts | 2 +- packages/utils/test/unit/retry.test.ts | 2 +- 15 files changed, 840 insertions(+), 31 deletions(-) create mode 100644 packages/lodestar/src/metrics/metrics/executionEngine.ts create mode 100644 packages/lodestar/test/unit/executionEngine/httpRetry.test.ts diff --git a/dashboards/lodestar_general.json b/dashboards/lodestar_general.json index 24d201056bc4..120929c2975c 100644 --- a/dashboards/lodestar_general.json +++ b/dashboards/lodestar_general.json @@ -15447,6 +15447,346 @@ ], "title": "Backfill Stats", "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 40 + }, + "id": 369, + "panels": [ + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 26 + }, + "id": 377, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": false, + "expr": "delta(execution_engine_response_time_seconds_count[$__rate_interval])", + "interval": "", + "legendFormat": "{{method}}", + "refId": "A" + } + ], + "title": "Total Requests rate", + "type": "timeseries" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 26 + }, + "id": 373, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": false, + "expr": "delta(execution_engine_retry_count[$__rate_interval])/delta(execution_engine_response_time_seconds_count[$__rate_interval])", + "interval": "", + "legendFormat": "{{method}}", + "refId": "A" + } + ], + "title": "Avg Retries", + "type": "timeseries" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 34 + }, + "id": 375, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": false, + "expr": "delta(execution_engine_error_count[$__rate_interval])", + "interval": "", + "legendFormat": "{{method}}", + "refId": "A" + } + ], + "title": "Error rate", + "type": "timeseries" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 34 + }, + "id": 371, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": false, + "expr": "delta(execution_engine_response_time_seconds_sum[$__rate_interval])/delta(execution_engine_response_time_seconds_count[$__rate_interval])", + "interval": "", + "legendFormat": "{{method}}", + "refId": "A" + } + ], + "title": "Avg response time", + "type": "timeseries" + } + ], + "title": "Execution Stats", + "type": "row" } ], "refresh": "30s", diff --git a/packages/beacon-node/src/eth1/provider/jsonRpcHttpClient.ts b/packages/beacon-node/src/eth1/provider/jsonRpcHttpClient.ts index 2f97bf8a77ef..657971d2a730 100644 --- a/packages/beacon-node/src/eth1/provider/jsonRpcHttpClient.ts +++ b/packages/beacon-node/src/eth1/provider/jsonRpcHttpClient.ts @@ -2,7 +2,7 @@ // Note: isomorphic-fetch is not well mantained and does not support abort signals import fetch from "cross-fetch"; -import {ErrorAborted, TimeoutError} from "@lodestar/utils"; +import {ErrorAborted, TimeoutError, retry} from "@lodestar/utils"; import {IGauge, IHistogram} from "../../metrics/interface.js"; import {IJson, IRpcPayload} from "../interface.js"; import {encodeJwtToken} from "./jwt.js"; @@ -29,6 +29,10 @@ export type ReqOpts = { timeout?: number; // To label request metrics routeId?: string; + // retry opts + retryAttempts?: number; + retryDelay?: number; + shouldRetry?: (lastError: Error) => boolean; }; export type JsonRpcHttpClientMetrics = { @@ -37,10 +41,12 @@ export type JsonRpcHttpClientMetrics = { requestUsedFallbackUrl: IGauge; activeRequests: IGauge; configUrlsCount: IGauge; + retryCount: IGauge; }; export interface IJsonRpcHttpClient { fetch(payload: IRpcPayload

, opts?: ReqOpts): Promise; + fetchWithRetries(payload: IRpcPayload

, opts?: ReqOpts): Promise; fetchBatch(rpcPayloadArr: IRpcPayload[], opts?: ReqOpts): Promise; } @@ -64,11 +70,20 @@ export class JsonRpcHttpClient implements IJsonRpcHttpClient { /** If returns true, do not fallback to other urls and throw early */ shouldNotFallback?: (error: Error) => boolean; /** - * If provided, the requests to the RPC server will be bundled with a HS256 encoded - * token using this secret. Otherwise the requests to the RPC server will be unauthorized + * Optional: If provided, use this jwt secret to HS256 encode and add a jwt token in the + * request header which can be authenticated by the RPC server to provide access. + * A fresh token is generated on each requests as EL spec mandates the ELs to check + * the token freshness +-5 seconds (via `iat` property of the token claim) + * + * Otherwise the requests to the RPC server will be unauthorized * and it might deny responses to the RPC requests. */ jwtSecret?: Uint8Array; + /** Retry attempts */ + retryAttempts?: number; + /** Retry delay, only relevant with retry attempts */ + retryDelay?: number; + /** Metrics for retry, could be expanded later */ metrics?: JsonRpcHttpClientMetrics | null; } ) { @@ -85,13 +100,8 @@ export class JsonRpcHttpClient implements IJsonRpcHttpClient { this.jwtSecret = opts?.jwtSecret; this.metrics = opts?.metrics ?? null; - // Set config metric gauges once - - const metrics = this.metrics; - if (metrics) { - metrics.configUrlsCount.set(urls.length); - metrics.activeRequests.addCollect(() => metrics.activeRequests.set(this.activeRequests)); - } + this.metrics?.configUrlsCount.set(urls.length); + this.metrics?.activeRequests.addCollect(() => this.metrics?.activeRequests.set(this.activeRequests)); } /** @@ -102,6 +112,27 @@ export class JsonRpcHttpClient implements IJsonRpcHttpClient { return parseRpcResponse(res, payload); } + /** + * Perform RPC request with retry + */ + async fetchWithRetries(payload: IRpcPayload

, opts?: ReqOpts): Promise { + const routeId = opts?.routeId ?? "unknown"; + return await retry( + async (attempt) => { + /** If this is a retry, increment the retry counter for this method */ + if (attempt > 0) { + this.opts?.metrics?.retryCount.inc({routeId}); + } + return this.fetch(payload, opts); + }, + { + retries: opts?.retryAttempts ?? this.opts?.retryAttempts ?? 1, + retryDelay: opts?.retryDelay ?? this.opts?.retryAttempts ?? 0, + shouldRetry: opts?.shouldRetry, + } + ); + } + /** * Perform RPC batched request * Type-wise assumes all requests results have the same type diff --git a/packages/beacon-node/src/execution/engine/http.ts b/packages/beacon-node/src/execution/engine/http.ts index 1ef66a322a82..fb51c8ba689d 100644 --- a/packages/beacon-node/src/execution/engine/http.ts +++ b/packages/beacon-node/src/execution/engine/http.ts @@ -25,8 +25,15 @@ import { } from "./interface.js"; import {PayloadIdCache} from "./payloadIdCache.js"; +export type ExecutionEngineModules = { + signal: AbortSignal; + metrics?: IMetrics | null; +}; + export type ExecutionEngineHttpOpts = { urls: string[]; + retryAttempts: number; + retryDelay: number; timeout?: number; /** * 256 bit jwt secret in hex format without the leading 0x. If provided, the execution engine @@ -44,6 +51,8 @@ export const defaultExecutionEngineHttpOpts: ExecutionEngineHttpOpts = { * port/url, one can override this and skip providing a jwt secret. */ urls: ["http://localhost:8551"], + retryAttempts: 3, + retryDelay: 2000, timeout: 12000, }; @@ -65,12 +74,12 @@ export class ExecutionEngineHttp implements IExecutionEngine { readonly payloadIdCache = new PayloadIdCache(); private readonly rpc: IJsonRpcHttpClient; - constructor(opts: ExecutionEngineHttpOpts, signal: AbortSignal, metrics?: IMetrics | null) { + constructor(opts: ExecutionEngineHttpOpts, {metrics, signal}: ExecutionEngineModules) { this.rpc = new JsonRpcHttpClient(opts.urls, { + ...opts, signal, - timeout: opts.timeout, - jwtSecret: opts.jwtSecretHex ? fromHex(opts.jwtSecretHex) : undefined, metrics: metrics?.executionEnginerHttpClient, + jwtSecret: opts.jwtSecretHex ? fromHex(opts.jwtSecretHex) : undefined, }); } @@ -103,12 +112,15 @@ export class ExecutionEngineHttp implements IExecutionEngine { const method = "engine_newPayloadV1"; const serializedExecutionPayload = serializeExecutionPayload(executionPayload); const {status, latestValidHash, validationError} = await this.rpc - .fetch( - {method, params: [serializedExecutionPayload]}, + .fetchWithRetries( + { + method, + params: [serializedExecutionPayload], + }, notifyNewPayloadOpts ) // If there are errors by EL like connection refused, internal error, they need to be - // treated seperate from being INVALID. For now, just pass the error upstream. + // treated separate from being INVALID. For now, just pass the error upstream. .catch((e: Error): EngineApiRpcReturnTypes[typeof method] => { if (e instanceof HttpRpcError || e instanceof ErrorJsonRpcResponse) { return {status: ExecutePayloadStatus.ELERROR, latestValidHash: null, validationError: e.message}; @@ -207,8 +219,14 @@ export class ExecutionEngineHttp implements IExecutionEngine { const { payloadStatus: {status, latestValidHash: _latestValidHash, validationError}, payloadId, - } = await this.rpc.fetch( - {method, params: [{headBlockHash: headBlockHashData, safeBlockHash, finalizedBlockHash}, apiPayloadAttributes]}, + } = await this.rpc.fetchWithRetries( + { + method, + params: [ + {headBlockHash: headBlockHashData, safeBlockHash: headBlockHashData, finalizedBlockHash}, + apiPayloadAttributes, + ], + }, forkchoiceUpdatedV1Opts ); @@ -257,11 +275,16 @@ export class ExecutionEngineHttp implements IExecutionEngine { */ async getPayload(payloadId: PayloadId): Promise { const method = "engine_getPayloadV1"; - const executionPayloadRpc = await this.rpc.fetch< + const executionPayloadRpc = await this.rpc.fetchWithRetries< EngineApiRpcReturnTypes[typeof method], EngineApiRpcParamTypes[typeof method] - >({method, params: [payloadId]}, getPayloadOpts); - + >( + { + method, + params: [payloadId], + }, + getPayloadOpts + ); return parseExecutionPayload(executionPayloadRpc); } diff --git a/packages/beacon-node/src/execution/engine/index.ts b/packages/beacon-node/src/execution/engine/index.ts index d6f4502a2f7f..3735fe8a2b92 100644 --- a/packages/beacon-node/src/execution/engine/index.ts +++ b/packages/beacon-node/src/execution/engine/index.ts @@ -1,6 +1,11 @@ import {IExecutionEngine} from "./interface.js"; import {ExecutionEngineDisabled} from "./disabled.js"; -import {ExecutionEngineHttp, ExecutionEngineHttpOpts, defaultExecutionEngineHttpOpts} from "./http.js"; +import { + ExecutionEngineHttp, + ExecutionEngineModules, + ExecutionEngineHttpOpts, + defaultExecutionEngineHttpOpts, +} from "./http.js"; import {ExecutionEngineMock, ExecutionEngineMockOpts} from "./mock.js"; export { @@ -15,10 +20,12 @@ export type ExecutionEngineOpts = | ({mode?: "http"} & ExecutionEngineHttpOpts) | ({mode: "mock"} & ExecutionEngineMockOpts) | {mode: "disabled"}; - export const defaultExecutionEngineOpts: ExecutionEngineOpts = defaultExecutionEngineHttpOpts; -export function initializeExecutionEngine(opts: ExecutionEngineOpts, signal: AbortSignal): IExecutionEngine { +export function initializeExecutionEngine( + opts: ExecutionEngineOpts, + modules: ExecutionEngineModules +): IExecutionEngine { switch (opts.mode) { case "mock": return new ExecutionEngineMock(opts); @@ -26,6 +33,6 @@ export function initializeExecutionEngine(opts: ExecutionEngineOpts, signal: Abo return new ExecutionEngineDisabled(); case "http": default: - return new ExecutionEngineHttp(opts, signal); + return new ExecutionEngineHttp(opts, modules); } } diff --git a/packages/beacon-node/src/metrics/metrics/lodestar.ts b/packages/beacon-node/src/metrics/metrics/lodestar.ts index 3210b18a47a6..bdcf86d9b088 100644 --- a/packages/beacon-node/src/metrics/metrics/lodestar.ts +++ b/packages/beacon-node/src/metrics/metrics/lodestar.ts @@ -1019,6 +1019,11 @@ export function createLodestarMetrics( help: "eth1 JsonHttpClient - total count of request errors", labelNames: ["routeId"], }), + retryCount: register.gauge<"routeId">({ + name: "lodestar_eth1_http_client_request_retries_total", + help: "eth1 JsonHttpClient - total count of request retries", + labelNames: ["routeId"], + }), requestUsedFallbackUrl: register.gauge({ name: "lodestar_eth1_http_client_request_used_fallback_url_total", help: "eth1 JsonHttpClient - total count of requests on fallback url(s)", @@ -1046,6 +1051,11 @@ export function createLodestarMetrics( help: "ExecutionEngineHttp client - total count of request errors", labelNames: ["routeId"], }), + retryCount: register.gauge<"routeId">({ + name: "lodestar_execution_engine_http_client_request_retries_total", + help: "ExecutionEngineHttp client - total count of request retries", + labelNames: ["routeId"], + }), requestUsedFallbackUrl: register.gauge({ name: "lodestar_execution_engine_http_client_request_used_fallback_url_total", help: "ExecutionEngineHttp client - total count of requests on fallback url(s)", diff --git a/packages/beacon-node/src/node/nodejs.ts b/packages/beacon-node/src/node/nodejs.ts index 7ea39a000774..65d5b29cd122 100644 --- a/packages/beacon-node/src/node/nodejs.ts +++ b/packages/beacon-node/src/node/nodejs.ts @@ -146,7 +146,7 @@ export class BeaconNode { {config, db, metrics, logger: logger.child(opts.logger.eth1), signal}, anchorState ), - executionEngine: initializeExecutionEngine(opts.executionEngine, signal), + executionEngine: initializeExecutionEngine(opts.executionEngine, {metrics, signal}), executionBuilder: opts.executionBuilder.enabled ? initializeExecutionBuilder(opts.executionBuilder, config) : undefined, diff --git a/packages/beacon-node/test/e2e/eth1/jsonRpcHttpClient.test.ts b/packages/beacon-node/test/e2e/eth1/jsonRpcHttpClient.test.ts index 3413fab1c1c7..af82c86c1a75 100644 --- a/packages/beacon-node/test/e2e/eth1/jsonRpcHttpClient.test.ts +++ b/packages/beacon-node/test/e2e/eth1/jsonRpcHttpClient.test.ts @@ -155,3 +155,189 @@ describe("eth1 / jsonRpcHttpClient", function () { }); } }); + +describe("eth1 / jsonRpcHttpClient - with retries", async function () { + this.timeout("10 seconds"); + const port = 36421; + const noMethodError = {code: -32601, message: "Method not found"}; + const afterHooks: (() => Promise)[] = []; + + afterEach(async function () { + while (afterHooks.length) { + const afterHook = afterHooks.pop(); + if (afterHook) + await afterHook().catch((e: Error) => { + // eslint-disable-next-line no-console + console.error("Error in afterEach hook", e); + }); + } + }); + + it("should retry ENOTFOUND", async function () { + let retryCount = 0; + + const url = "https://goerli.fake-website.io"; + const payload = {method: "get", params: []}; + const retryAttempts = 2; + + const controller = new AbortController(); + const eth1JsonRpcClient = new JsonRpcHttpClient([url], {signal: controller.signal}); + await expect( + eth1JsonRpcClient.fetchWithRetries(payload, { + retryAttempts, + shouldRetry: () => { + // using the shouldRetry function to keep tab of the retried requests + retryCount++; + return true; + }, + }) + ).to.be.rejectedWith("getaddrinfo ENOTFOUND"); + expect(retryCount).to.be.equal(retryAttempts, "ENOTFOUND should be retried before failing"); + }); + + it("should retry ECONNREFUSED", async function () { + let retryCount = 0; + + const url = `http://localhost:${port + 1}`; + const payload = {method: "get", params: []}; + const retryAttempts = 2; + + const controller = new AbortController(); + const eth1JsonRpcClient = new JsonRpcHttpClient([url], {signal: controller.signal}); + await expect( + eth1JsonRpcClient.fetchWithRetries(payload, { + retryAttempts, + shouldRetry: () => { + // using the shouldRetry function to keep tab of the retried requests + retryCount++; + return true; + }, + }) + ).to.be.rejectedWith("connect ECONNREFUSED"); + expect(retryCount).to.be.equal(retryAttempts, "connect ECONNREFUSED should be retried before failing"); + }); + + it("should retry 404", async function () { + let retryCount = 0; + + const server = http.createServer((req, res) => { + retryCount++; + res.statusCode = 404; + res.end(); + }); + + await new Promise((resolve) => server.listen(port, resolve)); + afterHooks.push( + () => + new Promise((resolve, reject) => + server.close((err) => { + if (err) reject(err); + else resolve(); + }) + ) + ); + + const url = `http://localhost:${port}`; + const payload = {method: "get", params: []}; + const retryAttempts = 2; + + const controller = new AbortController(); + const eth1JsonRpcClient = new JsonRpcHttpClient([url], {signal: controller.signal}); + await expect(eth1JsonRpcClient.fetchWithRetries(payload, {retryAttempts})).to.be.rejectedWith("Not Found"); + expect(retryCount).to.be.equal(retryAttempts, "404 responses should be retried before failing"); + }); + + it("should retry timeout", async function () { + let retryCount = 0; + + const server = http.createServer(() => { + retryCount++; + // leave the request open until timeout + }); + + await new Promise((resolve) => server.listen(port, resolve)); + afterHooks.push( + () => + new Promise((resolve, reject) => + server.close((err) => { + if (err) reject(err); + else resolve(); + }) + ) + ); + + const url = `http://localhost:${port}`; + const payload = {method: "get", params: []}; + const retryAttempts = 2; + const timeout = 2000; + + const controller = new AbortController(); + const eth1JsonRpcClient = new JsonRpcHttpClient([url], {signal: controller.signal}); + await expect(eth1JsonRpcClient.fetchWithRetries(payload, {retryAttempts, timeout})).to.be.rejectedWith( + "Timeout request" + ); + expect(retryCount).to.be.equal(retryAttempts, "Timeout request should be retried before failing"); + }); + + it("should retry aborted", async function () { + let retryCount = 0; + const server = http.createServer(() => { + retryCount++; + // leave the request open until timeout + }); + + await new Promise((resolve) => server.listen(port, resolve)); + afterHooks.push( + () => + new Promise((resolve, reject) => + server.close((err) => { + if (err) reject(err); + else resolve(); + }) + ) + ); + + const url = `http://localhost:${port}`; + const payload = {method: "get", params: []}; + const retryAttempts = 2; + const timeout = 2000; + + const controller = new AbortController(); + setTimeout(() => controller.abort(), 50); + const eth1JsonRpcClient = new JsonRpcHttpClient([url], {signal: controller.signal}); + await expect(eth1JsonRpcClient.fetchWithRetries(payload, {retryAttempts, timeout})).to.be.rejectedWith( + "Aborted request" + ); + expect(retryCount).to.be.equal(retryAttempts, "Aborted request should be retried before failing"); + }); + + it("should not retry payload error", async function () { + let retryCount = 0; + + const server = http.createServer((req, res) => { + retryCount++; + res.setHeader("Content-Type", "application/json"); + res.end(JSON.stringify({jsonrpc: "2.0", id: 83, error: noMethodError})); + }); + + await new Promise((resolve) => server.listen(port, resolve)); + afterHooks.push( + () => + new Promise((resolve, reject) => + server.close((err) => { + if (err) reject(err); + else resolve(); + }) + ) + ); + + const url = `http://localhost:${port}`; + const payload = {method: "get", params: []}; + const retryAttempts = 2; + + const controller = new AbortController(); + const eth1JsonRpcClient = new JsonRpcHttpClient([url], {signal: controller.signal}); + await expect(eth1JsonRpcClient.fetchWithRetries(payload, {retryAttempts})).to.be.rejectedWith("Method not found"); + expect(retryCount).to.be.equal(1, "Payload error (non-network error) should not be retried"); + }); +}); diff --git a/packages/beacon-node/test/sim/merge-interop.test.ts b/packages/beacon-node/test/sim/merge-interop.test.ts index 00054bc920ed..9f0207ce13df 100644 --- a/packages/beacon-node/test/sim/merge-interop.test.ts +++ b/packages/beacon-node/test/sim/merge-interop.test.ts @@ -20,6 +20,7 @@ import {getAndInitDevValidators} from "../utils/node/validator.js"; import {Eth1Provider} from "../../src/index.js"; import {ZERO_HASH} from "../../src/constants/index.js"; import {bytesToData, dataToBytes, quantityToNum} from "../../src/eth1/provider/utils.js"; +import {defaultExecutionEngineHttpOpts} from "../../src/execution/engine/http.js"; import {logFilesDir} from "./params.js"; import {shell} from "./shell.js"; @@ -46,6 +47,8 @@ import {shell} from "./shell.js"; const terminalTotalDifficultyPreMerge = 10; const TX_SCENARIOS = process.env.TX_SCENARIOS?.split(",") || []; const jwtSecretHex = "0xdc6457099f127cf0bac78de8b297df04951281909db4f58b43def7c7151e765d"; +const retryAttempts = defaultExecutionEngineHttpOpts.retryAttempts; +const retryDelay = defaultExecutionEngineHttpOpts.retryDelay; describe("executionEngine / ExecutionEngineHttp", function () { this.timeout("10min"); @@ -157,7 +160,10 @@ describe("executionEngine / ExecutionEngineHttp", function () { } const controller = new AbortController(); - const executionEngine = new ExecutionEngineHttp({urls: [engineApiUrl], jwtSecretHex}, controller.signal); + const executionEngine = new ExecutionEngineHttp( + {urls: [engineApiUrl], jwtSecretHex, retryAttempts, retryDelay}, + {signal: controller.signal} + ); // 1. Prepare a payload diff --git a/packages/beacon-node/test/unit/executionEngine/http.test.ts b/packages/beacon-node/test/unit/executionEngine/http.test.ts index fe554976578a..43b2018ce228 100644 --- a/packages/beacon-node/test/unit/executionEngine/http.test.ts +++ b/packages/beacon-node/test/unit/executionEngine/http.test.ts @@ -5,6 +5,7 @@ import { ExecutionEngineHttp, parseExecutionPayload, serializeExecutionPayload, + defaultExecutionEngineHttpOpts, } from "../../../src/execution/engine/http.js"; chai.use(chaiAsPromised); @@ -39,7 +40,14 @@ describe("ExecutionEngine / http", () => { const baseUrl = await server.listen(0); - executionEngine = new ExecutionEngineHttp({urls: [baseUrl]}, controller.signal); + executionEngine = new ExecutionEngineHttp( + { + urls: [baseUrl], + retryAttempts: defaultExecutionEngineHttpOpts.retryAttempts, + retryDelay: defaultExecutionEngineHttpOpts.retryDelay, + }, + {signal: controller.signal} + ); }); it("getPayload", async () => { @@ -140,7 +148,6 @@ describe("ExecutionEngine / http", () => { await executionEngine.notifyForkchoiceUpdate( forkChoiceHeadData.headBlockHash, - forkChoiceHeadData.safeBlockHash, forkChoiceHeadData.finalizedBlockHash ); diff --git a/packages/cli/src/options/beaconNodeOptions/execution.ts b/packages/cli/src/options/beaconNodeOptions/execution.ts index 4aed03650f19..4542f9a4bf26 100644 --- a/packages/cli/src/options/beaconNodeOptions/execution.ts +++ b/packages/cli/src/options/beaconNodeOptions/execution.ts @@ -5,6 +5,8 @@ import {ICliCommandOptions, extractJwtHexSecret} from "../../util/index.js"; export type ExecutionEngineArgs = { "execution.urls": string[]; "execution.timeout": number; + "execution.retryAttempts": number; + "execution.retryDelay": number; "jwt-secret"?: string; }; @@ -12,6 +14,8 @@ export function parseArgs(args: ExecutionEngineArgs): IBeaconNodeOptions["execut return { urls: args["execution.urls"], timeout: args["execution.timeout"], + retryAttempts: args["execution.retryAttempts"], + retryDelay: args["execution.retryDelay"], /** * jwtSecret is parsed as hex instead of bytes because the merge with defaults * in beaconOptions messes up the bytes array as as index => value object @@ -39,6 +43,22 @@ export const options: ICliCommandOptions = { group: "execution", }, + "execution.retryAttempts": { + description: "Number of retry attempts when calling execution engine API", + type: "number", + defaultDescription: + defaultOptions.executionEngine.mode === "http" ? String(defaultOptions.executionEngine.retryAttempts) : "1", + group: "execution", + }, + + "execution.retryDelay": { + description: "Delay time in milliseconds between retries when retrying calls to the execution engine API", + type: "number", + defaultDescription: + defaultOptions.executionEngine.mode === "http" ? String(defaultOptions.executionEngine.retryDelay) : "0", + group: "execution", + }, + "jwt-secret": { description: "File path to a shared hex-encoded jwt secret which will be used to generate and bundle HS256 encoded jwt tokens for authentication with the EL client's rpc server hosting engine apis. Secret to be exactly same as the one used by the corresponding EL client.", diff --git a/packages/cli/test/unit/options/beaconNodeOptions.test.ts b/packages/cli/test/unit/options/beaconNodeOptions.test.ts index f70eeeccb787..1f3db397721a 100644 --- a/packages/cli/test/unit/options/beaconNodeOptions.test.ts +++ b/packages/cli/test/unit/options/beaconNodeOptions.test.ts @@ -33,6 +33,8 @@ describe("options / beaconNodeOptions", () => { "execution.urls": ["http://localhost:8551"], "execution.timeout": 12000, + "execution.retryDelay": 2000, + "execution.retryAttempts": 1, "builder.enabled": false, "builder.urls": ["http://localhost:8661"], @@ -95,6 +97,8 @@ describe("options / beaconNodeOptions", () => { }, executionEngine: { urls: ["http://localhost:8551"], + retryAttempts: 1, + retryDelay: 2000, timeout: 12000, }, executionBuilder: { diff --git a/packages/lodestar/src/metrics/metrics/executionEngine.ts b/packages/lodestar/src/metrics/metrics/executionEngine.ts new file mode 100644 index 000000000000..0e9476d213e1 --- /dev/null +++ b/packages/lodestar/src/metrics/metrics/executionEngine.ts @@ -0,0 +1,27 @@ +import {RegistryMetricCreator} from "../utils/registryMetricCreator"; + +export type IExecutionEngineMetrics = ReturnType; + +// eslint-disable-next-line @typescript-eslint/explicit-module-boundary-types, @typescript-eslint/explicit-function-return-type +export function createExecutionEngineMetrics(register: RegistryMetricCreator) { + return { + executionEngine: { + responseTime: register.histogram<"method">({ + name: "execution_engine_response_time_seconds", + help: "Total response time (including retries) for execution engine requests in seconds", + labelNames: ["method"], + buckets: [0.1, 1, 10, 100], + }), + retryCount: register.gauge<"method">({ + name: "execution_engine_retry_count", + help: "Count of retries to the execution engine", + labelNames: ["method"], + }), + errorCount: register.gauge<"method">({ + name: "execution_engine_error_count", + help: "Count of api requests that finally failed", + labelNames: ["method"], + }), + }, + }; +} diff --git a/packages/lodestar/test/unit/executionEngine/httpRetry.test.ts b/packages/lodestar/test/unit/executionEngine/httpRetry.test.ts new file mode 100644 index 000000000000..f7dc9bed3ba4 --- /dev/null +++ b/packages/lodestar/test/unit/executionEngine/httpRetry.test.ts @@ -0,0 +1,148 @@ +import chai, {expect} from "chai"; +import chaiAsPromised from "chai-as-promised"; +import {fastify} from "fastify"; +import {AbortController} from "@chainsafe/abort-controller"; +import {fromHexString} from "@chainsafe/ssz"; +import {ExecutionEngineHttp} from "../../../src/executionEngine/http"; +import {defaultExecutionEngineHttpOpts} from "../../../lib/executionEngine/http"; +import {bytesToData, numToQuantity} from "../../../src/eth1/provider/utils"; + +chai.use(chaiAsPromised); + +describe("ExecutionEngine / http ", () => { + const afterCallbacks: (() => Promise | void)[] = []; + after(async () => { + while (afterCallbacks.length > 0) { + const callback = afterCallbacks.pop(); + if (callback) await callback(); + } + }); + + let executionEngine: ExecutionEngineHttp; + let returnValue: unknown = {}; + let reqJsonRpcPayload: unknown = {}; + let baseUrl: string; + let errorResponsesBeforeSuccess = 0; + let controller: AbortController; + + before("Prepare server", async () => { + controller = new AbortController(); + const server = fastify({logger: false}); + + server.post("/", async (req) => { + if (errorResponsesBeforeSuccess === 0) { + reqJsonRpcPayload = req.body; + delete (reqJsonRpcPayload as {id?: number}).id; + return returnValue; + } else { + --errorResponsesBeforeSuccess; + throw Error(`Will succeed after ${errorResponsesBeforeSuccess} more attempts`); + } + }); + + afterCallbacks.push(async () => { + controller.abort(); + await server.close(); + }); + + baseUrl = await server.listen(0); + + executionEngine = new ExecutionEngineHttp( + { + urls: [baseUrl], + retryAttempts: defaultExecutionEngineHttpOpts.retryAttempts, + retryDelay: defaultExecutionEngineHttpOpts.retryDelay, + }, + {signal: controller.signal} + ); + }); + + describe("notifyForkchoiceUpdate", async function () { + it("notifyForkchoiceUpdate no retry when no pay load attributes", async function () { + /** + * curl -X POST -H "Content-Type: application/json" --data '{"jsonrpc":"2.0","method":"engine_forkchoiceUpdated","params":[{"headBlockHash":"0xb084c10440f05f5a23a55d1d7ebcb1b3892935fb56f23cdc9a7f42c348eed174", "finalizedBlockHash":"0xb084c10440f05f5a23a55d1d7ebcb1b3892935fb56f23cdc9a7f42c348eed174"}],"id":67}' http://localhost:8545 + */ + errorResponsesBeforeSuccess = 2; + const forkChoiceHeadData = { + headBlockHash: "0xb084c10440f05f5a23a55d1d7ebcb1b3892935fb56f23cdc9a7f42c348eed174", + safeBlockHash: "0xb084c10440f05f5a23a55d1d7ebcb1b3892935fb56f23cdc9a7f42c348eed174", + finalizedBlockHash: "0xb084c10440f05f5a23a55d1d7ebcb1b3892935fb56f23cdc9a7f42c348eed174", + }; + + returnValue = { + jsonrpc: "2.0", + id: 67, + result: {payloadStatus: {status: "VALID", latestValidHash: null, validationError: null}, payloadId: "0x"}, + }; + + expect(errorResponsesBeforeSuccess).to.be.equal(2, "errorResponsesBeforeSuccess should not be 2 before request"); + try { + await executionEngine.notifyForkchoiceUpdate( + forkChoiceHeadData.headBlockHash, + forkChoiceHeadData.finalizedBlockHash + ); + } catch (err) { + expect(err).to.be.instanceOf(Error); + } + expect(errorResponsesBeforeSuccess).to.be.equal( + 1, + "errorResponsesBeforeSuccess no retry should be decremented once" + ); + }); + + it("notifyForkchoiceUpdate with retry when pay load attributes", async function () { + this.timeout("10 min"); + /** + * curl -X POST -H "Content-Type: application/json" --data '{"jsonrpc":"2.0","method":"engine_forkchoiceUpdated","params":[{"headBlockHash":"0xb084c10440f05f5a23a55d1d7ebcb1b3892935fb56f23cdc9a7f42c348eed174", "finalizedBlockHash":"0xb084c10440f05f5a23a55d1d7ebcb1b3892935fb56f23cdc9a7f42c348eed174"}, {timestamp: "1647036763", prevRandao: "0x0000000000000000000000000000000000000000000000000000000000000000", suggestedFeeRecipient: "0xa94f5374fce5edbc8e2a8697c15331677e6ebf0b"}}],"id":67}' http://localhost:8545 + */ + errorResponsesBeforeSuccess = defaultExecutionEngineHttpOpts.retryAttempts - 1; + const forkChoiceHeadData = { + headBlockHash: "0xb084c10440f05f5a23a55d1d7ebcb1b3892935fb56f23cdc9a7f42c348eed174", + safeBlockHash: "0xb084c10440f05f5a23a55d1d7ebcb1b3892935fb56f23cdc9a7f42c348eed174", + finalizedBlockHash: "0xb084c10440f05f5a23a55d1d7ebcb1b3892935fb56f23cdc9a7f42c348eed174", + }; + const payloadAttributes = { + timestamp: 1647036763, + prevRandao: fromHexString("0x0000000000000000000000000000000000000000000000000000000000000000"), + suggestedFeeRecipient: fromHexString("0xa94f5374fce5edbc8e2a8697c15331677e6ebf0b"), + }; + + const request = { + jsonrpc: "2.0", + method: "engine_forkchoiceUpdatedV1", + params: [ + forkChoiceHeadData, + { + timestamp: numToQuantity(payloadAttributes.timestamp), + prevRandao: bytesToData(payloadAttributes.prevRandao), + suggestedFeeRecipient: bytesToData(payloadAttributes.suggestedFeeRecipient), + }, + ], + }; + returnValue = { + jsonrpc: "2.0", + id: 67, + result: { + payloadStatus: {status: "VALID", latestValidHash: null, validationError: null}, + payloadId: Buffer.alloc(8, 1), + }, + }; + + expect(errorResponsesBeforeSuccess).to.not.be.equal( + 0, + "errorResponsesBeforeSuccess should not be zero before request" + ); + await executionEngine.notifyForkchoiceUpdate( + forkChoiceHeadData.headBlockHash, + forkChoiceHeadData.finalizedBlockHash, + payloadAttributes + ); + + expect(reqJsonRpcPayload).to.deep.equal(request, "Wrong request JSON RPC payload"); + expect(errorResponsesBeforeSuccess).to.be.equal( + 0, + "errorResponsesBeforeSuccess should be zero after request with retries" + ); + }); + }); +}); diff --git a/packages/utils/src/retry.ts b/packages/utils/src/retry.ts index 9974c8ac1741..2e1f95d9ad8e 100644 --- a/packages/utils/src/retry.ts +++ b/packages/utils/src/retry.ts @@ -28,7 +28,7 @@ export async function retry(fn: (attempt: number) => A | Promise, opts?: R const shouldRetry = opts?.shouldRetry; let lastError: Error = Error("RetryError"); - for (let i = 1; i <= maxRetries; i++) { + for (let i = 0; i < maxRetries; i++) { try { return await fn(i); } catch (e) { diff --git a/packages/utils/test/unit/retry.test.ts b/packages/utils/test/unit/retry.test.ts index 7a5416d0d750..c00feb26f743 100644 --- a/packages/utils/test/unit/retry.test.ts +++ b/packages/utils/test/unit/retry.test.ts @@ -30,7 +30,7 @@ describe("retry", () => { { id: "Succeed at the last attempt", fn: async (attempt) => { - if (attempt < retries) throw sampleError; + if (attempt < retries - 1) throw sampleError; else return sampleResult; }, opts: {retries},