diff --git a/app/app.go b/app/app.go index 03f1392f9..e485cb728 100644 --- a/app/app.go +++ b/app/app.go @@ -587,16 +587,16 @@ func callValidatorMock(ctx context.Context, duty core.Duty, cl eth2client.Servic case core.DutyAttester: err := validatormock.Attest(ctx, cl.(*eth2http.Service), signer, eth2p0.Slot(duty.Slot), pubshares...) if err != nil { - log.Warn(ctx, "Attestation failed", err) + log.Warn(ctx, "Mock attestation failed", err) } else { - log.Info(ctx, "Attestation success", z.I64("slot", duty.Slot)) + log.Info(ctx, "Mock attestation submitted to validatorapi", z.I64("slot", duty.Slot)) } case core.DutyProposer: err := validatormock.ProposeBlock(ctx, cl.(*eth2http.Service), signer, eth2p0.Slot(duty.Slot), addr, pubshares...) if err != nil { - log.Warn(ctx, "Failed to propose block", err) + log.Warn(ctx, "Mock block proposal failed", err) } else { - log.Info(ctx, "Block proposed successfully", z.I64("slot", duty.Slot)) + log.Info(ctx, "Mock block proposal submitted to validatorapi", z.I64("slot", duty.Slot)) } default: log.Warn(ctx, "Invalid duty type", nil) diff --git a/core/sigagg/sigagg.go b/core/sigagg/sigagg.go index bfa4fc7a2..d957487cc 100644 --- a/core/sigagg/sigagg.go +++ b/core/sigagg/sigagg.go @@ -27,7 +27,9 @@ import ( "github.com/coinbase/kryptology/pkg/signatures/bls/bls_sig" "github.com/obolnetwork/charon/app/errors" + "github.com/obolnetwork/charon/app/log" "github.com/obolnetwork/charon/app/tracer" + "github.com/obolnetwork/charon/app/z" "github.com/obolnetwork/charon/core" "github.com/obolnetwork/charon/tbls" "github.com/obolnetwork/charon/tbls/tblsconv" @@ -101,6 +103,8 @@ func (a *Aggregator) Aggregate(ctx context.Context, duty core.Duty, pubkey core. return err } + log.Debug(ctx, "Aggregated threshold partial signatures", z.Any("duty", duty)) + // Call subscriptions. 
for _, sub := range a.subs { err := sub(ctx, duty, pubkey, aggSig) diff --git a/testutil/compose/compose/alert.go b/testutil/compose/compose/alert.go index 2861a78dc..ad35151c0 100644 --- a/testutil/compose/compose/alert.go +++ b/testutil/compose/compose/alert.go @@ -17,6 +17,7 @@ package main import ( "context" + "encoding/json" "fmt" "io" "net" @@ -30,14 +31,14 @@ import ( ) // startAlertCollector starts a server that accepts alert webhooks until the context is closed and returns -// a channel on which the received webhooks will be sent. -func startAlertCollector(ctx context.Context, port int) (chan []byte, error) { +// a channel on which the received alert titles will be sent. +func startAlertCollector(ctx context.Context, port int) (chan string, error) { l, err := net.Listen("tcp", fmt.Sprintf("0.0.0.0:%d", port)) if err != nil { return nil, errors.Wrap(err, "new listener") } - bodies := make(chan []byte) + resp := make(chan string, 100) server := http.Server{ Handler: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { defer r.Body.Close() @@ -48,9 +49,28 @@ func startAlertCollector(ctx context.Context, port int) (chan []byte, error) { return } - log.Info(ctx, "Received webhook", z.Str("body", string(b))) + wrapper := struct { + Body string `json:"body"` + }{} + if err := json.Unmarshal(b, &wrapper); err != nil { + log.Error(ctx, "Unmarshal body wrapper", err, z.Str("body", string(b))) + return + } + + alert := struct { + Title string `json:"title"` + }{} + if err := json.Unmarshal(b, &alert); err != nil { + log.Error(ctx, "Unmarshal alert", err, z.Str("body", string(b))) + return + } else if alert.Title == "" { + log.Error(ctx, "Alert title empty", err, z.Str("body", string(b))) + return + } + + log.Info(ctx, "Received webhook", z.Str("body", string(b)), z.Str("title", alert.Title)) - bodies <- b + resp <- alert.Title }), } @@ -66,8 +86,8 @@ func startAlertCollector(ctx context.Context, port int) (chan []byte, error) { if err := eg.Wait(); 
!errors.Is(err, context.Canceled) && !errors.Is(err, http.ErrServerClosed) { log.Error(ctx, "Alert collector", err) } - close(bodies) + close(resp) }() - return bodies, nil + return resp, nil } diff --git a/testutil/compose/compose/main.go b/testutil/compose/compose/main.go index 7b552c914..494f6a25f 100644 --- a/testutil/compose/compose/main.go +++ b/testutil/compose/compose/main.go @@ -50,7 +50,7 @@ func newRootCmd() *cobra.Command { root.AddCommand(newNewCmd()) root.AddCommand(newCleanCmd()) - root.AddCommand(newAutoCmd()) + root.AddCommand(newAutoCmd(nil)) root.AddCommand(newDockerCmd( "define", "Creates a docker-compose.yml that executes `charon create dkg` if keygen==dkg", @@ -123,7 +123,7 @@ func newDockerCmd(use string, short string, runFunc runFunc) *cobra.Command { return cmd } -func newAutoCmd() *cobra.Command { +func newAutoCmd(tmplCallback func(data *compose.TmplData)) *cobra.Command { cmd := &cobra.Command{ Use: "auto", Short: "Convenience function that runs `compose define && compose lock && compose run`", @@ -141,8 +141,17 @@ func newAutoCmd() *cobra.Command { rootCtx := log.WithTopic(cmd.Context(), "auto") + var lastTmpl compose.TmplData for _, runFunc := range runFuncs { - _, err := runFunc(rootCtx) + lastTmpl, err = runFunc(rootCtx) + if err != nil { + return err + } + } + + if tmplCallback != nil { + tmplCallback(&lastTmpl) + err := compose.WriteDockerCompose(*dir, lastTmpl) if err != nil { return err } @@ -150,6 +159,9 @@ func newAutoCmd() *cobra.Command { ctx := rootCtx if *alertTimeout != 0 { + // Ensure everything is clean before we start with alert test. 
+ _ = execDown(rootCtx, *dir) + var cancel context.CancelFunc ctx, cancel = context.WithTimeout(rootCtx, *alertTimeout) defer cancel() @@ -160,17 +172,17 @@ func newAutoCmd() *cobra.Command { return err } - if err := execUp(ctx, *dir); !errors.Is(err, context.DeadlineExceeded) { - return err - } + defer func() { + _ = execDown(rootCtx, *dir) + }() - if err := execDown(rootCtx, *dir); err != nil { + if err := execUp(ctx, *dir); !errors.Is(err, context.DeadlineExceeded) { return err } var fail bool for alert := range alerts { - log.Error(rootCtx, "Received alert", nil, z.Str("alert", string(alert))) + log.Error(rootCtx, "Received alert", nil, z.Str("alert", alert)) fail = true } if fail { @@ -248,6 +260,7 @@ func execUp(ctx context.Context, dir string) error { "--remove-orphans", "--build", "--abort-on-container-exit", + "--quiet-pull", ) cmd.Dir = dir cmd.Stdout = os.Stdout diff --git a/testutil/compose/compose/smoke_internal_test.go b/testutil/compose/compose/smoke_internal_test.go new file mode 100644 index 000000000..51d1b07c0 --- /dev/null +++ b/testutil/compose/compose/smoke_internal_test.go @@ -0,0 +1,142 @@ +// Copyright © 2022 Obol Labs Inc. +// +// This program is free software: you can redistribute it and/or modify it +// under the terms of the GNU General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) +// any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +// more details. +// +// You should have received a copy of the GNU General Public License along with +// this program. If not, see . 
+ +package main + +import ( + "context" + "flag" + "os" + "strings" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/obolnetwork/charon/testutil/compose" +) + +//go:generate go test . -run=TestSmoke -integration -v +var integration = flag.Bool("integration", false, "Enable docker based integration test") + +func TestSmoke(t *testing.T) { + if !*integration { + t.Skip("Skipping smoke integration test") + } + + tests := []struct { + Name string + ConfigFunc func(*compose.Config) + TmplFunc func(*compose.TmplData) + }{ + { + Name: "default alpha", + ConfigFunc: func(conf *compose.Config) { + conf.KeyGen = compose.KeyGenCreate + conf.FeatureSet = "alpha" + }, + }, + { + Name: "default beta", + ConfigFunc: func(conf *compose.Config) { + conf.KeyGen = compose.KeyGenCreate + conf.FeatureSet = "beta" + }, + }, + { + Name: "default stable", + ConfigFunc: func(conf *compose.Config) { + conf.KeyGen = compose.KeyGenCreate + conf.FeatureSet = "stable" + }, + }, + { + Name: "dkg", + ConfigFunc: func(conf *compose.Config) { + conf.KeyGen = compose.KeyGenDKG + }, + }, + { + Name: "very large dkg", + ConfigFunc: func(conf *compose.Config) { + conf.NumNodes = 21 + conf.Threshold = 14 + conf.NumValidators = 1000 + conf.KeyGen = compose.KeyGenDKG + }, + }, + { + Name: "version matrix", + TmplFunc: func(data *compose.TmplData) { + data.Nodes[0].ImageTag = "latest" + data.Nodes[1].ImageTag = "latest" + data.Nodes[2].ImageTag = "v0.5.0" // TODO(corver): Update this with new releases. + data.Nodes[3].ImageTag = "v0.5.0" + }, + }, + { + Name: "teku versions", // TODO(corver): Do the same for lighthouse. 
+ ConfigFunc: func(conf *compose.Config) { + conf.VCs = []compose.VCType{compose.VCTeku} + }, + TmplFunc: func(data *compose.TmplData) { + data.VCs[0].Image = "consensys/teku:latest" + data.VCs[1].Image = "consensys/teku:22.5" + data.VCs[2].Image = "consensys/teku:22.4" + data.VCs[3].Image = "consensys/teku:22.3" + }, + }, + { + Name: "1 of 4 down", + TmplFunc: func(data *compose.TmplData) { + node0 := data.Nodes[0] + for i := 0; i < len(node0.EnvVars); i++ { + if strings.HasPrefix(node0.EnvVars[i].Key, "p2p") { + data.Nodes[0].EnvVars[i].Key = "unset" // Zero p2p flags so it cannot communicate + } + } + }, + }, + } + + for _, test := range tests { + t.Run(test.Name, func(t *testing.T) { + dir, err := os.MkdirTemp("", "") + require.NoError(t, err) + + conf := compose.NewDefaultConfig() + if test.ConfigFunc != nil { + test.ConfigFunc(&conf) + } + require.NoError(t, compose.WriteConfig(dir, conf)) + + cmd := newAutoCmd(func(data *compose.TmplData) { + data.MonitoringPorts = false + if test.TmplFunc != nil { + test.TmplFunc(data) + } + }) + require.NoError(t, cmd.Flags().Set("compose-dir", dir)) + require.NoError(t, cmd.Flags().Set("alert-timeout", "30s")) + + err = cmd.ExecuteContext(context.Background()) + require.NoError(t, err) + }) + } +} + +// TestFlagFalse ensures the integration flag default value is false. 
+func TestFlagFalse(t *testing.T) { + require.False(t, *integration) +} diff --git a/testutil/compose/compose_internal_test.go b/testutil/compose/compose_internal_test.go index 2464b6314..59f7e7096 100644 --- a/testutil/compose/compose_internal_test.go +++ b/testutil/compose/compose_internal_test.go @@ -39,14 +39,14 @@ func TestDockerCompose(t *testing.T) { { Name: "define dkg", ConfFunc: func(conf *Config) { - conf.KeyGen = keyGenDKG + conf.KeyGen = KeyGenDKG }, RunFunc: Define, }, { Name: "define create", ConfFunc: func(conf *Config) { - conf.KeyGen = keyGenCreate + conf.KeyGen = KeyGenCreate }, RunFunc: Define, }, @@ -54,7 +54,7 @@ func TestDockerCompose(t *testing.T) { Name: "lock dkg", ConfFunc: func(conf *Config) { conf.Step = stepDefined - conf.KeyGen = keyGenDKG + conf.KeyGen = KeyGenDKG }, RunFunc: Lock, }, @@ -62,7 +62,7 @@ func TestDockerCompose(t *testing.T) { Name: "lock create", ConfFunc: func(conf *Config) { conf.Step = stepDefined - conf.KeyGen = keyGenCreate + conf.KeyGen = KeyGenCreate }, RunFunc: Lock, }, @@ -110,6 +110,6 @@ func TestParseTemplate(t *testing.T) { _, err := template.New("").Parse(string(tmpl)) require.NoError(t, err) - _, err = getVC(vcTeku, 0, 1) + _, err = getVC(VCTeku, 0, 1) require.NoError(t, err) } diff --git a/testutil/compose/config.go b/testutil/compose/config.go index 58633d191..d9f1153a3 100644 --- a/testutil/compose/config.go +++ b/testutil/compose/config.go @@ -20,7 +20,7 @@ const ( configFile = "config.json" defaultImageTag = "latest" defaultBeaconNode = "mock" - defaultKeyGen = keyGenCreate + defaultKeyGen = KeyGenCreate defaultNumVals = 1 defaultNumNodes = 4 defaultThreshold = 3 @@ -35,21 +35,21 @@ const ( cmdCreateDKG = "[create,dkg]" ) -// vcType defines a validator client type. -type vcType string +// VCType defines a validator client type. 
+type VCType string const ( - vcMock vcType = "mock" - vcTeku vcType = "teku" - vcLighthouse vcType = "lighthouse" + VCMock VCType = "mock" + VCTeku VCType = "teku" + VCLighthouse VCType = "lighthouse" ) // KeyGen defines a key generation process. type KeyGen string const ( - keyGenDKG KeyGen = "dkg" - keyGenCreate KeyGen = "create" + KeyGenDKG KeyGen = "dkg" + KeyGenCreate KeyGen = "create" ) // step defines the current completed compose step. @@ -94,7 +94,7 @@ type Config struct { BeaconNode string `json:"beacon_node"` // VCs define the types of validator clients to use. - VCs []vcType `json:"validator_clients"` + VCs []VCType `json:"validator_clients"` // FeatureSet defines the minimum feature set to enable. FeatureSet string `json:"feature_set"` @@ -117,7 +117,7 @@ func NewDefaultConfig() Config { Threshold: defaultThreshold, NumValidators: defaultNumVals, ImageTag: defaultImageTag, - VCs: []vcType{vcTeku, vcLighthouse, vcMock}, + VCs: []VCType{VCTeku, VCLighthouse, VCMock}, KeyGen: defaultKeyGen, BeaconNode: defaultBeaconNode, Step: stepNew, diff --git a/testutil/compose/define.go b/testutil/compose/define.go index e56ccea9f..8c652bb0b 100644 --- a/testutil/compose/define.go +++ b/testutil/compose/define.go @@ -113,7 +113,7 @@ func Define(ctx context.Context, dir string, conf Config) (TmplData, error) { } var data TmplData - if conf.KeyGen == keyGenDKG { + if conf.KeyGen == KeyGenDKG { log.Info(ctx, "Creating node*/p2pkey for ENRs required for charon create dkg") // charon create dkg requires operator ENRs, so we need to create p2pkeys now. 
@@ -172,7 +172,7 @@ func Define(ctx context.Context, dir string, conf Config) (TmplData, error) { log.Info(ctx, "Creating config.json") conf.Step = stepDefined - if err := writeConfig(dir, conf); err != nil { + if err := WriteConfig(dir, conf); err != nil { return TmplData{}, err } @@ -349,8 +349,8 @@ func nodeFile(dir string, i int, file string) string { return path.Join(dir, fmt.Sprintf("node%d", i), file) } -// writeConfig writes the config as yaml to disk. -func writeConfig(dir string, conf Config) error { +// WriteConfig writes the config as JSON to disk. +func WriteConfig(dir string, conf Config) error { b, err := json.MarshalIndent(conf, "", " ") if err != nil { return errors.Wrap(err, "marshal config") diff --git a/testutil/compose/docker-compose.template b/testutil/compose/docker-compose.template index cd3b650a7..ed5074315 100644 --- a/testutil/compose/docker-compose.template +++ b/testutil/compose/docker-compose.template @@ -12,6 +12,8 @@ services: {{- range $i, $node := .Nodes}} node{{$i}}: <<: *node-base + {{if .ImageTag}}image: ghcr.io/obolnetwork/charon:{{.ImageTag}} + {{end -}} {{- if .EnvVars}} environment: {{- range $node.EnvVars}} @@ -58,28 +60,36 @@ services: {{if .Monitoring}} prometheus: image: prom/prometheus:latest + {{- if .MonitoringPorts}} ports: - "9090:9090" + {{end -}} networks: [compose] volumes: - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml grafana: image: grafana/grafana:latest + {{- if .MonitoringPorts}} ports: - "3000:3000" + {{end -}} networks: [compose] volumes: - ./grafana/datasource.yml:/etc/grafana/provisioning/datasources/datasource.yml - ./grafana/dashboards.yml:/etc/grafana/provisioning/dashboards/datasource.yml + - ./grafana/notifiers.yml:/etc/grafana/provisioning/notifiers/notifiers.yml - ./grafana/grafana.ini:/etc/grafana/grafana.ini:ro - - ./grafana/simnet_dash.json:/etc/dashboards/simnet_dash.json + - ./grafana/dash_simnet.json:/etc/dashboards/dash_simnet.json + - 
./grafana/dash_alerts.json:/etc/dashboards/dash_alerts.json jaeger: image: jaegertracing/all-in-one:latest networks: [compose] + {{- if .MonitoringPorts}} ports: - "16686:16686" + {{end -}} {{end}} networks: compose: diff --git a/testutil/compose/lock.go b/testutil/compose/lock.go index 535c20256..d435b9a9e 100644 --- a/testutil/compose/lock.go +++ b/testutil/compose/lock.go @@ -35,7 +35,7 @@ func Lock(ctx context.Context, dir string, conf Config) (TmplData, error) { var data TmplData switch conf.KeyGen { - case keyGenCreate: + case KeyGenCreate: splitKeysDir, err := getRelSplitKeysDir(dir, conf.SplitKeysDir) if err != nil { return TmplData{}, err @@ -59,7 +59,7 @@ func Lock(ctx context.Context, dir string, conf Config) (TmplData, error) { CharonCommand: cmdCreateCluster, Nodes: []node{n}, } - case keyGenDKG: + case KeyGenDKG: var nodes []node for i := 0; i < conf.NumNodes; i++ { @@ -83,7 +83,7 @@ func Lock(ctx context.Context, dir string, conf Config) (TmplData, error) { log.Info(ctx, "Create keys and cluster lock with: docker-compose up") conf.Step = stepLocked - if err := writeConfig(dir, conf); err != nil { + if err := WriteConfig(dir, conf); err != nil { return TmplData{}, err } @@ -104,7 +104,7 @@ func newNodeEnvs(index int, validatorMock bool, conf Config) []kv { } lockFile := "/compose/cluster-lock.json" - if conf.KeyGen == keyGenDKG { + if conf.KeyGen == KeyGenDKG { // Lock files for DKG in node dirs. 
lockFile = fmt.Sprintf("/compose/node%d/cluster-lock.json", index) } diff --git a/testutil/compose/new.go b/testutil/compose/new.go index f71d5bbf0..929d3a29e 100644 --- a/testutil/compose/new.go +++ b/testutil/compose/new.go @@ -36,5 +36,5 @@ func New(ctx context.Context, dir string, conf Config) error { z.Str("config", fmt.Sprintf("%#v", conf)), ) - return writeConfig(dir, conf) + return WriteConfig(dir, conf) } diff --git a/testutil/compose/run.go b/testutil/compose/run.go index ecdc11974..138f37a97 100644 --- a/testutil/compose/run.go +++ b/testutil/compose/run.go @@ -56,6 +56,7 @@ func Run(ctx context.Context, dir string, conf Config) (TmplData, error) { Nodes: nodes, Bootnode: true, Monitoring: true, + MonitoringPorts: true, VCs: vcs, } @@ -70,14 +71,14 @@ func Run(ctx context.Context, dir string, conf Config) (TmplData, error) { } // getVC returns the validator client template data for the provided type and index. -func getVC(typ vcType, nodeIdx int, numVals int) (vc, error) { - vcByType := map[vcType]vc{ - vcLighthouse: { - Label: string(vcLighthouse), +func getVC(typ VCType, nodeIdx int, numVals int) (vc, error) { + vcByType := map[VCType]vc{ + VCLighthouse: { + Label: string(VCLighthouse), Build: "lighthouse", }, - vcTeku: { - Label: string(vcTeku), + VCTeku: { + Label: string(VCTeku), Image: "consensys/teku:latest", Command: `| validator-client @@ -90,7 +91,7 @@ func getVC(typ vcType, nodeIdx int, numVals int) (vc, error) { } resp := vcByType[typ] - if typ == vcTeku { + if typ == VCTeku { var keys []string for i := 0; i < numVals; i++ { keys = append(keys, fmt.Sprintf("/compose/node%d/keystore-%d.json:/compose/node%d/keystore-%d.txt", nodeIdx, i, nodeIdx, i)) diff --git a/testutil/compose/static/grafana/dash_alerts.json b/testutil/compose/static/grafana/dash_alerts.json new file mode 100644 index 000000000..d0f616547 --- /dev/null +++ b/testutil/compose/static/grafana/dash_alerts.json @@ -0,0 +1,812 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 
1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "liveNow": false, + "panels": [ + { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 5 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "max" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "0m", + "frequency": "15s", + "handler": 1, + "name": "Error Log Rate alert", + "noDataState": "ok", + "notifications": [] + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 9, + "x": 0, + "y": 0 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "exemplar": true, + "expr": 
"increase(app_log_error_total[30s]) ", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "op": "gt", + "value": 5, + "visible": true + } + ], + "title": "Error Log Rate", + "type": "timeseries" + }, + { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 5 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "0", + "frequency": "15s", + "handler": 1, + "name": "Warn Log Rate alert", + "noDataState": "ok", + "notifications": [] + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 8, + "x": 9, + "y": 0 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "exemplar": true, + "expr": "increase(app_log_warn_total[30s]) ", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": 
"critical", + "op": "gt", + "value": 5, + "visible": true + } + ], + "title": "Warn Log Rate", + "type": "timeseries" + }, + { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 1 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "max" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "0", + "frequency": "15s", + "handler": 1, + "name": "Validator API Error Rate alert", + "noDataState": "ok", + "notifications": [] + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 9, + "x": 0, + "y": 6 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "exemplar": true, + "expr": "increase(core_validatorapi_request_error_total{endpoint!=\"proxy\"}[30s]) ", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "op": "gt", + "value": 1, + "visible": true + } + ], + "title": "Validator API Error 
Rate", + "type": "timeseries" + }, + { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 5 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "max" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "0s", + "frequency": "15s", + "handler": 1, + "name": "Proxy API Error Rate alert", + "noDataState": "ok", + "notifications": [] + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 8, + "x": 9, + "y": 6 + }, + "id": 7, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "exemplar": true, + "expr": "increase(core_validatorapi_request_error_total{endpoint=\"proxy\"}[30s]) ", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "op": "gt", + "value": 5, + "visible": true + } + ], + "title": "Proxy API Error Rate", + "type": "timeseries" + }, + { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { 
+ "evaluator": { + "params": [ + 0.5 + ], + "type": "lt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "0s", + "frequency": "30s", + "handler": 1, + "name": "Broadcast Duty Rate alert", + "noDataState": "ok", + "notifications": [] + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 9, + "x": 0, + "y": 12 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "exemplar": true, + "expr": "increase(core_bcast_broadcast_total[30s])", + "interval": "", + "legendFormat": "{{job}} - {{type}}", + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "op": "lt", + "value": 0.5, + "visible": true + } + ], + "title": "Broadcast Duty Rate", + "type": "timeseries" + }, + { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 40 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + 
"query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "max" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "0s", + "frequency": "15s", + "handler": 1, + "name": "Outstanding Duty Count alert", + "noDataState": "ok", + "notifications": [] + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 8, + "x": 9, + "y": 12 + }, + "id": 6, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "exemplar": true, + "expr": "core_bcast_broadcast_total - core_scheduler_duty_total", + "interval": "", + "legendFormat": "{{job}} - {{type}}", + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "op": "gt", + "value": 40, + "visible": true + } + ], + "title": "Outstanding Duty Count", + "type": "timeseries" + } + ], + "schemaVersion": 35, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-5m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Alerts", + "uid": "a9mqsmrnk", + "version": 1, + 
"weekStart": "" +} diff --git a/testutil/compose/static/grafana/simnet_dash.json b/testutil/compose/static/grafana/dash_simnet.json similarity index 100% rename from testutil/compose/static/grafana/simnet_dash.json rename to testutil/compose/static/grafana/dash_simnet.json diff --git a/testutil/compose/static/grafana/grafana.ini b/testutil/compose/static/grafana/grafana.ini index f9a212b73..2ab0213ab 100644 --- a/testutil/compose/static/grafana/grafana.ini +++ b/testutil/compose/static/grafana/grafana.ini @@ -4,3 +4,6 @@ org_role = Admin [log] level = warn + +[unified_alerting] +enabled = false diff --git a/testutil/compose/static/grafana/notifiers.yml b/testutil/compose/static/grafana/notifiers.yml new file mode 100644 index 000000000..be27b1dc6 --- /dev/null +++ b/testutil/compose/static/grafana/notifiers.yml @@ -0,0 +1,14 @@ +apiVersion: 1 + +notifiers: + - name: compose-webhook + type: webhook + uid: compose-webhook + org_id: 1 + is_default: true + send_reminder: false + frequency: 1s + disable_resolve_message: true + settings: + url: http://host.docker.internal:26354 + httpMethod: POST diff --git a/testutil/compose/static/prometheus/prometheus.yml b/testutil/compose/static/prometheus/prometheus.yml index 1278b5931..b8e615254 100644 --- a/testutil/compose/static/prometheus/prometheus.yml +++ b/testutil/compose/static/prometheus/prometheus.yml @@ -1,6 +1,6 @@ global: - scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute. - evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute. + scrape_interval: 5s # Set the scrape interval to every 15 seconds. Default is every 1 minute. + evaluation_interval: 5s # Evaluate rules every 15 seconds. The default is every 1 minute. 
scrape_configs: - job_name: 'node0' diff --git a/testutil/compose/template.go b/testutil/compose/template.go index 885af5c59..b8c6739b8 100644 --- a/testutil/compose/template.go +++ b/testutil/compose/template.go @@ -43,8 +43,9 @@ type TmplData struct { Nodes []node VCs []vc - Bootnode bool - Monitoring bool + Bootnode bool + Monitoring bool + MonitoringPorts bool } // vc represents a validator client service in a docker-compose.yml. @@ -58,8 +59,9 @@ type vc struct { // node represents a charon node service in a docker-compose.yml. type node struct { - EnvVars []kv - Ports []port + ImageTag string + EnvVars []kv + Ports []port } // kv is a key value pair. diff --git a/testutil/compose/testdata/TestDockerCompose_define_create_template.golden b/testutil/compose/testdata/TestDockerCompose_define_create_template.golden index 7f4908de5..3fbd40156 100644 --- a/testutil/compose/testdata/TestDockerCompose_define_create_template.golden +++ b/testutil/compose/testdata/TestDockerCompose_define_create_template.golden @@ -5,11 +5,13 @@ "CharonCommand": "No charon commands needed for keygen=create define step", "Nodes": [ { + "ImageTag": "", "EnvVars": null, "Ports": null } ], "VCs": null, "Bootnode": false, - "Monitoring": false + "Monitoring": false, + "MonitoringPorts": false } \ No newline at end of file diff --git a/testutil/compose/testdata/TestDockerCompose_define_create_yml.golden b/testutil/compose/testdata/TestDockerCompose_define_create_yml.golden index 94206ad0c..46b3e6886 100644 --- a/testutil/compose/testdata/TestDockerCompose_define_create_yml.golden +++ b/testutil/compose/testdata/TestDockerCompose_define_create_yml.golden @@ -11,5 +11,6 @@ x-node-base: &node-base services: node0: <<: *node-base + networks: compose: diff --git a/testutil/compose/testdata/TestDockerCompose_define_dkg_template.golden b/testutil/compose/testdata/TestDockerCompose_define_dkg_template.golden index 0c8b23af7..9c10e407a 100644 --- 
a/testutil/compose/testdata/TestDockerCompose_define_dkg_template.golden +++ b/testutil/compose/testdata/TestDockerCompose_define_dkg_template.golden @@ -5,6 +5,7 @@ "CharonCommand": "[create,dkg]", "Nodes": [ { + "ImageTag": "", "EnvVars": [ { "Key": "name", @@ -40,5 +41,6 @@ ], "VCs": null, "Bootnode": false, - "Monitoring": false + "Monitoring": false, + "MonitoringPorts": false } \ No newline at end of file diff --git a/testutil/compose/testdata/TestDockerCompose_define_dkg_yml.golden b/testutil/compose/testdata/TestDockerCompose_define_dkg_yml.golden index e01ef9d81..f0d1924e1 100644 --- a/testutil/compose/testdata/TestDockerCompose_define_dkg_yml.golden +++ b/testutil/compose/testdata/TestDockerCompose_define_dkg_yml.golden @@ -11,6 +11,7 @@ x-node-base: &node-base services: node0: <<: *node-base + environment: CHARON_NAME: compose CHARON_NUM_VALIDATORS: 1 diff --git a/testutil/compose/testdata/TestDockerCompose_lock_create_template.golden b/testutil/compose/testdata/TestDockerCompose_lock_create_template.golden index 73d5ebdc8..4a41dc11d 100644 --- a/testutil/compose/testdata/TestDockerCompose_lock_create_template.golden +++ b/testutil/compose/testdata/TestDockerCompose_lock_create_template.golden @@ -5,6 +5,7 @@ "CharonCommand": "[create,cluster]", "Nodes": [ { + "ImageTag": "", "EnvVars": [ { "Key": "threshold", @@ -32,5 +33,6 @@ ], "VCs": null, "Bootnode": false, - "Monitoring": false + "Monitoring": false, + "MonitoringPorts": false } \ No newline at end of file diff --git a/testutil/compose/testdata/TestDockerCompose_lock_create_yml.golden b/testutil/compose/testdata/TestDockerCompose_lock_create_yml.golden index e2621c74a..51a0a7d02 100644 --- a/testutil/compose/testdata/TestDockerCompose_lock_create_yml.golden +++ b/testutil/compose/testdata/TestDockerCompose_lock_create_yml.golden @@ -11,6 +11,7 @@ x-node-base: &node-base services: node0: <<: *node-base + environment: CHARON_THRESHOLD: 3 CHARON_NODES: 4 diff --git 
a/testutil/compose/testdata/TestDockerCompose_lock_dkg_template.golden b/testutil/compose/testdata/TestDockerCompose_lock_dkg_template.golden index febb03577..98005b66a 100644 --- a/testutil/compose/testdata/TestDockerCompose_lock_dkg_template.golden +++ b/testutil/compose/testdata/TestDockerCompose_lock_dkg_template.golden @@ -5,6 +5,7 @@ "CharonCommand": "dkg", "Nodes": [ { + "ImageTag": "", "EnvVars": [ { "Key": "data-dir", @@ -74,6 +75,7 @@ "Ports": null }, { + "ImageTag": "", "EnvVars": [ { "Key": "data-dir", @@ -143,6 +145,7 @@ "Ports": null }, { + "ImageTag": "", "EnvVars": [ { "Key": "data-dir", @@ -212,6 +215,7 @@ "Ports": null }, { + "ImageTag": "", "EnvVars": [ { "Key": "data-dir", @@ -283,5 +287,6 @@ ], "VCs": null, "Bootnode": true, - "Monitoring": false + "Monitoring": false, + "MonitoringPorts": false } \ No newline at end of file diff --git a/testutil/compose/testdata/TestDockerCompose_lock_dkg_yml.golden b/testutil/compose/testdata/TestDockerCompose_lock_dkg_yml.golden index cab63469c..f86171582 100644 --- a/testutil/compose/testdata/TestDockerCompose_lock_dkg_yml.golden +++ b/testutil/compose/testdata/TestDockerCompose_lock_dkg_yml.golden @@ -11,6 +11,7 @@ x-node-base: &node-base services: node0: <<: *node-base + environment: CHARON_DATA_DIR: /compose/node0 CHARON_JAEGER_SERVICE: node0 @@ -31,6 +32,7 @@ services: node1: <<: *node-base + environment: CHARON_DATA_DIR: /compose/node1 CHARON_JAEGER_SERVICE: node1 @@ -51,6 +53,7 @@ services: node2: <<: *node-base + environment: CHARON_DATA_DIR: /compose/node2 CHARON_JAEGER_SERVICE: node2 @@ -71,6 +74,7 @@ services: node3: <<: *node-base + environment: CHARON_DATA_DIR: /compose/node3 CHARON_JAEGER_SERVICE: node3 diff --git a/testutil/compose/testdata/TestDockerCompose_run_template.golden b/testutil/compose/testdata/TestDockerCompose_run_template.golden index 8a140a5f7..f4c71e566 100644 --- a/testutil/compose/testdata/TestDockerCompose_run_template.golden +++ 
b/testutil/compose/testdata/TestDockerCompose_run_template.golden @@ -5,6 +5,7 @@ "CharonCommand": "run", "Nodes": [ { + "ImageTag": "", "EnvVars": [ { "Key": "data-dir", @@ -74,6 +75,7 @@ "Ports": null }, { + "ImageTag": "", "EnvVars": [ { "Key": "data-dir", @@ -143,6 +145,7 @@ "Ports": null }, { + "ImageTag": "", "EnvVars": [ { "Key": "data-dir", @@ -212,6 +215,7 @@ "Ports": null }, { + "ImageTag": "", "EnvVars": [ { "Key": "data-dir", @@ -312,5 +316,6 @@ } ], "Bootnode": true, - "Monitoring": true + "Monitoring": true, + "MonitoringPorts": true } \ No newline at end of file diff --git a/testutil/compose/testdata/TestDockerCompose_run_yml.golden b/testutil/compose/testdata/TestDockerCompose_run_yml.golden index 8382254df..e4ee23be9 100644 --- a/testutil/compose/testdata/TestDockerCompose_run_yml.golden +++ b/testutil/compose/testdata/TestDockerCompose_run_yml.golden @@ -11,6 +11,7 @@ x-node-base: &node-base services: node0: <<: *node-base + environment: CHARON_DATA_DIR: /compose/node0 CHARON_JAEGER_SERVICE: node0 @@ -31,6 +32,7 @@ services: node1: <<: *node-base + environment: CHARON_DATA_DIR: /compose/node1 CHARON_JAEGER_SERVICE: node1 @@ -51,6 +53,7 @@ services: node2: <<: *node-base + environment: CHARON_DATA_DIR: /compose/node2 CHARON_JAEGER_SERVICE: node2 @@ -71,6 +74,7 @@ services: node3: <<: *node-base + environment: CHARON_DATA_DIR: /compose/node3 CHARON_JAEGER_SERVICE: node3 @@ -158,14 +162,16 @@ services: volumes: - ./grafana/datasource.yml:/etc/grafana/provisioning/datasources/datasource.yml - ./grafana/dashboards.yml:/etc/grafana/provisioning/dashboards/datasource.yml + - ./grafana/notifiers.yml:/etc/grafana/provisioning/notifiers/notifiers.yml - ./grafana/grafana.ini:/etc/grafana/grafana.ini:ro - - ./grafana/simnet_dash.json:/etc/dashboards/simnet_dash.json + - ./grafana/dash_simnet.json:/etc/dashboards/dash_simnet.json + - ./grafana/dash_alerts.json:/etc/dashboards/dash_alerts.json jaeger: image: jaegertracing/all-in-one:latest networks: [compose] 
ports: - "16686:16686" - + networks: compose: