forked from ava-labs/avalanchego
-
Notifications
You must be signed in to change notification settings - Fork 4
/
health.go
114 lines (99 loc) · 3.46 KB
/
health.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
// Copyright (C) 2019-2022, Ava Labs, Inc. All rights reserved.
// See the file LICENSE for licensing terms.
package platformvm
import (
"context"
"errors"
"fmt"
"strings"
"time"
"go.uber.org/zap"
"github.com/MetalBlockchain/metalgo/database"
"github.com/MetalBlockchain/metalgo/utils/constants"
)
// fallbackMinPercentConnected is the minimum fraction of Primary Network stake
// (0.8 == 80%) that HealthCheck requires this node to be connected to when
// MinPercentConnectedStakeHealthy has no entry for the Primary Network.
const fallbackMinPercentConnected = 0.8
// errNotEnoughStake is wrapped into the error returned by HealthCheck when the
// connected stake on the Primary Network, or on any tracked subnet, is below
// the required minimum.
var errNotEnoughStake = errors.New("not connected to enough stake")
// HealthCheck reports whether this node is connected to enough stake on the
// Primary Network and on every subnet in vm.TrackedSubnets.
//
// On every call it also updates the percent-connected and time-until-unstake
// metrics for the Primary Network and for each tracked subnet.
//
// The returned details value is a map[string]float64 holding the connected
// fraction under "primary-percentConnected" and "<subnetID>-percentConnected".
//
// The returned error is:
//   - a wrapped lookup error (with nil details) if the connected percentage or
//     the local validator could not be fetched;
//   - an error wrapping errNotEnoughStake (with details populated) if any
//     network is below its minimum and vm.StakingEnabled is true;
//   - nil otherwise, including whenever vm.StakingEnabled is false.
func (vm *VM) HealthCheck(context.Context) (interface{}, error) {
	primaryPercentConnected, err := vm.getPercentConnected(constants.PrimaryNetworkID)
	if err != nil {
		return nil, fmt.Errorf("couldn't get percent connected: %w", err)
	}
	vm.metrics.SetPercentConnected(primaryPercentConnected)
	details := map[string]float64{
		"primary-percentConnected": primaryPercentConnected,
	}

	localPrimaryValidator, err := vm.state.GetCurrentValidator(
		constants.PrimaryNetworkID,
		vm.ctx.NodeID,
	)
	// errors.Is (rather than ==) so that a wrapped ErrNotFound is still
	// recognised as "this node is not currently a validator".
	switch {
	case err == nil:
		vm.metrics.SetTimeUntilUnstake(time.Until(localPrimaryValidator.EndTime))
	case errors.Is(err, database.ErrNotFound):
		vm.metrics.SetTimeUntilUnstake(0)
	default:
		return nil, fmt.Errorf("couldn't get current local validator: %w", err)
	}

	primaryMinPercentConnected, ok := vm.MinPercentConnectedStakeHealthy[constants.PrimaryNetworkID]
	if !ok {
		// This should never happen according to the comment for
		// [MinPercentConnectedStakeHealthy] but we include it here to avoid the
		// situation where a regression causes the key to be missing so that we
		// don't accidentally set [primaryMinPercentConnected] to 0.
		vm.ctx.Log.Warn("primary network min connected stake not given",
			zap.Float64("fallback value", fallbackMinPercentConnected),
		)
		primaryMinPercentConnected = fallbackMinPercentConnected
	}

	// Collect every shortfall so the final error lists all unhealthy networks
	// rather than only the first one found.
	var errorReasons []string
	if primaryPercentConnected < primaryMinPercentConnected {
		errorReasons = append(errorReasons,
			fmt.Sprintf("connected to %f%% of primary network stake; should be connected to at least %f%%",
				primaryPercentConnected*100,
				primaryMinPercentConnected*100,
			),
		)
	}

	for subnetID := range vm.TrackedSubnets {
		percentConnected, err := vm.getPercentConnected(subnetID)
		if err != nil {
			return nil, fmt.Errorf("couldn't get percent connected for %q: %w", subnetID, err)
		}

		// Subnets without an explicit minimum inherit the Primary Network's.
		minPercentConnected, ok := vm.MinPercentConnectedStakeHealthy[subnetID]
		if !ok {
			minPercentConnected = primaryMinPercentConnected
		}

		vm.metrics.SetSubnetPercentConnected(subnetID, percentConnected)
		key := fmt.Sprintf("%s-percentConnected", subnetID)
		details[key] = percentConnected

		localSubnetValidator, err := vm.state.GetCurrentValidator(
			subnetID,
			vm.ctx.NodeID,
		)
		switch {
		case err == nil:
			vm.metrics.SetTimeUntilSubnetUnstake(subnetID, time.Until(localSubnetValidator.EndTime))
		case errors.Is(err, database.ErrNotFound):
			vm.metrics.SetTimeUntilSubnetUnstake(subnetID, 0)
		default:
			return nil, fmt.Errorf("couldn't get current subnet validator of %q: %w", subnetID, err)
		}

		if percentConnected < minPercentConnected {
			errorReasons = append(errorReasons,
				fmt.Sprintf("connected to %f%% of %q weight; should be connected to at least %f%%",
					percentConnected*100,
					subnetID,
					minPercentConnected*100,
				),
			)
		}
	}

	// With staking disabled, shortfalls are recorded in metrics and details
	// but never make the node report as unhealthy.
	if len(errorReasons) == 0 || !vm.StakingEnabled {
		return details, nil
	}

	return details, fmt.Errorf("platform layer is unhealthy err: %w, details: %s",
		errNotEnoughStake,
		strings.Join(errorReasons, ", "),
	)
}