-
Notifications
You must be signed in to change notification settings - Fork 15
/
upstream_monitor.go
101 lines (87 loc) · 2.64 KB
/
upstream_monitor.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
package cli
import (
"context"
"sync"
"sync/atomic"
"time"
"github.com/miekg/dns"
"github.com/Control-D-Inc/ctrld"
)
const (
// maxFailureRequest is the maximum failed queries allowed before an upstream is marked as down.
maxFailureRequest = 100
// checkUpstreamBackoffSleep is the time interval between each upstream checks.
checkUpstreamBackoffSleep = 2 * time.Second
)
// upstreamMonitor performs monitoring upstreams health.
type upstreamMonitor struct {
cfg *ctrld.Config
down map[string]*atomic.Bool
failureReq map[string]*atomic.Uint64
mu sync.Mutex
checking map[string]bool
}
func newUpstreamMonitor(cfg *ctrld.Config) *upstreamMonitor {
um := &upstreamMonitor{
cfg: cfg,
down: make(map[string]*atomic.Bool),
failureReq: make(map[string]*atomic.Uint64),
checking: make(map[string]bool),
}
for n := range cfg.Upstream {
upstream := upstreamPrefix + n
um.down[upstream] = new(atomic.Bool)
um.failureReq[upstream] = new(atomic.Uint64)
}
um.down[upstreamOS] = new(atomic.Bool)
um.failureReq[upstreamOS] = new(atomic.Uint64)
return um
}
// increaseFailureCount increase failed queries count for an upstream by 1.
func (um *upstreamMonitor) increaseFailureCount(upstream string) {
failedCount := um.failureReq[upstream].Add(1)
um.down[upstream].Store(failedCount >= maxFailureRequest)
}
// isDown reports whether the given upstream is being marked as down.
func (um *upstreamMonitor) isDown(upstream string) bool {
return um.down[upstream].Load()
}
// reset marks an upstream as up and set failed queries counter to zero.
func (um *upstreamMonitor) reset(upstream string) {
um.failureReq[upstream].Store(0)
um.down[upstream].Store(false)
}
// checkUpstream checks the given upstream status, periodically sending query to upstream
// until successfully. An upstream status/counter will be reset once it becomes reachable.
func (um *upstreamMonitor) checkUpstream(upstream string, uc *ctrld.UpstreamConfig) {
um.mu.Lock()
isChecking := um.checking[upstream]
if isChecking {
um.mu.Unlock()
return
}
um.checking[upstream] = true
um.mu.Unlock()
resolver, err := ctrld.NewResolver(uc)
if err != nil {
mainLog.Load().Warn().Err(err).Msg("could not check upstream")
return
}
msg := new(dns.Msg)
msg.SetQuestion(".", dns.TypeNS)
check := func() error {
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
uc.ReBootstrap()
_, err := resolver.Resolve(ctx, msg)
return err
}
for {
if err := check(); err == nil {
mainLog.Load().Debug().Msgf("upstream %q is online", uc.Endpoint)
um.reset(upstream)
return
}
time.Sleep(checkUpstreamBackoffSleep)
}
}