/
forwarder_health.go
220 lines (184 loc) · 5.67 KB
/
forwarder_health.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2016-2020 Datadog, Inc.
package forwarder
import (
"expvar"
"fmt"
"net/http"
"regexp"
"time"
"github.com/DataDog/datadog-agent/pkg/status/health"
httputils "github.com/DataDog/datadog-agent/pkg/util/http"
"github.com/DataDog/datadog-agent/pkg/util/log"
"github.com/DataDog/datadog-agent/pkg/version"
)
// fakeAPIKey is a placeholder key made of 32 zeros. validateAPIKey treats it
// as valid without contacting the validation endpoint.
const fakeAPIKey = "00000000000000000000000000000000"
var (
	// Status values stored in apiKeyStatus for each checked key. Their text is
	// assigned in init().
	apiKeyStatusUnknown expvar.String
	apiKeyInvalid       expvar.String
	apiKeyValid         expvar.String
	apiKeyFake          expvar.String

	// validateAPIKeyTimeout is the time budget for validating every
	// configured key; it is split evenly between the keys.
	validateAPIKeyTimeout = 10 * time.Second

	// apiKeyStatus maps an obfuscated key description to one of the status
	// values above.
	apiKeyStatus expvar.Map
)
// init assigns the human-readable text carried by each API key status value.
func init() {
	statusTexts := []struct {
		status *expvar.String
		text   string
	}{
		{&apiKeyStatusUnknown, "Unable to validate API Key"},
		{&apiKeyInvalid, "API Key invalid"},
		{&apiKeyValid, "API Key valid"},
		{&apiKeyFake, "Fake API Key that skips validation"},
	}
	for _, entry := range statusTexts {
		entry.status.Set(entry.text)
	}
}
// initForwarderHealthExpvars empties the apiKeyStatus map and stores it in
// forwarderExpvars (declared outside this section) under the "APIKeyStatus" key.
func initForwarderHealthExpvars() {
// Init removes any entry left in the map before it is published.
apiKeyStatus.Init()
forwarderExpvars.Set("APIKeyStatus", &apiKeyStatus)
}
// forwarderHealth reports the health status of the Forwarder. A Forwarder is
// unhealthy if none of its API keys is valid any longer.
type forwarderHealth struct {
// health is the readiness handle registered under the name "forwarder" by Start.
health *health.Handle
// stop is a buffered (capacity 1) channel; Stop sends on it to end healthCheckLoop.
stop chan bool
// stopped is closed by healthCheckLoop when it returns.
stopped chan struct{}
// timeout is the HTTP client timeout used for each validation request:
// validateAPIKeyTimeout divided by the number of configured keys.
timeout time.Duration
// keysPerDomains holds the API keys configured for each domain. It is read,
// never written, by the code in this file section.
keysPerDomains map[string][]string
// keysPerAPIEndpoint holds the API keys grouped by the endpoint used to
// validate them; it is filled by computeDomainsURL.
keysPerAPIEndpoint map[string][]string
// disableAPIKeyChecking turns Start and Stop into no-ops when true.
disableAPIKeyChecking bool
// validationInterval is the period of the ticker that triggers re-validation.
validationInterval time.Duration
}
// init allocates the control channels and the endpoint map, fills the map via
// computeDomainsURL and derives the per-request timeout.
func (fh *forwarderHealth) init() {
	fh.stop = make(chan bool, 1)
	fh.stopped = make(chan struct{})
	fh.keysPerAPIEndpoint = make(map[string][]string)
	fh.computeDomainsURL()

	// validateAPIKeyTimeout is the longest a full validation pass may take, so
	// each key gets an equal share of it.
	totalKeys := 0
	for _, keys := range fh.keysPerDomains {
		totalKeys += len(keys)
	}

	perKeyTimeout := validateAPIKeyTimeout
	if totalKeys > 0 {
		perKeyTimeout /= time.Duration(totalKeys)
	}
	fh.timeout = perKeyTimeout
}
// Start registers the "forwarder" readiness handle, initializes the internal
// state and launches healthCheckLoop in its own goroutine. It does nothing
// when API key checking is disabled.
func (fh *forwarderHealth) Start() {
if fh.disableAPIKeyChecking {
return
}
fh.health = health.RegisterReadiness("forwarder")
// init must run before the goroutine starts: the loop uses the channels it creates.
fh.init()
go fh.healthCheckLoop()
}
// Stop deregisters the health handle, asks healthCheckLoop to exit and waits
// until it has done so. It does nothing when API key checking is disabled.
// NOTE(review): the channels are only created by Start (through init), so this
// appears to assume Start was called first - confirm with callers.
func (fh *forwarderHealth) Stop() {
if fh.disableAPIKeyChecking {
return
}
fh.health.Deregister() //nolint:errcheck
// stop has a buffer of one, so this send does not block even if the loop has
// already returned on its own.
fh.stop <- true
// stopped is closed by healthCheckLoop on exit.
<-fh.stopped
}
// healthCheckLoop validates the API keys once, then again on every tick of
// validationInterval, until Stop is called or no key is valid. It closes
// fh.stopped when it returns.
func (fh *forwarderHealth) healthCheckLoop() {
	log.Debug("Waiting for APIkey validity to be confirmed.")

	ticker := time.NewTicker(fh.validationInterval)
	defer ticker.Stop()
	defer close(fh.stopped)

	// noValidKey runs one validation pass and logs when it finds no usable key.
	noValidKey := func() bool {
		if fh.hasValidAPIKey() {
			return false
		}
		log.Errorf("No valid api key found, reporting the forwarder as unhealthy.")
		return true
	}

	// If no key is valid there is no point in checking again: keys do not
	// become valid on their own. Returning also stops draining fh.health.C.
	// NOTE(review): presumably the health package reports the component as
	// unhealthy once that channel is no longer read - confirm.
	if noValidKey() {
		return
	}

	for {
		select {
		case <-fh.stop:
			return
		case <-ticker.C:
			if noValidKey() {
				return
			}
		case <-fh.health.C:
			// Drain the health channel; nothing else to do.
		}
	}
}
// datadogDomainRegexp matches the "datadoghq.<tld>" part of a Datadog domain.
// It is compiled once at package scope, and the dot is escaped so that only a
// literal "datadoghq." matches.
var datadogDomainRegexp = regexp.MustCompile(`datadoghq\.[a-z]*`)

// computeDomainsURL fills fh.keysPerAPIEndpoint with the API keys of
// fh.keysPerDomains, grouped by the endpoint used to validate them.
//
// A domain containing "datadoghq.<tld>" is mapped to "https://api.datadoghq.<tld>";
// any other domain is used unchanged. Keys of several domains that map to the
// same endpoint are appended to the same entry.
func (fh *forwarderHealth) computeDomainsURL() {
	// Writing to a nil map panics; allocate it if the caller has not done so.
	if fh.keysPerAPIEndpoint == nil {
		fh.keysPerAPIEndpoint = make(map[string][]string, len(fh.keysPerDomains))
	}

	for domain, apiKeys := range fh.keysPerDomains {
		apiDomain := domain
		// A successful match is never empty: it contains at least "datadoghq.".
		if match := datadogDomainRegexp.FindString(domain); match != "" {
			apiDomain = "https://api." + match
		}
		fh.keysPerAPIEndpoint[apiDomain] = append(fh.keysPerAPIEndpoint[apiDomain], apiKeys...)
	}
}
// setAPIKeyStatus records status in apiKeyStatus under a label that shows at
// most the last five characters of apiKey. The domain argument is not used, so
// keys sharing the same five-character suffix share one entry.
func (fh *forwarderHealth) setAPIKeyStatus(apiKey string, domain string, status expvar.Var) {
	const visibleChars = 5

	suffix := apiKey
	if hidden := len(apiKey) - visibleChars; hidden > 0 {
		suffix = apiKey[hidden:]
	}
	apiKeyStatus.Set(fmt.Sprintf("API key ending with %s", suffix), status)
}
// validateAPIKey asks the validation endpoint of domain whether apiKey is
// valid and records the outcome through setAPIKeyStatus.
//
// It returns (true, nil) for a valid key or for fakeAPIKey (which is never
// sent over the network), (false, nil) when the server rejects the key, and
// (false, err) when the request cannot be built or sent or when the server
// answers with an unexpected status code.
func (fh *forwarderHealth) validateAPIKey(apiKey, domain string) (bool, error) {
	if apiKey == fakeAPIKey {
		fh.setAPIKeyStatus(apiKey, domain, &apiKeyFake)
		return true, nil
	}

	// undetermined flags the key as "unknown" and reports the cause to the caller.
	undetermined := func(cause error) (bool, error) {
		fh.setAPIKeyStatus(apiKey, domain, &apiKeyStatusUnknown)
		return false, cause
	}

	endpoint := fmt.Sprintf("%s%s?api_key=%s", domain, v1ValidateEndpoint, apiKey)
	client := &http.Client{
		Transport: httputils.CreateHTTPTransport(),
		Timeout:   fh.timeout,
	}

	req, err := http.NewRequest("GET", endpoint, nil)
	if err != nil {
		return undetermined(err)
	}
	req.Header.Set(useragentHTTPHeaderKey, fmt.Sprintf("datadog-agent/%s", version.AgentVersion))

	resp, err := client.Do(req)
	if err != nil {
		return undetermined(err)
	}
	defer resp.Body.Close()

	// Server will respond 200 if the key is valid or 403 if invalid
	switch resp.StatusCode {
	case 200:
		fh.setAPIKeyStatus(apiKey, domain, &apiKeyValid)
		return true, nil
	case 403:
		fh.setAPIKeyStatus(apiKey, domain, &apiKeyInvalid)
		return false, nil
	default:
		return undetermined(fmt.Errorf("Unexpected response code from the apikey validation endpoint: %v", resp.StatusCode))
	}
}
// hasValidAPIKey validates every key of fh.keysPerAPIEndpoint and reports
// whether the forwarder should still be considered to have a usable key.
//
// It returns true when at least one key is valid or when any validation call
// failed with an error, and false otherwise (including when no key is
// configured).
func (fh *forwarderHealth) hasValidAPIKey() bool {
	var foundValid, sawError bool

	for domain, apiKeys := range fh.keysPerAPIEndpoint {
		for _, apiKey := range apiKeys {
			// NOTE(review): the complete key is handed to the logger below,
			// whereas setAPIKeyStatus only exposes its last characters -
			// confirm that the log package scrubs it.
			ok, err := fh.validateAPIKey(apiKey, domain)
			switch {
			case err != nil:
				log.Debug(err)
				sawError = true
			case ok:
				log.Debugf("api_key '%s' for domain %s is valid", apiKey, domain)
				foundValid = true
			default:
				log.Warnf("api_key '%s' for domain %s is invalid", apiKey, domain)
			}
		}
	}

	// If there is an error during the api call, we assume that there is a
	// valid key to avoid killing lots of agent on an outage.
	return sawError || foundValid
}