forked from hashicorp/consul
-
Notifications
You must be signed in to change notification settings - Fork 0
/
acl.go
439 lines (391 loc) · 11.5 KB
/
acl.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
package agent
import (
"fmt"
"sync"
"time"
"github.com/armon/go-metrics"
"github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/agent/config"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/types"
"github.com/hashicorp/golang-lru"
"github.com/hashicorp/serf/serf"
)
// There's enough behavior difference with client-side ACLs that we've
// intentionally kept this code separate from the server-side ACL code in
// consul/acl.go. We may refactor some of the caching logic in the future,
// but for now we are developing this separately to see how things shake out.
const (
// anonymousToken is the token ID we re-write to if there is no token ID
// provided.
anonymousToken = "anonymous"
// Maximum number of cached ACL entries.
aclCacheSize = 10 * 1024
)
// aclCacheEntry is used to cache ACL tokens.
type aclCacheEntry struct {
// ACL is the cached ACL.
ACL acl.ACL
// Expires is set based on the TTL for the ACL.
Expires time.Time
// ETag is used as an optimization when fetching ACLs from servers to
// avoid transmitting data back when the agent has a good copy, which is
// usually the case when refreshing a TTL.
ETag string
}
// aclManager is used by the agent to keep track of state related to ACLs,
// including caching tokens from the servers. This has some internal state that
// we don't want to dump into the agent itself.
type aclManager struct {
// acls is a cache mapping ACL tokens to compiled policies.
acls *lru.TwoQueueCache
// master is the ACL to use when the agent master token is supplied.
master acl.ACL
// down is the ACL to use when the servers are down. This may be nil
// which means to try and use the cached policy if there is one (or
// deny if there isn't a policy in the cache).
down acl.ACL
// disabled is used to keep track of feedback from the servers that ACLs
// are disabled. If the manager discovers that ACLs are disabled, this
// will be set to the next time we should check to see if they have been
// enabled. This helps cut useless traffic, but allows us to turn on ACL
// support at the servers without having to restart the whole cluster.
disabled time.Time
disabledLock sync.RWMutex
}
// newACLManager returns an ACL manager based on the given config.
func newACLManager(config *config.RuntimeConfig) (*aclManager, error) {
// Set up the cache from ID to ACL (we don't cache policies like the
// servers; only one level).
acls, err := lru.New2Q(aclCacheSize)
if err != nil {
return nil, err
}
// Build a policy for the agent master token.
policy := &acl.Policy{
Agents: []*acl.AgentPolicy{
&acl.AgentPolicy{
Node: config.NodeName,
Policy: acl.PolicyWrite,
},
},
Nodes: []*acl.NodePolicy{
&acl.NodePolicy{
Name: "",
Policy: acl.PolicyRead,
},
},
}
master, err := acl.New(acl.DenyAll(), policy, nil)
if err != nil {
return nil, err
}
var down acl.ACL
switch config.ACLDownPolicy {
case "allow":
down = acl.AllowAll()
case "deny":
down = acl.DenyAll()
case "extend-cache":
// Leave the down policy as nil to signal this.
default:
return nil, fmt.Errorf("invalid ACL down policy %q", config.ACLDownPolicy)
}
// Give back a manager.
return &aclManager{
acls: acls,
master: master,
down: down,
}, nil
}
// isDisabled returns true if the manager has discovered that ACLs are disabled
// on the servers.
func (m *aclManager) isDisabled() bool {
m.disabledLock.RLock()
defer m.disabledLock.RUnlock()
return time.Now().Before(m.disabled)
}
// lookupACL attempts to locate the compiled policy associated with the given
// token. The agent may be used to perform RPC calls to the servers to fetch
// policies that aren't in the cache.
func (m *aclManager) lookupACL(a *Agent, id string) (acl.ACL, error) {
// Handle some special cases for the ID.
if len(id) == 0 {
id = anonymousToken
} else if acl.RootACL(id) != nil {
return nil, acl.ErrRootDenied
} else if a.tokens.IsAgentMasterToken(id) {
return m.master, nil
}
// Try the cache first.
var cached *aclCacheEntry
if raw, ok := m.acls.Get(id); ok {
cached = raw.(*aclCacheEntry)
}
if cached != nil && time.Now().Before(cached.Expires) {
metrics.IncrCounter([]string{"acl", "cache_hit"}, 1)
return cached.ACL, nil
}
metrics.IncrCounter([]string{"acl", "cache_miss"}, 1)
// At this point we might have a stale cached ACL, or none at all, so
// try to contact the servers.
args := structs.ACLPolicyRequest{
Datacenter: a.config.ACLDatacenter,
ACL: id,
}
if cached != nil {
args.ETag = cached.ETag
}
var reply structs.ACLPolicy
err := a.RPC("ACL.GetPolicy", &args, &reply)
if err != nil {
if acl.IsErrDisabled(err) {
a.logger.Printf("[DEBUG] agent: ACLs disabled on servers, will check again after %s", a.config.ACLDisabledTTL)
m.disabledLock.Lock()
m.disabled = time.Now().Add(a.config.ACLDisabledTTL)
m.disabledLock.Unlock()
return nil, nil
} else if acl.IsErrNotFound(err) {
return nil, acl.ErrNotFound
} else {
a.logger.Printf("[DEBUG] agent: Failed to get policy for ACL from servers: %v", err)
if m.down != nil {
return m.down, nil
} else if cached != nil {
return cached.ACL, nil
} else {
return acl.DenyAll(), nil
}
}
}
// Use the old cached compiled ACL if we can, otherwise compile it and
// resolve any parents.
var compiled acl.ACL
if cached != nil && cached.ETag == reply.ETag {
compiled = cached.ACL
} else {
parent := acl.RootACL(reply.Parent)
if parent == nil {
parent, err = m.lookupACL(a, reply.Parent)
if err != nil {
return nil, err
}
}
acl, err := acl.New(parent, reply.Policy, nil)
if err != nil {
return nil, err
}
compiled = acl
}
// Update the cache.
cached = &aclCacheEntry{
ACL: compiled,
ETag: reply.ETag,
}
if reply.TTL > 0 {
cached.Expires = time.Now().Add(reply.TTL)
}
m.acls.Add(id, cached)
return compiled, nil
}
// resolveToken is the primary interface used by ACL-checkers in the agent
// endpoints, which is the one place where we do some ACL enforcement on
// clients. Some of the enforcement is normative (e.g. self and monitor)
// and some is informative (e.g. catalog and health).
func (a *Agent) resolveToken(id string) (acl.ACL, error) {
// Disable ACLs if version 8 enforcement isn't enabled.
if !a.config.ACLEnforceVersion8 {
return nil, nil
}
// Bail if there's no ACL datacenter configured. This means that agent
// enforcement isn't on.
if a.config.ACLDatacenter == "" {
return nil, nil
}
// Bail if the ACL manager is disabled. This happens if it gets feedback
// from the servers that ACLs are disabled.
if a.acls.isDisabled() {
return nil, nil
}
// This will look in the cache and fetch from the servers if necessary.
return a.acls.lookupACL(a, id)
}
// vetServiceRegister makes sure the service registration action is allowed by
// the given token.
func (a *Agent) vetServiceRegister(token string, service *structs.NodeService) error {
// Resolve the token and bail if ACLs aren't enabled.
rule, err := a.resolveToken(token)
if err != nil {
return err
}
if rule == nil {
return nil
}
// Vet the service itself.
if !rule.ServiceWrite(service.Service, nil) {
return acl.ErrPermissionDenied
}
// Vet any service that might be getting overwritten.
services := a.State.Services()
if existing, ok := services[service.ID]; ok {
if !rule.ServiceWrite(existing.Service, nil) {
return acl.ErrPermissionDenied
}
}
return nil
}
// vetServiceUpdate makes sure the service update action is allowed by the given
// token.
func (a *Agent) vetServiceUpdate(token string, serviceID string) error {
// Resolve the token and bail if ACLs aren't enabled.
rule, err := a.resolveToken(token)
if err != nil {
return err
}
if rule == nil {
return nil
}
// Vet any changes based on the existing services's info.
services := a.State.Services()
if existing, ok := services[serviceID]; ok {
if !rule.ServiceWrite(existing.Service, nil) {
return acl.ErrPermissionDenied
}
} else {
return fmt.Errorf("Unknown service %q", serviceID)
}
return nil
}
// vetCheckRegister makes sure the check registration action is allowed by the
// given token.
func (a *Agent) vetCheckRegister(token string, check *structs.HealthCheck) error {
// Resolve the token and bail if ACLs aren't enabled.
rule, err := a.resolveToken(token)
if err != nil {
return err
}
if rule == nil {
return nil
}
// Vet the check itself.
if len(check.ServiceName) > 0 {
if !rule.ServiceWrite(check.ServiceName, nil) {
return acl.ErrPermissionDenied
}
} else {
if !rule.NodeWrite(a.config.NodeName, nil) {
return acl.ErrPermissionDenied
}
}
// Vet any check that might be getting overwritten.
checks := a.State.Checks()
if existing, ok := checks[check.CheckID]; ok {
if len(existing.ServiceName) > 0 {
if !rule.ServiceWrite(existing.ServiceName, nil) {
return acl.ErrPermissionDenied
}
} else {
if !rule.NodeWrite(a.config.NodeName, nil) {
return acl.ErrPermissionDenied
}
}
}
return nil
}
// vetCheckUpdate makes sure that a check update is allowed by the given token.
func (a *Agent) vetCheckUpdate(token string, checkID types.CheckID) error {
// Resolve the token and bail if ACLs aren't enabled.
rule, err := a.resolveToken(token)
if err != nil {
return err
}
if rule == nil {
return nil
}
// Vet any changes based on the existing check's info.
checks := a.State.Checks()
if existing, ok := checks[checkID]; ok {
if len(existing.ServiceName) > 0 {
if !rule.ServiceWrite(existing.ServiceName, nil) {
return acl.ErrPermissionDenied
}
} else {
if !rule.NodeWrite(a.config.NodeName, nil) {
return acl.ErrPermissionDenied
}
}
} else {
return fmt.Errorf("Unknown check %q", checkID)
}
return nil
}
// filterMembers redacts members that the token doesn't have access to.
func (a *Agent) filterMembers(token string, members *[]serf.Member) error {
// Resolve the token and bail if ACLs aren't enabled.
rule, err := a.resolveToken(token)
if err != nil {
return err
}
if rule == nil {
return nil
}
// Filter out members based on the node policy.
m := *members
for i := 0; i < len(m); i++ {
node := m[i].Name
if rule.NodeRead(node) {
continue
}
a.logger.Printf("[DEBUG] agent: dropping node %q from result due to ACLs", node)
m = append(m[:i], m[i+1:]...)
i--
}
*members = m
return nil
}
// filterServices redacts services that the token doesn't have access to.
func (a *Agent) filterServices(token string, services *map[string]*structs.NodeService) error {
// Resolve the token and bail if ACLs aren't enabled.
rule, err := a.resolveToken(token)
if err != nil {
return err
}
if rule == nil {
return nil
}
// Filter out services based on the service policy.
for id, service := range *services {
if rule.ServiceRead(service.Service) {
continue
}
a.logger.Printf("[DEBUG] agent: dropping service %q from result due to ACLs", id)
delete(*services, id)
}
return nil
}
// filterChecks redacts checks that the token doesn't have access to.
func (a *Agent) filterChecks(token string, checks *map[types.CheckID]*structs.HealthCheck) error {
// Resolve the token and bail if ACLs aren't enabled.
rule, err := a.resolveToken(token)
if err != nil {
return err
}
if rule == nil {
return nil
}
// Filter out checks based on the node or service policy.
for id, check := range *checks {
if len(check.ServiceName) > 0 {
if rule.ServiceRead(check.ServiceName) {
continue
}
} else {
if rule.NodeRead(a.config.NodeName) {
continue
}
}
a.logger.Printf("[DEBUG] agent: dropping check %q from result due to ACLs", id)
delete(*checks, id)
}
return nil
}