Skip to content

Commit 8da7e0e

Browse files
committed
security: sign and verify redirect URLs (part six); dpq
* cluster-key: - skip signing requests when csk version is zero (ie., not generated) - can legally happen since primary keeps driving certain startup action _after_ datapath is already enabled ----- * datapath query (dpq): - errors to include: tag, key=value - remove count; refactor ----- * prev. commit: f616e23 Signed-off-by: Alex Aizman <alex.aizman@gmail.com>
1 parent ebdc6ed commit 8da7e0e

File tree

3 files changed

+48
-23
lines changed

3 files changed

+48
-23
lines changed

ais/csk.go

Lines changed: 33 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,12 @@ import (
1717
"strconv"
1818
"sync"
1919
"sync/atomic"
20+
"time"
2021

2122
"github.com/NVIDIA/aistore/cmn/cos"
2223
"github.com/NVIDIA/aistore/cmn/debug"
2324
"github.com/NVIDIA/aistore/cmn/mono"
25+
"github.com/NVIDIA/aistore/cmn/nlog"
2426
"github.com/NVIDIA/aistore/memsys"
2527
)
2628

@@ -46,18 +48,21 @@ import (
4648
// and compares it against the provided signature. Any mismatch results in a 401.
4749

4850
// TODO: sign.verify()
51+
// - check that csk is always initialized across (enable/disable; lifecycle events)
52+
// (see related sign.warn())
4953
// - anti-replay logic (sliding time window, per-sender nonce tracking, etc.)
5054
// - validate pid: a) != "" (weak) or b) in Smap (strong)
51-
// - stricter handling for legacy unsigned redirects
5255
// - optionally, extend HMAC payload to cover assorted query parameters
5356

5457
const (
55-
cskTag = "csk"
56-
cskKeyLen = 16 // len(secret)
57-
cskSigLen = 43 // = base64.RawURLEncoding.EncodedLen(sha256.Size)
58-
cskSepa = 0 // to separate strings in HMAC payload
59-
cskBase = 36 // base for all signed int64/uint64 fields
60-
cskURLOverhead = 160 // pid+utm+vpams+x+u qparams (~145 worst-case)
58+
cskTag = "csk"
59+
60+
cskKeyLen = 16 // len(secret)
61+
cskSigLen = 43 // = base64.RawURLEncoding.EncodedLen(sha256.Size)
62+
cskSepa = 0 // to separate strings in HMAC payload
63+
cskBase = 36 // base for all signed int64/uint64 fields
64+
cskURLOverhead = 160 // pid+utm+vpams+x+u qparams (~145 worst-case)
65+
cskUptime = 10 * time.Second // cluster uptime after which we start warning if CSK version remains zero
6166
)
6267

6368
type (
@@ -157,10 +162,16 @@ func (sign *signer) compute(pid string) {
157162
binary.BigEndian.PutUint64(b8[:], sign.nonce)
158163
sb.WriteBytes(b8[:])
159164

165+
// load key
166+
k := sign.h.owner.csk.load()
167+
if k.ver == 0 {
168+
sign.warn()
169+
return
170+
}
171+
debug.Assert(len(k.secret) > 0, k.String())
172+
160173
// hash
161174
hb := handAlloc()
162-
k := sign.h.owner.csk.load()
163-
debug.Assert(k.ver > 0 && len(k.secret) > 0)
164175
if hb.h == nil || hb.ver != k.ver {
165176
hb.h = hmac.New(sha256.New, k.secret)
166177
hb.ver = k.ver
@@ -178,6 +189,19 @@ func (sign *signer) compute(pid string) {
178189
handFree(hb)
179190
}
180191

192+
func (sign *signer) warn() {
193+
const skip = "skip HMAC sign (cluster-key version is zero)"
194+
uptime := sign.h.keepalive.cluUptime(mono.NanoTime())
195+
switch {
196+
case uptime < cskUptime:
197+
// expected during early startup, stay quiet
198+
case uptime < cskUptime<<1:
199+
nlog.Warningln(sign.h.String(), skip)
200+
case uptime < min(time.Minute, cskUptime<<2):
201+
nlog.Errorln(sign.h.String(), skip)
202+
}
203+
}
204+
181205
func (sign *signer) buildURL(nodeURL string, now int64) string {
182206
var (
183207
h = sign.h

ais/dpq.go

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ import (
4949
// Note: it's always a good idea to explicitly add each and every new query into one of the
5050
// non-default categories, potentially 3, 4, or 5.
5151

52+
const dpqTag = "dpq"
53+
5254
type (
5355
cskgrp struct {
5456
nonce uint64 // QparamNonce
@@ -73,8 +75,6 @@ type (
7375
}
7476
csk cskgrp // csk envelope (group CSK/HMAC)
7577

76-
count int
77-
7878
// boolean fields
7979
skipVC bool // QparamSkipVC (skip loading existing object's metadata)
8080
isGFN bool // QparamIsGFNRequest
@@ -123,7 +123,7 @@ func dpqAlloc() (d *dpq) {
123123
}
124124

125125
func dpqFree(d *dpq) {
126-
c := d.count
126+
c := len(d.m)
127127
m := d.m
128128

129129
clear(m)
@@ -143,7 +143,6 @@ func (dpq *dpq) parse(rawQuery string) error {
143143
query = rawQuery // r.URL.RawQuery
144144
iters int
145145
)
146-
dpq.count = 0
147146
for query != "" && iters < maxDpqCap {
148147
var (
149148
err error
@@ -210,7 +209,6 @@ func (dpq *dpq) parse(rawQuery string) error {
210209
case apc.QparamMptUploadID, apc.QparamMptPartNo, apc.QparamFltPresence, apc.QparamBinfoWithOrWithoutRemote,
211210
apc.QparamAppendType, apc.QparamETLName, apc.QparamETLTransformArgs:
212211
dpq.m[key] = value
213-
dpq.count++
214212

215213
// Finally, assorted named exceptions that we simply skip, and b) all the rest parameters
216214
default:
@@ -224,17 +222,19 @@ func (dpq *dpq) parse(rawQuery string) error {
224222

225223
// Default: unescape and store in map
226224
dpq.m[key], err = _unescape(value)
227-
dpq.count++
228225
}
229226

230227
// common error return
231228
if err != nil {
232-
return err
229+
if errors.Is(err, strconv.ErrSyntax) {
230+
err = strconv.ErrSyntax
231+
}
232+
return fmt.Errorf("%s: '%s=%s', err: %v", dpqTag, key, value, err)
233233
}
234234
iters++
235235
}
236236
if iters >= maxDpqCap {
237-
return errors.New("exceeded max number of dpq iterations: " + strconv.Itoa(iters))
237+
return fmt.Errorf("%s: exceeded maximum number of iterations (%d)", dpqTag, iters)
238238
}
239239

240240
return nil
@@ -281,7 +281,7 @@ func (dpq *dpq) _arch(key, val string) (err error) {
281281
}
282282
// either/or
283283
if dpq.arch.path != "" && dpq.arch.mmode != "" { // (empty arch.regx is fine - is EmptyMatchAny)
284-
err = fmt.Errorf("query parameters archpath=%q (match one) and archregx=%q (match many) are mutually exclusive",
284+
err = fmt.Errorf("%s: archpath=%q (match one) and archregx=%q (match many) are mutually exclusive", dpqTag,
285285
apc.QparamArchpath, apc.QparamArchregx)
286286
}
287287
return err
@@ -309,24 +309,25 @@ func (dpq *dpq) _archstr() string {
309309
//
310310

311311
func cskFromQ(q url.Values) (*cskgrp, error) {
312+
const tag = "from-q"
312313
sig := q.Get(apc.QparamHMAC)
313314
if sig == "" {
314315
return nil, nil
315316
}
316317
if len(sig) != cskSigLen {
317-
return nil, fmt.Errorf("invalid signature length: %d", len(sig))
318+
return nil, fmt.Errorf("%s: invalid signature length: %d", tag, len(sig))
318319
}
319320

320321
s := q.Get(apc.QparamNonce)
321322
nonce, err := strconv.ParseUint(s, cskBase, 64)
322323
if err != nil {
323-
return nil, fmt.Errorf("invalid nonce: %v", err)
324+
return nil, fmt.Errorf("%s: invalid nonce '%s=%s': %v", tag, apc.QparamNonce, s, err)
324325
}
325326

326327
s = q.Get(apc.QparamSmapVer)
327328
smapVer, err := strconv.ParseInt(s, cskBase, 64)
328329
if err != nil {
329-
return nil, fmt.Errorf("invalid smap version: %v", err)
330+
return nil, fmt.Errorf("%s: invalid Smap version %q: %v", tag, s, err)
330331
}
331332
return &cskgrp{nonce: nonce, smapVer: smapVer, hmacSig: sig}, nil
332333
}

ais/earlystart.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,7 @@ func (p *proxy) primaryStartup(loadedSmap *smapX, config *cmn.Config, ntargets i
336336
cos.ExitLog(err)
337337
}
338338

339-
// 11. metasync (smap, config, etl & bmd) and startup as primary
339+
// 11. metasync (all cluster-level meta) and startup as primary
340340
smap = p.owner.smap.get()
341341
var (
342342
actMsgExt = p.newAmsgStr(metaction2, bmd)
@@ -345,7 +345,7 @@ func (p *proxy) primaryStartup(loadedSmap *smapX, config *cmn.Config, ntargets i
345345
if cluConfig.Auth.CSKEnabled() {
346346
k := p.owner.csk.gen(smap.Version)
347347
pairs = append(pairs, revsPair{k, actMsgExt})
348-
}
348+
} // note: can do signed requests after this point
349349

350350
wg := p.metasyncer.sync(pairs...)
351351
wg.Wait()

0 commit comments

Comments
 (0)