Skip to content

Commit 2b0de20

Browse files
committed
global configuration - cluster scope (major upd)
* mark additional sections as cluster-scoped:
  - checksum, auth, tcb, tco, arch, rate_limit, keepalivetracker, net, transport, periodic
* clarify and enforce `allow:"cluster"` in the cluster config
* restrict `transient` (in-memory) updates to sections NOT tagged with `allow:"cluster"`
-------
* [backward compatibility]: introduce `CopyPropsOpts` control structure and a new option to ignore cluster-scope violations when applying local config-override
-------
* part two, prev. commit: 445871c

Signed-off-by: Alex Aizman <alex.aizman@gmail.com>
1 parent 445871c commit 2b0de20

File tree

7 files changed

+53
-38
lines changed

7 files changed

+53
-38
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ AIS consistently shows [balanced I/O distribution and linear scalability](https:
1616
* **HTTP-based API:** A feature-rich, native API (with user-friendly SDKs for Go and Python), and compliant [Amazon S3 API](/docs/s3compat.md) for running unmodified S3 clients.
1717
* **Monitoring:** Comprehensive observability with integrated Prometheus metrics, Grafana dashboards, detailed logs with configurable verbosity, and CLI-based performance tracking for complete cluster visibility and troubleshooting. See [AIStore Observability](/docs/monitoring-overview.md) for details.
1818
* **Chunked Objects:** High-performance chunked object representation, with independently retrievable chunks, metadata v2, and checksum-protected manifests. Supports rechunking, parallel reads, and seamless integration with [Get-Batch](/docs/get_batch.md), [blob-downloader](/docs/blob_downloader.md), and multipart uploads to supported cloud backends.
19-
* **Secure Redirects (cluster-key):** Configurable cryptographic signing of redirect URLs using HMAC-SHA256 with a versioned cluster key.
19+
* **Secure Redirects:** Configurable cryptographic signing of redirect URLs using HMAC-SHA256 with a versioned cluster key (stored in memory only).
2020
* **Load-Aware Throttling:** Dynamic request throttling based on a five-dimensional load vector (CPU, memory, disk, FDs, goroutines) to protect AIS clusters under stress.
2121
* **Unified Namespace:** Attach AIS clusters together to provide fast, unified access to the entirety of hosted datasets, allowing users to reference shared buckets with cluster-specific identifiers.
2222
* **Turn-key Cache:** In addition to robust data protection features, AIS offers a per-bucket configurable LRU-based cache with eviction thresholds and storage capacity watermarks.

ais/daemon.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ func initDaemon(version, buildTime string) cos.Runner {
174174
if err := toUpdate.FillFromKVS(kvs); err != nil {
175175
cos.ExitLog(err)
176176
}
177-
if err := setConfigInMem(toUpdate, config, apc.Daemon); err != nil {
177+
if err := setConfigInMem(toUpdate, config, apc.Daemon, true /*transient*/); err != nil {
178178
cos.ExitLogf("failed to update config in memory: %v", err)
179179
}
180180

ais/gconfig.go

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -187,11 +187,10 @@ func (*configOwner) persistBytes(payload msPayload, globalFpath string) (done bo
187187
return
188188
}
189189

190-
// NOTE: must be called under config-owner lock
191-
func setConfig(toUpdate *cmn.ConfigToSet, transient bool) (err error) {
190+
// must be called under config-owner lock
191+
func setConfig(toUpdate *cmn.ConfigToSet, transient bool) error {
192192
clone := cmn.GCO.Clone()
193-
err = setConfigInMem(toUpdate, clone, apc.Daemon)
194-
if err != nil {
193+
if err := setConfigInMem(toUpdate, clone, apc.Daemon, transient); err != nil {
195194
return err
196195
}
197196
override := cmn.GCO.GetOverride()
@@ -211,9 +210,8 @@ func setConfig(toUpdate *cmn.ConfigToSet, transient bool) (err error) {
211210
return nil
212211
}
213212

214-
func setConfigInMem(toUpdate *cmn.ConfigToSet, config *cmn.Config, asType string) (err error) {
215-
err = config.UpdateClusterConfig(toUpdate, asType)
216-
return
213+
func setConfigInMem(toUpdate *cmn.ConfigToSet, config *cmn.Config, asType string, transient bool) error {
214+
return config.UpdateClusterConfig(toUpdate, asType, cmn.CopyPropsOpts{Transient: transient})
217215
}
218216

219217
func (co *configOwner) resetDaemonConfig() (err error) {

ais/prxclu.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1246,7 +1246,7 @@ func (p *proxy) setCluCfgTransient(w http.ResponseWriter, r *http.Request, toUpd
12461246
}
12471247

12481248
func _setConfPre(ctx *configModifier, clone *globalConfig) (updated bool, err error) {
1249-
if err = clone.Apply(ctx.toUpdate, apc.Cluster); err != nil {
1249+
if err = cmn.CopyProps(ctx.toUpdate, clone, apc.Cluster); err != nil {
12501250
return
12511251
}
12521252
updated = true

cmn/config.go

Lines changed: 15 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,8 @@ import (
3131
jsoniter "github.com/json-iterator/go"
3232
)
3333

34-
// TODO post-4.1:
35-
// - revisit `allow:"cluster"` usage across ClusterConfig
36-
// - for 4.1, making a single change - marking `AuthConf` as cluster-scoped
37-
3834
const (
39-
confDisabled = "Disabled"
35+
confDisabled = "Disabled" // common conf.String()
4036
)
4137

4238
type (
@@ -98,7 +94,10 @@ type (
9894
}
9995
)
10096

101-
// global configuration
97+
// Global configuration
98+
// Note: updating any of these fields on a per-node basis:
99+
// - will fail for (cluster-scoped) sections tagged with `allow:"cluster"`
100+
// - is at your own risk otherwise; such changes may cause inconsistent behavior across the cluster.
102101
type (
103102
ClusterConfig struct {
104103
Backend BackendConf `json:"backend" allow:"cluster"`
@@ -108,29 +107,29 @@ type (
108107
UUID string `json:"uuid"`
109108
Dsort DsortConf `json:"distributed_sort"`
110109
Proxy ProxyConf `json:"proxy" allow:"cluster"`
111-
Cksum CksumConf `json:"checksum"`
110+
Cksum CksumConf `json:"checksum" allow:"cluster"`
112111
Auth AuthConf `json:"auth" allow:"cluster"`
113112
Tracing TracingConf `json:"tracing"`
114-
TCB TCBConf `json:"tcb"`
115-
TCO TCOConf `json:"tco"`
116-
Arch ArchConf `json:"arch"`
113+
TCB TCBConf `json:"tcb" allow:"cluster"`
114+
TCO TCOConf `json:"tco" allow:"cluster"`
115+
Arch ArchConf `json:"arch" allow:"cluster"`
117116
RateLimit RateLimitConf `json:"rate_limit"`
118117
Keepalive KeepaliveConf `json:"keepalivetracker"`
119118
Rebalance RebalanceConf `json:"rebalance" allow:"cluster"`
120119
Log LogConf `json:"log"`
121120
EC ECConf `json:"ec" allow:"cluster"`
122-
Net NetConf `json:"net"`
121+
Net NetConf `json:"net" allow:"cluster"`
123122
Timeout TimeoutConf `json:"timeout"`
124123
Space SpaceConf `json:"space"`
125-
Transport TransportConf `json:"transport"`
124+
Transport TransportConf `json:"transport" allow:"cluster"`
126125
Memsys MemsysConf `json:"memsys"`
127126
Disk DiskConf `json:"disk"`
128127
FSHC FSHCConf `json:"fshc"`
129128
Chunks ChunksConf `json:"chunks" allow:"cluster"`
130129
LRU LRUConf `json:"lru"`
131130
Client ClientConf `json:"client"`
132131
Mirror MirrorConf `json:"mirror" allow:"cluster"`
133-
Periodic PeriodConf `json:"periodic"`
132+
Periodic PeriodConf `json:"periodic" allow:"cluster"`
134133
Downloader DownloaderConf `json:"downloader"`
135134
Features feat.Flags `json:"features,string" allow:"cluster"` // enumerated features to flip assorted global defaults (cmn/feat/feat and docs/feat*)
136135
Version int64 `json:"config_version,string"`
@@ -1065,9 +1064,8 @@ func (c *Config) SetRole(role string) {
10651064
c.role = role
10661065
}
10671066

1068-
func (c *Config) UpdateClusterConfig(updateConf *ConfigToSet, asType string) (err error) {
1069-
err = c.ClusterConfig.Apply(updateConf, asType)
1070-
if err != nil {
1067+
func (c *Config) UpdateClusterConfig(updateConf *ConfigToSet, asType string, opts CopyPropsOpts) (err error) {
1068+
if err = CopyProps(updateConf, &c.ClusterConfig, asType, opts); err != nil {
10711069
return
10721070
}
10731071
return c.Validate()
@@ -1129,10 +1127,6 @@ func (ctu *ConfigToSet) FillFromKVS(kvs []string) (err error) {
11291127
// ClusterConfig //
11301128
///////////////////
11311129

1132-
func (c *ClusterConfig) Apply(updateConf *ConfigToSet, asType string) error {
1133-
return CopyProps(updateConf, c, asType)
1134-
}
1135-
11361130
func (c *ClusterConfig) String() string {
11371131
if c == nil {
11381132
return "Conf <nil>"
@@ -2782,7 +2776,7 @@ func handleOverrideConfig(config *Config) error {
27822776
config.LocalConfig.FSP = *overrideConfig.FSP // override local config's fspaths
27832777
overrideConfig.FSP = nil
27842778
}
2785-
return config.UpdateClusterConfig(overrideConfig, apc.Daemon)
2779+
return config.UpdateClusterConfig(overrideConfig, apc.Daemon, CopyPropsOpts{Transient: false, IgnoreScope: true})
27862780
}
27872781

27882782
func SaveOverrideConfig(configDir string, toUpdate *ConfigToSet) error {

cmn/gco.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ func (gco *gco) Update(cluConfig *ClusterConfig) (err error) {
110110
config.ClusterConfig = *cluConfig
111111
override := gco.GetOverride()
112112
if override != nil {
113-
err = config.UpdateClusterConfig(override, apc.Daemon) // update and validate
113+
err = config.UpdateClusterConfig(override, apc.Daemon, CopyPropsOpts{Transient: false, IgnoreScope: true}) // update and validate
114114
} else {
115115
err = config.Validate()
116116
}

cmn/iter_fields.go

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616
"github.com/NVIDIA/aistore/api/apc"
1717
"github.com/NVIDIA/aistore/cmn/cos"
1818
"github.com/NVIDIA/aistore/cmn/debug"
19+
"github.com/NVIDIA/aistore/cmn/nlog"
1920
)
2021

2122
const IterFieldNameSepa = "."
@@ -56,6 +57,14 @@ type (
5657
updateFunc func(uniqueTag string, field IterField) (error, bool)
5758
)
5859

60+
type (
61+
// controls how CopyProps/_copyProps enforce config scope and transient rules
62+
CopyPropsOpts struct {
63+
Transient bool // treat changes as transient: forbid writes to allow:"cluster"
64+
IgnoreScope bool // when true, skip allow:"cluster" violations instead of failing (config override-only)
65+
}
66+
)
67+
5968
// interface guard
6069
var _ IterField = (*field)(nil)
6170

@@ -233,16 +242,20 @@ func iterFields(prefix string, v any, updf updateFunc, opts IterOpts) (dirty, st
233242
}
234243

235244
// CopyProps updates dst with the values from src
236-
func CopyProps(src, dst any, asType string) error {
245+
func CopyProps(src, dst any, asType string, copts ...CopyPropsOpts) error {
237246
var (
238247
srcVal = reflect.ValueOf(src)
239248
dstVal = reflect.ValueOf(dst).Elem()
249+
opts CopyPropsOpts
240250
)
251+
if len(copts) > 0 {
252+
opts = copts[0]
253+
}
241254
debug.Assertf(slices.Contains([]string{apc.Daemon, apc.Cluster}, asType), "unexpected config level: %s", asType)
242255
if srcVal.Kind() == reflect.Ptr {
243256
srcVal = srcVal.Elem()
244257
}
245-
return _copyProps(srcVal, dstVal, asType)
258+
return _copyProps(srcVal, dstVal, asType, opts)
246259
}
247260

248261
// copyProps helper: whether v.Kind() supports IsNil()
@@ -277,7 +290,7 @@ func peel2(v reflect.Value) reflect.Value {
277290
return v
278291
}
279292

280-
func _copyProps(srcVal, dstVal reflect.Value, asType string) error {
293+
func _copyProps(srcVal, dstVal reflect.Value, asType string, opts CopyPropsOpts) error {
281294
// normalize pointers on entry
282295
for srcVal.Kind() == reflect.Ptr && !srcVal.IsNil() {
283296
srcVal = srcVal.Elem()
@@ -353,7 +366,7 @@ func _copyProps(srcVal, dstVal reflect.Value, asType string) error {
353366
if nilable(srcField.Kind()) && srcField.IsNil() {
354367
continue
355368
}
356-
if err := _copyProps(srcField, dstVal, asType); err != nil {
369+
if err := _copyProps(srcField, dstVal, asType, opts); err != nil {
357370
return err
358371
}
359372
continue
@@ -372,7 +385,17 @@ func _copyProps(srcVal, dstVal reflect.Value, asType string) error {
372385
// scope enforcement from dst tag
373386
if dtf, ok := dstVal.Type().FieldByName(fieldName); ok {
374387
allowedScope := dtf.Tag.Get("allow")
375-
if allowedScope != "" && allowedScope != asType {
388+
389+
// 3 special cases
390+
switch {
391+
case opts.IgnoreScope && allowedScope == apc.Cluster && asType == apc.Daemon:
392+
name := strings.ToLower(fieldName)
393+
nlog.Warningln("ignoring node override for cluster-scoped config:", name)
394+
continue
395+
case opts.Transient && allowedScope == apc.Cluster:
396+
name := strings.ToLower(fieldName)
397+
return fmt.Errorf("%s (cluster-scoped) configuration cannot be changed transiently", name)
398+
case allowedScope != "" && allowedScope != asType:
376399
name := strings.ToLower(fieldName)
377400
if allowedScope == apc.Cluster && asType == apc.Daemon {
378401
return fmt.Errorf("%s configuration can only be globally updated", name)
@@ -401,7 +424,7 @@ func _copyProps(srcVal, dstVal reflect.Value, asType string) error {
401424

402425
// recurse only when _both_ are structs
403426
if s.Kind() == reflect.Struct && d.Kind() == reflect.Struct {
404-
if err := _copyProps(srcField, dstField, asType); err != nil { // pass originals to allow alloc
427+
if err := _copyProps(srcField, dstField, asType, opts); err != nil { // pass originals to allow alloc
405428
return err
406429
}
407430
continue

0 commit comments

Comments
 (0)