Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Scanner Config #677

Merged
merged 6 commits into from
Nov 9, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions api/autopilot.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,10 @@ type (

// HostsConfig contains all hosts settings used in the autopilot.
HostsConfig struct {
AllowRedundantIPs bool `json:"allowRedundantIPs"`
MaxDowntimeHours uint64 `json:"maxDowntimeHours"`
ScoreOverrides map[types.PublicKey]float64 `json:"scoreOverrides"`
AllowRedundantIPs bool `json:"allowRedundantIPs"`
MaxDowntimeHours uint64 `json:"maxDowntimeHours"`
MinRecentScanFailures uint64 `json:"minRecentScanFailures"`
peterjan marked this conversation as resolved.
Show resolved Hide resolved
ScoreOverrides map[types.PublicKey]float64 `json:"scoreOverrides"`
}

// WalletConfig contains all wallet settings used in the autopilot.
Expand Down
3 changes: 1 addition & 2 deletions autopilot/autopilot.go
Original file line number Diff line number Diff line change
Expand Up @@ -539,7 +539,7 @@ func (ap *Autopilot) triggerHandlerPOST(jc jape.Context) {
}

// New initializes an Autopilot.
func New(id string, bus Bus, workers []Worker, logger *zap.Logger, heartbeat time.Duration, scannerScanInterval time.Duration, scannerBatchSize, scannerMinRecentFailures, scannerNumThreads uint64, migrationHealthCutoff float64, accountsRefillInterval time.Duration, revisionSubmissionBuffer, migratorParallelSlabsPerWorker uint64, revisionBroadcastInterval time.Duration) (*Autopilot, error) {
func New(id string, bus Bus, workers []Worker, logger *zap.Logger, heartbeat time.Duration, scannerScanInterval time.Duration, scannerBatchSize, scannerNumThreads uint64, migrationHealthCutoff float64, accountsRefillInterval time.Duration, revisionSubmissionBuffer, migratorParallelSlabsPerWorker uint64, revisionBroadcastInterval time.Duration) (*Autopilot, error) {
ap := &Autopilot{
alerts: alerts.WithOrigin(bus, fmt.Sprintf("autopilot.%s", id)),
id: id,
Expand All @@ -552,7 +552,6 @@ func New(id string, bus Bus, workers []Worker, logger *zap.Logger, heartbeat tim
scanner, err := newScanner(
ap,
scannerBatchSize,
scannerMinRecentFailures,
scannerNumThreads,
scannerScanInterval,
scannerTimeoutInterval,
Expand Down
3 changes: 2 additions & 1 deletion autopilot/hostscore_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ var cfg = api.AutopilotConfig{
Set: api.DefaultAutopilotID,
},
Hosts: api.HostsConfig{
MaxDowntimeHours: 24 * 7 * 2,
MaxDowntimeHours: 24 * 7 * 2,
MinRecentScanFailures: 10,
},
Wallet: api.WalletConfig{
DefragThreshold: 1000,
Expand Down
30 changes: 16 additions & 14 deletions autopilot/scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,9 @@ type (
logger *zap.SugaredLogger
ap *Autopilot

scanBatchSize uint64
scanThreads uint64
scanMinInterval time.Duration
scanMinRecentFailures uint64
scanBatchSize uint64
scanThreads uint64
scanMinInterval time.Duration

timeoutMinInterval time.Duration
timeoutMinTimeout time.Duration
Expand Down Expand Up @@ -119,7 +118,7 @@ func (t *tracker) timeout() time.Duration {
return time.Duration(percentile) * time.Millisecond
}

func newScanner(ap *Autopilot, scanBatchSize, scanMinRecentFailures, scanThreads uint64, scanMinInterval, timeoutMinInterval, timeoutMinTimeout time.Duration) (*scanner, error) {
func newScanner(ap *Autopilot, scanBatchSize, scanThreads uint64, scanMinInterval, timeoutMinInterval, timeoutMinTimeout time.Duration) (*scanner, error) {
if scanBatchSize == 0 {
return nil, errors.New("scanner batch size has to be greater than zero")
}
Expand All @@ -137,10 +136,9 @@ func newScanner(ap *Autopilot, scanBatchSize, scanMinRecentFailures, scanThreads
logger: ap.logger.Named("scanner"),
ap: ap,

scanBatchSize: scanBatchSize,
scanThreads: scanThreads,
scanMinInterval: scanMinInterval,
scanMinRecentFailures: scanMinRecentFailures,
scanBatchSize: scanBatchSize,
scanThreads: scanThreads,
scanMinInterval: scanMinInterval,

timeoutMinInterval: timeoutMinInterval,
timeoutMinTimeout: timeoutMinTimeout,
Expand Down Expand Up @@ -169,7 +167,12 @@ func (s *scanner) tryPerformHostScan(ctx context.Context, w scanWorker, force bo
s.scanning = true
s.mu.Unlock()

maxDowntimeHours := s.ap.State().cfg.Hosts.MaxDowntimeHours
hostCfg := s.ap.State().cfg.Hosts
maxDowntime := time.Duration(hostCfg.MaxDowntimeHours) * time.Hour
minRecentScanFailures := hostCfg.MinRecentScanFailures
if maxDowntime == 0 || minRecentScanFailures == 0 {
s.logger.Warn("host pruning is disabled, please set maxDowntimeHours and minRecentScanFailures in the host config")
}

go func() {
for resp := range s.launchScanWorkers(ctx, w, s.launchHostScans()) {
Expand All @@ -181,10 +184,9 @@ func (s *scanner) tryPerformHostScan(ctx context.Context, w scanWorker, force bo
}
}

if !s.ap.isStopped() && maxDowntimeHours > 0 {
s.logger.Debugf("removing hosts that have been offline for more than %v hours", maxDowntimeHours)
maxDowntime := time.Hour * time.Duration(maxDowntimeHours)
removed, err := s.bus.RemoveOfflineHosts(ctx, s.scanMinRecentFailures, maxDowntime)
if !s.ap.isStopped() && maxDowntime > 0 && minRecentScanFailures > 0 {
s.logger.Debugf("removing hosts that have been offline for more than %v and failed %d consecutive scans", maxDowntime, minRecentScanFailures)
removed, err := s.bus.RemoveOfflineHosts(ctx, minRecentScanFailures, maxDowntime)
if removed > 0 {
s.logger.Infof("removed %v offline hosts", removed)
}
Expand Down
4 changes: 4 additions & 0 deletions bus/bus.go
Original file line number Diff line number Diff line change
Expand Up @@ -596,6 +596,10 @@ func (b *bus) hostsRemoveHandlerPOST(jc jape.Context) {
jc.Error(errors.New("maxDowntime must be non-zero"), http.StatusBadRequest)
return
}
if hrr.MinRecentScanFailures == 0 {
jc.Error(errors.New("minRecentScanFailures must be non-zero"), http.StatusBadRequest)
return
}
removed, err := b.hdb.RemoveOfflineHosts(jc.Request.Context(), hrr.MinRecentScanFailures, time.Duration(hrr.MaxDowntimeHours))
if jc.Check("couldn't remove offline hosts", err) != nil {
return
Expand Down
2 changes: 0 additions & 2 deletions cmd/renterd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,6 @@ var (
RevisionBroadcastInterval: 24 * time.Hour,
ScannerBatchSize: 1000,
ScannerInterval: 24 * time.Hour,
ScannerMinRecentFailures: 10,
ScannerNumThreads: 100,
MigratorParallelSlabsPerWorker: 1,
},
Expand Down Expand Up @@ -300,7 +299,6 @@ func main() {
flag.DurationVar(&cfg.Autopilot.RevisionBroadcastInterval, "autopilot.revisionBroadcastInterval", cfg.Autopilot.RevisionBroadcastInterval, "interval at which the autopilot broadcasts contract revisions to be mined - can be overwritten using the RENTERD_AUTOPILOT_REVISION_BROADCAST_INTERVAL environment variable - setting it to 0 will disable this feature")
flag.Uint64Var(&cfg.Autopilot.ScannerBatchSize, "autopilot.scannerBatchSize", cfg.Autopilot.ScannerBatchSize, "size of the batch with which hosts are scanned")
flag.DurationVar(&cfg.Autopilot.ScannerInterval, "autopilot.scannerInterval", cfg.Autopilot.ScannerInterval, "interval at which hosts are scanned")
flag.Uint64Var(&cfg.Autopilot.ScannerMinRecentFailures, "autopilot.scannerMinRecentFailures", cfg.Autopilot.ScannerMinRecentFailures, "minimum amount of consesutive failed scans a host must have before it is removed for exceeding the max downtime")
flag.Uint64Var(&cfg.Autopilot.ScannerNumThreads, "autopilot.scannerNumThreads", cfg.Autopilot.ScannerNumThreads, "number of threads that scan hosts")
flag.Uint64Var(&cfg.Autopilot.MigratorParallelSlabsPerWorker, "autopilot.migratorParallelSlabsPerWorker", cfg.Autopilot.MigratorParallelSlabsPerWorker, "number of slabs that the autopilot migrates in parallel per worker. Can be overwritten using the RENTERD_MIGRATOR_PARALLEL_SLABS_PER_WORKER environment variable")
flag.BoolVar(&cfg.Autopilot.Enabled, "autopilot.enabled", cfg.Autopilot.Enabled, "enable/disable the autopilot - can be overwritten using the RENTERD_AUTOPILOT_ENABLED environment variable")
Expand Down
1 change: 0 additions & 1 deletion config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,6 @@ type (
RevisionSubmissionBuffer uint64 `yaml:"revisionSubmissionBuffer"`
ScannerInterval time.Duration `yaml:"scannerInterval"`
ScannerBatchSize uint64 `yaml:"scannerBatchSize"`
ScannerMinRecentFailures uint64 `yaml:"scannerMinRecentFailures"`
ScannerNumThreads uint64 `yaml:"scannerNumThreads"`
MigratorParallelSlabsPerWorker uint64 `yaml:"migratorParallelSlabsPerWorker"`
}
Expand Down
2 changes: 1 addition & 1 deletion internal/node/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@ func NewWorker(cfg config.Worker, b worker.Bus, seed types.PrivateKey, l *zap.Lo
}

func NewAutopilot(cfg AutopilotConfig, b autopilot.Bus, workers []autopilot.Worker, l *zap.Logger) (http.Handler, RunFn, ShutdownFn, error) {
ap, err := autopilot.New(cfg.ID, b, workers, l, cfg.Heartbeat, cfg.ScannerInterval, cfg.ScannerBatchSize, cfg.ScannerMinRecentFailures, cfg.ScannerNumThreads, cfg.MigrationHealthCutoff, cfg.AccountsRefillInterval, cfg.RevisionSubmissionBuffer, cfg.MigratorParallelSlabsPerWorker, cfg.RevisionBroadcastInterval)
ap, err := autopilot.New(cfg.ID, b, workers, l, cfg.Heartbeat, cfg.ScannerInterval, cfg.ScannerBatchSize, cfg.ScannerNumThreads, cfg.MigrationHealthCutoff, cfg.AccountsRefillInterval, cfg.RevisionSubmissionBuffer, cfg.MigratorParallelSlabsPerWorker, cfg.RevisionBroadcastInterval)
if err != nil {
return nil, nil, nil, err
}
Expand Down
6 changes: 3 additions & 3 deletions internal/testing/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,9 @@ var (
Set: testContractSet,
},
Hosts: api.HostsConfig{
MaxDowntimeHours: 10,
AllowRedundantIPs: true, // allow for integration tests by default
MaxDowntimeHours: 10,
MinRecentScanFailures: 10,
AllowRedundantIPs: true, // allow for integration tests by default
},
}

Expand Down Expand Up @@ -951,7 +952,6 @@ func testApCfg() node.AutopilotConfig {
ScannerInterval: time.Second,
ScannerBatchSize: 10,
ScannerNumThreads: 1,
ScannerMinRecentFailures: 5,
},
}
}
5 changes: 3 additions & 2 deletions stores/autopilot_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,9 @@ func TestAutopilotStore(t *testing.T) {
Set: testContractSet,
},
Hosts: api.HostsConfig{
MaxDowntimeHours: 10,
AllowRedundantIPs: true, // allow for integration tests by default
MaxDowntimeHours: 10,
MinRecentScanFailures: 10,
AllowRedundantIPs: true, // allow for integration tests by default
},
Wallet: api.WalletConfig{
DefragThreshold: 1234,
Expand Down