Skip to content

Commit

Permalink
retry: Mitigate timing issues
Browse files Browse the repository at this point in the history
`WithBackoff()` will now make one final retry if the timeout expires
during the sleep phase between attempts, which can be a long period
depending on the attempts made and the maximum sleep time.
  • Loading branch information
lippserd committed Apr 10, 2024
1 parent 2b3a5d4 commit 3a3baaf
Showing 1 changed file with 22 additions and 5 deletions.
27 changes: 22 additions & 5 deletions pkg/retry/retry.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,10 @@ type IsRetryable func(error) bool

// Settings aggregates optional settings for WithBackoff.
type Settings struct {
// If >0, Timeout lets WithBackoff stop retrying once elapsed,
// but allows the RetryableFunc its execution time and **doesn't abort** it if it exceeds Timeout.
// If >0, Timeout lets WithBackoff stop retrying gracefully once elapsed based on the following criteria:
// * If the execution of RetryableFunc has taken longer than Timeout, no further attempts are made.
// * If Timeout elapses during the sleep phase between retries, one final retry is attempted.
// * RetryableFunc is always granted its full execution time and is not canceled if it exceeds Timeout.
// This means that WithBackoff may not stop exactly after Timeout expires,
// or may not retry at all if the first execution of RetryableFunc already takes longer than Timeout.
Timeout time.Duration
Expand All @@ -50,6 +52,7 @@ func WithBackoff(
}

start := time.Now()
timedOut := false
for attempt := uint64(1); ; /* true */ attempt++ {
prevErr := err

Expand Down Expand Up @@ -80,16 +83,30 @@ func WithBackoff(
return
}

select {
case <-timeout:
// Stop retrying immediately if executing the retryable function took longer than the timeout.
timedOut = true
default:
}

if timedOut {
err = errors.Wrap(err, "retry deadline exceeded")

return
}

if settings.OnRetryableError != nil {
settings.OnRetryableError(time.Since(start), attempt, err, prevErr)
}

select {
case <-time.After(b(attempt)):
case <-timeout:
err = errors.Wrap(err, "retry deadline exceeded")

return
// Do not stop retrying immediately, but start one last attempt to mitigate timing issues where
// the timeout expires while waiting for the next attempt and
// therefore no retries have happened during this possibly long period.
timedOut = true
case <-ctx.Done():
err = errors.Wrap(ctx.Err(), err.Error())

Expand Down

0 comments on commit 3a3baaf

Please sign in to comment.