Skip to content

Commit

Permalink
Fix retry delay going negative for large retries with exponential del…
Browse files Browse the repository at this point in the history
…ays (#2164)

- Suppress `xUnit1042` warnings.
- Add repro for #2163.
- Add an assertion for the delay's value not being negative.
- Fix retry delays going negative with `DelayBackoffType.Exponential` after 1,024 retries.
- Only compute the effective maximum `TimeSpan` once.
- Assert delays are positive.
- Add more unit tests related to the exponential delay becoming negative.
- Remove redundant comment.
- Use `TheoryData<int>` for retry test cases.
  • Loading branch information
martincostello committed Jun 25, 2024
1 parent 5d81f99 commit 7301ef3
Show file tree
Hide file tree
Showing 5 changed files with 151 additions and 13 deletions.
24 changes: 18 additions & 6 deletions src/Polly.Core/Retry/RetryHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@ internal static class RetryHelper

private const double ExponentialFactor = 2.0;

// Upper-bound to prevent overflow beyond TimeSpan.MaxValue. Potential truncation during conversion from double to long
// (as described at https://docs.microsoft.com/en-us/dotnet/csharp/language-reference/builtin-types/numeric-conversions)
// is avoided by the arbitrary subtraction of 1,000.
private static readonly double MaxTimeSpanTicks = (double)TimeSpan.MaxValue.Ticks - 1_000;

public static bool IsValidDelay(TimeSpan delay) => delay >= TimeSpan.Zero;

public static TimeSpan GetRetryDelay(
Expand Down Expand Up @@ -101,20 +106,27 @@ private static TimeSpan DecorrelatedJitterBackoffV2(int attempt, TimeSpan baseDe
// This factor allows the median values to fall approximately at 1, 2, 4 etc seconds, instead of 1.4, 2.8, 5.6, 11.2.
const double RpScalingFactor = 1 / 1.4d;

// Upper-bound to prevent overflow beyond TimeSpan.MaxValue. Potential truncation during conversion from double to long
// (as described at https://docs.microsoft.com/en-us/dotnet/csharp/language-reference/builtin-types/numeric-conversions)
// is avoided by the arbitrary subtraction of 1000. Validated by unit-test Backoff_should_not_overflow_to_give_negative_timespan.
double maxTimeSpanDouble = (double)TimeSpan.MaxValue.Ticks - 1000;

long targetTicksFirstDelay = baseDelay.Ticks;

double t = attempt + randomizer();
double next = Math.Pow(ExponentialFactor, t) * Math.Tanh(Math.Sqrt(PFactor * t));

// At t >=1024, the above will tend to infinity which would otherwise cause the
// ticks to go negative. See https://github.com/App-vNext/Polly/issues/2163.
if (double.IsInfinity(next))
{
prev = next;
return TimeSpan.FromTicks((long)MaxTimeSpanTicks);
}

double formulaIntrinsicValue = next - prev;
prev = next;

return TimeSpan.FromTicks((long)Math.Min(formulaIntrinsicValue * RpScalingFactor * targetTicksFirstDelay, maxTimeSpanDouble));
long ticks = (long)Math.Min(formulaIntrinsicValue * RpScalingFactor * targetTicksFirstDelay, MaxTimeSpanTicks);

Debug.Assert(ticks >= 0, "ticks cannot be negative");

return TimeSpan.FromTicks(ticks);
}

#pragma warning disable IDE0047 // Remove unnecessary parentheses which offer less mental gymnastics
Expand Down
2 changes: 2 additions & 0 deletions src/Polly.Core/Retry/RetryResilienceStrategy.cs
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ protected internal override async ValueTask<Outcome<T>> ExecuteCore<TState>(Func
}
}

Debug.Assert(delay >= TimeSpan.Zero, "The delay cannot be negative.");

var onRetryArgs = new OnRetryArguments<T>(context, outcome, attempt, delay, executionTime);
_telemetry.Report<OnRetryArguments<T>, T>(new(ResilienceEventSeverity.Warning, RetryConstants.OnRetryEvent), onRetryArgs);

Expand Down
59 changes: 59 additions & 0 deletions test/Polly.Core.Tests/Issues/IssuesTests.InfiniteRetry_2163.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
using Microsoft.Extensions.Time.Testing;
using Polly.Retry;

namespace Polly.Core.Tests.Issues;

public partial class IssuesTests
{
[Fact(Timeout = 15_000)]
public async Task InfiniteRetry_Delay_Does_Not_Overflow_2163()
{
// Arrange
int attempts = 0;
int succeedAfter = 2049;

var options = new RetryStrategyOptions<bool>
{
BackoffType = DelayBackoffType.Exponential,
Delay = TimeSpan.FromSeconds(2),
MaxDelay = TimeSpan.FromSeconds(30),
MaxRetryAttempts = int.MaxValue,
UseJitter = true,
OnRetry = (args) =>
{
args.RetryDelay.Should().BeGreaterThan(TimeSpan.Zero, $"RetryDelay is less than zero after {args.AttemptNumber} attempts");
attempts++;
return default;
},
ShouldHandle = (args) => new ValueTask<bool>(!args.Outcome.Result),
};

var listener = new FakeTelemetryListener();
var telemetry = TestUtilities.CreateResilienceTelemetry(listener);
var timeProvider = new FakeTimeProvider();

var strategy = new RetryResilienceStrategy<bool>(options, timeProvider, telemetry);
var pipeline = strategy.AsPipeline();

using var cts = new CancellationTokenSource(Debugger.IsAttached ? TimeSpan.MaxValue : TimeSpan.FromSeconds(10));

// Act
var executing = pipeline.ExecuteAsync((_) =>
{
return new ValueTask<bool>(attempts >= succeedAfter);
}, cts.Token);

while (!executing.IsCompleted && !cts.IsCancellationRequested)
{
timeProvider.Advance(TimeSpan.FromSeconds(1));
}

// Assert
cts.Token.ThrowIfCancellationRequested();

var actual = await executing;

actual.Should().BeTrue();
attempts.Should().Be(succeedAfter);
}
}
71 changes: 64 additions & 7 deletions test/Polly.Core.Tests/Retry/RetryHelperTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,24 @@ public class RetryHelperTests
{
private Func<double> _randomizer = new RandomUtil(0).NextDouble;

public static TheoryData<int> Attempts()
{
#pragma warning disable IDE0028
return new()
{
1,
2,
3,
4,
10,
100,
1_000,
1_024,
1_025,
};
#pragma warning restore IDE0028
}

[Fact]
public void IsValidDelay_Ok()
{
Expand Down Expand Up @@ -187,14 +205,51 @@ public void GetRetryDelay_OverflowWithMaxDelay_ReturnsMaxDelay()
RetryHelper.GetRetryDelay(DelayBackoffType.Exponential, false, 1000, TimeSpan.FromDays(1), TimeSpan.FromDays(2), ref state, _randomizer).Should().Be(TimeSpan.FromDays(2));
}

[InlineData(1)]
[InlineData(2)]
[InlineData(3)]
[InlineData(4)]
[InlineData(10)]
[InlineData(100)]
[InlineData(1000)]
[Theory]
[MemberData(nameof(Attempts))]
public void GetRetryDelay_Exponential_Is_Positive_When_No_Maximum_Delay(int attempt)
{
var jitter = true;
var type = DelayBackoffType.Exponential;

var baseDelay = TimeSpan.FromSeconds(2);
TimeSpan? maxDelay = null;

var random = new RandomUtil(0).NextDouble;
double state = 0;

var first = RetryHelper.GetRetryDelay(type, jitter, attempt, baseDelay, maxDelay, ref state, random);
var second = RetryHelper.GetRetryDelay(type, jitter, attempt, baseDelay, maxDelay, ref state, random);

first.Should().BePositive();
second.Should().BePositive();
}

[Theory]
[MemberData(nameof(Attempts))]
public void GetRetryDelay_Exponential_Does_Not_Exceed_MaxDelay(int attempt)
{
var jitter = true;
var type = DelayBackoffType.Exponential;

var baseDelay = TimeSpan.FromSeconds(2);
var maxDelay = TimeSpan.FromSeconds(30);

var random = new RandomUtil(0).NextDouble;
double state = 0;

var first = RetryHelper.GetRetryDelay(type, jitter, attempt, baseDelay, maxDelay, ref state, random);
var second = RetryHelper.GetRetryDelay(type, jitter, attempt, baseDelay, maxDelay, ref state, random);

first.Should().BePositive();
first.Should().BeLessThanOrEqualTo(maxDelay);

second.Should().BePositive();
second.Should().BeLessThanOrEqualTo(maxDelay);
}

[Theory]
[MemberData(nameof(Attempts))]
public void ExponentialWithJitter_Ok(int count)
{
var delay = TimeSpan.FromSeconds(7.8);
Expand All @@ -203,6 +258,7 @@ public void ExponentialWithJitter_Ok(int count)

newDelays.Should().ContainInConsecutiveOrder(oldDelays);
newDelays.Should().HaveCount(oldDelays.Count);
newDelays.Should().AllSatisfy(delay => delay.Should().BePositive());
}

[Fact]
Expand All @@ -213,6 +269,7 @@ public void ExponentialWithJitter_EnsureRandomness()
var delays2 = GetExponentialWithJitterBackoff(false, delay, 100, RandomUtil.Instance.NextDouble);

delays1.SequenceEqual(delays2).Should().BeFalse();
delays1.Should().AllSatisfy(delay => delay.Should().BePositive());
}

private static IReadOnlyList<TimeSpan> GetExponentialWithJitterBackoff(bool contrib, TimeSpan baseDelay, int retryCount, Func<double>? randomizer = null)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ public void ArgValidation_Ok()
pool.Get(System.Threading.Timeout.InfiniteTimeSpan).Should().NotBeNull();
}

#pragma warning disable xUnit1042 // The member referenced by the MemberData attribute returns untyped data rows
[MemberData(nameof(TimeProviders))]
#pragma warning restore xUnit1042 // The member referenced by the MemberData attribute returns untyped data rows
[Theory]
public void RentReturn_Reusable_EnsureProperBehavior(object timeProvider)
{
Expand All @@ -48,7 +50,9 @@ public void RentReturn_Reusable_EnsureProperBehavior(object timeProvider)
#endif
}

#pragma warning disable xUnit1042 // The member referenced by the MemberData attribute returns untyped data rows
[MemberData(nameof(TimeProviders))]
#pragma warning restore xUnit1042 // The member referenced by the MemberData attribute returns untyped data rows
[Theory]
public void RentReturn_NotReusable_EnsureProperBehavior(object timeProvider)
{
Expand All @@ -63,7 +67,9 @@ public void RentReturn_NotReusable_EnsureProperBehavior(object timeProvider)
cts2.Token.Should().NotBeNull();
}

#pragma warning disable xUnit1042 // The member referenced by the MemberData attribute returns untyped data rows
[MemberData(nameof(TimeProviders))]
#pragma warning restore xUnit1042 // The member referenced by the MemberData attribute returns untyped data rows
[Theory]
public async Task Rent_Cancellable_EnsureCancelled(object timeProvider)
{
Expand All @@ -80,7 +86,9 @@ public async Task Rent_Cancellable_EnsureCancelled(object timeProvider)
await TestUtilities.AssertWithTimeoutAsync(() => cts.IsCancellationRequested.Should().BeTrue());
}

#pragma warning disable xUnit1042 // The member referenced by the MemberData attribute returns untyped data rows
[MemberData(nameof(TimeProviders))]
#pragma warning restore xUnit1042 // The member referenced by the MemberData attribute returns untyped data rows
[Theory]
public async Task Rent_NotCancellable_EnsureNotCancelled(object timeProvider)
{
Expand Down

0 comments on commit 7301ef3

Please sign in to comment.