Skip to content

Commit

Permalink
Make background job creation resilient to transient exceptions
Browse files Browse the repository at this point in the history
CoreBackgroundJobFactory class now supports idempotent retries for background job creation logic that don't lead to multiple similar jobs even if a timeout exception is thrown when attempt was actually succeeded (i.e. TransactionInDoubt state in ADO.NET and so on).

The new behavior can be enabled by setting the BackgroundJobClient.RetryAttempts to a positive value, and it will be enabled by default in version 2.0.
  • Loading branch information
odinserj committed Jun 21, 2019
1 parent dfee307 commit 5c08eb4
Show file tree
Hide file tree
Showing 4 changed files with 353 additions and 22 deletions.
20 changes: 20 additions & 0 deletions src/Hangfire.Core/BackgroundJobClient.cs
Expand Up @@ -116,6 +116,26 @@ public BackgroundJobClient([NotNull] JobStorage storage, [NotNull] IJobFilterPro
_factory = factory;
}

public int RetryAttempts
{
get
{
if (_factory is BackgroundJobFactory factory)
{
return factory.RetryAttempts;
}

return 0;
}
set
{
if (_factory is BackgroundJobFactory factory)
{
factory.RetryAttempts = value;
}
}
}

/// <inheritdoc />
public string Create(Job job, IState state)
{
Expand Down
20 changes: 20 additions & 0 deletions src/Hangfire.Core/Client/BackgroundJobFactory.cs
Expand Up @@ -39,6 +39,26 @@ public BackgroundJobFactory([NotNull] IJobFilterProvider filterProvider)
{
}

public int RetryAttempts
{
get
{
if (_innerFactory is CoreBackgroundJobFactory factory)
{
return factory.RetryAttempts;
}

return 0;
}
set
{
if (_innerFactory is CoreBackgroundJobFactory factory)
{
factory.RetryAttempts = value;
}
}
}

internal BackgroundJobFactory(
[NotNull] IJobFilterProvider filterProvider,
[NotNull] IBackgroundJobFactory innerFactory)
Expand Down
139 changes: 119 additions & 20 deletions src/Hangfire.Core/Client/CoreBackgroundJobFactory.cs
Expand Up @@ -15,60 +15,159 @@
// License along with Hangfire. If not, see <http://www.gnu.org/licenses/>.

using System;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.ExceptionServices;
using System.Threading;
using Hangfire.Annotations;
using Hangfire.Common;
using Hangfire.Logging;
using Hangfire.States;

namespace Hangfire.Client
{
internal class CoreBackgroundJobFactory : IBackgroundJobFactory
{
private readonly IStateMachine _stateMachine;
private readonly ILog _logger = LogProvider.GetLogger(typeof(CoreBackgroundJobFactory));
private readonly object _syncRoot = new object();
private int _retryAttempts;
private Func<int, TimeSpan> _retryDelayFunc;

public CoreBackgroundJobFactory([NotNull] IStateMachine stateMachine)
{
if (stateMachine == null) throw new ArgumentNullException(nameof(stateMachine));
_stateMachine = stateMachine;
StateMachine = stateMachine ?? throw new ArgumentNullException(nameof(stateMachine));
RetryAttempts = 0;
RetryDelayFunc = GetRetryDelay;
}

public IStateMachine StateMachine => _stateMachine;
public IStateMachine StateMachine { get; }

public int RetryAttempts
{
get { lock (_syncRoot) { return _retryAttempts; } }
set { lock (_syncRoot) { _retryAttempts = value; } }
}

public Func<int, TimeSpan> RetryDelayFunc
{
get { lock (_syncRoot) { return _retryDelayFunc; } }
set { lock (_syncRoot) { _retryDelayFunc = value; } }
}

public BackgroundJob Create(CreateContext context)
{
var attemptsLeft = Math.Max(RetryAttempts, 0);
var parameters = context.Parameters.ToDictionary(
x => x.Key,
x => SerializationHelper.Serialize(x.Value, SerializationOption.User));

var createdAt = DateTime.UtcNow;
var jobId = context.Connection.CreateExpiredJob(

// Retry may cause multiple background jobs to be created, especially when there's
// a timeout-related exception. But initialization attempt will be performed only
// for the most recent job, leaving all the previous ones in a non-initialized state
// and making them invisible to other parts of the system, since no one knows their
// identifiers. Since they also will be eventually expired leaving no trace, we can
// consider that only one background job is created, regardless of retry attempts
// number.
var jobId = RetryOnException(ref attemptsLeft, _ => context.Connection.CreateExpiredJob(
context.Job,
parameters,
createdAt,
TimeSpan.FromDays(30));
TimeSpan.FromDays(30)));

var backgroundJob = new BackgroundJob(jobId, context.Job, createdAt);

if (context.InitialState != null)
{
using (var transaction = context.Connection.CreateWriteTransaction())
RetryOnException(ref attemptsLeft, attempt =>
{
var applyContext = new ApplyStateContext(
context.Storage,
context.Connection,
transaction,
backgroundJob,
context.InitialState,
oldStateName: null,
profiler: context.Profiler);

_stateMachine.ApplyState(applyContext);

transaction.Commit();
}
if (attempt > 0)
{
// Normally, a distributed lock should be applied when making a retry, since
// it's possible to get a timeout exception, when transaction was actually
// committed. But since background job can't be returned to a position where
// it's state is null, and since only the current thread knows the job's identifier
// when its state is null, and since we shouldn't do anything when it's non-null,
// there will be no any race conditions.
var data = context.Connection.GetJobData(jobId);
if (data == null) throw new InvalidOperationException($"Was unable to initialize a background job '{jobId}', because it doesn't exists.");
if (!String.IsNullOrEmpty(data.State)) return;
}
using (var transaction = context.Connection.CreateWriteTransaction())
{
var applyContext = new ApplyStateContext(
context.Storage,
context.Connection,
transaction,
backgroundJob,
context.InitialState,
oldStateName: null,
profiler: context.Profiler);
StateMachine.ApplyState(applyContext);
transaction.Commit();
}
});
}

return backgroundJob;
}

private void RetryOnException(ref int attemptsLeft, Action<int> action)
{
RetryOnException(ref attemptsLeft, attempt =>
{
action(attempt);
return true;
});
}

private T RetryOnException<T>(ref int attemptsLeft, Func<int, T> action)
{
var exceptions = new List<Exception>();
var attempt = 0;
var delay = TimeSpan.Zero;

do
{
try
{
if (delay > TimeSpan.Zero)
{
Thread.Sleep(delay);
}

return action(attempt++);
}
catch (Exception ex)
{
exceptions.Add(ex);
_logger.DebugException("An exception occurred while creating a background job, see inner exception for details.", ex);
delay = RetryDelayFunc(attempt);
}
} while (attemptsLeft-- > 0);

if (exceptions.Count == 1)
{
ExceptionDispatchInfo.Capture(exceptions[0]).Throw();
}

throw new AggregateException(exceptions);
}

private static TimeSpan GetRetryDelay(int retryAttempt)
{
switch (retryAttempt)
{
case 1: return TimeSpan.Zero;
case 2: return TimeSpan.FromMilliseconds(50);
case 3: return TimeSpan.FromMilliseconds(100);
default: return TimeSpan.FromMilliseconds(500);
}
}
}
}

0 comments on commit 5c08eb4

Please sign in to comment.