diff --git a/src/Telemetry/Telemetry.cs b/src/Telemetry/Telemetry.cs index 75853419b..2b3f9b8de 100644 --- a/src/Telemetry/Telemetry.cs +++ b/src/Telemetry/Telemetry.cs @@ -384,6 +384,7 @@ public enum TelemetryMeasureName GetPrimaryKeysDurationMs, InsertGlobalStateTableRowDurationMs, ReleaseLeasesDurationMs, + RetryAttemptNumber, SetLastSyncVersionDurationMs, TransactionDurationMs, UpdateLastSyncVersionDurationMs, @@ -408,6 +409,7 @@ public enum TelemetryErrorName ProcessChanges, PropsNotExistOnTable, ReleaseLeases, + ReleaseLeasesNoRetriesLeft, ReleaseLeasesRollback, RenewLeases, RenewLeasesLoop, diff --git a/src/TriggerBinding/SqlTableChangeMonitor.cs b/src/TriggerBinding/SqlTableChangeMonitor.cs index d0f8241f0..f46da1865 100644 --- a/src/TriggerBinding/SqlTableChangeMonitor.cs +++ b/src/TriggerBinding/SqlTableChangeMonitor.cs @@ -35,7 +35,7 @@ internal sealed class SqlTableChangeMonitor : IDisposable public const int MaxLeaseRenewalCount = 10; public const int LeaseIntervalInSeconds = 60; public const int LeaseRenewalIntervalInSeconds = 15; - + public const int MaxRetryReleaseLeases = 3; private readonly string _connectionString; private readonly int _userTableId; private readonly SqlObject _userTable; @@ -491,8 +491,9 @@ private async Task ReleaseLeasesAsync(SqlConnection connection, CancellationToke await this._rowsLock.WaitAsync(token); this._logger.LogDebugWithThreadId("END WaitRowsLock - ReleaseLeases"); long newLastSyncVersion = this.RecomputeLastSyncVersion(); + bool retrySucceeded = false; - try + for (int retryCount = 1; retryCount <= MaxRetryReleaseLeases && !retrySucceeded; retryCount++) { var transactionSw = Stopwatch.StartNew(); long releaseLeasesDurationMs = 0L, updateLastSyncVersionDurationMs = 0L; @@ -528,14 +529,30 @@ private async Task ReleaseLeasesAsync(SqlConnection connection, CancellationToke { [TelemetryMeasureName.ReleaseLeasesDurationMs] = releaseLeasesDurationMs, [TelemetryMeasureName.UpdateLastSyncVersionDurationMs] = updateLastSyncVersionDurationMs, + [TelemetryMeasureName.TransactionDurationMs] = transactionSw.ElapsedMilliseconds, }; TelemetryInstance.TrackEvent(TelemetryEventName.ReleaseLeasesEnd, this._telemetryProps, measures); + retrySucceeded = true; } catch (Exception ex) { - this._logger.LogError($"Failed to execute SQL commands to release leases for table '{this._userTable.FullName}' due to exception: {ex.GetType()}. Exception message: {ex.Message}"); - TelemetryInstance.TrackException(TelemetryErrorName.ReleaseLeases, ex, this._telemetryProps); + if (retryCount < MaxRetryReleaseLeases) + { + this._logger.LogError($"Failed to execute SQL commands to release leases in attempt: {retryCount} for table '{this._userTable.FullName}' due to exception: {ex.GetType()}. Exception message: {ex.Message}"); + + var measures = new Dictionary + { + [TelemetryMeasureName.RetryAttemptNumber] = retryCount, + }; + + TelemetryInstance.TrackException(TelemetryErrorName.ReleaseLeases, ex, this._telemetryProps, measures); + } + else + { + this._logger.LogError($"Failed to release leases for table '{this._userTable.FullName}' after {MaxRetryReleaseLeases} attempts due to exception: {ex.GetType()}. Exception message: {ex.Message}"); + TelemetryInstance.TrackException(TelemetryErrorName.ReleaseLeasesNoRetriesLeft, ex, this._telemetryProps); + } try { @@ -549,20 +566,9 @@ private async Task ReleaseLeasesAsync(SqlConnection connection, CancellationToke } } } - catch (Exception e) - { - // What should we do if releasing the leases fails? We could try to release them again or just wait, - // since eventually the lease time will expire. Then another thread will re-process the same changes - // though, so less than ideal. But for now that's the functionality. - this._logger.LogError($"Failed to release leases for table '{this._userTable.FullName}' due to exception: {e.GetType()}. Exception message: {e.Message}"); - TelemetryInstance.TrackException(TelemetryErrorName.ReleaseLeases, e, this._telemetryProps); - } - finally - { - // Want to do this before releasing the lock in case the renew leases thread wakes up. It will see that - // the state is checking for changes and not renew the (just released) leases. - await this.ClearRowsAsync(false); - } + // Want to do this before releasing the lock in case the renew leases thread wakes up. It will see that + // the state is checking for changes and not renew the (just released) leases. + await this.ClearRowsAsync(false); } ///