/
SqlDatabaseTransientErrorDetectionStrategy.cs
201 lines (166 loc) · 10 KB
/
SqlDatabaseTransientErrorDetectionStrategy.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
//
// Purpose:
// Provides the transient error detection logic for transient faults that are specific to SQL Database.
//
// Notes:
// - This class was forked from the Windows Azure Transient Fault Handling Library ("Topaz")
// available here: http://topaz.codeplex.com/ and will now be maintained by us.
// - In the future, we should consider moving this to a config file to make updates
// easier in case WA SQL Database decides to change their error codes.
using System;
using System.ComponentModel;
using Microsoft.Data.SqlClient;
namespace Microsoft.Azure.SqlDatabase.ElasticScale
{
internal partial class TransientFaultHandling
{
/// <summary>
/// Provides the transient error detection logic for transient faults that are specific to SQL Database.
/// </summary>
internal sealed class SqlDatabaseTransientErrorDetectionStrategy : ITransientErrorDetectionStrategy
{
#region ProcessNetLibErrorCode enumeration
/// <summary>
/// Error codes reported by the DBNETLIB module.
/// </summary>
/// <remarks>
/// Members are cast to <see cref="int"/> and compared against <c>SqlError.Number</c> by
/// <c>IsTransient</c>, so the explicit numeric values are significant and must not be renumbered.
/// Within this class only <see cref="EncryptionNotSupported"/> is referenced; the remaining
/// members are declared for completeness.
/// </remarks>
private enum ProcessNetLibErrorCode
{
ZeroBytes = -3,
// Timeout expired. The timeout period elapsed prior to completion of the operation or the server is not responding.
Timeout = -2,
Unknown = -1,
InsufficientMemory = 1,
AccessDenied = 2,
ConnectionBusy = 3,
ConnectionBroken = 4,
ConnectionLimit = 5,
ServerNotFound = 6,
NetworkNotFound = 7,
InsufficientResources = 8,
NetworkBusy = 9,
NetworkAccessDenied = 10,
GeneralError = 11,
IncorrectMode = 12,
NameNotFound = 13,
InvalidConnection = 14,
ReadWriteError = 15,
TooManyHandles = 16,
ServerError = 17,
SSLError = 18,
EncryptionError = 19,
// The instance of SQL Server you attempted to connect to does not support encryption.
// This is the one DBNETLIB code that IsTransient treats as retryable.
EncryptionNotSupported = 20
}
#endregion
#region ITransientErrorDetectionStrategy implementation
/// <summary>
/// Determines whether the specified exception represents a transient failure that can be compensated by a retry.
/// </summary>
/// <remarks>
/// An exception is reported as transient when any of the following holds:
/// <list type="bullet">
/// <item><description>It is a <see cref="SqlException"/> and at least one of its errors carries one of the
/// error numbers listed in the switch below.</description></item>
/// <item><description>It is a <see cref="SqlException"/> whose inner exception is a
/// <see cref="Win32Exception"/> reporting a wait timeout or a semaphore timeout.</description></item>
/// <item><description>It is a <see cref="TimeoutException"/>.</description></item>
/// </list>
/// Side effect: for a throttling error the decoded throttling condition is stored in the
/// <c>Data</c> dictionary of the <see cref="SqlException"/> before returning.
/// Only the exception itself and, for the Win32 check, its direct inner exception are inspected.
/// </remarks>
/// <param name="ex">The exception object to be verified. May be null.</param>
/// <returns>true if the specified exception is considered as transient; otherwise, false.</returns>
public bool IsTransient(Exception ex)
{
    // Win32 system error codes, as surfaced by Win32Exception.NativeErrorCode.
    // WAIT_TIMEOUT is 258 (0x102). ERROR_SEM_TIMEOUT is 121 decimal, i.e. 0x79; the value
    // previously tested here was 0x121 (289), which is not the semaphore-timeout code, so the
    // "semaphore timeout expired" case could never match.
    const int WaitTimeout = 0x102;
    const int SemaphoreTimeout = 0x79;

    if (ex == null)
    {
        return false;
    }

    SqlException sqlException = ex as SqlException;
    if (sqlException == null)
    {
        // Not a SQL failure: the only other exception type treated as transient is a timeout.
        return ex is TimeoutException;
    }

    // Enumerate through all errors found in the exception; a single transient error is enough.
    foreach (SqlError err in sqlException.Errors)
    {
        switch (err.Number)
        {
            // SQL Error Code: 40501
            // The service is currently busy. Retry the request after 10 seconds. Code: (reason code to be decoded).
            case ThrottlingCondition.ThrottlingErrorNumber:
                // Decode the reason code from the error message to determine the grounds for throttling.
                var condition = ThrottlingCondition.FromError(err);

                // Attach the decoded values as additional attributes to the original SQL exception,
                // keyed by the type names of the throttling mode and of the condition itself.
                sqlException.Data[condition.ThrottlingMode.GetType().Name] =
                    condition.ThrottlingMode.ToString();
                sqlException.Data[condition.GetType().Name] = condition;
                return true;

            // SQL Error Code: 10928
            // Resource ID: %d. The %s limit for the database is %d and has been reached.
            case 10928:
            // SQL Error Code: 10929
            // Resource ID: %d. The %s minimum guarantee is %d, maximum limit is %d and the current usage for the database is %d.
            // However, the server is currently too busy to support requests greater than %d for this database.
            case 10929:
            // SQL Error Code: 10053
            // A transport-level error has occurred when receiving results from the server.
            // An established connection was aborted by the software in your host machine.
            case 10053:
            // SQL Error Code: 10054
            // A transport-level error has occurred when sending the request to the server.
            // (provider: TCP Provider, error: 0 - An existing connection was forcibly closed by the remote host.)
            case 10054:
            // SQL Error Code: 10060
            // A network-related or instance-specific error occurred while establishing a connection to SQL Server.
            // The server was not found or was not accessible. Verify that the instance name is correct and that SQL Server
            // is configured to allow remote connections. (provider: TCP Provider, error: 0 - A connection attempt failed
            // because the connected party did not properly respond after a period of time, or established connection failed
            // because connected host has failed to respond.)
            case 10060:
            // SQL Error Code: 18401
            // Login failed for user '%s'. Reason: Server is in script upgrade mode. Only administrator can connect at this time.
            // Devnote: this can happen when SQL is going through recovery (e.g. after failover)
            case 18401:
            // SQL Error Code: 40197
            // The service has encountered an error processing your request. Please try again.
            case 40197:
            // SQL Error Code: 40540
            // The service has encountered an error processing your request. Please try again.
            case 40540:
            // SQL Error Code: 40613
            // Database XXXX on server YYYY is not currently available. Please retry the connection later. If the problem persists, contact customer
            // support, and provide them the session tracing ID of ZZZZZ.
            case 40613:
            // SQL Error Code: 40143
            // The service has encountered an error processing your request. Please try again.
            case 40143:
            // SQL Error Code: 233
            // The client was unable to establish a connection because of an error during connection initialization process before login.
            // Possible causes include the following: the client tried to connect to an unsupported version of SQL Server; the server was too busy
            // to accept new connections; or there was a resource limitation (insufficient memory or maximum allowed connections) on the server.
            // (provider: TCP Provider, error: 0 - An existing connection was forcibly closed by the remote host.)
            case 233:
            // SQL Error Code: 64
            // A connection was successfully established with the server, but then an error occurred during the login process.
            // (provider: TCP Provider, error: 0 - The specified network name is no longer available.)
            case 64:
            // DBNETLIB Error Code: 20
            // The instance of SQL Server you attempted to connect to does not support encryption.
            case (int)ProcessNetLibErrorCode.EncryptionNotSupported:
                return true;
        }
    }

    // Prelogin failure can happen due to waits expiring on windows handles. Or
    // due to a bug in the gateway code, a dropped database with a pooled connection
    // when reset results in a timeout error instead of immediate failure.
    Win32Exception wex = sqlException.InnerException as Win32Exception;
    if (wex != null)
    {
        switch (wex.NativeErrorCode)
        {
            // Timeout expired
            case WaitTimeout:
            // Semaphore timeout expired
            case SemaphoreTimeout:
                return true;
        }
    }

    return false;
}
#endregion
}
}
}