/
deployment_error_classifier.go
149 lines (131 loc) · 4.71 KB
/
deployment_error_classifier.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
/*
* Copyright (c) Microsoft Corporation.
* Licensed under the MIT license.
*/
package reconcilers
import (
"github.com/Azure/azure-service-operator/hack/generated/pkg/armclient"
)
// DeploymentErrorClassification describes how a deployment error should be
// treated by the caller: either it can be retried or it is fatal.
type DeploymentErrorClassification string

const (
	// DeploymentErrorRetryable marks a deployment error that may be retried.
	DeploymentErrorRetryable DeploymentErrorClassification = "retryable"
	// DeploymentErrorFatal marks a deployment error that should not be retried.
	DeploymentErrorFatal DeploymentErrorClassification = "fatal"
)
// DeploymentErrorDetails is the result produced by ClassifyDeploymentError:
// the classification chosen for a deployment error together with the code
// and message that should be reported for it.
type DeploymentErrorDetails struct {
// Classification says whether the error is retryable or fatal.
Classification DeploymentErrorClassification
// Code is the error code; ClassifyDeploymentError fills in UnknownErrorCode
// when the source error has no code.
Code string
// Message is the error message; ClassifyDeploymentError fills in
// UnknownErrorMessage when the source error has no message.
Message string
}
// Fallback values used by ClassifyDeploymentError when the deployment error
// is nil or does not carry a code and/or message of its own.
const (
// UnknownErrorCode is the code reported when no error code is available.
UnknownErrorCode = "UnknownError"
// UnknownErrorMessage is the message reported when no error message is available.
UnknownErrorMessage = "There was an unknown deployment error"
)
// stringOrDefault returns str unless it is the empty string, in which case
// def is returned instead.
func stringOrDefault(str string, def string) string {
	if str != "" {
		return str
	}
	return def
}
// stringPtrOrDefault returns the string str points to, or def when str is
// nil or points to the empty string.
func stringPtrOrDefault(str *string, def string) string {
	if str != nil && *str != "" {
		return *str
	}
	return def
}
// ClassifyDeploymentError decides whether a deployment error is retryable or
// fatal and picks the code and message to report for it.
//
//   - A nil error, or an error without any details, is classified as retryable.
//     The error's own code/message are used when present, otherwise the
//     Unknown* fallbacks.
//   - If any detail is classified as fatal, the whole error is fatal and the
//     code/message of the first fatal detail are reported.
//   - Otherwise the error is retryable and the first detail is reported.
func ClassifyDeploymentError(deploymentError *armclient.DeploymentError) DeploymentErrorDetails {
	// Nothing to inspect at all: default to retrying.
	if deploymentError == nil {
		return DeploymentErrorDetails{
			Classification: DeploymentErrorRetryable,
			Code:           UnknownErrorCode,
			Message:        UnknownErrorMessage,
		}
	}

	details := deploymentError.Details

	// No inner errors to classify: default to retrying, reporting whatever
	// the outer error carries.
	if len(details) == 0 {
		return DeploymentErrorDetails{
			Classification: DeploymentErrorRetryable,
			Code:           stringOrDefault(deploymentError.Code, UnknownErrorCode),
			Message:        stringOrDefault(deploymentError.Message, UnknownErrorMessage),
		}
	}

	// The API allows a list of details, so handle more than one even if that
	// turns out to be rare in practice. Start from the first detail (reported
	// when nothing is fatal) and switch to the first fatal detail if there is
	// one, since a single fatal sub-error makes the whole error fatal.
	reported := details[0]
	result := DeploymentErrorRetryable
	for _, inner := range details {
		if classifyInnerDeploymentError(inner) == DeploymentErrorFatal {
			reported = inner
			result = DeploymentErrorFatal
			break
		}
	}

	return DeploymentErrorDetails{
		Classification: result,
		Code:           stringOrDefault(reported.Code, UnknownErrorCode),
		Message:        stringOrDefault(reported.Message, UnknownErrorMessage),
	}
}
// classifyInnerDeploymentError classifies a single inner deployment error by
// its error code. Codes in the fatal list yield DeploymentErrorFatal; every
// other code, including a missing one and codes not listed here, yields
// DeploymentErrorRetryable.
//
// See https://docs.microsoft.com/en-us/azure/azure-resource-manager/templates/common-deployment-errors
// for a breakdown of common deployment error codes. Note that the error codes documented there are
// the inner error codes being parsed here.
func classifyInnerDeploymentError(deploymentError armclient.DeploymentError) DeploymentErrorClassification {
	code := deploymentError.Code

	switch code {
	case "BadRequest",
		"Conflict", // TODO: is conflict always not retryable?
		"PublicIpForGatewayIsRequired", // TODO: There's not a great way to look at an arbitrary error returned by this API and determine if it's a 4xx or 5xx level... ugh
		"InvalidParameter",
		"InvalidParameterValue",
		"InvalidRequestContent",
		"InvalidTemplate",
		"InvalidValuesForRequestParameters",
		"InvalidGatewaySkuProvidedForGatewayVpnType",
		"InvalidGatewaySize",
		"LocationRequired",
		"MissingRequiredParameter",
		"PasswordTooLong",
		"PrivateIPAddressInReservedRange",
		"PrivateIPAddressNotInSubnet",
		"PropertyChangeNotAllowed",
		"RequestDisallowedByPolicy", // TODO: Technically could probably retry through this?
		"ReservedResourceName",
		"SkuNotAvailable",
		"SubscriptionNotFound":
		return DeploymentErrorFatal

	case "", // No code at all: assume it can be retried
		"AnotherOperationInProgress",
		"AuthorizationFailed",
		"AllocationFailed",
		"InvalidResourceReference",
		"InvalidSubscriptionRegistrationState",
		"LinkedAuthorizationFailed",
		"MissingRegistrationForLocation",
		"MissingSubscriptionRegistration",
		"NoRegisteredProviderFound",
		"NotFound",
		// It sounds weird to retry on "OperationNotAllowed" but according to the docs
		// it's a quota issue, so in theory it can be retried through
		"OperationNotAllowed",
		"ParentResourceNotFound",
		"ResourceGroupNotFound",
		"ResourceNotFound",
		"ResourceQuotaExceeded",
		"SubscriptionNotRegistered":
		return DeploymentErrorRetryable
	}

	// TODO: The retryable codes above need not be listed at all, since
	// retrying is also the fallback for any code not recognized here.
	return DeploymentErrorRetryable
}