-
Notifications
You must be signed in to change notification settings - Fork 10
rollout: Include health report in service object #51
Changes from all commits
013b647
0d42934
a1f4653
4e641f1
b7d207b
6da8e9e
52a0478
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,6 +22,19 @@ const ( | |
Unhealthy | ||
) | ||
|
||
func (d DiagnosisResult) String() string { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. +++ good way |
||
switch d { | ||
case Inconclusive: | ||
return "inconclusive" | ||
case Healthy: | ||
return "healthy" | ||
case Unhealthy: | ||
return "unhealthy" | ||
default: | ||
return "unknown" | ||
} | ||
} | ||
|
||
// Diagnosis is the information about the health of the revision. | ||
type Diagnosis struct { | ||
OverallResult DiagnosisResult | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
package health | ||
|
||
import ( | ||
"fmt" | ||
|
||
"github.com/GoogleCloudPlatform/cloud-run-release-operator/pkg/config" | ||
) | ||
|
||
// StringReport returns a human-readable report of the diagnosis. | ||
func StringReport(healthCriteria []config.Metric, diagnosis Diagnosis) string { | ||
report := fmt.Sprintf("status: %s\n", diagnosis.OverallResult.String()) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what if we also added the time we wrote this down? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We could, but I'd probably have a function I will defer this after #57 since we would need the mocked time introduced there. Doing it here would be double work and would make this PR much longer. |
||
report += "metrics:" | ||
for i, result := range diagnosis.CheckResults { | ||
criteria := healthCriteria[i] | ||
|
||
// Include percentile value for latency criteria. | ||
if criteria.Type == config.LatencyMetricsCheck { | ||
report += fmt.Sprintf("\n- %s[p%.0f]: %.2f (needs %.2f)", criteria.Type, criteria.Percentile, result.ActualValue, criteria.Threshold) | ||
continue | ||
} | ||
|
||
format := "\n- %s: %.2f (needs %.2f)" | ||
if criteria.Type == config.RequestCountMetricsCheck { | ||
// No decimals for request count. | ||
format = "\n- %s: %.0f (needs %.0f)" | ||
} | ||
report += fmt.Sprintf(format, criteria.Type, result.ActualValue, criteria.Threshold) | ||
} | ||
|
||
return report | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
package health_test | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/GoogleCloudPlatform/cloud-run-release-operator/pkg/config" | ||
"github.com/GoogleCloudPlatform/cloud-run-release-operator/pkg/health" | ||
"github.com/stretchr/testify/assert" | ||
) | ||
|
||
func TestStringReport(t *testing.T) { | ||
tests := []struct { | ||
name string | ||
healthCriteria []config.Metric | ||
diagnosis health.Diagnosis | ||
expected string | ||
}{ | ||
{ | ||
name: "single metrics", | ||
healthCriteria: []config.Metric{ | ||
{Type: config.LatencyMetricsCheck, Percentile: 99, Threshold: 750}, | ||
}, | ||
diagnosis: health.Diagnosis{ | ||
OverallResult: health.Unhealthy, | ||
CheckResults: []health.CheckResult{ | ||
{Threshold: 750, ActualValue: 1000, IsCriteriaMet: true}, | ||
}, | ||
}, | ||
expected: "status: unhealthy\n" + | ||
"metrics:" + | ||
"\n- request-latency[p99]: 1000.00 (needs 750.00)", | ||
}, | ||
{ | ||
name: "more than one metrics", | ||
healthCriteria: []config.Metric{ | ||
{Type: config.RequestCountMetricsCheck, Threshold: 1000}, | ||
{Type: config.LatencyMetricsCheck, Percentile: 99, Threshold: 750}, | ||
{Type: config.ErrorRateMetricsCheck, Threshold: 5}, | ||
}, | ||
diagnosis: health.Diagnosis{ | ||
OverallResult: health.Healthy, | ||
CheckResults: []health.CheckResult{ | ||
{Threshold: 1000, ActualValue: 1500, IsCriteriaMet: true}, | ||
{Threshold: 750, ActualValue: 500, IsCriteriaMet: true}, | ||
{Threshold: 5, ActualValue: 2, IsCriteriaMet: true}, | ||
}, | ||
}, | ||
expected: "status: healthy\n" + | ||
"metrics:" + | ||
"\n- request-count: 1500 (needs 1000)" + | ||
"\n- request-latency[p99]: 500.00 (needs 750.00)" + | ||
"\n- error-rate-percent: 2.00 (needs 5.00)", | ||
}, | ||
{ | ||
name: "no metrics", | ||
expected: "status: unknown\nmetrics:", | ||
}, | ||
} | ||
|
||
for _, test := range tests { | ||
t.Run(test.name, func(tt *testing.T) { | ||
report := health.StringReport(test.healthCriteria, test.diagnosis) | ||
assert.Equal(tt, test.expected, report) | ||
}) | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,6 +15,14 @@ import ( | |
"google.golang.org/api/run/v1" | ||
) | ||
|
||
// Annotations name for information related to the rollout. | ||
const ( | ||
StableRevisionAnnotation = "rollout.cloud.run/stableRevision" | ||
CandidateRevisionAnnotation = "rollout.cloud.run/candidateRevision" | ||
LastFailedCandidateRevisionAnnotation = "rollout.cloud.run/lastFailedCandidateRevision" | ||
LastHealthReportAnnotation = "rollout.cloud.run/lastHealthReport" | ||
) | ||
|
||
// ServiceRecord holds a service object and information about it. | ||
type ServiceRecord struct { | ||
*run.Service | ||
|
@@ -115,6 +123,11 @@ func (r *Rollout) UpdateService(svc *run.Service) (*run.Service, error) { | |
r.log.Debug("new candidate, assign some traffic") | ||
svc = r.PrepareRollForward(svc, stable, candidate) | ||
svc = r.updateAnnotations(svc, stable, candidate) | ||
|
||
// TODO(gvso): create a setHealthReportAnnotation that appends the | ||
// current time to the report before setting the annotation. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this probably could be part of the health report string generation IMO. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My initial thought was also this one, but the problem is that we don't generate a report when we have a new candidate. We have a string literal in that case |
||
setAnnotation(svc, LastHealthReportAnnotation, "new candidate, no health report available yet") | ||
|
||
err := r.replaceService(svc) | ||
return svc, errors.Wrap(err, "failed to replace service") | ||
} | ||
|
@@ -132,19 +145,18 @@ func (r *Rollout) UpdateService(svc *run.Service) (*run.Service, error) { | |
case health.Healthy: | ||
r.log.Debug("healthy candidate, roll forward") | ||
svc = r.PrepareRollForward(svc, stable, candidate) | ||
break | ||
case health.Unhealthy: | ||
r.log.Info("unhealthy candidate, rollback") | ||
r.shouldRollback = true | ||
svc = r.PrepareRollback(svc, stable, candidate) | ||
break | ||
default: | ||
return nil, errors.Errorf("invalid candidate's health diagnosis %v", diagnosis.OverallResult) | ||
} | ||
|
||
// TODO(gvso): include annotation about the diagnosis (especially when | ||
// diagnosis is unhealthy). | ||
svc = r.updateAnnotations(svc, stable, candidate) | ||
report := health.StringReport(r.strategy.HealthCriteria, diagnosis) | ||
setAnnotation(svc, LastHealthReportAnnotation, report) | ||
|
||
err = r.replaceService(svc) | ||
return svc, errors.Wrap(err, "failed to replace service") | ||
} | ||
|
@@ -282,26 +294,30 @@ func (r *Rollout) nextCandidateTraffic(current int64) int64 { | |
|
||
// updateAnnotations updates the annotations to keep some state about the rollout. | ||
func (r *Rollout) updateAnnotations(svc *run.Service, stable, candidate string) *run.Service { | ||
if svc.Metadata.Annotations == nil { | ||
svc.Metadata.Annotations = make(map[string]string) | ||
} | ||
|
||
// The candidate has become the stable revision. | ||
if r.promoteToStable { | ||
svc.Metadata.Annotations[StableRevisionAnnotation] = candidate | ||
setAnnotation(svc, StableRevisionAnnotation, candidate) | ||
delete(svc.Metadata.Annotations, CandidateRevisionAnnotation) | ||
return svc | ||
} | ||
|
||
svc.Metadata.Annotations[StableRevisionAnnotation] = stable | ||
svc.Metadata.Annotations[CandidateRevisionAnnotation] = candidate | ||
setAnnotation(svc, StableRevisionAnnotation, stable) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nice, yeah you can just use it here :D |
||
setAnnotation(svc, CandidateRevisionAnnotation, candidate) | ||
if r.shouldRollback { | ||
svc.Metadata.Annotations[LastFailedCandidateRevisionAnnotation] = candidate | ||
setAnnotation(svc, LastFailedCandidateRevisionAnnotation, candidate) | ||
} | ||
|
||
return svc | ||
} | ||
|
||
// setAnnotation sets the value of an annotation. | ||
func setAnnotation(svc *run.Service, key, value string) { | ||
if svc.Metadata.Annotations == nil { | ||
svc.Metadata.Annotations = make(map[string]string) | ||
} | ||
svc.Metadata.Annotations[key] = value | ||
} | ||
|
||
// diagnoseCandidate returns the candidate's diagnosis based on metrics. | ||
func (r *Rollout) diagnoseCandidate(candidate string, healthCriteria []config.Metric) (d health.Diagnosis, err error) { | ||
healthCheckOffset := time.Duration(r.strategy.HealthOffsetMinute) * time.Minute | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
testify is able to compare more than primitives types, so I really didn't need go-cmp.