Skip to content
This repository has been archived by the owner on Jan 11, 2023. It is now read-only.

Commit

Permalink
rollout: Include health report in service object (#51)
Browse files Browse the repository at this point in the history
This adds annotation about the last health report. It uses the key
rollout.cloud.run/lastHealthReport and assigns it a string with the last
diagnosis (healthy, unhealthy) and the check results.
  • Loading branch information
Getulio Valentin Sánchez committed Aug 7, 2020
1 parent b780b66 commit 6f36920
Show file tree
Hide file tree
Showing 7 changed files with 156 additions and 24 deletions.
1 change: 0 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ require (
cloud.google.com/go v0.60.0 // indirect
github.com/TV4/logrus-stackdriver-formatter v0.1.0
github.com/go-stack/stack v1.8.0 // indirect
github.com/google/go-cmp v0.5.0
github.com/mattn/go-isatty v0.0.12
github.com/pkg/errors v0.9.1
github.com/sirupsen/logrus v1.6.0
Expand Down
13 changes: 13 additions & 0 deletions pkg/health/health.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,19 @@ const (
Unhealthy
)

func (d DiagnosisResult) String() string {
switch d {
case Inconclusive:
return "inconclusive"
case Healthy:
return "healthy"
case Unhealthy:
return "unhealthy"
default:
return "unknown"
}
}

// Diagnosis is the information about the health of the revision.
type Diagnosis struct {
OverallResult DiagnosisResult
Expand Down
31 changes: 31 additions & 0 deletions pkg/health/report.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package health

import (
"fmt"

"github.com/GoogleCloudPlatform/cloud-run-release-operator/pkg/config"
)

// StringReport returns a human-readable report of the diagnosis.
func StringReport(healthCriteria []config.Metric, diagnosis Diagnosis) string {
report := fmt.Sprintf("status: %s\n", diagnosis.OverallResult.String())
report += "metrics:"
for i, result := range diagnosis.CheckResults {
criteria := healthCriteria[i]

// Include percentile value for latency criteria.
if criteria.Type == config.LatencyMetricsCheck {
report += fmt.Sprintf("\n- %s[p%.0f]: %.2f (needs %.2f)", criteria.Type, criteria.Percentile, result.ActualValue, criteria.Threshold)
continue
}

format := "\n- %s: %.2f (needs %.2f)"
if criteria.Type == config.RequestCountMetricsCheck {
// No decimals for request count.
format = "\n- %s: %.0f (needs %.0f)"
}
report += fmt.Sprintf(format, criteria.Type, result.ActualValue, criteria.Threshold)
}

return report
}
66 changes: 66 additions & 0 deletions pkg/health/report_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
package health_test

import (
"testing"

"github.com/GoogleCloudPlatform/cloud-run-release-operator/pkg/config"
"github.com/GoogleCloudPlatform/cloud-run-release-operator/pkg/health"
"github.com/stretchr/testify/assert"
)

func TestStringReport(t *testing.T) {
tests := []struct {
name string
healthCriteria []config.Metric
diagnosis health.Diagnosis
expected string
}{
{
name: "single metrics",
healthCriteria: []config.Metric{
{Type: config.LatencyMetricsCheck, Percentile: 99, Threshold: 750},
},
diagnosis: health.Diagnosis{
OverallResult: health.Unhealthy,
CheckResults: []health.CheckResult{
{Threshold: 750, ActualValue: 1000, IsCriteriaMet: true},
},
},
expected: "status: unhealthy\n" +
"metrics:" +
"\n- request-latency[p99]: 1000.00 (needs 750.00)",
},
{
name: "more than one metrics",
healthCriteria: []config.Metric{
{Type: config.RequestCountMetricsCheck, Threshold: 1000},
{Type: config.LatencyMetricsCheck, Percentile: 99, Threshold: 750},
{Type: config.ErrorRateMetricsCheck, Threshold: 5},
},
diagnosis: health.Diagnosis{
OverallResult: health.Healthy,
CheckResults: []health.CheckResult{
{Threshold: 1000, ActualValue: 1500, IsCriteriaMet: true},
{Threshold: 750, ActualValue: 500, IsCriteriaMet: true},
{Threshold: 5, ActualValue: 2, IsCriteriaMet: true},
},
},
expected: "status: healthy\n" +
"metrics:" +
"\n- request-count: 1500 (needs 1000)" +
"\n- request-latency[p99]: 500.00 (needs 750.00)" +
"\n- error-rate-percent: 2.00 (needs 5.00)",
},
{
name: "no metrics",
expected: "status: unknown\nmetrics:",
},
}

for _, test := range tests {
t.Run(test.name, func(tt *testing.T) {
report := health.StringReport(test.healthCriteria, test.diagnosis)
assert.Equal(tt, test.expected, report)
})
}
}
7 changes: 0 additions & 7 deletions pkg/rollout/revision.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,6 @@ import (
"google.golang.org/api/run/v1"
)

// Annotations name for information related to the rollout.
const (
StableRevisionAnnotation = "rollout.cloud.run/stableRevision"
CandidateRevisionAnnotation = "rollout.cloud.run/candidateRevision"
LastFailedCandidateRevisionAnnotation = "rollout.cloud.run/lastFailedCandidateRevision"
)

// DetectStableRevisionName returns the stable revision of the Cloud Run service.
//
// It first checks if there's a revision with the tag "stable". If such a
Expand Down
40 changes: 28 additions & 12 deletions pkg/rollout/rollout.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,14 @@ import (
"google.golang.org/api/run/v1"
)

// Annotations name for information related to the rollout.
const (
StableRevisionAnnotation = "rollout.cloud.run/stableRevision"
CandidateRevisionAnnotation = "rollout.cloud.run/candidateRevision"
LastFailedCandidateRevisionAnnotation = "rollout.cloud.run/lastFailedCandidateRevision"
LastHealthReportAnnotation = "rollout.cloud.run/lastHealthReport"
)

// ServiceRecord holds a service object and information about it.
type ServiceRecord struct {
*run.Service
Expand Down Expand Up @@ -115,6 +123,11 @@ func (r *Rollout) UpdateService(svc *run.Service) (*run.Service, error) {
r.log.Debug("new candidate, assign some traffic")
svc = r.PrepareRollForward(svc, stable, candidate)
svc = r.updateAnnotations(svc, stable, candidate)

// TODO(gvso): create a setHealthReportAnnotation that appends the
// current time to the report before setting the annotation.
setAnnotation(svc, LastHealthReportAnnotation, "new candidate, no health report available yet")

err := r.replaceService(svc)
return svc, errors.Wrap(err, "failed to replace service")
}
Expand All @@ -132,19 +145,18 @@ func (r *Rollout) UpdateService(svc *run.Service) (*run.Service, error) {
case health.Healthy:
r.log.Debug("healthy candidate, roll forward")
svc = r.PrepareRollForward(svc, stable, candidate)
break
case health.Unhealthy:
r.log.Info("unhealthy candidate, rollback")
r.shouldRollback = true
svc = r.PrepareRollback(svc, stable, candidate)
break
default:
return nil, errors.Errorf("invalid candidate's health diagnosis %v", diagnosis.OverallResult)
}

// TODO(gvso): include annotation about the diagnosis (especially when
// diagnosis is unhealthy).
svc = r.updateAnnotations(svc, stable, candidate)
report := health.StringReport(r.strategy.HealthCriteria, diagnosis)
setAnnotation(svc, LastHealthReportAnnotation, report)

err = r.replaceService(svc)
return svc, errors.Wrap(err, "failed to replace service")
}
Expand Down Expand Up @@ -282,26 +294,30 @@ func (r *Rollout) nextCandidateTraffic(current int64) int64 {

// updateAnnotations updates the annotations to keep some state about the rollout.
func (r *Rollout) updateAnnotations(svc *run.Service, stable, candidate string) *run.Service {
if svc.Metadata.Annotations == nil {
svc.Metadata.Annotations = make(map[string]string)
}

// The candidate has become the stable revision.
if r.promoteToStable {
svc.Metadata.Annotations[StableRevisionAnnotation] = candidate
setAnnotation(svc, StableRevisionAnnotation, candidate)
delete(svc.Metadata.Annotations, CandidateRevisionAnnotation)
return svc
}

svc.Metadata.Annotations[StableRevisionAnnotation] = stable
svc.Metadata.Annotations[CandidateRevisionAnnotation] = candidate
setAnnotation(svc, StableRevisionAnnotation, stable)
setAnnotation(svc, CandidateRevisionAnnotation, candidate)
if r.shouldRollback {
svc.Metadata.Annotations[LastFailedCandidateRevisionAnnotation] = candidate
setAnnotation(svc, LastFailedCandidateRevisionAnnotation, candidate)
}

return svc
}

// setAnnotation sets the value of an annotation.
func setAnnotation(svc *run.Service, key, value string) {
if svc.Metadata.Annotations == nil {
svc.Metadata.Annotations = make(map[string]string)
}
svc.Metadata.Annotations[key] = value
}

// diagnoseCandidate returns the candidate's diagnosis based on metrics.
func (r *Rollout) diagnoseCandidate(candidate string, healthCriteria []config.Metric) (d health.Diagnosis, err error) {
healthCheckOffset := time.Duration(r.strategy.HealthOffsetMinute) * time.Minute
Expand Down
22 changes: 18 additions & 4 deletions pkg/rollout/rollout_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import (
runMocker "github.com/GoogleCloudPlatform/cloud-run-release-operator/internal/run/mock"
"github.com/GoogleCloudPlatform/cloud-run-release-operator/pkg/config"
"github.com/GoogleCloudPlatform/cloud-run-release-operator/pkg/rollout"
"github.com/google/go-cmp/cmp"
"github.com/sirupsen/logrus"
"github.com/stretchr/testify/assert"
"google.golang.org/api/run/v1"
Expand Down Expand Up @@ -86,6 +85,7 @@ func TestUpdateService(t *testing.T) {
{Type: config.ErrorRateMetricsCheck, Threshold: 5},
},
outAnnotations: map[string]string{
rollout.LastHealthReportAnnotation: "new candidate, no health report available yet",
rollout.StableRevisionAnnotation: "test-002",
rollout.CandidateRevisionAnnotation: "test-003",
},
Expand Down Expand Up @@ -123,6 +123,7 @@ func TestUpdateService(t *testing.T) {
},
lastReady: "test-002",
outAnnotations: map[string]string{
rollout.LastHealthReportAnnotation: "new candidate, no health report available yet",
rollout.StableRevisionAnnotation: "test-001",
rollout.CandidateRevisionAnnotation: "test-002",
},
Expand All @@ -146,6 +147,10 @@ func TestUpdateService(t *testing.T) {
{Type: config.ErrorRateMetricsCheck, Threshold: 5},
},
outAnnotations: map[string]string{
rollout.LastHealthReportAnnotation: "status: healthy\n" +
"metrics:" +
"\n- request-latency[p99]: 500.00 (needs 750.00)" +
"\n- error-rate-percent: 1.00 (needs 5.00)",
rollout.StableRevisionAnnotation: "test-001",
rollout.CandidateRevisionAnnotation: "test-002",
},
Expand All @@ -165,6 +170,7 @@ func TestUpdateService(t *testing.T) {
},
lastReady: "test-003",
outAnnotations: map[string]string{
rollout.LastHealthReportAnnotation: "new candidate, no health report available yet",
rollout.StableRevisionAnnotation: "test-001",
rollout.CandidateRevisionAnnotation: "test-003",
},
Expand All @@ -187,6 +193,10 @@ func TestUpdateService(t *testing.T) {
{Type: config.ErrorRateMetricsCheck, Threshold: 5},
},
outAnnotations: map[string]string{
rollout.LastHealthReportAnnotation: "status: healthy\n" +
"metrics:" +
"\n- request-latency[p99]: 500.00 (needs 750.00)" +
"\n- error-rate-percent: 1.00 (needs 5.00)",
rollout.StableRevisionAnnotation: "test-002",
},
outTraffic: []*run.TrafficTarget{
Expand All @@ -207,6 +217,10 @@ func TestUpdateService(t *testing.T) {
{Type: config.ErrorRateMetricsCheck, Threshold: 0.95},
},
outAnnotations: map[string]string{
rollout.LastHealthReportAnnotation: "status: unhealthy\n" +
"metrics:" +
"\n- request-latency[p99]: 500.00 (needs 100.00)" +
"\n- error-rate-percent: 1.00 (needs 0.95)",
rollout.StableRevisionAnnotation: "test-001",
rollout.CandidateRevisionAnnotation: "test-002",
rollout.LastFailedCandidateRevisionAnnotation: "test-002",
Expand Down Expand Up @@ -285,8 +299,8 @@ func TestUpdateService(t *testing.T) {
} else if test.nilService {
assert.Nil(t, svc)
} else {
assert.True(t, cmp.Equal(test.outAnnotations, svc.Metadata.Annotations))
assert.True(t, cmp.Equal(test.outTraffic, svc.Spec.Traffic))
assert.Equal(t, test.outAnnotations, svc.Metadata.Annotations)
assert.Equal(t, test.outTraffic, svc.Spec.Traffic)
}
})

Expand Down Expand Up @@ -418,7 +432,7 @@ func TestPrepareRollForward(t *testing.T) {

t.Run(test.name, func(t *testing.T) {
svc = r.PrepareRollForward(svc, test.stable, test.candidate)
assert.True(t, cmp.Equal(test.expected, svc.Spec.Traffic))
assert.Equal(t, test.expected, svc.Spec.Traffic)
})
}
}
Expand Down

0 comments on commit 6f36920

Please sign in to comment.