Skip to content
This repository has been archived by the owner on Jan 11, 2023. It is now read-only.

rollout: Include health report in service object #51

Merged
merged 7 commits into from
Aug 7, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ require (
cloud.google.com/go v0.60.0 // indirect
github.com/TV4/logrus-stackdriver-formatter v0.1.0
github.com/go-stack/stack v1.8.0 // indirect
github.com/google/go-cmp v0.5.0
Copy link
Contributor Author

@gvso gvso Aug 4, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

testify is able to compare more than primitives types, so I really didn't need go-cmp.

github.com/mattn/go-isatty v0.0.12
github.com/pkg/errors v0.9.1
github.com/sirupsen/logrus v1.6.0
Expand Down
13 changes: 13 additions & 0 deletions pkg/health/health.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,19 @@ const (
Unhealthy
)

func (d DiagnosisResult) String() string {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+++ good way

switch d {
case Inconclusive:
return "inconclusive"
case Healthy:
return "healthy"
case Unhealthy:
return "unhealthy"
default:
return "unknown"
}
}

// Diagnosis is the information about the health of the revision.
type Diagnosis struct {
OverallResult DiagnosisResult
Expand Down
31 changes: 31 additions & 0 deletions pkg/health/report.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package health

import (
"fmt"

"github.com/GoogleCloudPlatform/cloud-run-release-operator/pkg/config"
)

// StringReport returns a human-readable report of the diagnosis.
func StringReport(healthCriteria []config.Metric, diagnosis Diagnosis) string {
report := fmt.Sprintf("status: %s\n", diagnosis.OverallResult.String())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what if we also added the time we wrote this down?
does the user have any way of telling when was this status report for?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could, but I'd probably have a function setHealthReportAnnotation(report string) in rollout since there are cases in which this function is not called (e.g. new candidate). In that function, we can append the time at the end of any report.

I will defer this after #57 since we would need the mocked time introduced there. Doing it here would be double work and would make this PR much longer.

report += "metrics:"
for i, result := range diagnosis.CheckResults {
criteria := healthCriteria[i]

// Include percentile value for latency criteria.
if criteria.Type == config.LatencyMetricsCheck {
report += fmt.Sprintf("\n- %s[p%.0f]: %.2f (needs %.2f)", criteria.Type, criteria.Percentile, result.ActualValue, criteria.Threshold)
continue
}

format := "\n- %s: %.2f (needs %.2f)"
if criteria.Type == config.RequestCountMetricsCheck {
// No decimals for request count.
format = "\n- %s: %.0f (needs %.0f)"
}
report += fmt.Sprintf(format, criteria.Type, result.ActualValue, criteria.Threshold)
}

return report
}
66 changes: 66 additions & 0 deletions pkg/health/report_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
package health_test

import (
"testing"

"github.com/GoogleCloudPlatform/cloud-run-release-operator/pkg/config"
"github.com/GoogleCloudPlatform/cloud-run-release-operator/pkg/health"
"github.com/stretchr/testify/assert"
)

func TestStringReport(t *testing.T) {
tests := []struct {
name string
healthCriteria []config.Metric
diagnosis health.Diagnosis
expected string
}{
{
name: "single metrics",
healthCriteria: []config.Metric{
{Type: config.LatencyMetricsCheck, Percentile: 99, Threshold: 750},
},
diagnosis: health.Diagnosis{
OverallResult: health.Unhealthy,
CheckResults: []health.CheckResult{
{Threshold: 750, ActualValue: 1000, IsCriteriaMet: true},
},
},
expected: "status: unhealthy\n" +
"metrics:" +
"\n- request-latency[p99]: 1000.00 (needs 750.00)",
},
{
name: "more than one metrics",
healthCriteria: []config.Metric{
{Type: config.RequestCountMetricsCheck, Threshold: 1000},
{Type: config.LatencyMetricsCheck, Percentile: 99, Threshold: 750},
{Type: config.ErrorRateMetricsCheck, Threshold: 5},
},
diagnosis: health.Diagnosis{
OverallResult: health.Healthy,
CheckResults: []health.CheckResult{
{Threshold: 1000, ActualValue: 1500, IsCriteriaMet: true},
{Threshold: 750, ActualValue: 500, IsCriteriaMet: true},
{Threshold: 5, ActualValue: 2, IsCriteriaMet: true},
},
},
expected: "status: healthy\n" +
"metrics:" +
"\n- request-count: 1500 (needs 1000)" +
"\n- request-latency[p99]: 500.00 (needs 750.00)" +
"\n- error-rate-percent: 2.00 (needs 5.00)",
},
{
name: "no metrics",
expected: "status: unknown\nmetrics:",
},
}

for _, test := range tests {
t.Run(test.name, func(tt *testing.T) {
report := health.StringReport(test.healthCriteria, test.diagnosis)
assert.Equal(tt, test.expected, report)
})
}
}
7 changes: 0 additions & 7 deletions pkg/rollout/revision.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,6 @@ import (
"google.golang.org/api/run/v1"
)

// Annotations name for information related to the rollout.
const (
StableRevisionAnnotation = "rollout.cloud.run/stableRevision"
CandidateRevisionAnnotation = "rollout.cloud.run/candidateRevision"
LastFailedCandidateRevisionAnnotation = "rollout.cloud.run/lastFailedCandidateRevision"
)

// DetectStableRevisionName returns the stable revision of the Cloud Run service.
//
// It first checks if there's a revision with the tag "stable". If such a
Expand Down
40 changes: 28 additions & 12 deletions pkg/rollout/rollout.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,14 @@ import (
"google.golang.org/api/run/v1"
)

// Annotations name for information related to the rollout.
const (
StableRevisionAnnotation = "rollout.cloud.run/stableRevision"
CandidateRevisionAnnotation = "rollout.cloud.run/candidateRevision"
LastFailedCandidateRevisionAnnotation = "rollout.cloud.run/lastFailedCandidateRevision"
LastHealthReportAnnotation = "rollout.cloud.run/lastHealthReport"
)

// ServiceRecord holds a service object and information about it.
type ServiceRecord struct {
*run.Service
Expand Down Expand Up @@ -115,6 +123,11 @@ func (r *Rollout) UpdateService(svc *run.Service) (*run.Service, error) {
r.log.Debug("new candidate, assign some traffic")
svc = r.PrepareRollForward(svc, stable, candidate)
svc = r.updateAnnotations(svc, stable, candidate)

// TODO(gvso): create a setHealthReportAnnotation that appends the
// current time to the report before setting the annotation.
Copy link
Contributor

@ahmetb ahmetb Aug 7, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this probably could be part of the health report string generation IMO.
doesn't have to be here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My initial thought was also this one, but the problem is that we don't generate a report when we have a new candidate. We have a string literal in that case

setAnnotation(svc, LastHealthReportAnnotation, "new candidate, no health report available yet")

err := r.replaceService(svc)
return svc, errors.Wrap(err, "failed to replace service")
}
Expand All @@ -132,19 +145,18 @@ func (r *Rollout) UpdateService(svc *run.Service) (*run.Service, error) {
case health.Healthy:
r.log.Debug("healthy candidate, roll forward")
svc = r.PrepareRollForward(svc, stable, candidate)
break
case health.Unhealthy:
r.log.Info("unhealthy candidate, rollback")
r.shouldRollback = true
svc = r.PrepareRollback(svc, stable, candidate)
break
default:
return nil, errors.Errorf("invalid candidate's health diagnosis %v", diagnosis.OverallResult)
}

// TODO(gvso): include annotation about the diagnosis (especially when
// diagnosis is unhealthy).
svc = r.updateAnnotations(svc, stable, candidate)
report := health.StringReport(r.strategy.HealthCriteria, diagnosis)
setAnnotation(svc, LastHealthReportAnnotation, report)

err = r.replaceService(svc)
return svc, errors.Wrap(err, "failed to replace service")
}
Expand Down Expand Up @@ -282,26 +294,30 @@ func (r *Rollout) nextCandidateTraffic(current int64) int64 {

// updateAnnotations updates the annotations to keep some state about the rollout.
func (r *Rollout) updateAnnotations(svc *run.Service, stable, candidate string) *run.Service {
if svc.Metadata.Annotations == nil {
svc.Metadata.Annotations = make(map[string]string)
}

// The candidate has become the stable revision.
if r.promoteToStable {
svc.Metadata.Annotations[StableRevisionAnnotation] = candidate
setAnnotation(svc, StableRevisionAnnotation, candidate)
delete(svc.Metadata.Annotations, CandidateRevisionAnnotation)
return svc
}

svc.Metadata.Annotations[StableRevisionAnnotation] = stable
svc.Metadata.Annotations[CandidateRevisionAnnotation] = candidate
setAnnotation(svc, StableRevisionAnnotation, stable)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nice, yeah you can just use it here :D

setAnnotation(svc, CandidateRevisionAnnotation, candidate)
if r.shouldRollback {
svc.Metadata.Annotations[LastFailedCandidateRevisionAnnotation] = candidate
setAnnotation(svc, LastFailedCandidateRevisionAnnotation, candidate)
}

return svc
}

// setAnnotation sets the value of an annotation.
func setAnnotation(svc *run.Service, key, value string) {
if svc.Metadata.Annotations == nil {
svc.Metadata.Annotations = make(map[string]string)
}
svc.Metadata.Annotations[key] = value
}

// diagnoseCandidate returns the candidate's diagnosis based on metrics.
func (r *Rollout) diagnoseCandidate(candidate string, healthCriteria []config.Metric) (d health.Diagnosis, err error) {
healthCheckOffset := time.Duration(r.strategy.HealthOffsetMinute) * time.Minute
Expand Down
22 changes: 18 additions & 4 deletions pkg/rollout/rollout_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import (
runMocker "github.com/GoogleCloudPlatform/cloud-run-release-operator/internal/run/mock"
"github.com/GoogleCloudPlatform/cloud-run-release-operator/pkg/config"
"github.com/GoogleCloudPlatform/cloud-run-release-operator/pkg/rollout"
"github.com/google/go-cmp/cmp"
"github.com/sirupsen/logrus"
"github.com/stretchr/testify/assert"
"google.golang.org/api/run/v1"
Expand Down Expand Up @@ -86,6 +85,7 @@ func TestUpdateService(t *testing.T) {
{Type: config.ErrorRateMetricsCheck, Threshold: 5},
},
outAnnotations: map[string]string{
rollout.LastHealthReportAnnotation: "new candidate, no health report available yet",
rollout.StableRevisionAnnotation: "test-002",
rollout.CandidateRevisionAnnotation: "test-003",
},
Expand Down Expand Up @@ -123,6 +123,7 @@ func TestUpdateService(t *testing.T) {
},
lastReady: "test-002",
outAnnotations: map[string]string{
rollout.LastHealthReportAnnotation: "new candidate, no health report available yet",
rollout.StableRevisionAnnotation: "test-001",
rollout.CandidateRevisionAnnotation: "test-002",
},
Expand All @@ -146,6 +147,10 @@ func TestUpdateService(t *testing.T) {
{Type: config.ErrorRateMetricsCheck, Threshold: 5},
},
outAnnotations: map[string]string{
rollout.LastHealthReportAnnotation: "status: healthy\n" +
"metrics:" +
"\n- request-latency[p99]: 500.00 (needs 750.00)" +
"\n- error-rate-percent: 1.00 (needs 5.00)",
rollout.StableRevisionAnnotation: "test-001",
rollout.CandidateRevisionAnnotation: "test-002",
},
Expand All @@ -165,6 +170,7 @@ func TestUpdateService(t *testing.T) {
},
lastReady: "test-003",
outAnnotations: map[string]string{
rollout.LastHealthReportAnnotation: "new candidate, no health report available yet",
rollout.StableRevisionAnnotation: "test-001",
rollout.CandidateRevisionAnnotation: "test-003",
},
Expand All @@ -187,6 +193,10 @@ func TestUpdateService(t *testing.T) {
{Type: config.ErrorRateMetricsCheck, Threshold: 5},
},
outAnnotations: map[string]string{
rollout.LastHealthReportAnnotation: "status: healthy\n" +
"metrics:" +
"\n- request-latency[p99]: 500.00 (needs 750.00)" +
"\n- error-rate-percent: 1.00 (needs 5.00)",
rollout.StableRevisionAnnotation: "test-002",
},
outTraffic: []*run.TrafficTarget{
Expand All @@ -207,6 +217,10 @@ func TestUpdateService(t *testing.T) {
{Type: config.ErrorRateMetricsCheck, Threshold: 0.95},
},
outAnnotations: map[string]string{
rollout.LastHealthReportAnnotation: "status: unhealthy\n" +
"metrics:" +
"\n- request-latency[p99]: 500.00 (needs 100.00)" +
"\n- error-rate-percent: 1.00 (needs 0.95)",
rollout.StableRevisionAnnotation: "test-001",
rollout.CandidateRevisionAnnotation: "test-002",
rollout.LastFailedCandidateRevisionAnnotation: "test-002",
Expand Down Expand Up @@ -285,8 +299,8 @@ func TestUpdateService(t *testing.T) {
} else if test.nilService {
assert.Nil(t, svc)
} else {
assert.True(t, cmp.Equal(test.outAnnotations, svc.Metadata.Annotations))
assert.True(t, cmp.Equal(test.outTraffic, svc.Spec.Traffic))
assert.Equal(t, test.outAnnotations, svc.Metadata.Annotations)
assert.Equal(t, test.outTraffic, svc.Spec.Traffic)
}
})

Expand Down Expand Up @@ -418,7 +432,7 @@ func TestPrepareRollForward(t *testing.T) {

t.Run(test.name, func(t *testing.T) {
svc = r.PrepareRollForward(svc, test.stable, test.candidate)
assert.True(t, cmp.Equal(test.expected, svc.Spec.Traffic))
assert.Equal(t, test.expected, svc.Spec.Traffic)
})
}
}
Expand Down