GoogleCloudPlatform · gvso · Aug 7, 2020 · Aug 4, 2020 · Aug 5, 2020 · Aug 5, 2020
diff --git a/go.mod b/go.mod
@@ -6,7 +6,6 @@ require (
 	cloud.google.com/go v0.60.0 // indirect
 	github.com/TV4/logrus-stackdriver-formatter v0.1.0
 	github.com/go-stack/stack v1.8.0 // indirect
-	github.com/google/go-cmp v0.5.0
 	github.com/mattn/go-isatty v0.0.12
 	github.com/pkg/errors v0.9.1
 	github.com/sirupsen/logrus v1.6.0

diff --git a/pkg/health/health.go b/pkg/health/health.go
@@ -22,6 +22,19 @@ const (
 	Unhealthy
 )
 
+func (d DiagnosisResult) String() string {
+	switch d {
+	case Inconclusive:
+		return "inconclusive"
+	case Healthy:
+		return "healthy"
+	case Unhealthy:
+		return "unhealthy"
+	default:
+		return "unknown"
+	}
+}
+
 // Diagnosis is the information about the health of the revision.
 type Diagnosis struct {
 	OverallResult DiagnosisResult

diff --git a/pkg/health/report.go b/pkg/health/report.go
@@ -0,0 +1,31 @@
+package health
+
+import (
+	"fmt"
+
+	"github.com/GoogleCloudPlatform/cloud-run-release-operator/pkg/config"
+)
+
+// StringReport returns a human-readable report of the diagnosis.
+func StringReport(healthCriteria []config.Metric, diagnosis Diagnosis) string {
+	report := fmt.Sprintf("status: %s\n", diagnosis.OverallResult.String())
+	report += "metrics:"
+	for i, result := range diagnosis.CheckResults {
+		criteria := healthCriteria[i]
+
+		// Include percentile value for latency criteria.
+		if criteria.Type == config.LatencyMetricsCheck {
+			report += fmt.Sprintf("\n- %s[p%.0f]: %.2f (needs %.2f)", criteria.Type, criteria.Percentile, result.ActualValue, criteria.Threshold)
+			continue
+		}
+
+		format := "\n- %s: %.2f (needs %.2f)"
+		if criteria.Type == config.RequestCountMetricsCheck {
+			// No decimals for request count.
+			format = "\n- %s: %.0f (needs %.0f)"
+		}
+		report += fmt.Sprintf(format, criteria.Type, result.ActualValue, criteria.Threshold)
+	}
+
+	return report
+}
diff --git a/pkg/health/report_test.go b/pkg/health/report_test.go
@@ -0,0 +1,66 @@
+package health_test
+
+import (
+	"testing"
+
+	"github.com/GoogleCloudPlatform/cloud-run-release-operator/pkg/config"
+	"github.com/GoogleCloudPlatform/cloud-run-release-operator/pkg/health"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestStringReport(t *testing.T) {
+	tests := []struct {
+		name           string
+		healthCriteria []config.Metric
+		diagnosis      health.Diagnosis
+		expected       string
+	}{
+		{
+			name: "single metrics",
+			healthCriteria: []config.Metric{
+				{Type: config.LatencyMetricsCheck, Percentile: 99, Threshold: 750},
+			},
+			diagnosis: health.Diagnosis{
+				OverallResult: health.Unhealthy,
+				CheckResults: []health.CheckResult{
+					{Threshold: 750, ActualValue: 1000, IsCriteriaMet: true},
+				},
+			},
+			expected: "status: unhealthy\n" +
+				"metrics:" +
+				"\n- request-latency[p99]: 1000.00 (needs 750.00)",
+		},
+		{
+			name: "more than one metrics",
+			healthCriteria: []config.Metric{
+				{Type: config.RequestCountMetricsCheck, Threshold: 1000},
+				{Type: config.LatencyMetricsCheck, Percentile: 99, Threshold: 750},
+				{Type: config.ErrorRateMetricsCheck, Threshold: 5},
+			},
+			diagnosis: health.Diagnosis{
+				OverallResult: health.Healthy,
+				CheckResults: []health.CheckResult{
+					{Threshold: 1000, ActualValue: 1500, IsCriteriaMet: true},
+					{Threshold: 750, ActualValue: 500, IsCriteriaMet: true},
+					{Threshold: 5, ActualValue: 2, IsCriteriaMet: true},
+				},
+			},
+			expected: "status: healthy\n" +
+				"metrics:" +
+				"\n- request-count: 1500 (needs 1000)" +
+				"\n- request-latency[p99]: 500.00 (needs 750.00)" +
+				"\n- error-rate-percent: 2.00 (needs 5.00)",
+		},
+		{
+			name:     "no metrics",
+			expected: "status: unknown\nmetrics:",
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(tt *testing.T) {
+			report := health.StringReport(test.healthCriteria, test.diagnosis)
+			assert.Equal(tt, test.expected, report)
+		})
+	}
+}
diff --git a/pkg/rollout/revision.go b/pkg/rollout/revision.go
@@ -4,13 +4,6 @@ import (
 	"google.golang.org/api/run/v1"
 )
 
-// Annotations name for information related to the rollout.
-const (
-	StableRevisionAnnotation              = "rollout.cloud.run/stableRevision"
-	CandidateRevisionAnnotation           = "rollout.cloud.run/candidateRevision"
-	LastFailedCandidateRevisionAnnotation = "rollout.cloud.run/lastFailedCandidateRevision"
-)
-
 // DetectStableRevisionName returns the stable revision of the Cloud Run service.
 //
 // It first checks if there's a revision with the tag "stable". If such a

diff --git a/pkg/rollout/rollout.go b/pkg/rollout/rollout.go
@@ -15,6 +15,14 @@ import (
 	"google.golang.org/api/run/v1"
 )
 
+// Annotations name for information related to the rollout.
+const (
+	StableRevisionAnnotation              = "rollout.cloud.run/stableRevision"
+	CandidateRevisionAnnotation           = "rollout.cloud.run/candidateRevision"
+	LastFailedCandidateRevisionAnnotation = "rollout.cloud.run/lastFailedCandidateRevision"
+	LastHealthReportAnnotation            = "rollout.cloud.run/lastHealthReport"
+)
+
 // ServiceRecord holds a service object and information about it.
 type ServiceRecord struct {
 	*run.Service
@@ -115,6 +123,11 @@ func (r *Rollout) UpdateService(svc *run.Service) (*run.Service, error) {
 		r.log.Debug("new candidate, assign some traffic")
 		svc = r.PrepareRollForward(svc, stable, candidate)
 		svc = r.updateAnnotations(svc, stable, candidate)
+
+		// TODO(gvso): create a setHealthReportAnnotation that appends the
+		// current time to the report before setting the annotation.
+		setAnnotation(svc, LastHealthReportAnnotation, "new candidate, no health report available yet")
+
 		err := r.replaceService(svc)
 		return svc, errors.Wrap(err, "failed to replace service")
 	}
@@ -132,19 +145,18 @@ func (r *Rollout) UpdateService(svc *run.Service) (*run.Service, error) {
 	case health.Healthy:
 		r.log.Debug("healthy candidate, roll forward")
 		svc = r.PrepareRollForward(svc, stable, candidate)
-		break
 	case health.Unhealthy:
 		r.log.Info("unhealthy candidate, rollback")
 		r.shouldRollback = true
 		svc = r.PrepareRollback(svc, stable, candidate)
-		break
 	default:
 		return nil, errors.Errorf("invalid candidate's health diagnosis %v", diagnosis.OverallResult)
 	}
 
-	// TODO(gvso): include annotation about the diagnosis (especially when
-	// diagnosis is unhealthy).
 	svc = r.updateAnnotations(svc, stable, candidate)
+	report := health.StringReport(r.strategy.HealthCriteria, diagnosis)
+	setAnnotation(svc, LastHealthReportAnnotation, report)
+
 	err = r.replaceService(svc)
 	return svc, errors.Wrap(err, "failed to replace service")
 }
@@ -282,26 +294,30 @@ func (r *Rollout) nextCandidateTraffic(current int64) int64 {
 
 // updateAnnotations updates the annotations to keep some state about the rollout.
 func (r *Rollout) updateAnnotations(svc *run.Service, stable, candidate string) *run.Service {
-	if svc.Metadata.Annotations == nil {
-		svc.Metadata.Annotations = make(map[string]string)
-	}
-
 	// The candidate has become the stable revision.
 	if r.promoteToStable {
-		svc.Metadata.Annotations[StableRevisionAnnotation] = candidate
+		setAnnotation(svc, StableRevisionAnnotation, candidate)
 		delete(svc.Metadata.Annotations, CandidateRevisionAnnotation)
 		return svc
 	}
 
-	svc.Metadata.Annotations[StableRevisionAnnotation] = stable
-	svc.Metadata.Annotations[CandidateRevisionAnnotation] = candidate
+	setAnnotation(svc, StableRevisionAnnotation, stable)
+	setAnnotation(svc, CandidateRevisionAnnotation, candidate)
 	if r.shouldRollback {
-		svc.Metadata.Annotations[LastFailedCandidateRevisionAnnotation] = candidate
+		setAnnotation(svc, LastFailedCandidateRevisionAnnotation, candidate)
 	}
 
 	return svc
 }
 
+// setAnnotation sets the value of an annotation.
+func setAnnotation(svc *run.Service, key, value string) {
+	if svc.Metadata.Annotations == nil {
+		svc.Metadata.Annotations = make(map[string]string)
+	}
+	svc.Metadata.Annotations[key] = value
+}
+
 // diagnoseCandidate returns the candidate's diagnosis based on metrics.
 func (r *Rollout) diagnoseCandidate(candidate string, healthCriteria []config.Metric) (d health.Diagnosis, err error) {
 	healthCheckOffset := time.Duration(r.strategy.HealthOffsetMinute) * time.Minute

diff --git a/pkg/rollout/rollout_test.go b/pkg/rollout/rollout_test.go
@@ -10,7 +10,6 @@ import (
 	runMocker "github.com/GoogleCloudPlatform/cloud-run-release-operator/internal/run/mock"
 	"github.com/GoogleCloudPlatform/cloud-run-release-operator/pkg/config"
 	"github.com/GoogleCloudPlatform/cloud-run-release-operator/pkg/rollout"
-	"github.com/google/go-cmp/cmp"
 	"github.com/sirupsen/logrus"
 	"github.com/stretchr/testify/assert"
 	"google.golang.org/api/run/v1"
@@ -86,6 +85,7 @@ func TestUpdateService(t *testing.T) {
 				{Type: config.ErrorRateMetricsCheck, Threshold: 5},
 			},
 			outAnnotations: map[string]string{
+				rollout.LastHealthReportAnnotation:  "new candidate, no health report available yet",
 				rollout.StableRevisionAnnotation:    "test-002",
 				rollout.CandidateRevisionAnnotation: "test-003",
 			},
@@ -123,6 +123,7 @@ func TestUpdateService(t *testing.T) {
 			},
 			lastReady: "test-002",
 			outAnnotations: map[string]string{
+				rollout.LastHealthReportAnnotation:  "new candidate, no health report available yet",
 				rollout.StableRevisionAnnotation:    "test-001",
 				rollout.CandidateRevisionAnnotation: "test-002",
 			},
@@ -146,6 +147,10 @@ func TestUpdateService(t *testing.T) {
 				{Type: config.ErrorRateMetricsCheck, Threshold: 5},
 			},
 			outAnnotations: map[string]string{
+				rollout.LastHealthReportAnnotation: "status: healthy\n" +
+					"metrics:" +
+					"\n- request-latency[p99]: 500.00 (needs 750.00)" +
+					"\n- error-rate-percent: 1.00 (needs 5.00)",
 				rollout.StableRevisionAnnotation:    "test-001",
 				rollout.CandidateRevisionAnnotation: "test-002",
 			},
@@ -165,6 +170,7 @@ func TestUpdateService(t *testing.T) {
 			},
 			lastReady: "test-003",
 			outAnnotations: map[string]string{
+				rollout.LastHealthReportAnnotation:  "new candidate, no health report available yet",
 				rollout.StableRevisionAnnotation:    "test-001",
 				rollout.CandidateRevisionAnnotation: "test-003",
 			},
@@ -187,6 +193,10 @@ func TestUpdateService(t *testing.T) {
 				{Type: config.ErrorRateMetricsCheck, Threshold: 5},
 			},
 			outAnnotations: map[string]string{
+				rollout.LastHealthReportAnnotation: "status: healthy\n" +
+					"metrics:" +
+					"\n- request-latency[p99]: 500.00 (needs 750.00)" +
+					"\n- error-rate-percent: 1.00 (needs 5.00)",
 				rollout.StableRevisionAnnotation: "test-002",
 			},
 			outTraffic: []*run.TrafficTarget{
@@ -207,6 +217,10 @@ func TestUpdateService(t *testing.T) {
 				{Type: config.ErrorRateMetricsCheck, Threshold: 0.95},
 			},
 			outAnnotations: map[string]string{
+				rollout.LastHealthReportAnnotation: "status: unhealthy\n" +
+					"metrics:" +
+					"\n- request-latency[p99]: 500.00 (needs 100.00)" +
+					"\n- error-rate-percent: 1.00 (needs 0.95)",
 				rollout.StableRevisionAnnotation:              "test-001",
 				rollout.CandidateRevisionAnnotation:           "test-002",
 				rollout.LastFailedCandidateRevisionAnnotation: "test-002",
@@ -285,8 +299,8 @@ func TestUpdateService(t *testing.T) {
 			} else if test.nilService {
 				assert.Nil(t, svc)
 			} else {
-				assert.True(t, cmp.Equal(test.outAnnotations, svc.Metadata.Annotations))
-				assert.True(t, cmp.Equal(test.outTraffic, svc.Spec.Traffic))
+				assert.Equal(t, test.outAnnotations, svc.Metadata.Annotations)
+				assert.Equal(t, test.outTraffic, svc.Spec.Traffic)
 			}
 		})
 
@@ -418,7 +432,7 @@ func TestPrepareRollForward(t *testing.T) {
 
 		t.Run(test.name, func(t *testing.T) {
 			svc = r.PrepareRollForward(svc, test.stable, test.candidate)
-			assert.True(t, cmp.Equal(test.expected, svc.Spec.Traffic))
+			assert.Equal(t, test.expected, svc.Spec.Traffic)
 		})
 	}
 }