diff --git a/CHANGELOG.md b/CHANGELOG.md index 0ffcdb3e04..5c017e365f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -98,6 +98,11 @@ Adding a new version? You'll need three changes: which accepts a namespaced name of headless kong admin service which should have Admin API endpoints exposed under a named port called `admin` [#3421](https://github.com/Kong/kubernetes-ingress-controller/pull/3421) +- Added `dataplane` metrics label for `ingress_controller_configuration_push_count` + and `ingress_controller_configuration_push_duration_milliseconds`. This means + that all time series for those metrics will get a new label designating the + address of the dataplane that the configuration push has been targeted for. + [#3521](https://github.com/Kong/kubernetes-ingress-controller/pull/3521) ### Fixed diff --git a/internal/dataplane/deckerrors/conflict.go b/internal/dataplane/deckerrors/conflict.go new file mode 100644 index 0000000000..2cfe89d24e --- /dev/null +++ b/internal/dataplane/deckerrors/conflict.go @@ -0,0 +1,47 @@ +package deckerrors + +import ( + "errors" + "net/http" + + deckutils "github.com/kong/deck/utils" + "github.com/kong/go-kong/kong" +) + +// ConfigConflictError is an error used to wrap deck config conflict errors +// returned from deck functions transforming KongRawState to KongState +// (e.g. state.Get, dump.Get). +type ConfigConflictError struct { + Err error +} + +func (e ConfigConflictError) Error() string { + return e.Err.Error() +} + +func (e ConfigConflictError) Is(err error) bool { + return errors.Is(err, ConfigConflictError{}) +} + +func (e ConfigConflictError) Unwrap() error { + return e.Err +} + +func IsConflictErr(err error) bool { + var apiErr *kong.APIError + if errors.As(err, &apiErr) && apiErr.Code() == http.StatusConflict || + errors.Is(err, ConfigConflictError{}) { + return true + } + + var deckErrArray deckutils.ErrArray + if errors.As(err, &deckErrArray) { + for _, err := range deckErrArray.Errors { + if IsConflictErr(err) { + return true + } + } + } + + return false +} diff --git a/internal/dataplane/kong_client.go b/internal/dataplane/kong_client.go index e921c58bdd..d16de7a1ef 100644 --- a/internal/dataplane/kong_client.go +++ b/internal/dataplane/kong_client.go @@ -10,7 +10,6 @@ import ( "github.com/kong/deck/file" "github.com/kong/go-kong/kong" - "github.com/prometheus/client_golang/prometheus" "github.com/samber/lo" "github.com/sirupsen/logrus" "github.com/sourcegraph/conc/iter" @@ -430,15 +429,11 @@ func (c *KongClient) Update(ctx context.Context) error { // parse the Kubernetes objects from the storer into Kong configuration kongstate, translationFailures := p.Build() if failuresCount := len(translationFailures); failuresCount > 0 { - c.prometheusMetrics.TranslationCount.With(prometheus.Labels{ - metrics.SuccessKey: metrics.SuccessFalse, - }).Inc() + c.prometheusMetrics.RecordTranslationFailure() c.recordResourceFailureEvents(translationFailures, KongConfigurationTranslationFailedEventReason) c.logger.Debugf("%d translation failures have occurred when building data-plane configuration", failuresCount) } else { - c.prometheusMetrics.TranslationCount.With(prometheus.Labels{ - metrics.SuccessKey: metrics.SuccessTrue, - }).Inc() + c.prometheusMetrics.RecordTranslationSuccess() c.logger.Debug("successfully built data-plane configuration") } diff --git a/internal/dataplane/sendconfig/sendconfig.go b/internal/dataplane/sendconfig/sendconfig.go index 5c1ea8c1f6..eb60756efb 100644 --- a/internal/dataplane/sendconfig/sendconfig.go +++ b/internal/dataplane/sendconfig/sendconfig.go @@ -5,11 +5,8 @@ import ( "context" "encoding/hex" "encoding/json" - "errors" "fmt" "io" - "net" - "net/http" "sync" "time" @@ -20,10 +17,10 @@ import ( "github.com/kong/deck/state" deckutils "github.com/kong/deck/utils" "github.com/kong/go-kong/kong" - "github.com/prometheus/client_golang/prometheus" "github.com/sirupsen/logrus" "github.com/kong/kubernetes-ingress-controller/v2/internal/adminapi" + "github.com/kong/kubernetes-ingress-controller/v2/internal/dataplane/deckerrors" "github.com/kong/kubernetes-ingress-controller/v2/internal/dataplane/deckgen" "github.com/kong/kubernetes-ingress-controller/v2/internal/metrics" ) @@ -58,7 +55,7 @@ func PerformUpdate(ctx context.Context, } } - var metricsProtocol string + var metricsProtocol metrics.Protocol timeStart := time.Now() if config.InMemory { metricsProtocol = metrics.ProtocolDBLess @@ -68,30 +65,14 @@ func PerformUpdate(ctx context.Context, dumpConfig := dump.Config{SelectorTags: config.FilterTags, SkipCACerts: config.SkipCACertificates} err = onUpdateDBMode(ctx, targetContent, client, dumpConfig, config) } - timeEnd := time.Now() + duration := time.Since(timeStart) if err != nil { - promMetrics.ConfigPushCount.With(prometheus.Labels{ - metrics.SuccessKey: metrics.SuccessFalse, - metrics.ProtocolKey: metricsProtocol, - metrics.FailureReasonKey: pushFailureReason(err), - }).Inc() - promMetrics.ConfigPushDuration.With(prometheus.Labels{ - metrics.SuccessKey: metrics.SuccessFalse, - metrics.ProtocolKey: metricsProtocol, - }).Observe(float64(timeEnd.Sub(timeStart).Milliseconds())) + promMetrics.RecordPushFailure(metricsProtocol, duration, client.BaseRootURL(), err) return nil, err } - promMetrics.ConfigPushCount.With(prometheus.Labels{ - metrics.SuccessKey: metrics.SuccessTrue, - metrics.ProtocolKey: metricsProtocol, - metrics.FailureReasonKey: "", - }).Inc() - promMetrics.ConfigPushDuration.With(prometheus.Labels{ - metrics.SuccessKey: metrics.SuccessTrue, - metrics.ProtocolKey: metricsProtocol, - }).Observe(float64(timeEnd.Sub(timeStart).Milliseconds())) + promMetrics.RecordPushSuccess(metricsProtocol, duration, client.BaseRootURL()) log.Info("successfully synced configuration to kong.") return newSHA, nil } @@ -143,7 +124,7 @@ func onUpdateDBMode( ts, err := targetState(ctx, targetContent, cs, config.Version, client, dumpConfig) if err != nil { - return deckConfigConflictError{err} + return deckerrors.ConfigConflictError{Err: err} } syncer, err := diff.NewSyncer(diff.SyncerOpts{ @@ -218,58 +199,6 @@ func hasSHAUpdateAlreadyBeenReported(latestUpdateSHA []byte) bool { return false } -// deckConfigConflictError is an error used to wrap deck config conflict errors returned from deck functions -// transforming KongRawState to KongState (e.g. state.Get, dump.Get). -type deckConfigConflictError struct { - err error -} - -func (e deckConfigConflictError) Error() string { - return e.err.Error() -} - -func (e deckConfigConflictError) Is(target error) bool { - _, ok := target.(deckConfigConflictError) - return ok -} - -func (e deckConfigConflictError) Unwrap() error { - return e.err -} - -// pushFailureReason extracts config push failure reason from an error returned from onUpdateInMemoryMode or onUpdateDBMode. -func pushFailureReason(err error) string { - var netErr net.Error - if errors.As(err, &netErr) { - return metrics.FailureReasonNetwork - } - - if isConflictErr(err) { - return metrics.FailureReasonConflict - } - - return metrics.FailureReasonOther -} - -func isConflictErr(err error) bool { - var apiErr *kong.APIError - if errors.As(err, &apiErr) && apiErr.Code() == http.StatusConflict || - errors.Is(err, deckConfigConflictError{}) { - return true - } - - var deckErrArray deckutils.ErrArray - if errors.As(err, &deckErrArray) { - for _, err := range deckErrArray.Errors { - if isConflictErr(err) { - return true - } - } - } - - return false -} - type KonnectAwareClient interface { IsKonnect() bool } diff --git a/internal/dataplane/sendconfig/sendconfig_test.go b/internal/dataplane/sendconfig/sendconfig_test.go index 101790e9ea..ef4f043797 100644 --- a/internal/dataplane/sendconfig/sendconfig_test.go +++ b/internal/dataplane/sendconfig/sendconfig_test.go @@ -3,18 +3,12 @@ package sendconfig import ( "context" "errors" - "fmt" - "net" - "net/http" "testing" - deckutils "github.com/kong/deck/utils" "github.com/kong/go-kong/kong" "github.com/sirupsen/logrus" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - - "github.com/kong/kubernetes-ingress-controller/v2/internal/metrics" ) func TestUpdateReportingUtilities(t *testing.T) { @@ -28,84 +22,6 @@ func TestUpdateReportingUtilities(t *testing.T) { assert.True(t, hasSHAUpdateAlreadyBeenReported([]byte("yet-another-fake-sha"))) } -func TestPushFailureReason(t *testing.T) { - apiConflictErr := kong.NewAPIError(http.StatusConflict, "conflict api error") - networkErr := net.UnknownNetworkError("network error") - genericError := errors.New("generic error") - - testCases := []struct { - name string - err error - expectedReason string - }{ - { - name: "generic_error", - err: genericError, - expectedReason: metrics.FailureReasonOther, - }, - { - name: "api_conflict_error", - err: apiConflictErr, - expectedReason: metrics.FailureReasonConflict, - }, - { - name: "api_conflict_error_wrapped", - err: fmt.Errorf("wrapped conflict api err: %w", apiConflictErr), - expectedReason: metrics.FailureReasonConflict, - }, - { - name: "deck_config_conflict_error_empty", - err: deckConfigConflictError{}, - expectedReason: metrics.FailureReasonConflict, - }, - { - name: "deck_config_conflict_error_with_generic_error", - err: deckConfigConflictError{genericError}, - expectedReason: metrics.FailureReasonConflict, - }, - { - name: "deck_err_array_with_api_conflict_error", - err: deckutils.ErrArray{Errors: []error{apiConflictErr}}, - expectedReason: metrics.FailureReasonConflict, - }, - { - name: "wrapped_deck_err_array_with_api_conflict_error", - err: fmt.Errorf("wrapped: %w", deckutils.ErrArray{Errors: []error{apiConflictErr}}), - expectedReason: metrics.FailureReasonConflict, - }, - { - name: "deck_err_array_with_generic_error", - err: deckutils.ErrArray{Errors: []error{genericError}}, - expectedReason: metrics.FailureReasonOther, - }, - { - name: "deck_err_array_empty", - err: deckutils.ErrArray{Errors: []error{genericError}}, - expectedReason: metrics.FailureReasonOther, - }, - { - name: "network_error", - err: networkErr, - expectedReason: metrics.FailureReasonNetwork, - }, - { - name: "network_error_wrapped_in_deck_config_conflict_error", - err: deckConfigConflictError{networkErr}, - expectedReason: metrics.FailureReasonNetwork, - }, - } - - for _, tc := range testCases { - tc := tc - t.Run(tc.name, func(t *testing.T) { - t.Parallel() - - reason := pushFailureReason(tc.err) - require.Equal(t, tc.expectedReason, reason) - }) - } -} - type konnectAwareClientMock struct { expected bool } @@ -204,5 +120,4 @@ func TestHasConfigurationChanged(t *testing.T) { require.Equal(t, tc.expectedResult, result) }) } - } diff --git a/internal/metrics/prometheus.go b/internal/metrics/prometheus.go index 2fd7502e43..3665db236f 100644 --- a/internal/metrics/prometheus.go +++ b/internal/metrics/prometheus.go @@ -1,11 +1,16 @@ package metrics import ( + "errors" "fmt" + "net" "sync" + "time" "github.com/prometheus/client_golang/prometheus" "sigs.k8s.io/controller-runtime/pkg/metrics" + + "github.com/kong/kubernetes-ingress-controller/v2/internal/dataplane/deckerrors" ) type CtrlFuncMetrics struct { @@ -29,11 +34,13 @@ const ( SuccessKey string = "success" ) +type Protocol string + const ( // ProtocolDBLess indicates that configuration was sent to Kong using the DB-less protocol (POST /config). - ProtocolDBLess string = "db-less" + ProtocolDBLess Protocol = "db-less" // ProtocolDeck indicates that configuration was sent to Kong using the DB mode protocol (deck sync). - ProtocolDeck string = "deck" + ProtocolDeck Protocol = "deck" // ProtocolKey defines the key of the metric label indicating which protocol KIC used to configure Kong. ProtocolKey string = "protocol" @@ -53,6 +60,11 @@ const ( FailureReasonKey string = "failure_reason" ) +const ( + // DataplaneKey defines the name of the metric label indicating which dataplane this time series is relevant for. + DataplaneKey string = "dataplane" +) + const ( MetricNameConfigPushCount = "ingress_controller_configuration_push_count" MetricNameTranslationCount = "ingress_controller_translation_count" @@ -69,15 +81,19 @@ func NewCtrlFuncMetrics() *CtrlFuncMetrics { Name: MetricNameConfigPushCount, Help: fmt.Sprintf( "Count of successful/failed configuration pushes to Kong. "+ + "`%s` describes the dataplane that was the target of configuration push. "+ "`%s` describes the configuration protocol (`%s` or `%s`) in use. "+ "`%s` describes whether there were unrecoverable errors (`%s`) or not (`%s`). "+ "`%s` is populated in case of `%s=\"%s\"` and describes the reason of failure "+ "(one of `%s`, `%s`, `%s`).", - ProtocolKey, ProtocolDBLess, ProtocolDeck, SuccessKey, SuccessFalse, SuccessTrue, FailureReasonKey, SuccessKey, - SuccessFalse, FailureReasonConflict, FailureReasonNetwork, FailureReasonOther, + DataplaneKey, + ProtocolKey, ProtocolDBLess, ProtocolDeck, + SuccessKey, SuccessFalse, SuccessTrue, + FailureReasonKey, SuccessKey, SuccessFalse, + FailureReasonConflict, FailureReasonNetwork, FailureReasonOther, ), }, - []string{SuccessKey, ProtocolKey, FailureReasonKey}, + []string{SuccessKey, ProtocolKey, FailureReasonKey, DataplaneKey}, ) controllerMetrics.TranslationCount = prometheus.NewCounterVec( @@ -98,13 +114,16 @@ func NewCtrlFuncMetrics() *CtrlFuncMetrics { Name: MetricNameConfigPushDuration, Help: fmt.Sprintf( "How long it took to push the configuration to Kong, in milliseconds. "+ + "`%s` describes the dataplane that was the target of configuration push. "+ "`%s` describes the configuration protocol (`%s` or `%s`) in use. "+ "`%s` describes whether there were unrecoverable errors (`%s`) or not (`%s`).", - ProtocolKey, ProtocolDBLess, ProtocolDeck, SuccessKey, SuccessFalse, SuccessTrue, + DataplaneKey, + ProtocolKey, ProtocolDBLess, ProtocolDeck, + SuccessKey, SuccessFalse, SuccessTrue, ), Buckets: prometheus.ExponentialBuckets(100, 1.33, 30), }, - []string{SuccessKey, ProtocolKey}, + []string{SuccessKey, ProtocolKey, DataplaneKey}, ) _once.Do(func() { @@ -113,3 +132,97 @@ func NewCtrlFuncMetrics() *CtrlFuncMetrics { return controllerMetrics } + +// RecordPushSuccess records a successful configuration push. +func (c *CtrlFuncMetrics) RecordPushSuccess(p Protocol, d time.Duration, dataplane string) { + dpOpt := withDataplane(dataplane) + c.recordPushCount(p, dpOpt) + c.recordPushDuration(p, d, dpOpt) +} + +// RecordPushFailure records a failed configuration push. +func (c *CtrlFuncMetrics) RecordPushFailure(p Protocol, d time.Duration, dataplane string, err error) { + dpOpt := withDataplane(dataplane) + c.recordPushCount(p, dpOpt, withError(err)) + c.recordPushDuration(p, d, dpOpt, withFailure()) +} + +// RecordTranslationSuccess records a successful configuration translation. +func (c *CtrlFuncMetrics) RecordTranslationSuccess() { + c.TranslationCount.With(prometheus.Labels{ + SuccessKey: SuccessTrue, + }).Inc() +} + +// RecordTranslationFailure records a failed configuration translation. +func (c *CtrlFuncMetrics) RecordTranslationFailure() { + c.TranslationCount.With(prometheus.Labels{ + SuccessKey: SuccessFalse, + }).Inc() +} + +type recordOption func(prometheus.Labels) prometheus.Labels + +func withError(err error) recordOption { + return func(l prometheus.Labels) prometheus.Labels { + l[FailureReasonKey] = pushFailureReason(err) + l[SuccessKey] = SuccessFalse + return l + } +} + +func withFailure() recordOption { + return func(l prometheus.Labels) prometheus.Labels { + l[SuccessKey] = SuccessFalse + return l + } +} + +func withDataplane(dataplane string) recordOption { + return func(l prometheus.Labels) prometheus.Labels { + l[DataplaneKey] = dataplane + return l + } +} + +func (c *CtrlFuncMetrics) recordPushCount(p Protocol, opts ...recordOption) { + labels := prometheus.Labels{ + SuccessKey: SuccessTrue, + ProtocolKey: string(p), + FailureReasonKey: "", + } + + for _, opt := range opts { + labels = opt(labels) + } + + c.ConfigPushCount.With(labels).Inc() +} + +func (c *CtrlFuncMetrics) recordPushDuration(p Protocol, d time.Duration, opts ...recordOption) { + labels := prometheus.Labels{ + SuccessKey: SuccessTrue, + ProtocolKey: string(p), + } + + for _, opt := range opts { + labels = opt(labels) + } + + c.ConfigPushDuration.With(labels).Observe(float64(d.Milliseconds())) +} + +// pushFailureReason extracts config push failure reason from an error returned +// from sendconfig's onUpdateInMemoryMode or onUpdateDBMode. +func pushFailureReason(err error) string { + var netErr net.Error + if errors.As(err, &netErr) { + return FailureReasonNetwork + } + + if deckerrors.IsConflictErr(err) { + return FailureReasonConflict + } + + return FailureReasonOther +} diff --git a/internal/metrics/prometheus_test.go b/internal/metrics/prometheus_test.go index 5269c5caa5..f5aeca4040 100644 --- a/internal/metrics/prometheus_test.go +++ b/internal/metrics/prometheus_test.go @@ -1,16 +1,131 @@ package metrics import ( + "errors" + "fmt" + "net" + "net/http" "testing" + "time" + deckutils "github.com/kong/deck/utils" + "github.com/kong/go-kong/kong" "github.com/stretchr/testify/require" + + "github.com/kong/kubernetes-ingress-controller/v2/internal/dataplane/deckerrors" ) func TestNewCtrlFuncMetricsDoesNotPanicWhenCalledTwice(t *testing.T) { require.NotPanics(t, func() { - NewCtrlFuncMetrics() + _ = NewCtrlFuncMetrics() }) require.NotPanics(t, func() { - NewCtrlFuncMetrics() + _ = NewCtrlFuncMetrics() + }) +} + +func TestRecordPush(t *testing.T) { + m := NewCtrlFuncMetrics() + t.Run("recording push success works", func(t *testing.T) { + require.NotPanics(t, func() { + m.RecordPushSuccess(ProtocolDBLess, time.Millisecond, "https://10.0.0.1:8080") + }) + }) + t.Run("recording push failure works", func(t *testing.T) { + require.NotPanics(t, func() { + m.RecordPushFailure(ProtocolDBLess, time.Millisecond, "https://10.0.0.1:8080", fmt.Errorf("custom error")) + }) + }) +} + +func TestRecordTranslation(t *testing.T) { + m := NewCtrlFuncMetrics() + t.Run("recording translation success works", func(t *testing.T) { + require.NotPanics(t, func() { + m.RecordTranslationSuccess() + }) + }) + t.Run("recording translation failure works", func(t *testing.T) { + require.NotPanics(t, func() { + m.RecordTranslationFailure() + }) }) } + +func TestPushFailureReason(t *testing.T) { + apiConflictErr := kong.NewAPIError(http.StatusConflict, "conflict api error") + networkErr := net.UnknownNetworkError("network error") + genericError := errors.New("generic error") + + testCases := []struct { + name string + err error + expectedReason string + }{ + { + name: "generic_error", + err: genericError, + expectedReason: FailureReasonOther, + }, + { + name: "api_conflict_error", + err: apiConflictErr, + expectedReason: FailureReasonConflict, + }, + { + name: "api_conflict_error_wrapped", + err: fmt.Errorf("wrapped conflict api err: %w", apiConflictErr), + expectedReason: FailureReasonConflict, + }, + { + name: "deck_config_conflict_error_empty", + err: deckerrors.ConfigConflictError{}, + expectedReason: FailureReasonConflict, + }, + { + name: "deck_config_conflict_error_with_generic_error", + err: deckerrors.ConfigConflictError{Err: genericError}, + expectedReason: FailureReasonConflict, + }, + { + name: "deck_err_array_with_api_conflict_error", + err: deckutils.ErrArray{Errors: []error{apiConflictErr}}, + expectedReason: FailureReasonConflict, + }, + { + name: "wrapped_deck_err_array_with_api_conflict_error", + err: fmt.Errorf("wrapped: %w", deckutils.ErrArray{Errors: []error{apiConflictErr}}), + expectedReason: FailureReasonConflict, + }, + { + name: "deck_err_array_with_generic_error", + err: deckutils.ErrArray{Errors: []error{genericError}}, + expectedReason: FailureReasonOther, + }, + { + name: "deck_err_array_empty", + err: deckutils.ErrArray{Errors: []error{genericError}}, + expectedReason: FailureReasonOther, + }, + { + name: "network_error", + err: networkErr, + expectedReason: FailureReasonNetwork, + }, + { + name: "network_error_wrapped_in_deck_config_conflict_error", + err: deckerrors.ConfigConflictError{Err: networkErr}, + expectedReason: FailureReasonNetwork, + }, + } + + for _, tc := range testCases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + reason := pushFailureReason(tc.err) + require.Equal(t, tc.expectedReason, reason) + }) + } +}