Skip to content

Commit

Permalink
feat(diag) expose unparseable config errors (#5773)
Browse files Browse the repository at this point in the history
Add a diagnostic endpoint for raw config errors to simplify problem
identification when Kong does not return flattened errors.

---------

Co-authored-by: Patryk Małek <patryk.malek@konghq.com>
  • Loading branch information
rainest and pmalek committed Apr 4, 2024
1 parent 9b2325a commit 2570997
Show file tree
Hide file tree
Showing 16 changed files with 135 additions and 37 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,14 @@ Adding a new version? You'll need three changes:

## Unreleased

### Added

- Add a `/debug/config/raw-error` endpoint to the config dump diagnostic
server. This endpoint outputs the original Kong `/config` endpoint error for
failed configuration pushes in case error parsing fails. Attempt to log the
`message` field of errors that KIC cannot fully parse.
[#5773](https://github.com/Kong/kubernetes-ingress-controller/issues/5773)

### Fixed

- Remove unnecessary tag support check that could incorrectly delete configuration if the check did not execute properly.
Expand Down
16 changes: 8 additions & 8 deletions internal/dataplane/kong_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -567,7 +567,7 @@ func (c *KongClient) sendToClient(
// apply the configuration update in Kong
timedCtx, cancel := context.WithTimeout(ctx, c.requestTimeout)
defer cancel()
newConfigSHA, entityErrors, err := sendconfig.PerformUpdate(
newConfigSHA, err := sendconfig.PerformUpdate(
timedCtx,
logger,
client,
Expand All @@ -578,14 +578,14 @@ func (c *KongClient) sendToClient(
c.configChangeDetector,
)

c.recordResourceFailureEvents(entityErrors, KongConfigurationApplyFailedEventReason)
c.recordResourceFailureEvents(err.ResourceFailures, KongConfigurationApplyFailedEventReason)
// Only record events on applying configuration to Kong gateway here.
if !client.IsKonnect() {
c.recordApplyConfigurationEvents(err, client.BaseRootURL())
}
sendDiagnostic(err != nil)
sendDiagnostic(err.Err != nil, err.RawBody)

if err != nil {
if err.Err != nil {
if expired, ok := timedCtx.Deadline(); ok && time.Now().After(expired) {
logger.Error(nil, "Exceeded Kong API timeout, consider increasing --proxy-timeout-seconds")
}
Expand All @@ -611,7 +611,7 @@ func (c *KongClient) SetConfigStatusNotifier(n clients.ConfigStatusNotifier) {
// Dataplane Client - Kong - Private
// -----------------------------------------------------------------------------

type sendDiagnosticFn func(failed bool)
type sendDiagnosticFn func(failed bool, raw []byte)

// prepareSendDiagnosticFn generates sendDiagnosticFn.
// Diagnostics are sent only when provided diagnostic config (--dump-config) is set.
Expand All @@ -625,7 +625,7 @@ func prepareSendDiagnosticFn(
) sendDiagnosticFn {
if diagnosticConfig == (util.ConfigDumpDiagnostic{}) {
// noop, diagnostics won't be sent
return func(bool) {}
return func(bool, []byte) {}
}

var config *file.Content
Expand All @@ -640,15 +640,15 @@ func prepareSendDiagnosticFn(
config = targetContent
}

return func(failed bool) {
return func(failed bool, raw []byte) {
// Given that we can send multiple configs to this channel and
// the fact that the API that exposes that can only expose 1 config
// at a time it means that users utilizing the diagnostics API
// might not see exactly what they intend to see i.e. come failures
// or successfully send configs might be covered by those send
// later on but we're OK with this limitation of said API.
select {
case diagnosticConfig.Configs <- util.ConfigDump{Failed: failed, Config: *config}:
case diagnosticConfig.Configs <- util.ConfigDump{Failed: failed, Config: *config, RawResponseBody: raw}:
logger.V(util.DebugLevel).Info("Shipping config to diagnostic server")
default:
logger.Error(nil, "Config diagnostic buffer full, dropping diagnostic config")
Expand Down
3 changes: 2 additions & 1 deletion internal/dataplane/kong_client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -253,10 +253,11 @@ type mockUpdateStrategy struct {
func (m *mockUpdateStrategy) Update(_ context.Context, content sendconfig.ContentWithHash) (
err error,
resourceErrors []sendconfig.ResourceError,
rawErrBody []byte,
resourceErrorsParseErr error,
) {
err = m.onUpdate(content)
return err, nil, nil
return err, nil, nil, nil
}

func (m *mockUpdateStrategy) MetricsProtocol() metrics.Protocol {
Expand Down
7 changes: 4 additions & 3 deletions internal/dataplane/sendconfig/backoff_strategy.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,21 +50,22 @@ func NewUpdateStrategyWithBackoff(
func (s UpdateStrategyWithBackoff) Update(ctx context.Context, targetContent ContentWithHash) (
err error,
resourceErrors []ResourceError,
rawErrBody []byte,
resourceErrorsParseErr error,
) {
if canUpdate, whyNot := s.backoffStrategy.CanUpdate(targetContent.Hash); !canUpdate {
return NewUpdateSkippedDueToBackoffStrategyError(whyNot), nil, nil
return NewUpdateSkippedDueToBackoffStrategyError(whyNot), nil, nil, nil
}

err, resourceErrors, resourceErrorsParseErr = s.decorated.Update(ctx, targetContent)
err, resourceErrors, rawErrBody, resourceErrorsParseErr = s.decorated.Update(ctx, targetContent)
if err != nil {
s.logger.V(util.DebugLevel).Info("Update failed, registering it for backoff strategy", "reason", err.Error())
s.backoffStrategy.RegisterUpdateFailure(err, targetContent.Hash)
} else {
s.backoffStrategy.RegisterUpdateSuccess()
}

return err, resourceErrors, resourceErrorsParseErr
return err, resourceErrors, rawErrBody, resourceErrorsParseErr
}

func (s UpdateStrategyWithBackoff) MetricsProtocol() metrics.Protocol {
Expand Down
7 changes: 4 additions & 3 deletions internal/dataplane/sendconfig/backoff_strategy_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,16 @@ func newMockUpdateStrategy(shouldSucceed bool) *mockUpdateStrategy {
func (m *mockUpdateStrategy) Update(context.Context, sendconfig.ContentWithHash) (
err error,
resourceErrors []sendconfig.ResourceError,
rawErrBody []byte,
resourceErrorsParseErr error,
) {
m.wasUpdateCalled = true

if !m.shouldSucceed {
return errors.New("update failure occurred"), nil, nil
return errors.New("update failure occurred"), nil, nil, nil
}

return nil, nil, nil
return nil, nil, nil, nil
}

func (m *mockUpdateStrategy) MetricsProtocol() metrics.Protocol {
Expand Down Expand Up @@ -119,7 +120,7 @@ func TestUpdateStrategyWithBackoff(t *testing.T) {
backoffStrategy := newMockBackoffStrategy(tc.updateShouldBeAllowed)

decoratedStrategy := sendconfig.NewUpdateStrategyWithBackoff(updateStrategy, backoffStrategy, logger)
err, _, _ := decoratedStrategy.Update(ctx, sendconfig.ContentWithHash{})
err, _, _, _ := decoratedStrategy.Update(ctx, sendconfig.ContentWithHash{})
if tc.expectError != nil {
require.Equal(t, tc.expectError, err)
} else {
Expand Down
13 changes: 8 additions & 5 deletions internal/dataplane/sendconfig/dbmode.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,16 +54,19 @@ func NewUpdateStrategyDBModeKonnect(
func (s UpdateStrategyDBMode) Update(ctx context.Context, targetContent ContentWithHash) (
err error,
resourceErrors []ResourceError,
// this is always empty for DB mode, as it does not have a single config error, and will instead return individual
// errors as a deckutils.ErrArray. we need it for the interface signature, however.
rawErrBody []byte,
resourceErrorsParseErr error,
) {
cs, err := s.currentState(ctx)
if err != nil {
return fmt.Errorf("failed getting current state for %s: %w", s.client.BaseRootURL(), err), nil, nil
return fmt.Errorf("failed getting current state for %s: %w", s.client.BaseRootURL(), err), nil, nil, nil
}

ts, err := s.targetState(ctx, cs, targetContent.Content)
if err != nil {
return deckerrors.ConfigConflictError{Err: err}, nil, nil
return deckerrors.ConfigConflictError{Err: err}, nil, nil, nil
}

syncer, err := diff.NewSyncer(diff.SyncerOpts{
Expand All @@ -74,15 +77,15 @@ func (s UpdateStrategyDBMode) Update(ctx context.Context, targetContent ContentW
IsKonnect: s.isKonnect,
})
if err != nil {
return fmt.Errorf("creating a new syncer for %s: %w", s.client.BaseRootURL(), err), nil, nil
return fmt.Errorf("creating a new syncer for %s: %w", s.client.BaseRootURL(), err), nil, nil, nil
}

_, errs, _ := syncer.Solve(ctx, s.concurrency, false, false)
if errs != nil {
return deckutils.ErrArray{Errors: errs}, nil, nil
return deckutils.ErrArray{Errors: errs}, nil, nil, nil
}

return nil, nil, nil
return nil, nil, nil, nil
}

func (s UpdateStrategyDBMode) MetricsProtocol() metrics.Protocol {
Expand Down
7 changes: 4 additions & 3 deletions internal/dataplane/sendconfig/inmemory.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,20 +51,21 @@ func NewUpdateStrategyInMemory(
func (s UpdateStrategyInMemory) Update(ctx context.Context, targetState ContentWithHash) (
err error,
resourceErrors []ResourceError,
rawErrBody []byte,
resourceErrorsParseErr error,
) {
dblessConfig := s.configConverter.Convert(targetState.Content)
config, err := json.Marshal(dblessConfig)
if err != nil {
return fmt.Errorf("constructing kong configuration: %w", err), nil, nil
return fmt.Errorf("constructing kong configuration: %w", err), nil, nil, nil
}

if errBody, err := s.configService.ReloadDeclarativeRawConfig(ctx, bytes.NewReader(config), true, true); err != nil {
resourceErrors, parseErr := parseFlatEntityErrors(errBody, s.logger)
return err, resourceErrors, parseErr
return err, resourceErrors, errBody, parseErr
}

return nil, nil, nil
return nil, nil, nil, nil
}

func (s UpdateStrategyInMemory) MetricsProtocol() metrics.Protocol {
Expand Down
16 changes: 16 additions & 0 deletions internal/dataplane/sendconfig/inmemory_error_handling.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,24 @@ func parseFlatEntityErrors(body []byte, logger logr.Logger) ([]ResourceError, er

err := json.Unmarshal(body, &configError)
if err != nil {
// we _should_ arguably be able to parse the "message" field into a ConfigError even if we can't parse a full set
// of flattened errors, but for some reason those incomplete errors still don't unmarshal. as a fallback, try and
// yank the field out via a more basic unmarshal target
fallback := map[string]interface{}{}
if fallbackErr := json.Unmarshal(body, &fallback); fallbackErr == nil {
if message, ok := fallback["message"]; ok {
logger.Error(nil, "Could not fully parse config error", "message", message)
}
}
return resourceErrors, fmt.Errorf("could not unmarshal config error: %w", err)
}
if len(configError.Flattened) == 0 {
if len(configError.Message) > 0 {
logger.Error(nil, "Config error missing per-resource errors", "message", configError.Message)
} else {
logger.Error(nil, "Config error missing per-resource and message", "message", configError.Message)
}
}
for _, ee := range configError.Flattened {
raw := rawResourceError{
Name: ee.Name,
Expand Down
36 changes: 28 additions & 8 deletions internal/dataplane/sendconfig/sendconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,26 @@ type AdminAPIClient interface {
KonnectControlPlane() string
}

// Note that UpdateError is used for both DB-less and DB-backed update strategies, but does not have parity between the
// two. Pending a refactor of database reconciler (https://github.com/Kong/go-database-reconciler/issues/22), KIC does
// not have access to per-resource errors or any original response bodies in database mode. Future DB mode work would
// need to find some way to reconcile the single /config raw body and many per-resource responses from DB endpoints.

// UpdateError wraps several pieces of error information relevant to a failed Kong update attempt.
type UpdateError struct {
// RawBody is the original Kong HTTP error response body from a failed update.
RawBody []byte
// ResourceFailures are per-resource failures from a Kong configuration update attempt.
ResourceFailures []failures.ResourceFailure
// Err is an overall description of the update failure.
Err error
}

// Error implements the Error interface. It returns the string value of the Err field.
func (e UpdateError) Error() string {
return fmt.Sprintf("%s", e.Err)
}

// PerformUpdate writes `targetContent` to Kong Admin API specified by `kongConfig`.
func PerformUpdate(
ctx context.Context,
Expand All @@ -47,33 +67,33 @@ func PerformUpdate(
promMetrics *metrics.CtrlFuncMetrics,
updateStrategyResolver UpdateStrategyResolver,
configChangeDetector ConfigurationChangeDetector,
) ([]byte, []failures.ResourceFailure, error) {
) ([]byte, UpdateError) {
oldSHA := client.LastConfigSHA()
newSHA, err := deckgen.GenerateSHA(targetContent)
if err != nil {
return oldSHA, []failures.ResourceFailure{}, err
return oldSHA, UpdateError{ResourceFailures: []failures.ResourceFailure{}, Err: err}
}

// disable optimization if reverse sync is enabled
if !config.EnableReverseSync {
configurationChanged, err := configChangeDetector.HasConfigurationChanged(ctx, oldSHA, newSHA, targetContent, client, client.AdminAPIClient())
if err != nil {
return nil, []failures.ResourceFailure{}, err
return nil, UpdateError{Err: err}
}
if !configurationChanged {
if client.IsKonnect() {
logger.V(util.DebugLevel).Info("No configuration change, skipping sync to Konnect")
} else {
logger.V(util.DebugLevel).Info("No configuration change, skipping sync to Kong")
}
return oldSHA, []failures.ResourceFailure{}, nil
return oldSHA, UpdateError{}
}
}

updateStrategy := updateStrategyResolver.ResolveUpdateStrategy(client)
logger = logger.WithValues("update_strategy", updateStrategy.Type())
timeStart := time.Now()
err, resourceErrors, resourceErrorsParseErr := updateStrategy.Update(ctx, ContentWithHash{
err, resourceErrors, rawErrBody, resourceErrorsParseErr := updateStrategy.Update(ctx, ContentWithHash{
Content: targetContent,
Hash: newSHA,
})
Expand All @@ -83,12 +103,12 @@ func PerformUpdate(
if err != nil {
// Not pushing metrics in case it's an update skip due to a backoff.
if errors.As(err, &UpdateSkippedDueToBackoffStrategyError{}) {
return nil, []failures.ResourceFailure{}, err
return nil, UpdateError{RawBody: rawErrBody, Err: err}
}

resourceFailures := resourceErrorsToResourceFailures(resourceErrors, resourceErrorsParseErr, logger)
promMetrics.RecordPushFailure(metricsProtocol, duration, client.BaseRootURL(), len(resourceFailures), err)
return nil, resourceFailures, err
return nil, UpdateError{ResourceFailures: resourceFailures, RawBody: rawErrBody, Err: err}
}

promMetrics.RecordPushSuccess(metricsProtocol, duration, client.BaseRootURL())
Expand All @@ -99,7 +119,7 @@ func PerformUpdate(
logger.V(util.InfoLevel).Info("Successfully synced configuration to Kong")
}

return newSHA, nil, nil
return newSHA, UpdateError{}
}

// -----------------------------------------------------------------------------
Expand Down
1 change: 1 addition & 0 deletions internal/dataplane/sendconfig/strategy.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ type UpdateStrategy interface {
Update(ctx context.Context, targetContent ContentWithHash) (
err error,
resourceErrors []ResourceError,
rawErrorBody []byte,
resourceErrorsParseErr error,
)

Expand Down
16 changes: 16 additions & 0 deletions internal/diagnostics/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ type Server struct {

successfulConfigDump file.Content
failedConfigDump file.Content
rawErrBody []byte
configLock *sync.RWMutex
}

Expand Down Expand Up @@ -122,6 +123,7 @@ func (s *Server) receiveConfig(ctx context.Context) {
s.configLock.Lock()
if dump.Failed {
s.failedConfigDump = dump.Config
s.rawErrBody = dump.RawResponseBody
} else {
s.successfulConfigDump = dump.Config
}
Expand Down Expand Up @@ -156,6 +158,7 @@ func installProfilingHandlers(mux *http.ServeMux) {
func (s *Server) installDumpHandlers(mux *http.ServeMux) {
mux.HandleFunc("/debug/config/successful", s.handleLastValidConfig)
mux.HandleFunc("/debug/config/failed", s.handleLastFailedConfig)
mux.HandleFunc("/debug/config/raw-error", s.handleLastErrBody)
}

// redirectTo redirects request to a certain destination.
Expand All @@ -182,3 +185,16 @@ func (s *Server) handleLastFailedConfig(rw http.ResponseWriter, _ *http.Request)
rw.WriteHeader(http.StatusInternalServerError)
}
}

func (s *Server) handleLastErrBody(rw http.ResponseWriter, _ *http.Request) {
rw.Header().Set("Content-Type", "text/plain")
s.configLock.RLock()
defer s.configLock.RUnlock()
raw := s.rawErrBody
if len(raw) == 0 {
raw = []byte("No raw error body available.\n")
}
if _, err := rw.Write(raw); err != nil {
rw.WriteHeader(http.StatusInternalServerError)
}
}
9 changes: 7 additions & 2 deletions internal/diagnostics/server_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,9 @@ func TestDiagnosticsServer_ConfigDumps(t *testing.T) {
for i := 0; i < configDumpsToWrite; i++ {
failed = !failed // Toggle failed flag.
configsCh <- util.ConfigDump{
Config: file.Content{},
Failed: failed,
Config: file.Content{},
Failed: failed,
RawResponseBody: []byte("fake error body"),
}
}
}()
Expand All @@ -74,6 +75,10 @@ func TestDiagnosticsServer_ConfigDumps(t *testing.T) {
if err == nil {
_ = resp.Body.Close()
}
resp, err = httpClient.Get(fmt.Sprintf("http://localhost:%d/debug/config/raw-error", port))
if err == nil {
_ = resp.Body.Close()
}
}
}
}()
Expand Down
Loading

0 comments on commit 2570997

Please sign in to comment.