Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(diag) expose unparseable config errors #5773

Merged
merged 5 commits into from
Apr 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,14 @@ Adding a new version? You'll need three changes:

## Unreleased

### Added

- Add a `/debug/config/raw-error` endpoint to the config dump diagnostic
server. This endpoint outputs the original Kong `/config` endpoint error for
failed configuration pushes in case error parsing fails. Attempt to log the
`message` field of errors that KIC cannot fully parse.
[#5773](https://github.com/Kong/kubernetes-ingress-controller/issues/5773)

### Fixed

- Remove unnecessary tag support check that could incorrectly delete configuration if the check did not execute properly.
Expand Down
16 changes: 8 additions & 8 deletions internal/dataplane/kong_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -567,7 +567,7 @@ func (c *KongClient) sendToClient(
// apply the configuration update in Kong
timedCtx, cancel := context.WithTimeout(ctx, c.requestTimeout)
defer cancel()
newConfigSHA, entityErrors, err := sendconfig.PerformUpdate(
newConfigSHA, err := sendconfig.PerformUpdate(
timedCtx,
logger,
client,
Expand All @@ -578,14 +578,14 @@ func (c *KongClient) sendToClient(
c.configChangeDetector,
)

c.recordResourceFailureEvents(entityErrors, KongConfigurationApplyFailedEventReason)
c.recordResourceFailureEvents(err.ResourceFailures, KongConfigurationApplyFailedEventReason)
// Only record events on applying configuration to Kong gateway here.
if !client.IsKonnect() {
c.recordApplyConfigurationEvents(err, client.BaseRootURL())
}
sendDiagnostic(err != nil)
sendDiagnostic(err.Err != nil, err.RawBody)

if err != nil {
if err.Err != nil {
if expired, ok := timedCtx.Deadline(); ok && time.Now().After(expired) {
logger.Error(nil, "Exceeded Kong API timeout, consider increasing --proxy-timeout-seconds")
}
Expand All @@ -611,7 +611,7 @@ func (c *KongClient) SetConfigStatusNotifier(n clients.ConfigStatusNotifier) {
// Dataplane Client - Kong - Private
// -----------------------------------------------------------------------------

type sendDiagnosticFn func(failed bool)
type sendDiagnosticFn func(failed bool, raw []byte)

// prepareSendDiagnosticFn generates sendDiagnosticFn.
// Diagnostics are sent only when provided diagnostic config (--dump-config) is set.
Expand All @@ -625,7 +625,7 @@ func prepareSendDiagnosticFn(
) sendDiagnosticFn {
if diagnosticConfig == (util.ConfigDumpDiagnostic{}) {
// noop, diagnostics won't be sent
return func(bool) {}
return func(bool, []byte) {}
}

var config *file.Content
Expand All @@ -640,15 +640,15 @@ func prepareSendDiagnosticFn(
config = targetContent
}

return func(failed bool) {
return func(failed bool, raw []byte) {
// Given that we can send multiple configs to this channel and
// the fact that the API that exposes that can only expose 1 config
// at a time it means that users utilizing the diagnostics API
// might not see exactly what they intend to see i.e. come failures
// or successfully send configs might be covered by those send
// later on but we're OK with this limitation of said API.
select {
case diagnosticConfig.Configs <- util.ConfigDump{Failed: failed, Config: *config}:
case diagnosticConfig.Configs <- util.ConfigDump{Failed: failed, Config: *config, RawResponseBody: raw}:
logger.V(util.DebugLevel).Info("Shipping config to diagnostic server")
default:
logger.Error(nil, "Config diagnostic buffer full, dropping diagnostic config")
Expand Down
3 changes: 2 additions & 1 deletion internal/dataplane/kong_client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -253,10 +253,11 @@ type mockUpdateStrategy struct {
func (m *mockUpdateStrategy) Update(_ context.Context, content sendconfig.ContentWithHash) (
err error,
resourceErrors []sendconfig.ResourceError,
rawErrBody []byte,
resourceErrorsParseErr error,
) {
err = m.onUpdate(content)
return err, nil, nil
return err, nil, nil, nil
}

func (m *mockUpdateStrategy) MetricsProtocol() metrics.Protocol {
Expand Down
7 changes: 4 additions & 3 deletions internal/dataplane/sendconfig/backoff_strategy.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,21 +50,22 @@ func NewUpdateStrategyWithBackoff(
func (s UpdateStrategyWithBackoff) Update(ctx context.Context, targetContent ContentWithHash) (
err error,
resourceErrors []ResourceError,
rawErrBody []byte,
resourceErrorsParseErr error,
) {
if canUpdate, whyNot := s.backoffStrategy.CanUpdate(targetContent.Hash); !canUpdate {
return NewUpdateSkippedDueToBackoffStrategyError(whyNot), nil, nil
return NewUpdateSkippedDueToBackoffStrategyError(whyNot), nil, nil, nil
}

err, resourceErrors, resourceErrorsParseErr = s.decorated.Update(ctx, targetContent)
err, resourceErrors, rawErrBody, resourceErrorsParseErr = s.decorated.Update(ctx, targetContent)
if err != nil {
s.logger.V(util.DebugLevel).Info("Update failed, registering it for backoff strategy", "reason", err.Error())
s.backoffStrategy.RegisterUpdateFailure(err, targetContent.Hash)
} else {
s.backoffStrategy.RegisterUpdateSuccess()
}

return err, resourceErrors, resourceErrorsParseErr
return err, resourceErrors, rawErrBody, resourceErrorsParseErr
}

func (s UpdateStrategyWithBackoff) MetricsProtocol() metrics.Protocol {
Expand Down
7 changes: 4 additions & 3 deletions internal/dataplane/sendconfig/backoff_strategy_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,16 @@ func newMockUpdateStrategy(shouldSucceed bool) *mockUpdateStrategy {
func (m *mockUpdateStrategy) Update(context.Context, sendconfig.ContentWithHash) (
err error,
resourceErrors []sendconfig.ResourceError,
rawErrBody []byte,
resourceErrorsParseErr error,
) {
m.wasUpdateCalled = true

if !m.shouldSucceed {
return errors.New("update failure occurred"), nil, nil
return errors.New("update failure occurred"), nil, nil, nil
}

return nil, nil, nil
return nil, nil, nil, nil
}

func (m *mockUpdateStrategy) MetricsProtocol() metrics.Protocol {
Expand Down Expand Up @@ -119,7 +120,7 @@ func TestUpdateStrategyWithBackoff(t *testing.T) {
backoffStrategy := newMockBackoffStrategy(tc.updateShouldBeAllowed)

decoratedStrategy := sendconfig.NewUpdateStrategyWithBackoff(updateStrategy, backoffStrategy, logger)
err, _, _ := decoratedStrategy.Update(ctx, sendconfig.ContentWithHash{})
err, _, _, _ := decoratedStrategy.Update(ctx, sendconfig.ContentWithHash{})
if tc.expectError != nil {
require.Equal(t, tc.expectError, err)
} else {
Expand Down
13 changes: 8 additions & 5 deletions internal/dataplane/sendconfig/dbmode.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,16 +54,19 @@ func NewUpdateStrategyDBModeKonnect(
func (s UpdateStrategyDBMode) Update(ctx context.Context, targetContent ContentWithHash) (
err error,
resourceErrors []ResourceError,
// this is always empty for DB mode, as it does not have a single config error, and will instead return individual
// errors as a deckutils.ErrArray. we need it for the interface signature, however.
rawErrBody []byte,
resourceErrorsParseErr error,
) {
cs, err := s.currentState(ctx)
if err != nil {
return fmt.Errorf("failed getting current state for %s: %w", s.client.BaseRootURL(), err), nil, nil
return fmt.Errorf("failed getting current state for %s: %w", s.client.BaseRootURL(), err), nil, nil, nil
}

ts, err := s.targetState(ctx, cs, targetContent.Content)
if err != nil {
return deckerrors.ConfigConflictError{Err: err}, nil, nil
return deckerrors.ConfigConflictError{Err: err}, nil, nil, nil
}

syncer, err := diff.NewSyncer(diff.SyncerOpts{
Expand All @@ -74,15 +77,15 @@ func (s UpdateStrategyDBMode) Update(ctx context.Context, targetContent ContentW
IsKonnect: s.isKonnect,
})
if err != nil {
return fmt.Errorf("creating a new syncer for %s: %w", s.client.BaseRootURL(), err), nil, nil
return fmt.Errorf("creating a new syncer for %s: %w", s.client.BaseRootURL(), err), nil, nil, nil
}

_, errs, _ := syncer.Solve(ctx, s.concurrency, false, false)
if errs != nil {
return deckutils.ErrArray{Errors: errs}, nil, nil
return deckutils.ErrArray{Errors: errs}, nil, nil, nil
}

return nil, nil, nil
return nil, nil, nil, nil
}

func (s UpdateStrategyDBMode) MetricsProtocol() metrics.Protocol {
Expand Down
7 changes: 4 additions & 3 deletions internal/dataplane/sendconfig/inmemory.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,20 +51,21 @@ func NewUpdateStrategyInMemory(
func (s UpdateStrategyInMemory) Update(ctx context.Context, targetState ContentWithHash) (
err error,
resourceErrors []ResourceError,
rawErrBody []byte,
resourceErrorsParseErr error,
) {
dblessConfig := s.configConverter.Convert(targetState.Content)
config, err := json.Marshal(dblessConfig)
if err != nil {
return fmt.Errorf("constructing kong configuration: %w", err), nil, nil
return fmt.Errorf("constructing kong configuration: %w", err), nil, nil, nil
}

if errBody, err := s.configService.ReloadDeclarativeRawConfig(ctx, bytes.NewReader(config), true, true); err != nil {
resourceErrors, parseErr := parseFlatEntityErrors(errBody, s.logger)
return err, resourceErrors, parseErr
return err, resourceErrors, errBody, parseErr
}

return nil, nil, nil
return nil, nil, nil, nil
}

func (s UpdateStrategyInMemory) MetricsProtocol() metrics.Protocol {
Expand Down
16 changes: 16 additions & 0 deletions internal/dataplane/sendconfig/inmemory_error_handling.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,24 @@ func parseFlatEntityErrors(body []byte, logger logr.Logger) ([]ResourceError, er

err := json.Unmarshal(body, &configError)
if err != nil {
// we _should_ arguably be able to parse the "message" field into a ConfigError even if we can't parse a full set
// of flattened errors, but for some reason those incomplete errors still don't unmarshal. as a fallback, try and
// yank the field out via a more basic unmarshal target
fallback := map[string]interface{}{}
if fallbackErr := json.Unmarshal(body, &fallback); fallbackErr == nil {
if message, ok := fallback["message"]; ok {
logger.Error(nil, "Could not fully parse config error", "message", message)
}
}
return resourceErrors, fmt.Errorf("could not unmarshal config error: %w", err)
}
if len(configError.Flattened) == 0 {
if len(configError.Message) > 0 {
logger.Error(nil, "Config error missing per-resource errors", "message", configError.Message)
} else {
logger.Error(nil, "Config error missing per-resource and message", "message", configError.Message)
}
}
for _, ee := range configError.Flattened {
raw := rawResourceError{
Name: ee.Name,
Expand Down
36 changes: 28 additions & 8 deletions internal/dataplane/sendconfig/sendconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,26 @@ type AdminAPIClient interface {
KonnectControlPlane() string
}

// Note that UpdateError is used for both DB-less and DB-backed update strategies, but does not have parity between the
// two. Pending a refactor of database reconciler (https://github.com/Kong/go-database-reconciler/issues/22), KIC does
// not have access to per-resource errors or any original response bodies in database mode. Future DB mode work would
// need to find some way to reconcile the single /config raw body and many per-resource responses from DB endpoints.

// UpdateError wraps several pieces of error information relevant to a failed Kong update attempt.
type UpdateError struct {
// RawBody is the original Kong HTTP error response body from a failed update.
RawBody []byte
// ResourceFailures are per-resource failures from a Kong configuration update attempt.
ResourceFailures []failures.ResourceFailure
// Err is an overall description of the update failure.
Err error
}

// Error implements the Error interface. It returns the string value of the Err field.
func (e UpdateError) Error() string {
return fmt.Sprintf("%s", e.Err)
}

// PerformUpdate writes `targetContent` to Kong Admin API specified by `kongConfig`.
func PerformUpdate(
ctx context.Context,
Expand All @@ -47,33 +67,33 @@ func PerformUpdate(
promMetrics *metrics.CtrlFuncMetrics,
updateStrategyResolver UpdateStrategyResolver,
configChangeDetector ConfigurationChangeDetector,
) ([]byte, []failures.ResourceFailure, error) {
) ([]byte, UpdateError) {
oldSHA := client.LastConfigSHA()
newSHA, err := deckgen.GenerateSHA(targetContent)
if err != nil {
return oldSHA, []failures.ResourceFailure{}, err
return oldSHA, UpdateError{ResourceFailures: []failures.ResourceFailure{}, Err: err}
}

// disable optimization if reverse sync is enabled
if !config.EnableReverseSync {
configurationChanged, err := configChangeDetector.HasConfigurationChanged(ctx, oldSHA, newSHA, targetContent, client, client.AdminAPIClient())
if err != nil {
return nil, []failures.ResourceFailure{}, err
return nil, UpdateError{Err: err}
}
if !configurationChanged {
if client.IsKonnect() {
logger.V(util.DebugLevel).Info("No configuration change, skipping sync to Konnect")
} else {
logger.V(util.DebugLevel).Info("No configuration change, skipping sync to Kong")
}
return oldSHA, []failures.ResourceFailure{}, nil
return oldSHA, UpdateError{}
}
}

updateStrategy := updateStrategyResolver.ResolveUpdateStrategy(client)
logger = logger.WithValues("update_strategy", updateStrategy.Type())
timeStart := time.Now()
err, resourceErrors, resourceErrorsParseErr := updateStrategy.Update(ctx, ContentWithHash{
err, resourceErrors, rawErrBody, resourceErrorsParseErr := updateStrategy.Update(ctx, ContentWithHash{
Content: targetContent,
Hash: newSHA,
})
Expand All @@ -83,12 +103,12 @@ func PerformUpdate(
if err != nil {
// Not pushing metrics in case it's an update skip due to a backoff.
if errors.As(err, &UpdateSkippedDueToBackoffStrategyError{}) {
return nil, []failures.ResourceFailure{}, err
return nil, UpdateError{RawBody: rawErrBody, Err: err}
}

resourceFailures := resourceErrorsToResourceFailures(resourceErrors, resourceErrorsParseErr, logger)
promMetrics.RecordPushFailure(metricsProtocol, duration, client.BaseRootURL(), len(resourceFailures), err)
return nil, resourceFailures, err
return nil, UpdateError{ResourceFailures: resourceFailures, RawBody: rawErrBody, Err: err}
}

promMetrics.RecordPushSuccess(metricsProtocol, duration, client.BaseRootURL())
Expand All @@ -99,7 +119,7 @@ func PerformUpdate(
logger.V(util.InfoLevel).Info("Successfully synced configuration to Kong")
}

return newSHA, nil, nil
return newSHA, UpdateError{}
}

// -----------------------------------------------------------------------------
Expand Down
1 change: 1 addition & 0 deletions internal/dataplane/sendconfig/strategy.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ type UpdateStrategy interface {
Update(ctx context.Context, targetContent ContentWithHash) (
err error,
resourceErrors []ResourceError,
rawErrorBody []byte,
resourceErrorsParseErr error,
)

Expand Down
16 changes: 16 additions & 0 deletions internal/diagnostics/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ type Server struct {

successfulConfigDump file.Content
failedConfigDump file.Content
rawErrBody []byte
configLock *sync.RWMutex
}

Expand Down Expand Up @@ -122,6 +123,7 @@ func (s *Server) receiveConfig(ctx context.Context) {
s.configLock.Lock()
if dump.Failed {
s.failedConfigDump = dump.Config
s.rawErrBody = dump.RawResponseBody
} else {
s.successfulConfigDump = dump.Config
}
Expand Down Expand Up @@ -156,6 +158,7 @@ func installProfilingHandlers(mux *http.ServeMux) {
func (s *Server) installDumpHandlers(mux *http.ServeMux) {
mux.HandleFunc("/debug/config/successful", s.handleLastValidConfig)
mux.HandleFunc("/debug/config/failed", s.handleLastFailedConfig)
mux.HandleFunc("/debug/config/raw-error", s.handleLastErrBody)
}

// redirectTo redirects request to a certain destination.
Expand All @@ -182,3 +185,16 @@ func (s *Server) handleLastFailedConfig(rw http.ResponseWriter, _ *http.Request)
rw.WriteHeader(http.StatusInternalServerError)
}
}

func (s *Server) handleLastErrBody(rw http.ResponseWriter, _ *http.Request) {
rw.Header().Set("Content-Type", "text/plain")
s.configLock.RLock()
defer s.configLock.RUnlock()
raw := s.rawErrBody
if len(raw) == 0 {
raw = []byte("No raw error body available.\n")
}
if _, err := rw.Write(raw); err != nil {
rw.WriteHeader(http.StatusInternalServerError)
}
}
9 changes: 7 additions & 2 deletions internal/diagnostics/server_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,9 @@ func TestDiagnosticsServer_ConfigDumps(t *testing.T) {
for i := 0; i < configDumpsToWrite; i++ {
failed = !failed // Toggle failed flag.
configsCh <- util.ConfigDump{
Config: file.Content{},
Failed: failed,
Config: file.Content{},
Failed: failed,
RawResponseBody: []byte("fake error body"),
}
}
}()
Expand All @@ -74,6 +75,10 @@ func TestDiagnosticsServer_ConfigDumps(t *testing.T) {
if err == nil {
_ = resp.Body.Close()
}
resp, err = httpClient.Get(fmt.Sprintf("http://localhost:%d/debug/config/raw-error", port))
if err == nil {
_ = resp.Body.Close()
}
}
}
}()
Expand Down
Loading
Loading