From deec624cff1def94c6191e3cb7c135e25b79a2af Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Wed, 13 Apr 2022 15:23:40 -0700 Subject: [PATCH 1/4] wip --- .../controllers/v2/podController.go | 2 +- .../policies/chain-management_linux.go | 35 ++++++++++++++++--- .../policies/chain-management_linux_test.go | 12 +++++-- .../dataplane/policies/policymanager_linux.go | 1 + 4 files changed, 43 insertions(+), 7 deletions(-) diff --git a/npm/pkg/controlplane/controllers/v2/podController.go b/npm/pkg/controlplane/controllers/v2/podController.go index 1d13fdf4d6..3524758146 100644 --- a/npm/pkg/controlplane/controllers/v2/podController.go +++ b/npm/pkg/controlplane/controllers/v2/podController.go @@ -421,7 +421,7 @@ func (c *PodController) syncAddedPod(podObj *corev1.Pod) error { targetSetKeyValue := ipsets.NewIPSetMetadata(labelKeyValue, ipsets.KeyValueLabelOfPod) allSets := []*ipsets.IPSetMetadata{targetSetKey, targetSetKeyValue} - klog.Infof("Creating ipsets %v if it does not already exist", allSets) + klog.Infof("Creating ipsets %+v and %+v if they do not exist", targetSetKey, targetSetKeyValue) klog.Infof("Adding pod %s (ip : %s) to ipset %s and %s", podKey, npmPodObj.PodIP, labelKey, labelKeyValue) if err = c.dp.AddToSets(allSets, podMetadata); err != nil { return fmt.Errorf("[syncAddedPod] Error: failed to add pod to label ipset with err: %w", err) diff --git a/npm/pkg/dataplane/policies/chain-management_linux.go b/npm/pkg/dataplane/policies/chain-management_linux.go index f913578fcc..7e1f38669e 100644 --- a/npm/pkg/dataplane/policies/chain-management_linux.go +++ b/npm/pkg/dataplane/policies/chain-management_linux.go @@ -21,8 +21,10 @@ const ( // TODO replace all util constants with local constants defaultlockWaitTimeInSeconds string = "60" - doesNotExistErrorCode int = 1 // Bad rule (does a matching rule exist in that chain?) - couldntLoadTargetErrorCode int = 2 // Couldn't load target `AZURE-NPM-EGRESS':No such file or directory + doesNotExistErrorCode int = 1 // stderr possibility: Bad rule (does a matching rule exist in that chain?) + doesNotExistMessage string = "No chain/target/match by that name" + couldntLoadTargetErrorCode int = 2 // Couldn't load target `AZURE-NPM-EGRESS':No such file or directory + couldntLoadTargetMessage string = "Couldn't load target `AZURE-NPM':No such file or directory" // transferred from iptm.go and not sure why this length is important minLineNumberStringLength int = 3 @@ -50,6 +52,17 @@ var ( } jumpFromForwardToAzureChainArgs = append([]string{util.IptablesForwardChain}, jumpToAzureChainArgs...) + removeDeprecatedJumpIgnoredErrors = []*exitErrorInfo{ + { + exitCode: doesNotExistErrorCode, + msg: doesNotExistMessage, + }, + { + exitCode: couldntLoadTargetErrorCode, + msg: couldntLoadTargetMessage, + }, + } + listForwardEntriesArgs = []string{ util.IptablesWaitFlag, defaultlockWaitTimeInSeconds, util.IptablesTableFlag, util.IptablesFilterTable, util.IptablesNumericFlag, util.IptablesListFlag, util.IptablesForwardChain, util.IptablesLineNumbersFlag, @@ -64,6 +77,11 @@ var ( } ) +type exitErrorInfo struct { + exitCode int + msg string +} + type staleChains struct { chainsToCleanup map[string]struct{} } @@ -156,8 +174,8 @@ func (pMgr *PolicyManager) bootup(_ []string) error { defer pMgr.reconcileManager.forceUnlock() // 1. delete the deprecated jump to AZURE-NPM - deprecatedErrCode, deprecatedErr := pMgr.runIPTablesCommand(util.IptablesDeletionFlag, deprecatedJumpFromForwardToAzureChainArgs...) - if deprecatedErr == nil { + deprecatedErrCode, deprecatedErr := pMgr.ignoreErrorsAndRunIPTablesCommand(removeDeprecatedJumpIgnoredErrors, util.IptablesDeletionFlag, deprecatedJumpFromForwardToAzureChainArgs...) + if deprecatedErrCode == 0 { klog.Infof("deleted deprecated jump rule from FORWARD chain to AZURE-NPM chain") } else { switch deprecatedErrCode { @@ -254,6 +272,10 @@ deleteLoop: // this function has a direct comparison in NPM v1 iptables manager (iptm.go) func (pMgr *PolicyManager) runIPTablesCommand(operationFlag string, args ...string) (int, error) { + return pMgr.ignoreErrorsAndRunIPTablesCommand(nil, operationFlag, args...) +} + +func (pMgr *PolicyManager) ignoreErrorsAndRunIPTablesCommand(ignored []*exitErrorInfo, operationFlag string, args ...string) (int, error) { allArgs := []string{util.IptablesWaitFlag, defaultlockWaitTimeInSeconds, operationFlag} allArgs = append(allArgs, args...) @@ -267,6 +289,11 @@ func (pMgr *PolicyManager) runIPTablesCommand(operationFlag string, args ...stri errCode := exitError.ExitStatus() allArgsString := strings.Join(allArgs, " ") msgStr := strings.TrimSuffix(string(output), "\n") + for _, info := range ignored { + if errCode == info.exitCode && strings.Contains(msgStr, info.msg) { + return errCode, fmt.Errorf("not able to run iptables command [%s %s] due to benign Stderr: [%s]", util.Iptables, allArgsString, msgStr) + } + } if errCode > 0 { metrics.SendErrorLogAndMetric(util.IptmID, "error: There was an error running command: [%s %s] Stderr: [%v, %s]", util.Iptables, allArgsString, exitError, msgStr) } diff --git a/npm/pkg/dataplane/policies/chain-management_linux_test.go b/npm/pkg/dataplane/policies/chain-management_linux_test.go index 1713a6734a..5141d15127 100644 --- a/npm/pkg/dataplane/policies/chain-management_linux_test.go +++ b/npm/pkg/dataplane/policies/chain-management_linux_test.go @@ -375,7 +375,11 @@ func TestBootupLinux(t *testing.T) { { name: "success after restore failure (no NPM prior)", calls: []testutils.TestCmd{ - {Cmd: []string{"iptables", "-w", "60", "-D", "FORWARD", "-j", "AZURE-NPM"}, ExitCode: 2}, // AZURE-NPM chain didn't exist + { + Cmd: []string{"iptables", "-w", "60", "-D", "FORWARD", "-j", "AZURE-NPM"}, + ExitCode: 2, + Stdout: "iptables v1.8.4 (legacy): Couldn't load target `AZURE-NPM':No such file or directory", + }, // AZURE-NPM chain didn't exist {Cmd: listAllCommandStrings, PipedToCommand: true}, {Cmd: []string{"grep", "Chain AZURE-NPM"}, ExitCode: 1}, fakeIPTablesRestoreFailureCommand, // e.g. xtables lock held by another app. Currently the stdout doesn't matter for retrying @@ -389,7 +393,11 @@ func TestBootupLinux(t *testing.T) { { name: "success: v2 existed prior", calls: []testutils.TestCmd{ - {Cmd: []string{"iptables", "-w", "60", "-D", "FORWARD", "-j", "AZURE-NPM"}, ExitCode: 1}, // deprecated rule did not exist + { + Cmd: []string{"iptables", "-w", "60", "-D", "FORWARD", "-j", "AZURE-NPM"}, + ExitCode: 1, + Stdout: "No chain/target/match by that name", + }, // deprecated rule did not exist {Cmd: listAllCommandStrings, PipedToCommand: true}, { Cmd: []string{"grep", "Chain AZURE-NPM"}, diff --git a/npm/pkg/dataplane/policies/policymanager_linux.go b/npm/pkg/dataplane/policies/policymanager_linux.go index 66399d9005..f7560b97cb 100644 --- a/npm/pkg/dataplane/policies/policymanager_linux.go +++ b/npm/pkg/dataplane/policies/policymanager_linux.go @@ -175,6 +175,7 @@ func (pMgr *PolicyManager) deleteJumpRule(policy *NPMNetworkPolicy, direction Un specs = append([]string{baseChainName}, specs...) errCode, err := pMgr.runIPTablesCommand(util.IptablesDeletionFlag, specs...) + // if this actually happens (don't think it should), could use ignoreErrorsAndRunIPTablesCommand instead with: "Bad rule (does a matching rule exist in that chain?)" if err != nil && errCode != doesNotExistErrorCode { errorString := fmt.Sprintf("failed to delete jump from %s chain to %s chain for policy %s with exit code %d", baseChainName, chainName, policy.PolicyKey, errCode) log.Errorf("%s: %w", errorString, err) From 167e5ca036ad3f6fe268434b98fe3f9b66454d0e Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Fri, 15 Apr 2022 10:15:06 -0700 Subject: [PATCH 2/4] fix lint --- npm/pkg/dataplane/policies/chain-management_linux.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/npm/pkg/dataplane/policies/chain-management_linux.go b/npm/pkg/dataplane/policies/chain-management_linux.go index 7e1f38669e..f3c9149fa4 100644 --- a/npm/pkg/dataplane/policies/chain-management_linux.go +++ b/npm/pkg/dataplane/policies/chain-management_linux.go @@ -31,6 +31,8 @@ const ( ) var ( + errBenignIPTablesFailure = errors.New("benign iptables failure") + // Must loop through a slice because we need a deterministic order for fexec commands for UTs. iptablesAzureChains = []string{ util.IptablesAzureChain, @@ -291,7 +293,7 @@ func (pMgr *PolicyManager) ignoreErrorsAndRunIPTablesCommand(ignored []*exitErro msgStr := strings.TrimSuffix(string(output), "\n") for _, info := range ignored { if errCode == info.exitCode && strings.Contains(msgStr, info.msg) { - return errCode, fmt.Errorf("not able to run iptables command [%s %s] due to benign Stderr: [%s]", util.Iptables, allArgsString, msgStr) + return errCode, fmt.Errorf("not able to run iptables command [%s %s] due to benign Stderr: [%s]: %w", util.Iptables, allArgsString, msgStr, errBenignIPTablesFailure) } } if errCode > 0 { From ac7c7dce684c4e4245d0d1400634e04178351560 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Tue, 19 Apr 2022 10:42:56 -0700 Subject: [PATCH 3/4] change wording in logs --- npm/pkg/dataplane/policies/chain-management_linux.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/npm/pkg/dataplane/policies/chain-management_linux.go b/npm/pkg/dataplane/policies/chain-management_linux.go index f3c9149fa4..905172ae54 100644 --- a/npm/pkg/dataplane/policies/chain-management_linux.go +++ b/npm/pkg/dataplane/policies/chain-management_linux.go @@ -183,10 +183,10 @@ func (pMgr *PolicyManager) bootup(_ []string) error { switch deprecatedErrCode { case couldntLoadTargetErrorCode: // couldntLoadTargetErrorCode happens when AZURE-NPM chain doesn't exist (and hence the jump rule doesn't exist too) - klog.Infof("didn't delete deprecated jump rule from FORWARD chain to AZURE-NPM chain likely because AZURE-NPM chain doesn't exist. Exit code %d and error: %s", deprecatedErrCode, deprecatedErr) + klog.Infof("didn't delete deprecated jump rule from FORWARD chain to AZURE-NPM chain likely because AZURE-NPM chain doesn't exist. Exit code %d and output: %s", deprecatedErrCode, deprecatedErr) case doesNotExistErrorCode: // doesNotExistErrorCode happens when AZURE-NPM chain exists, but this jump rule doesn't exist - klog.Infof("didn't delete deprecated jump rule from FORWARD chain to AZURE-NPM chain likely because NPM v1 was not used prior. Exit code %d and error: %s", deprecatedErrCode, deprecatedErr) + klog.Infof("didn't delete deprecated jump rule from FORWARD chain to AZURE-NPM chain likely because NPM v1 was not used prior. Exit code %d and output: %s", deprecatedErrCode, deprecatedErr) default: klog.Errorf("failed to delete deprecated jump rule from FORWARD chain to AZURE-NPM chain for unexpected reason with exit code %d and error: %s", deprecatedErrCode, deprecatedErr.Error()) } From b7f18ea56b09e50b8eb7cdcdd08e09ba872f02fc Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Tue, 19 Apr 2022 12:03:25 -0700 Subject: [PATCH 4/4] address comment --- .../policies/chain-management_linux.go | 49 ++++++++----------- 1 file changed, 21 insertions(+), 28 deletions(-) diff --git a/npm/pkg/dataplane/policies/chain-management_linux.go b/npm/pkg/dataplane/policies/chain-management_linux.go index 905172ae54..87732602b2 100644 --- a/npm/pkg/dataplane/policies/chain-management_linux.go +++ b/npm/pkg/dataplane/policies/chain-management_linux.go @@ -21,18 +21,14 @@ const ( // TODO replace all util constants with local constants defaultlockWaitTimeInSeconds string = "60" - doesNotExistErrorCode int = 1 // stderr possibility: Bad rule (does a matching rule exist in that chain?) - doesNotExistMessage string = "No chain/target/match by that name" - couldntLoadTargetErrorCode int = 2 // Couldn't load target `AZURE-NPM-EGRESS':No such file or directory - couldntLoadTargetMessage string = "Couldn't load target `AZURE-NPM':No such file or directory" + doesNotExistErrorCode int = 1 // stderr possibility: Bad rule (does a matching rule exist in that chain?) + couldntLoadTargetErrorCode int = 2 // Couldn't load target `AZURE-NPM-EGRESS':No such file or directory // transferred from iptm.go and not sure why this length is important minLineNumberStringLength int = 3 ) var ( - errBenignIPTablesFailure = errors.New("benign iptables failure") - // Must loop through a slice because we need a deterministic order for fexec commands for UTs. iptablesAzureChains = []string{ util.IptablesAzureChain, @@ -56,12 +52,16 @@ var ( removeDeprecatedJumpIgnoredErrors = []*exitErrorInfo{ { - exitCode: doesNotExistErrorCode, - msg: doesNotExistMessage, + // doesNotExistErrorCode happens when AZURE-NPM chain exists, but this jump rule doesn't exist + exitCode: doesNotExistErrorCode, + stdErr: "No chain/target/match by that name", + messageToLog: "didn't delete deprecated jump rule from FORWARD chain to AZURE-NPM chain likely because NPM v1 was not used prior", }, { - exitCode: couldntLoadTargetErrorCode, - msg: couldntLoadTargetMessage, + // couldntLoadTargetErrorCode happens when AZURE-NPM chain doesn't exist (and hence the jump rule doesn't exist too) + exitCode: couldntLoadTargetErrorCode, + stdErr: "Couldn't load target `AZURE-NPM':No such file or directory", + messageToLog: "didn't delete deprecated jump rule from FORWARD chain to AZURE-NPM chain likely because AZURE-NPM chain doesn't exist", }, } @@ -80,8 +80,9 @@ var ( ) type exitErrorInfo struct { - exitCode int - msg string + exitCode int + stdErr string + messageToLog string } type staleChains struct { @@ -179,17 +180,8 @@ func (pMgr *PolicyManager) bootup(_ []string) error { deprecatedErrCode, deprecatedErr := pMgr.ignoreErrorsAndRunIPTablesCommand(removeDeprecatedJumpIgnoredErrors, util.IptablesDeletionFlag, deprecatedJumpFromForwardToAzureChainArgs...) if deprecatedErrCode == 0 { klog.Infof("deleted deprecated jump rule from FORWARD chain to AZURE-NPM chain") - } else { - switch deprecatedErrCode { - case couldntLoadTargetErrorCode: - // couldntLoadTargetErrorCode happens when AZURE-NPM chain doesn't exist (and hence the jump rule doesn't exist too) - klog.Infof("didn't delete deprecated jump rule from FORWARD chain to AZURE-NPM chain likely because AZURE-NPM chain doesn't exist. Exit code %d and output: %s", deprecatedErrCode, deprecatedErr) - case doesNotExistErrorCode: - // doesNotExistErrorCode happens when AZURE-NPM chain exists, but this jump rule doesn't exist - klog.Infof("didn't delete deprecated jump rule from FORWARD chain to AZURE-NPM chain likely because NPM v1 was not used prior. Exit code %d and output: %s", deprecatedErrCode, deprecatedErr) - default: - klog.Errorf("failed to delete deprecated jump rule from FORWARD chain to AZURE-NPM chain for unexpected reason with exit code %d and error: %s", deprecatedErrCode, deprecatedErr.Error()) - } + } else if deprecatedErr != nil { + klog.Errorf("failed to delete deprecated jump rule from FORWARD chain to AZURE-NPM chain for unexpected reason with exit code %d and error: %s", deprecatedErrCode, deprecatedErr.Error()) } currentChains, err := ioutil.AllCurrentAzureChains(pMgr.ioShim.Exec, defaultlockWaitTimeInSeconds) @@ -290,16 +282,17 @@ func (pMgr *PolicyManager) ignoreErrorsAndRunIPTablesCommand(ignored []*exitErro if ok := errors.As(err, &exitError); ok { errCode := exitError.ExitStatus() allArgsString := strings.Join(allArgs, " ") - msgStr := strings.TrimSuffix(string(output), "\n") + outputString := strings.TrimSuffix(string(output), "\n") for _, info := range ignored { - if errCode == info.exitCode && strings.Contains(msgStr, info.msg) { - return errCode, fmt.Errorf("not able to run iptables command [%s %s] due to benign Stderr: [%s]: %w", util.Iptables, allArgsString, msgStr, errBenignIPTablesFailure) + if errCode == info.exitCode && strings.Contains(outputString, info.stdErr) { + klog.Infof("%s. not able to run iptables command [%s %s]. exit code: %d, output: %s", info.messageToLog, util.Iptables, allArgsString, errCode, outputString) + return errCode, nil } } if errCode > 0 { - metrics.SendErrorLogAndMetric(util.IptmID, "error: There was an error running command: [%s %s] Stderr: [%v, %s]", util.Iptables, allArgsString, exitError, msgStr) + metrics.SendErrorLogAndMetric(util.IptmID, "error: There was an error running command: [%s %s] Stderr: [%v, %s]", util.Iptables, allArgsString, exitError, outputString) } - return errCode, npmerrors.SimpleErrorWrapper(fmt.Sprintf("failed to run iptables command [%s %s] Stderr: [%s]", util.Iptables, allArgsString, msgStr), exitError) + return errCode, npmerrors.SimpleErrorWrapper(fmt.Sprintf("failed to run iptables command [%s %s] Stderr: [%s]", util.Iptables, allArgsString, outputString), exitError) } return 0, nil }