Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion npm/pkg/controlplane/controllers/v2/podController.go
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,7 @@ func (c *PodController) syncAddedPod(podObj *corev1.Pod) error {
targetSetKeyValue := ipsets.NewIPSetMetadata(labelKeyValue, ipsets.KeyValueLabelOfPod)
allSets := []*ipsets.IPSetMetadata{targetSetKey, targetSetKeyValue}

klog.Infof("Creating ipsets %v if it does not already exist", allSets)
klog.Infof("Creating ipsets %+v and %+v if they do not exist", targetSetKey, targetSetKeyValue)
klog.Infof("Adding pod %s (ip : %s) to ipset %s and %s", podKey, npmPodObj.PodIP, labelKey, labelKeyValue)
if err = c.dp.AddToSets(allSets, podMetadata); err != nil {
return fmt.Errorf("[syncAddedPod] Error: failed to add pod to label ipset with err: %w", err)
Expand Down
56 changes: 39 additions & 17 deletions npm/pkg/dataplane/policies/chain-management_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ const (
// TODO replace all util constants with local constants
defaultlockWaitTimeInSeconds string = "60"

doesNotExistErrorCode int = 1 // Bad rule (does a matching rule exist in that chain?)
doesNotExistErrorCode int = 1 // stderr possibility: Bad rule (does a matching rule exist in that chain?)
couldntLoadTargetErrorCode int = 2 // Couldn't load target `AZURE-NPM-EGRESS':No such file or directory

// transferred from iptm.go and not sure why this length is important
Expand Down Expand Up @@ -50,6 +50,21 @@ var (
}
jumpFromForwardToAzureChainArgs = append([]string{util.IptablesForwardChain}, jumpToAzureChainArgs...)

removeDeprecatedJumpIgnoredErrors = []*exitErrorInfo{
{
// doesNotExistErrorCode happens when AZURE-NPM chain exists, but this jump rule doesn't exist
exitCode: doesNotExistErrorCode,
stdErr: "No chain/target/match by that name",
messageToLog: "didn't delete deprecated jump rule from FORWARD chain to AZURE-NPM chain likely because NPM v1 was not used prior",
},
{
// couldntLoadTargetErrorCode happens when AZURE-NPM chain doesn't exist (and hence the jump rule doesn't exist too)
exitCode: couldntLoadTargetErrorCode,
stdErr: "Couldn't load target `AZURE-NPM':No such file or directory",
messageToLog: "didn't delete deprecated jump rule from FORWARD chain to AZURE-NPM chain likely because AZURE-NPM chain doesn't exist",
},
}

listForwardEntriesArgs = []string{
util.IptablesWaitFlag, defaultlockWaitTimeInSeconds, util.IptablesTableFlag, util.IptablesFilterTable,
util.IptablesNumericFlag, util.IptablesListFlag, util.IptablesForwardChain, util.IptablesLineNumbersFlag,
Expand All @@ -64,6 +79,12 @@ var (
}
)

type exitErrorInfo struct {
exitCode int
stdErr string
messageToLog string
}

type staleChains struct {
chainsToCleanup map[string]struct{}
}
Expand Down Expand Up @@ -156,20 +177,11 @@ func (pMgr *PolicyManager) bootup(_ []string) error {
defer pMgr.reconcileManager.forceUnlock()

// 1. delete the deprecated jump to AZURE-NPM
deprecatedErrCode, deprecatedErr := pMgr.runIPTablesCommand(util.IptablesDeletionFlag, deprecatedJumpFromForwardToAzureChainArgs...)
if deprecatedErr == nil {
deprecatedErrCode, deprecatedErr := pMgr.ignoreErrorsAndRunIPTablesCommand(removeDeprecatedJumpIgnoredErrors, util.IptablesDeletionFlag, deprecatedJumpFromForwardToAzureChainArgs...)
if deprecatedErrCode == 0 {
klog.Infof("deleted deprecated jump rule from FORWARD chain to AZURE-NPM chain")
} else {
switch deprecatedErrCode {
case couldntLoadTargetErrorCode:
// couldntLoadTargetErrorCode happens when AZURE-NPM chain doesn't exist (and hence the jump rule doesn't exist too)
klog.Infof("didn't delete deprecated jump rule from FORWARD chain to AZURE-NPM chain likely because AZURE-NPM chain doesn't exist. Exit code %d and error: %s", deprecatedErrCode, deprecatedErr)
case doesNotExistErrorCode:
// doesNotExistErrorCode happens when AZURE-NPM chain exists, but this jump rule doesn't exist
klog.Infof("didn't delete deprecated jump rule from FORWARD chain to AZURE-NPM chain likely because NPM v1 was not used prior. Exit code %d and error: %s", deprecatedErrCode, deprecatedErr)
default:
klog.Errorf("failed to delete deprecated jump rule from FORWARD chain to AZURE-NPM chain for unexpected reason with exit code %d and error: %s", deprecatedErrCode, deprecatedErr.Error())
}
} else if deprecatedErr != nil {
klog.Errorf("failed to delete deprecated jump rule from FORWARD chain to AZURE-NPM chain for unexpected reason with exit code %d and error: %s", deprecatedErrCode, deprecatedErr.Error())
}

currentChains, err := ioutil.AllCurrentAzureChains(pMgr.ioShim.Exec, defaultlockWaitTimeInSeconds)
Expand Down Expand Up @@ -254,6 +266,10 @@ deleteLoop:

// this function has a direct comparison in NPM v1 iptables manager (iptm.go)
func (pMgr *PolicyManager) runIPTablesCommand(operationFlag string, args ...string) (int, error) {
return pMgr.ignoreErrorsAndRunIPTablesCommand(nil, operationFlag, args...)
}

func (pMgr *PolicyManager) ignoreErrorsAndRunIPTablesCommand(ignored []*exitErrorInfo, operationFlag string, args ...string) (int, error) {
allArgs := []string{util.IptablesWaitFlag, defaultlockWaitTimeInSeconds, operationFlag}
allArgs = append(allArgs, args...)

Expand All @@ -266,11 +282,17 @@ func (pMgr *PolicyManager) runIPTablesCommand(operationFlag string, args ...stri
if ok := errors.As(err, &exitError); ok {
errCode := exitError.ExitStatus()
allArgsString := strings.Join(allArgs, " ")
msgStr := strings.TrimSuffix(string(output), "\n")
outputString := strings.TrimSuffix(string(output), "\n")
for _, info := range ignored {
if errCode == info.exitCode && strings.Contains(outputString, info.stdErr) {
klog.Infof("%s. not able to run iptables command [%s %s]. exit code: %d, output: %s", info.messageToLog, util.Iptables, allArgsString, errCode, outputString)
return errCode, nil
}
}
if errCode > 0 {
metrics.SendErrorLogAndMetric(util.IptmID, "error: There was an error running command: [%s %s] Stderr: [%v, %s]", util.Iptables, allArgsString, exitError, msgStr)
metrics.SendErrorLogAndMetric(util.IptmID, "error: There was an error running command: [%s %s] Stderr: [%v, %s]", util.Iptables, allArgsString, exitError, outputString)
}
return errCode, npmerrors.SimpleErrorWrapper(fmt.Sprintf("failed to run iptables command [%s %s] Stderr: [%s]", util.Iptables, allArgsString, msgStr), exitError)
return errCode, npmerrors.SimpleErrorWrapper(fmt.Sprintf("failed to run iptables command [%s %s] Stderr: [%s]", util.Iptables, allArgsString, outputString), exitError)
}
return 0, nil
}
Expand Down
12 changes: 10 additions & 2 deletions npm/pkg/dataplane/policies/chain-management_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,11 @@ func TestBootupLinux(t *testing.T) {
{
name: "success after restore failure (no NPM prior)",
calls: []testutils.TestCmd{
{Cmd: []string{"iptables", "-w", "60", "-D", "FORWARD", "-j", "AZURE-NPM"}, ExitCode: 2}, // AZURE-NPM chain didn't exist
{
Cmd: []string{"iptables", "-w", "60", "-D", "FORWARD", "-j", "AZURE-NPM"},
ExitCode: 2,
Stdout: "iptables v1.8.4 (legacy): Couldn't load target `AZURE-NPM':No such file or directory",
}, // AZURE-NPM chain didn't exist
{Cmd: listAllCommandStrings, PipedToCommand: true},
{Cmd: []string{"grep", "Chain AZURE-NPM"}, ExitCode: 1},
fakeIPTablesRestoreFailureCommand, // e.g. xtables lock held by another app. Currently the stdout doesn't matter for retrying
Expand All @@ -389,7 +393,11 @@ func TestBootupLinux(t *testing.T) {
{
name: "success: v2 existed prior",
calls: []testutils.TestCmd{
{Cmd: []string{"iptables", "-w", "60", "-D", "FORWARD", "-j", "AZURE-NPM"}, ExitCode: 1}, // deprecated rule did not exist
{
Cmd: []string{"iptables", "-w", "60", "-D", "FORWARD", "-j", "AZURE-NPM"},
ExitCode: 1,
Stdout: "No chain/target/match by that name",
}, // deprecated rule did not exist
{Cmd: listAllCommandStrings, PipedToCommand: true},
{
Cmd: []string{"grep", "Chain AZURE-NPM"},
Expand Down
1 change: 1 addition & 0 deletions npm/pkg/dataplane/policies/policymanager_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ func (pMgr *PolicyManager) deleteJumpRule(policy *NPMNetworkPolicy, direction Un

specs = append([]string{baseChainName}, specs...)
errCode, err := pMgr.runIPTablesCommand(util.IptablesDeletionFlag, specs...)
// if this actually happens (don't think it should), could use ignoreErrorsAndRunIPTablesCommand instead with: "Bad rule (does a matching rule exist in that chain?)"
if err != nil && errCode != doesNotExistErrorCode {
errorString := fmt.Sprintf("failed to delete jump from %s chain to %s chain for policy %s with exit code %d", baseChainName, chainName, policy.PolicyKey, errCode)
log.Errorf("%s: %w", errorString, err)
Expand Down