From 9cc0d1f9b93ca4d2574c8fe08ea94ec034254f32 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Fri, 3 Dec 2021 13:28:57 -0800 Subject: [PATCH 1/3] delete deprecated jump to azure chain --- .../policies/chain-management_linux.go | 35 +++++++-- .../policies/chain-management_linux_test.go | 75 ++++++++++++++++--- 2 files changed, 91 insertions(+), 19 deletions(-) diff --git a/npm/pkg/dataplane/policies/chain-management_linux.go b/npm/pkg/dataplane/policies/chain-management_linux.go index ece94acfa8..ca8546d54d 100644 --- a/npm/pkg/dataplane/policies/chain-management_linux.go +++ b/npm/pkg/dataplane/policies/chain-management_linux.go @@ -46,6 +46,11 @@ var ( util.IptablesCtstateFlag, util.IptablesNewState, } + deprecatedJumpFromForwardToAzureChainArgs = []string{ + util.IptablesForwardChain, + util.IptablesJumpFlag, + util.IptablesAzureChain, + } ) type staleChains struct { @@ -130,16 +135,31 @@ func (pMgr *PolicyManager) initializeNPMChains() error { // removeNPMChains removes the jump rule from FORWARD chain to AZURE-NPM chain // and flushes and deletes all NPM Chains. func (pMgr *PolicyManager) removeNPMChains() error { + // 1.1 delete the deprecated jump rule from FORWARD chain to AZURE-NPM chain, if it exists + deprecatedDeleteErrCode, deprecatedDeleteErr := pMgr.runIPTablesCommand(util.IptablesDeletionFlag, deprecatedJumpFromForwardToAzureChainArgs...) + if deprecatedDeleteErr == nil { + klog.Infof("deleted deprecated jump rule from FORWARD chain to AZURE-NPM chain") + } else if deprecatedDeleteErrCode == couldntLoadTargetErrorCode || deprecatedDeleteErrCode == doesNotExistErrorCode { + // see explanation of error codes in switch statement below + klog.Errorf("failed to delete deprecated jump rule from FORWARD chain to AZURE-NPM chain for unexpected reason with exit code %d and error: %s", deprecatedDeleteErrCode, deprecatedDeleteErr.Error()) + } + + // 1.2 delete the jump rule that has ctstate NEW deleteErrCode, deleteErr := pMgr.runIPTablesCommand(util.IptablesDeletionFlag, jumpFromForwardToAzureChainArgs...) - // couldntLoadTargetErrorCode happens when AZURE-NPM chain doesn't exist (and hence the jump rule doesn't exist too) - // we can ignore this error code, since there's no problem if the rule doesn't exist - hadDeleteError := deleteErr != nil && deleteErrCode != couldntLoadTargetErrorCode - if hadDeleteError { - // log as an error because this is unexpected, but don't return an error because for example, we could have AZURE-NPM chain exists but the jump to it doesn't exist - metrics.SendErrorLogAndMetric(util.IptmID, "Error: failed to delete jump from FORWARD chain to AZURE-NPM chain with exit code %d and error: %s", deleteErrCode, deleteErr.Error()) - // FIXME update ID + if deleteErr != nil { + switch deleteErrCode { + case couldntLoadTargetErrorCode: + // couldntLoadTargetErrorCode happens when AZURE-NPM chain doesn't exist (and hence the jump rule doesn't exist too) + klog.Infof("didn't delete jump from FORWARD chain to AZURE-NPM chain likely because AZURE-NPM chain doesn't exist. Exit code %d and error: %s", deleteErrCode, deleteErr.Error()) + case doesNotExistErrorCode: + // doesNotExistErrorCode happens when AZURE-NPM chain exists, but this jump rule doesn't exist + klog.Infof("didn't delete jump from FORWARD chain to AZURE-NPM chain likely because we're transitioning from v1.4.9 or older. Exit code %d and error: %s", deleteErrCode, deleteErr.Error()) + default: + klog.Errorf("failed to delete jump from FORWARD chain to AZURE-NPM chain for unexpected reason with exit code %d and error: %s", deleteErrCode, deleteErr.Error()) + } } + // 2. flush current NPM chains creatorToFlush, chainsToDelete := pMgr.creatorAndChainsForReset() if len(chainsToDelete) == 0 { return nil @@ -149,6 +169,7 @@ func (pMgr *PolicyManager) removeNPMChains() error { return npmerrors.SimpleErrorWrapper("failed to flush chains", restoreError) } + // 3. delete current NPM chains // TODO aggregate an error for each chain that failed to delete var anyDeleteErr error for _, chainName := range chainsToDelete { diff --git a/npm/pkg/dataplane/policies/chain-management_linux_test.go b/npm/pkg/dataplane/policies/chain-management_linux_test.go index 6d6b2ed024..2fb01a02c4 100644 --- a/npm/pkg/dataplane/policies/chain-management_linux_test.go +++ b/npm/pkg/dataplane/policies/chain-management_linux_test.go @@ -193,6 +193,7 @@ func TestRemoveNPMChains(t *testing.T) { { name: "success when there are chains currently", calls: []testutils.TestCmd{ + {Cmd: []string{"iptables", "-w", "60", "-D", "FORWARD", "-j", "AZURE-NPM"}, ExitCode: 1}, // deprecated rule did not exist {Cmd: []string{"iptables", "-w", "60", "-D", "FORWARD", "-j", "AZURE-NPM", "-m", "conntrack", "--ctstate", "NEW"}}, {Cmd: listPolicyChainNamesCommandStrings, PipedToCommand: true}, { @@ -211,27 +212,17 @@ func TestRemoveNPMChains(t *testing.T) { { name: "success when there are no chains currently", calls: []testutils.TestCmd{ + {Cmd: []string{"iptables", "-w", "60", "-D", "FORWARD", "-j", "AZURE-NPM"}, ExitCode: 1}, // deprecated rule did not exist {Cmd: []string{"iptables", "-w", "60", "-D", "FORWARD", "-j", "AZURE-NPM", "-m", "conntrack", "--ctstate", "NEW"}}, {Cmd: listPolicyChainNamesCommandStrings, PipedToCommand: true}, {Cmd: []string{"grep", "Chain AZURE-NPM"}, ExitCode: 1}, }, wantErr: false, }, - { - name: "no error on failure to delete", - calls: []testutils.TestCmd{ - { - Cmd: []string{"iptables", "-w", "60", "-D", "FORWARD", "-j", "AZURE-NPM", "-m", "conntrack", "--ctstate", "NEW"}, - ExitCode: 1, // delete failure - }, - {Cmd: listPolicyChainNamesCommandStrings, PipedToCommand: true}, - {Cmd: []string{"grep", "Chain AZURE-NPM"}, ExitCode: 1}, - }, - wantErr: false, - }, { name: "failure on restore", calls: []testutils.TestCmd{ + {Cmd: []string{"iptables", "-w", "60", "-D", "FORWARD", "-j", "AZURE-NPM"}, ExitCode: 1}, // deprecated rule did not exist {Cmd: []string{"iptables", "-w", "60", "-D", "FORWARD", "-j", "AZURE-NPM", "-m", "conntrack", "--ctstate", "NEW"}}, {Cmd: listPolicyChainNamesCommandStrings, PipedToCommand: true}, { @@ -245,6 +236,7 @@ func TestRemoveNPMChains(t *testing.T) { { name: "failure on destroy", calls: []testutils.TestCmd{ + {Cmd: []string{"iptables", "-w", "60", "-D", "FORWARD", "-j", "AZURE-NPM"}, ExitCode: 1}, // deprecated rule did not exist {Cmd: []string{"iptables", "-w", "60", "-D", "FORWARD", "-j", "AZURE-NPM", "-m", "conntrack", "--ctstate", "NEW"}}, {Cmd: listPolicyChainNamesCommandStrings, PipedToCommand: true}, { @@ -260,6 +252,65 @@ func TestRemoveNPMChains(t *testing.T) { }, wantErr: true, }, + { + name: "does not exist error while deleting jump", + calls: []testutils.TestCmd{ + {Cmd: []string{"iptables", "-w", "60", "-D", "FORWARD", "-j", "AZURE-NPM"}, ExitCode: 1}, // deprecated rule did not exist + { + Cmd: []string{"iptables", "-w", "60", "-D", "FORWARD", "-j", "AZURE-NPM", "-m", "conntrack", "--ctstate", "NEW"}, + ExitCode: 1, // does not exist error code + }, + {Cmd: listPolicyChainNamesCommandStrings, PipedToCommand: true}, + {Cmd: []string{"grep", "Chain AZURE-NPM"}, ExitCode: 1}, + }, + wantErr: false, + }, + { + name: "no AZURE-NPM chain while deleting jump", + calls: []testutils.TestCmd{ + {Cmd: []string{"iptables", "-w", "60", "-D", "FORWARD", "-j", "AZURE-NPM"}, ExitCode: 2}, // same code as delete below + { + Cmd: []string{"iptables", "-w", "60", "-D", "FORWARD", "-j", "AZURE-NPM", "-m", "conntrack", "--ctstate", "NEW"}, + ExitCode: 2, // couldn't load target error code + }, + {Cmd: listPolicyChainNamesCommandStrings, PipedToCommand: true}, + {Cmd: []string{"grep", "Chain AZURE-NPM"}, ExitCode: 1}, + }, + wantErr: false, + }, + { + name: "unknown error while deleting jump", + calls: []testutils.TestCmd{ + {Cmd: []string{"iptables", "-w", "60", "-D", "FORWARD", "-j", "AZURE-NPM"}, ExitCode: 1}, // deprecated rule did not exist + { + Cmd: []string{"iptables", "-w", "60", "-D", "FORWARD", "-j", "AZURE-NPM", "-m", "conntrack", "--ctstate", "NEW"}, + ExitCode: 3, // unknown delete failure + }, + {Cmd: listPolicyChainNamesCommandStrings, PipedToCommand: true}, + {Cmd: []string{"grep", "Chain AZURE-NPM"}, ExitCode: 1}, + }, + wantErr: false, + }, + { + name: "successfully delete deprecated jump", + calls: []testutils.TestCmd{ + {Cmd: []string{"iptables", "-w", "60", "-D", "FORWARD", "-j", "AZURE-NPM"}}, // deprecated rule existed + {Cmd: []string{"iptables", "-w", "60", "-D", "FORWARD", "-j", "AZURE-NPM", "-m", "conntrack", "--ctstate", "NEW"}}, + {Cmd: listPolicyChainNamesCommandStrings, PipedToCommand: true}, + {Cmd: []string{"grep", "Chain AZURE-NPM"}, ExitCode: 1}, + }, + wantErr: false, + }, + { + name: "unknown error while deleting deprecated jump", + calls: []testutils.TestCmd{ + {Cmd: []string{"iptables", "-w", "60", "-D", "FORWARD", "-j", "AZURE-NPM"}, ExitCode: 3}, + {Cmd: []string{"iptables", "-w", "60", "-D", "FORWARD", "-j", "AZURE-NPM", "-m", "conntrack", "--ctstate", "NEW"}}, + {Cmd: listPolicyChainNamesCommandStrings, PipedToCommand: true}, + {Cmd: []string{"grep", "Chain AZURE-NPM"}, ExitCode: 1}, + }, + wantErr: false, + }, } for _, tt := range tests { tt := tt From ecd01b04fe8862a98b18387176e8dccb341e3848 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Tue, 7 Dec 2021 18:43:30 -0800 Subject: [PATCH 2/3] fix UT --- npm/pkg/dataplane/policies/testutils_linux.go | 1 + 1 file changed, 1 insertion(+) diff --git a/npm/pkg/dataplane/policies/testutils_linux.go b/npm/pkg/dataplane/policies/testutils_linux.go index 684868e45f..7cb50479de 100644 --- a/npm/pkg/dataplane/policies/testutils_linux.go +++ b/npm/pkg/dataplane/policies/testutils_linux.go @@ -59,6 +59,7 @@ func GetInitializeTestCalls() []testutils.TestCmd { func GetResetTestCalls() []testutils.TestCmd { return []testutils.TestCmd{ + {Cmd: []string{"iptables", "-w", "60", "-D", "FORWARD", "-j", "AZURE-NPM"}}, {Cmd: []string{"iptables", "-w", "60", "-D", "FORWARD", "-j", "AZURE-NPM", "-m", "conntrack", "--ctstate", "NEW"}}, {Cmd: listPolicyChainNamesCommandStrings, PipedToCommand: true}, { From 2c20c668e8494a9360a64cc21f5b38175fc9925b Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Fri, 10 Dec 2021 10:50:41 -0800 Subject: [PATCH 3/3] fix logs --- .../policies/chain-management_linux.go | 20 ++++++++++++------- .../policies/chain-management_linux_test.go | 4 ++-- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/npm/pkg/dataplane/policies/chain-management_linux.go b/npm/pkg/dataplane/policies/chain-management_linux.go index ca8546d54d..4ff6048337 100644 --- a/npm/pkg/dataplane/policies/chain-management_linux.go +++ b/npm/pkg/dataplane/policies/chain-management_linux.go @@ -136,12 +136,20 @@ func (pMgr *PolicyManager) initializeNPMChains() error { // and flushes and deletes all NPM Chains. func (pMgr *PolicyManager) removeNPMChains() error { // 1.1 delete the deprecated jump rule from FORWARD chain to AZURE-NPM chain, if it exists - deprecatedDeleteErrCode, deprecatedDeleteErr := pMgr.runIPTablesCommand(util.IptablesDeletionFlag, deprecatedJumpFromForwardToAzureChainArgs...) - if deprecatedDeleteErr == nil { + deprecatedErrCode, deprecatedErr := pMgr.runIPTablesCommand(util.IptablesDeletionFlag, deprecatedJumpFromForwardToAzureChainArgs...) + if deprecatedErr == nil { klog.Infof("deleted deprecated jump rule from FORWARD chain to AZURE-NPM chain") - } else if deprecatedDeleteErrCode == couldntLoadTargetErrorCode || deprecatedDeleteErrCode == doesNotExistErrorCode { - // see explanation of error codes in switch statement below - klog.Errorf("failed to delete deprecated jump rule from FORWARD chain to AZURE-NPM chain for unexpected reason with exit code %d and error: %s", deprecatedDeleteErrCode, deprecatedDeleteErr.Error()) + } else { + switch deprecatedErrCode { + case couldntLoadTargetErrorCode: + // couldntLoadTargetErrorCode happens when AZURE-NPM chain doesn't exist (and hence the jump rule doesn't exist too) + klog.Infof("didn't delete deprecated jump rule from FORWARD chain to AZURE-NPM chain likely because AZURE-NPM chain doesn't exist. Exit code %d and error: %s", deprecatedErrCode, deprecatedErr) + case doesNotExistErrorCode: + // doesNotExistErrorCode happens when AZURE-NPM chain exists, but this jump rule doesn't exist + klog.Infof("didn't delete deprecated jump rule from FORWARD chain to AZURE-NPM chain likely because NPM v1 was not used prior. Exit code %d and error: %s", deprecatedErrCode, deprecatedErr) + default: + klog.Errorf("failed to delete deprecated jump rule from FORWARD chain to AZURE-NPM chain for unexpected reason with exit code %d and error: %s", deprecatedErrCode, deprecatedErr.Error()) + } } // 1.2 delete the jump rule that has ctstate NEW @@ -149,10 +157,8 @@ func (pMgr *PolicyManager) removeNPMChains() error { if deleteErr != nil { switch deleteErrCode { case couldntLoadTargetErrorCode: - // couldntLoadTargetErrorCode happens when AZURE-NPM chain doesn't exist (and hence the jump rule doesn't exist too) klog.Infof("didn't delete jump from FORWARD chain to AZURE-NPM chain likely because AZURE-NPM chain doesn't exist. Exit code %d and error: %s", deleteErrCode, deleteErr.Error()) case doesNotExistErrorCode: - // doesNotExistErrorCode happens when AZURE-NPM chain exists, but this jump rule doesn't exist klog.Infof("didn't delete jump from FORWARD chain to AZURE-NPM chain likely because we're transitioning from v1.4.9 or older. Exit code %d and error: %s", deleteErrCode, deleteErr.Error()) default: klog.Errorf("failed to delete jump from FORWARD chain to AZURE-NPM chain for unexpected reason with exit code %d and error: %s", deleteErrCode, deleteErr.Error()) diff --git a/npm/pkg/dataplane/policies/chain-management_linux_test.go b/npm/pkg/dataplane/policies/chain-management_linux_test.go index 2fb01a02c4..f9d1da568f 100644 --- a/npm/pkg/dataplane/policies/chain-management_linux_test.go +++ b/npm/pkg/dataplane/policies/chain-management_linux_test.go @@ -266,9 +266,9 @@ func TestRemoveNPMChains(t *testing.T) { wantErr: false, }, { - name: "no AZURE-NPM chain while deleting jump", + name: "no AZURE-NPM chain while deleting both jumps", calls: []testutils.TestCmd{ - {Cmd: []string{"iptables", "-w", "60", "-D", "FORWARD", "-j", "AZURE-NPM"}, ExitCode: 2}, // same code as delete below + {Cmd: []string{"iptables", "-w", "60", "-D", "FORWARD", "-j", "AZURE-NPM"}, ExitCode: 2}, // same code as the next command { Cmd: []string{"iptables", "-w", "60", "-D", "FORWARD", "-j", "AZURE-NPM", "-m", "conntrack", "--ctstate", "NEW"}, ExitCode: 2, // couldn't load target error code