diff --git a/npm/pkg/dataplane/dataplane_linux.go b/npm/pkg/dataplane/dataplane_linux.go index 41fa1664fa..b1d1548e1d 100644 --- a/npm/pkg/dataplane/dataplane_linux.go +++ b/npm/pkg/dataplane/dataplane_linux.go @@ -2,6 +2,7 @@ package dataplane import ( "github.com/Azure/azure-container-networking/npm/pkg/dataplane/policies" + "github.com/Azure/azure-container-networking/npm/util" npmerrors "github.com/Azure/azure-container-networking/npm/util/errors" ) @@ -20,6 +21,8 @@ func (dp *DataPlane) updatePod(pod *updateNPMPod) error { } func (dp *DataPlane) bootupDataPlane() error { + util.DetectIptablesVersion(dp.ioShim) + // It is important to keep order to clean-up ACLs before ipsets. Otherwise we won't be able to delete ipsets referenced by ACLs if err := dp.policyMgr.Bootup(nil); err != nil { return npmerrors.ErrorWrapper(npmerrors.BootupDataplane, false, "failed to reset policy dataplane", err) diff --git a/npm/pkg/dataplane/dataplane_test.go b/npm/pkg/dataplane/dataplane_test.go index c879b41f07..c5bc3c4d37 100644 --- a/npm/pkg/dataplane/dataplane_test.go +++ b/npm/pkg/dataplane/dataplane_test.go @@ -393,7 +393,7 @@ func TestUpdatePodCache(t *testing.T) { } func getBootupTestCalls() []testutils.TestCmd { - return append(policies.GetBootupTestCalls(), ipsets.GetResetTestCalls()...) + return append(policies.GetBootupTestCalls(true), ipsets.GetResetTestCalls()...) } func getAddPolicyTestCallsForDP(networkPolicy *policies.NPMNetworkPolicy) []testutils.TestCmd { diff --git a/npm/pkg/dataplane/policies/chain-management_linux.go b/npm/pkg/dataplane/policies/chain-management_linux.go index a529e1b51c..92cdd0cb95 100644 --- a/npm/pkg/dataplane/policies/chain-management_linux.go +++ b/npm/pkg/dataplane/policies/chain-management_linux.go @@ -147,23 +147,24 @@ func isBaseChain(chain string) bool { } /* - Called once at startup. - Like the rest of PolicyManager, minimizes the number of OS calls by consolidating all possible actions into one iptables-restore call. - - 1. Delete the deprecated jump from FORWARD to AZURE-NPM chain (if it exists). - 2. Cleanup old NPM chains, and configure base chains and their rules. - 1. Do the following via iptables-restore --noflush: - - flush all deprecated chains - - flush old v2 policy chains - - create/flush the base chains - - add rules for the base chains, except for AZURE-NPM (so that PolicyManager will be deactivated) - 2. In the background: - - delete all deprecated chains - - delete old v2 policy chains - 3. Add/reposition the jump from FORWARD chain to AZURE-NPM chain. - - TODO: could use one grep call instead of separate calls for getting jump line nums and for getting deprecated chains and old v2 policy chains - - would use a grep pattern like so: | +Called once at startup. +Like the rest of PolicyManager, minimizes the number of OS calls by consolidating all possible actions into one iptables-restore call. + +1. Delete the deprecated jump from FORWARD to AZURE-NPM chain (if it exists). +2. Cleanup old NPM chains, and configure base chains and their rules. + 1. Do the following via iptables-restore --noflush: + - flush all deprecated chains + - flush old v2 policy chains + - create/flush the base chains + - add rules for the base chains, except for AZURE-NPM (so that PolicyManager will be deactivated) + 2. In the background: + - delete all deprecated chains + - delete old v2 policy chains + +3. Add/reposition the jump from FORWARD chain to AZURE-NPM chain. + +TODO: could use one grep call instead of separate calls for getting jump line nums and for getting deprecated chains and old v2 policy chains + - would use a grep pattern like so: | */ func (pMgr *PolicyManager) bootup(_ []string) error { klog.Infof("booting up iptables Azure chains") @@ -173,6 +174,79 @@ func (pMgr *PolicyManager) bootup(_ []string) error { pMgr.reconcileManager.forceLock() defer pMgr.reconcileManager.forceUnlock() + if strings.Contains(util.Iptables, "nft") { + util.Iptables = util.IptablesLegacy + util.IptablesSave = util.IptablesSaveLegacy + util.IptablesRestore = util.IptablesRestoreLegacy + + // 0. delete the deprecated jump to deprecated AZURE-NPM in legacy iptables + deprecatedErrCode, deprecatedErr := pMgr.ignoreErrorsAndRunIPTablesCommand(removeDeprecatedJumpIgnoredErrors, util.IptablesDeletionFlag, deprecatedJumpFromForwardToAzureChainArgs...) + if deprecatedErrCode == 0 { + klog.Infof("deleted deprecated jump rule from FORWARD chain to AZURE-NPM chain") + } else if deprecatedErr != nil { + metrics.SendErrorLogAndMetric(util.IptmID, + "failed to delete deprecated jump rule from FORWARD chain to AZURE-NPM chain for unexpected reason with exit code %d and error: %s", + deprecatedErrCode, deprecatedErr.Error()) + } + + // 0. delete the deprecated jump to current AZURE-NPM in legacy iptables + deprecatedErrCode, deprecatedErr = pMgr.ignoreErrorsAndRunIPTablesCommand(removeDeprecatedJumpIgnoredErrors, util.IptablesDeletionFlag, jumpFromForwardToAzureChainArgs...) + if deprecatedErrCode == 0 { + klog.Infof("deleted deprecated jump rule from FORWARD chain to AZURE-NPM chain") + } else if deprecatedErr != nil { + metrics.SendErrorLogAndMetric(util.IptmID, + "failed to delete deprecated jump rule from FORWARD chain to AZURE-NPM chain for unexpected reason with exit code %d and error: %s", + deprecatedErrCode, deprecatedErr.Error()) + } + + // clean up current chains in legacy iptables + currentChains, err := ioutil.AllCurrentAzureChains(pMgr.ioShim.Exec, util.IptablesDefaultWaitTime) + if err != nil { + return npmerrors.SimpleErrorWrapper("failed to get current chains for bootup", err) + } + + // We have only one chance to clean existing legacy iptables chains. + // So flush all the chains and then destroy them + var aggregateError error + for chain := range currentChains { + errCode, err := pMgr.runIPTablesCommand(util.IptablesFlushFlag, chain) + if err != nil && errCode != doesNotExistErrorCode { + // add to staleChains if it's not one of the iptablesAzureChains + pMgr.staleChains.add(chain) + currentErrString := fmt.Sprintf("failed to flush chain %s with err [%v]", chain, err) + if aggregateError == nil { + aggregateError = npmerrors.SimpleError(currentErrString) + } else { + aggregateError = npmerrors.SimpleErrorWrapper(fmt.Sprintf("%s and had previous error", currentErrString), aggregateError) + } + } + } + + for chain := range currentChains { + errCode, err := pMgr.runIPTablesCommand(util.IptablesDestroyFlag, chain) + if err != nil && errCode != doesNotExistErrorCode { + // add to staleChains if it's not one of the iptablesAzureChains + pMgr.staleChains.add(chain) + currentErrString := fmt.Sprintf("failed to delete chain %s with err [%v]", chain, err) + if aggregateError == nil { + aggregateError = npmerrors.SimpleError(currentErrString) + } else { + aggregateError = npmerrors.SimpleErrorWrapper(fmt.Sprintf("%s and had previous error", currentErrString), aggregateError) + } + } + } + + if aggregateError != nil { + metrics.SendErrorLogAndMetric(util.IptmID, + "failed to flush and delete stale chain in legacy iptables with error: %s", + aggregateError.Error()) + } + + util.Iptables = util.IptablesNft + util.IptablesSave = util.IptablesSaveNft + util.IptablesRestore = util.IptablesRestoreNft + } + // 1. delete the deprecated jump to AZURE-NPM deprecatedErrCode, deprecatedErr := pMgr.ignoreErrorsAndRunIPTablesCommand(removeDeprecatedJumpIgnoredErrors, util.IptablesDeletionFlag, deprecatedJumpFromForwardToAzureChainArgs...) if deprecatedErrCode == 0 { diff --git a/npm/pkg/dataplane/policies/chain-management_linux_test.go b/npm/pkg/dataplane/policies/chain-management_linux_test.go index 8e7addcc82..75fa6994b5 100644 --- a/npm/pkg/dataplane/policies/chain-management_linux_test.go +++ b/npm/pkg/dataplane/policies/chain-management_linux_test.go @@ -365,7 +365,7 @@ func TestBootupLinux(t *testing.T) { // but the fake command exit codes and stdouts are in line with having no NPM prior { name: "success (no NPM prior)", - calls: GetBootupTestCalls(), + calls: GetBootupTestCalls(false), wantErr: false, }, { diff --git a/npm/pkg/dataplane/policies/policymanager_test.go b/npm/pkg/dataplane/policies/policymanager_test.go index f280d63bb9..31e0beea4f 100644 --- a/npm/pkg/dataplane/policies/policymanager_test.go +++ b/npm/pkg/dataplane/policies/policymanager_test.go @@ -101,7 +101,7 @@ func (p promVals) testPrometheusMetrics(t *testing.T) { // see chain-management_linux_test.go for testing when an error occurs func TestBootup(t *testing.T) { metrics.ReinitializeAll() - calls := GetBootupTestCalls() + calls := GetBootupTestCalls(false) ioshim := common.NewMockIOShim(calls) defer ioshim.VerifyCalls(t, calls) pMgr := NewPolicyManager(ioshim, ipsetConfig) diff --git a/npm/pkg/dataplane/policies/testutils_linux.go b/npm/pkg/dataplane/policies/testutils_linux.go index 59b167e0fd..caf8f51ddc 100644 --- a/npm/pkg/dataplane/policies/testutils_linux.go +++ b/npm/pkg/dataplane/policies/testutils_linux.go @@ -50,8 +50,22 @@ func GetRemovePolicyFailureTestCalls(policy *NPMNetworkPolicy) []testutils.TestC return append(calls, fakeIPTablesRestoreFailureCommand) } -func GetBootupTestCalls() []testutils.TestCmd { - return []testutils.TestCmd{ +func GetBootupTestCalls(addDetectCalls bool) []testutils.TestCmd { + detectIptable := []testutils.TestCmd{ + {Cmd: []string{"iptables-nft-save", "-t", "mangle"}, Stdout: ""}, //nolint // AZURE-NPM chain didn't exist + {Cmd: []string{"iptables-save", "-t", "mangle"}, Stdout: `# Generated by iptables-save v1.8.7 on Wed May 3 01:35:24 2023 + *mangle + :PREROUTING ACCEPT [0:0] + :INPUT ACCEPT [0:0] + :FORWARD ACCEPT [0:0] + :OUTPUT ACCEPT [0:0] + :POSTROUTING ACCEPT [0:0] + :KUBE-IPTABLES-HINT - [0:0] + :KUBE-KUBELET-CANARY - [0:0] + :KUBE-PROXY-CANARY - [0:0] + COMMIT`}, //nolint // AZURE-NPM chain didn't exist + } + bootUp := []testutils.TestCmd{ {Cmd: []string{"iptables", "-w", "60", "-D", "FORWARD", "-j", "AZURE-NPM"}, ExitCode: 2}, //nolint // AZURE-NPM chain didn't exist {Cmd: listAllCommandStrings, PipedToCommand: true}, { @@ -63,6 +77,11 @@ func GetBootupTestCalls() []testutils.TestCmd { {Cmd: []string{"grep", "AZURE-NPM"}, ExitCode: 1}, {Cmd: []string{"iptables", "-w", "60", "-I", "FORWARD", "-j", "AZURE-NPM", "-m", "conntrack", "--ctstate", "NEW"}}, } + + if addDetectCalls { + return append(detectIptable, bootUp...) + } + return bootUp } func getFakeDeleteJumpCommand(chainName, jumpRule string) testutils.TestCmd { diff --git a/npm/pkg/dataplane/policies/testutils_windows.go b/npm/pkg/dataplane/policies/testutils_windows.go index dfc25dab94..914ea76986 100644 --- a/npm/pkg/dataplane/policies/testutils_windows.go +++ b/npm/pkg/dataplane/policies/testutils_windows.go @@ -10,6 +10,6 @@ func GetRemovePolicyTestCalls(_ *NPMNetworkPolicy) []testutils.TestCmd { return []testutils.TestCmd{} } -func GetBootupTestCalls() []testutils.TestCmd { +func GetBootupTestCalls(_ bool) []testutils.TestCmd { return []testutils.TestCmd{} } diff --git a/npm/util/const.go b/npm/util/const.go index ff10cc53d6..38885935be 100644 --- a/npm/util/const.go +++ b/npm/util/const.go @@ -2,6 +2,14 @@ // MIT License package util +import ( + "bytes" + "fmt" + "strings" + + "github.com/Azure/azure-container-networking/common" +) + // kubernetes related constants. const ( KubeSystemFlag string = "kube-system" @@ -19,15 +27,25 @@ const ( k8sMinorVerForNewPolicyDef string = "11" ) +var ( + Iptables = IptablesLegacy + Ip6tables = Ip6tablesLegacy //nolint (avoid warning to capitalize this p) + IptablesSave = IptablesSaveLegacy + IptablesRestore = IptablesRestoreLegacy +) + // iptables related constants. const ( PlaceAzureChainAfterKubeServices = false PlaceAzureChainFirst = true - Iptables string = "iptables" - Ip6tables string = "ip6tables" //nolint (avoid warning to capitalize this p) - IptablesSave string = "iptables-save" - IptablesRestore string = "iptables-restore" + IptablesNft string = "iptables-nft" + Ip6tablesLegacy string = "ip6tables" //nolint (avoid warning to capitalize this p) + IptablesSaveNft string = "iptables-nft-save" + IptablesRestoreNft string = "iptables-nft-restore" + IptablesLegacy string = "iptables" + IptablesSaveLegacy string = "iptables-save" + IptablesRestoreLegacy string = "iptables-restore" IptablesRestoreNoFlushFlag string = "--noflush" IptablesRestoreTableFlag string = "-T" IptablesRestoreCommit string = "COMMIT" @@ -253,3 +271,71 @@ const ( DaemonDataplaneID // for v2 FanOutServerID // for v2 ) + +func DetectIptablesVersion(ioShim *common.IOShim) { + cmd := ioShim.Exec.Command(IptablesSaveNft, "-t", "mangle") + + output, err := cmd.CombinedOutput() + if err != nil { + fmt.Printf("Error running iptables-nft-save: %s", err) + return + } + + if strings.Contains(string(output), "KUBE-IPTABLES-HINT") || strings.Contains(string(output), "KUBE-KUBELET-CANARY") { + Iptables = IptablesNft + IptablesSave = IptablesSaveNft + IptablesRestore = IptablesRestoreNft + } else { + lCmd := ioShim.Exec.Command(IptablesSaveLegacy, "-t", "mangle") + + loutput, err := lCmd.CombinedOutput() + if err != nil { + fmt.Printf("Error running iptables-legacy-save: %s", err) + return + } + + if strings.Contains(string(loutput), "KUBE-IPTABLES-HINT") || strings.Contains(string(loutput), "KUBE-KUBELET-CANARY") { + Iptables = IptablesLegacy + IptablesSave = IptablesSaveLegacy + IptablesRestore = IptablesRestoreLegacy + } else { + lsavecmd := ioShim.Exec.Command(IptablesSaveNft) + lsaveoutput, err := lsavecmd.CombinedOutput() + if err != nil { + fmt.Printf("Error running iptables-nft-save: %s", err) + return + } + + lcount := countLines(lsaveoutput) + + savecmd := ioShim.Exec.Command(IptablesSaveLegacy) + saveoutput, err := savecmd.CombinedOutput() + if err != nil { + fmt.Printf("Error running iptables-legacy-save: %s", err) + return + } + + count := countLines(saveoutput) + + if lcount > count { + Iptables = IptablesLegacy + IptablesSave = IptablesSaveLegacy + IptablesRestore = IptablesRestoreLegacy + } else { + Iptables = IptablesNft + IptablesSave = IptablesSaveNft + IptablesRestore = IptablesRestoreNft + } + } + } +} + +func countLines(output []byte) int { + count := 0 + for _, x := range bytes.Split(output, []byte("\n")) { + if len(x) >= 1 && x[0] == '-' { + count++ + } + } + return count +}