-
Notifications
You must be signed in to change notification settings - Fork 260
Description
Symptoms
Unexpected connectivity for some pods.
For this issue, you would see the following message in the NPM logs (if they are not truncated): "failed to flush and delete stale chain in legacy iptables with error".
Mitigation
- Option 1: Restart NPM pod for impacted node or all nodes.
- Option 2: Open a shell on the impacted node and run
`iptables-legacy -F <chain>` for every chain starting with `AZURE-NPM`.
Cause
NPM uses nftables instead of legacy iptables for Ubuntu 22 (in AKS, this should correspond with k8s 1.25+).
In this case, NPM tries the following in legacy iptables: (1) deleting the jump rule from the FORWARD chain to the AZURE-NPM chain, and (2) flushing and destroying any NPM chains. If there are transient errors for both step 1 and (most of) step 2, some legacy rules will remain, causing unexpected connectivity. NPM tries to queue the stale chains for cleanup in a background thread. That retry mechanism has a couple of flaws:
- The background thread uses nftables, not legacy iptables.
- The background thread does not flush chains, it only destroys chains.
Code
azure-container-networking/npm/pkg/dataplane/policies/chain-management_linux.go
Lines 189 to 261 in 571afbc
| if strings.Contains(util.Iptables, "nft") { | |
| klog.Info("detected nft iptables. cleaning up legacy iptables") | |
| util.Iptables = util.IptablesLegacy | |
| util.IptablesSave = util.IptablesSaveLegacy | |
| util.IptablesRestore = util.IptablesRestoreLegacy | |
| // 0. delete the deprecated jump to deprecated AZURE-NPM in legacy iptables | |
| deprecatedErrCode, deprecatedErr := pMgr.ignoreErrorsAndRunIPTablesCommand(removeDeprecatedJumpIgnoredErrors, util.IptablesDeletionFlag, deprecatedJumpFromForwardToAzureChainArgs...) | |
| if deprecatedErrCode == 0 { | |
| klog.Infof("deleted deprecated jump rule from FORWARD chain to AZURE-NPM chain") | |
| } else if deprecatedErr != nil { | |
| metrics.SendErrorLogAndMetric(util.IptmID, | |
| "failed to delete deprecated jump rule from FORWARD chain to AZURE-NPM chain for unexpected reason with exit code %d and error: %s", | |
| deprecatedErrCode, deprecatedErr.Error()) | |
| } | |
| // 0. delete the deprecated jump to current AZURE-NPM in legacy iptables | |
| deprecatedErrCode, deprecatedErr = pMgr.ignoreErrorsAndRunIPTablesCommand(removeDeprecatedJumpIgnoredErrors, util.IptablesDeletionFlag, jumpFromForwardToAzureChainArgs...) | |
| if deprecatedErrCode == 0 { | |
| klog.Infof("deleted deprecated jump rule from FORWARD chain to AZURE-NPM chain") | |
| } else if deprecatedErr != nil { | |
| metrics.SendErrorLogAndMetric(util.IptmID, | |
| "failed to delete deprecated jump rule from FORWARD chain to AZURE-NPM chain for unexpected reason with exit code %d and error: %s", | |
| deprecatedErrCode, deprecatedErr.Error()) | |
| } | |
| // clean up current chains in legacy iptables | |
| currentChains, err := ioutil.AllCurrentAzureChains(pMgr.ioShim.Exec, util.IptablesDefaultWaitTime) | |
| if err != nil { | |
| return npmerrors.SimpleErrorWrapper("failed to get current chains for bootup", err) | |
| } | |
| // We have only one chance to clean existing legacy iptables chains. | |
| // So flush all the chains and then destroy them | |
| var aggregateError error | |
| for chain := range currentChains { | |
| errCode, err := pMgr.runIPTablesCommand(util.IptablesFlushFlag, chain) | |
| if err != nil && errCode != doesNotExistErrorCode { | |
| // add to staleChains if it's not one of the iptablesAzureChains | |
| pMgr.staleChains.add(chain) | |
| currentErrString := fmt.Sprintf("failed to flush chain %s with err [%v]", chain, err) | |
| if aggregateError == nil { | |
| aggregateError = npmerrors.SimpleError(currentErrString) | |
| } else { | |
| aggregateError = npmerrors.SimpleErrorWrapper(fmt.Sprintf("%s and had previous error", currentErrString), aggregateError) | |
| } | |
| } | |
| } | |
| for chain := range currentChains { | |
| errCode, err := pMgr.runIPTablesCommand(util.IptablesDestroyFlag, chain) | |
| if err != nil && errCode != doesNotExistErrorCode { | |
| // add to staleChains if it's not one of the iptablesAzureChains | |
| pMgr.staleChains.add(chain) | |
| currentErrString := fmt.Sprintf("failed to delete chain %s with err [%v]", chain, err) | |
| if aggregateError == nil { | |
| aggregateError = npmerrors.SimpleError(currentErrString) | |
| } else { | |
| aggregateError = npmerrors.SimpleErrorWrapper(fmt.Sprintf("%s and had previous error", currentErrString), aggregateError) | |
| } | |
| } | |
| } | |
| if aggregateError != nil { | |
| metrics.SendErrorLogAndMetric(util.IptmID, | |
| "failed to flush and delete stale chain in legacy iptables with error: %s", | |
| aggregateError.Error()) | |
| } | |
| util.Iptables = util.IptablesNft | |
| util.IptablesSave = util.IptablesSaveNft | |
| util.IptablesRestore = util.IptablesRestoreNft | |
| } |