Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 8 additions & 9 deletions npm/ipsm/ipsm.go
Original file line number Diff line number Diff line change
Expand Up @@ -211,22 +211,21 @@ func (ipsMgr *IpsetManager) run(entry *ipsEntry) (int, error) {
}

func (ipsMgr *IpsetManager) createList(listName string) error {
prometheusTimer := metrics.StartNewTimer()

if _, exists := ipsMgr.listMap[listName]; exists {
return nil
}

defer metrics.RecordIPSetExecTime(prometheusTimer) // record execution time regardless of failure

entry := &ipsEntry{
name: listName,
operationFlag: util.IpsetCreationFlag,
set: util.GetHashedName(listName),
spec: []string{util.IpsetSetListFlag},
}
log.Logf("Creating List: %+v", entry)
timer := metrics.StartNewTimer()
errCode, err := ipsMgr.run(entry)
metrics.RecordIPSetExecTime(timer) // record execution time regardless of failure
if err != nil && errCode != 1 {
metrics.SendErrorLogAndMetric(util.IpsmID, "Error: failed to create ipset list %s.", listName)
return err
Expand Down Expand Up @@ -619,9 +618,10 @@ func (ipsMgr *IpsetManager) DestroyNpmIpsets() error {
_, err := ipsMgr.run(flushEntry)
if err != nil {
metrics.SendErrorLogAndMetric(util.IpsmID, "{DestroyNpmIpsets} Error: failed to flush ipset %s", ipsetName)
} else {
metrics.RemoveAllEntriesFromIPSet(ipsetName)
}
}

for _, ipsetName := range ipsetLists {
deleteEntry := &ipsEntry{
operationFlag: util.IpsetDestroyFlag,
Expand All @@ -631,8 +631,6 @@ func (ipsMgr *IpsetManager) DestroyNpmIpsets() error {
if err != nil {
destroyFailureCount++
metrics.SendErrorLogAndMetric(util.IpsmID, "{DestroyNpmIpsets} Error: failed to destroy ipset %s", ipsetName)
} else {
metrics.RemoveAllEntriesFromIPSet(ipsetName)
}
}

Expand All @@ -644,9 +642,10 @@ func (ipsMgr *IpsetManager) DestroyNpmIpsets() error {
} else {
metrics.ResetNumIPSets()
}
// NOTE: in v2, we reset ipset entries, but in v1 we only remove entries for ipsets we delete.
// So v2 may underestimate the number of entries if there are destroy failures,
// but v1 may miss removing entries if some sets are in the prometheus metric but not in the kernel.
// NOTE: in v2, we reset metrics blindly, regardless of errors
// So v2 would underestimate the number of ipsets/entries if there are destroy failures.
// In v1 we remove entries for ipsets we flush.
// We may miss removing entries if some sets are in the prometheus metric but not in the kernel.

return nil
}
8 changes: 4 additions & 4 deletions npm/iptm/iptm.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,9 +174,6 @@ func (iptMgr *IptablesManager) UninitNpmChains() error {

// Add adds a rule in iptables.
func (iptMgr *IptablesManager) Add(entry *IptEntry) error {
prometheusTimer := metrics.StartNewTimer()
defer metrics.RecordACLRuleExecTime(prometheusTimer) // record execution time regardless of failure

log.Logf("Adding iptables entry: %+v.", entry)

// Since there is a RETURN statement added to each DROP chain, we need to make sure
Expand All @@ -186,7 +183,10 @@ func (iptMgr *IptablesManager) Add(entry *IptEntry) error {
} else {
iptMgr.OperationFlag = util.IptablesInsertionFlag
}
if _, err := iptMgr.run(entry); err != nil {
timer := metrics.StartNewTimer()
_, err := iptMgr.run(entry)
metrics.RecordACLRuleExecTime(timer) // record execution time regardless of failure
if err != nil {
metrics.SendErrorLogAndMetric(util.IptmID, "Error: failed to create iptables rules.")
return err
}
Expand Down
10 changes: 10 additions & 0 deletions npm/metrics/acl_rules.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,21 @@ func IncNumACLRules() {
numACLRules.Inc()
}

// IncNumACLRulesBy increments the number of ACL rules by the amount.
func IncNumACLRulesBy(amount int) {
numACLRules.Add(float64(amount))
}

// DecNumACLRules decrements the number of ACL rules.
func DecNumACLRules() {
numACLRules.Dec()
}

// DecNumACLRulesBy decrements the number of ACL rules by the amount.
func DecNumACLRulesBy(amount int) {
numACLRules.Add(float64(-amount))
}

// ResetNumACLRules sets the number of ACL rules to 0.
func ResetNumACLRules() {
numACLRules.Set(0)
Expand Down
13 changes: 11 additions & 2 deletions npm/metrics/acl_rules_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@ package metrics
import "testing"

var (
numRulesMetric = &basicMetric{ResetNumACLRules, IncNumACLRules, DecNumACLRules, GetNumACLRules}
ruleExecMetric = &recordingMetric{RecordACLRuleExecTime, GetACLRuleExecCount}
numRulesMetric = &basicMetric{ResetNumACLRules, IncNumACLRules, DecNumACLRules, GetNumACLRules}
numRulesAmountMetric = &amountMetric{basicMetric: numRulesMetric, incBy: IncNumACLRulesBy, decBy: DecNumACLRulesBy}
ruleExecMetric = &recordingMetric{RecordACLRuleExecTime, GetACLRuleExecCount}
)

func TestRecordACLRuleExecTime(t *testing.T) {
Expand All @@ -22,3 +23,11 @@ func TestDecNumACLRules(t *testing.T) {
func TestResetNumACLRules(t *testing.T) {
testResetMetric(t, numRulesMetric)
}

func TestIncNumACLRulesBy(t *testing.T) {
numRulesAmountMetric.testIncByMetric(t)
}

func TestDecNumACLRulesBy(t *testing.T) {
numRulesAmountMetric.testDecByMetric(t)
}
19 changes: 19 additions & 0 deletions npm/metrics/prometheus_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,25 @@ type basicMetric struct {
get func() (int, error)
}

type amountMetric struct {
*basicMetric
incBy func(int)
decBy func(int)
}

func (metric *amountMetric) testIncByMetric(t *testing.T) {
metric.reset()
metric.incBy(2)
assertMetricVal(t, metric.basicMetric, 2)
}

func (metric *amountMetric) testDecByMetric(t *testing.T) {
metric.reset()
metric.incBy(5)
metric.decBy(2)
assertMetricVal(t, metric.basicMetric, 3)
}

type recordingMetric struct {
record func(timer *Timer)
getCount func() (int, error)
Expand Down
11 changes: 6 additions & 5 deletions npm/pkg/dataplane/ipsets/ipsetmanager.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,15 @@ func NewIPSetManager(iMgrCfg *IPSetManagerCfg, ioShim *common.IOShim) *IPSetMana
func (iMgr *IPSetManager) ResetIPSets() error {
iMgr.Lock()
defer iMgr.Unlock()
metrics.ResetNumIPSets()
metrics.ResetIPSetEntries()
err := iMgr.resetIPSets()
iMgr.setMap = make(map[string]*IPSet)
iMgr.clearDirtyCache()
if err != nil {
metrics.SendErrorLogAndMetric(util.IpsmID, "error: failed to reset ipsetmanager: %s", err.Error())
return fmt.Errorf("error while resetting ipsetmanager: %w", err)
}
// TODO update prometheus metrics here instead of in OS-specific functions (done in Linux right now)
// metrics.ResetNumIPSets() and metrics.ResetIPSetEntries()
return nil
}

Expand Down Expand Up @@ -384,22 +386,21 @@ func (iMgr *IPSetManager) RemoveFromList(listMetadata *IPSetMetadata, setMetadat
}

func (iMgr *IPSetManager) ApplyIPSets() error {
prometheusTimer := metrics.StartNewTimer()

iMgr.Lock()
defer iMgr.Unlock()

if len(iMgr.toAddOrUpdateCache) == 0 && len(iMgr.toDeleteCache) == 0 {
klog.Info("[IPSetManager] No IPSets to apply")
return nil
}
defer metrics.RecordIPSetExecTime(prometheusTimer) // record execution time regardless of failure

klog.Infof("[IPSetManager] toAddUpdateCache %+v \n ", iMgr.toAddOrUpdateCache)
klog.Infof("[IPSetManager] toDeleteCache %+v \n ", iMgr.toDeleteCache)
iMgr.sanitizeDirtyCache()

// Call the appropriate apply ipsets
prometheusTimer := metrics.StartNewTimer()
defer metrics.RecordIPSetExecTime(prometheusTimer) // record execution time regardless of failure
err := iMgr.applyIPSets()
if err != nil {
metrics.SendErrorLogAndMetric(util.IpsmID, "error: failed to apply ipsets: %s", err.Error())
Expand Down
17 changes: 5 additions & 12 deletions npm/pkg/dataplane/ipsets/ipsetmanager_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"fmt"
"strings"

"github.com/Azure/azure-container-networking/npm/metrics"
"github.com/Azure/azure-container-networking/npm/pkg/dataplane/parse"
"github.com/Azure/azure-container-networking/npm/util"
npmerrors "github.com/Azure/azure-container-networking/npm/util/errors"
Expand Down Expand Up @@ -90,26 +89,20 @@ func (iMgr *IPSetManager) resetIPSets() error {
grepCommand := iMgr.ioShim.Exec.Command(ioutil.Grep, azureNPMPrefix)
azureIPSets, haveAzureIPSets, commandError := ioutil.PipeCommandToGrep(listCommand, grepCommand)
if commandError != nil {
return npmerrors.SimpleErrorWrapper("failed to run ipset list for resetting IPSets", commandError)
return npmerrors.SimpleErrorWrapper("failed to run ipset list for resetting IPSets (prometheus metrics may be off now)", commandError)
}
if !haveAzureIPSets {
metrics.ResetNumIPSets()
metrics.ResetIPSetEntries()
return nil
}
creator, originalNumAzureSets, destroyFailureCount := iMgr.fileCreatorForReset(azureIPSets)
restoreError := creator.RunCommandWithFile(ipsetCommand, ipsetRestoreFlag)
if restoreError != nil {
metrics.SetNumIPSets(originalNumAzureSets)
// NOTE: the num entries for sets may be incorrect if the restore fails
klog.Errorf(
"failed to restore ipsets (prometheus metrics may be off now). Had originalNumAzureSets %d and destroyFailureCount %d with err: %v",
originalNumAzureSets, destroyFailureCount, restoreError,
)
return npmerrors.SimpleErrorWrapper("failed to run ipset restore for resetting IPSets", restoreError)
}
if metrics.NumIPSetsIsPositive() {
metrics.SetNumIPSets(*destroyFailureCount)
} else {
metrics.ResetNumIPSets()
}
metrics.ResetIPSetEntries() // NOTE: the num entries for sets that fail to flush may be incorrect after this
return nil
}

Expand Down
Loading