diff --git a/npm/azure-npm.yaml b/npm/azure-npm.yaml index 547957704c..969c71acea 100644 --- a/npm/azure-npm.yaml +++ b/npm/azure-npm.yaml @@ -49,7 +49,7 @@ roleRef: name: azure-npm apiGroup: rbac.authorization.k8s.io --- -apiVersion: extensions/v1beta1 +apiVersion: apps/v1 kind: DaemonSet metadata: name: azure-npm @@ -67,6 +67,8 @@ spec: k8s-app: azure-npm annotations: scheduler.alpha.kubernetes.io/critical-pod: '' + prometheus.io/scrape: "true" + prometheus.io/port: "8000" spec: priorityClassName: system-node-critical tolerations: diff --git a/npm/ipsm/ipsm.go b/npm/ipsm/ipsm.go index 86f5148387..6ef9316207 100644 --- a/npm/ipsm/ipsm.go +++ b/npm/ipsm/ipsm.go @@ -9,7 +9,9 @@ import ( "syscall" "github.com/Azure/azure-container-networking/log" + "github.com/Azure/azure-container-networking/npm/metrics" "github.com/Azure/azure-container-networking/npm/util" + "github.com/prometheus/client_golang/prometheus" ) type ipsEntry struct { @@ -180,6 +182,8 @@ func (ipsMgr *IpsetManager) DeleteFromList(listName string, setName string) erro // CreateSet creates an ipset. func (ipsMgr *IpsetManager) CreateSet(setName string, spec []string) error { + timer := metrics.StartNewTimer() + if _, exists := ipsMgr.setMap[setName]; exists { return nil } @@ -199,6 +203,10 @@ func (ipsMgr *IpsetManager) CreateSet(setName string, spec []string) error { ipsMgr.setMap[setName] = NewIpset(setName) + metrics.NumIPSets.Inc() + timer.StopAndRecord(metrics.AddIPSetExecTime) + metrics.IPSetInventory.With(prometheus.Labels{metrics.SetNameLabel: setName}).Set(0) + return nil } @@ -225,6 +233,9 @@ func (ipsMgr *IpsetManager) DeleteSet(setName string) error { delete(ipsMgr.setMap, setName) + metrics.NumIPSets.Dec() + metrics.IPSetInventory.With(prometheus.Labels{metrics.SetNameLabel: setName}).Set(0) + return nil } @@ -269,6 +280,8 @@ func (ipsMgr *IpsetManager) AddToSet(setName, ip, spec, podUid string) error { // Stores the podUid as the context for this ip. ipsMgr.setMap[setName].elements[ip] = podUid + metrics.IPSetInventory.With(prometheus.Labels{metrics.SetNameLabel: setName}).Inc() + return nil } @@ -310,6 +323,8 @@ func (ipsMgr *IpsetManager) DeleteFromSet(setName, ip, podUid string) error { // Now cleanup the cache delete(ipsMgr.setMap[setName].elements, ip) + metrics.IPSetInventory.With(prometheus.Labels{metrics.SetNameLabel: setName}).Dec() + if len(ipsMgr.setMap[setName].elements) == 0 { ipsMgr.DeleteSet(setName) } @@ -360,6 +375,8 @@ func (ipsMgr *IpsetManager) Destroy() error { return err } + //TODO set metrics.IPSetInventory to 0 for all set names + return nil } @@ -424,5 +441,7 @@ func (ipsMgr *IpsetManager) Restore(configFile string) error { } cmd.Wait() + //TODO based on the set name and number of entries in the config file, update metrics.IPSetInventory + return nil } diff --git a/npm/ipsm/ipsm_test.go b/npm/ipsm/ipsm_test.go index fc9df6d8c8..df48a57fb2 100644 --- a/npm/ipsm/ipsm_test.go +++ b/npm/ipsm/ipsm_test.go @@ -6,7 +6,10 @@ import ( "os" "testing" + "github.com/Azure/azure-container-networking/npm/metrics" + "github.com/Azure/azure-container-networking/npm/metrics/promutil" "github.com/Azure/azure-container-networking/npm/util" + "github.com/prometheus/client_golang/prometheus" ) func TestSave(t *testing.T) { @@ -127,14 +130,34 @@ func TestCreateSet(t *testing.T) { } }() - if err := ipsMgr.CreateSet("test-set", []string{util.IpsetNetHashFlag}); err != nil { + gaugeVal, err1 := promutil.GetValue(metrics.NumIPSets) + countVal, err2 := promutil.GetCountValue(metrics.AddIPSetExecTime) + + testSet1Name := "test-set" + if err := ipsMgr.CreateSet(testSet1Name, []string{util.IpsetNetHashFlag}); err != nil { t.Errorf("TestCreateSet failed @ ipsMgr.CreateSet") } + testSet2Name := "test-set-with-maxelem" spec := append([]string{util.IpsetNetHashFlag, util.IpsetMaxelemName, util.IpsetMaxelemNum}) - if err := ipsMgr.CreateSet("test-set-with-maxelem", spec); err != nil { + if err := ipsMgr.CreateSet(testSet2Name, spec); err != nil { t.Errorf("TestCreateSet failed @ ipsMgr.CreateSet when set maxelem") } + + newGaugeVal, err3 := promutil.GetValue(metrics.NumIPSets) + newCountVal, err4 := promutil.GetCountValue(metrics.AddIPSetExecTime) + testSet1Count, err5 := promutil.GetVecValue(metrics.IPSetInventory, prometheus.Labels{metrics.SetNameLabel: testSet1Name}) + testSet2Count, err6 := promutil.GetVecValue(metrics.IPSetInventory, prometheus.Labels{metrics.SetNameLabel: testSet2Name}) + promutil.NotifyIfErrors(t, err1, err2, err3, err4, err5, err6) + if newGaugeVal != gaugeVal+2 { + t.Errorf("Change in ipset number didn't register in Prometheus") + } + if newCountVal != countVal+2 { + t.Errorf("Execution time didn't register in Prometheus") + } + if testSet1Count != 0 || testSet2Count != 0 { + t.Errorf("Prometheus IPSet count has incorrect number of entries") + } } func TestDeleteSet(t *testing.T) { @@ -149,13 +172,26 @@ func TestDeleteSet(t *testing.T) { } }() - if err := ipsMgr.CreateSet("test-set", append([]string{util.IpsetNetHashFlag})); err != nil { + testSetName := "test-set" + if err := ipsMgr.CreateSet(testSetName, append([]string{util.IpsetNetHashFlag})); err != nil { t.Errorf("TestDeleteSet failed @ ipsMgr.CreateSet") } - if err := ipsMgr.DeleteSet("test-set"); err != nil { + gaugeVal, err1 := promutil.GetValue(metrics.NumIPSets) + + if err := ipsMgr.DeleteSet(testSetName); err != nil { t.Errorf("TestDeleteSet failed @ ipsMgr.DeleteSet") } + + newGaugeVal, err2 := promutil.GetValue(metrics.NumIPSets) + testSetCount, err3 := promutil.GetVecValue(metrics.IPSetInventory, prometheus.Labels{metrics.SetNameLabel: testSetName}) + promutil.NotifyIfErrors(t, err1, err2, err3) + if newGaugeVal != gaugeVal-1 { + t.Errorf("Change in ipset number didn't register in prometheus") + } + if testSetCount != 0 { + t.Errorf("Prometheus IPSet count has incorrect number of entries") + } } func TestAddToSet(t *testing.T) { @@ -170,13 +206,20 @@ func TestAddToSet(t *testing.T) { } }() - if err := ipsMgr.AddToSet("test-set", "1.2.3.4", util.IpsetNetHashFlag, ""); err != nil { + testSetName := "test-set" + if err := ipsMgr.AddToSet(testSetName, "1.2.3.4", util.IpsetNetHashFlag, ""); err != nil { t.Errorf("TestAddToSet failed @ ipsMgr.AddToSet") } - if err := ipsMgr.AddToSet("test-set", "1.2.3.4/nomatch", util.IpsetNetHashFlag, ""); err != nil { + if err := ipsMgr.AddToSet(testSetName, "1.2.3.4/nomatch", util.IpsetNetHashFlag, ""); err != nil { t.Errorf("TestAddToSet with nomatch failed @ ipsMgr.AddToSet") } + + testSetCount, err1 := promutil.GetVecValue(metrics.IPSetInventory, prometheus.Labels{metrics.SetNameLabel: testSetName}) + promutil.NotifyIfErrors(t, err1) + if testSetCount != 2 { + t.Errorf("Prometheus IPSet count has incorrect number of entries") + } } func TestAddToSetWithCachePodInfo(t *testing.T) { @@ -231,22 +274,29 @@ func TestDeleteFromSet(t *testing.T) { } }() - if err := ipsMgr.AddToSet("test-set", "1.2.3.4", util.IpsetNetHashFlag, ""); err != nil { + testSetName := "test-set" + if err := ipsMgr.AddToSet(testSetName, "1.2.3.4", util.IpsetNetHashFlag, ""); err != nil { t.Errorf("TestDeleteFromSet failed @ ipsMgr.AddToSet") } - if len(ipsMgr.setMap["test-set"].elements) != 1 { + if len(ipsMgr.setMap[testSetName].elements) != 1 { t.Errorf("TestDeleteFromSet failed @ ipsMgr.AddToSet") } - if err := ipsMgr.DeleteFromSet("test-set", "1.2.3.4", ""); err != nil { + if err := ipsMgr.DeleteFromSet(testSetName, "1.2.3.4", ""); err != nil { t.Errorf("TestDeleteFromSet failed @ ipsMgr.DeleteFromSet") } // After deleting the only entry, "1.2.3.4" from "test-set", "test-set" ipset won't exist - if _, exists := ipsMgr.setMap["test-set"]; exists { + if _, exists := ipsMgr.setMap[testSetName]; exists { t.Errorf("TestDeleteFromSet failed @ ipsMgr.DeleteFromSet") } + + testSetCount, err1 := promutil.GetVecValue(metrics.IPSetInventory, prometheus.Labels{metrics.SetNameLabel: testSetName}) + promutil.NotifyIfErrors(t, err1) + if testSetCount != 0 { + t.Errorf("Prometheus IPSet count has incorrect number of entries") + } } func TestDeleteFromSetWithPodCache(t *testing.T) { @@ -373,6 +423,7 @@ func TestRun(t *testing.T) { } func TestMain(m *testing.M) { + metrics.InitializeAll() ipsMgr := NewIpsetManager() ipsMgr.Save(util.IpsetConfigFile) diff --git a/npm/iptm/iptm.go b/npm/iptm/iptm.go index d906710fb6..d5b8a0eff7 100644 --- a/npm/iptm/iptm.go +++ b/npm/iptm/iptm.go @@ -17,6 +17,7 @@ import ( "golang.org/x/sys/unix" "github.com/Azure/azure-container-networking/log" + "github.com/Azure/azure-container-networking/npm/metrics" "github.com/Azure/azure-container-networking/npm/util" "k8s.io/apimachinery/pkg/util/wait" // utiliptables "k8s.io/kubernetes/pkg/util/iptables" @@ -298,6 +299,8 @@ func (iptMgr *IptablesManager) DeleteChain(chain string) error { // Add adds a rule in iptables. func (iptMgr *IptablesManager) Add(entry *IptEntry) error { + timer := metrics.StartNewTimer() + log.Logf("Adding iptables entry: %+v.", entry) if entry.IsJumpEntry { @@ -310,6 +313,9 @@ func (iptMgr *IptablesManager) Add(entry *IptEntry) error { return err } + metrics.NumIPTableRules.Inc() + timer.StopAndRecord(metrics.AddIPTableRuleExecTime) + return nil } @@ -332,6 +338,8 @@ func (iptMgr *IptablesManager) Delete(entry *IptEntry) error { return err } + metrics.NumIPTableRules.Dec() + return nil } diff --git a/npm/iptm/iptm_test.go b/npm/iptm/iptm_test.go index 04a454d6a5..dcb0c4f046 100644 --- a/npm/iptm/iptm_test.go +++ b/npm/iptm/iptm_test.go @@ -1,9 +1,11 @@ package iptm import ( - "testing" "os" + "testing" + "github.com/Azure/azure-container-networking/npm/metrics" + "github.com/Azure/azure-container-networking/npm/metrics/promutil" "github.com/Azure/azure-container-networking/npm/util" ) @@ -147,9 +149,23 @@ func TestAdd(t *testing.T) { util.IptablesReject, }, } + + gaugeVal, err1 := promutil.GetValue(metrics.NumIPTableRules) + countVal, err2 := promutil.GetCountValue(metrics.AddIPTableRuleExecTime) + if err := iptMgr.Add(entry); err != nil { t.Errorf("TestAdd failed @ iptMgr.Add") } + + newGaugeVal, err3 := promutil.GetValue(metrics.NumIPTableRules) + newCountVal, err4 := promutil.GetCountValue(metrics.AddIPTableRuleExecTime) + promutil.NotifyIfErrors(t, err1, err2, err3, err4) + if newGaugeVal != gaugeVal+1 { + t.Errorf("Change in iptable rule number didn't register in prometheus") + } + if newCountVal != countVal+1 { + t.Errorf("Execution time didn't register in prometheus") + } } func TestDelete(t *testing.T) { @@ -175,9 +191,17 @@ func TestDelete(t *testing.T) { t.Errorf("TestDelete failed @ iptMgr.Add") } + gaugeVal, err1 := promutil.GetValue(metrics.NumIPTableRules) + if err := iptMgr.Delete(entry); err != nil { t.Errorf("TestDelete failed @ iptMgr.Delete") } + + newGaugeVal, err2 := promutil.GetValue(metrics.NumIPTableRules) + promutil.NotifyIfErrors(t, err1, err2) + if newGaugeVal != gaugeVal-1 { + t.Errorf("Change in iptable rule number didn't register in prometheus") + } } func TestRun(t *testing.T) { @@ -202,6 +226,7 @@ func TestRun(t *testing.T) { } func TestMain(m *testing.M) { + metrics.InitializeAll() iptMgr := NewIptablesManager() iptMgr.Save(util.IptablesConfigFile) diff --git a/npm/metrics/http.go b/npm/metrics/http.go new file mode 100644 index 0000000000..6954a25fb1 --- /dev/null +++ b/npm/metrics/http.go @@ -0,0 +1,43 @@ +package metrics + +import ( + "net/http" + "time" + + "github.com/Azure/azure-container-networking/log" + "github.com/prometheus/client_golang/prometheus/promhttp" +) + +const ( + // HTTPPort is the port used by the HTTP server (includes a preceding colon) + HTTPPort = ":8000" + + //MetricsPath is the path for the Prometheus metrics endpoint (includes preceding slash) + MetricsPath = "/metrics" +) + +var started = false +var handler http.Handler + +// StartHTTP starts a HTTP server in a Go routine with endpoint on port 8000. Metrics are exposed on the endpoint /metrics. +// By being exposed, the metrics can be scraped by a Prometheus Server or Container Insights. +// The function will pause for delayAmountAfterStart seconds after starting the HTTP server for the first time. +func StartHTTP(delayAmountAfterStart int) { + if started { + return + } + started = true + + http.Handle(MetricsPath, getHandler()) + log.Logf("Starting Prometheus HTTP Server") + go http.ListenAndServe(HTTPPort, nil) + time.Sleep(time.Second * time.Duration(delayAmountAfterStart)) +} + +// getHandler returns the HTTP handler for the metrics endpoint +func getHandler() http.Handler { + if handler == nil { + handler = promhttp.HandlerFor(registry, promhttp.HandlerOpts{}) + } + return handler +} diff --git a/npm/metrics/prometheus-metrics.go b/npm/metrics/prometheus-metrics.go new file mode 100644 index 0000000000..312962b51c --- /dev/null +++ b/npm/metrics/prometheus-metrics.go @@ -0,0 +1,112 @@ +package metrics + +import ( + "github.com/Azure/azure-container-networking/log" + "github.com/prometheus/client_golang/prometheus" +) + +const namespace = "npm" + +// Prometheus Metrics +// Gauge metrics have the methods Inc(), Dec(), and Set(float64) +// Summary metrics has the method Observe(float64) +// For any Vector metric, you can call With(prometheus.Labels) before the above methods +// e.g. SomeGaugeVec.With(prometheus.Labels{label1: val1, label2: val2, ...).Dec() +var ( + NumPolicies prometheus.Gauge + AddPolicyExecTime prometheus.Summary + NumIPTableRules prometheus.Gauge + AddIPTableRuleExecTime prometheus.Summary + NumIPSets prometheus.Gauge + AddIPSetExecTime prometheus.Summary + IPSetInventory *prometheus.GaugeVec +) + +// Constants for metric names and descriptions as well as exported labels for Vector metrics +const ( + numPoliciesName = "num_policies" + numPoliciesHelp = "The number of current network policies for this node" + + addPolicyExecTimeName = "add_policy_exec_time" + addPolicyExecTimeHelp = "Execution time in milliseconds for adding a network policy" + + numIPTableRulesName = "num_iptables_rules" + numIPTableRulesHelp = "The number of current IPTable rules for this node" + + addIPTableRuleExecTimeName = "add_iptables_rule_exec_time" + addIPTableRuleExecTimeHelp = "Execution time in milliseconds for adding an IPTable rule to a chain" + + numIPSetsName = "num_ipsets" + numIPSetsHelp = "The number of current IP sets for this node" + + addIPSetExecTimeName = "add_ipset_exec_time" + addIPSetExecTimeHelp = "Execution time in milliseconds for creating an IP set" + + ipsetInventoryName = "ipset_counts" + ipsetInventoryHelp = "Number of entries in each individual IPSet" + SetNameLabel = "set_name" +) + +var registry = prometheus.NewRegistry() +var haveInitialized = false + +// InitializeAll creates all the Prometheus Metrics. The metrics will be nil before this method is called. +func InitializeAll() { + if !haveInitialized { + NumPolicies = createGauge(numPoliciesName, numPoliciesHelp) + AddPolicyExecTime = createSummary(addPolicyExecTimeName, addPolicyExecTimeHelp) + NumIPTableRules = createGauge(numIPTableRulesName, numIPTableRulesHelp) + AddIPTableRuleExecTime = createSummary(addIPTableRuleExecTimeName, addIPTableRuleExecTimeHelp) + NumIPSets = createGauge(numIPSetsName, numIPSetsHelp) + AddIPSetExecTime = createSummary(addIPSetExecTimeName, addIPSetExecTimeHelp) + IPSetInventory = createGaugeVec(ipsetInventoryName, ipsetInventoryHelp, SetNameLabel) + log.Logf("Finished initializing all Prometheus metrics") + haveInitialized = true + } +} + +func register(collector prometheus.Collector, name string) { + err := registry.Register(collector) + if err != nil { + log.Errorf("Error creating metric %s", name) + } +} + +func createGauge(name string, helpMessage string) prometheus.Gauge { + gauge := prometheus.NewGauge( + prometheus.GaugeOpts{ + Namespace: namespace, + Name: name, + Help: helpMessage, + }, + ) + register(gauge, name) + return gauge +} + +func createGaugeVec(name string, helpMessage string, labels ...string) *prometheus.GaugeVec { + gaugeVec := prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: namespace, + Name: name, + Help: helpMessage, + }, + labels, + ) + register(gaugeVec, name) + return gaugeVec +} + +func createSummary(name string, helpMessage string) prometheus.Summary { + summary := prometheus.NewSummary( + prometheus.SummaryOpts{ + Namespace: namespace, + Name: name, + Help: helpMessage, + Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, + // quantiles e.g. the "0.5 quantile" will actually be the phi quantile for some phi in [0.5 - 0.05, 0.5 + 0.05] + }, + ) + register(summary, name) + return summary +} diff --git a/npm/metrics/promutil/test-util.go b/npm/metrics/promutil/test-util.go new file mode 100644 index 0000000000..61feb2eec8 --- /dev/null +++ b/npm/metrics/promutil/test-util.go @@ -0,0 +1,58 @@ +package promutil + +import ( + "testing" + + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" +) + +// NotifyIfErrors writes any non-nil errors to a testing utility +func NotifyIfErrors(t *testing.T, errors ...error) { + allGood := true + for _, err := range errors { + if err != nil { + allGood = false + break + } + } + if !allGood { + t.Errorf("Encountered these errors while getting metric values: ") + for _, err := range errors { + if err != nil { + t.Errorf("%v", err) + } + } + } +} + +// GetValue is used for validation. It returns a Gauge metric's value. +func GetValue(gaugeMetric prometheus.Gauge) (int, error) { + dtoMetric, err := getDTOMetric(gaugeMetric) + if err != nil { + return 0, err + } + return int(dtoMetric.Gauge.GetValue()), nil +} + +// GetVecValue is used for validation. It returns a Gauge Vec metric's value. +func GetVecValue(gaugeVecMetric *prometheus.GaugeVec, labels prometheus.Labels) (int, error) { + return GetValue(gaugeVecMetric.With(labels)) +} + +// GetCountValue is used for validation. It returns the number of times a Summary metric has recorded an observation. +func GetCountValue(summaryMetric prometheus.Summary) (int, error) { + dtoMetric, err := getDTOMetric(summaryMetric) + if err != nil { + return 0, err + } + return int(dtoMetric.Summary.GetSampleCount()), nil +} + +func getDTOMetric(collector prometheus.Collector) (*dto.Metric, error) { + channel := make(chan prometheus.Metric, 1) + collector.Collect(channel) + metric := &dto.Metric{} + err := (<-channel).Write(metric) + return metric, err +} diff --git a/npm/metrics/timer.go b/npm/metrics/timer.go new file mode 100644 index 0000000000..b88e3b01f9 --- /dev/null +++ b/npm/metrics/timer.go @@ -0,0 +1,35 @@ +package metrics + +import ( + "time" + + "github.com/prometheus/client_golang/prometheus" +) + +// Timer is a one-time-use tool for recording time between a start and end point +type Timer struct { + before int64 + after int64 +} + +// StartNewTimer creates a new Timer +func StartNewTimer() *Timer { + return &Timer{time.Now().UnixNano(), 0} +} + +// StopAndRecord ends a timer and records its delta in a summary +func (timer *Timer) StopAndRecord(observer prometheus.Summary) { + observer.Observe(timer.timeElapsed()) +} + +func (timer *Timer) stop() { + timer.after = time.Now().UnixNano() +} + +func (timer *Timer) timeElapsed() float64 { + if timer.after == 0 { + timer.stop() + } + millisecondDifference := float64(timer.after-timer.before) / 1000000.0 + return millisecondDifference +} diff --git a/npm/namespace_test.go b/npm/namespace_test.go index 5d0922f186..4b947f7768 100644 --- a/npm/namespace_test.go +++ b/npm/namespace_test.go @@ -7,6 +7,7 @@ import ( "testing" "github.com/Azure/azure-container-networking/npm/iptm" + "github.com/Azure/azure-container-networking/npm/metrics" "github.com/Azure/azure-container-networking/npm/ipsm" "github.com/Azure/azure-container-networking/npm/util" @@ -178,6 +179,7 @@ func TestDeleteNamespace(t *testing.T) { } func TestMain(m *testing.M) { + metrics.InitializeAll() iptMgr := iptm.NewIptablesManager() iptMgr.Save(util.IptablesConfigFile) diff --git a/npm/nwpolicy.go b/npm/nwpolicy.go index a15b199883..3c9a73421c 100644 --- a/npm/nwpolicy.go +++ b/npm/nwpolicy.go @@ -8,6 +8,7 @@ import ( "github.com/Azure/azure-container-networking/log" "github.com/Azure/azure-container-networking/npm/ipsm" "github.com/Azure/azure-container-networking/npm/iptm" + "github.com/Azure/azure-container-networking/npm/metrics" "github.com/Azure/azure-container-networking/npm/util" networkingv1 "k8s.io/api/networking/v1" ) @@ -35,6 +36,7 @@ func (npMgr *NetworkPolicyManager) AddNetworkPolicy(npObj *networkingv1.NetworkP npNs = "ns-" + npObj.ObjectMeta.Namespace npName = npObj.ObjectMeta.Name allNs = npMgr.nsMap[util.KubeAllNamespacesFlag] + timer = metrics.StartNewTimer() ) log.Logf("NETWORK POLICY CREATING: NameSpace%s, Name:%s", npNs, npName) @@ -127,6 +129,9 @@ func (npMgr *NetworkPolicyManager) AddNetworkPolicy(npObj *networkingv1.NetworkP } } + metrics.NumPolicies.Inc() + timer.StopAndRecord(metrics.AddPolicyExecTime) + return nil } @@ -196,6 +201,8 @@ func (npMgr *NetworkPolicyManager) DeleteNetworkPolicy(npObj *networkingv1.Netwo } } + metrics.NumPolicies.Dec() + return nil } diff --git a/npm/nwpolicy_test.go b/npm/nwpolicy_test.go index afb0432d99..a1ad3f7216 100644 --- a/npm/nwpolicy_test.go +++ b/npm/nwpolicy_test.go @@ -7,6 +7,8 @@ import ( "github.com/Azure/azure-container-networking/npm/ipsm" "github.com/Azure/azure-container-networking/npm/iptm" + "github.com/Azure/azure-container-networking/npm/metrics" + "github.com/Azure/azure-container-networking/npm/metrics/promutil" "github.com/Azure/azure-container-networking/npm/util" corev1 "k8s.io/api/core/v1" @@ -98,6 +100,9 @@ func TestAddNetworkPolicy(t *testing.T) { }, } + gaugeVal, err1 := promutil.GetValue(metrics.NumPolicies) + countVal, err2 := promutil.GetCountValue(metrics.AddPolicyExecTime) + npMgr.Lock() if err := npMgr.AddNetworkPolicy(allowIngress); err != nil { t.Errorf("TestAddNetworkPolicy failed @ allowIngress AddNetworkPolicy") @@ -144,6 +149,16 @@ func TestAddNetworkPolicy(t *testing.T) { t.Errorf("Error: %v", err) } npMgr.Unlock() + + newGaugeVal, err3 := promutil.GetValue(metrics.NumPolicies) + newCountVal, err4 := promutil.GetCountValue(metrics.AddPolicyExecTime) + promutil.NotifyIfErrors(t, err1, err2, err3, err4) + if newGaugeVal != gaugeVal+2 { + t.Errorf("Change in policy number didn't register in prometheus") + } + if newCountVal != countVal+2 { + t.Errorf("Execution time didn't register in prometheus") + } } func TestUpdateNetworkPolicy(t *testing.T) { @@ -340,8 +355,16 @@ func TestDeleteNetworkPolicy(t *testing.T) { t.Errorf("TestAddNetworkPolicy failed @ AddNetworkPolicy") } + gaugeVal, err1 := promutil.GetValue(metrics.NumPolicies) + if err := npMgr.DeleteNetworkPolicy(allow); err != nil { t.Errorf("TestDeleteNetworkPolicy failed @ DeleteNetworkPolicy") } npMgr.Unlock() + + newGaugeVal, err2 := promutil.GetValue(metrics.NumPolicies) + promutil.NotifyIfErrors(t, err1, err2) + if newGaugeVal != gaugeVal-1 { + t.Errorf("Change in policy number didn't register in prometheus") + } } diff --git a/npm/plugin/main.go b/npm/plugin/main.go index 6b915e3f7c..468ce77b65 100644 --- a/npm/plugin/main.go +++ b/npm/plugin/main.go @@ -7,6 +7,7 @@ import ( "github.com/Azure/azure-container-networking/log" "github.com/Azure/azure-container-networking/npm" + "github.com/Azure/azure-container-networking/npm/metrics" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/informers" @@ -43,6 +44,8 @@ func main() { panic(err.Error()) } + metrics.InitializeAll() + // Creates the in-cluster config config, err := rest.InClusterConfig() if err != nil { @@ -67,5 +70,7 @@ func main() { panic(err.Error) } + metrics.StartHTTP(0) + select {} }