From ae95accf579e6c91aac864b09eda0579d21b9879 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Tue, 16 Jun 2020 15:59:56 -0400 Subject: [PATCH 01/53] prometheus additions to testmain (commented out right now) --- npm/iptm/iptm_test.go | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/npm/iptm/iptm_test.go b/npm/iptm/iptm_test.go index 04a454d6a5..c73e1cfaba 100644 --- a/npm/iptm/iptm_test.go +++ b/npm/iptm/iptm_test.go @@ -1,10 +1,12 @@ package iptm import ( - "testing" "os" + "testing" "github.com/Azure/azure-container-networking/npm/util" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" ) func TestSave(t *testing.T) { @@ -201,6 +203,15 @@ func TestRun(t *testing.T) { } } +var ( + opsProcessed = promauto.NewCounter(prometheus.CounterOpts{ + Name: "myapp_processed_ops_total", + Help: "The total number of processed events", + }) + + someGauge = prometheus.NewGauge +) + func TestMain(m *testing.M) { iptMgr := NewIptablesManager() iptMgr.Save(util.IptablesConfigFile) @@ -209,5 +220,19 @@ func TestMain(m *testing.M) { iptMgr.Restore(util.IptablesConfigFile) + // fmt.Printf("exit code: %d", exitCode) + + // go func() { + // for { + // opsProcessed.Inc() + // time.Sleep(2 * time.Second) + // } + // }() + + // http.Handle("/metrics", promhttp.Handler()) + // http.ListenAndServe(":8081", nil) + + // time.Sleep(10 * time.Second) + os.Exit(exitCode) } From 75056992fe6bd2b7390eafeae6e586f0dc53fa94 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Wed, 24 Jun 2020 16:30:42 -0400 Subject: [PATCH 02/53] home of the npm prometheus metrics and tools for updating them, testing them --- npm/metrics/http.go | 65 +++++++++++++++++++ npm/metrics/prometheus-metrics.go | 104 ++++++++++++++++++++++++++++++ npm/metrics/test-tools.go | 56 ++++++++++++++++ npm/metrics/timer.go | 36 +++++++++++ 4 files changed, 261 insertions(+) create mode 100644 npm/metrics/http.go create mode 100644 npm/metrics/prometheus-metrics.go create mode 100644 npm/metrics/test-tools.go create mode 100644 npm/metrics/timer.go diff --git a/npm/metrics/http.go b/npm/metrics/http.go new file mode 100644 index 0000000000..dbaf135c78 --- /dev/null +++ b/npm/metrics/http.go @@ -0,0 +1,65 @@ +package metrics + +import ( + "fmt" + "io/ioutil" + "net/http" + "regexp" + "strconv" + "time" +) + +var started = false + +func StartHTTP(asGoRoutine bool) { + if started { + return + } + started = true + + http.Handle("/metrics", GetHandler()) + http.HandleFunc("/hi", func(w http.ResponseWriter, r *http.Request) { + fmt.Fprintf(w, "Hi!\n") + }) + if asGoRoutine { + go http.ListenAndServe(":8000", nil) + } else { + http.ListenAndServe(":8000", nil) + } +} + +func getMetricsText() (string, error) { + response, err := http.Get("http://localhost:8000/metrics") + if err != nil { + return "", err + } + defer response.Body.Close() + body, err := ioutil.ReadAll(response.Body) + if err != nil { + return "", err + } + return string(body), nil +} + +func GetValue(metricName string) (int, error) { + if !started { + StartHTTP(true) + time.Sleep(2 * time.Second) + } + regex := regexp.MustCompile(metricName + " [0-9]+") + if regex == nil { + return 0, fmt.Errorf("Couldn't compile regular expression for metric: " + metricName) + } + text, err := getMetricsText() + if err != nil { + return 0, err + } + locations := regex.FindStringIndex(text) + if locations == nil { + return 0, fmt.Errorf("Couldn't find a match for metric: " + metricName) + } + start := locations[0] + end := locations[1] + value := text[start+len(metricName)+1 : end] + return strconv.Atoi(value) +} diff --git a/npm/metrics/prometheus-metrics.go b/npm/metrics/prometheus-metrics.go new file mode 100644 index 0000000000..bb375a2d8c --- /dev/null +++ b/npm/metrics/prometheus-metrics.go @@ -0,0 +1,104 @@ +package metrics + +import ( + "fmt" + "net/http" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" +) + +// var networkingRegistry *prometheus.Registery +// var hostName = os.Getenv("HOSTNAME") + +const tempHelp = "temporary help description" //TODO unique for each metric +const namespace = "npm" + +// TODO add quantiles for summaries? remove quantiles? + +var ( + NumPolicies = prometheus.NewGauge( + prometheus.GaugeOpts{ + Namespace: namespace, + Name: "num_policies", + // Help: tempHelp, + }, + //[]string{"node"}, + // include labels in a slice like above if a vector + ) + + AddPolicyExecTime = prometheus.NewSummary( + prometheus.SummaryOpts{ + Namespace: namespace, + Name: "add_policy_exec_time", + // Help: tempHelp, + Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, // TODO remove? + }, + ) + + NumIpTableRules = prometheus.NewGauge( + prometheus.GaugeOpts{ + Namespace: namespace, + Name: "num_iptables_rules", + // Help: tempHelp, + }, + ) + + AddIpTableRuleExecTime = prometheus.NewSummary( + prometheus.SummaryOpts{ + Namespace: namespace, + Name: "add_iptables_rule_exec_time", + // Help: tempHelp, + }, + ) + + NumIpSets = prometheus.NewGauge( + prometheus.GaugeOpts{ + Namespace: namespace, + Name: "num_ipsets", + // Help: tempHelp, + }, + ) + + AddIpSetExecTime = prometheus.NewSummary( + prometheus.SummaryOpts{ + Namespace: namespace, + Name: "add_ipset_exec_time", + // Help: tempHelp, + }, + ) +) + +var allMetrics = []prometheus.Collector{NumPolicies, AddPolicyExecTime, NumIpTableRules, AddIpTableRuleExecTime, NumIpSets, AddIpSetExecTime} +var handler http.Handler + +func init() { + // networkingRegistry = prometheus.NewRegistry() + for _, metric := range allMetrics { + err := prometheus.DefaultRegisterer.Register(metric) + if err != nil { + fmt.Printf("While registering a certain prometheus metric, an error occurred: %s", err) + } + } +} + +func GetHandler() http.Handler { + if handler == nil { + handler = promhttp.Handler() + // handler = promhttp.HandlerFor(networkingRegistry, promhttp.HandlerOpts{}) // promhttp.Handler() + } + return handler +} + +func Observe(summary prometheus.Summary, value float64) { + summary.Observe(value) + // if changed to a vector, use summary.WithLabelValues(hostName).Observe(value) +} + +func Inc(gauge prometheus.Gauge) { + gauge.Inc() +} + +func Dec(gauge prometheus.Gauge) { + gauge.Dec() +} diff --git a/npm/metrics/test-tools.go b/npm/metrics/test-tools.go new file mode 100644 index 0000000000..057e454e70 --- /dev/null +++ b/npm/metrics/test-tools.go @@ -0,0 +1,56 @@ +package metrics + +import ( + "fmt" + "testing" + "time" +) + +// DIDN'T WORK +func GaugeIncTest(t *testing.T, metricName string, action func()) { + if !started { + StartHTTP(true) + time.Sleep(2 * time.Second) + } + val, err := GetValue(metricName) + action() + + if err != nil { + t.Errorf("Problem getting http prometheus metrics for metric: " + metricName) + return + } + + newVal, err := GetValue(metricName) + fmt.Println(val) + fmt.Println(newVal) + if err != nil { + t.Errorf("Problem getting http prometheus metrics for metric: " + metricName) + } + if newVal != val+1 { + t.Errorf("Metric adjustment didn't register in prometheus for metric: " + metricName) + } +} + +func GaugeDecTest(t *testing.T, metricName string, action func()) { + if !started { + StartHTTP(true) + time.Sleep(2 * time.Second) + } + val, err := GetValue(metricName) + action() + + if err != nil { + t.Errorf("Problem getting http prometheus metrics for metric: " + metricName) + return + } + + newVal, err := GetValue(metricName) + fmt.Println(val) + fmt.Println(newVal) + if err != nil { + t.Errorf("Problem getting http prometheus metrics for metric: " + metricName) + } + if newVal != val-1 { + t.Errorf("Metric adjustment didn't register in prometheus for metric: " + metricName) + } +} diff --git a/npm/metrics/timer.go b/npm/metrics/timer.go new file mode 100644 index 0000000000..2ed5f3c8c9 --- /dev/null +++ b/npm/metrics/timer.go @@ -0,0 +1,36 @@ +package metrics + +import ( + "time" + + "github.com/prometheus/client_golang/prometheus" +) + +// Timer is a one-time-use tool for recording time between a start and end point +type Timer struct { + before int64 + after int64 +} + +// StartNewTimer creates a new Timer +func StartNewTimer() *Timer { + return &Timer{time.Now().Unix(), 0} +} + +// StopAndRecord ends a timer and records its delta in an observer +func (timer *Timer) StopAndRecord(observer prometheus.Summary) { + Observe(observer, timer.timeElapsed()) +} + +func (timer *Timer) stop() { + timer.after = time.Now().Unix() +} + +func (timer *Timer) timeElapsed() float64 { + if timer.after == 0 { + timer.stop() + } + millisecondDifference := (timer.after - timer.before) / 1000000 + secondDifference := float64(millisecondDifference) / 1000.0 + return secondDifference +} From e57526ebf442580017e8b97acc04c45339ae75aa Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Wed, 24 Jun 2020 16:32:32 -0400 Subject: [PATCH 03/53] add/remove policy metrics --- npm/nwpolicy.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/npm/nwpolicy.go b/npm/nwpolicy.go index bf12fc273f..cf310caeb1 100644 --- a/npm/nwpolicy.go +++ b/npm/nwpolicy.go @@ -5,6 +5,7 @@ package npm import ( "github.com/Azure/azure-container-networking/log" "github.com/Azure/azure-container-networking/npm/iptm" + "github.com/Azure/azure-container-networking/npm/metrics" "github.com/Azure/azure-container-networking/npm/util" networkingv1 "k8s.io/api/networking/v1" ) @@ -32,6 +33,7 @@ func (npMgr *NetworkPolicyManager) AddNetworkPolicy(npObj *networkingv1.NetworkP npNs = "ns-" + npObj.ObjectMeta.Namespace npName = npObj.ObjectMeta.Name allNs = npMgr.nsMap[util.KubeAllNamespacesFlag] + timer = metrics.StartNewTimer() ) log.Printf("NETWORK POLICY CREATING: %v", npObj) @@ -121,6 +123,9 @@ func (npMgr *NetworkPolicyManager) AddNetworkPolicy(npObj *networkingv1.NetworkP } } + metrics.Inc(metrics.NumPolicies) + timer.StopAndRecord(metrics.AddPolicyExecTime) + return nil } @@ -140,6 +145,7 @@ func (npMgr *NetworkPolicyManager) DeleteNetworkPolicy(npObj *networkingv1.Netwo err error ns *namespace allNs = npMgr.nsMap[util.KubeAllNamespacesFlag] + // timer = metrics.StartNewTimer() ) npNs, npName := "ns-"+npObj.ObjectMeta.Namespace, npObj.ObjectMeta.Name @@ -187,5 +193,8 @@ func (npMgr *NetworkPolicyManager) DeleteNetworkPolicy(npObj *networkingv1.Netwo } } + metrics.Dec(metrics.NumPolicies) + // timer.StopAndRecord(metrics.RemovePolicyExecTime) + return nil } From c9e352948cde2ffc3ee3281042b78b87c7e42748 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Wed, 24 Jun 2020 16:35:47 -0400 Subject: [PATCH 04/53] add/remove iptables rule metric measurements --- npm/iptm/iptm.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/npm/iptm/iptm.go b/npm/iptm/iptm.go index 7ce0f2557a..c93b7b9e25 100644 --- a/npm/iptm/iptm.go +++ b/npm/iptm/iptm.go @@ -17,6 +17,7 @@ import ( "golang.org/x/sys/unix" "github.com/Azure/azure-container-networking/log" + "github.com/Azure/azure-container-networking/npm/metrics" "github.com/Azure/azure-container-networking/npm/util" "k8s.io/apimachinery/pkg/util/wait" // utiliptables "k8s.io/kubernetes/pkg/util/iptables" @@ -298,6 +299,8 @@ func (iptMgr *IptablesManager) DeleteChain(chain string) error { // Add adds a rule in iptables. func (iptMgr *IptablesManager) Add(entry *IptEntry) error { + timer := metrics.StartNewTimer() + log.Printf("Adding iptables entry: %+v.", entry) if entry.IsJumpEntry { @@ -310,6 +313,9 @@ func (iptMgr *IptablesManager) Add(entry *IptEntry) error { return err } + metrics.Inc(metrics.NumIpTableRules) + timer.StopAndRecord(metrics.AddIpTableRuleExecTime) + return nil } @@ -332,6 +338,8 @@ func (iptMgr *IptablesManager) Delete(entry *IptEntry) error { return err } + metrics.Dec(metrics.NumIpTableRules) + return nil } From aa8fc127f45eb679ae9a04f523d2ddc159d9a83d Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Wed, 24 Jun 2020 16:36:27 -0400 Subject: [PATCH 05/53] add/remove ipset metric measurements --- npm/ipsm/ipsm.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/npm/ipsm/ipsm.go b/npm/ipsm/ipsm.go index 0eaf8c5042..18662c70b9 100644 --- a/npm/ipsm/ipsm.go +++ b/npm/ipsm/ipsm.go @@ -9,6 +9,7 @@ import ( "syscall" "github.com/Azure/azure-container-networking/log" + "github.com/Azure/azure-container-networking/npm/metrics" "github.com/Azure/azure-container-networking/npm/util" ) @@ -182,6 +183,8 @@ func (ipsMgr *IpsetManager) DeleteFromList(listName string, setName string) erro // CreateSet creates an ipset. func (ipsMgr *IpsetManager) CreateSet(setName, spec string) error { + timer := metrics.StartNewTimer() + if _, exists := ipsMgr.setMap[setName]; exists { return nil } @@ -201,6 +204,9 @@ func (ipsMgr *IpsetManager) CreateSet(setName, spec string) error { ipsMgr.setMap[setName] = NewIpset(setName) + metrics.Inc(metrics.NumIpSets) + timer.StopAndRecord(metrics.AddIpSetExecTime) + return nil } @@ -231,6 +237,8 @@ func (ipsMgr *IpsetManager) DeleteSet(setName string) error { delete(ipsMgr.setMap, setName) + metrics.Dec(metrics.NumIpSets) + return nil } From 44e500b1e3858c3bc5b4c3ab4ebc0615df98215f Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Wed, 24 Jun 2020 16:37:37 -0400 Subject: [PATCH 06/53] testing for gauges. want to soon remove the boolean for including prometheus in unit testing --- npm/ipsm/ipsm_test.go | 56 ++++++++++++++++++++ npm/iptm/iptm_test.go | 120 ++++++++++++++++++++++++++++++++++-------- npm/nwpolicy_test.go | 50 ++++++++++++++++++ 3 files changed, 204 insertions(+), 22 deletions(-) diff --git a/npm/ipsm/ipsm_test.go b/npm/ipsm/ipsm_test.go index c90fe37d72..c4cde26137 100644 --- a/npm/ipsm/ipsm_test.go +++ b/npm/ipsm/ipsm_test.go @@ -6,9 +6,17 @@ import ( "os" "testing" + "github.com/Azure/azure-container-networking/npm/metrics" "github.com/Azure/azure-container-networking/npm/util" ) +const testPrometheusToo = true +const prometheusErrorMessage = "You can turn off Prometheus testing by flipping the boolean constant testPrometheusToo." + +func printPrometheusError(t *testing.T, message string) { + t.Errorf(message + ". " + prometheusErrorMessage) +} + func TestSave(t *testing.T) { ipsMgr := NewIpsetManager() if err := ipsMgr.Save(util.IpsetTestConfigFile); err != nil { @@ -127,9 +135,31 @@ func TestCreateSet(t *testing.T) { } }() + var ( + val = 0 + newVal = 0 + err error + ) + if testPrometheusToo { + val, err = metrics.GetValue("num_ipsets") + if err != nil { + printPrometheusError(t, "Problem getting http metrics") + } + } + if err := ipsMgr.CreateSet("test-set", util.IpsetNetHashFlag); err != nil { t.Errorf("TestCreateSet failed @ ipsMgr.CreateSet") } + + if testPrometheusToo { + newVal, err = metrics.GetValue("num_ipsets") + if err != nil { + printPrometheusError(t, "Problem getting http metrics") + } + if newVal != val+1 { + printPrometheusError(t, "Create ipset didn't register in prometheus") + } + } } func TestDeleteSet(t *testing.T) { @@ -151,6 +181,32 @@ func TestDeleteSet(t *testing.T) { if err := ipsMgr.DeleteSet("test-set"); err != nil { t.Errorf("TestDeleteSet failed @ ipsMgr.DeleteSet") } + + var ( + val = 0 + newVal = 0 + err error + ) + if testPrometheusToo { + val, err = metrics.GetValue("num_ipsets") + if err != nil { + printPrometheusError(t, "Problem getting http metrics") + } + } + + if err := ipsMgr.CreateSet("test-set", util.IpsetNetHashFlag); err != nil { + t.Errorf("TestCreateSet failed @ ipsMgr.CreateSet") + } + + if testPrometheusToo { + newVal, err = metrics.GetValue("num_ipsets") + if err != nil { + printPrometheusError(t, "Problem getting http metrics") + } + if newVal != val-1 { + printPrometheusError(t, "Delete ipset didn't register in prometheus") + } + } } func TestAddToSet(t *testing.T) { diff --git a/npm/iptm/iptm_test.go b/npm/iptm/iptm_test.go index c73e1cfaba..ea44f76606 100644 --- a/npm/iptm/iptm_test.go +++ b/npm/iptm/iptm_test.go @@ -3,12 +3,20 @@ package iptm import ( "os" "testing" + "time" + + "github.com/Azure/azure-container-networking/npm/metrics" "github.com/Azure/azure-container-networking/npm/util" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" ) +const testPrometheusToo = true +const prometheusErrorMessage = "You can turn off Prometheus testing by flipping the boolean constant testPrometheusToo." + +func printPrometheusError(t *testing.T, message string) { + t.Errorf(message + ". " + prometheusErrorMessage) +} + func TestSave(t *testing.T) { iptMgr := &IptablesManager{} if err := iptMgr.Save(util.IptablesTestConfigFile); err != nil { @@ -149,9 +157,32 @@ func TestAdd(t *testing.T) { util.IptablesReject, }, } + + var ( + val = 0 + newVal = 0 + err error + ) + if testPrometheusToo { + val, err = metrics.GetValue("num_iptables_rules") + if err != nil { + printPrometheusError(t, "Problem getting http metrics") + } + } + if err := iptMgr.Add(entry); err != nil { t.Errorf("TestAdd failed @ iptMgr.Add") } + + if testPrometheusToo { + newVal, err = metrics.GetValue("num_iptables_rules") + if err != nil { + printPrometheusError(t, "Problem getting http metrics") + } + if newVal != val+1 { + printPrometheusError(t, "Add iptable rule didn't register in prometheus") + } + } } func TestDelete(t *testing.T) { @@ -177,9 +208,31 @@ func TestDelete(t *testing.T) { t.Errorf("TestDelete failed @ iptMgr.Add") } + var ( + val = 0 + newVal = 0 + err error + ) + if testPrometheusToo { + val, err = metrics.GetValue("num_iptables_rules") + if err != nil { + printPrometheusError(t, "Problem getting http metrics") + } + } + if err := iptMgr.Delete(entry); err != nil { t.Errorf("TestDelete failed @ iptMgr.Delete") } + + if testPrometheusToo { + newVal, err = metrics.GetValue("num_iptables_rules") + if err != nil { + printPrometheusError(t, "Problem getting http metrics") + } + if newVal != val-1 { + printPrometheusError(t, "Delete iptable rule didn't register in prometheus") + } + } } func TestRun(t *testing.T) { @@ -203,15 +256,6 @@ func TestRun(t *testing.T) { } } -var ( - opsProcessed = promauto.NewCounter(prometheus.CounterOpts{ - Name: "myapp_processed_ops_total", - Help: "The total number of processed events", - }) - - someGauge = prometheus.NewGauge -) - func TestMain(m *testing.M) { iptMgr := NewIptablesManager() iptMgr.Save(util.IptablesConfigFile) @@ -220,19 +264,51 @@ func TestMain(m *testing.M) { iptMgr.Restore(util.IptablesConfigFile) - // fmt.Printf("exit code: %d", exitCode) + // messWithMetrics() - // go func() { - // for { - // opsProcessed.Inc() - // time.Sleep(2 * time.Second) - // } - // }() + os.Exit(exitCode) +} - // http.Handle("/metrics", promhttp.Handler()) - // http.ListenAndServe(":8081", nil) +func messWithMetrics() { + go func() { + for { + metrics.Inc(metrics.NumPolicies) + time.Sleep(2 * time.Second) + } + }() - // time.Sleep(10 * time.Second) + go func() { + for k := 0; k < 25; k++ { + for j := 0; j < 2*k; j++ { + metrics.Inc(metrics.NumIpSets) + } + time.Sleep(2 * time.Second) + } + }() - os.Exit(exitCode) + go func() { + for j := 0; j < 500; j += 2 { + for k := 0; k < 2; k++ { + metrics.Observe(metrics.AddPolicyExecTime, float64(2*k*j)) + time.Sleep(time.Second * time.Duration((k+1)/2)) + } + for k := 0; k < 3; k++ { + metrics.Observe(metrics.AddPolicyExecTime, float64(-k+j)) + time.Sleep(time.Second * time.Duration(k/3)) + } + } + }() + + go func() { + for { + for k := 0; k < 2; k++ { + metrics.Observe(metrics.AddIpSetExecTime, float64(2*k)) + time.Sleep(time.Second * time.Duration((k+1)/2)) + } + for k := 0; k < 3; k++ { + metrics.Observe(metrics.AddIpSetExecTime, float64(-k)) + time.Sleep(time.Second * time.Duration(k+1)) + } + } + }() } diff --git a/npm/nwpolicy_test.go b/npm/nwpolicy_test.go index 81a70f06bb..e5b0ab2356 100644 --- a/npm/nwpolicy_test.go +++ b/npm/nwpolicy_test.go @@ -7,6 +7,7 @@ import ( "github.com/Azure/azure-container-networking/npm/ipsm" "github.com/Azure/azure-container-networking/npm/iptm" + "github.com/Azure/azure-container-networking/npm/metrics" "github.com/Azure/azure-container-networking/npm/util" corev1 "k8s.io/api/core/v1" @@ -15,6 +16,13 @@ import ( "k8s.io/apimachinery/pkg/util/intstr" ) +const testPrometheusToo = true +const prometheusErrorMessage = "You can turn off Prometheus testing by flipping the boolean constant testPrometheusToo." + +func printPrometheusError(t *testing.T, message string) { + t.Errorf(message + ". " + prometheusErrorMessage) +} + func TestAddNetworkPolicy(t *testing.T) { npMgr := &NetworkPolicyManager{ nsMap: make(map[string]*namespace), @@ -91,6 +99,17 @@ func TestAddNetworkPolicy(t *testing.T) { }, } + var ( + val = 0 + newVal = 0 + ) + if testPrometheusToo { + val, err = metrics.GetValue("num_policies") + if err != nil { + printPrometheusError(t, "Problem getting http metrics") + } + } + npMgr.Lock() if err := npMgr.AddNetworkPolicy(allowIngress); err != nil { t.Errorf("TestAddNetworkPolicy failed @ allowIngress AddNetworkPolicy") @@ -126,6 +145,16 @@ func TestAddNetworkPolicy(t *testing.T) { t.Errorf("Error: %v", err) } npMgr.Unlock() + + if testPrometheusToo { + newVal, err = metrics.GetValue("num_policies") + if err != nil { + printPrometheusError(t, "Problem getting http metrics") + } + if newVal != val+2 { + printPrometheusError(t, "Add newtork policy didn't register in prometheus") + } + } } func TestUpdateNetworkPolicy(t *testing.T) { @@ -322,8 +351,29 @@ func TestDeleteNetworkPolicy(t *testing.T) { t.Errorf("TestAddNetworkPolicy failed @ AddNetworkPolicy") } + var ( + val = 0 + newVal = 0 + ) + if testPrometheusToo { + val, err = metrics.GetValue("num_policies") + if err != nil { + printPrometheusError(t, "Problem getting http metrics") + } + } + if err := npMgr.DeleteNetworkPolicy(allow); err != nil { t.Errorf("TestDeleteNetworkPolicy failed @ DeleteNetworkPolicy") } npMgr.Unlock() + + if testPrometheusToo { + newVal, err = metrics.GetValue("num_policies") + if err != nil { + printPrometheusError(t, "Problem getting http metrics") + } + if newVal != val-1 { + printPrometheusError(t, "Delete network policy didn't register in prometheus") + } + } } From ccd3762acd53a7ea9a8363a868a12a4608029e08 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Wed, 24 Jun 2020 16:38:23 -0400 Subject: [PATCH 07/53] run http server that exposes prometheus from main --- npm/plugin/main.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/npm/plugin/main.go b/npm/plugin/main.go index 6b915e3f7c..fb087b5547 100644 --- a/npm/plugin/main.go +++ b/npm/plugin/main.go @@ -7,6 +7,7 @@ import ( "github.com/Azure/azure-container-networking/log" "github.com/Azure/azure-container-networking/npm" + "github.com/Azure/azure-container-networking/npm/metrics" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/informers" @@ -67,5 +68,7 @@ func main() { panic(err.Error) } + metrics.StartHTTP(false) + select {} } From 8ed5dd8e97cbb68233a313ea4578f47df10f944e Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Wed, 24 Jun 2020 16:53:48 -0400 Subject: [PATCH 08/53] cleaner test additions with less code --- npm/ipsm/ipsm_test.go | 61 +++++++++++++------------------------------ npm/iptm/iptm_test.go | 61 +++++++++++++------------------------------ npm/nwpolicy_test.go | 59 +++++++++++++---------------------------- 3 files changed, 54 insertions(+), 127 deletions(-) diff --git a/npm/ipsm/ipsm_test.go b/npm/ipsm/ipsm_test.go index c4cde26137..9c4e915d36 100644 --- a/npm/ipsm/ipsm_test.go +++ b/npm/ipsm/ipsm_test.go @@ -10,13 +10,6 @@ import ( "github.com/Azure/azure-container-networking/npm/util" ) -const testPrometheusToo = true -const prometheusErrorMessage = "You can turn off Prometheus testing by flipping the boolean constant testPrometheusToo." - -func printPrometheusError(t *testing.T, message string) { - t.Errorf(message + ". " + prometheusErrorMessage) -} - func TestSave(t *testing.T) { ipsMgr := NewIpsetManager() if err := ipsMgr.Save(util.IpsetTestConfigFile); err != nil { @@ -135,30 +128,21 @@ func TestCreateSet(t *testing.T) { } }() - var ( - val = 0 - newVal = 0 - err error - ) - if testPrometheusToo { - val, err = metrics.GetValue("num_ipsets") - if err != nil { - printPrometheusError(t, "Problem getting http metrics") - } + val, err := metrics.GetValue("num_ipsets") + if err != nil { + t.Errorf("%v", err) } if err := ipsMgr.CreateSet("test-set", util.IpsetNetHashFlag); err != nil { t.Errorf("TestCreateSet failed @ ipsMgr.CreateSet") } - if testPrometheusToo { - newVal, err = metrics.GetValue("num_ipsets") - if err != nil { - printPrometheusError(t, "Problem getting http metrics") - } - if newVal != val+1 { - printPrometheusError(t, "Create ipset didn't register in prometheus") - } + newVal, err := metrics.GetValue("num_ipsets") + if err != nil { + t.Errorf("%v", err) + } + if newVal != val+1 { + t.Errorf("Create ipset didn't register in prometheus") } } @@ -182,30 +166,21 @@ func TestDeleteSet(t *testing.T) { t.Errorf("TestDeleteSet failed @ ipsMgr.DeleteSet") } - var ( - val = 0 - newVal = 0 - err error - ) - if testPrometheusToo { - val, err = metrics.GetValue("num_ipsets") - if err != nil { - printPrometheusError(t, "Problem getting http metrics") - } + val, err := metrics.GetValue("num_ipsets") + if err != nil { + t.Errorf("%v", err) } if err := ipsMgr.CreateSet("test-set", util.IpsetNetHashFlag); err != nil { t.Errorf("TestCreateSet failed @ ipsMgr.CreateSet") } - if testPrometheusToo { - newVal, err = metrics.GetValue("num_ipsets") - if err != nil { - printPrometheusError(t, "Problem getting http metrics") - } - if newVal != val-1 { - printPrometheusError(t, "Delete ipset didn't register in prometheus") - } + newVal, err := metrics.GetValue("num_ipsets") + if err != nil { + t.Errorf("%v", err) + } + if newVal != val-1 { + t.Errorf("Delete ipset didn't register in prometheus") } } diff --git a/npm/iptm/iptm_test.go b/npm/iptm/iptm_test.go index ea44f76606..911c666d27 100644 --- a/npm/iptm/iptm_test.go +++ b/npm/iptm/iptm_test.go @@ -10,13 +10,6 @@ import ( "github.com/Azure/azure-container-networking/npm/util" ) -const testPrometheusToo = true -const prometheusErrorMessage = "You can turn off Prometheus testing by flipping the boolean constant testPrometheusToo." - -func printPrometheusError(t *testing.T, message string) { - t.Errorf(message + ". " + prometheusErrorMessage) -} - func TestSave(t *testing.T) { iptMgr := &IptablesManager{} if err := iptMgr.Save(util.IptablesTestConfigFile); err != nil { @@ -158,30 +151,21 @@ func TestAdd(t *testing.T) { }, } - var ( - val = 0 - newVal = 0 - err error - ) - if testPrometheusToo { - val, err = metrics.GetValue("num_iptables_rules") - if err != nil { - printPrometheusError(t, "Problem getting http metrics") - } + val, err := metrics.GetValue("num_iptables_rules") + if err != nil { + t.Errorf("%v", err) } if err := iptMgr.Add(entry); err != nil { t.Errorf("TestAdd failed @ iptMgr.Add") } - if testPrometheusToo { - newVal, err = metrics.GetValue("num_iptables_rules") - if err != nil { - printPrometheusError(t, "Problem getting http metrics") - } - if newVal != val+1 { - printPrometheusError(t, "Add iptable rule didn't register in prometheus") - } + newVal, err := metrics.GetValue("num_iptables_rules") + if err != nil { + t.Errorf("%v", err) + } + if newVal != val+1 { + t.Errorf("Add iptable rule didn't register in prometheus") } } @@ -208,30 +192,21 @@ func TestDelete(t *testing.T) { t.Errorf("TestDelete failed @ iptMgr.Add") } - var ( - val = 0 - newVal = 0 - err error - ) - if testPrometheusToo { - val, err = metrics.GetValue("num_iptables_rules") - if err != nil { - printPrometheusError(t, "Problem getting http metrics") - } + val, err := metrics.GetValue("num_iptables_rules") + if err != nil { + t.Errorf("%v", err) } if err := iptMgr.Delete(entry); err != nil { t.Errorf("TestDelete failed @ iptMgr.Delete") } - if testPrometheusToo { - newVal, err = metrics.GetValue("num_iptables_rules") - if err != nil { - printPrometheusError(t, "Problem getting http metrics") - } - if newVal != val-1 { - printPrometheusError(t, "Delete iptable rule didn't register in prometheus") - } + newVal, err := metrics.GetValue("num_iptables_rules") + if err != nil { + t.Errorf("%v", err) + } + if newVal != val-1 { + t.Errorf("Delete iptable rule didn't register in prometheus") } } diff --git a/npm/nwpolicy_test.go b/npm/nwpolicy_test.go index e5b0ab2356..3c01f6fae3 100644 --- a/npm/nwpolicy_test.go +++ b/npm/nwpolicy_test.go @@ -16,13 +16,6 @@ import ( "k8s.io/apimachinery/pkg/util/intstr" ) -const testPrometheusToo = true -const prometheusErrorMessage = "You can turn off Prometheus testing by flipping the boolean constant testPrometheusToo." - -func printPrometheusError(t *testing.T, message string) { - t.Errorf(message + ". " + prometheusErrorMessage) -} - func TestAddNetworkPolicy(t *testing.T) { npMgr := &NetworkPolicyManager{ nsMap: make(map[string]*namespace), @@ -99,15 +92,9 @@ func TestAddNetworkPolicy(t *testing.T) { }, } - var ( - val = 0 - newVal = 0 - ) - if testPrometheusToo { - val, err = metrics.GetValue("num_policies") - if err != nil { - printPrometheusError(t, "Problem getting http metrics") - } + val, err := metrics.GetValue("num_policies") + if err != nil { + t.Errorf("%v", err) } npMgr.Lock() @@ -146,14 +133,12 @@ func TestAddNetworkPolicy(t *testing.T) { } npMgr.Unlock() - if testPrometheusToo { - newVal, err = metrics.GetValue("num_policies") - if err != nil { - printPrometheusError(t, "Problem getting http metrics") - } - if newVal != val+2 { - printPrometheusError(t, "Add newtork policy didn't register in prometheus") - } + newVal, err := metrics.GetValue("num_policies") + if err != nil { + t.Errorf("%v", err) + } + if newVal != val+2 { + t.Errorf("Add newtork policy didn't register in prometheus") } } @@ -351,15 +336,9 @@ func TestDeleteNetworkPolicy(t *testing.T) { t.Errorf("TestAddNetworkPolicy failed @ AddNetworkPolicy") } - var ( - val = 0 - newVal = 0 - ) - if testPrometheusToo { - val, err = metrics.GetValue("num_policies") - if err != nil { - printPrometheusError(t, "Problem getting http metrics") - } + val, err := metrics.GetValue("num_policies") + if err != nil { + t.Errorf("%v", err) } if err := npMgr.DeleteNetworkPolicy(allow); err != nil { @@ -367,13 +346,11 @@ func TestDeleteNetworkPolicy(t *testing.T) { } npMgr.Unlock() - if testPrometheusToo { - newVal, err = metrics.GetValue("num_policies") - if err != nil { - printPrometheusError(t, "Problem getting http metrics") - } - if newVal != val-1 { - printPrometheusError(t, "Delete network policy didn't register in prometheus") - } + newVal, err := metrics.GetValue("num_policies") + if err != nil { + t.Errorf("%v", err) + } + if newVal != val-1 { + t.Errorf("Delete network policy didn't register in prometheus") } } From ef5f168f573514261df1e82c65d1facde97f0b85 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Wed, 24 Jun 2020 18:35:58 -0400 Subject: [PATCH 09/53] removed incorrect instance of AddSet in the TestDeleteSet test --- npm/ipsm/ipsm_test.go | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/npm/ipsm/ipsm_test.go b/npm/ipsm/ipsm_test.go index 9c4e915d36..4a5f77fc2a 100644 --- a/npm/ipsm/ipsm_test.go +++ b/npm/ipsm/ipsm_test.go @@ -162,17 +162,13 @@ func TestDeleteSet(t *testing.T) { t.Errorf("TestDeleteSet failed @ ipsMgr.CreateSet") } - if err := ipsMgr.DeleteSet("test-set"); err != nil { - t.Errorf("TestDeleteSet failed @ ipsMgr.DeleteSet") - } - val, err := metrics.GetValue("num_ipsets") if err != nil { t.Errorf("%v", err) } - if err := ipsMgr.CreateSet("test-set", util.IpsetNetHashFlag); err != nil { - t.Errorf("TestCreateSet failed @ ipsMgr.CreateSet") + if err := ipsMgr.DeleteSet("test-set"); err != nil { + t.Errorf("TestDeleteSet failed @ ipsMgr.DeleteSet") } newVal, err := metrics.GetValue("num_ipsets") From 8decd79c51793bf6815e721c28b00905b60bbeaf Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Wed, 24 Jun 2020 18:49:51 -0400 Subject: [PATCH 10/53] added prometheus annotations to pod templates --- npm/azure-npm.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/npm/azure-npm.yaml b/npm/azure-npm.yaml index 6a313a0721..5a307f2fb9 100644 --- a/npm/azure-npm.yaml +++ b/npm/azure-npm.yaml @@ -49,7 +49,7 @@ roleRef: name: azure-npm apiGroup: rbac.authorization.k8s.io --- -apiVersion: extensions/v1beta1 +apiVersion: apps/v1 kind: DaemonSet metadata: name: azure-npm @@ -67,6 +67,8 @@ spec: k8s-app: azure-npm annotations: scheduler.alpha.kubernetes.io/critical-pod: '' + prometheus.io/scrape: "true" + prometheus.io/port: "8000" spec: priorityClassName: system-node-critical tolerations: From 1d9e319791d7f9d99979f541b9bd47f76e0c85c8 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Wed, 24 Jun 2020 20:14:06 -0400 Subject: [PATCH 11/53] deleted unused file --- npm/metrics/test-tools.go | 56 --------------------------------------- 1 file changed, 56 deletions(-) delete mode 100644 npm/metrics/test-tools.go diff --git a/npm/metrics/test-tools.go b/npm/metrics/test-tools.go deleted file mode 100644 index 057e454e70..0000000000 --- a/npm/metrics/test-tools.go +++ /dev/null @@ -1,56 +0,0 @@ -package metrics - -import ( - "fmt" - "testing" - "time" -) - -// DIDN'T WORK -func GaugeIncTest(t *testing.T, metricName string, action func()) { - if !started { - StartHTTP(true) - time.Sleep(2 * time.Second) - } - val, err := GetValue(metricName) - action() - - if err != nil { - t.Errorf("Problem getting http prometheus metrics for metric: " + metricName) - return - } - - newVal, err := GetValue(metricName) - fmt.Println(val) - fmt.Println(newVal) - if err != nil { - t.Errorf("Problem getting http prometheus metrics for metric: " + metricName) - } - if newVal != val+1 { - t.Errorf("Metric adjustment didn't register in prometheus for metric: " + metricName) - } -} - -func GaugeDecTest(t *testing.T, metricName string, action func()) { - if !started { - StartHTTP(true) - time.Sleep(2 * time.Second) - } - val, err := GetValue(metricName) - action() - - if err != nil { - t.Errorf("Problem getting http prometheus metrics for metric: " + metricName) - return - } - - newVal, err := GetValue(metricName) - fmt.Println(val) - fmt.Println(newVal) - if err != nil { - t.Errorf("Problem getting http prometheus metrics for metric: " + metricName) - } - if newVal != val-1 { - t.Errorf("Metric adjustment didn't register in prometheus for metric: " + metricName) - } -} From 19c3e93d20ca4685ddcdf865a2a24016a3349c1e Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Wed, 24 Jun 2020 21:30:53 -0400 Subject: [PATCH 12/53] much more organized initialization of metrics now. now includes map from metric to metric name --- npm/metrics/prometheus-metrics.go | 82 ++++++++++++++----------------- 1 file changed, 37 insertions(+), 45 deletions(-) diff --git a/npm/metrics/prometheus-metrics.go b/npm/metrics/prometheus-metrics.go index bb375a2d8c..7052ed98c6 100644 --- a/npm/metrics/prometheus-metrics.go +++ b/npm/metrics/prometheus-metrics.go @@ -11,70 +11,62 @@ import ( // var networkingRegistry *prometheus.Registery // var hostName = os.Getenv("HOSTNAME") -const tempHelp = "temporary help description" //TODO unique for each metric const namespace = "npm" -// TODO add quantiles for summaries? remove quantiles? - var ( - NumPolicies = prometheus.NewGauge( - prometheus.GaugeOpts{ - Namespace: namespace, - Name: "num_policies", - // Help: tempHelp, - }, - //[]string{"node"}, - // include labels in a slice like above if a vector - ) + NumPolicies = createGauge(numPoliciesLabel, "The number of current network policies for this node") + AddPolicyExecTime = createSummary(addPolicyExecTimeLabel, "Execution time for adding a network policy") + NumIPTableRules = createGauge(numPoliciesLabel, "The number of current IPTable rules for this node") + AddIPTableRuleExecTime = createSummary(addIPTableRuleExecTimeLabel, "Execution time for adding an IPTable rule to a chain") + NumIPSets = createGauge(numIPSetsLabel, "The number of current IP sets for this node") + AddIPSetExecTime = createSummary(addIPSetExecTimeLabel, "Execution time for creating an IP set") +) - AddPolicyExecTime = prometheus.NewSummary( - prometheus.SummaryOpts{ - Namespace: namespace, - Name: "add_policy_exec_time", - // Help: tempHelp, - Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, // TODO remove? - }, - ) +const ( + numPoliciesLabel = "num_policies" + addPolicyExecTimeLabel = "add_policy_exec_time" + numIPTableRules = "num_iptables_rules" + addIPTableRuleExecTimeLabel = "add_iptables_rule_exec_time" + numIPSetsLabel = "num_ipsets" + addIPSetExecTimeLabel = "add_ipset_exec_time" +) - NumIpTableRules = prometheus.NewGauge( - prometheus.GaugeOpts{ - Namespace: namespace, - Name: "num_iptables_rules", - // Help: tempHelp, - }, - ) +var allMetrics = map[prometheus.Collector]string{ + NumPolicies: numPoliciesLabel, + AddPolicyExecTime: addPolicyExecTimeLabel, + NumIPTableRules: numIPTableRules, + AddIPTableRuleExecTime: addIPTableRuleExecTimeLabel, + NumIPSets: numIPSetsLabel, + AddIPSetExecTime: addIPSetExecTimeLabel, +} - AddIpTableRuleExecTime = prometheus.NewSummary( - prometheus.SummaryOpts{ - Namespace: namespace, - Name: "add_iptables_rule_exec_time", - // Help: tempHelp, - }, - ) +var handler http.Handler - NumIpSets = prometheus.NewGauge( +func createGauge(name string, helpMessage string) prometheus.Gauge { + return prometheus.NewGauge( prometheus.GaugeOpts{ Namespace: namespace, - Name: "num_ipsets", - // Help: tempHelp, + Name: name, + Help: helpMessage, }, + //[]string{"node"}, // include labels in a slice like this if creating Vectors ) +} - AddIpSetExecTime = prometheus.NewSummary( +func createSummary(name string, helpMessage string) prometheus.Summary { + return prometheus.NewSummary( prometheus.SummaryOpts{ Namespace: namespace, - Name: "add_ipset_exec_time", - // Help: tempHelp, + Name: name, + Help: helpMessage, + // Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, // TODO add quantiles?? }, ) -) - -var allMetrics = []prometheus.Collector{NumPolicies, AddPolicyExecTime, NumIpTableRules, AddIpTableRuleExecTime, NumIpSets, AddIpSetExecTime} -var handler http.Handler +} func init() { // networkingRegistry = prometheus.NewRegistry() - for _, metric := range allMetrics { + for metric := range allMetrics { err := prometheus.DefaultRegisterer.Register(metric) if err != nil { fmt.Printf("While registering a certain prometheus metric, an error occurred: %s", err) From 75d5772367fd2ecb90bd9847b18f429fe76fdb2c Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Wed, 24 Jun 2020 21:32:38 -0400 Subject: [PATCH 13/53] add ability to get summary count value. now getting gauge values and this new count value are done by passing the metric itself as a param instead of a string --- npm/metrics/http.go | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/npm/metrics/http.go b/npm/metrics/http.go index dbaf135c78..d8a3daf411 100644 --- a/npm/metrics/http.go +++ b/npm/metrics/http.go @@ -7,6 +7,8 @@ import ( "regexp" "strconv" "time" + + "github.com/prometheus/client_golang/prometheus" ) var started = false @@ -41,7 +43,15 @@ func getMetricsText() (string, error) { return string(body), nil } -func GetValue(metricName string) (int, error) { +func GetValue(gaugeMetric prometheus.Collector) (int, error) { + return getMetricValue(allMetrics[gaugeMetric]) +} + +func GetCountValue(summaryMetric prometheus.Collector) (int, error) { + return getMetricValue(allMetrics[summaryMetric] + "_count") +} + +func getMetricValue(metricName string) (int, error) { if !started { StartHTTP(true) time.Sleep(2 * time.Second) From 3a081b19e307f4b3502d23309b086abe6e3cb8cc Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Wed, 24 Jun 2020 21:33:57 -0400 Subject: [PATCH 14/53] condenses prometheus testing code base by condensing all prometheus error messages into a function --- npm/metrics/test-util.go | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 npm/metrics/test-util.go diff --git a/npm/metrics/test-util.go b/npm/metrics/test-util.go new file mode 100644 index 0000000000..c114fbc0bc --- /dev/null +++ b/npm/metrics/test-util.go @@ -0,0 +1,21 @@ +package metrics + +import "testing" + +func NotifyIfErrors(t *testing.T, errors ...error) { + allGood := true + for _, err := range errors { + if err != nil { + allGood = false + break + } + } + if !allGood { + t.Errorf("Encountered these errors while getting metric values: ") + for _, err := range errors { + if err != nil { + t.Errorf("%v", err) + } + } + } +} From d41fe1092be8c6d3b8408506db3db3dbfc7d0868 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Wed, 24 Jun 2020 21:35:12 -0400 Subject: [PATCH 15/53] added testing for summary counts, condensed prometheus error handling code, and updated calls to use new form for getting metric values --- npm/ipsm/ipsm_test.go | 33 ++++++++++++++------------------- npm/iptm/iptm_test.go | 33 ++++++++++++++------------------- npm/nwpolicy_test.go | 33 ++++++++++++++------------------- 3 files changed, 42 insertions(+), 57 deletions(-) diff --git a/npm/ipsm/ipsm_test.go b/npm/ipsm/ipsm_test.go index 8d3e176bb1..46e3fd550a 100644 --- a/npm/ipsm/ipsm_test.go +++ b/npm/ipsm/ipsm_test.go @@ -128,10 +128,8 @@ func TestCreateSet(t *testing.T) { } }() - val, err := metrics.GetValue("num_ipsets") - if err != nil { - t.Errorf("%v", err) - } + gaugeVal, err1 := metrics.GetValue(metrics.NumIPSets) + countVal, err2 := metrics.GetCountValue(metrics.AddIPSetExecTime) if err := ipsMgr.CreateSet("test-set", []string{util.IpsetNetHashFlag}); err != nil { t.Errorf("TestCreateSet failed @ ipsMgr.CreateSet") @@ -142,12 +140,14 @@ func TestCreateSet(t *testing.T) { t.Errorf("TestCreateSet failed @ ipsMgr.CreateSet when set maxelem") } - newVal, err := metrics.GetValue("num_ipsets") - if err != nil { - t.Errorf("%v", err) + newGaugeVal, err3 := metrics.GetValue(metrics.NumIPSets) + newCountVal, err4 := metrics.GetCountValue(metrics.AddIPSetExecTime) + metrics.NotifyIfErrors(t, []error{err1, err2, err3, err4}) + if newGaugeVal != gaugeVal+2 { + t.Errorf("Change in ipset number didn't register in prometheus") } - if newVal != val+2 { - t.Errorf("Create ipset didn't register in prometheus") + if newCountVal != countVal+1 { + t.Errorf("Execution time didn't register in prometheus") } } @@ -167,21 +167,16 @@ func TestDeleteSet(t *testing.T) { t.Errorf("TestDeleteSet failed @ ipsMgr.CreateSet") } - val, err := metrics.GetValue("num_ipsets") - if err != nil { - t.Errorf("%v", err) - } + gaugeVal, err1 := metrics.GetValue("num_ipsets") if err := ipsMgr.DeleteSet("test-set"); err != nil { t.Errorf("TestDeleteSet failed @ ipsMgr.DeleteSet") } - newVal, err := metrics.GetValue("num_ipsets") - if err != nil { - t.Errorf("%v", err) - } - if newVal != val-1 { - t.Errorf("Delete ipset didn't register in prometheus") + newGaugeVal, err2 := metrics.GetValue("num_ipsets") + metrics.NotifyIfErrors(t, []error{err1, err2}) + if newGaugeVal != gaugeVal-1 { + t.Errorf("Change in ipset number didn't register in prometheus") } } diff --git a/npm/iptm/iptm_test.go b/npm/iptm/iptm_test.go index 911c666d27..7f5ff21116 100644 --- a/npm/iptm/iptm_test.go +++ b/npm/iptm/iptm_test.go @@ -151,21 +151,21 @@ func TestAdd(t *testing.T) { }, } - val, err := metrics.GetValue("num_iptables_rules") - if err != nil { - t.Errorf("%v", err) - } + gaugeVal, err1 := metrics.GetValue(metrics.NumIPTableRules) + countVal, err2 := metrics.GetCountValue(metrics.AddIPTableRuleExecTime) if err := iptMgr.Add(entry); err != nil { t.Errorf("TestAdd failed @ iptMgr.Add") } - newVal, err := metrics.GetValue("num_iptables_rules") - if err != nil { - t.Errorf("%v", err) + newGaugeVal, err2 := metrics.GetValue(metrics.NumIPTableRules) + newCountVal, err4 := metrics.GetCountValue(metrics.AddIPTableRuleExecTime) + metrics.NotifyIfErrors(t, []error{err1, err2, err3, err4}) + if newGaugeVal != gaugeVal+1 { + t.Errorf("Change in iptable rule number didn't register in prometheus") } - if newVal != val+1 { - t.Errorf("Add iptable rule didn't register in prometheus") + if newCountVal != countVal+1 { + t.Errorf("Execution time didn't register in prometheus") } } @@ -192,21 +192,16 @@ func TestDelete(t *testing.T) { t.Errorf("TestDelete failed @ iptMgr.Add") } - val, err := metrics.GetValue("num_iptables_rules") - if err != nil { - t.Errorf("%v", err) - } + gaugeVal, err1 := metrics.GetValue(metrics.NumIPTableRules) if err := iptMgr.Delete(entry); err != nil { t.Errorf("TestDelete failed @ iptMgr.Delete") } - newVal, err := metrics.GetValue("num_iptables_rules") - if err != nil { - t.Errorf("%v", err) - } - if newVal != val-1 { - t.Errorf("Delete iptable rule didn't register in prometheus") + newGaugeVal, err2 := metrics.GetValue(metrics.NumIPTableRules) + metrics.NotifyIfErrors(t, []error{err1, err2}) + if newGaugeVal != gaugeVal-1 { + t.Errorf("Change in iptable rule number didn't register in prometheus") } } diff --git a/npm/nwpolicy_test.go b/npm/nwpolicy_test.go index 88a2d42eee..fab4bcb343 100644 --- a/npm/nwpolicy_test.go +++ b/npm/nwpolicy_test.go @@ -92,10 +92,8 @@ func TestAddNetworkPolicy(t *testing.T) { }, } - val, err := metrics.GetValue("num_policies") - if err != nil { - t.Errorf("%v", err) - } + gaugeVal, err1 := metrics.GetValue(metrics.NumPolicies) + countVal, err2 := metrics.GetCountValue(metrics.AddPolicyExecTime) npMgr.Lock() if err := npMgr.AddNetworkPolicy(allowIngress); err != nil { @@ -133,12 +131,14 @@ func TestAddNetworkPolicy(t *testing.T) { } npMgr.Unlock() - newVal, err := metrics.GetValue("num_policies") - if err != nil { - t.Errorf("%v", err) + newGaugeVal, err3 := metrics.GetValue(metrics.NumPolicies) + newCountVal, err4 := metrics.GetCountValue(metrics.AddPolicyExecTime) + metrics.NotifyIfErrors(t, []error{err1, err2, err3, err4}) + if newGaugeVal != gaugeVal+2 { + t.Errorf("Change in policy number didn't register in prometheus") } - if newVal != val+2 { - t.Errorf("Add newtork policy didn't register in prometheus") + if newCountVal != countVal+1 { + t.Errorf("Execution time didn't register in prometheus") } } @@ -336,21 +336,16 @@ func TestDeleteNetworkPolicy(t *testing.T) { t.Errorf("TestAddNetworkPolicy failed @ AddNetworkPolicy") } - val, err := metrics.GetValue("num_policies") - if err != nil { - t.Errorf("%v", err) - } + gaugeVal, err1 := metrics.GetValue(metrics.NumPolicies) if err := npMgr.DeleteNetworkPolicy(allow); err != nil { t.Errorf("TestDeleteNetworkPolicy failed @ DeleteNetworkPolicy") } npMgr.Unlock() - newVal, err := metrics.GetValue("num_policies") - if err != nil { - t.Errorf("%v", err) - } - if newVal != val-1 { - t.Errorf("Delete network policy didn't register in prometheus") + newGaugeVal, err2 := metrics.GetValue(metrics.NumPolicies) + metrics.NotifyIfErrors(t, []error{err1, err2}) + if newGaugeVal != gaugeVal-1 { + t.Errorf("Change in policy number didn't register in prometheus") } } From 9fb3bfb58cdc340e17c1d130315b4516b57564f8 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Wed, 24 Jun 2020 21:41:47 -0400 Subject: [PATCH 16/53] update based on variable spelling change in metrics package --- npm/ipsm/ipsm.go | 6 +++--- npm/iptm/iptm.go | 6 +++--- npm/iptm/iptm_test.go | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/npm/ipsm/ipsm.go b/npm/ipsm/ipsm.go index e623e8fbc8..5f35eacf80 100644 --- a/npm/ipsm/ipsm.go +++ b/npm/ipsm/ipsm.go @@ -204,8 +204,8 @@ func (ipsMgr *IpsetManager) CreateSet(setName string, spec []string) error { ipsMgr.setMap[setName] = NewIpset(setName) - metrics.Inc(metrics.NumIpSets) - timer.StopAndRecord(metrics.AddIpSetExecTime) + metrics.Inc(metrics.NumIPSets) + timer.StopAndRecord(metrics.AddIPSetExecTime) return nil } @@ -233,7 +233,7 @@ func (ipsMgr *IpsetManager) DeleteSet(setName string) error { delete(ipsMgr.setMap, setName) - metrics.Dec(metrics.NumIpSets) + metrics.Dec(metrics.NumIPSets) return nil } diff --git a/npm/iptm/iptm.go b/npm/iptm/iptm.go index c93b7b9e25..2f021cdcf1 100644 --- a/npm/iptm/iptm.go +++ b/npm/iptm/iptm.go @@ -313,8 +313,8 @@ func (iptMgr *IptablesManager) Add(entry *IptEntry) error { return err } - metrics.Inc(metrics.NumIpTableRules) - timer.StopAndRecord(metrics.AddIpTableRuleExecTime) + metrics.Inc(metrics.NumIPTableRules) + timer.StopAndRecord(metrics.AddIPTableRuleExecTime) return nil } @@ -338,7 +338,7 @@ func (iptMgr *IptablesManager) Delete(entry *IptEntry) error { return err } - metrics.Dec(metrics.NumIpTableRules) + metrics.Dec(metrics.NumIPTableRules) return nil } diff --git a/npm/iptm/iptm_test.go b/npm/iptm/iptm_test.go index 7f5ff21116..22c21f640e 100644 --- a/npm/iptm/iptm_test.go +++ b/npm/iptm/iptm_test.go @@ -250,7 +250,7 @@ func messWithMetrics() { go func() { for k := 0; k < 25; k++ { for j := 0; j < 2*k; j++ { - metrics.Inc(metrics.NumIpSets) + metrics.Inc(metrics.NumIPSets) } time.Sleep(2 * time.Second) } @@ -272,11 +272,11 @@ func messWithMetrics() { go func() { for { for k := 0; k < 2; k++ { - metrics.Observe(metrics.AddIpSetExecTime, float64(2*k)) + metrics.Observe(metrics.AddIPSetExecTime, float64(2*k)) time.Sleep(time.Second * time.Duration((k+1)/2)) } for k := 0; k < 3; k++ { - metrics.Observe(metrics.AddIpSetExecTime, float64(-k)) + metrics.Observe(metrics.AddIPSetExecTime, float64(-k)) time.Sleep(time.Second * time.Duration(k+1)) } } From 67675df935a9f80d49390d9b6dcc23da6d83f9da Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Wed, 24 Jun 2020 21:50:17 -0400 Subject: [PATCH 17/53] Added comments for functions and moved http handler code to the http file --- npm/metrics/http.go | 27 ++++++++++++++++++++------- npm/metrics/prometheus-metrics.go | 12 +++--------- npm/metrics/test-util.go | 1 + npm/metrics/timer.go | 2 +- 4 files changed, 25 insertions(+), 17 deletions(-) diff --git a/npm/metrics/http.go b/npm/metrics/http.go index d8a3daf411..930232857b 100644 --- a/npm/metrics/http.go +++ b/npm/metrics/http.go @@ -9,29 +9,40 @@ import ( "time" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" ) var started = false +const httpPort = ":8000" + +// StartHTTP starts a HTTP endpoint on port 8000. Metrics are exposed on the endpoint /metrics. +// Set asGoRoutine to true if you want to be able to effectively run other code after calling this. func StartHTTP(asGoRoutine bool) { if started { return } started = true - http.Handle("/metrics", GetHandler()) - http.HandleFunc("/hi", func(w http.ResponseWriter, r *http.Request) { - fmt.Fprintf(w, "Hi!\n") - }) + http.Handle("/metrics", getHandler()) if asGoRoutine { - go http.ListenAndServe(":8000", nil) + go http.ListenAndServe(httpPort, nil) } else { - http.ListenAndServe(":8000", nil) + http.ListenAndServe(httpPort, nil) + } +} + +// getHandler returns the HTTP handler for the metrics endpoint +func getHandler() http.Handler { + if handler == nil { + handler = promhttp.Handler() + // handler = promhttp.HandlerFor(networkingRegistry, promhttp.HandlerOpts{}) // promhttp.Handler() } + return handler } func getMetricsText() (string, error) { - response, err := http.Get("http://localhost:8000/metrics") + response, err := http.Get("http://localhost" + httpPort + "/metrics") if err != nil { return "", err } @@ -43,10 +54,12 @@ func getMetricsText() (string, error) { return string(body), nil } +// GetValue returns a gaugeMetric's value as shown in the HTML Prometheus endpoint. func GetValue(gaugeMetric prometheus.Collector) (int, error) { return getMetricValue(allMetrics[gaugeMetric]) } +// GetCountValue returns the number of times a summaryMetric has recorded an observation as shown in the HTML Prometheus endpoint. func GetCountValue(summaryMetric prometheus.Collector) (int, error) { return getMetricValue(allMetrics[summaryMetric] + "_count") } diff --git a/npm/metrics/prometheus-metrics.go b/npm/metrics/prometheus-metrics.go index 7052ed98c6..4f36b79e97 100644 --- a/npm/metrics/prometheus-metrics.go +++ b/npm/metrics/prometheus-metrics.go @@ -5,7 +5,6 @@ import ( "net/http" "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promhttp" ) // var networkingRegistry *prometheus.Registery @@ -74,23 +73,18 @@ func init() { } } -func GetHandler() http.Handler { - if handler == nil { - handler = promhttp.Handler() - // handler = promhttp.HandlerFor(networkingRegistry, promhttp.HandlerOpts{}) // promhttp.Handler() - } - return handler -} - +// Observe records a value in the given summary func Observe(summary prometheus.Summary, value float64) { summary.Observe(value) // if changed to a vector, use summary.WithLabelValues(hostName).Observe(value) } +// Inc increases a gauge by 1 func Inc(gauge prometheus.Gauge) { gauge.Inc() } +// Dec decreases a gauge by 1 func Dec(gauge prometheus.Gauge) { gauge.Dec() } diff --git a/npm/metrics/test-util.go b/npm/metrics/test-util.go index c114fbc0bc..95a4573876 100644 --- a/npm/metrics/test-util.go +++ b/npm/metrics/test-util.go @@ -2,6 +2,7 @@ package metrics import "testing" +// NotifyIfErrors writes any non-nil errors to the testing utility func NotifyIfErrors(t *testing.T, errors ...error) { allGood := true for _, err := range errors { diff --git a/npm/metrics/timer.go b/npm/metrics/timer.go index 2ed5f3c8c9..4d676443d2 100644 --- a/npm/metrics/timer.go +++ b/npm/metrics/timer.go @@ -17,7 +17,7 @@ func StartNewTimer() *Timer { return &Timer{time.Now().Unix(), 0} } -// StopAndRecord ends a timer and records its delta in an observer +// StopAndRecord ends a timer and records its delta in a summary func (timer *Timer) StopAndRecord(observer prometheus.Summary) { Observe(observer, timer.timeElapsed()) } From 605567a10b49940937d8b3a6da568c3f976c213f Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Wed, 24 Jun 2020 21:57:56 -0400 Subject: [PATCH 18/53] fixed problem of registering same metric name for different metrics, and passing in the wrong param type for testing --- npm/ipsm/ipsm_test.go | 4 ++-- npm/iptm/iptm_test.go | 4 ++-- npm/metrics/prometheus-metrics.go | 2 +- npm/nwpolicy_test.go | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/npm/ipsm/ipsm_test.go b/npm/ipsm/ipsm_test.go index 46e3fd550a..b5196bc74c 100644 --- a/npm/ipsm/ipsm_test.go +++ b/npm/ipsm/ipsm_test.go @@ -142,7 +142,7 @@ func TestCreateSet(t *testing.T) { newGaugeVal, err3 := metrics.GetValue(metrics.NumIPSets) newCountVal, err4 := metrics.GetCountValue(metrics.AddIPSetExecTime) - metrics.NotifyIfErrors(t, []error{err1, err2, err3, err4}) + metrics.NotifyIfErrors(t, err1, err2, err3, err4) if newGaugeVal != gaugeVal+2 { t.Errorf("Change in ipset number didn't register in prometheus") } @@ -174,7 +174,7 @@ func TestDeleteSet(t *testing.T) { } newGaugeVal, err2 := metrics.GetValue("num_ipsets") - metrics.NotifyIfErrors(t, []error{err1, err2}) + metrics.NotifyIfErrors(t, err1, err2) if newGaugeVal != gaugeVal-1 { t.Errorf("Change in ipset number didn't register in prometheus") } diff --git a/npm/iptm/iptm_test.go b/npm/iptm/iptm_test.go index 22c21f640e..cf841ea373 100644 --- a/npm/iptm/iptm_test.go +++ b/npm/iptm/iptm_test.go @@ -160,7 +160,7 @@ func TestAdd(t *testing.T) { newGaugeVal, err2 := metrics.GetValue(metrics.NumIPTableRules) newCountVal, err4 := metrics.GetCountValue(metrics.AddIPTableRuleExecTime) - metrics.NotifyIfErrors(t, []error{err1, err2, err3, err4}) + metrics.NotifyIfErrors(t, err1, err2, err3, err4) if newGaugeVal != gaugeVal+1 { t.Errorf("Change in iptable rule number didn't register in prometheus") } @@ -199,7 +199,7 @@ func TestDelete(t *testing.T) { } newGaugeVal, err2 := metrics.GetValue(metrics.NumIPTableRules) - metrics.NotifyIfErrors(t, []error{err1, err2}) + metrics.NotifyIfErrors(t, err1, err2) if newGaugeVal != gaugeVal-1 { t.Errorf("Change in iptable rule number didn't register in prometheus") } diff --git a/npm/metrics/prometheus-metrics.go b/npm/metrics/prometheus-metrics.go index 4f36b79e97..cc88dacf54 100644 --- a/npm/metrics/prometheus-metrics.go +++ b/npm/metrics/prometheus-metrics.go @@ -15,7 +15,7 @@ const namespace = "npm" var ( NumPolicies = createGauge(numPoliciesLabel, "The number of current network policies for this node") AddPolicyExecTime = createSummary(addPolicyExecTimeLabel, "Execution time for adding a network policy") - NumIPTableRules = createGauge(numPoliciesLabel, "The number of current IPTable rules for this node") + NumIPTableRules = createGauge(numIPTableRules, "The number of current IPTable rules for this node") AddIPTableRuleExecTime = createSummary(addIPTableRuleExecTimeLabel, "Execution time for adding an IPTable rule to a chain") NumIPSets = createGauge(numIPSetsLabel, "The number of current IP sets for this node") AddIPSetExecTime = createSummary(addIPSetExecTimeLabel, "Execution time for creating an IP set") diff --git a/npm/nwpolicy_test.go b/npm/nwpolicy_test.go index fab4bcb343..d4ae3fe0e2 100644 --- a/npm/nwpolicy_test.go +++ b/npm/nwpolicy_test.go @@ -133,7 +133,7 @@ func TestAddNetworkPolicy(t *testing.T) { newGaugeVal, err3 := metrics.GetValue(metrics.NumPolicies) newCountVal, err4 := metrics.GetCountValue(metrics.AddPolicyExecTime) - metrics.NotifyIfErrors(t, []error{err1, err2, err3, err4}) + metrics.NotifyIfErrors(t, err1, err2, err3, err4) if newGaugeVal != gaugeVal+2 { t.Errorf("Change in policy number didn't register in prometheus") } @@ -344,7 +344,7 @@ func TestDeleteNetworkPolicy(t *testing.T) { npMgr.Unlock() newGaugeVal, err2 := metrics.GetValue(metrics.NumPolicies) - metrics.NotifyIfErrors(t, []error{err1, err2}) + metrics.NotifyIfErrors(t, err1, err2) if newGaugeVal != gaugeVal-1 { t.Errorf("Change in policy number didn't register in prometheus") } From b0d1f944d64cea68386c7e4854082a4db67af51e Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Wed, 24 Jun 2020 22:04:21 -0400 Subject: [PATCH 19/53] made prometheus testing folder with interactive testing file. moved old random metric flux testing function over from ipsm_test --- npm/iptm/iptm_test.go | 47 --------------------- npm/metrics/testing/main/test-main.go | 58 ++++++++++++++++++++++++++ npm/metrics/testing/old-test-tools.go | 30 +++++++++++++ npm/metrics/{ => testing}/test-util.go | 0 4 files changed, 88 insertions(+), 47 deletions(-) create mode 100644 npm/metrics/testing/main/test-main.go create mode 100644 npm/metrics/testing/old-test-tools.go rename npm/metrics/{ => testing}/test-util.go (100%) diff --git a/npm/iptm/iptm_test.go b/npm/iptm/iptm_test.go index cf841ea373..751531d355 100644 --- a/npm/iptm/iptm_test.go +++ b/npm/iptm/iptm_test.go @@ -3,7 +3,6 @@ package iptm import ( "os" "testing" - "time" "github.com/Azure/azure-container-networking/npm/metrics" @@ -234,51 +233,5 @@ func TestMain(m *testing.M) { iptMgr.Restore(util.IptablesConfigFile) - // messWithMetrics() - os.Exit(exitCode) } - -func messWithMetrics() { - go func() { - for { - metrics.Inc(metrics.NumPolicies) - time.Sleep(2 * time.Second) - } - }() - - go func() { - for k := 0; k < 25; k++ { - for j := 0; j < 2*k; j++ { - metrics.Inc(metrics.NumIPSets) - } - time.Sleep(2 * time.Second) - } - }() - - go func() { - for j := 0; j < 500; j += 2 { - for k := 0; k < 2; k++ { - metrics.Observe(metrics.AddPolicyExecTime, float64(2*k*j)) - time.Sleep(time.Second * time.Duration((k+1)/2)) - } - for k := 0; k < 3; k++ { - metrics.Observe(metrics.AddPolicyExecTime, float64(-k+j)) - time.Sleep(time.Second * time.Duration(k/3)) - } - } - }() - - go func() { - for { - for k := 0; k < 2; k++ { - metrics.Observe(metrics.AddIPSetExecTime, float64(2*k)) - time.Sleep(time.Second * time.Duration((k+1)/2)) - } - for k := 0; k < 3; k++ { - metrics.Observe(metrics.AddIPSetExecTime, float64(-k)) - time.Sleep(time.Second * time.Duration(k+1)) - } - } - }() -} diff --git a/npm/metrics/testing/main/test-main.go b/npm/metrics/testing/main/test-main.go new file mode 100644 index 0000000000..ac8ac0d98f --- /dev/null +++ b/npm/metrics/testing/main/test-main.go @@ -0,0 +1,58 @@ +package main + +import ( + "time" + + "github.com/Azure/azure-container-networking/npm/metrics" +) + +// file for testing metrics visually + +func main() { + messWithMetrics() + metrics.StartHTTP(false) +} + +func messWithMetrics() { + go func() { + for { + metrics.Inc(metrics.NumPolicies) + time.Sleep(2 * time.Second) + } + }() + + go func() { + for k := 0; k < 25; k++ { + for j := 0; j < 2*k; j++ { + metrics.Inc(metrics.NumIPSets) + } + time.Sleep(2 * time.Second) + } + }() + + go func() { + for j := 0; j < 500; j += 2 { + for k := 0; k < 2; k++ { + metrics.Observe(metrics.AddPolicyExecTime, float64(2*k*j)) + time.Sleep(time.Second * time.Duration((k+1)/2)) + } + for k := 0; k < 3; k++ { + metrics.Observe(metrics.AddPolicyExecTime, float64(-k+j)) + time.Sleep(time.Second * time.Duration(k/3)) + } + } + }() + + go func() { + for { + for k := 0; k < 2; k++ { + metrics.Observe(metrics.AddIPSetExecTime, float64(2*k)) + time.Sleep(time.Second * time.Duration((k+1)/2)) + } + for k := 0; k < 3; k++ { + metrics.Observe(metrics.AddIPSetExecTime, float64(-k)) + time.Sleep(time.Second * time.Duration(k+1)) + } + } + }() +} diff --git a/npm/metrics/testing/old-test-tools.go b/npm/metrics/testing/old-test-tools.go new file mode 100644 index 0000000000..0e81c70812 --- /dev/null +++ b/npm/metrics/testing/old-test-tools.go @@ -0,0 +1,30 @@ +package metrics + +import ( + "testing" +) + +// DIDN'T WORK +func GaugeIncTest(t *testing.T, metricName string, action func()) { + // if !started { + // StartHTTP(true) + // time.Sleep(2 * time.Second) + // } + // val, err := GetValue(metricName) + // action() + + // if err != nil { + // t.Errorf("Problem getting http prometheus metrics for metric: " + metricName) + // return + // } + + // newVal, err := GetValue(metricName) + // fmt.Println(val) + // fmt.Println(newVal) + // if err != nil { + // t.Errorf("Problem getting http prometheus metrics for metric: " + metricName) + // } + // if newVal != val+1 { + // t.Errorf("Metric adjustment didn't register in prometheus for metric: " + metricName) + // } +} diff --git a/npm/metrics/test-util.go b/npm/metrics/testing/test-util.go similarity index 100% rename from npm/metrics/test-util.go rename to npm/metrics/testing/test-util.go From 680cc88ec2726e4e851d041516fcde0f2cec3f4b Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Wed, 24 Jun 2020 22:11:34 -0400 Subject: [PATCH 20/53] moved testing around again --- npm/metrics/{testing => }/test-util.go | 0 npm/metrics/testing/old-test-tools.go | 30 --------------------- npm/metrics/testing/{main => }/test-main.go | 2 ++ 3 files changed, 2 insertions(+), 30 deletions(-) rename npm/metrics/{testing => }/test-util.go (100%) delete mode 100644 npm/metrics/testing/old-test-tools.go rename npm/metrics/testing/{main => }/test-main.go (93%) diff --git a/npm/metrics/testing/test-util.go b/npm/metrics/test-util.go similarity index 100% rename from npm/metrics/testing/test-util.go rename to npm/metrics/test-util.go diff --git a/npm/metrics/testing/old-test-tools.go b/npm/metrics/testing/old-test-tools.go deleted file mode 100644 index 0e81c70812..0000000000 --- a/npm/metrics/testing/old-test-tools.go +++ /dev/null @@ -1,30 +0,0 @@ -package metrics - -import ( - "testing" -) - -// DIDN'T WORK -func GaugeIncTest(t *testing.T, metricName string, action func()) { - // if !started { - // StartHTTP(true) - // time.Sleep(2 * time.Second) - // } - // val, err := GetValue(metricName) - // action() - - // if err != nil { - // t.Errorf("Problem getting http prometheus metrics for metric: " + metricName) - // return - // } - - // newVal, err := GetValue(metricName) - // fmt.Println(val) - // fmt.Println(newVal) - // if err != nil { - // t.Errorf("Problem getting http prometheus metrics for metric: " + metricName) - // } - // if newVal != val+1 { - // t.Errorf("Metric adjustment didn't register in prometheus for metric: " + metricName) - // } -} diff --git a/npm/metrics/testing/main/test-main.go b/npm/metrics/testing/test-main.go similarity index 93% rename from npm/metrics/testing/main/test-main.go rename to npm/metrics/testing/test-main.go index ac8ac0d98f..098fddd1e2 100644 --- a/npm/metrics/testing/main/test-main.go +++ b/npm/metrics/testing/test-main.go @@ -7,6 +7,8 @@ import ( ) // file for testing metrics visually +// view metrics in terminal with command: +// wget -qO- localhost:8000/metrics func main() { messWithMetrics() From c2cbc616efe25ef2600f114de2ba0ba05fbb36b1 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Wed, 24 Jun 2020 22:13:43 -0400 Subject: [PATCH 21/53] fixed spelling mistake --- npm/iptm/iptm_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/npm/iptm/iptm_test.go b/npm/iptm/iptm_test.go index 751531d355..64fffd1f0b 100644 --- a/npm/iptm/iptm_test.go +++ b/npm/iptm/iptm_test.go @@ -157,7 +157,7 @@ func TestAdd(t *testing.T) { t.Errorf("TestAdd failed @ iptMgr.Add") } - newGaugeVal, err2 := metrics.GetValue(metrics.NumIPTableRules) + newGaugeVal, err3 := metrics.GetValue(metrics.NumIPTableRules) newCountVal, err4 := metrics.GetCountValue(metrics.AddIPTableRuleExecTime) metrics.NotifyIfErrors(t, err1, err2, err3, err4) if newGaugeVal != gaugeVal+1 { From 59c56a731daeb4f568a4a5934ec4ceebca841d45 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Wed, 24 Jun 2020 22:15:37 -0400 Subject: [PATCH 22/53] counting mistake in unit test --- npm/nwpolicy_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/npm/nwpolicy_test.go b/npm/nwpolicy_test.go index d4ae3fe0e2..2313d5ed14 100644 --- a/npm/nwpolicy_test.go +++ b/npm/nwpolicy_test.go @@ -137,7 +137,7 @@ func TestAddNetworkPolicy(t *testing.T) { if newGaugeVal != gaugeVal+2 { t.Errorf("Change in policy number didn't register in prometheus") } - if newCountVal != countVal+1 { + if newCountVal != countVal+2 { t.Errorf("Execution time didn't register in prometheus") } } From 00a19504396372a6a9658ccf77042d9948da549c Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Thu, 25 Jun 2020 12:10:53 -0400 Subject: [PATCH 23/53] handler variable ws in wrong file. Changed stdout printing to logging --- npm/metrics/http.go | 2 ++ npm/metrics/prometheus-metrics.go | 7 ++----- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/npm/metrics/http.go b/npm/metrics/http.go index 930232857b..aae6d7ffc2 100644 --- a/npm/metrics/http.go +++ b/npm/metrics/http.go @@ -16,6 +16,8 @@ var started = false const httpPort = ":8000" +var handler http.Handler + // StartHTTP starts a HTTP endpoint on port 8000. Metrics are exposed on the endpoint /metrics. // Set asGoRoutine to true if you want to be able to effectively run other code after calling this. func StartHTTP(asGoRoutine bool) { diff --git a/npm/metrics/prometheus-metrics.go b/npm/metrics/prometheus-metrics.go index cc88dacf54..4b56e349e9 100644 --- a/npm/metrics/prometheus-metrics.go +++ b/npm/metrics/prometheus-metrics.go @@ -1,8 +1,7 @@ package metrics import ( - "fmt" - "net/http" + "github.com/Azure/azure-container-networking/log" "github.com/prometheus/client_golang/prometheus" ) @@ -39,8 +38,6 @@ var allMetrics = map[prometheus.Collector]string{ AddIPSetExecTime: addIPSetExecTimeLabel, } -var handler http.Handler - func createGauge(name string, helpMessage string) prometheus.Gauge { return prometheus.NewGauge( prometheus.GaugeOpts{ @@ -68,7 +65,7 @@ func init() { for metric := range allMetrics { err := prometheus.DefaultRegisterer.Register(metric) if err != nil { - fmt.Printf("While registering a certain prometheus metric, an error occurred: %s", err) + log.Printf("While registering a certain prometheus metric, an error occurred: %s", err) } } } From 6bb6bb0fda44a36a82a6625c401cb88facc316e1 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Thu, 25 Jun 2020 15:29:23 -0400 Subject: [PATCH 24/53] fixed parameter errors and counting error in a test --- npm/ipsm/ipsm_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/npm/ipsm/ipsm_test.go b/npm/ipsm/ipsm_test.go index b5196bc74c..6d3b50d763 100644 --- a/npm/ipsm/ipsm_test.go +++ b/npm/ipsm/ipsm_test.go @@ -146,7 +146,7 @@ func TestCreateSet(t *testing.T) { if newGaugeVal != gaugeVal+2 { t.Errorf("Change in ipset number didn't register in prometheus") } - if newCountVal != countVal+1 { + if newCountVal != countVal+2 { t.Errorf("Execution time didn't register in prometheus") } } @@ -167,13 +167,13 @@ func TestDeleteSet(t *testing.T) { t.Errorf("TestDeleteSet failed @ ipsMgr.CreateSet") } - gaugeVal, err1 := metrics.GetValue("num_ipsets") + gaugeVal, err1 := metrics.GetValue(metrics.NumIPSets) if err := ipsMgr.DeleteSet("test-set"); err != nil { t.Errorf("TestDeleteSet failed @ ipsMgr.DeleteSet") } - newGaugeVal, err2 := metrics.GetValue("num_ipsets") + newGaugeVal, err2 := metrics.GetValue(metrics.NumIPSets) metrics.NotifyIfErrors(t, err1, err2) if newGaugeVal != gaugeVal-1 { t.Errorf("Change in ipset number didn't register in prometheus") From 142c8f58aba5826fa51a0b3a7e1a37c95d537d46 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Fri, 26 Jun 2020 15:49:20 -0400 Subject: [PATCH 25/53] moved utilities for testing prometheus metrics to npm/util. Updated StartHTTP to have an additional parameter for waiting after starting the server --- npm/metrics/http.go | 72 ++++++------------------------- npm/metrics/prometheus-metrics.go | 14 +++++- npm/util/prometheus-util.go | 57 ++++++++++++++++++++++++ 3 files changed, 83 insertions(+), 60 deletions(-) create mode 100644 npm/util/prometheus-util.go diff --git a/npm/metrics/http.go b/npm/metrics/http.go index aae6d7ffc2..4238f62db2 100644 --- a/npm/metrics/http.go +++ b/npm/metrics/http.go @@ -1,37 +1,39 @@ package metrics import ( - "fmt" - "io/ioutil" "net/http" - "regexp" - "strconv" "time" - "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" ) -var started = false +const ( + // HTTPPort is the port used by the HTTP server (includes a preceding colon) + HTTPPort = ":8000" -const httpPort = ":8000" + //MetricsPath is the path for the Prometheus metrics endpoint (includes preceding slash) + MetricsPath = "/metrics" +) +var started = false var handler http.Handler -// StartHTTP starts a HTTP endpoint on port 8000. Metrics are exposed on the endpoint /metrics. +// StartHTTP starts a HTTP server with endpoint on port 8000. Metrics are exposed on the endpoint /metrics. // Set asGoRoutine to true if you want to be able to effectively run other code after calling this. -func StartHTTP(asGoRoutine bool) { +// The function will pause for delayAmountAfterStart seconds after starting the HTTP server for the first time. +func StartHTTP(asGoRoutine bool, delayAmountAfterStart int) { if started { return } started = true - http.Handle("/metrics", getHandler()) + http.Handle(MetricsPath, getHandler()) if asGoRoutine { - go http.ListenAndServe(httpPort, nil) + go http.ListenAndServe(HTTPPort, nil) } else { - http.ListenAndServe(httpPort, nil) + http.ListenAndServe(HTTPPort, nil) } + time.Sleep(time.Second * time.Duration(delayAmountAfterStart)) } // getHandler returns the HTTP handler for the metrics endpoint @@ -42,49 +44,3 @@ func getHandler() http.Handler { } return handler } - -func getMetricsText() (string, error) { - response, err := http.Get("http://localhost" + httpPort + "/metrics") - if err != nil { - return "", err - } - defer response.Body.Close() - body, err := ioutil.ReadAll(response.Body) - if err != nil { - return "", err - } - return string(body), nil -} - -// GetValue returns a gaugeMetric's value as shown in the HTML Prometheus endpoint. -func GetValue(gaugeMetric prometheus.Collector) (int, error) { - return getMetricValue(allMetrics[gaugeMetric]) -} - -// GetCountValue returns the number of times a summaryMetric has recorded an observation as shown in the HTML Prometheus endpoint. -func GetCountValue(summaryMetric prometheus.Collector) (int, error) { - return getMetricValue(allMetrics[summaryMetric] + "_count") -} - -func getMetricValue(metricName string) (int, error) { - if !started { - StartHTTP(true) - time.Sleep(2 * time.Second) - } - regex := regexp.MustCompile(metricName + " [0-9]+") - if regex == nil { - return 0, fmt.Errorf("Couldn't compile regular expression for metric: " + metricName) - } - text, err := getMetricsText() - if err != nil { - return 0, err - } - locations := regex.FindStringIndex(text) - if locations == nil { - return 0, fmt.Errorf("Couldn't find a match for metric: " + metricName) - } - start := locations[0] - end := locations[1] - value := text[start+len(metricName)+1 : end] - return strconv.Atoi(value) -} diff --git a/npm/metrics/prometheus-metrics.go b/npm/metrics/prometheus-metrics.go index 4b56e349e9..c8c166d123 100644 --- a/npm/metrics/prometheus-metrics.go +++ b/npm/metrics/prometheus-metrics.go @@ -1,6 +1,8 @@ package metrics import ( + "fmt" + "github.com/Azure/azure-container-networking/log" "github.com/prometheus/client_golang/prometheus" @@ -29,7 +31,7 @@ const ( addIPSetExecTimeLabel = "add_ipset_exec_time" ) -var allMetrics = map[prometheus.Collector]string{ +var allMetricNames = map[prometheus.Collector]string{ NumPolicies: numPoliciesLabel, AddPolicyExecTime: addPolicyExecTimeLabel, NumIPTableRules: numIPTableRules, @@ -62,12 +64,14 @@ func createSummary(name string, helpMessage string) prometheus.Summary { func init() { // networkingRegistry = prometheus.NewRegistry() - for metric := range allMetrics { + for metric := range allMetricNames { + prometheus.DefaultRegisterer.MustRegister(metric) err := prometheus.DefaultRegisterer.Register(metric) if err != nil { log.Printf("While registering a certain prometheus metric, an error occurred: %s", err) } } + fmt.Println("hey") } // Observe records a value in the given summary @@ -85,3 +89,9 @@ func Inc(gauge prometheus.Gauge) { func Dec(gauge prometheus.Gauge) { gauge.Dec() } + +// GetMetricName is for validation purposes. It returns the name representation of any metric registered in this file. +// Returns an empty string if the metric is not declared and exported in this file. +func GetMetricName(collector prometheus.Collector) string { + return allMetricNames[collector] +} diff --git a/npm/util/prometheus-util.go b/npm/util/prometheus-util.go new file mode 100644 index 0000000000..501bca6e65 --- /dev/null +++ b/npm/util/prometheus-util.go @@ -0,0 +1,57 @@ +package util + +import ( + "fmt" + "io/ioutil" + "net/http" + "regexp" + "strconv" + + "github.com/Azure/azure-container-networking/npm/metrics" + "github.com/prometheus/client_golang/prometheus" +) + +const delayAfterHTTPStart = 2 + +// GetValue is used for validation. It returns a gaugeMetric's value as shown in the HTML Prometheus endpoint. +func GetValue(gaugeMetric prometheus.Collector) (int, error) { + return getMetricValue(metrics.GetMetricName(gaugeMetric)) +} + +// GetCountValue is used for validation. It returns the number of times a summaryMetric has recorded an observation as shown in the HTML Prometheus endpoint. +func GetCountValue(summaryMetric prometheus.Collector) (int, error) { + return getMetricValue(metrics.GetMetricName(summaryMetric) + "_count") +} + +func getMetricValue(metricName string) (int, error) { + metrics.StartHTTP(true, delayAfterHTTPStart) + regex := regexp.MustCompile(metricName + " [0-9]+") + if regex == nil { + return 0, fmt.Errorf("Couldn't compile regular expression for metric: " + metricName) + } + text, err := getMetricsText() + if err != nil { + return 0, err + } + locations := regex.FindStringIndex(text) + if locations == nil { + return 0, fmt.Errorf("Couldn't find a match for metric: " + metricName) + } + start := locations[0] + end := locations[1] + value := text[start+len(metricName)+1 : end] + return strconv.Atoi(value) +} + +func getMetricsText() (string, error) { + response, err := http.Get("http://localhost" + metrics.HTTPPort + metrics.MetricsPath) + if err != nil { + return "", err + } + defer response.Body.Close() + body, err := ioutil.ReadAll(response.Body) + if err != nil { + return "", err + } + return string(body), nil +} From 94a79308934c2d41e9762a1bc0cf8b049b27c523 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Fri, 26 Jun 2020 15:58:37 -0400 Subject: [PATCH 26/53] updated uses of StartHTTP to have the extra parameter --- npm/metrics/testing/test-main.go | 2 +- npm/plugin/main.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/npm/metrics/testing/test-main.go b/npm/metrics/testing/test-main.go index 098fddd1e2..c81858346e 100644 --- a/npm/metrics/testing/test-main.go +++ b/npm/metrics/testing/test-main.go @@ -12,7 +12,7 @@ import ( func main() { messWithMetrics() - metrics.StartHTTP(false) + metrics.StartHTTP(false, 0) } func messWithMetrics() { diff --git a/npm/plugin/main.go b/npm/plugin/main.go index fb087b5547..886c94bbd3 100644 --- a/npm/plugin/main.go +++ b/npm/plugin/main.go @@ -68,7 +68,7 @@ func main() { panic(err.Error) } - metrics.StartHTTP(false) + metrics.StartHTTP(false, 0) select {} } From 7edc4cde52caa282ab22234f97d36b9eda2c4005 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Fri, 26 Jun 2020 16:09:14 -0400 Subject: [PATCH 27/53] updated GetValue and GetCountValue uses to use the prometheus features of the util package, which is now moved to a promutil package within npm/metrics/ --- npm/ipsm/ipsm_test.go | 13 +++++++------ npm/iptm/iptm_test.go | 14 +++++++------- .../promutil/util.go} | 2 +- npm/nwpolicy_test.go | 13 +++++++------ 4 files changed, 22 insertions(+), 20 deletions(-) rename npm/{util/prometheus-util.go => metrics/promutil/util.go} (98%) diff --git a/npm/ipsm/ipsm_test.go b/npm/ipsm/ipsm_test.go index 6d3b50d763..508fe36f98 100644 --- a/npm/ipsm/ipsm_test.go +++ b/npm/ipsm/ipsm_test.go @@ -7,6 +7,7 @@ import ( "testing" "github.com/Azure/azure-container-networking/npm/metrics" + "github.com/Azure/azure-container-networking/npm/metrics/promutil" "github.com/Azure/azure-container-networking/npm/util" ) @@ -128,8 +129,8 @@ func TestCreateSet(t *testing.T) { } }() - gaugeVal, err1 := metrics.GetValue(metrics.NumIPSets) - countVal, err2 := metrics.GetCountValue(metrics.AddIPSetExecTime) + gaugeVal, err1 := promutil.GetValue(metrics.NumIPSets) + countVal, err2 := promutil.GetCountValue(metrics.AddIPSetExecTime) if err := ipsMgr.CreateSet("test-set", []string{util.IpsetNetHashFlag}); err != nil { t.Errorf("TestCreateSet failed @ ipsMgr.CreateSet") @@ -140,8 +141,8 @@ func TestCreateSet(t *testing.T) { t.Errorf("TestCreateSet failed @ ipsMgr.CreateSet when set maxelem") } - newGaugeVal, err3 := metrics.GetValue(metrics.NumIPSets) - newCountVal, err4 := metrics.GetCountValue(metrics.AddIPSetExecTime) + newGaugeVal, err3 := promutil.GetValue(metrics.NumIPSets) + newCountVal, err4 := promutil.GetCountValue(metrics.AddIPSetExecTime) metrics.NotifyIfErrors(t, err1, err2, err3, err4) if newGaugeVal != gaugeVal+2 { t.Errorf("Change in ipset number didn't register in prometheus") @@ -167,13 +168,13 @@ func TestDeleteSet(t *testing.T) { t.Errorf("TestDeleteSet failed @ ipsMgr.CreateSet") } - gaugeVal, err1 := metrics.GetValue(metrics.NumIPSets) + gaugeVal, err1 := promutil.GetValue(metrics.NumIPSets) if err := ipsMgr.DeleteSet("test-set"); err != nil { t.Errorf("TestDeleteSet failed @ ipsMgr.DeleteSet") } - newGaugeVal, err2 := metrics.GetValue(metrics.NumIPSets) + newGaugeVal, err2 := promutil.GetValue(metrics.NumIPSets) metrics.NotifyIfErrors(t, err1, err2) if newGaugeVal != gaugeVal-1 { t.Errorf("Change in ipset number didn't register in prometheus") diff --git a/npm/iptm/iptm_test.go b/npm/iptm/iptm_test.go index 64fffd1f0b..d97149e1eb 100644 --- a/npm/iptm/iptm_test.go +++ b/npm/iptm/iptm_test.go @@ -5,7 +5,7 @@ import ( "testing" "github.com/Azure/azure-container-networking/npm/metrics" - + "github.com/Azure/azure-container-networking/npm/metrics/promutil" "github.com/Azure/azure-container-networking/npm/util" ) @@ -150,15 +150,15 @@ func TestAdd(t *testing.T) { }, } - gaugeVal, err1 := metrics.GetValue(metrics.NumIPTableRules) - countVal, err2 := metrics.GetCountValue(metrics.AddIPTableRuleExecTime) + gaugeVal, err1 := promutil.GetValue(metrics.NumIPTableRules) + countVal, err2 := promutil.GetCountValue(metrics.AddIPTableRuleExecTime) if err := iptMgr.Add(entry); err != nil { t.Errorf("TestAdd failed @ iptMgr.Add") } - newGaugeVal, err3 := metrics.GetValue(metrics.NumIPTableRules) - newCountVal, err4 := metrics.GetCountValue(metrics.AddIPTableRuleExecTime) + newGaugeVal, err3 := promutil.GetValue(metrics.NumIPTableRules) + newCountVal, err4 := promutil.GetCountValue(metrics.AddIPTableRuleExecTime) metrics.NotifyIfErrors(t, err1, err2, err3, err4) if newGaugeVal != gaugeVal+1 { t.Errorf("Change in iptable rule number didn't register in prometheus") @@ -191,13 +191,13 @@ func TestDelete(t *testing.T) { t.Errorf("TestDelete failed @ iptMgr.Add") } - gaugeVal, err1 := metrics.GetValue(metrics.NumIPTableRules) + gaugeVal, err1 := promutil.GetValue(metrics.NumIPTableRules) if err := iptMgr.Delete(entry); err != nil { t.Errorf("TestDelete failed @ iptMgr.Delete") } - newGaugeVal, err2 := metrics.GetValue(metrics.NumIPTableRules) + newGaugeVal, err2 := promutil.GetValue(metrics.NumIPTableRules) metrics.NotifyIfErrors(t, err1, err2) if newGaugeVal != gaugeVal-1 { t.Errorf("Change in iptable rule number didn't register in prometheus") diff --git a/npm/util/prometheus-util.go b/npm/metrics/promutil/util.go similarity index 98% rename from npm/util/prometheus-util.go rename to npm/metrics/promutil/util.go index 501bca6e65..d56e88ca90 100644 --- a/npm/util/prometheus-util.go +++ b/npm/metrics/promutil/util.go @@ -1,4 +1,4 @@ -package util +package promutil import ( "fmt" diff --git a/npm/nwpolicy_test.go b/npm/nwpolicy_test.go index 2313d5ed14..2de0eccc30 100644 --- a/npm/nwpolicy_test.go +++ b/npm/nwpolicy_test.go @@ -8,6 +8,7 @@ import ( "github.com/Azure/azure-container-networking/npm/ipsm" "github.com/Azure/azure-container-networking/npm/iptm" "github.com/Azure/azure-container-networking/npm/metrics" + "github.com/Azure/azure-container-networking/npm/metrics/promutil" "github.com/Azure/azure-container-networking/npm/util" corev1 "k8s.io/api/core/v1" @@ -92,8 +93,8 @@ func TestAddNetworkPolicy(t *testing.T) { }, } - gaugeVal, err1 := metrics.GetValue(metrics.NumPolicies) - countVal, err2 := metrics.GetCountValue(metrics.AddPolicyExecTime) + gaugeVal, err1 := promutil.GetValue(metrics.NumPolicies) + countVal, err2 := promutil.GetCountValue(metrics.AddPolicyExecTime) npMgr.Lock() if err := npMgr.AddNetworkPolicy(allowIngress); err != nil { @@ -131,8 +132,8 @@ func TestAddNetworkPolicy(t *testing.T) { } npMgr.Unlock() - newGaugeVal, err3 := metrics.GetValue(metrics.NumPolicies) - newCountVal, err4 := metrics.GetCountValue(metrics.AddPolicyExecTime) + newGaugeVal, err3 := promutil.GetValue(metrics.NumPolicies) + newCountVal, err4 := promutil.GetCountValue(metrics.AddPolicyExecTime) metrics.NotifyIfErrors(t, err1, err2, err3, err4) if newGaugeVal != gaugeVal+2 { t.Errorf("Change in policy number didn't register in prometheus") @@ -336,14 +337,14 @@ func TestDeleteNetworkPolicy(t *testing.T) { t.Errorf("TestAddNetworkPolicy failed @ AddNetworkPolicy") } - gaugeVal, err1 := metrics.GetValue(metrics.NumPolicies) + gaugeVal, err1 := promutil.GetValue(metrics.NumPolicies) if err := npMgr.DeleteNetworkPolicy(allow); err != nil { t.Errorf("TestDeleteNetworkPolicy failed @ DeleteNetworkPolicy") } npMgr.Unlock() - newGaugeVal, err2 := metrics.GetValue(metrics.NumPolicies) + newGaugeVal, err2 := promutil.GetValue(metrics.NumPolicies) metrics.NotifyIfErrors(t, err1, err2) if newGaugeVal != gaugeVal-1 { t.Errorf("Change in policy number didn't register in prometheus") From 8f0ece1e01ace5a5c9ba277ca589b4288468075c Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Fri, 26 Jun 2020 16:29:53 -0400 Subject: [PATCH 28/53] removed unnecessary comments, removed print statement, and added quantiles to all summary metrics --- npm/metrics/http.go | 1 - npm/metrics/prometheus-metrics.go | 15 ++++----------- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/npm/metrics/http.go b/npm/metrics/http.go index 4238f62db2..736c99f7c9 100644 --- a/npm/metrics/http.go +++ b/npm/metrics/http.go @@ -40,7 +40,6 @@ func StartHTTP(asGoRoutine bool, delayAmountAfterStart int) { func getHandler() http.Handler { if handler == nil { handler = promhttp.Handler() - // handler = promhttp.HandlerFor(networkingRegistry, promhttp.HandlerOpts{}) // promhttp.Handler() } return handler } diff --git a/npm/metrics/prometheus-metrics.go b/npm/metrics/prometheus-metrics.go index c8c166d123..e024198bfc 100644 --- a/npm/metrics/prometheus-metrics.go +++ b/npm/metrics/prometheus-metrics.go @@ -1,16 +1,11 @@ package metrics import ( - "fmt" - "github.com/Azure/azure-container-networking/log" "github.com/prometheus/client_golang/prometheus" ) -// var networkingRegistry *prometheus.Registery -// var hostName = os.Getenv("HOSTNAME") - const namespace = "npm" var ( @@ -54,16 +49,15 @@ func createGauge(name string, helpMessage string) prometheus.Gauge { func createSummary(name string, helpMessage string) prometheus.Summary { return prometheus.NewSummary( prometheus.SummaryOpts{ - Namespace: namespace, - Name: name, - Help: helpMessage, - // Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, // TODO add quantiles?? + Namespace: namespace, + Name: name, + Help: helpMessage, + Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, //quantiles }, ) } func init() { - // networkingRegistry = prometheus.NewRegistry() for metric := range allMetricNames { prometheus.DefaultRegisterer.MustRegister(metric) err := prometheus.DefaultRegisterer.Register(metric) @@ -71,7 +65,6 @@ func init() { log.Printf("While registering a certain prometheus metric, an error occurred: %s", err) } } - fmt.Println("hey") } // Observe records a value in the given summary From 36b574e358c1abc970162c064a54a8cdf7a3cf29 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Fri, 26 Jun 2020 16:53:30 -0400 Subject: [PATCH 29/53] fixed problem of double registering metrics --- npm/metrics/prometheus-metrics.go | 6 ------ 1 file changed, 6 deletions(-) diff --git a/npm/metrics/prometheus-metrics.go b/npm/metrics/prometheus-metrics.go index e024198bfc..e18518b644 100644 --- a/npm/metrics/prometheus-metrics.go +++ b/npm/metrics/prometheus-metrics.go @@ -1,8 +1,6 @@ package metrics import ( - "github.com/Azure/azure-container-networking/log" - "github.com/prometheus/client_golang/prometheus" ) @@ -60,10 +58,6 @@ func createSummary(name string, helpMessage string) prometheus.Summary { func init() { for metric := range allMetricNames { prometheus.DefaultRegisterer.MustRegister(metric) - err := prometheus.DefaultRegisterer.Register(metric) - if err != nil { - log.Printf("While registering a certain prometheus metric, an error occurred: %s", err) - } } } From 185efca9c10b5c09770508760beb40405c8c7c6d Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Mon, 29 Jun 2020 10:45:33 -0400 Subject: [PATCH 30/53] wait longer for http server to start --- npm/metrics/promutil/util.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/npm/metrics/promutil/util.go b/npm/metrics/promutil/util.go index d56e88ca90..84a9bed307 100644 --- a/npm/metrics/promutil/util.go +++ b/npm/metrics/promutil/util.go @@ -11,7 +11,7 @@ import ( "github.com/prometheus/client_golang/prometheus" ) -const delayAfterHTTPStart = 2 +const delayAfterHTTPStart = 10 // GetValue is used for validation. It returns a gaugeMetric's value as shown in the HTML Prometheus endpoint. func GetValue(gaugeMetric prometheus.Collector) (int, error) { From 85949baf9dd674182c4abdd593ff3f553683d688 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Tue, 30 Jun 2020 11:29:20 -0400 Subject: [PATCH 31/53] moved tool in test-util.go to promutil/util.go --- npm/ipsm/ipsm_test.go | 4 ++-- npm/iptm/iptm_test.go | 4 ++-- npm/metrics/promutil/util.go | 20 ++++++++++++++++++++ npm/metrics/test-util.go | 22 ---------------------- npm/nwpolicy_test.go | 4 ++-- 5 files changed, 26 insertions(+), 28 deletions(-) delete mode 100644 npm/metrics/test-util.go diff --git a/npm/ipsm/ipsm_test.go b/npm/ipsm/ipsm_test.go index 508fe36f98..4b2277ea3b 100644 --- a/npm/ipsm/ipsm_test.go +++ b/npm/ipsm/ipsm_test.go @@ -143,7 +143,7 @@ func TestCreateSet(t *testing.T) { newGaugeVal, err3 := promutil.GetValue(metrics.NumIPSets) newCountVal, err4 := promutil.GetCountValue(metrics.AddIPSetExecTime) - metrics.NotifyIfErrors(t, err1, err2, err3, err4) + promutil.NotifyIfErrors(t, err1, err2, err3, err4) if newGaugeVal != gaugeVal+2 { t.Errorf("Change in ipset number didn't register in prometheus") } @@ -175,7 +175,7 @@ func TestDeleteSet(t *testing.T) { } newGaugeVal, err2 := promutil.GetValue(metrics.NumIPSets) - metrics.NotifyIfErrors(t, err1, err2) + promutil.NotifyIfErrors(t, err1, err2) if newGaugeVal != gaugeVal-1 { t.Errorf("Change in ipset number didn't register in prometheus") } diff --git a/npm/iptm/iptm_test.go b/npm/iptm/iptm_test.go index d97149e1eb..54172efb6e 100644 --- a/npm/iptm/iptm_test.go +++ b/npm/iptm/iptm_test.go @@ -159,7 +159,7 @@ func TestAdd(t *testing.T) { newGaugeVal, err3 := promutil.GetValue(metrics.NumIPTableRules) newCountVal, err4 := promutil.GetCountValue(metrics.AddIPTableRuleExecTime) - metrics.NotifyIfErrors(t, err1, err2, err3, err4) + promutil.NotifyIfErrors(t, err1, err2, err3, err4) if newGaugeVal != gaugeVal+1 { t.Errorf("Change in iptable rule number didn't register in prometheus") } @@ -198,7 +198,7 @@ func TestDelete(t *testing.T) { } newGaugeVal, err2 := promutil.GetValue(metrics.NumIPTableRules) - metrics.NotifyIfErrors(t, err1, err2) + promutil.NotifyIfErrors(t, err1, err2) if newGaugeVal != gaugeVal-1 { t.Errorf("Change in iptable rule number didn't register in prometheus") } diff --git a/npm/metrics/promutil/util.go b/npm/metrics/promutil/util.go index 84a9bed307..e4d008602a 100644 --- a/npm/metrics/promutil/util.go +++ b/npm/metrics/promutil/util.go @@ -6,6 +6,7 @@ import ( "net/http" "regexp" "strconv" + "testing" "github.com/Azure/azure-container-networking/npm/metrics" "github.com/prometheus/client_golang/prometheus" @@ -13,6 +14,25 @@ import ( const delayAfterHTTPStart = 10 +// NotifyIfErrors writes any non-nil errors to a testing utility +func NotifyIfErrors(t *testing.T, errors ...error) { + allGood := true + for _, err := range errors { + if err != nil { + allGood = false + break + } + } + if !allGood { + t.Errorf("Encountered these errors while getting metric values: ") + for _, err := range errors { + if err != nil { + t.Errorf("%v", err) + } + } + } +} + // GetValue is used for validation. It returns a gaugeMetric's value as shown in the HTML Prometheus endpoint. func GetValue(gaugeMetric prometheus.Collector) (int, error) { return getMetricValue(metrics.GetMetricName(gaugeMetric)) diff --git a/npm/metrics/test-util.go b/npm/metrics/test-util.go deleted file mode 100644 index 95a4573876..0000000000 --- a/npm/metrics/test-util.go +++ /dev/null @@ -1,22 +0,0 @@ -package metrics - -import "testing" - -// NotifyIfErrors writes any non-nil errors to the testing utility -func NotifyIfErrors(t *testing.T, errors ...error) { - allGood := true - for _, err := range errors { - if err != nil { - allGood = false - break - } - } - if !allGood { - t.Errorf("Encountered these errors while getting metric values: ") - for _, err := range errors { - if err != nil { - t.Errorf("%v", err) - } - } - } -} diff --git a/npm/nwpolicy_test.go b/npm/nwpolicy_test.go index 2de0eccc30..4376d5032d 100644 --- a/npm/nwpolicy_test.go +++ b/npm/nwpolicy_test.go @@ -134,7 +134,7 @@ func TestAddNetworkPolicy(t *testing.T) { newGaugeVal, err3 := promutil.GetValue(metrics.NumPolicies) newCountVal, err4 := promutil.GetCountValue(metrics.AddPolicyExecTime) - metrics.NotifyIfErrors(t, err1, err2, err3, err4) + promutil.NotifyIfErrors(t, err1, err2, err3, err4) if newGaugeVal != gaugeVal+2 { t.Errorf("Change in policy number didn't register in prometheus") } @@ -345,7 +345,7 @@ func TestDeleteNetworkPolicy(t *testing.T) { npMgr.Unlock() newGaugeVal, err2 := promutil.GetValue(metrics.NumPolicies) - metrics.NotifyIfErrors(t, err1, err2) + promutil.NotifyIfErrors(t, err1, err2) if newGaugeVal != gaugeVal-1 { t.Errorf("Change in policy number didn't register in prometheus") } From 9c074dfa7a3ae0f8e5096bbc1a90d8c7d354bde1 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Tue, 30 Jun 2020 11:42:51 -0400 Subject: [PATCH 32/53] fixed timer to be in milliseconds and updated metric descriptions to mention units --- npm/metrics/prometheus-metrics.go | 7 ++++--- .../testing/{test-main.go => visualizing-metrics.go} | 7 +++---- npm/metrics/timer.go | 9 ++++----- 3 files changed, 11 insertions(+), 12 deletions(-) rename npm/metrics/testing/{test-main.go => visualizing-metrics.go} (82%) diff --git a/npm/metrics/prometheus-metrics.go b/npm/metrics/prometheus-metrics.go index e18518b644..8ed621acb4 100644 --- a/npm/metrics/prometheus-metrics.go +++ b/npm/metrics/prometheus-metrics.go @@ -8,11 +8,11 @@ const namespace = "npm" var ( NumPolicies = createGauge(numPoliciesLabel, "The number of current network policies for this node") - AddPolicyExecTime = createSummary(addPolicyExecTimeLabel, "Execution time for adding a network policy") + AddPolicyExecTime = createSummary(addPolicyExecTimeLabel, "Execution time in milliseconds for adding a network policy") NumIPTableRules = createGauge(numIPTableRules, "The number of current IPTable rules for this node") - AddIPTableRuleExecTime = createSummary(addIPTableRuleExecTimeLabel, "Execution time for adding an IPTable rule to a chain") + AddIPTableRuleExecTime = createSummary(addIPTableRuleExecTimeLabel, "Execution time in milliseconds for adding an IPTable rule to a chain") NumIPSets = createGauge(numIPSetsLabel, "The number of current IP sets for this node") - AddIPSetExecTime = createSummary(addIPSetExecTimeLabel, "Execution time for creating an IP set") + AddIPSetExecTime = createSummary(addIPSetExecTimeLabel, "Execution time in milliseconds for creating an IP set") ) const ( @@ -24,6 +24,7 @@ const ( addIPSetExecTimeLabel = "add_ipset_exec_time" ) +// include any metric in this map var allMetricNames = map[prometheus.Collector]string{ NumPolicies: numPoliciesLabel, AddPolicyExecTime: addPolicyExecTimeLabel, diff --git a/npm/metrics/testing/test-main.go b/npm/metrics/testing/visualizing-metrics.go similarity index 82% rename from npm/metrics/testing/test-main.go rename to npm/metrics/testing/visualizing-metrics.go index c81858346e..824e0fde50 100644 --- a/npm/metrics/testing/test-main.go +++ b/npm/metrics/testing/visualizing-metrics.go @@ -6,15 +6,14 @@ import ( "github.com/Azure/azure-container-networking/npm/metrics" ) -// file for testing metrics visually -// view metrics in terminal with command: -// wget -qO- localhost:8000/metrics - +// Run this file to test prometheus-metrics.go metrics visually. +// View metrics in the command line with: wget -qO- localhost:8000/metrics func main() { messWithMetrics() metrics.StartHTTP(false, 0) } +// Arbitrary changes that will bring noticeable changes between different wget responses. func messWithMetrics() { go func() { for { diff --git a/npm/metrics/timer.go b/npm/metrics/timer.go index 4d676443d2..e424934cbf 100644 --- a/npm/metrics/timer.go +++ b/npm/metrics/timer.go @@ -14,7 +14,7 @@ type Timer struct { // StartNewTimer creates a new Timer func StartNewTimer() *Timer { - return &Timer{time.Now().Unix(), 0} + return &Timer{time.Now().UnixNano(), 0} } // StopAndRecord ends a timer and records its delta in a summary @@ -23,14 +23,13 @@ func (timer *Timer) StopAndRecord(observer prometheus.Summary) { } func (timer *Timer) stop() { - timer.after = time.Now().Unix() + timer.after = time.Now().UnixNano() } func (timer *Timer) timeElapsed() float64 { if timer.after == 0 { timer.stop() } - millisecondDifference := (timer.after - timer.before) / 1000000 - secondDifference := float64(millisecondDifference) / 1000.0 - return secondDifference + millisecondDifference := (timer.after - timer.before) / 1000000.0 + return float64(millisecondDifference) } From f3bf2b6456da92c4637d8782ae520e21d0eb8919 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Tue, 30 Jun 2020 11:46:21 -0400 Subject: [PATCH 33/53] removed unnecessary comments --- npm/metrics/prometheus-metrics.go | 1 - npm/nwpolicy.go | 2 -- 2 files changed, 3 deletions(-) diff --git a/npm/metrics/prometheus-metrics.go b/npm/metrics/prometheus-metrics.go index 8ed621acb4..5292dc2602 100644 --- a/npm/metrics/prometheus-metrics.go +++ b/npm/metrics/prometheus-metrics.go @@ -65,7 +65,6 @@ func init() { // Observe records a value in the given summary func Observe(summary prometheus.Summary, value float64) { summary.Observe(value) - // if changed to a vector, use summary.WithLabelValues(hostName).Observe(value) } // Inc increases a gauge by 1 diff --git a/npm/nwpolicy.go b/npm/nwpolicy.go index 7d8ffbedcc..0a0d63976b 100644 --- a/npm/nwpolicy.go +++ b/npm/nwpolicy.go @@ -151,7 +151,6 @@ func (npMgr *NetworkPolicyManager) DeleteNetworkPolicy(npObj *networkingv1.Netwo err error ns *namespace allNs = npMgr.nsMap[util.KubeAllNamespacesFlag] - // timer = metrics.StartNewTimer() ) npNs, npName := "ns-"+npObj.ObjectMeta.Namespace, npObj.ObjectMeta.Name @@ -203,7 +202,6 @@ func (npMgr *NetworkPolicyManager) DeleteNetworkPolicy(npObj *networkingv1.Netwo } metrics.Dec(metrics.NumPolicies) - // timer.StopAndRecord(metrics.RemovePolicyExecTime) return nil } From 0fbf146b455e4c97f8c9de5482ce9c07f25a7ad3 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Tue, 30 Jun 2020 11:52:36 -0400 Subject: [PATCH 34/53] http server always started in a go routine now. Added comment justifying the use of an http server --- npm/metrics/http.go | 12 ++++-------- npm/metrics/promutil/util.go | 2 +- npm/metrics/testing/visualizing-metrics.go | 2 +- npm/plugin/main.go | 2 +- 4 files changed, 7 insertions(+), 11 deletions(-) diff --git a/npm/metrics/http.go b/npm/metrics/http.go index 736c99f7c9..0a99a44d71 100644 --- a/npm/metrics/http.go +++ b/npm/metrics/http.go @@ -18,21 +18,17 @@ const ( var started = false var handler http.Handler -// StartHTTP starts a HTTP server with endpoint on port 8000. Metrics are exposed on the endpoint /metrics. -// Set asGoRoutine to true if you want to be able to effectively run other code after calling this. +// StartHTTP starts a HTTP server in a Go routine with endpoint on port 8000. Metrics are exposed on the endpoint /metrics. +// By being exposed, the metrics can be scraped by a Prometheus Server or Container Insights. // The function will pause for delayAmountAfterStart seconds after starting the HTTP server for the first time. -func StartHTTP(asGoRoutine bool, delayAmountAfterStart int) { +func StartHTTP(delayAmountAfterStart int) { if started { return } started = true http.Handle(MetricsPath, getHandler()) - if asGoRoutine { - go http.ListenAndServe(HTTPPort, nil) - } else { - http.ListenAndServe(HTTPPort, nil) - } + go http.ListenAndServe(HTTPPort, nil) time.Sleep(time.Second * time.Duration(delayAmountAfterStart)) } diff --git a/npm/metrics/promutil/util.go b/npm/metrics/promutil/util.go index e4d008602a..03a16bee46 100644 --- a/npm/metrics/promutil/util.go +++ b/npm/metrics/promutil/util.go @@ -44,7 +44,7 @@ func GetCountValue(summaryMetric prometheus.Collector) (int, error) { } func getMetricValue(metricName string) (int, error) { - metrics.StartHTTP(true, delayAfterHTTPStart) + metrics.StartHTTP(delayAfterHTTPStart) regex := regexp.MustCompile(metricName + " [0-9]+") if regex == nil { return 0, fmt.Errorf("Couldn't compile regular expression for metric: " + metricName) diff --git a/npm/metrics/testing/visualizing-metrics.go b/npm/metrics/testing/visualizing-metrics.go index 824e0fde50..c03f3cc155 100644 --- a/npm/metrics/testing/visualizing-metrics.go +++ b/npm/metrics/testing/visualizing-metrics.go @@ -10,7 +10,7 @@ import ( // View metrics in the command line with: wget -qO- localhost:8000/metrics func main() { messWithMetrics() - metrics.StartHTTP(false, 0) + metrics.StartHTTP(0) } // Arbitrary changes that will bring noticeable changes between different wget responses. diff --git a/npm/plugin/main.go b/npm/plugin/main.go index 886c94bbd3..08130d7baf 100644 --- a/npm/plugin/main.go +++ b/npm/plugin/main.go @@ -68,7 +68,7 @@ func main() { panic(err.Error) } - metrics.StartHTTP(false, 0) + metrics.StartHTTP(0) select {} } From f17d70e3d9846f7124f0855c3abb1af719684861 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Tue, 30 Jun 2020 13:12:34 -0400 Subject: [PATCH 35/53] debugging http connection refused in pipeline --- npm/metrics/http.go | 9 ++++++++- npm/metrics/promutil/util.go | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/npm/metrics/http.go b/npm/metrics/http.go index 0a99a44d71..02b602b17e 100644 --- a/npm/metrics/http.go +++ b/npm/metrics/http.go @@ -4,6 +4,8 @@ import ( "net/http" "time" + // "github.com/Azure/azure-container-networking/log" + "github.com/prometheus/client_golang/prometheus/promhttp" ) @@ -28,7 +30,12 @@ func StartHTTP(delayAmountAfterStart int) { started = true http.Handle(MetricsPath, getHandler()) - go http.ListenAndServe(HTTPPort, nil) + go func() { + err := http.ListenAndServe(HTTPPort, nil)) + if err != nil { + panic(err.Error) + } + }() time.Sleep(time.Second * time.Duration(delayAmountAfterStart)) } diff --git a/npm/metrics/promutil/util.go b/npm/metrics/promutil/util.go index 03a16bee46..422bf423b3 100644 --- a/npm/metrics/promutil/util.go +++ b/npm/metrics/promutil/util.go @@ -12,7 +12,7 @@ import ( "github.com/prometheus/client_golang/prometheus" ) -const delayAfterHTTPStart = 10 +const delayAfterHTTPStart = 80 // NotifyIfErrors writes any non-nil errors to a testing utility func NotifyIfErrors(t *testing.T, errors ...error) { From 7c337fe8d2069a05d1202bcb9579719f1cb867e1 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Tue, 30 Jun 2020 13:27:36 -0400 Subject: [PATCH 36/53] fixed syntax error --- npm/metrics/http.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/npm/metrics/http.go b/npm/metrics/http.go index 02b602b17e..66d4a8f412 100644 --- a/npm/metrics/http.go +++ b/npm/metrics/http.go @@ -31,7 +31,7 @@ func StartHTTP(delayAmountAfterStart int) { http.Handle(MetricsPath, getHandler()) go func() { - err := http.ListenAndServe(HTTPPort, nil)) + err := http.ListenAndServe(HTTPPort, nil) if err != nil { panic(err.Error) } From 46be509d64723b72e6438a0736c13100707cdfc8 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Tue, 30 Jun 2020 17:04:18 -0400 Subject: [PATCH 37/53] removed debugging wrapper around http service --- npm/metrics/http.go | 9 +-------- npm/metrics/promutil/util.go | 2 +- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/npm/metrics/http.go b/npm/metrics/http.go index 66d4a8f412..0a99a44d71 100644 --- a/npm/metrics/http.go +++ b/npm/metrics/http.go @@ -4,8 +4,6 @@ import ( "net/http" "time" - // "github.com/Azure/azure-container-networking/log" - "github.com/prometheus/client_golang/prometheus/promhttp" ) @@ -30,12 +28,7 @@ func StartHTTP(delayAmountAfterStart int) { started = true http.Handle(MetricsPath, getHandler()) - go func() { - err := http.ListenAndServe(HTTPPort, nil) - if err != nil { - panic(err.Error) - } - }() + go http.ListenAndServe(HTTPPort, nil) time.Sleep(time.Second * time.Duration(delayAmountAfterStart)) } diff --git a/npm/metrics/promutil/util.go b/npm/metrics/promutil/util.go index 422bf423b3..03a16bee46 100644 --- a/npm/metrics/promutil/util.go +++ b/npm/metrics/promutil/util.go @@ -12,7 +12,7 @@ import ( "github.com/prometheus/client_golang/prometheus" ) -const delayAfterHTTPStart = 80 +const delayAfterHTTPStart = 10 // NotifyIfErrors writes any non-nil errors to a testing utility func NotifyIfErrors(t *testing.T, errors ...error) { From 0a51a18dad7048069a30c95601606f1bf6ad22c1 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Tue, 30 Jun 2020 17:05:22 -0400 Subject: [PATCH 38/53] sleep so that the testing metrics endpoint can be pinged --- npm/metrics/testing/visualizing-metrics.go | 1 + 1 file changed, 1 insertion(+) diff --git a/npm/metrics/testing/visualizing-metrics.go b/npm/metrics/testing/visualizing-metrics.go index c03f3cc155..b61b652395 100644 --- a/npm/metrics/testing/visualizing-metrics.go +++ b/npm/metrics/testing/visualizing-metrics.go @@ -11,6 +11,7 @@ import ( func main() { messWithMetrics() metrics.StartHTTP(0) + time.Sleep(time.Minute * 10) } // Arbitrary changes that will bring noticeable changes between different wget responses. From e032f9b3387070f2818293d4b1867110c1e56922 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Tue, 30 Jun 2020 17:09:34 -0400 Subject: [PATCH 39/53] redesigned GetValue and GetCountValue so that they don't use http calls --- npm/metrics/promutil/util.go | 54 +++++++++++------------------------- 1 file changed, 16 insertions(+), 38 deletions(-) diff --git a/npm/metrics/promutil/util.go b/npm/metrics/promutil/util.go index 03a16bee46..def553f880 100644 --- a/npm/metrics/promutil/util.go +++ b/npm/metrics/promutil/util.go @@ -1,15 +1,10 @@ package promutil import ( - "fmt" - "io/ioutil" - "net/http" - "regexp" - "strconv" "testing" - "github.com/Azure/azure-container-networking/npm/metrics" "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" ) const delayAfterHTTPStart = 10 @@ -33,45 +28,28 @@ func NotifyIfErrors(t *testing.T, errors ...error) { } } -// GetValue is used for validation. It returns a gaugeMetric's value as shown in the HTML Prometheus endpoint. +// GetValue is used for validation. It returns a gaugeMetric's value. func GetValue(gaugeMetric prometheus.Collector) (int, error) { - return getMetricValue(metrics.GetMetricName(gaugeMetric)) + dtoMetric, err := getDTOMetric(gaugeMetric) + if err != nil { + return 0, err + } + return int(dtoMetric.Gauge.GetValue()), nil } -// GetCountValue is used for validation. It returns the number of times a summaryMetric has recorded an observation as shown in the HTML Prometheus endpoint. +// GetCountValue is used for validation. It returns the number of times a summaryMetric has recorded an observation. func GetCountValue(summaryMetric prometheus.Collector) (int, error) { - return getMetricValue(metrics.GetMetricName(summaryMetric) + "_count") -} - -func getMetricValue(metricName string) (int, error) { - metrics.StartHTTP(delayAfterHTTPStart) - regex := regexp.MustCompile(metricName + " [0-9]+") - if regex == nil { - return 0, fmt.Errorf("Couldn't compile regular expression for metric: " + metricName) - } - text, err := getMetricsText() + dtoMetric, err := getDTOMetric(summaryMetric) if err != nil { return 0, err } - locations := regex.FindStringIndex(text) - if locations == nil { - return 0, fmt.Errorf("Couldn't find a match for metric: " + metricName) - } - start := locations[0] - end := locations[1] - value := text[start+len(metricName)+1 : end] - return strconv.Atoi(value) + return int(dtoMetric.Summary.GetSampleCount()), nil } -func getMetricsText() (string, error) { - response, err := http.Get("http://localhost" + metrics.HTTPPort + metrics.MetricsPath) - if err != nil { - return "", err - } - defer response.Body.Close() - body, err := ioutil.ReadAll(response.Body) - if err != nil { - return "", err - } - return string(body), nil +func getDTOMetric(collector prometheus.Collector) (*dto.Metric, error) { + channel := make(chan prometheus.Metric, 1) + collector.Collect(channel) + metric := &dto.Metric{} + err := (<-channel).Write(metric) + return metric, err } From f7c17ba03a7547b1fe6e84ff117bdc7f6877d726 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Thu, 2 Jul 2020 13:06:07 -0400 Subject: [PATCH 40/53] removed random but helpful testing file - will write about quick testing in a wiki page --- npm/metrics/testing/visualizing-metrics.go | 60 ---------------------- 1 file changed, 60 deletions(-) delete mode 100644 npm/metrics/testing/visualizing-metrics.go diff --git a/npm/metrics/testing/visualizing-metrics.go b/npm/metrics/testing/visualizing-metrics.go deleted file mode 100644 index b61b652395..0000000000 --- a/npm/metrics/testing/visualizing-metrics.go +++ /dev/null @@ -1,60 +0,0 @@ -package main - -import ( - "time" - - "github.com/Azure/azure-container-networking/npm/metrics" -) - -// Run this file to test prometheus-metrics.go metrics visually. -// View metrics in the command line with: wget -qO- localhost:8000/metrics -func main() { - messWithMetrics() - metrics.StartHTTP(0) - time.Sleep(time.Minute * 10) -} - -// Arbitrary changes that will bring noticeable changes between different wget responses. -func messWithMetrics() { - go func() { - for { - metrics.Inc(metrics.NumPolicies) - time.Sleep(2 * time.Second) - } - }() - - go func() { - for k := 0; k < 25; k++ { - for j := 0; j < 2*k; j++ { - metrics.Inc(metrics.NumIPSets) - } - time.Sleep(2 * time.Second) - } - }() - - go func() { - for j := 0; j < 500; j += 2 { - for k := 0; k < 2; k++ { - metrics.Observe(metrics.AddPolicyExecTime, float64(2*k*j)) - time.Sleep(time.Second * time.Duration((k+1)/2)) - } - for k := 0; k < 3; k++ { - metrics.Observe(metrics.AddPolicyExecTime, float64(-k+j)) - time.Sleep(time.Second * time.Duration(k/3)) - } - } - }() - - go func() { - for { - for k := 0; k < 2; k++ { - metrics.Observe(metrics.AddIPSetExecTime, float64(2*k)) - time.Sleep(time.Second * time.Duration((k+1)/2)) - } - for k := 0; k < 3; k++ { - metrics.Observe(metrics.AddIPSetExecTime, float64(-k)) - time.Sleep(time.Second * time.Duration(k+1)) - } - } - }() -} From 4797697bbdaba38563240eede13e3bcf530ecfed Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Fri, 10 Jul 2020 16:00:01 -0400 Subject: [PATCH 41/53] milliseconds were being truncated. now they have decimals --- npm/metrics/timer.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/npm/metrics/timer.go b/npm/metrics/timer.go index e424934cbf..bda78214ef 100644 --- a/npm/metrics/timer.go +++ b/npm/metrics/timer.go @@ -30,6 +30,6 @@ func (timer *Timer) timeElapsed() float64 { if timer.after == 0 { timer.stop() } - millisecondDifference := (timer.after - timer.before) / 1000000.0 - return float64(millisecondDifference) + millisecondDifference := float64(timer.after-timer.before) / 1000000.0 + return millisecondDifference } From 7a5eb6e86c0520d90832b309e1c83897a9a1ba22 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Fri, 10 Jul 2020 17:16:43 -0400 Subject: [PATCH 42/53] use direct Prometheus metric commands instead of wrapping them --- npm/ipsm/ipsm.go | 9 +++++++-- npm/iptm/iptm.go | 4 ++-- npm/metrics/prometheus-metrics.go | 15 --------------- npm/metrics/timer.go | 2 +- npm/nwpolicy.go | 4 ++-- 5 files changed, 12 insertions(+), 22 deletions(-) diff --git a/npm/ipsm/ipsm.go b/npm/ipsm/ipsm.go index 22d3c6f87f..5a698f93ae 100644 --- a/npm/ipsm/ipsm.go +++ b/npm/ipsm/ipsm.go @@ -202,7 +202,7 @@ func (ipsMgr *IpsetManager) CreateSet(setName string, spec []string) error { ipsMgr.setMap[setName] = NewIpset(setName) - metrics.Inc(metrics.NumIPSets) + metrics.NumIPSets.Inc() timer.StopAndRecord(metrics.AddIPSetExecTime) return nil @@ -231,7 +231,8 @@ func (ipsMgr *IpsetManager) DeleteSet(setName string) error { delete(ipsMgr.setMap, setName) - metrics.Dec(metrics.NumIPSets) + metrics.NumIPSets.Dec() + // TODO set count metric with setName label to 0 return nil } @@ -277,6 +278,8 @@ func (ipsMgr *IpsetManager) AddToSet(setName, ip, spec, podUid string) error { // Stores the podUid as the context for this ip. ipsMgr.setMap[setName].elements[ip] = podUid + // TODO increment set count metric with setName label + return nil } @@ -318,6 +321,8 @@ func (ipsMgr *IpsetManager) DeleteFromSet(setName, ip, podUid string) error { // Now cleanup the cache delete(ipsMgr.setMap[setName].elements, ip) + // TODO decrement set count metric with setName label + if len(ipsMgr.setMap[setName].elements) == 0 { ipsMgr.DeleteSet(setName) } diff --git a/npm/iptm/iptm.go b/npm/iptm/iptm.go index d09cf474f2..d5b8a0eff7 100644 --- a/npm/iptm/iptm.go +++ b/npm/iptm/iptm.go @@ -313,7 +313,7 @@ func (iptMgr *IptablesManager) Add(entry *IptEntry) error { return err } - metrics.Inc(metrics.NumIPTableRules) + metrics.NumIPTableRules.Inc() timer.StopAndRecord(metrics.AddIPTableRuleExecTime) return nil @@ -338,7 +338,7 @@ func (iptMgr *IptablesManager) Delete(entry *IptEntry) error { return err } - metrics.Dec(metrics.NumIPTableRules) + metrics.NumIPTableRules.Dec() return nil } diff --git a/npm/metrics/prometheus-metrics.go b/npm/metrics/prometheus-metrics.go index 5292dc2602..6b996c4771 100644 --- a/npm/metrics/prometheus-metrics.go +++ b/npm/metrics/prometheus-metrics.go @@ -62,21 +62,6 @@ func init() { } } -// Observe records a value in the given summary -func Observe(summary prometheus.Summary, value float64) { - summary.Observe(value) -} - -// Inc increases a gauge by 1 -func Inc(gauge prometheus.Gauge) { - gauge.Inc() -} - -// Dec decreases a gauge by 1 -func Dec(gauge prometheus.Gauge) { - gauge.Dec() -} - // GetMetricName is for validation purposes. It returns the name representation of any metric registered in this file. // Returns an empty string if the metric is not declared and exported in this file. func GetMetricName(collector prometheus.Collector) string { diff --git a/npm/metrics/timer.go b/npm/metrics/timer.go index bda78214ef..b88e3b01f9 100644 --- a/npm/metrics/timer.go +++ b/npm/metrics/timer.go @@ -19,7 +19,7 @@ func StartNewTimer() *Timer { // StopAndRecord ends a timer and records its delta in a summary func (timer *Timer) StopAndRecord(observer prometheus.Summary) { - Observe(observer, timer.timeElapsed()) + observer.Observe(timer.timeElapsed()) } func (timer *Timer) stop() { diff --git a/npm/nwpolicy.go b/npm/nwpolicy.go index 48476171bb..fa6dfb1ff4 100644 --- a/npm/nwpolicy.go +++ b/npm/nwpolicy.go @@ -129,7 +129,7 @@ func (npMgr *NetworkPolicyManager) AddNetworkPolicy(npObj *networkingv1.NetworkP } } - metrics.Inc(metrics.NumPolicies) + metrics.NumPolicies.Inc() timer.StopAndRecord(metrics.AddPolicyExecTime) return nil @@ -201,7 +201,7 @@ func (npMgr *NetworkPolicyManager) DeleteNetworkPolicy(npObj *networkingv1.Netwo } } - metrics.Dec(metrics.NumPolicies) + metrics.NumPolicies.Dec() return nil } From 2f456fe96258d5d67d3d8e125dd0205f56c06d7b Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Fri, 10 Jul 2020 17:21:23 -0400 Subject: [PATCH 43/53] removed code used when testing was done through http server. Moved registering to metric creation functions --- npm/metrics/prometheus-metrics.go | 30 ++++++------------------------ npm/metrics/promutil/util.go | 2 -- 2 files changed, 6 insertions(+), 26 deletions(-) diff --git a/npm/metrics/prometheus-metrics.go b/npm/metrics/prometheus-metrics.go index 6b996c4771..0a69d05994 100644 --- a/npm/metrics/prometheus-metrics.go +++ b/npm/metrics/prometheus-metrics.go @@ -24,18 +24,8 @@ const ( addIPSetExecTimeLabel = "add_ipset_exec_time" ) -// include any metric in this map -var allMetricNames = map[prometheus.Collector]string{ - NumPolicies: numPoliciesLabel, - AddPolicyExecTime: addPolicyExecTimeLabel, - NumIPTableRules: numIPTableRules, - AddIPTableRuleExecTime: addIPTableRuleExecTimeLabel, - NumIPSets: numIPSetsLabel, - AddIPSetExecTime: addIPSetExecTimeLabel, -} - func createGauge(name string, helpMessage string) prometheus.Gauge { - return prometheus.NewGauge( + gauge := prometheus.NewGauge( prometheus.GaugeOpts{ Namespace: namespace, Name: name, @@ -43,10 +33,12 @@ func createGauge(name string, helpMessage string) prometheus.Gauge { }, //[]string{"node"}, // include labels in a slice like this if creating Vectors ) + prometheus.DefaultRegisterer.MustRegister(gauge) + return gauge } func createSummary(name string, helpMessage string) prometheus.Summary { - return prometheus.NewSummary( + summary := prometheus.NewSummary( prometheus.SummaryOpts{ Namespace: namespace, Name: name, @@ -54,16 +46,6 @@ func createSummary(name string, helpMessage string) prometheus.Summary { Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, //quantiles }, ) -} - -func init() { - for metric := range allMetricNames { - prometheus.DefaultRegisterer.MustRegister(metric) - } -} - -// GetMetricName is for validation purposes. It returns the name representation of any metric registered in this file. -// Returns an empty string if the metric is not declared and exported in this file. -func GetMetricName(collector prometheus.Collector) string { - return allMetricNames[collector] + prometheus.DefaultRegisterer.MustRegister(summary) + return summary } diff --git a/npm/metrics/promutil/util.go b/npm/metrics/promutil/util.go index def553f880..f3bbafde00 100644 --- a/npm/metrics/promutil/util.go +++ b/npm/metrics/promutil/util.go @@ -7,8 +7,6 @@ import ( dto "github.com/prometheus/client_model/go" ) -const delayAfterHTTPStart = 10 - // NotifyIfErrors writes any non-nil errors to a testing utility func NotifyIfErrors(t *testing.T, errors ...error) { allGood := true From 29f3e906e0bb66586625102b7dca419a135d19cb Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Fri, 10 Jul 2020 17:46:14 -0400 Subject: [PATCH 44/53] added createGaugeVec, updated comments, made all help strings constants --- npm/metrics/prometheus-metrics.go | 54 +++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 13 deletions(-) diff --git a/npm/metrics/prometheus-metrics.go b/npm/metrics/prometheus-metrics.go index 0a69d05994..936f1b990f 100644 --- a/npm/metrics/prometheus-metrics.go +++ b/npm/metrics/prometheus-metrics.go @@ -6,22 +6,37 @@ import ( const namespace = "npm" +// Gauge metrics have methods Inc(), Dec(), and Set(float64) +// Summary metrics has the method Observe(float64) +// For any Vector metric, you can call WithLabelValues(...string) before the above methods e.g. SomeGaugeVec.WithLabelValues("label1", "label2").Dec() + var ( - NumPolicies = createGauge(numPoliciesLabel, "The number of current network policies for this node") - AddPolicyExecTime = createSummary(addPolicyExecTimeLabel, "Execution time in milliseconds for adding a network policy") - NumIPTableRules = createGauge(numIPTableRules, "The number of current IPTable rules for this node") - AddIPTableRuleExecTime = createSummary(addIPTableRuleExecTimeLabel, "Execution time in milliseconds for adding an IPTable rule to a chain") - NumIPSets = createGauge(numIPSetsLabel, "The number of current IP sets for this node") - AddIPSetExecTime = createSummary(addIPSetExecTimeLabel, "Execution time in milliseconds for creating an IP set") + NumPolicies = createGauge(numPoliciesLabel, numPoliciesHelp) + AddPolicyExecTime = createSummary(addPolicyExecTimeLabel, addPolicyExecTimeHelp) + NumIPTableRules = createGauge(numIPTableRulesLabel, numIPTableRulesHelp) + AddIPTableRuleExecTime = createSummary(addIPTableRuleExecTimeLabel, addIPTableRuleExecTimeHelp) + NumIPSets = createGauge(numIPSetsLabel, numIPSetsHelp) + AddIPSetExecTime = createSummary(addIPSetExecTimeLabel, addIPSetExecTimeHelp) ) const ( - numPoliciesLabel = "num_policies" - addPolicyExecTimeLabel = "add_policy_exec_time" - numIPTableRules = "num_iptables_rules" + numPoliciesLabel = "num_policies" + numPoliciesHelp = "The number of current network policies for this node" + + addPolicyExecTimeLabel = "add_policy_exec_time" + addPolicyExecTimeHelp = "Execution time in milliseconds for adding a network policy" + + numIPTableRulesLabel = "num_iptables_rules" + numIPTableRulesHelp = "The number of current IPTable rules for this node" + addIPTableRuleExecTimeLabel = "add_iptables_rule_exec_time" - numIPSetsLabel = "num_ipsets" - addIPSetExecTimeLabel = "add_ipset_exec_time" + addIPTableRuleExecTimeHelp = "Execution time in milliseconds for adding an IPTable rule to a chain" + + numIPSetsLabel = "num_ipsets" + numIPSetsHelp = "The number of current IP sets for this node" + + addIPSetExecTimeLabel = "add_ipset_exec_time" + addIPSetExecTimeHelp = "Execution time in milliseconds for creating an IP set" ) func createGauge(name string, helpMessage string) prometheus.Gauge { @@ -31,19 +46,32 @@ func createGauge(name string, helpMessage string) prometheus.Gauge { Name: name, Help: helpMessage, }, - //[]string{"node"}, // include labels in a slice like this if creating Vectors ) prometheus.DefaultRegisterer.MustRegister(gauge) return gauge } +func createGaugeVec(name string, helpMessage string, labels ...string) *prometheus.GaugeVec { + gaugeVec := prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: namespace, + Name: name, + Help: helpMessage, + }, + labels, + ) + prometheus.DefaultRegisterer.MustRegister(gaugeVec) + return gaugeVec +} + func createSummary(name string, helpMessage string) prometheus.Summary { summary := prometheus.NewSummary( prometheus.SummaryOpts{ Namespace: namespace, Name: name, Help: helpMessage, - Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, //quantiles + Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, + // quantiles e.g. the "0.5 quantile" will actually be the phi quantile for some phi in [0.5 - 0.05, 0.5 + 0.05] }, ) prometheus.DefaultRegisterer.MustRegister(summary) From 56963ad1caa5144530d6cb994c7231fcd9319e55 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Fri, 10 Jul 2020 17:53:23 -0400 Subject: [PATCH 45/53] added metric that counts number of entries in each ipset. still need to add tests --- npm/ipsm/ipsm.go | 6 +++--- npm/metrics/prometheus-metrics.go | 4 ++++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/npm/ipsm/ipsm.go b/npm/ipsm/ipsm.go index 5a698f93ae..6e5701d1fc 100644 --- a/npm/ipsm/ipsm.go +++ b/npm/ipsm/ipsm.go @@ -232,7 +232,7 @@ func (ipsMgr *IpsetManager) DeleteSet(setName string) error { delete(ipsMgr.setMap, setName) metrics.NumIPSets.Dec() - // TODO set count metric with setName label to 0 + metrics.IPSetInventory.WithLabelValues(setName).Set(0) return nil } @@ -278,7 +278,7 @@ func (ipsMgr *IpsetManager) AddToSet(setName, ip, spec, podUid string) error { // Stores the podUid as the context for this ip. ipsMgr.setMap[setName].elements[ip] = podUid - // TODO increment set count metric with setName label + metrics.IPSetInventory.WithLabelValues(setName).Inc() return nil } @@ -321,7 +321,7 @@ func (ipsMgr *IpsetManager) DeleteFromSet(setName, ip, podUid string) error { // Now cleanup the cache delete(ipsMgr.setMap[setName].elements, ip) - // TODO decrement set count metric with setName label + metrics.IPSetInventory.WithLabelValues(setName).Dec() if len(ipsMgr.setMap[setName].elements) == 0 { ipsMgr.DeleteSet(setName) diff --git a/npm/metrics/prometheus-metrics.go b/npm/metrics/prometheus-metrics.go index 936f1b990f..ee2c1cc6bf 100644 --- a/npm/metrics/prometheus-metrics.go +++ b/npm/metrics/prometheus-metrics.go @@ -17,6 +17,7 @@ var ( AddIPTableRuleExecTime = createSummary(addIPTableRuleExecTimeLabel, addIPTableRuleExecTimeHelp) NumIPSets = createGauge(numIPSetsLabel, numIPSetsHelp) AddIPSetExecTime = createSummary(addIPSetExecTimeLabel, addIPSetExecTimeHelp) + IPSetInventory = createGaugeVec(ipsetInventoryLabel, ipsetInventoryHelp) ) const ( @@ -37,6 +38,9 @@ const ( addIPSetExecTimeLabel = "add_ipset_exec_time" addIPSetExecTimeHelp = "Execution time in milliseconds for creating an IP set" + + ipsetCountsLabel = "ipset_counts" + ipsetCountsHelp = "Number of entries in each individual IPSet" ) func createGauge(name string, helpMessage string) prometheus.Gauge { From df2be75d69d7cc224c5127222a81483393ea553f Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Fri, 10 Jul 2020 18:50:24 -0400 Subject: [PATCH 46/53] fixed creation of GaugeVecs, and use explicit labeling instead of order-based labeling now --- npm/ipsm/ipsm.go | 7 +++-- npm/metrics/prometheus-metrics.go | 51 ++++++++++++++++--------------- npm/metrics/promutil/util.go | 13 +++++--- 3 files changed, 40 insertions(+), 31 deletions(-) diff --git a/npm/ipsm/ipsm.go b/npm/ipsm/ipsm.go index 6e5701d1fc..400f358233 100644 --- a/npm/ipsm/ipsm.go +++ b/npm/ipsm/ipsm.go @@ -11,6 +11,7 @@ import ( "github.com/Azure/azure-container-networking/log" "github.com/Azure/azure-container-networking/npm/metrics" "github.com/Azure/azure-container-networking/npm/util" + "github.com/prometheus/client_golang/prometheus" ) type ipsEntry struct { @@ -232,7 +233,7 @@ func (ipsMgr *IpsetManager) DeleteSet(setName string) error { delete(ipsMgr.setMap, setName) metrics.NumIPSets.Dec() - metrics.IPSetInventory.WithLabelValues(setName).Set(0) + metrics.IPSetInventory.With(prometheus.Labels{metrics.SetNameLabel: setName}).Set(0) return nil } @@ -278,7 +279,7 @@ func (ipsMgr *IpsetManager) AddToSet(setName, ip, spec, podUid string) error { // Stores the podUid as the context for this ip. ipsMgr.setMap[setName].elements[ip] = podUid - metrics.IPSetInventory.WithLabelValues(setName).Inc() + metrics.IPSetInventory.With(prometheus.Labels{metrics.SetNameLabel: setName}).Inc() return nil } @@ -321,7 +322,7 @@ func (ipsMgr *IpsetManager) DeleteFromSet(setName, ip, podUid string) error { // Now cleanup the cache delete(ipsMgr.setMap[setName].elements, ip) - metrics.IPSetInventory.WithLabelValues(setName).Dec() + metrics.IPSetInventory.With(prometheus.Labels{metrics.SetNameLabel: setName}).Dec() if len(ipsMgr.setMap[setName].elements) == 0 { ipsMgr.DeleteSet(setName) diff --git a/npm/metrics/prometheus-metrics.go b/npm/metrics/prometheus-metrics.go index ee2c1cc6bf..8ff3c99823 100644 --- a/npm/metrics/prometheus-metrics.go +++ b/npm/metrics/prometheus-metrics.go @@ -6,41 +6,44 @@ import ( const namespace = "npm" -// Gauge metrics have methods Inc(), Dec(), and Set(float64) +// Prometheus Metrics +// Gauge metrics have the methods Inc(), Dec(), and Set(float64) // Summary metrics has the method Observe(float64) -// For any Vector metric, you can call WithLabelValues(...string) before the above methods e.g. SomeGaugeVec.WithLabelValues("label1", "label2").Dec() - +// For any Vector metric, you can call With(prometheus.Labels) before the above methods +// e.g. SomeGaugeVec.With(prometheus.Labels{label1: val1, label2: val2, ...).Dec() var ( - NumPolicies = createGauge(numPoliciesLabel, numPoliciesHelp) - AddPolicyExecTime = createSummary(addPolicyExecTimeLabel, addPolicyExecTimeHelp) - NumIPTableRules = createGauge(numIPTableRulesLabel, numIPTableRulesHelp) - AddIPTableRuleExecTime = createSummary(addIPTableRuleExecTimeLabel, addIPTableRuleExecTimeHelp) - NumIPSets = createGauge(numIPSetsLabel, numIPSetsHelp) - AddIPSetExecTime = createSummary(addIPSetExecTimeLabel, addIPSetExecTimeHelp) - IPSetInventory = createGaugeVec(ipsetInventoryLabel, ipsetInventoryHelp) + NumPolicies = createGauge(numPoliciesName, numPoliciesHelp) + AddPolicyExecTime = createSummary(addPolicyExecTimeName, addPolicyExecTimeHelp) + NumIPTableRules = createGauge(numIPTableRulesName, numIPTableRulesHelp) + AddIPTableRuleExecTime = createSummary(addIPTableRuleExecTimeName, addIPTableRuleExecTimeHelp) + NumIPSets = createGauge(numIPSetsName, numIPSetsHelp) + AddIPSetExecTime = createSummary(addIPSetExecTimeName, addIPSetExecTimeHelp) + IPSetInventory = createGaugeVec(ipsetInventoryName, ipsetInventoryHelp, SetNameLabel) ) +// Constants for metric names and descriptions as well as exported labels for Vector metrics const ( - numPoliciesLabel = "num_policies" - numPoliciesHelp = "The number of current network policies for this node" + numPoliciesName = "num_policies" + numPoliciesHelp = "The number of current network policies for this node" - addPolicyExecTimeLabel = "add_policy_exec_time" - addPolicyExecTimeHelp = "Execution time in milliseconds for adding a network policy" + addPolicyExecTimeName = "add_policy_exec_time" + addPolicyExecTimeHelp = "Execution time in milliseconds for adding a network policy" - numIPTableRulesLabel = "num_iptables_rules" - numIPTableRulesHelp = "The number of current IPTable rules for this node" + numIPTableRulesName = "num_iptables_rules" + numIPTableRulesHelp = "The number of current IPTable rules for this node" - addIPTableRuleExecTimeLabel = "add_iptables_rule_exec_time" - addIPTableRuleExecTimeHelp = "Execution time in milliseconds for adding an IPTable rule to a chain" + addIPTableRuleExecTimeName = "add_iptables_rule_exec_time" + addIPTableRuleExecTimeHelp = "Execution time in milliseconds for adding an IPTable rule to a chain" - numIPSetsLabel = "num_ipsets" - numIPSetsHelp = "The number of current IP sets for this node" + numIPSetsName = "num_ipsets" + numIPSetsHelp = "The number of current IP sets for this node" - addIPSetExecTimeLabel = "add_ipset_exec_time" - addIPSetExecTimeHelp = "Execution time in milliseconds for creating an IP set" + addIPSetExecTimeName = "add_ipset_exec_time" + addIPSetExecTimeHelp = "Execution time in milliseconds for creating an IP set" - ipsetCountsLabel = "ipset_counts" - ipsetCountsHelp = "Number of entries in each individual IPSet" + ipsetInventoryName = "ipset_counts" + ipsetInventoryHelp = "Number of entries in each individual IPSet" + SetNameLabel = "set_name" ) func createGauge(name string, helpMessage string) prometheus.Gauge { diff --git a/npm/metrics/promutil/util.go b/npm/metrics/promutil/util.go index f3bbafde00..d1780eddaa 100644 --- a/npm/metrics/promutil/util.go +++ b/npm/metrics/promutil/util.go @@ -26,8 +26,8 @@ func NotifyIfErrors(t *testing.T, errors ...error) { } } -// GetValue is used for validation. It returns a gaugeMetric's value. -func GetValue(gaugeMetric prometheus.Collector) (int, error) { +// GetValue is used for validation. It returns a Gauge metric's value. +func GetValue(gaugeMetric prometheus.Gauge) (int, error) { dtoMetric, err := getDTOMetric(gaugeMetric) if err != nil { return 0, err @@ -35,8 +35,13 @@ func GetValue(gaugeMetric prometheus.Collector) (int, error) { return int(dtoMetric.Gauge.GetValue()), nil } -// GetCountValue is used for validation. It returns the number of times a summaryMetric has recorded an observation. -func GetCountValue(summaryMetric prometheus.Collector) (int, error) { +// GetVecValue is used for validation. It returns a Gauge Vec metric's value. +func GetVecValue(gaugeVecMetric prometheus.GaugeVec, labels prometheus.Labels) (int, error) { + return GetValue(gaugeVecMetric.With(labels)) +} + +// GetCountValue is used for validation. It returns the number of times a Summary metric has recorded an observation. +func GetCountValue(summaryMetric prometheus.Summary) (int, error) { dtoMetric, err := getDTOMetric(summaryMetric) if err != nil { return 0, err From 9b3a26d365ffb717395e70a44361f90b8e8cd544 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Mon, 13 Jul 2020 10:25:12 -0400 Subject: [PATCH 47/53] updated GetVecValue method signature --- npm/metrics/promutil/util.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/npm/metrics/promutil/util.go b/npm/metrics/promutil/util.go index d1780eddaa..61feb2eec8 100644 --- a/npm/metrics/promutil/util.go +++ b/npm/metrics/promutil/util.go @@ -36,7 +36,7 @@ func GetValue(gaugeMetric prometheus.Gauge) (int, error) { } // GetVecValue is used for validation. It returns a Gauge Vec metric's value. -func GetVecValue(gaugeVecMetric prometheus.GaugeVec, labels prometheus.Labels) (int, error) { +func GetVecValue(gaugeVecMetric *prometheus.GaugeVec, labels prometheus.Labels) (int, error) { return GetValue(gaugeVecMetric.With(labels)) } From 1dea15fabf5ce9a7860b56fe7f0b546e7b592169 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Mon, 13 Jul 2020 10:27:45 -0400 Subject: [PATCH 48/53] added set to metrics on creation and wrote unit tests for CreateSet, AddToSet, DeleteFromSet, DeleteSet --- npm/ipsm/ipsm.go | 1 + npm/ipsm/ipsm_test.go | 55 ++++++++++++++++++++++++++++++++----------- 2 files changed, 42 insertions(+), 14 deletions(-) diff --git a/npm/ipsm/ipsm.go b/npm/ipsm/ipsm.go index 400f358233..54a2f74902 100644 --- a/npm/ipsm/ipsm.go +++ b/npm/ipsm/ipsm.go @@ -205,6 +205,7 @@ func (ipsMgr *IpsetManager) CreateSet(setName string, spec []string) error { metrics.NumIPSets.Inc() timer.StopAndRecord(metrics.AddIPSetExecTime) + metrics.IPSetInventory.With(prometheus.Labels{metrics.SetNameLabel: setName}).Set(0) return nil } diff --git a/npm/ipsm/ipsm_test.go b/npm/ipsm/ipsm_test.go index 43a1c8cf5a..1f4bbd3908 100644 --- a/npm/ipsm/ipsm_test.go +++ b/npm/ipsm/ipsm_test.go @@ -9,6 +9,7 @@ import ( "github.com/Azure/azure-container-networking/npm/metrics" "github.com/Azure/azure-container-networking/npm/metrics/promutil" "github.com/Azure/azure-container-networking/npm/util" + "github.com/prometheus/client_golang/prometheus" ) func TestSave(t *testing.T) { @@ -132,23 +133,30 @@ func TestCreateSet(t *testing.T) { gaugeVal, err1 := promutil.GetValue(metrics.NumIPSets) countVal, err2 := promutil.GetCountValue(metrics.AddIPSetExecTime) - if err := ipsMgr.CreateSet("test-set", []string{util.IpsetNetHashFlag}); err != nil { + testSet1Name := "test-set" + if err := ipsMgr.CreateSet(testSet1Name, []string{util.IpsetNetHashFlag}); err != nil { t.Errorf("TestCreateSet failed @ ipsMgr.CreateSet") } + testSet2Name := "test-set-with-maxelem" spec := append([]string{util.IpsetNetHashFlag, util.IpsetMaxelemName, util.IpsetMaxelemNum}) - if err := ipsMgr.CreateSet("test-set-with-maxelem", spec); err != nil { + if err := ipsMgr.CreateSet(testSet2Name, spec); err != nil { t.Errorf("TestCreateSet failed @ ipsMgr.CreateSet when set maxelem") } newGaugeVal, err3 := promutil.GetValue(metrics.NumIPSets) newCountVal, err4 := promutil.GetCountValue(metrics.AddIPSetExecTime) - promutil.NotifyIfErrors(t, err1, err2, err3, err4) + testSet1Count, err5 := promutil.GetVecValue(metrics.IPSetInventory, prometheus.Labels{metrics.SetNameLabel: testSet1Name}) + testSet2Count, err6 := promutil.GetVecValue(metrics.IPSetInventory, prometheus.Labels{metrics.SetNameLabel: testSet2Name}) + promutil.NotifyIfErrors(t, err1, err2, err3, err4, err5, err6) if newGaugeVal != gaugeVal+2 { - t.Errorf("Change in ipset number didn't register in prometheus") + t.Errorf("Change in ipset number didn't register in Prometheus") } if newCountVal != countVal+2 { - t.Errorf("Execution time didn't register in prometheus") + t.Errorf("Execution time didn't register in Prometheus") + } + if testSet1Count != 0 || testSet2Count != 0 { + t.Errorf("Prometheus IPSet count has incorrect number of entries") } } @@ -164,21 +172,26 @@ func TestDeleteSet(t *testing.T) { } }() - if err := ipsMgr.CreateSet("test-set", append([]string{util.IpsetNetHashFlag})); err != nil { + testSetName := "test-set" + if err := ipsMgr.CreateSet(testSetName, append([]string{util.IpsetNetHashFlag})); err != nil { t.Errorf("TestDeleteSet failed @ ipsMgr.CreateSet") } gaugeVal, err1 := promutil.GetValue(metrics.NumIPSets) - if err := ipsMgr.DeleteSet("test-set"); err != nil { + if err := ipsMgr.DeleteSet(testSetName); err != nil { t.Errorf("TestDeleteSet failed @ ipsMgr.DeleteSet") } newGaugeVal, err2 := promutil.GetValue(metrics.NumIPSets) - promutil.NotifyIfErrors(t, err1, err2) + testSetCount, err3 := promutil.GetVecValue(metrics.IPSetInventory, prometheus.Labels{metrics.SetNameLabel: testSetName}) + promutil.NotifyIfErrors(t, err1, err2, err3) if newGaugeVal != gaugeVal-1 { t.Errorf("Change in ipset number didn't register in prometheus") } + if testSetCount != 0 { + t.Errorf("Prometheus IPSet count has incorrect number of entries") + } } func TestAddToSet(t *testing.T) { @@ -193,13 +206,20 @@ func TestAddToSet(t *testing.T) { } }() - if err := ipsMgr.AddToSet("test-set", "1.2.3.4", util.IpsetNetHashFlag, ""); err != nil { + testSetName := "test-set" + if err := ipsMgr.AddToSet(testSetName, "1.2.3.4", util.IpsetNetHashFlag, ""); err != nil { t.Errorf("TestAddToSet failed @ ipsMgr.AddToSet") } - if err := ipsMgr.AddToSet("test-set", "1.2.3.4/nomatch", util.IpsetNetHashFlag, ""); err != nil { + if err := ipsMgr.AddToSet(testSetName, "1.2.3.4/nomatch", util.IpsetNetHashFlag, ""); err != nil { t.Errorf("TestAddToSet with nomatch failed @ ipsMgr.AddToSet") } + + testSetCount, err1 := promutil.GetVecValue(metrics.IPSetInventory, prometheus.Labels{metrics.SetNameLabel: testSetName}) + promutil.NotifyIfErrors(t, err1) + if testSetCount != 2 { + t.Errorf("Prometheus IPSet count has incorrect number of entries") + } } func TestAddToSetWithCachePodInfo(t *testing.T) { @@ -254,22 +274,29 @@ func TestDeleteFromSet(t *testing.T) { } }() - if err := ipsMgr.AddToSet("test-set", "1.2.3.4", util.IpsetNetHashFlag, ""); err != nil { + testSetName := "test-set" + if err := ipsMgr.AddToSet(testSetName, "1.2.3.4", util.IpsetNetHashFlag, ""); err != nil { t.Errorf("TestDeleteFromSet failed @ ipsMgr.AddToSet") } - if len(ipsMgr.setMap["test-set"].elements) != 1 { + if len(ipsMgr.setMap[testSetName].elements) != 1 { t.Errorf("TestDeleteFromSet failed @ ipsMgr.AddToSet") } - if err := ipsMgr.DeleteFromSet("test-set", "1.2.3.4", ""); err != nil { + if err := ipsMgr.DeleteFromSet(testSetName, "1.2.3.4", ""); err != nil { t.Errorf("TestDeleteFromSet failed @ ipsMgr.DeleteFromSet") } // After deleting the only entry, "1.2.3.4" from "test-set", "test-set" ipset won't exist - if _, exists := ipsMgr.setMap["test-set"]; exists { + if _, exists := ipsMgr.setMap[testSetName]; exists { t.Errorf("TestDeleteFromSet failed @ ipsMgr.DeleteFromSet") } + + testSetCount, err1 := promutil.GetVecValue(metrics.IPSetInventory, prometheus.Labels{metrics.SetNameLabel: testSetName}) + promutil.NotifyIfErrors(t, err1) + if testSetCount != 0 { + t.Errorf("Prometheus IPSet count has incorrect number of entries") + } } func TestDeleteFromSetWithPodCache(t *testing.T) { From b4021786412961b6fada0c40db33ca07cacbb104 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Mon, 13 Jul 2020 19:38:15 -0400 Subject: [PATCH 49/53] use custom registry to limit content that Container Insights scrapes. Also log the start of http server --- npm/metrics/http.go | 4 +++- npm/metrics/prometheus-metrics.go | 12 +++++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/npm/metrics/http.go b/npm/metrics/http.go index 0a99a44d71..6954a25fb1 100644 --- a/npm/metrics/http.go +++ b/npm/metrics/http.go @@ -4,6 +4,7 @@ import ( "net/http" "time" + "github.com/Azure/azure-container-networking/log" "github.com/prometheus/client_golang/prometheus/promhttp" ) @@ -28,6 +29,7 @@ func StartHTTP(delayAmountAfterStart int) { started = true http.Handle(MetricsPath, getHandler()) + log.Logf("Starting Prometheus HTTP Server") go http.ListenAndServe(HTTPPort, nil) time.Sleep(time.Second * time.Duration(delayAmountAfterStart)) } @@ -35,7 +37,7 @@ func StartHTTP(delayAmountAfterStart int) { // getHandler returns the HTTP handler for the metrics endpoint func getHandler() http.Handler { if handler == nil { - handler = promhttp.Handler() + handler = promhttp.HandlerFor(registry, promhttp.HandlerOpts{}) } return handler } diff --git a/npm/metrics/prometheus-metrics.go b/npm/metrics/prometheus-metrics.go index 8ff3c99823..0a1b30dacc 100644 --- a/npm/metrics/prometheus-metrics.go +++ b/npm/metrics/prometheus-metrics.go @@ -46,6 +46,12 @@ const ( SetNameLabel = "set_name" ) +var registry = prometheus.NewRegistry() + +func register(collector prometheus.Collector) { + registry.MustRegister(collector) +} + func createGauge(name string, helpMessage string) prometheus.Gauge { gauge := prometheus.NewGauge( prometheus.GaugeOpts{ @@ -54,7 +60,7 @@ func createGauge(name string, helpMessage string) prometheus.Gauge { Help: helpMessage, }, ) - prometheus.DefaultRegisterer.MustRegister(gauge) + register(gauge) return gauge } @@ -67,7 +73,7 @@ func createGaugeVec(name string, helpMessage string, labels ...string) *promethe }, labels, ) - prometheus.DefaultRegisterer.MustRegister(gaugeVec) + register(gaugeVec) return gaugeVec } @@ -81,6 +87,6 @@ func createSummary(name string, helpMessage string) prometheus.Summary { // quantiles e.g. the "0.5 quantile" will actually be the phi quantile for some phi in [0.5 - 0.05, 0.5 + 0.05] }, ) - prometheus.DefaultRegisterer.MustRegister(summary) + register(summary) return summary } From 6e28082834a802cdb060c026bbfe181c99ade72e Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Mon, 13 Jul 2020 19:38:52 -0400 Subject: [PATCH 50/53] wrote TODO item comments for Restore and Destroy (currently these functions are only used in testing) --- npm/ipsm/ipsm.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/npm/ipsm/ipsm.go b/npm/ipsm/ipsm.go index 54a2f74902..6ef9316207 100644 --- a/npm/ipsm/ipsm.go +++ b/npm/ipsm/ipsm.go @@ -375,6 +375,8 @@ func (ipsMgr *IpsetManager) Destroy() error { return err } + //TODO set metrics.IPSetInventory to 0 for all set names + return nil } @@ -439,5 +441,7 @@ func (ipsMgr *IpsetManager) Restore(configFile string) error { } cmd.Wait() + //TODO based on the set name and number of entries in the config file, update metrics.IPSetInventory + return nil } From 1f614ab1b38b5e2ae994cc5b5c607fd8709ed8d4 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Mon, 13 Jul 2020 19:53:54 -0400 Subject: [PATCH 51/53] NPM won't crash if a Prometheus metric fails to register now (unlikely). Added logging for metric registration/creation, and explicit public function to initialize metrics so that we can finish log config first --- npm/metrics/prometheus-metrics.go | 44 ++++++++++++++++++++++--------- npm/plugin/main.go | 2 ++ 2 files changed, 34 insertions(+), 12 deletions(-) diff --git a/npm/metrics/prometheus-metrics.go b/npm/metrics/prometheus-metrics.go index 0a1b30dacc..312962b51c 100644 --- a/npm/metrics/prometheus-metrics.go +++ b/npm/metrics/prometheus-metrics.go @@ -1,6 +1,7 @@ package metrics import ( + "github.com/Azure/azure-container-networking/log" "github.com/prometheus/client_golang/prometheus" ) @@ -12,13 +13,13 @@ const namespace = "npm" // For any Vector metric, you can call With(prometheus.Labels) before the above methods // e.g. SomeGaugeVec.With(prometheus.Labels{label1: val1, label2: val2, ...).Dec() var ( - NumPolicies = createGauge(numPoliciesName, numPoliciesHelp) - AddPolicyExecTime = createSummary(addPolicyExecTimeName, addPolicyExecTimeHelp) - NumIPTableRules = createGauge(numIPTableRulesName, numIPTableRulesHelp) - AddIPTableRuleExecTime = createSummary(addIPTableRuleExecTimeName, addIPTableRuleExecTimeHelp) - NumIPSets = createGauge(numIPSetsName, numIPSetsHelp) - AddIPSetExecTime = createSummary(addIPSetExecTimeName, addIPSetExecTimeHelp) - IPSetInventory = createGaugeVec(ipsetInventoryName, ipsetInventoryHelp, SetNameLabel) + NumPolicies prometheus.Gauge + AddPolicyExecTime prometheus.Summary + NumIPTableRules prometheus.Gauge + AddIPTableRuleExecTime prometheus.Summary + NumIPSets prometheus.Gauge + AddIPSetExecTime prometheus.Summary + IPSetInventory *prometheus.GaugeVec ) // Constants for metric names and descriptions as well as exported labels for Vector metrics @@ -47,9 +48,28 @@ const ( ) var registry = prometheus.NewRegistry() +var haveInitialized = false -func register(collector prometheus.Collector) { - registry.MustRegister(collector) +// InitializeAll creates all the Prometheus Metrics. The metrics will be nil before this method is called. +func InitializeAll() { + if !haveInitialized { + NumPolicies = createGauge(numPoliciesName, numPoliciesHelp) + AddPolicyExecTime = createSummary(addPolicyExecTimeName, addPolicyExecTimeHelp) + NumIPTableRules = createGauge(numIPTableRulesName, numIPTableRulesHelp) + AddIPTableRuleExecTime = createSummary(addIPTableRuleExecTimeName, addIPTableRuleExecTimeHelp) + NumIPSets = createGauge(numIPSetsName, numIPSetsHelp) + AddIPSetExecTime = createSummary(addIPSetExecTimeName, addIPSetExecTimeHelp) + IPSetInventory = createGaugeVec(ipsetInventoryName, ipsetInventoryHelp, SetNameLabel) + log.Logf("Finished initializing all Prometheus metrics") + haveInitialized = true + } +} + +func register(collector prometheus.Collector, name string) { + err := registry.Register(collector) + if err != nil { + log.Errorf("Error creating metric %s", name) + } } func createGauge(name string, helpMessage string) prometheus.Gauge { @@ -60,7 +80,7 @@ func createGauge(name string, helpMessage string) prometheus.Gauge { Help: helpMessage, }, ) - register(gauge) + register(gauge, name) return gauge } @@ -73,7 +93,7 @@ func createGaugeVec(name string, helpMessage string, labels ...string) *promethe }, labels, ) - register(gaugeVec) + register(gaugeVec, name) return gaugeVec } @@ -87,6 +107,6 @@ func createSummary(name string, helpMessage string) prometheus.Summary { // quantiles e.g. the "0.5 quantile" will actually be the phi quantile for some phi in [0.5 - 0.05, 0.5 + 0.05] }, ) - register(summary) + register(summary, name) return summary } diff --git a/npm/plugin/main.go b/npm/plugin/main.go index 08130d7baf..468ce77b65 100644 --- a/npm/plugin/main.go +++ b/npm/plugin/main.go @@ -44,6 +44,8 @@ func main() { panic(err.Error()) } + metrics.InitializeAll() + // Creates the in-cluster config config, err := rest.InClusterConfig() if err != nil { From e04ede1de029f4c1c619af32730e0e926a0b9533 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Mon, 13 Jul 2020 20:52:12 -0400 Subject: [PATCH 52/53] initialize metrics in unit tests --- npm/ipsm/ipsm_test.go | 1 + npm/iptm/iptm_test.go | 1 + npm/namespace_test.go | 2 ++ 3 files changed, 4 insertions(+) diff --git a/npm/ipsm/ipsm_test.go b/npm/ipsm/ipsm_test.go index 1f4bbd3908..df48a57fb2 100644 --- a/npm/ipsm/ipsm_test.go +++ b/npm/ipsm/ipsm_test.go @@ -423,6 +423,7 @@ func TestRun(t *testing.T) { } func TestMain(m *testing.M) { + metrics.InitializeAll() ipsMgr := NewIpsetManager() ipsMgr.Save(util.IpsetConfigFile) diff --git a/npm/iptm/iptm_test.go b/npm/iptm/iptm_test.go index 54172efb6e..dcb0c4f046 100644 --- a/npm/iptm/iptm_test.go +++ b/npm/iptm/iptm_test.go @@ -226,6 +226,7 @@ func TestRun(t *testing.T) { } func TestMain(m *testing.M) { + metrics.InitializeAll() iptMgr := NewIptablesManager() iptMgr.Save(util.IptablesConfigFile) diff --git a/npm/namespace_test.go b/npm/namespace_test.go index 5d0922f186..4b947f7768 100644 --- a/npm/namespace_test.go +++ b/npm/namespace_test.go @@ -7,6 +7,7 @@ import ( "testing" "github.com/Azure/azure-container-networking/npm/iptm" + "github.com/Azure/azure-container-networking/npm/metrics" "github.com/Azure/azure-container-networking/npm/ipsm" "github.com/Azure/azure-container-networking/npm/util" @@ -178,6 +179,7 @@ func TestDeleteNamespace(t *testing.T) { } func TestMain(m *testing.M) { + metrics.InitializeAll() iptMgr := iptm.NewIptablesManager() iptMgr.Save(util.IptablesConfigFile) From 43e116196ccc3efaa7a172e08cb8da211e17ac93 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Tue, 14 Jul 2020 18:40:59 -0400 Subject: [PATCH 53/53] renamed util.go to test-util.go --- npm/metrics/promutil/{util.go => test-util.go} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename npm/metrics/promutil/{util.go => test-util.go} (100%) diff --git a/npm/metrics/promutil/util.go b/npm/metrics/promutil/test-util.go similarity index 100% rename from npm/metrics/promutil/util.go rename to npm/metrics/promutil/test-util.go