Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
56 commits
Select commit Hold shift + click to select a range
ae95acc
prometheus additions to testmain (commented out right now)
Jun 16, 2020
7505699
home of the npm prometheus metrics and tools for updating them, testi…
Jun 24, 2020
e57526e
add/remove policy metrics
Jun 24, 2020
c9e3529
add/remove iptables rule metric measurements
Jun 24, 2020
aa8fc12
add/remove ipset metric measurements
Jun 24, 2020
44e500b
testing for gauges. want to soon remove the boolean for including pro…
Jun 24, 2020
ccd3762
run http server that exposes prometheus from main
Jun 24, 2020
8ed5dd8
cleaner test additions with less code
Jun 24, 2020
ef5f168
removed incorrect instance of AddSet in the TestDeleteSet test
Jun 24, 2020
8decd79
added prometheus annotations to pod templates
Jun 24, 2020
73ee92c
merged prometheus changes with pull from master
Jun 25, 2020
1d9e319
deleted unused file
Jun 25, 2020
19c3e93
much more organized initialization of metrics now. now includes map f…
Jun 25, 2020
75d5772
add ability to get summary count value. now getting gauge values and …
Jun 25, 2020
3a081b1
condenses prometheus testing code base by condensing all prometheus e…
Jun 25, 2020
d41fe10
added testing for summary counts, condensed prometheus error handling…
Jun 25, 2020
9fb3bfb
update based on variable spelling change in metrics package
Jun 25, 2020
67675df
Added comments for functions and moved http handler code to the http …
Jun 25, 2020
605567a
fixed problem of registering same metric name for different metrics, …
Jun 25, 2020
b0d1f94
made prometheus testing folder with interactive testing file. moved o…
Jun 25, 2020
680cc88
moved testing around again
Jun 25, 2020
c2cbc61
fixed spelling mistake
Jun 25, 2020
59c56a7
counting mistake in unit test
Jun 25, 2020
00a1950
handler variable ws in wrong file. Changed stdout printing to logging
Jun 25, 2020
6bb6bb0
fixed parameter errors and counting error in a test
Jun 25, 2020
142c8f5
moved utilities for testing prometheus metrics to npm/util. Updated S…
Jun 26, 2020
94a7930
updated uses of StartHTTP to have the extra parameter
Jun 26, 2020
7edc4cd
updated GetValue and GetCountValue uses to use the prometheus feature…
Jun 26, 2020
8f0ece1
removed unnecessary comments, removed print statement, and added quan…
Jun 26, 2020
36b574e
fixed problem of double registering metrics
Jun 26, 2020
185efca
wait longer for http server to start
Jun 29, 2020
85949ba
moved tool in test-util.go to promutil/util.go
Jun 30, 2020
9c074df
fixed timer to be in milliseconds and updated metric descriptions to …
Jun 30, 2020
f3bf2b6
removed unnecessary comments
Jun 30, 2020
0fbf146
http server always started in a go routine now. Added comment justify…
Jun 30, 2020
f17d70e
debugging http connection refused in pipeline
Jun 30, 2020
7c337fe
fixed syntax error
Jun 30, 2020
46be509
removed debugging wrapper around http service
Jun 30, 2020
0a51a18
sleep so that the testing metrics endpoint can be pinged
Jun 30, 2020
e032f9b
redesigned GetValue and GetCountValue so that they don't use http calls
Jun 30, 2020
f7c17ba
removed random but helpful testing file - will write about quick test…
Jul 2, 2020
4797697
milliseconds were being truncated. now they have decimals
Jul 10, 2020
a4623bd
merged in shufang's pr'
Jul 10, 2020
7a5eb6e
use direct Prometheus metric commands instead of wrapping them
Jul 10, 2020
2f456fe
removed code used when testing was done through http server. Moved re…
Jul 10, 2020
29f3e90
added createGaugeVec, updated comments, made all help strings constants
Jul 10, 2020
56963ad
added metric that counts number of entries in each ipset. still need …
Jul 10, 2020
df2be75
fixed creation of GaugeVecs, and use explicit labeling instead of ord…
Jul 10, 2020
9b3a26d
updated GetVecValue method signature
Jul 13, 2020
1dea15f
added set to metrics on creation and wrote unit tests for CreateSet, …
Jul 13, 2020
b402178
use custom registry to limit content that Container Insights scrapes.…
Jul 13, 2020
6e28082
wrote TODO item comments for Restore and Destroy (currently these fun…
Jul 13, 2020
1f614ab
NPM won't crash if a Prometheus metric fails to register now (unlikel…
Jul 13, 2020
e04ede1
initialize metrics in unit tests
Jul 14, 2020
122a838
Merge branch 'master' of https://github.com/Azure/azure-container-net…
Jul 14, 2020
43e1161
renamed util.go to test-util.go
Jul 14, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion npm/azure-npm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ roleRef:
name: azure-npm
apiGroup: rbac.authorization.k8s.io
---
apiVersion: extensions/v1beta1
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: azure-npm
Expand All @@ -67,6 +67,8 @@ spec:
k8s-app: azure-npm
annotations:
scheduler.alpha.kubernetes.io/critical-pod: ''
prometheus.io/scrape: "true"
prometheus.io/port: "8000"
spec:
priorityClassName: system-node-critical
tolerations:
Expand Down
19 changes: 19 additions & 0 deletions npm/ipsm/ipsm.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@ import (
"syscall"

"github.com/Azure/azure-container-networking/log"
"github.com/Azure/azure-container-networking/npm/metrics"
"github.com/Azure/azure-container-networking/npm/util"
"github.com/prometheus/client_golang/prometheus"
)

type ipsEntry struct {
Expand Down Expand Up @@ -180,6 +182,8 @@ func (ipsMgr *IpsetManager) DeleteFromList(listName string, setName string) erro

// CreateSet creates an ipset.
func (ipsMgr *IpsetManager) CreateSet(setName string, spec []string) error {
timer := metrics.StartNewTimer()

if _, exists := ipsMgr.setMap[setName]; exists {
return nil
}
Expand All @@ -199,6 +203,10 @@ func (ipsMgr *IpsetManager) CreateSet(setName string, spec []string) error {

ipsMgr.setMap[setName] = NewIpset(setName)

metrics.NumIPSets.Inc()
timer.StopAndRecord(metrics.AddIPSetExecTime)
metrics.IPSetInventory.With(prometheus.Labels{metrics.SetNameLabel: setName}).Set(0)

return nil
}

Expand All @@ -225,6 +233,9 @@ func (ipsMgr *IpsetManager) DeleteSet(setName string) error {

delete(ipsMgr.setMap, setName)

metrics.NumIPSets.Dec()
metrics.IPSetInventory.With(prometheus.Labels{metrics.SetNameLabel: setName}).Set(0)

return nil
}

Expand Down Expand Up @@ -269,6 +280,8 @@ func (ipsMgr *IpsetManager) AddToSet(setName, ip, spec, podUid string) error {
// Stores the podUid as the context for this ip.
ipsMgr.setMap[setName].elements[ip] = podUid

metrics.IPSetInventory.With(prometheus.Labels{metrics.SetNameLabel: setName}).Inc()

return nil
}

Expand Down Expand Up @@ -310,6 +323,8 @@ func (ipsMgr *IpsetManager) DeleteFromSet(setName, ip, podUid string) error {
// Now cleanup the cache
delete(ipsMgr.setMap[setName].elements, ip)

metrics.IPSetInventory.With(prometheus.Labels{metrics.SetNameLabel: setName}).Dec()

if len(ipsMgr.setMap[setName].elements) == 0 {
ipsMgr.DeleteSet(setName)
}
Expand Down Expand Up @@ -360,6 +375,8 @@ func (ipsMgr *IpsetManager) Destroy() error {
return err
}

//TODO set metrics.IPSetInventory to 0 for all set names

return nil
}

Expand Down Expand Up @@ -424,5 +441,7 @@ func (ipsMgr *IpsetManager) Restore(configFile string) error {
}
cmd.Wait()

//TODO based on the set name and number of entries in the config file, update metrics.IPSetInventory

return nil
}
71 changes: 61 additions & 10 deletions npm/ipsm/ipsm_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@ import (
"os"
"testing"

"github.com/Azure/azure-container-networking/npm/metrics"
"github.com/Azure/azure-container-networking/npm/metrics/promutil"
"github.com/Azure/azure-container-networking/npm/util"
"github.com/prometheus/client_golang/prometheus"
)

func TestSave(t *testing.T) {
Expand Down Expand Up @@ -127,14 +130,34 @@ func TestCreateSet(t *testing.T) {
}
}()

if err := ipsMgr.CreateSet("test-set", []string{util.IpsetNetHashFlag}); err != nil {
gaugeVal, err1 := promutil.GetValue(metrics.NumIPSets)
countVal, err2 := promutil.GetCountValue(metrics.AddIPSetExecTime)

testSet1Name := "test-set"
if err := ipsMgr.CreateSet(testSet1Name, []string{util.IpsetNetHashFlag}); err != nil {
t.Errorf("TestCreateSet failed @ ipsMgr.CreateSet")
}

testSet2Name := "test-set-with-maxelem"
spec := append([]string{util.IpsetNetHashFlag, util.IpsetMaxelemName, util.IpsetMaxelemNum})
if err := ipsMgr.CreateSet("test-set-with-maxelem", spec); err != nil {
if err := ipsMgr.CreateSet(testSet2Name, spec); err != nil {
t.Errorf("TestCreateSet failed @ ipsMgr.CreateSet when set maxelem")
}

newGaugeVal, err3 := promutil.GetValue(metrics.NumIPSets)
newCountVal, err4 := promutil.GetCountValue(metrics.AddIPSetExecTime)
testSet1Count, err5 := promutil.GetVecValue(metrics.IPSetInventory, prometheus.Labels{metrics.SetNameLabel: testSet1Name})
testSet2Count, err6 := promutil.GetVecValue(metrics.IPSetInventory, prometheus.Labels{metrics.SetNameLabel: testSet2Name})
promutil.NotifyIfErrors(t, err1, err2, err3, err4, err5, err6)
if newGaugeVal != gaugeVal+2 {
t.Errorf("Change in ipset number didn't register in Prometheus")
}
if newCountVal != countVal+2 {
t.Errorf("Execution time didn't register in Prometheus")
}
if testSet1Count != 0 || testSet2Count != 0 {
t.Errorf("Prometheus IPSet count has incorrect number of entries")
}
}

func TestDeleteSet(t *testing.T) {
Expand All @@ -149,13 +172,26 @@ func TestDeleteSet(t *testing.T) {
}
}()

if err := ipsMgr.CreateSet("test-set", append([]string{util.IpsetNetHashFlag})); err != nil {
testSetName := "test-set"
if err := ipsMgr.CreateSet(testSetName, append([]string{util.IpsetNetHashFlag})); err != nil {
t.Errorf("TestDeleteSet failed @ ipsMgr.CreateSet")
}

if err := ipsMgr.DeleteSet("test-set"); err != nil {
gaugeVal, err1 := promutil.GetValue(metrics.NumIPSets)

if err := ipsMgr.DeleteSet(testSetName); err != nil {
t.Errorf("TestDeleteSet failed @ ipsMgr.DeleteSet")
}

newGaugeVal, err2 := promutil.GetValue(metrics.NumIPSets)
testSetCount, err3 := promutil.GetVecValue(metrics.IPSetInventory, prometheus.Labels{metrics.SetNameLabel: testSetName})
promutil.NotifyIfErrors(t, err1, err2, err3)
if newGaugeVal != gaugeVal-1 {
t.Errorf("Change in ipset number didn't register in prometheus")
}
if testSetCount != 0 {
t.Errorf("Prometheus IPSet count has incorrect number of entries")
}
}

func TestAddToSet(t *testing.T) {
Expand All @@ -170,13 +206,20 @@ func TestAddToSet(t *testing.T) {
}
}()

if err := ipsMgr.AddToSet("test-set", "1.2.3.4", util.IpsetNetHashFlag, ""); err != nil {
testSetName := "test-set"
if err := ipsMgr.AddToSet(testSetName, "1.2.3.4", util.IpsetNetHashFlag, ""); err != nil {
t.Errorf("TestAddToSet failed @ ipsMgr.AddToSet")
}

if err := ipsMgr.AddToSet("test-set", "1.2.3.4/nomatch", util.IpsetNetHashFlag, ""); err != nil {
if err := ipsMgr.AddToSet(testSetName, "1.2.3.4/nomatch", util.IpsetNetHashFlag, ""); err != nil {
t.Errorf("TestAddToSet with nomatch failed @ ipsMgr.AddToSet")
}

testSetCount, err1 := promutil.GetVecValue(metrics.IPSetInventory, prometheus.Labels{metrics.SetNameLabel: testSetName})
promutil.NotifyIfErrors(t, err1)
if testSetCount != 2 {
t.Errorf("Prometheus IPSet count has incorrect number of entries")
}
}

func TestAddToSetWithCachePodInfo(t *testing.T) {
Expand Down Expand Up @@ -231,22 +274,29 @@ func TestDeleteFromSet(t *testing.T) {
}
}()

if err := ipsMgr.AddToSet("test-set", "1.2.3.4", util.IpsetNetHashFlag, ""); err != nil {
testSetName := "test-set"
if err := ipsMgr.AddToSet(testSetName, "1.2.3.4", util.IpsetNetHashFlag, ""); err != nil {
t.Errorf("TestDeleteFromSet failed @ ipsMgr.AddToSet")
}

if len(ipsMgr.setMap["test-set"].elements) != 1 {
if len(ipsMgr.setMap[testSetName].elements) != 1 {
t.Errorf("TestDeleteFromSet failed @ ipsMgr.AddToSet")
}

if err := ipsMgr.DeleteFromSet("test-set", "1.2.3.4", ""); err != nil {
if err := ipsMgr.DeleteFromSet(testSetName, "1.2.3.4", ""); err != nil {
t.Errorf("TestDeleteFromSet failed @ ipsMgr.DeleteFromSet")
}

// After deleting the only entry, "1.2.3.4" from "test-set", "test-set" ipset won't exist
if _, exists := ipsMgr.setMap["test-set"]; exists {
if _, exists := ipsMgr.setMap[testSetName]; exists {
t.Errorf("TestDeleteFromSet failed @ ipsMgr.DeleteFromSet")
}

testSetCount, err1 := promutil.GetVecValue(metrics.IPSetInventory, prometheus.Labels{metrics.SetNameLabel: testSetName})
promutil.NotifyIfErrors(t, err1)
if testSetCount != 0 {
t.Errorf("Prometheus IPSet count has incorrect number of entries")
}
}

func TestDeleteFromSetWithPodCache(t *testing.T) {
Expand Down Expand Up @@ -373,6 +423,7 @@ func TestRun(t *testing.T) {
}

func TestMain(m *testing.M) {
metrics.InitializeAll()
ipsMgr := NewIpsetManager()
ipsMgr.Save(util.IpsetConfigFile)

Expand Down
8 changes: 8 additions & 0 deletions npm/iptm/iptm.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"golang.org/x/sys/unix"

"github.com/Azure/azure-container-networking/log"
"github.com/Azure/azure-container-networking/npm/metrics"
"github.com/Azure/azure-container-networking/npm/util"
"k8s.io/apimachinery/pkg/util/wait"
// utiliptables "k8s.io/kubernetes/pkg/util/iptables"
Expand Down Expand Up @@ -298,6 +299,8 @@ func (iptMgr *IptablesManager) DeleteChain(chain string) error {

// Add adds a rule in iptables.
func (iptMgr *IptablesManager) Add(entry *IptEntry) error {
timer := metrics.StartNewTimer()

log.Logf("Adding iptables entry: %+v.", entry)

if entry.IsJumpEntry {
Expand All @@ -310,6 +313,9 @@ func (iptMgr *IptablesManager) Add(entry *IptEntry) error {
return err
}

metrics.NumIPTableRules.Inc()
timer.StopAndRecord(metrics.AddIPTableRuleExecTime)

return nil
}

Expand All @@ -332,6 +338,8 @@ func (iptMgr *IptablesManager) Delete(entry *IptEntry) error {
return err
}

metrics.NumIPTableRules.Dec()

return nil
}

Expand Down
27 changes: 26 additions & 1 deletion npm/iptm/iptm_test.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
package iptm

import (
"testing"
"os"
"testing"

"github.com/Azure/azure-container-networking/npm/metrics"
"github.com/Azure/azure-container-networking/npm/metrics/promutil"
"github.com/Azure/azure-container-networking/npm/util"
)

Expand Down Expand Up @@ -147,9 +149,23 @@ func TestAdd(t *testing.T) {
util.IptablesReject,
},
}

gaugeVal, err1 := promutil.GetValue(metrics.NumIPTableRules)
countVal, err2 := promutil.GetCountValue(metrics.AddIPTableRuleExecTime)

if err := iptMgr.Add(entry); err != nil {
t.Errorf("TestAdd failed @ iptMgr.Add")
}

newGaugeVal, err3 := promutil.GetValue(metrics.NumIPTableRules)
newCountVal, err4 := promutil.GetCountValue(metrics.AddIPTableRuleExecTime)
promutil.NotifyIfErrors(t, err1, err2, err3, err4)
if newGaugeVal != gaugeVal+1 {
t.Errorf("Change in iptable rule number didn't register in prometheus")
}
if newCountVal != countVal+1 {
t.Errorf("Execution time didn't register in prometheus")
}
}

func TestDelete(t *testing.T) {
Expand All @@ -175,9 +191,17 @@ func TestDelete(t *testing.T) {
t.Errorf("TestDelete failed @ iptMgr.Add")
}

gaugeVal, err1 := promutil.GetValue(metrics.NumIPTableRules)

if err := iptMgr.Delete(entry); err != nil {
t.Errorf("TestDelete failed @ iptMgr.Delete")
}

newGaugeVal, err2 := promutil.GetValue(metrics.NumIPTableRules)
promutil.NotifyIfErrors(t, err1, err2)
if newGaugeVal != gaugeVal-1 {
t.Errorf("Change in iptable rule number didn't register in prometheus")
}
}

func TestRun(t *testing.T) {
Expand All @@ -202,6 +226,7 @@ func TestRun(t *testing.T) {
}

func TestMain(m *testing.M) {
metrics.InitializeAll()
iptMgr := NewIptablesManager()
iptMgr.Save(util.IptablesConfigFile)

Expand Down
43 changes: 43 additions & 0 deletions npm/metrics/http.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package metrics

import (
"net/http"
"time"

"github.com/Azure/azure-container-networking/log"
"github.com/prometheus/client_golang/prometheus/promhttp"
)

// Network location of the Prometheus metrics endpoint served by StartHTTP.
const (
// HTTPPort is the port used by the HTTP server (includes a preceding colon).
HTTPPort = ":8000"

// MetricsPath is the path for the Prometheus metrics endpoint (includes a preceding slash).
MetricsPath = "/metrics"
)

// started records whether StartHTTP has already launched the HTTP server,
// so that later calls to StartHTTP return without doing anything.
var started = false

// handler caches the metrics HTTP handler; it is nil until getHandler builds it.
var handler http.Handler

// StartHTTP starts an HTTP server in a goroutine listening on HTTPPort and
// exposes the metrics on the MetricsPath endpoint.
// By being exposed, the metrics can be scraped by a Prometheus Server or Container Insights.
//
// Only the first call has an effect; subsequent calls return immediately.
// After launching the server for the first time, the function pauses for
// delayAmountAfterStart seconds before returning.
//
// If the server stops (for example because the port cannot be bound), the
// error is logged rather than silently dropped.
func StartHTTP(delayAmountAfterStart int) {
	if started {
		return
	}
	started = true

	http.Handle(MetricsPath, getHandler())
	log.Logf("Starting Prometheus HTTP Server")
	go func() {
		// ListenAndServe blocks while serving and always returns a non-nil
		// error when it stops; surface it so a failed bind is diagnosable.
		if err := http.ListenAndServe(HTTPPort, nil); err != nil {
			log.Logf("Prometheus HTTP Server stopped: %v", err)
		}
	}()
	time.Sleep(time.Second * time.Duration(delayAmountAfterStart))
}

// getHandler returns the HTTP handler for the metrics endpoint.
// The handler is built from the package registry on first use and the same
// instance is returned on every later call.
func getHandler() http.Handler {
	if handler != nil {
		return handler
	}

	handler = promhttp.HandlerFor(registry, promhttp.HandlerOpts{})
	return handler
}
Loading