From 3d6ac5accf8056eea61a6e75f6cc628e9c454252 Mon Sep 17 00:00:00 2001 From: Shufang Date: Wed, 12 Aug 2020 11:25:42 -0700 Subject: [PATCH 01/19] Accelerate metrics report from every 30 mins to every 5 mins. --- npm/npm.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/npm/npm.go b/npm/npm.go index bfbf79d0c2..01fd6e9ea6 100644 --- a/npm/npm.go +++ b/npm/npm.go @@ -32,7 +32,7 @@ const ( restoreMaxRetries = 10 backupWaitTimeInSeconds = 60 telemetryRetryTimeInSeconds = 60 - heartbeatIntervalInMinutes = 30 + heartbeatIntervalInMinutes = 5 ) // NetworkPolicyManager contains informers for pod, namespace and networkpolicy. From fd447c6e734ae4b399219091f6b3d96fa7e0abb8 Mon Sep 17 00:00:00 2001 From: Shufang Date: Wed, 12 Aug 2020 12:15:16 -0700 Subject: [PATCH 02/19] Add errCountTest metric. --- npm/npm.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/npm/npm.go b/npm/npm.go index 01fd6e9ea6..e32f57570f 100644 --- a/npm/npm.go +++ b/npm/npm.go @@ -112,6 +112,10 @@ func (npMgr *NetworkPolicyManager) SendAiMetrics() { Name: "NwPolicyCount", CustomDimensions: customDimensions, } + errCountTest = aitelemetry.Metric{ + Name: "errCountTest", + CustomDimensions: customDimensions, + } ) for i := 0; err != nil && i < 5; i++ { @@ -131,10 +135,12 @@ func (npMgr *NetworkPolicyManager) SendAiMetrics() { podCount.Value = float64(clusterState.PodCount) nsCount.Value = float64(clusterState.NsCount) nwPolicyCount.Value = float64(clusterState.NwPolicyCount) + errCountTest.Value = float64(77) th.TrackMetric(podCount) th.TrackMetric(nsCount) th.TrackMetric(nwPolicyCount) + th.TrackMetric(errCountTest) } } else { log.Logf("Failed to initialize AppInsights handle with err: %+v", err) From 72b38fcd36dde15c53beec5d89ab09a6847ff6ca Mon Sep 17 00:00:00 2001 From: Shufang Date: Wed, 12 Aug 2020 13:59:25 -0700 Subject: [PATCH 03/19] Refactor SendAiMetrics. AI initialization is in main routine while send metrics is in another go routine. --- npm/metrics/ai-utils.go | 70 +++++++++++++++++++++++++++++++++++++++++ npm/npm.go | 63 +++++++++++++------------------------ npm/plugin/main.go | 3 +- 3 files changed, 93 insertions(+), 43 deletions(-) create mode 100644 npm/metrics/ai-utils.go diff --git a/npm/metrics/ai-utils.go b/npm/metrics/ai-utils.go new file mode 100644 index 0000000000..649889f3c4 --- /dev/null +++ b/npm/metrics/ai-utils.go @@ -0,0 +1,70 @@ +package metrics + +import ( + "time" + + "github.com/Azure/azure-container-networking/aitelemetry" + "github.com/Azure/azure-container-networking/log" + "github.com/Azure/azure-container-networking/npm/util" +) + +var ( + th aitelemetry.TelemetryHandle +) + +// CreateTelemetryHandle creates +func CreateTelemetryHandle(version string) error { + var aiMetadata string + + aiConfig := aitelemetry.AIConfig{ + AppName: util.AzureNpmFlag, + AppVersion: version, + BatchSize: 32768, + BatchInterval: 30, + RefreshTimeout: 15, + DebugMode: true, + GetEnvRetryCount: 5, + GetEnvRetryWaitTimeInSecs: 3, + } + + var err error + th, err = aitelemetry.NewAITelemetry("", aiMetadata, aiConfig) + + for i := 0; err != nil && i < 5; i++ { + log.Logf("Failed to init AppInsights with err: %+v for %d time", err, i + 1) + time.Sleep(time.Minute * 5) + th, err = aitelemetry.NewAITelemetry("", aiMetadata, aiConfig) + } + + if err != nil { + return err + } + + if th != nil { + log.Logf("Initialized AppInsights handle") + } + + return nil +} + +// SendMetric sends +func SendMetric(metric aitelemetry.Metric) { + if th == nil { + log.Logf("AppInsights didn't initialized.") + return + } + th.TrackMetric(metric) +} + +// SendErrorMetric is responsible for sending error metrics trhough AI telemetry +// func SendErrorMetric(errorCode int, packageName, functionName string) { +// customDimensions := map[string]string{"PackageName": packageName, +// "FunctionName": functionName, +// "ErrorCode": strconv.Itoa(errorCode)} +// metric := aitelemetry.Metric{ +// Name: util.ErrorMetric, +// Value: util.ErrorValue, +// CustomDimensions: customDimensions, +// } +// go SendMetric(metric) +// } \ No newline at end of file diff --git a/npm/npm.go b/npm/npm.go index e32f57570f..f44a86cec4 100644 --- a/npm/npm.go +++ b/npm/npm.go @@ -12,6 +12,7 @@ import ( "github.com/Azure/azure-container-networking/aitelemetry" "github.com/Azure/azure-container-networking/log" "github.com/Azure/azure-container-networking/npm/iptm" + "github.com/Azure/azure-container-networking/npm/metrics" "github.com/Azure/azure-container-networking/npm/util" "github.com/Azure/azure-container-networking/telemetry" corev1 "k8s.io/api/core/v1" @@ -82,21 +83,14 @@ func (npMgr *NetworkPolicyManager) GetClusterState() telemetry.ClusterState { return npMgr.clusterState } -// SendAiMetrics :- send NPM metrics using AppInsights -func (npMgr *NetworkPolicyManager) SendAiMetrics() { - var ( - aiConfig = aitelemetry.AIConfig{ - AppName: util.AzureNpmFlag, - AppVersion: npMgr.version, - BatchSize: 32768, - BatchInterval: 30, - RefreshTimeout: 15, - DebugMode: true, - GetEnvRetryCount: 5, - GetEnvRetryWaitTimeInSecs: 3, - } +// GetAppVersion returns network policy manager app version +func (npMgr *NetworkPolicyManager) GetAppVersion() string { + return npMgr.version +} - th, err = aitelemetry.NewAITelemetry("", aiMetadata, aiConfig) +// SendClusterMetrics :- send NPM cluster metrics using AppInsights +func (npMgr *NetworkPolicyManager) SendClusterMetrics() { + var ( heartbeat = time.NewTicker(time.Minute * heartbeatIntervalInMinutes).C customDimensions = map[string]string{"ClusterID": util.GetClusterID(npMgr.nodeName), "APIServer": npMgr.serverVersion.String()} @@ -113,37 +107,22 @@ func (npMgr *NetworkPolicyManager) SendAiMetrics() { CustomDimensions: customDimensions, } errCountTest = aitelemetry.Metric{ - Name: "errCountTest", + Name: "errCountTest02", CustomDimensions: customDimensions, } ) - - for i := 0; err != nil && i < 5; i++ { - log.Logf("Failed to init AppInsights with err: %+v", err) - time.Sleep(time.Minute * 5) - th, err = aitelemetry.NewAITelemetry("", aiMetadata, aiConfig) - } - - if th != nil { - log.Logf("Initialized AppInsights handle") - - defer th.Close(10) - - for { - <-heartbeat - clusterState := npMgr.GetClusterState() - podCount.Value = float64(clusterState.PodCount) - nsCount.Value = float64(clusterState.NsCount) - nwPolicyCount.Value = float64(clusterState.NwPolicyCount) - errCountTest.Value = float64(77) - - th.TrackMetric(podCount) - th.TrackMetric(nsCount) - th.TrackMetric(nwPolicyCount) - th.TrackMetric(errCountTest) - } - } else { - log.Logf("Failed to initialize AppInsights handle with err: %+v", err) + for { + <-heartbeat + clusterState := npMgr.GetClusterState() + podCount.Value = float64(clusterState.PodCount) + nsCount.Value = float64(clusterState.NsCount) + nwPolicyCount.Value = float64(clusterState.NwPolicyCount) + errCountTest.Value = float64(77) + + metrics.SendMetric(podCount) + metrics.SendMetric(nsCount) + metrics.SendMetric(nwPolicyCount) + metrics.SendMetric(errCountTest) } } diff --git a/npm/plugin/main.go b/npm/plugin/main.go index 468ce77b65..e723caccdb 100644 --- a/npm/plugin/main.go +++ b/npm/plugin/main.go @@ -63,7 +63,8 @@ func main() { npMgr := npm.NewNetworkPolicyManager(clientset, factory, version) - go npMgr.SendAiMetrics() + metrics.CreateTelemetryHandle(npMgr.GetAppVersion()) + go npMgr.SendClusterMetrics() if err = npMgr.Start(wait.NeverStop); err != nil { log.Logf("npm failed with error %v.", err) From cbe038122b6449bd3ca0ee203f6b3128a754b2d4 Mon Sep 17 00:00:00 2001 From: Shufang Date: Wed, 12 Aug 2020 16:01:43 -0700 Subject: [PATCH 04/19] Add aiMetadata config. --- npm/metrics/ai-utils.go | 3 +-- npm/npm.go | 5 +++++ npm/plugin/main.go | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/npm/metrics/ai-utils.go b/npm/metrics/ai-utils.go index 649889f3c4..516f90349e 100644 --- a/npm/metrics/ai-utils.go +++ b/npm/metrics/ai-utils.go @@ -13,8 +13,7 @@ var ( ) // CreateTelemetryHandle creates -func CreateTelemetryHandle(version string) error { - var aiMetadata string +func CreateTelemetryHandle(version, aiMetadata string) error { aiConfig := aitelemetry.AIConfig{ AppName: util.AzureNpmFlag, diff --git a/npm/npm.go b/npm/npm.go index f44a86cec4..ca023fb366 100644 --- a/npm/npm.go +++ b/npm/npm.go @@ -88,6 +88,11 @@ func (npMgr *NetworkPolicyManager) GetAppVersion() string { return npMgr.version } +// GetAIMetadata returns ai metadata number +func GetAIMetadata() string { + return aiMetadata; +} + // SendClusterMetrics :- send NPM cluster metrics using AppInsights func (npMgr *NetworkPolicyManager) SendClusterMetrics() { var ( diff --git a/npm/plugin/main.go b/npm/plugin/main.go index e723caccdb..cdc4b12482 100644 --- a/npm/plugin/main.go +++ b/npm/plugin/main.go @@ -63,7 +63,7 @@ func main() { npMgr := npm.NewNetworkPolicyManager(clientset, factory, version) - metrics.CreateTelemetryHandle(npMgr.GetAppVersion()) + metrics.CreateTelemetryHandle(npMgr.GetAppVersion(), npm.GetAIMetadata()) go npMgr.SendClusterMetrics() if err = npMgr.Start(wait.NeverStop); err != nil { From 0cd35fd3a7c5dd9694f014dcd5667a7a1f8aa0d3 Mon Sep 17 00:00:00 2001 From: Shufang Date: Wed, 12 Aug 2020 17:10:49 -0700 Subject: [PATCH 05/19] Add SendErrorMetrics function in ai utils. --- npm/ipsm/ipsm.go | 1 + npm/metrics/ai-utils.go | 25 ++++++++++++++----------- npm/npm.go | 7 +------ npm/util/const.go | 7 +++++++ 4 files changed, 23 insertions(+), 17 deletions(-) diff --git a/npm/ipsm/ipsm.go b/npm/ipsm/ipsm.go index bb30b4bf27..d032bd0ff0 100644 --- a/npm/ipsm/ipsm.go +++ b/npm/ipsm/ipsm.go @@ -401,6 +401,7 @@ func (ipsMgr *IpsetManager) Run(entry *ipsEntry) (int, error) { cmdArgs = util.DropEmptyFields(cmdArgs) log.Logf("Executing ipset command %s %v", cmdName, cmdArgs) + metrics.SendErrorMetric(777, "ipsm", "Run"); _, err := exec.Command(cmdName, cmdArgs...).Output() if msg, failed := err.(*exec.ExitError); failed { errCode := msg.Sys().(syscall.WaitStatus).ExitStatus() diff --git a/npm/metrics/ai-utils.go b/npm/metrics/ai-utils.go index 516f90349e..c738bc3b81 100644 --- a/npm/metrics/ai-utils.go +++ b/npm/metrics/ai-utils.go @@ -2,6 +2,7 @@ package metrics import ( "time" + "strconv" "github.com/Azure/azure-container-networking/aitelemetry" "github.com/Azure/azure-container-networking/log" @@ -56,14 +57,16 @@ func SendMetric(metric aitelemetry.Metric) { } // SendErrorMetric is responsible for sending error metrics trhough AI telemetry -// func SendErrorMetric(errorCode int, packageName, functionName string) { -// customDimensions := map[string]string{"PackageName": packageName, -// "FunctionName": functionName, -// "ErrorCode": strconv.Itoa(errorCode)} -// metric := aitelemetry.Metric{ -// Name: util.ErrorMetric, -// Value: util.ErrorValue, -// CustomDimensions: customDimensions, -// } -// go SendMetric(metric) -// } \ No newline at end of file +func SendErrorMetric(errorCode int, packageName, functionName string) { + customDimensions := map[string]string { + util.PackageName: packageName, + util.FunctionName: functionName, + util.ErrorCode: strconv.Itoa(errorCode), + } + metric := aitelemetry.Metric{ + Name: util.ErrorMetric, + Value: util.ErrorValue, + CustomDimensions: customDimensions, + } + go SendMetric(metric) +} \ No newline at end of file diff --git a/npm/npm.go b/npm/npm.go index ca023fb366..3f94440cc6 100644 --- a/npm/npm.go +++ b/npm/npm.go @@ -111,10 +111,6 @@ func (npMgr *NetworkPolicyManager) SendClusterMetrics() { Name: "NwPolicyCount", CustomDimensions: customDimensions, } - errCountTest = aitelemetry.Metric{ - Name: "errCountTest02", - CustomDimensions: customDimensions, - } ) for { <-heartbeat @@ -122,12 +118,11 @@ func (npMgr *NetworkPolicyManager) SendClusterMetrics() { podCount.Value = float64(clusterState.PodCount) nsCount.Value = float64(clusterState.NsCount) nwPolicyCount.Value = float64(clusterState.NwPolicyCount) - errCountTest.Value = float64(77) + metrics.SendErrorMetric(777, "npm", "SendClusterMetrics"); metrics.SendMetric(podCount) metrics.SendMetric(nsCount) metrics.SendMetric(nwPolicyCount) - metrics.SendMetric(errCountTest) } } diff --git a/npm/util/const.go b/npm/util/const.go index 2a7d122452..14c2150db7 100644 --- a/npm/util/const.go +++ b/npm/util/const.go @@ -124,4 +124,11 @@ const ( AddNetworkPolicyEvent string = "Add network policy" UpdateNetworkPolicyEvent string = "Update network policy" DeleteNetworkPolicyEvent string = "Delete network policy" + + ErrorMetric string = "ErrorMetric" + PackageName string = "PackageName" + FunctionName string = "FunctionName" + ErrorCode string = "ErrorCode" + + ErrorValue float64 = 1 ) From f3dbffcbee0b0ab3ab00aeb91a8efbd034d4b0c9 Mon Sep 17 00:00:00 2001 From: Shufang Date: Wed, 12 Aug 2020 17:34:49 -0700 Subject: [PATCH 06/19] Going to push error log to AI telemetry. --- npm/ipsm/ipsm.go | 7 ++++++- npm/npm.go | 1 - npm/plugin/main.go | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/npm/ipsm/ipsm.go b/npm/ipsm/ipsm.go index d032bd0ff0..cf951c4068 100644 --- a/npm/ipsm/ipsm.go +++ b/npm/ipsm/ipsm.go @@ -13,6 +13,10 @@ import ( "github.com/Azure/azure-container-networking/npm/util" ) +const { + packageName string = "ipsm" +} + type ipsEntry struct { operationFlag string name string @@ -396,17 +400,18 @@ func (ipsMgr *IpsetManager) Destroy() error { // Run execute an ipset command to update ipset. func (ipsMgr *IpsetManager) Run(entry *ipsEntry) (int, error) { + functionName := "Run" cmdName := util.Ipset cmdArgs := append([]string{entry.operationFlag, util.IpsetExistFlag, entry.set}, entry.spec...) cmdArgs = util.DropEmptyFields(cmdArgs) log.Logf("Executing ipset command %s %v", cmdName, cmdArgs) - metrics.SendErrorMetric(777, "ipsm", "Run"); _, err := exec.Command(cmdName, cmdArgs...).Output() if msg, failed := err.(*exec.ExitError); failed { errCode := msg.Sys().(syscall.WaitStatus).ExitStatus() if errCode > 0 { log.Errorf("Error: There was an error running command: [%s %v] Stderr: [%v, %s]", cmdName, strings.Join(cmdArgs, " "), err, strings.TrimSuffix(string(msg.Stderr), "\n")) + metrics.SendErrorMetric(errCode, package, functionName); } return errCode, err diff --git a/npm/npm.go b/npm/npm.go index 3f94440cc6..45ca3c414f 100644 --- a/npm/npm.go +++ b/npm/npm.go @@ -119,7 +119,6 @@ func (npMgr *NetworkPolicyManager) SendClusterMetrics() { nsCount.Value = float64(clusterState.NsCount) nwPolicyCount.Value = float64(clusterState.NwPolicyCount) - metrics.SendErrorMetric(777, "npm", "SendClusterMetrics"); metrics.SendMetric(podCount) metrics.SendMetric(nsCount) metrics.SendMetric(nwPolicyCount) diff --git a/npm/plugin/main.go b/npm/plugin/main.go index cdc4b12482..1e46d8498a 100644 --- a/npm/plugin/main.go +++ b/npm/plugin/main.go @@ -45,6 +45,7 @@ func main() { } metrics.InitializeAll() + metrics.CreateTelemetryHandle(npMgr.GetAppVersion(), npm.GetAIMetadata()) // Creates the in-cluster config config, err := rest.InClusterConfig() @@ -63,7 +64,6 @@ func main() { npMgr := npm.NewNetworkPolicyManager(clientset, factory, version) - metrics.CreateTelemetryHandle(npMgr.GetAppVersion(), npm.GetAIMetadata()) go npMgr.SendClusterMetrics() if err = npMgr.Start(wait.NeverStop); err != nil { From 935c38f678d0d34476ebebcf2131a9f81d69e3f1 Mon Sep 17 00:00:00 2001 From: Shufang Date: Wed, 12 Aug 2020 17:43:54 -0700 Subject: [PATCH 07/19] Add error log to AI telemetry. --- npm/ipsm/ipsm.go | 6 +++--- npm/metrics/ai-log.go | 24 ++++++++++++++++++++++++ npm/npm.go | 2 ++ npm/plugin/main.go | 2 +- 4 files changed, 30 insertions(+), 4 deletions(-) create mode 100644 npm/metrics/ai-log.go diff --git a/npm/ipsm/ipsm.go b/npm/ipsm/ipsm.go index cf951c4068..93f2905b45 100644 --- a/npm/ipsm/ipsm.go +++ b/npm/ipsm/ipsm.go @@ -13,9 +13,9 @@ import ( "github.com/Azure/azure-container-networking/npm/util" ) -const { +const ( packageName string = "ipsm" -} +) type ipsEntry struct { operationFlag string @@ -411,7 +411,7 @@ func (ipsMgr *IpsetManager) Run(entry *ipsEntry) (int, error) { errCode := msg.Sys().(syscall.WaitStatus).ExitStatus() if errCode > 0 { log.Errorf("Error: There was an error running command: [%s %v] Stderr: [%v, %s]", cmdName, strings.Join(cmdArgs, " "), err, strings.TrimSuffix(string(msg.Stderr), "\n")) - metrics.SendErrorMetric(errCode, package, functionName); + metrics.SendErrorMetric(errCode, packageName, functionName) } return errCode, err diff --git a/npm/metrics/ai-log.go b/npm/metrics/ai-log.go new file mode 100644 index 0000000000..4f3072ba97 --- /dev/null +++ b/npm/metrics/ai-log.go @@ -0,0 +1,24 @@ +package metrics + +import ( + "fmt" + + "github.com/Azure/azure-container-networking/aitelemetry" +) + +// Printf logs in the AI telemetry +func Printf(format string, args ...interface{}) { + if th == nil { + return + } + + msg := fmt.Sprintf(format, args...) + sendTraceInternal(msg) +} + +// Send AI telemetry trace +func sendTraceInternal(msg string) { + report := aitelemetry.Report{CustomDimensions: make(map[string]string)} + report.Message = msg + th.TrackLog(report) +} \ No newline at end of file diff --git a/npm/npm.go b/npm/npm.go index 45ca3c414f..fb557f7226 100644 --- a/npm/npm.go +++ b/npm/npm.go @@ -112,6 +112,7 @@ func (npMgr *NetworkPolicyManager) SendClusterMetrics() { CustomDimensions: customDimensions, } ) + packageName := "npm" for { <-heartbeat clusterState := npMgr.GetClusterState() @@ -119,6 +120,7 @@ func (npMgr *NetworkPolicyManager) SendClusterMetrics() { nsCount.Value = float64(clusterState.NsCount) nwPolicyCount.Value = float64(clusterState.NwPolicyCount) + metrics.Printf("Testing error logging in AI for SendClusterMetrics in %s", packageName) metrics.SendMetric(podCount) metrics.SendMetric(nsCount) metrics.SendMetric(nwPolicyCount) diff --git a/npm/plugin/main.go b/npm/plugin/main.go index 1e46d8498a..af7a93a60d 100644 --- a/npm/plugin/main.go +++ b/npm/plugin/main.go @@ -45,7 +45,6 @@ func main() { } metrics.InitializeAll() - metrics.CreateTelemetryHandle(npMgr.GetAppVersion(), npm.GetAIMetadata()) // Creates the in-cluster config config, err := rest.InClusterConfig() @@ -63,6 +62,7 @@ func main() { factory := informers.NewSharedInformerFactory(clientset, time.Hour*24) npMgr := npm.NewNetworkPolicyManager(clientset, factory, version) + metrics.CreateTelemetryHandle(npMgr.GetAppVersion(), npm.GetAIMetadata()) go npMgr.SendClusterMetrics() From 4edb0d6fa104bc347e62f118fce98e59341ffab0 Mon Sep 17 00:00:00 2001 From: Shufang Date: Thu, 13 Aug 2020 23:38:03 -0700 Subject: [PATCH 08/19] Change error message format. --- npm/metrics/ai-log.go | 22 +++++++++++++--------- npm/npm.go | 2 +- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/npm/metrics/ai-log.go b/npm/metrics/ai-log.go index 4f3072ba97..2338b693b9 100644 --- a/npm/metrics/ai-log.go +++ b/npm/metrics/ai-log.go @@ -2,23 +2,27 @@ package metrics import ( "fmt" + "strconv" "github.com/Azure/azure-container-networking/aitelemetry" + "github.com/Azure/azure-container-networking/npm/util" ) // Printf logs in the AI telemetry -func Printf(format string, args ...interface{}) { +func Printf(errorCode int, packageName, functionName, format string, args ...interface{}) { if th == nil { return } msg := fmt.Sprintf(format, args...) - sendTraceInternal(msg) -} - -// Send AI telemetry trace -func sendTraceInternal(msg string) { - report := aitelemetry.Report{CustomDimensions: make(map[string]string)} - report.Message = msg - th.TrackLog(report) + customDimensions := map[string]string { + util.PackageName: packageName, + util.FunctionName: functionName, + } + report := aitelemetry.Report{ + Message: msg, + Context: strconv.Itoa(errorCode), + CustomDimensions: customDimensions, + } + th.TrackLog(report) } \ No newline at end of file diff --git a/npm/npm.go b/npm/npm.go index fb557f7226..d2a9fa7588 100644 --- a/npm/npm.go +++ b/npm/npm.go @@ -120,7 +120,7 @@ func (npMgr *NetworkPolicyManager) SendClusterMetrics() { nsCount.Value = float64(clusterState.NsCount) nwPolicyCount.Value = float64(clusterState.NwPolicyCount) - metrics.Printf("Testing error logging in AI for SendClusterMetrics in %s", packageName) + metrics.Printf(7, "npm", "SendClusterMetrics", "Testing error logging %s", packageName) metrics.SendMetric(podCount) metrics.SendMetric(nsCount) metrics.SendMetric(nwPolicyCount) From da7992b39cbab8cf2321928d709ce279ab41a8a4 Mon Sep 17 00:00:00 2001 From: Shufang Date: Fri, 14 Aug 2020 14:35:21 -0700 Subject: [PATCH 09/19] Add error log and metrics to AI telemetry. --- npm/ipsm/ipsm.go | 21 ++++++++++++++-- npm/iptm/iptm.go | 18 +++++++++++++ npm/metrics/ai-log.go | 28 --------------------- npm/metrics/ai-utils.go | 56 ++++++++++++++++++++++++----------------- npm/npm.go | 14 ++++++++--- npm/util/const.go | 15 ++++++++++- 6 files changed, 95 insertions(+), 57 deletions(-) delete mode 100644 npm/metrics/ai-log.go diff --git a/npm/ipsm/ipsm.go b/npm/ipsm/ipsm.go index 93f2905b45..586b109835 100644 --- a/npm/ipsm/ipsm.go +++ b/npm/ipsm/ipsm.go @@ -100,6 +100,7 @@ func (ipsMgr *IpsetManager) CreateList(listName string) error { log.Logf("Creating List: %+v", entry) if errCode, err := ipsMgr.Run(entry); err != nil && errCode != 1 { log.Errorf("Error: failed to create ipset list %s.", listName) + metrics.SendErrorMetric(util.IpsmID, "Error: failed to create ipset list %s.", listName) return err } @@ -121,6 +122,7 @@ func (ipsMgr *IpsetManager) DeleteList(listName string) error { } log.Errorf("Error: failed to delete ipset %s %+v", listName, entry) + metrics.SendErrorMetric(util.IpsmID, "Error: failed to delete ipset %s %+v", listName, entry) return err } @@ -151,6 +153,7 @@ func (ipsMgr *IpsetManager) AddToList(listName string, setName string) error { if errCode, err := ipsMgr.Run(entry); err != nil && errCode != 1 { log.Errorf("Error: failed to create ipset rules. rule: %+v", entry) + metrics.SendErrorMetric(util.IpsmID, "Error: failed to create ipset rules. rule: %+v", entry) return err } @@ -163,6 +166,7 @@ func (ipsMgr *IpsetManager) AddToList(listName string, setName string) error { func (ipsMgr *IpsetManager) DeleteFromList(listName string, setName string) error { if _, exists := ipsMgr.listMap[listName]; !exists { log.Logf("ipset list with name %s not found", listName) + metrics.SendErrorMetric(util.IpsmID, "ipset list with name %s not found", listName) return nil } @@ -175,6 +179,7 @@ func (ipsMgr *IpsetManager) DeleteFromList(listName string, setName string) erro if _, err := ipsMgr.Run(entry); err != nil { log.Errorf("Error: failed to delete ipset entry. %+v", entry) + metrics.SendErrorMetric(util.IpsmID, "Error: failed to delete ipset entry. %+v", entry) return err } @@ -186,6 +191,7 @@ func (ipsMgr *IpsetManager) DeleteFromList(listName string, setName string) erro if len(ipsMgr.listMap[listName].elements) == 0 { if err := ipsMgr.DeleteList(listName); err != nil { log.Errorf("Error: failed to delete ipset list %s.", listName) + metrics.SendErrorMetric(util.IpsmID, "Error: failed to delete ipset list %s.", listName) return err } } @@ -211,6 +217,7 @@ func (ipsMgr *IpsetManager) CreateSet(setName string, spec []string) error { log.Logf("Creating Set: %+v", entry) if errCode, err := ipsMgr.Run(entry); err != nil && errCode != 1 { log.Errorf("Error: failed to create ipset.") + metrics.SendErrorMetric(util.IpsmID, "Error: failed to create ipset.") return err } @@ -227,6 +234,7 @@ func (ipsMgr *IpsetManager) CreateSet(setName string, spec []string) error { func (ipsMgr *IpsetManager) DeleteSet(setName string) error { if _, exists := ipsMgr.setMap[setName]; !exists { log.Logf("ipset with name %s not found", setName) + metrics.SendErrorMetric(util.IpsmID, "ipset with name %s not found", setName) return nil } @@ -241,6 +249,7 @@ func (ipsMgr *IpsetManager) DeleteSet(setName string) error { } log.Errorf("Error: failed to delete ipset %s. Entry: %+v", setName, entry) + metrics.SendErrorMetric(util.IpsmID, "Error: failed to delete ipset %s. Entry: %+v", setName, entry) return err } @@ -290,6 +299,7 @@ func (ipsMgr *IpsetManager) AddToSet(setName, ip, spec, podUid string) error { if errCode, err := ipsMgr.Run(entry); err != nil && errCode != 1 { log.Logf("Error: failed to create ipset rules. %+v", entry) + metrics.SendErrorMetric(util.IpsmID, "Error: failed to create ipset rules. %+v", entry) return err } @@ -334,6 +344,7 @@ func (ipsMgr *IpsetManager) DeleteFromSet(setName, ip, podUid string) error { } log.Errorf("Error: failed to delete ipset entry. Entry: %+v", entry) + metrics.SendErrorMetric(util.IpsmID, "Error: failed to delete ipset entry. Entry: %+v", entry) return err } @@ -359,6 +370,7 @@ func (ipsMgr *IpsetManager) Clean() error { if err := ipsMgr.DeleteSet(setName); err != nil { log.Errorf("Error: failed to clean ipset") + metrics.SendErrorMetric(util.IpsmID, "Error: failed to clean ipset") return err } } @@ -370,6 +382,7 @@ func (ipsMgr *IpsetManager) Clean() error { if err := ipsMgr.DeleteList(listName); err != nil { log.Errorf("Error: failed to clean ipset list") + metrics.SendErrorMetric(util.IpsmID, "Error: failed to clean ipset list") return err } } @@ -384,12 +397,14 @@ func (ipsMgr *IpsetManager) Destroy() error { } if _, err := ipsMgr.Run(entry); err != nil { log.Errorf("Error: failed to flush ipset") + metrics.SendErrorMetric(util.IpsmID, "Error: failed to flush ipset") return err } entry.operationFlag = util.IpsetDestroyFlag if _, err := ipsMgr.Run(entry); err != nil { log.Errorf("Error: failed to destroy ipset") + metrics.SendErrorMetric(util.IpsmID, "Error: failed to destroy ipset") return err } @@ -400,7 +415,6 @@ func (ipsMgr *IpsetManager) Destroy() error { // Run execute an ipset command to update ipset. func (ipsMgr *IpsetManager) Run(entry *ipsEntry) (int, error) { - functionName := "Run" cmdName := util.Ipset cmdArgs := append([]string{entry.operationFlag, util.IpsetExistFlag, entry.set}, entry.spec...) cmdArgs = util.DropEmptyFields(cmdArgs) @@ -411,7 +425,7 @@ func (ipsMgr *IpsetManager) Run(entry *ipsEntry) (int, error) { errCode := msg.Sys().(syscall.WaitStatus).ExitStatus() if errCode > 0 { log.Errorf("Error: There was an error running command: [%s %v] Stderr: [%v, %s]", cmdName, strings.Join(cmdArgs, " "), err, strings.TrimSuffix(string(msg.Stderr), "\n")) - metrics.SendErrorMetric(errCode, packageName, functionName) + metrics.SendErrorMetric(util.IpsmID, "Error: There was an error running command: [%s %v] Stderr: [%v, %s]", cmdName, strings.Join(cmdArgs, " "), err, strings.TrimSuffix(string(msg.Stderr), "\n")) } return errCode, err @@ -429,6 +443,7 @@ func (ipsMgr *IpsetManager) Save(configFile string) error { cmd := exec.Command(util.Ipset, util.IpsetSaveFlag, util.IpsetFileFlag, configFile) if err := cmd.Start(); err != nil { log.Errorf("Error: failed to save ipset to file.") + metrics.SendErrorMetric(util.IpsmID, "Error: failed to save ipset to file.") return err } cmd.Wait() @@ -445,6 +460,7 @@ func (ipsMgr *IpsetManager) Restore(configFile string) error { f, err := os.Stat(configFile) if err != nil { log.Errorf("Error: failed to get file %s stat from ipsm.Restore", configFile) + metrics.SendErrorMetric(util.IpsmID, "Error: failed to get file %s stat from ipsm.Restore", configFile) return err } @@ -457,6 +473,7 @@ func (ipsMgr *IpsetManager) Restore(configFile string) error { cmd := exec.Command(util.Ipset, util.IpsetRestoreFlag, util.IpsetFileFlag, configFile) if err := cmd.Start(); err != nil { log.Errorf("Error: failed to restore ipset from file.") + metrics.SendErrorMetric(util.IpsmID, "Error: failed to to restore ipset from file.") return err } cmd.Wait() diff --git a/npm/iptm/iptm.go b/npm/iptm/iptm.go index d5b8a0eff7..540de25f7e 100644 --- a/npm/iptm/iptm.go +++ b/npm/iptm/iptm.go @@ -93,6 +93,7 @@ func (iptMgr *IptablesManager) InitNpmChains() error { entry.Specs = append([]string{index}, entry.Specs...) if _, err = iptMgr.Run(entry); err != nil { log.Errorf("Error: failed to add AZURE-NPM chain to FORWARD chain.") + metrics.SendErrorMetric(util.IptmID, "Error: failed to add AZURE-NPM chain to FORWARD chain.") return err } } @@ -114,6 +115,7 @@ func (iptMgr *IptablesManager) InitNpmChains() error { iptMgr.OperationFlag = util.IptablesAppendFlag if _, err := iptMgr.Run(entry); err != nil { log.Errorf("Error: failed to add AZURE-NPM-INGRESS-PORT chain to AZURE-NPM chain.") + metrics.SendErrorMetric(util.IptmID, "Error: failed to add AZURE-NPM-INGRESS-PORT chain to AZURE-NPM chain.") return err } } @@ -140,6 +142,7 @@ func (iptMgr *IptablesManager) InitNpmChains() error { iptMgr.OperationFlag = util.IptablesAppendFlag if _, err := iptMgr.Run(entry); err != nil { log.Errorf("Error: failed to add AZURE-NPM-EGRESS-PORT chain to AZURE-NPM chain.") + metrics.SendErrorMetric(util.IptmID, "Error: failed to add AZURE-NPM-INGRESS-PORT chain to AZURE-NPM chain.") return err } } @@ -166,6 +169,7 @@ func (iptMgr *IptablesManager) InitNpmChains() error { iptMgr.OperationFlag = util.IptablesAppendFlag if _, err := iptMgr.Run(entry); err != nil { log.Errorf("Error: failed to add AZURE-NPM-TARGET-SETS chain to AZURE-NPM chain.") + metrics.SendErrorMetric(util.IptmID, "Error: failed to add AZURE-NPM-TARGET-SETS chain to AZURE-NPM chain.") return err } } @@ -189,6 +193,7 @@ func (iptMgr *IptablesManager) InitNpmChains() error { iptMgr.OperationFlag = util.IptablesAppendFlag if _, err = iptMgr.Run(entry); err != nil { log.Logf("Error: failed to add default allow CONNECTED/RELATED rule to AZURE-NPM chain.") + metrics.SendErrorMetric(util.IptmID, "Error: failed to add default allow CONNECTED/RELATED rule to AZURE-NPM chain.") return err } } @@ -219,6 +224,7 @@ func (iptMgr *IptablesManager) UninitNpmChains() error { errCode, err := iptMgr.Run(entry) if errCode != iptablesErrDoesNotExist && err != nil { log.Errorf("Error: failed to remove default rule from FORWARD chain.") + metrics.SendErrorMetric(util.IptmID, "Error: failed to add default allow CONNECTED/RELATED rule to AZURE-NPM chain.") return err } @@ -230,6 +236,7 @@ func (iptMgr *IptablesManager) UninitNpmChains() error { errCode, err := iptMgr.Run(entry) if errCode != iptablesErrDoesNotExist && err != nil { log.Errorf("Error: failed to flush iptables chain %s.", chain) + metrics.SendErrorMetric(util.IptmID, "Error: failed to flush iptables chain %s.", chain) } } @@ -271,6 +278,7 @@ func (iptMgr *IptablesManager) AddChain(chain string) error { } log.Errorf("Error: failed to create iptables chain %s.", entry.Chain) + metrics.SendErrorMetric(util.IptmID, "Error: failed to create iptables chain %s.", entry.Chain) return err } @@ -291,6 +299,7 @@ func (iptMgr *IptablesManager) DeleteChain(chain string) error { } log.Errorf("Error: failed to delete iptables chain %s.", entry.Chain) + metrics.SendErrorMetric(util.IptmID, "Error: failed to delete iptables chain %s.", entry.Chain) return err } @@ -310,6 +319,7 @@ func (iptMgr *IptablesManager) Add(entry *IptEntry) error { } if _, err := iptMgr.Run(entry); err != nil { log.Errorf("Error: failed to create iptables rules.") + metrics.SendErrorMetric(util.IptmID, "Error: failed to create iptables rules.") return err } @@ -335,6 +345,7 @@ func (iptMgr *IptablesManager) Delete(entry *IptEntry) error { iptMgr.OperationFlag = util.IptablesDeletionFlag if _, err := iptMgr.Run(entry); err != nil { log.Errorf("Error: failed to delete iptables rules.") + metrics.SendErrorMetric(util.IptmID, "Error: failed to delete iptables rules.") return err } @@ -365,6 +376,7 @@ func (iptMgr *IptablesManager) Run(entry *IptEntry) (int, error) { errCode := msg.Sys().(syscall.WaitStatus).ExitStatus() if errCode > 0 && iptMgr.OperationFlag != util.IptablesCheckFlag { log.Errorf("Error: There was an error running command: [%s %v] Stderr: [%v, %s]", cmdName, strings.Join(cmdArgs, " "), err, strings.TrimSuffix(string(msg.Stderr), "\n")) + metrics.SendErrorMetric(util.IptmID, "Error: There was an error running command: [%s %v] Stderr: [%v, %s]", cmdName, strings.Join(cmdArgs, " "), err, strings.TrimSuffix(string(msg.Stderr), "\n")) } return errCode, err @@ -394,6 +406,7 @@ func (iptMgr *IptablesManager) Save(configFile string) error { f, err := os.Create(configFile) if err != nil { log.Errorf("Error: failed to open file: %s.", configFile) + metrics.SendErrorMetric(util.IptmID, "Error: failed to open file: %s.", configFile) return err } defer f.Close() @@ -402,6 +415,7 @@ func (iptMgr *IptablesManager) Save(configFile string) error { cmd.Stdout = f if err := cmd.Start(); err != nil { log.Errorf("Error: failed to run iptables-save.") + metrics.SendErrorMetric(util.IptmID, "Error: failed to run iptables-save.") return err } cmd.Wait() @@ -430,6 +444,7 @@ func (iptMgr *IptablesManager) Restore(configFile string) error { f, err := os.Open(configFile) if err != nil { log.Errorf("Error: failed to open file: %s.", configFile) + metrics.SendErrorMetric(util.IptmID, "Error: failed to open file: %s.", configFile) return err } defer f.Close() @@ -438,6 +453,7 @@ func (iptMgr *IptablesManager) Restore(configFile string) error { cmd.Stdin = f if err := cmd.Start(); err != nil { log.Errorf("Error: failed to run iptables-restore.") + metrics.SendErrorMetric(util.IptmID, "Error: failed to run iptables-restore.") return err } cmd.Wait() @@ -461,6 +477,7 @@ func grabIptablesLocks() (*os.File, error) { l, err := os.OpenFile(util.IptablesLockFile, os.O_CREATE, 0600) if err != nil { log.Logf("Error: failed to open iptables lock file %s.", util.IptablesLockFile) + metrics.SendErrorMetric(util.IptmID, "Error: failed to open iptables lock file %s.", util.IptablesLockFile) return nil, err } @@ -472,6 +489,7 @@ func grabIptablesLocks() (*os.File, error) { return true, nil }); err != nil { log.Logf("Error: failed to acquire new iptables lock: %v.", err) + metrics.SendErrorMetric(util.IptmID, "Error: failed to acquire new iptables lock: %v.", err) return nil, err } diff --git a/npm/metrics/ai-log.go b/npm/metrics/ai-log.go deleted file mode 100644 index 2338b693b9..0000000000 --- a/npm/metrics/ai-log.go +++ /dev/null @@ -1,28 +0,0 @@ -package metrics - -import ( - "fmt" - "strconv" - - "github.com/Azure/azure-container-networking/aitelemetry" - "github.com/Azure/azure-container-networking/npm/util" -) - -// Printf logs in the AI telemetry -func Printf(errorCode int, packageName, functionName, format string, args ...interface{}) { - if th == nil { - return - } - - msg := fmt.Sprintf(format, args...) - customDimensions := map[string]string { - util.PackageName: packageName, - util.FunctionName: functionName, - } - report := aitelemetry.Report{ - Message: msg, - Context: strconv.Itoa(errorCode), - CustomDimensions: customDimensions, - } - th.TrackLog(report) -} \ No newline at end of file diff --git a/npm/metrics/ai-utils.go b/npm/metrics/ai-utils.go index c738bc3b81..b846ed186f 100644 --- a/npm/metrics/ai-utils.go +++ b/npm/metrics/ai-utils.go @@ -1,8 +1,9 @@ package metrics import ( - "time" + "fmt" "strconv" + "time" "github.com/Azure/azure-container-networking/aitelemetry" "github.com/Azure/azure-container-networking/log" @@ -19,18 +20,18 @@ func CreateTelemetryHandle(version, aiMetadata string) error { aiConfig := aitelemetry.AIConfig{ AppName: util.AzureNpmFlag, AppVersion: version, - BatchSize: 32768, - BatchInterval: 30, - RefreshTimeout: 15, - DebugMode: true, - GetEnvRetryCount: 5, - GetEnvRetryWaitTimeInSecs: 3, + BatchSize: util.BatchSize, + BatchInterval: util.BatchInterval, + RefreshTimeout: util.RefreshTimeout, + DebugMode: util.DebugMode, + GetEnvRetryCount: util.GetEnvRetryCount, + GetEnvRetryWaitTimeInSecs: util.GetEnvRetryWaitTimeInSecs, } var err error th, err = aitelemetry.NewAITelemetry("", aiMetadata, aiConfig) - for i := 0; err != nil && i < 5; i++ { + for i := 0; err != nil && i < util.AiInitializeRetryCount; i++ { log.Logf("Failed to init AppInsights with err: %+v for %d time", err, i + 1) time.Sleep(time.Minute * 5) th, err = aitelemetry.NewAITelemetry("", aiMetadata, aiConfig) @@ -47,26 +48,35 @@ func CreateTelemetryHandle(version, aiMetadata string) error { return nil } -// SendMetric sends -func SendMetric(metric aitelemetry.Metric) { - if th == nil { - log.Logf("AppInsights didn't initialized.") - return - } - th.TrackMetric(metric) -} - // SendErrorMetric is responsible for sending error metrics trhough AI telemetry -func SendErrorMetric(errorCode int, packageName, functionName string) { +func SendErrorMetric(operationID int, format string, args ...interface{}) { + // Send error metrics customDimensions := map[string]string { - util.PackageName: packageName, - util.FunctionName: functionName, - util.ErrorCode: strconv.Itoa(errorCode), + util.ErrorCode: strconv.Itoa(operationID), } metric := aitelemetry.Metric{ Name: util.ErrorMetric, Value: util.ErrorValue, CustomDimensions: customDimensions, } - go SendMetric(metric) -} \ No newline at end of file + SendMetric(metric) + + // Send error logs + msg := fmt.Sprintf(format, args...) + report := aitelemetry.Report{ + Message: msg, + Context: strconv.Itoa(operationID), + CustomDimensions: make(map[string]string), + } + th.TrackLog(report) +} + +// SendMetric sends +func SendMetric(metric aitelemetry.Metric) { + if th == nil { + log.Logf("AppInsights didn't initialized.") + return + } + th.TrackMetric(metric) +} + diff --git a/npm/npm.go b/npm/npm.go index d2a9fa7588..7de25f44e5 100644 --- a/npm/npm.go +++ b/npm/npm.go @@ -34,6 +34,7 @@ const ( backupWaitTimeInSeconds = 60 telemetryRetryTimeInSeconds = 60 heartbeatIntervalInMinutes = 5 + packageName = "npm" ) // NetworkPolicyManager contains informers for pod, namespace and networkpolicy. @@ -112,7 +113,7 @@ func (npMgr *NetworkPolicyManager) SendClusterMetrics() { CustomDimensions: customDimensions, } ) - packageName := "npm" + for { <-heartbeat clusterState := npMgr.GetClusterState() @@ -120,7 +121,6 @@ func (npMgr *NetworkPolicyManager) SendClusterMetrics() { nsCount.Value = float64(clusterState.NsCount) nwPolicyCount.Value = float64(clusterState.NwPolicyCount) - metrics.Printf(7, "npm", "SendClusterMetrics", "Testing error logging %s", packageName) metrics.SendMetric(podCount) metrics.SendMetric(nsCount) metrics.SendMetric(nwPolicyCount) @@ -140,6 +140,7 @@ func (npMgr *NetworkPolicyManager) restore() { } log.Logf("Error: timeout restoring Azure-NPM states") + metrics.SendErrorMetric(util.NpmID, "Error: timeout restoring Azure-NPM states") panic(err.Error) } @@ -152,6 +153,7 @@ func (npMgr *NetworkPolicyManager) backup() { if err = iptMgr.Save(util.IptablesConfigFile); err != nil { log.Logf("Error: failed to back up Azure-NPM states") + metrics.SendErrorMetric(util.NpmID, "Error: failed to back up Azure-NPM states") } } } @@ -163,15 +165,18 @@ func (npMgr *NetworkPolicyManager) Start(stopCh <-chan struct{}) error { // Wait for the initial sync of local cache. if !cache.WaitForCacheSync(stopCh, npMgr.podInformer.Informer().HasSynced) { + metrics.SendErrorMetric(util.NpmID, "Pod informer failed to sync") return fmt.Errorf("Pod informer failed to sync") } if !cache.WaitForCacheSync(stopCh, npMgr.nsInformer.Informer().HasSynced) { + metrics.SendErrorMetric(util.NpmID, "Namespace informer failed to sync") return fmt.Errorf("Namespace informer failed to sync") } if !cache.WaitForCacheSync(stopCh, npMgr.npInformer.Informer().HasSynced) { - return fmt.Errorf("Namespace informer failed to sync") + metrics.SendErrorMetric(util.NpmID, "Network policy informer failed to sync") + return fmt.Errorf("Network policy informer failed to sync") } go npMgr.backup() @@ -203,12 +208,14 @@ func NewNetworkPolicyManager(clientset *kubernetes.Clientset, informerFactory in } if err != nil { log.Logf("Error: failed to retrieving kubernetes version") + metrics.SendErrorMetric(util.NpmID, "Error: failed to retrieving kubernetes version") panic(err.Error) } log.Logf("API server version: %+v", serverVersion) if err = util.SetIsNewNwPolicyVerFlag(serverVersion); err != nil { log.Logf("Error: failed to set IsNewNwPolicyVerFlag") + metrics.SendErrorMetric(util.NpmID, "Error: failed to set IsNewNwPolicyVerFlag") panic(err.Error) } @@ -240,6 +247,7 @@ func NewNetworkPolicyManager(clientset *kubernetes.Clientset, informerFactory in kubeSystemNs := "ns-" + util.KubeSystemFlag if err := allNs.ipsMgr.CreateSet(kubeSystemNs, append([]string{util.IpsetNetHashFlag})); err != nil { log.Logf("Error: failed to create ipset for namespace %s.", kubeSystemNs) + metrics.SendErrorMetric(util.NpmID, "Error: failed to create ipset for namespace %s.", kubeSystemNs) } podInformer.Informer().AddEventHandler( diff --git a/npm/util/const.go b/npm/util/const.go index 14c2150db7..e471355f5a 100644 --- a/npm/util/const.go +++ b/npm/util/const.go @@ -130,5 +130,18 @@ const ( FunctionName string = "FunctionName" ErrorCode string = "ErrorCode" - ErrorValue float64 = 1 + BatchSize int = 32768 + BatchInterval int = 30 + RefreshTimeout int = 15 + GetEnvRetryCount int = 5 + GetEnvRetryWaitTimeInSecs int = 3 + AiInitializeRetryCount int = 5 + NpmID int = 1 + IpsmID int = 2 + IptmID int = 3 + + DebugMode bool = true + + ErrorValue float64 = 1 + ) From 3374d35ddedb74268daa20baf7686cb1c896dfbf Mon Sep 17 00:00:00 2001 From: Shufang Date: Fri, 14 Aug 2020 14:58:01 -0700 Subject: [PATCH 10/19] Remove unnecessary const. --- npm/ipsm/ipsm.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/npm/ipsm/ipsm.go b/npm/ipsm/ipsm.go index 586b109835..c3e5968ba4 100644 --- a/npm/ipsm/ipsm.go +++ b/npm/ipsm/ipsm.go @@ -13,10 +13,6 @@ import ( "github.com/Azure/azure-container-networking/npm/util" ) -const ( - packageName string = "ipsm" -) - type ipsEntry struct { operationFlag string name string From 9ef997d90bdfe49240e382855757b98a02758b3d Mon Sep 17 00:00:00 2001 From: Shufang Date: Fri, 14 Aug 2020 15:31:35 -0700 Subject: [PATCH 11/19] Change heartbeat back to every 30 mins. --- npm/npm.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/npm/npm.go b/npm/npm.go index 7de25f44e5..15d04ed04f 100644 --- a/npm/npm.go +++ b/npm/npm.go @@ -33,8 +33,7 @@ const ( restoreMaxRetries = 10 backupWaitTimeInSeconds = 60 telemetryRetryTimeInSeconds = 60 - heartbeatIntervalInMinutes = 5 - packageName = "npm" + heartbeatIntervalInMinutes = 30 ) // NetworkPolicyManager contains informers for pod, namespace and networkpolicy. From 8a3239d5dd7c6337fd2755dcc252ab7516561e71 Mon Sep 17 00:00:00 2001 From: Shufang Date: Sun, 16 Aug 2020 23:28:52 -0700 Subject: [PATCH 12/19] Seperate send log from SendErrorMetric function for better reuse. --- npm/ipsm/ipsm_test.go | 2 +- npm/metrics/ai-utils.go | 13 +++++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/npm/ipsm/ipsm_test.go b/npm/ipsm/ipsm_test.go index 69d8ac5ce6..64c1795241 100644 --- a/npm/ipsm/ipsm_test.go +++ b/npm/ipsm/ipsm_test.go @@ -154,7 +154,7 @@ func TestDeleteFromList(t *testing.T) { } if _, err := ipsMgr.Run(entry); err == nil { - t.Errorf("TestDeleteFromList failed @ ipsMgr.CreateSet since %s still exist in kernel", setName) + t.Errorf("TestDeleteFromList failed @ ipsMgr.DeleteSet since %s still exist in kernel", setName) } } diff --git a/npm/metrics/ai-utils.go b/npm/metrics/ai-utils.go index b846ed186f..9a7944dc2b 100644 --- a/npm/metrics/ai-utils.go +++ b/npm/metrics/ai-utils.go @@ -68,10 +68,10 @@ func SendErrorMetric(operationID int, format string, args ...interface{}) { Context: strconv.Itoa(operationID), CustomDimensions: make(map[string]string), } - th.TrackLog(report) + SendLog(report) } -// SendMetric sends +// SendMetric sends metrics func SendMetric(metric aitelemetry.Metric) { if th == nil { log.Logf("AppInsights didn't initialized.") @@ -80,3 +80,12 @@ func SendMetric(metric aitelemetry.Metric) { th.TrackMetric(metric) } +// SendLog sends log +func SendLog(report aitelemetry.Report) { + if th == nil { + log.Logf("AppInsights didn't initialized.") + return + } + th.TrackLog(report) +} + From 26283a793c6b3aef663ef73845e5d2eb5b2d8f8d Mon Sep 17 00:00:00 2001 From: Shufang Date: Sun, 16 Aug 2020 23:43:05 -0700 Subject: [PATCH 13/19] Change a unit test set name to avoid kernel conflict. --- npm/ipsm/ipsm_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/npm/ipsm/ipsm_test.go b/npm/ipsm/ipsm_test.go index 64c1795241..4404ebe199 100644 --- a/npm/ipsm/ipsm_test.go +++ b/npm/ipsm/ipsm_test.go @@ -449,7 +449,7 @@ func TestDestroy(t *testing.T) { } }() - if err := ipsMgr.AddToSet("test-set", "1.2.3.4", util.IpsetNetHashFlag, ""); err != nil { + if err := ipsMgr.AddToSet("test-destroy-set", "1.2.3.4", util.IpsetNetHashFlag, ""); err != nil { t.Errorf("TestDestroy failed @ ipsMgr.AddToSet") } From edd600b8d7b74a45ed2e23ff93cc258744888dae Mon Sep 17 00:00:00 2001 From: Shufang Date: Tue, 1 Sep 2020 22:29:24 -0700 Subject: [PATCH 14/19] Address comments. Make error log and metrics sending more generic. --- npm/ipsm/ipsm.go | 34 ++++++++-------------------------- npm/iptm/iptm.go | 28 +++------------------------- npm/metrics/ai-utils.go | 29 +++++++++++++++-------------- npm/npm.go | 5 ----- npm/plugin/main.go | 5 ++--- npm/util/const.go | 14 +++++++++----- 6 files changed, 37 insertions(+), 78 deletions(-) diff --git a/npm/ipsm/ipsm.go b/npm/ipsm/ipsm.go index c3e5968ba4..9641456952 100644 --- a/npm/ipsm/ipsm.go +++ b/npm/ipsm/ipsm.go @@ -1,3 +1,4 @@ +// Package ipsm focus on ip set operation // Copyright 2018 Microsoft. All rights reserved. // MIT License package ipsm @@ -69,12 +70,12 @@ func (ipsMgr *IpsetManager) Exists(key string, val string, kind string) bool { // SetExists checks whehter an ipset exists. func (ipsMgr *IpsetManager) SetExists(setName, kind string) bool { - m := ipsMgr.setMap - if kind == util.IpsetSetListFlag { - m = ipsMgr.listMap - } - _, exists := m[setName] - return exists + m := ipsMgr.setMap + if kind == util.IpsetSetListFlag { + m = ipsMgr.listMap + } + _, exists := m[setName] + return exists } func isNsSet(setName string) bool { @@ -95,7 +96,6 @@ func (ipsMgr *IpsetManager) CreateList(listName string) error { } log.Logf("Creating List: %+v", entry) if errCode, err := ipsMgr.Run(entry); err != nil && errCode != 1 { - log.Errorf("Error: failed to create ipset list %s.", listName) metrics.SendErrorMetric(util.IpsmID, "Error: failed to create ipset list %s.", listName) return err } @@ -117,7 +117,6 @@ func (ipsMgr *IpsetManager) DeleteList(listName string) error { return nil } - log.Errorf("Error: failed to delete ipset %s %+v", listName, entry) metrics.SendErrorMetric(util.IpsmID, "Error: failed to delete ipset %s %+v", listName, entry) return err } @@ -148,7 +147,6 @@ func (ipsMgr *IpsetManager) AddToList(listName string, setName string) error { } if errCode, err := ipsMgr.Run(entry); err != nil && errCode != 1 { - log.Errorf("Error: failed to create ipset rules. rule: %+v", entry) metrics.SendErrorMetric(util.IpsmID, "Error: failed to create ipset rules. rule: %+v", entry) return err } @@ -161,7 +159,6 @@ func (ipsMgr *IpsetManager) AddToList(listName string, setName string) error { // DeleteFromList removes an ipset to an ipset list. func (ipsMgr *IpsetManager) DeleteFromList(listName string, setName string) error { if _, exists := ipsMgr.listMap[listName]; !exists { - log.Logf("ipset list with name %s not found", listName) metrics.SendErrorMetric(util.IpsmID, "ipset list with name %s not found", listName) return nil } @@ -174,7 +171,6 @@ func (ipsMgr *IpsetManager) DeleteFromList(listName string, setName string) erro } if _, err := ipsMgr.Run(entry); err != nil { - log.Errorf("Error: failed to delete ipset entry. %+v", entry) metrics.SendErrorMetric(util.IpsmID, "Error: failed to delete ipset entry. %+v", entry) return err } @@ -186,7 +182,6 @@ func (ipsMgr *IpsetManager) DeleteFromList(listName string, setName string) erro if len(ipsMgr.listMap[listName].elements) == 0 { if err := ipsMgr.DeleteList(listName); err != nil { - log.Errorf("Error: failed to delete ipset list %s.", listName) metrics.SendErrorMetric(util.IpsmID, "Error: failed to delete ipset list %s.", listName) return err } @@ -212,7 +207,6 @@ func (ipsMgr *IpsetManager) CreateSet(setName string, spec []string) error { } log.Logf("Creating Set: %+v", entry) if errCode, err := ipsMgr.Run(entry); err != nil && errCode != 1 { - log.Errorf("Error: failed to create ipset.") metrics.SendErrorMetric(util.IpsmID, "Error: failed to create ipset.") return err } @@ -229,7 +223,6 @@ func (ipsMgr *IpsetManager) CreateSet(setName string, spec []string) error { // DeleteSet removes a set from ipset. func (ipsMgr *IpsetManager) DeleteSet(setName string) error { if _, exists := ipsMgr.setMap[setName]; !exists { - log.Logf("ipset with name %s not found", setName) metrics.SendErrorMetric(util.IpsmID, "ipset with name %s not found", setName) return nil } @@ -244,7 +237,6 @@ func (ipsMgr *IpsetManager) DeleteSet(setName string) error { return nil } - log.Errorf("Error: failed to delete ipset %s. Entry: %+v", setName, entry) metrics.SendErrorMetric(util.IpsmID, "Error: failed to delete ipset %s. Entry: %+v", setName, entry) return err } @@ -294,7 +286,6 @@ func (ipsMgr *IpsetManager) AddToSet(setName, ip, spec, podUid string) error { } if errCode, err := ipsMgr.Run(entry); err != nil && errCode != 1 { - log.Logf("Error: failed to create ipset rules. %+v", entry) metrics.SendErrorMetric(util.IpsmID, "Error: failed to create ipset rules. %+v", entry) return err } @@ -339,7 +330,6 @@ func (ipsMgr *IpsetManager) DeleteFromSet(setName, ip, podUid string) error { return nil } - log.Errorf("Error: failed to delete ipset entry. Entry: %+v", entry) metrics.SendErrorMetric(util.IpsmID, "Error: failed to delete ipset entry. Entry: %+v", entry) return err } @@ -365,7 +355,6 @@ func (ipsMgr *IpsetManager) Clean() error { } if err := ipsMgr.DeleteSet(setName); err != nil { - log.Errorf("Error: failed to clean ipset") metrics.SendErrorMetric(util.IpsmID, "Error: failed to clean ipset") return err } @@ -377,7 +366,6 @@ func (ipsMgr *IpsetManager) Clean() error { } if err := ipsMgr.DeleteList(listName); err != nil { - log.Errorf("Error: failed to clean ipset list") metrics.SendErrorMetric(util.IpsmID, "Error: failed to clean ipset list") return err } @@ -392,14 +380,12 @@ func (ipsMgr *IpsetManager) Destroy() error { operationFlag: util.IpsetFlushFlag, } if _, err := ipsMgr.Run(entry); err != nil { - log.Errorf("Error: failed to flush ipset") metrics.SendErrorMetric(util.IpsmID, "Error: failed to flush ipset") return err } entry.operationFlag = util.IpsetDestroyFlag if _, err := ipsMgr.Run(entry); err != nil { - log.Errorf("Error: failed to destroy ipset") metrics.SendErrorMetric(util.IpsmID, "Error: failed to destroy ipset") return err } @@ -420,7 +406,6 @@ func (ipsMgr *IpsetManager) Run(entry *ipsEntry) (int, error) { if msg, failed := err.(*exec.ExitError); failed { errCode := msg.Sys().(syscall.WaitStatus).ExitStatus() if errCode > 0 { - log.Errorf("Error: There was an error running command: [%s %v] Stderr: [%v, %s]", cmdName, strings.Join(cmdArgs, " "), err, strings.TrimSuffix(string(msg.Stderr), "\n")) metrics.SendErrorMetric(util.IpsmID, "Error: There was an error running command: [%s %v] Stderr: [%v, %s]", cmdName, strings.Join(cmdArgs, " "), err, strings.TrimSuffix(string(msg.Stderr), "\n")) } @@ -438,7 +423,6 @@ func (ipsMgr *IpsetManager) Save(configFile string) error { cmd := exec.Command(util.Ipset, util.IpsetSaveFlag, util.IpsetFileFlag, configFile) if err := cmd.Start(); err != nil { - log.Errorf("Error: failed to save ipset to file.") metrics.SendErrorMetric(util.IpsmID, "Error: failed to save ipset to file.") return err } @@ -455,7 +439,6 @@ func (ipsMgr *IpsetManager) Restore(configFile string) error { f, err := os.Stat(configFile) if err != nil { - log.Errorf("Error: failed to get file %s stat from ipsm.Restore", configFile) metrics.SendErrorMetric(util.IpsmID, "Error: failed to get file %s stat from ipsm.Restore", configFile) return err } @@ -468,7 +451,6 @@ func (ipsMgr *IpsetManager) Restore(configFile string) error { cmd := exec.Command(util.Ipset, util.IpsetRestoreFlag, util.IpsetFileFlag, configFile) if err := cmd.Start(); err != nil { - log.Errorf("Error: failed to restore ipset from file.") metrics.SendErrorMetric(util.IpsmID, "Error: failed to to restore ipset from file.") return err } @@ -477,4 +459,4 @@ func (ipsMgr *IpsetManager) Restore(configFile string) error { //TODO based on the set name and number of entries in the config file, update IPSetInventory return nil -} +} \ No newline at end of file diff --git a/npm/iptm/iptm.go b/npm/iptm/iptm.go index 540de25f7e..6935dd7473 100644 --- a/npm/iptm/iptm.go +++ b/npm/iptm/iptm.go @@ -1,9 +1,6 @@ -/* +// Part of this file is modified from iptables package from Kuberenetes. +// https://github.com/kubernetes/kubernetes/blob/master/pkg/util/iptables -Part of this file is modified from iptables package from Kuberenetes. -https://github.com/kubernetes/kubernetes/blob/master/pkg/util/iptables - -*/ package iptm import ( @@ -15,7 +12,6 @@ import ( "time" "golang.org/x/sys/unix" - "github.com/Azure/azure-container-networking/log" "github.com/Azure/azure-container-networking/npm/metrics" "github.com/Azure/azure-container-networking/npm/util" @@ -92,7 +88,6 @@ func (iptMgr *IptablesManager) InitNpmChains() error { iptMgr.OperationFlag = util.IptablesInsertionFlag entry.Specs = append([]string{index}, entry.Specs...) if _, err = iptMgr.Run(entry); err != nil { - log.Errorf("Error: failed to add AZURE-NPM chain to FORWARD chain.") metrics.SendErrorMetric(util.IptmID, "Error: failed to add AZURE-NPM chain to FORWARD chain.") return err } @@ -114,7 +109,6 @@ func (iptMgr *IptablesManager) InitNpmChains() error { if !exists { iptMgr.OperationFlag = util.IptablesAppendFlag if _, err := iptMgr.Run(entry); err != nil { - log.Errorf("Error: failed to add AZURE-NPM-INGRESS-PORT chain to AZURE-NPM chain.") metrics.SendErrorMetric(util.IptmID, "Error: failed to add AZURE-NPM-INGRESS-PORT chain to AZURE-NPM chain.") return err } @@ -141,7 +135,6 @@ func (iptMgr *IptablesManager) InitNpmChains() error { if !exists { iptMgr.OperationFlag = util.IptablesAppendFlag if _, err := iptMgr.Run(entry); err != nil { - log.Errorf("Error: failed to add AZURE-NPM-EGRESS-PORT chain to AZURE-NPM chain.") metrics.SendErrorMetric(util.IptmID, "Error: failed to add AZURE-NPM-INGRESS-PORT chain to AZURE-NPM chain.") return err } @@ -168,7 +161,6 @@ func (iptMgr *IptablesManager) InitNpmChains() error { if !exists { iptMgr.OperationFlag = util.IptablesAppendFlag if _, err := iptMgr.Run(entry); err != nil { - log.Errorf("Error: failed to add AZURE-NPM-TARGET-SETS chain to AZURE-NPM chain.") metrics.SendErrorMetric(util.IptmID, "Error: failed to add AZURE-NPM-TARGET-SETS chain to AZURE-NPM chain.") return err } @@ -192,7 +184,6 @@ func (iptMgr *IptablesManager) InitNpmChains() error { if !exists { iptMgr.OperationFlag = util.IptablesAppendFlag if _, err = iptMgr.Run(entry); err != nil { - log.Logf("Error: failed to add default allow CONNECTED/RELATED rule to AZURE-NPM chain.") metrics.SendErrorMetric(util.IptmID, "Error: failed to add default allow CONNECTED/RELATED rule to AZURE-NPM chain.") return err } @@ -223,7 +214,6 @@ func (iptMgr *IptablesManager) UninitNpmChains() error { iptMgr.OperationFlag = util.IptablesDeletionFlag errCode, err := iptMgr.Run(entry) if errCode != iptablesErrDoesNotExist && err != nil { - log.Errorf("Error: failed to remove default rule from FORWARD chain.") metrics.SendErrorMetric(util.IptmID, "Error: failed to add default allow CONNECTED/RELATED rule to AZURE-NPM chain.") return err } @@ -235,7 +225,6 @@ func (iptMgr *IptablesManager) UninitNpmChains() error { } errCode, err := iptMgr.Run(entry) if errCode != iptablesErrDoesNotExist && err != nil { - log.Errorf("Error: failed to flush iptables chain %s.", chain) metrics.SendErrorMetric(util.IptmID, "Error: failed to flush iptables chain %s.", chain) } } @@ -277,7 +266,6 @@ func (iptMgr *IptablesManager) AddChain(chain string) error { return nil } - log.Errorf("Error: failed to create iptables chain %s.", entry.Chain) metrics.SendErrorMetric(util.IptmID, "Error: failed to create iptables chain %s.", entry.Chain) return err } @@ -298,7 +286,6 @@ func (iptMgr *IptablesManager) DeleteChain(chain string) error { return nil } - log.Errorf("Error: failed to delete iptables chain %s.", entry.Chain) metrics.SendErrorMetric(util.IptmID, "Error: failed to delete iptables chain %s.", entry.Chain) return err } @@ -318,7 +305,6 @@ func (iptMgr *IptablesManager) Add(entry *IptEntry) error { iptMgr.OperationFlag = util.IptablesInsertionFlag } if _, err := iptMgr.Run(entry); err != nil { - log.Errorf("Error: failed to create iptables rules.") metrics.SendErrorMetric(util.IptmID, "Error: failed to create iptables rules.") return err } @@ -344,7 +330,6 @@ func (iptMgr *IptablesManager) Delete(entry *IptEntry) error { iptMgr.OperationFlag = util.IptablesDeletionFlag if _, err := iptMgr.Run(entry); err != nil { - log.Errorf("Error: failed to delete iptables rules.") metrics.SendErrorMetric(util.IptmID, "Error: failed to delete iptables rules.") return err } @@ -375,7 +360,6 @@ func (iptMgr *IptablesManager) Run(entry *IptEntry) (int, error) { if msg, failed := err.(*exec.ExitError); failed { errCode := msg.Sys().(syscall.WaitStatus).ExitStatus() if errCode > 0 && iptMgr.OperationFlag != util.IptablesCheckFlag { - log.Errorf("Error: There was an error running command: [%s %v] Stderr: [%v, %s]", cmdName, strings.Join(cmdArgs, " "), err, strings.TrimSuffix(string(msg.Stderr), "\n")) metrics.SendErrorMetric(util.IptmID, "Error: There was an error running command: [%s %v] Stderr: [%v, %s]", cmdName, strings.Join(cmdArgs, " "), err, strings.TrimSuffix(string(msg.Stderr), "\n")) } @@ -405,7 +389,6 @@ func (iptMgr *IptablesManager) Save(configFile string) error { // create the config file for writing f, err := os.Create(configFile) if err != nil { - log.Errorf("Error: failed to open file: %s.", configFile) metrics.SendErrorMetric(util.IptmID, "Error: failed to open file: %s.", configFile) return err } @@ -414,7 +397,6 @@ func (iptMgr *IptablesManager) Save(configFile string) error { cmd := exec.Command(util.IptablesSave) cmd.Stdout = f if err := cmd.Start(); err != nil { - log.Errorf("Error: failed to run iptables-save.") metrics.SendErrorMetric(util.IptmID, "Error: failed to run iptables-save.") return err } @@ -443,7 +425,6 @@ func (iptMgr *IptablesManager) Restore(configFile string) error { // open the config file for reading f, err := os.Open(configFile) if err != nil { - log.Errorf("Error: failed to open file: %s.", configFile) metrics.SendErrorMetric(util.IptmID, "Error: failed to open file: %s.", configFile) return err } @@ -452,7 +433,6 @@ func (iptMgr *IptablesManager) Restore(configFile string) error { cmd := exec.Command(util.IptablesRestore) cmd.Stdin = f if err := cmd.Start(); err != nil { - log.Errorf("Error: failed to run iptables-restore.") metrics.SendErrorMetric(util.IptmID, "Error: failed to run iptables-restore.") return err } @@ -476,7 +456,6 @@ func grabIptablesLocks() (*os.File, error) { // Grab 1.6.x style lock. l, err := os.OpenFile(util.IptablesLockFile, os.O_CREATE, 0600) if err != nil { - log.Logf("Error: failed to open iptables lock file %s.", util.IptablesLockFile) metrics.SendErrorMetric(util.IptmID, "Error: failed to open iptables lock file %s.", util.IptablesLockFile) return nil, err } @@ -488,7 +467,6 @@ func grabIptablesLocks() (*os.File, error) { return true, nil }); err != nil { - log.Logf("Error: failed to acquire new iptables lock: %v.", err) metrics.SendErrorMetric(util.IptmID, "Error: failed to acquire new iptables lock: %v.", err) return nil, err } @@ -525,4 +503,4 @@ func grabIptablesFileLock(f *os.File) error { // // Write table headers. // writeLine(filterChains, "*filter") -// } +// } \ No newline at end of file diff --git a/npm/metrics/ai-utils.go b/npm/metrics/ai-utils.go index 9a7944dc2b..faa3da530a 100644 --- a/npm/metrics/ai-utils.go +++ b/npm/metrics/ai-utils.go @@ -14,7 +14,7 @@ var ( th aitelemetry.TelemetryHandle ) -// CreateTelemetryHandle creates +// CreateTelemetryHandle creates a handler to initialize AI telemetry func CreateTelemetryHandle(version, aiMetadata string) error { aiConfig := aitelemetry.AIConfig{ @@ -28,13 +28,14 @@ func CreateTelemetryHandle(version, aiMetadata string) error { GetEnvRetryWaitTimeInSecs: util.GetEnvRetryWaitTimeInSecs, } - var err error - th, err = aitelemetry.NewAITelemetry("", aiMetadata, aiConfig) - - for i := 0; err != nil && i < util.AiInitializeRetryCount; i++ { - log.Logf("Failed to init AppInsights with err: %+v for %d time", err, i + 1) - time.Sleep(time.Minute * 5) - th, err = aitelemetry.NewAITelemetry("", aiMetadata, aiConfig) + for i := 0; i < util.AiInitializeRetryCount; i++ { + th, err := aitelemetry.NewAITelemetry("", aiMetadata, aiConfig) + if err != nil { + log.Logf("Failed to init AppInsights with err: %+v for %d time", err, i+1) + time.Sleep(time.Minute * util.AiInitializeRetryInMin) + } else { + i = util.AiInitializeRetryCount + } } if err != nil { @@ -51,7 +52,7 @@ func CreateTelemetryHandle(version, aiMetadata string) error { // SendErrorMetric is responsible for sending error metrics trhough AI telemetry func SendErrorMetric(operationID int, format string, args ...interface{}) { // Send error metrics - customDimensions := map[string]string { + customDimensions := map[string]string{ util.ErrorCode: strconv.Itoa(operationID), } metric := aitelemetry.Metric{ @@ -68,14 +69,15 @@ func SendErrorMetric(operationID int, format string, args ...interface{}) { Context: strconv.Itoa(operationID), CustomDimensions: make(map[string]string), } + log.Errorf(msg) SendLog(report) } // SendMetric sends metrics func SendMetric(metric aitelemetry.Metric) { if th == nil { - log.Logf("AppInsights didn't initialized.") - return + log.Logf("AppInsights didn't initialized.") + return } th.TrackMetric(metric) } @@ -83,9 +85,8 @@ func SendMetric(metric aitelemetry.Metric) { // SendLog sends log func SendLog(report aitelemetry.Report) { if th == nil { - log.Logf("AppInsights didn't initialized.") - return + log.Logf("AppInsights didn't initialized.") + return } th.TrackLog(report) } - diff --git a/npm/npm.go b/npm/npm.go index 15d04ed04f..c8f4f450eb 100644 --- a/npm/npm.go +++ b/npm/npm.go @@ -138,7 +138,6 @@ func (npMgr *NetworkPolicyManager) restore() { time.Sleep(restoreRetryWaitTimeInSeconds * time.Second) } - log.Logf("Error: timeout restoring Azure-NPM states") metrics.SendErrorMetric(util.NpmID, "Error: timeout restoring Azure-NPM states") panic(err.Error) } @@ -151,7 +150,6 @@ func (npMgr *NetworkPolicyManager) backup() { time.Sleep(backupWaitTimeInSeconds * time.Second) if err = iptMgr.Save(util.IptablesConfigFile); err != nil { - log.Logf("Error: failed to back up Azure-NPM states") metrics.SendErrorMetric(util.NpmID, "Error: failed to back up Azure-NPM states") } } @@ -206,14 +204,12 @@ func NewNetworkPolicyManager(clientset *kubernetes.Clientset, informerFactory in } } if err != nil { - log.Logf("Error: failed to retrieving kubernetes version") metrics.SendErrorMetric(util.NpmID, "Error: failed to retrieving kubernetes version") panic(err.Error) } log.Logf("API server version: %+v", serverVersion) if err = util.SetIsNewNwPolicyVerFlag(serverVersion); err != nil { - log.Logf("Error: failed to set IsNewNwPolicyVerFlag") metrics.SendErrorMetric(util.NpmID, "Error: failed to set IsNewNwPolicyVerFlag") panic(err.Error) } @@ -245,7 +241,6 @@ func NewNetworkPolicyManager(clientset *kubernetes.Clientset, informerFactory in // Create ipset for the namespace. kubeSystemNs := "ns-" + util.KubeSystemFlag if err := allNs.ipsMgr.CreateSet(kubeSystemNs, append([]string{util.IpsetNetHashFlag})); err != nil { - log.Logf("Error: failed to create ipset for namespace %s.", kubeSystemNs) metrics.SendErrorMetric(util.NpmID, "Error: failed to create ipset for namespace %s.", kubeSystemNs) } diff --git a/npm/plugin/main.go b/npm/plugin/main.go index af7a93a60d..8cf9571db5 100644 --- a/npm/plugin/main.go +++ b/npm/plugin/main.go @@ -8,7 +8,6 @@ import ( "github.com/Azure/azure-container-networking/log" "github.com/Azure/azure-container-networking/npm" "github.com/Azure/azure-container-networking/npm/metrics" - "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/informers" "k8s.io/client-go/kubernetes" @@ -62,7 +61,7 @@ func main() { factory := informers.NewSharedInformerFactory(clientset, time.Hour*24) npMgr := npm.NewNetworkPolicyManager(clientset, factory, version) - metrics.CreateTelemetryHandle(npMgr.GetAppVersion(), npm.GetAIMetadata()) + go metrics.CreateTelemetryHandle(npMgr.GetAppVersion(), npm.GetAIMetadata()) go npMgr.SendClusterMetrics() @@ -74,4 +73,4 @@ func main() { metrics.StartHTTP(0) select {} -} +} \ No newline at end of file diff --git a/npm/util/const.go b/npm/util/const.go index e471355f5a..2d3240ac98 100644 --- a/npm/util/const.go +++ b/npm/util/const.go @@ -130,12 +130,16 @@ const ( FunctionName string = "FunctionName" ErrorCode string = "ErrorCode" - BatchSize int = 32768 - BatchInterval int = 30 - RefreshTimeout int = 15 + // A general used default batch size in AI telemetry + BatchSizeInBytes int = 32768 + BatchIntervalInSecs int = 30 + RefreshTimeoutInSecs int = 15 GetEnvRetryCount int = 5 GetEnvRetryWaitTimeInSecs int = 3 - AiInitializeRetryCount int = 5 + AiInitializeRetryCount int = 3 + AiInitializeRetryInMin int = 2 + // These ID represents where did the error log generate from. + // It's for better query purpose. NpmID int = 1 IpsmID int = 2 IptmID int = 3 @@ -144,4 +148,4 @@ const ( ErrorValue float64 = 1 -) +) \ No newline at end of file From a7a298e4ab6d75f4bfba11b052dec611f62e0fac Mon Sep 17 00:00:00 2001 From: Shufang Date: Wed, 2 Sep 2020 00:10:57 -0700 Subject: [PATCH 15/19] Fix typo. --- npm/metrics/ai-utils.go | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/npm/metrics/ai-utils.go b/npm/metrics/ai-utils.go index faa3da530a..a1a1c3d2ca 100644 --- a/npm/metrics/ai-utils.go +++ b/npm/metrics/ai-utils.go @@ -20,19 +20,20 @@ func CreateTelemetryHandle(version, aiMetadata string) error { aiConfig := aitelemetry.AIConfig{ AppName: util.AzureNpmFlag, AppVersion: version, - BatchSize: util.BatchSize, - BatchInterval: util.BatchInterval, - RefreshTimeout: util.RefreshTimeout, + BatchSize: util.BatchSizeInBytes, + BatchInterval: util.BatchIntervalInSecs, + RefreshTimeout: util.RefreshTimeoutInSecs, DebugMode: util.DebugMode, GetEnvRetryCount: util.GetEnvRetryCount, GetEnvRetryWaitTimeInSecs: util.GetEnvRetryWaitTimeInSecs, } + var err error for i := 0; i < util.AiInitializeRetryCount; i++ { - th, err := aitelemetry.NewAITelemetry("", aiMetadata, aiConfig) + th, err = aitelemetry.NewAITelemetry("", aiMetadata, aiConfig) if err != nil { log.Logf("Failed to init AppInsights with err: %+v for %d time", err, i+1) - time.Sleep(time.Minute * util.AiInitializeRetryInMin) + time.Sleep(time.Minute * time.Duration(util.AiInitializeRetryInMin)) } else { i = util.AiInitializeRetryCount } From a5f65bedcc149b372dc3f72e3041e37ec4cdee35 Mon Sep 17 00:00:00 2001 From: Shufang Date: Wed, 2 Sep 2020 00:15:53 -0700 Subject: [PATCH 16/19] Fix indentation. --- npm/ipsm/ipsm.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/npm/ipsm/ipsm.go b/npm/ipsm/ipsm.go index 9641456952..8acfb27855 100644 --- a/npm/ipsm/ipsm.go +++ b/npm/ipsm/ipsm.go @@ -70,12 +70,12 @@ func (ipsMgr *IpsetManager) Exists(key string, val string, kind string) bool { // SetExists checks whehter an ipset exists. func (ipsMgr *IpsetManager) SetExists(setName, kind string) bool { - m := ipsMgr.setMap - if kind == util.IpsetSetListFlag { - m = ipsMgr.listMap - } - _, exists := m[setName] - return exists + m := ipsMgr.setMap + if kind == util.IpsetSetListFlag { + m = ipsMgr.listMap + } + _, exists := m[setName] + return exists } func isNsSet(setName string) bool { From f621d2ed12f7908b1a6b99b02e6880d2c0ac6d5f Mon Sep 17 00:00:00 2001 From: Shufang Date: Wed, 2 Sep 2020 00:42:51 -0700 Subject: [PATCH 17/19] Fix AI initialize issue. --- npm/plugin/main.go | 3 ++- npm/util/const.go | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/npm/plugin/main.go b/npm/plugin/main.go index 8cf9571db5..e5e11e3741 100644 --- a/npm/plugin/main.go +++ b/npm/plugin/main.go @@ -61,11 +61,12 @@ func main() { factory := informers.NewSharedInformerFactory(clientset, time.Hour*24) npMgr := npm.NewNetworkPolicyManager(clientset, factory, version) - go metrics.CreateTelemetryHandle(npMgr.GetAppVersion(), npm.GetAIMetadata()) + metrics.CreateTelemetryHandle(npMgr.GetAppVersion(), npm.GetAIMetadata()) go npMgr.SendClusterMetrics() if err = npMgr.Start(wait.NeverStop); err != nil { + metrics.SendErrorMetric("npm failed with error %v.", err) log.Logf("npm failed with error %v.", err) panic(err.Error) } diff --git a/npm/util/const.go b/npm/util/const.go index 2d3240ac98..c9ecfa8a02 100644 --- a/npm/util/const.go +++ b/npm/util/const.go @@ -137,7 +137,7 @@ const ( GetEnvRetryCount int = 5 GetEnvRetryWaitTimeInSecs int = 3 AiInitializeRetryCount int = 3 - AiInitializeRetryInMin int = 2 + AiInitializeRetryInMin int = 1 // These ID represents where did the error log generate from. // It's for better query purpose. NpmID int = 1 From 3a0dd4041d0327ee846f853928e0f6747f9769f4 Mon Sep 17 00:00:00 2001 From: Shufang Date: Wed, 2 Sep 2020 01:03:04 -0700 Subject: [PATCH 18/19] Remove unnecessary log. --- npm/plugin/main.go | 1 - 1 file changed, 1 deletion(-) diff --git a/npm/plugin/main.go b/npm/plugin/main.go index e5e11e3741..c3a804ea19 100644 --- a/npm/plugin/main.go +++ b/npm/plugin/main.go @@ -66,7 +66,6 @@ func main() { go npMgr.SendClusterMetrics() if err = npMgr.Start(wait.NeverStop); err != nil { - metrics.SendErrorMetric("npm failed with error %v.", err) log.Logf("npm failed with error %v.", err) panic(err.Error) } From b56c4cdd0432bbd298b3d1f18a432043ea65f158 Mon Sep 17 00:00:00 2001 From: Shufang Date: Wed, 2 Sep 2020 21:07:03 -0700 Subject: [PATCH 19/19] Use break in if condition. --- npm/metrics/ai-utils.go | 2 +- npm/npm.go | 4 ++-- npm/util/const.go | 24 ++++++++++++------------ 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/npm/metrics/ai-utils.go b/npm/metrics/ai-utils.go index a1a1c3d2ca..271f2f0d1e 100644 --- a/npm/metrics/ai-utils.go +++ b/npm/metrics/ai-utils.go @@ -35,7 +35,7 @@ func CreateTelemetryHandle(version, aiMetadata string) error { log.Logf("Failed to init AppInsights with err: %+v for %d time", err, i+1) time.Sleep(time.Minute * time.Duration(util.AiInitializeRetryInMin)) } else { - i = util.AiInitializeRetryCount + break } } diff --git a/npm/npm.go b/npm/npm.go index c8f4f450eb..df6530e409 100644 --- a/npm/npm.go +++ b/npm/npm.go @@ -1,4 +1,4 @@ -// Copyright 2018 Microsoft. All rights reserved. +// Package npm Copyright 2018 Microsoft. All rights reserved. // MIT License package npm @@ -90,7 +90,7 @@ func (npMgr *NetworkPolicyManager) GetAppVersion() string { // GetAIMetadata returns ai metadata number func GetAIMetadata() string { - return aiMetadata; + return aiMetadata } // SendClusterMetrics :- send NPM cluster metrics using AppInsights diff --git a/npm/util/const.go b/npm/util/const.go index c9ecfa8a02..8a4abd1b50 100644 --- a/npm/util/const.go +++ b/npm/util/const.go @@ -22,7 +22,7 @@ const ( //iptables related constants. const ( Iptables string = "iptables" - Ip6tables string = "ip6tables" + IP6tables string = "ip6tables" IptablesSave string = "iptables-save" IptablesRestore string = "iptables-restore" IptablesConfigFile string = "/var/log/iptables.conf" @@ -90,10 +90,10 @@ const ( IpsetFlushFlag string = "-F" IpsetDestroyFlag string = "-X" - IpsetExistFlag string = "-exist" - IpsetFileFlag string = "-file" + IpsetExistFlag string = "-exist" + IpsetFileFlag string = "-file" IPsetCheckListFlag string = "list" - IpsetTestFlag string = "test" + IpsetTestFlag string = "test" IpsetSetListFlag string = "setlist" IpsetNetHashFlag string = "nethash" @@ -130,7 +130,8 @@ const ( FunctionName string = "FunctionName" ErrorCode string = "ErrorCode" - // A general used default batch size in AI telemetry + // Default batch size in AI telemetry + // Defined here https://docs.microsoft.com/en-us/azure/azure-monitor/app/pricing BatchSizeInBytes int = 32768 BatchIntervalInSecs int = 30 RefreshTimeoutInSecs int = 15 @@ -140,12 +141,11 @@ const ( AiInitializeRetryInMin int = 1 // These ID represents where did the error log generate from. // It's for better query purpose. - NpmID int = 1 - IpsmID int = 2 - IptmID int = 3 + NpmID int = 1 + IpsmID int = 2 + IptmID int = 3 - DebugMode bool = true + DebugMode bool = true - ErrorValue float64 = 1 - -) \ No newline at end of file + ErrorValue float64 = 1 +)