From 8e65f0253b76847a24ebc307445c8ea366fde81d Mon Sep 17 00:00:00 2001 From: vakr Date: Tue, 9 Feb 2021 18:20:07 -0800 Subject: [PATCH 1/5] Reducing re-sync time and adding better delete watch function logic --- npm/npm.go | 103 +++++++++++++++++++++++++++++++++++++++++---- npm/plugin/main.go | 11 ++++- 2 files changed, 103 insertions(+), 11 deletions(-) diff --git a/npm/npm.go b/npm/npm.go index b86337d7cc..1fb4c6347d 100644 --- a/npm/npm.go +++ b/npm/npm.go @@ -252,18 +252,49 @@ func NewNetworkPolicyManager(clientset *kubernetes.Clientset, informerFactory in // Pod event handlers cache.ResourceEventHandlerFuncs{ AddFunc: func(obj interface{}) { + podObj, ok := obj.(*corev1.Pod) + if !ok { + metrics.SendErrorLogAndMetric(util.NpmID, "ADD Pod: Received unexpected object type: %v", obj) + return + } npMgr.Lock() - npMgr.AddPod(obj.(*corev1.Pod)) + npMgr.AddPod(podObj) npMgr.Unlock() }, UpdateFunc: func(old, new interface{}) { + oldPodObj, ok := old.(*corev1.Pod) + if !ok { + metrics.SendErrorLogAndMetric(util.NpmID, "UPDATE Pod: Received unexpected old object type: %v", oldPodObj) + return + } + newPodObj, ok := new.(*corev1.Pod) + if !ok { + metrics.SendErrorLogAndMetric(util.NpmID, "UPDATE Pod: Received unexpected new object type: %v", newPodObj) + return + } npMgr.Lock() - npMgr.UpdatePod(old.(*corev1.Pod), new.(*corev1.Pod)) + npMgr.UpdatePod(oldPodObj, newPodObj) npMgr.Unlock() }, DeleteFunc: func(obj interface{}) { + // DeleteFunc gets the final state of the resource (if it is known). + // Otherwise, it gets an object of type DeletedFinalStateUnknown. + // This can happen if the watch is closed and misses the delete event and + // the controller doesn't notice the deletion until the subsequent re-list + podObj, ok := obj.(*corev1.Pod) + if !ok { + tombstone, ok := obj.(cache.DeletedFinalStateUnknown) + if !ok { + metrics.SendErrorLogAndMetric(util.NpmID, "DELETE Pod: Received unexpected object type: %v", obj) + return + } + if podObj, ok = tombstone.Obj.(*corev1.Pod); !ok { + metrics.SendErrorLogAndMetric(util.NpmID, "DELETE Pod: Received unexpected object type: %v", obj) + return + } + } npMgr.Lock() - npMgr.DeletePod(obj.(*corev1.Pod)) + npMgr.DeletePod(podObj) npMgr.Unlock() }, }, @@ -273,18 +304,45 @@ func NewNetworkPolicyManager(clientset *kubernetes.Clientset, informerFactory in // Namespace event handlers cache.ResourceEventHandlerFuncs{ AddFunc: func(obj interface{}) { + nameSpaceObj, ok := obj.(*corev1.Namespace) + if !ok { + metrics.SendErrorLogAndMetric(util.NpmID, "ADD NameSpace: Received unexpected object type: %v", obj) + return + } npMgr.Lock() - npMgr.AddNamespace(obj.(*corev1.Namespace)) + npMgr.AddNamespace(nameSpaceObj) npMgr.Unlock() }, UpdateFunc: func(old, new interface{}) { + oldNameSpaceObj, ok := old.(*corev1.Namespace) + if !ok { + metrics.SendErrorLogAndMetric(util.NpmID, "UPDATE NameSpace: Received unexpected old object type: %v", oldNameSpaceObj) + return + } + newNameSpaceObj, ok := new.(*corev1.Namespace) + if !ok { + metrics.SendErrorLogAndMetric(util.NpmID, "UPDATE NameSpace: Received unexpected new object type: %v", newNameSpaceObj) + return + } npMgr.Lock() - npMgr.UpdateNamespace(old.(*corev1.Namespace), new.(*corev1.Namespace)) + npMgr.UpdateNamespace(oldNameSpaceObj, newNameSpaceObj) npMgr.Unlock() }, DeleteFunc: func(obj interface{}) { + nameSpaceObj, ok := obj.(*corev1.Namespace) + if !ok { + tombstone, ok := obj.(cache.DeletedFinalStateUnknown) + if !ok { + metrics.SendErrorLogAndMetric(util.NpmID, "DELETE NameSpace: Received unexpected object type: %v", obj) + return + } + if nameSpaceObj, ok = tombstone.Obj.(*corev1.Namespace); !ok { + metrics.SendErrorLogAndMetric(util.NpmID, "DELETE NameSpace: Received unexpected object type: %v", obj) + return + } + } npMgr.Lock() - npMgr.DeleteNamespace(obj.(*corev1.Namespace)) + npMgr.DeleteNamespace(nameSpaceObj) npMgr.Unlock() }, }, @@ -294,18 +352,45 @@ func NewNetworkPolicyManager(clientset *kubernetes.Clientset, informerFactory in // Network policy event handlers cache.ResourceEventHandlerFuncs{ AddFunc: func(obj interface{}) { + networkPolicyObj, ok := obj.(*networkingv1.NetworkPolicy) + if !ok { + metrics.SendErrorLogAndMetric(util.NpmID, "ADD Network Policy: Received unexpected object type: %v", obj) + return + } npMgr.Lock() - npMgr.AddNetworkPolicy(obj.(*networkingv1.NetworkPolicy)) + npMgr.AddNetworkPolicy(networkPolicyObj) npMgr.Unlock() }, UpdateFunc: func(old, new interface{}) { + oldNetworkPolicyObj, ok := old.(*networkingv1.NetworkPolicy) + if !ok { + metrics.SendErrorLogAndMetric(util.NpmID, "UPDATE Network Policy: Received unexpected old object type: %v", oldNetworkPolicyObj) + return + } + newNetworkPolicyObj, ok := new.(*networkingv1.NetworkPolicy) + if !ok { + metrics.SendErrorLogAndMetric(util.NpmID, "UPDATE Network Policy: Received unexpected new object type: %v", newNetworkPolicyObj) + return + } npMgr.Lock() - npMgr.UpdateNetworkPolicy(old.(*networkingv1.NetworkPolicy), new.(*networkingv1.NetworkPolicy)) + npMgr.UpdateNetworkPolicy(oldNetworkPolicyObj, newNetworkPolicyObj) npMgr.Unlock() }, DeleteFunc: func(obj interface{}) { + networkPolicyObj, ok := obj.(*networkingv1.NetworkPolicy) + if !ok { + tombstone, ok := obj.(cache.DeletedFinalStateUnknown) + if !ok { + metrics.SendErrorLogAndMetric(util.NpmID, "DELETE Network Policy: Received unexpected object type: %v", obj) + return + } + if networkPolicyObj, ok = tombstone.Obj.(*networkingv1.NetworkPolicy); !ok { + metrics.SendErrorLogAndMetric(util.NpmID, "DELETE Network Policy: Received unexpected object type: %v", obj) + return + } + } npMgr.Lock() - npMgr.DeleteNetworkPolicy(obj.(*networkingv1.NetworkPolicy)) + npMgr.DeleteNetworkPolicy(networkPolicyObj) npMgr.Unlock() }, }, diff --git a/npm/plugin/main.go b/npm/plugin/main.go index c3a804ea19..c41919d47e 100644 --- a/npm/plugin/main.go +++ b/npm/plugin/main.go @@ -3,6 +3,7 @@ package main import ( + "math/rand" "time" "github.com/Azure/azure-container-networking/log" @@ -58,7 +59,13 @@ func main() { panic(err.Error()) } - factory := informers.NewSharedInformerFactory(clientset, time.Hour*24) + // Setting reSyncPeriod to 15 secs + minResyncPeriod := 15 * time.Second + + // Adding some randomness so all NPM pods will not request for info at once. + factor := rand.Float64() + 1 + resyncPeriod := time.Duration(float64(minResyncPeriod.Nanoseconds()) * factor) + factory := informers.NewSharedInformerFactory(clientset, resyncPeriod) npMgr := npm.NewNetworkPolicyManager(clientset, factory, version) metrics.CreateTelemetryHandle(npMgr.GetAppVersion(), npm.GetAIMetadata()) @@ -73,4 +80,4 @@ func main() { metrics.StartHTTP(0) select {} -} \ No newline at end of file +} From 9fcb6e91ad417273361576cdd7812602415d0cb4 Mon Sep 17 00:00:00 2001 From: vakr Date: Thu, 11 Feb 2021 17:56:08 -0800 Subject: [PATCH 2/5] Changing resync period to 15 mins --- npm/plugin/main.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/npm/plugin/main.go b/npm/plugin/main.go index c41919d47e..f9dc4b9f31 100644 --- a/npm/plugin/main.go +++ b/npm/plugin/main.go @@ -16,6 +16,7 @@ import ( ) const waitForTelemetryInSeconds = 60 +const resyncPeriodInMinutes = 15 // Version is populated by make during build. var version string @@ -60,7 +61,7 @@ func main() { } // Setting reSyncPeriod to 15 secs - minResyncPeriod := 15 * time.Second + minResyncPeriod := resyncPeriodInMinutes * time.Minute // Adding some randomness so all NPM pods will not request for info at once. factor := rand.Float64() + 1 From 6a94bb2792c3d8bbdf04c02bad31d1d92cc522d8 Mon Sep 17 00:00:00 2001 From: vakr Date: Tue, 16 Feb 2021 11:07:24 -0800 Subject: [PATCH 3/5] adding a log message on resync period --- npm/plugin/main.go | 1 + 1 file changed, 1 insertion(+) diff --git a/npm/plugin/main.go b/npm/plugin/main.go index f9dc4b9f31..83017cd7d2 100644 --- a/npm/plugin/main.go +++ b/npm/plugin/main.go @@ -66,6 +66,7 @@ func main() { // Adding some randomness so all NPM pods will not request for info at once. factor := rand.Float64() + 1 resyncPeriod := time.Duration(float64(minResyncPeriod.Nanoseconds()) * factor) + log.Logf("[INFO] Resync period for NPM pod is set to %d.", int(resyncPeriod/time.Minute)) factory := informers.NewSharedInformerFactory(clientset, resyncPeriod) npMgr := npm.NewNetworkPolicyManager(clientset, factory, version) From 0985dd51dfd494715b536f5ee32e6e9b870c3fcb Mon Sep 17 00:00:00 2001 From: vakr Date: Tue, 16 Feb 2021 14:21:24 -0800 Subject: [PATCH 4/5] reverting resync changes --- npm/plugin/main.go | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/npm/plugin/main.go b/npm/plugin/main.go index 83017cd7d2..f5d7208d55 100644 --- a/npm/plugin/main.go +++ b/npm/plugin/main.go @@ -3,7 +3,6 @@ package main import ( - "math/rand" "time" "github.com/Azure/azure-container-networking/log" @@ -16,7 +15,6 @@ import ( ) const waitForTelemetryInSeconds = 60 -const resyncPeriodInMinutes = 15 // Version is populated by make during build. var version string @@ -60,14 +58,7 @@ func main() { panic(err.Error()) } - // Setting reSyncPeriod to 15 secs - minResyncPeriod := resyncPeriodInMinutes * time.Minute - - // Adding some randomness so all NPM pods will not request for info at once. - factor := rand.Float64() + 1 - resyncPeriod := time.Duration(float64(minResyncPeriod.Nanoseconds()) * factor) - log.Logf("[INFO] Resync period for NPM pod is set to %d.", int(resyncPeriod/time.Minute)) - factory := informers.NewSharedInformerFactory(clientset, resyncPeriod) + factory := informers.NewSharedInformerFactory(clientset, time.Hour*24) npMgr := npm.NewNetworkPolicyManager(clientset, factory, version) metrics.CreateTelemetryHandle(npMgr.GetAppVersion(), npm.GetAIMetadata()) From 657b65475c1de194b2bfe6b41b08d9b40ea8b98b Mon Sep 17 00:00:00 2001 From: vakr Date: Tue, 16 Feb 2021 14:22:08 -0800 Subject: [PATCH 5/5] reverting resync changes --- npm/plugin/main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/npm/plugin/main.go b/npm/plugin/main.go index f5d7208d55..c3a804ea19 100644 --- a/npm/plugin/main.go +++ b/npm/plugin/main.go @@ -73,4 +73,4 @@ func main() { metrics.StartHTTP(0) select {} -} +} \ No newline at end of file