From 758e0cb5bc4b158215221e23113718905005578f Mon Sep 17 00:00:00 2001 From: Junguk Cho Date: Tue, 6 Apr 2021 11:36:38 -0700 Subject: [PATCH 01/19] first version of network policy controller and its unit tests --- npm/networkPolicyController.go | 545 ++++++++++++++++++ ...est.go => networkPolicyController_test.go} | 368 ++++++++---- npm/npm.go | 91 +-- npm/nwpolicy.go | 298 ---------- npm/parsePolicy.go | 4 +- 5 files changed, 826 insertions(+), 480 deletions(-) create mode 100644 npm/networkPolicyController.go rename npm/{nwpolicy_test.go => networkPolicyController_test.go} (50%) delete mode 100644 npm/nwpolicy.go diff --git a/npm/networkPolicyController.go b/npm/networkPolicyController.go new file mode 100644 index 0000000000..d284678d86 --- /dev/null +++ b/npm/networkPolicyController.go @@ -0,0 +1,545 @@ +// Copyright 2018 Microsoft. All rights reserved. +// MIT License +package npm + +import ( + "fmt" + "strconv" + "time" + + "github.com/Azure/azure-container-networking/log" + "github.com/Azure/azure-container-networking/npm/ipsm" + "github.com/Azure/azure-container-networking/npm/metrics" + "github.com/Azure/azure-container-networking/npm/util" + networkingv1 "k8s.io/api/networking/v1" + "k8s.io/apimachinery/pkg/api/errors" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/apimachinery/pkg/util/wait" + networkinginformers "k8s.io/client-go/informers/networking/v1" + "k8s.io/client-go/kubernetes" + netpollister "k8s.io/client-go/listers/networking/v1" + "k8s.io/client-go/tools/cache" + "k8s.io/client-go/util/workqueue" +) + +type networkPolicyController struct { + clientset kubernetes.Interface + netPolLister netpollister.NetworkPolicyLister + netPolListerSynced cache.InformerSynced + workqueue workqueue.RateLimitingInterface + // (TODO): networkPolController does not need to have whole NetworkPolicyManager pointer. Need to improve it + npMgr *NetworkPolicyManager + isAzureNpmChainCreated bool + // (TODO): why do we have this bool variable even though isAzureNpmChainCreated exists. + isSafeToCleanUpAzureNpmChain bool +} + +func NewNetworkPolicyController(npInformer networkinginformers.NetworkPolicyInformer, clientset kubernetes.Interface, npMgr *NetworkPolicyManager) *networkPolicyController { + netPolController := &networkPolicyController{ + clientset: clientset, + netPolLister: npInformer.Lister(), + netPolListerSynced: npInformer.Informer().HasSynced, + workqueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "NetPols"), + npMgr: npMgr, + isAzureNpmChainCreated: false, + isSafeToCleanUpAzureNpmChain: false, + } + + npInformer.Informer().AddEventHandler( + cache.ResourceEventHandlerFuncs{ + AddFunc: netPolController.addNetPol, + UpdateFunc: netPolController.updateNetPol, + DeleteFunc: netPolController.deleteNetPol, + }, + ) + return netPolController +} + +// filter this event if we do not need to handle this event +func (c *networkPolicyController) needSync(obj interface{}) (string, error) { + var key string + _, ok := obj.(*networkingv1.NetworkPolicy) + if !ok { + return key, fmt.Errorf("cannot cast obj (%v) to network policy obj", obj) + } + + var err error + if key, err = cache.MetaNamespaceKeyFunc(obj); err != nil { + return key, fmt.Errorf("error due to %s", err) + } + + return key, nil +} + +func (c *networkPolicyController) addNetPol(obj interface{}) { + key, err := c.needSync(obj) + if err != nil { + utilruntime.HandleError(err) + return + } + + // (TODO): need to remove + log.Logf("[NETPOL ADD EVENT] add key %s into workqueue", key) + c.workqueue.Add(key) +} + +func (c *networkPolicyController) updateNetPol(old, new interface{}) { + key, err := c.needSync(new) + if err != nil { + utilruntime.HandleError(err) + return + } + + // needSync already checked validation of casting new network policy. + newNetPol, _ := new.(*networkingv1.NetworkPolicy) + oldNetPol, ok := old.(*networkingv1.NetworkPolicy) + if ok { + if oldNetPol.ResourceVersion == newNetPol.ResourceVersion { + // Periodic resync will send update events for all known network plicies. + // Two different versions of the same network policy will always have different RVs. + return + } + } + + c.workqueue.Add(key) +} + +func (c *networkPolicyController) deleteNetPol(obj interface{}) { + _, ok := obj.(*networkingv1.NetworkPolicy) + // DeleteFunc gets the final state of the resource (if it is known). + // Otherwise, it gets an object of type DeletedFinalStateUnknown. + // This can happen if the watch is closed and misses the delete event and + // the controller doesn't notice the deletion until the subsequent re-list + if !ok { + tombstone, ok := obj.(cache.DeletedFinalStateUnknown) + if !ok { + metrics.SendErrorLogAndMetric(util.NetpolID, "[NETPOL DELETE EVENT] Received unexpected object type: %v", obj) + utilruntime.HandleError(fmt.Errorf("error decoding object, invalid type")) + return + } + + if _, ok = tombstone.Obj.(*networkingv1.NetworkPolicy); !ok { + metrics.SendErrorLogAndMetric(util.NetpolID, "[NETPOL DELETE EVENT] Received unexpected object type (error decoding object tombstone, invalid type): %v", obj) + utilruntime.HandleError(fmt.Errorf("error decoding object tombstone, invalid type")) + return + } + } + + log.Logf("[NETPOL DELETE EVENT]") + var key string + var err error + if key, err = cache.MetaNamespaceKeyFunc(obj); err != nil { + utilruntime.HandleError(err) + return + } + netPolCachedKey := util.GetNSNameWithPrefix(key) + + // (TODO): Reduce scope of lock later + c.npMgr.Lock() + defer c.npMgr.Unlock() + _, netPolExists := c.npMgr.RawNpMap[netPolCachedKey] + + // If network policy object is not in the RawNpMap, we do not need to clean-up states for this network policy + // since netPolController did not apply for any states for this pod + if !netPolExists { + return + } + + log.Logf("[NETPOL DEL EVENT] add key %s into workqueue", key) + c.workqueue.Add(key) +} + +func (c *networkPolicyController) Run(threadiness int, stopCh <-chan struct{}) error { + defer utilruntime.HandleCrash() + defer c.workqueue.ShutDown() + + log.Logf("Starting Network Policy %d worker(s)", threadiness) + for i := 0; i < threadiness; i++ { + go wait.Until(c.runWorker, time.Second, stopCh) + } + + log.Logf("Started Network Policy workers") + <-stopCh + log.Logf("Shutting down Network Policy workers") + + return nil +} + +func (c *networkPolicyController) runWorker() { + for c.processNextWorkItem() { + } +} + +func (c *networkPolicyController) processNextWorkItem() bool { + obj, shutdown := c.workqueue.Get() + + if shutdown { + return false + } + + err := func(obj interface{}) error { + defer c.workqueue.Done(obj) + var key string + var ok bool + if key, ok = obj.(string); !ok { + // As the item in the workqueue is actually invalid, we call + // Forget here else we'd go into a loop of attempting to + // process a work item that is invalid. + c.workqueue.Forget(obj) + utilruntime.HandleError(fmt.Errorf("expected string in workqueue but got %#v", obj)) + return nil + } + // Run the syncHandler, passing it the namespace/name string of the + // Pod resource to be synced. + // TODO : may consider using "c.queue.AddAfter(key, *requeueAfter)" according to error type later + if err := c.syncNetPol(key); err != nil { + // Put the item back on the workqueue to handle any transient errors. + c.workqueue.AddRateLimited(key) + return fmt.Errorf("error syncing '%s': %s, requeuing", key, err.Error()) + } + // Finally, if no error occurs we Forget this item so it does not + // get queued again until another change happens. + c.workqueue.Forget(obj) + log.Logf("Successfully synced '%s'", key) + return nil + }(obj) + + if err != nil { + utilruntime.HandleError(err) + metrics.SendErrorLogAndMetric(util.NetpolID, "syncNetPol error due to %v", err) + return true + } + + return true +} + +// syncNetPol compares the actual state with the desired, and attempts to converge the two. +func (c *networkPolicyController) syncNetPol(key string) error { + // Convert the namespace/name string into a distinct namespace and name + namespace, name, err := cache.SplitMetaNamespaceKey(key) + if err != nil { + utilruntime.HandleError(fmt.Errorf("invalid resource key: %s", key)) + return nil + } + + log.Logf("SyncNetPol %s %s %s", key, namespace, name) + // Get the network policy resource with this namespace/name + netPolObj, err := c.netPolLister.NetworkPolicies(namespace).Get(name) + + // (TODO): Reduce scope of lock later + c.npMgr.Lock() + defer c.npMgr.Unlock() + + if err != nil { + if errors.IsNotFound(err) { + // (TODO): think the location of this code. + log.Logf("Network Policy %s is not found, may be it is deleted", key) + + // netPolObj is not found, but should need to check the RawNpMap cache with cachedNetPolKey + // #1 No item in RawNpMap, which means network policy with the cachedNetPolKey is not applied. Thus, do not need to clean up process. + // #2 item in RawNpMap exists, which means network policy with the cachedNetPolKey is applied . Thus, Need to clean up process. + cachedNetPolKey := util.GetNSNameWithPrefix(key) + cachedNetPolObj, cachedNetPolObjExists := c.npMgr.RawNpMap[cachedNetPolKey] + if !cachedNetPolObjExists { + return nil + } + + err = c.deleteNetworkPolicy(cachedNetPolObj) + if err != nil { + return fmt.Errorf("[syncNetPol] Error: %v when network policy is not found\n", err) + } + } + return err + } + + // If DeletionTimestamp of the netPolObj is set, start cleaning up lastly applied states. + // This is early cleaning up process from updateNetPol event + if netPolObj.ObjectMeta.DeletionTimestamp == nil && netPolObj.ObjectMeta.DeletionGracePeriodSeconds == nil { + err = c.deleteNetworkPolicy(netPolObj) + if err != nil { + return fmt.Errorf("Error: %v when ObjectMeta.DeletionTimestamp field is set\n", err) + } + } + + // Install default rules for kube-system and iptables + // (TODO): this default rules for kube-system and azure-npm chains, not directly related to passed netPolObj + // How to decouple this call with passed netPolObj? since it causes unnecessary re-try for the passed netPolObj + if err = c.initializeDefaultAzureNpmChain(); err != nil { + return fmt.Errorf("[syncNetPol] Error: due to %v", err) + } + + err = c.syncAddAndUpdateNetPol(netPolObj) + if err != nil { + return fmt.Errorf("[syncNetPol] Error due to %s\n", err.Error()) + } + + return nil +} + +// initializeDefaultAzureNpmChain install default rules for kube-system and iptables +func (c *networkPolicyController) initializeDefaultAzureNpmChain() error { + if c.isAzureNpmChainCreated { + return nil + } + + ipsMgr := c.npMgr.NsMap[util.KubeAllNamespacesFlag].IpsMgr + iptMgr := c.npMgr.NsMap[util.KubeAllNamespacesFlag].iptMgr + if err := ipsMgr.CreateSet(util.KubeSystemFlag, append([]string{util.IpsetNetHashFlag})); err != nil { + return fmt.Errorf("[initializeDefaultAzureNpmChain] Error: failed to initialize kube-system ipset with err %s", err) + } + if err := iptMgr.InitNpmChains(); err != nil { + return fmt.Errorf("[initializeDefaultAzureNpmChain] Error: failed to initialize azure-npm chains with err %s", err) + } + + c.isAzureNpmChainCreated = true + return nil +} + +// DeleteNetworkPolicy handles deleting network policy based on cachedNetPolKey. +func (c *networkPolicyController) deleteNetworkPolicy(netPolObj *networkingv1.NetworkPolicy) error { + var err error + netpolKey, err := cache.MetaNamespaceKeyFunc(netPolObj) + if err != nil { + return fmt.Errorf("[deleteNetworkPolicy] Error: while running MetaNamespaceKeyFunc err: %s", err) + } + + cachedNetPolKey := util.GetNSNameWithPrefix(netpolKey) + cachedNetPolObj, cachedNetPolObjExists := c.npMgr.RawNpMap[cachedNetPolKey] + // if there is no applied network policy with the cachedNetPolKey, do not need to clean up process. + if !cachedNetPolObjExists { + return nil + } + + ipsMgr := c.npMgr.NsMap[util.KubeAllNamespacesFlag].IpsMgr + iptMgr := c.npMgr.NsMap[util.KubeAllNamespacesFlag].iptMgr + // translate policy from "cachedNetPolObj" + _, _, _, ingressIPCidrs, egressIPCidrs, iptEntries := translatePolicy(cachedNetPolObj) + + // delete iptables entries + for _, iptEntry := range iptEntries { + if err = iptMgr.Delete(iptEntry); err != nil { + return fmt.Errorf("[deleteNetworkPolicy] Error: failed to apply iptables rule. Rule: %+v with err: %v", iptEntry, err) + } + } + + // delete ipset list related to ingress CIDRs + if err = c.removeCidrsRule("in", netPolObj.ObjectMeta.Name, netPolObj.ObjectMeta.Namespace, ingressIPCidrs, ipsMgr); err != nil { + return fmt.Errorf("[deleteNetworkPolicy] Error: removeCidrsRule in due to %v", err) + } + + // delete ipset list related to egress CIDRs + if err = c.removeCidrsRule("out", netPolObj.ObjectMeta.Name, netPolObj.ObjectMeta.Namespace, egressIPCidrs, ipsMgr); err != nil { + return fmt.Errorf("[deleteNetworkPolicy] Error: removeCidrsRule out due to %v", err) + } + + delete(c.npMgr.RawNpMap, cachedNetPolKey) + + // (TODO): it is not related to event - need to separate it. + if c.canCleanUpNpmChains() { + c.isAzureNpmChainCreated = false + if err = iptMgr.UninitNpmChains(); err != nil { + return fmt.Errorf("[deleteNetworkPolicy] Error: failed to uninitialize azure-npm chains with err: %s", err) + } + } + + metrics.NumPolicies.Dec() + return nil +} + +// Add and Update NetworkPolicy handles adding network policy to iptables. +func (c *networkPolicyController) syncAddAndUpdateNetPol(netPolObj *networkingv1.NetworkPolicy) error { + timer := metrics.StartNewTimer() + + var err error + netpolKey, err := cache.MetaNamespaceKeyFunc(netPolObj) + if err != nil { + return fmt.Errorf("[syncAddAndUpdateNetPol] Error: while running MetaNamespaceKeyFunc err: %s", err) + } + + // Start reconciling loop to eventually meet the desired states from network policy + // #1 If a new network policy is created, the network policy is not in RawNPMap. + // start translating policy and install translated ipset and iptables rules into kernel + // #2 If a network policy with -- is applied before and two network policy are the same object (same UID), + // first delete the applied network policy, + // then start translating policy and install translated ipset and iptables rules into kernel + // #3 If a network policy with -- is applied before and two network policy are the different object (different UID) due to missing some events for the old object + // first delete the applied network policy, + // then start translating policy and install translated ipset and iptables rules into kernel + // For #2 and #3, the logic are the same. + + // (TODO): Need more optimizations + // Need to apply difference only if possible + + err = c.deleteNetworkPolicy(netPolObj) + if err != nil { + return fmt.Errorf("[syncAddAndUpdateNetPol] Error: failed to deleteNetworkPolicy due to %s", err) + } + + ipsMgr := c.npMgr.NsMap[util.KubeAllNamespacesFlag].IpsMgr + iptMgr := c.npMgr.NsMap[util.KubeAllNamespacesFlag].iptMgr + sets, namedPorts, lists, ingressIPCidrs, egressIPCidrs, iptEntries := translatePolicy(netPolObj) + for _, set := range sets { + log.Logf("Creating set: %v, hashedSet: %v", set, util.GetHashedName(set)) + if err = ipsMgr.CreateSet(set, append([]string{util.IpsetNetHashFlag})); err != nil { + return fmt.Errorf("[syncAddAndUpdateNetPol] Error: creating ipset %s with err: %v", set, err) + } + } + for _, set := range namedPorts { + log.Logf("Creating set: %v, hashedSet: %v", set, util.GetHashedName(set)) + if err = ipsMgr.CreateSet(set, append([]string{util.IpsetIPPortHashFlag})); err != nil { + return fmt.Errorf("[syncAddAndUpdateNetPol] Error: creating ipset named port %s with err: %v", set, err) + } + } + for _, list := range lists { + if err = ipsMgr.CreateList(list); err != nil { + return fmt.Errorf("[syncAddAndUpdateNetPol] Error: creating ipset list %s with err: %v", list, err) + } + } + + // (TODO): Do we need initAllNsList?? + // Can we move this into "initializeDefaultAzureNpmChain" function? + if err = c.initAllNsList(); err != nil { + return fmt.Errorf("[syncAddAndUpdateNetPol] Error: initializing all-namespace ipset list with err: %v", err) + } + + if err = c.createCidrsRule("in", netPolObj.ObjectMeta.Name, netPolObj.ObjectMeta.Namespace, ingressIPCidrs, ipsMgr); err != nil { + return fmt.Errorf("[syncAddAndUpdateNetPol] Error: createCidrsRule in due to %v", err) + } + + if err = c.createCidrsRule("out", netPolObj.ObjectMeta.Name, netPolObj.ObjectMeta.Namespace, egressIPCidrs, ipsMgr); err != nil { + return fmt.Errorf("[syncAddAndUpdateNetPol] Error: createCidrsRule out due to %v", err) + } + + for _, iptEntry := range iptEntries { + if err = iptMgr.Add(iptEntry); err != nil { + return fmt.Errorf("[syncAddAndUpdateNetPol] Error: failed to apply iptables rule. Rule: %+v with err: %v", iptEntry, err) + } + } + + cachedNetPolKey := util.GetNSNameWithPrefix(netpolKey) + c.npMgr.RawNpMap[cachedNetPolKey] = netPolObj + + metrics.NumPolicies.Inc() + + // (TODO): may better to use defer? + timer.StopAndRecord(metrics.AddPolicyExecTime) + return nil +} + +func (c *networkPolicyController) createCidrsRule(ingressOrEgress, policyName, ns string, ipsetEntries [][]string, ipsMgr *ipsm.IpsetManager) error { + spec := append([]string{util.IpsetNetHashFlag, util.IpsetMaxelemName, util.IpsetMaxelemNum}) + + for i, ipCidrSet := range ipsetEntries { + if ipCidrSet == nil || len(ipCidrSet) == 0 { + continue + } + setName := policyName + "-in-ns-" + ns + "-" + strconv.Itoa(i) + ingressOrEgress + log.Logf("Creating set: %v, hashedSet: %v", setName, util.GetHashedName(setName)) + if err := ipsMgr.CreateSet(setName, spec); err != nil { + metrics.SendErrorLogAndMetric(util.NetpolID, "[createCidrsRule] Error: creating ipset %s with err: %v", ipCidrSet, err) + } + for _, ipCidrEntry := range util.DropEmptyFields(ipCidrSet) { + // Ipset doesn't allow 0.0.0.0/0 to be added. A general solution is split 0.0.0.0/1 in half which convert to + // 1.0.0.0/1 and 128.0.0.0/1 + if ipCidrEntry == "0.0.0.0/0" { + splitEntry := [2]string{"1.0.0.0/1", "128.0.0.0/1"} + for _, entry := range splitEntry { + if err := ipsMgr.AddToSet(setName, entry, util.IpsetNetHashFlag, ""); err != nil { + return fmt.Errorf("[createCidrsRule] adding ip cidrs %s into ipset %s with err: %v", entry, ipCidrSet, err) + } + } + } else { + if err := ipsMgr.AddToSet(setName, ipCidrEntry, util.IpsetNetHashFlag, ""); err != nil { + return fmt.Errorf("[createCidrsRule] adding ip cidrs %s into ipset %s with err: %v", ipCidrEntry, ipCidrSet, err) + } + } + } + } + + return nil +} + +func (c *networkPolicyController) removeCidrsRule(ingressOrEgress, policyName, ns string, ipsetEntries [][]string, ipsMgr *ipsm.IpsetManager) error { + for i, ipCidrSet := range ipsetEntries { + if ipCidrSet == nil || len(ipCidrSet) == 0 { + continue + } + setName := policyName + "-in-ns-" + ns + "-" + strconv.Itoa(i) + ingressOrEgress + log.Logf("Delete set: %v, hashedSet: %v", setName, util.GetHashedName(setName)) + if err := ipsMgr.DeleteSet(setName); err != nil { + return fmt.Errorf("[removeCidrsRule] deleting ipset %s with err: %v", ipCidrSet, err) + } + } + + return nil +} + +func (c *networkPolicyController) canCleanUpNpmChains() bool { + // (TODO): why do we have this? + // if !c.isSafeToCleanUpAzureNpmChain { + // return false + // } + + if len(c.npMgr.RawNpMap) == 0 { + return true + } + + return false +} + +// (TODO): copied from namespace.go - InitAllNsList syncs all-namespace ipset list +func (c *networkPolicyController) initAllNsList() error { + // who adds util.KubeAllNamespacesFlag in NsMap? + ipsMgr := c.npMgr.NsMap[util.KubeAllNamespacesFlag].IpsMgr + for ns := range c.npMgr.NsMap { + if ns == util.KubeAllNamespacesFlag { + continue + } + + if err := ipsMgr.AddToList(util.KubeAllNamespacesFlag, ns); err != nil { + return fmt.Errorf("[InitAllNsList] Error: failed to add namespace set %s to ipset list %s with err: %v", ns, util.KubeAllNamespacesFlag, err) + } + } + return nil +} + +// (TODO): copied from namespace.go, but no component uses this function UninitAllNsList cleans all-namespace ipset list. +func (c *networkPolicyController) unInitAllNsList() error { + allNs := c.npMgr.NsMap[util.KubeAllNamespacesFlag] + for ns := range c.npMgr.NsMap { + if ns == util.KubeAllNamespacesFlag { + continue + } + + if err := allNs.IpsMgr.DeleteFromList(util.KubeAllNamespacesFlag, ns); err != nil { + return fmt.Errorf("[UninitAllNsList] Error: failed to delete namespace set %s from list %s with err: %v", ns, util.KubeAllNamespacesFlag, err) + } + } + return nil +} + +// GetProcessedNPKey will return netpolKey +func (c *networkPolicyController) getProcessedNPKey(netPolObj *networkingv1.NetworkPolicy) string { + // hashSelector will never be empty + // (TODO): what if PodSelector is [] or nothing? - make the Unit test for this + hashedPodSelector := HashSelector(&netPolObj.Spec.PodSelector) + + // (TODO): any chance to have namespace has zero length? + if len(netPolObj.GetNamespace()) > 0 { + hashedPodSelector = netPolObj.GetNamespace() + "/" + hashedPodSelector + } + return util.GetNSNameWithPrefix(hashedPodSelector) +} + +// (TODO): placeholder to avoid compile errors +func (npMgr *NetworkPolicyManager) AddNetworkPolicy(netPol *networkingv1.NetworkPolicy) error { + return nil +} + +func (npMgr *NetworkPolicyManager) DeleteNetworkPolicy(netPol *networkingv1.NetworkPolicy) error { + return nil +} + +func (npMgr *NetworkPolicyManager) UpdateNetworkPolicy(oldNpObj *networkingv1.NetworkPolicy, newNpObj *networkingv1.NetworkPolicy) error { + return nil +} diff --git a/npm/nwpolicy_test.go b/npm/networkPolicyController_test.go similarity index 50% rename from npm/nwpolicy_test.go rename to npm/networkPolicyController_test.go index 256d0556f8..39479c7a0a 100644 --- a/npm/nwpolicy_test.go +++ b/npm/networkPolicyController_test.go @@ -14,10 +14,239 @@ import ( corev1 "k8s.io/api/core/v1" networkingv1 "k8s.io/api/networking/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/util/intstr" + kubeinformers "k8s.io/client-go/informers" + k8sfake "k8s.io/client-go/kubernetes/fake" + core "k8s.io/client-go/testing" + "k8s.io/client-go/tools/cache" ) +type netPolFixture struct { + t *testing.T + + kubeclient *k8sfake.Clientset + // Objects to put in the store. + netPolLister []*networkingv1.NetworkPolicy + // (TODO) Actions expected to happen on the client. Will use this to check action. + kubeactions []core.Action + // Objects from here preloaded into NewSimpleFake. + kubeobjects []runtime.Object + + // (TODO) will remove npMgr if possible + npMgr *NetworkPolicyManager + ipsMgr *ipsm.IpsetManager + netPolController *networkPolicyController + kubeInformer kubeinformers.SharedInformerFactory +} + +func newNetPolFixture(t *testing.T) *netPolFixture { + f := &netPolFixture{ + t: t, + netPolLister: []*networkingv1.NetworkPolicy{}, + kubeobjects: []runtime.Object{}, + npMgr: newNPMgr(t), + ipsMgr: ipsm.NewIpsetManager(), + } + + f.npMgr.RawNpMap = make(map[string]*networkingv1.NetworkPolicy) + f.npMgr.ProcessedNpMap = make(map[string]*networkingv1.NetworkPolicy) + return f +} + +func (f *netPolFixture) newNetPodController(stopCh chan struct{}) { + f.kubeclient = k8sfake.NewSimpleClientset(f.kubeobjects...) + f.kubeInformer = kubeinformers.NewSharedInformerFactory(f.kubeclient, noResyncPeriodFunc()) + + f.netPolController = NewNetworkPolicyController(f.kubeInformer.Networking().V1().NetworkPolicies(), f.kubeclient, f.npMgr) + f.netPolController.netPolListerSynced = alwaysReady + + for _, netPol := range f.netPolLister { + f.kubeInformer.Networking().V1().NetworkPolicies().Informer().GetIndexer().Add(netPol) + } + + f.kubeInformer.Start(stopCh) +} + +func (f *netPolFixture) ipSetSave(ipsetConfigFile string) { + // call /sbin/ipset save -file /var/log/ipset-test.conf + f.t.Logf("Start storing ipset to %s", ipsetConfigFile) + if err := f.ipsMgr.Save(ipsetConfigFile); err != nil { + f.t.Errorf("ipSetSave failed @ ipsMgr.Save") + } +} +func (f *netPolFixture) ipSetRestore(ipsetConfigFile string) { + // call /sbin/ipset restore -file /var/log/ipset-test.conf + f.t.Logf("Start re-storing ipset to %s", ipsetConfigFile) + if err := f.ipsMgr.Restore(ipsetConfigFile); err != nil { + f.t.Errorf("ipSetRestore failed @ ipsMgr.Restore") + } +} + +// (TODO): fix it +func createNetPol() *networkingv1.NetworkPolicy { + tcp := corev1.ProtocolTCP + port8000 := intstr.FromInt(8000) + return &networkingv1.NetworkPolicy{ + ObjectMeta: metav1.ObjectMeta{ + Name: "allow-ingress", + Namespace: "test-nwpolicy", + }, + Spec: networkingv1.NetworkPolicySpec{ + Ingress: []networkingv1.NetworkPolicyIngressRule{ + networkingv1.NetworkPolicyIngressRule{ + From: []networkingv1.NetworkPolicyPeer{ + networkingv1.NetworkPolicyPeer{ + PodSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"app": "test"}, + }, + }, + networkingv1.NetworkPolicyPeer{ + IPBlock: &networkingv1.IPBlock{ + CIDR: "0.0.0.0/0", + }, + }, + }, + Ports: []networkingv1.NetworkPolicyPort{{ + Protocol: &tcp, + Port: &port8000, + }}, + }, + }, + }, + } +} + +func getNetPolKey(netPolObj *networkingv1.NetworkPolicy, t *testing.T) string { + key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(netPolObj) + if err != nil { + t.Errorf("Unexpected error getting key for network policy %s: %v", netPolObj.Name, err) + return "" + } + return key +} + +func addNetPol(t *testing.T, f *netPolFixture, netPolObj *networkingv1.NetworkPolicy) { + // simulate "network policy" add event and add network policy object to sharedInformer cache + f.netPolController.addNetPol(netPolObj) + + if f.netPolController.workqueue.Len() == 0 { + t.Logf("Add network policy: worker queue length is 0 ") + return + } + + f.netPolController.processNextWorkItem() +} + +func deleteNetPod(t *testing.T, f *netPolFixture, netPolObj *networkingv1.NetworkPolicy) { + addNetPol(t, f, netPolObj) + t.Logf("Complete adding network policy event") + + // simulate network policy deletion event and delete network policy object from sharedInformer cache + f.kubeInformer.Networking().V1().NetworkPolicies().Informer().GetIndexer().Delete(netPolObj) + f.netPolController.deleteNetPol(netPolObj) + + if f.netPolController.workqueue.Len() == 0 { + t.Logf("Delete network policy: worker queue length is 0 ") + return + } + + f.netPolController.processNextWorkItem() +} + +// Need to make more cases - interestings.. +func updateNetPol(t *testing.T, f *netPolFixture, oldNetPolObj, netNetPolObj *networkingv1.NetworkPolicy) { + addNetPol(t, f, oldNetPolObj) + t.Logf("Complete updating network policy event") + + // simulate pod update event and update the pod to shared informer's cache + f.kubeInformer.Networking().V1().NetworkPolicies().Informer().GetIndexer().Update(netNetPolObj) + f.netPolController.updateNetPol(oldNetPolObj, netNetPolObj) + + if f.netPolController.workqueue.Len() == 0 { + t.Logf("Update Network Policy: worker queue length is 0 ") + return + } + + f.netPolController.processNextWorkItem() +} + +// (TODO) - fix : it should be "RawNetPol" && "ProcessedNetPol" +type expectedNetPolValues struct { + expectedLenOfNsMap int + expectedLenOfRawNpMap int + expectedLenOfProcessedNpMap int + expectedIsSafeToCleanUpAzureNpmChain bool + expectedIsAzureNpmChainCreated bool + expectedLenOfWorkQueue int +} + +func checkNetPolTestResult(testName string, f *netPolFixture, testCases []expectedNetPolValues) { + for _, test := range testCases { + if got := len(f.npMgr.NsMap); got != test.expectedLenOfNsMap { + f.t.Errorf("npMgr namespace map length = %d, want %d", got, test.expectedLenOfNsMap) + } + + if got := len(f.netPolController.npMgr.RawNpMap); got != test.expectedLenOfRawNpMap { + f.t.Errorf("Raw NetPol Map length = %d, want %d", got, test.expectedLenOfRawNpMap) + } + + if got := len(f.netPolController.npMgr.ProcessedNpMap); got != test.expectedLenOfProcessedNpMap { + f.t.Errorf("Processed NetPol Map length = %d, want %d", got, test.expectedLenOfProcessedNpMap) + } + + // if got := f.netPolController.isSafeToCleanUpAzureNpmChain; got != test.expectedIsSafeToCleanUpAzureNpmChain { + // f.t.Errorf("isSafeToCleanUpAzureNpmChain %v, want %v", got, test.expectedLenOfRawNpMap) + // } + + if got := f.netPolController.isAzureNpmChainCreated; got != test.expectedIsAzureNpmChainCreated { + f.t.Errorf("isAzureNpmChainCreated %v, want %v", got, test.expectedIsAzureNpmChainCreated) + + } + + if got := f.netPolController.workqueue.Len(); got != test.expectedLenOfWorkQueue { + f.t.Errorf("Workqueue length = %d, want %d", got, test.expectedLenOfWorkQueue) + } + } +} + func TestAddNetworkPolicy(t *testing.T) { + netPolObj := createNetPol() + + f := newNetPolFixture(t) + f.netPolLister = append(f.netPolLister, netPolObj) + f.kubeobjects = append(f.kubeobjects, netPolObj) + stopCh := make(chan struct{}) + defer close(stopCh) + f.newNetPodController(stopCh) + + addNetPol(t, f, netPolObj) + // (TODO): Why? networkPolicyController_test.go:187: npMgr namespace map length = 1, want 2 + testCases := []expectedNetPolValues{ + {2, 1, 0, false, true, 0}, + } + checkNetPolTestResult("TestAddNetPol", f, testCases) +} + +func TestDeleteNetworkPolicy(t *testing.T) { + netPolObj := createNetPol() + + f := newNetPolFixture(t) + f.netPolLister = append(f.netPolLister, netPolObj) + f.kubeobjects = append(f.kubeobjects, netPolObj) + stopCh := make(chan struct{}) + defer close(stopCh) + f.newNetPodController(stopCh) + + deleteNetPod(t, f, netPolObj) + // (TODO): check ground-truth value + testCases := []expectedNetPolValues{ + {1, 0, 0, false, false, 0}, + } + checkNetPolTestResult("TestDelNetPol", f, testCases) +} + +func TestAddNetworkPolicy1(t *testing.T) { npMgr := &NetworkPolicyManager{ NsMap: make(map[string]*Namespace), PodMap: make(map[string]*NpmPod), @@ -249,116 +478,33 @@ func TestUpdateNetworkPolicy(t *testing.T) { npMgr.Unlock() } -func TestDeleteNetworkPolicy(t *testing.T) { - npMgr := &NetworkPolicyManager{ - NsMap: make(map[string]*Namespace), - PodMap: make(map[string]*NpmPod), - RawNpMap: make(map[string]*networkingv1.NetworkPolicy), - ProcessedNpMap: make(map[string]*networkingv1.NetworkPolicy), - TelemetryEnabled: false, - } - - allNs, err := newNs(util.KubeAllNamespacesFlag) - if err != nil { - panic(err.Error) - } - npMgr.NsMap[util.KubeAllNamespacesFlag] = allNs - - iptMgr := iptm.NewIptablesManager() - if err := iptMgr.Save(util.IptablesTestConfigFile); err != nil { - t.Errorf("TestDeleteNetworkPolicy failed @ iptMgr.Save") - } - - ipsMgr := ipsm.NewIpsetManager() - if err := ipsMgr.Save(util.IpsetTestConfigFile); err != nil { - t.Errorf("TestDeleteNetworkPolicy failed @ ipsMgr.Save") - } - - defer func() { - if err := iptMgr.Restore(util.IptablesTestConfigFile); err != nil { - t.Errorf("TestDeleteNetworkPolicy failed @ iptMgr.Restore") - } - - if err := ipsMgr.Restore(util.IpsetTestConfigFile); err != nil { - t.Errorf("TestDeleteNetworkPolicy failed @ ipsMgr.Restore") - } - }() - - // Create ns-kube-system set - if err := ipsMgr.CreateSet("ns-"+util.KubeSystemFlag, append([]string{util.IpsetNetHashFlag})); err != nil { - t.Errorf("TestDeleteNetworkPolicy failed @ ipsMgr.CreateSet, adding kube-system set%+v", err) - } - - tcp := corev1.ProtocolTCP - allow := &networkingv1.NetworkPolicy{ - ObjectMeta: metav1.ObjectMeta{ - Name: "allow-ingress", - Namespace: "test-nwpolicy", - }, - Spec: networkingv1.NetworkPolicySpec{ - Ingress: []networkingv1.NetworkPolicyIngressRule{ - networkingv1.NetworkPolicyIngressRule{ - From: []networkingv1.NetworkPolicyPeer{{ - PodSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{"app": "test"}, - }, - }}, - Ports: []networkingv1.NetworkPolicyPort{{ - Protocol: &tcp, - Port: &intstr.IntOrString{ - StrVal: "8000", - }, - }}, - }, - }, - }, - } - - npMgr.Lock() - if err := npMgr.AddNetworkPolicy(allow); err != nil { - t.Errorf("TestAddNetworkPolicy failed @ AddNetworkPolicy") - } - - gaugeVal, err1 := promutil.GetValue(metrics.NumPolicies) - - if err := npMgr.DeleteNetworkPolicy(allow); err != nil { - t.Errorf("TestDeleteNetworkPolicy failed @ DeleteNetworkPolicy") - } - npMgr.Unlock() - - newGaugeVal, err2 := promutil.GetValue(metrics.NumPolicies) - promutil.NotifyIfErrors(t, err1, err2) - if newGaugeVal != gaugeVal-1 { - t.Errorf("Change in policy number didn't register in prometheus") - } -} func TestGetNetworkPolicyKey(t *testing.T) { - npObj := &networkingv1.NetworkPolicy{ - ObjectMeta: metav1.ObjectMeta{ - Name: "allow-egress", - Namespace: "test-nwpolicy", - }, - Spec: networkingv1.NetworkPolicySpec{ - Egress: []networkingv1.NetworkPolicyEgressRule{ - networkingv1.NetworkPolicyEgressRule{ - To: []networkingv1.NetworkPolicyPeer{{ - NamespaceSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{"ns": "test"}, - }, - }}, - }, - }, - }, - } - - netpolKey := GetNetworkPolicyKey(npObj) - - if netpolKey == "" { - t.Errorf("TestGetNetworkPolicyKey failed @ netpolKey length check %s", netpolKey) - } - - expectedKey := util.GetNSNameWithPrefix("test-nwpolicy/allow-egress") - if netpolKey != expectedKey { - t.Errorf("TestGetNetworkPolicyKey failed @ netpolKey did not match expected value %s", netpolKey) - } + // npObj := &networkingv1.NetworkPolicy{ + // ObjectMeta: metav1.ObjectMeta{ + // Name: "allow-egress", + // Namespace: "test-nwpolicy", + // }, + // Spec: networkingv1.NetworkPolicySpec{ + // Egress: []networkingv1.NetworkPolicyEgressRule{ + // networkingv1.NetworkPolicyEgressRule{ + // To: []networkingv1.NetworkPolicyPeer{{ + // NamespaceSelector: &metav1.LabelSelector{ + // MatchLabels: map[string]string{"ns": "test"}, + // }, + // }}, + // }, + // }, + // }, + // } + + // netpolKey := GetNetworkPolicyKey(npObj) + + // if netpolKey == "" { + // t.Errorf("TestGetNetworkPolicyKey failed @ netpolKey length check %s", netpolKey) + // } + + // expectedKey := util.GetNSNameWithPrefix("test-nwpolicy/allow-egress") + // if netpolKey != expectedKey { + // t.Errorf("TestGetNetworkPolicyKey failed @ netpolKey did not match expected value %s", netpolKey) + // } } diff --git a/npm/npm.go b/npm/npm.go index 7efd6bf049..bf9f87f153 100644 --- a/npm/npm.go +++ b/npm/npm.go @@ -49,16 +49,16 @@ type NetworkPolicyManager struct { podController *podController nsInformer coreinformers.NamespaceInformer - npInformer networkinginformers.NetworkPolicyInformer nameSpaceController *nameSpaceController - NodeName string - NsMap map[string]*Namespace - PodMap map[string]*NpmPod // Key is / - RawNpMap map[string]*networkingv1.NetworkPolicy // Key is ns-/ - ProcessedNpMap map[string]*networkingv1.NetworkPolicy // Key is ns-/ - isAzureNpmChainCreated bool - isSafeToCleanUpAzureNpmChain bool + npInformer networkinginformers.NetworkPolicyInformer + netPolController *networkPolicyController + + NodeName string + NsMap map[string]*Namespace // Key is ns- + PodMap map[string]*NpmPod // Key is / + RawNpMap map[string]*networkingv1.NetworkPolicy // Key is ns-/ + ProcessedNpMap map[string]*networkingv1.NetworkPolicy // Key is ns-/ clusterState telemetry.ClusterState version string @@ -187,10 +187,10 @@ func (npMgr *NetworkPolicyManager) Start(stopCh <-chan struct{}) error { return fmt.Errorf("Network policy informer failed to sync") } - // TODO: any dependency among below functions? - // start pod controller after synced + // start controllers after synced go npMgr.podController.Run(threadness, stopCh) go npMgr.nameSpaceController.Run(threadness, stopCh) + go npMgr.netPolController.Run(threadness, stopCh) go npMgr.reconcileChains() go npMgr.backup() @@ -234,18 +234,16 @@ func NewNetworkPolicyManager(clientset *kubernetes.Clientset, informerFactory in } npMgr := &NetworkPolicyManager{ - clientset: clientset, - informerFactory: informerFactory, - podInformer: podInformer, - nsInformer: nsInformer, - npInformer: npInformer, - NodeName: os.Getenv("HOSTNAME"), - NsMap: make(map[string]*Namespace), - PodMap: make(map[string]*NpmPod), - RawNpMap: make(map[string]*networkingv1.NetworkPolicy), - ProcessedNpMap: make(map[string]*networkingv1.NetworkPolicy), - isAzureNpmChainCreated: false, - isSafeToCleanUpAzureNpmChain: false, + clientset: clientset, + informerFactory: informerFactory, + podInformer: podInformer, + nsInformer: nsInformer, + npInformer: npInformer, + NodeName: os.Getenv("HOSTNAME"), + NsMap: make(map[string]*Namespace), + PodMap: make(map[string]*NpmPod), + RawNpMap: make(map[string]*networkingv1.NetworkPolicy), + ProcessedNpMap: make(map[string]*networkingv1.NetworkPolicy), clusterState: telemetry.ClusterState{ PodCount: 0, NsCount: 0, @@ -271,53 +269,8 @@ func NewNetworkPolicyManager(clientset *kubernetes.Clientset, informerFactory in // create NameSpace controller npMgr.nameSpaceController = NewNameSpaceController(nsInformer, clientset, npMgr) - npInformer.Informer().AddEventHandler( - // Network policy event handlers - cache.ResourceEventHandlerFuncs{ - AddFunc: func(obj interface{}) { - networkPolicyObj, ok := obj.(*networkingv1.NetworkPolicy) - if !ok { - metrics.SendErrorLogAndMetric(util.NpmID, "ADD Network Policy: Received unexpected object type: %v", obj) - return - } - npMgr.Lock() - npMgr.AddNetworkPolicy(networkPolicyObj) - npMgr.Unlock() - }, - UpdateFunc: func(old, new interface{}) { - oldNetworkPolicyObj, ok := old.(*networkingv1.NetworkPolicy) - if !ok { - metrics.SendErrorLogAndMetric(util.NpmID, "UPDATE Network Policy: Received unexpected old object type: %v", oldNetworkPolicyObj) - return - } - newNetworkPolicyObj, ok := new.(*networkingv1.NetworkPolicy) - if !ok { - metrics.SendErrorLogAndMetric(util.NpmID, "UPDATE Network Policy: Received unexpected new object type: %v", newNetworkPolicyObj) - return - } - npMgr.Lock() - npMgr.UpdateNetworkPolicy(oldNetworkPolicyObj, newNetworkPolicyObj) - npMgr.Unlock() - }, - DeleteFunc: func(obj interface{}) { - networkPolicyObj, ok := obj.(*networkingv1.NetworkPolicy) - if !ok { - tombstone, ok := obj.(cache.DeletedFinalStateUnknown) - if !ok { - metrics.SendErrorLogAndMetric(util.NpmID, "DELETE Network Policy: Received unexpected object type: %v", obj) - return - } - if networkPolicyObj, ok = tombstone.Obj.(*networkingv1.NetworkPolicy); !ok { - metrics.SendErrorLogAndMetric(util.NpmID, "DELETE Network Policy: Received unexpected object type: %v", obj) - return - } - } - npMgr.Lock() - npMgr.DeleteNetworkPolicy(networkPolicyObj) - npMgr.Unlock() - }, - }, - ) + // create network policy controller + npMgr.netPolController = NewNetworkPolicyController(informerFactory.Networking().V1().NetworkPolicies(), clientset, npMgr) return npMgr } diff --git a/npm/nwpolicy.go b/npm/nwpolicy.go deleted file mode 100644 index e03ef79049..0000000000 --- a/npm/nwpolicy.go +++ /dev/null @@ -1,298 +0,0 @@ -// Copyright 2018 Microsoft. All rights reserved. -// MIT License -package npm - -import ( - "fmt" - "strconv" - - "github.com/Azure/azure-container-networking/log" - "github.com/Azure/azure-container-networking/npm/ipsm" - "github.com/Azure/azure-container-networking/npm/iptm" - "github.com/Azure/azure-container-networking/npm/metrics" - "github.com/Azure/azure-container-networking/npm/util" - networkingv1 "k8s.io/api/networking/v1" -) - -// GetNetworkPolicyKey will return netpolKey -func GetNetworkPolicyKey(npObj *networkingv1.NetworkPolicy) string { - netpolKey, err := util.GetObjKeyFunc(npObj) - if err != nil { - metrics.SendErrorLogAndMetric(util.NetpolID, "[GetNetworkPolicyKey] Error: while running MetaNamespaceKeyFunc err: %s", err) - return "" - } - if len(netpolKey) == 0 { - return "" - } - return util.GetNSNameWithPrefix(netpolKey) -} - -// GetProcessedNPKey will return netpolKey -func GetProcessedNPKey(npObj *networkingv1.NetworkPolicy, hashSelector string) string { - // hashSelector will never be empty - netpolKey := hashSelector - if len(npObj.GetNamespace()) > 0 { - netpolKey = npObj.GetNamespace() + "/" + netpolKey - } - return util.GetNSNameWithPrefix(netpolKey) -} - -func (npMgr *NetworkPolicyManager) canCleanUpNpmChains() bool { - if !npMgr.isSafeToCleanUpAzureNpmChain { - return false - } - - if len(npMgr.ProcessedNpMap) > 0 { - return false - } - - return true -} - -func (npMgr *NetworkPolicyManager) policyExists(npObj *networkingv1.NetworkPolicy) bool { - npKey := GetNetworkPolicyKey(npObj) - if npKey == "" { - return false - } - - np, exists := npMgr.RawNpMap[npKey] - if !exists { - return false - } - - if !util.CompareResourceVersions(np.ObjectMeta.ResourceVersion, npObj.ObjectMeta.ResourceVersion) { - log.Logf("Cached Network Policy has larger ResourceVersion number than new Obj. Name: %s Cached RV: %d New RV: %d\n", - npObj.ObjectMeta.Name, - np.ObjectMeta.ResourceVersion, - npObj.ObjectMeta.ResourceVersion, - ) - return true - } - - if isSamePolicy(np, npObj) { - return true - } - - return false -} - -// AddNetworkPolicy handles adding network policy to iptables. -func (npMgr *NetworkPolicyManager) AddNetworkPolicy(npObj *networkingv1.NetworkPolicy) error { - var ( - err error - npNs = util.GetNSNameWithPrefix(npObj.ObjectMeta.Namespace) - npName = npObj.ObjectMeta.Name - allNs = npMgr.NsMap[util.KubeAllNamespacesFlag] - timer = metrics.StartNewTimer() - hashedSelector = HashSelector(&npObj.Spec.PodSelector) - npKey = GetNetworkPolicyKey(npObj) - npProcessedKey = GetProcessedNPKey(npObj, hashedSelector) - ) - - log.Logf("NETWORK POLICY CREATING: NameSpace%s, Name:%s", npNs, npName) - - if npKey == "" { - err = fmt.Errorf("[AddNetworkPolicy] Error: npKey is empty for %s network policy in %s", npName, npNs) - metrics.SendErrorLogAndMetric(util.NetpolID, err.Error()) - return err - } - - if npMgr.policyExists(npObj) { - return nil - } - - if !npMgr.isAzureNpmChainCreated { - if err = allNs.IpsMgr.CreateSet(util.KubeSystemFlag, append([]string{util.IpsetNetHashFlag})); err != nil { - metrics.SendErrorLogAndMetric(util.NetpolID, "[AddNetworkPolicy] Error: failed to initialize kube-system ipset with err %s", err) - return err - } - - if err = allNs.iptMgr.InitNpmChains(); err != nil { - metrics.SendErrorLogAndMetric(util.NetpolID, "[AddNetworkPolicy] Error: failed to initialize azure-npm chains with err %s", err) - return err - } - - npMgr.isAzureNpmChainCreated = true - } - - var ( - addedPolicy *networkingv1.NetworkPolicy - sets, namedPorts, lists []string - ingressIPCidrs, egressIPCidrs [][]string - iptEntries []*iptm.IptEntry - ipsMgr = allNs.IpsMgr - ) - - // Remove the existing policy from processed (merged) network policy map - if oldPolicy, oldPolicyExists := npMgr.RawNpMap[npKey]; oldPolicyExists { - npMgr.isSafeToCleanUpAzureNpmChain = false - npMgr.DeleteNetworkPolicy(oldPolicy) - npMgr.isSafeToCleanUpAzureNpmChain = true - } - - // Add (merge) the new policy with others who apply to the same pods - if oldPolicy, oldPolicyExists := npMgr.ProcessedNpMap[npProcessedKey]; oldPolicyExists { - addedPolicy, err = addPolicy(oldPolicy, npObj) - if err != nil { - metrics.SendErrorLogAndMetric(util.NetpolID, "[AddNetworkPolicy] Error: adding policy %s to %s with err: %v", npName, oldPolicy.ObjectMeta.Name, err) - return err - } - } - - if addedPolicy != nil { - npMgr.ProcessedNpMap[npProcessedKey] = addedPolicy - } else { - npMgr.ProcessedNpMap[npProcessedKey] = npObj - } - - sets, namedPorts, lists, ingressIPCidrs, egressIPCidrs, iptEntries = translatePolicy(npObj) - for _, set := range sets { - log.Logf("Creating set: %v, hashedSet: %v", set, util.GetHashedName(set)) - if err = ipsMgr.CreateSet(set, append([]string{util.IpsetNetHashFlag})); err != nil { - metrics.SendErrorLogAndMetric(util.NetpolID, "[AddNetworkPolicy] Error: creating ipset %s with err: %v", set, err) - return err - } - } - for _, set := range namedPorts { - log.Logf("Creating set: %v, hashedSet: %v", set, util.GetHashedName(set)) - if err = ipsMgr.CreateSet(set, append([]string{util.IpsetIPPortHashFlag})); err != nil { - metrics.SendErrorLogAndMetric(util.NetpolID, "[AddNetworkPolicy] Error: creating ipset named port %s with err: %v", set, err) - return err - } - } - for _, list := range lists { - if err = ipsMgr.CreateList(list); err != nil { - metrics.SendErrorLogAndMetric(util.NetpolID, "[AddNetworkPolicy] Error: creating ipset list %s with err: %v", list, err) - return err - } - } - - createCidrsRule("in", npObj.ObjectMeta.Name, npObj.ObjectMeta.Namespace, ingressIPCidrs, ipsMgr) - createCidrsRule("out", npObj.ObjectMeta.Name, npObj.ObjectMeta.Namespace, egressIPCidrs, ipsMgr) - iptMgr := allNs.iptMgr - for _, iptEntry := range iptEntries { - if err = iptMgr.Add(iptEntry); err != nil { - metrics.SendErrorLogAndMetric(util.NetpolID, "[AddNetworkPolicy] Error: failed to apply iptables rule. Rule: %+v with err: %v", iptEntry, err) - return err - } - } - npMgr.RawNpMap[npKey] = npObj - - metrics.NumPolicies.Inc() - timer.StopAndRecord(metrics.AddPolicyExecTime) - - return nil -} - -// UpdateNetworkPolicy handles updateing network policy in iptables. -func (npMgr *NetworkPolicyManager) UpdateNetworkPolicy(oldNpObj *networkingv1.NetworkPolicy, newNpObj *networkingv1.NetworkPolicy) error { - if newNpObj.ObjectMeta.DeletionTimestamp == nil && newNpObj.ObjectMeta.DeletionGracePeriodSeconds == nil { - log.Logf("NETWORK POLICY UPDATING") - return npMgr.AddNetworkPolicy(newNpObj) - } - - return nil -} - -// DeleteNetworkPolicy handles deleting network policy from iptables. -func (npMgr *NetworkPolicyManager) DeleteNetworkPolicy(npObj *networkingv1.NetworkPolicy) error { - var ( - err error - allNs = npMgr.NsMap[util.KubeAllNamespacesFlag] - hashedSelector = HashSelector(&npObj.Spec.PodSelector) - npKey = GetNetworkPolicyKey(npObj) - npProcessedKey = GetProcessedNPKey(npObj, hashedSelector) - ) - - npNs, npName := util.GetNSNameWithPrefix(npObj.ObjectMeta.Namespace), npObj.ObjectMeta.Name - log.Logf("NETWORK POLICY DELETING: Namespace: %s, Name:%s", npNs, npName) - - if npKey == "" { - err = fmt.Errorf("[AddNetworkPolicy] Error: npKey is empty for %s network policy in %s", npName, npNs) - metrics.SendErrorLogAndMetric(util.NetpolID, err.Error()) - return err - } - - _, _, _, ingressIPCidrs, egressIPCidrs, iptEntries := translatePolicy(npObj) - - iptMgr := allNs.iptMgr - for _, iptEntry := range iptEntries { - if err = iptMgr.Delete(iptEntry); err != nil { - metrics.SendErrorLogAndMetric(util.NetpolID, "[DeleteNetworkPolicy] Error: failed to apply iptables rule. Rule: %+v with err: %v", iptEntry, err) - return err - } - } - - removeCidrsRule("in", npObj.ObjectMeta.Name, npObj.ObjectMeta.Namespace, ingressIPCidrs, allNs.IpsMgr) - removeCidrsRule("out", npObj.ObjectMeta.Name, npObj.ObjectMeta.Namespace, egressIPCidrs, allNs.IpsMgr) - - if oldPolicy, oldPolicyExists := npMgr.ProcessedNpMap[npProcessedKey]; oldPolicyExists { - deductedPolicy, err := deductPolicy(oldPolicy, npObj) - if err != nil { - metrics.SendErrorLogAndMetric(util.NetpolID, "[DeleteNetworkPolicy] Error: deducting policy %s from %s with err: %v", npName, oldPolicy.ObjectMeta.Name, err) - return err - } - - if deductedPolicy == nil { - delete(npMgr.ProcessedNpMap, npProcessedKey) - } else { - npMgr.ProcessedNpMap[npProcessedKey] = deductedPolicy - } - } - - if npMgr.canCleanUpNpmChains() { - npMgr.isAzureNpmChainCreated = false - if err = iptMgr.UninitNpmChains(); err != nil { - metrics.SendErrorLogAndMetric(util.NetpolID, "[DeleteNetworkPolicy] Error: failed to uninitialize azure-npm chains with err: %s", err) - return err - } - } - delete(npMgr.RawNpMap, npKey) - - metrics.NumPolicies.Dec() - - return nil -} - -func createCidrsRule(ingressOrEgress, policyName, ns string, ipsetEntries [][]string, ipsMgr *ipsm.IpsetManager) { - spec := append([]string{util.IpsetNetHashFlag, util.IpsetMaxelemName, util.IpsetMaxelemNum}) - for i, ipCidrSet := range ipsetEntries { - if ipCidrSet == nil || len(ipCidrSet) == 0 { - continue - } - setName := policyName + "-in-ns-" + ns + "-" + strconv.Itoa(i) + ingressOrEgress - log.Logf("Creating set: %v, hashedSet: %v", setName, util.GetHashedName(setName)) - if err := ipsMgr.CreateSet(setName, spec); err != nil { - metrics.SendErrorLogAndMetric(util.NetpolID, "[createCidrsRule] Error: creating ipset %s with err: %v", ipCidrSet, err) - } - for _, ipCidrEntry := range util.DropEmptyFields(ipCidrSet) { - // Ipset doesn't allow 0.0.0.0/0 to be added. A general solution is split 0.0.0.0/1 in half which convert to - // 1.0.0.0/1 and 128.0.0.0/1 - if ipCidrEntry == "0.0.0.0/0" { - splitEntry := [2]string{"1.0.0.0/1", "128.0.0.0/1"} - for _, entry := range splitEntry { - if err := ipsMgr.AddToSet(setName, entry, util.IpsetNetHashFlag, ""); err != nil { - metrics.SendErrorLogAndMetric(util.NetpolID, "[createCidrsRule] adding ip cidrs %s into ipset %s with err: %v", entry, ipCidrSet, err) - } - } - } else { - if err := ipsMgr.AddToSet(setName, ipCidrEntry, util.IpsetNetHashFlag, ""); err != nil { - metrics.SendErrorLogAndMetric(util.NetpolID, "[createCidrsRule] adding ip cidrs %s into ipset %s with err: %v", ipCidrEntry, ipCidrSet, err) - } - } - } - } -} - -func removeCidrsRule(ingressOrEgress, policyName, ns string, ipsetEntries [][]string, ipsMgr *ipsm.IpsetManager) { - for i, ipCidrSet := range ipsetEntries { - if ipCidrSet == nil || len(ipCidrSet) == 0 { - continue - } - setName := policyName + "-in-ns-" + ns + "-" + strconv.Itoa(i) + ingressOrEgress - log.Logf("Delete set: %v, hashedSet: %v", setName, util.GetHashedName(setName)) - if err := ipsMgr.DeleteSet(setName); err != nil { - metrics.SendErrorLogAndMetric(util.NetpolID, "[removeCidrsRule] deleting ipset %s with err: %v", ipCidrSet, err) - } - } -} diff --git a/npm/parsePolicy.go b/npm/parsePolicy.go index 43d1d31d59..638a574df3 100644 --- a/npm/parsePolicy.go +++ b/npm/parsePolicy.go @@ -27,9 +27,8 @@ func isSamePolicy(old, new *networkingv1.NetworkPolicy) bool { } // addPolicy merges policies based on labels. +// if namespace matches && podSelector matches, then merge two network policies. Otherwise, return as is. func addPolicy(old, new *networkingv1.NetworkPolicy) (*networkingv1.NetworkPolicy, error) { - // if namespace matches && podSelector matches, then merge - // else return as is. if !reflect.DeepEqual(old.TypeMeta, new.TypeMeta) { return nil, fmt.Errorf("Old and new networkpolicies don't have the same TypeMeta") } @@ -63,6 +62,7 @@ func addPolicy(old, new *networkingv1.NetworkPolicy) (*networkingv1.NetworkPolic } } + // (TODO): It seems ingress and egress may have duplicated fields, but in translatePolicy, it seems duplicated fields are removed. ingress := append(old.Spec.Ingress, new.Spec.Ingress...) egress := append(old.Spec.Egress, new.Spec.Egress...) addedPolicy.Spec.Ingress = ingress From c7aa535f0b1a48ec761e8c68b9206d798018d951 Mon Sep 17 00:00:00 2001 From: Junguk Cho Date: Tue, 6 Apr 2021 14:16:46 -0700 Subject: [PATCH 02/19] update reconcile and deleteNetworkPolicy function to correctly install and uninstall default Azure NPM chain. --- npm/networkPolicyController.go | 94 ++++++++++++---------------------- 1 file changed, 32 insertions(+), 62 deletions(-) diff --git a/npm/networkPolicyController.go b/npm/networkPolicyController.go index d284678d86..d06b102342 100644 --- a/npm/networkPolicyController.go +++ b/npm/networkPolicyController.go @@ -28,9 +28,10 @@ type networkPolicyController struct { netPolListerSynced cache.InformerSynced workqueue workqueue.RateLimitingInterface // (TODO): networkPolController does not need to have whole NetworkPolicyManager pointer. Need to improve it - npMgr *NetworkPolicyManager + npMgr *NetworkPolicyManager + // flag to indicate default Azure NPM chain is created or not isAzureNpmChainCreated bool - // (TODO): why do we have this bool variable even though isAzureNpmChainCreated exists. + // flag to indicate deleting default Azure NPM chaing is ok or not isSafeToCleanUpAzureNpmChain bool } @@ -78,8 +79,6 @@ func (c *networkPolicyController) addNetPol(obj interface{}) { return } - // (TODO): need to remove - log.Logf("[NETPOL ADD EVENT] add key %s into workqueue", key) c.workqueue.Add(key) } @@ -221,8 +220,8 @@ func (c *networkPolicyController) syncNetPol(key string) error { utilruntime.HandleError(fmt.Errorf("invalid resource key: %s", key)) return nil } - log.Logf("SyncNetPol %s %s %s", key, namespace, name) + // Get the network policy resource with this namespace/name netPolObj, err := c.netPolLister.NetworkPolicies(namespace).Get(name) @@ -232,9 +231,7 @@ func (c *networkPolicyController) syncNetPol(key string) error { if err != nil { if errors.IsNotFound(err) { - // (TODO): think the location of this code. log.Logf("Network Policy %s is not found, may be it is deleted", key) - // netPolObj is not found, but should need to check the RawNpMap cache with cachedNetPolKey // #1 No item in RawNpMap, which means network policy with the cachedNetPolKey is not applied. Thus, do not need to clean up process. // #2 item in RawNpMap exists, which means network policy with the cachedNetPolKey is applied . Thus, Need to clean up process. @@ -261,13 +258,6 @@ func (c *networkPolicyController) syncNetPol(key string) error { } } - // Install default rules for kube-system and iptables - // (TODO): this default rules for kube-system and azure-npm chains, not directly related to passed netPolObj - // How to decouple this call with passed netPolObj? since it causes unnecessary re-try for the passed netPolObj - if err = c.initializeDefaultAzureNpmChain(); err != nil { - return fmt.Errorf("[syncNetPol] Error: due to %v", err) - } - err = c.syncAddAndUpdateNetPol(netPolObj) if err != nil { return fmt.Errorf("[syncNetPol] Error due to %s\n", err.Error()) @@ -332,22 +322,27 @@ func (c *networkPolicyController) deleteNetworkPolicy(netPolObj *networkingv1.Ne return fmt.Errorf("[deleteNetworkPolicy] Error: removeCidrsRule out due to %v", err) } - delete(c.npMgr.RawNpMap, cachedNetPolKey) - - // (TODO): it is not related to event - need to separate it. - if c.canCleanUpNpmChains() { + // (TODO): Can we decouple this from network policy event since if all deletions for cachedNetPolObj is successful, but UnititNpmChains() function is failed, + // deleteNetworkPolicy() will be executed again. + // Current code is funcationally ok. + // Even though UninitNpmChains return error, isAzureNpmChainCreated sets up false + // #1 when deletion event is requeued to workqueue and re-process this deleteNetworkPolicy function again, it is safe. + // #2 when a new network policy is added, the default Azure NPM chain is installed. + if c.canCleanUpAzureNpmChains() { c.isAzureNpmChainCreated = false if err = iptMgr.UninitNpmChains(); err != nil { return fmt.Errorf("[deleteNetworkPolicy] Error: failed to uninitialize azure-npm chains with err: %s", err) } } + delete(c.npMgr.RawNpMap, cachedNetPolKey) metrics.NumPolicies.Dec() return nil } // Add and Update NetworkPolicy handles adding network policy to iptables. func (c *networkPolicyController) syncAddAndUpdateNetPol(netPolObj *networkingv1.NetworkPolicy) error { + // (TODO): where do we use this? timer := metrics.StartNewTimer() var err error @@ -366,15 +361,27 @@ func (c *networkPolicyController) syncAddAndUpdateNetPol(netPolObj *networkingv1 // first delete the applied network policy, // then start translating policy and install translated ipset and iptables rules into kernel // For #2 and #3, the logic are the same. + // (TODO): can optimize logic more to reduce computations. For example, apply only difference if possible like podController - // (TODO): Need more optimizations - // Need to apply difference only if possible + // Do not need to clean up default Azure NPM chain in deleteNetworkPolicy function, if network policy object is applied. + // So, extra overhead to install default Azure NPM chain in initializeDefaultAzureNpmChain function. + // To achieve it, use flag "isSafeToCleanUpAzureNpmChain" before calling deleteNetworkPolicy function + c.isSafeToCleanUpAzureNpmChain = false + defer func() { + c.isSafeToCleanUpAzureNpmChain = true + }() err = c.deleteNetworkPolicy(netPolObj) if err != nil { return fmt.Errorf("[syncAddAndUpdateNetPol] Error: failed to deleteNetworkPolicy due to %s", err) } + // Install this default rules for kube-system and azure-npm chains if they are not initilized. + // Execute initializeDefaultAzureNpmChain function first before actually starting processing network policy object. + if err = c.initializeDefaultAzureNpmChain(); err != nil { + return fmt.Errorf("[syncNetPol] Error: due to %v", err) + } + ipsMgr := c.npMgr.NsMap[util.KubeAllNamespacesFlag].IpsMgr iptMgr := c.npMgr.NsMap[util.KubeAllNamespacesFlag].iptMgr sets, namedPorts, lists, ingressIPCidrs, egressIPCidrs, iptEntries := translatePolicy(netPolObj) @@ -396,12 +403,6 @@ func (c *networkPolicyController) syncAddAndUpdateNetPol(netPolObj *networkingv1 } } - // (TODO): Do we need initAllNsList?? - // Can we move this into "initializeDefaultAzureNpmChain" function? - if err = c.initAllNsList(); err != nil { - return fmt.Errorf("[syncAddAndUpdateNetPol] Error: initializing all-namespace ipset list with err: %v", err) - } - if err = c.createCidrsRule("in", netPolObj.ObjectMeta.Name, netPolObj.ObjectMeta.Namespace, ingressIPCidrs, ipsMgr); err != nil { return fmt.Errorf("[syncAddAndUpdateNetPol] Error: createCidrsRule in due to %v", err) } @@ -474,11 +475,10 @@ func (c *networkPolicyController) removeCidrsRule(ingressOrEgress, policyName, n return nil } -func (c *networkPolicyController) canCleanUpNpmChains() bool { - // (TODO): why do we have this? - // if !c.isSafeToCleanUpAzureNpmChain { - // return false - // } +func (c *networkPolicyController) canCleanUpAzureNpmChains() bool { + if !c.isSafeToCleanUpAzureNpmChain { + return false + } if len(c.npMgr.RawNpMap) == 0 { return true @@ -487,38 +487,8 @@ func (c *networkPolicyController) canCleanUpNpmChains() bool { return false } -// (TODO): copied from namespace.go - InitAllNsList syncs all-namespace ipset list -func (c *networkPolicyController) initAllNsList() error { - // who adds util.KubeAllNamespacesFlag in NsMap? - ipsMgr := c.npMgr.NsMap[util.KubeAllNamespacesFlag].IpsMgr - for ns := range c.npMgr.NsMap { - if ns == util.KubeAllNamespacesFlag { - continue - } - - if err := ipsMgr.AddToList(util.KubeAllNamespacesFlag, ns); err != nil { - return fmt.Errorf("[InitAllNsList] Error: failed to add namespace set %s to ipset list %s with err: %v", ns, util.KubeAllNamespacesFlag, err) - } - } - return nil -} - -// (TODO): copied from namespace.go, but no component uses this function UninitAllNsList cleans all-namespace ipset list. -func (c *networkPolicyController) unInitAllNsList() error { - allNs := c.npMgr.NsMap[util.KubeAllNamespacesFlag] - for ns := range c.npMgr.NsMap { - if ns == util.KubeAllNamespacesFlag { - continue - } - - if err := allNs.IpsMgr.DeleteFromList(util.KubeAllNamespacesFlag, ns); err != nil { - return fmt.Errorf("[UninitAllNsList] Error: failed to delete namespace set %s from list %s with err: %v", ns, util.KubeAllNamespacesFlag, err) - } - } - return nil -} - // GetProcessedNPKey will return netpolKey +// (TODO): will use this function when optimizing management of multiple network policies with merging and deducting multiple network policies. func (c *networkPolicyController) getProcessedNPKey(netPolObj *networkingv1.NetworkPolicy) string { // hashSelector will never be empty // (TODO): what if PodSelector is [] or nothing? - make the Unit test for this From d8a4d2bd24c10a2698a7a81373c535e44cf71d36 Mon Sep 17 00:00:00 2001 From: Junguk Cho Date: Tue, 6 Apr 2021 14:53:18 -0700 Subject: [PATCH 03/19] To explicitly manage default Azure NPM chain in deleteNetworkPolicy function --- npm/networkPolicyController.go | 52 ++++++++++++++-------------------- 1 file changed, 22 insertions(+), 30 deletions(-) diff --git a/npm/networkPolicyController.go b/npm/networkPolicyController.go index d06b102342..c52de3615c 100644 --- a/npm/networkPolicyController.go +++ b/npm/networkPolicyController.go @@ -22,6 +22,14 @@ import ( "k8s.io/client-go/util/workqueue" ) +// NamedPortOperation decides opeartion (e.g., delete or add) for named port ipset in manageNamedPortIpsets +type IsSafeCleanUpAzureNpmChain bool + +const ( + SafeToCleanUpAzureNpmChain IsSafeCleanUpAzureNpmChain = true + unSafeToCleanUpAzureNpmChain IsSafeCleanUpAzureNpmChain = false +) + type networkPolicyController struct { clientset kubernetes.Interface netPolLister netpollister.NetworkPolicyLister @@ -43,7 +51,7 @@ func NewNetworkPolicyController(npInformer networkinginformers.NetworkPolicyInfo workqueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "NetPols"), npMgr: npMgr, isAzureNpmChainCreated: false, - isSafeToCleanUpAzureNpmChain: false, + isSafeToCleanUpAzureNpmChain: true, } npInformer.Informer().AddEventHandler( @@ -241,7 +249,7 @@ func (c *networkPolicyController) syncNetPol(key string) error { return nil } - err = c.deleteNetworkPolicy(cachedNetPolObj) + err = c.deleteNetworkPolicy(cachedNetPolObj, SafeToCleanUpAzureNpmChain) if err != nil { return fmt.Errorf("[syncNetPol] Error: %v when network policy is not found\n", err) } @@ -252,7 +260,7 @@ func (c *networkPolicyController) syncNetPol(key string) error { // If DeletionTimestamp of the netPolObj is set, start cleaning up lastly applied states. // This is early cleaning up process from updateNetPol event if netPolObj.ObjectMeta.DeletionTimestamp == nil && netPolObj.ObjectMeta.DeletionGracePeriodSeconds == nil { - err = c.deleteNetworkPolicy(netPolObj) + err = c.deleteNetworkPolicy(netPolObj, SafeToCleanUpAzureNpmChain) if err != nil { return fmt.Errorf("Error: %v when ObjectMeta.DeletionTimestamp field is set\n", err) } @@ -286,7 +294,7 @@ func (c *networkPolicyController) initializeDefaultAzureNpmChain() error { } // DeleteNetworkPolicy handles deleting network policy based on cachedNetPolKey. -func (c *networkPolicyController) deleteNetworkPolicy(netPolObj *networkingv1.NetworkPolicy) error { +func (c *networkPolicyController) deleteNetworkPolicy(netPolObj *networkingv1.NetworkPolicy, isSafeCleanUpAzureNpmChain IsSafeCleanUpAzureNpmChain) error { var err error netpolKey, err := cache.MetaNamespaceKeyFunc(netPolObj) if err != nil { @@ -324,11 +332,12 @@ func (c *networkPolicyController) deleteNetworkPolicy(netPolObj *networkingv1.Ne // (TODO): Can we decouple this from network policy event since if all deletions for cachedNetPolObj is successful, but UnititNpmChains() function is failed, // deleteNetworkPolicy() will be executed again. - // Current code is funcationally ok. - // Even though UninitNpmChains return error, isAzureNpmChainCreated sets up false - // #1 when deletion event is requeued to workqueue and re-process this deleteNetworkPolicy function again, it is safe. - // #2 when a new network policy is added, the default Azure NPM chain is installed. - if c.canCleanUpAzureNpmChains() { + // if there is only one cached network policy in RawNPMap and no immediate network policy to process, + // clean up default azure npm chains since the cached network policy from RawNPMap is removed. + if isSafeCleanUpAzureNpmChain && len(c.npMgr.RawNpMap) == 1 { + // Even though UninitNpmChains return error, isAzureNpmChainCreated sets up false + // #1 when deletion event is requeued to workqueue and re-process this deleteNetworkPolicy function again, it is safe. + // #2 when a new network policy is added, the default Azure NPM chain is installed. c.isAzureNpmChainCreated = false if err = iptMgr.UninitNpmChains(); err != nil { return fmt.Errorf("[deleteNetworkPolicy] Error: failed to uninitialize azure-npm chains with err: %s", err) @@ -363,15 +372,10 @@ func (c *networkPolicyController) syncAddAndUpdateNetPol(netPolObj *networkingv1 // For #2 and #3, the logic are the same. // (TODO): can optimize logic more to reduce computations. For example, apply only difference if possible like podController - // Do not need to clean up default Azure NPM chain in deleteNetworkPolicy function, if network policy object is applied. - // So, extra overhead to install default Azure NPM chain in initializeDefaultAzureNpmChain function. - // To achieve it, use flag "isSafeToCleanUpAzureNpmChain" before calling deleteNetworkPolicy function - c.isSafeToCleanUpAzureNpmChain = false - defer func() { - c.isSafeToCleanUpAzureNpmChain = true - }() - - err = c.deleteNetworkPolicy(netPolObj) + // Do not need to clean up default Azure NPM chain in deleteNetworkPolicy function, if network policy object is applied soon. + // So, avoid extra overhead to install default Azure NPM chain in initializeDefaultAzureNpmChain function. + // To achieve it, use flag unSafeToCleanUpAzureNpmChain to indicate that the default Azure NPM chain cannot be deleted. + err = c.deleteNetworkPolicy(netPolObj, unSafeToCleanUpAzureNpmChain) if err != nil { return fmt.Errorf("[syncAddAndUpdateNetPol] Error: failed to deleteNetworkPolicy due to %s", err) } @@ -475,18 +479,6 @@ func (c *networkPolicyController) removeCidrsRule(ingressOrEgress, policyName, n return nil } -func (c *networkPolicyController) canCleanUpAzureNpmChains() bool { - if !c.isSafeToCleanUpAzureNpmChain { - return false - } - - if len(c.npMgr.RawNpMap) == 0 { - return true - } - - return false -} - // GetProcessedNPKey will return netpolKey // (TODO): will use this function when optimizing management of multiple network policies with merging and deducting multiple network policies. func (c *networkPolicyController) getProcessedNPKey(netPolObj *networkingv1.NetworkPolicy) string { From 96a5bdeab245147b596900930f069e93bae29b34 Mon Sep 17 00:00:00 2001 From: Junguk Cho Date: Tue, 6 Apr 2021 15:01:15 -0700 Subject: [PATCH 04/19] correct comments and delete unused variable --- npm/networkPolicyController.go | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/npm/networkPolicyController.go b/npm/networkPolicyController.go index c52de3615c..85ceb4896b 100644 --- a/npm/networkPolicyController.go +++ b/npm/networkPolicyController.go @@ -22,7 +22,7 @@ import ( "k8s.io/client-go/util/workqueue" ) -// NamedPortOperation decides opeartion (e.g., delete or add) for named port ipset in manageNamedPortIpsets +// IsSafeCleanUpAzureNpmChain is used to indicate whether default Azure NPM chain can be safely deleted or not. type IsSafeCleanUpAzureNpmChain bool const ( @@ -39,19 +39,16 @@ type networkPolicyController struct { npMgr *NetworkPolicyManager // flag to indicate default Azure NPM chain is created or not isAzureNpmChainCreated bool - // flag to indicate deleting default Azure NPM chaing is ok or not - isSafeToCleanUpAzureNpmChain bool } func NewNetworkPolicyController(npInformer networkinginformers.NetworkPolicyInformer, clientset kubernetes.Interface, npMgr *NetworkPolicyManager) *networkPolicyController { netPolController := &networkPolicyController{ - clientset: clientset, - netPolLister: npInformer.Lister(), - netPolListerSynced: npInformer.Informer().HasSynced, - workqueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "NetPols"), - npMgr: npMgr, - isAzureNpmChainCreated: false, - isSafeToCleanUpAzureNpmChain: true, + clientset: clientset, + netPolLister: npInformer.Lister(), + netPolListerSynced: npInformer.Informer().HasSynced, + workqueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "NetPols"), + npMgr: npMgr, + isAzureNpmChainCreated: false, } npInformer.Informer().AddEventHandler( From 3b6fbc0f33a45f9cbf29c2a6cc5787e158fa3f6e Mon Sep 17 00:00:00 2001 From: Junguk Cho Date: Tue, 6 Apr 2021 15:30:55 -0700 Subject: [PATCH 05/19] fix missed returing errors in codes --- npm/networkPolicyController.go | 145 ++++++++++++++++----------------- 1 file changed, 72 insertions(+), 73 deletions(-) diff --git a/npm/networkPolicyController.go b/npm/networkPolicyController.go index 85ceb4896b..6af6d75fde 100644 --- a/npm/networkPolicyController.go +++ b/npm/networkPolicyController.go @@ -61,7 +61,7 @@ func NewNetworkPolicyController(npInformer networkinginformers.NetworkPolicyInfo return netPolController } -// filter this event if we do not need to handle this event +// needSync checks whether the obj is valid network policy object. func (c *networkPolicyController) needSync(obj interface{}) (string, error) { var key string _, ok := obj.(*networkingv1.NetworkPolicy) @@ -129,27 +129,26 @@ func (c *networkPolicyController) deleteNetPol(obj interface{}) { } } - log.Logf("[NETPOL DELETE EVENT]") var key string var err error if key, err = cache.MetaNamespaceKeyFunc(obj); err != nil { utilruntime.HandleError(err) return } + netPolCachedKey := util.GetNSNameWithPrefix(key) // (TODO): Reduce scope of lock later c.npMgr.Lock() defer c.npMgr.Unlock() - _, netPolExists := c.npMgr.RawNpMap[netPolCachedKey] - // If network policy object is not in the RawNpMap, we do not need to clean-up states for this network policy - // since netPolController did not apply for any states for this pod + _, netPolExists := c.npMgr.RawNpMap[netPolCachedKey] + // If a network policy object is not in the RawNpMap, do not need to clean-up states for the network policy + // since netPolController did not apply for any states for the network policy if !netPolExists { return } - log.Logf("[NETPOL DEL EVENT] add key %s into workqueue", key) c.workqueue.Add(key) } @@ -162,7 +161,7 @@ func (c *networkPolicyController) Run(threadiness int, stopCh <-chan struct{}) e go wait.Until(c.runWorker, time.Second, stopCh) } - log.Logf("Started Network Policy workers") + log.Logf("Started Network Policy %d worker(s)", threadiness) <-stopCh log.Logf("Shutting down Network Policy workers") @@ -261,6 +260,7 @@ func (c *networkPolicyController) syncNetPol(key string) error { if err != nil { return fmt.Errorf("Error: %v when ObjectMeta.DeletionTimestamp field is set\n", err) } + return nil } err = c.syncAddAndUpdateNetPol(netPolObj) @@ -290,63 +290,7 @@ func (c *networkPolicyController) initializeDefaultAzureNpmChain() error { return nil } -// DeleteNetworkPolicy handles deleting network policy based on cachedNetPolKey. -func (c *networkPolicyController) deleteNetworkPolicy(netPolObj *networkingv1.NetworkPolicy, isSafeCleanUpAzureNpmChain IsSafeCleanUpAzureNpmChain) error { - var err error - netpolKey, err := cache.MetaNamespaceKeyFunc(netPolObj) - if err != nil { - return fmt.Errorf("[deleteNetworkPolicy] Error: while running MetaNamespaceKeyFunc err: %s", err) - } - - cachedNetPolKey := util.GetNSNameWithPrefix(netpolKey) - cachedNetPolObj, cachedNetPolObjExists := c.npMgr.RawNpMap[cachedNetPolKey] - // if there is no applied network policy with the cachedNetPolKey, do not need to clean up process. - if !cachedNetPolObjExists { - return nil - } - - ipsMgr := c.npMgr.NsMap[util.KubeAllNamespacesFlag].IpsMgr - iptMgr := c.npMgr.NsMap[util.KubeAllNamespacesFlag].iptMgr - // translate policy from "cachedNetPolObj" - _, _, _, ingressIPCidrs, egressIPCidrs, iptEntries := translatePolicy(cachedNetPolObj) - - // delete iptables entries - for _, iptEntry := range iptEntries { - if err = iptMgr.Delete(iptEntry); err != nil { - return fmt.Errorf("[deleteNetworkPolicy] Error: failed to apply iptables rule. Rule: %+v with err: %v", iptEntry, err) - } - } - - // delete ipset list related to ingress CIDRs - if err = c.removeCidrsRule("in", netPolObj.ObjectMeta.Name, netPolObj.ObjectMeta.Namespace, ingressIPCidrs, ipsMgr); err != nil { - return fmt.Errorf("[deleteNetworkPolicy] Error: removeCidrsRule in due to %v", err) - } - - // delete ipset list related to egress CIDRs - if err = c.removeCidrsRule("out", netPolObj.ObjectMeta.Name, netPolObj.ObjectMeta.Namespace, egressIPCidrs, ipsMgr); err != nil { - return fmt.Errorf("[deleteNetworkPolicy] Error: removeCidrsRule out due to %v", err) - } - - // (TODO): Can we decouple this from network policy event since if all deletions for cachedNetPolObj is successful, but UnititNpmChains() function is failed, - // deleteNetworkPolicy() will be executed again. - // if there is only one cached network policy in RawNPMap and no immediate network policy to process, - // clean up default azure npm chains since the cached network policy from RawNPMap is removed. - if isSafeCleanUpAzureNpmChain && len(c.npMgr.RawNpMap) == 1 { - // Even though UninitNpmChains return error, isAzureNpmChainCreated sets up false - // #1 when deletion event is requeued to workqueue and re-process this deleteNetworkPolicy function again, it is safe. - // #2 when a new network policy is added, the default Azure NPM chain is installed. - c.isAzureNpmChainCreated = false - if err = iptMgr.UninitNpmChains(); err != nil { - return fmt.Errorf("[deleteNetworkPolicy] Error: failed to uninitialize azure-npm chains with err: %s", err) - } - } - - delete(c.npMgr.RawNpMap, cachedNetPolKey) - metrics.NumPolicies.Dec() - return nil -} - -// Add and Update NetworkPolicy handles adding network policy to iptables. +// syncAddAndUpdateNetPol handles a new network policy or an updated network policy object triggered by add and update events func (c *networkPolicyController) syncAddAndUpdateNetPol(netPolObj *networkingv1.NetworkPolicy) error { // (TODO): where do we use this? timer := metrics.StartNewTimer() @@ -357,21 +301,20 @@ func (c *networkPolicyController) syncAddAndUpdateNetPol(netPolObj *networkingv1 return fmt.Errorf("[syncAddAndUpdateNetPol] Error: while running MetaNamespaceKeyFunc err: %s", err) } - // Start reconciling loop to eventually meet the desired states from network policy + // Start reconciling loop to eventually meet cached states against the desired states from network policy. // #1 If a new network policy is created, the network policy is not in RawNPMap. // start translating policy and install translated ipset and iptables rules into kernel // #2 If a network policy with -- is applied before and two network policy are the same object (same UID), - // first delete the applied network policy, - // then start translating policy and install translated ipset and iptables rules into kernel + // first delete the applied network policy, then start translating policy and install translated ipset and iptables rules into kernel // #3 If a network policy with -- is applied before and two network policy are the different object (different UID) due to missing some events for the old object - // first delete the applied network policy, - // then start translating policy and install translated ipset and iptables rules into kernel - // For #2 and #3, the logic are the same. + // first delete the applied network policy, then start translating policy and install translated ipset and iptables rules into kernel + // To deal with all three cases, we first delete network policy if possible, then install translated rules into kernel. // (TODO): can optimize logic more to reduce computations. For example, apply only difference if possible like podController // Do not need to clean up default Azure NPM chain in deleteNetworkPolicy function, if network policy object is applied soon. // So, avoid extra overhead to install default Azure NPM chain in initializeDefaultAzureNpmChain function. // To achieve it, use flag unSafeToCleanUpAzureNpmChain to indicate that the default Azure NPM chain cannot be deleted. + // delete existing network policy err = c.deleteNetworkPolicy(netPolObj, unSafeToCleanUpAzureNpmChain) if err != nil { return fmt.Errorf("[syncAddAndUpdateNetPol] Error: failed to deleteNetworkPolicy due to %s", err) @@ -422,12 +365,68 @@ func (c *networkPolicyController) syncAddAndUpdateNetPol(netPolObj *networkingv1 c.npMgr.RawNpMap[cachedNetPolKey] = netPolObj metrics.NumPolicies.Inc() - // (TODO): may better to use defer? timer.StopAndRecord(metrics.AddPolicyExecTime) return nil } +// DeleteNetworkPolicy handles deleting network policy based on cachedNetPolKey. +func (c *networkPolicyController) deleteNetworkPolicy(netPolObj *networkingv1.NetworkPolicy, isSafeCleanUpAzureNpmChain IsSafeCleanUpAzureNpmChain) error { + var err error + netpolKey, err := cache.MetaNamespaceKeyFunc(netPolObj) + if err != nil { + return fmt.Errorf("[deleteNetworkPolicy] Error: while running MetaNamespaceKeyFunc err: %s", err) + } + + cachedNetPolKey := util.GetNSNameWithPrefix(netpolKey) + cachedNetPolObj, cachedNetPolObjExists := c.npMgr.RawNpMap[cachedNetPolKey] + // if there is no applied network policy with the cachedNetPolKey, do not need to clean up process. + if !cachedNetPolObjExists { + return nil + } + + ipsMgr := c.npMgr.NsMap[util.KubeAllNamespacesFlag].IpsMgr + iptMgr := c.npMgr.NsMap[util.KubeAllNamespacesFlag].iptMgr + // translate policy from "cachedNetPolObj" + _, _, _, ingressIPCidrs, egressIPCidrs, iptEntries := translatePolicy(cachedNetPolObj) + + // delete iptables entries + for _, iptEntry := range iptEntries { + if err = iptMgr.Delete(iptEntry); err != nil { + return fmt.Errorf("[deleteNetworkPolicy] Error: failed to apply iptables rule. Rule: %+v with err: %v", iptEntry, err) + } + } + + // delete ipset list related to ingress CIDRs + if err = c.removeCidrsRule("in", netPolObj.ObjectMeta.Name, netPolObj.ObjectMeta.Namespace, ingressIPCidrs, ipsMgr); err != nil { + return fmt.Errorf("[deleteNetworkPolicy] Error: removeCidrsRule in due to %v", err) + } + + // delete ipset list related to egress CIDRs + if err = c.removeCidrsRule("out", netPolObj.ObjectMeta.Name, netPolObj.ObjectMeta.Namespace, egressIPCidrs, ipsMgr); err != nil { + return fmt.Errorf("[deleteNetworkPolicy] Error: removeCidrsRule out due to %v", err) + } + + // (TODO): Can we decouple below logic from cachedNetPolObj since if all deletions for cachedNetPolObj are successful, but UnititNpmChains function is failed, + // deleteNetworkPolicy function will be executed again. + + // if there is only one cached network policy in RawNPMap and no immediate network policy to process, + // clean up default azure npm chains since the cached network policy from RawNPMap is removed. + if isSafeCleanUpAzureNpmChain && len(c.npMgr.RawNpMap) == 1 { + // Even though UninitNpmChains return error, isAzureNpmChainCreated sets up false + // #1 When deletion event is requeued to workqueue and deleteNetworkPolicy function is called again, it is safe. + // #2 when a new network policy is added, the default Azure NPM chain can install. + c.isAzureNpmChainCreated = false + if err = iptMgr.UninitNpmChains(); err != nil { + return fmt.Errorf("[deleteNetworkPolicy] Error: failed to uninitialize azure-npm chains with err: %s", err) + } + } + + delete(c.npMgr.RawNpMap, cachedNetPolKey) + metrics.NumPolicies.Dec() + return nil +} + func (c *networkPolicyController) createCidrsRule(ingressOrEgress, policyName, ns string, ipsetEntries [][]string, ipsMgr *ipsm.IpsetManager) error { spec := append([]string{util.IpsetNetHashFlag, util.IpsetMaxelemName, util.IpsetMaxelemNum}) @@ -438,7 +437,7 @@ func (c *networkPolicyController) createCidrsRule(ingressOrEgress, policyName, n setName := policyName + "-in-ns-" + ns + "-" + strconv.Itoa(i) + ingressOrEgress log.Logf("Creating set: %v, hashedSet: %v", setName, util.GetHashedName(setName)) if err := ipsMgr.CreateSet(setName, spec); err != nil { - metrics.SendErrorLogAndMetric(util.NetpolID, "[createCidrsRule] Error: creating ipset %s with err: %v", ipCidrSet, err) + return fmt.Errorf("[createCidrsRule] Error: creating ipset %s with err: %v", ipCidrSet, err) } for _, ipCidrEntry := range util.DropEmptyFields(ipCidrSet) { // Ipset doesn't allow 0.0.0.0/0 to be added. A general solution is split 0.0.0.0/1 in half which convert to @@ -490,7 +489,7 @@ func (c *networkPolicyController) getProcessedNPKey(netPolObj *networkingv1.Netw return util.GetNSNameWithPrefix(hashedPodSelector) } -// (TODO): placeholder to avoid compile errors +// (TODO): placeholders to avoid compile errors. Will be deleted func (npMgr *NetworkPolicyManager) AddNetworkPolicy(netPol *networkingv1.NetworkPolicy) error { return nil } From 0bb785b2075924847de284cc874c618caa0d664f Mon Sep 17 00:00:00 2001 From: Junguk Cho Date: Tue, 6 Apr 2021 15:46:50 -0700 Subject: [PATCH 06/19] Correct to check DeletionTimestamp and DeletionGracePeriodSeconds variables --- npm/networkPolicyController.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/npm/networkPolicyController.go b/npm/networkPolicyController.go index 6af6d75fde..467c47a95b 100644 --- a/npm/networkPolicyController.go +++ b/npm/networkPolicyController.go @@ -224,6 +224,7 @@ func (c *networkPolicyController) syncNetPol(key string) error { utilruntime.HandleError(fmt.Errorf("invalid resource key: %s", key)) return nil } + log.Logf("SyncNetPol %s %s %s", key, namespace, name) // Get the network policy resource with this namespace/name @@ -255,7 +256,7 @@ func (c *networkPolicyController) syncNetPol(key string) error { // If DeletionTimestamp of the netPolObj is set, start cleaning up lastly applied states. // This is early cleaning up process from updateNetPol event - if netPolObj.ObjectMeta.DeletionTimestamp == nil && netPolObj.ObjectMeta.DeletionGracePeriodSeconds == nil { + if netPolObj.ObjectMeta.DeletionTimestamp != nil || netPolObj.ObjectMeta.DeletionGracePeriodSeconds != nil { err = c.deleteNetworkPolicy(netPolObj, SafeToCleanUpAzureNpmChain) if err != nil { return fmt.Errorf("Error: %v when ObjectMeta.DeletionTimestamp field is set\n", err) From c3a196970ffede16abbc676e0911e14599894d65 Mon Sep 17 00:00:00 2001 From: Junguk Cho Date: Tue, 6 Apr 2021 16:57:34 -0700 Subject: [PATCH 07/19] removed placeholder functions in network policy controoler and added more test cases (e.g., update and adding multiple network policies) --- npm/networkPolicyController.go | 17 +- npm/networkPolicyController_test.go | 369 ++++++---------------------- 2 files changed, 72 insertions(+), 314 deletions(-) diff --git a/npm/networkPolicyController.go b/npm/networkPolicyController.go index 467c47a95b..92cd9008ea 100644 --- a/npm/networkPolicyController.go +++ b/npm/networkPolicyController.go @@ -192,8 +192,8 @@ func (c *networkPolicyController) processNextWorkItem() bool { utilruntime.HandleError(fmt.Errorf("expected string in workqueue but got %#v", obj)) return nil } - // Run the syncHandler, passing it the namespace/name string of the - // Pod resource to be synced. + // Run the syncNetPol, passing it the namespace/name string of the + // network policy resource to be synced. // TODO : may consider using "c.queue.AddAfter(key, *requeueAfter)" according to error type later if err := c.syncNetPol(key); err != nil { // Put the item back on the workqueue to handle any transient errors. @@ -489,16 +489,3 @@ func (c *networkPolicyController) getProcessedNPKey(netPolObj *networkingv1.Netw } return util.GetNSNameWithPrefix(hashedPodSelector) } - -// (TODO): placeholders to avoid compile errors. Will be deleted -func (npMgr *NetworkPolicyManager) AddNetworkPolicy(netPol *networkingv1.NetworkPolicy) error { - return nil -} - -func (npMgr *NetworkPolicyManager) DeleteNetworkPolicy(netPol *networkingv1.NetworkPolicy) error { - return nil -} - -func (npMgr *NetworkPolicyManager) UpdateNetworkPolicy(oldNpObj *networkingv1.NetworkPolicy, newNpObj *networkingv1.NetworkPolicy) error { - return nil -} diff --git a/npm/networkPolicyController_test.go b/npm/networkPolicyController_test.go index 39479c7a0a..f723fcc0b8 100644 --- a/npm/networkPolicyController_test.go +++ b/npm/networkPolicyController_test.go @@ -3,13 +3,10 @@ package npm import ( + "fmt" "testing" "github.com/Azure/azure-container-networking/npm/ipsm" - "github.com/Azure/azure-container-networking/npm/iptm" - "github.com/Azure/azure-container-networking/npm/metrics" - "github.com/Azure/azure-container-networking/npm/metrics/promutil" - "github.com/Azure/azure-container-networking/npm/util" corev1 "k8s.io/api/core/v1" networkingv1 "k8s.io/api/networking/v1" @@ -19,7 +16,6 @@ import ( kubeinformers "k8s.io/client-go/informers" k8sfake "k8s.io/client-go/kubernetes/fake" core "k8s.io/client-go/testing" - "k8s.io/client-go/tools/cache" ) type netPolFixture struct { @@ -50,11 +46,10 @@ func newNetPolFixture(t *testing.T) *netPolFixture { } f.npMgr.RawNpMap = make(map[string]*networkingv1.NetworkPolicy) - f.npMgr.ProcessedNpMap = make(map[string]*networkingv1.NetworkPolicy) return f } -func (f *netPolFixture) newNetPodController(stopCh chan struct{}) { +func (f *netPolFixture) newNetPolController(stopCh chan struct{}) { f.kubeclient = k8sfake.NewSimpleClientset(f.kubeobjects...) f.kubeInformer = kubeinformers.NewSharedInformerFactory(f.kubeclient, noResyncPeriodFunc()) @@ -83,7 +78,7 @@ func (f *netPolFixture) ipSetRestore(ipsetConfigFile string) { } } -// (TODO): fix it +// (TODO): make createNetPol flexible func createNetPol() *networkingv1.NetworkPolicy { tcp := corev1.ProtocolTCP port8000 := intstr.FromInt(8000) @@ -113,19 +108,23 @@ func createNetPol() *networkingv1.NetworkPolicy { }}, }, }, + Egress: []networkingv1.NetworkPolicyEgressRule{ + networkingv1.NetworkPolicyEgressRule{ + To: []networkingv1.NetworkPolicyPeer{{ + PodSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"app": "test"}, + }, + }}, + Ports: []networkingv1.NetworkPolicyPort{{ + Protocol: &tcp, + Port: &port8000, + }}, + }, + }, }, } } -func getNetPolKey(netPolObj *networkingv1.NetworkPolicy, t *testing.T) string { - key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(netPolObj) - if err != nil { - t.Errorf("Unexpected error getting key for network policy %s: %v", netPolObj.Name, err) - return "" - } - return key -} - func addNetPol(t *testing.T, f *netPolFixture, netPolObj *networkingv1.NetworkPolicy) { // simulate "network policy" add event and add network policy object to sharedInformer cache f.netPolController.addNetPol(netPolObj) @@ -138,7 +137,7 @@ func addNetPol(t *testing.T, f *netPolFixture, netPolObj *networkingv1.NetworkPo f.netPolController.processNextWorkItem() } -func deleteNetPod(t *testing.T, f *netPolFixture, netPolObj *networkingv1.NetworkPolicy) { +func deleteNetPol(t *testing.T, f *netPolFixture, netPolObj *networkingv1.NetworkPolicy) { addNetPol(t, f, netPolObj) t.Logf("Complete adding network policy event") @@ -154,12 +153,11 @@ func deleteNetPod(t *testing.T, f *netPolFixture, netPolObj *networkingv1.Networ f.netPolController.processNextWorkItem() } -// Need to make more cases - interestings.. func updateNetPol(t *testing.T, f *netPolFixture, oldNetPolObj, netNetPolObj *networkingv1.NetworkPolicy) { addNetPol(t, f, oldNetPolObj) - t.Logf("Complete updating network policy event") + t.Logf("Complete adding network policy event") - // simulate pod update event and update the pod to shared informer's cache + // simulate network policy update event and update the network policy to shared informer's cache f.kubeInformer.Networking().V1().NetworkPolicies().Informer().GetIndexer().Update(netNetPolObj) f.netPolController.updateNetPol(oldNetPolObj, netNetPolObj) @@ -171,14 +169,11 @@ func updateNetPol(t *testing.T, f *netPolFixture, oldNetPolObj, netNetPolObj *ne f.netPolController.processNextWorkItem() } -// (TODO) - fix : it should be "RawNetPol" && "ProcessedNetPol" type expectedNetPolValues struct { - expectedLenOfNsMap int - expectedLenOfRawNpMap int - expectedLenOfProcessedNpMap int - expectedIsSafeToCleanUpAzureNpmChain bool - expectedIsAzureNpmChainCreated bool - expectedLenOfWorkQueue int + expectedLenOfNsMap int + expectedLenOfRawNpMap int + expectedLenOfWorkQueue int + expectedIsAzureNpmChainCreated bool } func checkNetPolTestResult(testName string, f *netPolFixture, testCases []expectedNetPolValues) { @@ -191,17 +186,8 @@ func checkNetPolTestResult(testName string, f *netPolFixture, testCases []expect f.t.Errorf("Raw NetPol Map length = %d, want %d", got, test.expectedLenOfRawNpMap) } - if got := len(f.netPolController.npMgr.ProcessedNpMap); got != test.expectedLenOfProcessedNpMap { - f.t.Errorf("Processed NetPol Map length = %d, want %d", got, test.expectedLenOfProcessedNpMap) - } - - // if got := f.netPolController.isSafeToCleanUpAzureNpmChain; got != test.expectedIsSafeToCleanUpAzureNpmChain { - // f.t.Errorf("isSafeToCleanUpAzureNpmChain %v, want %v", got, test.expectedLenOfRawNpMap) - // } - if got := f.netPolController.isAzureNpmChainCreated; got != test.expectedIsAzureNpmChainCreated { f.t.Errorf("isAzureNpmChainCreated %v, want %v", got, test.expectedIsAzureNpmChainCreated) - } if got := f.netPolController.workqueue.Len(); got != test.expectedLenOfWorkQueue { @@ -210,6 +196,29 @@ func checkNetPolTestResult(testName string, f *netPolFixture, testCases []expect } } +func TestAddMultipleNetworkPolicies(t *testing.T) { + netPolObj1 := createNetPol() + + // deep copy netPolObj1 and change namespace and name since current createNetPol is not flexble. + netPolObj2 := netPolObj1.DeepCopy() + netPolObj2.Namespace = fmt.Sprintf("%s-new", netPolObj1.Namespace) + netPolObj2.Name = fmt.Sprintf("%s-new", netPolObj1.Name) + + f := newNetPolFixture(t) + f.netPolLister = append(f.netPolLister, netPolObj1, netPolObj2) + f.kubeobjects = append(f.kubeobjects, netPolObj1, netPolObj2) + stopCh := make(chan struct{}) + defer close(stopCh) + f.newNetPolController(stopCh) + + addNetPol(t, f, netPolObj1) + addNetPol(t, f, netPolObj2) + + testCases := []expectedNetPolValues{ + {1, 2, 0, true}, + } + checkNetPolTestResult("TestAddMultipleNetPols", f, testCases) +} func TestAddNetworkPolicy(t *testing.T) { netPolObj := createNetPol() @@ -218,12 +227,11 @@ func TestAddNetworkPolicy(t *testing.T) { f.kubeobjects = append(f.kubeobjects, netPolObj) stopCh := make(chan struct{}) defer close(stopCh) - f.newNetPodController(stopCh) + f.newNetPolController(stopCh) addNetPol(t, f, netPolObj) - // (TODO): Why? networkPolicyController_test.go:187: npMgr namespace map length = 1, want 2 testCases := []expectedNetPolValues{ - {2, 1, 0, false, true, 0}, + {1, 1, 0, true}, } checkNetPolTestResult("TestAddNetPol", f, testCases) } @@ -236,275 +244,38 @@ func TestDeleteNetworkPolicy(t *testing.T) { f.kubeobjects = append(f.kubeobjects, netPolObj) stopCh := make(chan struct{}) defer close(stopCh) - f.newNetPodController(stopCh) + f.newNetPolController(stopCh) - deleteNetPod(t, f, netPolObj) - // (TODO): check ground-truth value + deleteNetPol(t, f, netPolObj) testCases := []expectedNetPolValues{ - {1, 0, 0, false, false, 0}, + {1, 0, 0, false}, } checkNetPolTestResult("TestDelNetPol", f, testCases) } -func TestAddNetworkPolicy1(t *testing.T) { - npMgr := &NetworkPolicyManager{ - NsMap: make(map[string]*Namespace), - PodMap: make(map[string]*NpmPod), - RawNpMap: make(map[string]*networkingv1.NetworkPolicy), - ProcessedNpMap: make(map[string]*networkingv1.NetworkPolicy), - TelemetryEnabled: false, - } - - allNs, err := newNs(util.KubeAllNamespacesFlag) - if err != nil { - panic(err.Error) - } - npMgr.NsMap[util.KubeAllNamespacesFlag] = allNs - - iptMgr := iptm.NewIptablesManager() - if err := iptMgr.Save(util.IptablesTestConfigFile); err != nil { - t.Errorf("TestAddNetworkPolicy failed @ iptMgr.Save") - } - - ipsMgr := ipsm.NewIpsetManager() - if err := ipsMgr.Save(util.IpsetTestConfigFile); err != nil { - t.Errorf("TestAddNetworkPolicy failed @ ipsMgr.Save") - } - - // Create ns-kube-system set - if err := ipsMgr.CreateSet("ns-"+util.KubeSystemFlag, append([]string{util.IpsetNetHashFlag})); err != nil { - t.Errorf("TestAddNetworkPolicy failed @ ipsMgr.CreateSet, adding kube-system set%+v", err) - } - - defer func() { - if err := iptMgr.Restore(util.IptablesTestConfigFile); err != nil { - t.Errorf("TestAddNetworkPolicy failed @ iptMgr.Restore") - } - - if err := ipsMgr.Restore(util.IpsetTestConfigFile); err != nil { - t.Errorf("TestAddNetworkPolicy failed @ ipsMgr.Restore") - } - }() - - tcp := corev1.ProtocolTCP - port8000 := intstr.FromInt(8000) - allowIngress := &networkingv1.NetworkPolicy{ - ObjectMeta: metav1.ObjectMeta{ - Name: "allow-ingress", - Namespace: "test-nwpolicy", - }, - Spec: networkingv1.NetworkPolicySpec{ - Ingress: []networkingv1.NetworkPolicyIngressRule{ - networkingv1.NetworkPolicyIngressRule{ - From: []networkingv1.NetworkPolicyPeer{ - networkingv1.NetworkPolicyPeer{ - PodSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{"app": "test"}, - }, - }, - networkingv1.NetworkPolicyPeer{ - IPBlock: &networkingv1.IPBlock{ - CIDR: "0.0.0.0/0", - }, - }, - }, - Ports: []networkingv1.NetworkPolicyPort{{ - Protocol: &tcp, - Port: &port8000, - }}, - }, - }, - }, - } - - gaugeVal, err1 := promutil.GetValue(metrics.NumPolicies) - countVal, err2 := promutil.GetCountValue(metrics.AddPolicyExecTime) - - npMgr.Lock() - if err := npMgr.AddNetworkPolicy(allowIngress); err != nil { - t.Errorf("TestAddNetworkPolicy failed @ allowIngress AddNetworkPolicy") - t.Errorf("Error: %v", err) - } - npMgr.Unlock() - - ipsMgr = npMgr.NsMap[util.KubeAllNamespacesFlag].IpsMgr - - // Check whether 0.0.0.0/0 got translated to 1.0.0.0/1 and 128.0.0.0/1 - if !ipsMgr.Exists("allow-ingress-in-ns-test-nwpolicy-0in", "1.0.0.0/1", util.IpsetNetHashFlag) { - t.Errorf("TestDeleteFromSet failed @ ipsMgr.AddToSet") - } - - if !ipsMgr.Exists("allow-ingress-in-ns-test-nwpolicy-0in", "128.0.0.0/1", util.IpsetNetHashFlag) { - t.Errorf("TestDeleteFromSet failed @ ipsMgr.AddToSet") - } - - allowEgress := &networkingv1.NetworkPolicy{ - ObjectMeta: metav1.ObjectMeta{ - Name: "allow-egress", - Namespace: "test-nwpolicy", - }, - Spec: networkingv1.NetworkPolicySpec{ - Egress: []networkingv1.NetworkPolicyEgressRule{ - networkingv1.NetworkPolicyEgressRule{ - To: []networkingv1.NetworkPolicyPeer{{ - PodSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{"app": "test"}, - }, - }}, - Ports: []networkingv1.NetworkPolicyPort{{ - Protocol: &tcp, - Port: &port8000, - }}, - }, - }, - }, - } - - npMgr.Lock() - if err := npMgr.AddNetworkPolicy(allowEgress); err != nil { - t.Errorf("TestAddNetworkPolicy failed @ allowEgress AddNetworkPolicy") - t.Errorf("Error: %v", err) - } - npMgr.Unlock() - - newGaugeVal, err3 := promutil.GetValue(metrics.NumPolicies) - newCountVal, err4 := promutil.GetCountValue(metrics.AddPolicyExecTime) - promutil.NotifyIfErrors(t, err1, err2, err3, err4) - if newGaugeVal != gaugeVal+2 { - t.Errorf("Change in policy number didn't register in prometheus") - } - if newCountVal != countVal+2 { - t.Errorf("Execution time didn't register in prometheus") - } -} - +// Need to make more test cases func TestUpdateNetworkPolicy(t *testing.T) { - npMgr := &NetworkPolicyManager{ - NsMap: make(map[string]*Namespace), - PodMap: make(map[string]*NpmPod), - RawNpMap: make(map[string]*networkingv1.NetworkPolicy), - ProcessedNpMap: make(map[string]*networkingv1.NetworkPolicy), - TelemetryEnabled: false, - } - - allNs, err := newNs(util.KubeAllNamespacesFlag) - if err != nil { - panic(err.Error) - } - npMgr.NsMap[util.KubeAllNamespacesFlag] = allNs - - iptMgr := iptm.NewIptablesManager() - if err := iptMgr.Save(util.IptablesTestConfigFile); err != nil { - t.Errorf("TestUpdateNetworkPolicy failed @ iptMgr.Save") - } - - ipsMgr := ipsm.NewIpsetManager() - if err := ipsMgr.Save(util.IpsetTestConfigFile); err != nil { - t.Errorf("TestUpdateNetworkPolicy failed @ ipsMgr.Save") - } - - defer func() { - if err := iptMgr.Restore(util.IptablesTestConfigFile); err != nil { - t.Errorf("TestUpdateNetworkPolicy failed @ iptMgr.Restore") - } - - if err := ipsMgr.Restore(util.IpsetTestConfigFile); err != nil { - t.Errorf("TestUpdateNetworkPolicy failed @ ipsMgr.Restore") - } - }() - - // Create ns-kube-system set - if err := ipsMgr.CreateSet("ns-"+util.KubeSystemFlag, append([]string{util.IpsetNetHashFlag})); err != nil { - t.Errorf("TestUpdateNetworkPolicy failed @ ipsMgr.CreateSet, adding kube-system set%+v", err) - } - - tcp, udp := corev1.ProtocolTCP, corev1.ProtocolUDP - allowIngress := &networkingv1.NetworkPolicy{ - ObjectMeta: metav1.ObjectMeta{ - Name: "allow-ingress", - Namespace: "test-nwpolicy", - }, - Spec: networkingv1.NetworkPolicySpec{ - Ingress: []networkingv1.NetworkPolicyIngressRule{ - networkingv1.NetworkPolicyIngressRule{ - From: []networkingv1.NetworkPolicyPeer{{ - PodSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{"app": "test"}, - }, - }}, - Ports: []networkingv1.NetworkPolicyPort{{ - Protocol: &tcp, - Port: &intstr.IntOrString{ - StrVal: "8000", - }, - }}, - }, - }, - }, - } + oldNetPolObj := createNetPol() - allowEgress := &networkingv1.NetworkPolicy{ - ObjectMeta: metav1.ObjectMeta{ - Name: "allow-egress", - Namespace: "test-nwpolicy", - }, - Spec: networkingv1.NetworkPolicySpec{ - Egress: []networkingv1.NetworkPolicyEgressRule{ - networkingv1.NetworkPolicyEgressRule{ - To: []networkingv1.NetworkPolicyPeer{{ - NamespaceSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{"ns": "test"}, - }, - }}, - Ports: []networkingv1.NetworkPolicyPort{{ - Protocol: &udp, - Port: &intstr.IntOrString{ - StrVal: "8001", - }, - }}, - }, - }, + f := newNetPolFixture(t) + f.netPolLister = append(f.netPolLister, oldNetPolObj) + f.kubeobjects = append(f.kubeobjects, oldNetPolObj) + stopCh := make(chan struct{}) + defer close(stopCh) + f.newNetPolController(stopCh) + + newNetPolObj := oldNetPolObj.DeepCopy() + // update podSelctor in a new network policy field + newNetPolObj.Spec.PodSelector = metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": "test", + "new": "test", }, } - npMgr.Lock() - if err := npMgr.AddNetworkPolicy(allowIngress); err != nil { - t.Errorf("TestUpdateNetworkPolicy failed @ AddNetworkPolicy") - } - - if err := npMgr.UpdateNetworkPolicy(allowIngress, allowEgress); err != nil { - t.Errorf("TestUpdateNetworkPolicy failed @ UpdateNetworkPolicy") + updateNetPol(t, f, oldNetPolObj, newNetPolObj) + testCases := []expectedNetPolValues{ + {1, 1, 0, true}, } - npMgr.Unlock() -} - -func TestGetNetworkPolicyKey(t *testing.T) { - // npObj := &networkingv1.NetworkPolicy{ - // ObjectMeta: metav1.ObjectMeta{ - // Name: "allow-egress", - // Namespace: "test-nwpolicy", - // }, - // Spec: networkingv1.NetworkPolicySpec{ - // Egress: []networkingv1.NetworkPolicyEgressRule{ - // networkingv1.NetworkPolicyEgressRule{ - // To: []networkingv1.NetworkPolicyPeer{{ - // NamespaceSelector: &metav1.LabelSelector{ - // MatchLabels: map[string]string{"ns": "test"}, - // }, - // }}, - // }, - // }, - // }, - // } - - // netpolKey := GetNetworkPolicyKey(npObj) - - // if netpolKey == "" { - // t.Errorf("TestGetNetworkPolicyKey failed @ netpolKey length check %s", netpolKey) - // } - - // expectedKey := util.GetNSNameWithPrefix("test-nwpolicy/allow-egress") - // if netpolKey != expectedKey { - // t.Errorf("TestGetNetworkPolicyKey failed @ netpolKey did not match expected value %s", netpolKey) - // } + checkNetPolTestResult("TestUpdateNetPol", f, testCases) } From 928aee6342f7c265ff37bf350b35210cd3cc123a Mon Sep 17 00:00:00 2001 From: Junguk Cho Date: Wed, 7 Apr 2021 01:14:59 -0700 Subject: [PATCH 08/19] - applied comments (use explict names, locating lock in a better place) --- npm/networkPolicyController.go | 41 ++++++++++++++--------------- npm/networkPolicyController_test.go | 6 ++--- 2 files changed, 23 insertions(+), 24 deletions(-) diff --git a/npm/networkPolicyController.go b/npm/networkPolicyController.go index 92cd9008ea..9ba3cb81cf 100644 --- a/npm/networkPolicyController.go +++ b/npm/networkPolicyController.go @@ -46,16 +46,16 @@ func NewNetworkPolicyController(npInformer networkinginformers.NetworkPolicyInfo clientset: clientset, netPolLister: npInformer.Lister(), netPolListerSynced: npInformer.Informer().HasSynced, - workqueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "NetPols"), + workqueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "NetworkPolicy"), npMgr: npMgr, isAzureNpmChainCreated: false, } npInformer.Informer().AddEventHandler( cache.ResourceEventHandlerFuncs{ - AddFunc: netPolController.addNetPol, - UpdateFunc: netPolController.updateNetPol, - DeleteFunc: netPolController.deleteNetPol, + AddFunc: netPolController.addNetworkPolicy, + UpdateFunc: netPolController.updateNetworkPolicy, + DeleteFunc: netPolController.deleteNetworkPolicy, }, ) return netPolController @@ -77,7 +77,7 @@ func (c *networkPolicyController) needSync(obj interface{}) (string, error) { return key, nil } -func (c *networkPolicyController) addNetPol(obj interface{}) { +func (c *networkPolicyController) addNetworkPolicy(obj interface{}) { key, err := c.needSync(obj) if err != nil { utilruntime.HandleError(err) @@ -87,7 +87,7 @@ func (c *networkPolicyController) addNetPol(obj interface{}) { c.workqueue.Add(key) } -func (c *networkPolicyController) updateNetPol(old, new interface{}) { +func (c *networkPolicyController) updateNetworkPolicy(old, new interface{}) { key, err := c.needSync(new) if err != nil { utilruntime.HandleError(err) @@ -108,7 +108,7 @@ func (c *networkPolicyController) updateNetPol(old, new interface{}) { c.workqueue.Add(key) } -func (c *networkPolicyController) deleteNetPol(obj interface{}) { +func (c *networkPolicyController) deleteNetworkPolicy(obj interface{}) { _, ok := obj.(*networkingv1.NetworkPolicy) // DeleteFunc gets the final state of the resource (if it is known). // Otherwise, it gets an object of type DeletedFinalStateUnknown. @@ -138,11 +138,10 @@ func (c *networkPolicyController) deleteNetPol(obj interface{}) { netPolCachedKey := util.GetNSNameWithPrefix(key) - // (TODO): Reduce scope of lock later + // (TODO): need to decouple this lock from npMgr if possible c.npMgr.Lock() - defer c.npMgr.Unlock() - _, netPolExists := c.npMgr.RawNpMap[netPolCachedKey] + defer c.npMgr.Unlock() // If a network policy object is not in the RawNpMap, do not need to clean-up states for the network policy // since netPolController did not apply for any states for the network policy if !netPolExists { @@ -246,7 +245,7 @@ func (c *networkPolicyController) syncNetPol(key string) error { return nil } - err = c.deleteNetworkPolicy(cachedNetPolObj, SafeToCleanUpAzureNpmChain) + err = c.cleanUpNetworkPolicy(cachedNetPolObj, SafeToCleanUpAzureNpmChain) if err != nil { return fmt.Errorf("[syncNetPol] Error: %v when network policy is not found\n", err) } @@ -257,7 +256,7 @@ func (c *networkPolicyController) syncNetPol(key string) error { // If DeletionTimestamp of the netPolObj is set, start cleaning up lastly applied states. // This is early cleaning up process from updateNetPol event if netPolObj.ObjectMeta.DeletionTimestamp != nil || netPolObj.ObjectMeta.DeletionGracePeriodSeconds != nil { - err = c.deleteNetworkPolicy(netPolObj, SafeToCleanUpAzureNpmChain) + err = c.cleanUpNetworkPolicy(netPolObj, SafeToCleanUpAzureNpmChain) if err != nil { return fmt.Errorf("Error: %v when ObjectMeta.DeletionTimestamp field is set\n", err) } @@ -316,7 +315,7 @@ func (c *networkPolicyController) syncAddAndUpdateNetPol(netPolObj *networkingv1 // So, avoid extra overhead to install default Azure NPM chain in initializeDefaultAzureNpmChain function. // To achieve it, use flag unSafeToCleanUpAzureNpmChain to indicate that the default Azure NPM chain cannot be deleted. // delete existing network policy - err = c.deleteNetworkPolicy(netPolObj, unSafeToCleanUpAzureNpmChain) + err = c.cleanUpNetworkPolicy(netPolObj, unSafeToCleanUpAzureNpmChain) if err != nil { return fmt.Errorf("[syncAddAndUpdateNetPol] Error: failed to deleteNetworkPolicy due to %s", err) } @@ -372,11 +371,11 @@ func (c *networkPolicyController) syncAddAndUpdateNetPol(netPolObj *networkingv1 } // DeleteNetworkPolicy handles deleting network policy based on cachedNetPolKey. -func (c *networkPolicyController) deleteNetworkPolicy(netPolObj *networkingv1.NetworkPolicy, isSafeCleanUpAzureNpmChain IsSafeCleanUpAzureNpmChain) error { +func (c *networkPolicyController) cleanUpNetworkPolicy(netPolObj *networkingv1.NetworkPolicy, isSafeCleanUpAzureNpmChain IsSafeCleanUpAzureNpmChain) error { var err error netpolKey, err := cache.MetaNamespaceKeyFunc(netPolObj) if err != nil { - return fmt.Errorf("[deleteNetworkPolicy] Error: while running MetaNamespaceKeyFunc err: %s", err) + return fmt.Errorf("[cleanUpNetworkPolicy] Error: while running MetaNamespaceKeyFunc err: %s", err) } cachedNetPolKey := util.GetNSNameWithPrefix(netpolKey) @@ -394,32 +393,32 @@ func (c *networkPolicyController) deleteNetworkPolicy(netPolObj *networkingv1.Ne // delete iptables entries for _, iptEntry := range iptEntries { if err = iptMgr.Delete(iptEntry); err != nil { - return fmt.Errorf("[deleteNetworkPolicy] Error: failed to apply iptables rule. Rule: %+v with err: %v", iptEntry, err) + return fmt.Errorf("[cleanUpNetworkPolicy] Error: failed to apply iptables rule. Rule: %+v with err: %v", iptEntry, err) } } // delete ipset list related to ingress CIDRs if err = c.removeCidrsRule("in", netPolObj.ObjectMeta.Name, netPolObj.ObjectMeta.Namespace, ingressIPCidrs, ipsMgr); err != nil { - return fmt.Errorf("[deleteNetworkPolicy] Error: removeCidrsRule in due to %v", err) + return fmt.Errorf("[cleanUpNetworkPolicy] Error: removeCidrsRule in due to %v", err) } // delete ipset list related to egress CIDRs if err = c.removeCidrsRule("out", netPolObj.ObjectMeta.Name, netPolObj.ObjectMeta.Namespace, egressIPCidrs, ipsMgr); err != nil { - return fmt.Errorf("[deleteNetworkPolicy] Error: removeCidrsRule out due to %v", err) + return fmt.Errorf("[cleanUpNetworkPolicy] Error: removeCidrsRule out due to %v", err) } // (TODO): Can we decouple below logic from cachedNetPolObj since if all deletions for cachedNetPolObj are successful, but UnititNpmChains function is failed, - // deleteNetworkPolicy function will be executed again. + // cleanUpNetworkPolicy function will be executed again. // if there is only one cached network policy in RawNPMap and no immediate network policy to process, // clean up default azure npm chains since the cached network policy from RawNPMap is removed. if isSafeCleanUpAzureNpmChain && len(c.npMgr.RawNpMap) == 1 { // Even though UninitNpmChains return error, isAzureNpmChainCreated sets up false - // #1 When deletion event is requeued to workqueue and deleteNetworkPolicy function is called again, it is safe. + // #1 When deletion event is requeued to workqueue and cleanUpNetworkPolicy function is called again, it is safe. // #2 when a new network policy is added, the default Azure NPM chain can install. c.isAzureNpmChainCreated = false if err = iptMgr.UninitNpmChains(); err != nil { - return fmt.Errorf("[deleteNetworkPolicy] Error: failed to uninitialize azure-npm chains with err: %s", err) + return fmt.Errorf("[cleanUpNetworkPolicy] Error: failed to uninitialize azure-npm chains with err: %s", err) } } diff --git a/npm/networkPolicyController_test.go b/npm/networkPolicyController_test.go index f723fcc0b8..141c5a43aa 100644 --- a/npm/networkPolicyController_test.go +++ b/npm/networkPolicyController_test.go @@ -127,7 +127,7 @@ func createNetPol() *networkingv1.NetworkPolicy { func addNetPol(t *testing.T, f *netPolFixture, netPolObj *networkingv1.NetworkPolicy) { // simulate "network policy" add event and add network policy object to sharedInformer cache - f.netPolController.addNetPol(netPolObj) + f.netPolController.addNetworkPolicy(netPolObj) if f.netPolController.workqueue.Len() == 0 { t.Logf("Add network policy: worker queue length is 0 ") @@ -143,7 +143,7 @@ func deleteNetPol(t *testing.T, f *netPolFixture, netPolObj *networkingv1.Networ // simulate network policy deletion event and delete network policy object from sharedInformer cache f.kubeInformer.Networking().V1().NetworkPolicies().Informer().GetIndexer().Delete(netPolObj) - f.netPolController.deleteNetPol(netPolObj) + f.netPolController.deleteNetworkPolicy(netPolObj) if f.netPolController.workqueue.Len() == 0 { t.Logf("Delete network policy: worker queue length is 0 ") @@ -159,7 +159,7 @@ func updateNetPol(t *testing.T, f *netPolFixture, oldNetPolObj, netNetPolObj *ne // simulate network policy update event and update the network policy to shared informer's cache f.kubeInformer.Networking().V1().NetworkPolicies().Informer().GetIndexer().Update(netNetPolObj) - f.netPolController.updateNetPol(oldNetPolObj, netNetPolObj) + f.netPolController.updateNetworkPolicy(oldNetPolObj, netNetPolObj) if f.netPolController.workqueue.Len() == 0 { t.Logf("Update Network Policy: worker queue length is 0 ") From faad159c36df584eed4f0f6dbe8b3a56ec515a7a Mon Sep 17 00:00:00 2001 From: Junguk Cho Date: Wed, 7 Apr 2021 01:42:12 -0700 Subject: [PATCH 09/19] add two methods to save and restore iptables in unit test --- npm/networkPolicyController_test.go | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/npm/networkPolicyController_test.go b/npm/networkPolicyController_test.go index 141c5a43aa..4ea9131b25 100644 --- a/npm/networkPolicyController_test.go +++ b/npm/networkPolicyController_test.go @@ -7,6 +7,7 @@ import ( "testing" "github.com/Azure/azure-container-networking/npm/ipsm" + "github.com/Azure/azure-container-networking/npm/iptm" corev1 "k8s.io/api/core/v1" networkingv1 "k8s.io/api/networking/v1" @@ -30,8 +31,10 @@ type netPolFixture struct { kubeobjects []runtime.Object // (TODO) will remove npMgr if possible - npMgr *NetworkPolicyManager - ipsMgr *ipsm.IpsetManager + npMgr *NetworkPolicyManager + ipsMgr *ipsm.IpsetManager + iptMgr *iptm.IptablesManager + netPolController *networkPolicyController kubeInformer kubeinformers.SharedInformerFactory } @@ -43,6 +46,7 @@ func newNetPolFixture(t *testing.T) *netPolFixture { kubeobjects: []runtime.Object{}, npMgr: newNPMgr(t), ipsMgr: ipsm.NewIpsetManager(), + iptMgr: iptm.NewIptablesManager(), } f.npMgr.RawNpMap = make(map[string]*networkingv1.NetworkPolicy) @@ -63,18 +67,30 @@ func (f *netPolFixture) newNetPolController(stopCh chan struct{}) { f.kubeInformer.Start(stopCh) } -func (f *netPolFixture) ipSetSave(ipsetConfigFile string) { +func (f *netPolFixture) saveIpTables(iptablesConfigFile string) { + if err := f.iptMgr.Save(iptablesConfigFile); err != nil { + f.t.Errorf("Failed to save iptables rules") + } +} + +func (f *netPolFixture) restoreIpTables(iptablesConfigFile string) { + if err := f.iptMgr.Restore(iptablesConfigFile); err != nil { + f.t.Errorf("Failed to restore iptables rules") + } +} + +func (f *netPolFixture) saveIpSet(ipsetConfigFile string) { // call /sbin/ipset save -file /var/log/ipset-test.conf f.t.Logf("Start storing ipset to %s", ipsetConfigFile) if err := f.ipsMgr.Save(ipsetConfigFile); err != nil { - f.t.Errorf("ipSetSave failed @ ipsMgr.Save") + f.t.Errorf("Failed to save ipsets") } } -func (f *netPolFixture) ipSetRestore(ipsetConfigFile string) { +func (f *netPolFixture) restoreIpSet(ipsetConfigFile string) { // call /sbin/ipset restore -file /var/log/ipset-test.conf f.t.Logf("Start re-storing ipset to %s", ipsetConfigFile) if err := f.ipsMgr.Restore(ipsetConfigFile); err != nil { - f.t.Errorf("ipSetRestore failed @ ipsMgr.Restore") + f.t.Errorf("failed to restore ipsets") } } From f5ac80e8bc76749a36777a88b721f75dd08cf8d2 Mon Sep 17 00:00:00 2001 From: Junguk Cho Date: Wed, 7 Apr 2021 01:44:58 -0700 Subject: [PATCH 10/19] comment out unused function --- npm/networkPolicyController.go | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/npm/networkPolicyController.go b/npm/networkPolicyController.go index 9ba3cb81cf..f0233bab59 100644 --- a/npm/networkPolicyController.go +++ b/npm/networkPolicyController.go @@ -477,14 +477,14 @@ func (c *networkPolicyController) removeCidrsRule(ingressOrEgress, policyName, n // GetProcessedNPKey will return netpolKey // (TODO): will use this function when optimizing management of multiple network policies with merging and deducting multiple network policies. -func (c *networkPolicyController) getProcessedNPKey(netPolObj *networkingv1.NetworkPolicy) string { - // hashSelector will never be empty - // (TODO): what if PodSelector is [] or nothing? - make the Unit test for this - hashedPodSelector := HashSelector(&netPolObj.Spec.PodSelector) - - // (TODO): any chance to have namespace has zero length? - if len(netPolObj.GetNamespace()) > 0 { - hashedPodSelector = netPolObj.GetNamespace() + "/" + hashedPodSelector - } - return util.GetNSNameWithPrefix(hashedPodSelector) -} +// func (c *networkPolicyController) getProcessedNPKey(netPolObj *networkingv1.NetworkPolicy) string { +// // hashSelector will never be empty +// // (TODO): what if PodSelector is [] or nothing? - make the Unit test for this +// hashedPodSelector := HashSelector(&netPolObj.Spec.PodSelector) + +// // (TODO): any chance to have namespace has zero length? +// if len(netPolObj.GetNamespace()) > 0 { +// hashedPodSelector = netPolObj.GetNamespace() + "/" + hashedPodSelector +// } +// return util.GetNSNameWithPrefix(hashedPodSelector) +// } From 50d1e982c5a2348fccc10f985cae604bcdc16d14 Mon Sep 17 00:00:00 2001 From: Junguk Cho Date: Wed, 7 Apr 2021 05:55:16 -0700 Subject: [PATCH 11/19] early filter in updateNetworkPolicy function if they are the same network policies. Update unit tests to test more network policies events --- npm/networkPolicyController.go | 13 +++++ npm/networkPolicyController_test.go | 86 +++++++++++++++++++++-------- npm/parsePolicy.go | 10 ++++ 3 files changed, 86 insertions(+), 23 deletions(-) diff --git a/npm/networkPolicyController.go b/npm/networkPolicyController.go index f0233bab59..a9884e35f1 100644 --- a/npm/networkPolicyController.go +++ b/npm/networkPolicyController.go @@ -105,6 +105,19 @@ func (c *networkPolicyController) updateNetworkPolicy(old, new interface{}) { } } + netPolCachedKey := util.GetNSNameWithPrefix(key) + c.npMgr.Lock() + cachedNetPolObj, netPolExists := c.npMgr.RawNpMap[netPolCachedKey] + c.npMgr.Unlock() + if netPolExists { + // if network policy does not have different states against lastly applied states stored in cachedNetPolObj, + // netPolController does not need to reconcile this update. + // in this updateNetworkPolicy event, newNetPol was updated with states which netPolController does not need to reconcile. + if isSameNetworkPolicy(cachedNetPolObj, newNetPol) { + return + } + } + c.workqueue.Add(key) } diff --git a/npm/networkPolicyController_test.go b/npm/networkPolicyController_test.go index 4ea9131b25..d21ebfbdd4 100644 --- a/npm/networkPolicyController_test.go +++ b/npm/networkPolicyController_test.go @@ -4,6 +4,7 @@ package npm import ( "fmt" + "strconv" "testing" "github.com/Azure/azure-container-networking/npm/ipsm" @@ -37,16 +38,20 @@ type netPolFixture struct { netPolController *networkPolicyController kubeInformer kubeinformers.SharedInformerFactory + + // to test whether unnecessary enqueuing event into workqueue was correctly filtered in eventhandler code of network policy controller + isEnqueueEventIntoWorkQueue bool } func newNetPolFixture(t *testing.T) *netPolFixture { f := &netPolFixture{ - t: t, - netPolLister: []*networkingv1.NetworkPolicy{}, - kubeobjects: []runtime.Object{}, - npMgr: newNPMgr(t), - ipsMgr: ipsm.NewIpsetManager(), - iptMgr: iptm.NewIptablesManager(), + t: t, + netPolLister: []*networkingv1.NetworkPolicy{}, + kubeobjects: []runtime.Object{}, + npMgr: newNPMgr(t), + ipsMgr: ipsm.NewIpsetManager(), + iptMgr: iptm.NewIptablesManager(), + isEnqueueEventIntoWorkQueue: true, } f.npMgr.RawNpMap = make(map[string]*networkingv1.NetworkPolicy) @@ -133,7 +138,7 @@ func createNetPol() *networkingv1.NetworkPolicy { }}, Ports: []networkingv1.NetworkPolicyPort{{ Protocol: &tcp, - Port: &port8000, + Port: &intstr.IntOrString{StrVal: "8000"}, // namedPort }}, }, }, @@ -146,7 +151,7 @@ func addNetPol(t *testing.T, f *netPolFixture, netPolObj *networkingv1.NetworkPo f.netPolController.addNetworkPolicy(netPolObj) if f.netPolController.workqueue.Len() == 0 { - t.Logf("Add network policy: worker queue length is 0 ") + f.isEnqueueEventIntoWorkQueue = false return } @@ -162,7 +167,7 @@ func deleteNetPol(t *testing.T, f *netPolFixture, netPolObj *networkingv1.Networ f.netPolController.deleteNetworkPolicy(netPolObj) if f.netPolController.workqueue.Len() == 0 { - t.Logf("Delete network policy: worker queue length is 0 ") + f.isEnqueueEventIntoWorkQueue = false return } @@ -178,7 +183,7 @@ func updateNetPol(t *testing.T, f *netPolFixture, oldNetPolObj, netNetPolObj *ne f.netPolController.updateNetworkPolicy(oldNetPolObj, netNetPolObj) if f.netPolController.workqueue.Len() == 0 { - t.Logf("Update Network Policy: worker queue length is 0 ") + f.isEnqueueEventIntoWorkQueue = false return } @@ -186,10 +191,11 @@ func updateNetPol(t *testing.T, f *netPolFixture, oldNetPolObj, netNetPolObj *ne } type expectedNetPolValues struct { - expectedLenOfNsMap int - expectedLenOfRawNpMap int - expectedLenOfWorkQueue int - expectedIsAzureNpmChainCreated bool + expectedLenOfNsMap int + expectedLenOfRawNpMap int + expectedLenOfWorkQueue int + expectedIsAzureNpmChainCreated bool + expectedEnqueueEventIntoWorkQueue bool } func checkNetPolTestResult(testName string, f *netPolFixture, testCases []expectedNetPolValues) { @@ -202,12 +208,16 @@ func checkNetPolTestResult(testName string, f *netPolFixture, testCases []expect f.t.Errorf("Raw NetPol Map length = %d, want %d", got, test.expectedLenOfRawNpMap) } + if got := f.netPolController.workqueue.Len(); got != test.expectedLenOfWorkQueue { + f.t.Errorf("Workqueue length = %d, want %d", got, test.expectedLenOfWorkQueue) + } + if got := f.netPolController.isAzureNpmChainCreated; got != test.expectedIsAzureNpmChainCreated { f.t.Errorf("isAzureNpmChainCreated %v, want %v", got, test.expectedIsAzureNpmChainCreated) } - if got := f.netPolController.workqueue.Len(); got != test.expectedLenOfWorkQueue { - f.t.Errorf("Workqueue length = %d, want %d", got, test.expectedLenOfWorkQueue) + if got := f.isEnqueueEventIntoWorkQueue; got != test.expectedEnqueueEventIntoWorkQueue { + f.t.Errorf("isEnqueueEventIntoWorkQueue %v, want %v", got, test.expectedEnqueueEventIntoWorkQueue) } } } @@ -215,10 +225,12 @@ func checkNetPolTestResult(testName string, f *netPolFixture, testCases []expect func TestAddMultipleNetworkPolicies(t *testing.T) { netPolObj1 := createNetPol() - // deep copy netPolObj1 and change namespace and name since current createNetPol is not flexble. + // deep copy netPolObj1 and change namespace, name, and porttype (to namedPort) since current createNetPol is not flexble. netPolObj2 := netPolObj1.DeepCopy() netPolObj2.Namespace = fmt.Sprintf("%s-new", netPolObj1.Namespace) netPolObj2.Name = fmt.Sprintf("%s-new", netPolObj1.Name) + // namedPort + netPolObj2.Spec.Ingress[0].Ports[0].Port = &intstr.IntOrString{StrVal: fmt.Sprintf("%s", netPolObj2.Name)} f := newNetPolFixture(t) f.netPolLister = append(f.netPolLister, netPolObj1, netPolObj2) @@ -231,10 +243,11 @@ func TestAddMultipleNetworkPolicies(t *testing.T) { addNetPol(t, f, netPolObj2) testCases := []expectedNetPolValues{ - {1, 2, 0, true}, + {1, 2, 0, true, true}, } checkNetPolTestResult("TestAddMultipleNetPols", f, testCases) } + func TestAddNetworkPolicy(t *testing.T) { netPolObj := createNetPol() @@ -247,7 +260,7 @@ func TestAddNetworkPolicy(t *testing.T) { addNetPol(t, f, netPolObj) testCases := []expectedNetPolValues{ - {1, 1, 0, true}, + {1, 1, 0, true, true}, } checkNetPolTestResult("TestAddNetPol", f, testCases) } @@ -264,12 +277,14 @@ func TestDeleteNetworkPolicy(t *testing.T) { deleteNetPol(t, f, netPolObj) testCases := []expectedNetPolValues{ - {1, 0, 0, false}, + {1, 0, 0, false, true}, } checkNetPolTestResult("TestDelNetPol", f, testCases) } -// Need to make more test cases +// this unit test is for the case where states of network policy are changed, but network policy controller does not need to reconcile. +// Check it with expectedEnqueueEventIntoWorkQueue variable. +// (TODO): What are the fieldS which cause this case in a real world? Need to know it. func TestUpdateNetworkPolicy(t *testing.T) { oldNetPolObj := createNetPol() @@ -280,6 +295,28 @@ func TestUpdateNetworkPolicy(t *testing.T) { defer close(stopCh) f.newNetPolController(stopCh) + newNetPolObj := oldNetPolObj.DeepCopy() + // oldNetPolObj.ResourceVersion value is "0" + newRV, _ := strconv.Atoi(oldNetPolObj.ResourceVersion) + newNetPolObj.ResourceVersion = fmt.Sprintf("%d", newRV+1) + + updateNetPol(t, f, oldNetPolObj, newNetPolObj) + testCases := []expectedNetPolValues{ + {1, 1, 0, true, false}, + } + checkNetPolTestResult("TestUpdateNetPol", f, testCases) +} + +func TestLabelUpdateNetworkPolicy(t *testing.T) { + oldNetPolObj := createNetPol() + + f := newNetPolFixture(t) + f.netPolLister = append(f.netPolLister, oldNetPolObj) + f.kubeobjects = append(f.kubeobjects, oldNetPolObj) + stopCh := make(chan struct{}) + defer close(stopCh) + f.newNetPolController(stopCh) + newNetPolObj := oldNetPolObj.DeepCopy() // update podSelctor in a new network policy field newNetPolObj.Spec.PodSelector = metav1.LabelSelector{ @@ -288,10 +325,13 @@ func TestUpdateNetworkPolicy(t *testing.T) { "new": "test", }, } - + // oldNetPolObj.ResourceVersion value is "0" + newRV, _ := strconv.Atoi(oldNetPolObj.ResourceVersion) + newNetPolObj.ResourceVersion = fmt.Sprintf("%d", newRV+1) updateNetPol(t, f, oldNetPolObj, newNetPolObj) + testCases := []expectedNetPolValues{ - {1, 1, 0, true}, + {1, 1, 0, true, true}, } checkNetPolTestResult("TestUpdateNetPol", f, testCases) } diff --git a/npm/parsePolicy.go b/npm/parsePolicy.go index 638a574df3..17f7c482b1 100644 --- a/npm/parsePolicy.go +++ b/npm/parsePolicy.go @@ -10,6 +10,16 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) +// compare all fields including name of two network policies, which network policy controller need to care about. +func isSameNetworkPolicy(old, new *networkingv1.NetworkPolicy) bool { + if old.ObjectMeta.Name != new.ObjectMeta.Name { + return false + } + return isSamePolicy(old, new) +} + +// (TODO): isSamePolicy function does not compare name of two network policies since trying to reduce the number of rules if below three conditions are the same. +// Will optimize networkPolicyController code with addPolicy and deductPolicy functions if possible. Refer to https://github.com/Azure/azure-container-networking/pull/390 func isSamePolicy(old, new *networkingv1.NetworkPolicy) bool { if !reflect.DeepEqual(old.TypeMeta, new.TypeMeta) { return false From 7db79d0acdf3d483f6b52ba4fd70436ae988988d Mon Sep 17 00:00:00 2001 From: Junguk Cho Date: Wed, 7 Apr 2021 15:59:56 -0700 Subject: [PATCH 12/19] - start using klog package instead of log package --- go.mod | 1 + npm/networkPolicyController.go | 22 ++++++++++------------ 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/go.mod b/go.mod index 9680bdb2d9..569f9a778a 100644 --- a/go.mod +++ b/go.mod @@ -39,6 +39,7 @@ require ( k8s.io/api v0.18.2 k8s.io/apimachinery v0.18.2 k8s.io/client-go v0.18.2 + k8s.io/klog v1.0.0 sigs.k8s.io/controller-runtime v0.6.0 software.sslmate.com/src/go-pkcs12 v0.0.0-20201102150903-66718f75db0e // indirect ) diff --git a/npm/networkPolicyController.go b/npm/networkPolicyController.go index a9884e35f1..242dbd8b1e 100644 --- a/npm/networkPolicyController.go +++ b/npm/networkPolicyController.go @@ -7,7 +7,6 @@ import ( "strconv" "time" - "github.com/Azure/azure-container-networking/log" "github.com/Azure/azure-container-networking/npm/ipsm" "github.com/Azure/azure-container-networking/npm/metrics" "github.com/Azure/azure-container-networking/npm/util" @@ -20,6 +19,7 @@ import ( netpollister "k8s.io/client-go/listers/networking/v1" "k8s.io/client-go/tools/cache" "k8s.io/client-go/util/workqueue" + "k8s.io/klog" ) // IsSafeCleanUpAzureNpmChain is used to indicate whether default Azure NPM chain can be safely deleted or not. @@ -168,14 +168,14 @@ func (c *networkPolicyController) Run(threadiness int, stopCh <-chan struct{}) e defer utilruntime.HandleCrash() defer c.workqueue.ShutDown() - log.Logf("Starting Network Policy %d worker(s)", threadiness) + klog.Infof("Starting Network Policy %d worker(s)", threadiness) for i := 0; i < threadiness; i++ { go wait.Until(c.runWorker, time.Second, stopCh) } - log.Logf("Started Network Policy %d worker(s)", threadiness) + klog.Infof("Started Network Policy %d worker(s)", threadiness) <-stopCh - log.Logf("Shutting down Network Policy workers") + klog.Info("Shutting down Network Policy workers") return nil } @@ -215,7 +215,7 @@ func (c *networkPolicyController) processNextWorkItem() bool { // Finally, if no error occurs we Forget this item so it does not // get queued again until another change happens. c.workqueue.Forget(obj) - log.Logf("Successfully synced '%s'", key) + klog.Infof("Successfully synced '%s'", key) return nil }(obj) @@ -237,8 +237,6 @@ func (c *networkPolicyController) syncNetPol(key string) error { return nil } - log.Logf("SyncNetPol %s %s %s", key, namespace, name) - // Get the network policy resource with this namespace/name netPolObj, err := c.netPolLister.NetworkPolicies(namespace).Get(name) @@ -248,7 +246,7 @@ func (c *networkPolicyController) syncNetPol(key string) error { if err != nil { if errors.IsNotFound(err) { - log.Logf("Network Policy %s is not found, may be it is deleted", key) + klog.Infof("Network Policy %s is not found, may be it is deleted", key) // netPolObj is not found, but should need to check the RawNpMap cache with cachedNetPolKey // #1 No item in RawNpMap, which means network policy with the cachedNetPolKey is not applied. Thus, do not need to clean up process. // #2 item in RawNpMap exists, which means network policy with the cachedNetPolKey is applied . Thus, Need to clean up process. @@ -343,13 +341,13 @@ func (c *networkPolicyController) syncAddAndUpdateNetPol(netPolObj *networkingv1 iptMgr := c.npMgr.NsMap[util.KubeAllNamespacesFlag].iptMgr sets, namedPorts, lists, ingressIPCidrs, egressIPCidrs, iptEntries := translatePolicy(netPolObj) for _, set := range sets { - log.Logf("Creating set: %v, hashedSet: %v", set, util.GetHashedName(set)) + klog.Infof("Creating set: %v, hashedSet: %v", set, util.GetHashedName(set)) if err = ipsMgr.CreateSet(set, append([]string{util.IpsetNetHashFlag})); err != nil { return fmt.Errorf("[syncAddAndUpdateNetPol] Error: creating ipset %s with err: %v", set, err) } } for _, set := range namedPorts { - log.Logf("Creating set: %v, hashedSet: %v", set, util.GetHashedName(set)) + klog.Infof("Creating set: %v, hashedSet: %v", set, util.GetHashedName(set)) if err = ipsMgr.CreateSet(set, append([]string{util.IpsetIPPortHashFlag})); err != nil { return fmt.Errorf("[syncAddAndUpdateNetPol] Error: creating ipset named port %s with err: %v", set, err) } @@ -448,7 +446,7 @@ func (c *networkPolicyController) createCidrsRule(ingressOrEgress, policyName, n continue } setName := policyName + "-in-ns-" + ns + "-" + strconv.Itoa(i) + ingressOrEgress - log.Logf("Creating set: %v, hashedSet: %v", setName, util.GetHashedName(setName)) + klog.Infof("Creating set: %v, hashedSet: %v", setName, util.GetHashedName(setName)) if err := ipsMgr.CreateSet(setName, spec); err != nil { return fmt.Errorf("[createCidrsRule] Error: creating ipset %s with err: %v", ipCidrSet, err) } @@ -479,7 +477,7 @@ func (c *networkPolicyController) removeCidrsRule(ingressOrEgress, policyName, n continue } setName := policyName + "-in-ns-" + ns + "-" + strconv.Itoa(i) + ingressOrEgress - log.Logf("Delete set: %v, hashedSet: %v", setName, util.GetHashedName(setName)) + klog.Infof("Delete set: %v, hashedSet: %v", setName, util.GetHashedName(setName)) if err := ipsMgr.DeleteSet(setName); err != nil { return fmt.Errorf("[removeCidrsRule] deleting ipset %s with err: %v", ipCidrSet, err) } From b9ea448b5ed0db874ef8c2bc91565cc10abaed1d Mon Sep 17 00:00:00 2001 From: Junguk Cho Date: Wed, 7 Apr 2021 16:22:01 -0700 Subject: [PATCH 13/19] remove unneeded defer for lock --- npm/networkPolicyController.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/npm/networkPolicyController.go b/npm/networkPolicyController.go index 242dbd8b1e..56753e4cb8 100644 --- a/npm/networkPolicyController.go +++ b/npm/networkPolicyController.go @@ -154,7 +154,7 @@ func (c *networkPolicyController) deleteNetworkPolicy(obj interface{}) { // (TODO): need to decouple this lock from npMgr if possible c.npMgr.Lock() _, netPolExists := c.npMgr.RawNpMap[netPolCachedKey] - defer c.npMgr.Unlock() + c.npMgr.Unlock() // If a network policy object is not in the RawNpMap, do not need to clean-up states for the network policy // since netPolController did not apply for any states for the network policy if !netPolExists { From 2850c3458b141b929df988db48bbe17d4a20916e Mon Sep 17 00:00:00 2001 From: Junguk Cho Date: Thu, 8 Apr 2021 13:43:19 -0700 Subject: [PATCH 14/19] Locate of adding and deleting network policy object from our network policy cache in a right place. Correct prometheus metric code. --- npm/networkPolicyController.go | 36 ++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/npm/networkPolicyController.go b/npm/networkPolicyController.go index 56753e4cb8..133c00eb8b 100644 --- a/npm/networkPolicyController.go +++ b/npm/networkPolicyController.go @@ -303,8 +303,9 @@ func (c *networkPolicyController) initializeDefaultAzureNpmChain() error { // syncAddAndUpdateNetPol handles a new network policy or an updated network policy object triggered by add and update events func (c *networkPolicyController) syncAddAndUpdateNetPol(netPolObj *networkingv1.NetworkPolicy) error { - // (TODO): where do we use this? + // This timer measures execution time to run this function regardless of success or failure cases timer := metrics.StartNewTimer() + defer timer.StopAndRecord(metrics.AddPolicyExecTime) var err error netpolKey, err := cache.MetaNamespaceKeyFunc(netPolObj) @@ -337,6 +338,12 @@ func (c *networkPolicyController) syncAddAndUpdateNetPol(netPolObj *networkingv1 return fmt.Errorf("[syncNetPol] Error: due to %v", err) } + // Cache network object first before applying ipsets and iptables. + // If error happens while applying ipsets and iptables, + // the key is re-queued in workqueue and process this function again, which eventually meets desired states of network policy + cachedNetPolKey := util.GetNSNameWithPrefix(netpolKey) + c.npMgr.RawNpMap[cachedNetPolKey] = netPolObj + ipsMgr := c.npMgr.NsMap[util.KubeAllNamespacesFlag].IpsMgr iptMgr := c.npMgr.NsMap[util.KubeAllNamespacesFlag].iptMgr sets, namedPorts, lists, ingressIPCidrs, egressIPCidrs, iptEntries := translatePolicy(netPolObj) @@ -372,12 +379,7 @@ func (c *networkPolicyController) syncAddAndUpdateNetPol(netPolObj *networkingv1 } } - cachedNetPolKey := util.GetNSNameWithPrefix(netpolKey) - c.npMgr.RawNpMap[cachedNetPolKey] = netPolObj - metrics.NumPolicies.Inc() - // (TODO): may better to use defer? - timer.StopAndRecord(metrics.AddPolicyExecTime) return nil } @@ -418,23 +420,23 @@ func (c *networkPolicyController) cleanUpNetworkPolicy(netPolObj *networkingv1.N return fmt.Errorf("[cleanUpNetworkPolicy] Error: removeCidrsRule out due to %v", err) } - // (TODO): Can we decouple below logic from cachedNetPolObj since if all deletions for cachedNetPolObj are successful, but UnititNpmChains function is failed, - // cleanUpNetworkPolicy function will be executed again. + // Sucess to clean up ipset and iptables operations in kernel and delete the cached network policy from RawNpMap + delete(c.npMgr.RawNpMap, cachedNetPolKey) + metrics.NumPolicies.Dec() - // if there is only one cached network policy in RawNPMap and no immediate network policy to process, - // clean up default azure npm chains since the cached network policy from RawNPMap is removed. - if isSafeCleanUpAzureNpmChain && len(c.npMgr.RawNpMap) == 1 { - // Even though UninitNpmChains return error, isAzureNpmChainCreated sets up false - // #1 When deletion event is requeued to workqueue and cleanUpNetworkPolicy function is called again, it is safe. - // #2 when a new network policy is added, the default Azure NPM chain can install. + // If there is no cached network policy in RawNPMap anymore and no immediate network policy to process, start cleaning up default azure npm chains + // However, UninitNpmChains function is failed which left failed states and will not retry, but functionally it is ok. + // (TODO): Ideally, need to decouple cleaning-up default azure npm chains from "network policy deletion" event. + if isSafeCleanUpAzureNpmChain && len(c.npMgr.RawNpMap) == 0 { + // Even though UninitNpmChains function returns error, isAzureNpmChainCreated sets up false. + // So, when a new network policy is added, the "default Azure NPM chain" can be installed. c.isAzureNpmChainCreated = false if err = iptMgr.UninitNpmChains(); err != nil { - return fmt.Errorf("[cleanUpNetworkPolicy] Error: failed to uninitialize azure-npm chains with err: %s", err) + utilruntime.HandleError(fmt.Errorf("Error: failed to uninitialize azure-npm chains with err: %s", err)) + return nil } } - delete(c.npMgr.RawNpMap, cachedNetPolKey) - metrics.NumPolicies.Dec() return nil } From 761a9740a51dc0ccb80fd20c9d98487604c25c3d Mon Sep 17 00:00:00 2001 From: Junguk Cho Date: Fri, 9 Apr 2021 12:20:08 -0700 Subject: [PATCH 15/19] use cached network policy key instead of network policy object as method parameter in cleanUpNetworkPolicy --- npm/networkPolicyController.go | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/npm/networkPolicyController.go b/npm/networkPolicyController.go index 133c00eb8b..c8e91e7a97 100644 --- a/npm/networkPolicyController.go +++ b/npm/networkPolicyController.go @@ -251,12 +251,12 @@ func (c *networkPolicyController) syncNetPol(key string) error { // #1 No item in RawNpMap, which means network policy with the cachedNetPolKey is not applied. Thus, do not need to clean up process. // #2 item in RawNpMap exists, which means network policy with the cachedNetPolKey is applied . Thus, Need to clean up process. cachedNetPolKey := util.GetNSNameWithPrefix(key) - cachedNetPolObj, cachedNetPolObjExists := c.npMgr.RawNpMap[cachedNetPolKey] + _, cachedNetPolObjExists := c.npMgr.RawNpMap[cachedNetPolKey] if !cachedNetPolObjExists { return nil } - err = c.cleanUpNetworkPolicy(cachedNetPolObj, SafeToCleanUpAzureNpmChain) + err = c.cleanUpNetworkPolicy(cachedNetPolKey, SafeToCleanUpAzureNpmChain) if err != nil { return fmt.Errorf("[syncNetPol] Error: %v when network policy is not found\n", err) } @@ -267,7 +267,8 @@ func (c *networkPolicyController) syncNetPol(key string) error { // If DeletionTimestamp of the netPolObj is set, start cleaning up lastly applied states. // This is early cleaning up process from updateNetPol event if netPolObj.ObjectMeta.DeletionTimestamp != nil || netPolObj.ObjectMeta.DeletionGracePeriodSeconds != nil { - err = c.cleanUpNetworkPolicy(netPolObj, SafeToCleanUpAzureNpmChain) + cachedNetPolKey := util.GetNSNameWithPrefix(key) + err = c.cleanUpNetworkPolicy(cachedNetPolKey, SafeToCleanUpAzureNpmChain) if err != nil { return fmt.Errorf("Error: %v when ObjectMeta.DeletionTimestamp field is set\n", err) } @@ -327,7 +328,8 @@ func (c *networkPolicyController) syncAddAndUpdateNetPol(netPolObj *networkingv1 // So, avoid extra overhead to install default Azure NPM chain in initializeDefaultAzureNpmChain function. // To achieve it, use flag unSafeToCleanUpAzureNpmChain to indicate that the default Azure NPM chain cannot be deleted. // delete existing network policy - err = c.cleanUpNetworkPolicy(netPolObj, unSafeToCleanUpAzureNpmChain) + cachedNetPolKey := util.GetNSNameWithPrefix(netpolKey) + err = c.cleanUpNetworkPolicy(cachedNetPolKey, unSafeToCleanUpAzureNpmChain) if err != nil { return fmt.Errorf("[syncAddAndUpdateNetPol] Error: failed to deleteNetworkPolicy due to %s", err) } @@ -341,7 +343,6 @@ func (c *networkPolicyController) syncAddAndUpdateNetPol(netPolObj *networkingv1 // Cache network object first before applying ipsets and iptables. // If error happens while applying ipsets and iptables, // the key is re-queued in workqueue and process this function again, which eventually meets desired states of network policy - cachedNetPolKey := util.GetNSNameWithPrefix(netpolKey) c.npMgr.RawNpMap[cachedNetPolKey] = netPolObj ipsMgr := c.npMgr.NsMap[util.KubeAllNamespacesFlag].IpsMgr @@ -384,14 +385,7 @@ func (c *networkPolicyController) syncAddAndUpdateNetPol(netPolObj *networkingv1 } // DeleteNetworkPolicy handles deleting network policy based on cachedNetPolKey. -func (c *networkPolicyController) cleanUpNetworkPolicy(netPolObj *networkingv1.NetworkPolicy, isSafeCleanUpAzureNpmChain IsSafeCleanUpAzureNpmChain) error { - var err error - netpolKey, err := cache.MetaNamespaceKeyFunc(netPolObj) - if err != nil { - return fmt.Errorf("[cleanUpNetworkPolicy] Error: while running MetaNamespaceKeyFunc err: %s", err) - } - - cachedNetPolKey := util.GetNSNameWithPrefix(netpolKey) +func (c *networkPolicyController) cleanUpNetworkPolicy(cachedNetPolKey string, isSafeCleanUpAzureNpmChain IsSafeCleanUpAzureNpmChain) error { cachedNetPolObj, cachedNetPolObjExists := c.npMgr.RawNpMap[cachedNetPolKey] // if there is no applied network policy with the cachedNetPolKey, do not need to clean up process. if !cachedNetPolObjExists { @@ -403,6 +397,7 @@ func (c *networkPolicyController) cleanUpNetworkPolicy(netPolObj *networkingv1.N // translate policy from "cachedNetPolObj" _, _, _, ingressIPCidrs, egressIPCidrs, iptEntries := translatePolicy(cachedNetPolObj) + var err error // delete iptables entries for _, iptEntry := range iptEntries { if err = iptMgr.Delete(iptEntry); err != nil { @@ -411,12 +406,12 @@ func (c *networkPolicyController) cleanUpNetworkPolicy(netPolObj *networkingv1.N } // delete ipset list related to ingress CIDRs - if err = c.removeCidrsRule("in", netPolObj.ObjectMeta.Name, netPolObj.ObjectMeta.Namespace, ingressIPCidrs, ipsMgr); err != nil { + if err = c.removeCidrsRule("in", cachedNetPolObj.Name, cachedNetPolObj.Namespace, ingressIPCidrs, ipsMgr); err != nil { return fmt.Errorf("[cleanUpNetworkPolicy] Error: removeCidrsRule in due to %v", err) } // delete ipset list related to egress CIDRs - if err = c.removeCidrsRule("out", netPolObj.ObjectMeta.Name, netPolObj.ObjectMeta.Namespace, egressIPCidrs, ipsMgr); err != nil { + if err = c.removeCidrsRule("out", cachedNetPolObj.Name, cachedNetPolObj.Namespace, egressIPCidrs, ipsMgr); err != nil { return fmt.Errorf("[cleanUpNetworkPolicy] Error: removeCidrsRule out due to %v", err) } From b6fb6a2a107d7f2ad2dcb812719be82ba6126c3e Mon Sep 17 00:00:00 2001 From: Junguk Cho Date: Mon, 12 Apr 2021 17:23:24 -0700 Subject: [PATCH 16/19] remove redundant check --- npm/networkPolicyController.go | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/npm/networkPolicyController.go b/npm/networkPolicyController.go index c8e91e7a97..dace732b70 100644 --- a/npm/networkPolicyController.go +++ b/npm/networkPolicyController.go @@ -247,15 +247,9 @@ func (c *networkPolicyController) syncNetPol(key string) error { if err != nil { if errors.IsNotFound(err) { klog.Infof("Network Policy %s is not found, may be it is deleted", key) - // netPolObj is not found, but should need to check the RawNpMap cache with cachedNetPolKey - // #1 No item in RawNpMap, which means network policy with the cachedNetPolKey is not applied. Thus, do not need to clean up process. - // #2 item in RawNpMap exists, which means network policy with the cachedNetPolKey is applied . Thus, Need to clean up process. + // netPolObj is not found, but should need to check the RawNpMap cache with cachedNetPolKey. + // cleanUpNetworkPolicy method will take care of the deletion of a cached network policy if the cached network policy exists with cachedNetPolKey in our RawNpMap cache. cachedNetPolKey := util.GetNSNameWithPrefix(key) - _, cachedNetPolObjExists := c.npMgr.RawNpMap[cachedNetPolKey] - if !cachedNetPolObjExists { - return nil - } - err = c.cleanUpNetworkPolicy(cachedNetPolKey, SafeToCleanUpAzureNpmChain) if err != nil { return fmt.Errorf("[syncNetPol] Error: %v when network policy is not found\n", err) From 81b8fb1e558366718191e0e90210a05e0bf7f2ce Mon Sep 17 00:00:00 2001 From: Junguk Cho Date: Tue, 13 Apr 2021 13:38:15 -0700 Subject: [PATCH 17/19] Remove ns- prefix as key in RawNpMap. Update UT to check prometheus metrics. Applied better naming and removed redundancy codes. --- npm/networkPolicyController.go | 56 ++++++++++++++--------------- npm/networkPolicyController_test.go | 36 +++++++++++++++---- npm/npm.go | 6 ++-- 3 files changed, 59 insertions(+), 39 deletions(-) diff --git a/npm/networkPolicyController.go b/npm/networkPolicyController.go index dace732b70..1f3af4c32a 100644 --- a/npm/networkPolicyController.go +++ b/npm/networkPolicyController.go @@ -61,8 +61,9 @@ func NewNetworkPolicyController(npInformer networkinginformers.NetworkPolicyInfo return netPolController } -// needSync checks whether the obj is valid network policy object. -func (c *networkPolicyController) needSync(obj interface{}) (string, error) { +// getNetworkPolicyKey returns namespace/name of network policy object if it is valid network policy object and has valid namespace/name. +// If not, it returns error. +func (c *networkPolicyController) getNetworkPolicyKey(obj interface{}) (string, error) { var key string _, ok := obj.(*networkingv1.NetworkPolicy) if !ok { @@ -78,23 +79,23 @@ func (c *networkPolicyController) needSync(obj interface{}) (string, error) { } func (c *networkPolicyController) addNetworkPolicy(obj interface{}) { - key, err := c.needSync(obj) + netPolkey, err := c.getNetworkPolicyKey(obj) if err != nil { utilruntime.HandleError(err) return } - c.workqueue.Add(key) + c.workqueue.Add(netPolkey) } func (c *networkPolicyController) updateNetworkPolicy(old, new interface{}) { - key, err := c.needSync(new) + netPolkey, err := c.getNetworkPolicyKey(new) if err != nil { utilruntime.HandleError(err) return } - // needSync already checked validation of casting new network policy. + // new network policy object is already checked validation by calling getNetworkPolicyKey function. newNetPol, _ := new.(*networkingv1.NetworkPolicy) oldNetPol, ok := old.(*networkingv1.NetworkPolicy) if ok { @@ -105,9 +106,8 @@ func (c *networkPolicyController) updateNetworkPolicy(old, new interface{}) { } } - netPolCachedKey := util.GetNSNameWithPrefix(key) c.npMgr.Lock() - cachedNetPolObj, netPolExists := c.npMgr.RawNpMap[netPolCachedKey] + cachedNetPolObj, netPolExists := c.npMgr.RawNpMap[netPolkey] c.npMgr.Unlock() if netPolExists { // if network policy does not have different states against lastly applied states stored in cachedNetPolObj, @@ -118,7 +118,7 @@ func (c *networkPolicyController) updateNetworkPolicy(old, new interface{}) { } } - c.workqueue.Add(key) + c.workqueue.Add(netPolkey) } func (c *networkPolicyController) deleteNetworkPolicy(obj interface{}) { @@ -142,18 +142,16 @@ func (c *networkPolicyController) deleteNetworkPolicy(obj interface{}) { } } - var key string + var netPolkey string var err error - if key, err = cache.MetaNamespaceKeyFunc(obj); err != nil { + if netPolkey, err = cache.MetaNamespaceKeyFunc(obj); err != nil { utilruntime.HandleError(err) return } - netPolCachedKey := util.GetNSNameWithPrefix(key) - // (TODO): need to decouple this lock from npMgr if possible c.npMgr.Lock() - _, netPolExists := c.npMgr.RawNpMap[netPolCachedKey] + _, netPolExists := c.npMgr.RawNpMap[netPolkey] c.npMgr.Unlock() // If a network policy object is not in the RawNpMap, do not need to clean-up states for the network policy // since netPolController did not apply for any states for the network policy @@ -161,7 +159,7 @@ func (c *networkPolicyController) deleteNetworkPolicy(obj interface{}) { return } - c.workqueue.Add(key) + c.workqueue.Add(netPolkey) } func (c *networkPolicyController) Run(threadiness int, stopCh <-chan struct{}) error { @@ -247,13 +245,13 @@ func (c *networkPolicyController) syncNetPol(key string) error { if err != nil { if errors.IsNotFound(err) { klog.Infof("Network Policy %s is not found, may be it is deleted", key) - // netPolObj is not found, but should need to check the RawNpMap cache with cachedNetPolKey. - // cleanUpNetworkPolicy method will take care of the deletion of a cached network policy if the cached network policy exists with cachedNetPolKey in our RawNpMap cache. - cachedNetPolKey := util.GetNSNameWithPrefix(key) - err = c.cleanUpNetworkPolicy(cachedNetPolKey, SafeToCleanUpAzureNpmChain) + // netPolObj is not found, but should need to check the RawNpMap cache with key. + // cleanUpNetworkPolicy method will take care of the deletion of a cached network policy if the cached network policy exists with key in our RawNpMap cache. + err = c.cleanUpNetworkPolicy(key, SafeToCleanUpAzureNpmChain) if err != nil { return fmt.Errorf("[syncNetPol] Error: %v when network policy is not found\n", err) } + return err } return err } @@ -261,8 +259,7 @@ func (c *networkPolicyController) syncNetPol(key string) error { // If DeletionTimestamp of the netPolObj is set, start cleaning up lastly applied states. // This is early cleaning up process from updateNetPol event if netPolObj.ObjectMeta.DeletionTimestamp != nil || netPolObj.ObjectMeta.DeletionGracePeriodSeconds != nil { - cachedNetPolKey := util.GetNSNameWithPrefix(key) - err = c.cleanUpNetworkPolicy(cachedNetPolKey, SafeToCleanUpAzureNpmChain) + err = c.cleanUpNetworkPolicy(key, SafeToCleanUpAzureNpmChain) if err != nil { return fmt.Errorf("Error: %v when ObjectMeta.DeletionTimestamp field is set\n", err) } @@ -322,8 +319,7 @@ func (c *networkPolicyController) syncAddAndUpdateNetPol(netPolObj *networkingv1 // So, avoid extra overhead to install default Azure NPM chain in initializeDefaultAzureNpmChain function. // To achieve it, use flag unSafeToCleanUpAzureNpmChain to indicate that the default Azure NPM chain cannot be deleted. // delete existing network policy - cachedNetPolKey := util.GetNSNameWithPrefix(netpolKey) - err = c.cleanUpNetworkPolicy(cachedNetPolKey, unSafeToCleanUpAzureNpmChain) + err = c.cleanUpNetworkPolicy(netpolKey, unSafeToCleanUpAzureNpmChain) if err != nil { return fmt.Errorf("[syncAddAndUpdateNetPol] Error: failed to deleteNetworkPolicy due to %s", err) } @@ -337,7 +333,8 @@ func (c *networkPolicyController) syncAddAndUpdateNetPol(netPolObj *networkingv1 // Cache network object first before applying ipsets and iptables. // If error happens while applying ipsets and iptables, // the key is re-queued in workqueue and process this function again, which eventually meets desired states of network policy - c.npMgr.RawNpMap[cachedNetPolKey] = netPolObj + c.npMgr.RawNpMap[netpolKey] = netPolObj + metrics.NumPolicies.Inc() ipsMgr := c.npMgr.NsMap[util.KubeAllNamespacesFlag].IpsMgr iptMgr := c.npMgr.NsMap[util.KubeAllNamespacesFlag].iptMgr @@ -374,14 +371,13 @@ func (c *networkPolicyController) syncAddAndUpdateNetPol(netPolObj *networkingv1 } } - metrics.NumPolicies.Inc() return nil } -// DeleteNetworkPolicy handles deleting network policy based on cachedNetPolKey. -func (c *networkPolicyController) cleanUpNetworkPolicy(cachedNetPolKey string, isSafeCleanUpAzureNpmChain IsSafeCleanUpAzureNpmChain) error { - cachedNetPolObj, cachedNetPolObjExists := c.npMgr.RawNpMap[cachedNetPolKey] - // if there is no applied network policy with the cachedNetPolKey, do not need to clean up process. +// DeleteNetworkPolicy handles deleting network policy based on netPolKey. +func (c *networkPolicyController) cleanUpNetworkPolicy(netPolKey string, isSafeCleanUpAzureNpmChain IsSafeCleanUpAzureNpmChain) error { + cachedNetPolObj, cachedNetPolObjExists := c.npMgr.RawNpMap[netPolKey] + // if there is no applied network policy with the netPolKey, do not need to clean up process. if !cachedNetPolObjExists { return nil } @@ -410,7 +406,7 @@ func (c *networkPolicyController) cleanUpNetworkPolicy(cachedNetPolKey string, i } // Sucess to clean up ipset and iptables operations in kernel and delete the cached network policy from RawNpMap - delete(c.npMgr.RawNpMap, cachedNetPolKey) + delete(c.npMgr.RawNpMap, netPolKey) metrics.NumPolicies.Dec() // If there is no cached network policy in RawNPMap anymore and no immediate network policy to process, start cleaning up default azure npm chains diff --git a/npm/networkPolicyController_test.go b/npm/networkPolicyController_test.go index d21ebfbdd4..74ed2afa8a 100644 --- a/npm/networkPolicyController_test.go +++ b/npm/networkPolicyController_test.go @@ -9,6 +9,8 @@ import ( "github.com/Azure/azure-container-networking/npm/ipsm" "github.com/Azure/azure-container-networking/npm/iptm" + "github.com/Azure/azure-container-networking/npm/metrics" + "github.com/Azure/azure-container-networking/npm/metrics/promutil" corev1 "k8s.io/api/core/v1" networkingv1 "k8s.io/api/networking/v1" @@ -196,6 +198,11 @@ type expectedNetPolValues struct { expectedLenOfWorkQueue int expectedIsAzureNpmChainCreated bool expectedEnqueueEventIntoWorkQueue bool + // prometheus metrics + expectedNumPoliciesMetrics int + expectedNumPoliciesMetricsError error + expectedCountOfAddPolicyExecTimeMetric int + expectedCountOfAddPolicyExecTimeMetricError error } func checkNetPolTestResult(testName string, f *netPolFixture, testCases []expectedNetPolValues) { @@ -219,6 +226,23 @@ func checkNetPolTestResult(testName string, f *netPolFixture, testCases []expect if got := f.isEnqueueEventIntoWorkQueue; got != test.expectedEnqueueEventIntoWorkQueue { f.t.Errorf("isEnqueueEventIntoWorkQueue %v, want %v", got, test.expectedEnqueueEventIntoWorkQueue) } + + // Check prometheus metrics + expectedNumPoliciesMetrics, expectedNumPoliciesMetricsError := promutil.GetValue(metrics.NumPolicies) + if expectedNumPoliciesMetrics != test.expectedNumPoliciesMetrics { + f.t.Errorf("NumPolicies metrics length = %d, want %d", expectedNumPoliciesMetrics, test.expectedNumPoliciesMetrics) + } + if expectedNumPoliciesMetricsError != test.expectedNumPoliciesMetricsError { + f.t.Errorf("NumPolicies metrics error = %s, want %s", expectedNumPoliciesMetricsError, test.expectedNumPoliciesMetricsError) + } + + expectedCountOfAddPolicyExecTimeMetric, expectedCountOfAddPolicyExecTimeMetricError := promutil.GetCountValue(metrics.AddPolicyExecTime) + if expectedCountOfAddPolicyExecTimeMetric != test.expectedCountOfAddPolicyExecTimeMetric { + f.t.Errorf("NumPolicies metrics length = %d, want %d", expectedCountOfAddPolicyExecTimeMetric, test.expectedCountOfAddPolicyExecTimeMetric) + } + if expectedCountOfAddPolicyExecTimeMetricError != test.expectedCountOfAddPolicyExecTimeMetricError { + f.t.Errorf("NumPolicies metrics error = %s, want %s", expectedCountOfAddPolicyExecTimeMetricError, test.expectedCountOfAddPolicyExecTimeMetricError) + } } } @@ -243,7 +267,7 @@ func TestAddMultipleNetworkPolicies(t *testing.T) { addNetPol(t, f, netPolObj2) testCases := []expectedNetPolValues{ - {1, 2, 0, true, true}, + {1, 2, 0, true, true, 2, nil, 2, nil}, } checkNetPolTestResult("TestAddMultipleNetPols", f, testCases) } @@ -260,8 +284,9 @@ func TestAddNetworkPolicy(t *testing.T) { addNetPol(t, f, netPolObj) testCases := []expectedNetPolValues{ - {1, 1, 0, true, true}, + {1, 1, 0, true, true, 1, nil, 1, nil}, } + checkNetPolTestResult("TestAddNetPol", f, testCases) } @@ -277,14 +302,13 @@ func TestDeleteNetworkPolicy(t *testing.T) { deleteNetPol(t, f, netPolObj) testCases := []expectedNetPolValues{ - {1, 0, 0, false, true}, + {1, 0, 0, false, true, 0, nil, 1, nil}, } checkNetPolTestResult("TestDelNetPol", f, testCases) } // this unit test is for the case where states of network policy are changed, but network policy controller does not need to reconcile. // Check it with expectedEnqueueEventIntoWorkQueue variable. -// (TODO): What are the fieldS which cause this case in a real world? Need to know it. func TestUpdateNetworkPolicy(t *testing.T) { oldNetPolObj := createNetPol() @@ -302,7 +326,7 @@ func TestUpdateNetworkPolicy(t *testing.T) { updateNetPol(t, f, oldNetPolObj, newNetPolObj) testCases := []expectedNetPolValues{ - {1, 1, 0, true, false}, + {1, 1, 0, true, false, 1, nil, 1, nil}, } checkNetPolTestResult("TestUpdateNetPol", f, testCases) } @@ -331,7 +355,7 @@ func TestLabelUpdateNetworkPolicy(t *testing.T) { updateNetPol(t, f, oldNetPolObj, newNetPolObj) testCases := []expectedNetPolValues{ - {1, 1, 0, true, true}, + {1, 1, 0, true, true, 1, nil, 2, nil}, } checkNetPolTestResult("TestUpdateNetPol", f, testCases) } diff --git a/npm/npm.go b/npm/npm.go index bf9f87f153..658b1402a5 100644 --- a/npm/npm.go +++ b/npm/npm.go @@ -57,8 +57,8 @@ type NetworkPolicyManager struct { NodeName string NsMap map[string]*Namespace // Key is ns- PodMap map[string]*NpmPod // Key is / - RawNpMap map[string]*networkingv1.NetworkPolicy // Key is ns-/ - ProcessedNpMap map[string]*networkingv1.NetworkPolicy // Key is ns-/ + RawNpMap map[string]*networkingv1.NetworkPolicy // Key is / + ProcessedNpMap map[string]*networkingv1.NetworkPolicy // Key is / clusterState telemetry.ClusterState version string @@ -270,7 +270,7 @@ func NewNetworkPolicyManager(clientset *kubernetes.Clientset, informerFactory in npMgr.nameSpaceController = NewNameSpaceController(nsInformer, clientset, npMgr) // create network policy controller - npMgr.netPolController = NewNetworkPolicyController(informerFactory.Networking().V1().NetworkPolicies(), clientset, npMgr) + npMgr.netPolController = NewNetworkPolicyController(npInformer, clientset, npMgr) return npMgr } From 9551acf29d129fc0cea4ccd8a96e8847bd8bbf4e Mon Sep 17 00:00:00 2001 From: Junguk Cho Date: Tue, 13 Apr 2021 13:40:33 -0700 Subject: [PATCH 18/19] minor update for varialbe names --- npm/networkPolicyController_test.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/npm/networkPolicyController_test.go b/npm/networkPolicyController_test.go index 74ed2afa8a..ab92386de3 100644 --- a/npm/networkPolicyController_test.go +++ b/npm/networkPolicyController_test.go @@ -199,8 +199,8 @@ type expectedNetPolValues struct { expectedIsAzureNpmChainCreated bool expectedEnqueueEventIntoWorkQueue bool // prometheus metrics - expectedNumPoliciesMetrics int - expectedNumPoliciesMetricsError error + expectedNumPoliciesMetric int + expectedNumPoliciesMetricError error expectedCountOfAddPolicyExecTimeMetric int expectedCountOfAddPolicyExecTimeMetricError error } @@ -229,11 +229,11 @@ func checkNetPolTestResult(testName string, f *netPolFixture, testCases []expect // Check prometheus metrics expectedNumPoliciesMetrics, expectedNumPoliciesMetricsError := promutil.GetValue(metrics.NumPolicies) - if expectedNumPoliciesMetrics != test.expectedNumPoliciesMetrics { - f.t.Errorf("NumPolicies metrics length = %d, want %d", expectedNumPoliciesMetrics, test.expectedNumPoliciesMetrics) + if expectedNumPoliciesMetrics != test.expectedNumPoliciesMetric { + f.t.Errorf("NumPolicies metrics length = %d, want %d", expectedNumPoliciesMetrics, test.expectedNumPoliciesMetric) } - if expectedNumPoliciesMetricsError != test.expectedNumPoliciesMetricsError { - f.t.Errorf("NumPolicies metrics error = %s, want %s", expectedNumPoliciesMetricsError, test.expectedNumPoliciesMetricsError) + if expectedNumPoliciesMetricsError != test.expectedNumPoliciesMetricError { + f.t.Errorf("NumPolicies metrics error = %s, want %s", expectedNumPoliciesMetricsError, test.expectedNumPoliciesMetricError) } expectedCountOfAddPolicyExecTimeMetric, expectedCountOfAddPolicyExecTimeMetricError := promutil.GetCountValue(metrics.AddPolicyExecTime) From b7eb15ca0e28f90d3896194f38eea7824bbd4595 Mon Sep 17 00:00:00 2001 From: Junguk Cho Date: Tue, 13 Apr 2021 15:53:30 -0700 Subject: [PATCH 19/19] remove dependency between UT by re-initializing metrics. Correct message. --- npm/metrics/prometheus_metrics.go | 5 +++++ npm/networkPolicyController_test.go | 13 +++++++++---- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/npm/metrics/prometheus_metrics.go b/npm/metrics/prometheus_metrics.go index 717db27312..852b5db6bd 100644 --- a/npm/metrics/prometheus_metrics.go +++ b/npm/metrics/prometheus_metrics.go @@ -61,6 +61,11 @@ var nodeLevelRegistry = prometheus.NewRegistry() var clusterLevelRegistry = prometheus.NewRegistry() var haveInitialized = false +func ReInitializeAllMetrics() { + haveInitialized = false + InitializeAll() +} + // InitializeAll creates all the Prometheus Metrics. The metrics will be nil before this method is called. func InitializeAll() { if !haveInitialized { diff --git a/npm/networkPolicyController_test.go b/npm/networkPolicyController_test.go index ab92386de3..a1904e7dee 100644 --- a/npm/networkPolicyController_test.go +++ b/npm/networkPolicyController_test.go @@ -57,6 +57,11 @@ func newNetPolFixture(t *testing.T) *netPolFixture { } f.npMgr.RawNpMap = make(map[string]*networkingv1.NetworkPolicy) + + // While running "make test-all", metrics hold states which was executed in previous unit test. + // (TODO): Need to fix to remove this fundamental dependency + metrics.ReInitializeAllMetrics() + return f } @@ -230,18 +235,18 @@ func checkNetPolTestResult(testName string, f *netPolFixture, testCases []expect // Check prometheus metrics expectedNumPoliciesMetrics, expectedNumPoliciesMetricsError := promutil.GetValue(metrics.NumPolicies) if expectedNumPoliciesMetrics != test.expectedNumPoliciesMetric { - f.t.Errorf("NumPolicies metrics length = %d, want %d", expectedNumPoliciesMetrics, test.expectedNumPoliciesMetric) + f.t.Errorf("NumPolicies metric length = %d, want %d", expectedNumPoliciesMetrics, test.expectedNumPoliciesMetric) } if expectedNumPoliciesMetricsError != test.expectedNumPoliciesMetricError { - f.t.Errorf("NumPolicies metrics error = %s, want %s", expectedNumPoliciesMetricsError, test.expectedNumPoliciesMetricError) + f.t.Errorf("NumPolicies metric error = %s, want %s", expectedNumPoliciesMetricsError, test.expectedNumPoliciesMetricError) } expectedCountOfAddPolicyExecTimeMetric, expectedCountOfAddPolicyExecTimeMetricError := promutil.GetCountValue(metrics.AddPolicyExecTime) if expectedCountOfAddPolicyExecTimeMetric != test.expectedCountOfAddPolicyExecTimeMetric { - f.t.Errorf("NumPolicies metrics length = %d, want %d", expectedCountOfAddPolicyExecTimeMetric, test.expectedCountOfAddPolicyExecTimeMetric) + f.t.Errorf("CountOfAddPolicyExecTime metric length = %d, want %d", expectedCountOfAddPolicyExecTimeMetric, test.expectedCountOfAddPolicyExecTimeMetric) } if expectedCountOfAddPolicyExecTimeMetricError != test.expectedCountOfAddPolicyExecTimeMetricError { - f.t.Errorf("NumPolicies metrics error = %s, want %s", expectedCountOfAddPolicyExecTimeMetricError, test.expectedCountOfAddPolicyExecTimeMetricError) + f.t.Errorf("CountOfAddPolicyExecTime metric error = %s, want %s", expectedCountOfAddPolicyExecTimeMetricError, test.expectedCountOfAddPolicyExecTimeMetricError) } } }