From 76204487a6acc857eaaba477bb8204385c1255c2 Mon Sep 17 00:00:00 2001 From: vakr Date: Tue, 30 Aug 2022 10:23:22 -0700 Subject: [PATCH 1/6] [NPM][Fix] Adding redundant check to ignore endpoint not found while removing policies --- network/hnswrapper/hnsv2wrapperfake.go | 2 +- .../policies/policymanager_windows.go | 3 ++- .../policies/policymanager_windows_test.go | 27 +++++++++++++++++++ 3 files changed, 30 insertions(+), 2 deletions(-) diff --git a/network/hnswrapper/hnsv2wrapperfake.go b/network/hnswrapper/hnsv2wrapperfake.go index 4f59aa5aec..7d59d248d6 100644 --- a/network/hnswrapper/hnsv2wrapperfake.go +++ b/network/hnswrapper/hnsv2wrapperfake.go @@ -203,7 +203,7 @@ func (f Hnsv2wrapperFake) GetEndpointByID(endpointID string) (*hcn.HostComputeEn if ep, ok := f.Cache.endpoints[endpointID]; ok { return ep.GetHCNObj(), nil } - return &hcn.HostComputeEndpoint{}, nil + return &hcn.HostComputeEndpoint{}, hcn.EndpointNotFoundError{EndpointID: endpointID} } func (f Hnsv2wrapperFake) CreateEndpoint(endpoint *hcn.HostComputeEndpoint) (*hcn.HostComputeEndpoint, error) { diff --git a/npm/pkg/dataplane/policies/policymanager_windows.go b/npm/pkg/dataplane/policies/policymanager_windows.go index 9b12ce56ee..5a51ae394c 100644 --- a/npm/pkg/dataplane/policies/policymanager_windows.go +++ b/npm/pkg/dataplane/policies/policymanager_windows.go @@ -174,7 +174,8 @@ func (pMgr *PolicyManager) removePolicy(policy *NPMNetworkPolicy, endpointList m func (pMgr *PolicyManager) removePolicyByEndpointID(ruleID, epID string, noOfRulesToRemove int, resetAllACL shouldResetAllACLs) error { epObj, err := pMgr.ioShim.Hns.GetEndpointByID(epID) if err != nil { - if isNotFoundErr(err) { + // IsNotFound check is being skipped at times. So adding a redundant check here. + if isNotFoundErr(err) || strings.Contains(err.Error(), "endpoint was not found") { klog.Infof("[PolicyManagerWindows] ignoring remove policy on endpoint since the endpoint wasn't found. the corresponding pod was most likely deleted. policy: %s, endpoint: %s", ruleID, epID) return nil } diff --git a/npm/pkg/dataplane/policies/policymanager_windows_test.go b/npm/pkg/dataplane/policies/policymanager_windows_test.go index 2d32bcabf6..0f279c6727 100644 --- a/npm/pkg/dataplane/policies/policymanager_windows_test.go +++ b/npm/pkg/dataplane/policies/policymanager_windows_test.go @@ -129,6 +129,33 @@ func TestRemovePolicies(t *testing.T) { verifyACLCacheIsCleaned(t, hns, len(endPointIDList)) } +func TestRemovePoliciesEndpointNotFound(t *testing.T) { + pMgr, hns := getPMgr(t) + err := pMgr.AddPolicy(TestNetworkPolicies[0], endPointIDList) + require.NoError(t, err) + + aclID := TestNetworkPolicies[0].ACLPolicyID + + _, err = hns.Cache.ACLPolicies(endPointIDList, aclID) + require.NoError(t, err) + /* + for _, id := range endPointIDList { + acls, ok := aclPolicies[id] + if !ok { + t.Errorf("Expected %s to be in ACLs", id) + } + verifyFakeHNSCacheACLs(t, expectedACLs, acls) + } + */ + + testendPointIDList := map[string]string{ + "10.0.0.5": "test10", + } + err = pMgr.RemovePolicy(TestNetworkPolicies[0].PolicyKey, testendPointIDList) + require.NoError(t, err, err) + verifyACLCacheIsCleaned(t, hns, len(endPointIDList)) +} + // Helper functions for UTS func getPMgr(t *testing.T) (*PolicyManager, *hnswrapper.Hnsv2wrapperFake) { From f7e2f98bc033af9d370881e469eaf61a5268bf1d Mon Sep 17 00:00:00 2001 From: vakr Date: Tue, 30 Aug 2022 10:33:47 -0700 Subject: [PATCH 2/6] adding error in logline --- npm/pkg/dataplane/policies/policymanager_windows.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/npm/pkg/dataplane/policies/policymanager_windows.go b/npm/pkg/dataplane/policies/policymanager_windows.go index 5a51ae394c..97cd3f6d61 100644 --- a/npm/pkg/dataplane/policies/policymanager_windows.go +++ b/npm/pkg/dataplane/policies/policymanager_windows.go @@ -176,7 +176,7 @@ func (pMgr *PolicyManager) removePolicyByEndpointID(ruleID, epID string, noOfRul if err != nil { // IsNotFound check is being skipped at times. So adding a redundant check here. if isNotFoundErr(err) || strings.Contains(err.Error(), "endpoint was not found") { - klog.Infof("[PolicyManagerWindows] ignoring remove policy on endpoint since the endpoint wasn't found. the corresponding pod was most likely deleted. policy: %s, endpoint: %s", ruleID, epID) + klog.Infof("[PolicyManagerWindows] ignoring remove policy since the endpoint wasn't found. the corresponding pod might be deleted. policy: %s, endpoint: %s, err: %s", ruleID, epID, err.Error()) return nil } return fmt.Errorf("[PolicyManagerWindows] failed to remove policy while getting the endpoint. policy: %s, endpoint: %s, err: %w", ruleID, epID, err) From 0fccf7ce4f277b3189a2da2e86176315beec2ca9 Mon Sep 17 00:00:00 2001 From: vakr Date: Tue, 30 Aug 2022 10:39:18 -0700 Subject: [PATCH 3/6] adding error in logline --- npm/pkg/dataplane/policies/policymanager_windows.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/npm/pkg/dataplane/policies/policymanager_windows.go b/npm/pkg/dataplane/policies/policymanager_windows.go index 97cd3f6d61..1b68604d82 100644 --- a/npm/pkg/dataplane/policies/policymanager_windows.go +++ b/npm/pkg/dataplane/policies/policymanager_windows.go @@ -222,9 +222,10 @@ func (pMgr *PolicyManager) removePolicyByEndpointID(ruleID, epID string, noOfRul func (pMgr *PolicyManager) applyPoliciesToEndpointID(epID string, policies hcn.PolicyEndpointRequest) error { epObj, err := pMgr.ioShim.Hns.GetEndpointByID(epID) if err != nil { - if isNotFoundErr(err) { + // IsNotFound check is being skipped at times. So adding a redundant check here. + if isNotFoundErr(err) || strings.Contains(err.Error(), "endpoint was not found") { // unlikely scenario where an endpoint is deleted right after we refresh HNS endpoints, or an unlikely scenario where an endpoint is deleted right after we refresh HNS endpoints - metrics.SendErrorLogAndMetric(util.IptmID, "[PolicyManagerWindows] ignoring apply policies to endpoint since the endpoint wasn't found. endpoint: %s", epID) + metrics.SendErrorLogAndMetric(util.IptmID, "[PolicyManagerWindows] ignoring apply policies to endpoint since the endpoint wasn't found. endpoint: %s, err: %s", epID, err.Error()) return nil } return fmt.Errorf("[PolicyManagerWindows] to apply policies while getting the endpoint. endpoint: %s, err: %w", epID, err) From b51f98d41fe2c57f2307bf8077aac053367147dd Mon Sep 17 00:00:00 2001 From: vakr Date: Tue, 30 Aug 2022 16:02:50 -0700 Subject: [PATCH 4/6] making sure applyDP error is handled correctly --- npm/pkg/controlplane/controllers/v2/namespaceController.go | 2 +- npm/pkg/controlplane/controllers/v2/podController.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/npm/pkg/controlplane/controllers/v2/namespaceController.go b/npm/pkg/controlplane/controllers/v2/namespaceController.go index d36d3a0b1b..cc2e0ab50e 100644 --- a/npm/pkg/controlplane/controllers/v2/namespaceController.go +++ b/npm/pkg/controlplane/controllers/v2/namespaceController.go @@ -221,7 +221,7 @@ func (nsc *NamespaceController) processNextWorkItem() bool { } // syncNamespace compares the actual state with the desired, and attempts to converge the two. -func (nsc *NamespaceController) syncNamespace(nsKey string) error { +func (nsc *NamespaceController) syncNamespace(nsKey string) (err error) { // timer for recording execution times timer := metrics.StartNewTimer() diff --git a/npm/pkg/controlplane/controllers/v2/podController.go b/npm/pkg/controlplane/controllers/v2/podController.go index b9a84e0b7b..72be2a399f 100644 --- a/npm/pkg/controlplane/controllers/v2/podController.go +++ b/npm/pkg/controlplane/controllers/v2/podController.go @@ -243,7 +243,7 @@ func (c *PodController) processNextWorkItem() bool { } // syncPod compares the actual state with the desired, and attempts to converge the two. -func (c *PodController) syncPod(key string) error { +func (c *PodController) syncPod(key string) (err error) { // timer for recording execution times timer := metrics.StartNewTimer() From 9d6c1ba9527a84ae9f2cd1b1bb4fc9b1e9e74fe7 Mon Sep 17 00:00:00 2001 From: vakr Date: Wed, 31 Aug 2022 11:27:57 -0700 Subject: [PATCH 5/6] revert changes in controllers, covered in a different PR --- npm/pkg/controlplane/controllers/v2/namespaceController.go | 2 +- npm/pkg/controlplane/controllers/v2/podController.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/npm/pkg/controlplane/controllers/v2/namespaceController.go b/npm/pkg/controlplane/controllers/v2/namespaceController.go index cc2e0ab50e..d36d3a0b1b 100644 --- a/npm/pkg/controlplane/controllers/v2/namespaceController.go +++ b/npm/pkg/controlplane/controllers/v2/namespaceController.go @@ -221,7 +221,7 @@ func (nsc *NamespaceController) processNextWorkItem() bool { } // syncNamespace compares the actual state with the desired, and attempts to converge the two. -func (nsc *NamespaceController) syncNamespace(nsKey string) (err error) { +func (nsc *NamespaceController) syncNamespace(nsKey string) error { // timer for recording execution times timer := metrics.StartNewTimer() diff --git a/npm/pkg/controlplane/controllers/v2/podController.go b/npm/pkg/controlplane/controllers/v2/podController.go index 72be2a399f..b9a84e0b7b 100644 --- a/npm/pkg/controlplane/controllers/v2/podController.go +++ b/npm/pkg/controlplane/controllers/v2/podController.go @@ -243,7 +243,7 @@ func (c *PodController) processNextWorkItem() bool { } // syncPod compares the actual state with the desired, and attempts to converge the two. -func (c *PodController) syncPod(key string) (err error) { +func (c *PodController) syncPod(key string) error { // timer for recording execution times timer := metrics.StartNewTimer() From da66e4c83612b86d2130da7d307a189ac0850eb9 Mon Sep 17 00:00:00 2001 From: vakr Date: Wed, 31 Aug 2022 14:19:19 -0700 Subject: [PATCH 6/6] adding UTs --- .../policies/policymanager_windows_test.go | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/npm/pkg/dataplane/policies/policymanager_windows_test.go b/npm/pkg/dataplane/policies/policymanager_windows_test.go index 0f279c6727..553c2d4317 100644 --- a/npm/pkg/dataplane/policies/policymanager_windows_test.go +++ b/npm/pkg/dataplane/policies/policymanager_windows_test.go @@ -129,6 +129,15 @@ func TestRemovePolicies(t *testing.T) { verifyACLCacheIsCleaned(t, hns, len(endPointIDList)) } +func TestApplyPoliciesEndpointNotFound(t *testing.T) { + pMgr, _ := getPMgr(t) + testendPointIDList := map[string]string{ + "10.0.0.5": "test10", + } + err := pMgr.AddPolicy(TestNetworkPolicies[0], testendPointIDList) + require.NoError(t, err) +} + func TestRemovePoliciesEndpointNotFound(t *testing.T) { pMgr, hns := getPMgr(t) err := pMgr.AddPolicy(TestNetworkPolicies[0], endPointIDList) @@ -138,16 +147,6 @@ func TestRemovePoliciesEndpointNotFound(t *testing.T) { _, err = hns.Cache.ACLPolicies(endPointIDList, aclID) require.NoError(t, err) - /* - for _, id := range endPointIDList { - acls, ok := aclPolicies[id] - if !ok { - t.Errorf("Expected %s to be in ACLs", id) - } - verifyFakeHNSCacheACLs(t, expectedACLs, acls) - } - */ - testendPointIDList := map[string]string{ "10.0.0.5": "test10", }