diff --git a/network/hnswrapper/hnsv2wrapperfake.go b/network/hnswrapper/hnsv2wrapperfake.go index 4f59aa5aec..7d59d248d6 100644 --- a/network/hnswrapper/hnsv2wrapperfake.go +++ b/network/hnswrapper/hnsv2wrapperfake.go @@ -203,7 +203,7 @@ func (f Hnsv2wrapperFake) GetEndpointByID(endpointID string) (*hcn.HostComputeEn if ep, ok := f.Cache.endpoints[endpointID]; ok { return ep.GetHCNObj(), nil } - return &hcn.HostComputeEndpoint{}, nil + return &hcn.HostComputeEndpoint{}, hcn.EndpointNotFoundError{EndpointID: endpointID} } func (f Hnsv2wrapperFake) CreateEndpoint(endpoint *hcn.HostComputeEndpoint) (*hcn.HostComputeEndpoint, error) { diff --git a/npm/pkg/dataplane/policies/policymanager_windows.go b/npm/pkg/dataplane/policies/policymanager_windows.go index 9b12ce56ee..1b68604d82 100644 --- a/npm/pkg/dataplane/policies/policymanager_windows.go +++ b/npm/pkg/dataplane/policies/policymanager_windows.go @@ -174,8 +174,9 @@ func (pMgr *PolicyManager) removePolicy(policy *NPMNetworkPolicy, endpointList m func (pMgr *PolicyManager) removePolicyByEndpointID(ruleID, epID string, noOfRulesToRemove int, resetAllACL shouldResetAllACLs) error { epObj, err := pMgr.ioShim.Hns.GetEndpointByID(epID) if err != nil { - if isNotFoundErr(err) { - klog.Infof("[PolicyManagerWindows] ignoring remove policy on endpoint since the endpoint wasn't found. the corresponding pod was most likely deleted. policy: %s, endpoint: %s", ruleID, epID) + // IsNotFound check is being skipped at times. So adding a redundant check here. + if isNotFoundErr(err) || strings.Contains(err.Error(), "endpoint was not found") { + klog.Infof("[PolicyManagerWindows] ignoring remove policy since the endpoint wasn't found. the corresponding pod might be deleted. policy: %s, endpoint: %s, err: %s", ruleID, epID, err.Error()) return nil } return fmt.Errorf("[PolicyManagerWindows] failed to remove policy while getting the endpoint. policy: %s, endpoint: %s, err: %w", ruleID, epID, err) @@ -221,9 +222,10 @@ func (pMgr *PolicyManager) removePolicyByEndpointID(ruleID, epID string, noOfRul func (pMgr *PolicyManager) applyPoliciesToEndpointID(epID string, policies hcn.PolicyEndpointRequest) error { epObj, err := pMgr.ioShim.Hns.GetEndpointByID(epID) if err != nil { - if isNotFoundErr(err) { + // IsNotFound check is being skipped at times. So adding a redundant check here. + if isNotFoundErr(err) || strings.Contains(err.Error(), "endpoint was not found") { // unlikely scenario where an endpoint is deleted right after we refresh HNS endpoints, or an unlikely scenario where an endpoint is deleted right after we refresh HNS endpoints - metrics.SendErrorLogAndMetric(util.IptmID, "[PolicyManagerWindows] ignoring apply policies to endpoint since the endpoint wasn't found. endpoint: %s", epID) + metrics.SendErrorLogAndMetric(util.IptmID, "[PolicyManagerWindows] ignoring apply policies to endpoint since the endpoint wasn't found. endpoint: %s, err: %s", epID, err.Error()) return nil } return fmt.Errorf("[PolicyManagerWindows] to apply policies while getting the endpoint. endpoint: %s, err: %w", epID, err) diff --git a/npm/pkg/dataplane/policies/policymanager_windows_test.go b/npm/pkg/dataplane/policies/policymanager_windows_test.go index 2d32bcabf6..553c2d4317 100644 --- a/npm/pkg/dataplane/policies/policymanager_windows_test.go +++ b/npm/pkg/dataplane/policies/policymanager_windows_test.go @@ -129,6 +129,32 @@ func TestRemovePolicies(t *testing.T) { verifyACLCacheIsCleaned(t, hns, len(endPointIDList)) } +func TestApplyPoliciesEndpointNotFound(t *testing.T) { + pMgr, _ := getPMgr(t) + testendPointIDList := map[string]string{ + "10.0.0.5": "test10", + } + err := pMgr.AddPolicy(TestNetworkPolicies[0], testendPointIDList) + require.NoError(t, err) +} + +func TestRemovePoliciesEndpointNotFound(t *testing.T) { + pMgr, hns := getPMgr(t) + err := pMgr.AddPolicy(TestNetworkPolicies[0], endPointIDList) + require.NoError(t, err) + + aclID := TestNetworkPolicies[0].ACLPolicyID + + _, err = hns.Cache.ACLPolicies(endPointIDList, aclID) + require.NoError(t, err) + testendPointIDList := map[string]string{ + "10.0.0.5": "test10", + } + err = pMgr.RemovePolicy(TestNetworkPolicies[0].PolicyKey, testendPointIDList) + require.NoError(t, err, err) + verifyACLCacheIsCleaned(t, hns, len(endPointIDList)) +} + // Helper functions for UTS func getPMgr(t *testing.T) (*PolicyManager, *hnswrapper.Hnsv2wrapperFake) {