diff --git a/npm/pkg/dataplane/dataplane-test-cases_windows_test.go b/npm/pkg/dataplane/dataplane-test-cases_windows_test.go index 6aae19e33f..e403eb8dd1 100644 --- a/npm/pkg/dataplane/dataplane-test-cases_windows_test.go +++ b/npm/pkg/dataplane/dataplane-test-cases_windows_test.go @@ -15,6 +15,7 @@ const ( podCrudTag Tag = "pod-crud" nsCrudTag Tag = "namespace-crud" netpolCrudTag Tag = "netpol-crud" + reconcileTag Tag = "reconcile" ) const ( @@ -155,6 +156,7 @@ func getAllSerialTests() []*SerialTestCase { TestCaseMetadata: &TestCaseMetadata{ Tags: []Tag{ podCrudTag, + reconcileTag, }, DpCfg: defaultWindowsDPCfg, InitialEndpoints: nil, @@ -482,6 +484,33 @@ func getAllSerialTests() []*SerialTestCase { }, }, }, + { + Description: "issue 1613: remove last instance of label, then reconcile IPSets, then apply DP", + Actions: []*Action{ + CreateEndpoint(endpoint1, ip1), + CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + ApplyDP(), + UpdatePodLabels("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}, nil), + ReconcileDP(), + ApplyDP(), + }, + TestCaseMetadata: &TestCaseMetadata{ + Tags: []Tag{ + podCrudTag, + reconcileTag, + }, + DpCfg: defaultWindowsDPCfg, + InitialEndpoints: nil, + ExpectedSetPolicies: []*hcn.SetPolicySetting{ + dptestutils.SetPolicy(emptySet), + dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), + dptestutils.SetPolicy(nsXSet, ip1), + }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ + endpoint1: {}, + }, + }, + }, } } diff --git a/npm/pkg/dataplane/dataplane.go b/npm/pkg/dataplane/dataplane.go index 8227da2abf..1d5a4ccdae 100644 --- a/npm/pkg/dataplane/dataplane.go +++ b/npm/pkg/dataplane/dataplane.go @@ -217,7 +217,8 @@ func (dp *DataPlane) ApplyDataPlane() error { err := dp.refreshPodEndpoints() if err != nil { metrics.SendErrorLogAndMetric(util.DaemonDataplaneID, "[DataPlane] failed to refresh endpoints while updating pods. err: [%s]", err.Error()) - return fmt.Errorf("[DataPlane] failed to refresh endpoints while updating pods. err: [%w]", err) + // return as success since this can be retried irrespective of other operations + return nil } // lock updatePodCache while driving goal state to kernel @@ -225,23 +226,16 @@ func (dp *DataPlane) ApplyDataPlane() error { dp.updatePodCache.Lock() defer dp.updatePodCache.Unlock() - var aggregateErr error for podKey, pod := range dp.updatePodCache.cache { err := dp.updatePod(pod) if err != nil { - if aggregateErr == nil { - aggregateErr = fmt.Errorf("failed to update pod while applying the dataplane. key: [%s], err: [%w]", podKey, err) - } else { - aggregateErr = fmt.Errorf("failed to update pod while applying the dataplane. key: [%s], err: [%s]. previous err: [%w]", podKey, err.Error(), aggregateErr) - } + // move on to the next and later return as success since this can be retried irrespective of other operations metrics.SendErrorLogAndMetric(util.DaemonDataplaneID, "failed to update pod while applying the dataplane. key: [%s], err: [%s]", podKey, err.Error()) continue } + delete(dp.updatePodCache.cache, podKey) } - if aggregateErr != nil { - return fmt.Errorf("[DataPlane] error while updating pods: %w", aggregateErr) - } } return nil } @@ -393,17 +387,18 @@ func (dp *DataPlane) createIPSetsAndReferences(sets []*ipsets.TranslatedIPSet, n } func (dp *DataPlane) deleteIPSetsAndReferences(sets []*ipsets.TranslatedIPSet, netpolName string, referenceType ipsets.ReferenceType) error { + for _, set := range sets { + prefixName := set.Metadata.GetPrefixName() + if err := dp.ipsetMgr.DeleteReference(prefixName, netpolName, referenceType); err != nil { + // with current implementation of DeleteReference(), err will be ipsets.ErrSetDoesNotExist + klog.Infof("[DataPlane] ignoring delete reference on non-existent set. ipset: %s. netpol: %s. referenceType: %s", prefixName, netpolName, referenceType) + } + } + npmErrorString := npmerrors.DeleteSelectorReference if referenceType == ipsets.NetPolType { npmErrorString = npmerrors.DeleteNetPolReference } - for _, set := range sets { - // TODO ignore set does not exist error - err := dp.ipsetMgr.DeleteReference(set.Metadata.GetPrefixName(), netpolName, referenceType) - if err != nil { - return npmerrors.Errorf(npmErrorString, false, fmt.Sprintf("[DataPlane] failed to deleteIPSetReferences with err: %s", err.Error())) - } - } // Check if any list sets are provided with members to delete // NOTE: every translated member will be deleted, even if the member is part of the same set in another policy diff --git a/npm/pkg/dataplane/dataplane_windows.go b/npm/pkg/dataplane/dataplane_windows.go index 856f96065e..60552a3876 100644 --- a/npm/pkg/dataplane/dataplane_windows.go +++ b/npm/pkg/dataplane/dataplane_windows.go @@ -167,7 +167,9 @@ func (dp *DataPlane) updatePod(pod *updateNPMPod) error { */ selectorReference, err := dp.ipsetMgr.GetSelectorReferencesBySet(setName) if err != nil { - return err + // ignore this set since it may have been deleted in the background reconcile thread + klog.Infof("[DataPlane] ignoring pod update for ipset to remove since the set does not exist. pod: %+v. set: %s", pod, setName) + continue } for policyKey := range selectorReference { @@ -204,7 +206,9 @@ func (dp *DataPlane) updatePod(pod *updateNPMPod) error { */ selectorReference, err := dp.ipsetMgr.GetSelectorReferencesBySet(setName) if err != nil { - return err + // ignore this set since it may have been deleted in the background reconcile thread + klog.Infof("[DataPlane] ignoring pod update for ipset to remove since the set does not exist. pod: %+v. set: %s", pod, setName) + continue } for policyKey := range selectorReference {