From 5b2169dfd51af1e35e20b4347d0c77f69ea3e915 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Wed, 15 Feb 2023 15:54:37 -0800 Subject: [PATCH 01/15] reset acls for pods marked for delete --- .../controllers/v2/podController.go | 18 ++++-- .../controllers/v2/podController_test.go | 35 +++++++----- npm/pkg/dataplane/dataplane.go | 29 ++++++++-- npm/pkg/dataplane/dataplane_windows.go | 57 ++++++++++++------- npm/pkg/dataplane/policies/policymanager.go | 7 +++ npm/pkg/dataplane/types.go | 37 +++++++++--- 6 files changed, 127 insertions(+), 56 deletions(-) diff --git a/npm/pkg/controlplane/controllers/v2/podController.go b/npm/pkg/controlplane/controllers/v2/podController.go index f7d92119fa..9a35f2407c 100644 --- a/npm/pkg/controlplane/controllers/v2/podController.go +++ b/npm/pkg/controlplane/controllers/v2/podController.go @@ -30,8 +30,9 @@ import ( type NamedPortOperation string const ( - deleteNamedPort NamedPortOperation = "del" - addNamedPort NamedPortOperation = "add" + deleteNamedPort NamedPortOperation = "del" + addNamedPort NamedPortOperation = "add" + deletePodAndNamedPort NamedPortOperation = "del-pod-and-namedport" addEvent string = "ADD" updateEvent string = "UPDATE" @@ -542,9 +543,8 @@ func (c *PodController) cleanUpDeletedPod(cachedNpmPodKey string) error { } var err error - cachedPodMetadata := dataplane.NewPodMetadata(cachedNpmPodKey, cachedNpmPod.PodIP, "") + cachedPodMetadata := dataplane.NewPodMetadataMarkedForDelete(cachedNpmPodKey, cachedNpmPod.PodIP) // Delete the pod from its namespace's ipset. - // note: NodeName empty is not going to call update pod if err = c.dp.RemoveFromSets( []*ipsets.IPSetMetadata{ipsets.NewIPSetMetadata(cachedNpmPod.Namespace, ipsets.Namespace)}, cachedPodMetadata); err != nil { @@ -568,7 +568,7 @@ func (c *PodController) cleanUpDeletedPod(cachedNpmPodKey string) error { // Delete pod's named ports from its ipset. Need to pass true in the manageNamedPortIpsets function call if err = c.manageNamedPortIpsets( - cachedNpmPod.ContainerPorts, cachedNpmPodKey, cachedNpmPod.PodIP, "", deleteNamedPort); err != nil { + cachedNpmPod.ContainerPorts, cachedNpmPodKey, cachedNpmPod.PodIP, "", deletePodAndNamedPort); err != nil { return fmt.Errorf("[cleanUpDeletedPod] Error: failed to delete pod from named port ipset with err: %w", err) } @@ -601,16 +601,22 @@ func (c *PodController) manageNamedPortIpsets(portList []corev1.ContainerPort, p namedPortIpsetEntry := fmt.Sprintf("%s,%s%d", podIP, protocol, port.ContainerPort) // nodename in NewPodMetadata is nil so UpdatePod is ignored - podMetadata := dataplane.NewPodMetadata(podKey, namedPortIpsetEntry, nodeName) switch namedPortOperation { case deleteNamedPort: + podMetadata := dataplane.NewPodMetadata(podKey, namedPortIpsetEntry, nodeName) if err := c.dp.RemoveFromSets([]*ipsets.IPSetMetadata{ipsets.NewIPSetMetadata(port.Name, ipsets.NamedPorts)}, podMetadata); err != nil { return fmt.Errorf("failed to remove from set when deleting named port with err %w", err) } case addNamedPort: + podMetadata := dataplane.NewPodMetadata(podKey, namedPortIpsetEntry, nodeName) if err := c.dp.AddToSets([]*ipsets.IPSetMetadata{ipsets.NewIPSetMetadata(port.Name, ipsets.NamedPorts)}, podMetadata); err != nil { return fmt.Errorf("failed to add to set when deleting named port with err %w", err) } + case deletePodAndNamedPort: + podMetadata := dataplane.NewPodMetadataMarkedForDelete(podKey, namedPortIpsetEntry) + if err := c.dp.RemoveFromSets([]*ipsets.IPSetMetadata{ipsets.NewIPSetMetadata(port.Name, ipsets.NamedPorts)}, podMetadata); err != nil { + return fmt.Errorf("failed to remove from set when deleting pod and named port with err %w", err) + } } } diff --git a/npm/pkg/controlplane/controllers/v2/podController_test.go b/npm/pkg/controlplane/controllers/v2/podController_test.go index d1ae9c5c02..824040529e 100644 --- a/npm/pkg/controlplane/controllers/v2/podController_test.go +++ b/npm/pkg/controlplane/controllers/v2/podController_test.go @@ -429,13 +429,14 @@ func TestDeletePod(t *testing.T) { } dp.EXPECT().ApplyDataPlane().Return(nil).Times(2) // Delete pod section - dp.EXPECT().RemoveFromSets(mockIPSets[:1], podMetadata1).Return(nil).Times(1) - dp.EXPECT().RemoveFromSets(mockIPSets[1:], podMetadata1).Return(nil).Times(1) + deleteMetadata := dataplane.NewPodMetadataMarkedForDelete("test-namespace/test-pod", "1.2.3.4") + dp.EXPECT().RemoveFromSets(mockIPSets[:1], deleteMetadata).Return(nil).Times(1) + dp.EXPECT().RemoveFromSets(mockIPSets[1:], deleteMetadata).Return(nil).Times(1) if !util.IsWindowsDP() { dp.EXPECT(). RemoveFromSets( []*ipsets.IPSetMetadata{ipsets.NewIPSetMetadata("app:test-pod", ipsets.NamedPorts)}, - dataplane.NewPodMetadata("test-namespace/test-pod", "1.2.3.4,8080", ""), + dataplane.NewPodMetadataMarkedForDelete("test-namespace/test-pod", "1.2.3.4,8080"), ). Return(nil).Times(1) } @@ -545,13 +546,14 @@ func TestDeletePodWithTombstoneAfterAddingPod(t *testing.T) { } dp.EXPECT().ApplyDataPlane().Return(nil).Times(2) // Delete pod section - dp.EXPECT().RemoveFromSets(mockIPSets[:1], podMetadata1).Return(nil).Times(1) - dp.EXPECT().RemoveFromSets(mockIPSets[1:], podMetadata1).Return(nil).Times(1) + deleteMetadata := dataplane.NewPodMetadataMarkedForDelete("test-namespace/test-pod", "1.2.3.4") + dp.EXPECT().RemoveFromSets(mockIPSets[:1], deleteMetadata).Return(nil).Times(1) + dp.EXPECT().RemoveFromSets(mockIPSets[1:], deleteMetadata).Return(nil).Times(1) if !util.IsWindowsDP() { dp.EXPECT(). RemoveFromSets( []*ipsets.IPSetMetadata{ipsets.NewIPSetMetadata("app:test-pod", ipsets.NamedPorts)}, - dataplane.NewPodMetadata("test-namespace/test-pod", "1.2.3.4,8080", ""), + dataplane.NewPodMetadataMarkedForDelete("test-namespace/test-pod", "1.2.3.4,8080"), ). Return(nil).Times(1) } @@ -664,13 +666,14 @@ func TestEmptyIPUpdate(t *testing.T) { } dp.EXPECT().ApplyDataPlane().Return(nil).Times(2) // Delete pod section - dp.EXPECT().RemoveFromSets(mockIPSets[:1], podMetadata1).Return(nil).Times(1) - dp.EXPECT().RemoveFromSets(mockIPSets[1:], podMetadata1).Return(nil).Times(1) + deleteMetadata := dataplane.NewPodMetadataMarkedForDelete("test-namespace/test-pod", "1.2.3.4") + dp.EXPECT().RemoveFromSets(mockIPSets[:1], deleteMetadata).Return(nil).Times(1) + dp.EXPECT().RemoveFromSets(mockIPSets[1:], deleteMetadata).Return(nil).Times(1) if !util.IsWindowsDP() { dp.EXPECT(). RemoveFromSets( []*ipsets.IPSetMetadata{ipsets.NewIPSetMetadata("app:test-pod", ipsets.NamedPorts)}, - dataplane.NewPodMetadata("test-namespace/test-pod", "1.2.3.4,8080", ""), + dataplane.NewPodMetadataMarkedForDelete("test-namespace/test-pod", "1.2.3.4,8080"), ). Return(nil).Times(1) } @@ -757,13 +760,14 @@ func TestIPAddressUpdatePod(t *testing.T) { } dp.EXPECT().ApplyDataPlane().Return(nil).Times(2) // Delete pod section - dp.EXPECT().RemoveFromSets(mockIPSets[:1], podMetadata1).Return(nil).Times(1) - dp.EXPECT().RemoveFromSets(mockIPSets[1:], podMetadata1).Return(nil).Times(1) + deleteMetadata := dataplane.NewPodMetadataMarkedForDelete("test-namespace/test-pod", "1.2.3.4") + dp.EXPECT().RemoveFromSets(mockIPSets[:1], deleteMetadata).Return(nil).Times(1) + dp.EXPECT().RemoveFromSets(mockIPSets[1:], deleteMetadata).Return(nil).Times(1) if !util.IsWindowsDP() { dp.EXPECT(). RemoveFromSets( []*ipsets.IPSetMetadata{ipsets.NewIPSetMetadata("app:test-pod", ipsets.NamedPorts)}, - dataplane.NewPodMetadata("test-namespace/test-pod", "1.2.3.4,8080", ""), + dataplane.NewPodMetadataMarkedForDelete("test-namespace/test-pod", "1.2.3.4,8080"), ). Return(nil).Times(1) } @@ -831,13 +835,14 @@ func TestPodStatusUpdatePod(t *testing.T) { } dp.EXPECT().ApplyDataPlane().Return(nil).Times(2) // Delete pod section - dp.EXPECT().RemoveFromSets(mockIPSets[:1], podMetadata1).Return(nil).Times(1) - dp.EXPECT().RemoveFromSets(mockIPSets[1:], podMetadata1).Return(nil).Times(1) + deleteMetadata := dataplane.NewPodMetadataMarkedForDelete("test-namespace/test-pod", "1.2.3.4") + dp.EXPECT().RemoveFromSets(mockIPSets[:1], deleteMetadata).Return(nil).Times(1) + dp.EXPECT().RemoveFromSets(mockIPSets[1:], deleteMetadata).Return(nil).Times(1) if !util.IsWindowsDP() { dp.EXPECT(). RemoveFromSets( []*ipsets.IPSetMetadata{ipsets.NewIPSetMetadata("app:test-pod", ipsets.NamedPorts)}, - dataplane.NewPodMetadata("test-namespace/test-pod", "1.2.3.4,8080", ""), + dataplane.NewPodMetadataMarkedForDelete("test-namespace/test-pod", "1.2.3.4,8080"), ). Return(nil).Times(1) } diff --git a/npm/pkg/dataplane/dataplane.go b/npm/pkg/dataplane/dataplane.go index 71218aaa4b..32d6d24b46 100644 --- a/npm/pkg/dataplane/dataplane.go +++ b/npm/pkg/dataplane/dataplane.go @@ -135,7 +135,12 @@ func (dp *DataPlane) AddToSets(setNames []*ipsets.IPSetMetadata, podMetadata *Po return fmt.Errorf("[DataPlane] error while adding to set: %w", err) } - if dp.shouldUpdatePod() && podMetadata.NodeName == dp.nodeName { + if dp.shouldUpdatePod() && (podMetadata.NodeName == dp.nodeName || podMetadata.isMarkedForDelete()) { + if podMetadata.isMarkedForDelete() { + metrics.SendErrorLogAndMetric(util.DaemonDataplaneID, "[DataPlane] pod key %s is unexpectedly marked for delete in AddToSets", podMetadata.PodKey) + return nil + } + klog.Infof("[DataPlane] Updating Sets to Add for pod key %s", podMetadata.PodKey) // lock updatePodCache while reading/modifying or setting the updatePod in the cache @@ -163,8 +168,12 @@ func (dp *DataPlane) RemoveFromSets(setNames []*ipsets.IPSetMetadata, podMetadat return fmt.Errorf("[DataPlane] error while removing from set: %w", err) } - if dp.shouldUpdatePod() && podMetadata.NodeName == dp.nodeName { - klog.Infof("[DataPlane] Updating Sets to Remove for pod key %s", podMetadata.PodKey) + if dp.shouldUpdatePod() && (podMetadata.NodeName == dp.nodeName || podMetadata.isMarkedForDelete()) { + if podMetadata.isMarkedForDelete() { + klog.Infof("[DataPlane] pod key %s marked for delete in RemoveFromSets", podMetadata.PodKey) + } else { + klog.Infof("[DataPlane] Updating Sets to Remove for pod key %s", podMetadata.PodKey) + } // lock updatePodCache while reading/modifying or setting the updatePod in the cache dp.updatePodCache.Lock() @@ -177,7 +186,19 @@ func (dp *DataPlane) RemoveFromSets(setNames []*ipsets.IPSetMetadata, podMetadat dp.updatePodCache.cache[podMetadata.PodKey] = updatePod } - updatePod.updateIPSetsToRemove(setNames) + if podMetadata.isMarkedForDelete() { + // mark IP for delete + if updatePod.ipsMarkedForDelete == nil { + updatePod.ipsMarkedForDelete = make(map[string]struct{}, 1) + } + updatePod.ipsMarkedForDelete[podMetadata.PodIP] = struct{}{} + + // discard all previous IPSet updates, and disregard all IPSet updates for this pod marked for delete + updatePod.IPSetsToAdd = nil + updatePod.IPSetsToRemove = nil + } else { + updatePod.updateIPSetsToRemove(setNames) + } } return nil diff --git a/npm/pkg/dataplane/dataplane_windows.go b/npm/pkg/dataplane/dataplane_windows.go index 6c2c8d9ad6..da4bb006d8 100644 --- a/npm/pkg/dataplane/dataplane_windows.go +++ b/npm/pkg/dataplane/dataplane_windows.go @@ -113,41 +113,56 @@ func (dp *DataPlane) shouldUpdatePod() bool { // updatePod has two responsibilities in windows // 1. Will call into dataplane and updates endpoint references of this pod. // 2. Will check for existing applicable network policies and applies it on endpoint. -/* - FIXME: see https://github.com/Azure/azure-container-networking/issues/1729 - TODO: it would be good to replace stalePodKey behavior since it is complex. -*/ +// Assumptions: +// - We will receive a cleanup event for a Pod in the correct order (i.e. after a create event for the same Pod) func (dp *DataPlane) updatePod(pod *updateNPMPod) error { klog.Infof("[DataPlane] updatePod called for Pod Key %s", pod.PodKey) - if pod.NodeName != dp.nodeName { - // Ignore updates if the pod is not part of this node. - klog.Infof("[DataPlane] ignoring update pod as expected Node: [%s] got: [%s]. pod: [%s]", dp.nodeName, pod.NodeName, pod.PodKey) - return nil - } // lock the endpoint cache while we read/modify the endpoint with the pod's IP dp.endpointCache.Lock() defer dp.endpointCache.Unlock() + // reset ACLs for any Pod that was wrongly assigned to an endpoint (fixes #1729) + for ip := range pod.ipsMarkedForDelete { + endpoint, ok := dp.endpointCache.cache[ip] + if !ok { + // this branch will be taken often for the typical flow. 1) Endpoint deleted, 2) Pod delete event + // additionally, this branch will be always be taken for an IP on a different node + klog.Infof("[DataPlane] ignoring IP marked for delete since there is no corresponding endpoint. IP: %s. podKey: %s", ip, pod.PodKey) + continue + } + + if endpoint.podKey != pod.PodKey { + klog.Infof("[DataPlane] ignoring IP marked for delete since it is not associated with this endpoint. podKey: %s. endpoint: %+v", pod.PodKey, endpoint) + continue + } + + // this Pod was incorrectly assigned to the Endpoint (see issue #1729) + if err := dp.policyMgr.ResetEndpoint(endpoint.id); err != nil { + return fmt.Errorf("failed to reset endpoint. endpoint: %+v. err: %w", endpoint, err) + } + + // mark the Endpoint as unassigned + endpoint.podKey = unspecifiedPodKey + delete(pod.ipsMarkedForDelete, ip) + } + + if len(pod.IPSetsToAdd) == 0 && len(pod.IPSetsToRemove) == 0 { + // nothing more to do + return nil + } + // Check if pod is already present in cache endpoint, ok := dp.endpointCache.cache[pod.PodIP] if !ok { // ignore this err and pod endpoint will be deleted in ApplyDP // if the endpoint is not found, it means the pod is not part of this node or pod got deleted. - klog.Warningf("[DataPlane] did not find endpoint with IPaddress %s for pod %s", pod.PodIP, pod.PodKey) + klog.Warningf("[DataPlane] ignoring pod update since there is no corresponding endpoint. IP: %s. podKey: %s", pod.PodIP, pod.PodKey) return nil } if endpoint.podKey == unspecifiedPodKey { // while refreshing pod endpoints, newly discovered endpoints are given an unspecified pod key - if endpoint.isStalePodKey(pod.PodKey) { - // NOTE: if a pod restarts and takes up its previous IP, then its endpoint would be new and this branch would be taken. - // Updates to this pod would not occur. Pod IPs are expected to change on restart though. - // See: https://stackoverflow.com/questions/52362514/when-will-the-kubernetes-pod-ip-change - // If a pod does restart and take up its previous IP, then the pod can be deleted/restarted to mitigate this problem. - klog.Infof("[DataPlane] ignoring pod update since pod with key %s is stale and likely was deleted", pod.PodKey) - return nil - } endpoint.podKey = pod.PodKey } else if pod.PodKey != endpoint.podKey { return fmt.Errorf("pod key mismatch. Expected: %s, Actual: %s. Error: [%w]", pod.PodKey, endpoint.podKey, errMismanagedPodKey) @@ -286,15 +301,13 @@ func (dp *DataPlane) getEndpointsToApplyPolicy(policy *policies.NPMNetworkPolicy for ip, podKey := range netpolSelectorIPs { endpoint, ok := dp.endpointCache.cache[ip] if !ok { - klog.Infof("[DataPlane] Ignoring endpoint with IP %s since it was not found in the endpoint cache. This IP might not be in the HNS network", ip) + klog.Infof("[DataPlane] ignoring selector IP since it was not found in the endpoint cache and might not be in the HNS network. ip: %s. podKey: %s", ip, podKey) continue } if endpoint.podKey != podKey { // in case the pod controller hasn't updated the dp yet that the IP's pod owner has changed - klog.Infof( - "[DataPlane] ignoring endpoint with IP %s since the pod keys are different. podKey: [%s], endpoint: [%+v], endpoint stale pod key: [%+v]", - ip, podKey, endpoint, endpoint.stalePodKey) + klog.Infof("[DataPlane] ignoring selector IP since the endpoint is assigned to a different podKey. ip: %s. podKey: %s. endpoint: %+v", ip, podKey, endpoint) continue } diff --git a/npm/pkg/dataplane/policies/policymanager.go b/npm/pkg/dataplane/policies/policymanager.go index 775bfd44ac..152a76c9c0 100644 --- a/npm/pkg/dataplane/policies/policymanager.go +++ b/npm/pkg/dataplane/policies/policymanager.go @@ -73,6 +73,13 @@ func NewPolicyManager(ioShim *common.IOShim, cfg *PolicyManagerCfg) *PolicyManag } } +func (pMgr *PolicyManager) ResetEndpoint(epID string) error { + if util.IsWindowsDP() { + return pMgr.bootup([]string{epID}) + } + return nil +} + func (pMgr *PolicyManager) Bootup(epIDs []string) error { metrics.ResetNumACLRules() if err := pMgr.bootup(epIDs); err != nil { diff --git a/npm/pkg/dataplane/types.go b/npm/pkg/dataplane/types.go index d8fd26afc3..26cdc5102f 100644 --- a/npm/pkg/dataplane/types.go +++ b/npm/pkg/dataplane/types.go @@ -31,20 +31,27 @@ type GenericDataplane interface { // to update the dataplane with the latest pod information // this helps in calculating if any update needs to have policies applied or removed type updateNPMPod struct { - *PodMetadata - IPSetsToAdd []string - IPSetsToRemove []string + PodKey string + PodIP string + NodeName string + // ipsMarkedForDelete tracks IPs that this pod key was associated with before + // it would rare for this to be more than one IP, but possible if ApplyDataPlane fails several times or ApplyDataPlane is asynchronous + ipsMarkedForDelete map[string]struct{} + IPSetsToAdd []string + IPSetsToRemove []string } // PodMetadata is what is passed to dataplane to specify pod ipset // todo definitely requires further optimization between the intersection // of types, PodMetadata, NpmPod and corev1.pod type PodMetadata struct { - PodKey string - PodIP string - NodeName string + PodKey string + PodIP string + NodeName string + markedForDelete bool } +// NewPodMetadata is for Pods that were created or have updated labels/namedports func NewPodMetadata(podKey, podIP, nodeName string) *PodMetadata { return &PodMetadata{ PodKey: podKey, @@ -53,15 +60,27 @@ func NewPodMetadata(podKey, podIP, nodeName string) *PodMetadata { } } +// NewPodMetadataMarkedForDelete is for Pods that were deleted (e.g. Pod IP has changed) +func NewPodMetadataMarkedForDelete(podKey, podIP string) *PodMetadata { + pm := NewPodMetadata(podKey, podIP, "") + pm.markedForDelete = true + return pm +} + +func (pm *PodMetadata) isMarkedForDelete() bool { + return pm.markedForDelete +} + func (p *PodMetadata) Namespace() string { return strings.Split(p.PodKey, "/")[0] } func newUpdateNPMPod(podMetadata *PodMetadata) *updateNPMPod { return &updateNPMPod{ - PodMetadata: podMetadata, - IPSetsToAdd: make([]string, 0), - IPSetsToRemove: make([]string, 0), + PodKey: podMetadata.PodKey, + PodIP: podMetadata.PodIP, + NodeName: podMetadata.NodeName, + // can leave all slices as nil since len() and append() work on nil slices } } From d3080ddcb8fd8102b32d98c5552dc6297b864001 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Wed, 15 Feb 2023 16:22:31 -0800 Subject: [PATCH 02/15] simplify: remove stalePodKey --- npm/pkg/dataplane/dataplane_windows.go | 33 ++++---------------------- npm/pkg/dataplane/types_windows.go | 17 +------------ 2 files changed, 5 insertions(+), 45 deletions(-) diff --git a/npm/pkg/dataplane/dataplane_windows.go b/npm/pkg/dataplane/dataplane_windows.go index da4bb006d8..ca6f349f8c 100644 --- a/npm/pkg/dataplane/dataplane_windows.go +++ b/npm/pkg/dataplane/dataplane_windows.go @@ -365,7 +365,6 @@ func (dp *DataPlane) refreshPodEndpoints() error { dp.endpointCache.Lock() defer dp.endpointCache.Unlock() - currentTime := time.Now().Unix() existingIPs := make(map[string]struct{}) for _, endpoint := range endpoints { if len(endpoint.IpConfigurations) == 0 { @@ -392,40 +391,16 @@ func (dp *DataPlane) refreshPodEndpoints() error { // throw away old endpoints that have the same IP as a current endpoint (the old endpoint is getting deleted) // we don't have to worry about cleaning up network policies on endpoints that are getting deleted npmEP := newNPMEndpoint(endpoint) - if oldNPMEP.podKey == unspecifiedPodKey { - klog.Infof("updating endpoint cache since endpoint changed for IP which never had a pod key. new endpoint: %s, old endpoint: %s, ip: %s", npmEP.id, oldNPMEP.id, npmEP.ip) - dp.endpointCache.cache[ip] = npmEP - } else { - npmEP.stalePodKey = &staleKey{ - key: oldNPMEP.podKey, - timestamp: currentTime, - } - dp.endpointCache.cache[ip] = npmEP - // NOTE: TSGs rely on this log line - klog.Infof("updating endpoint cache for previously cached IP %s: %+v with stalePodKey %+v", npmEP.ip, npmEP, npmEP.stalePodKey) - } + klog.Infof("[DataPlane] updating endpoint cache for IP with a new endpoint. old endpoint: %+v. new endpoint: %+v", oldNPMEP, npmEP) + dp.endpointCache.cache[ip] = npmEP } } // garbage collection for the endpoint cache for ip, ep := range dp.endpointCache.cache { if _, ok := existingIPs[ip]; !ok { - if ep.podKey == unspecifiedPodKey { - if ep.stalePodKey == nil { - klog.Infof("deleting old endpoint which never had a pod key. ID: %s, IP: %s", ep.id, ip) - delete(dp.endpointCache.cache, ip) - } else if int(currentTime-ep.stalePodKey.timestamp)/60 > minutesToKeepStalePodKey { - klog.Infof("deleting old endpoint which had a stale pod key. ID: %s, IP: %s, stalePodKey: %+v", ep.id, ip, ep.stalePodKey) - delete(dp.endpointCache.cache, ip) - } - } else { - ep.stalePodKey = &staleKey{ - key: ep.podKey, - timestamp: currentTime, - } - ep.podKey = unspecifiedPodKey - klog.Infof("marking endpoint stale for at least %d minutes. ID: %s, IP: %s, new stalePodKey: %+v", minutesToKeepStalePodKey, ep.id, ip, ep.stalePodKey) - } + klog.Infof("[DataPlane] deleting endpoint from cache. endpoint: %+v", ep) + delete(dp.endpointCache.cache, ip) } } diff --git a/npm/pkg/dataplane/types_windows.go b/npm/pkg/dataplane/types_windows.go index 9dff506afe..f4d911993d 100644 --- a/npm/pkg/dataplane/types_windows.go +++ b/npm/pkg/dataplane/types_windows.go @@ -2,10 +2,7 @@ package dataplane import "github.com/Microsoft/hcsshim/hcn" -const ( - unspecifiedPodKey = "" - minutesToKeepStalePodKey = 10 -) +const unspecifiedPodKey = "" // npmEndpoint holds info relevant for endpoints in windows type npmEndpoint struct { @@ -13,19 +10,11 @@ type npmEndpoint struct { id string ip string podKey string - // stalePodKey is used to keep track of the previous pod that had this IP - stalePodKey *staleKey // Map with Key as Network Policy name to to emulate set // and value as struct{} for minimal memory consumption netPolReference map[string]struct{} } -type staleKey struct { - key string - // timestamp represents the Unix time this struct was created - timestamp int64 -} - // newNPMEndpoint initializes npmEndpoint and copies relevant information from hcn.HostComputeEndpoint. // This function must be defined in a file with a windows build tag for proper vendoring since it uses the hcn pkg func newNPMEndpoint(endpoint *hcn.HostComputeEndpoint) *npmEndpoint { @@ -37,7 +26,3 @@ func newNPMEndpoint(endpoint *hcn.HostComputeEndpoint) *npmEndpoint { ip: endpoint.IpConfigurations[0].IpAddress, } } - -func (ep *npmEndpoint) isStalePodKey(podKey string) bool { - return ep.stalePodKey != nil && ep.stalePodKey.key == podKey -} From 7baa098477b643a5aacf2db21add5799c8eaec2a Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Thu, 16 Feb 2023 13:58:49 -0800 Subject: [PATCH 03/15] add log for associating pod with endpoint --- npm/pkg/dataplane/dataplane_windows.go | 1 + 1 file changed, 1 insertion(+) diff --git a/npm/pkg/dataplane/dataplane_windows.go b/npm/pkg/dataplane/dataplane_windows.go index ca6f349f8c..a9e146063d 100644 --- a/npm/pkg/dataplane/dataplane_windows.go +++ b/npm/pkg/dataplane/dataplane_windows.go @@ -163,6 +163,7 @@ func (dp *DataPlane) updatePod(pod *updateNPMPod) error { if endpoint.podKey == unspecifiedPodKey { // while refreshing pod endpoints, newly discovered endpoints are given an unspecified pod key + klog.Infof("[DataPlane] associating pod with endpoint. podKey: %s. endpoint: %+v", pod.PodKey, endpoint) endpoint.podKey = pod.PodKey } else if pod.PodKey != endpoint.podKey { return fmt.Errorf("pod key mismatch. Expected: %s, Actual: %s. Error: [%w]", pod.PodKey, endpoint.podKey, errMismanagedPodKey) From 4cdc6203b6149cd8ddb16836d8d31392d4c9c890 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Wed, 15 Feb 2023 17:58:49 -0800 Subject: [PATCH 04/15] reorganize win dp uts --- .../dataplane-test-cases_windows_test.go | 28 ++++++++++++++++++- npm/pkg/dataplane/dataplane_windows_test.go | 14 +++++++--- 2 files changed, 37 insertions(+), 5 deletions(-) diff --git a/npm/pkg/dataplane/dataplane-test-cases_windows_test.go b/npm/pkg/dataplane/dataplane-test-cases_windows_test.go index e403eb8dd1..070d5060ef 100644 --- a/npm/pkg/dataplane/dataplane-test-cases_windows_test.go +++ b/npm/pkg/dataplane/dataplane-test-cases_windows_test.go @@ -88,7 +88,33 @@ func policyXBaseOnK1V1() *networkingv1.NetworkPolicy { } } -func getAllSerialTests() []*SerialTestCase { +func policyXBase2OnK2V2() *networkingv1.NetworkPolicy { + return &networkingv1.NetworkPolicy{ + ObjectMeta: metav1.ObjectMeta{ + Name: "base2", + Namespace: "x", + }, + Spec: networkingv1.NetworkPolicySpec{ + PodSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "k2": "v2", + }, + }, + Ingress: []networkingv1.NetworkPolicyIngressRule{ + {}, + }, + Egress: []networkingv1.NetworkPolicyEgressRule{ + {}, + }, + PolicyTypes: []networkingv1.PolicyType{ + networkingv1.PolicyTypeIngress, + networkingv1.PolicyTypeEgress, + }, + }, + } +} + +func basicTests() []*SerialTestCase { return []*SerialTestCase{ { Description: "pod created", diff --git a/npm/pkg/dataplane/dataplane_windows_test.go b/npm/pkg/dataplane/dataplane_windows_test.go index 5fd4a18b6f..054d66073b 100644 --- a/npm/pkg/dataplane/dataplane_windows_test.go +++ b/npm/pkg/dataplane/dataplane_windows_test.go @@ -18,8 +18,15 @@ const ( threadedHNSLatency = time.Duration(50 * time.Millisecond) ) -func TestAllSerialCases(t *testing.T) { - tests := getAllSerialTests() +func TestBasics(t *testing.T) { + testSerialCases(t, basicTests()) +} + +func TestAllMultiJobCases(t *testing.T) { + testMultiJobCases(t, getAllMultiJobTests()) +} + +func testSerialCases(t *testing.T, tests []*SerialTestCase) { for i, tt := range tests { i := i tt := tt @@ -53,8 +60,7 @@ func TestAllSerialCases(t *testing.T) { } } -func TestAllMultiJobCases(t *testing.T) { - tests := getAllMultiJobTests() +func testMultiJobCases(t *testing.T, tests []*MultiJobTestCase) { for i, tt := range tests { i := i tt := tt From e3456a7ef5904e1a84517a1a9cd943ba0919699e Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Wed, 15 Feb 2023 18:00:08 -0800 Subject: [PATCH 05/15] UTs for issue 1729 (fail because podMarkedForDelete is not used) --- .../dataplane-test-cases_windows_test.go | 380 ++++++++++++++++++ npm/pkg/dataplane/dataplane_windows_test.go | 4 + 2 files changed, 384 insertions(+) diff --git a/npm/pkg/dataplane/dataplane-test-cases_windows_test.go b/npm/pkg/dataplane/dataplane-test-cases_windows_test.go index 070d5060ef..ce6dfa10f4 100644 --- a/npm/pkg/dataplane/dataplane-test-cases_windows_test.go +++ b/npm/pkg/dataplane/dataplane-test-cases_windows_test.go @@ -540,6 +540,386 @@ func basicTests() []*SerialTestCase { } } +// see issue #1729 +// TODO: other orders +func podEndpointAssignmentTests() []*SerialTestCase { + sequence1Tests := []*SerialTestCase{ + { + Description: "Sequence 1: Pod A create --> Policy create --> Pod A cleanup --> Pod B create", + Actions: []*Action{ + CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + ApplyDP(), + UpdatePolicy(policyXBaseOnK1V1()), + UpdatePolicy(policyXBase2OnK2V2()), + DeletePod("x", "a", ip1, map[string]string{"k1": "v1"}), + ApplyDP(), + CreatePod("x", "b", ip1, thisNode, map[string]string{"k2": "v2"}), + ApplyDP(), + }, + TestCaseMetadata: &TestCaseMetadata{ + Tags: []Tag{ + podCrudTag, + netpolCrudTag, + }, + DpCfg: defaultWindowsDPCfg, + InitialEndpoints: []*hcn.HostComputeEndpoint{ + dptestutils.Endpoint(endpoint1, ip1), + }, + ExpectedSetPolicies: []*hcn.SetPolicySetting{ + dptestutils.SetPolicy(emptySet), + dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), + dptestutils.SetPolicy(nsXSet, ip1), + // old labels (not yet garbage collected) + dptestutils.SetPolicy(podK1Set), + dptestutils.SetPolicy(podK1V1Set), + // new labels + dptestutils.SetPolicy(podK2Set, ip1), + dptestutils.SetPolicy(podK2V2Set, ip1), + }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ + endpoint1: { + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "In", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "Out", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + }, + }, + }, + }, + { + Description: "Sequence 1: Policy create --> Pod A create --> Pod A cleanup --> Pod B create", + Actions: []*Action{ + UpdatePolicy(policyXBaseOnK1V1()), + UpdatePolicy(policyXBase2OnK2V2()), + CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + ApplyDP(), + DeletePod("x", "a", ip1, map[string]string{"k1": "v1"}), + ApplyDP(), + CreatePod("x", "b", ip1, thisNode, map[string]string{"k2": "v2"}), + ApplyDP(), + }, + TestCaseMetadata: &TestCaseMetadata{ + Tags: []Tag{ + podCrudTag, + netpolCrudTag, + }, + DpCfg: defaultWindowsDPCfg, + InitialEndpoints: []*hcn.HostComputeEndpoint{ + dptestutils.Endpoint(endpoint1, ip1), + }, + ExpectedSetPolicies: []*hcn.SetPolicySetting{ + dptestutils.SetPolicy(emptySet), + dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), + dptestutils.SetPolicy(nsXSet, ip1), + // old labels (not yet garbage collected) + dptestutils.SetPolicy(podK1Set), + dptestutils.SetPolicy(podK1V1Set), + // new labels + dptestutils.SetPolicy(podK2Set, ip1), + dptestutils.SetPolicy(podK2V2Set, ip1), + }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ + endpoint1: { + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "In", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "Out", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + }, + }, + }, + }, + { + Description: "Sequence 1: Policy create --> Pod A create --> Pod A cleanup --> Pod B create (skip first apply DP)", + Actions: []*Action{ + UpdatePolicy(policyXBaseOnK1V1()), + UpdatePolicy(policyXBase2OnK2V2()), + CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + DeletePod("x", "a", ip1, map[string]string{"k1": "v1"}), + ApplyDP(), + CreatePod("x", "b", ip1, thisNode, map[string]string{"k2": "v2"}), + ApplyDP(), + }, + TestCaseMetadata: &TestCaseMetadata{ + Tags: []Tag{ + podCrudTag, + netpolCrudTag, + }, + DpCfg: defaultWindowsDPCfg, + InitialEndpoints: []*hcn.HostComputeEndpoint{ + dptestutils.Endpoint(endpoint1, ip1), + }, + ExpectedSetPolicies: []*hcn.SetPolicySetting{ + dptestutils.SetPolicy(emptySet), + dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), + dptestutils.SetPolicy(nsXSet, ip1), + // old labels (not yet garbage collected) + dptestutils.SetPolicy(podK1Set), + dptestutils.SetPolicy(podK1V1Set), + // new labels + dptestutils.SetPolicy(podK2Set, ip1), + dptestutils.SetPolicy(podK2V2Set, ip1), + }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ + endpoint1: { + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "In", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "Out", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + }, + }, + }, + }, + { + Description: "Sequence 1: Policy create --> Pod A create --> Pod A cleanup --> Pod B create (skip first two apply DP)", + Actions: []*Action{ + UpdatePolicy(policyXBaseOnK1V1()), + UpdatePolicy(policyXBase2OnK2V2()), + CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + DeletePod("x", "a", ip1, map[string]string{"k1": "v1"}), + CreatePod("x", "b", ip1, thisNode, map[string]string{"k2": "v2"}), + ApplyDP(), + }, + TestCaseMetadata: &TestCaseMetadata{ + Tags: []Tag{ + podCrudTag, + netpolCrudTag, + }, + DpCfg: defaultWindowsDPCfg, + InitialEndpoints: []*hcn.HostComputeEndpoint{ + dptestutils.Endpoint(endpoint1, ip1), + }, + ExpectedSetPolicies: []*hcn.SetPolicySetting{ + dptestutils.SetPolicy(emptySet), + dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), + dptestutils.SetPolicy(nsXSet, ip1), + // old labels (not yet garbage collected) + dptestutils.SetPolicy(podK1Set), + dptestutils.SetPolicy(podK1V1Set), + // new labels + dptestutils.SetPolicy(podK2Set, ip1), + dptestutils.SetPolicy(podK2V2Set, ip1), + }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ + endpoint1: { + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "In", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "Out", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + }, + }, + }, + }, + } + + sequence2Tests := []*SerialTestCase{ + { + Description: "Sequence 2: Policy create --> Pod A Create --> Pod B create", + Actions: []*Action{ + UpdatePolicy(policyXBaseOnK1V1()), + UpdatePolicy(policyXBase2OnK2V2()), + CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + ApplyDP(), + CreatePod("x", "b", ip1, thisNode, map[string]string{"k2": "v2"}), + ApplyDP(), + }, + TestCaseMetadata: &TestCaseMetadata{ + Tags: []Tag{ + podCrudTag, + netpolCrudTag, + }, + DpCfg: defaultWindowsDPCfg, + InitialEndpoints: []*hcn.HostComputeEndpoint{ + dptestutils.Endpoint(endpoint1, ip1), + }, + ExpectedSetPolicies: []*hcn.SetPolicySetting{ + dptestutils.SetPolicy(emptySet), + dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), + dptestutils.SetPolicy(nsXSet, ip1), + // IP temporarily associated with IPSets of both pod A and pod B + // Pod A sets + dptestutils.SetPolicy(podK1Set, ip1), + dptestutils.SetPolicy(podK1V1Set, ip1), + // Pod B sets + dptestutils.SetPolicy(podK2Set, ip1), + dptestutils.SetPolicy(podK2V2Set, ip1), + }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ + endpoint1: { + { + ID: "azure-acl-x-base", + Protocols: "", + Action: "Allow", + Direction: "In", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + { + ID: "azure-acl-x-base", + Protocols: "", + Action: "Allow", + Direction: "Out", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + }, + }, + }, + }, + { + Description: "Sequence 2: Policy create --> Pod A Create --> Pod B create --> Pod A cleanup", + Actions: []*Action{ + UpdatePolicy(policyXBaseOnK1V1()), + UpdatePolicy(policyXBase2OnK2V2()), + CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + ApplyDP(), + // UpdatePod() will fail for x/b since x/a is associated with the IP/Endpoint + CreatePod("x", "b", ip1, thisNode, map[string]string{"k2": "v2"}), + ApplyDP(), + DeletePod("x", "a", ip1, map[string]string{"k1": "v1"}), + ApplyDP(), + // NOTE: it may take two ApplyDP() calls if UpdatePod() is called on x/b before x/a + // ApplyDP(), + }, + TestCaseMetadata: &TestCaseMetadata{ + Tags: []Tag{ + podCrudTag, + netpolCrudTag, + }, + DpCfg: defaultWindowsDPCfg, + InitialEndpoints: []*hcn.HostComputeEndpoint{ + dptestutils.Endpoint(endpoint1, ip1), + }, + ExpectedSetPolicies: []*hcn.SetPolicySetting{ + dptestutils.SetPolicy(emptySet), + dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), + dptestutils.SetPolicy(nsXSet, ip1), + // old labels (not yet garbage collected) + dptestutils.SetPolicy(podK1Set), + dptestutils.SetPolicy(podK1V1Set), + // new labels + dptestutils.SetPolicy(podK2Set, ip1), + dptestutils.SetPolicy(podK2V2Set, ip1), + }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ + endpoint1: { + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "In", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "Out", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + }, + }, + }, + }, + } + + // TODO + sequence3Tests := []*SerialTestCase{ + // { + // Description: "Sequence 3: Policy create --> Pod B Create --> Pod A create --> Pod A cleanup", + // }, + } + + allTests := append(sequence1Tests, sequence2Tests...) + allTests = append(allTests, sequence3Tests...) + return allTests +} + func getAllMultiJobTests() []*MultiJobTestCase { return []*MultiJobTestCase{ { diff --git a/npm/pkg/dataplane/dataplane_windows_test.go b/npm/pkg/dataplane/dataplane_windows_test.go index 054d66073b..4e5ddaa0fe 100644 --- a/npm/pkg/dataplane/dataplane_windows_test.go +++ b/npm/pkg/dataplane/dataplane_windows_test.go @@ -22,6 +22,10 @@ func TestBasics(t *testing.T) { testSerialCases(t, basicTests()) } +func TestPodEndpointAssignment(t *testing.T) { + testSerialCases(t, podEndpointAssignmentTests()) +} + func TestAllMultiJobCases(t *testing.T) { testMultiJobCases(t, getAllMultiJobTests()) } From 1181fe82211cfa55c5c28bdacc55d1e78fd04135 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Wed, 15 Feb 2023 18:03:23 -0800 Subject: [PATCH 06/15] update controller mock to use deleted pod metadata --- npm/pkg/dataplane/types_windows_test.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/npm/pkg/dataplane/types_windows_test.go b/npm/pkg/dataplane/types_windows_test.go index 996a18319f..5114467549 100644 --- a/npm/pkg/dataplane/types_windows_test.go +++ b/npm/pkg/dataplane/types_windows_test.go @@ -260,8 +260,7 @@ func DeletePod(namespace, name, ip string, labels map[string]string) *Action { podKey := fmt.Sprintf("%s/%s", namespace, name) return &Action{ DPAction: &PodDeleteAction{ - // currently, the PodController doesn't share the node name - Pod: NewPodMetadata(podKey, ip, ""), + Pod: NewPodMetadataMarkedForDelete(podKey, ip), Labels: labels, }, } From 01487d35d8c21b627200feadda8d15affda74eca Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Fri, 17 Feb 2023 09:49:16 -0800 Subject: [PATCH 07/15] rename podMarkedForDelete to deletedPod --- .../controllers/v2/podController.go | 4 ++-- .../controllers/v2/podController_test.go | 20 +++++++++---------- npm/pkg/dataplane/dataplane.go | 10 +++++----- npm/pkg/dataplane/types.go | 18 ++++++++--------- npm/pkg/dataplane/types_windows_test.go | 2 +- 5 files changed, 27 insertions(+), 27 deletions(-) diff --git a/npm/pkg/controlplane/controllers/v2/podController.go b/npm/pkg/controlplane/controllers/v2/podController.go index 9a35f2407c..06611602bf 100644 --- a/npm/pkg/controlplane/controllers/v2/podController.go +++ b/npm/pkg/controlplane/controllers/v2/podController.go @@ -543,7 +543,7 @@ func (c *PodController) cleanUpDeletedPod(cachedNpmPodKey string) error { } var err error - cachedPodMetadata := dataplane.NewPodMetadataMarkedForDelete(cachedNpmPodKey, cachedNpmPod.PodIP) + cachedPodMetadata := dataplane.NewDeletedPodMetadata(cachedNpmPodKey, cachedNpmPod.PodIP) // Delete the pod from its namespace's ipset. if err = c.dp.RemoveFromSets( []*ipsets.IPSetMetadata{ipsets.NewIPSetMetadata(cachedNpmPod.Namespace, ipsets.Namespace)}, @@ -613,7 +613,7 @@ func (c *PodController) manageNamedPortIpsets(portList []corev1.ContainerPort, p return fmt.Errorf("failed to add to set when deleting named port with err %w", err) } case deletePodAndNamedPort: - podMetadata := dataplane.NewPodMetadataMarkedForDelete(podKey, namedPortIpsetEntry) + podMetadata := dataplane.NewDeletedPodMetadata(podKey, namedPortIpsetEntry) if err := c.dp.RemoveFromSets([]*ipsets.IPSetMetadata{ipsets.NewIPSetMetadata(port.Name, ipsets.NamedPorts)}, podMetadata); err != nil { return fmt.Errorf("failed to remove from set when deleting pod and named port with err %w", err) } diff --git a/npm/pkg/controlplane/controllers/v2/podController_test.go b/npm/pkg/controlplane/controllers/v2/podController_test.go index 824040529e..8faeb2ef9f 100644 --- a/npm/pkg/controlplane/controllers/v2/podController_test.go +++ b/npm/pkg/controlplane/controllers/v2/podController_test.go @@ -429,14 +429,14 @@ func TestDeletePod(t *testing.T) { } dp.EXPECT().ApplyDataPlane().Return(nil).Times(2) // Delete pod section - deleteMetadata := dataplane.NewPodMetadataMarkedForDelete("test-namespace/test-pod", "1.2.3.4") + deleteMetadata := dataplane.NewDeletedPodMetadata("test-namespace/test-pod", "1.2.3.4") dp.EXPECT().RemoveFromSets(mockIPSets[:1], deleteMetadata).Return(nil).Times(1) dp.EXPECT().RemoveFromSets(mockIPSets[1:], deleteMetadata).Return(nil).Times(1) if !util.IsWindowsDP() { dp.EXPECT(). RemoveFromSets( []*ipsets.IPSetMetadata{ipsets.NewIPSetMetadata("app:test-pod", ipsets.NamedPorts)}, - dataplane.NewPodMetadataMarkedForDelete("test-namespace/test-pod", "1.2.3.4,8080"), + dataplane.NewDeletedPodMetadata("test-namespace/test-pod", "1.2.3.4,8080"), ). Return(nil).Times(1) } @@ -546,14 +546,14 @@ func TestDeletePodWithTombstoneAfterAddingPod(t *testing.T) { } dp.EXPECT().ApplyDataPlane().Return(nil).Times(2) // Delete pod section - deleteMetadata := dataplane.NewPodMetadataMarkedForDelete("test-namespace/test-pod", "1.2.3.4") + deleteMetadata := dataplane.NewDeletedPodMetadata("test-namespace/test-pod", "1.2.3.4") dp.EXPECT().RemoveFromSets(mockIPSets[:1], deleteMetadata).Return(nil).Times(1) dp.EXPECT().RemoveFromSets(mockIPSets[1:], deleteMetadata).Return(nil).Times(1) if !util.IsWindowsDP() { dp.EXPECT(). RemoveFromSets( []*ipsets.IPSetMetadata{ipsets.NewIPSetMetadata("app:test-pod", ipsets.NamedPorts)}, - dataplane.NewPodMetadataMarkedForDelete("test-namespace/test-pod", "1.2.3.4,8080"), + dataplane.NewDeletedPodMetadata("test-namespace/test-pod", "1.2.3.4,8080"), ). Return(nil).Times(1) } @@ -666,14 +666,14 @@ func TestEmptyIPUpdate(t *testing.T) { } dp.EXPECT().ApplyDataPlane().Return(nil).Times(2) // Delete pod section - deleteMetadata := dataplane.NewPodMetadataMarkedForDelete("test-namespace/test-pod", "1.2.3.4") + deleteMetadata := dataplane.NewDeletedPodMetadata("test-namespace/test-pod", "1.2.3.4") dp.EXPECT().RemoveFromSets(mockIPSets[:1], deleteMetadata).Return(nil).Times(1) dp.EXPECT().RemoveFromSets(mockIPSets[1:], deleteMetadata).Return(nil).Times(1) if !util.IsWindowsDP() { dp.EXPECT(). RemoveFromSets( []*ipsets.IPSetMetadata{ipsets.NewIPSetMetadata("app:test-pod", ipsets.NamedPorts)}, - dataplane.NewPodMetadataMarkedForDelete("test-namespace/test-pod", "1.2.3.4,8080"), + dataplane.NewDeletedPodMetadata("test-namespace/test-pod", "1.2.3.4,8080"), ). Return(nil).Times(1) } @@ -760,14 +760,14 @@ func TestIPAddressUpdatePod(t *testing.T) { } dp.EXPECT().ApplyDataPlane().Return(nil).Times(2) // Delete pod section - deleteMetadata := dataplane.NewPodMetadataMarkedForDelete("test-namespace/test-pod", "1.2.3.4") + deleteMetadata := dataplane.NewDeletedPodMetadata("test-namespace/test-pod", "1.2.3.4") dp.EXPECT().RemoveFromSets(mockIPSets[:1], deleteMetadata).Return(nil).Times(1) dp.EXPECT().RemoveFromSets(mockIPSets[1:], deleteMetadata).Return(nil).Times(1) if !util.IsWindowsDP() { dp.EXPECT(). RemoveFromSets( []*ipsets.IPSetMetadata{ipsets.NewIPSetMetadata("app:test-pod", ipsets.NamedPorts)}, - dataplane.NewPodMetadataMarkedForDelete("test-namespace/test-pod", "1.2.3.4,8080"), + dataplane.NewDeletedPodMetadata("test-namespace/test-pod", "1.2.3.4,8080"), ). Return(nil).Times(1) } @@ -835,14 +835,14 @@ func TestPodStatusUpdatePod(t *testing.T) { } dp.EXPECT().ApplyDataPlane().Return(nil).Times(2) // Delete pod section - deleteMetadata := dataplane.NewPodMetadataMarkedForDelete("test-namespace/test-pod", "1.2.3.4") + deleteMetadata := dataplane.NewDeletedPodMetadata("test-namespace/test-pod", "1.2.3.4") dp.EXPECT().RemoveFromSets(mockIPSets[:1], deleteMetadata).Return(nil).Times(1) dp.EXPECT().RemoveFromSets(mockIPSets[1:], deleteMetadata).Return(nil).Times(1) if !util.IsWindowsDP() { dp.EXPECT(). RemoveFromSets( []*ipsets.IPSetMetadata{ipsets.NewIPSetMetadata("app:test-pod", ipsets.NamedPorts)}, - dataplane.NewPodMetadataMarkedForDelete("test-namespace/test-pod", "1.2.3.4,8080"), + dataplane.NewDeletedPodMetadata("test-namespace/test-pod", "1.2.3.4,8080"), ). Return(nil).Times(1) } diff --git a/npm/pkg/dataplane/dataplane.go b/npm/pkg/dataplane/dataplane.go index 32d6d24b46..f49eb8b6af 100644 --- a/npm/pkg/dataplane/dataplane.go +++ b/npm/pkg/dataplane/dataplane.go @@ -135,8 +135,8 @@ func (dp *DataPlane) AddToSets(setNames []*ipsets.IPSetMetadata, podMetadata *Po return fmt.Errorf("[DataPlane] error while adding to set: %w", err) } - if dp.shouldUpdatePod() && (podMetadata.NodeName == dp.nodeName || podMetadata.isMarkedForDelete()) { - if podMetadata.isMarkedForDelete() { + if dp.shouldUpdatePod() && (podMetadata.NodeName == dp.nodeName || podMetadata.wasDeleted()) { + if podMetadata.wasDeleted() { metrics.SendErrorLogAndMetric(util.DaemonDataplaneID, "[DataPlane] pod key %s is unexpectedly marked for delete in AddToSets", podMetadata.PodKey) return nil } @@ -168,8 +168,8 @@ func (dp *DataPlane) RemoveFromSets(setNames []*ipsets.IPSetMetadata, podMetadat return fmt.Errorf("[DataPlane] error while removing from set: %w", err) } - if dp.shouldUpdatePod() && (podMetadata.NodeName == dp.nodeName || podMetadata.isMarkedForDelete()) { - if podMetadata.isMarkedForDelete() { + if dp.shouldUpdatePod() && (podMetadata.NodeName == dp.nodeName || podMetadata.wasDeleted()) { + if podMetadata.wasDeleted() { klog.Infof("[DataPlane] pod key %s marked for delete in RemoveFromSets", podMetadata.PodKey) } else { klog.Infof("[DataPlane] Updating Sets to Remove for pod key %s", podMetadata.PodKey) @@ -186,7 +186,7 @@ func (dp *DataPlane) RemoveFromSets(setNames []*ipsets.IPSetMetadata, podMetadat dp.updatePodCache.cache[podMetadata.PodKey] = updatePod } - if podMetadata.isMarkedForDelete() { + if podMetadata.wasDeleted() { // mark IP for delete if updatePod.ipsMarkedForDelete == nil { updatePod.ipsMarkedForDelete = make(map[string]struct{}, 1) diff --git a/npm/pkg/dataplane/types.go b/npm/pkg/dataplane/types.go index 26cdc5102f..5c1c866b57 100644 --- a/npm/pkg/dataplane/types.go +++ b/npm/pkg/dataplane/types.go @@ -45,10 +45,10 @@ type updateNPMPod struct { // todo definitely requires further optimization between the intersection // of types, PodMetadata, NpmPod and corev1.pod type PodMetadata struct { - PodKey string - PodIP string - NodeName string - markedForDelete bool + PodKey string + PodIP string + NodeName string + deleted bool } // NewPodMetadata is for Pods that were created or have updated labels/namedports @@ -60,15 +60,15 @@ func NewPodMetadata(podKey, podIP, nodeName string) *PodMetadata { } } -// NewPodMetadataMarkedForDelete is for Pods that were deleted (e.g. Pod IP has changed) -func NewPodMetadataMarkedForDelete(podKey, podIP string) *PodMetadata { +// NewDeletedPodMetadata is for Pods that were deleted (e.g. Pod IP has changed) +func NewDeletedPodMetadata(podKey, podIP string) *PodMetadata { pm := NewPodMetadata(podKey, podIP, "") - pm.markedForDelete = true + pm.deleted = true return pm } -func (pm *PodMetadata) isMarkedForDelete() bool { - return pm.markedForDelete +func (pm *PodMetadata) wasDeleted() bool { + return pm.deleted } func (p *PodMetadata) Namespace() string { diff --git a/npm/pkg/dataplane/types_windows_test.go b/npm/pkg/dataplane/types_windows_test.go index 5114467549..6cf1af946d 100644 --- a/npm/pkg/dataplane/types_windows_test.go +++ b/npm/pkg/dataplane/types_windows_test.go @@ -260,7 +260,7 @@ func DeletePod(namespace, name, ip string, labels map[string]string) *Action { podKey := fmt.Sprintf("%s/%s", namespace, name) return &Action{ DPAction: &PodDeleteAction{ - Pod: NewPodMetadataMarkedForDelete(podKey, ip), + Pod: NewDeletedPodMetadata(podKey, ip), Labels: labels, }, } From 12f5fe12ba73d982073b6915c93376d32da34251 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Fri, 17 Feb 2023 16:00:47 -0800 Subject: [PATCH 08/15] all UTs (noticing edge in ipsetmanager now) --- .../dataplane-test-cases_windows_test.go | 629 +++++++++++++++++- npm/pkg/dataplane/dataplane_windows_test.go | 2 +- 2 files changed, 620 insertions(+), 11 deletions(-) diff --git a/npm/pkg/dataplane/dataplane-test-cases_windows_test.go b/npm/pkg/dataplane/dataplane-test-cases_windows_test.go index ce6dfa10f4..95e80cb699 100644 --- a/npm/pkg/dataplane/dataplane-test-cases_windows_test.go +++ b/npm/pkg/dataplane/dataplane-test-cases_windows_test.go @@ -35,6 +35,8 @@ var ( podK1V1Set = ipsets.NewIPSetMetadata("k1:v1", ipsets.KeyValueLabelOfPod) podK2Set = ipsets.NewIPSetMetadata("k2", ipsets.KeyLabelOfPod) podK2V2Set = ipsets.NewIPSetMetadata("k2:v2", ipsets.KeyValueLabelOfPod) + podK3Set = ipsets.NewIPSetMetadata("k3", ipsets.KeyLabelOfPod) + podK3V3Set = ipsets.NewIPSetMetadata("k3:v3", ipsets.KeyValueLabelOfPod) // emptySet is a member of a list if enabled in the dp Config // in Windows, this Config option is actually forced to be enabled in NewDataPlane() @@ -114,6 +116,32 @@ func policyXBase2OnK2V2() *networkingv1.NetworkPolicy { } } +func policyXBase3OnK3V3() *networkingv1.NetworkPolicy { + return &networkingv1.NetworkPolicy{ + ObjectMeta: metav1.ObjectMeta{ + Name: "base3", + Namespace: "x", + }, + Spec: networkingv1.NetworkPolicySpec{ + PodSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "k3": "v3", + }, + }, + Ingress: []networkingv1.NetworkPolicyIngressRule{ + {}, + }, + Egress: []networkingv1.NetworkPolicyEgressRule{ + {}, + }, + PolicyTypes: []networkingv1.PolicyType{ + networkingv1.PolicyTypeIngress, + networkingv1.PolicyTypeEgress, + }, + }, + } +} + func basicTests() []*SerialTestCase { return []*SerialTestCase{ { @@ -540,9 +568,8 @@ func basicTests() []*SerialTestCase { } } -// see issue #1729 -// TODO: other orders -func podEndpointAssignmentTests() []*SerialTestCase { +// see issue #1729 for context on sequences 1, 2, 3 +func updatePodTests() []*SerialTestCase { sequence1Tests := []*SerialTestCase{ { Description: "Sequence 1: Pod A create --> Policy create --> Pod A cleanup --> Pod B create", @@ -785,7 +812,7 @@ func podEndpointAssignmentTests() []*SerialTestCase { sequence2Tests := []*SerialTestCase{ { - Description: "Sequence 2: Policy create --> Pod A Create --> Pod B create", + Description: "Sequence 2: Policy create --> Pod A Create --> Pod B create (Pod A has appropriate ACLs)", Actions: []*Action{ UpdatePolicy(policyXBaseOnK1V1()), UpdatePolicy(policyXBase2OnK2V2()), @@ -855,8 +882,103 @@ func podEndpointAssignmentTests() []*SerialTestCase { ApplyDP(), DeletePod("x", "a", ip1, map[string]string{"k1": "v1"}), ApplyDP(), - // NOTE: it may take two ApplyDP() calls if UpdatePod() is called on x/b before x/a - // ApplyDP(), + }, + TestCaseMetadata: &TestCaseMetadata{ + Tags: []Tag{ + podCrudTag, + netpolCrudTag, + }, + DpCfg: defaultWindowsDPCfg, + InitialEndpoints: []*hcn.HostComputeEndpoint{ + dptestutils.Endpoint(endpoint1, ip1), + }, + ExpectedSetPolicies: []*hcn.SetPolicySetting{ + dptestutils.SetPolicy(emptySet), + dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), + dptestutils.SetPolicy(nsXSet, ip1), + // old labels (not yet garbage collected) + dptestutils.SetPolicy(podK1Set), + dptestutils.SetPolicy(podK1V1Set), + // new labels + dptestutils.SetPolicy(podK2Set, ip1), + dptestutils.SetPolicy(podK2V2Set, ip1), + }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ + endpoint1: { + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "In", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "Out", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + }, + }, + }, + }, + { + // error like: + // "failed to update dirty IP while applying the dataplane. dirty IP object: &{ip:10.0.0.1 pods:map[x/a:0xc00048b0c0 x/b:0xc00048b280]}, + // err: [error: updatePod called for multiple Running Pods. dirty IP object: &{ip:10.0.0.1 pods:map[x/a:0xc00048b0c0 x/b:0xc00048b280]}]" + Description: "Sequence 2: Policy create --> Pod A Create --> Pod B create (skip first ApplyDP(), no ACLs due to error)", + Actions: []*Action{ + UpdatePolicy(policyXBaseOnK1V1()), + UpdatePolicy(policyXBase2OnK2V2()), + CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + CreatePod("x", "b", ip1, thisNode, map[string]string{"k2": "v2"}), + ApplyDP(), + }, + TestCaseMetadata: &TestCaseMetadata{ + Tags: []Tag{ + podCrudTag, + netpolCrudTag, + }, + DpCfg: defaultWindowsDPCfg, + InitialEndpoints: []*hcn.HostComputeEndpoint{ + dptestutils.Endpoint(endpoint1, ip1), + }, + ExpectedSetPolicies: []*hcn.SetPolicySetting{ + dptestutils.SetPolicy(emptySet), + dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), + dptestutils.SetPolicy(nsXSet, ip1), + // IP temporarily associated with IPSets of both pod A and pod B + // Pod A sets + dptestutils.SetPolicy(podK1Set, ip1), + dptestutils.SetPolicy(podK1V1Set, ip1), + // Pod B sets + dptestutils.SetPolicy(podK2Set, ip1), + dptestutils.SetPolicy(podK2V2Set, ip1), + }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ + endpoint1: {}, + }, + }, + }, + { + Description: "Sequence 2: Policy create --> Pod A Create --> Pod B create --> Pod A cleanup (skip first two ApplyDP())", + Actions: []*Action{ + UpdatePolicy(policyXBaseOnK1V1()), + UpdatePolicy(policyXBase2OnK2V2()), + CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + // UpdatePod() will fail for x/b since x/a is associated with the IP/Endpoint + CreatePod("x", "b", ip1, thisNode, map[string]string{"k2": "v2"}), + DeletePod("x", "a", ip1, map[string]string{"k1": "v1"}), + ApplyDP(), }, TestCaseMetadata: &TestCaseMetadata{ Tags: []Tag{ @@ -908,15 +1030,502 @@ func podEndpointAssignmentTests() []*SerialTestCase { }, } - // TODO sequence3Tests := []*SerialTestCase{ - // { - // Description: "Sequence 3: Policy create --> Pod B Create --> Pod A create --> Pod A cleanup", - // }, + { + Description: "Sequence 3: Pod B Create --> Pod A create --> Pod A Cleanup (ensure correct IPSets)", + Actions: []*Action{ + CreatePod("x", "b", ip1, thisNode, map[string]string{"k2": "v2"}), + ApplyDP(), + CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + // UpdatePod() will fail for both x/a and x/b + ApplyDP(), + DeletePod("x", "a", ip1, map[string]string{"k1": "v1"}), + ApplyDP(), + }, + TestCaseMetadata: &TestCaseMetadata{ + Tags: []Tag{ + podCrudTag, + netpolCrudTag, + }, + DpCfg: defaultWindowsDPCfg, + InitialEndpoints: []*hcn.HostComputeEndpoint{ + dptestutils.Endpoint(endpoint1, ip1), + }, + ExpectedSetPolicies: []*hcn.SetPolicySetting{ + dptestutils.SetPolicy(emptySet), + dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), + dptestutils.SetPolicy(nsXSet, ip1), + dptestutils.SetPolicy(podK2Set, ip1), + dptestutils.SetPolicy(podK2V2Set, ip1), + // not yet garbage-collected + dptestutils.SetPolicy(podK1Set), + dptestutils.SetPolicy(podK1V1Set), + }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ + endpoint1: {}, + }, + }, + }, + { + Description: "Sequence 3: Policy create --> Pod B Create --> Pod A create", + Actions: []*Action{ + UpdatePolicy(policyXBaseOnK1V1()), + UpdatePolicy(policyXBase2OnK2V2()), + CreatePod("x", "b", ip1, thisNode, map[string]string{"k2": "v2"}), + ApplyDP(), + // UpdatePod() will fail for x/a since x/b is associated with the IP/Endpoint + CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + ApplyDP(), + }, + TestCaseMetadata: &TestCaseMetadata{ + Tags: []Tag{ + podCrudTag, + netpolCrudTag, + }, + DpCfg: defaultWindowsDPCfg, + InitialEndpoints: []*hcn.HostComputeEndpoint{ + dptestutils.Endpoint(endpoint1, ip1), + }, + ExpectedSetPolicies: []*hcn.SetPolicySetting{ + dptestutils.SetPolicy(emptySet), + dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), + dptestutils.SetPolicy(nsXSet, ip1), + dptestutils.SetPolicy(podK1Set, ip1), + dptestutils.SetPolicy(podK1V1Set, ip1), + dptestutils.SetPolicy(podK2Set, ip1), + dptestutils.SetPolicy(podK2V2Set, ip1), + }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ + endpoint1: { + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "In", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "Out", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + }, + }, + }, + }, + { + Description: "Sequence 3: Policy create --> Pod B Create --> Pod A create (skip first ApplyDP())", + Actions: []*Action{ + UpdatePolicy(policyXBaseOnK1V1()), + UpdatePolicy(policyXBase2OnK2V2()), + CreatePod("x", "b", ip1, thisNode, map[string]string{"k2": "v2"}), + CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + // UpdatePod() will fail for both x/a and x/b + ApplyDP(), + }, + TestCaseMetadata: &TestCaseMetadata{ + Tags: []Tag{ + podCrudTag, + netpolCrudTag, + }, + DpCfg: defaultWindowsDPCfg, + InitialEndpoints: []*hcn.HostComputeEndpoint{ + dptestutils.Endpoint(endpoint1, ip1), + }, + ExpectedSetPolicies: []*hcn.SetPolicySetting{ + dptestutils.SetPolicy(emptySet), + dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), + dptestutils.SetPolicy(nsXSet, ip1), + dptestutils.SetPolicy(podK1Set, ip1), + dptestutils.SetPolicy(podK1V1Set, ip1), + dptestutils.SetPolicy(podK2Set, ip1), + dptestutils.SetPolicy(podK2V2Set, ip1), + }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ + endpoint1: {}, + }, + }, + }, + { + Description: "Sequence 3: Policy create --> Pod B Create --> Pod A create --> Pod A Cleanup", + Actions: []*Action{ + UpdatePolicy(policyXBaseOnK1V1()), + UpdatePolicy(policyXBase2OnK2V2()), + CreatePod("x", "b", ip1, thisNode, map[string]string{"k2": "v2"}), + CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + // UpdatePod() will fail for both x/a and x/b + ApplyDP(), + DeletePod("x", "a", ip1, map[string]string{"k1": "v1"}), + ApplyDP(), + }, + TestCaseMetadata: &TestCaseMetadata{ + Tags: []Tag{ + podCrudTag, + netpolCrudTag, + }, + DpCfg: defaultWindowsDPCfg, + InitialEndpoints: []*hcn.HostComputeEndpoint{ + dptestutils.Endpoint(endpoint1, ip1), + }, + ExpectedSetPolicies: []*hcn.SetPolicySetting{ + dptestutils.SetPolicy(emptySet), + dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), + dptestutils.SetPolicy(nsXSet, ip1), + dptestutils.SetPolicy(podK2Set, ip1), + dptestutils.SetPolicy(podK2V2Set, ip1), + // not yet garbage-collected + dptestutils.SetPolicy(podK1Set), + dptestutils.SetPolicy(podK1V1Set), + }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ + endpoint1: {}, + }, + }, + }, + { + Description: "Sequence 3: Policy create --> Pod B Create --> Pod A create --> Pod B Update (unable to add second policy to endpoint until A cleanup)", + Actions: []*Action{ + UpdatePolicy(policyXBaseOnK1V1()), + UpdatePolicy(policyXBase2OnK2V2()), + UpdatePolicy(policyXBase3OnK3V3()), + CreatePod("x", "b", ip1, thisNode, map[string]string{"k2": "v2"}), + ApplyDP(), + // UpdatePod() will fail for x/a since x/b is associated with the IP/Endpoint + CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + ApplyDP(), + UpdatePodLabels("x", "b", ip1, thisNode, nil, map[string]string{"k3": "v3"}), + ApplyDP(), + }, + TestCaseMetadata: &TestCaseMetadata{ + Tags: []Tag{ + podCrudTag, + netpolCrudTag, + }, + DpCfg: defaultWindowsDPCfg, + InitialEndpoints: []*hcn.HostComputeEndpoint{ + dptestutils.Endpoint(endpoint1, ip1), + }, + ExpectedSetPolicies: []*hcn.SetPolicySetting{ + dptestutils.SetPolicy(emptySet), + dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), + dptestutils.SetPolicy(nsXSet, ip1), + dptestutils.SetPolicy(podK1Set, ip1), + dptestutils.SetPolicy(podK1V1Set, ip1), + dptestutils.SetPolicy(podK2Set, ip1), + dptestutils.SetPolicy(podK2V2Set, ip1), + dptestutils.SetPolicy(podK3Set, ip1), + dptestutils.SetPolicy(podK3V3Set, ip1), + }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ + endpoint1: { + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "In", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "Out", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + }, + }, + }, + }, + { + Description: "Sequence 3: Policy create --> Pod B Create --> Pod A create --> Pod B Update --> Pod A cleanup (able to add second policy)", + Actions: []*Action{ + UpdatePolicy(policyXBaseOnK1V1()), + UpdatePolicy(policyXBase2OnK2V2()), + UpdatePolicy(policyXBase3OnK3V3()), + CreatePod("x", "b", ip1, thisNode, map[string]string{"k2": "v2"}), + ApplyDP(), + // UpdatePod() will fail for x/a since x/b is associated with the IP/Endpoint + CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + ApplyDP(), + UpdatePodLabels("x", "b", ip1, thisNode, nil, map[string]string{"k3": "v3"}), + ApplyDP(), + DeletePod("x", "a", ip1, map[string]string{"k1": "v1"}), + ApplyDP(), + }, + TestCaseMetadata: &TestCaseMetadata{ + Tags: []Tag{ + podCrudTag, + netpolCrudTag, + }, + DpCfg: defaultWindowsDPCfg, + InitialEndpoints: []*hcn.HostComputeEndpoint{ + dptestutils.Endpoint(endpoint1, ip1), + }, + ExpectedSetPolicies: []*hcn.SetPolicySetting{ + dptestutils.SetPolicy(emptySet), + dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), + dptestutils.SetPolicy(nsXSet, ip1), + dptestutils.SetPolicy(podK2Set, ip1), + dptestutils.SetPolicy(podK2V2Set, ip1), + dptestutils.SetPolicy(podK3Set, ip1), + dptestutils.SetPolicy(podK3V3Set, ip1), + // not garbage-collected yet + dptestutils.SetPolicy(podK1Set), + dptestutils.SetPolicy(podK1V1Set), + }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ + endpoint1: { + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "In", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "Out", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + { + ID: "azure-acl-x-base3", + Protocols: "", + Action: "Allow", + Direction: "In", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + { + ID: "azure-acl-x-base3", + Protocols: "", + Action: "Allow", + Direction: "Out", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + }, + }, + }, + }, + } + + otherTests := []*SerialTestCase{ + { + // doesn't really enforce behavior in DP, but one could look at logs to make sure we don't make any add/reset ACL SysCall into HNS + // log/codepath: "ignoring pod deletion on IP since the pod is not associated with the current endpoint" + Description: "ignore Pod update if added then deleted before ApplyDP()", + Actions: []*Action{ + UpdatePolicy(policyXBaseOnK1V1()), + CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + DeletePod("x", "a", ip1, map[string]string{"k1": "v1"}), + ApplyDP(), + }, + TestCaseMetadata: &TestCaseMetadata{ + Tags: []Tag{ + podCrudTag, + netpolCrudTag, + }, + DpCfg: defaultWindowsDPCfg, + InitialEndpoints: []*hcn.HostComputeEndpoint{ + dptestutils.Endpoint(endpoint1, ip1), + }, + ExpectedSetPolicies: []*hcn.SetPolicySetting{ + dptestutils.SetPolicy(emptySet), + dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), + dptestutils.SetPolicy(nsXSet), + dptestutils.SetPolicy(podK1Set), + dptestutils.SetPolicy(podK1V1Set), + }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ + endpoint1: {}, + }, + }, + }, + { + // doesn't really enforce behavior in DP, but one could look at logs to make sure we don't make a reset ACL SysCall into HNS + // log/codepath: "ignoring pod deletion on IP since there is no corresponding endpoint" + Description: "ignore Pod delete for deleted endpoint", + Actions: []*Action{ + UpdatePolicy(policyXBaseOnK1V1()), + CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + ApplyDP(), + DeleteEndpoint(endpoint1), + DeletePod("x", "a", ip1, map[string]string{"k1": "v1"}), + ApplyDP(), + }, + TestCaseMetadata: &TestCaseMetadata{ + Tags: []Tag{ + podCrudTag, + netpolCrudTag, + }, + DpCfg: defaultWindowsDPCfg, + InitialEndpoints: []*hcn.HostComputeEndpoint{ + dptestutils.Endpoint(endpoint1, ip1), + }, + ExpectedSetPolicies: []*hcn.SetPolicySetting{ + dptestutils.SetPolicy(emptySet), + dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), + dptestutils.SetPolicy(nsXSet), + dptestutils.SetPolicy(podK1Set), + dptestutils.SetPolicy(podK1V1Set), + }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{}, + }, + }, + { + // doesn't really enforce behavior in DP, but one could look at logs to make sure we don't make a reset ACL SysCall into HNS + // log/codepath: "ignoring pod deletion on IP since there is no corresponding endpoint" + Description: "ignore Pod delete for deleted endpoint (skip first ApplyDP())", + Actions: []*Action{ + UpdatePolicy(policyXBaseOnK1V1()), + CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + DeleteEndpoint(endpoint1), + DeletePod("x", "a", ip1, map[string]string{"k1": "v1"}), + ApplyDP(), + }, + TestCaseMetadata: &TestCaseMetadata{ + Tags: []Tag{ + podCrudTag, + netpolCrudTag, + }, + DpCfg: defaultWindowsDPCfg, + InitialEndpoints: []*hcn.HostComputeEndpoint{ + dptestutils.Endpoint(endpoint1, ip1), + }, + ExpectedSetPolicies: []*hcn.SetPolicySetting{ + dptestutils.SetPolicy(emptySet), + dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), + dptestutils.SetPolicy(nsXSet), + dptestutils.SetPolicy(podK1Set), + dptestutils.SetPolicy(podK1V1Set), + }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{}, + }, + }, + { + // doesn't really enforce behavior in DP, but one could look at logs to make sure we don't make an add ACL SysCall into HNS + // log/codepath: "ignoring pod update since there is no corresponding endpoint" + Description: "ignore Pod update for deleted endpoint", + Actions: []*Action{ + UpdatePolicy(policyXBaseOnK1V1()), + CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + DeleteEndpoint(endpoint1), + ApplyDP(), + }, + TestCaseMetadata: &TestCaseMetadata{ + Tags: []Tag{ + podCrudTag, + netpolCrudTag, + }, + DpCfg: defaultWindowsDPCfg, + InitialEndpoints: []*hcn.HostComputeEndpoint{ + dptestutils.Endpoint(endpoint1, ip1), + }, + ExpectedSetPolicies: []*hcn.SetPolicySetting{ + dptestutils.SetPolicy(emptySet), + dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), + dptestutils.SetPolicy(nsXSet, ip1), + dptestutils.SetPolicy(podK1Set, ip1), + dptestutils.SetPolicy(podK1V1Set, ip1), + }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{}, + }, + }, + { + Description: "still apply Policy to new Pod IP even when original Pod delete event hasn't happened yet", + Actions: []*Action{ + UpdatePolicy(policyXBase2OnK2V2()), + CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + CreateEndpoint(endpoint2, ip2), + CreatePod("x", "b", ip2, thisNode, map[string]string{"k2": "v2"}), + ApplyDP(), + }, + TestCaseMetadata: &TestCaseMetadata{ + Tags: []Tag{ + podCrudTag, + netpolCrudTag, + }, + DpCfg: defaultWindowsDPCfg, + InitialEndpoints: []*hcn.HostComputeEndpoint{ + dptestutils.Endpoint(endpoint1, ip1), + }, + ExpectedSetPolicies: []*hcn.SetPolicySetting{ + dptestutils.SetPolicy(emptySet), + dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), + dptestutils.SetPolicy(nsXSet, ip1, ip2), + dptestutils.SetPolicy(podK1Set, ip1), + dptestutils.SetPolicy(podK1V1Set, ip1), + dptestutils.SetPolicy(podK2Set, ip2), + dptestutils.SetPolicy(podK2V2Set, ip2), + }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ + endpoint1: {}, + endpoint2: { + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "In", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "Out", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + }, + }, + }, + }, } allTests := append(sequence1Tests, sequence2Tests...) allTests = append(allTests, sequence3Tests...) + allTests = append(allTests, otherTests...) return allTests } diff --git a/npm/pkg/dataplane/dataplane_windows_test.go b/npm/pkg/dataplane/dataplane_windows_test.go index 4e5ddaa0fe..b1dc2170a4 100644 --- a/npm/pkg/dataplane/dataplane_windows_test.go +++ b/npm/pkg/dataplane/dataplane_windows_test.go @@ -23,7 +23,7 @@ func TestBasics(t *testing.T) { } func TestPodEndpointAssignment(t *testing.T) { - testSerialCases(t, podEndpointAssignmentTests()) + testSerialCases(t, updatePodTests()) } func TestAllMultiJobCases(t *testing.T) { From 468f1d07e9bbf93ab4cb3fcc37bb5e2135974cba Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Fri, 17 Feb 2023 16:01:11 -0800 Subject: [PATCH 09/15] updatePod() works for all sequences --- npm/pkg/dataplane/dataplane.go | 115 ++++++++++++------------- npm/pkg/dataplane/dataplane_windows.go | 41 ++++++--- npm/pkg/dataplane/types.go | 47 +++++++--- 3 files changed, 122 insertions(+), 81 deletions(-) diff --git a/npm/pkg/dataplane/dataplane.go b/npm/pkg/dataplane/dataplane.go index f49eb8b6af..411e5a20d8 100644 --- a/npm/pkg/dataplane/dataplane.go +++ b/npm/pkg/dataplane/dataplane.go @@ -24,13 +24,30 @@ type Config struct { *policies.PolicyManagerCfg } -type updatePodCache struct { +// dirtyIPCache tracks dirty IPs and Pod updates for those IPs. +// It tracks IPSets a Running Pod was recently added to or removed from, and which deleted Pods were just deleted from an IP. +// assumption: a Pod won't take up its previously used IP when restarting (see https://stackoverflow.com/questions/52362514/when-will-the-kubernetes-pod-ip-change) +// A Pod may be associated with more than one dirtyIP after Pod restarts. The number some ApplyDataPlane fails several times or ApplyDataPlane is asynchronous. +type dirtyIPCache struct { sync.Mutex - cache map[string]*updateNPMPod + // cache maps IP to dirtyIP + cache map[string]*dirtyIP } -func newUpdatePodCache() *updatePodCache { - return &updatePodCache{cache: make(map[string]*updateNPMPod)} +func newDirtyIPCache() *dirtyIPCache { + return &dirtyIPCache{cache: make(map[string]*dirtyIP)} +} + +// getOrCreateDirtyIP returns the dirtyIP for the given PodMetadata. If it doesn't exist, it creates a new one. +// Caller should lock the dirtyIPCache before calling this function. +func (d *dirtyIPCache) getOrCreateDirtyIP(ip string) *dirtyIP { + dIP, ok := d.cache[ip] + if !ok { + dIP = newDirtyIP(ip) + d.cache[ip] = dIP + } + + return dIP } type endpointCache struct { @@ -50,10 +67,10 @@ type DataPlane struct { nodeName string // endpointCache stores all endpoints of the network (including off-node) // Key is PodIP - endpointCache *endpointCache - ioShim *common.IOShim - updatePodCache *updatePodCache - stopChannel <-chan struct{} + endpointCache *endpointCache + ioShim *common.IOShim + dirtyIPCache *dirtyIPCache + stopChannel <-chan struct{} } func NewDataPlane(nodeName string, ioShim *common.IOShim, cfg *Config, stopChannel <-chan struct{}) (*DataPlane, error) { @@ -63,14 +80,14 @@ func NewDataPlane(nodeName string, ioShim *common.IOShim, cfg *Config, stopChann cfg.IPSetManagerCfg.AddEmptySetToLists = true } dp := &DataPlane{ - Config: cfg, - policyMgr: policies.NewPolicyManager(ioShim, cfg.PolicyManagerCfg), - ipsetMgr: ipsets.NewIPSetManager(cfg.IPSetManagerCfg, ioShim), - endpointCache: newEndpointCache(), - nodeName: nodeName, - ioShim: ioShim, - updatePodCache: newUpdatePodCache(), - stopChannel: stopChannel, + Config: cfg, + policyMgr: policies.NewPolicyManager(ioShim, cfg.PolicyManagerCfg), + ipsetMgr: ipsets.NewIPSetManager(cfg.IPSetManagerCfg, ioShim), + endpointCache: newEndpointCache(), + nodeName: nodeName, + ioShim: ioShim, + dirtyIPCache: newDirtyIPCache(), + stopChannel: stopChannel, } err := dp.BootupDataplane() @@ -136,24 +153,18 @@ func (dp *DataPlane) AddToSets(setNames []*ipsets.IPSetMetadata, podMetadata *Po } if dp.shouldUpdatePod() && (podMetadata.NodeName == dp.nodeName || podMetadata.wasDeleted()) { + klog.Infof("[DataPlane] processing AddToSets updatePod. podMetadata: %+v", podMetadata) + if podMetadata.wasDeleted() { - metrics.SendErrorLogAndMetric(util.DaemonDataplaneID, "[DataPlane] pod key %s is unexpectedly marked for delete in AddToSets", podMetadata.PodKey) - return nil + return fmt.Errorf("error: unexpectedly found deleted pod in AddToSets. podMetadata: %+v", podMetadata) } - klog.Infof("[DataPlane] Updating Sets to Add for pod key %s", podMetadata.PodKey) - - // lock updatePodCache while reading/modifying or setting the updatePod in the cache - dp.updatePodCache.Lock() - defer dp.updatePodCache.Unlock() - - updatePod, ok := dp.updatePodCache.cache[podMetadata.PodKey] - if !ok { - klog.Infof("[DataPlane] {AddToSet} pod key %s not found in updatePodCache. creating a new obj", podMetadata.PodKey) - updatePod = newUpdateNPMPod(podMetadata) - dp.updatePodCache.cache[podMetadata.PodKey] = updatePod - } + // lock dirtyIPCache while reading/modifying + dp.dirtyIPCache.Lock() + defer dp.dirtyIPCache.Unlock() + dIP := dp.dirtyIPCache.getOrCreateDirtyIP(podMetadata.PodIP) + updatePod := dIP.getOrCreateUpdatePod(podMetadata) updatePod.updateIPSetsToAdd(setNames) } @@ -169,31 +180,17 @@ func (dp *DataPlane) RemoveFromSets(setNames []*ipsets.IPSetMetadata, podMetadat } if dp.shouldUpdatePod() && (podMetadata.NodeName == dp.nodeName || podMetadata.wasDeleted()) { - if podMetadata.wasDeleted() { - klog.Infof("[DataPlane] pod key %s marked for delete in RemoveFromSets", podMetadata.PodKey) - } else { - klog.Infof("[DataPlane] Updating Sets to Remove for pod key %s", podMetadata.PodKey) - } + klog.Infof("[DataPlane] processing RemoveFromSets updatePod. podMetadata: %+v", podMetadata) - // lock updatePodCache while reading/modifying or setting the updatePod in the cache - dp.updatePodCache.Lock() - defer dp.updatePodCache.Unlock() - - updatePod, ok := dp.updatePodCache.cache[podMetadata.PodKey] - if !ok { - klog.Infof("[DataPlane] {RemoveFromSet} pod key %s not found in updatePodCache. creating a new obj", podMetadata.PodKey) - updatePod = newUpdateNPMPod(podMetadata) - dp.updatePodCache.cache[podMetadata.PodKey] = updatePod - } + // lock dirtyIPCache while reading/modifying + dp.dirtyIPCache.Lock() + defer dp.dirtyIPCache.Unlock() + dIP := dp.dirtyIPCache.getOrCreateDirtyIP(podMetadata.PodIP) + updatePod := dIP.getOrCreateUpdatePod(podMetadata) if podMetadata.wasDeleted() { - // mark IP for delete - if updatePod.ipsMarkedForDelete == nil { - updatePod.ipsMarkedForDelete = make(map[string]struct{}, 1) - } - updatePod.ipsMarkedForDelete[podMetadata.PodIP] = struct{}{} - - // discard all previous IPSet updates, and disregard all IPSet updates for this pod marked for delete + updatePod.markDeleted() + // can discard IPSet updates updatePod.IPSetsToAdd = nil updatePod.IPSetsToRemove = nil } else { @@ -244,20 +241,20 @@ func (dp *DataPlane) ApplyDataPlane() error { return nil } - // lock updatePodCache while driving goal state to kernel + // lock dirtyIPCache while driving goal state to kernel // prevents another ApplyDataplane call from updating the same pods - dp.updatePodCache.Lock() - defer dp.updatePodCache.Unlock() + dp.dirtyIPCache.Lock() + defer dp.dirtyIPCache.Unlock() - for podKey, pod := range dp.updatePodCache.cache { - err := dp.updatePod(pod) + for _, dIP := range dp.dirtyIPCache.cache { + err := dp.updatePod(dIP) if err != nil { // move on to the next and later return as success since this can be retried irrespective of other operations - metrics.SendErrorLogAndMetric(util.DaemonDataplaneID, "failed to update pod while applying the dataplane. key: [%s], err: [%s]", podKey, err.Error()) + metrics.SendErrorLogAndMetric(util.DaemonDataplaneID, "failed to update dirty IP while applying the dataplane. dirty IP object: %+v, err: [%s]", dIP, err.Error()) continue } - delete(dp.updatePodCache.cache, podKey) + delete(dp.dirtyIPCache.cache, dIP.ip) } } return nil diff --git a/npm/pkg/dataplane/dataplane_windows.go b/npm/pkg/dataplane/dataplane_windows.go index a9e146063d..48c551c7c1 100644 --- a/npm/pkg/dataplane/dataplane_windows.go +++ b/npm/pkg/dataplane/dataplane_windows.go @@ -115,36 +115,57 @@ func (dp *DataPlane) shouldUpdatePod() bool { // 2. Will check for existing applicable network policies and applies it on endpoint. // Assumptions: // - We will receive a cleanup event for a Pod in the correct order (i.e. after a create event for the same Pod) -func (dp *DataPlane) updatePod(pod *updateNPMPod) error { - klog.Infof("[DataPlane] updatePod called for Pod Key %s", pod.PodKey) +func (dp *DataPlane) updatePod(dIP *dirtyIP) error { + klog.Infof("[DataPlane] updatePod called for dirty IP object: %+v", dIP) // lock the endpoint cache while we read/modify the endpoint with the pod's IP dp.endpointCache.Lock() defer dp.endpointCache.Unlock() - // reset ACLs for any Pod that was wrongly assigned to an endpoint (fixes #1729) - for ip := range pod.ipsMarkedForDelete { - endpoint, ok := dp.endpointCache.cache[ip] + // reset ACLs for any deleted Pod that was wrongly assigned to an endpoint (fixes #1729) + for podKey, pod := range dIP.pods { + if !pod.wasDeleted() { + continue + } + + endpoint, ok := dp.endpointCache.cache[dIP.ip] if !ok { // this branch will be taken often for the typical flow. 1) Endpoint deleted, 2) Pod delete event // additionally, this branch will be always be taken for an IP on a different node - klog.Infof("[DataPlane] ignoring IP marked for delete since there is no corresponding endpoint. IP: %s. podKey: %s", ip, pod.PodKey) + klog.Infof("[DataPlane] ignoring pod deletion on IP since there is no corresponding endpoint. IP: %s. podKey: %s", dIP.ip, podKey) + delete(dIP.pods, podKey) continue } - if endpoint.podKey != pod.PodKey { - klog.Infof("[DataPlane] ignoring IP marked for delete since it is not associated with this endpoint. podKey: %s. endpoint: %+v", pod.PodKey, endpoint) + if endpoint.podKey != podKey { + klog.Infof("[DataPlane] ignoring pod deletion on IP since the pod is not associated with the current endpoint. IP: %s. podKey: %s. endpoint: %+v", dIP.ip, podKey, endpoint) + delete(dIP.pods, podKey) continue } // this Pod was incorrectly assigned to the Endpoint (see issue #1729) if err := dp.policyMgr.ResetEndpoint(endpoint.id); err != nil { - return fmt.Errorf("failed to reset endpoint. endpoint: %+v. err: %w", endpoint, err) + return fmt.Errorf("failed to reset endpoint for deleted pod. IP: %s. podKey: %s. endpoint: %+v. err: %w", dIP.ip, podKey, endpoint, err) } // mark the Endpoint as unassigned endpoint.podKey = unspecifiedPodKey - delete(pod.ipsMarkedForDelete, ip) + delete(dIP.pods, podKey) + } + + if len(dIP.pods) == 0 { + // no running pod to update + return nil + } + + if len(dIP.pods) > 1 { + return fmt.Errorf("error: updatePod called for multiple Running Pods. dirty IP object: %+v", dIP) + } + + // there is only one pod in the map + var pod *updateNPMPod + for _, pod = range dIP.pods { + break } if len(pod.IPSetsToAdd) == 0 && len(pod.IPSetsToRemove) == 0 { diff --git a/npm/pkg/dataplane/types.go b/npm/pkg/dataplane/types.go index 5c1c866b57..6a1d3030c3 100644 --- a/npm/pkg/dataplane/types.go +++ b/npm/pkg/dataplane/types.go @@ -27,18 +27,38 @@ type GenericDataplane interface { UpdatePolicy(policies *policies.NPMNetworkPolicy) error } +// dirtyIP represents an IP and all of its updateNPMPod events. +// there should be only one updateNPMPod such that updateNPMPod.wasDeleted() == false +type dirtyIP struct { + ip string + // states maps pod key to its updateNPMPod + pods map[string]*updateNPMPod +} + +func newDirtyIP(ip string) *dirtyIP { + return &dirtyIP{ + ip: ip, + pods: make(map[string]*updateNPMPod), + } +} + +func (d *dirtyIP) getOrCreateUpdatePod(podMetadata *PodMetadata) *updateNPMPod { + uPod, ok := d.pods[podMetadata.PodKey] + if !ok { + uPod = newUpdateNPMPod(podMetadata) + d.pods[podMetadata.PodKey] = uPod + } + + return uPod +} + // UpdateNPMPod pod controller will populate and send this datastructure to dataplane // to update the dataplane with the latest pod information // this helps in calculating if any update needs to have policies applied or removed type updateNPMPod struct { - PodKey string - PodIP string - NodeName string - // ipsMarkedForDelete tracks IPs that this pod key was associated with before - // it would rare for this to be more than one IP, but possible if ApplyDataPlane fails several times or ApplyDataPlane is asynchronous - ipsMarkedForDelete map[string]struct{} - IPSetsToAdd []string - IPSetsToRemove []string + *PodMetadata + IPSetsToAdd []string + IPSetsToRemove []string } // PodMetadata is what is passed to dataplane to specify pod ipset @@ -71,16 +91,19 @@ func (pm *PodMetadata) wasDeleted() bool { return pm.deleted } +func (pm *PodMetadata) markDeleted() { + pm.deleted = true +} + func (p *PodMetadata) Namespace() string { return strings.Split(p.PodKey, "/")[0] } func newUpdateNPMPod(podMetadata *PodMetadata) *updateNPMPod { return &updateNPMPod{ - PodKey: podMetadata.PodKey, - PodIP: podMetadata.PodIP, - NodeName: podMetadata.NodeName, - // can leave all slices as nil since len() and append() work on nil slices + PodMetadata: podMetadata, + IPSetsToAdd: make([]string, 0), + IPSetsToRemove: make([]string, 0), } } From aade8fbc9680a0175ed2124b8956b3929e9767a5 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Thu, 2 Mar 2023 15:12:17 -0800 Subject: [PATCH 10/15] work with updatePodCache and existing controlplane --- .../controllers/v2/podController.go | 25 +- .../controllers/v2/podController_test.go | 20 +- .../dataplane-test-cases_windows_test.go | 404 +++++++++--------- npm/pkg/dataplane/dataplane.go | 103 ++--- npm/pkg/dataplane/dataplane_windows.go | 83 +--- npm/pkg/dataplane/dataplane_windows_test.go | 4 + npm/pkg/dataplane/types.go | 43 +- npm/pkg/dataplane/types_windows.go | 2 + npm/pkg/dataplane/types_windows_test.go | 2 +- 9 files changed, 313 insertions(+), 373 deletions(-) diff --git a/npm/pkg/controlplane/controllers/v2/podController.go b/npm/pkg/controlplane/controllers/v2/podController.go index 06611602bf..2eeb1b8318 100644 --- a/npm/pkg/controlplane/controllers/v2/podController.go +++ b/npm/pkg/controlplane/controllers/v2/podController.go @@ -30,9 +30,8 @@ import ( type NamedPortOperation string const ( - deleteNamedPort NamedPortOperation = "del" - addNamedPort NamedPortOperation = "add" - deletePodAndNamedPort NamedPortOperation = "del-pod-and-namedport" + deleteNamedPort NamedPortOperation = "del" + addNamedPort NamedPortOperation = "add" addEvent string = "ADD" updateEvent string = "UPDATE" @@ -456,8 +455,7 @@ func (c *PodController) syncAddAndUpdatePod(newPodObj *corev1.Pod) (metrics.Oper addToIPSets, deleteFromIPSets := util.GetIPSetListCompareLabels(cachedNpmPod.Labels, newPodObj.Labels) newPodMetadata := dataplane.NewPodMetadata(podKey, newPodObj.Status.PodIP, newPodObj.Spec.NodeName) - // todo: verify pulling nodename from newpod, - // if a pod is getting deleted, we do not have to cleanup policies, so it is okay to pass in wrong nodename + // should have newPodMetadata == cachedPodMetadata since from branch above, we have cachedNpmPod.PodIP == newPodObj.Status.PodIP cachedPodMetadata := dataplane.NewPodMetadata(podKey, cachedNpmPod.PodIP, newPodMetadata.NodeName) // Delete the pod from its label's ipset. for _, removeIPSetName := range deleteFromIPSets { @@ -515,8 +513,10 @@ func (c *PodController) syncAddAndUpdatePod(newPodObj *corev1.Pod) (metrics.Oper newPodPorts := common.GetContainerPortList(newPodObj) if !reflect.DeepEqual(cachedNpmPod.ContainerPorts, newPodPorts) { // Delete cached pod's named ports from its ipset. + // Node name is only used in Windows. Keep consistency with above RemoveFromSets() calls by using the Pod's node name. + // manageNamedPortIpsets() also does nothing for Windows currently. if err = c.manageNamedPortIpsets( - cachedNpmPod.ContainerPorts, podKey, cachedNpmPod.PodIP, "", deleteNamedPort); err != nil { + cachedNpmPod.ContainerPorts, podKey, cachedNpmPod.PodIP, newPodMetadata.NodeName, deleteNamedPort); err != nil { return metrics.UpdateOp, fmt.Errorf("[syncAddAndUpdatePod] Error: failed to delete pod from named port ipset with err: %w", err) } // Since portList ipset deletion is successful, NPM can remove cachedContainerPorts @@ -543,8 +543,9 @@ func (c *PodController) cleanUpDeletedPod(cachedNpmPodKey string) error { } var err error - cachedPodMetadata := dataplane.NewDeletedPodMetadata(cachedNpmPodKey, cachedNpmPod.PodIP) + cachedPodMetadata := dataplane.NewPodMetadata(cachedNpmPodKey, cachedNpmPod.PodIP, dataplane.NodePlaceholderForDeletedPod) // Delete the pod from its namespace's ipset. + // note: NodeName empty is not going to call update pod if err = c.dp.RemoveFromSets( []*ipsets.IPSetMetadata{ipsets.NewIPSetMetadata(cachedNpmPod.Namespace, ipsets.Namespace)}, cachedPodMetadata); err != nil { @@ -568,7 +569,7 @@ func (c *PodController) cleanUpDeletedPod(cachedNpmPodKey string) error { // Delete pod's named ports from its ipset. Need to pass true in the manageNamedPortIpsets function call if err = c.manageNamedPortIpsets( - cachedNpmPod.ContainerPorts, cachedNpmPodKey, cachedNpmPod.PodIP, "", deletePodAndNamedPort); err != nil { + cachedNpmPod.ContainerPorts, cachedNpmPodKey, cachedNpmPod.PodIP, dataplane.NodePlaceholderForDeletedPod, deleteNamedPort); err != nil { return fmt.Errorf("[cleanUpDeletedPod] Error: failed to delete pod from named port ipset with err: %w", err) } @@ -601,22 +602,16 @@ func (c *PodController) manageNamedPortIpsets(portList []corev1.ContainerPort, p namedPortIpsetEntry := fmt.Sprintf("%s,%s%d", podIP, protocol, port.ContainerPort) // nodename in NewPodMetadata is nil so UpdatePod is ignored + podMetadata := dataplane.NewPodMetadata(podKey, namedPortIpsetEntry, nodeName) switch namedPortOperation { case deleteNamedPort: - podMetadata := dataplane.NewPodMetadata(podKey, namedPortIpsetEntry, nodeName) if err := c.dp.RemoveFromSets([]*ipsets.IPSetMetadata{ipsets.NewIPSetMetadata(port.Name, ipsets.NamedPorts)}, podMetadata); err != nil { return fmt.Errorf("failed to remove from set when deleting named port with err %w", err) } case addNamedPort: - podMetadata := dataplane.NewPodMetadata(podKey, namedPortIpsetEntry, nodeName) if err := c.dp.AddToSets([]*ipsets.IPSetMetadata{ipsets.NewIPSetMetadata(port.Name, ipsets.NamedPorts)}, podMetadata); err != nil { return fmt.Errorf("failed to add to set when deleting named port with err %w", err) } - case deletePodAndNamedPort: - podMetadata := dataplane.NewDeletedPodMetadata(podKey, namedPortIpsetEntry) - if err := c.dp.RemoveFromSets([]*ipsets.IPSetMetadata{ipsets.NewIPSetMetadata(port.Name, ipsets.NamedPorts)}, podMetadata); err != nil { - return fmt.Errorf("failed to remove from set when deleting pod and named port with err %w", err) - } } } diff --git a/npm/pkg/controlplane/controllers/v2/podController_test.go b/npm/pkg/controlplane/controllers/v2/podController_test.go index 8faeb2ef9f..41a901a647 100644 --- a/npm/pkg/controlplane/controllers/v2/podController_test.go +++ b/npm/pkg/controlplane/controllers/v2/podController_test.go @@ -429,14 +429,14 @@ func TestDeletePod(t *testing.T) { } dp.EXPECT().ApplyDataPlane().Return(nil).Times(2) // Delete pod section - deleteMetadata := dataplane.NewDeletedPodMetadata("test-namespace/test-pod", "1.2.3.4") + deleteMetadata := dataplane.NewPodMetadata("test-namespace/test-pod", "1.2.3.4", dataplane.NodePlaceholderForDeletedPod) dp.EXPECT().RemoveFromSets(mockIPSets[:1], deleteMetadata).Return(nil).Times(1) dp.EXPECT().RemoveFromSets(mockIPSets[1:], deleteMetadata).Return(nil).Times(1) if !util.IsWindowsDP() { dp.EXPECT(). RemoveFromSets( []*ipsets.IPSetMetadata{ipsets.NewIPSetMetadata("app:test-pod", ipsets.NamedPorts)}, - dataplane.NewDeletedPodMetadata("test-namespace/test-pod", "1.2.3.4,8080"), + dataplane.NewPodMetadata("test-namespace/test-pod", "1.2.3.4,8080", dataplane.NodePlaceholderForDeletedPod), ). Return(nil).Times(1) } @@ -546,14 +546,14 @@ func TestDeletePodWithTombstoneAfterAddingPod(t *testing.T) { } dp.EXPECT().ApplyDataPlane().Return(nil).Times(2) // Delete pod section - deleteMetadata := dataplane.NewDeletedPodMetadata("test-namespace/test-pod", "1.2.3.4") + deleteMetadata := dataplane.NewPodMetadata("test-namespace/test-pod", "1.2.3.4", dataplane.NodePlaceholderForDeletedPod) dp.EXPECT().RemoveFromSets(mockIPSets[:1], deleteMetadata).Return(nil).Times(1) dp.EXPECT().RemoveFromSets(mockIPSets[1:], deleteMetadata).Return(nil).Times(1) if !util.IsWindowsDP() { dp.EXPECT(). RemoveFromSets( []*ipsets.IPSetMetadata{ipsets.NewIPSetMetadata("app:test-pod", ipsets.NamedPorts)}, - dataplane.NewDeletedPodMetadata("test-namespace/test-pod", "1.2.3.4,8080"), + dataplane.NewPodMetadata("test-namespace/test-pod", "1.2.3.4,8080", dataplane.NodePlaceholderForDeletedPod), ). Return(nil).Times(1) } @@ -666,14 +666,14 @@ func TestEmptyIPUpdate(t *testing.T) { } dp.EXPECT().ApplyDataPlane().Return(nil).Times(2) // Delete pod section - deleteMetadata := dataplane.NewDeletedPodMetadata("test-namespace/test-pod", "1.2.3.4") + deleteMetadata := dataplane.NewPodMetadata("test-namespace/test-pod", "1.2.3.4", dataplane.NodePlaceholderForDeletedPod) dp.EXPECT().RemoveFromSets(mockIPSets[:1], deleteMetadata).Return(nil).Times(1) dp.EXPECT().RemoveFromSets(mockIPSets[1:], deleteMetadata).Return(nil).Times(1) if !util.IsWindowsDP() { dp.EXPECT(). RemoveFromSets( []*ipsets.IPSetMetadata{ipsets.NewIPSetMetadata("app:test-pod", ipsets.NamedPorts)}, - dataplane.NewDeletedPodMetadata("test-namespace/test-pod", "1.2.3.4,8080"), + dataplane.NewPodMetadata("test-namespace/test-pod", "1.2.3.4,8080", dataplane.NodePlaceholderForDeletedPod), ). Return(nil).Times(1) } @@ -760,14 +760,14 @@ func TestIPAddressUpdatePod(t *testing.T) { } dp.EXPECT().ApplyDataPlane().Return(nil).Times(2) // Delete pod section - deleteMetadata := dataplane.NewDeletedPodMetadata("test-namespace/test-pod", "1.2.3.4") + deleteMetadata := dataplane.NewPodMetadata("test-namespace/test-pod", "1.2.3.4", dataplane.NodePlaceholderForDeletedPod) dp.EXPECT().RemoveFromSets(mockIPSets[:1], deleteMetadata).Return(nil).Times(1) dp.EXPECT().RemoveFromSets(mockIPSets[1:], deleteMetadata).Return(nil).Times(1) if !util.IsWindowsDP() { dp.EXPECT(). RemoveFromSets( []*ipsets.IPSetMetadata{ipsets.NewIPSetMetadata("app:test-pod", ipsets.NamedPorts)}, - dataplane.NewDeletedPodMetadata("test-namespace/test-pod", "1.2.3.4,8080"), + dataplane.NewPodMetadata("test-namespace/test-pod", "1.2.3.4,8080", dataplane.NodePlaceholderForDeletedPod), ). Return(nil).Times(1) } @@ -835,14 +835,14 @@ func TestPodStatusUpdatePod(t *testing.T) { } dp.EXPECT().ApplyDataPlane().Return(nil).Times(2) // Delete pod section - deleteMetadata := dataplane.NewDeletedPodMetadata("test-namespace/test-pod", "1.2.3.4") + deleteMetadata := dataplane.NewPodMetadata("test-namespace/test-pod", "1.2.3.4", dataplane.NodePlaceholderForDeletedPod) dp.EXPECT().RemoveFromSets(mockIPSets[:1], deleteMetadata).Return(nil).Times(1) dp.EXPECT().RemoveFromSets(mockIPSets[1:], deleteMetadata).Return(nil).Times(1) if !util.IsWindowsDP() { dp.EXPECT(). RemoveFromSets( []*ipsets.IPSetMetadata{ipsets.NewIPSetMetadata("app:test-pod", ipsets.NamedPorts)}, - dataplane.NewDeletedPodMetadata("test-namespace/test-pod", "1.2.3.4,8080"), + dataplane.NewPodMetadata("test-namespace/test-pod", "1.2.3.4,8080", dataplane.NodePlaceholderForDeletedPod), ). Return(nil).Times(1) } diff --git a/npm/pkg/dataplane/dataplane-test-cases_windows_test.go b/npm/pkg/dataplane/dataplane-test-cases_windows_test.go index 95e80cb699..3d631914b6 100644 --- a/npm/pkg/dataplane/dataplane-test-cases_windows_test.go +++ b/npm/pkg/dataplane/dataplane-test-cases_windows_test.go @@ -812,7 +812,7 @@ func updatePodTests() []*SerialTestCase { sequence2Tests := []*SerialTestCase{ { - Description: "Sequence 2: Policy create --> Pod A Create --> Pod B create (Pod A has appropriate ACLs)", + Description: "Sequence 2: Policy create --> Pod A Create --> Pod B create", Actions: []*Action{ UpdatePolicy(policyXBaseOnK1V1()), UpdatePolicy(policyXBase2OnK2V2()), @@ -845,7 +845,7 @@ func updatePodTests() []*SerialTestCase { ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ endpoint1: { { - ID: "azure-acl-x-base", + ID: "azure-acl-x-base2", Protocols: "", Action: "Allow", Direction: "In", @@ -856,7 +856,7 @@ func updatePodTests() []*SerialTestCase { Priority: 222, }, { - ID: "azure-acl-x-base", + ID: "azure-acl-x-base2", Protocols: "", Action: "Allow", Direction: "Out", @@ -877,7 +877,6 @@ func updatePodTests() []*SerialTestCase { UpdatePolicy(policyXBase2OnK2V2()), CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), ApplyDP(), - // UpdatePod() will fail for x/b since x/a is associated with the IP/Endpoint CreatePod("x", "b", ip1, thisNode, map[string]string{"k2": "v2"}), ApplyDP(), DeletePod("x", "a", ip1, map[string]string{"k1": "v1"}), @@ -932,10 +931,7 @@ func updatePodTests() []*SerialTestCase { }, }, { - // error like: - // "failed to update dirty IP while applying the dataplane. dirty IP object: &{ip:10.0.0.1 pods:map[x/a:0xc00048b0c0 x/b:0xc00048b280]}, - // err: [error: updatePod called for multiple Running Pods. dirty IP object: &{ip:10.0.0.1 pods:map[x/a:0xc00048b0c0 x/b:0xc00048b280]}]" - Description: "Sequence 2: Policy create --> Pod A Create --> Pod B create (skip first ApplyDP(), no ACLs due to error)", + Description: "Sequence 2: Policy create --> Pod A Create --> Pod B create (skip first ApplyDP())", Actions: []*Action{ UpdatePolicy(policyXBaseOnK1V1()), UpdatePolicy(policyXBase2OnK2V2()), @@ -965,7 +961,30 @@ func updatePodTests() []*SerialTestCase { dptestutils.SetPolicy(podK2V2Set, ip1), }, ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ - endpoint1: {}, + endpoint1: { + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "In", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "Out", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + }, }, }, }, @@ -975,7 +994,6 @@ func updatePodTests() []*SerialTestCase { UpdatePolicy(policyXBaseOnK1V1()), UpdatePolicy(policyXBase2OnK2V2()), CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), - // UpdatePod() will fail for x/b since x/a is associated with the IP/Endpoint CreatePod("x", "b", ip1, thisNode, map[string]string{"k2": "v2"}), DeletePod("x", "a", ip1, map[string]string{"k1": "v1"}), ApplyDP(), @@ -1030,15 +1048,13 @@ func updatePodTests() []*SerialTestCase { }, } - sequence3Tests := []*SerialTestCase{ + otherTests := []*SerialTestCase{ { - Description: "Sequence 3: Pod B Create --> Pod A create --> Pod A Cleanup (ensure correct IPSets)", + // log: "[DataPlane] removing deleted pod from updatePodCache. podMetadata: &{PodKey:x/a PodIP:10.0.0.1 NodeName:DELETED_POD_INDICATOR}" + Description: "ignore Pod update if added then deleted before ApplyDP()", Actions: []*Action{ - CreatePod("x", "b", ip1, thisNode, map[string]string{"k2": "v2"}), - ApplyDP(), + UpdatePolicy(policyXBaseOnK1V1()), CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), - // UpdatePod() will fail for both x/a and x/b - ApplyDP(), DeletePod("x", "a", ip1, map[string]string{"k1": "v1"}), ApplyDP(), }, @@ -1054,10 +1070,7 @@ func updatePodTests() []*SerialTestCase { ExpectedSetPolicies: []*hcn.SetPolicySetting{ dptestutils.SetPolicy(emptySet), dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), - dptestutils.SetPolicy(nsXSet, ip1), - dptestutils.SetPolicy(podK2Set, ip1), - dptestutils.SetPolicy(podK2V2Set, ip1), - // not yet garbage-collected + dptestutils.SetPolicy(nsXSet), dptestutils.SetPolicy(podK1Set), dptestutils.SetPolicy(podK1V1Set), }, @@ -1067,15 +1080,15 @@ func updatePodTests() []*SerialTestCase { }, }, { - Description: "Sequence 3: Policy create --> Pod B Create --> Pod A create", + // doesn't really enforce behavior in DP, but one could look at logs to make sure we don't make a reset ACL SysCall into HNS + Description: "ignore Pod delete for deleted endpoint", Actions: []*Action{ UpdatePolicy(policyXBaseOnK1V1()), - UpdatePolicy(policyXBase2OnK2V2()), - CreatePod("x", "b", ip1, thisNode, map[string]string{"k2": "v2"}), - ApplyDP(), - // UpdatePod() will fail for x/a since x/b is associated with the IP/Endpoint CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), ApplyDP(), + DeleteEndpoint(endpoint1), + DeletePod("x", "a", ip1, map[string]string{"k1": "v1"}), + ApplyDP(), }, TestCaseMetadata: &TestCaseMetadata{ Tags: []Tag{ @@ -1089,48 +1102,21 @@ func updatePodTests() []*SerialTestCase { ExpectedSetPolicies: []*hcn.SetPolicySetting{ dptestutils.SetPolicy(emptySet), dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), - dptestutils.SetPolicy(nsXSet, ip1), - dptestutils.SetPolicy(podK1Set, ip1), - dptestutils.SetPolicy(podK1V1Set, ip1), - dptestutils.SetPolicy(podK2Set, ip1), - dptestutils.SetPolicy(podK2V2Set, ip1), - }, - ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ - endpoint1: { - { - ID: "azure-acl-x-base2", - Protocols: "", - Action: "Allow", - Direction: "In", - LocalAddresses: "", - RemoteAddresses: "", - LocalPorts: "", - RemotePorts: "", - Priority: 222, - }, - { - ID: "azure-acl-x-base2", - Protocols: "", - Action: "Allow", - Direction: "Out", - LocalAddresses: "", - RemoteAddresses: "", - LocalPorts: "", - RemotePorts: "", - Priority: 222, - }, - }, + dptestutils.SetPolicy(nsXSet), + dptestutils.SetPolicy(podK1Set), + dptestutils.SetPolicy(podK1V1Set), }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{}, }, }, { - Description: "Sequence 3: Policy create --> Pod B Create --> Pod A create (skip first ApplyDP())", + // doesn't really enforce behavior in DP, but one could look at logs to make sure we don't make a reset ACL SysCall into HNS + Description: "ignore Pod delete for deleted endpoint (skip first ApplyDP())", Actions: []*Action{ UpdatePolicy(policyXBaseOnK1V1()), - UpdatePolicy(policyXBase2OnK2V2()), - CreatePod("x", "b", ip1, thisNode, map[string]string{"k2": "v2"}), CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), - // UpdatePod() will fail for both x/a and x/b + DeleteEndpoint(endpoint1), + DeletePod("x", "a", ip1, map[string]string{"k1": "v1"}), ApplyDP(), }, TestCaseMetadata: &TestCaseMetadata{ @@ -1145,27 +1131,20 @@ func updatePodTests() []*SerialTestCase { ExpectedSetPolicies: []*hcn.SetPolicySetting{ dptestutils.SetPolicy(emptySet), dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), - dptestutils.SetPolicy(nsXSet, ip1), - dptestutils.SetPolicy(podK1Set, ip1), - dptestutils.SetPolicy(podK1V1Set, ip1), - dptestutils.SetPolicy(podK2Set, ip1), - dptestutils.SetPolicy(podK2V2Set, ip1), - }, - ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ - endpoint1: {}, + dptestutils.SetPolicy(nsXSet), + dptestutils.SetPolicy(podK1Set), + dptestutils.SetPolicy(podK1V1Set), }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{}, }, }, { - Description: "Sequence 3: Policy create --> Pod B Create --> Pod A create --> Pod A Cleanup", + // doesn't really enforce behavior in DP, but one could look at logs to make sure we don't make an add ACL SysCall into HNS" + Description: "ignore Pod update when there's no corresponding endpoint", Actions: []*Action{ UpdatePolicy(policyXBaseOnK1V1()), - UpdatePolicy(policyXBase2OnK2V2()), - CreatePod("x", "b", ip1, thisNode, map[string]string{"k2": "v2"}), CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), - // UpdatePod() will fail for both x/a and x/b - ApplyDP(), - DeletePod("x", "a", ip1, map[string]string{"k1": "v1"}), + DeleteEndpoint(endpoint1), ApplyDP(), }, TestCaseMetadata: &TestCaseMetadata{ @@ -1181,29 +1160,19 @@ func updatePodTests() []*SerialTestCase { dptestutils.SetPolicy(emptySet), dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), dptestutils.SetPolicy(nsXSet, ip1), - dptestutils.SetPolicy(podK2Set, ip1), - dptestutils.SetPolicy(podK2V2Set, ip1), - // not yet garbage-collected - dptestutils.SetPolicy(podK1Set), - dptestutils.SetPolicy(podK1V1Set), - }, - ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ - endpoint1: {}, + dptestutils.SetPolicy(podK1Set, ip1), + dptestutils.SetPolicy(podK1V1Set, ip1), }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{}, }, }, { - Description: "Sequence 3: Policy create --> Pod B Create --> Pod A create --> Pod B Update (unable to add second policy to endpoint until A cleanup)", + Description: "two endpoints, one with policy, one without", Actions: []*Action{ - UpdatePolicy(policyXBaseOnK1V1()), UpdatePolicy(policyXBase2OnK2V2()), - UpdatePolicy(policyXBase3OnK3V3()), - CreatePod("x", "b", ip1, thisNode, map[string]string{"k2": "v2"}), - ApplyDP(), - // UpdatePod() will fail for x/a since x/b is associated with the IP/Endpoint CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), - ApplyDP(), - UpdatePodLabels("x", "b", ip1, thisNode, nil, map[string]string{"k3": "v3"}), + CreateEndpoint(endpoint2, ip2), + CreatePod("x", "b", ip2, thisNode, map[string]string{"k2": "v2"}), ApplyDP(), }, TestCaseMetadata: &TestCaseMetadata{ @@ -1218,16 +1187,15 @@ func updatePodTests() []*SerialTestCase { ExpectedSetPolicies: []*hcn.SetPolicySetting{ dptestutils.SetPolicy(emptySet), dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), - dptestutils.SetPolicy(nsXSet, ip1), + dptestutils.SetPolicy(nsXSet, ip1, ip2), dptestutils.SetPolicy(podK1Set, ip1), dptestutils.SetPolicy(podK1V1Set, ip1), - dptestutils.SetPolicy(podK2Set, ip1), - dptestutils.SetPolicy(podK2V2Set, ip1), - dptestutils.SetPolicy(podK3Set, ip1), - dptestutils.SetPolicy(podK3V3Set, ip1), + dptestutils.SetPolicy(podK2Set, ip2), + dptestutils.SetPolicy(podK2V2Set, ip2), }, ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ - endpoint1: { + endpoint1: {}, + endpoint2: { { ID: "azure-acl-x-base2", Protocols: "", @@ -1254,18 +1222,26 @@ func updatePodTests() []*SerialTestCase { }, }, }, + } + + allTests := append(sequence1Tests, sequence2Tests...) + // allTests = append(allTests, podAssignmentSequence3Tests()...) + allTests = append(allTests, otherTests...) + return allTests +} + +// sequence 3 of issue 1729 +// seems like this sequence is impossible +// if it ever occurred, would need modifications in updatePod() and ipsetmanager +func podAssignmentSequence3Tests() []*SerialTestCase { + return []*SerialTestCase{ { - Description: "Sequence 3: Policy create --> Pod B Create --> Pod A create --> Pod B Update --> Pod A cleanup (able to add second policy)", + Description: "Sequence 3: Pod B Create --> Pod A create --> Pod A Cleanup (ensure correct IPSets)", Actions: []*Action{ - UpdatePolicy(policyXBaseOnK1V1()), - UpdatePolicy(policyXBase2OnK2V2()), - UpdatePolicy(policyXBase3OnK3V3()), CreatePod("x", "b", ip1, thisNode, map[string]string{"k2": "v2"}), ApplyDP(), - // UpdatePod() will fail for x/a since x/b is associated with the IP/Endpoint CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), - ApplyDP(), - UpdatePodLabels("x", "b", ip1, thisNode, nil, map[string]string{"k3": "v3"}), + // UpdatePod() will fail for both x/a and x/b ApplyDP(), DeletePod("x", "a", ip1, map[string]string{"k1": "v1"}), ApplyDP(), @@ -1285,12 +1261,44 @@ func updatePodTests() []*SerialTestCase { dptestutils.SetPolicy(nsXSet, ip1), dptestutils.SetPolicy(podK2Set, ip1), dptestutils.SetPolicy(podK2V2Set, ip1), - dptestutils.SetPolicy(podK3Set, ip1), - dptestutils.SetPolicy(podK3V3Set, ip1), - // not garbage-collected yet + // not yet garbage-collected dptestutils.SetPolicy(podK1Set), dptestutils.SetPolicy(podK1V1Set), }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ + endpoint1: {}, + }, + }, + }, + { + Description: "Sequence 3: Policy create --> Pod B Create --> Pod A create", + Actions: []*Action{ + UpdatePolicy(policyXBaseOnK1V1()), + UpdatePolicy(policyXBase2OnK2V2()), + CreatePod("x", "b", ip1, thisNode, map[string]string{"k2": "v2"}), + ApplyDP(), + // UpdatePod() will fail for x/a since x/b is associated with the IP/Endpoint + CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + ApplyDP(), + }, + TestCaseMetadata: &TestCaseMetadata{ + Tags: []Tag{ + podCrudTag, + netpolCrudTag, + }, + DpCfg: defaultWindowsDPCfg, + InitialEndpoints: []*hcn.HostComputeEndpoint{ + dptestutils.Endpoint(endpoint1, ip1), + }, + ExpectedSetPolicies: []*hcn.SetPolicySetting{ + dptestutils.SetPolicy(emptySet), + dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), + dptestutils.SetPolicy(nsXSet, ip1), + dptestutils.SetPolicy(podK1Set, ip1), + dptestutils.SetPolicy(podK1V1Set, ip1), + dptestutils.SetPolicy(podK2Set, ip1), + dptestutils.SetPolicy(podK2V2Set, ip1), + }, ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ endpoint1: { { @@ -1315,43 +1323,18 @@ func updatePodTests() []*SerialTestCase { RemotePorts: "", Priority: 222, }, - { - ID: "azure-acl-x-base3", - Protocols: "", - Action: "Allow", - Direction: "In", - LocalAddresses: "", - RemoteAddresses: "", - LocalPorts: "", - RemotePorts: "", - Priority: 222, - }, - { - ID: "azure-acl-x-base3", - Protocols: "", - Action: "Allow", - Direction: "Out", - LocalAddresses: "", - RemoteAddresses: "", - LocalPorts: "", - RemotePorts: "", - Priority: 222, - }, }, }, }, }, - } - - otherTests := []*SerialTestCase{ { - // doesn't really enforce behavior in DP, but one could look at logs to make sure we don't make any add/reset ACL SysCall into HNS - // log/codepath: "ignoring pod deletion on IP since the pod is not associated with the current endpoint" - Description: "ignore Pod update if added then deleted before ApplyDP()", + Description: "Sequence 3: Policy create --> Pod B Create --> Pod A create (skip first ApplyDP())", Actions: []*Action{ UpdatePolicy(policyXBaseOnK1V1()), + UpdatePolicy(policyXBase2OnK2V2()), + CreatePod("x", "b", ip1, thisNode, map[string]string{"k2": "v2"}), CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), - DeletePod("x", "a", ip1, map[string]string{"k1": "v1"}), + // UpdatePod() will fail for both x/a and x/b ApplyDP(), }, TestCaseMetadata: &TestCaseMetadata{ @@ -1366,9 +1349,11 @@ func updatePodTests() []*SerialTestCase { ExpectedSetPolicies: []*hcn.SetPolicySetting{ dptestutils.SetPolicy(emptySet), dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), - dptestutils.SetPolicy(nsXSet), - dptestutils.SetPolicy(podK1Set), - dptestutils.SetPolicy(podK1V1Set), + dptestutils.SetPolicy(nsXSet, ip1), + dptestutils.SetPolicy(podK1Set, ip1), + dptestutils.SetPolicy(podK1V1Set, ip1), + dptestutils.SetPolicy(podK2Set, ip1), + dptestutils.SetPolicy(podK2V2Set, ip1), }, ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ endpoint1: {}, @@ -1376,14 +1361,14 @@ func updatePodTests() []*SerialTestCase { }, }, { - // doesn't really enforce behavior in DP, but one could look at logs to make sure we don't make a reset ACL SysCall into HNS - // log/codepath: "ignoring pod deletion on IP since there is no corresponding endpoint" - Description: "ignore Pod delete for deleted endpoint", + Description: "Sequence 3: Policy create --> Pod B Create --> Pod A create --> Pod A Cleanup", Actions: []*Action{ UpdatePolicy(policyXBaseOnK1V1()), + UpdatePolicy(policyXBase2OnK2V2()), + CreatePod("x", "b", ip1, thisNode, map[string]string{"k2": "v2"}), CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + // UpdatePod() will fail for both x/a and x/b ApplyDP(), - DeleteEndpoint(endpoint1), DeletePod("x", "a", ip1, map[string]string{"k1": "v1"}), ApplyDP(), }, @@ -1399,51 +1384,30 @@ func updatePodTests() []*SerialTestCase { ExpectedSetPolicies: []*hcn.SetPolicySetting{ dptestutils.SetPolicy(emptySet), dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), - dptestutils.SetPolicy(nsXSet), + dptestutils.SetPolicy(nsXSet, ip1), + dptestutils.SetPolicy(podK2Set, ip1), + dptestutils.SetPolicy(podK2V2Set, ip1), + // not yet garbage-collected dptestutils.SetPolicy(podK1Set), dptestutils.SetPolicy(podK1V1Set), }, - ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{}, - }, - }, - { - // doesn't really enforce behavior in DP, but one could look at logs to make sure we don't make a reset ACL SysCall into HNS - // log/codepath: "ignoring pod deletion on IP since there is no corresponding endpoint" - Description: "ignore Pod delete for deleted endpoint (skip first ApplyDP())", - Actions: []*Action{ - UpdatePolicy(policyXBaseOnK1V1()), - CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), - DeleteEndpoint(endpoint1), - DeletePod("x", "a", ip1, map[string]string{"k1": "v1"}), - ApplyDP(), - }, - TestCaseMetadata: &TestCaseMetadata{ - Tags: []Tag{ - podCrudTag, - netpolCrudTag, - }, - DpCfg: defaultWindowsDPCfg, - InitialEndpoints: []*hcn.HostComputeEndpoint{ - dptestutils.Endpoint(endpoint1, ip1), - }, - ExpectedSetPolicies: []*hcn.SetPolicySetting{ - dptestutils.SetPolicy(emptySet), - dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), - dptestutils.SetPolicy(nsXSet), - dptestutils.SetPolicy(podK1Set), - dptestutils.SetPolicy(podK1V1Set), + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ + endpoint1: {}, }, - ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{}, }, }, { - // doesn't really enforce behavior in DP, but one could look at logs to make sure we don't make an add ACL SysCall into HNS - // log/codepath: "ignoring pod update since there is no corresponding endpoint" - Description: "ignore Pod update for deleted endpoint", + Description: "Sequence 3: Policy create --> Pod B Create --> Pod A create --> Pod B Update (unable to add second policy to endpoint until A cleanup)", Actions: []*Action{ UpdatePolicy(policyXBaseOnK1V1()), + UpdatePolicy(policyXBase2OnK2V2()), + UpdatePolicy(policyXBase3OnK3V3()), + CreatePod("x", "b", ip1, thisNode, map[string]string{"k2": "v2"}), + ApplyDP(), + // UpdatePod() will fail for x/a since x/b is associated with the IP/Endpoint CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), - DeleteEndpoint(endpoint1), + ApplyDP(), + UpdatePodLabels("x", "b", ip1, thisNode, nil, map[string]string{"k3": "v3"}), ApplyDP(), }, TestCaseMetadata: &TestCaseMetadata{ @@ -1461,17 +1425,53 @@ func updatePodTests() []*SerialTestCase { dptestutils.SetPolicy(nsXSet, ip1), dptestutils.SetPolicy(podK1Set, ip1), dptestutils.SetPolicy(podK1V1Set, ip1), + dptestutils.SetPolicy(podK2Set, ip1), + dptestutils.SetPolicy(podK2V2Set, ip1), + dptestutils.SetPolicy(podK3Set, ip1), + dptestutils.SetPolicy(podK3V3Set, ip1), + }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ + endpoint1: { + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "In", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "Out", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + }, }, - ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{}, }, }, { - Description: "still apply Policy to new Pod IP even when original Pod delete event hasn't happened yet", + Description: "Sequence 3: Policy create --> Pod B Create --> Pod A create --> Pod B Update --> Pod A cleanup (able to add second policy)", Actions: []*Action{ + UpdatePolicy(policyXBaseOnK1V1()), UpdatePolicy(policyXBase2OnK2V2()), + UpdatePolicy(policyXBase3OnK3V3()), + CreatePod("x", "b", ip1, thisNode, map[string]string{"k2": "v2"}), + ApplyDP(), + // UpdatePod() will fail for x/a since x/b is associated with the IP/Endpoint CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), - CreateEndpoint(endpoint2, ip2), - CreatePod("x", "b", ip2, thisNode, map[string]string{"k2": "v2"}), + ApplyDP(), + UpdatePodLabels("x", "b", ip1, thisNode, nil, map[string]string{"k3": "v3"}), + ApplyDP(), + DeletePod("x", "a", ip1, map[string]string{"k1": "v1"}), ApplyDP(), }, TestCaseMetadata: &TestCaseMetadata{ @@ -1486,15 +1486,17 @@ func updatePodTests() []*SerialTestCase { ExpectedSetPolicies: []*hcn.SetPolicySetting{ dptestutils.SetPolicy(emptySet), dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), - dptestutils.SetPolicy(nsXSet, ip1, ip2), - dptestutils.SetPolicy(podK1Set, ip1), - dptestutils.SetPolicy(podK1V1Set, ip1), - dptestutils.SetPolicy(podK2Set, ip2), - dptestutils.SetPolicy(podK2V2Set, ip2), + dptestutils.SetPolicy(nsXSet, ip1), + dptestutils.SetPolicy(podK2Set, ip1), + dptestutils.SetPolicy(podK2V2Set, ip1), + dptestutils.SetPolicy(podK3Set, ip1), + dptestutils.SetPolicy(podK3V3Set, ip1), + // not garbage-collected yet + dptestutils.SetPolicy(podK1Set), + dptestutils.SetPolicy(podK1V1Set), }, ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ - endpoint1: {}, - endpoint2: { + endpoint1: { { ID: "azure-acl-x-base2", Protocols: "", @@ -1517,16 +1519,34 @@ func updatePodTests() []*SerialTestCase { RemotePorts: "", Priority: 222, }, + { + ID: "azure-acl-x-base3", + Protocols: "", + Action: "Allow", + Direction: "In", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + { + ID: "azure-acl-x-base3", + Protocols: "", + Action: "Allow", + Direction: "Out", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, }, }, }, }, } - allTests := append(sequence1Tests, sequence2Tests...) - allTests = append(allTests, sequence3Tests...) - allTests = append(allTests, otherTests...) - return allTests } func getAllMultiJobTests() []*MultiJobTestCase { diff --git a/npm/pkg/dataplane/dataplane.go b/npm/pkg/dataplane/dataplane.go index 411e5a20d8..3bebda5eeb 100644 --- a/npm/pkg/dataplane/dataplane.go +++ b/npm/pkg/dataplane/dataplane.go @@ -24,30 +24,14 @@ type Config struct { *policies.PolicyManagerCfg } -// dirtyIPCache tracks dirty IPs and Pod updates for those IPs. -// It tracks IPSets a Running Pod was recently added to or removed from, and which deleted Pods were just deleted from an IP. -// assumption: a Pod won't take up its previously used IP when restarting (see https://stackoverflow.com/questions/52362514/when-will-the-kubernetes-pod-ip-change) -// A Pod may be associated with more than one dirtyIP after Pod restarts. The number some ApplyDataPlane fails several times or ApplyDataPlane is asynchronous. -type dirtyIPCache struct { +type updatePodCache struct { sync.Mutex - // cache maps IP to dirtyIP - cache map[string]*dirtyIP + // cache maps Pod Key to its updateNPMPod + cache map[string]*updateNPMPod } -func newDirtyIPCache() *dirtyIPCache { - return &dirtyIPCache{cache: make(map[string]*dirtyIP)} -} - -// getOrCreateDirtyIP returns the dirtyIP for the given PodMetadata. If it doesn't exist, it creates a new one. -// Caller should lock the dirtyIPCache before calling this function. -func (d *dirtyIPCache) getOrCreateDirtyIP(ip string) *dirtyIP { - dIP, ok := d.cache[ip] - if !ok { - dIP = newDirtyIP(ip) - d.cache[ip] = dIP - } - - return dIP +func newUpdatePodCache() *updatePodCache { + return &updatePodCache{cache: make(map[string]*updateNPMPod)} } type endpointCache struct { @@ -67,10 +51,10 @@ type DataPlane struct { nodeName string // endpointCache stores all endpoints of the network (including off-node) // Key is PodIP - endpointCache *endpointCache - ioShim *common.IOShim - dirtyIPCache *dirtyIPCache - stopChannel <-chan struct{} + endpointCache *endpointCache + ioShim *common.IOShim + updatePodCache *updatePodCache + stopChannel <-chan struct{} } func NewDataPlane(nodeName string, ioShim *common.IOShim, cfg *Config, stopChannel <-chan struct{}) (*DataPlane, error) { @@ -80,14 +64,14 @@ func NewDataPlane(nodeName string, ioShim *common.IOShim, cfg *Config, stopChann cfg.IPSetManagerCfg.AddEmptySetToLists = true } dp := &DataPlane{ - Config: cfg, - policyMgr: policies.NewPolicyManager(ioShim, cfg.PolicyManagerCfg), - ipsetMgr: ipsets.NewIPSetManager(cfg.IPSetManagerCfg, ioShim), - endpointCache: newEndpointCache(), - nodeName: nodeName, - ioShim: ioShim, - dirtyIPCache: newDirtyIPCache(), - stopChannel: stopChannel, + Config: cfg, + policyMgr: policies.NewPolicyManager(ioShim, cfg.PolicyManagerCfg), + ipsetMgr: ipsets.NewIPSetManager(cfg.IPSetManagerCfg, ioShim), + endpointCache: newEndpointCache(), + nodeName: nodeName, + ioShim: ioShim, + updatePodCache: newUpdatePodCache(), + stopChannel: stopChannel, } err := dp.BootupDataplane() @@ -159,12 +143,16 @@ func (dp *DataPlane) AddToSets(setNames []*ipsets.IPSetMetadata, podMetadata *Po return fmt.Errorf("error: unexpectedly found deleted pod in AddToSets. podMetadata: %+v", podMetadata) } - // lock dirtyIPCache while reading/modifying - dp.dirtyIPCache.Lock() - defer dp.dirtyIPCache.Unlock() + // lock updatePodCache while reading/modifying + dp.updatePodCache.Lock() + defer dp.updatePodCache.Unlock() + + updatePod, ok := dp.updatePodCache.cache[podMetadata.PodKey] + if !ok { + updatePod = newUpdateNPMPod(podMetadata) + dp.updatePodCache.cache[podMetadata.PodKey] = updatePod + } - dIP := dp.dirtyIPCache.getOrCreateDirtyIP(podMetadata.PodIP) - updatePod := dIP.getOrCreateUpdatePod(podMetadata) updatePod.updateIPSetsToAdd(setNames) } @@ -182,20 +170,23 @@ func (dp *DataPlane) RemoveFromSets(setNames []*ipsets.IPSetMetadata, podMetadat if dp.shouldUpdatePod() && (podMetadata.NodeName == dp.nodeName || podMetadata.wasDeleted()) { klog.Infof("[DataPlane] processing RemoveFromSets updatePod. podMetadata: %+v", podMetadata) - // lock dirtyIPCache while reading/modifying - dp.dirtyIPCache.Lock() - defer dp.dirtyIPCache.Unlock() + // lock updatePodCache while reading/modifying + dp.updatePodCache.Lock() + defer dp.updatePodCache.Unlock() - dIP := dp.dirtyIPCache.getOrCreateDirtyIP(podMetadata.PodIP) - updatePod := dIP.getOrCreateUpdatePod(podMetadata) if podMetadata.wasDeleted() { - updatePod.markDeleted() - // can discard IPSet updates - updatePod.IPSetsToAdd = nil - updatePod.IPSetsToRemove = nil - } else { - updatePod.updateIPSetsToRemove(setNames) + klog.Infof("[DataPlane] removing deleted pod from updatePodCache. podMetadata: %+v", podMetadata) + delete(dp.updatePodCache.cache, podMetadata.PodKey) + return nil } + + updatePod, ok := dp.updatePodCache.cache[podMetadata.PodKey] + if !ok { + updatePod = newUpdateNPMPod(podMetadata) + dp.updatePodCache.cache[podMetadata.PodKey] = updatePod + } + + updatePod.updateIPSetsToRemove(setNames) } return nil @@ -241,20 +232,20 @@ func (dp *DataPlane) ApplyDataPlane() error { return nil } - // lock dirtyIPCache while driving goal state to kernel + // lock updatePodCache while driving goal state to kernel // prevents another ApplyDataplane call from updating the same pods - dp.dirtyIPCache.Lock() - defer dp.dirtyIPCache.Unlock() + dp.updatePodCache.Lock() + defer dp.updatePodCache.Unlock() - for _, dIP := range dp.dirtyIPCache.cache { - err := dp.updatePod(dIP) + for podKey, pod := range dp.updatePodCache.cache { + err := dp.updatePod(pod) if err != nil { // move on to the next and later return as success since this can be retried irrespective of other operations - metrics.SendErrorLogAndMetric(util.DaemonDataplaneID, "failed to update dirty IP while applying the dataplane. dirty IP object: %+v, err: [%s]", dIP, err.Error()) + metrics.SendErrorLogAndMetric(util.DaemonDataplaneID, "failed to update pod while applying the dataplane. podKey: [%s], err: [%s]", podKey, err.Error()) continue } - delete(dp.dirtyIPCache.cache, dIP.ip) + delete(dp.updatePodCache.cache, podKey) } } return nil diff --git a/npm/pkg/dataplane/dataplane_windows.go b/npm/pkg/dataplane/dataplane_windows.go index 48c551c7c1..b1ebe53682 100644 --- a/npm/pkg/dataplane/dataplane_windows.go +++ b/npm/pkg/dataplane/dataplane_windows.go @@ -21,10 +21,7 @@ const ( refreshLocalEndpoints bool = false ) -var ( - errPolicyModeUnsupported = errors.New("only IPSet policy mode is supported") - errMismanagedPodKey = errors.New("the endpoint corresponds to a different pod") -) +var errPolicyModeUnsupported = errors.New("only IPSet policy mode is supported") // initializeDataPlane will help gather network and endpoint details func (dp *DataPlane) initializeDataPlane() error { @@ -113,66 +110,18 @@ func (dp *DataPlane) shouldUpdatePod() bool { // updatePod has two responsibilities in windows // 1. Will call into dataplane and updates endpoint references of this pod. // 2. Will check for existing applicable network policies and applies it on endpoint. -// Assumptions: -// - We will receive a cleanup event for a Pod in the correct order (i.e. after a create event for the same Pod) -func (dp *DataPlane) updatePod(dIP *dirtyIP) error { - klog.Infof("[DataPlane] updatePod called for dirty IP object: %+v", dIP) +// Assumption: a Pod won't take up its previously used IP when restarting (see https://stackoverflow.com/questions/52362514/when-will-the-kubernetes-pod-ip-change) +func (dp *DataPlane) updatePod(pod *updateNPMPod) error { + klog.Infof("[DataPlane] updatePod called. podKey: %s", pod.PodKey) + if len(pod.IPSetsToAdd) == 0 && len(pod.IPSetsToRemove) == 0 { + // nothing to do + return nil + } // lock the endpoint cache while we read/modify the endpoint with the pod's IP dp.endpointCache.Lock() defer dp.endpointCache.Unlock() - // reset ACLs for any deleted Pod that was wrongly assigned to an endpoint (fixes #1729) - for podKey, pod := range dIP.pods { - if !pod.wasDeleted() { - continue - } - - endpoint, ok := dp.endpointCache.cache[dIP.ip] - if !ok { - // this branch will be taken often for the typical flow. 1) Endpoint deleted, 2) Pod delete event - // additionally, this branch will be always be taken for an IP on a different node - klog.Infof("[DataPlane] ignoring pod deletion on IP since there is no corresponding endpoint. IP: %s. podKey: %s", dIP.ip, podKey) - delete(dIP.pods, podKey) - continue - } - - if endpoint.podKey != podKey { - klog.Infof("[DataPlane] ignoring pod deletion on IP since the pod is not associated with the current endpoint. IP: %s. podKey: %s. endpoint: %+v", dIP.ip, podKey, endpoint) - delete(dIP.pods, podKey) - continue - } - - // this Pod was incorrectly assigned to the Endpoint (see issue #1729) - if err := dp.policyMgr.ResetEndpoint(endpoint.id); err != nil { - return fmt.Errorf("failed to reset endpoint for deleted pod. IP: %s. podKey: %s. endpoint: %+v. err: %w", dIP.ip, podKey, endpoint, err) - } - - // mark the Endpoint as unassigned - endpoint.podKey = unspecifiedPodKey - delete(dIP.pods, podKey) - } - - if len(dIP.pods) == 0 { - // no running pod to update - return nil - } - - if len(dIP.pods) > 1 { - return fmt.Errorf("error: updatePod called for multiple Running Pods. dirty IP object: %+v", dIP) - } - - // there is only one pod in the map - var pod *updateNPMPod - for _, pod = range dIP.pods { - break - } - - if len(pod.IPSetsToAdd) == 0 && len(pod.IPSetsToRemove) == 0 { - // nothing more to do - return nil - } - // Check if pod is already present in cache endpoint, ok := dp.endpointCache.cache[pod.PodIP] if !ok { @@ -186,8 +135,22 @@ func (dp *DataPlane) updatePod(dIP *dirtyIP) error { // while refreshing pod endpoints, newly discovered endpoints are given an unspecified pod key klog.Infof("[DataPlane] associating pod with endpoint. podKey: %s. endpoint: %+v", pod.PodKey, endpoint) endpoint.podKey = pod.PodKey + } else if pod.PodKey == endpoint.previousIncorrectPodKey { + klog.Infof("[DataPlane] ignoring pod update since this pod was previously and incorrectly assigned to this endpoint. endpoint: %+v", endpoint) + return nil } else if pod.PodKey != endpoint.podKey { - return fmt.Errorf("pod key mismatch. Expected: %s, Actual: %s. Error: [%w]", pod.PodKey, endpoint.podKey, errMismanagedPodKey) + // solves issue 1729 + klog.Infof("[DataPlane] pod key has changed. will reset endpoint acls and skip looking ipsets to remove. new podKey: %s. previous endpoint: %+v", pod.PodKey, endpoint) + if err := dp.policyMgr.ResetEndpoint(endpoint.id); err != nil { + return fmt.Errorf("failed to reset endpoint for pod with incorrect pod key. new podKey: %s. previous endpoint: %+v. err: %w", pod.PodKey, endpoint, err) + } + + // mark this after successful reset. If before reset, we would not retry on failure + endpoint.previousIncorrectPodKey = endpoint.podKey + endpoint.podKey = pod.PodKey + + // all ACLs were removed, so in case there were ipsets to remove, there's no need to look for policies to delete + pod.IPSetsToRemove = nil } // for every ipset we're removing from the endpoint, remove from the endpoint any policy that requires the set diff --git a/npm/pkg/dataplane/dataplane_windows_test.go b/npm/pkg/dataplane/dataplane_windows_test.go index b1dc2170a4..17d696a241 100644 --- a/npm/pkg/dataplane/dataplane_windows_test.go +++ b/npm/pkg/dataplane/dataplane_windows_test.go @@ -34,6 +34,9 @@ func testSerialCases(t *testing.T, tests []*SerialTestCase) { for i, tt := range tests { i := i tt := tt + if tt.Description != "ignore Pod update if added then deleted before ApplyDP()" { + continue + } t.Run(tt.Description, func(t *testing.T) { t.Logf("beginning test #%d. Description: [%s]. Tags: %+v", i, tt.Description, tt.Tags) @@ -60,6 +63,7 @@ func testSerialCases(t *testing.T, tests []*SerialTestCase) { } dptestutils.VerifyHNSCache(t, hns, tt.ExpectedSetPolicies, tt.ExpectedEnpdointACLs) + require.FailNow(t, "done") }) } } diff --git a/npm/pkg/dataplane/types.go b/npm/pkg/dataplane/types.go index 6a1d3030c3..4281dedf4b 100644 --- a/npm/pkg/dataplane/types.go +++ b/npm/pkg/dataplane/types.go @@ -8,6 +8,8 @@ import ( "github.com/Azure/azure-container-networking/npm/util" ) +const NodePlaceholderForDeletedPod = "DELETED_POD_INDICATOR" + type GenericDataplane interface { BootupDataplane() error RunPeriodicTasks() @@ -27,31 +29,6 @@ type GenericDataplane interface { UpdatePolicy(policies *policies.NPMNetworkPolicy) error } -// dirtyIP represents an IP and all of its updateNPMPod events. -// there should be only one updateNPMPod such that updateNPMPod.wasDeleted() == false -type dirtyIP struct { - ip string - // states maps pod key to its updateNPMPod - pods map[string]*updateNPMPod -} - -func newDirtyIP(ip string) *dirtyIP { - return &dirtyIP{ - ip: ip, - pods: make(map[string]*updateNPMPod), - } -} - -func (d *dirtyIP) getOrCreateUpdatePod(podMetadata *PodMetadata) *updateNPMPod { - uPod, ok := d.pods[podMetadata.PodKey] - if !ok { - uPod = newUpdateNPMPod(podMetadata) - d.pods[podMetadata.PodKey] = uPod - } - - return uPod -} - // UpdateNPMPod pod controller will populate and send this datastructure to dataplane // to update the dataplane with the latest pod information // this helps in calculating if any update needs to have policies applied or removed @@ -68,7 +45,6 @@ type PodMetadata struct { PodKey string PodIP string NodeName string - deleted bool } // NewPodMetadata is for Pods that were created or have updated labels/namedports @@ -80,19 +56,8 @@ func NewPodMetadata(podKey, podIP, nodeName string) *PodMetadata { } } -// NewDeletedPodMetadata is for Pods that were deleted (e.g. Pod IP has changed) -func NewDeletedPodMetadata(podKey, podIP string) *PodMetadata { - pm := NewPodMetadata(podKey, podIP, "") - pm.deleted = true - return pm -} - -func (pm *PodMetadata) wasDeleted() bool { - return pm.deleted -} - -func (pm *PodMetadata) markDeleted() { - pm.deleted = true +func (p *PodMetadata) wasDeleted() bool { + return p.NodeName == NodePlaceholderForDeletedPod } func (p *PodMetadata) Namespace() string { diff --git a/npm/pkg/dataplane/types_windows.go b/npm/pkg/dataplane/types_windows.go index f4d911993d..c0841da3d7 100644 --- a/npm/pkg/dataplane/types_windows.go +++ b/npm/pkg/dataplane/types_windows.go @@ -10,6 +10,8 @@ type npmEndpoint struct { id string ip string podKey string + // previousIncorrectPodKey represents a Pod that was previously and incorrectly assigned to this endpoint (see issue 1729) + previousIncorrectPodKey string // Map with Key as Network Policy name to to emulate set // and value as struct{} for minimal memory consumption netPolReference map[string]struct{} diff --git a/npm/pkg/dataplane/types_windows_test.go b/npm/pkg/dataplane/types_windows_test.go index 6cf1af946d..942406b6b0 100644 --- a/npm/pkg/dataplane/types_windows_test.go +++ b/npm/pkg/dataplane/types_windows_test.go @@ -260,7 +260,7 @@ func DeletePod(namespace, name, ip string, labels map[string]string) *Action { podKey := fmt.Sprintf("%s/%s", namespace, name) return &Action{ DPAction: &PodDeleteAction{ - Pod: NewDeletedPodMetadata(podKey, ip), + Pod: NewPodMetadata(podKey, ip, NodePlaceholderForDeletedPod), Labels: labels, }, } From 23848472ee6c366e443862c8b58baa41053e1ac1 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Thu, 2 Mar 2023 17:41:58 -0800 Subject: [PATCH 11/15] remove pod delete logic --- .../controllers/v2/podController.go | 8 ++--- .../controllers/v2/podController_test.go | 35 ++++++++----------- .../dataplane-test-cases_windows_test.go | 1 - npm/pkg/dataplane/dataplane.go | 14 ++------ npm/pkg/dataplane/types.go | 6 ---- npm/pkg/dataplane/types_windows_test.go | 3 +- 6 files changed, 22 insertions(+), 45 deletions(-) diff --git a/npm/pkg/controlplane/controllers/v2/podController.go b/npm/pkg/controlplane/controllers/v2/podController.go index 2eeb1b8318..34764a8475 100644 --- a/npm/pkg/controlplane/controllers/v2/podController.go +++ b/npm/pkg/controlplane/controllers/v2/podController.go @@ -513,10 +513,8 @@ func (c *PodController) syncAddAndUpdatePod(newPodObj *corev1.Pod) (metrics.Oper newPodPorts := common.GetContainerPortList(newPodObj) if !reflect.DeepEqual(cachedNpmPod.ContainerPorts, newPodPorts) { // Delete cached pod's named ports from its ipset. - // Node name is only used in Windows. Keep consistency with above RemoveFromSets() calls by using the Pod's node name. - // manageNamedPortIpsets() also does nothing for Windows currently. if err = c.manageNamedPortIpsets( - cachedNpmPod.ContainerPorts, podKey, cachedNpmPod.PodIP, newPodMetadata.NodeName, deleteNamedPort); err != nil { + cachedNpmPod.ContainerPorts, podKey, cachedNpmPod.PodIP, "", deleteNamedPort); err != nil { return metrics.UpdateOp, fmt.Errorf("[syncAddAndUpdatePod] Error: failed to delete pod from named port ipset with err: %w", err) } // Since portList ipset deletion is successful, NPM can remove cachedContainerPorts @@ -543,7 +541,7 @@ func (c *PodController) cleanUpDeletedPod(cachedNpmPodKey string) error { } var err error - cachedPodMetadata := dataplane.NewPodMetadata(cachedNpmPodKey, cachedNpmPod.PodIP, dataplane.NodePlaceholderForDeletedPod) + cachedPodMetadata := dataplane.NewPodMetadata(cachedNpmPodKey, cachedNpmPod.PodIP, "") // Delete the pod from its namespace's ipset. // note: NodeName empty is not going to call update pod if err = c.dp.RemoveFromSets( @@ -569,7 +567,7 @@ func (c *PodController) cleanUpDeletedPod(cachedNpmPodKey string) error { // Delete pod's named ports from its ipset. Need to pass true in the manageNamedPortIpsets function call if err = c.manageNamedPortIpsets( - cachedNpmPod.ContainerPorts, cachedNpmPodKey, cachedNpmPod.PodIP, dataplane.NodePlaceholderForDeletedPod, deleteNamedPort); err != nil { + cachedNpmPod.ContainerPorts, cachedNpmPodKey, cachedNpmPod.PodIP, "", deleteNamedPort); err != nil { return fmt.Errorf("[cleanUpDeletedPod] Error: failed to delete pod from named port ipset with err: %w", err) } diff --git a/npm/pkg/controlplane/controllers/v2/podController_test.go b/npm/pkg/controlplane/controllers/v2/podController_test.go index 41a901a647..d1ae9c5c02 100644 --- a/npm/pkg/controlplane/controllers/v2/podController_test.go +++ b/npm/pkg/controlplane/controllers/v2/podController_test.go @@ -429,14 +429,13 @@ func TestDeletePod(t *testing.T) { } dp.EXPECT().ApplyDataPlane().Return(nil).Times(2) // Delete pod section - deleteMetadata := dataplane.NewPodMetadata("test-namespace/test-pod", "1.2.3.4", dataplane.NodePlaceholderForDeletedPod) - dp.EXPECT().RemoveFromSets(mockIPSets[:1], deleteMetadata).Return(nil).Times(1) - dp.EXPECT().RemoveFromSets(mockIPSets[1:], deleteMetadata).Return(nil).Times(1) + dp.EXPECT().RemoveFromSets(mockIPSets[:1], podMetadata1).Return(nil).Times(1) + dp.EXPECT().RemoveFromSets(mockIPSets[1:], podMetadata1).Return(nil).Times(1) if !util.IsWindowsDP() { dp.EXPECT(). RemoveFromSets( []*ipsets.IPSetMetadata{ipsets.NewIPSetMetadata("app:test-pod", ipsets.NamedPorts)}, - dataplane.NewPodMetadata("test-namespace/test-pod", "1.2.3.4,8080", dataplane.NodePlaceholderForDeletedPod), + dataplane.NewPodMetadata("test-namespace/test-pod", "1.2.3.4,8080", ""), ). Return(nil).Times(1) } @@ -546,14 +545,13 @@ func TestDeletePodWithTombstoneAfterAddingPod(t *testing.T) { } dp.EXPECT().ApplyDataPlane().Return(nil).Times(2) // Delete pod section - deleteMetadata := dataplane.NewPodMetadata("test-namespace/test-pod", "1.2.3.4", dataplane.NodePlaceholderForDeletedPod) - dp.EXPECT().RemoveFromSets(mockIPSets[:1], deleteMetadata).Return(nil).Times(1) - dp.EXPECT().RemoveFromSets(mockIPSets[1:], deleteMetadata).Return(nil).Times(1) + dp.EXPECT().RemoveFromSets(mockIPSets[:1], podMetadata1).Return(nil).Times(1) + dp.EXPECT().RemoveFromSets(mockIPSets[1:], podMetadata1).Return(nil).Times(1) if !util.IsWindowsDP() { dp.EXPECT(). RemoveFromSets( []*ipsets.IPSetMetadata{ipsets.NewIPSetMetadata("app:test-pod", ipsets.NamedPorts)}, - dataplane.NewPodMetadata("test-namespace/test-pod", "1.2.3.4,8080", dataplane.NodePlaceholderForDeletedPod), + dataplane.NewPodMetadata("test-namespace/test-pod", "1.2.3.4,8080", ""), ). Return(nil).Times(1) } @@ -666,14 +664,13 @@ func TestEmptyIPUpdate(t *testing.T) { } dp.EXPECT().ApplyDataPlane().Return(nil).Times(2) // Delete pod section - deleteMetadata := dataplane.NewPodMetadata("test-namespace/test-pod", "1.2.3.4", dataplane.NodePlaceholderForDeletedPod) - dp.EXPECT().RemoveFromSets(mockIPSets[:1], deleteMetadata).Return(nil).Times(1) - dp.EXPECT().RemoveFromSets(mockIPSets[1:], deleteMetadata).Return(nil).Times(1) + dp.EXPECT().RemoveFromSets(mockIPSets[:1], podMetadata1).Return(nil).Times(1) + dp.EXPECT().RemoveFromSets(mockIPSets[1:], podMetadata1).Return(nil).Times(1) if !util.IsWindowsDP() { dp.EXPECT(). RemoveFromSets( []*ipsets.IPSetMetadata{ipsets.NewIPSetMetadata("app:test-pod", ipsets.NamedPorts)}, - dataplane.NewPodMetadata("test-namespace/test-pod", "1.2.3.4,8080", dataplane.NodePlaceholderForDeletedPod), + dataplane.NewPodMetadata("test-namespace/test-pod", "1.2.3.4,8080", ""), ). Return(nil).Times(1) } @@ -760,14 +757,13 @@ func TestIPAddressUpdatePod(t *testing.T) { } dp.EXPECT().ApplyDataPlane().Return(nil).Times(2) // Delete pod section - deleteMetadata := dataplane.NewPodMetadata("test-namespace/test-pod", "1.2.3.4", dataplane.NodePlaceholderForDeletedPod) - dp.EXPECT().RemoveFromSets(mockIPSets[:1], deleteMetadata).Return(nil).Times(1) - dp.EXPECT().RemoveFromSets(mockIPSets[1:], deleteMetadata).Return(nil).Times(1) + dp.EXPECT().RemoveFromSets(mockIPSets[:1], podMetadata1).Return(nil).Times(1) + dp.EXPECT().RemoveFromSets(mockIPSets[1:], podMetadata1).Return(nil).Times(1) if !util.IsWindowsDP() { dp.EXPECT(). RemoveFromSets( []*ipsets.IPSetMetadata{ipsets.NewIPSetMetadata("app:test-pod", ipsets.NamedPorts)}, - dataplane.NewPodMetadata("test-namespace/test-pod", "1.2.3.4,8080", dataplane.NodePlaceholderForDeletedPod), + dataplane.NewPodMetadata("test-namespace/test-pod", "1.2.3.4,8080", ""), ). Return(nil).Times(1) } @@ -835,14 +831,13 @@ func TestPodStatusUpdatePod(t *testing.T) { } dp.EXPECT().ApplyDataPlane().Return(nil).Times(2) // Delete pod section - deleteMetadata := dataplane.NewPodMetadata("test-namespace/test-pod", "1.2.3.4", dataplane.NodePlaceholderForDeletedPod) - dp.EXPECT().RemoveFromSets(mockIPSets[:1], deleteMetadata).Return(nil).Times(1) - dp.EXPECT().RemoveFromSets(mockIPSets[1:], deleteMetadata).Return(nil).Times(1) + dp.EXPECT().RemoveFromSets(mockIPSets[:1], podMetadata1).Return(nil).Times(1) + dp.EXPECT().RemoveFromSets(mockIPSets[1:], podMetadata1).Return(nil).Times(1) if !util.IsWindowsDP() { dp.EXPECT(). RemoveFromSets( []*ipsets.IPSetMetadata{ipsets.NewIPSetMetadata("app:test-pod", ipsets.NamedPorts)}, - dataplane.NewPodMetadata("test-namespace/test-pod", "1.2.3.4,8080", dataplane.NodePlaceholderForDeletedPod), + dataplane.NewPodMetadata("test-namespace/test-pod", "1.2.3.4,8080", ""), ). Return(nil).Times(1) } diff --git a/npm/pkg/dataplane/dataplane-test-cases_windows_test.go b/npm/pkg/dataplane/dataplane-test-cases_windows_test.go index d63dbc6392..eb924419b3 100644 --- a/npm/pkg/dataplane/dataplane-test-cases_windows_test.go +++ b/npm/pkg/dataplane/dataplane-test-cases_windows_test.go @@ -1459,7 +1459,6 @@ func updatePodTests() []*SerialTestCase { otherTests := []*SerialTestCase{ { - // log: "[DataPlane] removing deleted pod from updatePodCache. podMetadata: &{PodKey:x/a PodIP:10.0.0.1 NodeName:DELETED_POD_INDICATOR}" Description: "ignore Pod update if added then deleted before ApplyDP()", Actions: []*Action{ UpdatePolicy(policyXBaseOnK1V1()), diff --git a/npm/pkg/dataplane/dataplane.go b/npm/pkg/dataplane/dataplane.go index f4acd69ad0..eb5dd89930 100644 --- a/npm/pkg/dataplane/dataplane.go +++ b/npm/pkg/dataplane/dataplane.go @@ -136,13 +136,9 @@ func (dp *DataPlane) AddToSets(setNames []*ipsets.IPSetMetadata, podMetadata *Po return fmt.Errorf("[DataPlane] error while adding to set: %w", err) } - if dp.shouldUpdatePod() && (podMetadata.NodeName == dp.nodeName || podMetadata.wasDeleted()) { + if dp.shouldUpdatePod() && podMetadata.NodeName == dp.nodeName { klog.Infof("[DataPlane] processing AddToSets updatePod. podMetadata: %+v", podMetadata) - if podMetadata.wasDeleted() { - return fmt.Errorf("error: unexpectedly found deleted pod in AddToSets. podMetadata: %+v", podMetadata) - } - // lock updatePodCache while reading/modifying dp.updatePodCache.Lock() defer dp.updatePodCache.Unlock() @@ -167,19 +163,13 @@ func (dp *DataPlane) RemoveFromSets(setNames []*ipsets.IPSetMetadata, podMetadat return fmt.Errorf("[DataPlane] error while removing from set: %w", err) } - if dp.shouldUpdatePod() && (podMetadata.NodeName == dp.nodeName || podMetadata.wasDeleted()) { + if dp.shouldUpdatePod() && podMetadata.NodeName == dp.nodeName { klog.Infof("[DataPlane] processing RemoveFromSets updatePod. podMetadata: %+v", podMetadata) // lock updatePodCache while reading/modifying dp.updatePodCache.Lock() defer dp.updatePodCache.Unlock() - if podMetadata.wasDeleted() { - klog.Infof("[DataPlane] removing deleted pod from updatePodCache. podMetadata: %+v", podMetadata) - delete(dp.updatePodCache.cache, podMetadata.PodKey) - return nil - } - updatePod, ok := dp.updatePodCache.cache[podMetadata.PodKey] if !ok { updatePod = newUpdateNPMPod(podMetadata) diff --git a/npm/pkg/dataplane/types.go b/npm/pkg/dataplane/types.go index 4281dedf4b..e284f414ff 100644 --- a/npm/pkg/dataplane/types.go +++ b/npm/pkg/dataplane/types.go @@ -8,8 +8,6 @@ import ( "github.com/Azure/azure-container-networking/npm/util" ) -const NodePlaceholderForDeletedPod = "DELETED_POD_INDICATOR" - type GenericDataplane interface { BootupDataplane() error RunPeriodicTasks() @@ -56,10 +54,6 @@ func NewPodMetadata(podKey, podIP, nodeName string) *PodMetadata { } } -func (p *PodMetadata) wasDeleted() bool { - return p.NodeName == NodePlaceholderForDeletedPod -} - func (p *PodMetadata) Namespace() string { return strings.Split(p.PodKey, "/")[0] } diff --git a/npm/pkg/dataplane/types_windows_test.go b/npm/pkg/dataplane/types_windows_test.go index 942406b6b0..996a18319f 100644 --- a/npm/pkg/dataplane/types_windows_test.go +++ b/npm/pkg/dataplane/types_windows_test.go @@ -260,7 +260,8 @@ func DeletePod(namespace, name, ip string, labels map[string]string) *Action { podKey := fmt.Sprintf("%s/%s", namespace, name) return &Action{ DPAction: &PodDeleteAction{ - Pod: NewPodMetadata(podKey, ip, NodePlaceholderForDeletedPod), + // currently, the PodController doesn't share the node name + Pod: NewPodMetadata(podKey, ip, ""), Labels: labels, }, } From a8421ba7afd30eea66e39207cfae09ae764a7fb9 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Thu, 2 Mar 2023 17:45:56 -0800 Subject: [PATCH 12/15] remove change incorrect comment --- npm/pkg/dataplane/types.go | 1 - 1 file changed, 1 deletion(-) diff --git a/npm/pkg/dataplane/types.go b/npm/pkg/dataplane/types.go index e284f414ff..d8fd26afc3 100644 --- a/npm/pkg/dataplane/types.go +++ b/npm/pkg/dataplane/types.go @@ -45,7 +45,6 @@ type PodMetadata struct { NodeName string } -// NewPodMetadata is for Pods that were created or have updated labels/namedports func NewPodMetadata(podKey, podIP, nodeName string) *PodMetadata { return &PodMetadata{ PodKey: podKey, From d20fb6c8d23ce58926cc1767c7bf8fc921f5be5b Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Fri, 3 Mar 2023 09:57:58 -0800 Subject: [PATCH 13/15] fix lint --- npm/pkg/dataplane/dataplane-test-cases_windows_test.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/npm/pkg/dataplane/dataplane-test-cases_windows_test.go b/npm/pkg/dataplane/dataplane-test-cases_windows_test.go index eb924419b3..505c5b0306 100644 --- a/npm/pkg/dataplane/dataplane-test-cases_windows_test.go +++ b/npm/pkg/dataplane/dataplane-test-cases_windows_test.go @@ -1632,8 +1632,11 @@ func updatePodTests() []*SerialTestCase { }, } - allTests := append(sequence1Tests, sequence2Tests...) + allTests := sequence1Tests + allTests = append(allTests, sequence2Tests...) // allTests = append(allTests, podAssignmentSequence3Tests()...) + // make golint happy + _ = podAssignmentSequence3Tests() allTests = append(allTests, otherTests...) return allTests } From 3927fbf83252824b382e5b861da3b31adf12378a Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Fri, 3 Mar 2023 10:42:41 -0800 Subject: [PATCH 14/15] fix gofumpt --- npm/pkg/dataplane/dataplane-test-cases_windows_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/npm/pkg/dataplane/dataplane-test-cases_windows_test.go b/npm/pkg/dataplane/dataplane-test-cases_windows_test.go index 505c5b0306..25be2b8d51 100644 --- a/npm/pkg/dataplane/dataplane-test-cases_windows_test.go +++ b/npm/pkg/dataplane/dataplane-test-cases_windows_test.go @@ -1957,7 +1957,6 @@ func podAssignmentSequence3Tests() []*SerialTestCase { }, }, } - } func getAllMultiJobTests() []*MultiJobTestCase { From 589c8d1c71dc7032536b5f83ff19d1cfc40669a2 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Fri, 3 Mar 2023 11:24:26 -0800 Subject: [PATCH 15/15] remove changes to dp.go logs/comments --- npm/pkg/dataplane/dataplane.go | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/npm/pkg/dataplane/dataplane.go b/npm/pkg/dataplane/dataplane.go index eb5dd89930..995b6a11ce 100644 --- a/npm/pkg/dataplane/dataplane.go +++ b/npm/pkg/dataplane/dataplane.go @@ -26,7 +26,6 @@ type Config struct { type updatePodCache struct { sync.Mutex - // cache maps Pod Key to its updateNPMPod cache map[string]*updateNPMPod } @@ -137,14 +136,15 @@ func (dp *DataPlane) AddToSets(setNames []*ipsets.IPSetMetadata, podMetadata *Po } if dp.shouldUpdatePod() && podMetadata.NodeName == dp.nodeName { - klog.Infof("[DataPlane] processing AddToSets updatePod. podMetadata: %+v", podMetadata) + klog.Infof("[DataPlane] Updating Sets to Add for pod key %s", podMetadata.PodKey) - // lock updatePodCache while reading/modifying + // lock updatePodCache while reading/modifying or setting the updatePod in the cache dp.updatePodCache.Lock() defer dp.updatePodCache.Unlock() updatePod, ok := dp.updatePodCache.cache[podMetadata.PodKey] if !ok { + klog.Infof("[DataPlane] {AddToSet} pod key %s not found in updatePodCache. creating a new obj", podMetadata.PodKey) updatePod = newUpdateNPMPod(podMetadata) dp.updatePodCache.cache[podMetadata.PodKey] = updatePod } @@ -164,14 +164,15 @@ func (dp *DataPlane) RemoveFromSets(setNames []*ipsets.IPSetMetadata, podMetadat } if dp.shouldUpdatePod() && podMetadata.NodeName == dp.nodeName { - klog.Infof("[DataPlane] processing RemoveFromSets updatePod. podMetadata: %+v", podMetadata) + klog.Infof("[DataPlane] Updating Sets to Remove for pod key %s", podMetadata.PodKey) - // lock updatePodCache while reading/modifying + // lock updatePodCache while reading/modifying or setting the updatePod in the cache dp.updatePodCache.Lock() defer dp.updatePodCache.Unlock() updatePod, ok := dp.updatePodCache.cache[podMetadata.PodKey] if !ok { + klog.Infof("[DataPlane] {RemoveFromSet} pod key %s not found in updatePodCache. creating a new obj", podMetadata.PodKey) updatePod = newUpdateNPMPod(podMetadata) dp.updatePodCache.cache[podMetadata.PodKey] = updatePod } @@ -231,7 +232,7 @@ func (dp *DataPlane) ApplyDataPlane() error { err := dp.updatePod(pod) if err != nil { // move on to the next and later return as success since this can be retried irrespective of other operations - metrics.SendErrorLogAndMetric(util.DaemonDataplaneID, "failed to update pod while applying the dataplane. podKey: [%s], err: [%s]", podKey, err.Error()) + metrics.SendErrorLogAndMetric(util.DaemonDataplaneID, "failed to update pod while applying the dataplane. key: [%s], err: [%s]", podKey, err.Error()) continue }