diff --git a/npm/pkg/controlplane/controllers/v2/podController.go b/npm/pkg/controlplane/controllers/v2/podController.go index f7d92119fa..34764a8475 100644 --- a/npm/pkg/controlplane/controllers/v2/podController.go +++ b/npm/pkg/controlplane/controllers/v2/podController.go @@ -455,8 +455,7 @@ func (c *PodController) syncAddAndUpdatePod(newPodObj *corev1.Pod) (metrics.Oper addToIPSets, deleteFromIPSets := util.GetIPSetListCompareLabels(cachedNpmPod.Labels, newPodObj.Labels) newPodMetadata := dataplane.NewPodMetadata(podKey, newPodObj.Status.PodIP, newPodObj.Spec.NodeName) - // todo: verify pulling nodename from newpod, - // if a pod is getting deleted, we do not have to cleanup policies, so it is okay to pass in wrong nodename + // should have newPodMetadata == cachedPodMetadata since from branch above, we have cachedNpmPod.PodIP == newPodObj.Status.PodIP cachedPodMetadata := dataplane.NewPodMetadata(podKey, cachedNpmPod.PodIP, newPodMetadata.NodeName) // Delete the pod from its label's ipset. for _, removeIPSetName := range deleteFromIPSets { diff --git a/npm/pkg/dataplane/dataplane-test-cases_windows_test.go b/npm/pkg/dataplane/dataplane-test-cases_windows_test.go index 87dd6342c3..25be2b8d51 100644 --- a/npm/pkg/dataplane/dataplane-test-cases_windows_test.go +++ b/npm/pkg/dataplane/dataplane-test-cases_windows_test.go @@ -36,6 +36,8 @@ var ( podK1V1Set = ipsets.NewIPSetMetadata("k1:v1", ipsets.KeyValueLabelOfPod) podK2Set = ipsets.NewIPSetMetadata("k2", ipsets.KeyLabelOfPod) podK2V2Set = ipsets.NewIPSetMetadata("k2:v2", ipsets.KeyValueLabelOfPod) + podK3Set = ipsets.NewIPSetMetadata("k3", ipsets.KeyLabelOfPod) + podK3V3Set = ipsets.NewIPSetMetadata("k3:v3", ipsets.KeyValueLabelOfPod) // emptySet is a member of a list if enabled in the dp Config // in Windows, this Config option is actually forced to be enabled in NewDataPlane() @@ -101,6 +103,58 @@ func policyXBaseOnK1V1() *networkingv1.NetworkPolicy { } } +func policyXBase2OnK2V2() *networkingv1.NetworkPolicy { + return &networkingv1.NetworkPolicy{ + ObjectMeta: metav1.ObjectMeta{ + Name: "base2", + Namespace: "x", + }, + Spec: networkingv1.NetworkPolicySpec{ + PodSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "k2": "v2", + }, + }, + Ingress: []networkingv1.NetworkPolicyIngressRule{ + {}, + }, + Egress: []networkingv1.NetworkPolicyEgressRule{ + {}, + }, + PolicyTypes: []networkingv1.PolicyType{ + networkingv1.PolicyTypeIngress, + networkingv1.PolicyTypeEgress, + }, + }, + } +} + +func policyXBase3OnK3V3() *networkingv1.NetworkPolicy { + return &networkingv1.NetworkPolicy{ + ObjectMeta: metav1.ObjectMeta{ + Name: "base3", + Namespace: "x", + }, + Spec: networkingv1.NetworkPolicySpec{ + PodSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "k3": "v3", + }, + }, + Ingress: []networkingv1.NetworkPolicyIngressRule{ + {}, + }, + Egress: []networkingv1.NetworkPolicyEgressRule{ + {}, + }, + PolicyTypes: []networkingv1.PolicyType{ + networkingv1.PolicyTypeIngress, + networkingv1.PolicyTypeEgress, + }, + }, + } +} + func basicTests() []*SerialTestCase { return []*SerialTestCase{ { @@ -823,6 +877,1088 @@ func capzCalicoTests() []*SerialTestCase { } } +// see issue #1729 for context on sequences 1, 2, 3 +func updatePodTests() []*SerialTestCase { + sequence1Tests := []*SerialTestCase{ + { + Description: "Sequence 1: Pod A create --> Policy create --> Pod A cleanup --> Pod B create", + Actions: []*Action{ + CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + ApplyDP(), + 
UpdatePolicy(policyXBaseOnK1V1()), + UpdatePolicy(policyXBase2OnK2V2()), + DeletePod("x", "a", ip1, map[string]string{"k1": "v1"}), + ApplyDP(), + CreatePod("x", "b", ip1, thisNode, map[string]string{"k2": "v2"}), + ApplyDP(), + }, + TestCaseMetadata: &TestCaseMetadata{ + Tags: []Tag{ + podCrudTag, + netpolCrudTag, + }, + DpCfg: defaultWindowsDPCfg, + InitialEndpoints: []*hcn.HostComputeEndpoint{ + dptestutils.Endpoint(endpoint1, ip1), + }, + ExpectedSetPolicies: []*hcn.SetPolicySetting{ + dptestutils.SetPolicy(emptySet), + dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), + dptestutils.SetPolicy(nsXSet, ip1), + // old labels (not yet garbage collected) + dptestutils.SetPolicy(podK1Set), + dptestutils.SetPolicy(podK1V1Set), + // new labels + dptestutils.SetPolicy(podK2Set, ip1), + dptestutils.SetPolicy(podK2V2Set, ip1), + }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ + endpoint1: { + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "In", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "Out", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + }, + }, + }, + }, + { + Description: "Sequence 1: Policy create --> Pod A create --> Pod A cleanup --> Pod B create", + Actions: []*Action{ + UpdatePolicy(policyXBaseOnK1V1()), + UpdatePolicy(policyXBase2OnK2V2()), + CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + ApplyDP(), + DeletePod("x", "a", ip1, map[string]string{"k1": "v1"}), + ApplyDP(), + CreatePod("x", "b", ip1, thisNode, map[string]string{"k2": "v2"}), + ApplyDP(), + }, + TestCaseMetadata: &TestCaseMetadata{ + Tags: []Tag{ + podCrudTag, + netpolCrudTag, + }, + DpCfg: defaultWindowsDPCfg, + InitialEndpoints: []*hcn.HostComputeEndpoint{ + dptestutils.Endpoint(endpoint1, ip1), + }, + ExpectedSetPolicies: []*hcn.SetPolicySetting{ + dptestutils.SetPolicy(emptySet), + dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), + dptestutils.SetPolicy(nsXSet, ip1), + // old labels (not yet garbage collected) + dptestutils.SetPolicy(podK1Set), + dptestutils.SetPolicy(podK1V1Set), + // new labels + dptestutils.SetPolicy(podK2Set, ip1), + dptestutils.SetPolicy(podK2V2Set, ip1), + }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ + endpoint1: { + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "In", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "Out", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + }, + }, + }, + }, + { + Description: "Sequence 1: Policy create --> Pod A create --> Pod A cleanup --> Pod B create (skip first apply DP)", + Actions: []*Action{ + UpdatePolicy(policyXBaseOnK1V1()), + UpdatePolicy(policyXBase2OnK2V2()), + CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + DeletePod("x", "a", ip1, map[string]string{"k1": "v1"}), + ApplyDP(), + CreatePod("x", "b", ip1, thisNode, map[string]string{"k2": "v2"}), + ApplyDP(), + }, + TestCaseMetadata: &TestCaseMetadata{ + Tags: []Tag{ + podCrudTag, + netpolCrudTag, + }, + DpCfg: defaultWindowsDPCfg, + InitialEndpoints: []*hcn.HostComputeEndpoint{ + 
dptestutils.Endpoint(endpoint1, ip1), + }, + ExpectedSetPolicies: []*hcn.SetPolicySetting{ + dptestutils.SetPolicy(emptySet), + dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), + dptestutils.SetPolicy(nsXSet, ip1), + // old labels (not yet garbage collected) + dptestutils.SetPolicy(podK1Set), + dptestutils.SetPolicy(podK1V1Set), + // new labels + dptestutils.SetPolicy(podK2Set, ip1), + dptestutils.SetPolicy(podK2V2Set, ip1), + }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ + endpoint1: { + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "In", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "Out", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + }, + }, + }, + }, + { + Description: "Sequence 1: Policy create --> Pod A create --> Pod A cleanup --> Pod B create (skip first two apply DP)", + Actions: []*Action{ + UpdatePolicy(policyXBaseOnK1V1()), + UpdatePolicy(policyXBase2OnK2V2()), + CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + DeletePod("x", "a", ip1, map[string]string{"k1": "v1"}), + CreatePod("x", "b", ip1, thisNode, map[string]string{"k2": "v2"}), + ApplyDP(), + }, + TestCaseMetadata: &TestCaseMetadata{ + Tags: []Tag{ + podCrudTag, + netpolCrudTag, + }, + DpCfg: defaultWindowsDPCfg, + InitialEndpoints: []*hcn.HostComputeEndpoint{ + dptestutils.Endpoint(endpoint1, ip1), + }, + ExpectedSetPolicies: []*hcn.SetPolicySetting{ + dptestutils.SetPolicy(emptySet), + dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), + dptestutils.SetPolicy(nsXSet, ip1), + // old labels (not yet garbage collected) + dptestutils.SetPolicy(podK1Set), + dptestutils.SetPolicy(podK1V1Set), + // new labels + dptestutils.SetPolicy(podK2Set, ip1), + dptestutils.SetPolicy(podK2V2Set, ip1), + }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ + endpoint1: { + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "In", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "Out", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + }, + }, + }, + }, + } + + sequence2Tests := []*SerialTestCase{ + { + Description: "Sequence 2 with Calico network", + Actions: []*Action{ + UpdatePolicy(policyXBaseOnK1V1()), + UpdatePolicy(policyXBase2OnK2V2()), + CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + ApplyDP(), + CreatePod("x", "b", ip1, thisNode, map[string]string{"k2": "v2"}), + ApplyDP(), + }, + TestCaseMetadata: &TestCaseMetadata{ + Tags: []Tag{ + podCrudTag, + netpolCrudTag, + }, + DpCfg: windowsCalicoDPCfg, + InitialEndpoints: []*hcn.HostComputeEndpoint{ + dptestutils.Endpoint(endpoint1, ip1), + }, + ExpectedSetPolicies: []*hcn.SetPolicySetting{ + dptestutils.SetPolicy(emptySet), + dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), + dptestutils.SetPolicy(nsXSet, ip1), + // IP temporarily associated with IPSets of both pod A and pod B + // Pod A sets + dptestutils.SetPolicy(podK1Set, ip1), + dptestutils.SetPolicy(podK1V1Set, ip1), + // Pod B sets + dptestutils.SetPolicy(podK2Set, ip1), + 
dptestutils.SetPolicy(podK2V2Set, ip1), + }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ + endpoint1: { + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "In", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "Out", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + { + ID: "azure-acl-baseazurewireserver", + Action: "Block", + Direction: "Out", + Priority: 200, + RemoteAddresses: "168.63.129.16/32", + RemotePorts: "80", + Protocols: "6", + }, + { + ID: "azure-acl-baseallowinswitch", + Action: "Allow", + Direction: "In", + Priority: 65499, + }, + { + ID: "azure-acl-baseallowoutswitch", + Action: "Allow", + Direction: "Out", + Priority: 65499, + }, + { + ID: "azure-acl-baseallowinhost", + Action: "Allow", + Direction: "In", + LocalAddresses: "", + Priority: 0, + RemoteAddresses: "", + // RuleType is unsupported in FakeEndpointPolicy + // RuleType: "Host", + }, + { + ID: "azure-acl-baseallowouthost", + Action: "Allow", + Direction: "Out", + LocalAddresses: "", + Priority: 0, + RemoteAddresses: "", + // RuleType is unsupported in FakeEndpointPolicy + // RuleType: "Host", + }, + }, + }, + }, + }, + { + Description: "Sequence 2: Policy create --> Pod A Create --> Pod B create", + Actions: []*Action{ + UpdatePolicy(policyXBaseOnK1V1()), + UpdatePolicy(policyXBase2OnK2V2()), + CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + ApplyDP(), + CreatePod("x", "b", ip1, thisNode, map[string]string{"k2": "v2"}), + ApplyDP(), + }, + TestCaseMetadata: &TestCaseMetadata{ + Tags: []Tag{ + podCrudTag, + netpolCrudTag, + }, + DpCfg: defaultWindowsDPCfg, + InitialEndpoints: []*hcn.HostComputeEndpoint{ + dptestutils.Endpoint(endpoint1, ip1), + }, + ExpectedSetPolicies: []*hcn.SetPolicySetting{ + dptestutils.SetPolicy(emptySet), + dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), + dptestutils.SetPolicy(nsXSet, ip1), + // IP temporarily associated with IPSets of both pod A and pod B + // Pod A sets + dptestutils.SetPolicy(podK1Set, ip1), + dptestutils.SetPolicy(podK1V1Set, ip1), + // Pod B sets + dptestutils.SetPolicy(podK2Set, ip1), + dptestutils.SetPolicy(podK2V2Set, ip1), + }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ + endpoint1: { + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "In", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "Out", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + }, + }, + }, + }, + { + Description: "Sequence 2: Policy create --> Pod A Create --> Pod B create --> Pod A cleanup", + Actions: []*Action{ + UpdatePolicy(policyXBaseOnK1V1()), + UpdatePolicy(policyXBase2OnK2V2()), + CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + ApplyDP(), + CreatePod("x", "b", ip1, thisNode, map[string]string{"k2": "v2"}), + ApplyDP(), + DeletePod("x", "a", ip1, map[string]string{"k1": "v1"}), + ApplyDP(), + }, + TestCaseMetadata: &TestCaseMetadata{ + Tags: []Tag{ + podCrudTag, + netpolCrudTag, + }, + DpCfg: defaultWindowsDPCfg, + InitialEndpoints: []*hcn.HostComputeEndpoint{ + dptestutils.Endpoint(endpoint1, ip1), + }, + ExpectedSetPolicies: 
[]*hcn.SetPolicySetting{ + dptestutils.SetPolicy(emptySet), + dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), + dptestutils.SetPolicy(nsXSet, ip1), + // old labels (not yet garbage collected) + dptestutils.SetPolicy(podK1Set), + dptestutils.SetPolicy(podK1V1Set), + // new labels + dptestutils.SetPolicy(podK2Set, ip1), + dptestutils.SetPolicy(podK2V2Set, ip1), + }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ + endpoint1: { + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "In", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "Out", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + }, + }, + }, + }, + { + Description: "Sequence 2: Policy create --> Pod A Create --> Pod B create (skip first ApplyDP())", + Actions: []*Action{ + UpdatePolicy(policyXBaseOnK1V1()), + UpdatePolicy(policyXBase2OnK2V2()), + CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + CreatePod("x", "b", ip1, thisNode, map[string]string{"k2": "v2"}), + ApplyDP(), + }, + TestCaseMetadata: &TestCaseMetadata{ + Tags: []Tag{ + podCrudTag, + netpolCrudTag, + }, + DpCfg: defaultWindowsDPCfg, + InitialEndpoints: []*hcn.HostComputeEndpoint{ + dptestutils.Endpoint(endpoint1, ip1), + }, + ExpectedSetPolicies: []*hcn.SetPolicySetting{ + dptestutils.SetPolicy(emptySet), + dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), + dptestutils.SetPolicy(nsXSet, ip1), + // IP temporarily associated with IPSets of both pod A and pod B + // Pod A sets + dptestutils.SetPolicy(podK1Set, ip1), + dptestutils.SetPolicy(podK1V1Set, ip1), + // Pod B sets + dptestutils.SetPolicy(podK2Set, ip1), + dptestutils.SetPolicy(podK2V2Set, ip1), + }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ + endpoint1: { + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "In", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "Out", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + }, + }, + }, + }, + { + Description: "Sequence 2: Policy create --> Pod A Create --> Pod B create --> Pod A cleanup (skip first two ApplyDP())", + Actions: []*Action{ + UpdatePolicy(policyXBaseOnK1V1()), + UpdatePolicy(policyXBase2OnK2V2()), + CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + CreatePod("x", "b", ip1, thisNode, map[string]string{"k2": "v2"}), + DeletePod("x", "a", ip1, map[string]string{"k1": "v1"}), + ApplyDP(), + }, + TestCaseMetadata: &TestCaseMetadata{ + Tags: []Tag{ + podCrudTag, + netpolCrudTag, + }, + DpCfg: defaultWindowsDPCfg, + InitialEndpoints: []*hcn.HostComputeEndpoint{ + dptestutils.Endpoint(endpoint1, ip1), + }, + ExpectedSetPolicies: []*hcn.SetPolicySetting{ + dptestutils.SetPolicy(emptySet), + dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), + dptestutils.SetPolicy(nsXSet, ip1), + // old labels (not yet garbage collected) + dptestutils.SetPolicy(podK1Set), + dptestutils.SetPolicy(podK1V1Set), + // new labels + dptestutils.SetPolicy(podK2Set, ip1), + dptestutils.SetPolicy(podK2V2Set, ip1), + }, + ExpectedEnpdointACLs: 
map[string][]*hnswrapper.FakeEndpointPolicy{ + endpoint1: { + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "In", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "Out", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + }, + }, + }, + }, + } + + otherTests := []*SerialTestCase{ + { + Description: "ignore Pod update if added then deleted before ApplyDP()", + Actions: []*Action{ + UpdatePolicy(policyXBaseOnK1V1()), + CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + DeletePod("x", "a", ip1, map[string]string{"k1": "v1"}), + ApplyDP(), + }, + TestCaseMetadata: &TestCaseMetadata{ + Tags: []Tag{ + podCrudTag, + netpolCrudTag, + }, + DpCfg: defaultWindowsDPCfg, + InitialEndpoints: []*hcn.HostComputeEndpoint{ + dptestutils.Endpoint(endpoint1, ip1), + }, + ExpectedSetPolicies: []*hcn.SetPolicySetting{ + dptestutils.SetPolicy(emptySet), + dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), + dptestutils.SetPolicy(nsXSet), + dptestutils.SetPolicy(podK1Set), + dptestutils.SetPolicy(podK1V1Set), + }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ + endpoint1: {}, + }, + }, + }, + { + // doesn't really enforce behavior in DP, but one could look at logs to make sure we don't make a reset ACL SysCall into HNS + Description: "ignore Pod delete for deleted endpoint", + Actions: []*Action{ + UpdatePolicy(policyXBaseOnK1V1()), + CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + ApplyDP(), + DeleteEndpoint(endpoint1), + DeletePod("x", "a", ip1, map[string]string{"k1": "v1"}), + ApplyDP(), + }, + TestCaseMetadata: &TestCaseMetadata{ + Tags: []Tag{ + podCrudTag, + netpolCrudTag, + }, + DpCfg: defaultWindowsDPCfg, + InitialEndpoints: []*hcn.HostComputeEndpoint{ + dptestutils.Endpoint(endpoint1, ip1), + }, + ExpectedSetPolicies: []*hcn.SetPolicySetting{ + dptestutils.SetPolicy(emptySet), + dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), + dptestutils.SetPolicy(nsXSet), + dptestutils.SetPolicy(podK1Set), + dptestutils.SetPolicy(podK1V1Set), + }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{}, + }, + }, + { + // doesn't really enforce behavior in DP, but one could look at logs to make sure we don't make a reset ACL SysCall into HNS + Description: "ignore Pod delete for deleted endpoint (skip first ApplyDP())", + Actions: []*Action{ + UpdatePolicy(policyXBaseOnK1V1()), + CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + DeleteEndpoint(endpoint1), + DeletePod("x", "a", ip1, map[string]string{"k1": "v1"}), + ApplyDP(), + }, + TestCaseMetadata: &TestCaseMetadata{ + Tags: []Tag{ + podCrudTag, + netpolCrudTag, + }, + DpCfg: defaultWindowsDPCfg, + InitialEndpoints: []*hcn.HostComputeEndpoint{ + dptestutils.Endpoint(endpoint1, ip1), + }, + ExpectedSetPolicies: []*hcn.SetPolicySetting{ + dptestutils.SetPolicy(emptySet), + dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), + dptestutils.SetPolicy(nsXSet), + dptestutils.SetPolicy(podK1Set), + dptestutils.SetPolicy(podK1V1Set), + }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{}, + }, + }, + { + // doesn't really enforce behavior in DP, but one could look at logs to make sure we don't make an add ACL SysCall into HNS" + 
Description: "ignore Pod update when there's no corresponding endpoint", + Actions: []*Action{ + UpdatePolicy(policyXBaseOnK1V1()), + CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + DeleteEndpoint(endpoint1), + ApplyDP(), + }, + TestCaseMetadata: &TestCaseMetadata{ + Tags: []Tag{ + podCrudTag, + netpolCrudTag, + }, + DpCfg: defaultWindowsDPCfg, + InitialEndpoints: []*hcn.HostComputeEndpoint{ + dptestutils.Endpoint(endpoint1, ip1), + }, + ExpectedSetPolicies: []*hcn.SetPolicySetting{ + dptestutils.SetPolicy(emptySet), + dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), + dptestutils.SetPolicy(nsXSet, ip1), + dptestutils.SetPolicy(podK1Set, ip1), + dptestutils.SetPolicy(podK1V1Set, ip1), + }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{}, + }, + }, + { + Description: "two endpoints, one with policy, one without", + Actions: []*Action{ + UpdatePolicy(policyXBase2OnK2V2()), + CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + CreateEndpoint(endpoint2, ip2), + CreatePod("x", "b", ip2, thisNode, map[string]string{"k2": "v2"}), + ApplyDP(), + }, + TestCaseMetadata: &TestCaseMetadata{ + Tags: []Tag{ + podCrudTag, + netpolCrudTag, + }, + DpCfg: defaultWindowsDPCfg, + InitialEndpoints: []*hcn.HostComputeEndpoint{ + dptestutils.Endpoint(endpoint1, ip1), + }, + ExpectedSetPolicies: []*hcn.SetPolicySetting{ + dptestutils.SetPolicy(emptySet), + dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), + dptestutils.SetPolicy(nsXSet, ip1, ip2), + dptestutils.SetPolicy(podK1Set, ip1), + dptestutils.SetPolicy(podK1V1Set, ip1), + dptestutils.SetPolicy(podK2Set, ip2), + dptestutils.SetPolicy(podK2V2Set, ip2), + }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ + endpoint1: {}, + endpoint2: { + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "In", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "Out", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + }, + }, + }, + }, + } + + allTests := sequence1Tests + allTests = append(allTests, sequence2Tests...) + // allTests = append(allTests, podAssignmentSequence3Tests()...) + // make golint happy + _ = podAssignmentSequence3Tests() + allTests = append(allTests, otherTests...) 
+ return allTests +} + +// sequence 3 of issue 1729 +// seems like this sequence is impossible +// if it ever occurred, would need modifications in updatePod() and ipsetmanager +func podAssignmentSequence3Tests() []*SerialTestCase { + return []*SerialTestCase{ + { + Description: "Sequence 3: Pod B Create --> Pod A create --> Pod A Cleanup (ensure correct IPSets)", + Actions: []*Action{ + CreatePod("x", "b", ip1, thisNode, map[string]string{"k2": "v2"}), + ApplyDP(), + CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + // UpdatePod() will fail for both x/a and x/b + ApplyDP(), + DeletePod("x", "a", ip1, map[string]string{"k1": "v1"}), + ApplyDP(), + }, + TestCaseMetadata: &TestCaseMetadata{ + Tags: []Tag{ + podCrudTag, + netpolCrudTag, + }, + DpCfg: defaultWindowsDPCfg, + InitialEndpoints: []*hcn.HostComputeEndpoint{ + dptestutils.Endpoint(endpoint1, ip1), + }, + ExpectedSetPolicies: []*hcn.SetPolicySetting{ + dptestutils.SetPolicy(emptySet), + dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), + dptestutils.SetPolicy(nsXSet, ip1), + dptestutils.SetPolicy(podK2Set, ip1), + dptestutils.SetPolicy(podK2V2Set, ip1), + // not yet garbage-collected + dptestutils.SetPolicy(podK1Set), + dptestutils.SetPolicy(podK1V1Set), + }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ + endpoint1: {}, + }, + }, + }, + { + Description: "Sequence 3: Policy create --> Pod B Create --> Pod A create", + Actions: []*Action{ + UpdatePolicy(policyXBaseOnK1V1()), + UpdatePolicy(policyXBase2OnK2V2()), + CreatePod("x", "b", ip1, thisNode, map[string]string{"k2": "v2"}), + ApplyDP(), + // UpdatePod() will fail for x/a since x/b is associated with the IP/Endpoint + CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + ApplyDP(), + }, + TestCaseMetadata: &TestCaseMetadata{ + Tags: []Tag{ + podCrudTag, + netpolCrudTag, + }, + DpCfg: defaultWindowsDPCfg, + InitialEndpoints: []*hcn.HostComputeEndpoint{ + dptestutils.Endpoint(endpoint1, ip1), + }, + ExpectedSetPolicies: []*hcn.SetPolicySetting{ + dptestutils.SetPolicy(emptySet), + dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), + dptestutils.SetPolicy(nsXSet, ip1), + dptestutils.SetPolicy(podK1Set, ip1), + dptestutils.SetPolicy(podK1V1Set, ip1), + dptestutils.SetPolicy(podK2Set, ip1), + dptestutils.SetPolicy(podK2V2Set, ip1), + }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ + endpoint1: { + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "In", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "Out", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + }, + }, + }, + }, + { + Description: "Sequence 3: Policy create --> Pod B Create --> Pod A create (skip first ApplyDP())", + Actions: []*Action{ + UpdatePolicy(policyXBaseOnK1V1()), + UpdatePolicy(policyXBase2OnK2V2()), + CreatePod("x", "b", ip1, thisNode, map[string]string{"k2": "v2"}), + CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + // UpdatePod() will fail for both x/a and x/b + ApplyDP(), + }, + TestCaseMetadata: &TestCaseMetadata{ + Tags: []Tag{ + podCrudTag, + netpolCrudTag, + }, + DpCfg: defaultWindowsDPCfg, + InitialEndpoints: []*hcn.HostComputeEndpoint{ + dptestutils.Endpoint(endpoint1, ip1), + }, + ExpectedSetPolicies: 
[]*hcn.SetPolicySetting{ + dptestutils.SetPolicy(emptySet), + dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), + dptestutils.SetPolicy(nsXSet, ip1), + dptestutils.SetPolicy(podK1Set, ip1), + dptestutils.SetPolicy(podK1V1Set, ip1), + dptestutils.SetPolicy(podK2Set, ip1), + dptestutils.SetPolicy(podK2V2Set, ip1), + }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ + endpoint1: {}, + }, + }, + }, + { + Description: "Sequence 3: Policy create --> Pod B Create --> Pod A create --> Pod A Cleanup", + Actions: []*Action{ + UpdatePolicy(policyXBaseOnK1V1()), + UpdatePolicy(policyXBase2OnK2V2()), + CreatePod("x", "b", ip1, thisNode, map[string]string{"k2": "v2"}), + CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + // UpdatePod() will fail for both x/a and x/b + ApplyDP(), + DeletePod("x", "a", ip1, map[string]string{"k1": "v1"}), + ApplyDP(), + }, + TestCaseMetadata: &TestCaseMetadata{ + Tags: []Tag{ + podCrudTag, + netpolCrudTag, + }, + DpCfg: defaultWindowsDPCfg, + InitialEndpoints: []*hcn.HostComputeEndpoint{ + dptestutils.Endpoint(endpoint1, ip1), + }, + ExpectedSetPolicies: []*hcn.SetPolicySetting{ + dptestutils.SetPolicy(emptySet), + dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), + dptestutils.SetPolicy(nsXSet, ip1), + dptestutils.SetPolicy(podK2Set, ip1), + dptestutils.SetPolicy(podK2V2Set, ip1), + // not yet garbage-collected + dptestutils.SetPolicy(podK1Set), + dptestutils.SetPolicy(podK1V1Set), + }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ + endpoint1: {}, + }, + }, + }, + { + Description: "Sequence 3: Policy create --> Pod B Create --> Pod A create --> Pod B Update (unable to add second policy to endpoint until A cleanup)", + Actions: []*Action{ + UpdatePolicy(policyXBaseOnK1V1()), + UpdatePolicy(policyXBase2OnK2V2()), + UpdatePolicy(policyXBase3OnK3V3()), + CreatePod("x", "b", ip1, thisNode, map[string]string{"k2": "v2"}), + ApplyDP(), + // UpdatePod() will fail for x/a since x/b is associated with the IP/Endpoint + CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + ApplyDP(), + UpdatePodLabels("x", "b", ip1, thisNode, nil, map[string]string{"k3": "v3"}), + ApplyDP(), + }, + TestCaseMetadata: &TestCaseMetadata{ + Tags: []Tag{ + podCrudTag, + netpolCrudTag, + }, + DpCfg: defaultWindowsDPCfg, + InitialEndpoints: []*hcn.HostComputeEndpoint{ + dptestutils.Endpoint(endpoint1, ip1), + }, + ExpectedSetPolicies: []*hcn.SetPolicySetting{ + dptestutils.SetPolicy(emptySet), + dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), + dptestutils.SetPolicy(nsXSet, ip1), + dptestutils.SetPolicy(podK1Set, ip1), + dptestutils.SetPolicy(podK1V1Set, ip1), + dptestutils.SetPolicy(podK2Set, ip1), + dptestutils.SetPolicy(podK2V2Set, ip1), + dptestutils.SetPolicy(podK3Set, ip1), + dptestutils.SetPolicy(podK3V3Set, ip1), + }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ + endpoint1: { + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "In", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "Out", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + }, + }, + }, + }, + { + Description: "Sequence 3: Policy create --> Pod B Create --> Pod A create --> Pod B Update --> Pod A cleanup 
(able to add second policy)", + Actions: []*Action{ + UpdatePolicy(policyXBaseOnK1V1()), + UpdatePolicy(policyXBase2OnK2V2()), + UpdatePolicy(policyXBase3OnK3V3()), + CreatePod("x", "b", ip1, thisNode, map[string]string{"k2": "v2"}), + ApplyDP(), + // UpdatePod() will fail for x/a since x/b is associated with the IP/Endpoint + CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + ApplyDP(), + UpdatePodLabels("x", "b", ip1, thisNode, nil, map[string]string{"k3": "v3"}), + ApplyDP(), + DeletePod("x", "a", ip1, map[string]string{"k1": "v1"}), + ApplyDP(), + }, + TestCaseMetadata: &TestCaseMetadata{ + Tags: []Tag{ + podCrudTag, + netpolCrudTag, + }, + DpCfg: defaultWindowsDPCfg, + InitialEndpoints: []*hcn.HostComputeEndpoint{ + dptestutils.Endpoint(endpoint1, ip1), + }, + ExpectedSetPolicies: []*hcn.SetPolicySetting{ + dptestutils.SetPolicy(emptySet), + dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), + dptestutils.SetPolicy(nsXSet, ip1), + dptestutils.SetPolicy(podK2Set, ip1), + dptestutils.SetPolicy(podK2V2Set, ip1), + dptestutils.SetPolicy(podK3Set, ip1), + dptestutils.SetPolicy(podK3V3Set, ip1), + // not garbage-collected yet + dptestutils.SetPolicy(podK1Set), + dptestutils.SetPolicy(podK1V1Set), + }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ + endpoint1: { + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "In", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + { + ID: "azure-acl-x-base2", + Protocols: "", + Action: "Allow", + Direction: "Out", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + { + ID: "azure-acl-x-base3", + Protocols: "", + Action: "Allow", + Direction: "In", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + { + ID: "azure-acl-x-base3", + Protocols: "", + Action: "Allow", + Direction: "Out", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + }, + }, + }, + }, + } +} + func getAllMultiJobTests() []*MultiJobTestCase { return []*MultiJobTestCase{ { diff --git a/npm/pkg/dataplane/dataplane_windows.go b/npm/pkg/dataplane/dataplane_windows.go index 73d89bc1bc..c68cab2902 100644 --- a/npm/pkg/dataplane/dataplane_windows.go +++ b/npm/pkg/dataplane/dataplane_windows.go @@ -21,10 +21,7 @@ const ( refreshLocalEndpoints bool = false ) -var ( - errPolicyModeUnsupported = errors.New("only IPSet policy mode is supported") - errMismanagedPodKey = errors.New("the endpoint corresponds to a different pod") -) +var errPolicyModeUnsupported = errors.New("only IPSet policy mode is supported") // initializeDataPlane will help gather network and endpoint details func (dp *DataPlane) initializeDataPlane() error { @@ -113,15 +110,11 @@ func (dp *DataPlane) shouldUpdatePod() bool { // updatePod has two responsibilities in windows // 1. Will call into dataplane and updates endpoint references of this pod. // 2. Will check for existing applicable network policies and applies it on endpoint. -/* - FIXME: see https://github.com/Azure/azure-container-networking/issues/1729 - TODO: it would be good to replace stalePodKey behavior since it is complex. 
-*/ +// Assumption: a Pod won't take up its previously used IP when restarting (see https://stackoverflow.com/questions/52362514/when-will-the-kubernetes-pod-ip-change) func (dp *DataPlane) updatePod(pod *updateNPMPod) error { - klog.Infof("[DataPlane] updatePod called for Pod Key %s", pod.PodKey) - if pod.NodeName != dp.nodeName { - // Ignore updates if the pod is not part of this node. - klog.Infof("[DataPlane] ignoring update pod as expected Node: [%s] got: [%s]. pod: [%s]", dp.nodeName, pod.NodeName, pod.PodKey) + klog.Infof("[DataPlane] updatePod called. podKey: %s", pod.PodKey) + if len(pod.IPSetsToAdd) == 0 && len(pod.IPSetsToRemove) == 0 { + // nothing to do return nil } @@ -134,23 +127,35 @@ func (dp *DataPlane) updatePod(pod *updateNPMPod) error { if !ok { // ignore this err and pod endpoint will be deleted in ApplyDP // if the endpoint is not found, it means the pod is not part of this node or pod got deleted. - klog.Warningf("[DataPlane] did not find endpoint with IPaddress %s for pod %s", pod.PodIP, pod.PodKey) + klog.Warningf("[DataPlane] ignoring pod update since there is no corresponding endpoint. IP: %s. podKey: %s", pod.PodIP, pod.PodKey) return nil } if endpoint.podKey == unspecifiedPodKey { // while refreshing pod endpoints, newly discovered endpoints are given an unspecified pod key - if endpoint.isStalePodKey(pod.PodKey) { - // NOTE: if a pod restarts and takes up its previous IP, then its endpoint would be new and this branch would be taken. - // Updates to this pod would not occur. Pod IPs are expected to change on restart though. - // See: https://stackoverflow.com/questions/52362514/when-will-the-kubernetes-pod-ip-change - // If a pod does restart and take up its previous IP, then the pod can be deleted/restarted to mitigate this problem. - klog.Infof("[DataPlane] ignoring pod update since pod with key %s is stale and likely was deleted", pod.PodKey) - return nil - } + klog.Infof("[DataPlane] associating pod with endpoint. podKey: %s. endpoint: %+v", pod.PodKey, endpoint) endpoint.podKey = pod.PodKey + } else if pod.PodKey == endpoint.previousIncorrectPodKey { + klog.Infof("[DataPlane] ignoring pod update since this pod was previously and incorrectly assigned to this endpoint. endpoint: %+v", endpoint) + return nil } else if pod.PodKey != endpoint.podKey { - return fmt.Errorf("pod key mismatch. Expected: %s, Actual: %s. Error: [%w]", pod.PodKey, endpoint.podKey, errMismanagedPodKey) + // solves issue 1729 + klog.Infof("[DataPlane] pod key has changed. will reset endpoint ACLs and skip looking for ipsets to remove. new podKey: %s. previous endpoint: %+v", pod.PodKey, endpoint) + if err := dp.policyMgr.ResetEndpoint(endpoint.id); err != nil { + return fmt.Errorf("failed to reset endpoint for pod with incorrect pod key. new podKey: %s. previous endpoint: %+v. err: %w", pod.PodKey, endpoint, err) + } + + // mark this only after a successful reset; if marked before the reset, we would not retry on failure + endpoint.previousIncorrectPodKey = endpoint.podKey + endpoint.podKey = pod.PodKey + + // all ACLs were removed, so even if there are ipsets to remove, there's no need to look for policies to delete + pod.IPSetsToRemove = nil + + if dp.NetworkName == util.CalicoNetworkName { + klog.Infof("adding back base ACLs for calico CNI endpoint after resetting ACLs. 
endpoint: %+v", endpoint) + dp.policyMgr.AddBaseACLsForCalicoCNI(endpoint.id) + } } // for every ipset we're removing from the endpoint, remove from the endpoint any policy that requires the set @@ -286,15 +291,13 @@ func (dp *DataPlane) getEndpointsToApplyPolicy(policy *policies.NPMNetworkPolicy for ip, podKey := range netpolSelectorIPs { endpoint, ok := dp.endpointCache.cache[ip] if !ok { - klog.Infof("[DataPlane] Ignoring endpoint with IP %s since it was not found in the endpoint cache. This IP might not be in the HNS network", ip) + klog.Infof("[DataPlane] ignoring selector IP since it was not found in the endpoint cache and might not be in the HNS network. ip: %s. podKey: %s", ip, podKey) continue } if endpoint.podKey != podKey { // in case the pod controller hasn't updated the dp yet that the IP's pod owner has changed - klog.Infof( - "[DataPlane] ignoring endpoint with IP %s since the pod keys are different. podKey: [%s], endpoint: [%+v], endpoint stale pod key: [%+v]", - ip, podKey, endpoint, endpoint.stalePodKey) + klog.Infof("[DataPlane] ignoring selector IP since the endpoint is assigned to a different podKey. ip: %s. podKey: %s. endpoint: %+v", ip, podKey, endpoint) continue } @@ -349,7 +352,6 @@ func (dp *DataPlane) refreshPodEndpoints() error { dp.endpointCache.Lock() defer dp.endpointCache.Unlock() - currentTime := time.Now().Unix() existingIPs := make(map[string]struct{}) for _, endpoint := range endpoints { if len(endpoint.IpConfigurations) == 0 { @@ -384,18 +386,8 @@ func (dp *DataPlane) refreshPodEndpoints() error { // throw away old endpoints that have the same IP as a current endpoint (the old endpoint is getting deleted) // we don't have to worry about cleaning up network policies on endpoints that are getting deleted npmEP := newNPMEndpoint(endpoint) - if oldNPMEP.podKey == unspecifiedPodKey { - klog.Infof("updating endpoint cache since endpoint changed for IP which never had a pod key. new endpoint: %s, old endpoint: %s, ip: %s", npmEP.id, oldNPMEP.id, npmEP.ip) - dp.endpointCache.cache[ip] = npmEP - } else { - npmEP.stalePodKey = &staleKey{ - key: oldNPMEP.podKey, - timestamp: currentTime, - } - dp.endpointCache.cache[ip] = npmEP - // NOTE: TSGs rely on this log line - klog.Infof("updating endpoint cache for previously cached IP %s: %+v with stalePodKey %+v", npmEP.ip, npmEP, npmEP.stalePodKey) - } + klog.Infof("[DataPlane] updating endpoint cache for IP with a new endpoint. old endpoint: %+v. new endpoint: %+v", oldNPMEP, npmEP) + dp.endpointCache.cache[ip] = npmEP if dp.NetworkName == util.CalicoNetworkName { // NOTE 1: connectivity may be broken for an endpoint until this method is called @@ -410,22 +402,8 @@ func (dp *DataPlane) refreshPodEndpoints() error { // garbage collection for the endpoint cache for ip, ep := range dp.endpointCache.cache { if _, ok := existingIPs[ip]; !ok { - if ep.podKey == unspecifiedPodKey { - if ep.stalePodKey == nil { - klog.Infof("deleting old endpoint which never had a pod key. ID: %s, IP: %s", ep.id, ip) - delete(dp.endpointCache.cache, ip) - } else if int(currentTime-ep.stalePodKey.timestamp)/60 > minutesToKeepStalePodKey { - klog.Infof("deleting old endpoint which had a stale pod key. ID: %s, IP: %s, stalePodKey: %+v", ep.id, ip, ep.stalePodKey) - delete(dp.endpointCache.cache, ip) - } - } else { - ep.stalePodKey = &staleKey{ - key: ep.podKey, - timestamp: currentTime, - } - ep.podKey = unspecifiedPodKey - klog.Infof("marking endpoint stale for at least %d minutes. 
ID: %s, IP: %s, new stalePodKey: %+v", minutesToKeepStalePodKey, ep.id, ip, ep.stalePodKey) - } + klog.Infof("[DataPlane] deleting endpoint from cache. endpoint: %+v", ep) + delete(dp.endpointCache.cache, ip) } } diff --git a/npm/pkg/dataplane/dataplane_windows_test.go b/npm/pkg/dataplane/dataplane_windows_test.go index da901ac9ec..a82708b3ad 100644 --- a/npm/pkg/dataplane/dataplane_windows_test.go +++ b/npm/pkg/dataplane/dataplane_windows_test.go @@ -23,6 +23,10 @@ func TestBasics(t *testing.T) { testSerialCases(t, basicTests()) } +func TestPodEndpointAssignment(t *testing.T) { + testSerialCases(t, updatePodTests()) +} + func TestCapzCalico(t *testing.T) { testSerialCases(t, capzCalicoTests()) } @@ -32,11 +36,9 @@ func TestAllMultiJobCases(t *testing.T) { } func testSerialCases(t *testing.T, tests []*SerialTestCase) { - fmt.Printf("tests: %+v\n", tests) for i, tt := range tests { i := i tt := tt - klog.Infof("tt: %+v", tt) t.Run(tt.Description, func(t *testing.T) { klog.Infof("tt in: %+v", tt) t.Logf("beginning test #%d. Description: [%s]. Tags: %+v", i, tt.Description, tt.Tags) diff --git a/npm/pkg/dataplane/policies/policymanager.go b/npm/pkg/dataplane/policies/policymanager.go index 775bfd44ac..152a76c9c0 100644 --- a/npm/pkg/dataplane/policies/policymanager.go +++ b/npm/pkg/dataplane/policies/policymanager.go @@ -73,6 +73,13 @@ func NewPolicyManager(ioShim *common.IOShim, cfg *PolicyManagerCfg) *PolicyManag } } +func (pMgr *PolicyManager) ResetEndpoint(epID string) error { + if util.IsWindowsDP() { + return pMgr.bootup([]string{epID}) + } + return nil +} + func (pMgr *PolicyManager) Bootup(epIDs []string) error { metrics.ResetNumACLRules() if err := pMgr.bootup(epIDs); err != nil { diff --git a/npm/pkg/dataplane/types_windows.go b/npm/pkg/dataplane/types_windows.go index 9dff506afe..c0841da3d7 100644 --- a/npm/pkg/dataplane/types_windows.go +++ b/npm/pkg/dataplane/types_windows.go @@ -2,10 +2,7 @@ package dataplane import "github.com/Microsoft/hcsshim/hcn" -const ( - unspecifiedPodKey = "" - minutesToKeepStalePodKey = 10 -) +const unspecifiedPodKey = "" // npmEndpoint holds info relevant for endpoints in windows type npmEndpoint struct { @@ -13,19 +10,13 @@ type npmEndpoint struct { id string ip string podKey string - // stalePodKey is used to keep track of the previous pod that had this IP - stalePodKey *staleKey + // previousIncorrectPodKey represents a Pod that was previously and incorrectly assigned to this endpoint (see issue 1729) + previousIncorrectPodKey string // Map with Key as Network Policy name to emulate a set // and value as struct{} for minimal memory consumption netPolReference map[string]struct{} } -type staleKey struct { - key string - // timestamp represents the Unix time this struct was created - timestamp int64 -} - // newNPMEndpoint initializes npmEndpoint and copies relevant information from hcn.HostComputeEndpoint. // This function must be defined in a file with a windows build tag for proper vendoring since it uses the hcn pkg func newNPMEndpoint(endpoint *hcn.HostComputeEndpoint) *npmEndpoint { @@ -37,7 +28,3 @@ func newNPMEndpoint(endpoint *hcn.HostComputeEndpoint) *npmEndpoint { ip: endpoint.IpConfigurations[0].IpAddress, } } - -func (ep *npmEndpoint) isStalePodKey(podKey string) bool { - return ep.stalePodKey != nil && ep.stalePodKey.key == podKey -}
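
For illustration, below is a minimal, self-contained Go sketch of the endpoint-ownership logic that updatePod now follows. The endpoint and podUpdate types and the resetACLs helper are simplified stand-ins invented for this sketch, not the real npmEndpoint, updateNPMPod, or PolicyManager.ResetEndpoint; only the branch structure mirrors the diff above.

package main

import "fmt"

// endpoint and podUpdate are illustrative stand-ins for npmEndpoint and updateNPMPod.
type endpoint struct {
	id                      string
	podKey                  string // unspecifiedPodKey is modeled as ""
	previousIncorrectPodKey string
	acls                    []string
}

type podUpdate struct {
	podKey         string
	ipsetsToRemove []string
}

// resetACLs stands in for PolicyManager.ResetEndpoint, which on Windows re-runs
// bootup for just this endpoint ID, wiping its ACLs.
func resetACLs(ep *endpoint) error {
	ep.acls = nil
	return nil
}

// reconcile mirrors the three ownership branches added to updatePod: adopt an
// unowned endpoint, ignore a pod already known to be incorrectly assigned, or
// reset ACLs when the endpoint's pod key has changed (issue 1729).
func reconcile(ep *endpoint, pod *podUpdate) error {
	switch {
	case ep.podKey == "":
		// newly discovered endpoint: associate it with this pod
		ep.podKey = pod.podKey
	case pod.podKey == ep.previousIncorrectPodKey:
		// stale update from the pod previously assigned to this endpoint
		return nil
	case pod.podKey != ep.podKey:
		// ownership changed: wipe ACLs first, and only then record the old key
		if err := resetACLs(ep); err != nil {
			return fmt.Errorf("failed to reset endpoint: %w", err)
		}
		ep.previousIncorrectPodKey = ep.podKey
		ep.podKey = pod.podKey
		// all ACLs are gone, so there are no policies left to detach for removed ipsets
		pod.ipsetsToRemove = nil
	}
	// ... apply policies for the pod's remaining ipset changes here ...
	return nil
}

func main() {
	// Sequence 2 shape: pod B reuses pod A's IP before A's delete is processed.
	ep := &endpoint{id: "ep1", podKey: "x/a", acls: []string{"acl-for-a"}}
	_ = reconcile(ep, &podUpdate{podKey: "x/b"}) // ownership moves to x/b, ACLs reset
	_ = reconcile(ep, &podUpdate{podKey: "x/a"}) // late update for x/a is ignored
	fmt.Printf("owner=%s previousIncorrect=%s acls=%v\n", ep.podKey, ep.previousIncorrectPodKey, ep.acls)
}

Recording previousIncorrectPodKey only after a successful reset matters: if the HNS call fails, the keys stay mismatched and the reset is retried on a later update rather than silently skipped.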
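
Similarly, a small sketch (again with invented stand-in types) of the endpoint-cache garbage collection after this change: entries whose IPs have disappeared from HNS are deleted immediately, replacing the old scheme of parking a stalePodKey for minutesToKeepStalePodKey (10) minutes.

package main

import "fmt"

type cachedEndpoint struct{ id, ip string }

// gcEndpointCache models the simplified loop at the end of refreshPodEndpoints:
// any cached endpoint whose IP is no longer present in HNS is dropped.
func gcEndpointCache(cache map[string]*cachedEndpoint, existingIPs map[string]struct{}) {
	for ip, ep := range cache {
		if _, ok := existingIPs[ip]; !ok {
			fmt.Printf("deleting endpoint from cache: %+v\n", ep)
			delete(cache, ip) // deleting during range is safe in Go
		}
	}
}

func main() {
	cache := map[string]*cachedEndpoint{
		"10.0.0.1": {id: "ep1", ip: "10.0.0.1"},
		"10.0.0.2": {id: "ep2", ip: "10.0.0.2"},
	}
	gcEndpointCache(cache, map[string]struct{}{"10.0.0.1": {}})
	fmt.Println(len(cache)) // 1
}

Immediate deletion is safe now because ownership conflicts are resolved in updatePod via the reset path above, so there is no longer a need to remember stale pod keys on a timer.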