From 82c44ff45bf8de17648898a302595301f674902d Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Wed, 22 Jun 2022 14:42:39 -0700 Subject: [PATCH 1/2] validate sctp for windows --- .../controllers/v2/namespaceController.go | 19 ++++++++++--------- .../controllers/v2/networkPolicyController.go | 12 +++++++++--- .../controllers/v2/podController.go | 18 ++++++++++++++---- .../translation/translatePolicy.go | 13 +++++++++++-- .../translation/translatePolicy_test.go | 4 ++-- npm/pkg/dataplane/dataplane_windows.go | 3 ++- npm/pkg/dataplane/policies/policy.go | 5 ++++- npm/pkg/dataplane/policies/policymanager.go | 1 - 8 files changed, 52 insertions(+), 23 deletions(-) diff --git a/npm/pkg/controlplane/controllers/v2/namespaceController.go b/npm/pkg/controlplane/controllers/v2/namespaceController.go index 98520c037b..d36d3a0b1b 100644 --- a/npm/pkg/controlplane/controllers/v2/namespaceController.go +++ b/npm/pkg/controlplane/controllers/v2/namespaceController.go @@ -26,13 +26,6 @@ import ( "k8s.io/klog" ) -type LabelAppendOperation bool - -const ( - clearExistingLabels LabelAppendOperation = true - appendToExistingLabels LabelAppendOperation = false -) - var errWorkqueueFormatting = errors.New("error in formatting") // NpmNamespaceCache to store namespace struct in nameSpaceController.go. @@ -238,6 +231,10 @@ func (nsc *NamespaceController) syncNamespace(nsKey string) error { // apply dataplane and record exec time after syncing operationKind := metrics.NoOp defer func() { + if err != nil { + klog.Infof("[syncNamespace] failed to sync namespace, but will apply any changes to the dataplane. err: %s", err.Error()) + } + dperr := nsc.dp.ApplyDataPlane() // NOTE: it may seem like Prometheus is considering some ns create events as updates. @@ -248,7 +245,11 @@ func (nsc *NamespaceController) syncNamespace(nsKey string) error { metrics.RecordControllerNamespaceExecTime(timer, operationKind, err != nil && dperr != nil) if dperr != nil { - err = fmt.Errorf("failed with error %w, apply failed with %v", err, dperr) + if err == nil { + err = fmt.Errorf("failed to apply dataplane changes while syncing namespace. err: %w", dperr) + } else { + err = fmt.Errorf("failed to sync namespace and apply dataplane changes. sync err: [%w], apply err: [%v]", err, dperr) + } } }() @@ -270,7 +271,7 @@ func (nsc *NamespaceController) syncNamespace(nsKey string) error { if err != nil { // need to retry this cleaning-up process metrics.SendErrorLogAndMetric(util.NSID, "Error: %v when namespace is not found", err) - return fmt.Errorf("Error: %w when namespace is not found", err) + return fmt.Errorf("error: %w when namespace is not found", err) } } return err diff --git a/npm/pkg/controlplane/controllers/v2/networkPolicyController.go b/npm/pkg/controlplane/controllers/v2/networkPolicyController.go index 17bab4d553..1c5fbfef09 100644 --- a/npm/pkg/controlplane/controllers/v2/networkPolicyController.go +++ b/npm/pkg/controlplane/controllers/v2/networkPolicyController.go @@ -285,10 +285,10 @@ func (c *NetworkPolicyController) syncAddAndUpdateNetPol(netPolObj *networkingv1 // install translated rules into kernel npmNetPolObj, err := translation.TranslatePolicy(netPolObj) if err != nil { - if errors.Is(err, translation.ErrUnsupportedNamedPort) || errors.Is(err, translation.ErrUnsupportedNegativeMatch) { + if isUnsupportedWindowsTranslationErr(err) { // We can safely suppress unsupported network policy because re-Queuing will result in same error - klog.Warningf("NetworkPolicy %s in namespace %s is not translated because it has unsupported translated features of Windows.", netPolObj.ObjectMeta.Name, netPolObj.ObjectMeta.Namespace) - // consider a no-op since we the policy is unsupported. The exec time here isn't important either. + klog.Warningf("NetworkPolicy %s in namespace %s is not translated because it has unsupported translated features of Windows: %s", netPolObj.ObjectMeta.Name, netPolObj.ObjectMeta.Namespace, err.Error()) + // consider a no-op since the policy is unsupported. The exec time here isn't important either. return metrics.NoOp, nil } klog.Errorf("Failed to translate podSelector in NetworkPolicy %s in namespace %s: %s", netPolObj.ObjectMeta.Name, netPolObj.ObjectMeta.Namespace, err.Error()) @@ -342,3 +342,9 @@ func (c *NetworkPolicyController) cleanUpNetworkPolicy(netPolKey string) error { metrics.DecNumPolicies() return nil } + +func isUnsupportedWindowsTranslationErr(err error) bool { + return errors.Is(err, translation.ErrUnsupportedNamedPort) || + errors.Is(err, translation.ErrUnsupportedNegativeMatch) || + errors.Is(err, translation.ErrUnsupportedSCTP) +} diff --git a/npm/pkg/controlplane/controllers/v2/podController.go b/npm/pkg/controlplane/controllers/v2/podController.go index e6b58fff45..b9a84e0b7b 100644 --- a/npm/pkg/controlplane/controllers/v2/podController.go +++ b/npm/pkg/controlplane/controllers/v2/podController.go @@ -260,11 +260,21 @@ func (c *PodController) syncPod(key string) error { // apply dataplane and record exec time after syncing operationKind := metrics.NoOp defer func() { + if err != nil { + klog.Infof("[syncPod] failed to sync pod, but will apply any changes to the dataplane. err: %s", err.Error()) + } + dperr := c.dp.ApplyDataPlane() + // can't record this in another deferred func since deferred funcs are processed in LIFO order metrics.RecordControllerPodExecTime(timer, operationKind, err != nil && dperr != nil) + if dperr != nil { - err = fmt.Errorf("failed with error %w, apply failed with %v", err, dperr) + if err == nil { + err = fmt.Errorf("failed to apply dataplane changes while syncing pod. err: %w", dperr) + } else { + err = fmt.Errorf("failed to sync pod and apply dataplane changes. sync err: [%w], apply err: [%v]", err, dperr) + } } }() @@ -285,7 +295,7 @@ func (c *PodController) syncPod(key string) error { err = c.cleanUpDeletedPod(key) if err != nil { // need to retry this cleaning-up process - return fmt.Errorf("Error: %w when pod is not found", err) + return fmt.Errorf("error: %w when pod is not found", err) } return err } @@ -302,7 +312,7 @@ func (c *PodController) syncPod(key string) error { operationKind = metrics.DeleteOp } if err = c.cleanUpDeletedPod(key); err != nil { - return fmt.Errorf("Error: %w when when pod is in completed state", err) + return fmt.Errorf("error: %w when when pod is in completed state", err) } return nil } @@ -319,7 +329,7 @@ func (c *PodController) syncPod(key string) error { operationKind, err = c.syncAddAndUpdatePod(pod) if err != nil { - return fmt.Errorf("Failed to sync pod due to %w", err) + return fmt.Errorf("failed to sync pod due to %w", err) } return nil diff --git a/npm/pkg/controlplane/translation/translatePolicy.go b/npm/pkg/controlplane/translation/translatePolicy.go index 7f36b4a4f1..bfc52fff38 100644 --- a/npm/pkg/controlplane/translation/translatePolicy.go +++ b/npm/pkg/controlplane/translation/translatePolicy.go @@ -9,7 +9,6 @@ import ( "github.com/Azure/azure-container-networking/npm/util" networkingv1 "k8s.io/api/networking/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/klog/v2" ) /* @@ -25,6 +24,8 @@ var ( ErrUnsupportedNamedPort = errors.New("unsupported namedport translation features used on windows") // ErrUnsupportedNegativeMatch is returned when negative match translation feature is used in windows. ErrUnsupportedNegativeMatch = errors.New("unsupported NotExist operator translation features used on windows") + // ErrUnsupportedSCTP is returned when SCTP protocol is used in windows. + ErrUnsupportedSCTP = errors.New("unsupported SCTP protocol used on windows") ) type podSelectorResult struct { @@ -48,7 +49,6 @@ func portType(portRule networkingv1.NetworkPolicyPort) (netpolPortType, error) { return numericPortType, nil } else if portRule.Port.IntValue() == 0 && portRule.Port.String() != "" { if util.IsWindowsDP() { - klog.Warningf("Windows does not support named port. Use numeric port instead.") return "", ErrUnsupportedNamedPort } return namedPortType, nil @@ -578,5 +578,14 @@ func TranslatePolicy(npObj *networkingv1.NetworkPolicy) (*policies.NPMNetworkPol } } } + + // ad-hoc validation to reduce code changes (modifying function signatures and returning errors in all the correct places) + if util.IsWindowsDP() { + for _, acl := range npmNetPol.ACLs { + if acl.Protocol == policies.SCTP { + return nil, ErrUnsupportedSCTP + } + } + } return npmNetPol, nil } diff --git a/npm/pkg/controlplane/translation/translatePolicy_test.go b/npm/pkg/controlplane/translation/translatePolicy_test.go index f7f5276526..ab5b578547 100644 --- a/npm/pkg/controlplane/translation/translatePolicy_test.go +++ b/npm/pkg/controlplane/translation/translatePolicy_test.go @@ -1629,7 +1629,7 @@ func TestIngressPolicy(t *testing.T) { }, }, { - name: "error", + name: "unknown port type error", isNewNwPolicyVer: true, targetSelector: &metav1.LabelSelector{ MatchLabels: map[string]string{ @@ -2284,7 +2284,7 @@ func TestEgressPolicy(t *testing.T) { }, }, { - name: "error", + name: "unknown port type error", isNewNwPolicyVer: true, targetSelector: &metav1.LabelSelector{ MatchLabels: map[string]string{ diff --git a/npm/pkg/dataplane/dataplane_windows.go b/npm/pkg/dataplane/dataplane_windows.go index 5aacc541ad..fec279cff5 100644 --- a/npm/pkg/dataplane/dataplane_windows.go +++ b/npm/pkg/dataplane/dataplane_windows.go @@ -273,7 +273,6 @@ func (dp *DataPlane) refreshAllPodEndpoints() error { currentTime := time.Now().Unix() existingIPs := make(map[string]struct{}) for _, endpoint := range endpoints { - klog.Infof("Endpoints info %+v", endpoint.Id) if len(endpoint.IpConfigurations) == 0 { klog.Infof("Endpoint ID %s has no IPAddreses", endpoint.Id) continue @@ -335,6 +334,8 @@ func (dp *DataPlane) refreshAllPodEndpoints() error { } } + klog.Infof("Endpoint cache after refresh: %+v", dp.endpointCache) // TODO: remove for public preview + return nil } diff --git a/npm/pkg/dataplane/policies/policy.go b/npm/pkg/dataplane/policies/policy.go index a5dad0c66c..b411a7407b 100644 --- a/npm/pkg/dataplane/policies/policy.go +++ b/npm/pkg/dataplane/policies/policy.go @@ -131,7 +131,6 @@ func NormalizePolicy(networkPolicy *NPMNetworkPolicy) { } } -// TODO do verification in controller? func ValidatePolicy(networkPolicy *NPMNetworkPolicy) error { for _, aclPolicy := range networkPolicy.ACLs { if !aclPolicy.hasKnownTarget() { @@ -143,6 +142,10 @@ func ValidatePolicy(networkPolicy *NPMNetworkPolicy) error { if !aclPolicy.hasKnownProtocol() { return npmerrors.SimpleError(fmt.Sprintf("ACL policy for NetPol %s has unknown protocol [%s]", networkPolicy.PolicyKey, aclPolicy.Protocol)) } + if util.IsWindowsDP() && aclPolicy.Protocol == SCTP { + return npmerrors.SimpleError(fmt.Sprintf("ACL policy for NetPol %s has unsupported SCTP protocol on Windows", networkPolicy.PolicyKey)) + } + if !aclPolicy.satisifiesPortAndProtocolConstraints() { return npmerrors.SimpleError(fmt.Sprintf( "ACL policy for NetPol %s has dst port(s) (Port or Port and EndPort), so must have protocol tcp, udp, udplite, sctp, or dccp but has protocol %s", diff --git a/npm/pkg/dataplane/policies/policymanager.go b/npm/pkg/dataplane/policies/policymanager.go index 92b7484d37..a1ae22bcf6 100644 --- a/npm/pkg/dataplane/policies/policymanager.go +++ b/npm/pkg/dataplane/policies/policymanager.go @@ -111,7 +111,6 @@ func (pMgr *PolicyManager) AddPolicy(policy *NPMNetworkPolicy, endpointList map[ return nil } - // TODO move this validation and normalization to controller NormalizePolicy(policy) if err := ValidatePolicy(policy); err != nil { msg := fmt.Sprintf("failed to validate policy: %s", err.Error()) From a4031da814f62dae3cdac8e258c0a06657c76be9 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Wed, 22 Jun 2022 15:50:53 -0700 Subject: [PATCH 2/2] fix lint --- .../controllers/v2/networkPolicyController.go | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/npm/pkg/controlplane/controllers/v2/networkPolicyController.go b/npm/pkg/controlplane/controllers/v2/networkPolicyController.go index 1c5fbfef09..a5a1dc7913 100644 --- a/npm/pkg/controlplane/controllers/v2/networkPolicyController.go +++ b/npm/pkg/controlplane/controllers/v2/networkPolicyController.go @@ -286,13 +286,16 @@ func (c *NetworkPolicyController) syncAddAndUpdateNetPol(netPolObj *networkingv1 npmNetPolObj, err := translation.TranslatePolicy(netPolObj) if err != nil { if isUnsupportedWindowsTranslationErr(err) { - // We can safely suppress unsupported network policy because re-Queuing will result in same error - klog.Warningf("NetworkPolicy %s in namespace %s is not translated because it has unsupported translated features of Windows: %s", netPolObj.ObjectMeta.Name, netPolObj.ObjectMeta.Namespace, err.Error()) - // consider a no-op since the policy is unsupported. The exec time here isn't important either. + klog.Warningf("NetworkPolicy %s in namespace %s is not translated because it has unsupported translated features of Windows: %s", + netPolObj.ObjectMeta.Name, netPolObj.ObjectMeta.Namespace, err.Error()) + + // We can safely suppress unsupported network policy because re-Queuing will result in same error. + // The exec time isn't relevant here, so consider a no-op. return metrics.NoOp, nil } + klog.Errorf("Failed to translate podSelector in NetworkPolicy %s in namespace %s: %s", netPolObj.ObjectMeta.Name, netPolObj.ObjectMeta.Namespace, err.Error()) - // consider a no-op since we can't translate. The exec time here isn't important either. + // The exec time isn't relevant here, so consider a no-op. return metrics.NoOp, errNetPolTranslationFailure }