diff --git a/npm/pkg/dataplane/dataplane.go b/npm/pkg/dataplane/dataplane.go index 5baf87eab7..d3e690aaf5 100644 --- a/npm/pkg/dataplane/dataplane.go +++ b/npm/pkg/dataplane/dataplane.go @@ -101,13 +101,6 @@ func (dp *DataPlane) InitializeDataPlane() error { // ResetDataPlane helps in cleaning up dataplane sets and policies programmed // by NPM, returning a clean slate func (dp *DataPlane) ResetDataPlane() error { - // It is important to keep order to clean-up ACLs before ipsets. Otherwise we won't be able to delete ipsets referenced by ACLs - if err := dp.policyMgr.Reset(); err != nil { - return npmerrors.ErrorWrapper(npmerrors.ResetDataPlane, false, "failed to reset policy dataplane", err) - } - if err := dp.ipsetMgr.ResetIPSets(); err != nil { - return npmerrors.ErrorWrapper(npmerrors.ResetDataPlane, false, "failed to reset ipsets dataplane", err) - } return dp.resetDataPlane() } diff --git a/npm/pkg/dataplane/dataplane_linux.go b/npm/pkg/dataplane/dataplane_linux.go index bd4ebad9cf..319b68f56f 100644 --- a/npm/pkg/dataplane/dataplane_linux.go +++ b/npm/pkg/dataplane/dataplane_linux.go @@ -2,6 +2,7 @@ package dataplane import ( "github.com/Azure/azure-container-networking/npm/pkg/dataplane/policies" + npmerrors "github.com/Azure/azure-container-networking/npm/util/errors" "k8s.io/klog" ) @@ -26,5 +27,12 @@ func (dp *DataPlane) updatePod(pod *updateNPMPod) error { } func (dp *DataPlane) resetDataPlane() error { + // It is important to keep order to clean-up ACLs before ipsets. Otherwise we won't be able to delete ipsets referenced by ACLs + if err := dp.policyMgr.Reset(nil); err != nil { + return npmerrors.ErrorWrapper(npmerrors.ResetDataPlane, false, "failed to reset policy dataplane", err) + } + if err := dp.ipsetMgr.ResetIPSets(); err != nil { + return npmerrors.ErrorWrapper(npmerrors.ResetDataPlane, false, "failed to reset ipsets dataplane", err) + } return nil } diff --git a/npm/pkg/dataplane/dataplane_windows.go b/npm/pkg/dataplane/dataplane_windows.go index 91c5274e05..491558876d 100644 --- a/npm/pkg/dataplane/dataplane_windows.go +++ b/npm/pkg/dataplane/dataplane_windows.go @@ -4,6 +4,7 @@ import ( "fmt" "github.com/Azure/azure-container-networking/npm/pkg/dataplane/policies" + npmerrors "github.com/Azure/azure-container-networking/npm/util/errors" "github.com/Microsoft/hcsshim/hcn" "k8s.io/klog" ) @@ -42,6 +43,24 @@ func (dp *DataPlane) initializeDataPlane() error { return nil } +func (dp *DataPlane) resetDataPlane() error { + // initialize the DP so the podendpoints will get updated. + if err := dp.initializeDataPlane(); err != nil { + return err + } + + epIDs := dp.getAllEndpointIDs() + + // It is important to keep order to clean-up ACLs before ipsets. Otherwise we won't be able to delete ipsets referenced by ACLs + if err := dp.policyMgr.Reset(epIDs); err != nil { + return npmerrors.ErrorWrapper(npmerrors.ResetDataPlane, false, "failed to reset policy dataplane", err) + } + if err := dp.ipsetMgr.ResetIPSets(); err != nil { + return npmerrors.ErrorWrapper(npmerrors.ResetDataPlane, false, "failed to reset ipsets dataplane", err) + } + return nil +} + func (dp *DataPlane) shouldUpdatePod() bool { return true } @@ -195,10 +214,6 @@ func (dp *DataPlane) getEndpointsToApplyPolicy(policy *policies.NPMNetworkPolicy return endpointList, nil } -func (dp *DataPlane) resetDataPlane() error { - return nil -} - func (dp *DataPlane) getAllPodEndpoints() ([]hcn.HostComputeEndpoint, error) { klog.Infof("Getting all endpoints for Network ID %s", dp.networkID) endpoints, err := dp.ioShim.Hns.ListEndpointsOfNetwork(dp.networkID) @@ -269,3 +284,11 @@ func (dp *DataPlane) getEndpointByIP(podIP string) (*NPMEndpoint, error) { return nil, nil } + +func (dp *DataPlane) getAllEndpointIDs() []string { + endpointIDs := make([]string, 0, len(dp.endpointCache)) + for _, endpoint := range dp.endpointCache { + endpointIDs = append(endpointIDs, endpoint.ID) + } + return endpointIDs +} diff --git a/npm/pkg/dataplane/policies/chain-management_linux.go b/npm/pkg/dataplane/policies/chain-management_linux.go index d25d8407d7..ece94acfa8 100644 --- a/npm/pkg/dataplane/policies/chain-management_linux.go +++ b/npm/pkg/dataplane/policies/chain-management_linux.go @@ -100,7 +100,7 @@ func (pMgr *PolicyManager) initialize() error { return nil } -func (pMgr *PolicyManager) reset() error { +func (pMgr *PolicyManager) reset(_ []string) error { if err := pMgr.removeNPMChains(); err != nil { return npmerrors.SimpleErrorWrapper("failed to remove NPM chains", err) } diff --git a/npm/pkg/dataplane/policies/policymanager.go b/npm/pkg/dataplane/policies/policymanager.go index 86c5d0eb4b..9ebc4d118d 100644 --- a/npm/pkg/dataplane/policies/policymanager.go +++ b/npm/pkg/dataplane/policies/policymanager.go @@ -56,8 +56,8 @@ type PolicyManagerCfg struct { Mode PolicyManagerMode } -func (pMgr *PolicyManager) Reset() error { - if err := pMgr.reset(); err != nil { +func (pMgr *PolicyManager) Reset(epIDs []string) error { + if err := pMgr.reset(epIDs); err != nil { return npmerrors.ErrorWrapper(npmerrors.ResetPolicyMgr, false, "failed to reset policy manager", err) } return nil diff --git a/npm/pkg/dataplane/policies/policymanager_windows.go b/npm/pkg/dataplane/policies/policymanager_windows.go index fb206a438d..c7c492bda1 100644 --- a/npm/pkg/dataplane/policies/policymanager_windows.go +++ b/npm/pkg/dataplane/policies/policymanager_windows.go @@ -4,18 +4,23 @@ import ( "encoding/json" "errors" "fmt" + "strings" "github.com/Microsoft/hcsshim/hcn" "k8s.io/klog" ) var ( - ErrFailedMarshalACLSettings = errors.New("Failed to marshal ACL settings") - ErrFailedUnMarshalACLSettings = errors.New("Failed to unmarshal ACL settings") + ErrFailedMarshalACLSettings = errors.New("Failed to marshal ACL settings") + ErrFailedUnMarshalACLSettings = errors.New("Failed to unmarshal ACL settings") + resetAllACLs shouldResetAllACLs = true + removeOnlyGivenPolicy shouldResetAllACLs = false ) type staleChains struct{} // unused in Windows +type shouldResetAllACLs bool + type endpointPolicyBuilder struct { aclPolicies []*NPMACLPolSettings otherPolicies []hcn.EndpointPolicy @@ -35,9 +40,16 @@ func (pMgr *PolicyManager) initialize() error { return nil } -func (pMgr *PolicyManager) reset() error { - // TODO - return nil +func (pMgr *PolicyManager) reset(epIDs []string) error { + var aggregateErr error + for _, epID := range epIDs { + err := pMgr.removePolicyByEndpointID("", epID, 0, resetAllACLs) + if err != nil { + aggregateErr = fmt.Errorf("[DataPlane Windows] Skipping removing policies on %s ID Endpoint with %s err\n Previous %w", epID, err.Error(), aggregateErr) + continue + } + } + return aggregateErr } func (pMgr *PolicyManager) reconcile() { @@ -120,39 +132,13 @@ func (pMgr *PolicyManager) removePolicy(policy *NPMNetworkPolicy, endpointList m // but if the bug is not solved then get all existing policies and remove relevant policies from list // then apply remaining policies onto the endpoint var aggregateErr error + numOfRulesToRemove := len(rulesToRemove) for epIPAddr, epID := range endpointList { - epObj, err := pMgr.getEndpointByID(epID) + err := pMgr.removePolicyByEndpointID(rulesToRemove[0].Id, epID, numOfRulesToRemove, removeOnlyGivenPolicy) if err != nil { - // Do not return if one endpoint fails, try all endpoints. - // aggregate the error message and return it at the end aggregateErr = fmt.Errorf("[DataPlane Windows] Skipping removing policies on %s ID Endpoint with %s err\n Previous %w", epID, err.Error(), aggregateErr) continue } - if len(epObj.Policies) == 0 { - klog.Infof("[DataPlanewindows] No Policies to remove on %s ID Endpoint", epID) - continue - } - - epBuilder, err := splitEndpointPolicies(epObj.Policies) - if err != nil { - aggregateErr = fmt.Errorf("[DataPlane Windows] Skipping removing policies on %s ID Endpoint with %s err\n Previous %w", epID, err.Error(), aggregateErr) - continue - } - - epBuilder.compareAndRemovePolicies(rulesToRemove[0].Id, len(rulesToRemove)) - klog.Infof("[DataPlanewindows] Epbuilder ACL policies before removing %+v", epBuilder.aclPolicies) - klog.Infof("[DataPlanewindows] Epbuilder Other policies before removing %+v", epBuilder.otherPolicies) - epPolicies, err := epBuilder.getHCNPolicyRequest() - if err != nil { - aggregateErr = fmt.Errorf("[DataPlanewindows] Skipping removing policies on %s ID Endpoint with %s err\n Previous %w", epID, err.Error(), aggregateErr) - continue - } - - err = pMgr.updatePoliciesOnEndpoint(epObj, epPolicies) - if err != nil { - aggregateErr = fmt.Errorf("[DataPlanewindows] Skipping removing policies on %s ID Endpoint with %s err\n Previous %w", epID, err.Error(), aggregateErr) - continue - } // Delete podendpoint from policy cache delete(policy.PodEndpoints, epIPAddr) @@ -161,6 +147,47 @@ func (pMgr *PolicyManager) removePolicy(policy *NPMNetworkPolicy, endpointList m return aggregateErr } +func (pMgr *PolicyManager) removePolicyByEndpointID(ruleID, epID string, noOfRulesToRemove int, resetAllACL shouldResetAllACLs) error { + epObj, err := pMgr.getEndpointByID(epID) + if err != nil { + return fmt.Errorf("[DataPlane Windows] Skipping removing policies on %s ID Endpoint with %s err", epID, err.Error()) + } + if len(epObj.Policies) == 0 { + klog.Infof("[DataPlanewindows] No Policies to remove on %s ID Endpoint", epID) + } + + epBuilder, err := splitEndpointPolicies(epObj.Policies) + if err != nil { + return fmt.Errorf("[DataPlane Windows] Skipping removing policies on %s ID Endpoint with %s err", epID, err.Error()) + } + + if resetAllACL { + klog.Infof("[DataPlane Windows] Resetting all ACL Policies on %s ID Endpoint", epID) + if !epBuilder.resetAllNPMAclPolicies() { + klog.Infof("[DataPlane Windows] No Azure-NPM ACL Policies on %s ID Endpoint to reset", epID) + return nil + } + } else { + klog.Infof("[DataPlane Windows] Resetting only ACL Policies with %s ID on %s ID Endpoint", ruleID, epID) + if !epBuilder.compareAndRemovePolicies(ruleID, noOfRulesToRemove) { + klog.Infof("[DataPlane Windows] No Policies with ID %s on %s ID Endpoint", ruleID, epID) + return nil + } + } + klog.Infof("[DataPlanewindows] Epbuilder ACL policies before removing %+v", epBuilder.aclPolicies) + klog.Infof("[DataPlanewindows] Epbuilder Other policies before removing %+v", epBuilder.otherPolicies) + epPolicies, err := epBuilder.getHCNPolicyRequest() + if err != nil { + return fmt.Errorf("[DataPlanewindows] Skipping removing policies on %s ID Endpoint with %s err", epID, err.Error()) + } + + err = pMgr.updatePoliciesOnEndpoint(epObj, epPolicies) + if err != nil { + return fmt.Errorf("[DataPlanewindows] Skipping removing policies on %s ID Endpoint with %s err", epID, err.Error()) + } + return nil +} + // addEPPolicyWithEpID given an EP ID and a list of policies, add the policies to the endpoint func (pMgr *PolicyManager) applyPoliciesToEndpointID(epID string, policies hcn.PolicyEndpointRequest) error { epObj, err := pMgr.getEndpointByID(epID) @@ -283,12 +310,13 @@ func (epBuilder *endpointPolicyBuilder) compareAndRemovePolicies(ruleIDToRemove aclFound = true } } - epBuilder.removeACLPolicyAtIndex(toDeleteIndexes) // If ACl Policies are not found, it means that we might have removed them earlier // or never applied them if !aclFound { klog.Infof("[DataPlane Windows] ACL with ID %s is not Found in Dataplane", ruleIDToRemove) + return aclFound } + epBuilder.removeACLPolicyAtIndex(toDeleteIndexes) // if there are still rules to remove, it means that we might have not added all the policies in the add // case and were only able to find a portion of the rules to remove if lenOfRulesToRemove > 0 { @@ -297,11 +325,33 @@ func (epBuilder *endpointPolicyBuilder) compareAndRemovePolicies(ruleIDToRemove return aclFound } -func (epBuilder *endpointPolicyBuilder) resetAllNPMAclPolicies() { - epBuilder.aclPolicies = []*NPMACLPolSettings{} +func (epBuilder *endpointPolicyBuilder) resetAllNPMAclPolicies() bool { + if len(epBuilder.aclPolicies) == 0 { + return false + } + aclFound := false + toDeleteIndexes := map[int]struct{}{} + for i, acl := range epBuilder.aclPolicies { + // First check if ID is present and equal, this saves compute cycles to compare both objects + if strings.HasPrefix(acl.Id, policyIDPrefix) { + // Remove the ACL policy from the list + klog.Infof("[DataPlane Windows] Found ACL with ID %s and removing it", acl.Id) + toDeleteIndexes[i] = struct{}{} + aclFound = true + } + } + if len(toDeleteIndexes) == len(epBuilder.aclPolicies) { + epBuilder.aclPolicies = []*NPMACLPolSettings{} + return aclFound + } + epBuilder.removeACLPolicyAtIndex(toDeleteIndexes) + return aclFound } func (epBuilder *endpointPolicyBuilder) removeACLPolicyAtIndex(indexes map[int]struct{}) { + if len(indexes) == 0 { + return + } tempAclPolicies := []*NPMACLPolSettings{} for i, acl := range epBuilder.aclPolicies { if _, ok := indexes[i]; !ok { diff --git a/test/integration/npm/main.go b/test/integration/npm/main.go index 98021f2996..c0243bf3bb 100644 --- a/test/integration/npm/main.go +++ b/test/integration/npm/main.go @@ -149,7 +149,7 @@ func main() { func testPolicyManager() { pMgr := policies.NewPolicyManager(common.NewIOShim()) - panicOnError(pMgr.Reset()) + panicOnError(pMgr.Reset(nil)) printAndWait(false) panicOnError(pMgr.Initialize())