Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 0 additions & 7 deletions npm/pkg/dataplane/dataplane.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,13 +101,6 @@ func (dp *DataPlane) InitializeDataPlane() error {
// ResetDataPlane helps in cleaning up dataplane sets and policies programmed
// by NPM, returning a clean slate
func (dp *DataPlane) ResetDataPlane() error {
// It is important to keep order to clean-up ACLs before ipsets. Otherwise we won't be able to delete ipsets referenced by ACLs
if err := dp.policyMgr.Reset(); err != nil {
return npmerrors.ErrorWrapper(npmerrors.ResetDataPlane, false, "failed to reset policy dataplane", err)
}
if err := dp.ipsetMgr.ResetIPSets(); err != nil {
return npmerrors.ErrorWrapper(npmerrors.ResetDataPlane, false, "failed to reset ipsets dataplane", err)
}
return dp.resetDataPlane()
}

Expand Down
8 changes: 8 additions & 0 deletions npm/pkg/dataplane/dataplane_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package dataplane

import (
"github.com/Azure/azure-container-networking/npm/pkg/dataplane/policies"
npmerrors "github.com/Azure/azure-container-networking/npm/util/errors"
"k8s.io/klog"
)

Expand All @@ -26,5 +27,12 @@ func (dp *DataPlane) updatePod(pod *updateNPMPod) error {
}

func (dp *DataPlane) resetDataPlane() error {
// It is important to keep order to clean-up ACLs before ipsets. Otherwise we won't be able to delete ipsets referenced by ACLs
if err := dp.policyMgr.Reset(nil); err != nil {
return npmerrors.ErrorWrapper(npmerrors.ResetDataPlane, false, "failed to reset policy dataplane", err)
}
if err := dp.ipsetMgr.ResetIPSets(); err != nil {
return npmerrors.ErrorWrapper(npmerrors.ResetDataPlane, false, "failed to reset ipsets dataplane", err)
}
return nil
}
31 changes: 27 additions & 4 deletions npm/pkg/dataplane/dataplane_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"fmt"

"github.com/Azure/azure-container-networking/npm/pkg/dataplane/policies"
npmerrors "github.com/Azure/azure-container-networking/npm/util/errors"
"github.com/Microsoft/hcsshim/hcn"
"k8s.io/klog"
)
Expand Down Expand Up @@ -42,6 +43,24 @@ func (dp *DataPlane) initializeDataPlane() error {
return nil
}

func (dp *DataPlane) resetDataPlane() error {
// initialize the DP so the podendpoints will get updated.
if err := dp.initializeDataPlane(); err != nil {
return err
}

epIDs := dp.getAllEndpointIDs()

// It is important to keep order to clean-up ACLs before ipsets. Otherwise we won't be able to delete ipsets referenced by ACLs
if err := dp.policyMgr.Reset(epIDs); err != nil {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in resetDataPlane(), only difference between linux and windows is getting the endpoint IDs. Perhaps make getAllEndpointIDs() OS specific (call dp.initializeDP() within it) and keep pMgr and iMgr resets in generic dp.Reset()

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We will also need to initializeDP first in windows to be able to get a view of the endpoints in HNS, so it would be better to keep it as is.

return npmerrors.ErrorWrapper(npmerrors.ResetDataPlane, false, "failed to reset policy dataplane", err)
}
if err := dp.ipsetMgr.ResetIPSets(); err != nil {
return npmerrors.ErrorWrapper(npmerrors.ResetDataPlane, false, "failed to reset ipsets dataplane", err)
}
return nil
}

func (dp *DataPlane) shouldUpdatePod() bool {
return true
}
Expand Down Expand Up @@ -195,10 +214,6 @@ func (dp *DataPlane) getEndpointsToApplyPolicy(policy *policies.NPMNetworkPolicy
return endpointList, nil
}

func (dp *DataPlane) resetDataPlane() error {
return nil
}

func (dp *DataPlane) getAllPodEndpoints() ([]hcn.HostComputeEndpoint, error) {
klog.Infof("Getting all endpoints for Network ID %s", dp.networkID)
endpoints, err := dp.ioShim.Hns.ListEndpointsOfNetwork(dp.networkID)
Expand Down Expand Up @@ -269,3 +284,11 @@ func (dp *DataPlane) getEndpointByIP(podIP string) (*NPMEndpoint, error) {

return nil, nil
}

func (dp *DataPlane) getAllEndpointIDs() []string {
endpointIDs := make([]string, 0, len(dp.endpointCache))
for _, endpoint := range dp.endpointCache {
endpointIDs = append(endpointIDs, endpoint.ID)
}
return endpointIDs
}
2 changes: 1 addition & 1 deletion npm/pkg/dataplane/policies/chain-management_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ func (pMgr *PolicyManager) initialize() error {
return nil
}

func (pMgr *PolicyManager) reset() error {
func (pMgr *PolicyManager) reset(_ []string) error {
if err := pMgr.removeNPMChains(); err != nil {
return npmerrors.SimpleErrorWrapper("failed to remove NPM chains", err)
}
Expand Down
4 changes: 2 additions & 2 deletions npm/pkg/dataplane/policies/policymanager.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ type PolicyManagerCfg struct {
Mode PolicyManagerMode
}

func (pMgr *PolicyManager) Reset() error {
if err := pMgr.reset(); err != nil {
func (pMgr *PolicyManager) Reset(epIDs []string) error {
if err := pMgr.reset(epIDs); err != nil {
return npmerrors.ErrorWrapper(npmerrors.ResetPolicyMgr, false, "failed to reset policy manager", err)
}
return nil
Expand Down
122 changes: 86 additions & 36 deletions npm/pkg/dataplane/policies/policymanager_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,23 @@ import (
"encoding/json"
"errors"
"fmt"
"strings"

"github.com/Microsoft/hcsshim/hcn"
"k8s.io/klog"
)

var (
ErrFailedMarshalACLSettings = errors.New("Failed to marshal ACL settings")
ErrFailedUnMarshalACLSettings = errors.New("Failed to unmarshal ACL settings")
ErrFailedMarshalACLSettings = errors.New("Failed to marshal ACL settings")
ErrFailedUnMarshalACLSettings = errors.New("Failed to unmarshal ACL settings")
resetAllACLs shouldResetAllACLs = true
removeOnlyGivenPolicy shouldResetAllACLs = false
)

type staleChains struct{} // unused in Windows

type shouldResetAllACLs bool

type endpointPolicyBuilder struct {
aclPolicies []*NPMACLPolSettings
otherPolicies []hcn.EndpointPolicy
Expand All @@ -35,9 +40,16 @@ func (pMgr *PolicyManager) initialize() error {
return nil
}

func (pMgr *PolicyManager) reset() error {
// TODO
return nil
func (pMgr *PolicyManager) reset(epIDs []string) error {
var aggregateErr error
for _, epID := range epIDs {
err := pMgr.removePolicyByEndpointID("", epID, 0, resetAllACLs)
if err != nil {
aggregateErr = fmt.Errorf("[DataPlane Windows] Skipping removing policies on %s ID Endpoint with %s err\n Previous %w", epID, err.Error(), aggregateErr)
continue
}
}
return aggregateErr
}

func (pMgr *PolicyManager) reconcile() {
Expand Down Expand Up @@ -120,39 +132,13 @@ func (pMgr *PolicyManager) removePolicy(policy *NPMNetworkPolicy, endpointList m
// but if the bug is not solved then get all existing policies and remove relevant policies from list
// then apply remaining policies onto the endpoint
var aggregateErr error
numOfRulesToRemove := len(rulesToRemove)
for epIPAddr, epID := range endpointList {
epObj, err := pMgr.getEndpointByID(epID)
err := pMgr.removePolicyByEndpointID(rulesToRemove[0].Id, epID, numOfRulesToRemove, removeOnlyGivenPolicy)
if err != nil {
// Do not return if one endpoint fails, try all endpoints.
// aggregate the error message and return it at the end
aggregateErr = fmt.Errorf("[DataPlane Windows] Skipping removing policies on %s ID Endpoint with %s err\n Previous %w", epID, err.Error(), aggregateErr)
continue
}
if len(epObj.Policies) == 0 {
klog.Infof("[DataPlanewindows] No Policies to remove on %s ID Endpoint", epID)
continue
}

epBuilder, err := splitEndpointPolicies(epObj.Policies)
if err != nil {
aggregateErr = fmt.Errorf("[DataPlane Windows] Skipping removing policies on %s ID Endpoint with %s err\n Previous %w", epID, err.Error(), aggregateErr)
continue
}

epBuilder.compareAndRemovePolicies(rulesToRemove[0].Id, len(rulesToRemove))
klog.Infof("[DataPlanewindows] Epbuilder ACL policies before removing %+v", epBuilder.aclPolicies)
klog.Infof("[DataPlanewindows] Epbuilder Other policies before removing %+v", epBuilder.otherPolicies)
epPolicies, err := epBuilder.getHCNPolicyRequest()
if err != nil {
aggregateErr = fmt.Errorf("[DataPlanewindows] Skipping removing policies on %s ID Endpoint with %s err\n Previous %w", epID, err.Error(), aggregateErr)
continue
}

err = pMgr.updatePoliciesOnEndpoint(epObj, epPolicies)
if err != nil {
aggregateErr = fmt.Errorf("[DataPlanewindows] Skipping removing policies on %s ID Endpoint with %s err\n Previous %w", epID, err.Error(), aggregateErr)
continue
}

// Delete podendpoint from policy cache
delete(policy.PodEndpoints, epIPAddr)
Expand All @@ -161,6 +147,47 @@ func (pMgr *PolicyManager) removePolicy(policy *NPMNetworkPolicy, endpointList m
return aggregateErr
}

func (pMgr *PolicyManager) removePolicyByEndpointID(ruleID, epID string, noOfRulesToRemove int, resetAllACL shouldResetAllACLs) error {
epObj, err := pMgr.getEndpointByID(epID)
if err != nil {
return fmt.Errorf("[DataPlane Windows] Skipping removing policies on %s ID Endpoint with %s err", epID, err.Error())
}
if len(epObj.Policies) == 0 {
klog.Infof("[DataPlanewindows] No Policies to remove on %s ID Endpoint", epID)
}

epBuilder, err := splitEndpointPolicies(epObj.Policies)
if err != nil {
return fmt.Errorf("[DataPlane Windows] Skipping removing policies on %s ID Endpoint with %s err", epID, err.Error())
}

if resetAllACL {
klog.Infof("[DataPlane Windows] Resetting all ACL Policies on %s ID Endpoint", epID)
if !epBuilder.resetAllNPMAclPolicies() {
klog.Infof("[DataPlane Windows] No Azure-NPM ACL Policies on %s ID Endpoint to reset", epID)
return nil
}
} else {
klog.Infof("[DataPlane Windows] Resetting only ACL Policies with %s ID on %s ID Endpoint", ruleID, epID)
if !epBuilder.compareAndRemovePolicies(ruleID, noOfRulesToRemove) {
klog.Infof("[DataPlane Windows] No Policies with ID %s on %s ID Endpoint", ruleID, epID)
return nil
}
}
klog.Infof("[DataPlanewindows] Epbuilder ACL policies before removing %+v", epBuilder.aclPolicies)
klog.Infof("[DataPlanewindows] Epbuilder Other policies before removing %+v", epBuilder.otherPolicies)
epPolicies, err := epBuilder.getHCNPolicyRequest()
if err != nil {
return fmt.Errorf("[DataPlanewindows] Skipping removing policies on %s ID Endpoint with %s err", epID, err.Error())
}

err = pMgr.updatePoliciesOnEndpoint(epObj, epPolicies)
if err != nil {
return fmt.Errorf("[DataPlanewindows] Skipping removing policies on %s ID Endpoint with %s err", epID, err.Error())
}
return nil
}

// addEPPolicyWithEpID given an EP ID and a list of policies, add the policies to the endpoint
func (pMgr *PolicyManager) applyPoliciesToEndpointID(epID string, policies hcn.PolicyEndpointRequest) error {
epObj, err := pMgr.getEndpointByID(epID)
Expand Down Expand Up @@ -283,12 +310,13 @@ func (epBuilder *endpointPolicyBuilder) compareAndRemovePolicies(ruleIDToRemove
aclFound = true
}
}
epBuilder.removeACLPolicyAtIndex(toDeleteIndexes)
// If ACl Policies are not found, it means that we might have removed them earlier
// or never applied them
if !aclFound {
klog.Infof("[DataPlane Windows] ACL with ID %s is not Found in Dataplane", ruleIDToRemove)
return aclFound
}
epBuilder.removeACLPolicyAtIndex(toDeleteIndexes)
// if there are still rules to remove, it means that we might have not added all the policies in the add
// case and were only able to find a portion of the rules to remove
if lenOfRulesToRemove > 0 {
Expand All @@ -297,11 +325,33 @@ func (epBuilder *endpointPolicyBuilder) compareAndRemovePolicies(ruleIDToRemove
return aclFound
}

func (epBuilder *endpointPolicyBuilder) resetAllNPMAclPolicies() {
epBuilder.aclPolicies = []*NPMACLPolSettings{}
func (epBuilder *endpointPolicyBuilder) resetAllNPMAclPolicies() bool {
if len(epBuilder.aclPolicies) == 0 {
return false
}
aclFound := false
toDeleteIndexes := map[int]struct{}{}
for i, acl := range epBuilder.aclPolicies {
// First check if ID is present and equal, this saves compute cycles to compare both objects
if strings.HasPrefix(acl.Id, policyIDPrefix) {
// Remove the ACL policy from the list
klog.Infof("[DataPlane Windows] Found ACL with ID %s and removing it", acl.Id)
toDeleteIndexes[i] = struct{}{}
aclFound = true
}
}
if len(toDeleteIndexes) == len(epBuilder.aclPolicies) {
epBuilder.aclPolicies = []*NPMACLPolSettings{}
return aclFound
}
epBuilder.removeACLPolicyAtIndex(toDeleteIndexes)
return aclFound
}

func (epBuilder *endpointPolicyBuilder) removeACLPolicyAtIndex(indexes map[int]struct{}) {
if len(indexes) == 0 {
return
}
tempAclPolicies := []*NPMACLPolSettings{}
for i, acl := range epBuilder.aclPolicies {
if _, ok := indexes[i]; !ok {
Expand Down
2 changes: 1 addition & 1 deletion test/integration/npm/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ func main() {
func testPolicyManager() {
pMgr := policies.NewPolicyManager(common.NewIOShim())

panicOnError(pMgr.Reset())
panicOnError(pMgr.Reset(nil))
printAndWait(false)

panicOnError(pMgr.Initialize())
Expand Down