Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion npm/pkg/dataplane/dataplane.go
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,7 @@ func (dp *DataPlane) RemovePolicy(policyKey string) error {
return nil
}
// Use the endpoint list saved in cache for this network policy to remove
err := dp.policyMgr.RemovePolicy(policy.PolicyKey, nil)
err := dp.policyMgr.RemovePolicy(policy.PolicyKey)
if err != nil {
return fmt.Errorf("[DataPlane] error while removing policy: %w", err)
}
Expand Down
2 changes: 1 addition & 1 deletion npm/pkg/dataplane/dataplane_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ func (dp *DataPlane) updatePod(pod *updateNPMPod) error {
endpointList := map[string]string{
endpoint.ip: endpoint.id,
}
err := dp.policyMgr.RemovePolicy(policyKey, endpointList)
err := dp.policyMgr.RemovePolicyForEndpoints(policyKey, endpointList)
if err != nil {
return err
}
Expand Down
50 changes: 45 additions & 5 deletions npm/pkg/dataplane/policies/policymanager.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,13 +130,18 @@ func (pMgr *PolicyManager) AddPolicy(policy *NPMNetworkPolicy, endpointList map[
metrics.RecordACLRuleExecTime(timer) // record execution time regardless of failure
if err != nil {
// NOTE: in Linux, Prometheus metrics may be off at this point since some ACL rules may have been applied successfully
// In Windows, Prometheus metrics may be off at this point since we don't know how many endpoints had rules applied successfully.
msg := fmt.Sprintf("failed to add policy: %s", err.Error())
metrics.SendErrorLogAndMetric(util.IptmID, "error: %s", msg)
return npmerrors.Errorf(npmerrors.AddPolicy, false, msg)
}

// update Prometheus metrics on success
metrics.IncNumACLRulesBy(policy.numACLRulesProducedInKernel())
numEndpoints := 1
if util.IsWindowsDP() {
numEndpoints = len(endpointList)
}
metrics.IncNumACLRulesBy(policy.numACLRulesProducedInKernel() * numEndpoints)

pMgr.policyMap.cache[policy.PolicyKey] = policy
return nil
Expand All @@ -146,7 +151,7 @@ func (pMgr *PolicyManager) isFirstPolicy() bool {
return len(pMgr.policyMap.cache) == 0
}

func (pMgr *PolicyManager) RemovePolicy(policyKey string, endpointList map[string]string) error {
func (pMgr *PolicyManager) RemovePolicy(policyKey string) error {
policy, ok := pMgr.GetPolicy(policyKey)

if !ok {
Expand All @@ -162,22 +167,57 @@ func (pMgr *PolicyManager) RemovePolicy(policyKey string, endpointList map[strin
defer pMgr.policyMap.Unlock()

// Call actual dataplane function to apply changes
err := pMgr.removePolicy(policy, endpointList)
err := pMgr.removePolicy(policy, nil)
// currently we only have acl rule exec time for "adding" rules, so we skip recording here
if err != nil {
// NOTE: in Linux, Prometheus metrics may be off at this point since some ACL rules may have been applied successfully
// NOTE: in Linux, Prometheus metrics may be off at this point since some ACL rules may have been applied successfully.
// In Windows, Prometheus metrics may be off at this point since we don't know how many endpoints had rules applied successfully.
msg := fmt.Sprintf("failed to remove policy: %s", err.Error())
metrics.SendErrorLogAndMetric(util.IptmID, "error: %s", msg)
return npmerrors.Errorf(npmerrors.RemovePolicy, false, msg)
}

// update Prometheus metrics on success
metrics.DecNumACLRulesBy(policy.numACLRulesProducedInKernel())
numEndpoints := 1
if util.IsWindowsDP() {
numEndpoints = len(policy.PodEndpoints)
}
metrics.DecNumACLRulesBy(policy.numACLRulesProducedInKernel() * numEndpoints)

// remove policy from cache
delete(pMgr.policyMap.cache, policyKey)
return nil
}

// RemovePolicyForEndpoints is identical to RemovePolicy except it will not remove the policy from the cache.
// This function is intended for Windows only.
func (pMgr *PolicyManager) RemovePolicyForEndpoints(policyKey string, endpointList map[string]string) error {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where is this function being used ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

forgot to commit. updated

policy, ok := pMgr.GetPolicy(policyKey)

if !ok {
return nil
}

if len(policy.ACLs) == 0 {
klog.Infof("[DataPlane] No ACLs in policy %s to remove for endpoints", policyKey)
return nil
}
// Call actual dataplane function to apply changes
err := pMgr.removePolicy(policy, endpointList)
// currently we only have acl rule exec time for "adding" rules, so we skip recording here
if err != nil {
// NOTE: Prometheus metrics may be off at this point since we don't know how many endpoints had rules applied successfully.
msg := fmt.Sprintf("failed to remove policy. endpoints: [%+v]. err: [%s]", endpointList, err.Error())
metrics.SendErrorLogAndMetric(util.IptmID, "error: %s", msg)
return npmerrors.Errorf(npmerrors.RemovePolicy, false, msg)
}

// update Prometheus metrics on success
metrics.DecNumACLRulesBy(policy.numACLRulesProducedInKernel() * len(endpointList))

return nil
}

func (pMgr *PolicyManager) isLastPolicy() bool {
// if we change our code to delete more than one policy at once, we can specify numPoliciesToDelete as an argument
numPoliciesToDelete := 1
Expand Down
10 changes: 5 additions & 5 deletions npm/pkg/dataplane/policies/policymanager_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,7 @@ func TestRemovePoliciesAcceptableError(t *testing.T) {
defer ioshim.VerifyCalls(t, calls)
pMgr := NewPolicyManager(ioshim, ipsetConfig)
require.NoError(t, pMgr.AddPolicy(bothDirectionsNetPol, epList))
require.NoError(t, pMgr.RemovePolicy(bothDirectionsNetPol.PolicyKey, nil))
require.NoError(t, pMgr.RemovePolicy(bothDirectionsNetPol.PolicyKey))
_, ok := pMgr.GetPolicy(bothDirectionsNetPol.PolicyKey)
require.False(t, ok)
promVals{0, 1}.testPrometheusMetrics(t)
Expand Down Expand Up @@ -381,7 +381,7 @@ func TestRemovePoliciesError(t *testing.T) {
pMgr := NewPolicyManager(ioshim, ipsetConfig)
err := pMgr.AddPolicy(bothDirectionsNetPol, nil)
require.NoError(t, err)
err = pMgr.RemovePolicy(bothDirectionsNetPol.PolicyKey, nil)
err = pMgr.RemovePolicy(bothDirectionsNetPol.PolicyKey)
require.Error(t, err)

promVals{6, 1}.testPrometheusMetrics(t)
Expand All @@ -407,23 +407,23 @@ func TestUpdatingStaleChains(t *testing.T) {
assertStaleChainsContain(t, pMgr.staleChains)

// successful removal, so mark the policy's chains as stale
require.NoError(t, pMgr.RemovePolicy(bothDirectionsNetPol.PolicyKey, nil))
require.NoError(t, pMgr.RemovePolicy(bothDirectionsNetPol.PolicyKey))
assertStaleChainsContain(t, pMgr.staleChains, bothDirectionsNetPolIngressChain, bothDirectionsNetPolEgressChain)

// successful add, so keep the same stale chains
require.NoError(t, pMgr.AddPolicy(ingressNetPol, nil))
assertStaleChainsContain(t, pMgr.staleChains, bothDirectionsNetPolIngressChain, bothDirectionsNetPolEgressChain)

// failure to remove, so keep the same stale chains
require.Error(t, pMgr.RemovePolicy(ingressNetPol.PolicyKey, nil))
require.Error(t, pMgr.RemovePolicy(ingressNetPol.PolicyKey))
assertStaleChainsContain(t, pMgr.staleChains, bothDirectionsNetPolIngressChain, bothDirectionsNetPolEgressChain)

// successfully add a new policy. keep the same stale chains
require.NoError(t, pMgr.AddPolicy(egressNetPol, nil))
assertStaleChainsContain(t, pMgr.staleChains, bothDirectionsNetPolIngressChain, bothDirectionsNetPolEgressChain)

// successful removal, so mark the policy's chains as stale
require.NoError(t, pMgr.RemovePolicy(egressNetPol.PolicyKey, nil))
require.NoError(t, pMgr.RemovePolicy(egressNetPol.PolicyKey))
assertStaleChainsContain(t, pMgr.staleChains, bothDirectionsNetPolIngressChain, bothDirectionsNetPolEgressChain, egressNetPolChain)

// failure to add, so keep the same stale chains the same
Expand Down
4 changes: 2 additions & 2 deletions npm/pkg/dataplane/policies/policymanager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ func TestRemovePolicy(t *testing.T) {
defer ioshim.VerifyCalls(t, calls)
pMgr := NewPolicyManager(ioshim, ipsetConfig)
require.NoError(t, pMgr.AddPolicy(testNetPol, epList))
require.NoError(t, pMgr.RemovePolicy(testNetPol.PolicyKey, nil))
require.NoError(t, pMgr.RemovePolicy(testNetPol.PolicyKey))
_, ok := pMgr.GetPolicy(testNetPol.PolicyKey)
require.False(t, ok)
promVals{0, 1}.testPrometheusMetrics(t)
Expand All @@ -183,7 +183,7 @@ func TestRemoveNonexistentPolicy(t *testing.T) {
metrics.ReinitializeAll()
ioshim := common.NewMockIOShim(nil)
pMgr := NewPolicyManager(ioshim, ipsetConfig)
require.NoError(t, pMgr.RemovePolicy("wrong-policy-key", epList))
require.NoError(t, pMgr.RemovePolicy("wrong-policy-key"))
promVals{0, 0}.testPrometheusMetrics(t)
}

Expand Down
3 changes: 3 additions & 0 deletions npm/pkg/dataplane/policies/policymanager_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ func (pMgr *PolicyManager) removePolicy(policy *NPMNetworkPolicy, endpointList m
if err != nil {
return err
}
// FIXME rulesToRemove is a list of pointers
klog.Infof("[PolicyManagerWindows] To Remove Policy: %s \n To Delete ACLs: %+v \n To Remove From %+v endpoints", policy.PolicyKey, rulesToRemove, endpointList)
// If remove bug is solved we can directly remove the exact policy from the endpoint
// but if the bug is not solved then get all existing policies and remove relevant policies from list
Expand Down Expand Up @@ -204,6 +205,7 @@ func (pMgr *PolicyManager) removePolicyByEndpointID(ruleID, epID string, noOfRul
return nil
}
}
// FIXME epBuilder.aclPolicies is a list of pointers
klog.Infof("[DataPlanewindows] Epbuilder ACL policies before removing %+v", epBuilder.aclPolicies)
klog.Infof("[DataPlanewindows] Epbuilder Other policies before removing %+v", epBuilder.otherPolicies)
epPolicies, err := epBuilder.getHCNPolicyRequest()
Expand Down Expand Up @@ -246,6 +248,7 @@ func getEPPolicyReqFromACLSettings(settings []*NPMACLPolSettings) (hcn.PolicyEnd
}

for i, acl := range settings {
// FIXME a lot of prints
klog.Infof("Acl settings: %+v", acl)
byteACL, err := json.Marshal(acl)
if err != nil {
Expand Down
4 changes: 2 additions & 2 deletions npm/pkg/dataplane/policies/policymanager_windows_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ func TestRemovePolicies(t *testing.T) {
verifyFakeHNSCacheACLs(t, expectedACLs, acls)
}

err = pMgr.RemovePolicy(TestNetworkPolicies[0].PolicyKey, nil)
err = pMgr.RemovePolicy(TestNetworkPolicies[0].PolicyKey)
require.NoError(t, err)
verifyACLCacheIsCleaned(t, hns, len(endPointIDList))
}
Expand All @@ -150,7 +150,7 @@ func TestRemovePoliciesEndpointNotFound(t *testing.T) {
testendPointIDList := map[string]string{
"10.0.0.5": "test10",
}
err = pMgr.RemovePolicy(TestNetworkPolicies[0].PolicyKey, testendPointIDList)
err = pMgr.RemovePolicyForEndpoints(TestNetworkPolicies[0].PolicyKey, testendPointIDList)
require.NoError(t, err, err)
verifyACLCacheIsCleaned(t, hns, len(endPointIDList))
}
Expand Down