From 30517697312e2542f6ce272876f7bfbeffe8bc12 Mon Sep 17 00:00:00 2001
From: neaggarw
Date: Thu, 12 Nov 2020 00:37:44 -0800
Subject: [PATCH] CNS: Fix IPAM scale-down to handle incremental decrease

MarkIPsAsPending now returns only the IPs marked in the current call (as a
map keyed by UUID) instead of the full pending-release state, and the NNC
spec is rebuilt by a new createNNCSpecForCRD helper that reads the
pending-release IPs back from CNS. This lets the pool monitor shrink the
pool by one batch per reconcile instead of assuming a single decrease.
---
 cns/fakes/cnsfake.go                        | 21 ++++--
 cns/ipampoolmonitor/ipampoolmonitor.go      | 29 +++++---
 cns/ipampoolmonitor/ipampoolmonitor_test.go | 82 +++++++++++++++++++++
 3 files changed, 112 insertions(+), 20 deletions(-)

diff --git a/cns/fakes/cnsfake.go b/cns/fakes/cnsfake.go
index d87a47748f..6cb5e8644f 100644
--- a/cns/fakes/cnsfake.go
+++ b/cns/fakes/cnsfake.go
@@ -135,17 +135,19 @@ func (ipm *IPStateManager) MarkIPsAsPending(numberOfIPsToMark int) (map[string]c
 	defer ipm.Unlock()

 	var (
-		err            error
-		pendingRelease []cns.IPConfigurationStatus
+		err error
 	)
+
+	pendingRelease := make(map[string]cns.IPConfigurationStatus)
+
 	defer func() {
 		// if there was an error, and not all ip's have been freed, restore state
 		if err != nil && len(pendingRelease) != numberOfIPsToMark {
-			for i := range pendingRelease {
-				ipm.AvailableIPIDStack.Push(pendingRelease[i].ID)
-				ipm.AvailableIPConfigState[pendingRelease[i].ID] = pendingRelease[i]
-				delete(ipm.PendingReleaseIPConfigState, pendingRelease[i].ID)
+			for uuid, ipState := range pendingRelease {
+				ipState.State = cns.Available
+				ipm.AvailableIPIDStack.Push(uuid)
+				ipm.AvailableIPConfigState[uuid] = ipState
+				delete(ipm.PendingReleaseIPConfigState, uuid)
 			}
 		}
 	}()
@@ -157,7 +159,10 @@ func (ipm *IPStateManager) MarkIPsAsPending(numberOfIPsToMark int) (map[string]c
 		}

-		// add all pending release to a slice
-		pendingRelease = append(pendingRelease, ipm.AvailableIPConfigState[id])
+		// mark the IP as pending release and record it in the map
+		ipConfig := ipm.AvailableIPConfigState[id]
+		ipConfig.State = cns.PendingRelease
+		pendingRelease[id] = ipConfig
+
 		delete(ipm.AvailableIPConfigState, id)
 	}
@@ -166,7 +171,7 @@ func (ipm *IPStateManager) MarkIPsAsPending(numberOfIPsToMark int) (map[string]c
 		ipm.PendingReleaseIPConfigState[pendingRelease[i].ID] = pendingRelease[i]
 	}

-	return ipm.PendingReleaseIPConfigState, nil
+	return pendingRelease, nil
 }

 type HTTPServiceFake struct {
diff --git a/cns/ipampoolmonitor/ipampoolmonitor.go b/cns/ipampoolmonitor/ipampoolmonitor.go
index e8e03b558f..b2dcadf251 100644
--- a/cns/ipampoolmonitor/ipampoolmonitor.go
+++ b/cns/ipampoolmonitor/ipampoolmonitor.go
@@ -107,12 +107,12 @@ func (pm *CNSIPAMPoolMonitor) increasePoolSize() error {
 	pm.cachedNNC.Spec.RequestedIPCount += pm.scalarUnits.BatchSize

-	// pass nil map to CNStoCRDSpec because we don't want to modify the to be deleted ipconfigs
-	pm.cachedNNC.Spec, err = MarkIPsAsPendingInCRD(nil, pm.cachedNNC.Spec.RequestedIPCount)
+	// regenerate the spec without clearing IPsNotInUse, so any in-flight releases are preserved
+	pm.cachedNNC.Spec, err = pm.createNNCSpecForCRD(false)
 	if err != nil {
 		return err
 	}

-	logger.Printf("[ipam-pool-monitor] Increasing pool size, Current Pool Size: %v, Requested IP Count: %v, Pods with IP's:%v", len(pm.cns.GetPodIPConfigState()), pm.cachedNNC.Spec.RequestedIPCount, len(pm.cns.GetAllocatedIPConfigs()))
+	logger.Printf("[ipam-pool-monitor] Increasing pool size, Current Pool Size: %v, Requested IP Count: %v, Pods with IPs: %v, ToBeDeleted Count: %v", len(pm.cns.GetPodIPConfigState()), pm.cachedNNC.Spec.RequestedIPCount, len(pm.cns.GetAllocatedIPConfigs()), len(pm.cachedNNC.Spec.IPsNotInUse))
 	return pm.rc.UpdateCRDSpec(context.Background(), pm.cachedNNC.Spec)
 }

@@ -124,18 +124,20 @@ func (pm *CNSIPAMPoolMonitor) decreasePoolSize() error {
 	pm.cachedNNC.Spec.RequestedIPCount -= pm.scalarUnits.BatchSize

 	// mark n number of IP's as pending
-	pendingIPAddresses, err = pm.cns.MarkIPsAsPending(int(pm.scalarUnits.BatchSize))
+	pendingIPAddresses, err := pm.cns.MarkIPsAsPending(int(pm.scalarUnits.BatchSize))
 	if err != nil {
 		return err
 	}

+	logger.Printf("[ipam-pool-monitor] Updated requested IP count: %v, releasing IPs: %+v", pm.cachedNNC.Spec.RequestedIPCount, pendingIPAddresses)
+
-	// convert the pending IP addresses to a spec
-	pm.cachedNNC.Spec, err = MarkIPsAsPendingInCRD(pendingIPAddresses, pm.cachedNNC.Spec.RequestedIPCount)
+	// regenerate the spec; the pending release IPs are read back from CNS
+	pm.cachedNNC.Spec, err = pm.createNNCSpecForCRD(false)
 	if err != nil {
 		return err
 	}

 	pm.pendingRelease = true
-	logger.Printf("[ipam-pool-monitor] Decreasing pool size, Current Pool Size: %v, Requested IP Count: %v, Pods with IP's: %v", len(pm.cns.GetPodIPConfigState()), pm.cachedNNC.Spec.RequestedIPCount, len(pm.cns.GetAllocatedIPConfigs()))
+	logger.Printf("[ipam-pool-monitor] Decreasing pool size, Current Pool Size: %v, Requested IP Count: %v, Pods with IPs: %v, ToBeDeleted Count: %v", len(pm.cns.GetPodIPConfigState()), pm.cachedNNC.Spec.RequestedIPCount, len(pm.cns.GetAllocatedIPConfigs()), len(pm.cachedNNC.Spec.IPsNotInUse))
 	return pm.rc.UpdateCRDSpec(context.Background(), pm.cachedNNC.Spec)
 }

@@ -146,7 +148,7 @@ func (pm *CNSIPAMPoolMonitor) cleanPendingRelease() error {
 	defer pm.mu.Unlock()

 	var err error
-	pm.cachedNNC.Spec, err = MarkIPsAsPendingInCRD(nil, pm.cachedNNC.Spec.RequestedIPCount)
+	pm.cachedNNC.Spec, err = pm.createNNCSpecForCRD(true)
 	if err != nil {
-		logger.Printf("[ipam-pool-monitor] Failed to translate ")
+		logger.Printf("[ipam-pool-monitor] Failed to create NNC spec while cleaning pending release: %v", err)
 	}
@@ -156,19 +158,22 @@
 }

-// CNSToCRDSpec translates CNS's map of Ips to be released and requested ip count into a CRD Spec
-func MarkIPsAsPendingInCRD(toBeDeletedSecondaryIPConfigs map[string]cns.IPConfigurationStatus, ipCount int64) (nnc.NodeNetworkConfigSpec, error) {
+// createNNCSpecForCRD builds a CRD spec from the cached requested IP count and CNS's pending release IPs
+func (pm *CNSIPAMPoolMonitor) createNNCSpecForCRD(resetNotInUseList bool) (nnc.NodeNetworkConfigSpec, error) {
 	var (
 		spec nnc.NodeNetworkConfigSpec
-		uuid string
 	)

-	spec.RequestedIPCount = ipCount
+	// Update the count from the cached spec
+	spec.RequestedIPCount = pm.cachedNNC.Spec.RequestedIPCount

-	if toBeDeletedSecondaryIPConfigs == nil {
+	// Discard the ToBeDeleted list if requested; this happens once DNC has cleaned up the pending IPs and CNS has updated its own state.
+	if resetNotInUseList {
 		spec.IPsNotInUse = make([]string, 0)
 	} else {
-		for uuid = range toBeDeletedSecondaryIPConfigs {
-			spec.IPsNotInUse = append(spec.IPsNotInUse, uuid)
+		// Otherwise, repopulate the list from CNS's pending release IP configs
+		pendingIPs := pm.cns.GetPendingReleaseIPConfigs()
+		for _, pendingIP := range pendingIPs {
+			spec.IPsNotInUse = append(spec.IPsNotInUse, pendingIP.ID)
 		}
 	}

diff --git a/cns/ipampoolmonitor/ipampoolmonitor_test.go b/cns/ipampoolmonitor/ipampoolmonitor_test.go
index 934019bb34..b536221568 100644
--- a/cns/ipampoolmonitor/ipampoolmonitor_test.go
+++ b/cns/ipampoolmonitor/ipampoolmonitor_test.go
@@ -313,3 +313,85 @@ func TestPoolSizeDecreaseWhenDecreaseHasAlreadyBeenRequested(t *testing.T) {
 		t.Fatalf("Expected IP's not in use to be 0 after reconcile, expected %v, actual %v", (initialIPConfigCount - batchSize), len(poolmonitor.cachedNNC.Spec.IPsNotInUse))
 	}
 }
+
+func TestPoolSizeDecreaseToReallyLow(t *testing.T) {
+	var (
+		batchSize               = 10
+		initialIPConfigCount    = 30
+		requestThresholdPercent = 30
+		releaseThresholdPercent = 100
+	)
+
+	fakecns, fakerc, poolmonitor := initFakes(batchSize, initialIPConfigCount, requestThresholdPercent, releaseThresholdPercent)
+
+	log.Printf("Min free IPs: %v", poolmonitor.MinimumFreeIps)
+	log.Printf("Max free IPs: %v", poolmonitor.MaximumFreeIps)
+
+	// initial pool count is 30; mark 23 of them as allocated
+	err := fakecns.SetNumberOfAllocatedIPs(23)
+	if err != nil {
+		t.Error(err)
+	}
+
+	// Pool monitor should do nothing, as the free IP count is within the thresholds
+	err = poolmonitor.Reconcile()
+	if err != nil {
+		t.Errorf("Expected pool monitor not to fail after CNS set the number of allocated IPs: %v", err)
+	}
+
+	// Now drop the allocated count very low (3); this should trigger a release in two batches
+	err = fakecns.SetNumberOfAllocatedIPs(3)
+	if err != nil {
+		t.Error(err)
+	}
+
+	// Pool monitor should release the first batch of IPs
+	t.Logf("Reconcile after allocated count drops from 23 -> 3, expected free count = 10")
+	err = poolmonitor.Reconcile()
+	if err != nil {
+		t.Errorf("Expected pool monitor not to fail after CNS set the number of allocated IPs: %v", err)
+	}
+
+	// Ensure one batch of IPs is now marked as not in use
+	if len(poolmonitor.cachedNNC.Spec.IPsNotInUse) != batchSize {
+		t.Fatalf("Expected IPs-not-in-use count %v, actual %v", batchSize, len(poolmonitor.cachedNNC.Spec.IPsNotInUse))
+	}
+
+	// Ensure the requested IP count is now one batch size smaller than the initial count
+	if poolmonitor.cachedNNC.Spec.RequestedIPCount != int64(initialIPConfigCount-batchSize) {
+		t.Fatalf("Expected pool size to be one batch size smaller after reconcile, expected %v, actual %v", initialIPConfigCount-batchSize, poolmonitor.cachedNNC.Spec.RequestedIPCount)
+	}
+
+	// Reconcile again; it should release the second batch
+	t.Logf("Reconcile again - 2, expected free count = 20")
+	err = poolmonitor.Reconcile()
+	if err != nil {
+		t.Errorf("Expected pool monitor not to fail on the second reconcile: %v", err)
+	}
+
+	// Ensure two batches of IPs are now marked as not in use
+	if len(poolmonitor.cachedNNC.Spec.IPsNotInUse) != batchSize*2 {
+		t.Fatalf("Expected IPs-not-in-use count %v, actual %v", batchSize*2, len(poolmonitor.cachedNNC.Spec.IPsNotInUse))
+	}
+
+	// Ensure the requested IP count is now two batch sizes smaller than the initial count
+	if poolmonitor.cachedNNC.Spec.RequestedIPCount != int64(initialIPConfigCount-(batchSize*2)) {
+		t.Fatalf("Expected pool size to be two batch sizes smaller after reconcile, expected %v, actual %v", initialIPConfigCount-(batchSize*2), poolmonitor.cachedNNC.Spec.RequestedIPCount)
+	}
+
+	t.Logf("Update Request Controller")
+	err = fakerc.Reconcile()
+	if err != nil {
+		t.Error(err)
+	}
+
+	err = poolmonitor.Reconcile()
+	if err != nil {
+		t.Errorf("Expected no pool monitor failure after request controller reconcile: %v", err)
+	}
+
+	// Ensure the spec doesn't have any IPsNotInUse after the request controller has reconciled
+	if len(poolmonitor.cachedNNC.Spec.IPsNotInUse) != 0 {
+		t.Fatalf("Expected IPs not in use to be 0 after reconcile, actual %v", len(poolmonitor.cachedNNC.Spec.IPsNotInUse))
+	}
+}
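
Reviewer note (not part of the patch): a minimal sketch of why dropping to 3 allocated IPs out of 30 releases exactly two batches. It assumes the monitor derives its free-IP watermarks from the batch size (MinimumFreeIps = BatchSize * RequestThresholdPercent/100, MaximumFreeIps = BatchSize * ReleaseThresholdPercent/100), consistent with the thresholds passed to initFakes above; the variable names below are illustrative, not the monitor's actual fields.

	package main

	import "fmt"

	func main() {
		// parameters from TestPoolSizeDecreaseToReallyLow
		batchSize := int64(10)
		requestedIPCount := int64(30)       // initial pool size
		allocated := int64(3)               // IPs still in use after the drop
		maxFreeIPs := batchSize * 100 / 100 // ReleaseThresholdPercent = 100 -> 10

		// decreasePoolSize releases one batch per Reconcile while free IPs exceed the max
		releases := int64(0)
		for requestedIPCount-allocated > maxFreeIPs {
			requestedIPCount -= batchSize
			releases++
		}
		fmt.Println(releases, requestedIPCount) // prints "2 10": two batches, pool settles at 10
	}

With 27 free IPs against a maximum of 10, the first Reconcile requests 20 and marks 10 IPs not in use; the second requests 10 and marks 20; at 7 free IPs the pool is within bounds, matching the test's assertions.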