diff --git a/cns/ipampoolmonitor/ipampoolmonitor.go b/cns/ipampoolmonitor/ipampoolmonitor.go index 3e26a69245..0e2441acae 100644 --- a/cns/ipampoolmonitor/ipampoolmonitor.go +++ b/cns/ipampoolmonitor/ipampoolmonitor.go @@ -12,6 +12,10 @@ import ( nnc "github.com/Azure/azure-container-networking/nodenetworkconfig/api/v1alpha" ) +const ( + defaultMaxIPCount = int64(250) +) + type CNSIPAMPoolMonitor struct { pendingRelease bool @@ -71,12 +75,21 @@ func (pm *CNSIPAMPoolMonitor) Reconcile() error { pendingReleaseIPCount := len(pm.httpService.GetPendingReleaseIPConfigs()) availableIPConfigCount := len(pm.httpService.GetAvailableIPConfigs()) // TODO: add pending allocation count to real cns freeIPConfigCount := pm.cachedNNC.Spec.RequestedIPCount - int64(allocatedPodIPCount) - msg := fmt.Sprintf("[ipam-pool-monitor] Pool Size: %v, Goal Size: %v, BatchSize: %v, MinFree: %v, MaxFree:%v, Allocated: %v, Available: %v, Pending Release: %v, Free: %v, Pending Program: %v", - cnsPodIPConfigCount, pm.cachedNNC.Spec.RequestedIPCount, pm.scalarUnits.BatchSize, pm.MinimumFreeIps, pm.MaximumFreeIps, allocatedPodIPCount, availableIPConfigCount, pendingReleaseIPCount, freeIPConfigCount, pendingProgramCount) + batchSize := pm.getBatchSize() //Use getters in case customer changes batchsize manually + maxIPCount := pm.getMaxIPCount() + + + msg := fmt.Sprintf("[ipam-pool-monitor] Pool Size: %v, Goal Size: %v, BatchSize: %v, MaxIPCount: %v, MinFree: %v, MaxFree:%v, Allocated: %v, Available: %v, Pending Release: %v, Free: %v, Pending Program: %v", + cnsPodIPConfigCount, pm.cachedNNC.Spec.RequestedIPCount, batchSize, maxIPCount, pm.MinimumFreeIps, pm.MaximumFreeIps, allocatedPodIPCount, availableIPConfigCount, pendingReleaseIPCount, freeIPConfigCount, pendingProgramCount) switch { // pod count is increasing case freeIPConfigCount < pm.MinimumFreeIps: + if pm.cachedNNC.Spec.RequestedIPCount == maxIPCount { + // If we're already at the maxIPCount, don't try to increase + return nil + } + 
logger.Printf("[ipam-pool-monitor] Increasing pool size...%s ", msg) return pm.increasePoolSize() @@ -111,7 +124,24 @@ func (pm *CNSIPAMPoolMonitor) increasePoolSize() error { return err } - tempNNCSpec.RequestedIPCount += pm.scalarUnits.BatchSize + // Query the max IP count + maxIPCount := pm.getMaxIPCount() + previouslyRequestedIPCount := tempNNCSpec.RequestedIPCount + batchSize := pm.getBatchSize() + + tempNNCSpec.RequestedIPCount += batchSize + if tempNNCSpec.RequestedIPCount > maxIPCount { + // We don't want to ask for more ips than the max + logger.Printf("[ipam-pool-monitor] Requested IP count (%v) is over max limit (%v), requesting max limit instead.", tempNNCSpec.RequestedIPCount, maxIPCount) + tempNNCSpec.RequestedIPCount = maxIPCount + } + + // If the requested IP count is same as before, then don't do anything + if tempNNCSpec.RequestedIPCount == previouslyRequestedIPCount { + logger.Printf("[ipam-pool-monitor] Previously requested IP count %v is same as updated IP count %v, doing nothing", previouslyRequestedIPCount, tempNNCSpec.RequestedIPCount) + return nil + } + logger.Printf("[ipam-pool-monitor] Increasing pool size, Current Pool Size: %v, Updated Requested IP Count: %v, Pods with IP's:%v, ToBeDeleted Count: %v", len(pm.httpService.GetPodIPConfigState()), tempNNCSpec.RequestedIPCount, len(pm.httpService.GetAllocatedIPConfigs()), len(tempNNCSpec.IPsNotInUse)) err = pm.rc.UpdateCRDSpec(context.Background(), tempNNCSpec) @@ -134,10 +164,35 @@ func (pm *CNSIPAMPoolMonitor) decreasePoolSize(existingPendingReleaseIPCount int var err error var newIpsMarkedAsPending bool var pendingIpAddresses map[string]cns.IPConfigurationStatus + var updatedRequestedIPCount int64 + var decreaseIPCountBy int64 + + // Ensure the updated requested IP count is a multiple of the batch size + previouslyRequestedIPCount := pm.cachedNNC.Spec.RequestedIPCount + batchSize := pm.getBatchSize() + modResult := previouslyRequestedIPCount % batchSize + + 
logger.Printf("[ipam-pool-monitor] Previously RequestedIP Count %v", previouslyRequestedIPCount) + logger.Printf("[ipam-pool-monitor] Batch size : %v", batchSize) + logger.Printf("[ipam-pool-monitor] modResult of (previously requested IP count mod batch size) = %v", modResult) + + if modResult != 0 { + // Example: previouscount = 25, batchsize = 10, 25 - 10 = 15, NOT a multiple of batchsize (10) + // Don't want that, so make requestedIPCount 20 (25 - (25 % 10)) so that it is a multiple of the batchsize (10) + updatedRequestedIPCount = previouslyRequestedIPCount - modResult + } else { + // Example: previouscount = 30, batchsize = 10, 30 - 10 = 20 which is multiple of batchsize (10) so all good + updatedRequestedIPCount = previouslyRequestedIPCount - batchSize + } + + decreaseIPCountBy = previouslyRequestedIPCount - updatedRequestedIPCount + + logger.Printf("[ipam-pool-monitor] updatedRequestedIPCount %v", updatedRequestedIPCount) + if pm.updatingIpsNotInUseCount == 0 || pm.updatingIpsNotInUseCount < existingPendingReleaseIPCount { - logger.Printf("[ipam-pool-monitor] Marking IPs as PendingRelease, ipsToBeReleasedCount %d", int(pm.scalarUnits.BatchSize)) - pendingIpAddresses, err = pm.httpService.MarkIPAsPendingRelease(int(pm.scalarUnits.BatchSize)) + logger.Printf("[ipam-pool-monitor] Marking IPs as PendingRelease, ipsToBeReleasedCount %d", int(decreaseIPCountBy)) + pendingIpAddresses, err = pm.httpService.MarkIPAsPendingRelease(int(decreaseIPCountBy)) if err != nil { return err } @@ -237,8 +292,8 @@ func (pm *CNSIPAMPoolMonitor) Update(scalar nnc.Scaler, spec nnc.NodeNetworkConf pm.scalarUnits = scalar - pm.MinimumFreeIps = int64(float64(pm.scalarUnits.BatchSize) * (float64(pm.scalarUnits.RequestThresholdPercent) / 100)) - pm.MaximumFreeIps = int64(float64(pm.scalarUnits.BatchSize) * (float64(pm.scalarUnits.ReleaseThresholdPercent) / 100)) + pm.MinimumFreeIps = int64(float64(pm.getBatchSize()) * (float64(pm.scalarUnits.RequestThresholdPercent) / 100)) + 
pm.MaximumFreeIps = int64(float64(pm.getBatchSize()) * (float64(pm.scalarUnits.ReleaseThresholdPercent) / 100)) pm.cachedNNC.Spec = spec @@ -248,6 +303,21 @@ func (pm *CNSIPAMPoolMonitor) Update(scalar nnc.Scaler, spec nnc.NodeNetworkConf return nil } +func (pm *CNSIPAMPoolMonitor) getMaxIPCount() int64 { + if pm.scalarUnits.MaxIPCount == 0 { + pm.scalarUnits.MaxIPCount = defaultMaxIPCount + } + return pm.scalarUnits.MaxIPCount +} + +func (pm *CNSIPAMPoolMonitor) getBatchSize() int64 { + maxIPCount := pm.getMaxIPCount() + if pm.scalarUnits.BatchSize > maxIPCount { + pm.scalarUnits.BatchSize = maxIPCount + } + return pm.scalarUnits.BatchSize +} + //this function sets the values for state in IPAMPoolMonitor Struct func (pm *CNSIPAMPoolMonitor) GetStateSnapshot() cns.IpamPoolMonitorStateSnapshot { pm.mu.Lock() diff --git a/cns/ipampoolmonitor/ipampoolmonitor_test.go b/cns/ipampoolmonitor/ipampoolmonitor_test.go index b536221568..285bd3766e 100644 --- a/cns/ipampoolmonitor/ipampoolmonitor_test.go +++ b/cns/ipampoolmonitor/ipampoolmonitor_test.go @@ -9,13 +9,14 @@ import ( nnc "github.com/Azure/azure-container-networking/nodenetworkconfig/api/v1alpha" ) -func initFakes(batchSize, initialIPConfigCount, requestThresholdPercent, releaseThresholdPercent int) (*fakes.HTTPServiceFake, *fakes.RequestControllerFake, *CNSIPAMPoolMonitor) { +func initFakes(batchSize, initialIPConfigCount, requestThresholdPercent, releaseThresholdPercent int, maxPodIPCount int64) (*fakes.HTTPServiceFake, *fakes.RequestControllerFake, *CNSIPAMPoolMonitor) { logger.InitLogger("testlogs", 0, 0, "./") scalarUnits := nnc.Scaler{ BatchSize: int64(batchSize), RequestThresholdPercent: int64(requestThresholdPercent), ReleaseThresholdPercent: int64(releaseThresholdPercent), + MaxIPCount: int64(maxPodIPCount), } subnetaddresspace := "10.0.0.0/8" @@ -37,9 +38,10 @@ func TestPoolSizeIncrease(t *testing.T) { initialIPConfigCount = 10 requestThresholdPercent = 30 releaseThresholdPercent = 150 + maxPodIPCount = 
int64(30) ) - fakecns, fakerc, poolmonitor := initFakes(batchSize, initialIPConfigCount, requestThresholdPercent, releaseThresholdPercent) + fakecns, fakerc, poolmonitor := initFakes(batchSize, initialIPConfigCount, requestThresholdPercent, releaseThresholdPercent, maxPodIPCount) // increase number of allocated IP's in CNS err := fakecns.SetNumberOfAllocatedIPs(8) @@ -90,9 +92,10 @@ func TestPoolIncreaseDoesntChangeWhenIncreaseIsAlreadyInProgress(t *testing.T) { initialIPConfigCount = 10 requestThresholdPercent = 30 releaseThresholdPercent = 150 + maxPodIPCount = int64(30) ) - fakecns, fakerc, poolmonitor := initFakes(batchSize, initialIPConfigCount, requestThresholdPercent, releaseThresholdPercent) + fakecns, fakerc, poolmonitor := initFakes(batchSize, initialIPConfigCount, requestThresholdPercent, releaseThresholdPercent, maxPodIPCount) // increase number of allocated IP's in CNS err := fakecns.SetNumberOfAllocatedIPs(8) @@ -155,9 +158,10 @@ func TestPoolSizeIncreaseIdempotency(t *testing.T) { initialIPConfigCount = 10 requestThresholdPercent = 30 releaseThresholdPercent = 150 + maxPodIPCount = int64(30) ) - fakecns, _, poolmonitor := initFakes(batchSize, initialIPConfigCount, requestThresholdPercent, releaseThresholdPercent) + fakecns, _, poolmonitor := initFakes(batchSize, initialIPConfigCount, requestThresholdPercent, releaseThresholdPercent, maxPodIPCount) t.Logf("Minimum free IPs to request: %v", poolmonitor.MinimumFreeIps) t.Logf("Maximum free IPs to release: %v", poolmonitor.MaximumFreeIps) @@ -191,15 +195,97 @@ func TestPoolSizeIncreaseIdempotency(t *testing.T) { } } +func TestPoolIncreasePastNodeLimit(t *testing.T) { + var ( + batchSize = 16 + initialIPConfigCount = 16 + requestThresholdPercent = 50 + releaseThresholdPercent = 150 + maxPodIPCount = int64(30) + ) + + fakecns, _, poolmonitor := initFakes(batchSize, initialIPConfigCount, requestThresholdPercent, releaseThresholdPercent, maxPodIPCount) + + t.Logf("Minimum free IPs to request: %v", 
poolmonitor.MinimumFreeIps) + t.Logf("Maximum free IPs to release: %v", poolmonitor.MaximumFreeIps) + + // increase number of allocated IP's in CNS + err := fakecns.SetNumberOfAllocatedIPs(9) + if err != nil { + t.Fatalf("Failed to allocate test ipconfigs with err: %v", err) + } + + // When poolmonitor reconcile is called, trigger increase and cache goal state + err = poolmonitor.Reconcile() + if err != nil { + t.Fatalf("Failed to allocate test ipconfigs with err: %v", err) + } + + // ensure pool monitor has only requested the max pod ip count + if poolmonitor.cachedNNC.Spec.RequestedIPCount != maxPodIPCount { + t.Fatalf("Pool monitor target IP count (%v) should be the node limit (%v) when the max has been reached", poolmonitor.cachedNNC.Spec.RequestedIPCount, maxPodIPCount) + } +} + +func TestPoolIncreaseBatchSizeGreaterThanMaxPodIPCount(t *testing.T) { + var ( + batchSize = 50 + initialIPConfigCount = 16 + requestThresholdPercent = 50 + releaseThresholdPercent = 150 + maxPodIPCount = int64(30) + ) + + fakecns, _, poolmonitor := initFakes(batchSize, initialIPConfigCount, requestThresholdPercent, releaseThresholdPercent, maxPodIPCount) + + t.Logf("Minimum free IPs to request: %v", poolmonitor.MinimumFreeIps) + t.Logf("Maximum free IPs to release: %v", poolmonitor.MaximumFreeIps) + + // increase number of allocated IP's in CNS + err := fakecns.SetNumberOfAllocatedIPs(16) + if err != nil { + t.Fatalf("Failed to allocate test ipconfigs with err: %v", err) + } + + // When poolmonitor reconcile is called, trigger increase and cache goal state + err = poolmonitor.Reconcile() + if err != nil { + t.Fatalf("Failed to allocate test ipconfigs with err: %v", err) + } + + // ensure pool monitor has only requested the max pod ip count + if poolmonitor.cachedNNC.Spec.RequestedIPCount != maxPodIPCount { + t.Fatalf("Pool monitor target IP count (%v) should be the node limit (%v) when the max has been reached", poolmonitor.cachedNNC.Spec.RequestedIPCount, maxPodIPCount) + } +} + 
+func TestPoolIncreaseMaxIPCountSetToZero(t *testing.T) { + var ( + batchSize = 16 + initialIPConfigCount = 16 + requestThresholdPercent = 50 + releaseThresholdPercent = 150 + initialMaxPodIPCount = int64(0) + expectedMaxPodIPCount = defaultMaxIPCount + ) + + _, _, poolmonitor := initFakes(batchSize, initialIPConfigCount, requestThresholdPercent, releaseThresholdPercent, initialMaxPodIPCount) + + if poolmonitor.getMaxIPCount() != expectedMaxPodIPCount { + t.Fatalf("Pool monitor target IP count (%v) should be the node limit (%v) when the MaxIPCount field in the CRD is zero", poolmonitor.getMaxIPCount(), expectedMaxPodIPCount) + } +} + func TestPoolDecrease(t *testing.T) { var ( batchSize = 10 initialIPConfigCount = 20 requestThresholdPercent = 30 releaseThresholdPercent = 150 + maxPodIPCount = int64(30) ) - fakecns, fakerc, poolmonitor := initFakes(batchSize, initialIPConfigCount, requestThresholdPercent, releaseThresholdPercent) + fakecns, fakerc, poolmonitor := initFakes(batchSize, initialIPConfigCount, requestThresholdPercent, releaseThresholdPercent, maxPodIPCount) log.Printf("Min free IP's %v", poolmonitor.MinimumFreeIps) log.Printf("Max free IP %v", poolmonitor.MaximumFreeIps) @@ -253,9 +339,10 @@ func TestPoolSizeDecreaseWhenDecreaseHasAlreadyBeenRequested(t *testing.T) { initialIPConfigCount = 20 requestThresholdPercent = 30 releaseThresholdPercent = 100 + maxPodIPCount = int64(30) ) - fakecns, fakerc, poolmonitor := initFakes(batchSize, initialIPConfigCount, requestThresholdPercent, releaseThresholdPercent) + fakecns, fakerc, poolmonitor := initFakes(batchSize, initialIPConfigCount, requestThresholdPercent, releaseThresholdPercent, maxPodIPCount) log.Printf("Min free IP's %v", poolmonitor.MinimumFreeIps) log.Printf("Max free IP %v", poolmonitor.MaximumFreeIps) @@ -320,9 +407,10 @@ func TestPoolSizeDecreaseToReallyLow(t *testing.T) { initialIPConfigCount = 30 requestThresholdPercent = 30 releaseThresholdPercent = 100 + maxPodIPCount = int64(30) ) - fakecns, 
fakerc, poolmonitor := initFakes(batchSize, initialIPConfigCount, requestThresholdPercent, releaseThresholdPercent) + fakecns, fakerc, poolmonitor := initFakes(batchSize, initialIPConfigCount, requestThresholdPercent, releaseThresholdPercent, maxPodIPCount) log.Printf("Min free IP's %v", poolmonitor.MinimumFreeIps) log.Printf("Max free IP %v", poolmonitor.MaximumFreeIps) @@ -395,3 +483,94 @@ func TestPoolSizeDecreaseToReallyLow(t *testing.T) { t.Fatalf("Expected IP's not in use to be 0 after reconcile, expected %v, actual %v", (initialIPConfigCount - batchSize), len(poolmonitor.cachedNNC.Spec.IPsNotInUse)) } } + +func TestDecreaseAfterNodeLimitReached(t *testing.T) { + var ( + batchSize = 16 + initialIPConfigCount = 30 + requestThresholdPercent = 50 + releaseThresholdPercent = 150 + maxPodIPCount = int64(30) + expectedRequestedIP = 16 + expectedDecreaseIP = int(maxPodIPCount) % batchSize + ) + + fakecns, _, poolmonitor := initFakes(batchSize, initialIPConfigCount, requestThresholdPercent, releaseThresholdPercent, maxPodIPCount) + + t.Logf("Minimum free IPs to request: %v", poolmonitor.MinimumFreeIps) + t.Logf("Maximum free IPs to release: %v", poolmonitor.MaximumFreeIps) + + err := fakecns.SetNumberOfAllocatedIPs(20) + if err != nil { + t.Error(err) + } + + err = poolmonitor.Reconcile() + if err != nil { + t.Errorf("Expected pool monitor to not fail after CNS set number of allocated IP's %v", err) + } + + // Trigger a batch release + err = fakecns.SetNumberOfAllocatedIPs(5) + if err != nil { + t.Error(err) + } + + err = poolmonitor.Reconcile() + if err != nil { + t.Errorf("Expected pool monitor to not fail after CNS set number of allocated IP's %v", err) + } + + // Ensure poolmonitor asked for a multiple of batch size + if poolmonitor.cachedNNC.Spec.RequestedIPCount != int64(expectedRequestedIP) { + t.Fatalf("Expected requested ips to be %v when scaling by 1 batch size down from %v (max pod limit) but got %v", expectedRequestedIP, maxPodIPCount, 
poolmonitor.cachedNNC.Spec.RequestedIPCount) + } + + // Ensure we subtracted the mod result + if len(poolmonitor.cachedNNC.Spec.IPsNotInUse) != expectedDecreaseIP { + t.Fatalf("Expected to decrease requested IPs by %v (max pod count mod batchsize) to make the requested ip count a multiple of the batch size in the case of hitting the max before scale down, but got %v", expectedDecreaseIP, len(poolmonitor.cachedNNC.Spec.IPsNotInUse)) + } +} + +func TestPoolDecreaseBatchSizeGreaterThanMaxPodIPCount(t *testing.T) { + var ( + batchSize = 31 + initialIPConfigCount = 30 + requestThresholdPercent = 50 + releaseThresholdPercent = 150 + maxPodIPCount = int64(30) + ) + + fakecns, _, poolmonitor := initFakes(batchSize, initialIPConfigCount, requestThresholdPercent, releaseThresholdPercent, maxPodIPCount) + + t.Logf("Minimum free IPs to request: %v", poolmonitor.MinimumFreeIps) + t.Logf("Maximum free IPs to release: %v", poolmonitor.MaximumFreeIps) + + // increase number of allocated IP's in CNS + err := fakecns.SetNumberOfAllocatedIPs(30) + if err != nil { + t.Fatalf("Failed to allocate test ipconfigs with err: %v", err) + } + + // When poolmonitor reconcile is called, trigger increase and cache goal state + err = poolmonitor.Reconcile() + if err != nil { + t.Fatalf("Failed to allocate test ipconfigs with err: %v", err) + } + + // Trigger a batch release + err = fakecns.SetNumberOfAllocatedIPs(1) + if err != nil { + t.Error(err) + } + + err = poolmonitor.Reconcile() + if err != nil { + t.Errorf("Expected pool monitor to not fail after CNS set number of allocated IP's %v", err) + } + + // ensure pool monitor has only requested the max pod ip count + if poolmonitor.cachedNNC.Spec.RequestedIPCount != maxPodIPCount { + t.Fatalf("Pool monitor target IP count (%v) should be the node limit (%v) when the max has been reached", poolmonitor.cachedNNC.Spec.RequestedIPCount, maxPodIPCount) + } +} \ No newline at end of file