diff --git a/cns/api.go b/cns/api.go index fdaa6d6509..816849fa12 100644 --- a/cns/api.go +++ b/cns/api.go @@ -214,15 +214,15 @@ type NodeConfiguration struct { } type IPAMPoolMonitor interface { - Start(ctx context.Context, poolMonitorRefreshMilliseconds int) error - Update(scalar v1alpha.Scaler, spec v1alpha.NodeNetworkConfigSpec) + Start(ctx context.Context) error + Update(nnc *v1alpha.NodeNetworkConfig) GetStateSnapshot() IpamPoolMonitorStateSnapshot } // IpamPoolMonitorStateSnapshot struct to expose state values for IPAMPoolMonitor struct type IpamPoolMonitorStateSnapshot struct { - MinimumFreeIps int64 - MaximumFreeIps int64 + MinimumFreeIps int + MaximumFreeIps int UpdatingIpsNotInUseCount int CachedNNC v1alpha.NodeNetworkConfig } diff --git a/cns/client/client_test.go b/cns/client/client_test.go index d55b820f1b..a6ea4793ea 100644 --- a/cns/client/client_test.go +++ b/cns/client/client_test.go @@ -39,8 +39,8 @@ const ( gatewayIp = "10.0.0.1" subnetPrfixLength = 24 dockerContainerType = cns.Docker - releasePercent = 50 - requestPercent = 100 + releasePercent = 150 + requestPercent = 50 batchSize = 10 initPoolSize = 10 ) @@ -97,9 +97,23 @@ func addTestStateToRestServer(t *testing.T, secondaryIps []string) { t.Fatalf("Failed to createNetworkContainerRequest, req: %+v, err: %d", req, returnCode) } - svc.IPAMPoolMonitor.Update( - fakes.NewFakeScalar(releasePercent, requestPercent, batchSize), - fakes.NewFakeNodeNetworkConfigSpec(initPoolSize)) + svc.IPAMPoolMonitor.Update(&v1alpha.NodeNetworkConfig{ + Spec: v1alpha.NodeNetworkConfigSpec{ + RequestedIPCount: 16, + IPsNotInUse: []string{"abc"}, + }, + Status: v1alpha.NodeNetworkConfigStatus{ + Scaler: v1alpha.Scaler{ + BatchSize: batchSize, + ReleaseThresholdPercent: releasePercent, + RequestThresholdPercent: requestPercent, + MaxIPCount: 250, + }, + NetworkContainers: []v1alpha.NetworkContainer{ + {}, + }, + }, + }) } func getIPNetFromResponse(resp *cns.IPConfigResponse) (net.IPNet, error) { @@ -167,8 +181,9 @@ 
func TestMain(m *testing.M) { Status: v1alpha.NodeNetworkConfigStatus{ Scaler: v1alpha.Scaler{ BatchSize: 10, - ReleaseThresholdPercent: 50, - RequestThresholdPercent: 40, + ReleaseThresholdPercent: 150, + RequestThresholdPercent: 50, + MaxIPCount: 250, }, NetworkContainers: []v1alpha.NetworkContainer{ { @@ -188,7 +203,7 @@ func TestMain(m *testing.M) { }, }, } - svc.IPAMPoolMonitor = &fakes.IPAMPoolMonitorFake{FakeMinimumIps: 10, FakeMaximumIps: 20, FakeIpsNotInUseCount: 13, FakecachedNNC: fakeNNC} + svc.IPAMPoolMonitor = &fakes.MonitorFake{IPsNotInUseCount: 13, NodeNetworkConfig: &fakeNNC} if err != nil { logger.Errorf("Failed to create CNS object, err:%v.\n", err) @@ -347,8 +362,8 @@ func TestCNSClientDebugAPI(t *testing.T) { assert.GreaterOrEqual(t, len(inmemory.HTTPRestServiceData.PodIPConfigState), 1, "PodIpConfigState with at least 1 entry expected") testIpamPoolMonitor := inmemory.HTTPRestServiceData.IPAMPoolMonitor - assert.EqualValues(t, 10, testIpamPoolMonitor.MinimumFreeIps, "IPAMPoolMonitor state is not reflecting the initial set values") - assert.EqualValues(t, 20, testIpamPoolMonitor.MaximumFreeIps, "IPAMPoolMonitor state is not reflecting the initial set values") + assert.EqualValues(t, 5, testIpamPoolMonitor.MinimumFreeIps, "IPAMPoolMonitor state is not reflecting the initial set values") + assert.EqualValues(t, 15, testIpamPoolMonitor.MaximumFreeIps, "IPAMPoolMonitor state is not reflecting the initial set values") assert.Equal(t, 13, testIpamPoolMonitor.UpdatingIpsNotInUseCount, "IPAMPoolMonitor state is not reflecting the initial set values") // check for cached NNC Spec struct values @@ -357,8 +372,8 @@ func TestCNSClientDebugAPI(t *testing.T) { // check for cached NNC Status struct values assert.EqualValues(t, 10, testIpamPoolMonitor.CachedNNC.Status.Scaler.BatchSize, "IPAMPoolMonitor cached NNC Status is not reflecting the initial set values") - assert.EqualValues(t, 50, testIpamPoolMonitor.CachedNNC.Status.Scaler.ReleaseThresholdPercent, 
"IPAMPoolMonitor cached NNC Status is not reflecting the initial set values") - assert.EqualValues(t, 40, testIpamPoolMonitor.CachedNNC.Status.Scaler.RequestThresholdPercent, "IPAMPoolMonitor cached NNC Status is not reflecting the initial set values") + assert.EqualValues(t, 150, testIpamPoolMonitor.CachedNNC.Status.Scaler.ReleaseThresholdPercent, "IPAMPoolMonitor cached NNC Status is not reflecting the initial set values") + assert.EqualValues(t, 50, testIpamPoolMonitor.CachedNNC.Status.Scaler.RequestThresholdPercent, "IPAMPoolMonitor cached NNC Status is not reflecting the initial set values") assert.Len(t, testIpamPoolMonitor.CachedNNC.Status.NetworkContainers, 1, "Expected only one Network Container in the list") t.Logf("In-memory Data: ") @@ -1406,4 +1421,4 @@ func TestGetHTTPServiceData(t *testing.T) { assert.Equal(t, tt.want, got) }) } -} \ No newline at end of file +} diff --git a/cns/fakes/cnsfake.go b/cns/fakes/cnsfake.go index 1c86d1cd2b..7fb9f9dfc9 100644 --- a/cns/fakes/cnsfake.go +++ b/cns/fakes/cnsfake.go @@ -13,42 +13,27 @@ import ( "github.com/Azure/azure-container-networking/cns" "github.com/Azure/azure-container-networking/cns/common" "github.com/Azure/azure-container-networking/cns/types" - "github.com/Azure/azure-container-networking/crd/nodenetworkconfig/api/v1alpha" ) type StringStack struct { - lock sync.Mutex // you don't have to do this if you don't want thread safety + sync.Mutex items []string } -func NewFakeScalar(releaseThreshold, requestThreshold, batchSize int) v1alpha.Scaler { - return v1alpha.Scaler{ - BatchSize: int64(batchSize), - ReleaseThresholdPercent: int64(releaseThreshold), - RequestThresholdPercent: int64(requestThreshold), - } -} - -func NewFakeNodeNetworkConfigSpec(requestedIPCount int) v1alpha.NodeNetworkConfigSpec { - return v1alpha.NodeNetworkConfigSpec{ - RequestedIPCount: int64(requestedIPCount), - } -} - func NewStack() *StringStack { - return &StringStack{sync.Mutex{}, make([]string, 0)} + return 
&StringStack{items: make([]string, 0)} } func (stack *StringStack) Push(v string) { - stack.lock.Lock() - defer stack.lock.Unlock() + stack.Lock() + defer stack.Unlock() stack.items = append(stack.items, v) } func (stack *StringStack) Pop() (string, error) { - stack.lock.Lock() - defer stack.lock.Unlock() + stack.Lock() + defer stack.Unlock() length := len(stack.items) if length == 0 { diff --git a/cns/fakes/ipampoolmonitorfake.go b/cns/fakes/ipampoolmonitorfake.go deleted file mode 100644 index adde22342a..0000000000 --- a/cns/fakes/ipampoolmonitorfake.go +++ /dev/null @@ -1,37 +0,0 @@ -//go:build !ignore_uncovered -// +build !ignore_uncovered - -package fakes - -import ( - "context" - - "github.com/Azure/azure-container-networking/cns" - "github.com/Azure/azure-container-networking/crd/nodenetworkconfig/api/v1alpha" -) - -type IPAMPoolMonitorFake struct { - FakeMinimumIps int - FakeMaximumIps int - FakeIpsNotInUseCount int - FakecachedNNC v1alpha.NodeNetworkConfig -} - -func (ipm *IPAMPoolMonitorFake) Start(ctx context.Context, poolMonitorRefreshMilliseconds int) error { - return nil -} - -func (ipm *IPAMPoolMonitorFake) Update(scalar v1alpha.Scaler, spec v1alpha.NodeNetworkConfigSpec) {} - -func (ipm *IPAMPoolMonitorFake) Reconcile() error { - return nil -} - -func (ipm *IPAMPoolMonitorFake) GetStateSnapshot() cns.IpamPoolMonitorStateSnapshot { - return cns.IpamPoolMonitorStateSnapshot{ - MinimumFreeIps: int64(ipm.FakeMinimumIps), - MaximumFreeIps: int64(ipm.FakeMaximumIps), - UpdatingIpsNotInUseCount: ipm.FakeIpsNotInUseCount, - CachedNNC: ipm.FakecachedNNC, - } -} diff --git a/cns/fakes/monitor.go b/cns/fakes/monitor.go new file mode 100644 index 0000000000..d9087f20b5 --- /dev/null +++ b/cns/fakes/monitor.go @@ -0,0 +1,37 @@ +//go:build !ignore_uncovered +// +build !ignore_uncovered + +package fakes + +import ( + "context" + + "github.com/Azure/azure-container-networking/cns" + "github.com/Azure/azure-container-networking/crd/nodenetworkconfig/api/v1alpha" +) 
+ +type MonitorFake struct { + IPsNotInUseCount int + NodeNetworkConfig *v1alpha.NodeNetworkConfig +} + +func (*MonitorFake) Start(ctx context.Context) error { + return nil +} + +func (f *MonitorFake) Update(nnc *v1alpha.NodeNetworkConfig) { + f.NodeNetworkConfig = nnc +} + +func (*MonitorFake) Reconcile() error { + return nil +} + +func (f *MonitorFake) GetStateSnapshot() cns.IpamPoolMonitorStateSnapshot { + return cns.IpamPoolMonitorStateSnapshot{ + MaximumFreeIps: int(float64(f.NodeNetworkConfig.Status.Scaler.BatchSize) * (float64(f.NodeNetworkConfig.Status.Scaler.ReleaseThresholdPercent) / 100)), //nolint:gomnd // it's a percent + MinimumFreeIps: int(float64(f.NodeNetworkConfig.Status.Scaler.BatchSize) * (float64(f.NodeNetworkConfig.Status.Scaler.RequestThresholdPercent) / 100)), //nolint:gomnd // it's a percent + UpdatingIpsNotInUseCount: f.IPsNotInUseCount, + CachedNNC: *f.NodeNetworkConfig, + } +} diff --git a/cns/fakes/requestcontrollerfake.go b/cns/fakes/requestcontrollerfake.go index ab7e3870d1..2447d4548c 100644 --- a/cns/fakes/requestcontrollerfake.go +++ b/cns/fakes/requestcontrollerfake.go @@ -114,8 +114,7 @@ func (rc *RequestControllerFake) Reconcile(removePendingReleaseIPs bool) error { } // update - rc.cnscli.PoolMonitor.Update(rc.NNC.Status.Scaler, rc.NNC.Spec) - + rc.cnscli.PoolMonitor.Update(rc.NNC) return nil } diff --git a/cns/ipampoolmonitor/metrics.go b/cns/ipampool/metrics.go similarity index 98% rename from cns/ipampoolmonitor/metrics.go rename to cns/ipampool/metrics.go index 7c9b8ce901..a553cdbaf6 100644 --- a/cns/ipampoolmonitor/metrics.go +++ b/cns/ipampool/metrics.go @@ -1,4 +1,4 @@ -package ipampoolmonitor +package ipampool import ( "github.com/prometheus/client_golang/prometheus" diff --git a/cns/ipampoolmonitor/ipampoolmonitor.go b/cns/ipampool/monitor.go similarity index 55% rename from cns/ipampoolmonitor/ipampoolmonitor.go rename to cns/ipampool/monitor.go index 54ad7a998f..245b7331bc 100644 --- 
a/cns/ipampoolmonitor/ipampoolmonitor.go +++ b/cns/ipampool/monitor.go @@ -1,4 +1,4 @@ -package ipampoolmonitor +package ipampool import ( "context" @@ -10,65 +10,107 @@ import ( "github.com/Azure/azure-container-networking/cns/logger" "github.com/Azure/azure-container-networking/cns/metric" "github.com/Azure/azure-container-networking/crd/nodenetworkconfig/api/v1alpha" + "github.com/pkg/errors" ) -const defaultMaxIPCount = int64(250) +const ( + // DefaultRefreshDelay pool monitor poll delay default in seconds. + DefaultRefreshDelay = 1 * time.Second + // DefaultMaxIPs default maximum allocatable IPs + DefaultMaxIPs = 250 +) type nodeNetworkConfigSpecUpdater interface { UpdateSpec(context.Context, *v1alpha.NodeNetworkConfigSpec) (*v1alpha.NodeNetworkConfig, error) } -type CNSIPAMPoolMonitor struct { - MaximumFreeIps int64 - MinimumFreeIps int64 - cachedNNC v1alpha.NodeNetworkConfig - httpService cns.HTTPService - mu sync.RWMutex - scalarUnits v1alpha.Scaler - updatingIpsNotInUseCount int - nnccli nodeNetworkConfigSpecUpdater +// poolState is the Monitor's view of the IP pool. 
+type poolState struct { + minFreeCount int + maxFreeCount int + notInUseCount int +} + +type Options struct { + RefreshDelay time.Duration + MaxIPs int +} + +type Monitor struct { + opts *Options + spec v1alpha.NodeNetworkConfigSpec + scaler v1alpha.Scaler + state poolState + nnccli nodeNetworkConfigSpecUpdater + httpService cns.HTTPService + initialized chan interface{} + nncSource chan v1alpha.NodeNetworkConfig + once sync.Once } -func NewCNSIPAMPoolMonitor(httpService cns.HTTPService, nnccli nodeNetworkConfigSpecUpdater) *CNSIPAMPoolMonitor { - logger.Printf("NewCNSIPAMPoolMonitor: Create IPAM Pool Monitor") - return &CNSIPAMPoolMonitor{ +func NewMonitor(httpService cns.HTTPService, nnccli nodeNetworkConfigSpecUpdater, opts *Options) *Monitor { + if opts.RefreshDelay < 1 { + opts.RefreshDelay = DefaultRefreshDelay + } + if opts.MaxIPs < 1 { + opts.MaxIPs = DefaultMaxIPs + } + return &Monitor{ + opts: opts, httpService: httpService, nnccli: nnccli, + initialized: make(chan interface{}), + nncSource: make(chan v1alpha.NodeNetworkConfig), } } -func (pm *CNSIPAMPoolMonitor) Start(ctx context.Context, poolMonitorRefreshMilliseconds int) error { +func (pm *Monitor) Start(ctx context.Context) error { logger.Printf("[ipam-pool-monitor] Starting CNS IPAM Pool Monitor") - ticker := time.NewTicker(time.Duration(poolMonitorRefreshMilliseconds) * time.Millisecond) + ticker := time.NewTicker(pm.opts.RefreshDelay) + defer ticker.Stop() for { + // proceed when things happen: select { - case <-ctx.Done(): - return fmt.Errorf("[ipam-pool-monitor] CNS IPAM Pool Monitor received cancellation signal") - case <-ticker.C: - err := pm.Reconcile(ctx) - if err != nil { - logger.Printf("[ipam-pool-monitor] Reconcile failed with err %v", err) + case <-ctx.Done(): // calling context has closed, we'll exit. + return errors.Wrap(ctx.Err(), "pool monitor context closed") + case <-ticker.C: // attempt to reconcile every tick. 
+ select { + case <-pm.initialized: // this blocks until we have initialized + // if we have initialized and enter this case, we proceed out of the select and continue to reconcile. + default: + // if we have NOT initialized and enter this case, we continue out of this iteration and let the for loop begin again. + continue } + case nnc := <-pm.nncSource: // received a new NodeNetworkConfig, extract the data from it and re-reconcile. + pm.spec = nnc.Spec + pm.scaler = nnc.Status.Scaler + pm.state.minFreeCount, pm.state.maxFreeCount = CalculateMinFreeIPs(pm.scaler), CalculateMaxFreeIPs(pm.scaler) + pm.once.Do(func() { close(pm.initialized) }) // close the init channel the first time we receive a NodeNetworkConfig. + } + // if control has flowed through the select(s) to this point, we can now reconcile. + err := pm.reconcile(ctx) + if err != nil { + logger.Printf("[ipam-pool-monitor] Reconcile failed with err %v", err) } } } -func (pm *CNSIPAMPoolMonitor) Reconcile(ctx context.Context) error { +func (pm *Monitor) reconcile(ctx context.Context) error { cnsPodIPConfigCount := len(pm.httpService.GetPodIPConfigState()) pendingProgramCount := len(pm.httpService.GetPendingProgramIPConfigs()) // TODO: add pending program count to real cns allocatedPodIPCount := len(pm.httpService.GetAllocatedIPConfigs()) pendingReleaseIPCount := len(pm.httpService.GetPendingReleaseIPConfigs()) availableIPConfigCount := len(pm.httpService.GetAvailableIPConfigs()) // TODO: add pending allocation count to real cns - requestedIPConfigCount := pm.cachedNNC.Spec.RequestedIPCount + requestedIPConfigCount := pm.spec.RequestedIPCount unallocatedIPConfigCount := cnsPodIPConfigCount - allocatedPodIPCount freeIPConfigCount := requestedIPConfigCount - int64(allocatedPodIPCount) - batchSize := pm.getBatchSize() // Use getters in case customer changes batchsize manually - maxIPCount := pm.getMaxIPCount() + batchSize := pm.scaler.BatchSize + maxIPCount := pm.scaler.MaxIPCount - msg := 
fmt.Sprintf("[ipam-pool-monitor] Pool Size: %v, Goal Size: %v, BatchSize: %v, MaxIPCount: %v, MinFree: %v, MaxFree:%v, Allocated: %v, Available: %v, Pending Release: %v, Free: %v, Pending Program: %v", - cnsPodIPConfigCount, pm.cachedNNC.Spec.RequestedIPCount, batchSize, maxIPCount, pm.MinimumFreeIps, pm.MaximumFreeIps, allocatedPodIPCount, availableIPConfigCount, pendingReleaseIPCount, freeIPConfigCount, pendingProgramCount) + msg := fmt.Sprintf("[ipam-pool-monitor] Pool Size: %v, Goal Size: %v, BatchSize: %v, MaxIPCount: %v, Allocated: %v, Available: %v, Pending Release: %v, Free: %v, Pending Program: %v", + cnsPodIPConfigCount, pm.spec.RequestedIPCount, batchSize, maxIPCount, allocatedPodIPCount, availableIPConfigCount, pendingReleaseIPCount, freeIPConfigCount, pendingProgramCount) ipamAllocatedIPCount.Set(float64(allocatedPodIPCount)) ipamAvailableIPCount.Set(float64(availableIPConfigCount)) @@ -83,8 +125,8 @@ func (pm *CNSIPAMPoolMonitor) Reconcile(ctx context.Context) error { switch { // pod count is increasing - case freeIPConfigCount < pm.MinimumFreeIps: - if pm.cachedNNC.Spec.RequestedIPCount == maxIPCount { + case freeIPConfigCount < int64(pm.state.minFreeCount): + if pm.spec.RequestedIPCount == maxIPCount { // If we're already at the maxIPCount, don't try to increase return nil } @@ -93,13 +135,13 @@ func (pm *CNSIPAMPoolMonitor) Reconcile(ctx context.Context) error { return pm.increasePoolSize(ctx) // pod count is decreasing - case freeIPConfigCount >= pm.MaximumFreeIps: + case freeIPConfigCount >= int64(pm.state.maxFreeCount): logger.Printf("[ipam-pool-monitor] Decreasing pool size...%s ", msg) return pm.decreasePoolSize(ctx, pendingReleaseIPCount) // CRD has reconciled CNS state, and target spec is now the same size as the state // free to remove the IP's from the CRD - case len(pm.cachedNNC.Spec.IPsNotInUse) != pendingReleaseIPCount: + case len(pm.spec.IPsNotInUse) != pendingReleaseIPCount: logger.Printf("[ipam-pool-monitor] Removing Pending Release 
IP's from CRD...%s ", msg) return pm.cleanPendingRelease(ctx) @@ -112,16 +154,13 @@ func (pm *CNSIPAMPoolMonitor) Reconcile(ctx context.Context) error { return nil } -func (pm *CNSIPAMPoolMonitor) increasePoolSize(ctx context.Context) error { - pm.mu.Lock() - defer pm.mu.Unlock() - +func (pm *Monitor) increasePoolSize(ctx context.Context) error { tempNNCSpec := pm.createNNCSpecForCRD() // Query the max IP count - maxIPCount := pm.getMaxIPCount() + maxIPCount := pm.scaler.MaxIPCount previouslyRequestedIPCount := tempNNCSpec.RequestedIPCount - batchSize := pm.getBatchSize() + batchSize := pm.scaler.BatchSize tempNNCSpec.RequestedIPCount += batchSize if tempNNCSpec.RequestedIPCount > maxIPCount { @@ -147,22 +186,19 @@ func (pm *CNSIPAMPoolMonitor) increasePoolSize(ctx context.Context) error { // start an alloc timer metric.StartPoolIncreaseTimer(int(batchSize)) // save the updated state to cachedSpec - pm.cachedNNC.Spec = tempNNCSpec + pm.spec = tempNNCSpec return nil } -func (pm *CNSIPAMPoolMonitor) decreasePoolSize(ctx context.Context, existingPendingReleaseIPCount int) error { - pm.mu.Lock() - defer pm.mu.Unlock() - +func (pm *Monitor) decreasePoolSize(ctx context.Context, existingPendingReleaseIPCount int) error { // mark n number of IP's as pending var newIpsMarkedAsPending bool var pendingIPAddresses map[string]cns.IPConfigurationStatus var updatedRequestedIPCount int64 // Ensure the updated requested IP count is a multiple of the batch size - previouslyRequestedIPCount := pm.cachedNNC.Spec.RequestedIPCount - batchSize := pm.getBatchSize() + previouslyRequestedIPCount := pm.spec.RequestedIPCount + batchSize := pm.scaler.BatchSize modResult := previouslyRequestedIPCount % batchSize logger.Printf("[ipam-pool-monitor] Previously RequestedIP Count %v", previouslyRequestedIPCount) @@ -182,8 +218,8 @@ func (pm *CNSIPAMPoolMonitor) decreasePoolSize(ctx context.Context, existingPend logger.Printf("[ipam-pool-monitor] updatedRequestedIPCount %v", updatedRequestedIPCount) 
- if pm.updatingIpsNotInUseCount == 0 || - pm.updatingIpsNotInUseCount < existingPendingReleaseIPCount { + if pm.state.notInUseCount == 0 || + pm.state.notInUseCount < existingPendingReleaseIPCount { logger.Printf("[ipam-pool-monitor] Marking IPs as PendingRelease, ipsToBeReleasedCount %d", int(decreaseIPCountBy)) var err error pendingIPAddresses, err = pm.httpService.MarkIPAsPendingRelease(int(decreaseIPCountBy)) @@ -198,11 +234,11 @@ func (pm *CNSIPAMPoolMonitor) decreasePoolSize(ctx context.Context, existingPend if newIpsMarkedAsPending { // cache the updatingPendingRelease so that we dont re-set new IPs to PendingRelease in case UpdateCRD call fails - pm.updatingIpsNotInUseCount = len(tempNNCSpec.IPsNotInUse) + pm.state.notInUseCount = len(tempNNCSpec.IPsNotInUse) } logger.Printf("[ipam-pool-monitor] Releasing IPCount in this batch %d, updatingPendingIpsNotInUse count %d", - len(pendingIPAddresses), pm.updatingIpsNotInUseCount) + len(pendingIPAddresses), pm.state.notInUseCount) tempNNCSpec.RequestedIPCount -= int64(len(pendingIPAddresses)) logger.Printf("[ipam-pool-monitor] Decreasing pool size, Current Pool Size: %v, Requested IP Count: %v, Pods with IP's: %v, ToBeDeleted Count: %v", len(pm.httpService.GetPodIPConfigState()), tempNNCSpec.RequestedIPCount, len(pm.httpService.GetAllocatedIPConfigs()), len(tempNNCSpec.IPsNotInUse)) @@ -218,21 +254,18 @@ func (pm *CNSIPAMPoolMonitor) decreasePoolSize(ctx context.Context, existingPend metric.StartPoolDecreaseTimer(int(batchSize)) // save the updated state to cachedSpec - pm.cachedNNC.Spec = tempNNCSpec + pm.spec = tempNNCSpec // clear the updatingPendingIpsNotInUse, as we have Updated the CRD - logger.Printf("[ipam-pool-monitor] cleaning the updatingPendingIpsNotInUse, existing length %d", pm.updatingIpsNotInUseCount) - pm.updatingIpsNotInUseCount = 0 + logger.Printf("[ipam-pool-monitor] cleaning the updatingPendingIpsNotInUse, existing length %d", pm.state.notInUseCount) + pm.state.notInUseCount = 0 return nil } 
// cleanPendingRelease removes IPs from the cache and CRD if the request controller has reconciled // CNS state and the pending IP release map is empty. -func (pm *CNSIPAMPoolMonitor) cleanPendingRelease(ctx context.Context) error { - pm.mu.Lock() - defer pm.mu.Unlock() - +func (pm *Monitor) cleanPendingRelease(ctx context.Context) error { tempNNCSpec := pm.createNNCSpecForCRD() _, err := pm.nnccli.UpdateSpec(ctx, &tempNNCSpec) @@ -244,16 +277,16 @@ func (pm *CNSIPAMPoolMonitor) cleanPendingRelease(ctx context.Context) error { logger.Printf("[ipam-pool-monitor] cleanPendingRelease: UpdateCRDSpec succeeded for spec %+v", tempNNCSpec) // save the updated state to cachedSpec - pm.cachedNNC.Spec = tempNNCSpec + pm.spec = tempNNCSpec return nil } // createNNCSpecForCRD translates CNS's map of IPs to be released and requested IP count into an NNC Spec. -func (pm *CNSIPAMPoolMonitor) createNNCSpecForCRD() v1alpha.NodeNetworkConfigSpec { +func (pm *Monitor) createNNCSpecForCRD() v1alpha.NodeNetworkConfigSpec { var spec v1alpha.NodeNetworkConfigSpec // Update the count from cached spec - spec.RequestedIPCount = pm.cachedNNC.Spec.RequestedIPCount + spec.RequestedIPCount = pm.spec.RequestedIPCount // Get All Pending IPs from CNS and populate it again. 
pendingIPs := pm.httpService.GetPendingReleaseIPConfigs() @@ -264,53 +297,71 @@ func (pm *CNSIPAMPoolMonitor) createNNCSpecForCRD() v1alpha.NodeNetworkConfigSpe return spec } -// UpdatePoolLimitsTransacted called by request controller on reconcile to set the batch size limits -func (pm *CNSIPAMPoolMonitor) Update(scalar v1alpha.Scaler, spec v1alpha.NodeNetworkConfigSpec) { - pm.mu.Lock() - defer pm.mu.Unlock() - - pm.scalarUnits = scalar - - pm.MinimumFreeIps = int64(float64(pm.getBatchSize()) * (float64(pm.scalarUnits.RequestThresholdPercent) / 100)) - pm.MaximumFreeIps = int64(float64(pm.getBatchSize()) * (float64(pm.scalarUnits.ReleaseThresholdPercent) / 100)) +// GetStateSnapshot gets a snapshot of the IPAMPoolMonitor struct. +func (pm *Monitor) GetStateSnapshot() cns.IpamPoolMonitorStateSnapshot { + scaler, spec, state := pm.scaler, pm.spec, pm.state + return cns.IpamPoolMonitorStateSnapshot{ + MinimumFreeIps: state.minFreeCount, + MaximumFreeIps: state.maxFreeCount, + UpdatingIpsNotInUseCount: state.notInUseCount, + CachedNNC: v1alpha.NodeNetworkConfig{ + Spec: spec, + Status: v1alpha.NodeNetworkConfigStatus{ + Scaler: scaler, + }, + }, + } +} - pm.cachedNNC.Spec = spec +// Update ingests a NodeNetworkConfig, clamping some values to ensure they are legal and then +// pushing it to the Monitor's source channel. +func (pm *Monitor) Update(nnc *v1alpha.NodeNetworkConfig) { + pm.clampScaler(&nnc.Status.Scaler) - // if the nnc has conveged, observe the pool scaling latency (if any) + // if the nnc has converged, observe the pool scaling latency (if any). 
allocatedIPs := len(pm.httpService.GetPodIPConfigState()) - len(pm.httpService.GetPendingReleaseIPConfigs()) - if int(pm.cachedNNC.Spec.RequestedIPCount) == allocatedIPs { + if int(nnc.Spec.RequestedIPCount) == allocatedIPs { // observe elapsed duration for IP pool scaling metric.ObserverPoolScaleLatency() } - - logger.Printf("[ipam-pool-monitor] Update spec %+v, pm.MinimumFreeIps %d, pm.MaximumFreeIps %d", - pm.cachedNNC.Spec, pm.MinimumFreeIps, pm.MaximumFreeIps) + pm.nncSource <- *nnc } -func (pm *CNSIPAMPoolMonitor) getMaxIPCount() int64 { - if pm.scalarUnits.MaxIPCount == 0 { - pm.scalarUnits.MaxIPCount = defaultMaxIPCount +// clampScaler makes sure that the values stored in the scaler are sane. +// we usually expect these to be correctly set for us, but we could crash +// without these checks. if they are incorrectly set, there will be some weird +// IP pool behavior for a while until the nnc reconciler corrects the state. +func (pm *Monitor) clampScaler(scaler *v1alpha.Scaler) { + if scaler.MaxIPCount < 1 { + scaler.MaxIPCount = int64(pm.opts.MaxIPs) } - return pm.scalarUnits.MaxIPCount -} - -func (pm *CNSIPAMPoolMonitor) getBatchSize() int64 { - maxIPCount := pm.getMaxIPCount() - if pm.scalarUnits.BatchSize > maxIPCount { - pm.scalarUnits.BatchSize = maxIPCount + if scaler.BatchSize < 1 { + scaler.BatchSize = 1 + } + if scaler.BatchSize > scaler.MaxIPCount { + scaler.BatchSize = scaler.MaxIPCount + } + if scaler.RequestThresholdPercent < 1 { + scaler.RequestThresholdPercent = 1 + } + if scaler.RequestThresholdPercent > 100 { //nolint:gomnd // it's a percent + scaler.RequestThresholdPercent = 100 + } + if scaler.ReleaseThresholdPercent < scaler.RequestThresholdPercent+100 { + scaler.ReleaseThresholdPercent = scaler.RequestThresholdPercent + 100 //nolint:gomnd // it's a percent } - return pm.scalarUnits.BatchSize } -// GetStateSnapshot gets a snapshot of the IPAMPoolMonitor struct. 
-func (pm *CNSIPAMPoolMonitor) GetStateSnapshot() cns.IpamPoolMonitorStateSnapshot { - pm.mu.Lock() - defer pm.mu.Unlock() +// CalculateMinFreeIPs calculates the minimum free IP quantity based on the Scaler +// in the passed NodeNetworkConfig. +//nolint:gocritic // ignore hugeparam +func CalculateMinFreeIPs(scaler v1alpha.Scaler) int { + return int(float64(scaler.BatchSize) * (float64(scaler.RequestThresholdPercent) / 100)) //nolint:gomnd // it's a percent +} - return cns.IpamPoolMonitorStateSnapshot{ - MinimumFreeIps: pm.MinimumFreeIps, - MaximumFreeIps: pm.MaximumFreeIps, - UpdatingIpsNotInUseCount: pm.updatingIpsNotInUseCount, - CachedNNC: pm.cachedNNC, - } +// CalculateMaxFreeIPs calculates the maximum free IP quantity based on the Scaler +// in the passed NodeNetworkConfig. +//nolint:gocritic // ignore hugeparam +func CalculateMaxFreeIPs(scaler v1alpha.Scaler) int { + return int(float64(scaler.BatchSize) * (float64(scaler.ReleaseThresholdPercent) / 100)) //nolint:gomnd // it's a percent } diff --git a/cns/ipampool/monitor_test.go b/cns/ipampool/monitor_test.go new file mode 100644 index 0000000000..698c72b469 --- /dev/null +++ b/cns/ipampool/monitor_test.go @@ -0,0 +1,445 @@ +package ipampool + +import ( + "context" + "testing" + "time" + + "github.com/Azure/azure-container-networking/cns" + "github.com/Azure/azure-container-networking/cns/fakes" + "github.com/Azure/azure-container-networking/cns/logger" + "github.com/Azure/azure-container-networking/crd/nodenetworkconfig/api/v1alpha" + "github.com/stretchr/testify/assert" +) + +type fakeNodeNetworkConfigUpdater struct { + nnc *v1alpha.NodeNetworkConfig +} + +func (f *fakeNodeNetworkConfigUpdater) UpdateSpec(ctx context.Context, spec *v1alpha.NodeNetworkConfigSpec) (*v1alpha.NodeNetworkConfig, error) { + f.nnc.Spec = *spec + return f.nnc, nil +} + +type directUpdatePoolMonitor struct { + m *Monitor + cns.IPAMPoolMonitor +} + +func (d *directUpdatePoolMonitor) Update(nnc *v1alpha.NodeNetworkConfig) { + 
d.m.scaler, d.m.spec = nnc.Status.Scaler, nnc.Spec + d.m.state.minFreeCount, d.m.state.maxFreeCount = CalculateMinFreeIPs(d.m.scaler), CalculateMaxFreeIPs(d.m.scaler) +} + +type state struct { + allocatedIPCount int + batchSize int + ipConfigCount int + maxIPCount int + releaseThresholdPercent int + requestThresholdPercent int +} + +func initFakes(initState state) (*fakes.HTTPServiceFake, *fakes.RequestControllerFake, *Monitor) { + logger.InitLogger("testlogs", 0, 0, "./") + + scalarUnits := v1alpha.Scaler{ + BatchSize: int64(initState.batchSize), + RequestThresholdPercent: int64(initState.requestThresholdPercent), + ReleaseThresholdPercent: int64(initState.releaseThresholdPercent), + MaxIPCount: int64(initState.maxIPCount), + } + subnetaddresspace := "10.0.0.0/8" + + fakecns := fakes.NewHTTPServiceFake() + fakerc := fakes.NewRequestControllerFake(fakecns, scalarUnits, subnetaddresspace, initState.ipConfigCount) + + poolmonitor := NewMonitor(fakecns, &fakeNodeNetworkConfigUpdater{fakerc.NNC}, &Options{RefreshDelay: 100 * time.Second}) + + fakecns.PoolMonitor = &directUpdatePoolMonitor{m: poolmonitor} + _ = fakecns.SetNumberOfAllocatedIPs(initState.allocatedIPCount) + + return fakecns, fakerc, poolmonitor +} + +func TestPoolSizeIncrease(t *testing.T) { + initState := state{ + batchSize: 10, + allocatedIPCount: 8, + ipConfigCount: 10, + requestThresholdPercent: 50, + releaseThresholdPercent: 150, + maxIPCount: 30, + } + + fakecns, fakerc, poolmonitor := initFakes(initState) + assert.NoError(t, fakerc.Reconcile(true)) + + // When poolmonitor reconcile is called, trigger increase and cache goal state + assert.NoError(t, poolmonitor.reconcile(context.Background())) + + // ensure pool monitor has reached quorum with cns + assert.Equal(t, int64(initState.ipConfigCount+(1*initState.batchSize)), poolmonitor.spec.RequestedIPCount) + + // request controller reconciles, carves new IP's from the test subnet and adds to CNS state + assert.NoError(t, fakerc.Reconcile(true)) + + 
// when poolmonitor reconciles again here, the IP count will be within the thresholds + // so no CRD update and nothing pending + assert.NoError(t, poolmonitor.reconcile(context.Background())) + + // ensure pool monitor has reached quorum with cns + assert.Equal(t, int64(initState.ipConfigCount+(1*initState.batchSize)), poolmonitor.spec.RequestedIPCount) + + // make sure IPConfig state size reflects the new pool size + assert.Len(t, fakecns.GetPodIPConfigState(), initState.ipConfigCount+(1*initState.batchSize)) +} + +func TestPoolIncreaseDoesntChangeWhenIncreaseIsAlreadyInProgress(t *testing.T) { + initState := state{ + batchSize: 10, + allocatedIPCount: 8, + ipConfigCount: 10, + requestThresholdPercent: 30, + releaseThresholdPercent: 150, + maxIPCount: 30, + } + + fakecns, fakerc, poolmonitor := initFakes(initState) + assert.NoError(t, fakerc.Reconcile(true)) + + // When poolmonitor reconcile is called, trigger increase and cache goal state + assert.NoError(t, poolmonitor.reconcile(context.Background())) + + // increase number of allocated IP's in CNS, within allocatable size but still inside trigger threshold + assert.NoError(t, fakecns.SetNumberOfAllocatedIPs(9)) + + // poolmonitor reconciles, but doesn't actually update the CRD, because there is already a pending update + assert.NoError(t, poolmonitor.reconcile(context.Background())) + + // ensure pool monitor has reached quorum with cns + assert.Equal(t, int64(initState.ipConfigCount+(1*initState.batchSize)), poolmonitor.spec.RequestedIPCount) + + // request controller reconciles, carves new IP's from the test subnet and adds to CNS state + assert.NoError(t, fakerc.Reconcile(true)) + + // when poolmonitor reconciles again here, the IP count will be within the thresholds + // so no CRD update and nothing pending + assert.NoError(t, poolmonitor.reconcile(context.Background())) + + // make sure IPConfig state size reflects the new pool size + assert.Len(t, fakecns.GetPodIPConfigState(), 
initState.ipConfigCount+(1*initState.batchSize)) + + // ensure pool monitor has reached quorum with cns + assert.Equal(t, int64(initState.ipConfigCount+(1*initState.batchSize)), poolmonitor.spec.RequestedIPCount) +} + +func TestPoolSizeIncreaseIdempotency(t *testing.T) { + initState := state{ + batchSize: 10, + allocatedIPCount: 8, + ipConfigCount: 10, + requestThresholdPercent: 30, + releaseThresholdPercent: 150, + maxIPCount: 30, + } + + _, fakerc, poolmonitor := initFakes(initState) + assert.NoError(t, fakerc.Reconcile(true)) + + // When poolmonitor reconcile is called, trigger increase and cache goal state + assert.NoError(t, poolmonitor.reconcile(context.Background())) + + // ensure pool monitor has increased batch size + assert.Equal(t, int64(initState.ipConfigCount+(1*initState.batchSize)), poolmonitor.spec.RequestedIPCount) + + // reconcile pool monitor a second time, then verify requested ip count is still the same + assert.NoError(t, poolmonitor.reconcile(context.Background())) + + // ensure pool monitor requested pool size is unchanged as request controller hasn't reconciled yet + assert.Equal(t, int64(initState.ipConfigCount+(1*initState.batchSize)), poolmonitor.spec.RequestedIPCount) +} + +func TestPoolIncreasePastNodeLimit(t *testing.T) { + initState := state{ + batchSize: 16, + allocatedIPCount: 9, + ipConfigCount: 16, + requestThresholdPercent: 50, + releaseThresholdPercent: 150, + maxIPCount: 30, + } + + _, fakerc, poolmonitor := initFakes(initState) + assert.NoError(t, fakerc.Reconcile(true)) + + // When poolmonitor reconcile is called, trigger increase and cache goal state + assert.NoError(t, poolmonitor.reconcile(context.Background())) + + // ensure pool monitor has only requested the max pod ip count + assert.Equal(t, int64(initState.maxIPCount), poolmonitor.spec.RequestedIPCount) +} + +func TestPoolIncreaseBatchSizeGreaterThanMaxPodIPCount(t *testing.T) { + initState := state{ + batchSize: 50, + allocatedIPCount: 16, + ipConfigCount: 16, + 
requestThresholdPercent: 50, + releaseThresholdPercent: 150, + maxIPCount: 30, + } + + _, fakerc, poolmonitor := initFakes(initState) + assert.NoError(t, fakerc.Reconcile(true)) + + // When poolmonitor reconcile is called, trigger increase and cache goal state + assert.NoError(t, poolmonitor.reconcile(context.Background())) + + // ensure pool monitor has only requested the max pod ip count + assert.Equal(t, int64(initState.maxIPCount), poolmonitor.spec.RequestedIPCount) +} + +func TestPoolDecrease(t *testing.T) { + initState := state{ + batchSize: 10, + ipConfigCount: 20, + allocatedIPCount: 15, + requestThresholdPercent: 50, + releaseThresholdPercent: 150, + maxIPCount: 30, + } + + fakecns, fakerc, poolmonitor := initFakes(initState) + assert.NoError(t, fakerc.Reconcile(true)) + + // Pool monitor does nothing, as the current number of IP's falls in the threshold + assert.NoError(t, poolmonitor.reconcile(context.Background())) + + // Decrease the number of allocated IP's down to 5. This should trigger a scale down + assert.NoError(t, fakecns.SetNumberOfAllocatedIPs(4)) + + // Pool monitor will adjust the spec so the pool size will be 1 batch size smaller + assert.NoError(t, poolmonitor.reconcile(context.Background())) + + // ensure that the adjusted spec is smaller than the initial pool size + assert.Len(t, poolmonitor.spec.IPsNotInUse, initState.ipConfigCount-initState.batchSize) + + // reconcile the fake request controller + assert.NoError(t, fakerc.Reconcile(true)) + + // CNS won't actually clean up the IPsNotInUse until it changes the spec for some other reason (i.e. scale up) + // so instead we should just verify that the CNS state has no more PendingReleaseIPConfigs, + // and that they were cleaned up. 
+ assert.Empty(t, fakecns.GetPendingReleaseIPConfigs()) +} + +func TestPoolSizeDecreaseWhenDecreaseHasAlreadyBeenRequested(t *testing.T) { + initState := state{ + batchSize: 10, + allocatedIPCount: 5, + ipConfigCount: 20, + requestThresholdPercent: 30, + releaseThresholdPercent: 100, + maxIPCount: 30, + } + + fakecns, fakerc, poolmonitor := initFakes(initState) + assert.NoError(t, fakerc.Reconcile(true)) + + // Pool monitor does nothing, as the current number of IP's falls in the threshold + assert.NoError(t, poolmonitor.reconcile(context.Background())) + + // Ensure the size of the requested spec is still the same + assert.Len(t, poolmonitor.spec.IPsNotInUse, initState.ipConfigCount-initState.batchSize) + + // Ensure the request ipcount is now one batch size smaller than the initial IP count + assert.Equal(t, int64(initState.ipConfigCount-initState.batchSize), poolmonitor.spec.RequestedIPCount) + + // Update pods with IP count, ensure pool monitor stays the same until request controller reconciles + assert.NoError(t, fakecns.SetNumberOfAllocatedIPs(6)) + + // Ensure the size of the requested spec is still the same + assert.Len(t, poolmonitor.spec.IPsNotInUse, initState.ipConfigCount-initState.batchSize) + + // Ensure the request ipcount is now one batch size smaller than the initial IP count + assert.Equal(t, int64(initState.ipConfigCount-initState.batchSize), poolmonitor.spec.RequestedIPCount) + + assert.NoError(t, fakerc.Reconcile(true)) + + assert.NoError(t, poolmonitor.reconcile(context.Background())) + + // Ensure the spec doesn't have any IPsNotInUse after request controller has reconciled + assert.Empty(t, poolmonitor.spec.IPsNotInUse) +} + +func TestDecreaseAndIncreaseToSameCount(t *testing.T) { + initState := state{ + batchSize: 10, + allocatedIPCount: 7, + ipConfigCount: 10, + requestThresholdPercent: 50, + releaseThresholdPercent: 150, + maxIPCount: 30, + } + + fakecns, fakerc, poolmonitor := initFakes(initState) + assert.NoError(t, 
fakerc.Reconcile(true)) + assert.NoError(t, poolmonitor.reconcile(context.Background())) + assert.Equal(t, int64(20), poolmonitor.spec.RequestedIPCount) + assert.Empty(t, poolmonitor.spec.IPsNotInUse) + + // Update the IPConfig state + assert.NoError(t, fakerc.Reconcile(true)) + + // Release all IPs + assert.NoError(t, fakecns.SetNumberOfAllocatedIPs(0)) + assert.NoError(t, poolmonitor.reconcile(context.Background())) + assert.Equal(t, int64(10), poolmonitor.spec.RequestedIPCount) + assert.Len(t, poolmonitor.spec.IPsNotInUse, 10) + + // Increase it back to 20 + // initial pool count is 10, set 5 of them to be allocated + assert.NoError(t, fakecns.SetNumberOfAllocatedIPs(7)) + assert.NoError(t, poolmonitor.reconcile(context.Background())) + assert.Equal(t, int64(20), poolmonitor.spec.RequestedIPCount) + assert.Len(t, poolmonitor.spec.IPsNotInUse, 10) + + // Update the IPConfig count and dont remove the pending IPs + assert.NoError(t, fakerc.Reconcile(false)) + assert.NoError(t, poolmonitor.reconcile(context.Background())) + assert.Equal(t, int64(20), poolmonitor.spec.RequestedIPCount) + assert.Len(t, poolmonitor.spec.IPsNotInUse, 10) + + assert.NoError(t, fakerc.Reconcile(true)) + assert.NoError(t, poolmonitor.reconcile(context.Background())) + assert.NoError(t, poolmonitor.reconcile(context.Background())) + assert.Equal(t, int64(20), poolmonitor.spec.RequestedIPCount) + assert.Empty(t, poolmonitor.spec.IPsNotInUse) +} + +func TestPoolSizeDecreaseToReallyLow(t *testing.T) { + initState := state{ + batchSize: 10, + allocatedIPCount: 23, + ipConfigCount: 30, + requestThresholdPercent: 30, + releaseThresholdPercent: 100, + maxIPCount: 30, + } + + fakecns, fakerc, poolmonitor := initFakes(initState) + assert.NoError(t, fakerc.Reconcile(true)) + + // Pool monitor does nothing, as the current number of IP's falls in the threshold + assert.NoError(t, poolmonitor.reconcile(context.Background())) + + // Now Drop the Allocated count to really low, say 3. 
This should trigger release in 2 batches + assert.NoError(t, fakecns.SetNumberOfAllocatedIPs(3)) + + // Pool monitor does nothing, as the current number of IP's falls in the threshold + assert.NoError(t, poolmonitor.reconcile(context.Background())) + + // Ensure the size of the requested spec is still the same + assert.Len(t, poolmonitor.spec.IPsNotInUse, initState.batchSize) + + // Ensure the request ipcount is now one batch size smaller than the initial IP count + assert.Equal(t, int64(initState.ipConfigCount-initState.batchSize), poolmonitor.spec.RequestedIPCount) + + // Reconcile again, it should release the second batch + assert.NoError(t, poolmonitor.reconcile(context.Background())) + + // Ensure the size of the requested spec is still the same + assert.Len(t, poolmonitor.spec.IPsNotInUse, initState.batchSize*2) + + // Ensure the request ipcount is now one batch size smaller than the initial IP count + assert.Equal(t, int64(initState.ipConfigCount-(initState.batchSize*2)), poolmonitor.spec.RequestedIPCount) + + assert.NoError(t, fakerc.Reconcile(true)) + assert.NoError(t, poolmonitor.reconcile(context.Background())) + assert.Empty(t, poolmonitor.spec.IPsNotInUse) +} + +func TestDecreaseAfterNodeLimitReached(t *testing.T) { + initState := state{ + batchSize: 16, + allocatedIPCount: 20, + ipConfigCount: 30, + requestThresholdPercent: 50, + releaseThresholdPercent: 150, + maxIPCount: 30, + } + fakecns, fakerc, poolmonitor := initFakes(initState) + assert.NoError(t, fakerc.Reconcile(true)) + + assert.NoError(t, poolmonitor.reconcile(context.Background())) + + // Trigger a batch release + assert.NoError(t, fakecns.SetNumberOfAllocatedIPs(5)) + assert.NoError(t, poolmonitor.reconcile(context.Background())) + + // Ensure poolmonitor asked for a multiple of batch size + assert.Equal(t, int64(16), poolmonitor.spec.RequestedIPCount) + assert.Len(t, poolmonitor.spec.IPsNotInUse, int(initState.maxIPCount%initState.batchSize)) +} + +func 
TestPoolDecreaseBatchSizeGreaterThanMaxPodIPCount(t *testing.T) { + initState := state{ + batchSize: 31, + allocatedIPCount: 30, + ipConfigCount: 30, + requestThresholdPercent: 50, + releaseThresholdPercent: 150, + maxIPCount: 30, + } + + fakecns, fakerc, poolmonitor := initFakes(initState) + assert.NoError(t, fakerc.Reconcile(true)) + + // When poolmonitor reconcile is called, trigger increase and cache goal state + assert.NoError(t, poolmonitor.reconcile(context.Background())) + + // Trigger a batch release + assert.NoError(t, fakecns.SetNumberOfAllocatedIPs(1)) + assert.NoError(t, poolmonitor.reconcile(context.Background())) + assert.Equal(t, int64(initState.maxIPCount), poolmonitor.spec.RequestedIPCount) +} + +func TestCalculateIPs(t *testing.T) { + tests := []struct { + name string + in v1alpha.Scaler + wantMinFree int + wantMaxFree int + }{ + { + name: "normal", + in: v1alpha.Scaler{ + BatchSize: 16, + RequestThresholdPercent: 50, + ReleaseThresholdPercent: 150, + MaxIPCount: 250, + }, + wantMinFree: 8, + wantMaxFree: 24, + }, + { + name: "200%", + in: v1alpha.Scaler{ + BatchSize: 16, + RequestThresholdPercent: 100, + ReleaseThresholdPercent: 200, + MaxIPCount: 250, + }, + wantMinFree: 16, + wantMaxFree: 32, + }, + } + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.wantMinFree, CalculateMinFreeIPs(tt.in)) + assert.Equal(t, tt.wantMaxFree, CalculateMaxFreeIPs(tt.in)) + }) + } +} diff --git a/cns/ipampoolmonitor/ipampoolmonitor_test.go b/cns/ipampoolmonitor/ipampoolmonitor_test.go deleted file mode 100644 index cf39ecf5c1..0000000000 --- a/cns/ipampoolmonitor/ipampoolmonitor_test.go +++ /dev/null @@ -1,774 +0,0 @@ -package ipampoolmonitor - -import ( - "context" - "log" - "testing" - - "github.com/Azure/azure-container-networking/cns/fakes" - "github.com/Azure/azure-container-networking/cns/logger" - "github.com/Azure/azure-container-networking/crd/nodenetworkconfig/api/v1alpha" -) - -type 
fakeNodeNetworkConfigUpdater struct { - nnc *v1alpha.NodeNetworkConfig -} - -func (f *fakeNodeNetworkConfigUpdater) UpdateSpec(ctx context.Context, spec *v1alpha.NodeNetworkConfigSpec) (*v1alpha.NodeNetworkConfig, error) { - f.nnc.Spec = *spec - return f.nnc, nil -} - -func initFakes(t *testing.T, - batchSize, - initialIPConfigCount, - requestThresholdPercent, - releaseThresholdPercent int, - maxPodIPCount int64) (*fakes.HTTPServiceFake, *fakes.RequestControllerFake, *CNSIPAMPoolMonitor) { - logger.InitLogger("testlogs", 0, 0, "./") - - scalarUnits := v1alpha.Scaler{ - BatchSize: int64(batchSize), - RequestThresholdPercent: int64(requestThresholdPercent), - ReleaseThresholdPercent: int64(releaseThresholdPercent), - MaxIPCount: int64(maxPodIPCount), - } - subnetaddresspace := "10.0.0.0/8" - - fakecns := fakes.NewHTTPServiceFake() - fakerc := fakes.NewRequestControllerFake(fakecns, scalarUnits, subnetaddresspace, initialIPConfigCount) - - poolmonitor := NewCNSIPAMPoolMonitor(fakecns, &fakeNodeNetworkConfigUpdater{fakerc.NNC}) - - fakecns.PoolMonitor = poolmonitor - - err := fakerc.Reconcile(true) - if err != nil { - t.Fatalf("Failed to reconcile fake requestcontroller with err: %v", err) - } - - return fakecns, fakerc, poolmonitor -} - -func TestPoolSizeIncrease(t *testing.T) { - var ( - batchSize = 10 - initialIPConfigCount = 10 - requestThresholdPercent = 30 - releaseThresholdPercent = 150 - maxPodIPCount = int64(30) - ) - - fakecns, fakerc, poolmonitor := initFakes(t, - batchSize, - initialIPConfigCount, - requestThresholdPercent, - releaseThresholdPercent, - maxPodIPCount) - - // increase number of allocated IP's in CNS - err := fakecns.SetNumberOfAllocatedIPs(8) - if err != nil { - t.Fatalf("Failed to allocate test ipconfigs with err: %v", err) - } - - // When poolmonitor reconcile is called, trigger increase and cache goal state - err = poolmonitor.Reconcile(context.Background()) - if err != nil { - t.Fatalf("Failed to allocate test ipconfigs with err: %v", 
err) - } - - // ensure pool monitor has reached quorum with cns - if poolmonitor.cachedNNC.Spec.RequestedIPCount != int64(initialIPConfigCount+(1*batchSize)) { - t.Fatalf("Pool monitor target IP count doesn't match CNS pool state "+ - "after reconcile: %v, "+ - "actual %v", poolmonitor.cachedNNC.Spec.RequestedIPCount, len(fakecns.GetPodIPConfigState())) - } - - // request controller reconciles, carves new IP's from the test subnet and adds to CNS state - err = fakerc.Reconcile(true) - if err != nil { - t.Fatalf("Failed to reconcile fake requestcontroller with err: %v", err) - } - - // when poolmonitor reconciles again here, the IP count will be within the thresholds - // so no CRD update and nothing pending - err = poolmonitor.Reconcile(context.Background()) - if err != nil { - t.Fatalf("Failed to reconcile pool monitor after request controller updates CNS state: %v", err) - } - - // ensure pool monitor has reached quorum with cns - if poolmonitor.cachedNNC.Spec.RequestedIPCount != int64(initialIPConfigCount+(1*batchSize)) { - t.Fatalf("Pool monitor target IP count doesn't "+ - "match CNS pool state after reconcile: %v, "+ - "actual %v", poolmonitor.cachedNNC.Spec.RequestedIPCount, len(fakecns.GetPodIPConfigState())) - } - - // make sure IPConfig state size reflects the new pool size - if len(fakecns.GetPodIPConfigState()) != initialIPConfigCount+(1*batchSize) { - t.Fatalf("CNS Pod IPConfig state count doesn't "+ - "match, expected: %v, actual %v", len(fakecns.GetPodIPConfigState()), initialIPConfigCount+(1*batchSize)) - } - - t.Logf("Pool size %v, Target pool size %v, "+ - "Allocated IP's %v, ", len(fakecns.GetPodIPConfigState()), - poolmonitor.cachedNNC.Spec.RequestedIPCount, len(fakecns.GetAllocatedIPConfigs())) -} - -func TestPoolIncreaseDoesntChangeWhenIncreaseIsAlreadyInProgress(t *testing.T) { - var ( - batchSize = 10 - initialIPConfigCount = 10 - requestThresholdPercent = 30 - releaseThresholdPercent = 150 - maxPodIPCount = int64(30) - ) - - fakecns, 
fakerc, poolmonitor := initFakes(t, - batchSize, - initialIPConfigCount, - requestThresholdPercent, - releaseThresholdPercent, - maxPodIPCount) - - // increase number of allocated IP's in CNS - err := fakecns.SetNumberOfAllocatedIPs(8) - if err != nil { - t.Fatalf("Failed to allocate test ipconfigs with err: %v", err) - } - - // When poolmonitor reconcile is called, trigger increase and cache goal state - err = poolmonitor.Reconcile(context.Background()) - if err != nil { - t.Fatalf("Failed to allocate test ipconfigs with err: %v", err) - } - - // increase number of allocated IP's in CNS, within allocatable size but still inside trigger threshold, - err = fakecns.SetNumberOfAllocatedIPs(9) - if err != nil { - t.Fatalf("Failed to allocate test ipconfigs with err: %v", err) - } - - // poolmonitor reconciles, but doesn't actually update the CRD, because there is already a pending update - err = poolmonitor.Reconcile(context.Background()) - if err != nil { - t.Fatalf("Failed to reconcile pool monitor after allocation ip increase with err: %v", err) - } - - // ensure pool monitor has reached quorum with cns - if poolmonitor.cachedNNC.Spec.RequestedIPCount != int64(initialIPConfigCount+(1*batchSize)) { - t.Fatalf("Pool monitor target IP count doesn't match CNS pool state after reconcile: %v,"+ - " actual %v", poolmonitor.cachedNNC.Spec.RequestedIPCount, len(fakecns.GetPodIPConfigState())) - } - - // request controller reconciles, carves new IP's from the test subnet and adds to CNS state - err = fakerc.Reconcile(true) - if err != nil { - t.Fatalf("Failed to reconcile fake requestcontroller with err: %v", err) - } - - // when poolmonitor reconciles again here, the IP count will be within the thresholds - // so no CRD update and nothing pending - err = poolmonitor.Reconcile(context.Background()) - if err != nil { - t.Fatalf("Failed to reconcile pool monitor after request controller updates CNS state: %v", err) - } - - // make sure IPConfig state size reflects the new pool 
size - if len(fakecns.GetPodIPConfigState()) != initialIPConfigCount+(1*batchSize) { - t.Fatalf("CNS Pod IPConfig state count doesn't match, expected: %v, actual %v", - len(fakecns.GetPodIPConfigState()), initialIPConfigCount+(1*batchSize)) - } - - // ensure pool monitor has reached quorum with cns - if poolmonitor.cachedNNC.Spec.RequestedIPCount != int64(initialIPConfigCount+(1*batchSize)) { - t.Fatalf("Pool monitor target IP count doesn't match CNS pool state after reconcile: %v, "+ - "actual %v", poolmonitor.cachedNNC.Spec.RequestedIPCount, len(fakecns.GetPodIPConfigState())) - } - - t.Logf("Pool size %v, Target pool size %v, Allocated IP's %v, ", len(fakecns.GetPodIPConfigState()), - poolmonitor.cachedNNC.Spec.RequestedIPCount, len(fakecns.GetAllocatedIPConfigs())) -} - -func TestPoolSizeIncreaseIdempotency(t *testing.T) { - var ( - batchSize = 10 - initialIPConfigCount = 10 - requestThresholdPercent = 30 - releaseThresholdPercent = 150 - maxPodIPCount = int64(30) - ) - - fakecns, _, poolmonitor := initFakes(t, - batchSize, - initialIPConfigCount, - requestThresholdPercent, - releaseThresholdPercent, - maxPodIPCount) - - t.Logf("Minimum free IPs to request: %v", poolmonitor.MinimumFreeIps) - t.Logf("Maximum free IPs to release: %v", poolmonitor.MaximumFreeIps) - - // increase number of allocated IP's in CNS - err := fakecns.SetNumberOfAllocatedIPs(8) - if err != nil { - t.Fatalf("Failed to allocate test ipconfigs with err: %v", err) - } - - // When poolmonitor reconcile is called, trigger increase and cache goal state - err = poolmonitor.Reconcile(context.Background()) - if err != nil { - t.Fatalf("Failed to allocate test ipconfigs with err: %v", err) - } - - // ensure pool monitor has increased batch size - if poolmonitor.cachedNNC.Spec.RequestedIPCount != int64(initialIPConfigCount+(1*batchSize)) { - t.Fatalf("Pool monitor target IP count doesn't match CNS pool state after reconcile: %v,"+ - " actual %v", poolmonitor.cachedNNC.Spec.RequestedIPCount, 
len(fakecns.GetPodIPConfigState())) - } - - // reconcile pool monitor a second time, then verify requested ip count is still the same - err = poolmonitor.Reconcile(context.Background()) - if err != nil { - t.Fatalf("Failed to allocate test ipconfigs with err: %v", err) - } - - // ensure pool monitor requested pool size is unchanged as request controller hasn't reconciled yet - if poolmonitor.cachedNNC.Spec.RequestedIPCount != int64(initialIPConfigCount+(1*batchSize)) { - t.Fatalf("Pool monitor target IP count doesn't match CNS pool state after reconcile: %v,"+ - " actual %v", poolmonitor.cachedNNC.Spec.RequestedIPCount, len(fakecns.GetPodIPConfigState())) - } -} - -func TestPoolIncreasePastNodeLimit(t *testing.T) { - var ( - batchSize = 16 - initialIPConfigCount = 16 - requestThresholdPercent = 50 - releaseThresholdPercent = 150 - maxPodIPCount = int64(30) - ) - - fakecns, _, poolmonitor := initFakes(t, - batchSize, - initialIPConfigCount, - requestThresholdPercent, - releaseThresholdPercent, - maxPodIPCount) - - t.Logf("Minimum free IPs to request: %v", poolmonitor.MinimumFreeIps) - t.Logf("Maximum free IPs to release: %v", poolmonitor.MaximumFreeIps) - - // increase number of allocated IP's in CNS - err := fakecns.SetNumberOfAllocatedIPs(9) - if err != nil { - t.Fatalf("Failed to allocate test ipconfigs with err: %v", err) - } - - // When poolmonitor reconcile is called, trigger increase and cache goal state - err = poolmonitor.Reconcile(context.Background()) - if err != nil { - t.Fatalf("Failed to allocate test ipconfigs with err: %v", err) - } - - // ensure pool monitor has only requested the max pod ip count - if poolmonitor.cachedNNC.Spec.RequestedIPCount != maxPodIPCount { - t.Fatalf("Pool monitor target IP count (%v) should be the node limit (%v) when the max "+ - "has been reached", poolmonitor.cachedNNC.Spec.RequestedIPCount, maxPodIPCount) - } -} - -func TestPoolIncreaseBatchSizeGreaterThanMaxPodIPCount(t *testing.T) { - var ( - batchSize = 50 - 
initialIPConfigCount = 16 - requestThresholdPercent = 50 - releaseThresholdPercent = 150 - maxPodIPCount = int64(30) - ) - - fakecns, _, poolmonitor := initFakes(t, - batchSize, - initialIPConfigCount, - requestThresholdPercent, - releaseThresholdPercent, - maxPodIPCount) - - t.Logf("Minimum free IPs to request: %v", poolmonitor.MinimumFreeIps) - t.Logf("Maximum free IPs to release: %v", poolmonitor.MaximumFreeIps) - - // increase number of allocated IP's in CNS - err := fakecns.SetNumberOfAllocatedIPs(16) - if err != nil { - t.Fatalf("Failed to allocate test ipconfigs with err: %v", err) - } - - // When poolmonitor reconcile is called, trigger increase and cache goal state - err = poolmonitor.Reconcile(context.Background()) - if err != nil { - t.Fatalf("Failed to allocate test ipconfigs with err: %v", err) - } - - // ensure pool monitor has only requested the max pod ip count - if poolmonitor.cachedNNC.Spec.RequestedIPCount != maxPodIPCount { - t.Fatalf("Pool monitor target IP count (%v) should be the node limit (%v) "+ - "when the max has been reached", poolmonitor.cachedNNC.Spec.RequestedIPCount, maxPodIPCount) - } -} - -func TestPoolIncreaseMaxIPCountSetToZero(t *testing.T) { - var ( - batchSize = 16 - initialIPConfigCount = 16 - requestThresholdPercent = 50 - releaseThresholdPercent = 150 - initialMaxPodIPCount = int64(0) - expectedMaxPodIPCount = defaultMaxIPCount - ) - - _, _, poolmonitor := initFakes(t, batchSize, initialIPConfigCount, - requestThresholdPercent, releaseThresholdPercent, initialMaxPodIPCount) - - if poolmonitor.getMaxIPCount() != expectedMaxPodIPCount { - t.Fatalf("Pool monitor target IP count (%v) should be the node limit (%v) "+ - "when the MaxIPCount field in the CRD is zero", poolmonitor.getMaxIPCount(), expectedMaxPodIPCount) - } -} - -func TestPoolDecrease(t *testing.T) { - var ( - batchSize = 10 - initialIPConfigCount = 20 - requestThresholdPercent = 30 - releaseThresholdPercent = 150 - maxPodIPCount = int64(30) - ) - - fakecns, 
fakerc, poolmonitor := initFakes(t, batchSize, initialIPConfigCount, - requestThresholdPercent, releaseThresholdPercent, maxPodIPCount) - - log.Printf("Min free IP's %v", poolmonitor.MinimumFreeIps) - log.Printf("Max free IP %v", poolmonitor.MaximumFreeIps) - - // initial pool count is 20, set 15 of them to be allocated - err := fakecns.SetNumberOfAllocatedIPs(15) - if err != nil { - t.Fatal(err) - } - - // Pool monitor does nothing, as the current number of IP's falls in the threshold - err = poolmonitor.Reconcile(context.Background()) - if err != nil { - t.Fatal(err) - } - - // Decrease the number of allocated IP's down to 5. This should trigger a scale down - err = fakecns.SetNumberOfAllocatedIPs(4) - if err != nil { - t.Fatal(err) - } - - // Pool monitor will adjust the spec so the pool size will be 1 batch size smaller - err = poolmonitor.Reconcile(context.Background()) - if err != nil { - t.Fatal(err) - } - - // ensure that the adjusted spec is smaller than the initial pool size - if len(poolmonitor.cachedNNC.Spec.IPsNotInUse) != (initialIPConfigCount - batchSize) { - t.Fatalf("Expected pool size to be one batch size smaller after reconcile, expected %v, actual %v", - (initialIPConfigCount - batchSize), len(poolmonitor.cachedNNC.Spec.IPsNotInUse)) - } - - // reconcile the fake request controller - err = fakerc.Reconcile(true) - if err != nil { - t.Fatal(err) - } - - // CNS won't actually clean up the IPsNotInUse until it changes the spec for some other reason (i.e. scale up) - // so instead we should just verify that the CNS state has no more PendingReleaseIPConfigs, - // and that they were cleaned up. 
- if len(fakecns.GetPendingReleaseIPConfigs()) != 0 { - t.Fatalf("expected 0 PendingReleaseIPConfigs, got %d", len(fakecns.GetPendingReleaseIPConfigs())) - } -} - -func TestPoolSizeDecreaseWhenDecreaseHasAlreadyBeenRequested(t *testing.T) { - var ( - batchSize = 10 - initialIPConfigCount = 20 - requestThresholdPercent = 30 - releaseThresholdPercent = 100 - maxPodIPCount = int64(30) - ) - - fakecns, fakerc, poolmonitor := initFakes(t, batchSize, initialIPConfigCount, - requestThresholdPercent, releaseThresholdPercent, maxPodIPCount) - - log.Printf("Min free IP's %v", poolmonitor.MinimumFreeIps) - log.Printf("Max free IP %v", poolmonitor.MaximumFreeIps) - - // initial pool count is 30, set 25 of them to be allocated - err := fakecns.SetNumberOfAllocatedIPs(5) - if err != nil { - t.Error(err) - } - - // Pool monitor does nothing, as the current number of IP's falls in the threshold - err = poolmonitor.Reconcile(context.Background()) - if err != nil { - t.Errorf("Expected pool monitor to not fail after CNS set number of allocated IP's %v", err) - } - - // Ensure the size of the requested spec is still the same - if len(poolmonitor.cachedNNC.Spec.IPsNotInUse) != (initialIPConfigCount - batchSize) { - t.Fatalf("Expected IP's not in use be one batch size smaller after reconcile, expected %v,"+ - " actual %v", (initialIPConfigCount - batchSize), len(poolmonitor.cachedNNC.Spec.IPsNotInUse)) - } - - // Ensure the request ipcount is now one batch size smaller than the inital IP count - if poolmonitor.cachedNNC.Spec.RequestedIPCount != int64(initialIPConfigCount-batchSize) { - t.Fatalf("Expected pool size to be one batch size smaller after reconcile, expected %v,"+ - " actual %v", (initialIPConfigCount - batchSize), len(poolmonitor.cachedNNC.Spec.IPsNotInUse)) - } - - // Update pods with IP count, ensure pool monitor stays the same until request controller reconciles - err = fakecns.SetNumberOfAllocatedIPs(6) - if err != nil { - t.Error(err) - } - - // Ensure the size of the 
requested spec is still the same - if len(poolmonitor.cachedNNC.Spec.IPsNotInUse) != (initialIPConfigCount - batchSize) { - t.Fatalf("Expected IP's not in use to be one batch size smaller after reconcile, and not change"+ - " after reconcile, expected %v, actual %v", - (initialIPConfigCount - batchSize), len(poolmonitor.cachedNNC.Spec.IPsNotInUse)) - } - - // Ensure the request ipcount is now one batch size smaller than the inital IP count - if poolmonitor.cachedNNC.Spec.RequestedIPCount != int64(initialIPConfigCount-batchSize) { - t.Fatalf("Expected pool size to be one batch size smaller after reconcile, and not change after"+ - " existing call, expected %v, actual %v", (initialIPConfigCount - batchSize), - len(poolmonitor.cachedNNC.Spec.IPsNotInUse)) - } - - err = fakerc.Reconcile(true) - if err != nil { - t.Error(err) - } - - err = poolmonitor.Reconcile(context.Background()) - if err != nil { - t.Errorf("Expected no pool monitor failure after request controller reconcile: %v", err) - } - - // Ensure the spec doesn't have any IPsNotInUse after request controller has reconciled - if len(poolmonitor.cachedNNC.Spec.IPsNotInUse) != 0 { - t.Fatalf("Expected IP's not in use to be 0 after reconcile, expected %v, actual %v", - (initialIPConfigCount - batchSize), len(poolmonitor.cachedNNC.Spec.IPsNotInUse)) - } -} - -func TestDecreaseAndIncreaseToSameCount(t *testing.T) { - var ( - batchSize = 10 - initialIPConfigCount = 10 - requestThresholdPercent = 50 - releaseThresholdPercent = 150 - maxPodIPCount = int64(30) - ) - - fakecns, fakerc, poolmonitor := initFakes(t, - batchSize, - initialIPConfigCount, - requestThresholdPercent, - releaseThresholdPercent, - maxPodIPCount) - - log.Printf("Min free IP's %v", poolmonitor.MinimumFreeIps) - log.Printf("Max free IP %v", poolmonitor.MaximumFreeIps) - - // initial pool count is 10, set 5 of them to be allocated - err := fakecns.SetNumberOfAllocatedIPs(7) - if err != nil { - t.Error(err) - } - - // Pool monitor will increase the 
count to 20 - t.Logf("Scaleup: Increase pool size to 20") - ReconcileAndValidate(context.Background(), t, poolmonitor, 20, 0) - - // Update the IPConfig state - t.Logf("Reconcile with PodIPState") - err = fakerc.Reconcile(true) - if err != nil { - t.Error(err) - } - - // Release all IPs - err = fakecns.SetNumberOfAllocatedIPs(0) - if err != nil { - t.Error(err) - } - - t.Logf("Scaledown: Decrease pool size to 10") - ReconcileAndValidate(context.Background(), t, poolmonitor, 10, 10) - - // Increase it back to 20 - // initial pool count is 10, set 5 of them to be allocated - t.Logf("Scaleup: pool size back to 20 without updating the PodIpState for previous scale down") - err = fakecns.SetNumberOfAllocatedIPs(7) - if err != nil { - t.Error(err) - } - ReconcileAndValidate(context.Background(), t, poolmonitor, 20, 10) - - // Update the IPConfig count and dont remove the pending IPs - t.Logf("Reconcile with PodIPState") - err = fakerc.Reconcile(false) - if err != nil { - t.Error(err) - } - - // reconcile again - t.Logf("Reconcole with pool monitor again, it should not cleanup ipsnotinuse") - ReconcileAndValidate(context.Background(), t, poolmonitor, 20, 10) - - t.Logf("Now update podipconfig state") - err = fakerc.Reconcile(true) - if err != nil { - t.Error(err) - } - - err = poolmonitor.Reconcile(context.Background()) - if err != nil { - t.Errorf("Expected no pool monitor failure after request controller reconcile: %v", err) - } - ReconcileAndValidate(context.Background(), t, poolmonitor, 20, 0) -} - -func TestPoolSizeDecreaseToReallyLow(t *testing.T) { - var ( - batchSize = 10 - initialIPConfigCount = 30 - requestThresholdPercent = 30 - releaseThresholdPercent = 100 - maxPodIPCount = int64(30) - ) - - fakecns, fakerc, poolmonitor := initFakes(t, - batchSize, - initialIPConfigCount, - requestThresholdPercent, - releaseThresholdPercent, - maxPodIPCount) - - log.Printf("Min free IP's %v", poolmonitor.MinimumFreeIps) - log.Printf("Max free IP %v", 
poolmonitor.MaximumFreeIps) - - // initial pool count is 30, set 23 of them to be allocated - err := fakecns.SetNumberOfAllocatedIPs(23) - if err != nil { - t.Error(err) - } - - // Pool monitor does nothing, as the current number of IP's falls in the threshold - err = poolmonitor.Reconcile(context.Background()) - if err != nil { - t.Errorf("Expected pool monitor to not fail after CNS set number of allocated IP's %v", err) - } - - // Now Drop the Allocated count to really low, say 3. This should trigger release in 2 batches - err = fakecns.SetNumberOfAllocatedIPs(3) - if err != nil { - t.Error(err) - } - - // Pool monitor does nothing, as the current number of IP's falls in the threshold - t.Logf("Reconcile after Allocated count from 33 -> 3, Exepected free count = 10") - err = poolmonitor.Reconcile(context.Background()) - if err != nil { - t.Errorf("Expected pool monitor to not fail after CNS set number of allocated IP's %v", err) - } - - // Ensure the size of the requested spec is still the same - if len(poolmonitor.cachedNNC.Spec.IPsNotInUse) != batchSize { - t.Fatalf("Expected IP's not in use is not correct, expected %v, actual %v", - batchSize, len(poolmonitor.cachedNNC.Spec.IPsNotInUse)) - } - - // Ensure the request ipcount is now one batch size smaller than the inital IP count - if poolmonitor.cachedNNC.Spec.RequestedIPCount != int64(initialIPConfigCount-batchSize) { - t.Fatalf("Expected pool size to be one batch size smaller after reconcile, expected %v, "+ - "actual %v", (initialIPConfigCount - batchSize), len(poolmonitor.cachedNNC.Spec.IPsNotInUse)) - } - - // Reconcile again, it should release the second batch - t.Logf("Reconcile again - 2, Exepected free count = 20") - err = poolmonitor.Reconcile(context.Background()) - if err != nil { - t.Errorf("Expected pool monitor to not fail after CNS set number of allocated IP's %v", err) - } - - // Ensure the size of the requested spec is still the same - if len(poolmonitor.cachedNNC.Spec.IPsNotInUse) != 
batchSize*2 { - t.Fatalf("Expected IP's not in use is not correct, expected %v, actual %v", batchSize*2, - len(poolmonitor.cachedNNC.Spec.IPsNotInUse)) - } - - // Ensure the request ipcount is now one batch size smaller than the inital IP count - if poolmonitor.cachedNNC.Spec.RequestedIPCount != int64(initialIPConfigCount-(batchSize*2)) { - t.Fatalf("Expected pool size to be one batch size smaller after reconcile, expected %v, "+ - "actual %v", (initialIPConfigCount - batchSize), len(poolmonitor.cachedNNC.Spec.IPsNotInUse)) - } - - t.Logf("Update Request Controller") - err = fakerc.Reconcile(true) - if err != nil { - t.Error(err) - } - - err = poolmonitor.Reconcile(context.Background()) - if err != nil { - t.Errorf("Expected no pool monitor failure after request controller reconcile: %v", err) - } - - // Ensure the spec doesn't have any IPsNotInUse after request controller has reconciled - if len(poolmonitor.cachedNNC.Spec.IPsNotInUse) != 0 { - t.Fatalf("Expected IP's not in use to be 0 after reconcile, expected %v, actual %v", - (initialIPConfigCount - batchSize), len(poolmonitor.cachedNNC.Spec.IPsNotInUse)) - } -} - -func TestDecreaseAfterNodeLimitReached(t *testing.T) { - var ( - batchSize = 16 - initialIPConfigCount = 30 - requestThresholdPercent = 50 - releaseThresholdPercent = 150 - maxPodIPCount = int64(30) - expectedRequestedIP = 16 - expectedDecreaseIP = int(maxPodIPCount) % batchSize - ) - - fakecns, _, poolmonitor := initFakes(t, - batchSize, - initialIPConfigCount, - requestThresholdPercent, - releaseThresholdPercent, - maxPodIPCount) - - t.Logf("Minimum free IPs to request: %v", poolmonitor.MinimumFreeIps) - t.Logf("Maximum free IPs to release: %v", poolmonitor.MaximumFreeIps) - - err := fakecns.SetNumberOfAllocatedIPs(20) - if err != nil { - t.Error(err) - } - - err = poolmonitor.Reconcile(context.Background()) - if err != nil { - t.Errorf("Expected pool monitor to not fail after CNS set number of allocated IP's %v", err) - } - - // Trigger a batch 
release - err = fakecns.SetNumberOfAllocatedIPs(5) - if err != nil { - t.Error(err) - } - - err = poolmonitor.Reconcile(context.Background()) - if err != nil { - t.Errorf("Expected pool monitor to not fail after CNS set number of allocated IP's %v", err) - } - - // Ensure poolmonitor asked for a multiple of batch size - if poolmonitor.cachedNNC.Spec.RequestedIPCount != int64(expectedRequestedIP) { - t.Fatalf("Expected requested ips to be %v when scaling by 1 batch size down from %v "+ - "(max pod limit) but got %v", expectedRequestedIP, maxPodIPCount, - poolmonitor.cachedNNC.Spec.RequestedIPCount) - } - - // Ensure we minused by the mod result - if len(poolmonitor.cachedNNC.Spec.IPsNotInUse) != expectedDecreaseIP { - t.Fatalf("Expected to decrease requested IPs by %v (max pod count mod batchsize) to "+ - "make the requested ip count a multiple of the batch size in the case of hitting "+ - "the max before scale down, but got %v", expectedDecreaseIP, len(poolmonitor.cachedNNC.Spec.IPsNotInUse)) - } -} - -func TestPoolDecreaseBatchSizeGreaterThanMaxPodIPCount(t *testing.T) { - var ( - batchSize = 31 - initialIPConfigCount = 30 - requestThresholdPercent = 50 - releaseThresholdPercent = 150 - maxPodIPCount = int64(30) - ) - - fakecns, _, poolmonitor := initFakes(t, - batchSize, - initialIPConfigCount, - requestThresholdPercent, - releaseThresholdPercent, - maxPodIPCount) - - t.Logf("Minimum free IPs to request: %v", poolmonitor.MinimumFreeIps) - t.Logf("Maximum free IPs to release: %v", poolmonitor.MaximumFreeIps) - - // increase number of allocated IP's in CNS - err := fakecns.SetNumberOfAllocatedIPs(30) - if err != nil { - t.Fatalf("Failed to allocate test ipconfigs with err: %v", err) - } - - // When poolmonitor reconcile is called, trigger increase and cache goal state - err = poolmonitor.Reconcile(context.Background()) - if err != nil { - t.Fatalf("Failed to allocate test ipconfigs with err: %v", err) - } - - // Trigger a batch release - err = 
fakecns.SetNumberOfAllocatedIPs(1) - if err != nil { - t.Error(err) - } - - err = poolmonitor.Reconcile(context.Background()) - if err != nil { - t.Errorf("Expected pool monitor to not fail after CNS set number of allocated IP's %v", err) - } - - // ensure pool monitor has only requested the max pod ip count - if poolmonitor.cachedNNC.Spec.RequestedIPCount != maxPodIPCount { - t.Fatalf("Pool monitor target IP count (%v) should be the node limit (%v) when the max "+ - "has been reached", poolmonitor.cachedNNC.Spec.RequestedIPCount, maxPodIPCount) - } -} - -func ReconcileAndValidate(ctx context.Context, - t *testing.T, - poolmonitor *CNSIPAMPoolMonitor, - expectedRequestCount, - expectedIpsNotInUse int) { - err := poolmonitor.Reconcile(context.Background()) - if err != nil { - t.Errorf("Expected pool monitor to not fail after CNS set number of allocated IP's %v", err) - } - - // Increased the new count to be 20 - if poolmonitor.cachedNNC.Spec.RequestedIPCount != int64(expectedRequestCount) { - t.Fatalf("RequestIPCount not same, expected %v, actual %v", - expectedRequestCount, - poolmonitor.cachedNNC.Spec.RequestedIPCount) - } - - // Ensure there is no pending release ips - if len(poolmonitor.cachedNNC.Spec.IPsNotInUse) != expectedIpsNotInUse { - t.Fatalf("Expected IP's not in use, expected %v, actual %v", - expectedIpsNotInUse, - len(poolmonitor.cachedNNC.Spec.IPsNotInUse)) - } -} diff --git a/cns/restserver/api_test.go b/cns/restserver/api_test.go index 6577681123..e1566918f5 100644 --- a/cns/restserver/api_test.go +++ b/cns/restserver/api_test.go @@ -925,7 +925,7 @@ func startService() error { return err } - svc.IPAMPoolMonitor = &fakes.IPAMPoolMonitorFake{} + svc.IPAMPoolMonitor = &fakes.MonitorFake{} if service != nil { // Create empty azure-cns.json. 
CNS should start successfully by deleting this file diff --git a/cns/restserver/internalapi.go b/cns/restserver/internalapi.go index 4b7c949923..faa04cb562 100644 --- a/cns/restserver/internalapi.go +++ b/cns/restserver/internalapi.go @@ -211,8 +211,7 @@ func (service *HTTPRestService) SyncHostNCVersion(ctx context.Context, channelMo // This API will be called by CNS RequestController on CRD update. func (service *HTTPRestService) ReconcileNCState( - ncRequest *cns.CreateNetworkContainerRequest, podInfoByIP map[string]cns.PodInfo, scalar v1alpha.Scaler, - spec v1alpha.NodeNetworkConfigSpec) types.ResponseCode { + ncRequest *cns.CreateNetworkContainerRequest, podInfoByIP map[string]cns.PodInfo, nnc *v1alpha.NodeNetworkConfig) types.ResponseCode { logger.Printf("Reconciling NC state with podInfo %+v", podInfoByIP) // check if ncRequest is null, then return as there is no CRD state yet if ncRequest == nil { @@ -225,7 +224,7 @@ func (service *HTTPRestService) ReconcileNCState( if returnCode != types.Success { return returnCode } - service.IPAMPoolMonitor.Update(scalar, spec) + service.IPAMPoolMonitor.Update(nnc) // now parse the secondaryIP list, if it exists in PodInfo list, then allocate that ip for _, secIpConfig := range ncRequest.SecondaryIPConfigs { @@ -254,9 +253,9 @@ func (service *HTTPRestService) ReconcileNCState( } } - err := service.MarkExistingIPsAsPending(spec.IPsNotInUse) + err := service.MarkExistingIPsAsPending(nnc.Spec.IPsNotInUse) if err != nil { - logger.Errorf("[Azure CNS] Error. Failed to mark IP's as pending %v", spec.IPsNotInUse) + logger.Errorf("[Azure CNS] Error. 
Failed to mark IP's as pending %v", nnc.Spec.IPsNotInUse) return types.UnexpectedError } diff --git a/cns/restserver/internalapi_test.go b/cns/restserver/internalapi_test.go index 2e8609acdf..a9be924bcf 100644 --- a/cns/restserver/internalapi_test.go +++ b/cns/restserver/internalapi_test.go @@ -13,8 +13,8 @@ import ( "time" "github.com/Azure/azure-container-networking/cns" - "github.com/Azure/azure-container-networking/cns/fakes" "github.com/Azure/azure-container-networking/cns/types" + "github.com/Azure/azure-container-networking/crd/nodenetworkconfig/api/v1alpha" "github.com/google/uuid" ) @@ -200,7 +200,18 @@ func TestReconcileNCWithEmptyState(t *testing.T) { expectedNcCount := len(svc.state.ContainerStatus) expectedAllocatedPods := make(map[string]cns.PodInfo) - returnCode := svc.ReconcileNCState(nil, expectedAllocatedPods, fakes.NewFakeScalar(releasePercent, requestPercent, batchSize), fakes.NewFakeNodeNetworkConfigSpec(initPoolSize)) + returnCode := svc.ReconcileNCState(nil, expectedAllocatedPods, &v1alpha.NodeNetworkConfig{ + Status: v1alpha.NodeNetworkConfigStatus{ + Scaler: v1alpha.Scaler{ + BatchSize: batchSize, + ReleaseThresholdPercent: releasePercent, + RequestThresholdPercent: requestPercent, + }, + }, + Spec: v1alpha.NodeNetworkConfigSpec{ + RequestedIPCount: initPoolSize, + }, + }) if returnCode != types.Success { t.Errorf("Unexpected failure on reconcile with no state %d", returnCode) } @@ -231,7 +242,18 @@ func TestReconcileNCWithExistingState(t *testing.T) { } expectedNcCount := len(svc.state.ContainerStatus) - returnCode := svc.ReconcileNCState(req, expectedAllocatedPods, fakes.NewFakeScalar(releasePercent, requestPercent, batchSize), fakes.NewFakeNodeNetworkConfigSpec(initPoolSize)) + returnCode := svc.ReconcileNCState(req, expectedAllocatedPods, &v1alpha.NodeNetworkConfig{ + Status: v1alpha.NodeNetworkConfigStatus{ + Scaler: v1alpha.Scaler{ + BatchSize: batchSize, + ReleaseThresholdPercent: releasePercent, + RequestThresholdPercent: 
requestPercent, + }, + }, + Spec: v1alpha.NodeNetworkConfigSpec{ + RequestedIPCount: initPoolSize, + }, + }) if returnCode != types.Success { t.Errorf("Unexpected failure on reconcile with no state %d", returnCode) } @@ -264,7 +286,18 @@ func TestReconcileNCWithExistingStateFromInterfaceID(t *testing.T) { } expectedNcCount := len(svc.state.ContainerStatus) - returnCode := svc.ReconcileNCState(req, expectedAllocatedPods, fakes.NewFakeScalar(releasePercent, requestPercent, batchSize), fakes.NewFakeNodeNetworkConfigSpec(initPoolSize)) + returnCode := svc.ReconcileNCState(req, expectedAllocatedPods, &v1alpha.NodeNetworkConfig{ + Status: v1alpha.NodeNetworkConfigStatus{ + Scaler: v1alpha.Scaler{ + BatchSize: batchSize, + ReleaseThresholdPercent: releasePercent, + RequestThresholdPercent: requestPercent, + }, + }, + Spec: v1alpha.NodeNetworkConfigSpec{ + RequestedIPCount: initPoolSize, + }, + }) if returnCode != types.Success { t.Errorf("Unexpected failure on reconcile with no state %d", returnCode) } @@ -296,7 +329,18 @@ func TestReconcileNCWithSystemPods(t *testing.T) { expectedAllocatedPods["192.168.0.1"] = cns.NewPodInfo("", "", "systempod", "kube-system") expectedNcCount := len(svc.state.ContainerStatus) - returnCode := svc.ReconcileNCState(req, expectedAllocatedPods, fakes.NewFakeScalar(releasePercent, requestPercent, batchSize), fakes.NewFakeNodeNetworkConfigSpec(initPoolSize)) + returnCode := svc.ReconcileNCState(req, expectedAllocatedPods, &v1alpha.NodeNetworkConfig{ + Status: v1alpha.NodeNetworkConfigStatus{ + Scaler: v1alpha.Scaler{ + BatchSize: batchSize, + ReleaseThresholdPercent: releasePercent, + RequestThresholdPercent: requestPercent, + }, + }, + Spec: v1alpha.NodeNetworkConfigSpec{ + RequestedIPCount: initPoolSize, + }, + }) if returnCode != types.Success { t.Errorf("Unexpected failure on reconcile with no state %d", returnCode) } @@ -382,9 +426,18 @@ func createAndValidateNCRequest(t *testing.T, secondaryIPConfigs map[string]cns. 
if returnCode != 0 { t.Fatalf("Failed to createNetworkContainerRequest, req: %+v, err: %d", req, returnCode) } - svc.IPAMPoolMonitor.Update( - fakes.NewFakeScalar(releasePercent, requestPercent, batchSize), - fakes.NewFakeNodeNetworkConfigSpec(initPoolSize)) + svc.IPAMPoolMonitor.Update(&v1alpha.NodeNetworkConfig{ + Status: v1alpha.NodeNetworkConfigStatus{ + Scaler: v1alpha.Scaler{ + BatchSize: batchSize, + ReleaseThresholdPercent: releasePercent, + RequestThresholdPercent: requestPercent, + }, + }, + Spec: v1alpha.NodeNetworkConfigSpec{ + RequestedIPCount: initPoolSize, + }, + }) validateNetworkRequest(t, *req) } @@ -546,9 +599,18 @@ func createNCReqInternal(t *testing.T, secondaryIPConfigs map[string]cns.Seconda if returnCode != 0 { t.Fatalf("Failed to createNetworkContainerRequest, req: %+v, err: %d", req, returnCode) } - svc.IPAMPoolMonitor.Update( - fakes.NewFakeScalar(releasePercent, requestPercent, batchSize), - fakes.NewFakeNodeNetworkConfigSpec(initPoolSize)) + svc.IPAMPoolMonitor.Update(&v1alpha.NodeNetworkConfig{ + Status: v1alpha.NodeNetworkConfigStatus{ + Scaler: v1alpha.Scaler{ + BatchSize: batchSize, + ReleaseThresholdPercent: releasePercent, + RequestThresholdPercent: requestPercent, + }, + }, + Spec: v1alpha.NodeNetworkConfigSpec{ + RequestedIPCount: initPoolSize, + }, + }) return *req } diff --git a/cns/restserver/ipam_test.go b/cns/restserver/ipam_test.go index db9259743d..9ab3ae20d0 100644 --- a/cns/restserver/ipam_test.go +++ b/cns/restserver/ipam_test.go @@ -11,6 +11,7 @@ import ( "github.com/Azure/azure-container-networking/cns" "github.com/Azure/azure-container-networking/cns/common" "github.com/Azure/azure-container-networking/cns/fakes" + "github.com/Azure/azure-container-networking/crd/nodenetworkconfig/api/v1alpha" ) var ( @@ -36,7 +37,7 @@ func getTestService() *HTTPRestService { var config common.ServiceConfig httpsvc, _ := NewHTTPRestService(&config, fakes.NewFakeImdsClient(), fakes.NewFakeNMAgentClient()) svc = 
httpsvc.(*HTTPRestService) - svc.IPAMPoolMonitor = &fakes.IPAMPoolMonitorFake{} + svc.IPAMPoolMonitor = &fakes.MonitorFake{} setOrchestratorTypeInternal(cns.KubernetesCRD) return svc @@ -613,9 +614,19 @@ func TestIPAMMarkIPAsPendingWithPendingProgrammingIPs(t *testing.T) { t.Fatalf("Failed to createNetworkContainerRequest, req: %+v, err: %d", req, returnCode) } svc.IPAMPoolMonitor.Update( - fakes.NewFakeScalar(releasePercent, requestPercent, batchSize), - fakes.NewFakeNodeNetworkConfigSpec(initPoolSize)) - + &v1alpha.NodeNetworkConfig{ + Status: v1alpha.NodeNetworkConfigStatus{ + Scaler: v1alpha.Scaler{ + BatchSize: batchSize, + ReleaseThresholdPercent: releasePercent, + RequestThresholdPercent: requestPercent, + }, + }, + Spec: v1alpha.NodeNetworkConfigSpec{ + RequestedIPCount: initPoolSize, + }, + }, + ) // Release pending programming IPs ips, err := svc.MarkIPAsPendingRelease(2) if err != nil { diff --git a/cns/service/main.go b/cns/service/main.go index 7a6fdfa146..2db6bbf08c 100644 --- a/cns/service/main.go +++ b/cns/service/main.go @@ -28,7 +28,7 @@ import ( "github.com/Azure/azure-container-networking/cns/configuration" "github.com/Azure/azure-container-networking/cns/hnsclient" "github.com/Azure/azure-container-networking/cns/imdsclient" - "github.com/Azure/azure-container-networking/cns/ipampoolmonitor" + "github.com/Azure/azure-container-networking/cns/ipampool" "github.com/Azure/azure-container-networking/cns/logger" "github.com/Azure/azure-container-networking/cns/multitenantcontroller" "github.com/Azure/azure-container-networking/cns/multitenantcontroller/multitenantoperator" @@ -784,7 +784,7 @@ type nodeNetworkConfigGetter interface { } type ncStateReconciler interface { - ReconcileNCState(ncRequest *cns.CreateNetworkContainerRequest, podInfoByIP map[string]cns.PodInfo, scalar v1alpha.Scaler, spec v1alpha.NodeNetworkConfigSpec) cnstypes.ResponseCode + ReconcileNCState(ncRequest *cns.CreateNetworkContainerRequest, podInfoByIP map[string]cns.PodInfo, 
nnc *v1alpha.NodeNetworkConfig) cnstypes.ResponseCode } // TODO(rbtr) where should this live?? @@ -800,7 +800,7 @@ func initCNS(ctx context.Context, cli nodeNetworkConfigGetter, ncReconciler ncSt // If instance of crd is not found, pass nil to CNSClient if client.IgnoreNotFound(err) == nil { - err = restserver.ResponseCodeToError(ncReconciler.ReconcileNCState(nil, nil, nnc.Status.Scaler, nnc.Spec)) + err = restserver.ResponseCodeToError(ncReconciler.ReconcileNCState(nil, nil, nnc)) return errors.Wrap(err, "failed to reconcile NC state") } @@ -810,7 +810,7 @@ func initCNS(ctx context.Context, cli nodeNetworkConfigGetter, ncReconciler ncSt // If there are no NCs, pass nil to CNSClient if len(nnc.Status.NetworkContainers) == 0 { - err = restserver.ResponseCodeToError(ncReconciler.ReconcileNCState(nil, nil, nnc.Status.Scaler, nnc.Spec)) + err = restserver.ResponseCodeToError(ncReconciler.ReconcileNCState(nil, nil, nnc)) return errors.Wrap(err, "failed to reconcile NC state") } @@ -833,7 +833,7 @@ func initCNS(ctx context.Context, cli nodeNetworkConfigGetter, ncReconciler ncSt // errors.Wrap provides additional context, and return nil if the err input arg is nil // Call cnsclient init cns passing those two things. 
- err = restserver.ResponseCodeToError(ncReconciler.ReconcileNCState(&ncRequest, podInfoByIP, nnc.Status.Scaler, nnc.Spec)) + err = restserver.ResponseCodeToError(ncReconciler.ReconcileNCState(&ncRequest, podInfoByIP, nnc)) return errors.Wrap(err, "err in CNS reconciliation") } @@ -872,7 +872,15 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn scopedcli := kubecontroller.NewScopedClient(nnccli, types.NamespacedName{Namespace: "kube-system", Name: nodeName}) // initialize the ipam pool monitor - httpRestServiceImplementation.IPAMPoolMonitor = ipampoolmonitor.NewCNSIPAMPoolMonitor(httpRestServiceImplementation, scopedcli) + poolMonitor := ipampool.NewMonitor(httpRestServiceImplementation, scopedcli, &ipampool.Options{RefreshDelay: poolIPAMRefreshRateInMilliseconds}) + httpRestServiceImplementation.IPAMPoolMonitor = poolMonitor + logger.Printf("Starting IPAM Pool Monitor") + go func() { + if e := poolMonitor.Start(ctx); e != nil { + logger.Errorf("[Azure CNS] Failed to start pool monitor with err: %v", e) + } + }() + err = initCNS(ctx, scopedcli, httpRestServiceImplementation) if err != nil { return errors.Wrap(err, "failed to initialize CNS state") @@ -908,26 +916,10 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn } }() - logger.Printf("Starting IPAM Pool Monitor") - go func() { - for { - if err := httpRestServiceImplementation.IPAMPoolMonitor.Start(ctx, poolIPAMRefreshRateInMilliseconds); err != nil { - logger.Errorf("[Azure CNS] Failed to start pool monitor with err: %v", err) - // todo: add a CNS metric to count # of failures - } else { - logger.Printf("[Azure CNS] Exiting IPAM Pool Monitor") - return - } - - // Retry after 1sec - time.Sleep(time.Second) - } - }() - logger.Printf("Starting SyncHostNCVersion") go func() { // Periodically poll vfp programmed NC version from NMAgent - tickerChannel := time.Tick(cnsconfig.SyncHostNCVersionIntervalMs * time.Millisecond) + tickerChannel := 
time.Tick(cnsconfig.SyncHostNCVersionIntervalMs) for { select { case <-tickerChannel: diff --git a/cns/singletenantcontroller/reconciler.go b/cns/singletenantcontroller/reconciler.go index 99249efb8a..748196c4dc 100644 --- a/cns/singletenantcontroller/reconciler.go +++ b/cns/singletenantcontroller/reconciler.go @@ -23,7 +23,7 @@ type cnsClient interface { } type ipamPoolMonitorClient interface { - Update(scalar v1alpha.Scaler, spec v1alpha.NodeNetworkConfigSpec) + Update(*v1alpha.NodeNetworkConfig) } type nncGetter interface { @@ -81,7 +81,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req reconcile.Request) (reco return reconcile.Result{}, errors.Wrap(err, "failed to create or update network container") } - r.ipampoolmonitorcli.Update(nnc.Status.Scaler, nnc.Spec) + r.ipampoolmonitorcli.Update(nnc) // record assigned IPs metric assignedIPs.Set(float64(len(nnc.Status.NetworkContainers[0].IPAssignments))) @@ -103,7 +103,7 @@ func (r *Reconciler) SetupWithManager(mgr ctrl.Manager, nodeName string) error { return nodeName == object.GetName() })). WithEventFilter(predicate.Funcs{ - // check that the generation is the same - status changes don't update generation. 
+ // check that the generation is the same - status changes don't update generation. UpdateFunc: func(ue event.UpdateEvent) bool { return ue.ObjectOld.GetGeneration() == ue.ObjectNew.GetGeneration() }, diff --git a/cns/singletenantcontroller/reconciler_test.go b/cns/singletenantcontroller/reconciler_test.go index c82ee24755..9e396c3ae2 100644 --- a/cns/singletenantcontroller/reconciler_test.go +++ b/cns/singletenantcontroller/reconciler_test.go @@ -18,15 +18,14 @@ import ( ) type cnsClientState struct { - req *cns.CreateNetworkContainerRequest - scaler v1alpha.Scaler - spec v1alpha.NodeNetworkConfigSpec + req *cns.CreateNetworkContainerRequest + nnc *v1alpha.NodeNetworkConfig } type mockCNSClient struct { state cnsClientState createOrUpdateNC func(*cns.CreateNetworkContainerRequest) cnstypes.ResponseCode - update func(v1alpha.Scaler, v1alpha.NodeNetworkConfigSpec) + update func(*v1alpha.NodeNetworkConfig) } //nolint:gocritic // ignore hugeParam pls @@ -35,10 +34,9 @@ func (m *mockCNSClient) CreateOrUpdateNetworkContainerInternal(req *cns.CreateNe return m.createOrUpdateNC(req) } -func (m *mockCNSClient) Update(scaler v1alpha.Scaler, spec v1alpha.NodeNetworkConfigSpec) { - m.state.scaler = scaler - m.state.spec = spec - m.update(scaler, spec) +func (m *mockCNSClient) Update(nnc *v1alpha.NodeNetworkConfig) { + m.state.nnc = nnc + m.update(nnc) } type mockNCGetter struct { @@ -133,14 +131,16 @@ func TestReconcile(t *testing.T) { createOrUpdateNC: func(*cns.CreateNetworkContainerRequest) cnstypes.ResponseCode { return cnstypes.Success }, - update: func(v1alpha.Scaler, v1alpha.NodeNetworkConfigSpec) {}, + update: func(*v1alpha.NodeNetworkConfig) {}, }, wantErr: false, wantCNSClientState: cnsClientState{ - req: &validRequest, - scaler: validStatus.Scaler, - spec: v1alpha.NodeNetworkConfigSpec{ - RequestedIPCount: 1, + req: &validRequest, + nnc: &v1alpha.NodeNetworkConfig{ + Status: validStatus, + Spec: v1alpha.NodeNetworkConfigSpec{ + RequestedIPCount: 1, + }, }, }, },
diff --git a/test/integration/goldpinger/utils.go b/test/integration/goldpinger/utils.go index ad9bd14e0d..7c0a769306 100644 --- a/test/integration/goldpinger/utils.go +++ b/test/integration/goldpinger/utils.go @@ -1,3 +1,4 @@ +//go:build integration // +build integration package goldpinger @@ -62,10 +63,10 @@ func (c ClusterStats) PrintStats() { format := "cluster stats - " + "nodes in use: %d, " + "pod count: %d, " + - "pod health percentage: %2.2f, " + - "ping health percentage: %2.2f\n" + "pod health: %d/%d (%2.2f), " + + "ping health percentage: %d/%d (%2.2f)\n" podHealthPct := (float64(len(healthyPods)) / float64(podCount)) * 100 pingHealthPct := (float64(healthyPingCount) / float64(pingCount)) * 100 - fmt.Printf(format, len(nodes), podCount, podHealthPct, pingHealthPct) + fmt.Printf(format, len(nodes), podCount, len(healthyPods), podCount, podHealthPct, healthyPingCount, pingCount, pingHealthPct) } diff --git a/test/integration/k8s_test.go b/test/integration/k8s_test.go index 71ee58c0fd..8facf12d27 100644 --- a/test/integration/k8s_test.go +++ b/test/integration/k8s_test.go @@ -35,9 +35,8 @@ const ( gpServiceAccountPath = gpFolder + "/service-account.yaml" gpDaemonset = gpFolder + "/daemonset.yaml" gpDeployment = gpFolder + "/deployment.yaml" - - retryAttempts = 20 - retryDelaySec = 5 * time.Second + retryAttempts = 60 + retryDelaySec = 5 * time.Second ) var ( @@ -45,6 +44,7 @@ var ( kubeconfig = flag.String("test-kubeconfig", filepath.Join(homedir.HomeDir(), ".kube", "config"), "(optional) absolute path to the kubeconfig file") delegatedSubnetID = flag.String("delegated-subnet-id", "", "delegated subnet id for node labeling") delegatedSubnetName = flag.String("subnet-name", "", "subnet name for node labeling") + gpPodScaleCounts = []int{3, 15, 150, 3} ) func shouldLabelNodes() bool { @@ -142,12 +142,10 @@ func TestPodScaling(t *testing.T) { } }) - counts := []int{15, 5, 15} - - for _, c := range counts { + for _, c := range gpPodScaleCounts { count := c 
t.Run(fmt.Sprintf("replica count %d", count), func(t *testing.T) { - replicaCtx, cancel := context.WithTimeout(ctx, 60*time.Second) + replicaCtx, cancel := context.WithTimeout(ctx, (retryAttempts+1)*retryDelaySec) defer cancel() if err := updateReplicaCount(t, replicaCtx, deploymentsClient, deployment.Name, count); err != nil { @@ -192,7 +190,7 @@ func TestPodScaling(t *testing.T) { } t.Run("all pods can ping each other", func(t *testing.T) { - clusterCheckCtx, cancel := context.WithTimeout(ctx, 10*time.Minute) + clusterCheckCtx, cancel := context.WithTimeout(ctx, 20*time.Minute) defer cancel() clusterCheckFn := func() error { pf, err := NewPortForwarder(restConfig) @@ -215,7 +213,7 @@ func TestPodScaling(t *testing.T) { return nil } if err := defaultRetrier.Do(portForwardCtx, portForwardFn); err != nil { - t.Fatalf("could not start port forward within %v: %v", retryDelaySec.String(), err) + t.Fatalf("could not start port forward within %v: %v", retryDelaySec.String(), err) } defer streamHandle.Stop() @@ -235,8 +233,7 @@ func TestPodScaling(t *testing.T) { return errors.New("not all pings are healthy") } - retrier := retry.Retrier{Attempts: 5, Delay: 20 * time.Second} - if err := retrier.Do(clusterCheckCtx, clusterCheckFn); err != nil { + if err := defaultRetrier.Do(clusterCheckCtx, clusterCheckFn); err != nil { t.Fatalf("cluster could not reach healthy state: %v", err) } diff --git a/test/integration/setup_test.go b/test/integration/setup_test.go index a520c94e62..f000493f7f 100644 --- a/test/integration/setup_test.go +++ b/test/integration/setup_test.go @@ -180,7 +180,7 @@ func installCNIManagerDaemonset(ctx context.Context, clientset *kubernetes.Clien cni.Spec.Template.Spec.Containers[0].Image = getImageString(image, imageTag) cniDaemonsetClient := clientset.AppsV1().DaemonSets(cni.Namespace) - log.Printf("Installing CNI with image %s", cni.Spec.Template.Spec.Containers[0].Image) + log.Printf("Installing CNI with image %s", 
cni.Spec.Template.Spec.Containers[0].Image) if err = mustCreateDaemonset(ctx, cniDaemonsetClient, cni); err != nil { return nil, err