diff --git a/cns/service/main.go b/cns/service/main.go
index f74e816f74..26ce881d97 100644
--- a/cns/service/main.go
+++ b/cns/service/main.go
@@ -857,10 +857,21 @@ func reconcileInitialCNSState(ctx context.Context, cli nodeNetworkConfigGetter,
 
 	// Convert to CreateNetworkContainerRequest
 	for i := range nnc.Status.NetworkContainers {
-		ncRequest, err := kubecontroller.CreateNCRequestFromDynamicNC(nnc.Status.NetworkContainers[i])
+		var ncRequest *cns.CreateNetworkContainerRequest
+		var err error
+
+		switch nnc.Status.NetworkContainers[i].AssignmentMode { //nolint:exhaustive // skipping dynamic case
+		case v1alpha.Static:
+			ncRequest, err = kubecontroller.CreateNCRequestFromStaticNC(nnc.Status.NetworkContainers[i])
+		default: // For backward compatibility, default will be treated as Dynamic too.
+			ncRequest, err = kubecontroller.CreateNCRequestFromDynamicNC(nnc.Status.NetworkContainers[i])
+		}
+
 		if err != nil {
-			return errors.Wrap(err, "failed to convert NNC status to network container request")
+			return errors.Wrapf(err, "failed to convert NNC status to network container request, "+
+				"assignmentMode: %s", nnc.Status.NetworkContainers[i].AssignmentMode)
 		}
+
 		// rebuild CNS state
 		podInfoByIP, err := podInfoByIPProvider.PodInfoByIP()
 		if err != nil {
@@ -949,23 +960,33 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn
 	httpRestServiceImplementation.IPAMPoolMonitor = poolMonitor
 
 	// reconcile initial CNS state from CNI or apiserver.
-	// apiserver nnc might not be registered or api server might be down and crashloop backof puts us outside of 5-10 minutes we have for
-	// aks addons to come up so retry a bit more aggresively here.
-	// will retry 10 times maxing out at a minute taking about 8 minutes before it gives up.
-	attempt := 0
-	err = retry.Do(func() error {
-		attempt++
-		logger.Printf("reconciling initial CNS state attempt: %d", attempt)
-		err = reconcileInitialCNSState(ctx, scopedcli, httpRestServiceImplementation, podInfoByIPProvider)
+	// Only reconcile if there are any existing Pods using NC IPs,
+	// else let the goal state be updated using the regular NNC Reconciler loop.
+	podInfoByIP, err := podInfoByIPProvider.PodInfoByIP()
+	if err != nil {
+		return errors.Wrap(err, "failed to provide PodInfoByIP")
+	}
+	if len(podInfoByIP) > 0 {
+		logger.Printf("Reconciling initial CNS state as PodInfoByIP is not empty: %d", len(podInfoByIP))
+
+		// apiserver nnc might not be registered or api server might be down and crashloop backoff puts us outside of the 5-10 minutes we have for
+		// aks addons to come up, so retry a bit more aggressively here.
+		// will retry 10 times, maxing out at a minute, taking about 8 minutes before it gives up.
+		attempt := 0
+		err = retry.Do(func() error {
+			attempt++
+			logger.Printf("reconciling initial CNS state attempt: %d", attempt)
+			err = reconcileInitialCNSState(ctx, scopedcli, httpRestServiceImplementation, podInfoByIPProvider)
+			if err != nil {
+				logger.Errorf("failed to reconcile initial CNS state, attempt: %d err: %v", attempt, err)
+			}
+			return errors.Wrap(err, "failed to initialize CNS state")
+		}, retry.Context(ctx), retry.Delay(initCNSInitalDelay), retry.MaxDelay(time.Minute))
 		if err != nil {
-			logger.Errorf("failed to reconcile initial CNS state, attempt: %d err: %v", attempt, err)
+			return err
 		}
-		return errors.Wrap(err, "failed to initialize CNS state")
-	}, retry.Context(ctx), retry.Delay(initCNSInitalDelay), retry.MaxDelay(time.Minute))
-	if err != nil {
-		return err
+		logger.Printf("reconciled initial CNS state after %d attempts", attempt)
 	}
-	logger.Printf("reconciled initial CNS state after %d attempts", attempt)
 
 	// start the pool Monitor before the Reconciler, since it needs to be ready to receive an
 	// NodeNetworkConfig update by the time the Reconciler tries to send it.
diff --git a/cns/singletenantcontroller/reconciler.go b/cns/singletenantcontroller/reconciler.go
index 7ebbbdb308..92cfa9082c 100644
--- a/cns/singletenantcontroller/reconciler.go
+++ b/cns/singletenantcontroller/reconciler.go
@@ -81,20 +81,23 @@ func (r *Reconciler) Reconcile(ctx context.Context, req reconcile.Request) (reco
 
 	for i := range nnc.Status.NetworkContainers {
 		var req *cns.CreateNetworkContainerRequest
 		var err error
-		switch nnc.Status.NetworkContainers[i].AssignmentMode {
-		case v1alpha.Dynamic:
+		switch nnc.Status.NetworkContainers[i].AssignmentMode { //nolint:exhaustive // skipping dynamic case
+		case v1alpha.Static:
+			req, err = CreateNCRequestFromStaticNC(nnc.Status.NetworkContainers[i])
+		default: // For backward compatibility, default will be treated as Dynamic too.
 			req, err = CreateNCRequestFromDynamicNC(nnc.Status.NetworkContainers[i])
 			// in dynamic, we will also push this NNC to the IPAM Pool Monitor when we're done.
 			listenersToNotify = append(listenersToNotify, r.ipampoolmonitorcli)
-		case v1alpha.Static:
-			req, err = CreateNCRequestFromStaticNC(nnc.Status.NetworkContainers[i])
-		default:
-			// unrecognized mode, fail out
-			err = errors.Errorf("unknown NetworkContainer AssignmentMode %s", string(nnc.Status.NetworkContainers[i].AssignmentMode))
+		}
+
 		if err != nil {
-			return reconcile.Result{}, errors.Wrap(err, "failed to generate CreateNCRequest from NC")
+			logger.Errorf("[cns-rc] failed to generate CreateNCRequest from NC: %v, assignmentMode %s", err,
+				nnc.Status.NetworkContainers[i].AssignmentMode)
+			return reconcile.Result{}, errors.Wrapf(err, "failed to generate CreateNCRequest from NC "+
+				"assignmentMode %s", nnc.Status.NetworkContainers[i].AssignmentMode)
 		}
+
 		responseCode := r.cnscli.CreateOrUpdateNetworkContainerInternal(req)
 		if err := restserver.ResponseCodeToError(responseCode); err != nil {
 			logger.Errorf("[cns-rc] Error creating or updating NC in reconcile: %v", err)
@@ -113,6 +116,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req reconcile.Request) (reco
 	}
 
 	// we have received and pushed an NNC update, we are "Started"
+	logger.Printf("[cns-rc] CNS NNC Reconciler Started")
 	r.once.Do(func() { close(r.started) })
 	return reconcile.Result{}, nil
 }
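
Note: the core change in both files is the same dispatch pattern: convert each NetworkContainer according to its AssignmentMode, handling Static explicitly and treating anything else (including an unset mode) as Dynamic for backward compatibility. Below is a minimal, self-contained Go sketch of that pattern; the types and converter functions are simplified stand-ins for illustration only, not the real cns, v1alpha, or kubecontroller APIs.

package main

import "fmt"

// Simplified stand-ins for the CRD/CNS types referenced in the patch.
type AssignmentMode string

const (
	Dynamic AssignmentMode = "Dynamic"
	Static  AssignmentMode = "Static"
)

type NetworkContainer struct {
	ID             string
	AssignmentMode AssignmentMode
}

type CreateNetworkContainerRequest struct{ NetworkContainerID string }

func createNCRequestFromStaticNC(nc NetworkContainer) (*CreateNetworkContainerRequest, error) {
	return &CreateNetworkContainerRequest{NetworkContainerID: nc.ID + "/static"}, nil
}

func createNCRequestFromDynamicNC(nc NetworkContainer) (*CreateNetworkContainerRequest, error) {
	return &CreateNetworkContainerRequest{NetworkContainerID: nc.ID + "/dynamic"}, nil
}

// buildNCRequest mirrors the switch in both hunks: Static has its own path,
// and any other value (including empty) falls back to the Dynamic path.
func buildNCRequest(nc NetworkContainer) (*CreateNetworkContainerRequest, error) {
	var req *CreateNetworkContainerRequest
	var err error
	switch nc.AssignmentMode {
	case Static:
		req, err = createNCRequestFromStaticNC(nc)
	default: // unset or Dynamic: treated as Dynamic for backward compatibility.
		req, err = createNCRequestFromDynamicNC(nc)
	}
	if err != nil {
		return nil, fmt.Errorf("failed to convert NC, assignmentMode %s: %w", nc.AssignmentMode, err)
	}
	return req, nil
}

func main() {
	for _, nc := range []NetworkContainer{
		{ID: "nc-0", AssignmentMode: Static},
		{ID: "nc-1"}, // no mode set: handled as Dynamic
	} {
		req, err := buildNCRequest(nc)
		fmt.Println(req, err)
	}
}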
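The InitializeCRDState hunk additionally gates the initial reconcile on whether any pods already hold NC IPs, and only then retries it with a capped delay. The sketch below shows that flow under stated assumptions: it uses github.com/avast/retry-go (the "retry" package the patch already calls with retry.Context, retry.Delay, and retry.MaxDelay), while fakePodInfoByIP, reconcileOnce, and the 5-second delay are made-up placeholders standing in for podInfoByIPProvider.PodInfoByIP, reconcileInitialCNSState, and initCNSInitalDelay.

package main

import (
	"context"
	"fmt"
	"time"

	"github.com/avast/retry-go"
)

// fakePodInfoByIP is a placeholder for podInfoByIPProvider.PodInfoByIP().
func fakePodInfoByIP() (map[string]string, error) {
	return map[string]string{"10.240.0.10": "default/pod-a"}, nil
}

// reconcileOnce is a placeholder for reconcileInitialCNSState; it always succeeds here.
func reconcileOnce(ctx context.Context) error { return nil }

func main() {
	ctx := context.Background()

	// Skip the initial reconcile entirely when no existing pods use NC IPs;
	// the regular NNC Reconciler loop will establish the goal state instead.
	podInfoByIP, err := fakePodInfoByIP()
	if err != nil {
		panic(err)
	}
	if len(podInfoByIP) == 0 {
		fmt.Println("no existing pods, skipping initial reconcile")
		return
	}

	// Retry with a capped backoff, mirroring the patch's
	// retry.Do(..., retry.Context, retry.Delay, retry.MaxDelay) options.
	attempt := 0
	err = retry.Do(func() error {
		attempt++
		return reconcileOnce(ctx)
	}, retry.Context(ctx), retry.Delay(5*time.Second), retry.MaxDelay(time.Minute))
	if err != nil {
		panic(err)
	}
	fmt.Printf("reconciled initial state after %d attempts\n", attempt)
}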