Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 37 additions & 16 deletions cns/service/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -857,10 +857,21 @@ func reconcileInitialCNSState(ctx context.Context, cli nodeNetworkConfigGetter,

// Convert to CreateNetworkContainerRequest
for i := range nnc.Status.NetworkContainers {
ncRequest, err := kubecontroller.CreateNCRequestFromDynamicNC(nnc.Status.NetworkContainers[i])
var ncRequest *cns.CreateNetworkContainerRequest
var err error

switch nnc.Status.NetworkContainers[i].AssignmentMode { //nolint:exhaustive // skipping dynamic case
case v1alpha.Static:
ncRequest, err = kubecontroller.CreateNCRequestFromStaticNC(nnc.Status.NetworkContainers[i])
default: // For backward compatibility, default will be treated as Dynamic too.
ncRequest, err = kubecontroller.CreateNCRequestFromDynamicNC(nnc.Status.NetworkContainers[i])
}

if err != nil {
return errors.Wrap(err, "failed to convert NNC status to network container request")
return errors.Wrapf(err, "failed to convert NNC status to network container request, "+
"assignmentMode: %s", nnc.Status.NetworkContainers[i].AssignmentMode)
}

// rebuild CNS state
podInfoByIP, err := podInfoByIPProvider.PodInfoByIP()
if err != nil {
Expand Down Expand Up @@ -949,23 +960,33 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn
httpRestServiceImplementation.IPAMPoolMonitor = poolMonitor

// reconcile initial CNS state from CNI or apiserver.
// apiserver nnc might not be registered or api server might be down, and crashloop backoff puts us outside of the 5-10 minutes we have for
// aks addons to come up, so retry a bit more aggressively here.
// will retry 10 times maxing out at a minute taking about 8 minutes before it gives up.
attempt := 0
err = retry.Do(func() error {
attempt++
logger.Printf("reconciling initial CNS state attempt: %d", attempt)
err = reconcileInitialCNSState(ctx, scopedcli, httpRestServiceImplementation, podInfoByIPProvider)
// Only reconcile if there are any existing Pods using NC ips,
// else let the goal state be updated using a regular NNC Reconciler loop
podInfoByIP, err := podInfoByIPProvider.PodInfoByIP()
if err != nil {
return errors.Wrap(err, "failed to provide PodInfoByIP")
}
if len(podInfoByIP) > 0 {
logger.Printf("Reconciling initial CNS state as PodInfoByIP is not empty: %d", len(podInfoByIP))

// apiserver nnc might not be registered or api server might be down, and crashloop backoff puts us outside of the 5-10 minutes we have for
// aks addons to come up, so retry a bit more aggressively here.
// will retry 10 times maxing out at a minute taking about 8 minutes before it gives up.
attempt := 0
err = retry.Do(func() error {
attempt++
logger.Printf("reconciling initial CNS state attempt: %d", attempt)
err = reconcileInitialCNSState(ctx, scopedcli, httpRestServiceImplementation, podInfoByIPProvider)
if err != nil {
logger.Errorf("failed to reconcile initial CNS state, attempt: %d err: %v", attempt, err)
}
return errors.Wrap(err, "failed to initialize CNS state")
}, retry.Context(ctx), retry.Delay(initCNSInitalDelay), retry.MaxDelay(time.Minute))
if err != nil {
logger.Errorf("failed to reconcile initial CNS state, attempt: %d err: %v", attempt, err)
return err
}
return errors.Wrap(err, "failed to initialize CNS state")
}, retry.Context(ctx), retry.Delay(initCNSInitalDelay), retry.MaxDelay(time.Minute))
if err != nil {
return err
logger.Printf("reconciled initial CNS state after %d attempts", attempt)
}
logger.Printf("reconciled initial CNS state after %d attempts", attempt)

// start the pool Monitor before the Reconciler, since it needs to be ready to receive a
// NodeNetworkConfig update by the time the Reconciler tries to send it.
Expand Down
20 changes: 12 additions & 8 deletions cns/singletenantcontroller/reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,20 +81,23 @@ func (r *Reconciler) Reconcile(ctx context.Context, req reconcile.Request) (reco
for i := range nnc.Status.NetworkContainers {
var req *cns.CreateNetworkContainerRequest
var err error
switch nnc.Status.NetworkContainers[i].AssignmentMode {
case v1alpha.Dynamic:
switch nnc.Status.NetworkContainers[i].AssignmentMode { //nolint:exhaustive // skipping dynamic case
case v1alpha.Static:
req, err = CreateNCRequestFromStaticNC(nnc.Status.NetworkContainers[i])
default: // For backward compatibility, default will be treated as Dynamic too.
req, err = CreateNCRequestFromDynamicNC(nnc.Status.NetworkContainers[i])
// in dynamic, we will also push this NNC to the IPAM Pool Monitor when we're done.
listenersToNotify = append(listenersToNotify, r.ipampoolmonitorcli)
case v1alpha.Static:
req, err = CreateNCRequestFromStaticNC(nnc.Status.NetworkContainers[i])
default:
// unrecognized mode, fail out
err = errors.Errorf("unknown NetworkContainer AssignmentMode %s", string(nnc.Status.NetworkContainers[i].AssignmentMode))

}

if err != nil {
return reconcile.Result{}, errors.Wrap(err, "failed to generate CreateNCRequest from NC")
logger.Errorf("[cns-rc] failed to generate CreateNCRequest from NC: %v, assignmentMode %s", err,
nnc.Status.NetworkContainers[i].AssignmentMode)
return reconcile.Result{}, errors.Wrapf(err, "failed to generate CreateNCRequest from NC "+
"assignmentMode %s", nnc.Status.NetworkContainers[i].AssignmentMode)
}

responseCode := r.cnscli.CreateOrUpdateNetworkContainerInternal(req)
if err := restserver.ResponseCodeToError(responseCode); err != nil {
logger.Errorf("[cns-rc] Error creating or updating NC in reconcile: %v", err)
Expand All @@ -113,6 +116,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req reconcile.Request) (reco
}

// we have received and pushed an NNC update, we are "Started"
logger.Printf("[cns-rc] CNS NNC Reconciler Started")
r.once.Do(func() { close(r.started) })
return reconcile.Result{}, nil
}
Expand Down