From a7052450c2643a927b0b2963eccabdbb9e1a381f Mon Sep 17 00:00:00 2001 From: Paul Miller Date: Thu, 4 Nov 2021 18:17:12 +0000 Subject: [PATCH 1/3] stupid simple retry --- cns/service/main.go | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/cns/service/main.go b/cns/service/main.go index 35e9ab778d..a4f0f5d91e 100644 --- a/cns/service/main.go +++ b/cns/service/main.go @@ -60,6 +60,8 @@ const ( // 720 * acn.FiveSeconds sec sleeps = 1Hr maxRetryNodeRegister = 720 + //10 * 30 seconds = 5 minutes of retrying before we crash and backoff. + maxRetryInit = 10 ) var ( @@ -884,9 +886,18 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn } }() - err = initCNS(ctx, scopedcli, httpRestServiceImplementation) - if err != nil { - return errors.Wrap(err, "failed to initialize CNS state") + //apiserver nnc might not be registered or api server might be down and crashloop backof puts us outside of 5-10 minutes we have for + //aks addons to come up so retry a bit more aggresively here. + for tryNum := 0; tryNum <= maxRetryInit; tryNum++ { + err = initCNS(ctx, scopedcli, httpRestServiceImplementation) + if err == nil { + break + } + logger.Errorf("[Azure CNS] failed to init cns: %v", err) + if tryNum >= maxRetryInit { + return errors.Wrap(err, "failed to initialize CNS state") + } + time.Sleep(30 * time.Second) } manager, err := ctrl.NewManager(kubeConfig, ctrl.Options{ From a54d93ee2b83a7314a0dcffa726c3a92fd5d3978 Mon Sep 17 00:00:00 2001 From: Paul Miller Date: Thu, 4 Nov 2021 22:39:34 +0000 Subject: [PATCH 2/3] go lint fixes --- cns/service/main.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/cns/service/main.go b/cns/service/main.go index a4f0f5d91e..345b763e04 100644 --- a/cns/service/main.go +++ b/cns/service/main.go @@ -61,7 +61,8 @@ const ( // 720 * acn.FiveSeconds sec sleeps = 1Hr maxRetryNodeRegister = 720 //10 * 30 seconds = 5 minutes of retrying before we crash and backoff. 
- maxRetryInit = 10 + maxRetryInit = 10 + retryIntervalInit = 30 * time.Second ) var ( @@ -886,8 +887,8 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn } }() - //apiserver nnc might not be registered or api server might be down and crashloop backof puts us outside of 5-10 minutes we have for - //aks addons to come up so retry a bit more aggresively here. + // apiserver nnc might not be registered or api server might be down and crashloop backoff puts us outside of 5-10 minutes we have for + // aks addons to come up so retry a bit more aggressively here. for tryNum := 0; tryNum <= maxRetryInit; tryNum++ { err = initCNS(ctx, scopedcli, httpRestServiceImplementation) if err == nil { @@ -897,7 +898,7 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn if tryNum >= maxRetryInit { return errors.Wrap(err, "failed to initialize CNS state") } - time.Sleep(30 * time.Second) + time.Sleep(retryIntervalInit) } manager, err := ctrl.NewManager(kubeConfig, ctrl.Options{ From 92dd540a3adfb023901491ce87568476b6b48125 Mon Sep 17 00:00:00 2001 From: Paul Miller Date: Fri, 5 Nov 2021 00:15:16 +0000 Subject: [PATCH 3/3] missed one // --- cns/service/main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cns/service/main.go b/cns/service/main.go index 345b763e04..aa86e27fd2 100644 --- a/cns/service/main.go +++ b/cns/service/main.go @@ -60,7 +60,7 @@ const ( // 720 * acn.FiveSeconds sec sleeps = 1Hr maxRetryNodeRegister = 720 - //10 * 30 seconds = 5 minutes of retrying before we crash and backoff. + // 10 * 30 seconds = 5 minutes of retrying before we crash and backoff. maxRetryInit = 10 retryIntervalInit = 30 * time.Second )