From cc81929f4dd9e0c840a5fbb5ee84890ea9a54ef4 Mon Sep 17 00:00:00 2001 From: Ashish Nair Date: Thu, 14 Nov 2024 23:04:14 +0000 Subject: [PATCH 01/16] feat: Creating NNC with HomeAz info in AKS-Swift Workflows --- cns/restserver/homeazmonitor.go | 2 +- cns/service/main.go | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/cns/restserver/homeazmonitor.go b/cns/restserver/homeazmonitor.go index f73c6e35af..1277214fd3 100644 --- a/cns/restserver/homeazmonitor.go +++ b/cns/restserver/homeazmonitor.go @@ -135,7 +135,7 @@ func (h *HomeAzMonitor) Populate(ctx context.Context) { return default: - returnMessage := fmt.Sprintf("[HomeAzMonitor] failed with StatusCode: %d", apiError.StatusCode()) + returnMessage := fmt.Sprintf("[HomeAzMonitor] failed with StatusCode: %d and error %v", apiError.StatusCode(), err) returnCode := types.UnexpectedError h.update(returnCode, returnMessage, cns.HomeAzResponse{IsSupported: true}) return diff --git a/cns/service/main.go b/cns/service/main.go index 36f24dffa7..ace53eaca0 100644 --- a/cns/service/main.go +++ b/cns/service/main.go @@ -1303,6 +1303,8 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn // TODO(rbtr): nodename and namespace should be in the cns config directscopedcli := nncctrl.NewScopedClient(directnnccli, types.NamespacedName{Namespace: "kube-system", Name: nodeName}) + // TODO: Create the NNC CRD here and update it with HomeAzinfo + logger.Printf("Reconciling initial CNS state") // apiserver nnc might not be registered or api server might be down and crashloop backof puts us outside of 5-10 minutes we have for // aks addons to come up so retry a bit more aggresively here. From 95d78f7f5a75354215d0e94a3327a1e3debe432c Mon Sep 17 00:00:00 2001 From: Ashish Nair Date: Tue, 19 Nov 2024 18:20:41 +0000 Subject: [PATCH 02/16] feat: Adding placeholders for NNC creation logic --- cns/configuration/configuration.go | 1 + cns/service/main.go | 11 ++++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/cns/configuration/configuration.go b/cns/configuration/configuration.go index c183c9f0e2..4390fc4621 100644 --- a/cns/configuration/configuration.go +++ b/cns/configuration/configuration.go @@ -35,6 +35,7 @@ type CNSConfig struct { EnableSubnetScarcity bool EnableSwiftV2 bool InitializeFromCNI bool + EnableHomeAz bool KeyVaultSettings KeyVaultSettings MSISettings MSISettings ManageEndpointState bool diff --git a/cns/service/main.go b/cns/service/main.go index ace53eaca0..6bd3562513 100644 --- a/cns/service/main.go +++ b/cns/service/main.go @@ -668,9 +668,9 @@ func main() { } homeAzMonitor := restserver.NewHomeAzMonitor(nmaClient, time.Duration(cnsconfig.AZRSettings.PopulateHomeAzCacheRetryIntervalSecs)*time.Second) - // homeAz monitor is only required when there is a direct channel between DNC and CNS. - // This will prevent the monitor from unnecessarily calling NMA APIs for other scenarios such as AKS-swift, swiftv2 - if cnsconfig.ChannelMode == cns.Direct { + // homeAz monitor is required when there is a direct channel between DNC and CNS OR when homeAz feature is enabled in CNS for AKS-Swift + // This will prevent the monitor from unnecessarily calling NMA APIs for other scenarios such as AKS-swift, swiftv2 when disabled. + if cnsconfig.ChannelMode == cns.Direct || cnsconfig.EnableHomeAz { homeAzMonitor.Start() defer homeAzMonitor.Stop() } @@ -1275,6 +1275,11 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn } } + if cnsconfig.EnableHomeAz { + // Create Node Network Config CRD and update the Home Az field with the cache value from the HomeAz Monitor + + } + // perform state migration from CNI in case CNS is set to manage the endpoint state and has emty state if cnsconfig.EnableStateMigration && !httpRestServiceImplementation.EndpointStateStore.Exists() { if err = PopulateCNSEndpointState(httpRestServiceImplementation.EndpointStateStore); err != nil { From 48e0e1d7b69eb321c6a5ad7647d33144108878c7 Mon Sep 17 00:00:00 2001 From: Ashish Nair Date: Tue, 19 Nov 2024 19:02:50 +0000 Subject: [PATCH 03/16] Adding a base create NNC method --- cns/service/main.go | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/cns/service/main.go b/cns/service/main.go index 6bd3562513..c6839ebc7d 100644 --- a/cns/service/main.go +++ b/cns/service/main.go @@ -1277,7 +1277,7 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn if cnsconfig.EnableHomeAz { // Create Node Network Config CRD and update the Home Az field with the cache value from the HomeAz Monitor - + createOrUpdateNNC(ctx) } // perform state migration from CNI in case CNS is set to manage the endpoint state and has emty state @@ -1520,6 +1520,17 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn return nil } +func createBaseNNC(ctx context.Context, node *corev1.Node) v1alpha.NodeNetworkConfig { + return v1alpha.NodeNetworkConfig{ObjectMeta: metav1.ObjectMeta{ + Annotations: make(map[string]string), + Labels: map[string]string{ + "managed": "true", + "owner": node.Name, + }, + Name: node.Name, + }} +} + // getPodInfoByIPProvider returns a PodInfoByIPProvider that reads endpoint state from the configured source func getPodInfoByIPProvider( ctx context.Context, From ff0bb65c718261f5bc0fcd701d0817c0177af018 Mon Sep 17 00:00:00 2001 From: Ashish Nair Date: Tue, 19 Nov 2024 19:21:58 +0000 Subject: [PATCH 04/16] feat: Adding placeholders for NNC creation logic --- cns/service/main.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cns/service/main.go b/cns/service/main.go index c6839ebc7d..737f618d83 100644 --- a/cns/service/main.go +++ b/cns/service/main.go @@ -1275,11 +1275,6 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn } } - if cnsconfig.EnableHomeAz { - // Create Node Network Config CRD and update the Home Az field with the cache value from the HomeAz Monitor - createOrUpdateNNC(ctx) - } - // perform state migration from CNI in case CNS is set to manage the endpoint state and has emty state if cnsconfig.EnableStateMigration && !httpRestServiceImplementation.EndpointStateStore.Exists() { if err = PopulateCNSEndpointState(httpRestServiceImplementation.EndpointStateStore); err != nil { @@ -1308,7 +1303,12 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn // TODO(rbtr): nodename and namespace should be in the cns config directscopedcli := nncctrl.NewScopedClient(directnnccli, types.NamespacedName{Namespace: "kube-system", Name: nodeName}) - // TODO: Create the NNC CRD here and update it with HomeAzinfo + nnc := v1alpha.NodeNetworkConfig{} + if cnsconfig.EnableHomeAz { + // Create Node Network Config CRD and update the Home Az field with the cache value from the HomeAz Monitor + nnc = createBaseNNC(ctx, node) + } + directcli.Create(ctx, &nnc) logger.Printf("Reconciling initial CNS state") // apiserver nnc might not be registered or api server might be down and crashloop backof puts us outside of 5-10 minutes we have for From 0b1ca25b69def38478d1e72a8b0bbe1b05db57c2 Mon Sep 17 00:00:00 2001 From: Ashish Nair Date: Tue, 19 Nov 2024 19:23:24 +0000 Subject: [PATCH 05/16] fix: Removed the ctx from the baseNNC creation --- cns/service/main.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cns/service/main.go b/cns/service/main.go index 737f618d83..b2c5aa1f4a 100644 --- a/cns/service/main.go +++ b/cns/service/main.go @@ -1306,7 +1306,7 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn nnc := v1alpha.NodeNetworkConfig{} if cnsconfig.EnableHomeAz { // Create Node Network Config CRD and update the Home Az field with the cache value from the HomeAz Monitor - nnc = createBaseNNC(ctx, node) + nnc = createBaseNNC(node) } directcli.Create(ctx, &nnc) @@ -1520,7 +1520,7 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn return nil } -func createBaseNNC(ctx context.Context, node *corev1.Node) v1alpha.NodeNetworkConfig { +func createBaseNNC(node *corev1.Node) v1alpha.NodeNetworkConfig { return v1alpha.NodeNetworkConfig{ObjectMeta: metav1.ObjectMeta{ Annotations: make(map[string]string), Labels: map[string]string{ From 69cb09678ac171071ea3a9c73c9d4e2f7aa35da6 Mon Sep 17 00:00:00 2001 From: Ashish Nair Date: Tue, 19 Nov 2024 19:38:28 +0000 Subject: [PATCH 06/16] fix: Added error check while writing the base NNC to the cluster --- cns/service/main.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cns/service/main.go b/cns/service/main.go index b2c5aa1f4a..ba4573cec4 100644 --- a/cns/service/main.go +++ b/cns/service/main.go @@ -1308,7 +1308,9 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn // Create Node Network Config CRD and update the Home Az field with the cache value from the HomeAz Monitor nnc = createBaseNNC(node) } - directcli.Create(ctx, &nnc) + if err := directcli.Create(ctx, &nnc); err != nil { + return errors.Wrap(err, "failed to create base NNC") + } logger.Printf("Reconciling initial CNS state") // apiserver nnc might not be registered or api server might be down and crashloop backof puts us outside of 5-10 minutes we have for From e5f2637279146501f693927e27bc83eeecd3f79a Mon Sep 17 00:00:00 2001 From: Ashish Nair Date: Tue, 19 Nov 2024 19:40:11 +0000 Subject: [PATCH 07/16] fix: Added error check while writing the base NNC to the cluster --- cns/service/main.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cns/service/main.go b/cns/service/main.go index ba4573cec4..c0d7091a05 100644 --- a/cns/service/main.go +++ b/cns/service/main.go @@ -1308,7 +1308,8 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn // Create Node Network Config CRD and update the Home Az field with the cache value from the HomeAz Monitor nnc = createBaseNNC(node) } - if err := directcli.Create(ctx, &nnc); err != nil { + + if err = directcli.Create(ctx, &nnc); err != nil { return errors.Wrap(err, "failed to create base NNC") } From c7129e14a13b0ae723caf372b73ed1bb6375c998 Mon Sep 17 00:00:00 2001 From: Ashish Nair Date: Tue, 19 Nov 2024 21:31:43 +0000 Subject: [PATCH 08/16] fix: Disabling the gomnd linter as it is no loger active with the new Go version --- .golangci.yml | 50 +++++++++++++++++++++++++------------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/.golangci.yml b/.golangci.yml index b0d9e59c04..b96a12221f 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -3,37 +3,37 @@ issues: max-issues-per-linter: 0 new-from-rev: origin/master linters: - presets: - - bugs - - error - - format - - performance - - unused + presets: + - bugs + - error + - format + - performance + - unused disable: - - maligned - - scopelint + - maligned + - scopelint + - gomnd enable: - - exportloopref - - goconst - - gocritic - - gocyclo - - gofmt - - gomnd - - goprintffuncname - - gosimple - - lll - - misspell - - nakedret - - promlinter - - revive + - exportloopref + - goconst + - gocritic + - gocyclo + - gofmt + - goprintffuncname + - gosimple + - lll + - misspell + - nakedret + - promlinter + - revive linters-settings: gocritic: enabled-tags: - - "diagnostic" - - "style" - - "performance" + - "diagnostic" + - "style" + - "performance" disabled-checks: - - "hugeParam" + - "hugeParam" govet: check-shadowing: true lll: From 978fe6c2494fd3152fb3b7f44b730af4a324ba02 Mon Sep 17 00:00:00 2001 From: Ashish Nair Date: Tue, 3 Dec 2024 23:29:17 +0000 Subject: [PATCH 09/16] feat: Updating the NNC with the HomeAz details from the homeAzMonitor Cache --- cns/restserver/internalapi.go | 8 ++++++++ cns/service/main.go | 2 ++ 2 files changed, 10 insertions(+) diff --git a/cns/restserver/internalapi.go b/cns/restserver/internalapi.go index fa41525030..64e9cbd35d 100644 --- a/cns/restserver/internalapi.go +++ b/cns/restserver/internalapi.go @@ -633,3 +633,11 @@ func (service *HTTPRestService) CreateOrUpdateNetworkContainerInternal(req *cns. func (service *HTTPRestService) SetVFForAccelnetNICs() error { return service.setVFForAccelnetNICs() } + +// GetHomeAz - Get the Home Az for the Node where CNS is running. +func (service *HTTPRestService) GetHomeAz() (homeAzResponse cns.GetHomeAzResponse) { + service.RLock() + homeAzResponse = service.homeAzMonitor.GetHomeAz(context.TODO()) + service.RUnlock() + return +} diff --git a/cns/service/main.go b/cns/service/main.go index c0d7091a05..4748f59df2 100644 --- a/cns/service/main.go +++ b/cns/service/main.go @@ -1307,6 +1307,8 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn if cnsconfig.EnableHomeAz { // Create Node Network Config CRD and update the Home Az field with the cache value from the HomeAz Monitor nnc = createBaseNNC(node) + homeAzResponse := httpRestServiceImplementation.GetHomeAz() + nnc.Spec.AvailabilityZone = strconv.FormatUint(uint64(homeAzResponse.HomeAzResponse.HomeAz), 10) } if err = directcli.Create(ctx, &nnc); err != nil { From 600dd981ebe3e65eb11e291db801a522cd7d53f3 Mon Sep 17 00:00:00 2001 From: Ashish Nair Date: Tue, 3 Dec 2024 23:40:43 +0000 Subject: [PATCH 10/16] fix: Updated the context that needs to be passed --- cns/restserver/internalapi.go | 4 ++-- cns/service/main.go | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cns/restserver/internalapi.go b/cns/restserver/internalapi.go index 64e9cbd35d..48869fe8b6 100644 --- a/cns/restserver/internalapi.go +++ b/cns/restserver/internalapi.go @@ -635,9 +635,9 @@ func (service *HTTPRestService) SetVFForAccelnetNICs() error { } // GetHomeAz - Get the Home Az for the Node where CNS is running. -func (service *HTTPRestService) GetHomeAz() (homeAzResponse cns.GetHomeAzResponse) { +func (service *HTTPRestService) GetHomeAz(ctx context.Context) (homeAzResponse cns.GetHomeAzResponse) { service.RLock() - homeAzResponse = service.homeAzMonitor.GetHomeAz(context.TODO()) + homeAzResponse = service.homeAzMonitor.GetHomeAz(ctx) service.RUnlock() return } diff --git a/cns/service/main.go b/cns/service/main.go index 4748f59df2..05ef0b6e56 100644 --- a/cns/service/main.go +++ b/cns/service/main.go @@ -1307,7 +1307,7 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn if cnsconfig.EnableHomeAz { // Create Node Network Config CRD and update the Home Az field with the cache value from the HomeAz Monitor nnc = createBaseNNC(node) - homeAzResponse := httpRestServiceImplementation.GetHomeAz() + homeAzResponse := httpRestServiceImplementation.GetHomeAz(ctx) nnc.Spec.AvailabilityZone = strconv.FormatUint(uint64(homeAzResponse.HomeAzResponse.HomeAz), 10) } From df7c25a267fa7716f4de254f008e5693c2f0edad Mon Sep 17 00:00:00 2001 From: Ashish Nair Date: Tue, 17 Dec 2024 14:57:14 -0800 Subject: [PATCH 11/16] fix:Updating the NNC to Patch or create a new NNC and update the HomeAz accordingly --- .pipelines/mdnc/azure-cns-cni-1.4.39.1.yaml | 2 +- .pipelines/mdnc/azure-cns-cni-1.5.28.yaml | 2 +- .pipelines/mdnc/azure-cns-cni-1.5.4.yaml | 2 +- .pipelines/mdnc/azure-cns-cni.yaml | 2 +- cns/azure-cns.yaml | 2 +- cns/service/main.go | 44 ++++++++++++++----- .../manifests/cilium/cns-write-ovly.yaml | 2 +- 7 files changed, 40 insertions(+), 16 deletions(-) diff --git a/.pipelines/mdnc/azure-cns-cni-1.4.39.1.yaml b/.pipelines/mdnc/azure-cns-cni-1.4.39.1.yaml index 47ae2f3557..c18a5d219c 100644 --- a/.pipelines/mdnc/azure-cns-cni-1.4.39.1.yaml +++ b/.pipelines/mdnc/azure-cns-cni-1.4.39.1.yaml @@ -12,7 +12,7 @@ metadata: rules: - apiGroups: ["acn.azure.com"] resources: ["nodenetworkconfigs"] - verbs: ["get", "list", "watch", "patch", "update"] + verbs: ["create", "delete", "get", "list", "watch", "patch", "update"] --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole diff --git a/.pipelines/mdnc/azure-cns-cni-1.5.28.yaml b/.pipelines/mdnc/azure-cns-cni-1.5.28.yaml index 3db8a46a3c..18f0eaee44 100644 --- a/.pipelines/mdnc/azure-cns-cni-1.5.28.yaml +++ b/.pipelines/mdnc/azure-cns-cni-1.5.28.yaml @@ -12,7 +12,7 @@ metadata: rules: - apiGroups: ["acn.azure.com"] resources: ["nodenetworkconfigs"] - verbs: ["get", "list", "watch", "patch", "update"] + verbs: ["create", "delete", "get", "list", "watch", "patch", "update"] --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole diff --git a/.pipelines/mdnc/azure-cns-cni-1.5.4.yaml b/.pipelines/mdnc/azure-cns-cni-1.5.4.yaml index f33fbba69d..40a3ec4bc2 100644 --- a/.pipelines/mdnc/azure-cns-cni-1.5.4.yaml +++ b/.pipelines/mdnc/azure-cns-cni-1.5.4.yaml @@ -12,7 +12,7 @@ metadata: rules: - apiGroups: ["acn.azure.com"] resources: ["nodenetworkconfigs"] - verbs: ["get", "list", "watch", "patch", "update"] + verbs: ["create", "delete", "get", "list", "watch", "patch", "update"] --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole diff --git a/.pipelines/mdnc/azure-cns-cni.yaml b/.pipelines/mdnc/azure-cns-cni.yaml index 469f25c669..a1e2ea5d42 100644 --- a/.pipelines/mdnc/azure-cns-cni.yaml +++ b/.pipelines/mdnc/azure-cns-cni.yaml @@ -12,7 +12,7 @@ metadata: rules: - apiGroups: ["acn.azure.com"] resources: ["nodenetworkconfigs"] - verbs: ["get", "list", "watch", "patch", "update"] + verbs: ["create", "delete", "get", "list", "watch", "patch", "update"] --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole diff --git a/cns/azure-cns.yaml b/cns/azure-cns.yaml index 260bb775c1..fefafb14de 100644 --- a/cns/azure-cns.yaml +++ b/cns/azure-cns.yaml @@ -12,7 +12,7 @@ metadata: rules: - apiGroups: ["acn.azure.com"] resources: ["nodenetworkconfigs"] - verbs: ["get", "list", "watch", "patch", "update"] + verbs: ["create", "delete", "get", "list", "watch", "patch", "update"] --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole diff --git a/cns/service/main.go b/cns/service/main.go index 05ef0b6e56..b5ea65669a 100644 --- a/cns/service/main.go +++ b/cns/service/main.go @@ -1303,16 +1303,39 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn // TODO(rbtr): nodename and namespace should be in the cns config directscopedcli := nncctrl.NewScopedClient(directnnccli, types.NamespacedName{Namespace: "kube-system", Name: nodeName}) - nnc := v1alpha.NodeNetworkConfig{} + // Create the base NNC CRD if HomeAz is enabled if cnsconfig.EnableHomeAz { - // Create Node Network Config CRD and update the Home Az field with the cache value from the HomeAz Monitor - nnc = createBaseNNC(node) homeAzResponse := httpRestServiceImplementation.GetHomeAz(ctx) - nnc.Spec.AvailabilityZone = strconv.FormatUint(uint64(homeAzResponse.HomeAzResponse.HomeAz), 10) - } + availabilityZone := strconv.FormatUint(uint64(homeAzResponse.HomeAzResponse.HomeAz), 10) + logger.Printf("[Azure CNS] HomeAz: %s", availabilityZone) + // Create Node Network Config CRD and update the Home Az field with the cache value from the HomeAz Monitor + var nnc *v1alpha.NodeNetworkConfig + if nnc, err = directnnccli.Get(ctx, types.NamespacedName{Namespace: "kube-system", Name: nodeName}); err != nil { + logger.Errorf("[Azure CNS] failed to get existing NNC: %v", err) + } - if err = directcli.Create(ctx, &nnc); err != nil { - return errors.Wrap(err, "failed to create base NNC") + if nnc == nil { + logger.Printf("[Azure CNS] Creating new base NNC") + newNNC := createBaseNNC(node) + newNNC.Spec.AvailabilityZone = availabilityZone + if err = directcli.Create(ctx, newNNC); err != nil { + return errors.Wrap(err, "failed to create base NNC") + } + } else { + // nnc.Spec.AvailabilityZone = availabilityZone + // if err = directcli.Update(ctx, nnc); err != nil { + // return errors.Wrap(err, "failed to update base NNC") + // } + logger.Printf("[Azure CNS] Patching existing NNC with new Spec with HomeAz") + newSpec := v1alpha.NodeNetworkConfigSpec{} + newSpec.AvailabilityZone = availabilityZone + newSpec.RequestedIPCount = nnc.Spec.RequestedIPCount + newSpec.IPsNotInUse = nnc.Spec.IPsNotInUse + if _, err := directnnccli.PatchSpec(ctx, types.NamespacedName{Namespace: "kube-system", Name: nodeName}, &newSpec, "azure-cns"); err != nil { + return errors.Wrap(err, "failed to update base NNC") + } + } + logger.Printf("[Azure CNS] Updated HomeAz in NNC") } logger.Printf("Reconciling initial CNS state") @@ -1525,14 +1548,15 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn return nil } -func createBaseNNC(node *corev1.Node) v1alpha.NodeNetworkConfig { - return v1alpha.NodeNetworkConfig{ObjectMeta: metav1.ObjectMeta{ +func createBaseNNC(node *corev1.Node) *v1alpha.NodeNetworkConfig { + return &v1alpha.NodeNetworkConfig{ObjectMeta: metav1.ObjectMeta{ Annotations: make(map[string]string), Labels: map[string]string{ "managed": "true", "owner": node.Name, }, - Name: node.Name, + Name: node.Name, + Namespace: "kube-system", }} } diff --git a/test/integration/manifests/cilium/cns-write-ovly.yaml b/test/integration/manifests/cilium/cns-write-ovly.yaml index 4f3d919757..296edd3f48 100644 --- a/test/integration/manifests/cilium/cns-write-ovly.yaml +++ b/test/integration/manifests/cilium/cns-write-ovly.yaml @@ -12,7 +12,7 @@ metadata: rules: - apiGroups: ["acn.azure.com"] resources: ["nodenetworkconfigs"] - verbs: ["get", "list", "watch", "patch", "update"] + verbs: ["create", "delete", "get", "list", "watch", "patch", "update"] --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole From e0b0a8902df4fa0861d1b74dba7ed92c23475721 Mon Sep 17 00:00:00 2001 From: Ashish Nair Date: Tue, 17 Dec 2024 17:19:55 -0800 Subject: [PATCH 12/16] fix: Updated the code to delete and the create the NNC with the HomeAz value --- cns/service/main.go | 40 +++++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/cns/service/main.go b/cns/service/main.go index b5ea65669a..98c9d48b33 100644 --- a/cns/service/main.go +++ b/cns/service/main.go @@ -1306,36 +1306,46 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn // Create the base NNC CRD if HomeAz is enabled if cnsconfig.EnableHomeAz { homeAzResponse := httpRestServiceImplementation.GetHomeAz(ctx) - availabilityZone := strconv.FormatUint(uint64(homeAzResponse.HomeAzResponse.HomeAz), 10) - logger.Printf("[Azure CNS] HomeAz: %s", availabilityZone) + logger.Printf("[Azure CNS] HomeAz: %s", strconv.FormatUint(uint64(homeAzResponse.HomeAzResponse.HomeAz), 10)) // Create Node Network Config CRD and update the Home Az field with the cache value from the HomeAz Monitor var nnc *v1alpha.NodeNetworkConfig if nnc, err = directnnccli.Get(ctx, types.NamespacedName{Namespace: "kube-system", Name: nodeName}); err != nil { logger.Errorf("[Azure CNS] failed to get existing NNC: %v", err) } + newNNC := createBaseNNC(node) if nnc == nil { logger.Printf("[Azure CNS] Creating new base NNC") - newNNC := createBaseNNC(node) - newNNC.Spec.AvailabilityZone = availabilityZone + newNNC.Spec.AvailabilityZone = strconv.FormatUint(uint64(homeAzResponse.HomeAzResponse.HomeAz), 10) if err = directcli.Create(ctx, newNNC); err != nil { return errors.Wrap(err, "failed to create base NNC") } } else { - // nnc.Spec.AvailabilityZone = availabilityZone - // if err = directcli.Update(ctx, nnc); err != nil { - // return errors.Wrap(err, "failed to update base NNC") - // } logger.Printf("[Azure CNS] Patching existing NNC with new Spec with HomeAz") - newSpec := v1alpha.NodeNetworkConfigSpec{} - newSpec.AvailabilityZone = availabilityZone - newSpec.RequestedIPCount = nnc.Spec.RequestedIPCount - newSpec.IPsNotInUse = nnc.Spec.IPsNotInUse - if _, err := directnnccli.PatchSpec(ctx, types.NamespacedName{Namespace: "kube-system", Name: nodeName}, &newSpec, "azure-cns"); err != nil { - return errors.Wrap(err, "failed to update base NNC") + newNNC.Spec.AvailabilityZone = strconv.FormatUint(uint64(homeAzResponse.HomeAzResponse.HomeAz), 10) + newNNC.Spec.RequestedIPCount = nnc.Spec.RequestedIPCount + newNNC.Spec.IPsNotInUse = nnc.Spec.IPsNotInUse + newNNC.Status = nnc.Status + newNNC.UID = nnc.UID + newNNC.Name = nnc.Name + newNNC.Namespace = nnc.Namespace + newNNC.Annotations = nnc.Annotations + newNNC.Labels = nnc.Labels + newNNC.Finalizers = nnc.Finalizers + newNNC.OwnerReferences = nnc.OwnerReferences + newNNC.CreationTimestamp = nnc.CreationTimestamp + newNNC.DeletionTimestamp = nnc.DeletionTimestamp + + // Delete existing NNC and create new one with updated HomeAz + if err = directcli.Delete(ctx, nnc); err != nil { + return errors.Wrap(err, "[Azure CNS]: failed to delete existing NNC") + } + + if err = directcli.Create(ctx, newNNC); err != nil { + return errors.Wrap(err, "[Azure CNS]: failed to create new NNC") } } - logger.Printf("[Azure CNS] Updated HomeAz in NNC") + logger.Printf("[Azure CNS] Updated HomeAz in NNC %v", newNNC) } logger.Printf("Reconciling initial CNS state") From 8e9ce0882a7a1dabd6459f3d5edbf6fb0e091e9d Mon Sep 17 00:00:00 2001 From: Ashish Nair Date: Wed, 18 Dec 2024 14:21:14 -0800 Subject: [PATCH 13/16] upadte: Fixing the type for the Availability zone for the NNC to int64 type --- cns/service/main.go | 9 +++++---- crd/nodenetworkconfig/api/v1alpha/nodenetworkconfig.go | 3 ++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/cns/service/main.go b/cns/service/main.go index 98c9d48b33..4725a3187f 100644 --- a/cns/service/main.go +++ b/cns/service/main.go @@ -1306,7 +1306,8 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn // Create the base NNC CRD if HomeAz is enabled if cnsconfig.EnableHomeAz { homeAzResponse := httpRestServiceImplementation.GetHomeAz(ctx) - logger.Printf("[Azure CNS] HomeAz: %s", strconv.FormatUint(uint64(homeAzResponse.HomeAzResponse.HomeAz), 10)) + az := int64(homeAzResponse.HomeAzResponse.HomeAz) + logger.Printf("[Azure CNS] HomeAz: %s", strconv.FormatInt(az, 10)) // Create Node Network Config CRD and update the Home Az field with the cache value from the HomeAz Monitor var nnc *v1alpha.NodeNetworkConfig if nnc, err = directnnccli.Get(ctx, types.NamespacedName{Namespace: "kube-system", Name: nodeName}); err != nil { @@ -1316,13 +1317,13 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn newNNC := createBaseNNC(node) if nnc == nil { logger.Printf("[Azure CNS] Creating new base NNC") - newNNC.Spec.AvailabilityZone = strconv.FormatUint(uint64(homeAzResponse.HomeAzResponse.HomeAz), 10) + newNNC.Spec.AvailabilityZone = az if err = directcli.Create(ctx, newNNC); err != nil { return errors.Wrap(err, "failed to create base NNC") } } else { - logger.Printf("[Azure CNS] Patching existing NNC with new Spec with HomeAz") - newNNC.Spec.AvailabilityZone = strconv.FormatUint(uint64(homeAzResponse.HomeAzResponse.HomeAz), 10) + logger.Printf("[Azure CNS] Patching existing NNC with new Spec with HomeAz %d", az) + newNNC.Spec.AvailabilityZone = az newNNC.Spec.RequestedIPCount = nnc.Spec.RequestedIPCount newNNC.Spec.IPsNotInUse = nnc.Spec.IPsNotInUse newNNC.Status = nnc.Status diff --git a/crd/nodenetworkconfig/api/v1alpha/nodenetworkconfig.go b/crd/nodenetworkconfig/api/v1alpha/nodenetworkconfig.go index 91691d6548..786956d410 100644 --- a/crd/nodenetworkconfig/api/v1alpha/nodenetworkconfig.go +++ b/crd/nodenetworkconfig/api/v1alpha/nodenetworkconfig.go @@ -48,8 +48,9 @@ type NodeNetworkConfigSpec struct { RequestedIPCount int64 `json:"requestedIPCount"` IPsNotInUse []string `json:"ipsNotInUse,omitempty"` // AvailabilityZone contains the Azure availability zone for the virtual machine where network containers are placed. + // NMA returns an int value for the availability zone. // +kubebuilder:validation:Optional - AvailabilityZone string `json:"availabilityZone,omitempty"` + AvailabilityZone int64 `json:"availabilityZone,omitempty"` } // Status indicates the NNC reconcile status From e11ded8980c5e171e86c50600bc01035eed22e9d Mon Sep 17 00:00:00 2001 From: Ashish Nair Date: Wed, 18 Dec 2024 14:50:23 -0800 Subject: [PATCH 14/16] upadte: Fixing the type for the Availability zone for the NNC to uint type --- cns/service/main.go | 4 ++-- crd/nodenetworkconfig/api/v1alpha/nodenetworkconfig.go | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cns/service/main.go b/cns/service/main.go index 4725a3187f..9aa086536d 100644 --- a/cns/service/main.go +++ b/cns/service/main.go @@ -1306,8 +1306,8 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn // Create the base NNC CRD if HomeAz is enabled if cnsconfig.EnableHomeAz { homeAzResponse := httpRestServiceImplementation.GetHomeAz(ctx) - az := int64(homeAzResponse.HomeAzResponse.HomeAz) - logger.Printf("[Azure CNS] HomeAz: %s", strconv.FormatInt(az, 10)) + az := homeAzResponse.HomeAzResponse.HomeAz + logger.Printf("[Azure CNS] HomeAz: %d", az) // Create Node Network Config CRD and update the Home Az field with the cache value from the HomeAz Monitor var nnc *v1alpha.NodeNetworkConfig if nnc, err = directnnccli.Get(ctx, types.NamespacedName{Namespace: "kube-system", Name: nodeName}); err != nil { diff --git a/crd/nodenetworkconfig/api/v1alpha/nodenetworkconfig.go b/crd/nodenetworkconfig/api/v1alpha/nodenetworkconfig.go index 786956d410..9379504c7e 100644 --- a/crd/nodenetworkconfig/api/v1alpha/nodenetworkconfig.go +++ b/crd/nodenetworkconfig/api/v1alpha/nodenetworkconfig.go @@ -50,7 +50,7 @@ type NodeNetworkConfigSpec struct { // AvailabilityZone contains the Azure availability zone for the virtual machine where network containers are placed. // NMA returns an int value for the availability zone. // +kubebuilder:validation:Optional - AvailabilityZone int64 `json:"availabilityZone,omitempty"` + AvailabilityZone uint `json:"availabilityZone,omitempty"` } // Status indicates the NNC reconcile status From aa40cac3a5161d68f69e8f0383bbbdeed13c8866 Mon Sep 17 00:00:00 2001 From: Ashish Nair Date: Fri, 20 Dec 2024 14:42:30 -0800 Subject: [PATCH 15/16] fix: Updated the config name to EnableHomeAZ --- cns/configuration/configuration.go | 2 +- cns/service/main.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cns/configuration/configuration.go b/cns/configuration/configuration.go index 4390fc4621..ac3625bf73 100644 --- a/cns/configuration/configuration.go +++ b/cns/configuration/configuration.go @@ -35,7 +35,7 @@ type CNSConfig struct { EnableSubnetScarcity bool EnableSwiftV2 bool InitializeFromCNI bool - EnableHomeAz bool + EnableHomeAZ bool KeyVaultSettings KeyVaultSettings MSISettings MSISettings ManageEndpointState bool diff --git a/cns/service/main.go b/cns/service/main.go index 9aa086536d..f09e829d5c 100644 --- a/cns/service/main.go +++ b/cns/service/main.go @@ -670,7 +670,7 @@ func main() { homeAzMonitor := restserver.NewHomeAzMonitor(nmaClient, time.Duration(cnsconfig.AZRSettings.PopulateHomeAzCacheRetryIntervalSecs)*time.Second) // homeAz monitor is required when there is a direct channel between DNC and CNS OR when homeAz feature is enabled in CNS for AKS-Swift // This will prevent the monitor from unnecessarily calling NMA APIs for other scenarios such as AKS-swift, swiftv2 when disabled. - if cnsconfig.ChannelMode == cns.Direct || cnsconfig.EnableHomeAz { + if cnsconfig.ChannelMode == cns.Direct || cnsconfig.EnableHomeAZ { homeAzMonitor.Start() defer homeAzMonitor.Stop() } @@ -1304,7 +1304,7 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn directscopedcli := nncctrl.NewScopedClient(directnnccli, types.NamespacedName{Namespace: "kube-system", Name: nodeName}) // Create the base NNC CRD if HomeAz is enabled - if cnsconfig.EnableHomeAz { + if cnsconfig.EnableHomeAZ { homeAzResponse := httpRestServiceImplementation.GetHomeAz(ctx) az := homeAzResponse.HomeAzResponse.HomeAz logger.Printf("[Azure CNS] HomeAz: %d", az) From 32c562e14274da6789a4cccc74433101452902d3 Mon Sep 17 00:00:00 2001 From: Ashish Nair Date: Mon, 23 Dec 2024 14:34:30 -0800 Subject: [PATCH 16/16] fix: Addressed all the PR comments --- cns/restserver/internalapi.go | 12 ++++++--- cns/service/main.go | 50 +++++++++++++++++++++++------------ 2 files changed, 41 insertions(+), 21 deletions(-) diff --git a/cns/restserver/internalapi.go b/cns/restserver/internalapi.go index 48869fe8b6..4c2d80e258 100644 --- a/cns/restserver/internalapi.go +++ b/cns/restserver/internalapi.go @@ -635,9 +635,13 @@ func (service *HTTPRestService) SetVFForAccelnetNICs() error { } // GetHomeAz - Get the Home Az for the Node where CNS is running. -func (service *HTTPRestService) GetHomeAz(ctx context.Context) (homeAzResponse cns.GetHomeAzResponse) { +func (service *HTTPRestService) GetHomeAz(ctx context.Context) (cns.GetHomeAzResponse, error) { service.RLock() - homeAzResponse = service.homeAzMonitor.GetHomeAz(ctx) - service.RUnlock() - return + defer service.RUnlock() + homeAzResponse := service.homeAzMonitor.GetHomeAz(ctx) + if homeAzResponse.Response.ReturnCode == types.NotFound { + return homeAzResponse, errors.New(homeAzResponse.Response.Message) + } + + return homeAzResponse, nil } diff --git a/cns/service/main.go b/cns/service/main.go index f09e829d5c..26ba92cb94 100644 --- a/cns/service/main.go +++ b/cns/service/main.go @@ -1305,24 +1305,39 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn // Create the base NNC CRD if HomeAz is enabled if cnsconfig.EnableHomeAZ { - homeAzResponse := httpRestServiceImplementation.GetHomeAz(ctx) + var homeAzResponse cns.GetHomeAzResponse + if homeAzResponse, err = httpRestServiceImplementation.GetHomeAz(ctx); err != nil { + return errors.Wrap(err, "failed to get HomeAz") // error out so that CNS restarts. + } az := homeAzResponse.HomeAzResponse.HomeAz logger.Printf("[Azure CNS] HomeAz: %d", az) // Create Node Network Config CRD and update the Home Az field with the cache value from the HomeAz Monitor var nnc *v1alpha.NodeNetworkConfig - if nnc, err = directnnccli.Get(ctx, types.NamespacedName{Namespace: "kube-system", Name: nodeName}); err != nil { - logger.Errorf("[Azure CNS] failed to get existing NNC: %v", err) - } + err = retry.Do(func() error { + if nnc, err = directnnccli.Get(ctx, types.NamespacedName{Namespace: "kube-system", Name: nodeName}); err != nil { + return errors.Wrap(err, "[Azure CNS] failed to get existing NNC") + } + return nil + }, retry.Delay(initCNSInitalDelay), retry.Attempts(5)) newNNC := createBaseNNC(node) - if nnc == nil { - logger.Printf("[Azure CNS] Creating new base NNC") + if err != nil { + logger.Printf("[Azure CNS] Creating new base NNC with Az %d", az) newNNC.Spec.AvailabilityZone = az - if err = directcli.Create(ctx, newNNC); err != nil { - return errors.Wrap(err, "failed to create base NNC") + nncErr := retry.Do(func() error { + if err = directcli.Create(ctx, newNNC); err != nil { + return errors.Wrap(err, "failed to create base NNC with HomeAz") + } + return nil + }, retry.Delay(initCNSInitalDelay), retry.Attempts(5)) + if nncErr != nil { + return errors.Wrap(nncErr, "[Azure CNS] failed to create base NNC with HomeAz") } - } else { + } + + if err == nil { // NNC exists, patch it with new HomeAz logger.Printf("[Azure CNS] Patching existing NNC with new Spec with HomeAz %d", az) + newNNC.ObjectMeta.ResourceVersion = nnc.ObjectMeta.ResourceVersion newNNC.Spec.AvailabilityZone = az newNNC.Spec.RequestedIPCount = nnc.Spec.RequestedIPCount newNNC.Spec.IPsNotInUse = nnc.Spec.IPsNotInUse @@ -1336,14 +1351,15 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn newNNC.OwnerReferences = nnc.OwnerReferences newNNC.CreationTimestamp = nnc.CreationTimestamp newNNC.DeletionTimestamp = nnc.DeletionTimestamp - - // Delete existing NNC and create new one with updated HomeAz - if err = directcli.Delete(ctx, nnc); err != nil { - return errors.Wrap(err, "[Azure CNS]: failed to delete existing NNC") - } - - if err = directcli.Create(ctx, newNNC); err != nil { - return errors.Wrap(err, "[Azure CNS]: failed to create new NNC") + nncErr := retry.Do(func() error { + patchErr := directcli.Update(ctx, newNNC, &client.UpdateOptions{}) + if patchErr != nil { + return errors.Wrap(patchErr, "failed to patch NNC") + } + return nil + }, retry.Delay(initCNSInitalDelay), retry.Attempts(5)) + if nncErr != nil { + return errors.Wrap(nncErr, "[AzureCNS] failed to patch NNC with Home Az") } } logger.Printf("[Azure CNS] Updated HomeAz in NNC %v", newNNC)