From c4a578bf2bd9116ed4ce6ca860c0f6a34b54850d Mon Sep 17 00:00:00 2001 From: Junguk Cho Date: Thu, 18 Nov 2021 13:02:41 -0800 Subject: [PATCH 1/4] Update codes to enable V2 NPM --- npm/cmd/start.go | 2 +- npm/config/config.go | 4 +-- npm/npm.go | 71 ++++++++++++++++++++++++++------------------ 3 files changed, 45 insertions(+), 32 deletions(-) diff --git a/npm/cmd/start.go b/npm/cmd/start.go index 7812738efd..9a13d52ffc 100644 --- a/npm/cmd/start.go +++ b/npm/cmd/start.go @@ -109,7 +109,7 @@ func start(config npmconfig.Config) error { k8sServerVersion := k8sServerVersion(clientset) var dp dataplane.GenericDataplane - if config.Toggles.EnableV2Controllers { + if config.Toggles.EnableV2NPM { dp, err = dataplane.NewDataPlane(npm.GetNodeName(), common.NewIOShim()) if err != nil { return fmt.Errorf("failed to create dataplane with error %w", err) diff --git a/npm/config/config.go b/npm/config/config.go index 5e79a74cca..32d1b6da09 100644 --- a/npm/config/config.go +++ b/npm/config/config.go @@ -17,7 +17,7 @@ var DefaultConfig = Config{ EnablePrometheusMetrics: true, EnablePprof: true, EnableHTTPDebugAPI: true, - EnableV2Controllers: false, + EnableV2NPM: false, PlaceAzureChainFirst: false, }, } @@ -33,6 +33,6 @@ type Toggles struct { EnablePrometheusMetrics bool EnablePprof bool EnableHTTPDebugAPI bool - EnableV2Controllers bool + EnableV2NPM bool PlaceAzureChainFirst bool } diff --git a/npm/npm.go b/npm/npm.go index 9230c04657..9ee1fb622f 100644 --- a/npm/npm.go +++ b/npm/npm.go @@ -56,19 +56,19 @@ type NetworkPolicyManager struct { informerFactory informers.SharedInformerFactory podInformer coreinformers.PodInformer nsInformer coreinformers.NamespaceInformer + npInformer networkinginformers.NetworkPolicyInformer // V1 controllers (to be deprecated) podControllerV1 *controllersv1.PodController namespaceControllerV1 *controllersv1.NamespaceController npmNamespaceCacheV1 *controllersv1.NpmNamespaceCache + netPolControllerV1 *controllersv1.NetworkPolicyController // V2 controllers podControllerV2 *controllersv2.PodController namespaceControllerV2 *controllersv2.NamespaceController npmNamespaceCacheV2 *controllersv2.NpmNamespaceCache - - npInformer networkinginformers.NetworkPolicyInformer - netPolControllerV1 *controllersv1.NetworkPolicyController + netPolControllerV2 *controllersv2.NetworkPolicyController // ipsMgr are shared in all controllers. Thus, only one ipsMgr is created for simple management // and uses lock to avoid unintentional race condictions in IpsetManager. @@ -90,53 +90,67 @@ func NewNetworkPolicyManager(config npmconfig.Config, klog.Infof("API server version: %+v ai meta data %+v", k8sServerVersion, aiMetadata) npMgr := &NetworkPolicyManager{ - config: config, - informerFactory: informerFactory, - podInformer: informerFactory.Core().V1().Pods(), - nsInformer: informerFactory.Core().V1().Namespaces(), - npInformer: informerFactory.Networking().V1().NetworkPolicies(), - ipsMgr: ipsm.NewIpsetManager(exec), - npmNamespaceCacheV1: &controllersv1.NpmNamespaceCache{NsMap: make(map[string]*controllersv1.Namespace)}, - k8sServerVersion: k8sServerVersion, - NodeName: GetNodeName(), - version: npmVersion, - TelemetryEnabled: true, + config: config, + informerFactory: informerFactory, + podInformer: informerFactory.Core().V1().Pods(), + nsInformer: informerFactory.Core().V1().Namespaces(), + npInformer: informerFactory.Networking().V1().NetworkPolicies(), + k8sServerVersion: k8sServerVersion, + NodeName: GetNodeName(), + version: npmVersion, + TelemetryEnabled: true, } - if npMgr.config.Toggles.EnableV2Controllers { - // create pod controller + // create v2 NPM specific components. + if npMgr.config.Toggles.EnableV2NPM { + npMgr.npmNamespaceCacheV2 = &controllersv2.NpmNamespaceCache{NsMap: make(map[string]*controllersv2.Namespace)} npMgr.podControllerV2 = controllersv2.NewPodController(npMgr.podInformer, dp, npMgr.npmNamespaceCacheV2) - // create NameSpace controller npMgr.namespaceControllerV2 = controllersv2.NewNamespaceController(npMgr.nsInformer, dp, npMgr.npmNamespaceCacheV2) + // Question(jungukcho): Is config.Toggles.PlaceAzureChainFirst needed for v2? + npMgr.netPolControllerV2 = controllersv2.NewNetworkPolicyController(npMgr.npInformer, dp) return npMgr } - // create pod controller + // create v1 NPM specific components. + npMgr.ipsMgr = ipsm.NewIpsetManager(exec) + npMgr.npmNamespaceCacheV1 = &controllersv1.NpmNamespaceCache{NsMap: make(map[string]*controllersv1.Namespace)} npMgr.podControllerV1 = controllersv1.NewPodController(npMgr.podInformer, npMgr.ipsMgr, npMgr.npmNamespaceCacheV1) - // create NameSpace controller npMgr.namespaceControllerV1 = controllersv1.NewNameSpaceController(npMgr.nsInformer, npMgr.ipsMgr, npMgr.npmNamespaceCacheV1) - // create network policy controller npMgr.netPolControllerV1 = controllersv1.NewNetworkPolicyController(npMgr.npInformer, npMgr.ipsMgr, config.Toggles.PlaceAzureChainFirst) - return npMgr } func (npMgr *NetworkPolicyManager) MarshalJSON() ([]byte, error) { m := map[CacheKey]json.RawMessage{} - npmNamespaceCacheRaw, err := json.Marshal(npMgr.npmNamespaceCacheV1) + var npmNamespaceCacheRaw []byte + var err error + if npMgr.config.Toggles.EnableV2NPM { + npmNamespaceCacheRaw, err = json.Marshal(npMgr.npmNamespaceCacheV2) + } else { + npmNamespaceCacheRaw, err = json.Marshal(npMgr.npmNamespaceCacheV1) + } + if err != nil { return nil, errors.Errorf("%s: %v", errMarshalNPMCache, err) } m[NsMap] = npmNamespaceCacheRaw - podControllerRaw, err := json.Marshal(npMgr.podControllerV1) + var podControllerRaw []byte + if npMgr.config.Toggles.EnableV2NPM { + podControllerRaw, err = json.Marshal(npMgr.podControllerV2) + } else { + podControllerRaw, err = json.Marshal(npMgr.podControllerV1) + } + if err != nil { return nil, errors.Errorf("%s: %v", errMarshalNPMCache, err) } m[PodMap] = podControllerRaw - if npMgr.ipsMgr != nil { + // TODO(jungukcho): NPM debug may be broken. + // Will fix it later after v2 controller and linux test if it is broken. + if !npMgr.config.Toggles.EnableV2NPM && npMgr.ipsMgr != nil { listMapRaw, listMapMarshalErr := npMgr.ipsMgr.MarshalListMapJSON() if listMapMarshalErr != nil { return nil, errors.Errorf("%s: %v", errMarshalNPMCache, listMapMarshalErr) @@ -239,16 +253,15 @@ func (npMgr *NetworkPolicyManager) Start(config npmconfig.Config, stopCh <-chan return fmt.Errorf("Network policy informer failed to sync") } - if config.Toggles.EnableV2Controllers { + // start v2 NPM controllers after synced + if config.Toggles.EnableV2NPM { go npMgr.podControllerV2.Run(stopCh) go npMgr.namespaceControllerV2.Run(stopCh) - // TODO add in netpol controller v2 - // go npMgr.netPolControllerV1.Run(stopCh) - // go npMgr.netPolControllerV1.RunPeriodicTasks(stopCh) + go npMgr.netPolControllerV2.Run(stopCh) return nil } - // start controllers after synced + // start v1 NPM controllers after synced go npMgr.podControllerV1.Run(stopCh) go npMgr.namespaceControllerV1.Run(stopCh) go npMgr.netPolControllerV1.Run(stopCh) From 6f36ec18520a439c9dc5a9a4ccb3520afa16d0fb Mon Sep 17 00:00:00 2001 From: Junguk Cho Date: Thu, 18 Nov 2021 13:03:57 -0800 Subject: [PATCH 2/4] Deleted dead codes (if we want to keep it, please let me know) --- npm/npm.go | 46 ---------------------------------------------- 1 file changed, 46 deletions(-) diff --git a/npm/npm.go b/npm/npm.go index 9ee1fb622f..b42bb77304 100644 --- a/npm/npm.go +++ b/npm/npm.go @@ -6,16 +6,12 @@ import ( "encoding/json" "fmt" "os" - "time" - "github.com/Azure/azure-container-networking/aitelemetry" npmconfig "github.com/Azure/azure-container-networking/npm/config" "github.com/Azure/azure-container-networking/npm/ipsm" - "github.com/Azure/azure-container-networking/npm/metrics" controllersv1 "github.com/Azure/azure-container-networking/npm/pkg/controlplane/controllers/v1" controllersv2 "github.com/Azure/azure-container-networking/npm/pkg/controlplane/controllers/v2" "github.com/Azure/azure-container-networking/npm/pkg/dataplane" - "github.com/Azure/azure-container-networking/npm/util" "github.com/pkg/errors" "k8s.io/apimachinery/pkg/version" "k8s.io/client-go/informers" @@ -188,48 +184,6 @@ func GetAIMetadata() string { return aiMetadata } -// SendClusterMetrics :- send NPM cluster metrics using AppInsights -// TODO(jungukcho): need to move codes into metrics packages -func (npMgr *NetworkPolicyManager) SendClusterMetrics() { - var ( - heartbeat = time.NewTicker(time.Minute * heartbeatIntervalInMinutes).C - customDimensions = map[string]string{ - "ClusterID": util.GetClusterID(npMgr.NodeName), - "APIServer": npMgr.k8sServerVersion.String(), - } - podCount = aitelemetry.Metric{ - Name: "PodCount", - CustomDimensions: customDimensions, - } - nsCount = aitelemetry.Metric{ - Name: "NsCount", - CustomDimensions: customDimensions, - } - nwPolicyCount = aitelemetry.Metric{ - Name: "NwPolicyCount", - CustomDimensions: customDimensions, - } - ) - - for { - <-heartbeat - - // Reducing one to remove all-namespaces ns obj - lenOfNsMap := len(npMgr.npmNamespaceCacheV1.NsMap) - nsCount.Value = float64(lenOfNsMap - 1) - - lenOfRawNpMap := npMgr.netPolControllerV1.LengthOfRawNpMap() - nwPolicyCount.Value += float64(lenOfRawNpMap) - - lenOfPodMap := npMgr.podControllerV1.LengthOfPodMap() - podCount.Value += float64(lenOfPodMap) - - metrics.SendMetric(podCount) - metrics.SendMetric(nsCount) - metrics.SendMetric(nwPolicyCount) - } -} - // Start starts shared informers and waits for the shared informer cache to sync. func (npMgr *NetworkPolicyManager) Start(config npmconfig.Config, stopCh <-chan struct{}) error { // Do initialization of data plane before starting syncup of each controller to avoid heavy call to api-server From f1d9b401a48904a213d87681f9351101bbfdab9b Mon Sep 17 00:00:00 2001 From: Junguk Cho Date: Thu, 18 Nov 2021 13:07:37 -0800 Subject: [PATCH 3/4] Update azure-npm.yaml to add toggle parameters --- npm/azure-npm.yaml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/npm/azure-npm.yaml b/npm/azure-npm.yaml index fb5574f486..289eba7aaf 100644 --- a/npm/azure-npm.yaml +++ b/npm/azure-npm.yaml @@ -152,6 +152,8 @@ data: "Toggles": { "EnablePrometheusMetrics": true, "EnablePprof": true, - "EnableHTTPDebugAPI": true + "EnableHTTPDebugAPI": true, + "EnableV2NPM": false, + "PlaceAzureChainFirst": false } - } + } \ No newline at end of file From 4557e0afd711c17d34966c533308107805e119c1 Mon Sep 17 00:00:00 2001 From: Junguk Cho Date: Thu, 18 Nov 2021 13:38:20 -0800 Subject: [PATCH 4/4] Fix incorrect call for v2 NPM --- npm/npm.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/npm/npm.go b/npm/npm.go index b42bb77304..eb0dd3da93 100644 --- a/npm/npm.go +++ b/npm/npm.go @@ -186,9 +186,11 @@ func GetAIMetadata() string { // Start starts shared informers and waits for the shared informer cache to sync. func (npMgr *NetworkPolicyManager) Start(config npmconfig.Config, stopCh <-chan struct{}) error { - // Do initialization of data plane before starting syncup of each controller to avoid heavy call to api-server - if err := npMgr.netPolControllerV1.ResetDataPlane(); err != nil { - return fmt.Errorf("Failed to initialized data plane") + if !config.Toggles.EnableV2NPM { + // Do initialization of data plane before starting syncup of each controller to avoid heavy call to api-server + if err := npMgr.netPolControllerV1.ResetDataPlane(); err != nil { + return fmt.Errorf("Failed to initialized data plane with err %w", err) + } } // Starts all informers manufactured by npMgr's informerFactory.