From 9704be8718044fe778c79ba6fde93018e38569cc Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Tue, 10 Jan 2023 14:09:22 -0800 Subject: [PATCH 01/17] set kubeconfig on capz --- npm/examples/windows/azure-npm-capz.yaml | 160 ++++++++++++++++++ .../windows/setkubeconfigpath-capz.ps1 | 11 ++ 2 files changed, 171 insertions(+) create mode 100644 npm/examples/windows/azure-npm-capz.yaml create mode 100644 npm/examples/windows/setkubeconfigpath-capz.ps1 diff --git a/npm/examples/windows/azure-npm-capz.yaml b/npm/examples/windows/azure-npm-capz.yaml new file mode 100644 index 0000000000..4238d12681 --- /dev/null +++ b/npm/examples/windows/azure-npm-capz.yaml @@ -0,0 +1,160 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: azure-npm + namespace: kube-system + labels: + addonmanager.kubernetes.io/mode: EnsureExists +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: azure-npm + namespace: kube-system + labels: + addonmanager.kubernetes.io/mode: EnsureExists +rules: + - apiGroups: + - "" + resources: + - pods + - nodes + - namespaces + verbs: + - get + - list + - watch + - apiGroups: + - networking.k8s.io + resources: + - networkpolicies + verbs: + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: azure-npm-binding + namespace: kube-system + labels: + addonmanager.kubernetes.io/mode: EnsureExists +subjects: + - kind: ServiceAccount + name: azure-npm + namespace: kube-system +roleRef: + kind: ClusterRole + name: azure-npm + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: azure-npm-win + namespace: kube-system + labels: + app: azure-npm + addonmanager.kubernetes.io/mode: EnsureExists +spec: + selector: + matchLabels: + k8s-app: azure-npm + template: + metadata: + labels: + k8s-app: azure-npm + annotations: + azure.npm/scrapeable: "" + spec: + priorityClassName: system-node-critical + tolerations: + - operator: "Exists" + effect: NoExecute + - operator: "Exists" + effect: NoSchedule + - key: CriticalAddonsOnly + operator: Exists + securityContext: + windowsOptions: + hostProcess: true + runAsUserName: "NT AUTHORITY\\SYSTEM" + hostNetwork: true + containers: + - name: azure-npm + image: mcr.microsoft.com/containernetworking/azure-npm:v1.4.29 + command: ["powershell.exe"] + args: + [ + '.\setkubeconfigpath-capz.ps1', + ";", + "powershell.exe", + '.\npm.exe', + "start", + '--kubeconfig=.\kubeconfig', + ] + resources: + limits: + cpu: 250m + memory: 300Mi + requests: + cpu: 250m + env: + - name: HOSTNAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + - name: NPM_CONFIG + value: .\\etc\\azure-npm\\azure-npm.json + volumeMounts: + - name: azure-npm-config + mountPath: .\\etc\\azure-npm + nodeSelector: + kubernetes.io/os: windows + volumes: + - name: azure-npm-config + configMap: + name: azure-npm-config + serviceAccountName: azure-npm +--- +apiVersion: v1 +kind: Service +metadata: + name: npm-metrics-cluster-service + namespace: kube-system + labels: + app: npm-metrics +spec: + selector: + k8s-app: azure-npm + ports: + - port: 9000 + targetPort: 10091 +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: azure-npm-config + namespace: kube-system +data: + azure-npm.json: | + { + "ResyncPeriodInMinutes": 15, + "ListeningPort": 10091, + "ListeningAddress": "0.0.0.0", + "Toggles": { + "EnablePrometheusMetrics": true, + "EnablePprof": true, + "EnableHTTPDebugAPI": true, + "EnableV2NPM": true, + "PlaceAzureChainFirst": true, + "ApplyIPSetsOnNeed": false + }, + "Transport": { + "Address": "azure-npm.kube-system.svc.cluster.local", + "Port": 10092, + "ServicePort": 9001 + } + } diff --git a/npm/examples/windows/setkubeconfigpath-capz.ps1 b/npm/examples/windows/setkubeconfigpath-capz.ps1 new file mode 100644 index 0000000000..883fe6b6fc --- /dev/null +++ b/npm/examples/windows/setkubeconfigpath-capz.ps1 @@ -0,0 +1,11 @@ +# pull the server value from the kubeconfig on host to construct our own kubeconfig, but using service principal settings +# this is required to build a kubeconfig using the kubeconfig on disk in c:\etc\kubernetes, and the service principle granted in the container mount, to generate clientset +$cpEndpoint = Get-Content C:\etc\kubernetes\kubelet.conf | ForEach-Object -Process {if($_.Contains("server:")) {$_.Trim().Split()[1]}} +$token = Get-Content -Path $env:CONTAINER_SANDBOX_MOUNT_POINT\var\run\secrets\kubernetes.io\serviceaccount\token +$ca = Get-Content -Raw -Path $env:CONTAINER_SANDBOX_MOUNT_POINT\var\run\secrets\kubernetes.io\serviceaccount\ca.crt +$caBase64 = [System.Convert]::ToBase64String([System.Text.Encoding]::UTF8.GetBytes($ca)) +$server = "server: $cpEndpoint" +(Get-Content $env:CONTAINER_SANDBOX_MOUNT_POINT\kubeconfigtemplate.yaml). + replace("", $caBase64). + replace("", $server.Trim()). + replace("", $token) | Set-Content $env:CONTAINER_SANDBOX_MOUNT_POINT\kubeconfig -Force From e3e634f20d78742db3cfd38619cebd2e3ce39c92 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Tue, 10 Jan 2023 15:36:31 -0800 Subject: [PATCH 02/17] update dockerfile --- npm/examples/windows/azure-npm-capz.yaml | 2 +- npm/windows.Dockerfile | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/npm/examples/windows/azure-npm-capz.yaml b/npm/examples/windows/azure-npm-capz.yaml index 4238d12681..381e1ee3e1 100644 --- a/npm/examples/windows/azure-npm-capz.yaml +++ b/npm/examples/windows/azure-npm-capz.yaml @@ -83,7 +83,7 @@ spec: hostNetwork: true containers: - name: azure-npm - image: mcr.microsoft.com/containernetworking/azure-npm:v1.4.29 + image: mcr.microsoft.com/containernetworking/azure-npm:v1.4.41 # setting to future version since it will use the updated Dockerfile command: ["powershell.exe"] args: [ diff --git a/npm/windows.Dockerfile b/npm/windows.Dockerfile index 0da5b6a2c1..92627bb68b 100644 --- a/npm/windows.Dockerfile +++ b/npm/windows.Dockerfile @@ -15,6 +15,7 @@ RUN $Env:CGO_ENABLED=0; go build -mod vendor -v -o /usr/bin/npm.exe -ldflags """ FROM mcr.microsoft.com/windows/servercore:ltsc2022 COPY --from=builder /usr/src/npm/npm/examples/windows/kubeconfigtemplate.yaml kubeconfigtemplate.yaml COPY --from=builder /usr/src/npm/npm/examples/windows/setkubeconfigpath.ps1 setkubeconfigpath.ps1 +COPY --from=builder /usr/src/npm/npm/examples/windows/setkubeconfigpath-capz.ps1 setkubeconfigpath-capz.ps1 COPY --from=builder /usr/bin/npm.exe npm.exe CMD ["npm.exe", "start" "--kubeconfig=.\\kubeconfig"] From 149a0f960aaf9959afc3b9235974bb45af4f8eff Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Wed, 11 Jan 2023 09:50:05 -0800 Subject: [PATCH 03/17] test network name Calico --- npm/cmd/start.go | 2 +- npm/pkg/dataplane/ipsets/ipsetmanager_windows.go | 9 +++++++++ npm/util/const.go | 2 +- test/integration/npm/main.go | 2 +- 4 files changed, 12 insertions(+), 3 deletions(-) diff --git a/npm/cmd/start.go b/npm/cmd/start.go index 485ba0254f..58be7d194a 100644 --- a/npm/cmd/start.go +++ b/npm/cmd/start.go @@ -32,7 +32,7 @@ import ( var npmV2DataplaneCfg = &dataplane.Config{ IPSetManagerCfg: &ipsets.IPSetManagerCfg{ - NetworkName: "azure", // FIXME should be specified in DP config instead + NetworkName: "Calico", // FIXME should be specified in DP config instead // NOTE: IPSetMode must be set later by the npm ConfigMap or default config }, PolicyManagerCfg: &policies.PolicyManagerCfg{ diff --git a/npm/pkg/dataplane/ipsets/ipsetmanager_windows.go b/npm/pkg/dataplane/ipsets/ipsetmanager_windows.go index 00538002af..73b20e8d5e 100644 --- a/npm/pkg/dataplane/ipsets/ipsetmanager_windows.go +++ b/npm/pkg/dataplane/ipsets/ipsetmanager_windows.go @@ -304,6 +304,15 @@ func (iMgr *IPSetManager) calculateNewSetPolicies(networkPolicies []hcn.NetworkP } func (iMgr *IPSetManager) getHCnNetwork() (*hcn.HostComputeNetwork, error) { + net, err := hcn.ListNetworks() + if err != nil { + klog.Infof("error in listing networks %v", err) + } else { + for _, n := range net { + klog.Infof("network %v", n) + } + } + if iMgr.iMgrCfg.NetworkName == "" { iMgr.iMgrCfg.NetworkName = util.AzureNetworkName } diff --git a/npm/util/const.go b/npm/util/const.go index f94ecf4935..ecf24a962a 100644 --- a/npm/util/const.go +++ b/npm/util/const.go @@ -231,7 +231,7 @@ const ( ) // AzureNetworkName is the default network Azure CNI creates -const AzureNetworkName = "azure" +const AzureNetworkName = "Calico" // These ID represents where did the error log generate from. // It's for better query purpose. In Kusto these value are used in diff --git a/test/integration/npm/main.go b/test/integration/npm/main.go index de3ca6556e..3dca3bd9c8 100644 --- a/test/integration/npm/main.go +++ b/test/integration/npm/main.go @@ -23,7 +23,7 @@ var ( dpCfg = &dataplane.Config{ IPSetManagerCfg: &ipsets.IPSetManagerCfg{ IPSetMode: ipsets.ApplyAllIPSets, - NetworkName: "azure", + NetworkName: "Calico", }, PolicyManagerCfg: &policies.PolicyManagerCfg{ PolicyMode: policies.IPSetPolicyMode, From 6633874592f7faa42835116421d6b9986476d434 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Thu, 12 Jan 2023 11:13:45 -0800 Subject: [PATCH 04/17] add base acls --- npm/pkg/dataplane/dataplane_windows.go | 2 + .../dataplane/policies/policymanager_linux.go | 4 ++ .../policies/policymanager_windows.go | 40 +++++++++++++++++++ 3 files changed, 46 insertions(+) diff --git a/npm/pkg/dataplane/dataplane_windows.go b/npm/pkg/dataplane/dataplane_windows.go index 856f96065e..d85816674d 100644 --- a/npm/pkg/dataplane/dataplane_windows.go +++ b/npm/pkg/dataplane/dataplane_windows.go @@ -366,6 +366,8 @@ func (dp *DataPlane) refreshPodEndpoints() error { dp.endpointCache.cache[ip] = npmEP // NOTE: TSGs rely on this log line klog.Infof("updating endpoint cache to include %s: %+v", npmEP.ip, npmEP) + + _ = dp.policyMgr.AddBaseACLs(npmEP.id) } else if oldNPMEP.id != endpoint.Id { // multiple endpoints can have the same IP address, but there should be one endpoint ID per pod // throw away old endpoints that have the same IP as a current endpoint (the old endpoint is getting deleted) diff --git a/npm/pkg/dataplane/policies/policymanager_linux.go b/npm/pkg/dataplane/policies/policymanager_linux.go index d88c23cd0c..a8d9c6d298 100644 --- a/npm/pkg/dataplane/policies/policymanager_linux.go +++ b/npm/pkg/dataplane/policies/policymanager_linux.go @@ -22,6 +22,10 @@ const ( chainSectionPrefix = "chain" ) +func (pMgr *PolicyManager) AddBaseACLs(endpointID string) error { + return nil +} + /* Error handling for iptables-restore: Currently we retry on any error and will make two tries max. diff --git a/npm/pkg/dataplane/policies/policymanager_windows.go b/npm/pkg/dataplane/policies/policymanager_windows.go index 325ff0469c..5150865f09 100644 --- a/npm/pkg/dataplane/policies/policymanager_windows.go +++ b/npm/pkg/dataplane/policies/policymanager_windows.go @@ -56,6 +56,46 @@ func (pMgr *PolicyManager) reconcile() { // not implemented } +func (pMgr *PolicyManager) AddBaseACLs(endpointID string) error { + rulesToAdd := []*NPMACLPolSettings{ + { + Id: "host-allow-in", + Action: "Allow", + Direction: "In", + LocalAddresses: "", + LocalPorts: "", + Priority: 0, + Protocols: "256", + RemoteAddresses: "", + RemotePorts: "", + RuleType: "Host", + }, + { + Id: "host-allow-out", + Action: "Allow", + Direction: "Out", + LocalAddresses: "", + LocalPorts: "", + Priority: 0, + Protocols: "256", + RemoteAddresses: "", + RemotePorts: "", + RuleType: "Host", + }, + } + + epPolicyRequest, err := getEPPolicyReqFromACLSettings(rulesToAdd) + if err != nil { + klog.Errorf("failed to get ep Policy request: %+v", err) + return nil + } + + if err := pMgr.applyPoliciesToEndpointID(endpointID, epPolicyRequest); err != nil { + klog.Errorf("error applying base policies: %+v", err) + } + return nil +} + // addPolicy will add the policy for each specified endpoint if the policy doesn't exist on the endpoint yet, // and will add the endpoint to the PodEndpoints of the policy if successful. // addPolicy may modify the endpointList input. From 7964a897d8cf58dfd19c0574fb544835f4fa272b Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Wed, 18 Jan 2023 16:28:48 -0800 Subject: [PATCH 05/17] add WindowsNetworkName toggle and revert hard coded Calico parts --- npm/cmd/start.go | 9 +++++-- npm/config/config.go | 15 +++++------ npm/pkg/dataplane/dataplane_windows.go | 14 +++++++--- .../dataplane/ipsets/ipsetmanager_windows.go | 26 ++++++++----------- .../dataplane/policies/policymanager_linux.go | 4 --- .../policies/policymanager_windows.go | 12 ++++----- npm/util/const.go | 8 ++++-- test/integration/npm/main.go | 2 +- 8 files changed, 48 insertions(+), 42 deletions(-) diff --git a/npm/cmd/start.go b/npm/cmd/start.go index 58be7d194a..a8773a7276 100644 --- a/npm/cmd/start.go +++ b/npm/cmd/start.go @@ -32,8 +32,7 @@ import ( var npmV2DataplaneCfg = &dataplane.Config{ IPSetManagerCfg: &ipsets.IPSetManagerCfg{ - NetworkName: "Calico", // FIXME should be specified in DP config instead - // NOTE: IPSetMode must be set later by the npm ConfigMap or default config + // NOTE: NetworkName and IPSetMode must be set later by the npm ConfigMap or default config }, PolicyManagerCfg: &policies.PolicyManagerCfg{ PolicyMode: policies.IPSetPolicyMode, @@ -124,6 +123,12 @@ func start(config npmconfig.Config, flags npmconfig.Flags) error { stopChannel := wait.NeverStop if config.Toggles.EnableV2NPM { // update the dataplane config + if config.WindowsNetworkName == "" { + npmV2DataplaneCfg.NetworkName = util.AzureNetworkName + } else { + npmV2DataplaneCfg.NetworkName = config.WindowsNetworkName + } + npmV2DataplaneCfg.PlaceAzureChainFirst = config.Toggles.PlaceAzureChainFirst if config.Toggles.ApplyIPSetsOnNeed { npmV2DataplaneCfg.IPSetMode = ipsets.ApplyOnNeed diff --git a/npm/config/config.go b/npm/config/config.go index 66bda2c819..77f9e2e6fc 100644 --- a/npm/config/config.go +++ b/npm/config/config.go @@ -16,6 +16,7 @@ const ( // DefaultConfig is the guaranteed configuration NPM can run in out of the box var DefaultConfig = Config{ + WindowsNetworkName: util.AzureNetworkName, ResyncPeriodInMinutes: defaultResyncPeriod, ListeningPort: defaultListeningPort, @@ -47,14 +48,12 @@ type GrpcServerConfig struct { } type Config struct { - ResyncPeriodInMinutes int `json:"ResyncPeriodInMinutes,omitempty"` - - ListeningPort int `json:"ListeningPort,omitempty"` - ListeningAddress string `json:"ListeningAddress,omitempty"` - - Transport GrpcServerConfig `json:"Transport,omitempty"` - - Toggles Toggles `json:"Toggles,omitempty"` + WindowsNetworkName string `json:"WindowsNetworkName,omitempty"` + ResyncPeriodInMinutes int `json:"ResyncPeriodInMinutes,omitempty"` + ListeningPort int `json:"ListeningPort,omitempty"` + ListeningAddress string `json:"ListeningAddress,omitempty"` + Transport GrpcServerConfig `json:"Transport,omitempty"` + Toggles Toggles `json:"Toggles,omitempty"` } type Toggles struct { diff --git a/npm/pkg/dataplane/dataplane_windows.go b/npm/pkg/dataplane/dataplane_windows.go index d85816674d..ffc7a09448 100644 --- a/npm/pkg/dataplane/dataplane_windows.go +++ b/npm/pkg/dataplane/dataplane_windows.go @@ -56,7 +56,7 @@ func (dp *DataPlane) getNetworkInfo() error { var err error for ; true; <-ticker.C { - err = dp.setNetworkIDByName(util.AzureNetworkName) + err = dp.setNetworkIDByName(dp.NetworkName) if err == nil || !isNetworkNotFoundErr(err) { return err } @@ -65,7 +65,7 @@ func (dp *DataPlane) getNetworkInfo() error { break } klog.Infof("[DataPlane Windows] Network with name %s not found. Retrying in %d seconds, Current retry number %d, max retries: %d", - util.AzureNetworkName, + dp.NetworkName, maxNoNetSleepTime, retryNumber, maxNoNetRetryCount, @@ -367,7 +367,12 @@ func (dp *DataPlane) refreshPodEndpoints() error { // NOTE: TSGs rely on this log line klog.Infof("updating endpoint cache to include %s: %+v", npmEP.ip, npmEP) - _ = dp.policyMgr.AddBaseACLs(npmEP.id) + if dp.NetworkName == util.CalicoNetworkName { + // NOTE 1: connectivity may be broken for an endpoint until this method is called + // NOTE 2: if NPM restarted, technically we could call into HNS to add the base ACLs even if they already exist on the Endpoint. + // It doesn't seem worthwhile to account for these edge-cases since using calico network is currently intended just for testing + dp.policyMgr.AddBaseACLsForCalicoCNI(npmEP.id) + } } else if oldNPMEP.id != endpoint.Id { // multiple endpoints can have the same IP address, but there should be one endpoint ID per pod // throw away old endpoints that have the same IP as a current endpoint (the old endpoint is getting deleted) @@ -425,5 +430,6 @@ func (dp *DataPlane) setNetworkIDByName(networkName string) error { } func isNetworkNotFoundErr(err error) bool { - return strings.Contains(err.Error(), fmt.Sprintf("Network name \"%s\" not found", util.AzureNetworkName)) + return strings.Contains(err.Error(), fmt.Sprintf("Network name \"%s\" not found", util.AzureNetworkName)) || + strings.Contains(err.Error(), fmt.Sprintf("Network name \"%s\" not found", util.CalicoNetworkName)) } diff --git a/npm/pkg/dataplane/ipsets/ipsetmanager_windows.go b/npm/pkg/dataplane/ipsets/ipsetmanager_windows.go index 73b20e8d5e..17008bc3a6 100644 --- a/npm/pkg/dataplane/ipsets/ipsetmanager_windows.go +++ b/npm/pkg/dataplane/ipsets/ipsetmanager_windows.go @@ -20,7 +20,7 @@ const ( donotResetIPSets = false ) -var errUnsupportedNetwork = errors.New("only 'azure' network is supported") +var errUnsupportedNetwork = errors.New("only 'azure' and 'calico' networks are supported") type networkPolicyBuilder struct { toAddSets map[string]*hcn.SetPolicySetting @@ -242,12 +242,17 @@ func (iMgr *IPSetManager) applyIPSets() error { // networkPolicyBuild which contains the new setPolicies to be added, updated and deleted // Assumes that the dirty cache is locked (or equivalently, the ipsetmanager itself). // toAddSets: -// this function will loop through the dirty cache and adds non-existing sets to toAddSets +// +// this function will loop through the dirty cache and adds non-existing sets to toAddSets +// // toUpdateSets: -// this function will loop through the dirty cache and adds existing sets in HNS to toUpdateSets -// this function will update all existing sets in HNS with their latest goal state irrespective of any change to the object +// +// this function will loop through the dirty cache and adds existing sets in HNS to toUpdateSets +// this function will update all existing sets in HNS with their latest goal state irrespective of any change to the object +// // toDeleteSets: -// this function will loop through the dirty delete cache and adds existing set obj in HNS to toDeleteSets +// +// this function will loop through the dirty delete cache and adds existing set obj in HNS to toDeleteSets func (iMgr *IPSetManager) calculateNewSetPolicies(networkPolicies []hcn.NetworkPolicy) (*networkPolicyBuilder, error) { setPolicyBuilder := &networkPolicyBuilder{ toAddSets: map[string]*hcn.SetPolicySetting{}, @@ -304,19 +309,10 @@ func (iMgr *IPSetManager) calculateNewSetPolicies(networkPolicies []hcn.NetworkP } func (iMgr *IPSetManager) getHCnNetwork() (*hcn.HostComputeNetwork, error) { - net, err := hcn.ListNetworks() - if err != nil { - klog.Infof("error in listing networks %v", err) - } else { - for _, n := range net { - klog.Infof("network %v", n) - } - } - if iMgr.iMgrCfg.NetworkName == "" { iMgr.iMgrCfg.NetworkName = util.AzureNetworkName } - if iMgr.iMgrCfg.NetworkName != util.AzureNetworkName { + if iMgr.iMgrCfg.NetworkName != util.AzureNetworkName && iMgr.iMgrCfg.NetworkName != util.CalicoNetworkName { return nil, errUnsupportedNetwork } network, err := iMgr.ioShim.Hns.GetNetworkByName(iMgr.iMgrCfg.NetworkName) diff --git a/npm/pkg/dataplane/policies/policymanager_linux.go b/npm/pkg/dataplane/policies/policymanager_linux.go index a8d9c6d298..d88c23cd0c 100644 --- a/npm/pkg/dataplane/policies/policymanager_linux.go +++ b/npm/pkg/dataplane/policies/policymanager_linux.go @@ -22,10 +22,6 @@ const ( chainSectionPrefix = "chain" ) -func (pMgr *PolicyManager) AddBaseACLs(endpointID string) error { - return nil -} - /* Error handling for iptables-restore: Currently we retry on any error and will make two tries max. diff --git a/npm/pkg/dataplane/policies/policymanager_windows.go b/npm/pkg/dataplane/policies/policymanager_windows.go index 5150865f09..9cd1e2e284 100644 --- a/npm/pkg/dataplane/policies/policymanager_windows.go +++ b/npm/pkg/dataplane/policies/policymanager_windows.go @@ -56,7 +56,8 @@ func (pMgr *PolicyManager) reconcile() { // not implemented } -func (pMgr *PolicyManager) AddBaseACLs(endpointID string) error { +// AddBaseACLsForCalicoCNI attempts to add base ACLs for Calico CNI. +func (pMgr *PolicyManager) AddBaseACLsForCalicoCNI(epID string) { rulesToAdd := []*NPMACLPolSettings{ { Id: "host-allow-in", @@ -86,14 +87,13 @@ func (pMgr *PolicyManager) AddBaseACLs(endpointID string) error { epPolicyRequest, err := getEPPolicyReqFromACLSettings(rulesToAdd) if err != nil { - klog.Errorf("failed to get ep Policy request: %+v", err) - return nil + klog.Errorf("failed to get policy request for base ACLs for Calico CNI. endpoint: %s. err: %v", epID, err) + return } - if err := pMgr.applyPoliciesToEndpointID(endpointID, epPolicyRequest); err != nil { - klog.Errorf("error applying base policies: %+v", err) + if err := pMgr.applyPoliciesToEndpointID(epID, epPolicyRequest); err != nil { + klog.Errorf("failed to apply base ACLs for Calico CNI. endpoint: %s. err: %v", epID, err) } - return nil } // addPolicy will add the policy for each specified endpoint if the policy doesn't exist on the endpoint yet, diff --git a/npm/util/const.go b/npm/util/const.go index ecf24a962a..36bf220107 100644 --- a/npm/util/const.go +++ b/npm/util/const.go @@ -230,8 +230,12 @@ const ( ErrorValue float64 = 1 ) -// AzureNetworkName is the default network Azure CNI creates -const AzureNetworkName = "Calico" +const ( + // AzureNetworkName is the default network Azure CNI creates + AzureNetworkName = "azure" + // CalicoNetworkName is the default network Calico CNI creates + CalicoNetworkName = "calico" +) // These ID represents where did the error log generate from. // It's for better query purpose. In Kusto these value are used in diff --git a/test/integration/npm/main.go b/test/integration/npm/main.go index 3dca3bd9c8..de3ca6556e 100644 --- a/test/integration/npm/main.go +++ b/test/integration/npm/main.go @@ -23,7 +23,7 @@ var ( dpCfg = &dataplane.Config{ IPSetManagerCfg: &ipsets.IPSetManagerCfg{ IPSetMode: ipsets.ApplyAllIPSets, - NetworkName: "Calico", + NetworkName: "azure", }, PolicyManagerCfg: &policies.PolicyManagerCfg{ PolicyMode: policies.IPSetPolicyMode, From 4b64f46e9f5a62a4ec1bf88fedfebd7f9f603414 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Thu, 19 Jan 2023 14:58:37 -0800 Subject: [PATCH 06/17] update base acls for calico and add UTs --- network/hnswrapper/hnsv2wrapperfake.go | 6 +- .../dataplane-test-cases_windows_test.go | 63 +++++++++++++++++++ npm/pkg/dataplane/dataplane_windows_test.go | 7 ++- .../ipsets/ipsetmanager_windows_test.go | 14 ++--- npm/pkg/dataplane/ipsets/testutils_windows.go | 4 +- npm/pkg/dataplane/policies/policy_windows.go | 12 ++-- .../policies/policymanager_windows.go | 60 +++++++++--------- .../policies/policymanager_windows_test.go | 2 +- 8 files changed, 120 insertions(+), 48 deletions(-) diff --git a/network/hnswrapper/hnsv2wrapperfake.go b/network/hnswrapper/hnsv2wrapperfake.go index 50f392c884..ff378255a6 100644 --- a/network/hnswrapper/hnsv2wrapperfake.go +++ b/network/hnswrapper/hnsv2wrapperfake.go @@ -18,8 +18,6 @@ import ( "github.com/Microsoft/hcsshim/hcn" ) -const networkName = "azure" - var errorFakeHNS = errors.New("errorFakeHNS Error") func newErrorFakeHNS(errStr string) error { @@ -185,7 +183,9 @@ func (f Hnsv2wrapperFake) GetNetworkByName(networkName string) (*hcn.HostCompute if network, ok := f.Cache.networks[networkName]; ok { return network.GetHCNObj(), nil } - return nil, hcn.NetworkNotFoundError{} + return nil, hcn.NetworkNotFoundError{ + NetworkName: networkName, + } } func (f Hnsv2wrapperFake) GetNetworkByID(networkID string) (*hcn.HostComputeNetwork, error) { diff --git a/npm/pkg/dataplane/dataplane-test-cases_windows_test.go b/npm/pkg/dataplane/dataplane-test-cases_windows_test.go index 6aae19e33f..1d37612502 100644 --- a/npm/pkg/dataplane/dataplane-test-cases_windows_test.go +++ b/npm/pkg/dataplane/dataplane-test-cases_windows_test.go @@ -15,6 +15,7 @@ const ( podCrudTag Tag = "pod-crud" nsCrudTag Tag = "namespace-crud" netpolCrudTag Tag = "netpol-crud" + calicoTag Tag = "calico" ) const ( @@ -52,6 +53,18 @@ var ( var ( defaultWindowsDPCfg = &Config{ IPSetManagerCfg: &ipsets.IPSetManagerCfg{ + NetworkName: "azure", + IPSetMode: ipsets.ApplyAllIPSets, + AddEmptySetToLists: true, + }, + PolicyManagerCfg: &policies.PolicyManagerCfg{ + PolicyMode: policies.IPSetPolicyMode, + }, + } + + windowsCalicoDPCfg = &Config{ + IPSetManagerCfg: &ipsets.IPSetManagerCfg{ + NetworkName: "calico", IPSetMode: ipsets.ApplyAllIPSets, AddEmptySetToLists: true, }, @@ -482,6 +495,56 @@ func getAllSerialTests() []*SerialTestCase { }, }, }, + { + Description: "Test base ACLs for Calico Network", + Actions: []*Action{ + CreateEndpoint(endpoint1, ip1), + CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + ApplyDP(), + }, + TestCaseMetadata: &TestCaseMetadata{ + Tags: []Tag{ + calicoTag, + podCrudTag, + }, + DpCfg: windowsCalicoDPCfg, + InitialEndpoints: nil, + ExpectedSetPolicies: []*hcn.SetPolicySetting{ + dptestutils.SetPolicy(emptySet), + dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), + dptestutils.SetPolicy(nsXSet, ip1), + dptestutils.SetPolicy(podK1Set, ip1), + dptestutils.SetPolicy(podK1V1Set, ip1), + }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ + endpoint1: { + { + ID: "azure-acl-baseblockhealthprobeout", + Action: "Block", + Direction: "Out", + Priority: 200, + RemoteAddresses: "168.63.129.16/32", + RemotePorts: "80", + Protocols: "6", + }, + { + ID: "azure-acl-baseallowin", + Action: "Allow", + Direction: "In", + Priority: 65499, + Protocols: "256", + }, + { + ID: "azure-acl-baseallowout", + Action: "Allow", + Direction: "Out", + Priority: 65499, + Protocols: "256", + }, + }, + }, + }, + }, } } diff --git a/npm/pkg/dataplane/dataplane_windows_test.go b/npm/pkg/dataplane/dataplane_windows_test.go index 5fd4a18b6f..a9a7cb11e9 100644 --- a/npm/pkg/dataplane/dataplane_windows_test.go +++ b/npm/pkg/dataplane/dataplane_windows_test.go @@ -21,12 +21,15 @@ const ( func TestAllSerialCases(t *testing.T) { tests := getAllSerialTests() for i, tt := range tests { + if tt.Description != "Test base ACLs for Calico Network" { + continue + } i := i tt := tt t.Run(tt.Description, func(t *testing.T) { t.Logf("beginning test #%d. Description: [%s]. Tags: %+v", i, tt.Description, tt.Tags) - hns := ipsets.GetHNSFake(t) + hns := ipsets.GetHNSFake(t, tt.DpCfg.NetworkName) hns.Delay = defaultHNSLatency io := common.NewMockIOShimWithFakeHNS(hns) for _, ep := range tt.InitialEndpoints { @@ -61,7 +64,7 @@ func TestAllMultiJobCases(t *testing.T) { t.Run(tt.Description, func(t *testing.T) { t.Logf("beginning test #%d. Description: [%s]. Tags: %+v", i, tt.Description, tt.Tags) - hns := ipsets.GetHNSFake(t) + hns := ipsets.GetHNSFake(t, tt.DpCfg.NetworkName) hns.Delay = threadedHNSLatency io := common.NewMockIOShimWithFakeHNS(hns) for _, ep := range tt.InitialEndpoints { diff --git a/npm/pkg/dataplane/ipsets/ipsetmanager_windows_test.go b/npm/pkg/dataplane/ipsets/ipsetmanager_windows_test.go index cd929293c9..959e36cf7e 100644 --- a/npm/pkg/dataplane/ipsets/ipsetmanager_windows_test.go +++ b/npm/pkg/dataplane/ipsets/ipsetmanager_windows_test.go @@ -68,7 +68,7 @@ func TestGetIPsFromSelectorIPSets(t *testing.T) { } func TestAddToSetWindows(t *testing.T) { - hns := GetHNSFake(t) + hns := GetHNSFake(t, "azure") io := common.NewMockIOShimWithFakeHNS(hns) iMgr := NewIPSetManager(applyAlwaysCfg, io) @@ -102,7 +102,7 @@ func TestDestroyNPMIPSets(t *testing.T) { // create all possible SetTypes // FIXME because this can flake, commenting this out until we refactor with new windows testing framework // func TestApplyCreationsAndAdds(t *testing.T) { -// hns := GetHNSFake(t) +// hns := GetHNSFake(t, "azure") // io := common.NewMockIOShimWithFakeHNS(hns) // iMgr := NewIPSetManager(applyAlwaysCfg, io) @@ -182,7 +182,7 @@ func TestDestroyNPMIPSets(t *testing.T) { // } func TestApplyDeletions(t *testing.T) { - hns := GetHNSFake(t) + hns := GetHNSFake(t, "azure") io := common.NewMockIOShimWithFakeHNS(hns) iMgr := NewIPSetManager(applyAlwaysCfg, io) @@ -230,7 +230,7 @@ func TestApplyDeletions(t *testing.T) { // TODO test that a reconcile list is updated func TestFailureOnCreation(t *testing.T) { - hns := GetHNSFake(t) + hns := GetHNSFake(t, "azure") io := common.NewMockIOShimWithFakeHNS(hns) iMgr := NewIPSetManager(applyAlwaysCfg, io) @@ -268,7 +268,7 @@ func TestFailureOnCreation(t *testing.T) { // FIXME commenting this out until we refactor with new windows testing framework // func TestFailureOnAddToList(t *testing.T) { // // This exact scenario wouldn't occur. This error happens when the cache is out of date with the kernel. -// hns := GetHNSFake(t) +// hns := GetHNSFake(t, "azure") // io := common.NewMockIOShimWithFakeHNS(hns) // iMgr := NewIPSetManager(applyAlwaysCfg, io) @@ -319,7 +319,7 @@ func TestFailureOnCreation(t *testing.T) { // TODO test that a reconcile list is updated func TestFailureOnFlush(t *testing.T) { // This exact scenario wouldn't occur. This error happens when the cache is out of date with the kernel. - hns := GetHNSFake(t) + hns := GetHNSFake(t, "azure") io := common.NewMockIOShimWithFakeHNS(hns) iMgr := NewIPSetManager(applyAlwaysCfg, io) @@ -348,7 +348,7 @@ func TestFailureOnFlush(t *testing.T) { // TODO test that a reconcile list is updated func TestFailureOnDeletion(t *testing.T) { - hns := GetHNSFake(t) + hns := GetHNSFake(t, "azure") io := common.NewMockIOShimWithFakeHNS(hns) iMgr := NewIPSetManager(applyAlwaysCfg, io) diff --git a/npm/pkg/dataplane/ipsets/testutils_windows.go b/npm/pkg/dataplane/ipsets/testutils_windows.go index f5c780f981..f9235beffe 100644 --- a/npm/pkg/dataplane/ipsets/testutils_windows.go +++ b/npm/pkg/dataplane/ipsets/testutils_windows.go @@ -10,11 +10,11 @@ import ( "github.com/stretchr/testify/require" ) -func GetHNSFake(t *testing.T) *hnswrapper.Hnsv2wrapperFake { +func GetHNSFake(t *testing.T, networkName string) *hnswrapper.Hnsv2wrapperFake { hns := hnswrapper.NewHnsv2wrapperFake() network := &hcn.HostComputeNetwork{ Id: common.FakeHNSNetworkID, - Name: "azure", + Name: networkName, } _, err := hns.CreateNetwork(network) diff --git a/npm/pkg/dataplane/policies/policy_windows.go b/npm/pkg/dataplane/policies/policy_windows.go index 5f4fbd16ff..477142fecf 100644 --- a/npm/pkg/dataplane/policies/policy_windows.go +++ b/npm/pkg/dataplane/policies/policy_windows.go @@ -12,15 +12,17 @@ const ( blockRulePriotity = 3000 allowRulePriotity = 222 policyIDPrefix = "azure-acl" + + // anyProtocol is the number HNS corresponds to any protocol + anyProtocol = "256" ) var ( protocolNumMap = map[Protocol]string{ - TCP: "6", - UDP: "17", - SCTP: "132", - // HNS thinks 256 as ANY protocol - UnspecifiedProtocol: "256", + TCP: "6", + UDP: "17", + SCTP: "132", + UnspecifiedProtocol: anyProtocol, } ErrNamedPortsNotSupported = errors.New("Named Port translation is not supported in windows dataplane") diff --git a/npm/pkg/dataplane/policies/policymanager_windows.go b/npm/pkg/dataplane/policies/policymanager_windows.go index 9cd1e2e284..a11b15d76c 100644 --- a/npm/pkg/dataplane/policies/policymanager_windows.go +++ b/npm/pkg/dataplane/policies/policymanager_windows.go @@ -19,6 +19,37 @@ var ( removeOnlyGivenPolicy shouldResetAllACLs = false ) +// baseACLsForCalicoCNI is a list of base ACLs that are required for connectivity on Calico CNI. +// Note: these ACLs have an ID with only one dash after the prefix so that they can't conflict with ACLs of a policy (see aclPolicyID()). +var baseACLsForCalicoCNI = []*NPMACLPolSettings{ + { + Id: fmt.Sprintf("%s-baseblockhealthprobeout", policyIDPrefix), + Action: "Block", + Direction: "Out", + Priority: 200, + RemoteAddresses: "168.63.129.16/32", + RemotePorts: "80", + Protocols: "6", + RuleType: "Switch", + }, + { + Id: fmt.Sprintf("%s-baseallowin", policyIDPrefix), + Action: "Allow", + Direction: "In", + Priority: 65499, + Protocols: anyProtocol, + RuleType: "Switch", + }, + { + Id: fmt.Sprintf("%s-baseallowout", policyIDPrefix), + Action: "Allow", + Direction: "Out", + Priority: 65499, + Protocols: anyProtocol, + RuleType: "Switch", + }, +} + type staleChains struct{} // unused in Windows type shouldResetAllACLs bool @@ -58,34 +89,7 @@ func (pMgr *PolicyManager) reconcile() { // AddBaseACLsForCalicoCNI attempts to add base ACLs for Calico CNI. func (pMgr *PolicyManager) AddBaseACLsForCalicoCNI(epID string) { - rulesToAdd := []*NPMACLPolSettings{ - { - Id: "host-allow-in", - Action: "Allow", - Direction: "In", - LocalAddresses: "", - LocalPorts: "", - Priority: 0, - Protocols: "256", - RemoteAddresses: "", - RemotePorts: "", - RuleType: "Host", - }, - { - Id: "host-allow-out", - Action: "Allow", - Direction: "Out", - LocalAddresses: "", - LocalPorts: "", - Priority: 0, - Protocols: "256", - RemoteAddresses: "", - RemotePorts: "", - RuleType: "Host", - }, - } - - epPolicyRequest, err := getEPPolicyReqFromACLSettings(rulesToAdd) + epPolicyRequest, err := getEPPolicyReqFromACLSettings(baseACLsForCalicoCNI) if err != nil { klog.Errorf("failed to get policy request for base ACLs for Calico CNI. endpoint: %s. err: %v", epID, err) return diff --git a/npm/pkg/dataplane/policies/policymanager_windows_test.go b/npm/pkg/dataplane/policies/policymanager_windows_test.go index 812c3116e7..65b3f74f1d 100644 --- a/npm/pkg/dataplane/policies/policymanager_windows_test.go +++ b/npm/pkg/dataplane/policies/policymanager_windows_test.go @@ -183,7 +183,7 @@ func TestRemovePoliciesEndpointNotFound(t *testing.T) { // Helper functions for UTS func getPMgr(t *testing.T) (*PolicyManager, *hnswrapper.Hnsv2wrapperFake) { - hns := ipsets.GetHNSFake(t) + hns := ipsets.GetHNSFake(t, "azure") io := common.NewMockIOShimWithFakeHNS(hns) dptestutils.AddIPsToHNS(t, hns, endPointIDList) From d3d9c868d5e7460026ac217a4afe0b9903eb7919 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Thu, 19 Jan 2023 15:12:29 -0800 Subject: [PATCH 07/17] capitalize calico network name --- npm/config/config.go | 2 ++ npm/examples/windows/azure-npm-capz.yaml | 1 + npm/pkg/dataplane/ipsets/ipsetmanager.go | 32 ++++++++++++------------ npm/util/const.go | 2 +- 4 files changed, 20 insertions(+), 17 deletions(-) diff --git a/npm/config/config.go b/npm/config/config.go index 77f9e2e6fc..52d7844a41 100644 --- a/npm/config/config.go +++ b/npm/config/config.go @@ -48,6 +48,8 @@ type GrpcServerConfig struct { } type Config struct { + // WindowsNetworkName can be either 'azure' or 'Calico' (case sensitive). + // It can also be the empty string, which results in the default value of 'azure'. WindowsNetworkName string `json:"WindowsNetworkName,omitempty"` ResyncPeriodInMinutes int `json:"ResyncPeriodInMinutes,omitempty"` ListeningPort int `json:"ListeningPort,omitempty"` diff --git a/npm/examples/windows/azure-npm-capz.yaml b/npm/examples/windows/azure-npm-capz.yaml index 381e1ee3e1..ee9f0a9e6a 100644 --- a/npm/examples/windows/azure-npm-capz.yaml +++ b/npm/examples/windows/azure-npm-capz.yaml @@ -141,6 +141,7 @@ metadata: data: azure-npm.json: | { + "WindowsNetworkName": "Calico", "ResyncPeriodInMinutes": 15, "ListeningPort": 10091, "ListeningAddress": "0.0.0.0", diff --git a/npm/pkg/dataplane/ipsets/ipsetmanager.go b/npm/pkg/dataplane/ipsets/ipsetmanager.go index 7815965394..dc80dfae56 100644 --- a/npm/pkg/dataplane/ipsets/ipsetmanager.go +++ b/npm/pkg/dataplane/ipsets/ipsetmanager.go @@ -15,18 +15,18 @@ import ( type IPSetMode string /* - IPSet Modes - - - ApplyAllIPSets: - - all ipsets are added to the kernel - - ipsets are removed from the kernel when they are deleted from the cache - - creates empty ipsets - - adds empty/unreferenced ipsets to the toDelete cache periodically - - - ApplyOnNeed: - - ipsets are added to the kernel when they are referenced by network policies or lists in the kernel - - ipsets are removed from the kernel when they no longer have a reference - - removes empty/unreferenced ipsets from the cache periodically +IPSet Modes + +- ApplyAllIPSets: + - all ipsets are added to the kernel + - ipsets are removed from the kernel when they are deleted from the cache + - creates empty ipsets + - adds empty/unreferenced ipsets to the toDelete cache periodically + +- ApplyOnNeed: + - ipsets are added to the kernel when they are referenced by network policies or lists in the kernel + - ipsets are removed from the kernel when they no longer have a reference + - removes empty/unreferenced ipsets from the cache periodically */ const ( ApplyAllIPSets IPSetMode = "all" @@ -56,7 +56,7 @@ type IPSetManager struct { type IPSetManagerCfg struct { IPSetMode IPSetMode - // NetworkName can be left empty or set to 'azure' (the only supported network) + // NetworkName can be left empty or set to 'azure' or 'Calico' (case sensitive) NetworkName string // AddEmptySetToLists determines whether all lists should have an empty set as a member. // This is necessary for HNS (Windows); otherwise, an allow ACL with a list condition @@ -75,9 +75,9 @@ func NewIPSetManager(iMgrCfg *IPSetManagerCfg, ioShim *common.IOShim) *IPSetMana } /* - Reconcile removes empty/unreferenced sets from the cache. - For ApplyAllIPSets mode, those sets are added to the toDeleteCache. - We can't delete from kernel immediately unless we lock iMgr during policy CRUD. +Reconcile removes empty/unreferenced sets from the cache. +For ApplyAllIPSets mode, those sets are added to the toDeleteCache. +We can't delete from kernel immediately unless we lock iMgr during policy CRUD. */ func (iMgr *IPSetManager) Reconcile() { iMgr.Lock() diff --git a/npm/util/const.go b/npm/util/const.go index 36bf220107..ff10cc53d6 100644 --- a/npm/util/const.go +++ b/npm/util/const.go @@ -234,7 +234,7 @@ const ( // AzureNetworkName is the default network Azure CNI creates AzureNetworkName = "azure" // CalicoNetworkName is the default network Calico CNI creates - CalicoNetworkName = "calico" + CalicoNetworkName = "Calico" ) // These ID represents where did the error log generate from. From de3b8f3f006dc855d26f0d2a98550bc24a8c373b Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Thu, 19 Jan 2023 15:55:51 -0800 Subject: [PATCH 08/17] fix connectivity. try with host allow acls --- network/hnswrapper/hnsv2wrapperfake.go | 1 + .../dataplane-test-cases_windows_test.go | 30 ++++++++++--- npm/pkg/dataplane/dataplane_windows_test.go | 3 -- .../dataplane/ipsets/ipsetmanager_windows.go | 2 +- .../policies/policymanager_windows.go | 44 ++++++++++++++++--- 5 files changed, 63 insertions(+), 17 deletions(-) diff --git a/network/hnswrapper/hnsv2wrapperfake.go b/network/hnswrapper/hnsv2wrapperfake.go index ff378255a6..7525e9dbe3 100644 --- a/network/hnswrapper/hnsv2wrapperfake.go +++ b/network/hnswrapper/hnsv2wrapperfake.go @@ -528,4 +528,5 @@ type FakeEndpointPolicy struct { LocalPorts string `json:",omitempty"` RemotePorts string `json:",omitempty"` Priority int `json:",omitempty"` + // FIXME should include RuleType too, but that will require updating every instance of this struct in UTs } diff --git a/npm/pkg/dataplane/dataplane-test-cases_windows_test.go b/npm/pkg/dataplane/dataplane-test-cases_windows_test.go index 1d37612502..9e4a3f99e9 100644 --- a/npm/pkg/dataplane/dataplane-test-cases_windows_test.go +++ b/npm/pkg/dataplane/dataplane-test-cases_windows_test.go @@ -64,7 +64,7 @@ var ( windowsCalicoDPCfg = &Config{ IPSetManagerCfg: &ipsets.IPSetManagerCfg{ - NetworkName: "calico", + NetworkName: "Calico", IPSetMode: ipsets.ApplyAllIPSets, AddEmptySetToLists: true, }, @@ -519,7 +519,7 @@ func getAllSerialTests() []*SerialTestCase { ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ endpoint1: { { - ID: "azure-acl-baseblockhealthprobeout", + ID: "azure-acl-baseazurewireserver", Action: "Block", Direction: "Out", Priority: 200, @@ -528,18 +528,36 @@ func getAllSerialTests() []*SerialTestCase { Protocols: "6", }, { - ID: "azure-acl-baseallowin", + ID: "azure-acl-baseallowinswitch", Action: "Allow", Direction: "In", Priority: 65499, - Protocols: "256", }, { - ID: "azure-acl-baseallowout", + ID: "azure-acl-baseallowoutswitch", Action: "Allow", Direction: "Out", Priority: 65499, - Protocols: "256", + }, + { + ID: "azure-acl-baseallowinhost", + Action: "Allow", + Direction: "In", + LocalAddresses: "", + Priority: 0, + RemoteAddresses: "", + // RuleType is unsupported in FakeEndpointPolicy + // RuleType: "Host", + }, + { + ID: "azure-acl-baseallowouthost", + Action: "Allow", + Direction: "Out", + LocalAddresses: "", + Priority: 0, + RemoteAddresses: "", + // RuleType is unsupported in FakeEndpointPolicy + // RuleType: "Host", }, }, }, diff --git a/npm/pkg/dataplane/dataplane_windows_test.go b/npm/pkg/dataplane/dataplane_windows_test.go index a9a7cb11e9..d54ea75b38 100644 --- a/npm/pkg/dataplane/dataplane_windows_test.go +++ b/npm/pkg/dataplane/dataplane_windows_test.go @@ -21,9 +21,6 @@ const ( func TestAllSerialCases(t *testing.T) { tests := getAllSerialTests() for i, tt := range tests { - if tt.Description != "Test base ACLs for Calico Network" { - continue - } i := i tt := tt t.Run(tt.Description, func(t *testing.T) { diff --git a/npm/pkg/dataplane/ipsets/ipsetmanager_windows.go b/npm/pkg/dataplane/ipsets/ipsetmanager_windows.go index 17008bc3a6..a829520976 100644 --- a/npm/pkg/dataplane/ipsets/ipsetmanager_windows.go +++ b/npm/pkg/dataplane/ipsets/ipsetmanager_windows.go @@ -20,7 +20,7 @@ const ( donotResetIPSets = false ) -var errUnsupportedNetwork = errors.New("only 'azure' and 'calico' networks are supported") +var errUnsupportedNetwork = errors.New("only 'azure' and 'Calico' networks are supported") type networkPolicyBuilder struct { toAddSets map[string]*hcn.SetPolicySetting diff --git a/npm/pkg/dataplane/policies/policymanager_windows.go b/npm/pkg/dataplane/policies/policymanager_windows.go index a11b15d76c..f8e5b6cb55 100644 --- a/npm/pkg/dataplane/policies/policymanager_windows.go +++ b/npm/pkg/dataplane/policies/policymanager_windows.go @@ -23,7 +23,7 @@ var ( // Note: these ACLs have an ID with only one dash after the prefix so that they can't conflict with ACLs of a policy (see aclPolicyID()). var baseACLsForCalicoCNI = []*NPMACLPolSettings{ { - Id: fmt.Sprintf("%s-baseblockhealthprobeout", policyIDPrefix), + Id: fmt.Sprintf("%s-baseazurewireserver", policyIDPrefix), Action: "Block", Direction: "Out", Priority: 200, @@ -33,20 +33,50 @@ var baseACLsForCalicoCNI = []*NPMACLPolSettings{ RuleType: "Switch", }, { - Id: fmt.Sprintf("%s-baseallowin", policyIDPrefix), + Id: fmt.Sprintf("%s-baseallowinswitch", policyIDPrefix), Action: "Allow", Direction: "In", Priority: 65499, - Protocols: anyProtocol, - RuleType: "Switch", }, { - Id: fmt.Sprintf("%s-baseallowout", policyIDPrefix), + Id: fmt.Sprintf("%s-baseallowoutswitch", policyIDPrefix), Action: "Allow", Direction: "Out", Priority: 65499, - Protocols: anyProtocol, - RuleType: "Switch", + }, + { + Id: fmt.Sprintf("%s-baseallowinhost", policyIDPrefix), + Action: "Allow", + Direction: "In", + // unsupported for NPMACLPolSettings + // InternalPort: 0, + LocalAddresses: "", + // unsupported for NPMACLPolSettings (note no 's') + // LocalPort: "0", + Priority: 0, + // unsupported for NPMACLPolSettings (note no 's') + // Protocol: "256", + RemoteAddresses: "", + // unsupported for NPMACLPolSettings (note no 's') + // RemotePort: "0", + RuleType: "Host", + }, + { + Id: fmt.Sprintf("%s-baseallowouthost", policyIDPrefix), + Action: "Allow", + Direction: "Out", + // unsupported for NPMACLPolSettings + // InternalPort: 0, + LocalAddresses: "", + // unsupported for NPMACLPolSettings (note no 's') + // LocalPort: "0", + Priority: 0, + // unsupported for NPMACLPolSettings (note no 's') + // Protocol: "256", + RemoteAddresses: "", + // unsupported for NPMACLPolSettings (note no 's') + // RemotePort: "0", + RuleType: "Host", }, } From 19d3af7408a151d240f67304d59efe7621dc1a22 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Thu, 19 Jan 2023 17:10:37 -0800 Subject: [PATCH 09/17] revert change to policy_windows.go --- npm/pkg/dataplane/policies/policy_windows.go | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/npm/pkg/dataplane/policies/policy_windows.go b/npm/pkg/dataplane/policies/policy_windows.go index 477142fecf..5f4fbd16ff 100644 --- a/npm/pkg/dataplane/policies/policy_windows.go +++ b/npm/pkg/dataplane/policies/policy_windows.go @@ -12,17 +12,15 @@ const ( blockRulePriotity = 3000 allowRulePriotity = 222 policyIDPrefix = "azure-acl" - - // anyProtocol is the number HNS corresponds to any protocol - anyProtocol = "256" ) var ( protocolNumMap = map[Protocol]string{ - TCP: "6", - UDP: "17", - SCTP: "132", - UnspecifiedProtocol: anyProtocol, + TCP: "6", + UDP: "17", + SCTP: "132", + // HNS thinks 256 as ANY protocol + UnspecifiedProtocol: "256", } ErrNamedPortsNotSupported = errors.New("Named Port translation is not supported in windows dataplane") From be5466c8b296aae896f43fb93a0cd4199d2528fc Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Thu, 19 Jan 2023 17:21:54 -0800 Subject: [PATCH 10/17] more UTs and add base ACLs for other "new endpoint" scenario --- .../dataplane-test-cases_windows_test.go | 165 +++++++++++++++++- npm/pkg/dataplane/dataplane_windows.go | 7 + npm/pkg/dataplane/dataplane_windows_test.go | 3 + 3 files changed, 174 insertions(+), 1 deletion(-) diff --git a/npm/pkg/dataplane/dataplane-test-cases_windows_test.go b/npm/pkg/dataplane/dataplane-test-cases_windows_test.go index 9e4a3f99e9..cb90e312c6 100644 --- a/npm/pkg/dataplane/dataplane-test-cases_windows_test.go +++ b/npm/pkg/dataplane/dataplane-test-cases_windows_test.go @@ -496,7 +496,7 @@ func getAllSerialTests() []*SerialTestCase { }, }, { - Description: "Test base ACLs for Calico Network", + Description: "Calico Network: base ACLs", Actions: []*Action{ CreateEndpoint(endpoint1, ip1), CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), @@ -563,6 +563,169 @@ func getAllSerialTests() []*SerialTestCase { }, }, }, + { + Description: "Calico Network: add netpol", + Actions: []*Action{ + CreateEndpoint(endpoint1, ip1), + CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + ApplyDP(), + UpdatePolicy(policyXBaseOnK1V1()), + }, + TestCaseMetadata: &TestCaseMetadata{ + Tags: []Tag{ + calicoTag, + podCrudTag, + netpolCrudTag, + }, + DpCfg: windowsCalicoDPCfg, + InitialEndpoints: nil, + ExpectedSetPolicies: []*hcn.SetPolicySetting{ + dptestutils.SetPolicy(emptySet), + dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), + dptestutils.SetPolicy(nsXSet, ip1), + dptestutils.SetPolicy(podK1Set, ip1), + dptestutils.SetPolicy(podK1V1Set, ip1), + }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ + endpoint1: { + { + ID: "azure-acl-baseazurewireserver", + Action: "Block", + Direction: "Out", + Priority: 200, + RemoteAddresses: "168.63.129.16/32", + RemotePorts: "80", + Protocols: "6", + }, + { + ID: "azure-acl-baseallowinswitch", + Action: "Allow", + Direction: "In", + Priority: 65499, + }, + { + ID: "azure-acl-baseallowoutswitch", + Action: "Allow", + Direction: "Out", + Priority: 65499, + }, + { + ID: "azure-acl-baseallowinhost", + Action: "Allow", + Direction: "In", + LocalAddresses: "", + Priority: 0, + RemoteAddresses: "", + // RuleType is unsupported in FakeEndpointPolicy + // RuleType: "Host", + }, + { + ID: "azure-acl-baseallowouthost", + Action: "Allow", + Direction: "Out", + LocalAddresses: "", + Priority: 0, + RemoteAddresses: "", + // RuleType is unsupported in FakeEndpointPolicy + // RuleType: "Host", + }, + { + ID: "azure-acl-x-base", + Protocols: "", + Action: "Allow", + Direction: "In", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + { + ID: "azure-acl-x-base", + Protocols: "", + Action: "Allow", + Direction: "Out", + LocalAddresses: "", + RemoteAddresses: "", + LocalPorts: "", + RemotePorts: "", + Priority: 222, + }, + }, + }, + }, + }, + { + Description: "Calico Network: add then remove netpol", + Actions: []*Action{ + CreateEndpoint(endpoint1, ip1), + CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}), + ApplyDP(), + UpdatePolicy(policyXBaseOnK1V1()), + DeletePolicyByObject(policyXBaseOnK1V1()), + }, + TestCaseMetadata: &TestCaseMetadata{ + Tags: []Tag{ + calicoTag, + podCrudTag, + netpolCrudTag, + }, + DpCfg: windowsCalicoDPCfg, + InitialEndpoints: nil, + ExpectedSetPolicies: []*hcn.SetPolicySetting{ + dptestutils.SetPolicy(emptySet), + dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()), + dptestutils.SetPolicy(nsXSet, ip1), + dptestutils.SetPolicy(podK1Set, ip1), + dptestutils.SetPolicy(podK1V1Set, ip1), + }, + ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{ + endpoint1: { + { + ID: "azure-acl-baseazurewireserver", + Action: "Block", + Direction: "Out", + Priority: 200, + RemoteAddresses: "168.63.129.16/32", + RemotePorts: "80", + Protocols: "6", + }, + { + ID: "azure-acl-baseallowinswitch", + Action: "Allow", + Direction: "In", + Priority: 65499, + }, + { + ID: "azure-acl-baseallowoutswitch", + Action: "Allow", + Direction: "Out", + Priority: 65499, + }, + { + ID: "azure-acl-baseallowinhost", + Action: "Allow", + Direction: "In", + LocalAddresses: "", + Priority: 0, + RemoteAddresses: "", + // RuleType is unsupported in FakeEndpointPolicy + // RuleType: "Host", + }, + { + ID: "azure-acl-baseallowouthost", + Action: "Allow", + Direction: "Out", + LocalAddresses: "", + Priority: 0, + RemoteAddresses: "", + // RuleType is unsupported in FakeEndpointPolicy + // RuleType: "Host", + }, + }, + }, + }, + }, } } diff --git a/npm/pkg/dataplane/dataplane_windows.go b/npm/pkg/dataplane/dataplane_windows.go index ffc7a09448..082ed5f5e2 100644 --- a/npm/pkg/dataplane/dataplane_windows.go +++ b/npm/pkg/dataplane/dataplane_windows.go @@ -390,6 +390,13 @@ func (dp *DataPlane) refreshPodEndpoints() error { // NOTE: TSGs rely on this log line klog.Infof("updating endpoint cache for previously cached IP %s: %+v with stalePodKey %+v", npmEP.ip, npmEP, npmEP.stalePodKey) } + + if dp.NetworkName == util.CalicoNetworkName { + // NOTE 1: connectivity may be broken for an endpoint until this method is called + // NOTE 2: if NPM restarted, technically we could call into HNS to add the base ACLs even if they already exist on the Endpoint. + // It doesn't seem worthwhile to account for these edge-cases since using calico network is currently intended just for testing + dp.policyMgr.AddBaseACLsForCalicoCNI(npmEP.id) + } } } diff --git a/npm/pkg/dataplane/dataplane_windows_test.go b/npm/pkg/dataplane/dataplane_windows_test.go index d54ea75b38..19cebcc0d2 100644 --- a/npm/pkg/dataplane/dataplane_windows_test.go +++ b/npm/pkg/dataplane/dataplane_windows_test.go @@ -21,6 +21,9 @@ const ( func TestAllSerialCases(t *testing.T) { tests := getAllSerialTests() for i, tt := range tests { + if tt.Tags[0] != calicoTag { + continue + } i := i tt := tt t.Run(tt.Description, func(t *testing.T) { From 1d0d4c3e82e287143d0a034ff08d0799c4844903 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Fri, 20 Jan 2023 09:18:50 -0800 Subject: [PATCH 11/17] run all UTs --- npm/pkg/dataplane/dataplane_windows_test.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/npm/pkg/dataplane/dataplane_windows_test.go b/npm/pkg/dataplane/dataplane_windows_test.go index 19cebcc0d2..d54ea75b38 100644 --- a/npm/pkg/dataplane/dataplane_windows_test.go +++ b/npm/pkg/dataplane/dataplane_windows_test.go @@ -21,9 +21,6 @@ const ( func TestAllSerialCases(t *testing.T) { tests := getAllSerialTests() for i, tt := range tests { - if tt.Tags[0] != calicoTag { - continue - } i := i tt := tt t.Run(tt.Description, func(t *testing.T) { From aa0dba74e61d2fb7e255d316f4019720ae5ddb36 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Fri, 20 Jan 2023 09:21:22 -0800 Subject: [PATCH 12/17] update npm image to .42 --- npm/examples/windows/azure-npm-capz.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/npm/examples/windows/azure-npm-capz.yaml b/npm/examples/windows/azure-npm-capz.yaml index ee9f0a9e6a..b6c703aa00 100644 --- a/npm/examples/windows/azure-npm-capz.yaml +++ b/npm/examples/windows/azure-npm-capz.yaml @@ -83,7 +83,8 @@ spec: hostNetwork: true containers: - name: azure-npm - image: mcr.microsoft.com/containernetworking/azure-npm:v1.4.41 # setting to future version since it will use the updated Dockerfile + # setting to future version since it will use the updated Dockerfile + image: mcr.microsoft.com/containernetworking/azure-npm:v1.4.42 command: ["powershell.exe"] args: [ From b6f6e0a16e2952d96d37cae875893fdef9b3ec35 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Fri, 27 Jan 2023 15:27:15 -0800 Subject: [PATCH 13/17] add log line --- npm/pkg/dataplane/dataplane_windows.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/npm/pkg/dataplane/dataplane_windows.go b/npm/pkg/dataplane/dataplane_windows.go index 082ed5f5e2..b66bbbb426 100644 --- a/npm/pkg/dataplane/dataplane_windows.go +++ b/npm/pkg/dataplane/dataplane_windows.go @@ -371,6 +371,7 @@ func (dp *DataPlane) refreshPodEndpoints() error { // NOTE 1: connectivity may be broken for an endpoint until this method is called // NOTE 2: if NPM restarted, technically we could call into HNS to add the base ACLs even if they already exist on the Endpoint. // It doesn't seem worthwhile to account for these edge-cases since using calico network is currently intended just for testing + klog.Infof("adding base ACLs for calico CNI endpoint. IP: %s. ID: %s", ip, npmEP.id) dp.policyMgr.AddBaseACLsForCalicoCNI(npmEP.id) } } else if oldNPMEP.id != endpoint.Id { @@ -395,6 +396,7 @@ func (dp *DataPlane) refreshPodEndpoints() error { // NOTE 1: connectivity may be broken for an endpoint until this method is called // NOTE 2: if NPM restarted, technically we could call into HNS to add the base ACLs even if they already exist on the Endpoint. // It doesn't seem worthwhile to account for these edge-cases since using calico network is currently intended just for testing + klog.Infof("adding base ACLs for calico CNI endpoint. IP: %s. ID: %s", ip, npmEP.id) dp.policyMgr.AddBaseACLsForCalicoCNI(npmEP.id) } } From e1014822d500aa22f02359d09f04e10c92933b2e Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Fri, 27 Jan 2023 15:27:52 -0800 Subject: [PATCH 14/17] allow traffic going inter-node --- npm/pkg/dataplane/policies/policymanager_windows.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/npm/pkg/dataplane/policies/policymanager_windows.go b/npm/pkg/dataplane/policies/policymanager_windows.go index f8e5b6cb55..33f1956bef 100644 --- a/npm/pkg/dataplane/policies/policymanager_windows.go +++ b/npm/pkg/dataplane/policies/policymanager_windows.go @@ -37,12 +37,14 @@ var baseACLsForCalicoCNI = []*NPMACLPolSettings{ Action: "Allow", Direction: "In", Priority: 65499, + RuleType: "Switch", }, { Id: fmt.Sprintf("%s-baseallowoutswitch", policyIDPrefix), Action: "Allow", Direction: "Out", Priority: 65499, + RuleType: "Switch", }, { Id: fmt.Sprintf("%s-baseallowinhost", policyIDPrefix), From ca7c4616c6012e4e038563a6ac8819391772e37d Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Wed, 1 Feb 2023 17:23:32 -0800 Subject: [PATCH 15/17] Revert "allow traffic going inter-node" This reverts commit e1014822d500aa22f02359d09f04e10c92933b2e. --- npm/pkg/dataplane/policies/policymanager_windows.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/npm/pkg/dataplane/policies/policymanager_windows.go b/npm/pkg/dataplane/policies/policymanager_windows.go index 33f1956bef..f8e5b6cb55 100644 --- a/npm/pkg/dataplane/policies/policymanager_windows.go +++ b/npm/pkg/dataplane/policies/policymanager_windows.go @@ -37,14 +37,12 @@ var baseACLsForCalicoCNI = []*NPMACLPolSettings{ Action: "Allow", Direction: "In", Priority: 65499, - RuleType: "Switch", }, { Id: fmt.Sprintf("%s-baseallowoutswitch", policyIDPrefix), Action: "Allow", Direction: "Out", Priority: 65499, - RuleType: "Switch", }, { Id: fmt.Sprintf("%s-baseallowinhost", policyIDPrefix), From 61ed2f4a74facc600ffe858f0758f8c258e9d045 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Tue, 14 Feb 2023 16:55:01 -0800 Subject: [PATCH 16/17] add long-runner pod for testing vfp tags in capz --- .../windows/long-running-pod-for-capz.yaml | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 npm/examples/windows/long-running-pod-for-capz.yaml diff --git a/npm/examples/windows/long-running-pod-for-capz.yaml b/npm/examples/windows/long-running-pod-for-capz.yaml new file mode 100644 index 0000000000..9d256daab9 --- /dev/null +++ b/npm/examples/windows/long-running-pod-for-capz.yaml @@ -0,0 +1,30 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: long-runner + namespace: npm-e2e-longrunner +spec: + replicas: 1 + selector: + matchLabels: + app: long-runner + template: + metadata: + labels: + app: long-runner + spec: + containers: + - command: + - /agnhost + - serve-hostname + - --tcp + - --http=false + - --port + - "80" + image: k8s.gcr.io/e2e-test-images/agnhost:2.33 + imagePullPolicy: IfNotPresent + name: cont-80-tcp + ports: + - containerPort: 80 + name: serve-80-tcp + protocol: TCP From 5712887dfbb46d04530f869269a9e24e4897a1a0 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Tue, 14 Feb 2023 17:24:02 -0800 Subject: [PATCH 17/17] fix lints --- npm/pkg/dataplane/dataplane_windows.go | 4 +- .../policies/policymanager_windows.go | 38 +++++++++++-------- 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/npm/pkg/dataplane/dataplane_windows.go b/npm/pkg/dataplane/dataplane_windows.go index 6724437f1b..05b764ac0a 100644 --- a/npm/pkg/dataplane/dataplane_windows.go +++ b/npm/pkg/dataplane/dataplane_windows.go @@ -443,6 +443,6 @@ func (dp *DataPlane) setNetworkIDByName(networkName string) error { } func isNetworkNotFoundErr(err error) bool { - return strings.Contains(err.Error(), fmt.Sprintf("Network name \"%s\" not found", util.AzureNetworkName)) || - strings.Contains(err.Error(), fmt.Sprintf("Network name \"%s\" not found", util.CalicoNetworkName)) + return strings.Contains(err.Error(), fmt.Sprintf("Network name %q not found", util.AzureNetworkName)) || + strings.Contains(err.Error(), fmt.Sprintf("Network name %q not found", util.CalicoNetworkName)) } diff --git a/npm/pkg/dataplane/policies/policymanager_windows.go b/npm/pkg/dataplane/policies/policymanager_windows.go index f8e5b6cb55..92bbd4fbc2 100644 --- a/npm/pkg/dataplane/policies/policymanager_windows.go +++ b/npm/pkg/dataplane/policies/policymanager_windows.go @@ -12,6 +12,12 @@ import ( "k8s.io/klog" ) +const ( + // for lints + priority200 = 200 + priority65499 = 65499 +) + var ( ErrFailedMarshalACLSettings = errors.New("failed to marshal ACL settings") ErrFailedUnMarshalACLSettings = errors.New("failed to unmarshal ACL settings") @@ -24,30 +30,30 @@ var ( var baseACLsForCalicoCNI = []*NPMACLPolSettings{ { Id: fmt.Sprintf("%s-baseazurewireserver", policyIDPrefix), - Action: "Block", - Direction: "Out", - Priority: 200, + Action: hcn.ActionTypeBlock, + Direction: hcn.DirectionTypeOut, + Priority: priority200, RemoteAddresses: "168.63.129.16/32", RemotePorts: "80", Protocols: "6", - RuleType: "Switch", + RuleType: hcn.RuleTypeSwitch, }, { Id: fmt.Sprintf("%s-baseallowinswitch", policyIDPrefix), - Action: "Allow", - Direction: "In", - Priority: 65499, + Action: hcn.ActionTypeAllow, + Direction: hcn.DirectionTypeIn, + Priority: priority65499, }, { Id: fmt.Sprintf("%s-baseallowoutswitch", policyIDPrefix), - Action: "Allow", - Direction: "Out", - Priority: 65499, + Action: hcn.ActionTypeAllow, + Direction: hcn.DirectionTypeOut, + Priority: priority65499, }, { Id: fmt.Sprintf("%s-baseallowinhost", policyIDPrefix), - Action: "Allow", - Direction: "In", + Action: hcn.ActionTypeAllow, + Direction: hcn.DirectionTypeIn, // unsupported for NPMACLPolSettings // InternalPort: 0, LocalAddresses: "", @@ -59,12 +65,12 @@ var baseACLsForCalicoCNI = []*NPMACLPolSettings{ RemoteAddresses: "", // unsupported for NPMACLPolSettings (note no 's') // RemotePort: "0", - RuleType: "Host", + RuleType: hcn.RuleTypeHost, }, { Id: fmt.Sprintf("%s-baseallowouthost", policyIDPrefix), - Action: "Allow", - Direction: "Out", + Action: hcn.ActionTypeAllow, + Direction: hcn.DirectionTypeOut, // unsupported for NPMACLPolSettings // InternalPort: 0, LocalAddresses: "", @@ -76,7 +82,7 @@ var baseACLsForCalicoCNI = []*NPMACLPolSettings{ RemoteAddresses: "", // unsupported for NPMACLPolSettings (note no 's') // RemotePort: "0", - RuleType: "Host", + RuleType: hcn.RuleTypeHost, }, }