From 5d25eb1108050c23b8f142c58fcaa87a389bfd8e Mon Sep 17 00:00:00 2001 From: John Payne <89417863+jpayne3506@users.noreply.github.com> Date: Fri, 22 Sep 2023 10:43:04 -0700 Subject: [PATCH] Release/v1.4 Remove AKS-Engine (#2212) * build azure-vnet-telemetry and azure-vnet-ipam in dropgz-test (#1846) build azure-vnet-telemetry and azure-vnet-ipam in dropgz-test for parity with release image Signed-off-by: Evan Baker (cherry picked from commit f6192596134cd98f090846a9e9493b19c814c749) * ci: disable kube-proxy for test clusters (#1965) * disable kube-proxy for byocni cluster creation * test config mapping * shell pwd * use CURDIR * check current directory * test with repo root dir * test azp format * test azp format * test azp format * change e2e steps to remove kube proxy * fix load test update args * fix ns and rg in update * update ciliume2e * fix kubectl cmd in load test * adding new targets for no kube proxy * remove cluster update * update overlay e2e * test behavior of load test * test grep for azure-cns * look for container deployment * testing * restart node variable check * update if condition * add skip node case --------- Co-authored-by: tamilmani1989 (cherry picked from commit 024819d7747f21e26567c359fa73fda9810dee40) * CI: [CNI] Replace the bash scripts for CNI load testing with golang test cases (#2003) CI:[CNI] Replace the bash scripts with the golang test cases (cherry picked from commit 008ae45f9ba63cba6ef7ab848e7c57b0737be966) * ci: [CNI] Move Nightly Cilium Pipeline test to ACN (#1963) * CNS to be able to generate dualstack overlay CNI conflist (#1981) * fix: Eliminating duplicate lines * ci: Add update permission for ciliumidentity * fix: Parameterize Image Registry add retry to nnc update during scaledown (#1970) * add retry to nnc update during scaledown Signed-off-by: Evan Baker * test for panic in pool monitor Signed-off-by: Evan Baker --------- Signed-off-by: Evan Baker fix: reserve 0th IP as gateway for overlay on Windows (#1968) * fix: reserve 0th IP as gateway for overlay on Windows * fix: allow gateway to be updated ci: windows profile container image (#1988) Always use 0 for NC version in Overlay (#1979) always use 0 for NC version in overlay Signed-off-by: Evan Baker [Vnet Scale - CNS]: Flattening CIDR ranges for Node NNC to a list (#1921) * Read secondary CIDRs from VnetScale NNC * fix comment * update comment * For VnetScale mode, Use 1st IP for def gateway instead of 0th for windows * fix/add import * address pr comments * add comments * address pr comments * wrap error * fix typo * fix UT fix: [NPM] check if policy exists in case of nil pointer (#1974) fix: check for nil first ci: disable kube-proxy for test clusters (#1965) * disable kube-proxy for byocni cluster creation * test config mapping * shell pwd * use CURDIR * check current directory * test with repo root dir * test azp format * test azp format * test azp format * change e2e steps to remove kube proxy * fix load test update args * fix ns and rg in update * update ciliume2e * fix kubectl cmd in load test * adding new targets for no kube proxy * remove cluster update * update overlay e2e * test behavior of load test * test grep for azure-cns * look for container deployment * testing * restart node variable check * update if condition * add skip node case --------- Co-authored-by: tamilmani1989 perf: [WIN-NPM] fast bootup (#1900) * wip * wip2 * use other apply DP func * address comment about if statement * finish bootup for both DPs * fix lint * fix lint 2 * fix lint 3 * longer UT timeout and add
missing UTs for apply in background tool: [NPM] script to clean up iptable chains (#1978) tool: script to clean up NPM iptable chains feat: [WIN-NPM] metrics for latencies and failures (#1959) * implement metrics * add npm prefix * rename windows files * metrics pkg UTs * allow reinitializing prometheus metrics * fix: hns wrapper should not throw error for empty SetPolicy values * test: metric UTs in dataplane * fix: record list endpoint latency always * remove flaky UT * feat: metric for max ipset members * fix lint * fix lint 2 * fix build * fix lint 3 * simplify conditionals and protect against maxMembers becoming negative * remove bottom 4 histogram buckets. start at 16 ms * reset metrics for ipset UTs * style: don't check for windows dp in *_windows.go files * build: remove unused import * test: reset windows metrics in UT Remove SSH port 22 rule from aks-engine clusters (#1983) ci: change overlaye2e stage to cilium-overlay (#1997) * renaming overlaye2e for cilium * update display names for stages Initial getHomeAZ 404 changes (#1994) * initial getHomeAZ 404 changes * treat 404 as success * address comments
CNS to be able to generate dualstack overlay CNI conflist (#1981) * fix: File Directory * style: Comments * Addressing Comments --------- Co-authored-by: Paul Johnston <35265851+pjohnst5@users.noreply.github.com> (cherry picked from commit 1514d95cce4601ac9750a07234e4b481b32d242d) * ci:[CNI] Add windows CNIv1 datapath test (#2016) * ci: Transfer files * test: Working Datapath Test * test: apierror Tests * style: Datapath Package * test: Deployment timing * fix: Error check * fix: Lint (cherry picked from commit 390977d5c2128fdd57c39c70a1fdb79035edfb70) * fix: [CNI] CNI load test failing due to namespace already created (#2031) fix: CNI load test failing due to namespace already created (cherry picked from commit c10900e68acd6e41d50a5c09ead1cfca7c4463e3) * ci:[CNI] Windows cniv1 load test pipeline (#2024) CI:[CNI] Windows cniv1 load test pipeline (cherry picked from commit e45ad213d5e26b681271fd4d898c0dcbda63715f) * ci: [CNI] Adding aks cluster creation steps for k8s e2e test (#2052) * ci: [CNI] Adding aks cluster creation steps for k8s e2e test * Add validate step to the pipeline * Adding the telemetry config to the cluster (cherry picked from commit 846e508b17d0ff292693744c1db5ba932692a6f7) * ci:[CNI] Replace AKS-Engine Tests with k8s conformance tests (#2062) * Initial Commit * Add attempts to prevent flakiness * Add taint for windows tests * Add k8s e2e tests * Testing vmSizes * Artifact k8se2e binary * Remove NPM E2E * Add testing and increase processes * Addressing comments (cherry picked from commit 451c691a57fafd2b6a99cb302a6f89dc7c1d3d94) * CI: Removing AKS engine related code (#2089) (cherry picked from commit b45c2c71c6a630524ffb8e364428eced7f460649) * feat: [dropgz] Dropgz for windows (#2075) * feat: [dropgz] Dropgz for windows * Removing the code for killing the process from dropgz for windows (cherry picked from commit 7a41178f497b829d9837a2cfcff6d5cf07d06677) * ci: Update dns tests for k8s conformance (#2104) Update dns tests for k8s v1.26 (cherry picked from commit bbf2fd43c7e342f43f7e01ebc2cfad0b07f1b4fd) * ci: adding cni package as a trigger (#2108) (cherry picked from commit e6a8ea6be7e3d660deb5c4249de540d0e814347f) * ci: add packages for submodule trigger (#2154) (cherry picked from commit 4aecfd6a8d69c5abcafd0941f48d45ee9afe9b46) * set mellanox reg key (#1768) (cherry picked from commit fa2de6d579da6550588dee94675daae5d15ab5c2) --------- Co-authored-by: Evan Baker Co-authored-by: Camryn Lee <31013536+camrynl@users.noreply.github.com> Co-authored-by: Vipul Singh Co-authored-by: Rajvi <107083915+rajvinar@users.noreply.github.com> --- .../cni/cilium/cilium-cni-load-test.yaml | 54 ++-- .../cni/cilium/nightly-release-test.yml | 54 ++++ .../cni/k8s-e2e/k8s-e2e-job-template.yaml | 111 +++++++++ .../cni/k8s-e2e/k8s-e2e-step-template.yaml | 53 ++++ .../create-cluster-template.yaml | 20 ++ .../pod-deployment-template.yaml | 20 ++
.../restart-node-template.yaml | 26 ++ .../validate-state-template.yaml | 19 ++ .pipelines/cni/pipeline.yaml | 37 +++ .../windows-cni-load-test-template.yaml | 144 +++++++++++ .pipelines/npm/npm-cni-integration-test.yaml | 93 +++++++ .pipelines/pipeline.yaml | 57 +++-- .../e2e-dualstack-job-template.yaml | 154 ------------ .../aks-engine/e2e-job-template.yaml | 45 ---- .../aks-engine/e2e-step-template.yaml | 135 ---------- .../aks-swift/e2e-step-template.yaml | 12 +- .../singletenancy/aks/e2e-job-template.yaml | 88 +++++++ .../singletenancy/aks/e2e-step-template.yaml | 87 +++++++ .../cilium-overlay-e2e-step-template.yaml | 46 +++- .../cilium/cilium-e2e-step-template.yaml | 28 ++- .pipelines/submodules-pipeline.yaml | 49 ++-- Makefile | 29 ++- cni/build/windows.Dockerfile | 21 ++ cns/configuration/cns_config.json | 3 +- cns/configuration/configuration.go | 1 + cns/service/main.go | 11 + ...st.Dockerfile => cniTest_linux.Dockerfile} | 5 + dropgz/build/cniTest_windows.Dockerfile | 30 +++ dropgz/build/windows.Dockerfile | 36 +++ dropgz/pkg/embed/payload.go | 13 +- hack/{swift => aks}/Makefile | 80 +++++- hack/{swift => aks}/README.md | 1 + hack/aks/kube-proxy.json | 10 + hack/scripts/scale_deployment.sh | 74 ------ hack/scripts/updatecni.ps1 | 57 +++++ hack/scripts/validate_state.sh | 138 ----------- platform/Makefile | 11 + platform/os_linux.go | 8 + platform/os_windows.go | 71 ++++++ platform/windows/adapter/mellanox/mellanox.go | 222 +++++++++++++++++ .../adapter/mocks/networkadapter_generated.go | 78 ++++++ platform/windows/adapter/network_adapter.go | 16 ++ test/apimodels/cniLinux1804.json | 61 ----- test/apimodels/cniLinuxDualstack1804.json | 64 ----- test/apimodels/cniWindows1903.json | 74 ------ test/apimodels/cniWindows2004.json | 62 ----- test/apimodels/cniWindows2022.json | 74 ------ test/apimodels/cniWindowsDualstack2004.json | 70 ------ .../integration/datapath/datapath_win_test.go | 181 ++++++++++++++ test/integration/k8s_test.go | 24 +- test/integration/load/load_test.go | 180 ++++++++++++++ .../cilium/cilium-agent/clusterrole.yaml | 7 + .../cilium-nightly-agent/clusterrole.yaml | 104 ++++++++ .../clusterrolebinding.yaml | 12 + .../cilium-nightly-agent/serviceaccount.yaml | 6 + .../cilium/cilium-nightly-config.yaml | 89 +++++++ .../cilium-nightly-operator/clusterrole.yaml | 184 ++++++++++++++ .../clusterrolebinding.yaml | 12 + .../serviceaccount.yaml | 5 + .../cilium/cilium-operator/clusterrole.yaml | 7 + .../cilium/{cilium-agent => }/daemonset.yaml | 12 +- .../{cilium-operator => }/deployment.yaml | 4 +- .../cni/cni-installer-v1-windows.yaml | 87 +++++++ .../manifests/cni/cni-installer-v1.yaml | 79 ++++++ test/integration/manifests/cns/daemonset.yaml | 2 +- .../datapath/windows-deployment.yaml | 22 ++ .../load/privileged-daemonset-windows.yaml | 32 +++ .../manifests/load/privileged-daemonset.yaml | 0 .../manifests/noop-deployment-linux.yaml | 13 +- .../manifests/noop-deployment-windows.yaml | 23 ++ test/integration/setup_test.go | 32 +-- test/integration/utils_delete_test.go | 32 --- test/internal/datapath/datapath_win.go | 178 ++++++++++++++ .../k8sutils}/label.go | 2 +- .../k8sutils/utils.go} | 139 +++++++++-- .../k8sutils/utils_create.go} | 63 ++++- test/internal/k8sutils/utils_delete.go | 52 ++++ test/internal/k8sutils/utils_get.go | 51 ++++ .../k8sutils/utils_parse.go} | 15 +- test/{integration => internal}/retry/retry.go | 2 - test/validate/client.go | 40 +++ test/validate/linux_validate.go | 232 ++++++++++++++++++ test/validate/utils.go | 39 +++ 
test/validate/windows_validate.go | 228 +++++++++++++++++ 84 files changed, 3669 insertions(+), 1173 deletions(-) create mode 100644 .pipelines/cni/cilium/nightly-release-test.yml create mode 100644 .pipelines/cni/k8s-e2e/k8s-e2e-job-template.yaml create mode 100644 .pipelines/cni/k8s-e2e/k8s-e2e-step-template.yaml create mode 100644 .pipelines/cni/load-test-templates/create-cluster-template.yaml create mode 100644 .pipelines/cni/load-test-templates/pod-deployment-template.yaml create mode 100644 .pipelines/cni/load-test-templates/restart-node-template.yaml create mode 100644 .pipelines/cni/load-test-templates/validate-state-template.yaml create mode 100644 .pipelines/cni/pipeline.yaml create mode 100644 .pipelines/cni/singletenancy/windows-cni-load-test-template.yaml create mode 100644 .pipelines/npm/npm-cni-integration-test.yaml delete mode 100644 .pipelines/singletenancy/aks-engine/e2e-dualstack-job-template.yaml delete mode 100644 .pipelines/singletenancy/aks-engine/e2e-job-template.yaml delete mode 100644 .pipelines/singletenancy/aks-engine/e2e-step-template.yaml create mode 100644 .pipelines/singletenancy/aks/e2e-job-template.yaml create mode 100644 .pipelines/singletenancy/aks/e2e-step-template.yaml create mode 100644 cni/build/windows.Dockerfile rename dropgz/build/{cniTest.Dockerfile => cniTest_linux.Dockerfile} (74%) create mode 100644 dropgz/build/cniTest_windows.Dockerfile create mode 100644 dropgz/build/windows.Dockerfile rename hack/{swift => aks}/Makefile (68%) rename hack/{swift => aks}/README.md (95%) create mode 100644 hack/aks/kube-proxy.json delete mode 100644 hack/scripts/scale_deployment.sh create mode 100644 hack/scripts/updatecni.ps1 delete mode 100644 hack/scripts/validate_state.sh create mode 100644 platform/Makefile create mode 100644 platform/windows/adapter/mellanox/mellanox.go create mode 100644 platform/windows/adapter/mocks/networkadapter_generated.go create mode 100644 platform/windows/adapter/network_adapter.go delete mode 100644 test/apimodels/cniLinux1804.json delete mode 100644 test/apimodels/cniLinuxDualstack1804.json delete mode 100644 test/apimodels/cniWindows1903.json delete mode 100644 test/apimodels/cniWindows2004.json delete mode 100644 test/apimodels/cniWindows2022.json delete mode 100644 test/apimodels/cniWindowsDualstack2004.json create mode 100644 test/integration/datapath/datapath_win_test.go create mode 100644 test/integration/load/load_test.go create mode 100644 test/integration/manifests/cilium/cilium-nightly-agent/clusterrole.yaml create mode 100644 test/integration/manifests/cilium/cilium-nightly-agent/clusterrolebinding.yaml create mode 100644 test/integration/manifests/cilium/cilium-nightly-agent/serviceaccount.yaml create mode 100644 test/integration/manifests/cilium/cilium-nightly-config.yaml create mode 100644 test/integration/manifests/cilium/cilium-nightly-operator/clusterrole.yaml create mode 100644 test/integration/manifests/cilium/cilium-nightly-operator/clusterrolebinding.yaml create mode 100644 test/integration/manifests/cilium/cilium-nightly-operator/serviceaccount.yaml rename test/integration/manifests/cilium/{cilium-agent => }/daemonset.yaml (96%) rename test/integration/manifests/cilium/{cilium-operator => }/deployment.yaml (97%) create mode 100644 test/integration/manifests/cni/cni-installer-v1-windows.yaml create mode 100644 test/integration/manifests/cni/cni-installer-v1.yaml create mode 100644 test/integration/manifests/datapath/windows-deployment.yaml create mode 100644 
test/integration/manifests/load/privileged-daemonset-windows.yaml rename hack/manifests/hostprocess.yaml => test/integration/manifests/load/privileged-daemonset.yaml (100%) rename hack/manifests/pod.yaml => test/integration/manifests/noop-deployment-linux.yaml (57%) create mode 100644 test/integration/manifests/noop-deployment-windows.yaml delete mode 100644 test/integration/utils_delete_test.go create mode 100644 test/internal/datapath/datapath_win.go rename test/{integration => internal/k8sutils}/label.go (97%) rename test/{integration/utils_test.go => internal/k8sutils/utils.go} (56%) rename test/{integration/utils_create_test.go => internal/k8sutils/utils_create.go} (68%) create mode 100644 test/internal/k8sutils/utils_delete.go create mode 100644 test/internal/k8sutils/utils_get.go rename test/{integration/utils_parse_test.go => internal/k8sutils/utils_parse.go} (77%) rename test/{integration => internal}/retry/retry.go (96%) create mode 100644 test/validate/client.go create mode 100644 test/validate/linux_validate.go create mode 100644 test/validate/utils.go create mode 100644 test/validate/windows_validate.go diff --git a/.pipelines/cni/cilium/cilium-cni-load-test.yaml b/.pipelines/cni/cilium/cilium-cni-load-test.yaml index 128e6346c0..f6fd57373c 100644 --- a/.pipelines/cni/cilium/cilium-cni-load-test.yaml +++ b/.pipelines/cni/cilium/cilium-cni-load-test.yaml @@ -16,7 +16,7 @@ stages: inlineScript: | set -ex make -C ./hack/swift azcfg AZCLI=az REGION=$(LOCATION) - make -C ./hack/swift overlay-byocni-up AZCLI=az REGION=$(LOCATION) SUB=$(SUBSCRIPTION_ID) CLUSTER=${RESOURCE_GROUP} NODE_COUNT=10 VM_SIZE=Standard_DS4_v2 + make -C ./hack/swift overlay-no-kube-proxy-up AZCLI=az REGION=$(LOCATION) SUB=$(SUBSCRIPTION_ID) CLUSTER=${RESOURCE_GROUP}-$(make revision) NODE_COUNT=10 VM_SIZE=Standard_DS4_v2 name: "CreateAksCluster" displayName: "Create AKS Cluster" - stage: install_cilium @@ -35,7 +35,7 @@ stages: inlineScript: | set -ex az extension add --name aks-preview - make -C ./hack/swift set-kubeconf AZCLI=az CLUSTER=${RESOURCE_GROUP} + make -C ./hack/aks set-kubeconf AZCLI=az CLUSTER=${{ parameters.clusterName }}-$(make revision) ls -lah pwd kubectl cluster-info @@ -48,6 +48,9 @@ stages: echo "install Cilium onto Overlay Cluster" kubectl apply -f test/integration/manifests/cilium/cilium-agent kubectl apply -f test/integration/manifests/cilium/cilium-operator + # Passes Cilium image to daemonset and deployment + envsubst '${CILIUM_VERSION_TAG},${CILIUM_IMAGE_REGISTRY}' < test/integration/manifests/cilium/daemonset.yaml | kubectl apply -f - + envsubst '${CILIUM_VERSION_TAG},${CILIUM_IMAGE_REGISTRY}' < test/integration/manifests/cilium/deployment.yaml | kubectl apply -f - kubectl get po -owide -A echo "deploy ip-masq-agent for overlay" kubectl create -f test/integration/manifests/ip-masq-agent/ip-masq-agent.yaml --validate=false @@ -62,20 +65,11 @@ stages: jobs: - job: deploy_pods steps: - - task: AzureCLI@1 - displayName: "Pod Deployment" - inputs: - azureSubscription: $(TEST_SUB_SERVICE_CONNECTION) - scriptLocation: "inlineScript" - scriptType: "bash" - addSpnToEnvironment: true - inlineScript: | - set -ex - az extension add --name aks-preview - make -C ./hack/swift set-kubeconf AZCLI=az CLUSTER=${RESOURCE_GROUP} - kubectl apply -f hack/manifests/pod.yaml - kubectl apply -f hack/manifests/hostprocess.yaml - bash hack/scripts/scale_deployment.sh + - template: ../load-test-templates/pod-deployment-template.yaml + parameters: + clusterName: ${{ parameters.clusterName }} + scaleup: 2400 + os: 
linux - stage: validate_state dependsOn: pod_deployment displayName: "Validate State" @@ -89,9 +83,9 @@ stages: scriptType: "bash" addSpnToEnvironment: true inlineScript: | - make -C ./hack/swift set-kubeconf AZCLI=az CLUSTER=${RESOURCE_GROUP} + make -C ./hack/swift set-kubeconf AZCLI=az CLUSTER=${RESOURCE_GROUP}-$(make revision) kubectl get pods -A - bash hack/scripts/validate_state.sh + make test-validate-state name: "ValidateState" displayName: "Validate State" retryCountOnTaskFailure: 3 @@ -109,18 +103,22 @@ stages: addSpnToEnvironment: true inlineScript: | echo "Scale up the pods and immediated restart the nodes" - make -C ./hack/swift set-kubeconf AZCLI=az CLUSTER=${RESOURCE_GROUP} + make -C ./hack/swift set-kubeconf AZCLI=az CLUSTER=${RESOURCE_GROUP}-$(make revision) make -C ./hack/swift azcfg AZCLI=az REGION=$(LOCATION) + cd test/integration/load echo "Scaling the pods down to 100 per node" - bash ./hack/scripts/scale_deployment.sh -n 0 -u 1000 -s + go test -count 1 -timeout 30m -tags load -run ^TestScaleDeployment$ -tags=load -scaleup=1000 -skip-wait=true + cd ../../../ echo "Restarting the nodes" vmss_name=$(az vmss list -g MC_${RESOURCE_GROUP}_${RESOURCE_GROUP}_$(LOCATION) --query "[].name" -o tsv) - make -C ./hack/swift restart-vmss AZCLI=az CLUSTER=${RESOURCE_GROUP} REGION=$(LOCATION) VMSS_NAME=$vmss_name - bash ./hack/scripts/scale_deployment.sh -n 0 -u 1000 -c + make -C ./hack/swift restart-vmss AZCLI=az CLUSTER=${RESOURCE_GROUP}-$(make revision) REGION=$(LOCATION) VMSS_NAME=$vmss_name + cd test/integration/load + go test -count 1 -timeout 30m -tags load -run ^TestScaleDeployment$ -tags=load -replicas=1000 name: "RestartNodes" displayName: "Restart Nodes" - script: | - bash hack/scripts/validate_state.sh + export RESTART_CASE=true + make test-validate-state name: "ValidateState" displayName: "Validate State" retryCountOnTaskFailure: 3 @@ -148,11 +146,11 @@ stages: addSpnToEnvironment: true inlineScript: | set -ex - make -C ./hack/swift set-kubeconf AZCLI=az CLUSTER=${RESOURCE_GROUP} + make -C ./hack/aks set-kubeconf AZCLI=az CLUSTER=${{ parameters.clusterName }}-$(make revision) name: "GetCluster" displayName: "Get AKS Cluster" - script: | - k delete deployment container -n default + kubectl delete ns load-test cilium connectivity test retryCountOnTaskFailure: 6 name: "CiliumConnectivityTests" @@ -175,9 +173,9 @@ stages: if [ "$(DELETE_RESOURCES)" ] then echo "Deleting Cluster and resource group" - make -C ./hack/swift set-kubeconf AZCLI=az CLUSTER=${RESOURCE_GROUP} - make -C ./hack/swift azcfg AZCLI=az REGION=$(LOCATION) - make -C ./hack/swift down AZCLI=az REGION=$(LOCATION) SUB=$(SUBSCRIPTION_ID) CLUSTER=${RESOURCE_GROUP} + make -C ./hack/aks set-kubeconf AZCLI=az CLUSTER=${{ parameters.clusterName }}-$(make revision) + make -C ./hack/aks azcfg AZCLI=az REGION=$(LOCATION) + make -C ./hack/aks down AZCLI=az REGION=$(LOCATION) SUB=$(SUBSCRIPTION_ID) CLUSTER=${{ parameters.clusterName }}-$(make revision) echo "Cluster and resources down" else echo "Deletion of resources is False" diff --git a/.pipelines/cni/cilium/nightly-release-test.yml b/.pipelines/cni/cilium/nightly-release-test.yml new file mode 100644 index 0000000000..b3e6a35bff --- /dev/null +++ b/.pipelines/cni/cilium/nightly-release-test.yml @@ -0,0 +1,54 @@ +pr: none +trigger: none + +stages: + - stage: init + displayName: "Build and Push Cilium Image" + jobs: + - job: build_and_push_cilium_image + steps: + - bash: | + set -ex + cd .pipelines/ + git clone https://github.com/cilium/cilium.git + cd cilium + make 
docker-cilium-image + make docker-operator-generic-image + name: BuildCiliumImage + displayName: "Build Cilium Image" + - task: Docker@2 + displayName: Login + inputs: + containerRegistry: $(CONTAINER_REGISTRY) + command: "login" + - task: AzureCLI@1 + inputs: + azureSubscription: $(TEST_SUB_SERVICE_CONNECTION) + scriptLocation: "inlineScript" + inlineScript: | + set -ex + docker tag quay.io/cilium/cilium:$(DOCKER_IMAGE_TAG) $(ACR).azurecr.io/cilium/cilium:$(DOCKER_IMAGE_TAG) + docker tag quay.io/cilium/operator-generic:$(DOCKER_IMAGE_TAG) $(ACR).azurecr.io/cilium/operator-generic:$(DOCKER_IMAGE_TAG) + docker push $(ACR).azurecr.io/cilium/cilium:$(DOCKER_IMAGE_TAG) + docker push $(ACR).azurecr.io/cilium/operator-generic:$(DOCKER_IMAGE_TAG) + name: "PushCiliumImage" + displayName: "Push Cilium Image" + - task: Docker@2 + displayName: Logout + inputs: + containerRegistry: $(CONTAINER_REGISTRY) + command: "logout" + - stage: cilium_nightly + displayName: E2E - Cilium Nightly + variables: + GOPATH: "$(Agent.TempDirectory)/go" # Go workspace path + GOBIN: "$(GOPATH)/bin" # Go binaries path + modulePath: "$(GOPATH)/src/github.com/Azure/azure-container-networking" + jobs: + - job: cilium_nightly + steps: + - template: ../../singletenancy/cilium-overlay/cilium-overlay-e2e-step-template.yaml + parameters: + name: "cilium_nightly" + testDropgz: "" + clusterName: "ciliumnightly" diff --git a/.pipelines/cni/k8s-e2e/k8s-e2e-job-template.yaml b/.pipelines/cni/k8s-e2e/k8s-e2e-job-template.yaml new file mode 100644 index 0000000000..dc857426e5 --- /dev/null +++ b/.pipelines/cni/k8s-e2e/k8s-e2e-job-template.yaml @@ -0,0 +1,111 @@ +parameters: + clusterName: "" + os: "" + dependsOn: "" + sub: "" + + +jobs: + - job: cni_k8se2e + displayName: "CNI k8s E2E" + dependsOn: ${{ parameters.dependsOn }} + pool: + name: $(BUILD_POOL_NAME_DEFAULT) + steps: + - task: AzureCLI@1 + inputs: + azureSubscription: ${{ parameters.sub }} + scriptLocation: "inlineScript" + scriptType: "bash" + addSpnToEnvironment: true + inlineScript: | + set -e + make -C ./hack/aks set-kubeconf AZCLI=az CLUSTER=${{ parameters.clusterName }}-$(make revision) + + # sig-release provides test suite tarball(s) per k8s release. Just need to provide k8s version "v1.xx.xx" + # pulling k8s version from AKS. 
+ eval k8sVersion="v"$( az aks show -g ${{ parameters.clusterName }}-$(make revision) -n ${{ parameters.clusterName }}-$(make revision) --query "currentKubernetesVersion") + curl -L https://dl.k8s.io/$k8sVersion/kubernetes-test-linux-amd64.tar.gz -o ./kubernetes-test-linux-amd64.tar.gz + + # https://github.com/kubernetes/sig-release/blob/master/release-engineering/artifacts.md#content-of-kubernetes-test-system-archtargz-on-example-of-kubernetes-test-linux-amd64targz-directories-removed-from-list + # explictly unzip and strip directories from ginkgo and e2e.test + tar -xvzf kubernetes-test-linux-amd64.tar.gz --strip-components=3 kubernetes/test/bin/ginkgo kubernetes/test/bin/e2e.test + + displayName: "Setup Environment" + - ${{ if eq(parameters.datapath, true) }}: + - template: ../k8s-e2e/k8s-e2e-step-template.yaml + parameters: + testName: Datapath + name: datapath + clusterName: ${{ parameters.clusterName }} + ginkgoFocus: '(.*).Networking.should|(.*).Networking.Granular|(.*)kubernetes.api' + ginkgoSkip: 'SCTP|Disruptive|Slow|hostNetwork|kube-proxy|IPv6' + os: ${{ parameters.os }} + processes: 8 + attempts: 10 + - ${{ if eq(parameters.dns, true) }}: + - template: ../k8s-e2e/k8s-e2e-step-template.yaml + parameters: + testName: DNS + name: dns + clusterName: ${{ parameters.clusterName }} + ginkgoFocus: '\[sig-network\].DNS.should' + ginkgoSkip: 'resolv|256 search' + os: ${{ parameters.os }} + processes: 8 + attempts: 3 + - ${{ if eq(parameters.portforward, true) }}: + - template: ../k8s-e2e/k8s-e2e-step-template.yaml + parameters: + testName: Kubectl Portforward + name: portforward + clusterName: ${{ parameters.clusterName }} + ginkgoFocus: '\[sig-cli\].Kubectl.Port' + ginkgoSkip: '' + os: ${{ parameters.os }} + processes: 8 + attempts: 3 + - ${{ if eq(parameters.loadBalancer, true) }}: + - template: ../k8s-e2e/k8s-e2e-step-template.yaml + parameters: + testName: Load Balancers + name: load + clusterName: ${{ parameters.clusterName }} + ginkgoFocus: '\[sig-network\].LoadBalancers' + ginkgoSkip: 'ESIPP|Serial' + os: ${{ parameters.os }} + processes: 8 + attempts: 3 + - ${{ if eq(parameters.service, true) }}: + - template: ../k8s-e2e/k8s-e2e-step-template.yaml + parameters: + testName: Service Conformance + name: service + clusterName: ${{ parameters.clusterName }} + ginkgoFocus: 'Services.*\[Conformance\].*' + ginkgoSkip: '' + os: ${{ parameters.os }} + processes: 8 + attempts: 3 + - ${{ if eq(parameters.hostport, true) }}: + - template: ../k8s-e2e/k8s-e2e-step-template.yaml + parameters: + testName: Host Port + name: hostport + clusterName: ${{ parameters.clusterName }} + ginkgoFocus: '\[sig-network\](.*)HostPort|\[sig-scheduling\](.*)hostPort' + ginkgoSkip: 'SCTP|exists conflict' # Skip slow 5 minute test + os: ${{ parameters.os }} + processes: 1 # Has a short serial test + attempts: 3 + - ${{ if and(eq(parameters.hybridWin, true), eq(parameters.os, 'windows')) }}: + - template: ../k8s-e2e/k8s-e2e-step-template.yaml + parameters: + testName: Hybrid Network + name: hybrid + clusterName: ${{ parameters.clusterName }} + ginkgoFocus: '\[sig-windows\].Hybrid' + ginkgoSkip: '' + os: ${{ parameters.os }} + processes: 8 + attempts: 3 diff --git a/.pipelines/cni/k8s-e2e/k8s-e2e-step-template.yaml b/.pipelines/cni/k8s-e2e/k8s-e2e-step-template.yaml new file mode 100644 index 0000000000..91cf77afec --- /dev/null +++ b/.pipelines/cni/k8s-e2e/k8s-e2e-step-template.yaml @@ -0,0 +1,53 @@ +parameters: + testName: "" + name: "" + clusterName: "" + ginkgoFocus: "" + ginkgoSkip: "" + os: "" + processes: "" # 
Number of parallel processes + attempts: "" + + +steps: + - script: | + set -ex + + # ginkgoSkip cant handle only |LinuxOnly. Need to have check + if ${{ lower(and(ge(length(parameters.ginkgoSkip), 1), eq(parameters.os, 'windows'))) }} + then + SKIP="|LinuxOnly" + elif ${{ lower(eq(parameters.os, 'windows')) }} + then + SKIP="LinuxOnly" + fi + + # Taint Linux nodes so that windows tests do not run on them + if ${{ lower(eq(parameters.os, 'windows')) }} + then + kubectl taint nodes -l kubernetes.azure.com/mode=system node-role.kubernetes.io/control-plane:NoSchedule + fi + + # Depreciating flags. Change once k8s minimum version supported is > 1.24 + # nodes -> procs + # flakeAttempts -> flake-attempts + # dryRun -> dry-run + + ./ginkgo --nodes=${{ parameters.processes }} \ + ./e2e.test -- \ + --num-nodes=2 \ + --provider=skeleton \ + --ginkgo.focus='${{ parameters.ginkgoFocus }}' \ + --ginkgo.skip="${{ parameters.ginkgoSkip }}$SKIP" \ + --ginkgo.flakeAttempts=${{ parameters.attempts }} \ + --ginkgo.v \ + --node-os-distro=${{ parameters.os }} \ + --kubeconfig=$HOME/.kube/config + + # Untaint Linux nodes once testing is complete + if ${{ lower(eq(parameters.os, 'windows')) }} + then + kubectl taint nodes -l kubernetes.azure.com/mode=system node-role.kubernetes.io/control-plane:NoSchedule- + fi + name: ${{ parameters.name }} + displayName: k8s E2E - ${{ parameters.testName }} diff --git a/.pipelines/cni/load-test-templates/create-cluster-template.yaml b/.pipelines/cni/load-test-templates/create-cluster-template.yaml new file mode 100644 index 0000000000..de07665774 --- /dev/null +++ b/.pipelines/cni/load-test-templates/create-cluster-template.yaml @@ -0,0 +1,20 @@ +parameters: + clusterType: "" + clusterName: "" + nodeCount: "" + vmSize: "" + windowsVMSize: "" + +steps: + - task: AzureCLI@1 + inputs: + azureSubscription: $(TEST_SUB_SERVICE_CONNECTION) + scriptLocation: "inlineScript" + scriptType: "bash" + addSpnToEnvironment: true + inlineScript: | + set -ex + make -C ./hack/aks azcfg AZCLI=az REGION=$(LOCATION) + make -C ./hack/aks ${{ parameters.clusterType }} AZCLI=az REGION=$(LOCATION) SUB=$(SUBSCRIPTION_ID) CLUSTER=${{ parameters.clusterName }}-$(make revision) NODE_COUNT=${{ parameters.nodeCount }} VM_SIZE=${{ parameters.vmSize }} WINDOWS_VM_SKU=${{ parameters.windowsVMSize }} WINDOWS_USERNAME=${WINDOWS_USERNAME} WINDOWS_PASSWORD=${WINDOWS_PASSWORD} + name: "CreateAksCluster" + displayName: "Create AKS Cluster" diff --git a/.pipelines/cni/load-test-templates/pod-deployment-template.yaml b/.pipelines/cni/load-test-templates/pod-deployment-template.yaml new file mode 100644 index 0000000000..6032c028d2 --- /dev/null +++ b/.pipelines/cni/load-test-templates/pod-deployment-template.yaml @@ -0,0 +1,20 @@ +parameters: + clusterName: "" + scaleup: 1000 + os: "" + iterations: 4 + +steps: + - task: AzureCLI@1 + displayName: "Pod Deployment" + inputs: + azureSubscription: $(TEST_SUB_SERVICE_CONNECTION) + scriptLocation: "inlineScript" + scriptType: "bash" + addSpnToEnvironment: true + inlineScript: | + set -ex + az extension add --name aks-preview + make -C ./hack/aks set-kubeconf AZCLI=az CLUSTER=${{ parameters.clusterName }}-$(make revision) + cd test/integration/load + go test -timeout 30m -tags load -run ^TestLoad$ -tags=load -iterations=${{ parameters.iterations }} -scaleup=${{ parameters.scaleup }} -os=${{ parameters.os }} diff --git a/.pipelines/cni/load-test-templates/restart-node-template.yaml b/.pipelines/cni/load-test-templates/restart-node-template.yaml new file mode 100644 index 
0000000000..4009150b8a --- /dev/null +++ b/.pipelines/cni/load-test-templates/restart-node-template.yaml @@ -0,0 +1,26 @@ +parameters: + clusterName: "" + +steps: + - task: AzureCLI@1 + inputs: + azureSubscription: $(TEST_SUB_SERVICE_CONNECTION) + scriptLocation: "inlineScript" + scriptType: "bash" + addSpnToEnvironment: true + inlineScript: | + echo "Scale up the pods and immediated restart the nodes" + clusterName=${{ parameters.clusterName }}-$(make revision) + make -C ./hack/aks set-kubeconf AZCLI=az CLUSTER=${clusterName} + make -C ./hack/aks azcfg AZCLI=az REGION=$(LOCATION) + cd test/integration/load + echo "Scaling the pods down to 100 per node" + go test -count 1 -timeout 30m -tags load -run ^TestScaleDeployment$ -tags=load -scaleup=1000 -skip-wait=true + cd ../../../ + echo "Restarting the nodes" + vmss_name=$(az vmss list -g MC_${clusterName}_${clusterName}_$(LOCATION) --query "[].name" -o tsv) + make -C ./hack/aks restart-vmss AZCLI=az CLUSTER=${clusterName} REGION=$(LOCATION) VMSS_NAME=$vmss_name + cd test/integration/load + go test -count 1 -timeout 30m -tags load -run ^TestScaleDeployment$ -tags=load -replicas=1000 + name: "RestartNodes" + displayName: "Restart Nodes" diff --git a/.pipelines/cni/load-test-templates/validate-state-template.yaml b/.pipelines/cni/load-test-templates/validate-state-template.yaml new file mode 100644 index 0000000000..8f0ae209a2 --- /dev/null +++ b/.pipelines/cni/load-test-templates/validate-state-template.yaml @@ -0,0 +1,19 @@ +parameters: + clusterName: "" + os: "linux" + restartCase: "false" + +steps: + - task: AzureCLI@1 + inputs: + azureSubscription: $(TEST_SUB_SERVICE_CONNECTION) + scriptLocation: "inlineScript" + scriptType: "bash" + addSpnToEnvironment: true + inlineScript: | + make -C ./hack/aks set-kubeconf AZCLI=az CLUSTER=${{ parameters.clusterName }}-$(make revision) + kubectl get pods -A + make test-validate-state OS=${{ parameters.os }} RESTART_CASE=${{ parameters.restartCase }} + name: "ValidateState" + displayName: "Validate State" + retryCountOnTaskFailure: 3 diff --git a/.pipelines/cni/pipeline.yaml b/.pipelines/cni/pipeline.yaml new file mode 100644 index 0000000000..3fda84f807 --- /dev/null +++ b/.pipelines/cni/pipeline.yaml @@ -0,0 +1,37 @@ +pr: none +trigger: none + +stages: + - stage: setup + displayName: Setup + jobs: + - job: env + displayName: Setup + pool: + name: "$(BUILD_POOL_NAME_DEFAULT)" + steps: + - script: | + echo "Setting up environment" + go version + name: "SetEnvVars" + displayName: "Set Environment Variables" + condition: always() + - template: cilium/cilium-overlay-load-test-template.yaml + parameters: + name: cilium + clusterType: "overlay-no-kube-proxy-up" + clusterName: "cilium-overlay" + nodeCount: 10 + vmSize: "Standard_DS4_v2" + dependsOn: setup + - template: singletenancy/windows-cni-load-test-template.yaml + parameters: + name: win_cniv1 + clusterType: "windows-cniv1-up" + clusterName: "win-cniv1" + nodeCount: 2 + vmSize: "Standard_B2s" + dependsOn: setup + windowsVMSize: ${WINDOWS_VM_SKU} + os: windows + cni: cniv1 diff --git a/.pipelines/cni/singletenancy/windows-cni-load-test-template.yaml b/.pipelines/cni/singletenancy/windows-cni-load-test-template.yaml new file mode 100644 index 0000000000..c945d4cb4a --- /dev/null +++ b/.pipelines/cni/singletenancy/windows-cni-load-test-template.yaml @@ -0,0 +1,144 @@ +parameters: + dependsOn: "" + name: "" + clusterType: "" + clusterName: "" + nodeCount: "" + vmSize: "" + windowsVMSize: "" + os: "" + cni: "" + +stages: + - stage: createAKSclusterWindows + 
dependsOn: ${{ parameters.dependsOn }} + displayName: "Windows AKS Cluster ${{ parameters.cni }}" + jobs: + - job: create_aks_cluster_with_${{ parameters.name }} + pool: + name: "$(BUILD_POOL_NAME_DEFAULT)" + steps: + - template: ../load-test-templates/create-cluster-template.yaml + parameters: + clusterType: ${{ parameters.clusterType }} + clusterName: ${{ parameters.clusterName }} + nodeCount: ${{ parameters.nodeCount }} + vmSize: ${{ parameters.vmSize }} + windowsVMSize: ${{ parameters.windowsVMSize }} + - stage: build_images + dependsOn: ${{ parameters.dependsOn }} + displayName: "Build CNI Images" + jobs: + - job: build_cni_images + pool: + name: "$(BUILD_POOL_NAME_LINUX_AMD64)" + strategy: + matrix: + cni_dropgz_windows2022_amd64: + arch: amd64 + name: cni-dropgz-test + os: windows + os_version: ltsc2022 + steps: + - template: ../../containers/container-template.yaml + parameters: + arch: $(arch) + name: $(name) + os: $(os) + os_version: $(os_version) + - stage: update_cni + dependsOn: + - createAKSclusterWindows + - build_images + displayName: "Update CNI on Cluster" + jobs: + - job: deploy_pods + pool: + name: "$(BUILD_POOL_NAME_DEFAULT)" + strategy: + matrix: + cni_dropgz_windows2022_amd64: + os: windows + arch: amd64 + os_version: ltsc2022 + steps: + - task: AzureCLI@1 + inputs: + azureSubscription: $(TEST_SUB_SERVICE_CONNECTION) + scriptLocation: "inlineScript" + scriptType: "bash" + addSpnToEnvironment: true + inlineScript: | + set -ex + clusterName=${{ parameters.clusterName }}-$(make revision) + make -C ./hack/aks set-kubeconf AZCLI=az CLUSTER=${clusterName} + make -C ./hack/aks azcfg AZCLI=az REGION=$(LOCATION) + dropgz_version=$(make cni-dropgz-version) + export DROP_GZ_URL=$( make cni-dropgz-test-image-name-and-tag OS=$(os) ARCH=$(arch) OS_VERSION=$(os_version) CNI_DROPGZ_VERSION=${dropgz_version}) + envsubst < ./test/integration/manifests/cni/cni-installer-v1-windows.yaml | kubectl apply -f - + name: "UploadCNI" + displayName: "Upload CNI" + - script: | + set -ex + kubectl rollout status daemonset/azure-cni-windows -n kube-system + kubectl get pods -A + name: "WaitForCNI" + displayName: "Wait For CNI" + - stage: pod_deployment_windows + dependsOn: update_cni + displayName: "Pod Deployment" + jobs: + - job: deploy_pods + pool: + name: "$(BUILD_POOL_NAME_DEFAULT)" + steps: + - template: ../load-test-templates/pod-deployment-template.yaml + parameters: + clusterName: ${{ parameters.clusterName }} + scaleup: ${WINDOWS_SCALEUP} + os: ${{ parameters.os }} + cni: ${{ parameters.cni }} + iterations: ${WINDOWS_ITERATIONS} + - stage: validate_state_windows + dependsOn: pod_deployment_windows + displayName: "Validate State" + jobs: + - job: validate_state + pool: + name: "$(BUILD_POOL_NAME_DEFAULT)" + steps: + - template: ../load-test-templates/validate-state-template.yaml + parameters: + clusterName: ${{ parameters.clusterName }} + os: ${{ parameters.os }} + cni: ${{ parameters.cni }} + - stage: delete_resources + displayName: "Delete Resources" + dependsOn: + - validate_state_windows + jobs: + - job: delete_resources + pool: + name: "$(BUILD_POOL_NAME_DEFAULT)" + steps: + - task: AzureCLI@1 + inputs: + azureSubscription: $(TEST_SUB_SERVICE_CONNECTION) + scriptLocation: "inlineScript" + scriptType: "bash" + addSpnToEnvironment: true + inlineScript: | + set -ex + if [ "$(DELETE_RESOURCES)" ] + then + echo "Deleting Cluster and resource group" + make -C ./hack/aks set-kubeconf AZCLI=az CLUSTER=${{ parameters.clusterName }}-$(make revision) + make -C ./hack/aks azcfg AZCLI=az 
REGION=$(LOCATION) + make -C ./hack/aks down AZCLI=az REGION=$(LOCATION) SUB=$(SUBSCRIPTION_ID) CLUSTER=${{ parameters.clusterName }}-$(make revision) + echo "Cluster and resources down" + else + echo "Deletion of resources is False" + fi + name: "CleanUpCluster" + displayName: "Cleanup cluster" + condition: always() diff --git a/.pipelines/npm/npm-cni-integration-test.yaml b/.pipelines/npm/npm-cni-integration-test.yaml new file mode 100644 index 0000000000..8774b27992 --- /dev/null +++ b/.pipelines/npm/npm-cni-integration-test.yaml @@ -0,0 +1,93 @@ +parameters: + clusterName: "" + os: "" + sub: "" + os_version: "" + tag: "" + +jobs: + - job: npm_k8se2e + displayName: "NPM k8s E2E" + dependsOn: ${{ parameters.dependsOn }} + condition: and(succeeded(), ${{ or(contains(parameters.os_version, '2022'), eq(parameters.os, 'linux')) }} ) + pool: + name: $(BUILD_POOL_NAME_DEFAULT) + demands: + - agent.os -equals Linux + - Role -equals Build + steps: + - task: AzureCLI@2 + displayName: "Deploy NPM to Test Cluster" + inputs: + azureSubscription: ${{ parameters.sub }} + scriptType: "bash" + scriptLocation: "inlineScript" + inlineScript: | + set -ex + + make -C ./hack/aks set-kubeconf AZCLI=az CLUSTER=${{ parameters.clusterName }}-$(make revision) + + # deploy azure-npm + if ${{ lower(eq(parameters.os, 'windows')) }} + then + # Windows + kubectl apply -f https://raw.githubusercontent.com/Azure/azure-container-networking/master/npm/examples/windows/azure-npm.yaml + kubectl set image daemonset/azure-npm-win -n kube-system azure-npm=$IMAGE_REGISTRY/azure-npm:windows-amd64-ltsc2022-${{ parameters.tag }} + kubectl rollout status -n kube-system daemonset/azure-npm-win + + # konnectivity agent tends to fail after rollout. Give it time to recover + sleep 60 + # Taint Linux (system) nodes so windows tests do not run on them + kubectl taint nodes -l kubernetes.azure.com/mode=system node-role.kubernetes.io/control-plane:NoSchedule + else + # Linux + kubectl apply -f https://raw.githubusercontent.com/Azure/azure-container-networking/master/npm/azure-npm.yaml + kubectl set image daemonset/azure-npm -n kube-system azure-npm=$IMAGE_REGISTRY/azure-npm:${{ parameters.tag }} + kubectl rollout status -n kube-system daemonset/azure-npm + fi + + kubectl get po -n kube-system -owide -A + + # FQDN=`az aks show -n $CLUSTER_NAME -g $CLUSTER_NAME --query fqdn -o tsv` + FQDN=`az aks show -g ${{ parameters.clusterName }}-$(make revision) -n ${{ parameters.clusterName }}-$(make revision) --query fqdn -o tsv` + echo $FQDN + echo "##vso[task.setvariable variable=FQDN]$FQDN" + + - download: current + artifact: Test + + - bash: | + # NetworkPolicy between server and... 
+ focus="\ + client should enforce policy to allow traffic only from a different namespace, based on NamespaceSelector|\ + client should deny egress from pods based on PodSelector|\ + client should enforce multiple, stacked policies with overlapping podSelectors|\ + client should enforce egress policy allowing traffic to a server in a different namespace based on PodSelector and NamespaceSelector|\ + client should work with Ingress, Egress specified together|\ + client should enforce ingress policy allowing any port traffic to a server on a specific protocol|\ + client should not allow access by TCP when a policy specifies only UDP|\ + client should allow egress access to server in CIDR block|\ + client should enforce policy based on Ports|\ + client should support allow-all policy|\ + client should enforce updated policy|\ + client should support denying of egress traffic on the client side|\ + client should stop enforcing policies after they are deleted|\ + client should support a 'default-deny-ingress' policy" + + chmod +x $(Pipeline.Workspace)/Test/e2e.test + + KUBERNETES_SERVICE_HOST="$FQDN" KUBERNETES_SERVICE_PORT=443 \ + $(Pipeline.Workspace)/Test/e2e.test \ + --provider=local \ + --ginkgo.focus="$focus" \ + --ginkgo.skip="NetworkPolicyLegacy|SCTP" \ + --kubeconfig=$HOME/.kube/config + + # Untaint Linux (system) nodes once testing is complete + if ${{ lower(eq(parameters.os, 'windows')) }} + then + kubectl taint nodes -l kubernetes.azure.com/mode=system node-role.kubernetes.io/control-plane:NoSchedule- + fi + displayName: "Run Kubernetes e2e.test" + + diff --git a/.pipelines/pipeline.yaml b/.pipelines/pipeline.yaml index f136e41846..0fbd29a6eb 100644 --- a/.pipelines/pipeline.yaml +++ b/.pipelines/pipeline.yaml @@ -37,6 +37,7 @@ stages: echo "##vso[task.setvariable variable=Tag;isOutput=true]$(make version)" echo "##vso[task.setvariable variable=cniVersion;isOutput=true]$(make cni-version)" echo "##vso[task.setvariable variable=npmVersion;isOutput=true]$(make npm-version)" + echo "##vso[task.setvariable variable=dropgzVersion;isOutput=true]$(make cni-dropgz-version)" cat /etc/os-release uname -a sudo chown -R $(whoami):$(whoami) . 
@@ -144,9 +145,6 @@ stages: displayName: Create artifact storage container condition: succeeded() - - publish: ./test/apimodels/ - artifact: clusterdefinitions - - stage: containerize displayName: Build Images dependsOn: @@ -167,6 +165,11 @@ stages: arch: amd64 name: cni-dropgz-test os: linux + cni_dropgz_test_windows2022_amd64: + arch: amd64 + name: cni-dropgz-test + os: windows + os_version: ltsc2022 cns_linux_amd64: arch: amd64 name: cns @@ -262,7 +265,7 @@ stages: echo $TAG echo $CURRENT_VERSION echo "Checking if branch up to date with master" - + - stage: publish displayName: Publish Multiarch Manifests dependsOn: @@ -284,7 +287,7 @@ stages: platforms: linux/amd64 linux/arm64 cni_dropgz_test: name: cni-dropgz-test - platforms: linux/amd64 linux/arm64 + platforms: linux/amd64 linux/arm64 windows/amd64 cns: name: cns os_versions: ltsc2019 ltsc2022 @@ -344,25 +347,30 @@ stages: clusterName: "swifte2e" osSku: "Ubuntu" - - template: singletenancy/aks-engine/e2e-job-template.yaml + - template: singletenancy/aks/e2e-job-template.yaml parameters: - name: "ubuntu_18_04_linux_e2e" - displayName: Ubuntu 18.04 - pipelineBuildImage: "$(BUILD_IMAGE)" - clusterDefinition: "cniLinux1804.json" - clusterDefinitionCniTypeKey: "azureCNIURLLinux" - clusterDefinitionCniBuildOS: "linux" - clusterDefinitionCniBuildExt: ".tgz" + name: "aks_ubuntu_22_linux_e2e" + displayName: AKS Ubuntu 22 + arch: 'amd64' + os: 'linux' + clusterType: linux-cniv1-up + clusterName: 'ubuntu22e2e' + vmSize: Standard_B2s + k8sVersion: 1.25 + scaleup: 100 - - template: singletenancy/aks-engine/e2e-job-template.yaml + - template: singletenancy/aks/e2e-job-template.yaml parameters: - name: "windows_19_03_e2e" - displayName: "Windows 1903" - pipelineBuildImage: "$(BUILD_IMAGE)" - clusterDefinition: "cniWindows1903.json" - clusterDefinitionCniTypeKey: "azureCNIURLWindows" - clusterDefinitionCniBuildOS: "windows" - clusterDefinitionCniBuildExt: ".zip" + name: "aks_windows_22_e2e" + displayName: AKS Windows 2022 + arch: amd64 + os: windows + clusterType: windows-cniv1-up + clusterName: 'win22e2e' + vmSize: Standard_B2ms + windowsOsSku: 'Windows2022' + os_version: 'ltsc2022' + scaleup: 100 - stage: validate2 displayName: Validate Tags @@ -383,14 +391,15 @@ stages: echo $TAG echo $CURRENT_VERSION echo "Checking if branch is up to date with master" - + - stage: cleanup displayName: Cleanup dependsOn: - "aks_swift_e2e" - "cilium_e2e" - - "ubuntu_18_04_linux_e2e" - - "windows_19_03_e2e" + - "cilium_overlay_cilium_e2e" + - "aks_ubuntu_22_linux_e2e" + - "aks_windows_22_e2e" jobs: - job: delete_remote_artifacts displayName: Delete remote artifacts diff --git a/.pipelines/singletenancy/aks-engine/e2e-dualstack-job-template.yaml b/.pipelines/singletenancy/aks-engine/e2e-dualstack-job-template.yaml deleted file mode 100644 index 83c36d2cdf..0000000000 --- a/.pipelines/singletenancy/aks-engine/e2e-dualstack-job-template.yaml +++ /dev/null @@ -1,154 +0,0 @@ -parameters: - name: "" - displayName: "" - pipelineBuildImage: "$(BUILD_IMAGE)" - clusterDefinition: "" - clusterDefinitionCniTypeKey: "" - clusterDefinitionCniBuildOS: "" - clusterDefinitionCniBuildExt: "" - -stages: - - stage: ${{ parameters.name }} - displayName: E2E - ${{ parameters.displayName }} - dependsOn: - - setup - - publish - jobs: - - job: Create_cluster - displayName: Create Dualstack cluster - timeoutInMinutes: 120 - pool: - name: $(BUILD_POOL_NAME_DEFAULT) - demands: - - agent.os -equals Linux - - Role -equals Build - container: - image: 
mcr.microsoft.com/oss/azcu/go-dev:$(GO_DEV_IMAGE_TAG) - variables: - GOPATH: "$(Agent.TempDirectory)/go" # Go workspace path - GOBIN: "$(GOPATH)/bin" # Go binaries path - acnPath: "$(GOPATH)/src/github.com/Azure/azure-container-networking" - CommitHash: $[ stagedependencies.setup.env.outputs['EnvironmentalVariables.CommitHash'] ] - StorageID: $[ stagedependencies.setup.env.outputs['EnvironmentalVariables.StorageID'] ] - cniVersion: $[ stagedependencies.setup.env.outputs['EnvironmentalVariables.cniVersion'] ] - steps: - - task: DownloadPipelineArtifact@2 - inputs: - buildType: current - artifactName: clusterdefinitions - targetPath: $(Pipeline.Workspace) - - bash: | - ls -lah - export CNI_URL=https://$(ARTIFACT_STORAGE).blob.core.windows.net/acn-$(StorageID)/azure-vnet-cni-${{ parameters.clusterDefinitionCniBuildOS }}-amd64-$(cniVersion)${{ parameters.clusterDefinitionCniBuildExt }} - export CNI_TYPE=${{ parameters.clusterDefinitionCniTypeKey }} - echo CNI type is $CNI_TYPE - echo CNI_URL is $CNI_URL - echo Config: '${{ parameters.clusterDefinition }}' - cat '${{ parameters.clusterDefinition }}' - cat '${{ parameters.clusterDefinition }}' | jq --arg cnikey $CNI_TYPE --arg cniurl $CNI_URL '.properties.orchestratorProfile.kubernetesConfig[$cnikey]= $cniurl' > '${{ parameters.clusterDefinition }}'.tmp - cat '${{ parameters.clusterDefinition }}'.tmp | jq --arg ver $(cniVersion) '.properties.orchestratorProfile.kubernetesConfig.azureCNIVersion = $ver' > '${{ parameters.clusterDefinition }}' - mv '${{ parameters.clusterDefinition }}'.tmp '${{ parameters.clusterDefinition }}' - echo "Running E2E tests against a cluster built with the following API model:" - cp ${{ parameters.clusterDefinition }} clusterDefinition.json - displayName: "Configure AKS-Engine Cluster definition" - workingDirectory: $(Pipeline.Workspace) - - bash: | - echo Currently set AKS-Engine Version '$(AKS_ENGINE_VERSION)' - if [ -z '$(AKS_ENGINE_VERSION)' ] - then - echo Found set AKS-Engine version '$(AKS_ENGINE_VERSION)'... - export aksEVersion='$(AKS_ENGINE_VERSION)' - else - echo No AKS-Engine version set, using latest... 
- export aksEVersion=$(curl -L -s -H 'Accept: application/json' https://github.com/Azure/aks-engine/releases/latest | sed -e 's/.*"tag_name":"\([^"]*\)".*/\1/') - fi - echo Using AKS-Engine version $aksEVersion - - # download binary - #wget https://github.com/Azure/aks-engine/releases/download/$aksEVersion/aks-engine-$aksEVersion-linux-amd64.tar.gz - wget https://github.com/tamilmani1989/aks-engine/releases/download/v1.1.0/aks-engine-tamanoha-linux-amd64.tar.gz - rm -rf ./dualstack-ws - mkdir ./dualstack-ws - - #extract binary - #tar -zxvf aks-engine-$aksEVersion-linux-amd64.tar.gz -C dualstack-ws - tar -zxvf aks-engine-tamanoha-linux-amd64.tar.gz -C dualstack-ws - mv ./dualstack-ws/aks-engine-*/* ./dualstack-ws/ - ls -l ./dualstack-ws - ./dualstack-ws/aks-engine version - displayName: "Install aks-engine binary" - - - task: AzureCLI@1 - inputs: - azureSubscription: $(AKS_ENGINE_SERVICE_CONNECTION) - scriptLocation: "inlineScript" - addSpnToEnvironment: true - inlineScript: | - RG=kubernetes-dual-$(echo "${{ parameters.clusterDefinitionCniBuildOS }}-`date "+%Y-%m-%d-%S"`") - echo "##vso[task.setvariable variable=RESOURCE_GROUP;isOutput=true;]$RG" - region=$(echo $(AKS_ENGINE_REGION)|cut -d',' -f1) - echo "running: ./dualstack-ws/aks-engine deploy -m $(Pipeline.Workspace)/clusterDefinition.json --location $region -g $RG --subscription-id $(AKS_ENGINE_SUBSCRIPTION_ID) --client-id $servicePrincipalId --client-secret $servicePrincipalKey --output-directory ./dualstack-ws/kubeoutput" - ./dualstack-ws/aks-engine deploy -m $(Pipeline.Workspace)/clusterDefinition.json --location $region -g $RG --subscription-id $(AKS_ENGINE_SUBSCRIPTION_ID) --client-id $servicePrincipalId --client-secret $servicePrincipalKey --output-directory ./dualstack-ws/kubeoutput --auto-suffix - mv ./dualstack-ws/kubeoutput/kubeconfig/kubeconfig.* ./dualstack-ws/kubeoutput/kubeconfig/kubeconfig.json - name: DeployAKSEngine - displayName: "Deploy aks-engine cluster" - - publish: ./dualstack-ws/kubeoutput - artifact: kubeoutput-${{ parameters.clusterDefinitionCniBuildOS }} - - - job: Build_kubernetes - displayName: Build kubernetes upstream - timeoutInMinutes: 120 - pool: - name: $(BUILD_POOL_NAME_DEFAULT) - demands: - - agent.os -equals Linux - - Role -equals Build - steps: - - bash: | - git clone https://github.com/kubernetes/kubernetes.git - cd kubernetes - export PATH=$PATH:/usr/local/go/bin/ - make WHAT=test/e2e/e2e.test - displayName: 'Build Kubernetes e2e.test' - - publish: $(System.DefaultWorkingDirectory)/kubernetes/_output/local/bin/linux/amd64 - artifact: Test-${{ parameters.clusterDefinitionCniBuildOS }} - - - job: Run_test - displayName: Run upstream e2e test - dependsOn: [Create_cluster, Build_kubernetes] - timeoutInMinutes: 120 - pool: - name: $(BUILD_POOL_NAME_DEFAULT) - demands: - - agent.os -equals Linux - - Role -equals Build - variables: - RESOURCE_GROUP: $[ dependencies.Create_cluster.outputs['DeployAKSEngine.RESOURCE_GROUP'] ] - steps: - - checkout: none - - download: current - artifact: Test-${{ parameters.clusterDefinitionCniBuildOS }} - - download: current - artifact: kubeoutput-${{ parameters.clusterDefinitionCniBuildOS }} - - bash: | - echo "rgname is:$(RESOURCE_GROUP)" - chmod +x $(Pipeline.Workspace)/Test-${{ parameters.clusterDefinitionCniBuildOS }}/e2e.test - echo "os: ${{ parameters.clusterDefinitionCniBuildOS }}" - if [ "${{ parameters.clusterDefinitionCniBuildOS }}" == "windows" ]; then - echo "Skip LinuxOnly Tests" - $(Pipeline.Workspace)/Test-${{ parameters.clusterDefinitionCniBuildOS 
}}/e2e.test --provider=local --ginkgo.focus="Feature:IPv6DualStack" --ginkgo.skip="LinuxOnly" --kubeconfig=$(Pipeline.Workspace)/kubeoutput-${{ parameters.clusterDefinitionCniBuildOS }}/kubeconfig/kubeconfig.json - else - echo "Skip SCTP Tests" - $(Pipeline.Workspace)/Test-${{ parameters.clusterDefinitionCniBuildOS }}/e2e.test --provider=local --ginkgo.focus="Feature:IPv6DualStack" --ginkgo.skip="SCTP" --kubeconfig=$(Pipeline.Workspace)/kubeoutput-${{ parameters.clusterDefinitionCniBuildOS }}/kubeconfig/kubeconfig.json - fi - displayName: "Run Test Suite" - - task: AzureCLI@2 - displayName: "Delete Test Cluster Resource Group" - condition: always() - inputs: - azureSubscription: $(BUILD_VALIDATIONS_SERVICE_CONNECTION) - scriptType: 'bash' - scriptLocation: 'inlineScript' - inlineScript: | - echo "Deleting: $(RESOURCE_GROUP)" - az group delete -n $(RESOURCE_GROUP) --yes diff --git a/.pipelines/singletenancy/aks-engine/e2e-job-template.yaml b/.pipelines/singletenancy/aks-engine/e2e-job-template.yaml deleted file mode 100644 index 904e33b2a0..0000000000 --- a/.pipelines/singletenancy/aks-engine/e2e-job-template.yaml +++ /dev/null @@ -1,45 +0,0 @@ -parameters: - name: "" - displayName: "" - pipelineBuildImage: "$(BUILD_IMAGE)" - clusterDefinition: "" - clusterDefinitionCniTypeKey: "" - clusterDefinitionCniBuildOS: "" - clusterDefinitionCniBuildExt: "" - -stages: - - stage: ${{ parameters.name }} - displayName: E2E - ${{ parameters.displayName }} - dependsOn: - - setup - - publish - jobs: - - job: ${{ parameters.name }} - displayName: Singletenancy AKS Engine Suite - (${{ parameters.name }}) - timeoutInMinutes: 120 - pool: - name: $(BUILD_POOL_NAME_DEFAULT) - demands: - - agent.os -equals Linux - - Role -equals Build - container: - image: mcr.microsoft.com/oss/azcu/go-dev:$(GO_DEV_IMAGE_TAG) - variables: - GOPATH: "$(Agent.TempDirectory)/go" # Go workspace path - GOBIN: "$(GOPATH)/bin" # Go binaries path - modulePath: "$(GOPATH)/src/github.com/Azure/aks-engine" - acnPath: "$(GOPATH)/src/github.com/Azure/azure-container-networking" - CommitHash: $[ stagedependencies.setup.env.outputs['EnvironmentalVariables.CommitHash'] ] - StorageID: $[ stagedependencies.setup.env.outputs['EnvironmentalVariables.StorageID'] ] - cniVersion: $[ stagedependencies.setup.env.outputs['EnvironmentalVariables.cniVersion'] ] - npmVersion: $[ stagedependencies.setup.env.outputs['EnvironmentalVariables.npmVersion'] ] - steps: - - template: e2e-step-template.yaml - parameters: - name: ${{ parameters.name }} - clusterDefinition: ${{ parameters.clusterDefinition }} - clusterDefinitionCniTypeKey: ${{ parameters.clusterDefinitionCniTypeKey }} - clusterDefinitionCniBuildOS: ${{ parameters.clusterDefinitionCniBuildOS }} - clusterDefinitionCniBuildExt: ${{ parameters.clusterDefinitionCniBuildExt }} - cniVersion: $(cniVersion) - npmVersion: $(npmVersion) diff --git a/.pipelines/singletenancy/aks-engine/e2e-step-template.yaml b/.pipelines/singletenancy/aks-engine/e2e-step-template.yaml deleted file mode 100644 index 3b69e1a417..0000000000 --- a/.pipelines/singletenancy/aks-engine/e2e-step-template.yaml +++ /dev/null @@ -1,135 +0,0 @@ -parameters: - name: "" - clusterDefinition: "" - clusterDefinitionCniTypeKey: "" - clusterDefinitionCniBuildOS: "" - clusterDefinitionCniBuildExt: "" - cniVersion: "" - npmVersion: "" - -steps: - - checkout: none - - bash: | - go version - go env - mkdir -p '$(GOBIN)' - mkdir -p '$(GOPATH)/pkg' - mkdir -p '$(modulePath)' - go get github.com/onsi/ginkgo/ginkgo - go get github.com/onsi/gomega/... 
- - cd '$(modulePath)' - echo Currently set AKS-Engine Version '$(AKS_ENGINE_VERSION)' - if [ -z '$(AKS_ENGINE_VERSION)' ] - then - echo No AKS-Engine version set, using latest... - export aksEVersion=$(curl -L -s -H 'Accept: application/json' https://github.com/Azure/aks-engine/releases/latest | sed -e 's/.*"tag_name":"\([^"]*\)".*/\1/') - else - echo Found set AKS-Engine version '$(AKS_ENGINE_VERSION)'... - export aksEVersion='$(AKS_ENGINE_VERSION)' - fi - echo Using AKS-Engine version $aksEVersion - - #download source - wget https://github.com/csfmomo/aks-engine/archive/v1.0.9.1.tar.gz - - # extract source - #tar -zxf $aksEVersion.tar.gz - tar -zxf v1.0.9.1.tar.gz - - # move source to current directory - mv aks-engine-*/* . - - # download binary - wget https://github.com/csfmomo/aks-engine/releases/download/v1.0.9.1/aks-engine-v1.0.9.1-linux-amd64.tar.gz - - rm -rf ./bin - mkdir ./bin - - # extract binary - tar -zxvf aks-engine-v1.0.9.1-linux-amd64.tar.gz -C bin - mv ./bin/aks-engine-*/* ./bin/ - ls -l ./bin - ./bin/aks-engine version - echo '##vso[task.prependpath]$(GOBIN)' - echo '##vso[task.prependpath]$(GOROOT)/bin' - name: "GoEnv" - displayName: "Set up the Go environment" - - - task: DownloadPipelineArtifact@2 - inputs: - buildType: current - artifactName: clusterdefinitions - targetPath: "$(modulePath)" - #buildType: 'current' # Options: current, specific - #project: # Required when buildType == Specific - - - bash: | - ls -lah - cd $(modulePath) - export CNI_URL=https://$(ARTIFACT_STORAGE).blob.core.windows.net/acn-$(StorageID)/azure-vnet-cni-${{ parameters.clusterDefinitionCniBuildOS }}-amd64-${{ parameters.cniVersion }}${{ parameters.clusterDefinitionCniBuildExt }} - export CNI_TYPE=${{ parameters.clusterDefinitionCniTypeKey }} - echo CNI type is $CNI_TYPE - echo CNI_URL is $CNI_URL - echo Config: '${{ parameters.clusterDefinition }}' - cat '${{ parameters.clusterDefinition }}' - cat '${{ parameters.clusterDefinition }}' | jq --arg cnikey $CNI_TYPE --arg cniurl $CNI_URL '.properties.orchestratorProfile.kubernetesConfig[$cnikey]= $cniurl' > '${{ parameters.clusterDefinition }}'.tmp - cat '${{ parameters.clusterDefinition }}'.tmp | jq --arg ver ${{ parameters.cniVersion }} '.properties.orchestratorProfile.kubernetesConfig.azureCNIVersion = $ver' > '${{ parameters.clusterDefinition }}' - cat '${{ parameters.clusterDefinition }}' | jq --arg ver $IMAGE_REGISTRY/azure-npm:${{ parameters.npmVersion }} '.properties.orchestratorProfile.kubernetesConfig.addons[0].containers[0].image = $ver' > '${{ parameters.clusterDefinition }}'.tmp - if [ "${{ parameters.Name }}" == "windows_20_22_e2e" ]; then - if [[ -z $(WS2022_IMG_SUBSCRIPTION) || -z $(WS2022_IMG_VERSION) ]]; then - echo WS2022 Gallery/Image Subscription and Version not set, using defaults in cniWindows2022.json - else - echo Using WS2022 Gallery/Image Subscription: $(WS2022_IMG_SUBSCRIPTION) and Version: $(WS2022_IMG_VERSION) - cat '${{ parameters.clusterDefinition }}'.tmp | jq --arg subscription $(WS2022_IMG_SUBSCRIPTION) '.properties.windowsProfile.imageReference.subscriptionId = $subscription' > '${{ parameters.clusterDefinition }}' - cat '${{ parameters.clusterDefinition }}' | jq --arg version $(WS2022_IMG_VERSION) '.properties.windowsProfile.imageReference.version = $version' > '${{ parameters.clusterDefinition }}'.tmp - fi - fi - mv '${{ parameters.clusterDefinition }}'.tmp '${{ parameters.clusterDefinition }}' - echo "Running E2E tests against a cluster built with the following API model:" - cp ${{ 
parameters.clusterDefinition }} clusterDefinition.json - displayName: Configure AKS-Engine - - - task: AzureCLI@1 - inputs: - azureSubscription: $(AKS_ENGINE_SERVICE_CONNECTION) - scriptLocation: "inlineScript" - addSpnToEnvironment: true - workingDirectory: "$(modulePath)" - inlineScript: | - export CLIENT_ID=$servicePrincipalId - export CLIENT_SECRET=$servicePrincipalKey - export PATH=$PATH:'$(GOPATH)' - export CLUSTER_DEFINITION=./clusterDefinition.json - export ORCHESTRATOR=kubernetes - export CREATE_VNET=false - export TIMEOUT=20m - export TENANT_ID=$(AKS_ENGINE_TENANT_ID) - export SUBSCRIPTION_ID=$(AKS_ENGINE_SUBSCRIPTION_ID) - export CLEANUP_ON_EXIT=true - export CLEANUP_IF_FAIL=false - export REGIONS=$(AKS_ENGINE_REGION) - export IS_JENKINS=false - export DEBUG_CRASHING_PODS=true - export AZURE_CORE_ONLY_SHOW_ERRORS=True - RGNAME="kubernetes"$RANDOM - export RESOURCE_GROUP=$RGNAME - echo "##vso[task.setvariable variable=RESOURCE_GROUP]$RESOURCE_GROUP" - echo Cluster Def $CLUSTER_DEFINITION - cat $CLUSTER_DEFINITION - make test-kubernetes - name: DeployAKSEngine - displayName: Run AKS-Engine E2E Tests - retryCountOnTaskFailure: 0 - - - task: CopyFiles@2 - inputs: - sourceFolder: "$(modulePath)/_output" - targetFolder: $(Build.ArtifactStagingDirectory)/${{ parameters.name }} - condition: always() - - - task: PublishBuildArtifacts@1 - inputs: - artifactName: ${{ parameters.name }} - pathtoPublish: "$(Build.ArtifactStagingDirectory)/${{ parameters.name }}" - condition: always() diff --git a/.pipelines/singletenancy/aks-swift/e2e-step-template.yaml b/.pipelines/singletenancy/aks-swift/e2e-step-template.yaml index 10c33a15f7..5a80782427 100644 --- a/.pipelines/singletenancy/aks-swift/e2e-step-template.yaml +++ b/.pipelines/singletenancy/aks-swift/e2e-step-template.yaml @@ -30,10 +30,11 @@ steps: scriptType: "bash" addSpnToEnvironment: true inlineScript: | + set -e mkdir -p ~/.kube/ echo "Create AKS cluster" - make -C ./hack/swift azcfg AZCLI=az REGION=$(REGION_AKS_CLUSTER_TEST) - make -C ./hack/swift byocni-up AZCLI=az REGION=$(REGION_AKS_CLUSTER_TEST) SUB=$(SUB_AZURE_NETWORK_AGENT_TEST) CLUSTER=${{ parameters.clusterName }}-${{ parameters.osSku }}-$(make revision) OSSKU=${{ parameters.osSku }} + make -C ./hack/aks azcfg AZCLI=az REGION=$(REGION_AKS_CLUSTER_TEST) + make -C ./hack/aks byocni-up AZCLI=az REGION=$(REGION_AKS_CLUSTER_TEST) SUB=$(SUB_AZURE_NETWORK_AGENT_TEST) CLUSTER=${{ parameters.clusterName }}-${{ parameters.osSku }}-$(make revision) OSSKU=${{ parameters.osSku }} echo "Cluster successfully created" displayName: Create test cluster condition: succeeded() @@ -51,7 +52,7 @@ steps: sudo mv gsutil /usr/local/bin name: "installKubetest" displayName: "Set up Conformance Tests" - + - script: | ls -lah pwd @@ -123,9 +124,10 @@ steps: scriptType: "bash" addSpnToEnvironment: true inlineScript: | + set -e echo "Deleting cluster" - make -C ./hack/swift azcfg AZCLI=az REGION=$(REGION_AKS_CLUSTER_TEST) - make -C ./hack/swift down AZCLI=az REGION=$(REGION_AKS_CLUSTER_TEST) SUB=$(SUB_AZURE_NETWORK_AGENT_TEST) CLUSTER=${{ parameters.clusterName }}-${{ parameters.osSku }}-$(make revision) + make -C ./hack/aks azcfg AZCLI=az REGION=$(REGION_AKS_CLUSTER_TEST) + make -C ./hack/aks down AZCLI=az REGION=$(REGION_AKS_CLUSTER_TEST) SUB=$(SUB_AZURE_NETWORK_AGENT_TEST) CLUSTER=${{ parameters.clusterName }}-${{ parameters.osSku }}-$(make revision) echo "Cluster and resources down" name: "Cleanupcluster" displayName: "Cleanup cluster" diff --git a/.pipelines/singletenancy/aks/e2e-job-template.yaml 
b/.pipelines/singletenancy/aks/e2e-job-template.yaml new file mode 100644 index 0000000000..b595699684 --- /dev/null +++ b/.pipelines/singletenancy/aks/e2e-job-template.yaml @@ -0,0 +1,88 @@ +parameters: + name: "" + displayName: "" + arch: "" + os: "" + clusterType: "" + clusterName: "" + vmSize: "" + k8sVersion: "" + windowsOsSku: "" + os_version: "" + scaleup: "" + +stages: + - stage: ${{ parameters.name }} + displayName: E2E - ${{ parameters.displayName }} + variables: + GOPATH: "$(Agent.TempDirectory)/go" # Go workspace path + GOBIN: "$(GOPATH)/bin" # Go binaries path + modulePath: "$(GOPATH)/src/github.com/Azure/azure-container-networking" + dropgzVersion: $[ stagedependencies.setup.env.outputs['EnvironmentalVariables.dropgzVersion'] ] + cniVersion: $[ stagedependencies.setup.env.outputs['EnvironmentalVariables.cniVersion'] ] + dependsOn: + - setup + - publish + jobs: + - job: ${{ parameters.name }} + displayName: Singletenancy AKS - (${{ parameters.name }}) + pool: + name: $(BUILD_POOL_NAME_DEFAULT) + demands: + - agent.os -equals Linux + - Role -equals $(CUSTOM_E2E_ROLE) + steps: + - template: e2e-step-template.yaml + parameters: + name: ${{ parameters.name }} + clusterType: ${{ parameters.clusterType }} + clusterName: ${{ parameters.clusterName }} + vmSize: ${{ parameters.vmSize }} + arch: ${{ parameters.arch }} + os: ${{ parameters.os }} + k8sVersion: ${{ parameters.k8sVersion }} + windowsOsSku: ${{ parameters.windowsOsSku }} + os_version: ${{ parameters.os_version }} + version: $(dropgzVersion) + cniVersion: $(cniVersion) + scaleup: ${{ parameters.scaleup }} + + - template: ../../cni/k8s-e2e/k8s-e2e-job-template.yaml + parameters: + sub: $(AZURE_TEST_AGENT_SERVICE_CONNECTION) + clusterName: ${{ parameters.clusterName }} + os: ${{ parameters.os }} + datapath: true + dns: true + portforward: true + loadBalancer: true + hybridWin: true + service: true + hostport: true + dependsOn: ${{ parameters.name }} + + - job: cleanup + displayName: "Cleanup" + dependsOn: + - ${{ parameters.name }} + - "cni_k8se2e" + pool: + name: $(BUILD_POOL_NAME_DEFAULT) + condition: always() + steps: + - task: AzureCLI@1 + inputs: + azureSubscription: $(AZURE_TEST_AGENT_SERVICE_CONNECTION) + scriptLocation: "inlineScript" + scriptType: "bash" + addSpnToEnvironment: true + inlineScript: | + set -e + echo "Deleting cluster" + make -C ./hack/aks azcfg AZCLI=az REGION=$(REGION_AKS_CLUSTER_TEST) + make -C ./hack/aks set-kubeconf AZCLI=az CLUSTER=${{ parameters.clusterName }}-$(make revision) + make -C ./hack/aks down AZCLI=az REGION=$(REGION_AKS_CLUSTER_TEST) SUB=$(SUB_AZURE_NETWORK_AGENT_TEST) CLUSTER=${{ parameters.clusterName }}-$(make revision) + echo "Cluster and resources down" + displayName: "Delete test cluster" + + diff --git a/.pipelines/singletenancy/aks/e2e-step-template.yaml b/.pipelines/singletenancy/aks/e2e-step-template.yaml new file mode 100644 index 0000000000..3bdab1d0f9 --- /dev/null +++ b/.pipelines/singletenancy/aks/e2e-step-template.yaml @@ -0,0 +1,87 @@ +parameters: + name: "" + clusterType: "" + clusterName: "" + nodeCount: "" + vmSize: "" + k8sVersion: "" + version: "" + os: "" + windowsOsSku: "" + cniVersion: "" + os_version: "" + scaleup: "" + +steps: + - bash: | + go version + go env + mkdir -p '$(GOBIN)' + mkdir -p '$(GOPATH)/pkg' + mkdir -p '$(modulePath)' + echo '##vso[task.prependpath]$(GOBIN)' + echo '##vso[task.prependpath]$(GOROOT)/bin' + name: "GoEnv" + displayName: "Set up the Go environment" + - task: AzureCLI@1 + inputs: + azureSubscription: 
$(AZURE_TEST_AGENT_SERVICE_CONNECTION) + scriptLocation: "inlineScript" + scriptType: "bash" + addSpnToEnvironment: true + inlineScript: | + set -e + mkdir -p ~/.kube/ + echo "Create AKS cluster" + echo "parameters ${{ parameters.windowsOsSku }}" + make -C ./hack/aks azcfg AZCLI=az REGION=$(REGION_AKS_CLUSTER_TEST) + make -C ./hack/aks ${{ parameters.clusterType }} AZCLI=az REGION=$(REGION_AKS_CLUSTER_TEST) SUB=$(SUB_AZURE_NETWORK_AGENT_TEST) CLUSTER=${{ parameters.clusterName }}-$(make revision) K8S_VER=${{ parameters.k8sVersion }} VM_SIZE=${{ parameters.vmSize }} WINDOWS_OS_SKU=${{ parameters.windowsOsSku }} WINDOWS_VM_SKU=${{ parameters.vmSize }} WINDOWS_USERNAME=${WINDOWS_USERNAME} WINDOWS_PASSWORD=${WINDOWS_PASSWORD} + echo "Cluster successfully created" + displayName: Create test cluster + - script: | + echo "Upload CNI" + if [ "${{parameters.os}}" == "windows" ]; then + export DROP_GZ_URL=$( make cni-dropgz-test-image-name-and-tag OS='linux' ARCH=${{ parameters.arch }} CNI_DROPGZ_VERSION=${{ parameters.version }}) + envsubst < ./test/integration/manifests/cni/cni-installer-v1.yaml | kubectl apply -f - + kubectl rollout status daemonset/azure-cni -n kube-system + echo "Deploying on windows nodes" + export DROP_GZ_URL=$( make cni-dropgz-test-image-name-and-tag OS='windows' ARCH=${{ parameters.arch }} OS_VERSION=${{ parameters.os_version }} CNI_DROPGZ_VERSION=${{ parameters.version }}) + envsubst < ./test/integration/manifests/cni/cni-installer-v1-windows.yaml | kubectl apply -f - + kubectl rollout status daemonset/azure-cni-windows -n kube-system + else + export DROP_GZ_URL=$( make cni-dropgz-test-image-name-and-tag OS=${{ parameters.os }} ARCH=${{ parameters.arch }} CNI_DROPGZ_VERSION=${{ parameters.version }}) + envsubst < ./test/integration/manifests/cni/cni-installer-v1.yaml | kubectl apply -f - + kubectl rollout status daemonset/azure-cni -n kube-system + fi + name: "UploadCni" + displayName: "Upload CNI" + - task: AzureCLI@1 + inputs: + azureSubscription: $(AZURE_TEST_AGENT_SERVICE_CONNECTION) + scriptLocation: "inlineScript" + scriptType: "bash" + addSpnToEnvironment: true + inlineScript: | + set -e + clusterName=${{ parameters.clusterName }}-$(make revision) + echo "Restarting nodes" + for val in $(az vmss list -g MC_${clusterName}_${clusterName}_$(REGION_AKS_CLUSTER_TEST) --query "[].name" -o tsv); do + make -C ./hack/aks restart-vmss AZCLI=az CLUSTER=${clusterName} REGION=$(REGION_AKS_CLUSTER_TEST) VMSS_NAME=${val} + done + displayName: "Restart Nodes" + - script: | + kubectl get pods -A -o wide + echo "Deploying test pods" + cd test/integration/load + go test -count 1 -timeout 30m -tags load -run ^TestLoad$ -tags=load -iterations=2 -scaleup=${{ parameters.scaleup }} -os=${{ parameters.os }} + cd ../../.. 
+ # Remove this once we have cniv1 support for validating the test cluster + echo "Validate State skipped for linux cniv1 for now" + if [ "${{parameters.os}}" == "windows" ]; then + make test-validate-state OS=${{ parameters.os }} + fi + kubectl delete ns load-test + displayName: "Validate State" + retryCountOnTaskFailure: 3 + + diff --git a/.pipelines/singletenancy/cilium-overlay/cilium-overlay-e2e-step-template.yaml b/.pipelines/singletenancy/cilium-overlay/cilium-overlay-e2e-step-template.yaml index 1d26f9c46a..93d888894f 100644 --- a/.pipelines/singletenancy/cilium-overlay/cilium-overlay-e2e-step-template.yaml +++ b/.pipelines/singletenancy/cilium-overlay/cilium-overlay-e2e-step-template.yaml @@ -29,10 +29,11 @@ steps: scriptType: "bash" addSpnToEnvironment: true inlineScript: | + set -e mkdir -p ~/.kube/ echo "Create AKS Overlay cluster" - make -C ./hack/swift azcfg AZCLI=az REGION=$(REGION_OVERLAY_CLUSTER_TEST) - make -C ./hack/swift overlay-byocni-up AZCLI=az REGION=$(REGION_OVERLAY_CLUSTER_TEST) SUB=$(SUB_AZURE_NETWORK_AGENT_TEST) CLUSTER=${{ parameters.clusterName }}-$(make revision) VM_SIZE=Standard_B2ms + make -C ./hack/aks azcfg AZCLI=az REGION=$(REGION_OVERLAY_CLUSTER_TEST) + make -C ./hack/aks overlay-no-kube-proxy-up AZCLI=az REGION=$(REGION_OVERLAY_CLUSTER_TEST) SUB=$(SUB_AZURE_NETWORK_AGENT_TEST) CLUSTER=${{ parameters.clusterName }}-$(make revision) VM_SIZE=Standard_B2ms echo "Cluster successfully created" displayName: Create Overlay cluster condition: succeeded() @@ -45,12 +46,17 @@ steps: sudo install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl kubectl cluster-info kubectl get po -owide -A + if [ "$CILIUM_VERSION_TAG" = "cilium-nightly-pipeline" ]; then FILE_PATH=-nightly && echo "Running nightly"; fi echo "deploy Cilium ConfigMap" kubectl apply -f cilium/configmap.yaml - kubectl apply -f test/integration/manifests/cilium/cilium-config.yaml - echo "install Cilium onto Overlay Cluster" - kubectl apply -f test/integration/manifests/cilium/cilium-agent - kubectl apply -f test/integration/manifests/cilium/cilium-operator + kubectl apply -f test/integration/manifests/cilium/cilium${FILE_PATH}-config.yaml + echo "install Cilium ${CILIUM_VERSION_TAG}" + # Passes Cilium image to daemonset and deployment + envsubst '${CILIUM_VERSION_TAG},${CILIUM_IMAGE_REGISTRY}' < test/integration/manifests/cilium/daemonset.yaml | kubectl apply -f - + envsubst '${CILIUM_VERSION_TAG},${CILIUM_IMAGE_REGISTRY}' < test/integration/manifests/cilium/deployment.yaml | kubectl apply -f - + # Use different file directories for nightly and current cilium version + kubectl apply -f test/integration/manifests/cilium/cilium${FILE_PATH}-agent + kubectl apply -f test/integration/manifests/cilium/cilium${FILE_PATH}-operator kubectl get po -owide -A name: "installCilium" displayName: "Install Cilium on AKS Overlay" @@ -91,7 +97,14 @@ steps: kubectl create configmap config-reconcile.yaml cd ../../../.. kubectl get po -owide -A - sudo -E env "PATH=$PATH" make test-integration CNS_VERSION=$(make cns-version) CNI_DROPGZ_VERSION=$(make cni-dropgz-version) INSTALL_CNS=true INSTALL_OVERLAY=true TEST_DROPGZ=${{ parameters.testDropgz }} + # Nightly does not build images per commit. Will use existing image. 
+ if [ "$CILIUM_VERSION_TAG" = "cilium-nightly-pipeline" ] + then + CNS=$(CNS_VERSION) DROPGZ=$(DROP_GZ_VERSION) && echo "Running nightly" + else + CNS=$(make cns-version) DROPGZ=$(make cni-dropgz-version) + fi + sudo -E env "PATH=$PATH" make test-integration CNS_VERSION=${CNS} CNI_DROPGZ_VERSION=${DROPGZ} INSTALL_CNS=true INSTALL_OVERLAY=true TEST_DROPGZ=${{ parameters.testDropgz }} retryCountOnTaskFailure: 3 name: "aziliumTest" displayName: "Run Azilium E2E on AKS Overlay" @@ -116,6 +129,15 @@ steps: pathtoPublish: "$(Build.ArtifactStagingDirectory)/test-output" condition: always() + - script: | + kubectl get pods -A + echo "Waiting < 2 minutes for cilium to be ready" + # Ensure Cilium is ready Xm\Xs + cilium status --wait --wait-duration 2m + retryCountOnTaskFailure: 3 + name: "CiliumStatus" + displayName: "Cilium Status" + - script: | echo "Run Service Conformance E2E" export PATH=${PATH}:/usr/local/bin/gsutil @@ -134,13 +156,12 @@ steps: - script: | echo "validate pod IP assignment and check systemd-networkd restart" - kubectl apply -f hack/manifests/hostprocess.yaml kubectl get pod -owide -A - bash hack/scripts/validate_state.sh + make test-validate-state echo "delete cilium connectivity test resources and re-validate state" kubectl delete ns cilium-test kubectl get pod -owide -A - bash hack/scripts/validate_state.sh + make test-validate-state name: "validatePods" displayName: "Validate Pods" @@ -180,9 +201,10 @@ steps: scriptType: "bash" addSpnToEnvironment: true inlineScript: | + set -e echo "Deleting cluster" - make -C ./hack/swift azcfg AZCLI=az - make -C ./hack/swift down SUB=$(SUB_AZURE_NETWORK_AGENT_TEST) AZCLI=az CLUSTER=${{ parameters.clusterName }}-$(make revision) + make -C ./hack/aks azcfg AZCLI=az + make -C ./hack/aks down SUB=$(SUB_AZURE_NETWORK_AGENT_TEST) AZCLI=az CLUSTER=${{ parameters.clusterName }}-$(make revision) echo "Cluster and resources down" name: "Cleanupcluster" displayName: "Cleanup cluster" diff --git a/.pipelines/singletenancy/cilium/cilium-e2e-step-template.yaml b/.pipelines/singletenancy/cilium/cilium-e2e-step-template.yaml index b030672887..ddb480793f 100644 --- a/.pipelines/singletenancy/cilium/cilium-e2e-step-template.yaml +++ b/.pipelines/singletenancy/cilium/cilium-e2e-step-template.yaml @@ -29,10 +29,11 @@ steps: scriptType: "bash" addSpnToEnvironment: true inlineScript: | + set -e mkdir -p ~/.kube/ echo "Create AKS cluster" - make -C ./hack/swift azcfg AZCLI=az REGION=$(REGION_AKS_CLUSTER_TEST) - make -C ./hack/swift byocni-up AZCLI=az REGION=$(REGION_AKS_CLUSTER_TEST) SUB=$(SUB_AZURE_NETWORK_AGENT_TEST) CLUSTER=${{ parameters.clusterName }}-$(make revision) VM_SIZE=Standard_B2ms + make -C ./hack/aks azcfg AZCLI=az REGION=$(REGION_AKS_CLUSTER_TEST) + make -C ./hack/aks swift-no-kube-proxy-up AZCLI=az REGION=$(REGION_AKS_CLUSTER_TEST) SUB=$(SUB_AZURE_NETWORK_AGENT_TEST) CLUSTER=${{ parameters.clusterName }}-$(make revision) VM_SIZE=Standard_B2ms echo "Cluster successfully created" displayName: Create test cluster condition: succeeded() @@ -48,7 +49,9 @@ steps: echo "deploy Cilium ConfigMap" kubectl apply -f cilium/configmap.yaml kubectl apply -f test/integration/manifests/cilium/cilium-config.yaml - echo "install Cilium" + echo "install Cilium ${CILIUM_VERSION_TAG}" + envsubst '${CILIUM_VERSION_TAG},${CILIUM_IMAGE_REGISTRY}' < test/integration/manifests/cilium/daemonset.yaml | kubectl apply -f - + envsubst '${CILIUM_VERSION_TAG},${CILIUM_IMAGE_REGISTRY}' < test/integration/manifests/cilium/deployment.yaml | kubectl apply -f - kubectl apply -f 
test/integration/manifests/cilium/cilium-agent kubectl apply -f test/integration/manifests/cilium/cilium-operator kubectl get po -owide -A @@ -110,6 +113,15 @@ steps: pathtoPublish: "$(Build.ArtifactStagingDirectory)/test-output" condition: always() + - script: | + kubectl get pods -A + echo "Waiting < 2 minutes for cilium to be ready" + # Ensure Cilium is ready Xm\Xs + cilium status --wait --wait-duration 2m + retryCountOnTaskFailure: 3 + name: "CiliumStatus" + displayName: "Cilium Status" + - script: | echo "Run Service Conformance E2E" export PATH=${PATH}:/usr/local/bin/gsutil @@ -129,13 +141,12 @@ steps: - script: | echo "validate pod IP assignment and check systemd-networkd restart" - kubectl apply -f hack/manifests/hostprocess.yaml kubectl get pod -owide -A - bash hack/scripts/validate_state.sh + make test-validate-state echo "delete cilium connectivity test resources and re-validate state" kubectl delete ns cilium-test kubectl get pod -owide -A - bash hack/scripts/validate_state.sh + make test-validate-state name: "validatePods" displayName: "Validate Pods" @@ -175,9 +186,10 @@ steps: scriptType: "bash" addSpnToEnvironment: true inlineScript: | + set -e echo "Deleting cluster" - make -C ./hack/swift azcfg AZCLI=az REGION=$(REGION_AKS_CLUSTER_TEST) - make -C ./hack/swift down AZCLI=az REGION=$(REGION_AKS_CLUSTER_TEST) SUB=$(SUB_AZURE_NETWORK_AGENT_TEST) CLUSTER=${{ parameters.clusterName }}-$(make revision) + make -C ./hack/aks azcfg AZCLI=az REGION=$(REGION_AKS_CLUSTER_TEST) + make -C ./hack/aks down AZCLI=az REGION=$(REGION_AKS_CLUSTER_TEST) SUB=$(SUB_AZURE_NETWORK_AGENT_TEST) CLUSTER=${{ parameters.clusterName }}-$(make revision) echo "Cluster and resources down" name: "Cleanupcluster" displayName: "Cleanup cluster" diff --git a/.pipelines/submodules-pipeline.yaml b/.pipelines/submodules-pipeline.yaml index 730c377240..b675c3ac0c 100644 --- a/.pipelines/submodules-pipeline.yaml +++ b/.pipelines/submodules-pipeline.yaml @@ -3,27 +3,48 @@ pr: include: - master paths: - include: - - "zapai/*" - - "azure-ipam/*" - - "dropgz/*" exclude: - - "*" + - ".devcontainer" + - ".hooks" + - ".vscode" + - ".github" + - cnm + - crd + - debug + - docs + - ebtables + - examples + - nmagent + - npm + - proto + - pkgerrlint + - server trigger: paths: - include: - - "zapai/*" - - "azure-ipam/*" - - "dropgz/*" exclude: - - "*" + - ".devcontainer" + - ".hooks" + - ".vscode" + - ".github" + - cnm + - crd + - debug + - docs + - ebtables + - examples + - nmagent + - npm + - pkgerrlint + - proto + - server + tags: include: - "zapai/*" - "azure-ipam/*" - "dropgz/*" - exclude: + - "cni/*" - "v*" stages: @@ -83,7 +104,7 @@ stages: name: "$(BUILD_POOL_NAME_DEFAULT)" steps: - script: | - make all-binaries-platforms + make all-binaries-platforms name: "BuildAllPlatformBinaries" displayName: "Build all platform binaries" @@ -120,8 +141,6 @@ stages: displayName: Create artifact storage container condition: succeeded() - - publish: ./test/apimodels/ - artifact: clusterdefinitions - stage: containerize displayName: Build Images @@ -249,7 +268,7 @@ stages: pipelineBuildImage: "$(BUILD_IMAGE)" testDropgz: true clusterName: "submodules-ciliume2e" - + - template: singletenancy/cilium-overlay/cilium-overlay-e2e-job-template.yaml parameters: name: "cilium_overlay_e2e" diff --git a/Makefile b/Makefile index 8de1d98a41..d2e9cb8b3c 100644 --- a/Makefile +++ b/Makefile @@ -245,6 +245,7 @@ endif ## Image name definitions. 
ACNCLI_IMAGE = acncli +CNI_PLUGIN_IMAGE = azure-cni-plugin CNI_DROPGZ_IMAGE = cni-dropgz CNI_DROPGZ_TEST_IMAGE = cni-dropgz-test CNS_IMAGE = azure-cns @@ -252,6 +253,7 @@ NPM_IMAGE = azure-npm ## Image platform tags. ACNCLI_PLATFORM_TAG ?= $(subst /,-,$(PLATFORM))$(if $(OS_VERSION),-$(OS_VERSION),)-$(ACN_VERSION) +CNI_PLUGIN_PLATFORM_TAG ?= $(subst /,-,$(PLATFORM))$(if $(OS_VERSION),-$(OS_VERSION),)-$(CNI_VERSION) CNI_DROPGZ_PLATFORM_TAG ?= $(subst /,-,$(PLATFORM))$(if $(OS_VERSION),-$(OS_VERSION),)-$(CNI_DROPGZ_VERSION) CNI_DROPGZ_TEST_PLATFORM_TAG ?= $(subst /,-,$(PLATFORM))$(if $(OS_VERSION),-$(OS_VERSION),)-$(CNI_DROPGZ_TEST_VERSION) CNS_PLATFORM_TAG ?= $(subst /,-,$(PLATFORM))$(if $(OS_VERSION),-$(OS_VERSION),)-$(CNS_VERSION) @@ -339,7 +341,7 @@ cni-dropgz-image-name-and-tag: # util target to print the CNI dropgz image name cni-dropgz-image: ## build cni-dropgz container image. $(MAKE) container \ DOCKERFILE=dropgz/build/$(OS).Dockerfile \ - EXTRA_BUILD_ARGS='--build-arg OS=$(OS) --build-arg ARCH=$(ARCH)' \ + EXTRA_BUILD_ARGS='--build-arg OS=$(OS) --build-arg ARCH=$(ARCH) --build-arg OS_VERSION=$(OS_VERSION)' \ IMAGE=$(CNI_DROPGZ_IMAGE) \ TAG=$(CNI_DROPGZ_PLATFORM_TAG) @@ -363,8 +365,8 @@ cni-dropgz-test-image-name-and-tag: # util target to print the CNI dropgz test i cni-dropgz-test-image: ## build cni-dropgz-test container image. $(MAKE) container \ - DOCKERFILE=dropgz/build/cniTest.Dockerfile \ - EXTRA_BUILD_ARGS='--build-arg OS=$(OS)' \ + DOCKERFILE=dropgz/build/cniTest_$(OS).Dockerfile \ + EXTRA_BUILD_ARGS='--build-arg OS=$(OS) --build-arg ARCH=$(ARCH) --build-arg OS_VERSION=$(OS_VERSION)' \ IMAGE=$(CNI_DROPGZ_TEST_IMAGE) \ TAG=$(CNI_DROPGZ_TEST_PLATFORM_TAG) @@ -436,6 +438,21 @@ npm-image-pull: ## pull cns container image. IMAGE=$(NPM_IMAGE) \ TAG=$(NPM_PLATFORM_TAG) +# cni-plugin - Specifically used for windows clusters, will be removed once we have Dropgz for windows +cni-plugin-image-name-and-tag: # util target to print the CNI plugin image name and tag. + @echo $(IMAGE_REGISTRY)/$(CNI_PLUGIN_IMAGE):$(CNI_PLUGIN_PLATFORM_TAG) + +cni-plugin-image: ## build cni plugin container image. + $(MAKE) container \ + DOCKERFILE=cni/build/$(OS).Dockerfile \ + IMAGE=$(CNI_PLUGIN_IMAGE) \ + EXTRA_BUILD_ARGS='--build-arg CNI_AI_PATH=$(CNI_AI_PATH) --build-arg CNI_AI_ID=$(CNI_AI_ID) --build-arg OS_VERSION=$(OS_VERSION)' \ + PLATFORM=$(PLATFORM) \ + TAG=$(CNI_PLUGIN_PLATFORM_TAG) \ + OS=$(OS) \ + ARCH=$(ARCH) \ + OS_VERSION=$(OS_VERSION) + ## Legacy @@ -701,6 +718,8 @@ workspace: ## Set up the Go workspace. ##@ Test COVER_PKG ?= . +#Restart case is used for cni load test pipeline for restarting the nodes cluster. +RESTART_CASE ?= false # COVER_FILTER omits folders with all files tagged with one of 'unit', '!ignore_uncovered', or '!ignore_autogenerated' test-all: ## run all unit tests. @@ -713,6 +732,10 @@ test-integration: ## run all integration tests. CNS_VERSION=$(CNS_VERSION) \ go test -mod=readonly -buildvcs=false -timeout 1h -coverpkg=./... -race -covermode atomic -coverprofile=coverage.out -tags=integration ./test/integration... +test-validate-state: + cd test/integration/load && go test -count 1 -timeout 30m -tags load -run ^TestValidateState -tags=load -restart-case=$(RESTART_CASE) -os=$(OS) + cd ../../.. + test-cyclonus: ## run the cyclonus test for npm. cd test/cyclonus && bash ./test-cyclonus.sh cd .. 
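A minimal usage sketch for the Makefile targets added above (cni-plugin-image, cni-plugin-image-name-and-tag, test-validate-state); the OS, ARCH, OS_VERSION and RESTART_CASE values are illustrative assumptions rather than values taken from this pipeline:

    # build the temporary Windows CNI plugin image from cni/build/windows.Dockerfile and print its tag
    make cni-plugin-image OS=windows ARCH=amd64 OS_VERSION=ltsc2022
    make cni-plugin-image-name-and-tag OS=windows ARCH=amd64 OS_VERSION=ltsc2022

    # run the load-test state validation with the current kubeconfig, exercising the node-restart path
    make test-validate-state OS=windows RESTART_CASE=true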
diff --git a/cni/build/windows.Dockerfile b/cni/build/windows.Dockerfile
new file mode 100644
index 0000000000..285382dd37
--- /dev/null
+++ b/cni/build/windows.Dockerfile
@@ -0,0 +1,21 @@
+ARG OS_VERSION
+FROM --platform=linux/amd64 mcr.microsoft.com/oss/go/microsoft/golang:1.20 AS builder
+ARG VERSION
+ARG CNI_AI_PATH
+ARG CNI_AI_ID
+WORKDIR /azure-container-networking
+COPY . .
+RUN GOOS=windows CGO_ENABLED=0 go build -a -o azure-vnet.exe -trimpath -ldflags "-X main.version="$VERSION"" -gcflags="-dwarflocationlists=true" cni/network/plugin/main.go
+RUN GOOS=windows CGO_ENABLED=0 go build -a -o azure-vnet-telemetry.exe -trimpath -ldflags "-X main.version="$VERSION" -X "$CNI_AI_PATH"="$CNI_AI_ID"" -gcflags="-dwarflocationlists=true" cni/telemetry/service/telemetrymain.go
+RUN GOOS=windows CGO_ENABLED=0 go build -a -o azure-vnet-ipam.exe -trimpath -ldflags "-X main.version="$VERSION"" -gcflags="-dwarflocationlists=true" cni/ipam/plugin/main.go
+
+FROM mcr.microsoft.com/windows/servercore:${OS_VERSION}
+SHELL ["powershell", "-command"]
+COPY --from=builder /azure-container-networking/azure-vnet.exe azure-vnet.exe
+COPY --from=builder /azure-container-networking/azure-vnet-telemetry.exe azure-vnet-telemetry.exe
+COPY --from=builder /azure-container-networking/telemetry/azure-vnet-telemetry.config azure-vnet-telemetry.config
+COPY --from=builder /azure-container-networking/azure-vnet-ipam.exe azure-vnet-ipam.exe
+
+# This would be replaced with the dropgz version for Windows.
+COPY --from=builder /azure-container-networking/hack/scripts/updatecni.ps1 updatecni.ps1
+ENTRYPOINT ["powershell.exe", ".\\updatecni.ps1"]
diff --git a/cns/configuration/cns_config.json b/cns/configuration/cns_config.json
index 23ec3116ad..2efba1def7 100644
--- a/cns/configuration/cns_config.json
+++ b/cns/configuration/cns_config.json
@@ -29,5 +29,6 @@
   "MSISettings": {
     "ResourceID": ""
   },
-  "PopulateHomeAzCacheRetryIntervalSecs": 15
+  "PopulateHomeAzCacheRetryIntervalSecs": 15,
+  "MellanoxMonitorIntervalSecs": 30
 }
diff --git a/cns/configuration/configuration.go b/cns/configuration/configuration.go
index 8366b6cd4a..2753b4f1b5 100644
--- a/cns/configuration/configuration.go
+++ b/cns/configuration/configuration.go
@@ -43,6 +43,7 @@ type CNSConfig struct {
 	EnableCNIConflistGeneration bool
 	CNIConflistFilepath string
 	PopulateHomeAzCacheRetryIntervalSecs int
+	MellanoxMonitorIntervalSecs int
 }
 
 type TelemetrySettings struct {
diff --git a/cns/service/main.go b/cns/service/main.go
index 77e41b1348..56c4204fd5 100644
--- a/cns/service/main.go
+++ b/cns/service/main.go
@@ -713,6 +713,17 @@ func main() {
 		return
 	}
 
+	// We are only setting the PriorityVLANTag in 'cns.Direct' mode, because it neatly maps today, to 'isUsingMultitenancy'
+	// In the future, we would want to have a better CNS flag, to explicitly say, this CNS is using multitenancy
+	if config.ChannelMode == cns.Direct {
+		// Set Mellanox adapter's PriorityVLANTag value to 3 if adapter exists
+		// reg key value for PriorityVLANTag = 3 --> Packet priority and VLAN enabled
+		// for more details goto https://docs.nvidia.com/networking/display/winof2v230/Configuring+the+Driver+Registry+Keys#ConfiguringtheDriverRegistryKeys-GeneralRegistryKeysGeneralRegistryKeys
+		if platform.HasMellanoxAdapter() {
+			go platform.MonitorAndSetMellanoxRegKeyPriorityVLANTag(rootCtx, cnsconfig.MellanoxMonitorIntervalSecs)
+		}
+	}
+
 	// Initialize state if CNS is running in CRD mode
 	// State must be initialized before we start HTTPRestService
 	if config.ChannelMode == cns.CRD {
diff --git
a/dropgz/build/cniTest.Dockerfile b/dropgz/build/cniTest_linux.Dockerfile similarity index 74% rename from dropgz/build/cniTest.Dockerfile rename to dropgz/build/cniTest_linux.Dockerfile index 21722963ce..c4f460abd2 100644 --- a/dropgz/build/cniTest.Dockerfile +++ b/dropgz/build/cniTest_linux.Dockerfile @@ -9,6 +9,8 @@ ARG VERSION WORKDIR /azure-container-networking COPY . . RUN CGO_ENABLED=0 go build -a -o bin/azure-vnet -trimpath -ldflags "-X main.version="$VERSION"" -gcflags="-dwarflocationlists=true" cni/network/plugin/main.go +RUN CGO_ENABLED=0 go build -a -o bin/azure-vnet-telemetry -trimpath -ldflags "-X main.version="$VERSION"" -gcflags="-dwarflocationlists=true" cni/telemetry/service/telemetrymain.go +RUN CGO_ENABLED=0 go build -a -o bin/azure-vnet-ipam -trimpath -ldflags "-X main.version="$VERSION"" -gcflags="-dwarflocationlists=true" cni/ipam/plugin/main.go FROM mcr.microsoft.com/cbl-mariner/base/core:2.0 AS compressor ARG OS @@ -16,9 +18,12 @@ WORKDIR /dropgz COPY dropgz . COPY --from=azure-ipam /azure-ipam/*.conflist pkg/embed/fs COPY --from=azure-ipam /azure-ipam/bin/* pkg/embed/fs +COPY --from=azure-vnet /azure-container-networking/cni/azure-$OS.conflist pkg/embed/fs/azure.conflist COPY --from=azure-vnet /azure-container-networking/cni/azure-$OS-swift.conflist pkg/embed/fs/azure-swift.conflist COPY --from=azure-vnet /azure-container-networking/cni/azure-$OS-swift-overlay.conflist pkg/embed/fs/azure-swift-overlay.conflist COPY --from=azure-vnet /azure-container-networking/bin/* pkg/embed/fs +COPY --from=azure-vnet /azure-container-networking/telemetry/azure-vnet-telemetry.config pkg/embed/fs + RUN cd pkg/embed/fs/ && sha256sum * > sum.txt RUN gzip --verbose --best --recursive pkg/embed/fs && for f in pkg/embed/fs/*.gz; do mv -- "$f" "${f%%.gz}"; done diff --git a/dropgz/build/cniTest_windows.Dockerfile b/dropgz/build/cniTest_windows.Dockerfile new file mode 100644 index 0000000000..874d46b95d --- /dev/null +++ b/dropgz/build/cniTest_windows.Dockerfile @@ -0,0 +1,30 @@ +ARG ARCH +ARG OS_VERSION +FROM --platform=linux/${ARCH} mcr.microsoft.com/oss/go/microsoft/golang:1.20 AS azure-vnet +ARG VERSION +WORKDIR /azure-container-networking +COPY . . +RUN GOOS=windows CGO_ENABLED=0 go build -a -o azure-vnet.exe -trimpath -ldflags "-X main.version="$VERSION"" -gcflags="-dwarflocationlists=true" cni/network/plugin/main.go +RUN GOOS=windows CGO_ENABLED=0 go build -a -o azure-vnet-telemetry.exe -trimpath -ldflags "-X main.version="$VERSION"" -gcflags="-dwarflocationlists=true" cni/telemetry/service/telemetrymain.go +RUN GOOS=windows CGO_ENABLED=0 go build -a -o azure-vnet-ipam.exe -trimpath -ldflags "-X main.version="$VERSION"" -gcflags="-dwarflocationlists=true" cni/ipam/plugin/main.go + +FROM --platform=linux/${ARCH} mcr.microsoft.com/cbl-mariner/base/core:2.0 AS compressor +ARG OS +WORKDIR /dropgz +COPY dropgz . +COPY --from=azure-vnet /azure-container-networking/azure-vnet.exe pkg/embed/fs +COPY --from=azure-vnet /azure-container-networking/azure-vnet-telemetry.exe pkg/embed/fs +COPY --from=azure-vnet /azure-container-networking/azure-vnet-ipam.exe pkg/embed/fs +COPY --from=azure-vnet /azure-container-networking/telemetry/azure-vnet-telemetry.config pkg/embed/fs +RUN cd pkg/embed/fs/ && sha256sum * > sum.txt +RUN gzip --verbose --best --recursive pkg/embed/fs && for f in pkg/embed/fs/*.gz; do mv -- "$f" "${f%%.gz}"; done + +FROM --platform=linux/${ARCH} mcr.microsoft.com/oss/go/microsoft/golang:1.20 AS dropgz +ARG VERSION +WORKDIR /dropgz +COPY --from=compressor /dropgz . 
+RUN GOOS=windows CGO_ENABLED=0 go build -a -o bin/dropgz.exe -trimpath -ldflags "-X github.com/Azure/azure-container-networking/dropgz/internal/buildinfo.Version="$VERSION"" -gcflags="-dwarflocationlists=true" main.go + +FROM mcr.microsoft.com/windows/nanoserver:${OS_VERSION} +COPY --from=dropgz /dropgz/bin/dropgz.exe dropgz.exe +ENTRYPOINT [ "dropgz.exe" ] diff --git a/dropgz/build/windows.Dockerfile b/dropgz/build/windows.Dockerfile new file mode 100644 index 0000000000..d9cfdab6cb --- /dev/null +++ b/dropgz/build/windows.Dockerfile @@ -0,0 +1,36 @@ +ARG ARCH +ARG OS_VERSION +FROM --platform=linux/${ARCH} mcr.microsoft.com/cbl-mariner/base/core:2.0 AS tar +RUN tdnf install -y tar +RUN tdnf install -y unzip +RUN tdnf upgrade -y && tdnf install -y ca-certificates + +FROM tar AS azure-vnet +ARG AZCNI_VERSION=v1.5.4 +ARG VERSION +ARG OS +ARG ARCH +WORKDIR /azure-container-networking +COPY . . +RUN curl -LO --cacert /etc/ssl/certs/ca-certificates.crt https://github.com/Azure/azure-container-networking/releases/download/$AZCNI_VERSION/azure-vnet-cni-$OS-$ARCH-$AZCNI_VERSION.zip && unzip -o azure-vnet-cni-$OS-$ARCH-$AZCNI_VERSION.zip + +FROM --platform=linux/${ARCH} mcr.microsoft.com/cbl-mariner/base/core:2.0 AS compressor +ARG OS +WORKDIR /dropgz +COPY dropgz . +COPY --from=azure-vnet /azure-container-networking/azure-vnet.exe pkg/embed/fs +COPY --from=azure-vnet /azure-container-networking/azure-vnet-telemetry.exe pkg/embed/fs +COPY --from=azure-vnet /azure-container-networking/azure-vnet-ipam.exe pkg/embed/fs +COPY --from=azure-vnet /azure-container-networking/azure-vnet-telemetry.config pkg/embed/fs +RUN cd pkg/embed/fs/ && sha256sum * > sum.txt +RUN gzip --verbose --best --recursive pkg/embed/fs && for f in pkg/embed/fs/*.gz; do mv -- "$f" "${f%%.gz}"; done + +FROM --platform=linux/${ARCH} mcr.microsoft.com/oss/go/microsoft/golang:1.20 AS dropgz +ARG VERSION +WORKDIR /dropgz +COPY --from=compressor /dropgz . +RUN GOOS=windows CGO_ENABLED=0 go build -a -o bin/dropgz.exe -trimpath -ldflags "-X github.com/Azure/azure-container-networking/dropgz/internal/buildinfo.Version="$VERSION"" -gcflags="-dwarflocationlists=true" main.go + +FROM mcr.microsoft.com/windows/nanoserver:${OS_VERSION} +COPY --from=dropgz /dropgz/bin/dropgz.exe dropgz.exe +ENTRYPOINT [ "dropgz.exe" ] diff --git a/dropgz/pkg/embed/payload.go b/dropgz/pkg/embed/payload.go index b36f8cb479..fd69d56eb6 100644 --- a/dropgz/pkg/embed/payload.go +++ b/dropgz/pkg/embed/payload.go @@ -7,8 +7,8 @@ import ( "io" "io/fs" "os" + "path" "path/filepath" - "strings" "github.com/pkg/errors" "go.uber.org/zap" @@ -16,13 +16,13 @@ import ( const ( cwd = "fs" - pathPrefix = cwd + string(filepath.Separator) oldFileSuffix = ".old" ) var ErrArgsMismatched = errors.New("mismatched argument count") // embedfs contains the embedded files for deployment, as a read-only FileSystem containing only "embedfs/". +// //nolint:typecheck // dir is populated at build. 
//go:embed fs var embedfs embed.FS @@ -36,7 +36,8 @@ func Contents() ([]string, error) { if d.IsDir() { return nil } - contents = append(contents, strings.TrimPrefix(path, pathPrefix)) + _, filename := filepath.Split(path) + contents = append(contents, filename) return nil }) if err != nil { @@ -69,10 +70,10 @@ func (c *compoundReadCloser) Close() error { return nil } -func Extract(path string) (*compoundReadCloser, error) { - f, err := embedfs.Open(filepath.Join(cwd, path)) +func Extract(p string) (*compoundReadCloser, error) { + f, err := embedfs.Open(path.Join(cwd, p)) if err != nil { - return nil, errors.Wrapf(err, "failed to open file %s", path) + return nil, errors.Wrapf(err, "failed to open file %s", p) } r, err := gzip.NewReader(bufio.NewReader(f)) if err != nil { diff --git a/hack/swift/Makefile b/hack/aks/Makefile similarity index 68% rename from hack/swift/Makefile rename to hack/aks/Makefile index 58d185d250..14e2f1e0f7 100644 --- a/hack/swift/Makefile +++ b/hack/aks/Makefile @@ -5,13 +5,16 @@ KUBECFG = $(HOME)/.kube SSH = $(HOME)/.ssh AZCFG = $(HOME)/.azure AZIMG = mcr.microsoft.com/azure-cli -AZCLI ?= docker run --rm -v $(AZCFG):/root/.azure -v $(KUBECFG):/root/.kube -v $(SSH):/root/.ssh $(AZIMG) az +AZCLI ?= docker run --rm -v $(AZCFG):/root/.azure -v $(KUBECFG):/root/.kube -v $(SSH):/root/.ssh -v $(PWD):/root/tmpsrc $(AZIMG) az # overrideable defaults -REGION ?= westus2 -OS_SKU ?= Ubuntu -VM_SIZE ?= Standard_B2s -NODE_COUNT ?= 2 +REGION ?= westus2 +OS_SKU ?= Ubuntu +WINDOWS_OS_SKU ?= Windows2022 +VM_SIZE ?= Standard_B2s +NODE_COUNT ?= 2 +K8S_VER ?= 1.25 # Used only for ubuntu 18 as K8S 1.24.9, as K8S > 1.25 have Ubuntu 22 +WINDOWS_VM_SKU ?= Standard_B2s # overrideable variables SUB ?= $(AZURE_SUBSCRIPTION) @@ -33,7 +36,7 @@ azlogin: azcfg: @$(AZCLI) extension add --name aks-preview --yes - @$(AZCLI) extension update --name aks-preview + @$(AZCLI) extension update --name aks-preview set-kubeconf: ## Adds the kubeconf for $CLUSTER $(AZCLI) aks get-credentials -n $(CLUSTER) -g $(GROUP) @@ -61,7 +64,7 @@ vars: ## Show the input vars configured for the cluster commands ##@ SWIFT Infra -rg-up: ## Create resource group +rg-up: ## Create resource group @$(AZCLI) group create --location $(REGION) --name $(GROUP) rg-down: ## Delete resource group @@ -96,6 +99,20 @@ overlay-byocni-up: rg-up overlay-net-up ## Brings up an Overlay BYO CNI cluster --yes @$(MAKE) set-kubeconf +overlay-no-kube-proxy-up: rg-up overlay-net-up ## Brings up an Overlay BYO CNI cluster without kube-proxy for Cilium + $(AZCLI) aks create -n $(CLUSTER) -g $(GROUP) -l $(REGION) \ + --node-count $(NODE_COUNT) \ + --node-vm-size $(VM_SIZE) \ + --load-balancer-sku basic \ + --network-plugin none \ + --network-plugin-mode overlay \ + --pod-cidr 192.168.0.0/16 \ + --vnet-subnet-id /subscriptions/$(SUB)/resourceGroups/$(GROUP)/providers/Microsoft.Network/virtualNetworks/$(VNET)/subnets/nodenet \ + --no-ssh-key \ + --kube-proxy-config ./kube-proxy.json \ + --yes + @$(MAKE) set-kubeconf + overlay-cilium-up: rg-up overlay-net-up ## Brings up an Overlay AzCNI cluster $(AZCLI) aks create -n $(CLUSTER) -g $(GROUP) -l $(REGION) \ --node-count $(NODE_COUNT) \ @@ -136,6 +153,20 @@ swift-byocni-up: rg-up swift-net-up ## Bring up a SWIFT BYO CNI cluster --yes @$(MAKE) set-kubeconf +swift-no-kube-proxy-up: rg-up swift-net-up ## Bring up a SWIFT BYO CNI cluster without kube-proxy for Cilium + $(AZCLI) aks create -n $(CLUSTER) -g $(GROUP) -l $(REGION) \ + --node-count $(NODE_COUNT) \ + --node-vm-size $(VM_SIZE) \ + --load-balancer-sku 
basic \ + --network-plugin none \ + --vnet-subnet-id /subscriptions/$(SUB)/resourceGroups/$(GROUP)/providers/Microsoft.Network/virtualNetworks/$(VNET)/subnets/nodenet \ + --pod-subnet-id /subscriptions/$(SUB)/resourceGroups/$(GROUP)/providers/Microsoft.Network/virtualNetworks/$(VNET)/subnets/podnet \ + --no-ssh-key \ + --os-sku $(OS_SKU) \ + --kube-proxy-config ./kube-proxy.json \ + --yes + @$(MAKE) set-kubeconf + swift-cilium-up: rg-up swift-net-up ## Bring up a SWIFT Cilium cluster $(AZCLI) aks create -n $(CLUSTER) -g $(GROUP) -l $(REGION) \ --node-count $(NODE_COUNT) \ @@ -162,6 +193,41 @@ swift-up: rg-up swift-net-up ## Bring up a SWIFT AzCNI cluster --yes @$(MAKE) set-kubeconf +windows-cniv1-up: rg-up overlay-net-up ## Bring up a Windows CNIv1 cluster + $(AZCLI) aks create -n $(CLUSTER) -g $(GROUP) -l $(REGION) \ + --node-count $(NODE_COUNT) \ + --node-vm-size $(VM_SIZE) \ + --network-plugin azure \ + --windows-admin-password $(WINDOWS_PASSWORD) \ + --windows-admin-username $(WINDOWS_USERNAME) \ + --vnet-subnet-id /subscriptions/$(SUB)/resourceGroups/$(GROUP)/providers/Microsoft.Network/virtualNetworks/$(VNET)/subnets/nodenet \ + --no-ssh-key \ + --yes + + $(AZCLI) aks nodepool add --resource-group $(GROUP) --cluster-name $(CLUSTER) \ + --os-type Windows \ + --os-sku $(WINDOWS_OS_SKU) \ + --max-pods 250 \ + --name npwin \ + --node-count $(NODE_COUNT) \ + -s $(WINDOWS_VM_SKU) + + @$(MAKE) set-kubeconf + +linux-cniv1-up: rg-up overlay-net-up + $(AZCLI) aks create -n $(CLUSTER) -g $(GROUP) -l $(REGION) \ + --node-count $(NODE_COUNT) \ + --node-vm-size $(VM_SIZE) \ + --max-pods 250 \ + --network-plugin azure \ + --vnet-subnet-id /subscriptions/$(SUB)/resourceGroups/$(GROUP)/providers/Microsoft.Network/virtualNetworks/$(VNET)/subnets/nodenet \ + --kubernetes-version $(K8S_VER) \ + --os-sku $(OS_SKU) \ + --no-ssh-key \ + --yes + + @$(MAKE) set-kubeconf + down: ## Delete the cluster $(AZCLI) aks delete -g $(GROUP) -n $(CLUSTER) --yes @$(MAKE) unset-kubeconf diff --git a/hack/swift/README.md b/hack/aks/README.md similarity index 95% rename from hack/swift/README.md rename to hack/aks/README.md index 40069d88a3..3a4e80e4f0 100644 --- a/hack/swift/README.md +++ b/hack/aks/README.md @@ -28,6 +28,7 @@ AKS Clusters swift-byocni-up Bring up a SWIFT BYO CNI cluster swift-cilium-up Bring up a SWIFT Cilium cluster swift-up Bring up a SWIFT AzCNI cluster + windows-cniv1-up Bring up a Windows AzCNIv1 cluster down Delete the cluster vmss-restart Restart the nodes of the cluster ``` diff --git a/hack/aks/kube-proxy.json b/hack/aks/kube-proxy.json new file mode 100644 index 0000000000..68bf56cad8 --- /dev/null +++ b/hack/aks/kube-proxy.json @@ -0,0 +1,10 @@ +{ + "enabled": false, + "mode": "IPVS", + "ipvsConfig": { + "scheduler": "LeastConnection", + "TCPTimeoutSeconds": 900, + "TCPFINTimeoutSeconds": 120, + "UDPTimeoutSeconds": 300 + } +} diff --git a/hack/scripts/scale_deployment.sh b/hack/scripts/scale_deployment.sh deleted file mode 100644 index ceb715057d..0000000000 --- a/hack/scripts/scale_deployment.sh +++ /dev/null @@ -1,74 +0,0 @@ -#!/bin/bash -set -ex -total_num_of_run=5 -scale_up_of_pods=2400 -scale_down_pods=1 - -function help() -{ - echo "Scale deployment based on the parameters." - echo "By default script will repeat the process of scale up/down" - echo - echo "Syntax: scale [-h|n|u|s|c|r]" - echo "options:" - echo "h Print this help." - echo "n Number of times the scale down/scale up task should run." - echo "u Number of pods to be scaled up." - echo "s Scale the pods single time. 
Accepted Values: true, default : false" - echo "c Check deployment status. Accepted Values: true, default : false" - echo -} - -function check_deployment() { - available=-1 - replicas="$1" - while [ "${available}" -ne "${replicas}" ]; do - sleep 5s - current_available=$(kubectl get deployment container -o "jsonpath={.status.availableReplicas}" ) - if [ "$current_available" != '' ]; then - available=$current_available - fi - echo "available replicas: ${available}" - done - echo "deployment complete." -} - -function scale_deployment() -{ - desired_replicas=$1 - kubectl scale deployment container --replicas "$desired_replicas" - echo "Scaled the deployment to $desired_replicas" -} - -function repeat_deployment() { - echo "Total num of run $total_num_of_run" - for ((i=1; i <= total_num_of_run; i++)) - do - echo "Current Run: $i" - echo "Scaling down pods to : $scale_down_pods" - scale_deployment $scale_down_pods - check_deployment $scale_down_pods - echo "Scaling pods to : $scale_up_of_pods" - scale_deployment "$scale_up_of_pods" - check_deployment "$scale_up_of_pods" - done -} - -while getopts ":h:n:u:sc" option; do - case $option in - h) help - exit;; - n) total_num_of_run=$OPTARG;; - u) scale_up_of_pods=$OPTARG;; - s) echo "Scale deployment" - scale_deployment "$scale_up_of_pods";; - c) echo "Check deployment" - check_deployment "$scale_up_of_pods";; - \?) echo "Error: Invalid option" - exit;; - esac -done - -if [ "$total_num_of_run" -gt 0 ]; then - repeat_deployment -fi diff --git a/hack/scripts/updatecni.ps1 b/hack/scripts/updatecni.ps1 new file mode 100644 index 0000000000..630c5fbbc7 --- /dev/null +++ b/hack/scripts/updatecni.ps1 @@ -0,0 +1,57 @@ +Write-Host $env:CONTAINER_SANDBOX_MOUNT_POINT +$sourceCNI = $env:CONTAINER_SANDBOX_MOUNT_POINT + "azure-vnet.exe" +$sourceIpam = $env:CONTAINER_SANDBOX_MOUNT_POINT + "azure-vnet-ipam.exe" +$sourceTelemetry = $env:CONTAINER_SANDBOX_MOUNT_POINT + "azure-vnet-telemetry.exe" +$sourceTelemetryConfig = $env:CONTAINER_SANDBOX_MOUNT_POINT + "azure-vnet-telemetry.config" + +$sourceCNIVersion = & "$sourceCNI" -v +$currentVersion = "" +$sourceTelemetryVersion = & "$sourceTelemetry" -v +$currentTelemetryVersion = "" + +$cniExists = Test-Path "C:\k\azurecni\bin\azure-vnet.exe" +$telemetryExists = Test-Path "C:\k\azurecni\bin\azure-vnet-telemetry.exe" + +Write-Host "Source $sourceCNIVersion" +Write-Host "Source Telemetry $sourceTelemetryVersion" + +if ($cniExists) { + $currentVersion = & "C:\k\azurecni\bin\azure-vnet.exe" -v +} + +if($telemetryExists){ + $currentTelemetryVersion = & "C:\k\azurecni\bin\azure-vnet-telemetry.exe" -v +} + + +Write-Host "Current Host $currentVersion" +Write-Host "Current Telemetry $currentTelemetryVersion" + +## check telemetry was already installed so not to get stuck in a infinite loop of rebooting and killing the process +if ($currentTelemetryVersion -ne $sourceTelemetryVersion){ + $processes = Get-Process -Name azure-vnet-telemetry -ErrorAction SilentlyContinue + for ($i = 0; $i -lt $processes.Count; $i++) { + Write-Host "Killing azure-vnet-telemetry process..." + $processes[$i].Kill() + } + Write-Host "copying azure-vnet-telemetry to windows node..." + Remove-Item "C:\k\azurecni\bin\azure-vnet-telemetry.exe" + Copy-Item $sourceTelemetry -Destination "C:\k\azurecni\bin" + + Write-Host "copying azure-vnet-telemetry.config to windows node..." 
+ Remove-Item "C:\k\azurecni\bin\azure-vnet-telemetry.config" + Copy-Item $sourceTelemetryConfig -Destination "C:\k\azurecni\bin" +} + +## check CNI was already installed so not to get stuck in a infinite loop of rebooting +if ($currentVersion -ne $sourceCNIVersion){ + Write-Host "copying azure-vnet to windows node..." + Remove-Item "C:\k\azurecni\bin\azure-vnet.exe" + Copy-Item $sourceCNI -Destination "C:\k\azurecni\bin" + + Write-Host "copying azure-vnet-ipam to windows node..." + Remove-Item "C:\k\azurecni\bin\azure-vnet-ipam.exe" + Copy-Item $sourceIpam -Destination "C:\k\azurecni\bin" +} + +Start-Sleep -s 1000 diff --git a/hack/scripts/validate_state.sh b/hack/scripts/validate_state.sh deleted file mode 100644 index 8faf55f3a3..0000000000 --- a/hack/scripts/validate_state.sh +++ /dev/null @@ -1,138 +0,0 @@ -#!/bin/bash -function find_in_array() { - for i in $1 - do - if [ "$i" == "$2" ] ; then - return 0 - fi - done - return 1 -} - -for node in $(kubectl get nodes -o name); -do - echo "Current : $node" - node_name="${node##*/}" - node_ip=$(kubectl get "$node" -o jsonpath='{$.status.addresses[?(@.type=="InternalIP")].address}') - echo "Node internal ip: $node_ip" - # Check pod count after restarting nodes, statefile does not exist after restart - echo "checking whether the node has any pods deployed to it or not" - pod_count=$(kubectl get pods -A -o wide | grep "$node_name" -c) - if [[ $pod_count -eq 0 ]]; then - echo "Skipping validation for this node. No pods were deployed after the restart, so no statefile exists" - continue - fi - privileged_pod=$(kubectl get pods -n kube-system -l app=privileged-daemonset -o wide | grep "$node_name" | awk '{print $1}') - echo "privileged pod : $privileged_pod" - if [ "$privileged_pod" == '' ]; then - kubectl describe daemonset privileged-daemonset -n kube-system - exit 1 - fi - while ! [ -s "azure_endpoints.json" ] - do - echo "trying to get the azure_endpoints" - kubectl exec -i "$privileged_pod" -n kube-system -- bash -c "cat /var/run/azure-cns/azure-endpoints.json" > azure_endpoints.json - sleep 10 - done - - cilium_agent=$(kubectl get pod -l k8s-app=cilium -n kube-system -o wide | grep "$node_name" | awk '{print $1}') - echo "cilium agent : $cilium_agent" - - while ! [ -s "cilium_endpoints.json" ] - do - echo "trying to get the cilium_endpoints" - kubectl exec -i "$cilium_agent" -n kube-system -- bash -c "cilium endpoint list -o json" > cilium_endpoints.json - sleep 10 - done - - cns_pod=$(kubectl get pod -l k8s-app=azure-cns -n kube-system -o wide | grep "$node_name" | awk '{print $1}') - echo "azure-cns pod : $cns_pod" - - while ! [ -s "cns_endpoints.json" ] - do - echo "trying to get the cns_endpoints" - kubectl exec -it "$cns_pod" -n kube-system -- curl localhost:10090/debug/ipaddresses -d '{"IPConfigStateFilter":["Assigned"]}' > cns_endpoints.json - sleep 10 - done - - total_pods=$(kubectl get pods --all-namespaces -o wide --field-selector spec.nodeName="$node_name",status.phase=Running --output json) - - echo "Checking if there are any pods with no ips" - pods_with_no_ip=$(echo "$total_pods" | jq -j '(.items[] | select(.status.podIP == "" or .status.podIP == null))') - if [ "$pods_with_no_ip" != "" ]; then - echo "There are some pods with no ip assigned." 
- kubectl get pods -A -o wide - exit 1 - fi - - total_pods_ips=$(echo "$total_pods" | jq -r '(.items[] | select(.status.podIP != "" and .status.podIP != null)) | .status.podIP') - pod_ips=() - num_of_pod_ips=0 - for ip in $total_pods_ips - do - if [ "$ip" != "$node_ip" ]; then - pod_ips+=("$ip") - num_of_pod_ips=$((num_of_pod_ips+1)) - fi - done - echo "Number of pods running with ip assigned $num_of_pod_ips" - - num_of_azure_endpoint_ips=$( cat azure_endpoints.json | jq -r '[.Endpoints | .[] | .IfnameToIPMap.eth0.IPv4[0].IP] | length' ) - azure_endpoint_ips=$( cat azure_endpoints.json | jq -r '(.Endpoints | .[] | .IfnameToIPMap.eth0.IPv4[0].IP) ' ) - echo "Number of azure endpoint ips : $num_of_azure_endpoint_ips" - - if [ "$num_of_pod_ips" != "$num_of_azure_endpoint_ips" ]; then - printf "Error: Number of pods in running state is less than total ips in the azure endpoint file" >&2 - exit 1 - fi - - echo "checking the ips in the azure endpoints file" - for ip in "${pod_ips[@]}" - do - find_in_array "$azure_endpoint_ips" "$ip" "azure_endpoints.json" - if [[ $? -eq 1 ]]; then - printf "Error: %s Not found in the azure_endpoints.json" "$ip" >&2 - exit 1 - fi - done - - num_of_cilium_endpoints=$(cat cilium_endpoints.json | jq -r '[.[] | select(.status.networking.addressing[0].ipv4 != null)] | length') - cilium_endpoint_ips=$(cat cilium_endpoints.json | jq -r '(.[] | select(.status.networking.addressing[0].ipv4 != null) | .status.networking.addressing[0].ipv4)') - echo "Number of cilium endpoints: $num_of_cilium_endpoints" - - if [ "$num_of_pod_ips" != "$num_of_cilium_endpoints" ]; then - printf "Error: Number of pods in running state is less than total ips in the cilium endpoint file" >&2 - exit 1 - fi - - for ip in "${pod_ips[@]}" - do - find_in_array "$cilium_endpoint_ips" "$ip" "cilium_endpoints.json" - if [[ $? -eq 1 ]]; then - printf "Error: %s Not found in the cilium_endpoints.json" "$ip" >&2 - exit 1 - fi - done - - num_of_cns_endpoints=$(cat cns_endpoints.json | jq -r '[.IPConfigurationStatus | .[] | select(.IPAddress != null)] | length') - cns_endpoint_ips=$(cat cns_endpoints.json | jq -r '(.IPConfigurationStatus | .[] | select(.IPAddress != null) | .IPAddress)') - echo "Number of cns endpoints: $num_of_cns_endpoints" - - if [ "$num_of_pod_ips" != "$num_of_cns_endpoints" ]; then - printf "Error: Number of pods in running state is less than total ips in the cns endpoint file" >&2 - exit 1 - fi - - for ip in "${pod_ips[@]}" - do - find_in_array "$cns_endpoint_ips" "$ip" "cns_endpoints.json" - if [[ $? -eq 1 ]]; then - printf "Error: %s Not found in the cns_endpoints.json" "$ip" >&2 - exit 1 - fi - done - - #We are restarting the systmemd network and checking that the connectivity works after the restart. 
For more details: https://github.com/cilium/cilium/issues/18706
-  kubectl exec -i "$privileged_pod" -n kube-system -- bash -c "chroot /host /bin/bash -c 'systemctl restart systemd-networkd'"
-  rm -rf cilium_endpoints.json azure_endpoints.json cns_endpoints.json
-done
diff --git a/platform/Makefile b/platform/Makefile
new file mode 100644
index 0000000000..7314b0b9c9
--- /dev/null
+++ b/platform/Makefile
@@ -0,0 +1,11 @@
+REPO_ROOT = $(shell git rev-parse --show-toplevel)
+TOOLS_BIN_DIR = $(REPO_ROOT)/build/tools/bin
+MOCKGEN = $(TOOLS_BIN_DIR)/mockgen
+
+.PHONY: generate
+
+generate: $(MOCKGEN) ## Generate mock clients
+	$(MOCKGEN) -source=$(REPO_ROOT)/platform/windows/adapter/network_adapter.go -package=mocks NetworkAdapter > windows/adapter/mocks/networkadapter_generated.go
+
+$(MOCKGEN):
+	@make -C $(REPO_ROOT) $(MOCKGEN)
diff --git a/platform/os_linux.go b/platform/os_linux.go
index 6aae528a79..60356d54ce 100644
--- a/platform/os_linux.go
+++ b/platform/os_linux.go
@@ -179,3 +179,11 @@ func PrintDependencyPackageDetails() {
 func ReplaceFile(source, destination string) error {
 	return os.Rename(source, destination)
 }
+
+// Mellanox adapter not applicable for linux
+func HasMellanoxAdapter() bool {
+	return false
+}
+
+// Not needed for Linux
+func MonitorAndSetMellanoxRegKeyPriorityVLANTag(_ context.Context, _ int) {}
diff --git a/platform/os_windows.go b/platform/os_windows.go
index 648c82bc0d..6c5b450a3c 100644
--- a/platform/os_windows.go
+++ b/platform/os_windows.go
@@ -5,6 +5,7 @@ package platform
 
 import (
 	"bytes"
+	"context"
 	"fmt"
 	"os"
 	"os/exec"
@@ -68,6 +69,14 @@ const (
 	// Command to restart HNS service
 	RestartHnsServiceCommand = "Restart-Service -Name hns"
+
+	// Interval between successive checks for mellanox adapter's PriorityVLANTag value
+	defaultMellanoxMonitorInterval = 30 * time.Second
+
+	// Value for reg key: PriorityVLANTag for adapter
+	// reg key value for PriorityVLANTag = 3 --> Packet priority and VLAN enabled
+	// for more details goto https://learn.microsoft.com/en-us/windows-hardware/drivers/network/standardized-inf-keywords-for-ndis-qos
+	desiredVLANTagForMellanox = 3
 )
 
 // Flag to check if sdnRemoteArpMacAddress registry key is set
@@ -191,6 +200,68 @@ func SetSdnRemoteArpMacAddress() error {
 	return nil
 }
 
+func HasMellanoxAdapter() bool {
+	m := &mellanox.Mellanox{}
+	return hasNetworkAdapter(m)
+}
+
+func hasNetworkAdapter(na adapter.NetworkAdapter) bool {
+	adapterName, err := na.GetAdapterName()
+	if err != nil {
+		log.Errorf("Error while getting network adapter name: %v", err)
+		return false
+	}
+	log.Printf("Name of the network adapter : %v", adapterName)
+	return true
+}
+
+// Regularly monitors the Mellanox PriorityVLANTag registry value and sets it to desired value if needed
+func MonitorAndSetMellanoxRegKeyPriorityVLANTag(ctx context.Context, intervalSecs int) {
+	m := &mellanox.Mellanox{}
+	interval := defaultMellanoxMonitorInterval
+	if intervalSecs > 0 {
+		interval = time.Duration(intervalSecs) * time.Second
+	}
+	err := updatePriorityVLANTagIfRequired(m, desiredVLANTagForMellanox)
+	if err != nil {
+		log.Errorf("Error while monitoring mellanox, continuing: %v", err)
+	}
+	ticker := time.NewTicker(interval)
+	defer ticker.Stop()
+	for {
+		select {
+		case <-ctx.Done():
+			log.Printf("context cancelled, stopping Mellanox Monitoring: %v", ctx.Err())
+			return
+		case <-ticker.C:
+			err := updatePriorityVLANTagIfRequired(m, desiredVLANTagForMellanox)
+			if err != nil {
+				log.Errorf("Error while monitoring mellanox, continuing: %v", err)
+			}
+		}
+	}
+}
+
+//
Updates the priority VLAN Tag of mellanox adapter if not already set to the desired value +func updatePriorityVLANTagIfRequired(na adapter.NetworkAdapter, desiredValue int) error { + currentVal, err := na.GetPriorityVLANTag() + if err != nil { + return fmt.Errorf("error while getting Priority VLAN Tag value: %w", err) + } + + if currentVal == desiredValue { + log.Printf("Adapter's PriorityVLANTag is already set to %v, skipping reset", desiredValue) + return nil + } + + err = na.SetPriorityVLANTag(desiredValue) + if err != nil { + return fmt.Errorf("error while setting Priority VLAN Tag value: %w", err) + } + + return nil +} + func GetOSDetails() (map[string]string, error) { return nil, nil } diff --git a/platform/windows/adapter/mellanox/mellanox.go b/platform/windows/adapter/mellanox/mellanox.go new file mode 100644 index 0000000000..e7d37cc2e7 --- /dev/null +++ b/platform/windows/adapter/mellanox/mellanox.go @@ -0,0 +1,222 @@ +// Copyright 2017 Microsoft. All rights reserved. +// MIT License + +package mellanox + +import ( + "bytes" + "fmt" + "os/exec" + "strconv" + "strings" + + "github.com/Azure/azure-container-networking/log" +) + +const ( + // Search string to find adapter having Mellanox in description + mellanoxSearchString = "*Mellanox*" + + // PriorityVlanTag reg key for adapter + priorityVLANTagIdentifier = "*PriorityVLANTag" + + // Registry key Path Prefix + registryKeyPrefix = "HKLM:\\System\\CurrentControlSet\\Control\\Class\\" +) + +var ( + errorMellanoxAdapterNotFound = fmt.Errorf("no network adapter found with %s in description", mellanoxSearchString) + errorMellanoxDeviceNotFound = fmt.Errorf("no network device found with %s in description", mellanoxSearchString) + errorPowershellNotFound = fmt.Errorf("failed to find powershell executable") +) + +type Mellanox struct{} + +// GetAdapter returns name of Mellanox adapter if found +// Returns errorMellanoxAdapterNotFound if adapter is not found or adapter name empty +func (m *Mellanox) GetAdapterName() (string, error) { + // get mellanox adapter name + cmd := fmt.Sprintf(`Get-NetAdapter | Where-Object { $_.InterfaceDescription -like '%s' } | Select-Object -ExpandProperty Name`, mellanoxSearchString) + adapterName, err := executePowershellCommand(cmd) + if err != nil { + return "", fmt.Errorf("error while executing powershell command to get net adapter list: %w", err) + } + if adapterName == "" { + return "", errorMellanoxAdapterNotFound + } + return adapterName, nil +} + +// Set Mellanox adapter's PriorityVLANTag value to desired value if adapter exists +// 5/16/23 : right now setting desired reg key value for PriorityVLANTag = 3 --> Packet priority and VLAN enabled +// for more details goto https://docs.nvidia.com/networking/display/winof2v230/Configuring+the+Driver+Registry+Keys#ConfiguringtheDriverRegistryKeys-GeneralRegistryKeysGeneralRegistryKeys +func (m *Mellanox) SetPriorityVLANTag(desiredVal int) error { + adapterName, err := m.GetAdapterName() + if err != nil { + return fmt.Errorf("failed to find mellanox adapter: %w", err) + } + + // Find if adapter has property PriorityVLANTag (version 4 or up) or not (version 3) + cmd := fmt.Sprintf(`Get-NetAdapterAdvancedProperty | Where-Object { $_.RegistryKeyword -like '%s' -and $_.Name -eq '%s' } | Select-Object -ExpandProperty Name`, + priorityVLANTagIdentifier, adapterName) + adapterNameWithVLANTag, err := executePowershellCommand(cmd) + if err != nil { + return fmt.Errorf("error while executing powershell command to get VLAN Tag advance property of %s: %w", adapterName, 
err) + } + + if adapterNameWithVLANTag != "" { + return m.setMellanoxPriorityVLANTagValueForV4(adapterNameWithVLANTag, desiredVal) + } + return m.setMellanoxPriorityVLANTagValueForV3(adapterName, desiredVal) +} + +// Get PriorityVLANTag returns PriorityVLANTag value for Mellanox Adapter (both version 3 and version 4) +func (m *Mellanox) GetPriorityVLANTag() (int, error) { + adapterName, err := m.GetAdapterName() + if err != nil { + return 0, fmt.Errorf("failed to find mellanox adapter: %w", err) + } + + // Find if adapter has property PriorityVLANTag (version 4 or up) or not (version 3) + cmd := fmt.Sprintf(`Get-NetAdapterAdvancedProperty | Where-Object { $_.RegistryKeyword -like '%s' -and $_.Name -eq '%s' } | Select-Object -ExpandProperty Name`, + priorityVLANTagIdentifier, adapterName) + adapterNameWithVLANTag, err := executePowershellCommand(cmd) + if err != nil { + return 0, fmt.Errorf("error while executing powershell command to get VLAN Tag advance property of %s: %w", adapterName, err) + } + + if adapterNameWithVLANTag != "" { + return m.getMellanoxPriorityVLANTagValueForV4(adapterNameWithVLANTag) + } + + return m.getMellanoxPriorityVLANTagValueForV3() +} + +// Checks if a Mellanox adapter's PriorityVLANTag value +// for version 4 and up is set to the given expected value +func (m *Mellanox) getMellanoxPriorityVLANTagValueForV4(adapterName string) (int, error) { + cmd := fmt.Sprintf( + `Get-NetAdapterAdvancedProperty | Where-Object { $_.RegistryKeyword -like '%s' -and $_.Name -eq '%s' } | Select-Object -ExpandProperty RegistryValue`, + priorityVLANTagIdentifier, adapterName) + + regvalue, err := executePowershellCommand(cmd) + if err != nil { + return 0, err + } + + intValue, err := strconv.Atoi(regvalue) + if err != nil { + return 0, fmt.Errorf("failed to convert PriorityVLANTag value to integer: %w", err) + } + + return intValue, nil +} + +// Checks if a Mellanox adapter's PriorityVLANTag value +// for version 3 and below is set to the given expected value +func (m *Mellanox) getMellanoxPriorityVLANTagValueForV3() (int, error) { + registryKeyFullPath, err := m.getRegistryFullPath() + if err != nil { + return 0, err + } + + cmd := fmt.Sprintf( + `Get-ItemProperty -Path '%s' -Name '%s' | Select-Object -ExpandProperty '%s'`, registryKeyFullPath, priorityVLANTagIdentifier, priorityVLANTagIdentifier) + regvalue, err := executePowershellCommand(cmd) + if err != nil { + return 0, err + } + + intValue, err := strconv.Atoi(regvalue) + if err != nil { + return 0, fmt.Errorf("failed to convert PriorityVLANTag value to integer: %w", err) + } + + return intValue, nil +} + +// adapter is version 4 and up since adapter's advance property consists of reg key : PriorityVLANTag +// set reg value for Priorityvlantag of adapter to 3 if not set already +func (m *Mellanox) setMellanoxPriorityVLANTagValueForV4(adapterName string, desiredVal int) error { + cmd := fmt.Sprintf( + `Set-NetAdapterAdvancedProperty -Name '%s' -RegistryKeyword '%s' -RegistryValue %d`, + adapterName, priorityVLANTagIdentifier, desiredVal) + _, err := executePowershellCommand(cmd) + if err != nil { + return fmt.Errorf("error while setting up registry value for PriorityVLANTag for adapter: %w", err) + } + + log.Printf("Successfully set Mellanox Network Adapter: %s with %s property value as %d", + adapterName, priorityVLANTagIdentifier, desiredVal) + return nil +} + +// Adapter is version 3 or less as PriorityVLANTag was not found in advanced properties of mellanox adapter +func (m *Mellanox) 
setMellanoxPriorityVLANTagValueForV3(adapterName string, desiredVal int) error { + registryKeyFullPath, err := m.getRegistryFullPath() + if err != nil { + return err + } + + cmd := fmt.Sprintf(`New-ItemProperty -Path '%s' -Name '%s' -Value %d -PropertyType String -Force`, + registryKeyFullPath, priorityVLANTagIdentifier, desiredVal) + _, err = executePowershellCommand(cmd) + if err != nil { + return fmt.Errorf("error while executing powershell command to set Item property for adapter %s: %w", adapterName, err) + } + + log.Printf("Restarting Mellanox network adapter for regkey change to take effect") + cmd = fmt.Sprintf(`Restart-NetAdapter -Name '%s'`, adapterName) + _, err = executePowershellCommand(cmd) + if err != nil { + return fmt.Errorf("error while executing powershell command to restart net adapter %s: %w", adapterName, err) + } + log.Printf("For Mellanox CX-3 adapters, the reg key set to %d", desiredVal) + return nil +} + +// Get registry full path for Mellanox Adapter +func (m *Mellanox) getRegistryFullPath() (string, error) { + log.Printf("Searching through CIM instances for Network devices with %s in the name", mellanoxSearchString) + cmd := fmt.Sprintf( + `Get-CimInstance -Namespace root/cimv2 -ClassName Win32_PNPEntity | Where-Object PNPClass -EQ "Net" | Where-Object { $_.Name -like '%s' } | Select-Object -ExpandProperty DeviceID`, + mellanoxSearchString) + deviceid, err := executePowershellCommand(cmd) + if err != nil { + return "", fmt.Errorf("error while executing powershell command to get device id for Mellanox: %w", err) + } + if deviceid == "" { + return "", errorMellanoxDeviceNotFound + } + + cmd = fmt.Sprintf(`Get-PnpDeviceProperty -InstanceId '%s' | Where-Object KeyName -EQ "DEVPKEY_Device_Driver" | Select-Object -ExpandProperty Data`, deviceid) + registryKeySuffix, err := executePowershellCommand(cmd) + if err != nil { + return "", fmt.Errorf("error while executing powershell command to get registry suffix of device id %s: %w", deviceid, err) + } + + return registryKeyPrefix + registryKeySuffix, nil +} + +// ExecutePowershellCommand executes powershell command +func executePowershellCommand(command string) (string, error) { + ps, err := exec.LookPath("powershell.exe") + if err != nil { + return "", errorPowershellNotFound + } + + log.Printf("[Azure-Utils] %s", command) + + cmd := exec.Command(ps, command) + var stdout bytes.Buffer + var stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + + err = cmd.Run() + if err != nil { + return "", fmt.Errorf("%s:%w", stderr.String(), err) + } + + return strings.TrimSpace(stdout.String()), nil +} diff --git a/platform/windows/adapter/mocks/networkadapter_generated.go b/platform/windows/adapter/mocks/networkadapter_generated.go new file mode 100644 index 0000000000..42651c221f --- /dev/null +++ b/platform/windows/adapter/mocks/networkadapter_generated.go @@ -0,0 +1,78 @@ +// Code generated by MockGen. DO NOT EDIT. +// Source: /mnt/d/Projects/azure-container-networking/platform/windows/adapter/network_adapter.go + +// Package mocks is a generated GoMock package. +package mocks + +import ( + reflect "reflect" + + gomock "github.com/golang/mock/gomock" +) + +// MockNetworkAdapter is a mock of NetworkAdapter interface. +type MockNetworkAdapter struct { + ctrl *gomock.Controller + recorder *MockNetworkAdapterMockRecorder +} + +// MockNetworkAdapterMockRecorder is the mock recorder for MockNetworkAdapter. 
+type MockNetworkAdapterMockRecorder struct { + mock *MockNetworkAdapter +} + +// NewMockNetworkAdapter creates a new mock instance. +func NewMockNetworkAdapter(ctrl *gomock.Controller) *MockNetworkAdapter { + mock := &MockNetworkAdapter{ctrl: ctrl} + mock.recorder = &MockNetworkAdapterMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use. +func (m *MockNetworkAdapter) EXPECT() *MockNetworkAdapterMockRecorder { + return m.recorder +} + +// GetAdapterName mocks base method. +func (m *MockNetworkAdapter) GetAdapterName() (string, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetAdapterName") + ret0, _ := ret[0].(string) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetAdapterName indicates an expected call of GetAdapterName. +func (mr *MockNetworkAdapterMockRecorder) GetAdapterName() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetAdapterName", reflect.TypeOf((*MockNetworkAdapter)(nil).GetAdapterName)) +} + +// GetPriorityVLANTag mocks base method. +func (m *MockNetworkAdapter) GetPriorityVLANTag() (int, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetPriorityVLANTag") + ret0, _ := ret[0].(int) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetPriorityVLANTag indicates an expected call of GetPriorityVLANTag. +func (mr *MockNetworkAdapterMockRecorder) GetPriorityVLANTag() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetPriorityVLANTag", reflect.TypeOf((*MockNetworkAdapter)(nil).GetPriorityVLANTag)) +} + +// SetPriorityVLANTag mocks base method. +func (m *MockNetworkAdapter) SetPriorityVLANTag(arg0 int) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "SetPriorityVLANTag", arg0) + ret0, _ := ret[0].(error) + return ret0 +} + +// SetPriorityVLANTag indicates an expected call of SetPriorityVLANTag. +func (mr *MockNetworkAdapterMockRecorder) SetPriorityVLANTag(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetPriorityVLANTag", reflect.TypeOf((*MockNetworkAdapter)(nil).SetPriorityVLANTag), arg0) +} diff --git a/platform/windows/adapter/network_adapter.go b/platform/windows/adapter/network_adapter.go new file mode 100644 index 0000000000..80f82a6539 --- /dev/null +++ b/platform/windows/adapter/network_adapter.go @@ -0,0 +1,16 @@ +// Copyright 2017 Microsoft. All rights reserved. 
+// MIT License + +package adapter + +type NetworkAdapter interface { + // GetAdapter returns name of adapter if found + // Must return error if adapter is not found or adapter name empty + GetAdapterName() (string, error) + + // Get PriorityVLANTag returns PriorityVLANTag value for Adapter + GetPriorityVLANTag() (int, error) + + // Set adapter's PriorityVLANTag value to desired value if adapter exists + SetPriorityVLANTag(int) error +} diff --git a/test/apimodels/cniLinux1804.json b/test/apimodels/cniLinux1804.json deleted file mode 100644 index 5d04f815bc..0000000000 --- a/test/apimodels/cniLinux1804.json +++ /dev/null @@ -1,61 +0,0 @@ -{ - "apiVersion": "vlabs", - "properties": { - "orchestratorProfile": { - "orchestratorType": "Kubernetes", - "orchestratorRelease": "1.22", - "orchestratorVersion": "1.22.15", - "kubernetesConfig": { - "networkPlugin": "azure", - "networkPolicy": "azure", - "azureCNIVersion": "", - "azureCNIURLLinux": "", - "apiServerConfig": { - "--tls-min-version": "VersionTLS12", - "--tls-cipher-suites": "TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256" - }, - "addons": [ - { - "name": "azure-npm-daemonset", - "enabled": true, - "containers": [ - { - "name": "azure-npm-daemonset", - "image": "" - } - ] - } - ] - } - }, - "masterProfile": { - "count": 1, - "dnsPrefix": "cniLinux", - "vmSize": "Standard_D2_v2" - }, - "agentPoolProfiles": [ - { - "name": "agentpool1", - "count": 3, - "vmSize": "Standard_D2_v2", - "availabilityProfile": "AvailabilitySet", - "osType": "Linux", - "distro": "aks-ubuntu-18.04" - } - ], - "linuxProfile": { - "adminUsername": "azureuser", - "ssh": { - "publicKeys": [ - { - "keyData": "" - } - ] - } - }, - "servicePrincipalProfile": { - "clientId": "", - "secret": "" - } - } -} diff --git a/test/apimodels/cniLinuxDualstack1804.json b/test/apimodels/cniLinuxDualstack1804.json deleted file mode 100644 index 3da81cc644..0000000000 --- a/test/apimodels/cniLinuxDualstack1804.json +++ /dev/null @@ -1,64 +0,0 @@ -{ - "apiVersion": "vlabs", - "properties": { - "featureFlags": { - "enableIPv6DualStack": true - }, - "orchestratorProfile": { - "orchestratorType": "Kubernetes", - "orchestratorRelease": "1.22", - "orchestratorVersion": "1.22.2", - "kubernetesConfig": { - "loadBalancerSku": "Standard", - "excludeMasterFromStandardLB": true, - "clusterSubnet": "10.240.0.0/12,fc00::/48", - "serviceCidr": "10.0.0.0/16,fd00::/108", - "dnsServiceIP": "10.0.0.10", - "networkPlugin": "azure", - "networkMode": "transparent", - "azureCNIVersion": "", - "azureCNIURLLinux": "", - "apiServerConfig": { - "--feature-gates": "IPv6DualStack=true", - "--tls-min-version": "VersionTLS12", - "--tls-cipher-suites": "TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256" - }, - "kubeletConfig": { - "--feature-gates": "IPv6DualStack=true" - }, - "controllerManagerConfig": { - "--feature-gates": "IPv6DualStack=true" - } - } - }, - "masterProfile": { - "count": 1, - "dnsPrefix": "duale2e", - "vmSize": "Standard_D2_v2" - }, - "agentPoolProfiles": [ - { - "name": "agentdualp1", - "count": 2, - "vmSize": "Standard_D2_v2", - "availabilityProfile": "AvailabilitySet", - "osType": "Linux", - "distro": "aks-ubuntu-18.04" - } - ], - "linuxProfile": { - "adminUsername": "azureuser", - "ssh": { - "publicKeys": [ - 
{ - "keyData":"" - } - ] - } - }, - "servicePrincipalProfile": { - "clientId":"", - "secret":"" - } - } -} diff --git a/test/apimodels/cniWindows1903.json b/test/apimodels/cniWindows1903.json deleted file mode 100644 index 4c59b110ef..0000000000 --- a/test/apimodels/cniWindows1903.json +++ /dev/null @@ -1,74 +0,0 @@ -{ - "apiVersion": "vlabs", - "properties": { - "orchestratorProfile": { - "orchestratorType": "Kubernetes", - "orchestratorRelease": "1.22", - "orchestratorVersion": "1.22.15", - "kubernetesConfig": { - "networkPlugin": "azure", - "networkPolicy": "azure", - "containerRuntime": "containerd", - "azureCNIVersion": "", - "azureCNIURLWindows": "", - "apiServerConfig": { - "--tls-min-version": "VersionTLS12", - "--tls-cipher-suites": "TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256" - }, - "addons": [ - { - "name": "azure-npm-daemonset", - "enabled": true, - "containers": [ - { - "name": "azure-npm-daemonset", - "image": "" - } - ] - } - ] - } - }, - "masterProfile": { - "count": 1, - "dnsPrefix": "cniWindows", - "vmSize": "Standard_D2_v2" - }, - "agentPoolProfiles": [ - { - "name": "windowspool2", - "count": 2, - "vmSize": "Standard_D2_v2", - "availabilityProfile": "VirtualMachineScaleSets", - "osType": "Windows" - } - ], - "windowsProfile": { - "adminUsername": "azureuser", - "adminPassword": "azureTest@!", - "enableAutomaticUpdates": true, - "sshEnabled": true, - "imageReference": { - "name": "containerVMImageDefinition", - "resourceGroup": "container-images-rg", - "subscriptionId": "d9eabe18-12f6-4421-934a-d7e2327585f5", - "gallery": "containerImageComputeGallery", - "version": "latest" - } - }, - "linuxProfile": { - "adminUsername": "azureuser", - "ssh": { - "publicKeys": [ - { - "keyData": "" - } - ] - } - }, - "servicePrincipalProfile": { - "clientId": "", - "secret": "" - } - } -} diff --git a/test/apimodels/cniWindows2004.json b/test/apimodels/cniWindows2004.json deleted file mode 100644 index d3366a593c..0000000000 --- a/test/apimodels/cniWindows2004.json +++ /dev/null @@ -1,62 +0,0 @@ -{ - "apiVersion": "vlabs", - "properties": { - "orchestratorProfile": { - "orchestratorType": "Kubernetes", - "orchestratorRelease": "1.22", - "orchestratorVersion": "1.22.15", - "kubernetesConfig": { - "networkPlugin": "azure", - "networkPolicy": "azure", - "azureCNIVersion": "", - "azureCNIURLWindows": "", - "apiServerConfig": { - "--tls-min-version": "VersionTLS12", - "--tls-cipher-suites": "TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256" - }, - "addons": [{ - "name": "azure-npm-daemonset", - "enabled": true, - "containers": [{ - "name": "azure-npm-daemonset", - "image": "" - }] - }] - } - }, - "masterProfile": { - "count": 1, - "dnsPrefix": "cniWindows", - "vmSize": "Standard_D2_v2" - }, - "agentPoolProfiles": [{ - "name": "windowspool2", - "count": 2, - "vmSize": "Standard_D2_v2", - "availabilityProfile": "VirtualMachineScaleSets", - "osType": "Windows" - }], - "windowsProfile": { - "adminUsername": "azureuser", - "adminPassword": "azureTest@!", - "enableAutomaticUpdates": true, - "sshEnabled": true, - "windowsPublisher": "MicrosoftWindowsServer", - "windowsOffer": "WindowsServer", - "windowsSku": "datacenter-core-2004-with-containers-smalldisk", - "imageVersion": "latest" - }, - "linuxProfile": { 
- "adminUsername": "azureuser", - "ssh": { - "publicKeys": [{ - "keyData": "" - }] - } - }, - "servicePrincipalProfile": { - "clientId": "", - "secret": "" - } - } -} diff --git a/test/apimodels/cniWindows2022.json b/test/apimodels/cniWindows2022.json deleted file mode 100644 index d38fa22956..0000000000 --- a/test/apimodels/cniWindows2022.json +++ /dev/null @@ -1,74 +0,0 @@ -{ - "apiVersion": "vlabs", - "properties": { - "orchestratorProfile": { - "orchestratorType": "Kubernetes", - "orchestratorRelease": "1.22", - "orchestratorVersion": "1.22.15", - "kubernetesConfig": { - "networkPlugin": "azure", - "networkPolicy": "azure", - "containerRuntime": "containerd", - "windowsContainerdURL": "https://github.com/containerd/containerd/releases/download/v1.6.2/containerd-1.6.2-windows-amd64.tar.gz", - "apiServerConfig": { - "--tls-min-version": "VersionTLS12", - "--tls-cipher-suites": "TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256", - "--feature-gates": "WindowsHostProcessContainers=true" - }, - "kubeletConfig": { - "--feature-gates": "WindowsHostProcessContainers=true" - }, - "addons": [ - { - "name": "azure-npm-daemonset", - "enabled": true, - "containers": [ - { - "name": "azure-npm-daemonset", - "image": "" - } - ] - } - ] - } - }, - "masterProfile": { - "count": 1, - "vmSize": "Standard_D2_v2" - }, - "agentPoolProfiles": [{ - "name": "windowspool2", - "count": 2, - "vmSize": "Standard_D2_v2", - "availabilityProfile": "VirtualMachineScaleSets", - "osType": "Windows" - }], - "windowsProfile": { - "adminUsername": "azureuser", - "adminPassword": "azureTest@!", - "enableAutomaticUpdates": true, - "sshEnabled": false, - "windowsPauseImageURL": "mcr.microsoft.com/oss/kubernetes/pause:3.6", - "alwaysPullWindowsPauseImage": true, - "imageReference": { - "subscriptionId": "a15c116e-99e3-4c59-aebc-8f864929b4a0", - "resourceGroup": "akswinvhdbuilderrg", - "gallery": "AKSWindows", - "name": "windows-2022-containerd", - "version": "20348.643.220413" - } - }, - "linuxProfile": { - "adminUsername": "azureuser", - "ssh": { - "publicKeys": [{ - "keyData": "" - }] - } - }, - "servicePrincipalProfile": { - "clientId": "", - "secret": "" - } - } -} diff --git a/test/apimodels/cniWindowsDualstack2004.json b/test/apimodels/cniWindowsDualstack2004.json deleted file mode 100644 index 9aaec6b11f..0000000000 --- a/test/apimodels/cniWindowsDualstack2004.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "apiVersion": "vlabs", - "properties": { - "featureFlags": { - "enableIPv6DualStack": true - }, - "orchestratorProfile": { - "orchestratorType": "Kubernetes", - "orchestratorRelease": "1.22", - "orchestratorVersion": "1.22.2", - "kubernetesConfig": { - "loadBalancerSku": "Standard", - "excludeMasterFromStandardLB": true, - "clusterSubnet": "10.240.0.0/12,fc00::/48", - "serviceCidr": "10.0.0.0/16,fd00::/108", - "dnsServiceIP": "10.0.0.10", - "networkPlugin": "azure", - "apiServerConfig": { - "--feature-gates": "IPv6DualStack=true", - "--tls-min-version": "VersionTLS12", - "--tls-cipher-suites": "TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256" - - }, - "kubeletConfig": { - "--feature-gates": "IPv6DualStack=true" - }, - "controllerManagerConfig": { - "--feature-gates": "IPv6DualStack=true" - } - } - }, - "masterProfile": { - "count": 1, - 
"dnsPrefix": "windual", - "vmSize": "Standard_D2_v2" - }, - "agentPoolProfiles": [ - { - "name": "cnidualwin", - "count": 3, - "vmSize": "Standard_D2_v2", - "availabilityProfile": "AvailabilitySet", - "osType": "Windows" - } - ], - "linuxProfile": { - "adminUsername": "azureuser", - "ssh": { - "publicKeys": [ - { - "keyData":"" - } - ] - } - }, - "windowsProfile": { - "adminUsername": "azureuser", - "adminPassword": "azureTest@!", - "enableAutomaticUpdates": true, - "windowsPublisher": "MicrosoftWindowsServer", - "windowsOffer": "WindowsServer", - "windowsSku": "Datacenter-Core-2004-with-Containers-smalldisk", - "imageVersion": "latest" - }, - "servicePrincipalProfile": { - "clientId":"", - "secret":"" - } - } -} diff --git a/test/integration/datapath/datapath_win_test.go b/test/integration/datapath/datapath_win_test.go new file mode 100644 index 0000000000..054a60bb98 --- /dev/null +++ b/test/integration/datapath/datapath_win_test.go @@ -0,0 +1,181 @@ +//go:build connection + +package connection + +import ( + "context" + "flag" + "fmt" + "testing" + + "github.com/Azure/azure-container-networking/test/internal/datapath" + "github.com/Azure/azure-container-networking/test/internal/k8sutils" + "github.com/pkg/errors" + "github.com/stretchr/testify/require" + apiv1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" +) + +const ( + WindowsDeployYamlPath = "../manifests/datapath/windows-deployment.yaml" + podLabelKey = "app" + podCount = 2 + nodepoolKey = "agentpool" +) + +var ( + podPrefix = flag.String("podName", "datapod", "Prefix for test pods") + podNamespace = flag.String("namespace", "datapath-win", "Namespace for test pods") + nodepoolSelector = flag.String("nodepoolSelector", "npwin", "Provides nodepool as a Node-Selector for pods") +) + +/* +This test assumes that you have the current credentials loaded in your default kubeconfig for a +k8s cluster with a windows nodepool consisting of at least 2 windows nodes. +*** The expected nodepool name is npwin, if the nodepool has a diferent name ensure that you change nodepoolSelector with: + -nodepoolSelector="yournodepoolname" + +To run the test use one of the following commands: +go test -count=1 test/integration/datapath/datapath_win_test.go -timeout 3m -tags connection -run ^TestDatapathWin$ -tags=connection + or +go test -count=1 test/integration/datapath/datapath_win_test.go -timeout 3m -tags connection -run ^TestDatapathWin$ -podName=acnpod -nodepoolSelector=npwina -tags=connection + + +This test checks pod to pod, pod to node, and pod to internet for datapath connectivity. + +Timeout context is controled by the -timeout flag. 
+ +*/ + +func TestDatapathWin(t *testing.T) { + ctx := context.Background() + + t.Log("Create Clientset") + clientset, err := k8sutils.MustGetClientset() + if err != nil { + require.NoError(t, err, "could not get k8s clientset: %v", err) + } + t.Log("Get REST config") + restConfig := k8sutils.MustGetRestConfig(t) + + t.Log("Create Label Selectors") + podLabelSelector := fmt.Sprintf("%s=%s", podLabelKey, *podPrefix) + nodeLabelSelector := fmt.Sprintf("%s=%s", nodepoolKey, *nodepoolSelector) + + t.Log("Get Nodes") + nodes, err := k8sutils.GetNodeListByLabelSelector(ctx, clientset, nodeLabelSelector) + if err != nil { + require.NoError(t, err, "could not get k8s node list: %v", err) + } + + // Test Namespace + t.Log("Create Namespace") + err = k8sutils.MustCreateNamespace(ctx, clientset, *podNamespace) + createPodFlag := !(apierrors.IsAlreadyExists(err)) + + if createPodFlag { + t.Log("Creating Windows pods through deployment") + deployment, err := k8sutils.MustParseDeployment(WindowsDeployYamlPath) + if err != nil { + require.NoError(t, err) + } + + // Fields for overwriting existing deployment yaml. + // Defaults from flags will not change anything + deployment.Spec.Selector.MatchLabels[podLabelKey] = *podPrefix + deployment.Spec.Template.ObjectMeta.Labels[podLabelKey] = *podPrefix + deployment.Spec.Template.Spec.NodeSelector[nodepoolKey] = *nodepoolSelector + deployment.Name = *podPrefix + deployment.Namespace = *podNamespace + + deploymentsClient := clientset.AppsV1().Deployments(*podNamespace) + err = k8sutils.MustCreateDeployment(ctx, deploymentsClient, deployment) + if err != nil { + require.NoError(t, err) + } + + t.Log("Waiting for pods to be running state") + err = k8sutils.WaitForPodsRunning(ctx, clientset, *podNamespace, podLabelSelector) + if err != nil { + require.NoError(t, err) + } + t.Log("Successfully created customer windows pods") + } else { + // Checks namespace already exists from previous attempt + t.Log("Namespace already exists") + + t.Log("Checking for pods to be running state") + err = k8sutils.WaitForPodsRunning(ctx, clientset, *podNamespace, podLabelSelector) + if err != nil { + require.NoError(t, err) + } + } + t.Log("Checking Windows test environment ") + for _, node := range nodes.Items { + + pods, err := k8sutils.GetPodsByNode(ctx, clientset, *podNamespace, podLabelSelector, node.Name) + if err != nil { + require.NoError(t, err, "could not get k8s clientset: %v", err) + } + if len(pods.Items) <= 1 { + t.Logf("%s", node.Name) + require.NoError(t, errors.New("Less than 2 pods on node")) + } + } + t.Log("Windows test environment ready") + + t.Run("Windows ping tests pod -> node", func(t *testing.T) { + // Windows ping tests between pods and node + for _, node := range nodes.Items { + t.Log("Windows ping tests (1)") + nodeIP := "" + for _, address := range node.Status.Addresses { + if address.Type == "InternalIP" { + nodeIP = address.Address + // Multiple addresses exist, break once Internal IP found. 
+ // Cannot call directly + break + } + } + + err := datapath.WindowsPodToNode(ctx, clientset, node.Name, nodeIP, *podNamespace, podLabelSelector, restConfig) + require.NoError(t, err, "Windows pod to node, ping test failed with: %+v", err) + t.Logf("Windows pod to node, passed for node: %s", node.Name) + } + }) + + t.Run("Windows ping tests pod -> pod", func(t *testing.T) { + // Pod to pod same node + for _, node := range nodes.Items { + if node.Status.NodeInfo.OperatingSystem == string(apiv1.Windows) { + t.Log("Windows ping tests (2) - Same Node") + err := datapath.WindowsPodToPodPingTestSameNode(ctx, clientset, node.Name, *podNamespace, podLabelSelector, restConfig) + require.NoError(t, err, "Windows pod to pod, same node, ping test failed with: %+v", err) + t.Logf("Windows pod to windows pod, same node, passed for node: %s", node.ObjectMeta.Name) + } + } + + // Pod to pod different node + for i := 0; i < len(nodes.Items); i++ { + t.Log("Windows ping tests (2) - Different Node") + firstNode := nodes.Items[i%2].Name + secondNode := nodes.Items[(i+1)%2].Name + err = datapath.WindowsPodToPodPingTestDiffNode(ctx, clientset, firstNode, secondNode, *podNamespace, podLabelSelector, restConfig) + require.NoError(t, err, "Windows pod to pod, different node, ping test failed with: %+v", err) + t.Logf("Windows pod to windows pod, different node, passed for node: %s -> %s", firstNode, secondNode) + + } + }) + + t.Run("Windows url tests pod -> internet", func(t *testing.T) { + // From windows pod, IWR a URL + for _, node := range nodes.Items { + if node.Status.NodeInfo.OperatingSystem == string(apiv1.Windows) { + t.Log("Windows ping tests (3) - Pod to Internet tests") + err := datapath.WindowsPodToInternet(ctx, clientset, node.Name, *podNamespace, podLabelSelector, restConfig) + require.NoError(t, err, "Windows pod to internet test failed with: %+v", err) + t.Logf("Windows pod to Internet url tests") + } + } + }) +} diff --git a/test/integration/k8s_test.go b/test/integration/k8s_test.go index 448f232991..e97995197e 100644 --- a/test/integration/k8s_test.go +++ b/test/integration/k8s_test.go @@ -9,17 +9,16 @@ import ( "flag" "fmt" "os" - "path/filepath" "testing" "time" "github.com/Azure/azure-container-networking/test/integration/goldpinger" - "github.com/Azure/azure-container-networking/test/integration/retry" + k8sutils "github.com/Azure/azure-container-networking/test/internal/k8sutils" + "github.com/Azure/azure-container-networking/test/internal/retry" v1 "k8s.io/client-go/kubernetes/typed/apps/v1" apiv1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/util/homedir" ) const ( @@ -38,7 +37,6 @@ const ( var ( defaultRetrier = retry.Retrier{Attempts: retryAttempts, Delay: retryDelaySec} - kubeconfig = flag.String("test-kubeconfig", filepath.Join(homedir.HomeDir(), ".kube", "config"), "(optional) absolute path to the kubeconfig file") delegatedSubnetID = flag.String("delegated-subnet-id", "", "delegated subnet id for node labeling") delegatedSubnetName = flag.String("subnet-name", "", "subnet name for node labeling") ) @@ -83,18 +81,18 @@ todo: */ func TestPodScaling(t *testing.T) { - clientset, err := mustGetClientset() + clientset, err := k8sutils.MustGetClientset() if err != nil { t.Fatal(err) } - restConfig := mustGetRestConfig(t) - deployment, err := mustParseDeployment(gpDeployment) + restConfig := k8sutils.MustGetRestConfig(t) + deployment, err := k8sutils.MustParseDeployment(gpDeployment) if err != nil { t.Fatal(err) } - daemonset, err := 
mustParseDaemonSet(gpDaemonset) + daemonset, err := k8sutils.MustParseDaemonSet(gpDaemonset) if err != nil { t.Fatal(err) } @@ -102,25 +100,25 @@ func TestPodScaling(t *testing.T) { ctx := context.Background() if shouldLabelNodes() { - mustLabelSwiftNodes(t, ctx, clientset, *delegatedSubnetID, *delegatedSubnetName) + k8sutils.MustLabelSwiftNodes(ctx, t, clientset, *delegatedSubnetID, *delegatedSubnetName) } else { t.Log("swift node labels not passed or set. skipping labeling") } - rbacCleanUpFn, err := mustSetUpClusterRBAC(ctx, clientset, gpClusterRolePath, gpClusterRoleBindingPath, gpServiceAccountPath) + rbacCleanUpFn, err := k8sutils.MustSetUpClusterRBAC(ctx, clientset, gpClusterRolePath, gpClusterRoleBindingPath, gpServiceAccountPath) if err != nil { t.Log(os.Getwd()) t.Fatal(err) } deploymentsClient := clientset.AppsV1().Deployments(deployment.Namespace) - err = mustCreateDeployment(ctx, deploymentsClient, deployment) + err = k8sutils.MustCreateDeployment(ctx, deploymentsClient, deployment) if err != nil { t.Fatal(err) } daemonsetClient := clientset.AppsV1().DaemonSets(daemonset.Namespace) - err = mustCreateDaemonset(ctx, daemonsetClient, daemonset) + err = k8sutils.MustCreateDaemonset(ctx, daemonsetClient, daemonset) if err != nil { t.Fatal(err) } @@ -256,7 +254,7 @@ func updateReplicaCount(t *testing.T, ctx context.Context, deployments v1.Deploy } t.Logf("setting deployment %s to %d replicas", name, replicas) - res.Spec.Replicas = int32ptr(int32(replicas)) + res.Spec.Replicas = k8sutils.Int32ToPtr(int32(replicas)) _, err = deployments.Update(ctx, res, metav1.UpdateOptions{}) return err }) diff --git a/test/integration/load/load_test.go b/test/integration/load/load_test.go new file mode 100644 index 0000000000..90d53aa232 --- /dev/null +++ b/test/integration/load/load_test.go @@ -0,0 +1,180 @@ +//go:build load + +package load + +import ( + "context" + "flag" + "testing" + "time" + + k8sutils "github.com/Azure/azure-container-networking/test/internal/k8sutils" + "github.com/Azure/azure-container-networking/test/validate" +) + +const ( + manifestDir = "../manifests" + podLabelSelector = "load-test=true" +) + +var ( + osType = flag.String("os", "linux", "Operating system to run the test on") + cniType = flag.String("cni", "cilium", "CNI to run the test on") + iterations = flag.Int("iterations", 2, "Number of iterations to run the test for") + scaleUpReplicas = flag.Int("scaleup", 10, "Number of replicas to scale up to") + scaleDownReplicas = flag.Int("scaledown", 1, "Number of replicas to scale down to") + replicas = flag.Int("replicas", 1, "Number of replicas to scale up/down to") + validateStateFile = flag.Bool("validate-statefile", false, "Validate the state file") + skipWait = flag.Bool("skip-wait", false, "Skip waiting for pods to be ready") + restartCase = flag.Bool("restart-case", false, "In restart case, skip if we don't find state file") + namespace = "load-test" +) + +var noopDeploymentMap = map[string]string{ + "windows": manifestDir + "/noop-deployment-windows.yaml", + "linux": manifestDir + "/noop-deployment-linux.yaml", +} + +/* +In order to run the scale tests, you need a k8s cluster and its kubeconfig. +If no kubeconfig is passed, the test will attempt to find one in the default location for kubectl config. +Run the tests as follows: +
+go test -timeout 30m -tags load -run ^TestLoad$ -tags=load +
+The load test scales the pods up/down on the cluster and validates that the pods have an IP. By default it runs the +cycle for 2 iterations.
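+ +For example, a run that sets the scale flags defined above explicitly (illustrative values only; any iteration and replica counts can be substituted): +go test -timeout 30m -tags load -run ^TestLoad$ -tags=load -iterations=4 -scaleup=20 -scaledown=1 -skip-wait=false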
+ +To validate the state file, set the flag -validate-statefile to true. By default it is set to false. +todo: consider adding the following scenarios +- [x] All pods should be assigned an IP. +- [x] Test the CNS state file. +- [x] Test the CNS Local cache. +- [x] Test the Cilium state file. +- [x] Test the Node restart. +- [ ] Test based on operating system. +- [ ] Test the HNS state file. +- [ ] Parameterize the os, cni and number of iterations. +- [ ] Add deployment yaml for windows. +*/ +func TestLoad(t *testing.T) { + clientset, err := k8sutils.MustGetClientset() + if err != nil { + t.Fatal(err) + } + + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Minute) + defer cancel() + + namespaceExists, err := k8sutils.NamespaceExists(ctx, clientset, namespace) + if err != nil { + t.Fatal(err) + } + + if !namespaceExists { + err = k8sutils.MustCreateNamespace(ctx, clientset, namespace) + if err != nil { + t.Fatal(err) + } + } + + deployment, err := k8sutils.MustParseDeployment(noopDeploymentMap[*osType]) + if err != nil { + t.Fatal(err) + } + + deploymentsClient := clientset.AppsV1().Deployments(namespace) + err = k8sutils.MustCreateDeployment(ctx, deploymentsClient, deployment) + if err != nil { + t.Fatal(err) + } + + t.Log("Checking pods are running") + err = k8sutils.WaitForPodsRunning(ctx, clientset, namespace, podLabelSelector) + if err != nil { + t.Fatal(err) + } + + t.Log("Repeating the scale up/down cycle") + for i := 0; i < *iterations; i++ { + t.Log("Iteration ", i) + t.Log("Scale down deployment") + err = k8sutils.MustScaleDeployment(ctx, deploymentsClient, deployment, clientset, namespace, podLabelSelector, *scaleDownReplicas, *skipWait) + if err != nil { + t.Fatal(err) + } + t.Log("Scale up deployment") + err = k8sutils.MustScaleDeployment(ctx, deploymentsClient, deployment, clientset, namespace, podLabelSelector, *scaleUpReplicas, *skipWait) + if err != nil { + t.Fatal(err) + } + } + t.Log("Checking pods are running and IP assigned") + err = k8sutils.WaitForPodsRunning(ctx, clientset, "", "") + if err != nil { + t.Fatal(err) + } + + if *validateStateFile { + t.Run("Validate state file", TestValidateState) + } +} + +// TestValidateState validates the state file based on the os and cni type. +func TestValidateState(t *testing.T) { + clientset, err := k8sutils.MustGetClientset() + if err != nil { + t.Fatal(err) + } + config := k8sutils.MustGetRestConfig(t) + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Minute) + defer cancel() + + t.Log("Validating the state file") + validatorClient := validate.GetValidatorClient(*osType) + validator := validatorClient.CreateClient(ctx, clientset, config, namespace, *cniType, *restartCase) + + err = validator.ValidateStateFile() + if err != nil { + t.Fatal(err) + } + + // We are restarting systemd-networkd and checking that the connectivity works after the restart. For more details: https://github.com/cilium/cilium/issues/18706 + t.Log("Validating the restart network scenario") + err = validator.ValidateRestartNetwork() + if err != nil { + t.Fatal(err) + } +} + +// TestScaleDeployment scales the deployment up/down based on the replicas passed. 
+// go test -timeout 30m -tags load -run ^TestScaleDeployment$ -tags=load -replicas 10 +func TestScaleDeployment(t *testing.T) { + t.Log("Scale deployment") + clientset, err := k8sutils.MustGetClientset() + if err != nil { + t.Fatal(err) + } + ctx := context.Background() + namespaceExists, err := k8sutils.NamespaceExists(ctx, clientset, namespace) + if err != nil { + t.Fatal(err) + } + + if !namespaceExists { + err = k8sutils.MustCreateNamespace(ctx, clientset, namespace) + if err != nil { + t.Fatal(err) + } + } + + deployment, err := k8sutils.MustParseDeployment(noopDeploymentMap[*osType]) + if err != nil { + t.Fatal(err) + } + deploymentsClient := clientset.AppsV1().Deployments(namespace) + err = k8sutils.MustScaleDeployment(ctx, deploymentsClient, deployment, clientset, namespace, podLabelSelector, *replicas, *skipWait) + if err != nil { + t.Fatal(err) + } +} diff --git a/test/integration/manifests/cilium/cilium-agent/clusterrole.yaml b/test/integration/manifests/cilium/cilium-agent/clusterrole.yaml index 881ec2255e..d7c8bf5c52 100644 --- a/test/integration/manifests/cilium/cilium-agent/clusterrole.yaml +++ b/test/integration/manifests/cilium/cilium-agent/clusterrole.yaml @@ -71,6 +71,13 @@ rules: - create - apiGroups: - cilium.io + # To synchronize garbage collection of such resources + resources: + - ciliumidentities + verbs: + - update +- apiGroups: + - cilium.io resources: - ciliumendpoints verbs: diff --git a/test/integration/manifests/cilium/cilium-nightly-agent/clusterrole.yaml b/test/integration/manifests/cilium/cilium-nightly-agent/clusterrole.yaml new file mode 100644 index 0000000000..7dbdd42326 --- /dev/null +++ b/test/integration/manifests/cilium/cilium-nightly-agent/clusterrole.yaml @@ -0,0 +1,104 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: cilium +rules: +- apiGroups: + - networking.k8s.io + resources: + - networkpolicies + verbs: + - get + - list + - watch +- apiGroups: + - discovery.k8s.io + resources: + - endpointslices + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - namespaces + - services + - pods + - endpoints + - nodes + verbs: + - get + - list + - watch +- apiGroups: + - apiextensions.k8s.io + resources: + - customresourcedefinitions + verbs: + - list + - watch + # This is used when validating policies in preflight. This will need to stay + # until we figure out how to avoid "get" inside the preflight, and then + # should be removed ideally. 
+ - get +- apiGroups: + - cilium.io + resources: + #Naming changed from ciliumbgploadbalancerippools + - ciliumloadbalancerippools + - ciliumbgppeeringpolicies + - ciliumclusterwideenvoyconfigs + - ciliumclusterwidenetworkpolicies + - ciliumegressgatewaypolicies + - ciliumendpoints + - ciliumendpointslices + - ciliumenvoyconfigs + - ciliumidentities + - ciliumlocalredirectpolicies + - ciliumnetworkpolicies + - ciliumnodes + - ciliumnodeconfigs + #Added in 1.14.0 snapshot 2 + - ciliumcidrgroups + verbs: + - list + - watch +- apiGroups: + - cilium.io + resources: + - ciliumidentities + - ciliumendpoints + - ciliumnodes + verbs: + - create +- apiGroups: + - cilium.io + resources: + - ciliumidentities + verbs: + - update +- apiGroups: + - cilium.io + resources: + - ciliumendpoints + verbs: + - delete + - get +- apiGroups: + - cilium.io + resources: + - ciliumnodes + - ciliumnodes/status + verbs: + - get + - update +- apiGroups: + - cilium.io + resources: + - ciliumnetworkpolicies/status + - ciliumclusterwidenetworkpolicies/status + - ciliumendpoints/status + - ciliumendpoints + verbs: + - patch diff --git a/test/integration/manifests/cilium/cilium-nightly-agent/clusterrolebinding.yaml b/test/integration/manifests/cilium/cilium-nightly-agent/clusterrolebinding.yaml new file mode 100644 index 0000000000..f5d39b0ffd --- /dev/null +++ b/test/integration/manifests/cilium/cilium-nightly-agent/clusterrolebinding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: cilium +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: cilium +subjects: +- kind: ServiceAccount + name: "cilium" + namespace: kube-system diff --git a/test/integration/manifests/cilium/cilium-nightly-agent/serviceaccount.yaml b/test/integration/manifests/cilium/cilium-nightly-agent/serviceaccount.yaml new file mode 100644 index 0000000000..edf2e96e34 --- /dev/null +++ b/test/integration/manifests/cilium/cilium-nightly-agent/serviceaccount.yaml @@ -0,0 +1,6 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: "cilium" + namespace: kube-system + diff --git a/test/integration/manifests/cilium/cilium-nightly-config.yaml b/test/integration/manifests/cilium/cilium-nightly-config.yaml new file mode 100644 index 0000000000..2fd927d34a --- /dev/null +++ b/test/integration/manifests/cilium/cilium-nightly-config.yaml @@ -0,0 +1,89 @@ +apiVersion: v1 +data: + agent-not-ready-taint-key: node.cilium.io/agent-not-ready + arping-refresh-period: 30s + auto-direct-node-routes: "false" + bpf-lb-external-clusterip: "false" + bpf-lb-map-max: "65536" + bpf-lb-mode: snat + bpf-map-dynamic-size-ratio: "0.0025" + bpf-policy-map-max: "16384" + bpf-root: /sys/fs/bpf + cgroup-root: /run/cilium/cgroupv2 + cilium-endpoint-gc-interval: 5m0s + cluster-id: "0" + cluster-name: default + debug: "false" + disable-cnp-status-updates: "true" + disable-endpoint-crd: "false" + enable-auto-protect-node-port-range: "true" + enable-bgp-control-plane: "false" + enable-bpf-clock-probe: "true" + enable-endpoint-health-checking: "false" + enable-endpoint-routes: "true" + enable-health-check-nodeport: "true" + enable-health-checking: "true" + enable-host-legacy-routing: "true" + enable-hubble: "false" + enable-ipv4: "true" + enable-ipv4-masquerade: "false" + enable-ipv6: "false" + enable-ipv6-masquerade: "false" + enable-k8s-terminating-endpoint: "true" + enable-l2-neigh-discovery: "true" + enable-l7-proxy: "false" + enable-local-node-route: "false" + enable-local-redirect-policy: "false" + 
enable-metrics: "true" + enable-policy: default + enable-remote-node-identity: "true" + enable-session-affinity: "true" + enable-svc-source-range-check: "true" + enable-vtep: "false" + enable-well-known-identities: "false" + enable-xt-socket-fallback: "true" + identity-allocation-mode: crd + install-iptables-rules: "true" + install-no-conntrack-iptables-rules: "false" + ipam: delegated-plugin + kube-proxy-replacement: strict + kube-proxy-replacement-healthz-bind-address: "" + local-router-ipv4: 169.254.23.0 + metrics: +cilium_bpf_map_pressure + monitor-aggregation: medium + monitor-aggregation-flags: all + monitor-aggregation-interval: 5s + node-port-bind-protection: "true" + nodes-gc-interval: 5m0s + operator-api-serve-addr: 127.0.0.1:9234 + operator-prometheus-serve-addr: :9963 + preallocate-bpf-maps: "false" + procfs: /host/proc + prometheus-serve-addr: :9962 + remove-cilium-node-taints: "true" + set-cilium-is-up-condition: "true" + sidecar-istio-proxy-image: cilium/istio_proxy + synchronize-k8s-nodes: "true" + tofqdns-dns-reject-response-code: refused + tofqdns-enable-dns-compression: "true" + tofqdns-endpoint-max-ip-per-hostname: "50" + tofqdns-idle-connection-grace-period: 0s + tofqdns-max-deferred-connection-deletes: "10000" + tofqdns-min-ttl: "3600" + tofqdns-proxy-response-max-delay: 100ms + #Replaces tunnel: disabled in v1.15 + routing-mode: "native" + unmanaged-pod-watcher-interval: "15" + vtep-cidr: "" + vtep-endpoint: "" + vtep-mac: "" + vtep-mask: "" +kind: ConfigMap +metadata: + annotations: + meta.helm.sh/release-name: cilium + meta.helm.sh/release-namespace: kube-system + labels: + app.kubernetes.io/managed-by: Helm + name: cilium-config + namespace: kube-system diff --git a/test/integration/manifests/cilium/cilium-nightly-operator/clusterrole.yaml b/test/integration/manifests/cilium/cilium-nightly-operator/clusterrole.yaml new file mode 100644 index 0000000000..8c12e05729 --- /dev/null +++ b/test/integration/manifests/cilium/cilium-nightly-operator/clusterrole.yaml @@ -0,0 +1,184 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: cilium-operator +rules: +- apiGroups: + - "" + resources: + - pods + verbs: + - get + - list + - watch + # to automatically delete [core|kube]dns pods so that are starting to being + # managed by Cilium + - delete +- apiGroups: + - "" + resources: + - nodes + verbs: + - list + - watch +- apiGroups: + - "" + resources: + # To remove node taints + - nodes + # To set NetworkUnavailable false on startup + - nodes/status + verbs: + - patch +- apiGroups: + - discovery.k8s.io + resources: + - endpointslices + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + # to perform LB IP allocation for BGP + - services/status + verbs: + - update + - patch +- apiGroups: + - "" + resources: + # to check apiserver connectivity + - namespaces + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + # to perform the translation of a CNP that contains `ToGroup` to its endpoints + - services + - endpoints + verbs: + - get + - list + - watch +- apiGroups: + - cilium.io + resources: + - ciliumnetworkpolicies + - ciliumclusterwidenetworkpolicies + verbs: + # Create auto-generated CNPs and CCNPs from Policies that have 'toGroups' + - create + - update + - deletecollection + # To update the status of the CNPs and CCNPs + - patch + - get + - list + - watch +- apiGroups: + - cilium.io + resources: + - ciliumnetworkpolicies/status + - ciliumclusterwidenetworkpolicies/status + verbs: + # Update the auto-generated 
CNPs and CCNPs status. + - update +- apiGroups: + - cilium.io + resources: + - ciliumendpoints + - ciliumidentities + verbs: + # To perform garbage collection of such resources + - delete + - list + - watch +- apiGroups: + - cilium.io + resources: + - ciliumidentities + verbs: + - update +- apiGroups: + - cilium.io + resources: + - ciliumnodes + verbs: + - create + - update + - get + - list + - watch + # To perform CiliumNode garbage collector + - delete +- apiGroups: + - cilium.io + resources: + - ciliumnodes/status + verbs: + - update +- apiGroups: + - cilium.io + resources: + - ciliumendpointslices + - ciliumenvoyconfigs + verbs: + - create + - update + - get + - list + - watch + - delete + - patch +- apiGroups: + - apiextensions.k8s.io + resources: + - customresourcedefinitions + verbs: + - create + - get + - list + - watch +- apiGroups: + - apiextensions.k8s.io + resources: + - customresourcedefinitions + verbs: + - update + resourceNames: + - ciliumloadbalancerippools.cilium.io + - ciliumbgppeeringpolicies.cilium.io + - ciliumclusterwideenvoyconfigs.cilium.io + - ciliumclusterwidenetworkpolicies.cilium.io + - ciliumegressgatewaypolicies.cilium.io + - ciliumegressnatpolicies.cilium.io + - ciliumendpoints.cilium.io + - ciliumendpointslices.cilium.io + - ciliumenvoyconfigs.cilium.io + - ciliumexternalworkloads.cilium.io + - ciliumidentities.cilium.io + - ciliumlocalredirectpolicies.cilium.io + - ciliumnetworkpolicies.cilium.io + - ciliumnodes.cilium.io + - ciliumnodeconfigs.cilium.io + #Added in 1.14.0 snapshot 2 + - ciliumcidrgroups.cilium.io +# For cilium-operator running in HA mode. +# +# Cilium operator running in HA mode requires the use of ResourceLock for Leader Election +# between multiple running instances. +# The preferred way of doing this is to use LeasesResourceLock as edits to Leases are less +# common and fewer objects in the cluster watch "all Leases". 
+- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - create + - get + - update diff --git a/test/integration/manifests/cilium/cilium-nightly-operator/clusterrolebinding.yaml b/test/integration/manifests/cilium/cilium-nightly-operator/clusterrolebinding.yaml new file mode 100644 index 0000000000..eb164361d4 --- /dev/null +++ b/test/integration/manifests/cilium/cilium-nightly-operator/clusterrolebinding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: cilium-operator +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: cilium-operator +subjects: +- kind: ServiceAccount + name: "cilium-operator" + namespace: kube-system diff --git a/test/integration/manifests/cilium/cilium-nightly-operator/serviceaccount.yaml b/test/integration/manifests/cilium/cilium-nightly-operator/serviceaccount.yaml new file mode 100644 index 0000000000..be4bfc048a --- /dev/null +++ b/test/integration/manifests/cilium/cilium-nightly-operator/serviceaccount.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: "cilium-operator" + namespace: kube-system diff --git a/test/integration/manifests/cilium/cilium-operator/clusterrole.yaml b/test/integration/manifests/cilium/cilium-operator/clusterrole.yaml index a507d5e63f..a0b4dcf163 100644 --- a/test/integration/manifests/cilium/cilium-operator/clusterrole.yaml +++ b/test/integration/manifests/cilium/cilium-operator/clusterrole.yaml @@ -97,6 +97,13 @@ rules: - delete - list - watch +- apiGroups: + - cilium.io + resources: + - ciliumidentities + verbs: + # To synchronize garbage collection of such resources + - update - apiGroups: - cilium.io resources: diff --git a/test/integration/manifests/cilium/cilium-agent/daemonset.yaml b/test/integration/manifests/cilium/daemonset.yaml similarity index 96% rename from test/integration/manifests/cilium/cilium-agent/daemonset.yaml rename to test/integration/manifests/cilium/daemonset.yaml index 4e24086be8..a710c23360 100644 --- a/test/integration/manifests/cilium/cilium-agent/daemonset.yaml +++ b/test/integration/manifests/cilium/daemonset.yaml @@ -66,7 +66,7 @@ spec: fieldPath: metadata.namespace - name: CILIUM_CLUSTERMESH_CONFIG value: /var/lib/cilium/clustermesh/ - image: mcr.microsoft.com/oss/cilium/cilium:1.12.10 + image: $CILIUM_IMAGE_REGISTRY/cilium/cilium:$CILIUM_VERSION_TAG imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 10 @@ -163,7 +163,7 @@ spec: hostNetwork: true initContainers: - name: install-cni-binaries - image: mcr.microsoft.com/oss/cilium/cilium:1.12.10 + image: $CILIUM_IMAGE_REGISTRY/cilium/cilium:$CILIUM_VERSION_TAG imagePullPolicy: IfNotPresent command: - "/install-plugin.sh" @@ -192,7 +192,7 @@ spec: value: /run/cilium/cgroupv2 - name: BIN_PATH value: /opt/cni/bin - image: mcr.microsoft.com/oss/cilium/cilium:1.12.10 + image: $CILIUM_IMAGE_REGISTRY/cilium/cilium:$CILIUM_VERSION_TAG imagePullPolicy: IfNotPresent name: mount-cgroup resources: {} @@ -224,7 +224,7 @@ spec: env: - name: BIN_PATH value: /opt/cni/bin - image: mcr.microsoft.com/oss/cilium/cilium:1.12.8 + image: $CILIUM_IMAGE_REGISTRY/cilium/cilium:$CILIUM_VERSION_TAG imagePullPolicy: IfNotPresent name: apply-sysctl-overwrites resources: {} @@ -252,7 +252,7 @@ spec: - /bin/bash - -c - -- - image: mcr.microsoft.com/oss/cilium/cilium:1.12.10 + image: $CILIUM_IMAGE_REGISTRY/cilium/cilium:$CILIUM_VERSION_TAG imagePullPolicy: IfNotPresent name: mount-bpf-fs resources: {} @@ -279,7 +279,7 @@ spec: key: clean-cilium-bpf-state 
name: cilium-config optional: true - image: mcr.microsoft.com/oss/cilium/cilium:1.12.10 + image: $CILIUM_IMAGE_REGISTRY/cilium/cilium:$CILIUM_VERSION_TAG imagePullPolicy: IfNotPresent name: clean-cilium-state resources: diff --git a/test/integration/manifests/cilium/cilium-operator/deployment.yaml b/test/integration/manifests/cilium/deployment.yaml similarity index 97% rename from test/integration/manifests/cilium/cilium-operator/deployment.yaml rename to test/integration/manifests/cilium/deployment.yaml index 7b456e08ce..ac09173c36 100644 --- a/test/integration/manifests/cilium/cilium-operator/deployment.yaml +++ b/test/integration/manifests/cilium/deployment.yaml @@ -7,7 +7,7 @@ metadata: io.cilium/app: operator name: cilium-operator spec: - replicas: 1 + replicas: 2 selector: matchLabels: io.cilium/app: operator @@ -29,7 +29,7 @@ spec: spec: containers: - name: cilium-operator - image: "mcr.microsoft.com/oss/cilium/operator-generic:1.12.10" + image: $CILIUM_IMAGE_REGISTRY/cilium/operator-generic:$CILIUM_VERSION_TAG imagePullPolicy: IfNotPresent command: - cilium-operator-generic diff --git a/test/integration/manifests/cni/cni-installer-v1-windows.yaml b/test/integration/manifests/cni/cni-installer-v1-windows.yaml new file mode 100644 index 0000000000..ca303efff1 --- /dev/null +++ b/test/integration/manifests/cni/cni-installer-v1-windows.yaml @@ -0,0 +1,87 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: azure-cni-windows + labels: + tier: node + app: azure-cni + namespace: kube-system +spec: + selector: + matchLabels: + app: azure-cni + template: + metadata: + labels: + tier: node + app: azure-cni + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/os + operator: In + values: + - windows + - key: kubernetes.io/arch + operator: In + values: + - amd64 + securityContext: + windowsOptions: + hostProcess: true + runAsUserName: "NT AUTHORITY\\system" + hostNetwork: true + serviceAccountName: azure-cni + tolerations: + - key: CriticalAddonsOnly + operator: Exists + - operator: "Exists" + effect: NoExecute + - operator: "Exists" + effect: NoSchedule + initContainers: + - name: delete-azure-vnet-telemetry + image: mcr.microsoft.com/powershell:lts-nanoserver-ltsc2022 + command: ["powershell.exe", "-command"] + args: ["if (Get-Process -Name 'azure-vnet-telemetry' -ErrorAction SilentlyContinue) { Stop-Process -Name 'azure-vnet-telemetry' -Force }"] + - name: cni-drop + image: ${DROP_GZ_URL} + imagePullPolicy: Always + command: ["%CONTAINER_SANDBOX_MOUNT_POINT%/dropgz.exe"] + args: + - deploy + - azure-vnet.exe + - -o + - /k/azurecni/bin/azure-vnet.exe + - azure-vnet-ipam.exe + - -o + - /k/azurecni/bin/azure-vnet-ipam.exe + - azure-vnet-telemetry.exe + - -o + - /k/azurecni/bin/azure-vnet-telemetry.exe + - azure-vnet-telemetry.config + - -o + - /k/azurecni/bin/azure-vnet-telemetry.config + volumeMounts: + - name: cni-bin + mountPath: /k/azurecni/bin/ + containers: + - name: pause + image: mcr.microsoft.com/oss/kubernetes/pause:3.6 + command: ["%CONTAINER_SANDBOX_MOUNT_POINT%/pause.exe"] + volumes: + - name: cni-bin + hostPath: + path: /k/azurecni/bin + type: DirectoryOrCreate +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: azure-cni + namespace: kube-system + labels: + addonmanager.kubernetes.io/mode: EnsureExists diff --git a/test/integration/manifests/cni/cni-installer-v1.yaml b/test/integration/manifests/cni/cni-installer-v1.yaml new file mode 100644 index 
0000000000..74565cca13 --- /dev/null +++ b/test/integration/manifests/cni/cni-installer-v1.yaml @@ -0,0 +1,79 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: azure-cni + namespace: kube-system + labels: + app: azure-cni +spec: + selector: + matchLabels: + k8s-app: azure-cni + template: + metadata: + labels: + k8s-app: azure-cni + annotations: + cluster-autoscaler.kubernetes.io/daemonset-pod: "true" + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: type + operator: NotIn + values: + - virtual-kubelet + - key: kubernetes.io/os + operator: In + values: + - linux + priorityClassName: system-node-critical + tolerations: + - key: CriticalAddonsOnly + operator: Exists + - operator: "Exists" + effect: NoExecute + - operator: "Exists" + effect: NoSchedule + initContainers: + - name: azure-cni + image: ${DROP_GZ_URL} + imagePullPolicy: Always + command: ["/dropgz"] + args: + - deploy + - azure-vnet + - -o + - /opt/cni/bin/azure-vnet + - azure-vnet-ipam + - -o + - /opt/cni/bin/azure-vnet-ipam + - azure-vnet-telemetry + - -o + - /opt/cni/bin/azure-vnet-telemetry + - azure.conflist + - -o + - /etc/cni/net.d/10-azure.conflist + - azure-vnet-telemetry.config + - -o + - /opt/cni/bin/azure-vnet-telemetry.config + volumeMounts: + - name: cni-bin + mountPath: /opt/cni/bin + - name: cni-conflist + mountPath: /etc/cni/net.d + containers: + - name: pause + image: mcr.microsoft.com/oss/kubernetes/pause:3.6 + hostNetwork: true + volumes: + - name: cni-conflist + hostPath: + path: /etc/cni/net.d + type: Directory + - name: cni-bin + hostPath: + path: /opt/cni/bin + type: Directory diff --git a/test/integration/manifests/cns/daemonset.yaml b/test/integration/manifests/cns/daemonset.yaml index 32d3314908..44337995f1 100644 --- a/test/integration/manifests/cns/daemonset.yaml +++ b/test/integration/manifests/cns/daemonset.yaml @@ -41,7 +41,7 @@ spec: effect: NoSchedule containers: - name: cns-container - image: acnpublic.azurecr.io/azure-cns:v1.4.33 + image: acnpublic.azurecr.io/azure-cns:v1.5.3 imagePullPolicy: IfNotPresent args: [ "-c", "tcp://$(CNSIpAddress):$(CNSPort)", "-t", "$(CNSLogTarget)"] securityContext: diff --git a/test/integration/manifests/datapath/windows-deployment.yaml b/test/integration/manifests/datapath/windows-deployment.yaml new file mode 100644 index 0000000000..e4a5bb36bf --- /dev/null +++ b/test/integration/manifests/datapath/windows-deployment.yaml @@ -0,0 +1,22 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: windows-pod + namespace: datapath-win +spec: + replicas: 4 + selector: + matchLabels: + app: datapod + template: + metadata: + labels: + app: datapod + spec: + containers: + - name: windows-container + image: mcr.microsoft.com/dotnet/framework/samples:aspnetapp + command: ["powershell"] + args: ["sleep", "5000"] + nodeSelector: + kubernetes.io/os: windows diff --git a/test/integration/manifests/load/privileged-daemonset-windows.yaml b/test/integration/manifests/load/privileged-daemonset-windows.yaml new file mode 100644 index 0000000000..c9a7839013 --- /dev/null +++ b/test/integration/manifests/load/privileged-daemonset-windows.yaml @@ -0,0 +1,32 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: privileged-daemonset + namespace: kube-system + labels: + app: privileged-daemonset +spec: + selector: + matchLabels: + app: privileged-daemonset + template: + metadata: + labels: + app: privileged-daemonset + spec: + nodeSelector: + kubernetes.io/os: windows + containers: + 
- name: powershell + image: mcr.microsoft.com/powershell:lts-nanoserver-1809 + securityContext: + windowsOptions: + hostProcess: true + runAsUserName: "NT AUTHORITY\\SYSTEM" + command: + - powershell.exe + - -command + - | + while ($true) { Start-Sleep -Seconds 2147483 } + hostNetwork: true + terminationGracePeriodSeconds: 0 diff --git a/hack/manifests/hostprocess.yaml b/test/integration/manifests/load/privileged-daemonset.yaml similarity index 100% rename from hack/manifests/hostprocess.yaml rename to test/integration/manifests/load/privileged-daemonset.yaml diff --git a/hack/manifests/pod.yaml b/test/integration/manifests/noop-deployment-linux.yaml similarity index 57% rename from hack/manifests/pod.yaml rename to test/integration/manifests/noop-deployment-linux.yaml index aaed78ae1d..6b12793189 100644 --- a/hack/manifests/pod.yaml +++ b/test/integration/manifests/noop-deployment-linux.yaml @@ -1,20 +1,23 @@ +# No-op for linux based cluster apiVersion: apps/v1 kind: Deployment metadata: - name: container - namespace: default + name: load-test + namespace: load-test spec: selector: matchLabels: - app: container + load-test: "true" template: metadata: labels: - app: container + load-test: "true" spec: containers: - - name: ubuntu + - name: no-op image: mcr.microsoft.com/oss/kubernetes/pause:3.6 imagePullPolicy: Always securityContext: privileged: true + nodeSelector: + "kubernetes.io/os": linux diff --git a/test/integration/manifests/noop-deployment-windows.yaml b/test/integration/manifests/noop-deployment-windows.yaml new file mode 100644 index 0000000000..96619555c9 --- /dev/null +++ b/test/integration/manifests/noop-deployment-windows.yaml @@ -0,0 +1,23 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: win-load-test + namespace: load-test +spec: + selector: + matchLabels: + os: windows + load-test: "true" + template: + metadata: + labels: + os: windows + load-test: "true" + spec: + containers: + - name: noop + image: mcr.microsoft.com/windows/nanoserver:ltsc2022 + ports: + - containerPort: 80 + nodeSelector: + "kubernetes.io/os": windows diff --git a/test/integration/setup_test.go b/test/integration/setup_test.go index dd94dc9f22..30b8ff91b1 100644 --- a/test/integration/setup_test.go +++ b/test/integration/setup_test.go @@ -75,7 +75,7 @@ func TestMain(m *testing.M) { os.Exit(exitCode) }() - clientset, err := mustGetClientset() + clientset, err := k8sutils.MustGetClientset() if err != nil { return } @@ -102,26 +102,26 @@ func installCNSDaemonset(ctx context.Context, clientset *kubernetes.Clientset, l cnsVersion := os.Getenv(envCNSVersion) // setup daemonset - cns, err := mustParseDaemonSet(cnsDaemonSetPath) + cns, err := k8sutils.MustParseDaemonSet(cnsDaemonSetPath) if err != nil { return nil, err } - image, _ := parseImageString(cns.Spec.Template.Spec.Containers[0].Image) - cns.Spec.Template.Spec.Containers[0].Image = getImageString(image, cnsVersion) + image, _ := k8sutils.ParseImageString(cns.Spec.Template.Spec.Containers[0].Image) + cns.Spec.Template.Spec.Containers[0].Image = k8sutils.GetImageString(image, cnsVersion) // check environment scenario log.Printf("Checking environment scenario") if installBoolDropgz := os.Getenv(envTestDropgz); installBoolDropgz != "" { if testDropgzScenario, err := strconv.ParseBool(installBoolDropgz); err == nil && testDropgzScenario == true { log.Printf("Env %v set to true, deploy cniTest.Dockerfile", envTestDropgz) - initImage, _ := parseImageString("acnpublic.azurecr.io/cni-dropgz-test:latest") - 
cns.Spec.Template.Spec.InitContainers[0].Image = getImageString(initImage, cniDropgzVersion) + initImage, _ := k8sutils.ParseImageString("acnpublic.azurecr.io/cni-dropgz-test:latest") + cns.Spec.Template.Spec.InitContainers[0].Image = k8sutils.GetImageString(initImage, cniDropgzVersion) } } else { log.Printf("Env %v not set to true, deploying cni.Dockerfile", envTestDropgz) - initImage, _ := parseImageString(cns.Spec.Template.Spec.InitContainers[0].Image) - cns.Spec.Template.Spec.InitContainers[0].Image = getImageString(initImage, cniDropgzVersion) + initImage, _ := k8sutils.ParseImageString(cns.Spec.Template.Spec.InitContainers[0].Image) + cns.Spec.Template.Spec.InitContainers[0].Image = k8sutils.GetImageString(initImage, cniDropgzVersion) } if installBool1 := os.Getenv(envInstallAzureVnet); installBool1 != "" { @@ -130,7 +130,7 @@ func installCNSDaemonset(ctx context.Context, clientset *kubernetes.Clientset, l cns.Spec.Template.Spec.InitContainers[0].Args = []string{"deploy", "azure-vnet", "-o", "/opt/cni/bin/azure-vnet", "azure-vnet-telemetry", "-o", "/opt/cni/bin/azure-vnet-telemetry", "azure-vnet-ipam", "-o", "/opt/cni/bin/azure-vnet-ipam", "azure-swift.conflist", "-o", "/etc/cni/net.d/10-azure.conflist"} } // setup the CNS swiftconfigmap - if err := mustSetupConfigMap(ctx, clientset, cnsSwiftConfigMapPath); err != nil { + if err := k8sutils.MustSetupConfigMap(ctx, clientset, cnsSwiftConfigMapPath); err != nil { return nil, err } } else { @@ -143,7 +143,7 @@ func installCNSDaemonset(ctx context.Context, clientset *kubernetes.Clientset, l cns.Spec.Template.Spec.InitContainers[0].Args = []string{"deploy", "azure-ipam", "-o", "/opt/cni/bin/azure-ipam"} } // setup the CNS ciliumconfigmap - if err := mustSetupConfigMap(ctx, clientset, cnsCiliumConfigMapPath); err != nil { + if err := k8sutils.MustSetupConfigMap(ctx, clientset, cnsCiliumConfigMapPath); err != nil { return nil, err } } else { @@ -156,7 +156,7 @@ func installCNSDaemonset(ctx context.Context, clientset *kubernetes.Clientset, l cns.Spec.Template.Spec.InitContainers[0].Args = []string{"deploy", "azure-ipam", "-o", "/opt/cni/bin/azure-ipam"} } // setup the CNS ciliumconfigmap - if err := mustSetupConfigMap(ctx, clientset, cnsOverlayConfigMapPath); err != nil { + if err := k8sutils.MustSetupConfigMap(ctx, clientset, cnsOverlayConfigMapPath); err != nil { return nil, err } } else { @@ -187,25 +187,25 @@ func installCNSDaemonset(ctx context.Context, clientset *kubernetes.Clientset, l log.Printf("Installing CNS with image %s", cns.Spec.Template.Spec.Containers[0].Image) // setup common RBAC, ClusteerRole, ClusterRoleBinding, ServiceAccount - if _, err := mustSetUpClusterRBAC(ctx, clientset, cnsClusterRolePath, cnsClusterRoleBindingPath, cnsServiceAccountPath); err != nil { + if _, err := k8sutils.MustSetUpClusterRBAC(ctx, clientset, cnsClusterRolePath, cnsClusterRoleBindingPath, cnsServiceAccountPath); err != nil { return nil, err } // setup RBAC, Role, RoleBinding - if err := mustSetUpRBAC(ctx, clientset, cnsRolePath, cnsRoleBindingPath); err != nil { + if err := k8sutils.MustSetUpRBAC(ctx, clientset, cnsRolePath, cnsRoleBindingPath); err != nil { return nil, err } - if err = mustCreateDaemonset(ctx, cnsDaemonsetClient, cns); err != nil { + if err = k8sutils.MustCreateDaemonset(ctx, cnsDaemonsetClient, cns); err != nil { return nil, err } - if err = waitForPodsRunning(ctx, clientset, cns.Namespace, cnsLabelSelector); err != nil { + if err = k8sutils.WaitForPodsRunning(ctx, clientset, cns.Namespace, cnsLabelSelector); err != nil { 
return nil, err } cleanupds := func() error { - if err := exportLogsByLabelSelector(ctx, clientset, cns.Namespace, cnsLabelSelector, logDir); err != nil { + if err := k8sutils.ExportLogsByLabelSelector(ctx, clientset, cns.Namespace, cnsLabelSelector, logDir); err != nil { return err } return nil diff --git a/test/integration/utils_delete_test.go b/test/integration/utils_delete_test.go deleted file mode 100644 index e012a5240f..0000000000 --- a/test/integration/utils_delete_test.go +++ /dev/null @@ -1,32 +0,0 @@ -//go:build integration - -package k8s - -import ( - "context" - - appsv1 "k8s.io/api/apps/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - typedappsv1 "k8s.io/client-go/kubernetes/typed/apps/v1" -) - -func mustDeleteDaemonset(ctx context.Context, daemonsets typedappsv1.DaemonSetInterface, ds appsv1.DaemonSet) error { - if err := daemonsets.Delete(ctx, ds.Name, metav1.DeleteOptions{}); err != nil { - if !apierrors.IsNotFound(err) { - return err - } - } - - return nil -} - -func mustDeleteDeployment(ctx context.Context, deployments typedappsv1.DeploymentInterface, d appsv1.Deployment) error { - if err := deployments.Delete(ctx, d.Name, metav1.DeleteOptions{}); err != nil { - if !apierrors.IsNotFound(err) { - return err - } - } - - return nil -} diff --git a/test/internal/datapath/datapath_win.go b/test/internal/datapath/datapath_win.go new file mode 100644 index 0000000000..54a317760b --- /dev/null +++ b/test/internal/datapath/datapath_win.go @@ -0,0 +1,178 @@ +package datapath + +import ( + "context" + "fmt" + "strings" + + "github.com/Azure/azure-container-networking/test/internal/k8sutils" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + apiv1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + restclient "k8s.io/client-go/rest" +) + +func podTest(ctx context.Context, clientset *kubernetes.Clientset, srcPod *apiv1.Pod, cmd []string, rc *restclient.Config, passFunc func(string) error) error { + logrus.Infof("podTest() - %v %v", srcPod.Name, cmd) + output, err := k8sutils.ExecCmdOnPod(ctx, clientset, srcPod.Namespace, srcPod.Name, cmd, rc) + if err != nil { + return errors.Wrapf(err, "failed to execute command on pod: %v", srcPod.Name) + } + return passFunc(string(output)) +} + +func WindowsPodToPodPingTestSameNode(ctx context.Context, clientset *kubernetes.Clientset, nodeName, podNamespace, labelSelector string, rc *restclient.Config) error { + logrus.Infof("Get Pods for Node: %s", nodeName) + pods, err := k8sutils.GetPodsByNode(ctx, clientset, podNamespace, labelSelector, nodeName) + if err != nil { + logrus.Error(err) + return errors.Wrap(err, "k8s api call") + } + if len(pods.Items) <= 1 { + return errors.New("Less than 2 pods on node") + } + + // Get first pod on this node + firstPod, err := clientset.CoreV1().Pods(podNamespace).Get(ctx, pods.Items[0].Name, metav1.GetOptions{}) + if err != nil { + return errors.Wrap(err, fmt.Sprintf("Getting pod %s failed with %v", firstPod.Name, err)) + } + logrus.Infof("First pod: %v %v", firstPod.Name, firstPod.Status.PodIP) + + // Get the second pod on this node + secondPod, err := clientset.CoreV1().Pods(podNamespace).Get(ctx, pods.Items[1].Name, metav1.GetOptions{}) + if err != nil { + return errors.Wrap(err, fmt.Sprintf("Getting pod %s failed with %v", secondPod.Name, err)) + } + logrus.Infof("Second pod: %v %v", secondPod.Name, secondPod.Status.PodIP) + + // Ping the second pod from the first pod + return podTest(ctx, 
clientset, firstPod, []string{"ping", secondPod.Status.PodIP}, rc, pingPassedWindows) +} + +func WindowsPodToPodPingTestDiffNode(ctx context.Context, clientset *kubernetes.Clientset, nodeName1, nodeName2, podNamespace, labelSelector string, rc *restclient.Config) error { + logrus.Infof("Get Pods for Node 1: %s", nodeName1) + // Node 1 + pods, err := k8sutils.GetPodsByNode(ctx, clientset, podNamespace, labelSelector, nodeName1) + if err != nil { + logrus.Error(err) + return errors.Wrap(err, "k8s api call") + } + firstPod, err := clientset.CoreV1().Pods(podNamespace).Get(ctx, pods.Items[0].Name, metav1.GetOptions{}) + if err != nil { + return errors.Wrap(err, fmt.Sprintf("Getting pod %s failed with %v", firstPod.Name, err)) + } + logrus.Infof("First pod: %v %v", firstPod.Name, firstPod.Status.PodIP) + + logrus.Infof("Get Pods for Node 2: %s", nodeName2) + // Node 2 + pods, err = k8sutils.GetPodsByNode(ctx, clientset, podNamespace, labelSelector, nodeName2) + if err != nil { + logrus.Error(err) + return errors.Wrap(err, "k8s api call") + } + secondPod, err := clientset.CoreV1().Pods(podNamespace).Get(ctx, pods.Items[0].Name, metav1.GetOptions{}) + if err != nil { + return errors.Wrap(err, fmt.Sprintf("Getting pod %s failed with %v", secondPod.Name, err)) + } + logrus.Infof("Second pod: %v %v", secondPod.Name, secondPod.Status.PodIP) + + // Ping the second pod from the first pod located on different nodes + return podTest(ctx, clientset, firstPod, []string{"ping", secondPod.Status.PodIP}, rc, pingPassedWindows) +} + +func WindowsPodToNode(ctx context.Context, clientset *kubernetes.Clientset, nodeName, nodeIP, podNamespace, labelSelector string, rc *restclient.Config) error { + logrus.Infof("Get Pods by Node: %s %s", nodeName, nodeIP) + pods, err := k8sutils.GetPodsByNode(ctx, clientset, podNamespace, labelSelector, nodeName) + if err != nil { + logrus.Error(err) + return errors.Wrap(err, "k8s api call") + } + if len(pods.Items) <= 1 { + return errors.New("Less than 2 pods on node") + } + // Get first pod on this node + firstPod, err := clientset.CoreV1().Pods(podNamespace).Get(ctx, pods.Items[0].Name, metav1.GetOptions{}) + if err != nil { + return errors.Wrap(err, fmt.Sprintf("Getting pod %s failed with %v", firstPod.Name, err)) + } + logrus.Infof("First pod: %v", firstPod.Name) + + // Get the second pod on this node + secondPod, err := clientset.CoreV1().Pods(podNamespace).Get(ctx, pods.Items[1].Name, metav1.GetOptions{}) + if err != nil { + return errors.Wrap(err, fmt.Sprintf("Getting pod %s failed with %v", secondPod.Name, err)) + } + logrus.Infof("Second pod: %v", secondPod.Name) + + // Ping from pod to node + resultOne := podTest(ctx, clientset, firstPod, []string{"ping", nodeIP}, rc, pingPassedWindows) + resultTwo := podTest(ctx, clientset, secondPod, []string{"ping", nodeIP}, rc, pingPassedWindows) + + if resultOne != nil { + return resultOne + } + + if resultTwo != nil { + return resultTwo + } + + return nil +} + +func WindowsPodToInternet(ctx context.Context, clientset *kubernetes.Clientset, nodeName, podNamespace, labelSelector string, rc *restclient.Config) error { + logrus.Infof("Get Pods by Node: %s", nodeName) + pods, err := k8sutils.GetPodsByNode(ctx, clientset, podNamespace, labelSelector, nodeName) + if err != nil { + logrus.Error(err) + return errors.Wrap(err, "k8s api call") + } + if len(pods.Items) <= 1 { + return errors.New("Less than 2 pods on node") + } + + // Get first pod on this node + firstPod, err := clientset.CoreV1().Pods(podNamespace).Get(ctx, 
pods.Items[0].Name, metav1.GetOptions{}) + if err != nil { + return errors.Wrap(err, fmt.Sprintf("Getting pod %s failed with %v", firstPod.Name, err)) + } + logrus.Infof("First pod: %v", firstPod.Name) + + // Get the second pod on this node + secondPod, err := clientset.CoreV1().Pods(podNamespace).Get(ctx, pods.Items[1].Name, metav1.GetOptions{}) + if err != nil { + return errors.Wrap(err, fmt.Sprintf("Getting pod %s failed with %v", secondPod.Name, err)) + } + logrus.Infof("Second pod: %v", secondPod.Name) + + resultOne := podTest(ctx, clientset, firstPod, []string{"powershell", "Invoke-WebRequest", "www.bing.com", "-UseBasicParsing"}, rc, webRequestPassedWindows) + resultTwo := podTest(ctx, clientset, secondPod, []string{"powershell", "Invoke-WebRequest", "www.bing.com", "-UseBasicParsing"}, rc, webRequestPassedWindows) + + if resultOne != nil { + return resultOne + } + + if resultTwo != nil { + return resultTwo + } + + return nil +} + +func webRequestPassedWindows(output string) error { + const searchString = "200 OK" + if strings.Contains(output, searchString) { + return nil + } + return errors.Wrapf(errors.New("Output did not contain \"200 OK\""), "output was: %s", output) +} + +func pingPassedWindows(output string) error { + const searchString = "0% loss" + if strings.Contains(output, searchString) { + return nil + } + return errors.Wrapf(errors.New("Ping did not contain\"0% loss\""), "output was: %s", output) +} diff --git a/test/integration/label.go b/test/internal/k8sutils/label.go similarity index 97% rename from test/integration/label.go rename to test/internal/k8sutils/label.go index a8f439f0c2..51079c43a0 100644 --- a/test/integration/label.go +++ b/test/internal/k8sutils/label.go @@ -1,4 +1,4 @@ -package k8s +package k8sutils import ( "context" diff --git a/test/integration/utils_test.go b/test/internal/k8sutils/utils.go similarity index 56% rename from test/integration/utils_test.go rename to test/internal/k8sutils/utils.go index cf79f7cab1..95041ca693 100644 --- a/test/integration/utils_test.go +++ b/test/internal/k8sutils/utils.go @@ -1,39 +1,48 @@ -//go:build integration - -package k8s +package k8sutils import ( "bytes" "context" + "flag" "io" "log" "os" + "path/filepath" "strings" "testing" "time" - "github.com/pkg/errors" - // crd "dnc/requestcontroller/kubernetes" - "github.com/Azure/azure-container-networking/test/integration/retry" - apiv1 "k8s.io/api/core/v1" + "github.com/Azure/azure-container-networking/test/internal/retry" + "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" v1 "k8s.io/api/rbac/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/yaml" "k8s.io/client-go/kubernetes" + "k8s.io/client-go/kubernetes/scheme" + typedappsv1 "k8s.io/client-go/kubernetes/typed/apps/v1" "k8s.io/client-go/rest" "k8s.io/client-go/tools/clientcmd" + "k8s.io/client-go/tools/remotecommand" + "k8s.io/client-go/util/homedir" ) const ( DelegatedSubnetIDLabel = "kubernetes.azure.com/podnetwork-delegationguid" SubnetNameLabel = "kubernetes.azure.com/podnetwork-subnet" + + // RetryAttempts is the number of times to retry a test. 
+ RetryAttempts = 30 + RetryDelay = 30 * time.Second ) -func mustGetClientset() (*kubernetes.Clientset, error) { - config, err := clientcmd.BuildConfigFromFlags("", *kubeconfig) +var Kubeconfig = flag.String("test-kubeconfig", filepath.Join(homedir.HomeDir(), ".kube", "config"), "(optional) absolute path to the kubeconfig file") + +func MustGetClientset() (*kubernetes.Clientset, error) { + config, err := clientcmd.BuildConfigFromFlags("", *Kubeconfig) if err != nil { return nil, err } @@ -44,8 +53,8 @@ func mustGetClientset() (*kubernetes.Clientset, error) { return clientset, nil } -func mustGetRestConfig(t *testing.T) *rest.Config { - config, err := clientcmd.BuildConfigFromFlags("", *kubeconfig) +func MustGetRestConfig(t *testing.T) *rest.Config { + config, err := clientcmd.BuildConfigFromFlags("", *Kubeconfig) if err != nil { t.Fatal(err) } @@ -66,7 +75,7 @@ func mustParseResource(path string, out interface{}) error { return err } -func mustLabelSwiftNodes(t *testing.T, ctx context.Context, clientset *kubernetes.Clientset, delegatedSubnetID, delegatedSubnetName string) { +func MustLabelSwiftNodes(ctx context.Context, t *testing.T, clientset *kubernetes.Clientset, delegatedSubnetID, delegatedSubnetName string) { swiftNodeLabels := map[string]string{ DelegatedSubnetIDLabel: delegatedSubnetID, SubnetNameLabel: delegatedSubnetName, @@ -85,7 +94,7 @@ func mustLabelSwiftNodes(t *testing.T, ctx context.Context, clientset *kubernete } } -func mustSetUpClusterRBAC(ctx context.Context, clientset *kubernetes.Clientset, clusterRolePath, clusterRoleBindingPath, serviceAccountPath string) (func(), error) { +func MustSetUpClusterRBAC(ctx context.Context, clientset *kubernetes.Clientset, clusterRolePath, clusterRoleBindingPath, serviceAccountPath string) (func(), error) { var ( err error clusterRole v1.ClusterRole @@ -140,7 +149,7 @@ func mustSetUpClusterRBAC(ctx context.Context, clientset *kubernetes.Clientset, return cleanupFunc, nil } -func mustSetUpRBAC(ctx context.Context, clientset *kubernetes.Clientset, rolePath, roleBindingPath string) error { +func MustSetUpRBAC(ctx context.Context, clientset *kubernetes.Clientset, rolePath, roleBindingPath string) error { var ( err error role v1.Role @@ -169,7 +178,7 @@ func mustSetUpRBAC(ctx context.Context, clientset *kubernetes.Clientset, rolePat return nil } -func mustSetupConfigMap(ctx context.Context, clientset *kubernetes.Clientset, configMapPath string) error { +func MustSetupConfigMap(ctx context.Context, clientset *kubernetes.Clientset, configMapPath string) error { var ( err error cm corev1.ConfigMap @@ -184,24 +193,24 @@ func mustSetupConfigMap(ctx context.Context, clientset *kubernetes.Clientset, co return mustCreateConfigMap(ctx, configmaps, cm) } -func int32ptr(i int32) *int32 { return &i } +func Int32ToPtr(i int32) *int32 { return &i } -func parseImageString(s string) (image, version string) { +func ParseImageString(s string) (image, version string) { sl := strings.Split(s, ":") return sl[0], sl[1] } -func getImageString(image, version string) string { +func GetImageString(image, version string) string { return image + ":" + version } -func waitForPodsRunning(ctx context.Context, clientset *kubernetes.Clientset, namespace, labelselector string) error { +func WaitForPodsRunning(ctx context.Context, clientset *kubernetes.Clientset, namespace, labelselector string) error { podsClient := clientset.CoreV1().Pods(namespace) checkPodIPsFn := func() error { podList, err := podsClient.List(ctx, metav1.ListOptions{LabelSelector: labelselector}) if err 
!= nil { - return err + return errors.Wrapf(err, "could not list pods with label selector %s", labelselector) } if len(podList.Items) == 0 { @@ -209,7 +218,7 @@ func waitForPodsRunning(ctx context.Context, clientset *kubernetes.Clientset, na } for _, pod := range podList.Items { - if pod.Status.Phase == apiv1.PodPending { + if pod.Status.Phase == corev1.PodPending { return errors.New("some pods still pending") } } @@ -223,11 +232,51 @@ func waitForPodsRunning(ctx context.Context, clientset *kubernetes.Clientset, na return nil } - retrier := retry.Retrier{Attempts: 10, Delay: 6 * time.Second} + retrier := retry.Retrier{Attempts: RetryAttempts, Delay: RetryDelay} return retrier.Do(ctx, checkPodIPsFn) } -func exportLogsByLabelSelector(ctx context.Context, clientset *kubernetes.Clientset, namespace, labelselector, logDir string) error { +func WaitForPodDeployment(ctx context.Context, clientset *kubernetes.Clientset, namespace, deploymentName, podLabelSelector string, replicas int) error { + podsClient := clientset.CoreV1().Pods(namespace) + deploymentsClient := clientset.AppsV1().Deployments(namespace) + checkPodDeploymentFn := func() error { + deployment, err := deploymentsClient.Get(ctx, deploymentName, metav1.GetOptions{}) + if err != nil { + return errors.Wrapf(err, "could not get deployment %s", deploymentName) + } + + if deployment.Status.AvailableReplicas != int32(replicas) { + return errors.New("deployment does not have the expected number of available replicas") + } + + podList, err := podsClient.List(ctx, metav1.ListOptions{LabelSelector: podLabelSelector}) + if err != nil { + return errors.Wrapf(err, "could not list pods with label selector %s", podLabelSelector) + } + + log.Printf("deployment %s has %d pods, expected %d", deploymentName, len(podList.Items), replicas) + if len(podList.Items) != replicas { + return errors.New("some pods of the deployment are still not ready") + } + return nil + } + + retrier := retry.Retrier{Attempts: RetryAttempts, Delay: RetryDelay} + return errors.Wrapf(retrier.Do(ctx, checkPodDeploymentFn), "could not wait for deployment %s", deploymentName) +} + +func MustUpdateReplica(ctx context.Context, deploymentsClient typedappsv1.DeploymentInterface, deploymentName string, replicas int32) error { + deployment, err := deploymentsClient.Get(ctx, deploymentName, metav1.GetOptions{}) + if err != nil { + return errors.Wrapf(err, "could not get deployment %s", deploymentName) + } + + deployment.Spec.Replicas = Int32ToPtr(replicas) + _, err = deploymentsClient.Update(ctx, deployment, metav1.UpdateOptions{}) + return errors.Wrapf(err, "could not update deployment %s", deploymentName) +} + +func ExportLogsByLabelSelector(ctx context.Context, clientset *kubernetes.Clientset, namespace, labelselector, logDir string) error { podsClient := clientset.CoreV1().Pods(namespace) podLogOpts := corev1.PodLogOptions{} logExtension := ".log" @@ -279,3 +328,47 @@ func writeToFile(dir, fileName, str string) error { _, err = f.WriteString(str) return err } + +func ExecCmdOnPod(ctx context.Context, clientset *kubernetes.Clientset, namespace, podName string, cmd []string, config *rest.Config) ([]byte, error) { + req := clientset.CoreV1().RESTClient().Post(). + Resource("pods"). + Name(podName). + Namespace(namespace). + SubResource("exec"). 
+ VersionedParams(&corev1.PodExecOptions{ + Command: cmd, + Stdin: false, + Stdout: true, + Stderr: true, + TTY: false, + }, scheme.ParameterCodec) + + exec, err := remotecommand.NewSPDYExecutor(config, "POST", req.URL()) + if err != nil { + return []byte{}, errors.Wrapf(err, "error in creating executor for req %s", req.URL()) + } + + var stdout, stderr bytes.Buffer + err = exec.StreamWithContext(ctx, remotecommand.StreamOptions{ + Stdin: nil, + Stdout: &stdout, + Stderr: &stderr, + Tty: false, + }) + if err != nil { + return []byte{}, errors.Wrapf(err, "error in executing command %s", cmd) + } + + return stdout.Bytes(), nil +} + +func NamespaceExists(ctx context.Context, clientset *kubernetes.Clientset, namespace string) (bool, error) { + _, err := clientset.CoreV1().Namespaces().Get(ctx, namespace, metav1.GetOptions{}) + if err != nil { + if apierrors.IsNotFound(err) { + return false, nil + } + return false, errors.Wrapf(err, "error in getting namespace %s", namespace) + } + return true, nil +} diff --git a/test/integration/utils_create_test.go b/test/internal/k8sutils/utils_create.go similarity index 68% rename from test/integration/utils_create_test.go rename to test/internal/k8sutils/utils_create.go index 6a24bc08fa..8d21af1035 100644 --- a/test/integration/utils_create_test.go +++ b/test/internal/k8sutils/utils_create.go @@ -1,24 +1,35 @@ -//go:build integration - -package k8s +package k8sutils import ( "context" "log" - // crd "dnc/requestcontroller/kubernetes" - + "github.com/pkg/errors" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" rbacv1 "k8s.io/api/rbac/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" typedappsv1 "k8s.io/client-go/kubernetes/typed/apps/v1" typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1" typedrbacv1 "k8s.io/client-go/kubernetes/typed/rbac/v1" ) -func mustCreateDaemonset(ctx context.Context, daemonsets typedappsv1.DaemonSetInterface, ds appsv1.DaemonSet) error { +func MustCreateOrUpdatePod(ctx context.Context, podI typedcorev1.PodInterface, pod corev1.Pod) error { + if err := MustDeletePod(ctx, podI, pod); err != nil { + if !apierrors.IsNotFound(err) { + return err + } + } + if _, err := podI.Create(ctx, &pod, metav1.CreateOptions{}); err != nil { + return errors.Wrapf(err, "failed to create pod %v", pod.Name) + } + + return nil +} + +func MustCreateDaemonset(ctx context.Context, daemonsets typedappsv1.DaemonSetInterface, ds appsv1.DaemonSet) error { if err := mustDeleteDaemonset(ctx, daemonsets, ds); err != nil { return err } @@ -30,7 +41,7 @@ func mustCreateDaemonset(ctx context.Context, daemonsets typedappsv1.DaemonSetIn return nil } -func mustCreateDeployment(ctx context.Context, deployments typedappsv1.DeploymentInterface, d appsv1.Deployment) error { +func MustCreateDeployment(ctx context.Context, deployments typedappsv1.DeploymentInterface, d appsv1.Deployment) error { if err := mustDeleteDeployment(ctx, deployments, d); err != nil { return err } @@ -125,3 +136,41 @@ func mustCreateConfigMap(ctx context.Context, cmi typedcorev1.ConfigMapInterface return nil } + +func MustScaleDeployment(ctx context.Context, + deploymentsClient typedappsv1.DeploymentInterface, + deployment appsv1.Deployment, + clientset *kubernetes.Clientset, + namespace, + podLabelSelector string, + replicas int, + skipWait bool, +) error { + log.Printf("Scaling deployment %v to %v replicas", deployment.Name, replicas) + err := MustUpdateReplica(ctx, deploymentsClient, deployment.Name, 
int32(replicas)) + if err != nil { + return err + } + + if !skipWait { + log.Printf("Waiting for pods to be ready..") + err = WaitForPodDeployment(ctx, clientset, namespace, deployment.Name, podLabelSelector, replicas) + if err != nil { + return err + } + } + return nil +} + +func MustCreateNamespace(ctx context.Context, clienset *kubernetes.Clientset, namespace string) error { + _, err := clienset.CoreV1().Namespaces().Create(ctx, &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: namespace, + }, + }, metav1.CreateOptions{}) + + if err != nil { + return errors.Wrapf(err, "failed to create namespace %v", namespace) + } + return nil +} diff --git a/test/internal/k8sutils/utils_delete.go b/test/internal/k8sutils/utils_delete.go new file mode 100644 index 0000000000..1032b406eb --- /dev/null +++ b/test/internal/k8sutils/utils_delete.go @@ -0,0 +1,52 @@ +package k8sutils + +import ( + "context" + + "github.com/pkg/errors" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + typedappsv1 "k8s.io/client-go/kubernetes/typed/apps/v1" + typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1" +) + +func MustDeletePod(ctx context.Context, podI typedcorev1.PodInterface, pod corev1.Pod) error { + if err := podI.Delete(ctx, pod.Name, metav1.DeleteOptions{}); err != nil { + if !apierrors.IsNotFound(err) { + return errors.Wrap(err, "failed to delete pod") + } + } + return nil +} + +func mustDeleteDaemonset(ctx context.Context, daemonsets typedappsv1.DaemonSetInterface, ds appsv1.DaemonSet) error { + if err := daemonsets.Delete(ctx, ds.Name, metav1.DeleteOptions{}); err != nil { + if !apierrors.IsNotFound(err) { + return err + } + } + + return nil +} + +func mustDeleteDeployment(ctx context.Context, deployments typedappsv1.DeploymentInterface, d appsv1.Deployment) error { + if err := deployments.Delete(ctx, d.Name, metav1.DeleteOptions{}); err != nil { + if !apierrors.IsNotFound(err) { + return err + } + } + + return nil +} + +func MustDeleteNamespace(ctx context.Context, clienset *kubernetes.Clientset, namespace string) error { + if err := clienset.CoreV1().Namespaces().Delete(ctx, namespace, metav1.DeleteOptions{}); err != nil { + if !apierrors.IsNotFound(err) { + return errors.Wrapf(err, "failed to delete namespace %v", namespace) + } + } + return nil +} diff --git a/test/internal/k8sutils/utils_get.go b/test/internal/k8sutils/utils_get.go new file mode 100644 index 0000000000..531ec38fce --- /dev/null +++ b/test/internal/k8sutils/utils_get.go @@ -0,0 +1,51 @@ +package k8sutils + +import ( + "context" + + "github.com/pkg/errors" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" +) + +func GetNodeList(ctx context.Context, clientset *kubernetes.Clientset) (*corev1.NodeList, error) { + nodes, err := clientset.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) + if err != nil { + return nil, errors.Wrap(err, "failed to get nodes") + } + + return nodes, nil +} + +func GetNodeListByLabelSelector(ctx context.Context, clientset *kubernetes.Clientset, labelSelector string) (*corev1.NodeList, error) { + nodes, err := clientset.CoreV1().Nodes().List(ctx, metav1.ListOptions{LabelSelector: labelSelector}) + if err != nil { + return nil, errors.Wrapf(err, "failed to get nodes with labelselector: %s", labelSelector) + } + + return nodes, nil +} + +func GetPodsByNode(ctx context.Context, clientset 
*kubernetes.Clientset, namespace, labelselector, nodeName string) (*corev1.PodList, error) { + pods, err := clientset.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{ + FieldSelector: "spec.nodeName=" + nodeName, + LabelSelector: labelselector, + }) + if err != nil { + return nil, errors.Wrapf(err, "failed to get pods by node %s", nodeName) + } + return pods, nil +} + +func GetPodsIpsByNode(ctx context.Context, clientset *kubernetes.Clientset, namespace, labelselector, nodeName string) ([]string, error) { + pods, err := GetPodsByNode(ctx, clientset, namespace, labelselector, nodeName) + if err != nil { + return nil, err + } + ips := make([]string, 0, len(pods.Items)) + for index := range pods.Items { + ips = append(ips, pods.Items[index].Status.PodIP) + } + return ips, nil +} diff --git a/test/integration/utils_parse_test.go b/test/internal/k8sutils/utils_parse.go similarity index 77% rename from test/integration/utils_parse_test.go rename to test/internal/k8sutils/utils_parse.go index 787121176a..9113788d35 100644 --- a/test/integration/utils_parse_test.go +++ b/test/internal/k8sutils/utils_parse.go @@ -1,6 +1,4 @@ -//go:build integration - -package k8s +package k8sutils import ( appsv1 "k8s.io/api/apps/v1" @@ -8,13 +6,20 @@ import ( rbacv1 "k8s.io/api/rbac/v1" ) -func mustParseDaemonSet(path string) (appsv1.DaemonSet, error) { +// ParsePod parses a corev1.Pod from the provided yaml or json file path. +func MustParsePod(path string) (corev1.Pod, error) { + var pod corev1.Pod + err := mustParseResource(path, &pod) + return pod, err +} + +func MustParseDaemonSet(path string) (appsv1.DaemonSet, error) { var ds appsv1.DaemonSet err := mustParseResource(path, &ds) return ds, err } -func mustParseDeployment(path string) (appsv1.Deployment, error) { +func MustParseDeployment(path string) (appsv1.Deployment, error) { var depl appsv1.Deployment err := mustParseResource(path, &depl) return depl, err diff --git a/test/integration/retry/retry.go b/test/internal/retry/retry.go similarity index 96% rename from test/integration/retry/retry.go rename to test/internal/retry/retry.go index 051793aeea..7b46f9266e 100644 --- a/test/integration/retry/retry.go +++ b/test/internal/retry/retry.go @@ -1,5 +1,3 @@ -// +build integration - // todo: there are more robust retry packages out there, discuss with team package retry diff --git a/test/validate/client.go b/test/validate/client.go new file mode 100644 index 0000000000..b4cc5f5cb8 --- /dev/null +++ b/test/validate/client.go @@ -0,0 +1,40 @@ +package validate + +import ( + "context" + + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" +) + +type Validator struct { + ctx context.Context + clientset *kubernetes.Clientset + config *rest.Config + namespace string + cni string + restartCase bool +} + +// Todo: Add the validation for the data path function for the linux/windows client. +type IValidator interface { + ValidateStateFile() error + ValidateRestartNetwork() error + // ValidateDataPath() error +} + +type validatorClient interface { + CreateClient(ctx context.Context, clienset *kubernetes.Clientset, config *rest.Config, namespace, cni string, restartCase bool) IValidator +} + +// Func to get the type of validator client based on the Operating system. 
+func GetValidatorClient(os string) validatorClient { + switch os { + case "linux": + return &LinuxClient{} + case "windows": + return &WindowsClient{} + default: + return nil + } +} diff --git a/test/validate/linux_validate.go b/test/validate/linux_validate.go new file mode 100644 index 0000000000..d2839f4098 --- /dev/null +++ b/test/validate/linux_validate.go @@ -0,0 +1,232 @@ +package validate + +import ( + "context" + "encoding/json" + "log" + + "github.com/Azure/azure-container-networking/cns" + restserver "github.com/Azure/azure-container-networking/cns/restserver" + k8sutils "github.com/Azure/azure-container-networking/test/internal/k8sutils" + "github.com/pkg/errors" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" +) + +const ( + privilegedDaemonSetPath = "../manifests/load/privileged-daemonset.yaml" + privilegedLabelSelector = "app=privileged-daemonset" + privilegedNamespace = "kube-system" + + cnsLabelSelector = "k8s-app=azure-cns" + ciliumLabelSelector = "k8s-app=cilium" +) + +var ( + restartNetworkCmd = []string{"bash", "-c", "chroot /host /bin/bash -c 'systemctl restart systemd-networkd'"} + cnsStateFileCmd = []string{"bash", "-c", "cat /var/run/azure-cns/azure-endpoints.json"} + ciliumStateFileCmd = []string{"bash", "-c", "cilium endpoint list -o json"} + cnsLocalCacheCmd = []string{"curl", "localhost:10090/debug/ipaddresses", "-d", "{\"IPConfigStateFilter\":[\"Assigned\"]}"} +) + +type stateFileIpsFunc func([]byte) (map[string]string, error) + +type LinuxClient struct{} + +type LinuxValidator struct { + Validator +} + +type CnsState struct { + Endpoints map[string]restserver.EndpointInfo `json:"Endpoints"` +} + +type CNSLocalCache struct { + IPConfigurationStatus []cns.IPConfigurationStatus `json:"IPConfigurationStatus"` +} + +type CiliumEndpointStatus struct { + Status NetworkingStatus `json:"status"` +} + +type NetworkingStatus struct { + Networking NetworkingAddressing `json:"networking"` +} + +type NetworkingAddressing struct { + Addresses []Address `json:"addressing"` + InterfaceName string `json:"interface-name"` +} + +type Address struct { + Addr string `json:"ipv4"` +} + +func (l *LinuxClient) CreateClient(ctx context.Context, clienset *kubernetes.Clientset, config *rest.Config, namespace, cni string, restartCase bool) IValidator { + // deploy privileged pod + privilegedDaemonSet, err := k8sutils.MustParseDaemonSet(privilegedDaemonSetPath) + if err != nil { + panic(err) + } + daemonsetClient := clienset.AppsV1().DaemonSets(privilegedNamespace) + err = k8sutils.MustCreateDaemonset(ctx, daemonsetClient, privilegedDaemonSet) + if err != nil { + panic(err) + } + err = k8sutils.WaitForPodsRunning(ctx, clienset, privilegedNamespace, privilegedLabelSelector) + if err != nil { + panic(err) + } + return &LinuxValidator{ + Validator: Validator{ + ctx: ctx, + clientset: clienset, + config: config, + namespace: namespace, + cni: cni, + restartCase: restartCase, + }, + } +} + +// Todo: Based on cni version validate different state files +func (v *LinuxValidator) ValidateStateFile() error { + checks := []struct { + name string + stateFileIps func([]byte) (map[string]string, error) + podLabelSelector string + podNamespace string + cmd []string + }{ + {"cns", cnsStateFileIps, cnsLabelSelector, privilegedNamespace, cnsStateFileCmd}, + {"cilium", ciliumStateFileIps, ciliumLabelSelector, privilegedNamespace, ciliumStateFileCmd}, + {"cns cache", cnsCacheStateFileIps, cnsLabelSelector, privilegedNamespace, cnsLocalCacheCmd}, + } + + for _, check := range checks { + err := 
v.validate(check.stateFileIps, check.cmd, check.name, check.podNamespace, check.podLabelSelector) + if err != nil { + return err + } + } + return nil +} + +func (v *LinuxValidator) ValidateRestartNetwork() error { + nodes, err := k8sutils.GetNodeList(v.ctx, v.clientset) + if err != nil { + return errors.Wrapf(err, "failed to get node list") + } + + for index := range nodes.Items { + // get the privileged pod + pod, err := k8sutils.GetPodsByNode(v.ctx, v.clientset, privilegedNamespace, privilegedLabelSelector, nodes.Items[index].Name) + if err != nil { + return errors.Wrapf(err, "failed to get privileged pod") + } + + privelegedPod := pod.Items[0] + // exec into the pod to get the state file + _, err = k8sutils.ExecCmdOnPod(v.ctx, v.clientset, privilegedNamespace, privelegedPod.Name, restartNetworkCmd, v.config) + if err != nil { + return errors.Wrapf(err, "failed to exec into privileged pod") + } + err = k8sutils.WaitForPodsRunning(v.ctx, v.clientset, "", "") + if err != nil { + return errors.Wrapf(err, "failed to wait for pods running") + } + } + return nil +} + +func cnsStateFileIps(result []byte) (map[string]string, error) { + var cnsResult CnsState + err := json.Unmarshal(result, &cnsResult) + if err != nil { + return nil, errors.Wrapf(err, "failed to unmarshal cns endpoint list") + } + + cnsPodIps := make(map[string]string) + for _, v := range cnsResult.Endpoints { + for ifName, ip := range v.IfnameToIPMap { + if ifName == "eth0" { + ip := ip.IPv4[0].IP.String() + cnsPodIps[ip] = v.PodName + } + } + } + return cnsPodIps, nil +} + +func ciliumStateFileIps(result []byte) (map[string]string, error) { + var ciliumResult []CiliumEndpointStatus + err := json.Unmarshal(result, &ciliumResult) + if err != nil { + return nil, errors.Wrapf(err, "failed to unmarshal cilium endpoint list") + } + + ciliumPodIps := make(map[string]string) + for _, v := range ciliumResult { + for _, addr := range v.Status.Networking.Addresses { + if addr.Addr != "" { + ciliumPodIps[addr.Addr] = v.Status.Networking.InterfaceName + } + } + } + return ciliumPodIps, nil +} + +func cnsCacheStateFileIps(result []byte) (map[string]string, error) { + var cnsLocalCache CNSLocalCache + + err := json.Unmarshal(result, &cnsLocalCache) + if err != nil { + return nil, errors.Wrapf(err, "failed to unmarshal cns local cache") + } + + cnsPodIps := make(map[string]string) + for index := range cnsLocalCache.IPConfigurationStatus { + cnsPodIps[cnsLocalCache.IPConfigurationStatus[index].IPAddress] = cnsLocalCache.IPConfigurationStatus[index].PodInfo.Name() + } + return cnsPodIps, nil +} + +func (v *LinuxValidator) validate(stateFileIps stateFileIpsFunc, cmd []string, checkType, namespace, labelSelector string) error { + log.Printf("Validating %s state file", checkType) + nodes, err := k8sutils.GetNodeList(v.ctx, v.clientset) + if err != nil { + return errors.Wrapf(err, "failed to get node list") + } + + for index := range nodes.Items { + // get the privileged pod + pod, err := k8sutils.GetPodsByNode(v.ctx, v.clientset, namespace, labelSelector, nodes.Items[index].Name) + if err != nil { + return errors.Wrapf(err, "failed to get privileged pod") + } + podName := pod.Items[0].Name + // exec into the pod to get the state file + result, err := k8sutils.ExecCmdOnPod(v.ctx, v.clientset, namespace, podName, cmd, v.config) + if err != nil { + return errors.Wrapf(err, "failed to exec into privileged pod") + } + filePodIps, err := stateFileIps(result) + if err != nil { + return errors.Wrapf(err, "failed to get pod ips from state file") + } + if 
len(filePodIps) == 0 && v.restartCase { + log.Printf("No pods found on node %s", nodes.Items[index].Name) + continue + } + // get the pod ips + podIps := getPodIPsWithoutNodeIP(v.ctx, v.clientset, nodes.Items[index]) + + check := compareIPs(filePodIps, podIps) + + if !check { + return errors.Wrapf(errors.New("State file validation failed"), "for %s on node %s", checkType, nodes.Items[index].Name) + } + } + log.Printf("State file validation for %s passed", checkType) + return nil +} diff --git a/test/validate/utils.go b/test/validate/utils.go new file mode 100644 index 0000000000..7180c7bc66 --- /dev/null +++ b/test/validate/utils.go @@ -0,0 +1,39 @@ +package validate + +import ( + "context" + + "github.com/Azure/azure-container-networking/test/internal/k8sutils" + corev1 "k8s.io/api/core/v1" + "k8s.io/client-go/kubernetes" +) + +func compareIPs(expected map[string]string, actual []string) bool { + if len(expected) != len(actual) { + return false + } + + for _, ip := range actual { + if _, ok := expected[ip]; !ok { + return false + } + } + + return true +} + +// func to get the pods ip without the node ip (ie. host network as false) +func getPodIPsWithoutNodeIP(ctx context.Context, clientset *kubernetes.Clientset, node corev1.Node) []string { + podsIpsWithoutNodeIP := []string{} + podIPs, err := k8sutils.GetPodsIpsByNode(ctx, clientset, "", "", node.Name) + if err != nil { + return podsIpsWithoutNodeIP + } + nodeIP := node.Status.Addresses[0].Address + for _, podIP := range podIPs { + if podIP != nodeIP { + podsIpsWithoutNodeIP = append(podsIpsWithoutNodeIP, podIP) + } + } + return podsIpsWithoutNodeIP +} diff --git a/test/validate/windows_validate.go b/test/validate/windows_validate.go new file mode 100644 index 0000000000..9e54f61bef --- /dev/null +++ b/test/validate/windows_validate.go @@ -0,0 +1,228 @@ +package validate + +import ( + "context" + "encoding/json" + "log" + "net" + + k8sutils "github.com/Azure/azure-container-networking/test/internal/k8sutils" + "github.com/pkg/errors" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" +) + +const ( + privilegedWindowsDaemonSetPath = "../manifests/load/privileged-daemonset-windows.yaml" + windowsNodeSelector = "kubernetes.io/os=windows" +) + +var ( + hnsEndPpointCmd = []string{"powershell", "-c", "Get-HnsEndpoint | ConvertTo-Json"} + azureVnetCmd = []string{"powershell", "-c", "cat ../../k/azure-vnet.json"} + azureVnetIpamCmd = []string{"powershell", "-c", "cat ../../k/azure-vnet-ipam.json"} +) + +type WindowsClient struct{} + +type WindowsValidator struct { + Validator +} + +type HNSEndpoint struct { + MacAddress string `json:"MacAddress"` + IPAddress net.IP `json:"IPAddress"` + IPv6Address net.IP `json:",omitempty"` + IsRemoteEndpoint bool `json:",omitempty"` +} + +type AzureVnet struct { + NetworkInfo NetworkInfo `json:"Network"` +} + +type NetworkInfo struct { + ExternalInterfaces map[string]ExternalInterface `json:"ExternalInterfaces"` +} + +type ExternalInterface struct { + Networks map[string]Network `json:"Networks"` +} + +type Network struct { + Endpoints map[string]Endpoint `json:"Endpoints"` +} + +type Endpoint struct { + IPAddresses []net.IPNet `json:"IPAddresses"` + IfName string `json:"IfName"` +} + +type AzureVnetIpam struct { + IPAM AddressSpaces `json:"IPAM"` +} + +type AddressSpaces struct { + AddrSpaces map[string]AddressSpace `json:"AddressSpaces"` +} + +type AddressSpace struct { + Pools map[string]AddressPool `json:"Pools"` +} + +type AddressPool struct { + Addresses map[string]AddressRecord `json:"Addresses"` 
+} + +type AddressRecord struct { + Addr net.IP + InUse bool +} + +func (w *WindowsClient) CreateClient(ctx context.Context, clienset *kubernetes.Clientset, config *rest.Config, namespace, cni string, restartCase bool) IValidator { + // deploy privileged pod + privilegedDaemonSet, err := k8sutils.MustParseDaemonSet(privilegedWindowsDaemonSetPath) + if err != nil { + panic(err) + } + daemonsetClient := clienset.AppsV1().DaemonSets(privilegedNamespace) + err = k8sutils.MustCreateDaemonset(ctx, daemonsetClient, privilegedDaemonSet) + if err != nil { + panic(err) + } + err = k8sutils.WaitForPodsRunning(ctx, clienset, privilegedNamespace, privilegedLabelSelector) + if err != nil { + panic(err) + } + return &WindowsValidator{ + Validator: Validator{ + ctx: ctx, + clientset: clienset, + config: config, + namespace: namespace, + cni: cni, + restartCase: restartCase, + }, + } +} + +func (v *WindowsValidator) ValidateStateFile() error { + checks := []struct { + name string + stateFileIps func([]byte) (map[string]string, error) + podLabelSelector string + podNamespace string + cmd []string + }{ + {"hns", hnsStateFileIps, privilegedLabelSelector, privilegedNamespace, hnsEndPpointCmd}, + {"azure-vnet", azureVnetIps, privilegedLabelSelector, privilegedNamespace, azureVnetCmd}, + {"azure-vnet-ipam", azureVnetIpamIps, privilegedLabelSelector, privilegedNamespace, azureVnetIpamCmd}, + } + + for _, check := range checks { + err := v.validate(check.stateFileIps, check.cmd, check.name, check.podNamespace, check.podLabelSelector) + if err != nil { + return err + } + } + return nil +} + +func hnsStateFileIps(result []byte) (map[string]string, error) { + var hnsResult []HNSEndpoint + err := json.Unmarshal(result, &hnsResult) + if err != nil { + return nil, errors.Wrapf(err, "failed to unmarshal hns endpoint list") + } + + hnsPodIps := make(map[string]string) + for _, v := range hnsResult { + if !v.IsRemoteEndpoint { + hnsPodIps[v.IPAddress.String()] = v.MacAddress + } + } + return hnsPodIps, nil +} + +func azureVnetIps(result []byte) (map[string]string, error) { + var azureVnetResult AzureVnet + err := json.Unmarshal(result, &azureVnetResult) + if err != nil { + return nil, errors.Wrapf(err, "failed to unmarshal azure vnet") + } + + azureVnetPodIps := make(map[string]string) + for _, v := range azureVnetResult.NetworkInfo.ExternalInterfaces { + for _, v := range v.Networks { + for _, e := range v.Endpoints { + for _, v := range e.IPAddresses { + azureVnetPodIps[v.IP.String()] = e.IfName + } + } + } + } + return azureVnetPodIps, nil +} + +func azureVnetIpamIps(result []byte) (map[string]string, error) { + var azureVnetIpamResult AzureVnetIpam + err := json.Unmarshal(result, &azureVnetIpamResult) + if err != nil { + return nil, errors.Wrapf(err, "failed to unmarshal azure vnet ipam") + } + + azureVnetIpamPodIps := make(map[string]string) + + for _, v := range azureVnetIpamResult.IPAM.AddrSpaces { + for _, v := range v.Pools { + for _, v := range v.Addresses { + if v.InUse { + azureVnetIpamPodIps[v.Addr.String()] = v.Addr.String() + } + } + } + } + return azureVnetIpamPodIps, nil +} + +func (v *WindowsValidator) validate(stateFileIps stateFileIpsFunc, cmd []string, checkType, namespace, labelSelector string) error { + log.Println("Validating ", checkType, " state file") + nodes, err := k8sutils.GetNodeListByLabelSelector(v.ctx, v.clientset, windowsNodeSelector) + if err != nil { + return errors.Wrapf(err, "failed to get node list") + } + for index := range nodes.Items { + // get the privileged pod + pod, err := 
k8sutils.GetPodsByNode(v.ctx, v.clientset, namespace, labelSelector, nodes.Items[index].Name) + if err != nil { + return errors.Wrapf(err, "failed to get privileged pod") + } + podName := pod.Items[0].Name + // exec into the pod to get the state file + result, err := k8sutils.ExecCmdOnPod(v.ctx, v.clientset, namespace, podName, cmd, v.config) + if err != nil { + return errors.Wrapf(err, "failed to exec into privileged pod") + } + filePodIps, err := stateFileIps(result) + if err != nil { + return errors.Wrapf(err, "failed to get pod ips from state file") + } + if len(filePodIps) == 0 && v.restartCase { + log.Printf("No pods found on node %s", nodes.Items[index].Name) + continue + } + // get the pod ips + podIps := getPodIPsWithoutNodeIP(v.ctx, v.clientset, nodes.Items[index]) + + check := compareIPs(filePodIps, podIps) + + if !check { + return errors.Wrapf(errors.New("State file validation failed"), "for %s on node %s", checkType, nodes.Items[index].Name) + } + } + log.Printf("State file validation for %s passed", checkType) + return nil +} + +func (v *WindowsValidator) ValidateRestartNetwork() error { + return nil +}
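
To make the call order of the new packages concrete, below is a minimal sketch of an integration test that ties the relocated k8sutils helpers to the new validator client. It is illustrative only and not part of the patch: the package name (load), test name, namespace ("load-test"), CNI value ("cilium"), and the assumption that test/validate is importable as github.com/Azure/azure-container-networking/test/validate are guesses, while the functions called (MustGetClientset, MustGetRestConfig, GetValidatorClient, CreateClient, ValidateStateFile) are the ones added in the diff above.

//go:build integration

package load

import (
	"context"
	"testing"

	"github.com/Azure/azure-container-networking/test/internal/k8sutils"
	"github.com/Azure/azure-container-networking/test/validate"
)

// TestValidatorSketch shows the intended call order: build clients from the
// shared -test-kubeconfig flag, pick an OS-specific validator, deploy the
// privileged daemonset via CreateClient, then compare state-file IPs against
// the pod IPs reported by the API server.
func TestValidatorSketch(t *testing.T) {
	ctx := context.Background()

	clientset, err := k8sutils.MustGetClientset()
	if err != nil {
		t.Fatalf("could not get clientset: %v", err)
	}
	config := k8sutils.MustGetRestConfig(t)

	// GetValidatorClient recognizes only "linux" and "windows"; anything else
	// returns nil.
	client := validate.GetValidatorClient("linux")
	if client == nil {
		t.Fatal("no validator client for this OS")
	}

	// CreateClient deploys the privileged daemonset from
	// ../manifests/load/privileged-daemonset.yaml and waits for it to be
	// running, so the test must run from a working directory where that
	// relative path resolves to test/integration/manifests.
	validator := client.CreateClient(ctx, clientset, config, "load-test", "cilium", false)

	// Check the CNS, Cilium, and CNS local-cache state files on every node
	// against the pod IPs reported by the API server.
	if err := validator.ValidateStateFile(); err != nil {
		t.Fatalf("state file validation failed: %v", err)
	}
}

When restartCase is true, ValidateRestartNetwork could be invoked the same way after ValidateStateFile; note that in this patch only the Linux validator implements it, while the Windows implementation is a no-op.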