Skip to content

Commit

Permalink
ci: Add CNIv2 Linux to Load Test pipeline (#2141)
Browse files Browse the repository at this point in the history
* Initial Commit

* Add sleep for swift cluster

* Change NPM|CNI Integration

* Addressing comments

* Add: NPM continueOnError

* Add: Generate logs for NPM

* Change NPM Linux branch - long sleep 10s

* refactor: linux validate

* fix: rebase

* Add: maxSkew for noop deployments

* Add: Capture improper node restart

* Add: Restart CNS case for Cilium

* Addressing Comments

* Add: Restart CNS template
  • Loading branch information
jpayne3506 committed Sep 19, 2023
1 parent 4fa3bf4 commit 4772008
Show file tree
Hide file tree
Showing 13 changed files with 612 additions and 303 deletions.
123 changes: 42 additions & 81 deletions .pipelines/cni/cilium/cilium-overlay-load-test-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@ parameters:
vmSize: "Standard_DS4_v2"

stages:

- stage: createAKScluster
dependsOn: ${{ parameters.dependsOn }}
displayName: "AKS Cluster with Cilium"
- stage: create_${{ parameters.name }}
variables:
commitID: $[ stagedependencies.setup.env.outputs['SetEnvVars.commitID'] ]
dependsOn:
- setup
displayName: "Create Cluster - ${{ parameters.clusterName }}"
jobs:
- job: create_aks_cluster_with_${{ parameters.name }}
pool:
Expand All @@ -19,16 +21,22 @@ stages:
- template: ../load-test-templates/create-cluster-template.yaml
parameters:
clusterType: ${{ parameters.clusterType }}
clusterName: ${{ parameters.clusterName }}
clusterName: ${{ parameters.clusterName }}-$(commitID)
nodeCount: ${{ parameters.nodeCount }}
vmSize: ${{ parameters.vmSize }}
- stage: install_cilium
dependsOn: createAKScluster
displayName: "Install Cilium on AKS Overlay"
region: $(LOCATION)

- stage: ${{ parameters.name }}
variables:
commitID: $[ stagedependencies.setup.env.outputs['SetEnvVars.commitID'] ]
pool:
name: "$(BUILD_POOL_NAME_DEFAULT)"
dependsOn:
- create_${{ parameters.name }}
- setup
displayName: "Cilium Test - ${{ parameters.name }}"
jobs:
- job: deploy_cilium_components
pool:
name: "$(BUILD_POOL_NAME_DEFAULT)"
steps:
- task: AzureCLI@1
displayName: "Install Cilium, CNS, and ip-masq-agent"
Expand All @@ -40,7 +48,7 @@ stages:
inlineScript: |
set -ex
az extension add --name aks-preview
make -C ./hack/aks set-kubeconf AZCLI=az CLUSTER=${{ parameters.clusterName }}-$(make revision)
make -C ./hack/aks set-kubeconf AZCLI=az CLUSTER=${{ parameters.clusterName }}-$(commitID)
ls -lah
pwd
kubectl cluster-info
Expand All @@ -64,64 +72,46 @@ stages:
kubectl create configmap config-reconcile.yaml
cd ../../../..
kubectl get po -owide -A
- stage: pod_deployment
dependsOn: install_cilium
displayName: "Pod Deployment"
jobs:
- job: deploy_pods
pool:
name: "$(BUILD_POOL_NAME_DEFAULT)"
displayName: "Scale Test"
dependsOn: deploy_cilium_components
steps:
- template: ../load-test-templates/pod-deployment-template.yaml
parameters:
clusterName: ${{ parameters.clusterName }}
clusterName: ${{ parameters.clusterName }}-$(commitID)
scaleup: ${CILIUM_SCALEUP}
os: linux
iterations: ${CILIUM_ITERATIONS}
nodeCount: ${{ parameters.nodeCount }}
- stage: validate_state
dependsOn: pod_deployment
displayName: "Validate State"
jobs:
- job: validate_state
pool:
name: "$(BUILD_POOL_NAME_DEFAULT)"
steps:
- template: ../load-test-templates/validate-state-template.yaml
parameters:
clusterName: ${{ parameters.clusterName }}
- stage: restart_nodes
dependsOn: validate_state
displayName: "Restart Node"
jobs:
clusterName: ${{ parameters.clusterName }}-$(commitID)
- job: restart_nodes
pool:
name: "$(BUILD_POOL_NAME_DEFAULT)"
displayName: "Restart Test"
dependsOn: deploy_pods
steps:
- template: ../load-test-templates/restart-node-template.yaml
parameters:
clusterName: ${{ parameters.clusterName }}
clusterName: ${{ parameters.clusterName }}-$(commitID)
nodeCount: ${{ parameters.nodeCount }}
scaleup: ${CILIUM_SCALEUP}
- stage: validate_restart_state
dependsOn: restart_nodes
displayName: "Validate Restart State"
jobs:
- job: validate_restart_state
pool:
name: "$(BUILD_POOL_NAME_DEFAULT)"
steps:
- template: ../load-test-templates/validate-state-template.yaml
parameters:
clusterName: ${{ parameters.clusterName }}
clusterName: ${{ parameters.clusterName }}-$(commitID)
restartCase: "true"
- stage: connectivity_tests
dependsOn: validate_restart_state
displayName: "Connectivity Tests"
jobs:
cnsManagedEndpoint: "true"
- job: restart_cns
displayName: "Restart and Validate CNS"
dependsOn: restart_nodes
steps:
- template: ../load-test-templates/restart-cns-template.yaml
parameters:
clusterName: ${{ parameters.clusterName }}-$(commitID)
scaleup: ${CILIUM_SCALEUP}
nodeCount: ${{ parameters.nodeCount }}
- job: cni_tests
pool:
name: "$(BUILD_POOL_NAME_DEFAULT)"
displayName: "Cilium Test"
dependsOn: restart_cns
steps:
- script: |
echo "install cilium CLI"
Expand All @@ -135,13 +125,13 @@ stages:
displayName: "Install Cilium CLI"
- task: AzureCLI@1
inputs:
azureSubscription: $(TEST_SUB_SERVICE_CONNECTION)
azureSubscription: $(BUILD_VALIDATIONS_SERVICE_CONNECTION)
scriptLocation: "inlineScript"
scriptType: "bash"
addSpnToEnvironment: true
inlineScript: |
set -ex
make -C ./hack/aks set-kubeconf AZCLI=az CLUSTER=${{ parameters.clusterName }}-$(make revision)
make -C ./hack/aks set-kubeconf AZCLI=az CLUSTER=${{ parameters.clusterName }}-$(commitID)
name: "GetCluster"
displayName: "Get AKS Cluster"
- script: |
Expand All @@ -150,33 +140,4 @@ stages:
retryCountOnTaskFailure: 6
name: "CiliumConnectivityTests"
displayName: "Run Cilium Connectivity Tests"
- stage: delete
displayName: "Delete Resources"
dependsOn:
- connectivity_tests
jobs:
- job: delete_resources
pool:
name: "$(BUILD_POOL_NAME_DEFAULT)"
steps:
- task: AzureCLI@1
inputs:
azureSubscription: $(BUILD_VALIDATIONS_SERVICE_CONNECTION)
scriptLocation: "inlineScript"
scriptType: "bash"
addSpnToEnvironment: true
inlineScript: |
set -ex
if [ "$(DELETE_RESOURCES)" ]
then
echo "Deleting Cluster and resource group"
make -C ./hack/aks set-kubeconf AZCLI=az CLUSTER=${{ parameters.clusterName }}-$(make revision)
make -C ./hack/aks azcfg AZCLI=az REGION=$(LOCATION)
make -C ./hack/aks down AZCLI=az REGION=$(LOCATION) SUB=$(SUB_AZURE_NETWORK_AGENT_BUILD_VALIDATIONS) CLUSTER=${{ parameters.clusterName }}-$(make revision)
echo "Cluster and resources down"
else
echo "Deletion of resources is False"
fi
name: "CleanUpCluster"
displayName: "Cleanup cluster"
condition: always()
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ steps:
addSpnToEnvironment: true
inlineScript: |
set -ex
make -C ./hack/aks azcfg AZCLI=az REGION=$(LOCATION)
make -C ./hack/aks ${{ parameters.clusterType }} AZCLI=az REGION=$(LOCATION) SUB=$(SUB_AZURE_NETWORK_AGENT_BUILD_VALIDATIONS) CLUSTER=${{ parameters.clusterName }}-$(make revision) NODE_COUNT=${{ parameters.nodeCount }} VM_SIZE=${{ parameters.vmSize }} WINDOWS_VM_SKU=${{ parameters.windowsVMSize }} WINDOWS_USERNAME=${WINDOWS_USERNAME} WINDOWS_PASSWORD=${WINDOWS_PASSWORD}
make -C ./hack/aks azcfg AZCLI=az REGION=${{ parameters.region }}
make -C ./hack/aks ${{ parameters.clusterType }} AZCLI=az REGION=${{ parameters.region }} SUB=$(SUB_AZURE_NETWORK_AGENT_BUILD_VALIDATIONS) CLUSTER=${{ parameters.clusterName }} NODE_COUNT=${{ parameters.nodeCount }} VM_SIZE=${{ parameters.vmSize }} WINDOWS_VM_SKU=${{ parameters.windowsVMSize }} WINDOWS_USERNAME=${WINDOWS_USERNAME} WINDOWS_PASSWORD=${WINDOWS_PASSWORD}
name: "CreateAksCluster"
displayName: "Create AKS Cluster"
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ steps:
inlineScript: |
set -ex
az extension add --name aks-preview
make -C ./hack/aks set-kubeconf AZCLI=az CLUSTER=${{ parameters.clusterName }}-$(make revision)
make -C ./hack/aks set-kubeconf AZCLI=az CLUSTER=${{ parameters.clusterName }}
cd test/integration/load
scale=$(( ${{ parameters.scaleup }} * ${{ parameters.nodeCount }} ))
go test -timeout 30m -tags load -run ^TestLoad$ -tags=load -iterations=${{ parameters.iterations }} -scaleup=$scale -os=${{ parameters.os }}
34 changes: 34 additions & 0 deletions .pipelines/cni/load-test-templates/restart-cns-template.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Step template: restart the azure-cns DaemonSet and validate state before
# and after the restart.
#
# Parameters:
#   clusterName - cluster name passed to `make -C ./hack/aks set-kubeconf`.
#   cni         - value forwarded as CNI_TYPE to `make test-validate-state`.
#   scaleup     - multiplied by nodeCount to get the replica count.
#   nodeCount   - multiplied by scaleup to get the replica count.
parameters:
  clusterName: ""
  cni: "cilium"
  scaleup: 100
  nodeCount: 10

steps:
  - task: AzureCLI@1
    inputs:
      azureSubscription: $(BUILD_VALIDATIONS_SERVICE_CONNECTION)
      scriptLocation: "inlineScript"
      scriptType: "bash"
      addSpnToEnvironment: true
      inlineScript: |
        # Abort on the first failing command (and trace commands) so that a
        # failed scale-up, validation, or rollout fails the task instead of
        # being masked by the exit status of the final command.
        set -ex
        make -C ./hack/aks set-kubeconf AZCLI=az CLUSTER=${{ parameters.clusterName }}
        kubectl get pod -owide -A
        echo "Ensure there are pods scheduled on each node"
        cd test/integration/load
        # Replica count is scaleup multiplied by nodeCount.
        scale=$(( ${{ parameters.scaleup }} * ${{ parameters.nodeCount }} ))
        go test -count 1 -timeout 30m -tags=load -run '^TestScaleDeployment$' -replicas=$scale
        cd ../../../
        echo "Validate pod IP assignment before CNS restart"
        make test-validate-state CNI_TYPE=${{ parameters.cni }}
        echo "restart CNS"
        kubectl rollout restart ds azure-cns -n kube-system
        # Block until the restarted DaemonSet has finished rolling out.
        kubectl rollout status ds azure-cns -n kube-system
        kubectl get pod -owide -A
        echo "Validate pod IP assignment after CNS restart"
        make test-validate-state CNI_TYPE=${{ parameters.cni }}
    name: "restartCNS"
    displayName: "Restart CNS and Validate pods"
    # The whole script is re-run on failure, up to three more times.
    retryCountOnTaskFailure: 3
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ steps:
addSpnToEnvironment: true
inlineScript: |
echo "Scale up the pods and immediated restart the nodes"
clusterName=${{ parameters.clusterName }}-$(make revision)
clusterName=${{ parameters.clusterName }}
make -C ./hack/aks set-kubeconf AZCLI=az CLUSTER=${clusterName}
make -C ./hack/aks azcfg AZCLI=az REGION=$(LOCATION)
cd test/integration/load
Expand Down
14 changes: 13 additions & 1 deletion .pipelines/cni/load-test-templates/validate-state-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ parameters:
os: "linux"
restartCase: "false"
cni: "cilium"
cnsManagedEndpoint: "false"

steps:
- task: AzureCLI@1
Expand All @@ -12,9 +13,20 @@ steps:
scriptType: "bash"
addSpnToEnvironment: true
inlineScript: |
make -C ./hack/aks set-kubeconf AZCLI=az CLUSTER=${{ parameters.clusterName }}-$(make revision)
if [ ${{ parameters.cnsManagedEndpoint }} == "true" ] && [ ${{ parameters.restartCase }} == "true" ]
then
echo If this step fails, test manually.
echo Only fails when CNS is managing the endpoint state and state file does not exist due to node restart with 0 load-test pods scheduled on node.
echo Failure is due to slow node restart which occurs after scale down. Scale down causes the 0 pod scenario as it is not a controlable feature, leading to nodes with 100+ load-test pods and others with 0.
echo This delay also misses the intent of the restart node scenario as the scenario requires the operation to be interuptted by the restart.
echo Timing should be: Scale down > Restart nodes during scale down > complete scale down > validate.
echo Contnuing on Error only when endpoint state is managed by CNS and RestartCase == True to allow for further test cases to run.
fi
make -C ./hack/aks set-kubeconf AZCLI=az CLUSTER=${{ parameters.clusterName }}
kubectl get pods -A
make test-validate-state OS=${{ parameters.os }} RESTART_CASE=${{ parameters.restartCase }} CNI_TYPE=${{ parameters.cni }}
name: "ValidateState"
displayName: "Validate State"
retryCountOnTaskFailure: 3
continueOnError: ${{ parameters.cnsManagedEndpoint }}
Loading

0 comments on commit 4772008

Please sign in to comment.