chore: refactor load test suite (#2229)

pjohnst5 authored Sep 22, 2023
1 parent a61940d commit ec519a5
Showing 14 changed files with 393 additions and 252 deletions.
@@ -19,4 +19,4 @@ steps:
make -C ./hack/aks set-kubeconf AZCLI=az CLUSTER=${{ parameters.clusterName }}
cd test/integration/load
scale=$(( ${{ parameters.scaleup }} * ${{ parameters.nodeCount }} ))
go test -timeout 30m -tags load -run ^TestLoad$ -tags=load -iterations=${{ parameters.iterations }} -scaleup=$scale -os=${{ parameters.os }}
ITERATIONS=${{ parameters.iterations }} SCALE_UP=$scale OS_TYPE=${{ parameters.os }} go test -timeout 30m -tags load -run ^TestLoad$
@@ -18,7 +18,7 @@ steps:
echo "Ensure there are pods scheduled on each node"
cd test/integration/load
scale=$(( ${{ parameters.scaleup }} * ${{ parameters.nodeCount }} ))
go test -count 1 -timeout 30m -tags load -run ^TestScaleDeployment$ -tags=load -replicas=$scale
REPLICAS=$scale go test -count 1 -timeout 30m -tags load -run ^TestScaleDeployment$ -tags=load
cd ../../../
echo "Validate pod IP assignment before CNS restart"
4 changes: 2 additions & 2 deletions .pipelines/cni/load-test-templates/restart-node-template.yaml
@@ -19,12 +19,12 @@ steps:
# Capture a scaledown, scaling down to 50% of initial value
scale=$(( ${{ parameters.scaleup }} * ${{ parameters.nodeCount }} / 2))
echo "Scaling the pods down to $(( $scale / ${{ parameters.nodeCount }} )) per node"
go test -count 1 -timeout 30m -tags load -run ^TestScaleDeployment$ -tags=load -replicas=$scale -skip-wait=true
REPLICAS=$scale SKIP_WAIT=true go test -count 1 -timeout 30m -tags load -run ^TestScaleDeployment$ -tags=load
cd ../../../
echo "Restarting the nodes"
vmss_name=$(az vmss list -g MC_${clusterName}_${clusterName}_$(LOCATION) --query "[].name" -o tsv)
make -C ./hack/aks restart-vmss AZCLI=az CLUSTER=${clusterName} REGION=$(LOCATION) VMSS_NAME=$vmss_name
cd test/integration/load
go test -count 1 -timeout 30m -tags load -run ^TestScaleDeployment$ -tags=load -replicas=$scale
REPLICAS=$scale go test -count 1 -timeout 30m -tags load -run ^TestScaleDeployment$
name: "RestartNodes"
displayName: "Restart Nodes"
@@ -25,7 +25,7 @@ steps:
make -C ./hack/aks set-kubeconf AZCLI=az CLUSTER=${{ parameters.clusterName }}
kubectl get pods -A
make test-validate-state OS=${{ parameters.os }} RESTART_CASE=${{ parameters.restartCase }} CNI_TYPE=${{ parameters.cni }}
make test-validate-state OS_TYPE=${{ parameters.os }} RESTART_CASE=${{ parameters.restartCase }} CNI_TYPE=${{ parameters.cni }}
name: "ValidateState"
displayName: "Validate State"
retryCountOnTaskFailure: 3
6 changes: 2 additions & 4 deletions .pipelines/singletenancy/aks/e2e-step-template.yaml
@@ -60,15 +60,13 @@ steps:
kubectl get pods -A -o wide
echo "Deploying test pods"
cd test/integration/load
go test -count 1 -timeout 30m -tags load -run ^TestLoad$ -iterations=2 -scaleup=${{ parameters.scaleup }} -os=${{ parameters.os }}
ITERATIONS=2 SCALE_UP=${{ parameters.scaleup }} OS_TYPE=${{ parameters.os }} go test -count 1 -timeout 30m -tags load -run ^TestLoad$
cd ../../..
# Remove this once we have cniv1 support for validating the test cluster
echo "Validate State skipped for linux cniv1 for now"
if [ "${{parameters.os}}" == "windows" ]; then
make test-validate-state OS=${{ parameters.os }} CNI_TYPE=cniv1
make test-validate-state OS_TYPE=${{ parameters.os }} CNI_TYPE=cniv1
fi
kubectl delete ns load-test
displayName: "Validate State"
retryCountOnTaskFailure: 3
@@ -123,7 +123,7 @@ steps:
make -C ./hack/aks azcfg AZCLI=az REGION=$(REGION_AKS_CLUSTER_TEST)
kubectl get pods -owide -A
echo "Validating Node Restart"
sudo go test -timeout 30m -tags load -cni cniv2 -run ^TestValidateState$ -restart-case=true
sudo CNI_TYPE=cniv2 RESTART_CASE=true go test -timeout 30m -tags load -run ^TestValidateState$
displayName: "Validate Node Restart"
retryCountOnTaskFailure: 3

@@ -132,12 +132,12 @@ steps:
kubectl get pod -owide -A
echo "validate pod state before CNS restarts"
cd test/integration/load
sudo go test -timeout 30m -tags load -cni cniv2 -run ^TestValidateState$
sudo CNI_TYPE=cniv2 go test -timeout 30m -tags load -run ^TestValidateState$
kubectl rollout restart ds azure-cns -n kube-system
kubectl rollout status ds azure-cns -n kube-system
kubectl get pod -owide -A
echo "validate pods after CNS restart"
sudo go test -timeout 30m -tags load -cni cniv2 -run ^TestValidateState$
sudo CNI_TYPE=cniv2 go test -timeout 30m -tags load -run ^TestValidateState$
name: "restartCNS_ValidatePodState"
displayName: "Restart CNS and validate pod state"
retryCountOnTaskFailure: 3
@@ -55,11 +55,11 @@ steps:
set -e
cd test/integration/load
echo "DualStack Overlay Linux control plane Node properties test"
sudo go test -timeout 30m -tags load -run ^TestDualStackProperties$
sudo CNI_TYPE=dualstack go test -timeout 30m -tags load -run ^TestDualStackProperties$
echo "DualStack Overlay Linux control plane Load test"
sudo go test -timeout 30m -tags load -run ^TestLoad$
echo "DualStack Overlay Linux control plane CNS validation test"
sudo go test -timeout 30m -tags load -cni dualstack -run ^TestValidateState$
sudo CNI_TYPE=dualstack go test -timeout 30m -tags load -run ^TestValidateState$
cd ../datapath
echo "Dualstack Overlay Linux datapath IPv6 test"
sudo go test -count=1 datapath_linux_test.go -timeout 3m -tags connection -run ^TestDatapathLinux$ -tags=connection,integration -isDualStack=true
@@ -97,7 +97,7 @@ steps:
make -C ./hack/aks azcfg AZCLI=az REGION=$(REGION_DUALSTACKOVERLAY_CLUSTER_TEST)
kubectl get pods -A
echo "Validating Node Restart"
sudo go test -timeout 30m -tags load -cni dualstack -run ^TestValidateState$ -restart-case=true
sudo CNI_TYPE=dualstack RESTART_CASE=true go test -timeout 30m -tags load -run ^TestValidateState$
displayName: "Validate Node Restart"
retryCountOnTaskFailure: 3

@@ -106,12 +106,12 @@ steps:
kubectl get pod -owide -A
echo "validate pod state before CNS restarts"
cd test/integration/load
sudo go test -timeout 30m -tags load -cni dualstack -run ^TestValidateState$
sudo CNI_TYPE=dualstack go test -timeout 30m -tags load -run ^TestValidateState$
kubectl rollout restart ds azure-cns -n kube-system
kubectl rollout status ds azure-cns -n kube-system
kubectl get pod -owide -A
echo "validate pods after CNS restarts"
sudo go test -timeout 30m -tags load -cni dualstack -run ^TestValidateState$
sudo CNI_TYPE=dualstack go test -timeout 30m -tags load -run ^TestValidateState$
name: "restartCNS_ValidatePodState"
displayName: "Restart CNS and Validate Pod State"
retryCountOnTaskFailure: 3
7 changes: 6 additions & 1 deletion Makefile
@@ -727,8 +727,13 @@ test-integration: ## run all integration tests.
CNS_VERSION=$(CNS_VERSION) \
go test -mod=readonly -buildvcs=false -timeout 1h -coverpkg=./... -race -covermode atomic -coverprofile=coverage.out -tags=integration ./test/integration...

test-load: ## run all load tests
CNI_DROPGZ_VERSION=$(CNI_DROPGZ_VERSION) \
CNS_VERSION=$(CNS_VERSION) \
go test -timeout 30m -race -tags=load ./test/integration/load...

test-validate-state:
cd test/integration/load && go test -mod=readonly -count=1 -timeout 30m -tags load -run ^TestValidateState -restart-case=$(RESTART_CASE) -os=$(OS) -cni=$(CNI_TYPE)
cd test/integration/load && go test -mod=readonly -count=1 -timeout 30m -tags load -run ^TestValidateState
cd ../../..

test-cyclonus: ## run the cyclonus test for npm.
163 changes: 81 additions & 82 deletions test/integration/load/load_test.go
@@ -4,32 +4,35 @@ package load

import (
"context"
"flag"
"testing"
"time"

"github.com/Azure/azure-container-networking/test/internal/kubernetes"
"github.com/Azure/azure-container-networking/test/validate"
"github.com/stretchr/testify/require"
)

type TestConfig struct {
OSType string `env:"OS_TYPE" default:"linux"`
CNIType string `env:"CNI_TYPE" default:"cilium"`
Iterations int `env:"ITERATIONS" default:"2"`
ScaleUpReplicas int `env:"SCALE_UP" default:"10"`
ScaleDownReplicas int `env:"SCALE_DOWN" default:"1"`
Replicas int `env:"REPLICAS" default:"1"`
ValidateStateFile bool `env:"VALIDATE_STATEFILE" default:"false"`
ValidateDualStack bool `env:"VALIDATE_DUALSTACK" default:"false"`
SkipWait bool `env:"SKIP_WAIT" default:"false"`
RestartCase bool `env:"RESTART_CASE" default:"false"`
Cleanup bool `env:"CLEANUP" default:"false"`
}

const (
manifestDir = "../manifests"
podLabelSelector = "load-test=true"
namespace = "load-test"
)

var (
osType = flag.String("os", "linux", "Operating system to run the test on")
cniType = flag.String("cni", "cilium", "CNI to run the test on")
iterations = flag.Int("iterations", 2, "Number of iterations to run the test for")
scaleUpReplicas = flag.Int("scaleup", 10, "Number of replicas to scale up to")
scaleDownReplicas = flag.Int("scaledown", 1, "Number of replicas to scale down to")
replicas = flag.Int("replicas", 1, "Number of replicas to scale up/down to")
validateStateFile = flag.Bool("validate-statefile", false, "Validate the state file")
validateDualStack = flag.Bool("validate-dualstack", false, "Validate the dualstack overlay")
skipWait = flag.Bool("skip-wait", false, "Skip waiting for pods to be ready")
restartCase = flag.Bool("restart-case", false, "In restart case, skip if we don't find state file")
namespace = "load-test"
)
var testConfig = &TestConfig{}
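The loader that populates testConfig from these env/default struct tags is not visible in this hunk. Below is a minimal, self-contained sketch of how such tags could be honored; exampleConfig, loadFromEnv, and main are illustrative names only (the repository may use a struct-tag library or a helper elsewhere in the package instead of this reflection code).

```go
// Illustrative only: not code from this commit.
package main

import (
	"fmt"
	"os"
	"reflect"
	"strconv"
)

// exampleConfig mirrors a few fields of the TestConfig struct above.
type exampleConfig struct {
	OSType     string `env:"OS_TYPE" default:"linux"`
	Iterations int    `env:"ITERATIONS" default:"2"`
	SkipWait   bool   `env:"SKIP_WAIT" default:"false"`
}

// loadFromEnv fills cfg from the environment, falling back to the default tag.
func loadFromEnv(cfg interface{}) error {
	v := reflect.ValueOf(cfg).Elem()
	t := v.Type()
	for i := 0; i < t.NumField(); i++ {
		raw, ok := os.LookupEnv(t.Field(i).Tag.Get("env"))
		if !ok {
			raw = t.Field(i).Tag.Get("default")
		}
		switch f := v.Field(i); f.Kind() {
		case reflect.String:
			f.SetString(raw)
		case reflect.Int:
			n, err := strconv.Atoi(raw)
			if err != nil {
				return fmt.Errorf("parse %s: %w", t.Field(i).Name, err)
			}
			f.SetInt(int64(n))
		case reflect.Bool:
			b, err := strconv.ParseBool(raw)
			if err != nil {
				return fmt.Errorf("parse %s: %w", t.Field(i).Name, err)
			}
			f.SetBool(b)
		}
	}
	return nil
}

func main() {
	cfg := &exampleConfig{}
	if err := loadFromEnv(cfg); err != nil {
		panic(err)
	}
	// e.g. ITERATIONS=4 SKIP_WAIT=true go run . -> {OSType:linux Iterations:4 SkipWait:true}
	fmt.Printf("%+v\n", cfg)
}
```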

var noopDeploymentMap = map[string]string{
"windows": manifestDir + "/noop-deployment-windows.yaml",
@@ -41,7 +44,7 @@ In order to run the scale tests, you need a k8s cluster and its kubeconfig.
If no kubeconfig is passed, the test will attempt to find one in the default location for kubectl config.
Run the tests as follows:
go test -timeout 30m -tags load -run ^TestLoad$ -tags=load
go test -timeout 30m -tags load -run ^TestLoad$
The Load test scales the pods up/down on the cluster and validates that the pods have IPs. By default it runs the
cycle for 2 iterations.
@@ -60,142 +63,138 @@ todo: consider adding the following scenarios
*/
func TestLoad(t *testing.T) {
clientset, err := kubernetes.MustGetClientset()
if err != nil {
t.Fatal(err)
}
require.NoError(t, err)

ctx, cancel := context.WithTimeout(context.Background(), 30*time.Minute)
defer cancel()

// Create namespace if it doesn't exist
namespaceExists, err := kubernetes.NamespaceExists(ctx, clientset, namespace)
if err != nil {
t.Fatal(err)
}
require.NoError(t, err)

if !namespaceExists {
err = kubernetes.MustCreateNamespace(ctx, clientset, namespace)
if err != nil {
t.Fatal(err)
}
require.NoError(t, err)
}

deployment, err := kubernetes.MustParseDeployment(noopDeploymentMap[*osType])
if err != nil {
t.Fatal(err)
}
deployment, err := kubernetes.MustParseDeployment(noopDeploymentMap[testConfig.OSType])
require.NoError(t, err)

deploymentsClient := clientset.AppsV1().Deployments(namespace)
err = kubernetes.MustCreateDeployment(ctx, deploymentsClient, deployment)
if err != nil {
t.Fatal(err)
}
require.NoError(t, err)

t.Log("Checking pods are running")
err = kubernetes.WaitForPodsRunning(ctx, clientset, namespace, podLabelSelector)
if err != nil {
t.Fatal(err)
}
require.NoError(t, err)

t.Log("Repeating the scale up/down cycle")
for i := 0; i < *iterations; i++ {
for i := 0; i < testConfig.Iterations; i++ {
t.Log("Iteration ", i)
t.Log("Scale down deployment")
err = kubernetes.MustScaleDeployment(ctx, deploymentsClient, deployment, clientset, namespace, podLabelSelector, *scaleDownReplicas, *skipWait)
if err != nil {
t.Fatal(err)
}
err = kubernetes.MustScaleDeployment(ctx, deploymentsClient, deployment, clientset, namespace, podLabelSelector, testConfig.ScaleDownReplicas, testConfig.SkipWait)
require.NoError(t, err)

t.Log("Scale up deployment")
err = kubernetes.MustScaleDeployment(ctx, deploymentsClient, deployment, clientset, namespace, podLabelSelector, *scaleUpReplicas, *skipWait)
if err != nil {
t.Fatal(err)
}
err = kubernetes.MustScaleDeployment(ctx, deploymentsClient, deployment, clientset, namespace, podLabelSelector, testConfig.ScaleUpReplicas, testConfig.SkipWait)
require.NoError(t, err)
}
t.Log("Checking pods are running and IP assigned")
err = kubernetes.WaitForPodsRunning(ctx, clientset, "", "")
if err != nil {
t.Fatal(err)
}
require.NoError(t, err)

if *validateStateFile {
if testConfig.ValidateStateFile {
t.Run("Validate state file", TestValidateState)
}

if *validateDualStack {
if testConfig.ValidateDualStack {
t.Run("Validate dualstack overlay", TestDualStackProperties)
}

if testConfig.Cleanup {
err = kubernetes.MustDeleteDeployment(ctx, deploymentsClient, deployment)
require.NoError(t, err, "error deleteing load deployment")
}
}

// TestValidateState validates the state file based on the os and cni type.
func TestValidateState(t *testing.T) {
clientset, err := kubernetes.MustGetClientset()
if err != nil {
t.Fatal(err)
}
require.NoError(t, err)

config := kubernetes.MustGetRestConfig(t)
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Minute)
defer cancel()

validator, err := validate.CreateValidator(ctx, clientset, config, namespace, *cniType, *restartCase, *osType)
if err != nil {
t.Fatal(err)
}
if err := validator.Validate(ctx); err != nil {
t.Fatal(err)
validator, err := validate.CreateValidator(ctx, clientset, config, namespace, testConfig.CNIType, testConfig.RestartCase, testConfig.OSType)
require.NoError(t, err)

err = validator.Validate(ctx)
require.NoError(t, err)

if testConfig.Cleanup {
err = validator.Cleanup(ctx)
require.NoError(t, err, "failed to cleanup validator")
}
}

// TestScaleDeployment scales the deployment up/down based on the replicas passed.
// go test -timeout 30m -tags load -run ^TestScaleDeployment$ -tags=load -replicas 10
// REPLICAS=10 go test -timeout 30m -tags load -run ^TestScaleDeployment$ -tags=load
func TestScaleDeployment(t *testing.T) {
t.Log("Scale deployment")
clientset, err := kubernetes.MustGetClientset()
if err != nil {
t.Fatal(err)
}
require.NoError(t, err)

ctx := context.Background()
// Create namespace if it doesn't exist
namespaceExists, err := kubernetes.NamespaceExists(ctx, clientset, namespace)
if err != nil {
t.Fatal(err)
}
require.NoError(t, err)

if !namespaceExists {
err = kubernetes.MustCreateNamespace(ctx, clientset, namespace)
if err != nil {
t.Fatal(err)
}
require.NoError(t, err)
}

deployment, err := kubernetes.MustParseDeployment(noopDeploymentMap[*osType])
if err != nil {
t.Fatal(err)
deployment, err := kubernetes.MustParseDeployment(noopDeploymentMap[testConfig.OSType])
require.NoError(t, err)

if testConfig.Cleanup {
deploymentsClient := clientset.AppsV1().Deployments(namespace)
err = kubernetes.MustCreateDeployment(ctx, deploymentsClient, deployment)
require.NoError(t, err)
}

deploymentsClient := clientset.AppsV1().Deployments(namespace)
err = kubernetes.MustScaleDeployment(ctx, deploymentsClient, deployment, clientset, namespace, podLabelSelector, *replicas, *skipWait)
if err != nil {
t.Fatal(err)
err = kubernetes.MustScaleDeployment(ctx, deploymentsClient, deployment, clientset, namespace, podLabelSelector, testConfig.Replicas, testConfig.SkipWait)
require.NoError(t, err)

if testConfig.Cleanup {
err = kubernetes.MustDeleteDeployment(ctx, deploymentsClient, deployment)
require.NoError(t, err, "error deleteing load deployment")
}
}

func TestDualStackProperties(t *testing.T) {
clientset, err := kubernetes.MustGetClientset()
if err != nil {
t.Fatal(err)
if !testConfig.ValidateDualStack {
return
}
clientset, err := kubernetes.MustGetClientset()
require.NoError(t, err)

config := kubernetes.MustGetRestConfig(t)
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Minute)
defer cancel()

t.Log("Validating the dualstack node labels")
validator, err := validate.CreateValidator(ctx, clientset, config, namespace, *cniType, *restartCase, *osType)
if err != nil {
t.Fatal(err)
}
validator, err := validate.CreateValidator(ctx, clientset, config, namespace, testConfig.CNIType, testConfig.RestartCase, testConfig.OSType)
require.NoError(t, err)

// validate dualstack overlay scenarios
err = validator.ValidateDualStackControlPlane(ctx)
if err != nil {
t.Fatal(err)
require.NoError(t, err)

if testConfig.Cleanup {
err = validator.Cleanup(ctx)
require.NoError(t, err, "failed to cleanup validator")
}
}