From 4087e17c558cbf010982a3dfe9c959bf2d5ca079 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Wed, 26 Oct 2022 15:29:04 -0700 Subject: [PATCH 1/5] prevent chance of overlap in resource groups --- .pipelines/npm/npm-conformance-tests.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.pipelines/npm/npm-conformance-tests.yaml b/.pipelines/npm/npm-conformance-tests.yaml index 44a1520478..11bb22acad 100644 --- a/.pipelines/npm/npm-conformance-tests.yaml +++ b/.pipelines/npm/npm-conformance-tests.yaml @@ -26,7 +26,8 @@ jobs: mkdir -p '$(GOBIN)' mkdir -p '$(GOPATH)/pkg' BUILD_NUMBER=$(Build.BuildNumber) - RG=e2e-$(echo "npm-`date "+%Y-%m-%d-%S"`") + # format: npm-<year>-<month>-<day>-<minute>-<second> + RG=e2e-$(echo "npm-`date "+%Y-%m-%d-%M-%S"`") TAG=$(make npm-version) echo "Resource group: $RG" echo "Image tag: $TAG" From 4b069f42f3f8c835ff0dc00eaff91cf6d0960384 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Wed, 26 Oct 2022 15:35:49 -0700 Subject: [PATCH 2/5] print npm pod state and capture previous logs --- .pipelines/npm/npm-conformance-tests.yaml | 28 ++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/.pipelines/npm/npm-conformance-tests.yaml b/.pipelines/npm/npm-conformance-tests.yaml index 11bb22acad..e01be4f485 100644 --- a/.pipelines/npm/npm-conformance-tests.yaml +++ b/.pipelines/npm/npm-conformance-tests.yaml @@ -268,11 +268,9 @@ jobs: mkdir -p $npmLogsFolder cp ./kubeconfig $npmLogsFolder/kubeconfig - ## write to all NPM pod logs in the background (do this in the background instead of after to make sure the logs aren't truncated) npmPodList=`kubectl --kubeconfig=./kubeconfig get pods -n kube-system | grep npm | awk '{print $1}'` echo "Found NPM pods: $npmPodList" - ## Run all Conformance tests in the background echo $FQDN chmod +x $(Pipeline.Workspace)/Test/e2e.test @@ -346,12 +344,25 @@ jobs: exitCode=$? fi fi - # kill the background processes (the logs) that have this process' pid (i.e.
$$) as a parent + # get all current npm pods + kubectl --kubeconfig=./kubeconfig get pods -n kube-system | grep npm + npmPodList=`kubectl --kubeconfig=./kubeconfig get pods -n kube-system | grep npm | awk '{print $1}'` + # capture all logs for npmPod in $npmPodList; do ./kubectl --kubeconfig=./kubeconfig logs -n kube-system $npmPod > $npmLogsFolder/$npmPod-logs.txt done + # capture any previous logs in case there was a crash + for npmPod in $npmPodList; do + previousLogFile=$npmLogsFolder/previous-$npmPod-logs.txt + ./kubectl --kubeconfig=./kubeconfig logs -n kube-system $npmPod -p > $previousLogFile + if [[ $? -ne 0 ]]; then + # remove the empty file if kubectl logs failed (e.g. there was no previous terminated container) + rm $previousLogFile + fi + done + exit $exitCode displayName: "Run Test Suite and Get Logs" failOnStderr: false @@ -480,7 +491,18 @@ jobs: cp cyclonus-$CLUSTER_NAME $(System.DefaultWorkingDirectory)/$CLUSTER_NAME/cyclonus-$CLUSTER_NAME echo "Getting cluster state for $CLUSTER_NAME" mkdir -p $(System.DefaultWorkingDirectory)/$CLUSTER_NAME + kubectl get pods -n kube-system | grep npm kubectl logs -n kube-system -l k8s-app=azure-npm --tail -1 --prefix > $(System.DefaultWorkingDirectory)/$CLUSTER_NAME/npm-logs_$(PROFILE).txt + # capture any previous logs in case there was a crash + npmPodList=`kubectl --kubeconfig=./kubeconfig get pods -n kube-system | grep npm | awk '{print $1}'` + for npmPod in $npmPodList; do + previousLogFile=$(System.DefaultWorkingDirectory)/$CLUSTER_NAME/previous-npm-logs_$(PROFILE).txt + kubectl logs -n kube-system $npmPod -p > $previousLogFile + if [[ $? -ne 0 ]]; then + # remove the empty file if kubectl logs failed (e.g. 
there was no previous terminated container) + rm $previousLogFile + fi + done cp ./kubeconfig $(System.DefaultWorkingDirectory)/$CLUSTER_NAME/.kubeconfig condition: always() From f766ccb8c6ca501a4175ffa3ac92b256c501209b Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Wed, 26 Oct 2022 15:37:34 -0700 Subject: [PATCH 3/5] fail cyclonus if it doesn't complete --- test/cyclonus/test-cyclonus-windows.sh | 15 ++++++++++----- test/cyclonus/test-cyclonus.sh | 13 ++++++++----- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/test/cyclonus/test-cyclonus-windows.sh b/test/cyclonus/test-cyclonus-windows.sh index 59c2d361a2..17a39b4df5 100755 --- a/test/cyclonus/test-cyclonus-windows.sh +++ b/test/cyclonus/test-cyclonus-windows.sh @@ -1,4 +1,5 @@ curl -fsSL github.com/mattfenwick/cyclonus/releases/latest/download/cyclonus_linux_amd64.tar.gz | tar -zxv +LOG_FILE=cyclonus-$CLUSTER_NAME ./cyclonus_linux_amd64/cyclonus generate \ --noisy=true \ --retries=7 \ @@ -8,12 +9,16 @@ curl -fsSL github.com/mattfenwick/cyclonus/releases/latest/download/cyclonus_lin --pod-creation-timeout-seconds=480 \ --job-timeout-seconds=15 \ --server-protocol=TCP,UDP \ - --exclude sctp,named-port,ip-block-with-except,multi-peer,upstream-e2e,example,end-port,namespaces-by-default-label,update-policy | tee cyclonus-$CLUSTER_NAME + --exclude sctp,named-port,ip-block-with-except,multi-peer,upstream-e2e,example,end-port,namespaces-by-default-label,update-policy | tee $LOG_FILE -rc=0 -cat cyclonus-$CLUSTER_NAME | grep "failed" > /dev/null 2>&1 || rc=$? -echo $rc -if [ $rc -eq 0 ]; then +cat $LOG_FILE | grep "SummaryTable:" -q +if [[ $? -ne 0 ]]; then + echo "cyclonus tests did not complete" + exit 2 +fi + +cat $LOG_FILE | grep "failed" -q +if [[ $? 
-eq 0 ]]; then echo "failures detected" exit 1 fi diff --git a/test/cyclonus/test-cyclonus.sh b/test/cyclonus/test-cyclonus.sh index 3a16612f0c..054ef3ec4a 100755 --- a/test/cyclonus/test-cyclonus.sh +++ b/test/cyclonus/test-cyclonus.sh @@ -54,11 +54,14 @@ kubectl delete --ignore-not-found=true clusterrolebinding cyclonus kubectl delete --ignore-not-found=true sa cyclonus -n kube-system kubectl delete --ignore-not-found=true -f $cyclonusProfile -# if 'failure' is in the logs, fail; otherwise succeed -rc=0 +cat $LOG_FILE | grep "SummaryTable:" -q +if [[ $? -ne 0 ]]; then + echo "cyclonus tests did not complete" + exit 2 +fi -cat "$LOG_FILE" | grep "failed" > /dev/null 2>&1 || rc=$? -echo $rc -if [ $rc -eq 0 ]; then +cat $LOG_FILE | grep "failed" -q +if [[ $? -eq 0 ]]; then + echo "failures detected" exit 1 fi From 72fec25ade7f6a5a945cd519fbd1f8afa57974a9 Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Thu, 27 Oct 2022 09:49:53 -0700 Subject: [PATCH 4/5] redirect to /dev/null --- test/cyclonus/test-cyclonus-windows.sh | 19 ++++++++++++------- test/cyclonus/test-cyclonus.sh | 19 ++++++++++++------- 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/test/cyclonus/test-cyclonus-windows.sh b/test/cyclonus/test-cyclonus-windows.sh index 17a39b4df5..ae1544b648 100755 --- a/test/cyclonus/test-cyclonus-windows.sh +++ b/test/cyclonus/test-cyclonus-windows.sh @@ -11,14 +11,19 @@ LOG_FILE=cyclonus-$CLUSTER_NAME --server-protocol=TCP,UDP \ --exclude sctp,named-port,ip-block-with-except,multi-peer,upstream-e2e,example,end-port,namespaces-by-default-label,update-policy | tee $LOG_FILE -cat $LOG_FILE | grep "SummaryTable:" -q -if [[ $? -ne 0 ]]; then - echo "cyclonus tests did not complete" +# might need to redirect to /dev/null 2>&1 instead of just grepping with -q to avoid "cat: write error: Broken pipe" +rc=999 +cat $LOG_FILE | grep "SummaryTable:" > /dev/null 2>&1 && rc=$? 
+echo $rc +if [ $rc -ne 0 ]; then + echo "FAILING because cyclonus tests did not complete" exit 2 fi -cat $LOG_FILE | grep "failed" -q -if [[ $? -eq 0 ]]; then - echo "failures detected" - exit 1 +rc=0 +cat $LOG_FILE | grep "failed" > /dev/null 2>&1 || rc=$? +echo $rc +if [ $rc -eq 0 ]; then + echo "FAILING because cyclonus completed but failures detected" + exit 3 fi diff --git a/test/cyclonus/test-cyclonus.sh b/test/cyclonus/test-cyclonus.sh index 054ef3ec4a..37e26e8539 100755 --- a/test/cyclonus/test-cyclonus.sh +++ b/test/cyclonus/test-cyclonus.sh @@ -54,14 +54,19 @@ kubectl delete --ignore-not-found=true clusterrolebinding cyclonus kubectl delete --ignore-not-found=true sa cyclonus -n kube-system kubectl delete --ignore-not-found=true -f $cyclonusProfile -cat $LOG_FILE | grep "SummaryTable:" -q -if [[ $? -ne 0 ]]; then - echo "cyclonus tests did not complete" +# might need to redirect to /dev/null 2>&1 instead of just grepping with -q to avoid "cat: write error: Broken pipe" +rc=999 +cat $LOG_FILE | grep "SummaryTable:" > /dev/null 2>&1 && rc=$? +echo $rc +if [ $rc -ne 0 ]; then + echo "FAILING because cyclonus tests did not complete" exit 2 fi -cat $LOG_FILE | grep "failed" -q -if [[ $? -eq 0 ]]; then - echo "failures detected" - exit 1 +rc=0 +cat $LOG_FILE | grep "failed" > /dev/null 2>&1 || rc=$? 
+echo $rc +if [ $rc -eq 0 ]; then + echo "FAILING because cyclonus completed but failures detected" + exit 3 fi From 82e29ba0f8394bba312c0112cf964301839eb2ec Mon Sep 17 00:00:00 2001 From: Hunter Gregory Date: Thu, 27 Oct 2022 15:38:20 -0700 Subject: [PATCH 5/5] remove incorrect use of --kubeconfig --- .pipelines/npm/npm-conformance-tests.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pipelines/npm/npm-conformance-tests.yaml b/.pipelines/npm/npm-conformance-tests.yaml index e01be4f485..556a9724ae 100644 --- a/.pipelines/npm/npm-conformance-tests.yaml +++ b/.pipelines/npm/npm-conformance-tests.yaml @@ -494,7 +494,7 @@ jobs: kubectl get pods -n kube-system | grep npm kubectl logs -n kube-system -l k8s-app=azure-npm --tail -1 --prefix > $(System.DefaultWorkingDirectory)/$CLUSTER_NAME/npm-logs_$(PROFILE).txt # capture any previous logs in case there was a crash - npmPodList=`kubectl --kubeconfig=./kubeconfig get pods -n kube-system | grep npm | awk '{print $1}'` + npmPodList=`kubectl get pods -n kube-system | grep npm | awk '{print $1}'` for npmPod in $npmPodList; do previousLogFile=$(System.DefaultWorkingDirectory)/$CLUSTER_NAME/previous-npm-logs_$(PROFILE).txt kubectl logs -n kube-system $npmPod -p > $previousLogFile