diff --git a/.github/actions/submit-delete-k8s-job/action.yml b/.github/actions/submit-delete-k8s-job/action.yml index 45ee34f4c..9fb31c185 100644 --- a/.github/actions/submit-delete-k8s-job/action.yml +++ b/.github/actions/submit-delete-k8s-job/action.yml @@ -20,59 +20,58 @@ runs: TIMEOUT_JOB_CREATION=60s TIMEOUT_JOB_WAIT=14400s TIMEOUT_JOB_START=600s + INPUT_JOB_NAME=${{ inputs.job-name }} + INPUT_JOB_CONFIG_FILE=${{ inputs.job-config-file }} echo "Submit K8s job" - kubectl apply -f "${{ inputs.job-config-file }}" - kubectl get event | grep ${{ inputs.job-name }} + kubectl apply -f "${INPUT_JOB_CONFIG_FILE}" + kubectl get event | grep ${INPUT_JOB_NAME} # Wait for job to be created - kubectl wait --for=create job/${{ inputs.job-name }} --timeout=$TIMEOUT_JOB_CREATION - + kubectl wait --for=create job/${INPUT_JOB_NAME} --timeout=$TIMEOUT_JOB_CREATION # Wait for job to be unsuspended - kubectl wait --for=jsonpath='{.spec.suspend}=false' job/${{ inputs.job-name }} --timeout=$TIMEOUT_JOB_WAIT - + kubectl wait --for=jsonpath='{.spec.suspend}=false' job/${INPUT_JOB_NAME} --timeout=$TIMEOUT_JOB_WAIT # Wait for pods to be running kubectl wait --for=condition=Ready \ - --selector=batch.kubernetes.io/job-name=${{ inputs.job-name }} \ + --selector=batch.kubernetes.io/job-name=${INPUT_JOB_NAME} \ --timeout=$TIMEOUT_JOB_START pod # Stream logs - kubectl logs --all-containers=true --all-pods=true --follow job/${{ inputs.job-name }} + kubectl logs --all-containers=true --all-pods=true --follow job/${INPUT_JOB_NAME} # Detect job parallelism - parallelism=$(kubectl get job/"${{ inputs.job-name }}" -o jsonpath='{.spec.parallelism}') + parallelism=$(kubectl get job/${INPUT_JOB_NAME} -o jsonpath='{.spec.parallelism}') # if parallelism is not set, use default value of 1 - echo "Parallelism ${parallelism}" if [ -z "${parallelism}" ]; then echo "No parallelism specified, defaulting to 1" parallelism=1 fi - while IFS=: read -r failures successes; do - failures="${failures:-0}" - successes="${successes:-0}" + while true; do + job_status_counts=$(kubectl get job/${INPUT_JOB_NAME} -o 'jsonpath={.status.failed}:{.status.succeeded}') + + IFS=: + set -- $job_status_counts + failures=${1:-0} + successes=${2:-0} + total=$((failures + successes)) + echo "status: failures=${failures}, successes=${successes}, total=${total}, parallelism=${parallelism}" + if [ $total -lt $parallelism ]; then # neither "failed" nor "succeeded", so wait - sleep 1 - elif [ $total -eq $parallelism ]; then - # we have total=parallelism => either X successes or X failures - # In any case, the job is done - break - else - # Log here - echo "Unexpected number of completed pods ${total} with parallelism ${parallelism}" - exit 255 + sleep 2 + continue fi - done <