Skip to content

Commit

Permalink
Fix several Jenkins issues (NOAA-EMC#2334)
Browse files Browse the repository at this point in the history
Jenkins Updates Resolving final kinks:
- Removed all `git` shell commands and now exclusively use the Source Control
Management (**scm**) plugin.
- Add feature for skipping hosts per configuration specified in case yaml files.
- Solved and tested false positive builds and experiments.
- Tested archiving of task error log on case fail
- First case fail quits pipeline and cancels all pending scheduled jobs
- Dual build per yaml configuration arguments supported
- All designated case files in PR directory pass on intended host (fully tested
on Hera)

Remaining updates:
- Short-circuit on the first build failure when building sub-modules, and archive the build
error log.
- Re-build/no-build built-in logic for Replay and for rerunning previously failed
experiments.
  • Loading branch information
TerrenceMcGuinness-NOAA committed Feb 23, 2024
1 parent c67393a commit 950c38a
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 20 deletions.
51 changes: 31 additions & 20 deletions Jenkinsfile
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
def Machine = 'none'
def machine = 'none'
def HOME = 'none'
def localworkspace = 'none'
def commonworkspace = 'none'
def caseList = ''
def custom_workspace = [hera: '/scratch1/NCEPDEV/global/CI', orion: '/work2/noaa/stmp/CI/ORION', hercules: '/work2/noaa/stmp/CI/HERCULES']

pipeline {
agent { label 'built-in' }
Expand All @@ -20,7 +20,6 @@ pipeline {
agent { label 'built-in' }
steps {
script {
localworkspace = env.WORKSPACE
machine = 'none'
for (label in pullRequest.labels) {
echo "Label: ${label}"
Expand All @@ -42,14 +41,16 @@ pipeline {
agent { label "${machine}-emc" }
steps {
script {
properties([parameters([[$class: 'NodeParameterDefinition', allowedSlaves: ['built-in', 'Hera-EMC', 'Orion-EMC'], defaultSlaves: ['built-in'], name: '', nodeEligibility: [$class: 'AllNodeEligibility'], triggerIfResult: 'allCases']])])
HOME = "${WORKSPACE}/TESTDIR"
commonworkspace = "${WORKSPACE}"
sh(script: "mkdir -p ${HOME}/RUNTESTS")
pullRequest.addLabel("CI-${Machine}-Building")
if (pullRequest.labels.any { value -> value.matches("CI-${Machine}-Ready") }) {
pullRequest.removeLabel("CI-${Machine}-Ready")
ws("${custom_workspace[machine]}/${env.CHANGE_ID}") {
properties([parameters([[$class: 'NodeParameterDefinition', allowedSlaves: ['built-in', 'Hera-EMC', 'Orion-EMC'], defaultSlaves: ['built-in'], name: '', nodeEligibility: [$class: 'AllNodeEligibility'], triggerIfResult: 'allCases']])])
HOME = "${WORKSPACE}"
sh(script: "mkdir -p ${HOME}/RUNTESTS;rm -Rf ${HOME}/RUNTESTS/error.logs")
pullRequest.addLabel("CI-${Machine}-Building")
if (pullRequest.labels.any { value -> value.matches("CI-${Machine}-Ready") }) {
pullRequest.removeLabel("CI-${Machine}-Ready")
}
}
pullRequest.comment("Building and running on ${Machine} in directory ${HOME}")
}
}
}
Expand All @@ -73,17 +74,15 @@ pipeline {
def HOMEgfs = "${HOME}/${system}" // local HOMEgfs is used to build the system on per system basis under the common workspace HOME
sh(script: "mkdir -p ${HOMEgfs}")
ws(HOMEgfs) {
env.MACHINE_ID = machine // MACHINE_ID is used in the build scripts to determine the machine and is added to the shell environment
if (fileExists("${HOMEgfs}/sorc/BUILT_semaphor")) { // if the system is already built, skip the build in the case of re-runs
sh(script: "cat ${HOMEgfs}/sorc/BUILT_semaphor", returnStdout: true).trim() // TODO: and user configurable control to manage build semphore
pullRequest.comment("Cloned PR already built (or build skipped) on ${machine} in directory ${HOMEgfs}<br>Still doing a checkout to get the latest changes")
sh(script: 'source workflow/gw_setup.sh; git pull --recurse-submodules')
checkout scm
dir('sorc') {
sh(script: './link_workflow.sh')
}
} else {
checkout scm
sh(script: 'source workflow/gw_setup.sh;which git;git --version;git submodule update --init --recursive')
def builds_file = readYaml file: 'ci/cases/yamls/build.yaml'
def build_args_list = builds_file['builds']
def build_args = build_args_list[system].join(' ').trim().replaceAll('null', '')
Expand All @@ -99,6 +98,9 @@ pipeline {
}
pullRequest.addLabel("CI-${Machine}-Running")
}
if (system == 'gfs') {
caseList = sh(script: "${HOMEgfs}/ci/scripts/utils/get_host_case_list.py ${machine}", returnStdout: true).trim().split()
}
}
}
}
Expand All @@ -114,11 +116,15 @@ pipeline {
axis {
name 'Case'
// TODO add dynamic list of cases from env vars (needs addtional plugins)
values 'C48_ATM', 'C48_S2SWA_gefs', 'C48_S2SW', 'C96_atm3DVar', 'C96C48_hybatmDA', 'C96_atmsnowDA' // 'C48mx500_3DVarAOWCDA'
values 'C48C48_ufs_hybatmDA', 'C48_ATM', 'C48_S2SW', 'C48_S2SWA_gefs', 'C48mx500_3DVarAOWCDA', 'C96C48_hybatmDA', 'C96_atm3DVar', 'C96_atmsnowDA'
}
}
stages {
stage('Create Experiment') {

stage('Create Experiments') {
when {
expression { return caseList.contains(Case) }
}
steps {
script {
sh(script: "sed -n '/{.*}/!p' ${HOME}/gfs/ci/cases/pr/${Case}.yaml > ${HOME}/gfs/ci/cases/pr/${Case}.yaml.tmp")
Expand All @@ -130,15 +136,20 @@ pipeline {
}
}
}

stage('Run Experiments') {
when {
expression { return caseList.contains(Case) }
}
steps {
script {
HOMEgfs = "${HOME}/gfs" // common HOMEgfs is used to launch the scripts that run the experiments
ws(HOMEgfs) {
pslot = sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh get_pslot ${HOME}/RUNTESTS ${Case}", returnStdout: true).trim()
// pullRequest.comment("**Running** experiment: ${Case} on ${Machine}<br>With the experiment in directory:<br>`${HOME}/RUNTESTS/${pslot}`")
err = sh(script: "${HOMEgfs}/ci/scripts/run-check_ci.sh ${HOME} ${pslot}")
if (err != 0) {
pullRequest.comment("**Running** experiment: ${Case} on ${Machine}<br>With the experiment in directory:<br>`${HOME}/RUNTESTS/${pslot}`")
try {
sh(script: "${HOMEgfs}/ci/scripts/run-check_ci.sh ${HOME} ${pslot}")
} catch (Exception e) {
pullRequest.comment("**FAILURE** running experiment: ${Case} on ${Machine}")
sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh cancel_all_batch_jobs ${HOME}/RUNTESTS")
ws(HOME) {
Expand All @@ -153,12 +164,12 @@ pipeline {
}
error("Failed to run experiments ${Case} on ${Machine}")
}
// pullRequest.comment("**SUCCESS** running experiment: ${Case} on ${Machine}")
}
pullRequest.comment("**SUCCESS** running experiment: ${Case} on ${Machine}")
}

}
}

}
}
}
Expand Down
32 changes: 32 additions & 0 deletions ci/scripts/utils/get_host_case_list.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/usr/bin/env python3
import os
from os.path import basename, splitext
import sys
import glob
from wxflow import parse_j2yaml
from wxflow import AttrDict

_here = os.path.dirname(__file__)
# Repository root: three directory levels above the directory holding this script.
_top = os.path.abspath(os.path.join(os.path.abspath(_here), '../../..'))


def is_skipped_on_host(case_conf, host):
    """Return True when *case_conf* asks for CI to be skipped on *host*.

    Parameters
    ----------
    case_conf : Mapping
        Parsed case configuration. Only the optional ``skip_ci_on_hosts``
        key is consulted.
    host : str
        Host name to test; the comparison is case-insensitive.

    Returns
    -------
    bool
        True if *host* appears in ``skip_ci_on_hosts``, False otherwise
        (including when the key is absent or its value is null).
    """
    if 'skip_ci_on_hosts' not in case_conf:
        return False

    skip_hosts = case_conf['skip_ci_on_hosts']
    if skip_hosts is None:
        # Key present but left empty in the yaml: nothing to skip.
        return False
    if isinstance(skip_hosts, str):
        # A bare string would otherwise be iterated character by character
        # and never match; treat it as a single host name.
        skip_hosts = [skip_hosts]

    return host.lower() in {name.lower() for name in skip_hosts}


if __name__ == '__main__':

    if len(sys.argv) < 2:
        # Keep stdout clean: callers capture it as the case list.
        print('Usage: get_host_case_list.py <host_name>', file=sys.stderr)
        sys.exit(1)

    host = sys.argv[1]

    HOMEgfs = _top
    # Data handed to parse_j2yaml: HOMEgfs plus the whole process environment.
    data = AttrDict(HOMEgfs=_top)
    data.update(os.environ)

    case_list = []
    # glob returns files in arbitrary order; sort so the output is reproducible.
    for case_yaml in sorted(glob.glob(f'{HOMEgfs}/ci/cases/pr/*.yaml')):
        case_conf = parse_j2yaml(path=case_yaml, data=data)
        if is_skipped_on_host(case_conf, host):
            continue
        # Case name is the yaml file name without directory or extension.
        case_list.append(splitext(basename(case_yaml))[0])

    # Single space-separated line on stdout.
    print(' '.join(case_list))
1 change: 1 addition & 0 deletions ci/scripts/utils/wxflow

0 comments on commit 950c38a

Please sign in to comment.