Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add "anywhere" option for NCCS #503

Draft
wants to merge 6 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions AGCM.rc.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,6 @@
NX: @NX
NY: @NY

IOSERVER_NODES: @NUM_OSERVER_NODES
NUM_BACKEND_PES: @NUM_BACKEND_PES

AGCM_IM: @AGCM_IM
AGCM_JM: @AGCM_JM
AGCM_LM: @AGCM_LM
Expand Down
137 changes: 77 additions & 60 deletions gcm_run.j
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,6 @@ set OGCM_JM = `grep '^\s*OGCM\.JM_WORLD:' $HOMDIR/AGCM.rc | cut -d: -f2`
# --------------------------------------------

set USE_IOSERVER = @USE_IOSERVER
set NUM_OSERVER_NODES = `grep '^\s*IOSERVER_NODES:' $HOMDIR/AGCM.rc | cut -d: -f2`
set NUM_BACKEND_PES = `grep '^\s*NUM_BACKEND_PES:' $HOMDIR/AGCM.rc | cut -d: -f2`

# Check for Over-Specification of CPU Resources
# ---------------------------------------------
Expand All @@ -106,30 +104,49 @@ endif

@ MODEL_NPES = $NX * $NY

set NCPUS_PER_NODE = @NCPUS_PER_NODE
@RUN_SPECIFIC set NCPUS_PER_NODE = @NCPUS_PER_NODE
@RUN_ANYWHERE set NCPUS_PER_NODE=`echo "scale=6;($NCPUS / $SLURM_NNODES)" | bc | awk 'function ceil(x, y){y=int(x); return(x>y?y+1:y)} {print ceil($1)}'`
set NUM_MODEL_NODES=`echo "scale=6;($MODEL_NPES / $NCPUS_PER_NODE)" | bc | awk 'function ceil(x, y){y=int(x); return(x>y?y+1:y)} {print ceil($1)}'`

if ( $NCPUS != NULL ) then

if ( $USE_IOSERVER == 1 ) then

@ TOTAL_NODES = $NUM_MODEL_NODES + $NUM_OSERVER_NODES
@ TOTAL_PES = $TOTAL_NODES * $NCPUS_PER_NODE

if( $TOTAL_PES > $NCPUS ) then
echo "CPU Resources are Over-Specified"
echo "--------------------------------"
echo "Allotted NCPUs: $NCPUS"
echo "Requested NCPUs: $TOTAL_PES"
echo ""
echo "Specified NX: $NX"
echo "Specified NY: $NY"
echo ""
echo "Specified model nodes: $NUM_MODEL_NODES"
echo "Specified oserver nodes: $NUM_OSERVER_NODES"
echo "Specified cores per node: $NCPUS_PER_NODE"
exit
endif
# Next the number of frontend PEs is 10% of the model PEs
set NUM_FRONTEND_PES=`echo "scale=6;($MODEL_NPES * 0.1)" | bc | awk 'function ceil(x, y){y=int(x); return(x>y?y+1:y)} {print ceil($1)}'`

# We roughly figure out the number of collections in the HISTORY.rc (this is not perfect, but is close to right)
set NUM_HIST_COLLECTIONS=`cat HISTORY.rc | sed -n '/^COLLECTIONS:/,/^ *::$/{p;/^ *::$/q}' | grep -v '^ *#' | wc -l`

# And the total number of oserver PEs is frontend PEs plus number of history collections
@ NUM_OSERVER_PES=$NUM_FRONTEND_PES + $NUM_HIST_COLLECTIONS

# Now calculate the number of oserver nodes
set NUM_OSERVER_NODES=`echo "scale=6;($NUM_OSERVER_PES / $NCPUS_PER_NODE)" | bc | awk 'function ceil(x, y){y=int(x); return(x>y?y+1:y)} {print ceil($1)}'`

# And then the number of backend PEs is the number of history collections divided by the number of oserver nodes
set NUM_BACKEND_PES=`echo "scale=6;($NUM_HIST_COLLECTIONS / $NUM_OSERVER_NODES)" | bc | awk 'function ceil(x, y){y=int(x); return(x>y?y+1:y)} {print ceil($1)}'`

# multigroup requires at least two backend pes
if ($NUM_BACKEND_PES < 2) set NUM_BACKEND_PES = 2

@RUN_ANYWHERE @ TOTAL_PES = $NCPUS
@RUN_SPECIFIC @ TOTAL_NODES = $NUM_MODEL_NODES + $NUM_OSERVER_NODES
@RUN_SPECIFIC @ TOTAL_PES = $TOTAL_NODES * $NCPUS_PER_NODE
@RUN_SPECIFIC if( $TOTAL_PES > $NCPUS ) then
@RUN_SPECIFIC echo "CPU Resources are Over-Specified"
@RUN_SPECIFIC echo "--------------------------------"
@RUN_SPECIFIC echo "Allotted NCPUs: $NCPUS"
@RUN_SPECIFIC echo "Requested NCPUs: $TOTAL_PES"
@RUN_SPECIFIC echo ""
@RUN_SPECIFIC echo "Specified NX: $NX"
@RUN_SPECIFIC echo "Specified NY: $NY"
@RUN_SPECIFIC echo ""
@RUN_SPECIFIC echo "Specified model nodes: $NUM_MODEL_NODES"
@RUN_SPECIFIC echo "Specified oserver nodes: $NUM_OSERVER_NODES"
@RUN_SPECIFIC echo "Specified cores per node: $NCPUS_PER_NODE"
@RUN_SPECIFIC exit
@RUN_SPECIFIC endif

else

Expand All @@ -144,8 +161,8 @@ if ( $NCPUS != NULL ) then
echo "Specified NX: $NX"
echo "Specified NY: $NY"
echo ""
echo "Specified model nodes: $NUM_MODEL_NODES"
echo "Specified cores per node: $NCPUS_PER_NODE"
@RUN_ANYWHERE echo "Specified model nodes: $NUM_MODEL_NODES"
@RUN_ANYWHERE echo "Specified cores per node: $NCPUS_PER_NODE"
exit
endif

Expand Down Expand Up @@ -325,7 +342,7 @@ setenv EMISSIONS @EMISSIONS
@MOM5setenv BCTAG `basename $ABCSDIR`
@MOM6setenv BCTAG `basename $ABCSDIR`
#this is hard-wired for NAS for now - should make it more general
@MITsetenv GRIDDIR /nobackupp18/afahad/GEOSMITgcmFiles/GRIDDIR/a${AGCM_IM}x${AGCM_JM}_o${OGCM_IM}x${OGCM_JM}
@MITsetenv GRIDDIR /nobackupp18/afahad/GEOSMITgcmFiles/GRIDDIR/a${AGCM_IM}x${AGCM_JM}_o${OGCM_IM}x${OGCM_JM}
@MITsetenv BCTAG `basename $GRIDDIR`
@DATAOCEANsetenv BCTAG `basename $BCSDIR`

Expand Down Expand Up @@ -544,15 +561,15 @@ set tile_rsts = (catch catchcn route lake landice openwater saltwater seaicether
# ----------------------------------
set rst_by_face = NO
if( $GCMEMIP == TRUE ) then
if(-e $EXPDIR/restarts/$RSTDATE/fvcore_internal_rst & -e $EXPDIR/restarts/$RSTDATE/fvcore_internal_face_1_rst) then
if(-e $EXPDIR/restarts/$RSTDATE/fvcore_internal_rst & -e $EXPDIR/restarts/$RSTDATE/fvcore_internal_face_1_rst) then
echo "grid-based internal_rst and internal_face_x_rst should not co-exist"
echo "please remove all *internal_rst except these tile-based restarts :"
foreach rst ( $tile_rsts )
echo ${rst}_internal_rst
end
exit
endif
if(-e $EXPDIR/restarts/$RSTDATE/fvcore_internal_face_1_rst) then
if(-e $EXPDIR/restarts/$RSTDATE/fvcore_internal_face_1_rst) then
set rst_by_face = YES
endif
else
Expand All @@ -564,7 +581,7 @@ else
end
exit
endif
if(-e $EXPDIR/fvcore_internal_face_1_rst) then
if(-e $EXPDIR/fvcore_internal_face_1_rst) then
set rst_by_face = YES
endif
endif
Expand Down Expand Up @@ -609,7 +626,7 @@ foreach rst ( $dummy )
if ( $rst =~ *$tile_rst* ) then
set is_tile_rst = TRUE
break
endif
endif
end
endif
if ($is_tile_rst == FALSE & $rst_by_face == YES) then
Expand All @@ -618,7 +635,7 @@ foreach rst ( $dummy )
set rst = ${part1}_face_${n}_rst
set rst_file_names = `echo $rst_file_names $rst`
end
else
else
set rst_file_names = `echo $rst_file_names $rst`
endif
end
Expand Down Expand Up @@ -1038,53 +1055,53 @@ endif
@MIT # ---------------------------------------------------
@MIT # For MITgcm restarts - before running GEOSgcm.x
@MIT # ---------------------------------------------------
@MIT
@MIT
@MIT # set time interval for segment in seconds
@MIT
@MIT
@MIT set yearc = `echo $nymdc | cut -c1-4`
@MIT set monthc = `echo $nymdc | cut -c5-6`
@MIT set dayc = `echo $nymdc | cut -c7-8`
@MIT set hourc = `echo $nhmsc | cut -c1-2`
@MIT set minutec = `echo $nhmsc | cut -c3-4`
@MIT set secondc = `echo $nhmsc | cut -c5-6`
@MIT
@MIT
@MIT set yearf = `echo $nymdf | cut -c1-4`
@MIT set monthf = `echo $nymdf | cut -c5-6`
@MIT set dayf = `echo $nymdf | cut -c7-8`
@MIT set hourf = `echo $nhmsf | cut -c1-2`
@MIT set minutef = `echo $nhmsf | cut -c3-4`
@MIT set secondf = `echo $nhmsf | cut -c5-6`
@MIT
@MIT
@MIT set yearf = `echo $nymdf | cut -c1-4`
@MIT
@MIT
@MIT set time1 = `date -u -d "${yearc}-${monthc}-${dayc}T${hourc}:${minutec}:${secondc}" "+%s"`
@MIT set time2 = `date -u -d "${yearf}-${monthf}-${dayf}T${hourf}:${minutef}:${secondf}" "+%s"`
@MIT
@MIT
@MIT @ mitdt = $time2 - $time1
@MIT echo "Segment time: $mitdt"
@MIT
@MIT
@MIT
@MIT
@MIT # Set-up MITgcm run directory
@MIT if (! -e mitocean_run) mkdir -p mitocean_run
@MIT cd mitocean_run
@MIT
@MIT
@MIT # link mit configuration and initialization files
@MIT ln -sf $EXPDIR/mit_input/* .
@MIT # link mitgcm restarts if exist
@MIT /bin/ln -sf $EXPDIR/restarts/pic* .
@MIT # make an archive folder for mitgcm run
@MIT mkdir $EXPDIR/mit_output
@MIT
@MIT
@MIT # Calculate segment time steps
@MIT set mit_nTimeSteps = `cat ${SCRDIR}/AGCM.rc | grep OGCM_RUN_DT: | cut -d: -f2 | tr -s " " | cut -d" " -f2`
@MIT @ mit_nTimeSteps = ${mitdt} / $mit_nTimeSteps
@MIT
@MIT
@MIT #change namelist variables in data - nTimeSteps, chkptFreq and monitorFreq
@MIT sed -i "s/nTimeSteps.*/nTimeSteps = ${mit_nTimeSteps},/" data
@MIT sed -i "s/chkptFreq.*/chkptFreq = ${mitdt}.0,/" data
@MIT sed -i "s/pChkptFreq.*/pChkptFreq = ${mitdt}.0,/" data
@MIT # get nIter0
@MIT
@MIT
@MIT if (! -e ${EXPDIR}/restarts/MITgcm_restart_dates.txt ) then
@MIT set nIter0 = `grep nIter0 data | tr -s " " | cut -d"=" -f2 | cut -d"," -f1 | awk '{$1=$1;print}'`
@MIT else
Expand All @@ -1099,7 +1116,7 @@ endif
@MIT sed -i "s/nIter0.*/ nIter0 = ${nIter0},/" data
@MIT endif
@MIT endif
@MIT
@MIT
@MIT cd ..
@MIT # ---------------------------------------------------
@MIT # End MITgcm restarts - before running GEOSgcm.x
Expand All @@ -1114,7 +1131,7 @@ setenv OMP_NUM_THREADS 1
if( $USE_SHMEM == 1 ) $GEOSBIN/RmShmKeys_sshmpi.csh >& /dev/null

if( $USE_IOSERVER == 1 ) then
set IOSERVER_OPTIONS = "--npes_model $MODEL_NPES --nodes_output_server $NUM_OSERVER_NODES"
set IOSERVER_OPTIONS = "--npes_model $MODEL_NPES "
set IOSERVER_EXTRA = "--oserver_type multigroup --npes_backend_pernode $NUM_BACKEND_PES"
else
set IOSERVER_OPTIONS = ""
Expand All @@ -1136,29 +1153,29 @@ echo GEOSgcm Run Status: $rc
@MIT # ---------------------------------------------------
@MIT # For MITgcm restarts - after running GEOSgcm.x
@MIT # ---------------------------------------------------
@MIT
@MIT
@MIT set STEADY_STATE_OCEAN=`grep STEADY_STATE_OCEAN AGCM.rc | cut -d':' -f2 | tr -d " "`
@MIT
@MIT
@MIT # update ocean only if activated. Otherwize use the same pickups (passive ocean).
@MIT if ( ${STEADY_STATE_OCEAN} != 0 ) then
@MIT
@MIT
@MIT if ( ${rc} == 0 ) then
@MIT
@MIT
@MIT # Update nIter0 for next segment
@MIT set znIter00 = `echo $nIter0 | awk '{printf("%010d",$1)}'`
@MIT @ nIter0 = $nIter0 + $mit_nTimeSteps
@MIT set znIter0 = `echo $nIter0 | awk '{printf("%010d",$1)}'`
@MIT
@MIT
@MIT # to update MITgcm restart list file
@MIT sed -i "/${nIter0}/d" ${EXPDIR}/restarts/MITgcm_restart_dates.txt
@MIT echo "Date_GEOS5 $nymdf $nhmsf NITER0_MITgcm ${nIter0}" >> ${EXPDIR}/restarts/MITgcm_restart_dates.txt
@MIT
@MIT
@MIT /bin/mv $SCRDIR/mitocean_run/STDOUT.0000 $EXPDIR/mit_output/STDOUT.${znIter00}
@MIT
@MIT
@MIT endif
@MIT
@MIT
@MIT cd $SCRDIR/mitocean_run
@MIT
@MIT
@MIT # Check existance of roling pickups
@MIT set nonomatch rp = ( pickup*ckptA* )
@MIT echo $rp
Expand All @@ -1172,7 +1189,7 @@ echo GEOSgcm Run Status: $rc
@MIT /bin/mv ${fname} $EXPDIR/restarts/${bname}.${timeStepNumber}.${aname}
@MIT end
@MIT endif
@MIT
@MIT
@MIT # Check existance of permanent pickups
@MIT set nonomatch pp = ( pickup* )
@MIT echo $pp
Expand All @@ -1182,39 +1199,39 @@ echo GEOSgcm Run Status: $rc
@MIT if ( ! -e $EXPDIR/restarts/${fname} ) /bin/mv ${fname} $EXPDIR/restarts/${fname}
@MIT end
@MIT endif
@MIT
@MIT
@MIT /bin/mv T.* $EXPDIR/mit_output/
@MIT /bin/mv S.* $EXPDIR/mit_output/
@MIT /bin/mv U.* $EXPDIR/mit_output/
@MIT /bin/mv V.* $EXPDIR/mit_output/
@MIT /bin/mv W.* $EXPDIR/mit_output/
@MIT /bin/mv PH* $EXPDIR/mit_output/
@MIT /bin/mv Eta.* $EXPDIR/mit_output/
@MIT
@MIT
@MIT /bin/mv AREA.* $EXPDIR/mit_output/
@MIT /bin/mv HEFF.* $EXPDIR/mit_output/
@MIT /bin/mv HSNOW.* $EXPDIR/mit_output/
@MIT /bin/mv UICE.* $EXPDIR/mit_output/
@MIT /bin/mv VICE.* $EXPDIR/mit_output/
@MIT
@MIT
@MIT #copy mit output to mit_output
@MIT foreach i (`grep -i filename data.diagnostics | grep "^ " | cut -d"=" -f2 | cut -d"'" -f2 | awk '{$1=$1;print}'`)
@MIT /bin/mv ${i}* $EXPDIR/mit_output/
@MIT end
@MIT
@MIT
@MIT foreach i (`grep -i stat_fName data.diagnostics | grep "^ " | cut -d"=" -f2 | cut -d"'" -f2 | awk '{$1=$1;print}'`)
@MIT /bin/mv ${i}* $EXPDIR/mit_output/
@MIT end
@MIT
@MIT
@MIT cd $SCRDIR
@MIT
@MIT
@MIT endif
@MIT
@MIT
@MIT # ---------------------------------------------------
@MIT # End MITgcm restarts - after running GEOSgcm.x
@MIT # ---------------------------------------------------


#######################################################################
# Rename Final Checkpoints => Restarts for Next Segment and Archive
# Note: cap_restart contains the current NYMD and NHMS
Expand Down
Loading