From 6428dce98da535db4c0770ebc931ae9feb0a3e31 Mon Sep 17 00:00:00 2001 From: Daniel Hollas Date: Tue, 28 Sep 2021 15:16:20 +0100 Subject: [PATCH] Harden TERAPI tests. - Sometimes when a test failed, it left running processes behind. I made sure that the trap command and cleanup functions are defined BEFORE we start tc_server or ABIN. Now this should behave better, even if we manually stop running test suite in the middle. - Bump MPICH version in GitHub CI --- .github/workflows/gfortran.yml | 2 +- dev_scripts/install_mpich.sh | 2 +- dev_scripts/install_openmpi.sh | 2 +- dev_scripts/install_pfunit.sh | 7 ++++++- dev_scripts/install_plumed.sh | 2 +- src/modules.F90 | 3 ++- tests/MINI/input.in | 5 ++--- tests/TERAPI-FAILS/test.sh | 2 -- tests/TERAPI-FAILS/test1.sh | 25 ++++++++++++------------- tests/TERAPI-FAILS/test2.sh | 26 +++++++++++++------------- tests/TERAPI-FAILS/test3.sh | 24 +++++++++++++----------- tests/TERAPI-FAILS/test4.sh | 25 ++++++++++++------------- tests/TERAPI-FAILS/test5.sh | 23 ++++++++++------------- tests/TERAPI-FAILS/test6.sh | 20 ++++++++++---------- tests/TERAPI-FAILS/test7.sh | 21 ++++++++++----------- tests/TERAPI-FAILS/test8.sh | 25 ++++++++++++------------- tests/TERAPI-PIMD-PARALLEL/test.sh | 27 +++++++++++---------------- tests/TERAPI-PIMD/dipoles.dat.ref | 9 +++++++++ tests/TERAPI-PIMD/test.sh | 24 +++++++++--------------- tests/TERAPI-REMD/test.sh | 13 +++++++------ tests/TERAPI/dipoles.dat.ref | 2 +- tests/TERAPI/test.sh | 19 +++++++------------ tests/test.sh | 5 ++--- tests/test_tc_server_utils.sh | 21 +++++++++++---------- utils/r.terabin | 4 ++-- 25 files changed, 165 insertions(+), 173 deletions(-) create mode 100644 tests/TERAPI-PIMD/dipoles.dat.ref diff --git a/.github/workflows/gfortran.yml b/.github/workflows/gfortran.yml index 88699e09..514f27a7 100644 --- a/.github/workflows/gfortran.yml +++ b/.github/workflows/gfortran.yml @@ -179,7 +179,7 @@ jobs: fail-fast: false matrix: gcc_v: [7, 9, 10] - mpich_v: ["3.3.2", "3.4.1"] + 
mpich_v: ["3.3.2", "3.4.2"] env: # To speed-up MPICH build CFLAGS: -O0 diff --git a/dev_scripts/install_mpich.sh b/dev_scripts/install_mpich.sh index e3161138..601d2f42 100755 --- a/dev_scripts/install_mpich.sh +++ b/dev_scripts/install_mpich.sh @@ -8,7 +8,7 @@ set -euo pipefail # Path as an optional first parameter -MPICH_DIR="${1-/home/$USER/mpich}" +MPICH_DIR="${1-$HOME/mpich}" # We take current stable version as default # (as of 06 Nov 2020). MPICH_VERSION="${2-"3.3.2"}" diff --git a/dev_scripts/install_openmpi.sh b/dev_scripts/install_openmpi.sh index 0e9236a9..85d46e41 100755 --- a/dev_scripts/install_openmpi.sh +++ b/dev_scripts/install_openmpi.sh @@ -8,7 +8,7 @@ set -euo pipefail # Path as an optional first parameter -OPENMPI_DIR="${1-/home/$USER/openmpi}" +OPENMPI_DIR="${1-$HOME/openmpi}" # We take current stable version as default # (as of 06 Nov 2020) OPENMPI_VERSION=${2-"4.0"} diff --git a/dev_scripts/install_pfunit.sh b/dev_scripts/install_pfunit.sh index 09e297f6..4bdf6e11 100755 --- a/dev_scripts/install_pfunit.sh +++ b/dev_scripts/install_pfunit.sh @@ -3,11 +3,16 @@ # Exit script immediately upon error set -euo pipefail -REPO_DIR="/home/$USER/pfunit" +REPO_DIR="$HOME/pfunit" if [[ "$#" -eq 1 && ! -z $1 ]];then REPO_DIR=$1 fi +if [[ -e $REPO_DIR ]];then + echo "ERROR: $REPO_DIR already exists." 
+ exit 1 +fi + git clone --recursive https://github.com/Goddard-Fortran-Ecosystem/pFUnit $REPO_DIR && cd $REPO_DIR mkdir -p build && cd build diff --git a/dev_scripts/install_plumed.sh b/dev_scripts/install_plumed.sh index ea107616..ce7ac4f0 100755 --- a/dev_scripts/install_plumed.sh +++ b/dev_scripts/install_plumed.sh @@ -8,7 +8,7 @@ set -euo pipefail # Path as an optional first parameter -PLUMED_DIR="${1-/home/$USER/plumed}" +PLUMED_DIR="${1-$HOME/plumed}" # We take the current stable version as default # (as of 17 Dec 2020) PLUMED_VERSION="${2-"2.6.2"}" diff --git a/src/modules.F90 b/src/modules.F90 index 00597b66..2e98c5ef 100644 --- a/src/modules.F90 +++ b/src/modules.F90 @@ -443,6 +443,7 @@ end subroutine mass_init end module mod_system ! module for permanent file handling +! TODO: Move this to a separate file. module mod_files implicit none public @@ -612,7 +613,7 @@ subroutine files_init(isbc, phase, ndist, nang, ndih) write (UCHARGES, *) '# Time_step Bead_index ', (names(i), i=1, natom) open (UDIP, file=chfiles(UDIP), access=chaccess, action='write') - write (UDIP, *) '# Time |D| Dx Dy Dz' + write (UDIP, *) '# Time Bead_index |D| Dx Dy Dz' end if if (isbc == 1) then diff --git a/tests/MINI/input.in b/tests/MINI/input.in index 93c89eec..16c00151 100644 --- a/tests/MINI/input.in +++ b/tests/MINI/input.in @@ -1,5 +1,4 @@ -This is a sample input file for ABIN -NOTE: order of input sections matters!! +Very naive steepest descent minimization. &general nstep=100, @@ -8,6 +7,7 @@ irest=0, ! should we restart from restart.xyz? (ignoring mini.dat), NOT working pot='mmwater' !not done yet,options should be g09,orca,tera,turbo,molpro,nab,harm,morse,guillot,2dho ipimd=3, !classical simulation 0, quantum simulation 1 nwalk=1, !number of random walkers +! TODO: Reduce the number of steps to reduce numerical differences between compilers. 
nstep=200, !equilibration period,!not properly implemented yet istage=0, !staging transformation (1), without staging (0) @@ -27,4 +27,3 @@ natom=3, !number of atoms inose=0, ! Thermostating: Nose-Hoover 1, microcanonical 0,GLE 2, LE 3 temp=298.15, ! initial temperature for Maxwell-Boltzmann sampling [au] / - diff --git a/tests/TERAPI-FAILS/test.sh b/tests/TERAPI-FAILS/test.sh index 1e34f731..440232a0 100755 --- a/tests/TERAPI-FAILS/test.sh +++ b/tests/TERAPI-FAILS/test.sh @@ -34,8 +34,6 @@ check_for_openmpi # Compile default TC server $MPICXX $TCSRC -Wall -o $TCEXE -#trap cleanup INT ABRT TERM EXIT - echo "########### SUBTEST 1 ###################" ./test1.sh echo "########### SUBTEST 2 ###################" diff --git a/tests/TERAPI-FAILS/test1.sh b/tests/TERAPI-FAILS/test1.sh index e7a48609..ff527f05 100755 --- a/tests/TERAPI-FAILS/test1.sh +++ b/tests/TERAPI-FAILS/test1.sh @@ -8,6 +8,16 @@ set -euo pipefail source ../test_tc_server_utils.sh +# Rewrite cleanup function from utils sourced above. +function cleanup { + kill -9 $tcpid $abinpid > /dev/null 2>&1 || true + grep 'what()' $TCOUT > TC_ERROR$IDX + if [[ -f ERROR ]];then + mv ERROR ABIN_ERROR$IDX + fi + exit 0 +} + IDX=1 ABININ=input.in$IDX ABINOUT=${ABINOUT}$IDX @@ -20,28 +30,17 @@ $MPICXX $TCSRC -Wall -o $TCEXE launch_hydra_nameserver $MPICH_HYDRA -hostname=$HOSTNAME -MPIRUN="$MPIRUN -nameserver $hostname -n 1" +MPIRUN="$MPIRUN -nameserver $HOSTNAME -n 1" TC_PORT="test1.$$" ABIN_CMD="$ABINEXE -i $ABININ -x $ABINGEOM -M $TC_PORT" TC_CMD="./$TCEXE $TC_PORT.1" +trap cleanup INT ABRT TERM EXIT $MPIRUN $TC_CMD > $TCOUT 2>&1 || true & tcpid=$! $MPIRUN $ABIN_CMD > $ABINOUT 2>&1 || true & abinpid=$! 
-function cleanup { - kill -9 $tcpid $abinpid > /dev/null 2>&1 || true - grep 'what()' $TCOUT > TC_ERROR$IDX - if [[ -f ERROR ]];then - mv ERROR ABIN_ERROR$IDX - fi - exit 0 -} - -trap cleanup INT ABRT TERM EXIT - check_running_processes $abinpid $tcpid diff --git a/tests/TERAPI-FAILS/test2.sh b/tests/TERAPI-FAILS/test2.sh index b6cab508..5a89f4b4 100755 --- a/tests/TERAPI-FAILS/test2.sh +++ b/tests/TERAPI-FAILS/test2.sh @@ -8,34 +8,34 @@ set -euo pipefail source ../test_tc_server_utils.sh +function cleanup { + kill -9 $tcpid $abinpid > /dev/null 2>&1 || true + grep 'what()' $TCOUT > TC_ERROR$IDX + if [[ -f ERROR ]];then + mv ERROR ABIN_ERROR$IDX + fi + exit 0 +} + IDX=2 ABININ=input.in$IDX ABINOUT=${ABINOUT}$IDX TCOUT=${TCOUT}$IDX launch_hydra_nameserver $MPICH_HYDRA -hostname=$HOSTNAME -MPIRUN="$MPIRUN -nameserver $hostname -n 1" + +MPIRUN="$MPIRUN -nameserver $HOSTNAME -n 1" TC_PORT="test$IDX.$$" ABIN_CMD="$ABINEXE -i $ABININ -x $ABINGEOM -M $TC_PORT" TC_CMD="./$TCEXE $TC_PORT.1" +trap cleanup INT ABRT TERM EXIT + $MPIRUN $TC_CMD > $TCOUT 2>&1 || true & tcpid=$! $MPIRUN $ABIN_CMD > $ABINOUT 2>&1 || true & abinpid=$! 
-function cleanup { - kill -9 $tcpid $abinpid > /dev/null 2>&1 || true - grep 'what()' $TCOUT > TC_ERROR$IDX - if [[ -f ERROR ]];then - mv ERROR ABIN_ERROR$IDX - fi - exit 0 -} - -trap cleanup INT ABRT TERM EXIT - check_running_processes $abinpid $tcpid diff --git a/tests/TERAPI-FAILS/test3.sh b/tests/TERAPI-FAILS/test3.sh index 78c5d724..b1222383 100755 --- a/tests/TERAPI-FAILS/test3.sh +++ b/tests/TERAPI-FAILS/test3.sh @@ -5,6 +5,15 @@ set -euo pipefail source ../test_tc_server_utils.sh +function cleanup { + kill -9 ${job_pids[@]} > /dev/null 2>&1 || true + grep 'what()' $TCOUT.* > TC_ERROR$IDX + if [[ -f ERROR ]];then + mv ERROR ABIN_ERROR$IDX + fi + exit 0 +} + IDX=3 ABININ=input.in$IDX ABINOUT=${ABINOUT}$IDX @@ -15,11 +24,15 @@ MPIRUN="$MPIRUN -n 1" ABIN_CMD="$ABINEXE -i $ABININ -x $ABINGEOM" declare -A job_pids + +trap cleanup INT ABRT TERM EXIT + for ((itera=1;itera<=N_TERA_SERVERS;itera++)) { $MPIRUN ./$TCEXE > $TCOUT.$itera 2>&1 & job_pids[$itera]=$! } sleep 1 + # Grep port names from TC outputs, pass to ABIN via a file. for ((itera=1;itera<=N_TERA_SERVERS;itera++)) { grep 'port name' $TCOUT.$itera | awk -F"port name: " '{print $2;exit}' > $TC_PORT_FILE.$itera @@ -28,15 +41,4 @@ for ((itera=1;itera<=N_TERA_SERVERS;itera++)) { $MPIRUN $ABIN_CMD > $ABINOUT 2>&1 & job_pids[$(expr $N_TERA_SERVERS + 1)]=$! 
-function cleanup { - kill -9 ${job_pids[@]} > /dev/null 2>&1 || true - grep 'what()' $TCOUT.* > TC_ERROR$IDX - if [[ -f ERROR ]];then - mv ERROR ABIN_ERROR$IDX - fi - exit 0 -} - -trap cleanup INT ABRT TERM EXIT - check_running_processes ${job_pids[@]} diff --git a/tests/TERAPI-FAILS/test4.sh b/tests/TERAPI-FAILS/test4.sh index 0cd8b8c0..3729b7b5 100755 --- a/tests/TERAPI-FAILS/test4.sh +++ b/tests/TERAPI-FAILS/test4.sh @@ -8,6 +8,15 @@ set -euo pipefail source ../test_tc_server_utils.sh +function cleanup { + kill -9 $tcpid $abinpid > /dev/null 2>&1 || true + grep 'what()' $TCOUT > TC_ERROR$IDX + if [[ -f ERROR ]];then + mv ERROR ABIN_ERROR$IDX + fi + exit 0 +} + IDX=4 ABININ=input.in$IDX ABINOUT=${ABINOUT}$IDX @@ -20,28 +29,18 @@ $MPICXX $TCSRC -Wall -o $TCEXE launch_hydra_nameserver $MPICH_HYDRA -hostname=$HOSTNAME -MPIRUN="$MPIRUN -nameserver $hostname -n 1" +MPIRUN="$MPIRUN -nameserver $HOSTNAME -n 1" TC_PORT="test$IDX.$$" ABIN_CMD="$ABINEXE -i $ABININ -x $ABINGEOM -M $TC_PORT" TC_CMD="./$TCEXE $TC_PORT.1" +trap cleanup INT ABRT TERM EXIT + $MPIRUN $TC_CMD > $TCOUT 2>&1 || true & tcpid=$! $MPIRUN $ABIN_CMD > $ABINOUT 2>&1 || true & abinpid=$! 
-function cleanup { - kill -9 $tcpid $abinpid > /dev/null 2>&1 || true - grep 'what()' $TCOUT > TC_ERROR$IDX - if [[ -f ERROR ]];then - mv ERROR ABIN_ERROR$IDX - fi - exit 0 -} - -trap cleanup INT ABRT TERM EXIT - check_running_processes $abinpid $tcpid diff --git a/tests/TERAPI-FAILS/test5.sh b/tests/TERAPI-FAILS/test5.sh index 93f887cc..3577b8d4 100755 --- a/tests/TERAPI-FAILS/test5.sh +++ b/tests/TERAPI-FAILS/test5.sh @@ -7,29 +7,26 @@ set -euo pipefail source ../test_tc_server_utils.sh +function cleanup { + kill -9 $abinpid > /dev/null 2>&1 || true + if [[ -f ERROR ]];then + mv ERROR ABIN_ERROR$IDX + fi + exit 0 +} + IDX=5 ABININ=input.in$IDX ABINOUT=${ABINOUT}$IDX launch_hydra_nameserver $MPICH_HYDRA -hostname=$HOSTNAME -#MPIRUN="$MPIRUN -nameserver $hostname -n 1" - TC_PORT="test$IDX.$$" ABIN_CMD="$ABINEXE -i $ABININ -x $ABINGEOM -M $TC_PORT" +trap cleanup INT ABRT TERM EXIT + $MPIRUN $ABIN_CMD > $ABINOUT 2>&1 || true & abinpid=$! -function cleanup { - kill -9 $abinpid > /dev/null 2>&1 || true - if [[ -f ERROR ]];then - mv ERROR ABIN_ERROR$IDX - fi - exit 0 -} - -trap cleanup INT ABRT TERM EXIT - check_running_processes $abinpid diff --git a/tests/TERAPI-FAILS/test6.sh b/tests/TERAPI-FAILS/test6.sh index 84ed4394..615a51a5 100755 --- a/tests/TERAPI-FAILS/test6.sh +++ b/tests/TERAPI-FAILS/test6.sh @@ -7,6 +7,14 @@ set -euo pipefail source ../test_tc_server_utils.sh +function cleanup { + kill -9 $abinpid > /dev/null 2>&1 || true + if [[ -f ERROR ]];then + mv ERROR ABIN_ERROR$IDX + fi + exit 0 +} + IDX=6 ABININ=input.in$IDX ABINOUT=${ABINOUT}$IDX @@ -17,17 +25,9 @@ MPIRUN="$MPIRUN -n 1" ABIN_CMD="$ABINEXE -i $ABININ -x $ABINGEOM" +trap cleanup INT ABRT TERM EXIT + $MPIRUN $ABIN_CMD > $ABINOUT 2>&1 || true & abinpid=$! 
-function cleanup { - kill -9 $abinpid > /dev/null 2>&1 || true - if [[ -f ERROR ]];then - mv ERROR ABIN_ERROR$IDX - fi - exit 0 -} - -trap cleanup INT ABRT TERM EXIT - check_running_processes $abinpid diff --git a/tests/TERAPI-FAILS/test7.sh b/tests/TERAPI-FAILS/test7.sh index ffea5789..7ed7cb80 100755 --- a/tests/TERAPI-FAILS/test7.sh +++ b/tests/TERAPI-FAILS/test7.sh @@ -8,6 +8,15 @@ set -euo pipefail source ../test_tc_server_utils.sh +function cleanup { + kill -9 $abinpid > /dev/null 2>&1 || true + rm -f port.txt.1 + if [[ -f ERROR ]];then + mv ERROR ABIN_ERROR$IDX + fi + exit 0 +} + IDX=7 ABININ=input.in$IDX ABINOUT=${ABINOUT}$IDX @@ -18,19 +27,9 @@ MPIRUN="$MPIRUN -n 1" ABIN_CMD="$ABINEXE -i $ABININ -x $ABINGEOM" +trap cleanup INT ABRT TERM EXIT $MPIRUN $ABIN_CMD > $ABINOUT 2>&1 || true & abinpid=$! -function cleanup { - kill -9 $abinpid > /dev/null 2>&1 || true - rm -f port.txt.1 - if [[ -f ERROR ]];then - mv ERROR ABIN_ERROR$IDX - fi - exit 0 -} - -trap cleanup INT ABRT TERM EXIT - check_running_processes $abinpid diff --git a/tests/TERAPI-FAILS/test8.sh b/tests/TERAPI-FAILS/test8.sh index b0af1b9c..0f31e071 100755 --- a/tests/TERAPI-FAILS/test8.sh +++ b/tests/TERAPI-FAILS/test8.sh @@ -8,6 +8,15 @@ set -euo pipefail source ../test_tc_server_utils.sh +function cleanup { + kill -9 $tcpid $abinpid > /dev/null 2>&1 || true + grep 'what()' $TCOUT > TC_ERROR$IDX + if [[ -f ERROR ]];then + mv ERROR ABIN_ERROR$IDX + fi + exit 0 +} + IDX=8 ABININ=input.in$IDX ABINOUT=${ABINOUT}$IDX @@ -20,28 +29,18 @@ $MPICXX $TCSRC -Wall -o $TCEXE launch_hydra_nameserver $MPICH_HYDRA -hostname=$HOSTNAME -MPIRUN="$MPIRUN -nameserver $hostname -n 1" +MPIRUN="$MPIRUN -nameserver $HOSTNAME -n 1" TC_PORT="test$IDX.$$" ABIN_CMD="$ABINEXE -i $ABININ -x $ABINGEOM -M $TC_PORT" TC_CMD="./$TCEXE $TC_PORT.1" +trap cleanup INT ABRT TERM EXIT + $MPIRUN $TC_CMD > $TCOUT 2>&1 || true & tcpid=$! $MPIRUN $ABIN_CMD > $ABINOUT 2>&1 || true & abinpid=$! 
-function cleanup { - kill -9 $tcpid $abinpid > /dev/null 2>&1 || true - grep 'what()' $TCOUT > TC_ERROR$IDX - if [[ -f ERROR ]];then - mv ERROR ABIN_ERROR$IDX - fi - exit 0 -} - -trap cleanup INT ABRT TERM EXIT - check_running_processes $abinpid $tcpid diff --git a/tests/TERAPI-PIMD-PARALLEL/test.sh b/tests/TERAPI-PIMD-PARALLEL/test.sh index 4cdabed3..3112e518 100755 --- a/tests/TERAPI-PIMD-PARALLEL/test.sh +++ b/tests/TERAPI-PIMD-PARALLEL/test.sh @@ -1,7 +1,5 @@ #/bin/bash set -euo pipefail -# Useful for debugging -#set -x ABINEXE=$1 source ../test_tc_server_utils.sh @@ -27,35 +25,32 @@ $MPICXX $TCSRC -Wall -o $TCEXE # https://github.com/pmodels/mpich/issues/5058 # # Therefore, we pass the port_name to ABIN via files, see below. -#TC_SERVER_NAME="tcserver.$$" -#launch_hydra_nameserver $MPICH_HYDRA -#hostname=$HOSTNAME -#MPIRUN="$MPIRUN -nameserver $hostname -n 1" MPIRUN="$MPIRUN -n 1" -ABIN_CMD="$ABINEXE -i $ABININ -x $ABINGEOM" # -M $TC_SERVER_NAME" +ABIN_CMD="$ABINEXE -i $ABININ -x $ABINGEOM" -let NUM_JOBS=N_TERA_SERVERS+1 declare -A job_pids + +function cleanup { + kill -9 ${job_pids[@]} > /dev/null 2>&1 || true + exit 0 +} + +trap cleanup INT ABRT TERM EXIT + for ((itera=1;itera<=N_TERA_SERVERS;itera++)) { - #$MPIRUN ./$TCEXE $TC_SERVER_NAME.$itera > $TCOUT.$itera 2>&1 & $MPIRUN ./$TCEXE > $TCOUT.$itera 2>&1 & job_pids[$itera]=$! } sleep 1 + # Grep port names from TC output, pass to ABIN via a file. for ((itera=1;itera<=N_TERA_SERVERS;itera++)) { grep 'port name' $TCOUT.$itera | awk -F"port name: " '{print $2;exit}' > $TC_PORT_FILE.$itera } $MPIRUN $ABIN_CMD > $ABINOUT 2>&1 & -job_pids[$NUM_JOBS]=$! - -function cleanup { - kill -9 ${job_pids[@]} > /dev/null 2>&1 || true - exit 0 -} +job_pids[abin]=$! 
-trap cleanup INT ABRT TERM EXIT check_running_processes ${job_pids[@]} diff --git a/tests/TERAPI-PIMD/dipoles.dat.ref b/tests/TERAPI-PIMD/dipoles.dat.ref new file mode 100644 index 00000000..f7eee879 --- /dev/null +++ b/tests/TERAPI-PIMD/dipoles.dat.ref @@ -0,0 +1,9 @@ + # Time Bead_index |D| Dx Dy Dz + 0 1 0.3742E-01 -0.1000E-01 -0.2000E-01 -0.3000E-01 + 0 2 0.3742E-01 -0.1000E-01 -0.2000E-01 -0.3000E-01 + 0 3 0.3742E-01 -0.1000E-01 -0.2000E-01 -0.3000E-01 + 0 4 0.3742E-01 -0.1000E-01 -0.2000E-01 -0.3000E-01 + 1 1 0.3742E-01 -0.1000E-01 -0.2000E-01 -0.3000E-01 + 1 2 0.3742E-01 -0.1000E-01 -0.2000E-01 -0.3000E-01 + 1 3 0.3742E-01 -0.1000E-01 -0.2000E-01 -0.3000E-01 + 1 4 0.3742E-01 -0.1000E-01 -0.2000E-01 -0.3000E-01 diff --git a/tests/TERAPI-PIMD/test.sh b/tests/TERAPI-PIMD/test.sh index 4cdabed3..8e69529a 100755 --- a/tests/TERAPI-PIMD/test.sh +++ b/tests/TERAPI-PIMD/test.sh @@ -1,7 +1,5 @@ #/bin/bash set -euo pipefail -# Useful for debugging -#set -x ABINEXE=$1 source ../test_tc_server_utils.sh @@ -18,7 +16,7 @@ N_TERA_SERVERS=$(egrep --only-matching 'nteraservers\s*=\s*[0-9]' $ABININ | egre # Exit early for OpenMPI build. check_for_openmpi -# Compiled the fake TC server +# Compile fake TC server $MPICXX $TCSRC -Wall -o $TCEXE # NOTE: We very intentionally do NOT launch @@ -27,23 +25,25 @@ $MPICXX $TCSRC -Wall -o $TCEXE # https://github.com/pmodels/mpich/issues/5058 # # Therefore, we pass the port_name to ABIN via files, see below. 
-#TC_SERVER_NAME="tcserver.$$" -#launch_hydra_nameserver $MPICH_HYDRA -#hostname=$HOSTNAME -#MPIRUN="$MPIRUN -nameserver $hostname -n 1" MPIRUN="$MPIRUN -n 1" -ABIN_CMD="$ABINEXE -i $ABININ -x $ABINGEOM" # -M $TC_SERVER_NAME" +ABIN_CMD="$ABINEXE -i $ABININ -x $ABINGEOM" + +function cleanup { + kill -9 ${job_pids[@]} > /dev/null 2>&1 || true + exit 0 +} +trap cleanup INT ABRT TERM EXIT let NUM_JOBS=N_TERA_SERVERS+1 declare -A job_pids for ((itera=1;itera<=N_TERA_SERVERS;itera++)) { - #$MPIRUN ./$TCEXE $TC_SERVER_NAME.$itera > $TCOUT.$itera 2>&1 & $MPIRUN ./$TCEXE > $TCOUT.$itera 2>&1 & job_pids[$itera]=$! } sleep 1 + # Grep port names from TC output, pass to ABIN via a file. for ((itera=1;itera<=N_TERA_SERVERS;itera++)) { grep 'port name' $TCOUT.$itera | awk -F"port name: " '{print $2;exit}' > $TC_PORT_FILE.$itera @@ -52,10 +52,4 @@ for ((itera=1;itera<=N_TERA_SERVERS;itera++)) { $MPIRUN $ABIN_CMD > $ABINOUT 2>&1 & job_pids[$NUM_JOBS]=$! -function cleanup { - kill -9 ${job_pids[@]} > /dev/null 2>&1 || true - exit 0 -} - -trap cleanup INT ABRT TERM EXIT check_running_processes ${job_pids[@]} diff --git a/tests/TERAPI-REMD/test.sh b/tests/TERAPI-REMD/test.sh index d6bcfa81..b44eb512 100755 --- a/tests/TERAPI-REMD/test.sh +++ b/tests/TERAPI-REMD/test.sh @@ -1,6 +1,11 @@ #/bin/bash set -euo pipefail +function local_cleanup { + kill -9 ${job_pids[@]} > /dev/null 2>&1 || true + exit 0 +} + ABINEXE=$1 source ../test_tc_server_utils.sh @@ -27,6 +32,8 @@ TC_SERVER_NAME="tcserver.$$" ABIN_CMD="$ABINEXE -i $ABININ -x $ABINGEOM" # -M $TC_SERVER_NAME" +trap local_cleanup INT ABRT TERM EXIT + let NUM_JOBS=N_TERA_SERVERS+1 declare -A job_pids for ((itera=1;itera<=N_TERA_SERVERS;itera++)) { @@ -42,10 +49,4 @@ for ((itera=1;itera<=N_TERA_SERVERS;itera++)) { $MPIRUN -np 3 $ABINEXE -i input.in -x mini.xyz -v vel0.in > $ABINOUT 2>&1 job_pids[$NUM_JOBS]=$! 
-function cleanup { - kill -9 ${job_pids[@]} > /dev/null 2>&1 || true - exit 0 -} - -trap cleanup INT ABRT TERM EXIT check_running_processes ${job_pids[@]} diff --git a/tests/TERAPI/dipoles.dat.ref b/tests/TERAPI/dipoles.dat.ref index 50dadf46..8e7e6077 100644 --- a/tests/TERAPI/dipoles.dat.ref +++ b/tests/TERAPI/dipoles.dat.ref @@ -1,3 +1,3 @@ - # Time |D| Dx Dy Dz + # Time Bead_index |D| Dx Dy Dz 0 1 0.3742E-01 -0.1000E-01 -0.2000E-01 -0.3000E-01 1 1 0.3742E-01 -0.1000E-01 -0.2000E-01 -0.3000E-01 diff --git a/tests/TERAPI/test.sh b/tests/TERAPI/test.sh index cae7150e..a755ed02 100755 --- a/tests/TERAPI/test.sh +++ b/tests/TERAPI/test.sh @@ -1,7 +1,5 @@ #/bin/bash set -euo pipefail -# Useful for debugging -#set -x ABINEXE=$1 source ../test_tc_server_utils.sh @@ -22,25 +20,22 @@ $MPICXX $TCSRC -Wall -o $TCEXE launch_hydra_nameserver $MPICH_HYDRA -hostname=$HOSTNAME -MPIRUN="$MPIRUN -nameserver $hostname -n 1" +MPIRUN="$MPIRUN -nameserver $HOSTNAME -n 1" TC_PORT="tcport.$$" ABIN_CMD="$ABINEXE -i $ABININ -x $ABINGEOM -M $TC_PORT" TC_CMD="./$TCEXE $TC_PORT.1" -$MPIRUN $TC_CMD > $TCOUT 2>&1 & -tcpid=$! - -$MPIRUN $ABIN_CMD > $ABINOUT 2>&1 & -abinpid=$! - function cleanup { kill -9 $tcpid $abinpid > /dev/null 2>&1 || true - #kill -9 $tcpid $abinpid $hydrapid > /dev/null 2>&1 || true exit 0 } - trap cleanup INT ABRT TERM EXIT +$MPIRUN $TC_CMD > $TCOUT 2>&1 & +tcpid=$! + +$MPIRUN $ABIN_CMD > $ABINOUT 2>&1 & +abinpid=$! + check_running_processes $abinpid $tcpid diff --git a/tests/test.sh b/tests/test.sh index d9fc2735..86099404 100755 --- a/tests/test.sh +++ b/tests/test.sh @@ -211,9 +211,8 @@ do continue fi - # For special cases such as REMD, we need a more complicated - # test setup. If a file, test.sh is present in the test directory, - # we will use it. + # For special cases such as REMD, we need a more complicated test setup. + # If a file 'test.sh' is present in the test directory we will use it. 
if [[ -f "test.sh" ]];then # Redirection to dev/null apparently needed for CP2K tests. diff --git a/tests/test_tc_server_utils.sh b/tests/test_tc_server_utils.sh index 1931885e..4e8a06b1 100644 --- a/tests/test_tc_server_utils.sh +++ b/tests/test_tc_server_utils.sh @@ -90,19 +90,19 @@ clean_output_files() { } -# Sillently kill all processes whose PIDs -# are passed as parameters. Note that some of -# them could have already ended sucessfully. +# Silently kill all processes whose PIDs are passed as parameters. +# Typically, all these processes should have already ended. kill_processes() { kill -9 $* > /dev/null 2>&1 || true } -# Not that it is hard in general to know -# whether we ended successfully or not, -# so we always return 0. Validation is then -# always done on the output files. +# NOTE that this function will typically get overwritten +# in the TERAPI*/test.sh scripts. cleanup() { - kill_processeses $* + kill_processes $* + # It is hard in general to know whether we ended successfully or not, + # so we always return 0. Validation is then + # always done on the output files. exit 0 } @@ -128,7 +128,6 @@ check_running_processes() { while true;do running=$(ps -eo pid | egrep "$regex" | wc -l) if [[ $running -eq 0 ]];then - #echo "Both ABIN and TeraChem servers stopped" break elif [[ $running -lt $num_jobs ]];then # Give the others time to finish @@ -136,7 +135,9 @@ check_running_processes() { running=$(ps -eo pid | egrep "$regex" | wc -l) if [[ $running -ne 0 ]];then echo "One of the TC servers or ABIN died. Killing the rest." - cat ${ABINOUT}* ${TCOUT}* + #echo "Printing ABIN and TC server outputs for debugging."
+ #echo "##################################################" + #cat ${ABINOUT}* ${TCOUT}* fi break fi diff --git a/utils/r.terabin b/utils/r.terabin index e40a37f9..7c03a807 100755 --- a/utils/r.terabin +++ b/utils/r.terabin @@ -158,10 +158,10 @@ validate_inputs parse_inputs # SET THE ENVIRONMENT -set +ux +set +u source SetEnvironment.sh TERACHEM source SetEnvironment.sh ABIN -set -ux +set -u if [[ $N_TERA_SERVERS -gt 1 ]];then # hydra_nameserver <= v3.4.1 does not work with multiple servers.