diff --git a/conda/configure-balsam-test.sh b/conda/configure-balsam-test.sh index a284fd92e..56377aa5f 100755 --- a/conda/configure-balsam-test.sh +++ b/conda/configure-balsam-test.sh @@ -19,11 +19,11 @@ export THIS_DIR=$PWD export SCRIPT_BASENAME=script_test_balsam_hworld # Set proper permissions, initialize Balsam DB, activate DB -export BALSAM_DB_PATH='~/test-balsam' +export BALSAM_DB_PATH=$HOME/test-balsam sudo chown -R postgres:postgres /var/run/postgresql sudo chmod a+w /var/run/postgresql -balsam init ~/test-balsam -sudo chmod -R 700 ~/test-balsam/balsamdb +balsam init $HOME/test-balsam +sudo chmod -R 700 $HOME/test-balsam/balsamdb source balsamactivate test-balsam # Refresh DB diff --git a/conda/test_balsam_hworld.py b/conda/test_balsam_hworld.py index 900301bd8..bb728b3d7 100644 --- a/conda/test_balsam_hworld.py +++ b/conda/test_balsam_hworld.py @@ -1,15 +1,15 @@ import subprocess import os import time -import sys import libensemble -from libensemble.tests.regression_tests.common import modify_Balsam_worker +from libensemble.tests.regression_tests.common import modify_Balsam_worker, modify_Balsam_hostprint # TESTSUITE_COMMS: local # TESTSUITE_NPROCS: 3 -# This test is NOT submitted as a job to Balsam. Instead, script_test_balsam_hworld.py -# This test executes that job through the 'runstr' line in run_Balsam_job() +# This test is NOT submitted as a job to Balsam. script_test_balsam_hworld.py is +# the executable submitted to Balsam as a job. 
This test executes that job +# through the 'runstr' line in run_Balsam_job() def run_Balsam_job(): @@ -27,8 +27,7 @@ def wait_for_job_dir(basedb): print('Waiting for Job Directory'.format(sleeptime)) while len(os.listdir(basedb)) == 0 and sleeptime < 15: - print('{}'.format(sleeptime), end=" ") - sys.stdout.flush() + print(sleeptime, end=" ", flush=True) time.sleep(1) sleeptime += 1 @@ -44,10 +43,9 @@ def wait_for_job_output(jobdir): print('Checking for Balsam output file: {}'.format(output)) while not os.path.isfile(output) and sleeptime < 30: - print('{}'.format(sleeptime), end=" ") - sys.stdout.flush() - time.sleep(2) - sleeptime += 2 + print(sleeptime, end=" ", flush=True) + time.sleep(1) + sleeptime += 1 return output @@ -56,7 +54,9 @@ def print_job_output(outscript): sleeptime = 0 print('Output file found. Waiting for complete Balsam Job Output.') - lastlines = ['Job 4 done on worker 1\n', 'Job 4 done on worker 2\n'] + lastlines = ['Job 4 done on worker 1\n', 'Job 4 done on worker 2\n', + 'Run completed.\n'] + lastposition = 0 while sleeptime < 60: @@ -66,10 +66,9 @@ def print_job_output(outscript): lastposition = f.tell() if len(new) > 0: - print(new) + print(new, flush=True) else: - print('{}'.format(sleeptime), end=" ") - sys.stdout.flush() + print(sleeptime, end=" ", flush=True) if any(new.endswith(line) for line in lastlines): break @@ -92,13 +91,14 @@ def move_job_coverage(jobdir): if __name__ == '__main__': - # For Balsam-specific Coverage config file, to not evaluate Balsam data dir + # Used by Balsam Coverage config file. 
Don't evaluate Balsam data dir libepath = os.path.dirname(libensemble.__file__) os.environ['LIBE_PATH'] = libepath - basedb = os.path.expanduser('~/test-balsam/data/libe_test-balsam') + basedb = os.environ['HOME'] + '/test-balsam/data/libe_test-balsam' modify_Balsam_worker() + modify_Balsam_hostprint() run_Balsam_job() jobdir = wait_for_job_dir(basedb) @@ -107,3 +107,20 @@ def move_job_coverage(jobdir): move_job_coverage(jobdir) print('Test complete.') + + +# IN BALSAM LOG: + +# 11-Sep-2019 14:36:27|7301| ERROR|balsam:47] Uncaught Exception : Cooley WorkerGroup needs workers_file to setup +# Traceback (most recent call last): +# File "/home/travis/build/Libensemble/balsam/balsam/launcher/launcher.py", line 443, in +# main(args) +# File "/home/travis/build/Libensemble/balsam/balsam/launcher/launcher.py", line 422, in main +# launcher = Launcher(wf_filter, timelimit_min, gpus_per_node) +# File "/home/travis/build/Libensemble/balsam/balsam/launcher/launcher.py", line 104, in __init__ +# self.worker_group = worker.WorkerGroup() +# File "/home/travis/build/Libensemble/balsam/balsam/launcher/worker.py", line 50, in __init__ +# self.setup() +# File "/home/travis/build/Libensemble/balsam/balsam/launcher/worker.py", line 112, in setup_COOLEY +# raise ValueError("Cooley WorkerGroup needs workers_file to setup") +# ValueError: Cooley WorkerGroup needs workers_file to setup diff --git a/libensemble/tests/regression_tests/common.py b/libensemble/tests/regression_tests/common.py index 65f8ff953..82b2bc49e 100644 --- a/libensemble/tests/regression_tests/common.py +++ b/libensemble/tests/regression_tests/common.py @@ -225,3 +225,43 @@ def modify_Balsam_pyCoverage(): with open(balsam_commands_path, 'w') as f: for line in lines: f.write(line) + + +def modify_Balsam_hostprint(): + # Also modify Balsam Worker & Worker Group to print Host type (for debugging + # purposes). 
Balsam test bug may be caused by setup_COOLEY() being called + # instead of setup_DEFAULT() within Balsam's worker.py + import balsam + + print_lines = {"host": " print('HOST TYPE: ', self.host_type)\n", + "COOLEY": " print('IN setup_COOLEY')\n", + "DEFAULT": " print('IN setup_DEFAULT')\n"} + + host_prior_lines = [" self.host_type = JobEnv.host_type\n", + " self.host_type = host_type\n"] + + setup_prior_lines = [" def setup_COOLEY(self):\n", + " def setup_DEFAULT(self):\n"] + + workerfile = 'worker.py' + balsam_path = os.path.dirname(balsam.__file__) + '/launcher' + balsam_worker_path = os.path.join(balsam_path, workerfile) + + with open(balsam_worker_path, 'r') as f: + lines = f.readlines() + + newlines = [] + for line in lines: + if line in print_lines.values(): + continue + newlines.append(line) # Line of code from prior + if line in host_prior_lines: # + newlines.append(print_lines['host']) + elif line == setup_prior_lines[0]: + newlines.append(print_lines['COOLEY']) + elif line == setup_prior_lines[1]: + newlines.append(print_lines['DEFAULT']) + + with open(balsam_worker_path, 'w') as f: + for line in newlines: + f.write(line) diff --git a/libensemble/tests/regression_tests/script_test_balsam_hworld.py b/libensemble/tests/regression_tests/script_test_balsam_hworld.py index fd9430229..16772441b 100644 --- a/libensemble/tests/regression_tests/script_test_balsam_hworld.py +++ b/libensemble/tests/regression_tests/script_test_balsam_hworld.py @@ -3,7 +3,6 @@ import os import numpy as np -import multiprocessing import mpi4py from mpi4py import MPI @@ -31,12 +30,6 @@ def build_simfunc(): is_master = MPI.COMM_WORLD.Get_rank() == 0 cores_per_job = 1 -logical_cores = multiprocessing.cpu_count() -cores_all_jobs = nworkers*cores_per_job - -if is_master: - print('\nCores req: {} Cores avail: {}\n'.format(cores_all_jobs, - logical_cores)) sim_app = './my_simjob.x' if not os.path.isfile(sim_app):