From 7be87e1c633763d3a3a7626efff01945fe16e4fe Mon Sep 17 00:00:00 2001 From: jlnav Date: Tue, 10 Sep 2019 10:03:16 -0500 Subject: [PATCH 1/9] use HOME env var instead of ~ or expanduser --- conda/configure-balsam-test.sh | 6 +++--- conda/test_balsam_hworld.py | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/conda/configure-balsam-test.sh b/conda/configure-balsam-test.sh index a284fd92e..56377aa5f 100755 --- a/conda/configure-balsam-test.sh +++ b/conda/configure-balsam-test.sh @@ -19,11 +19,11 @@ export THIS_DIR=$PWD export SCRIPT_BASENAME=script_test_balsam_hworld # Set proper permissions, initialize Balsam DB, activate DB -export BALSAM_DB_PATH='~/test-balsam' +export BALSAM_DB_PATH=$HOME/test-balsam sudo chown -R postgres:postgres /var/run/postgresql sudo chmod a+w /var/run/postgresql -balsam init ~/test-balsam -sudo chmod -R 700 ~/test-balsam/balsamdb +balsam init $HOME/test-balsam +sudo chmod -R 700 $HOME/test-balsam/balsamdb source balsamactivate test-balsam # Refresh DB diff --git a/conda/test_balsam_hworld.py b/conda/test_balsam_hworld.py index 900301bd8..94ec92d50 100644 --- a/conda/test_balsam_hworld.py +++ b/conda/test_balsam_hworld.py @@ -95,8 +95,9 @@ def move_job_coverage(jobdir): # For Balsam-specific Coverage config file, to not evaluate Balsam data dir libepath = os.path.dirname(libensemble.__file__) os.environ['LIBE_PATH'] = libepath + home = os.environ['HOME'] - basedb = os.path.expanduser('~/test-balsam/data/libe_test-balsam') + basedb = home + '/test-balsam/data/libe_test-balsam' modify_Balsam_worker() run_Balsam_job() From a592345bb230cceb70877999066027a1ed5f7844 Mon Sep 17 00:00:00 2001 From: jlnav Date: Tue, 10 Sep 2019 14:57:38 -0500 Subject: [PATCH 2/9] spring cleaning --- conda/test_balsam_hworld.py | 22 ++++++++----------- .../script_test_balsam_hworld.py | 8 +------ 2 files changed, 10 insertions(+), 20 deletions(-) diff --git a/conda/test_balsam_hworld.py b/conda/test_balsam_hworld.py index 94ec92d50..844847ef6 
100644 --- a/conda/test_balsam_hworld.py +++ b/conda/test_balsam_hworld.py @@ -1,15 +1,15 @@ import subprocess import os import time -import sys import libensemble from libensemble.tests.regression_tests.common import modify_Balsam_worker # TESTSUITE_COMMS: local # TESTSUITE_NPROCS: 3 -# This test is NOT submitted as a job to Balsam. Instead, script_test_balsam_hworld.py -# This test executes that job through the 'runstr' line in run_Balsam_job() +# This test is NOT submitted as a job to Balsam. script_test_balsam_hworld.py is +# the executable submitted to Balsam as a job. This test executes that job +# through the 'runstr' line in run_Balsam_job() def run_Balsam_job(): @@ -27,8 +27,7 @@ def wait_for_job_dir(basedb): print('Waiting for Job Directory'.format(sleeptime)) while len(os.listdir(basedb)) == 0 and sleeptime < 15: - print('{}'.format(sleeptime), end=" ") - sys.stdout.flush() + print('{}'.format(sleeptime), end=" ", flush=True) time.sleep(1) sleeptime += 1 @@ -44,8 +43,7 @@ def wait_for_job_output(jobdir): print('Checking for Balsam output file: {}'.format(output)) while not os.path.isfile(output) and sleeptime < 30: - print('{}'.format(sleeptime), end=" ") - sys.stdout.flush() + print('{}'.format(sleeptime), end=" ", flush=True) time.sleep(2) sleeptime += 2 @@ -66,10 +64,9 @@ def print_job_output(outscript): lastposition = f.tell() if len(new) > 0: - print(new) + print(new, flush=True) else: - print('{}'.format(sleeptime), end=" ") - sys.stdout.flush() + print('{}'.format(sleeptime), end=" ", flush=True) if any(new.endswith(line) for line in lastlines): break @@ -92,12 +89,11 @@ def move_job_coverage(jobdir): if __name__ == '__main__': - # For Balsam-specific Coverage config file, to not evaluate Balsam data dir + # Used by Balsam Coverage config file. 
Don't evaluate Balsam data dir libepath = os.path.dirname(libensemble.__file__) os.environ['LIBE_PATH'] = libepath - home = os.environ['HOME'] - basedb = home + '/test-balsam/data/libe_test-balsam' + basedb = os.environ['HOME'] + '/test-balsam/data/libe_test-balsam' modify_Balsam_worker() run_Balsam_job() diff --git a/libensemble/tests/regression_tests/script_test_balsam_hworld.py b/libensemble/tests/regression_tests/script_test_balsam_hworld.py index fd9430229..e86ff3b82 100644 --- a/libensemble/tests/regression_tests/script_test_balsam_hworld.py +++ b/libensemble/tests/regression_tests/script_test_balsam_hworld.py @@ -31,12 +31,6 @@ def build_simfunc(): is_master = MPI.COMM_WORLD.Get_rank() == 0 cores_per_job = 1 -logical_cores = multiprocessing.cpu_count() -cores_all_jobs = nworkers*cores_per_job - -if is_master: - print('\nCores req: {} Cores avail: {}\n'.format(cores_all_jobs, - logical_cores)) sim_app = './my_simjob.x' if not os.path.isfile(sim_app): @@ -63,7 +57,7 @@ def build_simfunc(): persis_info = per_worker_stream({}, nworkers + 1) -exit_criteria = {'elapsed_wallclock_time': 30} +exit_criteria = {'elapsed_wallclock_time': 15} # Perform the run H, persis_info, flag = libE(sim_specs, gen_specs, exit_criteria, From 965fa05ce190a8820972899db7d3c11ce6c950ba Mon Sep 17 00:00:00 2001 From: jlnav Date: Tue, 10 Sep 2019 16:41:48 -0500 Subject: [PATCH 3/9] adjusting prints --- conda/test_balsam_hworld.py | 14 ++++++++------ .../regression_tests/script_test_balsam_hworld.py | 3 +-- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/conda/test_balsam_hworld.py b/conda/test_balsam_hworld.py index 844847ef6..c5cd6fb7c 100644 --- a/conda/test_balsam_hworld.py +++ b/conda/test_balsam_hworld.py @@ -27,7 +27,7 @@ def wait_for_job_dir(basedb): print('Waiting for Job Directory'.format(sleeptime)) while len(os.listdir(basedb)) == 0 and sleeptime < 15: - print('{}'.format(sleeptime), end=" ", flush=True) + print(sleeptime, end=" ", flush=True) time.sleep(1) 
sleeptime += 1 @@ -43,9 +43,9 @@ def wait_for_job_output(jobdir): print('Checking for Balsam output file: {}'.format(output)) while not os.path.isfile(output) and sleeptime < 30: - print('{}'.format(sleeptime), end=" ", flush=True) - time.sleep(2) - sleeptime += 2 + print(sleeptime, end=" ", flush=True) + time.sleep(1) + sleeptime += 1 return output @@ -54,7 +54,9 @@ def print_job_output(outscript): sleeptime = 0 print('Output file found. Waiting for complete Balsam Job Output.') - lastlines = ['Job 4 done on worker 1\n', 'Job 4 done on worker 2\n'] + lastlines = ['Job 4 done on worker 1\n', 'Job 4 done on worker 2\n', + 'Run completed.\n'] + lastposition = 0 while sleeptime < 60: @@ -66,7 +68,7 @@ def print_job_output(outscript): if len(new) > 0: print(new, flush=True) else: - print('{}'.format(sleeptime), end=" ", flush=True) + print(sleeptime, end=" ", flush=True) if any(new.endswith(line) for line in lastlines): break diff --git a/libensemble/tests/regression_tests/script_test_balsam_hworld.py b/libensemble/tests/regression_tests/script_test_balsam_hworld.py index e86ff3b82..16772441b 100644 --- a/libensemble/tests/regression_tests/script_test_balsam_hworld.py +++ b/libensemble/tests/regression_tests/script_test_balsam_hworld.py @@ -3,7 +3,6 @@ import os import numpy as np -import multiprocessing import mpi4py from mpi4py import MPI @@ -57,7 +56,7 @@ def build_simfunc(): persis_info = per_worker_stream({}, nworkers + 1) -exit_criteria = {'elapsed_wallclock_time': 15} +exit_criteria = {'elapsed_wallclock_time': 30} # Perform the run H, persis_info, flag = libE(sim_specs, gen_specs, exit_criteria, From a1171030b8506560aa8758d3616b395f730dce51 Mon Sep 17 00:00:00 2001 From: jlnav Date: Wed, 11 Sep 2019 10:59:24 -0500 Subject: [PATCH 4/9] Modify Balsam to print hosttype --- conda/test_balsam_hworld.py | 21 +++++++++++++++++- libensemble/tests/regression_tests/common.py | 23 ++++++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) diff --git 
a/conda/test_balsam_hworld.py b/conda/test_balsam_hworld.py index c5cd6fb7c..cef3f1ed1 100644 --- a/conda/test_balsam_hworld.py +++ b/conda/test_balsam_hworld.py @@ -2,7 +2,7 @@ import os import time import libensemble -from libensemble.tests.regression_tests.common import modify_Balsam_worker +from libensemble.tests.regression_tests.common import modify_Balsam_worker, modify_Balsam_hostprint # TESTSUITE_COMMS: local # TESTSUITE_NPROCS: 3 @@ -98,6 +98,7 @@ def move_job_coverage(jobdir): basedb = os.environ['HOME'] + '/test-balsam/data/libe_test-balsam' modify_Balsam_worker() + modify_Balsam_hostprint() run_Balsam_job() jobdir = wait_for_job_dir(basedb) @@ -106,3 +107,21 @@ def move_job_coverage(jobdir): move_job_coverage(jobdir) print('Test complete.') + +# travis_run_before_install && travis_run_install && travis_run_before_script + +# 11-Sep-2019 14:36:27|7301| ERROR|balsam:47] Uncaught Exception : Cooley WorkerGroup needs workers_file to setup +# Traceback (most recent call last): +# File "/home/travis/build/Libensemble/balsam/balsam/launcher/launcher.py", line 443, in +# main(args) +# File "/home/travis/build/Libensemble/balsam/balsam/launcher/launcher.py", line 422, in main +# launcher = Launcher(wf_filter, timelimit_min, gpus_per_node) +# File "/home/travis/build/Libensemble/balsam/balsam/launcher/launcher.py", line 104, in __init__ +# self.worker_group = worker.WorkerGroup() +# File "/home/travis/build/Libensemble/balsam/balsam/launcher/worker.py", line 50, in __init__ +# self.setup() +# File "/home/travis/build/Libensemble/balsam/balsam/launcher/worker.py", line 112, in setup_COOLEY +# raise ValueError("Cooley WorkerGroup needs workers_file to setup") +# ValueError: Cooley WorkerGroup needs workers_file to setup + +# print('HOST TYPE:', self.host_type) diff --git a/libensemble/tests/regression_tests/common.py b/libensemble/tests/regression_tests/common.py index 65f8ff953..d8094a5fc 100644 --- a/libensemble/tests/regression_tests/common.py +++ 
b/libensemble/tests/regression_tests/common.py @@ -225,3 +225,26 @@ def modify_Balsam_pyCoverage(): with open(balsam_commands_path, 'w') as f: for line in lines: f.write(line) + +def modify_Balsam_hostprint(): + # Also modify Balsam to print Host type (for debugging purposes) + import balsam + + print_line = " print('HOST TYPE: ', self.host_type)\n" + + workerfile = 'worker.py' + balsam_path = os.path.dirname(balsam.__file__) + '/launcher' + balsam_worker_path = os.path.join(balsam_path, workerfile) + + with open(balsam_worker_path, 'r') as f: + lines = f.readlines() + + newlines = [] + for line in lines: + newlines.append(line) + if line == " self.host_type = JobEnv.host_type\n": + newlines.append(print_line) + + with open(balsam_worker_path, 'w') as f: + for line in newlines: + f.write(line) From a34f86da8a4788865089dde6c39621fc3d730f7b Mon Sep 17 00:00:00 2001 From: jlnav Date: Wed, 11 Sep 2019 11:07:43 -0500 Subject: [PATCH 5/9] flake8 --- libensemble/tests/regression_tests/common.py | 1 + 1 file changed, 1 insertion(+) diff --git a/libensemble/tests/regression_tests/common.py b/libensemble/tests/regression_tests/common.py index d8094a5fc..2f64cf673 100644 --- a/libensemble/tests/regression_tests/common.py +++ b/libensemble/tests/regression_tests/common.py @@ -226,6 +226,7 @@ def modify_Balsam_pyCoverage(): for line in lines: f.write(line) + def modify_Balsam_hostprint(): # Also modify Balsam to print Host type (for debugging purposes) import balsam From 32a980ba4392a9744f7439a17aa7b08ba69cb86f Mon Sep 17 00:00:00 2001 From: jlnav Date: Wed, 11 Sep 2019 11:29:16 -0500 Subject: [PATCH 6/9] don't append print line on subsequent runs --- libensemble/tests/regression_tests/common.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libensemble/tests/regression_tests/common.py b/libensemble/tests/regression_tests/common.py index 2f64cf673..f804592d5 100644 --- a/libensemble/tests/regression_tests/common.py +++ b/libensemble/tests/regression_tests/common.py @@ 
-242,10 +242,13 @@ def modify_Balsam_hostprint(): newlines = [] for line in lines: + if line == print_line: + continue newlines.append(line) if line == " self.host_type = JobEnv.host_type\n": newlines.append(print_line) + with open(balsam_worker_path, 'w') as f: for line in newlines: f.write(line) From 975b119a16ba877c0f937e711c634d265cea9d8b Mon Sep 17 00:00:00 2001 From: jlnav Date: Wed, 11 Sep 2019 11:41:25 -0500 Subject: [PATCH 7/9] flake8... --- libensemble/tests/regression_tests/common.py | 1 - 1 file changed, 1 deletion(-) diff --git a/libensemble/tests/regression_tests/common.py b/libensemble/tests/regression_tests/common.py index f804592d5..d5e03d306 100644 --- a/libensemble/tests/regression_tests/common.py +++ b/libensemble/tests/regression_tests/common.py @@ -248,7 +248,6 @@ def modify_Balsam_hostprint(): if line == " self.host_type = JobEnv.host_type\n": newlines.append(print_line) - with open(balsam_worker_path, 'w') as f: for line in newlines: f.write(line) From 1349275d8e1c62b20987311032c88f32ca4f275b Mon Sep 17 00:00:00 2001 From: jlnav Date: Wed, 11 Sep 2019 14:12:42 -0500 Subject: [PATCH 8/9] test commit --- conda/test_balsam_hworld.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/conda/test_balsam_hworld.py b/conda/test_balsam_hworld.py index cef3f1ed1..5270763e5 100644 --- a/conda/test_balsam_hworld.py +++ b/conda/test_balsam_hworld.py @@ -123,5 +123,3 @@ def move_job_coverage(jobdir): # File "/home/travis/build/Libensemble/balsam/balsam/launcher/worker.py", line 112, in setup_COOLEY # raise ValueError("Cooley WorkerGroup needs workers_file to setup") # ValueError: Cooley WorkerGroup needs workers_file to setup - -# print('HOST TYPE:', self.host_type) From 4cc561012d915bfce769468dbe0d76f925d5604d Mon Sep 17 00:00:00 2001 From: jlnav Date: Wed, 11 Sep 2019 15:07:56 -0500 Subject: [PATCH 9/9] additional modifications to Balsam for debugging --- conda/test_balsam_hworld.py | 3 ++- libensemble/tests/regression_tests/common.py | 26 
+++++++++++++++----- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/conda/test_balsam_hworld.py b/conda/test_balsam_hworld.py index 5270763e5..bb728b3d7 100644 --- a/conda/test_balsam_hworld.py +++ b/conda/test_balsam_hworld.py @@ -108,7 +108,8 @@ def move_job_coverage(jobdir): print('Test complete.') -# travis_run_before_install && travis_run_install && travis_run_before_script + +# IN BALSAM LOG: # 11-Sep-2019 14:36:27|7301| ERROR|balsam:47] Uncaught Exception : Cooley WorkerGroup needs workers_file to setup # Traceback (most recent call last): diff --git a/libensemble/tests/regression_tests/common.py b/libensemble/tests/regression_tests/common.py index d5e03d306..82b2bc49e 100644 --- a/libensemble/tests/regression_tests/common.py +++ b/libensemble/tests/regression_tests/common.py @@ -228,10 +228,20 @@ def modify_Balsam_pyCoverage(): def modify_Balsam_hostprint(): - # Also modify Balsam to print Host type (for debugging purposes) + # Also modify Balsam Worker & Worker Group to print Host type (for debugging + # purposes). 
Balsam test bug may be caused by setup_COOLEY() being called + # instead of setup_DEFAULT() within Balsam's worker.py import balsam - print_line = " print('HOST TYPE: ', self.host_type)\n" + print_lines = {"host": " print('HOST TYPE: ', self.host_type)\n", + "COOLEY": " print('IN setup_COOLEY')\n", + "DEFAULT": " print('IN setup_DEFAULT')\n"} + + host_prior_lines = [" self.host_type = JobEnv.host_type\n", + " self.host_type = host_type\n"] + + setup_prior_lines = [" def setup_COOLEY(self):\n", + " def setup_DEFAULT(self):\n"] workerfile = 'worker.py' balsam_path = os.path.dirname(balsam.__file__) + '/launcher' @@ -242,11 +252,15 @@ def modify_Balsam_hostprint(): newlines = [] for line in lines: - if line == print_line: + if line in print_lines.values(): continue - newlines.append(line) - if line == " self.host_type = JobEnv.host_type\n": - newlines.append(print_line) + newlines.append(line) # Line of code from prior + if line in host_prior_lines: # + newlines.append(print_lines['host']) + elif line == setup_prior_lines[0]: + newlines.append(print_lines['COOLEY']) + elif line == setup_prior_lines[1]: + newlines.append(print_lines['DEFAULT']) with open(balsam_worker_path, 'w') as f: for line in newlines: