Make the fraction of jobs that keep a doubled core count configurable

Adds the option --percentage-jobs-double-cores (float, default 0.03) and
passes the whole parsed-options object to analyse_parsed_msas and
improve_cores_assignment. When the option is > 0, every MSA whose taxa count
is below the value found at index int(len(msas) * (1 - option)) of the sorted
taxa counts gets its core count halved (never below 1); when the option is 0
no core count is modified. The --core-assignment option is moved up, right
after the scheduling-strategy option. Two example scripts are added.

Review fixes relative to the submitted patch (all confined to added lines, so
the hunk line counts are unchanged):
  * run_small_heuristic.sh: use the full option name
    --percentage-jobs-double-cores instead of relying on argparse prefix
    abbreviation of "--percentage-jobs-double-core".
  * run_small_onecore.sh: call python3, like run_small_heuristic.sh.
  * Help text: the value is a fraction between 0 and 1, not a percentage.
  * improve_cores_assignment: clamp the index into taxa_numbers so that a
    value above 1 (negative index) or a value so small that the ratio rounds
    to 1.0 (index == len) cannot pick a wrong element or raise IndexError.

NOTE(review): line breaks and indentation were lost in the copy under review;
the 2-space layout below is a reconstruction - confirm it against the
repository before applying (or apply with whitespace tolerance). The blob
hashes on the "index" lines were not recomputed after the fixes above.

diff --git a/examples/run_small_heuristic.sh b/examples/run_small_heuristic.sh
new file mode 100755
index 0000000..069333a
--- /dev/null
+++ b/examples/run_small_heuristic.sh
@@ -0,0 +1,10 @@
+
+multiraxml=../multi-raxml/multi-raxml.py
+output=results/small_heuristic
+msa_directory=data/small/fasta_files/
+raxml_global_options=data/small/raxml_global_options.txt
+cores=4
+
+rm -rf ${output}
+python3 ${multiraxml} -a ${msa_directory} -o ${output} -r ${raxml_global_options} -c ${cores} --percentage-jobs-double-cores 0
+
diff --git a/examples/run_small_onecore.sh b/examples/run_small_onecore.sh
new file mode 100755
index 0000000..305571a
--- /dev/null
+++ b/examples/run_small_onecore.sh
@@ -0,0 +1,10 @@
+
+multiraxml=../multi-raxml/multi-raxml.py
+output=results/small_onecore
+msa_directory=data/small/fasta_files/
+raxml_global_options=data/small/raxml_global_options.txt
+cores=4
+
+rm -rf ${output}
+python3 ${multiraxml} -a ${msa_directory} -o ${output} -r ${raxml_global_options} -c ${cores} --scheduler onecore
+
diff --git a/multi-raxml/mr_arguments.py b/multi-raxml/mr_arguments.py
index a8d7610..d14307a 100644
--- a/multi-raxml/mr_arguments.py
+++ b/multi-raxml/mr_arguments.py
@@ -46,6 +46,11 @@ def parse_arguments():
       choices=["split", "onecore"],
       default="split",
       help="Sceduling strategy. onecore might be interesting for debugging crashing jobs")
+  parser.add_argument("--core-assignment",
+      dest="core_assignment",
+      choices=["high", "medium", "low"],
+      default="medium",
+      help="Policy to decide the per-job number of cores (low favors a low per-job number of cores)")
   # raxml arguments
   parser.add_argument("--per-msa-raxml-parameters",
       dest="per_msa_raxml_parameters",
@@ -71,6 +76,11 @@ def parse_arguments():
       type=int,
       default=0,
       help="The number of bootstrap trees to compute")
+  parser.add_argument("--percentage-jobs-double-cores",
+      dest="percentage_jobs_double_cores",
+      type=float,
+      default=0.03,
+      help="Fraction (between 0 and 1) of jobs that will receive twice as many cores")
   # modeltest arguments
   parser.add_argument("-m", "--use-modeltest",
       dest="use_modeltest",
@@ -94,11 +104,6 @@ def parse_arguments():
       action="store_true",
       default=False,
       help="For experimenting only! Removes the sorting step in the scheduler")
-  parser.add_argument("--core-assignment",
-      dest="core_assignment",
-      choices=["high", "medium", "low"],
-      default="medium",
-      help="Policy to decide the per-job number of cores (low favors a low per-job number of cores)")
 
   op = parser.parse_args()
 
diff --git a/multi-raxml/mr_raxml.py b/multi-raxml/mr_raxml.py
index 29ac381..c5bfa04 100644
--- a/multi-raxml/mr_raxml.py
+++ b/multi-raxml/mr_raxml.py
@@ -41,7 +41,7 @@ def parse_msa_info(log_file, msa, core_assignment):
   if (msa.sites * msa.taxa == 0):
     msa.valid = False
 
-def improve_cores_assignment(msas):
+def improve_cores_assignment(msas, op):
   average_taxa = 0
   max_taxa = 0
   average_sites = 0
@@ -54,18 +54,20 @@ def improve_cores_assignment(msas):
     max_taxa = max(max_taxa, msa.taxa)
     max_sites = max(max_sites, msa.patterns)
   taxa_numbers.sort()
-  limit_taxa = taxa_numbers[(len(msas) * 97) // 100]
-  print("Limit taxa: " + str(limit_taxa))
   average_taxa /= len(msas)
   average_sites /= len(msas)
   print("Average number of taxa: " + str(average_taxa))
   print("Max number of taxa: " + str(max_taxa))
   print("Average number of sites: " + str(average_sites))
   print("Max number of sites: " + str(max_sites))
-  for name, msa in msas.items():
-    if (msa.taxa < limit_taxa):
-      if (msa.cores > 1):
-        msa.cores = msa.cores // 2
+  if (op.percentage_jobs_double_cores > 0.0):
+    ratio = 1.0 - op.percentage_jobs_double_cores
+    limit_taxa = taxa_numbers[min(max(int(len(msas) * ratio), 0), len(msas) - 1)]
+    print("Limit taxa: " + str(limit_taxa))
+    for name, msa in msas.items():
+      if (msa.taxa < limit_taxa):
+        if (msa.cores > 1):
+          msa.cores = msa.cores // 2
 
 
 def run_parsing_step(msas, library, scheduler, parse_run_output_dir, cores):
@@ -89,8 +91,9 @@ def run_parsing_step(msas, library, scheduler, parse_run_output_dir, cores):
       writer.write("\n")
   mr_scheduler.run_mpi_scheduler(library, scheduler, parse_commands_file, parse_run_output_dir, cores)
 
-def analyse_parsed_msas(msas, core_assignment, output_dir):
+def analyse_parsed_msas(msas, op, output_dir):
   """ Analyse results from run_parsing_step and store them into msas """
+  core_assignment = op.core_assignment
   parse_run_output_dir = os.path.join(output_dir, "parse_run")
   parse_run_results = os.path.join(parse_run_output_dir, "results")
   invalid_msas = []
@@ -103,7 +106,7 @@ def analyse_parsed_msas(msas, core_assignment, output_dir):
     parse_result = parse_msa_info(parse_run_log, msa, core_assignment)
     if (not msa.valid):
       invalid_msas.append(msa)
-  improve_cores_assignment(msas)
+  improve_cores_assignment(msas, op)
   if (len(invalid_msas) > 0):
     invalid_msas_file = os.path.join(output_dir, "invalid_msas.txt")
     print("[Warning] Found " + str(len(invalid_msas)) + " invalid MSAs (see " + invalid_msas_file + ")")
diff --git a/multi-raxml/multi-raxml.py b/multi-raxml/multi-raxml.py
index 3885acb..b0bd698 100644
--- a/multi-raxml/multi-raxml.py
+++ b/multi-raxml/multi-raxml.py
@@ -54,7 +54,7 @@ def main_raxml_runner(op):
     mr_raxml.run_parsing_step(msas, raxml_library, op.scheduler, os.path.join(output_dir, "parse_run"), op.cores)
     mr_checkpoint.write_checkpoint(output_dir, 1)
     timed_print(start, "end of parsing mpi-scheduler run")
-  mr_raxml.analyse_parsed_msas(msas, op.core_assignment, output_dir)
+  mr_raxml.analyse_parsed_msas(msas, op, output_dir)
   timed_print(start, "end of anlysing parsing results")
   if (op.use_modeltest):
     if (checkpoint < 2):