Add an option to change the percentage of jobs that receive twice as many cores
Fixes #23
BenoitMorel committed Jun 17, 2018
1 parent 0004038 commit 82dece0
Showing 5 changed files with 43 additions and 15 deletions.
10 changes: 10 additions & 0 deletions examples/run_small_heuristic.sh
@@ -0,0 +1,10 @@

multiraxml=../multi-raxml/multi-raxml.py
output=results/small_heuristic
msa_directory=data/small/fasta_files/
raxml_global_options=data/small/raxml_global_options.txt
cores=4

rm -rf ${output}
python3 ${multiraxml} -a ${msa_directory} -o ${output} -r ${raxml_global_options} -c ${cores} --percentage-jobs-double-core 0

10 changes: 10 additions & 0 deletions examples/run_small_onecore.sh
@@ -0,0 +1,10 @@

multiraxml=../multi-raxml/multi-raxml.py
output=results/small_onecore
msa_directory=data/small/fasta_files/
raxml_global_options=data/small/raxml_global_options.txt
cores=4

rm -rf ${output}
python ${multiraxml} -a ${msa_directory} -o ${output} -r ${raxml_global_options} -c ${cores} --scheduler onecore

15 changes: 10 additions & 5 deletions multi-raxml/mr_arguments.py
@@ -46,6 +46,11 @@ def parse_arguments():
choices=["split", "onecore"],
default="split",
help="Sceduling strategy. onecore might be interesting for debugging crashing jobs")
parser.add_argument("--core-assignment",
dest="core_assignment",
choices=["high", "medium", "low"],
default="medium",
help="Policy to decide the per-job number of cores (low favors a low per-job number of cores)")
# raxml arguments
parser.add_argument("--per-msa-raxml-parameters",
dest="per_msa_raxml_parameters",
@@ -71,6 +76,11 @@ def parse_arguments():
type=int,
default=0,
help="The number of bootstrap trees to compute")
parser.add_argument("--percentage-jobs-double-cores",
dest="percentage_jobs_double_cores",
type=float,
default=0.03,
help="Percentage (between 0 and 1) of jobs that will receive twice more cores")
# modeltest arguments
parser.add_argument("-m", "--use-modeltest",
dest="use_modeltest",
@@ -94,11 +104,6 @@ def parse_arguments():
action="store_true",
default=False,
help="For experimenting only! Removes the sorting step in the scheduler")
parser.add_argument("--core-assignment",
dest="core_assignment",
choices=["high", "medium", "low"],
default="medium",
help="Policy to decide the per-job number of cores (low favors a low per-job number of cores)")


op = parser.parse_args()
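For reference, a minimal, self-contained argparse sketch of the new flag (assumed for illustration, not code from the repository). Two details worth noting: run_small_heuristic.sh above passes --percentage-jobs-double-core without the trailing "s", which argparse still resolves to this option through its default prefix matching, and a value of 0 disables the core-doubling heuristic entirely (see the guard added in mr_raxml.py below).

import argparse

# Standalone sketch of the new option; name, dest, and default mirror the diff above.
parser = argparse.ArgumentParser()
parser.add_argument("--percentage-jobs-double-cores",
                    dest="percentage_jobs_double_cores",
                    type=float,
                    default=0.03,
                    help="Percentage (between 0 and 1) of jobs that will receive twice as many cores")

op = parser.parse_args(["--percentage-jobs-double-cores", "0.05"])
print(op.percentage_jobs_double_cores)   # 0.05
# An unambiguous prefix is also accepted, as in the example script:
op = parser.parse_args(["--percentage-jobs-double-core", "0"])
print(op.percentage_jobs_double_cores)   # 0.0 -> the doubling step is skipped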
21 changes: 12 additions & 9 deletions multi-raxml/mr_raxml.py
@@ -41,7 +41,7 @@ def parse_msa_info(log_file, msa, core_assignment):
if (msa.sites * msa.taxa == 0):
msa.valid = False

def improve_cores_assignment(msas):
def improve_cores_assignment(msas, op):
average_taxa = 0
max_taxa = 0
average_sites = 0
@@ -54,18 +54,20 @@ def improve_cores_assignment(msas):
max_taxa = max(max_taxa, msa.taxa)
max_sites = max(max_sites, msa.patterns)
taxa_numbers.sort()
limit_taxa = taxa_numbers[(len(msas) * 97) // 100]
print("Limit taxa: " + str(limit_taxa))
average_taxa /= len(msas)
average_sites /= len(msas)
print("Average number of taxa: " + str(average_taxa))
print("Max number of taxa: " + str(max_taxa))
print("Average number of sites: " + str(average_sites))
print("Max number of sites: " + str(max_sites))
for name, msa in msas.items():
if (msa.taxa < limit_taxa):
if (msa.cores > 1):
msa.cores = msa.cores // 2
if (op.percentage_jobs_double_cores > 0.0):
ratio = 1.0 - op.percentage_jobs_double_cores
limit_taxa = taxa_numbers[int(float(len(msas)) * ratio)]
print("Limit taxa: " + str(limit_taxa))
for name, msa in msas.items():
if (msa.taxa < limit_taxa):
if (msa.cores > 1):
msa.cores = msa.cores // 2


def run_parsing_step(msas, library, scheduler, parse_run_output_dir, cores):
@@ -89,8 +91,9 @@ def run_parsing_step(msas, library, scheduler, parse_run_output_dir, cores):
writer.write("\n")
mr_scheduler.run_mpi_scheduler(library, scheduler, parse_commands_file, parse_run_output_dir, cores)

def analyse_parsed_msas(msas, core_assignment, output_dir):
def analyse_parsed_msas(msas, op, output_dir):
""" Analyse results from run_parsing_step and store them into msas """
core_assignment = op.core_assignment
parse_run_output_dir = os.path.join(output_dir, "parse_run")
parse_run_results = os.path.join(parse_run_output_dir, "results")
invalid_msas = []
@@ -103,7 +106,7 @@ def analyse_parsed_msas(msas, core_assignment, output_dir):
parse_result = parse_msa_info(parse_run_log, msa, core_assignment)
if (not msa.valid):
invalid_msas.append(msa)
improve_cores_assignment(msas)
improve_cores_assignment(msas, op)
if (len(invalid_msas) > 0):
invalid_msas_file = os.path.join(output_dir, "invalid_msas.txt")
print("[Warning] Found " + str(len(invalid_msas)) + " invalid MSAs (see " + invalid_msas_file + ")")
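To make the effect of the new option concrete, here is a toy, self-contained sketch of the cutoff logic above (assumed for illustration; the Msa class is a stand-in, not the project's type). With the default percentage of 0.03 and 100 alignments, the ratio is 0.97, limit_taxa is taken from the 97th entry of the sorted taxa counts, and only the few taxa-richest jobs keep their larger core count while every other job with more than one core is halved.

class Msa:
  """Stand-in for the project's MSA record; only the fields used here."""
  def __init__(self, taxa, cores):
    self.taxa = taxa
    self.cores = cores

percentage_jobs_double_cores = 0.03   # default from mr_arguments.py
# 100 toy alignments with 1..100 taxa, all initially assigned 4 cores.
msas = {"msa" + str(i): Msa(taxa=i + 1, cores=4) for i in range(100)}

taxa_numbers = sorted(msa.taxa for msa in msas.values())
if percentage_jobs_double_cores > 0.0:
  ratio = 1.0 - percentage_jobs_double_cores
  limit_taxa = taxa_numbers[int(float(len(msas)) * ratio)]   # index 97 -> 98 taxa here
  for name, msa in msas.items():
    if msa.taxa < limit_taxa and msa.cores > 1:
      msa.cores = msa.cores // 2

kept = sum(1 for msa in msas.values() if msa.cores == 4)
print(limit_taxa, kept)   # 98 3 -> only the 3 taxa-richest jobs keep 4 cores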
2 changes: 1 addition & 1 deletion multi-raxml/multi-raxml.py
@@ -54,7 +54,7 @@ def main_raxml_runner(op):
mr_raxml.run_parsing_step(msas, raxml_library, op.scheduler, os.path.join(output_dir, "parse_run"), op.cores)
mr_checkpoint.write_checkpoint(output_dir, 1)
timed_print(start, "end of parsing mpi-scheduler run")
mr_raxml.analyse_parsed_msas(msas, op.core_assignment, output_dir)
mr_raxml.analyse_parsed_msas(msas, op, output_dir)
timed_print(start, "end of anlysing parsing results")
if (op.use_modeltest):
if (checkpoint < 2):
