Skip to content

Commit

Permalink
Split release job in single steps and chains them up in nextflow
Browse files Browse the repository at this point in the history
  • Loading branch information
tcezard committed Jun 20, 2024
1 parent 203b559 commit 3fb084e
Show file tree
Hide file tree
Showing 11 changed files with 692 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -36,16 +36,17 @@ def get_release_properties_for_assembly(private_config_xml_file, profile, taxono

def create_release_properties_file_for_assembly(private_config_xml_file, profile, taxonomy_id, assembly_accession,
release_species_inventory_table, release_version,
assembly_release_folder):
assembly_release_folder, job_name='ACCESSION_RELEASE_JOB',
file_name='release'):
os.makedirs(assembly_release_folder, exist_ok=True)
output_file = "{0}/{1}_release.properties".format(assembly_release_folder, assembly_accession)
output_file = f"{assembly_release_folder}/{assembly_accession}_{file_name}.properties"
release_properties = get_release_properties_for_assembly(
private_config_xml_file, profile, taxonomy_id, assembly_accession, release_species_inventory_table,
release_version
)
properties_string = SpringPropertiesGenerator(profile, private_config_xml_file).get_release_properties(
temp_mongo_db=release_properties['mongo_accessioning_db'],
job_name='ACCESSION_RELEASE_JOB',
job_name=job_name,
assembly_accession=assembly_accession,
taxonomy_accession=taxonomy_id,
fasta=release_properties['fasta_path'],
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Copyright 2020 EMBL - European Bioinformatics Institute
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import click
import sys
import traceback

from ebi_eva_common_pyutils.logger import logging_config

from run_release_in_embassy.create_release_properties_file import create_release_properties_file_for_assembly
from run_release_in_embassy.release_common_utils import open_mongo_port_to_tempmongo, close_mongo_port_to_tempmongo
from ebi_eva_common_pyutils.command_utils import run_command_with_output


logger = logging_config.get_logger(__name__)


def run_release_for_assembly(private_config_xml_file, profile, taxonomy_id, assembly_accession,
release_species_inventory_table, release_version, assembly_release_folder, release_jar_path,
memory):
exit_code = -1
try:
port_forwarding_process_id, mongo_port = open_mongo_port_to_tempmongo(private_config_xml_file, profile,
taxonomy_id, assembly_accession,
release_species_inventory_table,
release_version)
release_properties_file = create_release_properties_file_for_assembly(private_config_xml_file, profile,
taxonomy_id, assembly_accession,
release_species_inventory_table,
release_version, assembly_release_folder,
job_name='ACTIVE_ACCESSION_RELEASE_JOB',
file_name='active_release')
release_command = 'java -Xmx{0}g -jar {1} --spring.config.location=file:{2} --spring.data.mongodb.port={3}'\
.format(memory, release_jar_path, release_properties_file, mongo_port)
run_command_with_output("Running release pipeline for assembly: " + assembly_accession, release_command)
exit_code = 0
except Exception as ex:
logger.error("Encountered an error while running release for assembly: " + assembly_accession + "\n"
+ traceback.format_exc())
exit_code = -1
finally:
close_mongo_port_to_tempmongo(port_forwarding_process_id)
logger.info("Java release pipeline run completed with exit_code: " + str(exit_code))
sys.exit(exit_code)


@click.option("--private-config-xml-file", help="ex: /path/to/eva-maven-settings.xml", required=True)
@click.option("--profile", help="Maven profile to use, ex: internal", required=True)
@click.option("--taxonomy-id", help="ex: 9913", required=True)
@click.option("--assembly-accession", help="ex: GCA_000003055.6", required=True)
@click.option("--release-species-inventory-table", default="eva_progress_tracker.clustering_release_tracker",
required=False)
@click.option("--release-version", help="ex: 2", type=int, required=True)
@click.option("--assembly-release-folder", required=True)
@click.option("--release-jar-path", required=True)
@click.option("--memory", help="Memory in GB. ex: 8", default=8, type=int, required=False)
@click.command()
def main(private_config_xml_file, profile, taxonomy_id, assembly_accession, release_species_inventory_table,
release_version, assembly_release_folder, release_jar_path, memory):
logging_config.add_stdout_handler()
run_release_for_assembly(private_config_xml_file, profile, taxonomy_id, assembly_accession,
release_species_inventory_table, release_version, assembly_release_folder, release_jar_path,
memory)


if __name__ == "__main__":
main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Copyright 2020 EMBL - European Bioinformatics Institute
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import click
import sys
import traceback

from ebi_eva_common_pyutils.logger import logging_config

from run_release_in_embassy.create_release_properties_file import create_release_properties_file_for_assembly
from run_release_in_embassy.release_common_utils import open_mongo_port_to_tempmongo, close_mongo_port_to_tempmongo
from ebi_eva_common_pyutils.command_utils import run_command_with_output


logger = logging_config.get_logger(__name__)


def run_release_for_assembly(private_config_xml_file, profile, taxonomy_id, assembly_accession,
release_species_inventory_table, release_version, assembly_release_folder, release_jar_path,
memory):
exit_code = -1
try:
port_forwarding_process_id, mongo_port = open_mongo_port_to_tempmongo(private_config_xml_file, profile,
taxonomy_id, assembly_accession,
release_species_inventory_table,
release_version)
release_properties_file = create_release_properties_file_for_assembly(private_config_xml_file, profile,
taxonomy_id, assembly_accession,
release_species_inventory_table,
release_version, assembly_release_folder,
job_name='DEPRECATED_ACCESSION_RELEASE_JOB',
file_name='deprecated_release')
release_command = 'java -Xmx{0}g -jar {1} --spring.config.location=file:{2} --spring.data.mongodb.port={3}'\
.format(memory, release_jar_path, release_properties_file, mongo_port)
run_command_with_output("Running release pipeline for assembly: " + assembly_accession, release_command)
exit_code = 0
except Exception as ex:
logger.error("Encountered an error while running release for assembly: " + assembly_accession + "\n"
+ traceback.format_exc())
exit_code = -1
finally:
close_mongo_port_to_tempmongo(port_forwarding_process_id)
logger.info("Java release pipeline run completed with exit_code: " + str(exit_code))
sys.exit(exit_code)


@click.option("--private-config-xml-file", help="ex: /path/to/eva-maven-settings.xml", required=True)
@click.option("--profile", help="Maven profile to use, ex: internal", required=True)
@click.option("--taxonomy-id", help="ex: 9913", required=True)
@click.option("--assembly-accession", help="ex: GCA_000003055.6", required=True)
@click.option("--release-species-inventory-table", default="eva_progress_tracker.clustering_release_tracker",
required=False)
@click.option("--release-version", help="ex: 2", type=int, required=True)
@click.option("--assembly-release-folder", required=True)
@click.option("--release-jar-path", required=True)
@click.option("--memory", help="Memory in GB. ex: 8", default=8, type=int, required=False)
@click.command()
def main(private_config_xml_file, profile, taxonomy_id, assembly_accession, release_species_inventory_table,
release_version, assembly_release_folder, release_jar_path, memory):
logging_config.add_stdout_handler()
run_release_for_assembly(private_config_xml_file, profile, taxonomy_id, assembly_accession,
release_species_inventory_table, release_version, assembly_release_folder, release_jar_path,
memory)


if __name__ == "__main__":
main()
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
nextflow.enable.dsl=2

workflow {
initiate_release_status_for_assembly('initiate') | copy_accessioning_collections_to_embassy | run_release_for_assembly | \
merge_dbsnp_eva_release_files | sort_bgzip_index_release_files | validate_release_vcf_files | \
analyze_vcf_validation_results | count_rs_ids_in_release_files | validate_rs_release_files | \
update_sequence_names_to_ena | update_release_status_for_assembly
initiate_release_status_for_assembly('initiate') | copy_accessioning_collections_to_embassy | \
run_release_active_for_assembly | run_release_merged_for_assembly | run_release_deprecated_for_assembly | \
run_release_merged_deprecated_for_assembly | merge_dbsnp_eva_release_files | sort_bgzip_index_release_files | \
validate_release_vcf_files | analyze_vcf_validation_results | count_rs_ids_in_release_files | \
validate_rs_release_files | update_sequence_names_to_ena | update_release_status_for_assembly
}

process initiate_release_status_for_assembly {
Expand Down Expand Up @@ -43,7 +44,7 @@ process copy_accessioning_collections_to_embassy {
"""
}

process run_release_for_assembly {
process run_release_active_for_assembly {

label 'long_time', 'med_mem'

Expand All @@ -56,7 +57,58 @@ process run_release_for_assembly {
script:
"""
export PYTHONPATH=$params.python_path
$params.executable.python_interpreter -m run_release_in_embassy.run_release_for_assembly --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --assembly-release-folder $params.assembly_folder --release-jar-path $params.jar.release_pipeline 1>> $params.log_file 2>&1
$params.executable.python_interpreter -m run_release_in_embassy.run_release_active_for_assembly --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --assembly-release-folder $params.assembly_folder --release-jar-path $params.jar.release_pipeline 1>> $params.log_file 2>&1
"""
}

process run_release_merged_for_assembly {

label 'long_time', 'med_mem'

input:
val flag

output:
val true, emit: flag

script:
"""
export PYTHONPATH=$params.python_path
$params.executable.python_interpreter -m run_release_in_embassy.run_release_merged_for_assembly --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --assembly-release-folder $params.assembly_folder --release-jar-path $params.jar.release_pipeline 1>> $params.log_file 2>&1
"""
}

process run_release_deprecated_for_assembly {

label 'long_time', 'med_mem'

input:
val flag

output:
val true, emit: flag

script:
"""
export PYTHONPATH=$params.python_path
$params.executable.python_interpreter -m run_release_in_embassy.run_release_deprecated_for_assembly --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --assembly-release-folder $params.assembly_folder --release-jar-path $params.jar.release_pipeline 1>> $params.log_file 2>&1
"""
}

process run_release_merged_deprecated_for_assembly {

label 'long_time', 'med_mem'

input:
val flag

output:
val true, emit: flag

script:
"""
export PYTHONPATH=$params.python_path
$params.executable.python_interpreter -m run_release_in_embassy.run_release_merged_deprecated_for_assembly --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --assembly-release-folder $params.assembly_folder --release-jar-path $params.jar.release_pipeline 1>> $params.log_file 2>&1
"""
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Copyright 2020 EMBL - European Bioinformatics Institute
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import click
import sys
import traceback

from ebi_eva_common_pyutils.logger import logging_config

from run_release_in_embassy.create_release_properties_file import create_release_properties_file_for_assembly
from run_release_in_embassy.release_common_utils import open_mongo_port_to_tempmongo, close_mongo_port_to_tempmongo
from ebi_eva_common_pyutils.command_utils import run_command_with_output


logger = logging_config.get_logger(__name__)


def run_release_for_assembly(private_config_xml_file, profile, taxonomy_id, assembly_accession,
release_species_inventory_table, release_version, assembly_release_folder, release_jar_path,
memory):
exit_code = -1
try:
port_forwarding_process_id, mongo_port = open_mongo_port_to_tempmongo(private_config_xml_file, profile,
taxonomy_id, assembly_accession,
release_species_inventory_table,
release_version)
release_properties_file = create_release_properties_file_for_assembly(private_config_xml_file, profile,
taxonomy_id, assembly_accession,
release_species_inventory_table,
release_version, assembly_release_folder,
job_name='MERGED_DEPRECATED_ACCESSION_RELEASE_JOB',
file_name='merged_deprecated_release')
release_command = 'java -Xmx{0}g -jar {1} --spring.config.location=file:{2} --spring.data.mongodb.port={3}'\
.format(memory, release_jar_path, release_properties_file, mongo_port)
run_command_with_output("Running release pipeline for assembly: " + assembly_accession, release_command)
exit_code = 0
except Exception as ex:
logger.error("Encountered an error while running release for assembly: " + assembly_accession + "\n"
+ traceback.format_exc())
exit_code = -1
finally:
close_mongo_port_to_tempmongo(port_forwarding_process_id)
logger.info("Java release pipeline run completed with exit_code: " + str(exit_code))
sys.exit(exit_code)


@click.option("--private-config-xml-file", help="ex: /path/to/eva-maven-settings.xml", required=True)
@click.option("--profile", help="Maven profile to use, ex: internal", required=True)
@click.option("--taxonomy-id", help="ex: 9913", required=True)
@click.option("--assembly-accession", help="ex: GCA_000003055.6", required=True)
@click.option("--release-species-inventory-table", default="eva_progress_tracker.clustering_release_tracker",
required=False)
@click.option("--release-version", help="ex: 2", type=int, required=True)
@click.option("--assembly-release-folder", required=True)
@click.option("--release-jar-path", required=True)
@click.option("--memory", help="Memory in GB. ex: 8", default=8, type=int, required=False)
@click.command()
def main(private_config_xml_file, profile, taxonomy_id, assembly_accession, release_species_inventory_table,
release_version, assembly_release_folder, release_jar_path, memory):
logging_config.add_stdout_handler()
run_release_for_assembly(private_config_xml_file, profile, taxonomy_id, assembly_accession,
release_species_inventory_table, release_version, assembly_release_folder, release_jar_path,
memory)


if __name__ == "__main__":
main()
Loading

0 comments on commit 3fb084e

Please sign in to comment.