Skip to content

Commit

Permalink
Merge pull request #455 from tcezard/EVA3586_split_release_in_steps
Browse files Browse the repository at this point in the history
EVA-3586 - Split release job in single steps and chains them up in nextflow
  • Loading branch information
tcezard committed Jul 2, 2024
2 parents 2e27979 + fa35ef5 commit 57a91fb
Show file tree
Hide file tree
Showing 15 changed files with 724 additions and 22 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
This document describes technical changes associated with the EVA RefSNP release.

EVA RefSNP release 6
====================

- No significant changes compare to release 5


EVA RefSNP release 5
====================

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
EVA RefSNP release 5
EVA RefSNP release 6

Feedback
==================================
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@
from ebi_eva_common_pyutils.logger import logging_config
from ebi_eva_internal_pyutils.metadata_utils import get_metadata_connection_handle
from ebi_eva_internal_pyutils.pg_utils import get_all_results_for_query
from run_release_in_embassy.run_release_for_species import load_config
from run_release_in_embassy.release_common_utils import get_release_folder_name
from run_release_in_embassy.run_release_for_species import load_config, get_release_folder
from run_release_in_embassy.release_metadata import release_vcf_file_categories, release_text_file_categories

by_assembly_folder_name = "by_assembly"
Expand All @@ -51,9 +52,9 @@ def __init__(self, release_version):
self.private_config_xml_file = cfg.query('maven', 'settings_file')
self.profile = cfg.query('maven', 'environment')
self.release_version = release_version
self.release_species_inventory_table = cfg["inventory_table"]
self.staging_release_folder = cfg["release_output"]
self.public_ftp_release_base_folder = cfg["public_ftp_release_base_folder"]
self.release_species_inventory_table = cfg.query('release', 'inventory_table')
self.staging_release_folder = get_release_folder(release_version)
self.public_ftp_release_base_folder = cfg.query('release', 'public_ftp_release_base_folder')
self.public_ftp_current_release_folder = os.path.join(self.public_ftp_release_base_folder,
f"release_{self.release_version}")
self.public_ftp_previous_release_folder = os.path.join(self.public_ftp_release_base_folder,
Expand Down Expand Up @@ -130,9 +131,11 @@ def create_symlink_to_species_folder_from_assembly_folder(current_release_assemb
assembly_accession)
run_command_with_output(f"""Creating symlink from assembly folder {public_release_assembly_species_folder} to
species folder {public_release_species_assembly_folder}""",
'bash -c "cd {0} && ln -sfT {1} {2}"'.format(public_release_assembly_folder,
os.path.relpath(public_release_species_assembly_folder,
public_release_assembly_folder), public_release_assembly_species_folder))
'bash -c "cd {0} && ln -sfT {1} {2}"'.format(
public_release_assembly_folder,
os.path.relpath(public_release_species_assembly_folder,
public_release_assembly_folder),
public_release_assembly_species_folder))


def recreate_public_release_species_assembly_folder(assembly_accession, public_release_species_assembly_folder):
Expand All @@ -145,15 +148,15 @@ def recreate_public_release_species_assembly_folder(assembly_accession, public_r
def copy_current_assembly_data_to_ftp(current_release_assembly_info, release_properties,
public_release_species_assembly_folder):
assembly_accession = current_release_assembly_info["assembly_accession"]
species_release_folder_name = current_release_assembly_info["release_folder_name"]
species_release_source_folder_name = get_release_folder_name(current_release_assembly_info['taxonomy'])
md5sum_output_file = os.path.join(public_release_species_assembly_folder, "md5checksums.txt")
run_command_with_output(f"Removing md5 checksum file {md5sum_output_file} for assembly if it exists...",
f"rm -f {md5sum_output_file}")

recreate_public_release_species_assembly_folder(assembly_accession, public_release_species_assembly_folder)

for filename in get_release_file_list_for_assembly(current_release_assembly_info):
source_file_path = os.path.join(release_properties.staging_release_folder, species_release_folder_name,
source_file_path = os.path.join(release_properties.staging_release_folder, species_release_source_folder_name,
assembly_accession, filename)
run_command_with_output(f"Copying {filename} to {public_release_species_assembly_folder}...",
f"cp {source_file_path} {public_release_species_assembly_folder}")
Expand Down Expand Up @@ -346,4 +349,4 @@ def main():


if __name__ == "__main__":
main()
main()
Original file line number Diff line number Diff line change
Expand Up @@ -36,16 +36,17 @@ def get_release_properties_for_assembly(private_config_xml_file, profile, taxono

def create_release_properties_file_for_assembly(private_config_xml_file, profile, taxonomy_id, assembly_accession,
release_species_inventory_table, release_version,
assembly_release_folder):
assembly_release_folder, job_name='ACCESSION_RELEASE_JOB',
file_name='release'):
os.makedirs(assembly_release_folder, exist_ok=True)
output_file = "{0}/{1}_release.properties".format(assembly_release_folder, assembly_accession)
output_file = f"{assembly_release_folder}/{assembly_accession}_{file_name}.properties"
release_properties = get_release_properties_for_assembly(
private_config_xml_file, profile, taxonomy_id, assembly_accession, release_species_inventory_table,
release_version
)
properties_string = SpringPropertiesGenerator(profile, private_config_xml_file).get_release_properties(
temp_mongo_db=release_properties['mongo_accessioning_db'],
job_name='ACCESSION_RELEASE_JOB',
job_name=job_name,
assembly_accession=assembly_accession,
taxonomy_accession=taxonomy_id,
fasta=release_properties['fasta_path'],
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Copyright 2020 EMBL - European Bioinformatics Institute
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import click
import sys
import traceback

from ebi_eva_common_pyutils.logger import logging_config

from run_release_in_embassy.create_release_properties_file import create_release_properties_file_for_assembly
from run_release_in_embassy.release_common_utils import open_mongo_port_to_tempmongo, close_mongo_port_to_tempmongo
from ebi_eva_common_pyutils.command_utils import run_command_with_output


logger = logging_config.get_logger(__name__)


def run_release_for_assembly(private_config_xml_file, profile, taxonomy_id, assembly_accession,
release_species_inventory_table, release_version, assembly_release_folder, release_jar_path,
memory):
exit_code = -1
try:
port_forwarding_process_id, mongo_port = open_mongo_port_to_tempmongo(private_config_xml_file, profile,
taxonomy_id, assembly_accession,
release_species_inventory_table,
release_version)
release_properties_file = create_release_properties_file_for_assembly(private_config_xml_file, profile,
taxonomy_id, assembly_accession,
release_species_inventory_table,
release_version, assembly_release_folder,
job_name='ACTIVE_ACCESSION_RELEASE_JOB',
file_name='active_release')
release_command = 'java -Xmx{0}g -jar {1} --spring.config.location=file:{2} --spring.data.mongodb.port={3}'\
.format(memory, release_jar_path, release_properties_file, mongo_port)
run_command_with_output("Running release pipeline for assembly: " + assembly_accession, release_command)
exit_code = 0
except Exception as ex:
logger.error("Encountered an error while running release for assembly: " + assembly_accession + "\n"
+ traceback.format_exc())
exit_code = -1
finally:
close_mongo_port_to_tempmongo(port_forwarding_process_id)
logger.info("Java release pipeline run completed with exit_code: " + str(exit_code))
sys.exit(exit_code)


@click.option("--private-config-xml-file", help="ex: /path/to/eva-maven-settings.xml", required=True)
@click.option("--profile", help="Maven profile to use, ex: internal", required=True)
@click.option("--taxonomy-id", help="ex: 9913", required=True)
@click.option("--assembly-accession", help="ex: GCA_000003055.6", required=True)
@click.option("--release-species-inventory-table", default="eva_progress_tracker.clustering_release_tracker",
required=False)
@click.option("--release-version", help="ex: 2", type=int, required=True)
@click.option("--assembly-release-folder", required=True)
@click.option("--release-jar-path", required=True)
@click.option("--memory", help="Memory in GB. ex: 8", default=8, type=int, required=False)
@click.command()
def main(private_config_xml_file, profile, taxonomy_id, assembly_accession, release_species_inventory_table,
release_version, assembly_release_folder, release_jar_path, memory):
logging_config.add_stdout_handler()
run_release_for_assembly(private_config_xml_file, profile, taxonomy_id, assembly_accession,
release_species_inventory_table, release_version, assembly_release_folder, release_jar_path,
memory)


if __name__ == "__main__":
main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Copyright 2020 EMBL - European Bioinformatics Institute
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import click
import sys
import traceback

from ebi_eva_common_pyutils.logger import logging_config

from run_release_in_embassy.create_release_properties_file import create_release_properties_file_for_assembly
from run_release_in_embassy.release_common_utils import open_mongo_port_to_tempmongo, close_mongo_port_to_tempmongo
from ebi_eva_common_pyutils.command_utils import run_command_with_output


logger = logging_config.get_logger(__name__)


def run_release_for_assembly(private_config_xml_file, profile, taxonomy_id, assembly_accession,
release_species_inventory_table, release_version, assembly_release_folder, release_jar_path,
memory):
exit_code = -1
try:
port_forwarding_process_id, mongo_port = open_mongo_port_to_tempmongo(private_config_xml_file, profile,
taxonomy_id, assembly_accession,
release_species_inventory_table,
release_version)
release_properties_file = create_release_properties_file_for_assembly(private_config_xml_file, profile,
taxonomy_id, assembly_accession,
release_species_inventory_table,
release_version, assembly_release_folder,
job_name='DEPRECATED_ACCESSION_RELEASE_JOB',
file_name='deprecated_release')
release_command = 'java -Xmx{0}g -jar {1} --spring.config.location=file:{2} --spring.data.mongodb.port={3}'\
.format(memory, release_jar_path, release_properties_file, mongo_port)
run_command_with_output("Running release pipeline for assembly: " + assembly_accession, release_command)
exit_code = 0
except Exception as ex:
logger.error("Encountered an error while running release for assembly: " + assembly_accession + "\n"
+ traceback.format_exc())
exit_code = -1
finally:
close_mongo_port_to_tempmongo(port_forwarding_process_id)
logger.info("Java release pipeline run completed with exit_code: " + str(exit_code))
sys.exit(exit_code)


@click.option("--private-config-xml-file", help="ex: /path/to/eva-maven-settings.xml", required=True)
@click.option("--profile", help="Maven profile to use, ex: internal", required=True)
@click.option("--taxonomy-id", help="ex: 9913", required=True)
@click.option("--assembly-accession", help="ex: GCA_000003055.6", required=True)
@click.option("--release-species-inventory-table", default="eva_progress_tracker.clustering_release_tracker",
required=False)
@click.option("--release-version", help="ex: 2", type=int, required=True)
@click.option("--assembly-release-folder", required=True)
@click.option("--release-jar-path", required=True)
@click.option("--memory", help="Memory in GB. ex: 8", default=8, type=int, required=False)
@click.command()
def main(private_config_xml_file, profile, taxonomy_id, assembly_accession, release_species_inventory_table,
release_version, assembly_release_folder, release_jar_path, memory):
logging_config.add_stdout_handler()
run_release_for_assembly(private_config_xml_file, profile, taxonomy_id, assembly_accession,
release_species_inventory_table, release_version, assembly_release_folder, release_jar_path,
memory)


if __name__ == "__main__":
main()
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
nextflow.enable.dsl=2

workflow {
initiate_release_status_for_assembly('initiate') | copy_accessioning_collections_to_embassy | run_release_for_assembly | \
merge_dbsnp_eva_release_files | sort_bgzip_index_release_files | validate_release_vcf_files | \
analyze_vcf_validation_results | count_rs_ids_in_release_files | validate_rs_release_files | \
update_sequence_names_to_ena | update_release_status_for_assembly
initiate_release_status_for_assembly('initiate') | copy_accessioning_collections_to_embassy | \
run_release_active_for_assembly | run_release_merged_for_assembly | run_release_deprecated_for_assembly | \
run_release_merged_deprecated_for_assembly | merge_dbsnp_eva_release_files | sort_bgzip_index_release_files | \
validate_release_vcf_files | analyze_vcf_validation_results | count_rs_ids_in_release_files | \
validate_rs_release_files | update_sequence_names_to_ena | update_release_status_for_assembly
}

process initiate_release_status_for_assembly {
Expand Down Expand Up @@ -43,7 +44,7 @@ process copy_accessioning_collections_to_embassy {
"""
}

process run_release_for_assembly {
process run_release_active_for_assembly {

label 'long_time', 'med_mem'

Expand All @@ -56,7 +57,58 @@ process run_release_for_assembly {
script:
"""
export PYTHONPATH=$params.python_path
$params.executable.python_interpreter -m run_release_in_embassy.run_release_for_assembly --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --assembly-release-folder $params.assembly_folder --release-jar-path $params.jar.release_pipeline 1>> $params.log_file 2>&1
$params.executable.python_interpreter -m run_release_in_embassy.run_release_active_for_assembly --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --assembly-release-folder $params.assembly_folder --release-jar-path $params.jar.release_pipeline 1>> $params.log_file 2>&1
"""
}

process run_release_merged_for_assembly {

label 'long_time', 'med_mem'

input:
val flag

output:
val true, emit: flag

script:
"""
export PYTHONPATH=$params.python_path
$params.executable.python_interpreter -m run_release_in_embassy.run_release_merged_for_assembly --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --assembly-release-folder $params.assembly_folder --release-jar-path $params.jar.release_pipeline 1>> $params.log_file 2>&1
"""
}

process run_release_deprecated_for_assembly {

label 'long_time', 'med_mem'

input:
val flag

output:
val true, emit: flag

script:
"""
export PYTHONPATH=$params.python_path
$params.executable.python_interpreter -m run_release_in_embassy.run_release_deprecated_for_assembly --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --assembly-release-folder $params.assembly_folder --release-jar-path $params.jar.release_pipeline 1>> $params.log_file 2>&1
"""
}

process run_release_merged_deprecated_for_assembly {

label 'long_time', 'med_mem'

input:
val flag

output:
val true, emit: flag

script:
"""
export PYTHONPATH=$params.python_path
$params.executable.python_interpreter -m run_release_in_embassy.run_release_merged_deprecated_for_assembly --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --assembly-release-folder $params.assembly_folder --release-jar-path $params.jar.release_pipeline 1>> $params.log_file 2>&1
"""
}

Expand Down
Loading

0 comments on commit 57a91fb

Please sign in to comment.