Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

EVA-3586 - Split release job in single steps and chains them up in nextflow #455

Merged
merged 6 commits into from
Jul 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
This document describes technical changes associated with the EVA RefSNP release.

EVA RefSNP release 6
====================

- No significant changes compare to release 5


EVA RefSNP release 5
====================

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
EVA RefSNP release 5
EVA RefSNP release 6

Feedback
==================================
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@
from ebi_eva_common_pyutils.logger import logging_config
from ebi_eva_internal_pyutils.metadata_utils import get_metadata_connection_handle
from ebi_eva_internal_pyutils.pg_utils import get_all_results_for_query
from run_release_in_embassy.run_release_for_species import load_config
from run_release_in_embassy.release_common_utils import get_release_folder_name
from run_release_in_embassy.run_release_for_species import load_config, get_release_folder
from run_release_in_embassy.release_metadata import release_vcf_file_categories, release_text_file_categories

by_assembly_folder_name = "by_assembly"
Expand All @@ -51,9 +52,9 @@ def __init__(self, release_version):
self.private_config_xml_file = cfg.query('maven', 'settings_file')
self.profile = cfg.query('maven', 'environment')
self.release_version = release_version
self.release_species_inventory_table = cfg["inventory_table"]
self.staging_release_folder = cfg["release_output"]
self.public_ftp_release_base_folder = cfg["public_ftp_release_base_folder"]
self.release_species_inventory_table = cfg.query('release', 'inventory_table')
self.staging_release_folder = get_release_folder(release_version)
self.public_ftp_release_base_folder = cfg.query('release', 'public_ftp_release_base_folder')
self.public_ftp_current_release_folder = os.path.join(self.public_ftp_release_base_folder,
f"release_{self.release_version}")
self.public_ftp_previous_release_folder = os.path.join(self.public_ftp_release_base_folder,
Expand Down Expand Up @@ -130,9 +131,11 @@ def create_symlink_to_species_folder_from_assembly_folder(current_release_assemb
assembly_accession)
run_command_with_output(f"""Creating symlink from assembly folder {public_release_assembly_species_folder} to
species folder {public_release_species_assembly_folder}""",
'bash -c "cd {0} && ln -sfT {1} {2}"'.format(public_release_assembly_folder,
os.path.relpath(public_release_species_assembly_folder,
public_release_assembly_folder), public_release_assembly_species_folder))
'bash -c "cd {0} && ln -sfT {1} {2}"'.format(
public_release_assembly_folder,
os.path.relpath(public_release_species_assembly_folder,
public_release_assembly_folder),
public_release_assembly_species_folder))


def recreate_public_release_species_assembly_folder(assembly_accession, public_release_species_assembly_folder):
Expand All @@ -145,15 +148,15 @@ def recreate_public_release_species_assembly_folder(assembly_accession, public_r
def copy_current_assembly_data_to_ftp(current_release_assembly_info, release_properties,
public_release_species_assembly_folder):
assembly_accession = current_release_assembly_info["assembly_accession"]
species_release_folder_name = current_release_assembly_info["release_folder_name"]
species_release_source_folder_name = get_release_folder_name(current_release_assembly_info['taxonomy'])
md5sum_output_file = os.path.join(public_release_species_assembly_folder, "md5checksums.txt")
run_command_with_output(f"Removing md5 checksum file {md5sum_output_file} for assembly if it exists...",
f"rm -f {md5sum_output_file}")

recreate_public_release_species_assembly_folder(assembly_accession, public_release_species_assembly_folder)

for filename in get_release_file_list_for_assembly(current_release_assembly_info):
source_file_path = os.path.join(release_properties.staging_release_folder, species_release_folder_name,
source_file_path = os.path.join(release_properties.staging_release_folder, species_release_source_folder_name,
assembly_accession, filename)
run_command_with_output(f"Copying {filename} to {public_release_species_assembly_folder}...",
f"cp {source_file_path} {public_release_species_assembly_folder}")
Expand Down Expand Up @@ -346,4 +349,4 @@ def main():


if __name__ == "__main__":
main()
main()
Original file line number Diff line number Diff line change
Expand Up @@ -36,16 +36,17 @@ def get_release_properties_for_assembly(private_config_xml_file, profile, taxono

def create_release_properties_file_for_assembly(private_config_xml_file, profile, taxonomy_id, assembly_accession,
release_species_inventory_table, release_version,
assembly_release_folder):
assembly_release_folder, job_name='ACCESSION_RELEASE_JOB',
file_name='release'):
os.makedirs(assembly_release_folder, exist_ok=True)
output_file = "{0}/{1}_release.properties".format(assembly_release_folder, assembly_accession)
output_file = f"{assembly_release_folder}/{assembly_accession}_{file_name}.properties"
release_properties = get_release_properties_for_assembly(
private_config_xml_file, profile, taxonomy_id, assembly_accession, release_species_inventory_table,
release_version
)
properties_string = SpringPropertiesGenerator(profile, private_config_xml_file).get_release_properties(
temp_mongo_db=release_properties['mongo_accessioning_db'],
job_name='ACCESSION_RELEASE_JOB',
job_name=job_name,
assembly_accession=assembly_accession,
taxonomy_accession=taxonomy_id,
fasta=release_properties['fasta_path'],
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Copyright 2020 EMBL - European Bioinformatics Institute
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import click
import sys
import traceback

from ebi_eva_common_pyutils.logger import logging_config

from run_release_in_embassy.create_release_properties_file import create_release_properties_file_for_assembly
from run_release_in_embassy.release_common_utils import open_mongo_port_to_tempmongo, close_mongo_port_to_tempmongo
from ebi_eva_common_pyutils.command_utils import run_command_with_output


logger = logging_config.get_logger(__name__)


def run_release_for_assembly(private_config_xml_file, profile, taxonomy_id, assembly_accession,
release_species_inventory_table, release_version, assembly_release_folder, release_jar_path,
memory):
exit_code = -1
try:
port_forwarding_process_id, mongo_port = open_mongo_port_to_tempmongo(private_config_xml_file, profile,
taxonomy_id, assembly_accession,
release_species_inventory_table,
release_version)
release_properties_file = create_release_properties_file_for_assembly(private_config_xml_file, profile,
taxonomy_id, assembly_accession,
release_species_inventory_table,
release_version, assembly_release_folder,
job_name='ACTIVE_ACCESSION_RELEASE_JOB',
file_name='active_release')
release_command = 'java -Xmx{0}g -jar {1} --spring.config.location=file:{2} --spring.data.mongodb.port={3}'\
.format(memory, release_jar_path, release_properties_file, mongo_port)
run_command_with_output("Running release pipeline for assembly: " + assembly_accession, release_command)
exit_code = 0
except Exception as ex:
logger.error("Encountered an error while running release for assembly: " + assembly_accession + "\n"
+ traceback.format_exc())
exit_code = -1
finally:
close_mongo_port_to_tempmongo(port_forwarding_process_id)
logger.info("Java release pipeline run completed with exit_code: " + str(exit_code))
sys.exit(exit_code)


@click.option("--private-config-xml-file", help="ex: /path/to/eva-maven-settings.xml", required=True)
@click.option("--profile", help="Maven profile to use, ex: internal", required=True)
@click.option("--taxonomy-id", help="ex: 9913", required=True)
@click.option("--assembly-accession", help="ex: GCA_000003055.6", required=True)
@click.option("--release-species-inventory-table", default="eva_progress_tracker.clustering_release_tracker",
required=False)
@click.option("--release-version", help="ex: 2", type=int, required=True)
@click.option("--assembly-release-folder", required=True)
@click.option("--release-jar-path", required=True)
@click.option("--memory", help="Memory in GB. ex: 8", default=8, type=int, required=False)
@click.command()
def main(private_config_xml_file, profile, taxonomy_id, assembly_accession, release_species_inventory_table,
release_version, assembly_release_folder, release_jar_path, memory):
logging_config.add_stdout_handler()
run_release_for_assembly(private_config_xml_file, profile, taxonomy_id, assembly_accession,
release_species_inventory_table, release_version, assembly_release_folder, release_jar_path,
memory)


if __name__ == "__main__":
main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Copyright 2020 EMBL - European Bioinformatics Institute
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import click
import sys
import traceback

from ebi_eva_common_pyutils.logger import logging_config

from run_release_in_embassy.create_release_properties_file import create_release_properties_file_for_assembly
from run_release_in_embassy.release_common_utils import open_mongo_port_to_tempmongo, close_mongo_port_to_tempmongo
from ebi_eva_common_pyutils.command_utils import run_command_with_output


logger = logging_config.get_logger(__name__)


def run_release_for_assembly(private_config_xml_file, profile, taxonomy_id, assembly_accession,
release_species_inventory_table, release_version, assembly_release_folder, release_jar_path,
memory):
exit_code = -1
try:
port_forwarding_process_id, mongo_port = open_mongo_port_to_tempmongo(private_config_xml_file, profile,
taxonomy_id, assembly_accession,
release_species_inventory_table,
release_version)
release_properties_file = create_release_properties_file_for_assembly(private_config_xml_file, profile,
taxonomy_id, assembly_accession,
release_species_inventory_table,
release_version, assembly_release_folder,
job_name='DEPRECATED_ACCESSION_RELEASE_JOB',
file_name='deprecated_release')
release_command = 'java -Xmx{0}g -jar {1} --spring.config.location=file:{2} --spring.data.mongodb.port={3}'\
.format(memory, release_jar_path, release_properties_file, mongo_port)
run_command_with_output("Running release pipeline for assembly: " + assembly_accession, release_command)
exit_code = 0
except Exception as ex:
logger.error("Encountered an error while running release for assembly: " + assembly_accession + "\n"
+ traceback.format_exc())
exit_code = -1
finally:
close_mongo_port_to_tempmongo(port_forwarding_process_id)
logger.info("Java release pipeline run completed with exit_code: " + str(exit_code))
sys.exit(exit_code)


@click.option("--private-config-xml-file", help="ex: /path/to/eva-maven-settings.xml", required=True)
@click.option("--profile", help="Maven profile to use, ex: internal", required=True)
@click.option("--taxonomy-id", help="ex: 9913", required=True)
@click.option("--assembly-accession", help="ex: GCA_000003055.6", required=True)
@click.option("--release-species-inventory-table", default="eva_progress_tracker.clustering_release_tracker",
required=False)
@click.option("--release-version", help="ex: 2", type=int, required=True)
@click.option("--assembly-release-folder", required=True)
@click.option("--release-jar-path", required=True)
@click.option("--memory", help="Memory in GB. ex: 8", default=8, type=int, required=False)
@click.command()
def main(private_config_xml_file, profile, taxonomy_id, assembly_accession, release_species_inventory_table,
release_version, assembly_release_folder, release_jar_path, memory):
logging_config.add_stdout_handler()
run_release_for_assembly(private_config_xml_file, profile, taxonomy_id, assembly_accession,
release_species_inventory_table, release_version, assembly_release_folder, release_jar_path,
memory)


if __name__ == "__main__":
main()
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
nextflow.enable.dsl=2

workflow {
initiate_release_status_for_assembly('initiate') | copy_accessioning_collections_to_embassy | run_release_for_assembly | \
merge_dbsnp_eva_release_files | sort_bgzip_index_release_files | validate_release_vcf_files | \
analyze_vcf_validation_results | count_rs_ids_in_release_files | validate_rs_release_files | \
update_sequence_names_to_ena | update_release_status_for_assembly
initiate_release_status_for_assembly('initiate') | copy_accessioning_collections_to_embassy | \
run_release_active_for_assembly | run_release_merged_for_assembly | run_release_deprecated_for_assembly | \
run_release_merged_deprecated_for_assembly | merge_dbsnp_eva_release_files | sort_bgzip_index_release_files | \
validate_release_vcf_files | analyze_vcf_validation_results | count_rs_ids_in_release_files | \
validate_rs_release_files | update_sequence_names_to_ena | update_release_status_for_assembly
}

process initiate_release_status_for_assembly {
Expand Down Expand Up @@ -43,7 +44,7 @@ process copy_accessioning_collections_to_embassy {
"""
}

process run_release_for_assembly {
process run_release_active_for_assembly {

label 'long_time', 'med_mem'

Expand All @@ -56,7 +57,58 @@ process run_release_for_assembly {
script:
"""
export PYTHONPATH=$params.python_path
$params.executable.python_interpreter -m run_release_in_embassy.run_release_for_assembly --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --assembly-release-folder $params.assembly_folder --release-jar-path $params.jar.release_pipeline 1>> $params.log_file 2>&1
$params.executable.python_interpreter -m run_release_in_embassy.run_release_active_for_assembly --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --assembly-release-folder $params.assembly_folder --release-jar-path $params.jar.release_pipeline 1>> $params.log_file 2>&1
"""
}

process run_release_merged_for_assembly {

label 'long_time', 'med_mem'

input:
val flag

output:
val true, emit: flag

script:
"""
export PYTHONPATH=$params.python_path
$params.executable.python_interpreter -m run_release_in_embassy.run_release_merged_for_assembly --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --assembly-release-folder $params.assembly_folder --release-jar-path $params.jar.release_pipeline 1>> $params.log_file 2>&1
"""
}

process run_release_deprecated_for_assembly {

label 'long_time', 'med_mem'

input:
val flag

output:
val true, emit: flag

script:
"""
export PYTHONPATH=$params.python_path
$params.executable.python_interpreter -m run_release_in_embassy.run_release_deprecated_for_assembly --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --assembly-release-folder $params.assembly_folder --release-jar-path $params.jar.release_pipeline 1>> $params.log_file 2>&1
"""
}

process run_release_merged_deprecated_for_assembly {

label 'long_time', 'med_mem'

input:
val flag

output:
val true, emit: flag

script:
"""
export PYTHONPATH=$params.python_path
$params.executable.python_interpreter -m run_release_in_embassy.run_release_merged_deprecated_for_assembly --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --assembly-release-folder $params.assembly_folder --release-jar-path $params.jar.release_pipeline 1>> $params.log_file 2>&1
"""
}

Expand Down
Loading
Loading