From cb229e34bfd5c31368e8ba07950d01a54e28d131 Mon Sep 17 00:00:00 2001 From: tcezard Date: Thu, 6 Jun 2024 10:43:06 +0100 Subject: [PATCH 1/5] Add logging handlers so that each script can print out their logging statements --- .../run_release_in_embassy/analyze_vcf_validation_results.py | 3 ++- .../copy_accessioning_collections_to_embassy.py | 2 ++ .../run_release_in_embassy/count_rs_ids_in_release_files.py | 3 +++ .../run_release_in_embassy/create_release_properties_file.py | 2 ++ .../initiate_release_status_for_assembly.py | 2 ++ .../run_release_in_embassy/merge_dbsnp_eva_release_files.py | 2 ++ .../run_release_in_embassy/release_common_utils.py | 5 ++--- .../run_release_in_embassy/run_release_for_assembly.py | 1 + .../run_release_in_embassy/sort_bgzip_index_release_files.py | 3 +++ .../update_release_status_for_assembly.py | 2 ++ .../run_release_in_embassy/update_sequence_names_to_ena.py | 2 ++ .../run_release_in_embassy/validate_release_vcf_files.py | 3 +++ .../run_release_in_embassy/validate_rs_release_files.py | 3 +++ 13 files changed, 29 insertions(+), 4 deletions(-) diff --git a/eva-accession-release-automation/run_release_in_embassy/analyze_vcf_validation_results.py b/eva-accession-release-automation/run_release_in_embassy/analyze_vcf_validation_results.py index d4285e0b1..1ea1dd2e0 100644 --- a/eva-accession-release-automation/run_release_in_embassy/analyze_vcf_validation_results.py +++ b/eva-accession-release-automation/run_release_in_embassy/analyze_vcf_validation_results.py @@ -19,7 +19,7 @@ import sys from ebi_eva_common_pyutils.command_utils import run_command_with_output -from ebi_eva_common_pyutils.logger import logging_config as log_cfg +from ebi_eva_common_pyutils.logger import logging_config as log_cfg, logging_config from run_release_in_embassy.release_metadata import vcf_validation_output_file_pattern, asm_report_output_file_pattern logger = log_cfg.get_logger(__name__) @@ -84,6 +84,7 @@ def 
analyze_vcf_validation_results(species_release_folder, assembly_accession): @click.option("--assembly-accession", required=True) @click.command() def main(species_release_folder, assembly_accession): + logging_config.add_stdout_handler() analyze_vcf_validation_results(species_release_folder, assembly_accession) diff --git a/eva-accession-release-automation/run_release_in_embassy/copy_accessioning_collections_to_embassy.py b/eva-accession-release-automation/run_release_in_embassy/copy_accessioning_collections_to_embassy.py index 7443e923e..d42802652 100644 --- a/eva-accession-release-automation/run_release_in_embassy/copy_accessioning_collections_to_embassy.py +++ b/eva-accession-release-automation/run_release_in_embassy/copy_accessioning_collections_to_embassy.py @@ -19,6 +19,7 @@ import traceback from ebi_eva_common_pyutils.command_utils import run_command_with_output +from ebi_eva_common_pyutils.logger import logging_config from ebi_eva_internal_pyutils.config_utils import get_mongo_uri_for_eva_profile from ebi_eva_internal_pyutils.metadata_utils import get_metadata_connection_handle from ebi_eva_internal_pyutils.mongo_utils import copy_db @@ -153,6 +154,7 @@ def copy_accessioning_collections_to_embassy(private_config_xml_file, profile, t @click.command() def main(private_config_xml_file, profile, taxonomy_id, assembly_accession, collections_to_copy, release_species_inventory_table, release_version, dump_dir): + logging_config.add_stdout_handler() copy_accessioning_collections_to_embassy(private_config_xml_file, profile, taxonomy_id, assembly_accession, collections_to_copy, release_species_inventory_table, release_version, dump_dir) diff --git a/eva-accession-release-automation/run_release_in_embassy/count_rs_ids_in_release_files.py b/eva-accession-release-automation/run_release_in_embassy/count_rs_ids_in_release_files.py index 0394c9cf2..d950fa52a 100644 --- a/eva-accession-release-automation/run_release_in_embassy/count_rs_ids_in_release_files.py +++ 
b/eva-accession-release-automation/run_release_in_embassy/count_rs_ids_in_release_files.py @@ -16,6 +16,8 @@ import os from ebi_eva_common_pyutils.command_utils import run_command_with_output +from ebi_eva_common_pyutils.logger import logging_config + from run_release_in_embassy.release_metadata import release_vcf_file_categories, release_text_file_categories from run_release_in_embassy.release_common_utils import get_release_vcf_file_name_genbank, get_release_text_file_name @@ -47,6 +49,7 @@ def count_rs_ids_in_release_files(count_ids_script_path, taxonomy_id, assembly_a @click.option("--species-release-folder", required=True) @click.command() def main(count_ids_script_path, taxonomy_id, assembly_accession, species_release_folder): + logging_config.add_stdout_handler() count_rs_ids_in_release_files(count_ids_script_path, taxonomy_id, assembly_accession, species_release_folder) diff --git a/eva-accession-release-automation/run_release_in_embassy/create_release_properties_file.py b/eva-accession-release-automation/run_release_in_embassy/create_release_properties_file.py index c12e307f6..926effa3f 100644 --- a/eva-accession-release-automation/run_release_in_embassy/create_release_properties_file.py +++ b/eva-accession-release-automation/run_release_in_embassy/create_release_properties_file.py @@ -14,6 +14,7 @@ import os import click +from ebi_eva_common_pyutils.logger import logging_config from ebi_eva_internal_pyutils.metadata_utils import get_metadata_connection_handle from ebi_eva_internal_pyutils.spring_properties import SpringPropertiesGenerator @@ -67,6 +68,7 @@ def create_release_properties_file_for_assembly(private_config_xml_file, profile @click.command() def main(private_config_xml_file, profile, taxonomy_id, assembly_accession, release_species_inventory_table, release_version, species_release_folder): + logging_config.add_stdout_handler() create_release_properties_file_for_assembly(private_config_xml_file, profile, taxonomy_id, assembly_accession, 
release_species_inventory_table, release_version, species_release_folder) diff --git a/eva-accession-release-automation/run_release_in_embassy/initiate_release_status_for_assembly.py b/eva-accession-release-automation/run_release_in_embassy/initiate_release_status_for_assembly.py index 2223cde57..25f348411 100644 --- a/eva-accession-release-automation/run_release_in_embassy/initiate_release_status_for_assembly.py +++ b/eva-accession-release-automation/run_release_in_embassy/initiate_release_status_for_assembly.py @@ -15,6 +15,7 @@ import click import logging +from ebi_eva_common_pyutils.logger import logging_config from run_release_in_embassy.release_metadata import update_release_progress_status from ebi_eva_internal_pyutils.metadata_utils import get_metadata_connection_handle @@ -42,6 +43,7 @@ def initiate_release_status_for_assembly(private_config_xml_file, profile, relea @click.command() def main(private_config_xml_file, profile, release_species_inventory_table, taxonomy_id, assembly_accession, release_version): + logging_config.add_stdout_handler() initiate_release_status_for_assembly(private_config_xml_file, profile, release_species_inventory_table, taxonomy_id, assembly_accession, release_version) diff --git a/eva-accession-release-automation/run_release_in_embassy/merge_dbsnp_eva_release_files.py b/eva-accession-release-automation/run_release_in_embassy/merge_dbsnp_eva_release_files.py index ad9da4c16..0c45aa7c7 100644 --- a/eva-accession-release-automation/run_release_in_embassy/merge_dbsnp_eva_release_files.py +++ b/eva-accession-release-automation/run_release_in_embassy/merge_dbsnp_eva_release_files.py @@ -18,6 +18,7 @@ import os from ebi_eva_common_pyutils.command_utils import run_command_with_output +from ebi_eva_common_pyutils.logger import logging_config from ebi_eva_internal_pyutils.metadata_utils import get_metadata_connection_handle from run_release_in_embassy.release_metadata import release_vcf_file_categories, release_text_file_categories, \ 
get_release_inventory_info_for_assembly @@ -205,6 +206,7 @@ def merge_dbsnp_eva_release_files(private_config_xml_file, profile, bgzip_path, @click.command() def main(private_config_xml_file, profile, bgzip_path, bcftools_path, vcf_sort_script_path, taxonomy_id, assembly_accession, release_species_inventory_table, release_version, species_release_folder): + logging_config.add_stdout_handler() merge_dbsnp_eva_release_files(private_config_xml_file, profile, bgzip_path, bcftools_path, vcf_sort_script_path, taxonomy_id, assembly_accession, release_species_inventory_table, release_version, species_release_folder) diff --git a/eva-accession-release-automation/run_release_in_embassy/release_common_utils.py b/eva-accession-release-automation/run_release_in_embassy/release_common_utils.py index 58829a864..8bc966245 100644 --- a/eva-accession-release-automation/run_release_in_embassy/release_common_utils.py +++ b/eva-accession-release-automation/run_release_in_embassy/release_common_utils.py @@ -46,9 +46,8 @@ def open_mongo_port_to_tempmongo(private_config_xml_file, profile, taxonomy_id, def close_mongo_port_to_tempmongo(port_forwarding_process_id): os.kill(port_forwarding_process_id, signal.SIGTERM) - os.system('echo -e "Killed port forwarding from remote port with signal 1 - SIGTERM. ' - '\\033[31;1;4mIGNORE OS MESSAGE ' # escape sequences for bold red and underlined text - '\'Killed by Signal 1\' in the preceding/following text\\033[0m".') + logger.info('Killed port forwarding from remote port with signal 1 - SIGTERM. 
' + 'IGNORE OS MESSAGE \'Killed by Signal 1\' in the preceding/following text.') def get_bgzip_bcftools_index_commands_for_file(bgzip_path, bcftools_path, file): diff --git a/eva-accession-release-automation/run_release_in_embassy/run_release_for_assembly.py b/eva-accession-release-automation/run_release_in_embassy/run_release_for_assembly.py index f6c6b326c..19d6af055 100644 --- a/eva-accession-release-automation/run_release_in_embassy/run_release_for_assembly.py +++ b/eva-accession-release-automation/run_release_in_embassy/run_release_for_assembly.py @@ -66,6 +66,7 @@ def run_release_for_assembly(private_config_xml_file, profile, taxonomy_id, asse @click.command() def main(private_config_xml_file, profile, taxonomy_id, assembly_accession, release_species_inventory_table, release_version, species_release_folder, release_jar_path, memory): + logging_config.add_stdout_handler() run_release_for_assembly(private_config_xml_file, profile, taxonomy_id, assembly_accession, release_species_inventory_table, release_version, species_release_folder, release_jar_path, memory) diff --git a/eva-accession-release-automation/run_release_in_embassy/sort_bgzip_index_release_files.py b/eva-accession-release-automation/run_release_in_embassy/sort_bgzip_index_release_files.py index 436f85fdd..15bbc09ca 100644 --- a/eva-accession-release-automation/run_release_in_embassy/sort_bgzip_index_release_files.py +++ b/eva-accession-release-automation/run_release_in_embassy/sort_bgzip_index_release_files.py @@ -16,6 +16,8 @@ import click from ebi_eva_common_pyutils.command_utils import run_command_with_output +from ebi_eva_common_pyutils.logger import logging_config + from run_release_in_embassy.release_metadata import release_vcf_file_categories, release_text_file_categories from run_release_in_embassy.release_common_utils import get_release_vcf_file_name_genbank, \ get_unsorted_release_vcf_file_name, get_release_text_file_name, get_unsorted_release_text_file_name, \ @@ -66,6 +68,7 @@ def 
sort_bgzip_index_release_files(bgzip_path, bcftools_path, vcf_sort_script_pa @click.option("--species-release-folder", required=True) @click.command() def main(bgzip_path, bcftools_path, vcf_sort_script_path, taxonomy_id, assembly_accession, species_release_folder): + logging_config.add_stdout_handler() sort_bgzip_index_release_files(bgzip_path, bcftools_path, vcf_sort_script_path, taxonomy_id, assembly_accession, species_release_folder) diff --git a/eva-accession-release-automation/run_release_in_embassy/update_release_status_for_assembly.py b/eva-accession-release-automation/run_release_in_embassy/update_release_status_for_assembly.py index 3f59450a1..789f75624 100644 --- a/eva-accession-release-automation/run_release_in_embassy/update_release_status_for_assembly.py +++ b/eva-accession-release-automation/run_release_in_embassy/update_release_status_for_assembly.py @@ -15,6 +15,7 @@ import click import logging +from ebi_eva_common_pyutils.logger import logging_config from run_release_in_embassy.release_metadata import update_release_progress_status from ebi_eva_internal_pyutils.metadata_utils import get_metadata_connection_handle @@ -41,6 +42,7 @@ def update_release_status_for_assembly(private_config_xml_file, profile, release @click.option("--release-version", help="ex: 2", type=int, required=True) @click.command() def main(private_config_xml_file, profile, release_species_inventory_table, taxonomy_id, assembly_accession, release_version): + logging_config.add_stdout_handler() update_release_status_for_assembly(private_config_xml_file, profile, release_species_inventory_table, taxonomy_id, assembly_accession, release_version) diff --git a/eva-accession-release-automation/run_release_in_embassy/update_sequence_names_to_ena.py b/eva-accession-release-automation/run_release_in_embassy/update_sequence_names_to_ena.py index 68cb1d16f..a15f5c975 100644 --- a/eva-accession-release-automation/run_release_in_embassy/update_sequence_names_to_ena.py +++ 
b/eva-accession-release-automation/run_release_in_embassy/update_sequence_names_to_ena.py @@ -14,6 +14,7 @@ import click +from ebi_eva_common_pyutils.logger import logging_config from run_release_in_embassy.release_common_utils import get_release_vcf_file_name, get_release_vcf_file_name_genbank from run_release_in_embassy.release_metadata import release_vcf_file_categories @@ -44,6 +45,7 @@ def update_sequence_name(taxonomy_id, assembly_accession, species_release_folder @click.option("--bcftools-path", help="ex: /path/to/bcftools/binary", required=True) @click.command() def main(taxonomy_id, assembly_accession, species_release_folder, sequence_name_converter_path, bcftools_path): + logging_config.add_stdout_handler() update_sequence_name(taxonomy_id, assembly_accession, species_release_folder, sequence_name_converter_path, bcftools_path) diff --git a/eva-accession-release-automation/run_release_in_embassy/validate_release_vcf_files.py b/eva-accession-release-automation/run_release_in_embassy/validate_release_vcf_files.py index cdf91bd59..b3b543e2b 100644 --- a/eva-accession-release-automation/run_release_in_embassy/validate_release_vcf_files.py +++ b/eva-accession-release-automation/run_release_in_embassy/validate_release_vcf_files.py @@ -15,6 +15,8 @@ import click import os +from ebi_eva_common_pyutils.logger import logging_config + from run_release_in_embassy.release_common_utils import get_release_vcf_file_name_genbank from run_release_in_embassy.release_metadata import get_release_inventory_info_for_assembly, \ release_vcf_file_categories, vcf_validation_output_file_pattern, asm_report_output_file_pattern @@ -79,6 +81,7 @@ def validate_release_vcf_files(private_config_xml_file, profile, taxonomy_id, as @click.command() def main(private_config_xml_file, profile, taxonomy_id, assembly_accession, release_species_inventory_table, release_version, species_release_folder, vcf_validator_path, assembly_checker_path): + logging_config.add_stdout_handler() 
validate_release_vcf_files(private_config_xml_file, profile, taxonomy_id, assembly_accession, release_species_inventory_table, release_version, species_release_folder, vcf_validator_path, assembly_checker_path) diff --git a/eva-accession-release-automation/run_release_in_embassy/validate_rs_release_files.py b/eva-accession-release-automation/run_release_in_embassy/validate_rs_release_files.py index 8ca9e867c..23809e23b 100644 --- a/eva-accession-release-automation/run_release_in_embassy/validate_rs_release_files.py +++ b/eva-accession-release-automation/run_release_in_embassy/validate_rs_release_files.py @@ -26,6 +26,8 @@ from ebi_eva_common_pyutils.command_utils import run_command_with_output from ebi_eva_common_pyutils.file_utils import file_diff, FileDiffOption +from ebi_eva_common_pyutils.logger import logging_config + from run_release_in_embassy.release_common_utils import open_mongo_port_to_tempmongo, close_mongo_port_to_tempmongo, \ get_release_db_name_in_tempmongo_instance from run_release_in_embassy.copy_accessioning_collections_to_embassy import collections_assembly_attribute_map, \ @@ -498,6 +500,7 @@ def validate_rs_release_files(private_config_xml_file, profile, taxonomy_id, ass @click.command() def main(private_config_xml_file, profile, taxonomy_id, assembly_accession, release_species_inventory_table, release_version, species_release_folder): + logging_config.add_stdout_handler() validate_rs_release_files(private_config_xml_file, profile, taxonomy_id, assembly_accession, release_species_inventory_table, release_version, species_release_folder) From 4b173c73226a9477beb4404043492580b5c261d9 Mon Sep 17 00:00:00 2001 From: tcezard Date: Thu, 6 Jun 2024 16:21:25 +0100 Subject: [PATCH 2/5] Change the top level directory from config to not include the release Pass the assembly_release_folder to run_release_for_assembly.py Python interpreter is now taken from the currently used interpreter --- .../create_release_properties_file.py | 18 ++++++------- 
.../run_release_for_assembly.nf | 22 +++++++-------- .../run_release_for_assembly.py | 10 +++---- .../run_release_for_species.py | 27 ++++++++++++------- 4 files changed, 42 insertions(+), 35 deletions(-) diff --git a/eva-accession-release-automation/run_release_in_embassy/create_release_properties_file.py b/eva-accession-release-automation/run_release_in_embassy/create_release_properties_file.py index 926effa3f..fcd7acfa6 100644 --- a/eva-accession-release-automation/run_release_in_embassy/create_release_properties_file.py +++ b/eva-accession-release-automation/run_release_in_embassy/create_release_properties_file.py @@ -23,7 +23,7 @@ def get_release_properties_for_assembly(private_config_xml_file, profile, taxonomy_id, assembly_accession, - release_species_inventory_table, release_version, species_release_folder): + release_species_inventory_table, release_version): with get_metadata_connection_handle(profile, private_config_xml_file) as metadata_connection_handle: release_inventory_info_for_assembly = get_release_inventory_info_for_assembly(taxonomy_id, assembly_accession, release_species_inventory_table, @@ -36,13 +36,13 @@ def get_release_properties_for_assembly(private_config_xml_file, profile, taxono def create_release_properties_file_for_assembly(private_config_xml_file, profile, taxonomy_id, assembly_accession, release_species_inventory_table, release_version, - species_release_folder): - assembly_species_release_folder = os.path.join(species_release_folder, assembly_accession) - os.makedirs(assembly_species_release_folder, exist_ok=True) - output_file = "{0}/{1}_release.properties".format(assembly_species_release_folder, assembly_accession) - release_properties = get_release_properties_for_assembly(private_config_xml_file, profile, taxonomy_id, assembly_accession, - release_species_inventory_table, release_version, - species_release_folder) + assembly_release_folder): + os.makedirs(assembly_release_folder, exist_ok=True) + output_file = 
"{0}/{1}_release.properties".format(assembly_release_folder, assembly_accession) + release_properties = get_release_properties_for_assembly( + private_config_xml_file, profile, taxonomy_id, assembly_accession, release_species_inventory_table, + release_version + ) properties_string = SpringPropertiesGenerator(profile, private_config_xml_file).get_release_properties( temp_mongo_db=release_properties['mongo_accessioning_db'], job_name='ACCESSION_RELEASE_JOB', @@ -51,7 +51,7 @@ def create_release_properties_file_for_assembly(private_config_xml_file, profile fasta=release_properties['fasta_path'], assembly_report=release_properties['report_path'], contig_naming='SEQUENCE_NAME', - output_folder=assembly_species_release_folder + output_folder=assembly_release_folder ) open(output_file, "w").write(properties_string) return output_file diff --git a/eva-accession-release-automation/run_release_in_embassy/run_release_for_assembly.nf b/eva-accession-release-automation/run_release_in_embassy/run_release_for_assembly.nf index c294f025b..8423ed164 100644 --- a/eva-accession-release-automation/run_release_in_embassy/run_release_for_assembly.nf +++ b/eva-accession-release-automation/run_release_in_embassy/run_release_for_assembly.nf @@ -22,7 +22,7 @@ process initiate_release_status_for_assembly { script: """ export PYTHONPATH=$params.python_path - $params.executable.python.interpreter -m run_release_in_embassy.initiate_release_status_for_assembly --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-version $params.release_version 1>> $params.log_file 2>&1 + $params.executable.python_interpreter -m run_release_in_embassy.initiate_release_status_for_assembly --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --release-species-inventory-table 
eva_progress_tracker.clustering_release_tracker --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-version $params.release_version 1>> $params.log_file 2>&1 """ } @@ -39,7 +39,7 @@ process copy_accessioning_collections_to_embassy { script: """ export PYTHONPATH=$params.python_path - $params.executable.python.interpreter -m run_release_in_embassy.copy_accessioning_collections_to_embassy --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --dump-dir $params.dump_dir 1>> $params.log_file 2>&1 + $params.executable.python_interpreter -m run_release_in_embassy.copy_accessioning_collections_to_embassy --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --dump-dir $params.dump_dir 1>> $params.log_file 2>&1 """ } @@ -56,7 +56,7 @@ process run_release_for_assembly { script: """ export PYTHONPATH=$params.python_path - $params.executable.python.interpreter -m run_release_in_embassy.run_release_for_assembly --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --species-release-folder $params.assembly_folder --release-jar-path $params.jar.release_pipeline 1>> $params.log_file 2>&1 + $params.executable.python_interpreter -m run_release_in_embassy.run_release_for_assembly --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id 
$params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --species-release-folder $params.assembly_folder --release-jar-path $params.jar.release_pipeline 1>> $params.log_file 2>&1 """ } @@ -73,7 +73,7 @@ process merge_dbsnp_eva_release_files { script: """ export PYTHONPATH=$params.python_path - $params.executable.python.interpreter -m run_release_in_embassy.merge_dbsnp_eva_release_files --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --bgzip-path $params.executable.bgzip --bcftools-path $params.executable.bcftools --vcf-sort-script-path $params.executable.sort_vcf_sorted_chromosomes --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --species-release-folder $params.assembly_folder 1>> $params.log_file 2>&1 + $params.executable.python_interpreter -m run_release_in_embassy.merge_dbsnp_eva_release_files --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --bgzip-path $params.executable.bgzip --bcftools-path $params.executable.bcftools --vcf-sort-script-path $params.executable.sort_vcf_sorted_chromosomes --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --species-release-folder $params.assembly_folder 1>> $params.log_file 2>&1 """ } @@ -90,7 +90,7 @@ process sort_bgzip_index_release_files { script: """ export PYTHONPATH=$params.python_path - $params.executable.python.interpreter -m run_release_in_embassy.sort_bgzip_index_release_files --bgzip-path $params.executable.bgzip --bcftools-path $params.executable.bcftools --vcf-sort-script-path $params.executable.sort_vcf_sorted_chromosomes 
--taxonomy-id $params.taxonomy --assembly-accession $params.assembly --species-release-folder $params.assembly_folder 1>> $params.log_file 2>&1 + $params.executable.python_interpreter -m run_release_in_embassy.sort_bgzip_index_release_files --bgzip-path $params.executable.bgzip --bcftools-path $params.executable.bcftools --vcf-sort-script-path $params.executable.sort_vcf_sorted_chromosomes --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --species-release-folder $params.assembly_folder 1>> $params.log_file 2>&1 """ } @@ -107,7 +107,7 @@ process validate_release_vcf_files { script: """ export PYTHONPATH=$params.python_path - $params.executable.python.interpreter -m run_release_in_embassy.validate_release_vcf_files --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --species-release-folder $params.assembly_folder --vcf-validator-path $params.executable.vcf_validator --assembly-checker-path $params.executable.vcf_assembly_checker 1>> $params.log_file 2>&1 + $params.executable.python_interpreter -m run_release_in_embassy.validate_release_vcf_files --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --species-release-folder $params.assembly_folder --vcf-validator-path $params.executable.vcf_validator --assembly-checker-path $params.executable.vcf_assembly_checker 1>> $params.log_file 2>&1 """ } @@ -124,7 +124,7 @@ process analyze_vcf_validation_results { script: """ export PYTHONPATH=$params.python_path - $params.executable.python.interpreter -m 
run_release_in_embassy.analyze_vcf_validation_results --species-release-folder $params.assembly_folder --assembly-accession $params.assembly 1>> $params.log_file 2>&1 + $params.executable.python_interpreter -m run_release_in_embassy.analyze_vcf_validation_results --species-release-folder $params.assembly_folder --assembly-accession $params.assembly 1>> $params.log_file 2>&1 """ } @@ -141,7 +141,7 @@ process count_rs_ids_in_release_files { script: """ export PYTHONPATH=$params.python_path - $params.executable.python.interpreter -m run_release_in_embassy.count_rs_ids_in_release_files --count-ids-script-path $params.executable.count_ids_in_vcf --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --species-release-folder $params.assembly_folder 1>> $params.log_file 2>&1 + $params.executable.python_interpreter -m run_release_in_embassy.count_rs_ids_in_release_files --count-ids-script-path $params.executable.count_ids_in_vcf --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --species-release-folder $params.assembly_folder 1>> $params.log_file 2>&1 """ } @@ -158,7 +158,7 @@ process validate_rs_release_files { script: """ export PYTHONPATH=$params.python_path - $params.executable.python.interpreter -m run_release_in_embassy.validate_rs_release_files --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --species-release-folder $params.assembly_folder 1>> $params.log_file 2>&1 + $params.executable.python_interpreter -m run_release_in_embassy.validate_rs_release_files --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker 
--release-version $params.release_version --species-release-folder $params.assembly_folder 1>> $params.log_file 2>&1 """ } @@ -175,7 +175,7 @@ process update_sequence_names_to_ena { script: """ export PYTHONPATH=$params.python_path - $params.executable.python.interpreter -m run_release_in_embassy.update_sequence_names_to_ena --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --species-release-folder $params.assembly_folder --sequence-name-converter-path $params.executable.convert_vcf_file --bcftools-path $params.executable.bcftools 1>> $params.log_file 2>&1 + $params.executable.python_interpreter -m run_release_in_embassy.update_sequence_names_to_ena --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --species-release-folder $params.assembly_folder --sequence-name-converter-path $params.executable.convert_vcf_file --bcftools-path $params.executable.bcftools 1>> $params.log_file 2>&1 """ } @@ -192,6 +192,6 @@ process update_release_status_for_assembly { script: """ export PYTHONPATH=$params.python_path - $params.executable.python.interpreter -m run_release_in_embassy.update_release_status_for_assembly --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-version $params.release_version 1>> $params.log_file 2>&1 + $params.executable.python_interpreter -m run_release_in_embassy.update_release_status_for_assembly --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-version $params.release_version 1>> $params.log_file 2>&1 """ } diff --git a/eva-accession-release-automation/run_release_in_embassy/run_release_for_assembly.py 
b/eva-accession-release-automation/run_release_in_embassy/run_release_for_assembly.py index 19d6af055..85b6f3264 100644 --- a/eva-accession-release-automation/run_release_in_embassy/run_release_for_assembly.py +++ b/eva-accession-release-automation/run_release_in_embassy/run_release_for_assembly.py @@ -27,7 +27,7 @@ def run_release_for_assembly(private_config_xml_file, profile, taxonomy_id, assembly_accession, - release_species_inventory_table, release_version, species_release_folder, release_jar_path, + release_species_inventory_table, release_version, assembly_release_folder, release_jar_path, memory): exit_code = -1 try: @@ -38,7 +38,7 @@ def run_release_for_assembly(private_config_xml_file, profile, taxonomy_id, asse release_properties_file = create_release_properties_file_for_assembly(private_config_xml_file, profile, taxonomy_id, assembly_accession, release_species_inventory_table, - release_version, species_release_folder) + release_version, assembly_release_folder) release_command = 'java -Xmx{0}g -jar {1} --spring.config.location=file:{2} -Dspring.data.mongodb.port={3}'\ .format(memory, release_jar_path, release_properties_file, mongo_port) run_command_with_output("Running release pipeline for assembly: " + assembly_accession, release_command) @@ -60,15 +60,15 @@ def run_release_for_assembly(private_config_xml_file, profile, taxonomy_id, asse @click.option("--release-species-inventory-table", default="eva_progress_tracker.clustering_release_tracker", required=False) @click.option("--release-version", help="ex: 2", type=int, required=True) -@click.option("--species-release-folder", required=True) +@click.option("--assembly-release-folder", required=True) @click.option("--release-jar-path", required=True) @click.option("--memory", help="Memory in GB. 
ex: 8", default=8, type=int, required=False) @click.command() def main(private_config_xml_file, profile, taxonomy_id, assembly_accession, release_species_inventory_table, - release_version, species_release_folder, release_jar_path, memory): + release_version, assembly_release_folder, release_jar_path, memory): logging_config.add_stdout_handler() run_release_for_assembly(private_config_xml_file, profile, taxonomy_id, assembly_accession, - release_species_inventory_table, release_version, species_release_folder, release_jar_path, + release_species_inventory_table, release_version, assembly_release_folder, release_jar_path, memory) diff --git a/eva-accession-release-automation/run_release_in_embassy/run_release_for_species.py b/eva-accession-release-automation/run_release_in_embassy/run_release_for_species.py index 1c9ebc200..7311f25cb 100644 --- a/eva-accession-release-automation/run_release_in_embassy/run_release_for_species.py +++ b/eva-accession-release-automation/run_release_in_embassy/run_release_for_species.py @@ -31,10 +31,12 @@ def get_nextflow_params(taxonomy_id, assembly_accession, release_version): - dump_dir = os.path.join(get_species_release_folder(taxonomy_id), 'dumps') - release_dir = get_assembly_release_folder(taxonomy_id, assembly_accession) + dump_dir = os.path.join(get_species_release_folder(release_version, taxonomy_id), 'dumps') + release_dir = get_assembly_release_folder(release_version, taxonomy_id, assembly_accession) config_param = os.path.join(release_dir, f'nextflow_params_{taxonomy_id}_{assembly_accession}.yaml') os.makedirs(dump_dir, exist_ok=True) + # Add the same python interpreter as the one we're using to use with the python step scripts + cfg['executable']['python_interpreter'] = sys.executable yaml_data = { 'assembly': assembly_accession, 'dump_dir': dump_dir, @@ -62,20 +64,25 @@ def get_run_release_for_assembly_nextflow(): return os.path.join(curr_dir, 'run_release_for_assembly.nf') -def get_release_log_file_name(taxonomy_id, 
assembly_accession): - return f"{get_assembly_release_folder(taxonomy_id, assembly_accession)}/release_{taxonomy_id}_{assembly_accession}.log" +def get_release_log_file_name(release_version, taxonomy_id, assembly_accession): + return f"{get_assembly_release_folder(release_version, taxonomy_id, assembly_accession)}/release_{taxonomy_id}_{assembly_accession}.log" +@lru_cache +def get_release_folder(release_version): + folder = os.path.join(cfg.query('release', 'release_output'), f'release_{release_version}') + os.makedirs(folder, exist_ok=True) + return folder @lru_cache -def get_species_release_folder(taxonomy_id): - folder = os.path.join(cfg.query('release', 'release_output'), get_release_folder_name(taxonomy_id)) +def get_species_release_folder(release_version, taxonomy_id): + folder = os.path.join(get_release_folder(release_version), get_release_folder_name(taxonomy_id)) os.makedirs(folder, exist_ok=True) return folder @lru_cache -def get_assembly_release_folder(taxonomy_id, assembly_accession): - folder = os.path.join(get_species_release_folder(taxonomy_id), assembly_accession) +def get_assembly_release_folder(release_version, taxonomy_id, assembly_accession): + folder = os.path.join(get_species_release_folder(release_version, taxonomy_id), assembly_accession) os.makedirs(folder, exist_ok=True) return folder @@ -93,7 +100,7 @@ def run_release_for_species(taxonomy_id, release_assemblies, release_version, re for assembly_accession in release_assemblies: nextflow_params = get_nextflow_params(taxonomy_id, assembly_accession, release_version) workflow_file_path = get_run_release_for_assembly_nextflow() - release_dir = get_assembly_release_folder(taxonomy_id, assembly_accession) + release_dir = get_assembly_release_folder(release_version, taxonomy_id, assembly_accession) nextflow_config = get_nextflow_config() workflow_command = ' '.join(( f"cd {release_dir} &&", @@ -128,7 +135,7 @@ def load_config(*args): cfg.load_config_file( *args, os.environ.get('RELEASE_CONFIG'), 
- '~/.release_config.yml' + os.path.expanduser('~/.release_config.yml') ) From e577312fdcd9cb2501111a52801a64926efb5acc Mon Sep 17 00:00:00 2001 From: tcezard Date: Thu, 6 Jun 2024 16:43:19 +0100 Subject: [PATCH 3/5] add missing release_version for log file name generation --- .../run_release_in_embassy/run_release_for_species.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eva-accession-release-automation/run_release_in_embassy/run_release_for_species.py b/eva-accession-release-automation/run_release_in_embassy/run_release_for_species.py index 7311f25cb..57417710d 100644 --- a/eva-accession-release-automation/run_release_in_embassy/run_release_for_species.py +++ b/eva-accession-release-automation/run_release_in_embassy/run_release_for_species.py @@ -42,7 +42,7 @@ def get_nextflow_params(taxonomy_id, assembly_accession, release_version): 'dump_dir': dump_dir, 'executable': cfg['executable'], 'jar': cfg['jar'], - 'log_file': get_release_log_file_name(taxonomy_id, assembly_accession), + 'log_file': get_release_log_file_name(release_version, taxonomy_id, assembly_accession), 'maven': cfg['maven'], 'python_path': os.environ['PYTHONPATH'], 'release_version': release_version, From 055e82a6fee79d3e34524329c4149405d4d5536a Mon Sep 17 00:00:00 2001 From: tcezard Date: Thu, 6 Jun 2024 17:14:45 +0100 Subject: [PATCH 4/5] replace species folder with assembly folder --- .../analyze_vcf_validation_results.py | 15 +++++------ .../count_rs_ids_in_release_files.py | 14 +++++----- .../create_release_properties_file.py | 6 ++--- .../merge_dbsnp_eva_release_files.py | 26 +++++++++---------- .../release_common_utils.py | 23 ++++++++-------- .../run_release_for_assembly.nf | 16 ++++++------ .../sort_bgzip_index_release_files.py | 20 +++++++------- .../update_sequence_names_to_ena.py | 12 ++++----- .../validate_release_vcf_files.py | 12 ++++----- .../validate_rs_release_files.py | 16 ++++++------ 10 files changed, 79 insertions(+), 81 deletions(-) diff --git 
a/eva-accession-release-automation/run_release_in_embassy/analyze_vcf_validation_results.py b/eva-accession-release-automation/run_release_in_embassy/analyze_vcf_validation_results.py index 1ea1dd2e0..0d83083eb 100644 --- a/eva-accession-release-automation/run_release_in_embassy/analyze_vcf_validation_results.py +++ b/eva-accession-release-automation/run_release_in_embassy/analyze_vcf_validation_results.py @@ -70,22 +70,21 @@ def analyze_asm_report_files(asm_report_files): return exit_code -def analyze_vcf_validation_results(species_release_folder, assembly_accession): - vcf_validation_report_files = glob.glob("{0}/{1}/{2}".format(species_release_folder, assembly_accession, - vcf_validation_output_file_pattern)) +def analyze_vcf_validation_results(assembly_release_folder, assembly_accession): + vcf_validation_report_files = glob.glob("{0}/{2}".format(assembly_release_folder, assembly_accession, + vcf_validation_output_file_pattern)) exit_code = analyze_vcf_validation_files(vcf_validation_report_files) - asm_report_files = glob.glob("{0}/{1}/{2}".format(species_release_folder, assembly_accession, - asm_report_output_file_pattern)) + asm_report_files = glob.glob("{0}/{1}".format(assembly_release_folder, asm_report_output_file_pattern)) exit_code = exit_code or analyze_asm_report_files(asm_report_files) sys.exit(exit_code) -@click.option("--species-release-folder", required=True) +@click.option("--assembly-release-folder", required=True) @click.option("--assembly-accession", required=True) @click.command() -def main(species_release_folder, assembly_accession): +def main(assembly_release_folder, assembly_accession): logging_config.add_stdout_handler() - analyze_vcf_validation_results(species_release_folder, assembly_accession) + analyze_vcf_validation_results(assembly_release_folder, assembly_accession) if __name__ == '__main__': diff --git a/eva-accession-release-automation/run_release_in_embassy/count_rs_ids_in_release_files.py
b/eva-accession-release-automation/run_release_in_embassy/count_rs_ids_in_release_files.py index d950fa52a..8d75004dc 100644 --- a/eva-accession-release-automation/run_release_in_embassy/count_rs_ids_in_release_files.py +++ b/eva-accession-release-automation/run_release_in_embassy/count_rs_ids_in_release_files.py @@ -22,19 +22,19 @@ from run_release_in_embassy.release_common_utils import get_release_vcf_file_name_genbank, get_release_text_file_name -def count_rs_ids_in_release_files(count_ids_script_path, taxonomy_id, assembly_accession, species_release_folder): - release_count_filename = os.path.join(species_release_folder, assembly_accession, "README_rs_ids_counts.txt") +def count_rs_ids_in_release_files(count_ids_script_path, taxonomy_id, assembly_accession, assembly_release_folder): + release_count_filename = os.path.join(assembly_release_folder, "README_rs_ids_counts.txt") with open(release_count_filename, "w") as release_count_file_handle: release_count_file_handle.write("# Unique RS ID counts\n") for vcf_file_category in release_vcf_file_categories: - release_vcf_file_name = get_release_vcf_file_name_genbank(species_release_folder, taxonomy_id, assembly_accession, + release_vcf_file_name = get_release_vcf_file_name_genbank(assembly_release_folder, taxonomy_id, assembly_accession, vcf_file_category) num_ids_in_file = run_command_with_output("Counting RS IDs in file: " + release_vcf_file_name, "{0} {1}.gz".format(count_ids_script_path, release_vcf_file_name), return_process_output=True) release_count_file_handle.write(num_ids_in_file) for text_release_file_category in release_text_file_categories: - text_release_file_name = get_release_text_file_name(species_release_folder, taxonomy_id, assembly_accession, + text_release_file_name = get_release_text_file_name(assembly_release_folder, taxonomy_id, assembly_accession, text_release_file_category) num_ids_in_file = run_command_with_output("Counting RS IDs in file: " + text_release_file_name,
"zcat {0}.gz | cut -f1 | uniq | wc -l" @@ -46,11 +46,11 @@ def count_rs_ids_in_release_files(count_ids_script_path, taxonomy_id, assembly_a @click.option("--count-ids-script-path", help="ex: /path/to/count/ids/script", required=True) @click.option("--taxonomy-id", help="ex: 9913", required=True) @click.option("--assembly-accession", help="ex: GCA_000003055.6", required=True) -@click.option("--species-release-folder", required=True) +@click.option("--assembly-release-folder", required=True) @click.command() -def main(count_ids_script_path, taxonomy_id, assembly_accession, species_release_folder): +def main(count_ids_script_path, taxonomy_id, assembly_accession, assembly_release_folder): logging_config.add_stdout_handler() - count_rs_ids_in_release_files(count_ids_script_path, taxonomy_id, assembly_accession, species_release_folder) + count_rs_ids_in_release_files(count_ids_script_path, taxonomy_id, assembly_accession, assembly_release_folder) if __name__ == "__main__": diff --git a/eva-accession-release-automation/run_release_in_embassy/create_release_properties_file.py b/eva-accession-release-automation/run_release_in_embassy/create_release_properties_file.py index fcd7acfa6..0a7a298e6 100644 --- a/eva-accession-release-automation/run_release_in_embassy/create_release_properties_file.py +++ b/eva-accession-release-automation/run_release_in_embassy/create_release_properties_file.py @@ -64,14 +64,14 @@ def create_release_properties_file_for_assembly(private_config_xml_file, profile @click.option("--release-species-inventory-table", default="eva_progress_tracker.clustering_release_tracker", required=False) @click.option("--release-version", help="ex: 2", type=int, required=True) -@click.option("--species-release-folder", required=True) +@click.option("--assembly-release-folder", required=True) @click.command() def main(private_config_xml_file, profile, taxonomy_id, assembly_accession, release_species_inventory_table, - release_version, species_release_folder): + 
release_version, assembly_release_folder): logging_config.add_stdout_handler() create_release_properties_file_for_assembly(private_config_xml_file, profile, taxonomy_id, assembly_accession, release_species_inventory_table, release_version, - species_release_folder) + assembly_release_folder) if __name__ == "__main__": diff --git a/eva-accession-release-automation/run_release_in_embassy/merge_dbsnp_eva_release_files.py b/eva-accession-release-automation/run_release_in_embassy/merge_dbsnp_eva_release_files.py index 0c45aa7c7..47c901404 100644 --- a/eva-accession-release-automation/run_release_in_embassy/merge_dbsnp_eva_release_files.py +++ b/eva-accession-release-automation/run_release_in_embassy/merge_dbsnp_eva_release_files.py @@ -26,10 +26,10 @@ get_release_vcf_file_name, get_unsorted_release_vcf_file_name, get_unsorted_release_text_file_name -def move_release_files_to_unsorted_category(taxonomy_id, assembly_accession, species_release_folder, vcf_file_category, +def move_release_files_to_unsorted_category(taxonomy_id, assembly_accession, assembly_release_folder, vcf_file_category, unsorted_release_file_path): unsorted_release_file_name = os.path.basename(unsorted_release_file_path) - release_file_path = get_release_vcf_file_name(species_release_folder, taxonomy_id, assembly_accession, + release_file_path = get_release_vcf_file_name(assembly_release_folder, taxonomy_id, assembly_accession, vcf_file_category) release_file_name = os.path.basename(release_file_path) for variant_source in ["eva", "dbsnp"]: @@ -87,17 +87,17 @@ def merge_dbsnp_eva_vcf_headers(file1, file2, output_file): def merge_dbsnp_eva_vcf_files(bgzip_path, bcftools_path, vcf_sort_script_path, taxonomy_id, assembly_accession, - species_release_folder, vcf_file_category, data_sources): + assembly_release_folder, vcf_file_category, data_sources): vcf_merge_commands = [] # This is the desired post-merge output file name in the format _.vcf # ex: 60711_GCA_000409795.2_merged_ids.vcf - 
unsorted_release_file_path = get_unsorted_release_vcf_file_name(species_release_folder, taxonomy_id, assembly_accession, + unsorted_release_file_path = get_unsorted_release_vcf_file_name(assembly_release_folder, taxonomy_id, assembly_accession, vcf_file_category) unsorted_release_file_name = os.path.basename(unsorted_release_file_path) # After release pipeline is run on a species, the default VCF output files are in the formats like below # ex: eva_GCA_000409795.2_merged_ids.vcf and dbsnp_GCA_000409795.2_merged_ids.vcf # Move them to files with _unsorted suffix to avoid confusion - move_release_files_to_unsorted_category(taxonomy_id, assembly_accession, species_release_folder, vcf_file_category, + move_release_files_to_unsorted_category(taxonomy_id, assembly_accession, assembly_release_folder, vcf_file_category, unsorted_release_file_path) dbsnp_vcf_file_pattern = unsorted_release_file_path.replace(unsorted_release_file_name, "dbsnp*_" + unsorted_release_file_name.replace(f'{str(taxonomy_id)}_', '')) @@ -134,10 +134,10 @@ def merge_dbsnp_eva_vcf_files(bgzip_path, bcftools_path, vcf_sort_script_path, t return vcf_merge_commands -def merge_dbsnp_eva_text_files(taxonomy_id, assembly_accession, species_release_folder, text_release_file_category, +def merge_dbsnp_eva_text_files(taxonomy_id, assembly_accession, assembly_release_folder, text_release_file_category, data_sources): text_release_file_merge_commands = [] - unsorted_release_file_path = get_unsorted_release_text_file_name(species_release_folder, taxonomy_id, assembly_accession, + unsorted_release_file_path = get_unsorted_release_text_file_name(assembly_release_folder, taxonomy_id, assembly_accession, text_release_file_category) unsorted_release_file_name = os.path.basename(unsorted_release_file_path) # After release is run on a species, the default text (i.e., non-vcf) output files have ".unsorted.txt" file suffix @@ -174,7 +174,7 @@ def merge_dbsnp_eva_text_files(taxonomy_id, assembly_accession, 
species_release_ def merge_dbsnp_eva_release_files(private_config_xml_file, profile, bgzip_path, bcftools_path, vcf_sort_script_path, taxonomy_id, assembly_accession, release_species_inventory_table, release_version, - species_release_folder): + assembly_release_folder): with get_metadata_connection_handle(profile, private_config_xml_file) as metadata_connection_handle: release_info = get_release_inventory_info_for_assembly(taxonomy_id, assembly_accession, release_species_inventory_table, @@ -182,10 +182,10 @@ def merge_dbsnp_eva_release_files(private_config_xml_file, profile, bgzip_path, merge_commands = [] for vcf_file_category in release_vcf_file_categories: merge_commands.extend(merge_dbsnp_eva_vcf_files(bgzip_path, bcftools_path, vcf_sort_script_path, - taxonomy_id, assembly_accession, species_release_folder, + taxonomy_id, assembly_accession, assembly_release_folder, vcf_file_category, release_info["sources"])) for text_release_file_category in release_text_file_categories: - merge_commands.extend(merge_dbsnp_eva_text_files(taxonomy_id, assembly_accession, species_release_folder, + merge_commands.extend(merge_dbsnp_eva_text_files(taxonomy_id, assembly_accession, assembly_release_folder, text_release_file_category, release_info["sources"])) final_merge_command = " && ".join(merge_commands) run_command_with_output(f"Merging dbSNP and EVA release files for taxonomy {taxonomy_id} and assembly {assembly_accession}", @@ -202,14 +202,14 @@ def merge_dbsnp_eva_release_files(private_config_xml_file, profile, bgzip_path, @click.option("--release-species-inventory-table", default="eva_progress_tracker.clustering_release_tracker", required=False) @click.option("--release-version", help="ex: 2", type=int, required=True) -@click.option("--species-release-folder", required=True) +@click.option("--assembly-release-folder", required=True) @click.command() def main(private_config_xml_file, profile, bgzip_path, bcftools_path, vcf_sort_script_path, taxonomy_id, - 
assembly_accession, release_species_inventory_table, release_version, species_release_folder): + assembly_accession, release_species_inventory_table, release_version, assembly_release_folder): logging_config.add_stdout_handler() merge_dbsnp_eva_release_files(private_config_xml_file, profile, bgzip_path, bcftools_path, vcf_sort_script_path, taxonomy_id, assembly_accession, release_species_inventory_table, release_version, - species_release_folder) + assembly_release_folder) if __name__ == "__main__": diff --git a/eva-accession-release-automation/run_release_in_embassy/release_common_utils.py b/eva-accession-release-automation/run_release_in_embassy/release_common_utils.py index 8bc966245..d5017053d 100644 --- a/eva-accession-release-automation/run_release_in_embassy/release_common_utils.py +++ b/eva-accession-release-automation/run_release_in_embassy/release_common_utils.py @@ -56,32 +56,31 @@ def get_bgzip_bcftools_index_commands_for_file(bgzip_path, bcftools_path, file): return commands -def get_release_vcf_file_name(species_release_folder, taxonomy_id, assembly_accession, vcf_file_category): - return os.path.join(species_release_folder, assembly_accession, "{0}_{1}_{2}.vcf".format(taxonomy_id, - assembly_accession, - vcf_file_category)) +def get_release_vcf_file_name(assembly_release_folder, taxonomy_id, assembly_accession, vcf_file_category): + return os.path.join(assembly_release_folder, "{0}_{1}_{2}.vcf".format(taxonomy_id, assembly_accession, + vcf_file_category)) -def get_release_vcf_file_name_genbank(species_release_folder, taxonomy_id, assembly_accession, vcf_file_category): +def get_release_vcf_file_name_genbank(assembly_release_folder, taxonomy_id, assembly_accession, vcf_file_category): return os.path.join( - species_release_folder, assembly_accession, + assembly_release_folder, "{0}_{1}_{2}_with_genbank.vcf".format(taxonomy_id, assembly_accession, vcf_file_category) ) -def get_unsorted_release_vcf_file_name(species_release_folder, taxonomy_id, 
assembly_accession, vcf_file_category): - vcf_file_path = get_release_vcf_file_name(species_release_folder, taxonomy_id, assembly_accession, vcf_file_category) +def get_unsorted_release_vcf_file_name(assembly_release_folder, taxonomy_id, assembly_accession, vcf_file_category): + vcf_file_path = get_release_vcf_file_name(assembly_release_folder, taxonomy_id, assembly_accession, vcf_file_category) filename = os.path.basename(vcf_file_path) return vcf_file_path.replace(filename, filename.replace(".vcf", "_unsorted.vcf")) -def get_release_text_file_name(species_release_folder, taxonomy_id, assembly_accession, release_text_file_category): - return os.path.join(species_release_folder, assembly_accession, +def get_release_text_file_name(assembly_release_folder, taxonomy_id, assembly_accession, release_text_file_category): + return os.path.join(assembly_release_folder, "{0}_{1}_{2}.txt".format(taxonomy_id, assembly_accession, release_text_file_category)) -def get_unsorted_release_text_file_name(species_release_folder, taxonomy_id, assembly_accession, release_text_file_category): - release_text_file_path = get_release_text_file_name(species_release_folder, taxonomy_id, assembly_accession, +def get_unsorted_release_text_file_name(assembly_release_folder, taxonomy_id, assembly_accession, release_text_file_category): + release_text_file_path = get_release_text_file_name(assembly_release_folder, taxonomy_id, assembly_accession, release_text_file_category) filename = os.path.basename(release_text_file_path) return release_text_file_path.replace(filename, filename.replace(".txt", ".unsorted.txt")) diff --git a/eva-accession-release-automation/run_release_in_embassy/run_release_for_assembly.nf b/eva-accession-release-automation/run_release_in_embassy/run_release_for_assembly.nf index 8423ed164..7f71d5d5e 100644 --- a/eva-accession-release-automation/run_release_in_embassy/run_release_for_assembly.nf +++ 
b/eva-accession-release-automation/run_release_in_embassy/run_release_for_assembly.nf @@ -56,7 +56,7 @@ process run_release_for_assembly { script: """ export PYTHONPATH=$params.python_path - $params.executable.python_interpreter -m run_release_in_embassy.run_release_for_assembly --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --species-release-folder $params.assembly_folder --release-jar-path $params.jar.release_pipeline 1>> $params.log_file 2>&1 + $params.executable.python_interpreter -m run_release_in_embassy.run_release_for_assembly --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --assembly-release-folder $params.assembly_folder --release-jar-path $params.jar.release_pipeline 1>> $params.log_file 2>&1 """ } @@ -73,7 +73,7 @@ process merge_dbsnp_eva_release_files { script: """ export PYTHONPATH=$params.python_path - $params.executable.python_interpreter -m run_release_in_embassy.merge_dbsnp_eva_release_files --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --bgzip-path $params.executable.bgzip --bcftools-path $params.executable.bcftools --vcf-sort-script-path $params.executable.sort_vcf_sorted_chromosomes --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --species-release-folder $params.assembly_folder 1>> $params.log_file 2>&1 + $params.executable.python_interpreter -m run_release_in_embassy.merge_dbsnp_eva_release_files 
--private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --bgzip-path $params.executable.bgzip --bcftools-path $params.executable.bcftools --vcf-sort-script-path $params.executable.sort_vcf_sorted_chromosomes --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --assembly-release-folder $params.assembly_folder 1>> $params.log_file 2>&1 """ } @@ -90,7 +90,7 @@ process sort_bgzip_index_release_files { script: """ export PYTHONPATH=$params.python_path - $params.executable.python_interpreter -m run_release_in_embassy.sort_bgzip_index_release_files --bgzip-path $params.executable.bgzip --bcftools-path $params.executable.bcftools --vcf-sort-script-path $params.executable.sort_vcf_sorted_chromosomes --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --species-release-folder $params.assembly_folder 1>> $params.log_file 2>&1 + $params.executable.python_interpreter -m run_release_in_embassy.sort_bgzip_index_release_files --bgzip-path $params.executable.bgzip --bcftools-path $params.executable.bcftools --vcf-sort-script-path $params.executable.sort_vcf_sorted_chromosomes --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --assembly-release-folder $params.assembly_folder 1>> $params.log_file 2>&1 """ } @@ -107,7 +107,7 @@ process validate_release_vcf_files { script: """ export PYTHONPATH=$params.python_path - $params.executable.python_interpreter -m run_release_in_embassy.validate_release_vcf_files --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --species-release-folder $params.assembly_folder --vcf-validator-path 
$params.executable.vcf_validator --assembly-checker-path $params.executable.vcf_assembly_checker 1>> $params.log_file 2>&1 + $params.executable.python_interpreter -m run_release_in_embassy.validate_release_vcf_files --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --assembly-release-folder $params.assembly_folder --vcf-validator-path $params.executable.vcf_validator --assembly-checker-path $params.executable.vcf_assembly_checker 1>> $params.log_file 2>&1 """ } @@ -124,7 +124,7 @@ process analyze_vcf_validation_results { script: """ export PYTHONPATH=$params.python_path - $params.executable.python_interpreter -m run_release_in_embassy.analyze_vcf_validation_results --species-release-folder $params.assembly_folder --assembly-accession $params.assembly 1>> $params.log_file 2>&1 + $params.executable.python_interpreter -m run_release_in_embassy.analyze_vcf_validation_results --assembly-release-folder $params.assembly_folder --assembly-accession $params.assembly 1>> $params.log_file 2>&1 """ } @@ -141,7 +141,7 @@ process count_rs_ids_in_release_files { script: """ export PYTHONPATH=$params.python_path - $params.executable.python_interpreter -m run_release_in_embassy.count_rs_ids_in_release_files --count-ids-script-path $params.executable.count_ids_in_vcf --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --species-release-folder $params.assembly_folder 1>> $params.log_file 2>&1 + $params.executable.python_interpreter -m run_release_in_embassy.count_rs_ids_in_release_files --count-ids-script-path $params.executable.count_ids_in_vcf --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --assembly-release-folder $params.assembly_folder 1>> $params.log_file 2>&1 """ } @@ -158,7 +158,7 @@ process 
validate_rs_release_files { script: """ export PYTHONPATH=$params.python_path - $params.executable.python_interpreter -m run_release_in_embassy.validate_rs_release_files --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --species-release-folder $params.assembly_folder 1>> $params.log_file 2>&1 + $params.executable.python_interpreter -m run_release_in_embassy.validate_rs_release_files --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --assembly-release-folder $params.assembly_folder 1>> $params.log_file 2>&1 """ } @@ -175,7 +175,7 @@ process update_sequence_names_to_ena { script: """ export PYTHONPATH=$params.python_path - $params.executable.python_interpreter -m run_release_in_embassy.update_sequence_names_to_ena --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --species-release-folder $params.assembly_folder --sequence-name-converter-path $params.executable.convert_vcf_file --bcftools-path $params.executable.bcftools 1>> $params.log_file 2>&1 + $params.executable.python_interpreter -m run_release_in_embassy.update_sequence_names_to_ena --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --assembly-release-folder $params.assembly_folder --sequence-name-converter-path $params.executable.convert_vcf_file --bcftools-path $params.executable.bcftools 1>> $params.log_file 2>&1 """ } diff --git a/eva-accession-release-automation/run_release_in_embassy/sort_bgzip_index_release_files.py b/eva-accession-release-automation/run_release_in_embassy/sort_bgzip_index_release_files.py 
index 15bbc09ca..f67900f40 100644 --- a/eva-accession-release-automation/run_release_in_embassy/sort_bgzip_index_release_files.py +++ b/eva-accession-release-automation/run_release_in_embassy/sort_bgzip_index_release_files.py @@ -25,20 +25,20 @@ def sort_bgzip_index_release_files(bgzip_path, bcftools_path, vcf_sort_script_path, taxonomy_id, assembly_accession, - species_release_folder): + assembly_release_folder): commands = [] # These files are left behind by the sort_vcf_sorted_chromosomes.sh script # To be idempotent, remove such files - commands.append("rm -f {0}/{1}/*.chromosomes".format(species_release_folder, assembly_accession)) + commands.append("rm -f {0}/*.chromosomes".format(assembly_release_folder)) for vcf_file_category in release_vcf_file_categories: - unsorted_release_file_name = get_unsorted_release_vcf_file_name(species_release_folder, taxonomy_id, + unsorted_release_file_name = get_unsorted_release_vcf_file_name(assembly_release_folder, taxonomy_id, assembly_accession, vcf_file_category) - sorted_release_file_name = get_release_vcf_file_name_genbank(species_release_folder, taxonomy_id, + sorted_release_file_name = get_release_vcf_file_name_genbank(assembly_release_folder, taxonomy_id, assembly_accession, vcf_file_category) if vcf_file_category == 'current_ids': commands.append( f"rm -f {sorted_release_file_name} && " - f"{bcftools_path} sort -T {species_release_folder} -m 2G -o {sorted_release_file_name} " + f"{bcftools_path} sort -T {assembly_release_folder} -m 2G -o {sorted_release_file_name} " f"{unsorted_release_file_name}" ) else: @@ -47,9 +47,9 @@ def sort_bgzip_index_release_files(bgzip_path, bcftools_path, vcf_sort_script_pa sorted_release_file_name)) commands.extend(get_bgzip_bcftools_index_commands_for_file(bgzip_path, bcftools_path, sorted_release_file_name)) for text_release_file_category in release_text_file_categories: - unsorted_release_file_name = get_unsorted_release_text_file_name(species_release_folder, taxonomy_id, + 
unsorted_release_file_name = get_unsorted_release_text_file_name(assembly_release_folder, taxonomy_id, assembly_accession, text_release_file_category) - sorted_release_file_name = get_release_text_file_name(species_release_folder, taxonomy_id, assembly_accession, + sorted_release_file_name = get_release_text_file_name(assembly_release_folder, taxonomy_id, assembly_accession, text_release_file_category) commands.append("(sort -V {1} | uniq > {2})".format(vcf_sort_script_path, unsorted_release_file_name, @@ -65,12 +65,12 @@ def sort_bgzip_index_release_files(bgzip_path, bcftools_path, vcf_sort_script_pa @click.option("--vcf-sort-script-path", help="ex: /path/to/vcf/sort/script", required=True) @click.option("--taxonomy-id", help="ex: 9913", required=True) @click.option("--assembly-accession", help="ex: GCA_000003055.6", required=True) -@click.option("--species-release-folder", required=True) +@click.option("--assembly-release-folder", required=True) @click.command() -def main(bgzip_path, bcftools_path, vcf_sort_script_path, taxonomy_id, assembly_accession, species_release_folder): +def main(bgzip_path, bcftools_path, vcf_sort_script_path, taxonomy_id, assembly_accession, assembly_release_folder): logging_config.add_stdout_handler() sort_bgzip_index_release_files(bgzip_path, bcftools_path, vcf_sort_script_path, taxonomy_id, assembly_accession, - species_release_folder) + assembly_release_folder) if __name__ == "__main__": diff --git a/eva-accession-release-automation/run_release_in_embassy/update_sequence_names_to_ena.py b/eva-accession-release-automation/run_release_in_embassy/update_sequence_names_to_ena.py index a15f5c975..b066ab3c4 100644 --- a/eva-accession-release-automation/run_release_in_embassy/update_sequence_names_to_ena.py +++ b/eva-accession-release-automation/run_release_in_embassy/update_sequence_names_to_ena.py @@ -21,12 +21,12 @@ from ebi_eva_common_pyutils.command_utils import run_command_with_output -def update_sequence_name(taxonomy_id, 
assembly_accession, species_release_folder, sequence_name_converter_path, +def update_sequence_name(taxonomy_id, assembly_accession, assembly_release_folder, sequence_name_converter_path, bcftools_path): for vcf_file_category in release_vcf_file_categories: - release_vcf_file_name = get_release_vcf_file_name_genbank(species_release_folder, taxonomy_id, + release_vcf_file_name = get_release_vcf_file_name_genbank(assembly_release_folder, taxonomy_id, assembly_accession, vcf_file_category) - release_vcf_file_output_name = get_release_vcf_file_name(species_release_folder, taxonomy_id, + release_vcf_file_output_name = get_release_vcf_file_name(assembly_release_folder, taxonomy_id, assembly_accession, vcf_file_category) # Commands run separately so the index isn't attempted if the conversion fails @@ -40,13 +40,13 @@ def update_sequence_name(taxonomy_id, assembly_accession, species_release_folder @click.option("--taxonomy-id", help="ex: 9913", required=True) @click.option("--assembly-accession", help="ex: GCA_000003055.6", required=True) -@click.option("--species-release-folder", required=True) +@click.option("--assembly-release-folder", required=True) @click.option("--sequence-name-converter-path", help="/path/to/vcf/sequence-name-converter", required=True) @click.option("--bcftools-path", help="ex: /path/to/bcftools/binary", required=True) @click.command() -def main(taxonomy_id, assembly_accession, species_release_folder, sequence_name_converter_path, bcftools_path): +def main(taxonomy_id, assembly_accession, assembly_release_folder, sequence_name_converter_path, bcftools_path): logging_config.add_stdout_handler() - update_sequence_name(taxonomy_id, assembly_accession, species_release_folder, sequence_name_converter_path, bcftools_path) + update_sequence_name(taxonomy_id, assembly_accession, assembly_release_folder, sequence_name_converter_path, bcftools_path) if __name__ == "__main__": diff --git 
a/eva-accession-release-automation/run_release_in_embassy/validate_release_vcf_files.py b/eva-accession-release-automation/run_release_in_embassy/validate_release_vcf_files.py index b3b543e2b..45ef71e1e 100644 --- a/eva-accession-release-automation/run_release_in_embassy/validate_release_vcf_files.py +++ b/eva-accession-release-automation/run_release_in_embassy/validate_release_vcf_files.py @@ -32,10 +32,10 @@ def remove_index_if_outdated(fasta_path): def validate_release_vcf_files(private_config_xml_file, profile, taxonomy_id, assembly_accession, - release_species_inventory_table, release_version, species_release_folder, + release_species_inventory_table, release_version, assembly_release_folder, vcf_validator_path, assembly_checker_path): run_command_with_output("Remove existing VCF validation and assembly report outputs...", - "rm -f {0}/{1}/{2} {0}/{1}/{3}".format(species_release_folder, assembly_accession, + "rm -f {0}/{1}/{2} {0}/{1}/{3}".format(assembly_release_folder, assembly_accession, vcf_validation_output_file_pattern, asm_report_output_file_pattern)) validate_release_vcf_files_commands = [] @@ -52,7 +52,7 @@ def validate_release_vcf_files(private_config_xml_file, profile, taxonomy_id, as for vcf_file_category in release_vcf_file_categories: - release_vcf_file_name = get_release_vcf_file_name_genbank(species_release_folder, taxonomy_id, + release_vcf_file_name = get_release_vcf_file_name_genbank(assembly_release_folder, taxonomy_id, assembly_accession, vcf_file_category) release_vcf_dir = os.path.dirname(release_vcf_file_name) if "multimap" not in vcf_file_category: @@ -75,15 +75,15 @@ def validate_release_vcf_files(private_config_xml_file, profile, taxonomy_id, as @click.option("--release-species-inventory-table", default="eva_progress_tracker.clustering_release_tracker", required=False) @click.option("--release-version", help="ex: 2", type=int, required=True) -@click.option("--species-release-folder", required=True) 
+@click.option("--assembly-release-folder", required=True) @click.option("--vcf-validator-path", help="/path/to/vcf/validator/binary", required=True) @click.option("--assembly-checker-path", help="/path/to/assembly/checker/binary", required=True) @click.command() def main(private_config_xml_file, profile, taxonomy_id, assembly_accession, release_species_inventory_table, release_version, - species_release_folder, vcf_validator_path, assembly_checker_path): + assembly_release_folder, vcf_validator_path, assembly_checker_path): logging_config.add_stdout_handler() validate_release_vcf_files(private_config_xml_file, profile, taxonomy_id, assembly_accession, - release_species_inventory_table, release_version, species_release_folder, + release_species_inventory_table, release_version, assembly_release_folder, vcf_validator_path, assembly_checker_path) diff --git a/eva-accession-release-automation/run_release_in_embassy/validate_rs_release_files.py b/eva-accession-release-automation/run_release_in_embassy/validate_rs_release_files.py index 23809e23b..e894be046 100644 --- a/eva-accession-release-automation/run_release_in_embassy/validate_rs_release_files.py +++ b/eva-accession-release-automation/run_release_in_embassy/validate_rs_release_files.py @@ -257,8 +257,8 @@ def read_next_batch_of_missing_ids(missing_rs_ids_file_handle): yield lines_read -def get_unique_release_rs_ids(species_release_folder, taxonomy_id, assembly_accession): - folder_prefix = os.path.join(species_release_folder, assembly_accession, f'{taxonomy_id}_{assembly_accession}') +def get_unique_release_rs_ids(assembly_release_folder, taxonomy_id, assembly_accession): + folder_prefix = os.path.join(assembly_release_folder, assembly_accession, f'{taxonomy_id}_{assembly_accession}') active_rs_ids_file = folder_prefix + "_current_ids_with_genbank.vcf.gz" merged_rs_ids_file = folder_prefix + "_merged_ids_with_genbank.vcf.gz" multimap_rs_ids_file = folder_prefix + "_multimap_ids_with_genbank.vcf.gz" @@ -460,7 
+460,7 @@ def export_unique_rs_ids_from_mongo(mongo_database_handle, taxonomy_id, assembly def validate_rs_release_files(private_config_xml_file, profile, taxonomy_id, assembly_accession, release_species_inventory_table, - release_version, species_release_folder): + release_version, assembly_release_folder): port_forwarding_process_id, mongo_port, exit_code = None, None, -1 try: port_forwarding_process_id, mongo_port = open_mongo_port_to_tempmongo(private_config_xml_file, profile, taxonomy_id, @@ -469,10 +469,10 @@ def validate_rs_release_files(private_config_xml_file, profile, taxonomy_id, ass db_name_in_tempmongo_instance = get_release_db_name_in_tempmongo_instance(taxonomy_id, assembly_accession) with MongoClient(port=mongo_port) as client: db_handle = client[db_name_in_tempmongo_instance] - mongo_unique_rs_ids_file = os.path.join(species_release_folder, assembly_accession, + mongo_unique_rs_ids_file = os.path.join(assembly_release_folder, assembly_accession, "{0}_mongo_unique_rs_ids.txt".format(assembly_accession)) export_unique_rs_ids_from_mongo(db_handle, taxonomy_id, assembly_accession, mongo_unique_rs_ids_file) - unique_release_rs_ids_file = get_unique_release_rs_ids(species_release_folder, taxonomy_id, + unique_release_rs_ids_file = get_unique_release_rs_ids(assembly_release_folder, taxonomy_id, assembly_accession) missing_rs_ids_file = os.path.join(os.path.dirname(unique_release_rs_ids_file), assembly_accession + "_missing_ids.txt") @@ -496,13 +496,13 @@ def validate_rs_release_files(private_config_xml_file, profile, taxonomy_id, ass @click.option("--release-species-inventory-table", default="eva_progress_tracker.clustering_release_tracker", required=False) @click.option("--release-version", help="ex: 2", type=int, required=True) -@click.option("--species-release-folder", required=True) +@click.option("--assembly-release-folder", required=True) @click.command() def main(private_config_xml_file, profile, taxonomy_id, assembly_accession, 
release_species_inventory_table, - release_version, species_release_folder): + release_version, assembly_release_folder): logging_config.add_stdout_handler() validate_rs_release_files(private_config_xml_file, profile, taxonomy_id, assembly_accession, - release_species_inventory_table, release_version, species_release_folder) + release_species_inventory_table, release_version, assembly_release_folder) if __name__ == '__main__': From 849193e64eaed85eca6c8d9d0bddf29da132dd27 Mon Sep 17 00:00:00 2001 From: tcezard Date: Fri, 7 Jun 2024 08:26:30 +0100 Subject: [PATCH 5/5] replace species folder with assembly folder --- .../run_release_in_embassy/analyze_vcf_validation_results.py | 4 ++-- .../run_release_in_embassy/count_rs_ids_in_release_files.py | 2 +- .../run_release_in_embassy/validate_release_vcf_files.py | 5 ++--- .../run_release_in_embassy/validate_rs_release_files.py | 4 ++-- 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/eva-accession-release-automation/run_release_in_embassy/analyze_vcf_validation_results.py b/eva-accession-release-automation/run_release_in_embassy/analyze_vcf_validation_results.py index 0d83083eb..c022fd0e1 100644 --- a/eva-accession-release-automation/run_release_in_embassy/analyze_vcf_validation_results.py +++ b/eva-accession-release-automation/run_release_in_embassy/analyze_vcf_validation_results.py @@ -71,10 +71,10 @@ def analyze_asm_report_files(asm_report_files): def analyze_vcf_validation_results(assembly_release_folder, assembly_accession): - vcf_validation_report_files = glob.glob("{0}/{2}".format(assembly_release_folder, assembly_accession, + vcf_validation_report_files = glob.glob("{0}/{1}".format(assembly_release_folder, vcf_validation_output_file_pattern)) exit_code = analyze_vcf_validation_files(vcf_validation_report_files) - asm_report_files = glob.glob("{0}/{2}".format(assembly_release_folder, asm_report_output_file_pattern)) + asm_report_files = glob.glob("{0}/{1}".format(assembly_release_folder, 
asm_report_output_file_pattern)) exit_code = exit_code or analyze_asm_report_files(asm_report_files) sys.exit(exit_code) diff --git a/eva-accession-release-automation/run_release_in_embassy/count_rs_ids_in_release_files.py b/eva-accession-release-automation/run_release_in_embassy/count_rs_ids_in_release_files.py index 8d75004dc..6d0712902 100644 --- a/eva-accession-release-automation/run_release_in_embassy/count_rs_ids_in_release_files.py +++ b/eva-accession-release-automation/run_release_in_embassy/count_rs_ids_in_release_files.py @@ -23,7 +23,7 @@ def count_rs_ids_in_release_files(count_ids_script_path, taxonomy_id, assembly_accession, assembly_release_folder): - release_count_filename = os.path.join(assembly_release_folder, assembly_accession, "README_rs_ids_counts.txt") + release_count_filename = os.path.join(assembly_release_folder, "README_rs_ids_counts.txt") with open(release_count_filename, "w") as release_count_file_handle: release_count_file_handle.write("# Unique RS ID counts\n") for vcf_file_category in release_vcf_file_categories: diff --git a/eva-accession-release-automation/run_release_in_embassy/validate_release_vcf_files.py b/eva-accession-release-automation/run_release_in_embassy/validate_release_vcf_files.py index 45ef71e1e..0cb11fd46 100644 --- a/eva-accession-release-automation/run_release_in_embassy/validate_release_vcf_files.py +++ b/eva-accession-release-automation/run_release_in_embassy/validate_release_vcf_files.py @@ -35,9 +35,8 @@ def validate_release_vcf_files(private_config_xml_file, profile, taxonomy_id, as release_species_inventory_table, release_version, assembly_release_folder, vcf_validator_path, assembly_checker_path): run_command_with_output("Remove existing VCF validation and assembly report outputs...", - "rm -f {0}/{1}/{2} {0}/{1}/{3}".format(assembly_release_folder, assembly_accession, - vcf_validation_output_file_pattern, - asm_report_output_file_pattern)) + "rm -f {0}/{1} {0}/{2}".format(assembly_release_folder, 
vcf_validation_output_file_pattern, + asm_report_output_file_pattern)) validate_release_vcf_files_commands = [] with get_metadata_connection_handle(profile, private_config_xml_file) as metadata_connection_handle: release_inventory_info_for_assembly = get_release_inventory_info_for_assembly(taxonomy_id, assembly_accession, diff --git a/eva-accession-release-automation/run_release_in_embassy/validate_rs_release_files.py b/eva-accession-release-automation/run_release_in_embassy/validate_rs_release_files.py index e894be046..8a1aafcf7 100644 --- a/eva-accession-release-automation/run_release_in_embassy/validate_rs_release_files.py +++ b/eva-accession-release-automation/run_release_in_embassy/validate_rs_release_files.py @@ -258,7 +258,7 @@ def read_next_batch_of_missing_ids(missing_rs_ids_file_handle): def get_unique_release_rs_ids(assembly_release_folder, taxonomy_id, assembly_accession): - folder_prefix = os.path.join(assembly_release_folder, assembly_accession, f'{taxonomy_id}_{assembly_accession}') + folder_prefix = os.path.join(assembly_release_folder, f'{taxonomy_id}_{assembly_accession}') active_rs_ids_file = folder_prefix + "_current_ids_with_genbank.vcf.gz" merged_rs_ids_file = folder_prefix + "_merged_ids_with_genbank.vcf.gz" multimap_rs_ids_file = folder_prefix + "_multimap_ids_with_genbank.vcf.gz" @@ -469,7 +469,7 @@ def validate_rs_release_files(private_config_xml_file, profile, taxonomy_id, ass db_name_in_tempmongo_instance = get_release_db_name_in_tempmongo_instance(taxonomy_id, assembly_accession) with MongoClient(port=mongo_port) as client: db_handle = client[db_name_in_tempmongo_instance] - mongo_unique_rs_ids_file = os.path.join(assembly_release_folder, assembly_accession, + mongo_unique_rs_ids_file = os.path.join(assembly_release_folder, "{0}_mongo_unique_rs_ids.txt".format(assembly_accession)) export_unique_rs_ids_from_mongo(db_handle, taxonomy_id, assembly_accession, mongo_unique_rs_ids_file) unique_release_rs_ids_file = 
get_unique_release_rs_ids(assembly_release_folder, taxonomy_id,