From f027907ecece9a4097bae2a49f9423d0cbee35b1 Mon Sep 17 00:00:00 2001 From: danielp Date: Wed, 20 Mar 2024 15:18:39 +0000 Subject: [PATCH 1/4] Minor Fix for taxid scope --- src/ensembl/production/metadata/updater/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ensembl/production/metadata/updater/core.py b/src/ensembl/production/metadata/updater/core.py index 39023688..ae5ea186 100644 --- a/src/ensembl/production/metadata/updater/core.py +++ b/src/ensembl/production/metadata/updater/core.py @@ -290,8 +290,8 @@ def get_or_new_organism(self, species_id, meta_session): # Getting the common name from the meta table, otherwise we grab it from ncbi. common_name = self.get_meta_single_meta_key(species_id, "species.common_name") + taxid = self.get_meta_single_meta_key(species_id, "species.taxonomy_id") if common_name is None: - taxid = self.get_meta_single_meta_key(species_id, "species.taxonomy_id") with self.taxonomy_db.session_scope() as session: common_name = session.query(NCBITaxaName).filter( From c8ac2b6050c6d0b9ba2a526397ec2e24bb329dca Mon Sep 17 00:00:00 2001 From: danielp Date: Thu, 21 Mar 2024 15:32:34 +0000 Subject: [PATCH 2/4] Bugfix for new_homology --- src/ensembl/production/metadata/updater/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ensembl/production/metadata/updater/core.py b/src/ensembl/production/metadata/updater/core.py index ae5ea186..cdfbec21 100644 --- a/src/ensembl/production/metadata/updater/core.py +++ b/src/ensembl/production/metadata/updater/core.py @@ -585,9 +585,9 @@ def get_or_new_genebuild(self, species_id, meta_session, source=None, existing=F def new_homology(self, meta_session, species_id, genome=None, source=None, dataset_attributes=None, version="1.0"): if source is None: production_name = self.get_meta_single_meta_key(species_id, "species.production_name") - db_version = self.get_meta_single_meta_key(species_id, "schema_version") + db_version = self.get_meta_single_meta_key(None, "schema_version") compara_name = production_name + "_compara_" + db_version - dataset_source = self.get_or_new_source(meta_session, "compara", name="compara_name") + dataset_source = self.get_or_new_source(meta_session, "compara", name=compara_name) else: dataset_source = source dataset_type = meta_session.query(DatasetType).filter(DatasetType.name == "homologies").first() From 3fd7dd6da9b5c1ae6d07d4a42ed9629121583813 Mon Sep 17 00:00:00 2001 From: danielp Date: Thu, 21 Mar 2024 15:47:15 +0000 Subject: [PATCH 3/4] fixed tests --- src/tests/databases/core_1/meta.txt | 2 +- src/tests/databases/core_2/meta.txt | 2 +- src/tests/databases/core_3/meta.txt | 2 +- src/tests/databases/core_4/meta.txt | 2 +- src/tests/databases/core_5/meta.txt | 2 +- src/tests/databases/core_6/meta.txt | 2 +- src/tests/databases/core_7/meta.txt | 2 +- src/tests/databases/core_8/meta.txt | 2 +- src/tests/databases/core_9/meta.txt | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/tests/databases/core_1/meta.txt b/src/tests/databases/core_1/meta.txt index bcf484ff..6d875fc3 100644 --- a/src/tests/databases/core_1/meta.txt +++ b/src/tests/databases/core_1/meta.txt @@ -21,4 +21,4 @@ 23 1 genebuild.provider_name test 24 1 genebuild.start_date 2023-07-Ensembl 25 1 assembly.alt_accession GCA_0000012345.3 -26 1 schema_version 110 \ No newline at end of file +26 \N schema_version 110 \ No newline at end of file diff --git a/src/tests/databases/core_2/meta.txt b/src/tests/databases/core_2/meta.txt index 0743014a..c066f909 100644 --- a/src/tests/databases/core_2/meta.txt +++ b/src/tests/databases/core_2/meta.txt @@ -21,4 +21,4 @@ 21 1 genome.genome_uuid test 23 1 genebuild.provider_name test2 24 1 genebuild.start_date 2023-07-Ensembl -25 1 schema_version 110 \ No newline at end of file +25 \N schema_version 110 \ No newline at end of file diff --git a/src/tests/databases/core_3/meta.txt b/src/tests/databases/core_3/meta.txt index daca8ee4..41cf1027 100644 --- a/src/tests/databases/core_3/meta.txt +++ b/src/tests/databases/core_3/meta.txt @@ -19,4 +19,4 @@ 20 1 strain.type test 23 1 genebuild.provider_name test 24 1 genebuild.start_date 2023-07-Ensembl -25 1 schema_version 110 \ No newline at end of file +25 \N schema_version 110 \ No newline at end of file diff --git a/src/tests/databases/core_4/meta.txt b/src/tests/databases/core_4/meta.txt index cf7d2852..409cd9f8 100644 --- a/src/tests/databases/core_4/meta.txt +++ b/src/tests/databases/core_4/meta.txt @@ -20,4 +20,4 @@ 20 1 strain.type test 23 1 genebuild.provider_name test 24 1 genebuild.start_date 2023-07-Ensembl -25 1 schema_version 110 \ No newline at end of file +25 \N schema_version 110 \ No newline at end of file diff --git a/src/tests/databases/core_5/meta.txt b/src/tests/databases/core_5/meta.txt index 0576cf78..c30703d5 100644 --- a/src/tests/databases/core_5/meta.txt +++ b/src/tests/databases/core_5/meta.txt @@ -16,4 +16,4 @@ 20 1 strain.type test 23 1 genebuild.provider_name removed_for_test 24 1 genebuild.start_date 2023-07-Ensembl -25 1 schema_version 110 \ No newline at end of file +25 \N schema_version 110 \ No newline at end of file diff --git a/src/tests/databases/core_6/meta.txt b/src/tests/databases/core_6/meta.txt index 3a6a78f0..0106f13d 100644 --- a/src/tests/databases/core_6/meta.txt +++ b/src/tests/databases/core_6/meta.txt @@ -21,4 +21,4 @@ 21 1 genome.genome_uuid 90720316-006c-470b-a7dd-82d28f952264 23 1 genebuild.provider_name test 24 1 genebuild.start_date 2023-07-Ensembl -25 1 schema_version 110 \ No newline at end of file +25 \N schema_version 110 \ No newline at end of file diff --git a/src/tests/databases/core_7/meta.txt b/src/tests/databases/core_7/meta.txt index 7c285648..579ae4de 100644 --- a/src/tests/databases/core_7/meta.txt +++ b/src/tests/databases/core_7/meta.txt @@ -21,5 +21,5 @@ 23 1 genebuild.provider_name testfornamenew 24 1 genebuild.start_date 2023-08-Ensembl 25 1 genebuild.havana_datafreeze_date test2 -26 1 schema_version 110 +26 \N schema_version 110 27 1 assembly.total_coding_sequence_length 8989 \ No newline at end of file diff --git a/src/tests/databases/core_8/meta.txt b/src/tests/databases/core_8/meta.txt index bfe90127..7272c90f 100644 --- a/src/tests/databases/core_8/meta.txt +++ b/src/tests/databases/core_8/meta.txt @@ -20,4 +20,4 @@ 20 1 strain.type test 23 1 genebuild.provider_name test 24 1 genebuild.start_date 2023-07-Ensembl -25 1 schema_version 110 \ No newline at end of file +25 \N schema_version 110 \ No newline at end of file diff --git a/src/tests/databases/core_9/meta.txt b/src/tests/databases/core_9/meta.txt index 11f69da5..1818c24c 100644 --- a/src/tests/databases/core_9/meta.txt +++ b/src/tests/databases/core_9/meta.txt @@ -22,4 +22,4 @@ 24 1 genebuild.havana_datafreeze_date test2 25 1 assembly.total_genome_length 546 26 1 genebuild.start_date 2023-07-Ensembl -27 1 schema_version 110 +27 \N schema_version 110 From 3d69fdcea831e7b1e4bddf399aaaccdeb50b1a15 Mon Sep 17 00:00:00 2001 From: danielp Date: Fri, 22 Mar 2024 10:17:40 +0000 Subject: [PATCH 4/4] Added fix for common name --- src/ensembl/production/metadata/updater/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ensembl/production/metadata/updater/core.py b/src/ensembl/production/metadata/updater/core.py index cdfbec21..bf72bee8 100644 --- a/src/ensembl/production/metadata/updater/core.py +++ b/src/ensembl/production/metadata/updater/core.py @@ -291,7 +291,7 @@ def get_or_new_organism(self, species_id, meta_session): # Getting the common name from the meta table, otherwise we grab it from ncbi. common_name = self.get_meta_single_meta_key(species_id, "species.common_name") taxid = self.get_meta_single_meta_key(species_id, "species.taxonomy_id") - if common_name is None: + if common_name is None or common_name == "": with self.taxonomy_db.session_scope() as session: common_name = session.query(NCBITaxaName).filter(