Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/ensembl/production/metadata/api/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.


class MetaException(Exception):
    """Base class for the Metadata API exceptions.

    Catch this type to handle any error derived from it in one place.
    """
Expand Down Expand Up @@ -43,3 +44,7 @@ class MissingMetaException(MetaException, RuntimeError):
class UpdateBackCoreException(UpdaterException, RuntimeError):
    """An error occurred while updating back the core database."""

class TypeNotFoundException(UpdaterException, RuntimeError):
    """Dataset type not found.

    Raised when a requested dataset type is not found in the metadata, or
    when a dataset type has no associated public path.
    """
39 changes: 30 additions & 9 deletions src/ensembl/production/metadata/api/models/genome.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from sqlalchemy import Column, Integer, String, ForeignKey
from sqlalchemy.dialects.mysql import DATETIME, TINYINT
from sqlalchemy.orm import relationship

from ensembl.production.metadata.api.exceptions import *
from ensembl.production.metadata.api.models.base import Base, LoadAble


Expand Down Expand Up @@ -54,20 +54,41 @@ def get_public_path(self, type='all', release=None):

genebuild_source_name = genebuild_annotation_source_attribute.value
common_path = f"{self.organism.scientific_name.replace(' ', '_')}/{self.assembly.accession}/{genebuild_source_name}"
unique_dataset_types = {gd.dataset.dataset_type.name for gd in self.genome_datasets}

if 'regulatory_features' in unique_dataset_types or 'regulation_build' in unique_dataset_types:
unique_dataset_types.discard('regulatory_features')
unique_dataset_types.discard('regulation_build')
unique_dataset_types.add('regulation')
if 'regulatory_features' == type or 'regulation_build' == type:
type = 'regulation'

if type in ['genebuild', 'assembly', 'homology', 'regulation', 'variation', 'all']:
if type in unique_dataset_types or type == 'all':
if type == 'genebuild':
paths.append(f"{common_path}/genebuild/{genebuild_dataset.dataset.version}")
elif type == 'assembly':
paths.append(f"{common_path}/genome")
elif type in ['homology', 'regulation', 'variation']:
paths.append(f"{common_path}/{type}")
elif type == 'homologies':
paths.append(f"{common_path}/homology")
elif type == 'regulation':
paths.append(f"{common_path}/regulation")
elif type == 'variation':
paths.append(f"{common_path}/variation")
elif type == 'all':
# Add paths for all types
for t in ['genebuild', 'assembly', 'homology', 'regulation', 'variation']:
paths.extend(self.get_public_path(type=t))
return paths

for t in unique_dataset_types:
if t == 'genebuild':
paths.append(f"{common_path}/genebuild/{genebuild_dataset.dataset.version}")
elif t == 'assembly':
paths.append(f"{common_path}/genome")
elif t == 'homologies':
paths.append(f"{common_path}/homology")
elif t in ['regulation', 'variation']:
paths.append(f"{common_path}/{t}")
else:
raise TypeNotFoundException(f"Dataset Type : {type} has no associated path. ")
return paths
else:
raise TypeNotFoundException(f"Dataset Type : {type} not found in metadata. ")


class GenomeDataset(LoadAble, Base):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,5 @@
36 1068ba70-0088-4927-98bd-8fabcfb9a384 4 evidence \N 2023-06-02 13:32:52 10 Manual Add Submitted
38 47d54c33-80d6-4174-8620-52b6c8506db2 6 homologies \N 2023-06-02 13:32:52 11 Manual Add Submitted
42 ea044d8e-33f1-4c9f-9b9f-8c0bd1dcf642 6 homologies \N 2023-06-02 13:32:52 11 Manual Add Submitted
46 385f1ec2-bd06-40ce-873a-98e199f10505 1 asssembly \N 2023-08-18 12:22:34 13 GCA_000001735.1 Submitted
46 385f1ec2-bd06-40ce-873a-98e199f10505 1 asssembly \N 2023-08-18 12:22:34 13 GCA_000001735.1 Submitted
47 385f1ec2-bd06-40ce-873a-98e199f10534 5 regulation_build \N 2023-08-18 12:22:34 13 GCA_000001735.1 Submitted
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,4 @@
57 44 7 \N 0
58 45 7 \N 0
59 46 9 \N 0
60 47 6 1 1
25 changes: 24 additions & 1 deletion src/ensembl/production/metadata/updater/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,6 @@ def get_or_new_assembly(self, species_id, meta_session, source=None, existing=No
assembly_default=self.get_meta_single_meta_key(species_id, "assembly.default"),
tol_id=tol_id,
created=func.now(),
ensembl_name=self.get_meta_single_meta_key(species_id, "assembly.name"),
assembly_uuid=str(uuid.uuid4()),
url_name=self.get_meta_single_meta_key(species_id, "assembly.url_name"),
is_reference=is_reference
Expand Down Expand Up @@ -540,7 +539,31 @@ def get_or_new_genebuild(self, species_id, meta_session, source=None, existing=F
dataset_source = source

dataset_type = meta_session.query(DatasetType).filter(DatasetType.name == "genebuild").first()

genebuild_start_date = self.get_meta_single_meta_key(species_id, "genebuild.start_date")
genebuild_provider_name = self.get_meta_single_meta_key(species_id, "genebuild.provider_name")

test_status = meta_session.query(Dataset).filter(Dataset.label == genebuild_accession).one_or_none()
if test_status:
# Check for genebuild.provider_name
provider_name_check = meta_session.query(DatasetAttribute).join(Attribute).filter(
DatasetAttribute.dataset_id == test_status.dataset_id,
Attribute.name == "genebuild.provider_name",
DatasetAttribute.value == genebuild_provider_name
).one_or_none()

if provider_name_check:
# Check for genebuild.start_date
start_date_check = meta_session.query(DatasetAttribute).join(Attribute).filter(
DatasetAttribute.dataset_id == test_status.dataset_id,
Attribute.name == "genebuild.start_date",
DatasetAttribute.value == genebuild_start_date
).one_or_none()

if start_date_check is None:
test_status = None


if test_status is not None and existing is False:
genebuild_dataset = test_status
genebuild_dataset_attributes = genebuild_dataset.dataset_attributes
Expand Down
4 changes: 3 additions & 1 deletion src/tests/databases/core_1/meta.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,6 @@
19 1 sample.location_param KB871578.1:9766653-9817473
20 1 strain.type test
21 1 assembly.test_value test
22 1 genebuild.test_value test
22 1 genebuild.test_value test
23 1 genebuild.provider_name test
24 1 genebuild.start_date test
4 changes: 3 additions & 1 deletion src/tests/databases/core_2/meta.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,6 @@
18 1 sample.gene_param ENSAMXG00005000318
19 1 sample.location_param KB871578.1:9766653-9817473
20 1 strain.type test
21 1 genome.genome_uuid test
21 1 genome.genome_uuid test
23 1 genebuild.provider_name test
24 1 genebuild.start_date test
4 changes: 3 additions & 1 deletion src/tests/databases/core_3/meta.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,6 @@
17 1 genebuild.version 1
18 1 sample.gene_param ENSAMXG00005000318
19 1 sample.location_param KB871578.1:9766653-9817473
20 1 strain.type test
20 1 strain.type test
23 1 genebuild.provider_name test
24 1 genebuild.start_date test
4 changes: 3 additions & 1 deletion src/tests/databases/core_4/meta.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,6 @@
17 1 genebuild.version 2
18 1 sample.gene_param ENSAMXG00005000318
19 1 sample.location_param KB871578.1:9766653-9817473
20 1 strain.type test
20 1 strain.type test
23 1 genebuild.provider_name test
24 1 genebuild.start_date test
2 changes: 2 additions & 0 deletions src/tests/databases/core_5/meta.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,5 @@
18 1 sample.gene_param ENSAMXG00005000318
19 1 sample.location_param KB871578.1:9766653-9817473
20 1 strain.type test
23 1 genebuild.provider_name test
24 1 genebuild.start_date test
4 changes: 3 additions & 1 deletion src/tests/databases/core_6/meta.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,6 @@
18 1 sample.gene_param ENSAMXG00005000318
19 1 sample.location_param KB871578.1:9766653-9817473
20 1 strain.type test
21 1 genome.genome_uuid 90720316-006c-470b-a7dd-82d28f952264
21 1 genome.genome_uuid 90720316-006c-470b-a7dd-82d28f952264
23 1 genebuild.provider_name test
24 1 genebuild.start_date test
4 changes: 3 additions & 1 deletion src/tests/databases/core_7/meta.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,6 @@
19 1 sample.location_param KB871578.1:9766653-9817473
20 1 strain.type test
21 1 assembly.test_value test2
22 1 genebuild.test_value test2
22 1 genebuild.test_value test2
23 1 genebuild.provider_name test
24 1 genebuild.start_date test
4 changes: 3 additions & 1 deletion src/tests/databases/core_8/meta.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,6 @@
19 1 sample.location_param KB871578.1:9766653-9817473
20 1 strain.type test
21 1 assembly.test_value test
22 1 genebuild.test_value test
22 1 genebuild.test_value test
23 1 genebuild.provider_name test
24 1 genebuild.start_date test
4 changes: 3 additions & 1 deletion src/tests/databases/core_9/meta.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,6 @@
20 1 strain.type test
21 1 assembly.test_value test3
22 1 genebuild.test_value test3
23 1 genome.genome_uuid a733550b-93e7-11ec-a39d-005056b38ce3
23 1 genome.genome_uuid a733550b-93e7-11ec-a39d-005056b38ce3
24 1 genebuild.provider_name test
25 1 genebuild.start_date test
4 changes: 2 additions & 2 deletions src/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,9 @@ def test_get_public_path_genebuild(self, multi_dbs):
assert path[0] == 'Saccharomyces_cerevisiae_S288c/GCA_000146045.2/test_anno_source/genome'
path = genome.get_public_path(type='variation')
assert path[0] == 'Saccharomyces_cerevisiae_S288c/GCA_000146045.2/test_anno_source/variation'
path = genome.get_public_path(type='homology')
path = genome.get_public_path(type='homologies')
assert path[0] == 'Saccharomyces_cerevisiae_S288c/GCA_000146045.2/test_anno_source/homology'
path = genome.get_public_path(type='regulation')
path = genome.get_public_path(type='regulatory_features')
assert path[0] == 'Saccharomyces_cerevisiae_S288c/GCA_000146045.2/test_anno_source/regulation'

def test_organism_ensembl_name_compat(self, multi_dbs):
Expand Down