Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions src/ensembl/production/metadata/api/models/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,12 @@
from sqlalchemy.sql import func
import datetime
import uuid
import logging

from ensembl.production.metadata.api.exceptions import MissingMetaException
from ensembl.production.metadata.api.models.base import Base, LoadAble


logger = logging.getLogger(__name__)

class Attribute(LoadAble, Base):
__tablename__ = 'attribute'
Expand All @@ -35,6 +36,7 @@ class Attribute(LoadAble, Base):
# many to one relationships
# none


class Dataset(LoadAble, Base):
__tablename__ = 'dataset'

Expand All @@ -50,7 +52,8 @@ class Dataset(LoadAble, Base):

# One to many relationships
# dataset_id to dataset attribute and genome dataset
dataset_attributes = relationship("DatasetAttribute", back_populates='dataset', cascade="all, delete, delete-orphan")
dataset_attributes = relationship("DatasetAttribute", back_populates='dataset',
cascade="all, delete, delete-orphan")
genome_datasets = relationship("GenomeDataset", back_populates='dataset', cascade="all, delete, delete-orphan")
# many to one relationships
# dataset_type_id to dataset_type
Expand All @@ -66,12 +69,14 @@ def genebuild_version(self):

return next(
(att.value for att in self.dataset_attributes if att.attribute.name == 'genebuild.last_geneset_update'),
next((att.value for att in self.dataset_attributes if att.attribute.name == 'genebuild.start_date'), None))
next((att.value for att in self.dataset_attributes if att.attribute.name == 'genebuild.start_date'),
None))
else:
# Otherwise, return the genebuild version of the related genebuild dataset
logger.debug(F"Related datasets! : {self.genome_datasets.datasets}")
genebuild_ds = next(
(dataset for dataset in self.genome_datasets.datasets if dataset.dataset_type.name == 'genebuild'), None)
(dataset for dataset in self.genome_datasets.datasets if dataset.dataset_type.name == 'genebuild'),
None)
if genebuild_ds:
return genebuild_ds.genebuild_version
else:
Expand Down Expand Up @@ -125,4 +130,3 @@ class DatasetType(LoadAble, Base):
datasets = relationship('Dataset', back_populates='dataset_type')
# many to one relationships
# none

6 changes: 2 additions & 4 deletions src/ensembl/production/metadata/api/models/genome.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,9 @@ def get_public_path(self, dataset_type='all', release=None):
da.attribute.name == "genebuild.annotation_source"),
'ensembl')
# Genebuild version is the last_geneset_update or, if that is not specified, the start_date.
genebuild_version = next(
(da.value for da in genebuild_dataset.dataset_attributes if
da.attribute.name == "genebuild.version"), genebuild_dataset.version)
try:
genebuild_version = re.sub(r"[^\w\s]", '', re.sub(r"\s+", '_', genebuild_version))
match = re.match(r'^(\d{4}-\d{2})', genebuild_dataset.genebuild_version)
genebuild_version = match.group(1).replace('-', '_')
except TypeError as e:
logger.fatal(f"For genome {self.genome_uuid}, can't find genebuild_version directory")
raise RuntimeError(e)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12587,7 +12587,7 @@
14999 6600 30 402
14965 1.00 31 402
14984 2018-10 33 402
14985 2018-10 34 402
14985 2018-10-Ensembl 34 402
15010 toplevel 35 402
14980 14733 36 402
15008 import 37 402
Expand Down
12 changes: 6 additions & 6 deletions src/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,13 @@ def test_get_public_path(self, multi_dbs):
assert len(paths) == 4
# assert all("/genebuild/" in path for path in paths)
path = genome.get_public_path(dataset_type='genebuild')
assert path[0]['path'] == 'Saccharomyces_cerevisiae_S288c/GCA_000146045.2/ensembl/genebuild/EXT01'
assert path[0]['path'] == 'Saccharomyces_cerevisiae_S288c/GCA_000146045.2/ensembl/genebuild/2018_10'
path = genome.get_public_path(dataset_type='assembly')
assert path[0]['path'] == 'Saccharomyces_cerevisiae_S288c/GCA_000146045.2/ensembl/genome'
path = genome.get_public_path(dataset_type='variation')
assert path[0]['path'] == 'Saccharomyces_cerevisiae_S288c/GCA_000146045.2/ensembl/variation/EXT01'
assert path[0]['path'] == 'Saccharomyces_cerevisiae_S288c/GCA_000146045.2/ensembl/variation/2018_10'
path = genome.get_public_path(dataset_type='homologies')
assert path[0]['path'] == 'Saccharomyces_cerevisiae_S288c/GCA_000146045.2/ensembl/homology/EXT01'
assert path[0]['path'] == 'Saccharomyces_cerevisiae_S288c/GCA_000146045.2/ensembl/homology/2018_10'
with pytest.raises(TypeNotFoundException):
genome.get_public_path(dataset_type='regulatory_features')
# assert path[0]['path'] == 'Saccharomyces_cerevisiae_S288c/GCA_000146045.2/ensembl/regulation'
Expand All @@ -58,13 +58,13 @@ def test_default_public_path(self, multi_dbs):
assert len(paths) == 5
# assert all("/genebuild/" in path for path in paths)
path = genome.get_public_path(dataset_type='genebuild')
assert path[0]['path'] == 'Homo_sapiens/GCA_000001405.29/ensembl/genebuild/GENCODE44'
assert path[0]['path'] == 'Homo_sapiens/GCA_000001405.29/ensembl/genebuild/2023_03'
path = genome.get_public_path(dataset_type='assembly')
assert path[0]['path'] == 'Homo_sapiens/GCA_000001405.29/ensembl/genome'
path = genome.get_public_path(dataset_type='variation')
assert path[0]['path'] == 'Homo_sapiens/GCA_000001405.29/ensembl/variation/GENCODE44'
assert path[0]['path'] == 'Homo_sapiens/GCA_000001405.29/ensembl/variation/2023_03'
path = genome.get_public_path(dataset_type='homologies')
assert path[0]['path'] == 'Homo_sapiens/GCA_000001405.29/ensembl/homology/GENCODE44'
assert path[0]['path'] == 'Homo_sapiens/GCA_000001405.29/ensembl/homology/2023_03'
path = genome.get_public_path(dataset_type='regulatory_features')
assert path[0]['path'] == 'Homo_sapiens/GCA_000001405.29/ensembl/regulation'

Expand Down