Merged
46 changes: 25 additions & 21 deletions src/ensembl/production/metadata/api/genome.py
@@ -146,7 +146,7 @@ def fetch_genomes(self, genome_id=None, genome_uuid=None, organism_uuid=None, as

# Apply additional filters based on the provided parameters
if unreleased_only:
genome_select = genome_select.filter(Genome.genome_releases is None)
genome_select = genome_select.filter(~Genome.genome_releases.any())

# These options are in order of decreasing specificity,
# and thus the ones later in the list can be redundant.
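Note on the `unreleased_only` change above: `Genome.genome_releases is None` is evaluated by Python as an identity check while the query is being built, so it never becomes a SQL predicate; `~Genome.genome_releases.any()` compiles to a correlated `NOT EXISTS`. A minimal, self-contained sketch of the difference (toy models, not the real schema):

```python
# Minimal sketch (toy models, not the real schema) of why the filter uses
# ~Genome.genome_releases.any() instead of a Python identity check.
from sqlalchemy import Column, ForeignKey, Integer, select
from sqlalchemy.orm import declarative_base, relationship

Base = declarative_base()

class Genome(Base):
    __tablename__ = "genome"
    genome_id = Column(Integer, primary_key=True)
    genome_releases = relationship("GenomeRelease", back_populates="genome")

class GenomeRelease(Base):
    __tablename__ = "genome_release"
    genome_release_id = Column(Integer, primary_key=True)
    genome_id = Column(Integer, ForeignKey("genome.genome_id"))
    genome = relationship("Genome", back_populates="genome_releases")

# `Genome.genome_releases is None` is evaluated by Python at query-build time,
# so it cannot express "genomes without a release" in SQL.
# `~...any()` compiles to a correlated NOT EXISTS against genome_release.
unreleased_only = select(Genome).filter(~Genome.genome_releases.any())
print(unreleased_only)
```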
@@ -216,7 +216,6 @@ def fetch_genomes(self, genome_id=None, genome_uuid=None, organism_uuid=None, as
# current_only will be executed only if none of the conditions above are met
genome_select = genome_select.filter(GenomeRelease.is_current == 1)

# print(f"genome_select query ====> {str(genome_select)}")
with self.metadata_db.session_scope() as session:
session.expire_on_commit = False
return session.execute(genome_select.order_by("ensembl_name")).all()
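Setting `expire_on_commit = False` right before the execute is what keeps the returned ORM rows usable after `session_scope()` commits and closes; the `DetachedInstanceError` comment further down in `fetch_genome_datasets` refers to the same issue. A self-contained sketch, assuming `session_scope()` is a commit-on-exit context manager (the real one lives on `metadata_db`):

```python
# Hedged sketch: toy model plus a stand-in session_scope(); the production
# session factory is metadata_db.session_scope(), assumed to commit on exit.
from contextlib import contextmanager

from sqlalchemy import Column, Integer, String, create_engine, select
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()

class Genome(Base):
    __tablename__ = "genome"
    genome_id = Column(Integer, primary_key=True)
    ensembl_name = Column(String(128))

engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as seed:
    seed.add(Genome(ensembl_name="homo_sapiens"))
    seed.commit()

@contextmanager
def session_scope(bind):
    session = Session(bind)
    try:
        yield session
        session.commit()
    finally:
        session.close()

with session_scope(engine) as session:
    session.expire_on_commit = False
    rows = session.execute(select(Genome).order_by("ensembl_name")).all()

# With the default expire_on_commit=True, the commit inside session_scope()
# would expire every loaded instance, and touching an attribute on the now
# detached object would raise sqlalchemy.orm.exc.DetachedInstanceError.
print(rows[0].Genome.ensembl_name)
```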
@@ -389,9 +388,10 @@ def fetch_sequences_by_assembly_accession(
assembly_accession=assembly_accession, chromosomal_only=chromosomal_only
)


def fetch_genome_datasets(self, genome_id=None, genome_uuid=None, organism_uuid=None, unreleased_datasets=False,
dataset_uuid=None, dataset_name=None, dataset_source=None, dataset_type=None,
release_version=None):
release_version=None, dataset_attributes=None):
"""
Fetches genome datasets based on the provided parameters.

@@ -405,6 +405,7 @@ def fetch_genome_datasets(self, genome_id=None, genome_uuid=None, organism_uuid=
dataset_source (str or None): Dataset source to filter by.
dataset_type (str or None): Dataset type to filter by.
release_version (float or None): EnsemblRelease version to filter by.
dataset_attributes (bool): Flag to include dataset attributes in the result.

Returns:
List[Tuple[
@@ -433,15 +434,11 @@ def fetch_genome_datasets(self, genome_id=None, genome_uuid=None, organism_uuid=
Dataset,
DatasetType,
DatasetSource,
DatasetAttribute,
Attribute
).select_from(Genome) \
.join(GenomeDataset, Genome.genome_id == GenomeDataset.genome_id) \
.join(Dataset, GenomeDataset.dataset_id == Dataset.dataset_id) \
.join(DatasetType, Dataset.dataset_type_id == DatasetType.dataset_type_id) \
.join(DatasetSource, Dataset.dataset_source_id == DatasetSource.dataset_source_id) \
.join(DatasetAttribute, DatasetAttribute.dataset_id == Dataset.dataset_id) \
.join(Attribute, Attribute.attribute_id == DatasetAttribute.attribute_id)
.join(DatasetSource, Dataset.dataset_source_id == DatasetSource.dataset_source_id)

# set the default group topic to 'assembly' to fetch a unique datasource
if not dataset_name:
@@ -468,7 +465,7 @@ def fetch_genome_datasets(self, genome_id=None, genome_uuid=None, organism_uuid=
genome_select = genome_select.filter(Dataset.dataset_uuid.in_(dataset_uuid))

if unreleased_datasets:
genome_select = genome_select.filter(GenomeDataset.ensembl_release is None)
genome_select = genome_select.filter(~GenomeDataset.ensembl_release.has())

if dataset_name is not None and "all" not in dataset_name:
genome_select = genome_select.filter(DatasetType.name.in_(dataset_name))
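Same idea as the `unreleased_only` change, but for a scalar relationship: `.any()` works on collections, whereas `GenomeDataset.ensembl_release` is presumably many-to-one, which is what `.has()` requires. A minimal sketch with toy models:

```python
# Minimal sketch (toy mapping, not the real schema): .has() is the scalar
# counterpart of .any(); both compile to a correlated (NOT) EXISTS.
from sqlalchemy import Column, ForeignKey, Integer, select
from sqlalchemy.orm import declarative_base, relationship

Base = declarative_base()

class EnsemblRelease(Base):
    __tablename__ = "ensembl_release"
    release_id = Column(Integer, primary_key=True)

class GenomeDataset(Base):
    __tablename__ = "genome_dataset"
    genome_dataset_id = Column(Integer, primary_key=True)
    release_id = Column(Integer, ForeignKey("ensembl_release.release_id"), nullable=True)
    ensembl_release = relationship("EnsemblRelease")  # many-to-one, hence .has()

# Datasets that have not been attached to any release.
unreleased = select(GenomeDataset).filter(~GenomeDataset.ensembl_release.has())
print(unreleased)
```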
@@ -479,6 +476,10 @@ def fetch_genome_datasets(self, genome_id=None, genome_uuid=None, organism_uuid=
if dataset_type is not None:
genome_select = genome_select.filter(DatasetType.name.in_(dataset_type))

if dataset_attributes:
genome_select = genome_select.add_columns(DatasetAttribute, Attribute).join(DatasetAttribute, DatasetAttribute.dataset_id == Dataset.dataset_id) \
.join(Attribute, Attribute.attribute_id == DatasetAttribute.attribute_id)

with self.metadata_db.session_scope() as session:
# This is needed in order to avoid tests throwing:
# sqlalchemy.orm.exc.DetachedInstanceError: Instance <DatasetType at 0x7fc5c05a13d0>
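On the new `dataset_attributes` branch above: joining `DatasetAttribute`/`Attribute` fans the result out to one row per attribute, which is why those joins were dropped from the base statement and are now added only on request. A self-contained sketch of that conditional-join pattern (toy models, not the real schema):

```python
# Minimal sketch of the conditional join: the attribute tables are only added
# when the caller asks for them, because the join multiplies rows (one per
# attribute) and slows the common case.
from sqlalchemy import Column, ForeignKey, Integer, String, select
from sqlalchemy.orm import declarative_base

Base = declarative_base()

class Dataset(Base):
    __tablename__ = "dataset"
    dataset_id = Column(Integer, primary_key=True)

class Attribute(Base):
    __tablename__ = "attribute"
    attribute_id = Column(Integer, primary_key=True)
    name = Column(String(128))

class DatasetAttribute(Base):
    __tablename__ = "dataset_attribute"
    dataset_attribute_id = Column(Integer, primary_key=True)
    dataset_id = Column(Integer, ForeignKey("dataset.dataset_id"))
    attribute_id = Column(Integer, ForeignKey("attribute.attribute_id"))

def build_query(dataset_attributes: bool = False):
    stmt = select(Dataset)
    if dataset_attributes:
        stmt = (
            stmt.add_columns(DatasetAttribute, Attribute)
            .join(DatasetAttribute, DatasetAttribute.dataset_id == Dataset.dataset_id)
            .join(Attribute, Attribute.attribute_id == DatasetAttribute.attribute_id)
        )
    return stmt

print(build_query(dataset_attributes=True))
```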
@@ -487,19 +488,19 @@ def fetch_genome_datasets(self, genome_id=None, genome_uuid=None, organism_uuid=
session.expire_on_commit = False
# copy genome_select as we don't want to include GenomeDataset
# because it results in multiple rows for a given genome (a genome can have many datasets)
prep_query = genome_select.filter(GenomeDataset.ensembl_release is not None)
is_genome_released = session.execute(prep_query).first()

if is_genome_released:
# Include release related info if released_only is True
genome_select = genome_select.add_columns(EnsemblRelease) \
if not unreleased_datasets:

prep_query = genome_select.filter(GenomeDataset.ensembl_release.has())
is_genome_released = session.execute(prep_query).first()

if is_genome_released:
# Include release-related info if the genome has been released
genome_select = genome_select.add_columns(EnsemblRelease) \
.join(EnsemblRelease, GenomeDataset.release_id == EnsemblRelease.release_id)

if release_version:
genome_select = genome_select.filter(EnsemblRelease.version <= release_version)

# print(f"genome_select str ====> {str(genome_select)}")
logger.debug(genome_select)
if release_version:
genome_select = genome_select.filter(EnsemblRelease.version <= release_version)

return session.execute(genome_select).all()

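The released-data branch above follows a probe-then-extend pattern: run a cheap `EXISTS` probe first, and only widen the statement with the `EnsemblRelease` columns and the `release_version` cut-off when at least one matching dataset is actually released. A runnable sketch of the same shape (toy models, in-memory SQLite):

```python
# Hedged sketch of the probe-then-extend step; toy models, not the real schema.
from sqlalchemy import Column, Float, ForeignKey, Integer, create_engine, select
from sqlalchemy.orm import Session, declarative_base, relationship

Base = declarative_base()

class EnsemblRelease(Base):
    __tablename__ = "ensembl_release"
    release_id = Column(Integer, primary_key=True)
    version = Column(Float)

class GenomeDataset(Base):
    __tablename__ = "genome_dataset"
    genome_dataset_id = Column(Integer, primary_key=True)
    release_id = Column(Integer, ForeignKey("ensembl_release.release_id"), nullable=True)
    ensembl_release = relationship("EnsemblRelease")

engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

stmt = select(GenomeDataset)
release_version = 110.0  # illustrative cut-off

with Session(engine) as session:
    # Probe: does any dataset in scope actually carry a release?
    released = session.execute(stmt.filter(GenomeDataset.ensembl_release.has())).first()
    if released:
        # Only then widen the statement with the release columns and filter.
        stmt = (
            stmt.add_columns(EnsemblRelease)
            .join(EnsemblRelease, GenomeDataset.release_id == EnsemblRelease.release_id)
        )
        if release_version:
            stmt = stmt.filter(EnsemblRelease.version <= release_version)
    rows = session.execute(stmt).all()
```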
@@ -516,7 +517,9 @@ def fetch_genomes_info(
group_type=None,
unreleased_datasets=False,
dataset_name=None,
dataset_source=None
dataset_source=None,
dataset_attributes=True,

):
try:
genome_id = check_parameter(genome_id)
@@ -549,7 +552,8 @@ def fetch_genomes_info(
genome_uuid=genome[0].genome_uuid,
unreleased_datasets=unreleased_datasets,
dataset_name=dataset_name,
dataset_source=dataset_source
dataset_source=dataset_source,
dataset_attributes=dataset_attributes
)
res = [{'genome': genome, 'datasets': dataset}]
yield res
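A hedged usage sketch of the updated `fetch_genomes_info()`: it is a generator, and with the new `dataset_attributes=True` default each dataset row also carries its `DatasetAttribute`/`Attribute` columns. `adaptor` below is a placeholder for an instance of the class defined in this module:

```python
# Usage sketch only; `adaptor` is hypothetical and stands for an instance of
# the adaptor class in this module.
for batch in adaptor.fetch_genomes_info(dataset_attributes=True):
    for info in batch:
        genome_row = info["genome"]      # row as returned by fetch_genomes()
        dataset_rows = info["datasets"]  # rows from fetch_genome_datasets(),
                                         # including DatasetAttribute/Attribute
```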