Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions protos/ensembl/production/metadata/grpc/ensembl_metadata.proto
Original file line number Diff line number Diff line change
Expand Up @@ -293,11 +293,10 @@ message GenomeUUID {

message OrganismsGroup {
uint32 species_taxonomy_id = 1;
string ensembl_name = 2;
string common_name = 3;
string scientific_name = 4;
uint32 order = 5;
uint32 count = 6;
string common_name = 2;
string scientific_name = 3;
uint32 order = 4;
uint32 count = 5;
}

message OrganismsGroupCount {
Expand Down Expand Up @@ -420,7 +419,7 @@ message GenomeDatatypeRequest {
Genome info filter used by Compara (EA-1090)
*/
message GenomeInfoRequest {
string ensembl_name = 1; // Mandatory
string production_name = 1; // Mandatory
string assembly_name = 2; // Mandatory
bool use_default = 3; // Optional
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
1 a7335667-93e7-11ec-a39d-005056b38ce3 1 1 2023-05-12 13:30:58 0 test
2 3704ceb1-948d-11ec-a39d-005056b38ce3 2 1 2023-05-12 13:32:06 0 test1
3 a73351f7-93e7-11ec-a39d-005056b38ce3 3 2 2023-05-12 13:32:14 0 test2
4 a73356e1-93e7-11ec-a39d-005056b38ce3 4 3 2023-05-12 13:32:25 0 test3
5 a73357ab-93e7-11ec-a39d-005056b38ce3 5 4 2023-05-12 13:32:36 0 test4
6 a733574a-93e7-11ec-a39d-005056b38ce3 6 5 2023-05-12 13:32:46 0 test5
7 a733550b-93e7-11ec-a39d-005056b38ce3 7 6 2023-05-12 13:32:52 0 test6
1 a7335667-93e7-11ec-a39d-005056b38ce3 1 1 2023-05-12 13:30:58 0 homo_sapiens
2 3704ceb1-948d-11ec-a39d-005056b38ce3 2 1 2023-05-12 13:32:06 0 homo_sapiens_37
3 a73351f7-93e7-11ec-a39d-005056b38ce3 3 2 2023-05-12 13:32:14 0 escherichia_coli_str_k_12_substr_mg1655
4 a73356e1-93e7-11ec-a39d-005056b38ce3 4 3 2023-05-12 13:32:25 0 plasmodium_falciparum
5 a73357ab-93e7-11ec-a39d-005056b38ce3 5 4 2023-05-12 13:32:36 0 triticum_aestivum
6 a733574a-93e7-11ec-a39d-005056b38ce3 6 5 2023-05-12 13:32:46 0 saccharomyces_cerevisiae
7 a733550b-93e7-11ec-a39d-005056b38ce3 7 6 2023-05-12 13:32:52 0 caenorhabditis_elegans
8 a7335667-93e7-11ec-a39d-00aasab38ce3 8 1 2023-09-07 16:30:58 0 test7
9 90720316-006c-470b-a7dd-82d28f952264 9 8 2023-08-18 12:22:34 0 test8
11 changes: 7 additions & 4 deletions src/ensembl/production/metadata/grpc/adaptors/genome.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,8 @@ def fetch_taxonomy_ids(self, taxonomy_names):

def fetch_genomes(self, genome_id=None, genome_uuid=None, genome_tag=None, organism_uuid=None, assembly_uuid=None,
assembly_accession=None, assembly_name=None, use_default_assembly=False, ensembl_name=None,
taxonomy_id=None, group=None, group_type=None, allow_unreleased=False, unreleased_only=False,
site_name=None, release_type=None, release_version=None, current_only=True):
production_name=None, taxonomy_id=None, group=None, group_type=None, allow_unreleased=False,
unreleased_only=False, site_name=None, release_type=None, release_version=None, current_only=True):
"""
Fetches genome information based on the specified parameters.

Expand All @@ -95,6 +95,7 @@ def fetch_genomes(self, genome_id=None, genome_uuid=None, genome_tag=None, organ
assembly_name (Union[str, List[str]]): The name(s) of the assembly(s) to fetch.
use_default_assembly (bool): Whether to use default assembly name or not.
ensembl_name (Union[str, List[str]]): The Ensembl name(s) of the organism(s) to fetch.
production_name (Union[str, List[str]]): The production name(s) of the organism(s) to fetch.
taxonomy_id (Union[int, List[int]]): The taxonomy ID(s) of the organism(s) to fetch.
group (Union[str, List[str]]): The name(s) of the organism group(s) to filter by.
group_type (Union[str, List[str]]): The type(s) of the organism group(s) to filter by.
Expand Down Expand Up @@ -132,6 +133,7 @@ def fetch_genomes(self, genome_id=None, genome_uuid=None, genome_tag=None, organ
assembly_accession = check_parameter(assembly_accession)
assembly_name = check_parameter(assembly_name)
ensembl_name = check_parameter(ensembl_name)
production_name = check_parameter(production_name)
taxonomy_id = check_parameter(taxonomy_id)
group = check_parameter(group)
group_type = check_parameter(group_type)
Expand Down Expand Up @@ -194,6 +196,9 @@ def fetch_genomes(self, genome_id=None, genome_uuid=None, genome_tag=None, organ
if ensembl_name is not None:
genome_select = genome_select.filter(Organism.ensembl_name.in_(ensembl_name))

if production_name is not None:
genome_select = genome_select.filter(Genome.production_name.in_(production_name))

if taxonomy_id is not None:
genome_select = genome_select.filter(Organism.taxonomy_id.in_(taxonomy_id))

Expand Down Expand Up @@ -612,7 +617,6 @@ def fetch_organisms_group_counts(self, release_version=None, group_code='popular
# Get latest released organisms
query = db.select(
o_species.species_taxonomy_id,
o_species.ensembl_name,
o_species.common_name,
o_species.scientific_name,
OrganismGroupMember.order.label('order'),
Expand All @@ -629,7 +633,6 @@ def fetch_organisms_group_counts(self, release_version=None, group_code='popular

query = query.group_by(
o_species.species_taxonomy_id,
o_species.ensembl_name,
o_species.common_name,
o_species.scientific_name,
OrganismGroupMember.order
Expand Down
6 changes: 3 additions & 3 deletions src/ensembl/production/metadata/grpc/client_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,15 +273,15 @@ def get_dataset_infos_by_dataset_type(stub):

def get_genome_uuid(stub):
request1 = GenomeInfoRequest(
ensembl_name="homo_sapiens_37", assembly_name="GRCh37.p13"
production_name="homo_sapiens_37", assembly_name="GRCh37.p13"
)
genome_uuid1 = stub.GetGenomeUUID(request1)
request2 = GenomeInfoRequest(
ensembl_name="homo_sapiens_37", assembly_name="GRCh37", use_default=True
production_name="homo_sapiens_37", assembly_name="GRCh37", use_default=True
)
genome_uuid2 = stub.GetGenomeUUID(request2)
request3 = GenomeInfoRequest(
ensembl_name="homo_sapiens_37", assembly_name="GRCh37.p13", use_default=True
production_name="homo_sapiens_37", assembly_name="GRCh37.p13", use_default=True
)
genome_uuid3 = stub.GetGenomeUUID(request3)

Expand Down
82 changes: 41 additions & 41 deletions src/ensembl/production/metadata/grpc/ensembl_metadata_pb2.py

Large diffs are not rendered by default.

9 changes: 4 additions & 5 deletions src/ensembl/production/metadata/grpc/protobuf_msg_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,11 +396,10 @@ def create_organisms_group_count(data, release_version):
for organism in data:
created_organism_group = ensembl_metadata_pb2.OrganismsGroup(
species_taxonomy_id=organism[0],
ensembl_name=organism[1],
common_name=organism[2],
scientific_name=organism[3],
order=organism[4],
count=organism[5],
common_name=organism[1],
scientific_name=organism[2],
order=organism[3],
count=organism[4],
)
organisms_list.append(created_organism_group)

Expand Down
2 changes: 1 addition & 1 deletion src/ensembl/production/metadata/grpc/servicer.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def GetTopLevelStatisticsByUUID(self, request, context):
return utils.get_top_level_statistics_by_uuid(self.db, request.genome_uuid)

def GetGenomeUUID(self, request, context):
return utils.get_genome_uuid(self.db, request.ensembl_name, request.assembly_name, request.use_default)
return utils.get_genome_uuid(self.db, request.production_name, request.assembly_name, request.use_default)

def GetGenomeByUUID(self, request, context):
return utils.get_genome_by_uuid(self.db, request.genome_uuid, request.release_version)
Expand Down
18 changes: 3 additions & 15 deletions src/ensembl/production/metadata/grpc/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,12 +179,12 @@ def get_sub_species_info(db_conn, organism_uuid, group):
return msg_factory.create_sub_species()


def get_genome_uuid(db_conn, ensembl_name, assembly_name, use_default=False):
if ensembl_name is None or assembly_name is None:
def get_genome_uuid(db_conn, production_name, assembly_name, use_default=False):
if production_name is None or assembly_name is None:
return msg_factory.create_genome_uuid()

genome_uuid_result = db_conn.fetch_genomes(
ensembl_name=ensembl_name,
production_name=production_name,
assembly_name=assembly_name,
use_default_assembly=use_default,
allow_unreleased=cfg.allow_unreleased
Expand All @@ -194,18 +194,6 @@ def get_genome_uuid(db_conn, ensembl_name, assembly_name, use_default=False):
return msg_factory.create_genome_uuid(
{"genome_uuid": genome_uuid_result[0].Genome.genome_uuid}
)
# PATCH: This is a special case, see EA-1112 for more details
elif len(genome_uuid_result) == 0:
# Try looking using only assembly_default (no ensembl_name is needed)
using_default_assembly_only_result = db_conn.fetch_genomes(
assembly_name=assembly_name,
use_default_assembly=True,
allow_unreleased=cfg.allow_unreleased
)
if len(using_default_assembly_only_result) == 1:
return msg_factory.create_genome_uuid(
{"genome_uuid": using_default_assembly_only_result[0].Genome.genome_uuid}
)

return msg_factory.create_genome_uuid()

Expand Down
6 changes: 3 additions & 3 deletions src/tests/test_grpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,13 +376,13 @@ def test_fetch_organisms_group_counts(self, multi_dbs, species_taxonomy_id, expe
test = conn.fetch_organisms_group_counts()
# When fetching everything:
# First result should be Human
assert test[0][2] == expected_organism
assert test[0][1] == expected_organism
# We should have three assemblies associated with Human (two for the GRCh37/GRCh38 organism + one T2T)
assert test[0][5] == expected_assemblies_count
assert test[0][4] == expected_assemblies_count

for data in test[1:]:
# All others have only one genome in test DB
assert data[5] == 1
assert data[4] == 1

@pytest.mark.parametrize(
"organism_uuid, expected_assemblies_count",
Expand Down
1 change: 0 additions & 1 deletion src/tests/test_protobuf_msg_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,6 @@ def test_create_organisms_group_count(self, multi_dbs, genome_db_conn):
"organismsGroupCount": [
{
"speciesTaxonomyId": 9606,
"ensemblName": "Homo_sapiens",
"commonName": "Human",
"scientificName": "Homo sapiens",
"order": 1,
Expand Down
12 changes: 5 additions & 7 deletions src/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -683,21 +683,20 @@ def test_get_dataset_by_genome_id_no_results(self, genome_db_conn):
assert output == {}

@pytest.mark.parametrize(
"ensembl_name, assembly_name, use_default, expected_output",
"production_name, assembly_name, use_default, expected_output",
[
("homo_sapiens", "GRCh38.p13", False, {"genomeUuid": "a7335667-93e7-11ec-a39d-005056b38ce3"}),
("homo_sapiens", "GRCh38.p13", True, {}),
("homo_sapiens", "GRCh38", True, {"genomeUuid": "a7335667-93e7-11ec-a39d-005056b38ce3"}),
("random_ensembl_name", "GRCh38", False, {"genomeUuid": "a7335667-93e7-11ec-a39d-005056b38ce3"}),
("random_ensembl_name", "random_assembly_name", True, {}),
("random_ensembl_name", "random_assembly_name", False, {}),
("random_production_name", "random_assembly_name", True, {}),
("random_production_name", "random_assembly_name", False, {}),
]
)
def test_get_genome_uuid(self, genome_db_conn, ensembl_name, assembly_name, use_default, expected_output):
def test_get_genome_uuid(self, genome_db_conn, production_name, assembly_name, use_default, expected_output):
output = json_format.MessageToJson(
utils.get_genome_uuid(
db_conn=genome_db_conn,
ensembl_name=ensembl_name,
production_name=production_name,
assembly_name=assembly_name,
use_default=use_default
))
Expand Down Expand Up @@ -1109,7 +1108,6 @@ def test_get_organisms_group_count(self, genome_db_conn):
"organismsGroupCount": [
{
"speciesTaxonomyId": 9606,
"ensemblName": "Homo_sapiens",
"commonName": "Human",
"scientificName": "Homo sapiens",
"order": 1,
Expand Down