Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 28 additions & 35 deletions src/ensembl/production/metadata/api/genome.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import sqlalchemy as db
from sqlalchemy.engine import make_url

from ensembl.database import DBConnection
from ensembl.ncbi_taxonomy.models import NCBITaxaName
Expand All @@ -28,42 +27,36 @@ def __init__(self, metadata_uri, taxonomy_uri=None):
super().__init__(metadata_uri)
self.taxonomy_db = DBConnection(taxonomy_uri)

def fetch_taxonomy_names(self, taxonomy_ids):
def fetch_taxonomy_names(self, taxonomy_ids, synonyms=[]):

taxonomy_ids = check_parameter(taxonomy_ids)
synonyms = [
"common name",
"equivalent name",
"genbank synonym",
"synonym",
] if len(check_parameter(synonyms)) == 0 else synonyms
required_class_name = ["genbank common name", "scientific name"]
taxons = {}
for tid in taxonomy_ids:
names = {"scientific_name": None, "synonym": []}
taxons[tid] = names
for taxon in taxons:
sci_name_select = db.select(
NCBITaxaName.name
).filter(
NCBITaxaName.taxon_id == taxon,
NCBITaxaName.name_class == "scientific name",
)
synonym_class = [
"common name",
"equivalent name",
"genbank common name",
"genbank synonym",
"synonym",
]

synonyms_select = db.select(
NCBITaxaName.name
).filter(
NCBITaxaName.taxon_id == taxon,
NCBITaxaName.name_class.in_(synonym_class),
)
with self.taxonomy_db.session_scope() as session:
for tid in taxonomy_ids:
taxons[tid] = {"scientific_name": None, "genbank_common_name": None, "synonym": []}

taxonomyname_query = db.select(
NCBITaxaName.name,
NCBITaxaName.name_class,
).filter(
NCBITaxaName.taxon_id == tid,
NCBITaxaName.name_class.in_(required_class_name + synonyms),
)

with self.taxonomy_db.session_scope() as session:
sci_name = session.execute(sci_name_select).one()
taxons[taxon]["scientific_name"] = sci_name[0]
synonyms = session.execute(synonyms_select).all()
for synonym in synonyms:
taxons[taxon]["synonym"].append(synonym[0])
return taxons
for taxon_name in session.execute(taxonomyname_query).all():
if taxon_name[1] in synonyms:
taxons[tid]['synonym'].append(taxon_name[0])
if taxon_name[1] in required_class_name:
taxon_format_name = "_".join(taxon_name[1].split(' '))
taxons[tid][taxon_format_name] = taxon_name[0]
return taxons

def fetch_taxonomy_ids(self, taxonomy_names):
taxids = []
Expand Down Expand Up @@ -284,8 +277,8 @@ def fetch_genome_datasets(self, genome_id=None, genome_uuid=None, unreleased_dat
genome_select = genome_select.filter(Dataset.dataset_uuid.in_(dataset_uuid))

if unreleased_datasets:
genome_select = genome_select.filter(GenomeDataset.release_id.is_(None)) \
.filter(GenomeDataset.is_current == 0)
genome_select = genome_select.filter(GenomeDataset.release_id.is_(None))

if dataset_name is not None:
genome_select = genome_select.filter(DatasetType.name.in_(dataset_name))

Expand Down
9 changes: 6 additions & 3 deletions src/ensembl/production/metadata/api/models/assembly.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,13 @@ class Assembly(Base):
level = Column(String(32), nullable=False)
name = Column(String(128), nullable=False)
accession_body = Column(String(32))
assembly_default = Column(String(32))
assembly_default = Column(String(128))
tol_id = Column(String(32), unique=True)
created = Column(DateTime)
ensembl_name = Column(String(255), unique=True)
alt_accession = Column(String(16), nullable=True)
is_reference = Column(TINYINT(1), nullable=False)
url_name = Column(String(128), nullable=False)
# One to many relationships
# assembly_id within assembly_sequence
assembly_sequences = relationship("AssemblySequence", back_populates="assembly", cascade="all, delete, delete-orphan")
Expand All @@ -51,10 +53,11 @@ class AssemblySequence(Base):
assembly_id = Column(ForeignKey('assembly.assembly_id'), nullable=False, index=True)
accession = Column(String(128), nullable=False)
chromosomal = Column(TINYINT(1), nullable=False)
chromosome_rank = Column(Integer)
length = Column(Integer, nullable=False)
sequence_location = Column(String(10))
sequence_checksum = Column(String(32))
ga4gh_identifier = Column(String(32))
md5 = Column(String(32))
sha512t4u = Column(String(128))
# One to many relationships
# none
# many to one relationships
Expand Down
1 change: 1 addition & 0 deletions src/ensembl/production/metadata/api/models/genome.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ class GenomeRelease(Base):
genome_id = Column(ForeignKey("genome.genome_id"), nullable=False, index=True)
release_id = Column(ForeignKey("ensembl_release.release_id"), nullable=False, index=True)
is_current = Column(TINYINT(1), nullable=False)
is_best = Column(TINYINT(1), nullable=False)
# One to many relationships
# none
# many to one relationships
Expand Down
10 changes: 5 additions & 5 deletions src/ensembl/production/metadata/api/models/organism.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,23 +25,22 @@ class Organism(Base):
organism_uuid = Column(String(128), unique=True, nullable=False, default=uuid.uuid4)
taxonomy_id = Column(Integer, nullable=False)
species_taxonomy_id = Column(Integer)
display_name = Column(String(128), nullable=False)
common_name = Column(String(128), nullable=False)
strain = Column(String(128))
scientific_name = Column(String(128))
url_name = Column(String(128), nullable=False)
ensembl_name = Column(String(128), nullable=False, unique=True)
scientific_parlance_name = Column(String(255))
# One to many relationships
# Organism_id to organism_group_member and genome
genomes = relationship("Genome", back_populates="organism", cascade="all, delete, delete-orphan")
organism_group_members = relationship("OrganismGroupMember", back_populates="organism")

strain_type = Column(String(128), nullable=True, unique=False)
# many to one relationships
# organism_id and taxonomy_id to taxonomy_node #DIFFERENT DATABASE
def __repr__(self):
return f"organism_id={self.organism_id}, taxonomy_id={self.taxonomy_id}, species_taxonomy_id={self.species_taxonomy_id}, " \
f"display_name={self.display_name}, strain={self.strain}, scientific_name={self.scientific_name}, " \
f"url_name={self.url_name}, ensembl_name={self.ensembl_name}, scientific_parlance_name={self.scientific_parlance_name}"
f"common_name={self.common_name}, strain={self.strain}, scientific_name={self.scientific_name}, " \
f"ensembl_name={self.ensembl_name}, scientific_parlance_name={self.scientific_parlance_name}"


class OrganismGroup(Base):
Expand Down Expand Up @@ -74,6 +73,7 @@ class OrganismGroupMember(Base):

organism_group_member_id = Column(Integer, primary_key=True)
is_reference = Column(TINYINT(1), nullable=False)
order = Column(Integer, nullable=True)
organism_id = Column(ForeignKey("organism.organism_id"), nullable=False)
organism_group_id = Column(ForeignKey("organism_group.organism_group_id"), nullable=False, index=True)
# One to many relationships
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
1 eeaaa2bf-151c-4848-8b85-a05a9993101e hg38 GCA_000001405.28 chromosome GRCh38.p13 \N GRCh38 \N 2023-05-12 13:30:58 GRCh38.p13 \N
2 633034c3-2268-40a2-866a-9f492cac84bf hg19 GCA_000001405.14 chromosome GRCh37.p13 \N GRCh37 \N 2023-05-12 13:32:06 GRCh37.p13 \N
3 f78618ef-1075-47ee-a496-be26cad47912 \N GCA_000005845.2 chromosome ASM584v2 \N ASM584v2 \N 2023-05-12 13:32:14 ASM584v2 \N
4 224d836f-36a7-4c4e-b917-ecff740e404f \N GCA_000002765.2 chromosome ASM276v2 \N ASM276v2 \N 2023-05-12 13:32:25 ASM276v2 \N
5 ec1c4b53-c2ef-431c-ad0e-b2aef19b44f1 \N GCA_900519105.1 chromosome IWGSC \N IWGSC \N 2023-05-12 13:32:36 IWGSC \N
6 7e8ed3a8-d724-4cba-92e1-e968719b7a18 \N GCA_000146045.2 chromosome R64-1-1 \N R64-1-1 \N 2023-05-12 13:32:46 R64-1-1 \N
7 f7de35c9-e0e8-4e81-b186-2962098d6361 \N GCA_000002985.3 chromosome WBcel235 \N WBcel235 \N 2023-05-12 13:32:52 WBcel235 \N
1 eeaaa2bf-151c-4848-8b85-a05a9993101e hg38 GCA_000001405.28 chromosome 1 GRCh38.p13 \N GRCh38 \N 2023-05-12 13:30:58 GRCh38.p13 \N 1 \N
2 633034c3-2268-40a2-866a-9f492cac84bf hg19 GCA_000001405.14 chromosome 2 GRCh37.p13 \N GRCh37 \N 2023-05-12 13:32:06 GRCh37.p13 \N 0 \N
3 f78618ef-1075-47ee-a496-be26cad47912 \N GCA_000005845.2 chromosome \N ASM584v2 \N ASM584v2 \N 2023-05-12 13:32:14 ASM584v2 \N 0 \N
4 224d836f-36a7-4c4e-b917-ecff740e404f \N GCA_000002765.2 chromosome \N ASM276v2 \N ASM276v2 \N 2023-05-12 13:32:25 ASM276v2 \N 0 \N
5 ec1c4b53-c2ef-431c-ad0e-b2aef19b44f1 \N GCA_900519105.1 chromosome \N IWGSC \N IWGSC \N 2023-05-12 13:32:36 IWGSC \N 0 \N
6 7e8ed3a8-d724-4cba-92e1-e968719b7a18 \N GCA_000146045.2 chromosome \N R64-1-1 \N R64-1-1 \N 2023-05-12 13:32:46 R64-1-1 \N 0 \N
7 f7de35c9-e0e8-4e81-b186-2962098d6361 \N GCA_000002985.3 chromosome \N WBcel235 \N WBcel235 \N 2023-05-12 13:32:52 WBcel235 \N 0 \N
Loading