Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.1.0
1.1.1
3 changes: 3 additions & 0 deletions src/ensembl/production/metadata/api/models/assembly.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import uuid

from sqlalchemy import Column, Integer, String, DateTime, Index, ForeignKey
from sqlalchemy.dialects.mysql import TINYINT
from sqlalchemy.orm import relationship
Expand All @@ -20,6 +22,7 @@ class Assembly(Base):
__tablename__ = 'assembly'

assembly_id = Column(Integer, primary_key=True)
# The column type is String(128), so build the default as a string; a bare
# uuid.uuid4 callable would hand a uuid.UUID object to the column instead.
# The lambda is called for each INSERT that omits assembly_uuid.
assembly_uuid = Column(String(128), unique=True, nullable=False, default=lambda: str(uuid.uuid4()))
ucsc_name = Column(String(16))
accession = Column(String(16), nullable=False, unique=True)
level = Column(String(32), nullable=False)
Expand Down
3 changes: 2 additions & 1 deletion src/ensembl/production/metadata/api/models/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from sqlalchemy import Column, Integer, String, Enum, text, ForeignKey, Index
from sqlalchemy.dialects.mysql import DATETIME
from sqlalchemy.orm import relationship
import uuid

from ensembl.production.metadata.api.models.base import Base

Expand All @@ -35,7 +36,7 @@ class Dataset(Base):
__tablename__ = 'dataset'

dataset_id = Column(Integer, primary_key=True)
dataset_uuid = Column(String(128), nullable=False, unique=True)
# The default must be a callable: str(uuid.uuid4) would stringify the function
# object once at import time, giving every row the same non-UUID constant and
# breaking the unique constraint. The lambda yields a fresh UUID string per INSERT.
dataset_uuid = Column(String(128), nullable=False, unique=True, default=lambda: str(uuid.uuid4()))
dataset_type_id = Column(ForeignKey('dataset_type.dataset_type_id'), nullable=False, index=True)
name = Column(String(128), nullable=False)
version = Column(String(128))
Expand Down
42 changes: 22 additions & 20 deletions src/ensembl/production/metadata/api/models/genome.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import uuid

from sqlalchemy import Column, Integer, String, ForeignKey
from sqlalchemy.dialects.mysql import DATETIME, TINYINT
from sqlalchemy.orm import relationship
Expand All @@ -17,54 +19,54 @@


class Genome(Base):
__tablename__ = 'genome'
__tablename__ = "genome"

genome_id = Column(Integer, primary_key=True)
genome_uuid = Column(String(128), nullable=False, unique=True)
assembly_id = Column(ForeignKey('assembly.assembly_id'), nullable=False, index=True)
organism_id = Column(ForeignKey('organism.organism_id'), nullable=False, index=True)
# The default must be a callable: str(uuid.uuid4) would stringify the function
# object once at import time, giving every row the same non-UUID constant and
# breaking the unique constraint. The lambda yields a fresh UUID string per INSERT.
genome_uuid = Column(String(128), nullable=False, unique=True, default=lambda: str(uuid.uuid4()))
assembly_id = Column(ForeignKey("assembly.assembly_id"), nullable=False, index=True)
organism_id = Column(ForeignKey("organism.organism_id"), nullable=False, index=True)
created = Column(DATETIME(fsp=6), nullable=False)
# One to many relationships
# genome_id to genome_dataset and genome release
genome_datasets = relationship('GenomeDataset', back_populates='genome')
genome_releases = relationship('GenomeRelease', back_populates='genome')
genome_datasets = relationship("GenomeDataset", back_populates="genome")
genome_releases = relationship("GenomeRelease", back_populates="genome")
# many to one relationships
# assembly_id to assembly
assembly = relationship('Assembly', back_populates="genomes")
assembly = relationship("Assembly", back_populates="genomes")
# organism_id to organism
organism = relationship('Organism', back_populates="genomes")
organism = relationship("Organism", back_populates="genomes")


class GenomeDataset(Base):
__tablename__ = 'genome_dataset'
__tablename__ = "genome_dataset"

genome_dataset_id = Column(Integer, primary_key=True)
dataset_id = Column(ForeignKey('dataset.dataset_id'), nullable=False, index=True)
genome_id = Column(ForeignKey('genome.genome_id'), nullable=False, index=True)
release_id = Column(ForeignKey('ensembl_release.release_id'), index=True)
dataset_id = Column(ForeignKey("dataset.dataset_id"), nullable=False, index=True)
genome_id = Column(ForeignKey("genome.genome_id"), nullable=False, index=True)
release_id = Column(ForeignKey("ensembl_release.release_id"), index=True)
is_current = Column(TINYINT(1), nullable=False)
# One to many relationships
# none
# many to one relationships
# genome_dataset_id to genome
dataset = relationship('Dataset', back_populates="genome_datasets")
dataset = relationship("Dataset", back_populates="genome_datasets")
# genome_id to genome
genome = relationship('Genome', back_populates="genome_datasets")
genome = relationship("Genome", back_populates="genome_datasets")
# release_id to release
ensembl_release = relationship('EnsemblRelease', back_populates="genome_datasets")
ensembl_release = relationship("EnsemblRelease", back_populates="genome_datasets")


class GenomeRelease(Base):
__tablename__ = 'genome_release'
__tablename__ = "genome_release"

genome_release_id = Column(Integer, primary_key=True)
genome_id = Column(ForeignKey('genome.genome_id'), nullable=False, index=True)
release_id = Column(ForeignKey('ensembl_release.release_id'), nullable=False, index=True)
genome_id = Column(ForeignKey("genome.genome_id"), nullable=False, index=True)
release_id = Column(ForeignKey("ensembl_release.release_id"), nullable=False, index=True)
is_current = Column(TINYINT(1), nullable=False)
# One to many relationships
# none
# many to one relationships
# genome_release_id to genome_release
genome = relationship('Genome', back_populates='genome_releases')
genome = relationship("Genome", back_populates="genome_releases")
# release_id to ensembl release
ensembl_release = relationship('EnsemblRelease', back_populates='genome_releases')
ensembl_release = relationship("EnsemblRelease", back_populates="genome_releases")
41 changes: 22 additions & 19 deletions src/ensembl/production/metadata/api/models/organism.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import uuid

from sqlalchemy import Column, Integer, String, Index, ForeignKey
from sqlalchemy.dialects.mysql import TINYINT
from sqlalchemy.orm import relationship
Expand All @@ -17,9 +19,10 @@


class Organism(Base):
__tablename__ = 'organism'
__tablename__ = "organism"

organism_id = Column(Integer, primary_key=True)
# The column type is String(128), so build the default as a string; a bare
# uuid.uuid4 callable would hand a uuid.UUID object to the column instead.
# The lambda is called for each INSERT that omits organism_uuid.
organism_uuid = Column(String(128), unique=True, nullable=False, default=lambda: str(uuid.uuid4()))
taxonomy_id = Column(Integer, nullable=False)
species_taxonomy_id = Column(Integer)
display_name = Column(String(128), nullable=False)
Expand All @@ -30,21 +33,21 @@ class Organism(Base):
scientific_parlance_name = Column(String(255))
# One to many relationships
# Organism_id to organism_group_member and genome
genomes = relationship('Genome', back_populates='organism')
organism_group_members = relationship('OrganismGroupMember', back_populates='organism')
genomes = relationship("Genome", back_populates="organism")
organism_group_members = relationship("OrganismGroupMember", back_populates="organism")

# many to one relationships
# organim_id and taxonomy_id to taxonomy_node #DIFFERENT DATABASE
def __repr__(self):
return f'organism_id={self.organism_id}, taxonomy_id={self.taxonomy_id}, species_taxonomy_id={self.species_taxonomy_id}, ' \
f'display_name={self.display_name}, strain={self.strain}, scientific_name={self.scientific_name}, ' \
f'url_name={self.url_name}, ensembl_name={self.ensembl_name}, scientific_parlance_name={self.scientific_parlance_name}'
return f"organism_id={self.organism_id}, taxonomy_id={self.taxonomy_id}, species_taxonomy_id={self.species_taxonomy_id}, " \
f"display_name={self.display_name}, strain={self.strain}, scientific_name={self.scientific_name}, " \
f"url_name={self.url_name}, ensembl_name={self.ensembl_name}, scientific_parlance_name={self.scientific_parlance_name}"


class OrganismGroup(Base):
__tablename__ = 'organism_group'
__tablename__ = "organism_group"
__table_args__ = (
Index('group_type_name_63c2f6ac_uniq', 'type', 'name', unique=True),
Index("group_type_name_63c2f6ac_uniq", "type", "name", unique=True),
)

organism_group_id = Column(Integer, primary_key=True)
Expand All @@ -53,34 +56,34 @@ class OrganismGroup(Base):
code = Column(String(48), unique=True)
# One to many relationships
# Organism_group_id to organism_group_member
organism_group_members = relationship('OrganismGroupMember', back_populates='organism_group')
organism_group_members = relationship("OrganismGroupMember", back_populates="organism_group")

# many to one relationships
# none
def __repr__(self):
return f'organism_group_id={self.organism_group_id}, type={self.type}, name={self.name}, ' \
f'code={self.code}'
return f"organism_group_id={self.organism_group_id}, type={self.type}, name={self.name}, " \
f"code={self.code}"


class OrganismGroupMember(Base):
__tablename__ = 'organism_group_member'
__tablename__ = "organism_group_member"
__table_args__ = (
Index('organism_group_member_organism_id_organism_gro_fe8f49ac_uniq', 'organism_id', 'organism_group_id',
Index("organism_group_member_organism_id_organism_gro_fe8f49ac_uniq", "organism_id", "organism_group_id",
unique=True),
)

organism_group_member_id = Column(Integer, primary_key=True)
is_reference = Column(TINYINT(1), nullable=False)
organism_id = Column(ForeignKey('organism.organism_id'), nullable=False)
organism_group_id = Column(ForeignKey('organism_group.organism_group_id'), nullable=False, index=True)
organism_id = Column(ForeignKey("organism.organism_id"), nullable=False)
organism_group_id = Column(ForeignKey("organism_group.organism_group_id"), nullable=False, index=True)
# One to many relationships
# none
# many to one relationships
# Organism_group_id to organism_group_member
# organism_id to organism
organism_group = relationship('OrganismGroup', back_populates='organism_group_members')
organism = relationship('Organism', back_populates='organism_group_members')
organism_group = relationship("OrganismGroup", back_populates="organism_group_members")
organism = relationship("Organism", back_populates="organism_group_members")

def __repr__(self):
return f'organism_group_member_id={self.organism_group_member_id}, is_reference={self.is_reference}, organism_id={self.organism_id}, ' \
f'organism_group_id={self.organism_group_id}'
return f"organism_group_member_id={self.organism_group_member_id}, is_reference={self.is_reference}, organism_id={self.organism_id}, " \
f"organism_group_id={self.organism_group_id}"
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
1 hg38 GCA_000001405.28 chromosome GRCh38.p13 \N GRCh38 \N 2023-05-12 13:30:58 GRCh38.p13
2 hg19 GCA_000001405.14 chromosome GRCh37.p13 \N GRCh37 \N 2023-05-12 13:32:06 GRCh37.p13
3 \N GCA_000005845.2 chromosome ASM584v2 \N ASM584v2 \N 2023-05-12 13:32:14 ASM584v2
4 \N GCA_000002765.2 chromosome ASM276v2 \N ASM276v2 \N 2023-05-12 13:32:25 ASM276v2
5 \N GCA_900519105.1 chromosome IWGSC \N IWGSC \N 2023-05-12 13:32:36 IWGSC
6 \N GCA_000146045.2 chromosome R64-1-1 \N R64-1-1 \N 2023-05-12 13:32:46 R64-1-1
7 \N GCA_000002985.3 chromosome WBcel235 \N WBcel235 \N 2023-05-12 13:32:52 WBcel235
1 eeaaa2bf-151c-4848-8b85-a05a9993101e hg38 GCA_000001405.28 chromosome GRCh38.p13 \N GRCh38 \N 2023-05-12 13:30:58 GRCh38.p13
2 633034c3-2268-40a2-866a-9f492cac84bf hg19 GCA_000001405.14 chromosome GRCh37.p13 \N GRCh37 \N 2023-05-12 13:32:06 GRCh37.p13
3 f78618ef-1075-47ee-a496-be26cad47912 \N GCA_000005845.2 chromosome ASM584v2 \N ASM584v2 \N 2023-05-12 13:32:14 ASM584v2
4 224d836f-36a7-4c4e-b917-ecff740e404f \N GCA_000002765.2 chromosome ASM276v2 \N ASM276v2 \N 2023-05-12 13:32:25 ASM276v2
5 ec1c4b53-c2ef-431c-ad0e-b2aef19b44f1 \N GCA_900519105.1 chromosome IWGSC \N IWGSC \N 2023-05-12 13:32:36 IWGSC
6 7e8ed3a8-d724-4cba-92e1-e968719b7a18 \N GCA_000146045.2 chromosome R64-1-1 \N R64-1-1 \N 2023-05-12 13:32:46 R64-1-1
7 f7de35c9-e0e8-4e81-b186-2962098d6361 \N GCA_000002985.3 chromosome WBcel235 \N WBcel235 \N 2023-05-12 13:32:52 WBcel235
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
1 108.0 2023-05-15 Beta Release 1 0 1 integrated
1 108.0 2023-05-15 Beta Release 1 1 1 integrated
2 108.1 2023-09-15 Scaling Phase 1 0 1 partial
3 108.2 2023-11-15 Scaling Phase 2 0 1 partial
4 109.0 2023-12-15 MVP Release 1 1 integrated
4 110.0 2023-12-15 MVP Release 1 0 1 integrated
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
1 1 1 1
2 2 1 1
2 2 1 0
3 3 1 1
4 4 1 1
5 5 1 1
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
1 9606 9606 Human \N Homo sapiens Homo_sapiens homo_sapiens
2 511145 562 Escherichia coli str. K-12 substr. MG1655 str. K12 (GCA_000005845) \N Escherichia coli str. K-12 substr. MG1655 str. K12 (GCA_000005845) Escherichia_coli_str_k_12_substr_mg1655_gca_000005845 escherichia_coli_str_k_12_substr_mg1655_gca_000005845
3 36329 5833 Plasmodium falciparum 3D7 \N Plasmodium falciparum 3D7 Plasmodium_falciparum plasmodium_falciparum
4 4565 4565 Triticum aestivum reference (Chinese spring) Triticum aestivum Triticum_aestivum triticum_aestivum
5 559292 4932 Saccharomyces cerevisiae S288C Saccharomyces cerevisiae S288c Saccharomyces_cerevisiae saccharomyces_cerevisiae
6 6239 6239 Caenorhabditis elegans (PRJNA13758) N2 Caenorhabditis elegans Caenorhabditis_elegans caenorhabditis_elegans
1 db2a5f09-2db8-429b-a407-c15a4ca2876d 9606 9606 Human \N Homo sapiens Homo_sapiens homo_sapiens
2 21279e3e-e651-43e1-a6fc-79e390b9e8a8 511145 562 Escherichia coli str. K-12 substr. MG1655 str. K12 (GCA_000005845) \N Escherichia coli str. K-12 substr. MG1655 str. K12 (GCA_000005845) Escherichia_coli_str_k_12_substr_mg1655_gca_000005845 escherichia_coli_str_k_12_substr_mg1655_gca_000005845
3 e61faf49-0964-4d0e-8f3a-b2ffa3514698 36329 5833 Plasmodium falciparum 3D7 \N Plasmodium falciparum 3D7 Plasmodium_falciparum plasmodium_falciparum
4 d64c34ca-b37a-476b-83b5-f21d07a3ae67 4565 4565 Triticum aestivum reference (Chinese spring) Triticum aestivum Triticum_aestivum triticum_aestivum
5 0dc46f87-0b61-403a-8cd3-86b7e0cce8f0 559292 4932 Saccharomyces cerevisiae S288C Saccharomyces cerevisiae S288c Saccharomyces_cerevisiae saccharomyces_cerevisiae
6 0f4aad7b-db15-4a72-af1e-82bbae54226 6239 6239 Caenorhabditis elegans (PRJNA13758) N2 Caenorhabditis elegans Caenorhabditis_elegans caenorhabditis_elegans
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
CREATE TABLE assembly
(
assembly_id int auto_increment
primary key,
assembly_id int auto_increment primary key,
assembly_uuid varchar(128) not null,
ucsc_name varchar(16) null,
accession varchar(16) not null,
level varchar(32) not null,
Expand All @@ -11,6 +11,8 @@ CREATE TABLE assembly
tol_id varchar(32) null,
created datetime null,
ensembl_name varchar(255) null,
constraint assembly_uuid
unique (assembly_uuid),
constraint accession
unique (accession),
constraint assembly_ensembl_name_uindex
Expand Down Expand Up @@ -147,6 +149,7 @@ CREATE TABLE organism
(
organism_id int auto_increment
primary key,
organism_uuid varchar(128) not null,
taxonomy_id int not null,
species_taxonomy_id int null,
display_name varchar(128) not null,
Expand All @@ -155,6 +158,8 @@ CREATE TABLE organism
url_name varchar(128) not null,
ensembl_name varchar(128) not null,
scientific_parlance_name varchar(255) null,
constraint organism_uuid
unique (organism_uuid),
constraint ensembl_name
unique (ensembl_name)
);
Expand Down