From f600049acc73902cbabb956c246084a68e584e2f Mon Sep 17 00:00:00 2001 From: Marc Chakiachvili Date: Tue, 20 Jun 2023 12:15:58 +0100 Subject: [PATCH 1/2] Added assembly_uuid and organism_uuid fields --- .../metadata/api/models/assembly.py | 3 ++ .../production/metadata/api/models/dataset.py | 3 +- .../production/metadata/api/models/genome.py | 42 ++++++++++--------- .../metadata/api/models/organism.py | 41 +++++++++--------- .../api/sample/ensembl_metadata/assembly.txt | 14 +++---- .../ensembl_metadata/ensembl_release.txt | 4 +- .../ensembl_metadata/genome_release.txt | 2 +- .../api/sample/ensembl_metadata/organism.txt | 12 +++--- .../api/sample/ensembl_metadata/table.sql | 9 +++- 9 files changed, 72 insertions(+), 58 deletions(-) diff --git a/src/ensembl/production/metadata/api/models/assembly.py b/src/ensembl/production/metadata/api/models/assembly.py index 69fa6c69..8835e001 100644 --- a/src/ensembl/production/metadata/api/models/assembly.py +++ b/src/ensembl/production/metadata/api/models/assembly.py @@ -9,6 +9,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import uuid + from sqlalchemy import Column, Integer, String, DateTime, Index, ForeignKey from sqlalchemy.dialects.mysql import TINYINT from sqlalchemy.orm import relationship @@ -20,6 +22,7 @@ class Assembly(Base): __tablename__ = 'assembly' assembly_id = Column(Integer, primary_key=True) + assembly_uuid = Column(String(128), unique=True, nullable=False, default=lambda: str(uuid.uuid4())) ucsc_name = Column(String(16)) accession = Column(String(16), nullable=False, unique=True) level = Column(String(32), nullable=False) diff --git a/src/ensembl/production/metadata/api/models/dataset.py b/src/ensembl/production/metadata/api/models/dataset.py index 5bf41e21..4a458b12 100644 --- a/src/ensembl/production/metadata/api/models/dataset.py +++ b/src/ensembl/production/metadata/api/models/dataset.py @@ -12,6 +12,7 @@ from sqlalchemy import Column, Integer, String, Enum, text, ForeignKey, Index from sqlalchemy.dialects.mysql import DATETIME from sqlalchemy.orm import relationship +import uuid from ensembl.production.metadata.api.models.base import Base @@ -35,7 +36,7 @@ class Dataset(Base): __tablename__ = 'dataset' dataset_id = Column(Integer, primary_key=True) - dataset_uuid = Column(String(128), nullable=False, unique=True) + dataset_uuid = Column(String(128), nullable=False, unique=True, default=lambda: str(uuid.uuid4())) dataset_type_id = Column(ForeignKey('dataset_type.dataset_type_id'), nullable=False, index=True) name = Column(String(128), nullable=False) version = Column(String(128)) diff --git a/src/ensembl/production/metadata/api/models/genome.py b/src/ensembl/production/metadata/api/models/genome.py index c3b8c9b1..cff25617 100644 --- a/src/ensembl/production/metadata/api/models/genome.py +++ b/src/ensembl/production/metadata/api/models/genome.py @@ -9,6 +9,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import uuid + from sqlalchemy import Column, Integer, String, ForeignKey from sqlalchemy.dialects.mysql import DATETIME, TINYINT from sqlalchemy.orm import relationship @@ -17,54 +19,54 @@ class Genome(Base): - __tablename__ = 'genome' + __tablename__ = "genome" genome_id = Column(Integer, primary_key=True) - genome_uuid = Column(String(128), nullable=False, unique=True) - assembly_id = Column(ForeignKey('assembly.assembly_id'), nullable=False, index=True) - organism_id = Column(ForeignKey('organism.organism_id'), nullable=False, index=True) + genome_uuid = Column(String(128), nullable=False, unique=True, default=lambda: str(uuid.uuid4())) + assembly_id = Column(ForeignKey("assembly.assembly_id"), nullable=False, index=True) + organism_id = Column(ForeignKey("organism.organism_id"), nullable=False, index=True) created = Column(DATETIME(fsp=6), nullable=False) # One to many relationships # genome_id to genome_dataset and genome release - genome_datasets = relationship('GenomeDataset', back_populates='genome') - genome_releases = relationship('GenomeRelease', back_populates='genome') + genome_datasets = relationship("GenomeDataset", back_populates="genome") + genome_releases = relationship("GenomeRelease", back_populates="genome") # many to one relationships # assembly_id to assembly - assembly = relationship('Assembly', back_populates="genomes") + assembly = relationship("Assembly", back_populates="genomes") # organism_id to organism - organism = relationship('Organism', back_populates="genomes") + organism = relationship("Organism", back_populates="genomes") class GenomeDataset(Base): - __tablename__ = 'genome_dataset' + __tablename__ = "genome_dataset" genome_dataset_id = Column(Integer, primary_key=True) - dataset_id = Column(ForeignKey('dataset.dataset_id'), nullable=False, index=True) - genome_id = Column(ForeignKey('genome.genome_id'), nullable=False, index=True) - release_id = Column(ForeignKey('ensembl_release.release_id'), index=True) + dataset_id = 
Column(ForeignKey("dataset.dataset_id"), nullable=False, index=True) + genome_id = Column(ForeignKey("genome.genome_id"), nullable=False, index=True) + release_id = Column(ForeignKey("ensembl_release.release_id"), index=True) is_current = Column(TINYINT(1), nullable=False) # One to many relationships # none # many to one relationships # genome_dataset_id to genome - dataset = relationship('Dataset', back_populates="genome_datasets") + dataset = relationship("Dataset", back_populates="genome_datasets") # genome_id to genome - genome = relationship('Genome', back_populates="genome_datasets") + genome = relationship("Genome", back_populates="genome_datasets") # release_id to release - ensembl_release = relationship('EnsemblRelease', back_populates="genome_datasets") + ensembl_release = relationship("EnsemblRelease", back_populates="genome_datasets") class GenomeRelease(Base): - __tablename__ = 'genome_release' + __tablename__ = "genome_release" genome_release_id = Column(Integer, primary_key=True) - genome_id = Column(ForeignKey('genome.genome_id'), nullable=False, index=True) - release_id = Column(ForeignKey('ensembl_release.release_id'), nullable=False, index=True) + genome_id = Column(ForeignKey("genome.genome_id"), nullable=False, index=True) + release_id = Column(ForeignKey("ensembl_release.release_id"), nullable=False, index=True) is_current = Column(TINYINT(1), nullable=False) # One to many relationships # none # many to one relationships # genome_release_id to genome_release - genome = relationship('Genome', back_populates='genome_releases') + genome = relationship("Genome", back_populates="genome_releases") # release_id to ensembl release - ensembl_release = relationship('EnsemblRelease', back_populates='genome_releases') + ensembl_release = relationship("EnsemblRelease", back_populates="genome_releases") diff --git a/src/ensembl/production/metadata/api/models/organism.py b/src/ensembl/production/metadata/api/models/organism.py index c5b8d700..9dc4f747 100644 
--- a/src/ensembl/production/metadata/api/models/organism.py +++ b/src/ensembl/production/metadata/api/models/organism.py @@ -9,6 +9,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import uuid + from sqlalchemy import Column, Integer, String, Index, ForeignKey from sqlalchemy.dialects.mysql import TINYINT from sqlalchemy.orm import relationship @@ -17,9 +19,10 @@ class Organism(Base): - __tablename__ = 'organism' + __tablename__ = "organism" organism_id = Column(Integer, primary_key=True) + organism_uuid = Column(String(128), unique=True, nullable=False, default=lambda: str(uuid.uuid4())) taxonomy_id = Column(Integer, nullable=False) species_taxonomy_id = Column(Integer) display_name = Column(String(128), nullable=False) @@ -30,21 +33,21 @@ class Organism(Base): scientific_parlance_name = Column(String(255)) # One to many relationships # Organism_id to organism_group_member and genome - genomes = relationship('Genome', back_populates='organism') - organism_group_members = relationship('OrganismGroupMember', back_populates='organism') + genomes = relationship("Genome", back_populates="organism") + organism_group_members = relationship("OrganismGroupMember", back_populates="organism") # many to one relationships # organim_id and taxonomy_id to taxonomy_node #DIFFERENT DATABASE def __repr__(self): - return f'organism_id={self.organism_id}, taxonomy_id={self.taxonomy_id}, species_taxonomy_id={self.species_taxonomy_id}, ' \ - f'display_name={self.display_name}, strain={self.strain}, scientific_name={self.scientific_name}, ' \ - f'url_name={self.url_name}, ensembl_name={self.ensembl_name}, scientific_parlance_name={self.scientific_parlance_name}' + return f"organism_id={self.organism_id}, taxonomy_id={self.taxonomy_id}, species_taxonomy_id={self.species_taxonomy_id}, " \ + f"display_name={self.display_name}, strain={self.strain}, 
scientific_name={self.scientific_name}, " \ + f"url_name={self.url_name}, ensembl_name={self.ensembl_name}, scientific_parlance_name={self.scientific_parlance_name}" class OrganismGroup(Base): - __tablename__ = 'organism_group' + __tablename__ = "organism_group" __table_args__ = ( - Index('group_type_name_63c2f6ac_uniq', 'type', 'name', unique=True), + Index("group_type_name_63c2f6ac_uniq", "type", "name", unique=True), ) organism_group_id = Column(Integer, primary_key=True) @@ -53,34 +56,34 @@ class OrganismGroup(Base): code = Column(String(48), unique=True) # One to many relationships # Organism_group_id to organism_group_member - organism_group_members = relationship('OrganismGroupMember', back_populates='organism_group') + organism_group_members = relationship("OrganismGroupMember", back_populates="organism_group") # many to one relationships # none def __repr__(self): - return f'organism_group_id={self.organism_group_id}, type={self.type}, name={self.name}, ' \ - f'code={self.code}' + return f"organism_group_id={self.organism_group_id}, type={self.type}, name={self.name}, " \ + f"code={self.code}" class OrganismGroupMember(Base): - __tablename__ = 'organism_group_member' + __tablename__ = "organism_group_member" __table_args__ = ( - Index('organism_group_member_organism_id_organism_gro_fe8f49ac_uniq', 'organism_id', 'organism_group_id', + Index("organism_group_member_organism_id_organism_gro_fe8f49ac_uniq", "organism_id", "organism_group_id", unique=True), ) organism_group_member_id = Column(Integer, primary_key=True) is_reference = Column(TINYINT(1), nullable=False) - organism_id = Column(ForeignKey('organism.organism_id'), nullable=False) - organism_group_id = Column(ForeignKey('organism_group.organism_group_id'), nullable=False, index=True) + organism_id = Column(ForeignKey("organism.organism_id"), nullable=False) + organism_group_id = Column(ForeignKey("organism_group.organism_group_id"), nullable=False, index=True) # One to many relationships # none # 
many to one relationships # Organism_group_id to organism_group_member # organism_id to organism - organism_group = relationship('OrganismGroup', back_populates='organism_group_members') - organism = relationship('Organism', back_populates='organism_group_members') + organism_group = relationship("OrganismGroup", back_populates="organism_group_members") + organism = relationship("Organism", back_populates="organism_group_members") def __repr__(self): - return f'organism_group_member_id={self.organism_group_member_id}, is_reference={self.is_reference}, organism_id={self.organism_id}, ' \ - f'organism_group_id={self.organism_group_id}' + return f"organism_group_member_id={self.organism_group_member_id}, is_reference={self.is_reference}, organism_id={self.organism_id}, " \ + f"organism_group_id={self.organism_group_id}" diff --git a/src/ensembl/production/metadata/api/sample/ensembl_metadata/assembly.txt b/src/ensembl/production/metadata/api/sample/ensembl_metadata/assembly.txt index 877b159a..872ad2e8 100644 --- a/src/ensembl/production/metadata/api/sample/ensembl_metadata/assembly.txt +++ b/src/ensembl/production/metadata/api/sample/ensembl_metadata/assembly.txt @@ -1,7 +1,7 @@ -1 hg38 GCA_000001405.28 chromosome GRCh38.p13 \N GRCh38 \N 2023-05-12 13:30:58 GRCh38.p13 -2 hg19 GCA_000001405.14 chromosome GRCh37.p13 \N GRCh37 \N 2023-05-12 13:32:06 GRCh37.p13 -3 \N GCA_000005845.2 chromosome ASM584v2 \N ASM584v2 \N 2023-05-12 13:32:14 ASM584v2 -4 \N GCA_000002765.2 chromosome ASM276v2 \N ASM276v2 \N 2023-05-12 13:32:25 ASM276v2 -5 \N GCA_900519105.1 chromosome IWGSC \N IWGSC \N 2023-05-12 13:32:36 IWGSC -6 \N GCA_000146045.2 chromosome R64-1-1 \N R64-1-1 \N 2023-05-12 13:32:46 R64-1-1 -7 \N GCA_000002985.3 chromosome WBcel235 \N WBcel235 \N 2023-05-12 13:32:52 WBcel235 +1 eeaaa2bf-151c-4848-8b85-a05a9993101e hg38 GCA_000001405.28 chromosome GRCh38.p13 \N GRCh38 \N 2023-05-12 13:30:58 GRCh38.p13 +2 633034c3-2268-40a2-866a-9f492cac84bf hg19 GCA_000001405.14 chromosome 
GRCh37.p13 \N GRCh37 \N 2023-05-12 13:32:06 GRCh37.p13 +3 f78618ef-1075-47ee-a496-be26cad47912 \N GCA_000005845.2 chromosome ASM584v2 \N ASM584v2 \N 2023-05-12 13:32:14 ASM584v2 +4 224d836f-36a7-4c4e-b917-ecff740e404f \N GCA_000002765.2 chromosome ASM276v2 \N ASM276v2 \N 2023-05-12 13:32:25 ASM276v2 +5 ec1c4b53-c2ef-431c-ad0e-b2aef19b44f1 \N GCA_900519105.1 chromosome IWGSC \N IWGSC \N 2023-05-12 13:32:36 IWGSC +6 7e8ed3a8-d724-4cba-92e1-e968719b7a18 \N GCA_000146045.2 chromosome R64-1-1 \N R64-1-1 \N 2023-05-12 13:32:46 R64-1-1 +7 f7de35c9-e0e8-4e81-b186-2962098d6361 \N GCA_000002985.3 chromosome WBcel235 \N WBcel235 \N 2023-05-12 13:32:52 WBcel235 diff --git a/src/ensembl/production/metadata/api/sample/ensembl_metadata/ensembl_release.txt b/src/ensembl/production/metadata/api/sample/ensembl_metadata/ensembl_release.txt index 65554a9f..2de6f884 100644 --- a/src/ensembl/production/metadata/api/sample/ensembl_metadata/ensembl_release.txt +++ b/src/ensembl/production/metadata/api/sample/ensembl_metadata/ensembl_release.txt @@ -1,4 +1,4 @@ -1 108.0 2023-05-15 Beta Release 1 0 1 integrated +1 108.0 2023-05-15 Beta Release 1 1 1 integrated 2 108.1 2023-09-15 Scaling Phase 1 0 1 partial 3 108.2 2023-11-15 Scaling Phase 2 0 1 partial -4 109.0 2023-12-15 MVP Release 1 1 integrated +4 110.0 2023-12-15 MVP Release 1 0 1 integrated diff --git a/src/ensembl/production/metadata/api/sample/ensembl_metadata/genome_release.txt b/src/ensembl/production/metadata/api/sample/ensembl_metadata/genome_release.txt index 3b5e278e..dcb5008c 100644 --- a/src/ensembl/production/metadata/api/sample/ensembl_metadata/genome_release.txt +++ b/src/ensembl/production/metadata/api/sample/ensembl_metadata/genome_release.txt @@ -1,5 +1,5 @@ 1 1 1 1 -2 2 1 1 +2 2 1 0 3 3 1 1 4 4 1 1 5 5 1 1 diff --git a/src/ensembl/production/metadata/api/sample/ensembl_metadata/organism.txt b/src/ensembl/production/metadata/api/sample/ensembl_metadata/organism.txt index 82d68407..a1e8896d 100644 --- 
a/src/ensembl/production/metadata/api/sample/ensembl_metadata/organism.txt +++ b/src/ensembl/production/metadata/api/sample/ensembl_metadata/organism.txt @@ -1,6 +1,6 @@ -1 9606 9606 Human \N Homo sapiens Homo_sapiens homo_sapiens -2 511145 562 Escherichia coli str. K-12 substr. MG1655 str. K12 (GCA_000005845) \N Escherichia coli str. K-12 substr. MG1655 str. K12 (GCA_000005845) Escherichia_coli_str_k_12_substr_mg1655_gca_000005845 escherichia_coli_str_k_12_substr_mg1655_gca_000005845 -3 36329 5833 Plasmodium falciparum 3D7 \N Plasmodium falciparum 3D7 Plasmodium_falciparum plasmodium_falciparum -4 4565 4565 Triticum aestivum reference (Chinese spring) Triticum aestivum Triticum_aestivum triticum_aestivum -5 559292 4932 Saccharomyces cerevisiae S288C Saccharomyces cerevisiae S288c Saccharomyces_cerevisiae saccharomyces_cerevisiae -6 6239 6239 Caenorhabditis elegans (PRJNA13758) N2 Caenorhabditis elegans Caenorhabditis_elegans caenorhabditis_elegans +1 db2a5f09-2db8-429b-a407-c15a4ca2876d 9606 9606 Human \N Homo sapiens Homo_sapiens homo_sapiens +2 21279e3e-e651-43e1-a6fc-79e390b9e8a8 511145 562 Escherichia coli str. K-12 substr. MG1655 str. K12 (GCA_000005845) \N Escherichia coli str. K-12 substr. MG1655 str. 
K12 (GCA_000005845) Escherichia_coli_str_k_12_substr_mg1655_gca_000005845 escherichia_coli_str_k_12_substr_mg1655_gca_000005845 +3 e61faf49-0964-4d0e-8f3a-b2ffa3514698 36329 5833 Plasmodium falciparum 3D7 \N Plasmodium falciparum 3D7 Plasmodium_falciparum plasmodium_falciparum +4 d64c34ca-b37a-476b-83b5-f21d07a3ae67 4565 4565 Triticum aestivum reference (Chinese spring) Triticum aestivum Triticum_aestivum triticum_aestivum +5 0dc46f87-0b61-403a-8cd3-86b7e0cce8f0 559292 4932 Saccharomyces cerevisiae S288C Saccharomyces cerevisiae S288c Saccharomyces_cerevisiae saccharomyces_cerevisiae +6 0f4aad7b-db15-4a72-af1e-82bbae54226 6239 6239 Caenorhabditis elegans (PRJNA13758) N2 Caenorhabditis elegans Caenorhabditis_elegans caenorhabditis_elegans diff --git a/src/ensembl/production/metadata/api/sample/ensembl_metadata/table.sql b/src/ensembl/production/metadata/api/sample/ensembl_metadata/table.sql index 3667c542..a0176efe 100644 --- a/src/ensembl/production/metadata/api/sample/ensembl_metadata/table.sql +++ b/src/ensembl/production/metadata/api/sample/ensembl_metadata/table.sql @@ -1,7 +1,7 @@ CREATE TABLE assembly ( - assembly_id int auto_increment - primary key, + assembly_id int auto_increment primary key, + assembly_uuid varchar(128) not null, ucsc_name varchar(16) null, accession varchar(16) not null, level varchar(32) not null, @@ -11,6 +11,8 @@ CREATE TABLE assembly tol_id varchar(32) null, created datetime null, ensembl_name varchar(255) null, + constraint assembly_uuid + unique (assembly_uuid), constraint accession unique (accession), constraint assembly_ensembl_name_uindex @@ -147,6 +149,7 @@ CREATE TABLE organism ( organism_id int auto_increment primary key, + organism_uuid varchar(128) not null, taxonomy_id int not null, species_taxonomy_id int null, display_name varchar(128) not null, @@ -155,6 +158,8 @@ CREATE TABLE organism url_name varchar(128) not null, ensembl_name varchar(128) not null, scientific_parlance_name varchar(255) null, + constraint 
organism_uuid + unique (organism_uuid), constraint ensembl_name unique (ensembl_name) ); From 6dfade837825f73ced19f0982a772832f5b16306 Mon Sep 17 00:00:00 2001 From: Marc Chakiachvili Date: Tue, 20 Jun 2023 12:17:27 +0100 Subject: [PATCH 2/2] Updated target revision --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 1cc5f657..8cfbc905 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.1.0 \ No newline at end of file +1.1.1 \ No newline at end of file